From 5f468cc21ef621151c200edfeea0411342c6d8bb Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Fri, 11 Sep 2020 09:11:35 +0900
Subject: [PATCH 0001/1009] [SPARK-32822][SQL] Change the number of partitions
 to zero when a range is empty with WholeStageCodegen disabled or fallen back

### What changes were proposed in this pull request?

This PR changes the behavior of RangeExec when WholeStageCodegen is disabled or fallen back, so that the number of partitions becomes zero when a range is empty.

In the current master, if WholeStageCodegen takes effect, the number of partitions of an empty range is already zero.
```
spark.range(1, 1, 1, 1000).rdd.getNumPartitions
res0: Int = 0
```
But it is not if WholeStageCodegen is disabled or fallen back.
```
spark.conf.set("spark.sql.codegen.wholeStage", false)
spark.range(1, 1, 1, 1000).rdd.getNumPartitions
res2: Int = 1000
```

### Why are the changes needed?

To achieve better performance even when WholeStageCodegen is disabled or fallen back.

### Does this PR introduce _any_ user-facing change?

Yes. The number of partitions returned by `getNumPartitions` for an empty range changes when WholeStageCodegen is disabled.

### How was this patch tested?

New test.

Closes #29681 from sarutak/zero-size-range.

Authored-by: Kousuke Saruta
Signed-off-by: Takeshi Yamamuro
---
 .../execution/basicPhysicalOperators.scala | 105 ++++++++++--------
 .../spark/sql/execution/PlannerSuite.scala |   7 ++
 2 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index c240a182d32bb..1f70fde3f7654 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -371,6 +371,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
   val step: Long = range.step
   val numSlices: Int = range.numSlices.getOrElse(sparkContext.defaultParallelism)
   val numElements: BigInt = range.numElements
+  val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step)
 
   override val output: Seq[Attribute] = range.output
 
@@ -396,7 +397,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
   }
 
   override def inputRDDs(): Seq[RDD[InternalRow]] = {
-    val rdd = if (start == end || (start < end ^ 0 < step)) {
+    val rdd = if (isEmptyRange) {
       new EmptyRDD[InternalRow](sqlContext.sparkContext)
     } else {
       sqlContext.sparkContext.parallelize(0 until numSlices, numSlices).map(i => InternalRow(i))
@@ -562,58 +563,64 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range)
 
   protected override def doExecute(): RDD[InternalRow] = {
     val numOutputRows = longMetric("numOutputRows")
-    sqlContext
-      .sparkContext
-      .parallelize(0 until numSlices, numSlices)
-      .mapPartitionsWithIndex { (i, _) =>
-        val partitionStart = (i * numElements) / numSlices * step + start
-        val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start
-        def getSafeMargin(bi: BigInt): Long =
-          if (bi.isValidLong) {
-            bi.toLong
-          } else if (bi > 0) {
-            Long.MaxValue
-          } else {
-            Long.MinValue
-          }
-        val safePartitionStart = getSafeMargin(partitionStart)
-        val safePartitionEnd = getSafeMargin(partitionEnd)
-        val rowSize = UnsafeRow.calculateBitSetWidthInBytes(1) + LongType.defaultSize
-        val unsafeRow = UnsafeRow.createFromByteArray(rowSize, 1)
-        val taskContext =
TaskContext.get() - - val iter = new Iterator[InternalRow] { - private[this] var number: Long = safePartitionStart - private[this] var overflow: Boolean = false - private[this] val inputMetrics = taskContext.taskMetrics().inputMetrics - - override def hasNext = - if (!overflow) { - if (step > 0) { - number < safePartitionEnd - } else { - number > safePartitionEnd - } - } else false - - override def next() = { - val ret = number - number += step - if (number < ret ^ step < 0) { - // we have Long.MaxValue + Long.MaxValue < Long.MaxValue - // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step - // back, we are pretty sure that we have an overflow. - overflow = true + if (isEmptyRange) { + new EmptyRDD[InternalRow](sqlContext.sparkContext) + } else { + sqlContext + .sparkContext + .parallelize(0 until numSlices, numSlices) + .mapPartitionsWithIndex { (i, _) => + val partitionStart = (i * numElements) / numSlices * step + start + val partitionEnd = (((i + 1) * numElements) / numSlices) * step + start + + def getSafeMargin(bi: BigInt): Long = + if (bi.isValidLong) { + bi.toLong + } else if (bi > 0) { + Long.MaxValue + } else { + Long.MinValue } - numOutputRows += 1 - inputMetrics.incRecordsRead(1) - unsafeRow.setLong(0, ret) - unsafeRow + val safePartitionStart = getSafeMargin(partitionStart) + val safePartitionEnd = getSafeMargin(partitionEnd) + val rowSize = UnsafeRow.calculateBitSetWidthInBytes(1) + LongType.defaultSize + val unsafeRow = UnsafeRow.createFromByteArray(rowSize, 1) + val taskContext = TaskContext.get() + + val iter = new Iterator[InternalRow] { + private[this] var number: Long = safePartitionStart + private[this] var overflow: Boolean = false + private[this] val inputMetrics = taskContext.taskMetrics().inputMetrics + + override def hasNext = + if (!overflow) { + if (step > 0) { + number < safePartitionEnd + } else { + number > safePartitionEnd + } + } else false + + override def next() = { + val ret = number + number += step + if (number < ret ^ step < 0) { + // we have Long.MaxValue + Long.MaxValue < Long.MaxValue + // and Long.MinValue + Long.MinValue > Long.MinValue, so iff the step causes a step + // back, we are pretty sure that we have an overflow. 
+ overflow = true + } + + numOutputRows += 1 + inputMetrics.incRecordsRead(1) + unsafeRow.setLong(0, ret) + unsafeRow + } } + new InterruptibleIterator(taskContext, iter) } - new InterruptibleIterator(taskContext, iter) - } + } } override def simpleString(maxFields: Int): String = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index d428b7ebc0e91..ca52e51c87ea7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -994,6 +994,13 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } } + + testWithWholeStageCodegenOnAndOff("Change the number of partitions to zero " + + "when a range is empty") { _ => + val range = spark.range(1, 1, 1, 1000) + val numPartitions = range.rdd.getNumPartitions + assert(numPartitions == 0) + } } // Used for unit-testing EnsureRequirements From 328d81a2d1131742bcfba5117896c093db39e721 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Sep 2020 09:22:56 +0900 Subject: [PATCH 0002/1009] [SPARK-32677][SQL][DOCS][MINOR] Improve code comment in CreateFunctionCommand ### What changes were proposed in this pull request? We made a mistake in https://github.com/apache/spark/pull/29502, as there is no code comment to explain why we can't load the UDF class when creating functions. This PR improves the code comment. ### Why are the changes needed? To avoid making the same mistake. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #29713 from cloud-fan/comment. Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/execution/command/functions.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index fae8de4780102..d76b4b8894783 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -88,7 +88,9 @@ case class CreateFunctionCommand( } else { // For a permanent, we will store the metadata into underlying external catalog. // This function will be loaded into the FunctionRegistry when a query uses it. - // We do not load it into FunctionRegistry right now. + // We do not load it into FunctionRegistry right now, to avoid loading the resource and + // UDF class immediately, as the Spark application to create the function may not have + // access to the resource and/or UDF class. catalog.createFunction(func, ignoreIfExists) } } From fe2ab255d14bbccb72b95ed776b74e86cb9762b6 Mon Sep 17 00:00:00 2001 From: yangjiang Date: Fri, 11 Sep 2020 08:05:34 -0500 Subject: [PATCH 0003/1009] [MINOR][SQL] Fix a typo at 'spark.sql.sources.fileCompressionFactor' error message in SQLConf ### What changes were proposed in this pull request? fix typo in SQLConf ### Why are the changes needed? typo fix to increase readability ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? no test Closes #29668 from Ted-Jiang/fix_annotate. 
Authored-by: yangjiang Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dca421a09da62..dae715ae827e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -573,7 +573,7 @@ object SQLConf { " a heavily underestimated result.") .version("2.3.1") .doubleConf - .checkValue(_ > 0, "the value of fileDataSizeFactor must be greater than 0") + .checkValue(_ > 0, "the value of fileCompressionFactor must be greater than 0") .createWithDefault(1.0) val PARQUET_SCHEMA_MERGING_ENABLED = buildConf("spark.sql.parquet.mergeSchema") From 9f4f49cbaa3def9f7d8573629ff3b6cbd6833b2f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 11 Sep 2020 06:15:58 -0700 Subject: [PATCH 0004/1009] [SPARK-32853][SQL] Consecutive save/load calls in DataFrame/StreamReader/Writer should not fail ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/29328 In https://github.com/apache/spark/pull/29328 , we forbid the use case that path option and path parameter are both specified. However, it breaks some use cases: ``` val dfr = spark.read.format(...).option(...) dfr.load(path1).xxx dfr.load(path2).xxx ``` The reason is that: `load` has side effects. It will set path option to the `DataFrameReader` instance. The next time you call `load`, Spark will fail because both path option and path parameter are specified. This PR removes the side effect of `save`/`load`/`start` to not set the path option. ### Why are the changes needed? recover some use cases ### Does this PR introduce _any_ user-facing change? Yes, some use cases fail before this PR, and can run successfully after this PR. ### How was this patch tested? new tests Closes #29723 from cloud-fan/df. 
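For illustration, a minimal sketch of why removing the side effect matters. The `ToyReader` class below is an assumption made for this example only (it is not Spark's `DataFrameReader`); it just mirrors the shape of the bug and of the fix described above:

```
// Toy builder that mirrors only the shape of the problem, not Spark's API.
class ToyReader {
  private var opts = Map.empty[String, String]

  def option(key: String, value: String): ToyReader = { opts += (key -> value); this }

  // Pre-fix shape: load() writes the path back into the shared option map, so
  // a second load() sees both a "path" option and a path parameter and fails.
  def loadMutating(path: String): Map[String, String] = {
    require(!opts.contains("path"), "path option and path parameter are both specified")
    opts += ("path" -> path)
    opts
  }

  // Post-fix shape: the path is merged into a local copy and the builder state
  // stays untouched, so load(path1) followed by load(path2) keeps working.
  def load(path: String): Map[String, String] = {
    require(!opts.contains("path"), "path option and path parameter are both specified")
    opts + ("path" -> path)
  }
}
```

With `loadMutating`, the first call silently turns the path into a sticky option; with `load`, nothing leaks between calls, which is exactly the guarantee the consecutive-call use case needs.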
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/DataFrameReader.scala | 34 ++++++++------- .../apache/spark/sql/DataFrameWriter.scala | 41 +++++++++++++------ .../sql/streaming/DataStreamReader.scala | 19 ++++++--- .../sql/streaming/DataStreamWriter.scala | 27 ++++++++---- .../test/DataStreamReaderWriterSuite.scala | 15 +++++++ .../sql/test/DataFrameReaderWriterSuite.scala | 9 ++++ 6 files changed, 103 insertions(+), 42 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index ab6b1ff5daccf..ab18a3119c09f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -260,25 +260,22 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - val updatedPaths = if (!legacyPathOptionBehavior && paths.length == 1) { - option("path", paths.head) - Seq.empty - } else { - paths - } - DataSource.lookupDataSourceV2(source, sparkSession.sessionState.conf).map { provider => val catalogManager = sparkSession.sessionState.catalogManager val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val pathsOption = if (updatedPaths.isEmpty) { - None + + val optionsWithPath = if (paths.isEmpty) { + extraOptions + } else if (paths.length == 1) { + extraOptions + ("path" -> paths.head) } else { val objectMapper = new ObjectMapper() - Some("paths" -> objectMapper.writeValueAsString(updatedPaths.toArray)) + extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray)) } - val finalOptions = sessionOptions ++ extraOptions.originalMap ++ pathsOption + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val (table, catalog, ident) = provider match { case _: SupportsCatalogOptions if userSpecifiedSchema.nonEmpty => @@ -303,20 +300,27 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { sparkSession, DataSourceV2Relation.create(table, catalog, ident, dsOptions)) - case _ => loadV1Source(updatedPaths: _*) + case _ => loadV1Source(paths: _*) } - }.getOrElse(loadV1Source(updatedPaths: _*)) + }.getOrElse(loadV1Source(paths: _*)) } private def loadV1Source(paths: String*) = { + val legacyPathOptionBehavior = sparkSession.sessionState.conf.legacyPathOptionBehavior + val (finalPaths, finalOptions) = if (!legacyPathOptionBehavior && paths.length == 1) { + (Nil, extraOptions + ("path" -> paths.head)) + } else { + (paths, extraOptions) + } + // Code path for data source v1. 
sparkSession.baseRelationToDataFrame( DataSource.apply( sparkSession, - paths = paths, + paths = finalPaths, userSpecifiedSchema = userSpecifiedSchema, className = source, - options = extraOptions.originalMap).resolveRelation()) + options = finalOptions.originalMap).resolveRelation()) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 25ca186c65f04..bd1997bee53f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -291,8 +291,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { "parameter. Either remove the path option, or call save() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - this.extraOptions = this.extraOptions + ("path" -> path) - save() + saveInternal(Some(path)) } /** @@ -300,7 +299,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * * @since 1.4.0 */ - def save(): Unit = { + def save(): Unit = saveInternal(None) + + private def saveInternal(path: Option[String]): Unit = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") @@ -313,8 +314,16 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val provider = maybeV2Provider.get val sessionOptions = DataSourceV2Utils.extractSessionConfigs( provider, df.sparkSession.sessionState.conf) - val options = sessionOptions.filterKeys(!extraOptions.contains(_)) ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.toMap.asJava) + + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { // For file source, it's expensive to infer schema/partition at each write. Here we pass @@ -350,7 +359,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // Streaming also uses the data source V2 API. So it may be that the data source // implements v2, but has no v2 implementation for batch writes. In that case, we // fall back to saving as though it's a V1 source. - return saveToV1Source() + return saveToV1Source(path) } } @@ -358,14 +367,14 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { checkPartitioningMatchesV2Table(table) if (mode == SaveMode.Append) { runCommand(df.sparkSession, "save") { - AppendData.byName(relation, df.logicalPlan, extraOptions.toMap) + AppendData.byName(relation, df.logicalPlan, finalOptions) } } else { // Truncate the table. 
TableCapabilityCheck will throw a nice exception if this // isn't supported runCommand(df.sparkSession, "save") { OverwriteByExpression.byName( - relation, df.logicalPlan, Literal(true), extraOptions.toMap) + relation, df.logicalPlan, Literal(true), finalOptions) } } @@ -385,7 +394,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { partitioningAsV2, df.queryExecution.analyzed, Map(TableCatalog.PROP_PROVIDER -> source) ++ location, - extraOptions.toMap, + finalOptions, ignoreIfExists = createMode == SaveMode.Ignore) } case _: TableProvider => @@ -397,30 +406,36 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { // Streaming also uses the data source V2 API. So it may be that the data source // implements v2, but has no v2 implementation for batch writes. In that case, we // fallback to saving as though it's a V1 source. - saveToV1Source() + saveToV1Source(path) } } } } else { - saveToV1Source() + saveToV1Source(path) } } - private def saveToV1Source(): Unit = { + private def saveToV1Source(path: Option[String]): Unit = { partitioningColumns.foreach { columns => extraOptions = extraOptions + ( DataSourceUtils.PARTITIONING_COLUMNS_KEY -> DataSourceUtils.encodePartitioningColumns(columns)) } + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + // Code path for data source v1. runCommand(df.sparkSession, "save") { DataSource( sparkSession = df.sparkSession, className = source, partitionColumns = partitioningColumns.getOrElse(Nil), - options = extraOptions.toMap).planForWriting(mode, df.logicalPlan) + options = optionsWithPath.originalMap).planForWriting(mode, df.logicalPlan) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 5302357d2bfa0..c22f917d3cf91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -188,12 +188,20 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * * @since 2.0.0 */ - def load(): DataFrame = { + def load(): DataFrame = loadInternal(None) + + private def loadInternal(path: Option[String]): DataFrame = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "read files of Hive data source directly.") } + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + val ds = DataSource.lookupDataSource(source, sparkSession.sqlContext.conf). getConstructor().newInstance() // We need to generate the V1 data source so we can pass it to the V2 relation as a shim. 
@@ -203,7 +211,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = source, - options = extraOptions.toMap) + options = optionsWithPath.originalMap) val v1Relation = ds match { case _: StreamSourceProvider => Some(StreamingRelation(v1DataSource)) case _ => None @@ -213,8 +221,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] => val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val options = sessionOptions ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.asJava) + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { @@ -247,7 +256,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo "parameter. Either remove the path option, or call load() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - option("path", path).load() + loadInternal(Some(path)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 51ec1e7b8fea1..682f3b98ec2e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -274,7 +274,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { "path parameter. Either remove the path option, or call start() without the parameter. " + s"To ignore this check, set '${SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key}' to 'true'.") } - option("path", path).start() + startInternal(Some(path)) } /** @@ -292,7 +292,9 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 2.0.0 */ @throws[TimeoutException] - def start(): StreamingQuery = { + def start(): StreamingQuery = startInternal(None) + + private def startInternal(path: Option[String]): StreamingQuery = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") @@ -353,29 +355,36 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { // file source v2 does not support streaming yet. 
classOf[FileDataSourceV2].isAssignableFrom(cls) + val optionsWithPath = if (path.isEmpty) { + extraOptions + } else { + extraOptions + ("path" -> path.get) + } + val sink = if (classOf[TableProvider].isAssignableFrom(cls) && !useV1Source) { val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = df.sparkSession.sessionState.conf) - val options = sessionOptions ++ extraOptions.toMap - val dsOptions = new CaseInsensitiveStringMap(options.asJava) + val finalOptions = + sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider( provider, dsOptions, userSpecifiedSchema = None) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { case table: SupportsWrite if table.supports(STREAMING_WRITE) => table - case _ => createV1Sink() + case _ => createV1Sink(optionsWithPath) } } else { - createV1Sink() + createV1Sink(optionsWithPath) } df.sparkSession.sessionState.streamingQueryManager.startQuery( extraOptions.get("queryName"), extraOptions.get("checkpointLocation"), df, - extraOptions.toMap, + optionsWithPath.originalMap, sink, outputMode, useTempCheckpointLocation = source == "console" || source == "noop", @@ -384,11 +393,11 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { } } - private def createV1Sink(): Sink = { + private def createV1Sink(optionsWithPath: CaseInsensitiveMap[String]): Sink = { val ds = DataSource( df.sparkSession, className = source, - options = extraOptions.toMap, + options = optionsWithPath.originalMap, partitionColumns = normalizedParCols.getOrElse(Nil)) ds.createSink(outputMode) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 64b0cb296635a..a59eca25fe28e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -798,4 +798,19 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { } } } + + test("SPARK-32853: consecutive load/start calls should be allowed") { + val dfr = spark.readStream.format(classOf[DefaultSource].getName) + var df = dfr.load("1") + df = dfr.load("2") + withTempDir { checkpointPath => + val dfw = df.writeStream + .option("checkpointLocation", checkpointPath.getCanonicalPath) + .format(classOf[DefaultSource].getName) + var query = dfw.start("1") + query.stop() + query = dfw.start("2") + query.stop() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index c4ca85d6237b2..eaca63c74c875 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -1190,4 +1190,13 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with verifyLoadFails(df.write.option("path", path).format("parquet").save(path)) verifyLoadFails(df.write.option("path", path).format("parquet").save("")) } + + test("SPARK-32853: consecutive load/save calls should be allowed") { 
+ val dfr = spark.read.format(classOf[FakeSourceOne].getName) + dfr.load("1") + dfr.load("2") + val dfw = spark.range(10).write.format(classOf[DefaultSource].getName) + dfw.save("1") + dfw.save("2") + } } From 94cac5978cf33f99a9f28180c9c909d5c884c152 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 11 Sep 2020 13:42:33 +0000 Subject: [PATCH 0005/1009] [SPARK-32730][SQL][FOLLOW-UP] Improve LeftAnti SortMergeJoin right side buffering ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/29572. LeftAnti SortMergeJoin should not buffer all matching right side rows when bound condition is empty, this is unnecessary and can lead to performance degradation especially when spilling happens. ### Why are the changes needed? Performance improvement. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29727 from peter-toth/SPARK-32730-improve-leftsemi-sortmergejoin-followup. Authored-by: Peter Toth Signed-off-by: Wenchen Fan --- .../spark/sql/execution/joins/SortMergeJoinExec.scala | 3 ++- .../src/test/scala/org/apache/spark/sql/JoinSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 097ea61f13832..6e59ad07d7168 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -288,7 +288,8 @@ case class SortMergeJoinExec( RowIterator.fromScala(rightIter), inMemoryThreshold, spillThreshold, - cleanupResources + cleanupResources, + condition.isEmpty ) private[this] val joinRow = new JoinedRow diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 942cf24a3a873..8755dccb801c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -757,6 +757,14 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) } + // LEFT ANTI JOIN without bound condition does not spill + assertNotSpilled(sparkContext, "left anti join") { + checkAnswer( + sql("SELECT * FROM testData LEFT ANTI JOIN testData2 ON key = a WHERE key = 2"), + Nil + ) + } + val expected = new ListBuffer[Row]() expected.append( Row(1, "1", 1, 1), Row(1, "1", 1, 2), From f6322d1cb149983fbcd5b90a804eeda0fe4e8a49 Mon Sep 17 00:00:00 2001 From: "Rohit.Mishra" Date: Fri, 11 Sep 2020 10:38:01 -0500 Subject: [PATCH 0006/1009] [SPARK-32180][PYTHON][DOCS] Installation page of Getting Started in PySpark documentation ### What changes were proposed in this pull request? This PR proposes to add getting started- installation to new PySpark docs. ### Why are the changes needed? Better documentation. ### Does this PR introduce _any_ user-facing change? No. Documentation only. ### How was this patch tested? Generating documents locally. Closes #29640 from rohitmishr1484/SPARK-32180-Getting-Started-Installation. 
Authored-by: Rohit.Mishra Signed-off-by: Sean Owen --- python/docs/source/getting_started/index.rst | 3 + .../source/getting_started/installation.rst | 114 ++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 python/docs/source/getting_started/installation.rst diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index cf4f7de11dbe3..0f3cea7d6ea58 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -20,7 +20,10 @@ Getting Started =============== +This page summarizes the basic steps required to setup and get started with PySpark. + .. toctree:: :maxdepth: 2 + installation quickstart diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst new file mode 100644 index 0000000000000..a2de0b2e2c9f4 --- /dev/null +++ b/python/docs/source/getting_started/installation.rst @@ -0,0 +1,114 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +============ +Installation +============ + +Official releases are available from the `Apache Spark website `_. +Alternatively, you can install it via ``pip`` from PyPI. PyPI installation is usually for standalone +locally or as a client to connect to a cluster instead of setting a cluster up. + +This page includes the instructions for installing PySpark by using pip, Conda, downloading manually, and building it from the source. + +Python Version Supported +------------------------ + +Python 3.6 and above. + +Using PyPI +---------- + +PySpark installation using `PyPI `_ + +.. code-block:: bash + + pip install pyspark + +Using Conda +----------- + +Conda is an open-source package management and environment management system which is a part of the `Anaconda `_ distribution. It is both cross-platform and language agnostic. + +Conda can be used to create a virtual environment from terminal as shown below: + +.. code-block:: bash + + conda create -n pyspark_env + +After the virtual environment is created, it should be visible under the list of Conda environments which can be seen using the following command: + +.. code-block:: bash + + conda env list + +The newly created environment can be accessed using the following command: + +.. code-block:: bash + + conda activate pyspark_env + +In Conda version earlier than 4.4, the following command should be used: + +.. code-block:: bash + + source activate pyspark_env + +Refer to `Using PyPI <#using-pypi>`_ to install PySpark in the newly created environment. + +Note that `PySpark at Conda `_ is available but not necessarily synced with PySpark release cycle because it is maintained by the community separately. 
+ +Official Release Channel +------------------------ + +Different flavors of PySpark is available in the `official release channel `_. +Any suitable version can be downloaded and extracted as below: + +.. code-block:: bash + + tar xzvf spark-3.0.0-bin-hadoop2.7.tgz + +Ensure the `SPARK_HOME` environment variable points to the directory where the code has been extracted. +Define `PYTHONPATH` such that it can find the PySpark and Py4J under `SPARK_HOME/python/lib`. +One example of doing this is shown below: + +.. code-block:: bash + + cd spark-3.0.0-bin-hadoop2.7 + export SPARK_HOME=`pwd` + export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH + +Installing from Source +---------------------- + +To install PySpark from source, refer to `Building Spark `_. + +Refer to `Official Release Channel <#official-release-channel>`_ for steps to define ``PYTHONPATH``. + +Dependencies +------------ +============= ========================= ================ +Package Minimum supported version Note +============= ========================= ================ +`pandas` 0.23.2 Optional for SQL +`NumPy` 1.7 Required for ML +`pyarrow` 0.15.1 Optional for SQL +`Py4J` 0.10.9 Required +============= ========================= ================ + +**Note**: PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. +If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer to `Downloading `_ \ No newline at end of file From b4be6a6d12bf62f02cffe0bcc97ef32d27827d57 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 11 Sep 2020 11:48:34 -0700 Subject: [PATCH 0007/1009] [SPARK-32845][SS][TESTS] Add sinkParameter to check sink options robustly in DataStreamReaderWriterSuite ### What changes were proposed in this pull request? This PR aims to add `sinkParameter` to check sink options robustly and independently in DataStreamReaderWriterSuite ### Why are the changes needed? `LastOptions.parameters` is designed to catch three cases: `sourceSchema`, `createSource`, `createSink`. However, `StreamQuery.stop` invokes `queryExecutionThread.join`, `runStream`, `createSource` immediately and reset the stored options by `createSink`. To catch `createSink` options, currently, the test suite is trying a workaround pattern. However, we observed a flakiness in this pattern sometimes. If we split `createSink` option separately, we don't need this workaround and can eliminate this flakiness. ```scala val query = df.writeStream. ... .start() assert(LastOptions.paramters(..)) query.stop() ``` ### Does this PR introduce _any_ user-facing change? No. This is a test-only change. ### How was this patch tested? Pass the newly updated test case. Closes #29730 from dongjoon-hyun/SPARK-32845. 
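Put differently, the patch gives the sink its own holder so the assertion no longer depends on whether `createSource` has run again. A condensed sketch of that idea (the `LastOptionsSketch` object below is illustrative, not the actual test harness):

```
object LastOptionsSketch {
  @volatile var parameters: Map[String, String] = null      // written by both source and sink
  @volatile var sinkParameters: Map[String, String] = null  // written only by the sink

  def createSource(opts: Map[String, String]): Unit =
    parameters = opts               // stop() re-runs this and can clobber `parameters`

  def createSink(opts: Map[String, String]): Unit = {
    parameters = opts               // old scheme: racy to assert on after stop()
    sinkParameters = opts           // new scheme: stable, so the suite asserts on this
  }
}
```

Asserting on `sinkParameters` right after `start().stop()` is then deterministic, which is the pattern the updated test suite follows.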
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../test/DataStreamReaderWriterSuite.scala | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index a59eca25fe28e..8d39704c61d4e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -43,11 +43,13 @@ object LastOptions { var mockStreamSourceProvider = mock(classOf[StreamSourceProvider]) var mockStreamSinkProvider = mock(classOf[StreamSinkProvider]) var parameters: Map[String, String] = null + var sinkParameters: Map[String, String] = null var schema: Option[StructType] = null var partitionColumns: Seq[String] = Nil def clear(): Unit = { parameters = null + sinkParameters = null schema = null partitionColumns = null reset(mockStreamSourceProvider) @@ -101,7 +103,7 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { parameters: Map[String, String], partitionColumns: Seq[String], outputMode: OutputMode): Sink = { - LastOptions.parameters = parameters + LastOptions.sinkParameters = parameters LastOptions.partitionColumns = partitionColumns LastOptions.mockStreamSinkProvider.createSink(spark, parameters, partitionColumns, outputMode) (_: Long, _: DataFrame) => {} @@ -170,20 +172,19 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { LastOptions.clear() - val query = df.writeStream + df.writeStream .format("org.apache.spark.sql.streaming.test") .option("opt1", "5") .options(Map("opt2" -> "4")) .options(map) .option("checkpointLocation", newMetadataDir) .start() + .stop() - assert(LastOptions.parameters("opt1") == "5") - assert(LastOptions.parameters("opt2") == "4") - assert(LastOptions.parameters("opt3") == "3") - assert(LastOptions.parameters.contains("checkpointLocation")) - - query.stop() + assert(LastOptions.sinkParameters("opt1") == "5") + assert(LastOptions.sinkParameters("opt2") == "4") + assert(LastOptions.sinkParameters("opt3") == "3") + assert(LastOptions.sinkParameters.contains("checkpointLocation")) } test("SPARK-32832: later option should override earlier options for load()") { @@ -204,7 +205,7 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { .load() assert(LastOptions.parameters.isEmpty) - val query = ds.writeStream + ds.writeStream .format("org.apache.spark.sql.streaming.test") .option("checkpointLocation", newMetadataDir) .option("paTh", "1") @@ -213,8 +214,8 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { .option("patH", "4") .option("path", "5") .start() - assert(LastOptions.parameters("path") == "5") - query.stop() + .stop() + assert(LastOptions.sinkParameters("path") == "5") } test("partitioning") { @@ -787,13 +788,13 @@ class DataStreamReaderWriterSuite extends StreamTest with BeforeAndAfter { withTempDir { checkpointPath => withSQLConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR.key -> "true", SQLConf.CHECKPOINT_LOCATION.key -> checkpointPath.getAbsolutePath) { - val query = df.writeStream + df.writeStream .format("org.apache.spark.sql.streaming.test") .option("path", "tmp4") .start("tmp5") + .stop() // The legacy behavior overwrites the path option. 
- assert(LastOptions.parameters("path") == "tmp5") - query.stop() + assert(LastOptions.sinkParameters("path") == "tmp5") } } } From 4269c2c252d5eecf6a861160556026ee399ad976 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 12 Sep 2020 08:42:07 +0900 Subject: [PATCH 0008/1009] [SPARK-32851][SQL][TEST] Tests should fail if errors happen when generating projection code ### What changes were proposed in this pull request? This PR intends to set `CODEGEN_ONLY` at `CODEGEN_FACTORY_MODE` in test spark context so that tests can fail if errors happen when generating expr code. ### Why are the changes needed? I noticed that the code generation of `SafeProjection` failed in the existing test (https://issues.apache.org/jira/browse/SPARK-32828) but it passed because `FALLBACK` was set at `CODEGEN_FACTORY_MODE` (by default) in `SharedSparkSession`. To get aware of these failures quickly, I think its worth setting `CODEGEN_ONLY` at `CODEGEN_FACTORY_MODE`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29721 from maropu/ExprCodegenTest. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/sql/test/SharedSparkSession.scala | 2 ++ .../test/scala/org/apache/spark/sql/hive/test/TestHive.scala | 2 ++ 2 files changed, 4 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index ee29b4b8fb32b..cfc92a780308d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -25,6 +25,7 @@ import org.scalatest.concurrent.Eventually import org.apache.spark.{DebugFilesystem, SparkConf} import org.apache.spark.internal.config.UNSAFE_EXCEPTION_ON_MEMORY_LEAK import org.apache.spark.sql.{SparkSession, SQLContext} +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -67,6 +68,7 @@ trait SharedSparkSessionBase .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName) .set(UNSAFE_EXCEPTION_ON_MEMORY_LEAK, true) .set(SQLConf.CODEGEN_FALLBACK.key, "false") + .set(SQLConf.CODEGEN_FACTORY_MODE.key, CodegenObjectFactoryMode.CODEGEN_ONLY.toString) // Disable ConvertToLocalRelation for better test coverage. 
Test cases built on // LocalRelation will exercise the optimization rules better by disabling it as // this rule may potentially block testing of other optimization rules such as diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index a2518e70a013b..f98534eb2b543 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -38,6 +38,7 @@ import org.apache.spark.internal.config.UI._ import org.apache.spark.sql.{DataFrame, Dataset, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -58,6 +59,7 @@ object TestHive new SparkConf() .set("spark.sql.test", "") .set(SQLConf.CODEGEN_FALLBACK.key, "false") + .set(SQLConf.CODEGEN_FACTORY_MODE.key, CodegenObjectFactoryMode.CODEGEN_ONLY.toString) .set(HiveUtils.HIVE_METASTORE_BARRIER_PREFIXES.key, "org.apache.spark.sql.hive.execution.PairSerDe") .set(WAREHOUSE_PATH.key, TestHiveContext.makeWarehouseDir().toURI.getPath) From ce566bed17f94ac3443ebed82ad406b43dbb13c2 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 11 Sep 2020 20:08:22 -0700 Subject: [PATCH 0009/1009] [SPARK-32180][FOLLOWUP] Fix .rst error in new Pyspark installation guide This simply fixes an .rst generation error in https://github.com/apache/spark/pull/29640 Closes #29735 from srowen/SPARK-32180.2. Authored-by: Sean Owen Signed-off-by: Takuya UESHIN --- python/docs/source/getting_started/installation.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst index a2de0b2e2c9f4..914045e898b2d 100644 --- a/python/docs/source/getting_started/installation.rst +++ b/python/docs/source/getting_started/installation.rst @@ -75,7 +75,7 @@ Note that `PySpark at Conda `_ is avai Official Release Channel ------------------------ -Different flavors of PySpark is available in the `official release channel `_. +Different flavors of PySpark are available in the `Apache Spark website `_. Any suitable version can be downloaded and extracted as below: .. code-block:: bash @@ -97,7 +97,7 @@ Installing from Source To install PySpark from source, refer to `Building Spark `_. -Refer to `Official Release Channel <#official-release-channel>`_ for steps to define ``PYTHONPATH``. +Refer to `steps above <#official-release-channel>`_ to define ``PYTHONPATH``. Dependencies ------------ From 2009f953406aa5b4fdcdcd35f4c7c143f34d53e3 Mon Sep 17 00:00:00 2001 From: "sandeep.katta" Date: Sat, 12 Sep 2020 13:22:54 -0700 Subject: [PATCH 0010/1009] [SPARK-32779][SQL][FOLLOW-UP] Delete Unused code ### What changes were proposed in this pull request? Follow-up PR as per the review comments in [29649](https://github.com/apache/spark/pull/29649/files/8d45542e915bea1b321f42988b407091065a2539#r487140171) ### Why are the changes needed? Delete the un used code ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Existing UT Closes #29736 from sandeep-katta/deadlockfollowup. Authored-by: sandeep.katta Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/hive/client/HiveShim.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 99c9199e466f9..4ab0599e4477b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -1327,8 +1327,6 @@ private[client] class Shim_v3_0 extends Shim_v2_3 { inheritTableSpecs: Boolean, isSkewedStoreAsSubdir: Boolean, isSrcLocal: Boolean): Unit = { - val session = SparkSession.getActiveSession - assert(session.nonEmpty) val table = hive.getTable(tableName) val loadFileType = if (replace) { clazzLoadFileType.getEnumConstants.find(_.toString.equalsIgnoreCase("REPLACE_ALL")) From bbbd907780cbd07507619bcc6d309e544e0c3471 Mon Sep 17 00:00:00 2001 From: KevinSmile Date: Sat, 12 Sep 2020 16:12:37 -0500 Subject: [PATCH 0011/1009] [SPARK-32804][LAUNCHER] Fix run-example command builder bug ### What changes were proposed in this pull request? Bug fix in run-example command builder (as described in [SPARK-32804], run-example failed in standalone-cluster mode): 1. Missing primaryResource arg. 2. Wrong appResource arg. which will affect `SparkSubmit` in Standalone-Cluster mode: https://github.com/apache/spark/blob/32d87c2b595b4aac2d9274424a43697299638f61/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala#L695-L696 and get error at: https://github.com/apache/spark/blob/f55694638d45f34ab91f6f6ec2066cbf7631f4af/core/src/main/scala/org/apache/spark/deploy/ClientArguments.scala#L74-L89 ### Why are the changes needed? Bug: run-example failed in standalone-cluster mode ### Does this PR introduce _any_ user-facing change? Yes. User can run-example in standalone-cluster mode now. ### How was this patch tested? New ut added. Also it's a user-facing bug, so better re-check the real case in [SPARK-32804]. Closes #29653 from KevinSmile/bug-fix-master. 
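The heart of the fix is in the Java launcher (see the diff that follows): the builder now uses the `spark-examples` jar as the primary resource instead of `SparkLauncher.NO_RESOURCE`. A hedged Scala rendering of the same selection rule, where `ExamplesJarSketch` is an illustrative name rather than real launcher code:

```
import java.io.File

object ExamplesJarSketch {
  // Pick the spark-examples jar out of the candidate jar list; that jar then
  // becomes the submit's primary resource in standalone-cluster deploy mode.
  def findExamplesAppJar(exampleJars: Seq[String]): String =
    exampleJars
      .find(jar => new File(jar).getName.startsWith("spark-examples"))
      .getOrElse(throw new IllegalStateException("Failed to find examples' main app jar."))
}
```

Supplying a real primary resource is what lets `ClientArguments` parse the cluster-mode submit instead of failing on a missing app resource.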
Authored-by: KevinSmile Signed-off-by: Sean Owen --- .../launcher/SparkSubmitCommandBuilder.java | 15 +++++++++++++-- .../SparkSubmitCommandBuilderSuite.java | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 383c3f60a595b..43e7f8debe17d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -139,7 +139,7 @@ class SparkSubmitCommandBuilder extends AbstractCommandBuilder { case RUN_EXAMPLE: isExample = true; - appResource = SparkLauncher.NO_RESOURCE; + appResource = findExamplesAppJar(); submitArgs = args.subList(1, args.size()); } @@ -241,9 +241,11 @@ List buildSparkSubmitArgs() { } args.addAll(parsedArgs); + if (appResource != null) { args.add(appResource); } + args.addAll(appArgs); return args; @@ -401,6 +403,15 @@ private boolean isThriftServer(String mainClass) { mainClass.equals("org.apache.spark.sql.hive.thriftserver.HiveThriftServer2")); } + private String findExamplesAppJar() { + for (String exampleJar : findExamplesJars()) { + if (new File(exampleJar).getName().startsWith("spark-examples")) { + return exampleJar; + } + } + throw new IllegalStateException("Failed to find examples' main app jar."); + } + private List findExamplesJars() { boolean isTesting = "1".equals(getenv("SPARK_TESTING")); List examplesJars = new ArrayList<>(); @@ -513,7 +524,7 @@ protected boolean handleUnknown(String opt) { className = EXAMPLE_CLASS_PREFIX + className; } mainClass = className; - appResource = SparkLauncher.NO_RESOURCE; + appResource = findExamplesAppJar(); return false; } else if (errorOnUnknownArgs) { checkArgument(!opt.startsWith("-"), "Unrecognized option: %s", opt); diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 752e8d4c23f8b..6cd089e256b93 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -245,6 +245,24 @@ public void testExamplesRunner() throws Exception { assertEquals("42", cmd.get(cmd.size() - 1)); } + @Test + public void testExamplesRunnerPrimaryResource() throws Exception { + List sparkSubmitArgs = Arrays.asList( + SparkSubmitCommandBuilder.RUN_EXAMPLE, + parser.MASTER + "=foo", + parser.DEPLOY_MODE + "=cluster", + "SparkPi", + "100"); + + List cmd = newCommandBuilder(sparkSubmitArgs).buildSparkSubmitArgs(); + assertEquals(SparkSubmitCommandBuilder.EXAMPLE_CLASS_PREFIX + "SparkPi", + findArgValue(cmd, parser.CLASS)); + assertEquals("cluster", findArgValue(cmd, parser.DEPLOY_MODE)); + String primaryResource = cmd.get(cmd.size() - 2); + assertTrue(new File(primaryResource).getName().startsWith("spark-examples")); + assertFalse(cmd.contains(SparkLauncher.NO_RESOURCE)); + } + @Test(expected = IllegalArgumentException.class) public void testMissingAppResource() { new SparkSubmitCommandBuilder().buildSparkSubmitArgs(); From 3be552ccc8d26089881229edc034d2ebf2e75511 Mon Sep 17 00:00:00 2001 From: Karol Chmist Date: Sat, 12 Sep 2020 18:15:15 -0500 Subject: [PATCH 0012/1009] [SPARK-30090][SHELL] Adapt Spark REPL to Scala 2.13 ### What changes were proposed in this pull request? 
This is an attempt to adapt Spark REPL to Scala 2.13. It is based on a [scala-2.13 branch](https://github.com/smarter/spark/tree/scala-2.13) made by smarter. I had to set Scala version to 2.13 in some places, and to adapt some other modules, before I could start working on the REPL itself. These are separate commits on the branch that probably would be fixed beforehand, and thus dropped before the merge of this PR. I couldn't find a way to run the initialization code with existing REPL classes in Scala 2.13.2, so I [modified REPL in Scala](https://github.com/karolchmist/scala/commit/e9cc0dd54787351587237bbbee37d23ee744894c) to make it work. With this modification I managed to run Spark Shell, along with the units tests passing, which is good news. The bad news is that it requires an upstream change in Scala, which must be accepted first. I'd be happy to change it if someone points a way to do it differently. If not, I'd propose a PR in Scala to introduce `ILoop.internalReplAutorunCode`. ### Why are the changes needed? REPL in Scala changed quite a lot, so current version of Spark REPL needed to be adapted. ### Does this PR introduce _any_ user-facing change? In the previous version of `SparkILoop`, a lot of Scala's `ILoop` code was [overridden and duplicated](https://github.com/apache/spark/commit/2bc7b75537ec81184048738883b282e257cc58de) to make the welcome message a bit more pleasant. In this PR, the message is in a bit different order, but it's still acceptable IMHO. Before this PR: ``` 20/05/15 15:32:39 WARN Utils: Your hostname, hermes resolves to a loopback address: 127.0.1.1; using 192.168.1.28 instead (on interface enp0s31f6) 20/05/15 15:32:39 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 20/05/15 15:32:39 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/05/15 15:32:45 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041. Spark context Web UI available at http://192.168.1.28:4041 Spark context available as 'sc' (master = local[*], app id = local-1589549565502). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.0.1-SNAPSHOT /_/ Using Scala version 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_242) Type in expressions to have them evaluated. Type :help for more information. scala> ``` With this PR: ``` 20/05/15 15:32:15 WARN Utils: Your hostname, hermes resolves to a loopback address: 127.0.1.1; using 192.168.1.28 instead (on interface enp0s31f6) 20/05/15 15:32:15 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 20/05/15 15:32:15 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.0.0-SNAPSHOT /_/ Using Scala version 2.13.2-20200422-211118-706ef1b (OpenJDK 64-Bit Server VM, Java 1.8.0_242) Type in expressions to have them evaluated. Type :help for more information. 
Spark context Web UI available at http://192.168.1.28:4040 Spark context available as 'sc' (master = local[*], app id = local-1589549541259). Spark session available as 'spark'. scala> ``` It seems that currently the welcoming message is still an improvement from [the original ticket](https://issues.apache.org/jira/browse/SPARK-24785), albeit in a different order. As a bonus, some fragile code duplication was removed. ### How was this patch tested? Existing tests pass in `repl`module. The REPL runs in a terminal and the following code executed correctly: ``` scala> spark.range(1000 * 1000 * 1000).count() val res0: Long = 1000000000 ``` Closes #28545 from karolchmist/scala-2.13-repl. Authored-by: Karol Chmist Signed-off-by: Sean Owen --- .../org/apache/spark/repl/Main.scala | 0 .../org/apache/spark/repl/SparkILoop.scala | 0 .../org/apache/spark/repl/Main.scala | 138 ++++++++++++++ .../org/apache/spark/repl/SparkILoop.scala | 149 +++++++++++++++ .../org/apache/spark/repl/Repl2Suite.scala | 58 ++++++ .../spark/repl/SingletonRepl2Suite.scala | 171 ++++++++++++++++++ .../org/apache/spark/repl/Repl2Suite.scala | 53 ++++++ .../spark/repl/SingletonRepl2Suite.scala | 171 ++++++++++++++++++ .../org/apache/spark/repl/ReplSuite.scala | 27 --- .../spark/repl/SingletonReplSuite.scala | 61 ------- .../catalyst/util/CaseInsensitiveMap.scala | 2 +- 11 files changed, 741 insertions(+), 89 deletions(-) rename repl/src/main/{scala => scala-2.12}/org/apache/spark/repl/Main.scala (100%) rename repl/src/main/{scala => scala-2.12}/org/apache/spark/repl/SparkILoop.scala (100%) create mode 100644 repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala create mode 100644 repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala create mode 100644 repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala create mode 100644 repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala create mode 100644 repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala create mode 100644 repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala diff --git a/repl/src/main/scala/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala similarity index 100% rename from repl/src/main/scala/org/apache/spark/repl/Main.scala rename to repl/src/main/scala-2.12/org/apache/spark/repl/Main.scala diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala-2.12/org/apache/spark/repl/SparkILoop.scala similarity index 100% rename from repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala rename to repl/src/main/scala-2.12/org/apache/spark/repl/SparkILoop.scala diff --git a/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala b/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala new file mode 100644 index 0000000000000..95115934ed1d6 --- /dev/null +++ b/repl/src/main/scala-2.13/org/apache/spark/repl/Main.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io.File +import java.net.URI +import java.util.Locale + +import scala.tools.nsc.GenericRunnerSettings + +import org.apache.spark._ +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION +import org.apache.spark.util.Utils + +object Main extends Logging { + + initializeLogIfNecessary(true) + Signaling.cancelOnInterrupt() + + val conf = new SparkConf() + val rootDir = + conf.getOption("spark.repl.classdir").getOrElse(Utils.getLocalDir(conf)) + val outputDir = Utils.createTempDir(root = rootDir, namePrefix = "repl") + + var sparkContext: SparkContext = _ + var sparkSession: SparkSession = _ + // this is a public var because tests reset it. + var interp: SparkILoop = _ + + private var hasErrors = false + private var isShellSession = false + + private def scalaOptionError(msg: String): Unit = { + hasErrors = true + // scalastyle:off println + Console.err.println(msg) + // scalastyle:on println + } + + def main(args: Array[String]): Unit = { + isShellSession = true + doMain(args, new SparkILoop) + } + + // Visible for testing + private[repl] def doMain(args: Array[String], _interp: SparkILoop): Unit = { + interp = _interp + val jars = Utils + .getLocalUserJarsForShell(conf) + // Remove file:///, file:// or file:/ scheme if exists for each jar + .map { x => + if (x.startsWith("file:")) new File(new URI(x)).getPath else x + } + .mkString(File.pathSeparator) + val interpArguments = List( + "-Yrepl-class-based", + "-Yrepl-outdir", + s"${outputDir.getAbsolutePath}", + "-classpath", + jars + ) ++ args.toList + + val settings = new GenericRunnerSettings(scalaOptionError) + settings.processArguments(interpArguments, true) + + if (!hasErrors) { + interp.run(settings) // Repl starts and goes in loop of R.E.P.L + Option(sparkContext).foreach(_.stop) + } + } + + def createSparkSession(): SparkSession = { + try { + val execUri = System.getenv("SPARK_EXECUTOR_URI") + conf.setIfMissing("spark.app.name", "Spark shell") + // SparkContext will detect this configuration and register it with the RpcEnv's + // file server, setting spark.repl.class.uri to the actual URI for executors to + // use. This is sort of ugly but since executors are started as part of SparkContext + // initialization in certain cases, there's an initialization order issue that prevents + // this from being set after SparkContext is instantiated. + conf.set("spark.repl.class.outputDir", outputDir.getAbsolutePath()) + if (execUri != null) { + conf.set("spark.executor.uri", execUri) + } + if (System.getenv("SPARK_HOME") != null) { + conf.setSparkHome(System.getenv("SPARK_HOME")) + } + + val builder = SparkSession.builder.config(conf) + if (conf + .get(CATALOG_IMPLEMENTATION.key, "hive") + .toLowerCase(Locale.ROOT) == "hive") { + if (SparkSession.hiveClassesArePresent) { + // In the case that the property is not set at all, builder's config + // does not have this value set to 'hive' yet. The original default + // behavior is that when there are hive classes, we use hive catalog. 
+ sparkSession = builder.enableHiveSupport().getOrCreate() + logInfo("Created Spark session with Hive support") + } else { + // Need to change it back to 'in-memory' if no hive classes are found + // in the case that the property is set to hive in spark-defaults.conf + builder.config(CATALOG_IMPLEMENTATION.key, "in-memory") + sparkSession = builder.getOrCreate() + logInfo("Created Spark session") + } + } else { + // In the case that the property is set but not to 'hive', the internal + // default is 'in-memory'. So the sparkSession will use in-memory catalog. + sparkSession = builder.getOrCreate() + logInfo("Created Spark session") + } + sparkContext = sparkSession.sparkContext + sparkSession + } catch { + case e: Exception if isShellSession => + logError("Failed to initialize Spark session.", e) + sys.exit(1) + } + } + +} diff --git a/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala new file mode 100644 index 0000000000000..861cf5a740ce1 --- /dev/null +++ b/repl/src/main/scala-2.13/org/apache/spark/repl/SparkILoop.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io.{BufferedReader, PrintWriter} + +// scalastyle:off println +import scala.Predef.{println => _, _} +import scala.tools.nsc.GenericRunnerSettings +import scala.tools.nsc.Settings +import scala.tools.nsc.interpreter.shell.{ILoop, ShellConfig} +import scala.tools.nsc.util.stringFromStream +import scala.util.Properties.{javaVersion, javaVmName, versionString} +// scalastyle:on println + +/** + * A Spark-specific interactive shell. 
+ */ +class SparkILoop(in0: BufferedReader, out: PrintWriter) + extends ILoop(ShellConfig(new GenericRunnerSettings(_ => ())), in0, out) { + def this() = this(null, new PrintWriter(Console.out, true)) + + val initializationCommands: Seq[String] = Seq( + """ + @transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) { + org.apache.spark.repl.Main.sparkSession + } else { + org.apache.spark.repl.Main.createSparkSession() + } + @transient val sc = { + val _sc = spark.sparkContext + if (_sc.getConf.getBoolean("spark.ui.reverseProxy", false)) { + val proxyUrl = _sc.getConf.get("spark.ui.reverseProxyUrl", null) + if (proxyUrl != null) { + println( + s"Spark Context Web UI is available at ${proxyUrl}/proxy/${_sc.applicationId}") + } else { + println(s"Spark Context Web UI is available at Spark Master Public URL") + } + } else { + _sc.uiWebUrl.foreach { + webUrl => println(s"Spark context Web UI available at ${webUrl}") + } + } + println("Spark context available as 'sc' " + + s"(master = ${_sc.master}, app id = ${_sc.applicationId}).") + println("Spark session available as 'spark'.") + _sc + } + """, + "import org.apache.spark.SparkContext._", + "import spark.implicits._", + "import spark.sql", + "import org.apache.spark.sql.functions._" + ) + + override protected def internalReplAutorunCode(): Seq[String] = + initializationCommands + + def initializeSpark(): Unit = { + if (!intp.reporter.hasErrors) { + // `savingReplayStack` removes the commands from session history. + savingReplayStack { + initializationCommands.foreach(intp quietRun _) + } + } else { + throw new RuntimeException( + s"Scala $versionString interpreter encountered " + + "errors during initialization" + ) + } + } + + /** Print a welcome message */ + override def printWelcome(): Unit = { + import org.apache.spark.SPARK_VERSION + echo("""Welcome to + ____ __ + / __/__ ___ _____/ /__ + _\ \/ _ \/ _ `/ __/ '_/ + /___/ .__/\_,_/_/ /_/\_\ version %s + /_/ + """.format(SPARK_VERSION)) + val welcomeMsg = "Using Scala %s (%s, Java %s)".format( + versionString, + javaVmName, + javaVersion + ) + echo(welcomeMsg) + echo("Type in expressions to have them evaluated.") + echo("Type :help for more information.") + } + + /** Available commands */ + override def commands: List[LoopCommand] = standardCommands + + override def resetCommand(line: String): Unit = { + super.resetCommand(line) + initializeSpark() + echo( + "Note that after :reset, state of SparkSession and SparkContext is unchanged." + ) + } + + override def replay(): Unit = { + initializeSpark() + super.replay() + } +} + +object SparkILoop { + + /** + * Creates an interpreter loop with default settings and feeds + * the given code to it as input. 
+ */ + def run(code: String, sets: Settings = new Settings): String = { + import java.io.{BufferedReader, StringReader, OutputStreamWriter} + + stringFromStream { ostream => + Console.withOut(ostream) { + val input = new BufferedReader(new StringReader(code)) + val output = new PrintWriter(new OutputStreamWriter(ostream), true) + val repl = new SparkILoop(input, output) + + if (sets.classpath.isDefault) { + sets.classpath.value = sys.props("java.class.path") + } + repl.run(sets) + } + } + } + def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) +} diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala new file mode 100644 index 0000000000000..4ffa8beaf4740 --- /dev/null +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ +import java.nio.file.Files + +import scala.tools.nsc.interpreter.SimpleReader + +import org.apache.log4j.{Level, LogManager, PropertyConfigurator} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { + test("propagation of local properties") { + // A mock ILoop that doesn't install the SIGINT handler. + class ILoop(out: PrintWriter) extends SparkILoop(None, out) { + settings = new scala.tools.nsc.Settings + settings.usejavacp.value = true + org.apache.spark.repl.Main.interp = this + in = SimpleReader() + } + + val out = new StringWriter() + Main.interp = new ILoop(new PrintWriter(out)) + Main.sparkContext = new SparkContext("local", "repl-test") + Main.interp.createInterpreter() + + Main.sparkContext.setLocalProperty("someKey", "someValue") + + // Make sure the value we set in the caller to interpret is propagated in the thread that + // interprets the command. + Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") + assert(out.toString.contains("someValue")) + + Main.sparkContext.stop() + System.clearProperty("spark.driver.port") + } +} diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala new file mode 100644 index 0000000000000..a4eff392a2c99 --- /dev/null +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/SingletonRepl2Suite.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ + +import org.apache.spark.SparkFunSuite + +/** + * A special test suite for REPL that all test cases share one REPL instance. + */ +class SingletonRepl2Suite extends SparkFunSuite { + private val out = new StringWriter() + private val in = new PipedOutputStream() + private var thread: Thread = _ + + private val CONF_EXECUTOR_CLASSPATH = "spark.executor.extraClassPath" + private val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) + + override def beforeAll(): Unit = { + super.beforeAll() + + val classpath = System.getProperty("java.class.path") + System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) + + Main.conf.set("spark.master", "local-cluster[2,1,1024]") + val interp = new SparkILoop( + new BufferedReader(new InputStreamReader(new PipedInputStream(in))), + new PrintWriter(out)) + + // Forces to create new SparkContext + Main.sparkContext = null + Main.sparkSession = null + + // Starts a new thread to run the REPL interpreter, so that we won't block. + thread = new Thread(() => Main.doMain(Array("-classpath", classpath), interp)) + thread.setDaemon(true) + thread.start() + + waitUntil(() => out.toString.contains("Type :help for more information")) + } + + override def afterAll(): Unit = { + in.close() + thread.join() + if (oldExecutorClasspath != null) { + System.setProperty(CONF_EXECUTOR_CLASSPATH, oldExecutorClasspath) + } else { + System.clearProperty(CONF_EXECUTOR_CLASSPATH) + } + super.afterAll() + } + + private def waitUntil(cond: () => Boolean): Unit = { + import scala.concurrent.duration._ + import org.scalatest.concurrent.Eventually._ + + eventually(timeout(50.seconds), interval(500.millis)) { + assert(cond(), "current output: " + out.toString) + } + } + + /** + * Run the given commands string in a globally shared interpreter instance. Note that the given + * commands should not crash the interpreter, to not affect other test cases. + */ + def runInterpreter(input: String): String = { + val currentOffset = out.getBuffer.length() + // append a special statement to the end of the given code, so that we can know what's + // the final output of this code snippet and rely on it to wait until the output is ready. 
+ val timestamp = System.currentTimeMillis() + in.write((input + s"\nval _result_$timestamp = 1\n").getBytes) + in.flush() + val stopMessage = s"_result_$timestamp: Int = 1" + waitUntil(() => out.getBuffer.substring(currentOffset).contains(stopMessage)) + out.getBuffer.substring(currentOffset) + } + + def assertContains(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(isContain, + "Interpreter output did not contain '" + message + "':\n" + output) + } + + def assertDoesNotContain(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(!isContain, + "Interpreter output contained '" + message + "':\n" + output) + } + + test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { + // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that + // object contains an unused non-serializable field. + // Specifically, the closure in this test case contains a directly nested closure, and the + // capture is triggered by the inner closure. + // `ns` should be nulled out, but `topLevelValue` should stay intact. + + // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the + // EOT control character (i.e. Ctrl+D). + // Just write things on a single line to emulate :paste mode. + + // NOTE: in order for this test case to trigger the intended scenario, the following three + // variables need to be in the same "input", which will make the REPL pack them into the + // same REPL line object: + // - ns: a non-serializable state, not accessed by the closure; + // - topLevelValue: a serializable state, accessed by the closure; + // - closure: the starting closure, captures the enclosing REPL line object. + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | (1 to j).flatMap { x => + | (1 to x).map { y => y + topLevelValue } + | } + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) + assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) +// assertContains("r: Array[IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Exception", output) + } + + test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { + // Similar to the previous test case, but with indirect closure nesting instead. + // There's still nested closures involved, but the inner closure is indirectly nested in the + // outer closure, with a level of inner class in between them. + // This changes how the inner closure references/captures the outer closure/enclosing `this` + // REPL line object, and covers a different code path in inner closure discovery. + + // `ns` should be nulled out, but `topLevelValue` should stay intact. 
+ + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | class InnerFoo { + | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } + | } + | val innerFoo = new InnerFoo + | (1 to j).flatMap(innerFoo.innerClosure) + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) + assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) +// assertContains("r: Array[IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) + assertDoesNotContain("Exception", output) + } + + } diff --git a/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala new file mode 100644 index 0000000000000..a93284a129e28 --- /dev/null +++ b/repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ +import java.nio.file.Files + +import org.apache.log4j.{Level, LogManager, PropertyConfigurator} +import org.scalatest.BeforeAndAfterAll + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { + test("propagation of local properties") { + // A mock ILoop that doesn't install the SIGINT handler. + class ILoop(out: PrintWriter) extends SparkILoop(null, out) + + val out = new StringWriter() + Main.interp = new ILoop(new PrintWriter(out)) + Main.sparkContext = new SparkContext("local", "repl-test") + val settings = new scala.tools.nsc.Settings + settings.usejavacp.value = true + Main.interp.createInterpreter(settings) + + Main.sparkContext.setLocalProperty("someKey", "someValue") + + // Make sure the value we set in the caller to interpret is propagated in the thread that + // interprets the command. 
+ Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") + assert(out.toString.contains("someValue")) + + Main.sparkContext.stop() + System.clearProperty("spark.driver.port") + } +} diff --git a/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala b/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala new file mode 100644 index 0000000000000..b153a0261aaf5 --- /dev/null +++ b/repl/src/test/scala-2.13/org/apache/spark/repl/SingletonRepl2Suite.scala @@ -0,0 +1,171 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.repl + +import java.io._ + +import org.apache.spark.SparkFunSuite + +/** + * A special test suite for REPL that all test cases share one REPL instance. + */ +class SingletonRepl2Suite extends SparkFunSuite { + + private val out = new StringWriter() + private val in = new PipedOutputStream() + private var thread: Thread = _ + + private val CONF_EXECUTOR_CLASSPATH = "spark.executor.extraClassPath" + private val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) + + override def beforeAll(): Unit = { + super.beforeAll() + + val classpath = System.getProperty("java.class.path") + System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) + + Main.conf.set("spark.master", "local-cluster[2,1,1024]") + val interp = new SparkILoop( + new BufferedReader(new InputStreamReader(new PipedInputStream(in))), + new PrintWriter(out)) + + // Forces to create new SparkContext + Main.sparkContext = null + Main.sparkSession = null + + // Starts a new thread to run the REPL interpreter, so that we won't block. + thread = new Thread(() => Main.doMain(Array("-classpath", classpath), interp)) + thread.setDaemon(true) + thread.start() + + waitUntil(() => out.toString.contains("Type :help for more information")) + } + + override def afterAll(): Unit = { + in.close() + thread.join() + if (oldExecutorClasspath != null) { + System.setProperty(CONF_EXECUTOR_CLASSPATH, oldExecutorClasspath) + } else { + System.clearProperty(CONF_EXECUTOR_CLASSPATH) + } + super.afterAll() + } + + private def waitUntil(cond: () => Boolean): Unit = { + import scala.concurrent.duration._ + import org.scalatest.concurrent.Eventually._ + + eventually(timeout(50.seconds), interval(500.millis)) { + assert(cond(), "current output: " + out.toString) + } + } + + /** + * Run the given commands string in a globally shared interpreter instance. Note that the given + * commands should not crash the interpreter, to not affect other test cases. 
+ */ + def runInterpreter(input: String): String = { + val currentOffset = out.getBuffer.length() + // append a special statement to the end of the given code, so that we can know what's + // the final output of this code snippet and rely on it to wait until the output is ready. + val timestamp = System.currentTimeMillis() + in.write((input + s"\nval _result_$timestamp = 1\n").getBytes) + in.flush() + val stopMessage = s"_result_$timestamp: Int = 1" + waitUntil(() => out.getBuffer.substring(currentOffset).contains(stopMessage)) + out.getBuffer.substring(currentOffset) + } + + def assertContains(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(isContain, + "Interpreter output did not contain '" + message + "':\n" + output) + } + + def assertDoesNotContain(message: String, output: String): Unit = { + val isContain = output.contains(message) + assert(!isContain, + "Interpreter output contained '" + message + "':\n" + output) + } + + test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { + // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that + // object contains an unused non-serializable field. + // Specifically, the closure in this test case contains a directly nested closure, and the + // capture is triggered by the inner closure. + // `ns` should be nulled out, but `topLevelValue` should stay intact. + + // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the + // EOT control character (i.e. Ctrl+D). + // Just write things on a single line to emulate :paste mode. + + // NOTE: in order for this test case to trigger the intended scenario, the following three + // variables need to be in the same "input", which will make the REPL pack them into the + // same REPL line object: + // - ns: a non-serializable state, not accessed by the closure; + // - topLevelValue: a serializable state, accessed by the closure; + // - closure: the starting closure, captures the enclosing REPL line object. + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | (1 to j).flatMap { x => + | (1 to x).map { y => y + topLevelValue } + | } + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) +// assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertContains("r: Array[IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Exception", output) + } + + test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { + // Similar to the previous test case, but with indirect closure nesting instead. + // There's still nested closures involved, but the inner closure is indirectly nested in the + // outer closure, with a level of inner class in between them. + // This changes how the inner closure references/captures the outer closure/enclosing `this` + // REPL line object, and covers a different code path in inner closure discovery. + + // `ns` should be nulled out, but `topLevelValue` should stay intact. 
+ + val output = runInterpreter( + """ + |class NotSerializableClass(val x: Int) + |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = + |(j: Int) => { + | class InnerFoo { + | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } + | } + | val innerFoo = new InnerFoo + | (1 to j).flatMap(innerFoo.innerClosure) + |} + |val r = sc.parallelize(0 to 2).map(closure).collect + """.stripMargin) +// assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + +// "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertContains("r: Array[IndexedSeq[String]] = " + + "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) + assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) + assertDoesNotContain("Exception", output) + } +} diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 1e92b36c336d8..95d908cec5de0 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.repl import java.io._ import java.nio.file.Files -import scala.tools.nsc.interpreter.SimpleReader - import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll @@ -86,31 +84,6 @@ class ReplSuite extends SparkFunSuite with BeforeAndAfterAll { "Interpreter output contained '" + message + "':\n" + output) } - test("propagation of local properties") { - // A mock ILoop that doesn't install the SIGINT handler. - class ILoop(out: PrintWriter) extends SparkILoop(None, out) { - settings = new scala.tools.nsc.Settings - settings.usejavacp.value = true - org.apache.spark.repl.Main.interp = this - in = SimpleReader() - } - - val out = new StringWriter() - Main.interp = new ILoop(new PrintWriter(out)) - Main.sparkContext = new SparkContext("local", "repl-test") - Main.interp.createInterpreter() - - Main.sparkContext.setLocalProperty("someKey", "someValue") - - // Make sure the value we set in the caller to interpret is propagated in the thread that - // interprets the command. - Main.interp.interpret("org.apache.spark.repl.Main.sparkContext.getLocalProperty(\"someKey\")") - assert(out.toString.contains("someValue")) - - Main.sparkContext.stop() - System.clearProperty("spark.driver.port") - } - test("SPARK-15236: use Hive catalog") { // turn on the INFO log so that it is possible the code will dump INFO // entry for using "HiveMetastore" diff --git a/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala index e11a54bc88070..4795306692f7a 100644 --- a/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/SingletonReplSuite.scala @@ -380,67 +380,6 @@ class SingletonReplSuite extends SparkFunSuite { assertDoesNotContain("Exception", output) } - test("SPARK-31399: should clone+clean line object w/ non-serializable state in ClosureCleaner") { - // Test ClosureCleaner when a closure captures the enclosing `this` REPL line object, and that - // object contains an unused non-serializable field. - // Specifically, the closure in this test case contains a directly nested closure, and the - // capture is triggered by the inner closure. 
- // `ns` should be nulled out, but `topLevelValue` should stay intact. - - // Can't use :paste mode because PipedOutputStream/PipedInputStream doesn't work well with the - // EOT control character (i.e. Ctrl+D). - // Just write things on a single line to emulate :paste mode. - - // NOTE: in order for this test case to trigger the intended scenario, the following three - // variables need to be in the same "input", which will make the REPL pack them into the - // same REPL line object: - // - ns: a non-serializable state, not accessed by the closure; - // - topLevelValue: a serializable state, accessed by the closure; - // - closure: the starting closure, captures the enclosing REPL line object. - val output = runInterpreter( - """ - |class NotSerializableClass(val x: Int) - |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = - |(j: Int) => { - | (1 to j).flatMap { x => - | (1 to x).map { y => y + topLevelValue } - | } - |} - |val r = sc.parallelize(0 to 2).map(closure).collect - """.stripMargin) - assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + - "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) - assertDoesNotContain("Exception", output) - } - - test("SPARK-31399: ClosureCleaner should discover indirectly nested closure in inner class") { - // Similar to the previous test case, but with indirect closure nesting instead. - // There's still nested closures involved, but the inner closure is indirectly nested in the - // outer closure, with a level of inner class in between them. - // This changes how the inner closure references/captures the outer closure/enclosing `this` - // REPL line object, and covers a different code path in inner closure discovery. - - // `ns` should be nulled out, but `topLevelValue` should stay intact. 
- - val output = runInterpreter( - """ - |class NotSerializableClass(val x: Int) - |val ns = new NotSerializableClass(42); val topLevelValue = "someValue"; val closure = - |(j: Int) => { - | class InnerFoo { - | val innerClosure = (x: Int) => (1 to x).map { y => y + topLevelValue } - | } - | val innerFoo = new InnerFoo - | (1 to j).flatMap(innerFoo.innerClosure) - |} - |val r = sc.parallelize(0 to 2).map(closure).collect - """.stripMargin) - assertContains("r: Array[scala.collection.immutable.IndexedSeq[String]] = " + - "Array(Vector(), Vector(1someValue), Vector(1someValue, 1someValue, 2someValue))", output) - assertDoesNotContain("Array(Vector(), Vector(1null), Vector(1null, 1null, 2null)", output) - assertDoesNotContain("Exception", output) - } - test("newProductSeqEncoder with REPL defined class") { val output = runInterpreter( """ diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala index 352e5a4c59048..e18a01810d2eb 100644 --- a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/CaseInsensitiveMap.scala @@ -43,7 +43,7 @@ class CaseInsensitiveMap[T] private (val originalMap: Map[String, T]) extends Ma new CaseInsensitiveMap[B1](originalMap.filter(!_._1.equalsIgnoreCase(key)) + (key -> value)) } - override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = this.updated(kv._1, kv._2) + override def +[B1 >: T](kv: (String, B1)): CaseInsensitiveMap[B1] = this.updated(kv._1, kv._2) def ++(xs: IterableOnce[(String, T)]): CaseInsensitiveMap[T] = { xs.iterator.foldLeft(this) { (m, kv) => m.updated(kv._1, kv._2) } From 3d08084022a4365966526216a616a3b760450884 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 12 Sep 2020 21:34:35 -0700 Subject: [PATCH 0013/1009] [SPARK-24994][SQL] Add UnwrapCastInBinaryComparison optimizer to simplify literal types ### What changes were proposed in this pull request? Currently, in cases like the following: ```sql SELECT * FROM t WHERE age < 40 ``` where `age` is of short type, Spark won't be able to simplify this and can only generate the filter `cast(age, int) < 40`. This won't get pushed down to data sources and therefore is not optimized. This PR proposes an optimizer rule to improve this when the following constraints are satisfied: - the input expression is a binary comparison where one side is a cast operation and the other is a literal. - both the cast child expression and the literal are of integral type (i.e., byte, short, int or long) When this is true, the rule tries several optimizations to either simplify the expression or move the cast to the literal side, so the resulting filter for the above case becomes `age < cast(40 as smallint)`. This is better since the cast can be optimized away later and the filter can be pushed down to data sources. This PR follows a similar effort in Presto (https://prestosql.io/blog/2019/05/21/optimizing-the-casts-away.html). Here we only handle integral types, but plan to extend to other types in follow-ups. ### Why are the changes needed? As mentioned in the previous section, when the cast is not optimized, the filter cannot be pushed down to data sources, which can lead to unnecessary IO and therefore longer job times and wasted resources. This PR helps to improve that. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested?
Added unit tests for both the optimizer rule and filter pushdown on datasource level for both Orc and Parquet. Closes #29565 from sunchao/SPARK-24994. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../UnwrapCastInBinaryComparison.scala | 236 ++++++++++++++++++ .../UnwrapCastInBinaryComparisonSuite.scala | 161 ++++++++++++ .../spark/sql/FileBasedDataSourceSuite.scala | 90 ++++++- 4 files changed, 487 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 296fe86e834e5..9216ab1631e7b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -107,6 +107,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteCorrelatedScalarSubquery, EliminateSerialization, RemoveRedundantAliases, + UnwrapCastInBinaryComparison, RemoveNoopOperators, CombineWithFields, SimplifyExtractValueOps, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala new file mode 100644 index 0000000000000..89f7c0f71b7ac --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -0,0 +1,236 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types._ + +/** + * Unwrap casts in binary comparison operations with patterns like following: + * + * `BinaryComparison(Cast(fromExp, toType), Literal(value, toType))` + * or + * `BinaryComparison(Literal(value, toType), Cast(fromExp, toType))` + * + * This rule optimizes expressions with the above pattern by either replacing the cast with simpler + * constructs, or moving the cast from the expression side to the literal side, which enables them + * to be optimized away later and pushed down to data sources. + * + * Currently this only handles cases where: + * 1). 
`fromType` (of `fromExp`) and `toType` are of integral types (i.e., byte, short, int and + * long) + * 2). `fromType` can be safely coerced to `toType` without precision loss (e.g., short to int, + * int to long, but not long to int) + * + * If the above conditions are satisfied, the rule checks to see if the literal `value` is within + * range `(min, max)`, where `min` and `max` are the minimum and maximum value of `fromType`, + * respectively. If this is true then it means we can safely cast `value` to `fromType` and thus + * able to move the cast to the literal side. That is: + * + * `cast(fromExp, toType) op value` ==> `fromExp op cast(value, fromType)` + * + * If the `value` is not within range `(min, max)`, the rule breaks the scenario into different + * cases and try to replace each with simpler constructs. + * + * if `value > max`, the cases are of following: + * - `cast(fromExp, toType) > value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) >= value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) + * - `cast(fromExp, toType) <=> value` ==> cast(fromExp, toType) <=> value (if `fromExp` is + * non-deterministic) + * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) + * - `cast(fromExp, toType) < value` ==> if(isnull(fromExp), null, true) + * + * if `value == max`, the cases are of following: + * - `cast(fromExp, toType) > value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) >= value` ==> fromExp == max + * - `cast(fromExp, toType) === value` ==> fromExp == max + * - `cast(fromExp, toType) <=> value` ==> fromExp <=> max + * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) + * - `cast(fromExp, toType) < value` ==> fromExp =!= max + * + * Similarly for the cases when `value == min` and `value < min`. + * + * Further, the above `if(isnull(fromExp), null, false)` is represented using conjunction + * `and(isnull(fromExp), null)`, to enable further optimization and filter pushdown to data sources. + * Similarly, `if(isnull(fromExp), null, true)` is represented with `or(isnotnull(fromExp), null)`. + */ +object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case l: LogicalPlan => + l transformExpressionsUp { + case e @ BinaryComparison(_, _) => unwrapCast(e) + } + } + + private def unwrapCast(exp: Expression): Expression = exp match { + // Not a canonical form. In this case we first canonicalize the expression by swapping the + // literal and cast side, then process the result and swap the literal and cast again to + // restore the original order. + case BinaryComparison(Literal(_, literalType), Cast(fromExp, toType, _)) + if canImplicitlyCast(fromExp, toType, literalType) => + def swap(e: Expression): Expression = e match { + case GreaterThan(left, right) => LessThan(right, left) + case GreaterThanOrEqual(left, right) => LessThanOrEqual(right, left) + case EqualTo(left, right) => EqualTo(right, left) + case EqualNullSafe(left, right) => EqualNullSafe(right, left) + case LessThanOrEqual(left, right) => GreaterThanOrEqual(right, left) + case LessThan(left, right) => GreaterThan(right, left) + case _ => e + } + + swap(unwrapCast(swap(exp))) + + // In case both sides have integral type, optimize the comparison by removing casts or + // moving cast to the literal side. 
+ case be @ BinaryComparison( + Cast(fromExp, toType: IntegralType, _), Literal(value, literalType)) + if canImplicitlyCast(fromExp, toType, literalType) => + simplifyIntegralComparison(be, fromExp, toType, value) + + case _ => exp + } + + /** + * Check if the input `value` is within range `(min, max)` of the `fromType`, where `min` and + * `max` are the minimum and maximum value of the `fromType`. If the above is true, this + * optimizes the expression by moving the cast to the literal side. Otherwise if result is not + * true, this replaces the input binary comparison `exp` with simpler expressions. + */ + private def simplifyIntegralComparison( + exp: BinaryComparison, + fromExp: Expression, + toType: IntegralType, + value: Any): Expression = { + + val fromType = fromExp.dataType + val (min, max) = getRange(fromType) + val (minInToType, maxInToType) = { + (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) + } + val ordering = toType.ordering.asInstanceOf[Ordering[Any]] + val minCmp = ordering.compare(value, minInToType) + val maxCmp = ordering.compare(value, maxInToType) + + if (maxCmp > 0) { + exp match { + case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + falseIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (maxCmp == 0) { + exp match { + case GreaterThan(_, _) => + falseIfNotNull(fromExp) + case LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) => + Not(EqualTo(fromExp, Literal(max, fromType))) + case GreaterThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(max, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(max, fromType)) + case _ => exp + } + } else if (minCmp < 0) { + exp match { + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => + falseIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (minCmp == 0) { + exp match { + case LessThan(_, _) => + falseIfNotNull(fromExp) + case GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case GreaterThan(_, _) => + Not(EqualTo(fromExp, Literal(min, fromType))) + case LessThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(min, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(min, fromType)) + case _ => exp + } + } else { + // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the + // literal side. + val lit = Cast(Literal(value), fromType) + exp match { + case GreaterThan(_, _) => GreaterThan(fromExp, lit) + case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case EqualTo(_, _) => EqualTo(fromExp, lit) + case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) + case LessThan(_, _) => LessThan(fromExp, lit) + case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) + case _ => exp + } + } + } + + /** + * Check if the input `fromExp` can be safely cast to `toType` without any loss of precision, + * i.e., the conversion is injective. Note this only handles the case when both sides are of + * integral type. 
+ */ + private def canImplicitlyCast(fromExp: Expression, toType: DataType, + literalType: DataType): Boolean = { + toType.sameType(literalType) && + fromExp.dataType.isInstanceOf[IntegralType] && + toType.isInstanceOf[IntegralType] && + Cast.canUpCast(fromExp.dataType, toType) + } + + private def getRange(dt: DataType): (Any, Any) = dt match { + case ByteType => (Byte.MinValue, Byte.MaxValue) + case ShortType => (Short.MinValue, Short.MaxValue) + case IntegerType => (Int.MinValue, Int.MaxValue) + case LongType => (Long.MinValue, Long.MaxValue) + case other => throw new IllegalArgumentException(s"Unsupported type: ${other.catalogString}") + } + + /** + * Wraps input expression `e` with `if(isnull(e), null, false)`. The if-clause is represented + * using `and(isnull(e), null)` which is semantically equivalent by applying 3-valued logic. + */ + private[optimizer] def falseIfNotNull(e: Expression): Expression = { + And(IsNull(e), Literal(null, BooleanType)) + } + + /** + * Wraps input expression `e` with `if(isnull(e), null, true)`. The if-clause is represented + * using `or(isnotnull(e), null)` which is semantically equivalent by applying 3-valued logic. + */ + private[optimizer] def trueIfNotNull(e: Expression): Expression = { + Or(IsNotNull(e), Literal(null, BooleanType)) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala new file mode 100644 index 0000000000000..387964088b808 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils._ +import org.apache.spark.sql.catalyst.expressions.aggregate.First +import org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{BooleanType, ByteType, DoubleType, IntegerType} + +class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelper { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches: List[Batch] = + Batch("Unwrap casts in binary comparison", FixedPoint(10), + NullPropagation, ConstantFolding, UnwrapCastInBinaryComparison) :: Nil + } + + val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) + val f: BoundReference = 'a.short.canBeNull.at(0) + + test("unwrap casts when literal == max") { + val v = Short.MaxValue + assertEquivalent(castInt(f) > v.toInt, falseIfNotNull(f)) + assertEquivalent(castInt(f) >= v.toInt, f === v) + assertEquivalent(castInt(f) === v.toInt, f === v) + assertEquivalent(castInt(f) <=> v.toInt, f <=> v) + assertEquivalent(castInt(f) <= v.toInt, trueIfNotNull(f)) + assertEquivalent(castInt(f) < v.toInt, f =!= v) + } + + test("unwrap casts when literal > max") { + val v: Int = positiveInt + assertEquivalent(castInt(f) > v, falseIfNotNull(f)) + assertEquivalent(castInt(f) >= v, falseIfNotNull(f)) + assertEquivalent(castInt(f) === v, falseIfNotNull(f)) + assertEquivalent(castInt(f) <=> v, false) + assertEquivalent(castInt(f) <= v, trueIfNotNull(f)) + assertEquivalent(castInt(f) < v, trueIfNotNull(f)) + } + + test("unwrap casts when literal == min") { + val v = Short.MinValue + assertEquivalent(castInt(f) > v.toInt, f =!= v) + assertEquivalent(castInt(f) >= v.toInt, trueIfNotNull(f)) + assertEquivalent(castInt(f) === v.toInt, f === v) + assertEquivalent(castInt(f) <=> v.toInt, f <=> v) + assertEquivalent(castInt(f) <= v.toInt, f === v) + assertEquivalent(castInt(f) < v.toInt, falseIfNotNull(f)) + } + + test("unwrap casts when literal < min") { + val v: Int = negativeInt + assertEquivalent(castInt(f) > v, trueIfNotNull(f)) + assertEquivalent(castInt(f) >= v, trueIfNotNull(f)) + assertEquivalent(castInt(f) === v, falseIfNotNull(f)) + assertEquivalent(castInt(f) <=> v, false) + assertEquivalent(castInt(f) <= v, falseIfNotNull(f)) + assertEquivalent(castInt(f) < v, falseIfNotNull(f)) + } + + test("unwrap casts when literal is within range (min, max)") { + assertEquivalent(castInt(f) > 300, f > 300.toShort) + assertEquivalent(castInt(f) >= 500, f >= 500.toShort) + assertEquivalent(castInt(f) === 32766, f === 32766.toShort) + assertEquivalent(castInt(f) <=> 32766, f <=> 32766.toShort) + assertEquivalent(castInt(f) <= -6000, f <= -6000.toShort) + assertEquivalent(castInt(f) < -32767, f < -32767.toShort) + } + + test("unwrap casts when cast is on rhs") { + val v = Short.MaxValue + assertEquivalent(Literal(v.toInt) < castInt(f), falseIfNotNull(f)) + assertEquivalent(Literal(v.toInt) <= castInt(f), Literal(v) === f) + assertEquivalent(Literal(v.toInt) === castInt(f), Literal(v) === f) + assertEquivalent(Literal(v.toInt) <=> castInt(f), Literal(v) <=> f) + assertEquivalent(Literal(v.toInt) >= castInt(f), 
trueIfNotNull(f)) + assertEquivalent(Literal(v.toInt) > castInt(f), f =!= v) + + assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort) <= f) + } + + test("unwrap cast should have no effect when input is not integral type") { + Seq( + castDouble('b) > 42.0, + castDouble('b) >= 42.0, + castDouble('b) === 42.0, + castDouble('b) <=> 42.0, + castDouble('b) <= 42.0, + castDouble('b) < 42.0, + Literal(42.0) > castDouble('b), + Literal(42.0) >= castDouble('b), + Literal(42.0) === castDouble('b), + Literal(42.0) <=> castDouble('b), + Literal(42.0) <= castDouble('b), + Literal(42.0) < castDouble('b) + ).foreach(e => + assertEquivalent(e, e, evaluate = false) + ) + } + + test("unwrap cast should skip when expression is non-deterministic") { + Seq(positiveInt, negativeInt).foreach (v => { + val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v + assertEquivalent(e, e, evaluate = false) + }) + } + + test("unwrap casts when literal is null") { + val intLit = Literal.create(null, IntegerType) + val nullLit = Literal.create(null, BooleanType) + assertEquivalent(castInt(f) > intLit, nullLit) + assertEquivalent(castInt(f) >= intLit, nullLit) + assertEquivalent(castInt(f) === intLit, nullLit) + assertEquivalent(castInt(f) <=> intLit, IsNull(castInt(f))) + assertEquivalent(castInt(f) <= intLit, nullLit) + assertEquivalent(castInt(f) < intLit, nullLit) + } + + test("unwrap cast should skip if cannot coerce type") { + assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) + } + + private def castInt(e: Expression): Expression = Cast(e, IntegerType) + + private def castDouble(e: Expression): Expression = Cast(e, DoubleType) + + private def assertEquivalent(e1: Expression, e2: Expression, evaluate: Boolean = true): Unit = { + val plan = testRelation.where(e1).analyze + val actual = Optimize.execute(plan) + val expected = testRelation.where(e2).analyze + comparePlans(actual, expected) + + if (evaluate) { + Seq(100.toShort, -300.toShort, null).foreach(v => { + val row = create_row(v) + checkEvaluation(e1, e2.eval(row), row) + }) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index a3cd0c230d8af..48b2e22457e3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -31,12 +31,14 @@ import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import org.apache.spark.sql.TestingUDT.{IntervalUDT, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} -import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetTable +import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan +import org.apache.spark.sql.execution.datasources.v2.parquet.{ParquetScan, ParquetTable} import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import 
org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -881,6 +883,92 @@ class FileBasedDataSourceSuite extends QueryTest } } } + + test("test casts pushdown on orc/parquet for integral types") { + def checkPushedFilters( + format: String, + df: DataFrame, + filters: Array[sources.Filter], + noScan: Boolean = false): Unit = { + val scanExec = df.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]) + if (noScan) { + assert(scanExec.isEmpty) + return + } + val scan = scanExec.get.asInstanceOf[BatchScanExec].scan + format match { + case "orc" => + assert(scan.isInstanceOf[OrcScan]) + assert(scan.asInstanceOf[OrcScan].pushedFilters === filters) + case "parquet" => + assert(scan.isInstanceOf[ParquetScan]) + assert(scan.asInstanceOf[ParquetScan].pushedFilters === filters) + case _ => + fail(s"unknown format $format") + } + } + + Seq("orc", "parquet").foreach { format => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "") { + withTempPath { dir => + spark.range(100).map(i => (i.toShort, i.toString)).toDF("id", "s") + .write + .format(format) + .save(dir.getCanonicalPath) + val df = spark.read.format(format).load(dir.getCanonicalPath) + + // cases when value == MAX + var v = Short.MaxValue + checkPushedFilters(format, df.where('id > v.toInt), Array(), noScan = true) + checkPushedFilters(format, df.where('id >= v.toInt), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where('id === v.toInt), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where('id <=> v.toInt), + Array(sources.EqualNullSafe("id", v))) + checkPushedFilters(format, df.where('id <= v.toInt), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where('id < v.toInt), Array(sources.IsNotNull("id"), + sources.Not(sources.EqualTo("id", v)))) + + // cases when value > MAX + var v1: Int = positiveInt + checkPushedFilters(format, df.where('id > v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id >= v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id === v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id <=> v1), Array(), noScan = true) + checkPushedFilters(format, df.where('id <= v1), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where('id < v1), Array(sources.IsNotNull("id"))) + + // cases when value = MIN + v = Short.MinValue + checkPushedFilters(format, df.where(lit(v.toInt) < 'id), Array(sources.IsNotNull("id"), + sources.Not(sources.EqualTo("id", v)))) + checkPushedFilters(format, df.where(lit(v.toInt) <= 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v.toInt) === 'id), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) <=> 'id), + Array(sources.EqualNullSafe("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) >= 'id), Array(sources.IsNotNull("id"), + sources.EqualTo("id", v))) + checkPushedFilters(format, df.where(lit(v.toInt) > 'id), Array(), noScan = true) + + // cases when value < MIN + v1 = negativeInt + checkPushedFilters(format, df.where(lit(v1) < 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) <= 'id), Array(sources.IsNotNull("id"))) + checkPushedFilters(format, df.where(lit(v1) === 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) >= 'id), Array(), noScan = true) + checkPushedFilters(format, df.where(lit(v1) > 'id), Array(), noScan = true) + + // cases when 
value is within range (MIN, MAX) + checkPushedFilters(format, df.where('id > 30), Array(sources.IsNotNull("id"), + sources.GreaterThan("id", 30))) + checkPushedFilters(format, df.where(lit(100) >= 'id), Array(sources.IsNotNull("id"), + sources.LessThanOrEqual("id", 100))) + } + } + } + } } object TestingUDT { From 0549c20c6fccc7ff412818d7352b141590f88b1b Mon Sep 17 00:00:00 2001 From: "bowen.li" Date: Sat, 12 Sep 2020 21:45:55 -0700 Subject: [PATCH 0014/1009] [SPARK-32865][DOC] python section in quickstart page doesn't display SPARK_VERSION correctly ### What changes were proposed in this pull request? In https://github.com/apache/spark/blame/master/docs/quick-start.md#L402, it should be `{{site.SPARK_VERSION}}` rather than `{site.SPARK_VERSION}` ### Why are the changes needed? SPARK_VERSION isn't displayed correctly, as shown below ![image](https://user-images.githubusercontent.com/1892692/93006726-d03c8680-f514-11ea-85e3-1d7cfb682ef2.png) ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? tested locally, as shown below ![image](https://user-images.githubusercontent.com/1892692/93006712-a6835f80-f514-11ea-8d78-6831c9d65265.png) Closes #29738 from bowenli86/doc. Authored-by: bowen.li Signed-off-by: Dongjoon Hyun --- docs/quick-start.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/quick-start.md b/docs/quick-start.md index e7a16a3461653..557fc187fb81d 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -399,7 +399,7 @@ If you are building a packaged PySpark application or library you can add it to {% highlight python %} install_requires=[ - 'pyspark=={site.SPARK_VERSION}' + 'pyspark=={{site.SPARK_VERSION}}' ] {% endhighlight %} From a6d6ea3efedbad14d99c24143834cd4e2e52fb40 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 12 Sep 2020 22:19:30 -0700 Subject: [PATCH 0015/1009] [SPARK-32802][SQL] Avoid using SpecificInternalRow in RunLengthEncoding#Encoder ### What changes were proposed in this pull request? Currently `RunLengthEncoding#Encoder` uses `SpecificInternalRow` as a holder for the current value when calculating compression stats and doing the actual compression. It calls `ColumnType.copyField` and `ColumnType.getField` on the internal row, which incurs extra cost compared to directly operating on the internal type. This PR proposes to replace the `SpecificInternalRow` with `T#InternalType` to avoid the extra cost. ### Why are the changes needed? Operating on `SpecificInternalRow` carries a certain cost and negatively impacts performance when using `RunLengthEncoding` for compression.
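To make the pattern concrete, here is a minimal, self-contained sketch (illustrative names only, not the actual `Encoder` code; the real change operates on `ColumnType` and `T#InternalType`, as shown in the `compressionSchemes.scala` diff below): the run-length bookkeeping keeps the last seen value as its plain element type instead of going through a one-field row holder.

```
// Minimal sketch: run-length bookkeeping that tracks the last seen value
// directly as its element type, avoiding a one-field row holder and the
// getField/copyField calls that going through a row would require.
object RunLengthSketch {
  // Counts how many runs a run-length encoder would emit for `values`.
  def countRuns[A](values: Seq[A]): Int = {
    var lastValue: Option[A] = None // raw value, no row-holder indirection
    var runs = 0
    values.foreach { v =>
      if (!lastValue.contains(v)) { // value changed: a new run starts
        runs += 1
        lastValue = Some(v)
      }
    }
    runs
  }

  def main(args: Array[String]): Unit = {
    // Three runs: [1, 1], [2], [3, 3, 3]
    println(countRuns(Seq(1, 1, 2, 3, 3, 3)))
  }
}
```

The actual patch applies the same idea inside the encoder's stats gathering and `compress` paths, replacing the `SpecificInternalRow` holder with a `lastValue: T#InternalType` field, as the diff further below shows.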
With the change I see some improvements through `CompressionSchemeBenchmark`: ```diff Intel(R) Core(TM) i9-9880H CPU 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 51957.0 0.0 1.0X -RunLengthEncoding(2.502) 549 555 9 122.2 8.2 0.0X -BooleanBitSet(0.125) 296 301 3 226.6 4.4 0.0X +PassThrough(1.000) 2 2 0 42985.4 0.0 1.0X +RunLengthEncoding(2.517) 487 500 10 137.7 7.3 0.0X +BooleanBitSet(0.125) 348 353 4 192.8 5.2 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 22779.9 0.0 1.0X -RunLengthEncoding(1.520) 1186 1192 9 56.6 17.7 0.0X +PassThrough(1.000) 3 4 0 21216.6 0.0 1.0X +RunLengthEncoding(1.493) 882 931 50 76.1 13.1 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 4 0 21352.2 0.0 1.0X -RunLengthEncoding(2.009) 1173 1175 3 57.2 17.5 0.0X +PassThrough(1.000) 3 3 0 22388.6 0.0 1.0X +RunLengthEncoding(2.015) 924 941 23 72.6 13.8 0.0X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 9 10 1 7410.1 0.1 1.0X -RunLengthEncoding(1.000) 1499 1502 4 44.8 22.3 0.0X -DictionaryEncoding(0.500) 621 630 11 108.0 9.3 0.0X -IntDelta(0.250) 134 149 10 502.0 2.0 0.1X +PassThrough(1.000) 9 10 1 7575.9 0.1 1.0X +RunLengthEncoding(1.002) 952 966 12 70.5 14.2 0.0X +DictionaryEncoding(0.500) 561 567 6 119.7 8.4 0.0X +IntDelta(0.250) 129 134 3 521.9 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 9 10 1 7668.3 0.1 1.0X -RunLengthEncoding(1.332) 1561 1685 175 43.0 23.3 0.0X -DictionaryEncoding(0.501) 616 642 21 108.9 9.2 0.0X -IntDelta(0.250) 126 131 2 533.4 1.9 0.1X +PassThrough(1.000) 9 10 1 7494.1 0.1 1.0X +RunLengthEncoding(1.336) 974 987 13 68.9 14.5 0.0X +DictionaryEncoding(0.501) 709 719 10 94.6 10.6 0.0X +IntDelta(0.250) 127 132 4 528.4 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 19 1 3803.0 0.3 1.0X -RunLengthEncoding(0.754) 1526 1540 20 44.0 22.7 0.0X -DictionaryEncoding(0.250) 735 759 33 91.3 11.0 0.0X -LongDelta(0.125) 126 129 2 530.8 1.9 0.1X +PassThrough(1.000) 19 21 1 3543.5 
0.3 1.0X +RunLengthEncoding(0.747) 1049 1058 12 63.9 15.6 0.0X +DictionaryEncoding(0.250) 620 634 17 108.2 9.2 0.0X +LongDelta(0.125) 129 132 2 520.1 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 20 1 3705.4 0.3 1.0X -RunLengthEncoding(1.002) 1665 1669 6 40.3 24.8 0.0X -DictionaryEncoding(0.251) 890 901 11 75.4 13.3 0.0X -LongDelta(0.125) 125 130 3 537.2 1.9 0.1X +PassThrough(1.000) 18 20 2 3726.8 0.3 1.0X +RunLengthEncoding(0.999) 1076 1077 2 62.4 16.0 0.0X +DictionaryEncoding(0.251) 904 919 19 74.3 13.5 0.0X +LongDelta(0.125) 125 131 4 536.5 1.9 0.1X OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 Intel(R) Core(TM) i9-9880H CPU 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 27 30 2 2497.1 0.4 1.0X -RunLengthEncoding(0.892) 3443 3587 204 19.5 51.3 0.0X -DictionaryEncoding(0.167) 2286 2290 6 29.4 34.1 0.0X +PassThrough(1.000) 28 31 2 2430.2 0.4 1.0X +RunLengthEncoding(0.889) 1798 1800 3 37.3 26.8 0.0X +DictionaryEncoding(0.167) 1956 1959 4 34.3 29.1 0.0X ``` In the above diff, new results are with changes in this PR. It can be seen that encoding performance has improved quite a lot especially for string type. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Relies on existing unit tests. Closes #29654 from sunchao/SPARK-32802. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- ...mpressionSchemeBenchmark-jdk11-results.txt | 168 +++++++++--------- .../CompressionSchemeBenchmark-results.txt | 168 +++++++++--------- .../compression/compressionSchemes.scala | 27 ++- 3 files changed, 179 insertions(+), 184 deletions(-) diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt index 4fd57a9e95560..d6a5a7d11c23b 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk11-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 21087.3 0.0 1.0X -RunLengthEncoding(2.514) 739 739 1 90.8 11.0 0.0X -BooleanBitSet(0.125) 378 379 1 177.4 5.6 0.0X +PassThrough(1.000) 1 1 0 53450.1 0.0 1.0X +RunLengthEncoding(2.496) 533 545 10 125.8 7.9 0.0X +BooleanBitSet(0.125) 287 293 6 234.2 4.3 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 147 147 1 456.1 2.2 1.0X -RunLengthEncoding 731 732 1 91.8 10.9 0.2X -BooleanBitSet 1410 1411 1 47.6 21.0 0.1X +PassThrough 105 108 2 638.6 1.6 1.0X +RunLengthEncoding 490 497 6 136.8 7.3 0.2X +BooleanBitSet 911 914 4 73.7 13.6 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9729.9 0.1 1.0X -RunLengthEncoding(1.491) 1576 1576 1 42.6 23.5 0.0X +PassThrough(1.000) 3 3 0 20673.0 0.0 1.0X +RunLengthEncoding(1.495) 750 757 9 89.5 11.2 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1151 1152 1 58.3 17.2 1.0X -RunLengthEncoding 1619 1621 3 41.4 24.1 0.7X +PassThrough 637 647 7 105.3 9.5 1.0X +RunLengthEncoding 1056 1069 17 63.5 15.7 0.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 
10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 10135.7 0.1 1.0X -RunLengthEncoding(2.010) 1659 1660 0 40.4 24.7 0.0X +PassThrough(1.000) 3 3 0 21332.2 0.0 1.0X +RunLengthEncoding(2.004) 768 783 15 87.4 11.4 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1151 1151 1 58.3 17.2 1.0X -RunLengthEncoding 1655 1655 0 40.5 24.7 0.7X +PassThrough 640 643 4 104.9 9.5 1.0X +RunLengthEncoding 1073 1078 6 62.5 16.0 0.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2952.8 0.3 1.0X -RunLengthEncoding(0.997) 2356 2356 0 28.5 35.1 0.0X -DictionaryEncoding(0.500) 1402 1402 0 47.9 20.9 0.0X -IntDelta(0.250) 213 213 0 315.2 3.2 0.1X +PassThrough(1.000) 9 9 1 7640.9 0.1 1.0X +RunLengthEncoding(1.003) 882 883 2 76.1 13.1 0.0X +DictionaryEncoding(0.500) 587 624 33 114.3 8.7 0.0X +IntDelta(0.250) 122 127 5 549.8 1.8 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1319 1319 1 50.9 19.7 1.0X -RunLengthEncoding 1803 1806 5 37.2 26.9 0.7X -DictionaryEncoding 931 931 0 72.1 13.9 1.4X -IntDelta 817 821 4 82.2 12.2 1.6X +PassThrough 684 709 27 98.1 10.2 1.0X +RunLengthEncoding 1068 1075 10 62.8 15.9 0.6X +DictionaryEncoding 517 526 6 129.8 7.7 1.3X +IntDelta 541 545 4 124.0 8.1 1.3X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2976.8 0.3 1.0X -RunLengthEncoding(1.337) 2552 2552 1 26.3 38.0 0.0X -DictionaryEncoding(0.501) 1377 1377 0 48.7 20.5 0.0X -IntDelta(0.250) 213 214 2 315.3 3.2 0.1X +PassThrough(1.000) 9 10 1 7475.0 0.1 1.0X +RunLengthEncoding(1.339) 908 922 12 73.9 13.5 0.0X +DictionaryEncoding(0.501) 629 652 16 106.6 9.4 0.0X +IntDelta(0.250) 124 128 3 542.5 1.8 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) 
Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1438 1439 1 46.7 21.4 1.0X -RunLengthEncoding 1987 1988 1 33.8 29.6 0.7X -DictionaryEncoding 1249 1250 0 53.7 18.6 1.2X -IntDelta 1135 1136 3 59.2 16.9 1.3X +PassThrough 778 783 8 86.3 11.6 1.0X +RunLengthEncoding 1217 1217 1 55.2 18.1 0.6X +DictionaryEncoding 690 704 12 97.2 10.3 1.1X +IntDelta 691 699 13 97.1 10.3 1.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 45 45 0 1487.9 0.7 1.0X -RunLengthEncoding(0.750) 2496 2496 1 26.9 37.2 0.0X -DictionaryEncoding(0.250) 1433 1433 1 46.8 21.4 0.0X -LongDelta(0.125) 215 215 0 312.6 3.2 0.2X +PassThrough(1.000) 18 19 1 3772.0 0.3 1.0X +RunLengthEncoding(0.750) 985 987 2 68.1 14.7 0.0X +DictionaryEncoding(0.250) 665 668 4 100.9 9.9 0.0X +LongDelta(0.125) 124 128 2 539.4 1.9 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1468 1469 1 45.7 21.9 1.0X -RunLengthEncoding 1977 1981 6 33.9 29.5 0.7X -DictionaryEncoding 1248 1250 3 53.8 18.6 1.2X -LongDelta 838 840 2 80.1 12.5 1.8X +PassThrough 837 841 7 80.2 12.5 1.0X +RunLengthEncoding 1177 1180 4 57.0 17.5 0.7X +DictionaryEncoding 741 747 7 90.6 11.0 1.1X +LongDelta 509 520 13 131.8 7.6 1.6X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 47 47 0 1437.2 0.7 1.0X -RunLengthEncoding(1.002) 2743 2744 0 24.5 40.9 0.0X -DictionaryEncoding(0.251) 2016 2016 0 33.3 30.0 0.0X -LongDelta(0.125) 215 217 5 312.1 3.2 0.2X +PassThrough(1.000) 18 20 1 3769.4 0.3 1.0X +RunLengthEncoding(1.005) 1016 1054 54 66.1 15.1 0.0X +DictionaryEncoding(0.251) 923 928 4 72.7 13.8 0.0X +LongDelta(0.125) 125 127 2 538.8 1.9 0.1X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1468 
1468 0 45.7 21.9 1.0X -RunLengthEncoding 2020 2021 2 33.2 30.1 0.7X -DictionaryEncoding 1248 1248 0 53.8 18.6 1.2X -LongDelta 1131 1134 4 59.4 16.8 1.3X +PassThrough 842 846 5 79.7 12.5 1.0X +RunLengthEncoding 1222 1264 59 54.9 18.2 0.7X +DictionaryEncoding 757 776 20 88.7 11.3 1.1X +LongDelta 681 686 4 98.5 10.2 1.2X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 71 71 0 939.6 1.1 1.0X -RunLengthEncoding(0.890) 6050 6052 2 11.1 90.2 0.0X -DictionaryEncoding(0.167) 3723 3725 2 18.0 55.5 0.0X +PassThrough(1.000) 27 29 2 2510.4 0.4 1.0X +RunLengthEncoding(0.888) 1651 1663 18 40.7 24.6 0.0X +DictionaryEncoding(0.167) 1851 1863 17 36.3 27.6 0.0X -OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 2804 2815 16 23.9 41.8 1.0X -RunLengthEncoding 3390 3391 1 19.8 50.5 0.8X -DictionaryEncoding 2901 2905 5 23.1 43.2 1.0X +PassThrough 1485 1495 15 45.2 22.1 1.0X +RunLengthEncoding 2010 2066 80 33.4 30.0 0.7X +DictionaryEncoding 1788 1790 4 37.5 26.6 0.8X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt index 3f6fbe35a7b86..d4670070505aa 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 21114.6 0.0 1.0X -RunLengthEncoding(2.505) 694 696 4 96.7 10.3 0.0X -BooleanBitSet(0.125) 366 366 0 183.4 5.5 0.0X +PassThrough(1.000) 1 2 0 49671.6 0.0 1.0X +RunLengthEncoding(2.501) 470 487 25 142.7 7.0 0.0X +BooleanBitSet(0.125) 358 362 4 187.6 5.3 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 145 145 0 464.2 2.2 1.0X -RunLengthEncoding 735 735 0 91.3 10.9 0.2X -BooleanBitSet 1437 1437 1 46.7 21.4 0.1X +PassThrough 90 95 5 746.2 1.3 1.0X +RunLengthEncoding 550 559 8 122.0 8.2 0.2X +BooleanBitSet 1082 1087 7 
62.0 16.1 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9336.6 0.1 1.0X -RunLengthEncoding(1.494) 1912 1917 7 35.1 28.5 0.0X +PassThrough(1.000) 3 4 0 20595.0 0.0 1.0X +RunLengthEncoding(1.495) 1074 1087 19 62.5 16.0 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1104 1104 0 60.8 16.4 1.0X -RunLengthEncoding 1627 1628 0 41.2 24.3 0.7X +PassThrough 807 844 33 83.1 12.0 1.0X +RunLengthEncoding 1077 1078 1 62.3 16.0 0.7X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 7 7 0 9710.6 0.1 1.0X -RunLengthEncoding(2.003) 2021 2027 9 33.2 30.1 0.0X +PassThrough(1.000) 3 3 0 23144.6 0.0 1.0X +RunLengthEncoding(2.001) 1067 1073 8 62.9 15.9 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1104 1104 0 60.8 16.5 1.0X -RunLengthEncoding 1621 1621 0 41.4 24.1 0.7X +PassThrough 793 811 16 84.7 11.8 1.0X +RunLengthEncoding 1099 1123 33 61.1 16.4 0.7X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 24 24 0 2854.3 0.4 1.0X -RunLengthEncoding(1.005) 2395 2396 2 28.0 35.7 0.0X -DictionaryEncoding(0.500) 1366 1366 0 49.1 20.3 0.0X -IntDelta(0.250) 286 287 0 234.2 4.3 0.1X +PassThrough(1.000) 10 11 1 6979.9 0.1 1.0X +RunLengthEncoding(1.000) 985 994 9 68.1 14.7 0.0X +DictionaryEncoding(0.500) 896 903 10 74.9 13.4 0.0X +IntDelta(0.250) 237 244 6 283.5 3.5 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Lower Skew): Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1248 1248 0 53.8 18.6 1.0X -RunLengthEncoding 1738 1739 2 38.6 25.9 0.7X -DictionaryEncoding 969 970 0 69.2 14.4 1.3X -IntDelta 777 779 1 86.3 11.6 1.6X +PassThrough 791 795 3 84.8 11.8 1.0X +RunLengthEncoding 1111 1114 5 60.4 16.6 0.7X +DictionaryEncoding 641 650 17 104.7 9.6 1.2X +IntDelta 560 575 24 119.8 8.4 1.4X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 23 23 0 2874.4 0.3 1.0X -RunLengthEncoding(1.334) 2581 2581 0 26.0 38.5 0.0X -DictionaryEncoding(0.501) 1490 1490 0 45.0 22.2 0.0X -IntDelta(0.250) 286 286 0 234.5 4.3 0.1X +PassThrough(1.000) 9 10 1 7181.9 0.1 1.0X +RunLengthEncoding(1.336) 1006 1006 1 66.7 15.0 0.0X +DictionaryEncoding(0.501) 1034 1045 15 64.9 15.4 0.0X +IntDelta(0.250) 235 238 2 285.7 3.5 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1389 1389 0 48.3 20.7 1.0X -RunLengthEncoding 1903 1903 0 35.3 28.4 0.7X -DictionaryEncoding 1231 1232 1 54.5 18.3 1.1X -IntDelta 1103 1108 7 60.8 16.4 1.3X +PassThrough 829 832 3 81.0 12.3 1.0X +RunLengthEncoding 1199 1207 11 56.0 17.9 0.7X +DictionaryEncoding 725 726 1 92.6 10.8 1.1X +IntDelta 680 683 5 98.6 10.1 1.2X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 48 48 0 1405.2 0.7 1.0X -RunLengthEncoding(0.757) 2525 2525 1 26.6 37.6 0.0X -DictionaryEncoding(0.250) 1380 1381 1 48.6 20.6 0.0X -LongDelta(0.125) 474 474 0 141.7 7.1 0.1X +PassThrough(1.000) 20 22 1 3405.6 0.3 1.0X +RunLengthEncoding(0.747) 1097 1102 7 61.2 16.3 0.0X +DictionaryEncoding(0.250) 854 933 74 78.6 12.7 0.0X +LongDelta(0.125) 322 328 11 208.5 4.8 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1348 1349 0 49.8 20.1 1.0X -RunLengthEncoding 1850 1851 2 36.3 27.6 0.7X -DictionaryEncoding 1190 1192 3 56.4 17.7 1.1X -LongDelta 801 801 0 83.8 11.9 1.7X +PassThrough 839 843 
4 80.0 12.5 1.0X +RunLengthEncoding 1234 1234 1 54.4 18.4 0.7X +DictionaryEncoding 806 809 3 83.3 12.0 1.0X +LongDelta 550 558 6 122.0 8.2 1.5X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 46 46 0 1451.2 0.7 1.0X -RunLengthEncoding(1.003) 2742 2743 1 24.5 40.9 0.0X -DictionaryEncoding(0.251) 1714 1715 0 39.1 25.5 0.0X -LongDelta(0.125) 476 476 0 140.9 7.1 0.1X +PassThrough(1.000) 20 22 1 3319.5 0.3 1.0X +RunLengthEncoding(1.005) 1153 1169 24 58.2 17.2 0.0X +DictionaryEncoding(0.251) 923 930 9 72.7 13.7 0.0X +LongDelta(0.125) 327 332 4 205.0 4.9 0.1X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1362 1363 1 49.3 20.3 1.0X -RunLengthEncoding 1862 1863 1 36.0 27.7 0.7X -DictionaryEncoding 1190 1192 3 56.4 17.7 1.1X -LongDelta 1079 1082 4 62.2 16.1 1.3X +PassThrough 854 864 16 78.6 12.7 1.0X +RunLengthEncoding 1242 1244 3 54.0 18.5 0.7X +DictionaryEncoding 823 823 1 81.6 12.3 1.0X +LongDelta 640 651 8 104.8 9.5 1.3X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 67 67 0 994.8 1.0 1.0X -RunLengthEncoding(0.888) 6135 6137 2 10.9 91.4 0.0X -DictionaryEncoding(0.167) 3747 3748 0 17.9 55.8 0.0X +PassThrough(1.000) 29 32 1 2279.8 0.4 1.0X +RunLengthEncoding(0.886) 1723 1734 15 38.9 25.7 0.0X +DictionaryEncoding(0.167) 2667 2690 33 25.2 39.7 0.0X -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.5 +Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 3180 3185 8 21.1 47.4 1.0X -RunLengthEncoding 3658 3660 3 18.3 54.5 0.9X -DictionaryEncoding 3292 3295 4 20.4 49.1 1.0X +PassThrough 1847 1892 64 36.3 27.5 1.0X +RunLengthEncoding 2305 2332 38 29.1 34.3 0.8X +DictionaryEncoding 2134 2150 22 31.5 31.8 0.9X diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala index 3cc59af9b7ce3..cb7efd3f7716b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala @@ -23,7 +23,6 @@ import java.nio.ByteOrder import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.vectorized.WritableColumnVector import org.apache.spark.sql.types._ @@ -182,8 +181,7 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { private var _uncompressedSize = 0 private var _compressedSize = 0 - // Using `MutableRow` to store the last value to avoid boxing/unboxing cost. - private val lastValue = new SpecificInternalRow(Seq(columnType.dataType)) + private var lastValue: T#InternalType = _ private var lastRun = 0 override def uncompressedSize: Int = _uncompressedSize @@ -195,16 +193,16 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { val actualSize = columnType.actualSize(row, ordinal) _uncompressedSize += actualSize - if (lastValue.isNullAt(0)) { - columnType.copyField(row, ordinal, lastValue, 0) + if (lastValue == null) { + lastValue = columnType.clone(value) lastRun = 1 _compressedSize += actualSize + 4 } else { - if (columnType.getField(lastValue, 0) == value) { + if (lastValue == value) { lastRun += 1 } else { _compressedSize += actualSize + 4 - columnType.copyField(row, ordinal, lastValue, 0) + lastValue = columnType.clone(value) lastRun = 1 } } @@ -214,30 +212,27 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { to.putInt(RunLengthEncoding.typeId) if (from.hasRemaining) { - val currentValue = new SpecificInternalRow(Seq(columnType.dataType)) var currentRun = 1 - val value = new SpecificInternalRow(Seq(columnType.dataType)) - - columnType.extract(from, currentValue, 0) + var currentValue = columnType.extract(from) while (from.hasRemaining) { - columnType.extract(from, value, 0) + val value = columnType.extract(from) - if (value.get(0, columnType.dataType) == currentValue.get(0, columnType.dataType)) { + if (value == currentValue) { currentRun += 1 } else { // Writes current run - columnType.append(currentValue, 0, to) + columnType.append(currentValue, to) to.putInt(currentRun) // Resets current run - columnType.copyField(value, 0, currentValue, 0) + currentValue = value currentRun = 1 } } // Writes the last run - columnType.append(currentValue, 0, to) + columnType.append(currentValue, to) to.putInt(currentRun) } From fbb0f37685877499baceb5b7141c1a8e162f6735 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 14 Sep 2020 10:00:15 +0900 Subject: [PATCH 0016/1009] [SPARK-32869][BUILD] Ignore deprecation warnings for build with Scala 2.13 and sbt ### What changes were proposed in this pull request? This PR changes SparkBuild.scala to ignore deprecation warnings for build with Scala 2.13 and sbt. Actually, deprecation warnings are already ignored for Scala 2.12 but string matching logic for deprecation warnings should be changed for Scala 2.13. Currently, if a warning message contains `is deprecated`, it's ignored but some warnings contain "are deprecated` and `will be deprecated`. 
``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala:656: multiarg infix syntax looks\ like a tuple and will be deprecated [error] [warn] if (opt.clOption != null) { childArgs += (opt.clOption, opt.value) } ``` ``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are de\ precated; use an implicit parameter instead. [error] example: instead of `def f[A <% Int](a: A)` use `def f[A](a: A)(implicit ev: A => Int)` [error] [warn] class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( ``` ### Why are the changes needed? Enable building Spark with Scala 2.13 and sbt. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built with the following command and confirmed that deprecation warnings are not treated as fatal (the build itself doesn't pass due to another problem). `build/sbt -Pscala-2.13 package` Closes #29741 from sarutak/scala-2.13-deprecated-warning. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c94ae4e510087..160b3b5e7edb3 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -296,7 +296,7 @@ object SparkBuild extends PomBuild { var failed = 0 analysis.infos.allInfos.foreach { case (k, i) => i.reportedProblems foreach { p => - val deprecation = p.message.contains("is deprecated") + val deprecation = p.message.contains("deprecated") if (!deprecation) { failed = failed + 1 From e558b8a0fd1b1a2d3d37a18835951a7d2b3ef19e Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 14 Sep 2020 11:57:29 +0900 Subject: [PATCH 0017/1009] [SPARK-31847][CORE][TESTS] DAGSchedulerSuite: Rewrite the test framework to support applying specified Spark configurations ### What changes were proposed in this pull request? `DAGSchedulerSuite` has an issue: `afterEach` and `init` are called whenever the `SparkConf` of the default `SparkContext` lacks a configuration that the test case must set. This causes the `SparkContext` initialized in `beforeEach` to be discarded without being used, resulting in waste. On the other hand, the flexibility to add configurations to `SparkConf` should be addressed by the test framework. Test suites that inherit `LocalSparkContext` can be simplified. ### Why are the changes needed? Reduce the overhead of initializing `SparkContext`. Rewrite the test framework to support applying specified Spark configurations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins test. Closes #29228 from beliefer/extend-test-frame-for-dag. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../apache/spark/TempLocalSparkContext.scala | 100 ++++++++++++++++++ .../spark/scheduler/DAGSchedulerSuite.scala | 55 ++++------ 2 files changed, 120 insertions(+), 35 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala diff --git a/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala b/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala new file mode 100644 index 0000000000000..6d5fcd1edfb03 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/TempLocalSparkContext.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import _root_.io.netty.util.internal.logging.{InternalLoggerFactory, Slf4JLoggerFactory} +import org.scalatest.BeforeAndAfterAll +import org.scalatest.BeforeAndAfterEach +import org.scalatest.Suite + +import org.apache.spark.internal.Logging +import org.apache.spark.resource.ResourceProfile + +/** + * Manages a local `sc` `SparkContext` variable, correctly stopping it after each test. + * + * Note: this class is a copy of [[LocalSparkContext]]. Why copy it? Reduce conflict. Because + * many test suites use [[LocalSparkContext]] and overwrite some variable or function (e.g. + * sc of LocalSparkContext), there occurs conflict when we refactor the `sc` as a new function. + * After migrating all test suites that use [[LocalSparkContext]] to use + * [[TempLocalSparkContext]], we will delete the original [[LocalSparkContext]] and rename + * [[TempLocalSparkContext]] to [[LocalSparkContext]]. + */ +trait TempLocalSparkContext extends BeforeAndAfterEach + with BeforeAndAfterAll with Logging { self: Suite => + + private var _conf: SparkConf = defaultSparkConf + + @transient private var _sc: SparkContext = _ + + def conf: SparkConf = _conf + + /** + * Currently, we are focusing on the reconstruction of LocalSparkContext, so this method + * was created temporarily. When the migration work is completed, this method will be + * renamed to `sc` and the variable `sc` will be deleted. + */ + def sc: SparkContext = { + if (_sc == null) { + _sc = new SparkContext(_conf) + } + _sc + } + + override def beforeAll(): Unit = { + super.beforeAll() + InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) + } + + override def afterEach(): Unit = { + try { + resetSparkContext() + } finally { + super.afterEach() + } + } + + def resetSparkContext(): Unit = { + TempLocalSparkContext.stop(_sc) + ResourceProfile.clearDefaultProfile() + _sc = null + _conf = defaultSparkConf + } + + private def defaultSparkConf: SparkConf = new SparkConf() + .setMaster("local[2]").setAppName(s"${this.getClass.getSimpleName}") +} + +object TempLocalSparkContext { + def stop(sc: SparkContext): Unit = { + if (sc != null) { + sc.stop() + } + // To avoid RPC rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.driver.port") + } + + /** Runs `f` by passing in `sc` and ensures that `sc` is stopped. 
*/ + def withSpark[T](sc: SparkContext)(f: SparkContext => T): T = { + try { + f(sc) + } finally { + stop(sc) + } + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 436765808e22b..99be1faab8b85 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import java.util.Properties import java.util.concurrent.{CountDownLatch, TimeUnit} -import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong, AtomicReference} +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference} import scala.annotation.meta.param import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} @@ -125,14 +125,14 @@ class MyRDD( class DAGSchedulerSuiteDummyException extends Exception -class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLimits { +class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with TimeLimits { import DAGSchedulerSuite._ // Necessary to make ScalaTest 3.x interrupt a thread on the JVM like ScalaTest 2.2.x implicit val defaultSignaler: Signaler = ThreadSignaler - val conf = new SparkConf + private var firstInit: Boolean = _ /** Set of TaskSets the DAGScheduler has requested executed. */ val taskSets = scala.collection.mutable.Buffer[TaskSet]() @@ -297,11 +297,19 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi override def beforeEach(): Unit = { super.beforeEach() - init(new SparkConf()) + firstInit = true } - private def init(testConf: SparkConf): Unit = { - sc = new SparkContext("local[2]", "DAGSchedulerSuite", testConf) + override def sc: SparkContext = { + val sc = super.sc + if (firstInit) { + init(sc) + firstInit = false + } + sc + } + + private def init(sc: SparkContext): Unit = { sparkListener = new EventInfoRecordingListener failure = null sc.addSparkListener(sparkListener) @@ -310,10 +318,10 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi cancelledStages.clear() cacheLocations.clear() results.clear() - securityMgr = new SecurityManager(conf) - broadcastManager = new BroadcastManager(true, conf, securityMgr) - mapOutputTracker = spy(new MyMapOutputTrackerMaster(conf, broadcastManager)) - blockManagerMaster = spy(new MyBlockManagerMaster(conf)) + securityMgr = new SecurityManager(sc.getConf) + broadcastManager = new BroadcastManager(true, sc.getConf, securityMgr) + mapOutputTracker = spy(new MyMapOutputTrackerMaster(sc.getConf, broadcastManager)) + blockManagerMaster = spy(new MyBlockManagerMaster(sc.getConf)) scheduler = new DAGScheduler( sc, taskScheduler, @@ -353,6 +361,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi * DAGScheduler event loop. 
*/ private def runEvent(event: DAGSchedulerEvent): Unit = { + // Ensure the initialization of various components + sc dagEventProcessLoopTester.post(event) } @@ -491,12 +501,8 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("All shuffle files on the storage endpoint should be cleaned up when it is lost") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") conf.set("spark.files.fetchFailure.unRegisterOutputOnHost", "true") - init(conf) runEvent(ExecutorAdded("hostA-exec1", "hostA")) runEvent(ExecutorAdded("hostA-exec2", "hostA")) runEvent(ExecutorAdded("hostB-exec", "hostB")) @@ -565,11 +571,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("SPARK-32003: All shuffle files for executor should be cleaned up on fetch failure") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") - init(conf) val shuffleMapRdd = new MyRDD(sc, 3, Nil) val shuffleDep = new ShuffleDependency(shuffleMapRdd, new HashPartitioner(3)) @@ -861,11 +863,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi "not lost" } test(s"shuffle files $maybeLost when $eventDescription") { - // reset the test context with the right shuffle service config - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_SERVICE_ENABLED.key, shuffleServiceOn.toString) - init(conf) assert(sc.env.blockManager.externalShuffleServiceEnabled == shuffleServiceOn) val shuffleMapRdd = new MyRDD(sc, 2, Nil) @@ -2888,11 +2886,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("SPARK-25341: abort stage while using old fetch protocol") { - // reset the test context with using old fetch protocol - afterEach() - val conf = new SparkConf() conf.set(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL.key, "true") - init(conf) // Construct the scenario of indeterminate stage fetch failed. 
constructIndeterminateStageFetchFailed() // The job should fail because Spark can't rollback the shuffle map stage while @@ -3220,10 +3214,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test 2 resource profile with merge conflict config true") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) @@ -3241,10 +3232,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test multiple resource profiles created from merging use same rp") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) @@ -3338,10 +3326,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with TimeLi } test("test merge 3 resource profiles") { - afterEach() - val conf = new SparkConf() conf.set(config.RESOURCE_PROFILE_MERGE_CONFLICTS.key, "true") - init(conf) val ereqs = new ExecutorResourceRequests().cores(4) val treqs = new TaskResourceRequests().cpus(1) val rp1 = new ResourceProfile(ereqs.requests, treqs.requests) From 742fcff3501e46722eeaeb9d1ac20e569f8f1c2c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 14 Sep 2020 13:15:14 +0900 Subject: [PATCH 0018/1009] [SPARK-32839][WINDOWS] Make Spark scripts working with the spaces in paths on Windows ### What changes were proposed in this pull request? If you install Spark under the path that has whitespaces, it does not work on Windows, for example as below: ``` >>> SparkSession.builder.getOrCreate() Presence of build for multiple Scala versions detected (C:\...\assembly\target\scala-2.13 and C:\...\assembly\target\scala-2.12). Remove one of them or, set SPARK_SCALA_VERSION=2.13 in spark-env.cmd. Visit https://spark.apache.org/docs/latest/configuration.html#environment-variables for more details about setting environment variables in spark-env.cmd. Either clean one of them or, set SPARK_SCALA_VERSION in spark-env.cmd. ``` This PR fixes the whitespace handling to support any paths on Windows. ### Why are the changes needed? To support Spark working with whitespaces in paths on Windows. ### Does this PR introduce _any_ user-facing change? Yes, users will be able to install and run Spark under the paths with whitespaces. ### How was this patch tested? Manually tested. Closes #29706 from HyukjinKwon/window-space-path. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- bin/find-spark-home.cmd | 2 +- bin/load-spark-env.cmd | 6 +++--- bin/spark-class2.cmd | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) mode change 100644 => 100755 bin/spark-class2.cmd diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd index f795d146d49c7..3149d05039ba4 100644 --- a/bin/find-spark-home.cmd +++ b/bin/find-spark-home.cmd @@ -55,6 +55,6 @@ if "x%SPARK_HOME%"=="x" ( set SPARK_HOME=%~dp0.. 
) else ( rem We are pip installed, use the Python script to resolve a reasonable SPARK_HOME - for /f "delims=" %%i in ('%PYTHON_RUNNER% %FIND_SPARK_HOME_PYTHON_SCRIPT%') do set SPARK_HOME=%%i + for /f "delims=" %%i in ('%PYTHON_RUNNER% "%FIND_SPARK_HOME_PYTHON_SCRIPT%"') do set SPARK_HOME=%%i ) ) diff --git a/bin/load-spark-env.cmd b/bin/load-spark-env.cmd index fe725a4e1a368..5692af529fb66 100644 --- a/bin/load-spark-env.cmd +++ b/bin/load-spark-env.cmd @@ -24,7 +24,7 @@ rem conf\ subdirectory. if not defined SPARK_ENV_LOADED ( set SPARK_ENV_LOADED=1 - if [%SPARK_CONF_DIR%] == [] ( + if not defined SPARK_CONF_DIR ( set SPARK_CONF_DIR=%~dp0..\conf ) @@ -36,8 +36,8 @@ rem Setting SPARK_SCALA_VERSION if not already set. set SCALA_VERSION_1=2.13 set SCALA_VERSION_2=2.12 -set ASSEMBLY_DIR1=%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_1% -set ASSEMBLY_DIR2=%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_2% +set ASSEMBLY_DIR1="%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_1%" +set ASSEMBLY_DIR2="%SPARK_HOME%\assembly\target\scala-%SCALA_VERSION_2%" set ENV_VARIABLE_DOC=https://spark.apache.org/docs/latest/configuration.html#environment-variables if not defined SPARK_SCALA_VERSION ( diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd old mode 100644 new mode 100755 index 34d04c9856d2c..68b271d1d05d9 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -30,12 +30,12 @@ if "x%1"=="x" ( rem Find Spark jars. if exist "%SPARK_HOME%\jars" ( - set SPARK_JARS_DIR="%SPARK_HOME%\jars" + set SPARK_JARS_DIR=%SPARK_HOME%\jars ) else ( - set SPARK_JARS_DIR="%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars" + set SPARK_JARS_DIR=%SPARK_HOME%\assembly\target\scala-%SPARK_SCALA_VERSION%\jars ) -if not exist "%SPARK_JARS_DIR%"\ ( +if not exist "%SPARK_JARS_DIR%" ( echo Failed to find Spark jars directory. echo You need to build Spark before running this program. exit /b 1 From b121f0d4596969ded3db9d5d7b0cb8adac8ac00c Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 14 Sep 2020 15:34:58 +0900 Subject: [PATCH 0019/1009] [SPARK-32873][BUILD] Fix code which causes error when build with sbt and Scala 2.13 ### What changes were proposed in this pull request? This PR fix code which causes error when build with sbt and Scala 2.13 like as follows. ``` [error] [warn] /home/kou/work/oss/spark-scala-2.13/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala:251: method with a single empty parameter list overrides method without any parameter list [error] [warn] override def hasNext(): Boolean = requestOffset < part.untilOffset [error] [warn] [error] [warn] /home/kou/work/oss/spark-scala-2.13/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala:294: method with a single empty parameter list overrides method without any parameter list [error] [warn] override def hasNext(): Boolean = okNext ``` More specifically, what this PR fixes are * Methods which has an empty parameter list and overrides an method which has no parameter list. ``` override def hasNext(): Boolean = okNext ``` * Methods which has no parameter list and overrides an method which has an empty parameter list. ``` override def next: (Int, Double) = { ``` * Infix operator expression that the operator wraps. 
``` 3L * math.min(k, numFeatures) * math.min(k, numFeatures) 3L * math.min(k, numFeatures) * math.min(k, numFeatures) + + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) * * math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) ``` ### Why are the changes needed? For building Spark with sbt and Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? After this change and #29742 applied, compile passed with the following command. ``` build/sbt -Pscala-2.13 -Phive -Phive-thriftserver -Pyarn -Pkubernetes compile test:compile ``` Closes #29745 from sarutak/fix-code-for-sbt-and-spark-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../org/apache/spark/streaming/kafka010/KafkaRDD.scala | 4 ++-- .../src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 2 +- .../src/main/scala/org/apache/spark/mllib/feature/PCA.scala | 6 +++--- .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 +- .../org/apache/spark/sql/execution/command/commands.scala | 4 ++-- .../spark/sql/execution/datasources/v2/V2CommandExec.scala | 2 +- .../execution/datasources/v2/jdbc/JDBCTableCatalog.scala | 2 +- .../streaming/state/SymmetricHashJoinStateManager.scala | 4 ++-- .../spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala | 2 +- .../spark/streaming/receiver/ReceivedBlockHandler.scala | 2 +- 10 files changed, 15 insertions(+), 15 deletions(-) diff --git a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala index bd2e7e11b7383..46164e9b63365 100644 --- a/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala +++ b/external/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala @@ -248,7 +248,7 @@ private class KafkaRDDIterator[K, V]( } } - override def hasNext(): Boolean = requestOffset < part.untilOffset + override def hasNext: Boolean = requestOffset < part.untilOffset override def next(): ConsumerRecord[K, V] = { if (!hasNext) { @@ -291,7 +291,7 @@ private class CompactedKafkaRDDIterator[K, V]( private var okNext: Boolean = true - override def hasNext(): Boolean = okNext + override def hasNext: Boolean = okNext override def next(): ConsumerRecord[K, V] = { if (!hasNext) { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 83973bcffef05..2c35ede8118c4 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -786,7 +786,7 @@ class SparseVector @Since("2.0.0") ( override def hasNext: Boolean = i < localSize - override def next: (Int, Double) = { + override def next(): (Int, Double) = { val v = if (i == k) { j += 1 k = if (j < localNumActives) localIndices(j) else -1 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index 356ed48e99387..c165d4810c934 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -124,9 +124,9 @@ private[feature] object PCAUtil { // 6e541be066d547a097f5089165cd7c38c3ca276d/math/src/main/scala/breeze/linalg/ // functions/svd.scala#L87 def memoryCost(k: Int, numFeatures: Int): Long = { - 3L * 
math.min(k, numFeatures) * math.min(k, numFeatures) - + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) - * math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) + 3L * math.min(k, numFeatures) * math.min(k, numFeatures) + + math.max(math.max(k, numFeatures), 4L * math.min(k, numFeatures) * + math.min(k, numFeatures) + 4L * math.min(k, numFeatures)) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index c23088de85b8a..2fe415f14032f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -984,7 +984,7 @@ class SparseVector @Since("1.0.0") ( override def hasNext: Boolean = i < localSize - override def next: (Int, Double) = { + override def next(): (Int, Double) = { val v = if (i == k) { j += 1 k = if (j < localNumActives) localIndices(j) else -1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 18fd2a5ac2330..70f20cd8b7c06 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -78,7 +78,7 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends LeafExecNode { override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray @@ -119,7 +119,7 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray - override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.toIterator override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 4be4a6b30edcd..7738f26dfd266 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -44,7 +44,7 @@ abstract class V2CommandExec extends SparkPlan { */ override def executeCollect(): Array[InternalRow] = result.toArray - override def executeToIterator: Iterator[InternalRow] = result.toIterator + override def executeToIterator(): Iterator[InternalRow] = result.toIterator override def executeTake(limit: Int): Array[InternalRow] = result.take(limit).toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 0138014a8e21e..41f650d1f2ff5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -61,7 +61,7 @@ class JDBCTableCatalog extends 
TableCatalog with Logging { .getTables(null, schemaPattern, "%", Array("TABLE")); new Iterator[Identifier] { def hasNext = rs.next() - def next = Identifier.of(namespace, rs.getString("TABLE_NAME")) + def next() = Identifier.of(namespace, rs.getString("TABLE_NAME")) }.toArray } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 1a5b50dcc7901..2aa2a18b9eaf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -171,7 +171,7 @@ class SymmetricHashJoinStateManager( return null } - override def close: Unit = {} + override def close(): Unit = {} } } @@ -280,7 +280,7 @@ class SymmetricHashJoinStateManager( return reusedRet.withNew(currentKey, currentValue.value, currentValue.matched) } - override def close: Unit = {} + override def close(): Unit = {} } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala index f677c492d561f..6494e512713f8 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala @@ -96,7 +96,7 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( @transient private val hadoopConfig = sc.hadoopConfiguration private val broadcastedHadoopConf = new SerializableConfiguration(hadoopConfig) - override def isValid(): Boolean = true + override def isValid: Boolean = true override def getPartitions: Array[Partition] = { assertValid() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index 12ed8015117e5..7a561ecb4990f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -234,7 +234,7 @@ private[streaming] class CountingIterator[T](iterator: Iterator[T]) extends Iter private def isFullyConsumed: Boolean = !iterator.hasNext - def hasNext(): Boolean = iterator.hasNext + def hasNext: Boolean = iterator.hasNext def count(): Option[Long] = { if (isFullyConsumed) Some(_count) else None From 978f531010adfc08110897450d49cb569e4805ab Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 14 Sep 2020 08:49:51 +0000 Subject: [PATCH 0020/1009] [SPARK-32854][SS] Minor code and doc improvement for stream-stream join ### What changes were proposed in this pull request? Several minor code and documentation improvement for stream-stream join. Specifically: * Remove extending from `SparkPlan`, as extending from `BinaryExecNode` is enough. * Return `left/right.outputPartitioning` for `Left/RightOuter` in `outputPartitioning`, as the `PartitioningCollection` wrapper is unnecessary (similar to batch joins `ShuffledHashJoinExec`, `SortMergeJoinExec`). 
* Avoid per-row check for join type (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L486-L492), by creating the method before the loop of reading rows (`generateFilteredJoinedRow` in `storeAndJoinWithOtherSide`). Similar optimization (i.e. create auxiliary method/variable per different join type before the iterator of input rows) has been done in batch join world (`SortMergeJoinExec`, `ShuffledHashJoinExec`). * Minor fix for comment/indentation for better readability. ### Why are the changes needed? Minor optimization to avoid per-row unnecessary work (this probably can be optimized away by compiler, but we can do a better join to avoid it at the first place). And other comment/indentation fix to have better code readability for future developers. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests in `StreamingJoinSuite.scala` as no new logic is introduced. Closes #29724 from c21/streaming. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../StreamingSymmetricHashJoinExec.scala | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 3d071df493cec..a52f5f4ac94ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -56,8 +56,8 @@ import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} * - Apply the optional condition to filter the joined rows as the final output. * * If a timestamp column with event time watermark is present in the join keys or in the input - * data, then the it uses the watermark figure out which rows in the buffer will not join with - * and the new data, and therefore can be discarded. Depending on the provided query conditions, we + * data, then it uses the watermark to figure out which rows in the buffer will not join with + * the new data, and therefore can be discarded. Depending on the provided query conditions, we * can define thresholds on both state key (i.e. joining keys) and state value (i.e. input rows). * There are three kinds of queries possible regarding this as explained below. * Assume that watermark has been defined on both `leftTime` and `rightTime` columns used below. @@ -134,7 +134,7 @@ case class StreamingSymmetricHashJoinExec( stateWatermarkPredicates: JoinStateWatermarkPredicates, stateFormatVersion: Int, left: SparkPlan, - right: SparkPlan) extends SparkPlan with BinaryExecNode with StateStoreWriter { + right: SparkPlan) extends BinaryExecNode with StateStoreWriter { def this( leftKeys: Seq[Expression], @@ -157,14 +157,16 @@ case class StreamingSymmetricHashJoinExec( " the checkpoint and rerun the query. 
See SPARK-26154 for more details.") } + private lazy val errorMessageForJoinType = + s"${getClass.getSimpleName} should not take $joinType as the JoinType" + private def throwBadJoinTypeException(): Nothing = { - throw new IllegalArgumentException( - s"${getClass.getSimpleName} should not take $joinType as the JoinType") + throw new IllegalArgumentException(errorMessageForJoinType) } require( joinType == Inner || joinType == LeftOuter || joinType == RightOuter, - s"${getClass.getSimpleName} should not take $joinType as the JoinType") + errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) private val storeConf = new StateStoreConf(sqlContext.conf) @@ -189,11 +191,9 @@ case class StreamingSymmetricHashJoinExec( override def outputPartitioning: Partitioning = joinType match { case _: InnerLike => PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) - case LeftOuter => PartitioningCollection(Seq(left.outputPartitioning)) - case RightOuter => PartitioningCollection(Seq(right.outputPartitioning)) - case x => - throw new IllegalArgumentException( - s"${getClass.getSimpleName} should not take $x as the JoinType") + case LeftOuter => left.outputPartitioning + case RightOuter => right.outputPartitioning + case _ => throwBadJoinTypeException() } override def shouldRunAnotherBatch(newMetadata: OffsetSeqMetadata): Boolean = { @@ -246,13 +246,14 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // - // - `leftJoiner.joinWith(rightJoiner)` generates all rows from matching new left input with - // stored right input, and also stores all the left input + // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` generates all rows from + // matching new left input with stored right input, and also stores all the left input // - // - `rightJoiner.joinWith(leftJoiner)` generates all rows from matching new right input with - // stored left input, and also stores all the right input. It also generates all rows from - // matching new left input with new right input, since the new left input has become stored - // by that point. This tiny asymmetry is necessary to avoid duplication. + // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` generates all rows from + // matching new right input with stored left input, and also stores all the right input. + // It also generates all rows from matching new left input with new right input, since + // the new left input has become stored by that point. This tiny asymmetry is necessary + // to avoid duplication. 
val leftOutputIter = leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner) { (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(input).withRight(matched) } @@ -459,8 +460,9 @@ case class StreamingSymmetricHashJoinExec( */ def storeAndJoinWithOtherSide( otherSideJoiner: OneSideHashJoiner)( - generateJoinedRow: (InternalRow, InternalRow) => JoinedRow): - Iterator[InternalRow] = { + generateJoinedRow: (InternalRow, InternalRow) => JoinedRow) + : Iterator[InternalRow] = { + val watermarkAttribute = inputAttributes.find(_.metadata.contains(delayKey)) val nonLateRows = WatermarkSupport.watermarkExpression(watermarkAttribute, eventTimeWatermark) match { @@ -471,6 +473,14 @@ case class StreamingSymmetricHashJoinExec( inputIter } + val generateFilteredJoinedRow: InternalRow => Iterator[InternalRow] = joinSide match { + case LeftSide if joinType == LeftOuter => + (row: InternalRow) => Iterator(generateJoinedRow(row, nullRight)) + case RightSide if joinType == RightOuter => + (row: InternalRow) => Iterator(generateJoinedRow(row, nullLeft)) + case _ => (_: InternalRow) => Iterator.empty + } + nonLateRows.flatMap { row => val thisRow = row.asInstanceOf[UnsafeRow] // If this row fails the pre join filter, that means it can never satisfy the full join @@ -483,13 +493,7 @@ case class StreamingSymmetricHashJoinExec( .getJoinedRows(key, thatRow => generateJoinedRow(thisRow, thatRow), postJoinFilter) new AddingProcessedRowToStateCompletionIterator(key, thisRow, outputIter) } else { - joinSide match { - case LeftSide if joinType == LeftOuter => - Iterator(generateJoinedRow(thisRow, nullRight)) - case RightSide if joinType == RightOuter => - Iterator(generateJoinedRow(thisRow, nullLeft)) - case _ => Iterator() - } + generateFilteredJoinedRow(thisRow) } } } From 5e825482d70e13a8cb16f1fbdac8139710482d17 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Mon, 14 Sep 2020 09:20:24 +0000 Subject: [PATCH 0021/1009] [SPARK-32844][SQL] Make `DataFrameReader.table` take the specified options for datasource v1 ### What changes were proposed in this pull request? Make `DataFrameReader.table` take the specified options for datasource v1. ### Why are the changes needed? Keep the same behavior of v1/v2 datasource, the v2 fix has been done in SPARK-32592. ### Does this PR introduce _any_ user-facing change? Yes. The DataFrameReader.table will take the specified options. Also, if there are the same key and value exists in specified options and table properties, an exception will be thrown. ### How was this patch tested? New UT added. Closes #29712 from xuanyuanking/SPARK-32844. 
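For reference, a minimal sketch of the user-facing behaviour, based on the new test (it assumes a `spark` session and a parquet table served by the v1 code path):

```scala
// Minimal sketch; assumes a SparkSession `spark` is in scope and parquet uses the v1 path.
spark.sql("CREATE TABLE t(i INT, d DOUBLE) USING parquet OPTIONS ('p1'='v1', 'p2'='v2')")

// Conflicting value for an existing table serde property: analysis now fails with a
// "duplicated key" error instead of silently ignoring the specified option.
// spark.read.option("p1", "v3").table("t")

// A matching value or a brand-new key is merged into the scan options of the v1 relation.
val df = spark.read.option("p2", "v2").option("p3", "v3").table("t")
```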
Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/catalog/SessionCatalog.scala | 7 ++-- .../sql/catalyst/catalog/interface.scala | 5 ++- .../apache/spark/sql/internal/SQLConf.scala | 12 +++++++ .../datasources/DataSourceStrategy.scala | 21 +++++++----- .../datasources/DataSourceUtils.scala | 34 +++++++++++++++++++ .../sql/test/DataFrameReaderWriterSuite.scala | 22 +++++++++++- 7 files changed, 89 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4516c71bbc514..7d591eeea2b79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1025,7 +1025,7 @@ class Analyzer( case SessionCatalogAndIdentifier(catalog, ident) => lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table => - v1SessionCatalog.getRelation(v1Table.v1Table) + v1SessionCatalog.getRelation(v1Table.v1Table, options) case table => SubqueryAlias( catalog.name +: ident.asMultipartIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 6fba3156c3919..e9a02c15f7362 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -43,6 +43,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils object SessionCatalog { @@ -783,7 +784,9 @@ class SessionCatalog( } } - def getRelation(metadata: CatalogTable): LogicalPlan = { + def getRelation( + metadata: CatalogTable, + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()): LogicalPlan = { val name = metadata.identifier val db = formatDatabaseName(name.database.getOrElse(currentDb)) val table = formatTableName(name.table) @@ -801,7 +804,7 @@ class SessionCatalog( child = parser.parsePlan(viewText)) SubqueryAlias(multiParts, child) } else { - SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata)) + SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 4e63ee7428d72..be09e761272ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -639,7 +640,9 @@ object CatalogTypes { * A placeholder for a table relation, which will be replaced by concrete relation like * `LogicalRelation` or `HiveTableRelation`, during analysis. 
*/ -case class UnresolvedCatalogRelation(tableMeta: CatalogTable) extends LeafNode { +case class UnresolvedCatalogRelation( + tableMeta: CatalogTable, + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) extends LeafNode { assert(tableMeta.identifier.database.isDefined) override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dae715ae827e2..2f2b645360ed6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2732,6 +2732,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_EXTRA_OPTIONS_BEHAVIOR = + buildConf("spark.sql.legacy.extraOptionsBehavior.enabled") + .internal() + .doc("When true, the extra options will be ignored for DataFrameReader.table(). If set it " + + "to false, which is the default, Spark will check if the extra options have the same " + + "key, but the value is different with the table serde properties. If the check passes, " + + "the extra options will be merged with the serde properties as the scan options. " + + "Otherwise, an exception will be thrown.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val TRUNCATE_TRASH_ENABLED = buildConf("spark.sql.truncate.trash.enabled") .doc("This configuration decides when truncating table, whether data files will be moved " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 3ccff6d89babd..1f8cfee308033 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.fs.Path @@ -42,6 +43,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.unsafe.types.UTF8String /** @@ -237,11 +239,12 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast * data source. 
*/ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { - private def readDataSourceTable(table: CatalogTable): LogicalPlan = { + private def readDataSourceTable( + table: CatalogTable, extraOptions: CaseInsensitiveStringMap): LogicalPlan = { val qualifiedTableName = QualifiedTableName(table.database, table.identifier.table) val catalog = sparkSession.sessionState.catalog + val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) catalog.getCachedPlan(qualifiedTableName, () => { - val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_)) val dataSource = DataSource( sparkSession, @@ -251,24 +254,24 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] partitionColumns = table.partitionColumnNames, bucketSpec = table.bucketSpec, className = table.provider.get, - options = table.storage.properties ++ pathOption, + options = dsOptions, catalogTable = Some(table)) LogicalRelation(dataSource.resolveRelation(checkFilesExist = false), table) }) } override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => - i.copy(table = readDataSourceTable(tableMeta)) + i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _), _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) - case UnresolvedCatalogRelation(tableMeta) if DDLUtils.isDatasourceTable(tableMeta) => - readDataSourceTable(tableMeta) + case UnresolvedCatalogRelation(tableMeta, options) if DDLUtils.isDatasourceTable(tableMeta) => + readDataSourceTable(tableMeta, options) - case UnresolvedCatalogRelation(tableMeta) => + case UnresolvedCatalogRelation(tableMeta, _) => DDLUtils.readHiveTable(tableMeta) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index abb74d8d09ec6..b4308a872bb39 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources import java.util.Locale +import scala.collection.JavaConverters._ + import org.apache.hadoop.fs.Path import org.json4s.NoTypeHints import org.json4s.jackson.Serialization @@ -26,11 +28,13 @@ import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -190,4 +194,34 @@ object DataSourceUtils { case LegacyBehaviorPolicy.LEGACY => RebaseDateTime.rebaseGregorianToJulianMicros case 
LegacyBehaviorPolicy.CORRECTED => identity[Long] } + + def generateDatasourceOptions( + extraOptions: CaseInsensitiveStringMap, table: CatalogTable): Map[String, String] = { + val pathOption = table.storage.locationUri.map("path" -> CatalogUtils.URIToString(_)) + val options = table.storage.properties ++ pathOption + if (!SQLConf.get.getConf(SQLConf.LEGACY_EXTRA_OPTIONS_BEHAVIOR)) { + // Check the same key with different values + table.storage.properties.foreach { case (k, v) => + if (extraOptions.containsKey(k) && extraOptions.get(k) != v) { + throw new AnalysisException( + s"Fail to resolve data source for the table ${table.identifier} since the table " + + s"serde property has the duplicated key $k with extra options specified for this " + + "scan operation. To fix this, you can rollback to the legacy behavior of ignoring " + + "the extra options by setting the config " + + s"${SQLConf.LEGACY_EXTRA_OPTIONS_BEHAVIOR.key} to `false`, or address the " + + s"conflicts of the same config.") + } + } + // To keep the original key from table properties, here we filter all case insensitive + // duplicate keys out from extra options. + val lowerCasedDuplicatedKeys = + table.storage.properties.keySet.map(_.toLowerCase(Locale.ROOT)) + .intersect(extraOptions.keySet.asScala) + extraOptions.asCaseSensitiveMap().asScala.filterNot { + case (k, _) => lowerCasedDuplicatedKeys.contains(k.toLowerCase(Locale.ROOT)) + }.toMap ++ options + } else { + options + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index eaca63c74c875..4e61dba4955af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} import org.apache.spark.sql.execution.QueryExecution -import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.execution.datasources.{DataSourceUtils, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.internal.SQLConf @@ -1199,4 +1199,24 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with dfw.save("1") dfw.save("2") } + + test("SPARK-32844: DataFrameReader.table take the specified options for V1 relation") { + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") { + withTable("t") { + sql("CREATE TABLE t(i int, d double) USING parquet OPTIONS ('p1'='v1', 'p2'='v2')") + + val msg = intercept[AnalysisException] { + spark.read.option("P1", "v3").table("t").count() + }.getMessage + assert(msg.contains("duplicated key")) + + val df = spark.read.option("P2", "v2").option("p3", "v3").table("t") + val options = df.queryExecution.analyzed.collectFirst { + case r: LogicalRelation => r.relation.asInstanceOf[HadoopFsRelation].options + }.get + assert(options("p2") == "v2") + assert(options("p3") == "v3") + } + } + } } From 7a17158a4d7fd6d22f9550eceab42d8af308aeb4 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 14 Sep 2020 22:52:33 +0900 Subject: [PATCH 0022/1009] [SPARK-32868][SQL] Add more order irrelevant 
aggregates to EliminateSorts ### What changes were proposed in this pull request? Mark `BitAggregate` as order irrelevant in `EliminateSorts`. ### Why are the changes needed? Performance improvements in some queries ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Generalized an existing UT Closes #29740 from tanelk/SPARK-32868. Authored-by: tanel.kiis@gmail.com Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/dsl/package.scala | 6 +++++ .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../optimizer/EliminateSortsSuite.scala | 26 ++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 8b3243067a16c..b61c4b8d065f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -197,6 +197,12 @@ package object dsl { Max(e).toAggregateExpression(isDistinct = false, filter = filter) def maxDistinct(e: Expression, filter: Option[Expression] = None): Expression = Max(e).toAggregateExpression(isDistinct = true, filter = filter) + def bitAnd(e: Expression, filter: Option[Expression] = None): Expression = + BitAndAgg(e).toAggregateExpression(isDistinct = false, filter = filter) + def bitOr(e: Expression, filter: Option[Expression] = None): Expression = + BitOrAgg(e).toAggregateExpression(isDistinct = false, filter = filter) + def bitXor(e: Expression, filter: Option[Expression] = None): Expression = + BitXorAgg(e).toAggregateExpression(isDistinct = false, filter = filter) def upper(e: Expression): Expression = Upper(e) def lower(e: Expression): Expression = Lower(e) def coalesce(args: Expression*): Expression = Coalesce(args) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9216ab1631e7b..b7791cd442694 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1044,7 +1044,7 @@ object EliminateSorts extends Rule[LogicalPlan] { private def isOrderIrrelevantAggs(aggs: Seq[NamedExpression]): Boolean = { def isOrderIrrelevantAggFunction(func: AggregateFunction): Boolean = func match { - case _: Min | _: Max | _: Count => true + case _: Min | _: Max | _: Count | _: BitAggregate => true // Arithmetic operations for floating-point values are order-sensitive // (they are not associative). 
case _: Sum | _: Average | _: CentralMomentAgg => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index e2b599a7c090c..265f0a9936759 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -197,13 +197,25 @@ class EliminateSortsSuite extends PlanTest { comparePlans(optimizedThrice, correctAnswerThrice) } - test("remove orderBy in groupBy clause with count aggs") { - val projectPlan = testRelation.select('a, 'b) - val unnecessaryOrderByPlan = projectPlan.orderBy('a.asc, 'b.desc) - val groupByPlan = unnecessaryOrderByPlan.groupBy('a)(count(1)) - val optimized = Optimize.execute(groupByPlan.analyze) - val correctAnswer = projectPlan.groupBy('a)(count(1)).analyze - comparePlans(optimized, correctAnswer) + test("remove orderBy in groupBy clause with order irrelevant aggs") { + Seq( + (e : Expression) => min(e), + (e : Expression) => minDistinct(e), + (e : Expression) => max(e), + (e : Expression) => maxDistinct(e), + (e : Expression) => count(e), + (e : Expression) => countDistinct(e), + (e : Expression) => bitAnd(e), + (e : Expression) => bitOr(e), + (e : Expression) => bitXor(e) + ).foreach(agg => { + val projectPlan = testRelation.select('a, 'b) + val unnecessaryOrderByPlan = projectPlan.orderBy('a.asc, 'b.desc) + val groupByPlan = unnecessaryOrderByPlan.groupBy('a)(agg('b)) + val optimized = Optimize.execute(groupByPlan.analyze) + val correctAnswer = projectPlan.groupBy('a)(agg('b)).analyze + comparePlans(optimized, correctAnswer) + }) } test("remove orderBy in groupBy clause with sum aggs") { From 0696f0467270969f40e9baa829533bdb55f4002a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 14 Sep 2020 13:54:21 -0700 Subject: [PATCH 0023/1009] [SPARK-32876][SQL] Change default fallback versions to 3.0.1 and 2.4.7 in HiveExternalCatalogVersionsSuite ### What changes were proposed in this pull request? The Jenkins job fails to get the versions. This was fixed by adding temporary fallbacks at https://github.com/apache/spark/pull/28536. This still doesn't work without the temporary fallbacks. See https://github.com/apache/spark/pull/29694 This PR adds new fallbacks since 2.3 is EOL and Spark 3.0.1 and 2.4.7 are released. ### Why are the changes needed? To test correctly in Jenkins. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Jenkins and GitHub Actions builds should test. Closes #29748 from HyukjinKwon/SPARK-32876. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index aa96fa035c4f0..cbfdb7fac88d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -243,7 +243,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { .filter(_ < org.apache.spark.SPARK_VERSION) } catch { // do not throw exception during object initialization. 
- case NonFatal(_) => Seq("2.3.4", "2.4.5") // A temporary fallback to use a specific version + case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } } From 72550c3be7120fcf2844d6914e883f1bec30d93f Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Mon, 14 Sep 2020 13:58:15 -0700 Subject: [PATCH 0024/1009] [SPARK-32872][CORE] Prevent BytesToBytesMap at MAX_CAPACITY from exceeding growth threshold ### What changes were proposed in this pull request? When BytesToBytesMap is at `MAX_CAPACITY` and reaches its growth threshold, `numKeys >= growthThreshold` is true but `longArray.size() / 2 < MAX_CAPACITY` is false. This correctly prevents the map from growing, but `canGrowArray` incorrectly remains true. Therefore the map keeps accepting new keys and exceeds its growth threshold. If we attempt to spill the map in this state, the UnsafeKVExternalSorter will not be able to reuse the long array for sorting. By this point the task has typically consumed all available memory, so the allocation of the new pointer array is likely to fail. This PR fixes the issue by setting `canGrowArray` to false in this case. This prevents the map from accepting new elements when it cannot grow to accommodate them. ### Why are the changes needed? Without this change, hash aggregations will fail when the number of groups per task is greater than `MAX_CAPACITY / 2 = 2^28` (approximately 268 million), and when the grouping aggregation is the only memory-consuming operator in its stage. For example, the final aggregation in `SELECT COUNT(DISTINCT id) FROM tbl` fails when `tbl` contains 1 billion distinct values and when `spark.sql.shuffle.partitions=1`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Reproducing this issue requires building a very large BytesToBytesMap. Because this is infeasible to do in a unit test, this PR was tested manually by adding the following test to AbstractBytesToBytesMapSuite. Before this PR, the test fails in 8.5 minutes. With this PR, the test passes in 1.5 minutes. ```java public abstract class AbstractBytesToBytesMapSuite { // ... Test public void respectGrowthThresholdAtMaxCapacity() { TestMemoryManager memoryManager2 = new TestMemoryManager( new SparkConf() .set(package$.MODULE$.MEMORY_OFFHEAP_ENABLED(), true) .set(package$.MODULE$.MEMORY_OFFHEAP_SIZE(), 25600 * 1024 * 1024L) .set(package$.MODULE$.SHUFFLE_SPILL_COMPRESS(), false) .set(package$.MODULE$.SHUFFLE_COMPRESS(), false)); TaskMemoryManager taskMemoryManager2 = new TaskMemoryManager(memoryManager2, 0); final long pageSizeBytes = 8000000 + 8; // 8 bytes for end-of-page marker final BytesToBytesMap map = new BytesToBytesMap(taskMemoryManager2, 1024, pageSizeBytes); try { // Insert keys into the map until it stops accepting new keys. for (long i = 0; i < BytesToBytesMap.MAX_CAPACITY; i++) { if (i % (1024 * 1024) == 0) System.out.println("Inserting element " + i); final long[] value = new long[]{i}; BytesToBytesMap.Location loc = map.lookup(value, Platform.LONG_ARRAY_OFFSET, 8); Assert.assertFalse(loc.isDefined()); boolean success = loc.append(value, Platform.LONG_ARRAY_OFFSET, 8, value, Platform.LONG_ARRAY_OFFSET, 8); if (!success) break; } // The map should grow to its max capacity. long capacity = map.getArray().size() / 2; Assert.assertTrue(capacity == BytesToBytesMap.MAX_CAPACITY); // The map should stop accepting new keys once it has reached its growth // threshold, which is half the max capacity. 
Assert.assertTrue(map.numKeys() == BytesToBytesMap.MAX_CAPACITY / 2); map.free(); } finally { map.free(); } } } ``` Closes #29744 from ankurdave/SPARK-32872. Authored-by: Ankur Dave Signed-off-by: Dongjoon Hyun --- .../spark/unsafe/map/BytesToBytesMap.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 8eea9db393aff..d7940fc08e1a5 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -808,12 +808,21 @@ public boolean append(Object kbase, long koff, int klen, Object vbase, long voff longArray.set(pos * 2 + 1, keyHashcode); isDefined = true; - // We use two array entries per key, so the array size is twice the capacity. - // We should compare the current capacity of the array, instead of its size. - if (numKeys >= growthThreshold && longArray.size() / 2 < MAX_CAPACITY) { - try { - growAndRehash(); - } catch (SparkOutOfMemoryError oom) { + // If the map has reached its growth threshold, try to grow it. + if (numKeys >= growthThreshold) { + // We use two array entries per key, so the array size is twice the capacity. + // We should compare the current capacity of the array, instead of its size. + if (longArray.size() / 2 < MAX_CAPACITY) { + try { + growAndRehash(); + } catch (SparkOutOfMemoryError oom) { + canGrowArray = false; + } + } else { + // The map is already at MAX_CAPACITY and cannot grow. Instead, we prevent it from + // accepting any more new elements to make sure we don't exceed the load factor. If we + // need to spill later, this allows UnsafeKVExternalSorter to reuse the array for + // sorting. canGrowArray = false; } } From d58a4a310aecb9fa1bee1be0f5cb02b3be078667 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 14 Sep 2020 16:03:19 -0700 Subject: [PATCH 0025/1009] [SPARK-32882][K8S] Remove python2 installation in K8s python image ### What changes were proposed in this pull request? This PR aims to remove python2 installation in K8s python image because spark 3.1 does not support python2. ### Why are the changes needed? This will save disk space. **BEFORE** ``` kubespark/spark-py ... 917MB ``` **AFTER** ``` kubespark/spark-py ... 823MB ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Jenkins with the K8s IT. Closes #29751 from williamhyun/remove_py2. 
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../src/main/dockerfiles/spark/bindings/python/Dockerfile | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile index 8dfc5f7ff60c5..2f082f559ca5c 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile @@ -24,15 +24,9 @@ WORKDIR / USER 0 RUN mkdir ${SPARK_HOME}/python -# TODO: Investigate running both pip and pip3 via virtualenvs RUN apt-get update && \ - apt install -y python python-pip && \ apt install -y python3 python3-pip && \ - # We remove ensurepip since it adds no functionality since pip is - # installed on the image and it just takes up 1.6MB on the image - rm -r /usr/lib/python*/ensurepip && \ - pip install --upgrade pip setuptools && \ - # You may install with python3 packages by using pip3.6 + pip3 install --upgrade pip setuptools && \ # Removed the .cache to save space rm -r /root/.cache && rm -rf /var/cache/apt/* From 4fac6d501a5d97530edb712ff3450890ac10e413 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 15 Sep 2020 09:27:47 +0900 Subject: [PATCH 0026/1009] [SPARK-32871][BUILD] Append toMap to Map#filterKeys if the result of filter is concatenated with another Map for Scala 2.13 ### What changes were proposed in this pull request? This PR appends `toMap` to `Map` instances with `filterKeys` if such maps is to be concatenated with another maps. ### Why are the changes needed? As of Scala 2.13, Map#filterKeys return a MapView, not the original Map type. This can cause compile error. ``` /sql/DataFrameReader.scala:279: type mismatch; [error] found : Iterable[(String, String)] [error] required: java.util.Map[String,String] [error] Error occurred in an application involving default arguments. [error] val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Compile passed with the following command. `build/mvn -Pscala-2.13 -Phive -Phive-thriftserver -Pyarn -Pkubernetes -DskipTests test-compile` Closes #29742 from sarutak/fix-filterKeys-issue. 
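For reference, a minimal standalone sketch of the pattern being fixed (plain Scala with illustrative option names, not Spark code):

```scala
val sessionOptions = Map("fetchsize" -> "100", "isolationLevel" -> "NONE")
val optionsWithPath = Map("path" -> "/tmp/data")

// Scala 2.12: filterKeys returns a strict Map, so `++` yields a Map as before.
// Scala 2.13: filterKeys returns a MapView, so without the extra toMap the result is no
// longer a Map, and code that later needs a strict Map (for example an asJava conversion
// into java.util.Map) no longer compiles.
val finalOptions: Map[String, String] =
  sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ optionsWithPath
```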
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamReader.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamWriter.scala | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index ab18a3119c09f..b0d06e862ca7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -274,8 +274,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { extraOptions + ("paths" -> objectMapper.writeValueAsString(paths.toArray)) } - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val (table, catalog, ident) = provider match { case _: SupportsCatalogOptions if userSpecifiedSchema.nonEmpty => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index bd1997bee53f7..6fc4dc5aed6e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -321,8 +321,8 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions + ("path" -> path.get) } - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index c22f917d3cf91..93a48946fbafc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -221,8 +221,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo case provider: TableProvider if !provider.isInstanceOf[FileDataSourceV2] => val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = sparkSession.sessionState.conf) - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider(provider, dsOptions, userSpecifiedSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 682f3b98ec2e8..dda6dec9c4ebc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -365,8 +365,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { val provider = cls.getConstructor().newInstance().asInstanceOf[TableProvider] val sessionOptions = DataSourceV2Utils.extractSessionConfigs( source = provider, conf = df.sparkSession.sessionState.conf) - val finalOptions = - sessionOptions.filterKeys(!optionsWithPath.contains(_)) ++ optionsWithPath.originalMap + val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ + optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) val table = DataSourceV2Utils.getTableFromProvider( provider, dsOptions, userSpecifiedSchema = None) From 7a9b066c66d29e946b4f384292021123beb6fe57 Mon Sep 17 00:00:00 2001 From: LantaoJin Date: Mon, 14 Sep 2020 18:24:52 -0700 Subject: [PATCH 0027/1009] [SPARK-32715][CORE] Fix memory leak when failed to store pieces of broadcast ### What changes were proposed in this pull request? In TorrentBroadcast.scala ```scala L133: if (!blockManager.putSingle(broadcastId, value, MEMORY_AND_DISK, tellMaster = false)) L137: TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) L147: if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) ``` After the original value is saved successfully(TorrentBroadcast.scala: L133), but the following `blockifyObject()`(L137) or store piece(L147) steps are failed. There is no opportunity to release broadcast from memory. This patch is to remove all pieces of the broadcast when failed to blockify or failed to store some pieces of a broadcast. ### Why are the changes needed? We use Spark thrift-server as a long-running service. A bad query submitted a heavy BroadcastNestLoopJoin operation and made driver full GC. We killed the bad query but we found the driver's memory usage was still high and full GCs were still frequent. By investigating with GC dump and log, we found the broadcast may memory leak. > 2020-08-19T18:54:02.824-0700: [Full GC (Allocation Failure) 2020-08-19T18:54:02.824-0700: [Class Histogram (before full gc): 116G->112G(170G), 184.9121920 secs] [Eden: 32.0M(7616.0M)->0.0B(8704.0M) Survivors: 1088.0M->0.0B Heap: 116.4G(170.0G)->112.9G(170.0G)], [Metaspace: 177285K->177270K(182272K)] 1: 676531691 72035438432 [B 2: 676502528 32472121344 org.apache.spark.sql.catalyst.expressions.UnsafeRow 3: 99551 12018117568 [Ljava.lang.Object; 4: 26570 4349629040 [I 5: 6 3264536688 [Lorg.apache.spark.sql.catalyst.InternalRow; 6: 1708819 256299456 [C 7: 2338 179615208 [J 8: 1703669 54517408 java.lang.String 9: 103860 34896960 org.apache.spark.status.TaskDataWrapper 10: 177396 25545024 java.net.URI ... ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually test. This UT is hard to write and the patch is straightforward. Closes #29558 from LantaoJin/SPARK-32715. 
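For reference, a condensed sketch of the failure-handling pattern the fix introduces; the helpers below are hypothetical stand-ins for the real BlockManager calls, kept only so the sketch compiles on its own:

```scala
// Hypothetical stand-ins for the real BlockManager interactions.
def storeWholeValue(v: AnyRef): Unit = ()
def blockify(v: AnyRef): Seq[Array[Byte]] = Seq(Array[Byte](1, 2, 3))
def storePiece(piece: Array[Byte]): Unit = ()
def removeAllPieces(): Unit = println("removing partially stored broadcast blocks")

def writeBlocks(value: AnyRef): Int = {
  storeWholeValue(value)             // step 1: already succeeded, value is cached locally
  try {
    val pieces = blockify(value)     // step 2: may throw (serialization, OOM, ...)
    pieces.foreach(storePiece)       // step 3: may throw (memory/disk store failure)
    pieces.length
  } catch {
    case t: Throwable =>
      removeAllPieces()              // undo, otherwise the value stored in step 1 leaks
      throw t
  }
}
```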
Authored-by: LantaoJin Signed-off-by: Dongjoon Hyun --- .../spark/broadcast/TorrentBroadcast.scala | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 77fbbc08c2103..1024d9b5060bc 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -133,22 +133,30 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long) if (!blockManager.putSingle(broadcastId, value, MEMORY_AND_DISK, tellMaster = false)) { throw new SparkException(s"Failed to store $broadcastId in BlockManager") } - val blocks = - TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) - if (checksumEnabled) { - checksums = new Array[Int](blocks.length) - } - blocks.zipWithIndex.foreach { case (block, i) => + try { + val blocks = + TorrentBroadcast.blockifyObject(value, blockSize, SparkEnv.get.serializer, compressionCodec) if (checksumEnabled) { - checksums(i) = calcChecksum(block) + checksums = new Array[Int](blocks.length) } - val pieceId = BroadcastBlockId(id, "piece" + i) - val bytes = new ChunkedByteBuffer(block.duplicate()) - if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) { - throw new SparkException(s"Failed to store $pieceId of $broadcastId in local BlockManager") + blocks.zipWithIndex.foreach { case (block, i) => + if (checksumEnabled) { + checksums(i) = calcChecksum(block) + } + val pieceId = BroadcastBlockId(id, "piece" + i) + val bytes = new ChunkedByteBuffer(block.duplicate()) + if (!blockManager.putBytes(pieceId, bytes, MEMORY_AND_DISK_SER, tellMaster = true)) { + throw new SparkException(s"Failed to store $pieceId of $broadcastId " + + s"in local BlockManager") + } } + blocks.length + } catch { + case t: Throwable => + logError(s"Store broadcast $broadcastId fail, remove all pieces of the broadcast") + blockManager.removeBroadcast(id, tellMaster = true) + throw t } - blocks.length } /** Fetch torrent blocks from the driver and/or other executors. */ From 0811666ab104b41cf189233439f4158b18bc8282 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Mon, 14 Sep 2020 21:15:06 -0700 Subject: [PATCH 0028/1009] [SPARK-32878][CORE] Avoid scheduling TaskSetManager which has no pending tasks ### What changes were proposed in this pull request? This PR proposes to avoid scheduling the (non-zombie) TaskSetManager which has no pending tasks. ### Why are the changes needed? Currently, Spark always tries to schedule a (non-zombie) TaskSetManager even if it has no pending tasks. This causes notable problems for the barrier TaskSetManager: 1. `calculateAvailableSlots` can be called for multiple times for a launched barrier TaskSetManager; 2. user would see "Skip current round of resource offers for barrier stage" log message for a launched barrier TaskSetManager all the time until the barrier TaskSetManager finishes, which is quite confused. Besides, scheduling a TaskSetManager always involves many function invocations even if there're no pending tasks. Therefore, I think we can skip those un-schedulable TasksetManagers to avoid the potential overhead. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass existing tests. Closes #29750 from Ngone51/filter-out-unschedulable-stage. 
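For reference, a toy model of the new check, with hypothetical simplified types (the real Schedulable/TaskSetManager carry far more state):

```scala
// Toy model, not Spark's types: the point is the predicate used to filter the queue.
final case class ToyTaskSetManager(
    name: String,
    isZombie: Boolean,
    pendingTasks: Seq[Int],
    pendingSpeculatableTasks: Seq[Int]) {
  // Only task sets that can still launch tasks should be offered resources.
  def isSchedulable: Boolean =
    !isZombie && (pendingTasks.nonEmpty || pendingSpeculatableTasks.nonEmpty)
}

val queue = Seq(
  ToyTaskSetManager(name = "launched barrier stage", isZombie = false,
    pendingTasks = Nil, pendingSpeculatableTasks = Nil),
  ToyTaskSetManager(name = "stage with pending work", isZombie = false,
    pendingTasks = Seq(0, 1), pendingSpeculatableTasks = Nil))

// Only the second entry survives, so the fully launched barrier stage is no longer
// re-examined (and logged about) on every round of resource offers.
val offered = queue.filter(_.isSchedulable)
```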
Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/scheduler/Pool.scala | 4 +++- .../main/scala/org/apache/spark/scheduler/Schedulable.scala | 1 + .../scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala | 2 +- .../scala/org/apache/spark/scheduler/TaskSetManager.scala | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala index 2e2851eb9070b..7333b31524f2a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Pool.scala @@ -59,6 +59,8 @@ private[spark] class Pool( } } + override def isSchedulable: Boolean = true + override def addSchedulable(schedulable: Schedulable): Unit = { require(schedulable != null) schedulableQueue.add(schedulable) @@ -105,7 +107,7 @@ private[spark] class Pool( val sortedSchedulableQueue = schedulableQueue.asScala.toSeq.sortWith(taskSetSchedulingAlgorithm.comparator) for (schedulable <- sortedSchedulableQueue) { - sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue + sortedTaskSetQueue ++= schedulable.getSortedTaskSetQueue.filter(_.isSchedulable) } sortedTaskSetQueue } diff --git a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala index 8cc239c81d11a..0626f8fb8150a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Schedulable.scala @@ -39,6 +39,7 @@ private[spark] trait Schedulable { def stageId: Int def name: String + def isSchedulable: Boolean def addSchedulable(schedulable: Schedulable): Unit def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 107c517ca06bc..2fcf13d5268f8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -535,7 +535,7 @@ private[spark] class TaskSchedulerImpl( val availableResources = shuffledOffers.map(_.resources).toArray val availableCpus = shuffledOffers.map(o => o.cores).toArray val resourceProfileIds = shuffledOffers.map(o => o.resourceProfileId).toArray - val sortedTaskSets = rootPool.getSortedTaskSetQueue.filterNot(_.isZombie) + val sortedTaskSets = rootPool.getSortedTaskSetQueue for (taskSet <- sortedTaskSets) { logDebug("parentName: %s, name: %s, runningTasks: %s".format( taskSet.parent.name, taskSet.name, taskSet.runningTasks)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 673fe4fe27519..78fd412ef154c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -951,6 +951,9 @@ private[spark] class TaskSetManager( null } + override def isSchedulable: Boolean = !isZombie && + (pendingTasks.all.nonEmpty || pendingSpeculatableTasks.all.nonEmpty) + override def addSchedulable(schedulable: Schedulable): Unit = {} override def removeSchedulable(schedulable: Schedulable): Unit = {} From d8a0d8569243d29e7f091d545ee1e9eb780d3dc8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 15 Sep 2020 14:38:01 +0900 
Subject: [PATCH 0029/1009] [SPARK-32884][TESTS] Mark TPCDSQuery*Suite as ExtendedSQLTest ### What changes were proposed in this pull request? This PR aims to mark the following suite as `ExtendedSQLTest` to reduce GitHub Action test time. - TPCDSQuerySuite - TPCDSQueryANSISuite - TPCDSQueryWithStatsSuite ### Why are the changes needed? Currently, the longest GitHub Action task is `Build and test / Build modules: sql - other tests` with `1h 57m 10s` while `Build and test / Build modules: sql - slow tests` takes `42m 20s`. With this PR, we can move the workload from `other tests` to `slow tests` task and reduce the total waiting time about 7 ~ 8 minutes. ### Does this PR introduce _any_ user-facing change? No. This is a test-only change. ### How was this patch tested? Pass the GitHub Action with the reduced running time. Closes #29755 from dongjoon-hyun/SPARK-SLOWTEST. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index decd1d6d08d27..22e1b838f3f3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.util.resourceToString import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.ExtendedSQLTest /** * This test suite ensures all the TPC-DS queries can be successfully analyzed, optimized * and compiled without hitting the max iteration threshold. */ +@ExtendedSQLTest class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { tpcdsQueries.foreach { name => @@ -64,10 +66,12 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { } } +@ExtendedSQLTest class TPCDSQueryWithStatsSuite extends TPCDSQuerySuite { override def injectStats: Boolean = true } +@ExtendedSQLTest class TPCDSQueryANSISuite extends TPCDSQuerySuite { override protected def sparkConf: SparkConf = super.sparkConf.set(SQLConf.ANSI_ENABLED, true) From c8baab1a1f2ac03951946ff899d1c51a69c2c8b3 Mon Sep 17 00:00:00 2001 From: herman Date: Tue, 15 Sep 2020 06:24:54 +0000 Subject: [PATCH 0030/1009] [SPARK-32879][SQL] Refactor SparkSession initial options ### What changes were proposed in this pull request? This PR refactors the way we propagate the options from the `SparkSession.Builder` to the `SessionState`. This is currently done via a mutable map inside the SparkSession. These settings are then applied **after** the `SessionState` has been created. This is a bit confusing when you expect something to be set when constructing the `SessionState`. This PR passes the options as a constructor parameter to the `SessionStateBuilder` and this will set the options when the configuration is created. ### Why are the changes needed? It makes it easier to reason about the configurations set in a SessionState than before. We recently had an incident where someone was using `SparkSessionExtensions` to create a planner rule that relied on a conf to be set. While this is in itself probably incorrect usage, it still illustrated this somewhat funky behavior. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #29752 from hvanhovell/SPARK-32879.
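For illustration, here is a minimal standalone sketch of the two approaches. It uses hypothetical `Conf` and `StateBuilder` stand-ins rather than Spark's real `SessionState` machinery, so it only models the ordering change, not the actual API:

```scala
import scala.collection.mutable

// Hypothetical stand-ins; only the ordering of "apply options" vs. "build state" matters here.
object InitialOptionsSketch {
  final class Conf {
    private val settings = mutable.Map.empty[String, String]
    def set(key: String, value: String): Unit = settings.update(key, value)
    def get(key: String): Option[String] = settings.get(key)
  }

  // Before: the conf is built first and the builder options are pushed into it
  // afterwards, so anything constructed in between (e.g. a planner rule added
  // through an extension) can observe a conf that is missing the options.
  def buildThenMutate(options: Map[String, String]): Conf = {
    val conf = new Conf
    // ...code running here would not see `options` yet...
    options.foreach { case (k, v) => conf.set(k, v) }
    conf
  }

  // After: the options are a constructor parameter and are folded in while the
  // conf is created, so a partially configured conf is never visible.
  final class StateBuilder(options: Map[String, String]) {
    def build(): Conf = {
      val conf = new Conf
      options.foreach { case (k, v) => conf.set(k, v) }
      conf
    }
  }

  def main(args: Array[String]): Unit = {
    val opts = Map("spark.some.option" -> "true")
    assert(new StateBuilder(opts).build().get("spark.some.option").contains("true"))
  }
}
```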
Authored-by: herman Signed-off-by: Wenchen Fan --- project/MimaExcludes.scala | 5 ++- .../org/apache/spark/sql/SparkSession.scala | 42 +++++++++++-------- .../internal/BaseSessionStateBuilder.scala | 6 ++- .../spark/sql/internal/SessionState.scala | 7 ++-- .../spark/sql/test/TestSQLContext.scala | 9 ++-- .../sql/hive/HiveSessionStateBuilder.scala | 12 +++--- .../apache/spark/sql/hive/test/TestHive.scala | 9 ++-- 7 files changed, 55 insertions(+), 35 deletions(-) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 0be7b4c1003a7..d32d31daae8e7 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -71,7 +71,10 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$BinaryClassificationSummary$$sparkSession"), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$ClassificationSummary$_setter_$org$apache$spark$ml$classification$ClassificationSummary$$multiclassMetrics_="), ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.org$apache$spark$ml$classification$ClassificationSummary$$multiclassMetrics"), - ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol") + ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), + + // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") ) // Exclude rules for 3.0.x diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index e5d53f5fd4c65..5704414df2d0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -81,7 +81,8 @@ class SparkSession private( @transient val sparkContext: SparkContext, @transient private val existingSharedState: Option[SharedState], @transient private val parentSessionState: Option[SessionState], - @transient private[sql] val extensions: SparkSessionExtensions) + @transient private[sql] val extensions: SparkSessionExtensions, + @transient private val initialSessionOptions: Map[String, String]) extends Serializable with Closeable with Logging { self => // The call site where this SparkSession was constructed. @@ -97,7 +98,7 @@ class SparkSession private( this(sc, None, None, SparkSession.applyExtensions( sc.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), - new SparkSessionExtensions)) + new SparkSessionExtensions), Map.empty) } sparkContext.assertNotStopped() @@ -133,12 +134,6 @@ class SparkSession private( existingSharedState.getOrElse(new SharedState(sparkContext, initialSessionOptions)) } - /** - * Initial options for session. This options are applied once when sessionState is created. - */ - @transient - private[sql] val initialSessionOptions = new scala.collection.mutable.HashMap[String, String] - /** * State isolated across sessions, including SQL configurations, temporary tables, registered * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]]. 
@@ -156,8 +151,8 @@ class SparkSession private( .getOrElse { val state = SparkSession.instantiateSessionState( SparkSession.sessionStateClassName(sparkContext.conf), - self) - initialSessionOptions.foreach { case (k, v) => state.conf.setConfString(k, v) } + self, + initialSessionOptions) state } } @@ -244,7 +239,12 @@ class SparkSession private( * @since 2.0.0 */ def newSession(): SparkSession = { - new SparkSession(sparkContext, Some(sharedState), parentSessionState = None, extensions) + new SparkSession( + sparkContext, + Some(sharedState), + parentSessionState = None, + extensions, + initialSessionOptions) } /** @@ -260,7 +260,12 @@ class SparkSession private( * implementation is Hive, this will initialize the metastore, which may take some time. */ private[sql] def cloneSession(): SparkSession = { - val result = new SparkSession(sparkContext, Some(sharedState), Some(sessionState), extensions) + val result = new SparkSession( + sparkContext, + Some(sharedState), + Some(sessionState), + extensions, + Map.empty) result.sessionState // force copy of SessionState result } @@ -939,8 +944,7 @@ object SparkSession extends Logging { sparkContext.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), extensions) - session = new SparkSession(sparkContext, None, None, extensions) - options.foreach { case (k, v) => session.initialSessionOptions.put(k, v) } + session = new SparkSession(sparkContext, None, None, extensions, options.toMap) setDefaultSession(session) setActiveSession(session) registerContextListener(sparkContext) @@ -1104,12 +1108,16 @@ object SparkSession extends Logging { */ private def instantiateSessionState( className: String, - sparkSession: SparkSession): SessionState = { + sparkSession: SparkSession, + options: Map[String, String]): SessionState = { try { - // invoke `new [Hive]SessionStateBuilder(SparkSession, Option[SessionState])` + // invoke new [Hive]SessionStateBuilder( + // SparkSession, + // Option[SessionState], + // Map[String, String]) val clazz = Utils.classForName(className) val ctor = clazz.getConstructors.head - ctor.newInstance(sparkSession, None).asInstanceOf[BaseSessionStateBuilder].build() + ctor.newInstance(sparkSession, None, options).asInstanceOf[BaseSessionStateBuilder].build() } catch { case NonFatal(e) => throw new IllegalArgumentException(s"Error while instantiating '$className':", e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 83a7a557305e9..4ca1ac863addc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -57,7 +57,8 @@ import org.apache.spark.sql.util.ExecutionListenerManager @Unstable abstract class BaseSessionStateBuilder( val session: SparkSession, - val parentState: Option[SessionState] = None) { + val parentState: Option[SessionState], + val options: Map[String, String]) { type NewBuilder = (SparkSession, Option[SessionState]) => BaseSessionStateBuilder /** @@ -97,6 +98,9 @@ abstract class BaseSessionStateBuilder( }.getOrElse { val conf = new SQLConf mergeSparkConf(conf, session.sparkContext.conf) + options.foreach { + case (k, v) => conf.setConfString(k, v) + } conf } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 
cd425b04ef311..0f9a89741c192 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -136,9 +136,10 @@ private[sql] object SessionState { @Unstable class SessionStateBuilder( session: SparkSession, - parentState: Option[SessionState] = None) - extends BaseSessionStateBuilder(session, parentState) { - override protected def newBuilder: NewBuilder = new SessionStateBuilder(_, _) + parentState: Option[SessionState], + options: Map[String, String]) + extends BaseSessionStateBuilder(session, parentState, options) { + override protected def newBuilder: NewBuilder = new SessionStateBuilder(_, _, Map.empty) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 17603deacdcdd..ac06e1f41bfb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -39,7 +39,7 @@ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) @transient override lazy val sessionState: SessionState = { - new TestSQLSessionStateBuilder(this, None).build() + new TestSQLSessionStateBuilder(this, None, Map.empty).build() } // Needed for Java tests @@ -66,8 +66,9 @@ private[sql] object TestSQLContext { private[sql] class TestSQLSessionStateBuilder( session: SparkSession, - state: Option[SessionState]) - extends SessionStateBuilder(session, state) with WithTestConf { + state: Option[SessionState], + options: Map[String, String]) + extends SessionStateBuilder(session, state, options) with WithTestConf { override def overrideConfs: Map[String, String] = TestSQLContext.overrideConfs - override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _) + override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _, Map.empty) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 78ec2b8e2047e..b9135733856a5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -21,10 +21,9 @@ import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener -import org.apache.spark.sql.catalyst.optimizer.Optimizer import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.{SparkOptimizer, SparkPlanner} +import org.apache.spark.sql.execution.SparkPlanner import org.apache.spark.sql.execution.aggregate.ResolveEncodersInScalaAgg import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin import org.apache.spark.sql.execution.command.CommandCheck @@ -38,8 +37,11 @@ import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLo * Builder that produces a Hive-aware `SessionState`. 
*/ @Unstable -class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None) - extends BaseSessionStateBuilder(session, parentState) { +class HiveSessionStateBuilder( + session: SparkSession, + parentState: Option[SessionState], + options: Map[String, String]) + extends BaseSessionStateBuilder(session, parentState, options) { private def externalCatalog: ExternalCatalogWithListener = session.sharedState.externalCatalog @@ -116,7 +118,7 @@ class HiveSessionStateBuilder(session: SparkSession, parentState: Option[Session } } - override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _) + override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _, Map.empty) } class HiveSessionResourceLoader( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index f98534eb2b543..497dda4e22213 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -224,7 +224,7 @@ private[hive] class TestHiveSparkSession( @transient override lazy val sessionState: SessionState = { - new TestHiveSessionStateBuilder(this, parentSessionState).build() + new TestHiveSessionStateBuilder(this, parentSessionState, Map.empty).build() } lazy val metadataHive: HiveClient = { @@ -650,8 +650,9 @@ private[hive] object TestHiveContext { private[sql] class TestHiveSessionStateBuilder( session: SparkSession, - state: Option[SessionState]) - extends HiveSessionStateBuilder(session, state) + state: Option[SessionState], + options: Map[String, String]) + extends HiveSessionStateBuilder(session, state, options) with WithTestConf { override def overrideConfs: Map[String, String] = TestHiveContext.overrideConfs @@ -660,7 +661,7 @@ private[sql] class TestHiveSessionStateBuilder( new TestHiveQueryExecution(session.asInstanceOf[TestHiveSparkSession], plan) } - override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _) + override protected def newBuilder: NewBuilder = new TestHiveSessionStateBuilder(_, _, Map.empty) } private[hive] object HiveTestJars { From 99384d1e831b7fe82a3a80ade1da976971624ee7 Mon Sep 17 00:00:00 2001 From: Zhenhua Wang Date: Tue, 15 Sep 2020 06:46:17 +0000 Subject: [PATCH 0031/1009] [SPARK-32738][CORE] Should reduce the number of active threads if fatal error happens in `Inbox.process` ### What changes were proposed in this pull request? Processing for `ThreadSafeRpcEndpoint` is controlled by `numActiveThreads` in `Inbox`. Now if any fatal error happens during `Inbox.process`, `numActiveThreads` is not reduced. Then other threads can not process messages in that inbox, which causes the endpoint to "hang". For other type of endpoints, we also should keep `numActiveThreads` correct. This problem is more serious in previous Spark 2.x versions since the driver, executor and block manager endpoints are all thread safe endpoints. To fix this, we should reduce the number of active threads if fatal error happens in `Inbox.process`. ### Why are the changes needed? `numActiveThreads` is not correct when fatal error happens and will cause the described problem. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add a new test. Closes #29580 from wzhfy/deal_with_fatal_error. 
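A rough, self-contained model of the failure mode may help. The class below is a toy, not the real `org.apache.spark.rpc.netty.Inbox`, and the counter handling is simplified, but it shows why a fatal error must decrement the active-thread counter before being rethrown:

```scala
import scala.util.control.NonFatal

// Toy inbox: a counter-guarded message handler. If a fatal error escaped
// without the decrement, the counter would stay at 1 and the inbox would
// look permanently busy to other threads.
class ToyInbox {
  private var numActiveThreads = 0

  def process(handle: String => Unit, message: String): Unit = synchronized {
    numActiveThreads += 1
    try {
      handle(message)
    } catch {
      case NonFatal(e) =>
        println(s"non-fatal error ignored: $e")
      case fatal: Throwable =>
        numActiveThreads -= 1 // restore the counter before propagating
        throw fatal
    }
    numActiveThreads -= 1
  }

  def activeThreads: Int = synchronized(numActiveThreads)
}

object ToyInboxDemo {
  def main(args: Array[String]): Unit = {
    val inbox = new ToyInbox
    try inbox.process(_ => throw new OutOfMemoryError("boom"), "hi")
    catch { case _: OutOfMemoryError => () }
    assert(inbox.activeThreads == 0) // holds only because of the fatal-error branch
  }
}
```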
Authored-by: Zhenhua Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/rpc/netty/Inbox.scala | 20 +++++++++++++++++++ .../apache/spark/rpc/netty/InboxSuite.scala | 13 ++++++++++++ 2 files changed, 33 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala index 2ed03f7430c32..472401b23fe8e 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala @@ -200,6 +200,16 @@ private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) * Calls action closure, and calls the endpoint's onError function in the case of exceptions. */ private def safelyCall(endpoint: RpcEndpoint)(action: => Unit): Unit = { + def dealWithFatalError(fatal: Throwable): Unit = { + inbox.synchronized { + assert(numActiveThreads > 0, "The number of active threads should be positive.") + // Should reduce the number of active threads before throw the error. + numActiveThreads -= 1 + } + logError(s"An error happened while processing message in the inbox for $endpointName", fatal) + throw fatal + } + try action catch { case NonFatal(e) => try endpoint.onError(e) catch { @@ -209,8 +219,18 @@ private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) } else { logError("Ignoring error", ee) } + case fatal: Throwable => + dealWithFatalError(fatal) } + case fatal: Throwable => + dealWithFatalError(fatal) } } + // exposed only for testing + def getNumActiveThreads: Int = { + inbox.synchronized { + inbox.numActiveThreads + } + } } diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala index c74c728b3e3f3..8b1c602cd8e58 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/InboxSuite.scala @@ -136,4 +136,17 @@ class InboxSuite extends SparkFunSuite { endpoint.verifySingleOnNetworkErrorMessage(cause, remoteAddress) } + + test("SPARK-32738: should reduce the number of active threads when fatal error happens") { + val endpoint = mock(classOf[TestRpcEndpoint]) + when(endpoint.receive).thenThrow(new OutOfMemoryError()) + + val dispatcher = mock(classOf[Dispatcher]) + val inbox = new Inbox("name", endpoint) + inbox.post(OneWayMessage(null, "hi")) + intercept[OutOfMemoryError] { + inbox.process(dispatcher) + } + assert(inbox.getNumActiveThreads == 0) + } } From 316242b768a232ea541e854633374aebcd2ed194 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 15 Sep 2020 13:07:03 +0000 Subject: [PATCH 0032/1009] [SPARK-32874][SQL][TEST] Enhance result set meta data check for execute statement operation with thrift server ### What changes were proposed in this pull request? This PR adds test cases for the result set metadata checking for Spark's `ExecuteStatementOperation` to make the JDBC API more future-proofing because any server-side change may affect the client compatibility. ### Why are the changes needed? add test to prevent potential silent behavior change for JDBC users. ### Does this PR introduce _any_ user-facing change? NO, test only ### How was this patch tested? add new test Closes #29746 from yaooqinn/SPARK-32874. 
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- ...arkThriftServerProtocolVersionsSuite.scala | 147 +++++++++++++++++- 1 file changed, 140 insertions(+), 7 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index fd45e7a48c0eb..69486eeb031b1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -148,6 +148,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as byte)") { rs => assert(rs.next()) assert(rs.getByte(1) === 1.toByte) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS TINYINT)") + assert(metaData.getColumnTypeName(1) === "tinyint") + assert(metaData.getColumnType(1) === java.sql.Types.TINYINT) + assert(metaData.getPrecision(1) === 3) + assert(metaData.getScale(1) === 0) } } @@ -155,6 +161,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as short)") { rs => assert(rs.next()) assert(rs.getShort(1) === 1.toShort) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS SMALLINT)") + assert(metaData.getColumnTypeName(1) === "smallint") + assert(metaData.getColumnType(1) === java.sql.Types.SMALLINT) + assert(metaData.getPrecision(1) === 5) + assert(metaData.getScale(1) === 0) } } @@ -162,6 +174,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT 1") { rs => assert(rs.next()) assert(rs.getInt(1) === 1) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "1") + assert(metaData.getColumnTypeName(1) === "int") + assert(metaData.getColumnType(1) === java.sql.Types.INTEGER) + assert(metaData.getPrecision(1) === 10) + assert(metaData.getScale(1) === 0) } } @@ -169,6 +187,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as bigint)") { rs => assert(rs.next()) assert(rs.getLong(1) === 1L) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1 AS BIGINT)") + assert(metaData.getColumnTypeName(1) === "bigint") + assert(metaData.getColumnType(1) === java.sql.Types.BIGINT) + assert(metaData.getPrecision(1) === 19) + assert(metaData.getScale(1) === 0) } } @@ -176,6 +200,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1.2 as float)") { rs => assert(rs.next()) assert(rs.getFloat(1) === 1.2F) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1.2 AS FLOAT)") + assert(metaData.getColumnTypeName(1) === "float") + assert(metaData.getColumnType(1) === java.sql.Types.FLOAT) + assert(metaData.getPrecision(1) === 7) + assert(metaData.getScale(1) === 7) } } @@ -183,14 +213,30 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1.2 as double)") { rs => assert(rs.next()) assert(rs.getDouble(1) === 1.2D) + val metaData = 
rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(1.2 AS DOUBLE)") + assert(metaData.getColumnTypeName(1) === "double") + assert(metaData.getColumnType(1) === java.sql.Types.DOUBLE) + assert(metaData.getPrecision(1) === 15) + assert(metaData.getScale(1) === 15) } } test(s"$version get decimal type") { testExecuteStatementWithProtocolVersion(version, - "SELECT cast(1 as decimal(18, 2)) as c") { rs => + "SELECT cast(1 as decimal(9, 1)) as col0, 1234.56BD as col1, 0.123 as col2") { rs => assert(rs.next()) - assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.00")) + assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.0")) + assert(rs.getBigDecimal("col1") === new java.math.BigDecimal("1234.56")) + assert(rs.getBigDecimal("col2") === new java.math.BigDecimal("0.123")) + val metaData = rs.getMetaData + (1 to 3) foreach { i => + assert(metaData.getColumnName(i) === s"col${i - 1}") + assert(metaData.getColumnTypeName(i) === "decimal") + assert(metaData.getColumnType(i) === java.sql.Types.DECIMAL) + assert(metaData.getPrecision(i) == 12 - i * 3) + assert(metaData.getScale(i) == i) + } } testExecuteStatementWithProtocolVersion(version, "SELECT cast(null as decimal) ") { rs => @@ -203,6 +249,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT 'str'") { rs => assert(rs.next()) assert(rs.getString(1) === "str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="str") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -211,6 +263,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('char-str' as char(10))") { rs => assert(rs.next()) assert(rs.getString(1) === "char-str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="CAST(char-str AS STRING)") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -219,6 +277,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('varchar-str' as varchar(10))") { rs => assert(rs.next()) assert(rs.getString(1) === "varchar-str") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) ==="CAST(varchar-str AS STRING)") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } @@ -226,6 +290,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast('ABC' as binary)") { rs => assert(rs.next()) assert(rs.getString(1) === "ABC") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(ABC AS BINARY)") + assert(metaData.getColumnTypeName(1) === "binary") + assert(metaData.getColumnType(1) === java.sql.Types.BINARY) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } testExecuteStatementWithProtocolVersion(version, "SELECT cast(49960 as binary)") { rs => assert(rs.next()) @@ -241,6 +311,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT true") { rs => 
assert(rs.next()) assert(rs.getBoolean(1) === true) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "true") + assert(metaData.getColumnTypeName(1) === "boolean") + assert(metaData.getColumnType(1) === java.sql.Types.BOOLEAN) + assert(metaData.getPrecision(1) === 1) + assert(metaData.getScale(1) === 0) } } @@ -248,6 +324,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT cast('2019-07-22' as date)") { rs => assert(rs.next()) assert(rs.getDate(1) === Date.valueOf("2019-07-22")) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(2019-07-22 AS DATE)") + assert(metaData.getColumnTypeName(1) === "date") + assert(metaData.getColumnType(1) === java.sql.Types.DATE) + assert(metaData.getPrecision(1) === 10) + assert(metaData.getScale(1) === 0) } } @@ -256,6 +338,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { "SELECT cast('2019-07-22 18:14:00' as timestamp)") { rs => assert(rs.next()) assert(rs.getTimestamp(1) === Timestamp.valueOf("2019-07-22 18:14:00")) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "CAST(2019-07-22 18:14:00 AS TIMESTAMP)") + assert(metaData.getColumnTypeName(1) === "timestamp") + assert(metaData.getColumnType(1) === java.sql.Types.TIMESTAMP) + assert(metaData.getPrecision(1) === 29) + assert(metaData.getScale(1) === 9) } } @@ -263,6 +351,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT null") { rs => assert(rs.next()) assert(rs.getString(1) === null) + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "NULL") + assert(metaData.getColumnTypeName(1) === "void") + assert(metaData.getColumnType(1) === java.sql.Types.NULL) + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) } } @@ -270,28 +364,67 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { testExecuteStatementWithProtocolVersion(version, "SELECT interval '1' year '2' day") { rs => assert(rs.next()) assert(rs.getString(1) === "1 years 2 days") + val metaData = rs.getMetaData + assert(metaData.getColumnName(1) === "INTERVAL '1 years 2 days'") + assert(metaData.getColumnTypeName(1) === "string") + assert(metaData.getColumnType(1) === java.sql.Types.VARCHAR) + assert(metaData.getPrecision(1) === Int.MaxValue) + assert(metaData.getScale(1) === 0) } } test(s"$version get array type") { - testExecuteStatementWithProtocolVersion(version, "SELECT array(1, 2)") { rs => + testExecuteStatementWithProtocolVersion( + version, "SELECT array() AS col1, array(1, 2) AS col2") { rs => assert(rs.next()) - assert(rs.getString(1) === "[1,2]") + assert(rs.getString(2) === "[1,2]") + assert(rs.getObject("col1") === "[]") + assert(rs.getObject("col2") === "[1,2]") + val metaData = rs.getMetaData + (1 to 2) foreach { i => + assert(metaData.getColumnName(i) === s"col$i") + assert(metaData.getColumnTypeName(i) === "array") + assert(metaData.getColumnType(i) === java.sql.Types.ARRAY) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } test(s"$version get map type") { - testExecuteStatementWithProtocolVersion(version, "SELECT map(1, 2)") { rs => + testExecuteStatementWithProtocolVersion(version, + "SELECT map(), map(1, 2, 3, 4)") { rs => assert(rs.next()) - assert(rs.getString(1) === "{1:2}") + assert(rs.getObject(1) === "{}") + assert(rs.getObject(2) === "{1:2,3:4}") + 
assert(rs.getString(2) === "{1:2,3:4}") + val metaData = rs.getMetaData + (1 to 2) foreach { i => + assert(metaData.getColumnName(i).startsWith("map(")) + assert(metaData.getColumnTypeName(1) === "map") + assert(metaData.getColumnType(i) === java.sql.Types.JAVA_OBJECT) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } test(s"$version get struct type") { testExecuteStatementWithProtocolVersion(version, - "SELECT struct('alpha' AS A, 'beta' AS B)") { rs => + "SELECT struct('alpha' AS A, 'beta' AS B) as col0," + + " struct('1', '2') AS col1, named_struct('a', 2, 'b', 4) AS col2") { rs => assert(rs.next()) assert(rs.getString(1) === """{"A":"alpha","B":"beta"}""") + assert(rs.getObject("col1") === """{"col1":"1","col2":"2"}""") + assert(rs.getObject("col2") === """{"a":2,"b":4}""") + val metaData = rs.getMetaData + (1 to 3) foreach { i => + assert(metaData.getColumnName(i) === s"col${i - 1}") + assert(metaData.getColumnTypeName(1) === "struct") + assert(metaData.getColumnType(i) === java.sql.Types.STRUCT) + assert(metaData.getPrecision(i) === Int.MaxValue) + assert(metaData.getScale(i) == 0) + } } } From 6f36db1fa511940dd43d597b7fe337fc3d5c2558 Mon Sep 17 00:00:00 2001 From: Abhishek Dixit Date: Tue, 15 Sep 2020 08:41:22 -0500 Subject: [PATCH 0033/1009] [SPARK-31448][PYTHON] Fix storage level used in persist() in dataframe.py ### What changes were proposed in this pull request? Since the data is serialized on the Python side, we should make cache() in PySpark dataframes use StorageLevel.MEMORY_AND_DISK mode which has deserialized=false. This change was done to `pyspark/rdd.py` as part of SPARK-2014 but was missed from `pyspark/dataframe.py` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Using existing tests Closes #29242 from abhishekd0907/SPARK-31448. Authored-by: Abhishek Dixit Signed-off-by: Sean Owen --- python/pyspark/sql/dataframe.py | 7 ++++--- python/pyspark/storagelevel.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index b750b8a8d30a1..db2ddde00c881 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -678,13 +678,14 @@ def cache(self): return self @since(1.3) - def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK): + def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): """Sets the storage level to persist the contents of the :class:`DataFrame` across operations after the first time it is computed. This can only be used to assign a new storage level if the :class:`DataFrame` does not have a storage level set yet. - If no storage level is specified defaults to (`MEMORY_AND_DISK`). + If no storage level is specified defaults to (`MEMORY_AND_DISK_DESER`) - .. note:: The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. + .. note:: The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala + in 3.0. 
""" self.is_cached = True javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) diff --git a/python/pyspark/storagelevel.py b/python/pyspark/storagelevel.py index 9c0d1ca661244..ecf8e5c82ea56 100644 --- a/python/pyspark/storagelevel.py +++ b/python/pyspark/storagelevel.py @@ -57,3 +57,4 @@ def __str__(self): StorageLevel.MEMORY_AND_DISK = StorageLevel(True, True, False, False) StorageLevel.MEMORY_AND_DISK_2 = StorageLevel(True, True, False, False, 2) StorageLevel.OFF_HEAP = StorageLevel(True, True, True, False, 1) +StorageLevel.MEMORY_AND_DISK_DESER = StorageLevel(True, True, False, True) From 888b343587c98ae0252311d72e20abbca8262ab3 Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 15 Sep 2020 14:11:30 +0000 Subject: [PATCH 0034/1009] [SPARK-32827][SQL] Add spark.sql.maxMetadataStringLength config ### What changes were proposed in this pull request? Add a new config `spark.sql.maxMetadataStringLength`. This config aims to limit metadata value length, e.g. file location. ### Why are the changes needed? Some metadata have been abbreviated by `...` when I tried to add some test in `SQLQueryTestSuite`. We need to replace such value to `notIncludedMsg`. That caused we can't replace that like location value by `className` since the `className` has been abbreviated. Here is a case: ``` CREATE table explain_temp1 (key int, val int) USING PARQUET; EXPLAIN EXTENDED SELECT sum(distinct val) FROM explain_temp1; -- ignore parsed,analyzed,optimized -- The output like == Physical Plan == *HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) +- Exchange SinglePartition, true, [id=#x] +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] +- *HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- *ColumnarToRow +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/home/runner/work/spark/spark/sql/core/spark-warehouse/org.apache.spark.sq...], PartitionFilters: ... ``` ### Does this PR introduce _any_ user-facing change? No, a new config. ### How was this patch tested? new test. Closes #29688 from ulysses-you/SPARK-32827. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 10 ++++++++ .../sql/execution/DataSourceScanExec.scala | 2 +- .../execution/datasources/v2/FileScan.scala | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 23 +++++++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2f2b645360ed6..0d1a3e365c918 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2370,6 +2370,14 @@ object SQLConf { "(nonnegative and shorter than the maximum size).") .createWithDefaultString(s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}") + val MAX_METADATA_STRING_LENGTH = buildConf("spark.sql.maxMetadataStringLength") + .doc("Maximum number of characters to output for a metadata string. e.g. 
" + + "file location in `DataSourceScanExec`, every value will be abbreviated if exceed length.") + .version("3.1.0") + .intConf + .checkValue(_ > 3, "This value must be bigger than 3.") + .createWithDefault(100) + val SET_COMMAND_REJECTS_SPARK_CORE_CONFS = buildConf("spark.sql.legacy.setCommandRejectsSparkCoreConfs") .internal() @@ -3344,6 +3352,8 @@ class SQLConf extends Serializable with Logging { def maxPlanStringLength: Int = getConf(SQLConf.MAX_PLAN_STRING_LENGTH).toInt + def maxMetadataStringLength: Int = getConf(SQLConf.MAX_METADATA_STRING_LENGTH) + def setCommandRejectsSparkCoreConfs: Boolean = getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index ce5909a09442c..1b9ca63ea21d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -55,7 +55,7 @@ trait DataSourceScanExec extends LeafExecNode { // Metadata that describes more details of this scan. protected def metadata: Map[String, String] - protected val maxMetadataValueLength = 100 + protected val maxMetadataValueLength = sqlContext.sessionState.conf.maxMetadataStringLength override def simpleString(maxFields: Int): String = { val metadataEntries = metadata.toSeq.sorted.map { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala index f090d7861b629..363dd154b5fbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala @@ -95,7 +95,7 @@ trait FileScan extends Scan override def hashCode(): Int = getClass.hashCode() - val maxMetadataValueLength = 100 + val maxMetadataValueLength = sparkSession.sessionState.conf.maxMetadataStringLength override def description(): String = { val metadataStr = getMetaData().toSeq.sorted.map { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 48b2e22457e3c..8d6d93d13d143 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter +import org.apache.spark.sql.execution.SimpleMode import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} @@ -969,6 +970,28 @@ class FileBasedDataSourceSuite extends QueryTest } } } + + test("SPARK-32827: Set max metadata string length") { + withTempDir { dir => + val tableName = "t" + val path = s"${dir.getCanonicalPath}/$tableName" + withTable(tableName) { + sql(s"CREATE TABLE $tableName(c INT) USING PARQUET LOCATION '$path'") + withSQLConf(SQLConf.MAX_METADATA_STRING_LENGTH.key -> "5") { + val explain 
= spark.table(tableName).queryExecution.explainString(SimpleMode) + assert(!explain.contains(path)) + // metadata has abbreviated by ... + assert(explain.contains("...")) + } + + withSQLConf(SQLConf.MAX_METADATA_STRING_LENGTH.key -> "1000") { + val explain = spark.table(tableName).queryExecution.explainString(SimpleMode) + assert(explain.contains(path)) + assert(!explain.contains("...")) + } + } + } + } } object TestingUDT { From 108c4c8fdc6c839bf5f43af7a55594aa024d2eb6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 08:11:43 +0900 Subject: [PATCH 0035/1009] [SPARK-32481][SQL][TESTS][FOLLOW-UP] Skip the test if trash directory cannot be created ### What changes were proposed in this pull request? This PR skips the test if trash directory cannot be created. It is possible that the trash directory cannot be created, for example, by permission. And the test fails below: ``` - SPARK-32481 Move data to trash on truncate table if enabled *** FAILED *** (154 milliseconds) fs.exists(trashPath) was false (DDLSuite.scala:3184) org.scalatest.exceptions.TestFailedException: at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:530) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:529) at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1560) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:503) ``` ### Why are the changes needed? To make the tests pass independently. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually tested. Closes #29759 from HyukjinKwon/SPARK-32481. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index b8ac5079b7745..adc87cd307191 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3118,6 +3118,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { val fs = tablePath.getFileSystem(hadoopConf) val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") val trashPath = Path.mergePaths(trashCurrent, tablePath) + assume( + fs.mkdirs(trashPath) && fs.delete(trashPath, false), + "Trash directory could not be created, skipping.") assert(!fs.exists(trashPath)) try { hadoopConf.set(trashIntervalKey, "5") From b46c7302db73ee3671035ccfd8f51297b4d5e10e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 12:06:57 +0900 Subject: [PATCH 0036/1009] [SPARK-32704][SQL][TESTS][FOLLOW-UP] Check any physical rule instead of a specific rule in the test ### What changes were proposed in this pull request? This PR only checks if there's any physical rule runs instead of a specific rule. This is rather just a trivial fix to make the tests more robust. In fact, I faced a test failure from a in-house fork that applies a different physical rule that makes `CollapseCodegenStages` ineffective. ### Why are the changes needed? To make the test more robust by unrelated changes. ### Does this PR introduce _any_ user-facing change? No, test-only ### How was this patch tested? Manually tested. Jenkins tests should pass. Closes #29766 from HyukjinKwon/SPARK-32704. 
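The essence of the relaxation can be shown in isolation: assert on a stable package prefix of the rule-logging output instead of one concrete rule name, so the check survives a different physical preparation rule being applied. In this framework-free sketch the captured messages are made-up stand-ins for what a test log appender would record:

```scala
object LogAssertionSketch {
  // Made-up messages standing in for a captured test log.
  private val loggedMessages = Seq(
    "=== Applying Rule org.apache.spark.sql.execution.SomeOtherPhysicalRule ===",
    "=== Result of Batch Preparations ===")

  def main(args: Array[String]): Unit = {
    // Brittle: tied to one concrete rule name, fails if another rule runs instead.
    val brittle = loggedMessages.exists(
      _.contains("=== Applying Rule org.apache.spark.sql.execution.CollapseCodegenStages ==="))
    // Robust: any physical rule from the execution package satisfies the check.
    val robust = loggedMessages.exists(
      _.contains("=== Applying Rule org.apache.spark.sql.execution"))
    assert(!brittle && robust)
  }
}
```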
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/execution/QueryExecutionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 83c80b4f3eb08..585ce4e40471d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -219,7 +219,7 @@ class QueryExecutionSuite extends SharedSparkSession { spark.range(1).groupBy("id").count().queryExecution.executedPlan } } - Seq("=== Applying Rule org.apache.spark.sql.execution.CollapseCodegenStages ===", + Seq("=== Applying Rule org.apache.spark.sql.execution", "=== Result of Batch Preparations ===").foreach { expectedMsg => assert(testAppender.loggingEvents.exists(_.getRenderedMessage.contains(expectedMsg))) } From 6051755bfe23a0e4564bf19476ec34cd7fd6008d Mon Sep 17 00:00:00 2001 From: Tanel Kiis Date: Wed, 16 Sep 2020 12:13:15 +0900 Subject: [PATCH 0037/1009] [SPARK-32688][SQL][TEST] Add special values to LiteralGenerator for float and double ### What changes were proposed in this pull request? The `LiteralGenerator` for float and double datatypes was supposed to yield special values (NaN, +-inf) among others, but the `Gen.chooseNum` method does not yield values that are outside the defined range. The `Gen.chooseNum` for a wide range of floats and doubles does not yield values in the "everyday" range as stated in https://github.com/typelevel/scalacheck/issues/113 . There is an similar class `RandomDataGenerator` that is used in some other tests. Added `-0.0` and `-0.0f` as special values to there too. These changes revealed an inconsistency with the equality check between `-0.0` and `0.0`. ### Why are the changes needed? The `LiteralGenerator` is mostly used in the `checkConsistencyBetweenInterpretedAndCodegen` method in `MathExpressionsSuite`. This change would have caught the bug fixed in #29495 . ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Locally reverted #29495 and verified that the existing test cases caught the bug. Closes #29515 from tanelk/SPARK-32688. 
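To make the generator pattern being adopted concrete — roughly half hand-picked corner cases, half arbitrary values — here is a standalone ScalaCheck sketch. It assumes `org.scalacheck` is on the classpath and uses a plain `Gen[Double]` rather than Spark's `Literal` wrapper:

```scala
import org.scalacheck.{Arbitrary, Gen}

object SpecialDoubleGen {
  // Corner cases that Gen.chooseNum over a huge range practically never produces.
  private val specialDoubles: Gen[Double] = Gen.oneOf(
    Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity,
    Double.MinPositiveValue, Double.MaxValue, -Double.MaxValue,
    0.0, -0.0, 1.0, -1.0)

  // Gen.oneOf over two generators picks each about half the time, mirroring
  // the behavior of the integral literal generators.
  val doubleGen: Gen[Double] = Gen.oneOf(specialDoubles, Arbitrary.arbDouble.arbitrary)

  def main(args: Array[String]): Unit = {
    // Draw a few samples; sample() returns Option[Double].
    List.fill(20)(doubleGen.sample).flatten.foreach(println)
  }
}
```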
Authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../spark/sql/RandomDataGenerator.scala | 4 ++-- .../expressions/LiteralGenerator.scala | 19 +++++++++++++++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 6bd7a27ac11f1..9fa27c7df3832 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -260,10 +260,10 @@ object RandomDataGenerator { new MathContext(precision)).bigDecimal) case DoubleType => randomNumeric[Double]( rand, r => longBitsToDouble(r.nextLong()), Seq(Double.MinValue, Double.MinPositiveValue, - Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0)) + Double.MaxValue, Double.PositiveInfinity, Double.NegativeInfinity, Double.NaN, 0.0, -0.0)) case FloatType => randomNumeric[Float]( rand, r => intBitsToFloat(r.nextInt()), Seq(Float.MinValue, Float.MinPositiveValue, - Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f)) + Float.MaxValue, Float.PositiveInfinity, Float.NegativeInfinity, Float.NaN, 0.0f, -0.0f)) case ByteType => randomNumeric[Byte]( rand, _.nextInt().toByte, Seq(Byte.MinValue, Byte.MaxValue, 0.toByte)) case IntegerType => randomNumeric[Int]( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala index d92eb01b69bf0..c8e3b0e157319 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralGenerator.scala @@ -68,16 +68,27 @@ object LiteralGenerator { lazy val longLiteralGen: Gen[Literal] = for { l <- Arbitrary.arbLong.arbitrary } yield Literal.create(l, LongType) + // The floatLiteralGen and doubleLiteralGen will 50% of the time yield arbitrary values + // and 50% of the time will yield some special values that are more likely to reveal + // corner cases. This behavior is similar to the integral value generators. lazy val floatLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Float.MinValue / 2, Float.MaxValue / 2, - Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity, Float.MinPositiveValue, + Float.MaxValue, -Float.MaxValue, 0.0f, -0.0f, 1.0f, -1.0f), + Arbitrary.arbFloat.arbitrary + ) } yield Literal.create(f, FloatType) lazy val doubleLiteralGen: Gen[Literal] = for { - f <- Gen.chooseNum(Double.MinValue / 2, Double.MaxValue / 2, - Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity) + f <- Gen.oneOf( + Gen.oneOf( + Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity, Double.MinPositiveValue, + Double.MaxValue, -Double.MaxValue, 0.0, -0.0, 1.0, -1.0), + Arbitrary.arbDouble.arbitrary + ) } yield Literal.create(f, DoubleType) // TODO cache the generated data From 2e3aa2f0232a539346da3df8a20cd8e7c2b7dd4f Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Wed, 16 Sep 2020 06:05:35 +0000 Subject: [PATCH 0038/1009] [SPARK-32861][SQL] GenerateExec should require column ordering ### What changes were proposed in this pull request? 
This PR updates the `RemoveRedundantProjects` rule to make `GenerateExec` require column ordering. ### Why are the changes needed? `GenerateExec` was originally considered as a node that does not require column ordering. However, `GenerateExec` binds its input rows directly with its `requiredChildOutput` without using the child's output schema. In `doExecute()`: ```scala val proj = UnsafeProjection.create(output, output) ``` In `doConsume()`: ```scala val values = if (requiredChildOutput.nonEmpty) { input } else { Seq.empty } ``` In this case, changing input column ordering will result in `GenerateExec` binding the wrong schema to the input columns. For example, if we do not require child columns to be ordered, the `requiredChildOutput` [a, b, c] will directly bind to the schema of the input columns [c, b, a], which is incorrect: ``` GenerateExec explode(array(a, b, c)), [a, b, c], false, [d] HashAggregate(keys=[a, b, c], functions=[], output=[c, b, a]) ... ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29734 from allisonwang-db/generator. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../execution/RemoveRedundantProjects.scala | 4 +- .../RemoveRedundantProjectsSuite.scala | 54 ++++++++++++++++--- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index ecb4ad0f6e8dd..2bcf86edbea37 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -62,7 +62,9 @@ case class RemoveRedundantProjects(conf: SQLConf) extends Rule[SparkPlan] { val keepOrdering = a.aggregateExpressions .exists(ae => ae.mode.equals(Final) || ae.mode.equals(PartialMerge)) a.mapChildren(removeProject(_, keepOrdering)) - case g: GenerateExec => g.mapChildren(removeProject(_, false)) + // GenerateExec requires column ordering since it binds input rows directly with its + // requiredChildOutput without using child's output schema. + case g: GenerateExec => g.mapChildren(removeProject(_, true)) // JoinExec ordering requirement will inherit from its parent. If there is no ProjectExec in // its ancestors, JoinExec should require output columns to be ordered. 
case o => o.mapChildren(removeProject(_, requireOrdering)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index bc24436c5806a..930935f077665 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -18,17 +18,21 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest, Row} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession with SQLTestUtils { +abstract class RemoveRedundantProjectsSuiteBase + extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { private def assertProjectExecCount(df: DataFrame, expected: Int): Unit = { withClue(df.queryExecution) { val plan = df.queryExecution.executedPlan - val actual = plan.collectWithSubqueries { case p: ProjectExec => p }.size + val actual = collectWithSubqueries(plan) { case p: ProjectExec => p }.size assert(actual == expected) } } @@ -115,9 +119,41 @@ class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession wit assertProjectExec(query, 1, 2) } - test("generate") { - val query = "select a, key, explode(d) from testView where a > 10" - assertProjectExec(query, 0, 1) + test("generate should require column ordering") { + withTempView("testData") { + spark.range(0, 10, 1) + .selectExpr("id as key", "id * 2 as a", "id * 3 as b") + .createOrReplaceTempView("testData") + + val data = sql("select key, a, b, count(*) from testData group by key, a, b limit 2") + val df = data.selectExpr("a", "b", "key", "explode(array(key, a, b)) as d").filter("d > 0") + df.collect() + val plan = df.queryExecution.executedPlan + val numProjects = collectWithSubqueries(plan) { case p: ProjectExec => p }.length + + // Create a new plan that reverse the GenerateExec output and add a new ProjectExec between + // GenerateExec and its child. This is to test if the ProjectExec is removed, the output of + // the query will be incorrect. + val newPlan = stripAQEPlan(plan) transform { + case g @ GenerateExec(_, requiredChildOutput, _, _, child) => + g.copy(requiredChildOutput = requiredChildOutput.reverse, + child = ProjectExec(requiredChildOutput.reverse, child)) + } + + // Re-apply remove redundant project rule. + val rule = RemoveRedundantProjects(spark.sessionState.conf) + val newExecutedPlan = rule.apply(newPlan) + // The manually added ProjectExec node shouldn't be removed. + assert(collectWithSubqueries(newExecutedPlan) { + case p: ProjectExec => p + }.size == numProjects + 1) + + // Check the original plan's output and the new plan's output are the same. 
+ val expectedRows = plan.executeCollect() + val actualRows = newExecutedPlan.executeCollect() + assert(expectedRows.length == actualRows.length) + expectedRows.zip(actualRows).foreach { case (expected, actual) => assert(expected == actual) } + } } test("subquery") { @@ -131,3 +167,9 @@ class RemoveRedundantProjectsSuite extends QueryTest with SharedSparkSession wit } } } + +class RemoveRedundantProjectsSuite extends RemoveRedundantProjectsSuiteBase + with DisableAdaptiveExecutionSuite + +class RemoveRedundantProjectsSuiteAE extends RemoveRedundantProjectsSuiteBase + with EnableAdaptiveExecutionSuite From 550c1c9cfb5e6439cdd835388fe90a9ca1ebc695 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 16 Sep 2020 20:16:15 +0900 Subject: [PATCH 0039/1009] [SPARK-32888][DOCS] Add user document about header flag and RDD as path for reading CSV ### What changes were proposed in this pull request? This proposes to enhance user document of the API for loading a Dataset of strings storing CSV rows. If the header option is set to true, the API will remove all lines same with the header. ### Why are the changes needed? This behavior can confuse users. We should explicitly document it. ### Does this PR introduce _any_ user-facing change? No. Only doc change. ### How was this patch tested? Only doc change. Closes #29765 from viirya/SPARK-32888. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/sql/readwriter.py | 3 +++ .../src/main/scala/org/apache/spark/sql/DataFrameReader.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 982ab38f73654..ae715eea70b6d 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -430,6 +430,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non character. By default (None), it is disabled. :param header: uses the first line as names of columns. If None is set, it uses the default value, ``false``. + .. note:: if the given path is a RDD of Strings, this header + option will remove all lines same with the header if exists. + :param inferSchema: infers the input schema automatically from data. It requires one extra pass over the data. If None is set, it uses the default value, ``false``. :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b0d06e862ca7b..bd986d0138256 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -600,6 +600,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * If the enforceSchema is set to `false`, only the CSV header in the first line is checked * to conform specified or inferred schema. * + * @note if `header` option is set to `true` when calling this API, all lines same with + * the header will be removed if exists. + * * @param csvDataset input Dataset with one CSV row per record * @since 2.2.0 */ From e88429058723572b95502fd369f7c2c609c561e6 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 16 Sep 2020 20:18:36 +0900 Subject: [PATCH 0040/1009] [SPARK-32835][PYTHON] Add withField method to the pyspark Column class ### What changes were proposed in this pull request? 
This PR adds a `withField` method on the pyspark Column class to call the Scala API method added in https://github.com/apache/spark/pull/27066. ### Why are the changes needed? To update the Python API to match a new feature in the Scala API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test Closes #29699 from Kimahriman/feature/pyspark-with-field. Authored-by: Adam Binford Signed-off-by: HyukjinKwon --- python/pyspark/sql/column.py | 29 +++++++++++++++++++++++++ python/pyspark/sql/tests/test_column.py | 16 ++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 8c08d5cfa692b..0e073d2a5da28 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -329,6 +329,35 @@ def getField(self, name): DeprecationWarning) return self[name] + @since(3.1) + def withField(self, fieldName, col): + """ + An expression that adds/replaces a field in :class:`StructType` by name. + + >>> from pyspark.sql import Row + >>> from pyspark.sql.functions import lit + >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))]) + >>> df.withColumn('a', df['a'].withField('b', lit(3))).select('a.b').show() + +---+ + | b| + +---+ + | 3| + +---+ + >>> df.withColumn('a', df['a'].withField('d', lit(4))).select('a.d').show() + +---+ + | d| + +---+ + | 4| + +---+ + """ + if not isinstance(fieldName, str): + raise TypeError("fieldName should be a string") + + if not isinstance(col, Column): + raise TypeError("col should be a Column") + + return Column(self._jc.withField(fieldName, col._jc)) + def __getattr__(self, item): if item.startswith("__"): raise AttributeError(item) diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 5e05a8b63b259..8a89e6e9d5599 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -139,6 +139,22 @@ def test_bitwise_operations(self): result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict() self.assertEqual(~75, result['~b']) + def test_with_field(self): + from pyspark.sql.functions import lit, col + df = self.spark.createDataFrame([Row(a=Row(b=1, c=2))]) + self.assertIsInstance(df['a'].withField('b', lit(3)), Column) + self.assertIsInstance(df['a'].withField('d', lit(3)), Column) + result = df.withColumn('a', df['a'].withField('d', lit(3))).collect()[0].asDict() + self.assertEqual(3, result['a']['d']) + result = df.withColumn('a', df['a'].withField('b', lit(3))).collect()[0].asDict() + self.assertEqual(3, result['a']['b']) + + self.assertRaisesRegex(TypeError, + 'col should be a Column', + lambda: df['a'].withField('b', 3)) + self.assertRaisesRegex(TypeError, + 'fieldName should be a string', + lambda: df['a'].withField(col('b'), lit(3))) if __name__ == "__main__": import unittest From c918909c1a173505e9150f01ac7882fc621cd769 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 16 Sep 2020 20:22:11 +0900 Subject: [PATCH 0041/1009] [SPARK-32814][PYTHON] Replace __metaclass__ field with metaclass keyword ### What changes were proposed in this pull request? Replace `__metaclass__` fields with `metaclass` keyword in the class statements. ### Why are the changes needed? `__metaclass__` is no longer supported in Python 3. This means, for example, that types are no longer handled as singletons. 
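The reason is that Python 3 ignores a class-level `__metaclass__` attribute entirely; a metaclass now has to be supplied as a keyword argument in the `class` statement. A minimal sketch of the difference, using a made-up singleton metaclass (`Singleton`, `OldStyle`, `NewStyle` are illustrative names, not classes from this patch):
```python
class Singleton(type):
    """A metaclass that caches a single instance per class."""
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]


class OldStyle(object):
    # Python 2 idiom: in Python 3 this is just an ordinary class attribute,
    # so the metaclass is never applied and OldStyle() is OldStyle() evaluates to False.
    __metaclass__ = Singleton


class NewStyle(metaclass=Singleton):
    # Python 3 idiom: the metaclass is applied, so NewStyle() is NewStyle() evaluates to True.
    pass
```
Under the attribute form, the PySpark classes hit exactly the breakage demonstrated below.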
``` >>> from pyspark.sql.types import BooleanType >>> BooleanType() is BooleanType() False ``` and classes, which suppose to be abstract, are not ``` >>> import inspect >>> from pyspark.ml import Estimator >>> inspect.isabstract(Estimator) False ``` ### Does this PR introduce _any_ user-facing change? Yes (classes which were no longer abstract or singleton in Python 3, are now), though visible changes should be consider a bug-fix. ### How was this patch tested? Existing tests. Closes #29664 from zero323/SPARK-32138-FOLLOW-UP-METACLASS. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/ml/base.py | 23 ++++++--------- python/pyspark/ml/classification.py | 28 +++++++------------ python/pyspark/ml/evaluation.py | 9 ++---- python/pyspark/ml/param/__init__.py | 4 +-- python/pyspark/ml/regression.py | 20 ++++++-------- python/pyspark/ml/wrapper.py | 21 ++++---------- python/pyspark/sql/types.py | 43 +++++++++++------------------ 7 files changed, 52 insertions(+), 96 deletions(-) diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index fabfc3253e6f0..f1ae123250321 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -68,14 +68,13 @@ def next(self): @inherit_doc -class Estimator(Params): +class Estimator(Params, metaclass=ABCMeta): """ Abstract class for estimators that fit models to data. .. versionadded:: 1.3.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _fit(self, dataset): @@ -134,14 +133,13 @@ def fit(self, dataset, params=None): @inherit_doc -class Transformer(Params): +class Transformer(Params, metaclass=ABCMeta): """ Abstract class for transformers that transform one dataset into another. .. versionadded:: 1.3.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _transform(self, dataset): @@ -174,14 +172,13 @@ def transform(self, dataset, params=None): @inherit_doc -class Model(Transformer): +class Model(Transformer, metaclass=ABCMeta): """ Abstract class for models that are fitted by estimators. .. versionadded:: 1.4.0 """ - - __metaclass__ = ABCMeta + pass @inherit_doc @@ -258,13 +255,11 @@ class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): @inherit_doc -class Predictor(Estimator, _PredictorParams): +class Predictor(Estimator, _PredictorParams, metaclass=ABCMeta): """ Estimator for prediction tasks (regression and classification). """ - __metaclass__ = ABCMeta - @since("3.0.0") def setLabelCol(self, value): """ @@ -288,13 +283,11 @@ def setPredictionCol(self, value): @inherit_doc -class PredictionModel(Model, _PredictorParams): +class PredictionModel(Model, _PredictorParams, metaclass=ABCMeta): """ Model for prediction tasks (regression and classification). """ - __metaclass__ = ABCMeta - @since("3.0.0") def setFeaturesCol(self, value): """ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 6df425211242f..b5261b30d89e4 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -73,14 +73,12 @@ class _ClassifierParams(HasRawPredictionCol, _PredictorParams): @inherit_doc -class Classifier(Predictor, _ClassifierParams): +class Classifier(Predictor, _ClassifierParams, metaclass=ABCMeta): """ Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. 
""" - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -90,14 +88,12 @@ def setRawPredictionCol(self, value): @inherit_doc -class ClassificationModel(PredictionModel, _ClassifierParams): +class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=ABCMeta): """ Model produced by a ``Classifier``. Classes are indexed {0, 1, ..., numClasses - 1}. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -133,13 +129,12 @@ class _ProbabilisticClassifierParams(HasProbabilityCol, HasThresholds, _Classifi @inherit_doc -class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams): +class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams, + metaclass=ABCMeta): """ Probabilistic Classifier for classification tasks. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setProbabilityCol(self, value): """ @@ -157,13 +152,12 @@ def setThresholds(self, value): @inherit_doc class ProbabilisticClassificationModel(ClassificationModel, - _ProbabilisticClassifierParams): + _ProbabilisticClassifierParams, + metaclass=ABCMeta): """ Model produced by a ``ProbabilisticClassifier``. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setProbabilityCol(self, value): """ @@ -188,14 +182,12 @@ def predictProbability(self, value): @inherit_doc -class _JavaClassifier(Classifier, JavaPredictor): +class _JavaClassifier(Classifier, JavaPredictor, metaclass=ABCMeta): """ Java Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. """ - __metaclass__ = ABCMeta - @since("3.0.0") def setRawPredictionCol(self, value): """ @@ -229,12 +221,12 @@ def predictRaw(self, value): @inherit_doc -class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier): +class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier, + metaclass=ABCMeta): """ Java Probabilistic Classifier for classification tasks. """ - - __metaclass__ = ABCMeta + pass @inherit_doc diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index a69a57f588571..354921e9e04b1 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -32,14 +32,13 @@ @inherit_doc -class Evaluator(Params): +class Evaluator(Params, metaclass=ABCMeta): """ Base class for evaluators that compute metrics from predictions. .. versionadded:: 1.4.0 """ - - __metaclass__ = ABCMeta + pass @abstractmethod def _evaluate(self, dataset): @@ -84,14 +83,12 @@ def isLargerBetter(self): @inherit_doc -class JavaEvaluator(JavaParams, Evaluator): +class JavaEvaluator(JavaParams, Evaluator, metaclass=ABCMeta): """ Base class for :py:class:`Evaluator`s that wrap Java/Scala implementations. """ - __metaclass__ = ABCMeta - def _evaluate(self, dataset): """ Evaluates the output. diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index 95f3c32b8bcec..1853a8816ff58 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -223,7 +223,7 @@ def toBoolean(value): raise TypeError("Boolean Param requires value of type bool. Found %s." % type(value)) -class Params(Identifiable): +class Params(Identifiable, metaclass=ABCMeta): """ Components that take parameters. This also provides an internal param map to store parameter values attached to the instance. @@ -231,8 +231,6 @@ class Params(Identifiable): .. 
versionadded:: 1.3.0 """ - __metaclass__ = ABCMeta - def __init__(self): super(Params, self).__init__() #: internal param map for user-supplied values param map diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 6bd32ed1d636d..e1b7ffb63f8fe 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -48,45 +48,41 @@ 'FMRegressor', 'FMRegressionModel'] -class Regressor(Predictor, _PredictorParams): +class Regressor(Predictor, _PredictorParams, metaclass=ABCMeta): """ Regressor for regression tasks. .. versionadded:: 3.0.0 """ + pass - __metaclass__ = ABCMeta - -class RegressionModel(PredictionModel, _PredictorParams): +class RegressionModel(PredictionModel, _PredictorParams, metaclass=ABCMeta): """ Model produced by a ``Regressor``. .. versionadded:: 3.0.0 """ - - __metaclass__ = ABCMeta + pass -class _JavaRegressor(Regressor, JavaPredictor): +class _JavaRegressor(Regressor, JavaPredictor, metaclass=ABCMeta): """ Java Regressor for regression tasks. .. versionadded:: 3.0.0 """ + pass - __metaclass__ = ABCMeta - -class _JavaRegressionModel(RegressionModel, JavaPredictionModel): +class _JavaRegressionModel(RegressionModel, JavaPredictionModel, metaclass=ABCMeta): """ Java Model produced by a ``_JavaRegressor``. To be mixed in with :class:`pyspark.ml.JavaModel` .. versionadded:: 3.0.0 """ - - __metaclass__ = ABCMeta + pass class _LinearRegressionParams(_PredictorParams, HasRegParam, HasElasticNetParam, HasMaxIter, diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index c1d060a51cf9d..da52788afea72 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -109,7 +109,7 @@ def _new_java_array(pylist, java_class): @inherit_doc -class JavaParams(JavaWrapper, Params): +class JavaParams(JavaWrapper, Params, metaclass=ABCMeta): """ Utility class to help create wrapper classes from Java/Scala implementations of pipeline components. @@ -117,8 +117,6 @@ class JavaParams(JavaWrapper, Params): #: The param values in the Java object should be #: synced with the Python wrapper in fit/transform/evaluate/copy. - __metaclass__ = ABCMeta - def _make_java_param_pair(self, param, value): """ Makes a Java param pair. @@ -287,14 +285,12 @@ def clear(self, param): @inherit_doc -class JavaEstimator(JavaParams, Estimator): +class JavaEstimator(JavaParams, Estimator, metaclass=ABCMeta): """ Base class for :py:class:`Estimator`s that wrap Java/Scala implementations. """ - __metaclass__ = ABCMeta - @abstractmethod def _create_model(self, java_model): """ @@ -321,30 +317,26 @@ def _fit(self, dataset): @inherit_doc -class JavaTransformer(JavaParams, Transformer): +class JavaTransformer(JavaParams, Transformer, metaclass=ABCMeta): """ Base class for :py:class:`Transformer`s that wrap Java/Scala implementations. Subclasses should ensure they have the transformer Java object available as _java_obj. """ - __metaclass__ = ABCMeta - def _transform(self, dataset): self._transfer_params_to_java() return DataFrame(self._java_obj.transform(dataset._jdf), dataset.sql_ctx) @inherit_doc -class JavaModel(JavaTransformer, Model): +class JavaModel(JavaTransformer, Model, metaclass=ABCMeta): """ Base class for :py:class:`Model`s that wrap Java/Scala implementations. Subclasses should inherit this class before param mix-ins, because this sets the UID from the Java model. """ - __metaclass__ = ABCMeta - def __init__(self, java_model=None): """ Initialize this instance with a Java model object. 
@@ -374,12 +366,11 @@ def __repr__(self): @inherit_doc -class JavaPredictor(Predictor, JavaEstimator, _PredictorParams): +class JavaPredictor(Predictor, JavaEstimator, _PredictorParams, metaclass=ABCMeta): """ (Private) Java Estimator for prediction tasks (regression and classification). """ - - __metaclass__ = ABCMeta + pass @inherit_doc diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 43f3a8531871a..5a89d5ab9a7e5 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -102,13 +102,12 @@ def __call__(cls): return cls._instances[cls] -class NullType(DataType): +class NullType(DataType, metaclass=DataTypeSingleton): """Null type. The data type representing None, used for the types that cannot be inferred. """ - - __metaclass__ = DataTypeSingleton + pass class AtomicType(DataType): @@ -121,11 +120,10 @@ class NumericType(AtomicType): """ -class IntegralType(NumericType): +class IntegralType(NumericType, metaclass=DataTypeSingleton): """Integral data types. """ - - __metaclass__ = DataTypeSingleton + pass class FractionalType(NumericType): @@ -133,33 +131,28 @@ class FractionalType(NumericType): """ -class StringType(AtomicType): +class StringType(AtomicType, metaclass=DataTypeSingleton): """String data type. """ - - __metaclass__ = DataTypeSingleton + pass -class BinaryType(AtomicType): +class BinaryType(AtomicType, metaclass=DataTypeSingleton): """Binary (byte array) data type. """ + pass - __metaclass__ = DataTypeSingleton - -class BooleanType(AtomicType): +class BooleanType(AtomicType, metaclass=DataTypeSingleton): """Boolean data type. """ - - __metaclass__ = DataTypeSingleton + pass -class DateType(AtomicType): +class DateType(AtomicType, metaclass=DataTypeSingleton): """Date (datetime.date) data type. """ - __metaclass__ = DataTypeSingleton - EPOCH_ORDINAL = datetime.datetime(1970, 1, 1).toordinal() def needConversion(self): @@ -174,12 +167,10 @@ def fromInternal(self, v): return datetime.date.fromordinal(v + self.EPOCH_ORDINAL) -class TimestampType(AtomicType): +class TimestampType(AtomicType, metaclass=DataTypeSingleton): """Timestamp (datetime.datetime) data type. """ - __metaclass__ = DataTypeSingleton - def needConversion(self): return True @@ -226,18 +217,16 @@ def __repr__(self): return "DecimalType(%d,%d)" % (self.precision, self.scale) -class DoubleType(FractionalType): +class DoubleType(FractionalType, metaclass=DataTypeSingleton): """Double data type, representing double precision floats. """ + pass - __metaclass__ = DataTypeSingleton - -class FloatType(FractionalType): +class FloatType(FractionalType, metaclass=DataTypeSingleton): """Float data type, representing single precision floats. """ - - __metaclass__ = DataTypeSingleton + pass class ByteType(IntegralType): From 3bc13e641257182dde097d759555698701a2fcc3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 16 Sep 2020 14:08:59 +0000 Subject: [PATCH 0042/1009] [SPARK-32706][SQL] Improve cast string to decimal type ### What changes were proposed in this pull request? This pr makes cast string type to decimal decimal type fast fail if precision larger that 38. ### Why are the changes needed? It is very slow if precision very large. 
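At the SQL level the symptom is easy to reproduce. The snippet below is a minimal PySpark sketch (the session setup and the column alias are illustrative; the string literal is the one used in the benchmark that follows): under the default non-ANSI mode the cast still yields NULL, but with the fast-fail check the NULL comes from a cheap precision test on the parsed value instead of from materializing the huge decimal first.
```python
from pyspark.sql import SparkSession

# Illustrative session setup; any SparkSession works here.
spark = SparkSession.builder.master("local[1]").appName("decimal-cast-sketch").getOrCreate()

# A value whose precision is far beyond DecimalType.MAX_PRECISION (38).
# It cannot be represented as DECIMAL(38, 0), so the result is NULL in the
# default (non-ANSI) mode -- but before this change Spark only discovered that
# after building the enormous intermediate value, which could take seconds per
# value (see the benchmark below).
spark.sql("SELECT CAST('6.0790316E+25569151' AS DECIMAL(38, 0)) AS d").show()
# +----+
# |   d|
# +----+
# |null|
# +----+
```
With `spark.sql.ansi.enabled=true` the same cast raises an error instead of returning NULL; the patch fast-fails that path as well via `Decimal.fromStringANSI`.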
Benchmark and benchmark result: ```scala import org.apache.spark.benchmark.Benchmark val bd1 = new java.math.BigDecimal("6.0790316E+25569151") val bd2 = new java.math.BigDecimal("6.0790316E+25"); val benchmark = new Benchmark("Benchmark string to decimal", 1, minNumIters = 2) benchmark.addCase(bd1.toString) { _ => println(Decimal(bd1).precision) } benchmark.addCase(bd2.toString) { _ => println(Decimal(bd2).precision) } benchmark.run() ``` ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.15.6 Intel(R) Core(TM) i9-9980HK CPU 2.40GHz Benchmark string to decimal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 6.0790316E+25569151 9340 9381 57 0.0 9340094625.0 1.0X 6.0790316E+25 0 0 0 0.5 2150.0 4344230.1X ``` Stacktrace: ![image](https://user-images.githubusercontent.com/5399861/92941705-4c868980-f483-11ea-8a15-b93acde8c0f4.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test and benchmark test: Dataset | Before this pr (Seconds) | After this pr (Seconds) -- | -- | -- https://issues.apache.org/jira/secure/attachment/13011406/part-00000.parquet | 2640 | 2 Closes #29731 from wangyum/SPARK-32706. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 41 ++++++-------- .../org/apache/spark/sql/types/Decimal.scala | 52 +++++++++++++++-- .../sql/catalyst/expressions/CastSuite.scala | 56 +++++++++++++++++++ .../apache/spark/sql/types/DecimalSuite.scala | 30 ++++++++++ 4 files changed, 152 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index caa8ceea0ab91..96154917e1637 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -669,19 +669,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDecimal(from: DataType, target: DecimalType): Any => Any = from match { - case StringType => - buildCast[UTF8String](_, s => try { - // According the benchmark test, `s.toString.trim` is much faster than `s.trim.toString`. 
- // Please refer to https://github.com/apache/spark/pull/26640 - changePrecision(Decimal(new JavaBigDecimal(s.toString.trim)), target) - } catch { - case _: NumberFormatException => - if (ansiEnabled) { - throw new NumberFormatException(s"invalid input syntax for type numeric: $s") - } else { - null - } + case StringType if !ansiEnabled => + buildCast[UTF8String](_, s => { + val d = Decimal.fromString(s) + if (d == null) null else changePrecision(d, target) }) + case StringType if ansiEnabled => + buildCast[UTF8String](_, s => changePrecision(Decimal.fromStringANSI(s), target)) case BooleanType => buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) case DateType => @@ -1185,20 +1179,21 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val tmp = ctx.freshVariable("tmpDecimal", classOf[Decimal]) val canNullSafeCast = Cast.canNullSafeCastToDecimal(from, target) from match { - case StringType => + case StringType if !ansiEnabled => (c, evPrim, evNull) => - val handleException = if (ansiEnabled) { - s"""throw new NumberFormatException("invalid input syntax for type numeric: " + $c);""" - } else { - s"$evNull =true;" - } code""" - try { - Decimal $tmp = Decimal.apply(new java.math.BigDecimal($c.toString().trim())); + Decimal $tmp = Decimal.fromString($c); + if ($tmp == null) { + $evNull = true; + } else { + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + } + """ + case StringType if ansiEnabled => + (c, evPrim, evNull) => + code""" + Decimal $tmp = Decimal.fromStringANSI($c); ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} - } catch (java.lang.NumberFormatException e) { - $handleException - } """ case BooleanType => (c, evPrim, evNull) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 48ae49740f22d..6be6d81ec3bb7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -18,12 +18,13 @@ package org.apache.spark.sql.types import java.lang.{Long => JLong} -import java.math.{BigInteger, MathContext, RoundingMode} +import java.math.{BigDecimal => JavaBigDecimal, BigInteger, MathContext, RoundingMode} import scala.util.Try import org.apache.spark.annotation.Unstable import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String /** * A mutable implementation of BigDecimal that can hold a Long if values are small enough. @@ -550,9 +551,6 @@ object Decimal { private[sql] val ZERO = Decimal(0) private[sql] val ONE = Decimal(1) - private val LONG_MAX_BIG_INT = BigInteger.valueOf(JLong.MAX_VALUE) - private val LONG_MIN_BIG_INT = BigInteger.valueOf(JLong.MIN_VALUE) - def apply(value: Double): Decimal = new Decimal().set(value) def apply(value: Long): Decimal = new Decimal().set(value) @@ -589,6 +587,52 @@ object Decimal { } } + private def calculatePrecision(bigDecimal: JavaBigDecimal): Int = { + if (bigDecimal.scale < 0) { + bigDecimal.precision - bigDecimal.scale + } else { + bigDecimal.precision + } + } + + private def stringToJavaBigDecimal(str: UTF8String): JavaBigDecimal = { + // According the benchmark test, `s.toString.trim` is much faster than `s.trim.toString`. 
+ // Please refer to https://github.com/apache/spark/pull/26640 + new JavaBigDecimal(str.toString.trim) + } + + def fromString(str: UTF8String): Decimal = { + try { + val bigDecimal = stringToJavaBigDecimal(str) + // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. + // For example: Decimal("6.0790316E+25569151") + if (calculatePrecision(bigDecimal) > DecimalType.MAX_PRECISION) { + null + } else { + Decimal(bigDecimal) + } + } catch { + case _: NumberFormatException => + null + } + } + + def fromStringANSI(str: UTF8String): Decimal = { + try { + val bigDecimal = stringToJavaBigDecimal(str) + // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. + // For example: Decimal("6.0790316E+25569151") + if (calculatePrecision(bigDecimal) > DecimalType.MAX_PRECISION) { + throw new ArithmeticException(s"out of decimal type range: $str") + } else { + Decimal(bigDecimal) + } + } catch { + case _: NumberFormatException => + throw new NumberFormatException(s"invalid input syntax for type numeric: $str") + } + } + /** * Creates a decimal from unscaled, precision and scale without checking the bounds. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 2d202ff0e7954..7caa4a55c06af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1349,6 +1349,29 @@ class CastSuite extends CastSuiteBase { val v = Literal.create(Row(1), new ExampleSubTypeUDT()) checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1)) } + + test("Fast fail for cast string type to decimal type") { + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), + Decimal("12345678901234567890123456789012345678")) + checkEvaluation(cast("123456789012345678901234567890123456789", DecimalType(38, 0)), null) + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 1)), null) + + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), + Decimal("0E-18")) + checkEvaluation(cast("6E-120", DecimalType(38, 0)), + Decimal("0")) + + checkEvaluation(cast("6E+37", DecimalType(38, 0)), + Decimal("60000000000000000000000000000000000000")) + checkEvaluation(cast("6E+38", DecimalType(38, 0)), null) + checkEvaluation(cast("6E+37", DecimalType(38, 1)), null) + + checkEvaluation(cast("abcd", DecimalType(38, 1)), null) + } } /** @@ -1405,4 +1428,37 @@ class AnsiCastSuite extends CastSuiteBase { checkEvaluation(cast(negativeTs, LongType), expectedSecs) } } + + test("Fast fail for cast string type to decimal type in ansi mode") { + checkEvaluation(cast("12345678901234567890123456789012345678", DecimalType(38, 0)), + Decimal("12345678901234567890123456789012345678")) + checkExceptionInExpression[ArithmeticException]( + cast("123456789012345678901234567890123456789", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("12345678901234567890123456789012345678", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + + 
checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000000000001", DecimalType(38, 0)), + Decimal("0")) + checkEvaluation(cast("0.00000000000000000000000000000000000001", DecimalType(38, 18)), + Decimal("0E-18")) + checkEvaluation(cast("6E-120", DecimalType(38, 0)), + Decimal("0")) + + checkEvaluation(cast("6E+37", DecimalType(38, 0)), + Decimal("60000000000000000000000000000000000000")) + checkExceptionInExpression[ArithmeticException]( + cast("6E+38", DecimalType(38, 0)), + "out of decimal type range") + checkExceptionInExpression[ArithmeticException]( + cast("6E+37", DecimalType(38, 1)), + "cannot be represented as Decimal(38, 1)") + + checkExceptionInExpression[NumberFormatException]( + cast("abcd", DecimalType(38, 1)), + "invalid input syntax for type numeric") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 7d0346fc0145e..7ce451ed6d577 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.Decimal._ +import org.apache.spark.unsafe.types.UTF8String class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper { /** Check that a Decimal has the given string representation, precision and scale */ @@ -256,4 +257,33 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper assert(decimalLong.toScalaBigInt == scala.math.BigInt("123456789")) assert(decimalLong.toJavaBigInteger == new java.math.BigInteger("123456789")) } + + test("UTF8String to Decimal") { + def checkFromString(string: String): Unit = { + assert(Decimal.fromString(UTF8String.fromString(string)) === Decimal(string)) + assert(Decimal.fromStringANSI(UTF8String.fromString(string)) === Decimal(string)) + } + + def checkOutOfRangeFromString(string: String): Unit = { + assert(Decimal.fromString(UTF8String.fromString(string)) === null) + val e = intercept[ArithmeticException](Decimal.fromStringANSI(UTF8String.fromString(string))) + assert(e.getMessage.contains("out of decimal type range")) + } + + checkFromString("12345678901234567890123456789012345678") + checkOutOfRangeFromString("123456789012345678901234567890123456789") + + checkFromString("0.00000000000000000000000000000000000001") + checkFromString("0.000000000000000000000000000000000000000000000001") + + checkFromString("6E-640") + + checkFromString("6E+37") + checkOutOfRangeFromString("6E+38") + checkOutOfRangeFromString("6.0790316E+25569151") + + assert(Decimal.fromString(UTF8String.fromString("str")) === null) + val e = intercept[NumberFormatException](Decimal.fromStringANSI(UTF8String.fromString("str"))) + assert(e.getMessage.contains("invalid input syntax for type numeric")) + } } From 355ab6ae94a972011d56b8449c612fd7ad30d860 Mon Sep 17 00:00:00 2001 From: KevinSmile Date: Wed, 16 Sep 2020 23:39:41 +0900 Subject: [PATCH 0043/1009] [SPARK-32804][LAUNCHER][FOLLOWUP] Fix SparkSubmitCommandBuilderSuite test failure without jars ### What changes were proposed in this pull request? It's a followup of https://github.com/apache/spark/pull/29653. 
Tests in `SparkSubmitCommandBuilderSuite` may fail if you didn't build first and have jars before test, so if `isTesting` we should set a dummy `SparkLauncher.NO_RESOURCE`. ### Why are the changes needed? Fix tests failure. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? mvn clean test (test without jars built first). Closes #29769 from KevinSmile/bug-fix-master. Authored-by: KevinSmile Signed-off-by: HyukjinKwon --- .../spark/launcher/SparkSubmitCommandBuilder.java | 13 +++++++++---- .../launcher/SparkSubmitCommandBuilderSuite.java | 4 ++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 43e7f8debe17d..d6ed1e3a3532d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -404,12 +404,17 @@ private boolean isThriftServer(String mainClass) { } private String findExamplesAppJar() { - for (String exampleJar : findExamplesJars()) { - if (new File(exampleJar).getName().startsWith("spark-examples")) { - return exampleJar; + boolean isTesting = "1".equals(getenv("SPARK_TESTING")); + if (isTesting) { + return SparkLauncher.NO_RESOURCE; + } else { + for (String exampleJar : findExamplesJars()) { + if (new File(exampleJar).getName().startsWith("spark-examples")) { + return exampleJar; + } } + throw new IllegalStateException("Failed to find examples' main app jar."); } - throw new IllegalStateException("Failed to find examples' main app jar."); } private List findExamplesJars() { diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 6cd089e256b93..07a9dae1256ab 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -259,8 +259,8 @@ public void testExamplesRunnerPrimaryResource() throws Exception { findArgValue(cmd, parser.CLASS)); assertEquals("cluster", findArgValue(cmd, parser.DEPLOY_MODE)); String primaryResource = cmd.get(cmd.size() - 2); - assertTrue(new File(primaryResource).getName().startsWith("spark-examples")); - assertFalse(cmd.contains(SparkLauncher.NO_RESOURCE)); + assertTrue(primaryResource.equals(SparkLauncher.NO_RESOURCE) + || new File(primaryResource).getName().startsWith("spark-examples")); } @Test(expected = IllegalArgumentException.class) From 56ae95053df4afa9764df3f1d88f300896ca0183 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 16 Sep 2020 15:00:31 +0000 Subject: [PATCH 0044/1009] [SPARK-32850][CORE] Simplify the RPC message flow of decommission ### What changes were proposed in this pull request? This PR cleans up the RPC message flow among the multiple decommission use cases, it includes changes: * Keep `Worker`'s decommission status be consistent between the case where decommission starts from `Worker` and the case where decommission starts from the `MasterWebUI`: sending `DecommissionWorker` from `Master` to `Worker` in the latter case. 
* Change from two-way communication to one-way communication when notifying decommission between driver and executor: it's obviously unnecessary for the executor to acknowledge the decommission status to the driver since the decommission request is from the driver. The same holds in the reverse direction. * Only send one message instead of two (`DecommissionSelf`/`DecommissionBlockManager`) when decommissioning the executor: the executor and the `BlockManager` are in the same JVM. * Clean up the code around here. ### Why are the changes needed? Before: [diagram of the decommission RPC message flow before this change] After: [diagram of the decommission RPC message flow after this change] (Note the diagrams only count those RPC calls that need to go through the network. Local RPC calls are not counted here.) After this change, we removed 6 of the original RPC calls and added one more RPC call to keep the Worker's decommission status consistent, and the RPC flow becomes clearer. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #29722 from Ngone51/simplify-decommission-rpc. Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../spark/ExecutorAllocationClient.scala | 19 ++-- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 31 +++++-- .../apache/spark/deploy/master/Master.scala | 23 +++-- .../apache/spark/deploy/worker/Worker.scala | 28 +++--- .../CoarseGrainedExecutorBackend.scala | 60 ++++++------- .../cluster/CoarseGrainedClusterMessage.scala | 16 ++-- .../CoarseGrainedSchedulerBackend.scala | 86 +++++++------------ .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 6 +- .../storage/BlockManagerMasterEndpoint.scala | 18 +--- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 7 +- .../scheduler/WorkerDecommissionSuite.scala | 7 +- .../ExecutorAllocationManagerSuite.scala | 6 +- 16 files changed, 177 insertions(+), 148 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index ce47f3fd32203..cdba1c44034c0 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,11 +91,13 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -109,14 +111,21 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors.
+ * @param triggeredByExecutor whether the decommission is triggered at executor. + * (TODO: add a new type like `ExecutorDecommissionInfo` for the + * case where executor is decommissioned at executor first, so we + * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. */ - final def decommissionExecutor(executorId: String, + final def decommissionExecutor( + executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean): Boolean = { + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean = false): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors) + adjustTargetNumExecutors = adjustTargetNumExecutors, + triggeredByExecutor = triggeredByExecutor) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index b6e14e8210c86..341334c8a29c4 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -581,7 +581,10 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) + client.decommissionExecutors( + executorIdsWithoutHostLoss, + adjustTargetNumExecutors = false, + triggeredByExecutor = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 83f373d526e90..8bc909b096e71 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,13 +61,34 @@ private[deploy] object DeployMessages { } /** + * An internal message that used by Master itself, in order to handle the + * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. + * @param ids A collection of Worker ids, which should be decommissioned. + */ + case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage + + /** + * A message that sent from Master to Worker to decommission the Worker. + * It's used for the case where decommission is triggered at MasterWebUI. + * + * Note that decommission a Worker will cause all the executors on that Worker + * to be decommissioned as well. + */ + object DecommissionWorker extends DeployMessage + + /** + * A message that sent to the Worker itself when it receives PWR signal, + * indicating the Worker starts to decommission. + */ + object WorkerSigPWRReceived extends DeployMessage + + /** + * A message sent from Worker to Master to tell Master that the Worker has started + * decommissioning. It's used for the case where decommission is triggered at Worker. 
+ * * @param id the worker id - * @param worker the worker endpoint ref */ - case class WorkerDecommission( - id: String, - worker: RpcEndpointRef) - extends DeployMessage + case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 48516cdf83291..15f8be69d97bd 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,15 +245,27 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommission(id, workerRef) => - logInfo("Recording worker %s decommissioning".format(id)) + case WorkerDecommissioning(id, workerRef) => if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach(decommissionWorker) + idToWorker.get(id).foreach(w => decommissionWorker(w)) } + case DecommissionWorkers(ids) => + // The caller has already checked the state when handling DecommissionWorkersOnHosts, + // so it should not be the STANDBY + assert(state != RecoveryState.STANDBY) + ids.foreach ( id => + // We use foreach since get gives us an option and we can skip the failures. + idToWorker.get(id).foreach { w => + decommissionWorker(w) + // Also send a message to the worker node to notify. + w.endpoint.send(DecommissionWorker) + } + ) + case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -891,10 +903,7 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - workersToRemove.foreach(wi => { - logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") - self.send(WorkerDecommission(wi.id, wi.endpoint)) - }) + self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 7649bc37c30b6..2e8474e3e3fc2 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,7 +70,10 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.")(decommissionSelf) + "disabling worker decommission feature.") { + self.send(WorkerSigPWRReceived) + true + } } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -137,7 +140,8 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - private val workerId = generateWorkerId() + // expose for test + private[spark] val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -668,8 +672,13 @@ private[deploy] class Worker( 
finishedApps += id maybeCleanupApplication(id) - case WorkerDecommission(_, _) => + case DecommissionWorker => + decommissionSelf() + + case WorkerSigPWRReceived => decommissionSelf() + // Tell master we starts decommissioning so it stops trying to launch executor/driver on us + sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -768,16 +777,15 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Boolean = { - if (conf.get(config.DECOMMISSION_ENABLED)) { - logDebug("Decommissioning self") + private[deploy] def decommissionSelf(): Unit = { + if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { decommissioned = true - sendToMaster(WorkerDecommission(workerId, self)) + logInfo(s"Decommission worker $workerId.") + } else if (decommissioned) { + logWarning(s"Worker $workerId already started decommissioning.") } else { - logWarning("Asked to decommission self, but decommissioning not enabled") + logWarning(s"Receive decommission request, but decommission feature is disabled.") } - // Return true since can be called as a signal handler - true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 48045bafe6e3f..d002f7b407e5e 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,12 +79,17 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - @volatile private var decommissioned = false + private var decommissioned = false override def onStart(): Unit = { - logInfo("Registering PWR handler.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling decommission feature.")(decommissionSelf) + if (env.conf.get(DECOMMISSION_ENABLED)) { + logInfo("Registering PWR handler to trigger decommissioning.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling executor decommission feature.") { + self.send(ExecutorSigPWRReceived) + true + } + } logInfo("Connecting to driver: " + driverUrl) try { @@ -166,17 +171,6 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { - if (decommissioned) { - val msg = "Asked to launch a task while decommissioned." 
- logError(msg) - driver match { - case Some(endpoint) => - logInfo("Sending DecommissionExecutor to driver.") - endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) - case _ => - logError("No registered driver to send Decommission to.") - } - } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -213,9 +207,17 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionSelf => - logInfo("Received decommission self") + case DecommissionExecutor => decommissionSelf() + + case ExecutorSigPWRReceived => + decommissionSelf() + if (driver.nonEmpty) { + // Tell driver we starts decommissioning so it stops trying to schedule us + driver.get.askSync[Boolean](ExecutorDecommissioning(executorId)) + } else { + logError("No driver to message decommissioning.") + } } override def onDisconnected(remoteAddress: RpcAddress): Unit = { @@ -264,17 +266,20 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Boolean = { - val msg = "Decommissioning self w/sync" + private def decommissionSelf(): Unit = { + if (!env.conf.get(DECOMMISSION_ENABLED)) { + logWarning(s"Receive decommission request, but decommission feature is disabled.") + return + } else if (decommissioned) { + logWarning(s"Executor $executorId already started decommissioning.") + return + } + val msg = s"Decommission executor $executorId." logInfo(msg) try { decommissioned = true - // Tell master we are are decommissioned so it stops trying to schedule us - if (driver.nonEmpty) { - driver.get.askSync[Boolean](DecommissionExecutor( - executorId, ExecutorDecommissionInfo(msg))) - } else { - logError("No driver to message decommissioning.") + if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { + env.blockManager.decommissionBlockManager() } if (executor != null) { executor.decommission() @@ -333,12 +338,9 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") - // Return true since we are handling a signal - true } catch { case e: Exception => logError("Unexpected error while decommissioning self", e) - false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 7242ab7786061..d1b0e798c51be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,8 +95,17 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) - extends CoarseGrainedClusterMessage + // A message that sent from executor to driver to tell driver that the executor has started + // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) + case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage + + // A message that sent from driver to executor to decommission that executor. 
+ // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. + object DecommissionExecutor extends CoarseGrainedClusterMessage + + // A message that sent to the executor itself when it receives PWR signal, + // indicating the executor starts to decommission. + object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -136,7 +145,4 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage - - // Used to ask an executor to decommission itself. (Can be an internal message) - case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 0f144125af7bf..f6930da96a390 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,10 +191,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: $decommissionInfo") - decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) - case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -272,10 +268,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeWorker(workerId, host, message) context.reply(true) - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") - context.reply(decommissionExecutor(executorId, decommissionInfo, - adjustTargetNumExecutors = false)) + case ExecutorDecommissioning(executorId) => + logWarning(s"Received executor $executorId decommissioned message") + context.reply( + decommissionExecutor( + executorId, + ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), + adjustTargetNumExecutors = false, + triggeredByExecutor = true)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -463,71 +463,47 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. 
*/ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { - - val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => - CoarseGrainedSchedulerBackend.this.synchronized { - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - executorsPendingDecommission(executorId) = decomInfo.workerHost - true - } else { - false - } + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = withLock { + val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => + // Only bother decommissioning executors which are alive. + if (isExecutorActive(executorId)) { + scheduler.executorDecommission(executorId, decomInfo) + executorsPendingDecommission(executorId) = decomInfo.workerHost + Some(executorId) + } else { + None } } // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission.map(_._1)) + adjustExecutors(executorsToDecommission) } - executorsToDecommission.filter { case (executorId, decomInfo) => - doDecommission(executorId, decomInfo) - }.map(_._1) - } - - - private def doDecommission(executorId: String, - decomInfo: ExecutorDecommissionInfo): Boolean = { - - logInfo(s"Asking executor $executorId to decommissioning.") - scheduler.executorDecommission(executorId, decomInfo) - // Send decommission message to the executor (it could have originated on the executor - // but not necessarily). - CoarseGrainedSchedulerBackend.this.synchronized { - executorDataMap.get(executorId) match { - case Some(executorInfo) => - executorInfo.executorEndpoint.send(DecommissionSelf) - case None => - // Ignoring the executor since it is not registered. - logWarning(s"Attempted to decommission unknown executor $executorId.") - return false - } + // Mark those corresponding BlockManagers as decommissioned first before we sending + // decommission notification to executors. So, it's less likely to lead to the race + // condition where `getPeer` request from the decommissioned executor comes first + // before the BlockManagers are marked as decommissioned. 
+ if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { + scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) } - logInfo(s"Asked executor $executorId to decommission.") - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - try { - logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") - scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) - } catch { - case e: Exception => - logError("Unexpected error during block manager " + - s"decommissioning for executor $executorId: ${e.toString}", e) - return false + if (!triggeredByExecutor) { + executorsToDecommission.foreach { executorId => + logInfo(s"Asking executor $executorId to decommissioning.") + executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) } - logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") } - true + executorsToDecommission } - override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 34b03dfec9e80..b9ac8d2ba2784 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,9 +178,12 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo("Asked to decommission executor") + logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) - decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) + decommissionExecutors( + Array((execId, decommissionInfo)), + adjustTargetNumExecutors = false, + triggeredByExecutor = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ff0f38a2479b0..e1b4cb82cebf1 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock +import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -1809,7 +1809,9 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = synchronized { + def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) + + private[spark] def decommissionSelf(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala 
b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a3d42348befaa..3fcfca365846e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,8 +163,7 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) - decommissionBlockManagers(bmIds) + decommissioningBlockManagerSet ++= executorIds.flatMap(blockManagerIdByExecutor.get) context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -359,21 +358,6 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } - /** - * Decommission the given Seq of blockmanagers - * - Adds these block managers to decommissioningBlockManagerSet Set - * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] - */ - def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { - val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) - val futures = newBlockManagersToDecommission.map { blockManagerId => - decommissioningBlockManagerSet.add(blockManagerId) - val info = blockManagerInfo(blockManagerId) - info.storageEndpoint.ask[Unit](DecommissionBlockManager) - } - Future.sequence{ futures.toSeq } - } - /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index a69bebc23c661..54a72568b18fa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionBlockManager()) + context.reply(blockManager.decommissionSelf()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index 9c5e460854053..abe5b7a71ca63 100644 --- a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} +import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) + 
master.self.send(DecommissionWorkers(Seq(workerInfo.id))) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index fe88822bb46b5..a3438cab5b0a3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,7 +122,10 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach(worker => worker.decommissionSelf()) + workers.foreach { worker => + worker.decommissionSelf() + master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) + } // Decommissioning is async. eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 83bb66efdac9e..4a92cbcb85847 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test").setMaster("local") + val conf = new SparkConf().setAppName("test") .set(config.DECOMMISSION_ENABLED, true) sc = new SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,7 +78,10 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. 
val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray - sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true) + sched.decommissionExecutors( + execsAndDecomInfo, + adjustTargetNumExecutors = true, + triggeredByExecutor = false) val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds) assert(asyncCountResult === 10) } diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index f1870718c6730..293498ae5c37b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler -import org.mockito.ArgumentMatchers.{eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} @@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase val decomInfo = ExecutorDecommissionInfo("spark scale down", None) if (decommissioning) { verify(allocationClient, times(1)).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) verify(allocationClient, never).killExecutor(meq(expectedExec.get)) } else { verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get)) verify(allocationClient, never).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) } } else { if (decommissioning) { From 40ef5c91ade906b38169f959b3991ce8b0f45154 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Wed, 16 Sep 2020 16:53:25 +0000 Subject: [PATCH 0045/1009] [SPARK-32816][SQL] Fix analyzer bug when aggregating multiple distinct DECIMAL columns ### What changes were proposed in this pull request? This PR fixes a conflict between `RewriteDistinctAggregates` and `DecimalAggregates`. In some cases, `DecimalAggregates` will wrap the decimal column to `UnscaledValue` using different rules for different aggregates. This means, same distinct column with different aggregates will change to different distinct columns after `DecimalAggregates`. For example: `avg(distinct decimal_col), sum(distinct decimal_col)` may change to `avg(distinct UnscaledValue(decimal_col)), sum(distinct decimal_col)` We assume after `RewriteDistinctAggregates`, there will be at most one distinct column in aggregates, but `DecimalAggregates` breaks this assumption. To fix this, we have to switch the order of these two rules. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? added test cases Closes #29673 from linhongliu-db/SPARK-32816. 
Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 5 ++++- .../src/test/resources/sql-tests/inputs/group-by.sql | 3 +++ .../test/resources/sql-tests/results/group-by.sql.out | 10 +++++++++- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b7791cd442694..6033c01a60f47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -143,7 +143,6 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteNonCorrelatedExists, ComputeCurrentTime, GetCurrentDatabaseAndCatalog(catalogManager), - RewriteDistinctAggregates, ReplaceDeduplicateWithAggregate) :: ////////////////////////////////////////////////////////////////////////////////////////// // Optimizer rules start here @@ -197,6 +196,10 @@ abstract class Optimizer(catalogManager: CatalogManager) EliminateSorts) :+ Batch("Decimal Optimizations", fixedPoint, DecimalAggregates) :+ + // This batch must run after "Decimal Optimizations", as that one may change the + // aggregate distinct column + Batch("Distinct Aggregate Rewrite", Once, + RewriteDistinctAggregates) :+ Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, CombineTypedFilters, diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index fedf03d774e42..81e2204358bc9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -166,3 +166,6 @@ SELECT * FROM (SELECT COUNT(*) AS cnt FROM test_agg) WHERE cnt > 1L; SELECT count(*) FROM test_agg WHERE count(*) > 1L; SELECT count(*) FROM test_agg WHERE count(*) + 1L > 1L; SELECT count(*) FROM test_agg WHERE k = 1 or k = 2 or count(*) + 1L > 1L or max(k) > 1; + +-- Aggregate with multiple distinct decimal columns +SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col); diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 50eb2a9f22f69..5d9553f804059 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 56 +-- Number of queries: 57 -- !query @@ -573,3 +573,11 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] Invalid expressions: [count(1), max(test_agg.`k`)]; + + +-- !query +SELECT AVG(DISTINCT decimal_col), SUM(DISTINCT decimal_col) FROM VALUES (CAST(1 AS DECIMAL(9, 0))) t(decimal_col) +-- !query schema +struct +-- !query output +1.0000 1 From 657e39a3346daf0c67cff3cf90fe68176c479747 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Sep 2020 10:13:47 -0700 Subject: [PATCH 0046/1009] [SPARK-32897][PYTHON] Don't show a deprecation warning at SparkSession.builder.getOrCreate ### What changes were proposed in this pull request? 
In PySpark shell, if you call `SparkSession.builder.getOrCreate` as below: ```python import warnings from pyspark.sql import SparkSession, SQLContext warnings.simplefilter('always', DeprecationWarning) spark.stop() SparkSession.builder.getOrCreate() ``` it shows the deprecation warning as below: ``` /.../spark/python/pyspark/sql/context.py:72: DeprecationWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead. DeprecationWarning) ``` via https://github.com/apache/spark/blob/d3304268d3046116d39ec3d54a8e319dce188f36/python/pyspark/sql/session.py#L222 We shouldn't print the deprecation warning from it. This is the only place ^. ### Why are the changes needed? To prevent to inform users that `SparkSession.builder.getOrCreate` is deprecated mistakenly. ### Does this PR introduce _any_ user-facing change? Yes, it won't show a deprecation warning to end users for calling `SparkSession.builder.getOrCreate`. ### How was this patch tested? Manually tested as above. Closes #29768 from HyukjinKwon/SPARK-32897. Authored-by: HyukjinKwon Signed-off-by: Takuya UESHIN --- python/pyspark/sql/context.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index a9c5b3ba0c254..937d44ac5ecbc 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -66,9 +66,10 @@ def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ - warnings.warn( - "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", - DeprecationWarning) + if sparkSession is None: + warnings.warn( + "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", + DeprecationWarning) self._sc = sparkContext self._jsc = self._sc._jsc From 7fdb57196313b0dfce1695fa4c165cf8998efbba Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Sep 2020 13:42:04 -0500 Subject: [PATCH 0047/1009] [SPARK-32890][SQL] Pass all `sql/hive` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix failed cases in sql hive module in Scala 2.13 as follow: - HiveSchemaInferenceSuite (1 FAILED -> PASS) - HiveSparkSubmitSuite (1 FAILED-> PASS) - StatisticsSuite (1 FAILED-> PASS) - HiveDDLSuite (1 FAILED-> PASS) After this patch all test passed in sql hive module in Scala 2.13. ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/hive -am -Pscala-2.13 -Phive mvn clean test -pl sql/hive -Pscala-2.13 -Phive ``` **Before** ``` Tests: succeeded 3662, failed 4, canceled 0, ignored 601, pending 0 *** 4 TESTS FAILED *** ``` **After** ``` Tests: succeeded 3666, failed 0, canceled 0, ignored 601, pending 0 All tests passed. ``` Closes #29760 from LuciferYang/sql-hive-test. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../regression-test-SPARK-8489/test-2.13.jar | Bin 0 -> 19579 bytes .../sql/hive/HiveSchemaInferenceSuite.scala | 2 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 2 +- .../apache/spark/sql/hive/StatisticsSuite.scala | 2 +- .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar diff --git a/sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar b/sql/hive/src/test/resources/regression-test-SPARK-8489/test-2.13.jar new file mode 100644 index 0000000000000000000000000000000000000000..0d10f7ff03b35a1829e4f64ddbc82a0124b72f1b GIT binary patch literal 19579 zcmb5V1CS`)(k0rqZQHhO+kM)$?bEhx+dOUCwr%5`@4xS#d*@BeiD8b6_vGj z=FZHuQb8IR1O)&90s;WaiCz&mfeX1F002M<5CGusQvg{}6+v1_IWc-!K{-h=QDqf6 zS+VF?L8yKP1ksm(O+&i4f>WUmE6{SG=D2-666L5CNy&wXR(4dCGN>A`&=>xjlm1gE z?wOcVE*n{x2XnEp77HuuS@*qx#N_oA3}H+GiY(s3$G9^<{$BM7A}HRdo5Vn~umq&- zg2m(%`ALckp*=`Kix8NwjdA@D;nmgcGH^E8uNDa}UFsHPbT+bKSZb+dxDfH7d2N_> zj9G#GL&{saRoanc|0!l; z=xSn1|GyU|_+N$X9L?wq?G260P3WBL4IQoie@0XNAB=W3adM_*qGM$KulM|G@*w|C zUU>1-l`bd%z&11hz~2)<*3iP1gwDv?(8(!T%|abn9mAK7OomBhu&ixSDoF!4kkYbR zyBSJru5OVaWqlcL4@oRBorAf4eTmL>=fy{#FEs}_<|qq~|AoJ~bS4|5906!LtbL=C z=SBBs=jZzk-GA#bd-2 zdns^|WB%5AUaBTvP@wWwp(mE|Z1fJNK4D2F=MZ#SoqwfzIkbGj&|TyvcL8xRkF z@=#{t5e||@x(OC~5&tn2vB@AYhHH?h4H|nO%LWnRaExp=iz79|#eUsMhT2Pj5gdm6 z0*Zz{2;2EE4Tn6+=F#H3r5jkvQX2!Z3EG-lw}~d=Hz(6Y(iieW(Z$fP4S5P4kK?<_ z$@Z4l%N0a^&@A-g@zL*JvP7)x;}ArFxDw|m-t<#F?yKgjlnC;)a;L=GxWjat;s}Mz z@eAqZ+CWaEvlIuumrImJ_>H7y3q*vokWU^1Pf1M!;Z&~O#BcDsad5a{;fm>TKtde- zA&L}_*dCaIZs~dA2E|QLb6Grnd9vuxpc{8Y95uUV?h<)~)ZKuk+NAj?s~t&JN* zRNuC&{>_<{qYcWM>JHVrodQX#zB=nWH1Tc%dMX5@j0Y8ZxL{ zZLX@srQy(Aob(Bab9-LW2B@57b~knjEb!8*@R{lSCkg|k0mGQzNj9$@O};%z_lPF# zx6T|C4RtCt%*yIsNYD(luIWFj&Q$JD>R5G1>X^-{Qw+5Y31+l876QyUs2c3Kh9&Ae zwZg(t?bDAFil1*HsAs4c(5F{)t$h{j8Ih57dV|tj%LgXwTXuO3BezipaLW1yG>X&| zm?qGMZL%XkAp&O(#o`bvcNH-ZddRC9kxB#DRnn*w-f){}0Ldf~yR==^2eMs-9U@cL z2u_jdFtO}mw|Lm`7i%F9dMdy_zq#u!Qif1sC>`)@g6}2$fG3Cq2Qzd+7va3cx zp%EZ0SmQB|S}D$-Me#c47M-WYhYrf|wGx#dFIwJMqk2|ZnmCVcvqM>Mx7{Pbsxv^- zDTGm-_2Jw*4ol-sPpmS9w#HDIKLQswd~YwGD^4I*c-V9;2?~s#K?pAa7m2HQS2X!n zNM2H__`DkZL34$qIMjP0kmWrw7LTzqM)oA0TwEx)IsKy~Icb3o$mx^gP>?n7-^{W9^Gi2L(owCA4fZ9ni&$tMxrodUHogy;N_A%F^~cOQe?(vHIt} z+MQ?}U=-+~eoUO@qGpM|)#*(IHn-@#)nH5>xCP9dGWr{008#W0RT|{)0>NkDu~L7h{_2|iYi~}YC3JSHTcfd_6}5!x5-I} zwKy^DWgd{$i|dg&GI2gIfyT#yWRhUSB~^dEZ>&nh5s^x_kgCfnp=yF)Dfnr0Vn>&p z5T4Ba=y~9E9CM7B%awwJkdR7BB={%5)FY0Qvj|g~v6GAvGL!R^Ktj&_Zp2^;r@%X) zlvIF%jEZ#-@)P>DH^sA;)7*#ZAUAkkVjXjIE`EE1!TIC)bPnQvH+K=l&;@3zU${f& zk@KG8=H_;?&wzrk=|99>i{Pk`;uL5^ULY)*3b>bcocN2voJaOX)tDP%ev5!8&5W{i zY;_1S#*`D5Nni1eJ?*3O8vNzYEi}ZFg+M~o>nXudUvSz9`ziJwW)tYT%T57TV!~_) z*aMMhmdDq^z(HhRJ@BBdZ|+SJgF|4%A)BF_WOazXw6otWrf?7>Tt4*rF0L6&M;sV1 z+xQIw`8F*|9KIZxG-Z=WqDjVNE5)Z_d@$nOOF;b=*Dmh~L%*ol>_}g{q$nyrOsLv0 z6dp?+P_t1)g&bF@WNtKmD8R7M;4rRtrObfi%$C#6vStr~8|~(~@7XW+w2&BI`vu7g zcRp^OZXs~Ve#bR@%ts{&cO!(PB{NP*a)v1#=yh2EWpH&?3Ym;N8iKM;P z(-j@ygABigHF+qK{y4qcAMl?zivP zK?N4}+7X)mI+_Tw6`rykeq)Dm$rR47Kl1CZ5(bP|^$js>}e6`(sEUeMb1 zNAV0}2NH~)9v_$y&91)Bg$R)_ov0q~vANp z?u}x&$GzY3xn2Gkk<`qrqaMEkjnu8;GRbMB%Qzr%BIAs3jO_s)!V2p9D9fB6t3{E& z;U2W6YahQYvGZcMZtYwk3_`{DW@Z8yCWLEh2p2DaRN)HmX8x&o@Vx$bJ9nGfMq_o0 z5Zzd}wb33(%B-Jq--rST{2k+1E7lf7;p7sdKN0`g%uZ+kI5Ok&y-+w_S%3VzwzM)e$WMZhdu~|1Y6K4G-v|`fv2X>HZP+1x4$WKTu(MX z+BAz^gejI?nP15(F(B(*>%{}EGlG9ZnL@U`M*2{Mt{P41N|i*C3^Y9Eog|E?gZy∾}cN8X8rpJxFIYqxx6hbf7F)N%s8tggi{ZQj`DH@>&*+{~s 
zIC3dDujUlM7~CwSqINI`8?1O8fIF9zH8nlScjsYElI5ZnpM?7~lI>)&HNo2jx)FZN z+`#3l%z}n~AGg~2>)c$uR&(900^_d{tY1aj>t-KjCL?5rJqY(cfe)G$Vz9uJ(TPbW zEN0%vG8!q+u$oOVsngGPD1p|85#aj1X}l9(4D@<@TG*8xN90DW!9@f~5Uguak4#Io zJ;ixyH|-2w&+HX`0`P-*?VP{3OA+^$KLIJzlF!5dgI)49L7b@rWGPgUMCmndHk!)KaC8G+eoZNC@#Mts3 zo>49*B67lQkV@aIw_F+}47x%ij$oW~wZ%8|mlA95qle4_mJT7q7GC}ozdD*)DSJ3X z8t#wIzwj@$(68u^wNF3Y_*u$fL)|5BD$F59GRnqXTa%$#DX4QUcpYocK2ibS47vPB z4|HW7BBpj^RS7EP`8*a&IASHbD#`n9xU~@}U5>N-$ti4ITQ1*dmsaIh-bs`~i01U` z7w}X@+l;B1n|01@xbrnx1PrjGp`BObhXSfSx$bwQ`txz#*i`6GaPM0v9bfa#42LOg zg{I_XmNROt|CP>ca}Brw-MndL&;+1s2^bdFO}h+745f|9g%Qh%X`~^Dc8e*oUhP07 z8ZUY&8E8}g@^)%$pw*Z;BFQn{vKe#2@g>wxL@=R^^2|mj$vUBJ`zzB)LtSk@Gx5sF zXE;ITxlNc^c+Ia=Wlxi31Y`kG7==uhB&IB_HKqaPR%HfZQWFr z3zrD}|Yh@xTFz>rdaS4DPHL&W$Mt=bfe7%8U)0P$r^@a|$Yk9-V^jq`RAdR}#9WZ)DPylpojJrG^j{xIb0L z%L>^)s|fep8Nw6P*FlicM5SD%mbbMQypkocV=4*y$uvu=G0Gz@vMT%F%llH%DWxYB zI*%F&{$gFy!izJ)ty)ikOofhb?Rj@~qqA#AyRZCae!A-Zp)B}leY7-y`|6bidPLH_ zY&nQ0$8_v}vjxVI)KR*zCPwFJSU9>tOwOt&+KC&wf0FB1 zXs^4=`nskw$TK}vkUXF6oKxEgn)sXttao7cbBxvd@iP?ol?#w-PBKzTVzMK7O@6ks&C z4xD+$vK?tSy5aHpy0-j2Z0~}Tj%!cSlGlZ;w#qfrk9?2t5D?{B=k&jGOk<+^%78@wMW~kqw?f!pzhV$>bGyKXmwo1 zl!OyFk+pWp6RZo-;$~O-4%eRgUKRauo%dUj!%B?%CB7<>|S*-vn%ikANwHTz(z`v{*+VG2t)0_08c;vtbp7ok(lq^e=i6M| zFU^e>0bXYu&t|%&Y#ZtKWx{erSC3fpiCvvy`BT=0V+tD#W%PiB<2i7GrqV66JWMtr zjG_Iu`?HSe#aE#BtIOSw)Bs|fWjv4>#Sv$MZW#*-A>zdaQC?kI0?JR1_X@=0PYt$p zu=&m_$aMj(_Vh_rtU@*Wi|W9pO02-;H;sn}_f;D8A$0g~S38a)wc7->)&|-dnY>^# zyWo^K7;I5H*N*j;7KL-H!&l!{M^B5FB$k475?kx1;?3l6nU5mBnX7hMH*tJ?wX)3_ zL1~OpAiH36AIrDSXaeaKRhvu1^aKi=ZIZ>l;Q)B^O5+u(l1JK3e<0g$4~P=t-5br`2nuZbz%OT3zJGnDkkUb z?O*GOM~pjGF>Vf=IA7xCWed8f5&4S(h!*viE;_988iA$HArC(S(XjYw@=U+vx7Ipg z>WxGDP|Y}@HG);r3dNF$&uB1_IyI;szq4Q?6=~_>Bp;_zwX6$uUv$+Ks*_n-rsrzw zdIU=)cU=jW_xP;OeDJb7?T6FtK{6txNE5ys)bPMBTc*y;^rojV1pW3X2-mP5(@C-l ztmO6$|NcFN+}^fbLg5pV55-v=>X$ufh4z3PwE}Kt%H$p;vcGX>NCU6&mXVIr2l$El z_T7@?PDU1DjB;ubv4#n`r@a)v^Xnerh_{-0fAODo<>91xcEbq~eLq!ES^JRg=Uav8 zal8{YudwFMx5~SbF^UASQ}UCUrbp5dL6RHQ;;2>~LKCJx8F;xr3fOt;1F0Wf$rp)r zd0w~rZ-1t|5YK6ti;lZIT5q1o(EKDWt@Ou$yPmyh9B+u5lPGH(;tKRgZRVMb#rfB(4 zzR<)!`-2m0YQ&)vL_k23QMjEF%L-*g(NZ6O)gjSE+go{bQ`5gGou9E1Q{#>iC+Jn~ zuA|{g{;vFa{61@5{o2sBPCg$;L*}KILHby`Se`4of5b;Fm-KcU?Cw1xRd@pSRjun- zX*%g~+u=H?R?o~*E$+qg%WfGjhKWeX+`S!UOR60&qU(a>D<)XRyD+89thj7+uz)v` z&RIlDe?PPv&qOMaLUB!_AVPHZAt{>5fc*h%sCX0n)5-Ngr9YB<>=JT$FD}@Q)}v8J z0BLa+4}q1uL{}3r>QLR9ga;0&wjh2BQ;aRcXULry@O*LvkW~BL``*VS!Ms;FMD-`K zEFM&F*{~nqYL>tLSFfb=ocT|Q65UauAF@bHp4($#jOP;1yS^_%`}~l>*xL%1x^ zs09ZZyg}>ltSnkXSH8UMh|+1Pm$)d%p^Hr>uaZUPqG&fsXUEuOdYfVMs3lf^Y6$0qLn;@3}>3YZP`xgAITM^(f36mKrB?CQJS{ zC6%ba@;<3A;y(IfaifpeVAe;)JefpUq%5Xv()X~4+udy@=U1KGZs*hxDt90E-N025 zRfJAooLJPe7Dm|L{LI#a=^_K9N49R^%u?ce$*}qttPSeJQs{gfr*(FjX36?7qA(HR zOu7{Sv4~GsH}7muVl#QpOQb(EBz;66lwoHx)e0#BcEU(AzF(4I-difGsZirx5=+aZ zsG6|i>F2G*=?IL_wKDv`WeXFiTwzSmh}A%mO2|32ymbS}j&`fpMppWkhB^|eXtnb& z+Fs_(uKW*+43qkC-iS9x(o7_+w}%@$PnrvC$B%tU>T}wRJgZ(o-FlHc`o1bB0UMg3 zBon3TVU_h)vn=kIRk;%qznX4ktJ^v z`0h;8P#WHAW;Ch_i~~s%NM+P$)Xwy1XGIoXgb_!C1UM1m2^wBfeqdkM4g^jN!i(+(1K`OErY~A4E74 zvdA`y8YE^JAu=R5H0Uo2B<5kTIV7XAs31}gH;^fv_2?GHNRn}EvWRcF7_n3??QF^~==*Dil?eN}sy<`|%s_)T+5^ZjbxKpAb%Ny!f{mz^R zmn&8EcFa0-_29~0LU)k39r7a;p6TYdWClKidcX&Tn9oZ_t@q*7D0Ulj=h|5D3xNY0 zl1_HyCK3)Qc9`=}r8&Afx-#OF6VMD3CDX=+n`7-l5Fn2VYEn{Tg&)@bZAudPCyKKW zc&%oGk!$v9;xwi4cy5 z$tRHgr1Gtads3=pHYG7fjaEb>CqzDBiD|+G(U2$QS}et;SmxnGdD5l&OEuH%=%E65 zdQI2H15wo=UhBdtJ}))?U5VJqoBNLng1Yb5MaE`sV%lALlf&9H6fVIpNnNkFc|~@b zNOP}MI*FW_WuVmG3DR?t710A9U>zbJ!#w~iD)TNu3U8Ule=O9b#GG~PCtMx^T1YE5 
znSc{TEq_VSc#Lg32~=0@z-xarv3dhC{hfM-O8hDwv>DcReRD*+-mw60LM*{N@cr?M z-EoaU>61BtP(HjwPlPQoR%8NaCQ*um*=1J2(Wgw3qm7_kzu#YiulC2A{p4H_Mp{#O zagLmgX0Ez{wk3Qfm~Tsc1qQImMYolcuypWfd7*uVpdz%#;Lr&suB$v`(&{>hiKwuZ zpo!HYwiwC=8%k@sS0jM_CtU9h~^&To}*7Z|{LcnJ&Vc!rz%?M-jt%kFZ;|SKJwZZqTaCG@=E`mrN znPNe3VCcLKw92aw_;3=mbMJs=lC{8zgp z$dTgP9dIgTv$RHN(zqM1WO8oA_3=2}t_NyNw2y5N=2$yPA(`e_)G&T^1gAzqRdL`s zm7RlN2fMxtG2BFM1^=Q-tKNQU^*^{r^{ci{ZO=URQRs$k z&$fF@IZ(@~e8IQ-y%XT><-i#P?+{a%epE%PZ@cNyRiNinr5A>CTJU^?<1o+L#?gl5 z-P2_&N`$duVKum51#A#|hni-$Ik(Ppu#e;%9K%=B_j#p)LESRNPlrS?y0XhZFWL&d z8+h1XIoxA;XAcohRmI+N1%xRRF-L*4%phk^b9?Ib9NU{*&eaNYnR3`?42zOZ(`It~ zb6Gp^7Hi&Vt$*Oq0=C16vO#yCp9mAXjCmhI(xBaw=^UionE!>|SgF4~6n;*b` z7tH^GEq8!yBU^z00Q&wymstM|Taq|?*qazRnix9UIWm$k{tLy7R?(J67DV}$vxBzD zS3sl+q^MFVQ@GI8Qm9IcYcf~b9YQZP2_W6rGIdQY`jOISc-@7+6~~;h#uhfQ+6?35 zKDp0k-pOL-_w)Gy+@*AdPjkElcVi>qVrg?|%w5YXJfcfOqJw6sjBW3S-_H0Zz6^k3*>8LeEFg(lz&E=!H^K#t;r*IOFV7 z;Apv!{4hE!vfpOx6@+OXRb*jv6?gMT0uhj>SKH^)Z-WQOzJ1U zB72we$!TF+fsMNkC1JPKMkovhyL;wZy>`mMJ?xWCy9ZVbz4ow|a)1feIZtQHP`TTc zQJ5L=Y+f(~dT9@Y`#fVL#dz1p=E?~ax<(cH^K!#G=zXfYMB5;7e=0#izg6IYzp6Eu z6^z4iNAQUC5<>v_=c_0xT+W_H9C;Y$vV#*SMC)CKNmgx0wq*QJB~{+265xpXAHxGH z^A_is{8*e3Y(4b$K!^zxvUWSBv|VwgiF>e#%rrqdu~k#As7mR58-Hx^FFORL|`HUheQ4xVfar%8{KVRQFF?^`}+kA;uP-0vQb4BDr)fkCId$w(>VkbelKW6Fty_rkkh z+@n5F^(3p6Wb%(JH!K(O!7svDmfNiRqPQEbL?L^G`q6j)U2$Z5g)#6-&#Z**#R72q z|1rx>zUx-?zcd1bzgQOHKW6D6Y-eXJ{12PpU)ETyexZ!Lg3AALT~o9KT#sZhI4~V( z9TXQw+Dl-VRTT^n;Er5}W7ObJvJBCElr|_g@AlEH&vQ_V%a!dd!5y7wVa9cq>RR%n zXqM4&Qe(X4-#;s3p!Raj`;zt0dGbBo?)LMv^XC&5Kx~g4geG!i&YKT=s^X{t5h7q& z{M=|MSJesg=l;5IQEvzW2THBwtfnI=W?ye7-JzM0*H7FnrUMe!mDpy5kf0u}P!3z(pLi+I@=RX!q;1)&bO zpRS5FNW6liGYplGq8drO2wIsk1r{fv)L_~wofDy?D`Z3#nJL|%a!oG8SDv?Lm7%mc}Q?V$X7`$fS#U0O+*dCL1}$D)^YoQde~xHF`aR>ec{&tas_v9;g8ge9&HTd>k)ot_VI+|m z&)oew`vecwEx2nkSV+xuH+!~xZA~SKLd5#PO{%ZC<>gayfM#;9fgPEUBl>LOaQK zaFc|t9xm8P1dr7|P&&;eL&GBp=!3_!tQNGX)^o7FR%ilwKf?{e?++4)e#_X3M{t@X zqZ}y+9PS6-OJy?Q(v(@aEv~S3cwsot=;}AkX5xV}+PKc<|1QF_#Vp?;M!Z2|#1Hj~ z8h8m>104yivgS1JR2Vz-By(%VmS-No*?P_#YCZxNw8=l}OR1jd%@`5z2r??Q5IsfUqpWApz~nvIXsL%KDA7IQ}7ts%dXSKrJ;8wjcPR=rMun{@3Cmu zp&Qrc>&F!xaI<}mSEPc|V*W1bJL&H8!;S z>N_l;P*A&`SpZ?BQA8jmT>CV+qCMx%nPu`bV$#kbEp29BCuaM3`kE5Za`@|h1ZA^! 
zi8>g2W9soqMf*Co$tg%p2w%?H{2^SXW2ndRCEZz>;`ttPFhKT;dV1{qI~}L;Hr6W`PN5p zcUe?j1(CR(^u?bavud4<`k*DuDCCM1E8c_}7dNq}0PyJCsXy_BMz%fTA6FCi%wl5#!#m#-u zQl87^N{G9dxq>9sfLvx-vF@V#R9C&rVSmCD`l4fqKk?o1s77s4lMa~4un?uH+>wH{ zgt}((E-G)FAIOgY$yVDAKYPQ>fUYH!M zYE;iXszz^EBXz^bP4ZnFciiA~OWW8GLjPCz>DX_HInIz%x?c(Tv4;I|7%vsY3Bv+D z>yuv|5B$?f3;x&VdA5rJum;{x&6$!knOs84wB_RNsRB9OU~Q>04OZY-FC*0D30h;R z488%8hq3|;(@m^*!HRcqByzg(Wc2sep!hJ_y#T&41>*ss3O1!^zikoLIe*mZ1S26( z#HtNypKP`Bo0l^Mw7uSd?q}`ACoGaXL4^2|;xg6Jg1JqbxI#F4t2$nkEt)f`+kjyP zA&|S>)KHAeoE0Cd%!B(DMVZ@{Jt5R@naX$>>$OXp`HGU7plbZFF`^2?7xKww`4IfL zshU45Xk&pXs&^PF+ap|)`kTv8ZQ$vfC(`n@`2J)z^i$Df7=kWCAT-(4ZuoW6|9}&o z*I*KM%(R)D;?w@-wtJ6p zf<$QTC+097MXV`Ls9iJAvU~qJStxQ~qkGYDNr)}m`x;a!(a zW?L1cy@ty9J)^i}2rWIky4-eW}uqnY*WrJx#uv z@eQJzlREoATK8w>E`K*T*IN*1Df7=6+S{8;XVV z@W}_(Ei?0Wv1CcM4v!dY&MOK_<%ZGGe#$kjw^J7256F?DKe23bDSau9RRxb1=TUoT zLv76@=RSJ5BkYogMp>ojMaAOn5NJkIe!BnA`?q(D9gU>WmNKtjuPcV7AI&D6_jcb2 zS>yqvbHnc}|G{NfpyO*hgD;RadKU?PGj5=Mzq~LEm{3e&+ha`_+>R*)qafir5NwKC zvzvsT@Ei67vRASret}qcdZ_R-m2K;N47B-BuQJ;{vn-|ZKVwd^?6K-uJhm<}cI!Dn0_3{{Yd#HF| zq3pn(Hvn~B2Hqj)I0u?@l;!2&O^kax20pP}{Q`6AH2ydKM#}p21}h`CU~5@W^jIqH zb5VKjW7Wuy!e7ClGOPKu29pP0P{X`EwFHN6SbW38rwjaSTkBeY*VY_o)+oEvVu075 z-$0WC^v;b$pGX?T!y7<+DNIVo>KwOmb&c7Ei%!b?gpUc;se0YP=f?9hxY^6^Yo)n{ zp7{$_DLEnUV-@O9-R|c&q56&0s1%>@|IQZvA!Iw&&^(C!jlM;wbxd<(xPML$kYdn;2v zMK84?O(P*QEwMyRCqXlQEJZ^#CL=9F2RNQwgqC`Wb%u%M1PJi&Z~G^4H+%v}%K-)e zfc(p>M*5#Kg#VXzoULjjk1dYiyKI2Zm)l2HTa=$4E@>TgfFQ9jNC$gFN?IRq43W%F zXG)lggaelcy)(|dAEbzg`%>FRh$b8}V{dFJ@0R0-$ zSB6j9S<`L(;L2SZ!1$?W#oj(Op||_GoKHc2)#4M&{?*)T@t$%qscuF8W~JBC?xeux zGvf$HrB8AeuA&(QL(^&vHM8IHguT-&8nUjJe;n)DVlI_FzU_J3E5cZgbZnpWe(fhX!|r^bvsXJ_tDEZNE&w&2%M+ ztA58i@S6A8)Emx}KKUX@C8FB6me4Aavd`kf%4GvitDV)Hhjx}*b0vEXcZc`BVUH`Q z$}<&L^CUrak7P&cfb0RrtCDt2SkEeqI}0s3P!#}~m58&+W>31g#dxdQ*69_vHMNym zaHifPg4EENR~YOCgAV2nNqc=kvCo|@_>+v?4PqIWpwByjJ|i-5EbGufLYxc3{()U_ zeGS@j`W)@ta@1}Y!}3X>`{@3gPA))e{tkkliHFnbJ}7QTZ)WVQChHiJiQG4 zui9R^ba~eP~u2eVxV8UBXkD)KR!EoEgdV^pzKlco}W9*0})xK+%Y2x$oZM z@GSph5-GkFDSm}Oj)ZzGeP3TAw^*^@4pLsekYQseeTd3ZDt8F$>X+q9nyW69I^+XZ z^azw!hc?mC>F@zF?&3}^5I3GEcFZb9e~Xs*ZPxO|BXv%kt%+n+S%*PG&7uwC80tVw z@cV!8R{yc)hi?{**S~Ea`hQDtf4e3At9#&IiN2_!t{svhLiXC$gyS0(wZ5&4Wh-qx zX?bvwhW4OwzJdjzb7Y*#(Vh^x4p8S^wNW_bvEqm^U*V$#AC3bXK~PSpxH) z@6T6AfYLl-3f#RmDgq%#GQRMAowkKvZY+%>0Z>dr#tVKuIFaoFt2TCt1$OesBoxnV zRVOmKh*jR5fxTiEy?R8SaP^-dA*|`rVq;7f!Q>?-Y;lsf3;kq{S3+H zQ_x4UcV{_)B_&sx6(~ocs%`XMGaT%sWTa6{=*?VYM^WGRBK+9J9QaEL<*?*uf+x5Y z7v6Ewm)9o7LC3y%?QR(y3hkf9t4s>86J39b_@I*yFwoUU&sS=vU9pt*y|@{K< z78UP$D(dNr9blWg`rh1skl>kg^s&lRu2ynuJ6dIWI8T6>(G%64S=5;qMr2kpw+z=V z&{!(-<8qw$^X$kSj(#ZY}-m}X??GZk|P)p1=fKZeOgwZ5IVi-jf zlf)@dNJR)8QHe_j@%xW@(4pgBZ?EFeEeQeuVEMOchVlP;Cvvy3&Q{&9-QYm*c~PH) z@-PpO7%zs8tSacwg#Kl12_D}ln**hVHDOJnL|DGMo%eCYT|}ZSr`ND}O^5^XvU{}BGm=Y=)69dvTfT2o5 z$}c=ClT;ZZwE+#283YVJbnHkxkCZA7NRW!6SO`2($`B=YDU>Y|3`wH~K^jF2900Ol z0AIeOun{`#0f_LR!=d0-+7=CNjHWHLSS6N$oF>I&6U+?MUJ%C| z1h5??=_yb>U?6?nIAJ0t+SXrJN9sv%*@mpUXhZ9AU`O_Cmx9Ay1HUSM1dHXo_+R|(vM3c}!jVK~q4B^@WByq$Q~sD2XI*F77|Pkf zdyefeX~eh^$zi%Nd#p>YSvs$@4ks?;P>UH-&nix$$vM>ls)IBp%=_AP#WqD3$;g0+ znp73-qc4K3RQ4QSwl>l$b48yYU@AOhz1U<7QarCtX;BG2VfQX9-5Hy*dl$FQt{gcr zbVzBN{cFbFd>JuhMq0Z4_MZTL(ZeSn{wf?$!Ie^LIKL@cMTe@+9LOtTf>on*FQAn9 zPAE;t?=y}d+KZ?Os|=`ANLO0AGbl{qN_R=w8`*HT^0>N+gM)|deWhLm<47Mn_Mf<{WE8_rqsxtb}qHJU|J%?B{l6QdtVEiX|-LbgQa zjqNf*btVqSP!62Xur}>Ye}NrsDB8Q@gmf9$GR*_n*+hHz;W+ z(B|BQri4aB?{q{ax^e7=|~6r2%HNK)Ool~nn)HiZQyW5|!8 z+iEfDC^k_ze=O&ziEgs>nrzw$psXvS3ro)D@0D$2Jx+pDcL9Z&yov5*UXc~&v~G>o z2`N=ew7Row8>X2!BVT3L;p~N$mtw){V!6b~%}qNVbj4`zeObEgT-5qf^;~tICZW=# 
zIrJ@ZnPwIHk_67XuLY4U2D;auDD=Y@{2P za;{Q6o`*yJB$svP{foB?p-)-Y!Y1&$r;C*xo_2F~n6FIr+UJiioq0AVqt;ri?eT8U z0O*_A8(*!+p>wIOZcSaYER25 zM&bJCdMJF_7VH|`n0H=;*=6?9s^L?DRjOk`JwR{EQ5)H>a%ot6=I+x;MwK0V8uUi; zDAclkb)KEr!E0{gPzsP}=X(T)9G6i`KP6vXSx(fU**GB99nDB+uG*fX8-&UNHYUV1 zl8@m?Mq%J2SIKdtl8{>dn7f+v!@a`3WV&V?_Li+bgjp{7`FPgVVRQcTB zZiHaOQjl^ZCtWDt;WP%K)ZFc=r#zc0(MQwNYQMka=I;7Fnrcbb+D)FZ3L%Sh%x8l) z4_rIMC7WydGP)Z`BYIw+pKj*7Q z6DR!G4rFwCwLVj`b&vJJerCzwJFpZ}P(8fe0nZVemf%nUyS1-yKzW8R$pPDsNEVmt zv>_=W&X(}Rwd_=@t3r+*tz^-Dg0-7U=IvP|HBoV@IvX&@P~w6Mn1jrnD;?Z!3z?tl zMs6LO2sw&@F2Xj61@&O~6@ou&pIn^RWK5^TOtSR^{t+Zb2 zH^<%lB@o+Kku?X|LNkb4(2YVCigt$HB!RBG8 zag-D!L3km(I?0lRQYT~Q0^XVR{)rL`EVq>SNIzKr*7d}Y(S0M)Rg0z8tTghe(%pnY zX^ze%R#{~U7YDvY&a??u0D4fASlt05w*ny2l0u3aPE;8`S4J+C?~+vhmvcgt^nvBK zd@#a^pv<;Gk2OoKQ&y}9kHU5bIb?s7E)i$E%GLzs>nHU_Bpm^$xYN5V>u_tw4u;MyKq;uD)ZYP0Bvldr4e83S#%D?$Z*?R_&xDaKB4}#U zgl)g+=&W*NF_LmSzc3mb>|V}NK4K7jrYX$Ruc%WyF&71w(3<>=ER-vINufY@^w>gt z8KxkDA8$A#MijVY4f>oyG_&+}$r2L#>cZnX+B>6^v+4gk_G+%=7x=g}${@0Wn5YDt z)X7H?iSv0Y;(lqLjWqD{v14>FyEH#oLG8)@-)2Mmjn=!@0L}2H(V?Z0^EFd4=|-Gn zR29j1ObLQu|LMW~-{=6G!OB1b7fS#skbB^>D=J>mI)%(z()V3K$Oj{-y`Gn7*yK)e zQ}*tf+Mu~C2%X@{L~*$)PCGw)q4^tusBuQT!*`JLq#QRRuvMVcLV3^8!2+>6{?j^^ zxIPErqT8)UEd0M1B>Z*+Q@mf+J2W*uPyhyns*unVN; z9yFP60Cm9)x!>}BAAyFH#j?3X^T0>=TLSpL`2i|M;jS_wg7YXoSH&5SWx>ey@YKL5@{pJ2VDey2xqCu4!ruLUVv`^Sr@$xp2N5rO z#pzX)H5kgaYfj8!;ygi!LH(RVQU+y|87hg#pinj!^C<}yBR)!__XtpRaOp)6y$R8X zr$cCg>VfG)uZEtlldr;n>Pe5}PL>ZnZgQc9hg^$Mgs@itw`J}tG}FLq2eBZwW&;{k z#33vU_+hUnlHV|;+mk2IjMpM$l7OmKqkML>!8#3Z>jA&?L_WU^={ih|L9ozXL|;1a z1>UUK`wgMR`4qiH_??ZTqU6JIP0~}!g!Q~>>~+6Orf6|lSm$?=A2Wmy_$=s#LB7e` zVeJ*%hA~fpto=fZDO{g*Hf_?Io*{#(}`gX#I^h&e81Sa4lJ|H(iFap5~1j8C6Rjk}nDM zDUI@UQL~YVLNe>Y^cPPVBGawlDa6oU)-JNscE8z`R38`<7AbCv@>|k2z2q9yAOxEa z@?w(Ui1CGSzNAXKnae95_=9BJkq0lX0HiK?R$oH9Eb6UFPyJANvdOOJTGGS|m|2wy zz6lf}#3{g(<}pVy#i9k><6x!;cxv?)9Fvy!vO6wj>8~4BG%eb zg_85vO`<_A8PkV)>(PpSKkZAm``?rQPbF6#%~sxqt+CV+wTs45Ra&BIU&db4Uem=` zikb>3ja_L*#a3ldweM>CFxFDFC6?ONnrdrE5e-Ta(<(|B`Ls6KDnis^}4Z2d;2y>%#&(brY;#t`XQHzs}VG)er zbBL(v)x(@wNK0jSv!=ZJ=A1d!CR5?k;+e?0(W-2w){*J6-t^3C^F3F(;xqMMsX8`5 z`49rjSqv0(g+ zmro>fSPsNkuw|BV+0!`+{0vzG$u><>OIpd+>Des2mmsif=oX*+F|PE5aCWegKu2kL zEuzwA^6q9+uqHJZfCdNTkptWD>kJ|#{rygcMQrfGOzhhgBHTQ0^YF8pp%Q;-O+)W7 zs&4Vk-Pauk#OM|jD2|m*=`X|Ms&dr`Ovyf8l3YO;+`2rB_A21*rFLr6*&7&S!tV)N zZaqcQO7^USiBxqjbLMeIYOpNAXdRCb@?MA?n=DgaX$GxQ)yn{<=p={WX2SvF^bqb| zM~w|T%XJ#x&c|(@(dn8ov;7aeWjrhBA8Oi^b$b24sVViU-}l@~H=Y=$B(&1m-HFBC z@NH?n9-Yy&W4$|@2B{Qb^x45YY(hA=Ch=wBsa-z_MBcFY6@2v^;Y#@ z9SM)K*g4fPFjuJV?!%_c0aj{#>KFE5Fj2Lxmt}D`JE^F8>%x+&Tz~aOiVA{Fyz|29 z)5(Ph$WV$^0R3>zZQ&oR0En^SDo&p;Gd0lDunZ~2ToECDAJ~M8gMDG66wcjWmLK2M zI|Q92hCHAd`~*UY=os|5dxAMurQ4Hf(a7lqza2!-xj?f%2DY0a{Q{<-?FUp1SL?jg zvSwx%2}W9hDy{3xtFLM%poS8{b%>;3x(I0F9g+EPKv-8AXdz}6*6%-O)#+5;Wivag zpHilx|119%2pa%(u1_~-fn=jN*{x@)LC>k16pDUd63;mi6q4umfuo;j!qQIa6RWee3kVavSI@12WVzl8$%>IDfOlv9Sy+Z};fc?mBA^6+DdK z70NrrPr%wr8I>`)gaJp@7+^(TMK)RZ-tgIk@uLHLGhwD1bqW%x(BgJYI7u z_n50SRb(KcQs!Od$_%`qPvh;WwkaYnL}KR_7sgd5cMR~PO9!M8R-zxTx@G-akrxd! 
zotRqaE&wjctvB}}#U9GB>2iMNH>?O9=i}1VFHg(@A&?4_jr*Hut>k>|%~c4!@6yX6 z)m&a6lh#^37_HXR=jW#s-GE~Yo{w7-O-v8GO65VrqsS&xyy>cWnpMJV$ZJw7QN#&f z7wzIK6y;fOjV=Dv$BcalFcpMkBE976^;xnV%TwPXzNFY&m@=xLSe-@T+fla!hYCdQ z*W%gWWh#85H*qoyo7-1*bOJ-O4W5;?jLvEpmX<-&Bh2GlfM*~CC&g?~I;D_NIuV+iH-1zf)k`-OP?a?vUK_Td~H zcsEOqDBYd1Oc&M;7Qym~SfGa%Q?8UxH~jOK z>qXqs$|u{8yd-BQH4PW--{%SK|DXSN6(C8c?_w53y3hf`-^cYF;DJ=j+Ow1l{{$ns z><$(Q#=Jk{{$BW7m=CGzqiYI(!|o3kmbI1s3jT`(_fM^m;(ZPliOu)_U?U59lzU4W z*dLDj`Nmv;M3H#O;XjnXqY^5}KwSoky`jvjprjzxKJ3{gQ}}?t);`j~OUZMT8IsK7 zBtp^3fi;p6Olsn=Ly-(VCb|zk>{q0OA0@4l;qlTGwg08iN?C=} zOb_D^%Q9sA_Wl%}^p+1q8cN(z<9ad!)pZhv@BNq^MTjZ9y&FH?X2zP`@jER5*^ Rq&wL658izZ!OW0!^(WL&1BL(q literal 0 HcmV?d00001 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala index 590ef949ffbd7..ce82756428849 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSchemaInferenceSuite.scala @@ -129,7 +129,7 @@ class HiveSchemaInferenceSuite // properties out). assert(!externalCatalog.getTable(DATABASE, TEST_TABLE_NAME).schemaPreservesCase) val rawTable = client.getTable(DATABASE, TEST_TABLE_NAME) - assert(rawTable.properties.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)) == Map.empty) + assert(rawTable.properties.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) schema } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 6feaaea3dfb89..501a877e8b7fb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -153,7 +153,7 @@ class HiveSparkSubmitSuite // For more detail, see sql/hive/src/test/resources/regression-test-SPARK-8489/*scala. 
// TODO: revisit for Scala 2.13 support val version = Properties.versionNumberString match { - case v if v.startsWith("2.12") => v.substring(0, 4) + case v if v.startsWith("2.12") || v.startsWith("2.13") => v.substring(0, 4) case x => throw new Exception(s"Unsupported Scala Version: $x") } val jarDir = getTestResourcePath("regression-test-SPARK-8489") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index be6d023302293..1f3878ad2925d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1128,7 +1128,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto def checkColStatsProps(expected: Map[String, String]): Unit = { sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR COLUMNS " + stats.keys.mkString(", ")) val table = hiveClient.getTable("default", tableName) - val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")) + val props = table.properties.filterKeys(_.startsWith("spark.sql.statistics.colStats")).toMap assert(props == expected) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index fbd1fc1ea98df..62b6c6c201c68 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -994,7 +994,7 @@ class HiveDDLSuite |""".stripMargin) val newPart = catalog.getPartition(TableIdentifier("boxes"), Map("width" -> "4")) assert(newPart.storage.serde == Some(expectedSerde)) - assert(newPart.storage.properties.filterKeys(expectedSerdeProps.contains) == + assert(newPart.storage.properties.filterKeys(expectedSerdeProps.contains).toMap == expectedSerdeProps) } From d936cb328d1562d280a2dff29e31fefa1ad8bdd6 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Thu, 17 Sep 2020 09:01:06 +0900 Subject: [PATCH 0048/1009] [SPARK-26425][SS] Add more constraint checks to avoid checkpoint corruption ### What changes were proposed in this pull request? Credits to tdas who reported and described the fix to [SPARK-26425](https://issues.apache.org/jira/browse/SPARK-26425). I just followed the description of the issue. This patch adds more checks on commit log as well as file streaming source so that multiple concurrent runs of streaming query don't mess up the status of query/checkpoint. This patch addresses two different spots which are having a bit different issues: 1. FileStreamSource.fetchMaxOffset() In structured streaming, we don't allow multiple streaming queries to run with same checkpoint (including concurrent runs of same query), so query should fail if it fails to write the metadata of specific batch ID due to same batch ID being written by others. 2. commit log As described in JIRA issue, assertion is already applied to the `offsetLog` for the same reason. https://github.com/apache/spark/blob/8167714cab93a5c06c23f92c9077fe8b9677ab28/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala#L394-L402 This patch applied the same for commit log. ### Why are the changes needed? This prevents the inconsistent behavior on streaming query and lets query fail instead. ### Does this PR introduce any user-facing change? No. 
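For reference, the guard described above amounts to a check like the following (a minimal sketch with placeholder names — `log`, `metadata`, `currentBatchId` — not the exact code in this patch; it relies on the metadata log's `add` returning `false` when another run has already written that batch):

```scala
// Sketch only: `log`, `metadata` and `currentBatchId` are placeholders.
// `add` returns false if the batch id was already committed by a concurrent run,
// which the caller now treats as a fatal condition instead of silently continuing.
if (!log.add(currentBatchId, metadata)) {
  throw new IllegalStateException(
    s"Concurrent update to the log. Multiple streaming jobs detected for $currentBatchId")
}
```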
### How was this patch tested? N/A, as the change is simple and obvious, and it's really hard to artificially reproduce the issue. Closes #25965 from HeartSaVioR/SPARK-26425. Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/execution/streaming/FileStreamSource.scala | 12 +++++++++--- .../execution/streaming/MicroBatchExecution.scala | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 03d86e42e4db7..42401fe069551 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -178,10 +178,16 @@ class FileStreamSource( if (batchFiles.nonEmpty) { metadataLogCurrentOffset += 1 - metadataLog.add(metadataLogCurrentOffset, batchFiles.map { case (p, timestamp) => + + val fileEntries = batchFiles.map { case (p, timestamp) => FileEntry(path = p, timestamp = timestamp, batchId = metadataLogCurrentOffset) - }.toArray) - logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") + }.toArray + if (metadataLog.add(metadataLogCurrentOffset, fileEntries)) { + logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") + } else { + throw new IllegalStateException("Concurrent update to the log. Multiple streaming jobs " + + s"detected for $metadataLogCurrentOffset") + } } FileStreamSourceOffset(metadataLogCurrentOffset) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 468a8c975b478..5a91b24a0803f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -598,7 +598,9 @@ class MicroBatchExecution( withProgressLocked { sinkCommitProgress = batchSinkProgress watermarkTracker.updateWatermark(lastExecution.executedPlan) - commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark)) + assert(commitLog.add(currentBatchId, CommitMetadata(watermarkTracker.currentWatermark)), + "Concurrent update to the commit log. Multiple streaming jobs detected for " + + s"$currentBatchId") committedOffsets ++= availableOffsets } logDebug(s"Completed batch ${currentBatchId}") From bd38e0be83528ec9ce0e5f533d4b3b25203dc917 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 17 Sep 2020 05:39:40 +0000 Subject: [PATCH 0049/1009] [SPARK-32903][SQL] GeneratePredicate should be able to eliminate common sub-expressions ### What changes were proposed in this pull request? This patch proposes to make GeneratePredicate eliminate common sub-expressions. ### Why are the changes needed? Both GenerateMutableProjection and GenerateUnsafeProjection, such codegen objects can eliminate common sub-expressions. But GeneratePredicate currently doesn't do it. We encounter a customer issue that a Filter pushed down through a Project causes performance issue, compared with not pushed down case. The issue is one expression used in Filter predicates are run many times. 
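As a concrete (hypothetical) illustration of that scenario in a spark-shell session — `df` stands for any DataFrame with a string `payload` column, and the UDF is only a stand-in for a costly expression:

```scala
import org.apache.spark.sql.functions._

// Hypothetical stand-in for an expensive expression (e.g. parsing a wide nested column).
val expensiveUdf = udf((s: String) => s.length)

// Pushing the filter below the projection substitutes the alias, so the physical predicate
// becomes roughly `expensiveUdf(payload) > 1 AND expensiveUdf(payload) < 10`: the same call
// appears twice and, without sub-expression elimination, is re-evaluated for the same row.
val projected = df.select(expensiveUdf($"payload").as("v"))
projected.filter($"v" > 1 && $"v" < 10)
```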
Due to the complex schema, the query nodes are not wholestage codegen, so it runs Filter.doExecute and then call GeneratePredicate. The common expression was run many time and became performance bottleneck. GeneratePredicate should be able to eliminate common sub-expressions for such case. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #29776 from viirya/filter-pushdown. Authored-by: Liang-Chi Hsieh Signed-off-by: Wenchen Fan --- .../codegen/GeneratePredicate.scala | 13 +++- .../sql/catalyst/expressions/predicates.scala | 2 +- ...CodegenSubexpressionEliminationSuite.scala | 73 +++++++++++++++++++ 3 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 6ba646d360d2e..7404030b661c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -30,9 +30,17 @@ object GeneratePredicate extends CodeGenerator[Expression, BasePredicate] { protected def bind(in: Expression, inputSchema: Seq[Attribute]): Expression = BindReferences.bindReference(in, inputSchema) - protected def create(predicate: Expression): BasePredicate = { + def generate(expressions: Expression, useSubexprElimination: Boolean): BasePredicate = + create(canonicalize(expressions), useSubexprElimination) + + protected def create(predicate: Expression): BasePredicate = create(predicate, false) + + protected def create(predicate: Expression, useSubexprElimination: Boolean): BasePredicate = { val ctx = newCodeGenContext() - val eval = predicate.genCode(ctx) + + // Do sub-expression elimination for predicates. 
+ val eval = ctx.generateExpressions(Seq(predicate), useSubexprElimination).head + val evalSubexpr = ctx.subexprFunctionsCode val codeBody = s""" public SpecificPredicate generate(Object[] references) { @@ -53,6 +61,7 @@ object GeneratePredicate extends CodeGenerator[Expression, BasePredicate] { } public boolean eval(InternalRow ${ctx.INPUT_ROW}) { + $evalSubexpr ${eval.code} return !${eval.isNull} && ${eval.value}; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index aa5cf4758564b..03066fb34cf27 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -73,7 +73,7 @@ trait Predicate extends Expression { object Predicate extends CodeGeneratorWithInterpretedFallback[Expression, BasePredicate] { override protected def createCodeGeneratedObject(in: Expression): BasePredicate = { - GeneratePredicate.generate(in) + GeneratePredicate.generate(in, SQLConf.get.subexpressionEliminationEnabled) } override protected def createInterpretedObject(in: Expression): BasePredicate = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala new file mode 100644 index 0000000000000..471f25356887f --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodegenSubexpressionEliminationSuite.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.codegen + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types.{BooleanType, DataType} + +/** + * A test suite that makes sure code generation handles sub-expression elimination correctly. 
+ */ +class CodegenSubexpressionEliminationSuite extends SparkFunSuite { + + test("SPARK-32903: GeneratePredicate should eliminate sub-expressions") { + Seq(true, false).foreach { useSubexprElimination => + val leaf1 = ExprWithEvaluatedState() + val leaf2 = ExprWithEvaluatedState() + val leaf3 = ExprWithEvaluatedState() + val leaf4 = ExprWithEvaluatedState() + + val cond = Or(And(leaf1, leaf2), And(leaf3, leaf4)) + val instance = GeneratePredicate.generate(cond, useSubexprElimination = useSubexprElimination) + instance.initialize(0) + assert(instance.eval(null) === false) + + if (useSubexprElimination) { + // When we do sub-expression elimination, Spark thought left and right side of + // the `Or` expression are the same. So only left side was evaluated, and Spark + // reused the evaluation for right side. + assert(leaf1.evaluated == true) + assert(leaf2.evaluated == false) + assert(leaf3.evaluated == false) + assert(leaf4.evaluated == false) + } else { + assert(leaf1.evaluated == true) + assert(leaf2.evaluated == false) + assert(leaf3.evaluated == true) + assert(leaf4.evaluated == false) + } + } + } + +} + +/** + * An expression with evaluated state so we can know whether it is evaluated. + */ +case class ExprWithEvaluatedState() extends LeafExpression with CodegenFallback { + var evaluated: Boolean = false + override def eval(input: InternalRow): Any = { + evaluated = true + false + } + + override def nullable: Boolean = false + override def dataType: DataType = BooleanType +} From 92b75dc260eb43d906a425f9f9d8d63b78c48cee Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 17 Sep 2020 06:50:30 +0000 Subject: [PATCH 0050/1009] [SPARK-32508][SQL] Disallow empty part col values in partition spec before static partition writing ### What changes were proposed in this pull request? Write to static partition, check in advance that the partition field is empty. ### Why are the changes needed? When writing to the current static partition, the partition field is empty, and an error will be reported when all tasks are completed. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add ut Closes #29316 from cxzl25/SPARK-32508. 
Authored-by: sychen Signed-off-by: Wenchen Fan --- .../sql/execution/datasources/rules.scala | 22 +++++++++++++++---- .../spark/sql/sources/InsertSuite.scala | 22 +++++++++++++++++++ .../apache/spark/sql/hive/InsertSuite.scala | 22 +++++++++++++++++++ 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 60cacda9f5f1c..5fb1a4d249070 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -386,7 +386,8 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { private def preprocess( insert: InsertIntoStatement, tblName: String, - partColNames: Seq[String]): InsertIntoStatement = { + partColNames: Seq[String], + catalogTable: Option[CatalogTable]): InsertIntoStatement = { val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( insert.partitionSpec, partColNames, tblName, conf.resolver) @@ -402,6 +403,18 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { s"including ${staticPartCols.size} partition column(s) having constant value(s).") } + val partitionsTrackedByCatalog = catalogTable.isDefined && + catalogTable.get.partitionColumnNames.nonEmpty && + catalogTable.get.tracksPartitionsInCatalog + if (partitionsTrackedByCatalog && normalizedPartSpec.nonEmpty) { + // empty partition column value + if (normalizedPartSpec.filter(_._2.isDefined).exists(_._2.get.isEmpty)) { + val spec = normalizedPartSpec.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") + throw new AnalysisException( + s"Partition spec is invalid. 
The spec ($spec) contains an empty partition column value") + } + } + val newQuery = TableOutputResolver.resolveOutputColumns( tblName, expectedColumns, insert.query, byName = false, conf) if (normalizedPartSpec.nonEmpty) { @@ -427,13 +440,14 @@ case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { table match { case relation: HiveTableRelation => val metadata = relation.tableMeta - preprocess(i, metadata.identifier.quotedString, metadata.partitionColumnNames) + preprocess(i, metadata.identifier.quotedString, metadata.partitionColumnNames, + Some(metadata)) case LogicalRelation(h: HadoopFsRelation, _, catalogTable, _) => val tblName = catalogTable.map(_.identifier.quotedString).getOrElse("unknown") - preprocess(i, tblName, h.partitionSchema.map(_.name)) + preprocess(i, tblName, h.partitionSchema.map(_.name), catalogTable) case LogicalRelation(_: InsertableRelation, _, catalogTable, _) => val tblName = catalogTable.map(_.identifier.quotedString).getOrElse("unknown") - preprocess(i, tblName, Nil) + preprocess(i, tblName, Nil, catalogTable) case _ => i } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index abd33ab8a8f22..32c4fb60b8c54 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -866,6 +866,28 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { }.getMessage assert(message.contains("LOCAL is supported only with file: scheme")) } + + test("SPARK-32508 " + + "Disallow empty part col values in partition spec before static partition writing") { + withTable("insertTable") { + sql( + """ + |CREATE TABLE insertTable(i int, part1 string, part2 string) USING PARQUET + |PARTITIONED BY (part1, part2) + """.stripMargin) + val msg = "Partition spec is invalid" + assert(intercept[AnalysisException] { + sql("INSERT INTO TABLE insertTable PARTITION(part1=1, part2='') SELECT 1") + }.getMessage.contains(msg)) + assert(intercept[AnalysisException] { + sql("INSERT INTO TABLE insertTable PARTITION(part1='', part2) SELECT 1 ,'' AS part2") + }.getMessage.contains(msg)) + + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2='2') SELECT 1") + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'2' AS part2") + sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'' AS part2") + } + } } class FileExistingTestFileSystem extends RawLocalFileSystem { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 421dcb499bd6a..ebc6cfb77d355 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -847,4 +847,26 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter } } } + + test("SPARK-32508 " + + "Disallow empty part col values in partition spec before static partition writing") { + withTable("t1") { + spark.sql( + """ + |CREATE TABLE t1 (c1 int) + |PARTITIONED BY (d string) + """.stripMargin) + + val e = intercept[AnalysisException] { + spark.sql( + """ + |INSERT OVERWRITE TABLE t1 PARTITION(d='') + |SELECT 1 + """.stripMargin) + }.getMessage + + assert(!e.contains("get partition: Value for key d is null or empty")) + assert(e.contains("Partition spec is invalid")) + } + } } From 
e5e54a3614ffd2a9150921e84e5b813d5cbf285a Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Thu, 17 Sep 2020 12:35:40 +0200 Subject: [PATCH 0051/1009] [SPARK-32900][CORE] Allow UnsafeExternalSorter to spill when there are nulls ### What changes were proposed in this pull request? This PR changes the way `UnsafeExternalSorter.SpillableIterator` checks whether it has spilled already, by checking whether `inMemSorter` is null. It also allows it to spill other `UnsafeSorterIterator`s than `UnsafeInMemorySorter.SortedIterator`. ### Why are the changes needed? Before this PR `UnsafeExternalSorter.SpillableIterator` could not spill when there are NULLs in the input and radix sorting is used. Currently, Spark determines whether UnsafeExternalSorter.SpillableIterator has not spilled yet by checking whether `upstream` is an instance of `UnsafeInMemorySorter.SortedIterator`. When radix sorting is used and there are NULLs in the input however, `upstream` will be an instance of `UnsafeExternalSorter.ChainedIterator` instead, and Spark will assume that the `SpillableIterator` iterator has spilled already, and therefore cannot spill again when it's supposed to spill. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A test was added to `UnsafeExternalSorterSuite` (and therefore also to `UnsafeExternalSorterRadixSortSuite`). I manually confirmed that the test failed in `UnsafeExternalSorterRadixSortSuite` without this patch. Closes #29772 from tomvanbussel/SPARK-32900. Authored-by: Tom van Bussel Signed-off-by: herman --- .../unsafe/sort/UnsafeExternalSorter.java | 69 +++++++++++-------- .../unsafe/sort/UnsafeInMemorySorter.java | 1 + .../unsafe/sort/UnsafeSorterIterator.java | 2 + .../unsafe/sort/UnsafeSorterSpillMerger.java | 5 ++ .../unsafe/sort/UnsafeSorterSpillReader.java | 5 ++ .../sort/UnsafeExternalSorterSuite.java | 33 +++++++++ 6 files changed, 88 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 55e4e609c3c7b..71b9a5bc11542 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -501,11 +501,15 @@ private static void spillIterator(UnsafeSorterIterator inMemIterator, */ class SpillableIterator extends UnsafeSorterIterator { private UnsafeSorterIterator upstream; - private UnsafeSorterIterator nextUpstream = null; private MemoryBlock lastPage = null; private boolean loaded = false; private int numRecords = 0; + private Object currentBaseObject; + private long currentBaseOffset; + private int currentRecordLength; + private long currentKeyPrefix; + SpillableIterator(UnsafeSorterIterator inMemIterator) { this.upstream = inMemIterator; this.numRecords = inMemIterator.getNumRecords(); @@ -516,23 +520,26 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + } + public long spill() throws IOException { synchronized (this) { - if (!(upstream instanceof UnsafeInMemorySorter.SortedIterator && nextUpstream == null - && numRecords > 0)) { + if (inMemSorter == null || numRecords <= 0) { return 0L; } - UnsafeInMemorySorter.SortedIterator inMemIterator = - ((UnsafeInMemorySorter.SortedIterator) upstream).clone(); + long currentPageNumber = 
upstream.getCurrentPageNumber(); - ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); + ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); // Iterate over the records that have not been returned and spill them. final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, numRecords); - spillIterator(inMemIterator, spillWriter); + spillIterator(upstream, spillWriter); spillWriters.add(spillWriter); - nextUpstream = spillWriter.getReader(serializerManager); + upstream = spillWriter.getReader(serializerManager); long released = 0L; synchronized (UnsafeExternalSorter.this) { @@ -540,8 +547,7 @@ public long spill() throws IOException { // is accessing the current record. We free this page in that caller's next loadNext() // call. for (MemoryBlock page : allocatedPages) { - if (!loaded || page.pageNumber != - ((UnsafeInMemorySorter.SortedIterator)upstream).getCurrentPageNumber()) { + if (!loaded || page.pageNumber != currentPageNumber) { released += page.size(); freePage(page); } else { @@ -575,22 +581,26 @@ public void loadNext() throws IOException { try { synchronized (this) { loaded = true; - if (nextUpstream != null) { - // Just consumed the last record from in memory iterator - if(lastPage != null) { - // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` - // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in - // sequence. We may hit dead lock if another thread locks `TaskMemoryManager` and - // `SpillableIterator` in sequence, which may happen in - // `TaskMemoryManager.acquireExecutionMemory`. - pageToFree = lastPage; - lastPage = null; - } - upstream = nextUpstream; - nextUpstream = null; + // Just consumed the last record from in memory iterator + if (lastPage != null) { + // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` + // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in + // sequence. We may hit dead lock if another thread locks `TaskMemoryManager` and + // `SpillableIterator` in sequence, which may happen in + // `TaskMemoryManager.acquireExecutionMemory`. + pageToFree = lastPage; + lastPage = null; } numRecords--; upstream.loadNext(); + + // Keep track of the current base object, base offset, record length, and key prefix, + // so that the current record can still be read in case a spill is triggered and we + // switch to the spill writer's iterator. 
+ currentBaseObject = upstream.getBaseObject(); + currentBaseOffset = upstream.getBaseOffset(); + currentRecordLength = upstream.getRecordLength(); + currentKeyPrefix = upstream.getKeyPrefix(); } } finally { if (pageToFree != null) { @@ -601,22 +611,22 @@ public void loadNext() throws IOException { @Override public Object getBaseObject() { - return upstream.getBaseObject(); + return currentBaseObject; } @Override public long getBaseOffset() { - return upstream.getBaseOffset(); + return currentBaseOffset; } @Override public int getRecordLength() { - return upstream.getRecordLength(); + return currentRecordLength; } @Override public long getKeyPrefix() { - return upstream.getKeyPrefix(); + return currentKeyPrefix; } } @@ -693,6 +703,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + return current.getCurrentPageNumber(); + } + @Override public boolean hasNext() { while (!current.hasNext() && !iterators.isEmpty()) { diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index 660eb790a550b..ff641a24a7b3e 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -330,6 +330,7 @@ public void loadNext() { @Override public long getBaseOffset() { return baseOffset; } + @Override public long getCurrentPageNumber() { return currentPageNumber; } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java index 1b3167fcc250c..d9f22311d07c2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterIterator.java @@ -34,4 +34,6 @@ public abstract class UnsafeSorterIterator { public abstract long getKeyPrefix(); public abstract int getNumRecords(); + + public abstract long getCurrentPageNumber(); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java index ab800288dcb43..f8603c5799e9b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillMerger.java @@ -70,6 +70,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + } + @Override public boolean hasNext() { return !priorityQueue.isEmpty() || (spillReader != null && spillReader.hasNext()); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java index a524c4790407d..db79efd008530 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java @@ -89,6 +89,11 @@ public int getNumRecords() { return numRecords; } + @Override + public long getCurrentPageNumber() { + throw new UnsupportedOperationException(); + 
} + @Override public boolean hasNext() { return (numRecordsRemaining > 0); diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 43977717f6c97..087d090c1c60e 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -359,6 +359,39 @@ public void forcedSpillingWithReadIterator() throws Exception { assertSpillFilesWereCleanedUp(); } + @Test + public void forcedSpillingNullsWithReadIterator() throws Exception { + final UnsafeExternalSorter sorter = newSorter(); + long[] record = new long[100]; + final int recordSize = record.length * 8; + final int n = (int) pageSizeBytes / recordSize * 3; + for (int i = 0; i < n; i++) { + boolean isNull = i % 2 == 0; + sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, isNull); + } + assertTrue(sorter.getNumberOfAllocatedPages() >= 2); + + UnsafeExternalSorter.SpillableIterator iter = + (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); + final int numRecordsToReadBeforeSpilling = n / 3; + for (int i = 0; i < numRecordsToReadBeforeSpilling; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + } + + assertTrue(iter.spill() > 0); + assertEquals(0, iter.spill()); + + for (int i = numRecordsToReadBeforeSpilling; i < n; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + } + assertFalse(iter.hasNext()); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); + } + @Test public void forcedSpillingWithNotReadIterator() throws Exception { final UnsafeExternalSorter sorter = newSorter(); From a54a6a0113115112f589d09c875f1cba5fd0bbca Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Sep 2020 11:20:50 +0000 Subject: [PATCH 0052/1009] [SPARK-32287][CORE] Fix flaky o.a.s.ExecutorAllocationManagerSuite on GithubActions ### What changes were proposed in this pull request? To fix the flaky `ExecutorAllocationManagerSuite`: Avoid first `schedule()` invocation after `ExecutorAllocationManager` started. ### Why are the changes needed? `ExecutorAllocationManagerSuite` is still flaky, see: https://github.com/apache/spark/pull/29722/checks?check_run_id=1117979237 By checking the below logs, we can see that there's a race condition between thread `pool-1-thread-1-ScalaTest-running` and thread `spark-dynamic-executor-allocation`. The only possibility of thread `spark-dynamic-executor-allocation` becoming active is the first time invocation of `schedule()`(since the `TEST_SCHEDULE_INTERVAL`(30s) is really long, so it's impossible the second invocation would happen). Thus, I think we shall avoid the first invocation too. ```scala 20/09/15 12:41:20.831 pool-1-thread-1-ScalaTest-running-ExecutorAllocationManagerSuite INFO ExecutorAllocationManager: Requesting 1 new executor because tasks are backlogged (new desired total will be 2 for resource profile id: 0) 20/09/15 12:41:20.832 spark-dynamic-executor-allocation INFO ExecutorAllocationManager: Requesting 2 new executors because tasks are backlogged (new desired total will be 4 for resource profile id: 0) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The flaky can't be reproduced locally so it's hard to say it has been completely fixed by now. We need time to see the result. Closes #29773 from Ngone51/fix-SPARK-32287. 
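As a side note on the fix itself: the change (visible in the diff below) does not stretch the polling interval any further, it simply skips registering the background task when a test-only flag disables it. A minimal, Spark-free sketch of that gating pattern, where `testing` and `scheduleEnabled` are illustrative stand-ins for `Utils.isTesting` and the new test config:

```scala
import java.util.concurrent.{Executors, TimeUnit}

object ScheduleGatingSketch {
  def main(args: Array[String]): Unit = {
    val testing = true          // stands in for Utils.isTesting
    val scheduleEnabled = false // stands in for the test-only "schedule enabled" flag

    val executor = Executors.newSingleThreadScheduledExecutor()
    val tick = new Runnable {
      override def run(): Unit = println("schedule() tick")
    }

    // Register the periodic task only when scheduling is allowed. In tests the
    // suite drives schedule() by hand, so no background tick can race with it.
    if (!testing || scheduleEnabled) {
      executor.scheduleWithFixedDelay(tick, 0L, 100L, TimeUnit.MILLISECONDS)
    }

    executor.shutdown()
  }
}
```

In the real change the same condition guards the `scheduleWithFixedDelay` call, which is why the interval constant can go back to a plain 100 ms.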
Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../apache/spark/ExecutorAllocationManager.scala | 13 ++++++------- .../org/apache/spark/internal/config/Tests.scala | 10 +++++----- .../spark/ExecutorAllocationManagerSuite.scala | 9 +++++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 341334c8a29c4..1dd64df106bc2 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -28,7 +28,7 @@ import com.codahale.metrics.{Gauge, MetricRegistry} import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.DECOMMISSION_ENABLED -import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL +import org.apache.spark.internal.config.Tests.TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED import org.apache.spark.metrics.source.Source import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID import org.apache.spark.resource.ResourceProfileManager @@ -150,11 +150,7 @@ private[spark] class ExecutorAllocationManager( private var addTime: Long = NOT_SET // Polling loop interval (ms) - private val intervalMillis: Long = if (Utils.isTesting) { - conf.get(TEST_SCHEDULE_INTERVAL) - } else { - 100 - } + private val intervalMillis: Long = 100 // Listener for Spark events that impact the allocation policy val listener = new ExecutorAllocationListener @@ -247,7 +243,10 @@ private[spark] class ExecutorAllocationManager( } } } - executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) + + if (!testing || conf.get(TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED)) { + executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) + } // copy the maps inside synchonize to ensure not being modified val (numExecutorsTarget, numLocalityAware) = synchronized { diff --git a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala index a1ebe5ce0ca32..7b8b204bab640 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Tests.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Tests.scala @@ -26,11 +26,11 @@ private[spark] object Tests { .longConf .createWithDefault(Runtime.getRuntime.maxMemory) - val TEST_SCHEDULE_INTERVAL = - ConfigBuilder("spark.testing.dynamicAllocation.scheduleInterval") - .version("2.3.0") - .longConf - .createWithDefault(100) + val TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED = + ConfigBuilder("spark.testing.dynamicAllocation.schedule.enabled") + .version("3.1.0") + .booleanConf + .createWithDefault(true) val IS_TESTING = ConfigBuilder("spark.testing") .version("1.0.1") diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 3f8cbf59bf527..6a38bba5dd0e5 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.PrivateMethodTester import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.config import org.apache.spark.internal.config.DECOMMISSION_ENABLED -import org.apache.spark.internal.config.Tests.TEST_SCHEDULE_INTERVAL +import 
org.apache.spark.internal.config.Tests.TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED import org.apache.spark.metrics.MetricsSystem import org.apache.spark.resource._ import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID @@ -1665,9 +1665,10 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { .set(config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, s"${executorIdleTimeout.toString}s") .set(config.SHUFFLE_SERVICE_ENABLED, true) .set(config.DYN_ALLOCATION_TESTING, true) - // SPARK-22864: effectively disable the allocation schedule by setting the period to a - // really long value. - .set(TEST_SCHEDULE_INTERVAL, 30000L) + // SPARK-22864/SPARK-32287: effectively disable the allocation schedule for the tests so that + // we won't result in the race condition between thread "spark-dynamic-executor-allocation" + // and thread "pool-1-thread-1-ScalaTest-running". + .set(TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED, false) .set(DECOMMISSION_ENABLED, decommissioningEnabled) sparkConf } From 482a79a5e39d54048533d42e1ca1266fbe95fffb Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 17 Sep 2020 07:50:39 -0700 Subject: [PATCH 0053/1009] [SPARK-24994][SQL][FOLLOW-UP] Handle foldable, timezone and cleanup ### What changes were proposed in this pull request? This is a follow-up on #29565, and addresses a few issues in the last PR: - style issue pointed by [this comment](https://github.com/apache/spark/pull/29565#discussion_r487646749) - skip optimization when `fromExp` is foldable (by [this comment](https://github.com/apache/spark/pull/29565#discussion_r487646973)) as there could be more efficient rule to apply for this case. - pass timezone info to the generated cast on the literal value - a bunch of cleanups and test improvements Originally I plan to handle this when implementing [SPARK-32858](https://issues.apache.org/jira/browse/SPARK-32858) but now think it's better to isolate these changes from that. ### Why are the changes needed? To fix a few left over issues in the above PR. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a test for the foldable case. Otherwise relying on existing tests. Closes #29775 from sunchao/SPARK-24994-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../optimizer/UnwrapCastInBinaryComparison.scala | 7 +++++-- .../optimizer/UnwrapCastInBinaryComparisonSuite.scala | 10 ++++++---- .../apache/spark/sql/FileBasedDataSourceSuite.scala | 5 ++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index 89f7c0f71b7ac..d0acfe036d443 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -184,7 +184,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { } else { // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the // literal side. 
- val lit = Cast(Literal(value), fromType) + val lit = Literal(Cast(Literal(value), fromType).eval(), fromType) exp match { case GreaterThan(_, _) => GreaterThan(fromExp, lit) case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) @@ -202,9 +202,12 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { * i.e., the conversion is injective. Note this only handles the case when both sides are of * integral type. */ - private def canImplicitlyCast(fromExp: Expression, toType: DataType, + private def canImplicitlyCast( + fromExp: Expression, + toType: DataType, literalType: DataType): Boolean = { toType.sameType(literalType) && + !fromExp.foldable && fromExp.dataType.isInstanceOf[IntegralType] && toType.isInstanceOf[IntegralType] && Cast.canUpCast(fromExp.dataType, toType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index 387964088b808..373c1febd2488 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -26,14 +26,14 @@ import org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.{BooleanType, ByteType, DoubleType, IntegerType} +import org.apache.spark.sql.types._ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelper { object Optimize extends RuleExecutor[LogicalPlan] { val batches: List[Batch] = Batch("Unwrap casts in binary comparison", FixedPoint(10), - NullPropagation, ConstantFolding, UnwrapCastInBinaryComparison) :: Nil + NullPropagation, UnwrapCastInBinaryComparison) :: Nil } val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) @@ -97,7 +97,7 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(Literal(v.toInt) >= castInt(f), trueIfNotNull(f)) assertEquivalent(Literal(v.toInt) > castInt(f), f =!= v) - assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort) <= f) + assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort, ShortType) <= f) } test("unwrap cast should have no effect when input is not integral type") { @@ -119,10 +119,12 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp ) } - test("unwrap cast should skip when expression is non-deterministic") { + test("unwrap cast should skip when expression is non-deterministic or foldable") { Seq(positiveInt, negativeInt).foreach (v => { val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v assertEquivalent(e, e, evaluate = false) + val e2 = Cast(Literal(30.toShort), IntegerType) >= v + assertEquivalent(e2, e2, evaluate = false) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8d6d93d13d143..f72e3347510f9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -32,14 +32,13 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} import 
org.apache.spark.sql.TestingUDT.{IntervalUDT, NullData, NullUDT} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} -import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.execution.SimpleMode import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2ScanRelation, FileScan} +import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan -import org.apache.spark.sql.execution.datasources.v2.parquet.{ParquetScan, ParquetTable} +import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf From 88e87bc8ebfa5aa1a8cc8928672749517ae0c41f Mon Sep 17 00:00:00 2001 From: Udbhav30 Date: Thu, 17 Sep 2020 09:25:17 -0700 Subject: [PATCH 0054/1009] [SPARK-32887][DOC] Correct the typo for SHOW TABLE ### What changes were proposed in this pull request? Correct the typo in Show Table document ### Why are the changes needed? Current Document of Show Table returns in parse error, so it is misleading to users ### Does this PR introduce _any_ user-facing change? Yes, the document of show table is corrected now ### How was this patch tested? NA Closes #29758 from Udbhav30/showtable. Authored-by: Udbhav30 Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-aux-show-table.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sql-ref-syntax-aux-show-table.md b/docs/sql-ref-syntax-aux-show-table.md index 0ce0a3eefa538..3314402ea3e2b 100644 --- a/docs/sql-ref-syntax-aux-show-table.md +++ b/docs/sql-ref-syntax-aux-show-table.md @@ -97,7 +97,7 @@ SHOW TABLE EXTENDED LIKE 'employee'; +--------+---------+-----------+--------------------------------------------------------------+ -- showing the multiple table details with pattern matching -SHOW TABLE EXTENDED LIKE `employe*`; +SHOW TABLE EXTENDED LIKE 'employe*'; +--------+---------+-----------+--------------------------------------------------------------+ |database|tableName|isTemporary| information | +--------+---------+-----------+--------------------------------------------------------------+ @@ -146,7 +146,7 @@ SHOW TABLE EXTENDED LIKE `employe*`; +--------+---------+----------+---------------------------------------------------------------+ -- show partition file system details -SHOW TABLE EXTENDED IN default LIKE `employee` PARTITION (`grade=1`); +SHOW TABLE EXTENDED IN default LIKE 'employee' PARTITION (grade=1); +--------+---------+-----------+--------------------------------------------------------------+ |database|tableName|isTemporary| information | +--------+---------+-----------+--------------------------------------------------------------+ @@ -169,7 +169,7 @@ SHOW TABLE EXTENDED IN default LIKE `employee` PARTITION (`grade=1`); +--------+---------+-----------+--------------------------------------------------------------+ -- show partition file system details with regex fails as shown below -SHOW TABLE EXTENDED IN default LIKE `empl*` PARTITION (`grade=1`); +SHOW TABLE EXTENDED IN default LIKE 'empl*' 
PARTITION (grade=1); Error: Error running query: org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 'emplo*' not found in database 'default'; (state=,code=0) ``` From a8442c282665c93384d3465c440be588394e8ab4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Sep 2020 14:01:52 -0700 Subject: [PATCH 0055/1009] [SPARK-32926][TESTS] Add Scala 2.13 build test in GitHub Action ### What changes were proposed in this pull request? The PR aims to add Scala 2.13 build test coverage into GitHub Action for Apache Spark 3.1.0. ### Why are the changes needed? The branch is ready for Scala 2.13 and this will prevent any regression. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the GitHub Action. Closes #29793 from dongjoon-hyun/SPARK-32926. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1c0f50328ee72..17c040323d515 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -297,3 +297,29 @@ jobs: mkdir -p ~/.m2 ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install rm -rf ~/.m2/repository/org/apache/spark + + scala-213: + name: Scala 2.13 build + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: scala-213-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + scala-213-maven- + - name: Install Java 11 + uses: actions/setup-java@v1 + with: + java-version: 11 + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + mkdir -p ~/.m2 + ./dev/change-scala-version.sh 2.13 + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install + rm -rf ~/.m2/repository/org/apache/spark From 5817c584b8a259f5c9be13a26f2adec905474ce6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 17 Sep 2020 14:35:01 -0700 Subject: [PATCH 0056/1009] [SPARK-32909][SQL] Pass all `sql/hive-thriftserver` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix failed and aborted cases in sql hive-thriftserver module in Scala 2.13, the main change of this pr as follow: - Use `s.c.Seq` instead of `Seq` in `HiveResult` because the input type maybe `mutable.ArraySeq`, but `Seq` represent `immutable.Seq` in Scala 2.13. 
- Reset classLoader after `HiveMetastoreLazyInitializationSuite` completed because context class loader is `NonClosableMutableURLClassLoader` in `HiveMetastoreLazyInitializationSuite` running process, and it propagate to `HiveThriftServer2ListenerSuite` trigger following problems in Scala 2.13: ``` HiveThriftServer2ListenerSuite: *** RUN ABORTED *** java.lang.LinkageError: loader constraint violation: loader (instance of net/bytebuddy/dynamic/loading/MultipleParentClassLoader) previously initiated loading for a different type with name "org/apache/hive/service/ServiceStateChangeListener" at org.mockito.codegen.HiveThriftServer2$MockitoMock$1850222569.(Unknown Source) at sun.reflect.GeneratedSerializationConstructorAccessor530.newInstance(Unknown Source) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at org.objenesis.instantiator.sun.SunReflectionFactoryInstantiator.newInstance(SunReflectionFactoryInstantiator.java:48) at org.objenesis.ObjenesisBase.newInstance(ObjenesisBase.java:73) at org.mockito.internal.creation.instance.ObjenesisInstantiator.newInstance(ObjenesisInstantiator.java:19) at org.mockito.internal.creation.bytebuddy.SubclassByteBuddyMockMaker.createMock(SubclassByteBuddyMockMaker.java:47) at org.mockito.internal.creation.bytebuddy.ByteBuddyMockMaker.createMock(ByteBuddyMockMaker.java:25) at org.mockito.internal.util.MockUtil.createMock(MockUtil.java:35) at org.mockito.internal.MockitoCore.mock(MockitoCore.java:63) ... ``` After this pr `HiveThriftServer2Suites` and `HiveThriftServer2ListenerSuite` was fixed and all 461 test passed ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/hive-thriftserver -am -Phive-thriftserver -Pscala-2.13 mvn test -pl sql/hive-thriftserver -Phive -Phive-thriftserver -Pscala-2.13 ``` **Before** ``` HiveThriftServer2ListenerSuite: *** RUN ABORTED *** ``` **After** ``` Tests: succeeded 461, failed 0, canceled 0, ignored 17, pending 0 All tests passed. ``` Closes #29783 from LuciferYang/sql-thriftserver-tests. 
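The classloader part of the fix is the usual save-and-restore-in-`finally` pattern, so whatever loader a suite installs on the current thread cannot leak into later suites. A minimal sketch of that pattern, with `runSuiteBody()` as a placeholder for the actual test body:

```scala
object ClassLoaderRestoreSketch {
  // Placeholder for test code that may install a different context classloader,
  // e.g. an isolated loader created for Hive client classes.
  def runSuiteBody(): Unit = ()

  def main(args: Array[String]): Unit = {
    val originalClassLoader = Thread.currentThread().getContextClassLoader
    try {
      runSuiteBody()
    } finally {
      // Restore the original loader so later suites (and mocking frameworks that
      // generate classes against the current loader) see a consistent one.
      Thread.currentThread().setContextClassLoader(originalClassLoader)
    }
  }
}
```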
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 2 +- .../spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index 4d388e40fb8bd..dcec0b019da28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -106,7 +106,7 @@ object HiveResult { case (n, _: NumericType) => n.toString case (s: String, StringType) => if (nested) "\"" + s + "\"" else s case (interval: CalendarInterval, CalendarIntervalType) => interval.toString - case (seq: Seq[_], ArrayType(typ, _)) => + case (seq: scala.collection.Seq[_], ArrayType(typ, _)) => seq.map(v => (v, typ)).map(e => toHiveString(e, true, formatters)).mkString("[", ",", "]") case (m: Map[_, _], MapType(kType, vType, _)) => m.map { case (key, value) => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala index 277df548aefd0..951f92793732f 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala @@ -31,6 +31,7 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { .config("spark.hadoop.hive.metastore.uris", "thrift://127.0.0.1:11111") .getOrCreate() val originalLevel = org.apache.log4j.Logger.getRootLogger().getLevel + val originalClassLoader = Thread.currentThread().getContextClassLoader try { // Avoid outputting a lot of expected warning logs spark.sparkContext.setLogLevel("error") @@ -64,6 +65,7 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { exceptionString.contains(msg) } } finally { + Thread.currentThread().setContextClassLoader(originalClassLoader) spark.sparkContext.setLogLevel(originalLevel.toString) spark.stop() } From ea3b979e95f6ce11e7f6e401625a51ede3e649fc Mon Sep 17 00:00:00 2001 From: jzc Date: Thu, 17 Sep 2020 14:50:47 -0700 Subject: [PATCH 0057/1009] [SPARK-32889][SQL] orc table column name supports special characters ### What changes were proposed in this pull request? make orc table column name support special characters like `$` ### Why are the changes needed? Special characters like `$` are allowed in orc table column name by Hive. But it's error when execute command "CREATE TABLE tbl(`$` INT, b INT) using orc" in spark. it's not compatible with Hive. `Column name "$" contains invalid character(s). Please use alias to rename it.;Column name "$" contains invalid character(s). Please use alias to rename it.;org.apache.spark.sql.AnalysisException: Column name "$" contains invalid character(s). 
Please use alias to rename it.; at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.checkFieldName(OrcFileFormat.scala:51) at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.$anonfun$checkFieldNames$1(OrcFileFormat.scala:59) at org.apache.spark.sql.execution.datasources.orc.OrcFileFormat$.$anonfun$checkFieldNames$1$adapted(OrcFileFormat.scala:59) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:38) ` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add unit test Closes #29761 from jzc928/orcColSpecialChar. Authored-by: jzc Signed-off-by: Dongjoon Hyun --- .../datasources/orc/OrcFileFormat.scala | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 14 ++++ .../sql/hive/execution/SQLQuerySuite.scala | 74 ++++++++++++------- 3 files changed, 64 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 69badb4f7d595..8e9a566d45971 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils} private[sql] object OrcFileFormat { private def checkFieldName(name: String): Unit = { try { - TypeDescription.fromString(s"struct<$name:int>") + TypeDescription.fromString(s"struct<`$name`:int>") } catch { case _: IllegalArgumentException => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index f72e3347510f9..77e07e5550f35 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -233,6 +233,20 @@ class FileBasedDataSourceSuite extends QueryTest } } + Seq("json", "orc").foreach { format => + test(s"SPARK-32889: column name supports special characters using $format") { + Seq("$", " ", ",", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => + withTempDir { dir => + val dataDir = new File(dir, "file").getCanonicalPath + Seq(1).toDF(name).write.format(format).save(dataDir) + val schema = spark.read.format(format).load(dataDir).schema + assert(schema.size == 1) + assertResult(name)(schema.head.name) + } + } + } + } + // Text file format only supports string type test("SPARK-24691 error handling for unsupported types - text") { withTempDir { dir => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 431790e1fbb6d..a69a949e3a3a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2206,39 +2206,63 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } - test("SPARK-21912 ORC/Parquet table should not create invalid column names") { + test("SPARK-21912 Parquet table should not create invalid column names") { Seq(" ", ",", ";", "{", 
"}", "(", ")", "\n", "\t", "=").foreach { name => - Seq("ORC", "PARQUET").foreach { source => - withTable("t21912") { - val m = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING $source") - }.getMessage - assert(m.contains(s"contains invalid character(s)")) + val source = "PARQUET" + withTable("t21912") { + val m = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912(`col$name` INT) USING $source") + }.getMessage + assert(m.contains(s"contains invalid character(s)")) - val m1 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`") - }.getMessage - assert(m1.contains(s"contains invalid character(s)")) + val m1 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912 STORED AS $source AS SELECT 1 `col$name`") + }.getMessage + assert(m1.contains(s"contains invalid character(s)")) + + val m2 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`") + }.getMessage + assert(m2.contains(s"contains invalid character(s)")) - val m2 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912 USING $source AS SELECT 1 `col$name`") + withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { + val m3 = intercept[AnalysisException] { + sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')") }.getMessage - assert(m2.contains(s"contains invalid character(s)")) + assert(m3.contains(s"contains invalid character(s)")) + } - withSQLConf(HiveUtils.CONVERT_METASTORE_PARQUET.key -> "false") { - val m3 = intercept[AnalysisException] { - sql(s"CREATE TABLE t21912(`col$name` INT) USING hive OPTIONS (fileFormat '$source')") - }.getMessage - assert(m3.contains(s"contains invalid character(s)")) - } + sql(s"CREATE TABLE t21912(`col` INT) USING $source") + val m4 = intercept[AnalysisException] { + sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)") + }.getMessage + assert(m4.contains(s"contains invalid character(s)")) + } + } + } - sql(s"CREATE TABLE t21912(`col` INT) USING $source") - val m4 = intercept[AnalysisException] { - sql(s"ALTER TABLE t21912 ADD COLUMNS(`col$name` INT)") - }.getMessage - assert(m4.contains(s"contains invalid character(s)")) + test("SPARK-32889: ORC table column name supports special characters") { + // " " "," is not allowed. + Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => + val source = "ORC" + Seq(s"CREATE TABLE t32889(`$name` INT) USING $source", + s"CREATE TABLE t32889 STORED AS $source AS SELECT 1 `$name`", + s"CREATE TABLE t32889 USING $source AS SELECT 1 `$name`", + s"CREATE TABLE t32889(`$name` INT) USING hive OPTIONS (fileFormat '$source')") + .foreach { command => + withTable("t32889") { + sql(command) + assertResult(name)( + sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(0).name) } } + + withTable("t32889") { + sql(s"CREATE TABLE t32889(`col` INT) USING $source") + sql(s"ALTER TABLE t32889 ADD COLUMNS(`$name` INT)") + assertResult(name)( + sessionState.catalog.getTableMetadata(TableIdentifier("t32889")).schema.fields(1).name) + } } } From 4ced58862c707aa916f7a55d15c3887c94c9b210 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 18 Sep 2020 08:17:23 +0900 Subject: [PATCH 0058/1009] [SPARK-32635][SQL] Fix foldable propagation ### What changes were proposed in this pull request? This PR rewrites `FoldablePropagation` rule to replace attribute references in a node with foldables coming only from the node's children. 
Before this PR in the case of this example (with setting`spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation`): ```scala val a = Seq("1").toDF("col1").withColumn("col2", lit("1")) val b = Seq("2").toDF("col1").withColumn("col2", lit("2")) val aub = a.union(b) val c = aub.filter($"col1" === "2").cache() val d = Seq("2").toDF( "col4") val r = d.join(aub, $"col2" === $"col4").select("col4") val l = c.select("col2") val df = l.join(r, $"col2" === $"col4", "LeftOuter") df.show() ``` foldable propagation happens incorrectly: ``` Join LeftOuter, (col2#6 = col4#34) Join LeftOuter, (col2#6 = col4#34) !:- Project [col2#6] :- Project [1 AS col2#6] : +- InMemoryRelation [col1#4, col2#6], StorageLevel(disk, memory, deserialized, 1 replicas) : +- InMemoryRelation [col1#4, col2#6], StorageLevel(disk, memory, deserialized, 1 replicas) : +- Union : +- Union : :- *(1) Project [value#1 AS col1#4, 1 AS col2#6] : :- *(1) Project [value#1 AS col1#4, 1 AS col2#6] : : +- *(1) Filter (isnotnull(value#1) AND (value#1 = 2)) : : +- *(1) Filter (isnotnull(value#1) AND (value#1 = 2)) : : +- *(1) LocalTableScan [value#1] : : +- *(1) LocalTableScan [value#1] : +- *(2) Project [value#10 AS col1#13, 2 AS col2#15] : +- *(2) Project [value#10 AS col1#13, 2 AS col2#15] : +- *(2) Filter (isnotnull(value#10) AND (value#10 = 2)) : +- *(2) Filter (isnotnull(value#10) AND (value#10 = 2)) : +- *(2) LocalTableScan [value#10] : +- *(2) LocalTableScan [value#10] +- Project [col4#34] +- Project [col4#34] +- Join Inner, (col2#6 = col4#34) +- Join Inner, (col2#6 = col4#34) :- Project [value#31 AS col4#34] :- Project [value#31 AS col4#34] : +- LocalRelation [value#31] : +- LocalRelation [value#31] +- Project [col2#6] +- Project [col2#6] +- Union false, false +- Union false, false :- Project [1 AS col2#6] :- Project [1 AS col2#6] : +- LocalRelation [value#1] : +- LocalRelation [value#1] +- Project [2 AS col2#15] +- Project [2 AS col2#15] +- LocalRelation [value#10] +- LocalRelation [value#10] ``` and so the result is wrong: ``` +----+----+ |col2|col4| +----+----+ | 1|null| +----+----+ ``` After this PR foldable propagation will not happen incorrectly and the result is correct: ``` +----+----+ |col2|col4| +----+----+ | 2| 2| +----+----+ ``` ### Why are the changes needed? To fix a correctness issue. ### Does this PR introduce _any_ user-facing change? Yes, fixes a correctness issue. ### How was this patch tested? Existing and new UTs. Closes #29771 from peter-toth/SPARK-32635-fix-foldable-propagation. 
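Conceptually, the rewritten rule walks the plan bottom-up and returns, for every rewritten node, the foldable aliases that node itself exposes, so a parent can only substitute attributes that really come from its direct children. A toy sketch of that shape; the `Plan`/`Leaf`/`Proj` types below are illustrative stand-ins, not Spark's classes:

```scala
object FoldablePropagationSketch {
  sealed trait Plan
  case class Leaf(name: String) extends Plan
  // aliases: attribute -> constant this node binds it to; refs: attributes this node reads
  case class Proj(aliases: Map[String, String], refs: Seq[String], child: Plan) extends Plan

  // Rewrites a node and returns the foldable aliases *it* exposes to its parent.
  def propagate(plan: Plan): (Plan, Map[String, String]) = plan match {
    case l: Leaf => (l, Map.empty)
    case Proj(aliases, refs, child) =>
      val (newChild, fromChild) = propagate(child)
      // Substitute references only with foldables coming from this node's child,
      // never with ones collected elsewhere in the tree.
      val newRefs = refs.map(r => fromChild.getOrElse(r, r))
      (Proj(aliases, newRefs, newChild), aliases)
  }

  def main(args: Array[String]): Unit = {
    val plan = Proj(Map.empty, Seq("col2"), Proj(Map("col2" -> "1"), Seq.empty, Leaf("t")))
    println(propagate(plan)._1) // col2 is rewritten to "1" because the direct child produced it
  }
}
```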
Authored-by: Peter Toth Signed-off-by: Takeshi Yamamuro --- .../catalyst/expressions/AttributeMap.scala | 2 + .../catalyst/expressions/AttributeMap.scala | 2 + .../sql/catalyst/optimizer/expressions.scala | 121 +++++++++++------- .../org/apache/spark/sql/DataFrameSuite.scala | 12 ++ 4 files changed, 88 insertions(+), 49 deletions(-) diff --git a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 75a8bec018a1f..42b92d4593c77 100644 --- a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala @@ -26,6 +26,8 @@ object AttributeMap { def apply[A](kvs: Seq[(Attribute, A)]): AttributeMap[A] = { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } + + def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty) } class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)]) diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 4caa3d0461875..e6b53e3e6548f 100644 --- a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala @@ -26,6 +26,8 @@ object AttributeMap { def apply[A](kvs: Seq[(Attribute, A)]): AttributeMap[A] = { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } + + def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty) } class AttributeMap[A](val baseMap: Map[ExprId, (Attribute, A)]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index b2fc3936e1a29..c4e4b25d570dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -624,59 +624,82 @@ object NullPropagation extends Rule[LogicalPlan] { */ object FoldablePropagation extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - var foldableMap = AttributeMap(plan.flatMap { - case Project(projectList, _) => projectList.collect { - case a: Alias if a.child.foldable => (a.toAttribute, a) - } - case _ => Nil - }) - val replaceFoldable: PartialFunction[Expression, Expression] = { - case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + CleanupAliases(propagateFoldables(plan)._1) + } + + private def propagateFoldables(plan: LogicalPlan): (LogicalPlan, AttributeMap[Alias]) = { + plan match { + case p: Project => + val (newChild, foldableMap) = propagateFoldables(p.child) + val newProject = + replaceFoldable(p.withNewChildren(Seq(newChild)).asInstanceOf[Project], foldableMap) + val newFoldableMap = AttributeMap(newProject.projectList.collect { + case a: Alias if a.child.foldable => (a.toAttribute, a) + }) + (newProject, newFoldableMap) + + // We can not replace the attributes in `Expand.output`. If there are other non-leaf + // operators that have the `output` field, we should put them here too. 
+ case e: Expand => + val (newChild, foldableMap) = propagateFoldables(e.child) + val expandWithNewChildren = e.withNewChildren(Seq(newChild)).asInstanceOf[Expand] + val newExpand = if (foldableMap.isEmpty) { + expandWithNewChildren + } else { + val newProjections = expandWithNewChildren.projections.map(_.map(_.transform { + case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + })) + if (newProjections == expandWithNewChildren.projections) { + expandWithNewChildren + } else { + expandWithNewChildren.copy(projections = newProjections) + } + } + (newExpand, foldableMap) + + case u: UnaryNode if canPropagateFoldables(u) => + val (newChild, foldableMap) = propagateFoldables(u.child) + val newU = replaceFoldable(u.withNewChildren(Seq(newChild)), foldableMap) + (newU, foldableMap) + + // Join derives the output attributes from its child while they are actually not the + // same attributes. For example, the output of outer join is not always picked from its + // children, but can also be null. We should exclude these miss-derived attributes when + // propagating the foldable expressions. + // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes + // of outer join. + case j: Join => + val (newChildren, foldableMaps) = j.children.map(propagateFoldables).unzip + val foldableMap = AttributeMap( + foldableMaps.foldLeft(Iterable.empty[(Attribute, Alias)])(_ ++ _.baseMap.values).toSeq) + val newJoin = + replaceFoldable(j.withNewChildren(newChildren).asInstanceOf[Join], foldableMap) + val missDerivedAttrsSet: AttributeSet = AttributeSet(newJoin.joinType match { + case _: InnerLike | LeftExistence(_) => Nil + case LeftOuter => newJoin.right.output + case RightOuter => newJoin.left.output + case FullOuter => newJoin.left.output ++ newJoin.right.output + }) + val newFoldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { + case (attr, _) => missDerivedAttrsSet.contains(attr) + }.toSeq) + (newJoin, newFoldableMap) + + // For other plans, they are not safe to apply foldable propagation, and they should not + // propagate foldable expressions from children. + case o => + val newOther = o.mapChildren(propagateFoldables(_)._1) + (newOther, AttributeMap.empty) } + } + private def replaceFoldable(plan: LogicalPlan, foldableMap: AttributeMap[Alias]): plan.type = { if (foldableMap.isEmpty) { plan } else { - CleanupAliases(plan.transformUp { - // We can only propagate foldables for a subset of unary nodes. - case u: UnaryNode if foldableMap.nonEmpty && canPropagateFoldables(u) => - u.transformExpressions(replaceFoldable) - - // Join derives the output attributes from its child while they are actually not the - // same attributes. For example, the output of outer join is not always picked from its - // children, but can also be null. We should exclude these miss-derived attributes when - // propagating the foldable expressions. - // TODO(cloud-fan): It seems more reasonable to use new attributes as the output attributes - // of outer join. 
- case j @ Join(left, right, joinType, _, _) if foldableMap.nonEmpty => - val newJoin = j.transformExpressions(replaceFoldable) - val missDerivedAttrsSet: AttributeSet = AttributeSet(joinType match { - case _: InnerLike | LeftExistence(_) => Nil - case LeftOuter => right.output - case RightOuter => left.output - case FullOuter => left.output ++ right.output - }) - foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { - case (attr, _) => missDerivedAttrsSet.contains(attr) - }.toSeq) - newJoin - - // We can not replace the attributes in `Expand.output`. If there are other non-leaf - // operators that have the `output` field, we should put them here too. - case expand: Expand if foldableMap.nonEmpty => - expand.copy(projections = expand.projections.map { projection => - projection.map(_.transform(replaceFoldable)) - }) - - // For other plans, they are not safe to apply foldable propagation, and they should not - // propagate foldable expressions from children. - case other if foldableMap.nonEmpty => - val childrenOutputSet = AttributeSet(other.children.flatMap(_.output)) - foldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { - case (attr, _) => childrenOutputSet.contains(attr) - }.toSeq) - other - }) + plan transformExpressions { + case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + } } } @@ -684,7 +707,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { * List of all [[UnaryNode]]s which allow foldable propagation. */ private def canPropagateFoldables(u: UnaryNode): Boolean = u match { - case _: Project => true + // Handling `Project` is moved to `propagateFoldables`. case _: Filter => true case _: SubqueryAlias => true case _: Aggregate => true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d95f09a4cc839..321f4966178d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2555,6 +2555,18 @@ class DataFrameSuite extends QueryTest val df = Seq(0.0 -> -0.0).toDF("pos", "neg") checkAnswer(df.select($"pos" > $"neg"), Row(false)) } + + test("SPARK-32635: Replace references with foldables coming only from the node's children") { + val a = Seq("1").toDF("col1").withColumn("col2", lit("1")) + val b = Seq("2").toDF("col1").withColumn("col2", lit("2")) + val aub = a.union(b) + val c = aub.filter($"col1" === "2").cache() + val d = Seq("2").toDF("col4") + val r = d.join(aub, $"col2" === $"col4").select("col4") + val l = c.select("col2") + val df = l.join(r, $"col2" === $"col4", "LeftOuter") + checkAnswer(df, Row("2", "2")) + } } case class GroupByKey(a: Int, b: Int) From 68e0d5f2962d4045bd159b5430a8f1ae2dfde4c3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 18 Sep 2020 08:29:29 +0900 Subject: [PATCH 0059/1009] [SPARK-32902][SQL] Logging plan changes for AQE ### What changes were proposed in this pull request? Recently, we added code to log plan changes in the preparation phase in `QueryExecution` for execution (https://github.com/apache/spark/pull/29544). This PR intends to apply the same fix for logging plan changes in AQE. ### Why are the changes needed? Easy debugging for AQE plans ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests. Closes #29774 from maropu/PlanChangeLogForAQE. 
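Mechanically, the logging amounts to folding the rule list over the plan, comparing the plan before and after each rule, and emitting a per-rule message plus a batch summary when something changed. A small stand-alone sketch of that pattern, with a plain `String` standing in for `SparkPlan` and a local `Rule` case class standing in for `Rule[SparkPlan]`:

```scala
object RuleLoggingSketch {
  type Plan = String
  final case class Rule(name: String, transform: Plan => Plan)

  // Fold the rules over the plan, logging each rule that changed it and a batch summary.
  def applyRules(plan: Plan, rules: Seq[Rule], batchName: String): Plan = {
    val result = rules.foldLeft(plan) { (current, rule) =>
      val next = rule.transform(current)
      if (next != current) println(s"=== Applying Rule ${rule.name} ===\n$current => $next")
      next
    }
    if (result != plan) println(s"=== Result of Batch $batchName ===\n$plan => $result")
    result
  }

  def main(args: Array[String]): Unit = {
    val rules = Seq(Rule("Uppercase", _.toUpperCase), Rule("NoOp", identity))
    applyRules("scan -> sort", rules, "AQE Query Stage Optimization")
  }
}
```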
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../adaptive/AdaptiveSparkPlanExec.scala | 45 +++++++++++++++---- .../adaptive/AdaptiveQueryExecSuite.scala | 20 +++++++++ 2 files changed, 56 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 014358b663bbb..6c197fedd8c56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReturnAnswer} -import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} +import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ @@ -75,6 +75,8 @@ case class AdaptiveSparkPlanExec( case _ => logDebug(_) } + @transient private val planChangeLogger = new PlanChangeLogger[SparkPlan]() + // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) @@ -109,7 +111,8 @@ case class AdaptiveSparkPlanExec( @transient private val costEvaluator = SimpleCostEvaluator - @transient private val initialPlan = applyPhysicalRules(inputPlan, queryStagePreparationRules) + @transient private val initialPlan = applyPhysicalRules( + inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) @volatile private var currentPhysicalPlan = initialPlan @@ -231,7 +234,9 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules( - result.newPlan, queryStageOptimizerRules ++ postStageCreationRules) + result.newPlan, + queryStageOptimizerRules ++ postStageCreationRules, + Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) currentPhysicalPlan @@ -413,11 +418,14 @@ case class AdaptiveSparkPlanExec( } private def newQueryStage(e: Exchange): QueryStageExec = { - val optimizedPlan = applyPhysicalRules(e.child, queryStageOptimizerRules) + val optimizedPlan = applyPhysicalRules( + e.child, queryStageOptimizerRules, Some((planChangeLogger, "AQE Query Stage Optimization"))) val queryStage = e match { case s: ShuffleExchangeLike => val newShuffle = applyPhysicalRules( - s.withNewChildren(Seq(optimizedPlan)), postStageCreationRules) + s.withNewChildren(Seq(optimizedPlan)), + postStageCreationRules, + Some((planChangeLogger, "AQE Post Stage Creation"))) if (!newShuffle.isInstanceOf[ShuffleExchangeLike]) { throw new IllegalStateException( "Custom columnar rules cannot transform shuffle node to something else.") @@ -425,7 +433,9 @@ case class AdaptiveSparkPlanExec( ShuffleQueryStageExec(currentStageId, newShuffle) case b: BroadcastExchangeLike => val newBroadcast = applyPhysicalRules( - b.withNewChildren(Seq(optimizedPlan)), postStageCreationRules) + b.withNewChildren(Seq(optimizedPlan)), + postStageCreationRules, + Some((planChangeLogger, "AQE Post Stage Creation"))) if (!newBroadcast.isInstanceOf[BroadcastExchangeLike]) { throw new IllegalStateException( "Custom columnar rules cannot transform broadcast node to something else.") @@ -534,7 +544,10 @@ case class AdaptiveSparkPlanExec( logicalPlan.invalidateStatsCache() val optimized = optimizer.execute(logicalPlan) val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() - val newPlan = applyPhysicalRules(sparkPlan, preprocessingRules ++ queryStagePreparationRules) + val newPlan = applyPhysicalRules( + sparkPlan, + preprocessingRules ++ queryStagePreparationRules, + Some((planChangeLogger, "AQE Replanning"))) (newPlan, optimized) } @@ -630,8 +643,22 @@ object AdaptiveSparkPlanExec { /** * Apply a list of physical operator rules on a [[SparkPlan]]. 
*/ - def applyPhysicalRules(plan: SparkPlan, rules: Seq[Rule[SparkPlan]]): SparkPlan = { - rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + def applyPhysicalRules( + plan: SparkPlan, + rules: Seq[Rule[SparkPlan]], + loggerAndBatchName: Option[(PlanChangeLogger[SparkPlan], String)] = None): SparkPlan = { + if (loggerAndBatchName.isEmpty) { + rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + } else { + val (logger, batchName) = loggerAndBatchName.get + val newPlan = rules.foldLeft(plan) { case (sp, rule) => + val result = rule.apply(sp) + logger.logRule(rule.ruleName, sp, result) + result + } + logger.logBatch(batchName, plan, newPlan) + newPlan + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 7e7248c312e11..8799dbb14ef34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1238,4 +1238,24 @@ class AdaptiveQueryExecSuite } } } + + test("Logging plan changes for AQE") { + val testAppender = new LogAppender("plan changes") + withLogAppender(testAppender) { + withSQLConf( + SQLConf.PLAN_CHANGE_LOG_LEVEL.key -> "INFO", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + sql("SELECT * FROM testData JOIN testData2 ON key = a " + + "WHERE value = (SELECT max(a) FROM testData3)").collect() + } + Seq("=== Result of Batch AQE Preparations ===", + "=== Result of Batch AQE Post Stage Creation ===", + "=== Result of Batch AQE Replanning ===", + "=== Result of Batch AQE Query Stage Optimization ===", + "=== Result of Batch AQE Final Query Stage Optimization ===").foreach { expectedMsg => + assert(testAppender.loggingEvents.exists(_.getRenderedMessage.contains(expectedMsg))) + } + } + } } From 9d6221b9368ab3d23c63a9f24a2ba42a6f709d54 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Fri, 18 Sep 2020 08:57:52 +0800 Subject: [PATCH 0060/1009] [SPARK-18409][ML][FOLLOWUP] LSH approxNearestNeighbors optimization 2 ### What changes were proposed in this pull request? 1, simplify the aggregation by get `count` via `summary.count` 2, ignore nan values like the old impl: ``` val relativeError = 0.05 val approxQuantile = numNearestNeighbors.toDouble / count + relativeError val modelDatasetWithDist = modelDataset.withColumn(distCol, hashDistCol) if (approxQuantile >= 1) { modelDatasetWithDist } else { val hashThreshold = modelDatasetWithDist.stat .approxQuantile(distCol, Array(approxQuantile), relativeError) // Filter the dataset where the hash value is less than the threshold. modelDatasetWithDist.filter(hashDistCol <= hashThreshold(0)) } ``` ### Why are the changes needed? simplify the aggregation ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29778 from zhengruifeng/lsh_nit. 
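The reworked per-partition loop boils down to: stream the distance column once, skip nulls and NaNs while inserting into the quantile summary, and read the row count back from the merged summary instead of aggregating it separately. A rough sketch of that filter-while-inserting idea, with a trivial `Summary` class standing in for `QuantileSummaries`:

```scala
object DistanceSummarySketch {
  // Trivial stand-in for QuantileSummaries: only tracks how many values were inserted.
  final case class Summary(count: Long) {
    def insert(v: Double): Summary = copy(count = count + 1)
    def merge(other: Summary): Summary = Summary(count + other.count)
  }

  // Stream a partition of (possibly null) distances once, skipping nulls and NaNs,
  // so the count can later be read back from the merged summary.
  def summarize(distances: Iterator[Option[Double]]): Summary =
    distances.foldLeft(Summary(0L)) {
      case (s, Some(v)) if !v.isNaN => s.insert(v) // keep non-NaN distances
      case (s, _)                   => s           // drop nulls and NaNs, as the old impl did
    }

  def main(args: Array[String]): Unit = {
    val partition = Iterator(Some(0.3), None, Some(Double.NaN), Some(1.2))
    println(summarize(partition).count) // 2
  }
}
```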
Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../org/apache/spark/ml/feature/LSH.scala | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala index 6d5c7c50dbacc..9d647f3e514c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala @@ -139,21 +139,21 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]] val modelDatasetWithDist = modelDataset.withColumn(distCol, hashDistCol) val relativeError = 0.05 - val (summary, count) = modelDatasetWithDist.select(distCol) - .rdd - .mapPartitions { iter => - if (iter.hasNext) { - var s = new QuantileSummaries( - QuantileSummaries.defaultCompressThreshold, relativeError) - var c = 0L - while (iter.hasNext) { - val Row(dist: Double) = iter.next - s = s.insert(dist) - c += 1 + val summary = modelDatasetWithDist.select(distCol).rdd.mapPartitions { iter => + if (iter.hasNext) { + var s = new QuantileSummaries( + QuantileSummaries.defaultCompressThreshold, relativeError) + while (iter.hasNext) { + val row = iter.next + if (!row.isNullAt(0)) { + val v = row.getDouble(0) + if (!v.isNaN) s = s.insert(v) } - Iterator.single((s.compress, c)) - } else Iterator.empty - }.treeReduce { case ((s1, c1), (s2, c2)) => (s1.merge(s2), c1 + c2) } + } + Iterator.single(s.compress) + } else Iterator.empty + }.treeReduce((s1, s2) => s1.merge(s2)) + val count = summary.count // Compute threshold to get around k elements. // To guarantee to have enough neighbors in one pass, we need (p - err) * N >= M From 75dd86400c3c2348a4139586fbbead840512b909 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 18 Sep 2020 10:47:06 +0900 Subject: [PATCH 0061/1009] [SPARK-32908][SQL] Fix target error calculation in `percentile_approx()` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 1. Change the target error calculation according to the paper [Space-Efficient Online Computation of Quantile Summaries](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf). It says that the error `e = max(gi, deltai)/2` (see the page 59). Also this has clear explanation [ε-approximate quantiles](http://www.mathcs.emory.edu/~cheung/Courses/584/Syllabus/08-Quantile/Greenwald.html#proofprop1). 2. Added a test to check different accuracies. 3. Added an input CSV file `percentile_approx-input.csv.bz2` to the resource folder `sql/catalyst/src/main/resources` for the test. ### Why are the changes needed? To fix incorrect percentile calculation, see an example in SPARK-32908. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? - By running existing tests in `QuantileSummariesSuite` and in `ApproximatePercentileQuerySuite`. - Added new test `SPARK-32908: maximum target error in percentile_approx` to `ApproximatePercentileQuerySuite`. Closes #29784 from MaxGekk/fix-percentile_approx-2. 
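For reference, the bound the fix switches to is no longer a function of `relativeError` at all: it is derived from the sketch itself as the maximum of `g + delta` over the compressed samples, halved (see the one-line change in the diff below). A tiny stand-alone sketch of just that computation, with a hypothetical `Stats(value, g, delta)` mirroring the summary's samples:

```scala
object TargetErrorSketch {
  // One compressed sample of a Greenwald-Khanna summary, with g and delta as in the paper.
  final case class Stats(value: Double, g: Long, delta: Long)

  // Worst-case rank error the summary can guarantee: max over samples of (g + delta) / 2.
  def targetError(sampled: Seq[Stats]): Double =
    if (sampled.isEmpty) 0.0 else sampled.map(s => s.g + s.delta).max / 2.0

  def main(args: Array[String]): Unit = {
    val sampled = Seq(Stats(1.0, 1, 0), Stats(5.0, 3, 2), Stats(9.0, 2, 1))
    println(targetError(sampled)) // 2.5
  }
}
```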
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../sql/catalyst/util/QuantileSummaries.scala | 2 +- .../test-data/percentile_approx-input.csv.bz2 | Bin 0 -> 124614 bytes .../sql/ApproximatePercentileQuerySuite.scala | 21 +++++++++++++++++- 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala index 2797a40614504..ae7066d87d530 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala @@ -254,7 +254,7 @@ class QuantileSummaries( // Target rank val rank = math.ceil(quantile * count).toLong - val targetError = relativeError * count + val targetError = sampled.map(s => s.delta + s.g).max / 2 // Minimum rank at current sample var minRank = 0L var i = 0 diff --git a/sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 b/sql/core/src/test/resources/test-data/percentile_approx-input.csv.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f85e2896b3a89562bc5c49ca7eb915997297d9a4 GIT binary patch literal 124614 zcmYhhWl$Rcv#8xtpt!rcyGx+O-3bmw0|A1&^t8CU26qSq3k2sa?(UG#;_lkg-ud1; z^PQQyfA*PK`?IsNJG07GZnC0EmRu&5+LvX-7^LNs@7|I8&o8GB1L_sXsbr1jfvz?O zII?rcuSm+Y6CUicMKz`5Wl=4+W?JtlnwLu zvs2AzC)g|O`e_ck_Nu{FMN(JQ<73dN8WM_+jDHV7PA=t&AQ3x#CR<2iQiwKdRd9O!~v&o=k-ugr!lTkGK!2}0d#v>+8+^OtrtdRir}?H@-Aob5k!| z%2(Uzg&}LqwzWXxX4^o>2R+o;92;~U!7nkHH?vdHst{}Q#B*EPwNQjY@6uPgI zViiP(Zre=ESu6A)w3cumY2DMl^c+G?>mTO}6x7fb@gdimAW+y^+$?eqQVY98Idnp& zKtps?aG+rp+{1Gt*f%w=wNq*+Yn|K$o@<$Qwbp5hgmm&-X;ID9I8zc*Re+x6U7E%b z){qX1rPgSRgv-?cIuEnlLUFU@Ibb!cqe8vPZHH~j?M0g0ti9DzoMo%^Y>3BZN2-X$ z1XK$}<`sbvo>Iv2CXl*Uisu0g+3Uh<>zFy@9@Wy?gFNV^Am5ONi|e!Bt!Z0nJM$z#?H%i8o^XUxN2ePyx5*lk*#;?MZ8EB+ z!Z$JQsO{2Q+_8k|pU4%a+M0^_}7W`oi%3FY@3S@zwu%d}(Fl!!k}J#o;hE{f@(;T%)3!0VAs* zf#q^&^S#3}R;|EDZ6iyhsF7IHgmugr!GPM;HH2;u*#Bnxe>5LVo1%s<%SesR5$9b3 zX0CB{B~A{(cy%g01h0}JNeihAB&*TPh$YDq1e4_&SE#X?MgKqH|HY+DzC=o1O}-=^ zPNOzhRyEZ&Hge7gp=Da5iU-82u200N;eci3;%H&=Dd~VJ*|gU`zr%Y+0etrki-LzN zD;8*^%)tR92G#IsYmyq9CJ92Td&QLbKEtA{-I5@%m>nI@SZ(AFEH*($C1h4k1F1HU zhoZ)mSBnK^wq5<59b(*IYKm2Bs)mPw!&S%$1R8JdfYtOQl6VZ=)>sL_NL(HsRX9zS zaa62X2h@cElIww2UZDg8a$>`g)9j#*MK#J+0$x0|NC(vh?Enc}WHng>GQCRcouQGf zG$?4XQpuD~u%Sv1%*%^3@vPhFgOry`%&1rIV0^A&cbq$e>2JFUCb{WBE0$p9ri`1% z+wkj`dhn^?&U%kkzzUoV8)2EZ$(adnG%LMqUi(({xw13a8X}3L+sc_%F&6v=rs!Bv z?)?yG6)TA(Y+77~N+KolzDdm?+l_doMpJf3-d)s}G+}-tG}0y`t*lxK7y!U_6OdPP zdF@;!+-1L_>2RY24NR5k?($k_!M!Fs`Mi;J_uS^67s?)yGET}=uh@*LUfJz7`oh!8 z%8FChd8POavNCC3T2#t?sZf*UVCL0hFjTTyp}R zZMwESYr=kQ@d>9arCM|g9A(;}pz&>9^mWowmEUXU*L@3JeYc~~6F$*YHHtMt5N8Q17N!beeLmgy2%k23>;)I0RkHDkv zT?pYxXN(ot5&XY8W|Xf~5aQ?H?LoE~hFw&=Yb*QqPKr^Pctq(_h#WiAJlasllIx5w zFKKuH^^(Kh5dNk8M%d$r)P58D3vwegWG}|}e8n-YSWzL}nteY)s-RcwiGZ!C#eX+o zr6)u!&_gCzGXw&-lVyb?f6Huqakm`}JTAl+7W=XHFme5Zw+wWc)DN^-0~fT{zKT`@ z%UO@Se{8!@nFU?-Ddb4CHEc)(nkjuaeMQfG4N>9dAQJq<7hiPllPfeM-Wu8cNoHr< zh@drTa8qs+?*#7htT(TtW85VVMn%3iOi|=gdA6_KkM)@NPE(=dlvO6N=p4wW7v;UN zAX8r?JD;%)bhNH8A&@`(+pjWrN^&yK2+Q?YtG%#Hs!1-k!?*@1_6oPuKZTY^MCsz%PvG*>S5J9?-bDi~`l317B37~pxL4D{&pxy`FPmpfh^BN`6Y^Rt5 zw$FoW$q80mkMsI+M}D}9 z9VB|HuxfMb$v&zc9Cr8vKN0s&T!vP16tSCTQ=SEL3{%b)+aou}8LvDQnmCJwd)BQu 
z&M?whV{T;EeqZeKR5ipA%I8xs9@$KrM(Rc5*krKMb2@eo_Gl{c-OWdE1B)kr|3w62gPpAYASZ<`uWdrYydo@Vpd!8%cQ8OyCEKNKwc233ewYkZEdicCOp_LVcg=CF87iM?#geo2twCP5+`sOI2MD z#WD6UvO^*#lQ{cbxk#UI7_QdK>-{8IM!A8cDSDe{?c zBNO*)gg;079opjSHE9-aIXq#$4om!jlgEt-lP#FFrd0>i80ePj8K2vzCS!@)eJMBu z2rR|W!1gv&G#!>F*qp5VjZhil5I+(#tD~#yXLlFURO_X`yeO4aDP%(`f4btMk%HV2 z8~M}*fGxpt(9g|13v(qzh2z8N!L-noNHrNC*V&6K7jU{8+1CF*f$)RV@Dxu@+0+KSWkKs!-|JFEU4P_| z0Zc}=YSuC%7RHUBt4xJ0OL~SCHH6UEPa!V{jPeS<=Q7*= zLd|$u=k{efJ#PLr9y>svQ$mbcx8`9Dp@S?!&j?@gSTSMv@~r2Ws=li%ZK32wgI6J` z`%7Q)L80`NenLqs(OZW}4<@Ggq+*=(^ z0JGm~l9$H`@R+S%rfD#CYeatFlJZN%A4$Ns1A~IkU@Y5Of^5aN$i{(ab=^sM+2Hu} zxYB{2q6&sT_-Y}KwrxTd>u=--TO3*4x~FjwtkTLq?6Lt8*kqixLp23qd@dTp?Gq#) z1=j*Tg-@peDlfXpt&Fc;V70=m`Z^3OuXBwK4@=9JWH>4Ue@S!wu4qm-z8SFEcx$Gf zL^jYircr4KT;_QVstXShcUt{FkRsz=R}hv7bJOsN z*nNbdy6RJ&@wk@3CEtukLK!HtaNB>p|M}`o^H*RC5CDtd69U6K-7`&9crO^ln}4Go zAKn!Xv~-a>G8+x``02{I#W)u)k;K(T)SGS<@ZLEFJ`Qtx_X&MzI3-54XOKR06KHjB zsd!-t%S+204D%e>qgose`kKf)bPY}wrZk-9`Akx zFgx;(z&*v~(oIIT2Z-BN%g^t;Vs8x31A7-W!+Ta>>Vy+VabB~s8QydKwQ784XU*@&qYumCl2 z#g{a6j}Rm&vyt}{8~9H@QE6B?3b5m81Qm>h7XKU13K>{(P<2=|6Q-Y`h~T{6_|Ekh z6Of>k*9H;9gT^zRm)0ZidM$FT;uD~;-9u~x>wMOSLxLA#GoJKUAqE452%6kybzG87 z)?&r;imwfZM=3SAsP149fj>syK4DI#D@Y=;dP`f{5Te6uu)_t@$j0LPs}^s3y2a zKCAN3Wg-qCrF@!nM!KdDzxFz)1rsU^CGaNOf2GS4DC%$GEPtkbd&13KXd2Z#Mbz)v zj)ZJDUT)BL=7xN+ReA55NvI89$371Yr$sT0l?eoQP%AOeD)tZ*(~remu0&xm_eQ)d zX0}Me#cOEvzx1Y88lLO-3kPDcD(8v9LjrRU8OkwG&8cazkRbDFhySis#mV8qOhgaG z)3bF}x1U_1uCBgMr4z_Y?46bF_F$qv15pPmnw*{Z%jMr5Q@H-sO_mqo6fa=GyH0C$l@1WZz@ce)Biw!Rli!W0ndQT`t;c zsb)g+oQf{Tg**^r3$vIm>_$=00CCN8e2^~As5G!)xjNx0JL?by+=3%je{|K)h*E2g+I!QG|(5qmL0$<38jkKKSG!WH>Q zJj$L_7SoqbEG(hEwy(H^Ub-qC%yjS{U95+pU1WOz=w{K2$3xbHe5* zWLypx_mt4z4iCXCf}&yq@v6)ojS=jyt^Tg20+LC5IdC@!bIiYsbubff6i>LBTz zz~~Be%*{sM*p*Qem*#=*4oXu?5^ZjLYMA7sDW(| zi-*=c$`DvlQrsW#y5)YUB=qDYr4bVqWUOtiNZXa}MT%|B9k>UnT}Dpmn?|(SeiZSrqmIEF8bbu}lA*6<~_Ci0gm(~kkWnTu{JJz}eR z&mKjZ!LPH;dIIBEY6NqGF@Lv>{A}y*%kM%n9SIzsx`Q?tuAc4}sRR>Zr#dKcy&UN7 z-oTa#6uRbZ=Q6y1;Iuj+>r@*|V?;GJ^{wfgr53?S1abAKR+D|^9u4|#^eXIn=EjfW z_uV#-ibMt>?lL6oRzsS~EF8l=XI<;HsLRoF*%?Xl&=|C)uyx*k>=5~4g_Apb4r&Am>4avKnx3{OGc~OfzzT zKYL)m8@Au^Cz`T!q#w5SU^3C6DRaVQ>vp-VQ0hQhqmvRvR4iS7UuqtaYvvsuDYe>O z$ECpxS*e?+%x+-@^{*EX26phOdUtvIV7lYi*)E4Y5bxijcKpc)D8aV}vLOKmhC426 zF7B{ZEXl&c+PM!hhrV{FghsjUZwUwI@y;7BY2IBp;4grW>tYj%(`q~tx*xArB@)R- zp*3oK{w07%lCN$!!Lsta|D#rbSXmBhmQIIRPyZ?$drk8fiXlMRkbn{mq*UDJ_m=ko zdF*BOvJWg*XS6F1Fx%C6C;RIS98>yet0m(X$4~A(2b@ExL`FRZ1Bl}fb;a8vnc>@8 zBxskOq$RW6CmGS55t6!t>CBx?$gSR;nU*GhWHxd$L6F6PF5)eG8+`0G{eT6PyGHi= zmj;UHZ3(80%9)-cG>G%P)!V&vFYz9|IFgSwhcId4rPT*TtZ0}>q_NN)>tD#0sANlR zGHCP*n~)(-MV){ewPp3D9uw&ifB)g@mk$^7+cdZ+=hr^oJytF+n0|v49Yxhk3il{) z1Y)S>c_*Ei7Rcr!ct$72+7`_2E6+1)0bf}c@{gm0-o{}G`V~RpV`it(*b}u2bscI_ zPKqq{wGV3gI*}$lGrNj60?a_QI;_Le|G0~FL5s|IE3o>!!m<^5zWs}1aFWJjWXGu)@*zTGlA62wcr&1kd=iNOJD|PCng1}gtEQ@b&sxEE*G(5#T zmyt+IgP4UGBmSX@y(~tc67pv!u?3+ONy});M!yiNldhB*k5RvoEZC--paOLdUK1pm zX|m~_gS`RbG(Peo@21Qs(*t5j8yjFcmpRNCpmZ+2e+C}Njpeo-+EErHTMklxcyg7* za9n9#6%TVa!V1+I6-e_Adns9H3at$s;C)V6=sxUWAHrgdAk_#3Q|FO3iVxEwz%_zO zq^@$kM*X{U$+A1cAs}1~t~@~VMN)HkDoJzb`99(#hNICED)%&7RL>i4Lp5s4dE$!E zFe4eXGwo=6X|_?xCqADOqE|e1+gB7}7Fn^NRwVp+=e8j{X>w)S3oWZ*1-DYw*-4tr zGcSzEC6dD7=G2^UYR3>w%rtTE3d0Rnl~;7e(k{%mxe;wQ{UByP?bxgUYlCv`P8ew*8W~jbaYJZx2ty6_WuQKo5|i8 zkMhMLT?`J=+S@rE&@=;6Ruai9PG3m75B-(~8V#0@8uo2;u2li%{KM9YKaQD-aOUO` zQMtf79L;IUaalBzt)$8dcTq8q%9(Y#_uO^fw6Sg~9LL&1KU;HY*0uAgSv7~LCc|D( zDm+=W^~2WRuJ-~gmYpSKA$5{4o}{V;VY>`>1hWTPb051>Hsl{zIh=9X*S08YEV6oU zul_ja-wrEx?SF_~Qj1kt)!O=_xk+pFzdz3Yad%;^p56Z23k)upx?__qh;WseCfiQu 
z(VU#>b3+|`JDqB8YeR-GF=CD=3xl{?T5voY7UXXj!`XJB%8!HFY4qz-R-u}w55Yd( z>@HzAXt{@(%#IVCKCwG=vGeQGJUtLK;;C%3_?JQpi6I|O5iB1Is}k|jo9rz`ixGq> z)b>f)cAZPo)fxEr4MfHVcb}wKHzuuqBvX67-rwNDj!@fo$s3;|&cd1WmzWS31_`S= z!pXfPj?vlL%UQEea1?TdqY@#7(F3U2j`Mnq@tO(mzLOf657lJjLARK@?EILQVy+v& z(Nv$C)|rttVw<(-$E_O8&GyaSQ)3X~1K3S1@(IK`+q04ul;|ZsREaz}{-(^E)R>xm zQW@_rX!@!i_Qx&E4(bcjQ-0Z@jS~xc2hpFfMIr6-VcA zTlopqgcrfm#D=~Wdsf~{Om)O44mPJ6(tPc?rU?GvrFFn@YdeUuyCn6sVZ=?I?_*#~;Vey_GHB%=0%hd6{e81giu3D(8ISO>U-kG21tv#~Ni@S*6p*t#|EAOzx=ahxsyVwtIzrL%xm4 zBUS^g%%YhR5rK)34~^)J*3{jQ+xX1Oj~H4!UJoEriy08SYx7i>ACJBNyP${!|Ee3Bv2 ziB^rl?>I~{mGW}bgmRdh?ipU%ehc*2%dbJQB5OCyky@N_(#11@LgNh~#aWZzY!^K7nOhK<{oP{>G$|9(Tj1KuxA>bg+}OuYs-qumQc%j7F&(<ZqP54I0~V zvh`&1EjyT{p{mW&03bgEBoWK2@A(f$$lrctw1W%Tm^v?9>eqrV;XubaU)toSs{5Rl z^B?JWCr{m3MIO7tp>o=4F~03IesOP%j(fks9;aC$)M& zNMpz{7dWIWkXkXXUx?n_LcJ)f!Ips2?*%q&byQuMISOGNOY_gKa+g_xh08iJv%bBzjh=-{$50SJFZ8`sk8bJj;5>$6;C36 z%HfL4oKFDXbDFZgjR&0!jC|~LV2s%0)+fQ;3B}N~Rv|H<=vbTCMMcJ8 z4fPH)-Pp$-wFcC}q*ViS?25Z<3RdX@UE8M)>ezVvQ*o35S_I6DhBO)LsRH;GIVoN# zikDVxafM)!lKo|tf!*eL&bkduy}MEJ!^!jUFw>R2&$TUt3LJewC#rp2xG>kgV3u_U za60Xax8#QSr2YyRY;8<{N^lCvoLF z+5@sHYVqKUw)@(<_!-;I>>6$DRiz~W>G6aISZs$*~fbSFJk=|_N17se#aM!t2$g6nhSo;(BKvP`=pX^A5p zgUdD4!+1&DD&b9NhIgUi^iMl_PdoDI6W0A}DMuaIaZ zY8l4$$}($efA^C6@qBRIWRT5FB2_$a?%-jpO7dt5Qxf`|U(4G#P|R_=87*Z@^yb23 zJ%sF6TYF6KPD3uYz6Gz&g&OMdmvl1StwoaYE*7GMptEb0$t5-a~QNy#p>m(#O);b_EhU}p< z1DDPxx!O6qx#8Yh>d%ffqm;ybW=1GE7JV{bOZ zLuKw}z|LPOjG+nkiUX0;|Iqp>6X#B$zu2U=TCHfv4M#qv6_W{!;&J*a=f*>;{B!zD zPG6r_l3nSibk}_{rp;?jg`IU-EaxtJZ_Ih6T=X=OCA&q501o7#-tCLGp}bgeW@^gz zt3B!uHsRE(LBD!on%+fkLlP-PQ8B+DwzHo8)q&z&{{7)G93zPNMYP=%TY=AtX;Pg* zwQdaWs$;rF74b-lwCG#R)AC^4$_?HlTOu2MEFLk6*qsA>rnb@5VnlbfHW~6w8$GXD zFd~x54UQ|5nJc&QHEE(bIOgNc=2uDjDA^Sgp1&RTBqpG5)>J`ZeN~Y-lAzV-D}DQI znC9%6CL%XFccK`Lul{kko$p5%h$xyMD z_Dsl&Uu%-zv3=6=RP+k)PsUur`x0`u-g*aw_|xArv5RU&{4#mI@oA0N`=EtK5b zeA>}0(s_tN$6$9=rX|5i`F*x{Q-2)Bv_R^fzcX#2SfaEyva=DjDGSSv4Xh}_a9~zZ zwUf4Pg4p`j+Og#-W`m9Bv=owS&Ny*fS9k4dsdMaHnnv;+$$LhM#g39e$AEF1eWkp! z6{j!sPxSJ(!RCT-+FYLqPM;z1?*sj(DrCiAE+vODx2w{DE=gjqB8hB9lxNS>m#p-$ zZJ~Ux?t`CC*V=to0+(qqJ8-##7Ti>3sP$@ZPbA3`&+E=(PjEbf-db%6LHHw`Tjuio zT}wvCp)^B&*C{VPdpGuu^>mlCuJF=rpTXnpy>Zg46#STpOx^!suOn3Wl92!XXo8); zi85z@LGi0Z+JZm`7tP2ba^+|MOU10N^-rqR11)(Dd2v~p9~!yZO3EBI*wiKiHXu|l7mQjvRfu{ya+*mvb^5Qyzkv6w+9x|Vp@KZcC zC=`5$#~hXBD{s)3j6lGH#DOwttr!>KCzY_E{oTfu#$4(i+_n9Pn7|e-)8B!+^K#1! z4ycV&O?g{uClO^5c!Z=G1A#pBA3jfZAqVZwX6$6JNKFOD>YNvNk6ZtR$|2Y|NX4dpd2U{t z1pepBTR_eppjO6?1C5yp)MgL>_z38NU6}zce4Z*b?k>+$Y$)AiNto?_EGrwt7G}yC zESbJ|zIA0;)}@o`a#r>fdJdlS96#jj|5G~EsEDKZ%z3zV{Z^`Z&t&4w_$)GFK>gWh zqcCZ+iGEd3;X=dDS-KnhoML3u?~A|I-`w24UM@3njRP6qajgPtP(g(3X(cENFY&X1 zwXREq7{t4B^BZR|CD98aY4@0Sm^K$y{-O4$(ii7flSn3=i zf?H^<2I~u zUXU=rLx${MC>*CRu-VI)P8DlMX!^fXZcX_ZMO0K3gRSBa=7^bOgf9ku7;_CBcx)bClC1DqNt`-GsQ-F>aB zOtKPHR&y4tN_s&R6BDE2nsrbvM6=4iXI_1>-cut_np?{F6_6E;+%5Fd`>GoNfcr5? 
zixaa9Gz;CLT}MjRg^QkNsJnW4!Ltjh%L0g9z(b!y`S6y6wJ@7>(vaAQJ@5({z z7mEmF=3N>%gLcoZ=cYF;3p&DAt151cuB^c}k?&#ik@@mX!XEbLjG{-4Y}E$qc4P>S z1vN5>c_ljQtqq>J%hvvNqbE9=UOiF|#hl=zSI_521Gr`03q$R13u-??P>`Y6fi)qsLr}&ynO%R#rN$><7&WyseV}|EWeHIkA~=goik;Nip*)0zVPRy zlK{@ni|E4Z@PmLjkNdD7R;8<1qOJD4fHDCQt?&0QLZ>Z)rFM5H?(wrU^j`0`WtQzQ z#D>h_@lSylNQ`5xe(8Xhdll*aQ~z=k{bI)&K{u22w?EWI2Pe^FotI)yqlFl0%ATTv znHJ;OM@6$ubrm<3GE-38X@^tCS&_`xj`T!sO#==@vFQ0-uKP=n9>p0^kVp^(z}7-I zvZ>w)E1d0!Idwa~sl|dT@Y~D%O&HG!*vL6?E2~ z+|RtQE&1>Cjz7W+3TStqq`eB0rVcp8n+&5^SQ6IBYYDTeLUX(8O!PcgNzk_E7Kp>y zsCdUj)}BHJ<;7a4Ta^2U$#FXSWLZJodR~^x5EUJge=YtuMrS=zy?a=KNWeY5cx|bZ zpzQVez)RYkY}J?MR4ETo?nTcUlc&5fO;>$T^ZdP2LL9VGpVE*!lt{O42j-1{R0{<@ zg_N@GKbJA3SOENS{5I)YJ-^_SJp3lwwbaw3x)zE~{7CP#a}&{2J{mQ9>AbUfVg;vo z_#D#*!Y7sc3jU+ZHicLodCB7HG;bw;U=$#pF6};U3DC%Id{19!5h{8%&e=p+1%k3< z?2V*OblfI`M`gDB&6v=h@e6=p4e!d@%lw4oab6=Zgt~*JjY$PjZer1W9U6b+zAz`e!6{P|zyO+H@OHWi@pT~qE z9HijI&I!ENfWn0At>)h@PAcRQYJOb1W`>MRbBJ)DsPCN3)$a|d1)`j=9h@ZA7bAb) zy31wNFpPt8S_mt0oC9#k4(ot6Y2bwcgF#L6UpvjlKU&`C#b|-NX;ox|QRk0yd zWHo;GqvPAed;Zq9wRQrfD>09fsGD?(f*8y-Ijh+PVY5v&yV50`HbTm`pala9bG<9+T5KCK*eW{n!LLm z)A=QdiX4V}j|+@wGEV#LcnDCO&Mb2amd#^`Jcu2ByrliO^N9)8L+>E3KOET%!(Y2R zRC-u}G_9$diO#zRRGut>8Fm)0S}ZLxH!`?ljj!wKLcgS!$d85{nEAX_Iu}uB@&2e; z@=3`#uKx9Pe>)s9HmRPNYocTewK%Bfp4>K}U;fj0!O7Jq9dX%*;qYM;>uQ?YJD_b8 zyr9P+M!ghCJX>@=rV#6HT`&G{6w|>Ql{yT5HGdK)@{8AOY}ZaMGM z7?d)EHjd4NNDhJIYMd~GF6ku^`ac~Y3=?fo`Nn+xIVBLNC$=-U<;K*{9*7Tf%*6<; zH)m`SfB^7s%I6(V(k2BUn8SC5a*7LG5ZC-_J;-1u?fU9w}#8N!j97O%^I25 zq9n@LJrhqcFCw3VU`(vbzdjI^toiLG_^ImQw!I^rTWZMIK4FdaL5$BB&gFdD z^0zBJ*5dE=IsSdF-3V1{(+$VR2l_?_b6wF`GE*G^ZvJ&NsCV{d1A z8de1-L7=ShSU=AvpKI&)Ey^>}{lh|0&S7sR&N)83&4TMl`iBND0f{(EEVEQTFnXM7 zoCz3PsaYQV)jSv=E1Tc(615MBU%v6+9URTH(?^#II)Zxk5($FF z{b(bLK7V#P3*W(3T^bqA*OeL1gI4ZxGMREGZO0;^W2sQmje4`LQ9`a)LnM2mujN(n0hgQgVlZ# zd0p6xqwQKY;v!YLk`p`1u*BoGcHN{=v+m79zBN?EM?*SQSY|bTW9q|5W3#!IB*%sR zx}_7gbj)S6Jh5wDeVD^rP35TCJ)_-IliQ7Gux8};mu|nuzLmg0bg4#UOU-Z}u;*%L zdK>I2ie*P;O!GAcV=24~P|IEz1%zU^o|82vmeqNE2>Del)>5)L9C6bV9M;4nBMGjb zduDv(YmdPc*AEHDCO73m5zK8tYM4tbkG_5T7p=%edLWR^AP zM9DTrklD_V=BXlqaTQC2-ab4AZT{7aVcEJGjY|f9V>7w0m-0~2a09FV)8@@|vcU%L z?zm%_)ydCd+k{0*H^eswVoTEXZ^u)be;*$13F(ac*x4dLzVDIA-usv|G#j2);!*3` zMi21O%+rV~%Nv-UojEY^9r}uydV(LSB9cTjIgi47FR3?MA3jf~Tzw<7pBOZN#lKS9Zjy+dN4AI6T7-iZr?Y|CGKZ3#*1k_d@y_xBb7(*&{wypEiYCx3$# z0gsA?lw1?)(n$W`N{u+hQR3N*APk^<&jR|`NVoFRij$n3amAkXvwMy1J4oz2JGvJ@&u=z2fwAlrUh%=oMO#^=L-BaoJFrF zh`$PfR3CNVpat?idN2g1K|f)#01%L@^JC0I^#??y$L^M9B((px$L~%oAk2p5;+?OzwT!I?sIr) z6gj2?JJS_%A;@F!aT6DHSYSgQH4>9-!q3;sp1c{Mj~3_;9tF@EmhkL3(`>S2yvTKQ zuD=i|gA3i65SJncN`~TLcxE1_KfOGosV7s}&5&f);WQ59^t4SbrFIBC&5_96fLF$y zd3{-9R|eTzVR==mH~nRqi1emCJfEPDb=KmxoOm{KBdZ2mEO~j+x0Y49qeHh!%>$l_ zQ4igmzbKsxdQ0-_=XIZ-NH^Gkvh3qtlNWr&fAxBK;EP-JAHCPwc;*E56^?hy*z3%$ z+!ohuF%A^Ty51m%9V;VYbZ&Tb?AbD9BX`#AK5d0rcJ-@1-4o;UapvxG7%MlW=Z(1? 
z1KWNKr6YO+EggpoP0^)k{Tnk&@H%Z+qcm{w)!LUiavP@RsBhP&}fzmfVtEj zo;4cHkVrH98iHsGAvF(oY>zNV54pI9c`3MTL-s@@@-o-iv^u;R`<^-n$m9v zMwD~6k?&SD6`kII4}|2xx6aXSX!E`r2V%|kvCh@x?&t^}5C0I0dA^oO6hPZBZ$Eb>wosJfVI)P^|LVRYA?NdFk7Iu8OvsLkzL| z*>1$|R@TWl$J=7L1AqKJ_JmTKe+m!AnfY6H>7t@pb!CF=RWcEex(I`K<2;w$Wp)Z~ zzdo~V%Rq3~ilNRrj#dYJKbxK^TsQQ43V1ut31bjC7T$K&K&EyA@e-?J)VS&8iUEVj za*wh$1L-I7o&>WbI){{CMZ~1@mafeC1G6uUrA5&1PeEzDtHofPGYNfSzS|PY2^O=A8ukkUG<4j7f z|H!dLL3eaq(&lOV73M@U{1XojSj;GItul`g$s!$Xuz(chWZ*=_b$KfId$ z{V7})USI?%Eio*0fbN?x*|W7wS7RJ0S!lkt=ANL&e(?`-b0ZRv>hl%Ww#xT$GoaR>IA8{!l2(Vr zMCcR{XR#T0P5Gkf5W`U&g3kk$T6^@EzXa{$An-9)D8)nXz=8>7c-~&s3|W_#-6wXF zV7L9v(5@2k^79BGQ12wd@p5~qZn4WUuz(s7`hBaY$Ua%wgnIpuHk){EqNV8CW5R(X zwGv7BB(xUD@KRvyFN6Ah@8_WXb|WwYoPG;(obqT>79ddZC~IL30v_`z#x2va<(%@) zZ_iwj5YpurKV0evjprGi^1ltkTLb4KLgeCW{5UBXkH?KVO4ey`D>YAr$w^()`=s+$ zLL@e8G)fI^H$cge3zI+6o$vjeIZ~(yz$ywpjt8C78yru@0HWEN+74x(Lq6OlwfI?J z;r&hz;!4VSn{B3gcTU0RZsRPuhp}t51^K23gwXljq#+|zkNDi&Q(VT!cDx>#E8n#ZsEC`BfcTAhXVTj@p$Z~Qt)8% z1+X$s?x>}6bgtmKrCdNNn;$mosXQeQ)Nesx4*R_pao)?8Vl8Q(SXuhm9AkJ;Up&N_ z;r_byjz(3uk$ii;Gvs1ifqX^QBG7J~|KD^ST_90fbN5L84<$Bm5UuHMb3sHc~j ztCag&&p(=KpGcI;VP99h?4(2{Z3^eNO{9u6X(O@KhgVtvK9u2oro@HRnXyF??F(}G zNoOR8B5~`+zzObvc`FTxwCHJ)zkh!JIApUWO0=BF%dV?k*e#06z8D_$w6ne8d7l*f zBzwoAYz0&?Cig2VXZ!DCRG7bgc!m9S07qB7G6YQPrf6xKjgy})Y3G4H%G?fjsX}#J z@JCuZf8md_uRfpaOcii7A7`cuZI+Xejd2QJZBteVL`Uw*1`c^5v_(@L&f6KlMc545 z|4PDOlY{fn@cgRvWUFNrmcArT)J+Lk8MYSKs_Rb-C{^u{Y>g=6StG@B{{JXC3$8Y_ ztzGX@S}5-B?(Ps;+}+&??hrztUyHjG*A{no0&HAEa1Fto5Q>EscF(xI|6z_f=6dB> zm`~5vMLm2-&67eA+9WK3mIEZVQ9&nz3zW#-yW!y1p;bm;j}di4szyPY`8`BCxXN;- zjW0@KQ%^SQ#N?PlRKW>C-Rfm0iB5%k99~;m?R`4qj&*mQqr^(yG$$**S(u|ylXP!a zFkOG?{^{)CmEKicJM(UeRT{Z&R%buZT@Et)AQ^a`gYU#6Uf;03xi8IxFq z&dzgtQ*Ao9CYucUKi3&R$s}NMh?uJn;i<1%4Y<6Rrtq-%qU~Ol9ZCbZL5EZAv}wkh zT^r5{@WY7|RONcJUnX(;g+6xu*dSfBmerK4uPiC8F+4^BGy+VqrPskeS=M=jhy1YO zC@i*o8AP-~^T6<`UtdBWO(?84uiMUY?~R)pJcjogp0MPi=}BkX0#)%o z4I4zoGh;JXeFSg^6}IXxa^MyXt8RgI(h6e=W(7D?(wEbUv?_tBB!Em0;jmdtQk zLk$HH`jfu%_*OaNsFC{D-l>$3u-1Akn;Lxo?F=c9fPe8IHG9`SmGr@G#4cU{QIxBo3a-e z5O&3mt&>x*YYp>$GGHsYSw51@YVcX1pB@gu_M9@GOw#AP0x)FdO_m_#l44wdF=ldYkh5e(c$-iZkK)dE}f@WjCg#1+On(wBMs^TYyZ5+&Bk zNc*UB%ft2hroKVhi^V*gPvr~B3-*dX^Gj!o$n_~A73qvGzCihv$trCr-`5(XD=^<2 zsm|Fl{ZH)CXkMwp!68!ZJ?CRl$Z|slwn|!&?J>I2+6sG_V7EuIsbqZXkNV1($Y}#Z ze}hCYB~xI+SEtxHo_|HuSj?;VI`Vt$7M52%-^iDp{c+rPr62yTl#Cni+*a#2@}EM- zFyl!1`xkZk-ol71%IV;(iQx);vByII4PE`#Tm=;}O|h#h-kk0;nBRlPqP;*FMR)N5 z5e=JZ=VqKr6G@xAF$Ev6yAZG`Tc2{^QJz~z&Ou8p!zbqhmlNM_l&9P_KlgizWLTT* zgpCR$4Lpb*)~GBcPE`!`yH@vOZ*~|%B zikN!8-BrWsf_sqTpLOb#DzcS}>tcq@5K@)Tt$w%QWT3EH8RORw`cqlG{ni%tXNkpn z6ztXwI;hHXxa0#C7>ZV?@ej%Aki=Jw-qy7Xo`(4a7UukEOYLNHkg&%m7Fuhr@_Z#6 zzbHma74p>6$j#gEE@ky+_M$5_lC2yd65U1*+boC|{Mw{rh88 z)i@2fNdjMBG<+H$>Aku!UPjmKXjx`(;G%R5T_UKej|JbZIa61OZHSI2Tf*nJ32dH& zvWG9W0P@Z#`6Ecd#GpgumnQGz#Y`)b!syRfU;!XohOgHhn4PF+yL8YFSFVZ;#j<*v zQ7SYzJGwTv_i6PIOI~X?s(x;ZZ@tIHW&>F=ykzj>Bc5;JqUMN!8PzpyBRKhYm=WD( z!k*Rnm!XlF9L=9%ZZM$vr-Y7}=ptX=1(gyzKxLWW{fk+=S{c)%hb8HOC+n=AnOfA4 z7*w`VrhdcD!jZpx%*Ks$NhYno_8L#};coZQOF$=Mu4c+TEAGV+dMLTWueqltB(k#T zW%hc0JXx}iI9=TqiqI^$$>k=tjc;R9LF~CXgx^lUo@>xe>>C>l;&PaL^$Q1>>PXECtNyU)M3hcoFaLf_VJ zpJ2D4_;hvP0v~(My;ceW5uxF*eJ%h~*pFxh&XLoTHnSBSvnrA3PJuV}`fiH#;WA}T zy+S9%R-yAg(rk2RZ4G#s58dhSe>+Wp@l>h6zy0gC)iI*Wd30pu)<@P=SBz*HL(qp_9jTAClbo2(&{0;z~EpzN2R2@*fI5n`6SE zrSZ#m{Mgd)NT0LLDRJ!vCo<;E?~d~_j@kO*%Quj3-++F30%_)l{(x;}rdr#5mV(jk zjE!#PgO#^)b(4>a@zDnH`1u(eE(W{PZqnQ8!|DO%%`zGUL7pUOiOs>K{H>lnHLDzE zCX7=aT-sn^-ek5LoGr;ARCZwmTT@~Y6Ao#eoo=!Qvj_P2IL;LuR%-2h^UB#e3#wDt 
zPyXtxpd%WrJR=%wD8_*APV>6ALQTN;Tr9}Df?eoC<+qBp6SGFu1Rq(PCfPOJD966d z@nA%)R=`&M!bZ$Xs@PMX0W^H~RSUZy!5#Q@M)2CRKaMwk$K&V13CLt*I)&Lg_WH;= z64L2~;xlwT!Zu+27&TEKioIEPS*v+lS$jrFHM;kC@XQPph9@!^uSxBK5hyqttL@P~ zM9)P+$Pz$ut%RFlDw*fq?^dTCq?0}fVQ`WOsj+#^YMc;`CS$w*hQ=sm*eet_F(-mt zT)Sfv=*;!9Z_@$!t={Zl zi@CMK0aM%fL%=3>?T?`-#Aa~HW%t3-!3ig35Je22uyknkOnK*uyf#}0XiS&Q$z{%b zE10>{;H!(MNiuPSciB#HH^{kRAhW7(e9bvLIk*(lh4RfjWp3NZ!20;pYe^27m=6U< zqi>#CNXz{U68A|GxYLi`?YN^3OV}ijb{@$1*Sqp%*Ytsgsh1vs)(@2TxRjnLBK2Rt z=%$Fdp!D4dXl8VsRo4Y&B5rnPtJ3C^fuUwcC>ncy$~F4kve6!*cUSl%rW9}#;nQ|^ zuZnP1@0o*}{eEHG(`goUrO8!+6F49odfJx=GC?? zy{q5%9`9w?nPKz%wItJDY)6bC?`;G}`YBEmYX1a!g~-*gf+_>dGry9S9lwio%% zV=Ln?^eKC_v3vJ;^~^CUb%h_~w$UW6Dbp2}$7fZv#k%S_&?OT3*q^}mCaal%T@JF+~r!V9?fax=-&2%EjQ_a0vTGy1*OW2(_95 z$dV5|g+BD4c9OIPfm-A6ykwN0)w@LfY8_WGflTMjJL^#za;$d*xAw zl;ZBg7Lz(kif=Vjh3o4BxtDnje&p;Soay8WBYj0Dw&+HxTwfBC&!?D}96YzoqgT$iyQb&r zlyyhbXiUDY2Z;1FrJ!}!L#TI^m^^~Pl9`qEPFYI7^1{Cix>f9}W(TFoT>JM zbM$4bWGU&`=?LPE`&yciRj6vuWm~y06R&U~L4;U}0@v(xy1QuKkOjN~hXB0D7f%zZ zd1M>zv1MA8nRY6}u8(Xf>w56!i|N49Z+ZC59O0!>UPdze0l~eyhI3?RBtS-6dFjeR z&!C6dEi>k{U8o5P^R+c$cPGTQwpd$CyNJ^ClwCO9#lw_pW3CQw44p>1Y>As6?kqx; z(9c9>Ce6(js~@O`vQ|0?+<@#WSZ9DN1Cw?aV!VR@0|wviGwssOa7Gl2XV<*SNftASIoIl{}`F6ws>R!|bPIqf>CT)|@re zuwjwNacy-Ro>~KU5K|&VHA6Sx6(Y4raD4jd=1AGIo7M3Le36%b#j2J3b@8lzJbSwE zx%y_q17AHfNxEkS?F$D~_T(+LNT~;AxeNWuaJDF)r98%K_qeC>y$=N;I0EnnpLE^E zMfLY!CSI$~F3c4|(&>5dCool#uu^~foV!mGa8q<#v&b8mlOSv(lLh+$CzuS?DHJ#8 z3^B!0vVQrlyTj>H%Oxo6KU5l5z1JSNa1eRMe?qEEvmoLZMtU0It*9supkHIPH)GUT z_ffHFm(lZWzMt&$mIqOCJr_=g9;&J82!Gd*=OW#-%XNB8eyMJTD5HtSYUGQf;PL@7 z0+-gfpJ7O@Mv1fgvI*=c#0IdbD(IONSk!bJytF)~gUj?@?oz@PyDUTpS9h;?t_5Rq z9FXC*tvK!v`GH*-#kErw=MebAh7%}%+0wcT&Uq#7*tW2UUYMWbZGC?~f3hH>dctYS zB)${R#%D3~by0SA^;JRZ1wCKMHT55Y91JdO(di$${V4G#bZG&_^sh;_$v+ISIU+wg zBo}gdg+AXslt8vUAiZBBmOwdvuJESj+@4lk=XTIc631$BnOJJ&Q?2*GBU;a8WpxOQoRTQR_KHKCOZzDvwC z4vOY6gxvh-HKSzm&9Z-;u(ZwAZT51yx3Dk=EZOZr(81mcC`!=P)iqv)F9pff^9>UX z9p|b6zxLQi(7o^{O!_R)+yf53-dNTXX)^srwj)p1E2!SW7|MuHxvAM}B?mIl89wv_ zC8dC{Peb)(j}gU_*ncqb*Pj-J*}-uI z@$!q~SYZeDFfFBXGvrujHws zc^i0Mtx+&=d0;`F-qO?F(|zBDA3;Xo?Dl4R>ZKq4t}V)Ey*FxAY^PB}--Tnx+EgWd z6}r8RKz{2%z@84w-s{ay!;j59#zAFnQyTWe$mN~1k^iz12W-DZC8sC|%0-#Qn{$cH zfi^lC^ZYH`gLgHRyW2(qw1*oTegZ3iQrY^+mIg|Np!?40D%xy*zUaw4>4oG$=V^hv zSxeK4WOH%RPx{;~y*2m%V3El6`Qj!WZAX@rL-ba$+#so%cMi&1@*0%+MrCG33cAY?f6R1(;oamcD7rW z%pli0t68zbxk>k>WxK#h)b3`r?vNmjkRYrUDw2y_9M2qk?<{mCbR*5nlBg%Y@}uu2 zb;H(0@UTgBnl9fkFXC@Lvz;8w!q8A+K=PU58?%QD#3+UyKHulcgA=7=Nw zvh;$R3IWA~elI<8krsUumogGxWm9d+kk3^yRXy>8%4&|%(ZRtiyQ>O40sY!B2|4$> zr4l;ZBHpo!ESH+gB}lnoNozCXe8oVybAGm~X!6S>p#<9e{yRk5OO!E4-N2x|v%4Pt zYxuIA;kJ%7Ph~XvyE+|cOFD{CQ@b18r4=mElROswTYHK}TFGACphJ@o@Q!~T54AR+T4wJ^Qt-n}dmwVuXK8t(4IuF7PEm?Nx{ACZ{40RkP z2I`$MaYo=wv*!Az6Gw8grvoC|7|)LZz5r z$VC^NS@NOmUaV@0p!TXdU{k)nd`^6e_?2NGk%&(MQb|7>+LvJl7ooh6g%kSh@FQb`>iT*x2DfL;Pf|C4XEEoaY{~y{^41ZJ5(upY zX+Y_!4uib=?9_w8qr#^C^S9K6+;S#^ zlweL3%W*wNGOug@34k3Rxtv*qn85YpSQ84}~UbRQfQU@Vf;9}+J6*1tFM#*w_2MOz_L zH^F(q$15iA@_b+#;TQRGu&#CVxq|kzI{ggO3^Y{x{x;;um{p}U{?Pr`>ag?VV*>79 zpZe}`LFZZ1-jXO&L{LC8@5U`-a>aZ>i6;{Z{Qn^ z?60Gzo28wNq>_EAW;sp_52xVOg=RFrtq@V7b8oEAE%!8|xJ{JO5kZFH6ImCR=Dncj zHP9`;yD^abr+sM5@*tz$|T$sIAB0E1jd?)aKm+?(WmTcwxR!l>4Ujhg{DH- zLdFkLtj#u#s0?@naVQfdqNSbHvTi>2{S{>iFsF9KAh*2{4bm?c-$Nko#9URud7Mt8 zH)`xFZjjJjHwtihyk!k-Eoo?)sOddUX$~p5tj);TgOo<#0SSa7I`cfj>w1{=0E3-b zcS+OZmg@xQ4I{IDNBB@feb)BY4AUcoM&PbDps^limiysaLhOq!3tdt#491TFldzzZ zk>+%`n6M@1I4C4=6t|k0HcB2# z{Ty38ktzHk?{+T^+iXe(Q(2AA(N|4Xdb|K-X{fpD=_JFesVH#m?ZB+>-kH>&+o*o` zwjC3_x!xz4d7SY(Wk}bJ#SlzWJ)us4i_Oj8clI7-b1G1Gqm7>^HS=|Z%!03Z(Ul9q 
zc$5O?#z}+silY~izoeMbQdd$#y{OT%3kQ9F;>{Jqna!QTIP7|5V82%~&FlZuY$p6W z?)!}AI;>B^iu>qs1m%h#e%76>(i>qH9k8NCrbPz#^4ySSBj50#U&lbRfP#8>Swq;j zPQVaU1;YSs{Y3s91x+qEaCdh9y65-UB1NiE$9_`S@i6GsFsOg|2ER!j^E1qTW!i$ z6rY%DgT{NMyWcTk;}qn;#bdvzNg1+7)4$-7(-DL_6TNl$Uh0_!ZNFluj3u)uz%U%w z+H)VlsKzGcU{}`HJ#8J`AzjP>TmW6s_txh%`% z^yH)4X6jtdHwbdU)2nf*>1&1QBHEBst%#5@hM(&++Z+p&hk4Sq5;LDp%uM=zcVKsOy*S7{viZM63J7lf)}s*;bkL?kph`NEy})S${j3LohT-H328|3@VQ~^YqP?8cNKW&_>()& zTV80>`ABnjaN}Yq6U5)wQ76u*d+2!h>>!OR{?p&T7>++SYzp@r_Y$qKposl55gzkv3`{tTwEFi>T65a>y7jhS9iw)fH~CGb*4;@8c@cys+}2`sCVhNQDb63@N#rd6=*ArXo1L|% ze4BT50_kWzT9Q{I#rwYcqTUd1*|K^oFpS@L0*)Ehe%CnthL#}m<~7kHn35VB*$MNe zw1WTFx97U250Y#TTyL;vFhumXzrd%zJ!?L+-I+JZQPy}rI^9&iUeweIJRPA|KHbcz}=VWT<*ZC}Y-U$a^p&RpiC~RK??Dh~Lg(2c&Eq=C4kOI5#3@{O9(;7;;Elm%|Sy(mz zcAYex(W-aS3~v@C*mn>M`3y|83n`}zIMYYBuG<s7lh0&-=Iv{mSrn8Hfcl6M~(mC_z^euXZzchlMB_fsqkZ_fe6 z?-*1@c__-ce%KqY(WAzDX`$?xDl@hjgHWUT*pBi!J=eA=&w`LvQ0gmS z$<*PTq%vNU^peXC;zRI|>74djZwPL1s8IOnH7DUtgtv$1m(xB3xz=YO!oKM|RKUrdM^?B@x=+zdcP@f(^ZxfnKFuU89iEZa+8WzBBZ9T-q+5rs$jh zK8(z(9PgagtWAVh8nu?vaz*7lw$V4q+0Z!!o4uF+?heW|hy=YG4b`}5VPdaw9yk5H zNL#w-LGSF`E_=|Mu^XxPUeb4Uo2RvsU3uLtKq}?NddBAE5 z-m7pDt%z1TFF1c9x~E+MZl%&w6R{e0GR$CC8uj-5Rgt>4{pW8Bsa4+etUhzHeLTNP zCA*5IEbu%+6$irxHyb;@EcsyA$Vjfu%NvCWOhY&He!td6{3t#mEoTWoGsf-(wfMrG z?x{IBv2_7M%g^-k?nbv>NPYMI$WBo`hJa9!>j|Ka#IzK{GF_x=TU?Remnz~Sigf52 zUqx83g`eE+H}$JHQ%hvt-uoY`7aJ6LdjsGm-^N`UhwiwZdOTv9884ikL@n54j;Dz0 z+Vh*cq*hYa8X?KYNxE^J0inc~UiQ)pTVvt$itv5U1=;s@PKs6VM~w(Tkr+u1r{iW! zV6zWdca5?#e<~xuR0Ko0hibUp8VMveULGj?di~U!KcIAx$f|}OCQ{Yi^C6#}b+)S` zh;z!R_%v^)Up1PlH_lzYHNHYQ2+Elc6DJYdcEP2T`AtGBm0UURH-IKuGZ|F8>2uP_ z=u0Q{o)YUc#jI4)hq3Xq8uIo4Yl67D?0~6fKCpeiaK(*{~4!(`h1OXhQ2G&`Par2&wlfAK9@feTF?uRSSZ~x?2pddz(_xa0n*$?MJT%r zT&f^fe`=(=ZlnP-Cd9a*uH;BLto8;vOZN=L6d8Gl1tTxiJ{yV?%6nbAeFpgF8g*aX z89A8Sr4cU3kr;q^t4@r+b$3ei(LJ>>jjj4@$wj&!`Rgc)saW91Bc53 z!xjkD?*aaR~~96>2W_>=9o@o%o$3ee@L9__1Hu zl#Aied-O{;$)3$e2%)R5kX~NbSt%jo0`5Ia9Wwi@zVSbQq-FAB>o3qxCa{{eM=>h0 zPf6BeZa>ZT-F?2^JafcvW~szJ zsCJeNJ_3#UusLA~8mWp$j#k5<0hvtL6)#%-a!>WYYtsxGYs_-b8G(Z8+UWuS^bhwL z;p|ub#Cce`RQ7Rb-~-&TwPl+kLsQlM=Qk#cK68mzWRwC^-d*)bF0^Sy&I}e3ESF)o zX(DJ^Bi>qYf+a4##pMQ{VIc}s9>+7-X{00g8_w$3214F$m)iS7%50KXkK@i$WhJ50 zy63`9lwTp1XUzUGi~TM{vnI*&hn3q=Au1PpaN&~re8rJZP40et8&6GMzu9jvA6kWb z-*^8Qe`K+1-ufH06g_9WwJ;b!LBN5;EfR4y1)$tu$;qTY)p-=@1Zw$Qt&rY`Z?a81 zwIa@~0x4}=5IQBXE;l5`2MgbQrtNpZlZ=UJW5fVy&SRD`zoHcX_jnbeZ0{9LK0l{44NJIz!q`xu!<@ibPt~#WayMjm;G(7eb9sUMiVcx27n^(UZui+O>9&IMa--GGWOCzmz+Nzi zYh344*)Uz#kgXctG-G=+=7ywqn+@Z?Ett_7W$dSw*1tmp@BdhM!P-WD9eK*{NF}>52WD{;^Hh2v-}gw z;t(^+Mo2Wql_9^$(QZtfs2nBE+^JwDoVxq<4q_rwXQ*`tjaojd4R>eIg1CIPGXJ)k z0my>qWd)te;L!!mlmk8 z$w(hlp+Smbq#g-_-|S)^c1~?)5w2%va6h{}F1mK^GF+D} zU)qh1WgQ};PEMK|J_Igns3s#pUR(?Z~QWNI3QZF(y?vG_1WxG$Q0bjlDxNjz} z*s&D?uJcJXE&B>F0SB)lH*<|jd&`iOGmem^8iz(Xwt^L9zh%f~dTZOLR1(=$GGGSH z3g|O2Gph6oTsMe2cU}W`YG4gsn#T;G+=qM_Y*l(@=TW6&_mftXi>}i%vq@NK-|)r6 zR%O>s+hbK_+=M^U3U5k+$=O>%CqnF@h4tj~>tcV-D?VwoQ@fR3C!fKA3g=`O$9^&dmiDywXg zpXyf7l8P23f#JR%4V%}&8^6u!eb6K}pH~;H#MJpRJHY=^*}#@w~Pv`P5{+*D?!?8ZDEITTHi(4+yrZq)! 
zkl|0!2OIp~VKk~pP}=@%B9C`9%jHFYlUyRp?BEMDJwFw7 zGFo~c;G@qR0tgSqg@n^Pk$u~KKS;MtJ+YsAmZW@V6(UMv5qnAyT-*p6wJ3EAqIs=< zWwbkNY^mw;NHhprZ4Z1M9$S#3AeWL8pY!6m(b$S@j1PFgbKfxt`|~B2WXdqfc{f0x zuQhv&nDrA_U#sFX&tXuoB0>?O%FxqCMH+R{d=)V*soKr<;rQohvU}j&Yk2x&!Razr z=Fa#KQanFr&FjLVRH1xhbtFi6;dR01nRm0!CbsIC^x1Eju5i#sbslQ`1CYOHg zTjSEKE1=>M@zWsPO}ez5Kj@DnKHsLD!a8u?Jg+~8Hv{7C6OXQ>p%^45&g&w?ShQB_ zmI6nKHKO@filx2PsWWV#M>LWUKgg2EjKY5V+m5N7k(3l!Br}<#%2qCU$~j_wfJ_jqhxQOo�wo?*5buvMm$74P1@jA^)7 zCMqMamN}itGa8zM3DL2uIZV-+Y?Tvri9oMS7;}+DDqU%J=fvz0GNx!#&{O1al^8_g z71c0wY*Wl+KaS+cHl*_R9E{t~a%aMI$)p#htA_7y%=~!KFel7mfIqZ10+F$$ES9Lc zu!(4{+$lUR9=z1uJ#KNlo9bP04VL}cD_c*^4D35Vx*>_J9-kdp192nvpiz#X8BeBV zicV{e2w&QC*lU#R%oJ&oO5cnnePLq5GB*Vavbd-<9m^|<%by^je-#`Sg}WfRDYd-1 z3S~B1p((c9@Q=WAv=ll^XNP*3Y}e$27tEpqofsmjZ2yX^X`{pzWY@QmPwbfM*3YgF zPM%cz+K<28W>IZB1rb(+n4~MkYL(nSqHgNM;LEARp8k2&J;749>q!z!T*@N!8{mF- zOyDM)xf-!if9WtNIla@BAYZpv?lZXJ{MOK)RyYe>A_ojY_ot{}++J!Q4SGHYUaS8fkU6GW508o?py}vLdg* z&}JTAa(UTFkiJFQB^N2gj+ViwdAcsRNuNBz)$^@9?QU443f4ZILe30oOIW#W>DFZS zypfk+vh_z9yh$FJb~+I`%eN+BB}LalW^#JAKrwPXb*JA*1Sm`{LGEqMEDyRh|IsK8 z@~?5)VrmO~7rHDnZ7-_9GeBk>b@5%KvgVf@rES=1ZTp~qHq=Af`a6AP2}xKbmDJvU zW|wGw@xH*31=JzSVf%@`s0fcitNL2~8bG#HGBt0lf=SXf0{OANFuCs#4L$}?6X?wC z+BD&64P?qF$#nh&a!t^>GkkRTbhdSBldp|4U>T}f`GEi&U`mwRJ-0V(V`adL-o^^W z>Q11}n2K=qV{%{}kQM@&jv9cZN^)dJQCoy_iz+wHp~S=c;cv>|d8(Cp zmDT=zovBkL4Zdy#qbWSmvtZvg9Wu zAR*1QMaj%syYkA4TKrz_exai@;qnOm$02zd6L_ujWwd<0hDrMlU=>x2lWu(#A;Tss1cnb?3_g-gvkZp+j(t6 zB`jUUNyio1Mbp^3?Je-=ZE96ceLinC@VmrY$}=IJk~TWx) zT?q0nDgqjYT&&+Ug7}Dp7rAR7EwAM$>VV(u6*rZ#K8bGHmK}`&e~8yEVWmq#G{_Yb z8I8356pK19MiqNn7u?U1CUVWg7h3!>)m!A(&!je3mEv~i$~SrWk~LG^l!lXqMjJlY zW_@Ih1fH%f$;x+sT`a9F9Ojpe|x$L9c`zJJxHb{b;XSp<~ZOme1!rpuMG>c@gZ*@Y-zqdjl6< zJEueQxQ4(I1#U`BGC#ol+2JA2x}4kr*^& zj>%+KeVG4XP>9W&m%DoBVb2m2L)9me*gKH+v_XKF4Qyd2jG30UJcycQKT{qTW@Q_U z?ilCkUtktX1=gFnWbCM`p(W{a2ly=d4jU1{YOGuFq-iQ=>JR>X)W)XrUT@XwkT22H zIE;!AazW?WZBZ%J9Y4Ki@qiEUA?!&{18RJ%)-s{-Y81=icNsRaX zJ&mq+wm`t(Wgptz)b(kTM&g!PFZcBrrCt%pp0iJ-$ztE(Yfsbf;_C!=_iv?EP^pEV z!S##y2&nveJo4<8;K3Q>RhFoAFR|HtMBW={@Tq>}>B+t+VlN?g zbJG~DA54Yw>4dA3~sr&j+^_sevo_VF7aDn;QYWBybYvvow^ z4BN;Wa_?;Rv3hehc@C^#OOlr2!Ua)f7^OPtvG#n#E~_>bX-`x#$}42>Bic)l>lB_j z2jm>}lEP;B=KM7L`3kf!lkL(xq!t1zzR8nlM#ROL`VrC8%+L0xPmQmb#hEeJu)&GW4`qPl5yl50)Z* zS{(F>k%BMh(ajd(oji+laLc6?U8`lQvDps;(zZ}6FI6$G%)zIhA3=m;#hY2)5Q;q~ zm#7c8V!7-2Lvgb#x5Di+m*daXcQk*^{o@pn>{-K5i$vO9SuhJfYIy}oly<%Eh7)vW zWX@66UR#lKdHAOEWab?F(TslVQ@naxp<&hXMVVyY!3(oqp~NB4tx1jzkh^f@tzv9u zhSyIdPeiRyRB*@GH|?bF1fS`C(X5D}A3M*K79i>|jP#Elo>(~`O^*2d6l@hnUx2Ax z1Un@=o=Sk?@z)$oyM8zRn)X^;6KiI>?`rbR#9xFsa3&4Mv2zvtjUR_uB6~9q#&M`Q zrEQYqk|ZlRkehGfhr^8$ump zN2HtJ9tow<_CbhVMhu$6I*}}HI~zUbfAKnnqA5LhKXa~qhwKSGFh{b<%?F@x(xHCq zY{3p$FJQ$pp~}`;g5~VDJbn%Z*|zoV{Knn60>fAbu!{s~mC=z_$}OT* zo;&AjF>9I?n3kdZX*PAHp(ClS07k-q(=-U)3n_77vF=>NjbpU!7LnoTT5B-Sb;98- zIK;D~J3ipzrZ_YYRP z0Vb3mPR38K?rNZH7NNtK=Gr=E=R~G(_vV{!=^@_?V*H|$(&@N2}Rw1 z_$Wo#kjX_kZK#r@-9MPmhKEXn3W|96M}L}z0RQ*&&k z?D°Pd;(M4GmHK^LELWx*XX342Lt++|o(DURH#&pM%5OGox%Iv`V`i>lv0(rR~& z0H!_$0khx4bAvCHVpkXq&xd zxZxkagByc7QRmDs>gH)sPq{z`<=x3?M6a}@AV&zD$|)U#=Z_5CG1*Y%R}s^R-=ZL* z0M;N7xWqa4CbkgCm2g>jv%Pe*#g}Oa1Gfm+pAyhBB%s2!!Zdt!TH_@eVQL9*Ao+UoA5-PS&1Z zqtlz|9nvV=t1_=k^DvaqSRx@<+o(F4PQdm=5pMDo+2I4NStQYOol$vESylThLk0go zhR%Ym4Xj(Ew=G(vNO70q?u5V;cXuxq+&zKL;O_3lU4ugy+}$Aw6bVi!779$i=kh1c z-shb6UCS0!=lIY3jCZg5pkH&VeL(9cEcaePcs{U(2tU{plQg4TB|>}>1au{qf4n7s2|}D*b7igd4)ne= z6d7VzY>8CYSYBsQ8_9GzN~giqfTHOttpTDFsXkU}MVrQQlM;r&-OPzE$xSwoBD8vS zL^fMy@{N8Bs^SjUr7C|uH@W(46MM_==AUS5k1<-1a}VuOO6*sBC?utD1Pm9#r*#Q4 
zPD|Sf*%c$@YAcIW!s4F>TS2$wFzU@lda(f3A6tX3-ujhzC5a{Z0OHQ`nc$rqO9O&O z{wUqDPQQtjQ=f6(+=0aG-~j+>#O$EWZ;rC&D~E5HS)AS1;)}@?!VI{3^SAQg3nXqb z)lR5h|Eoz|n2i5r2|a7*M?tm)Gr#U>+<3bfmahZ$P>H}0*Rd39>Mw>c*irk9mndR%^g!&b+WaIwEvd{Mhc zg$v$H;dA|>HP2%6f38@+k}&(P zegfbUA-Lxq_oKw=du1{!#K*%T973_Mq-J&17jrN;l6Y@vF#?TNvR4aF5#E`(ms>={oFH`9wRbR%AcCf#D!g>NUS`6H0&Pw zFPM^MC0Rd}9s;SR!W}QN+0A=DK`?CzqfQ`$MKK3(jzi*fIXf8jx zHBOFw?TntQnv+JFd?`^7rtit!jt$!mIS++4PqdE1hCpLF1Fb)y>Pvq92OPyx|{&aD&G)0K8WKRO^ zZl!z%Eig5ttNB-E^$Q9pvq&8cTjC{uEX_f*haa1UNzuTUS45QL&gq^F?|KimLaFe= zH@2#Lx1+wOEg59rN4blxl@j|rkXc_`v6)`Ww|!A{l$`%Lu2Uk3HuCCB?Bv(%*7(IP zqj8Va5Gy@O_haR?7U)~Qq{{VP;+~Wz5+;AV=z^$jSv6NvdCG#$=;NsnhDvErX zINPbO)MBfUqD~okZ*1HL10HZ#UWW9+s> za>t*X^HdJ;Fpm%$DL->2*uRdPoG|gBJoq@c#jdhyN5q_4EzM+`^Jt%%qOGb_yMxWI+pcD#MdF_jh%s7daEfB^nW0n*E~T4(e}RBe z_d1@djkMRHdUYd%BV^jTXR5}`(j=hznz2g16wjAb-f9D7Q&2sV9(VHcaj)qI7vrEk zwfW=ali$D6p>-}*5^LUK@#eu1Q&|$2A&mdmu}tRxc&~ytB}uisyF8}cJ57K_r+~`E zj4#EkjJ)M)x{YC9H8E)iHzWp62a!nLsQ9-`ZwbYdz>qw|zMi09>XL+=kM|NPjwJOY zYrx;PTMb6FjqFs4@bxYrk}48_hYW=d`8BDruaVOrQrAg0cB2r7GEHv+W zIW<}B+q&+al>JAaJkVlVg=A!ENU%d@M_Y+E&o2P!ZS3s32hdti{GX!J%~aI8hNJUd zRr!xH@g8*5u`ALbiO$$O6|KP0uI17X{!9b54a?mo$Me=KC3^J36GE{v(pWB_#`d&^ zwP2yo8jq8!TwZX6h&`L}KPJU;5elIc8NZWDIP;p}Kw@jL5^68*8S~~;Gj9i_{y!%? zQ&PGx*Ml2sZvmdKTZ&9;to3t$w0;>Wf2!l<9NJej?yf8dK-*? z5eQT`InrfhUg^8mcYE?C(4}p$s|W3T6L(ijmFodkES{!Oye+(`rft+ct7Ag^y_ZD2 zE+d|n0+K*YEVR(Y4ul^yr+mThVDCN?>R0hGq2hz@2D^3V@D&>DoJcJ#JFq&Bj2rF# zpYQ_Y(tQ3w=DK3TL?-WiZO+*tAjZp>2mqEmKr+laFi&0e9?}yubEiLtw9GYgCn(Er z6<+Z-R$fc^OY9_>HWK^JV!Va^d^fo>&hqKC3*c6VXkl)n#zI60RLQFwhFji zYx!ncT^X!$)=s|pBKaT}RkW{^482x&t|D(wazwPBwf*7J|CXZsaffI0>dV=3czp)T z-RA=;2d83>*bc-_ocrc$zcmqjab4((wKC08OW;~yc-Rq5(KPbn=K|>PFhA^^q>Lpt z%@b!4aFFbA_+XI$X@F>%8;w0Ves5I%Tv#`MC{lt;+#3+OD3Z9zEZL&kmS6COcG)IF(g^~TP>I$CaVne`qgCwxzqlnn*b z&`%n7-T>0+bH@P8Um&X*vBz$Ws6d@6A-+&2^3hkUqBGqO!`W)1YSLq&$YG(`z zTy7ieeQav$yrC#EjvZKGJ00ViB8sQ0JKOeyHin(ZnYW%6wfn-9mmKvQ$O6hQ5d$m`V0K|0vv4{5MFehr5b!g@0FRY2k{TGCqLiLyO> zOC>&kP_i!azX8BYuIhy?TT*%0IepltX0vFOey^9xT0f!t%S9>Y++V-4^a(xh^-hR5 ze$*rX#$u(R$D|O>aKd=|Q48ou*s+AgRPRl%Ats^aeAF%}yy3LRi;T&NSg&fdBOs{( zh@cvM;!piN*W`4vc%cX(beeu>8u9g$L;T#G%OXSYDu;V`3rv_YiKfbh*%sU}q%0f- zc_af5Qsfk#Jk70+Is1(ZnJCFeyqx0?MP4a3|0j!#c0CxVw)fu_Z9v1}%D{%XB3jm; zJ=|HMdp2QEvs zcu&$1Qu3)oL18T#j?sePshDlpV= zpx9UzdZhhVQ0^a=;{mqE8ddBq1A)g=?uG!Rc5j=H;Z`@DZc};=BrP5w@z4KM3xCj8 z06R<7h?tjiGP;f($$@%4PLQg}EmbQR&cVE^af=q00){S&p=9$$_X1QIw+g==EHcYL z+T>W)6ONs3VGyOHF!E{=Z>oc|e!A21O4jK*X=kDOdk4IdN=YyJfTkjl+elZ>Qk8-a zx-|p)FMhW7@>O9~=?L2Cxx(kxK-IPQPji|R6+Z@itQ5x#MS+}-9C@Suv_65$C0JXc zzPpd3;xabH3%ug_OZDr0UfR>CmuH;!FjtjQg2bgYOMjMILV{7Vbv=Q=_^vPIT^AP{+!8kAN54T}kfK zr#i#%Vo0SZqTk+)`YRX}_MQ0_DJ`zcXna5w*0M!F$Q@eXu3S}asy_yH+kNEPm{zgb z!g<(=#6H>*CKC^}S}oaviC)Dk``r*qZdVPYT=C6A4(5mbJhnM=1niqW1%E+CVi)>f z)g=skeoe&OzrVAJ1jc@yoOq|7{@C5(R|fep^lREq91)ERziRJymBq(uNn4wsPts8v z4%a}?6uS#d8B|uu)riG!HL}Blp)p(Eq_%eShP5bvYHf5`z?DQvj_`+ln~Eig#hdSf zdVOI16ZU&6NAgwIZ;A0@e6x;L*XyQ^%`bJ(!SyKUhsQwjK&!_NNkfX57Dd3B!^dR5 z0T~OAU^yWzBd#Cl-MZ=1UheZ`ff3EylH0?WR?eDsN~l?k>}zS4&cWi}DO&kD*e3Q+3c426%+WV2(l%kw(w2lD&?|KCPVkj@9BkWGfoVe z8U0rOr^Zx0LfUIhlDYq8ePt%LHt*6Dz~|dbVV#!*y&u5&{AE2?!>cX ziRBW9s(N^ZajXi}&^N=|2|b0Z4zrP8$k`xYidPj>>?;`nSZ7LZX}Tw~f^+Z; zHCfmTe-uhVtgu^YK)C6s*8d}Atakj>U%f<#eQrokUtxofT%7J`iD{W#KJK`cT(I-y zO~1f!Vw;j3ok+MM{Jq?wt?zy9q+km+mOJWWCFIeW;n)xV`MbVx?6{&Hu zjf&r=6)6>y^)IYt9WW|Isk9Sc??<`UI|gDENJv=Ju)sWRuN%XJVk{0Fb+ zlMUCY&D?2La+K(-$(F4x#F?c~;b7n(o%HPL8LKyyy1&KR4dlNLgqs(#$+ylpUJn8<)ti|WO$1_5q^XBBo;wLJX zdmqnvNdqZ=qfUwcWDnaR>Qu!+Asn1_w5fZBXcsg+XPaV`@QmMVWs`Lr+ii&O`bn>m 
zysS_PYwu6HlK@dO4R#a1$(L5xkiWFoTiN*Wx_|{})mx~NVpNyxA^Gg@9D}7-NVtDbn67#Nw%1mtj5!sk zvu4c{b8Qp?H+gVuZaqNc$TPKwt`CQ>jlHkk*e{An9~UgOwLeI!s0GEQ>Q^oXs<*;{ zvR0M_Sjx3jy9Hs=5K!}-ppe1UaA$*#7u$i!im)3(Z#*aR{powHk#{Xq-GaG=zKnrf z$-1fZS7vt~f})&t-4U2jq5o=WzCBR4x(K|fLT!qJEH?x=Q)HSmK6-b)alUGp zi1=U#RGFWwJ?Do1EBxmZlbCNR2|$sMW=Eq|0_xmhlt8x*&_W+~t##Q#P z+Dg&_zaem>+t-=Mkh+z|)164VWp7}a$d|>AJ^EKfqpoDQPvWXtKpb86-E&bUMmPWH^0yhRy@e^M@Mv(oaLB6(oBheaQ)h6oCbCe{Hg(o<{dsxTqS~GO zeIl9KIRm?_B!&NB%nr>cB&q^Z;LHG$Y<2irSKG0@A6QTq4|@<@Aa3C4f7D$9ef*yL za|t4wa=VAjRqwdR27f5<5k*gY?ZEM@y<9owEQ8mk?Fk!yg2`-P!WC!4Gz>TvB40a1 zc!@f~>}DYaCE^=`qz<7@dbSF>a3A;Pz_RSHFZ z=UAefa9&?_30BdZrdKvQ=>xZP>%SXUlStz~NQ4LmSj@6n&(m}Xk1HM~UVlm6AbGnJ zJwB=kmpI^yImyP?)LoyGs;GhhGrXd*tWeWr&3$q`lNDr zGv{qxC7l+92BOAmGnZ0h8?4QutV!>WeL|+cr}$a}4;CyI4O9Q)-cxOLmnpq}-gLrN z<&rjBu%TL67vWAw58)(bW|4Fl!OId~8=3%T_M(gsqs9YJ zYA+=)!Yw2Rcx^@Rq*g_RtsO_5Yh#Gi?tSNbK3MA#5JLI_Z3sTz^;A6g;PFDuwBlta%T`H$p@v* zuFlQWBPlFX3+kMP_N~xz#?7W^nFjaxm%vD{&qkPM_ii=w)JNa3#arJ2T8B*@nYExk zG=+9c55xlU9z&12Lx1q~^1QR8r#O~LQJYm#(%8}vqCda2`Xhxx;(2yMIcJ3MQnj33 zdHsPM>F%FzhzH*Ad*#2ks3vF8#mP8lq~M^ho7gRxKk1i0Y+2pM$Dki2pT0r?f23WzRwF zm>sxxt{TVcfWkfK)Sb~eQ0Aa5P%Y+d96XWp^CyqBqbNjlBWRSp(cFHg8}HOZ(+o!` zL|PfOd@(-)|8fvNUmvVBjHEZ+lnNQEiD3L1ySe&a_FEonU~0>iKsE?IUNinN+4?8M z;tu$&x%n8EL=3K+TMxzx7l92u)&}AU7{V_O2&lGe$b8(`guplWvB44Nmjsmn6?wK! z>%0#&sx}{F1%ywFJ|Yx}65l{}mW7ifOb(4f7;K}GdT(#yw-S`qE-QLpi5EysVMd9Y zO}TBH&~QaP_Nm_AT*W)sBW*2=LMW10?(xUcCSY?j~y@(VkDLXs=UspFMg)lglunu zi9VvhqQH#SF|DUE|I)XKGcN$_%QDpMmP30WTEo`?dAUBEQhjuoWOONJ9f|Rmo$OUogtS(aHYB;Z0C4T zTSHaH)m>Zm!)Op?#QF|wc_B%uU}jD-jiFKNyqd8PRPv|Onx56NE8+s z^5zfPd`j;!cxg@3*)VCr0`t@u$Xk93+QaaLPD^`3v4M;Hlp*W{_iw^nmSPN#aNx5s%`=TkHf?OzbMWrd!&d3}!rI*-#?%HlI zsOF@;;q%tOyZPFV(eNx^W9Ap(Zb6~lCMOSM>+C0U(@wijv(eDO*cANjRI{P6qjSiQ z8oTPyWSgDJApVi;kUPIOsJo(MyaI8uck1lq!yNCsdnGpS-K%v4!|Wo&CYK~F?sTkC z!$!N_P-36drPn@7G?L?^`HtcC29>t-tO23tzxohZY&~|e@2Blp&%NYT73SaNs?9*_ zqV!A;=^at^{_Vo%$~6-XK_`axEA%B?%$84G`7+7fBLQV&qgy(@!w2F%r5diJvd2FyrhA;HGsoLXHVYH} zUcG6zv)&>nAWDi~k%^jD$Vl9Z5~JPJ1GV&rzD8GB0T0j~MK>q=VK zF&@Cx**TgYr&u!F$+cN(f)Zu!40$e(`lNxZjlPu*;wO^gU|QY*{^IaCaPYhvwP+_W z6{>L-&zG{dfKLor5+)iH1Wtp8w@OZ#MI5NC^TEp=cO@Dz$bB7u`P#GH{FTp7g0&#A za+UepVXYJP8ObHGc|*vRAf1M*U)`)mHbZps zRq>GyD`S&Ht117cuHH<9B`s;+Hj#kfPEqWZ;lK;^z%FH6#nn?_${c(|{;6d5^YtyX zhl%tS!r(qhe0e6bDJ*dyhyZZ=yHED3O56Bs$BW|oegg2wW7MP=%Pi-qlCM7BBdTf;fiWJR7ZfU-4k@; zYhD;7Yo6-iC7EMASwVhM!YFP&Zz|XJlA7em#0Hu!Vrp#h}wl^>^%LvKE&K?vEO@jsL_MChf z@ccD>dFwzK>`bol9k4%Ds3-&3wHTt8u$HCPRDyv{#q?+bDEsT6_vAPd?z+?&#I_wuj7|5W*H2vvIz3T`yzr5Quqgit^SjEV1|!^@A$WOl>$O$iku_tvr7_JIx1q zEV4a#q~z#fbB3&QSy?UmB&G@l@W)mTw!b$JnMWybU(SC!X)|$6-&f)g?fL8+gwoqR z;b~8~bRg;?hgqJSXj$X$@^q^$4*)lSUIBj_4qw_F9ia!#zl_BRZ`7$miU@t0bKQJ#q3l-0rb|FQn2P;YgC@> zR*o1GorN7$`y5t}nXo$}OH~gOHPlM`u~>Z5dtiXF6J?#%a=m1W_X~OD$n*u(l@KA~ zsH?y!BD~wQV6g`9nMe!q_GGZp*$#*i0dwFh&8|Iig<8N!+kQEh_5Eo(+hA&^9v=x@ zxDYcce4%ICF>ebw84P*qH^U99?UP>P;7)g7qAS*UexWXP$9&PTcRSRC7+Tt^^NUij zSs!lY)Q^?;pCj^i|A5fY?a@$k5&m1FO{oXsI4|JTl~m{a*FKgZXKx3&F&74S0KosK zQ$-ecdb2zNC;u#o*IUSmqIHtAbR><@Vo3J0f;=GTmdejPv=7zw$i;U{^hc-6Eevk} zB395(7G-=CCFDy#8Hh~&7l%SdDjg8#A%)71YH37O3J{B|onG9V_Cb0{9F$XO9yky! 
zcR#0szM(StMx7Sk1bEH4DQaMx_GIRw$CL$AJ@%JLy|u{9?`#$9Q_`2wv(VPc{QK-= z1F_#fZlpq;{Lt>xQ%nU%z}z4O=82~i0P?CQ@Ex!+SDSMd;$wXQw!2kUmz;6T=J$w- zWh{jmIwqfPeMd4FMu*dL*|GPwqdEI{))GtQ{|vwZ&6`vG;?{#a2n505k|N!zDBX}O z7`ec0h-X$fRuTxi!Zn^Vz0l%N_d9aA@+Ro`IWmd)*k!LBURZzhsA4)1I1U6kA@tw> z=C{vKHQ6qqoLGewrT6Vj5J~#afHXrn{->gKQTvWMEkaUKivG^*^Hb2dPqKN3zm3M8 z-I#;i8Wjwe8Pz>3fj9QT`|- zFh@`O5w;Y|Vou!=zo^J>u4JP^^I0+MTh27zdUN#ceBksIde&LK&yTKu36A(fr`FQl zAE3nA%5!1bulCb^_n`u2tdq*bo!z8WA-O$z%yKzHqb#};gMQrnp;#*+Yq~ppfu12OiOTE8OtMsM#Z@xTgUVDF8Cl1!6>WSbwL+tC|w8(I; z0h}6jE##WHu5XtdiIH_b)m);YQiYNqTawX@qev*#`G#yg^}$*B4eu8NeyQ01SjHwD;t>?U*V zTV6Ugozo{m18qivu+p9$Qrd~hjdIbahNY~nsX&EM2 z>Fm>QzRX=}9C@fj^%-5t?K6(B4s{(+KQrnInfw<#&E1t?VJ|bPmoycMj*c;IQ*o$7 zW;Ptn2YU3?QLE6}VIgwLk;ju%pmrNcY6ats$~!`<$G{6Doxv|Wjw?Tm=1=KO=~Dfn>kptzDSa`D<`lKp} zZ4@+MKXX_#HO*kYgBgth_`J@Wf+ZxbH9f9Pa=Oo=_8NTi%QWSpal$JiCxqS_E2chA z-Cef{fjJSHjdj_S1#-QnpfYtsHiT zT-_75bLL3mCSHp64m`XKwEVGrasG_2mf^V5CGla1uOuaTn^YVlIjV<2$A_z$IC@I? zjoW1^|Gg0kAxqc;yfks8x&1Sw(jrt@T;`lATvwWx$I+S6X0MS0&wk2KKm2^IMD7^L z%jQhPEp^a*YbLzL0MV6c0$$Um-#3)=9Doy_LcR_)UxvTno(pU(w|5s8n@V#4+B|s%0({I zOb(9(2*@8CH;SX;@T0TEPITWd(ZWxcXo#*8b0(m1G$#5$2Gg_4f!}o$YD+iEOGq;jC;B?!1mef?*H zLG-kPFFc0TyMJ)E;Sobd>^qIZ=yW%Mg$tn7vD=X)4SMgNR_SZkci zQ;BG|YN3#vAiUl=G_WD~`TDoDqi_eBIui90qJ z1~V$uOM)oFB^~{OZTXrzXfvE>7R%Z9+|nyL&WEZ8<3ns%LOon=ocod8yp-FcvL6_zbU!~rqA*dY#(`9BNuH^Q zva+;}kmp=zjfxT9&b+<&FPNK4&y~* z+;R$XMB+^AljEx988MB^X7(7bnrcsC`?jRO8ftI7$vsLtYVh5U<8wU=SPyuIU z+Okt?c~?jKJ}8Y;VvmtV6lwX0V=>q}2>cx}>vSxefZ{_?=K}ap;1IFVQ+`n%#Kftl zWXQ?4uAtwk1J_N;Tgw~x@`~uEGH%0xD2$^y5MKo~kpox#5;b@r?TXS8jN;G|IIQ5= zz<*2Etwe#5CplB^C=%@e>LJ>BSrJ`KNdu*)13LrY2&KG1A&eP2D__(}#?OX^oggla zyrI*zcHY?=)1O`yc)oVDhTE_HACvlnc?+=Kwd(rP8_Fa{A==cU>+F|;R;44GVfk0` z*+7-jZB;V2H42!pPZMBq1g7b)3SsxG4?17#>sv6K23+EOiuC=eBo(gFr|@m@dy|p8 zXLKyI7`Y_znM(O7;Gt@dAUGGJ_`yomsdv+1f>StOR8UKWE|d9b`@bJl)@P2xeeWj4 z#5d25@`ZzfKpz2lb^KbNV+xKXZHtp8EsNGTy(Qd=?{sCQFB5$i+MRIY4;PXuUxn($ zMI6oFe_U>P4e+f!xS@#|i88f`wNdtUrHZWghcPTp+gmr@*o=guy#Ao&N86(H?Kq7W zJtYUG`z?9&#l@psZ)8|ZX+G;Y{T$tF)r9zs4L)Mb4#bQNCvI^m=F(d&fm@0<{0T;^ zGI9RL8fKaFq`=(?;rA-a4++cva5bG9-YV}iL0Xill}cw+{6PJfJyi!abUBBCaKdPB9qsxfr1d67cF{ojRT53i%G^#e)I8EK4kPQx1sEmL1YTeh~gCNnmE^3CP{2b$uf^+`#(rY*%N zLU%sJr(6BsQLZ3+CNX+A6$eQUGf@-z?RHi{*_vx?-itq_A`jaV zwmoSRj#K&XhdO->H@&kdxkeTo3|4hW=H2oCrCBzC@9jbIG|3!XxD>DF#RLiM6og+! 
z>Sb;JVmmRF98CSj>PHQ#44QpC^GR^02Zu(VP+-wFr{I2~m)^bblAWGQ#I!cC{z7kM zCk+6jK1qiY1ZRP8xq0zrg`F#k5=@g?3zH7QNc!8B=%Vn(wtu}Hczr%Z%Wr=aXp0RC zfvL`n?YP){T<`Ygg@L|sVqq!G2A&TCg3f6r`4W^=Zi2jBp2qDqZh*XQL9X>}lPOBq`VB$KHmoqi?Rm_{q=Ajv zBfGVeh`mQuN&_X_}H$)O!oM|iQ7S5JvWKDx%~2hWsKxto8?uP&L{c_fWWhL!?O`wWzb z8Sn?s8@^>^mzrEvo_k@YG$yZK@lh&U!nO77fj@2xH_fs@?O%f>IBb&_0PwA3ZQaT^ z>1TvvGOwRr%P8K?2LQE(RX;+39wW%s9cQXtqg+i^8O<>7g!(2FP=UJM=oQn?No_jd znwG&{>$;uqpkd{0XZqmsE7{I;aD-V$-i>Vd$sehMp>y61tN1-56K>j8!&E&IyAZTD zpSMyBG%9mt2PZ_=RDC4*W&uM(c3G2}hfa53m7W8~bDX;z7F(_4OYjsAb}Kqq4vOKM zr+dY}9y=yCR^Lm{{s18Lh4MVl!BsnjHjJHUBfML)d)}sJ5p5hwg=3amP8!7ujyYL@ zBf_4;DI4Zw>~foYhot#3K^BNvumt$=WLscOnxsetbwOR?H|)!>yKKbVva`$Qy|{O# z*kzplZ?H&brc|Qc4J7&KOQ8Ya5;?#BVOk%0U7luvBWP)r9e!RUNQcrzlCecimsPwN zwA{^nKZ>g~>#)2sq1}qOvWrA-$($VycQ@x`TT%?BykD@oXiUu=X$VblF= zB!*BptrYHSdGF-cl$ZzZn1@<{6vN{R!WMd=Vg~xe*-}Z;wTm?+(jsgAG8Qc9>v_31 z{qI+?L}(*Crnk?jishDrm*DOXceiK8)~pbbyIGxu#eB23rCP_I zT$YK~sY@S{avt5kq*tRiwQHji>-9c~q#(EsIj`1XwDdN3uvKA8=#BVK9ZN7jU7_7~Aw7F=InoD<7SCWhGuZyn#Z zM>`8?<~`Cc^)3$>@yj#CUNX6!#Ou#rltr zJdxsP++B{tCkEl`EU?ZP)O3c=4oN}Gv;ukvuV&EB8=*2AH@PeRyJ22KOS;g}7j@5> zYe)u(Lss#JagjbpE4#@DU zKRTEJhxw%Wi%wl2!!OZge|A~Cw#Lv}bvdpgGQOd~P!EJ+=%m=%rKoLV29cBmp`v8_ zZE<~J!K)}N*#kkM#C%JatSoH5jbd9lJPo@NgPAw2m!?jpHKepha&P>z0pGNZUq1gi zH~8h+>axEeNicTknqGfz(T4*DD%_%`_Nx?3^)xmfrr*TOeSKw;l5XlJ5bWBDWeEs* zDnA4pkS4o6H6-(wr#u-B)z{{CT08x>yHAJo!CVFA_%blRc~1kj*rcg1=y8ruOLjIf z#K?}MB`SYCh{qf#b?tKAGKL=Cnd%L4h$NSJf8J!WTE7DTIM{5Y-`d@-JB_k+)1DXf zcnxeETbq=%r+JwTAlG} zTtQ7UGA~2~O&>a9odv@$^iR(=*TWLnd2_yfvnyEbSK7ezr=Z8&-KiLx2t^Ej@l=<5 zkYB*_d_=3+6Vb7nE5yXDsN*K-+beFcOwhf+Pj+8S7dTRmv&U*ky}o7FRQ$JQ*kB=$ zW}PG?l}G(BGUrf1!x2qbmle9zr^~-)P&)D{1J9nHxP7wMsHu7WBtWj|%%x;>?t+q$ z&t6HmGC^@$?66YqXdE!)`TngfZgtKH#|ic(<&dyo9tHL`9d)jxrS}O_#g#QluI9B@ z%e#7YPgq=>e;Bi@6|Gu+sP%m~9!X-{G%3XGCwAvDA5-GfyR&JU;`N!XA5CZpqmB7i z<77rR2O^kXGd^-#k{X*hvj0n;W@|y*W1YBVAxb^Sgg-C!%gmbq`=?8{@lOYis96)z zKAF(Ds?R~<@!tK~pmfW5*IkZ_t`A8Kq>l@xGVbzP(>^(C1KY4%m*(3(oMLTRxm8`C z$!Md<3@+G){3!)Inb0?OL8@xS$~-V9C%SaL-n0?>v`@+$3@?F9>HK!m-cF5hk&qAqElGBB;2P4bOb(fmd5#+#EWuv`YI3Y z;8;Xr53McN&SG~{bN_3vQ(`mq zDDM8fxF$@g5Bvod2(hrdp0aD?l5H@qx1UuS4H!Z=2-6wjwn|wON$JUl%7o9x*SO*? z_33n<{1M0pJS>*>VP1k+zS)A&! zvV*z$-AsO&d#82C2U52;S#Fz6%wLHIG^^p#=PxDK$1|iSYE>`^tYnrZ-|SS&Fh$H^ zJKX*Qce`Vtb(jy@S9LEy@j#Doffw-P-R;)wflcIx_HWerD_oak`g{(z0weX9P;u7& z1h*A&CE@3z$0@fuKcU@TAdn9F*2DkdSR%a>&A0wL#l_OF`CN6XH5Y zY}XlG^$SzXy*D6TV*P$7_he|^p0D(uiBF1MSvvI6l3Jb>%_bhJf9Y5>Ma76n=t#89 zdFo0fsOBKg-A_wHqkoHvL%C{Y%Rho|$F`>0O9y}UJ|Aqc;`dZ%>=+y6-0p|I3fB9S z3QjIhE|F23FX>OV%+rr4(n2+3Es7%SK1dSWRW=-U)yj`{d7BP*kvtVViAJqCBoc^| zQyRCOSjm*;UF`-2+kAYr?;UT)#f_^9jmCqAG>kfr9@2g@+B22{C|y1AcU`d&9@5-b zH>deFTwB-K7?JFq#{QblV4n+b?Y&ea)>`KE+gK(!;6TY*NYV@dq{rwMvFx1u7>t45 z-2kP7tLM0x_`F;!u>(GPyAQ|L*|VdZTJ@o9m_FUg&46P+-}O?TAU_3?&8II>O|@-a z%)94)8-ZZ#@ski&6ou6$F-;o6g(BWpdEGf?XoxhSps8?9!riN95tPF5Fy20TZ5~Rv z!f1USVCPa>XK6feMKG6hx61)zjyk}>u?=mnt+Pf$OJ;jtATsaAQtY0()H4oCz>^Vwq=+UpOub-|(AMu?-D@;$~0J z^@(!X0Kw`;N1_&{-#a1nC}KTRg}?MU3FIz2PgHxdzNSX@e>eY!2DS-)fJ-@wo#c@T zyG$A4PDwzRds=!L9M;b_P_n1teCO31qmD>FZ+De^RmkD}J8Z9Zq-p%)$d=c-5(Fw? 
z(ZWVkvF0fZBgzn*YaebvzhZZgvuS&i>~xSYr)!3K&|gaTbtjnqKSO8X*97*L#r~6uHPcUyK0+8HVGz?Z{nex_q4>~9U=);IJ^AgJF z7yMVaKIKbh(&b6=UF2qLeAL}r{QGv?xo+@iwIurToxbz`swMuyyXUHeLqvx!E8j^W zFzEvc?4$PUERPqjy-A2WTyR%Cx8$$5Cxn-~P*r{1!6#>hB8#@1YKh0*N97i<-W(=uw@v{%pn-RNfAq4}HAjlSedi03rtzhz);q0JU%QF42t22xeALa0=UpuJ*6Qvb=q;D`MTz>AW8U6EZA=97w5kBai*V`i zQ>*Tfx)QOH7;n zYf}KN_W7rve2y0>yaB=WmM`)u8SKOz3(`1yfmSAVnjoet}R)NX;1t^S#Pyd-9C=s)W|l6_D4 zh?mQ;g2RtKE*;<`K(Mo_VM#n%v<(_`ILk-MNruS#qzlvJVj75eJHM z!}}{1l1-RA#r0k4tg0+?I(!K~2+sSB`DeH)Hah6UDl=auD=<8j33A=UP<@`GTP&9b zb$x9#a%OHC#zsOYeB?oTO1Km#Bje2(s!h^XEj34e{tTx0UyiAu$#0bq-b+ z<9N9=MRZSWf^XsU%WsVqn$vjY3Enc%iF+r4#?PhGZ{5dS?5z@Bj%nASOLi(PxMqer zm3LxyjaNP|bwO$JD8aS+i(d8yz|s_Z)6z%|Io4BD@%a1?Isc@sA?GpWHU7+9cUKc_ zSb*B`u5dUyEINV8r~^_VT*$v-NuS*x_B|4$5F9IW(0U_|i=F7jBQJCcM%M8YaRyy) z9OV&PFowHi=2j}H>TiUhSPUZH@nz`j%5)dzzT3<2>f#(+7ghxv-3Yw7kS}(5?F$tL zrawFoyfiW80-izJun*x*vRFwGq+TrFy+%61{g{hV21S7d1h3O`-K#ihh8;*i*HqZ;M2*%m$ZnrsZ&86dF_sJx;ZnE|}WWLj?p zw~t|e939?sHhF!jd?N&m*D!Dod0%C=)ivB+0MODN_q5QmmoKg2hSHG#CQFZnV&n(h zu=SnN$&Do;8SBOe_C8orKBfy|WHUiwkzceMu42CGr90oU(J$AH!@|G95gXP!n|f=7 z0jdc4J=FG<8)ZGe$&Ol|GQx4%J{i@j=p~28_c0LrIZI?YM(9 zTU*R4Q7a^qPj-Yu!;Sl`8{Jzeyv+;pJjBsGX~%kpFsz~n+8%!SVN=62E#vcafc)Bc z2^Dc+o$tU5AovMOUGA!uEnoO8Vy;JIp4pz&BYH4kCK4jv<4Lp^;xAp9|D!afN*fBKS9%{j+)pZLnKyxl#ses2Bx3s`b-9C&vyQj z0`R2q77t1gaG}eo59RJ1-}gJ8BEMTB)G6s96m~FXPJS23v_N2uH>xKwfiiHxk8G)W_;UTM91Escu2pi)7sV!XSPw^4f^?(qDX ze%|Tv3`Q{lE}?)nA~VMphEV5?BO1wLGIOJ9k+j1FRUW@KkssQTtKkVeC=F@rp3yO*m@b89@$#d6w0_$R^nJDOqoh}&|V@G z6-BibbLK4{9IW`X`%!}wr(G*{#W?h;P2j}5qwV|2Pg4hQ4r8|x1j_h%zrj@N7?0z= zHTJBVvJF&eMnTzoIj|n&-SPE()edacz!g=?TJxJkQrkO99jjufWkPC#zoFHItezoc zV!J(p|3;wKSA3{GH4u_1-#8F+M_H9|r9krQj%Tv>L{;?l+|t}O@NLMvV@}WA$gHUTw3#?Q`Rb@5Bx0X4-9~K_V zFAqFif#Zk)8gcl40mv$;>-^(jy_~Bjb-y8@;Dj&a3_xiXZTxosg&Q@L{}4_PTjuyKEOpM>*$q2`hU)$_8~*@HUgjSXld zo!KC4ZBi0So7T<5CRDD>;*~5rOW`k1G{0&UPN}Uz|GzWK|2i?IXHjNx!{SZ&Q&Uk>9TT zrTD8E^mc-s0CGl4hd-GKU@V&7v+i|Dr|p-|S8(04UyN$jFq6peh`@yJ$bSJe2K0wv z-$#CK2@+_2WVBF5XoPmpN_?_H^2QwOWLT}_#mPkca^0BT-oF+%aQ;XK62d09Rm4)v zPqyldUPHwGV;w(%==paRcBVg4#YozYg1(9Bq<4+)@%PG2GD`Lt1_s(OIpxbjZR2qm zt+T4GL`?1;NXSstuq3(LKcQX5h{jX8EP(PI^DzLVJ$GIr z5;TCxjC8UMO03SEw0|$=IP2_@HaMOsuPQJNyJ}r8J)eX*cxQV}vQxDcI*an=KjIFn z=+q?5Hw&>2jtNvwx-UWXRH=*J`+e=sGS-=EekD%wb55N;v}!nc7bZ{@bn&?6+ASG6 z7JID3Od)}3VW^r6?0%Lvs5M*`hCoI387*g&_{A-gl^WSCcdIxxBHt@u;MiVzB1!zc z=Qi-_jn+US^0GmLW^$}|yZbECurJ2fsR6nt^oN_B1*i78UF#7Tf`V82MQ8R+Gj^Ga z>eL{{E6W$mmtArp^IN9FCKhIy;MqYv6Qt}wcOEN2!;+m0xEUMEW0Xfqw4q*r_u`8s zP#}rYBCON!N~uqRyEsMfx+#hd1Mcgci)d@}{5?>TumzpP&+$#HCH&x{?_dyeu4iPy9Q=S}VVgF8BrBQ0 zA<}n{&UXH&A}=yD+!Be(Re!eXX^~DFm8J)X*jYV)xTw8qS<85iEjt(!2Qg`*CsTyk z8P`O`2hv17JbpBias+~Z6tpyth%&F$=i%Vukfma=dL3?8fGGj{9brM16ofEhZBr(j z3fp-|%C2ZxnY5@z&u=+@+k-4c)9YF{WlF4LnvRegpLQ+T)Hi}a`JwLQMk@&r;U1vE3_m%buD0qqb*iRt(cY3=%?L<*dtM9` zUzI2|kp@09Ao zUYq7g$Ru}d_7F-!@U`=Rv9G7eRZ3FN=R=^%ibMFL2{JX9<=$yee{KL&r%>U6`7c;S zTT<3c?vB%UphApcVx{rP&|i zaz$Atwtf|IhS~L%oUw3r49{tc+)Kh0L;hazj0N%yk&?${%sK}KW{r>lvm1Z9S{2u2 zjMo^Lf2|y<=@`*7rsQ_B>Pi^-bhiGd19Rg`d~*5ri5@Q0B{^@$UgR4CY#x6PkS!z; zYzT3!TaUoUzOzHjlfB9$^MfmNX>Q`p7TaWAJkQdZ5>*RnJ1#NkHcd~la^wJ=RUV4r zgw=fdOPM!|RBC*)qRNCsk79w6!O8|xEY81kbAzi{yy_PdA)}07mN>V)7qdcSlNh|eNp_G4{bn8ff zZd{-Ir`PS#(5S4A{;kZAq_QPe3p4+K70JWd*-5KFH+Lx=W$)6hz;5=)D-+RR4EsdQ zwcQrunAYed@fD%r6JKyEg=!Lib0_g?V9DVpkOTWge6?Xa@DD<<3~BW~E>@lOusSlq z@s3+9*kql07}5tGfO2FiFh{S8Oagsa2O`}m3w26N*f>LvdjI-+iR<#6s?9$$ADXv_ z&6P|i_NbG0!<}iORGs}cRu=Xb0Z8~4HSvv^FRNJi02N0Nz+w*e5Rgu`nf_@D56wFfbBlFsxM#q!Cw z7@+Ng^drC46#=e%VBFHa8^D-q(3Haq_9O+|U&@=O%c_gRs~HyWP+0Q^T~k0~g5 
z598!*L!Unu573~x$CH6SPZyj9f|0AZEOV}?P?wsNqIi*X=G~cH1CgoIlmQ1AHur4z zhw#xA53SElGe;5r@45OS%h>i&DQiRxU~kinJ!JSFq>QQ}Q?HNGUoU#|+zqUz(Lqtc zkDmUPykb1(rWM_>^Bci*Y-gQUb0wNZXL9EnAQD9|p!PUa?z?)h|7N27r7`=!A^WzQ zcdKOArs!KWDpgr+e&;zCn#)l3o*w&aeRcWxmWy#laoQ@SLH4a;K3|PBS%OF>>$?cWCJlG~T4rPR%-TRJl5`h7 zMx0{=_6H+(VuGErkZTGqr7S8J)z#4nzXy8Sq8?GjP)n!^%zuaj=8J&iVV(tkc7+Jx zM%(9oB-dWdhAR4C6VRe|zEz9-YE?%zn90tx2W<^JLzoxS!$}6`!l97V^zK)#4I2;4 zhCGd(USp6-wL!4>80mh0IJ1DUHRAiFst3a}*HYG$6zUq$B2+KrpXuzC z|NS?i!nrx{U`%KJfQm}#N>qQKjzqv)o_($G^YzmeUv*SjtKkjnIDEFw5&46NFjwn8 zRTD-6T-uVsj{QZK{JQI{12DGUD942S9K%F;mk9F|i4CMw_5?vr=}yN?j8(w0SIRGlydDGnJ*ag+d z)O8rAQ8n zyvliQYi6mF-4T6yl{%n>n4LEsiQW@A{`AWerr{3nYM@iOmOd}#P;#cyde*(}P&>$yPY0G@qX>;)DmL{C+W z#))N?*)8(kw8*%VH&VyNkhC(kUZ?A9q+t4>ZA+k#R^iD7f?UfrRWPrP-f=zu5m1Ut z=+iHi?UYPkkC*)HTQ>m<^hX-*XPd-k#NyOL7{XH~ob1;0l}iGi?7ydt>yR-3q2G;P zCl4SqieCt2Oi166PQ_wcWEA|}RJ?sat-N}c4)y$~LPK9{c@cVQnSidRy@wG?K4{lF z?Xt@g4>jDrV8lEn^oEENEnJDFe9P(kL`(5)?;!<@Tq;JG{}EV$ZXp;<3Oo8hkTOS) zr$)lKE~DfPzMQf5pe1%`|Ey1seZZmK>v{C}-eoU72}%E~jKv3`=b{mvr;73$l0d(H zH(0r~>>a!fs4lUa;_!52$nt92)^~ld$%yZ ziNT`5@Fe34e5vEJzth%*MV>SY8y&D?gg9<`=_RUsiSy@@XU`kDCepDUDPAHwU9#UR zk^Fx9aRYeO;!ypF|J&CmN^e$KKf3(f9B~L`7f|7&i7%Y~lo`T$P2qVZKNs1-TUY zyIiA^sb&$8x%eXd6SDX~KgWc%Ye$m#Bbv?&3A*V?b^J zZ|o})paWuXI@2sE*U7S&0SDqy_y*SEjDqL4SY^Ngh(q@98x-xapRl6YFnX5Q?za*gYlEow}mV=IZfd; zCMIaw!Z>r3SofI|k&e>Nq@1Y?>b==o{6@z^lwISQ4WOs^O9nVNsX^U>?!mycZX`#A z#N)9j?!gNBOe1k&`$aX~F92d2>J|ggs~DiOetzI;E?t3#?>io;38VO#;Pab|R&=I;oi(i3?y^|J8ZGTZVl zW^!qhcrQJ%6YOje%2HuU*Zk@?@sAXaf*;Dcb9HrH-@j&U zj!C=sKeQh2FZ%1;Z;M7c^ofKhv%;1>I!7Ob1S_JNm-+wg{=R3lkilao?Q<71tq>VkviUEGfH>747n7P zwu4IC2>_Cv62)uIJGsL{yuIlVb*H@}eYxTt4ONB_qfIHYliRw5T=aR2Mr{8Wc#_gM z*UU}_nJs9ldA0JcqTNyaJ;I_ODSz5}RutSrcBuQ_#8lqj0z&1rGa{+zmK26O$e2V-9HSrE31-NvEvoQ7zk2F{6oc z&@nHS8fMyNgV~ryc=dV?QwMMkiYvUl>D3ix6)HC99Tg4v3BB4=`S*d8FV&{VPYn0W zfyPC%6El~LbaOnhh$kKgeM_j%kZ8l%H04GlZ*I4eB_jMlII&%Tar|$>^ zz5a%yyjAw2(VEg)GJgA?U=(wpFstTSL9=Bkv2#P^m4yvuVbcD5`*zfsglY|F>3175 z_ev-yo7eHWMX?I1Gj|rOVk6YKh zumAi){(I;77djKhNS{l3oMnn|`MUgOU%}OgUKJNtsjh5#r^81X2;M$5p`H1h$$GIq z){lm{t>a?takOoOe+$U=_UG=pbn_ncN8K1=e78%}>}|AQL3Q9yXOlDLquLy{XL|OW zz+;pE8ayDp=qOKqqi<7pqRelEU0}rU%SEMgv8>BJxG^s0)BL?&CeweqFHNmvIz)A; z#6?j%(|ZQPR8O(Rx@%Y)rrgyQ9gSSJndabY=H|7<(cB9WU5y>mPZ1r$>5?u}+>N%5FH%-iI1V>o z(6$()9InnDdzDo1j^-b+WleXm`1ga5TFU4agW`Vt3Gg(+p0{`?FQU32T_lKp`fkI7 z^}3@)I&=PvHqwrm%iqlYJX_#9|8O?PjsZKYW0jum+LN}UgFMHoQ)PYt+l7F;{()Zj ztAVynjU}&bN&Je)IJ|3}IQsnhhe>m$B8SEW2LowxV)%0iAt?Rjb&zxikqIU~hlYsg z#d~T55?%eXd$|#}{Hxd%*>B}cGAcfwY8GuOh&dW8y4&B`E}CdFHp$#QStHwwM>i6` z?5&}p_GOEvmh#4=4nAhs{EleZ;uxKEs)^$d6F?~v=g8sKRiXP|si(!OzSZ+yGy2)r zK?`&i7B0g$WOGc)er1dLzCD2}XCK1tL>1H?SbD6e&xa8R&axK@eYRg~M$#_cDPaDyduu-#FY0 z6u~Xm%o4jNO9=WOrg_udJzxdk^#4c}yvgn=P5DxI5k&hMft!Cz*X0zsMa;gGEdaKs zp3E##Ext9s^gsUgJOu`xd7UK)OuD8&wSgAE$CfH%K`}p*6geoDAi1*p>!xD1ucRXN zO%gZdL-4MDW_3pLAD#Q9hwFEmVaNYP?h!`N%bG^&sG{Qc?;bu15BLgzAs-Gv+w#nF zIm&^*<&W{7y>Fun<8dVNs?DUfnvS6LPVIrmE%4Om}Df_D_>zS;yTi!iSTo(_$*gShHxh= z?c1N*RM}}DH0E!)$%q_Ghw^DD>F2?s{l@2FnhTSq(dyscDt-c1Hp!r33g&yvJDg0d zQNc{Yef6*(WS?t7@Dp=74{r+$tLx8ZT{-|QfVR%~Ue>$n_G>|y9r`RSdUV~~fxljO zpw&rm0zL-LncRh#PU3M1)hEXKM^2zzd|~?eIHsa=pE{4f7N35^17l${3j zNd$hooZS{Ij*82=w`U?l7<9m&J&>ErKa>EHuu*?HiaYr}Rm(w?`1qjN#)gRRofv0H zjfjZ`gX{UG4fBY-_CcsaEnh#>+$L@cC8P!LjwmW085Al%)S=;Vo)WHlNwKdG3o*o(p|{_i6% zHAiRnLcp{CIBT`fGhFU`!8x@vW~oomBLBM<7DIv;?il7{XoG9mT+>yQ@i@8Ebi5rw z(mb=2g6Y9JOpo_bpImA(eYC6kVSFjRO>TY?f@0RFU!oxEOipHF%}w7l)!Hsj6z(e* zN6Yo8F#X#I1dPbWTcbm3S~HSEYhTsY&IhKw5wpkV{Q%24T3f}3d&YV;)?!g+=YawR3kokU-57w#JU+QAUjKmB`wBuYgBzCAb>>M0#g#``c-~ 
z1nkoe9AIhW{gn`A(!RK>*)veF@r%Tj%*!YIXxTLt(4_Kc0M5JA^Px6ZX@Sf4-_wK( zkY$|mQ|{}6=7$9$8yQvX|DknO=NYBQuz6FF7LoyRDy*a364NdCZzcu>?RSo}in`&OC`fE)tIFi}t_|9wJ za^L)7=%A)~k&UM`kmr#Jhz!6B^|2Y&@oO0jPqDj%vzEB!aVZ(?F6byyi6vCwZy7?1*s1}u3i`Bl2S@JyEZH~%32^tw! zdYO*#4)MH$=UW?(9O{GCOVTj?!Y$kRacsQmVtuPFO#(pFhoS}F^lP&?!QI>GW>>_K zk#d7|BlV=sc{t@xqJ`NG?Amn52G46IbgR1}vZ#j8Cg~2mmoJtkYZdq&$3}@(GSjJ~ znR}&rib%9lINrj{gak$~re-zes3<9%O$v~@QM@>OHmjV_$K84pbkmIrVV`{WDL437 znDX4F_XmEMd7bnGZPWbCNVr@=IEB2Q$wkb#F_Dlp*sc^6OW{6MMvo|@_AXW($e@C# zcDo&QWo!ahgJR-BUC0VYmHTY0w??H(Qdh$UEnVoftn4_~rIv+@^xDDjYP#d~o~B^N zK@aBboJkv-_zxaOU{@nX`ZIyDOuR;=UJAssEDZ5WVOz6#>x)J7% zDLA=vDX`}GV9a%~io*CouqCS7A=#$EqNw1N{7eabwKmeXjLKUj&hi^#-*`Z+XiL^< zB);qt?7|r!o%g$?_HNJdLE^T=jYzrnH#H43fsS6*dJz+tsr$Rx`8Lc;V-MIpJ@c@| zFO4rgljJm!r%wYQDSo|TW z+afU!kSZM(^SUj{YOCp%5`j2BbDIl;2*dqwxv8*U`lmblm#hREBywJEH!xinu|dQc z80gZs=p}Hr95l+F4!$%-(zJ#Ifkj;C&l|E4FP_Eu#sd{yl;-6-I5#DDc@U;>Now@g z=}D&IURG6HAXf}!Po+6P*DBC;Jpj_@@|ubt+1>}m%mu9 zIqv+4GP6!XOzdy29-AGXrew%t?OIi{-vq9LHw}57Dc@f)m^^*PAx$xrme;(=L>WIE z*a#@1A!v}fd;&#lqpY<3enL%l820NA_I9mB!mkM7{AG!6W&df+`K7I9wo3Zg0S4RN z`bH+CwF-P||4~PFuktf;Cay;a+&0z5IN{$NPl}3%G3`B3^#L}-tjL~Zko}?3)$v`&ib2O zP8l$8{SrPK=;f1P zn=wS{Kuevn&&~bj!l=grTu2gICRo$tqH}#-x^>E(M;J?=TY@zb8o4Z3uGJreabt7b znv*3PRWMwy%TxH`n7#`*9CF-f%eY}G{C;WqW$SX3<)i(^V!5?OL^m{P6Dfn7yry{% zYeHC(Y`FmoLoDZ%a)K`{i-|HUAOh=*+68s}^(}|*Dbxa)tae+Wb$R=5lO_x#gf8qh zND}Rdf@~U-2YPXAniMj#ufFT*zI>rS_~nJ9EFTRWbzIR1WRYR?p3G$Ln|f7a;MEVBxg4#`EXEMu~Mi{Em0FzIeazC%)RtbBzS;Uj{|1Gx;!=3MuyDQK$0qC_%HqbQc@zO{E(*pO1-2iOc<*LgT z&CM=IyY4;gcVIbfh+N?N1_AYplbSaUrWE5TdC&m=#a5LWz3Fk=D}SfB(lY-&jwRl3 zXp}&j01R&`R$%|9i8i#DdzAP?ChtPTTK_X7shis(&;bn631?u&p%+s_pr(6=^yeZE zpC!WmT5BI#d>?MS@r1Be`QhVfBVv}Hb2CO=0r@+9Ax}jwx(F)B)K2Aa$tLSr z#Tu{9mFD)><*wzBTyllpYSMF-*-=uq?PdOu%0DjhOBzAWbn}N4Kul2!^NOF@HHUnc z)~?KbQNgdfbfP3%K2FVs)0~#~TpG@COcNthb}OrD`+f6PqoU5nY+af981Ep3M?%hf z=bze*r#K*xF(EAzB7?bFK#Na70KYlszkz{YkIFH<$R@cHj)me*!PMNnOVm02Uxig9CVShAP7@VziL5d_v+f0im=g)@$)gL z+%JS!A`Sy8LLRUN%l}PeOg&}7o#ill9#zFz8DsbmX`@^P1H|Z~NbY{k*@P~WNIbb7 z%Xi>=-Rvinu200>QGR*u&@7S0GTU4-6YFxS$IlNx4IoZXSZ8R5nW!yQdK$1~=?4M8 zk9^F;pt1;%#TTGAf)C3zKUkusGR@GniF#CpV=rySdqkStPj=ic-U6)G%W->|uJo^I z6YGnT(H>I`ufknpCcEWoJ`?McljGTIglGo4gSZ&EloVFQhm*#75pbxLbBCLSfmJ1~ zgP;8tg2|^0q2q$nsHzm?;CF3OT@E|D-6pp?jK$NCR1^4N)S*2w{@h@xzCB~BB9aj+ z`yVhBUT2#x-M1Abw$Su%>o@XkvU)|j)^Atpw2{uR=W3y9eoJL<3vU!^k{kcq>W8Uh zQ8-~&!f`QIS{*4%{a23=@Pc%xKD{?Waa!{VZhWU^U`KcZO>>gSY{cW(o(Ztha zHo3f@2;QnvCoZ4BQPPnv@+@fAF|SRiy&x&7ELVEWM|oY6;QsHjPVgC&7CIff>9OtF ztkBTxDU}B`q7PkfSqG*TlBr%zuga4vllD@xCdJaCxl-!C{B3b>B3#G-MhG+aL3;nn z)!!3xiG(R)H8=p{&2L1<;)3PdEN978(A{1K>N@&duefqcl&2@Z1@Vf4>g?VmCX=%f z5GyzrTbHpX&U#v-M_R{)PN)a7UN@v?_YdFgIT!i$4n_P)vH{80SwDApTUpATE$QnQ z+Jw$Mi=h_9gKnQ9)-Cucu13Q%`U>e?Y79d7k0pBxq|UlAmON>}oExn#2A<0_Oul9j zp;`*5vb0~;BoV&Z#ELR6mOB_1-d81e&ZVLc5-+9QZM~K31hC~{x9#|(_Eo>TpZV!f z6{2qP5ouV<#MIcp;zZa>`LnA>dws@y59_JOs+QH~9GUvs0qe`1zWiBI``L=xhl13B z_8sq*y39`9)>3O$M@zROq4882Y1CLFNB*g|;;g)iO7p|hRkpwWg0(O<#=jAo=PXkd zWrSWB6yW+-=`Zhx2LVh0@55A`Mx?cDdcR5?Y1%$c926%wGyNudVQhO_)Bo3qDXBmY zmg!BLFSst9DJ2#&yPJnb%Gzw(m&~O0ziSa9p6k(AD>*~i<1p_&6ZRov1?S}xjuv`a zuI91S#KDk?$X%||&8>f}Lc@23;eLvSKK>r#Ux_RS7zY;^37fpV#QB>HiTNJ+1oz$S zgP9*1wrzcv*U()Cqc_)@C-17mPM-G6Z+O-p%cJ;zm`&|5|M!&k`|tauE<8eWJWl9MZ^{>>8C4i`!(K)O9SjXg#s6wPi)#Hm=3lGM25%sh769 zUbl56uR|YN2m&tRGOd64haBGf=>B%T+VT|(mj-@}Bo?svKAUX@Nu2Y0@Y3K?uE0n!RDBk!F(#(y?o##7luJhNY?Nk1IHL$)lUQKm2$-F`qh1Q_A8Z^erg%5Bk6wH14B#7u`%2 zS(t}`BFnlI#FK%Zrgisti8ob0URSmX9t25S&_2HdtaMel2YByQN$Z8+gV*t5P+C@@ zdkOos4;`m)MnJy137C77lN8>X?k;OGd91Bq&Y9GvrK|t_pmlo$gsoKGI)8q-E9Lcu 
z6|!#h6WDV?tZW*trVPTk20V`dyr^> zg1_vVq>}oKJ~y>yq=Wvwea)JsmSqy+M||D|qEV`tB3=yHm+`vu9!KN6hRI#bsoDNQ zi0r_)pYfG24tn)Ie#U!xa%=Su{IYMQs{*oxDF(8+&95PbZH~Y2A5h#&M*J4kfAR@d zB!56~E=xt?&{%Br%Kq1E}oR}NoHN0;IBpF8k~HQhqBWA zQVgpO4)_E<}e-R&fRmm`AiKyokyr zecifoAeUIX7_C0>T!@s~bZFsTfh5-To%<2*wCWV~vAz#nnXD@yISv=HpA4EJb?hIO z5}Bk69zAKFEyC)PH@~N90q<@5zfH&LtPj{uA8Mw)Zd4h z;ZK(4R_|^cWHKg7BI@GT6;wu)0R`|oP#*5&3}Ij zYSc@Nxhl@_M8%2p1zZrEm>5|2-eh>TwtP4oYz^IQ+NlqopTFKly3iF^+EKTt$H4-09>v*cs8`>D*Z1siG z+dtv-`>H~#a3q()1+g28Dtr57MJRJk1f4b8wVo(#Rd=(Ct*63Hc1%0tN~oaL!9~J+ zmbUdC>q4-d)mV%-0rqf#$|<9nGCgJ%p0ns_U1UKN9+_Q|Q}ENDbT!u4%Eui=&zN!= z9!%?A@NRBC8vG$-g*NPFsJH0vxl5(0PHlD~@2iePsDzI}@dbFtFeoCSCuLM>aZK)` zjM0m(npC&B&K1W}H{)JCKzDo9V+5Z#Luj(5+48Vlrq}PXvtToRtG6Lw!qJ%?gump4e(#- zx@`R~*CG+I(5AbxF6!prxF7y>#}d&wmZqTXAd1F(H}C|y z3}mnJLX$VbZSBfug#8UF2-=|{-oz}s=@_Bm57uOoI#t3sTFrE!n_k|)OF%IZ^(-Ft zhmw^8JT(;JxKHfme!YxbPQe`9i=4x7(pUA3nUt-2@z^dEtBwlc>9(V${!C4DKWBNm z&~gdWvG@Nv+OSUzQc?YAM=0Kbm~TNK_YK08vHD~HY4pVnGO_J0ABtYV@gk>vi`pTA zwaOJiqO+T4J9+Q>F`b7AAiAA5c5v$hx1H=$Ef*N0o%4d5f1uE|ZM%ByGB3ZZ?}EFS8pGD06<%&dz8vJ;+# z`K2q*qYom>$IHDL(b1Xyg0wE*O#R_KV)yO#P`S>cgNksOlh#Y42}n)3KmtT)CAga@ z<&Jn}EUP>K_)7k1{xe}2{{?K+MMKUzjdXeX3@f}uJOV#bJLG_-rgT`(fw zM5n4QPoCM~pp^X0<(YQsXehS;z70vX7w!laP&q9f$FHvFvS53BRsMe=FJ^C!lU9F@ z)B!)0^qdYD_(d3<4j&@{u(kW$T@&8N4CUWfN)%uzIvWB=0(GuQQ*B1juNR`V1HY5D z!tEh7ENjE4$PdEg@Fu>&J^gUS ztSh7SPYt`p6y1ZTY;}rvJ0-Rn2)vNVcoOm2oV!XpcG;Z=E4%dv^6-6Rqj^>n9r_09 zaFSvF7Fg+ECKl8syyLAFay5|D<0AV)LQbf2CGLjfi*S6nUN4+UdHZKbT2+|3ij`KxqnDw=08KZ&LO>4mWs-bzvOu|Zq)h?ZNpKN zbxYSE)p;of)#}&UnZJ9Ra6f(4TXg}<&{2ksEA`BQU?Q{ z0X#d@$sfW@JyiM@#?=+rTStRhyu?t}vnjIE+3 zMy&<=&+*IK2c9>&*hGMydti1S_1H7s3#X+W^4>{MQ~fK9!X&{JXTG zD4yhQ1D%e?Jxq`B#9=oqLHW*Ik`FO6Yk4AH8JpG((G{kC~hj0a`aj3=;`VTXe{@X9Vyt|>(eh+R@_kQrm% z>QtmeUWIvmCoLOpW?o}j4_v8=lUVvseQoJuLJ{2twlhPuB+t&o%1SGThNfek1hBhT zgWS=LVE|p7w65Z<5tfY-X7i$Qs&b-MlZWQ2M>K|l(0q(Qj>}`(fswU?Nb(6K;vSbm ze3HydH9>v8!E9ax67A>GC=a6Zy5vPKJ}O?kZIoWEg!d zL+1jrHX*2`buRx|6!M0Vr*P&Es;XSv9KQ%*$d34-9ZpOvy=KP~@^Y&^8@{sKy=vaF zSGa?XW&J>^T-cd?-WF%qd}Ct25MtOGX4I1hosN)F@Vxn2JN`EAG_dU}>sASGd||bs z0AkHc>RUxuW3q%m^VJl-3Bz$~FvEkT-x_;55FFy$I4hFueemfk<63&H^pX(Jk3PVz z=i#~eM4nY4^(U?aOPDDSKa?Wtwa+W@IyZcw*n+-jMn%5nd4yPZ2r(%DD-JDDM*Vc!;n{i}= z$NuUeBz3m(*1Q=}yN@ei3e%Lx-N=>6qHuRT`JN$mEpI3OEpY;{?-V!PljkgN>dc;( zahI>1snt>Go;Hb#n!Ua1`0X(pw&6jWJwr2yLp@Yk=BOwm8|I(!`$jbVXRY&mn3t-_ z+7J6>|90Wsdb(`TwWrYy6qs-~U4sbI-O$!qzY|Wt^4do@tYtnOPeun^ErS(4iYQ2gkjs z*-Rt%QJaPtx#?ESGL&xCggTw$?wmvPjiWxI^Z4!e2kh~9z4v~-uj_d|ujli+w&Q$J zb4P5`-Mb%sPCn`^-kuByUW(OiipscVUSu)+I?!}~MHDomw~0i%{7>oOZ8w{>;;dWK z&y1fOjQCOeMm!il{r{v)xd=Om^WI%sBGKDbPP`M;tG$kwF7Sd~DXnGA-$S)d< zOx|JT>mM!!T~VTNKMnBrKbm`Sp{|UgoTi@iy7m;G_-e@TVoKNI^0CNAFTSQ-%yJ$- z{QY|JRr?o=S&N%ptr2l&~a*u19ss7zEe}Cty_1zOVD<9a>{^a*x zqX&*<*qs0-wnx0WkhvCkJc)2R>Xf&n@99&^p+69<*)<>jes*spejBs2?te33vAOkQ z#KoDARL=}Mp^JC?+Wc1mn7#F_@R;_gB(5KEDdW!Fg?G<*iuu(z_9IfVH=wELihgnJ zp`*ji;k;v4mg4`o==A*KVe_jm-kq=(=z~x1Hq)%u^I1{dpT1s?tG*yPmpd0f zci@?W@|fPULi9|})#&W5_sf|}Lq}}fiLu8H9)BRz9j@0mi?pcPq0K!D|CE=RZ?!_b zX8ExDO{n()pkd@$^p(902avt~7414B?RpXCzMfh)UY-2O=!;bzOlzj)Pf%LlhiS#e zk>sMYlsR+U=$AqNt4f!=ObV}f`{+NT?83TtsR4~oS7MCQo@d~S-_87TH+lRTB=~J` z-0bGFy}uX)+<6(eoEZP%7U4Zjr?-{0KXk#6`S+dMhpby4^i@Rmt-{XrRVUAx7He6Z zEDMM?l0Hj_oXtU9LO(bue*Nt98yex^qC?G<5QlWf^{s!O+W5g*dz|jSd2k5~?SG8> z_jSzpx7eY9`K+dIyPtbr{`5Et{;}<6_WwErKKjT@d%IpJel~q_VD7h#yLazLT1}?~ z!?dk3*7i4lOcp+blJ?i`<$kw#8CcC4xb`=;#Lyo+jxU46O>Ar3`{V6hTlpEQ7p;CC z?|<)teY<>eB4RY|S%U@M@;BteY6<07NB%2~nUfR0M^W}3S^4OD>EPeXkF;;8!yAN3 
z4Ru}3vhGGkW!@64GB`O*M2(l0|9-hab-dbQtnK$ z9`2U$-d(ta~#WWAF^PI4&*IE<|(Ne+pB9+uV-0$*H32V zkePX=2GvnT1~^=~N-}0=95Ia8wne=##~E;BMiN{mvALz~#m3yG^70sw!qP!wTBO40 z!*PQ=y_my@(_oS@dHJG7a{~uiU14tKA6vw`MU~`|5=3njU#VftRpi$ga}~ghR}?<+ zG-kXbH@3-rJAJ&-a8sFUByvk(Z9(?SSk%cTwB46^xlz@7voo0B_F{E9U0A$?fJ_G*&57)??q--c26%^XMGqGtcLnDORFXXctnvh1~7Otj2LJ^|U!6>lq~qqTH4H z_j5VeL932kPeO7LuC!Z5qny1m87Q7@D9%cTyXqy`8p=cdY}mNCR4 z%pe6h)hzg1!YYX4)~xiaOC1^}Pk!cK?sAXKM_P_GDX*FOYhr$r_h5a<4(0Jv^HKJ; zb$bq^GSaloMEpm@T|sUdKvDGh2Kg(hCUle>8IX!XF26OdD_bGiC~2<9KXU&jdmEPy zdh^@*n%#o5ieA&MCX9u)AFkxl&nZvTD!$vsT2k67yw73B-)-?DLaEUsD?6AN)-`GM z?V2>5WqX%rPeR%{n*RH@Txw2Ve&4yErzAfi%Z|$T?){GmOGe2{8>G8dG*6)om6gxa zP~tGN?`^LSv*vBhow~%2q|$e1T`DJA`);YG)8^AHihY*l#)_YqRhfGA$L4ReUOj{h z26j0}5<#~7S~fLg9nc2SAeeA*G1sD6Y#CEvPO{n7Cvx~Tj>s0r_djIax09DgnztT1 z$$PgctW9d;=JS%ZIPWvqQHp&CjGkMRq*u&CbgMrqw-I->&<)UVhSAe3+yX?nfZs(FC-RvRdM+%?)K5WMeVoV$=Ym|vKyBt&iylh;+LZb%;_c##+F_1o=&BSWN@kTbCF zB|dx1B5^Xw1#7Z91BrVG)#MA~wjZ5W8KsO@DCp>s@{J*{ae% zZdr$5Em~W*woTIN?&cE|vTV9OO>EezeLC2AJ)YMs-QT=(n2T9%J~9}|)Zbe>%3zf( z=3JrqC1!WU!w3W#bm7Fkt$O9LyQ)e!c)7`yq)|or;D3XtyP+|fTy1@A9 zht-Zxa#5jEARnJ(4oGzg`}NV*x6_0AV*bj!kCWx(bL8Sc`55=3BcOK!YsmQ@lP|!E zX}`XBMCFGFM^C=mENjA8;u}lkul64O!w}<*b=;g{59Es)kK|qK`(VnGLy%jPUJ}s5Xito;`k}zWh0^S-OPRK2>r}t zUI)=a87%?v42YuNS}`$Rv8B}6WusKJj2li|ddfdjGUHuJzn{IJr;V z-^{V~tWaM5r2PBKZHll_REvq+qKWm)r=PcUU1#PPj?k^!{gC_LH1ZY?&0s*deVYCIdy)>dPl={Kkh8D8HIoiAtcVE+@Zjo=tB#QtI!wKT0v<@6tJ(zBVx} zJtgd>$T1*&$bL?`ncU1e{(f=ut*RQ-UN96zNhg?c5E?^|FLUBJwESQ#a7xLef$%IG_tYc5wtgKi_HkBW3#tHSEDu>MGJPBUs0i8>sc-a`pRf$0s7kk4v!j{>^bU zFS=)69vj>)tqmAOjCPlqsCMEzXdjTmFo+`a3?hB8>y_~q+=2sm= z{gzndB8q$0bViTgK#4Eww^O$&HQ%P2D38(z)^-K{xo3okzcDS1FJ~?$ewLr4$3B-K z9FSq|cWd##hU?@XyU(|tgdZj{*K9cBS=Meqy0%wdxRW6EQ5&CKT#?p|bjb>}!AL{g zht!p#Kzf5d#H^2WP~UfkRcXd5b?w$%81tA}Ahq?ut%bEzog^^~EDk}cdf}WgFA(eG z;=1{%yCWN)rAaE>c#F9M?IC5syuT4!Yu0|Hq$^9`CES1{9*!S~_<=bfO=A-BA0}wQ z<@?$ei?e0>(zQD#v`5Lt)u{>8^Fw}?W)rOu*#$L=-7GDLUG=w+INo~uvL=bV@9Ye9 z732@eUC3*f+pmz1TI_b{X*!Yoo*35BWO$N(!p6W>v`J{76`!Y8^nSEdAe0Zj*8c2% z`p_6}?GFk?0YpnXbs9L4;=ebadd%O_#q5SMy2SSWx`?LjxBHWhp>NKik3A^keRjxx z<<}bdt@-8G>yqGh^65`Mkm7S@t2R~1iEbtDE~l=|9cbf6SeGIA<=-dVC(?;!WBMQ= z2>K?=mmk)~5H+t8XUO~OOws4!f=kGx*ye%zR~SpYbXOpbkdRLhClb!L>ou$HK#>UV zls=ZciD%5Ht0T5$Ecqs&qK_Nub1%MD*MPI;elpXIw!KSQq4q@T@6mVJg1WP030+zXGDOu0wnWs$^sWP&ad zM;f#ka`anivVNG^nRmGkWb=xiPbFiX7;s)?-!@!o<(FP@vL>?XNIPi$d&8CYA*haA zzOwFPv6x)SXEO5BXMVcqwyvR)1c83c##fkxdwKS2h`EezX}1d{CxKg%H zvo)N{nZn|gVRv+?i0P?|n|qR)(ppp=5({$+Eje;CFD~2mX=~l542n~+-w5^m;*y6wQpZe$j=*^XgZXhzZxnsim9kD(>Q5&@{g!%=#v7}QO6 zvvzlDb9Xr6YZ2YGrNZ*tE;7Z$x90+iD;ch^c*}(Jt#@Ygxn;RI6NcTU^Om|U!faTO zOqX`zVlR?fNWjOaS>DUk?{HS)hNAtRxx&%+4C`g4swwLQTnN&Hp4b3ilC!y zxmRrWU}3XWE}BQ=`QJqqc#Arft}{8ix{9!IEUER+G%6M(5MHv9tzhozC;QR}oqx_GE;CmHC+YzT1~`IVECF$h?M2m&Ds9yJV@dBJ(G*2Z4-6Z2NwwD`{qDGrG?ZOiyD_{U0#oUoj6eJS6C&R$o4OaR9 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala index 2b4abed645910..4991e397eb11c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala @@ -150,7 +150,7 @@ class ApproximatePercentileQuerySuite extends QueryTest with SharedSparkSession (1 to 1000).toDF("col").createOrReplaceTempView(table) checkAnswer( spark.sql(s"SELECT percentile_approx(col, array(0.25 + 0.25D), 200 + 800) FROM $table"), - Row(Seq(499)) + Row(Seq(500)) ) } } @@ -296,4 +296,23 
     buffer.quantileSummaries
     assert(buffer.isCompressed)
   }
+
+  test("SPARK-32908: maximum target error in percentile_approx") {
+    withTempView(table) {
+      spark.read
+        .schema("col int")
+        .csv(testFile("test-data/percentile_approx-input.csv.bz2"))
+        .repartition(1)
+        .createOrReplaceTempView(table)
+      checkAnswer(
+        spark.sql(
+          s"""SELECT
+             |  percentile_approx(col, 0.77, 1000),
+             |  percentile_approx(col, 0.77, 10000),
+             |  percentile_approx(col, 0.77, 100000),
+             |  percentile_approx(col, 0.77, 1000000)
+             |FROM $table""".stripMargin),
+        Row(18, 17, 17, 17))
+    }
+  }
 }

From b49aaa33e13814a448be51a7e65a29cb515b8248 Mon Sep 17 00:00:00 2001
From: Takeshi Yamamuro
Date: Thu, 17 Sep 2020 22:07:47 -0700
Subject: [PATCH 0062/1009] [SPARK-32906][SQL] Struct field names should not change after normalizing floats

### What changes were proposed in this pull request?

This PR intends to fix a minor bug when normalizing floats for struct types;
```
scala> import org.apache.spark.sql.execution.aggregate.HashAggregateExec
scala> val df = Seq(Tuple1(Tuple1(-0.0d)), Tuple1(Tuple1(0.0d))).toDF("k")
scala> val agg = df.distinct()
scala> agg.explain()
== Physical Plan ==
*(2) HashAggregate(keys=[k#40], functions=[])
+- Exchange hashpartitioning(k#40, 200), true, [id=#62]
   +- *(1) HashAggregate(keys=[knownfloatingpointnormalized(if (isnull(k#40)) null else named_struct(col1, knownfloatingpointnormalized(normalizenanandzero(k#40._1)))) AS k#40], functions=[])
      +- *(1) LocalTableScan [k#40]

scala> val aggOutput = agg.queryExecution.sparkPlan.collect { case a: HashAggregateExec => a.output.head }
scala> aggOutput.foreach { attr => println(attr.prettyJson) }
### Final Aggregate ###
[ {
  "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference",
  "num-children" : 0,
  "name" : "k",
  "dataType" : {
    "type" : "struct",
    "fields" : [ {
      "name" : "_1",
               ^^^
      "type" : "double",
      "nullable" : false,
      "metadata" : { }
    } ]
  },
  "nullable" : true,
  "metadata" : { },
  "exprId" : {
    "product-class" : "org.apache.spark.sql.catalyst.expressions.ExprId",
    "id" : 40,
    "jvmId" : "a824e83f-933e-4b85-a1ff-577b5a0e2366"
  },
  "qualifier" : [ ]
} ]

### Partial Aggregate ###
[ {
  "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference",
  "num-children" : 0,
  "name" : "k",
  "dataType" : {
    "type" : "struct",
    "fields" : [ {
      "name" : "col1",
               ^^^^
      "type" : "double",
      "nullable" : true,
      "metadata" : { }
    } ]
  },
  "nullable" : true,
  "metadata" : { },
  "exprId" : {
    "product-class" : "org.apache.spark.sql.catalyst.expressions.ExprId",
    "id" : 40,
    "jvmId" : "a824e83f-933e-4b85-a1ff-577b5a0e2366"
  },
  "qualifier" : [ ]
} ]
```

### Why are the changes needed?

bugfix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added tests.

Closes #29780 from maropu/FixBugInNormalizedFloatingNumbers.
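Not part of the patch: a minimal illustration (assuming a Spark 3.x REPL with the catalyst expression classes on the classpath) of why the pre-fix code lost the field name. `CreateStruct` invents positional names (`col1`, `col2`, ...) for plain child expressions, while `CreateNamedStruct`, which the diff that follows switches to, keeps the names it is explicitly given:

```
import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, CreateStruct, Literal}

// CreateStruct derives a positional field name for a plain literal child ...
val unnamed = CreateStruct(Seq(Literal(1.0d)))
println(unnamed.dataType.catalogString)   // struct<col1:double>

// ... whereas CreateNamedStruct keeps the explicit name/value pairs it receives,
// which is why the fix passes Literal(name) alongside each normalized field.
val named = CreateNamedStruct(Seq(Literal("_1"), Literal(1.0d)))
println(named.dataType.catalogString)     // struct<_1:double>
```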
Authored-by: Takeshi Yamamuro
Signed-off-by: Liang-Chi Hsieh
---
 .../sql/catalyst/optimizer/NormalizeFloatingNumbers.scala | 6 +++---
 .../org/apache/spark/sql/DataFrameAggregateSuite.scala    | 8 ++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
index 10f846cf910f9..bfc36ec477a73 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala
@@ -129,10 +129,10 @@ object NormalizeFloatingNumbers extends Rule[LogicalPlan] {
       Coalesce(children.map(normalize))

     case _ if expr.dataType.isInstanceOf[StructType] =>
-      val fields = expr.dataType.asInstanceOf[StructType].fields.indices.map { i =>
-        normalize(GetStructField(expr, i))
+      val fields = expr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map {
+        case (name, i) => Seq(Literal(name), normalize(GetStructField(expr, i)))
       }
-      val struct = CreateStruct(fields)
+      val struct = CreateNamedStruct(fields.flatten.toSeq)
       KnownFloatingPointNormalized(If(IsNull(expr), Literal(null, struct.dataType), struct))

     case _ if expr.dataType.isInstanceOf[ArrayType] =>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index e954e2bf1c46d..353444b664412 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -1043,6 +1043,14 @@ class DataFrameAggregateSuite extends QueryTest
     checkAnswer(sql(queryTemplate("FIRST")), Row(1))
     checkAnswer(sql(queryTemplate("LAST")), Row(3))
   }
+
+  test("SPARK-32906: struct field names should not change after normalizing floats") {
+    val df = Seq(Tuple1(Tuple2(-0.0d, Double.NaN)), Tuple1(Tuple2(0.0d, Double.NaN))).toDF("k")
+    val aggs = df.distinct().queryExecution.sparkPlan.collect { case a: HashAggregateExec => a }
+    assert(aggs.length == 2)
+    assert(aggs.head.output.map(_.dataType.simpleString).head ===
+      aggs.last.output.map(_.dataType.simpleString).head)
+  }
 }

 case class B(c: Option[Double])

From 8b09536cdf5c5477114cc11601c8b68c70408279 Mon Sep 17 00:00:00 2001
From: gengjiaan
Date: Fri, 18 Sep 2020 07:06:38 +0000
Subject: [PATCH 0063/1009] [SPARK-27951][SQL] Support ANSI SQL NTH_VALUE window function

### What changes were proposed in this pull request?

The `NTH_VALUE` function is an ANSI SQL standard window function.
For example:
```
CREATE TEMPORARY TABLE empsalary (
    depname varchar,
    empno bigint,
    salary int,
    enroll_date date
);

INSERT INTO empsalary VALUES
('develop', 10, 5200, '2007-08-01'),
('sales', 1, 5000, '2006-10-01'),
('personnel', 5, 3500, '2007-12-10'),
('sales', 4, 4800, '2007-08-08'),
('personnel', 2, 3900, '2006-12-23'),
('develop', 7, 4200, '2008-01-01'),
('develop', 9, 4500, '2008-01-01'),
('sales', 3, 4800, '2007-08-01'),
('develop', 8, 6000, '2006-10-01'),
('develop', 11, 5200, '2007-08-15');

select first_value(salary) over(order by salary range between 1000 preceding and 1000 following),
       lead(salary) over(order by salary range between 1000 preceding and 1000 following),
       nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following),
       salary from empsalary;

 first_value | lead | nth_value | salary
-------------+------+-----------+--------
        3500 | 3900 |      3500 |   3500
        3500 | 4200 |      3500 |   3900
        3500 | 4500 |      3500 |   4200
        3500 | 4800 |      3500 |   4500
        3900 | 4800 |      3900 |   4800
        3900 | 5000 |      3900 |   4800
        4200 | 5200 |      4200 |   5000
        4200 | 5200 |      4200 |   5200
        4200 | 6000 |      4200 |   5200
        5000 |      |      5000 |   6000
(10 rows)
```

Several mainstream databases support this syntax.

**PostgreSQL:**
https://www.postgresql.org/docs/8.4/functions-window.html

**Vertica:**
https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/NTH_VALUEAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_____23

**Oracle:**
https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/NTH_VALUE.html#GUID-F8A0E88C-67E5-4AA6-9515-95D03A7F9EA0

**Redshift:**
https://docs.aws.amazon.com/redshift/latest/dg/r_WF_NTH.html

**Presto:**
https://prestodb.io/docs/current/functions/window.html

**MySQL:**
https://www.mysqltutorial.org/mysql-window-functions/mysql-nth_value-function/

### Why are the changes needed?

The `NTH_VALUE` function is an ANSI SQL standard window function, and it is very useful.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing and new UTs.

Closes #29604 from beliefer/support-nth_value.
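Not part of the patch: a minimal usage sketch of the new `nth_value` Column function this PR adds to `org.apache.spark.sql.functions`, assuming an active `spark` session in which the `empsalary` table from the example above has already been created:

```
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, nth_value}

// Mirrors "nth_value(salary, 1) OVER (ORDER BY salary
//   RANGE BETWEEN 1000 PRECEDING AND 1000 FOLLOWING)" from the SQL example above.
val w = Window.orderBy(col("salary")).rangeBetween(-1000, 1000)

spark.table("empsalary")
  .select(col("salary"), nth_value(col("salary"), 1).over(w).as("nth_value"))
  .orderBy(col("salary"))
  .show()
```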
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/windowExpressions.scala | 76 +++++- .../analysis/AnalysisErrorSuite.scala | 22 ++ .../org/apache/spark/sql/functions.scala | 29 ++ .../sql-functions/sql-expression-schema.md | 7 +- .../inputs/postgreSQL/window_part1.sql | 4 +- .../inputs/postgreSQL/window_part2.sql | 4 +- .../inputs/postgreSQL/window_part3.sql | 3 +- .../resources/sql-tests/inputs/window.sql | 94 ++++++- .../results/postgreSQL/window_part3.sql.out | 11 +- .../sql-tests/results/window.sql.out | 251 +++++++++++++++++- .../sql/DataFrameWindowFunctionsSuite.scala | 60 +++++ 12 files changed, 549 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 7e73667e4b85f..f62c8bb0c2931 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -510,6 +510,7 @@ object FunctionRegistry { expression[Lag]("lag"), expression[RowNumber]("row_number"), expression[CumeDist]("cume_dist"), + expression[NthValue]("nth_value"), expression[NTile]("ntile"), expression[Rank]("rank"), expression[DenseRank]("dense_rank"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index c8b6433207355..07a2b6fa96c12 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -476,7 +476,7 @@ case class Lag(input: Expression, offset: Expression, default: Expression) abstract class AggregateWindowFunction extends DeclarativeAggregate with WindowFunction { self: Product => - override val frame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow) + override val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow) override def dataType: DataType = IntegerType override def nullable: Boolean = true override lazy val mergeExpressions = @@ -549,6 +549,80 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def prettyName: String = "cume_dist" } +@ExpressionDescription( + usage = """ + _FUNC_(input[, offset]) - Returns the value of `input` at the row that is the `offset`th row + from beginning of the window frame. Offset starts at 1. If ignoreNulls=true, we will skip + nulls when finding the `offset`th row. Otherwise, every row counts for the `offset`. If + there is no such an `offset`th row (e.g., when the offset is 10, size of the window frame + is less than 10), null is returned. + """, + arguments = """ + Arguments: + * input - the target column or expression that the function operates on. + * offset - a positive int literal to indicate the offset in the window frame. It starts + with 1. + * ignoreNulls - an optional specification that indicates the NthValue should skip null + values in the determination of which row to use. 
+ """, + since = "3.1.0", + group = "window_funcs") +case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Boolean) + extends AggregateWindowFunction with ImplicitCastInputTypes { + + def this(child: Expression, offset: Expression) = this(child, offset, false) + + override def children: Seq[Expression] = input :: offsetExpr :: Nil + + override val frame: WindowFrame = UnspecifiedFrame + + override def dataType: DataType = input.dataType + + override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegerType) + + override def checkInputDataTypes(): TypeCheckResult = { + val check = super.checkInputDataTypes() + if (check.isFailure) { + check + } else if (!offsetExpr.foldable) { + TypeCheckFailure(s"Offset expression '$offsetExpr' must be a literal.") + } else if (offset <= 0) { + TypeCheckFailure( + s"The 'offset' argument of nth_value must be greater than zero but it is $offset.") + } else { + TypeCheckSuccess + } + } + + private lazy val offset = offsetExpr.eval().asInstanceOf[Int].toLong + private lazy val result = AttributeReference("result", input.dataType)() + private lazy val count = AttributeReference("count", LongType)() + override lazy val aggBufferAttributes: Seq[AttributeReference] = result :: count :: Nil + + override lazy val initialValues: Seq[Literal] = Seq( + /* result = */ Literal.create(null, input.dataType), + /* count = */ Literal(1L) + ) + + override lazy val updateExpressions: Seq[Expression] = { + if (ignoreNulls) { + Seq( + /* result = */ If(count === offset && input.isNotNull, input, result), + /* count = */ If(input.isNull, count, count + 1L) + ) + } else { + Seq( + /* result = */ If(count === offset, input, result), + /* count = */ count + 1L + ) + } + } + + override lazy val evaluateExpression: AttributeReference = result + + override def toString: String = s"$prettyName($input, $offset)${if (ignoreNulls) " ignore nulls"}" +} + /** * The NTile function divides the rows for each window partition into `n` buckets ranging from 1 to * at most `n`. Bucket values will differ by at most 1. If the number of rows in the partition does diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index a99f7e2be6e7e..d3a14e511cdc2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -233,6 +233,28 @@ class AnalysisErrorSuite extends AnalysisTest { SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")), "window frame" :: "must match the required frame" :: Nil) + errorTest( + "the offset of nth_value window function is negative or zero", + testRelation2.select( + WindowExpression( + new NthValue(AttributeReference("b", IntegerType)(), Literal(0)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")), + "The 'offset' argument of nth_value must be greater than zero but it is 0." 
:: Nil) + + errorTest( + "the offset of nth_value window function is not int literal", + testRelation2.select( + WindowExpression( + new NthValue(AttributeReference("b", IntegerType)(), Literal(true)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(0), Literal(0)))).as("window")), + "argument 2 requires int type, however, 'true' is of boolean type." :: Nil) + errorTest( "too many generators", listRelation.select(Explode($"list").as("a"), Explode($"list").as("b")), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 6201492d04b0c..b20e8c241ef9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -993,6 +993,35 @@ object functions { Lead(e.expr, Literal(offset), Literal(defaultValue)) } + /** + * Window function: returns the value that is the `offset`th row of the window frame + * (counting from 1), and `null` if the size of window frame is less than `offset` rows. + * + * It will return the `offset`th non-null value it sees when ignoreNulls is set to true. + * If all values are null, then null is returned. + * + * This is equivalent to the nth_value function in SQL. + * + * @group window_funcs + * @since 3.1.0 + */ + def nth_value(e: Column, offset: Int, ignoreNulls: Boolean): Column = withExpr { + NthValue(e.expr, Literal(offset), ignoreNulls) + } + + /** + * Window function: returns the value that is the `offset`th row of the window frame + * (counting from 1), and `null` if the size of window frame is less than `offset` rows. + * + * This is equivalent to the nth_value function in SQL. + * + * @group window_funcs + * @since 3.1.0 + */ + def nth_value(e: Column, offset: Int): Column = withExpr { + NthValue(e.expr, Literal(offset), false) + } + /** * Window function: returns the ntile group id (from 1 to `n` inclusive) in an ordered window * partition. 
For example, if `n` is 4, the first quarter of the rows will get value 1, the second diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 855ba3f00a4e6..45f561a61df78 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,8 +1,8 @@ ## Summary - - Number of queries: 339 - - Number of expressions that missing example: 34 - - Expressions missing examples: and,bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch + - Number of queries: 340 + - Number of expressions that missing example: 35 + - Expressions missing examples: and,bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,struct,cume_dist,dense_rank,input_file_block_length,input_file_block_start,input_file_name,lag,lead,monotonically_increasing_id,ntile,!,not,nth_value,or,percent_rank,rank,row_number,spark_partition_id,version,window,positive,count_min_sketch ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | ---------- | ---------------------- | ------------- | ------------- | @@ -191,6 +191,7 @@ | org.apache.spark.sql.catalyst.expressions.Not | ! | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Not | not | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | +| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql index 6e95aca7aff62..d12bee6e47223 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part1.sql @@ -95,7 +95,7 @@ SELECT last(ten) OVER (PARTITION BY four), ten, four FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s ORDER BY four, ten; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-30707] Lead/Lag window function throws AnalysisException without ORDER BY clause -- SELECT nth_value(ten, four + 1) OVER (PARTITION BY four), ten, four -- FROM (SELECT * FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten)s; @@ -301,7 +301,7 @@ FROM tenk1 WHERE unique1 < 10; -- unique1, four -- FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-30707] Lead/Lag window function throws AnalysisException without ORDER BY clause -- SELECT first_value(unique1) over w, -- nth_value(unique1, 2) over w AS nth_2, -- last_value(unique1) over w, unique1, four diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql index ba1acc9f56b4a..50c0bc3410312 100644 --- 
a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part2.sql @@ -105,7 +105,7 @@ FROM tenk1 WHERE unique1 < 10; -- select sum(salary) over (order by enroll_date range between '1 year' preceding and '1 year' following -- exclude ties), salary, enroll_date from empsalary; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-28310] ANSI SQL grammar support: first_value/last_value(expression, [RESPECT NULLS | IGNORE NULLS]) -- select first_value(salary) over(order by salary range between 1000 preceding and 1000 following), -- lead(salary) over(order by salary range between 1000 preceding and 1000 following), -- nth_value(salary, 1) over(order by salary range between 1000 preceding and 1000 following), @@ -116,7 +116,7 @@ FROM tenk1 WHERE unique1 < 10; -- lag(salary) over(order by salary range between 1000 preceding and 1000 following), -- salary from empsalary; --- [SPARK-27951] ANSI SQL: NTH_VALUE function +-- [SPARK-28310] ANSI SQL grammar support: first_value/last_value(expression, [RESPECT NULLS | IGNORE NULLS]) -- select first_value(salary) over(order by salary range between 1000 following and 3000 following -- exclude current row), -- lead(salary) over(order by salary range between 1000 following and 3000 following exclude ties), diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql index f4b8454da0d82..6f33a07631f7a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql @@ -399,8 +399,7 @@ SELECT range(1, 100) OVER () FROM empsalary; SELECT ntile(0) OVER (ORDER BY ten), ten, four FROM tenk1; --- [SPARK-27951] ANSI SQL: NTH_VALUE function --- SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; +SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1; -- filter diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 72d812d6a4e49..5de6db210ce36 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -16,6 +16,26 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (3, 1L, 1.0D, date("2017-08-01"), timestamp_seconds(1501545600), null) AS testData(val, val_long, val_double, val_date, val_timestamp, cate); +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary); + -- RowsBetween SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val; @@ -124,4 +144,76 @@ WINDOW w AS (PARTITION BY cate ORDER BY val); -- with filter predicate SELECT val, cate, count(val) FILTER (WHERE val > 1) 
OVER(PARTITION BY cate) -FROM testData ORDER BY cate, val; \ No newline at end of file +FROM testData ORDER BY cate, val; + +-- nth_value() over () +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary + RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + department, + salary, + NTH_VALUE(employee_name, 2) OVER ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) second_highest_salary +FROM + basic_pays +ORDER BY department; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 08eba6797b01d..b63b5601715a8 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 30 -- !query @@ -385,6 +385,15 @@ org.apache.spark.sql.AnalysisException cannot resolve 'ntile(0)' due to data type mismatch: Buckets expression must be positive, but got: 0; line 1 pos 7 +-- !query +SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'nthvalue(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 + + -- !query DROP TABLE empsalary -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index ede044a44fdaa..a8875fd449bad 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 29 -- !query @@ -19,6 +19,30 @@ struct<> -- !query output +-- !query +CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES +('Diane Murphy','Accounting',8435), +('Mary Patterson','Accounting',9998), +('Jeff Firrelli','Accounting',8992), +('William Patterson','Accounting',8870), +('Gerard Bondur','Accounting',11472), +('Anthony Bow','Accounting',6627), +('Leslie 
Jennings','IT',8113), +('Leslie Thompson','IT',5186), +('Julie Firrelli','Sales',9181), +('Steve Patterson','Sales',9441), +('Foon Yue Tseng','Sales',6660), +('George Vanauf','Sales',10563), +('Loui Bondur','SCM',10449), +('Gerard Hernandez','SCM',6949), +('Pamela Castillo','SCM',11303), +('Larry Bott','SCM',11798), +('Barry Jones','SCM',10586) +AS basic_pays(employee_name, department, salary) +-- !query schema +struct<> +-- !query output + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData @@ -391,3 +415,228 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException window aggregate function with filter predicate is not supported yet.; + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary + RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary +-- !query schema +struct +-- !query output +Leslie Thompson 5186 NULL +Anthony Bow 6627 Anthony Bow +Foon Yue Tseng 6660 Anthony Bow +Gerard Hernandez 6949 Anthony Bow +Leslie Jennings 8113 Foon Yue Tseng +Diane Murphy 8435 Foon Yue Tseng +William Patterson 8870 Leslie Jennings +Jeff Firrelli 8992 Diane Murphy +Julie Firrelli 9181 Diane Murphy +Steve Patterson 9441 Diane Murphy +Mary Patterson 9998 Diane Murphy +Loui Bondur 10449 Jeff Firrelli +George Vanauf 10563 Jeff Firrelli +Barry Jones 10586 Jeff Firrelli +Pamela Castillo 11303 Mary Patterson +Gerard Bondur 11472 Loui Bondur +Larry Bott 11798 Loui Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard 
Bondur +Barry Jones 10586 Pamela Castillo +George Vanauf 10563 Barry Jones +Loui Bondur 10449 George Vanauf +Mary Patterson 9998 Loui Bondur +Steve Patterson 9441 Mary Patterson +Julie Firrelli 9181 Steve Patterson +Jeff Firrelli 8992 Julie Firrelli +William Patterson 8870 Jeff Firrelli +Diane Murphy 8435 William Patterson +Leslie Jennings 8113 Diane Murphy +Gerard Hernandez 6949 Leslie Jennings +Foon Yue Tseng 6660 Gerard Hernandez +Anthony Bow 6627 Foon Yue Tseng +Leslie Thompson 5186 Anthony Bow + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Pamela Castillo +Pamela Castillo 11303 Barry Jones +Barry Jones 10586 George Vanauf +George Vanauf 10563 Loui Bondur +Loui Bondur 10449 Mary Patterson +Mary Patterson 9998 Steve Patterson +Steve Patterson 9441 Julie Firrelli +Julie Firrelli 9181 Jeff Firrelli +Jeff Firrelli 8992 William Patterson +William Patterson 8870 Diane Murphy +Diane Murphy 8435 Leslie Jennings +Leslie Jennings 8113 Gerard Hernandez +Gerard Hernandez 6949 Foon Yue Tseng +Foon Yue Tseng 6660 Anthony Bow +Anthony Bow 6627 Leslie Thompson +Leslie Thompson 5186 NULL + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + department, + salary, + NTH_VALUE(employee_name, 2) OVER ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) second_highest_salary +FROM + basic_pays +ORDER BY department +-- !query schema +struct +-- !query output +Gerard Bondur Accounting 11472 Mary Patterson +Mary Patterson Accounting 9998 Mary Patterson +Jeff Firrelli Accounting 8992 Mary Patterson +William Patterson Accounting 8870 Mary Patterson +Diane Murphy Accounting 8435 Mary Patterson +Anthony Bow Accounting 6627 Mary Patterson +Leslie Jennings IT 8113 Leslie Thompson +Leslie Thompson IT 5186 Leslie Thompson +Larry Bott SCM 11798 Pamela Castillo +Pamela Castillo SCM 11303 Pamela Castillo +Barry Jones SCM 10586 Pamela Castillo +Loui Bondur SCM 10449 Pamela Castillo +Gerard Hernandez SCM 6949 Pamela Castillo +George Vanauf Sales 10563 Steve Patterson +Steve Patterson Sales 9441 Steve Patterson +Julie Firrelli Sales 9181 Steve Patterson +Foon Yue Tseng Sales 6660 Steve Patterson \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index bc6adfb857b02..c5dcdc44cc64f 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -541,6 +541,66 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("b", 3, null, null, null, null, null, null))) } + test("nth_value with ignoreNulls") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, nullStr), + ("b", 1, nullStr), + ("b", 2, nullStr)). + toDF("key", "order", "value") + val window = Window.partitionBy($"key").orderBy($"order") + checkAnswer( + df.select( + $"key", + $"order", + nth_value($"value", 2).over(window), + nth_value($"value", 2, ignoreNulls = false).over(window), + nth_value($"value", 2, ignoreNulls = true).over(window)), + Seq( + Row("a", 0, null, null, null), + Row("a", 1, "x", "x", null), + Row("a", 2, "x", "x", "y"), + Row("a", 3, "x", "x", "y"), + Row("a", 4, "x", "x", "y"), + Row("b", 1, null, null, null), + Row("b", 2, null, null, null))) + } + + test("nth_value on descending ordered window") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, "v"), + ("b", 1, "k"), + ("b", 2, "l"), + ("b", 3, nullStr)). + toDF("key", "order", "value") + val window = Window.partitionBy($"key").orderBy($"order".desc) + checkAnswer( + df.select( + $"key", + $"order", + nth_value($"value", 2).over(window), + nth_value($"value", 2, ignoreNulls = false).over(window), + nth_value($"value", 2, ignoreNulls = true).over(window)), + Seq( + Row("a", 0, "z", "z", "z"), + Row("a", 1, "z", "z", "z"), + Row("a", 2, "z", "z", "z"), + Row("a", 3, "z", "z", "z"), + Row("a", 4, null, null, null), + Row("b", 1, "l", "l", "k"), + Row("b", 2, "l", "l", null), + Row("b", 3, null, null, null))) + } + test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") { val src = Seq((0, 3, 5)).toDF("a", "b", "c") .withColumn("Data", struct("a", "b")) From 9e9d4b6994a29fb139fd50d24b5418a900c7f072 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Sep 2020 07:41:21 +0000 Subject: [PATCH 0064/1009] [SPARK-32905][CORE][YARN] ApplicationMaster fails to receive UpdateDelegationTokens message ### What changes were proposed in this pull request? With a long-running application in kerberized mode, the AMEndpiont handles `UpdateDelegationTokens` message wrong, which is an OneWayMessage that should be handled in the `receive` function. ```java 20-09-15 18:53:01 INFO yarn.YarnAllocator: Received 22 containers from YARN, launching executors on 0 of them. 
20-09-16 12:52:28 ERROR netty.Inbox: Ignoring error org.apache.spark.SparkException: NettyRpcEndpointRef(spark-client://YarnAM) does not implement 'receive' at org.apache.spark.rpc.RpcEndpoint$$anonfun$receive$1.applyOrElse(RpcEndpoint.scala:70) at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115) at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:203) at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100) at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75) at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 20-09-17 06:52:28 ERROR netty.Inbox: Ignoring error org.apache.spark.SparkException: NettyRpcEndpointRef(spark-client://YarnAM) does not implement 'receive' at org.apache.spark.rpc.RpcEndpoint$$anonfun$receive$1.applyOrElse(RpcEndpoint.scala:70) at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115) at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:203) at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100) at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75) at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Why are the changes needed? bugfix, without a proper token refresher, the long-running apps are going to fail potentially in kerberized cluster ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Passing jenkins and verify manually I am running the sub-module `kyuubi-spark-sql-engine` of https://github.com/yaooqinn/kyuubi The simplest way to reproduce the bug and verify this fix is to follow these steps #### 1 build the `kyuubi-spark-sql-engine` module ``` mvn clean package -pl :kyuubi-spark-sql-engine ``` #### 2. config the spark with Kerberos settings towards your secured cluster #### 3. start it in the background ``` nohup bin/spark-submit --class org.apache.kyuubi.engine.spark.SparkSQLEngine ../kyuubi-spark-sql-engine-1.0.0-SNAPSHOT.jar > kyuubi.log & ``` #### 4. check the AM log and see "Updating delegation tokens ..." for SUCCESS "Inbox: Ignoring error ...... does not implement 'receive'" for FAILURE Closes #29777 from yaooqinn/SPARK-32905. 
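As background for readers less familiar with Spark's RPC layer, here is a minimal, self-contained sketch of the dispatch rule behind this fix. It is a hypothetical model with illustrative names, not Spark's actual `RpcEndpoint`/`Inbox` classes: a one-way message is only ever offered to `receive`, so a handler registered in `receiveAndReply` never fires for it.

```scala
object RpcDispatchSketch {
  // Hypothetical message mirroring the real UpdateDelegationTokens case class.
  case class UpdateDelegationTokens(tokens: Array[Byte])

  // Toy endpoint contract: one-way messages go to `receive`, ask-style messages
  // go to `receiveAndReply`. This models the rule only, not Spark's real API.
  trait Endpoint {
    def receive: PartialFunction[Any, Unit] = PartialFunction.empty
    def receiveAndReply(reply: Any => Unit): PartialFunction[Any, Unit] = PartialFunction.empty
  }

  // Mimics how an inbox delivers a one-way message: only `receive` is consulted.
  def deliverOneWay(endpoint: Endpoint, msg: Any): Unit =
    endpoint.receive.applyOrElse(msg, (m: Any) => println(s"does not implement 'receive' for $m"))

  def main(args: Array[String]): Unit = {
    val broken = new Endpoint {
      // Handler registered in receiveAndReply: a one-way UpdateDelegationTokens is dropped.
      override def receiveAndReply(reply: Any => Unit): PartialFunction[Any, Unit] = {
        case UpdateDelegationTokens(tokens) => reply(s"updated ${tokens.length} bytes")
      }
    }
    val fixed = new Endpoint {
      // Handler registered in receive: the one-way message is handled.
      override def receive: PartialFunction[Any, Unit] = {
        case UpdateDelegationTokens(tokens) =>
          println(s"Updating delegation tokens (${tokens.length} bytes)")
      }
    }
    deliverOneWay(broken, UpdateDelegationTokens(Array[Byte](1, 2))) // falls through, like the Inbox error
    deliverOneWay(fixed, UpdateDelegationTokens(Array[Byte](1, 2)))  // handled
  }
}
```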
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5ca624a8d66cb..5f632fbb259ff 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -779,6 +779,11 @@ private[spark] class ApplicationMaster( driver.send(RegisterClusterManager(self)) } + override def receive: PartialFunction[Any, Unit] = { + case UpdateDelegationTokens(tokens) => + SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) + } + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case r: RequestExecutors => Option(allocator) match { @@ -813,9 +818,6 @@ private[spark] class ApplicationMaster( case None => logWarning("Container allocator is not ready to find executor loss reasons yet.") } - - case UpdateDelegationTokens(tokens) => - SparkHadoopUtil.get.addDelegationTokens(tokens, sparkConf) } override def onDisconnected(remoteAddress: RpcAddress): Unit = { From 78928879810a2e96dbb6ec4608b548a0072a040f Mon Sep 17 00:00:00 2001 From: William Hyun Date: Fri, 18 Sep 2020 18:13:11 +0900 Subject: [PATCH 0065/1009] [SPARK-32930][CORE] Replace deprecated isFile/isDirectory methods ### What changes were proposed in this pull request? This PR aims to replace deprecated `isFile` and `isDirectory` methods. ```diff - fs.isDirectory(hadoopPath) + fs.getFileStatus(hadoopPath).isDirectory ``` ```diff - fs.isFile(new Path(inProgressLog)) + fs.getFileStatus(new Path(inProgressLog)).isFile ``` ### Why are the changes needed? It shows deprecation warnings. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-sbt-hadoop-3.2-hive-2.3/1244/consoleFull ``` [warn] /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2-hive-2.3/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala:815: method isFile in class FileSystem is deprecated: see corresponding Javadoc for more information. [warn] if (!fs.isFile(new Path(inProgressLog))) { ``` ``` [warn] /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2-hive-2.3/core/src/main/scala/org/apache/spark/SparkContext.scala:1884: method isDirectory in class FileSystem is deprecated: see corresponding Javadoc for more information. [warn] if (fs.isDirectory(hadoopPath)) { ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Jenkins. Closes #29796 from williamhyun/filesystem. 
Authored-by: William Hyun Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- .../spark/deploy/history/EventLogFileWritersSuite.scala | 6 +++--- .../apache/spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- .../scala/org/apache/spark/streaming/util/HdfsUtils.scala | 8 ++++++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 85a24acb97c07..409e3065492b0 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1881,7 +1881,7 @@ class SparkContext(config: SparkConf) extends Logging { if (!fs.exists(hadoopPath)) { throw new FileNotFoundException(s"Jar ${path} not found") } - if (fs.isDirectory(hadoopPath)) { + if (fs.getFileStatus(hadoopPath).isDirectory) { throw new IllegalArgumentException( s"Directory ${path} is not allowed for addJar") } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala index 060b878fb8ef2..e9b739ce7a4c6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileWritersSuite.scala @@ -213,7 +213,7 @@ class SingleEventLogFileWriterSuite extends EventLogFileWritersSuite { compressionCodecShortName) val finalLogPath = new Path(logPath) - assert(fileSystem.exists(finalLogPath) && fileSystem.isFile(finalLogPath)) + assert(fileSystem.exists(finalLogPath) && fileSystem.getFileStatus(finalLogPath).isFile) assert(expectedLines === readLinesFromEventLogFile(finalLogPath, fileSystem)) } } @@ -357,10 +357,10 @@ class RollingEventLogFilesWriterSuite extends EventLogFileWritersSuite { expectedLines: Seq[String]): Unit = { val logDirPath = getAppEventLogDirPath(logBaseDir, appId, appAttemptId) - assert(fileSystem.exists(logDirPath) && fileSystem.isDirectory(logDirPath)) + assert(fileSystem.exists(logDirPath) && fileSystem.getFileStatus(logDirPath).isDirectory) val appStatusFile = getAppStatusFilePath(logDirPath, appId, appAttemptId, inProgress = false) - assert(fileSystem.exists(appStatusFile) && fileSystem.isFile(appStatusFile)) + assert(fileSystem.exists(appStatusFile) && fileSystem.getFileStatus(appStatusFile).isFile) val eventLogFiles = listEventLogFiles(logDirPath) val allLines = mutable.ArrayBuffer[String]() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 62b6c6c201c68..44c551cf4a4c1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1192,7 +1192,7 @@ class HiveDDLSuite expectedDBUri, Map.empty)) // the database directory was created - assert(fs.exists(dbPath) && fs.isDirectory(dbPath)) + assert(fs.exists(dbPath) && fs.getFileStatus(dbPath).isDirectory) sql(s"USE $dbName") val tabName = "tab1" diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 146577214de17..006bcad5d68c2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -58,7 +58,7 @@ private[streaming] object HdfsUtils { // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. - if (!dfs.isFile(dfsPath)) null else throw e + if (!dfs.getFileStatus(dfsPath).isFile) null else throw e } } @@ -92,6 +92,10 @@ private[streaming] object HdfsUtils { def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) - fs.isFile(hdpPath) + try { + fs.getFileStatus(hdpPath).isFile + } catch { + case _: FileNotFoundException => false + } } } From 105225ddbc4574a8b79e4a483124a6f998a03bc1 Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Fri, 18 Sep 2020 11:49:26 +0000 Subject: [PATCH 0066/1009] [SPARK-32911][CORE] Free memory in UnsafeExternalSorter.SpillableIterator.spill() when all records have been read ### What changes were proposed in this pull request? This PR changes `UnsafeExternalSorter.SpillableIterator` to free its memory (except for the page holding the last record) if it is forced to spill after all of its records have been read. It also makes sure that `lastPage` is freed if `loadNext` is never called the again. The latter was necessary to get my test case to succeed (otherwise it would complain about a leak). ### Why are the changes needed? No memory is freed after calling `UnsafeExternalSorter.SpillableIterator.spill()` when all records have been read, even though it is still holding onto some memory. This may cause a `SparkOutOfMemoryError` to be thrown, even though we could have just freed the memory instead. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A test was added to `UnsafeExternalSorterSuite`. Closes #29787 from tomvanbussel/SPARK-32911. Authored-by: Tom van Bussel Signed-off-by: Wenchen Fan --- .../unsafe/sort/UnsafeExternalSorter.java | 31 +++++++++++++------ .../sort/UnsafeExternalSorterSuite.java | 30 ++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 71b9a5bc11542..e4a882d609fc2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -503,7 +503,7 @@ class SpillableIterator extends UnsafeSorterIterator { private UnsafeSorterIterator upstream; private MemoryBlock lastPage = null; private boolean loaded = false; - private int numRecords = 0; + private int numRecords; private Object currentBaseObject; private long currentBaseOffset; @@ -527,19 +527,25 @@ public long getCurrentPageNumber() { public long spill() throws IOException { synchronized (this) { - if (inMemSorter == null || numRecords <= 0) { + if (inMemSorter == null) { return 0L; } long currentPageNumber = upstream.getCurrentPageNumber(); ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); - // Iterate over the records that have not been returned and spill them. 
- final UnsafeSorterSpillWriter spillWriter = - new UnsafeSorterSpillWriter(blockManager, fileBufferSizeBytes, writeMetrics, numRecords); - spillIterator(upstream, spillWriter); - spillWriters.add(spillWriter); - upstream = spillWriter.getReader(serializerManager); + if (numRecords > 0) { + // Iterate over the records that have not been returned and spill them. + final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter( + blockManager, fileBufferSizeBytes, writeMetrics, numRecords); + spillIterator(upstream, spillWriter); + spillWriters.add(spillWriter); + upstream = spillWriter.getReader(serializerManager); + } else { + // Nothing to spill as all records have been read already, but do not return yet, as the + // memory still has to be freed. + upstream = null; + } long released = 0L; synchronized (UnsafeExternalSorter.this) { @@ -555,6 +561,11 @@ public long spill() throws IOException { } } allocatedPages.clear(); + if (lastPage != null) { + // Add the last page back to the list of allocated pages to make sure it gets freed in + // case loadNext() never gets called again. + allocatedPages.add(lastPage); + } } // in-memory sorter will not be used after spilling @@ -577,11 +588,12 @@ public boolean hasNext() { @Override public void loadNext() throws IOException { + assert upstream != null; MemoryBlock pageToFree = null; try { synchronized (this) { loaded = true; - // Just consumed the last record from in memory iterator + // Just consumed the last record from the in-memory iterator. if (lastPage != null) { // Do not free the page here, while we are locking `SpillableIterator`. The `freePage` // method locks the `TaskMemoryManager`, and it's a bad idea to lock 2 objects in @@ -589,6 +601,7 @@ public void loadNext() throws IOException { // `SpillableIterator` in sequence, which may happen in // `TaskMemoryManager.acquireExecutionMemory`. 
pageToFree = lastPage; + allocatedPages.clear(); lastPage = null; } numRecords--; diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index 087d090c1c60e..a1b66ccfaef03 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -392,6 +392,36 @@ public void forcedSpillingNullsWithReadIterator() throws Exception { assertSpillFilesWereCleanedUp(); } + @Test + public void forcedSpillingWithFullyReadIterator() throws Exception { + final UnsafeExternalSorter sorter = newSorter(); + long[] record = new long[100]; + final int recordSize = record.length * 8; + final int n = (int) pageSizeBytes / recordSize * 3; + for (int i = 0; i < n; i++) { + record[0] = i; + sorter.insertRecord(record, Platform.LONG_ARRAY_OFFSET, recordSize, 0, false); + } + assertTrue(sorter.getNumberOfAllocatedPages() >= 2); + + UnsafeExternalSorter.SpillableIterator iter = + (UnsafeExternalSorter.SpillableIterator) sorter.getSortedIterator(); + for (int i = 0; i < n; i++) { + assertTrue(iter.hasNext()); + iter.loadNext(); + assertEquals(i, Platform.getLong(iter.getBaseObject(), iter.getBaseOffset())); + } + assertFalse(iter.hasNext()); + + assertTrue(iter.spill() > 0); + assertEquals(0, iter.spill()); + assertEquals(n - 1, Platform.getLong(iter.getBaseObject(), iter.getBaseOffset())); + assertFalse(iter.hasNext()); + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); + } + @Test public void forcedSpillingWithNotReadIterator() throws Exception { final UnsafeExternalSorter sorter = newSorter(); From e2a740147c04a15e4f94c20c6039ed4f6888e0ed Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Sep 2020 11:55:27 +0000 Subject: [PATCH 0067/1009] [SPARK-32874][SQL][FOLLOWUP][TEST-HIVE1.2][TEST-HADOOP2.7] Fix spark-master-test-sbt-hadoop-2.7-hive-1.2 ### What changes were proposed in this pull request? 
Found via discussion https://github.com/apache/spark/pull/29746#issuecomment-694726504 and the root cause it that hive-1.2 does not recognize NULL ```scala sbt.ForkMain$ForkError: java.sql.SQLException: Unrecognized column type: NULL at org.apache.hive.jdbc.JdbcColumn.typeStringToHiveType(JdbcColumn.java:160) at org.apache.hive.jdbc.HiveResultSetMetaData.getHiveType(HiveResultSetMetaData.java:48) at org.apache.hive.jdbc.HiveResultSetMetaData.getPrecision(HiveResultSetMetaData.java:86) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$35(SparkThriftServerProtocolVersionsSuite.scala:358) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$35$adapted(SparkThriftServerProtocolVersionsSuite.scala:351) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.testExecuteStatementWithProtocolVersion(SparkThriftServerProtocolVersionsSuite.scala:66) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$34(SparkThriftServerProtocolVersionsSuite.scala:351) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:189) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176) at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:187) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:199) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:199) at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:181) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61) at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:232) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:232) at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:231) at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1562) at org.scalatest.Suite.run(Suite.scala:1112) at org.scalatest.Suite.run$(Suite.scala:1094) at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1562) at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:236) at org.scalatest.SuperEngine.runImpl(Engine.scala:535) at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:236) at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:235) at 
org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61) at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) at sbt.ForkMain$Run$2.call(ForkMain.java:296) at sbt.ForkMain$Run$2.call(ForkMain.java:286) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` In this PR, we simply ignore these checks for hive 1.2 ### Why are the changes needed? fix jenkins ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? test itself. Closes #29803 from yaooqinn/SPARK-32874-F. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../SparkThriftServerProtocolVersionsSuite.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index 69486eeb031b1..fa001b11253f5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -27,6 +27,7 @@ import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.unsafe.types.UTF8String class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { @@ -355,8 +356,12 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { assert(metaData.getColumnName(1) === "NULL") assert(metaData.getColumnTypeName(1) === "void") assert(metaData.getColumnType(1) === java.sql.Types.NULL) - assert(metaData.getPrecision(1) === 0) - assert(metaData.getScale(1) === 0) + if (HiveUtils.isHive23) { + // For Hive 1.2 the o.a.h.j.JdbcColumn.typeStringToHiveType can not recognize `null` as + // type name. + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) + } } } From 664a1719de2855d913c3bb1d2a94bd8681bc1a0d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 18 Sep 2020 22:24:33 +0900 Subject: [PATCH 0068/1009] [SPARK-32936][SQL] Pass all `external/avro` module UTs in Scala 2.13 ### What changes were proposed in this pull request? This pr fix all 14 failed cases in `external/avro` module in Scala 2.13, the main change of this pr as follow: - Manual call `toSeq` in `AvroDeserializer#newWriter` and `SchemaConverters#toSqlTypeHelper` method because the object type for case match is `ArrayBuffer` not `Seq` in Scala 2.13 - Specified `Seq` to `s.c.Seq` when we call `Row.get(i).asInstanceOf[Seq]` because the data maybe `mutable.ArraySeq` but `Seq` is `immutable.Seq` in Scala 2.13 ### Why are the changes needed? We need to support a Scala 2.13 build. 
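As additional context, here is a minimal sketch (illustrative names only, not code from this patch) of the 2.12/2.13 difference that breaks these `case Seq(...)` matches: in Scala 2.13 the unqualified `Seq` means `scala.collection.immutable.Seq`, so a mutable buffer such as the one produced by `.asScala.map(...)` no longer matches until it is converted with `.toSeq`.

```scala
import scala.collection.mutable.ArrayBuffer

object Scala213SeqSketch {
  // `types` stands in for the buffer produced by `.asScala.map(_.getType)`;
  // the names and return values here are illustrative only.
  def classify(types: scala.collection.Seq[String]): String = types match {
    // Under Scala 2.13 a bare `Seq` in a pattern means scala.collection.immutable.Seq,
    // so a mutable buffer only matches after an explicit .toSeq conversion.
    case Seq(a, b) => s"union of $a and $b"
    case _         => "something else"
  }

  def main(args: Array[String]): Unit = {
    val buf = ArrayBuffer("INT", "LONG")
    println(classify(buf))       // "something else" on 2.13 (it matched on 2.12)
    println(classify(buf.toSeq)) // "union of INT and LONG" on both versions
  }
}
```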
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass 2.13 Build GitHub Action and do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl external/avro -Pscala-2.13 -am mvn clean test -pl external/avro -Pscala-2.13 ``` **Before** ``` Tests: succeeded 197, failed 14, canceled 0, ignored 2, pending 0 *** 14 TESTS FAILED *** ``` **After** ``` Tests: succeeded 211, failed 0, canceled 0, ignored 2, pending 0 All tests passed. ``` Closes #29801 from LuciferYang/fix-external-avro-213. Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/avro/AvroDeserializer.scala | 2 +- .../scala/org/apache/spark/sql/avro/SchemaConverters.scala | 2 +- .../src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index 360a7fcff4363..aabf9d92ce7d8 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -256,7 +256,7 @@ private[sql] class AvroDeserializer( if (nonNullTypes.length == 1) { newWriter(nonNullTypes.head, catalystType, path) } else { - nonNullTypes.map(_.getType) match { + nonNullTypes.map(_.getType).toSeq match { case Seq(a, b) if Set(a, b) == Set(INT, LONG) && catalystType == LongType => (updater, ordinal, value) => value match { case null => updater.setNullAt(ordinal) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 27d5871070608..905f90fa79373 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -118,7 +118,7 @@ object SchemaConverters { toSqlTypeHelper(Schema.createUnion(remainingUnionTypes.asJava), existingRecordNames) .copy(nullable = true) } - } else avroSchema.getTypes.asScala.map(_.getType) match { + } else avroSchema.getTypes.asScala.map(_.getType).toSeq match { case Seq(t1) => toSqlTypeHelper(avroSchema.getTypes.get(0), existingRecordNames) case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) => diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 8a8a7681abd1c..b995a667be2b1 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -543,7 +543,8 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa val array_of_boolean = spark.read.format("avro").load(testAvro).select("array_of_boolean").collect() - assert(array_of_boolean.map(_(0).asInstanceOf[Seq[Boolean]].size).toSet == Set(3, 1, 0)) + assert(array_of_boolean.map(_(0).asInstanceOf[scala.collection.Seq[Boolean]].size).toSet == + Set(3, 1, 0)) val bytes = spark.read.format("avro").load(testAvro).select("bytes").collect() assert(bytes.map(_(0).asInstanceOf[Array[Byte]].length).toSet == Set(3, 1, 0)) From 2128c4f14b498e3bc98e79f0dd42d9023e718112 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 18 Sep 2020 10:38:30 -0500 Subject: [PATCH 0069/1009] 
[SPARK-32808][SQL] Pass all test of sql/core module in Scala 2.13 ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/29660 and https://github.com/apache/spark/pull/29689 there are 13 remaining failed cases of sql core module with Scala 2.13. The reason for the remaining failed cases is the optimization result of `CostBasedJoinReorder` maybe different with same input in Scala 2.12 and Scala 2.13 if there are more than one same cost candidate plans. In this pr give a way to make the optimization result deterministic as much as possible to pass all remaining failed cases of `sql/core` module in Scala 2.13, the main change of this pr as follow: - Change to use `LinkedHashMap` instead of `Map` to store `foundPlans` in `JoinReorderDP.search` method to ensure same iteration order with same insert order because iteration order of `Map` behave differently under Scala 2.12 and 2.13 - Fixed `StarJoinCostBasedReorderSuite` affected by the above change - Regenerate golden files affected by the above change. ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/core -Pscala-2.13 -am mvn test -pl sql/core -Pscala-2.13 ``` **Before** ``` Tests: succeeded 8485, failed 13, canceled 1, ignored 52, pending 0 *** 13 TESTS FAILED *** ``` **After** ``` Tests: succeeded 8498, failed 0, canceled 1, ignored 52, pending 0 All tests passed. ``` Closes #29711 from LuciferYang/SPARK-32808-3. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../optimizer/CostBasedJoinReorder.scala | 19 +- .../StarJoinCostBasedReorderSuite.scala | 2 +- .../q27.sf100/explain.txt | 210 +++++------ .../q27.sf100/simplified.txt | 22 +- .../q7.sf100/explain.txt | 108 +++--- .../q7.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q13.sf100/explain.txt | 112 +++--- .../q13.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q17.sf100/explain.txt | 120 +++---- .../q17.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q19.sf100/explain.txt | 204 +++++------ .../q19.sf100/simplified.txt | 36 +- .../q24a.sf100/explain.txt | 94 ++--- .../q24a.sf100/simplified.txt | 18 +- .../q24b.sf100/explain.txt | 94 ++--- .../q24b.sf100/simplified.txt | 18 +- .../approved-plans-v1_4/q25.sf100/explain.txt | 120 +++---- .../q25.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q29.sf100/explain.txt | 118 +++---- .../q29.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q31.sf100/explain.txt | 40 +-- .../q31.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q45.sf100/explain.txt | 102 +++--- .../q45.sf100/simplified.txt | 20 +- .../approved-plans-v1_4/q50.sf100/explain.txt | 104 +++--- .../q50.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q6.sf100/explain.txt | 224 ++++++------ .../q6.sf100/simplified.txt | 74 ++-- .../approved-plans-v1_4/q61.sf100/explain.txt | 127 +++---- .../q61.sf100/simplified.txt | 17 +- .../approved-plans-v1_4/q62.sf100/explain.txt | 108 +++--- .../q62.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q66.sf100/explain.txt | 136 +++---- .../q66.sf100/simplified.txt | 16 +- .../approved-plans-v1_4/q72.sf100/explain.txt | 334 +++++++++--------- .../q72.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q80.sf100/explain.txt | 98 ++--- .../q80.sf100/simplified.txt | 38 +- 
.../approved-plans-v1_4/q84.sf100/explain.txt | 86 ++--- .../q84.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q85.sf100/explain.txt | 304 ++++++++-------- .../q85.sf100/simplified.txt | 44 +-- .../approved-plans-v1_4/q91.sf100/explain.txt | 202 +++++------ .../q91.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q99.sf100/explain.txt | 108 +++--- .../q99.sf100/simplified.txt | 10 +- .../approved-plans-v2_7/q6.sf100/explain.txt | 224 ++++++------ .../q6.sf100/simplified.txt | 74 ++-- .../approved-plans-v2_7/q72.sf100/explain.txt | 334 +++++++++--------- .../q72.sf100/simplified.txt | 34 +- .../q80a.sf100/explain.txt | 98 ++--- .../q80a.sf100/simplified.txt | 38 +- 52 files changed, 2204 insertions(+), 2239 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index a64e8bcd68175..8b019f35263f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -150,9 +150,16 @@ object JoinReorderDP extends PredicateHelper with Logging { // Level i maintains all found plans for i + 1 items. // Create the initial plans: each plan is a single item with zero cost. val itemIndex = items.zipWithIndex - val foundPlans = mutable.Buffer[JoinPlanMap](itemIndex.map { - case (item, id) => Set(id) -> JoinPlan(Set(id), item, ExpressionSet(), Cost(0, 0)) - }.toMap) + val foundPlans = mutable.Buffer[JoinPlanMap]({ + // SPARK-32687: Change to use `LinkedHashMap` to make sure that items are + // inserted and iterated in the same order. + val joinPlanMap = new JoinPlanMap + itemIndex.foreach { + case (item, id) => + joinPlanMap.put(Set(id), JoinPlan(Set(id), item, ExpressionSet(), Cost(0, 0))) + } + joinPlanMap + }) // Build filters from the join graph to be used by the search algorithm. val filters = JoinReorderDPFilters.buildJoinGraphInfo(conf, items, conditions, itemIndex) @@ -198,7 +205,7 @@ object JoinReorderDP extends PredicateHelper with Logging { topOutput: AttributeSet, filters: Option[JoinGraphInfo]): JoinPlanMap = { - val nextLevel = mutable.Map.empty[Set[Int], JoinPlan] + val nextLevel = new JoinPlanMap var k = 0 val lev = existingLevels.length - 1 // Build plans for the next level from plans at level k (one side of the join) and level @@ -231,7 +238,7 @@ object JoinReorderDP extends PredicateHelper with Logging { } k += 1 } - nextLevel.toMap + nextLevel } /** @@ -316,7 +323,7 @@ object JoinReorderDP extends PredicateHelper with Logging { } /** Map[set of item ids, join plan for these items] */ - type JoinPlanMap = Map[Set[Int], JoinPlan] + type JoinPlanMap = mutable.LinkedHashMap[Set[Int], JoinPlan] /** * Partial join order in a specific level. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala index d9cf629b47c18..703be48c6a2a9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala @@ -345,8 +345,8 @@ class StarJoinCostBasedReorderSuite extends JoinReorderPlanTestBase with StatsEs val expected = f1.join(d3, Inner, Some(nameToAttr("f1_fk3") === nameToAttr("d3_pk"))) - .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) + .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) .join(t4.join(t3, Inner, Some(nameToAttr("t3_c2") === nameToAttr("t4_c2"))), Inner, Some(nameToAttr("d1_c2") === nameToAttr("t3_c1"))) .join(t2.join(t1, Inner, Some(nameToAttr("t1_c2") === nameToAttr("t2_c2"))), Inner, diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt index fa01042350149..b3b11b60ded0b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt @@ -11,15 +11,15 @@ TakeOrderedAndProject (77) : : :- * Project (17) : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : :- BroadcastExchange (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.date_dim (1) - : : : : +- * Filter (8) - : : : : +- * ColumnarToRow (7) - : : : : +- Scan parquet default.store_sales (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) : : : +- BroadcastExchange (15) : : : +- * Project (14) : : : +- * Filter (13) @@ -43,11 +43,11 @@ TakeOrderedAndProject (77) : : :- * Project (45) : : : +- * BroadcastHashJoin Inner BuildRight (44) : : : :- * Project (38) - : : : : +- * BroadcastHashJoin Inner BuildLeft (37) - : : : : :- ReusedExchange (33) - : : : : +- * Filter (36) - : : : : +- * ColumnarToRow (35) - : : : : +- Scan parquet default.store_sales (34) + : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : :- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.store_sales (33) + : : : : +- ReusedExchange (36) : : : +- BroadcastExchange (43) : : : +- * Project (42) : : : +- * Filter (41) @@ -65,11 +65,11 @@ TakeOrderedAndProject (77) : :- * Project (63) : : +- * BroadcastHashJoin Inner BuildRight (62) : : :- * Project (60) - : : : +- * BroadcastHashJoin Inner BuildLeft (59) - : : : :- ReusedExchange (55) - : : : +- * Filter (58) - : : : +- * ColumnarToRow (57) - : : : +- Scan parquet default.store_sales (56) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Filter (57) + : : : : +- * ColumnarToRow 
(56) + : : : : +- Scan parquet default.store_sales (55) + : : : +- ReusedExchange (58) : : +- ReusedExchange (61) : +- ReusedExchange (64) +- BroadcastExchange (70) @@ -78,50 +78,50 @@ TakeOrderedAndProject (77) +- Scan parquet default.item (67) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND (d_date_sk#1 >= 2451545)) AND (d_date_sk#1 <= 2451910)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] -(6) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] +Condition : ((((isnotnull(d_year#10) AND (d_year#10 = 2000)) AND (d_date_sk#9 >= 2451545)) AND (d_date_sk#9 <= 2451910)) AND isnotnull(d_date_sk#9)) -(7) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#10] -(8) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, 
ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 5] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (11) Scan parquet default.customer_demographics Output [4]: [cd_demo_sk#12, cd_gender#13, cd_marital_status#14, cd_education_status#15] @@ -146,13 +146,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [6]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (18) Scan parquet default.store Output [2]: [s_store_sk#17, s_state#18] @@ -173,13 +173,13 @@ Input [2]: [s_store_sk#17, s_state#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] (22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (23) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18] -Input [8]: [ss_item_sk#5, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17, s_state#18] +Output [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18] +Input [8]: [ss_item_sk#2, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17, s_state#18] (24) Scan parquet default.item Output [2]: [i_item_sk#20, i_item_id#21] @@ -200,13 +200,13 @@ Input [2]: [i_item_sk#20, i_item_id#21] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (29) Project [codegen id : 5] -Output 
[6]: [i_item_id#21, s_state#18, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [8]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_state#18, i_item_sk#20, i_item_id#21] +Output [6]: [i_item_id#21, s_state#18, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [8]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_state#18, i_item_sk#20, i_item_id#21] (30) HashAggregate [codegen id : 5] Input [6]: [i_item_id#21, s_state#18, agg1#23, agg2#24, agg3#25, agg4#26] @@ -226,31 +226,31 @@ Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(U Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#44, avg(UnscaledValue(agg2#24))#45, avg(UnscaledValue(agg3#25))#46, avg(UnscaledValue(agg4#26))#47] Results [7]: [i_item_id#21, s_state#18, 0 AS g_state#48, avg(cast(agg1#23 as bigint))#44 AS agg1#49, cast((avg(UnscaledValue(agg2#24))#45 / 100.0) as decimal(11,6)) AS agg2#50, cast((avg(UnscaledValue(agg3#25))#46 / 100.0) as decimal(11,6)) AS agg3#51, cast((avg(UnscaledValue(agg4#26))#47 / 100.0) as decimal(11,6)) AS agg4#52] -(33) ReusedExchange [Reuses operator id: 5] -Output [1]: [d_date_sk#1] - -(34) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(33) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(35) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(34) ColumnarToRow [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(35) Filter [codegen id : 11] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(36) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#9] (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (38) Project [codegen id : 11] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: 
[d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (39) Scan parquet default.store Output [2]: [s_store_sk#17, s_state#18] @@ -275,37 +275,37 @@ Input [1]: [s_store_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] (44) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (45) Project [codegen id : 11] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] (46) ReusedExchange [Reuses operator id: 15] Output [1]: [cd_demo_sk#12] (47) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (48) Project [codegen id : 11] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (49) ReusedExchange [Reuses operator id: 27] Output [2]: [i_item_sk#20, i_item_id#21] (50) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (51) Project [codegen id : 11] -Output [5]: [i_item_id#21, ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20, i_item_id#21] +Output [5]: [i_item_id#21, ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20, i_item_id#21] (52) HashAggregate [codegen id : 11] Input [5]: [i_item_id#21, agg1#23, agg2#24, agg3#25, agg4#26] @@ -325,55 +325,55 @@ Functions [4]: [avg(cast(agg1#23 as bigint)), avg(UnscaledValue(agg2#24)), avg(U Aggregate Attributes [4]: [avg(cast(agg1#23 as bigint))#71, avg(UnscaledValue(agg2#24))#72, avg(UnscaledValue(agg3#25))#73, avg(UnscaledValue(agg4#26))#74] Results [7]: [i_item_id#21, null AS s_state#75, 1 AS g_state#76, avg(cast(agg1#23 as bigint))#71 AS agg1#77, cast((avg(UnscaledValue(agg2#24))#72 / 100.0) as decimal(11,6)) AS agg2#78, cast((avg(UnscaledValue(agg3#25))#73 / 100.0) as decimal(11,6)) AS agg3#79, cast((avg(UnscaledValue(agg4#26))#74 / 100.0) as decimal(11,6)) AS agg4#80] -(55) ReusedExchange 
[Reuses operator id: 5] -Output [1]: [d_date_sk#1] - -(56) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(55) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2451545), LessThanOrEqual(ss_sold_date_sk,2451910), IsNotNull(ss_cdemo_sk), IsNotNull(ss_store_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(57) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(56) ColumnarToRow [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] + +(57) Filter [codegen id : 17] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2451545)) AND (ss_sold_date_sk#1 <= 2451910)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_store_sk#4)) AND isnotnull(ss_item_sk#2)) -(58) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2451545)) AND (ss_sold_date_sk#4 <= 2451910)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_store_sk#7)) AND isnotnull(ss_item_sk#5)) +(58) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#9] (59) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (60) Project [codegen id : 17] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, d_date_sk#9] (61) ReusedExchange [Reuses operator id: 43] Output [1]: [s_store_sk#17] (62) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_store_sk#7] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (63) Project [codegen id : 17] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_store_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, s_store_sk#17] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_store_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, s_store_sk#17] (64) ReusedExchange [Reuses operator id: 15] Output [1]: [cd_demo_sk#12] (65) BroadcastHashJoin [codegen 
id : 17] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (66) Project [codegen id : 17] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#12] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#12] (67) Scan parquet default.item Output [1]: [i_item_sk#20] @@ -394,13 +394,13 @@ Input [1]: [i_item_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#81] (71) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#20] Join condition: None (72) Project [codegen id : 17] -Output [4]: [ss_quantity#8 AS agg1#23, ss_list_price#9 AS agg2#24, ss_coupon_amt#11 AS agg3#25, ss_sales_price#10 AS agg4#26] -Input [6]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#20] +Output [4]: [ss_quantity#5 AS agg1#23, ss_list_price#6 AS agg2#24, ss_coupon_amt#8 AS agg3#25, ss_sales_price#7 AS agg4#26] +Input [6]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#20] (73) HashAggregate [codegen id : 17] Input [4]: [agg1#23, agg2#24, agg3#25, agg4#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt index fc7202e739bcc..d14061de1d1f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/simplified.txt @@ -13,7 +13,11 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -22,10 +26,6 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] - Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) @@ -61,13 +61,13 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter 
- ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter BroadcastExchange #7 WholeStageCodegen (8) @@ -93,13 +93,13 @@ TakeOrderedAndProject [i_item_id,s_state,g_state,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_cdemo_sk,ss_store_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_store_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter ReusedExchange [s_store_sk] #7 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt index 6071139e809cf..220d661fd45e9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (34) : :- * Project (17) : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.store_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (15) : : +- * Project (14) : : +- * Filter (13) @@ -35,50 +35,50 @@ TakeOrderedAndProject (34) +- Scan parquet default.item (25) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] +(1) Scan parquet default.store_sales +Output [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] +(2) ColumnarToRow [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, 
ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((((isnotnull(d_year#2) AND (d_year#2 = 1998)) AND (d_date_sk#1 >= 2450815)) AND (d_date_sk#1 <= 2451179)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [8]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Condition : (((((isnotnull(ss_sold_date_sk#1) AND (ss_sold_date_sk#1 >= 2450815)) AND (ss_sold_date_sk#1 <= 2451179)) AND isnotnull(ss_cdemo_sk#3)) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_promo_sk#4)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#9, d_year#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] -(6) Scan parquet default.store_sales -Output [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), GreaterThanOrEqual(ss_sold_date_sk,2450815), LessThanOrEqual(ss_sold_date_sk,2451179), IsNotNull(ss_cdemo_sk), IsNotNull(ss_item_sk), IsNotNull(ss_promo_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#9, d_year#10] +Condition : ((((isnotnull(d_year#10) AND (d_year#10 = 1998)) AND (d_date_sk#9 >= 2450815)) AND (d_date_sk#9 <= 2451179)) AND isnotnull(d_date_sk#9)) -(7) ColumnarToRow -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_year#10] -(8) Filter -Input [8]: [ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Condition : (((((isnotnull(ss_sold_date_sk#4) AND (ss_sold_date_sk#4 >= 2450815)) AND (ss_sold_date_sk#4 <= 2451179)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_item_sk#5)) AND isnotnull(ss_promo_sk#7)) +(8) BroadcastExchange +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#4] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 5] -Output [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [9]: [d_date_sk#1, ss_sold_date_sk#4, ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] +Output [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, 
ss_coupon_amt#8, d_date_sk#9] (11) Scan parquet default.promotion Output [3]: [p_promo_sk#12, p_channel_email#13, p_channel_event#14] @@ -103,13 +103,13 @@ Input [1]: [p_promo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_promo_sk#7] +Left keys [1]: [ss_promo_sk#4] Right keys [1]: [p_promo_sk#12] Join condition: None (17) Project [codegen id : 5] -Output [6]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [8]: [ss_item_sk#5, ss_cdemo_sk#6, ss_promo_sk#7, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, p_promo_sk#12] +Output [6]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [8]: [ss_item_sk#2, ss_cdemo_sk#3, ss_promo_sk#4, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, p_promo_sk#12] (18) Scan parquet default.customer_demographics Output [4]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19] @@ -134,13 +134,13 @@ Input [1]: [cd_demo_sk#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] (23) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_cdemo_sk#6] +Left keys [1]: [ss_cdemo_sk#3] Right keys [1]: [cd_demo_sk#16] Join condition: None (24) Project [codegen id : 5] -Output [5]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11] -Input [7]: [ss_item_sk#5, ss_cdemo_sk#6, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, cd_demo_sk#16] +Output [5]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8] +Input [7]: [ss_item_sk#2, ss_cdemo_sk#3, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, cd_demo_sk#16] (25) Scan parquet default.item Output [2]: [i_item_sk#21, i_item_id#22] @@ -161,18 +161,18 @@ Input [2]: [i_item_sk#21, i_item_id#22] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] (29) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#5] +Left keys [1]: [ss_item_sk#2] Right keys [1]: [i_item_sk#21] Join condition: None (30) Project [codegen id : 5] -Output [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] -Input [7]: [ss_item_sk#5, ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_sk#21, i_item_id#22] +Output [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] +Input [7]: [ss_item_sk#2, ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_sk#21, i_item_id#22] (31) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#8, ss_list_price#9, ss_sales_price#10, ss_coupon_amt#11, i_item_id#22] +Input [5]: [ss_quantity#5, ss_list_price#6, ss_sales_price#7, ss_coupon_amt#8, i_item_id#22] Keys [1]: [i_item_id#22] -Functions [4]: [partial_avg(cast(ss_quantity#8 as bigint)), partial_avg(UnscaledValue(ss_list_price#9)), partial_avg(UnscaledValue(ss_coupon_amt#11)), partial_avg(UnscaledValue(ss_sales_price#10))] +Functions [4]: [partial_avg(cast(ss_quantity#5 as bigint)), partial_avg(UnscaledValue(ss_list_price#6)), partial_avg(UnscaledValue(ss_coupon_amt#8)), partial_avg(UnscaledValue(ss_sales_price#7))] Aggregate Attributes [8]: [sum#24, count#25, sum#26, count#27, sum#28, count#29, sum#30, count#31] Results [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, 
count#37, sum#38, count#39] @@ -183,9 +183,9 @@ Arguments: hashpartitioning(i_item_id#22, 5), true, [id=#40] (33) HashAggregate [codegen id : 6] Input [9]: [i_item_id#22, sum#32, count#33, sum#34, count#35, sum#36, count#37, sum#38, count#39] Keys [1]: [i_item_id#22] -Functions [4]: [avg(cast(ss_quantity#8 as bigint)), avg(UnscaledValue(ss_list_price#9)), avg(UnscaledValue(ss_coupon_amt#11)), avg(UnscaledValue(ss_sales_price#10))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#8 as bigint))#41, avg(UnscaledValue(ss_list_price#9))#42, avg(UnscaledValue(ss_coupon_amt#11))#43, avg(UnscaledValue(ss_sales_price#10))#44] -Results [5]: [i_item_id#22, avg(cast(ss_quantity#8 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#9))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#11))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#10))#44 / 100.0) as decimal(11,6)) AS agg4#48] +Functions [4]: [avg(cast(ss_quantity#5 as bigint)), avg(UnscaledValue(ss_list_price#6)), avg(UnscaledValue(ss_coupon_amt#8)), avg(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#5 as bigint))#41, avg(UnscaledValue(ss_list_price#6))#42, avg(UnscaledValue(ss_coupon_amt#8))#43, avg(UnscaledValue(ss_sales_price#7))#44] +Results [5]: [i_item_id#22, avg(cast(ss_quantity#5 as bigint))#41 AS agg1#45, cast((avg(UnscaledValue(ss_list_price#6))#42 / 100.0) as decimal(11,6)) AS agg2#46, cast((avg(UnscaledValue(ss_coupon_amt#8))#43 / 100.0) as decimal(11,6)) AS agg3#47, cast((avg(UnscaledValue(ss_sales_price#7))#44 / 100.0) as decimal(11,6)) AS agg4#48] (34) TakeOrderedAndProject Input [5]: [i_item_id#22, agg1#45, agg2#46, agg3#47, agg4#48] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt index 4576b8cef59ee..61cc7daa76456 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] Project [ss_item_sk,ss_cdemo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] - BroadcastHashJoin [d_date_sk,ss_sold_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_cdemo_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [i_item_id,agg1,agg2,agg3,agg4] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] - Filter [ss_sold_date_sk,ss_cdemo_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_cdemo_sk,ss_promo_sk,ss_quantity,ss_list_price,ss_sales_price,ss_coupon_amt] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 586abbd8f3fef..8ee427262b332 
100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -11,14 +11,14 @@ : : :- * Project (15) : : : +- * BroadcastHashJoin Inner BuildRight (14) : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildLeft (8) - : : : : :- BroadcastExchange (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer_demographics (1) - : : : : +- * Filter (7) - : : : : +- * ColumnarToRow (6) - : : : : +- Scan parquet default.store_sales (5) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) : : : +- BroadcastExchange (13) : : : +- * Filter (12) : : : +- * ColumnarToRow (11) @@ -39,46 +39,46 @@ +- Scan parquet default.customer_address (29) -(1) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +(1) Scan parquet default.store_sales +Output [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] +ReadSchema: struct -(3) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Condition : (isnotnull(cd_demo_sk#1) AND ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) OR ((cd_marital_status#2 = S) AND (cd_education_status#3 = College))) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)))) +(2) ColumnarToRow [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -(4) BroadcastExchange -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] +(3) Filter [codegen id : 6] +Input [10]: [ss_sold_date_sk#1, 
ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Condition : ((((((isnotnull(ss_store_sk#5) AND isnotnull(ss_addr_sk#4)) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_cdemo_sk#2)) AND isnotnull(ss_hdemo_sk#3)) AND ((((ss_net_profit#10 >= 100.00) AND (ss_net_profit#10 <= 200.00)) OR ((ss_net_profit#10 >= 150.00) AND (ss_net_profit#10 <= 300.00))) OR ((ss_net_profit#10 >= 50.00) AND (ss_net_profit#10 <= 250.00)))) AND ((((ss_sales_price#7 >= 100.00) AND (ss_sales_price#7 <= 150.00)) OR ((ss_sales_price#7 >= 50.00) AND (ss_sales_price#7 <= 100.00))) OR ((ss_sales_price#7 >= 150.00) AND (ss_sales_price#7 <= 200.00)))) -(5) Scan parquet default.store_sales -Output [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +(4) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_hdemo_sk), Or(Or(And(GreaterThanOrEqual(ss_net_profit,100.00),LessThanOrEqual(ss_net_profit,200.00)),And(GreaterThanOrEqual(ss_net_profit,150.00),LessThanOrEqual(ss_net_profit,300.00))),And(GreaterThanOrEqual(ss_net_profit,50.00),LessThanOrEqual(ss_net_profit,250.00))), Or(Or(And(GreaterThanOrEqual(ss_sales_price,100.00),LessThanOrEqual(ss_sales_price,150.00)),And(GreaterThanOrEqual(ss_sales_price,50.00),LessThanOrEqual(ss_sales_price,100.00))),And(GreaterThanOrEqual(ss_sales_price,150.00),LessThanOrEqual(ss_sales_price,200.00)))] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -(6) ColumnarToRow -Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +(6) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Condition : (isnotnull(cd_demo_sk#11) AND ((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) OR ((cd_marital_status#12 = S) AND (cd_education_status#13 = College))) OR ((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)))) -(7) Filter -Input [10]: [ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Condition : ((((((isnotnull(ss_store_sk#9) AND isnotnull(ss_addr_sk#8)) AND isnotnull(ss_sold_date_sk#5)) AND isnotnull(ss_cdemo_sk#6)) AND isnotnull(ss_hdemo_sk#7)) AND ((((ss_net_profit#14 >= 100.00) AND (ss_net_profit#14 <= 200.00)) OR ((ss_net_profit#14 >= 150.00) AND (ss_net_profit#14 <= 300.00))) OR ((ss_net_profit#14 >= 50.00) AND (ss_net_profit#14 <= 250.00)))) AND ((((ss_sales_price#11 >= 100.00) AND 
(ss_sales_price#11 <= 150.00)) OR ((ss_sales_price#11 >= 50.00) AND (ss_sales_price#11 <= 100.00))) OR ((ss_sales_price#11 >= 150.00) AND (ss_sales_price#11 <= 200.00)))) +(7) BroadcastExchange +Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (8) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cd_demo_sk#1] -Right keys [1]: [ss_cdemo_sk#6] -Join condition: ((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) OR ((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND (ss_sales_price#11 >= 50.00)) AND (ss_sales_price#11 <= 100.00))) OR ((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00))) +Left keys [1]: [ss_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#11] +Join condition: ((((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) OR ((((cd_marital_status#12 = S) AND (cd_education_status#13 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00))) OR ((((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00))) (9) Project [codegen id : 6] -Output [11]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [13]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_cdemo_sk#6, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] +Output [11]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13] +Input [13]: [ss_sold_date_sk#1, ss_cdemo_sk#2, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] (10) Scan parquet default.household_demographics Output [2]: [hd_demo_sk#15, hd_dep_count#16] @@ -99,13 +99,13 @@ Input [2]: [hd_demo_sk#15, hd_dep_count#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (14) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_hdemo_sk#7] +Left keys [1]: [ss_hdemo_sk#3] Right keys [1]: [hd_demo_sk#15] -Join condition: (((((((cd_marital_status#2 = M) AND (cd_education_status#3 = Advanced Degree)) AND (ss_sales_price#11 >= 100.00)) AND (ss_sales_price#11 <= 150.00)) AND (hd_dep_count#16 = 3)) OR (((((cd_marital_status#2 = S) AND (cd_education_status#3 = College)) AND (ss_sales_price#11 >= 50.00)) AND (ss_sales_price#11 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#2 = W) AND (cd_education_status#3 = 2 yr Degree)) AND (ss_sales_price#11 >= 150.00)) AND (ss_sales_price#11 <= 200.00)) AND (hd_dep_count#16 = 1))) +Join condition: (((((((cd_marital_status#12 = M) AND (cd_education_status#13 = Advanced Degree)) AND (ss_sales_price#7 >= 100.00)) AND (ss_sales_price#7 <= 150.00)) AND (hd_dep_count#16 = 3)) OR 
(((((cd_marital_status#12 = S) AND (cd_education_status#13 = College)) AND (ss_sales_price#7 >= 50.00)) AND (ss_sales_price#7 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#12 = W) AND (cd_education_status#13 = 2 yr Degree)) AND (ss_sales_price#7 >= 150.00)) AND (ss_sales_price#7 <= 200.00)) AND (hd_dep_count#16 = 1))) (15) Project [codegen id : 6] -Output [7]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [13]: [cd_marital_status#2, cd_education_status#3, ss_sold_date_sk#5, ss_hdemo_sk#7, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_sales_price#11, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, hd_demo_sk#15, hd_dep_count#16] +Output [7]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [13]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13, hd_demo_sk#15, hd_dep_count#16] (16) Scan parquet default.date_dim Output [2]: [d_date_sk#18, d_year#19] @@ -130,13 +130,13 @@ Input [1]: [d_date_sk#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#5] +Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#18] Join condition: None (22) Project [codegen id : 6] -Output [6]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [8]: [ss_sold_date_sk#5, ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, d_date_sk#18] +Output [6]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [8]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, d_date_sk#18] (23) Scan parquet default.store Output [1]: [s_store_sk#21] @@ -157,13 +157,13 @@ Input [1]: [s_store_sk#21] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#9] +Left keys [1]: [ss_store_sk#5] Right keys [1]: [s_store_sk#21] Join condition: None (28) Project [codegen id : 6] -Output [5]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14] -Input [7]: [ss_addr_sk#8, ss_store_sk#9, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, s_store_sk#21] +Output [5]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] +Input [7]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#21] (29) Scan parquet default.customer_address Output [3]: [ca_address_sk#23, ca_state#24, ca_country#25] @@ -188,18 +188,18 @@ Input [2]: [ca_address_sk#23, ca_state#24] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#8] +Left keys [1]: [ss_addr_sk#4] Right keys [1]: [ca_address_sk#23] -Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#14 >= 100.00)) AND (ss_net_profit#14 <= 200.00)) OR 
((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#14 >= 150.00)) AND (ss_net_profit#14 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#14 >= 50.00)) AND (ss_net_profit#14 <= 250.00))) +Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) (35) Project [codegen id : 6] -Output [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] -Input [7]: [ss_addr_sk#8, ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13, ss_net_profit#14, ca_address_sk#23, ca_state#24] +Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [7]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#23, ca_state#24] (36) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#10, ss_ext_sales_price#12, ss_ext_wholesale_cost#13] +Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] Keys: [] -Functions [4]: [partial_avg(cast(ss_quantity#10 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#12)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#13)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#13))] +Functions [4]: [partial_avg(cast(ss_quantity#6 as bigint)), partial_avg(UnscaledValue(ss_ext_sales_price#8)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#9)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#9))] Aggregate Attributes [7]: [sum#27, count#28, sum#29, count#30, sum#31, count#32, sum#33] Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] @@ -210,7 +210,7 @@ Arguments: SinglePartition, true, [id=#41] (38) HashAggregate [codegen id : 7] Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] Keys: [] -Functions [4]: [avg(cast(ss_quantity#10 as bigint)), avg(UnscaledValue(ss_ext_sales_price#12)), avg(UnscaledValue(ss_ext_wholesale_cost#13)), sum(UnscaledValue(ss_ext_wholesale_cost#13))] -Aggregate Attributes [4]: [avg(cast(ss_quantity#10 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#12))#43, avg(UnscaledValue(ss_ext_wholesale_cost#13))#44, sum(UnscaledValue(ss_ext_wholesale_cost#13))#45] -Results [4]: [avg(cast(ss_quantity#10 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#12))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#13))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#13))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] +Functions [4]: [avg(cast(ss_quantity#6 as bigint)), avg(UnscaledValue(ss_ext_sales_price#8)), avg(UnscaledValue(ss_ext_wholesale_cost#9)), sum(UnscaledValue(ss_ext_wholesale_cost#9))] +Aggregate Attributes [4]: [avg(cast(ss_quantity#6 as bigint))#42, avg(UnscaledValue(ss_ext_sales_price#8))#43, avg(UnscaledValue(ss_ext_wholesale_cost#9))#44, sum(UnscaledValue(ss_ext_wholesale_cost#9))#45] +Results [4]: [avg(cast(ss_quantity#6 as bigint))#42 AS avg(ss_quantity)#46, cast((avg(UnscaledValue(ss_ext_sales_price#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#47, cast((avg(UnscaledValue(ss_ext_wholesale_cost#9))#44 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#48, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#9))#45,17,2) AS sum(ss_ext_wholesale_cost)#49] diff 
--git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt index e410b27e9cf3b..b457788dbd0b2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt @@ -12,8 +12,12 @@ WholeStageCodegen (7) BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Project [ss_sold_date_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] - Project [cd_marital_status,cd_education_status,ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [cd_demo_sk,ss_cdemo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Project [ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,cd_marital_status,cd_education_status] + BroadcastHashJoin [ss_cdemo_sk,cd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price] + Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ WholeStageCodegen (7) ColumnarToRow InputAdapter Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] - Filter [ss_store_sk,ss_addr_sk,ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_net_profit,ss_sales_price] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_cdemo_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index e24b656e843aa..a17356ae04a03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (57) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (51) +- Exchange (50) +- * Project (49) @@ -177,75 +177,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [7]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_quarter_name#20] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#19, d_quarter_name#20] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -(30) Filter [codegen id : 9] -Input [2]: [d_date_sk#19, d_quarter_name#20] -Condition : (d_quarter_name#20 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#19)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Condition : (((isnotnull(sr_customer_sk#21) AND isnotnull(sr_item_sk#20)) AND isnotnull(sr_ticket_number#22)) AND isnotnull(sr_returned_date_sk#19)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_quarter_name#20] +(31) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_quarter_name#25] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +(32) ColumnarToRow [codegen id : 9] +Input [2]: [d_date_sk#24, d_quarter_name#25] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [2]: [d_date_sk#24, d_quarter_name#25] +Condition : (d_quarter_name#25 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#24)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_quarter_name#25] -(35) Filter -Input [5]: [sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Condition : (((isnotnull(sr_customer_sk#24) AND isnotnull(sr_item_sk#23)) AND isnotnull(sr_ticket_number#25)) AND isnotnull(sr_returned_date_sk#22)) +(35) BroadcastExchange +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#19 as bigint)] -Right keys [1]: [sr_returned_date_sk#22] +Left keys [1]: 
[sr_returned_date_sk#19] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Input [6]: [d_date_sk#19, sr_returned_date_sk#22, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Output [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Input [6]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, d_date_sk#24] (38) Exchange -Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Arguments: hashpartitioning(sr_customer_sk#24, sr_item_sk#23, sr_ticket_number#25, 5), true, [id=#27] +Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), true, [id=#27] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] -Arguments: [sr_customer_sk#24 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST, sr_ticket_number#25 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#24, sr_item_sk#23, sr_ticket_number#25] +Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] Join condition: None (41) Project [codegen id : 12] -Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_ticket_number#25, sr_return_quantity#26] +Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] (42) Exchange -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Arguments: hashpartitioning(sr_customer_sk#24, sr_item_sk#23, 5), true, [id=#28] +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), true, [id=#28] (43) Sort [codegen id : 13] -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26] -Arguments: [sr_customer_sk#24 ASC NULLS FIRST, sr_item_sk#23 ASC NULLS FIRST], false, 0 +Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] @@ -261,7 +261,7 @@ Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, 
cs_quanti Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] Condition : ((isnotnull(cs_bill_customer_sk#30) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_sold_date_sk#29)) -(47) ReusedExchange [Reuses operator id: 32] +(47) ReusedExchange [Reuses operator id: 35] Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 15] @@ -282,18 +282,18 @@ Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] Arguments: [cast(cs_bill_customer_sk#30 as bigint) ASC NULLS FIRST, cast(cs_item_sk#31 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#24, sr_item_sk#23] +Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] Right keys [2]: [cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint)] Join condition: None (53) Project [codegen id : 17] -Output [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] -Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#23, sr_customer_sk#24, sr_return_quantity#26, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] (54) HashAggregate [codegen id : 17] -Input [6]: [ss_quantity#6, sr_return_quantity#26, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#26), partial_avg(cast(sr_return_quantity#26 as bigint)), partial_stddev_samp(cast(sr_return_quantity#26 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] Aggregate Attributes [18]: [count#35, sum#36, count#37, n#38, avg#39, m2#40, count#41, sum#42, count#43, n#44, avg#45, m2#46, count#47, sum#48, count#49, n#50, avg#51, m2#52] Results [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] @@ -304,9 +304,9 @@ Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), true, (56) HashAggregate [codegen id : 18] Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#26), avg(cast(sr_return_quantity#26 as 
bigint)), stddev_samp(cast(sr_return_quantity#26 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] -Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#26)#75, avg(cast(sr_return_quantity#26 as bigint))#76, stddev_samp(cast(sr_return_quantity#26 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] -Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#26)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#26 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#26 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#26 as double))#77 / avg(cast(sr_return_quantity#26 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitycov#92] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] +Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS 
catalog_sales_quantitycov#92] (57) TakeOrderedAndProject Input [15]: [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt index 216adf3588eca..bfb59441f483b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_quarter_name] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt index 0fbe0ccef6d13..88b5168f6049c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -12,15 +12,15 @@ TakeOrderedAndProject (45) : :- * Project (17) : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.item (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.store_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) : : +- BroadcastExchange (15) : : +- * Project (14) : : +- * Filter (13) @@ -38,58 +38,58 @@ TakeOrderedAndProject (45) : +- Exchange (29) : +- * Filter (28) : +- * ColumnarToRow (27) - : +- Scan parquet default.customer_address (26) + : +- Scan parquet default.customer (26) +- * Sort (35) +- Exchange 
(34) +- * Filter (33) +- * ColumnarToRow (32) - +- Scan parquet default.customer (31) + +- Scan parquet default.customer_address (31) -(1) Scan parquet default.item -Output [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(1) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(2) ColumnarToRow [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] -(3) Filter [codegen id : 1] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] -Condition : ((isnotnull(i_manager_id#6) AND (i_manager_id#6 = 8)) AND isnotnull(i_item_sk#1)) +(3) Filter [codegen id : 4] +Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] +Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) -(4) Project [codegen id : 1] -Output [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, i_manager_id#6] +(4) Scan parquet default.item +Output [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [5]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +(5) ColumnarToRow [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -(6) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 8)) AND isnotnull(i_item_sk#6)) -(7) ColumnarToRow -Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +(7) Project [codegen id : 1] +Output [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -(8) Filter -Input [5]: [ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Condition : (((isnotnull(ss_sold_date_sk#8) AND isnotnull(ss_item_sk#9)) AND 
isnotnull(ss_customer_sk#10)) AND isnotnull(ss_store_sk#11)) +(8) BroadcastExchange +Input [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#9] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#6] Join condition: None (10) Project [codegen id : 4] -Output [8]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Input [10]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_item_sk#9, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] +Output [8]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [10]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] (11) Scan parquet default.date_dim Output [3]: [d_date_sk#13, d_year#14, d_moy#15] @@ -114,13 +114,13 @@ Input [1]: [d_date_sk#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#8] +Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#13] Join condition: None (17) Project [codegen id : 4] -Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_sold_date_sk#8, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, d_date_sk#13] +Output [7]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [9]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, d_date_sk#13] (18) Scan parquet default.store Output [2]: [s_store_sk#17, s_zip#18] @@ -141,111 +141,111 @@ Input [2]: [s_store_sk#17, s_zip#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#11] +Left keys [1]: [ss_store_sk#4] Right keys [1]: [s_store_sk#17] Join condition: None (23) Project [codegen id : 4] -Output [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_store_sk#11, ss_ext_sales_price#12, s_store_sk#17, s_zip#18] +Output [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Input [9]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_store_sk#17, s_zip#18] (24) Exchange -Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Arguments: hashpartitioning(ss_customer_sk#10, 5), true, [id=#20] +Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#20] (25) Sort [codegen id : 5] -Input [7]: [i_brand_id#2, i_brand#3, i_manufact_id#4, 
i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18] -Arguments: [ss_customer_sk#10 ASC NULLS FIRST], false, 0 +Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] +Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_zip#22] +(26) Scan parquet default.customer +Output [2]: [c_customer_sk#21, c_current_addr_sk#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [2]: [ca_address_sk#21, ca_zip#22] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] (28) Filter [codegen id : 6] -Input [2]: [ca_address_sk#21, ca_zip#22] -Condition : (isnotnull(ca_address_sk#21) AND isnotnull(ca_zip#22)) +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) (29) Exchange -Input [2]: [ca_address_sk#21, ca_zip#22] -Arguments: hashpartitioning(ca_address_sk#21, 5), true, [id=#23] +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: hashpartitioning(c_current_addr_sk#22, 5), true, [id=#23] (30) Sort [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_zip#22] -Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +Arguments: [c_current_addr_sk#22 ASC NULLS FIRST], false, 0 -(31) Scan parquet default.customer -Output [2]: [c_customer_sk#24, c_current_addr_sk#25] +(31) Scan parquet default.customer_address +Output [2]: [ca_address_sk#24, ca_zip#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] +ReadSchema: struct (32) ColumnarToRow [codegen id : 8] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Input [2]: [ca_address_sk#24, ca_zip#25] (33) Filter [codegen id : 8] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#25)) +Input [2]: [ca_address_sk#24, ca_zip#25] +Condition : (isnotnull(ca_address_sk#24) AND isnotnull(ca_zip#25)) (34) Exchange -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: hashpartitioning(c_current_addr_sk#25, 5), true, [id=#26] +Input [2]: [ca_address_sk#24, ca_zip#25] +Arguments: hashpartitioning(ca_address_sk#24, 5), true, [id=#26] (35) Sort [codegen id : 9] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: [c_current_addr_sk#25 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#24, ca_zip#25] +Arguments: [ca_address_sk#24 ASC NULLS FIRST], false, 0 (36) SortMergeJoin [codegen id : 10] -Left keys [1]: [ca_address_sk#21] -Right keys [1]: [c_current_addr_sk#25] +Left keys [1]: [c_current_addr_sk#22] +Right keys [1]: [ca_address_sk#24] Join condition: None (37) Project [codegen id : 10] -Output [2]: [ca_zip#22, c_customer_sk#24] -Input [4]: [ca_address_sk#21, ca_zip#22, c_customer_sk#24, c_current_addr_sk#25] +Output [2]: [c_customer_sk#21, ca_zip#25] +Input [4]: [c_customer_sk#21, c_current_addr_sk#22, 
ca_address_sk#24, ca_zip#25] (38) Exchange -Input [2]: [ca_zip#22, c_customer_sk#24] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#27] +Input [2]: [c_customer_sk#21, ca_zip#25] +Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#27] (39) Sort [codegen id : 11] -Input [2]: [ca_zip#22, c_customer_sk#24] -Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#21, ca_zip#25] +Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] -Left keys [1]: [ss_customer_sk#10] -Right keys [1]: [c_customer_sk#24] -Join condition: NOT (substr(ca_zip#22, 1, 5) = substr(s_zip#18, 1, 5)) +Left keys [1]: [ss_customer_sk#3] +Right keys [1]: [c_customer_sk#21] +Join condition: NOT (substr(ca_zip#25, 1, 5) = substr(s_zip#18, 1, 5)) (41) Project [codegen id : 12] -Output [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Input [9]: [i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5, ss_customer_sk#10, ss_ext_sales_price#12, s_zip#18, ca_zip#22, c_customer_sk#24] +Output [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Input [9]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18, c_customer_sk#21, ca_zip#25] (42) HashAggregate [codegen id : 12] -Input [5]: [ss_ext_sales_price#12, i_brand_id#2, i_brand#3, i_manufact_id#4, i_manufact#5] -Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#12))] +Input [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] Aggregate Attributes [1]: [sum#28] -Results [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] +Results [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] (43) Exchange -Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] -Arguments: hashpartitioning(i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, 5), true, [id=#30] +Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Arguments: hashpartitioning(i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, 5), true, [id=#30] (44) HashAggregate [codegen id : 13] -Input [5]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5, sum#29] -Keys [4]: [i_brand#3, i_brand_id#2, i_manufact_id#4, i_manufact#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#12))#31] -Results [5]: [i_brand_id#2 AS brand_id#32, i_brand#3 AS brand#33, i_manufact_id#4, i_manufact#5, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#12))#31,17,2) AS ext_price#34] +Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#31] +Results [5]: [i_brand_id#7 AS brand_id#32, i_brand#8 AS brand#33, i_manufact_id#9, i_manufact#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS ext_price#34] (45) TakeOrderedAndProject -Input [5]: [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] -Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, 
i_manufact_id#4 ASC NULLS FIRST, i_manufact#5 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#4, i_manufact#5, ext_price#34] +Input [5]: [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] +Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#9 ASC NULLS FIRST, i_manufact#10 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt index c8737d8a70782..05fa3f82e27df 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt @@ -13,12 +13,16 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] InputAdapter Exchange [ss_customer_sk] #2 WholeStageCodegen (4) - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_customer_sk,ss_ext_sales_price,s_zip] + Project [ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [i_brand_id,i_brand,i_manufact_id,i_manufact,ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [i_item_sk,ss_item_sk] + Project [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -27,10 +31,6 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -52,25 +52,25 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] InputAdapter Exchange [c_customer_sk] #6 WholeStageCodegen (10) - Project [ca_zip,c_customer_sk] - SortMergeJoin [ca_address_sk,c_current_addr_sk] + Project [c_customer_sk,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] InputAdapter WholeStageCodegen (7) - Sort [ca_address_sk] + Sort [c_current_addr_sk] InputAdapter - Exchange [ca_address_sk] #7 + Exchange [c_current_addr_sk] #7 WholeStageCodegen (6) - Filter [ca_address_sk,ca_zip] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter WholeStageCodegen (9) - Sort [c_current_addr_sk] + Sort [ca_address_sk] InputAdapter - Exchange [c_current_addr_sk] #8 + Exchange [ca_address_sk] #8 WholeStageCodegen (8) - Filter 
[c_customer_sk,c_current_addr_sk] + Filter [ca_address_sk,ca_zip] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer_address [ca_address_sk,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index 6e0a5ced1992a..ffcf6bd4f6d47 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -296,15 +296,15 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : :- * Sort (60) : : : : +- Exchange (59) : : : : +- * Project (58) - : : : : +- * BroadcastHashJoin Inner BuildLeft (57) - : : : : :- BroadcastExchange (53) - : : : : : +- * Project (52) - : : : : : +- * Filter (51) - : : : : : +- * ColumnarToRow (50) - : : : : : +- Scan parquet default.store (49) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.store_sales (54) + : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : :- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store_sales (49) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.store (52) : : : +- * Sort (65) : : : +- Exchange (64) : : : +- * Filter (63) @@ -327,57 +327,57 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer +- Scan parquet default.store_returns (88) -(49) Scan parquet default.store +(49) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(51) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(52) Scan parquet default.store Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct -(50) ColumnarToRow [codegen id : 1] +(53) ColumnarToRow [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(51) Filter [codegen id : 1] +(54) Filter [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) -(52) Project [codegen id : 1] +(55) Project [codegen id : 1] Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(53) BroadcastExchange +(56) BroadcastExchange Input [4]: [s_store_sk#19, 
s_store_name#20, s_state#22, s_zip#23] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] -(54) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(55) ColumnarToRow -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(56) Filter -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [s_store_sk#19] -Right keys [1]: [ss_store_sk#3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#19] Join condition: None (58) Project [codegen id : 2] -Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] -Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] (59) Exchange -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] (60) Sort [codegen id : 3] -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (61) Scan parquet default.item @@ -408,15 +408,15 @@ Right keys [1]: [i_item_sk#6] Join condition: None (67) Project [codegen id : 6] -Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (68) Exchange -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11] Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (70) Scan parquet default.customer @@ -447,15 +447,15 @@ Right keys [1]: [c_customer_sk#14] Join condition: None (76) Project [codegen id : 10] -Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (77) Exchange -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt index f51d1972b630f..10f874f8f5543 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [s_store_name,s_state,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,7 +29,7 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project 
[s_store_name,s_state,s_zip,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (7) @@ -37,7 +37,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_customer_sk] #14 WholeStageCodegen (6) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (3) @@ -45,8 +45,12 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_item_sk] #15 WholeStageCodegen (2) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #16 WholeStageCodegen (1) @@ -55,10 +59,6 @@ WholeStageCodegen (14) ColumnarToRow InputAdapter Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter WholeStageCodegen (5) Sort [i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index cbac3787cab6c..73f36e3a9ca23 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -296,15 +296,15 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : :- * Sort (60) : : : : +- Exchange (59) : : : : +- * Project (58) - : : : : +- * BroadcastHashJoin Inner BuildLeft (57) - : : : : :- BroadcastExchange (53) - : : : : : +- * Project (52) - : : : : : +- * Filter (51) - : : : : : +- * ColumnarToRow (50) - : : : : : +- Scan parquet default.store (49) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.store_sales (54) + : : : : +- * BroadcastHashJoin Inner BuildRight (57) + : : : : :- * Filter (51) + : : : : : +- * ColumnarToRow (50) + : : : : : +- Scan parquet default.store_sales (49) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.store (52) : : : +- * Sort (65) : : : +- Exchange (64) : : : +- * Filter (63) @@ -327,57 +327,57 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer +- Scan parquet default.store_returns (88) -(49) Scan parquet default.store +(49) Scan parquet default.store_sales +Output [5]: [ss_item_sk#1, ss_customer_sk#2, 
ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] +ReadSchema: struct + +(50) ColumnarToRow [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] + +(51) Filter [codegen id : 2] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) + +(52) Scan parquet default.store Output [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct -(50) ColumnarToRow [codegen id : 1] +(53) ColumnarToRow [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(51) Filter [codegen id : 1] +(54) Filter [codegen id : 1] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] Condition : (((isnotnull(s_market_id#21) AND (s_market_id#21 = 8)) AND isnotnull(s_store_sk#19)) AND isnotnull(s_zip#23)) -(52) Project [codegen id : 1] +(55) Project [codegen id : 1] Output [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Input [5]: [s_store_sk#19, s_store_name#20, s_market_id#21, s_state#22, s_zip#23] -(53) BroadcastExchange +(56) BroadcastExchange Input [4]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] -(54) Scan parquet default.store_sales -Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] -ReadSchema: struct - -(55) ColumnarToRow -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] - -(56) Filter -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Condition : (((isnotnull(ss_ticket_number#4) AND isnotnull(ss_item_sk#1)) AND isnotnull(ss_store_sk#3)) AND isnotnull(ss_customer_sk#2)) - (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [s_store_sk#19] -Right keys [1]: [ss_store_sk#3] +Left keys [1]: [ss_store_sk#3] +Right keys [1]: [s_store_sk#19] Join condition: None (58) Project [codegen id : 2] -Output [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] -Input [9]: [s_store_sk#19, s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#19, s_store_name#20, s_state#22, s_zip#23] (59) Exchange -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, 
s_state#22, s_zip#23] Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] (60) Sort [codegen id : 3] -Input [7]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (61) Scan parquet default.item @@ -408,15 +408,15 @@ Right keys [1]: [i_item_sk#6] Join condition: None (67) Project [codegen id : 6] -Output [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (68) Exchange -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (70) Scan parquet default.customer @@ -447,15 +447,15 @@ Right keys [1]: [c_customer_sk#14] Join condition: None (76) Project [codegen id : 10] -Output [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (77) Exchange -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, 
c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [s_store_name#20, s_state#22, s_zip#23, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [s_store_name#20, s_state#22, ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, 
c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt index f51d1972b630f..10f874f8f5543 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [s_store_name,s_state,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,7 +29,7 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (7) @@ -37,7 +37,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_customer_sk] #14 WholeStageCodegen (6) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,i_current_price,i_size,i_color,i_units,i_manager_id] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (3) @@ -45,8 +45,12 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_item_sk] #15 WholeStageCodegen (2) - Project [s_store_name,s_state,s_zip,ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid] - BroadcastHashJoin [s_store_sk,ss_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter BroadcastExchange #16 WholeStageCodegen (1) @@ -55,10 +59,6 @@ WholeStageCodegen (14) ColumnarToRow InputAdapter Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] - Filter [ss_ticket_number,ss_item_sk,ss_store_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_paid] InputAdapter WholeStageCodegen (5) Sort 
[i_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index 87a72d3bbe777..c6dc3db869003 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (57) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (51) +- Exchange (50) +- * Project (49) @@ -177,75 +177,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -(30) Filter [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : (((((isnotnull(d_moy#23) AND isnotnull(d_year#22)) AND (d_moy#23 >= 4)) AND (d_moy#23 <= 10)) AND (d_year#22 = 2001)) AND isnotnull(d_date_sk#21)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Condition : (((isnotnull(sr_customer_sk#23) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_ticket_number#24)) AND isnotnull(sr_returned_date_sk#21)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(31) Scan parquet default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: 
[d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(32) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : (((((isnotnull(d_moy#28) AND isnotnull(d_year#27)) AND (d_moy#28 >= 4)) AND (d_moy#28 <= 10)) AND (d_year#27 = 2001)) AND isnotnull(d_date_sk#26)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#26] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(35) Filter -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Condition : (((isnotnull(sr_customer_sk#27) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_ticket_number#28)) AND isnotnull(sr_returned_date_sk#25)) +(35) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#26 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Output [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, d_date_sk#26] (38) Exchange -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28, 5), true, [id=#30] +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_ticket_number#28 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join condition: None (41) Project [codegen id : 12] -Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_net_loss#29] +Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] (42) Exchange -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] (43) Sort [codegen id : 13] -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 +Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] @@ -261,7 +261,7 @@ Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_pr Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) -(47) ReusedExchange [Reuses operator id: 32] +(47) ReusedExchange [Reuses operator id: 35] Output [1]: [d_date_sk#36] (48) BroadcastHashJoin [codegen id : 15] @@ -282,18 +282,18 @@ Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] Join condition: None (53) Project [codegen id : 17] -Output [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_net_loss#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] (54) HashAggregate [codegen id : 17] -Input [7]: [ss_net_profit#6, sr_net_loss#29, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, 
s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#29)), partial_sum(UnscaledValue(cs_net_profit#35))] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#35))] Aggregate Attributes [3]: [sum#38, sum#39, sum#40] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] @@ -304,9 +304,9 @@ Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store (56) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#29)), sum(UnscaledValue(cs_net_profit#35))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#29))#46, sum(UnscaledValue(cs_net_profit#35))#47] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#29))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#35))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#35))#47] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] (57) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt index 8e61cf9c519fd..ad9fa718ff2bd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] InputAdapter WholeStageCodegen (16) Sort [cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt index 35e24698c517e..a949b93f3bcb0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt @@ -39,15 +39,15 @@ TakeOrderedAndProject (61) : +- * Sort (39) : +- Exchange (38) : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildLeft (36) - : :- BroadcastExchange (32) - : : +- * Project (31) - : : +- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.date_dim (28) - : +- * Filter (35) - : +- * ColumnarToRow (34) - : +- Scan parquet default.store_returns (33) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) +- * Sort (55) +- Exchange (54) +- * Project (53) @@ -181,75 +181,75 @@ Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Arguments: [cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST], false, 0 -(28) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(28) Scan parquet default.store_returns +Output [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(29) ColumnarToRow [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(29) ColumnarToRow [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] -(30) Filter [codegen id : 9] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : (((((isnotnull(d_moy#23) AND isnotnull(d_year#22)) AND (d_moy#23 >= 9)) AND (d_moy#23 <= 12)) AND (d_year#22 = 1999)) AND isnotnull(d_date_sk#21)) +(30) Filter [codegen id : 10] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Condition : (((isnotnull(sr_customer_sk#23) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_ticket_number#24)) AND isnotnull(sr_returned_date_sk#21)) -(31) Project [codegen id : 9] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(31) Scan parquet default.date_dim +Output [3]: [d_date_sk#26, d_year#27, d_moy#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), 
IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct -(32) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(32) ColumnarToRow [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(33) Scan parquet default.store_returns -Output [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(33) Filter [codegen id : 9] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] +Condition : (((((isnotnull(d_moy#28) AND isnotnull(d_year#27)) AND (d_moy#28 >= 9)) AND (d_moy#28 <= 12)) AND (d_year#27 = 1999)) AND isnotnull(d_date_sk#26)) -(34) ColumnarToRow -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +(34) Project [codegen id : 9] +Output [1]: [d_date_sk#26] +Input [3]: [d_date_sk#26, d_year#27, d_moy#28] -(35) Filter -Input [5]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Condition : (((isnotnull(sr_customer_sk#27) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_ticket_number#28)) AND isnotnull(sr_returned_date_sk#25)) +(35) BroadcastExchange +Input [1]: [d_date_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#26 as bigint)] Join condition: None (37) Project [codegen id : 10] -Output [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Input [6]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Output [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25, d_date_sk#26] (38) Exchange -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28, 5), true, [id=#30] +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] (39) Sort [codegen id : 11] -Input [4]: [sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_ticket_number#28 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 12] Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#27, sr_item_sk#26, sr_ticket_number#28] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join 
condition: None (41) Project [codegen id : 12] -Output [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28, sr_return_quantity#29] +Output [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_return_quantity#25] (42) Exchange -Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Arguments: hashpartitioning(sr_customer_sk#27, sr_item_sk#26, 5), true, [id=#31] +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] (43) Sort [codegen id : 13] -Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29] -Arguments: [sr_customer_sk#27 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST], false, 0 +Input [8]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 (44) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] @@ -305,18 +305,18 @@ Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 (56) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#27, sr_item_sk#26] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] Join condition: None (57) Project [codegen id : 17] -Output [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#26, sr_customer_sk#27, sr_return_quantity#29, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] +Output [7]: [ss_quantity#6, sr_return_quantity#25, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [11]: [ss_quantity#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_return_quantity#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_quantity#35] (58) HashAggregate [codegen id : 17] -Input [7]: [ss_quantity#6, sr_return_quantity#29, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_quantity#6, sr_return_quantity#25, cs_quantity#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#29 as bigint)), 
partial_sum(cast(cs_quantity#35 as bigint))] +Functions [3]: [partial_sum(cast(ss_quantity#6 as bigint)), partial_sum(cast(sr_return_quantity#25 as bigint)), partial_sum(cast(cs_quantity#35 as bigint))] Aggregate Attributes [3]: [sum#40, sum#41, sum#42] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] @@ -327,9 +327,9 @@ Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store (60) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#43, sum#44, sum#45] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#29 as bigint)), sum(cast(cs_quantity#35 as bigint))] -Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#47, sum(cast(sr_return_quantity#29 as bigint))#48, sum(cast(cs_quantity#35 as bigint))#49] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum(cast(ss_quantity#6 as bigint))#47 AS store_sales_quantity#50, sum(cast(sr_return_quantity#29 as bigint))#48 AS store_returns_quantity#51, sum(cast(cs_quantity#35 as bigint))#49 AS catalog_sales_quantity#52] +Functions [3]: [sum(cast(ss_quantity#6 as bigint)), sum(cast(sr_return_quantity#25 as bigint)), sum(cast(cs_quantity#35 as bigint))] +Aggregate Attributes [3]: [sum(cast(ss_quantity#6 as bigint))#47, sum(cast(sr_return_quantity#25 as bigint))#48, sum(cast(cs_quantity#35 as bigint))#49] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum(cast(ss_quantity#6 as bigint))#47 AS store_sales_quantity#50, sum(cast(sr_return_quantity#25 as bigint))#48 AS store_returns_quantity#51, sum(cast(cs_quantity#35 as bigint))#49 AS catalog_sales_quantity#52] (61) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_quantity#50, store_returns_quantity#51, catalog_sales_quantity#52] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt index f10b8e245c50e..ea91af9e8f755 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/simplified.txt @@ -69,7 +69,11 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter BroadcastExchange #9 WholeStageCodegen (9) @@ -78,10 +82,6 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_customer_sk,sr_item_sk,sr_ticket_number,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] InputAdapter WholeStageCodegen (16) Sort 
[cs_bill_customer_sk,cs_item_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt index d3b013660ba28..9f123c4044cc8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt @@ -138,7 +138,7 @@ Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) Output [3]: [d_date_sk#4, d_year#5, d_qoy#6] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (5) ColumnarToRow [codegen id : 1] @@ -146,7 +146,7 @@ Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] (6) Filter [codegen id : 1] Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] -Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 3)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) +Condition : ((((isnotnull(d_qoy#6) AND isnotnull(d_year#5)) AND (d_qoy#6 = 2)) AND (d_year#5 = 2000)) AND isnotnull(d_date_sk#4)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_year#5, d_qoy#6] @@ -236,7 +236,7 @@ Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_addr_sk#2)) Output [3]: [d_date_sk#17, d_year#18, d_qoy#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 7] @@ -244,7 +244,7 @@ Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] (27) Filter [codegen id : 7] Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] -Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 2)) AND (d_year#18 = 2000)) AND isnotnull(d_date_sk#17)) +Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 3)) AND (d_year#18 = 2000)) AND isnotnull(d_date_sk#17)) (28) BroadcastExchange Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] @@ -311,7 +311,7 @@ Right keys [1]: [ca_county#23] Join condition: None (42) Project [codegen id : 42] -Output [3]: [store_sales#16, ca_county#23, store_sales#28] +Output [3]: [ca_county#10, store_sales#16, store_sales#28] Input [4]: [ca_county#10, store_sales#16, ca_county#23, store_sales#28] (43) Scan parquet default.store_sales @@ -402,13 +402,13 @@ Input [3]: [ca_county#36, d_year#31, store_sales#41] Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#42] (62) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ca_county#23] +Left keys [1]: [ca_county#10] Right keys [1]: [ca_county#36] Join condition: None (63) Project [codegen id : 42] Output [5]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41] -Input [6]: [store_sales#16, ca_county#23, store_sales#28, ca_county#36, d_year#31, store_sales#41] +Input [6]: [ca_county#10, store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41] (64) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] @@ -424,7 +424,7 @@ Input [3]: [ws_sold_date_sk#43, 
ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(67) ReusedExchange [Reuses operator id: 28] +(67) ReusedExchange [Reuses operator id: 49] Output [3]: [d_date_sk#46, d_year#47, d_qoy#48] (68) BroadcastHashJoin [codegen id : 22] @@ -492,7 +492,7 @@ Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(82) ReusedExchange [Reuses operator id: 49] +(82) ReusedExchange [Reuses operator id: 7] Output [3]: [d_date_sk#57, d_year#58, d_qoy#59] (83) BroadcastHashJoin [codegen id : 28] @@ -556,7 +556,7 @@ Right keys [1]: [ca_county#62] Join condition: None (96) Project [codegen id : 41] -Output [3]: [web_sales#56, ca_county#62, web_sales#67] +Output [3]: [ca_county#51, web_sales#56, web_sales#67] Input [4]: [ca_county#51, web_sales#56, ca_county#62, web_sales#67] (97) Scan parquet default.web_sales @@ -573,7 +573,7 @@ Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Input [3]: [ws_sold_date_sk#43, ws_bill_addr_sk#44, ws_ext_sales_price#45] Condition : (isnotnull(ws_sold_date_sk#43) AND isnotnull(ws_bill_addr_sk#44)) -(100) ReusedExchange [Reuses operator id: 7] +(100) ReusedExchange [Reuses operator id: 28] Output [3]: [d_date_sk#69, d_year#70, d_qoy#71] (101) BroadcastHashJoin [codegen id : 35] @@ -632,26 +632,26 @@ Input [2]: [ca_county#74, web_sales#79] Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#80] (113) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [ca_county#62] +Left keys [1]: [ca_county#51] Right keys [1]: [ca_county#74] Join condition: None (114) Project [codegen id : 41] -Output [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] -Input [5]: [web_sales#56, ca_county#62, web_sales#67, ca_county#74, web_sales#79] +Output [4]: [ca_county#51, web_sales#56, web_sales#67, web_sales#79] +Input [5]: [ca_county#51, web_sales#56, web_sales#67, ca_county#74, web_sales#79] (115) BroadcastExchange -Input [4]: [web_sales#56, ca_county#62, web_sales#67, web_sales#79] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#81] +Input [4]: [ca_county#51, web_sales#56, web_sales#67, web_sales#79] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#81] (116) BroadcastHashJoin [codegen id : 42] Left keys [1]: [ca_county#36] -Right keys [1]: [ca_county#62] -Join condition: ((CASE WHEN (web_sales#67 > 0.00) THEN CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#41 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) ELSE null END) AND (CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) ELSE null END)) +Right keys [1]: [ca_county#51] +Join condition: ((CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#67) / promote_precision(web_sales#56)), DecimalType(37,20), true) ELSE null END > CASE WHEN 
(store_sales#41 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#41)), DecimalType(37,20), true) ELSE null END) AND (CASE WHEN (web_sales#67 > 0.00) THEN CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#67)), DecimalType(37,20), true) ELSE null END > CASE WHEN (store_sales#16 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#16)), DecimalType(37,20), true) ELSE null END)) (117) Project [codegen id : 42] -Output [6]: [ca_county#36, d_year#31, CheckOverflow((promote_precision(web_sales#56) / promote_precision(web_sales#67)), DecimalType(37,20), true) AS web_q1_q2_increase#82, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#41)), DecimalType(37,20), true) AS store_q1_q2_increase#83, CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q2_q3_increase#84, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#28)), DecimalType(37,20), true) AS store_q2_q3_increase#85] -Input [9]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41, web_sales#56, ca_county#62, web_sales#67, web_sales#79] +Output [6]: [ca_county#36, d_year#31, CheckOverflow((promote_precision(web_sales#67) / promote_precision(web_sales#56)), DecimalType(37,20), true) AS web_q1_q2_increase#82, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#41)), DecimalType(37,20), true) AS store_q1_q2_increase#83, CheckOverflow((promote_precision(web_sales#79) / promote_precision(web_sales#67)), DecimalType(37,20), true) AS web_q2_q3_increase#84, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#16)), DecimalType(37,20), true) AS store_q2_q3_increase#85] +Input [9]: [store_sales#16, store_sales#28, ca_county#36, d_year#31, store_sales#41, ca_county#51, web_sales#56, web_sales#67, web_sales#79] (118) Exchange Input [6]: [ca_county#36, d_year#31, web_q1_q2_increase#82, store_q1_q2_increase#83, web_q2_q3_increase#84, store_q2_q3_increase#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt index 9ec06b597cb64..c7b69500ed8a6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/simplified.txt @@ -7,7 +7,7 @@ WholeStageCodegen (43) BroadcastHashJoin [ca_county,ca_county,web_sales,web_sales,store_sales,store_sales,web_sales,store_sales] Project [store_sales,store_sales,ca_county,d_year,store_sales] BroadcastHashJoin [ca_county,ca_county] - Project [store_sales,ca_county,store_sales] + Project [ca_county,store_sales,store_sales] BroadcastHashJoin [ca_county,ca_county] HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ss_ext_sales_price)),store_sales,sum] InputAdapter @@ -116,9 +116,9 @@ WholeStageCodegen (43) InputAdapter BroadcastExchange #14 WholeStageCodegen (41) - Project [web_sales,ca_county,web_sales,web_sales] + Project [ca_county,web_sales,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] - Project [web_sales,ca_county,web_sales] + Project [ca_county,web_sales,web_sales] BroadcastHashJoin [ca_county,ca_county] HashAggregate [ca_county,d_qoy,d_year,sum] [sum(UnscaledValue(ws_ext_sales_price)),web_sales,sum] 
InputAdapter @@ -140,7 +140,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #9 + ReusedExchange [d_date_sk,d_year,d_qoy] #13 InputAdapter WholeStageCodegen (25) Sort [ca_address_sk] @@ -169,7 +169,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #13 + ReusedExchange [d_date_sk,d_year,d_qoy] #4 InputAdapter WholeStageCodegen (31) Sort [ca_address_sk] @@ -198,7 +198,7 @@ WholeStageCodegen (43) InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_addr_sk,ws_ext_sales_price] InputAdapter - ReusedExchange [d_date_sk,d_year,d_qoy] #4 + ReusedExchange [d_date_sk,d_year,d_qoy] #9 InputAdapter WholeStageCodegen (38) Sort [ca_address_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt index 0232d56ab7481..54e117e6cac10 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt @@ -34,12 +34,12 @@ TakeOrderedAndProject (46) : : +- Exchange (22) : : +- * Filter (21) : : +- * ColumnarToRow (20) - : : +- Scan parquet default.customer_address (19) + : : +- Scan parquet default.customer (19) : +- * Sort (28) : +- Exchange (27) : +- * Filter (26) : +- * ColumnarToRow (25) - : +- Scan parquet default.customer (24) + : +- Scan parquet default.customer_address (24) +- BroadcastExchange (39) +- * Project (38) +- * Filter (37) @@ -127,75 +127,75 @@ Arguments: hashpartitioning(ws_bill_customer_sk#4, 5), true, [id=#13] Input [3]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11] Arguments: [ws_bill_customer_sk#4 ASC NULLS FIRST], false, 0 -(19) Scan parquet default.customer_address -Output [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +(19) Scan parquet default.customer +Output [2]: [c_customer_sk#14, c_current_addr_sk#15] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct (20) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] (21) Filter [codegen id : 5] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Condition : isnotnull(ca_address_sk#14) +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#15)) (22) Exchange -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Arguments: hashpartitioning(ca_address_sk#14, 5), true, [id=#17] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: hashpartitioning(c_current_addr_sk#15, 5), true, [id=#16] (23) Sort [codegen id : 6] -Input [3]: [ca_address_sk#14, ca_city#15, ca_zip#16] -Arguments: [ca_address_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: [c_current_addr_sk#15 ASC NULLS FIRST], false, 0 -(24) Scan parquet default.customer -Output [2]: [c_customer_sk#18, c_current_addr_sk#19] +(24) Scan parquet default.customer_address 
+Output [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk)] +ReadSchema: struct (25) ColumnarToRow [codegen id : 7] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] (26) Filter [codegen id : 7] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_current_addr_sk#19)) +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Condition : isnotnull(ca_address_sk#17) (27) Exchange -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Arguments: hashpartitioning(c_current_addr_sk#19, 5), true, [id=#20] +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Arguments: hashpartitioning(ca_address_sk#17, 5), true, [id=#20] (28) Sort [codegen id : 8] -Input [2]: [c_customer_sk#18, c_current_addr_sk#19] -Arguments: [c_current_addr_sk#19 ASC NULLS FIRST], false, 0 +Input [3]: [ca_address_sk#17, ca_city#18, ca_zip#19] +Arguments: [ca_address_sk#17 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 9] -Left keys [1]: [ca_address_sk#14] -Right keys [1]: [c_current_addr_sk#19] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#17] Join condition: None (30) Project [codegen id : 9] -Output [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Input [5]: [ca_address_sk#14, ca_city#15, ca_zip#16, c_customer_sk#18, c_current_addr_sk#19] +Output [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Input [5]: [c_customer_sk#14, c_current_addr_sk#15, ca_address_sk#17, ca_city#18, ca_zip#19] (31) Exchange -Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Arguments: hashpartitioning(c_customer_sk#18, 5), true, [id=#21] +Input [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#21] (32) Sort [codegen id : 10] -Input [3]: [ca_city#15, ca_zip#16, c_customer_sk#18] -Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#14, ca_city#18, ca_zip#19] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 12] Left keys [1]: [ws_bill_customer_sk#4] -Right keys [1]: [c_customer_sk#18] +Right keys [1]: [c_customer_sk#14] Join condition: None (34) Project [codegen id : 12] -Output [4]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11] -Input [6]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11, ca_city#15, ca_zip#16, c_customer_sk#18] +Output [4]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11] +Input [6]: [ws_bill_customer_sk#4, ws_sales_price#5, i_item_id#11, c_customer_sk#14, ca_city#18, ca_zip#19] (35) Scan parquet default.item Output [2]: [i_item_sk#10, i_item_id#11] @@ -225,32 +225,32 @@ Right keys [1]: [i_item_id#11#22] Join condition: None (41) Filter [codegen id : 12] -Input [5]: [ws_sales_price#5, ca_city#15, ca_zip#16, i_item_id#11, exists#1] -Condition : (substr(ca_zip#16, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) +Input [5]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11, exists#1] +Condition : (substr(ca_zip#19, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) (42) Project [codegen id : 12] -Output [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] -Input [5]: [ws_sales_price#5, ca_city#15, 
ca_zip#16, i_item_id#11, exists#1] +Output [3]: [ws_sales_price#5, ca_city#18, ca_zip#19] +Input [5]: [ws_sales_price#5, ca_city#18, ca_zip#19, i_item_id#11, exists#1] (43) HashAggregate [codegen id : 12] -Input [3]: [ws_sales_price#5, ca_city#15, ca_zip#16] -Keys [2]: [ca_zip#16, ca_city#15] +Input [3]: [ws_sales_price#5, ca_city#18, ca_zip#19] +Keys [2]: [ca_zip#19, ca_city#18] Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#5))] Aggregate Attributes [1]: [sum#24] -Results [3]: [ca_zip#16, ca_city#15, sum#25] +Results [3]: [ca_zip#19, ca_city#18, sum#25] (44) Exchange -Input [3]: [ca_zip#16, ca_city#15, sum#25] -Arguments: hashpartitioning(ca_zip#16, ca_city#15, 5), true, [id=#26] +Input [3]: [ca_zip#19, ca_city#18, sum#25] +Arguments: hashpartitioning(ca_zip#19, ca_city#18, 5), true, [id=#26] (45) HashAggregate [codegen id : 13] -Input [3]: [ca_zip#16, ca_city#15, sum#25] -Keys [2]: [ca_zip#16, ca_city#15] +Input [3]: [ca_zip#19, ca_city#18, sum#25] +Keys [2]: [ca_zip#19, ca_city#18] Functions [1]: [sum(UnscaledValue(ws_sales_price#5))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#5))#27] -Results [3]: [ca_zip#16, ca_city#15, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#27,17,2) AS sum(ws_sales_price)#28] +Results [3]: [ca_zip#19, ca_city#18, MakeDecimal(sum(UnscaledValue(ws_sales_price#5))#27,17,2) AS sum(ws_sales_price)#28] (46) TakeOrderedAndProject -Input [3]: [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] -Arguments: 100, [ca_zip#16 ASC NULLS FIRST, ca_city#15 ASC NULLS FIRST], [ca_zip#16, ca_city#15, sum(ws_sales_price)#28] +Input [3]: [ca_zip#19, ca_city#18, sum(ws_sales_price)#28] +Arguments: 100, [ca_zip#19 ASC NULLS FIRST, ca_city#18 ASC NULLS FIRST], [ca_zip#19, ca_city#18, sum(ws_sales_price)#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt index 1eab468e67bc0..0e9662bb6aca5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/simplified.txt @@ -45,28 +45,28 @@ TakeOrderedAndProject [ca_zip,ca_city,sum(ws_sales_price)] InputAdapter Exchange [c_customer_sk] #5 WholeStageCodegen (9) - Project [ca_city,ca_zip,c_customer_sk] - SortMergeJoin [ca_address_sk,c_current_addr_sk] + Project [c_customer_sk,ca_city,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] InputAdapter WholeStageCodegen (6) - Sort [ca_address_sk] + Sort [c_current_addr_sk] InputAdapter - Exchange [ca_address_sk] #6 + Exchange [c_current_addr_sk] #6 WholeStageCodegen (5) - Filter [ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter WholeStageCodegen (8) - Sort [c_current_addr_sk] + Sort [ca_address_sk] InputAdapter - Exchange [c_current_addr_sk] #7 + Exchange [ca_address_sk] #7 WholeStageCodegen (7) - Filter [c_customer_sk,c_current_addr_sk] + Filter [ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + Scan parquet default.customer_address [ca_address_sk,ca_city,ca_zip] InputAdapter BroadcastExchange #8 WholeStageCodegen (11) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt index df1197d7c925e..741ee50f800ec 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -25,15 +25,15 @@ TakeOrderedAndProject (35) +- * Sort (29) +- Exchange (28) +- * Project (27) - +- * BroadcastHashJoin Inner BuildLeft (26) - :- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.date_dim (18) - +- * Filter (25) - +- * ColumnarToRow (24) - +- Scan parquet default.store_returns (23) + +- * BroadcastHashJoin Inner BuildRight (26) + :- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.store_returns (18) + +- BroadcastExchange (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.date_dim (21) (1) Scan parquet default.store_sales @@ -112,72 +112,72 @@ Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk# Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Arguments: [cast(ss_ticket_number#5 as bigint) ASC NULLS FIRST, cast(ss_item_sk#2 as bigint) ASC NULLS FIRST, cast(ss_customer_sk#3 as bigint) ASC NULLS FIRST], false, 0 -(18) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_moy#23] +(18) Scan parquet default.store_returns +Output [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store_returns] +PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] +ReadSchema: struct -(19) ColumnarToRow [codegen id : 5] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(19) ColumnarToRow [codegen id : 6] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] -(20) Filter [codegen id : 5] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] -Condition : ((((isnotnull(d_year#22) AND isnotnull(d_moy#23)) AND (d_year#22 = 2001)) AND (d_moy#23 = 8)) AND isnotnull(d_date_sk#21)) +(20) Filter [codegen id : 6] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Condition : (((isnotnull(sr_ticket_number#24) AND isnotnull(sr_item_sk#22)) AND isnotnull(sr_customer_sk#23)) AND isnotnull(sr_returned_date_sk#21)) -(21) Project [codegen id : 5] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#22, d_moy#23] +(21) Scan parquet default.date_dim +Output [3]: [d_date_sk#25, d_year#26, d_moy#27] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] +ReadSchema: struct -(22) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +(22) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] -(23) Scan parquet 
default.store_returns -Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_returns] -PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk), IsNotNull(sr_customer_sk), IsNotNull(sr_returned_date_sk)] -ReadSchema: struct +(23) Filter [codegen id : 5] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] +Condition : ((((isnotnull(d_year#26) AND isnotnull(d_moy#27)) AND (d_year#26 = 2001)) AND (d_moy#27 = 8)) AND isnotnull(d_date_sk#25)) -(24) ColumnarToRow -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +(24) Project [codegen id : 5] +Output [1]: [d_date_sk#25] +Input [3]: [d_date_sk#25, d_year#26, d_moy#27] -(25) Filter -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Condition : (((isnotnull(sr_ticket_number#28) AND isnotnull(sr_item_sk#26)) AND isnotnull(sr_customer_sk#27)) AND isnotnull(sr_returned_date_sk#25)) +(25) BroadcastExchange +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] (26) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cast(d_date_sk#21 as bigint)] -Right keys [1]: [sr_returned_date_sk#25] +Left keys [1]: [sr_returned_date_sk#21] +Right keys [1]: [cast(d_date_sk#25 as bigint)] Join condition: None (27) Project [codegen id : 6] -Output [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Input [5]: [d_date_sk#21, sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Output [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, d_date_sk#25] (28) Exchange -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27, 5), true, [id=#29] +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), true, [id=#29] (29) Sort [codegen id : 7] -Input [4]: [sr_returned_date_sk#25, sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] -Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#26 ASC NULLS FIRST, sr_customer_sk#27 ASC NULLS FIRST], false, 0 +Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] +Arguments: [sr_ticket_number#24 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_customer_sk#23 ASC NULLS FIRST], false, 0 (30) SortMergeJoin [codegen id : 8] Left keys [3]: [cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint)] -Right keys [3]: [sr_ticket_number#28, sr_item_sk#26, sr_customer_sk#27] +Right keys [3]: [sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23] Join condition: None (31) Project [codegen id : 8] -Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sr_returned_date_sk#25, 
sr_item_sk#26, sr_customer_sk#27, sr_ticket_number#28] +Output [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] (32) HashAggregate [codegen id : 8] -Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#25, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] +Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#30, sum#31, sum#32, sum#33, sum#34] Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] @@ -188,9 +188,9 @@ Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, (34) HashAggregate [codegen id : 9] Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, 
sum#37, sum#38, sum#39] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] -Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#25 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] +Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] (35) TakeOrderedAndProject Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt index 5d6f38e882a5c..be11a69176810 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt @@ -42,7 +42,11 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s Exchange [sr_ticket_number,sr_item_sk,sr_customer_sk] #5 WholeStageCodegen (6) Project [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] - BroadcastHashJoin [d_date_sk,sr_returned_date_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] InputAdapter BroadcastExchange #6 
WholeStageCodegen (5) @@ -51,7 +55,3 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [sr_ticket_number,sr_item_sk,sr_customer_sk,sr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_item_sk,sr_customer_sk,sr_ticket_number] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt index 511e1b46cd7a7..675cff99ad729 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/explain.txt @@ -11,30 +11,30 @@ TakeOrderedAndProject (50) : +- Exchange (27) : +- * Project (26) : +- * BroadcastHashJoin Inner BuildRight (25) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) : : :- * Filter (3) : : : +- * ColumnarToRow (2) : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * BroadcastHashJoin LeftOuter BuildRight (14) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * HashAggregate (12) + : : +- Exchange (11) + : : +- * HashAggregate (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) : +- BroadcastExchange (24) : +- * Project (23) : +- * Filter (22) - : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- BroadcastExchange (20) - : +- * HashAggregate (19) - : +- Exchange (18) - : +- * HashAggregate (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.item (14) + : +- * ColumnarToRow (21) + : +- Scan parquet default.date_dim (20) +- * Sort (42) +- Exchange (41) +- * Project (40) @@ -65,112 +65,112 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_customer_sk#3] -Input 
[4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(4) Scan parquet default.item +Output [3]: [i_item_sk#4, i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] ReadSchema: struct -(12) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(5) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] -(13) Filter [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] -Condition : (isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) +(6) Filter [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] +Condition : (isnotnull(i_current_price#5) AND isnotnull(i_item_sk#4)) -(14) Scan parquet default.item -Output [2]: [i_current_price#10, i_category#11] +(7) Scan parquet default.item +Output [2]: [i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] - -(16) Filter [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Condition : isnotnull(i_category#11) - -(17) HashAggregate [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Keys [1]: [i_category#11] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [i_category#11, sum#14, count#15] - -(18) Exchange -Input [3]: [i_category#11, sum#14, count#15] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 3] -Input [3]: [i_category#11, sum#14, count#15] -Keys [1]: [i_category#11] -Functions [1]: [avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] -Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] - -(20) BroadcastExchange -Input [2]: [avg(i_current_price)#18, i_category#11#19] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_category#11] -Right keys [1]: [i_category#11#19] +(8) ColumnarToRow [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] + +(9) Filter [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Condition : isnotnull(i_category#6) + +(10) HashAggregate [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Keys [1]: [i_category#6] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [2]: [sum#7, count#8] +Results [3]: [i_category#6, sum#9, count#10] + +(11) Exchange +Input [3]: [i_category#6, sum#9, count#10] +Arguments: hashpartitioning(i_category#6, 5), true, [id=#11] + +(12) HashAggregate [codegen id : 2] +Input [3]: [i_category#6, sum#9, count#10] +Keys [1]: [i_category#6] +Functions [1]: [avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#5))#12] +Results [2]: [cast((avg(UnscaledValue(i_current_price#5))#12 / 100.0) as decimal(11,6)) AS avg(i_current_price)#13, i_category#6 AS i_category#6#14] + +(13) BroadcastExchange +Input [2]: [avg(i_current_price)#13, i_category#6#14] 
+Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#15] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_category#6] +Right keys [1]: [i_category#6#14] +Join condition: None + +(15) Filter [codegen id : 3] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] +Condition : (cast(i_current_price#5 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#13)), DecimalType(14,7), true)) + +(16) Project [codegen id : 3] +Output [1]: [i_item_sk#4] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] + +(17) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] Join condition: None +(19) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, i_item_sk#4] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_month_seq#18] + (22) Filter [codegen id : 4] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] -Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) +Input [2]: [d_date_sk#17, d_month_seq#18] +Condition : ((isnotnull(d_month_seq#18) AND (d_month_seq#18 = Subquery scalar-subquery#19, [id=#20])) AND isnotnull(d_date_sk#17)) (23) Project [codegen id : 4] -Output [1]: [i_item_sk#9] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#18] (24) BroadcastExchange -Input [1]: [i_item_sk#9] +Input [1]: [d_date_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (25) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 5] Output [1]: [ss_customer_sk#3] -Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#3, d_date_sk#17] (27) Exchange Input [1]: [ss_customer_sk#3] @@ -282,7 +282,7 @@ Arguments: 100, [cnt#35 ASC NULLS FIRST], [state#34, cnt#35] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery scalar-subquery#19, [id=#20] * HashAggregate (57) +- Exchange (56) +- * HashAggregate (55) @@ -293,39 +293,39 @@ Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery (51) Scan parquet default.date_dim -Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [3]: [d_month_seq#18, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: 
[d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (53) Filter [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] Condition : (((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) (54) Project [codegen id : 1] -Output [1]: [d_month_seq#5] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [1]: [d_month_seq#18] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (55) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] (56) Exchange -Input [1]: [d_month_seq#5] -Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] +Input [1]: [d_month_seq#18] +Arguments: hashpartitioning(d_month_seq#18, 5), true, [id=#39] (57) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt index dcebba331afb3..73d42163240f0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q6.sf100/simplified.txt @@ -16,55 +16,55 @@ TakeOrderedAndProject [cnt,state] Exchange [ss_customer_sk] #2 WholeStageCodegen (5) Project [ss_customer_sk] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [d_month_seq] + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - Exchange [d_month_seq] #4 - WholeStageCodegen (1) - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow + Scan parquet default.item [i_item_sk,i_current_price,i_category] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (2) + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] + InputAdapter + Exchange [i_category] #5 + WholeStageCodegen (1) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_category] InputAdapter - BroadcastExchange #5 + BroadcastExchange #6 WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] - BroadcastHashJoin [i_category,i_category] - 
Filter [i_current_price,i_item_sk] - ColumnarToRow + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_category] + Exchange [d_month_seq] #7 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (2) - HashAggregate [i_category,i_current_price] [sum,count,sum,count] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_current_price,i_category] + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter WholeStageCodegen (12) Sort [c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index e616934bbd073..58a60763b2b57 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -1,7 +1,7 @@ == Physical Plan == -TakeOrderedAndProject (75) -+- * Project (74) - +- BroadcastNestedLoopJoin Inner BuildRight (73) +TakeOrderedAndProject (69) ++- * Project (68) + +- BroadcastNestedLoopJoin Inner BuildRight (67) :- * HashAggregate (47) : +- Exchange (46) : +- * HashAggregate (45) @@ -49,31 +49,25 @@ TakeOrderedAndProject (75) : +- * Filter (37) : +- * ColumnarToRow (36) : +- Scan parquet default.customer_address (35) - +- BroadcastExchange (72) - +- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- * Project (68) - +- * BroadcastHashJoin Inner BuildRight (67) + +- BroadcastExchange (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) :- * Project (59) : +- * BroadcastHashJoin Inner BuildRight (58) : :- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildLeft (52) - : : : :- ReusedExchange (48) - : : : +- * Filter (51) - : : : +- * ColumnarToRow (50) - : : : +- Scan parquet default.store_sales (49) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.store_sales (48) + : : : +- ReusedExchange (51) : : +- ReusedExchange (54) : +- ReusedExchange (57) - +- BroadcastExchange (66) - +- * Project (65) - +- * BroadcastHashJoin Inner BuildLeft (64) - :- ReusedExchange (60) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.customer (61) + +- ReusedExchange (60) (1) Scan parquet default.store_sales @@ -290,31 +284,31 @@ Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#31] Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#31,17,2) AS promotions#32] -(48) ReusedExchange [Reuses operator id: 8] -Output [1]: [d_date_sk#7] - -(49) Scan parquet default.store_sales +(48) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, 
ss_store_sk#4, ss_ext_sales_price#6] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_sold_date_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] ReadSchema: struct -(50) ColumnarToRow +(49) ColumnarToRow [codegen id : 14] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -(51) Filter +(50) Filter [codegen id : 14] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] Condition : (((isnotnull(ss_store_sk#4) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_item_sk#2)) +(51) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#7] + (52) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [d_date_sk#7] -Right keys [1]: [ss_sold_date_sk#1] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#7] Join condition: None (53) Project [codegen id : 14] Output [4]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] -Input [6]: [d_date_sk#7, ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6] +Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, d_date_sk#7] (54) ReusedExchange [Reuses operator id: 15] Output [1]: [i_item_sk#11] @@ -340,75 +334,48 @@ Join condition: None Output [2]: [ss_customer_sk#3, ss_ext_sales_price#6] Input [4]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#6, s_store_sk#19] -(60) ReusedExchange [Reuses operator id: 39] -Output [1]: [ca_address_sk#24] - -(61) Scan parquet default.customer -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct - -(62) ColumnarToRow -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] - -(63) Filter -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) - -(64) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ca_address_sk#24] -Right keys [1]: [c_current_addr_sk#23] -Join condition: None - -(65) Project [codegen id : 13] +(60) ReusedExchange [Reuses operator id: 42] Output [1]: [c_customer_sk#22] -Input [3]: [ca_address_sk#24, c_customer_sk#22, c_current_addr_sk#23] - -(66) BroadcastExchange -Input [1]: [c_customer_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] -(67) BroadcastHashJoin [codegen id : 14] +(61) BroadcastHashJoin [codegen id : 14] Left keys [1]: [ss_customer_sk#3] Right keys [1]: [c_customer_sk#22] Join condition: None -(68) Project [codegen id : 14] +(62) Project [codegen id : 14] Output [1]: [ss_ext_sales_price#6] Input [3]: [ss_customer_sk#3, ss_ext_sales_price#6, c_customer_sk#22] -(69) HashAggregate [codegen id : 14] +(63) HashAggregate [codegen id : 14] Input [1]: [ss_ext_sales_price#6] Keys: [] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#34] -Results [1]: [sum#35] +Aggregate Attributes [1]: [sum#33] +Results [1]: [sum#34] -(70) Exchange -Input [1]: [sum#35] -Arguments: SinglePartition, true, [id=#36] +(64) Exchange +Input [1]: [sum#34] +Arguments: SinglePartition, true, [id=#35] -(71) HashAggregate [codegen id : 15] -Input [1]: [sum#35] +(65) HashAggregate [codegen id : 15] +Input [1]: [sum#34] Keys: [] Functions [1]: 
[sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#37] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#37,17,2) AS total#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#36] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#36,17,2) AS total#37] -(72) BroadcastExchange -Input [1]: [total#38] -Arguments: IdentityBroadcastMode, [id=#39] +(66) BroadcastExchange +Input [1]: [total#37] +Arguments: IdentityBroadcastMode, [id=#38] -(73) BroadcastNestedLoopJoin +(67) BroadcastNestedLoopJoin Join condition: None -(74) Project [codegen id : 16] -Output [3]: [promotions#32, total#38, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#38 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] -Input [2]: [promotions#32, total#38] +(68) Project [codegen id : 16] +Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Input [2]: [promotions#32, total#37] -(75) TakeOrderedAndProject -Input [3]: [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#38 ASC NULLS FIRST], [promotions#32, total#38, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#40] +(69) TakeOrderedAndProject +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt index 039ccb1aa18cf..87f2b3ae03746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt @@ -86,25 +86,16 @@ TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(1 Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] 
- BroadcastHashJoin [d_date_sk,ss_sold_date_sk] - InputAdapter - ReusedExchange [d_date_sk] #2 + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter ReusedExchange [i_item_sk] #3 InputAdapter ReusedExchange [s_store_sk] #5 InputAdapter - BroadcastExchange #10 - WholeStageCodegen (13) - Project [c_customer_sk] - BroadcastHashJoin [ca_address_sk,c_current_addr_sk] - InputAdapter - ReusedExchange [ca_address_sk] #7 - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + ReusedExchange [c_customer_sk] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt index e9a2b7a375b01..b74dfb49c9f03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (32) : :- * Project (16) : : +- * BroadcastHashJoin Inner BuildRight (15) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.web_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (14) : : +- * Filter (13) : : +- * ColumnarToRow (12) @@ -33,50 +33,50 @@ TakeOrderedAndProject (32) +- Scan parquet default.warehouse (23) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_month_seq#2] +(1) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(2) ColumnarToRow [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] -Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Condition : (((isnotnull(ws_warehouse_sk#5) AND isnotnull(ws_ship_mode_sk#4)) AND isnotnull(ws_web_site_sk#3)) AND 
isnotnull(ws_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_month_seq#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] -(6) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_ship_mode_sk), IsNotNull(ws_web_site_sk), IsNotNull(ws_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) -(7) ColumnarToRow -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_month_seq#7] -(8) Filter -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Condition : (((isnotnull(ws_warehouse_sk#8) AND isnotnull(ws_ship_mode_sk#7)) AND isnotnull(ws_web_site_sk#6)) AND isnotnull(ws_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ws_ship_date_sk#5] +Left keys [1]: [ws_ship_date_sk#2] +Right keys [1]: [d_date_sk#6] Join condition: None (10) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] -Input [6]: [d_date_sk#1, ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5] +Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, d_date_sk#6] (11) Scan parquet default.web_site Output [2]: [web_site_sk#9, web_name#10] @@ -97,13 +97,13 @@ Input [2]: [web_site_sk#9, web_name#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_web_site_sk#6] +Left keys [1]: [ws_web_site_sk#3] Right keys [1]: [web_site_sk#9] Join condition: None (16) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_web_site_sk#6, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_site_sk#9, web_name#10] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_name#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_web_site_sk#3, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_site_sk#9, web_name#10] (17) Scan parquet default.ship_mode Output [2]: [sm_ship_mode_sk#12, sm_type#13] @@ -124,13 +124,13 
@@ Input [2]: [sm_ship_mode_sk#12, sm_type#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_ship_mode_sk#7] +Left keys [1]: [ws_ship_mode_sk#4] Right keys [1]: [sm_ship_mode_sk#12] Join condition: None (22) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_ship_mode_sk#7, ws_warehouse_sk#8, web_name#10, sm_ship_mode_sk#12, sm_type#13] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10, sm_type#13] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_ship_mode_sk#4, ws_warehouse_sk#5, web_name#10, sm_ship_mode_sk#12, sm_type#13] (23) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] @@ -151,18 +151,18 @@ Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#8] +Left keys [1]: [ws_warehouse_sk#5] Right keys [1]: [w_warehouse_sk#15] Join condition: None (28) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] -Input [7]: [ws_sold_date_sk#4, ws_ship_date_sk#5, ws_warehouse_sk#8, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] +Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] (29) HashAggregate [codegen id : 5] -Input [5]: [ws_sold_date_sk#4, ws_ship_date_sk#5, w_warehouse_name#16, sm_type#13, web_name#10] +Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, 
sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] @@ -173,9 +173,9 @@ Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, w (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 30) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 60) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 90) AND ((ws_ship_date_sk#5 - ws_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#5 - ws_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) 
AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt index 59cfc4b7b249a..9b16b44792ca4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days Project [ws_sold_date_sk,ws_ship_date_sk,ws_ship_mode_sk,ws_warehouse_sk,web_name] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] - BroadcastHashJoin [d_date_sk,ws_ship_date_sk] + BroadcastHashJoin [ws_ship_date_sk,d_date_sk] + Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_month_seq] - Filter [ws_warehouse_sk,ws_ship_mode_sk,ws_web_site_sk,ws_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ship_date_sk,ws_web_site_sk,ws_ship_mode_sk,ws_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt index 4b863587b08d9..5db04537d6371 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -14,15 +14,15 @@ TakeOrderedAndProject (55) : : :- * Project (17) : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : :- BroadcastExchange (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.ship_mode (1) - : : : : +- * Filter (8) - : : : : +- * ColumnarToRow (7) - : : : : +- Scan parquet default.web_sales (6) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.web_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.ship_mode (4) : : : +- BroadcastExchange (15) : : : +- * Project (14) : : : +- * Filter (13) @@ -46,60 +46,60 @@ TakeOrderedAndProject (55) : :- * Project (41) : : +- * BroadcastHashJoin Inner BuildRight (40) : : :- * Project (38) - : : : +- * BroadcastHashJoin Inner BuildLeft (37) - : : : :- ReusedExchange (33) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.catalog_sales (34) + : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : :- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.catalog_sales (33) + : : : +- ReusedExchange (36) : : +- ReusedExchange (39) : +- ReusedExchange (42) +- ReusedExchange (45) -(1) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/ship_mode] -PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/web_sales] +PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(2) ColumnarToRow [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] -(3) Filter [codegen id : 1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] -Condition : (sm_carrier#2 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#1)) +(3) Filter [codegen id : 5] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Condition : (((isnotnull(ws_warehouse_sk#4) AND isnotnull(ws_sold_date_sk#1)) AND isnotnull(ws_sold_time_sk#2)) AND isnotnull(ws_ship_mode_sk#3)) -(4) Project [codegen id : 1] -Output [1]: [sm_ship_mode_sk#1] -Input [2]: [sm_ship_mode_sk#1, sm_carrier#2] +(4) Scan parquet default.ship_mode +Output [2]: [sm_ship_mode_sk#8, sm_carrier#9] +Batched: true +Location [not included in comparison]/{warehouse_dir}/ship_mode] +PushedFilters: [In(sm_carrier, [DHL,BARIAN]), IsNotNull(sm_ship_mode_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [sm_ship_mode_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [sm_ship_mode_sk#8, 
sm_carrier#9] -(6) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/web_sales] -PushedFilters: [IsNotNull(ws_warehouse_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_ship_mode_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [sm_ship_mode_sk#8, sm_carrier#9] +Condition : (sm_carrier#9 IN (DHL,BARIAN) AND isnotnull(sm_ship_mode_sk#8)) -(7) ColumnarToRow -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +(7) Project [codegen id : 1] +Output [1]: [sm_ship_mode_sk#8] +Input [2]: [sm_ship_mode_sk#8, sm_carrier#9] -(8) Filter -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Condition : (((isnotnull(ws_warehouse_sk#7) AND isnotnull(ws_sold_date_sk#4)) AND isnotnull(ws_sold_time_sk#5)) AND isnotnull(ws_ship_mode_sk#6)) +(8) BroadcastExchange +Input [1]: [sm_ship_mode_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sm_ship_mode_sk#1] -Right keys [1]: [ws_ship_mode_sk#6] +Left keys [1]: [ws_ship_mode_sk#3] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (10) Project [codegen id : 5] -Output [6]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Input [8]: [sm_ship_mode_sk#1, ws_sold_date_sk#4, ws_sold_time_sk#5, ws_ship_mode_sk#6, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] +Output [6]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Input [8]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, sm_ship_mode_sk#8] (11) Scan parquet default.time_dim Output [2]: [t_time_sk#11, t_time#12] @@ -124,13 +124,13 @@ Input [1]: [t_time_sk#11] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_time_sk#5] +Left keys [1]: [ws_sold_time_sk#2] Right keys [1]: [t_time_sk#11] Join condition: None (17) Project [codegen id : 5] -Output [5]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10] -Input [7]: [ws_sold_date_sk#4, ws_sold_time_sk#5, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, t_time_sk#11] +Output [5]: [ws_sold_date_sk#1, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7] +Input [7]: [ws_sold_date_sk#1, ws_sold_time_sk#2, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, t_time_sk#11] (18) Scan parquet default.date_dim Output [3]: [d_date_sk#14, d_year#15, d_moy#16] @@ -151,13 +151,13 @@ Input [3]: [d_date_sk#14, d_year#15, d_moy#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_date_sk#4] +Left keys [1]: [ws_sold_date_sk#1] Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 5] -Output [6]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16] 
-Input [8]: [ws_sold_date_sk#4, ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_date_sk#14, d_year#15, d_moy#16] +Output [6]: [ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_year#15, d_moy#16] +Input [8]: [ws_sold_date_sk#1, ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_date_sk#14, d_year#15, d_moy#16] (24) Scan parquet default.warehouse Output [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] @@ -178,18 +178,18 @@ Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_warehouse_sk#7] +Left keys [1]: [ws_warehouse_sk#4] Right keys [1]: [w_warehouse_sk#18] Join condition: None (29) Project [codegen id : 5] -Output [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] -Input [13]: [ws_warehouse_sk#7, ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Output [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [13]: [ws_warehouse_sk#4, ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, d_year#15, d_moy#16, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] (30) HashAggregate [codegen id : 5] -Input [11]: [ws_quantity#8, ws_ext_sales_price#9, ws_net_paid#10, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] +Input [11]: [ws_quantity#5, ws_ext_sales_price#6, ws_net_paid#7, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, d_moy#16] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] -Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN 
CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] +Functions [24]: [partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as 
decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, 
isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] @@ -200,35 +200,35 @@ Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21 (32) HashAggregate [codegen id : 6] Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#15] -Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN 
CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), 
true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#15 AS year#148, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS 
apr_sales#152, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#9 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS 
jun_net#166, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#10 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#8 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] +Functions [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END), sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), 
DecimalType(18,2), true) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146] +Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#15 AS year#148, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#6 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#16 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#16 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#16 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#16 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#16 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#16 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#16 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * 
promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#16 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#16 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#16 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#16 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#16 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#7 as decimal(12,2))) * promote_precision(cast(cast(ws_quantity#5 as decimal(10,0)) as decimal(12,2)))), DecimalType(18,2), true) ELSE 0.00 END)#146 AS dec_net#172] -(33) ReusedExchange [Reuses operator id: 5] -Output [1]: [sm_ship_mode_sk#1] - -(34) Scan parquet default.catalog_sales +(33) Scan parquet default.catalog_sales Output [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] ReadSchema: struct -(35) ColumnarToRow +(34) ColumnarToRow [codegen id : 11] Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -(36) Filter +(35) Filter [codegen id : 11] Input [7]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] Condition : (((isnotnull(cs_warehouse_sk#176) AND isnotnull(cs_sold_date_sk#173)) AND isnotnull(cs_sold_time_sk#174)) AND isnotnull(cs_ship_mode_sk#175)) +(36) ReusedExchange [Reuses operator id: 8] +Output [1]: [sm_ship_mode_sk#8] + (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [sm_ship_mode_sk#1] -Right keys [1]: [cs_ship_mode_sk#175] +Left keys [1]: [cs_ship_mode_sk#175] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (38) Project [codegen id : 11] Output [6]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] -Input [8]: [sm_ship_mode_sk#1, cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179] +Input [8]: [cs_sold_date_sk#173, cs_sold_time_sk#174, cs_ship_mode_sk#175, cs_warehouse_sk#176, cs_quantity#177, cs_sales_price#178, cs_net_paid_inc_tax#179, sm_ship_mode_sk#8] (39) ReusedExchange [Reuses operator id: 15] Output [1]: [t_time_sk#11] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt index 465d269a847c3..ddfb04d8df5e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/simplified.txt @@ -20,7 +20,11 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat Project [ws_sold_date_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] BroadcastHashJoin [ws_sold_time_sk,t_time_sk] Project [ws_sold_date_sk,ws_sold_time_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] - BroadcastHashJoin [sm_ship_mode_sk,ws_ship_mode_sk] + BroadcastHashJoin [ws_ship_mode_sk,sm_ship_mode_sk] + Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -29,10 +33,6 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat ColumnarToRow InputAdapter Scan parquet default.ship_mode [sm_ship_mode_sk,sm_carrier] - Filter [ws_warehouse_sk,ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_sold_time_sk,ws_ship_mode_sk,ws_warehouse_sk,ws_quantity,ws_ext_sales_price,ws_net_paid] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -68,13 +68,13 @@ TakeOrderedAndProject [w_warehouse_name,w_warehouse_sq_ft,w_city,w_county,w_stat Project [cs_sold_date_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] BroadcastHashJoin [cs_sold_time_sk,t_time_sk] Project [cs_sold_date_sk,cs_sold_time_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] - BroadcastHashJoin [sm_ship_mode_sk,cs_ship_mode_sk] - InputAdapter - ReusedExchange [sm_ship_mode_sk] #3 + BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] Filter [cs_warehouse_sk,cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk] ColumnarToRow InputAdapter Scan parquet default.catalog_sales [cs_sold_date_sk,cs_sold_time_sk,cs_ship_mode_sk,cs_warehouse_sk,cs_quantity,cs_sales_price,cs_net_paid_inc_tax] + InputAdapter + ReusedExchange [sm_ship_mode_sk] #3 InputAdapter ReusedExchange [t_time_sk] #4 InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index a100b6659f162..3f8106c96379a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -24,15 +24,15 @@ TakeOrderedAndProject (79) : : : : : :- * Project (17) : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : : : : :- BroadcastExchange (5) - : : : : : : : : +- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.household_demographics (1) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * 
ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) : : : : : : +- BroadcastExchange (15) : : : : : : +- * Project (14) : : : : : : +- * Filter (13) @@ -49,26 +49,26 @@ TakeOrderedAndProject (79) : : : : +- Scan parquet default.item (26) : : : +- BroadcastExchange (43) : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- BroadcastExchange (40) - : : : +- * Project (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.date_dim (36) + : : : +- * BroadcastHashJoin Inner BuildLeft (41) + : : : :- BroadcastExchange (37) + : : : : +- * Project (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.date_dim (38) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildLeft (55) - : : :- BroadcastExchange (51) - : : : +- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.warehouse (48) - : : +- * Filter (54) - : : +- * ColumnarToRow (53) - : : +- Scan parquet default.inventory (52) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Filter (50) + : : : +- * ColumnarToRow (49) + : : : +- Scan parquet default.inventory (48) + : : +- BroadcastExchange (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.warehouse (51) : +- BroadcastExchange (64) : +- * Filter (63) : +- * ColumnarToRow (62) @@ -80,50 +80,50 @@ TakeOrderedAndProject (79) +- Scan parquet default.catalog_returns (69) -(1) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(2) ColumnarToRow [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -(3) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] -Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = >10000)) AND isnotnull(hd_demo_sk#1)) +(3) Filter [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND 
isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [hd_demo_sk#1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000), IsNotNull(hd_demo_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [hd_demo_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(6) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Condition : ((isnotnull(hd_buy_potential#10) AND (hd_buy_potential#10 = >10000)) AND isnotnull(hd_demo_sk#9)) -(7) ColumnarToRow -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#9] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(8) Filter -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [hd_demo_sk#1] -Right keys [1]: [cs_bill_hdemo_sk#7] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Output [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, hd_demo_sk#9] (11) Scan parquet default.customer_demographics Output [2]: [cd_demo_sk#12, cd_marital_status#13] @@ -148,13 +148,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_bill_cdemo_sk#6] +Left keys [1]: [cs_bill_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, 
cs_order_number#10, cs_quantity#11] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] +Output [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, cd_demo_sk#12] (18) Scan parquet default.date_dim Output [2]: [d_date_sk#15, d_date#16] @@ -175,21 +175,21 @@ Input [2]: [d_date_sk#15, d_date#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] Right keys [1]: [d_date_sk#15] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] +Output [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date_sk#15, d_date#16] (24) Exchange -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] (25) Sort [codegen id : 5] -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: [cs_item_sk#5 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item Output [2]: [i_item_sk#19, i_item_desc#20] @@ -214,137 +214,137 @@ Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] -Left keys [1]: [cs_item_sk#8] +Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] -Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] (33) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_week_seq#23] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct -(34) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] +(34) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#22, 
d_date#23, d_week_seq#24, d_year#25] -(35) Filter [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] -Condition : (isnotnull(d_week_seq#23) AND isnotnull(d_date_sk#22)) +(35) Filter [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) -(36) Scan parquet default.date_dim -Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] -ReadSchema: struct +(36) Project [codegen id : 8] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(37) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(37) BroadcastExchange +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] -(38) Filter [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 1999)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) +(38) Scan parquet default.date_dim +Output [2]: [d_date_sk#27, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] +ReadSchema: struct -(39) Project [codegen id : 8] -Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(39) ColumnarToRow +Input [2]: [d_date_sk#27, d_week_seq#28] -(40) BroadcastExchange -Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] +(40) Filter +Input [2]: [d_date_sk#27, d_week_seq#28] +Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) (41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#23] -Right keys [1]: [d_week_seq#26] +Left keys [1]: [d_week_seq#24] +Right keys [1]: [d_week_seq#28] Join condition: None (42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Input [5]: [d_date_sk#22, d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] (43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#4] -Right keys [1]: [d_date_sk#24] -Join condition: (d_date#16 > d_date#25 + 5 days) +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] +Join condition: (d_date#16 > d_date#23 + 5 days) (45) Project [codegen id : 10] -Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, 
cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] (46) Exchange -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] (47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 -(48) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(48) Scan parquet default.inventory +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - -(49) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct -(50) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(49) ColumnarToRow [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(51) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(50) Filter [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) -(52) Scan parquet default.inventory -Output [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(51) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] -(53) ColumnarToRow -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(53) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Condition : isnotnull(w_warehouse_sk#35) -(54) Filter -Input [4]: 
[inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] -Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) +(54) BroadcastExchange +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] (55) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [w_warehouse_sk#31] -Right keys [1]: [inv_warehouse_sk#36] +Left keys [1]: [inv_warehouse_sk#33] +Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: [inv_item_sk#35 ASC NULLS FIRST, inv_date_sk#34 ASC NULLS FIRST], false, 0 +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#8, d_date_sk#22] -Right keys [2]: [inv_item_sk#35, inv_date_sk#34] -Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) +Left keys [2]: [cs_item_sk#5, d_date_sk#27] +Right keys [2]: [inv_item_sk#32, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -365,21 +365,21 @@ Input [1]: [p_promo_sk#39] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] (65) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [cs_promo_sk#9] +Left keys [1]: [cs_promo_sk#6] Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, 
i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns Output [2]: [cr_item_sk#42, cr_order_number#43] @@ -404,33 +404,33 @@ Input [2]: [cr_item_sk#42, cr_order_number#43] Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 (74) SortMergeJoin -Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, 
i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt index 39dba3af02359..918508787c4b0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -23,7 +23,7 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter Exchange [cs_item_sk,d_date_sk] #3 WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_date_sk,d_week_seq] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] SortMergeJoin [cs_item_sk,i_item_sk] @@ -38,7 +38,11 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [hd_demo_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #5 WholeStageCodegen (1) @@ -47,10 +51,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #6 WholeStageCodegen (2) @@ -79,12 +79,8 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter BroadcastExchange #9 WholeStageCodegen (9) - Project [d_date_sk,d_date_sk,d_date,d_week_seq] + Project [d_date_sk,d_date,d_week_seq,d_date_sk] BroadcastHashJoin [d_week_seq,d_week_seq] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -93,14 +89,22 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter 
[d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter WholeStageCodegen (14) Sort [inv_item_sk,inv_date_sk] InputAdapter Exchange [inv_item_sk,inv_date_sk] #11 WholeStageCodegen (13) - Project [w_warehouse_name,inv_date_sk,inv_item_sk,inv_quantity_on_hand] - BroadcastHashJoin [w_warehouse_sk,inv_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) @@ -108,10 +112,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #13 WholeStageCodegen (15) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt index 057d786afbcdd..9ac081b356c94 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -37,12 +37,12 @@ TakeOrderedAndProject (108) : : : +- * Project (23) : : : +- * Filter (22) : : : +- * ColumnarToRow (21) - : : : +- Scan parquet default.date_dim (20) + : : : +- Scan parquet default.promotion (20) : : +- BroadcastExchange (31) : : +- * Project (30) : : +- * Filter (29) : : +- * ColumnarToRow (28) - : : +- Scan parquet default.promotion (27) + : : +- Scan parquet default.date_dim (27) : +- BroadcastExchange (37) : +- * Filter (36) : +- * ColumnarToRow (35) @@ -193,67 +193,67 @@ Join condition: None Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] -(20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +(20) Scan parquet default.promotion +Output [2]: [p_promo_sk#17, p_channel_tv#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (22) Filter [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 11192)) AND (d_date#18 <= 11222)) AND isnotnull(d_date_sk#17)) +Input [2]: [p_promo_sk#17, p_channel_tv#18] +Condition : ((isnotnull(p_channel_tv#18) AND (p_channel_tv#18 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 6] -Output [1]: [d_date_sk#17] -Input 
[2]: [d_date_sk#17, d_date#18] +Output [1]: [p_promo_sk#17] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (24) BroadcastExchange -Input [1]: [d_date_sk#17] +Input [1]: [p_promo_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] (25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 9] -Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] +Output [6]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#17] -(27) Scan parquet default.promotion -Output [2]: [p_promo_sk#20, p_channel_tv#21] +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_date#21] Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] +ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Input [2]: [d_date_sk#20, d_date#21] (29) Filter [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] -Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) +Input [2]: [d_date_sk#20, d_date#21] +Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 11192)) AND (d_date#21 <= 11222)) AND isnotnull(d_date_sk#20)) (30) Project [codegen id : 7] -Output [1]: [p_promo_sk#20] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_date#21] (31) BroadcastExchange -Input [1]: [p_promo_sk#20] +Input [1]: [d_date_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (33) Project [codegen id : 9] Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#20] (34) Scan parquet default.store Output [2]: [s_store_sk#23, s_store_id#24] @@ -366,28 +366,28 @@ Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_s Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] (58) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (59) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: 
[d_date_sk#17] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#17] Join condition: None (60) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] +Output [6]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#17] (61) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (62) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#48] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#20] Join condition: None (63) Project [codegen id : 19] Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#20] (64) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] @@ -500,28 +500,28 @@ Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] (88) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (89) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#80] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#17] Join condition: None (90) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] +Output [6]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#17] (91) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (92) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#83] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#20] Join condition: None (93) Project [codegen id : 29] Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [7]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] +Input [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#20] (94) Scan parquet default.web_site Output [2]: [web_site_sk#93, web_site_id#94] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt index 7b73e4307dcf0..ec00b49e71989 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/simplified.txt @@ -17,9 +17,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] @@ -54,19 +54,19 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #6 WholeStageCodegen (6) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter BroadcastExchange #7 WholeStageCodegen (7) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] + Project [d_date_sk] + Filter [d_date,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #8 WholeStageCodegen (8) @@ -83,9 +83,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] @@ -112,9 +112,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #5 InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [p_promo_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #12 WholeStageCodegen (18) @@ -131,9 +131,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project 
[ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] @@ -160,9 +160,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #5 InputAdapter - ReusedExchange [d_date_sk] #6 + ReusedExchange [p_promo_sk] #6 InputAdapter - ReusedExchange [p_promo_sk] #7 + ReusedExchange [d_date_sk] #7 InputAdapter BroadcastExchange #16 WholeStageCodegen (28) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt index ae0b996ec28be..83ec6391d7736 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt @@ -20,15 +20,15 @@ TakeOrderedAndProject (36) : : : +- Scan parquet default.customer_address (4) : : +- BroadcastExchange (21) : : +- * Project (20) - : : +- * BroadcastHashJoin Inner BuildLeft (19) - : : :- BroadcastExchange (15) - : : : +- * Project (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.income_band (11) - : : +- * Filter (18) - : : +- * ColumnarToRow (17) - : : +- Scan parquet default.household_demographics (16) + : : +- * BroadcastHashJoin Inner BuildRight (19) + : : :- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.household_demographics (11) + : : +- BroadcastExchange (18) + : : +- * Project (17) + : : +- * Filter (16) + : : +- * ColumnarToRow (15) + : : +- Scan parquet default.income_band (14) : +- * Filter (27) : +- * ColumnarToRow (26) : +- Scan parquet default.customer_demographics (25) @@ -82,63 +82,63 @@ Join condition: None Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6] Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] -(11) Scan parquet default.income_band -Output [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +(11) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/income_band] -PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] +ReadSchema: struct -(12) ColumnarToRow [codegen id : 2] -Input [3]: [ib_income_band_sk#10, 
ib_lower_bound#11, ib_upper_bound#12] +(12) ColumnarToRow [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] -(13) Filter [codegen id : 2] -Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] -Condition : ((((isnotnull(ib_lower_bound#11) AND isnotnull(ib_upper_bound#12)) AND (ib_lower_bound#11 >= 38128)) AND (ib_upper_bound#12 <= 88128)) AND isnotnull(ib_income_band_sk#10)) +(13) Filter [codegen id : 3] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) -(14) Project [codegen id : 2] -Output [1]: [ib_income_band_sk#10] -Input [3]: [ib_income_band_sk#10, ib_lower_bound#11, ib_upper_bound#12] +(14) Scan parquet default.income_band +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Batched: true +Location [not included in comparison]/{warehouse_dir}/income_band] +PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] +ReadSchema: struct -(15) BroadcastExchange -Input [1]: [ib_income_band_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +(15) ColumnarToRow [codegen id : 2] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] -(16) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#14, hd_income_band_sk#15] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] -ReadSchema: struct +(16) Filter [codegen id : 2] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) -(17) ColumnarToRow -Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] +(17) Project [codegen id : 2] +Output [1]: [ib_income_band_sk#12] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] -(18) Filter -Input [2]: [hd_demo_sk#14, hd_income_band_sk#15] -Condition : (isnotnull(hd_demo_sk#14) AND isnotnull(hd_income_band_sk#15)) +(18) BroadcastExchange +Input [1]: [ib_income_band_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] (19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ib_income_band_sk#10] -Right keys [1]: [hd_income_band_sk#15] +Left keys [1]: [hd_income_band_sk#11] +Right keys [1]: [ib_income_band_sk#12] Join condition: None (20) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [3]: [ib_income_band_sk#10, hd_demo_sk#14, hd_income_band_sk#15] +Output [1]: [hd_demo_sk#10] +Input [3]: [hd_demo_sk#10, hd_income_band_sk#11, ib_income_band_sk#12] (21) BroadcastExchange -Input [1]: [hd_demo_sk#14] +Input [1]: [hd_demo_sk#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#10] Join condition: None (23) Project [codegen id : 4] Output [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, hd_demo_sk#14] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, 
c_first_name#5, c_last_name#6, hd_demo_sk#10] (24) BroadcastExchange Input [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt index 1fbc57ee7e47a..16087526bc130 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/simplified.txt @@ -30,7 +30,11 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] BroadcastExchange #4 WholeStageCodegen (3) Project [hd_demo_sk] - BroadcastHashJoin [ib_income_band_sk,hd_income_band_sk] + BroadcastHashJoin [hd_income_band_sk,ib_income_band_sk] + Filter [hd_demo_sk,hd_income_band_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] InputAdapter BroadcastExchange #5 WholeStageCodegen (2) @@ -39,10 +43,6 @@ TakeOrderedAndProject [c_customer_id,customer_id,customername] ColumnarToRow InputAdapter Scan parquet default.income_band [ib_income_band_sk,ib_lower_bound,ib_upper_bound] - Filter [hd_demo_sk,hd_income_band_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_income_band_sk] Filter [cd_demo_sk] ColumnarToRow InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt index ee550f1af4947..7c3f00d33f24e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt @@ -12,30 +12,30 @@ TakeOrderedAndProject (57) : :- * Project (31) : : +- * BroadcastHashJoin Inner BuildRight (30) : : :- * Project (25) - : : : +- * BroadcastHashJoin Inner BuildLeft (24) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Project (23) - : : : +- * SortMergeJoin Inner (22) - : : : :- * Sort (16) - : : : : +- Exchange (15) - : : : : +- * Project (14) - : : : : +- * BroadcastHashJoin Inner BuildRight (13) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.web_sales (6) - : : : : +- BroadcastExchange (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.web_page (9) - : : : +- * Sort (21) - : : : +- Exchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.web_returns (17) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Project (18) + : : : : +- * SortMergeJoin Inner (17) + : : : : :- * Sort (11) + : : : : : +- Exchange (10) + : : : : : +- * Project (9) + : : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.web_sales (1) + : : : : : +- BroadcastExchange (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.web_page (4) + : : : : +- * Sort (16) + : : : : +- Exchange (15) + : : : : +- * Filter (14) + : : : : +- * ColumnarToRow (13) + : : : : +- Scan parquet default.web_returns (12) + : : : +- 
BroadcastExchange (23) + : : : +- * Project (22) + : : : +- * Filter (21) + : : : +- * ColumnarToRow (20) + : : : +- Scan parquet default.date_dim (19) : : +- BroadcastExchange (29) : : +- * Filter (28) : : +- * ColumnarToRow (27) @@ -48,126 +48,126 @@ TakeOrderedAndProject (57) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (43) - : +- * ColumnarToRow (42) - : +- Scan parquet default.customer_demographics (41) - +- BroadcastExchange (47) - +- * Filter (46) - +- * ColumnarToRow (45) - +- Scan parquet default.customer_demographics (44) - - -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_year#2] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] - -(3) Filter [codegen id : 1] -Input [2]: [d_date_sk#1, d_year#2] -Condition : ((isnotnull(d_year#2) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) - -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_year#2] - -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] - -(6) Scan parquet default.web_sales -Output [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] + +- * BroadcastHashJoin Inner BuildLeft (48) + :- BroadcastExchange (44) + : +- * Filter (43) + : +- * ColumnarToRow (42) + : +- Scan parquet default.customer_demographics (41) + +- * Filter (47) + +- * ColumnarToRow (46) + +- Scan parquet default.customer_demographics (45) + + +(1) Scan parquet default.web_sales +Output [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_page_sk), IsNotNull(ws_sold_date_sk), Or(Or(And(GreaterThanOrEqual(ws_sales_price,100.00),LessThanOrEqual(ws_sales_price,150.00)),And(GreaterThanOrEqual(ws_sales_price,50.00),LessThanOrEqual(ws_sales_price,100.00))),And(GreaterThanOrEqual(ws_sales_price,150.00),LessThanOrEqual(ws_sales_price,200.00))), Or(Or(And(GreaterThanOrEqual(ws_net_profit,100.00),LessThanOrEqual(ws_net_profit,200.00)),And(GreaterThanOrEqual(ws_net_profit,150.00),LessThanOrEqual(ws_net_profit,300.00))),And(GreaterThanOrEqual(ws_net_profit,50.00),LessThanOrEqual(ws_net_profit,250.00)))] ReadSchema: struct -(7) ColumnarToRow [codegen id : 3] -Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] +(2) ColumnarToRow [codegen id : 2] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] -(8) Filter [codegen id : 3] -Input [7]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Condition : (((((isnotnull(ws_item_sk#5) AND isnotnull(ws_order_number#7)) AND isnotnull(ws_web_page_sk#6)) AND isnotnull(ws_sold_date_sk#4)) AND ((((ws_sales_price#9 >= 100.00) AND (ws_sales_price#9 <= 150.00)) OR ((ws_sales_price#9 >= 50.00) AND (ws_sales_price#9 <= 100.00))) OR ((ws_sales_price#9 >= 150.00) AND (ws_sales_price#9 <= 200.00)))) 
AND ((((ws_net_profit#10 >= 100.00) AND (ws_net_profit#10 <= 200.00)) OR ((ws_net_profit#10 >= 150.00) AND (ws_net_profit#10 <= 300.00))) OR ((ws_net_profit#10 >= 50.00) AND (ws_net_profit#10 <= 250.00)))) +(3) Filter [codegen id : 2] +Input [7]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Condition : (((((isnotnull(ws_item_sk#2) AND isnotnull(ws_order_number#4)) AND isnotnull(ws_web_page_sk#3)) AND isnotnull(ws_sold_date_sk#1)) AND ((((ws_sales_price#6 >= 100.00) AND (ws_sales_price#6 <= 150.00)) OR ((ws_sales_price#6 >= 50.00) AND (ws_sales_price#6 <= 100.00))) OR ((ws_sales_price#6 >= 150.00) AND (ws_sales_price#6 <= 200.00)))) AND ((((ws_net_profit#7 >= 100.00) AND (ws_net_profit#7 <= 200.00)) OR ((ws_net_profit#7 >= 150.00) AND (ws_net_profit#7 <= 300.00))) OR ((ws_net_profit#7 >= 50.00) AND (ws_net_profit#7 <= 250.00)))) -(9) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#11] +(4) Scan parquet default.web_page +Output [1]: [wp_web_page_sk#8] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct -(10) ColumnarToRow [codegen id : 2] -Input [1]: [wp_web_page_sk#11] +(5) ColumnarToRow [codegen id : 1] +Input [1]: [wp_web_page_sk#8] -(11) Filter [codegen id : 2] -Input [1]: [wp_web_page_sk#11] -Condition : isnotnull(wp_web_page_sk#11) +(6) Filter [codegen id : 1] +Input [1]: [wp_web_page_sk#8] +Condition : isnotnull(wp_web_page_sk#8) -(12) BroadcastExchange -Input [1]: [wp_web_page_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +(7) BroadcastExchange +Input [1]: [wp_web_page_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] -(13) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_web_page_sk#6] -Right keys [1]: [wp_web_page_sk#11] +(8) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [ws_web_page_sk#3] +Right keys [1]: [wp_web_page_sk#8] Join condition: None -(14) Project [codegen id : 3] -Output [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Input [8]: [ws_sold_date_sk#4, ws_item_sk#5, ws_web_page_sk#6, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wp_web_page_sk#11] +(9) Project [codegen id : 2] +Output [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Input [8]: [ws_sold_date_sk#1, ws_item_sk#2, ws_web_page_sk#3, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wp_web_page_sk#8] -(15) Exchange -Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Arguments: hashpartitioning(cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint), 5), true, [id=#13] +(10) Exchange +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7] +Arguments: hashpartitioning(cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint), 5), true, [id=#10] -(16) Sort [codegen id : 4] -Input [6]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10] -Arguments: [cast(ws_item_sk#5 as bigint) ASC NULLS FIRST, cast(ws_order_number#7 as bigint) ASC NULLS FIRST], false, 0 +(11) Sort [codegen id : 3] +Input [6]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, 
ws_sales_price#6, ws_net_profit#7] +Arguments: [cast(ws_item_sk#2 as bigint) ASC NULLS FIRST, cast(ws_order_number#4 as bigint) ASC NULLS FIRST], false, 0 -(17) Scan parquet default.web_returns -Output [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(12) Scan parquet default.web_returns +Output [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] ReadSchema: struct -(18) ColumnarToRow [codegen id : 5] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(13) ColumnarToRow [codegen id : 4] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] -(19) Filter [codegen id : 5] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Condition : (((((isnotnull(wr_item_sk#14) AND isnotnull(wr_order_number#19)) AND isnotnull(wr_refunded_cdemo_sk#15)) AND isnotnull(wr_returning_cdemo_sk#17)) AND isnotnull(wr_refunded_addr_sk#16)) AND isnotnull(wr_reason_sk#18)) +(14) Filter [codegen id : 4] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Condition : (((((isnotnull(wr_item_sk#11) AND isnotnull(wr_order_number#16)) AND isnotnull(wr_refunded_cdemo_sk#12)) AND isnotnull(wr_returning_cdemo_sk#14)) AND isnotnull(wr_refunded_addr_sk#13)) AND isnotnull(wr_reason_sk#15)) -(20) Exchange -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Arguments: hashpartitioning(wr_item_sk#14, wr_order_number#19, 5), true, [id=#22] +(15) Exchange +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Arguments: hashpartitioning(wr_item_sk#11, wr_order_number#16, 5), true, [id=#19] -(21) Sort [codegen id : 6] -Input [8]: [wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] -Arguments: [wr_item_sk#14 ASC NULLS FIRST, wr_order_number#19 ASC NULLS FIRST], false, 0 +(16) Sort [codegen id : 5] +Input [8]: [wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] +Arguments: [wr_item_sk#11 ASC NULLS FIRST, wr_order_number#16 ASC NULLS FIRST], false, 0 -(22) SortMergeJoin -Left keys [2]: [cast(ws_item_sk#5 as bigint), cast(ws_order_number#7 as bigint)] -Right keys [2]: [wr_item_sk#14, wr_order_number#19] +(17) SortMergeJoin [codegen id : 9] +Left keys [2]: [cast(ws_item_sk#2 as bigint), cast(ws_order_number#4 as bigint)] +Right 
keys [2]: [wr_item_sk#11, wr_order_number#16] Join condition: None -(23) Project -Output [10]: [ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] -Input [14]: [ws_sold_date_sk#4, ws_item_sk#5, ws_order_number#7, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_item_sk#14, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_order_number#19, wr_fee#20, wr_refunded_cash#21] +(18) Project [codegen id : 9] +Output [10]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18] +Input [14]: [ws_sold_date_sk#1, ws_item_sk#2, ws_order_number#4, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_item_sk#11, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_order_number#16, wr_fee#17, wr_refunded_cash#18] + +(19) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_year#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct + +(20) ColumnarToRow [codegen id : 6] +Input [2]: [d_date_sk#20, d_year#21] + +(21) Filter [codegen id : 6] +Input [2]: [d_date_sk#20, d_year#21] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 2000)) AND isnotnull(d_date_sk#20)) + +(22) Project [codegen id : 6] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_year#21] + +(23) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (24) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [ws_sold_date_sk#4] +Left keys [1]: [ws_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 9] -Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] -Input [11]: [d_date_sk#1, ws_sold_date_sk#4, ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21] +Output [9]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18] +Input [11]: [ws_sold_date_sk#1, ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18, d_date_sk#20] (26) Scan parquet default.reason Output [2]: [r_reason_sk#23, r_reason_desc#24] @@ -188,13 +188,13 @@ Input [2]: [r_reason_sk#23, r_reason_desc#24] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] (30) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [wr_reason_sk#18] +Left keys [1]: [wr_reason_sk#15] Right keys [1]: [cast(r_reason_sk#23 as bigint)] Join condition: None (31) Project [codegen id : 9] -Output [9]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, 
ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_reason_sk#18, wr_fee#20, wr_refunded_cash#21, r_reason_sk#23, r_reason_desc#24] +Output [9]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_reason_sk#15, wr_fee#17, wr_refunded_cash#18, r_reason_sk#23, r_reason_desc#24] (32) Scan parquet default.customer_address Output [3]: [ca_address_sk#26, ca_state#27, ca_country#28] @@ -219,84 +219,84 @@ Input [2]: [ca_address_sk#26, ca_state#27] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (37) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [wr_refunded_addr_sk#16] +Left keys [1]: [wr_refunded_addr_sk#13] Right keys [1]: [cast(ca_address_sk#26 as bigint)] -Join condition: ((((ca_state#27 IN (IN,OH,NJ) AND (ws_net_profit#10 >= 100.00)) AND (ws_net_profit#10 <= 200.00)) OR ((ca_state#27 IN (WI,CT,KY) AND (ws_net_profit#10 >= 150.00)) AND (ws_net_profit#10 <= 300.00))) OR ((ca_state#27 IN (LA,IA,AR) AND (ws_net_profit#10 >= 50.00)) AND (ws_net_profit#10 <= 250.00))) +Join condition: ((((ca_state#27 IN (IN,OH,NJ) AND (ws_net_profit#7 >= 100.00)) AND (ws_net_profit#7 <= 200.00)) OR ((ca_state#27 IN (WI,CT,KY) AND (ws_net_profit#7 >= 150.00)) AND (ws_net_profit#7 <= 300.00))) OR ((ca_state#27 IN (LA,IA,AR) AND (ws_net_profit#7 >= 50.00)) AND (ws_net_profit#7 <= 250.00))) (38) Project [codegen id : 9] -Output [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, ws_sales_price#9, ws_net_profit#10, wr_refunded_cdemo_sk#15, wr_refunded_addr_sk#16, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, ca_address_sk#26, ca_state#27] +Output [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, ws_net_profit#7, wr_refunded_cdemo_sk#12, wr_refunded_addr_sk#13, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24, ca_address_sk#26, ca_state#27] (39) Exchange -Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Arguments: hashpartitioning(wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, 5), true, [id=#30] +Input [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Arguments: hashpartitioning(wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, 5), true, [id=#30] (40) Sort [codegen id : 10] -Input [7]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Arguments: [wr_refunded_cdemo_sk#15 ASC NULLS FIRST, wr_returning_cdemo_sk#17 ASC NULLS FIRST], false, 0 +Input [7]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Arguments: [wr_refunded_cdemo_sk#12 ASC NULLS FIRST, wr_returning_cdemo_sk#14 ASC NULLS FIRST], false, 0 (41) Scan parquet default.customer_demographics Output [3]: [cd_demo_sk#31, 
cd_marital_status#32, cd_education_status#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] ReadSchema: struct -(42) ColumnarToRow [codegen id : 12] +(42) ColumnarToRow [codegen id : 11] +Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] + +(43) Filter [codegen id : 11] Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] +Condition : (((isnotnull(cd_demo_sk#31) AND isnotnull(cd_marital_status#32)) AND isnotnull(cd_education_status#33)) AND ((((cd_marital_status#32 = M) AND (cd_education_status#33 = Advanced Degree)) OR ((cd_marital_status#32 = S) AND (cd_education_status#33 = College))) OR ((cd_marital_status#32 = W) AND (cd_education_status#33 = 2 yr Degree)))) -(43) Filter [codegen id : 12] +(44) BroadcastExchange Input [3]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33] -Condition : ((isnotnull(cd_demo_sk#31) AND isnotnull(cd_marital_status#32)) AND isnotnull(cd_education_status#33)) +Arguments: HashedRelationBroadcastMode(List(input[1, string, false], input[2, string, false]),false), [id=#34] -(44) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +(45) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree)),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree)))] +PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 11] -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] - -(46) Filter [codegen id : 11] -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Condition : (((isnotnull(cd_demo_sk#34) AND isnotnull(cd_marital_status#35)) AND isnotnull(cd_education_status#36)) AND ((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) OR ((cd_marital_status#35 = S) AND (cd_education_status#36 = College))) OR ((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)))) +(46) ColumnarToRow +Input [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] -(47) BroadcastExchange -Input [3]: [cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: HashedRelationBroadcastMode(List(input[1, string, false], input[2, string, false]),false), [id=#37] +(47) Filter +Input [3]: [cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] +Condition : ((isnotnull(cd_demo_sk#35) AND isnotnull(cd_marital_status#36)) AND isnotnull(cd_education_status#37)) (48) BroadcastHashJoin [codegen id : 12] Left keys [2]: [cd_marital_status#32, cd_education_status#33] -Right keys [2]: 
[cd_marital_status#35, cd_education_status#36] +Right keys [2]: [cd_marital_status#36, cd_education_status#37] Join condition: None (49) Project [codegen id : 12] -Output [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Input [6]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Output [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Input [6]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35, cd_marital_status#36, cd_education_status#37] (50) Exchange -Input [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: hashpartitioning(cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint), 5), true, [id=#38] +Input [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Arguments: hashpartitioning(cast(cd_demo_sk#31 as bigint), cast(cd_demo_sk#35 as bigint), 5), true, [id=#38] (51) Sort [codegen id : 13] -Input [4]: [cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] -Arguments: [cast(cd_demo_sk#34 as bigint) ASC NULLS FIRST, cast(cd_demo_sk#31 as bigint) ASC NULLS FIRST], false, 0 +Input [4]: [cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] +Arguments: [cast(cd_demo_sk#31 as bigint) ASC NULLS FIRST, cast(cd_demo_sk#35 as bigint) ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 14] -Left keys [2]: [wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17] -Right keys [2]: [cast(cd_demo_sk#34 as bigint), cast(cd_demo_sk#31 as bigint)] -Join condition: ((((((cd_marital_status#35 = M) AND (cd_education_status#36 = Advanced Degree)) AND (ws_sales_price#9 >= 100.00)) AND (ws_sales_price#9 <= 150.00)) OR ((((cd_marital_status#35 = S) AND (cd_education_status#36 = College)) AND (ws_sales_price#9 >= 50.00)) AND (ws_sales_price#9 <= 100.00))) OR ((((cd_marital_status#35 = W) AND (cd_education_status#36 = 2 yr Degree)) AND (ws_sales_price#9 >= 150.00)) AND (ws_sales_price#9 <= 200.00))) +Left keys [2]: [wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14] +Right keys [2]: [cast(cd_demo_sk#31 as bigint), cast(cd_demo_sk#35 as bigint)] +Join condition: ((((((cd_marital_status#32 = M) AND (cd_education_status#33 = Advanced Degree)) AND (ws_sales_price#6 >= 100.00)) AND (ws_sales_price#6 <= 150.00)) OR ((((cd_marital_status#32 = S) AND (cd_education_status#33 = College)) AND (ws_sales_price#6 >= 50.00)) AND (ws_sales_price#6 <= 100.00))) OR ((((cd_marital_status#32 = W) AND (cd_education_status#33 = 2 yr Degree)) AND (ws_sales_price#6 >= 150.00)) AND (ws_sales_price#6 <= 200.00))) (53) Project [codegen id : 14] -Output [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] -Input [11]: [ws_quantity#8, ws_sales_price#9, wr_refunded_cdemo_sk#15, wr_returning_cdemo_sk#17, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24, cd_demo_sk#31, cd_demo_sk#34, cd_marital_status#35, cd_education_status#36] +Output [4]: [ws_quantity#5, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] +Input [11]: [ws_quantity#5, ws_sales_price#6, wr_refunded_cdemo_sk#12, wr_returning_cdemo_sk#14, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24, cd_demo_sk#31, cd_marital_status#32, cd_education_status#33, cd_demo_sk#35] (54) HashAggregate [codegen id : 14] -Input [4]: [ws_quantity#8, wr_fee#20, wr_refunded_cash#21, r_reason_desc#24] +Input [4]: [ws_quantity#5, wr_fee#17, wr_refunded_cash#18, r_reason_desc#24] Keys [1]: 
[r_reason_desc#24] -Functions [3]: [partial_avg(cast(ws_quantity#8 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#21)), partial_avg(UnscaledValue(wr_fee#20))] +Functions [3]: [partial_avg(cast(ws_quantity#5 as bigint)), partial_avg(UnscaledValue(wr_refunded_cash#18)), partial_avg(UnscaledValue(wr_fee#17))] Aggregate Attributes [6]: [sum#39, count#40, sum#41, count#42, sum#43, count#44] Results [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] @@ -307,9 +307,9 @@ Arguments: hashpartitioning(r_reason_desc#24, 5), true, [id=#51] (56) HashAggregate [codegen id : 15] Input [7]: [r_reason_desc#24, sum#45, count#46, sum#47, count#48, sum#49, count#50] Keys [1]: [r_reason_desc#24] -Functions [3]: [avg(cast(ws_quantity#8 as bigint)), avg(UnscaledValue(wr_refunded_cash#21)), avg(UnscaledValue(wr_fee#20))] -Aggregate Attributes [3]: [avg(cast(ws_quantity#8 as bigint))#52, avg(UnscaledValue(wr_refunded_cash#21))#53, avg(UnscaledValue(wr_fee#20))#54] -Results [5]: [substr(r_reason_desc#24, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(cast(ws_quantity#8 as bigint))#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#21))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#20))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58, avg(cast(ws_quantity#8 as bigint))#52 AS aggOrder#59] +Functions [3]: [avg(cast(ws_quantity#5 as bigint)), avg(UnscaledValue(wr_refunded_cash#18)), avg(UnscaledValue(wr_fee#17))] +Aggregate Attributes [3]: [avg(cast(ws_quantity#5 as bigint))#52, avg(UnscaledValue(wr_refunded_cash#18))#53, avg(UnscaledValue(wr_fee#17))#54] +Results [5]: [substr(r_reason_desc#24, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(cast(ws_quantity#5 as bigint))#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#18))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#17))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58, avg(cast(ws_quantity#5 as bigint))#52 AS aggOrder#59] (57) TakeOrderedAndProject Input [5]: [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58, aggOrder#59] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt index e7aee17172e60..3fa7d84f55966 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/simplified.txt @@ -18,23 +18,15 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas Project [ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_fee,wr_refunded_cash,r_reason_desc] BroadcastHashJoin [wr_reason_sk,r_reason_sk] Project [ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] - BroadcastHashJoin [d_date_sk,ws_sold_date_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_quantity,ws_sales_price,ws_net_profit,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_fee,wr_refunded_cash] SortMergeJoin 
[ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] InputAdapter - WholeStageCodegen (4) + WholeStageCodegen (3) Sort [ws_item_sk,ws_order_number] InputAdapter - Exchange [ws_item_sk,ws_order_number] #4 - WholeStageCodegen (3) + Exchange [ws_item_sk,ws_order_number] #3 + WholeStageCodegen (2) Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] Filter [ws_item_sk,ws_order_number,ws_web_page_sk,ws_sold_date_sk,ws_sales_price,ws_net_profit] @@ -42,22 +34,30 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_page_sk,ws_order_number,ws_quantity,ws_sales_price,ws_net_profit] InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) + BroadcastExchange #4 + WholeStageCodegen (1) Filter [wp_web_page_sk] ColumnarToRow InputAdapter Scan parquet default.web_page [wp_web_page_sk] InputAdapter - WholeStageCodegen (6) + WholeStageCodegen (5) Sort [wr_item_sk,wr_order_number] InputAdapter - Exchange [wr_item_sk,wr_order_number] #6 - WholeStageCodegen (5) + Exchange [wr_item_sk,wr_order_number] #5 + WholeStageCodegen (4) Filter [wr_item_sk,wr_order_number,wr_refunded_cdemo_sk,wr_returning_cdemo_sk,wr_refunded_addr_sk,wr_reason_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_refunded_cdemo_sk,wr_refunded_addr_sk,wr_returning_cdemo_sk,wr_reason_sk,wr_order_number,wr_fee,wr_refunded_cash] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #7 WholeStageCodegen (7) @@ -79,12 +79,8 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas InputAdapter Exchange [cd_demo_sk,cd_demo_sk] #9 WholeStageCodegen (12) - Project [cd_demo_sk,cd_demo_sk,cd_marital_status,cd_education_status] + Project [cd_demo_sk,cd_marital_status,cd_education_status,cd_demo_sk] BroadcastHashJoin [cd_marital_status,cd_education_status,cd_marital_status,cd_education_status] - Filter [cd_demo_sk,cd_marital_status,cd_education_status] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #10 WholeStageCodegen (11) @@ -92,3 +88,7 @@ TakeOrderedAndProject [substr(r_reason_desc, 1, 20),aggOrder,avg(wr_refunded_cas ColumnarToRow InputAdapter Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] + Filter [cd_demo_sk,cd_marital_status,cd_education_status] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt index 69b02557c4750..4e85516b594f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -13,14 +13,14 @@ : : :- * Project (16) : : : +- * BroadcastHashJoin Inner BuildRight (15) : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildLeft (8) - : : : : :- BroadcastExchange (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : 
: : +- Scan parquet default.customer_demographics (1) - : : : : +- * Filter (7) - : : : : +- * ColumnarToRow (6) - : : : : +- Scan parquet default.customer (5) + : : : : +- * BroadcastHashJoin Inner BuildRight (8) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.customer (1) + : : : : +- BroadcastExchange (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.customer_demographics (4) : : : +- BroadcastExchange (14) : : : +- * Project (13) : : : +- * Filter (12) @@ -33,61 +33,61 @@ : : +- Scan parquet default.customer_address (17) : +- BroadcastExchange (34) : +- * Project (33) - : +- * BroadcastHashJoin Inner BuildLeft (32) - : :- BroadcastExchange (28) - : : +- * Project (27) - : : +- * Filter (26) - : : +- * ColumnarToRow (25) - : : +- Scan parquet default.date_dim (24) - : +- * Filter (31) - : +- * ColumnarToRow (30) - : +- Scan parquet default.catalog_returns (29) + : +- * BroadcastHashJoin Inner BuildRight (32) + : :- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.catalog_returns (24) + : +- BroadcastExchange (31) + : +- * Project (30) + : +- * Filter (29) + : +- * ColumnarToRow (28) + : +- Scan parquet default.date_dim (27) +- BroadcastExchange (40) +- * Filter (39) +- * ColumnarToRow (38) +- Scan parquet default.call_center (37) -(1) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +(1) Scan parquet default.customer +Output [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct -(3) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Condition : ((((cd_marital_status#2 = M) AND (cd_education_status#3 = Unknown)) OR ((cd_marital_status#2 = W) AND (cd_education_status#3 = Advanced Degree))) AND isnotnull(cd_demo_sk#1)) +(2) ColumnarToRow [codegen id : 7] +Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] -(4) BroadcastExchange -Input [3]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#4] +(3) Filter [codegen id : 7] +Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +Condition : (((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#4)) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) -(5) Scan parquet default.customer -Output [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +(4) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), 
IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 1] +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -(6) ColumnarToRow -Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +(6) Filter [codegen id : 1] +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Condition : ((((cd_marital_status#6 = M) AND (cd_education_status#7 = Unknown)) OR ((cd_marital_status#6 = W) AND (cd_education_status#7 = Advanced Degree))) AND isnotnull(cd_demo_sk#5)) -(7) Filter -Input [4]: [c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] -Condition : (((isnotnull(c_customer_sk#5) AND isnotnull(c_current_addr_sk#8)) AND isnotnull(c_current_cdemo_sk#6)) AND isnotnull(c_current_hdemo_sk#7)) +(7) BroadcastExchange +Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] (8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cd_demo_sk#1] -Right keys [1]: [c_current_cdemo_sk#6] +Left keys [1]: [c_current_cdemo_sk#2] +Right keys [1]: [cd_demo_sk#5] Join condition: None (9) Project [codegen id : 7] -Output [5]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8] -Input [7]: [cd_demo_sk#1, cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_cdemo_sk#6, c_current_hdemo_sk#7, c_current_addr_sk#8] +Output [5]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] +Input [7]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] (10) Scan parquet default.household_demographics Output [2]: [hd_demo_sk#9, hd_buy_potential#10] @@ -112,13 +112,13 @@ Input [1]: [hd_demo_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#7] +Left keys [1]: [c_current_hdemo_sk#3] Right keys [1]: [hd_demo_sk#9] Join condition: None (16) Project [codegen id : 7] -Output [4]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8] -Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_hdemo_sk#7, c_current_addr_sk#8, hd_demo_sk#9] +Output [4]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] +Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#9] (17) Scan parquet default.customer_address Output [2]: [ca_address_sk#12, ca_gmt_offset#13] @@ -143,71 +143,71 @@ Input [1]: [ca_address_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#8] +Left keys [1]: [c_current_addr_sk#4] Right keys [1]: [ca_address_sk#12] Join condition: None (23) Project [codegen id : 7] -Output [3]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5] -Input [5]: 
[cd_marital_status#2, cd_education_status#3, c_customer_sk#5, c_current_addr_sk#8, ca_address_sk#12] +Output [3]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7] +Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#12] -(24) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +(24) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct -(25) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +(25) ColumnarToRow [codegen id : 5] +Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -(26) Filter [codegen id : 4] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] -Condition : ((((isnotnull(d_year#16) AND isnotnull(d_moy#17)) AND (d_year#16 = 1998)) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#15)) +(26) Filter [codegen id : 5] +Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Condition : ((isnotnull(cr_call_center_sk#17) AND isnotnull(cr_returned_date_sk#15)) AND isnotnull(cr_returning_customer_sk#16)) -(27) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +(27) Scan parquet default.date_dim +Output [3]: [d_date_sk#19, d_year#20, d_moy#21] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct -(28) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +(28) ColumnarToRow [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -(29) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct +(29) Filter [codegen id : 4] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +Condition : ((((isnotnull(d_year#20) AND isnotnull(d_moy#21)) AND (d_year#20 = 1998)) AND (d_moy#21 = 11)) AND isnotnull(d_date_sk#19)) -(30) ColumnarToRow -Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +(30) Project [codegen id : 4] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -(31) Filter -Input [4]: [cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Condition : ((isnotnull(cr_call_center_sk#21) AND isnotnull(cr_returned_date_sk#19)) AND isnotnull(cr_returning_customer_sk#20)) +(31) BroadcastExchange +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 5] -Left 
keys [1]: [d_date_sk#15] -Right keys [1]: [cr_returned_date_sk#19] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#19] Join condition: None (33) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] -Input [5]: [d_date_sk#15, cr_returned_date_sk#19, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Output [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Input [5]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18, d_date_sk#19] (34) BroadcastExchange -Input [3]: [cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Input [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] (35) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_customer_sk#5] -Right keys [1]: [cr_returning_customer_sk#20] +Left keys [1]: [c_customer_sk#1] +Right keys [1]: [cr_returning_customer_sk#16] Join condition: None (36) Project [codegen id : 7] -Output [4]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22] -Input [6]: [cd_marital_status#2, cd_education_status#3, c_customer_sk#5, cr_returning_customer_sk#20, cr_call_center_sk#21, cr_net_loss#22] +Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18] +Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] (37) Scan parquet default.call_center Output [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] @@ -228,31 +228,31 @@ Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#2 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] (41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_call_center_sk#21] +Left keys [1]: [cr_call_center_sk#17] Right keys [1]: [cc_call_center_sk#24] Join condition: None (42) Project [codegen id : 7] -Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] -Input [8]: [cd_marital_status#2, cd_education_status#3, cr_call_center_sk#21, cr_net_loss#22, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] +Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] (43) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#22, cd_marital_status#2, cd_education_status#3] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#22))] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#18))] Aggregate Attributes [1]: [sum#29] -Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] +Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, 
cd_education_status#7, sum#30] (44) Exchange -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] -Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, 5), true, [id=#31] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, 5), true, [id=#31] (45) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3, sum#30] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#2, cd_education_status#3] -Functions [1]: [sum(UnscaledValue(cr_net_loss#22))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#22))#32] -Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#22))#32,17,2) AS Returns_Loss#36] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] +Functions [1]: [sum(UnscaledValue(cr_net_loss#18))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#18))#32] +Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#18))#32,17,2) AS Returns_Loss#36] (46) Exchange Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt index f64791821893d..87beb3b565cc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt @@ -12,12 +12,16 @@ WholeStageCodegen (9) BroadcastHashJoin [cr_call_center_sk,cc_call_center_sk] Project [cd_marital_status,cd_education_status,cr_call_center_sk,cr_net_loss] BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk] + Project [c_customer_sk,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk,cd_marital_status,cd_education_status] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [cd_marital_status,cd_education_status,c_customer_sk,c_current_hdemo_sk,c_current_addr_sk] - BroadcastHashJoin [cd_demo_sk,c_current_cdemo_sk] + Project [c_customer_sk,c_current_hdemo_sk,c_current_addr_sk,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) @@ -25,10 +29,6 @@ WholeStageCodegen (9) ColumnarToRow InputAdapter Scan parquet default.customer_demographics 
[cd_demo_sk,cd_marital_status,cd_education_status] - Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) @@ -49,7 +49,11 @@ WholeStageCodegen (9) BroadcastExchange #6 WholeStageCodegen (5) Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [d_date_sk,cr_returned_date_sk] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] InputAdapter BroadcastExchange #7 WholeStageCodegen (4) @@ -58,10 +62,6 @@ WholeStageCodegen (9) ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] InputAdapter BroadcastExchange #8 WholeStageCodegen (6) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt index c547e7af5d790..34eba382992c3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -10,15 +10,15 @@ TakeOrderedAndProject (32) : :- * Project (16) : : +- * BroadcastHashJoin Inner BuildRight (15) : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : :- BroadcastExchange (5) - : : : : +- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.date_dim (1) - : : : +- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.catalog_sales (6) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) : : +- BroadcastExchange (14) : : +- * Filter (13) : : +- * ColumnarToRow (12) @@ -33,50 +33,50 @@ TakeOrderedAndProject (32) +- Scan parquet default.warehouse (23) -(1) Scan parquet default.date_dim -Output [2]: [d_date_sk#1, d_month_seq#2] +(1) Scan parquet default.catalog_sales +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(2) ColumnarToRow [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] -(3) Filter 
[codegen id : 1] -Input [2]: [d_date_sk#1, d_month_seq#2] -Condition : (((isnotnull(d_month_seq#2) AND (d_month_seq#2 >= 1200)) AND (d_month_seq#2 <= 1211)) AND isnotnull(d_date_sk#1)) +(3) Filter [codegen id : 5] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Condition : (((isnotnull(cs_warehouse_sk#5) AND isnotnull(cs_ship_mode_sk#4)) AND isnotnull(cs_call_center_sk#3)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [d_date_sk#1] -Input [2]: [d_date_sk#1, d_month_seq#2] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#6, d_month_seq#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] -(6) Scan parquet default.catalog_sales -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_ship_mode_sk), IsNotNull(cs_call_center_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [d_date_sk#6, d_month_seq#7] +Condition : (((isnotnull(d_month_seq#7) AND (d_month_seq#7 >= 1200)) AND (d_month_seq#7 <= 1211)) AND isnotnull(d_date_sk#6)) -(7) ColumnarToRow -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#6] +Input [2]: [d_date_sk#6, d_month_seq#7] -(8) Filter -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Condition : (((isnotnull(cs_warehouse_sk#8) AND isnotnull(cs_ship_mode_sk#7)) AND isnotnull(cs_call_center_sk#6)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [d_date_sk#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] (9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [d_date_sk#1] -Right keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] +Right keys [1]: [d_date_sk#6] Join condition: None (10) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] -Input [6]: [d_date_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5] +Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, d_date_sk#6] (11) Scan parquet default.ship_mode Output [2]: [sm_ship_mode_sk#9, sm_type#10] @@ -97,13 +97,13 @@ Input [2]: [sm_ship_mode_sk#9, sm_type#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] (15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_ship_mode_sk#7] +Left keys [1]: [cs_ship_mode_sk#4] Right keys [1]: [sm_ship_mode_sk#9] Join condition: None (16) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, 
cs_warehouse_sk#8, sm_type#10] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_ship_mode_sk#7, cs_warehouse_sk#8, sm_ship_mode_sk#9, sm_type#10] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_warehouse_sk#5, sm_type#10] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_ship_mode_sk#4, cs_warehouse_sk#5, sm_ship_mode_sk#9, sm_type#10] (17) Scan parquet default.call_center Output [2]: [cc_call_center_sk#12, cc_name#13] @@ -124,13 +124,13 @@ Input [2]: [cc_call_center_sk#12, cc_name#13] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_call_center_sk#6] +Left keys [1]: [cs_call_center_sk#3] Right keys [1]: [cc_call_center_sk#12] Join condition: None (22) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_call_center_sk#6, cs_warehouse_sk#8, sm_type#10, cc_call_center_sk#12, cc_name#13] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_call_center_sk#3, cs_warehouse_sk#5, sm_type#10, cc_call_center_sk#12, cc_name#13] (23) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] @@ -151,18 +151,18 @@ Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (27) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_warehouse_sk#8] +Left keys [1]: [cs_warehouse_sk#5] Right keys [1]: [w_warehouse_sk#15] Join condition: None (28) Project [codegen id : 5] -Output [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] -Input [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_warehouse_sk#8, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] +Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] (29) HashAggregate [codegen id : 5] -Input [5]: [cs_sold_date_sk#4, cs_ship_date_sk#5, w_warehouse_name#16, sm_type#10, cc_name#13] +Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 
60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] @@ -173,9 +173,9 @@ Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, c (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 30) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 60) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 90) AND ((cs_ship_date_sk#5 - cs_sold_date_sk#4) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#5 - cs_sold_date_sk#4) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND 
((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt index de3b1913ae25c..b25b16136992c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt @@ -12,7 +12,11 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days , Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_warehouse_sk,sm_type] BroadcastHashJoin [cs_ship_mode_sk,sm_ship_mode_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] - BroadcastHashJoin [d_date_sk,cs_ship_date_sk] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) @@ -21,10 +25,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days , ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_month_seq] - Filter [cs_warehouse_sk,cs_ship_mode_sk,cs_call_center_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_call_center_sk,cs_ship_mode_sk,cs_warehouse_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt index ab246a3449557..1b9e8f37e9418 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt @@ -11,30 +11,30 @@ TakeOrderedAndProject (50) : +- Exchange (27) : +- * Project (26) : +- * BroadcastHashJoin Inner BuildRight (25) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) : : :- * Filter (3) : : : +- * ColumnarToRow (2) : : : +- Scan parquet default.store_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (17) + : : +- * Project (16) + : : +- * Filter (15) + : : +- * BroadcastHashJoin LeftOuter BuildRight (14) + : : :- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.item (4) + : : +- BroadcastExchange (13) + : : +- * HashAggregate (12) + : : +- Exchange (11) + : : +- * HashAggregate (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet default.item (7) : +- BroadcastExchange (24) : +- * Project (23) : +- * Filter (22) - : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- BroadcastExchange (20) - : +- * HashAggregate (19) - : +- Exchange (18) - : +- * HashAggregate (17) - : +- * Filter (16) - : +- * ColumnarToRow (15) - : +- Scan parquet default.item (14) + : +- * ColumnarToRow (21) + : +- Scan parquet default.date_dim (20) +- * Sort (42) +- Exchange (41) +- * Project (40) @@ -65,112 +65,112 @@ Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3] Condition : ((isnotnull(ss_customer_sk#3) AND isnotnull(ss_sold_date_sk#1)) AND isnotnull(ss_item_sk#2)) -(4) Scan parquet default.date_dim -Output [2]: [d_date_sk#4, d_month_seq#5] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(6) Filter [codegen id : 1] -Input [2]: [d_date_sk#4, d_month_seq#5] -Condition : ((isnotnull(d_month_seq#5) AND (d_month_seq#5 = Subquery scalar-subquery#6, [id=#7])) AND isnotnull(d_date_sk#4)) - -(7) Project [codegen id : 1] -Output [1]: [d_date_sk#4] -Input [2]: [d_date_sk#4, d_month_seq#5] - -(8) BroadcastExchange -Input [1]: [d_date_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] - -(9) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#4] -Join condition: None - -(10) Project [codegen id : 5] -Output [2]: [ss_item_sk#2, ss_customer_sk#3] -Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, d_date_sk#4] - -(11) Scan parquet default.item -Output [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(4) Scan parquet default.item +Output [3]: [i_item_sk#4, i_current_price#5, 
i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] ReadSchema: struct -(12) ColumnarToRow [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] +(5) ColumnarToRow [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] -(13) Filter [codegen id : 4] -Input [3]: [i_item_sk#9, i_current_price#10, i_category#11] -Condition : (isnotnull(i_current_price#10) AND isnotnull(i_item_sk#9)) +(6) Filter [codegen id : 3] +Input [3]: [i_item_sk#4, i_current_price#5, i_category#6] +Condition : (isnotnull(i_current_price#5) AND isnotnull(i_item_sk#4)) -(14) Scan parquet default.item -Output [2]: [i_current_price#10, i_category#11] +(7) Scan parquet default.item +Output [2]: [i_current_price#5, i_category#6] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] - -(16) Filter [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Condition : isnotnull(i_category#11) - -(17) HashAggregate [codegen id : 2] -Input [2]: [i_current_price#10, i_category#11] -Keys [1]: [i_category#11] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [i_category#11, sum#14, count#15] - -(18) Exchange -Input [3]: [i_category#11, sum#14, count#15] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#16] - -(19) HashAggregate [codegen id : 3] -Input [3]: [i_category#11, sum#14, count#15] -Keys [1]: [i_category#11] -Functions [1]: [avg(UnscaledValue(i_current_price#10))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#10))#17] -Results [2]: [cast((avg(UnscaledValue(i_current_price#10))#17 / 100.0) as decimal(11,6)) AS avg(i_current_price)#18, i_category#11 AS i_category#11#19] - -(20) BroadcastExchange -Input [2]: [avg(i_current_price)#18, i_category#11#19] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#20] - -(21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_category#11] -Right keys [1]: [i_category#11#19] +(8) ColumnarToRow [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] + +(9) Filter [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Condition : isnotnull(i_category#6) + +(10) HashAggregate [codegen id : 1] +Input [2]: [i_current_price#5, i_category#6] +Keys [1]: [i_category#6] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [2]: [sum#7, count#8] +Results [3]: [i_category#6, sum#9, count#10] + +(11) Exchange +Input [3]: [i_category#6, sum#9, count#10] +Arguments: hashpartitioning(i_category#6, 5), true, [id=#11] + +(12) HashAggregate [codegen id : 2] +Input [3]: [i_category#6, sum#9, count#10] +Keys [1]: [i_category#6] +Functions [1]: [avg(UnscaledValue(i_current_price#5))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#5))#12] +Results [2]: [cast((avg(UnscaledValue(i_current_price#5))#12 / 100.0) as decimal(11,6)) AS avg(i_current_price)#13, i_category#6 AS i_category#6#14] + +(13) BroadcastExchange +Input [2]: [avg(i_current_price)#13, i_category#6#14] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#15] + +(14) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [i_category#6] +Right keys [1]: [i_category#6#14] +Join condition: None + +(15) Filter [codegen id : 3] 
+Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] +Condition : (cast(i_current_price#5 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#13)), DecimalType(14,7), true)) + +(16) Project [codegen id : 3] +Output [1]: [i_item_sk#4] +Input [5]: [i_item_sk#4, i_current_price#5, i_category#6, avg(i_current_price)#13, i_category#6#14] + +(17) BroadcastExchange +Input [1]: [i_item_sk#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] + +(18) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#4] Join condition: None +(19) Project [codegen id : 5] +Output [2]: [ss_sold_date_sk#1, ss_customer_sk#3] +Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, i_item_sk#4] + +(20) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_month_seq#18] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct + +(21) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#17, d_month_seq#18] + (22) Filter [codegen id : 4] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] -Condition : (cast(i_current_price#10 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#18)), DecimalType(14,7), true)) +Input [2]: [d_date_sk#17, d_month_seq#18] +Condition : ((isnotnull(d_month_seq#18) AND (d_month_seq#18 = Subquery scalar-subquery#19, [id=#20])) AND isnotnull(d_date_sk#17)) (23) Project [codegen id : 4] -Output [1]: [i_item_sk#9] -Input [5]: [i_item_sk#9, i_current_price#10, i_category#11, avg(i_current_price)#18, i_category#11#19] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_month_seq#18] (24) BroadcastExchange -Input [1]: [i_item_sk#9] +Input [1]: [d_date_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (25) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 5] Output [1]: [ss_customer_sk#3] -Input [3]: [ss_item_sk#2, ss_customer_sk#3, i_item_sk#9] +Input [3]: [ss_sold_date_sk#1, ss_customer_sk#3, d_date_sk#17] (27) Exchange Input [1]: [ss_customer_sk#3] @@ -282,7 +282,7 @@ Arguments: 100, [cnt#35 ASC NULLS FIRST, ca_state#24 ASC NULLS FIRST], [state#34 ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery#6, [id=#7] +Subquery:1 Hosting operator id = 22 Hosting Expression = Subquery scalar-subquery#19, [id=#20] * HashAggregate (57) +- Exchange (56) +- * HashAggregate (55) @@ -293,39 +293,39 @@ Subquery:1 Hosting operator id = 6 Hosting Expression = Subquery scalar-subquery (51) Scan parquet default.date_dim -Output [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [3]: [d_month_seq#18, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (53) Filter [codegen id : 1] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] Condition : 
(((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) (54) Project [codegen id : 1] -Output [1]: [d_month_seq#5] -Input [3]: [d_month_seq#5, d_year#37, d_moy#38] +Output [1]: [d_month_seq#18] +Input [3]: [d_month_seq#18, d_year#37, d_moy#38] (55) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] (56) Exchange -Input [1]: [d_month_seq#5] -Arguments: hashpartitioning(d_month_seq#5, 5), true, [id=#39] +Input [1]: [d_month_seq#18] +Arguments: hashpartitioning(d_month_seq#18, 5), true, [id=#39] (57) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#5] -Keys [1]: [d_month_seq#5] +Input [1]: [d_month_seq#18] +Keys [1]: [d_month_seq#18] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#5] +Results [1]: [d_month_seq#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt index 2700741b82c04..3cbd44fc5a7d9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/simplified.txt @@ -16,55 +16,55 @@ TakeOrderedAndProject [cnt,ca_state,state] Exchange [ss_customer_sk] #2 WholeStageCodegen (5) Project [ss_customer_sk] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_customer_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] Filter [ss_customer_sk,ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] InputAdapter BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [d_month_seq] + WholeStageCodegen (3) + Project [i_item_sk] + Filter [i_current_price,avg(i_current_price)] + BroadcastHashJoin [i_category,i_category] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - Exchange [d_month_seq] #4 - WholeStageCodegen (1) - HashAggregate [d_month_seq] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow + Scan parquet default.item [i_item_sk,i_current_price,i_category] InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (2) + HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] + InputAdapter + Exchange [i_category] #5 + WholeStageCodegen (1) + HashAggregate [i_category,i_current_price] [sum,count,sum,count] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_current_price,i_category] InputAdapter - BroadcastExchange #5 + BroadcastExchange #6 WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,avg(i_current_price)] - BroadcastHashJoin [i_category,i_category] - Filter [i_current_price,i_item_sk] - ColumnarToRow + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [d_month_seq] InputAdapter - Scan parquet default.item 
[i_item_sk,i_current_price,i_category] + Exchange [d_month_seq] #7 + WholeStageCodegen (1) + HashAggregate [d_month_seq] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - HashAggregate [i_category,sum,count] [avg(UnscaledValue(i_current_price)),avg(i_current_price),i_category,sum,count] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (2) - HashAggregate [i_category,i_current_price] [sum,count,sum,count] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_current_price,i_category] + Scan parquet default.date_dim [d_date_sk,d_month_seq] InputAdapter WholeStageCodegen (12) Sort [c_customer_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index c2627bd7e4cc9..a7f328537b7ac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -24,15 +24,15 @@ TakeOrderedAndProject (79) : : : : : :- * Project (17) : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildLeft (9) - : : : : : : : :- BroadcastExchange (5) - : : : : : : : : +- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.household_demographics (1) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.catalog_sales (6) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : : :- * Filter (3) + : : : : : : : : +- * ColumnarToRow (2) + : : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : : +- BroadcastExchange (8) + : : : : : : : +- * Project (7) + : : : : : : : +- * Filter (6) + : : : : : : : +- * ColumnarToRow (5) + : : : : : : : +- Scan parquet default.household_demographics (4) : : : : : : +- BroadcastExchange (15) : : : : : : +- * Project (14) : : : : : : +- * Filter (13) @@ -49,26 +49,26 @@ TakeOrderedAndProject (79) : : : : +- Scan parquet default.item (26) : : : +- BroadcastExchange (43) : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : :- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- BroadcastExchange (40) - : : : +- * Project (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.date_dim (36) + : : : +- * BroadcastHashJoin Inner BuildLeft (41) + : : : :- BroadcastExchange (37) + : : : : +- * Project (36) + : : : : +- * Filter (35) + : : : : +- * ColumnarToRow (34) + : : : : +- Scan parquet default.date_dim (33) + : : : +- * Filter (40) + : : : +- * ColumnarToRow (39) + : : : +- Scan parquet default.date_dim (38) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildLeft (55) - : : :- BroadcastExchange (51) - : : : +- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.warehouse (48) - : : +- * Filter (54) - : : +- * ColumnarToRow (53) - : : +- Scan parquet default.inventory (52) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Filter (50) + : : : +- * 
ColumnarToRow (49) + : : : +- Scan parquet default.inventory (48) + : : +- BroadcastExchange (54) + : : +- * Filter (53) + : : +- * ColumnarToRow (52) + : : +- Scan parquet default.warehouse (51) : +- BroadcastExchange (64) : +- * Filter (63) : +- * ColumnarToRow (62) @@ -80,50 +80,50 @@ TakeOrderedAndProject (79) +- Scan parquet default.catalog_returns (69) -(1) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#1, hd_buy_potential#2] +(1) Scan parquet default.catalog_sales +Output [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_sales] +PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(2) ColumnarToRow [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] -(3) Filter [codegen id : 1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] -Condition : ((isnotnull(hd_buy_potential#2) AND (hd_buy_potential#2 = 1001-5000)) AND isnotnull(hd_demo_sk#1)) +(3) Filter [codegen id : 4] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Condition : (((((isnotnull(cs_quantity#8) AND isnotnull(cs_item_sk#5)) AND isnotnull(cs_bill_cdemo_sk#3)) AND isnotnull(cs_bill_hdemo_sk#4)) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_ship_date_sk#2)) -(4) Project [codegen id : 1] -Output [1]: [hd_demo_sk#1] -Input [2]: [hd_demo_sk#1, hd_buy_potential#2] +(4) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000), IsNotNull(hd_demo_sk)] +ReadSchema: struct -(5) BroadcastExchange -Input [1]: [hd_demo_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +(5) ColumnarToRow [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(6) Scan parquet default.catalog_sales -Output [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_sales] -PushedFilters: [IsNotNull(cs_quantity), IsNotNull(cs_item_sk), IsNotNull(cs_bill_cdemo_sk), IsNotNull(cs_bill_hdemo_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_ship_date_sk)] -ReadSchema: struct +(6) Filter [codegen id : 1] +Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Condition : ((isnotnull(hd_buy_potential#10) AND (hd_buy_potential#10 = 1001-5000)) AND isnotnull(hd_demo_sk#9)) -(7) ColumnarToRow -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +(7) Project [codegen id : 1] +Output [1]: [hd_demo_sk#9] 
+Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -(8) Filter -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Condition : (((((isnotnull(cs_quantity#11) AND isnotnull(cs_item_sk#8)) AND isnotnull(cs_bill_cdemo_sk#6)) AND isnotnull(cs_bill_hdemo_sk#7)) AND isnotnull(cs_sold_date_sk#4)) AND isnotnull(cs_ship_date_sk#5)) +(8) BroadcastExchange +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [hd_demo_sk#1] -Right keys [1]: [cs_bill_hdemo_sk#7] +Left keys [1]: [cs_bill_hdemo_sk#4] +Right keys [1]: [hd_demo_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [7]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [9]: [hd_demo_sk#1, cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_bill_hdemo_sk#7, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] +Output [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [9]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_bill_hdemo_sk#4, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, hd_demo_sk#9] (11) Scan parquet default.customer_demographics Output [2]: [cd_demo_sk#12, cd_marital_status#13] @@ -148,13 +148,13 @@ Input [1]: [cd_demo_sk#12] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_bill_cdemo_sk#6] +Left keys [1]: [cs_bill_cdemo_sk#3] Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_bill_cdemo_sk#6, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, cd_demo_sk#12] +Output [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, cd_demo_sk#12] (18) Scan parquet default.date_dim Output [2]: [d_date_sk#15, d_date#16] @@ -175,21 +175,21 @@ Input [2]: [d_date_sk#15, d_date#16] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_ship_date_sk#5] +Left keys [1]: [cs_ship_date_sk#2] Right keys [1]: [d_date_sk#15] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Input [8]: [cs_sold_date_sk#4, cs_ship_date_sk#5, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date_sk#15, d_date#16] +Output [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date_sk#15, d_date#16] (24) Exchange -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: hashpartitioning(cs_item_sk#8, 5), true, [id=#18] +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, 
cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] (25) Sort [codegen id : 5] -Input [6]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16] -Arguments: [cs_item_sk#8 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] +Arguments: [cs_item_sk#5 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item Output [2]: [i_item_sk#19, i_item_desc#20] @@ -214,137 +214,137 @@ Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] -Left keys [1]: [cs_item_sk#8] +Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20] -Input [8]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_sk#19, i_item_desc#20] +Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] (33) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_week_seq#23] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] -ReadSchema: struct +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] +ReadSchema: struct -(34) ColumnarToRow [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] +(34) ColumnarToRow [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(35) Filter [codegen id : 9] -Input [2]: [d_date_sk#22, d_week_seq#23] -Condition : (isnotnull(d_week_seq#23) AND isnotnull(d_date_sk#22)) +(35) Filter [codegen id : 8] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) -(36) Scan parquet default.date_dim -Output [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] -ReadSchema: struct +(36) Project [codegen id : 8] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -(37) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(37) BroadcastExchange +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] -(38) Filter [codegen id : 8] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] -Condition : ((((isnotnull(d_year#27) AND (d_year#27 = 2001)) AND isnotnull(d_date_sk#24)) AND isnotnull(d_week_seq#26)) AND isnotnull(d_date#25)) +(38) Scan parquet default.date_dim +Output [2]: [d_date_sk#27, d_week_seq#28] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_week_seq), 
IsNotNull(d_date_sk)] +ReadSchema: struct -(39) Project [codegen id : 8] -Output [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Input [4]: [d_date_sk#24, d_date#25, d_week_seq#26, d_year#27] +(39) ColumnarToRow +Input [2]: [d_date_sk#27, d_week_seq#28] -(40) BroadcastExchange -Input [3]: [d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#28] +(40) Filter +Input [2]: [d_date_sk#27, d_week_seq#28] +Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) (41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#23] -Right keys [1]: [d_week_seq#26] +Left keys [1]: [d_week_seq#24] +Right keys [1]: [d_week_seq#28] Join condition: None (42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Input [5]: [d_date_sk#22, d_week_seq#23, d_date_sk#24, d_date#25, d_week_seq#26] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] (43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#29] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#4] -Right keys [1]: [d_date_sk#24] -Join condition: (d_date#16 > d_date#25 + 5 days) +Left keys [1]: [cs_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] +Join condition: (d_date#16 > d_date#23 + 5 days) (45) Project [codegen id : 10] -Output [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Input [11]: [cs_sold_date_sk#4, cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, d_date#16, i_item_desc#20, d_date_sk#22, d_date_sk#24, d_date#25, d_week_seq#26] +Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] (46) Exchange -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, d_date_sk#22, 5), true, [id=#30] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] (47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, d_date_sk#22 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 -(48) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#31, w_warehouse_name#32] +(48) Scan parquet default.inventory +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true -Location [not included in comparison]/{warehouse_dir}/warehouse] -PushedFilters: [IsNotNull(w_warehouse_sk)] -ReadSchema: struct - 
-(49) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] +Location [not included in comparison]/{warehouse_dir}/inventory] +PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] +ReadSchema: struct -(50) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Condition : isnotnull(w_warehouse_sk#31) +(49) ColumnarToRow [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(51) BroadcastExchange -Input [2]: [w_warehouse_sk#31, w_warehouse_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#33] +(50) Filter [codegen id : 13] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) -(52) Scan parquet default.inventory -Output [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(51) Scan parquet default.warehouse +Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true -Location [not included in comparison]/{warehouse_dir}/inventory] -PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/warehouse] +PushedFilters: [IsNotNull(w_warehouse_sk)] +ReadSchema: struct + +(52) ColumnarToRow [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] -(53) ColumnarToRow -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +(53) Filter [codegen id : 12] +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Condition : isnotnull(w_warehouse_sk#35) -(54) Filter -Input [4]: [inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] -Condition : (((isnotnull(inv_quantity_on_hand#37) AND isnotnull(inv_item_sk#35)) AND isnotnull(inv_warehouse_sk#36)) AND isnotnull(inv_date_sk#34)) +(54) BroadcastExchange +Input [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] (55) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [w_warehouse_sk#31] -Right keys [1]: [inv_warehouse_sk#36] +Left keys [1]: [inv_warehouse_sk#33] +Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Input [6]: [w_warehouse_sk#31, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_warehouse_sk#36, inv_quantity_on_hand#37] +Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: hashpartitioning(inv_item_sk#35, inv_date_sk#34, 5), true, [id=#38] +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] -Arguments: [inv_item_sk#35 ASC NULLS FIRST, 
inv_date_sk#34 ASC NULLS FIRST], false, 0 +Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#8, d_date_sk#22] -Right keys [2]: [inv_item_sk#35, inv_date_sk#34] -Join condition: (inv_quantity_on_hand#37 < cs_quantity#11) +Left keys [2]: [cs_item_sk#5, d_date_sk#27] +Right keys [2]: [inv_item_sk#32, inv_date_sk#31] +Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [11]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, cs_quantity#11, i_item_desc#20, d_date_sk#22, d_week_seq#26, w_warehouse_name#32, inv_date_sk#34, inv_item_sk#35, inv_quantity_on_hand#37] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -365,21 +365,21 @@ Input [1]: [p_promo_sk#39] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] (65) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [cs_promo_sk#9] +Left keys [1]: [cs_promo_sk#6] Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_promo_sk#9, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: hashpartitioning(cs_item_sk#8, cs_order_number#10, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Arguments: [cs_item_sk#8 ASC NULLS FIRST, cs_order_number#10 ASC NULLS FIRST], false, 0 +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns Output [2]: [cr_item_sk#42, cr_order_number#43] @@ -404,33 +404,33 @@ Input [2]: [cr_item_sk#42, cr_order_number#43] Arguments: [cr_item_sk#42 ASC NULLS FIRST, cr_order_number#43 ASC NULLS FIRST], false, 0 (74) SortMergeJoin -Left keys [2]: [cs_item_sk#8, cs_order_number#10] +Left keys [2]: [cs_item_sk#5, cs_order_number#7] Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Input [7]: [cs_item_sk#8, cs_order_number#10, w_warehouse_name#32, i_item_desc#20, d_week_seq#26, cr_item_sk#42, cr_order_number#43] +Output [3]: 
[w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#32, i_item_desc#20, d_week_seq#26] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#32, d_week_seq#26, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#32 ASC NULLS FIRST, d_week_seq#26 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#32, d_week_seq#26, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt index 39dba3af02359..918508787c4b0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -23,7 +23,7 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter Exchange [cs_item_sk,d_date_sk] #3 WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_date_sk,d_week_seq] + Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] SortMergeJoin [cs_item_sk,i_item_sk] @@ -38,7 +38,11 @@ TakeOrderedAndProject 
[total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [hd_demo_sk,cs_bill_hdemo_sk] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #5 WholeStageCodegen (1) @@ -47,10 +51,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter BroadcastExchange #6 WholeStageCodegen (2) @@ -79,12 +79,8 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom InputAdapter BroadcastExchange #9 WholeStageCodegen (9) - Project [d_date_sk,d_date_sk,d_date,d_week_seq] + Project [d_date_sk,d_date,d_week_seq,d_date_sk] BroadcastHashJoin [d_week_seq,d_week_seq] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -93,14 +89,22 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] InputAdapter WholeStageCodegen (14) Sort [inv_item_sk,inv_date_sk] InputAdapter Exchange [inv_item_sk,inv_date_sk] #11 WholeStageCodegen (13) - Project [w_warehouse_name,inv_date_sk,inv_item_sk,inv_quantity_on_hand] - BroadcastHashJoin [w_warehouse_sk,inv_warehouse_sk] + Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) @@ -108,10 +112,6 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom ColumnarToRow InputAdapter Scan parquet default.warehouse [w_warehouse_sk,w_warehouse_name] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #13 WholeStageCodegen (15) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index e6210f4a26281..025e5a6f94741 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -44,12 +44,12 @@ TakeOrderedAndProject (125) : : : : : +- * Project (23) : : : : : +- * Filter (22) : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.date_dim (20) + : : : : : +- Scan parquet default.promotion (20) : : : : +- BroadcastExchange (31) : : : : +- * Project (30) : : : : +- * Filter (29) : : : : +- * ColumnarToRow (28) - : : : : +- Scan parquet default.promotion (27) + : : : : +- Scan parquet default.date_dim (27) : : : +- BroadcastExchange (37) : : : +- * Filter (36) : : : +- * ColumnarToRow (35) @@ -210,67 +210,67 @@ Join condition: None Output [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] Input [9]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] -(20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +(20) Scan parquet default.promotion +Output [2]: [p_promo_sk#17, p_channel_tv#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/promotion] +PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] +ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (22) Filter [codegen id : 6] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 10442)) AND (d_date#18 <= 10472)) AND isnotnull(d_date_sk#17)) +Input [2]: [p_promo_sk#17, p_channel_tv#18] +Condition : ((isnotnull(p_channel_tv#18) AND (p_channel_tv#18 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 6] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_date#18] +Output [1]: [p_promo_sk#17] +Input [2]: [p_promo_sk#17, p_channel_tv#18] (24) BroadcastExchange -Input [1]: [d_date_sk#17] +Input [1]: [p_promo_sk#17] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] (25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ss_promo_sk#4] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 9] -Output [6]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#17] +Output [6]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_sold_date_sk#1, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#17] -(27) Scan parquet default.promotion -Output [2]: [p_promo_sk#20, p_channel_tv#21] +(27) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_date#21] Batched: true -Location [not included in comparison]/{warehouse_dir}/promotion] -PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), 
GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] +ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Input [2]: [d_date_sk#20, d_date#21] (29) Filter [codegen id : 7] -Input [2]: [p_promo_sk#20, p_channel_tv#21] -Condition : ((isnotnull(p_channel_tv#21) AND (p_channel_tv#21 = N)) AND isnotnull(p_promo_sk#20)) +Input [2]: [d_date_sk#20, d_date#21] +Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 10442)) AND (d_date#21 <= 10472)) AND isnotnull(d_date_sk#20)) (30) Project [codegen id : 7] -Output [1]: [p_promo_sk#20] -Input [2]: [p_promo_sk#20, p_channel_tv#21] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_date#21] (31) BroadcastExchange -Input [1]: [p_promo_sk#20] +Input [1]: [d_date_sk#20] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#20] Join condition: None (33) Project [codegen id : 9] Output [5]: [ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12] -Input [7]: [ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#20] +Input [7]: [ss_sold_date_sk#1, ss_store_sk#3, ss_ext_sales_price#6, ss_net_profit#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#20] (34) Scan parquet default.store Output [2]: [s_store_sk#23, s_store_id#24] @@ -383,28 +383,28 @@ Output [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_s Input [9]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#14] (58) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (59) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [cs_promo_sk#48] +Right keys [1]: [p_promo_sk#17] Join condition: None (60) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#17] +Output [6]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] +Input [8]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#17] (61) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (62) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#48] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#20] Join condition: None (63) Project [codegen id : 19] Output [5]: [cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#46, cs_promo_sk#48, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#20] +Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#50, cs_net_profit#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#20] (64) Scan parquet 
default.catalog_page Output [2]: [cp_catalog_page_sk#58, cp_catalog_page_id#59] @@ -517,28 +517,28 @@ Output [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales Input [9]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, i_item_sk#14] (88) ReusedExchange [Reuses operator id: 24] -Output [1]: [d_date_sk#17] +Output [1]: [p_promo_sk#17] (89) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#80] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ws_promo_sk#83] +Right keys [1]: [p_promo_sk#17] Join condition: None (90) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#17] +Output [6]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] +Input [8]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#17] (91) ReusedExchange [Reuses operator id: 31] -Output [1]: [p_promo_sk#20] +Output [1]: [d_date_sk#20] (92) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#83] -Right keys [1]: [p_promo_sk#20] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#20] Join condition: None (93) Project [codegen id : 29] Output [5]: [ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91] -Input [7]: [ws_web_site_sk#82, ws_promo_sk#83, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, p_promo_sk#20] +Input [7]: [ws_sold_date_sk#80, ws_web_site_sk#82, ws_ext_sales_price#85, ws_net_profit#86, wr_return_amt#90, wr_net_loss#91, d_date_sk#20] (94) Scan parquet default.web_site Output [2]: [web_site_sk#93, web_site_id#94] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index 13781c8bd5993..ad59968740aaa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -32,9 +32,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] BroadcastHashJoin [ss_store_sk,s_store_sk] Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] BroadcastHashJoin [ss_item_sk,i_item_sk] Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] @@ -69,19 +69,19 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #8 WholeStageCodegen 
(6) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter BroadcastExchange #9 WholeStageCodegen (7) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] + Project [d_date_sk] + Filter [d_date,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter BroadcastExchange #10 WholeStageCodegen (8) @@ -98,9 +98,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] BroadcastHashJoin [cs_item_sk,i_item_sk] Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] @@ -127,9 +127,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #7 InputAdapter - ReusedExchange [d_date_sk] #8 + ReusedExchange [p_promo_sk] #8 InputAdapter - ReusedExchange [p_promo_sk] #9 + ReusedExchange [d_date_sk] #9 InputAdapter BroadcastExchange #14 WholeStageCodegen (18) @@ -146,9 +146,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] BroadcastHashJoin [ws_web_site_sk,web_site_sk] Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] BroadcastHashJoin [ws_item_sk,i_item_sk] Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] @@ -175,9 +175,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter ReusedExchange [i_item_sk] #7 InputAdapter - ReusedExchange [d_date_sk] #8 + ReusedExchange [p_promo_sk] #8 InputAdapter - ReusedExchange [p_promo_sk] #9 + ReusedExchange [d_date_sk] #9 InputAdapter BroadcastExchange #18 WholeStageCodegen (28) From 3309a2be071f2d3f6122f3634aea998d6fa53876 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 18 Sep 2020 13:56:19 -0700 Subject: [PATCH 0070/1009] [SPARK-32635][SQL][FOLLOW-UP] Add a new test case in catalyst module ### What changes were proposed in this pull request? 
This is a follow-up PR to https://github.com/apache/spark/pull/29771 and just adds a new test case. ### Why are the changes needed? To have better test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29802 from peter-toth/SPARK-32635-fix-foldable-propagation-followup. Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../optimizer/FoldablePropagationSuite.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 0d48ecb31cfa4..59dfd3a7932bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -180,4 +180,16 @@ class FoldablePropagationSuite extends PlanTest { .select((Literal(1) + 3).as('res)).analyze comparePlans(optimized, correctAnswer) } + + test("SPARK-32635: Replace references with foldables coming only from the node's children") { + val leftExpression = 'a.int + val left = LocalRelation(leftExpression).select('a) + val rightExpression = Alias(Literal(2), "a")(leftExpression.exprId) + val right = LocalRelation('b.int).select('b, rightExpression).select('b) + val join = left.join(right, joinType = LeftOuter, condition = Some('b === 'a)) + + val query = join.analyze + val optimized = Optimize.execute(query) + comparePlans(optimized, query) + } } From f1dc479d39a6f05df7155008d8ec26dff42bb06c Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Fri, 18 Sep 2020 14:02:14 -0700 Subject: [PATCH 0071/1009] [SPARK-32898][CORE] Fix wrong executorRunTime when task killed before real start ### What changes were proposed in this pull request? Only calculate the executorRunTime when taskStartTimeNs > 0. Otherwise, set executorRunTime to 0. ### Why are the changes needed? bug fix. It's possible that a task be killed (e.g., by another successful attempt) before it reaches "taskStartTimeNs = System.nanoTime()". In this case, taskStartTimeNs is still 0 since it hasn't been really initialized. And we will get the wrong executorRunTime by calculating System.nanoTime() - taskStartTimeNs. ### Does this PR introduce _any_ user-facing change? Yes, users will see the correct executorRunTime. ### How was this patch tested? Pass existing tests. Closes #29789 from Ngone51/fix-SPARK-32898. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/executor/Executor.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 54b50e6d2fa4a..27addd8fc12e2 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -400,7 +400,9 @@ private[spark] class Executor( // Report executor runtime and JVM gc time Option(task).foreach(t => { t.metrics.setExecutorRunTime(TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - taskStartTimeNs)) + // SPARK-32898: it's possible that a task is killed when taskStartTimeNs has the initial + // value(=0) still. In this case, the executorRunTime should be considered as 0. 
+ if (taskStartTimeNs > 0) System.nanoTime() - taskStartTimeNs else 0)) t.metrics.setJvmGCTime(computeTotalGcTime() - startGCTime) }) From f893a19c4cf62dd13bf179de75af6feb677c4154 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 20 Sep 2020 10:58:17 +0900 Subject: [PATCH 0072/1009] [SPARK-32180][PYTHON][DOCS][FOLLOW-UP] Rephrase and add some more information in installation guide ### What changes were proposed in this pull request? This PR: - rephrases some wordings in installation guide to avoid using the terms that can be potentially ambiguous such as "different favors" - documents extra dependency installation `pip install pyspark[sql]` - uses the link that corresponds to the released version. e.g.) https://spark.apache.org/docs/latest/building-spark.html vs https://spark.apache.org/docs/3.0.0/building-spark.html - adds some more details I built it on Read the Docs to make it easier to review: https://hyukjin-spark.readthedocs.io/en/stable/getting_started/install.html ### Why are the changes needed? To improve installation guide. ### Does this PR introduce _any_ user-facing change? Yes, it updates the user-facing installation guide. ### How was this patch tested? Manually built the doc and tested. Closes #29779 from HyukjinKwon/SPARK-32180. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/conf.py | 6 +- python/docs/source/getting_started/index.rst | 2 +- .../docs/source/getting_started/install.rst | 138 ++++++++++++++++++ .../source/getting_started/installation.rst | 114 --------------- python/setup.py | 3 + 5 files changed, 147 insertions(+), 116 deletions(-) create mode 100644 python/docs/source/getting_started/install.rst delete mode 100644 python/docs/source/getting_started/installation.rst diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 738765a576290..9d87bbe27df2a 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -57,7 +57,11 @@ .. _binder: https://mybinder.org/v2/gh/apache/spark/{0}?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart.ipynb .. |examples| replace:: Examples .. _examples: https://github.com/apache/spark/tree/{0}/examples/src/main/python -""".format(os.environ.get("RELEASE_TAG", "master")) +.. |downloading| replace:: Downloading +.. _downloading: https://spark.apache.org/docs/{1}/building-spark.html +.. |building_spark| replace:: Building Spark +.. _building_spark: https://spark.apache.org/docs/{1}/#downloading +""".format(os.environ.get("RELEASE_TAG", "master"), os.environ.get('RELEASE_VERSION', "latest")) # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index 0f3cea7d6ea58..9fa3352ae27d8 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -25,5 +25,5 @@ This page summarizes the basic steps required to setup and get started with PySp .. toctree:: :maxdepth: 2 - installation + install quickstart diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst new file mode 100644 index 0000000000000..03570e6626d90 --- /dev/null +++ b/python/docs/source/getting_started/install.rst @@ -0,0 +1,138 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +============ +Installation +============ + +PySpark is included in the official releases of Spark available in the `Apache Spark website `_. +For Python users, PySpark also provides ``pip`` installation from PyPI. This is usually for local usage or as +a client to connect to a cluster instead of setting up a cluster itself. + +This page includes instructions for installing PySpark by using pip, Conda, downloading manually, +and building from the source. + + +Python Version Supported +------------------------ + +Python 3.6 and above. + + +Using PyPI +---------- + +PySpark installation using `PyPI `_ is as follows: + +.. code-block:: bash + + pip install pyspark + +If you want to install extra dependencies for a specific component, you can install it as below: + +.. code-block:: bash + + pip install pyspark[sql] + + +Using Conda +----------- + +Conda is an open-source package management and environment management system which is a part of +the `Anaconda `_ distribution. It is both cross-platform and +language agnostic. In practice, Conda can replace both `pip `_ and +`virtualenv `_. + +Create a new virtual environment from your terminal as shown below: + +.. code-block:: bash + + conda create -n pyspark_env + +After the virtual environment is created, it should be visible under the list of Conda environments +which can be seen using the following command: + +.. code-block:: bash + + conda env list + +Now activate the newly created environment with the following command: + +.. code-block:: bash + + conda activate pyspark_env + +You can install pyspark by `Using PyPI <#using-pypi>`_ to install PySpark in the newly created +environment, for example as below. It will install PySpark under the new virtual environment +``pyspark_env`` created above. + +.. code-block:: bash + + pip install pyspark + +Alternatively, you can install PySpark from Conda itself as below: + +.. code-block:: bash + + conda install pyspark + +However, note that `PySpark at Conda `_ is not necessarily +synced with PySpark release cycle because it is maintained by the community separately. + + +Manually Downloading +-------------------- + +PySpark is included in the distributions available at the `Apache Spark website `_. +You can download a distribution you want from the site. After that, uncompress the tar file into the directory where you want +to install Spark, for example, as below: + +.. code-block:: bash + + tar xzvf spark-3.0.0-bin-hadoop2.7.tgz + +Ensure the ``SPARK_HOME`` environment variable points to the directory where the tar file has been extracted. +Update ``PYTHONPATH`` environment variable such that it can find the PySpark and Py4J under ``SPARK_HOME/python/lib``. +One example of doing this is shown below: + +.. 
code-block:: bash + + cd spark-3.0.0-bin-hadoop2.7 + export SPARK_HOME=`pwd` + export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH + + +Installing from Source +---------------------- + +To install PySpark from source, refer to |building_spark|_. + + +Dependencies +------------ +============= ========================= ================ +Package Minimum supported version Note +============= ========================= ================ +`pandas` 0.23.2 Optional for SQL +`NumPy` 1.7 Required for ML +`pyarrow` 0.15.1 Optional for SQL +`Py4J` 0.10.9 Required +============= ========================= ================ + +Note that PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. +If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer +to |downloading|_. diff --git a/python/docs/source/getting_started/installation.rst b/python/docs/source/getting_started/installation.rst deleted file mode 100644 index 914045e898b2d..0000000000000 --- a/python/docs/source/getting_started/installation.rst +++ /dev/null @@ -1,114 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - -============ -Installation -============ - -Official releases are available from the `Apache Spark website `_. -Alternatively, you can install it via ``pip`` from PyPI. PyPI installation is usually for standalone -locally or as a client to connect to a cluster instead of setting a cluster up. - -This page includes the instructions for installing PySpark by using pip, Conda, downloading manually, and building it from the source. - -Python Version Supported ------------------------- - -Python 3.6 and above. - -Using PyPI ----------- - -PySpark installation using `PyPI `_ - -.. code-block:: bash - - pip install pyspark - -Using Conda ------------ - -Conda is an open-source package management and environment management system which is a part of the `Anaconda `_ distribution. It is both cross-platform and language agnostic. - -Conda can be used to create a virtual environment from terminal as shown below: - -.. code-block:: bash - - conda create -n pyspark_env - -After the virtual environment is created, it should be visible under the list of Conda environments which can be seen using the following command: - -.. code-block:: bash - - conda env list - -The newly created environment can be accessed using the following command: - -.. code-block:: bash - - conda activate pyspark_env - -In Conda version earlier than 4.4, the following command should be used: - -.. code-block:: bash - - source activate pyspark_env - -Refer to `Using PyPI <#using-pypi>`_ to install PySpark in the newly created environment. 
- -Note that `PySpark at Conda `_ is available but not necessarily synced with PySpark release cycle because it is maintained by the community separately. - -Official Release Channel ------------------------- - -Different flavors of PySpark are available in the `Apache Spark website `_. -Any suitable version can be downloaded and extracted as below: - -.. code-block:: bash - - tar xzvf spark-3.0.0-bin-hadoop2.7.tgz - -Ensure the `SPARK_HOME` environment variable points to the directory where the code has been extracted. -Define `PYTHONPATH` such that it can find the PySpark and Py4J under `SPARK_HOME/python/lib`. -One example of doing this is shown below: - -.. code-block:: bash - - cd spark-3.0.0-bin-hadoop2.7 - export SPARK_HOME=`pwd` - export PYTHONPATH=$(ZIPS=("$SPARK_HOME"/python/lib/*.zip); IFS=:; echo "${ZIPS[*]}"):$PYTHONPATH - -Installing from Source ----------------------- - -To install PySpark from source, refer to `Building Spark `_. - -Refer to `steps above <#official-release-channel>`_ to define ``PYTHONPATH``. - -Dependencies ------------- -============= ========================= ================ -Package Minimum supported version Note -============= ========================= ================ -`pandas` 0.23.2 Optional for SQL -`NumPy` 1.7 Required for ML -`pyarrow` 0.15.1 Optional for SQL -`Py4J` 0.10.9 Required -============= ========================= ================ - -**Note**: PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. -If using JDK 11, set ``-Dio.netty.tryReflectionSetAccessible=true`` for Arrow related features and refer to `Downloading `_ \ No newline at end of file diff --git a/python/setup.py b/python/setup.py index b4cc24a6d239f..7fac7b3138486 100755 --- a/python/setup.py +++ b/python/setup.py @@ -99,6 +99,7 @@ def _supports_symlinks(): # If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications. +# Also don't forget to update python/docs/source/getting_started/install.rst. _minimum_pandas_version = "0.23.2" _minimum_pyarrow_version = "1.0.0" @@ -203,6 +204,8 @@ def _supports_symlinks(): 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, scripts=scripts, license='http://www.apache.org/licenses/LICENSE-2.0', + # Don't forget to update python/docs/source/getting_started/install.rst + # if you're updating the versions or dependencies. install_requires=['py4j==0.10.9'], extras_require={ 'ml': ['numpy>=1.7'], From 7fb9f6884f5e085e97b60fe45055247c2d17245c Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 21 Sep 2020 09:39:34 +0900 Subject: [PATCH 0073/1009] [SPARK-32799][R][SQL] Add allowMissingColumns to SparkR unionByName ### What changes were proposed in this pull request? Add optional `allowMissingColumns` argument to SparkR `unionByName`. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? `unionByName` supports `allowMissingColumns`. ### How was this patch tested? Existing unit tests. New unit tests targeting this feature. Closes #29813 from zero323/SPARK-32799. 
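For illustration, a minimal SparkR sketch of how the new argument is meant to be used (the data frame contents and column names below are hypothetical, not taken from this patch):
```
# df1 has columns "a" and "b"; df2 only has "a".
df1 <- createDataFrame(data.frame(a = c(1, 2), b = c("x", "y")))
df2 <- createDataFrame(data.frame(a = c(3)))

# With the default allowMissingColumns = FALSE this union fails because the column sets differ;
# with allowMissingColumns = TRUE it succeeds and the missing column "b" is filled with null
# for the rows coming from df2.
head(unionByName(df1, df2, allowMissingColumns = TRUE))
```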
Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/R/DataFrame.R | 14 ++++++++++++-- R/pkg/R/generics.R | 2 +- R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++++++ python/pyspark/sql/dataframe.py | 9 ++++----- .../main/scala/org/apache/spark/sql/Dataset.scala | 8 ++++---- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 089e1f26b7d3b..2ce53782d9af0 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2863,11 +2863,18 @@ setMethod("unionAll", #' \code{UNION ALL} and \code{UNION DISTINCT} in SQL as column positions are not taken #' into account. Input SparkDataFrames can have different data types in the schema. #' +#' When the parameter allowMissingColumns is `TRUE`, the set of column names +#' in x and y can differ; missing columns will be filled as null. +#' Further, the missing columns of x will be added at the end +#' in the schema of the union result. +#' #' Note: This does not remove duplicate rows across the two SparkDataFrames. #' This function resolves columns by name (not by position). #' #' @param x A SparkDataFrame #' @param y A SparkDataFrame +#' @param allowMissingColumns logical +#' @param ... further arguments to be passed to or from other methods. #' @return A SparkDataFrame containing the result of the union. #' @family SparkDataFrame functions #' @rdname unionByName @@ -2880,12 +2887,15 @@ setMethod("unionAll", #' df1 <- select(createDataFrame(mtcars), "carb", "am", "gear") #' df2 <- select(createDataFrame(mtcars), "am", "gear", "carb") #' head(unionByName(df1, df2)) +#' +#' df3 <- select(createDataFrame(mtcars), "carb") +#' head(unionByName(df1, df3, allowMissingColumns = TRUE)) #' } #' @note unionByName since 2.3.0 setMethod("unionByName", signature(x = "SparkDataFrame", y = "SparkDataFrame"), - function(x, y) { - unioned <- callJMethod(x@sdf, "unionByName", y@sdf) + function(x, y, allowMissingColumns=FALSE) { + unioned <- callJMethod(x@sdf, "unionByName", y@sdf, allowMissingColumns) dataFrame(unioned) }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 839c00cf21aeb..a6a71666ae588 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -638,7 +638,7 @@ setGeneric("union", function(x, y) { standardGeneric("union") }) setGeneric("unionAll", function(x, y) { standardGeneric("unionAll") }) #' @rdname unionByName -setGeneric("unionByName", function(x, y) { standardGeneric("unionByName") }) +setGeneric("unionByName", function(x, y, ...) { standardGeneric("unionByName") }) #' @rdname unpersist setGeneric("unpersist", function(x, ...) 
{ standardGeneric("unpersist") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index e008bc5bbd7d9..5008d3005b5b1 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2696,6 +2696,19 @@ test_that("union(), unionByName(), rbind(), except(), and intersect() on a DataF expect_error(rbind(df, df2, df3), "Names of input data frames are different.") + + df4 <- unionByName(df2, select(df2, "age"), TRUE) + + expect_equal( + sum(collect( + select(df4, alias(isNull(df4$name), "missing_name") + ))$missing_name), + 3 + ) + + testthat::expect_error(unionByName(df2, select(df2, "age"), FALSE)) + testthat::expect_error(unionByName(df2, select(df2, "age"))) + excepted <- arrange(except(df, df2), desc(df$age)) expect_is(unioned, "SparkDataFrame") expect_equal(count(excepted), 2) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index db2ddde00c881..94a7df33f335e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1569,11 +1569,10 @@ def unionByName(self, other, allowMissingColumns=False): | 6| 4| 5| +----+----+----+ - When the parameter `allowMissingColumns` is ``True``, - this function allows different set of column names between two :class:`DataFrame`\\s. - Missing columns at each side, will be filled with null values. - The missing columns at left :class:`DataFrame` will be added at the end in the schema - of the union result: + When the parameter `allowMissingColumns` is ``True``, the set of column names + in this and other :class:`DataFrame` can differ; missing columns will be filled with null. + Further, the missing columns of this :class:`DataFrame` will be added at the end + in the schema of the union result: >>> df1 = spark.createDataFrame([[1, 2, 3]], ["col0", "col1", "col2"]) >>> df2 = spark.createDataFrame([[4, 5, 6]], ["col1", "col2", "col3"]) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 4cb923d94cc55..87b9aea80c823 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2038,10 +2038,10 @@ class Dataset[T] private[sql]( * The difference between this function and [[union]] is that this function * resolves columns by name (not by position). * - * When the parameter `allowMissingColumns` is true, this function allows different set - * of column names between two Datasets. Missing columns at each side, will be filled with - * null values. The missing columns at left Dataset will be added at the end in the schema - * of the union result: + * When the parameter `allowMissingColumns` is `true`, the set of column names + * in this and other `Dataset` can differ; missing columns will be filled with null. + * Further, the missing columns of this `Dataset` will be added at the end + * in the schema of the union result: * * {{{ * val df1 = Seq((1, 2, 3)).toDF("col0", "col1", "col2") From 9c653c957f5cd9237cc2ad0a5bc28ead2dab75cb Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 21 Sep 2020 12:29:17 +0900 Subject: [PATCH 0074/1009] [SPARK-32189][DOCS][PYTHON] Development - Setting up IDEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 
This PR proposes to document the way of setting up IDEs. ![Screenshot 2020-09-21 10 43 12 AM](https://user-images.githubusercontent.com/44108233/93727715-5c2a6e80-fbf7-11ea-821b-555723b00bc8.png) ![Screenshot 2020-09-21 10 43 45 AM](https://user-images.githubusercontent.com/44108233/93727716-5f255f00-fbf7-11ea-9c6c-7b8a973bc511.png) ### Why are the changes needed? To let users know how to set up IDEs ### Does this PR introduce _any_ user-facing change? Yes, it adds a new page in the documentation about setting up IDEs. ### How was this patch tested? Manually built the doc. Closes #29781 from itholic/SPARK-32189. Authored-by: itholic Signed-off-by: HyukjinKwon --- docs/img/pycharm-with-pyspark1.png | Bin 0 -> 160166 bytes docs/img/pycharm-with-pyspark2.png | Bin 0 -> 84813 bytes docs/img/pycharm-with-pyspark3.png | Bin 0 -> 15981 bytes python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/index.rst | 1 + .../docs/source/development/setting_ide.rst | 62 ++++++++++++++++++ 6 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 docs/img/pycharm-with-pyspark1.png create mode 100644 docs/img/pycharm-with-pyspark2.png create mode 100644 docs/img/pycharm-with-pyspark3.png create mode 100644 python/docs/source/development/setting_ide.rst diff --git a/docs/img/pycharm-with-pyspark1.png b/docs/img/pycharm-with-pyspark1.png new file mode 100644 index 0000000000000000000000000000000000000000..6e2c0bc02d2b76c1a80db842f1e85840ee095d8b GIT binary patch literal 160166 [base85-encoded binary data for docs/img/pycharm-with-pyspark1.png omitted]
zh9%?f;0`?JV!vYZWBTxp?3MAyXNvNR??jbt86{q|Gbar(Sva{eh@`e98Yp$V36O^u zzSFAvV8MR*T!%%QO^|i*cXt_^cjE5BBBXOE7%y;nQBFq7TA{CcGwmjQqIzfMmfHhr93%UsVM}GBW6P=FN^MJskEVp0#P?Of zb~m3Yic)+s#-tD9`zH8Dc1KtTBUiUaUI+1%X;gg_<$~7yK-+ix7F5Mlj(qk)G=fun z8Rm5}KUVh>dAieVIJj3$mu|W}t8VowlJd{!2**JA@Nt~T zT$W9aX~)uIF^2ty#mnj+bxo!?`l9-{oL>5#`ljUu6|&k|+P4*oa~wzFM?ObJ)GaDM z#%wb`XA0HEe11K?$Xn(%=cUiQq`YLiG~oHX)ueUSv%>T2nDe;#Ci%vTW{?n-kex=4 zW>r*A^zy~6u%vUo=$MP%(ah=9*dOqx&E<9ReY5Sa$G=8)B$N8_ttzdIJi$%(O_k0P zC-;BG{&3EpG%eAc&Ye=8dYl5RGRJo(Dsxu$OxL?F7S1NOhR&65miPW_6`g@EN-up* z9rni{lG_qpyjDfEan-AL3-|B*RPST%TcBJJ2Z#(5c8Ai$Q^2h%tkO zk0px1g=3G`gC2(NfYUNDXX^H+{&>SAb=|W2#mmBk9|=(jd*8{vfBiita-Ov=qQ57e zZYQq8-zb6~TGQL`MS?(xU;+p6`Uy2L?FY&WhAA#r8w;!F0=A}_hBE`!x%(=U@#dpu z7Ca|4?`srm>T3#I0$hq*Bra+2D@7A3j_8w0JPwkcY*R8KdUprlUgtf8~pL-5helx8EcwyZNOk_633Ju<_ zr(R-xl=!HmVO;E8=%7OV9EdZFF7wvdH0<2LNBw^FYBHuerk8Rn-6cKt(_Q&?xduZN zi_A=7MP_#H3TMM}@a1)qG;F!=&1e#+2onus$9o9Ce*E6?uIAlGcTmN{V!Kp6=sA3$e%y3r++!`; z8?2*buQdF=qjImZv!&Zn2Kwb9_9u0#por6AEoJMcI_N4{6{kQETMSO^CK%06)CYU-3@xuD(gw{r**EjWC+DXuii7g7B!4qTPF`E325{fAYAPzw^F>o=kxTZ%|AW0TYI?Uv2~G4 zU&c$a&+LpB;CF8y!^BbMU^c-7O?N4%6{BKg`D4!4uj<<^y3vr~ztT=&jB)U)o9 z@N}Xz$zfmq8{iJfXwqiMu-ZtDja1qF*kfZq?OqOFdqDequ7U(;dEmqs)_QeY1q%ZV z0){cKFl~(&JH;g^xPg$_HXT?MxN6`_6#V;TyrI+R>f>YhE!}+hR`|o(`_;HkeT2~# zY=bC{Zc(8}A;*98URY4?QE^LS%xZUU9N=P%*-z#V_UA-=hF4ts4&E|vecIYGiworw z!Ocs+C@Fq&e#*^D#7tk?qk<9vDc(I$3@V1BGT}Tj=VUZ9gDC?;98mho(dn@#lOqJB z`N7nIkvuGm)Ln#xm)C&AV$lV)9fp_FuXd;J?tEXMI9H(fCB?+YpU~1Fv}2z?JPmBf z`YA$-LOCTH5x*!qlkMN;cP^Bkae;8}@3iCo5X5-b3TK}CNRkFRVdg+CaJ4OU6+f$} zps*ps*eIysHYn)G5GwMfLf*)=@YgRWSjg{Z$Xhn=$$zGz(dD82XBc(uZ$@bi8AV0p zw}!c^rKO{rwUfJfj;j$;)trr{uDh;^vZ%R}1DC0VlbIzK(82ky2#PpR6d81|bT_33 zI@mk9i2@}U{>dSV4F8P=Fwp;##obPVL03hcUdGAQl3sv|pNpG85|^HyUfk8h)052~mG6$!dx1+l$kkiqP@n0qX zsYlk*&D_<-+1_=ZA|n1z{{JoIzb*bNspV$rD&ypU6m*yT@A~}@ z;s2TV{}=qnP%lVd_b zd4r-TE3FAcJ#NPew3JIm7)wm^^}XB^mPw&`j)wgskRi(KEw~!)K2}y~IuBkU-8zJi ziR88JlByH)VSgolD(mVxC_?CBc8l;+T{-jLTj{8J@R&|YD@_m3@;D4Fbvx3|b}Ktv z4Qf5>@CBYOH!GT&n9vgt5wVNe;?v$aNc%m6MifHr3AXv4dHuyT$*6X$!HVsBmeQMN}zGY1TFe~2gxWHf}gUTU_ z5L!fNO#ti`wP6@}ZkuO0Pb}*ciRjTBX#!2Ck|TNmC206gANdUMl#9k$)gxMskvluS z2sO404f#>#jqiRr{wXdGjfb~&G7eFYZP}_$Ai*+lE4J4mhwg1?o%2S|)o&Rh=$d)F z#IBFNWwO>(3?<*Ale6=0)5mukrD?z!?3BQ@5F*Jm+JL)_=g^9d`$L;pfw^kSbelL@ zn<5mjft8o4{gDnJz~MUj+Ir3-!olLp(~X`~1Mr6q=ZNgN(_X<6&du4Tl!VHt$9%*T zVi=~=pL?okU|!U(m`mPsM>jCb3(qhce$?1qBP$VEWo{{n@GvHQC|IB)sh8UJx#5Ec zhr~*8``*~^%Zsm|$ghhJO584Q9VF`*iKygCu1rJrCT@9ok186C^6l&LwD7ZHeb3tG zKlongd#!koj1vS!-CLO32>iw?%?&uj>eSNtG>i3WkU{OjHsrd8G*gMEv80S)hdR^IMpyZDU^4wTbEH18cyIM+ zPZ81Z`+5F4=fuX`d&ic8Vw+w9q4H~4eV=*_=PUm)f%!XE*lwm%j_0z=fOrsPr(IRM zV7A}o(1C-KZ|2MtY3f7gG9t+vF)&^)z6>ftR4u@A;cO2})7{0|#&g1DZJS2tx~e|F zL7z{8a#}N_LOZ=)>hfjUQVbaExL%oCHD5(&$|@Q%sGE8QT)L!z+ldNaU+Ae(JGHrF zh^wIA`RC&AoBQQJvf<=nWB(Fmx8sffr0oKmZ_^F#jjXO%fvc)`UM9YWZl z{d%tBas&o1k%Ff7#kW+oYjZbHsg zA#;&X+Xg{B+LYa65v(M~_5J;c6XK4~K3ZGhcs02fF;WhGnU`S&E~>7iXjUGS;UfdWzQCSW@6}t)l&SP!tf0Y{;f4M; ze+KrV+$JQ1VE6N#s^kXF(mul)Ih%f6jPEMV#FF;o2Bvdv_>EssWeKKL=KA}K?xvah z)o-2m5_wS>Wju-9mV+jqS)ZhiMyTm|FvgM$X^1~@j(v!ti}wST5=Ir+0~DZJGoWoT zy$434MB`X;txj2?&>|Ov!Gz&X%QkSU1N2OWnfYsT>a$NZid}l>{Hc3RI9efbw*mQi4?C_seFuE z8{vG6Mtz$h_)J7ks>NIO8c<18G$bumXw!3^@h^P$crysVfevb}L_e0J9u(|@E^ZOb zwumizD)w6Y+is`{HJR1sf@&zqW!9effR6WxxZziq$Hq?BSnh&(U#$bSc~&yX{Ywyj zKW`ogl4)F`1ZW@i9?uY9Z{L0mP!zwwA}WCW8sUOKT=gC5lOZKBC*`@&QLCv{-?MC8 zmu}Mq5CZ0JxlRPSlgSWrGFtSQH=G_ZhPxWKznOCifMm+Hvn)Bbo=hnj-+!c2+qzSL zP6XWW(a}j=M5+}cU!#MAE;J%#a$`RiQP|aV`$?w#ugH-_hsKrC3ylI5TF{TiCuOS- 
zCylw&`E(7&{)mUQTdk*e+yObhry>nz?hRFS>v`Oco#$bID<2|Gs`q?WPD+_vLB%H5V#&JlOW=$hC3i>ACZQ=9Y^{E4& z@isY3=z-Ax)ordl;^!HQ=i?Nj<+Qn&uomv#=y7T&T{uDtgZHlH1V77v>ncgTrEG%# zn@u5z*x1{-Ys?a=NIM!n!Tnc7f@I z&Rzb;sBai+Pcq=HmyRh5qk(Fp?R*;!e>kfez&6yuH^6tI!NND7t@e9ZTJvoCJN6%B zxs59#TdDTwR}~0$bSl2C+;x^8&E?J<3}KzPg6ML=Us(=pmqf~7V-g_>&9*FkCNc3`$aXKmYal1u=VxulR?Z$NZb{awJfAs;R z3zlK0A#{)6E0FYqL&=5Jj6!Uyg1xrZ8rQY`Y1@#K)4XLFqV|5AZHQQ*jbQ^FcIwsX zrsoX0pU%CXGr98K{~mrHHS)(Q_r&#+sRFj>@Hw^$mmAot-RbI{B4WwvmGw1!WuO0K#LJj{?%Lxd$i1w$E zBhS+RuF~~?2(d@}AfWZn~|bu+Bf zF*UJnZ+_kV-6RYf)(#zW^7JkzyigF+oqW22>?+4)ujo-_PET9UPRp3PtjWj6_;N1r zQ{$7uGzuR`Xr9y=AdDc!<*ietA$&i)DN=b6GxxDAAF2E;{gvE?$ziGRc}GxxN5R!r z3xW;njf8XL8e*iz&t1!@^=(iKeIe^My-*03+K*pme1to=zpL%5NWVsu)NNjzqopR~ zzN&eijn<|)adA0vUEMtd{Gqz3#L-b$kQG6_k@zKYP}9`Cxl+Ywovn-U@_CzL5=4B+ z)=A1ef9TR}i6P5fLmSzUYsVxG{>4{PrzJNJPP?P&j>>NR<_!{*d*W#3^kvjKhTh4Y z2T>Ku`;;=NooWG>Nk5u9=+0V>xYUjUEMAju(I~vVg=E-OZeY!6HFRTA&xkuqa_^rt zZ4r85u7}iil~&V42z%NogHf!K(U~{-#oAN?Q<5VWIN^9{R{r(8@x)Ff;4ZOoYDOXl z7J>zNcGrfzjvV?5#CZ3^L}KqtRm%0`Itn+Ek0dpZo(Y5YHS4~r8QJWcQDso{G55R^ zvT^NbM?{ubBbUUW>#&B7=MQ=ZppogvHA9_B!zGXLTPgDsi>2IX1xY~kp{pc+-p_(d zL3SnyGXXuzs1NAE7snWun?)20z`LL&%JuK3=oc2x_8S8>=QntHUw5#*oS<`vkp}iD z`{1V3CopG3OtYTlgre(kWTauuJzXnWa)iEsBjs~=I4OA?Q0<0-^Bw}#u`7iIB-gkQ$YXPKLT3nD-47vBIys4S7)E$eHVBHmG@KqBD>BiF^5B_1K|6m3M3} zx(=q^xp1`Gam~Oz8_|8qO50-EuLh}$@5MaeErMPa!<2#xUjR?Jh_eUM5w{zUrPCVD_a>Mjz? zPfMtK@zQ*#BPrP0Az+{P+3{DDfmXk&U5EqZ4-WS&Wf zdpP>>^oZ{I2fxopsucmqRO3~tIFNWwS;Oe6^Xh717!xZ&M7WX<$KI*usa584wX^2W z!mrBXs#Wmzf@CHYUU+<~!rad&1yzG1v{3LJ8*cp0eAggof7WaKqkgehHuc1UVr=^b z#trzP3FK<`kyBkbGfA^3+K$eS!oaa%{P7s;ZpPS2wNczBTN2kM$(w~qs+Tt;YbN?d zfeIq;dlhVk;=N6TX+UsDZZ@6GFpjXo)x}xBZR>g}C#xLqVkhD)3}*9d&@L-* zlIw81EZ#*g<8tY8u*9()&O)aYpzr(LE{S|cT{WWmbH*hD=wJo$cPqeYcf0nrQL0SK zMc={7?@&h)VtzD?rA$!=HCMdv#@d@%ssf5Cm?&myVPSf#$_F#%cTWfasK|!=PsJ))+M!3=0ktOYeL1^uR*32qAbM@g_eW)jF&Mr zZGUhnY5e97ah!V0Vla*GQR()+G|)awoBM0+VgP{BD>-Wj_$@p|W-&bJ^`8r$2s%qP za?n98gw9VX)WL3)8GT>Zazp(AhdC>u1I8%^bV4{^>B?=y>TLdOi0U+}>~_n2Fp#1j zx5d#P)(!h8mdLpLImXYVv_P^DBL=wxmlj872IwjaEfEFuX*8RRS1lt(W?)rzG+UKN zT80|k5y^AcT5G}K!x36sX(KH2AXY=f`-_nvcm)Zk!~+TJij}Sv)I~Tgy_qfH=`Jeu zklnnftSwqTP^b`?hLbXjkLilBqTCV`IKc1C)XF5qq|Ss3++>l3{(kzy`qA-=Mg4`{ zoLbX(NlUXr!@L&i`!c`oNb}4LzeVIL;>ajwyjI+i30+3~E(yNUy`qa@IGVt1zhV>y zP8rs&#f_4~=4%a$Ob$MXev>RSqZ2~6>$RkQ%H9x@&Un4j97oZBM>Z-nU}jl!Ly;3R zATDCo_Ko9Z{ZNbtsU)zAQfz}+f%S6o4v;2Q!P{%>5ssxw{IR~D*eOD&c_Q{ie@c#sx-!9 zy1z2|;<=uvW^7}wzU_6Tb5x7m>`8U<%`m#soc+{gXfhkVe@|`HjA-%LX<@%bF5_!n z#5#Rhr)WF#KPzKazbee@D5d}!+w`)-s`Pa#Tc;3*@NmiJX1UK|be+M(g0_peN{FL< zB12+(W0lFy+n919SU>zVMa_B?qy0=BDOWTse)b5}cW2YtUfn*T7gdv;UPV0W`KS=j z-mAUs!XGh}DOykIH-jfeiz*X#$tYsdyMioo$9FClEsTy zX>o+yYPLO$d=`v8R_{mC0^Ifr5o}dWPkhjRrmSC%r@wkfqM70bB~N2oG}=+|yEJU- z5AjP}n^c;VY8gZxym*|}0|5t;Y`4a@8hbwqDqZ?UF%R1p)b$NeeiOy;-bwEAGpE@g@~-X#OcGgpQuG! 
zjRC%iyuBl?1PZVHyWzh#x_tPt4EitKc3WnD<f;w`L$N&xfUBLfVV?dH5dqBbwK; z%O1Cr*vqC&?yi0Cl*70cHPNo8l1_XyRynuK8v*=>(#3Uc=7X84NqE3cYs!zd+Wo#1&hOTF$jijWmOj4iM1Z0!#?+fX zYp~w9?OlPpz~j8kk99^AY+_fhUrYzd*cOcH^6b<>1}Ji9|6W^jDEjS~*+*cU&puOTBDO`d> zVfm1SoJ-ciHrvi=s%ErW@k5=rlW&k$*RMQOx;Z_zDLlf`;pdGBUO`=D@|{$xqw^TC zvMOb=98TKw+e^P0Sq}mZe-q_0t$hVhvHWvU;h?jjIj+= z_P_FK>`~Z_tBGSF#D$y?4EsA*ojA(~b|r}6doK)}y&X@DFi&>2r-ubWJoqi%p$$oZ?Vx+=YTI~h_JJ$R=L>nC5v-y6Kg5V%;yUt1Pz?q^^3etiqb zh|t*53pO1Mw?dU!?q|RJpvP0_z<>Qzxf$lY`s|vUEtKMR-Ew`7zC%*`!D1##d&CKF z@};4^h}zu3k%6DF*<$b?V}nlfU9#>X#2KCkC$}E~2Hnw*W*| z&^IXu29oZ{ehr=**xqlxe0M1+V2DOZ@$iY2N1s?i?e2^bY3*cC+Vb!PIYEzKkw5Q z&W~(DDvMNoH2YT>$^W&oKpWvpcS|5%>LhFYwKok4SHSN?l2N=KHH{7kk537)5jfsH z({KH&6E;0+sWYc(vNO~+)J(=MX|3FD)JifP)h3zM^N)0gPGM>E6p!O@ym*)nr1I|H zqG8e{N0*pgJX0+sS1o+a`ES{P(Z(@gCE-LP;3g-{q^!Y4UAHWA01p{pL)K>_ve8+V z#3gJRlO+2NtlJh!auCM6+C!|;N2*G#?7m5E{*SU8Af5t0v5v3<_hjV%GiCV8ej{VO zGSQcZ7#h#}#pAGhYyV6&NSd+=-8+Q!!J*}w=8Xtl8-Wj+b)(Nu19ds{J3@NPo;rY0 z`xE#E^~-0`#-ns=)MT^Glstwepre6nw3pc)Tqo->44tlV`Hwd@jYB03+T`)ddIH`e z)fXX~nLFD@c+ZA?yQw*j%7OgQdF&Jxw@jYlJ z7r%@?sJWVW_s0`qeplAqrImNhVeF^ZFTV4?Yg z)?7?MmhL<@dF-$jld!9qx_9E*N=ZT7x6`6B?D(dspC)~O_a9*R(+9$X&xn02q06To zz@N(JOrYelD!~ysg=IXUo-iV1aSTSs%lNwd$h$(AR;%hhmY6dQwlKE$!&5$tgT+9r zgfh1EJwai>rfNtcB@g?HKB#iI)(k?+;3N-I-kY=cj>65@5S66EBi-_dkF4QwouK+- zl^v&)R0VaTUsb`Zy(Y9S_x?NKb`n3d!)&X5Y-zw*UfJuL?@4x}iI#sXM$qi5dP$kf zsz%l)94|}@x4`J6x}idYE7G<`?v-KuG#3RN-HaT=#96{UawQa46@zCT@PD>eCl6{{ zO{W@WE7RV`W3PjMipO`x9b>njf-Zg!Shn8XnXtuhC_IXZ6!8OeSNo}QHQ$NGbHi<7 zDi`J;6BJu&<#OfboGX^QG&`1$V^m3%5d}ttLSV~B0K_;Qxs0p22zL2C2B+*Z@8a&q zJAe6wj?jEbDz}^?m%u|Ch2z9G5oHlEY~#`&F!-J78^>5_1i(eCqXvCf)>-;peKQ^u z|AuRAeU$##-|TlI2u{Y)2#4T?CKj3IL}^(mRX?VFd0ZwdE1OL`)DLUx_}GML1?C^I z$;%s0|G~ZzYBS|_N-Am79Yz%M6#Bk$v)rf3n%pRf=`xYVz(Dh76^}~IxJ7YGnLfU#Tv4R9I{7%>@A~?E#z?e6dNAdo)OglNTa>G>l z8Q_AxvQZd%ck<*eC}`fWt>w@l`9YuPdu>7qS?`xFxO`hHZJN1f|Lo_7?-1`EC=aNM zqp?V(WBCCN_xy_auADBa=}ZSw5+Rp+!Eb>t~k6;r=z9VCe_yB{ZKr$^M zJdey;s*M??Tw~xs?;c|;> zOJBaqlCH}qIlR$n%Kn=5QNcAw!nuoPa#B9XavbTP+4J$dO0A!Qa^6qC>aV1z`BESc z@$5h0<+QYC2=O03UkwO!#G1D+PP};DXdh`sUB0UmNnFWe)phdB2qIuD@ma^bh5`HV zqO-KXXE$8Hnbn5Suzxn*)&;gQJ+S*!Tqe1|Dg zs3lx0_azq$5HE7k5qUoen`(<^AhelerzIe>(XoI{-vnhkwhT@9o%0VRkKxMJUnY`T zzJpSELm#1dQ*GXE({CRn-(fCQ!K~y8$6LMWW|yz_HL$SKhYaA4 zP>Aw)Tb;>Ex;MwP{8`*^`ZQ=+&+$wz%BZT=7^ceja9Md)v084<(xF`QijQIC_EsK8 znv~R7wvZIGQGGoY)eNy5$@{Vpr*42*A0)e%_m~u2TaGOl{8`~TVD$;Zt7XSq<)k_AVbGg zlmh)NDh0ryPe;#%Zc{!lCNrgDi8AoWcl->rwR-U9({t}H@8OGY0nkKMn|($}8;LyW zj?@RIJ?1ITyzFmxLF6*j*;`lEpo*ly!uY!F+n_@hVq+UH>@otF|9UQq+hMUTVk((T z^34?c@LhpRXgu5W=XzsEGlqS(9l?2ebG%u+^+|3q#J zxb!PgUnFZUZ?C8YwAKBic+hS$CRK2gy`pKzw_MykMu5Gd?(2a0h6W)4q+NtgXmNOe zVS@Yao-LU4{<)@_y5Pk^`~8A>{QY{vML#vOWIQ1n4@CAF4HAHcd4U8SQXD&Bo#b9u2si-SAdc{kJ29OLPKA*zq9DGm zG1=#8sUzS*bD(CC;$nvOfMDx|H!)T~ykv`CLrBXs*@61O=LfS)b1I*=pxi))_X;ej5E8`Ot^e z=xKs|>&bSbN4}_S!O>Kv%CqwQr|yZ~Q@tLGo=XnQu#EaytNd!ClfYaiu}(oPqXVPM znSe<=V-oA6N^*m%KJ$wR@#x36USbdG=?}*3>my2tSO@R?lCp#ER(x8j%W7S94x6~e z6q1EFo4IZ~jX#5Sb)&W<=N4v<;2}bmp*29^kJq*3v7qv_sW`ByMxSS@`(#(tkD%SO zUbcw-SmA~~eskG+_e3m8ZIp%L(4-Bl7Kb0V(gP-L4znf&1VZk`Ne4jZnso)RAU_G<#)4kn@~T=m2gVp;Y$3_q*b*Q#v0tFW%= zr_EJz;P4l}rMopPjK3{C>ZFn}r=pof1r{<_<@t(V+7}Ng-0Kq3T@n}jX-K-^-sffg zdKQHlg~_Cb*0R<#kXDYnl*0`6pYviZ|>e!7ka3$kJ7aU+SgBw zsa{xM-2tk#(!iWnJIOj1mNtueFX9!H=9Hh@T{n{ zD)c&~2tYRJHr_Xiu(^I-G`sL&=wBA{*`yl~Tcm=FJ!Jf%$id!PyA&jnhy88V%lshg zaNaon0m@j;KBySojs1|0_-HrEz7t+I;a>VTN+1%k@=z<5B%jR3arU@t`829ttk{ii z!PxoM)&ho)qn~#kC5|oaA~zq?&kj1&yrWJY+7#&d#F`=~;$xRQFYQVygrNhS+`gqx z*Q`!ycNG&YZ?IWAI6*q%1?+#3jCuZmXP^|c#qlK) 
zDGM@x*ceuPxmHZK@l*Vg^%BzFRCg7>?86a3URua3henkd>5D;m(mtH}8i=ek?)r3} z0eAg?Pqs_Iqx_n>7?3wu@Sjf%d=;k~%33K0jl+ z;55#!0>(N!#Oz+O1JsD5uGD=llXA0)GeWkLZFocCyPdh)w9onUwuv}C_s^(5a8M|6bH^`Ti_r zdC1UfqW9oiYe})3P3gR4hJT+s!h^i{L?~@in(ZesRrN*{Va$6TWAe~G-5peqq1Qh( zX+9?FEPqT|AS{}NH#CKTdC^4ovfQSSG{wnPJmN0?fN}!ZpLRXf-jW{=YJU!1zG+>9 zagF6jEPePqo|R50vYk+egdwgOSkLo=@`Grp5;%)CjkYG}Z?2z;RgKc;ZiX~clHFKJ zx#8J=3~!?<9b-Lk)x__86SpVdB2iS%U8_8D{(g1B|HkcL@u>12AAKyHzHm&f9K{(I z3IoDKfeCVEw~ivxD9)LHHV?H@*YnEum?oHCOGElyAM0*ceKXO53!VpC<%M>2gg8Qv zmELtow@VrwpG>GeKjC*-&3R8$E#PDIX$a9bAeC6zyi4`I|3$-Ez{AydB7f^;_pxXy zO7-Gs{4D-?CGJh@1ck*0{=2q*ohz}kHn&Bal5F{%ptku=`1QfV8ls~%hS$7B#VAy< z*lhUS?dEqy4}QPHIs49Y(lWwfcCO3R9C+sZoB8vUaPp#L9sQ+yS(wU{d%r(XiP)~> z_WY+c-*rz*>~;7arqNK2#__Myy1HMT_Q5@HG>^nb!BSm#7{Kpk`&AJ@CDW;+xuE}- z;9`J@`Gs}LfVfSWUn+&>GZ}$8Id!j=Fa`3rwc@+xEzv&|a1d^zrQ(XMp}~wmdzQRmbX9U=&RIA>1Ltmx1M0Oq`OVm*K6_&mxZN=0 z8KgdSyYBE~&|%HW!CL{nq^dsHE>2ro&D|*J}lTicK z7rNpso$&jkb~Sq>^J#efBHEL`nT&>s?ZD#Toi~SC3*!HcoKYJmJq}_?JXL zGH1SvNRY2I-{3SKf35TgF-Ve6W1bR3$v8jqW^m0UgkXx+_c#Yh26o;D9<$R6K2-{k zTL6pca6pOU`59v~@u05>Y zvPIFzQIKzLgwuKY=lKyV8ltCA;?a&I&#hNR19A#>HG6js)d^3T2w8i?=-31Uj#Rmr-;a&f^jVz6p)2N`Y z9UEV$r~YC>?lT`0X0O}tHlwGzvFHr;z-1K@Y#KCBJb8|$O>Dd3A(H$y-1S6+?#g8N z+icq|El1M9yC~NEWU>$IXL)Jybv?|Om1WvGrNLtUR^}cm0?2m!1 z?vSp!ZEC2xK$wy9Zp&Zz=HSzWkG9XskmiKpDY zk2d~>#j(p+aUMAkJaB1bgM9u#uwNmmjmU-MwK;Y+n?57kM}I51 zgU|0Rz|4qhHzlVV&a@@BjKpe%Dr)+v4AHQ#c4+<1r9r{L=o{>D+#XMcMGpv7`d+I< z`0>6%JlQ+@%c1Gp*T`|$s)US?p*%SdYCllP`E2(F{~P%CXj#lrmEco$Y^fWf?ocEc z!DUTS?ky)qaClaB{7|X37g%+cO!79~)JcyB_Qkt!$BcXEUyziY7Kzg8=}-bkPmm1Ih`(4$ zgK?x054rR~S!K&`V@j=9U?54?O%sx77pp*yKZGQeR>vjjXXLa8i&I?;ZRBf$G|vBa zf}t3fz7qx2!q~e+wJP@{!KpHiDcrd)CA=H#j>z*jYqMnzr`<5Ie>;#)3?tY1XQQUS z9fGgIFHWJc6Z6xxuEj%Tu`_li^Pr>(pZSue0|G*C`S2^P`2ky)_AfE(&l_;0zorMk zxs^U-S;UzINRCXP(F*LMp_*D zHM#GETnvlrjpIW1devq~g*$A`YV=D;JDJojn|rQ2k`lDzlMMrpII%pV!d!z_gRJH>=__^=$$1MP<)3~?ShlWet^E3 z_wvXXA^OKFWx4w==a7q}j@1BLaMKY?HMKwTLDC^K4IjT%Z`uY1OwxdW`vu_=jQkq? zQk?*SIH7AkB$>Vz_B)h)!vA*6DL~^-4i5xt3rjVoN6)TeXV2ws7d1#H!N>(ndVu6C zFHS~BE0A2}74NQ37C8;+lPdCgvOVOY$Eo9&fYo?bADua#e_;3vqysNpEcY+CwgqW; z`GhX5Z9hLci|%?|%y|8qI)8POz;BPLSRVKS4#d812c}A~io#|dm%dCdF0oB795ZnL zaGBbfRoQkqe3jDJ27VD=bDp`=B&D*T>+PtWBL7KZy_gy=*VT`899SJT9LutHHoF3! zymKbb+=<7!$(V0rx<~~7#o8nXpC)0ge&f^25#^>3joXy0)K9X~*Uu1kfJRQq8Eq0X z>kX5JDv5=$Q$QEs2CL25#JP3BkVZI{NS~bKEr@UmPh{$m69=ZG&G*?L@Dvg zxd?;C7keS`DSrV!_|@~h>%Z(bHO=|!tbqH2g9ygBcYi5aez?>E zD~b!!Uy!&wM&Cy}#5)(QaB)kptg&MfQKMr$vD&C?J-yUfgX9IdCB<;=<$^i})cg-M z{MpHy{{ATd($Cs0vVn@>lw^TNPV_nVOr#a;o= zOYf!QYd5S+Poz1PMl`iIYpSd!C;L2Pu}bLeQMK`LI9Eb_CJ!pO3YEvqJ`SWGa$Tk$atjCrVoq-S!5==yX z45kJxX%T_6{Nfk^;z2$mKeU+R9yKvJHsxuOJxsC#CbwAwgy-4HOy>}pKAPzkeSQi? 
zmHN!a+e+k9E~81OMW79VE!0*)3MGw`_{KS&)2Bokx2X2Eq;9+BwiwI=acA}Ryycew z?;#~Wj3)!@1Wv$^D)Do5G==px$haFv7Fv$@lPw!Y-&PzX-KChpQvhk;)Mj508xJ9F z7^z;AUPm1oP3Kz~dMVH3I|67se+$?Pe+bC+zj46|n|;1II9*mXEBpEb|L@WuUU=Ql zuw+?$gi+A1Pqm+~*-nFzL||oP>Mw4=b?hwHVb%bX7q1BOAp>bWz;16X_`>WP6Thwx z>&rNVcx9wn1X?BrrdhvBS;xV1`c5Z=M5>tV#p2kL24F$JVzCyHMXiti+sUr4Q-Swp zDMGG~-~Djuh^|f)?9EZ80~UPbGIVAx-|IcbSL&>Pj;rWA^V<{i$2muCP%5RMc5+k% z`^gjprWTY{VV17_*uUq(5_Jh8>V>L6>p%b8l*vC(FDSiY#|lNW+wu@^O5;6t-yj7f z4(q(7Jv;K`VQ422?et4aeB}^%@Cdb@wz6IwN0VP(N(|M0uLdqfe9Q;P3m?VHvnmQihWU9?78D8-7q zw6xIT5L^osX|dw&?(Pt*#oZ;qi@UqKyGw9)4^DE^?~eQ9j{HqBlAP>y_Fi+&XDQfO z8*I;CMGHNdzwsh9%24^;Aa;kB!p*hjtmI4GjgGja+7GT-`t?OJ06<-s;hF}k9x1s( zpxZsIos{#1yQ61P%`%S|Dq38J78aOhx{9^Dhy2^SM#A^GY&wy$5{Zzo&7{AQs;`p% z=UZ|ypx%)B^!185>M!`2@BsTf70>YKV|v|3S7Yum3ahb}T0#GEB)zAZZ~aji+$CD`Yy9~EBy$#)4%x~TNrZM zYYGXp$uIMi;kcZO=Qi&5D?5DXtBGSK{f-Pe>q3JD`#zvj%{E=Qw|>$>@BkQSecjQ! zVEYre%kz%vd1-KlLHFu?XXo@s2FgJSH;~;!R+t6!l6foV2?E{;Vk@pfzeBV1_&}jhCV3OmR$l?3t-7{wM$g9EVRTs~uO2;c7 zrM=siY#Ok-gi`uhp>bIkrtmr|P8!g7h&oTFEiqyVyoBw54Z32Uk0R;}R!1FdndO46 ztmL^epcF1chtT)8#We*8kZFF9UW(&wZT_q<{AYQDE*1c~Lw2b6z7z=@NI+{8Ezn7T znSlU`7q)V2q*`GRRzm{r5EvFAy<+b5HuI0;5LxdQd@G(Utk9=Hy=iZ68wen~+Og%K z)%I;rsfKx$`AzpWlq<;#&No`Tz$>j|u!P+_Nq9#>WeTaj8e$a6hQcIA%zm(+mr)DAnk;L@wrTPYc%?y(FesQ}Uthp1hy+@tuQ2iisf$%0W~ z@~~U^&_(2JzfEF+o7742cu-;7IATTQVUm{ZRk;z$@_g^|nx$LIaYxpKFPi7>Y1Wk0P-_ z^R8gYA920*bxKPwN<`q39mJu(Cz_}0N2%jp_)Dxr`-y45Q?P1FVg&)%ZG6^QXu`-} z`a~zMWi0R8R&KGY7dSef>5fw0gojGDV+4@oeXNw_NaJ6Qcj39%YN~5{J&%)UN|xBU^mQkajNMF3Sg?Z@Brc7)=Te=;&o4v8p>NQF!de8qM6?RxN)25UN4Chhn@rzezkg_zMQv?kuQZ z)J*4J)jTE)ZQM{?Sa_Z}cLlJ>Beq{9{QXZP$Sv|dchsaf!xGhIvt?k3CH}R@r{%my zf#1-h(s?fauutAZ96B=(Gk?ABA-jh9($wM{PcSSqd3N_kfJwg?_MD&f<1240y3NESQJS*hc zN!Q|%>P;^PDd~$NDSu`a;XF|ap`XZ z;hpzh)6^#VG7qSI0_M&FFv`S1O#V-yJ4m4S4w?oMLp`5ME3Rz<){oczIq z$_;M4Z1rX6Tbd}m%8@s8m5%y3;qif+;vKv+j};;|^%Z#lR8`>s9O+K_N{EJzQ6f54 z<}Hg*j!N5X5MaiXVY0e|{f>)n`U8OSY9WPIZrpLb+-tm&yyxBBE+knl9K++B$ z7$_|#-HlX9-6OdwTl1N7-!`aSEuzauiE4Sgus@o!8QU|Y$Sk^S+U=nPe^oLw;w|lti zj>+(Q@k;bCe}7v|zfSP*f>v7>a9jrA>Us&QUR>^-M@2XvM++X7#oI)sf)BIfj(Y@> zr267Op|Va6$VMrQz59eQr~ajf(FKZKTer;$qmmN&Wx0Niy@tEj6-L*Z3;8eWP zXuvqfQ^=;^(O-hSC!co}eRoSLIk#WEU9BoGki)Vkjx6tt$-2)GI6%5dYUXF4RV~)d z$P2spo8n=eXgEe%rAWtmU|ix;9)2&*GSMZ%2ulC*(d>AW!_K|-)f{P$@t;^hIh~$a z)Rk;CKvlI@PND6~N9%v`UxPBRy7%!4sIm7aS9Fh{mt{K~-V8;T-21mygyoTvm0^3D zH<`fpG({g#HWQFWcxAP+Mrb%n5+|m)oW6|K^q{aRYSJ7}O&KImF$=!x^AVRXYFhCd zBZbEr19?b+LtrcO0hEQNWgwU7a&-DOl&i>M7WI{hmAoz@=-iky!gDOs$4Ap=;5b|d z%dS?#r+&UnXW_eD5%+vr99Y6IkTzQ0G$Pj6!uQ;kCp)(I17l6Is*mGM-a5 zn6AptBOu9d{LtIDT%nGoOD2c6usf|v=|S+gd}V(8)8mNlIHHC)XTnI62S3;H@@$kRVz%nf@g*)^jXi5Kwep> zG;MTLpolA}$z0!Krc2#O?uK&UBT1Fp0(R}^;=sM7O2*(O16v2dA!6?d5bA8eU-TCy ztkd-UP^6Dw-!-ugjL>TQkW*f#{fE-yg4F5Gaa$1Q|6>6ZP{{aRcJqdcUA8eH@xIzZ zEV-uW#z&|Nt2HJd7HNVydc6cL;=taIcDjt&&s7`M-(RFx zvrvc`j>_4B6j^6RrGIDeyXh6`SO!jFCWZ3QYv~$|=M%52((zf#c{%HSE9Z)Gfmz5{ zZA%LEScHo=*6rO~zrO{PrlF4VM{E7{wtZLnq-N=4K5$kyY{ zGL@w{vHiKc{+J6bWuk*nKx6G#b))21ePcHUronIHy9rSCs-BXx_7x3oT<*Ssp54r+ z%#N86i;VikU!@f_C3X;`v#ns+C`@9fgK-Gj^q+a}mvS!JE|Vt~Ilc%OM&`{{#QL}>*{Tg9Iu z$1?vk(8_S>+fe@ETtb&uY}O#6QJtq)G)#NH5bmBf!MLBQSno(plZD`&)FA zqWWll6;=d`+P6-@xQEwnp{V=0k48~Pf-||`kc~u2*(()|gbfmw(lDMI%lFkBO+)u0 zZS*%o`nskhmV;mQa@l1RwqVm3gi(^LlH7QSs<(qb@ipd_+9{;SoPL9Cw+ z!z+p_Cx*7?QGTyjgG;ai_(B{|Kq4w-^dF$eDrVFkEM~cvccUI3`Hsq+UE#G_H0uNT zaLV5EY6|J(iUp!M=!cocjw!1@{Ub^#9bv8u22C!OoWX5N3Q+DXbmCjDPSXV);McI* z%osXMt!g)MJfEuGeUjF;7_dWzh@A=61~n6tpq$tX{Gjhdtc+!k>$T-)`fgk-HMopsdE;( zGAU|4^Ag0CwJ1uQVg)!DFa~AWTRt+09$)F1^nDDGMAp8+D$)?@DWzV~?07HA2tqji 
zbw_Xazc+u}gBG&Xy@kX!uY1Y-)7Y4FDM#HHM1{B0r2TvwnBlMh z4I2M_urYP{Nw_Qt(}Q2nZ_AF|gC-H@F{=`&-tvi4h`AeBL)`%AW=ZCEl0HE0>S{xH zygSzFAQ@1PEiqQA4c|f295tYY-}ZcaVtG;%4t>on*Q-x#+$y#UWnrt5 zVgb>K!g86AlXYB=E_=jPXwLTgg(f3w$(Eng*GG)Qs^)PvmoF!geaee0s|q8~kd4-1 zsocn;V+kywWS`!1zN44Idfc63zaL~8=+}d0=HbuUeuU?|k zp_Xg+`1nckF|~5#;<{MaSLeE%%@0F@^FcA}WXMGy0sYc-jYK9Rw2co>S}g#bTj>*4 zMbY?!szqXb|2CdOKb|@x%XKZA51W9!-*vI;=yRGQ%tQBQ-#nF;!_GrJpO4OsIK%K*&0|BUG;_PL z`Πh#QP);F1781=ptGZ$Qz`n&-xyHnqxIKLwq$srX!

@#c>N^f5xs6y+{W;;q zI;3PkRX5J2Pw9Z#Lx+FDvHwP<`FB_T$96XB43YJ6Mf(mrikbaV6J4oq&6{vP1 zw&=37eNn%^b%R$JXJre>SZ99HO|V|#Ghcqng6?EgEwFfodNMv0qPo!r6s6C=uGh-F z=#PUq?mGiU{-AqnG{Kr0bcs-E>5D3iO9#avuhS~3k)1LKLWIxM>8XB1gTm7|r5m@i zz`<&V_!QWbl96TlD0`W7$49^Ud|woOni&rq%Vp^fRnD&AI6jvoZ;xri&d^6&(QRsuxlkX8sh)~tYLie$$uR(_?Wm0Mr zIWKGQ@=Npt!YK8bBnk0+HvnZBtr$drf{Wtg##oZQ{#ygy<8{a`Q(Y2P*B`z?DMK_$ z%O=ocn7qa;yp~=)tlSer=e=&FOBqhgIuUE14#2jbOZW2*e~E~w=amoIOkvixi`o-* z$oWIw!p9TOcYuaj5gUcnO>_6d{*#aG52Cyz0M7dBT(z{Og>MH@fMQ;6jxQO(X(n7t zNTACMiTd;!wgBUVniDHM(`b9z&NA)zVA$|C6~T3?96n2OvO=}IN~G-4^_ zQ0Q&N8@qsOY+TE_1_JC0=K^u=w*2vtclU+z&ZfCegSp2()|EAb1SDjcN#Yg4B4QmR z5NI@=0p_DS#p8T?P~+0>!Ogchcp2W{7U}r1GfT9Xz12siNMEYF`GlLPZz=4|=9lb1 zpq76)f@ESeT>14lu+q30m^RL#sYWM^Si#V4@s(<; z{i&0m@)i-Qve;(nK6ytoI#?ZYvGgc;jXX3#p!yn(s`1!4xO~O7>`pPp^!}2otpJJ{ zbHrfu9>k}-8%GtMcBV%1;?zo@wp!A_xo4(b8nCyFsCqj9ldoAsY%XM(!~S}N7JV<17~YV8 zaypn%Z5;c9UqJdmt2bXV>=@~axd5I0vHRojd6 z#PP{Xx$VAo_r_>Z!`u2%h$xwPe1C;q!ZxfGf|0Q}do=9w@E>Q|C(mK$=0xw|2bFC&nkWECuZ+DmRk*R7qf`$2u@6`8m2|@TAx)b7?C#LYNCd1+y~e7mm#icGLJ( zVGgBAM|Aq>tqJt%DtS1?VI4^z6ntyPTHk+w-fK)bt$yefK1mj1hGLHwxpLvN`lkrP zK?tg){p8ybFzN(5@bna}tS}Q^6y7%T1oC^YJPG^cFT(&T!BN+rI+uixQIF9v935Uf zKHUoo$?+65Doxu{RrzclkV{e)J(jDn61Ukn>VI{V?5(l2CG#V|83cF&z&fEM@!tFW zfeaCz!QdY|m2P!pvET9c#Kh1(w~;XDy&Hw&<5rE;bG}hgzA=tq-gxi>i*?XuPVa#~ zo7HcLPgk@;M3LFyE2J#V7hx~#UjZl=UZuNbeV zb*4Md(IDvC4MLyYPo?3BXT5v9wuM!gZC5Z`sMYBX>(2XBiA~SJlgXy#Q-*N?#!r#8 z93PoGJR8u6-~7+Y?2WJcj!L*U`J&U^tkF9ojFH%{=P{OE-rVvKaBh z5oZmsV`Yz>s{ZuF2#wOtsxHftsCxvqt#u8N=Z9lNw}MvYIY;r zp|~eF%Nf=6!7{eYq?a!kVYRz=-u>`S$NNCy=m=~DvLKV@AER;KmAK2xr(Y zc{y#gl%Bm}a5NPD=Z`$rsG*4l5p{?CQR~q(6yRF57aP>W72a1b45k9Q{|upL4@1fP zK&tKonfF@Hh1s8)SnRuk3ybIf>n|S{lwBtRoe4EwbkTYh?cKM_GIY?7#W=pYEEd3g zhNA>sIN-8v-9O~eUU)=Gq0#$s0Xy@&HlR`VM=p;L*S0I;E0W69fKG2qKM&KN6IP>z zNx#a&QTRwVGf8%tWSb~$rwkcAc@LONLXiu57^ODD+dd%A(^8PX4V;&F_Pb&wHZ8+Z zMxh&H{V@zs+1s@1X4jsuZo)Y)u-s3^j%7U}CU7hvjdg?}&Zg$)XIlmVJ|DB2=x29l zi*;=M<|X>sY{COlKrD{!{KPZDO)90ir=;%~WUt&MD#XrcOgCg+de^#Tcdh-F7Wcp( zQ^QiYY)a#mkah6C#LLU~WI;!(z7|O2HSsm?Vh4yRnLA68mE@~a*TH)8nA%Ej9d&{U z><&yVG^HIDI=?Vtw`xzWa!G*}fZujKn@*l%6 zGrVpZDh^V?vmEQHre~gD-IKh*^Hw{OVv}MCcrY{9KXO5r*G7mY8WV%kb#T$3W$gl; zk$e8!gG!C@qBpUd7@bPP#xoL}6cUBHH+^M3dQ&oBYJP2=bf2EZDD*nQ;4ZI?sjV;j z+&{b+onjWnXMa*Rg4ej5Zz7JIYw368V{aboqaTCT8>z%7fi9rKoszkL>}a%7Kp>+$ zBg6J%_l8e0Ga+IufTYnBWaL`wiyOF@RiqV4R+YH3X)P1e$+u`<6KW%6J0xmGe79jO z8V^T7qM_#nV-Bvpt;Cqs0scxlL6JX>-#d6RmIy!fLqXl#`)yJ@=ZycDIQC6opN4wl zYYc16>s>6_ngeBl8(N zf^NVqaEbp?3h+(=(}!eDo+A)=maDeG0XfXne$FCIXZAwdN8WRXurzOw(#yJdzC2k! 
zlkl?fq@wjs^q;@7zrcUIafLR+pk#b#^BAhaNT19MG(@j?`HZ5Ao+KH3YtqECWei|e> zUADNYw;|Nx<(-E39qf5vM^A6Q`yF8v>+o@Oa;zvkwNmw5{-`>};VWwkCf9ByGS>%z zVk+f#tU}9ajkw6hz`Hx9-n`X~36s)O~4YN30H z`qCs2BxEu24AYiVe4dgM_TwxrZiu{BxkXR8v-D0z<w zHr;LPRWnro7mQ{_?@9zI`BST|RIrj{M=R;OD|DpJh%KMt57>gYQyg~dZFTV9JMW)q z=pR*)_4o@F+;~%Kmtx~dA~jnUbzZ7Tih5$DmIghoW|o-iniIF^MvC}m6%64eOv$Xi zK(PRWFVwdaJK)}snQw)lI3c)R#OSaru0P$yu-L*)(Xk4sjy6r0ljFaq zf}A@JfB{Fuawh<&lmdXWgW8kBav0omn3)M@krga<+|}pOB+caboBTjt`n&I~Pl(G} zF?PtFJBB%l;Se}FNu$m?;G(O5Fp0s{`*xXT=!{CM1VP{HsZzs5X$*H@nw1`KE<=gL zpwX9L(n}HXf?d0SsRnppZHtwpsC6>>DXoN%K?a@!f6t4R&sas`dPF9U8fZ+4HaOP} z2Anfdk(gS-afQuVzzM~hRmE@voq4zaY<;<|YS)V*x0FpLurnTnCskyY2~k&}5eSk) zV5x8OG!x_Yzm5VD8O^`Wxia4h%KB01p`Ds?nbT)3dd$0*O71OjryPf59 z_uGe!TVA}gBLjyy*o#=2Hjy^cU&GYeKE3YJ3IDY77}WyJ$3Z>Z&jad4B(#<&A;lKi z9!hOCo>PV;ZBsMW+=Ey14NiA8&&NZTIk)LbcE2t>W`=C2q+2k>b*H=BIqktVjgKWe za7YTDuA*-XpKXpPT#H4r+PoSA48A@c*KMG?s{7ZgrFFDfZVr&y-982Y{@D1V?g$vV zg`>yg&Jym-PdYRen-8!D@mT=2*#KYl9c<6|x07{|Yg!RktJ`!m+BTJ+7 zay<#tud3*blF4@Ew_IN15y|tV$=(1N@a{7q=Z?R(ov;hMeb6?Y9NMpt^WQ8q3Jw}l z9%ij_@BRV%p67erVS@;)@V(PGu-f#F{90K+oH`#|Zl3rf+QG$-<_(2IMohAZ%Cmop z_vgTmkIUU`SRah(7uzd@n;h-`8(E$9l{0y_NL;K9!vYD;!(*-W{2w=&B3b9Y*hz0Zsk6VR@1UJ_Q)jC+fP=RQ37+d_Fsa)%#Js|P~KJfGq2 z#QTE(xuxO8A)TJBe@4$_ICw2`p0issraU0|-yf%)=sAT0lnzB1cSq-XuQWOqJ2C9Uxf zAh0vlwVh(NWD5C*>2kwvyKldch0{nYt)!mBsGfeWm}U9Qd<-U!i{jl+X^BYYczN!G zwI&N5@J{~RfD4<)HB_*NLA0fg=kpUD@8E2>@EB_8#~YNv^v{L%ibpCdG$!aymJ(!2 zz4F5tp<(vn7&VChcP;4}2UqEg>`z0n@u2#|RR2shG1dqGRRkbnP5hjIIBMEn9gJxl zcu=^xoIND>E~u73o!Bs*2pshQ81^)j(*W)eM!k380?zR+jvN*9dwYZ9>P=$#C4x+3 zkF4>G(Bm4%fRU(>M&I1wN%owVRp9Z#8F%xWdSUduj5(HuEbVz<#y%QsH zeAO6%*m2AyKe_1m9wf~)A7rM(3r}S}I!hbtKU$*5t|A(`jYpv1(e2J(*N>|}TAz+fXMLduzOETJ zn*#6g#!$$tH?bn|65!g95J#6=4C`+=2MU zPI`QuguTe8u^9!CxtF}s30Lb-SjGvIbAa^CKmS zpeGxE<@U51yX^a{PoHR|O8?$KKJEM^-$Bge$4T1CIu<7--<1OPe-a{RIZ9s2h{Agc(YyuzO(Rb&igCQ|%9O8veQE%BfT}|DqfnSLo zB1I|&yX?~tMPb^cZ!)mDRqC(hY=!#3L=VpMWkX?X30NGaRpSAjEGZ0%2Zk%@|ticBorQw)D|4>Ej$qJ&E z&pcR`*-k~hz1uYNi;{DzN|9AvM=guE;|$FFbdxr*=C-kh>SJ&J>)nO>ApZ;&|7nct zvPrV@QGRm4%>+NoKX7k-t-r_vyn=B5vs`v)ZvVKufnUN&i&w~^+QLK@TZqpB23>A@A^IryAX#r5ePKor4x~+L|MfSiZ29> z*2N{5wr@iHIIe5EG4S6Z(!_DdZ?^7&!3C^!H`y-3+{PgcLL6fr$77-MwMlj|Yg@wv zT(}q>fhqy~XTf^&&@dKvkNUSp3-8SNsoj@-H8YTBER^Pfy}5ZigeGMX8ITJMj0o=; zZ9I4P=ZqEi&DhO1n!DF$M9#1r9~Xm+)ytDR{^W>p$RuQ({o#wTww?HwZ~T%&k^$u6 zD1)=|!h&mXY^5}0vrb3VO<>9NFPSidn8Mj#nWhZPSbb$0wE~maN6WQl&s;hqOXp=>(t7`b zKXpe1=}&=-S)|>J{Z~m$Y}WX5e5nU_?}*E^k{^>$ ztcVV}T-+7X|FvPl?!mT9!+*k%*nxnal2I}R?L8&v6$fq}r6H;1B<9S*Q#hZr&^ z7KLHx^vv(#GT3z+3eht<4UZ1SyHJr|RRZe#*#++@g|I<>x z<{(v;Yx(ni^I z$Un18ywIgTHsenf0uA}!m{o$HM$iCi6^F|R0=S~8?z9uEojCnB`vjab_5cDLbl)m~ zu^(ciDKmsTZzY*tr*C0C=KxzpQ$LG86dbZlsKodKgnD!eX>p z;eF2HXR-k&s`T^;zMV1WR|q%h`1h9Uu+iDKMEku>mpz;PFZEQFg|nnI$uu z8mD`+NEN+v^hc9!_-Q3)lWCzg1rhBKi3kdMRT0~RRADc_AbizTV`b*SN z(!L4eoCke1uVr5#$~}M@B`fI0wCdE@brB@ycUo-%o`(av6H-2JH;Q(^@8ZwDhQ1_g z$3Y(kg$veP!Q&)VYhgeAk%7Wb)6p%d4Z9CQ?<-Y|UH-nd4>7<|ReG40c?lP%Q^C5B z5;e$~dsi0s{g{?fE{9zZv}#e zGc@(~u6L8SccY}AV@dX`-x{`}AFwQ;?5Z|?2`!hTC|h?%(>t5#25K z9j&wL^6O*`10xSBJD$+)23<5ZO)L)#4Y4sfXQMbKapLLSB81>&9jrat4eX&n@?EoE zq=&d|SaK(4?3BdWteotY7`$g=AG7iiHqQzpM9sSz?pi8V9WIgR$Yf!ul}BU7Cc4|H z(lUQ;J`argG5DT;^%y~COx$zID6X~Wk9KY zcsQ$!5UP~YRB4sqHOzZYTFh`LLH3(KeOw3lQ*5WoXo&83R$WeCIrHPJ*%+;_<3-LeOHgQ@(HW7_ikZL)~f;n2?K8F=FUK#PK{ zVI6+fC~S5*czPYxI^gkD5uHXL)$fn6k(=~fsQm{ExAAFdXKA#sf|o0Ji60#Fdg9XZ z(s&)%`n@S+@O=KS%=y6J)JXqMgILFj(BA_cUMpR*hw#|**!j4DUMT9sOOI+Ef-ZS!Hw5c8ET1eGPkb9lY8Vm1Oq>WZ1JOBO^*>1BHD@i3Q zNq!Y}+t>+Z%5a%TJ@S91f~UipJv_V7brTkJt;U@jm?Gg+i^P2lnV*dt4qkl68&SXX 
z*i%sjFJhcwg7*cpa>C;ctr+>7r{mvY#@tgBjT`%PM;A?yiMnoDvOFTnr=Gg$0kW0Z zz2F%Z8zlHdg%g|B1wM-nANZl-mU5?=TRlRerA@KCdRGWV;fi@R z?!R2o$4_X?TBZ-ts%cK)dOo#2v`q*{!3s*w0-1eWzQWP#o&3ry^JGT$p_E(Z{JqDQ zp3Et-&QsPV<@oDrrC?Sjb^NFld&R`J2ZEk{xTu zS$FmOIfI69$aq=iNo6b7>!2+|{X#PG77s4FCy)JvBNu~Um&Yeo8_j!e(KL?o2=pp* zeKd2w;Q%Ion=OjYHv00GVbycx%b_rS-TOBdod9groZiJwP+x~Poz(7)D%IgOlH*di}L^Eyhnu_ja}B zrPaGRzw0S(u6LP7>*2vu0xpv;z17Kr6(0M(dNsK}qC=W}mrUmg?ekL2RwmxoI0gXb zLBaNiso^PE{~`k*(#^38>iEH&k*lE~Gn1lg^V0Xkqs02$y2AW|cU4S-%ej>APY?7k zT30Dc2)3@ZdFuVyDL+W@JbN2$_bBNb=K=@tCW7`5NSPl1X&Ld11F{V&#v>F z;bUYTvx~*Bef;N5cwToUg@0<FL3k9{ z!>i@|V(~Hkuj`7%!7=Sv(6w^<7d8$H{2;he79^z%->aVDJsq+$LtOU7%m@D?x$OKKt~wArowKoRa_y7D-H}Rrb8pJy;fil=P+?J_ zR6#WBOE~lxGjaU0qI7YV)$woIS+J37MGQ7poWswT7*BDDK)>yrCjMY%L1Xs`%(-XT z(w=-mo4UIZiyM-|{(x_>$(r9T?@He$bsvd!t}}&{-qMuHHY$7Rf49Rr?$P$xOH4cs zlgvF5WlYWS&oA;H20CHe`RsHrPMfY8e6A*aqVuvRK%VH8Als`f+4+1qNRJXkR2o^9 zaww}~O=HA>=1+iwH9@b>*Z~Ji-=31KUj}*hzo8T$e}E4bI0UQN+lK9YEKh7I_HW?6 z=Ws9tI*bE`4)@%DFIZJ%%U};k4R*Q8K>vA+S4s6F_WlZ)vK!j(OSYxV^BUvL%4!3= zJ$B~4+$ESN9xb-81&P4MUhias1z<58;LJC|Plu~@aW1RDRGvI%&Ci(@`EdA;+S0E9 zctB2{Nb9s`DL4LrI z3u{r^=&bHIVt#h@(a9$IdfA8dwLm9}~=`(t(+g0LwEysBLBg9lH&P4D7}#6~GB^jWHk&D5jb9hhJ&C;g z)^s^e@XKnzILT7oH-3DEgy@9{Gc}owwSSfEhT~Aa*LAC4i3ru=vFGpIw1gIA9Skb_ z0TheqBuryE+ZeVh(d~i2IqfJ26 zy4#d%)aoBSy|eTYBlnu;CCe|YjmQ`m3oZNoRonL!(y{k1Sme4Bme|)M7?$Yv7j6uF zB!f`a6S#?bvqu8(CXKcds_MfReQOCy;jNizj;@zAZ+M9F1uk0*?5Bp)a!j5!oXqg! zEtd+u)#YF%A>E`T?K`Z@RC$i*{t=KEb`@rDI#8cqO|eYX1X0y$w$7DR(*ue>9XSjZ zuHCPWM?f(#qEfAz?(nS|Gu*1N^K|yWRt;;e&vgSOfSDK)CtGT=+TsB}f_KDZOFyuh z7so$3BFZ)yq&XQVHBvv#Z zm-K1LO$gkS-A-NrBrO*o&y7>b}b$s(sGqL<@J|N+stT1}AZSWitBi z!&kJ<>8i9%eTF%*%9`^cUI;hx!e#fBqg9~QdvARLI(D6>`x=biO*)RforK?}I1yza zs@!VtFVnax{AF0WwkD?7%4gi3y+jSA?Jdx%2;be^C%>%vBuwnSPm3&6CP@-P4~K~Y zZApptU8|$BfCi9o_OXnXbK*4QJ`xwB#QeLgl>SG0Qgsv#)@8f%?&;0MS!;+sB--H> z#VgUgvWtW*^DxuGPGmRJu>TWEI|P;3?r}?t=M`kLReAwh+-5{O5O?SQjV6w4LwX3= z&*ak9uhc$ScYXkR>{SrSu{_)!waFN_>-NL1Th*#1H9wWD=n9_hb1-Jq$1q;$_0G!J zaeKhWaq(()X?0e>Oxc{)*=qktsf_%yuv4e=qM%#hV?)4JPyxY33%KG4yumxrnfWNn zj~X!DS}e=uB)@g`{vhwn8a)4a0OKvMJ@@p8&L*)SUJ&?r@Ut1FN$Njmy7EhD7WsZX z`Mmyyip-|YffOW^vd;v{%4|82yQ3{D^$wq3_Y&mIFqs$-0MEK#I$_JDTUT4~Z5`0% z@nQ=fGO*SKV zpR*riCACaFspaL4oXF2WNR~cECJ>KKS*!`mG@b90*6DDigkEq}_fa~Q`q`Fi@M zL1RnCzA3S>(Lr6CYVTCvMbVorxttax%?odG;o3#qC%*DBu?m7MD=j(EnL|Y)GG0gIkX1r)jLgsn!&1lZL6_!eMw$JkCu9)Dk0Q+_3PRN@IcT(Jxipnv<;X`L$TE7 z)cJbnlybb)8_Yi=FG%6ywroOFiTOE!)9R;`GvU?ISBioU*NnE4A0<);T>c{E?bg#! z%zyL9Vn&x?a-1w2CC#tzM}oPbvyQB>owXR^W0l+x%0VnE(Kgw}AN#b3kR-WWy)R1oegEbe?sSp>yQYC|hPiBBziZR!Z<&X6&DbOLmI zbnhi3Yx`xXzMRz??3y=8GvON;mx~@RrJK1i4!758uSc4N-T1k@A^BKY?_+{@{Js+- z{e(=L2dclHYH%-?+~DOHOm-xAs}1=nE5ynxfwyZ~hnw{+$r`XKpGU9zLB>DLs)F~- zc@spexbrvU2=qGjO;ex3o)qH+*{?S*bv|yEzEdPTHdT!}@#;g2A$a=D_10|1Yn+)Q zkz(MTmv-;O`|#D`6v;&DiM!};b{R{0$`Jwzq)b0LCU6FVanbMoR*K<)J{Y_5${GKn(7(?Mq{jU3Ev1X?_u`sE z7jC5aogdpE$#BnvlcLInOUf~Hd-A_4_9Fo@yPiB~bJ(2pyq@Z3RYi<`EHN7Jc>_v? 
z2ulIAstE35962KfI22@PJ%k2~re?~0j#|6+O^RQr$_ryEJrfp(8^wPFejROuPCevp zAAC99N-!4eLmf|<$V(HUid^I@iH)vz82j%t;8Y&y=hBf~dK=sa8BhmEh%M z(-RNgk7#?)HGOQxq%ZwkN|?yN+x&WoD=sS+!!yY^9$jnslsNf$6R1xrY^6h42>P-$e4_K>87ODbqDU4cgepPf|1 zr&)U@^Mv^4cd)(Ll;DozFt+$uGLoxbBiZ)Nl&m#Kz-IK8OLRz0wx~*Jm)-R9<96>7 z&b@!b-&`aUlglEN;bh9M`F(Cn!V@%&HJv5F9XUiMb%ique|E1au2m%OZBk8={L5$Q zOUvRWcn?6{8d3d7{YbUdZ%rk3DRo65{2Y_!H_`~9B=y?G+yEwoeGkvmSs>~iQsWyb z``}#>ZaJB>-&^+Qn1bm95KQkJH(GVkM>3#;x#ptXCKo0ePWwg2GrjpFMbsgD!+h!} zWI9G%sba`|7Z>)nCGaijRi2S@7poHX-%Kn7o42mWLiEy`iFCmy>fU2wNS7!Qs8x3h z4RiCoVv~XoiBrwm1KZ|NsluQHgx-n3RBE=w1T(IE+fTzG7HUvJS%!2(MR9Bn8gztb z`+$U_&mD#{Vl>nU4!eqRdyC{*#lw|LkWDpFff)I|uONKQ-|;Mb5?Z2H4R-*fAJ3Q* zD%pRFbroix*nfO~zUlZ#v8*QpK4sy35n_A#@%*#yi!~y}oK)8*hssspUXbc4Q=Q23 ze|5Qtu{Sqt$#Jqet#S@T%A70M$s^Db!aJgNyf4{StG*d0X;iW^*ucxcI(LGGWs<#8 z_5alM8ydcct^%|+GILg~&HWoU(m_vHnldNaUIn!62W~pdrNsvOnMLw$EeQ> zy`Ii_^}IUXdqx&(vdK{aSRF@0&-b0MayMXLMl_bxFV*U znac0lp4y%s;q=@>4KoT+1mb63&Mu}zj`M3sS*}d}cj#|6pJM?H%5NV!yLJ@dH8Y-c~Fs7L3 zue-S^U+OFAuTPSbV>_KcXxM(C@tOHC9QOKH$yi_H$#bZ&d}ZQ9KOH*bDeiE zv(MRgeAZeTZecXhL0~UA7`a z3zkC*O`|_vxQoNi=d_HjVrk+nWf}14{_nY^*3BtC#e;S99Bwf#g3B%Tpkd^*yn*R* zik5Q!yAO{(f#OxzqSlC(FQ39n5lt$v6hKJ+jZ~nAP}9ig>NP^uo%b8ToSaH+w|qSe zm3=T@7qG|WTaK&W2VvTykG&B;u`$GDqYU^11yKuve5sJ0M{6YJL-Mb zKhWQ?r|ut8&3Mo?8j`Zp&uRAz$?$&3eRw+Y%Lc4Bz@RBnGzT5kTH*hDx@jli6bM%> zW+F*4ro^RaPNX)Ncz@`$E#<3_c~CMh(G?d9R^D&>wM(?gj_H2QPHX>n-*K~=o|4r6 z;BvQYbKxz1?e6aaa(Ul)6G+L8Z| ztQrr@WZ1uADd!Wf%PAXcX%79>@gvIe`c+^yrAQchleSC0@=hwE{DolO$n8xDV}8DN z6sU4o91659c&~wiR8mGym{|7onWnO?t8PqbZcI$e=DtKe1R}~jBb5&iZZ7~Bp;1*7-pp`DRD{*v9m>Y{I9Z3=={(PA(o|_U3)pv zJ1T=0>NT~*!-j6(fo(PBrHbDbc`;Y#m0U+s0GB}-o8`Am|0%ySnmM!Xsl}CJdhvNN z`d*gDh@<74wn=Dd$j#L5cQ=LedYhr|Y?#EN(CCdcs!L*tf~YSjm2roGZ#xuYTU2c2 zF_wP`)96`+ew3C0-Os--6g{Y-;I#^{H3}cWK)K4O`01j)MNME% zCmGN?L9#@)q7n8oRP03kV+a}XD7L&^I+Op$_lHP)Hp(W|o}0e$cShS{C)a&VtuNd6 zcD;S1iQY}wmO7iME{+Prc6NSeBWV%hc>2S-_9Nkg5(EYX2f!^2+|$1i%*?j4VBrzR zYUmd;;*f=#mL25?x$W2AUfj_!uKGS4vC-U5y^^ByI_4yHCATX3dhX-IXduoroHtaK z2~>s)q*v<(vZN={tHZ9m+}PAJ#^F*DMc|v$cT5_UG@cowv%6_LBg2E=0}$}FyVJWc zK79F(iq3ur8%JP_fQ1I(I#}up_GE?0{UN?rD$u(7H=NYA+npPMU+DQXN~NomC)0VU z6x#68v5SOyT6vhGN)qe*{;`*@5CLf-+x20>wp8nz-B|>`cEs$t9+cA_1(P~Q2Vs|B z0^UE#oR?3b8wxQcu&*L@Rf-Kn)os8fTuLWFUy7xg$@OmiH1&^HP+#Pr(<{6e z&4YXgGP+LGXR_jp7^-V2Vx|NgCCIECf<)fXSPS*ySSSx`8T=zt8L_VK>ZebBr}!2F z^DDF_g{$e|Re)0~ux2O*5=yCSBQIqcD`#WM^zqTgywPer ztn))J=8gP(kc1%?7xR+-Y)ZmxRIvBOU0$>>G#7R1boQlR0*v=i3Vxy4-$X$3K3#sx zCQCN-yJ`Rp4;anQmTO;#2*f(T*k8RP9NKTwiL9+^y-s7dT}HEnY>Hu-V!bsIb__?t zVv`N5C%0$pVK^Ves}P6zdXVlR{m~i5Bow{3g;MO?1xC7F30dHoy-4EdEVRcf98o&RJ6$8AzyuFK!sP-_1BOH9~;+stD4KBBSxj5Y5O z?X!7}?W|Yw917fS3O@IjGC<0&-aX)-YSyO@h(EFzA7|B+JXnHWS-pn%Xrbq`Sm>*H zSpnJpY%zdpGm*H=sz3vfNTsPHqoiM%P+Tw6cwR>MX24;~HOa3Rg|?v6=YjAKFbPwv zTKKyFm?_)p+^qTe5%M})@Sb+Vcc_?n-A`-_UW~ClD}4X@F{0Fh$hP@w>uE0GPuN;C-HBAIxwnGEE|l z4`t!d&&IYaElfIh7{-=^dPCVoJ>(#THDh(7MLbtjLeG9%<7hgk-=}%c*_t_0@SJWY zBD+nIgT;-xjXfaaqp>*a9^2Aj2F$$xBe(he0x+}Nr9+i zlS8KW#^@5cjLGN&2uQXN&H3Z9{K6o%@YRo)dpn=VR3*lHiHw7aOEwVk&H?}YZ%^}o z7uKnUrV;SidCi`5!`IqCPzM&kE) zW*IAReR>A?hnIi203=p2fD@&cuKPyFQFBF2FREpA$F(Diy_RYF+rem}J^EbQEcd+3 zIKb186JS&IcAgUb=*SIT9?(BG3kwV+VmOtres=9=QUJZTAv&C!}d@iF_wH=WRWAr>hj}Y zn2{4tfJK+iN+d0Au)9`pznqrB1-}DK3VpLD>DwhNR)8S~njkUk5UO-SeC_YZ|Mm+$dyvseqW|tC9U4T|47Tp@%RvM(oD?Y87UC!yu@~Uqk47rR9N5qXPWmjR=NZXbm?4X(5(T_owB- zWv}c!(e~nLENDTo%V}N^STynzPHT56Eq6j4DLWEO3|Rc7ZKph=)P!^o^fvQnj9pl{ z=^ODE6jl=p^ zqmsnfSzQlew4k0tb)x3K-%UTicN5K~s~o8Ej;@cF6Wy8mv_y6jdELT%@A_a#w)Z?G z_+yeIVOw<~bK-hZE5XF?x$M8j1rmdwq7(1s`?y8s#qco(u4C+S^@{*5ZO|pa7V}B> 
zD{!rB9{`ToJT{Gg&7kRPk44q56nU(w{n?ZlCQbMA*D#XZRV#N@OyPl#5t)fQQ2J9! zRV*r+SrI^uQ@K+jGCLljm;l7APqj|>EthLm2Ulol8O#ZDfE^BHi>o+wLxggYJBT12 zQY@GB(5-r5sd%r}8M7&Wo|*NQEd|tfuyQ-%myDv!+cyFyGmjs4yj8naQ=MC5p6;*G zKi!M!Go3nU=S(}*g3^l!V&T6Gk%zZKvX$EJw<#3DWrKpEwANL+j&JQl1(}43JWLHt zS_x1k9s4)+m`?9yFv>MN6nR5?3x+! z$nbhXW=9ql68v9*FNZcYxJ9q|_(vmRRsUu4%;F$;NB~{@ctcf)D#7yFCOO^<5uNb6 z?HI0E8Zs3swWMHGE3la}v-0;UQjQ;pA+ZkYCb-vi9HzOjG$919_XMHOJMB*8ie

Q30oADx8XaTHSTY2ko0DfN4Pm=j_bW*#XZ zVlPQ}d0GM5M9r*azUzluS%Tviimpw9_amOFdQ&p%@m!?!uVa3s{{yy&ze|Amr*A6n zjMe!QZo0ILWFnc`9SN(aqll{74B0&wFXyce;KQWEJqbH{5G_|dkZw(TJz;2rmbfv! zBNIYFqg8DM=$!X?q)SQ8lLl_Z*<=m{o7yi_Hto=4^mZ{pDwx3K)4a6L>@o45mOoh& z=NPfy>KMB%@k%@}8rUPkT}BzJ4bJ3m+Hw0q)n&5W zCk3TwA6b@6Y#^1OvoLu@724sprsQ=}Yrb@NVB~c2kB{))$lM@=d)YVa^5kVXTaovU z`pi~#ZPj!E6ybWX>U}W|u54OKn-F?=yd1gJnsFRuN(R1|jFr|p6Rv-v7Q6Fh^ z(Gx!VSL8(nwNXmKl<0i5LN?{g5-M<~eG|JSUs~X+h-@Y0qi|9WSvr4^v2^}D62`xw zED@ibcuvWDuzN_y@37BFkO|mXP&f~COJp%ig4ZzA$^-weJ4!_`Gu&i#qggG{UJ=cpG{QcVP`$sBI(U|$^h7N zo>;gYWJv-e@7jgJ?Q_Qr4*Zm)*Vnyw%}(ZZj!V|)FFE`HHav3>w(7Z=cI_K7TtnLL z5^vtt6}eBw&?`6?C5XlZebC$L&ik@2g#3y7JxHYTFFxzOd+@>=Zx_e+gwOEUE4WYD z;57;w-+zW%|s=w%;=pzXa@Fo}uFrxL^m}oaT#(HUNw;qA~E|d3*5dUQ3tjEEGTFD)pXv+T>M(l%zTI5n&(XR;W z+Q7LupISzgZvfejC&u{uoG0<^mK>I+SukTye}ulFTtttC%nx}mL4V{Z7E<>A>p-0k zEn!eOCTepn{R|9Bbb*)03r@o6n#C3mK+fTCNgKmdLvyuY_Ze*fkZ&q0C7Qeov`~ps zjpJn;rJ|RI{T7WhU8Sv72c%B58A=%e6@Dy9tQZClbtYg)PS~Hlpol$~GBL`sIqR|n zh}9|?r`s(6*_)U2#W+`X<_BW(OS5q#>#wqD4frX#+gUp03h|muQ%}t64ZD-kL<>ty z-jI+P;<+kFIifZk@wX)nS7LiK$B4*UG2sd_bdUu>jLx^8UmGfmX&fKyzop`|ys-u$skw1)4nS!}=Bu>5Y1jPCvh&-HW~ zv@RMnqteB%OGR4^6rulUpPm!eiamDI^b}PigJQvz0601yv?d6K^cu+WbgO2IzJ-G zvyi_n;0&e0PO5AWT9qq`(4c;g!&oZpV)6&RZB8HcU@h^9ew2zXv7hnI9^=?CsNH@o z2tO(GH!i+Z(A1Z;t$j%VU)Xn~>-}uy^j)7?q1ah95bfB8yPBozQdZYzP9xAA-@BeW z&OY(KfN0Riye&oK|U~0zpdhT9i2PYaB;tE($CBx-*^4J|8BKFfcYeBG@JD++5+7 z7LFikSJd28YH~+AW}Zu;3Dpz$XiDmP_*7gpVD0Jl(qKU_{QKMQdWF_{k=5ESHhRB zWN?PlrAD<1nbdm^oxYtcVe0j)06I$d&!3lSYdj0*$vOj4kz>J3}THCDjvOtoN)blde`F2A{>EbPS?}Tr7e;%@DR08WgX;>FD8@HQfehj)1^cu-+VKpd2_r*8zjsRumc~myu6Mpuj zifAN0s%Dag<=lRs|IUHnhn*=HU#d55wIYI|Yy8=&xXQt7JzRhuy+nz$;hRC^-ERs{ z+4ryxdRI2G!?cq(BiFI_ZwD){ z5fDq|sAN6?fBh}j&G2v~UZAb)pT>uLQgC`v|CztJxnuEs(x$uo2}x=P#DRb5^Qv`h&M!t~w`q z_?<6@lx+B?#On0(m84yqppYksIO_~r*QK8pBY3O@Jrv5!+|4M@ru$%k%w~*39B2<$ z_f*rRZa2ko9oEYBAmyTDy@}?UHBYV8eS`^H>rgP^Lr;iP*^?OfiGAdT!?EQ>y!5l0 zMIk)+C~l(*H|BH}yK}KZL2xf2+|b5&OZ263{k?{4O)7!F&&%5cbtYw|DIY+e7qi^w z%s6l5yffyU&8NYhxx&;PF6Ffs2m}8^aCj*TXuo?WhA*QU$8Bvg(gdUEzVr_L2>xJ6 zo7)s2nOv-_s9;E+?Za%mmmk)Cw5u5k$7cVI4uJ`=9dr%LiNFx@ML+^+s?<+dbrMP- zyz|&0b7FD1g;(jsJt2(u_Cdg=b++102>$E!3Aw_|=J{IoHPTJ!U_y4ndku2SYM0?$ zAu_DUA=K{uUm2@}for~DBmw^NcW zkJja|sU1(UL_fOsILl^{6-$1fg#S1Ow^RWgyWCcbm3f9*CaPr5fl9mRt>&-NLL1Cl zjV{8KUbe_)MAEDp;zC7N8f67-M0}$#EDT1y;c+hJj+-wqn9Uw5`dG704{&FEA5PLG znL48Sgbcl&Ty|k$?2@A2g1-jl2mPjqxz*vI6=!!v|KUe1c^~(bc%*_EzvbMG^q{u# z-spz*7+uOI;V)BaRB)XA=MJR^$?&NWJZr_+6zxtK3B8rQ=68O!f}V~>bkTev8TRaS z0SG)95`yQ`5fXV!_us`naiXWslZvr182wlKq$3Cp$9vs&FtmyI{qWPxM(r`+X%yG? 
z0wk6ssGe3-c5hf0fxLtnK-a(;vXU~=3$)HyM~(k%8|vg(I@n2^r$nGYu#$^^sQzgX z*<*JA9`bi(p~`pxTScL#OLoWYp*QH$OVAm>w49KqmJ-!CMT+LbSy~>N)s~F8|Tn}EOtCn0A44gE1T-DG{ zz5Xrc2?)NWfkqIdus1pf`Ty)z7m?nO{x+RzHe&sp{pW4j3-Ba;IepBV?279r1-M99%*{TbH`#TJ&l{>wAV+2* zFyl)J9u-H+lbNuaovurO_sgrx5CXbRKefx(5^`9X3Mr2(FlU7}RQtZ!oi~wz)LZXy zeiqGbO!+1_g29cs{O(w)3Rc1VIt(@QcA&C^=La`Z(lG3UMog^f9|-=;4Zzp%M(T~Bl}MSI98v~RCoby)P0`^Zz> z$a2nUjdRVLKN%0HvF>bu*8!;*hPvNB%YJc_RA4xV%XrPpeJB(cb68QdoRx30-3FtWZqku zgNyd}@Mjb_HQcT^fN6vMk{Ys@CsffM$!+SdXmv*W0V7aRy{20ft*FQg8i(#jX+e#I2lQ<4P=h(sB7)H)Z=6rx`MfSLl?yu!Q#TT;tmF- z3gWK1_o)qtUL@R4Ow$_p?ySuU2DmI$Ujr#VpLQegK`-4wm@*0{emUcTAZJOnF4u=u zH*i&(53Rzis1&8K$R3><>DgD1SmP`!(sEohlcaz&j!Hu|)pFmn>osqKpTB33q^T65 z;Uan?ey$y#%4zwzlT34zclp?PDK;vPl9p}P$iL<{3nh3h%`AT@as>faqd?9;Vf%Nu zbRQ;@qV#`_(CY`GA1<2{4Q3G*@I6a@eei zbhGb09JjGN0bxXPK%y8~vAtkPQ<0fB7GPh}L_uQal5|5YgRpW{ z+6l6eR=<{U-M%+1NcwaOkE@Mo2&!~ z4Ldyq`E2XSg3_BsDL4zkcyLkDB5o*DmFKfbcL~yjxKAtK zt$+(I;r#c5&E}C`Pyk2*xavAip_=-R|KtU`zZTJ~%LyJ?l#BV(h%uI7y#E1Q{|rbj>3>3lIF=m0 zcJ2^Y58ccX^heS3C;Qq!S7aoM->%%oDDZ*pd!GX%r!E|J~ zSLXp6k8$Lv2HIfkeC+ph&Qb9pH7t>TO5uIx;52^mQchmqq+wlT;6H?EkA}q)Za);| zUnQ{L>J~+c31(rmmE^-)fuD2AkJ(ch`UW=k5e%4wwU|-Mf-|N9B~6PFJ_W zDn{0Z{bAu9uN;|PSCZq*tdjRnm-pr~N_OrHB=&v3p>agnibw~KD}VJ*@(gQ*-D-rh ztonNoOA8emYCnI?1vVGvs*(cs(o127rXzS%2+qIiNP|nYj=+KshrhMMdd!m4 zX>_UYqVP~mR!*hN-xaC<0=>yWBVKgWmBpeIO-rdnX_oOu4I27^t1-a^Ipn;$6&Xa3 zih63zBQB8J!#~l@(5hbs4cJI;G|m2~$QRlAp-E(bMA?Q5p6-<%`etlBrlh&5q)PDtd zx)1FFMyve3F!RV$j1@8n?eYKXxQw(v%-jO^Sc`YZK`jxyoNK~eE+NxuV+Przq-?UI z>hJJVl81QMta?=K%yj$>H%e(lIsMKejpVsWe$)*+{YeYQ1(iA|ejALrvVe)~h251u zF;NeUhvSuVGHHEBq*EuD1l2O2Ar2RMxtrwpb>DvmM5tCSSoOA2ZG zD<-gTo@Y#}cZCt8NA$K89Djv<9v})?!#iSlsXY+weiujPWhO4h-!J2j1GFx0IyRbm zDg!EdoMxNgsc_&`_QN%i7Eq1@t9mG#m8B6&(pAv558606P{~>DdiqEMGm!|537zI! zRJ`!!hW9=p3vg?YTC=8LAku{y4Nc30ol^<*Vl^eRkdvZnWH7AT9F-DWi%8oFNzIyk z(XM<-{QcdOF5~CZc(+TSpATPeVa^lsoYb$oTftlYrmmUFD)C@hmvaLaTO16s#;s>D zF&llH`Ce`v^>pHlM8G85h2+aP`82L*a3qiylQ{Oj;a~s)q{I_IChyU0q&sepu7awJ z;4;aaxBkqo3y0iaLH|}@+dPeUJQv4|ter-Rfm`P;veRWJ_g>9=N>l1R-(9}i4lMy8 zo;d!cz(ncT0@k1JfY=@IDH}Hnxr$JU!KRWdxLPn;`x}Lk;l3FWk;5{^S*nyT8SRMm zzQ|~reoDt=+FJl}5>+Lb-9TTlr>*x;UfZ1|ZSbPWmDvG%wcv*Xn*Ng>W8K4Y0pbE zD1*bpRCa3uJf(glXQY3rmWEg*K|*}*bJaH=F~*WRJ@S^K&Kwc(D(rLmLcB+VejaSL z#hv6DZ>IN*|BdavwAYVrJ{I5XeMnf%_S(2SZjUUGabnIp2m$k5K-$;xpewMEMKTHF z0^A(qP12rCe)?Stem%}-iLoxRlK9M#2unKaIyRDHeAcdV+;4$ZSiRUZcDOn9_jH@a z|IC-HvVp~Cbzi9#DB8ZYqBLb1`%Ru}?F5yd_MV6dGqNH&1`<@o=5P#oY&YI)pt<{_v42@~gW~BIz@HQ+ckxUU5M%M+V0M9rC z9uy$duLVj%CDpL%88)T}!#Ti!>HEyVII2McmZf6Q3-jN-95Np)(Qw(o!7if4)cv+f zzoXVgive%Sd!pKD0k@oQkm)^l0_CLWa^#$nfNT39RG2roAtK1Ys=2{tV9fP{P2(Z5 z_nLr>I7xdFj%8a7|NcDsBH`$yQM$(MW0)jo?*|lx8ozY!^PJ*@*ncNfMvFoK#JEQ; zI(!HzKBRP=I{K$;K1^U1>XMug%ws4$-<1k! z7gAIptMWJ|ELeglSB0d=O_8~lI|*yqpP|T~Rp9p<_f~+%w4MVtpzGp`ia5CfA4Hx2 zbF5#lA-u1aISUZ&x1-O9)|CxOQap=jWrClLNIk48YD0@NYhJLR@FC%QP4ttrv=x?$ zL*KdrqB)m_jG6b6w`dpO%o&$Hda0gqrSwfqe5y2ELzBc^Qc5=Y%A<%&#I`^??^m;i zlutAuF7Fs5ERPBU)CqHPy{@Kr-Tbf%q%9C}ohm;2L7LmNmr))3w zncu6r{ruPz^}hpy_9OOZbvi zzRe^Zv}?yao3i-GhLS4^e`;K z#8=@NVynNE1T9JIU6XXoK8${wW)5ImtoOH&pz*0=wuj_~%@Yni1fMou-E@?jf7B{i z$X0kLqEWy+0-dHCSU~K*s|UMiRU=AgBzAXz!SAT~*R67j z{cb;5mV8W`4kNKnV?Zv9)XBe$1S4pZ)!+ISZ3@+m+%lsyaKAihfphj^KJqPT9{eF( zfy@VQwJ=gw7-sTh)6GMlzFw&Y0gS-lUzjq$QLwWc|$XW)K=J^5(v$%Go z6=Or2HCsY+FzB*KHo^Vrv2UaH=P#(W!Q5eW#(FVMqFQ=>RWveuP)9g253%zr}0S zM*HG=%7@hjlgX8={Q~%82r2lh1qRu_+Kw6KR#8iJP(&g$GgTFaC6XG#=}wKxZZ;MB zZq)%+1`MyPwkgvhkbSIo zr00)Kczv6arA{0Rt<}8AHQlaMkzQj#6sC;(Y+PoDH?kfA8@^4JldOf3!#5#bsRrW? 
zGgpEcM_}Ue#Eq1OsEqk@9Zd{gyQAS&3U9Qy^$lw;wqo?^k3?c&E`Iu(lArwKd<=A` zWIV_Phuk4EiQcFcn*PuaiIeJ#zhE2R#_b@bB-+Z^A z0@H)nZW!ER{1#!isc%(B$Io+P9SIVv|3hNUVI?{3`BTgiaVHRypM9oe@=5Oy(Y=3lu}rGrmMA?_?Sz5o!O*1zE;2ST-fiy$fvm~^%0O{FB#Oo z0`P6Uu4iik`0Gs?xvuc? zyISz41j?wVn2?l*+N&Daq(l_*9r&o58ouiH6iU@|%DxFDEgYoCxsr&W_6*%OIUbCr zcY+QEqYFCz--hkxxWkGB{Cu9ezRZQiHsyq|&? zVZV$|iQhs9Bh;wbw!Ut~cZW*qi-4{_#1%Nh1!Bw!690PtWmq4|QndRBR9+9(`ipt@ zjTevfP@hSd6(&z?+DaM@4cdx`*eY~Rh_z@hGC^lgUqRx_Mi54gv$D&AS!zERQ0p%M742NT`v{p7{Q31qla;rfAbEp0#QO@Yhm}Z8L{jJNF0=IR zTmwZILtz$7@3KsqtiD%;2OOkei*s(f{OZrSNq5FX8R?DJzi~CirzZ~W(Dg~p7ln>f zPN>Z|{J3+3j^+lXZhwnb%ho}La2oo1VYui=kraf<)=mM8M*uJThu>w>(|C=cV8j1v zf*5>@c_m?%@wQF#`Y=hZZM|Erpf%W{Z)a?>q@>~h<5;s_h=qeUV`hNKO9G5x6ayAX z7zaK#EVVyryqDlj%OsB~hSTQ7eQb>T4nO4YjXx&FC}8lk?Srg+x};$O%bV*S%d`7l zipRw04Sq**siuL>(Qm!bzLB-v`-AEedT+Kbc;OtBR|n#PE%OY_jQA<72>j>MV&68I znR%+%y1{XEfvK>>unk$q>H&X^4HVMhj5Yl=Pg2V|{YwL?R>c_XG%3 zE?))-;pcD^!pG112dg>!S*WNJz1JId5Kc|cngi-!*%VP_Qm4LW@ius)Sg@>t>*e)_ z7d#LT0W=9z^0P@^{Nd0zTnGf2^LQ|2R4Hvd#HK>A?K&^fNLAXqI(I=anyY*AA=Opc zB_iTM{#@rg$<^8ll}Yh}28W zNLsnd@0{CHuIc@3jJ!s=)*e`DhUcCmz0j51t?CjIu4z4x6`PTz}THJx196u+*>Z zW~vkJbJ`kc@VuvFLFIf`Mi6~rPX>oKm-O19@8bO*uU(<8$!R-FZ%zz7FYkf%wV4`B?M8S zB4)`atfG_<%a-l@g6@TW9+cFBpRlp+R^%R9HInk_&T&?pEV7#M&Lm@p%UxaSsMK1h zb+?D~j|mauw`pFf$z@;5y>OFxnftP4wFfym$19;R)5f}#n2GX3OrhfsEz`cZzrH^F zB4ef)OuC3KZ%*e{;8xPE3S0Qq+L1K5Qf1v0FO#GcVMWJsxRy4+XnO-V`l24_{3@bB z9cEG~Kn8BskFOirztB7+@KmAXpS0M0w@?-IdH-!U>3UL6mv;|b5gjA^%vzvH{yTAA zrwh|!D+K)Xde+3O-Rp>HH2fkZI}U6i!5q5{cGo;%#SV)-7w9;N;R1LgKz5$3LgJi{z$7KVgob27%7Z88~-dLA- zusY%*YokgkMpe;t2{H5CPX2HG*TKYU(M@c5y8Mi=i_}Ac-GU(A1*a+Tj{oKIwkges z_MZCV)j_6A8^7yZ8$VSHQt$os(Z9m62~-pmL)F@DsjslavDXLU$O5Blm#?z`d?S5| zinAr)gLnH*+h2zjwSlOcWNPG+I+TCOzxZqW16PoR#@lMPWTdZ4+gC!LJCLN>gj*g zxKBt=*I2oGxCeyD*}PZ`3MHI;?B4#$-NTUzwUMFd|lPn0c8k z@Xm-wyt=I@xDf`1`PrIiteexTE#!w60Q{T;P*5$GfNS$2&&CQj+YGROOV(ymYPJ;B z#YLzJ?#TX-^ohTC4$e_9?QKFp&T6?xa~--i5@egu9cBB^*%0|((YFi%)n!K5?o06y zT@Ie+CP__AJ)0}%zALW%Rta#mrrgtRnwmFU)Xc-$TNqAlbc!KS6yb_V2XW^wBDuKP z?uTcf^D;(jx^}mnH?S8p+AO@@w6Xl*$%_n({w(~SWyPHu2MU{brJ|n4(Fy5biDdnx z`w{vj=PiYq;o__eL=57T&bx7DL29EP8k2cbHNTYjW_>R;cpi$@ZD2r_T?7MiU2ECi zvV_gu<}+MPi(hnUbS;!AW7Yq-ED}H0mXHeG+gB_Aub$_9IMk+p=Dfo2XJw1{%6{Q-mzkK559+4Mcs|s!0V7 zuxaahAX(9pN%f_$qAr>UK2kGa!afL~`BmUVc5!dw~nOdzHeb#3*WGy%j1V@n+VzxX)5wnysHF)KbOro{RbFuys zIDdTmbP-0W5z+KMhTs$Dfsis;zl@nM4n+dPSVf>&*=pH@J#1 zQU4=DA`cFxF3%ZQe14}>`Mw9aNzAm-Z)Y}~@!YZtiKwDl;&$n1=%{*|ihh*KJhCb| zw&<75T%eF=$WzOUilTMp>2+o73K`8#+bgsC-?RSP4%W<% zmGFFwZL+{&YoO&D22!vxIFqJ`+3yu}0a!{0fS+%e2_>>kStJT`J|Ctn4#MCa+Vid* z@_C(_AI6J4zwY&_T6PH)!C79%sfq$u>QVuEZYwU}vq7>G?~zyl*SrJ_gZa*h4^-{N z0qr*#fG94}R|4zeqHiEGfcu+PzgKZpOiNakNM4WKTBVVH*y$8l;cA7owEh;A;9a#9IdU}ws4JYF*qF1 zM3M}Vkiy8}*glYD2V@v_o+p_%GqKNWu`?`w9tor+D3+_w7rNKXO{+;&Z5yqivG780 zqLzL<;T|M&UeB!qbkXGMs23yj{0Fq269AQw&~JHpMgqt6L>4kPEq$qM+$Btc)iy`P zDa&x5u>*_36WH_I0bV&D4LqSV&{EoL*~m|I(o3FgHZz3Q6_y<=+$Vv~|45pOlZ`xS zm~*d>@(mo2m}`ueOm9|Z#3gKa?wY#f79OAW#fJvhvM$p>-GQ5JJM)c8D{iZqpPol) zEDym-#r`>lE}4eA4Fw2{Z}hHoL!unYo@%qbN>Uf6p2d$*9JeJ>^fRrUpxfFVo;z$ zlpQ2-%eeYpiJbq@3vKTZ5C}!kIR^K94BYu(o)KS#bxldlNfi1ePYtN+weY(Kv3TFG zqH6BLpVCgllM3v?s9rgsDSK|iyp>6{7;^H7H9?Ns|GG&yg|gLl+^L5-lU{0GJ#cw< zC76wPnXP2acwf-8mYD!}kz!n>AhDXJ;eTna6U8&?)63Nxd3LxEnkmy|saqk&Bo4YrdlLs^%{Sj?hBQ-RP!OX@V@juY{d{kUcJ> z3O3|e3egK<6enYSGU#dgc69em)g-+7 z>%5IERkSxKkZt?C&^4p6VE7wwzz(~-pu$L&Q00{nz!V_#D(b`FVIMDrn(A3tO_O^0 zPP(Su?&?|2{BaX6R*9!21-Hwc>Dh)j7Z8^wY7(c$@LoeMhw@_=pY48L|LW3D;#9^Q z1pb|j)fBp?1}Oe@*Bu#Gak_0QJxLB(M4zv3v+Jpk3Lm_Y*&W2+$#^i{siB9bvzl8g 
zN+B-RbcfE^QT6C~OI_T`?_~q`h74o*W#6t_>&y-Ov5SztHo;!|Hhn|zqR$;bJ^oJb zTNQ7}MmxS*8KjIe#Zlcbl}=y@%Hw`uw}=zEj5EXHNG~;*rN>XHWR5G=Rj?s&CUiLM zLGtP%Dq^X}93SUeJG7oxUIo5LwtLY_^TxnbEz>I94e;jaisb^<;??~Hf_!x>fW+UG z(tY5GV|9wJ5y87bPhDs8+3W< z`$-@lYN~C28rQG-tvXBBSWp@Bv=^uQ1?GT#-}a282+4EM?uf3l58TN2BRPpwu=jk~ zw!xg_ia%=A*je{$2;Phq*OS%#O~;LgfNW4Bj&0VXh^JHU8>w>cD66#)m3;S z3R8G7pH{JSF{LU;+|#6r694$*W0&L~KeO;3o=&p~uy1ydbF9B{^CgikeAp?#BlFIB zA8=1|AAI8jFlI^bFEB7YuNRG3h0-3)A#M#N)%N--qU>>-7N1&v)rZ6~&It#%Aw79D zKI3P;RCw`Xs^i~%Sw=(rni+yZv{!bjza7T9#rEmJe{joojjyo**k3HjIP$_ntm>ru zy*A}y$m4wt@OG15s(|)9mDc6o*0>Vs#NBX^oUVo@den1gX~mU73i}-<0s7WcUoy@5 z1d%3OiB|)mwSOz~Ha*;*ke&{G029W}QzvPHM|Atc>$5XHOV`d90ct&jq`JNOk zkT8QZ87sMbO12ZC*UeO60c&vykEbNU zT@s|F7(#IYKWclXE1o*@b8GIk(wqI zaTM}rJZm+UhapRum!dd1&!ukhM~&z34|nr5zVprsO!?mb@gY7Unf6S_&&*=AS^)eE zrT3y168na1JWxcbpo?alF19_mwjcFc{}xaihcfEz>IQKSftrYL9&}S?el>|BtnQ?e zEVuMtd^N$e^7@+@xkZ^&vPnZyIATZtM~=>&cOZQt4ha7xEu>G(H8#U zjXd`GU3LK-i+N1Qu-IH1ax+k4DEutbq6#k1T>L#TE6{GNOH76|gO}&^w?@hHZrLJQ z(p+-es{x|bfK&NEA_tEz7H)BrdM>a71PWQ{G z+$+RkxP|ny2G3uz$Eb_i+V3&KosGZ0HoHs*@0S=6UtZsgjrdrZ3;^%TgIo^zR1e%wyLo9v5>Ml(`LhBGqd7!A3TTmk8 zj*u9H?P>gij=O^0PYzBL5_av#gpi`a5M()VJfi&LK^I|8CT7A zMlw~RIP6KMhnzeW_C_*&Xfv_0yH(eLIgS0)x9oWtoHAPwE1cZ)u*Wl-!tGB^-VC2`T2O)YF?!xOb&D7`#8^Nx{>lC~CUGa%tV2%e4hqSF5v0eRjfKmd*SWYK}iy?3l%N-YK#mTqtQS zt>O{{-7+f0`Y?k}Bf6J9#habLj|*&Y912HA$j>}sJZxJAWhP2l<@^K+B9%ce=ZB4p zw1ppQ+H_gw;AR;j@J(nKMQj3?EphKe_B*a;dv)Qik{4nby9OL%<2s&uO&|5^I&%#W z|KTl$6WN499a0!cXA&6nCQU~<%H)F|U)4m2KV0VL|4grF1B5O#)G$%@_KDEsBy2&{ zX=Of3@z>FhW8vzt$U>1f49WjVU0^0qT+Z6C(Cmt}p*ko-98GA>`OhIMglU~GvNUap zu+NjnwJJ9&U>*?Xvr$cDb{nNHF}TFw)BJ{55_Ql>43+J_!C|`L2j7Sk zhLU*>vvg!{jkO_;YU%r$KCRu~$@q00^tl|-mlOx&q#3DDxygO>aVs4A<(<)$1@3+b zMrmw$1$w4p{uX6F<>7O?e3Opu!$3ace^?vtR5l=n*4-EQxcP^0&jg!F%Ak+p!UtDD zIXvuc?Ar#}TOEe4M3gKA&55Kx1Rh-|SeI@(E!Oi07daQV$n`zVih@||&bwqgruFVD zv$cW%{kf4kOIrw;dLE_J%zvDt6$`u?38yD_|2X_vF_Mm-f($bTAx~UKU(RF3ELrD6 z!XvNOjU^PV|LS`Q!9{-5+8izJ(jMKXo$b`bQsG-%hW%fokc_+*ZRF(DU~Jk2wJnc1 zf%TFcUoVA5K%s7Sn@Ajbk}%H7+!!lGoFtt@i{ZnlJ;FjA1uB+~t!~WvH@wCYugi3D zO;%g*bTb+u3_%zEu@Ks246O8=2$e21Jn-uNxafZxH>)Rlb9-F5_XUo}f6YQOaT|UA z(+HeVtIuAdj0rS0h{F)6J7vXJVg+x)+y&Ro_6^EI=|ml;LcCqXvjeLA*?Mq2e|LTH z?7v+sLc+O!iuHR4G(pF#?-s^_}k%&gR=N z@xxK)b0zutDPzV@GUb)$01)JG!Q@rNU3cEFy0g_9t4g&#oN(gdQY9Cm!cpT_9yi`u zCy!9--=w;;e;pL$oo!H}XxfpOw6f)W!$eYq+I@8mvQ#a(N~SZnX&5 za^=bkC0gge7N&q%uvicG@KX_UNOG6zX^%P~8DeG5$cmf6hN zJfGuvim~l~{ozg@eXT4($1HjeRszFO-r?x!?VMgAP^%LIvbbxNl}@DrC>=8Fv=C%3 z3CH?@p!(=og%7i)`!apc=~h0{`UyugxlDlZ=7S4UZPJ>v7FMk5f+4NY?Z7sood7-f zKH#lbr6P*m@=r(aAZW2?nqpinU^pZaaVi|M?pGz>{>hjIFRk|&)Cks??tJpA^W-i& zLuyI~v@TXXgHP+7$WG9UC)Yq2b>IAN73}gkqsQtBD*P=}xa-@SUTs&zo|= zqaoMhYKH|c!v)-!mPtDrcS9N!X3zIDF-;Fa#lyw?r|4NRYsZ27Q*k8{0{y!I*&S0< zoT!A?Y)oE1LldOhU9C~`NVH7erZ|tLRWT^ZbQclQR>l7}xwts269ut(f@$oskM6FPmuX*!{22KsBHUf2Qvsq#?KUS6R9V1XLtNi~- zZ5JcbG1ahdj!jB?wWJ8Y%wH_K;B{qA%AkUS~$H1lSJPGNI$2# zoNIC6#uoboWSp2Ub2Z&>(=&0jn_tM+*iDrPEl^b!ui^_%rf5t}q<8+)%7Iyi5Uw#l zUY_^zjxCV7tG6#i=?njKAZZzEdmu@0)qZo90&Y@sccdx$c{&M( zFho|fM1Rs_P)C3)7c-^2l}E+bo~I-DSI*QT7Si;EhOoZ1v)0AzHuR#@eNty))N9tP z2?L1FD7vD&3_hCim0z+TwpoYKM3IJ3q(S(0G)zMiP^v{7HY9s*<2{_B$6^dTJy+j! 
z({b9uYdhr_HBdzIf8B;?fo}0X$KSPr2^$YT4bZztbistaIDVMJ^a%6kuhKo<*rVfU zC$;aUYqRgxN6{GAbo6P!TgQ!d$!)(aUvc;wUTW?~Fm>uEHD^&s;W1CB-AE?HX4pc< zP_;}i0B4VOiWhxKR-*1i)Td%YYh-pY`b5>u?WqLW|H{zFe_%sn{C|YPKalczWk!Xv zbZ2(}cm1`A$IA@74lFWhKxHBZTxF?|y4(QC9cTnWx}YE)lNgm4F<;MYKk$|^%C;7Q zAn&hZ2N#>nGS?Y;a1|Uy@DUebQT1Cy*YXqF2Wex`G(C;}uw`F;Jl9lwuiZ z`3>i&ASMi8MMpxzkar`~z|1S~;1&;6fji%L_oNU7)l`B7(R)s~M)H#bS{YBH@v#pD;E z*gZMFT^lJ5mJ+`Mt|DMe1B9?&3WRqD>n@2fX{K0}Qmtrs3FO&KYH|T=*d+8+y+@NT z1k(GELX|0M+e6SZKG zE{OhKtRhV|H(9znf~br!$^Y;<&RnP%`7f>9P^Jvx^>7*s&&lRPJ!l;)O6aqDZ945P zVTd};&yvod|3IeE7-r>K)WA4{FWNg*bfJftME)Hxue@~03Nb*jcvW%Ko6rOUYBuIA z%VbAy_iZ~K!r`6$9{86v z0A{c-w-%HI9B(mBE5eGWTL#{pP?La7Ni)|5qim9*c|st9J5bMi{$hdQB#(Qq$NEJ{40WO4-nz zXLqOVUVAY!T@)uQzR;2MjDC-q}CjYuKk_npgl zGaDL1preThNW6%dTbR;8-4@-?SJt!H!w+h6{+<(?s(z_hkWynDom-NkotwodZxJ(0KCk?5?mZRFG^J9Q zHIdFC)R1*a_YQBFhfFbeGu3rk`k5_1(Vnwg*m?6o=x*ZjaD$8Akavyb?@FZ|u6@g9 z2&dQ00V>ru*CB44EQ@TIj=h3L$qZydQbcA%gL-a?o(5bUD18*|0HFIs$MBR;?9&QDa9%%%@IRsZ z_> zBu~gQh)O=sC#=Y!5e|4tg;MgG_i*b&XjZwa&h6 za$DwpXgBIaJb!H$baN?w7#BUE0@GNk12|C;7`7of2L4`(o`3s zuy>9`6mGXX4_X$q?EQH8Pyg1Ckxr{%;NJPLlU-@#z??;8=lW!T=gk{)o|8@;xby4e z39H^LkN0vzBcVF-h37VhwUtu;zT%4K{agX@*xD4x3ELWI zE|2g3ho8=E*UgnUJFIrEJ`k#4y14tEptk-glD<@Fa;|VOKY8)PX@G6!RH5^rf+H=1 zv?|BBt&f-0_I_lIH)B=%;f#uMS0qeZJj~L;Pnkg5treC0?aVZxkf;x=7lk>*E%0$W z$Ws+1(3NHSJ@nG#MfJ$`d9cRLf5e=Ku`C@qWqM;d0<_H1pav$U!wC+4sLi{__f%96 zC5C!mFuE>t#!|(ESbY@U^Z->#mei-_<6J3}2==reM}$DbjRjyy$j>@`*6ZqRWB*65 z5^#&WTa<>0SpX*f_bMw~)CwjRnez>FVdFvaIb31t@%*M>;2h({WIWbYNynQh+`c7@tB-@7Uj+_gDw21Jt>2ER=cA>`|1Q}7 zX2)1I0omybYjl(ODVa$87~cWO4Ax?d7vlCgF$uTXMNQMB684 z=VscfAevhi_=jJ# za1Jp*kwjGk_rj6wR64z2>udn!MvDK{R0BA{G`I}n>bd^tq= z;q4x0E`##il+ohZ@;z6g3fT&>HN-Z-j9P@nc_VHkzw*8p{}##)+Oik?HfGsYVWcD5 z6y75R-uz{;{-Dr6Q>#_$l7B30dLGv zYRh#DV^+xjIQ22XA?8(YYTEN@^k;!ovXT`q)dAKV|NmeOc1)mBPr$9;fj~xd386{q zebIXZsGsf+`G-v?Oc8iUFJw$pg`3eI0wDOIGNSTsvge~9E)p%6IgdJ-lnRSV%y|!H z%avYJ+Odu`jO_+nzHQF{@Y_U1!OMXLV8CAVe~d*d0F)`G1;mB{7X}x;MDX!?w;w+M z)0`b$iFyPiR3(7T=1lT{+4KaQT@`@4t;D|_#erFn{(SOF$!kB$eTK-PBi@k`sObBc z!q$i=`neAwU=f%pgLPYM@D3t7ovJ(UbkD~)I4Eb#_;z`pb^W_%0m`R{f%6}(L*Rd% zRM-nP$6RD6kpDK*K&ZF+_;8`Wj62E&V{VPH_yc8yl>K)>yQ2_uE5VCUG;Ve=^72{i z9XRl=QAYFQRCsK+B|lk6J+RLF7Wq=={TClE&<@ExdYVWuJbqS%u41&ijKV`SfP4TG zm_OZ+u?`|{M50Z2@t_d$CdgwYEzzPO)JsQr{DgIG#G+5r=Q*%=ORX_Sa}$@OvCgsAc`M;GP#3&wuoR*IfRBBexQ-~je~U__y9&ulyqn&kW8hI{9F zP8Jce#g{=GE%}PddfB#pVEFJpK2_lPMrjWe$TYk@9hU%ii0vJ@$o0XY^H%fdZ?nVG zp^~5O*tY3Z?k6|J9`rHhc3V3vB`L8hLtck&u-khyZw8<>s#ipAUl+0Xj4UQ508I+S z=Z(&5p8IJv9Ak?mE096?uRt9lZk4(nIdd`h#Nj^+9rF%~+mskS`64JUJpL5cpRhIP?rDb1` zpY&-c-zgdsgMrH_X?!`@zL~BbY(+zvx99Jhxd}amDaD!_Zes?+CY&Y4UPCIYUN4ZLk$SX3Nr+2@~Jcn)J^AE+&Om0FEVa zG#7+`H3QUhZ&;~d6Fc@mkXAv1?1AXWA@JSj5HfPXZMjz6mnhK#sC?Eh_>bCvVdpe1 z(`l66H7j<+`N>AU9IM!uZKY3`4-h#_kuKT|M8M@LeizXaLF}p6j?k;u`>0V-l51WJ z2TWRDsC&sZbqFqIob5p$v4!kUegViD!}JdwkD8+6FRS<_P$Hf)TYrUKVSl>`Q5R6$ z<}~{~{a)-!hD9T>9twhs3t|+vzFmWAnx-I~I;bnj6qK?R^v~n>E&YeCTP&6d$xx9W z%KrxmPfp^=ArzX_`eXf+gY4sIRYGhQXekQ6r}N8c0dqf$icVtwr-pUGs*u%&)mP;) z*X3C#!zGbe)hS_T)qt^FEz&HSC9%d(D((ex@D|?zr6%6RLFi1ud?!Goy%O{)VdqyqHN_(= zflQ$P&!cp539;%}&+@qZcFhCyHkp>7mjd|5OO zTN`AP&7Vb`jb!Pvl+F?3K8LwT;9fSxQW_-Wb25E;JIR9c*BHEgvoO&SRhLky%|)X8 zve;c0Un(<~Ls&3P9YN!!P)VPTPZdGumVnR5HfUHo7$HRXHIimw2?<}0^*81q>d9+& z&Ux$g-Dz7pV~a$*K<)XOfb(8vn)7=5{de<;OdY^Z#t>iP*!o_9V5|LA;spRpT%Z5o z$G@HrkGDpS12BKH)f$!bI)L1Y^@{-OgM8@+@Z4h>P-41{Ty6ZqJ*IAL950D`d0+3< zlxbnb^c&{wA^bw`;=BM3KnM;Q|=S z>p;q$juk0S3XzG7`*piw{kKh+)Dpx=61pUkoY6?=$Q8g@-#mOxjV??jX578UHOT#Bh ze%vCgaSr_@y~T-e+Ux@^oN@Lq>c;R1tXl}dK7d62c>jsCTiRUU4j|mJC=Y2M@l?Hl 
zL*bBunt_c0i=o9`>^UEY`^!YQZctsnLqXMnZVH1L-!g2~ZKpWH{g`cw|FWl+Hg<#JD1$MtI%#_!O8= zhZ~kAd%$Y5`0n|C1CQpJF-Z?QOvQHEJk9R<$qKdK%VgNSqKM!K z&%enUC|pN|Mq~N6uZ&G>x1>&^`C#XgZG-wHp)v89a^nc|2*&W%VfV|L9;W>Om*+>< ziW{^n%Th8QQTMPICAkgkKfGgjT_GG=y-b#3zV=j}8ds7i-AB(k7L}}yaSn!G%Fm9V zgfm-+rr%l$r&=ixBG{7%>9{V;Wsft$-LW3S<08v)_u&qTUN`4Fuj(OiVW;&5q3qng}TZ6U1%HxB* zln#mG!4-}Dj#ryV_rmqOj@&$^(PF$`AF8;L&pD>6TNc+JC)_d5H2J>00&|pUmAc_= z_YuZ5VCQd3mCg@DZ6pRC49t==W^e#+Cosat_Zw2=-GpXQrTsadq-AJjh+B1oHp) z1}TW!jg>T{;|(iO1k?{U*9CBXfCbHayTk-+N&btw>aWKy<&9MWF%*1)eb~7~V%&*M zp!w6^{#_}$(#ixY`LSL+`ExhEtF~_KJ)}L_v+sa>aC`327P0uVHB!e$_{*0-ac1hT zTZQ|x9oOR}&MTEp&D=X8Z>ti~ZdqwDLVMcHG*3`|@Oj}2L!M``5_H{GngQ1zp2XoklDXoTIzm(X(RfSGxd)FKh$ z3w5r|2dLog%*i!U{q5dfVRE)ydfnZ|ByC637K@-(GYR@GC2v1R`f-H*6p^UE$*?Z# zT|Po;Vlsx5Ux6|Gx*Tr8wNOp=>A+N#B0jB>2-t`du@Hi_%9F_!Z$!r9+-{8Kq=dx|5w|!d zEB%OQkI1h2l>^wVTkCKhtwYbt^mbkgaMk+My$U`|O2s@HYD6*a<|*qUhsM?LKgvmZ z{_g>qu_kZVWmCSQ;*7 zH166W`S_qr@*&_rD~O6UW+q;XInlS|=ml)(dde;6$HIZp&2@KZ9LS>&uP4WZ5^J4q z-Cw}(CsnOE->~ah77$~dn~Z_ob;8Dw4FQ;OX+Xwof}pslq3t!(W-<*>A)GME0Rqkv zW~9`7%azE2yz%YOmdgV~t1}55M3Mg)WzKJq{-$J_OzxNKPrUA@nkkwFR`V_csZ=u% z&!;)0UJt!D^NpdNcY6RZY>Ki>UTy3f(~Tc-~dfvM@wkgYipp_qg5bsfvi-mM>bvtvMAl@z2vWQ>U!bZ#?+q&a6P1%#p%GB9FGlll~^Sn7(}^=8IZx+7yT5HO>iUS7dHeO+azLTRD;lNID)!&_)gM(H4~aD1r&qk+kz=E=s!&C+ zd$1@jzx{!Sm2txtm>udx*CyPy05g~GzZ$&%tz3qRyqL5BP{I@6`TES@D~V&1I=eoLtEWz>~{6D87- z>IM306u#ZT-yO4T!sQ0v*cCiq)~$_ua(fSB;7Dar>e;P1r}PR^XahT=y-eJuw@{;^ zt!6m|X>&YQx0bPj?roxr<|b_%&cTR`kS6~l=(8v*Y-k>Ra)2*4u{LX#S_>+H6N6kL zWC4UM)Lgwx7X9!CWfZ&QJn!j1qs{2LK%!VldN}?S|CZnBhgKsr$?|e2+U!CfbGs*M zYl<)N3={1;PbUwh;ZAanyzXSXpHC01R!2&T1JU?+|M`yD9kMk4%a)2JP4b#^TkL;P zf21@_0{r3v-^6{trc?HRy(#TiJBN8L6zX~d&s|*7S`B5nE0*NASrZC9!FS5|;Q%svcwK2xIg*j!|@YRwxfav#{f`)mLCuWk3&6~zMc=C!*Nm(|a- zh3l4_c4aij^JM;QUik%F^!-$0_tW55c6l~)c>!d>DCsjr{mK(BN z$E`Kh&}Y-fr>L10G0jv)eA%r6h!)4wjKlcSumQA*85pGoDj1%ViqqI!PJL$}2?BoDs1@x?qG`Bq;@j-MU^4=XOw zSC1(-?!1q)`xOE4NF8ZSx-90!=1nxNuIHt)>*n(yI|(#vCKaiminPz(`@m^s!>@>_%19p&roZ2EEf)9chP7PZyQzv;cbx68J+7`*;LTmJ3->I2tZvg?F?o*P z>mH9Qnhi`(dwgCelaXU=EPTcUYi-XH9M_b;T~K2<;qiOPz6q53$)~Yu_i2n(+m}0l zE26%q+}0G!c0Fi~afjOQv7Z8L z5cjF|+f$c~-Ocl@HoGUDFmUbm=jyY7GNQWPpJRFguYh$&_GObnvUD9#NNTz2xv#JG zqFd%_wSA?+mtz5mbD`SMu624mOj>8nlhE??dC_xKP7zq2^ik+m<{L)gzmrN#;!lT! zY*3M>30|;*h(O2ASFBr;P!7kt}#+=y_Pg zX&er5*8M%Ng9er{+tBwAC#jMss}bk9P7eH8u7B9zO<#b%lQ zM7Egc@SwF?CQ(6{N0iktC(^KhH_G9DxZrZYiYW!lZ1{n#`2$!<>o044iv>D)Bz8!) 
zVuYZd$N(+w72dr;bTM<1a{KGCI^Q5T)`mcxv#n33#EgMU8Mu@FoVAm}&z!SRQlRko z)y!NI3MP9Re1eu#+t%IWnDPWG9caBy^kg65*Nae*R7oxtc2>x%vp@RVA7guaqu?5% z#vyh_Cdu}LgQS?64oUNoakJu=?q8{C1e%*5nr#b-2;n_A=; zEv{!)YS*d?1)pc8M5&|RvqA52Y)o-sl7C-oggV@|!!5EEEx?y(hbKTLD>UY*HO@Pw z;ZE14&_oy8N3}{j4IFck+ZPGW z!jU)g4L=3{MZ)|v??ghV&x}T#_H6|Y$h`z!Wj480CN>|DZZx%f4 zkcRWuClVp2AFdB5TusNQ<@rSkR({-R?s(gSZ9Rvnif~4q&@_0`baW9o5O4H8j^{oy z{J-m^19h@EtqE*Oqcd>c2{iUHELP}Y3iIU!FSWvf-3yrBx>sMo&}X0dFi;}@Wg>hU zJwXuQ)Cyg;tI0}}zOCbbLVNy*u3O*MiKj2B{UkFFi^VstzY`QMVQFkG>=;v<(jxf#S8mjGibRI zQG*C4b@49=d!^tc-^*kQ@!lT4q+hMmAC8oeDy0~?Nv;?Vz*^0}HK&{7>>R&;X|Lqo z5|c>23`yH*corV&}M-Knd!_ zQqr~!Ch3KA7_^oa6Gqk~Lzr?X%71Zh zrf9DErp;uT?#ll-QTo{wubxJHG31TGlJSPDYNq4{jmbTkd&R>aqDpPBLy@+SPD!1+ zMgo%vO)9$|wj5Q9!U)>L>QQ(3|GlI=j8-6HO(P=_IadKyVB>uxxkXaOL6v@lUc`}a zS4CVFFpI^eT%oF3sHLGsD7@--|?Ob%BRZ ziI+FiZ9Z6zJ`&~I$uAmyH(1#sT9-GlX$RuHn(A@Fof=RZV@7+QGFp=L7bwEkgLRcB zj0|Gi8cyoPaejFbN#?BR=BC2Sb6IIAD0N?-)yz;JMn42_Qx6Cog+_!7#(ET>@F>O# zKZWU6xhc!F7Z<^y%8Kw^&>Dt<2;pqM@}9JfRa}jdFfC$3n|r-T+1@j zz016@_*gI{L5q-#WZL$Mk%l9Q=^k{Xefsn0dQQ}~mj2?b^fPSIG;yqdAUO;nM4`W0 zZ02$4+bE1Yu7cL%4_)tjqd#t}dK)^Rh=+hnrCoe1br)3sb-_)nM~Cib&7%StHGg}H zG%mB~!`16aoF$sqq?J!p`4RN;)oJ((*627v2DT->?DHlr6q0?EV7$+?S&pht%9}_f zJc^5@$}A%)3Sg0o+#E!#S)M-wE~h_Z_^c}*iIgk6=So!$zyA3-H^0zImSNlQdBqf9 z3ACRpT;6bFs{hSF)&vp*TZeev50YL=z9XG`8Ct(p67#Xh1wgA_h5Mt|l+Glbq!JDJ zj_gDEVEc?Hm_-gI1+8s;)YirWxvMI1r@_f5+^l_%)sPmlVUrY%W0}WUjWSCLqshkd4C+0){cPlJxK+W0jlWNH1l_M0Y*7t##-k_N; ztquA4#M+3hoLi0x6h0&Q4v|D3Qc^(stm{)ub3UgGs*-q z^$GSJm%k-EPOJ^nVq8_{F^-*HD@nsWhPrs|PHosLyvMwLX+e4W;hZ8mjtslQLopNX z9BZ4F$s4QUfvk`b$f-9{cLHw9%o3omu}@rq^@J(;Y^L*+NR2-TByHcvpgMnkz} z-tT>m4cz^+bk$H1oUhxWD=f0Eq}dn0ydRTY&7^{n%p0R#=oRcwP+Qq_?W#nrJ7O8#9Et8y zUpQ#=_eK@s_{B45fRic7tmv)c9!Z6YY7$}UcP{_w_;sJbEZ3hHphBitmQ#5JArZ9P1534^Kg|hh=N*0S{5%Zy8HxwuLm?4u5TU( zG|A1{x#9lQ+)QD8VC0d*E<8L27Wdeb6mTB>KEHE(#p`bEf9}p?gu9w?g+}df36cu* z4v_Zc1&$BoP?@k7H?6|{ZE{x81F?ScLD9KCYNC_0slPlR2o39Z|JssE&*Qd19PX!^ z@-$1#K^#QGgX-6^&Nv_*)mWTnxU6#prN-cbezPI_4~~PNUo<77a4;a!Z`bEQHaxOg zCW*%UI6ZA?-nlMvj~DQdfNc9Uy~F~acXJKog{`r|@A8|&l-JzvH*J zUoT=`Dz4^3C*%QRKz8&WK#uL1tv(g16;QJusrcW%?Z0716s%*j^^`67NVT@tzTzxA z>#^W#6$&Q;Rdl>E%EsTVN&mlq+u>Y+1pzJ^qqh^><^m`l7Pgg z@8wwD2vmQ_ILJy352IOT zp~PtJw7-pus*m8|@#^Aj{VCRnw5bYjZ9h#ewt3LJ%|TwFBs^w6mfbN}7VEGik(+jG z&9|6;8ltO}PYQiKRHozR-Ft>(;Z&af!AYbwwM=fa;5YJZzW8Za+Jt!P6^;crH-Q1n zBjR*`v08CcgR~CQjFCR^S+Gas_2H}1^($~mVqWx{TuPRc=`f430b2T&x47e!m zQ089uz8=UW2^o(1k3Ugf(E09jaO-aal)#S~X}9Dzu45aB!xN_)!(CiVSVT_jYQLtX zh&+`tfu-?Z^>NEj6Y4C_y|hEgszI8%Pjs+Y&zHt7?|Sm%?oehCMAsW);~1St9Ck~o zsWO6c%&dPr1ZQUkA35<7Vk&nGOy8Ft`NjAo82K>-Ur1gJ6msjH3Zxe0K^uJaP}63* zB(9NAslUWuw8K8n;pADehCmMBUJYV^l=Rx>(3!QJVa6NCVLsV&@9ImF4VyMmn``AW z%KG0>z{w~Wm{YI%;gORawkiKz^^@=)rRkOe$12zBPv*W^Lm=&bzQAkVu;JU0?f;3^ zBi-!%^FV~;x#2ZwQL-2j9Lm2=giJ6W-cBUQsNgN$obN^!$N~(fEr}SM-?GTAGVXJd zFO{zTy~V{Ut1C8ID(Oog4IjS@l#T$=?^BY9G1i2vn)RPw%L3pEe|&UEE6m|hn)*Gb zFv*%d+r8v?IQL6k>HR*QTKlgf3QWE!WMFbAkI=n|rJt70d(HD;Q$9`zoXP0<9_#%P z6Ko!gYs4C7=y6|8tJe-LNGc(pnabwl7?gM1~u2^kS)Mi?p~=P;33brlo( zlKwR?tNA8`oB%u_Yi3ig;u(TkL{d9Rhmkc9=KIzqN{eE2-Ln265d1>pOgEOfhwMN} z$WUq$ygb4I^G4gzB}2YFNCm`b!;O4x^8@JuON(sP%bE~cwG}-$BHI0H5J`*)ajEiW zR}DjD?h&oxFA_oC1^T|&AfstY8Ih|93gbt8I4ZHHh2e+@ozM+~26e&6SEOpark!iD zN6vu>ZGCNFFdBp5ppJWSa00TwIamwg=1fEia#)xb54@lLJHggTGDFfr7N<8Jdr{|Y z4g&>NT4e(KD}1Z%p!*<=K~d}NYrnL7$cBL!5sDfhoOv9}rV!O`K6 z*n`~PEhDRZ;!+FMpUwN>(C&}mc5KmA>KVCM7p*r&^|F-$;ZWUp9ekgn>|ADl9Tk2z zQVOl=9F{*0PLso}jlpN@>(s;pGD-f&(3{UVHHte5$6Ez|W3xVC0t z2U$FOd5tp~)&1GmOghF^GGKJ9ijq2-0+)Tw*_L|PY0W=87&IoQRvDY`0E55yPS`N) zTcC+`xwD+h>nhc?w5G~?j+vMN%V?cX^Pk@dh-6eRc 
z$o34Y*wtgcqLFrxx~tC3^umX1nEgfYb8;FTz{tXAFSLFP(!u$~P&KySOYRozoB!5p zR8O4)X=$1AeBOlz)*=IxkyFVxPHt1#u2gZ!@2nN6w_z)w@70IMUxra07eurqVXq|1 z3tg%B)p!gRPt{1xZuCauZZyxjui|sgyB)B-)mh|O&XjTS9$!OR-c&@Na~+LbxZ5$& zT-~TYj7t4CIXUn~sg1GQip08QEc^WLrv1(%bb=re-}N}r6ir88zHKztr#zcZu#{7v zuka*v8B8PEG7vf} z=^R_T%C|RX?W#(HHhjW|75aV`bP_f1aUhqyAw^f4BguK?il=QhVUdv1d%&L zZek~GgFqF21+;s1>99dT;|sRvnRv5evA1JM5rx1{B0ed6kt&vV&W7y?Mr_pMHMkVg zf`!XL)FjkB+BMm*P9+fK?5P;}XVWjY#_@z_gUu-&*V|;MeR78~#WRqEJi*4yC?}or z4FcvSGn}jizuK*tGO%KJF0ERl=RDyqZc?O1WxDnFAcDvAggeIDF!4ukEyEFA|6^}c zxYG1@c)ej3@~+e%fs~BRJpemUZjda~UjEEtwTu#Vwp(v3iu5>y6Fukp{mK0;aHfNL z?EW`6G{r+(xMTd!hmdxMSI+ttvaaGkmvD>9d|+voG$`zEq)#-9i7nQyr8Vi;*AEUTK_xl0UNJD?4mA-r=t99 zE3Dn=?P&Z!NI^D8@kF2wBi?VB@{QwdQNJ$Grcc;?b-5t{1-7Ou`-5nx> zbf78odc*LWssT&iI0l4;Del7-Qkr^z zk>iHDIB!P1Eu)!1OAP*7oVj)2k+3S9Z#yn{S~91qsUIX*4mshDL^3=doU7UmzRTFu zoc0$WIKRe^ z9cIymAjsBHU=kx;fGY0S?!DWVEpr8=`SH+@Njsr;Nr@yNFWMtw2zem_ZCPjSr&camG0-O+_niUx??Viq-r=GC%t(LFIs z=@(SK%)a?4_EMzU@`C=oG+-C^+s0mpNU~_xM%i*|-KdD<)Oz*HYCzmaG=J?E$I8`! z9Nn9(tTRE52>+7)B+CWNdSy^u=LDq0 zjt{6}M8rDn`3zju+z)H{a!LKivC-aGvM5-qQP(Bf$T~Hv#^A9Qej`c2P?zw4Aq>R; zxO*P->dY-Wm~EN9*Hwm`G_babe}FY|9mKb2CVs&Cy6vF=NXZcL{p!y9L#qk?8z_<^ zlTGwGP*I)WUn*~71s84g+DxakMG;O+oo_c;bs7LI@s8Se+&%Tjh$;p-ut=_Hp|fNk z+9>5#gm<2eqTqQU<>yzZyvFqf~c-W>{>R%35OLt*8Q4R`n*mB_i89z z)2tk?KMf|kV{O_15mfs1ryAN5454u+8C1kFZjKwnUF1G)AV` zS1T}&+&|TflAGU1UKHSf1UAZ|A2HP4Rn-RA-WMM7&EvC}U<&EuljgnQfp2$~i4Nu+ z1cf7FB1y{97NZ}I(&eHa0;fdOIxI!$R_6FoUdET`9{6rafqR-LIcB;x>c}`7)5)j= z2URK*%>DfG)2`*R7X=)`GJ7O@B}g!EhFZ>5u71y$7vvZ5m$GI!MLV`~gS4S*{!o@C zUH4NFXf4&IxY61J&|dx@^1fGI&v!}a7GjZtwdVuQ3l-nJJT(5WlJ6LP(Tq&QC~f&y zlNjQdTyrpAugX)nV&EFK&wq2aDH)n)eD8JDJV{|^tp*b!^6DLDtp5#PU71{Ny269` zRR&t&s_!nnA?tZh;O+84r}>LZ;RT4nv9A~Hx%i8M46qId02_;gF{`M)8%I-?%N0Hl z9XkTP3i1MAVPzfKFTzEgzunGyEhPQ*RlH`21oP&c;C{sHuK zsR$zX^twdd(iTa$xuWNGMaQ#UpA1!>t_-=p#0<3VN5nU{qx3kS)R!2^Hwm#T*t8R3 zw_CEe#-KM`>0$j1a3R!$lnvd5)t4WwH-IvYh0-;-Mtr}^s10#a&)56AiVGkO87OZg zIHr@~d9(a|!ZK)>L82Wl|B?!m=Gx;8I>GTtIGkWDBRPo9uv?0CH{uKKYojw}>dQpY z+$-ZTLy#d5z;-`=TTcpZmj9-2_Ey;-k(qc(RyAY!QvD~TH6aPl&Wz#@=b_5oXcaq3 zH^%B1me%1&93`;-to5N_$%r^Y#xz^C72(iimyhjT`47zkt+XNsXy!xwd(-nxu-O#x zeqk&`au7S40Xc5cB^G0#Ln89Dqqgv;_6u}6Je0nG@FrhmNUChN@0%rc@u^_=r;=He z|47gzsB$wRi?#Pb9#ciF3iINqb#EK<@Dq3xOzjee$U}qeCzI;b_^h@*A>=Wnnqe>^ zz?~)^Gk0@Fvc<##oACm9-EpTc@EWV};gcQ33H8Iz+Qh<^1x=4?f81N(Zg&U@$sJn$vU?!>Ejab#H!{Gb4r*GbkA_!`_v_wQ? zz06_+)hhOk+nW%lSWv{_qkpNAt5ce@qXn<^#KZXl5bnD$Ef~k>crI-ikhf{8`7kaj zlwz51GS~gxCFPic%w2iw@LuUVJv}gy6OoGwDW0Dgoh!)*16T)zitbAUP6qY}JMO&U zROSYjdBK?=k!WZJqT-Pu{(fGqNZRffNo>0209PT}Z@reH0+_ZSVXe}C%mGrUEDZ%c z+HKmqZ;fovvoF}SsiTaNtWDF;Wi2Kn*kV!!t%zPuwviPmfI*DxM6?wxU^{zgXBJ^X z{Jf*aKO>#fUe^tv%#!eW%-zGj<~FOcVoeiw)ikXwXXV+ooqYc@u@124Fb5Sfe%qto z5@}((yPHFdi$u5ogwL&wApDaQAhvvn z(EB-RTJi$alrt(`EYIqlVFW#W-Zf43-wI+Epet-04(LwVCiW?#{ z7GgCuVM(87tOjgwR+ep(VTUc^sj~h`3AnAu+d;(ivM*VO=0(Bek-FUx7XD9j99uAVAYpNSf+OUK#g56Ha#F?0Z9_tj;{@WndNX35*+ z0?qr-$6Qo$#|KBm8O545O$kSQMq}Z+thQ=*lx@F|7VTD?UtiTeP{B|QOR3H|2FdU! 
zYRdOe2_&65Q7(utK(ciCV>{rSJz%cjDcc0P|3rA*-+khOoi53Ldo;4}9!O9X5kZrd z68J{*=_@r3x(@x}bw?#}?2S&lmS%DjBE?4gzfNUvzMd(R&lbUQ&bGTt#O15Nbya>V z2@PWcE-6lVIwRai|N0p|D!|QLo19X0(LcLQy_3ut*KQg*Yp0itEq1I{@9ayX@5;L5 z%c|Nllf**)p)E8THyldNzn>p6oz7~amm@!<6>$L^DvLx>jo@bG#xy)Ts$TR#gmBa& zIjz8GC*a}7Quxs0gTF!C7xP;3FvaS08q|P}_vYm$;3e%ZsdLQvgGcYoIIu{+)d`4Q z7Cg$2?OagLk8A8~Kg%No#kPYTr&!bH=?8diqd)gkgRmkfEe8Mv53y(Tod@^pL0UM5r_#>>SI<*vNirz0@*w z&TdYGXO7RxQp(I(xDkKcA3Jpo3nVc6{cx7KC2AOgi!G9O&Pshmx+4}BmLA9XP~03% z3by-K82zXUhZ`ndo{XAgSCEL1s*@{PocJ*umtZWo`7wsvBeM{mBSObuz)C zNtxCGV}oJ^&6-126B}d*p$@DUYFd+k(Ng76DN+TEl@sM-r#?L}CgJGYzD@x_ZKg%z z`O1e$i)M5)WdzS2kpZaFiy%J9(VdyP8dqVMDjZ4npUA{@yMe5q?Xw%8>7F!g)Hb^* ziNFRd4jzv^(Arj1irG;Jm$|22LiuN0x?Z5GO(WFaXySO#^@2QNR!?9#f0J}Q^T#DJ zsn}vvskRZvin^MrUm!aZDxMVwBlVFX_EHVsQ67#OfX%$lGa%vC>7`z}9 zjrJa9o-NR!Ixn-j4iYfIDaOZrBZnw>qnlTkGF2-=y=zes#9WTB9KG5wS`uG&PdYl7 ziT1R57sd_}E7x!9rjPI7cSgY?+t|{mloZN@KDr!a*JL@Uagk%QG=~LE5$u!m;GAmJ zq%;ow4WHHBuvkjZ5DTu>reD)Mo$FfHv`fI9JekX&{$r)KLIoOIUZ(DaI`FhqH~Mu& zgj!!GAZl?otKl;zSv$%wg6bQt|7`lM*masTkC0Uky<@w$!_8F$Y|o{ibhxcszjX$7 zc%{z4aH>n?q=4%_@T;Z{+-AEuBpba`JJJk#>4cJJ+Hd^e6Kl>@;$2;MdEO}Z)p>O>XQ^rH55Fa_%rkE@-yvI)sMNS z{5{ABqZ^@;5+1?(O`In*mq5N~@dVZ3I%rop78QCjg=Has%2{j%$B!PZ0%j?SuE%Lb$&IncaR5CKeq6RDJJ=bPgw zvpe`ljqesXuNYuDY()mL$|U@|s;a+Rd;e~+n2|69C)(eQ8n$Qr1Tr`d8-6>BMZtNd zZtC=>&D#ouV31{33xg=QmG4e-dsRWMp z@43=S%+h(+>zs$sN12x&Gvc_Mg^dr2Kc(;(?OU9S$uCf6%9RppX(@LnsGbmUaY_(4 zhyE28%dGw%!{WQ%N{Lhn(U5R=XB!`5>A}+2=}(3ad8NKFi3F(o)!&?3qoA?uYBGj#-CFTXWrDysy8R znDArzLf7p+&BGFts)kvUD;J}jieq&H++;#59GENS+!L0s^AUIOipWY&S`$IWMhZ#d zUOhwVG0j%7S~wiZvH}P}2kHP(W`$i*@XK1j9)A<|QxU&+U%mWObQ)!$$7nIa(;nts z_GEy4)%ctSwAY1^EHsSu6VG9W_RLd+=yTDp)w+bqSpgLGQ>+S=Qmmbb)Hy@_zOsMw zHW>tXE!|mGRoC|uD%(egYswv+4Qfr*Kq^ULzMG!8BZ&+Ay>@*Higg{MvAI$cIN_PdRx*=pK9#e~9ir|H_4bB6@yUDO}${80*MMWNVW0e#n9(1|UqNs60f=z-BmxSi}n zD>XhsQS~z6d1-f<&RJGFeAF{KI=8;_wV}DXZWW~umm^Hhx8i!RcqO) z?Sy~k^7o2E85|l9OOE5}Ma3Bza9zc`ca#WP{Ic||`J*wr!Hous#svo#7>stG> zFzXYvW)*N!>N8r?RNMyKVtxv~JX+y0`s+D7U60wLfk;y-eyQHe=k!J9WQqpmvW| z5_m$XL2#SrzTrdR6iCV69n^XmQKe#+sup%?VQXfdJm@Mf&s!;rA&bW{SL9)Ty2^WD zz=d@$6_GV4L0FywMce-~8R#Cf+Sra4m<2hQ9L|)vrFEsQuHjG>jPIpT$)zXE>$%9E zMX*=dkr?%tq#5WCfEy~inZ5YWEJVd<;1q*ICHCN)2c`Uo84+bLzhfI^1XW9J1Bvbr z@N_*HB6)b=F;B}Q*BqNBTBr9uJL#^oiIOD^&$mMycJ~nIdehpJgc_TQHFlInfZH#! 
zHMiK(^DrA!cbIRFUW45r+-`bp%<5w`K_lrf`D(=p!H4V>*C}}1(j$jU>cP9<3u(0T8iS^KB`;yz%9jD^uO z@}@1*Fa0uZnMN2)3)vz^<4pXG(voZaI|e_G@ObC{AINiJcjzHO-3B=b0}a|ESXl8jK%JHsoab`KxO>aWBJjj>uJW`=Q!7X`fmN% zJ^s_KhkQ{yUe0gj4_$U$4@CI14?hAklN5L^iXj_yg}ZhgW|SZeJR`DaclT5kUEX1!~WGD`xxI6A1U0y3K1- z`(0(NyOtm6;gg3*IRl_{?`2@hfW=PCz2<4c^E=(-xZqn40%0+(foBhC<&v%i5R{Vw zoG51ZUl1%uIc}^2LEvRm783gAsSTB(_C;QEc36S%sYG0MWXCsBpJ;Xxooj=d-xioD z(0r#lX&oI$$glPysXl|d5lqOP#l`6hRrXvhW}`3*FX^r$HkK^qMuJNDK+rwt#w`gO zFzOzU41ZrO@V;bzUvf9q(PZ7KCwR+uPJOR@nq=VXRM>s+RN=qT4FL=>NMdD>m|pg` z`u;7m9kSe4y_VAfPVXFXT;dOp@ce3}J0KyaOl_xhA(d2qM_Ib1W#9m|lW!}v6_Pi^ zmlkWwg7Y3_?O#ssRywRQO46kNhSoded2PG=fT)0AnMJNQRZa7^c_*m_Ub_DCza28E z4tk{g=S>eYqG$r?624DIRuz4u7YePSV$cX~=2rloA-_+tzQ;QGi8X4~ zi**&b-js&)x1J4`5h!BN7q(wj=>t($EKDpY>GhxSQM`#uRwq(#^1^p~W0WxO<9RlF z%>=e^OO%vyTO|D4x>T_Yjh;Oh|F&`;5U$@!=%{!z{I6h6W*G=GX6wu6)oo%wP00PY zGURdk+T-fbU15QQ!A(cD{cMp2D~$>z!L0EQc1ZBB!$(#Zk>n4U%3*`?;5;CL&AtEn~jXh z5q2*LM=qVFg24HyTm+{gqVPmL3(dc5S$v?OGiOUt2w#xJ%j8cBEQnLLJ;fRmK*|wuBA0L?xT3 z|g6f)9L?N`JAdC#QxI}U8;k!G3G8-w>}~b!DmC;X4k`|mO>f(5`<0;g4_L&8-TRq zf2d_WOQ3fMNsHq-ET8QP7@9W(M!@S=)$or@xT(#OwFh`$plU;^L7ttJ3`gHpSu#=A zb2E${&g<`l#exk1jX3+ZuRIWO5HYgDbCI{qyFu|5sPO26QcWXf`F(n42OyQ6NAGr> zPG>-ao{Upfr6I&&$S_cl2An2#njtE?wV<2pH8WQT(AyUDJ$zB%P(4M&mWX-9|{fYbnZ) zcpg0oe&~R0$Y%R&+~&~UXAHvIv&J8FUPJ3&6nI#3{7ze})(kq-h}(aDxxcqz!z)p= zH+0IYHd&_i)8#tONtiG+y;XHRRoQ*6dMS;37|{$P@NG3OdBJ0!PG$2N`(|7a{UIVL zI!f=*!+)zSQ8Jw;&*w+|Z1-wS5WRz!04ni^=FjiT*IWGOe?e^o57riDjBO?!k*JEA zk9h<>^&`F5JG35@LJmr^sJP&>v4C8~z-d{=5oPKvRAf{73sp(Lf#1OCiCJbFMBHe!x7y`QZzzOQ*7K^INpDa}=Z%C0Ox4CHQ$ZUhL-dv8%af5VY? z3!*4dvP=c4OETrC21=*EkXeJ$hC-up6=ddBSA2KTa&*nL1%jk!@900Af& zWp0sU9x72i6YzsrKCVI%de7e)cz>OD;Q-1qI_dPCc2lfEt{1Kg&u1DHwl!g|L?Koh zbzVLtM|B-c*9RR&-o{(V2f~b>UMk6)#T`Ti_V6Sb4yl|_C*3i>noC}DYP`r~I{s`t zn7Hig7rdO3B$fx-D$RLJXA7(X^(t(W-IOh5u32xVWbw7OUatuF;mDu~K#g?75yqmK zpHxZ16%_>UF3@HYfQXau;gCG{Iz^9hCP`~yz}#;F)!0<0xcj0C463FoSNrYa1fp;m z)tU_6tJlm^?iq7{+KVmLfp`il9yNc4-w}ah54{O*hcd}-|<4;ESI89ZL zO2>Mqluduf+>X&Xw_9Q_xosCHG{i`cr;@*TDCdw2G0dAvk6dG&ttW#Ki*uvBcS=yX zYZZ^~ye5k!^~C*e`@q(}R@Naf2R80|TH2=-fSx2T<}$f-P?ndQ?)J;z^gH7W;1I|< z1?p3#Z!Ig*TQ=@R%>~{oLrii8PNH~-vQ>aYgKLQVc9+_$KS4>w&15fOb#^;nIWDCje1N@2P|V8ZJ=M60-ZYInk@LcW{J#O()CJq2@8nJL>uc6F zxb7GvLrMNG9cVkUo}TyT+wo?uD9}#)Uu|b4n&PP}OM9 z#fRR!N9{7=3N1C((a+$oyg%dSCo@_T1bqxuA4@F)W5@>U*y`ihj+v z%Sz{#WqSSmfwS$wr$N=k6^FKt#7CM<@X*S;1Ov=$cl!{GOsxP&f*8hBuOgbpq;{k)%ZVs)vk=YPk{HjnN)?#lV8I~gvLiHxA|d|*|bJsx+zx#xdHvSpQ7N?p$E1M z{A@gdTe=~KkXo@TOn-EQaP^T*v4LAc&nHUhG8lw~_RUvSsK4=3$`thVb3}eWi}__# zqP{LGF}M^K6EG9AU-m4DZB!f{JkV#{FfajOsSilYFfa(;zo0Vbh@cl=rLG`%(xjuaH&0`PR^r?;@o4DXDy!QmN&UIdD z{yTiz_mcs;^`$Yr?;UOONzp-(nU=AmEGSlf&^&U@N8vtgF3Zfzk43yS!)G;_YUPKYfV$8>20wce)UWps6r7#Uy4Xn z^PR?j`ej{JGaVoBXtH3;NbHJ5tpNQ@53(LkAU(N!5Q_GX|FHELiffn92E5hy=Q}JegYu;KTl`2fREXm;iLRgc}VbmV|+xrZw-X zA`}e#&)<{w!O9aJ<~fIAM(kP0;;c#$`6i8U_2cg zP&_Hw0;Pd)pz)fN5<3xD2zht2wM^|VAGORNX1xT%B4+m@OXP>ac4L?%bMpbhhn9@{ z@{`oxQz5~)q|U6N`#4v;!jVqqYUnzWpBW)3(iAz@;EJ2MIrckN%A zDM%3WPcty52yUZ^-NF>$V#tX?gyqz!{Prm;e+JXvTNr+UH|u`|@1~rR4k-wLq6pr{ zSIxQ3A@Pg-N>Ci#m)NJ+!L?iPb8Mp=zE5L_p%!bcrCU~;;xWl#?!+ej-54q?WrZz-)-RxAqh`8avI;Q9W>Dil@&lw`IAamwdVJvgXR8 zKF%^J=K`-^ICqS@@KObbTIp)oxrBvFY3)hGmJ7bG?E3OjQ zw?%^f1C1=u;@KD}VsLcOtlc$#V+d}zM!L*;&j|%P=5<%1+8c*So3$sx+A&wL0njk^ zoj4aXP|I^B;Z2*7xP71_YTpv7jDyfI{=6B9VYBWD#~3#nN}j|=@Qh4K`L~f(M}Dmj zh_{lGBIdf6;(ep3Vp&F?;9j97)X9x%;vWgYF81->gVoi`vAWXdCM~H+1aBi}X+!*L z_1_%}mIPo$BQ=tv#m?XoviC@b8zSFX2FF>V#sM`>H(i#jPoE#^f<-zpYJ zBZ&yu*@?esWZEcI{W~}{CP=613&aPgVLU`-2YX{KI1BW|uT{&QcIH@DZQW|W 
zW!&w;`2x;wm-xy;GLfin0iZ}W34_-&(#zU}?XWwn0#bJ3e+sG_%lX*17IZ?1$=+>6 zeZdMLiMBz=?zg;X>Uy%vGr?n}`=!(CX+#y)zt2j7u?^6!A7R{Kti>qF+Gz(7Uz?bG zk>|Sj7>?gS2busZeLmkq{qROO9ynO&m9g7As^lZ$2sl+^3&+PwQ^SYG<{hLB?M#*C ztB_o0lH!iOn_BddFJvetiUK1)P`kdasw#a#MH2!uk^#dsGILa5GOK}^l6E`)2X2BI zU}&pe{@5o8rjjFw`KJ8Iyi!a6l&xG}+Zhfc5mzID~w~~MFhSUog=hU;gEMKkdD?DC4U)# z;%UJnyXHMEs(zOBmknb6{5<(q^Z@&)q=v*Zfsa*0PcxFTZ+b5q{tml7%D|zV8hLK$ zWMK^=q!H^*O4ks-@h-WgEp|dpv%)vB?pHZ5>gA*hv8!ZR9tB5!1MspFMU~@|h{&H@ z0A8D-uCmwi_JT$zF_$}ef|P=7Z7OPHNnipnwe8oZqdCdOdywYgGjGF z{8cd6Y=q9DV9Bl!Fv>X+;HP|C3Dc`x49L7IHgHNJ;bF$KI`z5@5-Y!fXzB<;^cr9@ zMUFz^)WYKhj4Fz@^eb3)9(Lige}2yO7S~_i#m%;g=AuOmfz52cAWE_fn8dk;8G%ar z(z*mB5l_ zy#Xb=vCpDeWFyK)4D4vfyigdk@SuO)7-*r`Vs_%YR`KAXmKyPWV}1C58cMw(XR=`2 zQ)))ka6JRRF}neH8vjUc)AZ z!1T$E2Bx;wf|b&uHxL6+Nn&C8vteFX!rIUE)LcJb!{ZS`CWu0#Iz5ZKhST_l?t)Xj z*%<|YGbErP2KrUAM7mtAq7ccWtJ%|QLvlK&!5_3V37Gn_Qc-PwW4JK&r*QUO(xFnPBBVM$gjxd4ZYUd)sFUmj7RS~}di+;j zv#>U5^*{f$<1x_1MDV?G#tWCMz|yk#6rOaGVVNuICN_;MrF+9vi_*Zf+ZS*jm+Q5mwP|9Tc3MZ|T`L>OPMwue5xE z-^goy;FsGGL51nHoMe(L19f&#cHY@C~JexiRCBnH~C~ zI=(o9KOwx~Zgs9P-aBgE`EX;EkKL);i_oK{7`On^T8J5k%O#qUF++td^{-HF+StaL!k@Jn2WN<>6l8n9 zmuAK4Yc$>Gfja&wnCRN4>#F@R)7-RBL#$=e=%$8x4rQA zx4xWfIJo9>7pUq>QL&Y;w?3O$R-DvY=-4!p<*fhCHG+6M*!XVbhtg_n?__-jITtOCS4u?v9B=)dJBiv%8915=DRKU}ivEIocH z7a#7Awaze0l|J@KQq{e7?Dzx@cMj#ZsdR}qQyUncv-nX1MCO3rDF59Q39ijiA*|+M zB=_!lB}^~YLpvIqz|3`iJoMwB9=^~o=QR1e*-0Ci%;czn_>;X6{l`e`v}A6b^2is# zvIe-u_=QcryEAkWYXz^1grE{TNz6vbb+Bm23~-k&S!$|b;Un4tzK>L4%xq9;Lx#My zNw`;>Y4K$F!UC}T4@%IyN23{qmjY!GS1y%=Z2|0|jD@bB>^G$M>lA|C;p4se`LR$= zLblfG{XCbEMDP>ZPa#D?tA<$QDWzV+{jV~emm5iE0gaCVKTwTNI5T@enXX*^Uq5_Z zZt~`eqSHUIC^`nZi^K?=JB(&}ayZ z8l7u#qp4OZDg-;i-Z+liH_QFtK0JBX4hIpfvBgIXuidKfZqFH3%Pb`qT3scf-SHU| znoUOp9l!q(`NzEFJTmMQqV4O78W2Nz-)Av-IMYr^VD89?$|O2F+JJH%_pCV@fy##= z&uis$UJ9O?Sbh1FCv<#Al z^9ld%N-SjHfRV--TQsEe21Y^88t;@1q)ycFA#fCCs;4%%lg^=cDYydRaEtMKMk+to z<49rWU6?P3M+I))nTh8qUekCO@ecy_C30@1WR2rpQ1Evp?7OMu4FWsVyGjSRSBLYv zh=|caEf3%)T33_19>Z7jc`L>K0}P7$aMormFG}0J3D?7;{B5r4M}i;TojAuk+a#!& zf$|OAf}VFLSP1ex00Gq*js7MtxNyRFF_APEGlCUAX%-gmsOwBLQCBG{lijJ__a3q4 z5TLaE$2p3E$MNCUMYm{$u!cmeh}PWjw58?b$VYyfR!bMM+<}==D<1u-pW2S6{KC+g zx+z(+loZWO@`p!pdkzGwBV)%29mgK{u3`C{s+B*Pg-nkRjW1)#Z||8~9S?`n?K75V zK}SXQ|C!=)t@u!A8l7Onz#^$Bmold!e8wqLYFiR61%-nKV5bo~QM#pPoKj$g%+GDr z=*f5whOa;G4sv}IgBvdv5Sk_5E8Qlrq;D$irKEF6M>J)6juydu^%-tZvXjjYB}2ii z-aqI!685N4_cgspi!;$B9NbJNs2dt%dWR>u|CwVPYogJ2p?l@MM=4?&X04 zH1=5)uBI|h6%wYJDT1H(o<$<_2)rbJY4eV22~t@2T5Y@Ugq3LFA9RF~!ff|2{F7+~IXjNvm1OJZJY9I!5gXbl`vN5h3k3Pcm*_yn60SGV6SI^!Yb)5mmc{ z^Uy5K*@X(b>rX8;upTZSx4|#v$6oAJzROjT>9$;}ruu4rejVJb)%M)Nc#Dxaet;A= zFb#sEZk#DogrmcmL~pJ#70QF5HM<~;)DG%SHVDvvDwb}Y7QFOzS#7{Cc}lZB1i0*t}z_ZG%}*39or};XL4amB*tV#LGDHO;xOon5Km!MFz*k4{6$Ttn)<) zZf}dwI7JuBS%!alfE}vXcvn>Su4DjqE}&br{I*~dK*Do6C`lXF*bWJNizfK^m4yJ0 zqGa6iX0PO`MVU!y_6dPuh;f#Z{k4!pkXE1tNIIW^CFv+IS*^G|Fi#-Z8kJL>oA*meD7&5zeZd(nP!Xa!B+}WR___yWHv*yG5IVR~8LpPT`>eTB z^$NF=0qlr|8UR#I-cNXRH_$=!QM3uL>_6=X@@T&tHa?&`Z{kU3aD>Vd;83T6pNb=e zqZN8Ngd#|D2$0nN(QpFt7ww=E==l^K>=EXtr*m$H4GR=?8DXQ!843*SW+_Z)mP2kqZ z!SP2m%)>##Uk8-y|F7n6x2-?bipn%BQT?Ok7jcjH0?^(W1VY;c1J6x(M6uhY3IEi2 zEt6QtZmKSoh)Pr1XO0MC;a1@9%ur?77J_i{KPUg4q}{wGw*#P?Yu)vY?xGI>lfMjr zxet;yz8=ltzACP66npRhG@qM@^VMei-M<`Gt@zNr=ZpA&Y@o4`A9v~n;QcdxDf!DP zc!HJ*UUS|94S4$R*Z#)cuc->(8Ko0>ZUixnVw|M^*!0JEll^rm;OWSap6_qxY8&`i zGt{;>+R$kLo5|tqSq;y(N8ycLJQk)9*wJzkAi7Pf2URbjk>xwH#xd!rzO$SLsexf0 z38x8PyvTF>)>;mLW@owhm+cGa4@q#b*SofM$q9q+`#;(MThENwUTzwI2o*#q0ey;C z!5%@qgV(|i{N5J7+`9}D^Daq0PiR6WxvyWSb01C|VQ1t<#NtK~lr?y%IjN3gse0;< 
zb181}?fXvOuU8-k(>~0EIIb#ag|E47VPu%sruV*DOC{d&E}rWcX}&|6RHudP8@dlh zNX>4Co|D28hEkF4g$@BK@?0}YuT%T9`Bb4P#U++D=P!f9WRRfpdH#_xWsMNQ;$T+caQUY?(b1m5$Yqyar~<2PrkE)#BZMR7bMK~zA9 z{E@@=_B^y+f^2yW2JyX-z+SeC0+}U3xMb(h$uBVbEv!Eaiy<;y^;Ds=!~)#?2p(&a zMl?J$=gfwLVw@{qshg$&Jy03Fl%)Jt>3#y&@G_04v%}Q(6$@_c4`@NkXE!%}l_s?` zDjVDt!N&_UtDWJKnf##ZEjUFZn{G8Dn^*}L0;^C;?Cm4__{r+$73``6<}xyUN#29( zSke)6o7%yFLo7RZ)abmo=ycg6~G|QTY8my(hUDiW3cTPy zuLKq{LljLnrRI%@RBJMxk^MA}@*c#xR&o5oFSeq-y6W2IRRoMP?wIK9=u9d%+&yVh zaTdS7G%0;0j|jdO#`g(vZ46S~pIIlF6OEIy#EsNxjuGZ;!`yu9_4OM*o`3GY948mf z^T0)YuThGpuiN;)bxVTVne&giog(gMX;|cQ{$L7W zv#cccqaZZk6pu1@@%;kqL{@+MAplpwXA258w3yF85y-6l7|0O?^iNkrModo#BK5Nt z7A)FouGH~B-(dP&mS1m~WIS;xccQ@JF;;gwpdj4G`v&@5AHyndfvzC9MIoUUaO~js ztd@ADrZuXKJ##v36~Him#oIm9g$w_0Ss+7OW2Z^7@w=Fmw6{KRHIQi;E`VomY968u zHW!t`GdeF-98)oZ7@-&|vHJ8Hh@Th5;^}TEn=_>Qc>}f0KSTW4W)iV(!$RAEXb;a{ z1jO$fAkqBr(Wf6n(z8!x`8uuZky0~j^Qi*eKS;sTVKx{MP70X?jwY!3J6>T4Xcbf$ zs2x}wR|u!)jVS^ak~5G5nlm*B>JB$r?TBJJ|gKUCR%T3syL_hr|72FVl4tvZ#;c}m*(BnKoP-wPldxOPZ=x@MF;%`LXiOLvrTt~b= zg3t^-1|i)%-4s*`!#RM~TK&Rs@uQoz-idX6k@qd|rUyDq-8?y$92A4Y$$pwp9RbF2P@3W@Aoql@=)8Rz5{3B0#JV=Oi?k{SxZg< z!&yN{n)KEFXxgu8SF&j&RU4BM&=aI5fNl_-MUJaNhmkx2VOqixR!6i?l+0XcIE?dp ze@`ox%eHGf&dTvgD5ZQIfF}ENz_nWk&kqx=M=IntbEJXvYnSyQck_Iev_tmfiy;DgE5N`5=)~~XZ2+#+p$gLEIE;D#TLL~f9(vr z=A=MDh{-!_BxIUIJTk7txMmsO~ za}TF6B#&&F9zNT_GWQZ z`87I2!EYKR86x|8u?tJsavt{P(fCobS7jv;N`(R zsNSk-QNm;_>uQXCv@8!{_#@jpkTz;iDl_9;#g`+3)vA~379+!GM6;#NoAEZ%h{_T6 z!CI+rRE&nzIwxIjs=orPHcvd(rEdgD$`I6>eCZylWnK`@AYCob@^s71np|P7*eB<0 zhMHXM)$bcM#SJFwOjd`=XA;0Ng+*MKA=KlhQ*?TL-+V3QBBztT`Huexwn?4fW+0SF z_R#2cG^YrfdR@vd+gH@Up?gWod2WT;els--y?A_@8Me9dmeet z8pZCvPGBDRjr=A|C}nK+y#MN<7ckpq*lOF$o@c)J`s1yTKriPM3PAFqgV&aYa!tq_%8-iEx zPOKOkZ9Qx_PIg&~g7;q8Sg|B&LE@{hPl{2i!NSaA=8phq_8V)s4Sr;h;-B}%?h%0? 
zirY=|!OXWfVJ5D^5yn$GBI|fow!~4SX;=sZ&W<*Hx1TNouDvt9pDNbAi1U9l%?JH} zjS-1eN7vinfRhrH#(KqqYB@GRixQeloJP5z&Q4tHi2cTKRPbuS;lC^Z0(eVkteQXo z=ujl;(uoJQ+I= zqtv^vXq}F6{@s$bLc=4MZYLcwG2Oqk)8M6WI=x+SC=Bm|_p``*iM{CuBh z&Hs0?W`=Xly!YO(o${Fe=|8|!Gg61=S+c#=rB});y>O<3sRT$dHMfIxdKx=VSA^&m zS%saiY6m5dUGExhPe!Ey#iRfz=WaJWm(Dg%#W%gXmvu28k(@ss%8QjhulQ{jJQMhG zw!9zHBlBTwToc~Weyd4V8?3(7B2m0Zt^#9r?9Ru1c@Ly1y+!-EN62M3VA|YHh4ThxXgds8jffYZUZ>@CPn- z{?v1k#f%{z69%t?1V+356I1vtiJ z^ytPPYd+T(gi#JtB4P8&ws})CmIE&r$7>OVgGsJPyUVJUV?VPv8XY3aiR3|-L{>0Y zBA+W9QIs1_Gb~TL-G@>x)|M@aT;_}3Y)neg;ha&1i!a=A@*y`*Lx@E0~CB-*ijR^R{GmeXusyF8$8(vyYO~orAH;v> zvBs`0w%e|R{mWXw~JVOs?SsDM}`>y4?$ zv*ZRhyf$(Aorubqrv#nBGR?&TzTz%7ih!K57= zc#U1e4a8}ppdv-gE*nYVg_v7sHK`l^JRsRC{qmUES)La9?4fyY#$AM9>m~U5h~H@j zKm2rdxr9TKW_UuBxPKlFPSCAaadBCvatVC{R^#c3abSwDe?fjC%{3Z{Md>uiG~L+O z+Wo>y+x~juij-$hO5!c^b2%?!|9juJ6PTwr4{m!&&Jrf@i+>$0;6qH~=2E@NnNpp8 zirA&za60fk;%EieRz~yF@KBX?m2)=s{PcKJ>@jPPqVL_t=VM7>(EW?Z#pafLW-{MU z8?5+rWkDzMvhHSgW`0s~g?8cxi4!P+JgkB4^c%c7wg#^377hpJ?N*ULejg%K!V(3m z8|dmh0{=^SCfQ)$=e6z!pM|ps!o0=1-R6lct2tw@^)zT*+Z6ftsc*W}?w8}?fjNtzGI4cT4OK>5L#!BJ=p=BR3 zLkH37XS2oXvy%qg9kdE(f^MQR&NF#xII%6s_vOeOKrko^E<_)jlGbIqobI1(tFG+E{60am2S~H30U@Apq4e?i|id$<585uSz#fxsG3=Z-VB_+7R z2MSb$`&USh+rimdMy)wKUFPoqpb~8FUn7{`gRt9FPJc&#NyI@zAKrxWA&T_2-!zuS zom<}`on3wRy<*L8N!abepORYtYV*$axfQ}Vc)pp-U{eD%Ky9g7bThg*;3n;N}BJI3|iGfsf>{-VMsf1m%}&-IH}iMKd%jzK-q zZK|gl9+qw4`c;+MC9NXixnY~xwocR?b3N)WkIoKI`xN8e7wz(P{^9_Nx4xk(G8MjB zM1E*G$G`uQ;k^xJdz`O(4>xEsQ4)E+ip@|&LzxZMzgq&;uqmdl-;?k1a;4wjo8 zmJt**UFNLWy{TDIM1;Zmy*9X6n(8lwMsUc+@Oc6MFv={W6FXyLd#w^-NSy;SyxvY8jPk$ zCO#$sdVJK|2n6rC2f!QxL|%{14&K+(AI3llNIvu;#EagqgllJTbxT7B?rJzwYZ8=r z{G+lW+%SmYPvCJwGW3(a)FGZ?&?8Ltq1PhP!~S4GYtE7)IrB3Jk_b=9v?fZd>waG4 z8~Nlmx_stVMrc&hSWrUy55_-@#ms`v|9wD~O4HGPn#ui9|D1}Y9ik%`T6 zg~WdYeVszJO-n+WG{b(=iT#b?>Pfy~NK+}+*;YUJWIc3DtkP~fH(w>FUiaJ&M7_$p zTbg!p)nb*gm@#jputXDe^jv%r!dlx8@|3WYMA>C*kYskV+@=KQR$MINN(<3VX}m>W z!RU6QThuxzM6V)^ffFJppQ0roS_Vc~eYmBiIHb?y`aITYF~qkk?YEUS!+Nus&VQby z6~Z)k)Yv6^Ej~3(a~rN2laA$7M(6+#F;9Da_itjKHkGSui##y6s&>_+&h^uKW2nj~ z^m7aPDm78IN?n)C%tR%U3T4m3VhU@M$9neIa7 zQhhUwX>YjgEJ{-U3Y zot#AFdIc(O?ZsygTK@zH14$KOyO0QiR2)7BlKZ2f`7~YQ%s+k)qK^Je-UbS2zsS8d z&O8XKYnqFmxO+%~`uwc$8?uu*c6`?CSd)vZ#X;p zu-m6ZfgIu~x18?zCua5v8NG|R08n=!imuM!&)XrwhCg3IcKbrX+b5V0Qw1YX2<4x} z>o1#7j_%qSzuN>MGO1+8>jR+F=lyYfo5?~;TJ*qHh0EuWiXCOs%wE@SW*Y4@QciL@ zbQ+Vrt7-#!My9ItKRiKP$hzMTb2QIs+yxUHY|>k4LTExN72$Ai_b8|UD9Q7$pWIJ z5ldj#HRpP{D5}}hOp{bqNFP0R+8b3XkboY)I-m3+E#08HxPxls6bEbr2Rr&mn+qjT zDo>}F_>X&Hq>03-IVe8oSna5va>W&}A{7iR+R*!xJZZuQf4AO(Yl#K_*87n7B$p3U z&)j`KR{g_$oznr4dM5BG#x^O;0DN#m;0U zFFr)%w6{K&kg0rsa_YuEka${*C7RsRd7~Olaj%|bS_d~6$)3(&3!6Mi)`NStS1)W6 z5h5qtnpjM0bdNMV6wYUqF6T41W6?!(Z z+Fxeqe+RYK(o7Oh>eFI`l?=ZA5K_Ge19@vUJEIm_OF`C@(LqTc8{hG4-iO9h*qG(c zZ<~r|LSFg|uA370ZJ82|&IX-fqGA)`td!pxlrNCh26N4^I>sO{qy=x(@qF7?=nq4D zOI(11QbiYTugjR`jru;*SNvyAry=oAxNAZa+mPG89nn(lL1+2gB}}GsD!d3WWc?>SA>G8G?N795f|UN=N2-*#B3+!aybZ zt@&d_Hf{yf;;+}cECe{`@2+f115ZA15AUPT#8bNFur_V9fgRBP9 zSE^|02gCbbDPF}>i<7Y1!tNFxz#ppm8%$)ZQXG z`^L3W-=sc99k}6Fi@bmzX~gX69ueAi>c5&cBe!~x#FoU{rhK3)AB$vnn+S);>w9XD z`>qZdoveCI?Rcos)9==ycnX++TkY{NSiD|+?~y%u=PjkyNc&Q^xnTxsMKgw?FymD z)%EG~I?gUWnSGUo);79=)pWA;^&3on(bzJu(tpmhvX-2b_%8a|x*5niS>{0lslB9{ zSNA*At`02u<9k5<%MtSd2Qkb=HZFdTlBXq_MwM<@BGl~-?r#_TodddMH2pT^T36GL zV2LOaCXO#t51Y5uFIe#S;YEs*Kd!8yoTJArV~WNj_mRq`rRS1V_xSmeWR~#4UBX04W^Acrxb%sS!A0KT76X zQ99iF#6bg1%IozJ$-6gyjWbkO^#l>$z-XNFOWAoPVIY(*xPnBo&y#*qbgeD&Q6uF#AxTL1;b+BLwG$ z5fo5DQNWJ}mxkiF3io%;@+{EiY{7e*o{$x85&b#TPR=Oc*Ua%Yj|mnl8~e^kP$Gfe 
zXLDyRt05`(LK*VIog&<(3)pmwphY!S1R*5;_Rz{W>h|XiESS?CSA@67MEmPMJsS4l zII4t0hR&Pk`M0t{;Pb+I<9EMmQh>4~mciS^j_Y`0VyI}b9FHUK8CQcs+}fCqe(o$+ z?p=W2J$+CFbtd;l=dLH1)j6n25@dpk(V%0xE_Klp?M|Jq8nQFNM4&zDpn1)F$1`=4 z*dKpCV2mO2Qsr0wx4e|eqJ9!wuC_~QA^28PH_bJeZ7*QkxSB2t+_g{yJ$S`YW10qY z_Wp}p^%=T%i_);>uZ;=96X&4UC$-K{_RiDB6n4+yYoM!o;eI{ylVs^OA2Y0^wg4`P zR}q;9EHp%%wQoXELeB4_EP4eR9@ZDMhEf7ubK|Wy{D|sR4Na?ie&uIg%4)IxJU~#M z_NY=rt!#PITlg-6hUb9HtxN{b;GQ>Mf6hT96cd2#|ONde2DDBaHNWuOJcfs)i6PO+<~2y)yyam^44Mri;5pS zUCHb=`p}!4FDDW3S}l1w?WEKEW~k=4{hN3^;aKvA#r)%ADat(GV`gN0Gf?pV?rsf& z;^-!xJDYC23h<`Q+Gzw$RO>Z^6Q@b4!ACnimB1HRfdw3NCXw{uGKEYV2M=BZe0 zLN@sUo*H6eb=i-o2M*(Mpa>3g1uvvR{1SZIHzg?Hbn(LP({j``iwGF*VE!(QQMD8m zj1At>ll&lx8Fk5zj@a>>)!b0mdZWV1?Sjd*PgH=PI+2A%kOO~=VPkxrw^D+&#h`Bh%CgSx#hU}pxd+Ebyz6ha;JMxg==~8#QNdvu=2?bQ~d%vNI)AM$K0Mjuw`7% z+eKkdK*?}}Epn9qMC}fAZS`-o?+0W1 z-y6|NEmG@Hkv2Lf)N;Kv5hr7=`T2~a-4wMSN4`L>^j(70Rpyz}5rM(K&xC(h0CNa9 zRGlz z8vO(|?!;+81{qNxSkfROU+ulO^co_jC=4W6#sRU52rzg3l&i z-s|Z-1ZbfM#1W$3N#P6$wA~Vy?DOr@3@+Jf53%5fDReuP)+`?b7McBYLMMP6pim|V(8e@*yBkme_ zx0FWeW?Ny;QlXseJ73U~$#Zxx;`yOPv#d~j-E>wgAlAvY+6f$<=Qg4D0*;rD?vxrFmj8}0|GS=b;Fw^T$h%-x+CIV#fAt<( z#mXy|9y(^bnvfFgFa6gP?5d?ga1l=~{9|bs;HL;&GKMwsPQ9o7M*~6~+6u>2m1hgQ zV1sPFLh?hPKnG${VBy&t`QGQn`coXw3GR^+z*V4%Er#`t*B5gFjGnDtC&HjYg4J}FJUPZs9=D1kWfXWoMkXu$~Pa8(*`TBaz(^ucZi0K0h{ zm5_l@93BRc5w@aGe{L zcQhyTSUrHJ@Hle~w6tb9lyL@L#e4{tqc))95>?t##ejxZ%N<39nxdZVm7tRL&Gpa{ zC`I4t{`-y)7r^QBz}|{LM3ABB)O8W%g|*j8AH4FEIR-QeC3M@tzj-AW{s+Q8D%2kU zPtP60D3g#L#&s6%m^w{^`WEK9dZPpVAjyv7s_HJ&DK5u|K6F+zQs$t2-h3CqF`R%+ zKnxC;Bz4#8l!WAbW*;Qs0Z<9Ou7NvAE`aFcAtc4fj9Cuq;%e@3~p0&YMl)@s2s4AB1VFvtb9)wWq-bZ_ruLr0_OhGtM zdomvk;6bud(8L|XjUq~lx*GYr6*G!<)A2vaY{pn$)VXL?m)f_u;_|@L-lD+iE z$2Ze7uX@3h*Unw9ZyUQ7Q>WntLQf7gMn!fFf0<^(G#}?>fx10uTe2m;V)2PJ1Jf@& z)Z13|yj$;uaF@1KsGrSLl0^%ulT8kWn~Pt8ZiTm50(VLoELfK8{X z9QWy;{+@v#l+&gRD6G!}Gl=+^wABYHx5>G0n*r~(iT`jI+g>jO@KJU8Z zet`H)If&fEead}{7AmGYP^1Xk2%BDr_8$Gv_7d&*jTQW; z0x`XP4Z2u1aWtGe?Dz~-;m?LPVX>a(nnUI{JJQodBm9qi77Kb7u;eLThCRjl3S8zw zXGr!~F4|mMlX4F@s~BMIj50kCTmkZBBr5pObRS5@jqFZt){Np}%SurWpbWHZb$43* zQWs(lN-)(r9-&;-n{gve+M9-ag$_R+84r2kJk3Y;@W#VHZ|i%JBEUO=iN!o9tj>~F zLj}_k`pDe&U9U5A`eLKYx-PF_McK>(!x)IG>awGjE$%VQS4c$W)^x8-s;CqZ)=7Q0 zY_LWga`6;L2SYGDuTbvo))xM=ptH3~1zytd-;T#1iKdkBwM7BbUI|Dl=wo-G9Xo_W z+39)Q6#6k{zqRMCRx1b!pruv|EJpx-+CRUhgNN@}d7h5i+HEsM(_n^?tHOXlcL@zF z9ZX_=!2NGWm%ihWY;1zJefPjexNX6Pp@KQT&y7maFPVXs>L#uFl^h`NpQ~NXEY$`o zr26h;0n~YBmxwJT$8h$n)S2l~tBV09p{=lc5 zRsT>*HT*&c{^d>z<9liI{<6@{4?0%Tqq#q=e;Y~zv4XZls~2FGl3xmN-!1on9;Ijx zk%OxAD1T)Z{cKxJ5VJ*oJ)~RIh;9ViExZfNkThU^Ja|!4bIZSxSK+>S&=9TM4zyQYXB8a}?M3LwEu9z6tAwKA)5WIHeXQiU8^b$zu5eT-2_cUaYwnH@3t- z`2mG|o^=uPolWdDClPagZioI${dT8){YS=Q)|AP^)7_s*gl&voou{7W1vA7}Q5Uf? 
z0suoUJzfHExGYOW_);}W64xWIx36C=zss6uEZzGTvG2gw6$jVJA4^ZlWhYKxHP_zt zaQ7WNGN$swA3x_w$;MUtYW;1wO4wWHmPH*CzYtRVC6v@SYN^PQG#!Iqiuh=le6n#Q zdx--xkbe#=OwrpJzHzpu6~zJWPXGU`tX>-)y4DwnCrqbIEJ462>NGOlxU1*BrC#5s zFzBSV)udKMF!PzN;Fs^<__s+UUnF}%GdfDT%Hlrj3Jh2^J1JB%T(YXOGJoTK?_79k z)>s1#Zu)MYM&-C7*=F^$)m2nO{t2PSdDS#WZK@Hob#Wa&b-mQ(`mFW@lPb<*;ioQF z`<;5DN79^_-X5lsE>BvyUmwA-Jde2SEQc*u2OXlXpKXd%F3UMkNgTl*vx|?F3K=X< z^YPhX>xmw>l@d^v0x~YbLS9O(nZfXQ z`|+cmtGY9repcngX$PBBJtyGHBbi+5Xi1AwUkR{w1j#C1_n)!qiJcDDOoK<0JArG6I zRyEAR_bV1|8XZ8S9T(kQA3P>$Nbr=-cP0-*^#-K}I1Gt38*=Jk8AZWt3hr{G*!p0M z?#jn~ZH=PvWvXpiGBC*w!AZoO-gsU}IoMt9g?iMNuK-8njnpp&Y_=mU_F)+B8jAY8 zU^19!)Ul}@p7t^xXk3jNw%UO$+*f4`I5eNd;T1-J8&bLSzbt+PdhdRS=-0%LTdKA> z=K{M5hfN$>G&&D0d;4y(Sd>^Jo857mI4>1;#TP@IuMHi+^5Q;7RlB6_ELby`jnc%0 z5sltciF?mTeN|HYCQ}}T_RR3}-ISVD7~T;`oSiaXzu6ofg{=*$1!xFA&eS2=+D2Pa4hDnK*s>fkV5~=OX2I2h7=_|6~FNqGtDgo1xiQ-jP1x#{rFoE zaG4LjnwRiBYTGgtr}agbx_cc-^Ph7N?i{5Mqx3lpeHYBlms6U9f>pt zh4Q}JYd7|{CIY#;97Rv7H@CcvRaCB1`x70bo&V+gTV|ZYB!uCF<^6_dm$ba3;S-w> zQzRe%$A|Py{i&x~Y7_-tkJ~x5kdv$NwQsJw6yPbBOd3thX!tr(Ha06*Rt;Ae-P}^Z z)mTdKJ4y&4mx_&v_j~vYV8iV>6IGCY_*E#ZKNAfShF*vBM-ZGyp`3i3`x&ZtZy~<@ z9w5x&V*BVBDOpklvov+ltB~RV%sO;>!absYI{$-bI1>p|yPj}ED9o0^%m{yF=g%yR zfwd(@qm0j2QX0-BE=&40i@J9QO~$gwgfMWy^1@^Fl_ScdJEo+%S>|g<_z+(OCqnqi;E~g2%fsC(7CL!|HWWjXxEDRQwEKQ_ zJmwcHH_S`dTaU-fZO@y-8e_Sq;N-3#m@y?Cqf6S600u?|jz&tKsZOBP1J^+w#+0t; zs9_m)Yy>Dlv)f7^*ZYS41@7DT!(0TWkFwt&sES$d=r7HSS&A$ZVJ`gxpLzIMC@9lC zyOUwr@yez&ZMvHNCarl`)*eof%vX4u3HCpadL?|(r&sC5+47rNFG-{iRut#NLdiX5 zf~jtTsV7$ExvAr!)`GX3(q1;h7r!8Ybx*6UevhR$;WHB^R-Y9iL(e2O^9!uO<{G4o z-pYc14}Y88yQ*HWOYB+e9u7M5fj4TZG++2dOGEhwHW@y+_>&emP0jBS>9sAgWTQjN zwzOCZ`n{+y3A8Lq3KEgbyYg{g6>9!^9-OI;E!giX@-%ywQ z9hQKP%v<6$Qz3NX=GsQ7L6DR9y%c+AdI`HK{Xg+oTT$F$#Brlo&t^SEFtxQC;~QW}D@{*N&)IXcwsO-KbzE0=9k}kV;aI|fSt|!Yj*PV8=L--T?jPP&(Kv|6 zi+=yM+c95o*g=ki`_|{W;5*MIsHXT+!OEfPl3uG(P$PZtK&5)8ujHlCTQf#0b+pfN zu++qQXt&A8C`e|>gnhXrB7w?33sRbx5;_(NR4R}qc*BepMY7L74Ye<{dduN2ZY` z;pJ>mtqN)U@FPTv$aB)R(?bvKt;Lv1&>TFfDvbQVYPXXh47>fw4WjzE-1ar$MMEGE zh4s!KJgVhLvP-$ss?PSHgF3Ou@~1lqq*Y+^n=6ohSB)nBHPO1~H7%_&>?J?#CrOY2 zj&{scWuT7G^}&e0w&qU#&HrXtVyMm6J2gKqc8~*!!WYAAkbJ;KmrA2*R2+I){S7Dh zz5K?96jdu;U->BoyMZ)_CoVPF0RWdf(B*It!C8#b*I(WJT%2P;UKmTblwuUwNmxhw zX(e*{`D-*;_?%0YXRh^4Q*xOKsUlfG;j3%2c^k7VYF_Ms?V^l03H8jSi>6RWOB<6lwn>f`FrS|YfO z)NuR4gS1(Hk>B1UboIXqk} z)BDq-eiD4Pcl0X4I9oSinhjGzCoA;zyaI)Fvav(Oj5-N*wyBLYA$BrwdYIXnEkDQy z3v{t?!&blbB7K4DrmZ9pPXmQ4eldZ=!+5`{R;Q*?E8+_mx|FA=?X!!lhq25|)!_;| zWDR4L+rN4F9<*}3gTbXD)8{sn@31IPpu_c~a}^c&Qr4LDQPM^^V<{G%kzEu&%@!Te zd+BDn+m`hr5+re6ePsiEH~Xb!F+&JS(K~Dm3Y4W}c8lj2+-wYsH>tCEk_&h~zY-YT zY{k_~)g|m%@aWDaMN^PT!{xQQ>h>#aeSW!6K{=8bgn0R-@~!V|9eAZ&6H6e;a}(tM zKP6ik4wqkHNS9c*8dg7JW||52U7Mx$tkSJtamZC#xs&$0|D;e$wQw$nS&1sIro&@@ z03D+MdY(=A)K4YCi2l1%@vb?ru0o8B4B4{Lf_JvH(Sbl%@dIG-?PH9I|B4oFxQs|> zv?VXjL=Hq-oE&o%b$5=c)JjiN=wZd+nKEQ5!Im2tbJ>-;a7?!<7Q-U6gtC*JF9N-q zFhnU7R6UwvV)4W}!=GWNxE-eFnYI`Z-p4HHfW|QHwBJj7fK2?2;+a`VHsD%Pa_7ac zT>>SymC^5|HoGjArfj!ePmT%#yPag1%&%zAV7uUe&)oT=Zv(uf?tBM8mU6~8y5e5G z3eIbf2Uj0heZ`6SrHBHJWqY{ zx(~BE#K~7Rx!K;t3V_@h4zw^df~9yQK(M}bj{<2eEqb1JxJk02SKdM{^V5NrAE|lN zbkQw4k5PcSlFr$rq431wiBJKKVXN;?%Dj`ijdivejdp5q#sN0hw)uh2c5=RB*d^-9 zPyXN4{R$%p>w|JMB)D$9bXTFYms2Ey9f9-5I$<8cLE_bcCOKKY@L>XQpvmmP(T8dBO2OM9Y z7E>;`2&DBBuHU9+JlP0PEf>eJMA1hsHQP-5MRA0D??Ldx+;N^Zy<2pSBm^_>i5$vA z*-B%r{=3_p&&rf*!}VkKa||Qzo?5{EIYcAB;s<%R1v$OdoqcC1 zZCK|M<1)Z8On6}!2NE7XutUsi!h`5(y)~|_ow&@QKDd=zeEcJH6j2s-HdsUc zTb8Z&yDqpM{2Km>mT|4cJe^q+N13R2@i4;6mFSq$r^1ysUZjq~t_s!3P>mK`)3L+i 
zmp2l_@Q(CGUz#_xy`=;>ACcM0Rr}80oE*VB*^%GqLS&&u`6HB&U^tSOO%N!CB5ikR7p*rEqSY>GRMcDx=>#ni2P#bg4~s}q+>^vj4yD*ud+4cI29=+7MxL>KCYQhoo)U;@HFs}cZ{(p%FQ%IQF2DeD3}37V+-(+aTgVn z!ZU?EpBKt=pHa!GlK8;mws5!=;Hi8q$dcWxNudeoHshz!9TuJ37x zgv*@Cn)>O@kArH$#>Y|d^HGJ9r&~ZS2*LQOJ_Dp}t2U5~Q(H){=+#~+X>4aGh?z%`I=p-IC957x?l)7*hELux>1cE!A`8S_V6N4U55x(v>9 zLYT=ZKyn(K=sGR>XYw0^}UUEO?pl6G@v}Skn4*sVRNc~E+=_S^0m+8%a6Ic}j{$Wo(swnj8|-)GE~o|mw>wgp zj|iczKV_ou5>wo?;nbd$x7wyWhMP9)P(?6dblXq=@r*&ZP!%3u#pK+;rhBpHWREI$0 z1ya!5ILh6vH2QD%3r-?E4@--4n`hx%RjmjTRHDyc_rwcY0!MU9Se&Z znH79J{BT>x_>dncwA34~9zpmkpkn7@Ld;hfNWRbKLvPD}PL+3W+*T%ZsE^_SL+k%t zw+3)9Ts73ivT=$Q!?6lpe|*aE-%7soN>c?v+~tf7-6r|Fcf9@x?*er_&Z|zHXLE|I zwW&XV$?$x{lo~LCavx`JE$IdCaUKb8KE*3I1Eb_XCu=cevd_VT(r$4>fB#8G((zK} z)V^dP1VV~>4h^5)&Sg!)UI4Dj+x={3Uu4$-^$+Q^Q-Bz0Y!}1+L!Py!LGmA8fBrv8 zKM+ffA5HJFGlDxr9vGTXN9RDA;=_Ds(;lTXGd1ZNsDexNMSP2>3CHydomt;Qy`ASt z0H&0KuR*Z@rVT?}OFX*@=D6b5L>tdKPE|?SZC}7c`ADAkTmdPIdfz1Lss^LIroasr zOIOq!%q!~cTJ54KzIkndvqaJ#ZScm(J1BSH0`~AGevlTjCB9i;^3`B(*wtVwiv+85{b2%Bals=XcYExf%ES;JF1DK}HFq^anNQ)fo8-P7L1-W- zZf6!I$*@PDXBN^7{8aYTDe{tiGj=53gRJe0`K-#Z*4; z-vKharQ^q&>hhkzz|k2jXPRyr=yMS>*pXKAnG9gs>%4C`e_NFW{Z7q@ex@P9Gws{4 z=l`52&YQAjPW;x4&wgr4KwQkK-r)niSnTH0ed~u9IaGbd$ z&~CqrROWrnafVV*?o0q(1I=D`iwTOA8nAp7i5KLzFF?RQ@i&Qe*q0&G|Ga*wN#bAE zd98L0DpWUdM(cEbC(kDFeWRlTW)Kd4E)!Mw74%fpk$|xw)^-zvA;A*RO>ukLH0@i9 zJiD?&-c?RY?gY$*|B32+XMox*Ozu7-Ayyyc2XvL;QkQ8&Hf;HyyXCtvL0oo*7maGa z{N=Zuq3Sf7eF(!6Tj8);=sxc_IsTP)bEFIh$=da_`oy$FG(#9%RWj`@E=%t5_vU!? z)H`_fk%p+k{t6`;~2(pcAi#KoQLXk}XO( zCV9Smc+?L2uFiPl)vY=_9T2RRJ+XRt5IBfykZv$U_H>i6XS%}~>KUO~JdvM1^933i zx=m)4h2(Cenmv)fRAOt``P#wIa-}Oee6Bm%n-`>~y>1!Zb+x0jUSIab7JZf`BnttU z<@IR>?v0AF<&X=yTEYe2dW3TNA>L{G0sxk=2?tZurkNvNR7=ES+E6x=GU=k5t8jit#f-4gBhFEj$ zCUR0>87LFt1E2^?x;KJOR;M<}_-uG3!!v^^d@RKCH3*Nf0%|$$#om^Qy$K%v(){xq z>=c|E(vRRXm~vQ2W`bDl-$Cau2@urHR1eG1L^ZqRWsLb8x5*?1TLbWMb-gN<=%qM` z05t4Y|Lk37e@1T+mc3iY{c%t!GKDW!Tw0pN%HNpEN!cR+^Pq|aJqY15Q8fvAe=Kiz z30nAAiavkpHkBr8%kW}scLxYd8YGZiTW5-vfic!nejUSHr!o+D8>=y1x*3JQBVZfd zp=3{2kGcD5-|z@X#H!NfljQDa`kmkUOh$9E;xZ;RenQkRtpASk=4=w#WfrY~^A48I zI1c2hA3xQG*tnQ}ETl1v-NTR3nAvF=`bT66f2@ZPi_P_!-+`A%e$NV`thUE~q(-J|DaM7&;>O zgzw0+&Ou}etrI}p1ojBA3n8PxnX-aJ^dp)OMYsU`b{pUU|2UEL#0~*Z3T%Edy$4c1 zf7aD;0HHeniMYRpi?jPijHMaogXr-z5u`Wf={{`rm z`z@Trt1p32KpX#l07zn~wYe`bu4}*BaG!s!aW$d4{4O7SN3xkth<3vIPmpn6cNr;N z)HU58J^u4~5F_`#(xg&C!^}}u)fuICvHaPCkqOL%r_8lb4)7YvpM6uY5v&13ZB6~Y zTi$q^6;DW?GxBErY_Xi0X1%2{B`^_KPO#Ng#iv4)h9m9Ykge)~6USjm$FT-CE$*}* znJ2eQR(`LZI2H$=6_x+%w zp^TD)d_Qi-o;?CPA9kmX{VLXZiv*B8S8M{EX=t3l@AvElSkT2~qXXj4kG;U_dh~Lw zWQn~L_OO}Hz=GK=(S^*Y?vzN;N8DPl`q=cQ>dKJ82V50JxBIo!A^-qI+RDPSlDD%P z!8}&D>Y=~=Od6Dn{+o8E%1-zRnQ|#V9K2y+B?Q=SvB9<8pW)N62&2QCwgoJ#C~@uj=*p9*wG z+kIV6iT`yRMVtUp_J?P~Fq{C(l1c%Wo5)c~$|sL5K)Dei&R9+W-0EB#eiz+e z<1iFNSUCY7U--u$Krnpv(+1_ts|rt^v@zLij8s+E9e>2bu3>L88A!Pp(~u+IbLffk zS0iDku=hOqfeP5Ha`B4!Cg6FycB>2}r9jWh+b~L~xCW+< zGCHQo`v9OsTcy6|+Ma~HX(QJD^wD-f z6PkTbADYX`H>I6qq_AH-q~)+-&6@rKUz>mizW+M9{P54yG5WdJB{cH43YEc$Ss1fTPNDce`!H& z-1$gBe`t~Wv!b&!HK#YVQ#XFBh>mYsR6HWnU+KQcLM5o}`{oyBvq&EOfbc-`SaG>w z%5`}F1!#eNgygj$yt!R}k77Si{B`g@Op{pM#B+hdf03xmEBWB4U%S%hg{{@KKeE_K z$)7r!6o_$^^jf1Z$?YjlYMSDW)nE)t1oyuUqU-7ZRQx;Y00iD1JRqd$Zy8v>JP#^! 
z++hlzHb{HVKL_LA@m0>Jm6qoP3UcCnOU9w9!W-i`ArTxQxBrK+I(QtVO;G;lej{wS z0Bjj1TvG?%Ztv=YME_M6*;%xIPx)3uYm0Cqsi!=(`zc_+BSv03l{ci%00So-g8^30 z0@V=@EA?Aiiz*@1KjX27NVh6b?B6mM|07~9^m>2<&pVYbKrwh)hW9fUOxqNKwId2Q z0Ye5)vbY1DF4esD{7=*!{Ayj#q^dyPGQ492(mX0f1NFa4!B_g$*^4^SQTT$}z`LJh zxN!Y0|G|Q~wH>MahkBzC`p0bN6wh((>1R?`Im#i2(8taM-)xsiVr^jspUm$|oA^3L z)RX+5>uxK&|G1A&9x>QGg0yy{%JyNM8G!t3I1xxCl#RoW?B zJz^CC+m^?b_;1x%SC!SjFH0Vh5QohhBZcY;vY^W)Z2kmD&1q*aeGID2oLS;wBr~YG zOt|h8nV$Jok8X^-K?#yiOSoFX@4l84JxaB{gKk}TJ&u%~44S_7qyI2Ej4sol6w*Kq z%nVZbRpd)q^6}^po|*s$usu@2QZP&m>?79x{+ncD()lrD9G|xcm^(Bx;vS#i*c~+G zLr&=ecbV8=Xu#{4i)VHp<&Mal_+IyxrT;$*pjASF8t4f4$n6+FM`6_Vt(_eRaO|YQ zB$c2*2eVj=zQNoHVt?Apa;W<*ti98L&PJFz8yQXyhmLTr!-O7Z2Ydpxu+nUX_|j7d zwfXJ+607^pcP%;{joGZebW)qCC-u43#zn{}MbGS;bYJv`xm!k9hUzRITjiWDaBsg7 z`*D1uJ`f30ZlE)Gx0NSiWD<#~PX{?!J(*{3-8kka1iYAXBo%QL)^o8qRs!qp7QTOf zG0w5^PA01gLz3l8jsDG-UsQQ2Z<9&B``|MC(7Gh9r ze*Q5MeW|>;;onI_fnCb5#-sOkF2;_Xy{>3eC)OROfJ_)M0BdvmVfRPNA79dv2xKe; zLSyfci7@9QbN*IOhVmW~?m(nn4b)LTZuyyDPe@D$3cIEbVQ3J{XN|&;W6R$fq>m~= zL|8#Cs&2fSrvHo~O3(gSog91TylXy?bks6$i7);SK;T!yd z1$3$@Uj$bADdaE66Ctj6f#LThAArQOZJ&Og6wAC-|hDgKMjXX!?d5M(=>t9Lw; zRF@N)26E^63=`Xh9^44cROHPM47({0ANytMun4bVqytkqqvqES+Q}47^WS7*m-`8a&zoZvOOw(8y+rRDAB5}uY5PNeS_C?cVzR9?1OIXCP0VHeFR zltDPQ*)>PL6OZ+{e^Mg${+(tW(hLm@=6a!b^{XCY{g5y|_d|6`zh~Kir6TwLW9zM= zqJ01FZ$P?1I;9(w?v#-3Zs~528oEPTy1S)2loaWZ?(QCHcy2!5-~a4cbHK^0S;O3Q zUGKg3Yo}?m{hU>mbJwa)#z(r5fr-uQsg&v}U&hTIVR`dzlt!drw706TLtj)o`u)j7 z^I!RztxuTZL9Pg6$o6kE>Y6AA?c(fSv>86pm0`M%0)fnKER%(DLnh!Z1Lmgk__ptL zO^IWixe^gd$Nc4qr_|A|U5DuP_VQ&;+> znm&!J`7o$zR#uZP53^}qAGxuiWqloQoM9CxeYI9=gZ`^~LSIdyj9wO*0T z3Mpm5U!}*fqr$|B zZEi|~5}})sm7$zWjsOXMa1B2rHM6N z-H^|R&vQ1+d(485L%2Qiv0X~y_s)r+TgyKkGdHA*a1s_jlsV>qoV~8aL4GXSgFecS z5YPAAZ6O8;fm+0(`1(RCd8f$=46e1TCG%oMme3V~HDg5@86Oq~^Z6YVziP#E@3g;0 z6gfOM2dQ6S3{ZhPG`Jv;*53?;Y~X13|Asn1j#kzfuu?WFfdO?k4KZ7xzmxG}lUsR%kA%g7oI2a_2qNm4#+ z@Azhm{X(j%67Q_q_{C?rsi+QZWMu)-;4ol3{QFEbGyiUP?s~$PucG2f@^nl@v~{Tr zmKPl~F6e(xKlnY+i{xwb`h~&oIs(y#uaK!0Z%D;bP7m@gp{mkt49Vfb^0c1EKMIxP zui*(I^TcNJXAoQ7nECr$m3wH=j>>-?<^)k`Y|%yV9#4-BQ$Nc)$v}4sdbH2rhnI7Z zBhvs+_hqcEkfK+fd-Bn>PA4#A3b&molk4Px5#BGvZMr-Mz4~W4mJ76bbwe>G5_d%-a;K!b@{{654)Q;#)|#!M zkPw3EjvJMBhQi5guYI?X^nPdas}b$@nuiNTFRj{oCqE9t?rE+xgF_AZ(praou8yqTFzv(6t|3)XXOToftw%qDtKy%yT>=vJZAa$z+ZD)i`?|N9dEdHe|-^LE>VD= z9cPqXN4;X6+#UtubT=uz!r%Q4;!=FC`m3Uv?db=|eVIV>|E`g)L@45@yOb^HZIVxw zTG{j|K;l&bhMo*~{@O2Kn`aEGQ%T3ol9@;2!4+k@jSe)j2Kv8mBp}I)#QjyLjh|&Q z1r<*8`FI!@DLYL7m2|?Tv*wb}z3cYM>OrZIWnp6VSG?Go__EZ&PF=3q4-$=}pp*oO zoQFN8%kfitolmc2grx6RZMbu5ripB09<)gFglKM`6~s=R+M=e%)+=eNL{-9HzdT5< z(xXB!F(5HLTWmE&E8RIDW#tj-=W_Fr0|ROBOkb3&<1U6@yb+zURNoCh+4X6F2-;(S zh!E@lqI4;uj+C=EC14r zcQPQ($dr-dY)#->!NTPJjMe(fQ_!mv6%UkwJMc-z3gTn!86GKsmnWeKHYP zi3jcU&o{pDH?~|6P?kED>82R@ml>GFftI>#0h4X^-i|Ij=k1q(34d2@rh;o|GYK&$ zyF|MUCmLwAU5!O9b%$Np)(RNWhJ`?&Ex}4cH|SOzXe(DU(DcT9i2dktnd{169AF_n{0!Y4R8N{y8i1Nrv!+q9aQ`uZ4 z`e?ZdpSM8s6SnTVKrL0T+|Vw+c<{jX2d#zD@0t$r1yIeQMi+Ur0H;ewQ{9*F8TyX3 zWFKa*AXKA(6vX_YLPAJ#}{z zgLG&7FdqB=i;)34D%QZxIuiUUKva_UDq?se~Q11{wyn5#GN?8yDijFKm8GvHL8 z*(|-L^w4ibRO?C%YPSQ18rHLADkax|aK*Xm@`okzVm2g=_ARFnxo@Eeh)bgUfFk1e zrKsh-A}@vg&kO2utXT7boKPH6Sz8!BmH`qfdLC(L$RvczH4_r=n`TRU^VO1h)5|C8?+8swp`52ok zEznPr5*$$2s#fG872fHaJhb*BCW7*RYhWFu3!!)Vm?KdOe)p^Ue(X5_Z_?h{@(L~2-N!J=TIE7J<6u-&>FiJkZ($lLpz`qg=lX1s zM2DgFtniT$=V5yrD+rP*6Cf2NbKw?-@hWpJXv0H;J}S#t)#~{C$y`?x_9_G}0)q=o zkd$fTR*`{+A<^F@t6&Lsaz#kO1N=s{v+{ceaQce zU@@>x?{&-)B$u#;(z3rdDG5ImQQd!R(ED%o;)Aq7d!n=M|8nA;(nF{H8SzP8r`fF0 zkx+}Cu4uu~=P$8S4sfP-cLVvTYWcTlTS+}Y0w@8z-b_NkPort)beRFXp{wLKT06iW 
z0UG?mh#d}efkB`r|3O|bzkT4G8BlssKd@WVaV86B4P7i0Mv$wt8Q}W(iZTq8rr9zf z`_TidV?fy$c)+s7f1IJwvEht!)Mj6t0*0YS%6oTDfV`n^H;oV*P`3n0_g2OKvXpty7q zOUCkT@>nZkp&-p0J}W<$hnXIkZ_neGeudQWJ+=?~m+#Mh%yL6oUz6gz);iK{~HswD9ki2djRgo1%|v_<`4R1)N?Tc-@HRZE6L{){ghc!CLd<2dVveg^byK-+4^FgMMhtSTJOp$E3=1Dh9^-R z>2erV=<+*21TTDfb(m%+8uwc}yi6giWT~mXgQT8DjE=vTid3f3M-W=XC6$)hEG=m8 zyw8&fbi@^xD?sowOlBGUWci@S%E$rgI&5SEYYClQ{{m3HmLgid%7ER49{a~n>|TQ+ z$N=S1ma-_+hg%2IwyMjB%8|DV++(rTrV8N5Hwh-g>Wf~O4^^}Q0%VKryRwHdOJR-? zrt@UTn+DQRU>U^Ry=okc_Qv}{+xW3-9>z=eI4g4w1R*8{l1K>m-!SDJGxDlsF-EVC z3z~}jj7;BV(qIvrP$DKMJQokJ;Q}Yf=bv(@=WQ=YAAn$O4iH_-d6P&M-J{Xmodc^t za~*thOkCQ!Zzy6Sft9`^wU_qyIR1B3CPDdyv)cGP?AY^|$xJvHD;#g~T3kk2=b8Rx8hr+?$bpz_Ppx{fg%chdGEx&ZhFrX9sUTAhx_iI9?BDx`jgLTNK zb|25xZAMlfie_!&HSp?tD4?KxEEPgV`%jr`S@W=cY{7GR*%!G7o6Nq4fPgNg!%o+6 zuGq)Rj>1tc&^2;sp#nsQP>m*~1K&Hk-rPEZ5{{qOXuEVG?gV#{e*$m~t9P}>$z6ws z?bdd3L-@72D-X*+J?$l+|510$(w*|lRLK7cVSbt|Lh!`G69RrgZLEU&LgI@ zsl!46jAZNvG|VZ2blS{%x$Md?Wt(z}?FR+XsHSiA)bCh**S;_(o7cY`uwzREO~jg4 zpp2lEM0b%g%4hVA82?sT?FMKZMF8n`9ci+8J5HdT)p0hIoAZjPBX5@Rc5CP7A68rG z9()D4Ics?s-)3!;KF@GVd^`-pR!QeogRSdUpfur8pc%->EXN`rlOl5rB`Hpa)KtZ3 z8MvpgVAwgx5b-)0w*&Dg(xWun71{^=f`9Aus9`S3xd4Tfvv#}S8St1S8FbQ1WA(vB zpMO*Ac{z2gmcLwW2X95&x-$$t2ByH%Ai|ZrMd$*qmTNVP!jJ{@7@?#sMxYZ3wO!&U zp=8{rp`)S8EhzHkU!g96yED&Uf%RwWm)m)z^FR1r;ioNfC?om=>Z@a;11^7Rzmowy z3sE9Bjl|yu$lC)9VD{t4MLyLNZoxk0U9=&PyE!dMa47ipjrUvxH7=8he3x6`a~u6F z9ZQrhB_cFMNJ9%Mn!r>+{Xh#O`Ve(37exT@Cp!>D)Fs3K! zk=efCFESP-3Tha%43np>&wm;NF##I86*;6>Tz;+^-wNx0<+@+=O+`F3H{`6 znu!S2TW>;jsBhdC-!(E|V$gUwNc@yB!Y?dcg<|rFFSqS6fS4}tu|^a3LZ z>Q<8ouVwpefV$5j(|&%u3%Qbdo>X`OSYFM_<)vc?A8Pf1^@+rz&dR zE1L|nOPmEiJ7&ue(8AVpW4fenvXpTfEMzr#jsw$DNN^Z%2ohUh z)t{7Y4!mboTiMd~FL|y?XW6C>&s{OvU%lIfX&l*E6NXWA_m*G&z zbV%qrSQTZ8WUmz%v`Yw%vC)Q40cI2`Z%9BwY~!F(|JQibqN_1mE$Yb8hV$+xo9R6( zCED2qnrZX|S0YOV6>X@`;5LrR<(c2@$$$vb0*#3s)+j6l-UvRO58e=;UfpvX!y^Y= zOzo6mciK+rC zjF`mhaPhph73aWyIsl*CK#F*JU-+I+@5QO*DB>Jw(mo#6@O3y&@jBQzf^+y>{-J(@ z#Fp}FrDso3%!I(H940}LifSrU?#o^Y?JVNneBcjR;e)@uslnrw1$MZ7yJbyFJQ8KI zy)C)(G%=(CiuHw#x9$NOR>%|Q`IlPiKBVrT7i{!FGCzuSNVSHMO6^73Qw zTr=Nq>>*QR(S>z?NYN_(+wxCeWd0AVHzvS1Zp5Hy9?&%efI^_)nM{9{dw$_n4g7B zhuj$DHc`_1|8$rpAK91wWn4{d3a$EE+NXcpTQ1o-q@)jRh0n&-G@FcJ$M_R81-A)K ze-_T$Udj39Z2vn6t;kwkufMphE6 z%)4-+JPTWC=II=IILf07poHahCVp3Bdh4g)N#nJmwG>qQD^!!FNsI|v2wI%pjQo`| z$)~O8!$*)o8boM@B}$^JxSPb=s$2qk$9mD`lrZ+ion_uzj>ABk_n+49=U6Mf82~2K z8<4)}Z3tbg2heiA?*sipN+OddL>T32Zhn){;YA{^hxD26jr@0aIi35xuJh+IRh0*0 zeb)W_cM@Jp3L#!*OP?`<<~;SdR=s8v4o%ZZ76^rXu_0w@GAz{6uZ8EqBDdv@5r#@O zRQVIN-BR8X4cI*TphEeK`M&+K3 z&3{MwSouMBE7!NoVmOEBTAg1v;n&_2Q3lt-b0>Ikb;EEndMwu`bR6>v6|R86v9-*b zRjc$=b>MG}6cjG_{swX%0y>kJWIcZyAI!AFUMCsoE^|JLOMA0gI9h>rET8d~@ao)|WI#V+!E%IB-DM|dMCd!3Mkt^by z_p&%9QD3wLAV4qFk~_z?29K}4}ccxmiY|zD^ z;ScQKJZyJ^d1QgAUn1O$6*e5Rm9C>6{I)7!4&OKtF6sh*43iIk4l^F0n79DjijF^8 z#ERHa(L2VcQQLh-@B@P>^r?WqWMm0JM|PHeCEmAymAjbu^L^&}3VStc?q{pB+d}eE`<7+e zSRx_*zkCZ>WGI|JpAr=PB$XU{wmtlu!-`Lv7w3KPUvCl*yPwWIGvmNiiQh}t5~A4; z7J$2K^58J%xR(mnypE^69|OUjM^5<{XLWl zyLU0rvIky@(bBw+cOaf4MhXlq%Dn#t29>ENKS0k*L1{vO!iYK9Q~-YP&IPZ1UXK}S zB^UwYPJC5&mn^FeiFi{)$0_H14Na?{i4Fm~naudv1wcKZQ%sE4!$IOnwoAzGdGJky zcPG?o>(zk2$DRM_yG^`)A>|65@=PV7LfQnRaXKxSs;*Wb(b>@iTi|u^CNXK2bR53} z!^g*tW3R6QTTK2$s2-ocykgL(v0P}(etVAr=EC;Di6oXnN>sNe$4h% zuI%n@X{GYK8^hQoG+d>zsM}m@N^n!RzJKKM%e>(?#HWh98E{KJk}I2N{VoR zfUOMveGyQVUe&%8X78XwA$_lEmbX0fv_8CU--e&wvrE1`A;bdcbZ;+!g2D5z>ze>e z)p#B}xvMB@6jHPmAiSb(X%_}Y+c>^wRa$?XZz1UmcV$MPC#$*?O&^JbYr6e_TKUb> zfgNy;pOa4GYn6A(2+kajd~%$PsMLK4x*cCil@ZVWvX_)*)I)ZZw9qkKNEsNf3A5qs 
z(eXt=_Ze$Le?yu6XV_Qh^1+|}?*SN0{wSxhRJt-t^&2Iyg-VQERAtkoi&cR{3*)B83M{^&J?NUUkV=BIw$*aN_&_}?(G^TDmo7|Vnp5KId?*` zOkh9_v!BL42Y@UGu`=irRj80FO}+QpV=}L6&Uy|bh}|~M8>AriAsVIsO*Breeafo;$ir1;@$R6drvyObGC`_yK!6 zN9iaohS=vNjY&8^%UA?twES<^!c@0z#}n4^MnXs;ek)Fk3PkwZEY1J=&1W6$w&A_0 z5;0ApKsSXyzK=faY6HtgW$KGBsR1nHc{8X4e9eF3Dvuvo5Y%C)ovo46NX;!VN04xD zdWI+S4=Jh>v<*?Vv+^|;AN$-nk(7zPcep@T*oZuWzecVK&~4VbfRmiimOL`gi>Pq4 zyw}98i`{!#YaVCWvTOZ>SlezA8`p9nRqc;zLhXb6VH#RGG}*<tmXN?D@(Ix2?AI9(O4s+8WYR6)G@>IDbDQE5bYd4`>I|VdEK=D=9`4YLNjsZQ@v3TZuyVY z{GJ9~@2rM>d+xi=`Jz-_OpWnY_db`cKmH~|t_HPKm@N zTcnTwCfuN5*IiBZ7`<}(Z+YD=AZjQZZm7_I3N~@E1-@|WcOI(r zo3;p&o5RaO(721Y(jZvyg>$!Q9yMkEBt$W=4Y(LR5PvJHF+x7*+PJvos_LqBEO%E+ zZUPK;6@CpY8Tt&*Zm~m5p(?z&K|W=4<7n zn%~m~2)+nz-^@%O>duavJ`U`5jGQc#WZK!Zwdqv^#|UN8Ks2H0KRWIIJCdOX3GLNd z7pG|5#Sla(CRspz`|zrWMcu5TPEe3s-MmzB$Yk^qWtACqi{KDDAdg^sU#|LDzf6Cw zr5F~NOncH)QgY6MO-2bj+xEmAvsXb!tzYn;DgI9Y@7<9JSU(xZi66;d018UFil0pI zQ9N-6%DUNT5|z6+h1>Q$D{S%gg=8^QN=%#Z(WfMDS5`fLn_5qUm2+2|lXC#rdNHis zl;2*Pr5>FO^oP0LuLGgBy|ld?D?VHO8>x>Fe)EG2L8bxm-l?UK_**pis8~4vA>jk zeeg|g1JbB>U5}BRRp#0mdFYpAc(XF^c8o4!?)>?To5t~NLCG&{odC$!2FbX|9$ubE z_-GjE!|XrpaH25FF#YwcDpUetJ){W1^Vd8NW|(!=O>8D?<%@5hRTGKvIFsdqG+8u! zCCU+ZVd`ukHE9&=IA412OE_!it&Q#Ljy?xQHnS-6cus|Z)Ig_{{Bp0&cNT-n8e%UQ za#7Nj&rg-UcZ>U+*WQqrp=UyMp6A2pp+fF6z*e&Roqt>Dg}}Kk-F!ZsYnH%L>0xFX z<^He8kEtXAk(Fhz9`ziTJ6QQ<2f{N-o)c5<@tUvkU&ch81={pgi6z0O#74|>>H+v2 zP&R#oqG*qxd0lQ~7|0c@p(dt&n9Oq2{~xJJ_e;|J3+HWaUX!p3<$fURv5d%Wytbp2 zZgjY_#3SH;x5jtB^G{!SOAP`+1FyVgclji_o<=6@14Wp>0WKcA2uVaD`xEWmBw0i^ zngLCJE1)i7xrm|FANmmoiGqg5vH{lrp9^;i zpkALnSncSy%D-TH?SrnGwKvqP#Vc2qi@i3?3R(qoc1UYo``9*XdD)kcf+(8f8Wm;s z^&hQtk8P8}D$PZHXf&qY{-P(>9*-EFH^2-ne??Em@+HAOK+oLt{#UD^QUP;Q#`8;KY(H&6GeYx7?lQB zf4J+HLQ)|djid|xB|lw*a`Mgts1tv5`rffvq z@6FT{u^mzYg*GQxf>*+E*{(ss{+9v_tM93;k*mOb9X4Uy=bQGoAzmC_T475)AE(N? z<}5dA42o8E)<49A?q9jF!r>U?Ife`Hvqd#K3Cn5Q$tj|q|6=GtFO%(PA8+${29{<*efM4+p#7RQcu>}85LD}$u9|li=Alv^xVyZbd0o(PUm(a)b3t~3R z;L=cwXTi3mimxK}*DFdH!@nU4KZ6kS4^mz){#ePMcm@x=Q&t4;*MA+N?H6zOL=g}l z%lpRnCT?&~B@U)Z#h~Mrg>`Z}`R zDgih&V^cLGi%0^w35Aa|ZgDmN9qQD$PUteVM~w_HQ)B1i=O!%B8)_=$;zc@h8**?? 
z?!$)K;<~kC3`Z^d7{w=Np{g*oW4X1o-zL$Tem5zIMD#;F?j$XN5o)b+JM_R*1$S2% z_3ofoq}9xGP}QaKpRlL%Op_qL*TI4wK)mnc5FEP!%4LAbqdD~j;EgDi#L=k}&iDEl zJo<}Vd4?R62yE3#T#;hZXf5E%12id(k=e_pbwK67A&K%LuGH>Gkj_&3kr0aEHU8Rx#WTu@dXN79`p zF}I!ec*c$wz|8w6TMtOq9VY-0RkjM3qJ9rW4$f!!JEBsoYA=LaTb!3%*ta!)woi!2 z`$XY7l+Cx#zFA{-{CYxUT^5!Qh;l?r7ZmMzWy})Sh#QP;&v94YfBXKP+8JbBx?D zxx(Lq#ed~O8n@4%p;m16n8<%h6~NC3fT*>A(ijOvnevXOiR`V~LOv3?Hl_8+61t8`0WfQXeu7vxBqbIdaz>PC`5>W(Z;f`0Nz?dE&2}x=yp{u z;)@%JmW*`l{zWAgzfxzF;@L(U--+8usvXj9nuc~@ok{ayB@#uyn}=U%{3o{u0ITJR z?Hd!Sao$9_nx)-@^GhPzDwWFM{(h7NCY%gl{j5v4PY+l0ruA9_j9=Vwn_EvBCZx%Z zm%HVe0eqtq@6Nf}h0kK;N_Oz0Ti%01o@mGDk&$tam*}MYvk#r_H1P$#v%hZ=ayd;K z>Uj9j)zZm>X%ao{w#ixsHJ zJQdhm4L^m zlE?}V(P4kS+h6vjk(8g(&xfT?h!6ce5)X3B6t-U&*Fk3HfzH!<4>Qg2*bTK~S5-Zb z#~f>hC~0z}=I;OiP6~#eAeOB=dMv6Sqs_DCjwU^m)&* zyla0LveumU{XMz8$U_#pK4=kF2=o{&$(ps|x$4aHz)$8=yP(Mo^zI(6daf(bWxFNf z2O0r-LQcrGPef2BplB}1)&q(raidxqELs(8>u9TkY zIG(CBLvIG5$AiK{V2pgbH=K_5K9Jca9DaUsCCr_)3crxNig*!tOt>a_4DIKF)_T~q zC6oik1+T|+A0n`*?1aQ9`f!f{uT5nNa6;=AyRZ+?OpoVFS_*<0|J~5p-@vY+x^1sD0z3q%4S<5ce zO0OaRidh?hCn_wT+Aj5|>-vTvSgHFtq2yem>)FrfXAEHMThH(QMim9$on2Ym-43m^ zuFRtT=KJ<$g0}cs7iN5EEXFUqUu#9x^q4*)l>wE;2IOcXBlrVwMBzBH`JipI3FL5c z-)f=XqWx{dQ_k8|Q6N6yU2wy@urieFw}wbfo?IxaviMvfOw$B$DC(SuMzJoQ#GwRLmBg`L$|Fm-WmNf&J#rL!Q= z4CxjNw;;r6J^dNqKmJZ+b1%Q#nUe zQ0enaeXaZsu2Ql>EpnlobLXX>eXB*tc9Dr2uufy_nQUekgo(|rBoDkjwD4s};X1ZF zaT@m%_63!-h6~0bGd>(sV;(lrSKLB|J`>p!TVgUfiLlcD1Si)2Ku8Ien1xOM7#fbu zb}iOj*q6{_#!Uy!w^2mHdVE4Iy+!zxDj4{0$C3Yee>|JYhth1_F@}v zcBDXce~uWJ2H+|CXDbxN46>DSb=K&8wfii%SQ|YIS#>G4{^fminRnUO6XhXM`l;6` z9q7tjXgkc3bq31AZFl&Ipw#utZZ1wwnQA)=61;hD3nZiQh)%<=$NAW@)D#J@!lN+p zt6gl6&^}1reNl`Vm+&SMlqPjg9uI!~MGnb3b?knyXamUSZlBnG`Efg}Xg|2>BBZcL z%@x`)DsrDP5-zgbxBpxpt1InEc&=Gp$-t;BOct--BEmGtiC{lF?E|B#TN8ZKH0mwz zT}me3*`#Wz#ni6pic$jU|1^vAw@f1b$joZZhs((KNtap(YnSj?{X_ZK^#F_z?&U3E zAO@9QJ;sP_uejegOU9h6ehso7GjCSz{eG`*28_t)Wx_V56BY4u1W+xfg=!zwMg4V& zM8HJo3bt=OxcHvGOKs~heYf|fl?-{7rG3YIop&e#7 zmEvY?cfoAl<$8SCL%UkDwY!K+yo0#*iIdu_Y?m$SDi;WfDR#4E2{jY44#YVAM+CTp z$egI$oKwo>5k~B_R|mYY(QL?In82?kQ(_nj6<*YT>WOo{zh75+G!%Zgd9Tyul7h|0%Ii(R`9Yk(Xnw9-P0#ymQLWv6 z`R^rlHtj-~kuM9DY5no|19(@*t@Bp2bScJjfJ$N!=<0>2_( z`K>1}mwb;W3tw|cymTz{-Lic<0yeNPuMY+4!ao}2iB5sktj$EqwxN%Fj@7JDYKt{f z=~*_5`!UaCOUW^@U8!O=Wnxeei~-4?hXef!BAgG{JNx@-i=r&dVDu;rWpyuf6c?&N zVF_KL4{zvSxA;IX@p5l`k^1PCV^n%=gym1v4sKW&m{Wd_)cV}GG;AP!kCs$Zh zM9`739qK^Ft^3YI1*;^_j4B@G3`zVeHHt~C$I+BfUHLr6uTxDtmloYnM_AW2d;+-W z8M=p{pq||tgANB+I@_P+n0bcr?uv^MHQ(!os7Oh>3JlMZb)S_v7CXhjAxbSN_osb>4*9j&S7C>>$YwptmZza%S?PRc}|$N z2p~{cG870j74jjf_6(_c^{oux*RNAZW!>uMhjUWNp`Urpk(Rs~Nqrton(&|cQ}|r* z1yVu=^4{r~7>^4Jp!oiPDE~u8I;C9mZ^Lf8Cs|s;w_7v+7Q?HD8*x5|AxRtGB6itv zwDV~=*XHjB3?{JTQiEUaGJaDX74^4J60sWY`noubTKK?dge0bvz6<=C$^4z_v3ZSN zv&n`BwT%0tIZb`He}7JLbTgMJbd5FKJE%Bo<^b|7Pq6K&yi;zrov#}FWra3c z?}XH`&8pw1jDH(i=h!r5F*k}kS=+*>CF~=rlB|~gJY1k$O)P~##0R%>yRbVlWYEDu z*m0TogL899rLA0HOpDs+Ax!F|94|m+*!x&9I;EV(`+kimTOw6DvdlnDjW&+Pmmt|1RCDe*&TGet*$bqgo3DnivwgS`4XUGxtjc1*KL^ z8kvA=MIxZ%1IH@RTI~Xm0c<$Q9UKNpQ+{>B_<9Yz|Mf(hRu3u7^`Cx$X+;l-{_5C% zNE@p^5Ib-6eR6WqvFByfJ`?$sH+B~d8aOl`VdvwaO6x3M1bsZLKkbpI1{VGNgOlv> z_j<$K9MsR%g)C+=KtptKbnMtIi4zq0lf1;B-t6y@#ubWY)54(O^5c*2KSKk^RdCh; z4-NV;ybs|c(qimS7+ubzDoFAe$Kjn`BB^-uC zCYM-GWyFX<*cF|3!TTTGndDop?nKQ%E3W54(*?VlF0)h8<(8pR0o!n43~{-Fiw+Vy zMm~R=Tqyfx@Z820x>I=|A3#TqhpG`RfvfHAEWxiU-(1bnOIS~0T7v#yl5Q>6_GlWC zcycg*KUt}VnxW#Xap7EL{`L$Tl&fQYC4@>~_K+)b{Gc@j-rr`~A=Ccws8>8%@cmiS z?;~HBC@sbJQAgU(9gkR3Ufs_YKYe*EZPVM!65MQ?weM^?M7`}6RFL`I=S>inu0A48 z)pQTOM0FIpzDR2KV3k__&aBI!GR(2bi6*wr`uJ&D(KlQA%DqA`b{rECvFIujsnLTQ 
[base85-encoded GIT binary patch data for the preceding image omitted]
literal 0
HcmV?d00001

diff --git a/docs/img/pycharm-with-pyspark2.png b/docs/img/pycharm-with-pyspark2.png
new file mode 100644
index 0000000000000000000000000000000000000000..8acefc47476c90437b3a10d9f1a4e5368b2d74d8
GIT binary patch
literal 84813
[base85-encoded binary image data for docs/img/pycharm-with-pyspark2.png omitted]
zp<2^MNv$y&MpTKxL;p4F@bfDU2D>*Cknr|EuTcN=6?rB|tL%^2I=AD!ZqsZf|GhD~ zdE7G@Z<8%t%!lg{wSmO)Y2d>@yL4CS(8-&))>oV`Nx>LBv^N#}4|l}*CKNdm=8_f1ka zSteHtM=~ov%&lFo-Mwr+npddLxy*g2orp{Nt2yg}Mc!8z=nq6s$+J$KU=@0FPC5_$ z1y7giqpKWFFs;!J0n>^O zF(l%do4XbA3)86)Gxt}Dr*>E~9#1MKKl!%tW<_T65alD!#)1-`lK0J!KB!~qkM_W zTP5AhUV}6!MCdL@MRHCuTnL};Jw@CQ2kxPgc-o-vKD&>M@%eBHMc*8ZE()dxxfqI1 zx@YTa`MM~0BEPrF(LM}#-1=f;vW#E-e0A7WYF1N^V*E&>eeowhRwEY6N;>MpE7Pct zZmxdt7@+Gn?iMW?8l>Vs?(<35j(C{UpcS2d>v&?_j*X5@MRG}Z-iwY&QE+%VaJoH! zo$cN9d2h2F!L3kN6!BMzEQ?x<2TLk%$=K>7H8)7xcM-jzl(Dc|n3n6{b-bXJnE2}A zNDOACGojfHxGg;mVv7lRcD*}4rYy~~PFkM`B6#_Qonbu9S`B?s^*Aea^Qv#xP@u!g ztghP%^imRD3o&kdM*e>3&h;aUMVubyg6SEKq5`wpOYeO0mI6<$_Pvg%KO$I~7GQf6 zUZE`-gtzY{{BlylP%H}V!zSrqElZx?quWJNTj^163!TX3-oKxX!brjY(;uEkD8fAr#HRy@KwhvIjYLb<$9$l9_|E)BH!)`MJcrshbn^t$R%! z4%Y9<-rW;#bJG5(WO!0y*p!Cc)6X=S)@`xh$xZchar#l#>~>WBCtUkxPXdR{shx&) z;wO~uQteUdi>mc(jFaZxu@~6YW0}^KoMhdT9gll&v%YX7$L5G+wxWpebBV6#lU{N0 zSw+pK5mlKUFydCEO?oq+$_VakDoyJ9TLo7Jc&EB(EpK}^F^DmJTb*!P?AuUdoLPCU z>n)UHVQ>HDYe+~_r7DU`TH<-BMDrcv>gQOI1v#-_i*07lCi=E?zWFJ~;?}@SpbjfZ zju3b$WG8DCBa`r?1=Em*B;{b55TAtSKyZFqfMVJ&nT-F)gW70jvGpeI)>bCx^xZX_ zBrT2XI)Uyu9CD@`LeZ#CuLlE#JO$>_*kW)}R2LJv&z-puIvuBCaC4(hW?_A-m~b*o+5atN^L zTTRyLLaXCx8o#)QmeHu?zTGfiK1qloWcIOsElQ(r9%|8?!yAp=xTL_?yP!IN(_}SY zokns;mHBPMmqISnjL2OVr%9S4d10)fwNq!i?oUZYXuZ20fXdsscd{<((5>wP<#Ya` z1LLS!%Vt`-Cyuqv^UeGHNgBn4S)sSs+LSNL%$)bHV`z)I7gH@iKqi%lzx)>MBp>gm zt#mJ{xfZ9H7iwBJG@17oHs3n2IPj5wnRnBiTwA^LNO)n!!1~^YfV;irGLdY{?VE|G z#`ZipZD;Rg($AQnEgiVMIcu~$8CfvVyO96w;e~pI)Q92;5d1Qob}RE%?oR=hW+UF3 zkyHi`ZXvM*k8CsZTU7{oi4rP9nc{}#(_>%AZfZ8pj>LCuUEotU>7~_7tgy0wZkrI2 zje7R3MfA?-vE4y|yZw(Q+ECIDpDTQpZ?w`;dQ<4g3`T_JaNka89tv0h$Xa--L4pUW zX3f?(>Ond2A{!@0%CE}$qBf7x+03@0>yKYVVG>Q&cc1*EqHmp@FR<92cPu^kCMPK4 zp0xA*oM^|x-nc8eeT%i+U0MshjLozV6tzvRC6X96Kigd0(dHl@o5@N^Pgnzo1a<2K zr|hjRP7|jQLB|k^7;Ywht+s>Lx|8Gd({B8im_;XE7foE4t`E3-=oZySt(p|IjN+25 znpo(}n5bK6EYq@|le7t_=W<&9JQ1F6QJm~Lc7d_6G$;6F=JAheM^zCHyn+wyVdl+2hVS0u8VZb8xc2e*EYrl9-R3MvV+;+JIC?+*6Tc|`Ko-r_uTyff4iDAWEU4tY)SweN+-N@ASOpIa}BiLYw*K0y%c?1esA{4o8_|7uk# zMcm~TZ)1NZZ2eD3M|wIU1^ENj|BruFk@3sz^DWEmvp$)7bJf@Gs%1U9JIu7lWmldc z&X7|-eYpT;8<>w?Z4UyMzz}2~lOP%K1cu?Re>vPiD^uDMG+3&zq1_B4LEGiDE*T7q zb@{2R81+)bHDe$1hb8XK<_*Ps{+v&1%b?Edbs=Pe8=JlJ!x~RapK zqL8x|{{=(`Zgw}il5LQ7oaMm8-QE0Zj@U>fR@^>8uuTQ@i5>ye`pdf1dOI7R$-5_K zKa~K#d0I$HeAZQIfP?RBXS3S{cg^;DV0#$lOtQO z+_5)1GY{<>?xVHdd+9wQ?Ttt++=7>SA&g{n)4-*cK{x6V zIT9($0x>6GDGed~(;i$n9W`28K9F=bWLEk+Vh3q{{yntM8ya=IEu8@4FVXo@SG%b+3=9He??8h}Gm0Y+l6)Q+H=f8QNkKe~bZF+Ilf=j&>xiVc!jty@O3UY5cv z;vw|=l`r#g3gU|z5$*lbWPQDhfNq{{2Q;%6ZUZwd%?0HruC z#uw5h%U3mpJ_i2FPol)pz=-tFZiD08wasf!x7tZ|>f4%|a{S(1fGtowGXN^W8xth8Fnwk)qdM=%}aByMG3OMrjf+UH{A2nqEUOwUs4( z-Cu-<(`T#Pe!$%2>Wdp6w1-^5J@|=3D-dQXj}TfmtH4?FgPres(0`ki+lyTxIv_L! 
zQTG%QG&(V$<1Xnx;D}q9$s(G!s>X8z)AqFwO75!ASmh}szOIcAIqHbD$Jo>c?DZ`W zXA?OHdp{rvhvXbl31D)=aS_*XP|U5rf^0iEV6C&Ry7PUIgn=@t+{T25ky2Ni!0yRA zz8pnRjKxx-Bc>}CRWn(N3q8-|a4fvTAlR!LNvO&=X&^BJdYmZxUP^uAX;-s@n5aN8 z2yJg+Wd#y`?S?WCX5SI-2MNO<6EMw=qRW3Rs;}AZH_!AbUxWFbiM@5%ejj$x z@_0o`*nqj;VHz-_;rZ^1?#_aMCGDzXDVQ2XA$KW1)u8_?!YA4an%V%Uyg7GH=oho)hUVOadcwPdvOx@q?OA2qi-|%7D z{89-o5@)H(bq|kdjzscqm-CKONlBl(A`v?@v~aEb0~h0bC|9cE(hY7|M~DM6sP{Si z2K|jU4aaTs?gWpWqDXTs$f@?edSF|X>?KmJ=*COqy8Ai)%>s+j3ptzNV)NT2YS)7F zF-c`lpuzvr`3P>N6k|2Au0Buh^uIEi?`t2IRSu)w%~ybHq=ggtl|lS)5Gh1hwtvCg z+Z0x7N?(y^8_dAVeiRM$KIHWWonF@mdZM3t6djn#7B&d%^}DzUyV^&Pkl^# zAf@1J?s|5fM2(4txDEMxzk-|{t3h+lW%a>G$J)DT1uJhQL({fozU*$yx4n3=b9mP| z%%36DkGQ^m_$AnaN+5Bed^q^C2QJI~6X>`snr&E$HHW6v$18yd@#DlaAF1 zy8d^B&PAP%mnVDb54IA%pK1?5e*KXKn%g4^49dKEtF`bY)tFSv=&Du)-4R_kF5Am_ zuxRdk!&b585L1_Z%f0jtyWXRsmA40b#-^rqM(G#F%GGi4kQ;^Et(+zFH{Xg)3dIzT z#Pdbs!-81h3k_hwlv>Qbb+e~qH1?M58`~H`dxf9(7XEkZ%?UQ%Tlg!R6fPH4o~PrAM7|z!Nd;(Vih2cTuEe2#e)8)|;oowxXbhuAgQR}{q9EE?BZZ59 z-hDCuL}S?A-OpkU$QSf`Oxb?JYWcAg9$nC%_vx3tZ^I(ekjee~lm2;M+_`J_%@mH* za>@Q&+^_L~d}kKf;tXt$lmA@A@1b(S?|^``puPUI2y)rKAI7g&vP77;di^J&GXCSI z$S*zeMf0cJJ^x(s_bvSAQ%H0C;LFoZX`TFEUtpbIwwU!eyZ`6o`;U(jA!RYp!jKI6 z-(TPe3s-M$GM0|)-w*ipDjgf0QiS`m*6+{fzTc1_Xo0WEd*z?^*rgnXuHYM2&2vQk zDNhkj{Q3t?9&|*@l6x-j&ObLTAFl?&2=eXU&0}Z#&FREc|G6{&Tta#)IwtQe)k|3a z`$lLWH=;j_?|m}-#{a|KTSis2g@2=pVr&eQQjk!T z4n;yiN+hK_Bt$w@*hnKNC@3H$(jA-blu}UyX*RV<5h>{gf%jP)>v-OKKioUU9e2Fr z|4D|h*Isk3x#oQ07nh?iJ>gk5u|D>s5hKC-WUll)`B&TOSozAykIQSpBFN3x$qwN@ zD&;ZDzw7JS@x1l!{XGVl_E>JzH{e2M^#HyjdYzsZu18^o1MNCX1mQIc=mkrPi+Vx> zvbo@Oo9;VMr!~sR#ME(#9`Ljvaw2z6nHxcCEv4o_v}EY35R|7u&GHCu-J}^5_3j9g zOhm*v7x3(R8EIfFpdl$AOd?v&E^s6j0r(}tgG7paD^~A4Xm9tA0WY$LdS^|13aktfv0=n> zet`u-JhYTG5W>|U?jA;MeZMK}kND6<1AZt}Txp+kDd_3piV&kgpq9(LtsrC7{2~Q* z=W2EkD{6rnsdn{^^&%YG`p$FWE4$hx1EBKLBk+Ch?bW`I!ZX5 z=SRkUz252q?biy2wB;+OvtP!&bZK$n1gG#wO-i;+2pY>ChU1xpYbG+@SoqeJ9aDY) zRRqH^YLe@!rpMmvzWt=MI->WilI~2WmNk0RpKs<$4FC292)cxOI3V((&JouB?Ahiq z_I(KN-WAA*9~#CR;lwlnJ&`CFnZrtll3b75&Zh2zTm9Km2y}EB0H>YlYV4a0y4(8U zcB4Tv6@Wz^066BIvJ|xS_+X;-TOi&JZCipkQ0GkL(vFp~K+p=4u5v`7V9ZKdb1{F2 zTd)g<$mt34m6?8=wm-mLb)Vd0)W8+eYVczgveLW<|Je|39NPcZ;m(As$L+_LP9pDn z*Z!r!BHNv=#&%4 z_348S{4;HkcFc{v4E~npciuJNQ*x;p6P|yH0LkrO<&}>-nC|5wwQbW!q!RGo7WG#G zdNSM2%FLY5^O%gCbX0mT`F;g`VR4=!czxd?Sn~%KA+Y2&`>vYn z5=z{gR-*7wz{{nHJ-G>D26td$+$;cy`3#IGnMgBM1KPy9=v&7TWaKg7oVqCg=M4=F z{r7^aK=>Ozl-pZMr!&R1uAzfbR8di}0N%boNY`+yybQ*@HGG6QoeM1ZS`XRO%LpS1 zpwr9QcWMrKsuRBR!52%=0GmB z>sn0+4)llgg@<5HkKWn_*m;Y#xOM{#!%3&{xF>yfTc~3CSKJ{BB{dyP#q5V5;T9e` zo9v{%LuKn94WRB^8U|7VkGr`bl=Z<@ddq2JaA$k0_vHT1`iRgbbGd%44>D!=drP`u z_|ycv?YXfcR0MdWmj=CPa>ge+t8ziD;XPmocS`cYiqygqdLyl!w21~+a@>+vyuE~n zXu0j@hPq?6mr}YB;%t-dPS)~Zin3*>j6VNm<0ffGe@6D*ASHGU@2z+RMSHXVqFa4K z&GM=gw;Q|R#)E?FAXAzX?q}@I!f{%PGI11dxtfP4YFO*TD^7dHLpFbcq6vJ%%dMS3 zUtCW(rQPKi?*xFjrbYVOYX5zfL*%6==*fjYi~E^W_yS7zeFL_LM|i9i#IL^0#8}{{ z*bs_A*=OM=ZHOj5nYL}f<~Q*ZCj22Z_QOT^H5VUDCrgLv3+H@5v+jK`3j}Tw z?!WBYsnEtHd1iSYNY~+F%!pwmR!rhCjtG5A=0WQr@3Cm7Ce8)1)x6#~aNdx!t%4n` z8o}Ck9oXUp{%t?Qv5GE}*rDT0e4tU)Uv|kXU5q;u!i!!@a?!hR8bKN#^;KZ1?~z`D zQ?>$4`1_<*URVjT!Y^nIBesZBoO`CK=!N$*6!NqR5;>6^p_m*@p6rldu*zE3lhN@^v0_S>0KX)@)5iGvo5qnx*r;avg6M zTBw&agng-+X|GJfA+ZxB(D7zrP>(y7ob0k+!H^(ZEl!jNtFh#c!p7cSN1+gf0F`dg zomU}w;)*DhkUEQ)R&8LjuWO}O9qq#CJ@VL#QXaP#kdHeV?i$Eqit<|65b|6|B_a)R z*24BED83M-+E9oUazEwPH0+4&Q52%TwElY7Msi8vwA-zUE@FIq=3q`yVM;T!Sx7;X z7mTqpag%;+hB9jR9i13k$LH;mF8*V4>19_~s%fd{Pn1g2LXR`j4(DMO zHm4_FO=wx*i#WH9dm2b4oDJr=Rpmh$mZ*2AY_mwq{NVV#tc}*sP8)W&YEEF&t&LsU zigCl->(ixcXF$4bMm_JC*G9RXXLfc`RodHcvr!h!7jnPtJ$ObBm5i^%DLr*L(s2?q 
zpVIv}+mm-es*IYGLrl(Do?e}DM=wp(phal3Ao^*iS4;2NVJF61{`!bX*a~ULOxmBG z^9`2IITz`ibN)4Jk5hlB_i90tH7E+)+god3_`S%YH=RSll12`Kq|ACZ-^lRslxY+&kp$u$M=BC@qw^B?@TS#}Hoh43v>Ln`? z583GD8lU)!HiQr~`QvS@Xi+kwS++(bx$1@7)~0fkHQ&igzQu7RZb6lZ8=q0kpK6*4 zcRFL4@MAKE<2z#jrLw%znjuYj&N8-)SV-T^UqAvj}_T6{e1{7W?f-|7}y;zAiE z^*8=cr9V+`XXH)pfhcO7jUn#D$#B(J>qYDVemf`|TqT!@HC0COCYnE+;V&H9*`D2DQ8#`oBihDr z0ZSQ@XPG@Ih5f2naGauEV&?#R>6eqm7cm}+sMq7<$>vTa(Y6i=uX~>SldidEMe*S$ zFeN8$xg`naiCwhQ5=mg{IN(9DEj8x@fAU46+HEtll~};k7v=IOQ^=E=D{kT}$ZkKL zGBGu&sPyjG28|AmZC`ZW@Juf$OSox`{!6QV0)F8ek~kOFok{=f=1GGC&*aM<3g@K8 zQ%nqZG0&}kH_xF-o$5AMFX_#scQSE#{x102E8c9#CEj<9weV7*bk2Y%;|zs`%S{TF z*e{)wEq86uLL@l~Z5w+Wc^@U0m`n;YQI9T3_u{N$ zwj`ckKO;_XPXKXXREVEUWMVX#hu1~biN*ADF7~zxy$Hc+qX^C$w`xwKiHU-mG;l<# z6BPF|&kjA}WeWB6$K%*AQty(EqKhHkukeITBc*9fqdi;GUfF+ET3U0D;-F)U^TNk@ zZSOIn?K7@185vBi5{FsWQd=3M!q3IsF*$LQe?fIBa&3=S!`vdfWfxalI9+MtyA^}` zxA3cMv(3!p*1{*(KF);NPS<~Rtxu}pbp^Tp+5x@`mxBYgkJ7P8MReQgKG7kX@~)>~ z`{1;@WxPQ>U;j=(5WPQeUU*o}5?O|NP+3C{cIf;kwcon1aJq`~GdDbV@N2rf6rbK=w zz0)#sjjj&kqv^*FGIjX9TRiM`tV&K*b#(bLIdy8w*j>dI7LC~J4kxwW zj&eA}r13LFd@FH8Jr(m|FnRHGLZ%jNaHi zvl;xV!|mh6WtyJriOMW7JVPF);@-@Av8^PIH{v=UUx5Ux|L%^aBNU*tYWbzd1t z`gBGu#S(GYEv00`t&W(insd)QrM;&mAKc<+6{{&Upp`}S$m%{QnpP>bz(qDy7bhiW zdygr$Z!qj0T@QuM?tgVa@sP^ojXmy~XXSm`h0O5J__CA;rJ2&;3tl|sp;oz3H{%dtDu1NQ2b}T z>etK@)>T27=zD}B?cZ~L2#S1qqDSW#knQJG-HOidOTj@)euyw5 zbuX3Zf9hp4J5a$|u$J=u>rxQl?1Ku{|3xh^_S}xjKX)dit|w_27RB?r)I}h@FHPTY zSUbIfo&A|c_AwQ^D!fZ9Tt-e7CoMJcUpmJR3%Kh9ekQB zzjfy#^$PRui%;zmC4dcAjFdv4?dFwuwQX8X^Qo`s342I*Ey0LnMjdI+`0@;NgVHV~ zK$jjL<~HwSK|I6K3JswWkZfKCr9T|x)bf@wJ2{=2<{B{qF4D^nraD=5QU-URW1$X; zZW&M{aSej5)`Jx=y6OhqWJ@5?BI?WUS0{WYCxK_Ud3bgWQsGQcLCwlaU7R;OIXi{8 zv;k3h`hl8LDTwkntG^YBWQiFl2E%7~iIE9-#n(6PX2lshGR&o7Ezg(H?$=cErWwXWP}m&`rl0!?p= zpZsiCFj&HbB)PblP{M@lbC=Dm|F%R)-kUm~|0v%w6nk1tMWqLt!v>&vn==;UCcvQ{ zpi(PZ_Z@Dg-0y+Fv5@AnwnF-@JENLS#bXeqd;lvuvGJw~@i6czy7tE||NlK-H@U00pPAMcyvUfm-7f)W|5o z`7QbO+HkOy+m-C((}*?zw_f!g9JCkvi`ORQnHoX9aLKZCWg6DM9h6YY8S!q0vY{Pz zpbCOR(g4Ur)2FJ&chcv*rV$p5IBOy5T&2ML;BkM8{-L9PHgB>j~aH} zl>w*O!itsN24%N2?sVx6Fnd!ysACzj2zG)7;k*H_0z;nh%IdGr8ROG(;>E+)tw2+) zTpJN3M9gkD$%R+n!s})N=52`tU{c%pWecI^LrGEjntWUt@tLb&sUp4kMzU)b^r>?L zXvMvF=v`vIImgdTC5Ghu>{?7lLVsnl@51O~q`Z6hTmHR}<>|H#uWMVrjqJ0L=yvR| z+6z5IS`;J}dk{KnrAw#08(NPmbO(y5PY_2(5Gr4=-n!^^;)*Q$D?tUdOz5%K0yV^<$!MZz*9}viZ@RNPE5%j2V$sR2sY;pD z_{YI(^ow5t<`DELqfP6pp$s(Ao1s9f=}#L`_GY@P7sbQ}BIZz_Tt3QPrQqE`JD78|ci` z!Xan`;MFH-bs>s~R8L&}u?-mrNtXRFM{Nr}hKw zVl5>(TR|p?1R<{h@uYW)l4xCV?C`2+)RX47%s-Bi;7T}g+N#6~2>0%vd{OPPnO#s^ zdD&@b05vH*KTGdUUiMh!b?FY7D(t^Y+S%Mro?X^Na@4L9ww>ZL8iO^ z`qRm!P0v#)P+nQ>v++-7?%$r<8>05?idmU=wmG8YMN3*>4JL1F5_#|{l>?c=?C4P_ z1`zM*1{wFhO@xe$P}@Q7w0KI7IW^9cGNf{2lKDX-RHK`wmG+3ZpYpAg&$-UnUUuv2 zV_kvRxzSMFWQ+VGN>TnEhj_*D4>*@&!@8V&ueEL*LA^eI(|a_&_XU%B+zAR25)w9v zc@T3f8!nz`l(vT69d`Wy> zo);?lBjtsuBd~*$??jqv&*R1MkQ>H-X{OhRAh{@s7Rc)i_Pnb&bbX4W4-hAIKoc)7 z0xK(Twz`u|p=$zZM?d-{EI^?&jv&#b^%FvQ9)Y)sOQ+C-4yA5#!$()?T;=6QJg9>6 zY&jETg4i;$G~HF-ChGI3 ze>Bge#372AxJ97pu?KHiXsD>QG2>49X}guCbg{FyHsA1YmkY173*JbITOjwCjoA}t z>8SXaw6_notiB-b0xwUu1(CJ+5c0y*PhZMPqK_Rf(w zRbKw=I<~7B3<~Ncb>ePJ9^6^+PP4t)KAsJ9Ox6$e#2qL3)+v{Ty zVge?;(rLt4FXazc0Un`BKzF_~`=N*HLX<84(jjG;%Tmra(ux=vURtDy&kj_`)KIV% zVVusLX^}t+D6FE>6zJClbSHNkQxJJ^vClO;?&TUJ)kMPQ4hQ$V_n8)TPdPUA-kjOg z`+Uo3BVxStJ4R^9;PBA|wI_1Vj5nxR?_!!N#S{vx2M01)cbz~96G&eznSyhqU*GKF zgbJFmByRa2@mz}%9p{Gd;6rPWY0XMV<6^NrGhGg?q;@|R`3kcbF^|-s>=VRhU_@U6 z>ayB;HxiB)NF>IT#dBa3^|dhzrcTNc2`lToZ$Gqgy;z4%Xjcv3*d6V0coCLIcx_%DqRBac&*ZVwmi2tA?O}*7u7|NXGe;>}!^qIr!;|@B;6j?T 
zvZB!SsD5BomuYhIV?>^rGMU^3ORN&c;~Sm2&)5;f$Gmla;hsAk+jo=5&&G-J7TSMx z-JCX_bhZL3s2N5=a{Sn1>7;5M#QS9^ki`kjQ&3T6$`!&_2vgvL)G*>1&+glvWEf1oJ!-+p&rU?*1i(KqO26`iOdKUa3#wt~y!3$$qpK#;3K z^OCL@1(&9n@M&Mp!`9iVLu1D$X3S+M=NYxN1!CM9xvSVe{M>&tPP^xUI`)(8$Fc*5 zwp!Cgntj?dd@Q5f1tmnQq|Tm~)$OS|R`xY^{N&QQQw3ilj{K4QOe4@2zAU0lq(s9aU)q)-94=Jr z8ll=_?ZlHq8_i;dDg`vWm~dXxkI$^=!7!_2Qz zehIwEcCW?<6u){vtJnZ^|8obwf90_QbXhkjXqoZ!+Xf-p=UM!@>YotQCNn#GG1$G; z;va`$2^3qm#)}r|*P_hZlYKY)imi3YGUT_?j1jdN+*URKfcx>qnglj27MCl*WrHm> zF7~5}nzkWGVA-x}^fU+XV6osTO~GrMim4Fj2iF!hNB|V0nn7~OW6waE+bQqW9K18@ zSB?)-{Zj*lhrV(c$`>wO&q-+?3Q-b`ccgui9gK_fB^Ttol5eLGrLCe8f&gYZ0f^`X zM>y>x5)Bn;Bl;)iu;xPJ3EOb%GJXYAp;D@gi>LjWngunerBL4ALd zGwmya1r&8#EguYX4Mx0yDEJ)<_~)wmoHO3<*~70Wc|%xv-;51+6)^+i82SxdvG09Q z>4CM?g;-aXgO2$(P;T&3TLld9t6p1B7JpxSp%S7l1a)h$dbhHPHkZ;_?4Lcsy_dE+ zoTP9}vsDq=9FOoebk_SQU*de~naZ~tNi<^O#H7Y4mJq5OqJzH)_cnqyIkbZmDU%PL zLGr-3^?&j}-Yg_%OP>P#_14D=A#vWGvd~Ncc`lqYLcDkp`fdeDFQNXb=-SnNHoYy6L*48yUaEhlGpC0 zD-VpWT45ZP?F2jlD3m{|MT^B(6 zHX^7}?i#hBPuY_o)i*z3WvOI%&offVLc@aK^-v_~HXdP`!X;sjI&4rx=nh<+GD^Fg zDO3YajwT3H9W-KkT=KJqcP6{dqC60F2`kn|kQ`pqaVy)Je_j&-NW^l?0hHAEl?_r| zB|J{7@L*1{C9V^n6fxDBOf@&zfj8DFBqW4Qet6%pG$k~ci+;lABz|`aoLv!=iB;o1 zeANOY4-ewWKHYn@B%OJNH@i$b3p^2F9L|a%NMpRaY4)K!IHgx5~Jscwc@mJRqju#_gN&3ZTD8*V$?n@yMLv*LK3^ zu%#ADnp?@EcC3nh@|yGSm&^_YFIiNVuPVG`tRyAYJ0UBb4~&jG_f_+9^6(Yv zdRA;=&o~)M9b`6t)i244WPcI6*Tz7G!@iwDcXNYQ`F-U<+agZkHvi6D#G? zak?lWlP`}tFUWt;o`AV8fb_L=&qr`Fn=Zlxr7vIZ=xIK`?pHP0h8Gy#_8C_W{=OwA zPVsuJnBMT>odWCE`+LcK?In(=uHEsPlEI2OB=x4OOhwvWiz+8gR5lg}dnB0bCc_f? zEQqu`BQ^D|`ymsNQQTuKGPJ*@M8YS@bJX` zo#AnUcyz-Lk%oEH%T?qvSm*+3d&g zNHX&U_|V*i$?lbnswpz=9tey%;9z)OY_-SiTKWPQjJ@*@uyVV0XHJaLE>rw2pq6@I z)3OGm6HFB3WLCqHly46*DT_rpqV2ZYS>7z4cghZ+mfG;TmRrH^v|_tST)ms}^{2a< z?v*H{Rgucz(V(b8a^+73n;ot)8i!JpkCFGB4EbJa=y(RZj41mE`H(h!!={HW&VsoS z-*Y@9uY2s>nOk>(aVWT?I$8IIvXYeTk$zf%E5rVAcbGh5PQ^Eu*^y5pSsmH$tnO`? zw52z%|2yuv(?4Pyvrq`3deIdotdQ&6cgKnGx$pc33$421K5J?udg{423? 
z86ul}kn!?vke`lCoH)xH?UpUBNk@l%rxKb>vnKRN?7m6;Qz~DKDXFH3qSB>2N%E+G zG+$IkVsGfF)VUARUt*1RKKUB0s<~XzFCys7X@e4*iz` zmVi(U*mM!7FuN^_NBcfMYayQzg5wD9Z?;pt?*JZ`R z`Q2ns=bvQnIK7RG8vW+ay5$o515_kB7m-wjFkCP>JS#)_Uker>LLt$1GZ)g?2x5=ECg15n#9 z9!l)wls~B2ex3L^{PT>}Ow%LzL@K{#RW&_pG^X%E$AL`4p(@O30^WCXfV1^qk0tla z1)ygbZ-tgcuBX$+V=?MJa0|22eB5a%uO~LxN9{Nsw|#M5O*h6(rAOKFdG8o=M;WOm zjV&N+bHP`L4s8NLXw6h^M?;m21yv|M)N(Gw>-(KI;62rMSjt>SR zFaD8xm7)^ENTSRO|3L=-nRo5of1e0avIvT#|D;#3La?8s_TMc0Z+aC08(Lo>7e$;tD^b6%EBi-a*x4ZjODa6rrjlPP1{Wlz9G;lF| zk0j0i+;g1_%(<0S@E8Aa^zZ*6wJ#qUPJjVJa7)J1~FJ>xX~b;qy@ zI@I6>mY3^o2LAp{a4IaxlNA>^34UGX$K%~VZs13v{jZ_ItijyGlX)5ay;`AF3%rmn z=4OWT3r0ls<@9 zCIPjVgZ43?bbG-C$pY}RRW8upIUVxxLOaB4R)|6<8xV$#WT*w%>4h)+evqdRD;oqq zlivLge3C1p!Acgultdg{wQ}^_8)F0q?fUMCZ-NW0C3I|kudN_`F%J^Atts71FxU-n zFwa{ANNzDSk?*!3lM9!G;(C3t3geEasKI}zd0e))t(ZEGZ>jzJxaKzcx>by9@p+iV@ zJ;qIu6O%C*65+lK?~)?3C~@Rw+JcSNjo1)SP?kiH)ze|}Tn%s1-5 z^iNw}A4s0X{43pW0h_?;1WK@$E8lvul(P5Kf&Th84^qMJbB_>u1|oc1D(jY32DTy|M!91_<$3TT8|5N_Ff*Upx}jE0aN_ri2uN7-R1>T^&R0 z9&bN6bN?xGLKkphN{g7E-iMS5(TVS0PrJmMf^_8yWIsSLXG}Wy_jyetUkp2NN9NSW z9@v5d@R3erS8HeQNdWiHj02*;Dq_VjDB5w7kt{#pC@gPCyI_&R%7VKe6hYx{^!i5o4I96ZzJcn0M z=>A?AN%3<8O#A&%s9z?A9buvaZ>81OxrJL0`)=&!^5i3i-t5?S zkZfzK4%z`0?LBN6ebb9oNGE!*EY=r~5~Ikyb6HyDAnx~B)cG8?dajWsg%d)teutNU zx#G*wVmjd>6O?TK4GMl%Uml7vn~*=Dx|zm!LP1jaWaeJziIVS);{;|}JHzoz_Y=|4 zaV|X_nct-l)IdKFx{?EEG@Y)0SgZ;2@ln~)YF#3tW_bnlHK+QIX>AxcbKj%bBs7Eb zMNwf2d<|!9%*}jYl>InOV%ZMoR2ylRg-1fcJwstHx^FqiJp9rfFPU)8k1~xd!N*xO zF}2Jj-{t2%1N-d(rGu}~+H$trbf=ZXHG~K)op79!>$j6x zdrL(laSCPGt4I`_ojQ9nuihICQR{`o!5ERsMue8Lx|ur}MJB^?zV$6i(gj8zppdL(DT9jP$<5gL^v4?P zCuAVlX1KMCR)4o{Qp3G$+8RqPiN5%gMNq(;!DWx?`HJHh#bPKVCQ*W{3hc z?gjiT*8Gm@P^=K^wY=&02Oj$KE0Ekl1IZYCo_+drpu6LdH-U?Jk|w+U=br1N;N?-h zW$<@+y2l-ABpD0^7nlEg&#Dj}=6ybQ`HwmO=NpMN;bL)~n4hqL-}jt{^w<6gN`5Vj z-Fb^`fs3J7rGbO}bLM~i^dbai^M4-A|EovSLFx|WavT^dL@XIYkT%1K$bm%gJ+QL8 znxxs@R5lqt<)EUf+6!ocZ=Pg2IU4y!Bul(sd*3^An#oY{QBS8FgG`v!-KWj$Smv+h z=mPC-v*uX5nbtotH*MjZrk|N~tt*xh&*9{2&~E$Kymv~16(EZkFuHx?x}2^%>MX{E zZWO1uZ{>WW=2o*g$ji^@^g+2h0~}>eh^MY+-NLCa(o*b1FStuk9-<+H2=qkC(9s{u ztmA_FK!sC;Yw&>Ej!Oh|`Vhz*dV-kXKAYgyQV2eXWNU60$7$bsf=7fjS@#T5Hi9Zs)Qp*E{RnL3gIax~I6;j2UN=;2YS9t8&{( zw3NtbFE#}{Uolcd))QHs>CXhM8R&4nwa0asTJ@FWB2ASdKsRUtal9~X8zTN51dde% zh@}p|S+PStAl$aX)DCq{OT_!LA-{=ts-*Q4;t5j7i~a<#yWM6Ov@2z_=rkx`yg{v3 z?GNJPe=kIAM5Y0och8+kFip57JuQuCEWtdiKC{>R`}f-rd$O%1O^EOsYbO9s?E@~< zzUl~gD}$F0g_>q^nVN0$+9vaSc&fJds#@}@u)b4Jzd023r6yXz|8&jeRlNa7NX$Da zuD+~lCg)V=D!+!UN_kf_kL*a^@r>)R!MH)|tb~;)a8_-D{q`XeDXWkT=URfiR6Uh-}O`D05LDEqg zSPdN=olG6{!^;>Y^auS*YN}&-kj#{?!uorO9&f2Z)9E_Ti<n++tB2?yN(3&WanEiA%#r)kjWB&9X3FuCuWhz zG_sH0+Y+fOE;fES^IBxBcg@mqS+YyG3pac7Yw{k&JWdc(!U3wZuL4-x)UP^CTdt;* zsiICzs1I=VuQS+_R95Ac9)B|}gU+Uez%pywfQpIQFuwY6mdLK9QsTn~=r~OXr1wjG z+~&6g8nYF36>OdE7RW)9Agc3Y(@wR)o0P}|3!<6VoGLe_2q2iPWqp0seon9Jwpz<| zT;);HZ_0YYl2H!mwh(yPgE-u-sK;?#M*OPP#Dw?a?=6^%uS`vVX>Ie&!ai4*_AKje z?95$6MaKeQ+=zjE$>NdOn<`=sfrY}PVbPB!=e!)VZDc|NOOsHl3@?&o%iDk@KMmB| zVw+AbX}hZN4f=4gDF4}|O`Ez=0ePHazd7o)VPIg`xK5h`735y?g*?R4txri~0=Jyb zsSl&=MgMoR;cE6!R$BNldJcO@)e$t~`_p$u`NlUg6i5XO0|&=kAd2iuJ>s&M89?9- zLzaoEJVyPz#W*T7`f0o)vGk!?C7SE)d`rGnVzxJul&4j2(1IOGc4ruh#~dXmiHr3M zEW*ry_5AyEIW@ZIOT#inV?#%Y(j?a}S&Z~68md0&r|)M6grXHcvE{QeUFXvk$$ZqK z^J6a>Tw)ibFj@U+6|Gf4VQY3zb{faao+#PJ?5q+GG$Yg)8YMW8!=wU^|2PdNeII&F zn%lZp3U|I(z1wzo`yFwJV5AzLU2i-}erdA*@uVXxuD2SWXD?@7)}g7TfKiPwT#2{F zg#d2Om{U9UoVF9!hLB*|EI6R%!k)n|aNht)h|IZ6!ruXqm{8Czjl;WXxE=HH>e|GG z@a_1f{6H4s>W>#pEs0_{uir>Ob=;OiSFODs?y_j2@7kon7N&bYNxn7#&ty~2hew`I 
z5_V1@0escW$3e~Hm|r!h)KNTg;kc$(ojLNx(t`}<0b-eIFIWu2b`959?sh`?`|E9F%di_vQ4}HOHm2MFpBKAanI#EOFwJraebjqLWa&rvZkG zmw0J{oaIb-<5pnf`WHv)MCgR|2c|#i&3x$JX|2#Aw%P{1+@qwI8?iz6k?}Bryh~!> zZN8t!!p*%diy6mOK-Iz&G4Tcuvu=E{j?!6ddY7483!$R&_Qm+g7G5%FmPSr>Okqe| zZ@BNg$Bi`?$d1afOii_GmiUx^*tJ5G^4`l+O#vsEOXBE%GYAHzW18h;B#I<%3ZZZZ&Q zlVZK(6sEB}V?Fzsl#O@4%JSpaGtOZaey7=pmIbg}l7;*#+C7nSMd+zlm@Mt&E*k1- z*?&@OcZR*48Bg18Q~x)B)rJ7J_2X5V|NQ0#gx3G_k^h;ozp@nK|5-5q^S=BavR%Z+ zkBfhKh@6S6zzMr=z$&;e{Er%gkSS6Sn4S`uf(p@oe8<%pZI8@&BqsP-f(Xt+$}#z7 zXWE7SKQ0fvV7Lg3Kt#pH64D^=^0hzeIYP2fVYsiK04zNad)g0y@f{GbDO6`M0(lnO z*??bJEetCe{?v>~!U2iUe5(=0)dt2Kla^KwjRQ*{ocP7`LhoOD=v<{c&mRN-0;+81 z4s!Y+Q?aZql)!*MSW)UrJp40aerQx=5@SD4(&&-9tiqcSpKdAy3^H zeK2ffQ;&OCi}vBi)XY6%6%j#rtuF4S^I>|)^64u#6R110gq(jardvsO04=YfW&h- z$qpcmPw+6EDH46IA1{PHwPs@j~^5~c}_Rs zj5B}bz`I2{`2dDD#y^r2A&|LQ-<--@vmAT6J42!;C63Uny|sfiOGeC=rOd(C4Rz1>?`%bCQwc*QKRvD-@yJ zcqdy?Ps|KdkTKlQzN4;AS=p6Ty6o;gw=OX5>Lef&@uz}b>Sa8P81I+zJ28`2yk7|9 zz=ymy^S91q(SH}fIoAD}3z`?Y5R|VkCnD6KSlPpCLqcnb3zVty{V3uLj9yJRuT^*O z>hH&nh0>k!>JSmKu7n;x5^_ra01~dx34bC+{<+;R2)3kB6!SBE`h6uc1WqC5O#I(^rZXatSj@2ecSZT3 zknetVz5M@v25yH-n<0-4w`Rlq?oFb@QB`FX6%9i}r1{&nRQdXKAJPJa{BNd0@WfB_ z^N**)PWY_WqQ(Os$*Hw+^*sk~W$RLaYcD7T7eX~2srjgq+GjPX5H%aB81cBae5#D> zuVzxq(s|(0r+DI1Yj@(76kqzRzWFVkQ|_<#GcQpGgP>no3?RWZ02i4FEy*J1_4|nG z(xz!^yyt{C&*5KMn7gw%hliBKwtybnS>f~m93oXZqil5Lv>jA(gy}$Xg`>h6)S6XO zyo}LV;f^}1R+lLR%lmqZ81lu0A8?nqC@dFsc*^Atoqu`oQf;$waF_+#x2h7F6qN)$ zPqs1-;*H}C00_0DuqwITc7mAP@RH3}X{0QN2ub6!=J_5Y{qha-F9FHUl4`Is&jhBN z#)JAU`?mxgdzM3pW5+u)BV;14TrwxSmah9M+|gAap-&Uule`0lMdwZ#3ZwA2YwCQy z%8=wRwB-2gxjLrj3Bn!8Nv%W?mPKI{k^z(Pro44!Q(5Lvpm>SYkB&~ay`aM*#k}kO zqk$o`bwa(&*0hsyVIwx-p{GTjj6EG$Lyt|E#+2%QrjsfpuQke~M_5on_m8QXn6%~@ zHr$H6KBZ_7$jOxmhI!mpeOGug7;(eWVP_uw(R~6qPWk>NQ3hRuEZ0VZa}gFq(I0E? zFK1-1^)|*_jqF<=O?J1>kkeN7X(OlUi@y3-#brbG6fqZILj!m3ieg*fX%}t5? zMV-TA96W|M%x|Se-#X`sH6PjC0Dr~+9Y*fXONToSZ-K^>2~csLAk<_#1St#lsnY=R zR=>judG%f0w$8-8KkmDJvHOmHAyTtwKX-bx;fV^}!TMS{`=KPQ^y^b9#3SMfS7dw5 zFxq@3pn3k`J}!L|Rqi;X&*u`6WcB!o_1nY3(b48A?{NhpDN>tNt~WCz9at(G$5=bk zEl?qO(FfvdIhNJW`ua&oH$T4|K`o7O+}xkv$eY!>+3#=U?`CAzK7h5!hVm-6VLfrN zpC^^2)Ss;?_YKAu4aNQ5ilFtL0M$9Im3KsP-Oyhgvl|310k=UNA)-ZK@r<=df<=*8 zU`#*gW|;Ivrbp2ep#C{Ul{E(>gI-|yPf7cPC`aG2;Kbf`AQWOy<+X1gEpj2WU&E!+ zt+XzwxQw#$s`iL(g?78wRJyrUnzcMMIy9k>W`=uduGTZJ#a6f}U}`~2PnPYbt=KMv zNC7xrGtJxcNKBRU2k2vzxgWF`s&+4ef#Y;{ zHq^8VOagF%citc28-7fLz#Q2S+cVS266Oi-C&1>8T!v<~T|Zi_d}muv)|pC)jk`4X zW8z0zaX(wRN3SQ-(xqQIPCgA>bppgplsi>*tSKc(3^bTjH7qS=|5S#2!qf9Z7*e~? 
zvedP%R!oT!#l>+}qr%Bn>$aSqwZW_`<|*~+Thq~%?h>)04_?flaR%}v`Xyj)AQ7QezccAAN4{om%Tk*cv-CiKu#AdsW zdJu=R-3KAZvgyfcvyN4$WElahy-*vBlau{c0Xp>As=4`?y~?_ZEn-H^TnbwWyfGDZLVlF&D!MNBd{Pft?(hOiS;h@ zC}a|dRL$)zBkJu;yZ5OVf3uLz^QF!-VTR#+SJcar>i7F_&?qV=y4<*yMgi)FcGi~Hd~ z-a2p|_cA5+S^%g6-`A;GI3g%yVoy*Ym{*AK??mxEu$wY4*;`++D*Ni-tXbo>8nVnJ zyEyodE+FU-^M~Vchc|&v!)SjGut2rX>YL?`mVOO4w}8T(tuRdwLsIXoc8Iu~KZ30% zcpQoD)K~$o5$|fHi<0xctm_(yw=BNb`LG-dvgte~^C<45aQH_1q`y>Do*p(Zh|)u^ zHdrr8B5810)9U7 z&#pai{osnpPm7kOb_y>eUHKV5?Z%Lnq;*OAP74eI(V)bC(0NhjS}fr}_UeX*^5FOM zN12}6TE$k=MV?irx&W^lsqF*+{Oa`fW_GK7m+p*f=T0H+J>?{A_H7D!H)Fi;M{F}9f8LuUjn%hRVqrByCYU%(knC_G1P%lxR*-F!K z61#RM-|3kOGMomEweakY`Q!pa_3)z!egK9 zQQv3;l(c!EO7rV^Y6OI~!5VJV1{||c5v4N-00KH3=q*~`GHB(f1h#Pv20xQw1EnWr zVJey0ma;PfjN=K@DN%OexFz5M*X|>7EoeAEVKJ1iRkkHC0?XJM(VQ^#eGW1+Ed};w zm5OZK&`2tz@(K9L%19t4!Eh%;(yILJ59D&JEWwur%LnJ`5C|7KeU;xi)TP-a5YL7w zWs{os(!5y+M}^?ANd*DF#*JrEX2Ok#{f3OWIxxxl$;lAx97S#%7e#@@*lFX)fbMsZ z{~Tkf?-9XcM8UcgeGQa~H|fOt$CE2 zYXT#mCWXjX6pLx_O=P0T`E%kYL-z<*_6DX!M+{o)tId{OIV8oIGvY`iDC6$yBxq%kvaD@%$5^HN)x;Ey)8P zOgd5n5E&3xfYb$->K@tVYtet&Qzal9RvV(mpY{jH56R5cXo6yec_}mFV)Z;oKXxSC z#D2u~o1TFK_9t{c)O3YRnIl_}w^>YdcsyvUpU-GcKSAjD`AH|90{J!j z94&$yv=_fNMvRg69*IcC%}!y{L6gLa)i1mmmZA#bkrpDhXUmI|T_koI7o+Y7T@3t; z8DuLkKBi$(en4(->E}kem9}?Y^9cHkdN7Mm(9D8*|FgT>E-m`ztrGAsa_vX;*$z!={d2_7i z_0FXEY1>}IGOp?H_F=PIsdpBKZW9Z$hHoqQn0_oa(%rJmx?rQlX?DvuJQB-ENGa)# zpLsbhjz2-%-|j@i!osA8^a`p|MdPmNg58AaIi{3_47Gs1ro*Bx4gcJuiucN zNR|AcwiT)j1d23{FpK6|x=@a=f(t>{G?{45q)M;^Sye5V-)=;pT{2UckIPcmir^i{ zH*;^~B4SxPYtJOXm~vfq-$!2cvkWr*EjH-FoKGNmVytCdffn<>jJ+d8k~%jUhNoCx zOlkQ;Py*S%wret+;kY`GqO;1|@q* zEv4c6hY-!oq^*2TJ0p)t$$7bPAJ~sblK75y!YV> zkp&kP*L>-T1x{lH$g(N}TcMAhX@eFn=U%U`0S*#9|95bspR2FF>{Hkv2QU;?NHc+U z1^UmvpfvSY(&l~r8!m7#)M~C#FF2ugyCuDJA}gZslH9SysOjP zzE0LZvOK}XH78Txz|7QbrRBVqnm(th8qcPjy#ur>$U%t`NsaW%U(HazF6?peAC}W;^(i% z8h`cK7VzZN){UydjkEMN%$%Okn!jQ`^OAa*Ul;AQ{s-S*y!c)Re3S~(&2PK}Tn;yJ z|LdU339jcZJ4~9(#Mqglm4g)dP<1&hQYn*X?m=+ zsR#o1;R}hFVT@%Qm<8N5v+>y*OCyXS7Zr{d4`o@jg#vzH-rbPxmjJu~fJOV7jJ-SN k$jJtW$jF&TlMH{?XY7mOe135MDh43%boFyt=akR{0CvvtV*mgE literal 0 HcmV?d00001 diff --git a/docs/img/pycharm-with-pyspark3.png b/docs/img/pycharm-with-pyspark3.png new file mode 100644 index 0000000000000000000000000000000000000000..7a4113dd4e658d8c99e254c1d9df71f3b4a5122f GIT binary patch literal 15981 zcmdVBbyyuu_brGV+#x`4x8N?pCAho0ySo!45L|;NxVvkx;0X@F-Q9JX_x+OZH#2vh z`R_i@C5N2us@_%ARlBSAT6+Jp@s&Ak)MH6?7gj7_Pp$HA#S3FNLId>k-J6PS# zp6_0qce0wnV$Gd<*JU(t0_YSg$XR}dp=HPZ=z2mv6+|%zd|$QUkZ+{s;Xy5EqV=N6 zzYO^^q{Egk?qDVRMLqZQR*3;j0sT7-FAXQc8ixtr#wUtqu$*fWw;r+Pb+}`}R7xRu z(c5lNq3qdDP@xP9uFSot_&uo%B$)iV1Gyzw?A!3nxB*#_^Vl}D2F$Y%(vUt_Zzm{x z!tVp94MRgiS*ujA=_JoToXykz$&W>Y4-H#mN5es-HU&`-b)@)Tb2kjEy&2zI}U5J&XWlVT@+VXZ} zzeLGLE$U|+kgZ5(;4s|mm_rX8O|(&a+Ay3ujrE2OpD<-&;Pa0XV}T$xL!IvKnlbtw z%p$R+Lo9=Ipb_?%_=D-!7c`g;ZI5ihQ+H>J)mQm^Yfwn;r*X9X;K2xSz`BH38*V+o za7V#@hG$o&S%3?Qc~yEss#6Z8*T`O@L!$N;oLEJK>DL1eivku z07C%7h84t$4G=S+(SZ6S2sek=5DZ_1jSt4YO3j7*rQ6bhPzPeROV$CK7n(1Js13Sj zmF#UWF)U0R$-6x8J)!!J@Hn3qv<0X;{c3y>OLS2SU=}|pJYl7oP z(GNcP&Ub3<0fJJ1r{<9&GW~4y)ko2=NL1yWRFGEmyAMgZB??GIYARIw93?R9A@@=J zUyCiE7jTcFk%b3xns@Opoi4pxpjx1pLXHJjKHPi_`?bPs62;Mvz7c9@yv}TyDwawr zgR_mgO}o4DoS9H>nov4epry;k+CA1TBm42d;Uh9 zTJ0SN&9aiuH&F!^H6t2&g?9??lzDPpL@XuJa%{dHmX0XuRp=D^NZoauhr_G($x+Bq zNSn4xWe#B`rM$uK3t5+6Q-r61p~j~6Re3WVyDz<^G^!ym9cjK75 zQ!VM3mJJszd994B(9LUR^5!@5i(6=Z*LBq`Uq;}}j||fdlnot5bI)X0W*K(;ek_2p zSu-K2DynNTgwy8HhNpMecGEU2(i2-#n^uF?X}5B>lSBH$=If+ucl;m7;K;Q2TKLO6T0G|jH(dM< zUwK9ywGO6^FGuJ5G}jkbd3TLA;}2_wxA+tK5zQ;i_1yZJ?wcwc_>S)9N9XBhkD7iH 
z9nT!&9J?Mfnx~I#k5^tZp_ zuZ^x+zMZ?5@R7TZx^KB-xUjttxr4gLd~m%}el&iheawb>3(A9iX($ zj((2$j(lge>nVNu(m!QTDeK7B(Ty=sD56I$vKxK%{Nt6C6ers63N9{o^fhyuiJiOC zK1v~QBgm5*$8W?R#CJM<=Tgmxd*e1#k!V=K=niwkKAztAne)fZYRWl`5}%T^vVMVQ zo~3FNOQ%%U@AdQ9U>tX^v^1HE+u{%aln!QHxB)m8WNBFVQ!= z!H)2DT6AjIL6jCI56g>n>8PVf2}S%cYid?QWYX@L<(cG}jn&7%6IU6}IMmrOXLQ!m z2MGlZ1(TAK;*e6uIIZ7eWqHCfQ}Z;OyG1knTm5YXwa%|j76;SOck3lls+|=Ly1Fg* zJztvG%Rlmbq(yk}gMG~Hkf@eWa`7*JSZEjc>i_0>u71pLY0Pyc(z8!p+D3Xvs-t43 zqO+yjPUJ51>~k?*D+jmzLM=|KX5HN~RwX@q{=0mrd|&My8{38cIkmQlCu^_Xe9J}o{(8S~Gy9kTg?rgA$n)!(`9`*TW?mMZUC#~X(|VXD%LMbnt!!pbIHZr3Ew`nxX|^#uoVv+~dTmj{CkI~4_Z z44hhaEz34XSxdgx$+P7bjZb#^Ej=C1N5_k;OE*Txd`;Odojc$s-NRvNC@cI!-fY(% zTNon=>xDz|!&#OBrT3$cjs1i>S*-1T?X%h9eEy3AN8V4Zmp7G99~plz4nbc+Tj?)! z^76f9^0=68Q-7-Ls~iaZ^qhMhqia91{P-AlLo^$<5%#bmwH)24{i3(=w1yH*v>^Un zJj-|Fo{K~4QF23h)O>qqjL}gax*yB8&zJti?795P`^OE{hUUhGQFI6^H+)VkRAB+c z=`j-v3KenfcUiFTi-PSv$-shVa7wsGs;n=~RDC!B!M0$%WgueMUrFH{LTr6_0TIm9 z3wT|~$mds#aRoec@>`@NljPfzx3}H|U=HPAJ_%7VF-L?%FKVCPJfH?NWE67~g5gYv zhQ}<3PG$PG`J8g5eK~t^>F=~=QVb-!ZGEPiQNl>|Kcb=o0$d|g4M{UuS+IA&F)SDa zI64?4a0Cwg0}HbR1O0Oh21W^dgMmTDhl0TX-{`=vLJq{AS_rBf$UnzmVXqeoDT_!- z0^iEUPNt@I&KCAAL@a!wKw!;hsiNVcAuGdUY;Vh8Xku?<%HUz^@M;3a>%jvY+M2o; z5_{O%*g5lf@R9y=1rKoiTFgjF{LdvW)_kNIvI@i^_D-h6>wb8~YuGO;kSu+Rfn&^vqDxfpuT+c}f{ z+sL1GL`|KIoh%((EbZ-xU)wb_vUhdiBPD(9=B<8W-gXy{~uZ!_n)5rZRvSWbR;UdF$!!4FS4;tRMc(>F;`Ar%ksqfD68H)8kw z0tSPIgWP12Ado03D)>g=Thv$#)-X~tKGZA0v&ZbUD2Gq*yTz*!q=iL40yn88V)KGF zGg2r9o@TWlrhc+cPA(;cZI$n678KI@K9%lQG^~n1h&NP+u;hXe|9)u10@lg%zXbRHy+9$O3gz!f-W%{w35iz7xFY|#o?OuJ zJO3YJ@>nGhwVYB3w*}(g4*K98{&W>9D20~8C0V?k!~C~DpmWH-C;z)Uw-gnGs3whV zI5!_`wb3j4WXuf=MUvd1#!#BuRq=vctE*((9GIUS=YxUUg$srkNq7_@%6QC;AWeL@ zSV1GWO?2)>ZE87m>CAdfScMD?gHtAGP$HA!I6E{taiDXmAQsiz9fyN0#L4Cza`QT} z%Ao(OgcBmTmz4OO2lV6?TajT2FwpLiiGTz<&|mQkDfdX$MhZuAg6zU(Rq6Ux ztl$`Ftot7R;LGGhhO1)R7w0d63z=lIp=|;3UeoY61F+~0WMDCagOS>UzX(xUC`@^Q z0mj2^DzAqSCX#bPl-{7NW?>M)6aIH8A;1SrGmp|myj7LDJr-iMKtm>nao@oy35Moq zTjydRicV?a^02^{TZ^X&>pqa%9zyE07>b7@1=cC$KP`TDKue)kmHISeL==f+m@Eu& zM$+ZY$*-enwFzn}!{C0uO%9Gu{Ld#G5^x$WLDRUu3mu1%!xfQ1;2i(QHF^WV`Xupl zY9#(+LLB%5P9Z~dESg*LkMphz0-lMiph^D6wFEao`2IV+<#S}>(LaKq5H@dC{8M}% zuJ<-6q>~d{Tn~owQ}o__gvWHEC8a#0lFv$P^StQKD-*@4@qh6fMRpC+j6kPA*W^1O z{dc5+iR7r&Tz6Nq`lgS!XHJi|j)MtQF>h$qWb{|F_`FSa#xl<*4KBwI<=lyv0 z9xaC$RB}Ha?=JF_@9DotCB5?;GDgqj_tkp7-9Yi}3~tYC;&%sn}qWx*pBw7j}$A7ubPlf&{ThtUVzD%NDmIT+TqF zkip}nRw#~otLb+Igm1{)4nb1ntF<*XpoV06Z8a8F)~n#9#NaSq5EkKjtJ_gLi|NoV zOkTwJUN$PF2(~ZvZ{(xIqy3XOv-Tj+&|E@3(T&M*?!rJw|$b*y7bj}!w44?bG zLI{-Ar<;{*GSnVCPV+H*`PmxBjSqXXmD#|!KiuD(F#11VWbDq){%AxI5pv%0qZMx<9B)8A_sk zIK1+z^1Lw9Y;wrecaqrzil{hOd}_C`N*FE&#`zy)oOZ_O?=E&y(pb%h=o=Tm9-Y(1 z>PryAV8Ci;to8>EfR^Kb+jr`gsFjzT8K^}DoRn)fCcjfH`B9|!%|&ZO@Q_fCC%8v| zvF$V{nmr3X97OiE>^ms!C}Ty6^bJZ-)Cv=qHMzI%mtpV+ex;LUKjIhJZ=j z`WC}Ku|oC)6AT-&euu@By5c;}=QK!>0t=EQ2iH>y-^FFO0-1&pr}6u?L=WWPzZ2{d zH2HvMq&NmVE})zD&p+im@3!$YTU;zf*EhdLGN3=EfiYrag?V6+a!s>)U)vnf>@Byq z`i-rpG21S%E!{w#5GeBtND>#K0dryy14qbln)=odg8JoQ<%KOB@XG`FULgqsw*cOw zh5E^zZ+8P8Os9f+Vgz>)_57#lM1HY5IS1da1i0KB0M94b&6|UNP-EF1R$Myz9 zq;13}t*(`nvdP{fBF$n4i<9}gUsIw%qw6yHFhsUBT`WSQ8WV6(w zs#dO*e=;zUFKum)kbH4CM5D##q4{$R+8KEr{Gnrng-sMJS^qE@?U(qeOmLAH?qMaX zGy8RDEw2@;L;foV$RKyd-iirHJn6n-S(QvSoSdR@K17#CPDp987flUxD0qB zmC8noy}XRZdr0K9OG3}G?0v%5e6#Fr`XiQv#q->xn;7xe&vI?fyYpejE64|6U!Sn7 zPILxV_aN-${Ye|y+n>Vu^)+FsfvKVdDEzlMZC5keUNpyN14Off3Om5FySuqE=l&Am$G?-ztNTz^B1OB}C= zk}R_Z*To;6M5}JK25yK%0xNHs^R~;A?49s5IEev0BqXgU!M<3lAYGY8weSge;9MYa z*K=fPWrr5Z#HkgrtpRu3p>20Nbf9XU1ez@a^viUq`h#p2D!3uMJhX=3qcHhe8tA<+ 
z)TMlQGo|d8-N8_0-Bj2$tuv!QIE|K&t}3r41)^SfQ5X+{dBkA_)Sg*g_skBgI*eX@ za*p(tJ4uK^Qq+bW5GSl1)`(RZIEu}p%1i*8gmrsy*;yQ6PjDcNLK`3DhtwRxDeTW& zfvXn38_NBfj2j}uSj>_sSKp$TA$p~PliA9_X~t7f$!EJRiR5JPJ8Lx%E$82)uJU{y zAlESrYO|iLn0}zt^FCrfqUh?EtHVsjBtAbi2utl-z3Ih}CyqqccnMPnKYM#gj4GVZ z?pc-%ypEHQ33zsuHN1MWkPdsg=x`z5LZew1exQR3Ot&s;)17A9Utd3(g>s>%#&DzS#0HRDf*HgbeBd6tv!&|tkuf`2-g3+SFTO`YUEFj7 zbM<5)pJwZdQ0$yBi0x2U23Q6%83q5G z#_v3k;yzs9tj@6D2Z9mS-R&^;f@Q(;53H{HDxF`up^c!1Gc*VTsjw0VZHNbm58pTl z$g7i1Gjwf0oZ-H$dV|CqIr>;Mi(7R@6!b<9-{vta4Ayk+s8xu6D#oRXfDTf`K(`+f zs=q%Ha^}=8mCG8Zz?RtH)Vv;hVAp+Z!G=D2fIOBCqT4+(k|Nd8fDGS^X}e!Ax%H$& zyK<#x`jan)`=Vo52Mr4gOWna_{#^LiQNlN^fs|h%R4GS7Pk(-wwdJLZOEw)1OT*CF zG{b^HiExY|BZziH2DhvdA+kR8SltXUpi*$q{RNMa2riGoC7a6BeGMMnPIw!v4n9l> z361zuntyvOhD8RJihf~mB((Yk0(=<}v0~01LI|YJe~d||Ow5&Jj|~%tvA+NWCS{{%BB^?nOR}bMK>mWNbT1~lJ z!2k>QXy^p8hlPknlfgS+G zu#Cm>G(ubDL3xbKXJY2~I@0;L#dvRMIVSc1hn#9Fl$Uxst**bflGk;jv^ocR-G>Jy zyI~VKl4^9fs!=Idl?&wv_=0`6lGH(+1c6%^u+nt)24LcyAr}2~c(t~OO$7v1=3ups zu-Ij@@AHvtBK@BZ^tDusn0m}~+9JRmgU->IltZ-%9{8CYzE>!0h1FmX;*5+PRUpkG zJ?Q(KiM4ZMLT-hlkv)#|#>dp#--)&O1P(w5gKn64xo&gC+GnA3=q_lRVYgit1vDBHSKMeWCaTt=XT(@+w7>ALztl)~0$ z00F)hkx%Kt@{>ct`jBbw6iyMw%}o!Zu%)UOp^~Wr)1JhT5$;}%y4s|C@|X)h{pkDx zAC5d&a4rj@iHX@ba2A49?Evq66PO3VDa;Angu%Aa4v%bqU#N0n+UJi612ptQ!Bpg@ zszw|1LVB>gO}pL5md3Wh3BD1ZSRtU61HSZ?N|JlaM3tw;kp|#}KyYLlITH`r%~oVC zdL5MC2S)pX&ppEbGDwyu&O)nbzt(yhqV^j%8cAjLLJ=ye;|bm(3fE}MgdT?laf!m( zeE0N_VoLAt`nUy-NQa7>Ytuclz<_N34R{eD{eIQf!_OL^+?j1GcI4Yfd?qg|1i`0$x)C~|OTc@A8 zq52Uc-#{EAdEgO)es0e>#K^Zgb&+NSY+>7IiFN%Jp9vT}?N|SfTX*a zqc;|DAl1*A?JZ~EWh9yVNu&s~FG^rXQbjO&<6z@f8hl0`OAXOHg)X~myG>G7^$d$g z*qe&8N-5SAHSh?FXZHN-5jOqqDGt@_RxW)-=qwZVf>`ltHP>pH<$D=wC$_PkYH!y^ zO?GJ6+7*f^NWPI_X^r!`UndgJ?ki`dCrw^e*)30ooz7>EK{#B{WrBubVZ5oooBMw; zyk!~@+)2c0=!pa)37majeF9qVwG#jv`(`t8S>9djQ`n?18(OoL8;FGZW+vc#gxHji z=$JW#0D8gWN|Kj03M|0Z0>uZVEvu^mzAd`T31Y+eiok7^3UkDnQEf|WSmrj)5QxEgTuxLVmLA+H$e7Dk6e;2pUiZx@zZ{{-6wiY4tn+ zYpe476^?8covQCUM;@u4bcOr%Dm+h% zFYsF9WT{(%4-Ao&AG#@yk)U9q@7scq@Y%lsNz+QcPMZVw+O{Kq!>wO=g zpfh@GrV#7!4ItqnmZz|9hciF4#c4BDs>vgZvVs0{@{gi1|TZ(W&ex(*m*f$_DK2 z#%?@lX%Eis#y-x<|3eQs_@RcEa)ApYSH!vIWqhX}H?n^bu`@|bjRZ*mY3XyiG`vV7 z4QRODD8a=($q_nJp&jzTy6j}Q=RcfP7j592&DM<;-elY#svtH10sIpNwqO+HAAUiC z4&WDh&&THLi-~MThn|1UWcX&b@u2@$1J#@uFOP7yr77Q^PpFk?#7#g?G&zR}#*v!l z&5i1g5d>S(06d8V1WaUdTrfV7@VTL=58S5GI{9ICWlW_c-IESgz)L;uOF1~?e`p6$ z;ecVDI0a+Ezf46FKx#$cX<^Cyp~O((fbmEaF{AutDi#59s27PI;qQ>cL%{q=IMA(- zJ);TUQ)c^|N4&<|B`U?Fw05hm6nja@W!Eo%7=@_xW%8-h+Wk$Vzz2`2+wKx?1z`nHS}Y0@?ID^YJWzEjg>Q8cd>1 zS@rK=_fJbpJO3q4G#E!F-dp_0*l{bY=XYmXtX`RAv)H8Qb#qJ)AR(mEV-re3KCe%C z*j|Iz_Ctwc%&fL+-}PMy-UA`d`o`S`4f}BRPCeP(~QB$kVBDbA7 zMaLUOTC75rw?aXitZbJT;Zy{&Y>=mmjj|AmaSgE|pZnTq5?Qpp*RY`yv>6>0J!J zm&7~uXx;Ye7BI$Zab@+k;h27o&CmTEBqAMgIIJ8c-rqt;zF^YHEv;wTtTZwz!&MVa zJt+%q(>2Lmp9qjY-9O2fsHT(o-n(3zE;ZOTA`*-b9DJEmfAAp{}*c>E#VrV-RE>?CCU5ib~*a&q)E%|5lu=WA0}l-fuj8iYIf`7;tfE)bgrxDeZpqkb!irIl)Lt}?973Hl$$D5S3GX34PLMPu9nf% z_pOOu+l^+8N2^3#K4n(Fb5>o%Y}@dvvx-w?c1dA>ro3caKD$GT!+!ZhIyiWoKRq|u z*=dhLq!ZSU1RVVo*vHOt!;ip|ouCol5@+(bDxp;GcYNoxnaeR#dDtp0%zEYTiUO}b z)D!A=VH0^UIc-WnKLcl6wKmM)+pj>^P@Xw~hK`-J49;(ntxa0s| zI;)qi`0axwBRohJaE|(}pd9!CIsJS`{cct%TlKSM7HE^@+V6EQ z!qFxZe-7m*o7hcw6ZA)=F(yzk{#ws78-=%0*dzYkc#<%PVfN$crZN7j7y?aT{D8S@ zty%qaj^2j`yCpLBB@XFjuY)K?WElU)xZ?dMSwfFh_<9|8DbEthVm`5p%{Xy8^ux;A z0RhV5YX-`f$DONBD?(SkpYL z?xAA6D2l?rOXt1Op>5tT36%<`SAlRXz@pN-Z8{5M;`Tg`vPzM2TJ17)MUjaUcq zFTD~tGV8>Cc$7wbcI!bZ50=*&Yw9DTxfZis@sl~ylu{Nx39%QDz1qi|QnI4bx13Wx ze>i!_lTHcka7MKsztkoPe%4cmJJipU%aoE5oVn?{0AAn3>96d$7X-9_I7(ACKpuK_ 
z;#LuXISwJ^;F4ABn%1B@*|rgQ88wmv1*@T3&#O0sN1MWfe+Y89?8f5y)S8dX zJaSbi7e@1|R_JiI8_sSPs}#rU*})Htb=7QKJ*{?=4Gq_6-QRSoAT?3AuoLj8B<)SA zKR~ZsUP4&y?T564Doc?k-lWePML_!_V3kdH9!!^(w241MnzwsRCt{a=`J$P9l?Bop zpH`n$t=)*c+Vi|n8!nKY_l5^NvRHJS#)qPlUvn{^lPcTY zDsASfbP*DORetwNQBn8Ai->6IcZ03vmTa?BdI0}I?ar(f>Qa%}!!-U~)0u$JZIRZ* zVtI(+dsXp<)aSjz`UPc-eXsJJrNf&EznSxOS46b)ryCSYMRY3>`u*qAo)?-xHzm)) zu}?9K9Y2C! zue{J>*tq#V95r5rPIEhN1FV(El*oR7)7dhC30$&HRU7&)do^vRR=t(e_6Uxy-RPJ3 z2M&OCuY}&+7uxdTMvKnW8;TUe9VweF<0)@qhy$GL0Ifv#y|*NctILUBS`@*1{o@wv z3>yvS56AvRUjZO^gemf}m1}WGqt_P8_&zy{g{6j*Af59wJSNWp9IX<$zs5Q_?nC#^ z)oiCy@Y2Z;t&-MNJDXPx@Ge9cGS>qfgJiHNih9Vc=yc8@Vc0yMulvJ|Dc z&4(FPORCO=O}Q`+A}_2u8AZ@hMp)7^;ZQAm!sJzkBRgqP)UdO`LaA&YZ)MMyeBE%$sd5M!zKnzBo_TdB?Pc9 z`=)^8kqRGm7-tKhrB%3t%qSVf^^w(i@AN; zSDJ||*2@dedT{@FV)JZ3JIp7Q(^5(Nl+&OK?dsj>2hW?O;{DNZF=kAvT!u28=6I!c zH@o^3qxS0zd@ctGPXmr&DC`VO`P}m_7r*8@owmGiE-x!t9Hq?nev){9+oG-X$!((! zlQ~oLZ8;P7>9cC4V7&;h%!-Pt9q33PL0-4@54R%YA=pP2 zBx$)c`_sdXdiv?{6gxXW|N6V_{uDZL{ZYyxGow<8$6r3K`4Tvr2jp`ms@CSOtQya& zLqhn-JbqKs)62G#{1iV?uTDmY31?qzUE;2E(;EJC!+aeE08}U`8@kzH9*1K2)GCd4 zYIF=Yi_V$L7t97-R92AIEei+E5qir!&Xlwknz=G*>sq2CnXOW%p-@AXb5+S4M0hOB z!2=y{LV%!fFq+^Y5vHVVfh@5-F^|mVSyG;nRG#;jD50l8+=5Pv-t{qGMe_KBt2tU&ZQ zC!-hSajz&#_NvkU%#lA`q^Jmu+$PiU=#(b&sMfxH=RmRLID|!I-^Q%S|Kd_9`EIuE zTphwQv8p>0f;#W@lqF8Vm09w?Tx|8b3sxL1J|+-mf%6hG=gdKk z7@bwQT)53`eZI!$B?cn&M6pP{ir#5r>(C3e9-#_+vt1e8{yB^rFd;U1^hFZ>$zmjh} zi00s*uFSrshz6eBb&YIJb_(H)JeHCDY5kvgB6u95rM%qJTir14DaFY1S;!VXyy8JQ zAQ`?P8skv^6%!;e{H&fR)f+` z|Efg<(SgB}qw$yi1H*LI;LAEj zx{n-^uqs;Ff(?SBdeW(l|bNtrBtJ^pcnVsk$y0)P}0j;!^B$HLcD9 zWcQ1e@;|ZJFY9<+AJR}rCH|a$xZEQH@JRev=KF&aKq8S441*%|^89plqu1^w3B-X= zL%#DhrT}JDmjbYbtENPiOo#U$Z_O{ZN1axD zZ{Tc%o?(c56yB*v7=d4V8MX#Pa1dfs7SwOOH7)9Wb z7*Kj7Twb~?VR0^dd`o1|#r;*|ab|RP;{Q@Q9CUz>*ac`R7P5xihbn0uZq0Xp<|;Kg z7`|pf|3O|r?6}wR%DFf1evRBLNVki(xJ@m|ypNH@*q8&&xeQdM+KJ>;PWrXpPighzNct zhQn@^NQk)NbwJN&0eH@Z5`a1OX3EnWFi=nryqE|l9&S!C$0=mpge|&W=W_=^?F<4ae7o6bP9B#1ij*H`=Gb zVNe37a)l94yG;8SBx>_c2sR0M@LkUtw-CJMKWWs;QY)W6M7<^PJm35k+`TthXbMpN zueoY^xwgcUM53$rP_-%_y9vHyK0zA(`YPrXJK)IX6HC-x{9;mA^)f5{GCner2=HS= zfF6TNUDq{fo6qOgfxf2wlCkhP_>md`Vhy0#rXGrk6&z87yF_eS>MNCBV%#5s-t7wj zn5`qb)n$*gvhDm+RtWFiX2PqcLjs@-UHxl6VZdTP=IZlY`A=HO9U-8h7{YW%?w}rD z0D2Gv8|G_nzHAwSI3F)dfkvmy`!MW>o@Re)xkatuGa3;vQt;sUP$axMrtYqAoHamK z8?xtL_m6L>=7^K>?3VhO$Kf z&$o)gTB|^!@|5am_;bLf-1T6F=lx703K2hkNm2M%SP9Wb?s=3G)aS0>c^}=oL5&`; zeewcND&Ez^)M!l3A5(_Hr4b#PmI2|DMcg%@&PiW)U6Q*Z%T@N%7a~D6z~JRMn5|6V zaoyi_<8xRI$TAe^A{Ff!P&t_Lm%(k7uY4l%#aI>v(Fv`-Wp&$(Y4A6U?e9Voif#bI z4#%v+fp%-W{wSM!pRN5Ov~00}eTUZ72_&oUJ*G=kYn-;Ee06dJ!F^9UFDInEi~_B{ zo4^KPcSrh4V3+tRF84-YYyrSz=84d9%~JSU^HPQG`y;E5DUf!@4zmFyTryo{PrxR!U7Dq; zxlwJN_nPkZz}MJ-ym+8MJfl!~23X%c{M7Dr1%O#`ep@@Pa5YdCgbJ1W#Prr+<_So> z?+S!-jY!Du0y>=Q{zcbWUH+IUK&WE$RafBKAc|v+&z;kgL)Z;`20((70Gg*s;lX;R z7&y|cllm9!6k-x5lI`jc)RX6}eBVIoi39O}r=;v^a|GSBHAwJcKawyv1WO2zb2yZGV2;dNSm`;!E`zkF!CV z(1qd4qiU(z^f33X`6;YH1jrU#o$dN7`FoGO7dZ~EJ^f#oM2trVCfO-X77Me9F>dVO zH*XI)z#m4cM4?b%DCuAIUiJd%k{EmD&yROk=L!Z-)foXg&ZF#cUv_Bp*`9iop^`nuMILrI^*WypL zIl(B|(bFP>046*KS*YA07s=bzjipgQMIpYvhYMYD?`b;EPJ~EnXeqO-j znEpFCz^kEV6c@Yln%#A^5NuUaxKmiamnDSVKoi=#Z*{B6+byJKTq|^m8N|?5RfMrj zPj|6;!(X-bpab{o5=Fjovobu3BGk#IgjHU|}MM!-X`YWsJkzQEnM|MNGPo@(QP z{mf$z?=BFp1u=+et{qTMdTkpos|eyApa%^akNOV~xbJqOON-Cd&F2U~c3v4YKHl15 z%-LQmteW-T0DH;a{^Gf@LT^rtFT^SCs_ff?k+C`aeb$8o_gbQ>nIc6IXk>h9eZRZ> z4i206I_N@=HnERxy{^l?H%nfVm1jX~JZLEPm@@@uSg_D0P3Pq#DtCGp+Qaa?p}Y5w z7vuGx4Nx4dT_eSZ2g;uykE;D{CSJ<-j-r?DQ?y^Y(NmNitn@c8oSx2X^yhv8oK$-R z_7k%3T+0gN*_`;thX(hP<&H)-hvg83Ym()o`z|89_BEWoWvtWhwf&&RM@rqiY`dc! 
zwF>PQhgyM*2bhB^L~>MR2&-UqnPCJv8Mq|6tu$EPqZ?iNJ`iM;aun4V(7L@{r@I5174^vv+KXmq!j_L9u*hJF>CcH4> z#-(4}YkH(#J~g~F)mJ9?>aCO7>ep*>Nk6?2!g^cO&efn=93h&#!1d*-4_kk(e_%b& zE1j`qN14thydcWrAuOe(xom?H6DQjV1c=b6Zvhu=|A4W&F0a;gNMH|U$jV@*wmq8T za2RoY-&0H*UuSo{#%c2v?$>rq!Li5A)x=At&FMtv;=(|&Pp??|O8KM3Z)`n|wq_&j z)#l!lXi=w?`6P4qvHj1rk@8p0XLkBJ{tVQqDlkG1WAqSnThT;3M&VHb^Y$C2*-|F# zOtW_~t3Jp29!EHkr3p>l*0Ub9&=0@g#)|{c!1K`AX7mjecY~#;;VaIFMTJ;IO-CIm z3;Bafx@dv?VP#%w^q&G^5&(vv2c;$b1w{mq|0d;`2rST=0!Oxq{PnL}B*o-JD})V#{x1#c;RXNz literal 0 HcmV?d00001 diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index 5dbe913026397..c5f3351527f11 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -35,7 +35,7 @@ with JVM. Profiling and debugging JVM is described at `Useful Developer Tools `__. - *There are many other ways of debugging PySpark applications*. For example, you can remotely debug by using the open source `Remote Debugger `_ instead of using PyCharm Professional documented here. diff --git a/python/docs/source/development/index.rst b/python/docs/source/development/index.rst index 9b486976c6a71..bf8b2ccafaa9e 100644 --- a/python/docs/source/development/index.rst +++ b/python/docs/source/development/index.rst @@ -25,3 +25,4 @@ Development contributing testing debugging + setting_ide diff --git a/python/docs/source/development/setting_ide.rst b/python/docs/source/development/setting_ide.rst new file mode 100644 index 0000000000000..dcb44c1483006 --- /dev/null +++ b/python/docs/source/development/setting_ide.rst @@ -0,0 +1,62 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +=============== +Setting up IDEs +=============== + + +PyCharm +------- + +This section describes how to setup PySpark on PyCharm. +It guides step by step to the process of downloading the source code from GitHub and running the test code successfully. + +Firstly, download the Spark source code from GitHub using git url. You can download the source code by simply using ``git clone`` command as shown below. +If you want to download the code from any forked repository rather than Spark original repository, please change the url properly. + +.. code-block:: bash + + git clone https://github.com/apache/spark.git + +When the download is completed, go to the ``spark`` directory and build the package. +SBT build is generally much faster than Maven. More details about the build are documented `here `_. + +.. code-block:: bash + + build/sbt package + +After building is finished, run PyCharm and select the path ``spark/python``. + +.. 
+.. image:: ../../../../docs/img/pycharm-with-pyspark1.png
+    :alt: Select the Spark path
+
+
+Let's go to the path ``python/pyspark/tests`` in PyCharm and try to run any test, such as ``test_join.py``.
+You may see a ``KeyError: 'SPARK_HOME'`` because the environment variable has not been set yet.
+
+Go to **Run -> Edit Configurations** and set the environment variables as below.
+Make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After setting the variable, click **Okay** to apply the changes.
+
+.. image:: ../../../../docs/img/pycharm-with-pyspark2.png
+    :alt: Setting up SPARK_HOME
+
+
+Once ``SPARK_HOME`` is set properly, you'll be able to run the tests as shown below:
+
+.. image:: ../../../../docs/img/pycharm-with-pyspark3.png
+    :alt: Running tests properly

From 0c66813ad9867e366689b47c81bdd8a94ac17828 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 21 Sep 2020 13:28:31 +0800 Subject: [PATCH 0075/1009] Revert "[SPARK-32850][CORE] Simplify the RPC message flow of decommission" This reverts commit 56ae95053df4afa9764df3f1d88f300896ca0183. --- .../spark/ExecutorAllocationClient.scala | 19 ++-- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 31 ++----- .../apache/spark/deploy/master/Master.scala | 23 ++--- .../apache/spark/deploy/worker/Worker.scala | 28 +++--- .../CoarseGrainedExecutorBackend.scala | 60 +++++++------ .../cluster/CoarseGrainedClusterMessage.scala | 16 ++-- .../CoarseGrainedSchedulerBackend.scala | 86 ++++++++++++------- .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 6 +- .../storage/BlockManagerMasterEndpoint.scala | 18 +++- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 7 +- .../scheduler/WorkerDecommissionSuite.scala | 7 +- .../ExecutorAllocationManagerSuite.scala | 6 +- 16 files changed, 148 insertions(+), 177 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index cdba1c44034c0..ce47f3fd32203 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,13 +91,11 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. - * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -111,21 +109,14 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors.
- * @param triggeredByExecutor whether the decommission is triggered at executor. - * (TODO: add a new type like `ExecutorDecommissionInfo` for the - * case where executor is decommissioned at executor first, so we - * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. */ - final def decommissionExecutor( - executorId: String, + final def decommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean = false): Boolean = { + adjustTargetNumExecutors: Boolean): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors, - triggeredByExecutor = triggeredByExecutor) + adjustTargetNumExecutors = adjustTargetNumExecutors) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 1dd64df106bc2..596508a2cf8c8 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -580,10 +580,7 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors( - executorIdsWithoutHostLoss, - adjustTargetNumExecutors = false, - triggeredByExecutor = false) + client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 8bc909b096e71..83f373d526e90 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,34 +61,13 @@ private[deploy] object DeployMessages { } /** - * An internal message that used by Master itself, in order to handle the - * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. - * @param ids A collection of Worker ids, which should be decommissioned. - */ - case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage - - /** - * A message that sent from Master to Worker to decommission the Worker. - * It's used for the case where decommission is triggered at MasterWebUI. - * - * Note that decommission a Worker will cause all the executors on that Worker - * to be decommissioned as well. - */ - object DecommissionWorker extends DeployMessage - - /** - * A message that sent to the Worker itself when it receives PWR signal, - * indicating the Worker starts to decommission. - */ - object WorkerSigPWRReceived extends DeployMessage - - /** - * A message sent from Worker to Master to tell Master that the Worker has started - * decommissioning. It's used for the case where decommission is triggered at Worker. 
- * * @param id the worker id + * @param worker the worker endpoint ref */ - case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage + case class WorkerDecommission( + id: String, + worker: RpcEndpointRef) + extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 15f8be69d97bd..48516cdf83291 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,27 +245,15 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommissioning(id, workerRef) => + case WorkerDecommission(id, workerRef) => + logInfo("Recording worker %s decommissioning".format(id)) if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach(w => decommissionWorker(w)) + idToWorker.get(id).foreach(decommissionWorker) } - case DecommissionWorkers(ids) => - // The caller has already checked the state when handling DecommissionWorkersOnHosts, - // so it should not be the STANDBY - assert(state != RecoveryState.STANDBY) - ids.foreach ( id => - // We use foreach since get gives us an option and we can skip the failures. - idToWorker.get(id).foreach { w => - decommissionWorker(w) - // Also send a message to the worker node to notify. - w.endpoint.send(DecommissionWorker) - } - ) - case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -903,7 +891,10 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) + workersToRemove.foreach(wi => { + logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") + self.send(WorkerDecommission(wi.id, wi.endpoint)) + }) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 2e8474e3e3fc2..7649bc37c30b6 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,10 +70,7 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.") { - self.send(WorkerSigPWRReceived) - true - } + "disabling worker decommission feature.")(decommissionSelf) } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -140,8 +137,7 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - // expose for test - private[spark] val workerId = generateWorkerId() + private val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -672,13 +668,8 @@ private[deploy] class Worker( 
finishedApps += id maybeCleanupApplication(id) - case DecommissionWorker => - decommissionSelf() - - case WorkerSigPWRReceived => + case WorkerDecommission(_, _) => decommissionSelf() - // Tell master we starts decommissioning so it stops trying to launch executor/driver on us - sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -777,15 +768,16 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Unit = { - if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { + private[deploy] def decommissionSelf(): Boolean = { + if (conf.get(config.DECOMMISSION_ENABLED)) { + logDebug("Decommissioning self") decommissioned = true - logInfo(s"Decommission worker $workerId.") - } else if (decommissioned) { - logWarning(s"Worker $workerId already started decommissioning.") + sendToMaster(WorkerDecommission(workerId, self)) } else { - logWarning(s"Receive decommission request, but decommission feature is disabled.") + logWarning("Asked to decommission self, but decommissioning not enabled") } + // Return true since can be called as a signal handler + true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index d002f7b407e5e..48045bafe6e3f 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,17 +79,12 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - private var decommissioned = false + @volatile private var decommissioned = false override def onStart(): Unit = { - if (env.conf.get(DECOMMISSION_ENABLED)) { - logInfo("Registering PWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling executor decommission feature.") { - self.send(ExecutorSigPWRReceived) - true - } - } + logInfo("Registering PWR handler.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling decommission feature.")(decommissionSelf) logInfo("Connecting to driver: " + driverUrl) try { @@ -171,6 +166,17 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { + if (decommissioned) { + val msg = "Asked to launch a task while decommissioned." 
+ logError(msg) + driver match { + case Some(endpoint) => + logInfo("Sending DecommissionExecutor to driver.") + endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) + case _ => + logError("No registered driver to send Decommission to.") + } + } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -207,17 +213,9 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionExecutor => + case DecommissionSelf => + logInfo("Received decommission self") decommissionSelf() - - case ExecutorSigPWRReceived => - decommissionSelf() - if (driver.nonEmpty) { - // Tell driver we starts decommissioning so it stops trying to schedule us - driver.get.askSync[Boolean](ExecutorDecommissioning(executorId)) - } else { - logError("No driver to message decommissioning.") - } } override def onDisconnected(remoteAddress: RpcAddress): Unit = { @@ -266,20 +264,17 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Unit = { - if (!env.conf.get(DECOMMISSION_ENABLED)) { - logWarning(s"Receive decommission request, but decommission feature is disabled.") - return - } else if (decommissioned) { - logWarning(s"Executor $executorId already started decommissioning.") - return - } - val msg = s"Decommission executor $executorId." + private def decommissionSelf(): Boolean = { + val msg = "Decommissioning self w/sync" logInfo(msg) try { decommissioned = true - if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { - env.blockManager.decommissionBlockManager() + // Tell master we are are decommissioned so it stops trying to schedule us + if (driver.nonEmpty) { + driver.get.askSync[Boolean](DecommissionExecutor( + executorId, ExecutorDecommissionInfo(msg))) + } else { + logError("No driver to message decommissioning.") } if (executor != null) { executor.decommission() @@ -338,9 +333,12 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") + // Return true since we are handling a signal + true } catch { case e: Exception => logError("Unexpected error while decommissioning self", e) + false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index d1b0e798c51be..7242ab7786061 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,17 +95,8 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - // A message that sent from executor to driver to tell driver that the executor has started - // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) - case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage - - // A message that sent from driver to executor to decommission that executor. - // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. 
- object DecommissionExecutor extends CoarseGrainedClusterMessage - - // A message that sent to the executor itself when it receives PWR signal, - // indicating the executor starts to decommission. - object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage + case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) + extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -145,4 +136,7 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage + + // Used to ask an executor to decommission itself. (Can be an internal message) + case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index f6930da96a390..0f144125af7bf 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,6 +191,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) + case DecommissionExecutor(executorId, decommissionInfo) => + logError(s"Received decommission executor message ${executorId}: $decommissionInfo") + decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) + case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -268,14 +272,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeWorker(workerId, host, message) context.reply(true) - case ExecutorDecommissioning(executorId) => - logWarning(s"Received executor $executorId decommissioned message") - context.reply( - decommissionExecutor( - executorId, - ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), - adjustTargetNumExecutors = false, - triggeredByExecutor = true)) + case DecommissionExecutor(executorId, decommissionInfo) => + logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") + context.reply(decommissionExecutor(executorId, decommissionInfo, + adjustTargetNumExecutors = false)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -463,47 +463,71 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. - * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. 
*/ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean, - triggeredByExecutor: Boolean): Seq[String] = withLock { - val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - scheduler.executorDecommission(executorId, decomInfo) - executorsPendingDecommission(executorId) = decomInfo.workerHost - Some(executorId) - } else { - None + adjustTargetNumExecutors: Boolean): Seq[String] = { + + val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => + CoarseGrainedSchedulerBackend.this.synchronized { + // Only bother decommissioning executors which are alive. + if (isExecutorActive(executorId)) { + executorsPendingDecommission(executorId) = decomInfo.workerHost + true + } else { + false + } } } // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission) + adjustExecutors(executorsToDecommission.map(_._1)) } - // Mark those corresponding BlockManagers as decommissioned first before we sending - // decommission notification to executors. So, it's less likely to lead to the race - // condition where `getPeer` request from the decommissioned executor comes first - // before the BlockManagers are marked as decommissioned. - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) + executorsToDecommission.filter { case (executorId, decomInfo) => + doDecommission(executorId, decomInfo) + }.map(_._1) + } + + + private def doDecommission(executorId: String, + decomInfo: ExecutorDecommissionInfo): Boolean = { + + logInfo(s"Asking executor $executorId to decommissioning.") + scheduler.executorDecommission(executorId, decomInfo) + // Send decommission message to the executor (it could have originated on the executor + // but not necessarily). + CoarseGrainedSchedulerBackend.this.synchronized { + executorDataMap.get(executorId) match { + case Some(executorInfo) => + executorInfo.executorEndpoint.send(DecommissionSelf) + case None => + // Ignoring the executor since it is not registered. 
+ logWarning(s"Attempted to decommission unknown executor $executorId.") + return false + } } + logInfo(s"Asked executor $executorId to decommission.") - if (!triggeredByExecutor) { - executorsToDecommission.foreach { executorId => - logInfo(s"Asking executor $executorId to decommissioning.") - executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) + if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { + try { + logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") + scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) + } catch { + case e: Exception => + logError("Unexpected error during block manager " + + s"decommissioning for executor $executorId: ${e.toString}", e) + return false } + logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") } - executorsToDecommission + true } + override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index b9ac8d2ba2784..34b03dfec9e80 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,12 +178,9 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo(s"Asked to decommission executor $fullId") + logInfo("Asked to decommission executor") val execId = fullId.split("/")(1) - decommissionExecutors( - Array((execId, decommissionInfo)), - adjustTargetNumExecutors = false, - triggeredByExecutor = false) + decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index e1b4cb82cebf1..ff0f38a2479b0 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} +import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -1809,9 +1809,7 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) - - private[spark] def decommissionSelf(): Unit = synchronized { + def decommissionBlockManager(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 
3fcfca365846e..a3d42348befaa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,7 +163,8 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - decommissioningBlockManagerSet ++= executorIds.flatMap(blockManagerIdByExecutor.get) + val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) + decommissionBlockManagers(bmIds) context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -358,6 +359,21 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } + /** + * Decommission the given Seq of blockmanagers + * - Adds these block managers to decommissioningBlockManagerSet Set + * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] + */ + def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { + val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) + val futures = newBlockManagersToDecommission.map { blockManagerId => + decommissioningBlockManagerSet.add(blockManagerId) + val info = blockManagerInfo(blockManagerId) + info.storageEndpoint.ask[Unit](DecommissionBlockManager) + } + Future.sequence{ futures.toSeq } + } + /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index 54a72568b18fa..a69bebc23c661 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionSelf()) + context.reply(blockManager.decommissionBlockManager()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index abe5b7a71ca63..9c5e460854053 100644 --- a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(DecommissionWorkers(Seq(workerInfo.id))) + master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): 
Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index a3438cab5b0a3..fe88822bb46b5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,10 +122,7 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach { worker => - worker.decommissionSelf() - master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) - } + workers.foreach(worker => worker.decommissionSelf()) // Decommissioning is async. eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 4a92cbcb85847..83bb66efdac9e 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test") + val conf = new SparkConf().setAppName("test").setMaster("local") .set(config.DECOMMISSION_ENABLED, true) sc = new SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,10 +78,7 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. 
val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray
-    sched.decommissionExecutors(
-      execsAndDecomInfo,
-      adjustTargetNumExecutors = true,
-      triggeredByExecutor = false)
+    sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true)
     val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds)
     assert(asyncCountResult === 10)
   }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
index 293498ae5c37b..f1870718c6730 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala
@@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler
-import org.mockito.ArgumentMatchers.{any, eq => meq}
+import org.mockito.ArgumentMatchers.{eq => meq}
 import org.mockito.Mockito.{never, reset, times, verify, when}
 import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester}
 import org.scalatest.concurrent.Eventually.{eventually, timeout}
@@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase
     val decomInfo = ExecutorDecommissionInfo("spark scale down", None)
     if (decommissioning) {
       verify(allocationClient, times(1)).decommissionExecutor(
-        meq(expectedExec.get), meq(decomInfo), meq(true), any())
+        meq(expectedExec.get), meq(decomInfo), meq(true))
       verify(allocationClient, never).killExecutor(meq(expectedExec.get))
     } else {
       verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get))
       verify(allocationClient, never).decommissionExecutor(
-        meq(expectedExec.get), meq(decomInfo), meq(true), any())
+        meq(expectedExec.get), meq(decomInfo), meq(true))
     }
   } else {
     if (decommissioning) {

From 1ad1f7153592344d3b2adc1196ffe8cc921e0292 Mon Sep 17 00:00:00 2001
From: zero323
Date: Mon, 21 Sep 2020 16:35:00 +0900
Subject: [PATCH 0076/1009] [SPARK-32946][R][SQL] Add withField to SparkR

### What changes were proposed in this pull request?
This PR adds the `withField` function to SparkR.

### Why are the changes needed?
It exposes functionality that is already available in the Scala and PySpark `Column` APIs to R users.

### Does this PR introduce _any_ user-facing change?
Yes, a new function, equivalent to the Scala and PySpark equivalents, is exposed to the end user.

### How was this patch tested?
New unit tests added.

Closes #29814 from zero323/SPARK-32946.

Authored-by: zero323
Signed-off-by: HyukjinKwon
---
 R/pkg/NAMESPACE                       |  1 +
 R/pkg/R/column.R                      | 31 +++++++++++++++++++++++++++
 R/pkg/R/generics.R                    |  3 +++
 R/pkg/tests/fulltests/test_sparkSQL.R | 13 +++++++++++
 4 files changed, 48 insertions(+)

diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 53a0b7856567e..f27913ae0b1bd 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -428,6 +428,7 @@ exportMethods("%<=>%",
               "weekofyear",
               "when",
               "window",
+              "withField",
               "xxhash64",
               "year")
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 7926a9a2467ee..36d792c647e52 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -356,3 +356,34 @@ setMethod("%<=>%",
 #' }
 #' @note ! since 2.3.0
 setMethod("!", signature(x = "Column"), function(x) not(x))
+
+#' withField
+#'
+#' Adds/replaces field in a struct \code{Column} by name.
+#' +#' @param x a Column +#' @param fieldName a character +#' @param col a Column expression +#' +#' @rdname withField +#' @aliases withField withField,Column-method +#' @examples +#' \dontrun{ +#' df <- withColumn( +#' createDataFrame(iris), +#' "sepal", +#' struct(column("Sepal_Width"), column("Sepal_Length")) +#' ) +#' +#' head(select( +#' df, +#' withField(df$sepal, "product", df$Sepal_Length * df$Sepal_Width) +#' )) +#' } +#' @note withField since 3.1.0 +setMethod("withField", + signature(x = "Column", fieldName = "character", col = "Column"), + function(x, fieldName, col) { + jc <- callJMethod(x@jc, "withField", fieldName, col@jc) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index a6a71666ae588..604308c8803eb 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -729,6 +729,9 @@ setGeneric("over", function(x, window) { standardGeneric("over") }) #' @rdname eq_null_safe setGeneric("%<=>%", function(x, value) { standardGeneric("%<=>%") }) +#' @rdname withField +setGeneric("withField", function(x, fieldName, col) { standardGeneric("withField") }) + ###################### WindowSpec Methods ########################## #' @rdname partitionBy diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 5008d3005b5b1..0ad7f9e88b0fd 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1803,6 +1803,19 @@ test_that("column functions", { ) expect_equal(actual, expected) + + # Test withField + lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24}}") + jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") + writeLines(lines, jsonPath) + df <- read.df(jsonPath, "json") + result <- collect( + select( + select(df, alias(withField(df$Person, "dummy", lit(42)), "Person")), + "Person.dummy" + ) + ) + expect_equal(result, data.frame(dummy = 42)) }) test_that("column binary mathfunctions", { From c336ddfdb81dd5c27fd109d62138dc129a02c30b Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 21 Sep 2020 09:15:12 +0000 Subject: [PATCH 0077/1009] [SPARK-32867][SQL] When explain, HiveTableRelation show limited message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? In current mode, when explain a SQL plan with HiveTableRelation, it will show so many info about HiveTableRelation's prunedPartition, this make plan hard to read, this pr make this information simpler. 
Before:
![image](https://user-images.githubusercontent.com/46485123/93012078-aeeca080-f5cf-11ea-9286-f5c15eadbee3.png)

For the UT
```
test("Make HiveTableScanExec message simple") {
  withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") {
    withTable("df") {
      spark.range(30)
        .select(col("id"), col("id").as("k"))
        .write
        .partitionBy("k")
        .format("hive")
        .mode("overwrite")
        .saveAsTable("df")

      val df = sql("SELECT df.id, df.k FROM df WHERE df.k < 2")
      df.explain(true)
    }
  }
}
```

After this PR, the explain output will show
```
== Parsed Logical Plan ==
'Project ['df.id, 'df.k]
+- 'Filter ('df.k < 2)
   +- 'UnresolvedRelation [df], []

== Analyzed Logical Plan ==
id: bigint, k: bigint
Project [id#11L, k#12L]
+- Filter (k#12L < cast(2 as bigint))
   +- SubqueryAlias spark_catalog.default.df
      +- HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L]]

== Optimized Logical Plan ==
Filter (isnotnull(k#12L) AND (k#12L < 2))
+- HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L], Pruned Partitions: [(k=0), (k=1)]]

== Physical Plan ==
Scan hive default.df [id#11L, k#12L], HiveTableRelation [`default`.`df`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#11L], Partition Cols: [k#12L], Pruned Partitions: [(k=0), (k=1)]], [isnotnull(k#12L), (k#12L < 2)]
```

In this PR, I override `HiveTableRelation`'s `simpleString` method so the explain plan no longer carries unnecessary detail. Compared to what we had before, I drop the detailed metadata of each pruned partition and keep only its partition spec, which is enough to show which partitions were pruned. For full partition details, users can run a DESC EXTENDED statement instead of relying on the plan.

### Why are the changes needed?
Make plans that contain a HiveTableRelation more readable.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added UT.

Closes #29739 from AngersZhuuuu/HiveTableScan-meta-location-info.
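As a rough, standalone illustration of the truncation idea behind the new `simpleString` (this sketch does not use Spark's internal `truncatedString` helper or the real `spark.sql.maxMetadataStringLength` config; the `PrunedPartitionSummary` object, the `summarize` helper, and the 25/100 limits are made up for the example):

```scala
// Standalone sketch: render pruned partitions as their partition specs only and
// cap the resulting metadata string, mirroring the intent of the new simpleString.
// Requires commons-lang3 on the classpath (Spark already depends on it).
import org.apache.commons.lang3.StringUtils

object PrunedPartitionSummary {
  // maxFields and maxLen are illustrative limits, not Spark's actual configuration values.
  def summarize(partSpecs: Seq[Map[String, String]], maxFields: Int = 25, maxLen: Int = 100): String = {
    val parts = partSpecs.map(_.map { case (k, v) => s"$k=$v" }.mkString("(", ", ", ")"))
    val shown = if (parts.length > maxFields) parts.take(maxFields) :+ "..." else parts
    StringUtils.abbreviate(shown.mkString("[", ", ", "]"), maxLen)
  }

  def main(args: Array[String]): Unit = {
    val specs = (0 until 30).map(i => Map("k" -> i.toString))
    // Prints something like: [(k=0), (k=1), (k=2), (k=3), (k=4), (k=5), (k=6), (k=7), (k...
    println(summarize(specs))
  }
}
```

The actual change additionally keeps a partition's SerDe in the summary when it differs from the table's SerDe, as the new test cases below show.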
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/catalog/interface.scala | 41 ++++++++++++- .../hive/execution/HiveTableScanSuite.scala | 61 +++++++++++++++++++ 2 files changed, 100 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index be09e761272ce..db01999ab9bb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -24,6 +24,8 @@ import java.util.Date import scala.collection.mutable import scala.util.control.NonFatal +import org.apache.commons.lang3.StringUtils + import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} @@ -31,8 +33,7 @@ import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} -import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -693,4 +694,40 @@ case class HiveTableRelation( override def newInstance(): HiveTableRelation = copy( dataCols = dataCols.map(_.newInstance()), partitionCols = partitionCols.map(_.newInstance())) + + override def simpleString(maxFields: Int): String = { + val catalogTable = tableMeta.storage.serde match { + case Some(serde) => tableMeta.identifier :: serde :: Nil + case _ => tableMeta.identifier :: Nil + } + + var metadata = Map( + "CatalogTable" -> catalogTable.mkString(", "), + "Data Cols" -> truncatedString(dataCols, "[", ", ", "]", maxFields), + "Partition Cols" -> truncatedString(partitionCols, "[", ", ", "]", maxFields) + ) + + if (prunedPartitions.nonEmpty) { + metadata += ("Pruned Partitions" -> { + val parts = prunedPartitions.get.map { part => + val spec = part.spec.map { case (k, v) => s"$k=$v" }.mkString(", ") + if (part.storage.serde.nonEmpty && part.storage.serde != tableMeta.storage.serde) { + s"($spec, ${part.storage.serde.get})" + } else { + s"($spec)" + } + } + truncatedString(parts, "[", ", ", "]", maxFields) + }) + } + + val metadataEntries = metadata.toSeq.map { + case (key, value) if key == "CatalogTable" => value + case (key, value) => + key + ": " + StringUtils.abbreviate(value, SQLConf.get.maxMetadataStringLength) + } + + val metadataStr = truncatedString(metadataEntries, "[", ", ", "]", maxFields) + s"$nodeName $metadataStr" + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 67d7ed0841abb..bdccfccbc5bdb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.Row +import 
org.apache.spark.sql.functions.col +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ @@ -187,6 +189,65 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH } } + test("SPARK-32867: When explain, HiveTableRelation show limited message") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + withTable("df") { + spark.range(30) + .select(col("id"), col("id").as("k")) + .write + .partitionBy("k") + .format("hive") + .mode("overwrite") + .saveAsTable("df") + + val scan1 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 3") + assert(scan1.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1), (k=2)]" + + "]," + + " [isnotnull(k), (k < 3)]") + + val scan2 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 30") + assert(scan2.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1), (k=10), (k=11), (k=12), (k=13), (k=14), (k=15)," + + " (k=16), (k=17), (k=18), (k=19), (k..." + + "]," + + " [isnotnull(k), (k < 30)]") + + sql( + """ + |ALTER TABLE df PARTITION (k=10) SET SERDE + |'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; + """.stripMargin) + + val scan3 = getHiveTableScanExec("SELECT * FROM df WHERE df.k < 30") + assert(scan3.simpleString(100).replaceAll("#\\d+L", "") == + "Scan hive default.df [id, k]," + + " HiveTableRelation [" + + "`default`.`df`," + + " org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + + " Data Cols: [id]," + + " Partition Cols: [k]," + + " Pruned Partitions: [(k=0), (k=1)," + + " (k=10, org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe)," + + " (k=11), (k=12), (k=1..." + + "]," + + " [isnotnull(k), (k < 30)]") + } + } + } + private def getHiveTableScanExec(query: String): HiveTableScanExec = { sql(query).queryExecution.sparkPlan.collectFirst { case p: HiveTableScanExec => p From d01594e8d186e63a6c3ce361e756565e830d5237 Mon Sep 17 00:00:00 2001 From: Zhen Li Date: Mon, 21 Sep 2020 09:05:40 -0500 Subject: [PATCH 0078/1009] [SPARK-32886][WEBUI] fix 'undefined' link in event timeline view ### What changes were proposed in this pull request? Fix ".../jobs/undefined" link from "Event Timeline" in jobs page. Job page link in "Event Timeline" view is constructed by fetching job page link defined in job list below. when job count exceeds page size of job table, only links of jobs in job table can be fetched from page. Other jobs' link would be 'undefined', and links of them in "Event Timeline" are broken, they are redirected to some wired URL like ".../jobs/undefined". This PR is fixing this wrong link issue. With this PR, job link in "Event Timeline" view would always redirect to correct job page. ### Why are the changes needed? Wrong link (".../jobs/undefined") in "Event Timeline" of jobs page. for example, the first job in below page is not in table below, as job count(116) exceeds page size(100). When clicking it's item in "Event Timeline", page is redirected to ".../jobs/undefined", which is wrong. 
Links in "Event Timeline" should always be correct. ![undefinedlink](https://user-images.githubusercontent.com/10524738/93184779-83fa6d80-f6f1-11ea-8a80-1a304ca9cbb2.JPG) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually tested. Closes #29757 from zhli1142015/fix-link-event-timeline-view. Authored-by: Zhen Li Signed-off-by: Sean Owen --- .../apache/spark/ui/static/timeline-view.js | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js index 5be8cffd1f8db..220b76a0f1b27 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/timeline-view.js @@ -42,26 +42,31 @@ function drawApplicationTimeline(groupArray, eventObjArray, startTime, offset) { setupZoomable("#application-timeline-zoom-lock", applicationTimeline); setupExecutorEventAction(); + function getIdForJobEntry(baseElem) { + var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); + var jobId = jobIdText.match("\\(Job (\\d+)\\)$")[1]; + return jobId; + } + + function getSelectorForJobEntry(jobId) { + return "#job-" + jobId; + } + function setupJobEventAction() { $(".vis-item.vis-range.job.application-timeline-object").each(function() { - var getSelectorForJobEntry = function(baseElem) { - var jobIdText = $($(baseElem).find(".application-timeline-content")[0]).text(); - var jobId = jobIdText.match("\\(Job (\\d+)\\)$")[1]; - return "#job-" + jobId; - }; - $(this).click(function() { - var jobPagePath = $(getSelectorForJobEntry(this)).find("a.name-link").attr("href"); - window.location.href = jobPagePath + var jobId = getIdForJobEntry(this); + var jobPagePath = uiRoot + appBasePath + "/jobs/job/?id=" + jobId; + window.location.href = jobPagePath; }); $(this).hover( function() { - $(getSelectorForJobEntry(this)).addClass("corresponding-item-hover"); + $(getSelectorForJobEntry(getIdForJobEntry(this))).addClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("show"); }, function() { - $(getSelectorForJobEntry(this)).removeClass("corresponding-item-hover"); + $(getSelectorForJobEntry(getIdForJobEntry(this))).removeClass("corresponding-item-hover"); $($(this).find("div.application-timeline-content")[0]).tooltip("hide"); } ); @@ -125,26 +130,34 @@ function drawJobTimeline(groupArray, eventObjArray, startTime, offset) { setupZoomable("#job-timeline-zoom-lock", jobTimeline); setupExecutorEventAction(); + function getStageIdAndAttemptForStageEntry(baseElem) { + var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); + var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)$")[1].split("."); + return stageIdAndAttempt; + } + + function getSelectorForStageEntry(stageIdAndAttempt) { + return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; + } + function setupStageEventAction() { $(".vis-item.vis-range.stage.job-timeline-object").each(function() { - var getSelectorForStageEntry = function(baseElem) { - var stageIdText = $($(baseElem).find(".job-timeline-content")[0]).text(); - var stageIdAndAttempt = stageIdText.match("\\(Stage (\\d+\\.\\d+)\\)$")[1].split("."); - return "#stage-" + stageIdAndAttempt[0] + "-" + stageIdAndAttempt[1]; - }; - $(this).click(function() { - var stagePagePath = 
$(getSelectorForStageEntry(this)).find("a.name-link").attr("href") - window.location.href = stagePagePath + var stageIdAndAttempt = getStageIdAndAttemptForStageEntry(this); + var stagePagePath = uiRoot + appBasePath + + "/stages/stage/?id=" + stageIdAndAttempt[0] + "&attempt=" + stageIdAndAttempt[1]; + window.location.href = stagePagePath; }); $(this).hover( function() { - $(getSelectorForStageEntry(this)).addClass("corresponding-item-hover"); + $(getSelectorForStageEntry(getStageIdAndAttemptForStageEntry(this))) + .addClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("show"); }, function() { - $(getSelectorForStageEntry(this)).removeClass("corresponding-item-hover"); + $(getSelectorForStageEntry(getStageIdAndAttemptForStageEntry(this))) + .removeClass("corresponding-item-hover"); $($(this).find("div.job-timeline-content")[0]).tooltip("hide"); } ); From 5440ea84eeb2008d70cf890f0e3765167c2b6a62 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Tue, 22 Sep 2020 11:04:14 +0900 Subject: [PATCH 0079/1009] [SPARK-32312][DOC][FOLLOWUP] Fix the minimum version of PyArrow in the installation guide ### What changes were proposed in this pull request? Now that the minimum version of PyArrow is `1.0.0`, we should update the version in the installation guide. ### Why are the changes needed? The minimum version of PyArrow was upgraded to `1.0.0`. ### Does this PR introduce _any_ user-facing change? Users see the correct minimum version in the installation guide. ### How was this patch tested? N/A Closes #29829 from ueshin/issues/SPARK-32312/doc. Authored-by: Takuya UESHIN Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 03570e6626d90..8516d514c7c72 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -129,7 +129,7 @@ Package Minimum supported version Note ============= ========================= ================ `pandas` 0.23.2 Optional for SQL `NumPy` 1.7 Required for ML -`pyarrow` 0.15.1 Optional for SQL +`pyarrow` 1.0.0 Optional for SQL `Py4J` 0.10.9 Required ============= ========================= ================ From f03c03576a34e6888da6eeb870dae1f6189b62c1 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 21 Sep 2020 21:43:17 -0700 Subject: [PATCH 0080/1009] [SPARK-32951][SQL] Foldable propagation from Aggregate ### What changes were proposed in this pull request? This PR adds foldable propagation from `Aggregate` as per: https://github.com/apache/spark/pull/29771#discussion_r490412031 ### Why are the changes needed? This is an improvement as `Aggregate`'s `aggregateExpressions` can contain foldables that can be propagated up. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT. Closes #29816 from peter-toth/SPARK-32951-foldable-propagation-from-aggregate. 
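For readers who want to see the effect interactively, here is a minimal spark-shell style example mirroring the new unit test; the column names are illustrative, and the exact optimized plan shape may differ because other rules such as constant folding and project collapsing also run:

```scala
// Build an aggregate that produces a literal alias, then reference that alias above the aggregate.
import org.apache.spark.sql.functions._

val df = spark.range(10).toDF("a").withColumn("b", $"a" % 3)
val q = df.groupBy($"a")
  .agg(sum($"b").as("b"), lit(1).as("c"))
  .select($"a", $"b", ($"c" + 1).as("c2"))

// With foldable propagation from Aggregate, the reference to `c` above the aggregate
// can be replaced by the literal 1, so `c + 1` may fold to the constant 2 in the
// optimized plan instead of being recomputed from the aggregate output.
q.explain(true)
```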
Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/expressions.scala | 19 +- .../optimizer/FoldablePropagationSuite.scala | 12 + .../q14a.sf100/explain.txt | 50 +- .../q14a.sf100/simplified.txt | 12 +- .../approved-plans-v1_4/q14a/explain.txt | 50 +- .../approved-plans-v1_4/q14a/simplified.txt | 12 +- .../q14b.sf100/explain.txt | 30 +- .../q14b.sf100/simplified.txt | 10 +- .../approved-plans-v1_4/q14b/explain.txt | 30 +- .../approved-plans-v1_4/q14b/simplified.txt | 10 +- .../approved-plans-v1_4/q41.sf100/explain.txt | 12 +- .../q41.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q41/explain.txt | 12 +- .../approved-plans-v1_4/q41/simplified.txt | 4 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 30 +- .../q14.sf100/simplified.txt | 10 +- .../approved-plans-v2_7/q14/explain.txt | 30 +- .../approved-plans-v2_7/q14/simplified.txt | 10 +- .../q14a.sf100/explain.txt | 530 +++++++++--------- .../q14a.sf100/simplified.txt | 60 +- .../approved-plans-v2_7/q14a/explain.txt | 530 +++++++++--------- .../approved-plans-v2_7/q14a/simplified.txt | 60 +- 22 files changed, 775 insertions(+), 752 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index c4e4b25d570dd..0e7a39c54050e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -633,11 +633,16 @@ object FoldablePropagation extends Rule[LogicalPlan] { val (newChild, foldableMap) = propagateFoldables(p.child) val newProject = replaceFoldable(p.withNewChildren(Seq(newChild)).asInstanceOf[Project], foldableMap) - val newFoldableMap = AttributeMap(newProject.projectList.collect { - case a: Alias if a.child.foldable => (a.toAttribute, a) - }) + val newFoldableMap = collectFoldables(newProject.projectList) (newProject, newFoldableMap) + case a: Aggregate => + val (newChild, foldableMap) = propagateFoldables(a.child) + val newAggregate = + replaceFoldable(a.withNewChildren(Seq(newChild)).asInstanceOf[Aggregate], foldableMap) + val newFoldableMap = collectFoldables(newAggregate.aggregateExpressions) + (newAggregate, newFoldableMap) + // We can not replace the attributes in `Expand.output`. If there are other non-leaf // operators that have the `output` field, we should put them here too. case e: Expand => @@ -703,6 +708,12 @@ object FoldablePropagation extends Rule[LogicalPlan] { } } + private def collectFoldables(expressions: Seq[NamedExpression]) = { + AttributeMap(expressions.collect { + case a: Alias if a.child.foldable => (a.toAttribute, a) + }) + } + /** * List of all [[UnaryNode]]s which allow foldable propagation. */ @@ -710,7 +721,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { // Handling `Project` is moved to `propagateFoldables`. case _: Filter => true case _: SubqueryAlias => true - case _: Aggregate => true + // Handling `Aggregate` is moved to `propagateFoldables`. 
case _: Window => true case _: Sample => true case _: GlobalLimit => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 59dfd3a7932bd..fe43e8e288673 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -192,4 +192,16 @@ class FoldablePropagationSuite extends PlanTest { val optimized = Optimize.execute(query) comparePlans(optimized, query) } + + test("SPARK-32951: Foldable propagation from Aggregate") { + val query = testRelation + .groupBy('a)('a, sum('b).as('b), Literal(1).as('c)) + .select('a, 'b, 'c) + + val optimized = Optimize.execute(query.analyze) + val correctAnswer = testRelation + .groupBy('a)('a, sum('b).as('b), Literal(1).as('c)) + .select('a, 'b, Literal(1).as('c)).analyze + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index c3e9f9418cef5..b346701fa3148 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -517,15 +517,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#42, count(1)#41 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (86) Filter [codegen id : 39] -Input [7]: [channel#42, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (87) Project [codegen id : 39] -Output [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Output [6]: [sales#42, number_sales#43, store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (88) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -601,15 +601,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), 
true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#60, count(1)#59 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (105) Filter [codegen id : 78] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (106) Project [codegen id : 78] -Output [6]: [sales#61, number_sales#62, channel#60, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Output [6]: [sales#60, number_sales#61, catalog AS channel#63, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (107) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -685,26 +685,26 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, cou Keys [3]: 
[i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#76, count(1)#75 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (124) Filter [codegen id : 117] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (125) Project [codegen id : 117] -Output [6]: 
[sales#77, number_sales#78, channel#76, i_brand_id#7, i_class_id#8, i_category_id#9] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Output [6]: [sales#76, number_sales#77, web AS channel#79, i_brand_id#7, i_class_id#8, i_category_id#9] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (126) Union (127) Expand [codegen id : 118] -Input [6]: [sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: [List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 0), List(sales#43, number_sales#44, channel#42, i_brand_id#7, i_class_id#8, null, 1), List(sales#43, number_sales#44, channel#42, i_brand_id#7, null, null, 3), List(sales#43, number_sales#44, channel#42, null, null, null, 7), List(sales#43, number_sales#44, null, null, null, null, 15)], [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Input [6]: [sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Arguments: [List(sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 0), List(sales#42, number_sales#43, channel#47, i_brand_id#7, i_class_id#8, null, 1), List(sales#42, number_sales#43, channel#47, i_brand_id#7, null, null, 3), List(sales#42, number_sales#43, channel#47, null, null, null, 7), List(sales#42, number_sales#43, null, null, null, null, 15)], [sales#42, number_sales#43, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] (128) HashAggregate [codegen id : 118] -Input [7]: [sales#43, number_sales#44, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] +Input [7]: [sales#42, number_sales#43, channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] Aggregate Attributes [3]: [sum#85, isEmpty#86, sum#87] Results [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] @@ -715,9 +715,9 @@ Arguments: hashpartitioning(channel#80, i_brand_id#81, i_class_id#82, i_category (130) HashAggregate [codegen id : 119] Input [8]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84, sum#88, isEmpty#89, sum#90] Keys [5]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, spark_grouping_id#84] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#92, sum(number_sales#44)#93] -Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#43)#92 AS sum(sales)#94, sum(number_sales#44)#93 AS sum(number_sales)#95] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#92, sum(number_sales#43)#93] +Results [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales#42)#92 AS 
sum(sales)#94, sum(number_sales#43)#93 AS sum(number_sales)#95] (131) TakeOrderedAndProject Input [6]: [channel#80, i_brand_id#81, i_class_id#82, i_category_id#83, sum(sales)#94, sum(number_sales)#95] @@ -725,7 +725,7 @@ Arguments: 100, [channel#80 ASC NULLS FIRST, i_brand_id#81 ASC NULLS FIRST, i_cl ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] * HashAggregate (157) +- Exchange (156) +- * HashAggregate (155) @@ -871,8 +871,8 @@ Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as de Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108] Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#97 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#98 as decimal(12,2)))), DecimalType(18,2), true))#108 AS average_sales#109] -Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt index c6dbfcaa3fe43..5b93392d023db 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Union WholeStageCodegen (39) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -53,7 +53,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #17 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (38) @@ -189,10 +189,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [ss_item_sk] #4 WholeStageCodegen (78) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (77) @@ -221,10 +221,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #14 WholeStageCodegen (117) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #20 WholeStageCodegen 
(116) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index c1b77321f16e6..3f0cc9e7acb1e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -461,15 +461,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (76) Filter [codegen id : 26] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (77) Project [codegen id : 26] -Output [6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Output [6]: [sales#39, number_sales#40, store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (78) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] @@ -533,15 +533,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (92) Filter [codegen id : 52] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (93) Project [codegen id : 52] -Output [6]: [sales#57, number_sales#58, channel#56, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Output [6]: [sales#56, number_sales#57, catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (94) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] @@ -605,26 +605,26 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (108) Filter [codegen id : 78] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (109) Project [codegen id : 78] -Output [6]: [sales#72, number_sales#73, channel#71, i_brand_id#6, i_class_id#7, i_category_id#8] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Output [6]: [sales#71, number_sales#72, web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (110) Union (111) Expand [codegen id : 79] -Input [6]: [sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Arguments: [List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 0), 
List(sales#40, number_sales#41, channel#39, i_brand_id#6, i_class_id#7, null, 1), List(sales#40, number_sales#41, channel#39, i_brand_id#6, null, null, 3), List(sales#40, number_sales#41, channel#39, null, null, null, 7), List(sales#40, number_sales#41, null, null, null, null, 15)], [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Input [6]: [sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Arguments: [List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 0), List(sales#39, number_sales#40, channel#44, i_brand_id#6, i_class_id#7, null, 1), List(sales#39, number_sales#40, channel#44, i_brand_id#6, null, null, 3), List(sales#39, number_sales#40, channel#44, null, null, null, 7), List(sales#39, number_sales#40, null, null, null, null, 15)], [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] (112) HashAggregate [codegen id : 79] -Input [7]: [sales#40, number_sales#41, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] +Input [7]: [sales#39, number_sales#40, channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] Results [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] @@ -635,9 +635,9 @@ Arguments: hashpartitioning(channel#75, i_brand_id#76, i_class_id#77, i_category (114) HashAggregate [codegen id : 80] Input [8]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79, sum#83, isEmpty#84, sum#85] Keys [5]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, spark_grouping_id#79] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#87, sum(number_sales#41)#88] -Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#40)#87 AS sum(sales)#89, sum(number_sales#41)#88 AS sum(number_sales)#90] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#87, sum(number_sales#40)#88] +Results [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales#39)#87 AS sum(sales)#89, sum(number_sales#40)#88 AS sum(number_sales)#90] (115) TakeOrderedAndProject Input [6]: [channel#75, i_brand_id#76, i_class_id#77, i_category_id#78, sum(sales)#89, sum(number_sales)#90] @@ -645,7 +645,7 @@ Arguments: 100, [channel#75 ASC NULLS FIRST, i_brand_id#76 ASC NULLS FIRST, i_cl ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (141) +- Exchange (140) +- * HashAggregate (139) @@ -791,8 +791,8 @@ Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as de Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103] Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#92 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(list_price#93 as decimal(12,2)))), DecimalType(18,2), true))#103 AS average_sales#104] -Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt index 604bd792f5ffd..dfa8c1bcc1579 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/simplified.txt @@ -9,7 +9,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Union WholeStageCodegen (26) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -53,7 +53,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #14 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #2 WholeStageCodegen (25) @@ -165,10 +165,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] WholeStageCodegen (52) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as 
decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (51) @@ -189,10 +189,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum(sales),su InputAdapter ReusedExchange [d_date_sk] #12 WholeStageCodegen (78) - Project [sales,number_sales,channel,i_brand_id,i_class_id,i_category_id] + Project [sales,number_sales,i_brand_id,i_class_id,i_category_id] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #16 WholeStageCodegen (77) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index f71ceaaf91f47..2d2b56e32bdb8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -496,15 +496,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] -Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#44, count(1)#43 AS number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (86) Filter [codegen id : 78] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46 as decimal(32,6)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (87) Project [codegen id : 78] -Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Output [6]: [store AS channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (88) ReusedExchange [Reuses operator id: 4] Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -584,18 +584,18 @@ Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] -Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#66, count(1)#65 AS number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (106) Filter [codegen id : 77] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (107) Project [codegen id : 77] -Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Output [6]: [store AS channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (108) BroadcastExchange -Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [6]: [channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] (109) BroadcastHashJoin [codegen id : 78] @@ -604,12 +604,12 @@ Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Join condition: None (110) TakeOrderedAndProject -Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [12]: [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Arguments: 100, [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#47, [id=#48] * HashAggregate (136) +- Exchange (135) +- * HashAggregate (134) @@ -780,7 +780,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_d Output [1]: [d_week_seq#29] Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] -Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] * Project (144) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt index 37186560cb3b8..d6b8ba4395d2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (78) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #16 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (38) @@ -190,10 +190,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #17 WholeStageCodegen (77) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (76) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index b68ce0e9f2264..1f31ded51f1ef 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -446,15 +446,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] -Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (76) Filter [codegen id : 52] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (77) Project [codegen id : 52] -Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (78) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -537,18 +537,18 @@ Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, count(1)#62] -Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (96) Filter [codegen id : 51] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (97) Project [codegen id : 51] -Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (98) BroadcastExchange -Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] (99) BroadcastHashJoin [codegen id : 52] @@ -557,12 +557,12 @@ Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Join condition: None (100) TakeOrderedAndProject -Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC 
NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (126) +- Exchange (125) +- * HashAggregate (124) @@ -733,7 +733,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_d Output [1]: [d_week_seq#28] Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * Project (134) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt index 6e6950d4cb33a..7bbf83e3de707 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (52) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #13 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity 
as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -166,10 +166,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #14 WholeStageCodegen (51) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt index c5eb50e25d82c..13d73e61e1443 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt @@ -73,19 +73,19 @@ Input [2]: [i_manufact#2, count#9] Keys [1]: [i_manufact#2] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#11] -Results [3]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] (12) Filter [codegen id : 2] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] -Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) +Input [2]: [item_cnt#12, i_manufact#2#13] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#2#13] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Input [2]: [item_cnt#12, i_manufact#2#13] (14) BroadcastExchange Input [1]: [i_manufact#2#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: 
[i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt index 350aa9a3c572b..2d14d75ca9362 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/simplified.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject [i_product_name] BroadcastExchange #2 WholeStageCodegen (2) Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,alwaysTrue,count] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] InputAdapter Exchange [i_manufact] #3 WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt index c5eb50e25d82c..13d73e61e1443 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -73,19 +73,19 @@ Input [2]: [i_manufact#2, count#9] Keys [1]: [i_manufact#2] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#11] -Results [3]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13, true AS alwaysTrue#14] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#2 AS i_manufact#2#13] (12) Filter [codegen id : 2] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] -Condition : (if (isnull(alwaysTrue#14)) 0 else item_cnt#12 > 0) +Input [2]: [item_cnt#12, i_manufact#2#13] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#2#13] -Input [3]: [item_cnt#12, i_manufact#2#13, alwaysTrue#14] +Input [2]: [item_cnt#12, i_manufact#2#13] (14) BroadcastExchange Input [1]: [i_manufact#2#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: [i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#16] +Arguments: hashpartitioning(i_product_name#3, 5), true, [id=#15] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt index 350aa9a3c572b..2d14d75ca9362 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/simplified.txt @@ -16,8 +16,8 @@ TakeOrderedAndProject [i_product_name] BroadcastExchange #2 WholeStageCodegen (2) Project [i_manufact] - Filter [alwaysTrue,item_cnt] - HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,alwaysTrue,count] + Filter [item_cnt] + HashAggregate [i_manufact,count] [count(1),item_cnt,i_manufact,count] InputAdapter Exchange [i_manufact] #3 
WholeStageCodegen (1) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index 25a1ca79cc500..dad6098ce4685 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -496,15 +496,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#38, isEmpty#39, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42, count(1)#43] -Results [7]: [store AS channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#45, count(1)#43 AS number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sales#44, count(1)#43 AS number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (86) Filter [codegen id : 78] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47 as decimal(32,6)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46 as decimal(32,6)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (87) Project [codegen id : 78] -Output [6]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46] -Input [7]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#47] +Output [6]: [store AS channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#46] (88) ReusedExchange [Reuses operator id: 4] Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -584,18 +584,18 @@ Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum#60, isEmpty#61, Keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64, count(1)#65] -Results [7]: [store AS channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#67, count(1)#65 AS number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Results [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sales#66, count(1)#65 AS number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#64 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (106) Filter [codegen id : 77] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#48, [id=#49] as decimal(32,6)))) +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#47, [id=#48] as decimal(32,6)))) (107) Project [codegen id : 77] -Output [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Input [7]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#69] +Output [6]: [store AS channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Input [6]: [i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#68] (108) BroadcastExchange -Input [6]: [channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [6]: [channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#70] (109) BroadcastHashJoin [codegen id : 78] @@ -604,12 +604,12 @@ Right keys [3]: [i_brand_id#54, i_class_id#55, i_category_id#56] Join condition: None (110) TakeOrderedAndProject -Input [12]: [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] -Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#44, i_brand_id#7, i_class_id#8, i_category_id#9, sales#45, number_sales#46, channel#66, i_brand_id#54, i_class_id#55, i_category_id#56, sales#67, number_sales#68] +Input [12]: [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] +Arguments: 100, [i_brand_id#7 ASC NULLS 
FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#49, i_brand_id#7, i_class_id#8, i_category_id#9, sales#44, number_sales#45, channel#69, i_brand_id#54, i_class_id#55, i_category_id#56, sales#66, number_sales#67] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#47, [id=#48] * HashAggregate (136) +- Exchange (135) +- * HashAggregate (134) @@ -780,7 +780,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#89)) AND isnotnull(d_d Output [1]: [d_week_seq#29] Input [4]: [d_week_seq#29, d_year#11, d_moy#89, d_dom#90] -Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#48, [id=#49] +Subquery:3 Hosting operator id = 106 Hosting Expression = ReusedSubquery Subquery scalar-subquery#47, [id=#48] Subquery:4 Hosting operator id = 95 Hosting Expression = Subquery scalar-subquery#50, [id=#51] * Project (144) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt index 37186560cb3b8..d6b8ba4395d2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (78) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #16 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 
WholeStageCodegen (38) @@ -190,10 +190,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #17 WholeStageCodegen (77) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #18 WholeStageCodegen (76) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index ea0e8319f3fe0..1af2e69d57338 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -446,15 +446,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#35, isEmpty#36, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39, count(1)#40] -Results [7]: [store AS channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sales#42, count(1)#40 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#39 AS sales#41, count(1)#40 AS number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#39 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (76) Filter [codegen id : 52] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43 as decimal(32,6)) > cast(Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (77) Project [codegen id : 52] -Output [6]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43] -Input [7]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Output [6]: [store AS channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#43] (78) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -537,18 +537,18 @@ Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#57, isEmpty#58, Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61, 
count(1)#62] -Results [7]: [store AS channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#64, count(1)#62 AS number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sales#63, count(1)#62 AS number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#61 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (96) Filter [codegen id : 51] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#44, [id=#45] as decimal(32,6)))) (97) Project [codegen id : 51] -Output [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Input [7]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#66] +Output [6]: [store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Input 
[6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#65] (98) BroadcastExchange -Input [6]: [channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [6]: [channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#67] (99) BroadcastHashJoin [codegen id : 52] @@ -557,12 +557,12 @@ Right keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] Join condition: None (100) TakeOrderedAndProject -Input [12]: [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] -Arguments: 100, [channel#41 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#41, i_brand_id#6, i_class_id#7, i_category_id#8, sales#42, number_sales#43, channel#63, i_brand_id#48, i_class_id#49, i_category_id#50, sales#64, number_sales#65] +Input [12]: [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] +Arguments: 100, [i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#46, i_brand_id#6, i_class_id#7, i_category_id#8, sales#41, number_sales#42, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#45, [id=#46] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (126) +- Exchange (125) +- * HashAggregate (124) @@ -733,7 +733,7 @@ Condition : (((((isnotnull(d_year#11) AND isnotnull(d_moy#86)) AND isnotnull(d_d Output [1]: [d_week_seq#28] Input [4]: [d_week_seq#28, d_year#11, d_moy#86, d_dom#87] -Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:3 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#44, [id=#45] Subquery:4 Hosting operator id = 88 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * Project (134) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt index 6e6950d4cb33a..7bbf83e3de707 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/simplified.txt @@ -1,7 +1,7 @@ -TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] +TakeOrderedAndProject [i_brand_id,i_class_id,i_category_id,channel,sales,number_sales,channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] WholeStageCodegen (52) BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_brand_id,i_class_id,i_category_id] - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project 
[i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #2 WholeStageCodegen (8) @@ -45,7 +45,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #13 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #1 WholeStageCodegen (25) @@ -166,10 +166,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sales,number_ InputAdapter BroadcastExchange #14 WholeStageCodegen (51) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #2 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #15 WholeStageCodegen (50) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 8c697ff080952..38292528b42fc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -608,15 +608,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40, count(1)#41] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#43, count(1)#41 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sales#42, count(1)#41 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#40 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (86) Filter [codegen id : 39] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45 as decimal(32,6)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#44 as decimal(32,6)) > cast(Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (87) Project [codegen id : 39] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#45] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#44] (88) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -692,15 +692,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58, count(1)#59] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#61, count(1)#59 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sales#60, count(1)#59 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#58 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (105) Filter [codegen id : 78] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), 
true))#63) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (106) Project [codegen id : 78] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#63] +Output [6]: [catalog AS channel#63, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#62] (107) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -776,35 +776,35 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74, count(1)#75] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sales#77, count(1)#75 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), 
DecimalType(18,2), true))#74 AS sales#76, count(1)#75 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#74 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (124) Filter [codegen id : 117] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (125) Project [codegen id : 117] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#79] +Output [6]: [web AS channel#79, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#78] (126) Union (127) HashAggregate [codegen id : 118] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] Aggregate Attributes [3]: [sum#80, isEmpty#81, sum#82] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, 
isEmpty#84, sum#85] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] (128) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] (129) HashAggregate [codegen id : 119] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#87, sum(number_sales#44)#88] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(sales#43)#87 AS sum_sales#89, sum(number_sales#44)#88 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#87, sum(number_sales#43)#88] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum(sales#42)#87 AS sum_sales#89, sum(number_sales#43)#88 AS number_sales#90] (130) ReusedExchange [Reuses operator id: 84] Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, count#93] @@ -814,15 +814,15 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#91, isEmpty#92, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94, count(1)#95] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sales#43, count(1)#95 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sales#42, count(1)#95 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#94 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#96] (132) Filter [codegen id : 158] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (133) Project [codegen id : 158] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#96] (134) ReusedExchange [Reuses operator id: 103] Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, count#99] @@ -832,435 +832,435 @@ Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#97, isEmpty#98, cou Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100, count(1)#101] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#61, count(1)#101 AS number_sales#62, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sales#60, count(1)#101 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#100 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] (136) Filter [codegen id : 197] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (137) Project [codegen id : 197] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] +Output [6]: [catalog AS channel#103, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#102] (138) ReusedExchange [Reuses operator 
id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#104, isEmpty#105, count#106] (139) HashAggregate [codegen id : 236] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#103, isEmpty#104, count#105] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#104, isEmpty#105, count#106] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106, count(1)#107] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sales#77, count(1)#107 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#106 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107, count(1)#108] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sales#76, count(1)#108 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#107 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] (140) Filter [codegen id : 236] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (141) Project [codegen id : 236] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#108] +Output [6]: [web AS channel#110, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#109] (142) Union (143) HashAggregate [codegen id : 237] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#109, isEmpty#110, sum#111] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#111, isEmpty#112, sum#113] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] (144) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#115] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#117] (145) HashAggregate [codegen id : 238] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#112, isEmpty#113, sum#114] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#116, sum(number_sales#44)#117] -Results [5]: [channel#42, i_brand_id#7, i_class_id#8, sum(sales#43)#116 AS sum_sales#89, sum(number_sales#44)#117 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#118, sum(number_sales#43)#119] +Results [5]: [channel#47, i_brand_id#7, i_class_id#8, sum(sales#42)#118 AS 
sum_sales#89, sum(number_sales#43)#119 AS number_sales#90] (146) HashAggregate [codegen id : 238] -Input [5]: [channel#42, i_brand_id#7, i_class_id#8, sum_sales#89, number_sales#90] -Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Input [5]: [channel#47, i_brand_id#7, i_class_id#8, sum_sales#89, number_sales#90] +Keys [3]: [channel#47, i_brand_id#7, i_class_id#8] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#118, isEmpty#119, sum#120] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] +Aggregate Attributes [3]: [sum#120, isEmpty#121, sum#122] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] (147) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, 5), true, [id=#124] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), true, [id=#126] (148) HashAggregate [codegen id : 239] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, sum#121, isEmpty#122, sum#123] -Keys [3]: [channel#42, i_brand_id#7, i_class_id#8] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] +Keys [3]: [channel#47, i_brand_id#7, i_class_id#8] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#125, sum(number_sales#90)#126] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, null AS i_category_id#127, sum(sum_sales#89)#125 AS sum(sum_sales)#128, sum(number_sales#90)#126 AS sum(number_sales)#129] +Aggregate Attributes [2]: [sum(sum_sales#89)#127, sum(number_sales#90)#128] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, null AS i_category_id#129, sum(sum_sales#89)#127 AS sum(sum_sales)#130, sum(number_sales#90)#128 AS sum(number_sales)#131] (149) Union (150) HashAggregate [codegen id : 240] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (151) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#130] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#132] (152) HashAggregate [codegen id : 241] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (153) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] (154) HashAggregate [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#131, isEmpty#132, count#133] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134, count(1)#135] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sales#43, count(1)#135 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#134 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sales#42, count(1)#137 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] (155) Filter [codegen id : 280] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as 
decimal(12,2)))), DecimalType(18,2), true))#136 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (156) Project [codegen id : 280] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] (157) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] (158) HashAggregate [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#137, isEmpty#138, count#139] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140, count(1)#141] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sales#61, count(1)#141 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#140 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142, count(1)#143] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sales#60, count(1)#143 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] (159) Filter [codegen id : 319] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (160) Project [codegen id : 319] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142] +Output [6]: [catalog AS channel#145, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] (161) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#143, isEmpty#144, count#145] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] (162) HashAggregate [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#143, isEmpty#144, count#145] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146, count(1)#147] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sales#77, count(1)#147 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#146 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149, count(1)#150] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sales#76, count(1)#150 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] (163) Filter [codegen id : 358] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 
as decimal(12,2)))), DecimalType(18,2), true))#151) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (164) Project [codegen id : 358] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148] +Output [6]: [web AS channel#152, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] (165) Union (166) HashAggregate [codegen id : 359] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#149, isEmpty#150, sum#151] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] (167) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#155] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#159] (168) HashAggregate [codegen id : 360] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#152, isEmpty#153, sum#154] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#156, sum(number_sales#44)#157] -Results [4]: [channel#42, i_brand_id#7, sum(sales#43)#156 AS sum_sales#89, sum(number_sales#44)#157 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#160, sum(number_sales#43)#161] +Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#160 AS sum_sales#89, sum(number_sales#43)#161 AS number_sales#90] (169) HashAggregate [codegen id : 360] -Input [4]: [channel#42, i_brand_id#7, sum_sales#89, number_sales#90] -Keys [2]: [channel#42, i_brand_id#7] +Input [4]: [channel#47, i_brand_id#7, sum_sales#89, number_sales#90] +Keys [2]: [channel#47, 
i_brand_id#7] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#158, isEmpty#159, sum#160] -Results [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] +Aggregate Attributes [3]: [sum#162, isEmpty#163, sum#164] +Results [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] (170) Exchange -Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] -Arguments: hashpartitioning(channel#42, i_brand_id#7, 5), true, [id=#164] +Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), true, [id=#168] (171) HashAggregate [codegen id : 361] -Input [5]: [channel#42, i_brand_id#7, sum#161, isEmpty#162, sum#163] -Keys [2]: [channel#42, i_brand_id#7] +Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#165, sum(number_sales#90)#166] -Results [6]: [channel#42, i_brand_id#7, null AS i_class_id#167, null AS i_category_id#168, sum(sum_sales#89)#165 AS sum(sum_sales)#169, sum(number_sales#90)#166 AS sum(number_sales)#170] +Aggregate Attributes [2]: [sum(sum_sales#89)#169, sum(number_sales#90)#170] +Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#171, null AS i_category_id#172, sum(sum_sales#89)#169 AS sum(sum_sales)#173, sum(number_sales#90)#170 AS sum(number_sales)#174] (172) Union (173) HashAggregate [codegen id : 362] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (174) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#171] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#175] (175) HashAggregate [codegen id : 363] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (176) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] +Output [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#176, isEmpty#177, count#178] (177) HashAggregate [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#172, isEmpty#173, count#174] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175, count(1)#176] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sales#43, count(1)#176 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#175 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179, count(1)#180] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sales#42, count(1)#180 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] (178) Filter [codegen id : 402] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (179) Project [codegen id : 402] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] (180) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] (181) HashAggregate [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#178, isEmpty#179, count#180] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181, count(1)#182] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sales#61, count(1)#182 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#181 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185, count(1)#186] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#60, count(1)#186 AS number_sales#61, 
sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] (182) Filter [codegen id : 441] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (183) Project [codegen id : 441] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183] +Output [6]: [catalog AS channel#188, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] (184) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] (185) HashAggregate [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#184, isEmpty#185, count#186] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] 
-Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#77, count(1)#188 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192, count(1)#193] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sales#76, count(1)#193 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] (186) Filter [codegen id : 480] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (187) Project [codegen id : 480] -Output [6]: [channel#76, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#189] +Output [6]: [web AS channel#195, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] (188) Union (189) HashAggregate [codegen id : 481] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#190, isEmpty#191, sum#192] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#196, isEmpty#197, sum#198] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] (190) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#196] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#202] (191) HashAggregate [codegen id : 482] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#193, isEmpty#194, sum#195] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#197, sum(number_sales#44)#198] -Results [3]: [channel#42, sum(sales#43)#197 AS sum_sales#89, sum(number_sales#44)#198 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#203, sum(number_sales#43)#204] +Results [3]: [channel#47, sum(sales#42)#203 AS sum_sales#89, sum(number_sales#43)#204 AS number_sales#90] (192) HashAggregate [codegen id : 482] -Input [3]: [channel#42, sum_sales#89, number_sales#90] -Keys [1]: [channel#42] +Input [3]: [channel#47, sum_sales#89, number_sales#90] +Keys [1]: [channel#47] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#199, isEmpty#200, sum#201] -Results [4]: [channel#42, sum#202, isEmpty#203, sum#204] +Aggregate Attributes [3]: [sum#205, isEmpty#206, sum#207] +Results [4]: [channel#47, sum#208, isEmpty#209, sum#210] (193) Exchange -Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] -Arguments: hashpartitioning(channel#42, 5), true, [id=#205] +Input [4]: [channel#47, sum#208, isEmpty#209, 
sum#210] +Arguments: hashpartitioning(channel#47, 5), true, [id=#211] (194) HashAggregate [codegen id : 483] -Input [4]: [channel#42, sum#202, isEmpty#203, sum#204] -Keys [1]: [channel#42] +Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] +Keys [1]: [channel#47] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#206, sum(number_sales#90)#207] -Results [6]: [channel#42, null AS i_brand_id#208, null AS i_class_id#209, null AS i_category_id#210, sum(sum_sales#89)#206 AS sum(sum_sales)#211, sum(number_sales#90)#207 AS sum(number_sales)#212] +Aggregate Attributes [2]: [sum(sum_sales#89)#212, sum(number_sales#90)#213] +Results [6]: [channel#47, null AS i_brand_id#214, null AS i_class_id#215, null AS i_category_id#216, sum(sum_sales#89)#212 AS sum(sum_sales)#217, sum(number_sales#90)#213 AS sum(number_sales)#218] (195) Union (196) HashAggregate [codegen id : 484] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (197) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#213] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#219] (198) HashAggregate [codegen id : 485] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (199) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] (200) HashAggregate [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#214, isEmpty#215, count#216] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217, count(1)#218] -Results [7]: [store AS channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sales#43, count(1)#218 AS number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#42, count(1)#224 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] (201) Filter [codegen id : 524] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (202) Project [codegen id : 524] -Output [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 
sales#43, number_sales#44] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#219] +Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] (203) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] (204) HashAggregate [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] -Results [7]: [catalog AS channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#61, count(1)#224 AS number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#60, count(1)#230 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] (205) Filter [codegen id : 563] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (206) Project [codegen id : 563] -Output [6]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62] -Input [7]: [channel#60, i_brand_id#7, i_class_id#8, i_category_id#9, sales#61, number_sales#62, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#225] +Output [6]: [catalog AS channel#232, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] (207) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] (208) HashAggregate [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] -Results [7]: [web AS channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#77, count(1)#230 AS number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236, count(1)#237] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sales#76, count(1)#237 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] (209) Filter [codegen id : 602] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#46, [id=#47] as decimal(32,6)))) +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) (210) Project [codegen id : 602] -Output [6]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78] -Input [7]: [channel#76, i_brand_id#7, i_class_id#8, i_category_id#9, sales#77, number_sales#78, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#231] +Output [6]: [web AS channel#239, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] (211) Union (212) HashAggregate [codegen id : 603] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sales#43, number_sales#44] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [partial_sum(sales#43), partial_sum(number_sales#44)] -Aggregate Attributes [3]: [sum#232, isEmpty#233, sum#234] -Results [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] +Aggregate Attributes [3]: [sum#240, isEmpty#241, sum#242] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] (213) Exchange -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#238] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#246] (214) HashAggregate [codegen id : 604] -Input [7]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum#235, isEmpty#236, sum#237] -Keys [4]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9] -Functions [2]: [sum(sales#43), sum(number_sales#44)] -Aggregate Attributes [2]: [sum(sales#43)#239, sum(number_sales#44)#240] -Results [2]: [sum(sales#43)#239 AS sum_sales#89, sum(number_sales#44)#240 AS number_sales#90] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] +Functions [2]: [sum(sales#42), sum(number_sales#43)] +Aggregate Attributes [2]: [sum(sales#42)#247, sum(number_sales#43)#248] +Results [2]: [sum(sales#42)#247 AS sum_sales#89, sum(number_sales#43)#248 AS number_sales#90] (215) HashAggregate [codegen id : 604] Input [2]: [sum_sales#89, number_sales#90] Keys: [] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#241, isEmpty#242, sum#243] -Results [3]: [sum#244, isEmpty#245, sum#246] +Aggregate Attributes [3]: [sum#249, isEmpty#250, sum#251] +Results [3]: [sum#252, isEmpty#253, sum#254] (216) Exchange -Input [3]: [sum#244, isEmpty#245, sum#246] -Arguments: SinglePartition, true, [id=#247] +Input [3]: [sum#252, isEmpty#253, sum#254] +Arguments: SinglePartition, true, [id=#255] (217) HashAggregate [codegen id : 605] -Input [3]: [sum#244, isEmpty#245, sum#246] +Input [3]: [sum#252, isEmpty#253, sum#254] Keys: [] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#248, sum(number_sales#90)#249] -Results [6]: [null AS channel#250, null AS i_brand_id#251, null AS i_class_id#252, null AS i_category_id#253, sum(sum_sales#89)#248 AS sum(sum_sales)#254, sum(number_sales#90)#249 AS sum(number_sales)#255] +Aggregate Attributes [2]: [sum(sum_sales#89)#256, sum(number_sales#90)#257] +Results [6]: [null AS channel#258, null AS i_brand_id#259, null AS i_class_id#260, null AS i_category_id#261, sum(sum_sales#89)#256 AS sum(sum_sales)#262, sum(number_sales#90)#257 AS sum(number_sales)#263] 
(218) Union (219) HashAggregate [codegen id : 606] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (220) Exchange -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#256] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#264] (221) HashAggregate [codegen id : 607] -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] (222) TakeOrderedAndProject -Input [6]: [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: 100, [channel#42 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#42, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Arguments: 100, [channel#47 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] ===== Subqueries ===== -Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] * HashAggregate (252) +- Exchange (251) +- * HashAggregate (250) @@ -1327,7 +1327,7 @@ Input [2]: [d_date_sk#10, d_year#11] (230) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#257] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] (231) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] @@ -1335,7 +1335,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (232) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#258, ss_list_price#4 AS list_price#259] +Output [2]: [ss_quantity#3 AS quantity#266, ss_list_price#4 AS list_price#267] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, 
ss_list_price#4, d_date_sk#10] (233) Scan parquet default.catalog_sales @@ -1372,7 +1372,7 @@ Input [2]: [d_date_sk#10, d_year#11] (240) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#268] (241) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#18] @@ -1380,7 +1380,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (242) Project [codegen id : 4] -Output [2]: [cs_quantity#48 AS quantity#261, cs_list_price#49 AS list_price#262] +Output [2]: [cs_quantity#48 AS quantity#269, cs_list_price#49 AS list_price#270] Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] (243) Scan parquet default.web_sales @@ -1406,55 +1406,55 @@ Right keys [1]: [d_date_sk#10] Join condition: None (248) Project [codegen id : 6] -Output [2]: [ws_quantity#64 AS quantity#263, ws_list_price#65 AS list_price#264] +Output [2]: [ws_quantity#64 AS quantity#271, ws_list_price#65 AS list_price#272] Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] (249) Union (250) HashAggregate [codegen id : 7] -Input [2]: [quantity#258, list_price#259] +Input [2]: [quantity#266, list_price#267] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#265, count#266] -Results [2]: [sum#267, count#268] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#273, count#274] +Results [2]: [sum#275, count#276] (251) Exchange -Input [2]: [sum#267, count#268] -Arguments: SinglePartition, true, [id=#269] +Input [2]: [sum#275, count#276] +Arguments: SinglePartition, true, [id=#277] (252) HashAggregate [codegen id : 8] -Input [2]: [sum#267, count#268] +Input [2]: [sum#275, count#276] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270 AS average_sales#271] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278 AS average_sales#279] -Subquery:2 Hosting operator id = 105 Hosting Expression = 
ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:3 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:4 Hosting operator id = 132 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:4 Hosting operator id = 132 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#46, [id=#47] +Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index e4a9b46cf741d..30856e02f2b62 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -40,7 +40,7 
@@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (39) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -90,7 +90,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #22 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #6 WholeStageCodegen (38) @@ -226,10 +226,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [ss_item_sk] #8 WholeStageCodegen (78) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #23 WholeStageCodegen (77) @@ -258,10 +258,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] 
#18 WholeStageCodegen (117) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #25 WholeStageCodegen (116) @@ -303,24 +303,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (158) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (197) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (236) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (361) @@ -337,24 +337,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (280) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (319) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (358) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (483) @@ -371,24 +371,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union 
WholeStageCodegen (402) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (441) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (480) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 WholeStageCodegen (605) @@ -405,23 +405,23 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (524) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (563) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 WholeStageCodegen (602) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index c54ad0e36216d..238053a3428e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -552,15 +552,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37, count(1)#38] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#40, count(1)#38 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, 
sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sales#39, count(1)#38 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#37 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (76) Filter [codegen id : 26] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42 as decimal(32,6)) > cast(Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41 as decimal(32,6)) > cast(Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (77) Project [codegen id : 26] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#42] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#41] (78) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#16, cs_item_sk#17, cs_quantity#45, cs_list_price#46] @@ -624,15 +624,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54, count(1)#55] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#57, count(1)#55 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sales#56, count(1)#55 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#54 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (92) Filter [codegen id : 52] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (93) Project [codegen id : 52] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#59] +Output [6]: [catalog AS channel#59, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#58] (94) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#20, ws_item_sk#21, ws_quantity#60, ws_list_price#61] @@ -696,35 +696,35 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69, count(1)#70] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#72, count(1)#70 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sales#71, count(1)#70 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#69 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (108) Filter [codegen id : 78] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (109) Project [codegen id : 78] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#74] +Output [6]: [web AS channel#74, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#73] (110) Union (111) HashAggregate [codegen id : 79] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] Aggregate Attributes [3]: [sum#75, isEmpty#76, sum#77] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] (112) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] (113) HashAggregate [codegen id : 80] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#82, sum(number_sales#41)#83] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(sales#40)#82 AS sum_sales#84, sum(number_sales#41)#83 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#82, sum(number_sales#40)#83] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 
sum(sales#39)#82 AS sum_sales#84, sum(number_sales#40)#83 AS number_sales#85] (114) ReusedExchange [Reuses operator id: 74] Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, count#88] @@ -734,15 +734,15 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#86, isEmpty#87, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89, count(1)#90] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sales#40, count(1)#90 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sales#39, count(1)#90 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#89 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] (116) Filter [codegen id : 106] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (117) Project [codegen id : 106] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#91] (118) ReusedExchange [Reuses operator id: 90] Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, count#94] @@ -752,435 +752,435 @@ Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#92, isEmpty#93, cou Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95, count(1)#96] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sales#57, count(1)#96 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sales#56, count(1)#96 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#95 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] (120) Filter [codegen id : 132] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] -Condition 
: (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (121) Project [codegen id : 132] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] +Output [6]: [catalog AS channel#98, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#97] (122) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#99, isEmpty#100, count#101] (123) HashAggregate [codegen id : 158] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#98, isEmpty#99, count#100] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#99, isEmpty#100, count#101] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101, count(1)#102] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#101 AS sales#72, count(1)#102 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#101 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102, count(1)#103] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102 AS sales#71, count(1)#103 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#102 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104] (124) Filter [codegen id : 158] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#104 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (125) Project [codegen id : 158] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#103] +Output [6]: [web AS channel#105, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#104] (126) Union (127) HashAggregate [codegen id : 159] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#104, isEmpty#105, sum#106] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#106, isEmpty#107, sum#108] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] (128) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#110] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#112] (129) HashAggregate [codegen id : 160] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#107, isEmpty#108, sum#109] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#111, sum(number_sales#41)#112] -Results [5]: [channel#39, i_brand_id#6, i_class_id#7, sum(sales#40)#111 AS sum_sales#84, sum(number_sales#41)#112 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#113, sum(number_sales#40)#114] +Results [5]: [channel#44, i_brand_id#6, i_class_id#7, sum(sales#39)#113 AS sum_sales#84, sum(number_sales#40)#114 AS number_sales#85] (130) HashAggregate [codegen id : 160] -Input [5]: [channel#39, i_brand_id#6, i_class_id#7, sum_sales#84, number_sales#85] -Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Input [5]: [channel#44, i_brand_id#6, i_class_id#7, sum_sales#84, number_sales#85] +Keys [3]: [channel#44, i_brand_id#6, i_class_id#7] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#113, isEmpty#114, sum#115] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] +Aggregate Attributes [3]: [sum#115, isEmpty#116, sum#117] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] (131) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, 5), true, [id=#119] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), true, [id=#121] (132) HashAggregate [codegen id : 161] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, sum#116, isEmpty#117, sum#118] -Keys [3]: [channel#39, i_brand_id#6, i_class_id#7] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] +Keys [3]: [channel#44, i_brand_id#6, i_class_id#7] Functions [2]: 
[sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#120, sum(number_sales#85)#121] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, null AS i_category_id#122, sum(sum_sales#84)#120 AS sum(sum_sales)#123, sum(number_sales#85)#121 AS sum(number_sales)#124] +Aggregate Attributes [2]: [sum(sum_sales#84)#122, sum(number_sales#85)#123] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, null AS i_category_id#124, sum(sum_sales#84)#122 AS sum(sum_sales)#125, sum(number_sales#85)#123 AS sum(number_sales)#126] (133) Union (134) HashAggregate [codegen id : 162] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (135) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#125] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#127] (136) HashAggregate [codegen id : 163] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (137) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] (138) HashAggregate [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#126, isEmpty#127, count#128] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129, count(1)#130] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as 
decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sales#40, count(1)#130 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#129 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131, count(1)#132] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sales#39, count(1)#132 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] (139) Filter [codegen id : 189] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (140) Project [codegen id : 189] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, 
i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] (141) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] (142) HashAggregate [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#132, isEmpty#133, count#134] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135, count(1)#136] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sales#57, count(1)#136 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137, count(1)#138] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sales#56, count(1)#138 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] (143) Filter [codegen id : 215] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) 
* promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (144) Project [codegen id : 215] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137] +Output [6]: [catalog AS channel#140, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] (145) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] (146) HashAggregate [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#138, isEmpty#139, count#140] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141, count(1)#142] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sales#72, count(1)#142 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144, count(1)#145] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sales#71, count(1)#145 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] (147) Filter [codegen id : 241] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (148) Project [codegen id : 241] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143] +Output [6]: [web AS channel#147, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] (149) Union (150) HashAggregate [codegen id : 242] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] 
-Aggregate Attributes [3]: [sum#144, isEmpty#145, sum#146] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#148, isEmpty#149, sum#150] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] (151) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#150] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#154] (152) HashAggregate [codegen id : 243] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#147, isEmpty#148, sum#149] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#151, sum(number_sales#41)#152] -Results [4]: [channel#39, i_brand_id#6, sum(sales#40)#151 AS sum_sales#84, sum(number_sales#41)#152 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#155, sum(number_sales#40)#156] +Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#155 AS sum_sales#84, sum(number_sales#40)#156 AS number_sales#85] (153) HashAggregate [codegen id : 243] -Input [4]: [channel#39, i_brand_id#6, sum_sales#84, number_sales#85] -Keys [2]: [channel#39, i_brand_id#6] +Input [4]: [channel#44, i_brand_id#6, sum_sales#84, number_sales#85] +Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] -Results [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] +Aggregate Attributes [3]: [sum#157, isEmpty#158, sum#159] +Results [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] (154) Exchange -Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] -Arguments: hashpartitioning(channel#39, i_brand_id#6, 5), true, [id=#159] +Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Arguments: hashpartitioning(channel#44, i_brand_id#6, 5), true, [id=#163] (155) HashAggregate [codegen id : 244] -Input [5]: [channel#39, i_brand_id#6, sum#156, isEmpty#157, sum#158] -Keys [2]: [channel#39, i_brand_id#6] +Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#160, sum(number_sales#85)#161] -Results [6]: [channel#39, i_brand_id#6, null AS i_class_id#162, null AS i_category_id#163, sum(sum_sales#84)#160 AS sum(sum_sales)#164, sum(number_sales#85)#161 AS sum(number_sales)#165] +Aggregate Attributes [2]: [sum(sum_sales#84)#164, sum(number_sales#85)#165] +Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#84)#164 AS sum(sum_sales)#168, 
sum(number_sales#85)#165 AS sum(number_sales)#169] (156) Union (157) HashAggregate [codegen id : 245] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (158) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#166] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#170] (159) HashAggregate [codegen id : 246] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (160) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#167, isEmpty#168, count#169] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] (161) HashAggregate [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#167, isEmpty#168, count#169] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170, count(1)#171] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sales#40, count(1)#171 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#170 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), 
DecimalType(18,2), true))#172] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174, count(1)#175] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sales#39, count(1)#175 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] (162) Filter [codegen id : 272] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (163) Project [codegen id : 272] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] (164) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] +Output [6]: [i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#177, isEmpty#178, count#179] (165) HashAggregate [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#173, isEmpty#174, count#175] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176, count(1)#177] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sales#57, count(1)#177 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#176 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180, count(1)#181] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sales#56, count(1)#181 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] (166) Filter [codegen id : 298] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (167) Project [codegen id : 298] -Output [6]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178] +Output [6]: [catalog AS channel#183, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] (168) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] (169) HashAggregate [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#179, isEmpty#180, count#181] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182, count(1)#183] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sales#72, count(1)#183 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#182 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#71, count(1)#188 AS number_sales#72, 
sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] (170) Filter [codegen id : 324] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (171) Project [codegen id : 324] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#184] +Output [6]: [web AS channel#190, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] (172) Union (173) HashAggregate [codegen id : 325] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#185, isEmpty#186, sum#187] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#191, 
isEmpty#192, sum#193] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] (174) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#191] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#197] (175) HashAggregate [codegen id : 326] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#188, isEmpty#189, sum#190] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#192, sum(number_sales#41)#193] -Results [3]: [channel#39, sum(sales#40)#192 AS sum_sales#84, sum(number_sales#41)#193 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#198, sum(number_sales#40)#199] +Results [3]: [channel#44, sum(sales#39)#198 AS sum_sales#84, sum(number_sales#40)#199 AS number_sales#85] (176) HashAggregate [codegen id : 326] -Input [3]: [channel#39, sum_sales#84, number_sales#85] -Keys [1]: [channel#39] +Input [3]: [channel#44, sum_sales#84, number_sales#85] +Keys [1]: [channel#44] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#194, isEmpty#195, sum#196] -Results [4]: [channel#39, sum#197, isEmpty#198, sum#199] +Aggregate Attributes [3]: [sum#200, isEmpty#201, sum#202] +Results [4]: [channel#44, sum#203, isEmpty#204, sum#205] (177) Exchange -Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] -Arguments: hashpartitioning(channel#39, 5), true, [id=#200] +Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Arguments: hashpartitioning(channel#44, 5), true, [id=#206] (178) HashAggregate [codegen id : 327] -Input [4]: [channel#39, sum#197, isEmpty#198, sum#199] -Keys [1]: [channel#39] +Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Keys [1]: [channel#44] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#201, sum(number_sales#85)#202] -Results [6]: [channel#39, null AS i_brand_id#203, null AS i_class_id#204, null AS i_category_id#205, sum(sum_sales#84)#201 AS sum(sum_sales)#206, sum(number_sales#85)#202 AS sum(number_sales)#207] +Aggregate Attributes [2]: [sum(sum_sales#84)#207, sum(number_sales#85)#208] +Results [6]: [channel#44, null AS i_brand_id#209, null AS i_class_id#210, null AS i_category_id#211, sum(sum_sales#84)#207 AS sum(sum_sales)#212, sum(number_sales#85)#208 AS sum(number_sales)#213] (179) Union (180) HashAggregate [codegen id : 328] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, 
number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (181) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#208] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#214] (182) HashAggregate [codegen id : 329] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (183) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] (184) HashAggregate [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#209, isEmpty#210, count#211] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212, count(1)#213] -Results [7]: [store AS channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sales#40, count(1)#213 AS number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#212 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#39, count(1)#219 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] (185) Filter [codegen id : 355] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (186) Project [codegen id : 355] -Output [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#214] +Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] (187) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] (188) HashAggregate [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] -Results [7]: [catalog AS channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#57, count(1)#219 AS number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#56, count(1)#225 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] (189) Filter [codegen id : 381] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (190) Project [codegen id : 381] -Output [6]: [channel#56, i_brand_id#6, 
i_class_id#7, i_category_id#8, sales#57, number_sales#58] -Input [7]: [channel#56, i_brand_id#6, i_class_id#7, i_category_id#8, sales#57, number_sales#58, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#220] +Output [6]: [catalog AS channel#227, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] (191) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] (192) HashAggregate [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] -Results [7]: [web AS channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#72, count(1)#225 AS number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231, count(1)#232] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sales#71, count(1)#232 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] (193) Filter [codegen id : 407] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#43, [id=#44] as decimal(32,6)))) +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) (194) Project [codegen id : 407] -Output [6]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73] -Input [7]: [channel#71, i_brand_id#6, i_class_id#7, i_category_id#8, sales#72, number_sales#73, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#226] +Output [6]: [web AS channel#234, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] (195) Union (196) HashAggregate [codegen id : 408] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sales#40, number_sales#41] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [partial_sum(sales#40), partial_sum(number_sales#41)] -Aggregate Attributes [3]: [sum#227, isEmpty#228, sum#229] -Results [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] +Aggregate Attributes [3]: [sum#235, isEmpty#236, sum#237] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] (197) Exchange -Input [7]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#233] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#241] (198) HashAggregate [codegen id : 409] -Input [7]: [channel#39, 
i_brand_id#6, i_class_id#7, i_category_id#8, sum#230, isEmpty#231, sum#232] -Keys [4]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8] -Functions [2]: [sum(sales#40), sum(number_sales#41)] -Aggregate Attributes [2]: [sum(sales#40)#234, sum(number_sales#41)#235] -Results [2]: [sum(sales#40)#234 AS sum_sales#84, sum(number_sales#41)#235 AS number_sales#85] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] +Functions [2]: [sum(sales#39), sum(number_sales#40)] +Aggregate Attributes [2]: [sum(sales#39)#242, sum(number_sales#40)#243] +Results [2]: [sum(sales#39)#242 AS sum_sales#84, sum(number_sales#40)#243 AS number_sales#85] (199) HashAggregate [codegen id : 409] Input [2]: [sum_sales#84, number_sales#85] Keys: [] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#236, isEmpty#237, sum#238] -Results [3]: [sum#239, isEmpty#240, sum#241] +Aggregate Attributes [3]: [sum#244, isEmpty#245, sum#246] +Results [3]: [sum#247, isEmpty#248, sum#249] (200) Exchange -Input [3]: [sum#239, isEmpty#240, sum#241] -Arguments: SinglePartition, true, [id=#242] +Input [3]: [sum#247, isEmpty#248, sum#249] +Arguments: SinglePartition, true, [id=#250] (201) HashAggregate [codegen id : 410] -Input [3]: [sum#239, isEmpty#240, sum#241] +Input [3]: [sum#247, isEmpty#248, sum#249] Keys: [] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#243, sum(number_sales#85)#244] -Results [6]: [null AS channel#245, null AS i_brand_id#246, null AS i_class_id#247, null AS i_category_id#248, sum(sum_sales#84)#243 AS sum(sum_sales)#249, sum(number_sales#85)#244 AS sum(number_sales)#250] +Aggregate Attributes [2]: [sum(sum_sales#84)#251, sum(number_sales#85)#252] +Results [6]: [null AS channel#253, null AS i_brand_id#254, null AS i_class_id#255, null AS i_category_id#256, sum(sum_sales#84)#251 AS sum(sum_sales)#257, sum(number_sales#85)#252 AS sum(number_sales)#258] (202) Union (203) HashAggregate [codegen id : 411] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (204) Exchange -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#251] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#259] (205) HashAggregate [codegen id : 412] -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, 
i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] (206) TakeOrderedAndProject -Input [6]: [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: 100, [channel#39 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#39, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] ===== Subqueries ===== -Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#43, [id=#44] +Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (236) +- Exchange (235) +- * HashAggregate (234) @@ -1247,7 +1247,7 @@ Input [2]: [d_date_sk#10, d_year#11] (214) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#252] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] (215) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] @@ -1255,7 +1255,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (216) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#253, ss_list_price#4 AS list_price#254] +Output [2]: [ss_quantity#3 AS quantity#261, ss_list_price#4 AS list_price#262] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] (217) Scan parquet default.catalog_sales @@ -1292,7 +1292,7 @@ Input [2]: [d_date_sk#10, d_year#11] (224) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#255] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#263] (225) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#16] @@ -1300,7 +1300,7 @@ Right keys [1]: [d_date_sk#10] Join condition: None (226) Project [codegen id : 4] -Output [2]: [cs_quantity#45 AS quantity#256, cs_list_price#46 AS list_price#257] +Output [2]: [cs_quantity#45 AS quantity#264, cs_list_price#46 AS list_price#265] Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] (227) Scan parquet default.web_sales @@ -1326,55 +1326,55 @@ Right keys [1]: [d_date_sk#10] Join condition: None (232) Project [codegen id : 6] -Output [2]: [ws_quantity#60 AS quantity#258, ws_list_price#61 AS list_price#259] +Output [2]: [ws_quantity#60 AS quantity#266, ws_list_price#61 AS list_price#267] Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] (233) Union (234) HashAggregate [codegen id : 7] -Input [2]: [quantity#253, list_price#254] +Input [2]: [quantity#261, list_price#262] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#260, count#261] -Results [2]: [sum#262, count#263] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#268, count#269] +Results [2]: [sum#270, count#271] (235) Exchange -Input [2]: [sum#262, count#263] -Arguments: SinglePartition, true, [id=#264] +Input [2]: [sum#270, count#271] +Arguments: SinglePartition, true, [id=#272] (236) HashAggregate [codegen id : 8] -Input [2]: [sum#262, count#263] +Input [2]: [sum#270, count#271] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#253 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#254 as decimal(12,2)))), DecimalType(18,2), true))#265 AS average_sales#266] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273 AS average_sales#274] -Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:3 Hosting operator id = 108 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:4 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:4 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:8 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:8 Hosting operator id = 143 Hosting Expression = 
ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:11 Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:11 Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#43, [id=#44] +Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index fc86da1801926..e96f1d6fed14f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -40,7 +40,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (26) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] Subquery #1 WholeStageCodegen (8) @@ -90,7 +90,7 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter ReusedExchange [d_date_sk] #19 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #6 WholeStageCodegen (25) @@ -202,10 +202,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] WholeStageCodegen (52) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #20 WholeStageCodegen (51) @@ -226,10 +226,10 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter ReusedExchange [d_date_sk] #16 WholeStageCodegen (78) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id] #21 WholeStageCodegen (77) @@ 
-263,24 +263,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (106) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (132) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (158) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (244) @@ -297,24 +297,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (189) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (215) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (241) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (327) @@ -331,24 +331,24 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (272) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (298) - Project 
[channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (324) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 WholeStageCodegen (410) @@ -365,23 +365,23 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,num InputAdapter Union WholeStageCodegen (355) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 WholeStageCodegen (381) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 WholeStageCodegen (407) - Project [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),channel,sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 From 3118c220f919ba185a57abfbe55eac1822c89a52 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 21 Sep 2020 22:32:25 -0700 Subject: [PATCH 0081/1009] [SPARK-32949][R][SQL] Add timestamp_seconds to SparkR ### What changes were proposed in this pull request? This PR adds R wrapper for `timestamp_seconds` function. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? Yes, it adds a new R function. ### How was this patch tested? New unit tests. Closes #29822 from zero323/SPARK-32949. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 15 +++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 1 + 4 files changed, 21 insertions(+) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index f27913ae0b1bd..6d28caff0d56f 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -405,6 +405,7 @@ exportMethods("%<=>%", "sumDistinct", "tan", "tanh", + "timestamp_seconds", "toDegrees", "toRadians", "to_csv", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 5d9c8e8124d9a..1d75819cb6133 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4407,3 +4407,18 @@ setMethod("current_timestamp", jc <- callJStatic("org.apache.spark.sql.functions", "current_timestamp") column(jc) }) + +#' @details +#' \code{timestamp_seconds}: Creates timestamp from the number of seconds since UTC epoch. +#' +#' @rdname column_datetime_functions +#' @aliases timestamp_seconds timestamp_seconds,Column-method +#' @note timestamp_seconds since 3.1.0 +setMethod("timestamp_seconds", + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.sql.functions", "timestamp_seconds", x@jc + ) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 604308c8803eb..a7a9379b927b1 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1357,6 +1357,10 @@ setGeneric("substring_index", function(x, delim, count) { standardGeneric("subst #' @name NULL setGeneric("sumDistinct", function(x) { standardGeneric("sumDistinct") }) +#' @rdname column_datetime_functions +#' @name timestamp_seconds +setGeneric("timestamp_seconds", function(x) { standardGeneric("timestamp_seconds") }) + #' @rdname column_collection_functions #' @name NULL setGeneric("transform_keys", function(x, f) { standardGeneric("transform_keys") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 0ad7f9e88b0fd..1c65dabaf6656 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1424,6 +1424,7 @@ test_that("column functions", { date_trunc("quarter", c) + current_date() + current_timestamp() c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) + c26 <- timestamp_seconds(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From 790d9ef2d3a90388ef3c36d5ae47b2fe369a83ba Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 22 Sep 2020 14:46:27 +0900 Subject: [PATCH 0082/1009] [SPARK-32955][DOCS] An item in the navigation bar in the WebUI has a wrong link ### What changes were proposed in this pull request? 
This PR fixes a link in `_layouts/global.html`. The `More` item in the navigation bar in the WebUI links to `api.html`, which is wrong. This PR also removes `api.md`, because neither it nor the `api.html` generated from it is referenced anywhere. ### Why are the changes needed? Fix the wrong link. ### Does this PR introduce _any_ user-facing change? Yes. The "More" item no longer links to `api.html`. ### How was this patch tested? Ran `SKIP_API=1 jekyll build` and confirmed that the item no longer links to `api.html`. I also confirmed that `api.md` and `api.html` are no longer referenced anywhere with the following command. ``` $ grep -Erl "api\.(html|md)" docs ``` Closes #29821 from sarutak/fix-api-doc-link. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- docs/_layouts/global.html | 2 +- docs/api.md | 27 --------------------------- 2 files changed, 1 insertion(+), 28 deletions(-) delete mode 100644 docs/api.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 09f7018262a0b..d6548f0fa9534 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -110,7 +110,7 @@
  1. `SaveMode.ErrorIfExists`: throw an exception at runtime.
  2. * *
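For context on the scaladoc change below, which replaces the separate data source v1/v2 wording with a single `ErrorIfExists` default: a minimal PySpark sketch (not part of any patch here; the output path is made up) contrasting the default mode with an explicitly chosen one via `DataFrameWriter.mode()`.

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.range(10)

# Default save mode is ErrorIfExists: the write fails if the path already exists.
df.write.parquet("/tmp/range_example")

# Choosing a mode explicitly instead of relying on the default.
df.write.mode("overwrite").parquet("/tmp/range_example")  # replace existing data
df.write.mode("append").parquet("/tmp/range_example")     # add to existing data
df.write.mode("ignore").parquet("/tmp/range_example")     # silently skip if data exists
```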

    - * When writing to data source v1, the default option is `ErrorIfExists`. When writing to data - * source v2, the default option is `Append`. + * The default option is `ErrorIfExists`. * * @since 1.4.0 */ From 0bc0e91e4015eb98bd2f4bf17da2ec7135b520a9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Sep 2020 20:10:01 -0700 Subject: [PATCH 0099/1009] [SPARK-32971][K8S][FOLLOWUP] Add `.toSeq` for Scala 2.13 compilation ### What changes were proposed in this pull request? This is a follow-up to fix Scala 2.13 compilation at Kubernetes module. ### Why are the changes needed? To fix Scala 2.13 compilation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action Scala 2.13 compilation job. Closes #29859 from dongjoon-hyun/SPARK-32971-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/features/MountVolumesFeatureStep.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index fe4717d099510..788ddeaf51cba 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -114,7 +114,7 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) } override def getAdditionalKubernetesResources(): Seq[HasMetadata] = { - additionalResources + additionalResources.toSeq } } From 31a16fbb405a19dc3eb732347e0e1f873b16971d Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 24 Sep 2020 14:15:36 +0900 Subject: [PATCH 0100/1009] [SPARK-32714][PYTHON] Initial pyspark-stubs port ### What changes were proposed in this pull request? This PR proposes migration of [`pyspark-stubs`](https://github.com/zero323/pyspark-stubs) into Spark codebase. ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? Yes. This PR adds type annotations directly to Spark source. This can impact interaction with development tools for users, which haven't used `pyspark-stubs`. ### How was this patch tested? - [x] MyPy tests of the PySpark source ``` mypy --no-incremental --config python/mypy.ini python/pyspark ``` - [x] MyPy tests of Spark examples ``` MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming ``` - [x] Existing Flake8 linter - [x] Existing unit tests Tested against: - `mypy==0.790+dev.e959952d9001e9713d329a2f9b196705b028f894` - `mypy==0.782` Closes #29591 from zero323/SPARK-32681. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- dev/.rat-excludes | 1 + dev/tox.ini | 2 +- .../ml/estimator_transformer_param_example.py | 8 +- .../main/python/ml/fm_classifier_example.py | 6 +- .../main/python/ml/fm_regressor_example.py | 6 +- .../src/main/python/ml/pipeline_example.py | 8 +- examples/src/main/python/sql/arrow.py | 4 +- python/MANIFEST.in | 1 + python/mypy.ini | 36 + python/pyspark/__init__.pyi | 73 + python/pyspark/_globals.pyi | 27 + python/pyspark/_typing.pyi | 33 + python/pyspark/accumulators.pyi | 71 + python/pyspark/broadcast.pyi | 46 + python/pyspark/conf.pyi | 44 + python/pyspark/context.pyi | 176 ++ python/pyspark/daemon.pyi | 29 + python/pyspark/files.pyi | 24 + python/pyspark/find_spark_home.pyi | 17 + python/pyspark/java_gateway.pyi | 24 + python/pyspark/join.pyi | 50 + python/pyspark/ml/__init__.pyi | 45 + python/pyspark/ml/_typing.pyi | 76 + python/pyspark/ml/base.pyi | 103 ++ python/pyspark/ml/classification.pyi | 922 ++++++++++ python/pyspark/ml/clustering.pyi | 437 +++++ python/pyspark/ml/common.pyi | 20 + python/pyspark/ml/evaluation.pyi | 281 +++ python/pyspark/ml/feature.pyi | 1629 +++++++++++++++++ python/pyspark/ml/fpm.pyi | 109 ++ python/pyspark/ml/functions.pyi | 22 + python/pyspark/ml/image.pyi | 40 + python/pyspark/ml/linalg/__init__.pyi | 255 +++ python/pyspark/ml/param/__init__.pyi | 96 + .../ml/param/_shared_params_code_gen.pyi | 19 + python/pyspark/ml/param/shared.pyi | 187 ++ python/pyspark/ml/pipeline.pyi | 97 + python/pyspark/ml/recommendation.pyi | 152 ++ python/pyspark/ml/regression.pyi | 825 +++++++++ python/pyspark/ml/stat.pyi | 89 + python/pyspark/ml/tests/test_algorithms.py | 2 +- python/pyspark/ml/tests/test_base.py | 2 +- python/pyspark/ml/tests/test_evaluation.py | 2 +- python/pyspark/ml/tests/test_feature.py | 2 +- python/pyspark/ml/tests/test_image.py | 2 +- python/pyspark/ml/tests/test_linalg.py | 2 +- python/pyspark/ml/tests/test_param.py | 2 +- python/pyspark/ml/tests/test_persistence.py | 2 +- python/pyspark/ml/tests/test_pipeline.py | 2 +- python/pyspark/ml/tests/test_stat.py | 2 +- .../pyspark/ml/tests/test_training_summary.py | 2 +- python/pyspark/ml/tests/test_tuning.py | 2 +- python/pyspark/ml/tests/test_wrapper.py | 6 +- python/pyspark/ml/tree.pyi | 112 ++ python/pyspark/ml/tuning.pyi | 185 ++ python/pyspark/ml/util.pyi | 128 ++ python/pyspark/ml/wrapper.pyi | 48 + python/pyspark/mllib/__init__.pyi | 32 + python/pyspark/mllib/_typing.pyi | 23 + python/pyspark/mllib/classification.pyi | 151 ++ python/pyspark/mllib/clustering.pyi | 196 ++ python/pyspark/mllib/common.pyi | 27 + python/pyspark/mllib/evaluation.pyi | 94 + python/pyspark/mllib/feature.pyi | 167 ++ python/pyspark/mllib/fpm.pyi | 57 + python/pyspark/mllib/linalg/__init__.pyi | 273 +++ python/pyspark/mllib/linalg/distributed.pyi | 147 ++ python/pyspark/mllib/random.pyi | 126 ++ python/pyspark/mllib/recommendation.pyi | 75 + python/pyspark/mllib/regression.pyi | 155 ++ python/pyspark/mllib/stat/KernelDensity.pyi | 27 + python/pyspark/mllib/stat/__init__.pyi | 29 + python/pyspark/mllib/stat/_statistics.pyi | 69 + python/pyspark/mllib/stat/distribution.pyi | 25 + python/pyspark/mllib/stat/test.pyi | 39 + python/pyspark/mllib/tests/test_algorithms.py | 2 +- python/pyspark/mllib/tests/test_feature.py | 2 +- python/pyspark/mllib/tests/test_linalg.py | 6 +- python/pyspark/mllib/tests/test_stat.py | 2 +- .../mllib/tests/test_streaming_algorithms.py | 2 +- python/pyspark/mllib/tests/test_util.py | 4 +- python/pyspark/mllib/tree.pyi | 126 ++ 
python/pyspark/mllib/util.pyi | 90 + python/pyspark/profiler.pyi | 56 + python/pyspark/py.typed | 1 + python/pyspark/rdd.pyi | 479 +++++ python/pyspark/rddsampler.pyi | 54 + python/pyspark/resource/__init__.pyi | 31 + python/pyspark/resource/information.pyi | 26 + python/pyspark/resource/profile.pyi | 51 + python/pyspark/resource/requests.pyi | 71 + .../pyspark/resource/tests/test_resources.py | 2 +- python/pyspark/resultiterable.pyi | 30 + python/pyspark/serializers.pyi | 122 ++ python/pyspark/shell.pyi | 31 + python/pyspark/shuffle.pyi | 109 ++ python/pyspark/sql/__init__.pyi | 41 + python/pyspark/sql/_typing.pyi | 57 + python/pyspark/sql/avro/__init__.pyi | 22 + python/pyspark/sql/avro/functions.pyi | 27 + python/pyspark/sql/catalog.pyi | 63 + python/pyspark/sql/column.pyi | 112 ++ python/pyspark/sql/conf.pyi | 27 + python/pyspark/sql/context.pyi | 139 ++ python/pyspark/sql/dataframe.pyi | 324 ++++ python/pyspark/sql/functions.pyi | 343 ++++ python/pyspark/sql/group.pyi | 44 + python/pyspark/sql/pandas/__init__.pyi | 17 + .../pyspark/sql/pandas/_typing/__init__.pyi | 338 ++++ .../sql/pandas/_typing/protocols/__init__.pyi | 17 + .../sql/pandas/_typing/protocols/frame.pyi | 428 +++++ .../sql/pandas/_typing/protocols/series.pyi | 253 +++ python/pyspark/sql/pandas/conversion.pyi | 58 + python/pyspark/sql/pandas/functions.pyi | 176 ++ python/pyspark/sql/pandas/group_ops.pyi | 49 + python/pyspark/sql/pandas/map_ops.pyi | 30 + python/pyspark/sql/pandas/serializers.pyi | 65 + python/pyspark/sql/pandas/typehints.pyi | 33 + python/pyspark/sql/pandas/types.pyi | 41 + python/pyspark/sql/pandas/utils.pyi | 20 + python/pyspark/sql/readwriter.pyi | 250 +++ python/pyspark/sql/session.pyi | 125 ++ python/pyspark/sql/streaming.pyi | 179 ++ python/pyspark/sql/tests/test_arrow.py | 6 +- python/pyspark/sql/tests/test_catalog.py | 2 +- python/pyspark/sql/tests/test_column.py | 2 +- python/pyspark/sql/tests/test_conf.py | 2 +- python/pyspark/sql/tests/test_context.py | 2 +- python/pyspark/sql/tests/test_dataframe.py | 22 +- python/pyspark/sql/tests/test_datasources.py | 2 +- python/pyspark/sql/tests/test_functions.py | 2 +- python/pyspark/sql/tests/test_group.py | 2 +- .../sql/tests/test_pandas_cogrouped_map.py | 4 +- .../sql/tests/test_pandas_grouped_map.py | 4 +- python/pyspark/sql/tests/test_pandas_map.py | 4 +- python/pyspark/sql/tests/test_pandas_udf.py | 4 +- .../sql/tests/test_pandas_udf_grouped_agg.py | 4 +- .../sql/tests/test_pandas_udf_scalar.py | 6 +- .../sql/tests/test_pandas_udf_typehints.py | 4 +- .../sql/tests/test_pandas_udf_window.py | 4 +- python/pyspark/sql/tests/test_readwriter.py | 2 +- python/pyspark/sql/tests/test_serde.py | 2 +- python/pyspark/sql/tests/test_session.py | 2 +- python/pyspark/sql/tests/test_streaming.py | 2 +- python/pyspark/sql/tests/test_types.py | 9 +- python/pyspark/sql/tests/test_udf.py | 11 +- python/pyspark/sql/tests/test_utils.py | 2 +- python/pyspark/sql/types.pyi | 204 +++ python/pyspark/sql/udf.pyi | 57 + python/pyspark/sql/utils.pyi | 55 + python/pyspark/sql/window.pyi | 40 + python/pyspark/statcounter.pyi | 44 + python/pyspark/status.pyi | 42 + python/pyspark/storagelevel.pyi | 43 + python/pyspark/streaming/__init__.pyi | 23 + python/pyspark/streaming/context.pyi | 75 + python/pyspark/streaming/dstream.pyi | 208 +++ python/pyspark/streaming/kinesis.pyi | 46 + python/pyspark/streaming/listener.pyi | 35 + .../pyspark/streaming/tests/test_context.py | 2 +- .../pyspark/streaming/tests/test_dstream.py | 2 +- .../pyspark/streaming/tests/test_kinesis.py | 2 +- 
.../pyspark/streaming/tests/test_listener.py | 2 +- python/pyspark/streaming/util.pyi | 48 + python/pyspark/taskcontext.pyi | 45 + python/pyspark/testing/mlutils.py | 5 +- python/pyspark/testing/sqlutils.py | 2 +- python/pyspark/testing/streamingutils.py | 4 +- python/pyspark/tests/test_appsubmit.py | 2 +- python/pyspark/tests/test_broadcast.py | 2 +- python/pyspark/tests/test_conf.py | 2 +- python/pyspark/tests/test_context.py | 11 +- python/pyspark/tests/test_daemon.py | 2 +- python/pyspark/tests/test_join.py | 2 +- python/pyspark/tests/test_pin_thread.py | 2 +- python/pyspark/tests/test_profiler.py | 2 +- python/pyspark/tests/test_rdd.py | 2 +- python/pyspark/tests/test_rddbarrier.py | 2 +- python/pyspark/tests/test_readwrite.py | 2 +- python/pyspark/tests/test_serializers.py | 4 +- python/pyspark/tests/test_shuffle.py | 2 +- python/pyspark/tests/test_taskcontext.py | 2 +- python/pyspark/tests/test_util.py | 2 +- python/pyspark/tests/test_worker.py | 2 +- python/pyspark/traceback_utils.pyi | 29 + python/pyspark/util.pyi | 35 + python/pyspark/version.pyi | 19 + python/pyspark/worker.pyi | 73 + python/setup.py | 3 +- 189 files changed, 14053 insertions(+), 119 deletions(-) create mode 100644 python/mypy.ini create mode 100644 python/pyspark/__init__.pyi create mode 100644 python/pyspark/_globals.pyi create mode 100644 python/pyspark/_typing.pyi create mode 100644 python/pyspark/accumulators.pyi create mode 100644 python/pyspark/broadcast.pyi create mode 100644 python/pyspark/conf.pyi create mode 100644 python/pyspark/context.pyi create mode 100644 python/pyspark/daemon.pyi create mode 100644 python/pyspark/files.pyi create mode 100644 python/pyspark/find_spark_home.pyi create mode 100644 python/pyspark/java_gateway.pyi create mode 100644 python/pyspark/join.pyi create mode 100644 python/pyspark/ml/__init__.pyi create mode 100644 python/pyspark/ml/_typing.pyi create mode 100644 python/pyspark/ml/base.pyi create mode 100644 python/pyspark/ml/classification.pyi create mode 100644 python/pyspark/ml/clustering.pyi create mode 100644 python/pyspark/ml/common.pyi create mode 100644 python/pyspark/ml/evaluation.pyi create mode 100644 python/pyspark/ml/feature.pyi create mode 100644 python/pyspark/ml/fpm.pyi create mode 100644 python/pyspark/ml/functions.pyi create mode 100644 python/pyspark/ml/image.pyi create mode 100644 python/pyspark/ml/linalg/__init__.pyi create mode 100644 python/pyspark/ml/param/__init__.pyi create mode 100644 python/pyspark/ml/param/_shared_params_code_gen.pyi create mode 100644 python/pyspark/ml/param/shared.pyi create mode 100644 python/pyspark/ml/pipeline.pyi create mode 100644 python/pyspark/ml/recommendation.pyi create mode 100644 python/pyspark/ml/regression.pyi create mode 100644 python/pyspark/ml/stat.pyi create mode 100644 python/pyspark/ml/tree.pyi create mode 100644 python/pyspark/ml/tuning.pyi create mode 100644 python/pyspark/ml/util.pyi create mode 100644 python/pyspark/ml/wrapper.pyi create mode 100644 python/pyspark/mllib/__init__.pyi create mode 100644 python/pyspark/mllib/_typing.pyi create mode 100644 python/pyspark/mllib/classification.pyi create mode 100644 python/pyspark/mllib/clustering.pyi create mode 100644 python/pyspark/mllib/common.pyi create mode 100644 python/pyspark/mllib/evaluation.pyi create mode 100644 python/pyspark/mllib/feature.pyi create mode 100644 python/pyspark/mllib/fpm.pyi create mode 100644 python/pyspark/mllib/linalg/__init__.pyi create mode 100644 python/pyspark/mllib/linalg/distributed.pyi create mode 100644 
python/pyspark/mllib/random.pyi create mode 100644 python/pyspark/mllib/recommendation.pyi create mode 100644 python/pyspark/mllib/regression.pyi create mode 100644 python/pyspark/mllib/stat/KernelDensity.pyi create mode 100644 python/pyspark/mllib/stat/__init__.pyi create mode 100644 python/pyspark/mllib/stat/_statistics.pyi create mode 100644 python/pyspark/mllib/stat/distribution.pyi create mode 100644 python/pyspark/mllib/stat/test.pyi create mode 100644 python/pyspark/mllib/tree.pyi create mode 100644 python/pyspark/mllib/util.pyi create mode 100644 python/pyspark/profiler.pyi create mode 100644 python/pyspark/py.typed create mode 100644 python/pyspark/rdd.pyi create mode 100644 python/pyspark/rddsampler.pyi create mode 100644 python/pyspark/resource/__init__.pyi create mode 100644 python/pyspark/resource/information.pyi create mode 100644 python/pyspark/resource/profile.pyi create mode 100644 python/pyspark/resource/requests.pyi create mode 100644 python/pyspark/resultiterable.pyi create mode 100644 python/pyspark/serializers.pyi create mode 100644 python/pyspark/shell.pyi create mode 100644 python/pyspark/shuffle.pyi create mode 100644 python/pyspark/sql/__init__.pyi create mode 100644 python/pyspark/sql/_typing.pyi create mode 100644 python/pyspark/sql/avro/__init__.pyi create mode 100644 python/pyspark/sql/avro/functions.pyi create mode 100644 python/pyspark/sql/catalog.pyi create mode 100644 python/pyspark/sql/column.pyi create mode 100644 python/pyspark/sql/conf.pyi create mode 100644 python/pyspark/sql/context.pyi create mode 100644 python/pyspark/sql/dataframe.pyi create mode 100644 python/pyspark/sql/functions.pyi create mode 100644 python/pyspark/sql/group.pyi create mode 100644 python/pyspark/sql/pandas/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/__init__.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/frame.pyi create mode 100644 python/pyspark/sql/pandas/_typing/protocols/series.pyi create mode 100644 python/pyspark/sql/pandas/conversion.pyi create mode 100644 python/pyspark/sql/pandas/functions.pyi create mode 100644 python/pyspark/sql/pandas/group_ops.pyi create mode 100644 python/pyspark/sql/pandas/map_ops.pyi create mode 100644 python/pyspark/sql/pandas/serializers.pyi create mode 100644 python/pyspark/sql/pandas/typehints.pyi create mode 100644 python/pyspark/sql/pandas/types.pyi create mode 100644 python/pyspark/sql/pandas/utils.pyi create mode 100644 python/pyspark/sql/readwriter.pyi create mode 100644 python/pyspark/sql/session.pyi create mode 100644 python/pyspark/sql/streaming.pyi create mode 100644 python/pyspark/sql/types.pyi create mode 100644 python/pyspark/sql/udf.pyi create mode 100644 python/pyspark/sql/utils.pyi create mode 100644 python/pyspark/sql/window.pyi create mode 100644 python/pyspark/statcounter.pyi create mode 100644 python/pyspark/status.pyi create mode 100644 python/pyspark/storagelevel.pyi create mode 100644 python/pyspark/streaming/__init__.pyi create mode 100644 python/pyspark/streaming/context.pyi create mode 100644 python/pyspark/streaming/dstream.pyi create mode 100644 python/pyspark/streaming/kinesis.pyi create mode 100644 python/pyspark/streaming/listener.pyi create mode 100644 python/pyspark/streaming/util.pyi create mode 100644 python/pyspark/taskcontext.pyi create mode 100644 python/pyspark/traceback_utils.pyi create mode 100644 python/pyspark/util.pyi create mode 100644 python/pyspark/version.pyi create mode 100644 
python/pyspark/worker.pyi diff --git a/dev/.rat-excludes b/dev/.rat-excludes index df1dd51a7c519..98786437f7b1c 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -124,3 +124,4 @@ GangliaReporter.java application_1578436911597_0052 config.properties app-20200706201101-0003 +py.typed diff --git a/dev/tox.ini b/dev/tox.ini index c14e6b9446cca..7edf7d597fb58 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -20,5 +20,5 @@ exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,wor [flake8] select = E901,E999,F821,F822,F823,F401,F405 -exclude = python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index 1dcca6c201119..2cf9432646b5e 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -56,12 +56,14 @@ # We may alternatively specify parameters using a Python dictionary as a paramMap paramMap = {lr.maxIter: 20} paramMap[lr.maxIter] = 30 # Specify 1 Param, overwriting the original maxIter. - paramMap.update({lr.regParam: 0.1, lr.threshold: 0.55}) # Specify multiple Params. + # Specify multiple Params. + paramMap.update({lr.regParam: 0.1, lr.threshold: 0.55}) # type: ignore # You can combine paramMaps, which are python dictionaries. - paramMap2 = {lr.probabilityCol: "myProbability"} # Change output column name + # Change output column name + paramMap2 = {lr.probabilityCol: "myProbability"} # type: ignore paramMapCombined = paramMap.copy() - paramMapCombined.update(paramMap2) + paramMapCombined.update(paramMap2) # type: ignore # Now learn a new model using the paramMapCombined parameters. # paramMapCombined overrides all parameters set earlier via lr.set* methods. 
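One reason the `# type: ignore` comments above are needed: mypy infers a dict's type from its first literal entry, so a param map started as `{lr.maxIter: 20}` gets a narrower type than the later `update()` calls supply. A minimal standalone sketch of the same effect, using a hypothetical `Param` stand-in rather than pyspark's real class:

```python
# Runs fine at runtime; the interesting part is what mypy reports.

class Param:
    """Hypothetical stand-in for a parameter key class."""
    def __init__(self, name: str) -> None:
        self.name = name

max_iter = Param("maxIter")
reg_param = Param("regParam")

# mypy infers Dict[Param, int] from the first literal entry.
param_map = {max_iter: 20}

# Without the trailing comment, mypy flags the float value as incompatible
# with the inferred int value type, which is why the examples silence it.
param_map.update({reg_param: 0.1})  # type: ignore
```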
diff --git a/examples/src/main/python/ml/fm_classifier_example.py b/examples/src/main/python/ml/fm_classifier_example.py index b47bdc5275beb..da49e5fc2baa9 100644 --- a/examples/src/main/python/ml/fm_classifier_example.py +++ b/examples/src/main/python/ml/fm_classifier_example.py @@ -67,9 +67,9 @@ print("Test set accuracy = %g" % accuracy) fmModel = model.stages[2] - print("Factors: " + str(fmModel.factors)) - print("Linear: " + str(fmModel.linear)) - print("Intercept: " + str(fmModel.intercept)) + print("Factors: " + str(fmModel.factors)) # type: ignore + print("Linear: " + str(fmModel.linear)) # type: ignore + print("Intercept: " + str(fmModel.intercept)) # type: ignore # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/fm_regressor_example.py b/examples/src/main/python/ml/fm_regressor_example.py index 5c8133996ae83..47544b6324203 100644 --- a/examples/src/main/python/ml/fm_regressor_example.py +++ b/examples/src/main/python/ml/fm_regressor_example.py @@ -64,9 +64,9 @@ print("Root Mean Squared Error (RMSE) on test data = %g" % rmse) fmModel = model.stages[1] - print("Factors: " + str(fmModel.factors)) - print("Linear: " + str(fmModel.linear)) - print("Intercept: " + str(fmModel.intercept)) + print("Factors: " + str(fmModel.factors)) # type: ignore + print("Linear: " + str(fmModel.linear)) # type: ignore + print("Intercept: " + str(fmModel.intercept)) # type: ignore # $example off$ spark.stop() diff --git a/examples/src/main/python/ml/pipeline_example.py b/examples/src/main/python/ml/pipeline_example.py index e1fab7cbe6d80..66fdd73632a70 100644 --- a/examples/src/main/python/ml/pipeline_example.py +++ b/examples/src/main/python/ml/pipeline_example.py @@ -62,8 +62,12 @@ prediction = model.transform(test) selected = prediction.select("id", "text", "probability", "prediction") for row in selected.collect(): - rid, text, prob, prediction = row - print("(%d, %s) --> prob=%s, prediction=%f" % (rid, text, str(prob), prediction)) + rid, text, prob, prediction = row # type: ignore + print( + "(%d, %s) --> prob=%s, prediction=%f" % ( + rid, text, str(prob), prediction # type: ignore + ) + ) # $example off$ spark.stop() diff --git a/examples/src/main/python/sql/arrow.py b/examples/src/main/python/sql/arrow.py index 1789a54f0276e..9978e8601449a 100644 --- a/examples/src/main/python/sql/arrow.py +++ b/examples/src/main/python/sql/arrow.py @@ -32,8 +32,8 @@ def dataframe_with_arrow_example(spark): - import numpy as np - import pandas as pd + import numpy as np # type: ignore[import] + import pandas as pd # type: ignore[import] # Enable Arrow-based columnar data transfers spark.conf.set("spark.sql.execution.arrow.pyspark.enabled", "true") diff --git a/python/MANIFEST.in b/python/MANIFEST.in index 2d78a001a4d98..862d62b1d3b29 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -22,4 +22,5 @@ recursive-include deps/data *.data *.txt recursive-include deps/licenses *.txt recursive-include deps/examples *.py recursive-include lib *.zip +recursive-include pyspark *.pyi py.typed include README.md diff --git a/python/mypy.ini b/python/mypy.ini new file mode 100644 index 0000000000000..a9523e622ca0d --- /dev/null +++ b/python/mypy.ini @@ -0,0 +1,36 @@ +; +; Licensed to the Apache Software Foundation (ASF) under one or more +; contributor license agreements. See the NOTICE file distributed with +; this work for additional information regarding copyright ownership. 
+; The ASF licenses this file to You under the Apache License, Version 2.0 +; (the "License"); you may not use this file except in compliance with +; the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, software +; distributed under the License is distributed on an "AS IS" BASIS, +; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +; See the License for the specific language governing permissions and +; limitations under the License. +; + +[mypy] + +[mypy-pyspark.cloudpickle.*] +ignore_errors = True + +[mypy-py4j.*] +ignore_missing_imports = True + +[mypy-numpy] +ignore_missing_imports = True + +[mypy-scipy.*] +ignore_missing_imports = True + +[mypy-pandas.*] +ignore_missing_imports = True + +[mypy-pyarrow] +ignore_missing_imports = True diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi new file mode 100644 index 0000000000000..98bd40684c01b --- /dev/null +++ b/python/pyspark/__init__.pyi @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Optional, TypeVar + +from pyspark.accumulators import ( # noqa: F401 + Accumulator as Accumulator, + AccumulatorParam as AccumulatorParam, +) +from pyspark.broadcast import Broadcast as Broadcast # noqa: F401 +from pyspark.conf import SparkConf as SparkConf # noqa: F401 +from pyspark.context import SparkContext as SparkContext # noqa: F401 +from pyspark.files import SparkFiles as SparkFiles # noqa: F401 +from pyspark.status import ( + StatusTracker as StatusTracker, + SparkJobInfo as SparkJobInfo, + SparkStageInfo as SparkStageInfo, +) # noqa: F401 +from pyspark.profiler import ( # noqa: F401 + BasicProfiler as BasicProfiler, + Profiler as Profiler, +) +from pyspark.rdd import RDD as RDD, RDDBarrier as RDDBarrier # noqa: F401 +from pyspark.serializers import ( # noqa: F401 + MarshalSerializer as MarshalSerializer, + PickleSerializer as PickleSerializer, +) +from pyspark.status import ( # noqa: F401 + SparkJobInfo as SparkJobInfo, + SparkStageInfo as SparkStageInfo, + StatusTracker as StatusTracker, +) +from pyspark.storagelevel import StorageLevel as StorageLevel # noqa: F401 +from pyspark.taskcontext import ( # noqa: F401 + BarrierTaskContext as BarrierTaskContext, + BarrierTaskInfo as BarrierTaskInfo, + TaskContext as TaskContext, +) +from pyspark.util import InheritableThread as InheritableThread # noqa: F401 + +# Compatiblity imports +from pyspark.sql import ( # noqa: F401 + SQLContext as SQLContext, + HiveContext as HiveContext, + Row as Row, +) + +T = TypeVar("T") +F = TypeVar("F", bound=Callable) + +def since(version: str) -> Callable[[T], T]: ... 
+def copy_func( + f: F, + name: Optional[str] = ..., + sinceversion: Optional[str] = ..., + doc: Optional[str] = ..., +) -> F: ... +def keyword_only(func: F) -> F: ... diff --git a/python/pyspark/_globals.pyi b/python/pyspark/_globals.pyi new file mode 100644 index 0000000000000..9453775621196 --- /dev/null +++ b/python/pyspark/_globals.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +__ALL__: Any + +class _NoValueType: + def __new__(cls): ... + def __reduce__(self): ... diff --git a/python/pyspark/_typing.pyi b/python/pyspark/_typing.pyi new file mode 100644 index 0000000000000..637e4cb4fbccc --- /dev/null +++ b/python/pyspark/_typing.pyi @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Iterable, Sized, TypeVar, Union +from typing_extensions import Protocol + +F = TypeVar("F", bound=Callable) +T = TypeVar("T", covariant=True) + +PrimitiveType = Union[bool, float, int, str] + +class SupportsIAdd(Protocol): + def __iadd__(self, other: SupportsIAdd) -> SupportsIAdd: ... + +class SupportsOrdering(Protocol): + def __le__(self, other: SupportsOrdering) -> bool: ... + +class SizedIterable(Protocol, Sized, Iterable[T]): ... diff --git a/python/pyspark/accumulators.pyi b/python/pyspark/accumulators.pyi new file mode 100644 index 0000000000000..94f8023d1102b --- /dev/null +++ b/python/pyspark/accumulators.pyi @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Callable, Generic, Tuple, Type, TypeVar + +import socketserver.BaseRequestHandler # type: ignore + +from pyspark._typing import SupportsIAdd + +T = TypeVar("T") +U = TypeVar("U", bound=SupportsIAdd) + +import socketserver as SocketServer + +class Accumulator(Generic[T]): + aid: int + accum_param: AccumulatorParam[T] + def __init__( + self, aid: int, value: T, accum_param: AccumulatorParam[T] + ) -> None: ... + def __reduce__( + self, + ) -> Tuple[ + Callable[[int, int, AccumulatorParam[T]], Accumulator[T]], + Tuple[int, int, AccumulatorParam[T]], + ]: ... + @property + def value(self) -> T: ... + @value.setter + def value(self, value: T) -> None: ... + def add(self, term: T) -> None: ... + def __iadd__(self, term: T) -> Accumulator[T]: ... + +class AccumulatorParam(Generic[T]): + def zero(self, value: T) -> T: ... + def addInPlace(self, value1: T, value2: T) -> T: ... + +class AddingAccumulatorParam(AccumulatorParam[U]): + zero_value: U + def __init__(self, zero_value: U) -> None: ... + def zero(self, value: U) -> U: ... + def addInPlace(self, value1: U, value2: U) -> U: ... + +class _UpdateRequestHandler(SocketServer.StreamRequestHandler): + def handle(self) -> None: ... + +class AccumulatorServer(SocketServer.TCPServer): + auth_token: str + def __init__( + self, + server_address: Tuple[str, int], + RequestHandlerClass: Type[socketserver.BaseRequestHandler], + auth_token: str, + ) -> None: ... + server_shutdown: bool + def shutdown(self) -> None: ... diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi new file mode 100644 index 0000000000000..c2ea3c6f7d8b4 --- /dev/null +++ b/python/pyspark/broadcast.pyi @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import threading +from typing import Any, Generic, Optional, TypeVar + +T = TypeVar("T") + +class Broadcast(Generic[T]): + def __init__( + self, + sc: Optional[Any] = ..., + value: Optional[T] = ..., + pickle_registry: Optional[Any] = ..., + path: Optional[Any] = ..., + sock_file: Optional[Any] = ..., + ) -> None: ... + def dump(self, value: Any, f: Any) -> None: ... + def load_from_path(self, path: Any): ... + def load(self, file: Any): ... + @property + def value(self) -> T: ... + def unpersist(self, blocking: bool = ...) -> None: ... + def destroy(self, blocking: bool = ...) -> None: ... + def __reduce__(self): ... 
+ +class BroadcastPickleRegistry(threading.local): + def __init__(self) -> None: ... + def __iter__(self) -> None: ... + def add(self, bcast: Any) -> None: ... + def clear(self) -> None: ... diff --git a/python/pyspark/conf.pyi b/python/pyspark/conf.pyi new file mode 100644 index 0000000000000..f7ca61dea9cd2 --- /dev/null +++ b/python/pyspark/conf.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import List, Optional, Tuple + +from py4j.java_gateway import JVMView, JavaObject # type: ignore[import] + +class SparkConf: + def __init__( + self, + loadDefaults: bool = ..., + _jvm: Optional[JVMView] = ..., + _jconf: Optional[JavaObject] = ..., + ) -> None: ... + def set(self, key: str, value: str) -> SparkConf: ... + def setIfMissing(self, key: str, value: str) -> SparkConf: ... + def setMaster(self, value: str) -> SparkConf: ... + def setAppName(self, value: str) -> SparkConf: ... + def setSparkHome(self, value: str) -> SparkConf: ... + @overload + def setExecutorEnv(self, key: str, value: str) -> SparkConf: ... + @overload + def setExecutorEnv(self, *, pairs: List[Tuple[str, str]]) -> SparkConf: ... + def setAll(self, pairs: List[Tuple[str, str]]) -> SparkConf: ... + def get(self, key: str, defaultValue: Optional[str] = ...) -> str: ... + def getAll(self) -> List[Tuple[str, str]]: ... + def contains(self, key: str) -> bool: ... + def toDebugString(self) -> str: ... diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi new file mode 100644 index 0000000000000..76ecf8911471a --- /dev/null +++ b/python/pyspark/context.pyi @@ -0,0 +1,176 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar + +from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] + +from pyspark.accumulators import Accumulator, AccumulatorParam +from pyspark.broadcast import Broadcast +from pyspark.conf import SparkConf +from pyspark.profiler import Profiler # noqa: F401 +from pyspark.resource.information import ResourceInformation +from pyspark.rdd import RDD +from pyspark.serializers import Serializer +from pyspark.status import StatusTracker + +T = TypeVar("T") +U = TypeVar("U") + +class SparkContext: + master: str + appName: str + sparkHome: str + PACKAGE_EXTENSIONS: Iterable[str] + def __init__( + self, + master: Optional[str] = ..., + appName: Optional[str] = ..., + sparkHome: Optional[str] = ..., + pyFiles: Optional[List[str]] = ..., + environment: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + serializer: Serializer = ..., + conf: Optional[SparkConf] = ..., + gateway: Optional[JavaGateway] = ..., + jsc: Optional[JavaObject] = ..., + profiler_cls: type = ..., + ) -> None: ... + def __getnewargs__(self): ... + def __enter__(self): ... + def __exit__(self, type, value, trace): ... + @classmethod + def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... + def setLogLevel(self, logLevel: str) -> None: ... + @classmethod + def setSystemProperty(cls, key: str, value: str) -> None: ... + @property + def version(self) -> str: ... + @property + def applicationId(self) -> str: ... + @property + def uiWebUrl(self) -> str: ... + @property + def startTime(self) -> int: ... + @property + def defaultParallelism(self) -> int: ... + @property + def defaultMinPartitions(self) -> int: ... + def stop(self) -> None: ... + def emptyRDD(self) -> RDD[Any]: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numSlices: Optional[int] = ..., + ) -> RDD[int]: ... + def parallelize(self, c: Iterable[T], numSlices: Optional[int] = ...) -> RDD[T]: ... + def pickleFile(self, name: str, minPartitions: Optional[int] = ...) -> RDD[Any]: ... + def textFile( + self, name: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... + ) -> RDD[str]: ... + def wholeTextFiles( + self, path: str, minPartitions: Optional[int] = ..., use_unicode: bool = ... + ) -> RDD[Tuple[str, str]]: ... + def binaryFiles( + self, path: str, minPartitions: Optional[int] = ... + ) -> RDD[Tuple[str, bytes]]: ... + def binaryRecords(self, path: str, recordLength: int) -> RDD[bytes]: ... + def sequenceFile( + self, + path: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + minSplits: Optional[int] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def newAPIHadoopFile( + self, + path: str, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def newAPIHadoopRDD( + self, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... 
+ def hadoopFile( + self, + path: str, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def hadoopRDD( + self, + inputFormatClass: str, + keyClass: str, + valueClass: str, + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + batchSize: int = ..., + ) -> RDD[Tuple[T, U]]: ... + def union(self, rdds: Iterable[RDD[T]]) -> RDD[T]: ... + def broadcast(self, value: T) -> Broadcast[T]: ... + def accumulator( + self, value: T, accum_param: Optional[AccumulatorParam[T]] = ... + ) -> Accumulator[T]: ... + def addFile(self, path: str, recursive: bool = ...) -> None: ... + def addPyFile(self, path: str) -> None: ... + def setCheckpointDir(self, dirName: str) -> None: ... + def setJobGroup( + self, groupId: str, description: str, interruptOnCancel: bool = ... + ) -> None: ... + def setLocalProperty(self, key: str, value: str) -> None: ... + def getLocalProperty(self, key: str) -> Optional[str]: ... + def sparkUser(self) -> str: ... + def setJobDescription(self, value: str) -> None: ... + def cancelJobGroup(self, groupId: str) -> None: ... + def cancelAllJobs(self) -> None: ... + def statusTracker(self) -> StatusTracker: ... + def runJob( + self, + rdd: RDD[T], + partitionFunc: Callable[[Iterable[T]], Iterable[U]], + partitions: Optional[List[int]] = ..., + allowLocal: bool = ..., + ) -> List[U]: ... + def show_profiles(self) -> None: ... + def dump_profiles(self, path: str) -> None: ... + def getConf(self) -> SparkConf: ... + @property + def resources(self) -> Dict[str, ResourceInformation]: ... diff --git a/python/pyspark/daemon.pyi b/python/pyspark/daemon.pyi new file mode 100644 index 0000000000000..dfacf30a9f8a7 --- /dev/null +++ b/python/pyspark/daemon.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import ( # noqa: F401 + UTF8Deserializer as UTF8Deserializer, + read_int as read_int, + write_int as write_int, + write_with_length as write_with_length, +) +from typing import Any + +def compute_real_exit_code(exit_code: Any): ... +def worker(sock: Any, authenticated: Any): ... +def manager() -> None: ... diff --git a/python/pyspark/files.pyi b/python/pyspark/files.pyi new file mode 100644 index 0000000000000..9e7cad17ebbdb --- /dev/null +++ b/python/pyspark/files.pyi @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class SparkFiles: + def __init__(self) -> None: ... + @classmethod + def get(cls, filename: str) -> str: ... + @classmethod + def getRootDirectory(cls) -> str: ... diff --git a/python/pyspark/find_spark_home.pyi b/python/pyspark/find_spark_home.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/find_spark_home.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/java_gateway.pyi b/python/pyspark/java_gateway.pyi new file mode 100644 index 0000000000000..5b45206dc045c --- /dev/null +++ b/python/pyspark/java_gateway.pyi @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined] +from typing import Any, Optional + +def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ... +def local_connect_and_auth(port: Any, auth_secret: Any): ... +def ensure_callback_server_started(gw: Any) -> None: ... diff --git a/python/pyspark/join.pyi b/python/pyspark/join.pyi new file mode 100644 index 0000000000000..e89e0fbbcda9b --- /dev/null +++ b/python/pyspark/join.pyi @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Hashable, Iterable, Optional, Tuple, TypeVar + +from pyspark.resultiterable import ResultIterable +import pyspark.rdd + +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") +U = TypeVar("U") + +def python_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ... +def python_right_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... +def python_left_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ... +def python_full_outer_join( + rdd: pyspark.rdd.RDD[Tuple[K, V]], + other: pyspark.rdd.RDD[Tuple[K, U]], + numPartitions: int, +) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... +def python_cogroup( + rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int +) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ... diff --git a/python/pyspark/ml/__init__.pyi b/python/pyspark/ml/__init__.pyi new file mode 100644 index 0000000000000..8e3b8a5daeb08 --- /dev/null +++ b/python/pyspark/ml/__init__.pyi @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.ml import ( # noqa: F401 + classification as classification, + clustering as clustering, + evaluation as evaluation, + feature as feature, + fpm as fpm, + image as image, + linalg as linalg, + param as param, + recommendation as recommendation, + regression as regression, + stat as stat, + tuning as tuning, + util as util, +) +from pyspark.ml.base import ( # noqa: F401 + Estimator as Estimator, + Model as Model, + PredictionModel as PredictionModel, + Predictor as Predictor, + Transformer as Transformer, + UnaryTransformer as UnaryTransformer, +) +from pyspark.ml.pipeline import ( # noqa: F401 + Pipeline as Pipeline, + PipelineModel as PipelineModel, +) diff --git a/python/pyspark/ml/_typing.pyi b/python/pyspark/ml/_typing.pyi new file mode 100644 index 0000000000000..d966a787c0fca --- /dev/null +++ b/python/pyspark/ml/_typing.pyi @@ -0,0 +1,76 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, TypeVar, Union +from typing_extensions import Literal + +import pyspark.ml.base +import pyspark.ml.param +import pyspark.ml.util +import pyspark.ml.wrapper + +ParamMap = Dict[pyspark.ml.param.Param, Any] +PipelineStage = Union[pyspark.ml.base.Estimator, pyspark.ml.base.Transformer] + +T = TypeVar("T") +P = TypeVar("P", bound=pyspark.ml.param.Params) +M = TypeVar("M", bound=pyspark.ml.base.Transformer) +JM = TypeVar("JM", bound=pyspark.ml.wrapper.JavaTransformer) + +BinaryClassificationEvaluatorMetricType = Union[ + Literal["areaUnderROC"], Literal["areaUnderPR"] +] +RegressionEvaluatorMetricType = Union[ + Literal["rmse"], Literal["mse"], Literal["r2"], Literal["mae"], Literal["var"] +] +MulticlassClassificationEvaluatorMetricType = Union[ + Literal["f1"], + Literal["accuracy"], + Literal["weightedPrecision"], + Literal["weightedRecall"], + Literal["weightedTruePositiveRate"], + Literal["weightedFalsePositiveRate"], + Literal["weightedFMeasure"], + Literal["truePositiveRateByLabel"], + Literal["falsePositiveRateByLabel"], + Literal["precisionByLabel"], + Literal["recallByLabel"], + Literal["fMeasureByLabel"], +] +MultilabelClassificationEvaluatorMetricType = Union[ + Literal["subsetAccuracy"], + Literal["accuracy"], + Literal["hammingLoss"], + Literal["precision"], + Literal["recall"], + Literal["f1Measure"], + Literal["precisionByLabel"], + Literal["recallByLabel"], + Literal["f1MeasureByLabel"], + Literal["microPrecision"], + Literal["microRecall"], + Literal["microF1Measure"], +] +ClusteringEvaluatorMetricType = Union[Literal["silhouette"]] +RankingEvaluatorMetricType = Union[ + Literal["meanAveragePrecision"], + Literal["meanAveragePrecisionAtK"], + Literal["precisionAtK"], + Literal["ndcgAtK"], + Literal["recallAtK"], +] diff --git a/python/pyspark/ml/base.pyi b/python/pyspark/ml/base.pyi new file mode 100644 
index 0000000000000..7fd8c3b70b672 --- /dev/null +++ b/python/pyspark/ml/base.pyi @@ -0,0 +1,103 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Callable, + Generic, + Iterable, + List, + Optional, + Tuple, +) +from pyspark.ml._typing import M, P, T, ParamMap + +import _thread + +import abc +from abc import abstractmethod +from pyspark import since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.ml.param.shared import ( + HasFeaturesCol as HasFeaturesCol, + HasInputCol as HasInputCol, + HasLabelCol as HasLabelCol, + HasOutputCol as HasOutputCol, + HasPredictionCol as HasPredictionCol, + Params as Params, +) +from pyspark.sql.functions import udf as udf # noqa: F401 +from pyspark.sql.types import ( # noqa: F401 + DataType, + StructField as StructField, + StructType as StructType, +) + +from pyspark.sql.dataframe import DataFrame + +class _FitMultipleIterator: + fitSingleModel: Callable[[int], Transformer] + numModel: int + counter: int = ... + lock: _thread.LockType + def __init__( + self, fitSingleModel: Callable[[int], Transformer], numModels: int + ) -> None: ... + def __iter__(self) -> _FitMultipleIterator: ... + def __next__(self) -> Tuple[int, Transformer]: ... + def next(self) -> Tuple[int, Transformer]: ... + +class Estimator(Generic[M], Params, metaclass=abc.ABCMeta): + @overload + def fit(self, dataset: DataFrame, params: Optional[ParamMap] = ...) -> M: ... + @overload + def fit(self, dataset: DataFrame, params: List[ParamMap]) -> List[M]: ... + def fitMultiple( + self, dataset: DataFrame, params: List[ParamMap] + ) -> Iterable[Tuple[int, M]]: ... + +class Transformer(Params, metaclass=abc.ABCMeta): + def transform( + self, dataset: DataFrame, params: Optional[ParamMap] = ... + ) -> DataFrame: ... + +class Model(Transformer, metaclass=abc.ABCMeta): ... + +class UnaryTransformer(HasInputCol, HasOutputCol, Transformer, metaclass=abc.ABCMeta): + def createTransformFunc(self) -> Callable: ... + def outputDataType(self) -> DataType: ... + def validateInputType(self, inputType: DataType) -> None: ... + def transformSchema(self, schema: StructType) -> StructType: ... + def setInputCol(self: M, value: str) -> M: ... + def setOutputCol(self: M, value: str) -> M: ... + +class _PredictorParams(HasLabelCol, HasFeaturesCol, HasPredictionCol): ... + +class Predictor(Estimator[M], _PredictorParams, metaclass=abc.ABCMeta): + def setLabelCol(self: P, value: str) -> P: ... + def setFeaturesCol(self: P, value: str) -> P: ... + def setPredictionCol(self: P, value: str) -> P: ... + +class PredictionModel(Generic[T], Model, _PredictorParams, metaclass=abc.ABCMeta): + def setFeaturesCol(self: M, value: str) -> M: ... 
+ def setPredictionCol(self: M, value: str) -> M: ... + @property + @abc.abstractmethod + def numFeatures(self) -> int: ... + @abstractmethod + def predict(self, value: T) -> float: ... diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi new file mode 100644 index 0000000000000..55afc20a54cb9 --- /dev/null +++ b/python/pyspark/ml/classification.pyi @@ -0,0 +1,922 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, List, Optional +from pyspark.ml._typing import JM, M, P, T, ParamMap + +import abc +from abc import abstractmethod +from pyspark.ml import Estimator, Model, PredictionModel, Predictor, Transformer +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasBlockSize, + HasElasticNetParam, + HasFitIntercept, + HasMaxIter, + HasParallelism, + HasProbabilityCol, + HasRawPredictionCol, + HasRegParam, + HasSeed, + HasSolver, + HasStandardization, + HasStepSize, + HasThreshold, + HasThresholds, + HasTol, + HasWeightCol, +) +from pyspark.ml.regression import _FactorizationMachinesParams +from pyspark.ml.tree import ( + _DecisionTreeModel, + _DecisionTreeParams, + _GBTParams, + _HasVarianceImpurity, + _RandomForestParams, + _TreeClassifierParams, + _TreeEnsembleModel, +) +from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.param import Param +from pyspark.ml.regression import DecisionTreeRegressionModel +from pyspark.sql.dataframe import DataFrame + +class _ClassifierParams(HasRawPredictionCol, _PredictorParams): ... + +class Classifier(Predictor, _ClassifierParams, metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + +class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + @property + @abc.abstractmethod + def numClasses(self) -> int: ... + @abstractmethod + def predictRaw(self, value: Vector) -> Vector: ... + +class _ProbabilisticClassifierParams( + HasProbabilityCol, HasThresholds, _ClassifierParams +): ... + +class ProbabilisticClassifier( + Classifier, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta +): + def setProbabilityCol(self: P, value: str) -> P: ... + def setThresholds(self: P, value: List[float]) -> P: ... + +class ProbabilisticClassificationModel( + ClassificationModel, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta +): + def setProbabilityCol(self: M, value: str) -> M: ... + def setThresholds(self: M, value: List[float]) -> M: ... + @abstractmethod + def predictProbability(self, value: Vector) -> Vector: ... 
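For context, a minimal sketch of what the overloaded `Estimator.fit` signatures in `base.pyi` above encode in practice: fitting with an optional single param map yields one model, while fitting with a list of param maps yields a list of models. The local SparkSession, the toy data and the choice of `LogisticRegression` (declared later in this same module) are illustrative assumptions, not part of the patch.

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0, 1.1)), (0.0, Vectors.dense(2.0, 1.0))],
    ["label", "features"],
)

lr = LogisticRegression(maxIter=5)

# fit(dataset) -> LogisticRegressionModel (the first @overload)
model = lr.fit(df)

# fit(dataset, [params, ...]) -> List[LogisticRegressionModel] (the second @overload)
models = lr.fit(df, [{lr.regParam: 0.0}, {lr.regParam: 0.1}])

print(type(model).__name__, len(models))
```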
+ +class _JavaClassifier(Classifier, JavaPredictor[JM], metaclass=abc.ABCMeta): + def setRawPredictionCol(self: P, value: str) -> P: ... + +class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): + @property + def numClasses(self) -> int: ... + def predictRaw(self, value: Vector) -> Vector: ... + +class _JavaProbabilisticClassifier( + ProbabilisticClassifier, _JavaClassifier[JM], metaclass=abc.ABCMeta +): ... + +class _JavaProbabilisticClassificationModel( + ProbabilisticClassificationModel, _JavaClassificationModel[T] +): + def predictProbability(self, value: Any): ... + +class _ClassificationSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def labelCol(self) -> str: ... + @property + def weightCol(self) -> str: ... + @property + def labels(self) -> List[str]: ... + @property + def truePositiveRateByLabel(self) -> List[float]: ... + @property + def falsePositiveRateByLabel(self) -> List[float]: ... + @property + def precisionByLabel(self) -> List[float]: ... + @property + def recallByLabel(self) -> List[float]: ... + def fMeasureByLabel(self, beta: float = ...) -> List[float]: ... + @property + def accuracy(self) -> float: ... + @property + def weightedTruePositiveRate(self) -> float: ... + @property + def weightedFalsePositiveRate(self) -> float: ... + @property + def weightedRecall(self) -> float: ... + @property + def weightedPrecision(self) -> float: ... + def weightedFMeasure(self, beta: float = ...) -> float: ... + +class _TrainingSummary(JavaWrapper): + @property + def objectiveHistory(self) -> List[float]: ... + @property + def totalIterations(self) -> int: ... + +class _BinaryClassificationSummary(_ClassificationSummary): + @property + def scoreCol(self) -> str: ... + @property + def roc(self) -> DataFrame: ... + @property + def areaUnderROC(self) -> float: ... + @property + def pr(self) -> DataFrame: ... + @property + def fMeasureByThreshold(self) -> DataFrame: ... + @property + def precisionByThreshold(self) -> DataFrame: ... + @property + def recallByThreshold(self) -> DataFrame: ... + +class _LinearSVCParams( + _ClassifierParams, + HasRegParam, + HasMaxIter, + HasFitIntercept, + HasTol, + HasStandardization, + HasWeightCol, + HasAggregationDepth, + HasThreshold, + HasBlockSize, +): + threshold: Param[float] + def __init__(self, *args: Any) -> None: ... + +class LinearSVC( + _JavaClassifier[LinearSVCModel], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable[LinearSVC], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + tol: float = ..., + rawPredictionCol: str = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + threshold: float = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + tol: float = ..., + rawPredictionCol: str = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + threshold: float = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> LinearSVC: ... + def setMaxIter(self, value: int) -> LinearSVC: ... + def setRegParam(self, value: float) -> LinearSVC: ... + def setTol(self, value: float) -> LinearSVC: ... 
+ def setFitIntercept(self, value: bool) -> LinearSVC: ... + def setStandardization(self, value: bool) -> LinearSVC: ... + def setThreshold(self, value: float) -> LinearSVC: ... + def setWeightCol(self, value: str) -> LinearSVC: ... + def setAggregationDepth(self, value: int) -> LinearSVC: ... + def setBlockSize(self, value: int) -> LinearSVC: ... + +class LinearSVCModel( + _JavaClassificationModel[Vector], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable[LinearSVCModel], + HasTrainingSummary[LinearSVCTrainingSummary], +): + def setThreshold(self, value: float) -> LinearSVCModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + def summary(self) -> LinearSVCTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LinearSVCSummary: ... + +class LinearSVCSummary(_BinaryClassificationSummary): ... +class LinearSVCTrainingSummary(LinearSVCSummary, _TrainingSummary): ... + +class _LogisticRegressionParams( + _ProbabilisticClassifierParams, + HasRegParam, + HasElasticNetParam, + HasMaxIter, + HasFitIntercept, + HasTol, + HasStandardization, + HasWeightCol, + HasAggregationDepth, + HasThreshold, + HasBlockSize, +): + threshold: Param[float] + family: Param[str] + lowerBoundsOnCoefficients: Param[Matrix] + upperBoundsOnCoefficients: Param[Matrix] + lowerBoundsOnIntercepts: Param[Vector] + upperBoundsOnIntercepts: Param[Vector] + def __init__(self, *args: Any): ... + def setThreshold(self: P, value: float) -> P: ... + def getThreshold(self) -> float: ... + def setThresholds(self: P, value: List[float]) -> P: ... + def getThresholds(self) -> List[float]: ... + def getFamily(self) -> str: ... + def getLowerBoundsOnCoefficients(self) -> Matrix: ... + def getUpperBoundsOnCoefficients(self) -> Matrix: ... + def getLowerBoundsOnIntercepts(self) -> Vector: ... + def getUpperBoundsOnIntercepts(self) -> Vector: ... + +class LogisticRegression( + _JavaProbabilisticClassifier[LogisticRegressionModel], + _LogisticRegressionParams, + JavaMLWritable, + JavaMLReadable[LogisticRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + blockSize: int = ... + ) -> LogisticRegression: ... 
+ def setFamily(self, value: str) -> LogisticRegression: ... + def setLowerBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... + def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... + def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... + def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... + def setMaxIter(self, value: int) -> LogisticRegression: ... + def setRegParam(self, value: float) -> LogisticRegression: ... + def setTol(self, value: float) -> LogisticRegression: ... + def setElasticNetParam(self, value: float) -> LogisticRegression: ... + def setFitIntercept(self, value: bool) -> LogisticRegression: ... + def setStandardization(self, value: bool) -> LogisticRegression: ... + def setWeightCol(self, value: str) -> LogisticRegression: ... + def setAggregationDepth(self, value: int) -> LogisticRegression: ... + def setBlockSize(self, value: int) -> LogisticRegression: ... + +class LogisticRegressionModel( + _JavaProbabilisticClassificationModel[Vector], + _LogisticRegressionParams, + JavaMLWritable, + JavaMLReadable[LogisticRegressionModel], + HasTrainingSummary[LogisticRegressionTrainingSummary], +): + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def coefficientMatrix(self) -> Matrix: ... + @property + def interceptVector(self) -> Vector: ... + @property + def summary(self) -> LogisticRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LogisticRegressionSummary: ... + +class LogisticRegressionSummary(_ClassificationSummary): + @property + def probabilityCol(self) -> str: ... + @property + def featuresCol(self) -> str: ... + +class LogisticRegressionTrainingSummary( + LogisticRegressionSummary, _TrainingSummary +): ... +class BinaryLogisticRegressionSummary( + _BinaryClassificationSummary, LogisticRegressionSummary +): ... +class BinaryLogisticRegressionTrainingSummary( + BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary +): ... + +class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... + +class DecisionTreeClassifier( + _JavaProbabilisticClassifier[DecisionTreeClassificationModel], + _DecisionTreeClassifierParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> DecisionTreeClassifier: ... + def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ... 
+ def setMaxBins(self, value: int) -> DecisionTreeClassifier: ... + def setMinInstancesPerNode(self, value: int) -> DecisionTreeClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeClassifier: ... + def setMinInfoGain(self, value: float) -> DecisionTreeClassifier: ... + def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ... + def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... + def setImpurity(self, value: str) -> DecisionTreeClassifier: ... + def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ... + def setSeed(self, value: int) -> DecisionTreeClassifier: ... + def setWeightCol(self, value: str) -> DecisionTreeClassifier: ... + +class DecisionTreeClassificationModel( + _DecisionTreeModel, + _JavaProbabilisticClassificationModel[Vector], + _DecisionTreeClassifierParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeClassificationModel], +): + @property + def featureImportances(self) -> Vector: ... + +class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): + def __init__(self, *args: Any): ... + +class RandomForestClassifier( + _JavaProbabilisticClassifier[RandomForestClassificationModel], + _RandomForestClassifierParams, + JavaMLWritable, + JavaMLReadable[RandomForestClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + seed: Optional[int] = ..., + impurity: str = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + subsamplingRate: float = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> RandomForestClassifier: ... + def setMaxDepth(self, value: int) -> RandomForestClassifier: ... + def setMaxBins(self, value: int) -> RandomForestClassifier: ... + def setMinInstancesPerNode(self, value: int) -> RandomForestClassifier: ... + def setMinInfoGain(self, value: float) -> RandomForestClassifier: ... + def setMaxMemoryInMB(self, value: int) -> RandomForestClassifier: ... + def setCacheNodeIds(self, value: bool) -> RandomForestClassifier: ... + def setImpurity(self, value: str) -> RandomForestClassifier: ... + def setNumTrees(self, value: int) -> RandomForestClassifier: ... + def setBootstrap(self, value: bool) -> RandomForestClassifier: ... + def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... + def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... + def setSeed(self, value: int) -> RandomForestClassifier: ... 
+ def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... + def setWeightCol(self, value: str) -> RandomForestClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> RandomForestClassifier: ... + +class RandomForestClassificationModel( + _TreeEnsembleModel, + _JavaProbabilisticClassificationModel[Vector], + _RandomForestClassifierParams, + JavaMLWritable, + JavaMLReadable[RandomForestClassificationModel], + HasTrainingSummary[RandomForestClassificationTrainingSummary], +): + @property + def featureImportances(self) -> Vector: ... + @property + def trees(self) -> List[DecisionTreeClassificationModel]: ... + def summary(self) -> RandomForestClassificationTrainingSummary: ... + def evaluate(self, dataset) -> RandomForestClassificationSummary: ... + +class RandomForestClassificationSummary(_ClassificationSummary): ... +class RandomForestClassificationTrainingSummary( + RandomForestClassificationSummary, _TrainingSummary +): ... +class BinaryRandomForestClassificationSummary(_BinaryClassificationSummary): ... +class BinaryRandomForestClassificationTrainingSummary( + BinaryRandomForestClassificationSummary, RandomForestClassificationTrainingSummary +): ... + +class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): + supportedLossTypes: List[str] + lossType: Param[str] + def __init__(self, *args: Any): ... + def getLossType(self) -> str: ... + +class GBTClassifier( + _JavaProbabilisticClassifier[GBTClassificationModel], + _GBTClassifierParams, + JavaMLWritable, + JavaMLReadable[GBTClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + lossType: str = ..., + maxIter: int = ..., + stepSize: float = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + featureSubsetStrategy: str = ..., + validationTol: float = ..., + validationIndicatorCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + lossType: str = ..., + maxIter: int = ..., + stepSize: float = ..., + seed: Optional[int] = ..., + subsamplingRate: float = ..., + featureSubsetStrategy: str = ..., + validationTol: float = ..., + validationIndicatorCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ... + ) -> GBTClassifier: ... + def setMaxDepth(self, value: int) -> GBTClassifier: ... + def setMaxBins(self, value: int) -> GBTClassifier: ... + def setMinInstancesPerNode(self, value: int) -> GBTClassifier: ... + def setMinInfoGain(self, value: float) -> GBTClassifier: ... + def setMaxMemoryInMB(self, value: int) -> GBTClassifier: ... + def setCacheNodeIds(self, value: bool) -> GBTClassifier: ... + def setImpurity(self, value: str) -> GBTClassifier: ... + def setLossType(self, value: str) -> GBTClassifier: ... + def setSubsamplingRate(self, value: float) -> GBTClassifier: ... + def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... 
+ def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... + def setMaxIter(self, value: int) -> GBTClassifier: ... + def setCheckpointInterval(self, value: int) -> GBTClassifier: ... + def setSeed(self, value: int) -> GBTClassifier: ... + def setStepSize(self, value: float) -> GBTClassifier: ... + def setWeightCol(self, value: str) -> GBTClassifier: ... + def setMinWeightFractionPerNode(self, value: float) -> GBTClassifier: ... + +class GBTClassificationModel( + _TreeEnsembleModel, + _JavaProbabilisticClassificationModel[Vector], + _GBTClassifierParams, + JavaMLWritable, + JavaMLReadable[GBTClassificationModel], +): + @property + def featureImportances(self) -> Vector: ... + @property + def trees(self) -> List[DecisionTreeRegressionModel]: ... + def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: ... + +class _NaiveBayesParams(_PredictorParams, HasWeightCol): + smoothing: Param[float] + modelType: Param[str] + def __init__(self, *args: Any): ... + def getSmoothing(self) -> float: ... + def getModelType(self) -> str: ... + +class NaiveBayes( + _JavaProbabilisticClassifier[NaiveBayesModel], + _NaiveBayesParams, + HasThresholds, + HasWeightCol, + JavaMLWritable, + JavaMLReadable[NaiveBayes], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + smoothing: float = ..., + modelType: str = ..., + thresholds: Optional[List[float]] = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + smoothing: float = ..., + modelType: str = ..., + thresholds: Optional[List[float]] = ..., + weightCol: Optional[str] = ... + ) -> NaiveBayes: ... + def setSmoothing(self, value: float) -> NaiveBayes: ... + def setModelType(self, value: str) -> NaiveBayes: ... + def setWeightCol(self, value: str) -> NaiveBayes: ... + +class NaiveBayesModel( + _JavaProbabilisticClassificationModel[Vector], + _NaiveBayesParams, + JavaMLWritable, + JavaMLReadable[NaiveBayesModel], +): + @property + def pi(self) -> Vector: ... + @property + def theta(self) -> Matrix: ... + @property + def sigma(self) -> Matrix: ... + +class _MultilayerPerceptronParams( + _ProbabilisticClassifierParams, + HasSeed, + HasMaxIter, + HasTol, + HasStepSize, + HasSolver, + HasBlockSize, +): + layers: Param[List[int]] + solver: Param[str] + initialWeights: Param[Vector] + def __init__(self, *args: Any): ... + def getLayers(self) -> List[int]: ... + def getInitialWeights(self) -> Vector: ... + +class MultilayerPerceptronClassifier( + _JavaProbabilisticClassifier[MultilayerPerceptronClassificationModel], + _MultilayerPerceptronParams, + JavaMLWritable, + JavaMLReadable[MultilayerPerceptronClassifier], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + tol: float = ..., + seed: Optional[int] = ..., + layers: Optional[List[int]] = ..., + blockSize: int = ..., + stepSize: float = ..., + solver: str = ..., + initialWeights: Optional[Vector] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + tol: float = ..., + seed: Optional[int] = ..., + layers: Optional[List[int]] = ..., + blockSize: int = ..., + stepSize: float = ..., + solver: str = ..., + initialWeights: Optional[Vector] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ... + ) -> MultilayerPerceptronClassifier: ... + def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ... + def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ... + def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ... + def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ... + def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ... + def setTol(self, value: float) -> MultilayerPerceptronClassifier: ... + def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... + def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ... + +class MultilayerPerceptronClassificationModel( + _JavaProbabilisticClassificationModel[Vector], + _MultilayerPerceptronParams, + JavaMLWritable, + JavaMLReadable[MultilayerPerceptronClassificationModel], + HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary], +): + @property + def weights(self) -> Vector: ... + def summary(self) -> MultilayerPerceptronClassificationTrainingSummary: ... + def evaluate( + self, dataset: DataFrame + ) -> MultilayerPerceptronClassificationSummary: ... + +class MultilayerPerceptronClassificationSummary(_ClassificationSummary): ... +class MultilayerPerceptronClassificationTrainingSummary( + MultilayerPerceptronClassificationSummary, _TrainingSummary +): ... + +class _OneVsRestParams(_ClassifierParams, HasWeightCol): + classifier: Param[Estimator] + def getClassifier(self) -> Estimator[M]: ... + +class OneVsRest( + Estimator[OneVsRestModel], + _OneVsRestParams, + HasParallelism, + JavaMLReadable[OneVsRest], + JavaMLWritable, +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + rawPredictionCol: str = ..., + classifier: Optional[Estimator[M]] = ..., + weightCol: Optional[str] = ..., + parallelism: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: Optional[str] = ..., + labelCol: Optional[str] = ..., + predictionCol: Optional[str] = ..., + rawPredictionCol: str = ..., + classifier: Optional[Estimator[M]] = ..., + weightCol: Optional[str] = ..., + parallelism: int = ... + ) -> OneVsRest: ... + def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... + def setLabelCol(self, value: str) -> OneVsRest: ... + def setFeaturesCol(self, value: str) -> OneVsRest: ... + def setPredictionCol(self, value: str) -> OneVsRest: ... + def setRawPredictionCol(self, value: str) -> OneVsRest: ... + def setWeightCol(self, value: str) -> OneVsRest: ... + def setParallelism(self, value: int) -> OneVsRest: ... + def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... + +class OneVsRestModel( + Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable +): + models: List[Transformer] + def __init__(self, models: List[Transformer]) -> None: ... + def setFeaturesCol(self, value: str) -> OneVsRestModel: ... + def setPredictionCol(self, value: str) -> OneVsRestModel: ... + def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... + def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... 
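As a quick illustration of how the `OneVsRest` and `OneVsRestModel` annotations above line up with user code (the local session and the tiny three-class dataset are assumptions made only for this sketch):

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression, OneVsRest
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [
        (0.0, Vectors.dense(0.0, 0.0)),
        (1.0, Vectors.dense(1.0, 0.5)),
        (2.0, Vectors.dense(2.0, 1.5)),
    ],
    ["label", "features"],
)

# classifier is typed as Optional[Estimator[M]]; fit returns an OneVsRestModel
ovr = OneVsRest(classifier=LogisticRegression(maxIter=5))
ovr_model = ovr.fit(df)

# OneVsRestModel.models is List[Transformer]; transform adds the prediction column
print(len(ovr_model.models))
ovr_model.transform(df).select("label", "prediction").show()
```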
+ +class FMClassifier( + _JavaProbabilisticClassifier[FMClassificationModel], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMClassifier], +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + thresholds: Optional[Any] = ..., + seed: Optional[Any] = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + thresholds: Optional[Any] = ..., + seed: Optional[Any] = ..., + ): ... + def setFactorSize(self, value: int) -> FMClassifier: ... + def setFitLinear(self, value: bool) -> FMClassifier: ... + def setMiniBatchFraction(self, value: float) -> FMClassifier: ... + def setInitStd(self, value: float) -> FMClassifier: ... + def setMaxIter(self, value: int) -> FMClassifier: ... + def setStepSize(self, value: float) -> FMClassifier: ... + def setTol(self, value: float) -> FMClassifier: ... + def setSolver(self, value: str) -> FMClassifier: ... + def setSeed(self, value: int) -> FMClassifier: ... + def setFitIntercept(self, value: bool) -> FMClassifier: ... + def setRegParam(self, value: float) -> FMClassifier: ... + +class FMClassificationModel( + _JavaProbabilisticClassificationModel[Vector], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMClassificationModel], +): + @property + def intercept(self) -> float: ... + @property + def linear(self) -> Vector: ... + @property + def factors(self) -> Matrix: ... + def summary(self) -> FMClassificationTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> FMClassificationSummary: ... + +class FMClassificationSummary(_BinaryClassificationSummary): ... +class FMClassificationTrainingSummary(FMClassificationSummary, _TrainingSummary): ... diff --git a/python/pyspark/ml/clustering.pyi b/python/pyspark/ml/clustering.pyi new file mode 100644 index 0000000000000..e2a2d7e888367 --- /dev/null +++ b/python/pyspark/ml/clustering.pyi @@ -0,0 +1,437 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, List, Optional + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.util import ( + GeneralJavaMLWritable, + HasTrainingSummary, + JavaMLReadable, + JavaMLWritable, +) +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaWrapper +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasCheckpointInterval, + HasDistanceMeasure, + HasFeaturesCol, + HasMaxIter, + HasPredictionCol, + HasProbabilityCol, + HasSeed, + HasTol, + HasWeightCol, +) + +from pyspark.ml.param import Param +from pyspark.ml.stat import MultivariateGaussian +from pyspark.sql.dataframe import DataFrame + +from numpy import ndarray # type: ignore[import] + +class ClusteringSummary(JavaWrapper): + @property + def predictionCol(self) -> str: ... + @property + def predictions(self) -> DataFrame: ... + @property + def featuresCol(self) -> str: ... + @property + def k(self) -> int: ... + @property + def cluster(self) -> DataFrame: ... + @property + def clusterSizes(self) -> List[int]: ... + @property + def numIter(self) -> int: ... + +class _GaussianMixtureParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasProbabilityCol, + HasTol, + HasAggregationDepth, + HasWeightCol, +): + k: Param[int] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + +class GaussianMixtureModel( + JavaModel, + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable[GaussianMixtureModel], + HasTrainingSummary[GaussianMixtureSummary], +): + def setFeaturesCol(self, value: str) -> GaussianMixtureModel: ... + def setPredictionCol(self, value: str) -> GaussianMixtureModel: ... + def setProbabilityCol(self, value: str) -> GaussianMixtureModel: ... + @property + def weights(self) -> List[float]: ... + @property + def gaussians(self) -> List[MultivariateGaussian]: ... + @property + def gaussiansDF(self) -> DataFrame: ... + @property + def summary(self) -> GaussianMixtureSummary: ... + def predict(self, value: Vector) -> int: ... + def predictProbability(self, value: Vector) -> Vector: ... + +class GaussianMixture( + JavaEstimator[GaussianMixtureModel], + _GaussianMixtureParams, + JavaMLWritable, + JavaMLReadable[GaussianMixture], +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + probabilityCol: str = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + aggregationDepth: int = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + probabilityCol: str = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + aggregationDepth: int = ..., + weightCol: Optional[str] = ... + ) -> GaussianMixture: ... + def setK(self, value: int) -> GaussianMixture: ... + def setMaxIter(self, value: int) -> GaussianMixture: ... + def setFeaturesCol(self, value: str) -> GaussianMixture: ... + def setPredictionCol(self, value: str) -> GaussianMixture: ... + def setProbabilityCol(self, value: str) -> GaussianMixture: ... + def setWeightCol(self, value: str) -> GaussianMixture: ... + def setSeed(self, value: int) -> GaussianMixture: ... + def setTol(self, value: float) -> GaussianMixture: ... + def setAggregationDepth(self, value: int) -> GaussianMixture: ... + +class GaussianMixtureSummary(ClusteringSummary): + @property + def probabilityCol(self) -> str: ... + @property + def probability(self) -> DataFrame: ... + @property + def logLikelihood(self) -> float: ... 
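A small usage sketch for the `GaussianMixture` stubs above, showing the annotated model properties and the `predict`/`summary` members; the session and the one-dimensional toy data are illustrative assumptions.

```
from pyspark.sql import SparkSession
from pyspark.ml.clustering import GaussianMixture
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense(-0.5),), (Vectors.dense(-0.1),), (Vectors.dense(0.3),),
     (Vectors.dense(9.8),), (Vectors.dense(10.2),), (Vectors.dense(10.6),)],
    ["features"],
)

gm = GaussianMixture(k=2, seed=1)
model = gm.fit(df)                       # GaussianMixtureModel

print(model.weights)                     # List[float], one weight per component
model.gaussiansDF.show()                 # DataFrame with per-component mean/cov
print(model.predict(Vectors.dense(9.9))) # int cluster index for a single Vector
print(model.summary.logLikelihood)       # GaussianMixtureSummary property
```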
+ +class KMeansSummary(ClusteringSummary): + def trainingCost(self) -> float: ... + +class _KMeansParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasTol, + HasDistanceMeasure, + HasWeightCol, +): + k: Param[int] + initMode: Param[str] + initSteps: Param[int] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getInitMode(self) -> str: ... + def getInitSteps(self) -> int: ... + +class KMeansModel( + JavaModel, + _KMeansParams, + GeneralJavaMLWritable, + JavaMLReadable[KMeansModel], + HasTrainingSummary[KMeansSummary], +): + def setFeaturesCol(self, value: str) -> KMeansModel: ... + def setPredictionCol(self, value: str) -> KMeansModel: ... + def clusterCenters(self) -> List[ndarray]: ... + @property + def summary(self) -> KMeansSummary: ... + def predict(self, value: Vector) -> int: ... + +class KMeans( + JavaEstimator[KMeansModel], _KMeansParams, JavaMLWritable, JavaMLReadable[KMeans] +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + initMode: str = ..., + initSteps: int = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + k: int = ..., + initMode: str = ..., + initSteps: int = ..., + tol: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> KMeans: ... + def setK(self, value: int) -> KMeans: ... + def setInitMode(self, value: str) -> KMeans: ... + def setInitSteps(self, value: int) -> KMeans: ... + def setDistanceMeasure(self, value: str) -> KMeans: ... + def setMaxIter(self, value: int) -> KMeans: ... + def setFeaturesCol(self, value: str) -> KMeans: ... + def setPredictionCol(self, value: str) -> KMeans: ... + def setSeed(self, value: int) -> KMeans: ... + def setTol(self, value: float) -> KMeans: ... + def setWeightCol(self, value: str) -> KMeans: ... + +class _BisectingKMeansParams( + HasMaxIter, + HasFeaturesCol, + HasSeed, + HasPredictionCol, + HasDistanceMeasure, + HasWeightCol, +): + k: Param[int] + minDivisibleClusterSize: Param[float] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getMinDivisibleClusterSize(self) -> float: ... + +class BisectingKMeansModel( + JavaModel, + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable[BisectingKMeansModel], + HasTrainingSummary[BisectingKMeansSummary], +): + def setFeaturesCol(self, value: str) -> BisectingKMeansModel: ... + def setPredictionCol(self, value: str) -> BisectingKMeansModel: ... + def clusterCenters(self) -> List[ndarray]: ... + def computeCost(self, dataset: DataFrame) -> float: ... + @property + def summary(self) -> BisectingKMeansSummary: ... + def predict(self, value: Vector) -> int: ... + +class BisectingKMeans( + JavaEstimator[BisectingKMeansModel], + _BisectingKMeansParams, + JavaMLWritable, + JavaMLReadable[BisectingKMeans], +): + def __init__( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + k: int = ..., + minDivisibleClusterSize: float = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + k: int = ..., + minDivisibleClusterSize: float = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> BisectingKMeans: ... + def setK(self, value: int) -> BisectingKMeans: ... + def setMinDivisibleClusterSize(self, value: float) -> BisectingKMeans: ... + def setDistanceMeasure(self, value: str) -> BisectingKMeans: ... + def setMaxIter(self, value: int) -> BisectingKMeans: ... + def setFeaturesCol(self, value: str) -> BisectingKMeans: ... + def setPredictionCol(self, value: str) -> BisectingKMeans: ... + def setSeed(self, value: int) -> BisectingKMeans: ... + def setWeightCol(self, value: str) -> BisectingKMeans: ... + +class BisectingKMeansSummary(ClusteringSummary): + @property + def trainingCost(self) -> float: ... + +class _LDAParams(HasMaxIter, HasFeaturesCol, HasSeed, HasCheckpointInterval): + k: Param[int] + optimizer: Param[str] + learningOffset: Param[float] + learningDecay: Param[float] + subsamplingRate: Param[float] + optimizeDocConcentration: Param[bool] + docConcentration: Param[List[float]] + topicConcentration: Param[float] + topicDistributionCol: Param[str] + keepLastCheckpoint: Param[bool] + def __init__(self, *args: Any): ... + def setK(self, value: int) -> LDA: ... + def getOptimizer(self) -> str: ... + def getLearningOffset(self) -> float: ... + def getLearningDecay(self) -> float: ... + def getSubsamplingRate(self) -> float: ... + def getOptimizeDocConcentration(self) -> bool: ... + def getDocConcentration(self) -> List[float]: ... + def getTopicConcentration(self) -> float: ... + def getTopicDistributionCol(self) -> str: ... + def getKeepLastCheckpoint(self) -> bool: ... + +class LDAModel(JavaModel, _LDAParams): + def setFeaturesCol(self, value: str) -> LDAModel: ... + def setSeed(self, value: int) -> LDAModel: ... + def setTopicDistributionCol(self, value: str) -> LDAModel: ... + def isDistributed(self) -> bool: ... + def vocabSize(self) -> int: ... + def topicsMatrix(self) -> Matrix: ... + def logLikelihood(self, dataset: DataFrame) -> float: ... + def logPerplexity(self, dataset: DataFrame) -> float: ... + def describeTopics(self, maxTermsPerTopic: int = ...) -> DataFrame: ... + def estimatedDocConcentration(self) -> Vector: ... + +class DistributedLDAModel( + LDAModel, JavaMLReadable[DistributedLDAModel], JavaMLWritable +): + def toLocal(self) -> LDAModel: ... + def trainingLogLikelihood(self) -> float: ... + def logPrior(self) -> float: ... + def getCheckpointFiles(self) -> List[str]: ... + +class LocalLDAModel(LDAModel, JavaMLReadable[LocalLDAModel], JavaMLWritable): ... + +class LDA(JavaEstimator[LDAModel], _LDAParams, JavaMLReadable[LDA], JavaMLWritable): + def __init__( + self, + *, + featuresCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + k: int = ..., + optimizer: str = ..., + learningOffset: float = ..., + learningDecay: float = ..., + subsamplingRate: float = ..., + optimizeDocConcentration: bool = ..., + docConcentration: Optional[List[float]] = ..., + topicConcentration: Optional[float] = ..., + topicDistributionCol: str = ..., + keepLastCheckpoint: bool = ... + ) -> None: ... 
+ def setParams( + self, + *, + featuresCol: str = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + k: int = ..., + optimizer: str = ..., + learningOffset: float = ..., + learningDecay: float = ..., + subsamplingRate: float = ..., + optimizeDocConcentration: bool = ..., + docConcentration: Optional[List[float]] = ..., + topicConcentration: Optional[float] = ..., + topicDistributionCol: str = ..., + keepLastCheckpoint: bool = ... + ) -> LDA: ... + def setCheckpointInterval(self, value: int) -> LDA: ... + def setSeed(self, value: int) -> LDA: ... + def setK(self, value: int) -> LDA: ... + def setOptimizer(self, value: str) -> LDA: ... + def setLearningOffset(self, value: float) -> LDA: ... + def setLearningDecay(self, value: float) -> LDA: ... + def setSubsamplingRate(self, value: float) -> LDA: ... + def setOptimizeDocConcentration(self, value: bool) -> LDA: ... + def setDocConcentration(self, value: List[float]) -> LDA: ... + def setTopicConcentration(self, value: float) -> LDA: ... + def setTopicDistributionCol(self, value: str) -> LDA: ... + def setKeepLastCheckpoint(self, value: bool) -> LDA: ... + def setMaxIter(self, value: int) -> LDA: ... + def setFeaturesCol(self, value: str) -> LDA: ... + +class _PowerIterationClusteringParams(HasMaxIter, HasWeightCol): + k: Param[int] + initMode: Param[str] + srcCol: Param[str] + dstCol: Param[str] + def __init__(self, *args: Any): ... + def getK(self) -> int: ... + def getInitMode(self) -> str: ... + def getSrcCol(self) -> str: ... + def getDstCol(self) -> str: ... + +class PowerIterationClustering( + _PowerIterationClusteringParams, + JavaParams, + JavaMLReadable[PowerIterationClustering], + JavaMLWritable, +): + def __init__( + self, + *, + k: int = ..., + maxIter: int = ..., + initMode: str = ..., + srcCol: str = ..., + dstCol: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + k: int = ..., + maxIter: int = ..., + initMode: str = ..., + srcCol: str = ..., + dstCol: str = ..., + weightCol: Optional[str] = ... + ) -> PowerIterationClustering: ... + def setK(self, value: int) -> PowerIterationClustering: ... + def setInitMode(self, value: str) -> PowerIterationClustering: ... + def setSrcCol(self, value: str) -> str: ... + def setDstCol(self, value: str) -> PowerIterationClustering: ... + def setMaxIter(self, value: int) -> PowerIterationClustering: ... + def setWeightCol(self, value: str) -> PowerIterationClustering: ... + def assignClusters(self, dataset: DataFrame) -> DataFrame: ... diff --git a/python/pyspark/ml/common.pyi b/python/pyspark/ml/common.pyi new file mode 100644 index 0000000000000..7bf0ed6183d8a --- /dev/null +++ b/python/pyspark/ml/common.pyi @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +def callJavaFunc(sc, func, *args): ... +def inherit_doc(cls): ... diff --git a/python/pyspark/ml/evaluation.pyi b/python/pyspark/ml/evaluation.pyi new file mode 100644 index 0000000000000..ea0a9f045cd6a --- /dev/null +++ b/python/pyspark/ml/evaluation.pyi @@ -0,0 +1,281 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import abc +from typing import Optional +from pyspark.ml._typing import ( + ParamMap, + BinaryClassificationEvaluatorMetricType, + ClusteringEvaluatorMetricType, + MulticlassClassificationEvaluatorMetricType, + MultilabelClassificationEvaluatorMetricType, + RankingEvaluatorMetricType, + RegressionEvaluatorMetricType, +) + +from pyspark.ml.wrapper import JavaParams +from pyspark.ml.param import Param, Params +from pyspark.ml.param.shared import ( + HasFeaturesCol, + HasLabelCol, + HasPredictionCol, + HasProbabilityCol, + HasRawPredictionCol, + HasWeightCol, +) +from pyspark.ml.util import JavaMLReadable, JavaMLWritable + +class Evaluator(Params, metaclass=abc.ABCMeta): + def evaluate(self, dataset, params: Optional[ParamMap] = ...) -> float: ... + def isLargerBetter(self) -> bool: ... + +class JavaEvaluator(JavaParams, Evaluator, metaclass=abc.ABCMeta): + def isLargerBetter(self) -> bool: ... + +class BinaryClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasRawPredictionCol, + HasWeightCol, + JavaMLReadable[BinaryClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[BinaryClassificationEvaluatorMetricType] + numBins: Param[int] + def __init__( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... + ) -> None: ... + def setMetricName( + self, value: BinaryClassificationEvaluatorMetricType + ) -> BinaryClassificationEvaluator: ... + def getMetricName(self) -> BinaryClassificationEvaluatorMetricType: ... + def setNumBins(self, value: int) -> BinaryClassificationEvaluator: ... + def getNumBins(self) -> int: ... + def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... + def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... + +def setParams( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... +) -> BinaryClassificationEvaluator: ... 
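To see how the `Literal`-based metric aliases from `_typing.pyi` are meant to constrain callers of `BinaryClassificationEvaluator` above, a short sketch; the pre-scored DataFrame here is an assumption standing in for the output of any binary classifier.

```
from pyspark.sql import SparkSession
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.linalg import Vectors

spark = SparkSession.builder.master("local[1]").getOrCreate()

# rawPrediction is a two-element vector of per-class scores, label is 0/1
scored = spark.createDataFrame(
    [(Vectors.dense(-1.0, 1.0), 1.0),
     (Vectors.dense(2.0, -2.0), 0.0),
     (Vectors.dense(-0.5, 0.5), 1.0),
     (Vectors.dense(0.1, -0.1), 0.0)],
    ["rawPrediction", "label"],
)

# metricName is typed as Literal["areaUnderROC"] / Literal["areaUnderPR"], so a
# typo such as "areaUnderRoc" is flagged by the type checker instead of at runtime
evaluator = BinaryClassificationEvaluator(metricName="areaUnderPR")
print(evaluator.evaluate(scored))
print(evaluator.getMetricName())
```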
+ +class RegressionEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + HasWeightCol, + JavaMLReadable[RegressionEvaluator], + JavaMLWritable, +): + metricName: Param[RegressionEvaluatorMetricType] + throughOrigin: Param[bool] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RegressionEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + throughOrigin: bool = ... + ) -> None: ... + def setMetricName( + self, value: RegressionEvaluatorMetricType + ) -> RegressionEvaluator: ... + def getMetricName(self) -> RegressionEvaluatorMetricType: ... + def setThroughOrigin(self, value: bool) -> RegressionEvaluator: ... + def getThroughOrigin(self) -> bool: ... + def setLabelCol(self, value: str) -> RegressionEvaluator: ... + def setPredictionCol(self, value: str) -> RegressionEvaluator: ... + def setWeightCol(self, value: str) -> RegressionEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RegressionEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + throughOrigin: bool = ... + ) -> RegressionEvaluator: ... + +class MulticlassClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + HasWeightCol, + HasProbabilityCol, + JavaMLReadable[MulticlassClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[MulticlassClassificationEvaluatorMetricType] + metricLabel: Param[float] + beta: Param[float] + eps: Param[float] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MulticlassClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + metricLabel: float = ..., + beta: float = ..., + probabilityCol: str = ..., + eps: float = ... + ) -> None: ... + def setMetricName( + self, value: MulticlassClassificationEvaluatorMetricType + ) -> MulticlassClassificationEvaluator: ... + def getMetricName(self) -> MulticlassClassificationEvaluatorMetricType: ... + def setMetricLabel(self, value: float) -> MulticlassClassificationEvaluator: ... + def getMetricLabel(self) -> float: ... + def setBeta(self, value: float) -> MulticlassClassificationEvaluator: ... + def getBeta(self) -> float: ... + def setEps(self, value: float) -> MulticlassClassificationEvaluator: ... + def getEps(self) -> float: ... + def setLabelCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setProbabilityCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setWeightCol(self, value: str) -> MulticlassClassificationEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MulticlassClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + metricLabel: float = ..., + beta: float = ..., + probabilityCol: str = ..., + eps: float = ... + ) -> MulticlassClassificationEvaluator: ... + +class MultilabelClassificationEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + JavaMLReadable[MultilabelClassificationEvaluator], + JavaMLWritable, +): + metricName: Param[MultilabelClassificationEvaluatorMetricType] + metricLabel: Param[float] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MultilabelClassificationEvaluatorMetricType = ..., + metricLabel: float = ... + ) -> None: ... + def setMetricName( + self, value: MultilabelClassificationEvaluatorMetricType + ) -> MultilabelClassificationEvaluator: ... 
+ def getMetricName(self) -> MultilabelClassificationEvaluatorMetricType: ... + def setMetricLabel(self, value: float) -> MultilabelClassificationEvaluator: ... + def getMetricLabel(self) -> float: ... + def setLabelCol(self, value: str) -> MultilabelClassificationEvaluator: ... + def setPredictionCol(self, value: str) -> MultilabelClassificationEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: MultilabelClassificationEvaluatorMetricType = ..., + metricLabel: float = ... + ) -> MultilabelClassificationEvaluator: ... + +class ClusteringEvaluator( + JavaEvaluator, + HasPredictionCol, + HasFeaturesCol, + HasWeightCol, + JavaMLReadable[ClusteringEvaluator], + JavaMLWritable, +): + metricName: Param[ClusteringEvaluatorMetricType] + distanceMeasure: Param[str] + def __init__( + self, + *, + predictionCol: str = ..., + featuresCol: str = ..., + metricName: ClusteringEvaluatorMetricType = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + predictionCol: str = ..., + featuresCol: str = ..., + metricName: ClusteringEvaluatorMetricType = ..., + distanceMeasure: str = ..., + weightCol: Optional[str] = ... + ) -> ClusteringEvaluator: ... + def setMetricName( + self, value: ClusteringEvaluatorMetricType + ) -> ClusteringEvaluator: ... + def getMetricName(self) -> ClusteringEvaluatorMetricType: ... + def setDistanceMeasure(self, value: str) -> ClusteringEvaluator: ... + def getDistanceMeasure(self) -> str: ... + def setFeaturesCol(self, value: str) -> ClusteringEvaluator: ... + def setPredictionCol(self, value: str) -> ClusteringEvaluator: ... + def setWeightCol(self, value: str) -> ClusteringEvaluator: ... + +class RankingEvaluator( + JavaEvaluator, + HasLabelCol, + HasPredictionCol, + JavaMLReadable[RankingEvaluator], + JavaMLWritable, +): + metricName: Param[RankingEvaluatorMetricType] + k: Param[int] + def __init__( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RankingEvaluatorMetricType = ..., + k: int = ... + ) -> None: ... + def setMetricName(self, value: RankingEvaluatorMetricType) -> RankingEvaluator: ... + def getMetricName(self) -> RankingEvaluatorMetricType: ... + def setK(self, value: int) -> RankingEvaluator: ... + def getK(self) -> int: ... + def setLabelCol(self, value: str) -> RankingEvaluator: ... + def setPredictionCol(self, value: str) -> RankingEvaluator: ... + def setParams( + self, + *, + predictionCol: str = ..., + labelCol: str = ..., + metricName: RankingEvaluatorMetricType = ..., + k: int = ... + ) -> RankingEvaluator: ... diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi new file mode 100644 index 0000000000000..f5b12a5b2ffc6 --- /dev/null +++ b/python/pyspark/ml/feature.pyi @@ -0,0 +1,1629 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Generic, List, Optional, Tuple +from pyspark.ml._typing import JM, P + +from pyspark.ml.param.shared import ( + HasFeaturesCol, + HasHandleInvalid, + HasInputCol, + HasInputCols, + HasLabelCol, + HasMaxIter, + HasNumFeatures, + HasOutputCol, + HasOutputCols, + HasRelativeError, + HasSeed, + HasStepSize, + HasThreshold, + HasThresholds, +) +from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer +from pyspark.ml.linalg import Vector, DenseVector, DenseMatrix +from pyspark.sql.dataframe import DataFrame +from pyspark.ml.param import Param + +class Binarizer( + JavaTransformer, + HasThreshold, + HasThresholds, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + JavaMLReadable[Binarizer], + JavaMLWritable, +): + threshold: Param[float] + thresholds: Param[List[float]] + @overload + def __init__( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> Binarizer: ... + @overload + def setParams( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> Binarizer: ... + def setThreshold(self, value: float) -> Binarizer: ... + def setThresholds(self, value: List[float]) -> Binarizer: ... + def setInputCol(self, value: str) -> Binarizer: ... + def setInputCols(self, value: List[str]) -> Binarizer: ... + def setOutputCol(self, value: str) -> Binarizer: ... + def setOutputCols(self, value: List[str]) -> Binarizer: ... + +class _LSHParams(HasInputCol, HasOutputCol): + numHashTables: Param[int] + def __init__(self, *args: Any): ... + def getNumHashTables(self) -> int: ... + +class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): + def setNumHashTables(self: P, value) -> P: ... + def setInputCol(self: P, value) -> P: ... + def setOutputCol(self: P, value) -> P: ... + +class _LSHModel(JavaModel, _LSHParams): + def setInputCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + def approxNearestNeighbors( + self, + dataset: DataFrame, + key: Vector, + numNearestNeighbors: int, + distCol: str = ..., + ) -> DataFrame: ... + def approxSimilarityJoin( + self, + datasetA: DataFrame, + datasetB: DataFrame, + threshold: float, + distCol: str = ..., + ) -> DataFrame: ... + +class _BucketedRandomProjectionLSHParams: + bucketLength: Param[float] + def getBucketLength(self) -> float: ... + +class BucketedRandomProjectionLSH( + _LSH[BucketedRandomProjectionLSHModel], + _LSHParams, + HasSeed, + JavaMLReadable[BucketedRandomProjectionLSH], + JavaMLWritable, +): + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ..., + bucketLength: Optional[float] = ... + ) -> None: ... 
+ def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ..., + bucketLength: Optional[float] = ... + ) -> BucketedRandomProjectionLSH: ... + def setBucketLength(self, value: float) -> BucketedRandomProjectionLSH: ... + def setSeed(self, value: int) -> BucketedRandomProjectionLSH: ... + +class BucketedRandomProjectionLSHModel( + _LSHModel, + _BucketedRandomProjectionLSHParams, + JavaMLReadable[BucketedRandomProjectionLSHModel], + JavaMLWritable, +): ... + +class Bucketizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + HasHandleInvalid, + JavaMLReadable[Bucketizer], + JavaMLWritable, +): + splits: Param[List[float]] + handleInvalid: Param[str] + splitsArray: Param[List[List[float]]] + @overload + def __init__( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> Bucketizer: ... + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> Bucketizer: ... + def setSplits(self, value: List[float]) -> Bucketizer: ... + def getSplits(self) -> List[float]: ... + def setSplitsArray(self, value: List[List[float]]) -> Bucketizer: ... + def getSplitsArray(self) -> List[List[float]]: ... + def setInputCol(self, value: str) -> Bucketizer: ... + def setInputCols(self, value: List[str]) -> Bucketizer: ... + def setOutputCol(self, value: str) -> Bucketizer: ... + def setOutputCols(self, value: List[str]) -> Bucketizer: ... + def setHandleInvalid(self, value: str) -> Bucketizer: ... + +class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): + minTF: Param[float] + minDF: Param[float] + maxDF: Param[float] + vocabSize: Param[int] + binary: Param[bool] + def __init__(self, *args: Any) -> None: ... + def getMinTF(self) -> float: ... + def getMinDF(self) -> float: ... + def getMaxDF(self) -> float: ... + def getVocabSize(self) -> int: ... + def getBinary(self) -> bool: ... + +class CountVectorizer( + JavaEstimator[CountVectorizerModel], + _CountVectorizerParams, + JavaMLReadable[CountVectorizer], + JavaMLWritable, +): + def __init__( + self, + *, + minTF: float = ..., + minDF: float = ..., + maxDF: float = ..., + vocabSize: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + minTF: float = ..., + minDF: float = ..., + maxDF: float = ..., + vocabSize: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> CountVectorizer: ... + def setMinTF(self, value: float) -> CountVectorizer: ... + def setMinDF(self, value: float) -> CountVectorizer: ... + def setMaxDF(self, value: float) -> CountVectorizer: ... + def setVocabSize(self, value: int) -> CountVectorizer: ... + def setBinary(self, value: bool) -> CountVectorizer: ... + def setInputCol(self, value: str) -> CountVectorizer: ... 
+ def setOutputCol(self, value: str) -> CountVectorizer: ... + +class CountVectorizerModel( + JavaModel, JavaMLReadable[CountVectorizerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> CountVectorizerModel: ... + def setOutputCol(self, value: str) -> CountVectorizerModel: ... + def setMinTF(self, value: float) -> CountVectorizerModel: ... + def setBinary(self, value: bool) -> CountVectorizerModel: ... + @classmethod + def from_vocabulary( + cls, + vocabulary: List[str], + inputCol: str, + outputCol: Optional[str] = ..., + minTF: Optional[float] = ..., + binary: Optional[bool] = ..., + ) -> CountVectorizerModel: ... + @property + def vocabulary(self) -> List[str]: ... + +class DCT( + JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[DCT], JavaMLWritable +): + inverse: Param[bool] + def __init__( + self, + *, + inverse: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + inverse: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> DCT: ... + def setInverse(self, value: bool) -> DCT: ... + def getInverse(self) -> bool: ... + def setInputCol(self, value: str) -> DCT: ... + def setOutputCol(self, value: str) -> DCT: ... + +class ElementwiseProduct( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[ElementwiseProduct], + JavaMLWritable, +): + scalingVec: Param[Vector] + def __init__( + self, + *, + scalingVec: Optional[Vector] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + scalingVec: Optional[Vector] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> ElementwiseProduct: ... + def setScalingVec(self, value: Vector) -> ElementwiseProduct: ... + def getScalingVec(self) -> Vector: ... + def setInputCol(self, value: str) -> ElementwiseProduct: ... + def setOutputCol(self, value: str) -> ElementwiseProduct: ... + +class FeatureHasher( + JavaTransformer, + HasInputCols, + HasOutputCol, + HasNumFeatures, + JavaMLReadable[FeatureHasher], + JavaMLWritable, +): + categoricalCols: Param[List[str]] + def __init__( + self, + *, + numFeatures: int = ..., + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + categoricalCols: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + numFeatures: int = ..., + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + categoricalCols: Optional[List[str]] = ... + ) -> FeatureHasher: ... + def setCategoricalCols(self, value: List[str]) -> FeatureHasher: ... + def getCategoricalCols(self) -> List[str]: ... + def setInputCols(self, value: List[str]) -> FeatureHasher: ... + def setOutputCol(self, value: str) -> FeatureHasher: ... + def setNumFeatures(self, value: int) -> FeatureHasher: ... + +class HashingTF( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasNumFeatures, + JavaMLReadable[HashingTF], + JavaMLWritable, +): + binary: Param[bool] + def __init__( + self, + *, + numFeatures: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + numFeatures: int = ..., + binary: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> HashingTF: ... + def setBinary(self, value: bool) -> HashingTF: ... + def getBinary(self) -> bool: ... + def setInputCol(self, value: str) -> HashingTF: ... + def setOutputCol(self, value: str) -> HashingTF: ... 
+ def setNumFeatures(self, value: int) -> HashingTF: ... + def indexOf(self, term: Any) -> int: ... + +class _IDFParams(HasInputCol, HasOutputCol): + minDocFreq: Param[int] + def __init__(self, *args: Any): ... + def getMinDocFreq(self) -> int: ... + +class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritable): + def __init__( + self, + *, + minDocFreq: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + minDocFreq: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> IDF: ... + def setMinDocFreq(self, value: int) -> IDF: ... + def setInputCol(self, value: str) -> IDF: ... + def setOutputCol(self, value: str) -> IDF: ... + +class IDFModel(JavaModel, _IDFParams, JavaMLReadable[IDFModel], JavaMLWritable): + def setInputCol(self, value: str) -> IDFModel: ... + def setOutputCol(self, value: str) -> IDFModel: ... + @property + def idf(self) -> Vector: ... + @property + def docFreq(self) -> List[int]: ... + @property + def numDocs(self) -> int: ... + +class _ImputerParams( + HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, HasRelativeError +): + strategy: Param[str] + missingValue: Param[float] + def getStrategy(self) -> str: ... + def getMissingValue(self) -> float: ... + +class Imputer( + JavaEstimator[ImputerModel], _ImputerParams, JavaMLReadable[Imputer], JavaMLWritable +): + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ... + ) -> None: ... + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> None: ... + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ... + ) -> Imputer: ... + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCols: Optional[str] = ..., + relativeError: float = ... + ) -> Imputer: ... + def setStrategy(self, value: str) -> Imputer: ... + def setMissingValue(self, value: float) -> Imputer: ... + def setInputCols(self, value: List[str]) -> Imputer: ... + def setOutputCols(self, value: List[str]) -> Imputer: ... + def setInputCol(self, value: str) -> Imputer: ... + def setOutputCol(self, value: str) -> Imputer: ... + def setRelativeError(self, value: float) -> Imputer: ... + +class ImputerModel( + JavaModel, _ImputerParams, JavaMLReadable[ImputerModel], JavaMLWritable +): + def setInputCols(self, value: List[str]) -> ImputerModel: ... + def setOutputCols(self, value: List[str]) -> ImputerModel: ... + def setInputCol(self, value: str) -> ImputerModel: ... + def setOutputCol(self, value: str) -> ImputerModel: ... + @property + def surrogateDF(self) -> DataFrame: ... + +class Interaction( + JavaTransformer, + HasInputCols, + HasOutputCol, + JavaMLReadable[Interaction], + JavaMLWritable, +): + def __init__( + self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... + ) -> Interaction: ... + def setInputCols(self, value: List[str]) -> Interaction: ... 
+ def setOutputCol(self, value: str) -> Interaction: ... + +class _MaxAbsScalerParams(HasInputCol, HasOutputCol): ... + +class MaxAbsScaler( + JavaEstimator[MaxAbsScalerModel], + _MaxAbsScalerParams, + JavaMLReadable[MaxAbsScaler], + JavaMLWritable, +): + def __init__( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> MaxAbsScaler: ... + def setInputCol(self, value: str) -> MaxAbsScaler: ... + def setOutputCol(self, value: str) -> MaxAbsScaler: ... + +class MaxAbsScalerModel( + JavaModel, _MaxAbsScalerParams, JavaMLReadable[MaxAbsScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> MaxAbsScalerModel: ... + def setOutputCol(self, value: str) -> MaxAbsScalerModel: ... + @property + def maxAbs(self) -> Vector: ... + +class MinHashLSH( + _LSH[MinHashLSHModel], + HasInputCol, + HasOutputCol, + HasSeed, + JavaMLReadable[MinHashLSH], + JavaMLWritable, +): + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + seed: Optional[int] = ..., + numHashTables: int = ... + ) -> MinHashLSH: ... + def setSeed(self, value: int) -> MinHashLSH: ... + +class MinHashLSHModel(_LSHModel, JavaMLReadable[MinHashLSHModel], JavaMLWritable): ... + +class _MinMaxScalerParams(HasInputCol, HasOutputCol): + min: Param[float] + max: Param[float] + def __init__(self, *args: Any): ... + def getMin(self) -> float: ... + def getMax(self) -> float: ... + +class MinMaxScaler( + JavaEstimator[MinMaxScalerModel], + _MinMaxScalerParams, + JavaMLReadable[MinMaxScaler], + JavaMLWritable, +): + def __init__( + self, + *, + min: float = ..., + max: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + min: float = ..., + max: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> MinMaxScaler: ... + def setMin(self, value: float) -> MinMaxScaler: ... + def setMax(self, value: float) -> MinMaxScaler: ... + def setInputCol(self, value: str) -> MinMaxScaler: ... + def setOutputCol(self, value: str) -> MinMaxScaler: ... + +class MinMaxScalerModel( + JavaModel, _MinMaxScalerParams, JavaMLReadable[MinMaxScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> MinMaxScalerModel: ... + def setOutputCol(self, value: str) -> MinMaxScalerModel: ... + def setMin(self, value: float) -> MinMaxScalerModel: ... + def setMax(self, value: float) -> MinMaxScalerModel: ... + @property + def originalMin(self) -> Vector: ... + @property + def originalMax(self) -> Vector: ... + +class NGram( + JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[NGram], JavaMLWritable +): + n: Param[int] + def __init__( + self, + *, + n: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + n: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> NGram: ... + def setN(self, value: int) -> NGram: ... + def getN(self) -> int: ... + def setInputCol(self, value: str) -> NGram: ... + def setOutputCol(self, value: str) -> NGram: ... 
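The annotations above only describe signatures for type checkers; runtime behavior is unchanged. As a rough illustration of the `NGram` API covered by these stubs, here is a minimal sketch assuming a local `SparkSession`; the `tokens` column and sample data are made up for the example and are not part of this patch:

```python
from pyspark.sql import SparkSession
from pyspark.ml.feature import NGram

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(["a", "b", "c", "d"],)], ["tokens"])

# n, inputCol and outputCol correspond to the Param[...] declarations in the stub.
ngram = NGram(n=2, inputCol="tokens", outputCol="bigrams")
ngram.transform(df).show(truncate=False)
```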
+ +class Normalizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[Normalizer], + JavaMLWritable, +): + p: Param[float] + def __init__( + self, + *, + p: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + p: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> Normalizer: ... + def setP(self, value: float) -> Normalizer: ... + def getP(self) -> float: ... + def setInputCol(self, value: str) -> Normalizer: ... + def setOutputCol(self, value: str) -> Normalizer: ... + +class _OneHotEncoderParams(HasInputCols, HasOutputCols, HasHandleInvalid): + handleInvalid: Param[str] + dropLast: Param[bool] + def __init__(self, *args: Any): ... + def getDropLast(self) -> bool: ... + +class OneHotEncoder( + JavaEstimator[OneHotEncoderModel], + _OneHotEncoderParams, + JavaMLReadable[OneHotEncoder], + JavaMLWritable, +): + @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ... + ) -> None: ... + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ... + ) -> OneHotEncoder: ... + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> OneHotEncoder: ... + def setDropLast(self, value: bool) -> OneHotEncoder: ... + def setInputCols(self, value: List[str]) -> OneHotEncoder: ... + def setOutputCols(self, value: List[str]) -> OneHotEncoder: ... + def setHandleInvalid(self, value: str) -> OneHotEncoder: ... + def setInputCol(self, value: str) -> OneHotEncoder: ... + def setOutputCol(self, value: str) -> OneHotEncoder: ... + +class OneHotEncoderModel( + JavaModel, _OneHotEncoderParams, JavaMLReadable[OneHotEncoderModel], JavaMLWritable +): + def setDropLast(self, value: bool) -> OneHotEncoderModel: ... + def setInputCols(self, value: List[str]) -> OneHotEncoderModel: ... + def setOutputCols(self, value: List[str]) -> OneHotEncoderModel: ... + def setInputCol(self, value: str) -> OneHotEncoderModel: ... + def setOutputCol(self, value: str) -> OneHotEncoderModel: ... + def setHandleInvalid(self, value: str) -> OneHotEncoderModel: ... + @property + def categorySizes(self) -> List[int]: ... + +class PolynomialExpansion( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[PolynomialExpansion], + JavaMLWritable, +): + degree: Param[int] + def __init__( + self, + *, + degree: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + degree: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> PolynomialExpansion: ... + def setDegree(self, value: int) -> PolynomialExpansion: ... + def getDegree(self) -> int: ... + def setInputCol(self, value: str) -> PolynomialExpansion: ... + def setOutputCol(self, value: str) -> PolynomialExpansion: ... 
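The `@overload` pairs on `OneHotEncoder` mirror its single-column and multi-column call shapes. A brief sketch of both forms (constructor only; the column names are hypothetical):

```python
from pyspark.ml.feature import OneHotEncoder

# Multi-column form, matching the inputCols/outputCols overload.
multi = OneHotEncoder(inputCols=["color_idx", "size_idx"],
                      outputCols=["color_vec", "size_vec"],
                      dropLast=True)

# Single-column form, matching the inputCol/outputCol overload.
single = OneHotEncoder(inputCol="color_idx", outputCol="color_vec")
```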
+ +class QuantileDiscretizer( + JavaEstimator[Bucketizer], + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + HasHandleInvalid, + HasRelativeError, + JavaMLReadable[QuantileDiscretizer], + JavaMLWritable, +): + numBuckets: Param[int] + handleInvalid: Param[str] + numBucketsArray: Param[List[int]] + @overload + def __init__( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ... + ) -> None: ... + @overload + def __init__( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ... + ) -> QuantileDiscretizer: ... + @overload + def setParams( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> QuantileDiscretizer: ... + def setNumBuckets(self, value: int) -> QuantileDiscretizer: ... + def getNumBuckets(self) -> int: ... + def setNumBucketsArray(self, value: List[int]) -> QuantileDiscretizer: ... + def getNumBucketsArray(self) -> List[int]: ... + def setRelativeError(self, value: float) -> QuantileDiscretizer: ... + def setInputCol(self, value: str) -> QuantileDiscretizer: ... + def setInputCols(self, value: List[str]) -> QuantileDiscretizer: ... + def setOutputCol(self, value: str) -> QuantileDiscretizer: ... + def setOutputCols(self, value: List[str]) -> QuantileDiscretizer: ... + def setHandleInvalid(self, value: str) -> QuantileDiscretizer: ... + +class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): + lower: Param[float] + upper: Param[float] + withCentering: Param[bool] + withScaling: Param[bool] + def __init__(self, *args: Any): ... + def getLower(self) -> float: ... + def getUpper(self) -> float: ... + def getWithCentering(self) -> bool: ... + def getWithScaling(self) -> bool: ... + +class RobustScaler( + JavaEstimator, _RobustScalerParams, JavaMLReadable[RobustScaler], JavaMLWritable +): + def __init__( + self, + *, + lower: float = ..., + upper: float = ..., + withCentering: bool = ..., + withScaling: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> None: ... + def setParams( + self, + *, + lower: float = ..., + upper: float = ..., + withCentering: bool = ..., + withScaling: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ... + ) -> RobustScaler: ... + def setLower(self, value: float) -> RobustScaler: ... + def setUpper(self, value: float) -> RobustScaler: ... + def setWithCentering(self, value: bool) -> RobustScaler: ... + def setWithScaling(self, value: bool) -> RobustScaler: ... + def setInputCol(self, value: str) -> RobustScaler: ... + def setOutputCol(self, value: str) -> RobustScaler: ... + def setRelativeError(self, value: float) -> RobustScaler: ... + +class RobustScalerModel( + JavaModel, _RobustScalerParams, JavaMLReadable[RobustScalerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> RobustScalerModel: ... + def setOutputCol(self, value: str) -> RobustScalerModel: ... + @property + def median(self) -> Vector: ... 
+ @property + def range(self) -> Vector: ... + +class RegexTokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[RegexTokenizer], + JavaMLWritable, +): + minTokenLength: Param[int] + gaps: Param[bool] + pattern: Param[str] + toLowercase: Param[bool] + def __init__( + self, + *, + minTokenLength: int = ..., + gaps: bool = ..., + pattern: str = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + toLowercase: bool = ... + ) -> None: ... + def setParams( + self, + *, + minTokenLength: int = ..., + gaps: bool = ..., + pattern: str = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + toLowercase: bool = ... + ) -> RegexTokenizer: ... + def setMinTokenLength(self, value: int) -> RegexTokenizer: ... + def getMinTokenLength(self) -> int: ... + def setGaps(self, value: bool) -> RegexTokenizer: ... + def getGaps(self) -> bool: ... + def setPattern(self, value: str) -> RegexTokenizer: ... + def getPattern(self) -> str: ... + def setToLowercase(self, value: bool) -> RegexTokenizer: ... + def getToLowercase(self) -> bool: ... + def setInputCol(self, value: str) -> RegexTokenizer: ... + def setOutputCol(self, value: str) -> RegexTokenizer: ... + +class SQLTransformer(JavaTransformer, JavaMLReadable[SQLTransformer], JavaMLWritable): + statement: Param[str] + def __init__(self, *, statement: Optional[str] = ...) -> None: ... + def setParams(self, *, statement: Optional[str] = ...) -> SQLTransformer: ... + def setStatement(self, value: str) -> SQLTransformer: ... + def getStatement(self) -> str: ... + +class _StandardScalerParams(HasInputCol, HasOutputCol): + withMean: Param[bool] + withStd: Param[bool] + def __init__(self, *args: Any): ... + def getWithMean(self) -> bool: ... + def getWithStd(self) -> bool: ... + +class StandardScaler( + JavaEstimator[StandardScalerModel], + _StandardScalerParams, + JavaMLReadable[StandardScaler], + JavaMLWritable, +): + def __init__( + self, + *, + withMean: bool = ..., + withStd: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + withMean: bool = ..., + withStd: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> StandardScaler: ... + def setWithMean(self, value: bool) -> StandardScaler: ... + def setWithStd(self, value: bool) -> StandardScaler: ... + def setInputCol(self, value: str) -> StandardScaler: ... + def setOutputCol(self, value: str) -> StandardScaler: ... + +class StandardScalerModel( + JavaModel, + _StandardScalerParams, + JavaMLReadable[StandardScalerModel], + JavaMLWritable, +): + def setInputCol(self, value: str) -> StandardScalerModel: ... + def setOutputCol(self, value: str) -> StandardScalerModel: ... + @property + def std(self) -> Vector: ... + @property + def mean(self) -> Vector: ... + +class _StringIndexerParams( + JavaParams, HasHandleInvalid, HasInputCol, HasOutputCol, HasInputCols, HasOutputCols +): + stringOrderType: Param[str] + handleInvalid: Param[str] + def __init__(self, *args: Any) -> None: ... + def getStringOrderType(self) -> str: ... + +class StringIndexer( + JavaEstimator[StringIndexerModel], + _StringIndexerParams, + JavaMLReadable[StringIndexer], + JavaMLWritable, +): + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> None: ... 
+ @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> StringIndexer: ... + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ... + ) -> StringIndexer: ... + def setStringOrderType(self, value: str) -> StringIndexer: ... + def setInputCol(self, value: str) -> StringIndexer: ... + def setInputCols(self, value: List[str]) -> StringIndexer: ... + def setOutputCol(self, value: str) -> StringIndexer: ... + def setOutputCols(self, value: List[str]) -> StringIndexer: ... + def setHandleInvalid(self, value: str) -> StringIndexer: ... + +class StringIndexerModel( + JavaModel, _StringIndexerParams, JavaMLReadable[StringIndexerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> StringIndexerModel: ... + def setInputCols(self, value: List[str]) -> StringIndexerModel: ... + def setOutputCol(self, value: str) -> StringIndexerModel: ... + def setOutputCols(self, value: List[str]) -> StringIndexerModel: ... + def setHandleInvalid(self, value: str) -> StringIndexerModel: ... + @classmethod + def from_labels( + cls, + labels: List[str], + inputCol: str, + outputCol: Optional[str] = ..., + handleInvalid: Optional[str] = ..., + ) -> StringIndexerModel: ... + @classmethod + def from_arrays_of_labels( + cls, + arrayOfLabels: List[List[str]], + inputCols: List[str], + outputCols: Optional[List[str]] = ..., + handleInvalid: Optional[str] = ..., + ) -> StringIndexerModel: ... + @property + def labels(self) -> List[str]: ... + +class IndexToString( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[IndexToString], + JavaMLWritable, +): + labels: Param[List[str]] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + labels: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + labels: Optional[List[str]] = ... + ) -> IndexToString: ... + def setLabels(self, value: List[str]) -> IndexToString: ... + def getLabels(self) -> List[str]: ... + def setInputCol(self, value: str) -> IndexToString: ... + def setOutputCol(self, value: str) -> IndexToString: ... + +class StopWordsRemover( + JavaTransformer, + HasInputCol, + HasOutputCol, + HasInputCols, + HasOutputCols, + JavaMLReadable[StopWordsRemover], + JavaMLWritable, +): + stopWords: Param[List[str]] + caseSensitive: Param[bool] + locale: Param[str] + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ... + ) -> StopWordsRemover: ... 
+ @overload + def setParams( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> StopWordsRemover: ... + def setStopWords(self, value: List[str]) -> StopWordsRemover: ... + def getStopWords(self) -> List[str]: ... + def setCaseSensitive(self, value: bool) -> StopWordsRemover: ... + def getCaseSensitive(self) -> bool: ... + def setLocale(self, value: str) -> StopWordsRemover: ... + def getLocale(self) -> str: ... + def setInputCol(self, value: str) -> StopWordsRemover: ... + def setOutputCol(self, value: str) -> StopWordsRemover: ... + def setInputCols(self, value: List[str]) -> StopWordsRemover: ... + def setOutputCols(self, value: List[str]) -> StopWordsRemover: ... + @staticmethod + def loadDefaultStopWords(language: str) -> List[str]: ... + +class Tokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[Tokenizer], + JavaMLWritable, +): + def __init__( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... + ) -> Tokenizer: ... + def setInputCol(self, value: str) -> Tokenizer: ... + def setOutputCol(self, value: str) -> Tokenizer: ... + +class VectorAssembler( + JavaTransformer, + HasInputCols, + HasOutputCol, + HasHandleInvalid, + JavaMLReadable[VectorAssembler], + JavaMLWritable, +): + handleInvalid: Param[str] + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> VectorAssembler: ... + def setInputCols(self, value: List[str]) -> VectorAssembler: ... + def setOutputCol(self, value: str) -> VectorAssembler: ... + def setHandleInvalid(self, value: str) -> VectorAssembler: ... + +class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): + maxCategories: Param[int] + handleInvalid: Param[str] + def __init__(self, *args: Any): ... + def getMaxCategories(self) -> int: ... + +class VectorIndexer( + JavaEstimator[VectorIndexerModel], + _VectorIndexerParams, + HasHandleInvalid, + JavaMLReadable[VectorIndexer], + JavaMLWritable, +): + def __init__( + self, + *, + maxCategories: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + maxCategories: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ... + ) -> VectorIndexer: ... + def setMaxCategories(self, value: int) -> VectorIndexer: ... + def setInputCol(self, value: str) -> VectorIndexer: ... + def setOutputCol(self, value: str) -> VectorIndexer: ... + def setHandleInvalid(self, value: str) -> VectorIndexer: ... + +class VectorIndexerModel( + JavaModel, _VectorIndexerParams, JavaMLReadable[VectorIndexerModel], JavaMLWritable +): + def setInputCol(self, value: str) -> VectorIndexerModel: ... + def setOutputCol(self, value: str) -> VectorIndexerModel: ... + @property + def numFeatures(self) -> int: ... + @property + def categoryMaps(self) -> Dict[int, Tuple[float, int]]: ... 
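For context, a short sketch chaining a few of the transformers typed above (`Tokenizer`, `StopWordsRemover`, `VectorAssembler`); the column names and the input DataFrame `df` are assumptions for the example:

```python
from pyspark.ml.feature import Tokenizer, StopWordsRemover, VectorAssembler

tokenizer = Tokenizer(inputCol="text", outputCol="words")
remover = StopWordsRemover(
    inputCol="words",
    outputCol="filtered",
    stopWords=StopWordsRemover.loadDefaultStopWords("english"),
)
assembler = VectorAssembler(inputCols=["f1", "f2"], outputCol="features",
                            handleInvalid="skip")

# df is a hypothetical DataFrame with `text`, `f1` and `f2` columns.
out = assembler.transform(remover.transform(tokenizer.transform(df)))
```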
+ +class VectorSlicer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable[VectorSlicer], + JavaMLWritable, +): + indices: Param[List[int]] + names: Param[List[str]] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + indices: Optional[List[int]] = ..., + names: Optional[List[str]] = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + indices: Optional[List[int]] = ..., + names: Optional[List[str]] = ... + ) -> VectorSlicer: ... + def setIndices(self, value: List[int]) -> VectorSlicer: ... + def getIndices(self) -> List[int]: ... + def setNames(self, value: List[str]) -> VectorSlicer: ... + def getNames(self) -> List[str]: ... + def setInputCol(self, value: str) -> VectorSlicer: ... + def setOutputCol(self, value: str) -> VectorSlicer: ... + +class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): + vectorSize: Param[int] + numPartitions: Param[int] + minCount: Param[int] + windowSize: Param[int] + maxSentenceLength: Param[int] + def __init__(self, *args: Any): ... + def getVectorSize(self) -> int: ... + def getNumPartitions(self) -> int: ... + def getMinCount(self) -> int: ... + def getWindowSize(self) -> int: ... + def getMaxSentenceLength(self) -> int: ... + +class Word2Vec( + JavaEstimator[Word2VecModel], + _Word2VecParams, + JavaMLReadable[Word2Vec], + JavaMLWritable, +): + def __init__( + self, + *, + vectorSize: int = ..., + minCount: int = ..., + numPartitions: int = ..., + stepSize: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + windowSize: int = ..., + maxSentenceLength: int = ... + ) -> None: ... + def setParams( + self, + *, + vectorSize: int = ..., + minCount: int = ..., + numPartitions: int = ..., + stepSize: float = ..., + maxIter: int = ..., + seed: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + windowSize: int = ..., + maxSentenceLength: int = ... + ) -> Word2Vec: ... + def setVectorSize(self, value: int) -> Word2Vec: ... + def setNumPartitions(self, value: int) -> Word2Vec: ... + def setMinCount(self, value: int) -> Word2Vec: ... + def setWindowSize(self, value: int) -> Word2Vec: ... + def setMaxSentenceLength(self, value: int) -> Word2Vec: ... + def setMaxIter(self, value: int) -> Word2Vec: ... + def setInputCol(self, value: str) -> Word2Vec: ... + def setOutputCol(self, value: str) -> Word2Vec: ... + def setSeed(self, value: int) -> Word2Vec: ... + def setStepSize(self, value: float) -> Word2Vec: ... + +class Word2VecModel( + JavaModel, _Word2VecParams, JavaMLReadable[Word2VecModel], JavaMLWritable +): + def getVectors(self) -> DataFrame: ... + def setInputCol(self, value: str) -> Word2VecModel: ... + def setOutputCol(self, value: str) -> Word2VecModel: ... + @overload + def findSynonyms(self, word: str, num: int) -> DataFrame: ... + @overload + def findSynonyms(self, word: Vector, num: int) -> DataFrame: ... + @overload + def findSynonymsArray(self, word: str, num: int) -> List[Tuple[str, float]]: ... + @overload + def findSynonymsArray(self, word: Vector, num: int) -> List[Tuple[str, float]]: ... + +class _PCAParams(HasInputCol, HasOutputCol): + k: Param[int] + def getK(self) -> int: ... 
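As an illustration of the `Word2Vec` estimator and the overloaded `findSynonyms` typed above (a sketch only; `tokens_df` stands in for any DataFrame with a `words` array column):

```python
from pyspark.ml.feature import Word2Vec

w2v = Word2Vec(vectorSize=50, minCount=1, inputCol="words", outputCol="vecs")
model = w2v.fit(tokens_df)             # tokens_df is a hypothetical tokenized DataFrame
model.getVectors().show(5)             # DataFrame of learned word embeddings
model.findSynonyms("spark", 3).show()  # str overload; a Vector argument is accepted as well
```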
+ +class PCA(JavaEstimator[PCAModel], _PCAParams, JavaMLReadable[PCA], JavaMLWritable): + def __init__( + self, + *, + k: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + def setParams( + self, + *, + k: Optional[int] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> PCA: ... + def setK(self, value: int) -> PCA: ... + def setInputCol(self, value: str) -> PCA: ... + def setOutputCol(self, value: str) -> PCA: ... + +class PCAModel(JavaModel, _PCAParams, JavaMLReadable[PCAModel], JavaMLWritable): + def setInputCol(self, value: str) -> PCAModel: ... + def setOutputCol(self, value: str) -> PCAModel: ... + @property + def pc(self) -> DenseMatrix: ... + @property + def explainedVariance(self) -> DenseVector: ... + +class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): + formula: Param[str] + forceIndexLabel: Param[bool] + stringIndexerOrderType: Param[str] + handleInvalid: Param[str] + def __init__(self, *args: Any): ... + def getFormula(self) -> str: ... + def getForceIndexLabel(self) -> bool: ... + def getStringIndexerOrderType(self) -> str: ... + +class RFormula( + JavaEstimator[RFormulaModel], + _RFormulaParams, + JavaMLReadable[RFormula], + JavaMLWritable, +): + def __init__( + self, + *, + formula: Optional[str] = ..., + featuresCol: str = ..., + labelCol: str = ..., + forceIndexLabel: bool = ..., + stringIndexerOrderType: str = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + formula: Optional[str] = ..., + featuresCol: str = ..., + labelCol: str = ..., + forceIndexLabel: bool = ..., + stringIndexerOrderType: str = ..., + handleInvalid: str = ... + ) -> RFormula: ... + def setFormula(self, value: str) -> RFormula: ... + def setForceIndexLabel(self, value: bool) -> RFormula: ... + def setStringIndexerOrderType(self, value: str) -> RFormula: ... + def setFeaturesCol(self, value: str) -> RFormula: ... + def setLabelCol(self, value: str) -> RFormula: ... + def setHandleInvalid(self, value: str) -> RFormula: ... + +class RFormulaModel( + JavaModel, _RFormulaParams, JavaMLReadable[RFormulaModel], JavaMLWritable +): ... + +class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): + selectorType: Param[str] + numTopFeatures: Param[int] + percentile: Param[float] + fpr: Param[float] + fdr: Param[float] + fwe: Param[float] + def __init__(self, *args: Any): ... + def getSelectorType(self) -> str: ... + def getNumTopFeatures(self) -> int: ... + def getPercentile(self) -> float: ... + def getFpr(self) -> float: ... + def getFdr(self) -> float: ... + def getFwe(self) -> float: ... + +class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable): + def setSelectorType(self: P, value: str) -> P: ... + def setNumTopFeatures(self: P, value: int) -> P: ... + def setPercentile(self: P, value: float) -> P: ... + def setFpr(self: P, value: float) -> P: ... + def setFdr(self: P, value: float) -> P: ... + def setFwe(self: P, value: float) -> P: ... + def setFeaturesCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + def setLabelCol(self: P, value: str) -> P: ... + +class _SelectorModel(JavaModel, _SelectorParams): + def setFeaturesCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... + @property + def selectedFeatures(self) -> List[int]: ... 
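A quick sketch of the `RFormula` estimator whose stub appears above; the formula and the input DataFrame `df` are illustrative only:

```python
from pyspark.ml.feature import RFormula

rf = RFormula(formula="clicked ~ country + hour",
              featuresCol="features", labelCol="label",
              handleInvalid="skip")
model = rf.fit(df)                                      # df is a hypothetical DataFrame
model.transform(df).select("features", "label").show()
```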
+ +class ANOVASelector( + _Selector[ANOVASelectorModel], JavaMLReadable[ANOVASelector], JavaMLWritable +): + def __init__( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setParams( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> ANOVASelector: ... + +class ANOVASelectorModel( + _SelectorModel, JavaMLReadable[ANOVASelectorModel], JavaMLWritable +): ... + +class ChiSqSelector( + _Selector[ChiSqSelectorModel], + JavaMLReadable[ChiSqSelector], + JavaMLWritable, +): + def __init__( + self, + *, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ... + ) -> None: ... + def setParams( + self, + *, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ... + ): ... + def setSelectorType(self, value: str) -> ChiSqSelector: ... + def setNumTopFeatures(self, value: int) -> ChiSqSelector: ... + def setPercentile(self, value: float) -> ChiSqSelector: ... + def setFpr(self, value: float) -> ChiSqSelector: ... + def setFdr(self, value: float) -> ChiSqSelector: ... + def setFwe(self, value: float) -> ChiSqSelector: ... + def setFeaturesCol(self, value: str) -> ChiSqSelector: ... + def setOutputCol(self, value: str) -> ChiSqSelector: ... + def setLabelCol(self, value: str) -> ChiSqSelector: ... + +class ChiSqSelectorModel( + _SelectorModel, JavaMLReadable[ChiSqSelectorModel], JavaMLWritable +): + def setFeaturesCol(self, value: str) -> ChiSqSelectorModel: ... + def setOutputCol(self, value: str) -> ChiSqSelectorModel: ... + @property + def selectedFeatures(self) -> List[int]: ... + +class VectorSizeHint( + JavaTransformer, + HasInputCol, + HasHandleInvalid, + JavaMLReadable[VectorSizeHint], + JavaMLWritable, +): + size: Param[int] + handleInvalid: Param[str] + def __init__( + self, + *, + inputCol: Optional[str] = ..., + size: Optional[int] = ..., + handleInvalid: str = ... + ) -> None: ... + def setParams( + self, + *, + inputCol: Optional[str] = ..., + size: Optional[int] = ..., + handleInvalid: str = ... + ) -> VectorSizeHint: ... + def setSize(self, value: int) -> VectorSizeHint: ... + def getSize(self) -> int: ... + def setInputCol(self, value: str) -> VectorSizeHint: ... + def setHandleInvalid(self, value: str) -> VectorSizeHint: ... + +class FValueSelector( + _Selector[FValueSelectorModel], JavaMLReadable[FValueSelector], JavaMLWritable +): + def __init__( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setParams( + self, + numTopFeatures: int = ..., + featuresCol: str = ..., + outputCol: Optional[str] = ..., + labelCol: str = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> FValueSelector: ... 
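The selector family above shares the `_Selector`/`_SelectorModel` helpers; a minimal `ChiSqSelector` sketch, assuming a labeled DataFrame `df`:

```python
from pyspark.ml.feature import ChiSqSelector

selector = ChiSqSelector(numTopFeatures=5, featuresCol="features",
                         labelCol="label", outputCol="selected")
model = selector.fit(df)               # df is a hypothetical labeled DataFrame
print(model.selectedFeatures)          # List[int], as annotated on _SelectorModel
```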
+ +class FValueSelectorModel( + _SelectorModel, JavaMLReadable[FValueSelectorModel], JavaMLWritable +): ... + +class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): + varianceThreshold: Param[float] = ... + def getVarianceThreshold(self) -> float: ... + +class VarianceThresholdSelector( + JavaEstimator, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable +): + def __init__( + self, + featuresCol: str = ..., + outputCol: Optional[str] = ..., + varianceThreshold: float = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + outputCol: Optional[str] = ..., + varianceThreshold: float = ..., + ): ... + def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... + def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... + def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... + +class VarianceThresholdSelectorModel( + JavaModel, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable +): + def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ... + def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ... + @property + def selectedFeatures(self) -> List[int]: ... diff --git a/python/pyspark/ml/fpm.pyi b/python/pyspark/ml/fpm.pyi new file mode 100644 index 0000000000000..7cc304a2ffa39 --- /dev/null +++ b/python/pyspark/ml/fpm.pyi @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.ml.wrapper import JavaEstimator, JavaParams, JavaModel +from pyspark.ml.param.shared import HasPredictionCol +from pyspark.sql.dataframe import DataFrame + +from pyspark.ml.param import Param + +class _FPGrowthParams(HasPredictionCol): + itemsCol: Param[str] + minSupport: Param[float] + numPartitions: Param[int] + minConfidence: Param[float] + def __init__(self, *args: Any): ... + def getItemsCol(self) -> str: ... + def getMinSupport(self) -> float: ... + def getNumPartitions(self) -> int: ... + def getMinConfidence(self) -> float: ... + +class FPGrowthModel( + JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable[FPGrowthModel] +): + def setItemsCol(self, value: str) -> FPGrowthModel: ... + def setMinConfidence(self, value: float) -> FPGrowthModel: ... + def setPredictionCol(self, value: str) -> FPGrowthModel: ... + @property + def freqItemsets(self) -> DataFrame: ... + @property + def associationRules(self) -> DataFrame: ... + +class FPGrowth( + JavaEstimator[FPGrowthModel], + _FPGrowthParams, + JavaMLWritable, + JavaMLReadable[FPGrowth], +): + def __init__( + self, + *, + minSupport: float = ..., + minConfidence: float = ..., + itemsCol: str = ..., + predictionCol: str = ..., + numPartitions: Optional[int] = ... 
+ ) -> None: ... + def setParams( + self, + *, + minSupport: float = ..., + minConfidence: float = ..., + itemsCol: str = ..., + predictionCol: str = ..., + numPartitions: Optional[int] = ... + ) -> FPGrowth: ... + def setItemsCol(self, value: str) -> FPGrowth: ... + def setMinSupport(self, value: float) -> FPGrowth: ... + def setNumPartitions(self, value: int) -> FPGrowth: ... + def setMinConfidence(self, value: float) -> FPGrowth: ... + def setPredictionCol(self, value: str) -> FPGrowth: ... + +class PrefixSpan(JavaParams): + minSupport: Param[float] + maxPatternLength: Param[int] + maxLocalProjDBSize: Param[int] + sequenceCol: Param[str] + def __init__( + self, + *, + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + sequenceCol: str = ... + ) -> None: ... + def setParams( + self, + *, + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + sequenceCol: str = ... + ) -> PrefixSpan: ... + def setMinSupport(self, value: float) -> PrefixSpan: ... + def getMinSupport(self) -> float: ... + def setMaxPatternLength(self, value: int) -> PrefixSpan: ... + def getMaxPatternLength(self) -> int: ... + def setMaxLocalProjDBSize(self, value: int) -> PrefixSpan: ... + def getMaxLocalProjDBSize(self) -> int: ... + def setSequenceCol(self, value: str) -> PrefixSpan: ... + def getSequenceCol(self) -> str: ... + def findFrequentSequentialPatterns(self, dataset: DataFrame) -> DataFrame: ... diff --git a/python/pyspark/ml/functions.pyi b/python/pyspark/ml/functions.pyi new file mode 100644 index 0000000000000..42650e742e781 --- /dev/null +++ b/python/pyspark/ml/functions.pyi @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import SparkContext as SparkContext, since as since # noqa: F401 +from pyspark.sql.column import Column as Column + +def vector_to_array(col: Column) -> Column: ... diff --git a/python/pyspark/ml/image.pyi b/python/pyspark/ml/image.pyi new file mode 100644 index 0000000000000..9ff3a8817aadd --- /dev/null +++ b/python/pyspark/ml/image.pyi @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict, List + +from pyspark.sql.types import Row, StructType + +from numpy import ndarray # type: ignore[import] + +class _ImageSchema: + def __init__(self) -> None: ... + @property + def imageSchema(self) -> StructType: ... + @property + def ocvTypes(self) -> Dict[str, int]: ... + @property + def columnSchema(self) -> StructType: ... + @property + def imageFields(self) -> List[str]: ... + @property + def undefinedImageType(self) -> str: ... + def toNDArray(self, image: Row) -> ndarray: ... + def toImage(self, array: ndarray, origin: str = ...) -> Row: ... + +ImageSchema: _ImageSchema diff --git a/python/pyspark/ml/linalg/__init__.pyi b/python/pyspark/ml/linalg/__init__.pyi new file mode 100644 index 0000000000000..a576b30aec308 --- /dev/null +++ b/python/pyspark/ml/linalg/__init__.pyi @@ -0,0 +1,255 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union + +from pyspark.ml import linalg as newlinalg # noqa: F401 +from pyspark.sql.types import StructType, UserDefinedType + +from numpy import float64, ndarray # type: ignore[import] + +class VectorUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj: Vector + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... + def deserialize(self, datum: Any) -> Vector: ... + def simpleString(self) -> str: ... + +class MatrixUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj + ) -> Tuple[ + int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool + ]: ... + def deserialize(self, datum: Any) -> Matrix: ... + def simpleString(self) -> str: ... + +class Vector: + __UDT__: VectorUDT + def toArray(self) -> ndarray: ... + +class DenseVector(Vector): + array: ndarray + @overload + def __init__(self, *elements: float) -> None: ... + @overload + def __init__(self, __arr: bytes) -> None: ... + @overload + def __init__(self, __arr: Iterable[float]) -> None: ... + @staticmethod + def parse(s) -> DenseVector: ... + def __reduce__(self) -> Tuple[type, bytes]: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + @property + def values(self) -> ndarray: ... 
+ def __getitem__(self, item: int) -> float64: ... + def __len__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __hash__(self) -> int: ... + def __getattr__(self, item: str) -> Any: ... + def __neg__(self) -> DenseVector: ... + def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + +class SparseVector(Vector): + size: int + indices: ndarray + values: ndarray + @overload + def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... + @overload + def __init__( + self, size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> None: ... + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... + @overload + def __init__(self, size: int, __map: Dict[int, float]) -> None: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def __reduce__(self): ... + @staticmethod + def parse(s: str) -> SparseVector: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def __len__(self) -> int: ... + def __eq__(self, other) -> bool: ... + def __getitem__(self, index: int) -> float64: ... + def __ne__(self, other) -> bool: ... + def __hash__(self) -> int: ... + +class Vectors: + @overload + @staticmethod + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... + @overload + @staticmethod + def sparse( + size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... + @overload + @staticmethod + def dense(self, *elements: float) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: bytes) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: Iterable[float]) -> DenseVector: ... + @staticmethod + def stringify(vector: Vector) -> str: ... + @staticmethod + def squared_distance(v1: Vector, v2: Vector) -> float64: ... + @staticmethod + def norm(vector: Vector, p: Union[float, str]) -> float64: ... + @staticmethod + def parse(s: str) -> Vector: ... + @staticmethod + def zeros(size: int) -> DenseVector: ... 
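For reference, a small sketch of the `Vectors` factory and the vector operations typed above (plain local objects, no SparkSession needed):

```python
from pyspark.ml.linalg import Vectors

dense = Vectors.dense([1.0, 0.0, 3.0])
sparse = Vectors.sparse(3, [0, 2], [1.0, 3.0])

print(dense.dot(sparse))                        # 10.0
print(Vectors.squared_distance(dense, sparse))  # 0.0
print(sparse.toArray())                         # numpy ndarray, per the annotations
```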
+ +class Matrix: + __UDT__: MatrixUDT + numRows: int + numCols: int + isTransposed: bool + def __init__( + self, numRows: int, numCols: int, isTransposed: bool = ... + ) -> None: ... + def toArray(self): ... + +class DenseMatrix(Matrix): + values: Any + @overload + def __init__( + self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def toArray(self) -> ndarray: ... + def toSparse(self) -> SparseMatrix: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def __eq__(self, other) -> bool: ... + +class SparseMatrix(Matrix): + colPtrs: ndarray + rowIndices: ndarray + values: ndarray + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def toArray(self) -> ndarray: ... + def toDense(self) -> DenseMatrix: ... + def __eq__(self, other) -> bool: ... + +class Matrices: + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> SparseMatrix: ... diff --git a/python/pyspark/ml/param/__init__.pyi b/python/pyspark/ml/param/__init__.pyi new file mode 100644 index 0000000000000..23a63c573e452 --- /dev/null +++ b/python/pyspark/ml/param/__init__.pyi @@ -0,0 +1,96 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import abc +from typing import overload +from typing import Any, Callable, Generic, List, Optional +from pyspark.ml._typing import T +import pyspark.ml._typing + +import pyspark.ml.util +from pyspark.ml.linalg import DenseVector, Matrix + +class Param(Generic[T]): + parent: str + name: str + doc: str + typeConverter: Callable[[Any], T] + def __init__( + self, + parent: pyspark.ml.util.Identifiable, + name: str, + doc: str, + typeConverter: Optional[Callable[[Any], T]] = ..., + ) -> None: ... + def __hash__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + +class TypeConverters: + @staticmethod + def identity(value: T) -> T: ... + @staticmethod + def toList(value: Any) -> List: ... + @staticmethod + def toListFloat(value: Any) -> List[float]: ... + @staticmethod + def toListInt(value: Any) -> List[int]: ... + @staticmethod + def toListString(value: Any) -> List[str]: ... + @staticmethod + def toVector(value: Any) -> DenseVector: ... + @staticmethod + def toMatrix(value: Any) -> Matrix: ... + @staticmethod + def toFloat(value: Any) -> float: ... + @staticmethod + def toInt(value: Any) -> int: ... + @staticmethod + def toString(value: Any) -> str: ... + @staticmethod + def toBoolean(value: Any) -> bool: ... + +class Params(pyspark.ml.util.Identifiable, metaclass=abc.ABCMeta): + def __init__(self) -> None: ... + @property + def params(self) -> List[Param]: ... + def explainParam(self, param: str) -> str: ... + def explainParams(self) -> str: ... + def getParam(self, paramName: str) -> Param: ... + @overload + def isSet(self, param: str) -> bool: ... + @overload + def isSet(self, param: Param[Any]) -> bool: ... + @overload + def hasDefault(self, param: str) -> bool: ... + @overload + def hasDefault(self, param: Param[Any]) -> bool: ... + @overload + def isDefined(self, param: str) -> bool: ... + @overload + def isDefined(self, param: Param[Any]) -> bool: ... + def hasParam(self, paramName: str) -> bool: ... + @overload + def getOrDefault(self, param: str) -> Any: ... + @overload + def getOrDefault(self, param: Param[T]) -> T: ... + def extractParamMap( + self, extra: Optional[pyspark.ml._typing.ParamMap] = ... + ) -> pyspark.ml._typing.ParamMap: ... + def copy(self, extra: Optional[pyspark.ml._typing.ParamMap] = ...) -> Params: ... + def set(self, param: Param, value: Any) -> None: ... + def clear(self, param: Param) -> None: ... diff --git a/python/pyspark/ml/param/_shared_params_code_gen.pyi b/python/pyspark/ml/param/_shared_params_code_gen.pyi new file mode 100644 index 0000000000000..e436a54c0eaa4 --- /dev/null +++ b/python/pyspark/ml/param/_shared_params_code_gen.pyi @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +header: str diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi new file mode 100644 index 0000000000000..5999c0eaa4661 --- /dev/null +++ b/python/pyspark/ml/param/shared.pyi @@ -0,0 +1,187 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Generic, List +from pyspark.ml._typing import T + +from pyspark.ml.param import * + +class HasMaxIter(Params): + maxIter: Param[int] + def __init__(self) -> None: ... + def getMaxIter(self) -> int: ... + +class HasRegParam(Params): + regParam: Param[float] + def __init__(self) -> None: ... + def getRegParam(self) -> float: ... + +class HasFeaturesCol(Params): + featuresCol: Param[str] + def __init__(self) -> None: ... + def getFeaturesCol(self) -> str: ... + +class HasLabelCol(Params): + labelCol: Param[str] + def __init__(self) -> None: ... + def getLabelCol(self) -> str: ... + +class HasPredictionCol(Params): + predictionCol: Param[str] + def __init__(self) -> None: ... + def getPredictionCol(self) -> str: ... + +class HasProbabilityCol(Params): + probabilityCol: Param[str] + def __init__(self) -> None: ... + def getProbabilityCol(self) -> str: ... + +class HasRawPredictionCol(Params): + rawPredictionCol: Param[str] + def __init__(self) -> None: ... + def getRawPredictionCol(self) -> str: ... + +class HasInputCol(Params): + inputCol: Param[str] + def __init__(self) -> None: ... + def getInputCol(self) -> str: ... + +class HasInputCols(Params): + inputCols: Param[List[str]] + def __init__(self) -> None: ... + def getInputCols(self) -> List[str]: ... + +class HasOutputCol(Params): + outputCol: Param[str] + def __init__(self) -> None: ... + def getOutputCol(self) -> str: ... + +class HasOutputCols(Params): + outputCols: Param[List[str]] + def __init__(self) -> None: ... + def getOutputCols(self) -> List[str]: ... + +class HasNumFeatures(Params): + numFeatures: Param[int] + def __init__(self) -> None: ... + def getNumFeatures(self) -> int: ... + +class HasCheckpointInterval(Params): + checkpointInterval: Param[int] + def __init__(self) -> None: ... + def getCheckpointInterval(self) -> int: ... + +class HasSeed(Params): + seed: Param[int] + def __init__(self) -> None: ... + def getSeed(self) -> int: ... + +class HasTol(Params): + tol: Param[float] + def __init__(self) -> None: ... + def getTol(self) -> float: ... + +class HasRelativeError(Params): + relativeError: Param[float] + def __init__(self) -> None: ... + def getRelativeError(self) -> float: ... + +class HasStepSize(Params): + stepSize: Param[float] + def __init__(self) -> None: ... + def getStepSize(self) -> float: ... + +class HasHandleInvalid(Params): + handleInvalid: Param[str] + def __init__(self) -> None: ... + def getHandleInvalid(self) -> str: ... 
+ +class HasElasticNetParam(Params): + elasticNetParam: Param[float] + def __init__(self) -> None: ... + def getElasticNetParam(self) -> float: ... + +class HasFitIntercept(Params): + fitIntercept: Param[bool] + def __init__(self) -> None: ... + def getFitIntercept(self) -> bool: ... + +class HasStandardization(Params): + standardization: Param[bool] + def __init__(self) -> None: ... + def getStandardization(self) -> bool: ... + +class HasThresholds(Params): + thresholds: Param[List[float]] + def __init__(self) -> None: ... + def getThresholds(self) -> List[float]: ... + +class HasThreshold(Params): + threshold: Param[float] + def __init__(self) -> None: ... + def getThreshold(self) -> float: ... + +class HasWeightCol(Params): + weightCol: Param[str] + def __init__(self) -> None: ... + def getWeightCol(self) -> str: ... + +class HasSolver(Params): + solver: Param[str] + def __init__(self) -> None: ... + def getSolver(self) -> str: ... + +class HasVarianceCol(Params): + varianceCol: Param[str] + def __init__(self) -> None: ... + def getVarianceCol(self) -> str: ... + +class HasAggregationDepth(Params): + aggregationDepth: Param[int] + def __init__(self) -> None: ... + def getAggregationDepth(self) -> int: ... + +class HasParallelism(Params): + parallelism: Param[int] + def __init__(self) -> None: ... + def getParallelism(self) -> int: ... + +class HasCollectSubModels(Params): + collectSubModels: Param[bool] + def __init__(self) -> None: ... + def getCollectSubModels(self) -> bool: ... + +class HasLoss(Params): + loss: Param[str] + def __init__(self) -> None: ... + def getLoss(self) -> str: ... + +class HasValidationIndicatorCol(Params): + validationIndicatorCol: Param[str] + def __init__(self) -> None: ... + def getValidationIndicatorCol(self) -> str: ... + +class HasDistanceMeasure(Params): + distanceMeasure: Param[str] + def __init__(self) -> None: ... + def getDistanceMeasure(self) -> str: ... + +class HasBlockSize(Params): + blockSize: Param[int] + def __init__(self) -> None: ... + def getBlockSize(self) -> int: ... diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi new file mode 100644 index 0000000000000..44680586d70d1 --- /dev/null +++ b/python/pyspark/ml/pipeline.pyi @@ -0,0 +1,97 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Dict, List, Optional, Tuple, Type, Union + +from pyspark.ml._typing import PipelineStage +from pyspark.context import SparkContext +from pyspark.ml.base import Estimator, Model, Transformer +from pyspark.ml.param import Param +from pyspark.ml.util import ( # noqa: F401 + DefaultParamsReader as DefaultParamsReader, + DefaultParamsWriter as DefaultParamsWriter, + JavaMLReader as JavaMLReader, + JavaMLWritable as JavaMLWritable, + JavaMLWriter as JavaMLWriter, + MLReadable as MLReadable, + MLReader as MLReader, + MLWritable as MLWritable, + MLWriter as MLWriter, +) + +class Pipeline(Estimator[PipelineModel], MLReadable[Pipeline], MLWritable): + stages: List[PipelineStage] + def __init__(self, *, stages: Optional[List[PipelineStage]] = ...) -> None: ... + def setStages(self, stages: List[PipelineStage]) -> Pipeline: ... + def getStages(self) -> List[PipelineStage]: ... + def setParams(self, *, stages: Optional[List[PipelineStage]] = ...) -> Pipeline: ... + def copy(self, extra: Optional[Dict[Param, str]] = ...) -> Pipeline: ... + def write(self) -> JavaMLWriter: ... + def save(self, path: str) -> None: ... + @classmethod + def read(cls) -> PipelineReader: ... + +class PipelineWriter(MLWriter): + instance: Pipeline + def __init__(self, instance: Pipeline) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class PipelineReader(MLReader): + cls: Type[Pipeline] + def __init__(self, cls: Type[Pipeline]) -> None: ... + def load(self, path: str) -> Pipeline: ... + +class PipelineModelWriter(MLWriter): + instance: PipelineModel + def __init__(self, instance: PipelineModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class PipelineModelReader(MLReader): + cls: Type[PipelineModel] + def __init__(self, cls: Type[PipelineModel]) -> None: ... + def load(self, path: str) -> PipelineModel: ... + +class PipelineModel(Model, MLReadable[PipelineModel], MLWritable): + stages: List[PipelineStage] + def __init__(self, stages: List[Transformer]) -> None: ... + def copy(self, extra: Optional[Dict[Param, Any]] = ...) -> PipelineModel: ... + def write(self) -> JavaMLWriter: ... + def save(self, path: str) -> None: ... + @classmethod + def read(cls) -> PipelineModelReader: ... + +class PipelineSharedReadWrite: + @staticmethod + def checkStagesForJava(stages: List[PipelineStage]) -> bool: ... + @staticmethod + def validateStages(stages: List[PipelineStage]) -> None: ... + @staticmethod + def saveImpl( + instance: Union[Pipeline, PipelineModel], + stages: List[PipelineStage], + sc: SparkContext, + path: str, + ) -> None: ... + @staticmethod + def load( + metadata: Dict[str, Any], sc: SparkContext, path: str + ) -> Tuple[str, List[PipelineStage]]: ... + @staticmethod + def getStagePath( + stageUid: str, stageIdx: int, numStages: int, stagesDir: str + ) -> str: ... diff --git a/python/pyspark/ml/recommendation.pyi b/python/pyspark/ml/recommendation.pyi new file mode 100644 index 0000000000000..390486b45c5e6 --- /dev/null +++ b/python/pyspark/ml/recommendation.pyi @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +import sys # noqa: F401 + +from pyspark import since, keyword_only # noqa: F401 +from pyspark.ml.param.shared import ( + HasBlockSize, + HasCheckpointInterval, + HasMaxIter, + HasPredictionCol, + HasRegParam, + HasSeed, +) +from pyspark.ml.wrapper import JavaEstimator, JavaModel +from pyspark.ml.common import inherit_doc # noqa: F401 +from pyspark.ml.param import Param +from pyspark.ml.util import JavaMLWritable, JavaMLReadable + +from pyspark.sql.dataframe import DataFrame + +class _ALSModelParams(HasPredictionCol, HasBlockSize): + userCol: Param[str] + itemCol: Param[str] + coldStartStrategy: Param[str] + def getUserCol(self) -> str: ... + def getItemCol(self) -> str: ... + def getColdStartStrategy(self) -> str: ... + +class _ALSParams( + _ALSModelParams, HasMaxIter, HasRegParam, HasCheckpointInterval, HasSeed +): + rank: Param[int] + numUserBlocks: Param[int] + numItemBlocks: Param[int] + implicitPrefs: Param[bool] + alpha: Param[float] + ratingCol: Param[str] + nonnegative: Param[bool] + intermediateStorageLevel: Param[str] + finalStorageLevel: Param[str] + def __init__(self, *args: Any): ... + def getRank(self) -> int: ... + def getNumUserBlocks(self) -> int: ... + def getNumItemBlocks(self) -> int: ... + def getImplicitPrefs(self) -> bool: ... + def getAlpha(self) -> float: ... + def getRatingCol(self) -> str: ... + def getNonnegative(self) -> bool: ... + def getIntermediateStorageLevel(self) -> str: ... + def getFinalStorageLevel(self) -> str: ... + +class ALS(JavaEstimator[ALSModel], _ALSParams, JavaMLWritable, JavaMLReadable[ALS]): + def __init__( + self, + *, + rank: int = ..., + maxIter: int = ..., + regParam: float = ..., + numUserBlocks: int = ..., + numItemBlocks: int = ..., + implicitPrefs: bool = ..., + alpha: float = ..., + userCol: str = ..., + itemCol: str = ..., + seed: Optional[int] = ..., + ratingCol: str = ..., + nonnegative: bool = ..., + checkpointInterval: int = ..., + intermediateStorageLevel: str = ..., + finalStorageLevel: str = ..., + coldStartStrategy: str = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + rank: int = ..., + maxIter: int = ..., + regParam: float = ..., + numUserBlocks: int = ..., + numItemBlocks: int = ..., + implicitPrefs: bool = ..., + alpha: float = ..., + userCol: str = ..., + itemCol: str = ..., + seed: Optional[int] = ..., + ratingCol: str = ..., + nonnegative: bool = ..., + checkpointInterval: int = ..., + intermediateStorageLevel: str = ..., + finalStorageLevel: str = ..., + coldStartStrategy: str = ..., + blockSize: int = ... + ) -> ALS: ... + def setRank(self, value: int) -> ALS: ... + def setNumUserBlocks(self, value: int) -> ALS: ... + def setNumItemBlocks(self, value: int) -> ALS: ... + def setNumBlocks(self, value: int) -> ALS: ... + def setImplicitPrefs(self, value: bool) -> ALS: ... + def setAlpha(self, value: float) -> ALS: ... + def setUserCol(self, value: str) -> ALS: ... + def setItemCol(self, value: str) -> ALS: ... + def setRatingCol(self, value: str) -> ALS: ... + def setNonnegative(self, value: bool) -> ALS: ... 
+ def setIntermediateStorageLevel(self, value: str) -> ALS: ... + def setFinalStorageLevel(self, value: str) -> ALS: ... + def setColdStartStrategy(self, value: str) -> ALS: ... + def setMaxIter(self, value: int) -> ALS: ... + def setRegParam(self, value: float) -> ALS: ... + def setPredictionCol(self, value: str) -> ALS: ... + def setCheckpointInterval(self, value: int) -> ALS: ... + def setSeed(self, value: int) -> ALS: ... + def setBlockSize(self, value: int) -> ALS: ... + +class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable[ALSModel]): + def setUserCol(self, value: str) -> ALSModel: ... + def setItemCol(self, value: str) -> ALSModel: ... + def setColdStartStrategy(self, value: str) -> ALSModel: ... + def setPredictionCol(self, value: str) -> ALSModel: ... + def setBlockSize(self, value: int) -> ALSModel: ... + @property + def rank(self) -> int: ... + @property + def userFactors(self) -> DataFrame: ... + @property + def itemFactors(self) -> DataFrame: ... + def recommendForAllUsers(self, numItems: int) -> DataFrame: ... + def recommendForAllItems(self, numUsers: int) -> DataFrame: ... + def recommendForUserSubset( + self, dataset: DataFrame, numItems: int + ) -> DataFrame: ... + def recommendForItemSubset( + self, dataset: DataFrame, numUsers: int + ) -> DataFrame: ... diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi new file mode 100644 index 0000000000000..991eb4f12ac85 --- /dev/null +++ b/python/pyspark/ml/regression.pyi @@ -0,0 +1,825 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, List, Optional +from pyspark.ml._typing import JM, M, T + +import abc +from pyspark.ml import PredictionModel, Predictor +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param.shared import ( + HasAggregationDepth, + HasBlockSize, + HasElasticNetParam, + HasFeaturesCol, + HasFitIntercept, + HasLabelCol, + HasLoss, + HasMaxIter, + HasPredictionCol, + HasRegParam, + HasSeed, + HasSolver, + HasStandardization, + HasStepSize, + HasTol, + HasVarianceCol, + HasWeightCol, +) +from pyspark.ml.tree import ( + _DecisionTreeModel, + _DecisionTreeParams, + _GBTParams, + _RandomForestParams, + _TreeEnsembleModel, + _TreeRegressorParams, +) +from pyspark.ml.util import ( + GeneralJavaMLWritable, + HasTrainingSummary, + JavaMLReadable, + JavaMLWritable, +) +from pyspark.ml.wrapper import ( + JavaEstimator, + JavaModel, + JavaPredictionModel, + JavaPredictor, + JavaWrapper, +) + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.param import Param +from pyspark.sql.dataframe import DataFrame + +class Regressor(Predictor[M], _PredictorParams, metaclass=abc.ABCMeta): ... +class RegressionModel(PredictionModel[T], _PredictorParams, metaclass=abc.ABCMeta): ... 
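
The abstract `Regressor`/`RegressionModel` pair above is the hierarchy that concrete estimators such as `LinearRegression` plug into, with the shared-param mixins supplying the typed getters. A minimal sketch, assuming a local SparkSession and a toy DataFrame (illustrative aside, not lines of the patch):

```python
# Illustrative sketch: a concrete Regressor/RegressionModel pair in use.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression

spark = SparkSession.builder.master("local[1]").getOrCreate()
train = spark.createDataFrame(
    [(1.0, Vectors.dense(0.0)), (3.0, Vectors.dense(2.0))], ["label", "features"]
)

lr = LinearRegression(maxIter=5, regParam=0.0)
assert lr.getMaxIter() == 5          # getter typed via HasMaxIter
model = lr.fit(train)                # a LinearRegressionModel (declared further below)
print(model.coefficients, model.intercept)
```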
+class _JavaRegressor(Regressor, JavaPredictor[JM], metaclass=abc.ABCMeta): ... +class _JavaRegressionModel( + RegressionModel, JavaPredictionModel[T], metaclass=abc.ABCMeta +): ... + +class _LinearRegressionParams( + _PredictorParams, + HasRegParam, + HasElasticNetParam, + HasMaxIter, + HasTol, + HasFitIntercept, + HasStandardization, + HasWeightCol, + HasSolver, + HasAggregationDepth, + HasLoss, + HasBlockSize, +): + solver: Param[str] + loss: Param[str] + epsilon: Param[float] + def __init__(self, *args: Any): ... + def getEpsilon(self) -> float: ... + +class LinearRegression( + _JavaRegressor[LinearRegressionModel], + _LinearRegressionParams, + JavaMLWritable, + JavaMLReadable[LinearRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + solver: str = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + epsilon: float = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + standardization: bool = ..., + solver: str = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + epsilon: float = ..., + blockSize: int = ... + ) -> LinearRegression: ... + def setEpsilon(self, value: float) -> LinearRegression: ... + def setMaxIter(self, value: int) -> LinearRegression: ... + def setRegParam(self, value: float) -> LinearRegression: ... + def setTol(self, value: float) -> LinearRegression: ... + def setElasticNetParam(self, value: float) -> LinearRegression: ... + def setFitIntercept(self, value: bool) -> LinearRegression: ... + def setStandardization(self, value: bool) -> LinearRegression: ... + def setWeightCol(self, value: str) -> LinearRegression: ... + def setSolver(self, value: str) -> LinearRegression: ... + def setAggregationDepth(self, value: int) -> LinearRegression: ... + def setLoss(self, value: str) -> LinearRegression: ... + def setBlockSize(self, value: int) -> LinearRegression: ... + +class LinearRegressionModel( + _JavaRegressionModel[Vector], + _LinearRegressionParams, + GeneralJavaMLWritable, + JavaMLReadable[LinearRegressionModel], + HasTrainingSummary[LinearRegressionSummary], +): + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def summary(self) -> LinearRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> LinearRegressionSummary: ... + +class LinearRegressionSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def labelCol(self) -> str: ... + @property + def featuresCol(self) -> str: ... + @property + def explainedVariance(self) -> float: ... + @property + def meanAbsoluteError(self) -> float: ... + @property + def meanSquaredError(self) -> float: ... + @property + def rootMeanSquaredError(self) -> float: ... + @property + def r2(self) -> float: ... + @property + def r2adj(self) -> float: ... + @property + def residuals(self) -> DataFrame: ... + @property + def numInstances(self) -> int: ... + @property + def devianceResiduals(self) -> List[float]: ... + @property + def coefficientStandardErrors(self) -> List[float]: ... 
+ @property + def tValues(self) -> List[float]: ... + @property + def pValues(self) -> List[float]: ... + +class LinearRegressionTrainingSummary(LinearRegressionSummary): + @property + def objectiveHistory(self) -> List[float]: ... + @property + def totalIterations(self) -> int: ... + +class _IsotonicRegressionParams( + HasFeaturesCol, HasLabelCol, HasPredictionCol, HasWeightCol +): + isotonic: Param[bool] + featureIndex: Param[int] + def getIsotonic(self) -> bool: ... + def getFeatureIndex(self) -> int: ... + +class IsotonicRegression( + JavaEstimator[IsotonicRegressionModel], + _IsotonicRegressionParams, + HasWeightCol, + JavaMLWritable, + JavaMLReadable[IsotonicRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + weightCol: Optional[str] = ..., + isotonic: bool = ..., + featureIndex: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + weightCol: Optional[str] = ..., + isotonic: bool = ..., + featureIndex: int = ... + ) -> IsotonicRegression: ... + def setIsotonic(self, value: bool) -> IsotonicRegression: ... + def setFeatureIndex(self, value: int) -> IsotonicRegression: ... + def setFeaturesCol(self, value: str) -> IsotonicRegression: ... + def setPredictionCol(self, value: str) -> IsotonicRegression: ... + def setLabelCol(self, value: str) -> IsotonicRegression: ... + def setWeightCol(self, value: str) -> IsotonicRegression: ... + +class IsotonicRegressionModel( + JavaModel, + _IsotonicRegressionParams, + JavaMLWritable, + JavaMLReadable[IsotonicRegressionModel], +): + def setFeaturesCol(self, value: str) -> IsotonicRegressionModel: ... + def setPredictionCol(self, value: str) -> IsotonicRegressionModel: ... + def setFeatureIndex(self, value: int) -> IsotonicRegressionModel: ... + @property + def boundaries(self) -> Vector: ... + @property + def predictions(self) -> Vector: ... + @property + def numFeatures(self) -> int: ... + def predict(self, value: float) -> float: ... + +class _DecisionTreeRegressorParams( + _DecisionTreeParams, _TreeRegressorParams, HasVarianceCol +): + def __init__(self, *args: Any): ... + +class DecisionTreeRegressor( + _JavaRegressor[DecisionTreeRegressionModel], + _DecisionTreeRegressorParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeRegressor], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + varianceCol: Optional[str] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + seed: Optional[int] = ..., + varianceCol: Optional[str] = ..., + weightCol: Optional[str] = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ... + ) -> DecisionTreeRegressor: ... + def setMaxDepth(self, value: int) -> DecisionTreeRegressor: ... + def setMaxBins(self, value: int) -> DecisionTreeRegressor: ... 
+ def setMinInstancesPerNode(self, value: int) -> DecisionTreeRegressor: ... + def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeRegressor: ... + def setMinInfoGain(self, value: float) -> DecisionTreeRegressor: ... + def setMaxMemoryInMB(self, value: int) -> DecisionTreeRegressor: ... + def setCacheNodeIds(self, value: bool) -> DecisionTreeRegressor: ... + def setImpurity(self, value: str) -> DecisionTreeRegressor: ... + def setCheckpointInterval(self, value: int) -> DecisionTreeRegressor: ... + def setSeed(self, value: int) -> DecisionTreeRegressor: ... + def setWeightCol(self, value: str) -> DecisionTreeRegressor: ... + def setVarianceCol(self, value: str) -> DecisionTreeRegressor: ... + +class DecisionTreeRegressionModel( + _JavaRegressionModel[Vector], + _DecisionTreeModel, + _DecisionTreeRegressorParams, + JavaMLWritable, + JavaMLReadable[DecisionTreeRegressionModel], +): + def setVarianceCol(self, value: str) -> DecisionTreeRegressionModel: ... + @property + def featureImportances(self) -> Vector: ... + +class _RandomForestRegressorParams(_RandomForestParams, _TreeRegressorParams): + def __init__(self, *args: Any): ... + +class RandomForestRegressor( + _JavaRegressor[RandomForestRegressionModel], + _RandomForestRegressorParams, + JavaMLWritable, + JavaMLReadable[RandomForestRegressor], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + subsamplingRate: float = ..., + seed: Optional[int] = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + maxMemoryInMB: int = ..., + cacheNodeIds: bool = ..., + checkpointInterval: int = ..., + impurity: str = ..., + subsamplingRate: float = ..., + seed: Optional[int] = ..., + numTrees: int = ..., + featureSubsetStrategy: str = ..., + leafCol: str = ..., + minWeightFractionPerNode: float = ..., + weightCol: Optional[str] = ..., + bootstrap: Optional[bool] = ... + ) -> RandomForestRegressor: ... + def setMaxDepth(self, value: int) -> RandomForestRegressor: ... + def setMaxBins(self, value: int) -> RandomForestRegressor: ... + def setMinInstancesPerNode(self, value: int) -> RandomForestRegressor: ... + def setMinInfoGain(self, value: float) -> RandomForestRegressor: ... + def setMaxMemoryInMB(self, value: int) -> RandomForestRegressor: ... + def setCacheNodeIds(self, value: bool) -> RandomForestRegressor: ... + def setImpurity(self, value: str) -> RandomForestRegressor: ... + def setNumTrees(self, value: int) -> RandomForestRegressor: ... + def setBootstrap(self, value: bool) -> RandomForestRegressor: ... + def setSubsamplingRate(self, value: float) -> RandomForestRegressor: ... + def setFeatureSubsetStrategy(self, value: str) -> RandomForestRegressor: ... + def setCheckpointInterval(self, value: int) -> RandomForestRegressor: ... + def setSeed(self, value: int) -> RandomForestRegressor: ... + def setWeightCol(self, value: str) -> RandomForestRegressor: ... 
+    def setMinWeightFractionPerNode(self, value: float) -> RandomForestRegressor: ...
+
+class RandomForestRegressionModel(
+    _JavaRegressionModel[Vector],
+    _TreeEnsembleModel,
+    _RandomForestRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable,
+):
+    @property
+    def trees(self) -> List[DecisionTreeRegressionModel]: ...
+    @property
+    def featureImportances(self) -> Vector: ...
+
+class _GBTRegressorParams(_GBTParams, _TreeRegressorParams):
+    supportedLossTypes: List[str]
+    lossType: Param[str]
+    def __init__(self, *args: Any): ...
+    def getLossType(self) -> str: ...
+
+class GBTRegressor(
+    _JavaRegressor[GBTRegressionModel],
+    _GBTRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable[GBTRegressor],
+):
+    def __init__(
+        self,
+        *,
+        featuresCol: str = ...,
+        labelCol: str = ...,
+        predictionCol: str = ...,
+        maxDepth: int = ...,
+        maxBins: int = ...,
+        minInstancesPerNode: int = ...,
+        minInfoGain: float = ...,
+        maxMemoryInMB: int = ...,
+        cacheNodeIds: bool = ...,
+        subsamplingRate: float = ...,
+        checkpointInterval: int = ...,
+        lossType: str = ...,
+        maxIter: int = ...,
+        stepSize: float = ...,
+        seed: Optional[int] = ...,
+        impurity: str = ...,
+        featureSubsetStrategy: str = ...,
+        validationTol: float = ...,
+        validationIndicatorCol: Optional[str] = ...,
+        leafCol: str = ...,
+        minWeightFractionPerNode: float = ...,
+        weightCol: Optional[str] = ...
+    ) -> None: ...
+    def setParams(
+        self,
+        *,
+        featuresCol: str = ...,
+        labelCol: str = ...,
+        predictionCol: str = ...,
+        maxDepth: int = ...,
+        maxBins: int = ...,
+        minInstancesPerNode: int = ...,
+        minInfoGain: float = ...,
+        maxMemoryInMB: int = ...,
+        cacheNodeIds: bool = ...,
+        subsamplingRate: float = ...,
+        checkpointInterval: int = ...,
+        lossType: str = ...,
+        maxIter: int = ...,
+        stepSize: float = ...,
+        seed: Optional[int] = ...,
+        impurity: str = ...,
+        featureSubsetStrategy: str = ...,
+        validationTol: float = ...,
+        validationIndicatorCol: Optional[str] = ...,
+        leafCol: str = ...,
+        minWeightFractionPerNode: float = ...,
+        weightCol: Optional[str] = ...
+    ) -> GBTRegressor: ...
+    def setMaxDepth(self, value: int) -> GBTRegressor: ...
+    def setMaxBins(self, value: int) -> GBTRegressor: ...
+    def setMinInstancesPerNode(self, value: int) -> GBTRegressor: ...
+    def setMinInfoGain(self, value: float) -> GBTRegressor: ...
+    def setMaxMemoryInMB(self, value: int) -> GBTRegressor: ...
+    def setCacheNodeIds(self, value: bool) -> GBTRegressor: ...
+    def setImpurity(self, value: str) -> GBTRegressor: ...
+    def setLossType(self, value: str) -> GBTRegressor: ...
+    def setSubsamplingRate(self, value: float) -> GBTRegressor: ...
+    def setFeatureSubsetStrategy(self, value: str) -> GBTRegressor: ...
+    def setValidationIndicatorCol(self, value: str) -> GBTRegressor: ...
+    def setMaxIter(self, value: int) -> GBTRegressor: ...
+    def setCheckpointInterval(self, value: int) -> GBTRegressor: ...
+    def setSeed(self, value: int) -> GBTRegressor: ...
+    def setStepSize(self, value: float) -> GBTRegressor: ...
+    def setWeightCol(self, value: str) -> GBTRegressor: ...
+    def setMinWeightFractionPerNode(self, value: float) -> GBTRegressor: ...
+
+class GBTRegressionModel(
+    _JavaRegressionModel[Vector],
+    _TreeEnsembleModel,
+    _GBTRegressorParams,
+    JavaMLWritable,
+    JavaMLReadable[GBTRegressionModel],
+):
+    @property
+    def featureImportances(self) -> Vector: ...
+    @property
+    def trees(self) -> List[DecisionTreeRegressionModel]: ...
+    def evaluateEachIteration(self, dataset: DataFrame, loss: str) -> List[float]: ...
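
A hedged sketch of the ensemble members typed above (`trees`, `featureImportances`, `evaluateEachIteration`); the local session and toy DataFrame are assumptions made only for illustration, not lines of the patch:

```python
# Illustrative sketch: exercising GBTRegressionModel members declared above.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import GBTRegressor

spark = SparkSession.builder.master("local[1]").getOrCreate()
train = spark.createDataFrame(
    [(0.0, Vectors.dense(0.0)), (1.0, Vectors.dense(1.0)), (2.0, Vectors.dense(2.0))],
    ["label", "features"],
)

model = GBTRegressor(maxIter=3, maxDepth=2, seed=42).fit(train)
print(model.featureImportances)                       # Vector
print([t.numNodes for t in model.trees])              # per-tree DecisionTreeRegressionModel
print(model.evaluateEachIteration(train, "squared"))  # List[float], one entry per iteration
```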
+ +class _AFTSurvivalRegressionParams( + _PredictorParams, + HasMaxIter, + HasTol, + HasFitIntercept, + HasAggregationDepth, + HasBlockSize, +): + censorCol: Param[str] + quantileProbabilities: Param[List[float]] + quantilesCol: Param[str] + def __init__(self, *args: Any): ... + def getCensorCol(self) -> str: ... + def getQuantileProbabilities(self) -> List[float]: ... + def getQuantilesCol(self) -> str: ... + +class AFTSurvivalRegression( + _JavaRegressor[AFTSurvivalRegressionModel], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable[AFTSurvivalRegression], +): + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + censorCol: str = ..., + quantileProbabilities: List[float] = ..., + quantilesCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> None: ... + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + censorCol: str = ..., + quantileProbabilities: List[float] = ..., + quantilesCol: Optional[str] = ..., + aggregationDepth: int = ..., + blockSize: int = ... + ) -> AFTSurvivalRegression: ... + def setCensorCol(self, value: str) -> AFTSurvivalRegression: ... + def setQuantileProbabilities(self, value: List[float]) -> AFTSurvivalRegression: ... + def setQuantilesCol(self, value: str) -> AFTSurvivalRegression: ... + def setMaxIter(self, value: int) -> AFTSurvivalRegression: ... + def setTol(self, value: float) -> AFTSurvivalRegression: ... + def setFitIntercept(self, value: bool) -> AFTSurvivalRegression: ... + def setAggregationDepth(self, value: int) -> AFTSurvivalRegression: ... + def setBlockSize(self, value: int) -> AFTSurvivalRegression: ... + +class AFTSurvivalRegressionModel( + _JavaRegressionModel[Vector], + _AFTSurvivalRegressionParams, + JavaMLWritable, + JavaMLReadable[AFTSurvivalRegressionModel], +): + def setQuantileProbabilities( + self, value: List[float] + ) -> AFTSurvivalRegressionModel: ... + def setQuantilesCol(self, value: str) -> AFTSurvivalRegressionModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def scale(self) -> float: ... + def predictQuantiles(self, features: Vector) -> Vector: ... + def predict(self, features: Vector) -> float: ... + +class _GeneralizedLinearRegressionParams( + _PredictorParams, + HasFitIntercept, + HasMaxIter, + HasTol, + HasRegParam, + HasWeightCol, + HasSolver, + HasAggregationDepth, +): + family: Param[str] + link: Param[str] + linkPredictionCol: Param[str] + variancePower: Param[float] + linkPower: Param[float] + solver: Param[str] + offsetCol: Param[str] + def __init__(self, *args: Any): ... + def getFamily(self) -> str: ... + def getLinkPredictionCol(self) -> str: ... + def getLink(self) -> str: ... + def getVariancePower(self) -> float: ... + def getLinkPower(self) -> float: ... + def getOffsetCol(self) -> str: ... 
+ +class GeneralizedLinearRegression( + _JavaRegressor[GeneralizedLinearRegressionModel], + _GeneralizedLinearRegressionParams, + JavaMLWritable, + JavaMLReadable[GeneralizedLinearRegression], +): + def __init__( + self, + *, + labelCol: str = ..., + featuresCol: str = ..., + predictionCol: str = ..., + family: str = ..., + link: Optional[str] = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + regParam: float = ..., + weightCol: Optional[str] = ..., + solver: str = ..., + linkPredictionCol: Optional[str] = ..., + variancePower: float = ..., + linkPower: Optional[float] = ..., + offsetCol: Optional[str] = ..., + aggregationDepth: int = ... + ) -> None: ... + def setParams( + self, + *, + labelCol: str = ..., + featuresCol: str = ..., + predictionCol: str = ..., + family: str = ..., + link: Optional[str] = ..., + fitIntercept: bool = ..., + maxIter: int = ..., + tol: float = ..., + regParam: float = ..., + weightCol: Optional[str] = ..., + solver: str = ..., + linkPredictionCol: Optional[str] = ..., + variancePower: float = ..., + linkPower: Optional[float] = ..., + offsetCol: Optional[str] = ..., + aggregationDepth: int = ... + ) -> GeneralizedLinearRegression: ... + def setFamily(self, value: str) -> GeneralizedLinearRegression: ... + def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegression: ... + def setLink(self, value: str) -> GeneralizedLinearRegression: ... + def setVariancePower(self, value: float) -> GeneralizedLinearRegression: ... + def setLinkPower(self, value: float) -> GeneralizedLinearRegression: ... + def setOffsetCol(self, value: str) -> GeneralizedLinearRegression: ... + def setMaxIter(self, value: int) -> GeneralizedLinearRegression: ... + def setRegParam(self, value: float) -> GeneralizedLinearRegression: ... + def setTol(self, value: float) -> GeneralizedLinearRegression: ... + def setFitIntercept(self, value: bool) -> GeneralizedLinearRegression: ... + def setWeightCol(self, value: str) -> GeneralizedLinearRegression: ... + def setSolver(self, value: str) -> GeneralizedLinearRegression: ... + def setAggregationDepth(self, value: int) -> GeneralizedLinearRegression: ... + +class GeneralizedLinearRegressionModel( + _JavaRegressionModel[Vector], + _GeneralizedLinearRegressionParams, + JavaMLWritable, + JavaMLReadable[GeneralizedLinearRegressionModel], + HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary], +): + def setLinkPredictionCol(self, value: str) -> GeneralizedLinearRegressionModel: ... + @property + def coefficients(self) -> Vector: ... + @property + def intercept(self) -> float: ... + @property + def summary(self) -> GeneralizedLinearRegressionTrainingSummary: ... + def evaluate(self, dataset: DataFrame) -> GeneralizedLinearRegressionSummary: ... + +class GeneralizedLinearRegressionSummary(JavaWrapper): + @property + def predictions(self) -> DataFrame: ... + @property + def predictionCol(self) -> str: ... + @property + def rank(self) -> int: ... + @property + def degreesOfFreedom(self) -> int: ... + @property + def residualDegreeOfFreedom(self) -> int: ... + @property + def residualDegreeOfFreedomNull(self) -> int: ... + def residuals(self, residualsType: str = ...) -> DataFrame: ... + @property + def nullDeviance(self) -> float: ... + @property + def deviance(self) -> float: ... + @property + def dispersion(self) -> float: ... + @property + def aic(self) -> float: ... + +class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSummary): + @property + def numIterations(self) -> int: ... 
+ @property + def solver(self) -> str: ... + @property + def coefficientStandardErrors(self) -> List[float]: ... + @property + def tValues(self) -> List[float]: ... + @property + def pValues(self) -> List[float]: ... + +class _FactorizationMachinesParams( + _PredictorParams, + HasMaxIter, + HasStepSize, + HasTol, + HasSolver, + HasSeed, + HasFitIntercept, + HasRegParam, + HasWeightCol, +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__(self, *args: Any): ... + def getFactorSize(self): ... + def getFitLinear(self): ... + def getMiniBatchFraction(self): ... + def getInitStd(self): ... + +class FMRegressor( + _JavaRegressor[FMRegressionModel], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMRegressor], +): + factorSize: Param[int] + fitLinear: Param[bool] + miniBatchFraction: Param[float] + initStd: Param[float] + solver: Param[str] + def __init__( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + seed: Optional[int] = ..., + ) -> None: ... + def setParams( + self, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + factorSize: int = ..., + fitIntercept: bool = ..., + fitLinear: bool = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initStd: float = ..., + maxIter: int = ..., + stepSize: float = ..., + tol: float = ..., + solver: str = ..., + seed: Optional[int] = ..., + ) -> FMRegressor: ... + def setFactorSize(self, value: int) -> FMRegressor: ... + def setFitLinear(self, value: bool) -> FMRegressor: ... + def setMiniBatchFraction(self, value: float) -> FMRegressor: ... + def setInitStd(self, value: float) -> FMRegressor: ... + def setMaxIter(self, value: int) -> FMRegressor: ... + def setStepSize(self, value: float) -> FMRegressor: ... + def setTol(self, value: float) -> FMRegressor: ... + def setSolver(self, value: str) -> FMRegressor: ... + def setSeed(self, value: int) -> FMRegressor: ... + def setFitIntercept(self, value: bool) -> FMRegressor: ... + def setRegParam(self, value: float) -> FMRegressor: ... + +class FMRegressionModel( + _JavaRegressionModel, + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable[FMRegressionModel], +): + @property + def intercept(self) -> float: ... + @property + def linear(self) -> Vector: ... + @property + def factors(self) -> Matrix: ... diff --git a/python/pyspark/ml/stat.pyi b/python/pyspark/ml/stat.pyi new file mode 100644 index 0000000000000..83b0f7eacb8f0 --- /dev/null +++ b/python/pyspark/ml/stat.pyi @@ -0,0 +1,89 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional + +from pyspark.ml.linalg import Matrix, Vector +from pyspark.ml.wrapper import JavaWrapper +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class ChiSquareTest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... + +class Correlation: + @staticmethod + def corr(dataset: DataFrame, column: str, method: str = ...) -> DataFrame: ... + +class KolmogorovSmirnovTest: + @staticmethod + def test( + dataset: DataFrame, sampleCol: str, distName: str, *params: float + ) -> DataFrame: ... + +class Summarizer: + @staticmethod + def mean(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def sum(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def variance(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def std(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def count(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def numNonZeros(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def max(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def min(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL1(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def normL2(col: Column, weightCol: Optional[Column] = ...) -> Column: ... + @staticmethod + def metrics(*metrics: str) -> SummaryBuilder: ... + +class SummaryBuilder(JavaWrapper): + def __init__(self, jSummaryBuilder: JavaObject) -> None: ... + def summary( + self, featuresCol: Column, weightCol: Optional[Column] = ... + ) -> Column: ... + +class MultivariateGaussian: + mean: Vector + cov: Matrix + def __init__(self, mean: Vector, cov: Matrix) -> None: ... + +class ANOVATest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... + +class FValueTest: + @staticmethod + def test( + dataset: DataFrame, featuresCol: str, labelCol: str, flatten: bool = ... + ) -> DataFrame: ... 
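
A hedged sketch of how the `pyspark.ml.stat` helpers typed above are typically invoked; the local session and toy DataFrame are assumptions for illustration only, not lines of the patch:

```python
# Illustrative sketch: Correlation and Summarizer as typed in stat.pyi above.
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.stat import Correlation, Summarizer

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame(
    [(Vectors.dense(1.0, 2.0),), (Vectors.dense(3.0, 5.0),), (Vectors.dense(5.0, 6.0),)],
    ["features"],
)

print(Correlation.corr(df, "features", "pearson").head()[0])   # correlation Matrix
summarizer = Summarizer.metrics("mean", "count")               # SummaryBuilder
df.select(summarizer.summary(df.features)).show(truncate=False)
```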
diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index 492e849658f7a..03653c25b4ad4 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -333,7 +333,7 @@ def test_linear_regression_with_huber_loss(self): from pyspark.ml.tests.test_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_base.py b/python/pyspark/ml/tests/test_base.py index cba5369ca2623..d2c0bdfdf8556 100644 --- a/python/pyspark/ml/tests/test_base.py +++ b/python/pyspark/ml/tests/test_base.py @@ -70,7 +70,7 @@ def testDefaultFitMultiple(self): from pyspark.ml.tests.test_base import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_evaluation.py b/python/pyspark/ml/tests/test_evaluation.py index 7883df7882769..746605076f86b 100644 --- a/python/pyspark/ml/tests/test_evaluation.py +++ b/python/pyspark/ml/tests/test_evaluation.py @@ -56,7 +56,7 @@ def test_clustering_evaluator_with_cosine_distance(self): from pyspark.ml.tests.test_evaluation import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 7fd8c0b669d9a..244110a986138 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -303,7 +303,7 @@ def test_apply_binary_term_freqs(self): from pyspark.ml.tests.test_feature import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index 069ffceb50103..ceecdae971c99 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -69,7 +69,7 @@ def test_read_images(self): from pyspark.ml.tests.test_image import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_linalg.py b/python/pyspark/ml/tests/test_linalg.py index 60dda82fe0911..18c01ddf88e67 100644 --- a/python/pyspark/ml/tests/test_linalg.py +++ b/python/pyspark/ml/tests/test_linalg.py @@ -381,7 +381,7 @@ def test_infer_schema(self): from pyspark.ml.tests.test_linalg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py index abee6d1be5e29..4cddf50f36bdf 100644 --- a/python/pyspark/ml/tests/test_param.py +++ b/python/pyspark/ml/tests/test_param.py @@ -372,7 +372,7 @@ def test_java_params(self): from pyspark.ml.tests.test_param import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: 
ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 4acf58da21531..826e6cd351d32 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -456,7 +456,7 @@ def test_default_read_write_default_params(self): from pyspark.ml.tests.test_persistence import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_pipeline.py b/python/pyspark/ml/tests/test_pipeline.py index 011e6537a8db5..c29b2d3f44679 100644 --- a/python/pyspark/ml/tests/test_pipeline.py +++ b/python/pyspark/ml/tests/test_pipeline.py @@ -62,7 +62,7 @@ def doTransform(pipeline): from pyspark.ml.tests.test_pipeline import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_stat.py b/python/pyspark/ml/tests/test_stat.py index 666d0aec58db5..a2403b38873db 100644 --- a/python/pyspark/ml/tests/test_stat.py +++ b/python/pyspark/ml/tests/test_stat.py @@ -43,7 +43,7 @@ def test_chisquaretest(self): from pyspark.ml.tests.test_stat import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_training_summary.py b/python/pyspark/ml/tests/test_training_summary.py index cb0effbe2bf2a..7dafdcb3d683b 100644 --- a/python/pyspark/ml/tests/test_training_summary.py +++ b/python/pyspark/ml/tests/test_training_summary.py @@ -445,7 +445,7 @@ def test_kmeans_summary(self): from pyspark.ml.tests.test_training_summary import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index c9163627fdd54..729e46419ae2c 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -864,7 +864,7 @@ def test_copy(self): from pyspark.ml.tests.test_tuning import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tests/test_wrapper.py b/python/pyspark/ml/tests/test_wrapper.py index e6eef8a7de97a..31475299c7b98 100644 --- a/python/pyspark/ml/tests/test_wrapper.py +++ b/python/pyspark/ml/tests/test_wrapper.py @@ -21,7 +21,9 @@ from pyspark.ml.linalg import DenseVector, Vectors from pyspark.ml.regression import LinearRegression -from pyspark.ml.wrapper import _java2py, _py2java, JavaParams, JavaWrapper +from pyspark.ml.wrapper import ( # type: ignore[attr-defined] + _java2py, _py2java, JavaParams, JavaWrapper +) from pyspark.testing.mllibutils import MLlibTestCase from pyspark.testing.mlutils import SparkSessionTestCase from pyspark.testing.utils import eventually @@ -120,7 +122,7 @@ def test_new_java_array(self): from 
pyspark.ml.tests.test_wrapper import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/ml/tree.pyi b/python/pyspark/ml/tree.pyi new file mode 100644 index 0000000000000..ff6307654c569 --- /dev/null +++ b/python/pyspark/ml/tree.pyi @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Sequence +from pyspark.ml._typing import P, T + +from pyspark.ml.linalg import Vector +from pyspark import since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.ml.param import Param, Params as Params +from pyspark.ml.param.shared import ( # noqa: F401 + HasCheckpointInterval as HasCheckpointInterval, + HasMaxIter as HasMaxIter, + HasSeed as HasSeed, + HasStepSize as HasStepSize, + HasValidationIndicatorCol as HasValidationIndicatorCol, + HasWeightCol as HasWeightCol, + Param as Param, + TypeConverters as TypeConverters, +) +from pyspark.ml.wrapper import JavaPredictionModel as JavaPredictionModel + +class _DecisionTreeModel(JavaPredictionModel[T]): + @property + def numNodes(self) -> int: ... + @property + def depth(self) -> int: ... + @property + def toDebugString(self) -> str: ... + def predictLeaf(self, value: Vector) -> float: ... + +class _DecisionTreeParams(HasCheckpointInterval, HasSeed, HasWeightCol): + leafCol: Param[str] + maxDepth: Param[int] + maxBins: Param[int] + minInstancesPerNode: Param[int] + minWeightFractionPerNode: Param[float] + minInfoGain: Param[float] + maxMemoryInMB: Param[int] + cacheNodeIds: Param[bool] + def __init__(self) -> None: ... + def setLeafCol(self: P, value: str) -> P: ... + def getLeafCol(self) -> str: ... + def getMaxDepth(self) -> int: ... + def getMaxBins(self) -> int: ... + def getMinInstancesPerNode(self) -> int: ... + def getMinInfoGain(self) -> float: ... + def getMaxMemoryInMB(self) -> int: ... + def getCacheNodeIds(self) -> bool: ... + +class _TreeEnsembleModel(JavaPredictionModel[T]): + @property + def trees(self) -> Sequence[_DecisionTreeModel]: ... + @property + def getNumTrees(self) -> int: ... + @property + def treeWeights(self) -> List[float]: ... + @property + def totalNumNodes(self) -> int: ... + @property + def toDebugString(self) -> str: ... + +class _TreeEnsembleParams(_DecisionTreeParams): + subsamplingRate: Param[float] + supportedFeatureSubsetStrategies: List[str] + featureSubsetStrategy: Param[str] + def __init__(self) -> None: ... + def getSubsamplingRate(self) -> float: ... + def getFeatureSubsetStrategy(self) -> str: ... 
+ +class _RandomForestParams(_TreeEnsembleParams): + numTrees: Param[int] + bootstrap: Param[bool] + def __init__(self) -> None: ... + def getNumTrees(self) -> int: ... + def getBootstrap(self) -> bool: ... + +class _GBTParams( + _TreeEnsembleParams, HasMaxIter, HasStepSize, HasValidationIndicatorCol +): + stepSize: Param[float] + validationTol: Param[float] + def getValidationTol(self) -> float: ... + +class _HasVarianceImpurity(Params): + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeClassifierParams(Params): + supportedImpurities: List[str] + impurity: Param[str] + def __init__(self) -> None: ... + def getImpurity(self) -> str: ... + +class _TreeRegressorParams(_HasVarianceImpurity): ... diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi new file mode 100644 index 0000000000000..63cd75f0e1d74 --- /dev/null +++ b/python/pyspark/ml/tuning.pyi @@ -0,0 +1,185 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, List, Optional, Tuple, Type +from pyspark.ml._typing import ParamMap + +from pyspark.ml import Estimator, Model +from pyspark.ml.evaluation import Evaluator +from pyspark.ml.param import Param +from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed +from pyspark.ml.util import MLReader, MLReadable, MLWriter, MLWritable + +class ParamGridBuilder: + def __init__(self) -> None: ... + def addGrid(self, param: Param, values: List[Any]) -> ParamGridBuilder: ... + @overload + def baseOn(self, __args: ParamMap) -> ParamGridBuilder: ... + @overload + def baseOn(self, *args: Tuple[Param, Any]) -> ParamGridBuilder: ... + def build(self) -> List[ParamMap]: ... + +class _ValidatorParams(HasSeed): + estimator: Param[Estimator] + estimatorParamMaps: Param[List[ParamMap]] + evaluator: Param[Evaluator] + def getEstimator(self) -> Estimator: ... + def getEstimatorParamMaps(self) -> List[ParamMap]: ... + def getEvaluator(self) -> Evaluator: ... + +class _CrossValidatorParams(_ValidatorParams): + numFolds: Param[int] + foldCol: Param[str] + def __init__(self, *args: Any): ... + def getNumFolds(self) -> int: ... + def getFoldCol(self) -> str: ... + +class CrossValidator( + Estimator[CrossValidatorModel], + _CrossValidatorParams, + HasParallelism, + HasCollectSubModels, + MLReadable[CrossValidator], + MLWritable, +): + def __init__( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + numFolds: int = ..., + seed: Optional[int] = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + foldCol: str = ... + ) -> None: ... 
+ def setParams( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + numFolds: int = ..., + seed: Optional[int] = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + foldCol: str = ... + ) -> CrossValidator: ... + def setEstimator(self, value: Estimator) -> CrossValidator: ... + def setEstimatorParamMaps(self, value: List[ParamMap]) -> CrossValidator: ... + def setEvaluator(self, value: Evaluator) -> CrossValidator: ... + def setNumFolds(self, value: int) -> CrossValidator: ... + def setFoldCol(self, value: str) -> CrossValidator: ... + def setSeed(self, value: int) -> CrossValidator: ... + def setParallelism(self, value: int) -> CrossValidator: ... + def setCollectSubModels(self, value: bool) -> CrossValidator: ... + def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidator: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[CrossValidator]) -> MLReader: ... + +class CrossValidatorModel( + Model, _CrossValidatorParams, MLReadable[CrossValidatorModel], MLWritable +): + bestModel: Model + avgMetrics: List[float] + subModels: List[List[Model]] + def __init__( + self, + bestModel: Model, + avgMetrics: List[float] = ..., + subModels: Optional[List[List[Model]]] = ..., + ) -> None: ... + def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[CrossValidatorModel]) -> MLReader: ... + +class _TrainValidationSplitParams(_ValidatorParams): + trainRatio: Param[float] + def __init__(self, *args: Any): ... + def getTrainRatio(self) -> float: ... + +class TrainValidationSplit( + Estimator[TrainValidationSplitModel], + _TrainValidationSplitParams, + HasParallelism, + HasCollectSubModels, + MLReadable[TrainValidationSplit], + MLWritable, +): + def __init__( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + trainRatio: float = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + seed: Optional[int] = ... + ) -> None: ... + def setParams( + self, + *, + estimator: Optional[Estimator] = ..., + estimatorParamMaps: Optional[List[ParamMap]] = ..., + evaluator: Optional[Evaluator] = ..., + trainRatio: float = ..., + parallelism: int = ..., + collectSubModels: bool = ..., + seed: Optional[int] = ... + ) -> TrainValidationSplit: ... + def setEstimator(self, value: Estimator) -> TrainValidationSplit: ... + def setEstimatorParamMaps(self, value: List[ParamMap]) -> TrainValidationSplit: ... + def setEvaluator(self, value: Evaluator) -> TrainValidationSplit: ... + def setTrainRatio(self, value: float) -> TrainValidationSplit: ... + def setSeed(self, value: int) -> TrainValidationSplit: ... + def setParallelism(self, value: int) -> TrainValidationSplit: ... + def setCollectSubModels(self, value: bool) -> TrainValidationSplit: ... + def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplit: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[TrainValidationSplit]) -> MLReader: ... + +class TrainValidationSplitModel( + Model, + _TrainValidationSplitParams, + MLReadable[TrainValidationSplitModel], + MLWritable, +): + bestModel: Model + validationMetrics: List[float] + subModels: List[Model] + def __init__( + self, + bestModel: Model, + validationMetrics: List[float] = ..., + subModels: Optional[List[Model]] = ..., + ) -> None: ... 
+ def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... + def setEstimatorParamMaps( + self, value: List[ParamMap] + ) -> TrainValidationSplitModel: ... + def setEvaluator(self, value: Evaluator) -> TrainValidationSplitModel: ... + def copy(self, extra: Optional[ParamMap] = ...) -> TrainValidationSplitModel: ... + def write(self) -> MLWriter: ... + @classmethod + def read(cls: Type[TrainValidationSplitModel]) -> MLReader: ... diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi new file mode 100644 index 0000000000000..d0781b2e26ed5 --- /dev/null +++ b/python/pyspark/ml/util.pyi @@ -0,0 +1,128 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Generic, Optional, Type, TypeVar, Union + +from pyspark import SparkContext as SparkContext, since as since # noqa: F401 +from pyspark.ml.common import inherit_doc as inherit_doc # noqa: F401 +from pyspark.sql import SparkSession as SparkSession +from pyspark.util import VersionUtils as VersionUtils # noqa: F401 + +S = TypeVar("S") +R = TypeVar("R", bound=MLReadable) + +class Identifiable: + uid: str + def __init__(self) -> None: ... + +class BaseReadWrite: + def __init__(self) -> None: ... + def session(self, sparkSession: SparkSession) -> Union[MLWriter, MLReader]: ... + @property + def sparkSession(self) -> SparkSession: ... + @property + def sc(self) -> SparkContext: ... + +class MLWriter(BaseReadWrite): + shouldOverwrite: bool = ... + def __init__(self) -> None: ... + def save(self, path: str) -> None: ... + def saveImpl(self, path: str) -> None: ... + def overwrite(self) -> MLWriter: ... + +class GeneralMLWriter(MLWriter): + source: str + def format(self, source: str) -> MLWriter: ... + +class JavaMLWriter(MLWriter): + def __init__(self, instance: JavaMLWritable) -> None: ... + def save(self, path: str) -> None: ... + def overwrite(self) -> JavaMLWriter: ... + def option(self, key: str, value: Any) -> JavaMLWriter: ... + def session(self, sparkSession: SparkSession) -> JavaMLWriter: ... + +class GeneralJavaMLWriter(JavaMLWriter): + def __init__(self, instance: MLWritable) -> None: ... + def format(self, source: str) -> GeneralJavaMLWriter: ... + +class MLWritable: + def write(self) -> MLWriter: ... + def save(self, path: str) -> None: ... + +class JavaMLWritable(MLWritable): + def write(self) -> JavaMLWriter: ... + +class GeneralJavaMLWritable(JavaMLWritable): + def write(self) -> GeneralJavaMLWriter: ... + +class MLReader(BaseReadWrite, Generic[R]): + def load(self, path: str) -> R: ... + +class JavaMLReader(MLReader[R]): + def __init__(self, clazz: Type[JavaMLReadable]) -> None: ... + def load(self, path: str) -> R: ... + def session(self, sparkSession: SparkSession) -> JavaMLReader[R]: ... 
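For context (illustrative only, not part of the patch), the `ParamGridBuilder` and `CrossValidator` signatures declared in `tuning.pyi` above are meant to cover usage along these lines; the grid values are arbitrary and `train_df` in the final comment is a hypothetical DataFrame.

```
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

spark = SparkSession.builder.getOrCreate()

lr = LogisticRegression()
# ParamGridBuilder.build() is annotated to return List[ParamMap].
grid = ParamGridBuilder().addGrid(lr.regParam, [0.0, 0.1]).build()

cv = CrossValidator(
    estimator=lr,
    estimatorParamMaps=grid,
    evaluator=BinaryClassificationEvaluator(),
    numFolds=2,
)
folds: int = cv.getNumFolds()
# cv.fit(train_df) would be typed as returning CrossValidatorModel, whose
# avgMetrics attribute is declared as List[float] in the stub above.
```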
+ +class MLReadable(Generic[R]): + @classmethod + def read(cls: Type[R]) -> MLReader[R]: ... + @classmethod + def load(cls: Type[R], path: str) -> R: ... + +class JavaMLReadable(MLReadable[R]): + @classmethod + def read(cls: Type[R]) -> JavaMLReader[R]: ... + +class DefaultParamsWritable(MLWritable): + def write(self) -> MLWriter: ... + +class DefaultParamsWriter(MLWriter): + instance: DefaultParamsWritable + def __init__(self, instance: DefaultParamsWritable) -> None: ... + def saveImpl(self, path: str) -> None: ... + @staticmethod + def saveMetadata( + instance: DefaultParamsWritable, + path: str, + sc: SparkContext, + extraMetadata: Optional[Dict[str, Any]] = ..., + paramMap: Optional[Dict[str, Any]] = ..., + ) -> None: ... + +class DefaultParamsReadable(MLReadable[R]): + @classmethod + def read(cls: Type[R]) -> MLReader[R]: ... + +class DefaultParamsReader(MLReader[R]): + cls: Type[R] + def __init__(self, cls: Type[MLReadable]) -> None: ... + def load(self, path: str) -> R: ... + @staticmethod + def loadMetadata( + path: str, sc: SparkContext, expectedClassName: str = ... + ) -> Dict[str, Any]: ... + @staticmethod + def getAndSetParams(instance: R, metadata: Dict[str, Any]) -> None: ... + @staticmethod + def loadParamsInstance(path: str, sc: SparkContext) -> R: ... + +class HasTrainingSummary(Generic[S]): + @property + def hasSummary(self) -> bool: ... + @property + def summary(self) -> S: ... diff --git a/python/pyspark/ml/wrapper.pyi b/python/pyspark/ml/wrapper.pyi new file mode 100644 index 0000000000000..830224c177d1e --- /dev/null +++ b/python/pyspark/ml/wrapper.pyi @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import abc +from typing import Any, Optional +from pyspark.ml._typing import P, T, JM, ParamMap + +from pyspark.ml import Estimator, Predictor, PredictionModel, Transformer, Model +from pyspark.ml.base import _PredictorParams +from pyspark.ml.param import Param, Params + +class JavaWrapper: + def __init__(self, java_obj: Optional[Any] = ...) -> None: ... + def __del__(self) -> None: ... + +class JavaParams(JavaWrapper, Params, metaclass=abc.ABCMeta): + def copy(self: P, extra: Optional[ParamMap] = ...) -> P: ... + def clear(self, param: Param) -> None: ... + +class JavaEstimator(JavaParams, Estimator[JM], metaclass=abc.ABCMeta): ... +class JavaTransformer(JavaParams, Transformer, metaclass=abc.ABCMeta): ... + +class JavaModel(JavaTransformer, Model, metaclass=abc.ABCMeta): + def __init__(self, java_model: Optional[Any] = ...) -> None: ... + +class JavaPredictor( + Predictor[JM], JavaEstimator, _PredictorParams, metaclass=abc.ABCMeta +): ... + +class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams): + @property + def numFeatures(self) -> int: ... + def predict(self, value: T) -> float: ... 
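To show what the `util.pyi` read/write generics and the `wrapper.pyi` `JavaPredictionModel[T]` annotations buy in practice, here is a rough sketch (not part of the patch; the save path and the two-point training set are made up) assuming the existing PySpark ML API.

```
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression, LinearRegressionModel

spark = SparkSession.builder.getOrCreate()
# Hypothetical two-point training set.
df = spark.createDataFrame(
    [(1.0, Vectors.dense(1.0)), (2.0, Vectors.dense(2.0))], ["label", "features"]
)
model = LinearRegression().fit(df)

# JavaPredictionModel[T] declares numFeatures -> int and predict(value: T) -> float.
n_features: int = model.numFeatures
prediction: float = model.predict(Vectors.dense(3.0))

# MLWritable/MLReadable[R]: save() goes through an MLWriter, load() returns R,
# so `reloaded` is inferred as LinearRegressionModel (the path is made up).
model.write().overwrite().save("/tmp/lr-model-example")
reloaded = LinearRegressionModel.load("/tmp/lr-model-example")
```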
diff --git a/python/pyspark/mllib/__init__.pyi b/python/pyspark/mllib/__init__.pyi new file mode 100644 index 0000000000000..83032c4580fc8 --- /dev/null +++ b/python/pyspark/mllib/__init__.pyi @@ -0,0 +1,32 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +# Names in __all__ with no definition: +# classification +# clustering +# feature +# fpm +# linalg +# random +# recommendation +# regression +# stat +# tree +# util diff --git a/python/pyspark/mllib/_typing.pyi b/python/pyspark/mllib/_typing.pyi new file mode 100644 index 0000000000000..213a69996b0ad --- /dev/null +++ b/python/pyspark/mllib/_typing.pyi @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Tuple, Union +from pyspark.mllib.linalg import Vector +from numpy import ndarray # noqa: F401 + +VectorLike = Union[Vector, List[float], Tuple[float, ...]] diff --git a/python/pyspark/mllib/classification.pyi b/python/pyspark/mllib/classification.pyi new file mode 100644 index 0000000000000..c51882c87bfc2 --- /dev/null +++ b/python/pyspark/mllib/classification.pyi @@ -0,0 +1,151 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Optional, Union + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib._typing import VectorLike +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint, LinearModel, StreamingLinearAlgorithm +from pyspark.mllib.util import Saveable, Loader +from pyspark.streaming.dstream import DStream + +from numpy import float64, ndarray # type: ignore[import] + +class LinearClassificationModel(LinearModel): + def __init__(self, weights: Vector, intercept: float) -> None: ... + def setThreshold(self, value: float) -> None: ... + @property + def threshold(self) -> Optional[float]: ... + def clearThreshold(self) -> None: ... + @overload + def predict(self, test: VectorLike) -> Union[int, float, float64]: ... + @overload + def predict(self, test: RDD[VectorLike]) -> RDD[Union[int, float]]: ... + +class LogisticRegressionModel(LinearClassificationModel): + def __init__( + self, weights: Vector, intercept: float, numFeatures: int, numClasses: int + ) -> None: ... + @property + def numFeatures(self) -> int: ... + @property + def numClasses(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> Union[int, float]: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[Union[int, float]]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LogisticRegressionModel: ... + +class LogisticRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: str = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LogisticRegressionModel: ... + +class LogisticRegressionWithLBFGS: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: str = ..., + intercept: bool = ..., + corrections: int = ..., + tolerance: float = ..., + validateData: bool = ..., + numClasses: int = ..., + ) -> LogisticRegressionModel: ... + +class SVMModel(LinearClassificationModel): + def __init__(self, weights: Vector, intercept: float) -> None: ... + @overload + def predict(self, x: VectorLike) -> float64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> SVMModel: ... + +class SVMWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regType: str = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> SVMModel: ... + +class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]): + labels: ndarray + pi: ndarray + theta: ndarray + def __init__(self, labels, pi, theta) -> None: ... + @overload + def predict(self, x: VectorLike) -> float64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[float64]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> NaiveBayesModel: ... + +class NaiveBayes: + @classmethod + def train(cls, data: RDD[VectorLike], lambda_: float = ...) 
-> NaiveBayesModel: ... + +class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): + stepSize: float + numIterations: int + regParam: float + miniBatchFraction: float + convergenceTol: float + def __init__( + self, + stepSize: float = ..., + numIterations: int = ..., + miniBatchFraction: float = ..., + regParam: float = ..., + convergenceTol: float = ..., + ) -> None: ... + def setInitialWeights( + self, initialWeights: VectorLike + ) -> StreamingLogisticRegressionWithSGD: ... + def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... diff --git a/python/pyspark/mllib/clustering.pyi b/python/pyspark/mllib/clustering.pyi new file mode 100644 index 0000000000000..1c3eba17e201c --- /dev/null +++ b/python/pyspark/mllib/clustering.pyi @@ -0,0 +1,196 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import List, NamedTuple, Optional, Tuple, TypeVar + +import array + +from numpy import float64, int64, ndarray # type: ignore[import] +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.stat.distribution import MultivariateGaussian +from pyspark.mllib.util import Saveable, Loader, JavaLoader, JavaSaveable +from pyspark.streaming.dstream import DStream + +T = TypeVar("T") + +class BisectingKMeansModel(JavaModelWrapper): + centers: List[ndarray] + def __init__(self, java_model: JavaObject) -> None: ... + @property + def clusterCenters(self) -> List[ndarray]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + @overload + def computeCost(self, x: VectorLike) -> float: ... + @overload + def computeCost(self, x: RDD[VectorLike]) -> float: ... + +class BisectingKMeans: + @classmethod + def train( + self, + rdd: RDD[VectorLike], + k: int = ..., + maxIterations: int = ..., + minDivisibleClusterSize: float = ..., + seed: int = ..., + ) -> BisectingKMeansModel: ... + +class KMeansModel(Saveable, Loader[KMeansModel]): + centers: List[ndarray] + def __init__(self, centers: List[ndarray]) -> None: ... + @property + def clusterCenters(self) -> List[ndarray]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + def computeCost(self, rdd: RDD[VectorLike]) -> float: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> KMeansModel: ... 
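A small usage sketch (illustrative only, not part of the patch) of the overloaded `KMeansModel.predict` signatures declared in `clustering.pyi` above; the sample vectors are arbitrary.

```
from pyspark.sql import SparkSession
from pyspark.mllib.clustering import KMeans
from pyspark.mllib.linalg import Vectors

sc = SparkSession.builder.getOrCreate().sparkContext
# Arbitrary 1-D points forming two obvious clusters.
rdd = sc.parallelize([Vectors.dense(x) for x in (0.0, 0.1, 9.0, 9.1)])
model = KMeans.train(rdd, k=2, maxIterations=10, seed=1)

# The @overload pair on predict distinguishes the two call shapes:
single: int = model.predict(Vectors.dense(0.05))   # VectorLike -> int
bulk = model.predict(rdd)                          # RDD[VectorLike] -> RDD[int]
cost: float = model.computeCost(rdd)
```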
+ +class KMeans: + @classmethod + def train( + cls, + rdd: RDD[VectorLike], + k: int, + maxIterations: int = ..., + initializationMode: str = ..., + seed: Optional[int] = ..., + initializationSteps: int = ..., + epsilon: float = ..., + initialModel: Optional[KMeansModel] = ..., + ) -> KMeansModel: ... + +class GaussianMixtureModel( + JavaModelWrapper, JavaSaveable, JavaLoader[GaussianMixtureModel] +): + @property + def weights(self) -> ndarray: ... + @property + def gaussians(self) -> List[MultivariateGaussian]: ... + @property + def k(self) -> int: ... + @overload + def predict(self, x: VectorLike) -> int64: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... + @overload + def predictSoft(self, x: VectorLike) -> ndarray: ... + @overload + def predictSoft(self, x: RDD[VectorLike]) -> RDD[array.array]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> GaussianMixtureModel: ... + +class GaussianMixture: + @classmethod + def train( + cls, + rdd: RDD[VectorLike], + k: int, + convergenceTol: float = ..., + maxIterations: int = ..., + seed: Optional[int] = ..., + initialModel: Optional[GaussianMixtureModel] = ..., + ) -> GaussianMixtureModel: ... + +class PowerIterationClusteringModel( + JavaModelWrapper, JavaSaveable, JavaLoader[PowerIterationClusteringModel] +): + @property + def k(self) -> int: ... + def assignments(self) -> RDD[PowerIterationClustering.Assignment]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> PowerIterationClusteringModel: ... + +class PowerIterationClustering: + @classmethod + def train( + cls, + rdd: RDD[Tuple[int, int, float]], + k: int, + maxIterations: int = ..., + initMode: str = ..., + ) -> PowerIterationClusteringModel: ... + class Assignment(NamedTuple("Assignment", [("id", int), ("cluster", int)])): ... + +class StreamingKMeansModel(KMeansModel): + def __init__(self, clusterCenters, clusterWeights) -> None: ... + @property + def clusterWeights(self) -> List[float64]: ... + centers: ndarray + def update( + self, data: RDD[VectorLike], decayFactor: float, timeUnit: str + ) -> StreamingKMeansModel: ... + +class StreamingKMeans: + def __init__( + self, k: int = ..., decayFactor: float = ..., timeUnit: str = ... + ) -> None: ... + def latestModel(self) -> StreamingKMeansModel: ... + def setK(self, k: int) -> StreamingKMeans: ... + def setDecayFactor(self, decayFactor: float) -> StreamingKMeans: ... + def setHalfLife(self, halfLife: float, timeUnit: str) -> StreamingKMeans: ... + def setInitialCenters( + self, centers: List[VectorLike], weights: List[float] + ) -> StreamingKMeans: ... + def setRandomCenters( + self, dim: int, weight: float, seed: int + ) -> StreamingKMeans: ... + def trainOn(self, dstream: DStream[VectorLike]) -> None: ... + def predictOn(self, dstream: DStream[VectorLike]) -> DStream[int]: ... + def predictOnValues( + self, dstream: DStream[Tuple[T, VectorLike]] + ) -> DStream[Tuple[T, int]]: ... + +class LDAModel(JavaModelWrapper, JavaSaveable, Loader[LDAModel]): + def topicsMatrix(self) -> ndarray: ... + def vocabSize(self) -> int: ... + def describeTopics( + self, maxTermsPerTopic: Optional[int] = ... + ) -> List[Tuple[List[int], List[float]]]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LDAModel: ... 
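Similarly, a hypothetical sketch (not part of the patch) of the single-vector versus RDD overloads on `GaussianMixtureModel` declared above; the toy data is arbitrary.

```
from pyspark.sql import SparkSession
from pyspark.mllib.clustering import GaussianMixture
from pyspark.mllib.linalg import Vectors

sc = SparkSession.builder.getOrCreate().sparkContext
# Arbitrary 1-D sample with two well-separated groups.
rdd = sc.parallelize([Vectors.dense(x) for x in (-5.0, -4.9, 5.0, 5.1)])
gmm = GaussianMixture.train(rdd, k=2, seed=1)

label = gmm.predict(Vectors.dense(5.05))   # single vector -> int64
labels = gmm.predict(rdd)                  # RDD input -> RDD[int]
weights = gmm.weights                      # ndarray of component weights
```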
+ +class LDA: + @classmethod + def train( + cls, + rdd: RDD[Tuple[int, VectorLike]], + k: int = ..., + maxIterations: int = ..., + docConcentration: float = ..., + topicConcentration: float = ..., + seed: Optional[int] = ..., + checkpointInterval: int = ..., + optimizer: str = ..., + ) -> LDAModel: ... diff --git a/python/pyspark/mllib/common.pyi b/python/pyspark/mllib/common.pyi new file mode 100644 index 0000000000000..1df308b91b5a1 --- /dev/null +++ b/python/pyspark/mllib/common.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def callJavaFunc(sc, func, *args): ... +def callMLlibFunc(name, *args): ... + +class JavaModelWrapper: + def __init__(self, java_model) -> None: ... + def __del__(self): ... + def call(self, name, *a): ... + +def inherit_doc(cls): ... diff --git a/python/pyspark/mllib/evaluation.pyi b/python/pyspark/mllib/evaluation.pyi new file mode 100644 index 0000000000000..03583784f0c3b --- /dev/null +++ b/python/pyspark/mllib/evaluation.pyi @@ -0,0 +1,94 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, Tuple, TypeVar +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Matrix + +T = TypeVar("T") + +class BinaryClassificationMetrics(JavaModelWrapper): + def __init__(self, scoreAndLabels: RDD[Tuple[float, float]]) -> None: ... + @property + def areaUnderROC(self) -> float: ... + @property + def areaUnderPR(self) -> float: ... + def unpersist(self) -> None: ... + +class RegressionMetrics(JavaModelWrapper): + def __init__(self, predictionAndObservations: RDD[Tuple[float, float]]) -> None: ... + @property + def explainedVariance(self) -> float: ... + @property + def meanAbsoluteError(self) -> float: ... + @property + def meanSquaredError(self) -> float: ... + @property + def rootMeanSquaredError(self) -> float: ... + @property + def r2(self) -> float: ... + +class MulticlassMetrics(JavaModelWrapper): + def __init__(self, predictionAndLabels: RDD[Tuple[float, float]]) -> None: ... 
+ def confusionMatrix(self) -> Matrix: ... + def truePositiveRate(self, label: float) -> float: ... + def falsePositiveRate(self, label: float) -> float: ... + def precision(self, label: float = ...) -> float: ... + def recall(self, label: float = ...) -> float: ... + def fMeasure(self, label: float = ..., beta: Optional[float] = ...) -> float: ... + @property + def accuracy(self) -> float: ... + @property + def weightedTruePositiveRate(self) -> float: ... + @property + def weightedFalsePositiveRate(self) -> float: ... + @property + def weightedRecall(self) -> float: ... + @property + def weightedPrecision(self) -> float: ... + def weightedFMeasure(self, beta: Optional[float] = ...) -> float: ... + +class RankingMetrics(JavaModelWrapper): + def __init__(self, predictionAndLabels: RDD[Tuple[List[T], List[T]]]) -> None: ... + def precisionAt(self, k: int) -> float: ... + @property + def meanAveragePrecision(self) -> float: ... + def meanAveragePrecisionAt(self, k: int) -> float: ... + def ndcgAt(self, k: int) -> float: ... + def recallAt(self, k: int) -> float: ... + +class MultilabelMetrics(JavaModelWrapper): + def __init__( + self, predictionAndLabels: RDD[Tuple[List[float], List[float]]] + ) -> None: ... + def precision(self, label: Optional[float] = ...) -> float: ... + def recall(self, label: Optional[float] = ...) -> float: ... + def f1Measure(self, label: Optional[float] = ...) -> float: ... + @property + def microPrecision(self) -> float: ... + @property + def microRecall(self) -> float: ... + @property + def microF1Measure(self) -> float: ... + @property + def hammingLoss(self) -> float: ... + @property + def subsetAccuracy(self) -> float: ... + @property + def accuracy(self) -> float: ... diff --git a/python/pyspark/mllib/feature.pyi b/python/pyspark/mllib/feature.pyi new file mode 100644 index 0000000000000..9ccec36abd6ff --- /dev/null +++ b/python/pyspark/mllib/feature.pyi @@ -0,0 +1,167 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Iterable, Hashable, List, Tuple + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.util import JavaLoader, JavaSaveable + +from py4j.java_collections import JavaMap # type: ignore[import] + +class VectorTransformer: + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class Normalizer(VectorTransformer): + p: float + def __init__(self, p: float = ...) -> None: ... + @overload + def transform(self, vector: VectorLike) -> Vector: ... 
+ @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class JavaVectorTransformer(JavaModelWrapper, VectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class StandardScalerModel(JavaVectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + def setWithMean(self, withMean: bool) -> StandardScalerModel: ... + def setWithStd(self, withStd: bool) -> StandardScalerModel: ... + @property + def withStd(self) -> bool: ... + @property + def withMean(self) -> bool: ... + @property + def std(self) -> Vector: ... + @property + def mean(self) -> Vector: ... + +class StandardScaler: + withMean: bool + withStd: bool + def __init__(self, withMean: bool = ..., withStd: bool = ...) -> None: ... + def fit(self, dataset: RDD[VectorLike]) -> StandardScalerModel: ... + +class ChiSqSelectorModel(JavaVectorTransformer): + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... + +class ChiSqSelector: + numTopFeatures: int + selectorType: str + percentile: float + fpr: float + fdr: float + fwe: float + def __init__( + self, + numTopFeatures: int = ..., + selectorType: str = ..., + percentile: float = ..., + fpr: float = ..., + fdr: float = ..., + fwe: float = ..., + ) -> None: ... + def setNumTopFeatures(self, numTopFeatures: int) -> ChiSqSelector: ... + def setPercentile(self, percentile: float) -> ChiSqSelector: ... + def setFpr(self, fpr: float) -> ChiSqSelector: ... + def setFdr(self, fdr: float) -> ChiSqSelector: ... + def setFwe(self, fwe: float) -> ChiSqSelector: ... + def setSelectorType(self, selectorType: str) -> ChiSqSelector: ... + def fit(self, data: RDD[LabeledPoint]) -> ChiSqSelectorModel: ... + +class PCAModel(JavaVectorTransformer): ... + +class PCA: + k: int + def __init__(self, k: int) -> None: ... + def fit(self, data: RDD[VectorLike]) -> PCAModel: ... + +class HashingTF: + numFeatures: int + binary: bool + def __init__(self, numFeatures: int = ...) -> None: ... + def setBinary(self, value: bool) -> HashingTF: ... + def indexOf(self, term: Hashable) -> int: ... + @overload + def transform(self, document: Iterable[Hashable]) -> Vector: ... + @overload + def transform(self, document: RDD[Iterable[Hashable]]) -> RDD[Vector]: ... + +class IDFModel(JavaVectorTransformer): + @overload + def transform(self, x: VectorLike) -> Vector: ... + @overload + def transform(self, x: RDD[VectorLike]) -> RDD[Vector]: ... + def idf(self) -> Vector: ... + def docFreq(self) -> List[int]: ... + def numDocs(self) -> int: ... + +class IDF: + minDocFreq: int + def __init__(self, minDocFreq: int = ...) -> None: ... + def fit(self, dataset: RDD[VectorLike]) -> IDFModel: ... + +class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader[Word2VecModel]): + def transform(self, word: str) -> Vector: ... # type: ignore + def findSynonyms(self, word: str, num: int) -> Iterable[Tuple[str, float]]: ... + def getVectors(self) -> JavaMap: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> Word2VecModel: ... + +class Word2Vec: + vectorSize: int + learningRate: float + numPartitions: int + numIterations: int + seed: int + minCount: int + windowSize: int + def __init__(self) -> None: ... + def setVectorSize(self, vectorSize: int) -> Word2Vec: ... 
+ def setLearningRate(self, learningRate: float) -> Word2Vec: ... + def setNumPartitions(self, numPartitions: int) -> Word2Vec: ... + def setNumIterations(self, numIterations: int) -> Word2Vec: ... + def setSeed(self, seed: int) -> Word2Vec: ... + def setMinCount(self, minCount: int) -> Word2Vec: ... + def setWindowSize(self, windowSize: int) -> Word2Vec: ... + def fit(self, data: RDD[List[str]]) -> Word2VecModel: ... + +class ElementwiseProduct(VectorTransformer): + scalingVector: Vector + def __init__(self, scalingVector: Vector) -> None: ... + @overload + def transform(self, vector: VectorLike) -> Vector: ... + @overload + def transform(self, vector: RDD[VectorLike]) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/fpm.pyi b/python/pyspark/mllib/fpm.pyi new file mode 100644 index 0000000000000..880baae1a91a5 --- /dev/null +++ b/python/pyspark/mllib/fpm.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, List, TypeVar +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.util import JavaSaveable, JavaLoader + +T = TypeVar("T") + +class FPGrowthModel( + JavaModelWrapper, JavaSaveable, JavaLoader[FPGrowthModel], Generic[T] +): + def freqItemsets(self) -> RDD[FPGrowth.FreqItemset[T]]: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> FPGrowthModel: ... + +class FPGrowth: + @classmethod + def train( + cls, data: RDD[List[T]], minSupport: float = ..., numPartitions: int = ... + ) -> FPGrowthModel[T]: ... + class FreqItemset(Generic[T]): + items = ... # List[T] + freq = ... # int + +class PrefixSpanModel(JavaModelWrapper, Generic[T]): + def freqSequences(self) -> RDD[PrefixSpan.FreqSequence[T]]: ... + +class PrefixSpan: + @classmethod + def train( + cls, + data: RDD[List[List[T]]], + minSupport: float = ..., + maxPatternLength: int = ..., + maxLocalProjDBSize: int = ..., + ) -> PrefixSpanModel[T]: ... + class FreqSequence(tuple, Generic[T]): + sequence: List[T] + freq: int diff --git a/python/pyspark/mllib/linalg/__init__.pyi b/python/pyspark/mllib/linalg/__init__.pyi new file mode 100644 index 0000000000000..c0719c535c8f4 --- /dev/null +++ b/python/pyspark/mllib/linalg/__init__.pyi @@ -0,0 +1,273 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar, Union +from pyspark.ml import linalg as newlinalg +from pyspark.sql.types import StructType, UserDefinedType +from numpy import float64, ndarray # type: ignore[import] + +QT = TypeVar("QT") +RT = TypeVar("RT") + +class VectorUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj: Vector + ) -> Tuple[int, Optional[int], Optional[List[int]], List[float]]: ... + def deserialize(self, datum: Any) -> Vector: ... + def simpleString(self) -> str: ... + +class MatrixUDT(UserDefinedType): + @classmethod + def sqlType(cls) -> StructType: ... + @classmethod + def module(cls) -> str: ... + @classmethod + def scalaUDT(cls) -> str: ... + def serialize( + self, obj + ) -> Tuple[ + int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool + ]: ... + def deserialize(self, datum: Any) -> Matrix: ... + def simpleString(self) -> str: ... + +class Vector: + __UDT__: VectorUDT + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.Vector: ... + +class DenseVector(Vector): + array: ndarray + @overload + def __init__(self, *elements: float) -> None: ... + @overload + def __init__(self, __arr: bytes) -> None: ... + @overload + def __init__(self, __arr: Iterable[float]) -> None: ... + @staticmethod + def parse(s) -> DenseVector: ... + def __reduce__(self) -> Tuple[type, bytes]: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.DenseVector: ... + @property + def values(self) -> ndarray: ... + def __getitem__(self, item: int) -> float64: ... + def __len__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def __hash__(self) -> int: ... + def __getattr__(self, item: str) -> Any: ... + def __neg__(self) -> DenseVector: ... + def __add__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __sub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __div__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __truediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __mod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __radd__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rsub__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rmul__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rdiv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + def __rtruediv__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... 
+ def __rmod__(self, other: Union[float, Iterable[float]]) -> DenseVector: ... + +class SparseVector(Vector): + size: int + indices: ndarray + values: ndarray + @overload + def __init__(self, size: int, *args: Tuple[int, float]) -> None: ... + @overload + def __init__(self, size: int, __indices: bytes, __values: bytes) -> None: ... + @overload + def __init__( + self, size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> None: ... + @overload + def __init__(self, size: int, __pairs: Iterable[Tuple[int, float]]) -> None: ... + @overload + def __init__(self, size: int, __map: Dict[int, float]) -> None: ... + def numNonzeros(self) -> int: ... + def norm(self, p: Union[float, str]) -> float64: ... + def __reduce__(self): ... + @staticmethod + def parse(s: str) -> SparseVector: ... + def dot(self, other: Iterable[float]) -> float64: ... + def squared_distance(self, other: Iterable[float]) -> float64: ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.SparseVector: ... + def __len__(self) -> int: ... + def __eq__(self, other) -> bool: ... + def __getitem__(self, index: int) -> float64: ... + def __ne__(self, other) -> bool: ... + def __hash__(self) -> int: ... + +class Vectors: + @overload + @staticmethod + def sparse(size: int, *args: Tuple[int, float]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __indices: bytes, __values: bytes) -> SparseVector: ... + @overload + @staticmethod + def sparse( + size: int, __indices: Iterable[int], __values: Iterable[float] + ) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __pairs: Iterable[Tuple[int, float]]) -> SparseVector: ... + @overload + @staticmethod + def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... + @overload + @staticmethod + def dense(self, *elements: float) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: bytes) -> DenseVector: ... + @overload + @staticmethod + def dense(self, __arr: Iterable[float]) -> DenseVector: ... + @staticmethod + def fromML(vec: newlinalg.DenseVector) -> DenseVector: ... + @staticmethod + def stringify(vector: Vector) -> str: ... + @staticmethod + def squared_distance(v1: Vector, v2: Vector) -> float64: ... + @staticmethod + def norm(vector: Vector, p: Union[float, str]) -> float64: ... + @staticmethod + def parse(s: str) -> Vector: ... + @staticmethod + def zeros(size: int) -> DenseVector: ... + +class Matrix: + __UDT__: MatrixUDT + numRows: int + numCols: int + isTransposed: bool + def __init__( + self, numRows: int, numCols: int, isTransposed: bool = ... + ) -> None: ... + def toArray(self): ... + def asML(self): ... + +class DenseMatrix(Matrix): + values: Any + @overload + def __init__( + self, numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> None: ... + @overload + def __init__( + self, + numRows: int, + numCols: int, + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def toArray(self) -> ndarray: ... + def toSparse(self) -> SparseMatrix: ... + def asML(self) -> newlinalg.DenseMatrix: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def __eq__(self, other) -> bool: ... + +class SparseMatrix(Matrix): + colPtrs: ndarray + rowIndices: ndarray + values: ndarray + @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> None: ... 
+ @overload + def __init__( + self, + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __getitem__(self, indices: Tuple[int, int]) -> float64: ... + def toArray(self) -> ndarray: ... + def toDense(self) -> DenseMatrix: ... + def asML(self) -> newlinalg.SparseMatrix: ... + def __eq__(self, other) -> bool: ... + +class Matrices: + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: bytes, isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def dense( + numRows: int, numCols: int, values: Iterable[float], isTransposed: bool = ... + ) -> DenseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: bytes, + rowIndices: bytes, + values: bytes, + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @overload + @staticmethod + def sparse( + numRows: int, + numCols: int, + colPtrs: Iterable[int], + rowIndices: Iterable[int], + values: Iterable[float], + isTransposed: bool = ..., + ) -> SparseMatrix: ... + @staticmethod + def fromML(mat: newlinalg.Matrix) -> Matrix: ... + +class QRDecomposition(Generic[QT, RT]): + def __init__(self, Q: QT, R: RT) -> None: ... + @property + def Q(self) -> QT: ... + @property + def R(self) -> RT: ... diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi new file mode 100644 index 0000000000000..238c4ea32e4e8 --- /dev/null +++ b/python/pyspark/mllib/linalg/distributed.pyi @@ -0,0 +1,147 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, Sequence, Optional, Tuple, TypeVar, Union +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition +from pyspark.mllib.stat import MultivariateStatisticalSummary +from numpy import ndarray # noqa: F401 + +VectorLike = Union[Vector, Sequence[Union[float, int]]] + +UT = TypeVar("UT") +VT = TypeVar("VT") + +class DistributedMatrix: + def numRows(self) -> int: ... + def numCols(self) -> int: ... + +class RowMatrix(DistributedMatrix): + def __init__( + self, rows: RDD[Vector], numRows: int = ..., numCols: int = ... + ) -> None: ... + @property + def rows(self) -> RDD[Vector]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def computeColumnSummaryStatistics(self) -> MultivariateStatisticalSummary: ... + def computeCovariance(self) -> Matrix: ... + def computeGramianMatrix(self) -> Matrix: ... + def columnSimilarities(self, threshold: float = ...) -> CoordinateMatrix: ... 
+ def tallSkinnyQR( + self, computeQ: bool = ... + ) -> QRDecomposition[RowMatrix, Matrix]: ... + def computeSVD( + self, k: int, computeU: bool = ..., rCond: float = ... + ) -> SingularValueDecomposition[RowMatrix, Matrix]: ... + def computePrincipalComponents(self, k: int) -> Matrix: ... + def multiply(self, matrix: Matrix) -> RowMatrix: ... + +class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): + @property + def U(self) -> Optional[UT]: ... + @property + def s(self) -> Vector: ... + @property + def V(self) -> VT: ... + +class IndexedRow: + index: int + vector: VectorLike + def __init__(self, index: int, vector: VectorLike) -> None: ... + +class IndexedRowMatrix(DistributedMatrix): + def __init__( + self, + rows: RDD[Union[Tuple[int, VectorLike], IndexedRow]], + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def rows(self) -> RDD[IndexedRow]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def columnSimilarities(self) -> CoordinateMatrix: ... + def computeGramianMatrix(self) -> Matrix: ... + def toRowMatrix(self) -> RowMatrix: ... + def toCoordinateMatrix(self) -> CoordinateMatrix: ... + def toBlockMatrix( + self, rowsPerBlock: int = ..., colsPerBlock: int = ... + ) -> BlockMatrix: ... + def computeSVD( + self, k: int, computeU: bool = ..., rCond: float = ... + ) -> SingularValueDecomposition[IndexedRowMatrix, Matrix]: ... + def multiply(self, matrix: Matrix) -> IndexedRowMatrix: ... + +class MatrixEntry: + i: int + j: int + value: float + def __init__(self, i: int, j: int, value: float) -> None: ... + +class CoordinateMatrix(DistributedMatrix): + def __init__( + self, + entries: RDD[Union[Tuple[int, int, float], MatrixEntry]], + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def entries(self) -> RDD[MatrixEntry]: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def transpose(self) -> CoordinateMatrix: ... + def toRowMatrix(self) -> RowMatrix: ... + def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... + def toBlockMatrix( + self, rowsPerBlock: int = ..., colsPerBlock: int = ... + ) -> BlockMatrix: ... + +class BlockMatrix(DistributedMatrix): + def __init__( + self, + blocks: RDD[Tuple[Tuple[int, int], Matrix]], + rowsPerBlock: int, + colsPerBlock: int, + numRows: int = ..., + numCols: int = ..., + ) -> None: ... + @property + def blocks(self) -> RDD[Tuple[Tuple[int, int], Matrix]]: ... + @property + def rowsPerBlock(self) -> int: ... + @property + def colsPerBlock(self) -> int: ... + @property + def numRowBlocks(self) -> int: ... + @property + def numColBlocks(self) -> int: ... + def numRows(self) -> int: ... + def numCols(self) -> int: ... + def cache(self) -> BlockMatrix: ... + def persist(self, storageLevel: StorageLevel) -> BlockMatrix: ... + def validate(self) -> None: ... + def add(self, other: BlockMatrix) -> BlockMatrix: ... + def subtract(self, other: BlockMatrix) -> BlockMatrix: ... + def multiply(self, other: BlockMatrix) -> BlockMatrix: ... + def transpose(self) -> BlockMatrix: ... + def toLocalMatrix(self) -> Matrix: ... + def toIndexedRowMatrix(self) -> IndexedRowMatrix: ... + def toCoordinateMatrix(self) -> CoordinateMatrix: ... diff --git a/python/pyspark/mllib/random.pyi b/python/pyspark/mllib/random.pyi new file mode 100644 index 0000000000000..dc5f4701614da --- /dev/null +++ b/python/pyspark/mllib/random.pyi @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.linalg import Vector + +class RandomRDDs: + @staticmethod + def uniformRDD( + sc: SparkContext, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def normalRDD( + sc: SparkContext, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def logNormalRDD( + sc: SparkContext, + mean: float, + std: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def poissonRDD( + sc: SparkContext, + mean: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def exponentialRDD( + sc: SparkContext, + mean: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def gammaRDD( + sc: SparkContext, + shape: float, + scale: float, + size: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[float]: ... + @staticmethod + def uniformVectorRDD( + sc: SparkContext, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def normalVectorRDD( + sc: SparkContext, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def logNormalVectorRDD( + sc: SparkContext, + mean: float, + std, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def poissonVectorRDD( + sc: SparkContext, + mean: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def exponentialVectorRDD( + sc: SparkContext, + mean: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... + @staticmethod + def gammaVectorRDD( + sc: SparkContext, + shape: float, + scale: float, + numRows: int, + numCols: int, + numPartitions: Optional[int] = ..., + seed: Optional[int] = ..., + ) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/recommendation.pyi b/python/pyspark/mllib/recommendation.pyi new file mode 100644 index 0000000000000..e2f15494209e9 --- /dev/null +++ b/python/pyspark/mllib/recommendation.pyi @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, Tuple, Union + +import array +from collections import namedtuple + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.util import JavaLoader, JavaSaveable + +class Rating(namedtuple("Rating", ["user", "product", "rating"])): + def __reduce__(self): ... + +class MatrixFactorizationModel( + JavaModelWrapper, JavaSaveable, JavaLoader[MatrixFactorizationModel] +): + def predict(self, user: int, product: int) -> float: ... + def predictAll(self, user_product: RDD[Tuple[int, int]]) -> RDD[Rating]: ... + def userFeatures(self) -> RDD[Tuple[int, array.array]]: ... + def productFeatures(self) -> RDD[Tuple[int, array.array]]: ... + def recommendUsers(self, product: int, num: int) -> List[Rating]: ... + def recommendProducts(self, user: int, num: int) -> List[Rating]: ... + def recommendProductsForUsers( + self, num: int + ) -> RDD[Tuple[int, Tuple[Rating, ...]]]: ... + def recommendUsersForProducts( + self, num: int + ) -> RDD[Tuple[int, Tuple[Rating, ...]]]: ... + @property + def rank(self) -> int: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> MatrixFactorizationModel: ... + +class ALS: + @classmethod + def train( + cls, + ratings: Union[RDD[Rating], RDD[Tuple[int, int, float]]], + rank: int, + iterations: int = ..., + lambda_: float = ..., + blocks: int = ..., + nonnegative: bool = ..., + seed: Optional[int] = ..., + ) -> MatrixFactorizationModel: ... + @classmethod + def trainImplicit( + cls, + ratings: Union[RDD[Rating], RDD[Tuple[int, int, float]]], + rank: int, + iterations: int = ..., + lambda_: float = ..., + blocks: int = ..., + alpha: float = ..., + nonnegative: bool = ..., + seed: Optional[int] = ..., + ) -> MatrixFactorizationModel: ... diff --git a/python/pyspark/mllib/regression.pyi b/python/pyspark/mllib/regression.pyi new file mode 100644 index 0000000000000..0283378b98cf3 --- /dev/null +++ b/python/pyspark/mllib/regression.pyi @@ -0,0 +1,155 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
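A minimal usage sketch for the recommendation stubs above, assuming an already-initialized SparkContext bound to `sc`: with these annotations a type checker should infer `RDD[Rating]` from `predictAll` and `List[Rating]` from `recommendProducts`.

```
from pyspark.mllib.recommendation import ALS, Rating

# `sc` is assumed to be an existing SparkContext (e.g. the pyspark shell one).
ratings = sc.parallelize([Rating(1, 1, 5.0), Rating(1, 2, 1.0), Rating(2, 1, 2.0)])

model = ALS.train(ratings, rank=10, iterations=10, seed=42)

model.predict(2, 2)                                  # float
model.predictAll(sc.parallelize([(1, 2), (2, 1)]))   # RDD[Rating]
model.recommendProducts(1, 2)                        # List[Rating]
```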
+ +from typing import overload +from typing import Iterable, Optional, Tuple, TypeVar +from pyspark.rdd import RDD +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.mllib.util import Saveable, Loader +from pyspark.streaming.dstream import DStream +from numpy import ndarray # type: ignore[import] + +K = TypeVar("K") + +class LabeledPoint: + label: int + features: Vector + def __init__(self, label: float, features: Iterable[float]) -> None: ... + def __reduce__(self) -> Tuple[type, Tuple[bytes]]: ... + +class LinearModel: + def __init__(self, weights: Vector, intercept: float) -> None: ... + @property + def weights(self) -> Vector: ... + @property + def intercept(self) -> float: ... + +class LinearRegressionModelBase(LinearModel): + @overload + def predict(self, x: Vector) -> float: ... + @overload + def predict(self, x: RDD[Vector]) -> RDD[float]: ... + +class LinearRegressionModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LinearRegressionModel: ... + +class LinearRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + regParam: float = ..., + regType: Optional[str] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LinearRegressionModel: ... + +class LassoModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> LassoModel: ... + +class LassoWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> LassoModel: ... + +class RidgeRegressionModel(LinearRegressionModelBase): + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> RidgeRegressionModel: ... + +class RidgeRegressionWithSGD: + @classmethod + def train( + cls, + data: RDD[LabeledPoint], + iterations: int = ..., + step: float = ..., + regParam: float = ..., + miniBatchFraction: float = ..., + initialWeights: Optional[VectorLike] = ..., + intercept: bool = ..., + validateData: bool = ..., + convergenceTol: float = ..., + ) -> RidgeRegressionModel: ... + +class IsotonicRegressionModel(Saveable, Loader[IsotonicRegressionModel]): + boundaries: ndarray + predictions: ndarray + isotonic: bool + def __init__( + self, boundaries: ndarray, predictions: ndarray, isotonic: bool + ) -> None: ... + @overload + def predict(self, x: Vector) -> ndarray: ... + @overload + def predict(self, x: RDD[Vector]) -> RDD[ndarray]: ... + def save(self, sc: SparkContext, path: str) -> None: ... + @classmethod + def load(cls, sc: SparkContext, path: str) -> IsotonicRegressionModel: ... + +class IsotonicRegression: + @classmethod + def train( + cls, data: RDD[VectorLike], isotonic: bool = ... + ) -> IsotonicRegressionModel: ... + +class StreamingLinearAlgorithm: + def __init__(self, model: LinearModel) -> None: ... + def latestModel(self) -> LinearModel: ... + def predictOn(self, dstream: DStream[VectorLike]) -> DStream[float]: ... 
+ def predictOnValues( + self, dstream: DStream[Tuple[K, VectorLike]] + ) -> DStream[Tuple[K, float]]: ... + +class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): + stepSize: float + numIterations: int + miniBatchFraction: float + convergenceTol: float + def __init__( + self, + stepSize: float = ..., + numIterations: int = ..., + miniBatchFraction: float = ..., + convergenceTol: float = ..., + ) -> None: ... + def setInitialWeights( + self, initialWeights: VectorLike + ) -> StreamingLinearRegressionWithSGD: ... + def trainOn(self, dstream: DStream[LabeledPoint]) -> None: ... diff --git a/python/pyspark/mllib/stat/KernelDensity.pyi b/python/pyspark/mllib/stat/KernelDensity.pyi new file mode 100644 index 0000000000000..efc70c9470dbe --- /dev/null +++ b/python/pyspark/mllib/stat/KernelDensity.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Iterable +from pyspark.rdd import RDD +from numpy import ndarray # type: ignore[import] + +class KernelDensity: + def __init__(self) -> None: ... + def setBandwidth(self, bandwidth: float) -> None: ... + def setSample(self, sample: RDD[float]) -> None: ... + def estimate(self, points: Iterable[float]) -> ndarray: ... diff --git a/python/pyspark/mllib/stat/__init__.pyi b/python/pyspark/mllib/stat/__init__.pyi new file mode 100644 index 0000000000000..bdd080a08cd56 --- /dev/null +++ b/python/pyspark/mllib/stat/__init__.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
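The KernelDensity stub above mirrors the existing runtime API one-to-one; a small sketch, again assuming an active SparkContext `sc`:

```
from pyspark.mllib.stat import KernelDensity

# `sc` is assumed to be an existing SparkContext.
sample = sc.parallelize([1.0, 2.0, 3.0, 4.0, 5.0])  # RDD[float], matching setSample

kd = KernelDensity()
kd.setSample(sample)
kd.setBandwidth(0.5)
densities = kd.estimate([2.0, 3.5])                  # numpy.ndarray per the stub
```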
+ +from pyspark.mllib.stat.KernelDensity import ( # noqa: F401 + KernelDensity as KernelDensity, +) +from pyspark.mllib.stat._statistics import ( # noqa: F401 + MultivariateStatisticalSummary as MultivariateStatisticalSummary, + Statistics as Statistics, +) +from pyspark.mllib.stat.distribution import ( # noqa: F401 + MultivariateGaussian as MultivariateGaussian, +) +from pyspark.mllib.stat.test import ChiSqTestResult as ChiSqTestResult # noqa: F401 diff --git a/python/pyspark/mllib/stat/_statistics.pyi b/python/pyspark/mllib/stat/_statistics.pyi new file mode 100644 index 0000000000000..4d2701d486881 --- /dev/null +++ b/python/pyspark/mllib/stat/_statistics.pyi @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import List, Optional, overload, Union +from typing_extensions import Literal + +from numpy import ndarray # type: ignore[import] + +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.linalg import Vector, Matrix +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult +from pyspark.rdd import RDD + +CorrelationMethod = Union[Literal["spearman"], Literal["pearson"]] + +class MultivariateStatisticalSummary(JavaModelWrapper): + def mean(self) -> ndarray: ... + def variance(self) -> ndarray: ... + def count(self) -> int: ... + def numNonzeros(self) -> ndarray: ... + def max(self) -> ndarray: ... + def min(self) -> ndarray: ... + def normL1(self) -> ndarray: ... + def normL2(self) -> ndarray: ... + +class Statistics: + @staticmethod + def colStats(rdd: RDD[Vector]) -> MultivariateStatisticalSummary: ... + @overload + @staticmethod + def corr( + x: RDD[Vector], *, method: Optional[CorrelationMethod] = ... + ) -> Matrix: ... + @overload + @staticmethod + def corr( + x: RDD[float], y: RDD[float], method: Optional[CorrelationMethod] = ... + ) -> float: ... + @overload + @staticmethod + def chiSqTest(observed: Matrix) -> ChiSqTestResult: ... + @overload + @staticmethod + def chiSqTest( + observed: Vector, expected: Optional[Vector] = ... + ) -> ChiSqTestResult: ... + @overload + @staticmethod + def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: ... + @staticmethod + def kolmogorovSmirnovTest( + data, distName: Literal["norm"] = ..., *params: float + ) -> KolmogorovSmirnovTestResult: ... diff --git a/python/pyspark/mllib/stat/distribution.pyi b/python/pyspark/mllib/stat/distribution.pyi new file mode 100644 index 0000000000000..8bb93f91b07b5 --- /dev/null +++ b/python/pyspark/mllib/stat/distribution.pyi @@ -0,0 +1,25 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import NamedTuple + +from pyspark.mllib.linalg import Vector, Matrix + +class MultivariateGaussian(NamedTuple): + mu: Vector + sigma: Matrix diff --git a/python/pyspark/mllib/stat/test.pyi b/python/pyspark/mllib/stat/test.pyi new file mode 100644 index 0000000000000..a65f8e40e87d8 --- /dev/null +++ b/python/pyspark/mllib/stat/test.pyi @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, Tuple, TypeVar + +from pyspark.mllib.common import JavaModelWrapper + +DF = TypeVar("DF", int, float, Tuple[int, ...], Tuple[float, ...]) + +class TestResult(JavaModelWrapper, Generic[DF]): + @property + def pValue(self) -> float: ... + @property + def degreesOfFreedom(self) -> DF: ... + @property + def statistic(self) -> float: ... + @property + def nullHypothesis(self) -> str: ... + +class ChiSqTestResult(TestResult[int]): + @property + def method(self) -> str: ... + +class KolmogorovSmirnovTestResult(TestResult[int]): ... 
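The `Statistics` overloads in `_statistics.pyi` are what let the checker pick the correct result type for each argument shape. A short sketch, assuming an active SparkContext `sc`:

```
from pyspark.mllib.linalg import Matrices, Vectors
from pyspark.mllib.stat import Statistics

# Goodness-of-fit test: the Vector overload yields a single ChiSqTestResult.
gof = Statistics.chiSqTest(Vectors.dense([4.0, 6.0, 5.0]))
print(gof.method, gof.statistic, gof.pValue, gof.degreesOfFreedom)

# Independence test: the Matrix overload also yields a ChiSqTestResult.
ind = Statistics.chiSqTest(Matrices.dense(2, 2, [1.0, 3.0, 5.0, 7.0]))

# corr on two RDD[float] is typed as float; on a single RDD[Vector] it returns a Matrix.
x = sc.parallelize([1.0, 2.0, 3.0])
y = sc.parallelize([2.0, 4.0, 6.1])
print(Statistics.corr(x, y, method="pearson"))
```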
diff --git a/python/pyspark/mllib/tests/test_algorithms.py b/python/pyspark/mllib/tests/test_algorithms.py index 27a340068a52a..89d09fae5cfbc 100644 --- a/python/pyspark/mllib/tests/test_algorithms.py +++ b/python/pyspark/mllib/tests/test_algorithms.py @@ -295,7 +295,7 @@ def test_fpgrowth(self): from pyspark.mllib.tests.test_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_feature.py b/python/pyspark/mllib/tests/test_feature.py index 165c1466ddfa8..7fba83b3ea35f 100644 --- a/python/pyspark/mllib/tests/test_feature.py +++ b/python/pyspark/mllib/tests/test_feature.py @@ -185,7 +185,7 @@ def test_pca(self): from pyspark.mllib.tests.test_feature import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_linalg.py b/python/pyspark/mllib/tests/test_linalg.py index 0e25836599307..a8303ba4341f3 100644 --- a/python/pyspark/mllib/tests/test_linalg.py +++ b/python/pyspark/mllib/tests/test_linalg.py @@ -22,8 +22,10 @@ import pyspark.ml.linalg as newlinalg from pyspark.serializers import PickleSerializer -from pyspark.mllib.linalg import Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, \ +from pyspark.mllib.linalg import ( # type: ignore[attr-defined] + Vector, SparseVector, DenseVector, VectorUDT, _convert_to_vector, DenseMatrix, SparseMatrix, Vectors, Matrices, MatrixUDT +) from pyspark.mllib.linalg.distributed import RowMatrix, IndexedRowMatrix from pyspark.mllib.regression import LabeledPoint from pyspark.sql import Row @@ -641,7 +643,7 @@ def test_regression(self): from pyspark.mllib.tests.test_linalg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_stat.py b/python/pyspark/mllib/tests/test_stat.py index 6ed0589387a46..414106fe51cc8 100644 --- a/python/pyspark/mllib/tests/test_stat.py +++ b/python/pyspark/mllib/tests/test_stat.py @@ -180,7 +180,7 @@ def test_R_implementation_equivalence(self): from pyspark.mllib.tests.test_stat import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py index 666f6f4d8628b..b94fb2778d88d 100644 --- a/python/pyspark/mllib/tests/test_streaming_algorithms.py +++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py @@ -469,7 +469,7 @@ def condition(): from pyspark.mllib.tests.test_streaming_algorithms import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tests/test_util.py b/python/pyspark/mllib/tests/test_util.py index 12578e417bcdf..2be3f17069fd4 100644 --- a/python/pyspark/mllib/tests/test_util.py +++ b/python/pyspark/mllib/tests/test_util.py @@ -19,7 +19,7 @@ import tempfile import unittest -from 
pyspark.mllib.common import _to_java_object_rdd +from pyspark.mllib.common import _to_java_object_rdd # type: ignore[attr-defined] from pyspark.mllib.util import LinearDataGenerator from pyspark.mllib.util import MLUtils from pyspark.mllib.linalg import SparseVector, DenseVector, Vectors @@ -97,7 +97,7 @@ def test_to_java_object_rdd(self): # SPARK-6660 from pyspark.mllib.tests.test_util import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/mllib/tree.pyi b/python/pyspark/mllib/tree.pyi new file mode 100644 index 0000000000000..511afdeb063d9 --- /dev/null +++ b/python/pyspark/mllib/tree.pyi @@ -0,0 +1,126 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Dict, Optional, Tuple +from pyspark.mllib._typing import VectorLike +from pyspark.rdd import RDD +from pyspark.mllib.common import JavaModelWrapper +from pyspark.mllib.regression import LabeledPoint +from pyspark.mllib.util import JavaLoader, JavaSaveable + +class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): + @overload + def predict(self, x: VectorLike) -> float: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... + def numTrees(self) -> int: ... + def totalNumNodes(self) -> int: ... + def toDebugString(self) -> str: ... + +class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader[DecisionTreeModel]): + @overload + def predict(self, x: VectorLike) -> float: ... + @overload + def predict(self, x: RDD[VectorLike]) -> RDD[VectorLike]: ... + def numNodes(self) -> int: ... + def depth(self) -> int: ... + def toDebugString(self) -> str: ... + +class DecisionTree: + @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + ) -> DecisionTreeModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + minInstancesPerNode: int = ..., + minInfoGain: float = ..., + ) -> DecisionTreeModel: ... + +class RandomForestModel(TreeEnsembleModel, JavaLoader[RandomForestModel]): ... + +class RandomForest: + supportedFeatureSubsetStrategies: Tuple[str, ...] 
+ @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + numClasses: int, + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = ..., + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + seed: Optional[int] = ..., + ) -> RandomForestModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + numTrees: int, + featureSubsetStrategy: str = ..., + impurity: str = ..., + maxDepth: int = ..., + maxBins: int = ..., + seed: Optional[int] = ..., + ) -> RandomForestModel: ... + +class GradientBoostedTreesModel( + TreeEnsembleModel, JavaLoader[GradientBoostedTreesModel] +): ... + +class GradientBoostedTrees: + @classmethod + def trainClassifier( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = ..., + numIterations: int = ..., + learningRate: float = ..., + maxDepth: int = ..., + maxBins: int = ..., + ) -> GradientBoostedTreesModel: ... + @classmethod + def trainRegressor( + cls, + data: RDD[LabeledPoint], + categoricalFeaturesInfo: Dict[int, int], + loss: str = ..., + numIterations: int = ..., + learningRate: float = ..., + maxDepth: int = ..., + maxBins: int = ..., + ) -> GradientBoostedTreesModel: ... diff --git a/python/pyspark/mllib/util.pyi b/python/pyspark/mllib/util.pyi new file mode 100644 index 0000000000000..265f765ee263a --- /dev/null +++ b/python/pyspark/mllib/util.pyi @@ -0,0 +1,90 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Generic, List, Optional, TypeVar + +from pyspark.mllib._typing import VectorLike +from pyspark.context import SparkContext +from pyspark.mllib.linalg import Vector +from pyspark.mllib.regression import LabeledPoint +from pyspark.rdd import RDD +from pyspark.sql.dataframe import DataFrame + +T = TypeVar("T") + +class MLUtils: + @staticmethod + def loadLibSVMFile( + sc: SparkContext, + path: str, + numFeatures: int = ..., + minPartitions: Optional[int] = ..., + ) -> RDD[LabeledPoint]: ... + @staticmethod + def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: str) -> None: ... + @staticmethod + def loadLabeledPoints( + sc: SparkContext, path: str, minPartitions: Optional[int] = ... + ) -> RDD[LabeledPoint]: ... + @staticmethod + def appendBias(data: Vector) -> Vector: ... + @staticmethod + def loadVectors(sc: SparkContext, path: str) -> RDD[Vector]: ... + @staticmethod + def convertVectorColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... + @staticmethod + def convertVectorColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... + @staticmethod + def convertMatrixColumnsToML(dataset: DataFrame, *cols: str) -> DataFrame: ... 
+ @staticmethod + def convertMatrixColumnsFromML(dataset: DataFrame, *cols: str) -> DataFrame: ... + +class Saveable: + def save(self, sc: SparkContext, path: str) -> None: ... + +class JavaSaveable(Saveable): + def save(self, sc: SparkContext, path: str) -> None: ... + +class Loader(Generic[T]): + @classmethod + def load(cls, sc: SparkContext, path: str) -> T: ... + +class JavaLoader(Loader[T]): + @classmethod + def load(cls, sc: SparkContext, path: str) -> T: ... + +class LinearDataGenerator: + @staticmethod + def generateLinearInput( + intercept: float, + weights: VectorLike, + xMean: VectorLike, + xVariance: VectorLike, + nPoints: int, + seed: int, + eps: float, + ) -> List[LabeledPoint]: ... + @staticmethod + def generateLinearRDD( + sc: SparkContext, + nexamples: int, + nfeatures: int, + eps: float, + nParts: int = ..., + intercept: float = ..., + ) -> RDD[LabeledPoint]: ... diff --git a/python/pyspark/profiler.pyi b/python/pyspark/profiler.pyi new file mode 100644 index 0000000000000..7276da529fa17 --- /dev/null +++ b/python/pyspark/profiler.pyi @@ -0,0 +1,56 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, List, Optional, Tuple, Type + +import pstats + +from pyspark.accumulators import AccumulatorParam +from pyspark.context import SparkContext + +class ProfilerCollector: + profiler_cls: Type[Profiler] + profile_dump_path: Optional[str] + profilers: List[Tuple[int, Profiler, bool]] + def __init__( + self, profiler_cls: Type[Profiler], dump_path: Optional[str] = ... + ) -> None: ... + def new_profiler(self, ctx: SparkContext) -> Profiler: ... + def add_profiler(self, id: int, profiler: Profiler) -> None: ... + def dump_profiles(self, path: str) -> None: ... + def show_profiles(self) -> None: ... + +class Profiler: + def __init__(self, ctx: SparkContext) -> None: ... + def profile(self, func: Callable[[], Any]) -> None: ... + def stats(self) -> pstats.Stats: ... + def show(self, id: int) -> None: ... + def dump(self, id: int, path: str) -> None: ... + +class PStatsParam(AccumulatorParam): + @staticmethod + def zero(value: pstats.Stats) -> None: ... + @staticmethod + def addInPlace( + value1: Optional[pstats.Stats], value2: Optional[pstats.Stats] + ) -> Optional[pstats.Stats]: ... + +class BasicProfiler(Profiler): + def __init__(self, ctx: SparkContext) -> None: ... + def profile(self, func: Callable[[], Any]) -> None: ... + def stats(self) -> pstats.Stats: ... 
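The profiler stubs describe the objects behind `spark.python.profile`; a self-contained sketch of how they are typically exercised (local mode, default `BasicProfiler`):

```
from pyspark import SparkConf, SparkContext

conf = SparkConf().set("spark.python.profile", "true")
sc = SparkContext("local[2]", "profile-demo", conf=conf)

sc.parallelize(range(10000)).map(lambda x: x * x).count()
sc.show_profiles()   # prints the pstats collected by BasicProfiler, per RDD id
sc.stop()
```

A custom `Profiler` subclass can also be passed via `SparkContext(..., profiler_cls=...)`, which is why `ProfilerCollector.profiler_cls` is typed as `Type[Profiler]`.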
diff --git a/python/pyspark/py.typed b/python/pyspark/py.typed new file mode 100644 index 0000000000000..b648ac9233330 --- /dev/null +++ b/python/pyspark/py.typed @@ -0,0 +1 @@ +partial diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi new file mode 100644 index 0000000000000..35c49e952b0cd --- /dev/null +++ b/python/pyspark/rdd.pyi @@ -0,0 +1,479 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Any, + Callable, + Dict, + Generic, + Hashable, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, + TypeVar, +) +from typing_extensions import Literal + +from numpy import int32, int64, float32, float64, ndarray # type: ignore[import] + +from pyspark._typing import SupportsOrdering +from pyspark.sql.pandas._typing import ( + PandasScalarUDFType, + PandasScalarIterUDFType, + PandasGroupedMapUDFType, + PandasCogroupedMapUDFType, + PandasGroupedAggUDFType, + PandasMapIterUDFType, +) +import pyspark.context +from pyspark.resultiterable import ResultIterable +from pyspark.serializers import Serializer +from pyspark.storagelevel import StorageLevel +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequests, + TaskResourceRequests, +) +from pyspark.resource.profile import ResourceProfile +from pyspark.statcounter import StatCounter +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import StructType +from pyspark.sql._typing import RowLike +from py4j.java_gateway import JavaObject # type: ignore[import] + +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") +V1 = TypeVar("V1") +V2 = TypeVar("V2") +V3 = TypeVar("V3") +O = TypeVar("O", bound=SupportsOrdering) +NumberOrArray = TypeVar( + "NumberOrArray", float, int, complex, int32, int64, float32, float64, ndarray +) + +def portable_hash(x: Hashable) -> int: ... + +class PythonEvalType: + NON_UDF: Literal[0] + SQL_BATCHED_UDF: Literal[100] + SQL_SCALAR_PANDAS_UDF: PandasScalarUDFType + SQL_GROUPED_MAP_PANDAS_UDF: PandasGroupedMapUDFType + SQL_GROUPED_AGG_PANDAS_UDF: PandasGroupedAggUDFType + SQL_WINDOW_AGG_PANDAS_UDF: Literal[203] + SQL_SCALAR_PANDAS_ITER_UDF: PandasScalarIterUDFType + SQL_MAP_PANDAS_ITER_UDF: PandasMapIterUDFType + SQL_COGROUPED_MAP_PANDAS_UDF: PandasCogroupedMapUDFType + +class BoundedFloat(float): + def __new__(cls, mean: float, confidence: float, low: float, high: float): ... + +class Partitioner: + numPartitions: int + partitionFunc: Callable[[Any], int] + def __init__(self, numPartitions, partitionFunc) -> None: ... + def __eq__(self, other: Any) -> bool: ... + def __call__(self, k: Any) -> int: ... 
+ +class RDD(Generic[T]): + is_cached: bool + is_checkpointed: bool + ctx: pyspark.context.SparkContext + partitioner: Optional[Partitioner] + def __init__( + self, + jrdd: JavaObject, + ctx: pyspark.context.SparkContext, + jrdd_deserializer: Serializer = ..., + ) -> None: ... + def id(self) -> int: ... + def __getnewargs__(self) -> Any: ... + @property + def context(self) -> pyspark.context.SparkContext: ... + def cache(self) -> RDD[T]: ... + def persist(self, storageLevel: StorageLevel = ...) -> RDD[T]: ... + def unpersist(self, blocking: bool = ...) -> RDD[T]: ... + def checkpoint(self) -> None: ... + def isCheckpointed(self) -> bool: ... + def localCheckpoint(self) -> None: ... + def isLocallyCheckpointed(self) -> bool: ... + def getCheckpointFile(self) -> Optional[str]: ... + def map(self, f: Callable[[T], U], preservesPartitioning: bool = ...) -> RDD[U]: ... + def flatMap( + self, f: Callable[[T], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + def mapPartitionsWithSplit( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + def getNumPartitions(self) -> int: ... + def filter(self, f: Callable[[T], bool]) -> RDD[T]: ... + def distinct(self, numPartitions: Optional[int] = ...) -> RDD[T]: ... + def sample( + self, withReplacement: bool, fraction: float, seed: Optional[int] = ... + ) -> RDD[T]: ... + def randomSplit( + self, weights: List[Union[int, float]], seed: Optional[int] = ... + ) -> List[RDD[T]]: ... + def takeSample( + self, withReplacement: bool, num: int, seed: Optional[int] = ... + ) -> List[T]: ... + def union(self, other: RDD[U]) -> RDD[Union[T, U]]: ... + def intersection(self, other: RDD[T]) -> RDD[T]: ... + def __add__(self, other: RDD[T]) -> RDD[T]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[O, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[O], int] = ..., + ascending: bool = ..., + ) -> RDD[Tuple[O, V]]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int], + partitionFunc: Callable[[K], int], + ascending: bool, + keyfunc: Callable[[K], O], + ) -> RDD[Tuple[K, V]]: ... + @overload + def repartitionAndSortWithinPartitions( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ascending: bool = ..., + *, + keyfunc: Callable[[K], O] + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[O, V]], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[K, V]], + ascending: bool, + numPartitions: int, + keyfunc: Callable[[K], O], + ) -> RDD[Tuple[K, V]]: ... + @overload + def sortByKey( + self: RDD[Tuple[K, V]], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + *, + keyfunc: Callable[[K], O] + ) -> RDD[Tuple[K, V]]: ... + def sortBy( + self: RDD[T], + keyfunc: Callable[[T], O], + ascending: bool = ..., + numPartitions: Optional[int] = ..., + ) -> RDD[T]: ... + def glom(self) -> RDD[List[T]]: ... + def cartesian(self, other: RDD[U]) -> RDD[Tuple[T, U]]: ... 
+ def groupBy( + self, + f: Callable[[T], K], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, Iterable[T]]]: ... + def pipe( + self, command: str, env: Optional[Dict[str, str]] = ..., checkCode: bool = ... + ) -> RDD[str]: ... + def foreach(self, f: Callable[[T], None]) -> None: ... + def foreachPartition(self, f: Callable[[Iterable[T]], None]) -> None: ... + def collect(self) -> List[T]: ... + def collectWithJobGroup( + self, groupId: str, description: str, interruptOnCancel: bool = ... + ) -> List[T]: ... + def reduce(self, f: Callable[[T, T], T]) -> T: ... + def treeReduce(self, f: Callable[[T, T], T], depth: int = ...) -> T: ... + def fold(self, zeroValue: T, op: Callable[[T, T], T]) -> T: ... + def aggregate( + self, zeroValue: U, seqOp: Callable[[U, T], U], combOp: Callable[[U, U], U] + ) -> U: ... + def treeAggregate( + self, + zeroValue: U, + seqOp: Callable[[U, T], U], + combOp: Callable[[U, U], U], + depth: int = ..., + ) -> U: ... + @overload + def max(self: RDD[O]) -> O: ... + @overload + def max(self, key: Callable[[T], O]) -> T: ... + @overload + def min(self: RDD[O]) -> O: ... + @overload + def min(self, key: Callable[[T], O]) -> T: ... + def sum(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def count(self) -> int: ... + def stats(self: RDD[NumberOrArray]) -> StatCounter: ... + def histogram(self, buckets: List[T]) -> Tuple[List[T], List[int]]: ... + def mean(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def variance(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def stdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def sampleStdev(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def sampleVariance(self: RDD[NumberOrArray]) -> NumberOrArray: ... + def countByValue(self: RDD[K]) -> Dict[K, int]: ... + @overload + def top(self: RDD[O], num: int) -> List[O]: ... + @overload + def top(self: RDD[T], num: int, key: Callable[[T], O]) -> List[T]: ... + @overload + def takeOrdered(self: RDD[O], num: int) -> List[O]: ... + @overload + def takeOrdered(self: RDD[T], num: int, key: Callable[[T], O]) -> List[T]: ... + def take(self, num: int) -> List[T]: ... + def first(self) -> T: ... + def isEmpty(self) -> bool: ... + def saveAsNewAPIHadoopDataset( + self: RDD[Tuple[K, V]], + conf: Dict[str, str], + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + ) -> None: ... + def saveAsNewAPIHadoopFile( + self: RDD[Tuple[K, V]], + path: str, + outputFormatClass: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[Dict[str, str]] = ..., + ) -> None: ... + def saveAsHadoopDataset( + self: RDD[Tuple[K, V]], + conf: Dict[str, str], + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + ) -> None: ... + def saveAsHadoopFile( + self: RDD[Tuple[K, V]], + path: str, + outputFormatClass: str, + keyClass: Optional[str] = ..., + valueClass: Optional[str] = ..., + keyConverter: Optional[str] = ..., + valueConverter: Optional[str] = ..., + conf: Optional[str] = ..., + compressionCodecClass: Optional[str] = ..., + ) -> None: ... + def saveAsSequenceFile( + self: RDD[Tuple[K, V]], path: str, compressionCodecClass: Optional[str] = ... + ) -> None: ... + def saveAsPickleFile(self, path: str, batchSize: int = ...) -> None: ... + def saveAsTextFile( + self, path: str, compressionCodecClass: Optional[str] = ... + ) -> None: ... 
+ def collectAsMap(self: RDD[Tuple[K, V]]) -> Dict[K, V]: ... + def keys(self: RDD[Tuple[K, V]]) -> RDD[K]: ... + def values(self: RDD[Tuple[K, V]]) -> RDD[V]: ... + def reduceByKey( + self: RDD[Tuple[K, V]], + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def reduceByKeyLocally( + self: RDD[Tuple[K, V]], func: Callable[[V, V], V] + ) -> Dict[K, V]: ... + def countByKey(self: RDD[Tuple[K, V]]) -> Dict[K, int]: ... + def join( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[V, U]]]: ... + def leftOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... + def rightOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[Optional[V], U]]]: ... + def fullOuterJoin( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... + def partitionBy( + self: RDD[Tuple[K, V]], + numPartitions: int, + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def combineByKey( + self: RDD[Tuple[K, V]], + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, U]]: ... + def aggregateByKey( + self: RDD[Tuple[K, V]], + zeroValue: U, + seqFunc: Callable[[U, V], U], + combFunc: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, U]]: ... + def foldByKey( + self: RDD[Tuple[K, V]], + zeroValue: V, + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def groupByKey( + self: RDD[Tuple[K, V]], + numPartitions: Optional[int] = ..., + partitionFunc: Callable[[K], int] = ..., + ) -> RDD[Tuple[K, Iterable[V]]]: ... + def flatMapValues( + self: RDD[Tuple[K, V]], f: Callable[[V], Iterable[U]] + ) -> RDD[Tuple[K, U]]: ... + def mapValues(self: RDD[Tuple[K, V]], f: Callable[[V], U]) -> RDD[Tuple[K, U]]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], __o: RDD[Tuple[K, V1]] + ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1]]]]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], __o1: RDD[Tuple[K, V1]], __o2: RDD[Tuple[K, V2]] + ) -> RDD[ + Tuple[K, Tuple[ResultIterable[V], ResultIterable[V1], ResultIterable[V2]]] + ]: ... + @overload + def groupWith( + self: RDD[Tuple[K, V]], + other1: RDD[Tuple[K, V1]], + other2: RDD[Tuple[K, V2]], + other3: RDD[Tuple[K, V3]], + ) -> RDD[ + Tuple[ + K, + Tuple[ + ResultIterable[V], + ResultIterable[V1], + ResultIterable[V2], + ResultIterable[V3], + ], + ] + ]: ... + def cogroup( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, Tuple[ResultIterable[V], ResultIterable[U]]]]: ... + def sampleByKey( + self: RDD[Tuple[K, V]], + withReplacement: bool, + fractions: Dict[K, Union[float, int]], + seed: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def subtractByKey( + self: RDD[Tuple[K, V]], + other: RDD[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> RDD[Tuple[K, V]]: ... + def subtract( + self: RDD[T], other: RDD[T], numPartitions: Optional[int] = ... 
+ ) -> RDD[T]: ... + def keyBy(self: RDD[T], f: Callable[[T], K]) -> RDD[Tuple[K, T]]: ... + def repartition(self, numPartitions: int) -> RDD[T]: ... + def coalesce(self, numPartitions: int, shuffle: bool = ...) -> RDD[T]: ... + def zip(self, other: RDD[U]) -> RDD[Tuple[T, U]]: ... + def zipWithIndex(self) -> RDD[Tuple[T, int]]: ... + def zipWithUniqueId(self) -> RDD[Tuple[T, int]]: ... + def name(self) -> str: ... + def setName(self, name: str) -> RDD[T]: ... + def toDebugString(self) -> bytes: ... + def getStorageLevel(self) -> StorageLevel: ... + def lookup(self: RDD[Tuple[K, V]], key: K) -> List[V]: ... + def countApprox(self, timeout: int, confidence: float = ...) -> int: ... + def sumApprox( + self: RDD[Union[float, int]], timeout: int, confidence: float = ... + ) -> BoundedFloat: ... + def meanApprox( + self: RDD[Union[float, int]], timeout: int, confidence: float = ... + ) -> BoundedFloat: ... + def countApproxDistinct(self, relativeSD: float = ...) -> int: ... + def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[T]: ... + def barrier(self: RDD[T]) -> RDDBarrier[T]: ... + def withResources(self: RDD[T], profile: ResourceProfile) -> RDD[T]: ... + def getResourceProfile(self) -> Optional[ResourceProfile]: ... + @overload + def toDF( + self: RDD[RowLike], + schema: Optional[List[str]] = ..., + sampleRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def toDF(self: RDD[RowLike], schema: Optional[StructType] = ...) -> DataFrame: ... + +class RDDBarrier(Generic[T]): + rdd: RDD[T] + def __init__(self, rdd: RDD[T]) -> None: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> RDD[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> RDD[U]: ... + +class PipelinedRDD(RDD[U], Generic[T, U]): + func: Callable[[T], U] + preservesPartitioning: bool + is_cached: bool + is_checkpointed: bool + ctx: pyspark.context.SparkContext + prev: RDD[T] + partitioner: Optional[Partitioner] + is_barrier: bool + def __init__( + self, + prev: RDD[T], + func: Callable[[Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + isFromBarrier: bool = ..., + ) -> None: ... + def getNumPartitions(self) -> int: ... + def id(self) -> int: ... diff --git a/python/pyspark/rddsampler.pyi b/python/pyspark/rddsampler.pyi new file mode 100644 index 0000000000000..8fbf72d90025c --- /dev/null +++ b/python/pyspark/rddsampler.pyi @@ -0,0 +1,54 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
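Much of the value of `rdd.pyi` comes from the `self: RDD[Tuple[K, V]]` annotations, which confine key/value operations to pair RDDs and let the checker propagate key and value types. A small sketch, assuming an active SparkContext `sc`:

```
from operator import add

# `sc` is assumed to be an existing SparkContext.
pairs = sc.parallelize([("a", 1), ("b", 1), ("a", 2)])  # RDD[Tuple[str, int]]

counts = pairs.reduceByKey(add)       # RDD[Tuple[str, int]]
grouped = pairs.groupByKey()          # RDD[Tuple[str, Iterable[int]]]
as_dict = pairs.collectAsMap()        # Dict[str, int]
top_vals = pairs.values().top(2)      # List[int]
```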
+ +from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar + +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K") +V = TypeVar("V") + +class RDDSamplerBase: + def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ... + def initRandomGenerator(self, split: int) -> None: ... + def getUniformSample(self) -> float: ... + def getPoissonSample(self, mean: float) -> int: ... + def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ... + +class RDDSampler(RDDSamplerBase): + def __init__( + self, withReplacement: bool, fraction: float, seed: Optional[int] = ... + ) -> None: ... + def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... + +class RDDRangeSampler(RDDSamplerBase): + def __init__( + self, lowerBound: T, upperBound: T, seed: Optional[Any] = ... + ) -> None: ... + def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... + +class RDDStratifiedSampler(RDDSamplerBase): + def __init__( + self, + withReplacement: bool, + fractions: Dict[K, float], + seed: Optional[int] = ..., + ) -> None: ... + def func( + self, split: int, iterator: Iterator[Tuple[K, V]] + ) -> Iterator[Tuple[K, V]]: ... diff --git a/python/pyspark/resource/__init__.pyi b/python/pyspark/resource/__init__.pyi new file mode 100644 index 0000000000000..87a9b53c268ac --- /dev/null +++ b/python/pyspark/resource/__init__.pyi @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.resource.information import ( # noqa: F401 + ResourceInformation as ResourceInformation, +) +from pyspark.resource.profile import ( # noqa: F401 + ResourceProfile as ResourceProfile, + ResourceProfileBuilder as ResourceProfileBuilder, +) +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequest as ExecutorResourceRequest, + ExecutorResourceRequests as ExecutorResourceRequests, + TaskResourceRequest as TaskResourceRequest, + TaskResourceRequests as TaskResourceRequests, +) diff --git a/python/pyspark/resource/information.pyi b/python/pyspark/resource/information.pyi new file mode 100644 index 0000000000000..7baa6ca8520bd --- /dev/null +++ b/python/pyspark/resource/information.pyi @@ -0,0 +1,26 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +class ResourceInformation: + def __init__(self, name: Any, addresses: Any) -> None: ... + @property + def name(self): ... + @property + def addresses(self): ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi new file mode 100644 index 0000000000000..8ce7d93b29e93 --- /dev/null +++ b/python/pyspark/resource/profile.pyi @@ -0,0 +1,51 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.resource.requests import ( # noqa: F401 + ExecutorResourceRequest as ExecutorResourceRequest, + ExecutorResourceRequests as ExecutorResourceRequests, + TaskResourceRequest as TaskResourceRequest, + TaskResourceRequests as TaskResourceRequests, +) +from typing import Any, Optional + +class ResourceProfile: + def __init__( + self, + _java_resource_profile: Optional[Any] = ..., + _exec_req: Any = ..., + _task_req: Any = ..., + ) -> None: ... + @property + def id(self): ... + @property + def taskResources(self): ... + @property + def executorResources(self): ... + +class ResourceProfileBuilder: + def __init__(self) -> None: ... + def require(self, resourceRequest: Any): ... + def clearExecutorResourceRequests(self) -> None: ... + def clearTaskResourceRequests(self) -> None: ... + @property + def taskResources(self): ... + @property + def executorResources(self): ... + @property + def build(self): ... diff --git a/python/pyspark/resource/requests.pyi b/python/pyspark/resource/requests.pyi new file mode 100644 index 0000000000000..f9448d0780409 --- /dev/null +++ b/python/pyspark/resource/requests.pyi @@ -0,0 +1,71 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Optional + +class ExecutorResourceRequest: + def __init__( + self, + resourceName: Any, + amount: Any, + discoveryScript: str = ..., + vendor: str = ..., + ) -> None: ... + @property + def resourceName(self): ... + @property + def amount(self): ... + @property + def discoveryScript(self): ... + @property + def vendor(self): ... + +class ExecutorResourceRequests: + def __init__( + self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + ) -> None: ... + def memory(self, amount: Any): ... + def memoryOverhead(self, amount: Any): ... + def pysparkMemory(self, amount: Any): ... + def offheapMemory(self, amount: Any): ... + def cores(self, amount: Any): ... + def resource( + self, + resourceName: Any, + amount: Any, + discoveryScript: str = ..., + vendor: str = ..., + ): ... + @property + def requests(self): ... + +class TaskResourceRequest: + def __init__(self, resourceName: Any, amount: Any) -> None: ... + @property + def resourceName(self): ... + @property + def amount(self): ... + +class TaskResourceRequests: + def __init__( + self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + ) -> None: ... + def cpus(self, amount: Any): ... + def resource(self, resourceName: Any, amount: Any): ... + @property + def requests(self): ... diff --git a/python/pyspark/resource/tests/test_resources.py b/python/pyspark/resource/tests/test_resources.py index c2b574c61abc5..6149f1ff7205a 100644 --- a/python/pyspark/resource/tests/test_resources.py +++ b/python/pyspark/resource/tests/test_resources.py @@ -75,7 +75,7 @@ def assert_request_contents(exec_reqs, task_reqs): from pyspark.resource.tests.test_resources import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/resultiterable.pyi b/python/pyspark/resultiterable.pyi new file mode 100644 index 0000000000000..69596ad82c8cc --- /dev/null +++ b/python/pyspark/resultiterable.pyi @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark._typing import SizedIterable +from typing import Iterator, TypeVar + +T = TypeVar("T") + +class ResultIterable(SizedIterable[T]): + data: SizedIterable[T] + index: int + maxindex: int + def __init__(self, data: SizedIterable[T]) -> None: ... + def __iter__(self) -> Iterator[T]: ... + def __len__(self) -> int: ... 
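The resource stubs tie into the `RDD.withResources` and `getResourceProfile` declarations earlier in `rdd.pyi`. A hedged sketch (it assumes an existing SparkContext `sc` on a cluster manager that supports stage-level scheduling; outside such a setup Spark may reject custom profiles):

```
from pyspark.resource import (
    ExecutorResourceRequests,
    ResourceProfileBuilder,
    TaskResourceRequests,
)

# Requests are chainable and collected by the builder.
ereq = ExecutorResourceRequests().cores(4).memory("6g")
treq = TaskResourceRequests().cpus(1)

rp = ResourceProfileBuilder().require(ereq).require(treq).build  # `build` is a property
rdd = sc.parallelize(range(100)).withResources(rp)
print(rdd.getResourceProfile())
```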
diff --git a/python/pyspark/serializers.pyi b/python/pyspark/serializers.pyi new file mode 100644 index 0000000000000..26ef17c38d227 --- /dev/null +++ b/python/pyspark/serializers.pyi @@ -0,0 +1,122 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any + +class SpecialLengths: + END_OF_DATA_SECTION: int = ... + PYTHON_EXCEPTION_THROWN: int = ... + TIMING_DATA: int = ... + END_OF_STREAM: int = ... + NULL: int = ... + START_ARROW_STREAM: int = ... + +class Serializer: + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + def __eq__(self, other: Any) -> Any: ... + def __ne__(self, other: Any) -> Any: ... + def __hash__(self) -> Any: ... + +class FramedSerializer(Serializer): + def __init__(self) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + def dumps(self, obj: Any) -> None: ... + def loads(self, obj: Any) -> None: ... + +class BatchedSerializer(Serializer): + UNLIMITED_BATCH_SIZE: int = ... + UNKNOWN_BATCH_SIZE: int = ... + serializer: Any = ... + batchSize: Any = ... + def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class FlattenedValuesSerializer(BatchedSerializer): + def __init__(self, serializer: Any, batchSize: int = ...) -> None: ... + def load_stream(self, stream: Any): ... + +class AutoBatchedSerializer(BatchedSerializer): + bestSize: Any = ... + def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + +class CartesianDeserializer(Serializer): + key_ser: Any = ... + val_ser: Any = ... + def __init__(self, key_ser: Any, val_ser: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class PairDeserializer(Serializer): + key_ser: Any = ... + val_ser: Any = ... + def __init__(self, key_ser: Any, val_ser: Any) -> None: ... + def load_stream(self, stream: Any): ... + +class NoOpSerializer(FramedSerializer): + def loads(self, obj: Any): ... + def dumps(self, obj: Any): ... + +class PickleSerializer(FramedSerializer): + def dumps(self, obj: Any): ... + def loads(self, obj: Any, encoding: str = ...): ... + +class CloudPickleSerializer(PickleSerializer): + def dumps(self, obj: Any): ... + +class MarshalSerializer(FramedSerializer): + def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class AutoSerializer(FramedSerializer): + def __init__(self) -> None: ... + def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class CompressedSerializer(FramedSerializer): + serializer: Any = ... + def __init__(self, serializer: Any) -> None: ... 
+ def dumps(self, obj: Any): ... + def loads(self, obj: Any): ... + +class UTF8Deserializer(Serializer): + use_unicode: Any = ... + def __init__(self, use_unicode: bool = ...) -> None: ... + def loads(self, stream: Any): ... + def load_stream(self, stream: Any) -> None: ... + +class ChunkedStream: + buffer_size: Any = ... + buffer: Any = ... + current_pos: int = ... + wrapped: Any = ... + def __init__(self, wrapped: Any, buffer_size: Any) -> None: ... + def write(self, bytes: Any) -> None: ... + def close(self) -> None: ... + @property + def closed(self): ... + +def write_with_length(obj: Any, stream: Any): ... +def pack_long(value): ... +def read_int(stream): ... +def read_long(stream): ... +def read_bool(stream): ... +def write_int(value, stream): ... +def write_long(value, stream): ... diff --git a/python/pyspark/shell.pyi b/python/pyspark/shell.pyi new file mode 100644 index 0000000000000..0760309542f8d --- /dev/null +++ b/python/pyspark/shell.pyi @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import SparkConf as SparkConf # noqa: F401 +from pyspark.context import SparkContext as SparkContext +from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession +from typing import Any, Callable + +from pyspark.sql.dataframe import DataFrame + +spark: SparkSession +sc: SparkContext +sql: Callable[[str], DataFrame] +sqlContext: SQLContext +sqlCtx: SQLContext +code: Any diff --git a/python/pyspark/shuffle.pyi b/python/pyspark/shuffle.pyi new file mode 100644 index 0000000000000..10648c51dca8f --- /dev/null +++ b/python/pyspark/shuffle.pyi @@ -0,0 +1,109 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.serializers import ( # noqa: F401 + AutoBatchedSerializer as AutoBatchedSerializer, + BatchedSerializer as BatchedSerializer, + CompressedSerializer as CompressedSerializer, + FlattenedValuesSerializer as FlattenedValuesSerializer, + PickleSerializer as PickleSerializer, +) +from pyspark.util import fail_on_stopiteration as fail_on_stopiteration # noqa: F401 +from typing import Any, Optional + +process: Any + +def get_used_memory(): ... + +MemoryBytesSpilled: int +DiskBytesSpilled: int + +class Aggregator: + createCombiner: Any = ... + mergeValue: Any = ... + mergeCombiners: Any = ... + def __init__( + self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any + ) -> None: ... + +class SimpleAggregator(Aggregator): + def __init__(self, combiner: Any): ... + +class Merger: + agg: Any = ... + def __init__(self, aggregator: Any) -> None: ... + def mergeValues(self, iterator: Any) -> None: ... + def mergeCombiners(self, iterator: Any) -> None: ... + def items(self) -> None: ... + +class ExternalMerger(Merger): + MAX_TOTAL_PARTITIONS: int = ... + memory_limit: Any = ... + serializer: Any = ... + localdirs: Any = ... + partitions: Any = ... + batch: Any = ... + scale: Any = ... + data: Any = ... + pdata: Any = ... + spills: int = ... + def __init__( + self, + aggregator: Any, + memory_limit: int = ..., + serializer: Optional[Any] = ..., + localdirs: Optional[Any] = ..., + scale: int = ..., + partitions: int = ..., + batch: int = ..., + ) -> None: ... + def mergeValues(self, iterator: Any) -> None: ... + def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ... + def items(self): ... + +class ExternalSorter: + memory_limit: Any = ... + local_dirs: Any = ... + serializer: Any = ... + def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ... + def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ... + +class ExternalList: + LIMIT: int = ... + values: Any = ... + count: Any = ... + def __init__(self, values: Any) -> None: ... + def __iter__(self) -> Any: ... + def __len__(self): ... + def append(self, value: Any) -> None: ... + def __del__(self) -> None: ... + +class ExternalListOfList(ExternalList): + count: Any = ... + def __init__(self, values: Any) -> None: ... + def append(self, value: Any) -> None: ... + def __iter__(self) -> Any: ... + +class GroupByKey: + iterator: Any = ... + def __init__(self, iterator: Any) -> None: ... + def __iter__(self) -> Any: ... + +class ExternalGroupBy(ExternalMerger): + SORT_KEY_LIMIT: int = ... + def flattened_serializer(self): ... diff --git a/python/pyspark/sql/__init__.pyi b/python/pyspark/sql/__init__.pyi new file mode 100644 index 0000000000000..787be5647772e --- /dev/null +++ b/python/pyspark/sql/__init__.pyi @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql.catalog import Catalog as Catalog # noqa: F401 +from pyspark.sql.column import Column as Column # noqa: F401 +from pyspark.sql.context import ( # noqa: F401 + HiveContext as HiveContext, + SQLContext as SQLContext, + UDFRegistration as UDFRegistration, +) +from pyspark.sql.dataframe import ( # noqa: F401 + DataFrame as DataFrame, + DataFrameNaFunctions as DataFrameNaFunctions, + DataFrameStatFunctions as DataFrameStatFunctions, +) +from pyspark.sql.group import GroupedData as GroupedData # noqa: F401 +from pyspark.sql.pandas.group_ops import ( # noqa: F401 + PandasCogroupedOps as PandasCogroupedOps, +) +from pyspark.sql.readwriter import ( # noqa: F401 + DataFrameReader as DataFrameReader, + DataFrameWriter as DataFrameWriter, +) +from pyspark.sql.session import SparkSession as SparkSession # noqa: F401 +from pyspark.sql.types import Row as Row # noqa: F401 +from pyspark.sql.window import Window as Window, WindowSpec as WindowSpec # noqa: F401 diff --git a/python/pyspark/sql/_typing.pyi b/python/pyspark/sql/_typing.pyi new file mode 100644 index 0000000000000..799a73204a639 --- /dev/null +++ b/python/pyspark/sql/_typing.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + Any, + List, + Optional, + Tuple, + TypeVar, + Union, +) +from typing_extensions import Protocol + +import datetime +import decimal + +from pyspark._typing import PrimitiveType +import pyspark.sql.column +import pyspark.sql.types +from pyspark.sql.column import Column + +ColumnOrName = Union[pyspark.sql.column.Column, str] +DecimalLiteral = decimal.Decimal +DateTimeLiteral = Union[datetime.datetime, datetime.date] +LiteralType = PrimitiveType +AtomicDataTypeOrString = Union[pyspark.sql.types.AtomicType, str] +DataTypeOrString = Union[pyspark.sql.types.DataType, str] +OptionalPrimitiveType = Optional[PrimitiveType] + +RowLike = TypeVar("RowLike", List[Any], Tuple[Any, ...], pyspark.sql.types.Row) + +class SupportsOpen(Protocol): + def open(self, partition_id: int, epoch_id: int) -> bool: ... + +class SupportsProcess(Protocol): + def process(self, row: pyspark.sql.types.Row) -> None: ... + +class SupportsClose(Protocol): + def close(self, error: Exception) -> None: ... + +class UserDefinedFunctionLike(Protocol): + def __call__(self, *_: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/avro/__init__.pyi b/python/pyspark/sql/avro/__init__.pyi new file mode 100644 index 0000000000000..0d7871da4c100 --- /dev/null +++ b/python/pyspark/sql/avro/__init__.pyi @@ -0,0 +1,22 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +# Names in __all__ with no definition: +# functions diff --git a/python/pyspark/sql/avro/functions.pyi b/python/pyspark/sql/avro/functions.pyi new file mode 100644 index 0000000000000..4c2e3814a9e94 --- /dev/null +++ b/python/pyspark/sql/avro/functions.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict + +from pyspark.sql._typing import ColumnOrName +from pyspark.sql.column import Column + +def from_avro( + data: ColumnOrName, jsonFormatSchema: str, options: Dict[str, str] = ... +) -> Column: ... +def to_avro(data: ColumnOrName, jsonFormatSchema: str = ...) -> Column: ... diff --git a/python/pyspark/sql/catalog.pyi b/python/pyspark/sql/catalog.pyi new file mode 100644 index 0000000000000..86263fff63ce8 --- /dev/null +++ b/python/pyspark/sql/catalog.pyi @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import Any, Callable, List, Optional +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.session import SparkSession +from pyspark.sql.types import DataType, StructType +from collections import namedtuple + +Database = namedtuple("Database", "name description locationUri") + +Table = namedtuple("Table", "name database description tableType isTemporary") + +Column = namedtuple("Column", "name description dataType nullable isPartition isBucket") + +Function = namedtuple("Function", "name description className isTemporary") + +class Catalog: + def __init__(self, sparkSession: SparkSession) -> None: ... + def currentDatabase(self) -> str: ... + def setCurrentDatabase(self, dbName: str) -> None: ... + def listDatabases(self) -> List[Database]: ... + def listTables(self, dbName: Optional[str] = ...) -> List[Table]: ... + def listFunctions(self, dbName: Optional[str] = ...) -> List[Function]: ... + def listColumns( + self, tableName: str, dbName: Optional[str] = ... + ) -> List[Column]: ... + def createTable( + self, + tableName: str, + path: Optional[str] = ..., + source: Optional[str] = ..., + schema: Optional[StructType] = ..., + description: Optional[str] = ..., + **options: str + ) -> DataFrame: ... + def dropTempView(self, viewName: str) -> None: ... + def dropGlobalTempView(self, viewName: str) -> None: ... + def registerFunction( + self, name: str, f: Callable[..., Any], returnType: DataType = ... + ) -> None: ... + def isCached(self, tableName: str) -> bool: ... + def cacheTable(self, tableName: str) -> None: ... + def uncacheTable(self, tableName: str) -> None: ... + def clearCache(self) -> None: ... + def refreshTable(self, tableName: str) -> None: ... + def recoverPartitions(self, tableName: str) -> None: ... + def refreshByPath(self, path: str) -> None: ... diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi new file mode 100644 index 0000000000000..261fb6e5f3911 --- /dev/null +++ b/python/pyspark/sql/column.pyi @@ -0,0 +1,112 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Union + +from pyspark.sql._typing import LiteralType, DecimalLiteral, DateTimeLiteral +from pyspark.sql.types import ( # noqa: F401 + DataType, + StructField, + StructType, + IntegerType, + StringType, +) +from pyspark.sql.window import WindowSpec + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class Column: + def __init__(self, JavaObject) -> None: ... + def __neg__(self) -> Column: ... + def __add__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __sub__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __mul__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... 
+ def __div__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __truediv__( + self, other: Union[Column, LiteralType, DecimalLiteral] + ) -> Column: ... + def __mod__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __radd__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rsub__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rmul__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rdiv__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rtruediv__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __rmod__(self, other: Union[bool, int, float, DecimalLiteral]) -> Column: ... + def __pow__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... + def __rpow__(self, other: Union[LiteralType, DecimalLiteral]) -> Column: ... + def __eq__(self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral]) -> Column: ... # type: ignore[override] + def __ne__(self, other: Any) -> Column: ... # type: ignore[override] + def __lt__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __le__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __ge__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def __gt__( + self, other: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral] + ) -> Column: ... + def eqNullSafe( + self, other: Union[Column, LiteralType, DecimalLiteral] + ) -> Column: ... + def __and__(self, other: Column) -> Column: ... + def __or__(self, other: Column) -> Column: ... + def __invert__(self) -> Column: ... + def __rand__(self, other: Column) -> Column: ... + def __ror__(self, other: Column) -> Column: ... + def __contains__(self, other: Any) -> Column: ... + def __getitem__(self, other: Any) -> Column: ... + def bitwiseOR(self, other: Union[Column, int]) -> Column: ... + def bitwiseAND(self, other: Union[Column, int]) -> Column: ... + def bitwiseXOR(self, other: Union[Column, int]) -> Column: ... + def getItem(self, key: Any) -> Column: ... + def getField(self, name: Any) -> Column: ... + def withField(self, fieldName: str, col: Column) -> Column: ... + def __getattr__(self, item: Any) -> Column: ... + def __iter__(self) -> None: ... + def rlike(self, item: str) -> Column: ... + def like(self, item: str) -> Column: ... + def startswith(self, item: Union[str, Column]) -> Column: ... + def endswith(self, item: Union[str, Column]) -> Column: ... + @overload + def substr(self, startPos: int, length: int) -> Column: ... + @overload + def substr(self, startPos: Column, length: Column) -> Column: ... + def __getslice__(self, startPos: int, length: int) -> Column: ... + def isin(self, *cols: Any) -> Column: ... + def asc(self) -> Column: ... + def asc_nulls_first(self) -> Column: ... + def asc_nulls_last(self) -> Column: ... + def desc(self) -> Column: ... + def desc_nulls_first(self) -> Column: ... + def desc_nulls_last(self) -> Column: ... + def isNull(self) -> Column: ... + def isNotNull(self) -> Column: ... + def alias(self, *alias: str, **kwargs: Any) -> Column: ... + def name(self, *alias: str) -> Column: ... + def cast(self, dataType: Union[DataType, str]) -> Column: ... + def astype(self, dataType: Union[DataType, str]) -> Column: ... + def between(self, lowerBound, upperBound) -> Column: ... 
+ def when(self, condition: Column, value: Any) -> Column: ... + def otherwise(self, value: Any) -> Column: ... + def over(self, window: WindowSpec) -> Column: ... + def __nonzero__(self) -> None: ... + def __bool__(self) -> None: ... diff --git a/python/pyspark/sql/conf.pyi b/python/pyspark/sql/conf.pyi new file mode 100644 index 0000000000000..304dfcb3f9e53 --- /dev/null +++ b/python/pyspark/sql/conf.pyi @@ -0,0 +1,27 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Optional +from py4j.java_gateway import JavaObject # type: ignore[import] + +class RuntimeConfig: + def __init__(self, jconf: JavaObject) -> None: ... + def set(self, key: str, value: str) -> str: ... + def get(self, key: str, default: Optional[str] = ...) -> str: ... + def unset(self, key: str) -> None: ... + def isModifiable(self, key: str) -> bool: ... diff --git a/python/pyspark/sql/context.pyi b/python/pyspark/sql/context.pyi new file mode 100644 index 0000000000000..64927b37ac2a9 --- /dev/null +++ b/python/pyspark/sql/context.pyi @@ -0,0 +1,139 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Any, Callable, Iterable, List, Optional, Tuple, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import ( + DateTimeLiteral, + LiteralType, + DecimalLiteral, + RowLike, +) +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.session import SparkSession +from pyspark.sql.types import AtomicType, DataType, StructType +from pyspark.sql.udf import UDFRegistration as UDFRegistration +from pyspark.sql.readwriter import DataFrameReader +from pyspark.sql.streaming import DataStreamReader, StreamingQueryManager + +T = TypeVar("T") + +class SQLContext: + sparkSession: SparkSession + def __init__( + self, + sparkContext, + sparkSession: Optional[SparkSession] = ..., + jsqlContext: Optional[JavaObject] = ..., + ) -> None: ... + @classmethod + def getOrCreate(cls: type, sc: SparkContext) -> SQLContext: ... + def newSession(self) -> SQLContext: ... + def setConf(self, key: str, value) -> None: ... + def getConf(self, key: str, defaultValue: Optional[str] = ...) -> str: ... + @property + def udf(self) -> UDFRegistration: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numPartitions: Optional[int] = ..., + ) -> DataFrame: ... + def registerFunction( + self, name: str, f: Callable[..., Any], returnType: DataType = ... + ) -> None: ... + def registerJavaFunction( + self, name: str, javaClassName: str, returnType: Optional[DataType] = ... + ) -> None: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + samplingRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[List[str], Tuple[str, ...]] = ..., + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[ + RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + Iterable[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + ], + schema: Union[AtomicType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + def registerDataFrameAsTable(self, df: DataFrame, tableName: str) -> None: ... + def dropTempTable(self, tableName: str) -> None: ... + def createExternalTable( + self, + tableName: str, + path: Optional[str] = ..., + source: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options + ) -> DataFrame: ... + def sql(self, sqlQuery: str) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + def tables(self, dbName: Optional[str] = ...) -> DataFrame: ... + def tableNames(self, dbName: Optional[str] = ...) -> List[str]: ... + def cacheTable(self, tableName: str) -> None: ... + def uncacheTable(self, tableName: str) -> None: ... + def clearCache(self) -> None: ... + @property + def read(self) -> DataFrameReader: ... + @property + def readStream(self) -> DataStreamReader: ... 
+ @property + def streams(self) -> StreamingQueryManager: ... + +class HiveContext(SQLContext): + def __init__( + self, sparkContext: SparkContext, jhiveContext: Optional[JavaObject] = ... + ) -> None: ... + def refreshTable(self, tableName: str) -> None: ... diff --git a/python/pyspark/sql/dataframe.pyi b/python/pyspark/sql/dataframe.pyi new file mode 100644 index 0000000000000..c498d529d820f --- /dev/null +++ b/python/pyspark/sql/dataframe.pyi @@ -0,0 +1,324 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Optional, + Tuple, + Union, +) + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import ColumnOrName, LiteralType, OptionalPrimitiveType +from pyspark.sql.types import ( # noqa: F401 + StructType, + StructField, + StringType, + IntegerType, + Row, +) # noqa: F401 +from pyspark.sql.context import SQLContext +from pyspark.sql.group import GroupedData +from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 +from pyspark.sql.streaming import DataStreamWriter +from pyspark.sql.column import Column +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel + +from pyspark.sql.pandas.conversion import PandasConversionMixin +from pyspark.sql.pandas.map_ops import PandasMapOpsMixin + +class DataFrame(PandasMapOpsMixin, PandasConversionMixin): + sql_ctx: SQLContext + is_cached: bool + def __init__(self, jdf: JavaObject, sql_ctx: SQLContext) -> None: ... + @property + def rdd(self) -> RDD[Row]: ... + @property + def na(self) -> DataFrameNaFunctions: ... + @property + def stat(self) -> DataFrameStatFunctions: ... + def toJSON(self, use_unicode: bool = ...) -> RDD[str]: ... + def registerTempTable(self, name: str) -> None: ... + def createTempView(self, name: str) -> None: ... + def createOrReplaceTempView(self, name: str) -> None: ... + def createGlobalTempView(self, name: str) -> None: ... + @property + def write(self) -> DataFrameWriter: ... + @property + def writeStream(self) -> DataStreamWriter: ... + @property + def schema(self) -> StructType: ... + def printSchema(self) -> None: ... + def explain( + self, extended: Optional[Union[bool, str]] = ..., mode: Optional[str] = ... + ) -> None: ... + def exceptAll(self, other: DataFrame) -> DataFrame: ... + def isLocal(self) -> bool: ... + @property + def isStreaming(self) -> bool: ... + def show( + self, n: int = ..., truncate: Union[bool, int] = ..., vertical: bool = ... + ) -> None: ... + def checkpoint(self, eager: bool = ...) -> DataFrame: ... + def localCheckpoint(self, eager: bool = ...) -> DataFrame: ... + def withWatermark( + self, eventTime: ColumnOrName, delayThreshold: str + ) -> DataFrame: ... 
+ def hint(self, name: str, *parameters: Any) -> DataFrame: ... + def count(self) -> int: ... + def collect(self) -> List[Row]: ... + def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[Row]: ... + def limit(self, num: int) -> DataFrame: ... + def take(self, num: int) -> List[Row]: ... + def tail(self, num: int) -> List[Row]: ... + def foreach(self, f: Callable[[Row], None]) -> None: ... + def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: ... + def cache(self) -> DataFrame: ... + def persist(self, storageLevel: StorageLevel = ...) -> DataFrame: ... + @property + def storageLevel(self) -> StorageLevel: ... + def unpersist(self, blocking: bool = ...) -> DataFrame: ... + def coalesce(self, numPartitions: int) -> DataFrame: ... + @overload + def repartition(self, numPartitions: int, *cols: ColumnOrName) -> DataFrame: ... + @overload + def repartition(self, *cols: ColumnOrName) -> DataFrame: ... + @overload + def repartitionByRange( + self, numPartitions: int, *cols: ColumnOrName + ) -> DataFrame: ... + @overload + def repartitionByRange(self, *cols: ColumnOrName) -> DataFrame: ... + def distinct(self) -> DataFrame: ... + @overload + def sample(self, fraction: float, seed: Optional[int] = ...) -> DataFrame: ... + @overload + def sample( + self, + withReplacement: Optional[bool], + fraction: float, + seed: Optional[int] = ..., + ) -> DataFrame: ... + def sampleBy( + self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + ) -> DataFrame: ... + def randomSplit( + self, weights: List[float], seed: Optional[int] = ... + ) -> List[DataFrame]: ... + @property + def dtypes(self) -> List[Tuple[str, str]]: ... + @property + def columns(self) -> List[str]: ... + def colRegex(self, colName: str) -> Column: ... + def alias(self, alias: str) -> DataFrame: ... + def crossJoin(self, other: DataFrame) -> DataFrame: ... + def join( + self, + other: DataFrame, + on: Optional[Union[str, List[str], Column, List[Column]]] = ..., + how: Optional[str] = ..., + ) -> DataFrame: ... + def sortWithinPartitions( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def sort( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def orderBy( + self, + *cols: Union[str, Column, List[Union[str, Column]]], + ascending: Union[bool, List[bool]] = ... + ) -> DataFrame: ... + def describe(self, *cols: Union[str, List[str]]) -> DataFrame: ... + def summary(self, *statistics: str) -> DataFrame: ... + @overload + def head(self) -> Row: ... + @overload + def head(self, n: int) -> List[Row]: ... + def first(self) -> Row: ... + def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Column: ... + def __getattr__(self, name: str) -> Column: ... + @overload + def select(self, *cols: ColumnOrName) -> DataFrame: ... + @overload + def select(self, __cols: Union[List[Column], List[str]]) -> DataFrame: ... + @overload + def selectExpr(self, *expr: str) -> DataFrame: ... + @overload + def selectExpr(self, *expr: List[str]) -> DataFrame: ... + def filter(self, condition: ColumnOrName) -> DataFrame: ... + @overload + def groupBy(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def groupBy(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + @overload + def rollup(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def rollup(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... 
+ @overload + def cube(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def cube(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + def agg(self, *exprs: Union[Column, Dict[str, str]]) -> DataFrame: ... + def union(self, other: DataFrame) -> DataFrame: ... + def unionAll(self, other: DataFrame) -> DataFrame: ... + def unionByName( + self, other: DataFrame, allowMissingColumns: bool = ... + ) -> DataFrame: ... + def intersect(self, other: DataFrame) -> DataFrame: ... + def intersectAll(self, other: DataFrame) -> DataFrame: ... + def subtract(self, other: DataFrame) -> DataFrame: ... + def dropDuplicates(self, subset: Optional[List[str]] = ...) -> DataFrame: ... + def dropna( + self, + how: str = ..., + thresh: Optional[int] = ..., + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def fillna( + self, + value: LiteralType, + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = ..., + ) -> DataFrame: ... + @overload + def fillna(self, value: Dict[str, LiteralType]) -> DataFrame: ... + @overload + def replace( + self, + to_replace: LiteralType, + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: List[OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: Dict[LiteralType, OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + def approxQuantile( + self, col: str, probabilities: List[float], relativeError: float + ) -> List[float]: ... + def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... + def cov(self, col1: str, col2: str) -> float: ... + def crosstab(self, col1: str, col2: str) -> DataFrame: ... + def freqItems( + self, cols: List[str], support: Optional[float] = ... + ) -> DataFrame: ... + def withColumn(self, colName: str, col: Column) -> DataFrame: ... + def withColumnRenamed(self, existing: str, new: str) -> DataFrame: ... + @overload + def drop(self, cols: ColumnOrName) -> DataFrame: ... + @overload + def drop(self, *cols: str) -> DataFrame: ... + def toDF(self, *cols: ColumnOrName) -> DataFrame: ... + def transform(self, func: Callable[[DataFrame], DataFrame]) -> DataFrame: ... + @overload + def groupby(self, *cols: ColumnOrName) -> GroupedData: ... + @overload + def groupby(self, __cols: Union[List[Column], List[str]]) -> GroupedData: ... + def drop_duplicates(self, subset: Optional[List[str]] = ...) -> DataFrame: ... + def where(self, condition: ColumnOrName) -> DataFrame: ... + def sameSemantics(self, other: DataFrame) -> bool: ... + def semanticHash(self) -> int: ... + def inputFiles(self) -> List[str]: ... + def writeTo(self, table: str) -> DataFrameWriterV2: ... + +class DataFrameNaFunctions: + df: DataFrame + def __init__(self, df: DataFrame) -> None: ... + def drop( + self, + how: str = ..., + thresh: Optional[int] = ..., + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def fill( + self, value: LiteralType, subset: Optional[List[str]] = ... + ) -> DataFrame: ... + @overload + def fill(self, value: Dict[str, LiteralType]) -> DataFrame: ... + @overload + def replace( + self, + to_replace: LiteralType, + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... 
+ @overload + def replace( + self, + to_replace: List[LiteralType], + value: List[OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: Dict[LiteralType, OptionalPrimitiveType], + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + @overload + def replace( + self, + to_replace: List[LiteralType], + value: OptionalPrimitiveType, + subset: Optional[List[str]] = ..., + ) -> DataFrame: ... + +class DataFrameStatFunctions: + df: DataFrame + def __init__(self, df: DataFrame) -> None: ... + def approxQuantile( + self, col: str, probabilities: List[float], relativeError: float + ) -> List[float]: ... + def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... + def cov(self, col1: str, col2: str) -> float: ... + def crosstab(self, col1: str, col2: str) -> DataFrame: ... + def freqItems( + self, cols: List[str], support: Optional[float] = ... + ) -> DataFrame: ... + def sampleBy( + self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + ) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi new file mode 100644 index 0000000000000..3b0b2030178ef --- /dev/null +++ b/python/pyspark/sql/functions.pyi @@ -0,0 +1,343 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, List, Optional, Union + +from pyspark.sql._typing import ( + ColumnOrName, + DataTypeOrString, +) +from pyspark.sql.pandas.functions import ( # noqa: F401 + pandas_udf as pandas_udf, + PandasUDFType as PandasUDFType, +) +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.types import ( # noqa: F401 + ArrayType, + StringType, + StructType, + DataType, +) +from pyspark.sql.utils import to_str # noqa: F401 + +def approxCountDistinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ... +def approx_count_distinct(col: ColumnOrName, rsd: Optional[float] = ...) -> Column: ... +def broadcast(df: DataFrame) -> DataFrame: ... +def coalesce(*cols: ColumnOrName) -> Column: ... +def corr(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def covar_pop(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def covar_samp(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def countDistinct(col: ColumnOrName, *cols: ColumnOrName) -> Column: ... +def first(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ... +def grouping(col: ColumnOrName) -> Column: ... +def grouping_id(*cols: ColumnOrName) -> Column: ... +def input_file_name() -> Column: ... +def isnan(col: ColumnOrName) -> Column: ... +def isnull(col: ColumnOrName) -> Column: ... +def last(col: ColumnOrName, ignorenulls: bool = ...) -> Column: ... 
+def monotonically_increasing_id() -> Column: ... +def nanvl(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def percentile_approx( + col: ColumnOrName, + percentage: Union[Column, float, List[float]], + accuracy: Union[Column, float] = ..., +) -> Column: ... +def rand(seed: Optional[int] = ...) -> Column: ... +def randn(seed: Optional[int] = ...) -> Column: ... +def round(col: ColumnOrName, scale: int = ...) -> Column: ... +def bround(col: ColumnOrName, scale: int = ...) -> Column: ... +def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ... +def shiftRight(col: ColumnOrName, numBits: int) -> Column: ... +def shiftRightUnsigned(col, numBits) -> Column: ... +def spark_partition_id() -> Column: ... +def expr(str: str) -> Column: ... +def struct(*cols: ColumnOrName) -> Column: ... +def greatest(*cols: ColumnOrName) -> Column: ... +def least(*cols: Column) -> Column: ... +def when(condition: Column, value) -> Column: ... +@overload +def log(arg1: ColumnOrName) -> Column: ... +@overload +def log(arg1: float, arg2: ColumnOrName) -> Column: ... +def log2(col: ColumnOrName) -> Column: ... +def conv(col: ColumnOrName, fromBase: int, toBase: int) -> Column: ... +def factorial(col: ColumnOrName) -> Column: ... +def lag( + col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... +) -> Column: ... +def lead( + col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... +) -> Column: ... +def ntile(n: int) -> Column: ... +def current_date() -> Column: ... +def current_timestamp() -> Column: ... +def date_format(date: ColumnOrName, format: str) -> Column: ... +def year(col: ColumnOrName) -> Column: ... +def quarter(col: ColumnOrName) -> Column: ... +def month(col: ColumnOrName) -> Column: ... +def dayofweek(col: ColumnOrName) -> Column: ... +def dayofmonth(col: ColumnOrName) -> Column: ... +def dayofyear(col: ColumnOrName) -> Column: ... +def hour(col: ColumnOrName) -> Column: ... +def minute(col: ColumnOrName) -> Column: ... +def second(col: ColumnOrName) -> Column: ... +def weekofyear(col: ColumnOrName) -> Column: ... +def date_add(start: ColumnOrName, days: int) -> Column: ... +def date_sub(start: ColumnOrName, days: int) -> Column: ... +def datediff(end: ColumnOrName, start: ColumnOrName) -> Column: ... +def add_months(start: ColumnOrName, months: int) -> Column: ... +def months_between( + date1: ColumnOrName, date2: ColumnOrName, roundOff: bool = ... +) -> Column: ... +def to_date(col: ColumnOrName, format: Optional[str] = ...) -> Column: ... +@overload +def to_timestamp(col: ColumnOrName) -> Column: ... +@overload +def to_timestamp(col: ColumnOrName, format: str) -> Column: ... +def trunc(date: ColumnOrName, format: str) -> Column: ... +def date_trunc(format: str, timestamp: ColumnOrName) -> Column: ... +def next_day(date: ColumnOrName, dayOfWeek: str) -> Column: ... +def last_day(date: ColumnOrName) -> Column: ... +def from_unixtime(timestamp: ColumnOrName, format: str = ...) -> Column: ... +def unix_timestamp( + timestamp: Optional[ColumnOrName] = ..., format: str = ... +) -> Column: ... +def from_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ... +def to_utc_timestamp(timestamp: ColumnOrName, tz: ColumnOrName) -> Column: ... +def timestamp_seconds(col: ColumnOrName) -> Column: ... +def window( + timeColumn: ColumnOrName, + windowDuration: str, + slideDuration: Optional[str] = ..., + startTime: Optional[str] = ..., +) -> Column: ... +def crc32(col: ColumnOrName) -> Column: ... +def md5(col: ColumnOrName) -> Column: ... 
+def sha1(col: ColumnOrName) -> Column: ... +def sha2(col: ColumnOrName, numBits: int) -> Column: ... +def hash(*cols: ColumnOrName) -> Column: ... +def xxhash64(*cols: ColumnOrName) -> Column: ... +def concat(*cols: ColumnOrName) -> Column: ... +def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... +def decode(col: ColumnOrName, charset: str) -> Column: ... +def encode(col: ColumnOrName, charset: str) -> Column: ... +def format_number(col: ColumnOrName, d: int) -> Column: ... +def format_string(format: str, *cols: ColumnOrName) -> Column: ... +def instr(str: ColumnOrName, substr: str) -> Column: ... +def overlay( + src: ColumnOrName, + replace: ColumnOrName, + pos: Union[Column, int], + len: Union[Column, int] = ..., +) -> Column: ... +def substring(str: ColumnOrName, pos: int, len: int) -> Column: ... +def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ... +def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column: ... +def locate(substr: str, str: Column, pos: int = ...) -> Column: ... +def lpad(col: Column, len: int, pad: str) -> Column: ... +def rpad(col: Column, len: int, pad: str) -> Column: ... +def repeat(col: Column, n: int) -> Column: ... +def split(str: Column, pattern: str, limit: int = ...) -> Column: ... +def regexp_extract(str: ColumnOrName, pattern: str, idx: int) -> Column: ... +def regexp_replace(str: ColumnOrName, pattern: str, replacement: str) -> Column: ... +def initcap(col: ColumnOrName) -> Column: ... +def soundex(col: ColumnOrName) -> Column: ... +def bin(col: ColumnOrName) -> Column: ... +def hex(col: ColumnOrName) -> Column: ... +def unhex(col: ColumnOrName) -> Column: ... +def length(col: ColumnOrName) -> Column: ... +def translate(srcCol: ColumnOrName, matching: str, replace: str) -> Column: ... +def map_from_arrays(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def create_map(*cols: ColumnOrName) -> Column: ... +def array(*cols: ColumnOrName) -> Column: ... +def array_contains(col: ColumnOrName, value: Any) -> Column: ... +def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... +def slice(x: ColumnOrName, start: int, length: int) -> Column: ... +def array_join( + col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... +) -> Column: ... +def array_position(col: ColumnOrName, value: Any) -> Column: ... +def element_at(col: ColumnOrName, extraction: Any) -> Column: ... +def array_remove(col: ColumnOrName, element: Any) -> Column: ... +def array_distinct(col: ColumnOrName) -> Column: ... +def array_intersect(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def array_union(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def array_except(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +def explode(col: ColumnOrName) -> Column: ... +def explode_outer(col: ColumnOrName) -> Column: ... +def posexplode(col: ColumnOrName) -> Column: ... +def posexplode_outer(col: ColumnOrName) -> Column: ... +def get_json_object(col: ColumnOrName, path: str) -> Column: ... +def json_tuple(col: ColumnOrName, *fields: str) -> Column: ... +def from_json( + col: ColumnOrName, + schema: Union[ArrayType, StructType, Column, str], + options: Dict[str, str] = ..., +) -> Column: ... +def to_json(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def schema_of_json(json: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def schema_of_csv(csv: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def to_csv(col: ColumnOrName, options: Dict[str, str] = ...) 
-> Column: ... +def size(col: ColumnOrName) -> Column: ... +def array_min(col: ColumnOrName) -> Column: ... +def array_max(col: ColumnOrName) -> Column: ... +def sort_array(col: ColumnOrName, asc: bool = ...) -> Column: ... +def array_sort(col: ColumnOrName) -> Column: ... +def shuffle(col: ColumnOrName) -> Column: ... +def reverse(col: ColumnOrName) -> Column: ... +def flatten(col: ColumnOrName) -> Column: ... +def map_keys(col: ColumnOrName) -> Column: ... +def map_values(col: ColumnOrName) -> Column: ... +def map_entries(col: ColumnOrName) -> Column: ... +def map_from_entries(col: ColumnOrName) -> Column: ... +def array_repeat(col: ColumnOrName, count: Union[Column, int]) -> Column: ... +def arrays_zip(*cols: ColumnOrName) -> Column: ... +def map_concat(*cols: ColumnOrName) -> Column: ... +def sequence( + start: ColumnOrName, stop: ColumnOrName, step: Optional[ColumnOrName] = ... +) -> Column: ... +def from_csv( + col: ColumnOrName, + schema: Union[StructType, Column, str], + options: Dict[str, str] = ..., +) -> Column: ... +@overload +def transform(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def transform(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def exists(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +def forall(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def filter(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... +@overload +def filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def aggregate( + col: ColumnOrName, + zero: ColumnOrName, + merge: Callable[[Column, Column], Column], + finish: Optional[Callable[[Column], Column]] = ..., +) -> Column: ... +def zip_with( + col1: ColumnOrName, + ColumnOrName: ColumnOrName, + f: Callable[[Column, Column], Column], +) -> Column: ... +def transform_keys( + col: ColumnOrName, f: Callable[[Column, Column], Column] +) -> Column: ... +def transform_values( + col: ColumnOrName, f: Callable[[Column, Column], Column] +) -> Column: ... +def map_filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... +def map_zip_with( + col1: ColumnOrName, + col2: ColumnOrName, + f: Callable[[Column, Column, Column], Column], +) -> Column: ... +def abs(col: ColumnOrName) -> Column: ... +def acos(col: ColumnOrName) -> Column: ... +def asc(col: ColumnOrName) -> Column: ... +def asc_nulls_first(col: ColumnOrName) -> Column: ... +def asc_nulls_last(col: ColumnOrName) -> Column: ... +def ascii(col: ColumnOrName) -> Column: ... +def asin(col: ColumnOrName) -> Column: ... +def atan(col: ColumnOrName) -> Column: ... +@overload +def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def atan2(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def atan2(col1: ColumnOrName, col2: float) -> Column: ... +def avg(col: ColumnOrName) -> Column: ... +def base64(col: ColumnOrName) -> Column: ... +def bitwiseNOT(col: ColumnOrName) -> Column: ... +def cbrt(col: ColumnOrName) -> Column: ... +def ceil(col: ColumnOrName) -> Column: ... +def col(col: str) -> Column: ... +def collect_list(col: ColumnOrName) -> Column: ... +def collect_set(col: ColumnOrName) -> Column: ... +def column(col: str) -> Column: ... +def cos(col: ColumnOrName) -> Column: ... +def cosh(col: ColumnOrName) -> Column: ... +def count(col: ColumnOrName) -> Column: ... +def cume_dist() -> Column: ... +def degrees(col: ColumnOrName) -> Column: ... +def dense_rank() -> Column: ... 
+def desc(col: ColumnOrName) -> Column: ... +def desc_nulls_first(col: ColumnOrName) -> Column: ... +def desc_nulls_last(col: ColumnOrName) -> Column: ... +def exp(col: ColumnOrName) -> Column: ... +def expm1(col: ColumnOrName) -> Column: ... +def floor(col: ColumnOrName) -> Column: ... +@overload +def hypot(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def hypot(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def hypot(col1: ColumnOrName, col2: float) -> Column: ... +def kurtosis(col: ColumnOrName) -> Column: ... +def lit(col: Any) -> Column: ... +def log10(col: ColumnOrName) -> Column: ... +def log1p(col: ColumnOrName) -> Column: ... +def lower(col: ColumnOrName) -> Column: ... +def ltrim(col: ColumnOrName) -> Column: ... +def max(col: ColumnOrName) -> Column: ... +def mean(col: ColumnOrName) -> Column: ... +def min(col: ColumnOrName) -> Column: ... +def percent_rank() -> Column: ... +@overload +def pow(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... +@overload +def pow(col1: float, col2: ColumnOrName) -> Column: ... +@overload +def pow(col1: ColumnOrName, col2: float) -> Column: ... +def radians(col: ColumnOrName) -> Column: ... +def rank() -> Column: ... +def rint(col: ColumnOrName) -> Column: ... +def row_number() -> Column: ... +def rtrim(col: ColumnOrName) -> Column: ... +def signum(col: ColumnOrName) -> Column: ... +def sin(col: ColumnOrName) -> Column: ... +def sinh(col: ColumnOrName) -> Column: ... +def skewness(col: ColumnOrName) -> Column: ... +def sqrt(col: ColumnOrName) -> Column: ... +def stddev(col: ColumnOrName) -> Column: ... +def stddev_pop(col: ColumnOrName) -> Column: ... +def stddev_samp(col: ColumnOrName) -> Column: ... +def sum(col: ColumnOrName) -> Column: ... +def sumDistinct(col: ColumnOrName) -> Column: ... +def tan(col: ColumnOrName) -> Column: ... +def tanh(col: ColumnOrName) -> Column: ... +def toDegrees(col: ColumnOrName) -> Column: ... +def toRadians(col: ColumnOrName) -> Column: ... +def trim(col: ColumnOrName) -> Column: ... +def unbase64(col: ColumnOrName) -> Column: ... +def upper(col: ColumnOrName) -> Column: ... +def var_pop(col: ColumnOrName) -> Column: ... +def var_samp(col: ColumnOrName) -> Column: ... +def variance(col: ColumnOrName) -> Column: ... +@overload +def udf( + f: Callable[..., Any], returnType: DataTypeOrString = ... +) -> Callable[..., Column]: ... +@overload +def udf( + f: DataTypeOrString = ..., +) -> Callable[[Callable[..., Any]], Callable[..., Column]]: ... diff --git a/python/pyspark/sql/group.pyi b/python/pyspark/sql/group.pyi new file mode 100644 index 0000000000000..0b0df8c63cfdd --- /dev/null +++ b/python/pyspark/sql/group.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Dict, List, Optional + +from pyspark.sql._typing import LiteralType +from pyspark.sql.context import SQLContext +from pyspark.sql.column import Column +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin +from py4j.java_gateway import JavaObject # type: ignore[import] + +class GroupedData(PandasGroupedOpsMixin): + sql_ctx: SQLContext + def __init__(self, jgd: JavaObject, df: DataFrame) -> None: ... + @overload + def agg(self, *exprs: Column) -> DataFrame: ... + @overload + def agg(self, __exprs: Dict[str, str]) -> DataFrame: ... + def count(self) -> DataFrame: ... + def mean(self, *cols: str) -> DataFrame: ... + def avg(self, *cols: str) -> DataFrame: ... + def max(self, *cols: str) -> DataFrame: ... + def min(self, *cols: str) -> DataFrame: ... + def sum(self, *cols: str) -> DataFrame: ... + def pivot( + self, pivot_col: str, values: Optional[List[LiteralType]] = ... + ) -> GroupedData: ... diff --git a/python/pyspark/sql/pandas/__init__.pyi b/python/pyspark/sql/pandas/__init__.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/sql/pandas/__init__.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/sql/pandas/_typing/__init__.pyi b/python/pyspark/sql/pandas/_typing/__init__.pyi new file mode 100644 index 0000000000000..dda1b3341b31c --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/__init__.pyi @@ -0,0 +1,338 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import ( + Any, + Callable, + Iterable, + NewType, + Tuple, + Type, + Union, +) +from typing_extensions import Protocol, Literal +from types import FunctionType + +from pyspark.sql._typing import LiteralType +from pyspark.sql.pandas._typing.protocols.frame import DataFrameLike as DataFrameLike +from pyspark.sql.pandas._typing.protocols.series import SeriesLike as SeriesLike + +import pandas.core.frame # type: ignore[import] +import pandas.core.series # type: ignore[import] + +# POC compatibility annotations +PandasDataFrame: Type[DataFrameLike] = pandas.core.frame.DataFrame +PandasSeries: Type[SeriesLike] = pandas.core.series.Series + +DataFrameOrSeriesLike = Union[DataFrameLike, SeriesLike] + +# UDF annotations +PandasScalarUDFType = Literal[200] +PandasScalarIterUDFType = Literal[204] +PandasGroupedMapUDFType = Literal[201] +PandasCogroupedMapUDFType = Literal[206] +PandasGroupedAggUDFType = Literal[202] +PandasMapIterUDFType = Literal[205] + +class PandasVariadicScalarToScalarFunction(Protocol): + def __call__(self, *_: DataFrameOrSeriesLike) -> SeriesLike: ... + +PandasScalarToScalarFunction = Union[ + PandasVariadicScalarToScalarFunction, + Callable[[DataFrameOrSeriesLike], SeriesLike], + Callable[[DataFrameOrSeriesLike, DataFrameOrSeriesLike], SeriesLike], + Callable[ + [DataFrameOrSeriesLike, DataFrameOrSeriesLike, DataFrameOrSeriesLike], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + SeriesLike, + ], +] + +class PandasVariadicScalarToStructFunction(Protocol): + def __call__(self, *_: DataFrameOrSeriesLike) -> DataFrameLike: ... 
+ +PandasScalarToStructFunction = Union[ + PandasVariadicScalarToStructFunction, + Callable[[DataFrameOrSeriesLike], DataFrameLike], + Callable[[DataFrameOrSeriesLike, DataFrameOrSeriesLike], DataFrameLike], + Callable[ + [DataFrameOrSeriesLike, DataFrameOrSeriesLike, DataFrameOrSeriesLike], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], + Callable[ + [ + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + DataFrameOrSeriesLike, + ], + DataFrameLike, + ], +] + +PandasScalarIterFunction = Callable[ + [Iterable[Union[DataFrameOrSeriesLike, Tuple[DataFrameOrSeriesLike, ...]]]], + Iterable[SeriesLike], +] + +PandasGroupedMapFunction = Union[ + Callable[[DataFrameLike], DataFrameLike], + Callable[[Any, DataFrameLike], DataFrameLike], +] + +class PandasVariadicGroupedAggFunction(Protocol): + def __call__(self, *_: SeriesLike) -> LiteralType: ... 
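The fixed-arity `Union` aliases above enumerate call signatures of one to ten arguments, plus a variadic Protocol, so a type checker can accept user functions of any of those shapes. A hedged sketch of callables matching two of the aliases already defined at this point (function and column choices are invented for illustration):

```
import pandas as pd

# Shape compatible with PandasScalarToScalarFunction: pandas Series in, Series out
def double(s: pd.Series) -> pd.Series:
    return s * 2

# Shape compatible with PandasGroupedMapFunction: one pandas DataFrame in, one out
def normalize(pdf: pd.DataFrame) -> pd.DataFrame:
    # assumes an all-numeric frame
    return (pdf - pdf.mean()) / pdf.std()
```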
+ +PandasGroupedAggFunction = Union[ + Callable[[SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[[SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], LiteralType], + Callable[ + [SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike, SeriesLike], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + Callable[ + [ + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + SeriesLike, + ], + LiteralType, + ], + PandasVariadicGroupedAggFunction, +] + +PandasMapIterFunction = Callable[[Iterable[DataFrameLike]], Iterable[DataFrameLike]] + +PandasCogroupedMapFunction = Callable[[DataFrameLike, DataFrameLike], DataFrameLike] + +MapIterPandasUserDefinedFunction = NewType( + "MapIterPandasUserDefinedFunction", FunctionType +) +GroupedMapPandasUserDefinedFunction = NewType( + "GroupedMapPandasUserDefinedFunction", FunctionType +) +CogroupedMapPandasUserDefinedFunction = NewType( + "CogroupedMapPandasUserDefinedFunction", FunctionType +) diff --git a/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi b/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi new file mode 100644 index 0000000000000..217e5db960782 --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/__init__.pyi @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi new file mode 100644 index 0000000000000..de679ee2cd017 --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi @@ -0,0 +1,428 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This Protocol resuses core Pandas annotation. +# Overall pipeline looks as follows +# - Stubgen pandas.core.frame +# - Add Protocol as a base class +# - Replace imports with Any + +import numpy.ma as np # type: ignore[import] +from typing import Any, Hashable, IO, Iterable, List, Optional, Sequence, Tuple, Union +from typing_extensions import Protocol +from .series import SeriesLike + +Axes = Any +Dtype = Any +Index = Any +Renamer = Any +Axis = Any +Level = Any + +class DataFrameLike(Protocol): + def __init__( + self, + data: Any = ..., + index: Optional[Axes] = ..., + columns: Optional[Axes] = ..., + dtype: Optional[Dtype] = ..., + copy: bool = ..., + ) -> None: ... + @property + def axes(self) -> List[Index]: ... + @property + def shape(self) -> Tuple[int, int]: ... + @property + def style(self) -> Any: ... + def items(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def iteritems(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def iterrows(self) -> Iterable[Tuple[Optional[Hashable], SeriesLike]]: ... + def itertuples(self, index: bool = ..., name: str = ...): ... + def __len__(self) -> int: ... + def dot(self, other: Any): ... + def __matmul__(self, other: Any): ... + def __rmatmul__(self, other: Any): ... + @classmethod + def from_dict( + cls: Any, data: Any, orient: Any = ..., dtype: Any = ..., columns: Any = ... + ) -> DataFrameLike: ... + def to_numpy(self, dtype: Any = ..., copy: Any = ...) -> np.ndarray: ... + def to_dict(self, orient: str = ..., into: Any = ...): ... + def to_gbq( + self, + destination_table: Any, + project_id: Any = ..., + chunksize: Any = ..., + reauth: Any = ..., + if_exists: Any = ..., + auth_local_webserver: Any = ..., + table_schema: Any = ..., + location: Any = ..., + progress_bar: Any = ..., + credentials: Any = ..., + ) -> None: ... + @classmethod + def from_records( + cls: Any, + data: Any, + index: Any = ..., + exclude: Any = ..., + columns: Any = ..., + coerce_float: Any = ..., + nrows: Any = ..., + ) -> DataFrameLike: ... + def to_records( + self, index: Any = ..., column_dtypes: Any = ..., index_dtypes: Any = ... + ) -> np.recarray: ... + def to_stata( + self, + path: Any, + convert_dates: Optional[Any] = ..., + write_index: bool = ..., + byteorder: Optional[Any] = ..., + time_stamp: Optional[Any] = ..., + data_label: Optional[Any] = ..., + variable_labels: Optional[Any] = ..., + version: int = ..., + convert_strl: Optional[Any] = ..., + ) -> None: ... + def to_feather(self, path: Any) -> None: ... + def to_markdown( + self, buf: Optional[IO[str]] = ..., mode: Optional[str] = ..., **kwargs: Any + ) -> Optional[str]: ... + def to_parquet( + self, + path: Any, + engine: Any = ..., + compression: Any = ..., + index: Any = ..., + partition_cols: Any = ..., + **kwargs: Any + ) -> None: ... 
+ def to_html( + self, + buf: Optional[Any] = ..., + columns: Optional[Any] = ..., + col_space: Optional[Any] = ..., + header: bool = ..., + index: bool = ..., + na_rep: str = ..., + formatters: Optional[Any] = ..., + float_format: Optional[Any] = ..., + sparsify: Optional[Any] = ..., + index_names: bool = ..., + justify: Optional[Any] = ..., + max_rows: Optional[Any] = ..., + max_cols: Optional[Any] = ..., + show_dimensions: bool = ..., + decimal: str = ..., + bold_rows: bool = ..., + classes: Optional[Any] = ..., + escape: bool = ..., + notebook: bool = ..., + border: Optional[Any] = ..., + table_id: Optional[Any] = ..., + render_links: bool = ..., + encoding: Optional[Any] = ..., + ): ... + def info( + self, + verbose: Any = ..., + buf: Any = ..., + max_cols: Any = ..., + memory_usage: Any = ..., + null_counts: Any = ..., + ) -> None: ... + def memory_usage(self, index: Any = ..., deep: Any = ...) -> SeriesLike: ... + def transpose(self, *args: Any, copy: bool = ...) -> DataFrameLike: ... + T: Any = ... + def __getitem__(self, key: Any): ... + def __setitem__(self, key: Any, value: Any): ... + def query(self, expr: Any, inplace: bool = ..., **kwargs: Any): ... + def eval(self, expr: Any, inplace: bool = ..., **kwargs: Any): ... + def select_dtypes( + self, include: Any = ..., exclude: Any = ... + ) -> DataFrameLike: ... + def insert( + self, loc: Any, column: Any, value: Any, allow_duplicates: Any = ... + ) -> None: ... + def assign(self, **kwargs: Any) -> DataFrameLike: ... + def lookup(self, row_labels: Any, col_labels: Any) -> np.ndarray: ... + def align( + self, + other: Any, + join: Any = ..., + axis: Any = ..., + level: Any = ..., + copy: Any = ..., + fill_value: Any = ..., + method: Any = ..., + limit: Any = ..., + fill_axis: Any = ..., + broadcast_axis: Any = ..., + ) -> DataFrameLike: ... + def reindex(self, *args: Any, **kwargs: Any) -> DataFrameLike: ... + def drop( + self, + labels: Optional[Any] = ..., + axis: int = ..., + index: Optional[Any] = ..., + columns: Optional[Any] = ..., + level: Optional[Any] = ..., + inplace: bool = ..., + errors: str = ..., + ): ... + def rename( + self, + mapper: Optional[Renamer] = ..., + *, + index: Optional[Renamer] = ..., + columns: Optional[Renamer] = ..., + axis: Optional[Axis] = ..., + copy: bool = ..., + inplace: bool = ..., + level: Optional[Level] = ..., + errors: str = ... + ) -> Optional[DataFrameLike]: ... + def fillna( + self, + value: Any = ..., + method: Any = ..., + axis: Any = ..., + inplace: Any = ..., + limit: Any = ..., + downcast: Any = ..., + ) -> Optional[DataFrameLike]: ... + def replace( + self, + to_replace: Optional[Any] = ..., + value: Optional[Any] = ..., + inplace: bool = ..., + limit: Optional[Any] = ..., + regex: bool = ..., + method: str = ..., + ): ... + def shift( + self, + periods: Any = ..., + freq: Any = ..., + axis: Any = ..., + fill_value: Any = ..., + ) -> DataFrameLike: ... + def set_index( + self, + keys: Any, + drop: bool = ..., + append: bool = ..., + inplace: bool = ..., + verify_integrity: bool = ..., + ): ... + def reset_index( + self, + level: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + drop: bool = ..., + inplace: bool = ..., + col_level: Hashable = ..., + col_fill: Optional[Hashable] = ..., + ) -> Optional[DataFrameLike]: ... + def isna(self) -> DataFrameLike: ... + def isnull(self) -> DataFrameLike: ... + def notna(self) -> DataFrameLike: ... + def notnull(self) -> DataFrameLike: ... 
+ def dropna( + self, + axis: int = ..., + how: str = ..., + thresh: Optional[Any] = ..., + subset: Optional[Any] = ..., + inplace: bool = ..., + ): ... + def drop_duplicates( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + keep: Union[str, bool] = ..., + inplace: bool = ..., + ignore_index: bool = ..., + ) -> Optional[DataFrameLike]: ... + def duplicated( + self, + subset: Optional[Union[Hashable, Sequence[Hashable]]] = ..., + keep: Union[str, bool] = ..., + ) -> SeriesLike: ... + def sort_values( + self, + by: Any, + axis: int = ..., + ascending: bool = ..., + inplace: bool = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + ): ... + def sort_index( + self, + axis: Any = ..., + level: Any = ..., + ascending: Any = ..., + inplace: Any = ..., + kind: Any = ..., + na_position: Any = ..., + sort_remaining: Any = ..., + ignore_index: bool = ..., + ) -> Any: ... + def nlargest(self, n: Any, columns: Any, keep: Any = ...) -> DataFrameLike: ... + def nsmallest(self, n: Any, columns: Any, keep: Any = ...) -> DataFrameLike: ... + def swaplevel( + self, i: Any = ..., j: Any = ..., axis: Any = ... + ) -> DataFrameLike: ... + def reorder_levels(self, order: Any, axis: Any = ...) -> DataFrameLike: ... + def combine( + self, + other: DataFrameLike, + func: Any, + fill_value: Any = ..., + overwrite: Any = ..., + ) -> DataFrameLike: ... + def combine_first(self, other: DataFrameLike) -> DataFrameLike: ... + def update( + self, + other: Any, + join: Any = ..., + overwrite: Any = ..., + filter_func: Any = ..., + errors: Any = ..., + ) -> None: ... + def groupby( + self, + by: Any = ..., + axis: Any = ..., + level: Any = ..., + as_index: bool = ..., + sort: bool = ..., + group_keys: bool = ..., + squeeze: bool = ..., + observed: bool = ..., + ) -> Any: ... + def pivot( + self, index: Any = ..., columns: Any = ..., values: Any = ... + ) -> DataFrameLike: ... + def pivot_table( + self, + values: Any = ..., + index: Any = ..., + columns: Any = ..., + aggfunc: Any = ..., + fill_value: Any = ..., + margins: Any = ..., + dropna: Any = ..., + margins_name: Any = ..., + observed: Any = ..., + ) -> DataFrameLike: ... + def stack(self, level: int = ..., dropna: bool = ...): ... + def explode(self, column: Union[str, Tuple]) -> DataFrameLike: ... + def unstack(self, level: int = ..., fill_value: Optional[Any] = ...): ... + def melt( + self, + id_vars: Any = ..., + value_vars: Any = ..., + var_name: Any = ..., + value_name: Any = ..., + col_level: Any = ..., + ) -> DataFrameLike: ... + def diff(self, periods: Any = ..., axis: Any = ...) -> DataFrameLike: ... + def aggregate(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + agg: Any = ... + def transform( + self, func: Any, axis: Any = ..., *args: Any, **kwargs: Any + ) -> DataFrameLike: ... + def apply( + self, + func: Any, + axis: int = ..., + raw: bool = ..., + result_type: Optional[Any] = ..., + args: Any = ..., + **kwds: Any + ): ... + def applymap(self, func: Any) -> DataFrameLike: ... + def append( + self, + other: Any, + ignore_index: Any = ..., + verify_integrity: Any = ..., + sort: Any = ..., + ) -> DataFrameLike: ... + def join( + self, + other: Any, + on: Any = ..., + how: Any = ..., + lsuffix: Any = ..., + rsuffix: Any = ..., + sort: Any = ..., + ) -> DataFrameLike: ... 
+ def merge( + self, + right: Any, + how: Any = ..., + on: Any = ..., + left_on: Any = ..., + right_on: Any = ..., + left_index: Any = ..., + right_index: Any = ..., + sort: Any = ..., + suffixes: Any = ..., + copy: Any = ..., + indicator: Any = ..., + validate: Any = ..., + ) -> DataFrameLike: ... + def round( + self, decimals: Any = ..., *args: Any, **kwargs: Any + ) -> DataFrameLike: ... + def corr(self, method: Any = ..., min_periods: Any = ...) -> DataFrameLike: ... + def cov(self, min_periods: Any = ...) -> DataFrameLike: ... + def corrwith( + self, other: Any, axis: Any = ..., drop: Any = ..., method: Any = ... + ) -> SeriesLike: ... + def count( + self, axis: int = ..., level: Optional[Any] = ..., numeric_only: bool = ... + ): ... + def nunique(self, axis: Any = ..., dropna: Any = ...) -> SeriesLike: ... + def idxmin(self, axis: Any = ..., skipna: Any = ...) -> SeriesLike: ... + def idxmax(self, axis: Any = ..., skipna: Any = ...) -> SeriesLike: ... + def mode( + self, axis: Any = ..., numeric_only: Any = ..., dropna: Any = ... + ) -> DataFrameLike: ... + def quantile( + self, + q: float = ..., + axis: int = ..., + numeric_only: bool = ..., + interpolation: str = ..., + ): ... + def to_timestamp( + self, freq: Any = ..., how: Any = ..., axis: Any = ..., copy: Any = ... + ) -> DataFrameLike: ... + def to_period( + self, freq: Any = ..., axis: Any = ..., copy: Any = ... + ) -> DataFrameLike: ... + def isin(self, values: Any) -> DataFrameLike: ... + plot: Any = ... + hist: Any = ... + boxplot: Any = ... + sparse: Any = ... diff --git a/python/pyspark/sql/pandas/_typing/protocols/series.pyi b/python/pyspark/sql/pandas/_typing/protocols/series.pyi new file mode 100644 index 0000000000000..14babb067da0d --- /dev/null +++ b/python/pyspark/sql/pandas/_typing/protocols/series.pyi @@ -0,0 +1,253 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This Protocol resuses core Pandas annotation. +# Overall pipeline looks as follows +# - Stubgen pandas.core.series +# - Add Protocol as a base class +# - Replace imports with Any + +import numpy as np # type: ignore[import] +from typing import Any, Callable, Hashable, IO, Optional +from typing_extensions import Protocol + +groupby_generic = Any + +class SeriesLike(Protocol): + hasnans: Any = ... + div: Callable[[SeriesLike, Any], SeriesLike] + rdiv: Callable[[SeriesLike, Any], SeriesLike] + def __init__( + self, + data: Optional[Any] = ..., + index: Optional[Any] = ..., + dtype: Optional[Any] = ..., + name: Optional[Any] = ..., + copy: bool = ..., + fastpath: bool = ..., + ) -> None: ... + @property + def dtype(self): ... + @property + def dtypes(self): ... + @property + def name(self) -> Optional[Hashable]: ... + @name.setter + def name(self, value: Optional[Hashable]) -> None: ... 
+ @property + def values(self): ... + def ravel(self, order: str = ...): ... + def __len__(self) -> int: ... + def view(self, dtype: Optional[Any] = ...): ... + def __array_ufunc__( + self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any + ) -> Any: ... + def __array__(self, dtype: Any = ...) -> np.ndarray: ... + __float__: Any = ... + __long__: Any = ... + __int__: Any = ... + @property + def axes(self): ... + def take( + self, indices: Any, axis: int = ..., is_copy: bool = ..., **kwargs: Any + ): ... + def __getitem__(self, key: Any): ... + def __setitem__(self, key: Any, value: Any) -> None: ... + def repeat(self, repeats: Any, axis: Optional[Any] = ...): ... + index: Any = ... + def reset_index( + self, + level: Optional[Any] = ..., + drop: bool = ..., + name: Optional[Any] = ..., + inplace: bool = ..., + ): ... + def to_string( + self, + buf: Optional[Any] = ..., + na_rep: str = ..., + float_format: Optional[Any] = ..., + header: bool = ..., + index: bool = ..., + length: bool = ..., + dtype: bool = ..., + name: bool = ..., + max_rows: Optional[Any] = ..., + min_rows: Optional[Any] = ..., + ): ... + def to_markdown( + self, buf: Optional[IO[str]] = ..., mode: Optional[str] = ..., **kwargs: Any + ) -> Optional[str]: ... + def items(self): ... + def iteritems(self): ... + def keys(self): ... + def to_dict(self, into: Any = ...): ... + def to_frame(self, name: Optional[Any] = ...): ... + def groupby( + self, + by: Any = ..., + axis: Any = ..., + level: Any = ..., + as_index: bool = ..., + sort: bool = ..., + group_keys: bool = ..., + squeeze: bool = ..., + observed: bool = ..., + ) -> Any: ... + def count(self, level: Optional[Any] = ...): ... + def mode(self, dropna: bool = ...): ... + def unique(self): ... + def drop_duplicates(self, keep: str = ..., inplace: bool = ...): ... + def duplicated(self, keep: str = ...): ... + def idxmin( + self, axis: int = ..., skipna: bool = ..., *args: Any, **kwargs: Any + ): ... + def idxmax( + self, axis: int = ..., skipna: bool = ..., *args: Any, **kwargs: Any + ): ... + def round(self, decimals: int = ..., *args: Any, **kwargs: Any): ... + def quantile(self, q: float = ..., interpolation: str = ...): ... + def corr(self, other: Any, method: str = ..., min_periods: Optional[Any] = ...): ... + def cov(self, other: Any, min_periods: Optional[Any] = ...): ... + def diff(self, periods: int = ...): ... + def autocorr(self, lag: int = ...): ... + def dot(self, other: Any): ... + def __matmul__(self, other: Any): ... + def __rmatmul__(self, other: Any): ... + def searchsorted( + self, value: Any, side: str = ..., sorter: Optional[Any] = ... + ): ... + def append( + self, to_append: Any, ignore_index: bool = ..., verify_integrity: bool = ... + ): ... + def combine(self, other: Any, func: Any, fill_value: Optional[Any] = ...): ... + def combine_first(self, other: Any): ... + def update(self, other: Any) -> None: ... + def sort_values( + self, + axis: int = ..., + ascending: bool = ..., + inplace: bool = ..., + kind: str = ..., + na_position: str = ..., + ignore_index: bool = ..., + ): ... + def sort_index( + self, + axis: Any = ..., + level: Any = ..., + ascending: Any = ..., + inplace: Any = ..., + kind: Any = ..., + na_position: Any = ..., + sort_remaining: Any = ..., + ignore_index: bool = ..., + ) -> Any: ... + def argsort(self, axis: int = ..., kind: str = ..., order: Optional[Any] = ...): ... + def nlargest(self, n: int = ..., keep: str = ...): ... + def nsmallest(self, n: int = ..., keep: str = ...): ... 
+ def swaplevel(self, i: int = ..., j: int = ..., copy: bool = ...): ... + def reorder_levels(self, order: Any): ... + def explode(self) -> SeriesLike: ... + def unstack(self, level: int = ..., fill_value: Optional[Any] = ...): ... + def map(self, arg: Any, na_action: Optional[Any] = ...): ... + def aggregate(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + agg: Any = ... + def transform(self, func: Any, axis: int = ..., *args: Any, **kwargs: Any): ... + def apply( + self, func: Any, convert_dtype: bool = ..., args: Any = ..., **kwds: Any + ): ... + def align( + self, + other: Any, + join: str = ..., + axis: Optional[Any] = ..., + level: Optional[Any] = ..., + copy: bool = ..., + fill_value: Optional[Any] = ..., + method: Optional[Any] = ..., + limit: Optional[Any] = ..., + fill_axis: int = ..., + broadcast_axis: Optional[Any] = ..., + ): ... + def rename( + self, + index: Optional[Any] = ..., + *, + axis: Optional[Any] = ..., + copy: bool = ..., + inplace: bool = ..., + level: Optional[Any] = ..., + errors: str = ... + ): ... + def reindex(self, index: Optional[Any] = ..., **kwargs: Any): ... + def drop( + self, + labels: Optional[Any] = ..., + axis: int = ..., + index: Optional[Any] = ..., + columns: Optional[Any] = ..., + level: Optional[Any] = ..., + inplace: bool = ..., + errors: str = ..., + ): ... + def fillna( + self, + value: Any = ..., + method: Any = ..., + axis: Any = ..., + inplace: Any = ..., + limit: Any = ..., + downcast: Any = ..., + ) -> Optional[SeriesLike]: ... + def replace( + self, + to_replace: Optional[Any] = ..., + value: Optional[Any] = ..., + inplace: bool = ..., + limit: Optional[Any] = ..., + regex: bool = ..., + method: str = ..., + ): ... + def shift( + self, + periods: int = ..., + freq: Optional[Any] = ..., + axis: int = ..., + fill_value: Optional[Any] = ..., + ): ... + def memory_usage(self, index: bool = ..., deep: bool = ...): ... + def isin(self, values: Any): ... + def between(self, left: Any, right: Any, inclusive: bool = ...): ... + def isna(self): ... + def isnull(self): ... + def notna(self): ... + def notnull(self): ... + def dropna( + self, axis: int = ..., inplace: bool = ..., how: Optional[Any] = ... + ): ... + def to_timestamp( + self, freq: Optional[Any] = ..., how: str = ..., copy: bool = ... + ): ... + def to_period(self, freq: Optional[Any] = ..., copy: bool = ...): ... + str: Any = ... + dt: Any = ... + cat: Any = ... + plot: Any = ... + sparse: Any = ... + hist: Any = ... diff --git a/python/pyspark/sql/pandas/conversion.pyi b/python/pyspark/sql/pandas/conversion.pyi new file mode 100644 index 0000000000000..031852fcc053d --- /dev/null +++ b/python/pyspark/sql/pandas/conversion.pyi @@ -0,0 +1,58 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import overload +from typing import Optional, Union + +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark import since as since # noqa: F401 +from pyspark.rdd import RDD # noqa: F401 +import pyspark.sql.dataframe +from pyspark.sql.pandas.serializers import ( # noqa: F401 + ArrowCollectSerializer as ArrowCollectSerializer, +) +from pyspark.sql.types import ( # noqa: F401 + BooleanType as BooleanType, + ByteType as ByteType, + DataType as DataType, + DoubleType as DoubleType, + FloatType as FloatType, + IntegerType as IntegerType, + IntegralType as IntegralType, + LongType as LongType, + ShortType as ShortType, + StructType as StructType, + TimestampType as TimestampType, +) +from pyspark.traceback_utils import SCCallSiteSync as SCCallSiteSync # noqa: F401 + +class PandasConversionMixin: + def toPandas(self) -> DataFrameLike: ... + +class SparkConversionMixin: + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> pyspark.sql.dataframe.DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> pyspark.sql.dataframe.DataFrame: ... diff --git a/python/pyspark/sql/pandas/functions.pyi b/python/pyspark/sql/pandas/functions.pyi new file mode 100644 index 0000000000000..09318e43f8aa1 --- /dev/null +++ b/python/pyspark/sql/pandas/functions.pyi @@ -0,0 +1,176 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Union, Callable + +from pyspark.sql._typing import ( + AtomicDataTypeOrString, + UserDefinedFunctionLike, +) +from pyspark.sql.pandas._typing import ( + GroupedMapPandasUserDefinedFunction, + MapIterPandasUserDefinedFunction, + CogroupedMapPandasUserDefinedFunction, + PandasCogroupedMapFunction, + PandasCogroupedMapUDFType, + PandasGroupedAggFunction, + PandasGroupedAggUDFType, + PandasGroupedMapFunction, + PandasGroupedMapUDFType, + PandasMapIterFunction, + PandasMapIterUDFType, + PandasScalarIterFunction, + PandasScalarIterUDFType, + PandasScalarToScalarFunction, + PandasScalarToStructFunction, + PandasScalarUDFType, +) + +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.types import ArrayType, StructType + +class PandasUDFType: + SCALAR: PandasScalarUDFType + SCALAR_ITER: PandasScalarIterUDFType + GROUPED_MAP: PandasGroupedMapUDFType + GROUPED_AGG: PandasGroupedAggUDFType + +@overload +def pandas_udf( + f: PandasScalarToScalarFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarUDFType, +) -> UserDefinedFunctionLike: ... 
+@overload +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(f: Union[AtomicDataTypeOrString, ArrayType], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(*, returnType: Union[AtomicDataTypeOrString, ArrayType], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToScalarFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf( + f: PandasScalarToStructFunction, + returnType: Union[StructType, str], + functionType: PandasScalarUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf(f: Union[StructType, str], returnType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(f: Union[StructType, str], *, functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf(*, returnType: Union[StructType, str], functionType: PandasScalarUDFType) -> Callable[[PandasScalarToStructFunction], UserDefinedFunctionLike]: ... # type: ignore[misc] +@overload +def pandas_udf( + f: PandasScalarIterFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarIterUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + *, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], + *, + functionType: PandasScalarIterUDFType +) -> Callable[[PandasScalarIterFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: PandasGroupedMapFunction, + returnType: Union[StructType, str], + functionType: PandasGroupedMapUDFType, +) -> GroupedMapPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasGroupedMapUDFType +) -> Callable[[PandasGroupedMapFunction], GroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: PandasGroupedAggFunction, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasGroupedAggUDFType, +) -> UserDefinedFunctionLike: ... +@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], returnType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + *, + returnType: Union[AtomicDataTypeOrString, ArrayType], + functionType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... 
+@overload +def pandas_udf( + f: Union[AtomicDataTypeOrString, ArrayType], + *, + functionType: PandasGroupedAggUDFType +) -> Callable[[PandasGroupedAggFunction], UserDefinedFunctionLike]: ... +@overload +def pandas_udf( + f: PandasMapIterFunction, + returnType: Union[StructType, str], + functionType: PandasMapIterUDFType, +) -> MapIterPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasMapIterUDFType +) -> Callable[[PandasMapIterFunction], MapIterPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: PandasCogroupedMapFunction, + returnType: Union[StructType, str], + functionType: PandasCogroupedMapUDFType, +) -> CogroupedMapPandasUserDefinedFunction: ... +@overload +def pandas_udf( + f: Union[StructType, str], returnType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + *, returnType: Union[StructType, str], functionType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... +@overload +def pandas_udf( + f: Union[StructType, str], *, functionType: PandasCogroupedMapUDFType +) -> Callable[[PandasCogroupedMapFunction], CogroupedMapPandasUserDefinedFunction]: ... diff --git a/python/pyspark/sql/pandas/group_ops.pyi b/python/pyspark/sql/pandas/group_ops.pyi new file mode 100644 index 0000000000000..2c543e0dc77b9 --- /dev/null +++ b/python/pyspark/sql/pandas/group_ops.pyi @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union + +from pyspark.sql.pandas._typing import ( + GroupedMapPandasUserDefinedFunction, + PandasGroupedMapFunction, + PandasCogroupedMapFunction, +) + +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.column import Column as Column # noqa: F401 +from pyspark.sql.context import SQLContext +import pyspark.sql.group +from pyspark.sql.dataframe import DataFrame as DataFrame +from pyspark.sql.types import StructType + +class PandasGroupedOpsMixin: + def cogroup(self, other: pyspark.sql.group.GroupedData) -> PandasCogroupedOps: ... + def apply(self, udf: GroupedMapPandasUserDefinedFunction) -> DataFrame: ... + def applyInPandas( + self, func: PandasGroupedMapFunction, schema: Union[StructType, str] + ) -> DataFrame: ... 
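A hedged usage sketch for the `applyInPandas` signature declared just above; the grouping column, data, and result schema are invented for illustration:

```
import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0)], ["id", "v"])

def subtract_mean(pdf: pd.DataFrame) -> pd.DataFrame:
    # Called once per group with a pandas DataFrame; returns a pandas DataFrame
    return pdf.assign(v=pdf["v"] - pdf["v"].mean())

# schema accepts either a StructType or a DDL string, matching Union[StructType, str]
df.groupBy("id").applyInPandas(subtract_mean, schema="id long, v double").show()
```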
+ +class PandasCogroupedOps: + sql_ctx: SQLContext + def __init__( + self, gd1: pyspark.sql.group.GroupedData, gd2: pyspark.sql.group.GroupedData + ) -> None: ... + def applyInPandas( + self, func: PandasCogroupedMapFunction, schema: Union[StructType, str] + ) -> DataFrame: ... diff --git a/python/pyspark/sql/pandas/map_ops.pyi b/python/pyspark/sql/pandas/map_ops.pyi new file mode 100644 index 0000000000000..cab885278c388 --- /dev/null +++ b/python/pyspark/sql/pandas/map_ops.pyi @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Union + +from pyspark.sql.pandas._typing import PandasMapIterFunction +from pyspark import since as since # noqa: F401 +from pyspark.rdd import PythonEvalType as PythonEvalType # noqa: F401 +from pyspark.sql.types import StructType +import pyspark.sql.dataframe + +class PandasMapOpsMixin: + def mapInPandas( + self, udf: PandasMapIterFunction, schema: Union[StructType, str] + ) -> pyspark.sql.dataframe.DataFrame: ... diff --git a/python/pyspark/sql/pandas/serializers.pyi b/python/pyspark/sql/pandas/serializers.pyi new file mode 100644 index 0000000000000..8be3c0dcbc9ad --- /dev/null +++ b/python/pyspark/sql/pandas/serializers.pyi @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.serializers import ( # noqa: F401 + Serializer as Serializer, + UTF8Deserializer as UTF8Deserializer, + read_int as read_int, + write_int as write_int, +) +from typing import Any + +class SpecialLengths: + END_OF_DATA_SECTION: int = ... + PYTHON_EXCEPTION_THROWN: int = ... + TIMING_DATA: int = ... + END_OF_STREAM: int = ... + NULL: int = ... + START_ARROW_STREAM: int = ... + +class ArrowCollectSerializer(Serializer): + serializer: Any = ... + def __init__(self) -> None: ... + def dump_stream(self, iterator: Any, stream: Any): ... + def load_stream(self, stream: Any) -> None: ... + +class ArrowStreamSerializer(Serializer): + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... 
+ +class ArrowStreamPandasSerializer(ArrowStreamSerializer): + def __init__( + self, timezone: Any, safecheck: Any, assign_cols_by_name: Any + ) -> None: ... + def arrow_to_pandas(self, arrow_column: Any): ... + def dump_stream(self, iterator: Any, stream: Any) -> None: ... + def load_stream(self, stream: Any) -> None: ... + +class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer): + def __init__( + self, + timezone: Any, + safecheck: Any, + assign_cols_by_name: Any, + df_for_struct: bool = ..., + ) -> None: ... + def arrow_to_pandas(self, arrow_column: Any): ... + def dump_stream(self, iterator: Any, stream: Any): ... + +class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer): + def load_stream(self, stream: Any) -> None: ... diff --git a/python/pyspark/sql/pandas/typehints.pyi b/python/pyspark/sql/pandas/typehints.pyi new file mode 100644 index 0000000000000..eea9c86225332 --- /dev/null +++ b/python/pyspark/sql/pandas/typehints.pyi @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql.pandas.utils import ( # noqa: F401 + require_minimum_pandas_version as require_minimum_pandas_version, +) +from typing import Any, Optional + +def infer_eval_type(sig: Any): ... +def check_tuple_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... +def check_iterator_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... +def check_union_annotation( + annotation: Any, parameter_check_func: Optional[Any] = ... +): ... diff --git a/python/pyspark/sql/pandas/types.pyi b/python/pyspark/sql/pandas/types.pyi new file mode 100644 index 0000000000000..5ae29bd273180 --- /dev/null +++ b/python/pyspark/sql/pandas/types.pyi @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.sql.types import ( # noqa: F401 + ArrayType as ArrayType, + BinaryType as BinaryType, + BooleanType as BooleanType, + ByteType as ByteType, + DateType as DateType, + DecimalType as DecimalType, + DoubleType as DoubleType, + FloatType as FloatType, + IntegerType as IntegerType, + LongType as LongType, + ShortType as ShortType, + StringType as StringType, + StructField as StructField, + StructType as StructType, + TimestampType as TimestampType, +) +from typing import Any + +def to_arrow_type(dt: Any): ... +def to_arrow_schema(schema: Any): ... +def from_arrow_type(at: Any): ... +def from_arrow_schema(arrow_schema: Any): ... diff --git a/python/pyspark/sql/pandas/utils.pyi b/python/pyspark/sql/pandas/utils.pyi new file mode 100644 index 0000000000000..e4d315b0ce205 --- /dev/null +++ b/python/pyspark/sql/pandas/utils.pyi @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +def require_minimum_pandas_version() -> None: ... +def require_minimum_pyarrow_version() -> None: ... diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi new file mode 100644 index 0000000000000..a111cbe416c2f --- /dev/null +++ b/python/pyspark/sql/readwriter.pyi @@ -0,0 +1,250 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Dict, List, Optional, Tuple, Union + +from pyspark.sql._typing import OptionalPrimitiveType +from pyspark.sql.dataframe import DataFrame +from pyspark.rdd import RDD +from pyspark.sql.column import Column +from pyspark.sql.context import SQLContext +from pyspark.sql.types import StructType + +PathOrPaths = Union[str, List[str]] +TupleOrListOfString = Union[List[str], Tuple[str, ...]] + +class OptionUtils: ... + +class DataFrameReader(OptionUtils): + def __init__(self, spark: SQLContext) -> None: ... + def format(self, source: str) -> DataFrameReader: ... + def schema(self, schema: Union[StructType, str]) -> DataFrameReader: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameReader: ... 
+ def options(self, **options: OptionalPrimitiveType) -> DataFrameReader: ... + def load( + self, + path: Optional[PathOrPaths] = ..., + format: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options: OptionalPrimitiveType + ) -> DataFrame: ... + def json( + self, + path: Union[str, List[str], RDD[str]], + schema: Optional[Union[StructType, str]] = ..., + primitivesAsString: Optional[Union[bool, str]] = ..., + prefersDecimal: Optional[Union[bool, str]] = ..., + allowComments: Optional[Union[bool, str]] = ..., + allowUnquotedFieldNames: Optional[Union[bool, str]] = ..., + allowSingleQuotes: Optional[Union[bool, str]] = ..., + allowNumericLeadingZero: Optional[Union[bool, str]] = ..., + allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + allowUnquotedControlChars: Optional[Union[bool, str]] = ..., + lineSep: Optional[str] = ..., + samplingRatio: Optional[Union[float, str]] = ..., + dropFieldIfAllNull: Optional[Union[bool, str]] = ..., + encoding: Optional[str] = ..., + locale: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + def parquet(self, *paths: str, **options: OptionalPrimitiveType) -> DataFrame: ... + def text( + self, + paths: PathOrPaths, + wholetext: bool = ..., + lineSep: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def csv( + self, + path: PathOrPaths, + schema: Optional[Union[StructType, str]] = ..., + sep: Optional[str] = ..., + encoding: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + comment: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + inferSchema: Optional[Union[bool, str]] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + nanValue: Optional[str] = ..., + positiveInf: Optional[str] = ..., + negativeInf: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + maxColumns: Optional[int] = ..., + maxCharsPerColumn: Optional[int] = ..., + maxMalformedLogPerPartition: Optional[int] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[str] = ..., + samplingRatio: Optional[Union[float, str]] = ..., + enforceSchema: Optional[Union[bool, str]] = ..., + emptyValue: Optional[str] = ..., + locale: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> DataFrame: ... + def orc( + self, + path: PathOrPaths, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + @overload + def jdbc( + self, url: str, table: str, *, properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + @overload + def jdbc( + self, + url: str, + table: str, + column: str, + lowerBound: int, + upperBound: int, + numPartitions: int, + *, + properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + @overload + def jdbc( + self, + url: str, + table: str, + *, + predicates: List[str], + properties: Optional[Dict[str, str]] = ... + ) -> DataFrame: ... + +class DataFrameWriter(OptionUtils): + def __init__(self, df: DataFrame) -> None: ... 
+ def mode(self, saveMode: str) -> DataFrameWriter: ... + def format(self, source: str) -> DataFrameWriter: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameWriter: ... + def options(self, **options: OptionalPrimitiveType) -> DataFrameWriter: ... + @overload + def partitionBy(self, *cols: str) -> DataFrameWriter: ... + @overload + def partitionBy(self, __cols: List[str]) -> DataFrameWriter: ... + @overload + def bucketBy(self, numBuckets: int, col: str, *cols: str) -> DataFrameWriter: ... + @overload + def bucketBy( + self, numBuckets: int, col: TupleOrListOfString + ) -> DataFrameWriter: ... + @overload + def sortBy(self, col: str, *cols: str) -> DataFrameWriter: ... + @overload + def sortBy(self, col: TupleOrListOfString) -> DataFrameWriter: ... + def save( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + **options: OptionalPrimitiveType + ) -> None: ... + def insertInto(self, tableName: str, overwrite: Optional[bool] = ...) -> None: ... + def saveAsTable( + self, + name: str, + format: Optional[str] = ..., + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + **options: OptionalPrimitiveType + ) -> None: ... + def json( + self, + path: str, + mode: Optional[str] = ..., + compression: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + lineSep: Optional[str] = ..., + encoding: Optional[str] = ..., + ignoreNullFields: Optional[bool] = ..., + ) -> None: ... + def parquet( + self, + path: str, + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + compression: Optional[str] = ..., + ) -> None: ... + def text( + self, path: str, compression: Optional[str] = ..., lineSep: Optional[str] = ... + ) -> None: ... + def csv( + self, + path: str, + mode: Optional[str] = ..., + compression: Optional[str] = ..., + sep: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + escapeQuotes: Optional[Union[bool, str]] = ..., + quoteAll: Optional[Union[bool, str]] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[str] = ..., + encoding: Optional[str] = ..., + emptyValue: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> None: ... + def orc( + self, + path: str, + mode: Optional[str] = ..., + partitionBy: Optional[List[str]] = ..., + compression: Optional[str] = ..., + ) -> None: ... + def jdbc( + self, + url: str, + table: str, + mode: Optional[str] = ..., + properties: Optional[Dict[str, str]] = ..., + ) -> None: ... + +class DataFrameWriterV2: + def __init__(self, df: DataFrame, table: str) -> None: ... + def using(self, provider: str) -> DataFrameWriterV2: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataFrameWriterV2: ... + def options(self, **options: OptionalPrimitiveType) -> DataFrameWriterV2: ... + def tableProperty(self, property: str, value: str) -> DataFrameWriterV2: ... + def partitionedBy(self, col: Column, *cols: Column) -> DataFrameWriterV2: ... + def create(self) -> None: ... + def replace(self) -> None: ... + def createOrReplace(self) -> None: ... + def append(self) -> None: ... + def overwrite(self, condition: Column) -> None: ... 
+ def overwritePartitions(self) -> None: ... diff --git a/python/pyspark/sql/session.pyi b/python/pyspark/sql/session.pyi new file mode 100644 index 0000000000000..17ba8894c1731 --- /dev/null +++ b/python/pyspark/sql/session.pyi @@ -0,0 +1,125 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Iterable, List, Optional, Tuple, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.sql._typing import DateTimeLiteral, LiteralType, DecimalLiteral, RowLike +from pyspark.sql.pandas._typing import DataFrameLike +from pyspark.conf import SparkConf +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.sql.catalog import Catalog +from pyspark.sql.conf import RuntimeConfig +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.pandas.conversion import SparkConversionMixin +from pyspark.sql.types import AtomicType, StructType +from pyspark.sql.readwriter import DataFrameReader +from pyspark.sql.streaming import DataStreamReader, StreamingQueryManager +from pyspark.sql.udf import UDFRegistration + +T = TypeVar("T") + +class SparkSession(SparkConversionMixin): + class Builder: + @overload + def config(self, *, conf: SparkConf) -> SparkSession.Builder: ... + @overload + def config(self, key: str, value: Any) -> SparkSession.Builder: ... + def master(self, master: str) -> SparkSession.Builder: ... + def appName(self, name: str) -> SparkSession.Builder: ... + def enableHiveSupport(self) -> SparkSession.Builder: ... + def getOrCreate(self) -> SparkSession: ... + builder: SparkSession.Builder + def __init__( + self, sparkContext: SparkContext, jsparkSession: Optional[JavaObject] = ... + ) -> None: ... + def newSession(self) -> SparkSession: ... + @classmethod + def getActiveSession(cls) -> SparkSession: ... + @property + def sparkContext(self) -> SparkContext: ... + @property + def version(self) -> str: ... + @property + def conf(self) -> RuntimeConfig: ... + @property + def catalog(self) -> Catalog: ... + @property + def udf(self) -> UDFRegistration: ... + def range( + self, + start: int, + end: Optional[int] = ..., + step: int = ..., + numPartitions: Optional[int] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + samplingRatio: Optional[float] = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[List[str], Tuple[str, ...]] = ..., + verifySchema: bool = ..., + ) -> DataFrame: ... 
+ @overload + def createDataFrame( + self, + data: Union[ + RDD[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + Iterable[Union[DateTimeLiteral, LiteralType, DecimalLiteral]], + ], + schema: Union[AtomicType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: Union[RDD[RowLike], Iterable[RowLike]], + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + @overload + def createDataFrame( + self, data: DataFrameLike, samplingRatio: Optional[float] = ... + ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: DataFrameLike, + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: ... + def sql(self, sqlQuery: str) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... + @property + def read(self) -> DataFrameReader: ... + @property + def readStream(self) -> DataStreamReader: ... + @property + def streams(self) -> StreamingQueryManager: ... + def stop(self) -> None: ... + def __enter__(self) -> SparkSession: ... + def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi new file mode 100644 index 0000000000000..22055b2efc06b --- /dev/null +++ b/python/pyspark/sql/streaming.pyi @@ -0,0 +1,179 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, List, Optional, Union + +from pyspark.sql._typing import SupportsProcess, OptionalPrimitiveType +from pyspark.sql.context import SQLContext +from pyspark.sql.dataframe import DataFrame +from pyspark.sql.readwriter import OptionUtils +from pyspark.sql.types import Row, StructType +from pyspark.sql.utils import StreamingQueryException + +from py4j.java_gateway import JavaObject # type: ignore[import] + +class StreamingQuery: + def __init__(self, jsq: JavaObject) -> None: ... + @property + def id(self) -> str: ... + @property + def runId(self) -> str: ... + @property + def name(self) -> str: ... + @property + def isActive(self) -> bool: ... + def awaitTermination(self, timeout: Optional[int] = ...) -> Optional[bool]: ... + @property + def status(self) -> Dict[str, Any]: ... + @property + def recentProgress(self) -> List[Dict[str, Any]]: ... + @property + def lastProgress(self) -> Optional[Dict[str, Any]]: ... + def processAllAvailable(self) -> None: ... + def stop(self) -> None: ... + def explain(self, extended: bool = ...) -> None: ... + def exception(self) -> Optional[StreamingQueryException]: ... + +class StreamingQueryManager: + def __init__(self, jsqm: JavaObject) -> None: ... + @property + def active(self) -> List[StreamingQuery]: ... + def get(self, id: str) -> StreamingQuery: ... 
+ def awaitAnyTermination(self, timeout: Optional[int] = ...) -> bool: ... + def resetTerminated(self) -> None: ... + +class DataStreamReader(OptionUtils): + def __init__(self, spark: SQLContext) -> None: ... + def format(self, source: str) -> DataStreamReader: ... + def schema(self, schema: Union[StructType, str]) -> DataStreamReader: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataStreamReader: ... + def options(self, **options: OptionalPrimitiveType) -> DataStreamReader: ... + def load( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + schema: Optional[StructType] = ..., + **options: OptionalPrimitiveType + ) -> DataFrame: ... + def json( + self, + path: str, + schema: Optional[Union[StructType, str]] = ..., + primitivesAsString: Optional[Union[bool, str]] = ..., + prefersDecimal: Optional[Union[bool, str]] = ..., + allowComments: Optional[Union[bool, str]] = ..., + allowUnquotedFieldNames: Optional[Union[bool, str]] = ..., + allowSingleQuotes: Optional[Union[bool, str]] = ..., + allowNumericLeadingZero: Optional[Union[bool, str]] = ..., + allowBackslashEscapingAnyCharacter: Optional[Union[bool, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + allowUnquotedControlChars: Optional[Union[bool, str]] = ..., + lineSep: Optional[str] = ..., + locale: Optional[str] = ..., + dropFieldIfAllNull: Optional[Union[bool, str]] = ..., + encoding: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def orc( + self, + path: str, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def parquet( + self, + path: str, + mergeSchema: Optional[bool] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def text( + self, + path: str, + wholetext: bool = ..., + lineSep: Optional[str] = ..., + recursiveFileLookup: Optional[bool] = ..., + ) -> DataFrame: ... + def csv( + self, + path: str, + schema: Optional[Union[StructType, str]] = ..., + sep: Optional[str] = ..., + encoding: Optional[str] = ..., + quote: Optional[str] = ..., + escape: Optional[str] = ..., + comment: Optional[str] = ..., + header: Optional[Union[bool, str]] = ..., + inferSchema: Optional[Union[bool, str]] = ..., + ignoreLeadingWhiteSpace: Optional[Union[bool, str]] = ..., + ignoreTrailingWhiteSpace: Optional[Union[bool, str]] = ..., + nullValue: Optional[str] = ..., + nanValue: Optional[str] = ..., + positiveInf: Optional[str] = ..., + negativeInf: Optional[str] = ..., + dateFormat: Optional[str] = ..., + timestampFormat: Optional[str] = ..., + maxColumns: Optional[Union[int, str]] = ..., + maxCharsPerColumn: Optional[Union[int, str]] = ..., + mode: Optional[str] = ..., + columnNameOfCorruptRecord: Optional[str] = ..., + multiLine: Optional[Union[bool, str]] = ..., + charToEscapeQuoteEscaping: Optional[Union[bool, str]] = ..., + enforceSchema: Optional[Union[bool, str]] = ..., + emptyValue: Optional[str] = ..., + locale: Optional[str] = ..., + lineSep: Optional[str] = ..., + ) -> DataFrame: ... + +class DataStreamWriter: + def __init__(self, df: DataFrame) -> None: ... + def outputMode(self, outputMode: str) -> DataStreamWriter: ... + def format(self, source: str) -> DataStreamWriter: ... + def option(self, key: str, value: OptionalPrimitiveType) -> DataStreamWriter: ... 
+ def options(self, **options: OptionalPrimitiveType) -> DataStreamWriter: ... + @overload + def partitionBy(self, *cols: str) -> DataStreamWriter: ... + @overload + def partitionBy(self, __cols: List[str]) -> DataStreamWriter: ... + def queryName(self, queryName: str) -> DataStreamWriter: ... + @overload + def trigger(self, processingTime: str) -> DataStreamWriter: ... + @overload + def trigger(self, once: bool) -> DataStreamWriter: ... + @overload + def trigger(self, continuous: bool) -> DataStreamWriter: ... + def start( + self, + path: Optional[str] = ..., + format: Optional[str] = ..., + outputMode: Optional[str] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., + queryName: Optional[str] = ..., + **options: OptionalPrimitiveType + ) -> StreamingQuery: ... + @overload + def foreach(self, f: Callable[[Row], None]) -> DataStreamWriter: ... + @overload + def foreach(self, f: SupportsProcess) -> DataStreamWriter: ... + def foreachBatch( + self, func: Callable[[DataFrame, int], None] + ) -> DataStreamWriter: ... diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index fb4f619c8bf63..c6497923d84fb 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -42,7 +42,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class ArrowTests(ReusedSQLTestCase): @classmethod @@ -465,7 +465,7 @@ def test_createDataFrame_empty_partition(self): @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class MaxResultArrowTests(unittest.TestCase): # These tests are separate as 'spark.driver.maxResultSize' configuration # is a static configuration to Spark context. 
@@ -500,7 +500,7 @@ def conf(cls): from pyspark.sql.tests.test_arrow import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index 141b249db0fc6..ca4e427a7db28 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -206,7 +206,7 @@ def test_list_columns(self): from pyspark.sql.tests.test_catalog import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 8a89e6e9d5599..7e03e2ef3e6d0 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -161,7 +161,7 @@ def test_with_field(self): from pyspark.sql.tests.test_column import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py index dd2e0be85d508..1cc0c1b7562c5 100644 --- a/python/pyspark/sql/tests/test_conf.py +++ b/python/pyspark/sql/tests/test_conf.py @@ -49,7 +49,7 @@ def test_conf(self): from pyspark.sql.tests.test_conf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index ce22a52dc119e..d506908b784db 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -276,7 +276,7 @@ def test_get_or_create(self): from pyspark.sql.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index d03939821a176..d941707b8969f 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -518,7 +518,7 @@ def _to_pandas(self): df = self.spark.createDataFrame(data, schema) return df.toPandas() - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas(self): import numpy as np pdf = self._to_pandas() @@ -530,7 +530,7 @@ def test_to_pandas(self): self.assertEquals(types[4], np.object) # datetime.date self.assertEquals(types[5], 'datetime64[ns]') - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_with_duplicated_column_names(self): import numpy as np @@ -543,7 +543,7 @@ def test_to_pandas_with_duplicated_column_names(self): self.assertEquals(types.iloc[0], np.int32) self.assertEquals(types.iloc[1], np.int32) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: 
ignore def test_to_pandas_on_cross_join(self): import numpy as np @@ -569,7 +569,7 @@ def test_to_pandas_required_pandas_not_found(self): with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'): self._to_pandas() - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_avoid_astype(self): import numpy as np schema = StructType().add("a", IntegerType()).add("b", StringType())\ @@ -581,7 +581,7 @@ def test_to_pandas_avoid_astype(self): self.assertEquals(types[1], np.object) self.assertEquals(types[2], np.float64) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_empty_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on an empty dataframe has the correct dtypes @@ -601,7 +601,7 @@ def test_to_pandas_from_empty_dataframe(self): dtypes_when_empty_df = self.spark.sql(sql).filter("False").toPandas().dtypes self.assertTrue(np.all(dtypes_when_empty_df == dtypes_when_nonempty_df)) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_null_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on a dataframe with only nulls has correct dtypes @@ -629,7 +629,7 @@ def test_to_pandas_from_null_dataframe(self): self.assertEqual(types[7], np.object) self.assertTrue(np.can_cast(np.datetime64, types[8])) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_mixed_dataframe(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): # SPARK-29188 test that toPandas() on a dataframe with some nulls has correct dtypes @@ -657,7 +657,7 @@ def test_create_dataframe_from_array_of_long(self): df = self.spark.createDataFrame(data) self.assertEqual(df.first(), Row(longarray=[-9223372036854775808, 0, 9223372036854775807])) - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_timestamp(self): import pandas as pd from datetime import datetime @@ -685,7 +685,7 @@ def test_create_dataframe_required_pandas_not_found(self): self.spark.createDataFrame(pdf) # Regression test for SPARK-23360 - @unittest.skipIf(not have_pandas, pandas_requirement_message) + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_dst(self): import pandas as pd from pandas.util.testing import assert_frame_equal @@ -889,7 +889,7 @@ def test_query_execution_listener_on_collect(self): @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore def test_query_execution_listener_on_collect_with_arrow(self): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": True}): self.assertFalse( @@ -907,7 +907,7 @@ def test_query_execution_listener_on_collect_with_arrow(self): from pyspark.sql.tests.test_dataframe import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore 
testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py index dfef8f5740050..9425494fb0d90 100644 --- a/python/pyspark/sql/tests/test_datasources.py +++ b/python/pyspark/sql/tests/test_datasources.py @@ -164,7 +164,7 @@ def test_ignore_column_of_all_nulls(self): from pyspark.sql.tests.test_datasources import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 09f5960c6f648..5638cad51b755 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -396,7 +396,7 @@ def test_higher_order_function_failures(self): from pyspark.sql.tests.test_functions import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 2fab7a08da1da..324c964f4f0cf 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -39,7 +39,7 @@ def test_aggregator(self): from pyspark.sql.tests.test_group import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 5013e2d4d6bd9..f9a7dd69b61fb 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -33,7 +33,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class CogroupedMapInPandasTests(ReusedSQLTestCase): @property @@ -247,7 +247,7 @@ def merge_pandas(l, r): from pyspark.sql.tests.test_pandas_cogrouped_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 6eb5355044bb0..81b6d5efb710a 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -41,7 +41,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class GroupedMapInPandasTests(ReusedSQLTestCase): @property @@ -611,7 +611,7 @@ def my_pandas_udf(pdf): from pyspark.sql.tests.test_pandas_grouped_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_map.py 
b/python/pyspark/sql/tests/test_pandas_map.py index bda370dffbf6a..3ca437f75fc23 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -27,7 +27,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class MapInPandasTests(ReusedSQLTestCase): @classmethod @@ -117,7 +117,7 @@ def func(iterator): from pyspark.sql.tests.test_pandas_map import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py index 24b98182b7fcf..cc742fc4267cb 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/test_pandas_udf.py @@ -28,7 +28,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class PandasUDFTests(ReusedSQLTestCase): def test_pandas_udf_basic(self): @@ -244,7 +244,7 @@ def udf(column): from pyspark.sql.tests.test_pandas_udf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index f63f52239fdf2..451308927629b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -35,7 +35,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class GroupedAggPandasUDFTests(ReusedSQLTestCase): @property @@ -514,7 +514,7 @@ def mean(x): from pyspark.sql.tests.test_pandas_udf_grouped_agg import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 522807b03af70..6d325c9085ce1 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -46,7 +46,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore class ScalarPandasUDFTests(ReusedSQLTestCase): @classmethod @@ -1095,7 +1095,7 @@ def f3i(it): self.assertEquals(expected, df1.collect()) # SPARK-24721 - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_datasource_with_udf(self): # Same as SQLTests.test_datasource_with_udf, but with Pandas UDF # This needs to a separate test because Arrow dependency is optional @@ -1142,7 +1142,7 @@ def test_datasource_with_udf(self): from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 try: - import xmlrunner + import xmlrunner # 
type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index 7be81f82808e4..d9717da4d2fbd 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -34,7 +34,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class PandasUDFTypeHintsTests(ReusedSQLTestCase): def test_type_annotation_scalar(self): def func(col: pd.Series) -> pd.Series: @@ -246,7 +246,7 @@ def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: from pyspark.sql.tests.test_pandas_udf_typehints import * # noqa: #401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index 6e59255da13a2..5ad2ecd8f85d4 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -31,7 +31,7 @@ @unittest.skipIf( not have_pandas or not have_pyarrow, - pandas_requirement_message or pyarrow_requirement_message) + pandas_requirement_message or pyarrow_requirement_message) # type: ignore[arg-type] class WindowPandasUDFTests(ReusedSQLTestCase): @property def data(self): @@ -355,7 +355,7 @@ def test_bounded_mixed(self): from pyspark.sql.tests.test_pandas_udf_window import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py index 55ffefc43c105..80b4118ae796a 100644 --- a/python/pyspark/sql/tests/test_readwriter.py +++ b/python/pyspark/sql/tests/test_readwriter.py @@ -204,7 +204,7 @@ def test_partitioning_functions(self): from pyspark.sql.tests.test_readwriter import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_serde.py b/python/pyspark/sql/tests/test_serde.py index 35c14e430af50..ce087ff4ce550 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -142,7 +142,7 @@ def test_bytes_as_binary_type(self): from pyspark.sql.tests.test_serde import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index d10f7bf906c3b..7faeb1857b983 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -361,7 +361,7 @@ def test_use_custom_class_for_extensions(self): from pyspark.sql.tests.test_session import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', 
verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 21ce04618a904..28a50f9575a0a 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -575,7 +575,7 @@ def collectBatch(df, id): from pyspark.sql.tests.test_streaming import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 7256db055fb9c..e85e8a6e6d1ee 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -25,12 +25,15 @@ import unittest from pyspark.sql import Row -from pyspark.sql.functions import col, UserDefinedFunction +from pyspark.sql.functions import col +from pyspark.sql.udf import UserDefinedFunction from pyspark.sql.types import ByteType, ShortType, IntegerType, FloatType, DateType, \ TimestampType, MapType, StringType, StructType, StructField, ArrayType, DoubleType, LongType, \ DecimalType, BinaryType, BooleanType, NullType -from pyspark.sql.types import _array_signed_int_typecode_ctype_mappings, _array_type_mappings, \ +from pyspark.sql.types import ( # type: ignore + _array_signed_int_typecode_ctype_mappings, _array_type_mappings, _array_unsigned_int_typecode_ctype_mappings, _infer_type, _make_type_verifier, _merge_type +) from pyspark.testing.sqlutils import ReusedSQLTestCase, ExamplePointUDT, PythonOnlyUDT, \ ExamplePoint, PythonOnlyPoint, MyObject @@ -974,7 +977,7 @@ def test_row_without_field_sorting(self): from pyspark.sql.tests.test_types import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index ad94bc83cc5be..a7dcbfd32ac1c 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -23,7 +23,8 @@ from pyspark import SparkContext from pyspark.sql import SparkSession, Column, Row -from pyspark.sql.functions import UserDefinedFunction, udf +from pyspark.sql.functions import udf +from pyspark.sql.udf import UserDefinedFunction from pyspark.sql.types import StringType, IntegerType, BooleanType, DoubleType, LongType, \ ArrayType, StructType, StructField from pyspark.sql.utils import AnalysisException @@ -356,7 +357,7 @@ def test_udf_registration_returns_udf(self): df.select(add_four("id").alias("plus_four")).collect() ) - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_register_java_function(self): self.spark.udf.registerJavaFunction( "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType()) @@ -373,7 +374,7 @@ def test_register_java_function(self): [value] = self.spark.sql("SELECT javaStringLength3('test')").first() self.assertEqual(value, 4) - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_register_java_udaf(self): self.spark.udf.registerJavaUDAF("javaUDAF", "test.org.apache.spark.sql.MyDoubleAvg") df = self.spark.createDataFrame([(1, "a"), (2, "b"), (3, "a")], ["id", "name"]) 
@@ -560,7 +561,7 @@ def test_nonparam_udf_with_aggregate(self): self.assertEqual(rows, [Row(_1=1, _2=2, a=u'const_str')]) # SPARK-24721 - @unittest.skipIf(not test_compiled, test_not_compiled_message) + @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore def test_datasource_with_udf(self): from pyspark.sql.functions import lit, col @@ -699,7 +700,7 @@ def test_udf_init_shouldnt_initialize_context(self): from pyspark.sql.tests.test_udf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index c6e7fcd8ec11a..b08e17208d8af 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -55,7 +55,7 @@ def test_capture_illegalargument_exception(self): from pyspark.sql.tests.test_utils import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/sql/types.pyi b/python/pyspark/sql/types.pyi new file mode 100644 index 0000000000000..31765e94884d7 --- /dev/null +++ b/python/pyspark/sql/types.pyi @@ -0,0 +1,204 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, TypeVar +import datetime + +T = TypeVar("T") +U = TypeVar("U") + +class DataType: + def __hash__(self) -> int: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + @classmethod + def typeName(cls) -> str: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Union[str, Dict[str, Any]]: ... + def json(self) -> str: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Any) -> Any: ... + def fromInternal(self, obj: Any) -> Any: ... + +class DataTypeSingleton(type): + def __call__(cls): ... + +class NullType(DataType, metaclass=DataTypeSingleton): ... +class AtomicType(DataType): ... +class NumericType(AtomicType): ... +class IntegralType(NumericType, metaclass=DataTypeSingleton): ... +class FractionalType(NumericType): ... +class StringType(AtomicType, metaclass=DataTypeSingleton): ... +class BinaryType(AtomicType, metaclass=DataTypeSingleton): ... +class BooleanType(AtomicType, metaclass=DataTypeSingleton): ... + +class DateType(AtomicType, metaclass=DataTypeSingleton): + EPOCH_ORDINAL: int + def needConversion(self) -> bool: ... + def toInternal(self, d: datetime.date) -> int: ... + def fromInternal(self, v: int) -> datetime.date: ... 
+ +class TimestampType(AtomicType, metaclass=DataTypeSingleton): + def needConversion(self) -> bool: ... + def toInternal(self, dt: datetime.datetime) -> int: ... + def fromInternal(self, ts: int) -> datetime.datetime: ... + +class DecimalType(FractionalType): + precision: int + scale: int + hasPrecisionInfo: bool + def __init__(self, precision: int = ..., scale: int = ...) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> str: ... + +class DoubleType(FractionalType, metaclass=DataTypeSingleton): ... +class FloatType(FractionalType, metaclass=DataTypeSingleton): ... + +class ByteType(IntegralType): + def simpleString(self) -> str: ... + +class IntegerType(IntegralType): + def simpleString(self) -> str: ... + +class LongType(IntegralType): + def simpleString(self) -> str: ... + +class ShortType(IntegralType): + def simpleString(self) -> str: ... + +class ArrayType(DataType): + elementType: DataType + containsNull: bool + def __init__(self, elementType=DataType, containsNull: bool = ...) -> None: ... + def simpleString(self): ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> ArrayType: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]: ... + def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]: ... + +class MapType(DataType): + keyType: DataType + valueType: DataType + valueContainsNull: bool + def __init__( + self, keyType: DataType, valueType: DataType, valueContainsNull: bool = ... + ) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> MapType: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]: ... + def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]: ... + +class StructField(DataType): + name: str + dataType: DataType + nullable: bool + metadata: Dict[str, Any] + def __init__( + self, + name: str, + dataType: DataType, + nullable: bool = ..., + metadata: Optional[Dict[str, Any]] = ..., + ) -> None: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> StructField: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: T) -> T: ... + def fromInternal(self, obj: T) -> T: ... + +class StructType(DataType): + fields: List[StructField] + names: List[str] + def __init__(self, fields: Optional[List[StructField]] = ...) -> None: ... + @overload + def add( + self, + field: str, + data_type: Union[str, DataType], + nullable: bool = ..., + metadata: Optional[Dict[str, Any]] = ..., + ) -> StructType: ... + @overload + def add(self, field: StructField) -> StructType: ... + def __iter__(self) -> Iterator[StructField]: ... + def __len__(self) -> int: ... + def __getitem__(self, key: Union[str, int]) -> StructField: ... + def simpleString(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> StructType: ... + def fieldNames(self) -> List[str]: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Tuple) -> Tuple: ... + def fromInternal(self, obj: Tuple) -> Row: ... + +class UserDefinedType(DataType): + @classmethod + def typeName(cls) -> str: ... + @classmethod + def sqlType(cls) -> DataType: ... + @classmethod + def module(cls) -> str: ... 
+ @classmethod + def scalaUDT(cls) -> str: ... + def needConversion(self) -> bool: ... + def toInternal(self, obj: Any) -> Any: ... + def fromInternal(self, obj: Any) -> Any: ... + def serialize(self, obj: Any) -> Any: ... + def deserialize(self, datum: Any) -> Any: ... + def simpleString(self) -> str: ... + def json(self) -> str: ... + def jsonValue(self) -> Dict[str, Any]: ... + @classmethod + def fromJson(cls, json: Dict[str, Any]) -> UserDefinedType: ... + def __eq__(self, other: Any) -> bool: ... + +class Row(tuple): + @overload + def __new__(self, *args: str) -> Row: ... + @overload + def __new__(self, **kwargs: Any) -> Row: ... + @overload + def __init__(self, *args: str) -> None: ... + @overload + def __init__(self, **kwargs: Any) -> None: ... + def asDict(self, recursive: bool = ...) -> Dict[str, Any]: ... + def __contains__(self, item: Any) -> bool: ... + def __call__(self, *args: Any) -> Row: ... + def __getitem__(self, item: Any) -> Any: ... + def __getattr__(self, item: str) -> Any: ... + def __setattr__(self, key: Any, value: Any) -> None: ... + def __reduce__( + self, + ) -> Tuple[Callable[[List[str], List[Any]], Row], Tuple[List[str], Tuple]]: ... + +class DateConverter: + def can_convert(self, obj: Any) -> bool: ... + def convert(self, obj, gateway_client) -> Any: ... + +class DatetimeConverter: + def can_convert(self, obj) -> bool: ... + def convert(self, obj, gateway_client) -> Any: ... diff --git a/python/pyspark/sql/udf.pyi b/python/pyspark/sql/udf.pyi new file mode 100644 index 0000000000000..87c3672780037 --- /dev/null +++ b/python/pyspark/sql/udf.pyi @@ -0,0 +1,57 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, Optional + +from pyspark.sql._typing import ColumnOrName, DataTypeOrString +from pyspark.sql.column import Column +import pyspark.sql.session + +class UserDefinedFunction: + func: Callable[..., Any] + evalType: int + deterministic: bool + def __init__( + self, + func: Callable[..., Any], + returnType: DataTypeOrString = ..., + name: Optional[str] = ..., + evalType: int = ..., + deterministic: bool = ..., + ) -> None: ... + @property + def returnType(self): ... + def __call__(self, *cols: ColumnOrName) -> Column: ... + def asNondeterministic(self) -> UserDefinedFunction: ... + +class UDFRegistration: + sparkSession: pyspark.sql.session.SparkSession + def __init__(self, sparkSession: pyspark.sql.session.SparkSession) -> None: ... + def register( + self, + name: str, + f: Callable[..., Any], + returnType: Optional[DataTypeOrString] = ..., + ): ... + def registerJavaFunction( + self, + name: str, + javaClassName: str, + returnType: Optional[DataTypeOrString] = ..., + ) -> None: ... + def registerJavaUDAF(self, name: str, javaClassName: str) -> None: ... 
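For context only (not part of the patch): a minimal sketch of how the signatures declared in `types.pyi` and `udf.pyi` above surface to a type checker at call sites. The session name `spark` and the `to_upper` UDF are illustrative, not taken from this commit.

```
# Illustrative only -- assumes a local SparkSession; mypy resolves these
# calls against the stubs added in this patch.
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType, StructField, StructType

spark = SparkSession.builder.appName("stub-demo").getOrCreate()

schema = StructType([StructField("name", StringType(), True)])
df = spark.createDataFrame([("alice",), ("bob",)], schema)

# udf(...) yields a UserDefinedFunction; calling it returns a Column,
# matching UserDefinedFunction.__call__ in udf.pyi.
to_upper = udf(lambda s: s.upper() if s is not None else None, StringType())
df.select(to_upper("name").alias("upper_name")).show()
```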
diff --git a/python/pyspark/sql/utils.pyi b/python/pyspark/sql/utils.pyi new file mode 100644 index 0000000000000..c11e4bed54e7f --- /dev/null +++ b/python/pyspark/sql/utils.pyi @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from pyspark import SparkContext as SparkContext # noqa: F401 +from typing import Any, Optional + +class CapturedException(Exception): + desc: Any = ... + stackTrace: Any = ... + cause: Any = ... + def __init__( + self, desc: Any, stackTrace: Any, cause: Optional[Any] = ... + ) -> None: ... + +class AnalysisException(CapturedException): ... +class ParseException(CapturedException): ... +class IllegalArgumentException(CapturedException): ... +class StreamingQueryException(CapturedException): ... +class QueryExecutionException(CapturedException): ... +class PythonException(CapturedException): ... +class UnknownException(CapturedException): ... + +def convert_exception(e: Any): ... +def capture_sql_exception(f: Any): ... +def install_exception_handler() -> None: ... +def toJArray(gateway: Any, jtype: Any, arr: Any): ... +def require_test_compiled() -> None: ... + +class ForeachBatchFunction: + sql_ctx: Any = ... + func: Any = ... + def __init__(self, sql_ctx: Any, func: Any) -> None: ... + error: Any = ... + def call(self, jdf: Any, batch_id: Any) -> None: ... + class Java: + implements: Any = ... + +def to_str(value: Any): ... diff --git a/python/pyspark/sql/window.pyi b/python/pyspark/sql/window.pyi new file mode 100644 index 0000000000000..4e31d57bec4d0 --- /dev/null +++ b/python/pyspark/sql/window.pyi @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark.sql._typing import ColumnOrName +from py4j.java_gateway import JavaObject # type: ignore[import] + +class Window: + unboundedPreceding: int + unboundedFollowing: int + currentRow: int + @staticmethod + def partitionBy(*cols: ColumnOrName) -> WindowSpec: ... 
+ @staticmethod + def orderBy(*cols: ColumnOrName) -> WindowSpec: ... + @staticmethod + def rowsBetween(start: int, end: int) -> WindowSpec: ... + @staticmethod + def rangeBetween(start: int, end: int) -> WindowSpec: ... + +class WindowSpec: + def __init__(self, jspec: JavaObject) -> None: ... + def partitionBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def orderBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def rowsBetween(self, start: int, end: int) -> WindowSpec: ... + def rangeBetween(self, start: int, end: int) -> WindowSpec: ... diff --git a/python/pyspark/statcounter.pyi b/python/pyspark/statcounter.pyi new file mode 100644 index 0000000000000..38e5970501527 --- /dev/null +++ b/python/pyspark/statcounter.pyi @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Dict, Iterable, Optional, Union + +maximum: Any +minimum: Any +sqrt: Any + +class StatCounter: + n: int + mu: float + m2: float + maxValue: float + minValue: float + def __init__(self, values: Optional[Iterable[float]] = ...) -> None: ... + def merge(self, value: float) -> StatCounter: ... + def mergeStats(self, other: StatCounter) -> StatCounter: ... + def copy(self) -> StatCounter: ... + def count(self) -> int: ... + def mean(self) -> float: ... + def sum(self) -> float: ... + def min(self) -> float: ... + def max(self) -> float: ... + def variance(self) -> float: ... + def sampleVariance(self) -> float: ... + def stdev(self) -> float: ... + def sampleStdev(self) -> float: ... + def asDict(self, sample: bool = ...) -> Dict[str, Union[float, int]]: ... diff --git a/python/pyspark/status.pyi b/python/pyspark/status.pyi new file mode 100644 index 0000000000000..0558e245f49cc --- /dev/null +++ b/python/pyspark/status.pyi @@ -0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import List, NamedTuple, Optional +from py4j.java_gateway import JavaArray, JavaObject # type: ignore[import] + +class SparkJobInfo(NamedTuple): + jobId: int + stageIds: JavaArray + status: str + +class SparkStageInfo(NamedTuple): + stageId: int + currentAttemptId: int + name: str + numTasks: int + numActiveTasks: int + numCompletedTasks: int + numFailedTasks: int + +class StatusTracker: + def __init__(self, jtracker: JavaObject) -> None: ... + def getJobIdsForGroup(self, jobGroup: Optional[str] = ...) -> List[int]: ... + def getActiveStageIds(self) -> List[int]: ... + def getActiveJobsIds(self) -> List[int]: ... + def getJobInfo(self, jobId: int) -> SparkJobInfo: ... + def getStageInfo(self, stageId: int) -> SparkStageInfo: ... diff --git a/python/pyspark/storagelevel.pyi b/python/pyspark/storagelevel.pyi new file mode 100644 index 0000000000000..2eb05850bae78 --- /dev/null +++ b/python/pyspark/storagelevel.pyi @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ClassVar + +class StorageLevel: + DISK_ONLY: ClassVar[StorageLevel] + DISK_ONLY_2: ClassVar[StorageLevel] + MEMORY_ONLY: ClassVar[StorageLevel] + MEMORY_ONLY_2: ClassVar[StorageLevel] + DISK_ONLY_3: ClassVar[StorageLevel] + MEMORY_AND_DISK: ClassVar[StorageLevel] + MEMORY_AND_DISK_2: ClassVar[StorageLevel] + OFF_HEAP: ClassVar[StorageLevel] + + useDisk: bool + useMemory: bool + useOffHeap: bool + deserialized: bool + replication: int + def __init__( + self, + useDisk: bool, + useMemory: bool, + useOffHeap: bool, + deserialized: bool, + replication: int = ..., + ) -> None: ... diff --git a/python/pyspark/streaming/__init__.pyi b/python/pyspark/streaming/__init__.pyi new file mode 100644 index 0000000000000..281c06e51cc60 --- /dev/null +++ b/python/pyspark/streaming/__init__.pyi @@ -0,0 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from pyspark.streaming.context import StreamingContext as StreamingContext # noqa: F401 +from pyspark.streaming.dstream import DStream as DStream # noqa: F401 +from pyspark.streaming.listener import ( # noqa: F401 + StreamingListener as StreamingListener, +) diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi new file mode 100644 index 0000000000000..f4b3dad38f1fb --- /dev/null +++ b/python/pyspark/streaming/context.pyi @@ -0,0 +1,75 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Any, Callable, List, Optional, TypeVar, Union + +from py4j.java_gateway import JavaObject # type: ignore[import] + +from pyspark.context import SparkContext +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +from pyspark.streaming.dstream import DStream +from pyspark.streaming.listener import StreamingListener + +T = TypeVar("T") + +class StreamingContext: + def __init__( + self, + sparkContext: SparkContext, + batchDuration: Union[float, int] = ..., + jssc: Optional[JavaObject] = ..., + ) -> None: ... + @classmethod + def getOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] + ) -> StreamingContext: ... + @classmethod + def getActive(cls) -> StreamingContext: ... + @classmethod + def getActiveOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] + ) -> StreamingContext: ... + @property + def sparkContext(self) -> SparkContext: ... + def start(self) -> None: ... + def awaitTermination(self, timeout: Optional[int] = ...) -> None: ... + def awaitTerminationOrTimeout(self, timeout: int) -> None: ... + def stop( + self, stopSparkContext: bool = ..., stopGraceFully: bool = ... + ) -> None: ... + def remember(self, duration: int) -> None: ... + def checkpoint(self, directory: str) -> None: ... + def socketTextStream( + self, hostname: str, port: int, storageLevel: StorageLevel = ... + ) -> DStream[str]: ... + def textFileStream(self, directory: str) -> DStream[str]: ... + def binaryRecordsStream( + self, directory: str, recordLength: int + ) -> DStream[bytes]: ... + def queueStream( + self, + rdds: List[RDD[T]], + oneAtATime: bool = ..., + default: Optional[RDD[T]] = ..., + ) -> DStream[T]: ... + def transform( + self, dstreams: List[DStream[Any]], transformFunc: Callable[..., RDD[T]] + ) -> DStream[T]: ... + def union(self, *dstreams: DStream[T]) -> DStream[T]: ... + def addStreamingListener(self, streamingListener: StreamingListener) -> None: ... 
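For context only (not part of the patch): a small sketch of the `StreamingContext`/`DStream` calls that `context.pyi` above annotates. The socket host and port are placeholders; the word-count pipeline is a conventional example, not code from this commit.

```
# Illustrative only -- the socket source "localhost:9999" is a placeholder.
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "stub-demo")
ssc = StreamingContext(sc, batchDuration=1)

lines = ssc.socketTextStream("localhost", 9999)    # DStream[str]
counts = (lines.flatMap(lambda line: line.split(" "))
               .map(lambda w: (w, 1))
               .reduceByKey(lambda a, b: a + b))    # DStream[Tuple[str, int]]
counts.pprint()

ssc.start()
ssc.awaitTermination()
```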
diff --git a/python/pyspark/streaming/dstream.pyi b/python/pyspark/streaming/dstream.pyi new file mode 100644 index 0000000000000..bbeea69ee9ac2 --- /dev/null +++ b/python/pyspark/streaming/dstream.pyi @@ -0,0 +1,208 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import overload +from typing import ( + Callable, + Generic, + Hashable, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, +) +import datetime +from pyspark.rdd import RDD +from pyspark.storagelevel import StorageLevel +import pyspark.streaming.context + +S = TypeVar("S") +T = TypeVar("T") +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") + +class DStream(Generic[T]): + is_cached: bool + is_checkpointed: bool + def __init__(self, jdstream, ssc, jrdd_deserializer) -> None: ... + def context(self) -> pyspark.streaming.context.StreamingContext: ... + def count(self) -> DStream[int]: ... + def filter(self, f: Callable[[T], bool]) -> DStream[T]: ... + def flatMap( + self: DStream[T], + f: Callable[[T], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> DStream[U]: ... + def map( + self: DStream[T], f: Callable[[T], U], preservesPartitioning: bool = ... + ) -> DStream[U]: ... + def mapPartitions( + self, f: Callable[[Iterable[T]], Iterable[U]], preservesPartitioning: bool = ... + ) -> DStream[U]: ... + def mapPartitionsWithIndex( + self, + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = ..., + ) -> DStream[U]: ... + def reduce(self, func: Callable[[T, T], T]) -> DStream[T]: ... + def reduceByKey( + self: DStream[Tuple[K, V]], + func: Callable[[V, V], V], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, V]]: ... + def combineByKey( + self: DStream[Tuple[K, V]], + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, U]]: ... + def partitionBy( + self: DStream[Tuple[K, V]], + numPartitions: int, + partitionFunc: Callable[[K], int] = ..., + ) -> DStream[Tuple[K, V]]: ... + @overload + def foreachRDD(self, func: Callable[[RDD[T]], None]) -> None: ... + @overload + def foreachRDD(self, func: Callable[[datetime.datetime, RDD[T]], None]) -> None: ... + def pprint(self, num: int = ...) -> None: ... + def mapValues( + self: DStream[Tuple[K, V]], f: Callable[[V], U] + ) -> DStream[Tuple[K, U]]: ... + def flatMapValues( + self: DStream[Tuple[K, V]], f: Callable[[V], Iterable[U]] + ) -> DStream[Tuple[K, U]]: ... + def glom(self) -> DStream[List[T]]: ... + def cache(self) -> DStream[T]: ... + def persist(self, storageLevel: StorageLevel) -> DStream[T]: ... + def checkpoint(self, interval: Union[float, int]) -> DStream[T]: ... 
+ def groupByKey( + self: DStream[Tuple[K, V]], numPartitions: Optional[int] = ... + ) -> DStream[Tuple[K, Iterable[V]]]: ... + def countByValue(self) -> DStream[Tuple[T, int]]: ... + def saveAsTextFiles(self, prefix: str, suffix: Optional[str] = ...) -> None: ... + @overload + def transform(self, func: Callable[[RDD[T]], RDD[U]]) -> TransformedDStream[U]: ... + @overload + def transform( + self, func: Callable[[datetime.datetime, RDD[T]], RDD[U]] + ) -> TransformedDStream[U]: ... + @overload + def transformWith( + self, + func: Callable[[RDD[T], RDD[U]], RDD[V]], + other: RDD[U], + keepSerializer: bool = ..., + ) -> DStream[V]: ... + @overload + def transformWith( + self, + func: Callable[[datetime.datetime, RDD[T], RDD[U]], RDD[V]], + other: RDD[U], + keepSerializer: bool = ..., + ) -> DStream[V]: ... + def repartition(self, numPartitions: int) -> DStream[T]: ... + def union(self, other: DStream[U]) -> DStream[Union[T, U]]: ... + def cogroup( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[List[V], List[U]]]]: ... + def join( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[V, U]]]: ... + def leftOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[V, Optional[U]]]]: ... + def rightOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[Optional[V], U]]]: ... + def fullOuterJoin( + self: DStream[Tuple[K, V]], + other: DStream[Tuple[K, U]], + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... + def slice( + self, begin: Union[datetime.datetime, int], end: Union[datetime.datetime, int] + ) -> List[RDD[T]]: ... + def window( + self, windowDuration: int, slideDuration: Optional[int] = ... + ) -> DStream[T]: ... + def reduceByWindow( + self, + reduceFunc: Callable[[T, T], T], + invReduceFunc: Optional[Callable[[T, T], T]], + windowDuration: int, + slideDuration: int, + ) -> DStream[T]: ... + def countByWindow( + self, windowDuration: int, slideDuration: int + ) -> DStream[Tuple[T, int]]: ... + def countByValueAndWindow( + self, + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[T, int]]: ... + def groupByKeyAndWindow( + self: DStream[Tuple[K, V]], + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = ..., + ) -> DStream[Tuple[K, Iterable[V]]]: ... + def reduceByKeyAndWindow( + self: DStream[Tuple[K, V]], + func: Callable[[V, V], V], + invFunc: Optional[Callable[[V, V], V]], + windowDuration: int, + slideDuration: Optional[int] = ..., + numPartitions: Optional[int] = ..., + filterFunc: Optional[Callable[[Tuple[K, V]], bool]] = ..., + ) -> DStream[Tuple[K, V]]: ... + def updateStateByKey( + self: DStream[Tuple[K, V]], + updateFunc: Callable[[Iterable[V], Optional[S]], S], + numPartitions: Optional[int] = ..., + initialRDD: Optional[RDD[Tuple[K, S]]] = ..., + ) -> DStream[Tuple[K, S]]: ... + +class TransformedDStream(DStream[U]): + is_cached: bool + is_checkpointed: bool + func: Callable + prev: DStream + @overload + def __init__( + self: DStream[U], prev: DStream[T], func: Callable[[RDD[T]], RDD[U]] + ) -> None: ... 
+ @overload + def __init__( + self: DStream[U], + prev: DStream[T], + func: Callable[[datetime.datetime, RDD[T]], RDD[U]], + ) -> None: ... diff --git a/python/pyspark/streaming/kinesis.pyi b/python/pyspark/streaming/kinesis.pyi new file mode 100644 index 0000000000000..246fa58ca6da3 --- /dev/null +++ b/python/pyspark/streaming/kinesis.pyi @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any, Optional + +def utf8_decoder(s): ... + +class KinesisUtils: + @staticmethod + def createStream( + ssc, + kinesisAppName, + streamName, + endpointUrl, + regionName, + initialPositionInStream, + checkpointInterval, + storageLevel: Any = ..., + awsAccessKeyId: Optional[Any] = ..., + awsSecretKey: Optional[Any] = ..., + decoder: Any = ..., + stsAssumeRoleArn: Optional[Any] = ..., + stsSessionName: Optional[Any] = ..., + stsExternalId: Optional[Any] = ..., + ): ... + +class InitialPositionInStream: + LATEST: Any + TRIM_HORIZON: Any diff --git a/python/pyspark/streaming/listener.pyi b/python/pyspark/streaming/listener.pyi new file mode 100644 index 0000000000000..4033529607cea --- /dev/null +++ b/python/pyspark/streaming/listener.pyi @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any + +class StreamingListener: + def __init__(self) -> None: ... + def onStreamingStarted(self, streamingStarted: Any) -> None: ... + def onReceiverStarted(self, receiverStarted: Any) -> None: ... + def onReceiverError(self, receiverError: Any) -> None: ... + def onReceiverStopped(self, receiverStopped: Any) -> None: ... + def onBatchSubmitted(self, batchSubmitted: Any) -> None: ... + def onBatchStarted(self, batchStarted: Any) -> None: ... + def onBatchCompleted(self, batchCompleted: Any) -> None: ... + def onOutputOperationStarted(self, outputOperationStarted: Any) -> None: ... 
+ def onOutputOperationCompleted(self, outputOperationCompleted: Any) -> None: ... + class Java: + implements: Any = ... diff --git a/python/pyspark/streaming/tests/test_context.py b/python/pyspark/streaming/tests/test_context.py index 26f1d24f644ea..b255796cdcdd7 100644 --- a/python/pyspark/streaming/tests/test_context.py +++ b/python/pyspark/streaming/tests/test_context.py @@ -178,7 +178,7 @@ def test_await_termination_or_timeout(self): from pyspark.streaming.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_dstream.py b/python/pyspark/streaming/tests/test_dstream.py index 00d00b50c9283..ea5353c77b6b2 100644 --- a/python/pyspark/streaming/tests/test_dstream.py +++ b/python/pyspark/streaming/tests/test_dstream.py @@ -647,7 +647,7 @@ def check_output(n): from pyspark.streaming.tests.test_dstream import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_kinesis.py b/python/pyspark/streaming/tests/test_kinesis.py index b39809e2f69c2..70c9a012e7a03 100644 --- a/python/pyspark/streaming/tests/test_kinesis.py +++ b/python/pyspark/streaming/tests/test_kinesis.py @@ -83,7 +83,7 @@ def get_output(_, rdd): from pyspark.streaming.tests.test_kinesis import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/tests/test_listener.py b/python/pyspark/streaming/tests/test_listener.py index 3970cf6589394..e4dab1bba3a6c 100644 --- a/python/pyspark/streaming/tests/test_listener.py +++ b/python/pyspark/streaming/tests/test_listener.py @@ -152,7 +152,7 @@ def func(dstream): from pyspark.streaming.tests.test_listener import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/streaming/util.pyi b/python/pyspark/streaming/util.pyi new file mode 100644 index 0000000000000..d552eb15f4818 --- /dev/null +++ b/python/pyspark/streaming/util.pyi @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# NOTE: This dynamically typed stub was automatically generated by stubgen. 
+ +from typing import Any, Optional + +class TransformFunction: + ctx: Any + func: Any + deserializers: Any + rdd_wrap_func: Any + failure: Any + def __init__(self, ctx, func, *deserializers) -> None: ... + def rdd_wrapper(self, func): ... + def call(self, milliseconds, jrdds): ... + def getLastFailure(self): ... + class Java: + implements: Any + +class TransformFunctionSerializer: + ctx: Any + serializer: Any + gateway: Any + failure: Any + def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ... + def dumps(self, id): ... + def loads(self, data): ... + def getLastFailure(self): ... + class Java: + implements: Any + +def rddToFileName(prefix, suffix, timestamp): ... diff --git a/python/pyspark/taskcontext.pyi b/python/pyspark/taskcontext.pyi new file mode 100644 index 0000000000000..3415c69f02177 --- /dev/null +++ b/python/pyspark/taskcontext.pyi @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import Dict, List +from typing_extensions import Literal +from pyspark.resource.information import ResourceInformation + +class TaskContext: + def __new__(cls) -> TaskContext: ... + @classmethod + def get(cls) -> TaskContext: ... + def stageId(self) -> int: ... + def partitionId(self) -> int: ... + def attemptNumber(self) -> int: ... + def taskAttemptId(self) -> int: ... + def getLocalProperty(self, key: str) -> str: ... + def resources(self) -> Dict[str, ResourceInformation]: ... + +BARRIER_FUNCTION = Literal[1] + +class BarrierTaskContext(TaskContext): + @classmethod + def get(cls) -> BarrierTaskContext: ... + def barrier(self) -> None: ... + def allGather(self, message: str = ...) -> List[str]: ... + def getTaskInfos(self) -> List[BarrierTaskInfo]: ... + +class BarrierTaskInfo: + address: str + def __init__(self, address: str) -> None: ... 
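As with the other stubs, a short illustrative sketch of code the `TaskContext`/`BarrierTaskContext` signatures above describe (assumes a local `SparkContext` with enough cores for the barrier stage; the app name is a placeholder):

```python
# Sketch: reading task metadata inside tasks, typed against the stub above.
from pyspark import SparkContext
from pyspark.taskcontext import BarrierTaskContext, TaskContext

sc = SparkContext("local[4]", "taskcontext-stub-example")  # placeholder app name

def tag_with_partition(rows):
    ctx = TaskContext.get()                 # TaskContext
    return ((ctx.partitionId(), ctx.stageId(), x) for x in rows)

print(sc.parallelize(range(8), 4).mapPartitions(tag_with_partition).collect())

def barrier_stage(rows):
    ctx = BarrierTaskContext.get()          # BarrierTaskContext
    ctx.barrier()                           # wait for every task in the stage
    addresses = [info.address for info in ctx.getTaskInfos()]
    yield (ctx.partitionId(), addresses)

print(sc.parallelize(range(4), 4).barrier().mapPartitions(barrier_stage).collect())
```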
diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index a36d0709d8013..a8cf53b31f8c9 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -20,7 +20,7 @@ from pyspark.ml import Estimator, Model, Transformer, UnaryTransformer from pyspark.ml.param import Param, Params, TypeConverters from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable -from pyspark.ml.wrapper import _java2py +from pyspark.ml.wrapper import _java2py # type: ignore from pyspark.sql import DataFrame, SparkSession from pyspark.sql.types import DoubleType from pyspark.testing.utils import ReusedPySparkTestCase as PySparkTestCase @@ -116,7 +116,8 @@ def _transform(self, dataset): class MockUnaryTransformer(UnaryTransformer, DefaultParamsReadable, DefaultParamsWritable): - shift = Param(Params._dummy(), "shift", "The amount by which to shift " + + shift = Param(Params._dummy(), # type: ignore + "shift", "The amount by which to shift " + "data in a DataFrame", typeConverter=TypeConverters.toFloat) diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index e85cae7dda2c6..a394e8eecc69e 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -147,7 +147,7 @@ class PythonOnlyPoint(ExamplePoint): """ An example class to demonstrate UDT in only Python """ - __UDT__ = PythonOnlyUDT() + __UDT__ = PythonOnlyUDT() # type: ignore class MyObject(object): diff --git a/python/pyspark/testing/streamingutils.py b/python/pyspark/testing/streamingutils.py index a6abc2ef673b7..f6a317e97331c 100644 --- a/python/pyspark/testing/streamingutils.py +++ b/python/pyspark/testing/streamingutils.py @@ -37,7 +37,7 @@ "spark-streaming-kinesis-asl-assembly-", "spark-streaming-kinesis-asl-assembly_") if kinesis_asl_assembly_jar is None: - kinesis_requirement_message = ( + kinesis_requirement_message = ( # type: ignore "Skipping all Kinesis Python tests as the optional Kinesis project was " "not compiled into a JAR. 
To run these tests, " "you need to build Spark with 'build/sbt -Pkinesis-asl assembly/package " @@ -47,7 +47,7 @@ existing_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell") jars_args = "--jars %s" % kinesis_asl_assembly_jar os.environ["PYSPARK_SUBMIT_ARGS"] = " ".join([jars_args, existing_args]) - kinesis_requirement_message = None + kinesis_requirement_message = None # type: ignore should_test_kinesis = kinesis_requirement_message is None diff --git a/python/pyspark/tests/test_appsubmit.py b/python/pyspark/tests/test_appsubmit.py index 15170b878eb22..3f45bf039d3a9 100644 --- a/python/pyspark/tests/test_appsubmit.py +++ b/python/pyspark/tests/test_appsubmit.py @@ -241,7 +241,7 @@ def test_user_configuration(self): from pyspark.tests.test_appsubmit import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_broadcast.py b/python/pyspark/tests/test_broadcast.py index 543dc98660fde..c35c5a68e4986 100644 --- a/python/pyspark/tests/test_broadcast.py +++ b/python/pyspark/tests/test_broadcast.py @@ -148,7 +148,7 @@ def random_bytes(n): from pyspark.tests.test_broadcast import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_conf.py b/python/pyspark/tests/test_conf.py index 3e80c17f4931c..a8d65b8919777 100644 --- a/python/pyspark/tests/test_conf.py +++ b/python/pyspark/tests/test_conf.py @@ -36,7 +36,7 @@ def test_memory_conf(self): from pyspark.tests.test_conf import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index 9f159f7703950..9b6b74a111288 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -93,7 +93,7 @@ def test_add_py_file(self): # this job fails due to `userlibrary` not being on the Python path: # disable logging in log4j temporarily def func(x): - from userlibrary import UserClass + from userlibrary import UserClass # type: ignore return UserClass().hello() with QuietTest(self.sc): self.assertRaises(Exception, self.sc.parallelize(range(2)).map(func).first) @@ -137,7 +137,8 @@ def test_add_egg_file_locally(self): # To ensure that we're actually testing addPyFile's effects, check that # this fails due to `userlibrary` not being on the Python path: def func(): - from userlib import UserClass # noqa: F401 + from userlib import UserClass # type: ignore[import] + UserClass() self.assertRaises(ImportError, func) path = os.path.join(SPARK_HOME, "python/test_support/userlib-0.1.zip") self.sc.addPyFile(path) @@ -147,11 +148,11 @@ def func(): def test_overwrite_system_module(self): self.sc.addPyFile(os.path.join(SPARK_HOME, "python/test_support/SimpleHTTPServer.py")) - import SimpleHTTPServer + import SimpleHTTPServer # type: ignore[import] self.assertEqual("My Server", SimpleHTTPServer.__name__) def func(x): - import SimpleHTTPServer + import SimpleHTTPServer # type: ignore[import] return SimpleHTTPServer.__name__ self.assertEqual(["My Server"], self.sc.parallelize(range(1)).map(func).collect()) @@ -321,7 +322,7 @@ def tearDown(self): 
from pyspark.tests.test_context import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_daemon.py b/python/pyspark/tests/test_daemon.py index b1f8c71c77ba9..c3fd89fef72c2 100644 --- a/python/pyspark/tests/test_daemon.py +++ b/python/pyspark/tests/test_daemon.py @@ -76,7 +76,7 @@ def test_termination_sigterm(self): from pyspark.tests.test_daemon import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_join.py b/python/pyspark/tests/test_join.py index 815c78ef9a8e2..63dd1cfef9a6a 100644 --- a/python/pyspark/tests/test_join.py +++ b/python/pyspark/tests/test_join.py @@ -62,7 +62,7 @@ def test_narrow_dependency_in_join(self): from pyspark.tests.test_join import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_pin_thread.py b/python/pyspark/tests/test_pin_thread.py index efe7d7f6639b1..b612796c963a0 100644 --- a/python/pyspark/tests/test_pin_thread.py +++ b/python/pyspark/tests/test_pin_thread.py @@ -169,7 +169,7 @@ def get_outer_local_prop(): from pyspark.tests.test_pin_thread import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py index ca144cc6e1eb6..de72a547b0844 100644 --- a/python/pyspark/tests/test_profiler.py +++ b/python/pyspark/tests/test_profiler.py @@ -101,7 +101,7 @@ def test_profiler_disabled(self): from pyspark.tests.test_profiler import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index c154bda00d605..47b8f10a5b05e 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -884,7 +884,7 @@ def run_job(job_group, index): from pyspark.tests.test_rdd import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_rddbarrier.py b/python/pyspark/tests/test_rddbarrier.py index f0a05a23cc4e0..ba2c4b9ba84d4 100644 --- a/python/pyspark/tests/test_rddbarrier.py +++ b/python/pyspark/tests/test_rddbarrier.py @@ -43,7 +43,7 @@ def f(index, iterator): from pyspark.tests.test_rddbarrier import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_readwrite.py b/python/pyspark/tests/test_readwrite.py index adbc343c650a7..145b53a5eaaa1 100644 --- a/python/pyspark/tests/test_readwrite.py +++ b/python/pyspark/tests/test_readwrite.py @@ -307,7 +307,7 @@ def test_malformed_RDD(self): from 
pyspark.tests.test_readwrite import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_serializers.py b/python/pyspark/tests/test_serializers.py index 8eaa9c7d5a8d2..cc7838e595b8a 100644 --- a/python/pyspark/tests/test_serializers.py +++ b/python/pyspark/tests/test_serializers.py @@ -87,7 +87,7 @@ def __getattr__(self, item): def test_pickling_file_handles(self): # to be corrected with SPARK-11160 try: - import xmlrunner # noqa: F401 + import xmlrunner # type: ignore[import] # noqa: F401 except ImportError: ser = CloudPickleSerializer() out1 = sys.stderr @@ -227,7 +227,7 @@ def test_chunked_stream(self): from pyspark.tests.test_serializers import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_shuffle.py b/python/pyspark/tests/test_shuffle.py index 061b93f32c56c..6a245a26b4551 100644 --- a/python/pyspark/tests/test_shuffle.py +++ b/python/pyspark/tests/test_shuffle.py @@ -170,7 +170,7 @@ def test_external_sort_in_rdd(self): from pyspark.tests.test_shuffle import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py index f5be685643dd5..f0e6672957c13 100644 --- a/python/pyspark/tests/test_taskcontext.py +++ b/python/pyspark/tests/test_taskcontext.py @@ -324,7 +324,7 @@ def tearDown(self): from pyspark.tests.test_taskcontext import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_util.py b/python/pyspark/tests/test_util.py index e853bc322c184..a25c41b296944 100644 --- a/python/pyspark/tests/test_util.py +++ b/python/pyspark/tests/test_util.py @@ -77,7 +77,7 @@ def test_parsing_version_string(self): from pyspark.tests.test_util import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index a855eaafc1927..bfaf3a3186cad 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -202,7 +202,7 @@ def tearDown(self): from pyspark.tests.test_worker import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None diff --git a/python/pyspark/traceback_utils.pyi b/python/pyspark/traceback_utils.pyi new file mode 100644 index 0000000000000..33b1b7dc3227f --- /dev/null +++ b/python/pyspark/traceback_utils.pyi @@ -0,0 +1,29 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from collections import namedtuple +from typing import Any + +CallSite = namedtuple("CallSite", "function file linenum") + +def first_spark_call(): ... + +class SCCallSiteSync: + def __init__(self, sc: Any) -> None: ... + def __enter__(self) -> None: ... + def __exit__(self, type: Any, value: Any, tb: Any) -> None: ... diff --git a/python/pyspark/util.pyi b/python/pyspark/util.pyi new file mode 100644 index 0000000000000..023b409831459 --- /dev/null +++ b/python/pyspark/util.pyi @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +from typing import Any, Tuple +from pyspark._typing import F + +import threading + +def print_exec(stream: Any) -> None: ... + +class VersionUtils: + @staticmethod + def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ... + +def fail_on_stopiteration(f: F) -> F: ... + +class InheritableThread(threading.Thread): + def __init__(self, target: Any, *args: Any, **kwargs: Any): ... + def __del__(self) -> None: ... diff --git a/python/pyspark/version.pyi b/python/pyspark/version.pyi new file mode 100644 index 0000000000000..444dae62f0c09 --- /dev/null +++ b/python/pyspark/version.pyi @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +__version__: str diff --git a/python/pyspark/worker.pyi b/python/pyspark/worker.pyi new file mode 100644 index 0000000000000..cc264823cc867 --- /dev/null +++ b/python/pyspark/worker.pyi @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyspark import shuffle as shuffle +from pyspark.broadcast import Broadcast as Broadcast +from pyspark.files import SparkFiles as SparkFiles +from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth +from pyspark.rdd import PythonEvalType as PythonEvalType +from pyspark.resource import ResourceInformation as ResourceInformation +from pyspark.serializers import ( + BatchedSerializer as BatchedSerializer, + PickleSerializer as PickleSerializer, + SpecialLengths as SpecialLengths, + UTF8Deserializer as UTF8Deserializer, + read_bool as read_bool, + read_int as read_int, + read_long as read_long, + write_int as write_int, + write_long as write_long, + write_with_length as write_with_length, +) +from pyspark.sql.pandas.serializers import ( + ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, + CogroupUDFSerializer as CogroupUDFSerializer, +) +from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type +from pyspark.sql.types import StructType as StructType +from pyspark.taskcontext import ( + BarrierTaskContext as BarrierTaskContext, + TaskContext as TaskContext, +) +from pyspark.util import fail_on_stopiteration as fail_on_stopiteration +from typing import Any + +has_resource_module: bool +pickleSer: Any +utf8_deserializer: Any + +def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ... +def add_path(path: Any) -> None: ... +def read_command(serializer: Any, file: Any): ... +def chain(f: Any, g: Any): ... +def wrap_udf(f: Any, return_type: Any): ... +def wrap_scalar_pandas_udf(f: Any, return_type: Any): ... +def wrap_pandas_iter_udf(f: Any, return_type: Any): ... +def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... +def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_window_agg_pandas_udf( + f: Any, return_type: Any, runner_conf: Any, udf_index: Any +): ... +def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ... +def read_single_udf( + pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any +): ... +def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ... +def main(infile: Any, outfile: Any) -> None: ... 
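`version.pyi` and `util.pyi` above only annotate attributes that already exist, so a checker can verify simple version-handling code statically. A small sketch (illustrative; the printed value depends on the installed build):

```python
# Sketch: static typing of version helpers via the stubs above.
from typing import Tuple

from pyspark.version import __version__   # annotated as `str` in version.pyi
from pyspark.util import VersionUtils     # majorMinorVersion: (str) -> Tuple[int, int]

major_minor: Tuple[int, int] = VersionUtils.majorMinorVersion(__version__)
print(major_minor)                         # e.g. (3, 1) on a 3.1.x build
```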
diff --git a/python/setup.py b/python/setup.py index 7c12b112acd65..206765389335f 100755 --- a/python/setup.py +++ b/python/setup.py @@ -265,7 +265,8 @@ def run(self): 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy'], + 'Programming Language :: Python :: Implementation :: PyPy', + 'Typing :: Typed'], cmdclass={ 'install': InstallCommand, }, From 688d016c7acc4b9d96d75b40123be9f40b7b2693 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 24 Sep 2020 14:49:58 +0900 Subject: [PATCH 0101/1009] [SPARK-32982][BUILD] Remove hive-1.2 profiles in PIP installation option ### What changes were proposed in this pull request? This PR removes Hive 1.2 option (and therefore `HIVE_VERSION` environment variable as well). ### Why are the changes needed? Hive 1.2 is a fork version. We shouldn't promote users to use. ### Does this PR introduce _any_ user-facing change? Nope, `HIVE_VERSION` and Hive 1.2 are removed but this is new experimental feature in master only. ### How was this patch tested? Manually tested: ```bash SPARK_VERSION=3.0.1 HADOOP_VERSION=3.2 pip install pyspark-3.1.0.dev0.tar.gz -v SPARK_VERSION=3.0.1 HADOOP_VERSION=2.7 pip install pyspark-3.1.0.dev0.tar.gz -v SPARK_VERSION=3.0.1 HADOOP_VERSION=invalid pip install pyspark-3.1.0.dev0.tar.gz -v ``` Closes #29858 from HyukjinKwon/SPARK-32981. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/create-release/release-build.sh | 2 +- .../docs/source/getting_started/install.rst | 24 +++++-------------- python/pyspark/install.py | 16 ++++--------- python/pyspark/tests/test_install_spark.py | 13 ++++------ python/setup.py | 2 ++ 5 files changed, 18 insertions(+), 39 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index c47469a2f6d95..c7fee13d39c6b 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -276,7 +276,7 @@ if [[ "$1" == "package" ]]; then # list of packages to be built, so it's ok for things to be missing in BINARY_PKGS_EXTRA. # NOTE: Don't forget to update the valid combinations of distributions at - # 'python/pyspark.install.py' and 'python/docs/source/getting_started/installation.rst' + # 'python/pyspark/install.py' and 'python/docs/source/getting_started/install.rst' # if you're changing them. declare -A BINARY_PKGS_ARGS BINARY_PKGS_ARGS["hadoop3.2"]="-Phadoop-3.2 $HIVE_PROFILES" diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index d915e9c7349d4..4039698d39958 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,40 +48,28 @@ If you want to install extra dependencies for a specific componenet, you can ins pip install pyspark[sql] -For PySpark with different Hadoop and/or Hive, you can install it by using ``HIVE_VERSION`` and ``HADOOP_VERSION`` environment variables as below: +For PySpark with a different Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: .. code-block:: bash - HIVE_VERSION=2.3 pip install pyspark HADOOP_VERSION=2.7 pip install pyspark - HIVE_VERSION=1.2 HADOOP_VERSION=2.7 pip install pyspark -The default distribution has built-in Hadoop 3.2 and Hive 2.3. If users specify different versions, the pip installation automatically +The default distribution uses Hadoop 3.2 and Hive 2.3. 
If users specify different versions of Hadoop, the pip installation automatically downloads a different version and use it in PySpark. Downloading it can take a while depending on -the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manually choose the mirror -for faster downloading. +the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manually choose the mirror for faster downloading. .. code-block:: bash PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org HADOOP_VERSION=2.7 pip install -It is recommended to use `-v` option in `pip` to track the installation and download status. +It is recommended to use ``-v`` option in ``pip`` to track the installation and download status. .. code-block:: bash HADOOP_VERSION=2.7 pip install pyspark -v -Supported versions are as below: - -====================================== ====================================== ====================================== -``HADOOP_VERSION`` \\ ``HIVE_VERSION`` 1.2 2.3 (default) -====================================== ====================================== ====================================== -**2.7** O O -**3.2 (default)** X O -**without** X O -====================================== ====================================== ====================================== - -Note that this installation of PySpark with different versions of Hadoop and Hive is experimental. It can change or be removed between minor releases. +Supported versions of Hadoop are ``HADOOP_VERSION=2.7`` and ``HADOOP_VERSION=3.2`` (default). +Note that this installation of PySpark with a different version of Hadoop is experimental. It can change or be removed between minor releases. Using Conda diff --git a/python/pyspark/install.py b/python/pyspark/install.py index 89573577cd994..84dd2c9964563 100644 --- a/python/pyspark/install.py +++ b/python/pyspark/install.py @@ -26,18 +26,13 @@ DEFAULT_HADOOP = "hadoop3.2" DEFAULT_HIVE = "hive2.3" SUPPORTED_HADOOP_VERSIONS = ["hadoop2.7", "hadoop3.2", "without-hadoop"] -SUPPORTED_HIVE_VERSIONS = ["hive1.2", "hive2.3"] +SUPPORTED_HIVE_VERSIONS = ["hive2.3"] UNSUPPORTED_COMBINATIONS = [ - ("without-hadoop", "hive1.2"), - ("hadoop3.2", "hive1.2"), ] def checked_package_name(spark_version, hadoop_version, hive_version): - if hive_version == "hive1.2": - return "%s-bin-%s-%s" % (spark_version, hadoop_version, hive_version) - else: - return "%s-bin-%s" % (spark_version, hadoop_version) + return "%s-bin-%s" % (spark_version, hadoop_version) def checked_versions(spark_version, hadoop_version, hive_version): @@ -48,7 +43,7 @@ def checked_versions(spark_version, hadoop_version, hive_version): :param hadoop_version: Hadoop version. It should be X.X such as '2.7' or 'hadoop2.7'. 'without' and 'without-hadoop' are supported as special keywords for Hadoop free distribution. - :param hive_version: Hive version. It should be X.X such as '1.2' or 'hive1.2'. + :param hive_version: Hive version. It should be X.X such as '2.3' or 'hive2.3'. :return it returns fully-qualified versions of Spark, Hadoop and Hive in a tuple. For example, spark-3.0.0, hadoop3.2 and hive2.3. 
@@ -80,9 +75,6 @@ def checked_versions(spark_version, hadoop_version, hive_version): "one of [%s]" % (hive_version, ", ".join( SUPPORTED_HADOOP_VERSIONS))) - if (hadoop_version, hive_version) in UNSUPPORTED_COMBINATIONS: - raise RuntimeError("Hive 1.2 should only be with Hadoop 2.7.") - return spark_version, hadoop_version, hive_version @@ -95,7 +87,7 @@ def install_spark(dest, spark_version, hadoop_version, hive_version): :param spark_version: Spark version. It should be spark-X.X.X form. :param hadoop_version: Hadoop version. It should be hadoopX.X such as 'hadoop2.7' or 'without-hadoop'. - :param hive_version: Hive version. It should be hiveX.X such as 'hive1.2'. + :param hive_version: Hive version. It should be hiveX.X such as 'hive2.3'. """ package_name = checked_package_name(spark_version, hadoop_version, hive_version) diff --git a/python/pyspark/tests/test_install_spark.py b/python/pyspark/tests/test_install_spark.py index b215cf6b01317..6f9949aa8b2e0 100644 --- a/python/pyspark/tests/test_install_spark.py +++ b/python/pyspark/tests/test_install_spark.py @@ -41,9 +41,6 @@ def test_install_spark(self): self.assertTrue(os.path.exists("%s/RELEASE" % tmp_dir)) def test_package_name(self): - self.assertEqual( - "spark-3.0.0-bin-hadoop3.2-hive1.2", - checked_package_name("spark-3.0.0", "hadoop3.2", "hive1.2")) self.assertEqual( "spark-3.0.0-bin-hadoop3.2", checked_package_name("spark-3.0.0", "hadoop3.2", "hive2.3")) @@ -53,12 +50,12 @@ def test_checked_versions(self): # Positive test cases self.assertEqual( - ("spark-3.0.0", "hadoop2.7", "hive1.2"), - checked_versions("spark-3.0.0", "hadoop2.7", "hive1.2")) + ("spark-3.0.0", "hadoop2.7", "hive2.3"), + checked_versions("spark-3.0.0", "hadoop2.7", "hive2.3")) self.assertEqual( - ("spark-3.0.0", "hadoop2.7", "hive1.2"), - checked_versions("3.0.0", "2.7", "1.2")) + ("spark-3.0.0", "hadoop2.7", "hive2.3"), + checked_versions("3.0.0", "2.7", "2.3")) self.assertEqual( ("spark-2.4.1", "without-hadoop", "hive2.3"), @@ -94,7 +91,7 @@ def test_checked_versions(self): hadoop_version=DEFAULT_HADOOP, hive_version="malformed") - with self.assertRaisesRegex(RuntimeError, "Hive 1.2 should only be with Hadoop 2.7"): + with self.assertRaisesRegex(RuntimeError, "Spark distribution of hive1.2 is not supported"): checked_versions( spark_version=test_version, hadoop_version="hadoop3.2", diff --git a/python/setup.py b/python/setup.py index 206765389335f..8d9cf2ee5459a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -127,6 +127,8 @@ def run(self): if ("HADOOP_VERSION" in os.environ) or ("HIVE_VERSION" in os.environ): # Note that SPARK_VERSION environment is just a testing purpose. + # HIVE_VERSION environment variable is also internal for now in case + # we support another version of Hive in the future. spark_version, hadoop_version, hive_version = install_module.checked_versions( os.environ.get("SPARK_VERSION", VERSION).lower(), os.environ.get("HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), From fe6d38d24356df46af7705154070cde536e5ac38 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Sep 2020 08:25:24 -0700 Subject: [PATCH 0102/1009] [SPARK-32987][MESOS] Pass all `mesos` module UTs in Scala 2.13 ### What changes were proposed in this pull request? The main change of this pr is add a manual sort to `defaultConf ++ driverConf` before constructing `--conf` options to ensure options has same order in Scala 2.12 and Scala 2.13. ### Why are the changes needed? We need to support a Scala 2.13 build. 
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass GitHub 2.13 Build Action Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl resource-managers/mesos -Pscala-2.13 -Pmesos -am mvn test -pl resource-managers/mesos -Pscala-2.13 -Pmesos ``` **Before** ``` Tests: succeeded 106, failed 1, canceled 0, ignored 0, pending 0 *** 1 TESTS FAILED *** ``` **After** ``` Tests: succeeded 107, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #29865 from LuciferYang/SPARK-32987-2. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/mesos/MesosClusterScheduler.scala | 2 +- .../scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 96f6737894392..39168a5e3c7a5 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -550,7 +550,7 @@ private[spark] class MesosClusterScheduler( val driverConf = desc.conf.getAll .filter { case (key, _) => !replicatedOptionsExcludeList.contains(key) } .toMap - (defaultConf ++ driverConf).foreach { case (key, value) => + (defaultConf ++ driverConf).toSeq.sortBy(_._1).foreach { case (key, value) => options ++= Seq("--conf", s"${key}=${value}") } options.map(shellEscape) diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 287c235d5b047..5ff7f99aadb2f 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -593,9 +593,9 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi val expectedCmd = "cd spark-version*; " + "bin/spark-submit --name \"app name\" --master mesos://mesos://localhost:5050 " + "--driver-cores 1.0 --driver-memory 1000M --class Main " + - "--conf spark.executor.uri=s3a://bucket/spark-version.tgz " + "--conf \"another.conf=\\\\value\" " + "--conf \"spark.app.name=app name\" " + + "--conf spark.executor.uri=s3a://bucket/spark-version.tgz " + "../jar " + "\"--a=\\$2\" " + "--b \"x y z\"" From 4ae0f703954cbd837dd96ff453270148b327343b Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Sep 2020 08:32:32 -0700 Subject: [PATCH 0103/1009] [SPARK-32954][YARN][TEST] Add jakarta.servlet-api test dependency to yarn module to avoid UTs badcase ### What changes were proposed in this pull request? 
When I tried to verify that the `resource-managers/yarn` module passed all UTs in Scala 2.13 , I found that there is a issue related to classpath order maybe blocked the UTs because there are more than one `servlet-api` dependency in spark now: - One is `javax.servlet:javax.servlet-api:3.10:compile` config in core/pom.xml, - The other is `jakarta.servlet:jakarta.servlet-api:4.0.3:test` cascaded by `org.glassfish.jersey.test-framework.providers` we can use `mvn dependency:tree` to check it . So when we execute `resource-managers/yarn` module test use ``` mvn clean test -pl resource-managers/yarn -Pyarn ``` or ``` mvn clean test -pl resource-managers/yarn -Pyarn -Pscala-2.13 ``` and if the position of `javax.servlet-api` in the in classpath is before `jakarta.servlet-api`, there are some cases failed in `YarnClusterSuite`, `YarnShuffleIntegrationSuite` and `YarnShuffleAuthSuite`. The failed reason as follow: ``` 20/09/18 19:14:07.486 launcher-proc-1 INFO YarnClusterDriver: Exception in thread "main" java.lang.ExceptionInInitializerError ... 20/09/18 19:14:07.486 launcher-proc-1 INFO YarnClusterDriver: Caused by: java.lang.SecurityException: class "javax.servlet.http.HttpSessionIdListener"'s signer information does not match signer information of other classes in the same package ... ``` ### Why are the changes needed? Avoid UTs error caused by classpath order . ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass 2.13 Build GitHub Action and do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl resource-managers/yarn -Pyarn -Pscala-2.13 -am mvn clean test -pl resource-managers/yarn -Pyarn -Pscala-2.13 ``` ``` Tests: succeeded 136, failed 0, canceled 1, ignored 0, pending 0 All tests passed. ``` Closes #29824 from LuciferYang/yarn-tests-deps. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- resource-managers/yarn/pom.xml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index d081be94ba7ae..bc80769be2390 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -88,6 +88,13 @@ hadoop-client + + jakarta.servlet + jakarta.servlet-api + 4.0.3 + test + + com.google.guava From 8ccfbc114e3e8d9fc919bf05602e02a506566e31 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 24 Sep 2020 10:58:52 -0700 Subject: [PATCH 0104/1009] [SPARK-32381][CORE][SQL] Move and refactor parallel listing & non-location sensitive listing to core ### What changes were proposed in this pull request? This moves and refactors the parallel listing utilities from `InMemoryFileIndex` to Spark core so it can be reused by modules beside SQL. Along the process this also did some cleanups/refactorings: - Created a `HadoopFSUtils` class under core - Moved `InMemoryFileIndex.bulkListLeafFiles` into `HadoopFSUtils.parallelListLeafFiles`. It now depends on a `SparkContext` instead of `SparkSession` in SQL. Also added a few parameters which used to be read from `SparkSession.conf`: `ignoreMissingFiles`, `ignoreLocality`, `parallelismThreshold`, `parallelismMax ` and `filterFun` (for additional filtering support but we may be able to merge this with `filter` parameter in future). - Moved `InMemoryFileIndex.listLeafFiles` into `HadoopFSUtils.listLeafFiles` with similar changes above. ### Why are the changes needed? Currently the locality-aware parallel listing mechanism only applies to `InMemoryFileIndex`. 
By moving this to core, we can potentially reuse the same mechanism for other code paths as well. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Since this is mostly a refactoring, it relies on existing unit tests such as those for `InMemoryFileIndex`. Closes #29471 from sunchao/SPARK-32381. Lead-authored-by: Chao Sun Co-authored-by: Holden Karau Co-authored-by: Chao Sun Signed-off-by: Holden Karau --- .../org/apache/spark/util/HadoopFSUtils.scala | 360 ++++++++++++++++++ .../sql/execution/command/CommandUtils.scala | 2 +- .../datasources/InMemoryFileIndex.scala | 297 +-------------- 3 files changed, 376 insertions(+), 283 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala new file mode 100644 index 0000000000000..c0a135e04bac5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.io.FileNotFoundException + +import scala.collection.mutable + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.viewfs.ViewFileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem + +import org.apache.spark._ +import org.apache.spark.annotation.Private +import org.apache.spark.internal.Logging +import org.apache.spark.metrics.source.HiveCatalogMetrics + +/** + * Utility functions to simplify and speed-up file listing. + */ +private[spark] object HadoopFSUtils extends Logging { + /** + * Lists a collection of paths recursively. Picks the listing strategy adaptively depending + * on the number of paths to list. + * + * This may only be called on the driver. + * + * @param sc Spark context used to run parallel listing. + * @param paths Input paths to list + * @param hadoopConf Hadoop configuration + * @param filter Path filter used to exclude leaf files from result + * @param isRootLevel Whether the input paths are at the root level, i.e., they are the root + * paths as opposed to nested paths encountered during recursive calls of this. + * @param ignoreMissingFiles Ignore missing files that happen during recursive listing + * (e.g., due to race conditions) + * @param ignoreLocality Whether to fetch data locality info when listing leaf files. If false, + * this will return `FileStatus` without `BlockLocation` info. + * @param parallelismThreshold The threshold to enable parallelism. If the number of input paths + * is smaller than this value, this will fallback to use + * sequential listing. 
+ * @param parallelismMax The maximum parallelism for listing. If the number of input paths is + * larger than this value, parallelism will be throttled to this value + * to avoid generating too many tasks. + * @param filterFun Optional predicate on the leaf files. Files who failed the check will be + * excluded from the results + * @return for each input path, the set of discovered files for the path + */ + def parallelListLeafFiles( + sc: SparkContext, + paths: Seq[Path], + hadoopConf: Configuration, + filter: PathFilter, + isRootLevel: Boolean, + ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, + parallelismThreshold: Int, + parallelismMax: Int, + filterFun: Option[String => Boolean] = None): Seq[(Path, Seq[FileStatus])] = { + + // Short-circuits parallel listing when serial listing is likely to be faster. + if (paths.size <= parallelismThreshold) { + return paths.map { path => + val leafFiles = listLeafFiles( + path, + hadoopConf, + filter, + Some(sc), + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = isRootLevel, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax, + filterFun = filterFun) + (path, leafFiles) + } + } + + logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." + + s" The first several paths are: ${paths.take(10).mkString(", ")}.") + HiveCatalogMetrics.incrementParallelListingJobCount(1) + + val serializableConfiguration = new SerializableConfiguration(hadoopConf) + val serializedPaths = paths.map(_.toString) + + // Set the number of parallelism to prevent following file listing from generating many tasks + // in case of large #defaultParallelism. + val numParallelism = Math.min(paths.size, parallelismMax) + + val previousJobDescription = sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) + val statusMap = try { + val description = paths.size match { + case 0 => + "Listing leaf files and directories 0 paths" + case 1 => + s"Listing leaf files and directories for 1 path:
    ${paths(0)}" + case s => + s"Listing leaf files and directories for $s paths:
    ${paths(0)}, ..." + } + sc.setJobDescription(description) + sc + .parallelize(serializedPaths, numParallelism) + .mapPartitions { pathStrings => + val hadoopConf = serializableConfiguration.value + pathStrings.map(new Path(_)).toSeq.map { path => + val leafFiles = listLeafFiles( + path = path, + hadoopConf = hadoopConf, + filter = filter, + contextOpt = None, // Can't execute parallel scans on workers + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = isRootLevel, + filterFun = filterFun, + parallelismThreshold = Int.MaxValue, + parallelismMax = 0) + (path, leafFiles) + }.iterator + }.map { case (path, statuses) => + val serializableStatuses = statuses.map { status => + // Turn FileStatus into SerializableFileStatus so we can send it back to the driver + val blockLocations = status match { + case f: LocatedFileStatus => + f.getBlockLocations.map { loc => + SerializableBlockLocation( + loc.getNames, + loc.getHosts, + loc.getOffset, + loc.getLength) + } + + case _ => + Array.empty[SerializableBlockLocation] + } + + SerializableFileStatus( + status.getPath.toString, + status.getLen, + status.isDirectory, + status.getReplication, + status.getBlockSize, + status.getModificationTime, + status.getAccessTime, + blockLocations) + } + (path.toString, serializableStatuses) + }.collect() + } finally { + sc.setJobDescription(previousJobDescription) + } + + // turn SerializableFileStatus back to Status + statusMap.map { case (path, serializableStatuses) => + val statuses = serializableStatuses.map { f => + val blockLocations = f.blockLocations.map { loc => + new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) + } + new LocatedFileStatus( + new FileStatus( + f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, + new Path(f.path)), + blockLocations) + } + (new Path(path), statuses) + } + } + + // scalastyle:off argcount + /** + * Lists a single filesystem path recursively. If a `SparkContext` object is specified, this + * function may launch Spark jobs to parallelize listing based on `parallelismThreshold`. + * + * If sessionOpt is None, this may be called on executors. + * + * @return all children of path that match the specified filter. + */ + private def listLeafFiles( + path: Path, + hadoopConf: Configuration, + filter: PathFilter, + contextOpt: Option[SparkContext], + ignoreMissingFiles: Boolean, + ignoreLocality: Boolean, + isRootPath: Boolean, + filterFun: Option[String => Boolean], + parallelismThreshold: Int, + parallelismMax: Int): Seq[FileStatus] = { + + logTrace(s"Listing $path") + val fs = path.getFileSystem(hadoopConf) + + // Note that statuses only include FileStatus for the files and dirs directly under path, + // and does not include anything else recursively. + val statuses: Array[FileStatus] = try { + fs match { + // DistributedFileSystem overrides listLocatedStatus to make 1 single call to namenode + // to retrieve the file status with the file block location. The reason to still fallback + // to listStatus is because the default implementation would potentially throw a + // FileNotFoundException which is better handled by doing the lookups manually below. 
+ case (_: DistributedFileSystem | _: ViewFileSystem) if !ignoreLocality => + val remoteIter = fs.listLocatedStatus(path) + new Iterator[LocatedFileStatus]() { + def next(): LocatedFileStatus = remoteIter.next + def hasNext(): Boolean = remoteIter.hasNext + }.toArray + case _ => fs.listStatus(path) + } + } catch { + // If we are listing a root path for SQL (e.g. a top level directory of a table), we need to + // ignore FileNotFoundExceptions during this root level of the listing because + // + // (a) certain code paths might construct an InMemoryFileIndex with root paths that + // might not exist (i.e. not all callers are guaranteed to have checked + // path existence prior to constructing InMemoryFileIndex) and, + // (b) we need to ignore deleted root paths during REFRESH TABLE, otherwise we break + // existing behavior and break the ability drop SessionCatalog tables when tables' + // root directories have been deleted (which breaks a number of Spark's own tests). + // + // If we are NOT listing a root path then a FileNotFoundException here means that the + // directory was present in a previous level of file listing but is absent in this + // listing, likely indicating a race condition (e.g. concurrent table overwrite or S3 + // list inconsistency). + // + // The trade-off in supporting existing behaviors / use-cases is that we won't be + // able to detect race conditions involving root paths being deleted during + // InMemoryFileIndex construction. However, it's still a net improvement to detect and + // fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion. + case _: FileNotFoundException if isRootPath || ignoreMissingFiles => + logWarning(s"The directory $path was not found. Was it deleted very recently?") + Array.empty[FileStatus] + } + + def doFilter(statuses: Array[FileStatus]) = filterFun match { + case Some(shouldFilterOut) => + statuses.filterNot(status => shouldFilterOut(status.getPath.getName)) + case None => + statuses + } + + val filteredStatuses = doFilter(statuses) + val allLeafStatuses = { + val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) + val nestedFiles: Seq[FileStatus] = contextOpt match { + case Some(context) if dirs.size > parallelismThreshold => + parallelListLeafFiles( + context, + dirs.map(_.getPath), + hadoopConf = hadoopConf, + filter = filter, + isRootLevel = false, + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + filterFun = filterFun, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax + ).flatMap(_._2) + case _ => + dirs.flatMap { dir => + listLeafFiles( + path = dir.getPath, + hadoopConf = hadoopConf, + filter = filter, + contextOpt = contextOpt, + ignoreMissingFiles = ignoreMissingFiles, + ignoreLocality = ignoreLocality, + isRootPath = false, + filterFun = filterFun, + parallelismThreshold = parallelismThreshold, + parallelismMax = parallelismMax) + } + } + val allFiles = topLevelFiles ++ nestedFiles + if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles + } + + val missingFiles = mutable.ArrayBuffer.empty[String] + val filteredLeafStatuses = doFilter(allLeafStatuses) + val resolvedLeafStatuses = filteredLeafStatuses.flatMap { + case f: LocatedFileStatus => + Some(f) + + // NOTE: + // + // - Although S3/S3A/S3N file system can be quite slow for remote file metadata + // operations, calling `getFileBlockLocations` does no harm here since these file system + // implementations don't actually issue RPC for this 
method. + // + // - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not + // be a big deal since we always use to `parallelListLeafFiles` when the number of + // paths exceeds threshold. + case f if !ignoreLocality => + // The other constructor of LocatedFileStatus will call FileStatus.getPermission(), + // which is very slow on some file system (RawLocalFileSystem, which is launch a + // subprocess and parse the stdout). + try { + val locations = fs.getFileBlockLocations(f, 0, f.getLen).map { loc => + // Store BlockLocation objects to consume less memory + if (loc.getClass == classOf[BlockLocation]) { + loc + } else { + new BlockLocation(loc.getNames, loc.getHosts, loc.getOffset, loc.getLength) + } + } + val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, f.getReplication, f.getBlockSize, + f.getModificationTime, 0, null, null, null, null, f.getPath, locations) + if (f.isSymlink) { + lfs.setSymlink(f.getSymlink) + } + Some(lfs) + } catch { + case _: FileNotFoundException if ignoreMissingFiles => + missingFiles += f.getPath.toString + None + } + + case f => Some(f) + } + + if (missingFiles.nonEmpty) { + logWarning( + s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}") + } + + resolvedLeafStatuses + } + // scalastyle:on argcount + + /** A serializable variant of HDFS's BlockLocation. */ + private case class SerializableBlockLocation( + names: Array[String], + hosts: Array[String], + offset: Long, + length: Long) + + /** A serializable variant of HDFS's FileStatus. */ + private case class SerializableFileStatus( + path: String, + length: Long, + isDir: Boolean, + blockReplication: Short, + blockSize: Long, + modificationTime: Long, + accessTime: Long, + blockLocations: Array[SerializableBlockLocation]) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index c047be774d99a..8bf7504716f79 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -163,7 +163,7 @@ object CommandUtils extends Logging { .getConfString("hive.exec.stagingdir", ".hive-staging") val filter = new PathFilterIgnoreNonData(stagingDir) val sizes = InMemoryFileIndex.bulkListLeafFiles(paths.flatten, - sparkSession.sessionState.newHadoopConf(), filter, sparkSession, areRootPaths = true).map { + sparkSession.sessionState.newHadoopConf(), filter, sparkSession, isRootLevel = true).map { case (_, files) => files.map(_.getLen).sum } // the size is 0 where paths(i) is not defined and sizes(i) where it is defined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index a488ed16a835a..130894e9bc025 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -17,23 +17,18 @@ package org.apache.spark.sql.execution.datasources -import java.io.FileNotFoundException - import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.hadoop.fs.viewfs.ViewFileSystem -import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.hadoop.mapred.{FileInputFormat, 
JobConf} -import org.apache.spark.SparkContext import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.streaming.FileStreamSink import org.apache.spark.sql.types.StructType -import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.HadoopFSUtils /** @@ -133,7 +128,7 @@ class InMemoryFileIndex( } val filter = FileInputFormat.getInputPathFilter(new JobConf(hadoopConf, this.getClass)) val discovered = InMemoryFileIndex.bulkListLeafFiles( - pathsToFetch.toSeq, hadoopConf, filter, sparkSession, areRootPaths = true) + pathsToFetch.toSeq, hadoopConf, filter, sparkSession, isRootLevel = true) discovered.foreach { case (path, leafFiles) => HiveCatalogMetrics.incrementFilesDiscovered(leafFiles.size) fileStatusCache.putLeafFiles(path, leafFiles.toArray) @@ -147,286 +142,24 @@ class InMemoryFileIndex( object InMemoryFileIndex extends Logging { - /** A serializable variant of HDFS's BlockLocation. */ - private case class SerializableBlockLocation( - names: Array[String], - hosts: Array[String], - offset: Long, - length: Long) - - /** A serializable variant of HDFS's FileStatus. */ - private case class SerializableFileStatus( - path: String, - length: Long, - isDir: Boolean, - blockReplication: Short, - blockSize: Long, - modificationTime: Long, - accessTime: Long, - blockLocations: Array[SerializableBlockLocation]) - - /** - * Lists a collection of paths recursively. Picks the listing strategy adaptively depending - * on the number of paths to list. - * - * This may only be called on the driver. - * - * @return for each input path, the set of discovered files for the path - */ private[sql] def bulkListLeafFiles( paths: Seq[Path], hadoopConf: Configuration, filter: PathFilter, sparkSession: SparkSession, - areRootPaths: Boolean): Seq[(Path, Seq[FileStatus])] = { - - val ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles - val ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality - - // Short-circuits parallel listing when serial listing is likely to be faster. - if (paths.size <= sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold) { - return paths.map { path => - val leafFiles = listLeafFiles( - path, - hadoopConf, - filter, - Some(sparkSession), - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = areRootPaths) - (path, leafFiles) - } - } - - logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." + - s" The first several paths are: ${paths.take(10).mkString(", ")}.") - HiveCatalogMetrics.incrementParallelListingJobCount(1) - - val sparkContext = sparkSession.sparkContext - val serializableConfiguration = new SerializableConfiguration(hadoopConf) - val serializedPaths = paths.map(_.toString) - val parallelPartitionDiscoveryParallelism = - sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism - - // Set the number of parallelism to prevent following file listing from generating many tasks - // in case of large #defaultParallelism. - val numParallelism = Math.min(paths.size, parallelPartitionDiscoveryParallelism) - - val previousJobDescription = sparkContext.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) - val statusMap = try { - val description = paths.size match { - case 0 => - s"Listing leaf files and directories 0 paths" - case 1 => - s"Listing leaf files and directories for 1 path:
    ${paths(0)}" - case s => - s"Listing leaf files and directories for $s paths:
    ${paths(0)}, ..." - } - sparkContext.setJobDescription(description) - sparkContext - .parallelize(serializedPaths, numParallelism) - .mapPartitions { pathStrings => - val hadoopConf = serializableConfiguration.value - pathStrings.map(new Path(_)).toSeq.map { path => - val leafFiles = listLeafFiles( - path, - hadoopConf, - filter, - None, - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = areRootPaths) - (path, leafFiles) - }.iterator - }.map { case (path, statuses) => - val serializableStatuses = statuses.map { status => - // Turn FileStatus into SerializableFileStatus so we can send it back to the driver - val blockLocations = status match { - case f: LocatedFileStatus => - f.getBlockLocations.map { loc => - SerializableBlockLocation( - loc.getNames, - loc.getHosts, - loc.getOffset, - loc.getLength) - } - - case _ => - Array.empty[SerializableBlockLocation] - } - - SerializableFileStatus( - status.getPath.toString, - status.getLen, - status.isDirectory, - status.getReplication, - status.getBlockSize, - status.getModificationTime, - status.getAccessTime, - blockLocations) - } - (path.toString, serializableStatuses) - }.collect() - } finally { - sparkContext.setJobDescription(previousJobDescription) - } - - // turn SerializableFileStatus back to Status - statusMap.map { case (path, serializableStatuses) => - val statuses = serializableStatuses.map { f => - val blockLocations = f.blockLocations.map { loc => - new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) - } - new LocatedFileStatus( - new FileStatus( - f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, - new Path(f.path)), - blockLocations) - } - (new Path(path), statuses) - } - } - - /** - * Lists a single filesystem path recursively. If a SparkSession object is specified, this - * function may launch Spark jobs to parallelize listing. - * - * If sessionOpt is None, this may be called on executors. - * - * @return all children of path that match the specified filter. - */ - private def listLeafFiles( - path: Path, - hadoopConf: Configuration, - filter: PathFilter, - sessionOpt: Option[SparkSession], - ignoreMissingFiles: Boolean, - ignoreLocality: Boolean, - isRootPath: Boolean): Seq[FileStatus] = { - logTrace(s"Listing $path") - val fs = path.getFileSystem(hadoopConf) - - // Note that statuses only include FileStatus for the files and dirs directly under path, - // and does not include anything else recursively. - val statuses: Array[FileStatus] = try { - fs match { - // DistributedFileSystem overrides listLocatedStatus to make 1 single call to namenode - // to retrieve the file status with the file block location. The reason to still fallback - // to listStatus is because the default implementation would potentially throw a - // FileNotFoundException which is better handled by doing the lookups manually below. - case (_: DistributedFileSystem | _: ViewFileSystem) if !ignoreLocality => - val remoteIter = fs.listLocatedStatus(path) - new Iterator[LocatedFileStatus]() { - def next(): LocatedFileStatus = remoteIter.next - def hasNext(): Boolean = remoteIter.hasNext - }.toArray - case _ => fs.listStatus(path) - } - } catch { - // If we are listing a root path (e.g. a top level directory of a table), we need to - // ignore FileNotFoundExceptions during this root level of the listing because - // - // (a) certain code paths might construct an InMemoryFileIndex with root paths that - // might not exist (i.e. 
not all callers are guaranteed to have checked - // path existence prior to constructing InMemoryFileIndex) and, - // (b) we need to ignore deleted root paths during REFRESH TABLE, otherwise we break - // existing behavior and break the ability drop SessionCatalog tables when tables' - // root directories have been deleted (which breaks a number of Spark's own tests). - // - // If we are NOT listing a root path then a FileNotFoundException here means that the - // directory was present in a previous level of file listing but is absent in this - // listing, likely indicating a race condition (e.g. concurrent table overwrite or S3 - // list inconsistency). - // - // The trade-off in supporting existing behaviors / use-cases is that we won't be - // able to detect race conditions involving root paths being deleted during - // InMemoryFileIndex construction. However, it's still a net improvement to detect and - // fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion. - case _: FileNotFoundException if isRootPath || ignoreMissingFiles => - logWarning(s"The directory $path was not found. Was it deleted very recently?") - Array.empty[FileStatus] - } - - val filteredStatuses = statuses.filterNot(status => shouldFilterOut(status.getPath.getName)) - - val allLeafStatuses = { - val (dirs, topLevelFiles) = filteredStatuses.partition(_.isDirectory) - val nestedFiles: Seq[FileStatus] = sessionOpt match { - case Some(session) => - bulkListLeafFiles( - dirs.map(_.getPath), - hadoopConf, - filter, - session, - areRootPaths = false - ).flatMap(_._2) - case _ => - dirs.flatMap { dir => - listLeafFiles( - dir.getPath, - hadoopConf, - filter, - sessionOpt, - ignoreMissingFiles = ignoreMissingFiles, - ignoreLocality = ignoreLocality, - isRootPath = false) - } - } - val allFiles = topLevelFiles ++ nestedFiles - if (filter != null) allFiles.filter(f => filter.accept(f.getPath)) else allFiles - } - - val missingFiles = mutable.ArrayBuffer.empty[String] - val filteredLeafStatuses = allLeafStatuses.filterNot( - status => shouldFilterOut(status.getPath.getName)) - val resolvedLeafStatuses = filteredLeafStatuses.flatMap { - case f: LocatedFileStatus => - Some(f) - - // NOTE: - // - // - Although S3/S3A/S3N file system can be quite slow for remote file metadata - // operations, calling `getFileBlockLocations` does no harm here since these file system - // implementations don't actually issue RPC for this method. - // - // - Here we are calling `getFileBlockLocations` in a sequential manner, but it should not - // be a big deal since we always use to `bulkListLeafFiles` when the number of - // paths exceeds threshold. - case f if !ignoreLocality => - // The other constructor of LocatedFileStatus will call FileStatus.getPermission(), - // which is very slow on some file system (RawLocalFileSystem, which is launch a - // subprocess and parse the stdout). 
- try { - val locations = fs.getFileBlockLocations(f, 0, f.getLen).map { loc => - // Store BlockLocation objects to consume less memory - if (loc.getClass == classOf[BlockLocation]) { - loc - } else { - new BlockLocation(loc.getNames, loc.getHosts, loc.getOffset, loc.getLength) - } - } - val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, f.getReplication, f.getBlockSize, - f.getModificationTime, 0, null, null, null, null, f.getPath, locations) - if (f.isSymlink) { - lfs.setSymlink(f.getSymlink) - } - Some(lfs) - } catch { - case _: FileNotFoundException if ignoreMissingFiles => - missingFiles += f.getPath.toString - None - } - - case f => Some(f) - } - - if (missingFiles.nonEmpty) { - logWarning( - s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}") - } - - resolvedLeafStatuses - } + isRootLevel: Boolean): Seq[(Path, Seq[FileStatus])] = { + HadoopFSUtils.parallelListLeafFiles( + sc = sparkSession.sparkContext, + paths = paths, + hadoopConf = hadoopConf, + filter = filter, + isRootLevel = isRootLevel, + ignoreMissingFiles = sparkSession.sessionState.conf.ignoreMissingFiles, + ignoreLocality = sparkSession.sessionState.conf.ignoreDataLocality, + parallelismThreshold = sparkSession.sessionState.conf.parallelPartitionDiscoveryThreshold, + parallelismMax = sparkSession.sessionState.conf.parallelPartitionDiscoveryParallelism, + filterFun = Some(shouldFilterOut)) + } /** Checks if we should filter out this path name. */ def shouldFilterOut(pathName: String): Boolean = { From d7aa3b56e8dbdc5582565ce3427f368edbabc708 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 24 Sep 2020 16:22:08 -0700 Subject: [PATCH 0105/1009] [SPARK-32889][SQL][TESTS][FOLLOWUP] Skip special column names test in Hive 1.2 ### What changes were proposed in this pull request? This PR is a followup of SPARK-32889 in order to ignore the special column names test in `hive-1.2` profile. ### Why are the changes needed? Hive 1.2 is too old to support special column names because it doesn't use Apache ORC. This will recover our `hive-1.2` Jenkins job. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-sbt-hadoop-2.7-hive-1.2/ - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-maven-hadoop-2.7-hive-1.2/ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the test with Hive 1.2 profile. Closes #29867 from dongjoon-hyun/SPARK-32889-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a69a949e3a3a2..96bca5404831d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2242,6 +2242,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-32889: ORC table column name supports special characters") { + assume(HiveUtils.isHive23) // " " "," is not allowed. 
Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => val source = "ORC" From e9c98c910aee10efe447dc4fff951e748441d10a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 25 Sep 2020 04:29:09 +0000 Subject: [PATCH 0106/1009] [SPARK-32990][SQL] Migrate REFRESH TABLE to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `REFRESH TABLE` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE testcat.ns") sql("REFRESH TABLE t") // 't' is resolved to testcat.ns.t ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("REFRESH TABLE t") // 't' is resolved to a temp view ``` ### Does this PR introduce _any_ user-facing change? After this PR, `REFRESH TABLE t` is resolved to a temp view `t` instead of `testcat.ns.t`. ### How was this patch tested? Added a new test Closes #29866 from imback82/refresh_table_consistent. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../catalyst/analysis/ResolveCatalogs.scala | 3 --- .../sql/catalyst/parser/AstBuilder.scala | 4 ++-- .../catalyst/plans/logical/statements.scala | 5 ----- .../catalyst/plans/logical/v2Commands.scala | 6 +++--- .../sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../analysis/ResolveSessionCatalog.scala | 10 ++++++---- .../spark/sql/execution/command/tables.scala | 19 +++++++++++++++++++ .../spark/sql/execution/datasources/ddl.scala | 11 ----------- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../sql/connector/DataSourceV2SQLSuite.scala | 17 +++++++++++++++++ 10 files changed, 50 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index a40604045978c..0d0f80be359e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -174,9 +174,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) - case RefreshTableStatement(NonSessionCatalogAndTable(catalog, tbl)) => - RefreshTable(catalog.asTableCatalog, tbl.asIdentifier) - case c @ ReplaceTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 6682b0575430a..f133235a2636e 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3364,7 +3364,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[RefreshTableStatement]]. + * Create a [[RefreshTable]]. * * For example: * {{{ @@ -3372,7 +3372,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) { - RefreshTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier())) + RefreshTable(UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()))) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 19831a7b5ef84..d09e08d105c21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -429,11 +429,6 @@ case class ShowPartitionsStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A REFRESH TABLE statement, as parsed from SQL - */ -case class RefreshTableStatement(tableName: Seq[String]) extends ParsedStatement - /** * A SHOW COLUMNS statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 70e03c23fd115..fa0a10c3a5a45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -463,9 +463,9 @@ case class SetCatalogAndNamespace( /** * The logical plan of the REFRESH TABLE command that works for v2 catalogs. */ -case class RefreshTable( - catalog: TableCatalog, - ident: Identifier) extends Command +case class RefreshTable(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} /** * The logical plan of the SHOW CURRENT NAMESPACE command that works for v2 catalogs. 
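As a quick illustration of the parser change above (a sketch only, mirroring the expectation added to DDLParserSuite below): `REFRESH TABLE a.b.c` now parses to a `RefreshTable` node wrapping an unresolved identifier instead of a dedicated statement node.

```scala
// Parsed plan for "REFRESH TABLE a.b.c" after this change:
RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))
// Previously: RefreshTableStatement(Seq("a", "b", "c"))
```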
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index ac6af4f4e3231..378026b1ce9c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1665,7 +1665,7 @@ class DDLParserSuite extends AnalysisTest { test("REFRESH TABLE") { comparePlans( parsePlan("REFRESH TABLE a.b.c"), - RefreshTableStatement(Seq("a", "b", "c"))) + RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))) } test("show columns") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 8923d5c86e19a..11493ad59a760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource, RefreshTable} +import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} @@ -318,9 +318,11 @@ class ResolveSessionCatalog( ignoreIfExists = c.ifNotExists) } - // v1 REFRESH TABLE supports temp view. - case RefreshTableStatement(TempViewOrV1Table(name)) => - RefreshTable(name.asTableIdentifier) + case RefreshTable(r @ ResolvedTable(_, _, _: V1Table)) if isSessionCatalog(r.catalog) => + RefreshTableCommand(r.identifier.asTableIdentifier) + + case RefreshTable(r: ResolvedView) => + RefreshTableCommand(r.identifier.asTableIdentifier) // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the // session catalog and the table provider is not v2. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index f94c9712a31cc..e4be2a8d3bb8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1381,3 +1381,22 @@ case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) } } } + +/** + * A command to refresh all cached entries associated with the table. + * + * The syntax of using this command in SQL is: + * {{{ + * REFRESH TABLE [db_name.]table_name + * }}} + */ +case class RefreshTableCommand(tableIdent: TableIdentifier) + extends RunnableCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, + // drop the original cached version and make the new version cached lazily. 
+ sparkSession.catalog.refreshTable(tableIdent.quotedString) + Seq.empty[Row] + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala index 4022640224424..e455fae4675f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ddl.scala @@ -103,17 +103,6 @@ case class CreateTempViewUsing( } } -case class RefreshTable(tableIdent: TableIdentifier) - extends RunnableCommand { - - override def run(sparkSession: SparkSession): Seq[Row] = { - // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, - // drop the original cached version and make the new version cached lazily. - sparkSession.catalog.refreshTable(tableIdent.quotedString) - Seq.empty[Row] - } -} - case class RefreshResource(path: String) extends RunnableCommand { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index fe4f8bc83fcff..c5ddba43a56aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -127,8 +127,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat propsWithOwner, writeOptions, ifNotExists) :: Nil } - case RefreshTable(catalog, ident) => - RefreshTableExec(catalog, ident) :: Nil + case RefreshTable(r: ResolvedTable) => + RefreshTableExec(r.catalog, r.identifier) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0a4ece83717d5..e3782c7409198 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1729,6 +1729,23 @@ class DataSourceV2SQLSuite } } + test("SPARK-32990: REFRESH TABLE should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql("USE testcat.ns") + + val testCatalog = catalog("testcat").asTableCatalog.asInstanceOf[InMemoryTableCatalog] + val identifier = Identifier.of(Array("ns"), "t") + + assert(!testCatalog.isTableInvalidated(identifier)) + sql("REFRESH TABLE t") + assert(!testCatalog.isTableInvalidated(identifier)) + } + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a int) USING ${classOf[SimpleScanSource].getName}") From f2fc96667481169affbc20cec95b9fc1c19fc7c3 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 24 Sep 2020 22:16:05 -0700 Subject: [PATCH 0107/1009] [SPARK-32877][SQL][TEST] Add test for Hive UDF complex decimal type ### What changes were proposed in this pull request? Add test to cover Hive UDF whose input contains complex decimal type. Add comment to explain why we can't make `HiveSimpleUDF` extend `ImplicitTypeCasts`. ### Why are the changes needed? 
For better test coverage with Hive which we compatible or not. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #29863 from ulysses-you/SPARK-32877-test. Authored-by: ulysses Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/hive/hiveUDFs.scala | 5 +++ .../sql/hive/execution/HiveUDFSuite.scala | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 8ad5cb70d248b..462e67c4ed35c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -41,6 +41,11 @@ import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils +/** + * Here we cannot extends `ImplicitTypeCasts` to compatible with UDF input data type, the reason is: + * we use children data type to reflect UDF method first and will get exception if it fails so that + * we can never go into `ImplicitTypeCasts`. + */ private[hive] case class HiveSimpleUDF( name: String, funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) extends Expression diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 057f2f4ce01be..f5cd4f9f843d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -658,6 +659,25 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { } } + test("SPARK-32877: add test for Hive UDF complex decimal type") { + assume(HiveUtils.isHive23) + withUserDefinedFunction("testArraySum" -> false) { + sql(s"CREATE FUNCTION testArraySum AS '${classOf[ArraySumUDF].getName}'") + checkAnswer( + sql("SELECT testArraySum(array(1, 1.1, 1.2))"), + Seq(Row(3.3))) + + val msg = intercept[AnalysisException] { + sql("SELECT testArraySum(1)") + }.getMessage + assert(msg.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) + + val msg2 = intercept[AnalysisException] { + sql("SELECT testArraySum(1, 2)") + }.getMessage + assert(msg2.contains(s"No handler for UDF/UDAF/UDTF '${classOf[ArraySumUDF].getName}'")) + } + } } class TestPair(x: Int, y: Int) extends Writable with Serializable { @@ -741,3 +761,14 @@ class StatelessUDF extends UDF { result } } + +class ArraySumUDF extends UDF { + import scala.collection.JavaConverters._ + def evaluate(values: java.util.List[java.lang.Double]): java.lang.Double = { + var r = 0d + for (v <- values.asScala) { + r += v + } + r + } +} From 9e6882feca0800d5d4f9920886cb5dae73bbe1d4 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Fri, 25 Sep 2020 06:50:24 +0000 Subject: [PATCH 0108/1009] [SPARK-32885][SS] Add DataStreamReader.table API ### What changes were proposed in this pull request? 
This pr aims to add a new `table` API in DataStreamReader, which is similar to the table API in DataFrameReader. ### Why are the changes needed? Users can directly use this API to get a Streaming DataFrame on a table. Below is a simple example: Application 1 for initializing and starting the streaming job: ``` val path = "/home/yuanjian.li/runtime/to_be_deleted" val tblName = "my_table" // Write some data to `my_table` spark.range(3).write.format("parquet").option("path", path).saveAsTable(tblName) // Read the table as a streaming source, write result to destination directory val table = spark.readStream.table(tblName) table.writeStream.format("parquet").option("checkpointLocation", "/home/yuanjian.li/runtime/to_be_deleted_ck").start("/home/yuanjian.li/runtime/to_be_deleted_2") ``` Application 2 for appending new data: ``` // Append new data into the path spark.range(5).write.format("parquet").option("path", "/home/yuanjian.li/runtime/to_be_deleted").mode("append").save() ``` Check result: ``` // The desitination directory should contains all written data spark.read.parquet("/home/yuanjian.li/runtime/to_be_deleted_2").show() ``` ### Does this PR introduce _any_ user-facing change? Yes, a new API added. ### How was this patch tested? New UT added and integrated testing. Closes #29756 from xuanyuanking/SPARK-32885. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 85 +++++-- .../catalyst/analysis/CTESubstitution.scala | 2 +- .../sql/catalyst/analysis/ResolveHints.scala | 4 +- .../sql/catalyst/analysis/unresolved.scala | 11 +- .../sql/catalyst/catalog/interface.scala | 3 +- .../streaming/StreamingRelationV2.scala | 4 +- .../spark/sql/connector/catalog/V1Table.scala | 8 + .../spark/sql/execution/command/views.scala | 2 +- .../datasources/DataSourceStrategy.scala | 41 ++- .../streaming/MicroBatchExecution.scala | 2 +- .../continuous/ContinuousExecution.scala | 2 +- .../sql/execution/streaming/memory.scala | 2 + .../sql/streaming/DataStreamReader.scala | 21 +- .../sql-tests/results/explain-aqe.sql.out | 2 +- .../sql-tests/results/explain.sql.out | 2 +- .../connector/TableCapabilityCheckSuite.scala | 2 + .../test/DataStreamTableAPISuite.scala | 234 ++++++++++++++++++ .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 18 files changed, 391 insertions(+), 38 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7d591eeea2b79..6e1f371b1a2b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.TreeNodeRef import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog._ @@ -846,9 +847,9 @@ class Analyzer( */ object ResolveTempViews extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { - case u @ UnresolvedRelation(ident, _) => - lookupTempView(ident).getOrElse(u) - case i 
@ InsertIntoStatement(UnresolvedRelation(ident, _), _, _, _, _) => + case u @ UnresolvedRelation(ident, _, isStreaming) => + lookupTempView(ident, isStreaming).getOrElse(u) + case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _) => lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) @@ -861,15 +862,22 @@ class Analyzer( lookupTempView(ident).map(_ => ResolvedView(ident.asIdentifier)).getOrElse(u) } - def lookupTempView(identifier: Seq[String]): Option[LogicalPlan] = { + def lookupTempView( + identifier: Seq[String], isStreaming: Boolean = false): Option[LogicalPlan] = { // Permanent View can't refer to temp views, no need to lookup at all. if (isResolvingView) return None - identifier match { + val tmpView = identifier match { case Seq(part1) => v1SessionCatalog.lookupTempView(part1) case Seq(part1, part2) => v1SessionCatalog.lookupGlobalTempView(part1, part2) case _ => None } + + if (isStreaming && tmpView.nonEmpty && !tmpView.get.isStreaming) { + throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + + s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") + } + tmpView } } @@ -895,10 +903,13 @@ class Analyzer( object ResolveTables extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { case u: UnresolvedRelation => - lookupV2Relation(u.multipartIdentifier, u.options) - .map { rel => - val ident = rel.identifier.get - SubqueryAlias(rel.catalog.get.name +: ident.namespace :+ ident.name, rel) + lookupV2Relation(u.multipartIdentifier, u.options, u.isStreaming) + .map { relation => + val (catalog, ident) = relation match { + case ds: DataSourceV2Relation => (ds.catalog, ds.identifier.get) + case s: StreamingRelationV2 => (s.catalog, s.identifier.get) + } + SubqueryAlias(catalog.get.name +: ident.namespace :+ ident.name, relation) }.getOrElse(u) case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident)) => @@ -911,8 +922,9 @@ class Analyzer( .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case i @ InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) if i.query.resolved => - lookupV2Relation(u.multipartIdentifier, u.options) + case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _) + if i.query.resolved => + lookupV2Relation(u.multipartIdentifier, u.options, false) .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) @@ -930,12 +942,18 @@ class Analyzer( */ private def lookupV2Relation( identifier: Seq[String], - options: CaseInsensitiveStringMap): Option[DataSourceV2Relation] = + options: CaseInsensitiveStringMap, + isStreaming: Boolean): Option[LogicalPlan] = expandRelationName(identifier) match { case NonSessionCatalogAndIdentifier(catalog, ident) => CatalogV2Util.loadTable(catalog, ident) match { case Some(table) => - Some(DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + if (isStreaming) { + Some(StreamingRelationV2(None, table.name, table, options, + table.schema.toAttributes, Some(catalog), Some(ident), None)) + } else { + Some(DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + } case None => None } case _ => None @@ -976,8 +994,8 @@ class Analyzer( def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { case i @ InsertIntoStatement(table, _, _, _, _) if i.query.resolved => val relation = table match { - case u: UnresolvedRelation => - lookupRelation(u.multipartIdentifier, 
u.options).getOrElse(u) + case u @ UnresolvedRelation(_, _, false) => + lookupRelation(u.multipartIdentifier, u.options, false).getOrElse(u) case other => other } @@ -988,7 +1006,8 @@ class Analyzer( } case u: UnresolvedRelation => - lookupRelation(u.multipartIdentifier, u.options).map(resolveViews).getOrElse(u) + lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) + .map(resolveViews).getOrElse(u) case u @ UnresolvedTable(identifier) => lookupTableOrView(identifier).map { @@ -1020,16 +1039,40 @@ class Analyzer( // 3) If a v1 table is found, create a v1 relation. Otherwise, create a v2 relation. private def lookupRelation( identifier: Seq[String], - options: CaseInsensitiveStringMap): Option[LogicalPlan] = { + options: CaseInsensitiveStringMap, + isStreaming: Boolean): Option[LogicalPlan] = { expandRelationName(identifier) match { case SessionCatalogAndIdentifier(catalog, ident) => lazy val loaded = CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table => - v1SessionCatalog.getRelation(v1Table.v1Table, options) + if (isStreaming) { + if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { + throw new AnalysisException(s"${identifier.quoted} is a permanent view, " + + "which is not supported by streaming reading API such as " + + "`DataStreamReader.table` yet.") + } + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + UnresolvedCatalogRelation(v1Table.v1Table, options, isStreaming = true)) + } else { + v1SessionCatalog.getRelation(v1Table.v1Table, options) + } case table => - SubqueryAlias( - catalog.name +: ident.asMultipartIdentifier, - DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + if (isStreaming) { + val v1Fallback = table match { + case withFallback: V2TableWithV1Fallback => + Some(UnresolvedCatalogRelation(withFallback.v1Table, isStreaming = true)) + case _ => None + } + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + StreamingRelationV2(None, table.name, table, options, table.schema.toAttributes, + Some(catalog), Some(ident), v1Fallback)) + } else { + SubqueryAlias( + catalog.name +: ident.asMultipartIdentifier, + DataSourceV2Relation.create(table, Some(catalog), Some(ident), options)) + } } val key = catalog.name +: ident.namespace :+ ident.name AnalysisContext.get.relationCache.get(key).map(_.transform { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index b177aa8dd0aa7..8d3b04c202962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -171,7 +171,7 @@ object CTESubstitution extends Rule[LogicalPlan] { plan: LogicalPlan, cteRelations: Seq[(String, LogicalPlan)]): LogicalPlan = plan resolveOperatorsUp { - case u @ UnresolvedRelation(Seq(table), _) => + case u @ UnresolvedRelation(Seq(table), _, _) => cteRelations.find(r => plan.conf.resolver(r._1, table)).map(_._2).getOrElse(u) case other => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index 1f0de78b696fd..c0a9414d61f8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -105,7 
+105,7 @@ object ResolveHints { val newNode = CurrentOrigin.withOrigin(plan.origin) { plan match { - case ResolvedHint(u @ UnresolvedRelation(ident, _), hint) + case ResolvedHint(u @ UnresolvedRelation(ident, _, _), hint) if matchedIdentifierInHint(ident) => ResolvedHint(u, createHintInfo(hintName).merge(hint, hintErrorHandler)) @@ -113,7 +113,7 @@ object ResolveHints { if matchedIdentifierInHint(extractIdentifier(r)) => ResolvedHint(r, createHintInfo(hintName).merge(hint, hintErrorHandler)) - case UnresolvedRelation(ident, _) if matchedIdentifierInHint(ident) => + case UnresolvedRelation(ident, _, _) if matchedIdentifierInHint(ident) => ResolvedHint(plan, createHintInfo(hintName)) case r: SubqueryAlias if matchedIdentifierInHint(extractIdentifier(r)) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 62000ac0efbb3..49861f9172a2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -45,7 +45,8 @@ class UnresolvedException[TreeType <: TreeNode[_]](tree: TreeType, function: Str */ case class UnresolvedRelation( multipartIdentifier: Seq[String], - options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), + override val isStreaming: Boolean = false) extends LeafNode with NamedRelation { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ @@ -60,6 +61,14 @@ case class UnresolvedRelation( } object UnresolvedRelation { + def apply( + tableIdentifier: TableIdentifier, + extraOptions: CaseInsensitiveStringMap, + isStreaming: Boolean): UnresolvedRelation = { + UnresolvedRelation( + tableIdentifier.database.toSeq :+ tableIdentifier.table, extraOptions, isStreaming) + } + def apply(tableIdentifier: TableIdentifier): UnresolvedRelation = UnresolvedRelation(tableIdentifier.database.toSeq :+ tableIdentifier.table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index db01999ab9bb2..9c93691ca3b41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -643,7 +643,8 @@ object CatalogTypes { */ case class UnresolvedCatalogRelation( tableMeta: CatalogTable, - options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty()) extends LeafNode { + options: CaseInsensitiveStringMap = CaseInsensitiveStringMap.empty(), + override val isStreaming: Boolean = false) extends LeafNode { assert(tableMeta.identifier.database.isDefined) override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala index 92c4926c3a7f9..6a059025a71f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/streaming/StreamingRelationV2.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.streaming import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation 
import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.connector.catalog.{Table, TableProvider} +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableProvider} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -37,6 +37,8 @@ case class StreamingRelationV2( table: Table, extraOptions: CaseInsensitiveStringMap, output: Seq[Attribute], + catalog: Option[CatalogPlugin], + identifier: Option[Identifier], v1Relation: Option[LogicalPlan]) extends LeafNode with MultiInstanceRelation { override lazy val resolved = v1Relation.forall(_.resolved) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index 70fc9689e6087..9aed550ff97c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -80,3 +80,11 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { override def toString: String = s"V1Table($name)" } + +/** + * A V2 table with V1 fallback support. This is used to fallback to V1 table when the V2 one + * doesn't implement specific capabilities but V1 already has. + */ +private[sql] trait V2TableWithV1Fallback extends Table { + def v1Table: CatalogTable +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index cc2a4a6b3ca96..94f34a9b39b28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -174,7 +174,7 @@ case class CreateViewCommand( def verify(child: LogicalPlan) { child.collect { // Disallow creating permanent views based on temporary views. - case UnresolvedRelation(nameParts, _) if catalog.isTempView(nameParts) => + case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => throw new AnalysisException(s"Not allowed to create a permanent view $name by " + s"referencing a temporary view ${nameParts.quoted}. 
" + "Please create a temp view instead by CREATE TEMP VIEW") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 1f8cfee308033..86e85719272e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -37,8 +37,12 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.connector.catalog.SupportsRead +import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources._ @@ -260,19 +264,48 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] }) } + private def getStreamingRelation( + table: CatalogTable, + extraOptions: CaseInsensitiveStringMap): StreamingRelation = { + val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) + val dataSource = DataSource( + sparkSession, + className = table.provider.get, + userSpecifiedSchema = Some(table.schema), + options = dsOptions) + StreamingRelation(dataSource) + } + + override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options), _, _, _, _) + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) - case UnresolvedCatalogRelation(tableMeta, options) if DDLUtils.isDatasourceTable(tableMeta) => + case UnresolvedCatalogRelation(tableMeta, options, false) + if DDLUtils.isDatasourceTable(tableMeta) => readDataSourceTable(tableMeta, options) - case UnresolvedCatalogRelation(tableMeta, _) => + case UnresolvedCatalogRelation(tableMeta, _, false) => DDLUtils.readHiveTable(tableMeta) + + case UnresolvedCatalogRelation(tableMeta, extraOptions, true) => + getStreamingRelation(tableMeta, extraOptions) + + case s @ StreamingRelationV2( + _, _, table, extraOptions, _, _, _, Some(UnresolvedCatalogRelation(tableMeta, _, true))) => + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + val v1Relation = getStreamingRelation(tableMeta, extraOptions) + if (table.isInstanceOf[SupportsRead] + && table.supportsAny(MICRO_BATCH_READ, CONTINUOUS_READ)) { + s.copy(v1Relation = Some(v1Relation)) + } else { + // Fallback to V1 relation + v1Relation + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 5a91b24a0803f..aad212cc13486 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -90,7 +90,7 @@ class MicroBatchExecution( StreamingExecutionRelation(source, output)(sparkSession) }) - case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, v1) => + case s @ StreamingRelationV2(src, srcName, table: SupportsRead, options, output, _, _, v1) => val dsStr = if (src.nonEmpty) s"[${src.get}]" else "" val v2Disabled = disabledSources.contains(src.getOrElse(None).getClass.getCanonicalName) if (!v2Disabled && table.supports(TableCapability.MICRO_BATCH_READ)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 12198f735c4c3..6eb28d4c66ded 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -65,7 +65,7 @@ class ContinuousExecution( var nextSourceId = 0 import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ val _logicalPlan = analyzedPlan.transform { - case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _) => + case s @ StreamingRelationV2(ds, sourceName, table: SupportsRead, options, output, _, _, _) => val dsStr = if (ds.nonEmpty) s"[${ds.get}]" else "" if (!table.supports(TableCapability.CONTINUOUS_READ)) { throw new UnsupportedOperationException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index c6ba0da6ef04d..ee1cb127a3bc5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -83,6 +83,8 @@ abstract class MemoryStreamBase[A : Encoder](sqlContext: SQLContext) extends Spa new MemoryStreamTable(this), CaseInsensitiveStringMap.empty(), attributes, + None, + None, None) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 93a48946fbafc..9bc4acd49a980 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.catalog.{SupportsRead, TableProvider} @@ -231,7 +232,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo Dataset.ofRows( sparkSession, StreamingRelationV2( - Some(provider), source, table, dsOptions, table.schema.toAttributes, v1Relation)) + Some(provider), source, 
table, dsOptions, + table.schema.toAttributes, None, None, v1Relation)) // fallback to v1 // TODO (SPARK-27483): we should move this fallback logic to an analyzer rule. @@ -475,6 +477,23 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo format("parquet").load(path) } + /** + * Define a Streaming DataFrame on a Table. The DataSource corresponding to the table should + * support streaming mode. + * @param tableName The name of the table + * @since 3.1.0 + */ + def table(tableName: String): DataFrame = { + require(tableName != null, "The table name can't be null") + val identifier = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(tableName) + Dataset.ofRows( + sparkSession, + UnresolvedRelation( + identifier, + new CaseInsensitiveStringMap(extraOptions.toMap.asJava), + isStreaming = true)) + } + /** * Loads text files and returns a `DataFrame` whose schema starts with a string column named * "value", and followed by partitioned columns if there are any. diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 5a59ffa03880f..3a850160b43e0 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -693,7 +693,7 @@ Output: [] Arguments: `default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView (3) UnresolvedRelation -Arguments: [explain_temp1], [] +Arguments: [explain_temp1], [], false (4) Project Arguments: ['key, 'val] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index f28c408407c3f..6b3b71f85ced2 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -827,7 +827,7 @@ Output: [] Arguments: `default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView (3) UnresolvedRelation -Arguments: [explain_temp1], [] +Arguments: [explain_temp1], [], false (4) Project Arguments: ['key, 'val] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 1d016496df2de..2d75a35215866 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -46,6 +46,8 @@ class TableCapabilityCheckSuite extends AnalysisSuite with SharedSparkSession { table, CaseInsensitiveStringMap.empty(), TableCapabilityCheckSuite.schema.toAttributes, + None, + None, v1Relation) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala new file mode 100644 index 0000000000000..788452dace84b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming.test + +import java.util + +import scala.collection.JavaConverters._ + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 +import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, V2TableWithV1Fallback} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.execution.streaming.{MemoryStream, MemoryStreamScanBuilder} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.sql.streaming.sources.FakeScanBuilder +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { + import testImplicits._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.teststream", classOf[InMemoryStreamTableCatalog].getName) + } + + after { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } + + test("table API with file source") { + Seq("parquet", "").foreach { source => + withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> source) { + withTempDir { tempDir => + val tblName = "my_table" + val dir = tempDir.getAbsolutePath + withTable(tblName) { + spark.range(3).write.format("parquet").option("path", dir).saveAsTable(tblName) + + testStream(spark.readStream.table(tblName))( + ProcessAllAvailable(), + CheckAnswer(Row(0), Row(1), Row(2)) + ) + } + } + } + } + } + + test("read non-exist table") { + intercept[AnalysisException] { + spark.readStream.table("non_exist_table") + }.message.contains("Table not found") + } + + test("stream table API with temp view") { + val tblName = "my_table" + val stream = MemoryStream[Int] + withTable(tblName) { + stream.toDF().createOrReplaceTempView(tblName) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("stream table API with non-streaming temp view") { + val tblName = "my_table" + withTable(tblName) { + spark.range(3).createOrReplaceTempView(tblName) + intercept[AnalysisException] { + spark.readStream.table(tblName) + }.message.contains("is not a temp view of streaming logical plan") + } + } + + test("read table without streaming capability 
support") { + val tableIdentifer = "testcat.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") + + intercept[AnalysisException] { + spark.readStream.table(tableIdentifer) + }.message.contains("does not support either micro-batch or continuous scan") + } + + test("read table with custom catalog") { + val tblName = "teststream.table_name" + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING foo") + val stream = MemoryStream[Int] + val testCatalog = spark.sessionState.catalogManager.catalog("teststream").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array(), "table_name")) + table.asInstanceOf[InMemoryStreamTable].setStream(stream) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("read table with custom catalog & namespace") { + spark.sql("CREATE NAMESPACE teststream.ns") + + val tblName = "teststream.ns.table_name" + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING foo") + val stream = MemoryStream[Int] + val testCatalog = spark.sessionState.catalogManager.catalog("teststream").asTableCatalog + val table = testCatalog.loadTable(Identifier.of(Array("ns"), "table_name")) + table.asInstanceOf[InMemoryStreamTable].setStream(stream) + + testStream(spark.readStream.table(tblName)) ( + AddData(stream, 1, 2, 3), + CheckLastBatch(1, 2, 3), + AddData(stream, 4, 5), + CheckLastBatch(4, 5) + ) + } + } + + test("fallback to V1 relation") { + val tblName = DataStreamTableAPISuite.V1FallbackTestTableName + spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + classOf[InMemoryStreamTableCatalog].getName) + val v2Source = classOf[FakeV2Provider].getName + withTempDir { tempDir => + withTable(tblName) { + spark.sql(s"CREATE TABLE $tblName (data int) USING $v2Source") + + // Check the StreamingRelationV2 has been replaced by StreamingRelation + val plan = spark.readStream.option("path", tempDir.getCanonicalPath).table(tblName) + .queryExecution.analyzed.collectFirst { + case d: StreamingRelationV2 => d + } + assert(plan.isEmpty) + } + } + } +} + +object DataStreamTableAPISuite { + val V1FallbackTestTableName = "fallbackV1Test" +} + +class InMemoryStreamTable(override val name: String) extends Table with SupportsRead { + var stream: MemoryStream[Int] = _ + + def setStream(inputData: MemoryStream[Int]): Unit = stream = inputData + + override def schema(): StructType = stream.fullSchema() + + override def capabilities(): util.Set[TableCapability] = { + Set(TableCapability.MICRO_BATCH_READ, TableCapability.CONTINUOUS_READ).asJava + } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new MemoryStreamScanBuilder(stream) + } +} + +class NonStreamV2Table(override val name: String) + extends Table with SupportsRead with V2TableWithV1Fallback { + override def schema(): StructType = StructType(Nil) + override def capabilities(): util.Set[TableCapability] = Set(TableCapability.BATCH_READ).asJava + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = new FakeScanBuilder + + override def v1Table: CatalogTable = { + CatalogTable( + identifier = + TableIdentifier(DataStreamTableAPISuite.V1FallbackTestTableName, Some("default")), + tableType = CatalogTableType.MANAGED, + storage = CatalogStorageFormat.empty, + owner = null, + schema = schema(), + provider = Some("parquet")) + } +} + + +class InMemoryStreamTableCatalog extends 
InMemoryTableCatalog { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + if (tables.containsKey(ident)) { + throw new TableAlreadyExistsException(ident) + } + + val table = if (ident.name() == DataStreamTableAPISuite.V1FallbackTestTableName) { + new NonStreamV2Table(s"$name.${ident.quoted}") + } else { + new InMemoryStreamTable(s"$name.${ident.quoted}") + } + tables.put(ident, table) + namespaces.putIfAbsent(ident.namespace.toList, Map()) + table + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 497dda4e22213..accfcb8d9deff 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -601,7 +601,7 @@ private[hive] class TestHiveQueryExecution( // Make sure any test tables referenced are loaded. val referencedTables = describedTables ++ - logical.collect { case UnresolvedRelation(ident, _) => ident.asTableIdentifier } + logical.collect { case UnresolvedRelation(ident, _, _) => ident.asTableIdentifier } val resolver = sparkSession.sessionState.conf.resolver val referencedTestTables = referencedTables.flatMap { tbl => val testTableOpt = sparkSession.testTables.keys.find(resolver(_, tbl.table)) From e887c639a766fde0a74e7557d1ad2b2cc4b92f1b Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 25 Sep 2020 07:27:29 +0000 Subject: [PATCH 0109/1009] [SPARK-32931][SQL] Unevaluable Expressions are not Foldable ### What changes were proposed in this pull request? Unevaluable expressions are not foldable because we don't have an eval for it. This PR is to clean up the code and enforce it. ### Why are the changes needed? Ensure that we will not hit the weird cases that trigger ConstantFolding. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The existing tests. Closes #29798 from gatorsmile/refactorUneval. 
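As an aside on the invariant this patch enforces, a minimal self-contained sketch may help (the `Expr`, `Uneval`, `Lit`, `Placeholder` and `ConstantFoldingSketch` names below are illustrative toys, not Spark's real `Expression` API): a node that has no usable `eval` must never report `foldable = true`, because a constant-folding pass evaluates every foldable subtree eagerly, so a foldable-but-unevaluable node would make the optimizer itself throw.

```
// Toy model of the rule the patch enforces: unevaluable nodes are never foldable.
trait Expr {
  def foldable: Boolean
  def eval(): Any
}

trait Uneval extends Expr {
  // Mirrors the PR: foldable is fixed to false and eval always fails.
  final override def foldable: Boolean = false
  final override def eval(): Any =
    throw new UnsupportedOperationException(s"Cannot evaluate expression: $this")
}

case class Lit(value: Int) extends Expr {
  override def foldable: Boolean = true
  override def eval(): Any = value
}

case class Placeholder(name: String) extends Uneval

object ConstantFoldingSketch {
  // Folds an expression only when it claims to be foldable.
  def fold(e: Expr): Expr = if (e.foldable) Lit(e.eval().asInstanceOf[Int]) else e

  def main(args: Array[String]): Unit = {
    println(fold(Lit(42)))          // prints Lit(42): safely evaluated and folded
    println(fold(Placeholder("a"))) // prints Placeholder(a): skipped, eval is never called
  }
}
```

Making `foldable` a `final override` in `Unevaluable`, as the patch does, turns this from a per-subclass convention into something the compiler enforces.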
Lead-authored-by: gatorsmile Co-authored-by: Xiao Li Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/unresolved.scala | 5 ----- .../spark/sql/catalyst/expressions/Expression.scala | 4 +++- .../spark/sql/catalyst/expressions/SortOrder.scala | 3 --- .../catalyst/expressions/aggregate/interfaces.scala | 12 ++++++++---- .../catalyst/expressions/complexTypeCreator.scala | 1 - .../apache/spark/sql/catalyst/expressions/misc.scala | 2 -- .../spark/sql/catalyst/expressions/predicates.scala | 1 - .../sql/catalyst/expressions/windowExpressions.scala | 10 ++++------ .../sql/catalyst/plans/logical/v2Commands.scala | 2 -- 9 files changed, 15 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 49861f9172a2a..9c7d572a12071 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -264,7 +264,6 @@ case class UnresolvedFunction( override def children: Seq[Expression] = arguments ++ filter.toSeq override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false @@ -452,7 +451,6 @@ case class UnresolvedExtractValue(child: Expression, extraction: Expression) override def right: Expression = extraction override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false @@ -522,14 +520,12 @@ case class UnresolvedDeserializer(deserializer: Expression, inputAttributes: Seq override def child: Expression = deserializer override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } case class GetColumnByOrdinal(ordinal: Int, dataType: DataType) extends LeafExpression with Unevaluable with NonSQLExpression { - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } @@ -547,7 +543,6 @@ case class GetColumnByOrdinal(ordinal: Int, dataType: DataType) extends LeafExpr case class UnresolvedOrdinal(ordinal: Int) extends LeafExpression with Unevaluable with NonSQLExpression { override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 18cc648e57d71..ce4aa1c2b7c2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -298,6 +298,9 @@ abstract class Expression extends TreeNode[Expression] { */ trait Unevaluable extends Expression { + /** Unevaluable is not foldable because we don't have an eval for it. */ + final override def foldable: Boolean = false + final override def eval(input: InternalRow = null): Any = throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") @@ -318,7 +321,6 @@ trait Unevaluable extends Expression { */ trait RuntimeReplaceable extends UnaryExpression with Unevaluable { override def nullable: Boolean = child.nullable - override def foldable: Boolean = child.foldable override def dataType: DataType = child.dataType // As this expression gets replaced at optimization with its `child" expression, // two `RuntimeReplaceable` are considered to be semantically equal if their "child" expressions diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 536276b5cb29f..54259e713accd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -66,9 +66,6 @@ case class SortOrder( sameOrderExpressions: Set[Expression]) extends UnaryExpression with Unevaluable { - /** Sort order is not foldable because we don't have an eval for it. */ - override def foldable: Boolean = false - override def checkInputDataTypes(): TypeCheckResult = { if (RowOrdering.isOrderable(dataType)) { TypeCheckResult.TypeCheckSuccess diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index 26367cc058bfa..421b8ee2a25b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} import org.apache.spark.sql.types._ /** The mode of an [[AggregateFunction]]. */ @@ -133,7 +133,6 @@ case class AggregateExpression( override def children: Seq[Expression] = aggregateFunction +: filter.toSeq override def dataType: DataType = aggregateFunction.dataType - override def foldable: Boolean = false override def nullable: Boolean = aggregateFunction.nullable @transient @@ -374,8 +373,7 @@ abstract class ImperativeAggregate extends AggregateFunction with CodegenFallbac */ abstract class DeclarativeAggregate extends AggregateFunction - with Serializable - with Unevaluable { + with Serializable { /** * Expressions for initializing empty aggregation buffers. @@ -421,6 +419,12 @@ abstract class DeclarativeAggregate /** Represents this attribute at the input buffer side (the data value is read-only). 
*/ def right: AttributeReference = inputAggBufferAttributes(aggBufferAttributes.indexOf(a)) } + + final override def eval(input: InternalRow = null): Any = + throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + + final override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 42e4d3ec6df57..c1471455b58c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -304,7 +304,6 @@ case class MapFromArrays(left: Expression, right: Expression) */ case object NamePlaceholder extends LeafExpression with Unevaluable { override lazy val resolved: Boolean = false - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = StringType override def prettyName: String = "NamePlaceholder" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 2458a4aaba650..1eec26c8e987a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -113,7 +113,6 @@ case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCa since = "1.6.0") case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType - override def foldable: Boolean = true override def nullable: Boolean = false override def prettyName: String = "current_database" } @@ -131,7 +130,6 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { since = "3.1.0") case class CurrentCatalog() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType - override def foldable: Boolean = true override def nullable: Boolean = false override def prettyName: String = "current_catalog" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index ddc4d8c0d39b6..1f55045dbca74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -337,7 +337,6 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) override def children: Seq[Expression] = values :+ query override def nullable: Boolean = children.exists(_.nullable) - override def foldable: Boolean = children.forall(_.foldable) override def toString: String = s"$value IN ($query)" override def sql: String = s"(${value.sql} IN (${query.sql}))" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 1a35a52098f4d..8e3702c157a3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -50,7 +50,6 @@ case class WindowSpecDefinition( frameSpecification.isInstanceOf[SpecifiedWindowFrame] override def nullable: Boolean = true - override def foldable: Boolean = false override def dataType: DataType = throw new UnsupportedOperationException("dataType") override def checkInputDataTypes(): TypeCheckResult = { @@ -144,7 +143,6 @@ case object RangeFrame extends FrameType { sealed trait SpecialFrameBoundary extends Expression with Unevaluable { override def children: Seq[Expression] = Nil override def dataType: DataType = NullType - override def foldable: Boolean = false override def nullable: Boolean = false } @@ -168,7 +166,6 @@ case object CurrentRow extends SpecialFrameBoundary { sealed trait WindowFrame extends Expression with Unevaluable { override def children: Seq[Expression] = Nil override def dataType: DataType = throw new UnsupportedOperationException("dataType") - override def foldable: Boolean = false override def nullable: Boolean = false } @@ -275,7 +272,6 @@ case class UnresolvedWindowExpression( windowSpec: WindowSpecReference) extends UnaryExpression with Unevaluable { override def dataType: DataType = throw new UnresolvedException(this, "dataType") - override def foldable: Boolean = throw new UnresolvedException(this, "foldable") override def nullable: Boolean = throw new UnresolvedException(this, "nullable") override lazy val resolved = false } @@ -287,7 +283,6 @@ case class WindowExpression( override def children: Seq[Expression] = windowFunction :: windowSpec :: Nil override def dataType: DataType = windowFunction.dataType - override def foldable: Boolean = windowFunction.foldable override def nullable: Boolean = windowFunction.nullable override def toString: String = s"$windowFunction $windowSpec" @@ -370,8 +365,11 @@ abstract class OffsetWindowFunction * OffsetWindowFunction is executed, the input expression and the default expression. Even when * both the input and the default expression are foldable, the result is still not foldable due to * the frame. 
+ * + * Note, the value of foldable is set to false in the trait Unevaluable + * + * override def foldable: Boolean = false */ - override def foldable: Boolean = false override def nullable: Boolean = default == null || default.nullable || input.nullable diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fa0a10c3a5a45..475eb7d74773d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -348,7 +348,6 @@ case class MergeIntoTable( sealed abstract class MergeAction extends Expression with Unevaluable { def condition: Option[Expression] - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = throw new UnresolvedException(this, "nullable") override def children: Seq[Expression] = condition.toSeq @@ -369,7 +368,6 @@ case class InsertAction( } case class Assignment(key: Expression, value: Expression) extends Expression with Unevaluable { - override def foldable: Boolean = false override def nullable: Boolean = false override def dataType: DataType = throw new UnresolvedException(this, "nullable") override def children: Seq[Expression] = key :: value :: Nil From 6c805470a7e8d1f44747dc64c2e49ebd302f9ba4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 25 Sep 2020 16:36:15 -0700 Subject: [PATCH 0110/1009] [SPARK-32997][K8S] Support dynamic PVC creation and deletion in K8s driver ### What changes were proposed in this pull request? This PR aims to support dynamic PVC creation and deletion in K8s driver. **Configuration** This PR reuses the existing PVC volume configs. ``` spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName=OnDemand spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass=gp2 spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit=200Gi spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data spark.kubernetes.driver.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false ``` **PVC** ``` $ kubectl get pvc | grep driver tpcds-d6087874c6705564-driver-pvc-0 Bound pvc-fae914a2-ca5c-4e1e-8aba-54a35357d072 200Gi RWO gp2 12m ``` **Disk** ``` $ k exec -it tpcds-d6087874c6705564-driver -- df -h | grep data /dev/nvme5n1 197G 61M 197G 1% /data ``` ``` $ k exec -it tpcds-d6087874c6705564-driver -- ls -al /data total 28 drwxr-xr-x 5 root root 4096 Sep 25 18:06 . drwxr-xr-x 1 root root 63 Sep 25 18:06 .. drwxr-xr-x 66 root root 4096 Sep 25 18:09 blockmgr-2c9a8cc5-a05c-45fe-a58e-b8f42da88a57 drwx------ 2 root root 16384 Sep 25 18:06 lost+found drwx------ 4 root root 4096 Sep 25 18:07 spark-0448efe7-da2c-4f3a-bd3c-769aadb11dd6 ``` **NOTE** This should be used carefully because Apache Spark doesn't delete driver pod automatically. Since the driver PVC shares the lifecycle of driver pod, it will exist after the job completion until the pod deletion. However, if the users are already using pre-populated PVCs, this isn't a regression at all in terms of the cost. ``` $ k get pod -l spark-role=driver NAME READY STATUS RESTARTS AGE tpcds-d6087874c6705564-driver 0/1 Completed 0 35m ``` ### Why are the changes needed? Like executors, driver also needs larger PVC. 
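For reference, a rough sketch of how the `OnDemand` placeholder in the claim name is expanded (simplified from the `MountVolumesFeatureStep` change below; the literal values `"OnDemand"` and `"-pvc"` for `PVC_ON_DEMAND` and `PVC_POSTFIX`, and the `ClaimNameSketch` object itself, are assumptions inferred from the names shown above, e.g. `tpcds-d6087874c6705564-driver-pvc-0`):

```
// Simplified model of on-demand PVC claim-name expansion; not the real feature step.
object ClaimNameSketch {
  val PvcOnDemand = "OnDemand" // assumed value of PVC_ON_DEMAND
  val PvcPostfix = "-pvc"      // assumed value of PVC_POSTFIX

  // Executors get "<prefix>-exec-<id>-pvc-<i>"; with this patch the driver gets
  // "<prefix>-driver-pvc-<i>" instead of keeping the literal template.
  def claimName(template: String, prefix: String, executorId: Option[String], i: Int): String =
    executorId match {
      case Some(id) => template.replaceAll(PvcOnDemand, s"$prefix-exec-$id$PvcPostfix-$i")
      case None => template.replaceAll(PvcOnDemand, s"$prefix-driver$PvcPostfix-$i")
    }

  def main(args: Array[String]): Unit = {
    println(claimName("OnDemand", "tpcds-d6087874c6705564", None, 0))
    // tpcds-d6087874c6705564-driver-pvc-0
    println(claimName("OnDemand", "tpcds-d6087874c6705564", Some("7"), 0))
    // tpcds-d6087874c6705564-exec-7-pvc-0
  }
}
```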
### Does this PR introduce _any_ user-facing change? Yes. This is a new feature. ### How was this patch tested? Pass the newly added test case. Closes #29873 from dongjoon-hyun/SPARK-32997. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../features/MountVolumesFeatureStep.scala | 38 +++++++++---------- .../MountVolumesFeatureStepSuite.scala | 17 +++++++++ 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index 788ddeaf51cba..e297656520200 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -66,32 +66,30 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) case KubernetesPVCVolumeConf(claimNameTemplate, storageClass, size) => val claimName = conf match { case c: KubernetesExecutorConf => - val claimName = claimNameTemplate + claimNameTemplate .replaceAll(PVC_ON_DEMAND, s"${conf.resourceNamePrefix}-exec-${c.executorId}$PVC_POSTFIX-$i") .replaceAll(ENV_EXECUTOR_ID, c.executorId) - - if (storageClass.isDefined && size.isDefined) { - additionalResources.append(new PersistentVolumeClaimBuilder() - .withKind(PVC) - .withApiVersion("v1") - .withNewMetadata() - .withName(claimName) - .endMetadata() - .withNewSpec() - .withStorageClassName(storageClass.get) - .withAccessModes(PVC_ACCESS_MODE) - .withResources(new ResourceRequirementsBuilder() - .withRequests(Map("storage" -> new Quantity(size.get)).asJava).build()) - .endSpec() - .build()) - } - - claimName - case _ => claimNameTemplate + .replaceAll(PVC_ON_DEMAND, s"${conf.resourceNamePrefix}-driver$PVC_POSTFIX-$i") } + if (storageClass.isDefined && size.isDefined) { + additionalResources.append(new PersistentVolumeClaimBuilder() + .withKind(PVC) + .withApiVersion("v1") + .withNewMetadata() + .withName(claimName) + .endMetadata() + .withNewSpec() + .withStorageClassName(storageClass.get) + .withAccessModes(PVC_ACCESS_MODE) + .withResources(new ResourceRequirementsBuilder() + .withRequests(Map("storage" -> new Quantity(size.get)).asJava).build()) + .endSpec() + .build()) + } + new VolumeBuilder() .withPersistentVolumeClaim( new PersistentVolumeClaimVolumeSource(claimName, spec.mountReadOnly)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index df7616271681d..e95af264d09ec 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -89,6 +89,23 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(executorPVC.getClaimName === s"pvc-spark-${KubernetesTestConf.EXECUTOR_ID}") } + test("Create and mounts persistentVolumeClaims in driver") { + val volumeConf = KubernetesVolumeSpec( + "testVolume", + "/tmp", + "", + true, + KubernetesPVCVolumeConf("OnDemand") + ) + val kubernetesConf = KubernetesTestConf.createDriverConf(volumes = Seq(volumeConf)) + val step = 
new MountVolumesFeatureStep(kubernetesConf) + val configuredPod = step.configurePod(SparkPod.initialPod()) + + assert(configuredPod.pod.getSpec.getVolumes.size() === 1) + val pvcClaim = configuredPod.pod.getSpec.getVolumes.get(0).getPersistentVolumeClaim + assert(pvcClaim.getClaimName.endsWith("-driver-pvc-0")) + } + test("Create and mount persistentVolumeClaims in executors") { val volumeConf = KubernetesVolumeSpec( "testVolume", From 934a91fcb4de1e5c4b93b58e7452afa4bb4a9586 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sat, 26 Sep 2020 08:16:39 -0500 Subject: [PATCH 0111/1009] [SPARK-21481][ML][FOLLOWUP][TRIVIAL] HashingTF use util.collection.OpenHashMap instead of mutable.HashMap ### What changes were proposed in this pull request? `HashingTF` use `util.collection.OpenHashMap` instead of `mutable.HashMap` ### Why are the changes needed? according to `util.collection.OpenHashMap` 's doc: > This map is about 5X faster than java.util.HashMap, while using much less space overhead. according to performance tests like ([Simple microbenchmarks comparing Scala vs Java mutable map performance ](https://gist.github.com/pchiusano/1423303)), `mutable.HashMap` maybe more inefficient than `java.util.HashMap` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29852 from zhengruifeng/hashingtf_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../apache/spark/ml/feature/HashingTF.scala | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index d2bb013448aae..f4223bc85943d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.collection.mutable - import org.apache.spark.annotation.Since import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup @@ -32,6 +30,7 @@ import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{ArrayType, StructType} import org.apache.spark.util.Utils import org.apache.spark.util.VersionUtils.majorMinorVersion +import org.apache.spark.util.collection.OpenHashMap /** * Maps a sequence of terms to their term frequencies using the hashing trick. 
@@ -91,20 +90,13 @@ class HashingTF @Since("3.0.0") private[ml] ( @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { val outputSchema = transformSchema(dataset.schema) - val localNumFeatures = $(numFeatures) - val localBinary = $(binary) + val n = $(numFeatures) + val updateFunc = if ($(binary)) (v: Double) => 1.0 else (v: Double) => v + 1.0 val hashUDF = udf { terms: Seq[_] => - val termFrequencies = mutable.HashMap.empty[Int, Double].withDefaultValue(0.0) - terms.foreach { term => - val i = indexOf(term) - if (localBinary) { - termFrequencies(i) = 1.0 - } else { - termFrequencies(i) += 1.0 - } - } - Vectors.sparse(localNumFeatures, termFrequencies.toSeq) + val map = new OpenHashMap[Int, Double]() + terms.foreach { term => map.changeValue(indexOf(term), 1.0, updateFunc) } + Vectors.sparse(n, map.toSeq) } dataset.withColumn($(outputCol), hashUDF(col($(inputCol))), From 9a155d42a3202fbafc48f8b722bbc27cce522e11 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Sat, 26 Sep 2020 16:03:59 -0700 Subject: [PATCH 0112/1009] [SPARK-32999][SQL] Use Utils.getSimpleName to avoid hitting Malformed class name in TreeNode ### What changes were proposed in this pull request? Use `Utils.getSimpleName` to avoid hitting `Malformed class name` error in `TreeNode`. ### Why are the changes needed? On older JDK versions (e.g. JDK8u), nested Scala classes may trigger `java.lang.Class.getSimpleName` to throw an `java.lang.InternalError: Malformed class name` error. Similar to https://github.com/apache/spark/pull/29050, we should use Spark's `Utils.getSimpleName` utility function in place of `Class.getSimpleName` to avoid hitting the issue. ### Does this PR introduce _any_ user-facing change? Fixes a bug that throws an error when invoking `TreeNode.nodeName`, otherwise no changes. ### How was this patch tested? Added new unit test case in `TreeNodeSuite`. Note that the test case assumes the test code can trigger the expected error, otherwise it'll skip the test safely, for compatibility with newer JDKs. Manually tested on JDK8u and JDK11u and observed expected behavior: - JDK8u: the test case triggers the "Malformed class name" issue and the fix works; - JDK11u: the test case does not trigger the "Malformed class name" issue, and the test case is safely skipped. Closes #29875 from rednaxelafx/spark-32999-getsimplename. 
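A standalone reproducer of the underlying JDK issue, mirroring the nesting used in the new `TreeNodeSuite` test (the `SimpleNameRepro` and `Holder.Inner` names are made up for this sketch; plain Scala, no Spark dependencies). On affected JDK8u builds, `Class.getSimpleName` may throw `java.lang.InternalError: Malformed class name` for a case class nested inside an object that is itself nested inside a class; newer JDKs simply return the simple name:

```
class SimpleNameRepro {
  object Holder {
    case class Inner(x: Int)
  }

  def describe(): String =
    try {
      // May throw InternalError("Malformed class name") on older JDK8u; returns "Inner" elsewhere.
      classOf[Holder.Inner].getSimpleName
    } catch {
      case e: InternalError if e.getMessage.contains("Malformed class name") =>
        "hit the JDK8u getSimpleName bug"
    }
}

object SimpleNameRepro {
  def main(args: Array[String]): Unit = println(new SimpleNameRepro().describe())
}
```

This is why the patch routes `nodeName` and the assertion messages through Spark's `Utils.getSimpleName` helper instead of calling `Class.getSimpleName` directly.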
Authored-by: Kris Mok Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/trees/TreeNode.scala | 9 ++++--- .../sql/catalyst/trees/TreeNodeSuite.scala | 26 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 8003012f30ca5..1ab7bbdcff697 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils /** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given number */ private class MutableInt(var i: Int) @@ -521,11 +522,13 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { mapChildren(_.clone(), forceCopy = true) } + private def simpleClassName: String = Utils.getSimpleName(this.getClass) + /** * Returns the name of this type of TreeNode. Defaults to the class name. * Note that we remove the "Exec" suffix for physical operators here. */ - def nodeName: String = getClass.getSimpleName.replaceAll("Exec$", "") + def nodeName: String = simpleClassName.replaceAll("Exec$", "") /** * The arguments that should be included in the arg string. Defaults to the `productIterator`. @@ -747,7 +750,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { protected def jsonFields: List[JField] = { val fieldNames = getConstructorParameterNames(getClass) val fieldValues = productIterator.toSeq ++ otherCopyArgs - assert(fieldNames.length == fieldValues.length, s"${getClass.getSimpleName} fields: " + + assert(fieldNames.length == fieldValues.length, s"$simpleClassName fields: " + fieldNames.mkString(", ") + s", values: " + fieldValues.mkString(", ")) fieldNames.zip(fieldValues).map { @@ -801,7 +804,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { try { val fieldNames = getConstructorParameterNames(p.getClass) val fieldValues = p.productIterator.toSeq - assert(fieldNames.length == fieldValues.length, s"${getClass.getSimpleName} fields: " + + assert(fieldNames.length == fieldValues.length, s"$simpleClassName fields: " + fieldNames.mkString(", ") + s", values: " + fieldValues.mkString(", ")) ("product-class" -> JString(p.getClass.getName)) :: fieldNames.zip(fieldValues).map { case (name, value) => name -> parseToJson(value) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index ff51bc0071c80..4ad8475a0113c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -736,4 +736,30 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { assertDifferentInstance(leaf, leafCloned) assert(leaf.child.eq(leafCloned.asInstanceOf[FakeLeafPlan].child)) } + + object MalformedClassObject extends Serializable { + case class MalformedNameExpression(child: Expression) extends TaggingExpression + } + + test("SPARK-32999: TreeNode.nodeName should not throw malformed class name error") { + val testTriggersExpectedError = try { + 
classOf[MalformedClassObject.MalformedNameExpression].getSimpleName + false + } catch { + case ex: java.lang.InternalError if ex.getMessage.contains("Malformed class name") => + true + case ex: Throwable => throw ex + } + // This test case only applies on older JDK versions (e.g. JDK8u), and doesn't trigger the + // issue on newer JDK versions (e.g. JDK11u). + assume(testTriggersExpectedError, "the test case didn't trigger malformed class name error") + + val expr = MalformedClassObject.MalformedNameExpression(Literal(1)) + try { + expr.nodeName + } catch { + case ex: java.lang.InternalError if ex.getMessage.contains("Malformed class name") => + fail("TreeNode.nodeName should not throw malformed class name error") + } + } } From 0c38765b297337c3d80496db09ae7f79d2acf778 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sun, 27 Sep 2020 09:35:05 +0800 Subject: [PATCH 0113/1009] [SPARK-32974][ML] FeatureHasher transform optimization ### What changes were proposed in this pull request? pre-compute the output indices of numerical columns, instead of computing them on each row. ### Why are the changes needed? for a numerical column, its output index is a hash of its `col_name`, we can pre-compute it at first, instead of computing it on each row. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #29850 from zhengruifeng/hash_opt. Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../spark/ml/feature/FeatureHasher.scala | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index 39862554c5d8d..0bb0b05322873 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -125,19 +125,24 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme @Since("2.3.0") override def transform(dataset: Dataset[_]): DataFrame = { + val outputSchema = transformSchema(dataset.schema) val hashFunc: Any => Int = FeatureHasher.murmur3Hash + val n = $(numFeatures) val localInputCols = $(inputCols) - val catCols = if (isSet(categoricalCols)) { - $(categoricalCols).toSet - } else { - Set[String]() + + var catCols = dataset.schema(localInputCols.toSet) + .filterNot(_.dataType.isInstanceOf[NumericType]).map(_.name).toArray + if (isSet(categoricalCols)) { + // categoricalCols may contain columns not set in inputCols + catCols = (catCols ++ $(categoricalCols).intersect(localInputCols)).distinct } + val catIndices = catCols.map(c => localInputCols.indexOf(c)) - val outputSchema = transformSchema(dataset.schema) - val realFields = outputSchema.fields.filter { f => - f.dataType.isInstanceOf[NumericType] && !catCols.contains(f.name) - }.map(_.name).toSet + val realCols = (localInputCols.toSet -- catCols).toArray + val realIndices = realCols.map(c => localInputCols.indexOf(c)) + // pre-compute output indices of real columns + val realOutputIndices = realCols.map(c => Utils.nonNegativeMod(hashFunc(c), n)) def getDouble(x: Any): Double = { x match { @@ -151,33 +156,38 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme val hashFeatures = udf { row: Row => val map = new OpenHashMap[Int, Double]() - localInputCols.foreach { colName => - val fieldIndex = row.fieldIndex(colName) - if (!row.isNullAt(fieldIndex)) { - val (rawIdx, value) = if 
(realFields(colName)) { - // numeric values are kept as is, with vector index based on hash of "column_name" - val value = getDouble(row.get(fieldIndex)) - val hash = hashFunc(colName) - (hash, value) - } else { - // string, boolean and numeric values that are in catCols are treated as categorical, - // with an indicator value of 1.0 and vector index based on hash of "column_name=value" - val value = row.get(fieldIndex).toString - val fieldName = s"$colName=$value" - val hash = hashFunc(fieldName) - (hash, 1.0) - } - val idx = Utils.nonNegativeMod(rawIdx, n) + + var i = 0 + while (i < realIndices.length) { + val realIdx = realIndices(i) + if (!row.isNullAt(realIdx)) { + // numeric values are kept as is, with vector index based on hash of "column_name" + val value = getDouble(row.get(realIdx)) + val idx = realOutputIndices(i) map.changeValue(idx, value, v => v + value) } + i += 1 } + + i = 0 + while (i < catIndices.length) { + val catIdx = catIndices(i) + if (!row.isNullAt(catIdx)) { + // string, boolean and numeric values that are in catCols are treated as categorical, + // with an indicator value of 1.0 and vector index based on hash of "column_name=value" + val string = row.get(catIdx).toString + val rawIdx = hashFunc(s"${catCols(i)}=$string") + val idx = Utils.nonNegativeMod(rawIdx, n) + map.changeValue(idx, 1.0, v => v + 1.0) + } + i += 1 + } + Vectors.sparse(n, map.toSeq) } val metadata = outputSchema($(outputCol)).metadata - dataset.select( - col("*"), - hashFeatures(struct($(inputCols).map(col): _*)).as($(outputCol), metadata)) + dataset.withColumn($(outputCol), hashFeatures(struct($(inputCols).map(col): _*)), metadata) } @Since("2.3.0") From c65b64552f947a7eaf4f379edbdce05daa923363 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sun, 27 Sep 2020 16:21:23 +0900 Subject: [PATCH 0114/1009] [SPARK-32714][FOLLOW-UP][PYTHON] Address pyspark.install typing errors ### What changes were proposed in this pull request? This PR adds two `type: ignores`, one in `pyspark.install` and one in related tests. ### Why are the changes needed? To satisfy MyPy type checks. It seems like we originally missed some changes that happened around merge of https://github.com/apache/spark/commit/31a16fbb405a19dc3eb732347e0e1f873b16971d ``` python/pyspark/install.py:30: error: Need type annotation for 'UNSUPPORTED_COMBINATIONS' (hint: "UNSUPPORTED_COMBINATIONS: List[] = ...") [var-annotated] python/pyspark/tests/test_install_spark.py:105: error: Cannot find implementation or library stub for module named 'xmlrunner' [import] python/pyspark/tests/test_install_spark.py:105: note: See https://mypy.readthedocs.io/en/latest/running_mypy.html#missing-imports ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Existing tests. - MyPy tests ``` mypy --show-error-code --no-incremental --config python/mypy.ini python/pyspark ``` Closes #29878 from zero323/SPARK-32714-FOLLOW-UP. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/install.py | 2 +- python/pyspark/tests/test_install_spark.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/install.py b/python/pyspark/install.py index 84dd2c9964563..2de7b21832abf 100644 --- a/python/pyspark/install.py +++ b/python/pyspark/install.py @@ -27,7 +27,7 @@ DEFAULT_HIVE = "hive2.3" SUPPORTED_HADOOP_VERSIONS = ["hadoop2.7", "hadoop3.2", "without-hadoop"] SUPPORTED_HIVE_VERSIONS = ["hive2.3"] -UNSUPPORTED_COMBINATIONS = [ +UNSUPPORTED_COMBINATIONS = [ # type: ignore ] diff --git a/python/pyspark/tests/test_install_spark.py b/python/pyspark/tests/test_install_spark.py index 6f9949aa8b2e0..f761e0088cd77 100644 --- a/python/pyspark/tests/test_install_spark.py +++ b/python/pyspark/tests/test_install_spark.py @@ -102,7 +102,7 @@ def test_checked_versions(self): from pyspark.tests.test_install_spark import * # noqa: F401 try: - import xmlrunner + import xmlrunner # type: ignore[import] testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) except ImportError: testRunner = None From bc77e5b840b2feb18a9c8a61dfe75f421e5b64ca Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sun, 27 Sep 2020 10:26:05 -0500 Subject: [PATCH 0115/1009] [SPARK-32973][ML][DOC] FeatureHasher does not check categoricalCols in inputCols ### What changes were proposed in this pull request? 1, update the comment: `Note, the relevant columns must also be set in inputCols` -> `Note, the relevant columns should also be set in inputCols`; 2, add a check, and if there are `categoricalCols` not set in `inputCols`, log.warn it; ### Why are the changes needed? 1, there is no check to make sure `categoricalCols` are all set in `inputCols`, to keep existing behavior, update this comments; ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? repl Closes #29868 from zhengruifeng/feature_hash_cat_doc. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/feature/FeatureHasher.scala | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala index 0bb0b05322873..f1268bdf6bd89 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/FeatureHasher.scala @@ -91,8 +91,8 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme /** * Numeric columns to treat as categorical features. By default only string and boolean * columns are treated as categorical, so this param can be used to explicitly specify the - * numerical columns to treat as categorical. Note, the relevant columns must also be set in - * `inputCols`. + * numerical columns to treat as categorical. Note, the relevant columns should also be set in + * `inputCols`, categorical columns not set in `inputCols` will be listed in a warning. 
* @group param */ @Since("2.3.0") @@ -195,7 +195,14 @@ class FeatureHasher(@Since("2.3.0") override val uid: String) extends Transforme @Since("2.3.0") override def transformSchema(schema: StructType): StructType = { - val fields = schema($(inputCols).toSet) + val localInputCols = $(inputCols).toSet + if (isSet(categoricalCols)) { + val set = $(categoricalCols).filterNot(c => localInputCols.contains(c)) + if (set.nonEmpty) { + log.warn(s"categoricalCols ${set.mkString("[", ",", "]")} do not exist in inputCols") + } + } + val fields = schema(localInputCols) fields.foreach { fieldSchema => val dataType = fieldSchema.dataType val fieldName = fieldSchema.name From bb6d5e7a908dbd0918a9fe50147be7d16a4733f5 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 27 Sep 2020 10:26:51 -0500 Subject: [PATCH 0116/1009] [SPARK-32972][ML] Pass all UTs of `mllib` module in Scala 2.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The purpose of this pr is to resolve SPARK-32972, total of 51 Scala failed test cases and 3 Java failed test cases were fixed, the main change of this pr as follow: - Specified `Seq` to `scala.collection.Seq` in case match `Seq` scene and `x.asInstanceOf[Seq[T]]` scene - Use `Row.getSeq[T]` instead of `Row.getAs[Seq]` - Manual call `toMap` method to convert `MapView` to `Map` in Scala 2.13 - Change the tol in the last test to 0.75 to pass `RandomForestRegressorSuite#training with sample weights` in Scala 2.13 ### Why are the changes needed? We need to support a Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: Pass GitHub 2.13 Build Action Do the follow: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl mllib -Pscala-2.13 -am mvn test -pl mllib -Pscala-2.13 -fn ``` **Before** ``` [ERROR] Errors: [ERROR] JavaVectorIndexerSuite.vectorIndexerAPI:51 » ClassCast scala.collection.conver... [ERROR] JavaWord2VecSuite.testJavaWord2Vec:51 » Spark Job aborted due to stage failure... [ERROR] JavaPrefixSpanSuite.runPrefixSpanSaveLoad:79 » Spark Job aborted due to stage ... Tests: succeeded 1567, failed 51, canceled 0, ignored 7, pending 0 *** 51 TESTS FAILED *** ``` **After** ``` [INFO] Tests run: 122, Failures: 0, Errors: 0, Skipped: 0 Tests: succeeded 1617, failed 0, canceled 0, ignored 7, pending 0 All tests passed. ``` Closes #29857 from LuciferYang/fix-mllib-2. 
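Two of the 2.13 incompatibilities fixed above can be illustrated without Spark at all (the `Scala213CollectionsSketch` object below is only an illustrative name): `Map#mapValues` now returns a lazy `MapView` that has to be materialized with `.toMap`, and values that used to conform to `scala.Seq` may now surface only as `scala.collection.Seq`, so pattern matches and accessors should target the wider type:

```
object Scala213CollectionsSketch {
  def main(args: Array[String]): Unit = {
    // In Scala 2.13, Map#mapValues returns a lazy MapView instead of a strict Map,
    // so .toMap materializes it (this also compiles on 2.12, where mapValues returns a Map).
    val m = Map("a" -> 1, "b" -> 2)
    val doubled: Map[String, Int] = m.mapValues(_ * 2).toMap
    println(doubled)

    // In 2.13, scala.Seq means immutable.Seq, so sequences handed back by generic APIs
    // may only be scala.collection.Seq; match on the wider type to cover both versions.
    def describe(value: Any): String = value match {
      case s: scala.collection.Seq[_] => s"a sequence of ${s.length} elements"
      case other => s"something else: $other"
    }
    println(describe(Seq(1, 2, 3)))
  }
}
```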
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/ml/feature/IDF.scala | 6 +++--- .../scala/org/apache/spark/ml/feature/MinHashLSH.scala | 2 +- .../scala/org/apache/spark/ml/feature/RFormula.scala | 2 +- .../org/apache/spark/ml/feature/StringIndexer.scala | 5 +++-- .../org/apache/spark/ml/feature/VectorIndexer.scala | 2 +- .../scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++- .../main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 2 +- .../main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala | 2 +- .../apache/spark/mllib/classification/NaiveBayes.scala | 4 ++-- .../scala/org/apache/spark/mllib/fpm/PrefixSpan.scala | 2 +- .../mllib/recommendation/MatrixFactorizationModel.scala | 8 ++++---- .../spark/mllib/tree/model/DecisionTreeModel.scala | 2 +- .../scala/org/apache/spark/ml/clustering/LDASuite.scala | 4 ++-- .../ml/feature/BucketedRandomProjectionLSHSuite.scala | 2 +- .../test/scala/org/apache/spark/ml/feature/LSHTest.scala | 3 ++- .../org/apache/spark/ml/feature/MinHashLSHSuite.scala | 2 +- .../scala/org/apache/spark/ml/feature/NGramSuite.scala | 2 +- .../apache/spark/ml/feature/StopWordsRemoverSuite.scala | 8 +++++--- .../scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala | 2 +- .../spark/ml/regression/RandomForestRegressorSuite.scala | 2 +- .../scala/org/apache/spark/ml/util/MLTestSuite.scala | 2 +- .../org/apache/spark/mllib/feature/Word2VecSuite.scala | 9 ++++++--- 22 files changed, 42 insertions(+), 34 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index e6f124ef7d666..e451d4daffbc7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -215,10 +215,10 @@ object IDFModel extends MLReadable[IDFModel] { val data = sparkSession.read.parquet(dataPath) val model = if (majorVersion(metadata.sparkVersion) >= 3) { - val Row(idf: Vector, df: Seq[_], numDocs: Long) = data.select("idf", "docFreq", "numDocs") - .head() + val Row(idf: Vector, df: scala.collection.Seq[_], numDocs: Long) = + data.select("idf", "docFreq", "numDocs").head() new IDFModel(metadata.uid, new feature.IDFModel(OldVectors.fromML(idf), - df.asInstanceOf[Seq[Long]].toArray, numDocs)) + df.asInstanceOf[scala.collection.Seq[Long]].toArray, numDocs)) } else { val Row(idf: Vector) = MLUtils.convertVectorColumnsToML(data, "idf") .select("idf") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index be467c654aaa1..12cae13174379 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -224,7 +224,7 @@ object MinHashLSHModel extends MLReadable[MinHashLSHModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath).select("randCoefficients").head() - val randCoefficients = data.getAs[Seq[Int]](0).grouped(2) + val randCoefficients = data.getSeq[Int](0).grouped(2) .map(tuple => (tuple(0), tuple(1))).toArray val model = new MinHashLSHModel(metadata.uid, randCoefficients) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index b8da020017f12..563e1708acdf1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -449,7 +449,7 @@ object RFormulaModel extends MLReadable[RFormulaModel] { val dataPath = new Path(path, "data").toString val data = sparkSession.read.parquet(dataPath).select("label", "terms", "hasIntercept").head() val label = data.getString(0) - val terms = data.getAs[Seq[Seq[String]]](1) + val terms = data.getSeq[Seq[String]](1) val hasIntercept = data.getBoolean(2) val resolvedRFormula = ResolvedRFormula(label, terms, hasIntercept) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index ab51fe6e78bd7..0ca88b8e61e29 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -220,7 +220,8 @@ class StringIndexer @Since("1.4.0") ( val selectedCols = getSelectedCols(dataset, inputCols).map(collect_set(_)) val allLabels = dataset.select(selectedCols: _*) - .collect().toSeq.flatMap(_.toSeq).asInstanceOf[Seq[Seq[String]]] + .collect().toSeq.flatMap(_.toSeq) + .asInstanceOf[scala.collection.Seq[scala.collection.Seq[String]]].toSeq ThreadUtils.parmap(allLabels, "sortingStringLabels", 8) { labels => val sorted = labels.filter(_ != null).sorted if (ascending) { @@ -522,7 +523,7 @@ object StringIndexerModel extends MLReadable[StringIndexerModel] { val data = sparkSession.read.parquet(dataPath) .select("labelsArray") .head() - data.getAs[Seq[Seq[String]]](0).map(_.toArray).toArray + data.getSeq[scala.collection.Seq[String]](0).map(_.toArray).toArray } val model = new StringIndexerModel(metadata.uid, labelsArray) metadata.getAndSetParams(model) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index b7cf4392cd177..874b421387279 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -300,7 +300,7 @@ class VectorIndexerModel private[ml] ( /** Java-friendly version of [[categoryMaps]] */ @Since("1.4.0") def javaCategoryMaps: JMap[JInt, JMap[JDouble, JInt]] = { - categoryMaps.mapValues(_.asJava).asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] + categoryMaps.mapValues(_.asJava).toMap.asJava.asInstanceOf[JMap[JInt, JMap[JDouble, JInt]]] } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 01db39f9e3921..9b5f5a619e02c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -169,7 +169,8 @@ final class Word2Vec @Since("1.4.0") ( @Since("2.0.0") override def fit(dataset: Dataset[_]): Word2VecModel = { transformSchema(dataset.schema, logging = true) - val input = dataset.select($(inputCol)).rdd.map(_.getAs[Seq[String]](0)) + val input = + dataset.select($(inputCol)).rdd.map(_.getSeq[String](0)) val wordVectors = new feature.Word2Vec() .setLearningRate($(stepSize)) .setMinCount($(minCount)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index 7aab4ef62c4d9..8aaa5efdf06c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -363,7 +363,7 @@ object FPGrowthModel extends 
MLReadable[FPGrowthModel] { Map.empty[Any, Double] } else { frequentItems.rdd.flatMap { - case Row(items: Seq[_], count: Long) if items.length == 1 => + case Row(items: scala.collection.Seq[_], count: Long) if items.length == 1 => Some(items.head -> count.toDouble / numTrainingRecords) case _ => None }.collectAsMap() diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala index c9c049248f70c..10a569a8ff88b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala @@ -146,7 +146,7 @@ final class PrefixSpan(@Since("2.4.0") override val uid: String) extends Params val data = dataset.select(sequenceColParam) val sequences = data.where(col(sequenceColParam).isNotNull).rdd - .map(r => r.getAs[Seq[Seq[Any]]](0).map(_.toArray).toArray) + .map(r => r.getSeq[scala.collection.Seq[Any]](0).map(_.toArray).toArray) val mllibPrefixSpan = new mllibPrefixSpan() .setMinSupport($(minSupport)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 586f622fc47c5..5b13deffcf056 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -216,7 +216,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray - val theta = data.getAs[Seq[Seq[Double]]](2).map(_.toArray).toArray + val theta = data.getSeq[scala.collection.Seq[Double]](2).map(_.toArray).toArray val modelType = data.getString(3) new NaiveBayesModel(labels, pi, theta, modelType) } @@ -260,7 +260,7 @@ object NaiveBayesModel extends Loader[NaiveBayesModel] { val data = dataArray(0) val labels = data.getAs[Seq[Double]](0).toArray val pi = data.getAs[Seq[Double]](1).toArray - val theta = data.getAs[Seq[Seq[Double]]](2).map(_.toArray).toArray + val theta = data.getSeq[scala.collection.Seq[Double]](2).map(_.toArray).toArray new NaiveBayesModel(labels, pi, theta) } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index de3209c34bf07..cd71aac34c268 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -683,7 +683,7 @@ object PrefixSpanModel extends Loader[PrefixSpanModel[_]] { def loadImpl[Item: ClassTag](freqSequences: DataFrame, sample: Item): PrefixSpanModel[Item] = { val freqSequencesRDD = freqSequences.select("sequence", "freq").rdd.map { x => - val sequence = x.getAs[Seq[Seq[Item]]](0).map(_.toArray).toArray + val sequence = x.getSeq[scala.collection.Seq[Item]](0).map(_.toArray).toArray val freq = x.getLong(1) new PrefixSpan.FreqSequence(sequence, freq) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index e5e82d19f1cbd..d79314b9637a8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -386,12 +386,12 @@ object MatrixFactorizationModel extends 
Loader[MatrixFactorizationModel] { assert(formatVersion == thisFormatVersion) val rank = (metadata \ "rank").extract[Int] val userFeatures = spark.read.parquet(userPath(path)).rdd.map { - case Row(id: Int, features: Seq[_]) => - (id, features.asInstanceOf[Seq[Double]].toArray) + case Row(id: Int, features: scala.collection.Seq[_]) => + (id, features.asInstanceOf[scala.collection.Seq[Double]].toArray) } val productFeatures = spark.read.parquet(productPath(path)).rdd.map { - case Row(id: Int, features: Seq[_]) => - (id, features.asInstanceOf[Seq[Double]].toArray) + case Row(id: Int, features: scala.collection.Seq[_]) => + (id, features.asInstanceOf[scala.collection.Seq[Double]].toArray) } new MatrixFactorizationModel(rank, userFeatures, productFeatures) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 9983ca7dc5e87..cdc998000c2fc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -164,7 +164,7 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { } def apply(r: Row): SplitData = { - SplitData(r.getInt(0), r.getDouble(1), r.getInt(2), r.getAs[Seq[Double]](3)) + SplitData(r.getInt(0), r.getDouble(1), r.getInt(2), r.getSeq[Double](3)) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala index d0898220b80de..e05d76cf70ed3 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala @@ -199,11 +199,11 @@ class LDASuite extends MLTest with DefaultReadWriteTest { assert(topics.count() === k) assert(topics.select("topic").rdd.map(_.getInt(0)).collect().toSet === Range(0, k).toSet) topics.select("termIndices").collect().foreach { case r: Row => - val termIndices = r.getAs[Seq[Int]](0) + val termIndices = r.getSeq[Int](0) assert(termIndices.length === 3 && termIndices.toSet.size === 3) } topics.select("termWeights").collect().foreach { case r: Row => - val termWeights = r.getAs[Seq[Double]](0) + val termWeights = r.getSeq[Double](0) assert(termWeights.length === 3 && termWeights.forall(w => w >= 0.0 && w <= 1.0)) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala index 9b823259b1deb..a7d320e8164b6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSHSuite.scala @@ -115,7 +115,7 @@ class BucketedRandomProjectionLSHSuite extends MLTest with DefaultReadWriteTest val brpModel = brp.fit(dataset) testTransformer[Tuple1[Vector]](dataset.toDF(), brpModel, "values") { - case Row(values: Seq[_]) => + case Row(values: scala.collection.Seq[_]) => assert(values.length === brp.getNumHashTables) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala index 1d052fbebd92d..93564681994d7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala @@ -71,7 +71,8 @@ private[ml] object LSHTest { 
transformedData.schema, model.getOutputCol, DataTypes.createArrayType(new VectorUDT)) // Check output column dimensions - val headHashValue = transformedData.select(outputCol).head().get(0).asInstanceOf[Seq[Vector]] + val headHashValue = + transformedData.select(outputCol).head().get(0).asInstanceOf[scala.collection.Seq[Vector]] assert(headHashValue.length == model.getNumHashTables) // Perform a cross join and label each pair of same_bucket and distance diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala index 1c2956cb82908..c99e0fa3f8623 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/MinHashLSHSuite.scala @@ -182,7 +182,7 @@ class MinHashLSHSuite extends MLTest with DefaultReadWriteTest { val model = new MinHashLSHModel("mh", randCoefficients = Array((1, 0))) model.set(model.inputCol, "keys") testTransformer[Tuple1[Vector]](dataset.toDF(), model, "keys", model.getOutputCol) { - case Row(_: Vector, output: Seq[_]) => + case Row(_: Vector, output: scala.collection.Seq[_]) => assert(output.length === model.randCoefficients.length) // no AND-amplification yet: SPARK-18450, so each hash output is of length 1 output.foreach { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala index 1483d5df4d224..bf276ceed2097 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/NGramSuite.scala @@ -83,7 +83,7 @@ class NGramSuite extends MLTest with DefaultReadWriteTest { def testNGram(t: NGram, dataFrame: DataFrame): Unit = { testTransformer[(Seq[String], Seq[String])](dataFrame, t, "nGrams", "wantedNGrams") { - case Row(actualNGrams : Seq[_], wantedNGrams: Seq[_]) => + case Row(actualNGrams : scala.collection.Seq[_], wantedNGrams: scala.collection.Seq[_]) => assert(actualNGrams === wantedNGrams) } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala index dc6fb31a1f8e4..eaf91769a08dd 100755 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala @@ -29,7 +29,7 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { def testStopWordsRemover(t: StopWordsRemover, dataFrame: DataFrame): Unit = { testTransformer[(Array[String], Array[String])](dataFrame, t, "filtered", "expected") { - case Row(tokens: Seq[_], wantedTokens: Seq[_]) => + case Row(tokens: scala.collection.Seq[_], wantedTokens: scala.collection.Seq[_]) => assert(tokens === wantedTokens) } } @@ -242,7 +242,8 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { remover.transform(df) .select("filtered1", "expected1", "filtered2", "expected2") .collect().foreach { - case Row(r1: Seq[_], e1: Seq[_], r2: Seq[_], e2: Seq[_]) => + case Row(r1: scala.collection.Seq[_], e1: scala.collection.Seq[_], + r2: scala.collection.Seq[_], e2: scala.collection.Seq[_]) => assert(r1 === e1, s"The result value is not correct after bucketing. 
Expected $e1 but found $r1") assert(r2 === e2, @@ -268,7 +269,8 @@ class StopWordsRemoverSuite extends MLTest with DefaultReadWriteTest { remover.transform(df) .select("filtered1", "expected1", "filtered2", "expected2") .collect().foreach { - case Row(r1: Seq[_], e1: Seq[_], r2: Seq[_], e2: Seq[_]) => + case Row(r1: scala.collection.Seq[_], e1: scala.collection.Seq[_], + r2: scala.collection.Seq[_], e2: scala.collection.Seq[_]) => assert(r1 === e1, s"The result value is not correct after bucketing. Expected $e1 but found $r1") assert(r2 === e2, diff --git a/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala index d42ced0f8f91b..3d994366b8918 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/fpm/FPGrowthSuite.scala @@ -121,7 +121,7 @@ class FPGrowthSuite extends SparkFunSuite with MLlibTestSparkContext with Defaul val prediction = model.transform( spark.createDataFrame(Seq(Tuple1(Array("1", "2")))).toDF("items") - ).first().getAs[Seq[String]]("prediction") + ).first().getAs[scala.collection.Seq[String]]("prediction") assert(prediction === Seq("3")) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index 31dc6d379e76c..aeddb5ac7b13e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -175,7 +175,7 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ val testParams = Seq( (50, 5, 1.0, 0.75), (50, 10, 1.0, 0.75), - (50, 10, 0.95, 0.78) + (50, 10, 0.95, 0.75) ) for ((numTrees, maxDepth, subsamplingRate, tol) <- testParams) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala index 20c5b5395f6a4..1732469ccf590 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTestSuite.scala @@ -47,7 +47,7 @@ class MLTestSuite extends MLTest { } intercept[Exception] { testTransformerOnStreamData[(Int, String)](data, indexerModel, "id", "indexed") { - rows: Seq[Row] => + rows: scala.collection.Seq[Row] => assert(rows.map(_.getDouble(1)).max === 1.0) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala index a679fe43414f2..e4cd492be3d2e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala @@ -43,7 +43,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { // and a Word2VecMap give the same values. 
val word2VecMap = model.getVectors val newModel = new Word2VecModel(word2VecMap) - assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq)) + assert(newModel.getVectors.mapValues(_.toSeq).toMap === + word2VecMap.mapValues(_.toSeq).toMap) } test("Word2Vec throws exception when vocabulary is empty") { @@ -102,7 +103,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { try { model.save(sc, path) val sameModel = Word2VecModel.load(sc, path) - assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq)) + assert(sameModel.getVectors.mapValues(_.toSeq).toMap === + model.getVectors.mapValues(_.toSeq).toMap) } finally { Utils.deleteRecursively(tempDir) } @@ -136,7 +138,8 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext { try { model.save(sc, path) val sameModel = Word2VecModel.load(sc, path) - assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq)) + assert(sameModel.getVectors.mapValues(_.toSeq).toMap === + model.getVectors.mapValues(_.toSeq).toMap) } catch { case t: Throwable => fail("exception thrown persisting a model " + From f41ba2a2f3b86e485aa0ca1c10a2efe9a7163fb3 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 28 Sep 2020 12:22:15 +0900 Subject: [PATCH 0117/1009] [SPARK-32927][SQL] Bitwise OR, AND and XOR should have similar canonicalization rules to boolean OR and AND ### What changes were proposed in this pull request? Add canonicalization rules for commutative bitwise operations. ### Why are the changes needed? Canonical form is used in many other optimization rules. Reduces the number of cases, where plans with identical results are considered to be distinct. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #29794 from tanelk/SPARK-32927. 
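The rule rests on one idea: flatten a chain of a commutative operator and rebuild it in a deterministic operand order, so that semantically identical trees collapse to a single shape. The toy sketch below models that idea with a made-up `Expr` ADT; it is not Spark's `Canonicalize` code, only a minimal standalone illustration of what the new cases in the diff do for the bitwise operators.

```
sealed trait Expr
case class Ref(name: String) extends Expr
case class BitOr(left: Expr, right: Expr) extends Expr

// Gather every operand of a nested BitOr chain.
def flatten(e: Expr): Seq[Expr] = e match {
  case BitOr(l, r) => flatten(l) ++ flatten(r)
  case other       => Seq(other)
}

// Rebuild the chain with operands in a fixed (hashCode-based) order.
def canonicalize(e: Expr): Expr = e match {
  case or: BitOr => flatten(or).sortBy(_.hashCode()).reduce(BitOr)
  case other     => other
}

val e1 = BitOr(Ref("a"), BitOr(Ref("b"), Ref("c")))   // a | (b | c)
val e2 = BitOr(BitOr(Ref("c"), Ref("a")), Ref("b"))   // (c | a) | b
assert(canonicalize(e1) == canonicalize(e2))          // same canonical form
```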
Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: HyukjinKwon --- .../catalyst/expressions/Canonicalize.scala | 7 +++ .../expressions/CanonicalizeSuite.scala | 47 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index a8031086d82f7..1ecf4372cfb58 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -80,6 +80,13 @@ object Canonicalize { orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)}) .reduce(And) + case o: BitwiseOr => + orderCommutative(o, { case BitwiseOr(l, r) => Seq(l, r) }).reduce(BitwiseOr) + case a: BitwiseAnd => + orderCommutative(a, { case BitwiseAnd(l, r) => Seq(l, r) }).reduce(BitwiseAnd) + case x: BitwiseXor => + orderCommutative(x, { case BitwiseXor(l, r) => Seq(l, r) }).reduce(BitwiseXor) + case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l) case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index a043b4cbed1f1..bcbccd93e509f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.TimeZone import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} @@ -95,4 +96,50 @@ class CanonicalizeSuite extends SparkFunSuite { val castWithTimeZoneId = Cast(literal, LongType, Some(TimeZone.getDefault.getID)) assert(castWithTimeZoneId.semanticEquals(cast)) } + + test("SPARK-32927: Bitwise operations are commutative") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f('b, 'c)) + val e2 = f(f('a, 'b), 'c) + val e3 = f('a, f('b, 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative for non-deterministic expressions") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f(rand(42), 'c)) + val e2 = f(f('a, rand(42)), 'c) + val e3 = f('a, f(rand(42), 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative for literal expressions") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f => + val e1 = f('a, f(42, 'c)) + val e2 = f(f('a, 42), 'c) + val e3 = f('a, f(42, 'a)) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + + test("SPARK-32927: Bitwise operations are commutative in a complex case") { + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), BitwiseXor(_, _)).foreach { f1 => + Seq(BitwiseOr(_, _), BitwiseAnd(_, _), 
BitwiseXor(_, _)).foreach { f2 => + val e1 = f2(f1('a, f1('b, 'c)), 'a) + val e2 = f2(f1(f1('a, 'b), 'c), 'a) + val e3 = f2(f1('a, f1('b, 'a)), 'a) + + assert(e1.canonicalized == e2.canonicalized) + assert(e1.canonicalized != e3.canonicalized) + } + } + } } From a7f84a0b457ed3e1b854729f132e218a4ae48b21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20H=C3=B6ring?= Date: Mon, 28 Sep 2020 12:30:28 +0900 Subject: [PATCH 0118/1009] [SPARK-32187][PYTHON][DOCS] Doc on Python packaging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to document PySpark specific packaging guidelines. ### Why are the changes needed? To have a single place for PySpark users, and better documentation. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? ``` cd python/docs make clean html ``` Closes #29806 from fhoering/add_doc_python_packaging. Lead-authored-by: Fabian Höring Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- python/docs/source/user_guide/index.rst | 1 + .../source/user_guide/python_packaging.rst | 201 ++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 python/docs/source/user_guide/python_packaging.rst diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index c39feace05209..3e535ce16b22e 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -24,4 +24,5 @@ User Guide :maxdepth: 2 arrow_pandas + python_packaging diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst new file mode 100644 index 0000000000000..ef4d05a8eefea --- /dev/null +++ b/python/docs/source/user_guide/python_packaging.rst @@ -0,0 +1,201 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========================= +3rd Party Python Packages +========================= + +When you want to run your PySpark application on a cluster such as YARN, Kubernetes, Mesos, etc., you need to make +sure that your code and all used libraries are available on the executors. + +As an example let's say you may want to run the `Pandas UDF's examples `_. +As it uses pyarrow as an underlying implementation we need to make sure to have pyarrow installed on each executor +on the cluster. Otherwise you may get errors such as ``ModuleNotFoundError: No module named 'pyarrow'``. + +Here is the script ``app.py`` from the previous example that will be executed on the cluster: + +.. 
code-block:: python + + import pandas as pd + from pyspark.sql.functions import pandas_udf + from pyspark.sql import SparkSession + + def main(spark): + df = spark.createDataFrame( + [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], + ("id", "v")) + + @pandas_udf("double") + def mean_udf(v: pd.Series) -> float: + return v.mean() + + print(df.groupby("id").agg(mean_udf(df['v'])).collect()) + + + if __name__ == "__main__": + main(SparkSession.builder.getOrCreate()) + + +There are multiple ways to ship the dependencies to the cluster: + +- Using PySpark Native Features +- Using Zipped Virtual Environment +- Using PEX + + +Using PySpark Native Features +----------------------------- + +PySpark allows to upload Python files (``.py``), zipped Python packages (``.zip``), and Egg files (``.egg``) +to the executors by setting the configuration setting ``spark.submit.pyFiles`` or by directly +calling :meth:`pyspark.SparkContext.addPyFile`. + +This is an easy way to ship additional custom Python code to the cluster. You can just add individual files or zip whole +packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code +even after having started your job. + +Note that it doesn't allow to add packages built as `Wheels `_ and therefore doesn't +allow to include dependencies with native code. + + +Using Zipped Virtual Environment +-------------------------------- + +The idea of zipped environments is to zip your whole `virtual environment `_, +ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Note that this +is currently supported *only for YARN*. + +Zip Virtual Environment +~~~~~~~~~~~~~~~~~~~~~~~ + +You can zip the virtual environment on your own or use tools for doing this: + +* `conda-pack `_ for conda environments +* `venv-pack `_ for virtual environments + +Example with `conda-pack`: + +.. code-block:: bash + + conda create -y -n conda_env -c conda-forge \ + pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 conda-pack==0.4.0 + conda activate conda_env + conda pack -f -o conda_env.tar.gz + +Upload to Spark Executors +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Unzipping will be done by Spark when using target ``--archives`` option in spark-submit +or setting ``spark.yarn.dist.archives`` configuration. + +Example with ``spark-submit``: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + spark-submit --master=yarn --deploy-mode client \ + --archives conda_env.tar.gz#environment app.py + +Example using ``SparkSession.builder``: + +.. code-block:: python + + import os + from pyspark.sql import SparkSession + from app import main + + os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" + builder = SparkSession.builder.master("yarn").config( + "spark.yarn.dist.archives", "conda_env.tar.gz#environment") + spark = builder.getOrCreate() + main(spark) + + +Using PEX +--------- + +`PEX `_ is a library for generating ``.pex`` (Python EXecutable) files. +A PEX file is a self-contained executable Python environment. It can be seen as the Python equivalent of Java uber-JARs (a.k.a. fat JARs). + +You need to build the PEX file somewhere with all your requirements and then upload it to each Spark executor. + +Using CLI to Build PEX file +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. code-block:: bash + + pex pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 -o myarchive.pex + + +Invoking the PEX file will by default invoke the Python interpreter. 
pyarrow, pandas and pyspark will be included in the PEX file. + +.. code-block:: bash + + ./myarchive.pex + Python 3.6.6 (default, Jan 26 2019, 16:53:05) + (InteractiveConsole) + >>> import pyarrow + >>> import pandas + >>> import pyspark + >>> + +This can also be done directly with the Python API. For more information on how to build PEX files, +please refer to `Building .pex files `_ + +Upload to Spark Executors +~~~~~~~~~~~~~~~~~~~~~~~~~ + +The upload can be done by setting ``--files`` option in spark-submit or setting ``spark.files`` configuration (``spark.yarn.dist.files`` on YARN) +and changing the ``PYSPARK_PYTHON`` environment variable to change the Python interpreter to the PEX executable on each executor. + +.. + TODO: we should also document the way on other cluster modes. + +Example with ``spark-submit`` on YARN: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./myarchive.pex + spark-submit --master=yarn --deploy-mode client --files myarchive.pex app.py + +Example using ``SparkSession.builder`` on YARN: + +.. code-block:: python + + import os + from pyspark.sql import SparkSession + from app import main + + os.environ['PYSPARK_PYTHON']="./myarchive.pex" + builder = SparkSession.builder + builder.master("yarn") \ + .config("spark.submit.deployMode", "client") \ + .config("spark.yarn.dist.files", "myarchive.pex") + spark = builder.getOrCreate() + main(spark) + +Notes +~~~~~ + +* The Python interpreter that has been used to generate the PEX file must be available on each executor. PEX doesn't include the Python interpreter. + +* In YARN cluster mode you may also need to set ``PYSPARK_PYTHON`` environment variable on the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex``. + +* An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX can be found `here `_ - it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having to create & upload the PEX manually. From d15f504a5e8bd8acfb6dc1ee138f7d92ff211396 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Mon, 28 Sep 2020 14:57:59 +0900 Subject: [PATCH 0119/1009] [SPARK-33011][ML] Promote the stability annotation to Evolving for MLEvent traits/classes ### What changes were proposed in this pull request? This PR proposes to promote the stability annotation to `Evolving` for MLEvent traits/classes. ### Why are the changes needed? The feature is released in Spark 3.0.0 having SPARK-26818 as the last change in Feb. 2020, and haven't changed in Spark 3.0.1. (There's no change more than a half of year.) While we'd better to wait for some minor releases to consider the API as stable, it would worth to promote to Evolving so that we clearly state that we support the API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Just changed the annotation, no tests required. Closes #29887 from HeartSaVioR/SPARK-33011. 
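For context on what is being promoted: `MLEvent` extends `SparkListenerEvent` (see the diff below), so user code can observe fit/transform/save/load activity with an ordinary `SparkListener`. The snippet below is an illustrative sketch only, not part of this patch; it assumes a live `SparkContext` named `sc`.

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.ml.{FitEnd, FitStart, MLEvent}

// Sketch: receive ML events through the regular listener bus callback.
sc.addSparkListener(new SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case e: FitStart[_] => println(s"fit started: ${e.estimator.uid}")
    case e: FitEnd[_]   => println(s"fit produced model: ${e.model}")
    case _: MLEvent     => // other ML events (transform, save, load, ...)
    case _              => // non-ML scheduler events
  }
})
```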
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/ml/events.scala | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/events.scala b/mllib/src/main/scala/org/apache/spark/ml/events.scala index dc4be4dd9efda..f221183369dfd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/events.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/events.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml import com.fasterxml.jackson.annotation.JsonIgnore import org.apache.spark.SparkContext -import org.apache.spark.annotation.Unstable +import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging import org.apache.spark.ml.util.{MLReader, MLWriter} import org.apache.spark.scheduler.SparkListenerEvent @@ -31,10 +31,8 @@ import org.apache.spark.sql.{DataFrame, Dataset} * after each operation (the event should document this). * * @note This is supported via [[Pipeline]] and [[PipelineModel]]. - * @note This is experimental and unstable. Do not use this unless you fully - * understand what `Unstable` means. */ -@Unstable +@Evolving sealed trait MLEvent extends SparkListenerEvent { // Do not log ML events in event log. It should be revisited to see // how it works with history server. @@ -44,7 +42,7 @@ sealed trait MLEvent extends SparkListenerEvent { /** * Event fired before `Transformer.transform`. */ -@Unstable +@Evolving case class TransformStart() extends MLEvent { @JsonIgnore var transformer: Transformer = _ @JsonIgnore var input: Dataset[_] = _ @@ -53,7 +51,7 @@ case class TransformStart() extends MLEvent { /** * Event fired after `Transformer.transform`. */ -@Unstable +@Evolving case class TransformEnd() extends MLEvent { @JsonIgnore var transformer: Transformer = _ @JsonIgnore var output: Dataset[_] = _ @@ -62,7 +60,7 @@ case class TransformEnd() extends MLEvent { /** * Event fired before `Estimator.fit`. */ -@Unstable +@Evolving case class FitStart[M <: Model[M]]() extends MLEvent { @JsonIgnore var estimator: Estimator[M] = _ @JsonIgnore var dataset: Dataset[_] = _ @@ -71,7 +69,7 @@ case class FitStart[M <: Model[M]]() extends MLEvent { /** * Event fired after `Estimator.fit`. */ -@Unstable +@Evolving case class FitEnd[M <: Model[M]]() extends MLEvent { @JsonIgnore var estimator: Estimator[M] = _ @JsonIgnore var model: M = _ @@ -80,7 +78,7 @@ case class FitEnd[M <: Model[M]]() extends MLEvent { /** * Event fired before `MLReader.load`. */ -@Unstable +@Evolving case class LoadInstanceStart[T](path: String) extends MLEvent { @JsonIgnore var reader: MLReader[T] = _ } @@ -88,7 +86,7 @@ case class LoadInstanceStart[T](path: String) extends MLEvent { /** * Event fired after `MLReader.load`. */ -@Unstable +@Evolving case class LoadInstanceEnd[T]() extends MLEvent { @JsonIgnore var reader: MLReader[T] = _ @JsonIgnore var instance: T = _ @@ -97,7 +95,7 @@ case class LoadInstanceEnd[T]() extends MLEvent { /** * Event fired before `MLWriter.save`. */ -@Unstable +@Evolving case class SaveInstanceStart(path: String) extends MLEvent { @JsonIgnore var writer: MLWriter = _ } @@ -105,7 +103,7 @@ case class SaveInstanceStart(path: String) extends MLEvent { /** * Event fired after `MLWriter.save`. 
*/ -@Unstable +@Evolving case class SaveInstanceEnd(path: String) extends MLEvent { @JsonIgnore var writer: MLWriter = _ } From 173da5bf11daecbd428add1a5e0aedd58a66fadb Mon Sep 17 00:00:00 2001 From: Shruti Gumma Date: Mon, 28 Sep 2020 10:07:36 -0700 Subject: [PATCH 0120/1009] [SPARK-32996][WEB-UI] Handle empty ExecutorMetrics in ExecutorMetricsJsonSerializer ### What changes were proposed in this pull request? When `peakMemoryMetrics` in `ExecutorSummary` is `Option.empty`, then the `ExecutorMetricsJsonSerializer#serialize` method does not execute the `jsonGenerator.writeObject` method. This causes the json to be generated with `peakMemoryMetrics` key added to the serialized string, but no corresponding value. This causes an error to be thrown when it is the next key `attributes` turn to be added to the json: `com.fasterxml.jackson.core.JsonGenerationException: Can not write a field name, expecting a value ` ### Why are the changes needed? At the start of the Spark job, if `peakMemoryMetrics` is `Option.empty`, then it causes a `com.fasterxml.jackson.core.JsonGenerationException` to be thrown when we navigate to the Executors tab in Spark UI. Complete stacktrace: > com.fasterxml.jackson.core.JsonGenerationException: Can not write a field name, expecting a value > at com.fasterxml.jackson.core.JsonGenerator._reportError(JsonGenerator.java:2080) > at com.fasterxml.jackson.core.json.WriterBasedJsonGenerator.writeFieldName(WriterBasedJsonGenerator.java:161) > at com.fasterxml.jackson.databind.ser.BeanPropertyWriter.serializeAsField(BeanPropertyWriter.java:725) > at com.fasterxml.jackson.databind.ser.std.BeanSerializerBase.serializeFields(BeanSerializerBase.java:721) > at com.fasterxml.jackson.databind.ser.BeanSerializer.serialize(BeanSerializer.java:166) > at com.fasterxml.jackson.databind.ser.std.CollectionSerializer.serializeContents(CollectionSerializer.java:145) > at com.fasterxml.jackson.module.scala.ser.IterableSerializer.serializeContents(IterableSerializerModule.scala:26) > at com.fasterxml.jackson.module.scala.ser.IterableSerializer.serializeContents$(IterableSerializerModule.scala:25) > at com.fasterxml.jackson.module.scala.ser.UnresolvedIterableSerializer.serializeContents(IterableSerializerModule.scala:54) > at com.fasterxml.jackson.module.scala.ser.UnresolvedIterableSerializer.serializeContents(IterableSerializerModule.scala:54) > at com.fasterxml.jackson.databind.ser.std.AsArraySerializerBase.serialize(AsArraySerializerBase.java:250) > at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider._serialize(DefaultSerializerProvider.java:480) > at com.fasterxml.jackson.databind.ser.DefaultSerializerProvider.serializeValue(DefaultSerializerProvider.java:319) > at com.fasterxml.jackson.databind.ObjectMapper._configAndWriteValue(ObjectMapper.java:4094) > at com.fasterxml.jackson.databind.ObjectMapper.writeValueAsString(ObjectMapper.java:3404) > at org.apache.spark.ui.exec.ExecutorsPage.allExecutorsDataScript$1(ExecutorsTab.scala:64) > at org.apache.spark.ui.exec.ExecutorsPage.render(ExecutorsTab.scala:76) > at org.apache.spark.ui.WebUI.$anonfun$attachPage$1(WebUI.scala:89) > at org.apache.spark.ui.JettyUtils$$anon$1.doGet(JettyUtils.scala:80) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:687) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) > at org.sparkproject.jetty.servlet.ServletHolder.handle(ServletHolder.java:873) > at org.sparkproject.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1623) > at 
org.apache.spark.ui.HttpSecurityFilter.doFilter(HttpSecurityFilter.scala:95) > at org.sparkproject.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1610) > at org.sparkproject.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:540) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:255) > at org.sparkproject.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1345) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:203) > at org.sparkproject.jetty.servlet.ServletHandler.doScope(ServletHandler.java:480) > at org.sparkproject.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:201) > at org.sparkproject.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1247) > at org.sparkproject.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:144) > at org.sparkproject.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:753) > at org.sparkproject.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:220) > at org.sparkproject.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132) > at org.sparkproject.jetty.server.Server.handle(Server.java:505) > at org.sparkproject.jetty.server.HttpChannel.handle(HttpChannel.java:370) > at org.sparkproject.jetty.server.HttpConnection.onFillable(HttpConnection.java:267) > at org.sparkproject.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:305) > at org.sparkproject.jetty.io.FillInterest.fillable(FillInterest.java:103) > at org.sparkproject.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:117) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:333) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:310) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:168) > at org.sparkproject.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:126) > at org.sparkproject.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:366) > at org.sparkproject.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:698) > at org.sparkproject.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:804) > at java.base/java.lang.Thread.run(Thread.java:834) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29872 from shrutig/SPARK-32996. 
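The failure mode is easier to see on a tiny model: a field-level Jackson serializer has to emit some token for every field it is asked to write, and returning without writing anything leaves the field name dangling in the output stream. The sketch below uses a hypothetical `Metrics`/`Summary` pair rather than the real `ExecutorSummary`, and assumes `jackson-databind` and `jackson-module-scala` are on the classpath (both ship with Spark); it is an illustration of the fix pattern, not this patch's code.

```
import com.fasterxml.jackson.core.JsonGenerator
import com.fasterxml.jackson.databind.{JsonSerializer, ObjectMapper, SerializerProvider}
import com.fasterxml.jackson.databind.annotation.JsonSerialize
import com.fasterxml.jackson.module.scala.DefaultScalaModule

case class Metrics(used: Long)

// Emit an explicit null for None instead of skipping the write entirely,
// otherwise the already-written field name is left without a value.
class OptionMetricsSerializer extends JsonSerializer[Option[Metrics]] {
  override def serialize(
      value: Option[Metrics],
      gen: JsonGenerator,
      provider: SerializerProvider): Unit = value match {
    case Some(m) => gen.writeObject(m)   // "peak":{"used":...}
    case None    => gen.writeNull()      // "peak":null
  }
}

case class Summary(
    id: String,
    @JsonSerialize(using = classOf[OptionMetricsSerializer]) peak: Option[Metrics])

val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
mapper.writeValueAsString(Summary("exec-1", None))              // {"id":"exec-1","peak":null}
mapper.writeValueAsString(Summary("exec-1", Some(Metrics(42)))) // {"id":"exec-1","peak":{"used":42}}
```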
Authored-by: Shruti Gumma Signed-off-by: Liang-Chi Hsieh --- .../org/apache/spark/status/api/v1/api.scala | 16 ++++-- .../status/api/v1/ExecutorSummarySuite.scala | 51 +++++++++++++++++++ 2 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index d207a6023f7f9..5a8cf09e1cba6 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -136,6 +136,10 @@ private[spark] class ExecutorMetricsJsonDeserializer new TypeReference[Option[Map[String, java.lang.Long]]] {}) metricsMap.map(metrics => new ExecutorMetrics(metrics)) } + + override def getNullValue(ctxt: DeserializationContext): Option[ExecutorMetrics] = { + None + } } /** serializer for peakMemoryMetrics: convert ExecutorMetrics to map with metric name as key */ private[spark] class ExecutorMetricsJsonSerializer @@ -144,11 +148,15 @@ private[spark] class ExecutorMetricsJsonSerializer metrics: Option[ExecutorMetrics], jsonGenerator: JsonGenerator, serializerProvider: SerializerProvider): Unit = { - metrics.foreach { m: ExecutorMetrics => - val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) => - metric -> m.getMetricValue(metric) + if (metrics.isEmpty) { + jsonGenerator.writeNull() + } else { + metrics.foreach { m: ExecutorMetrics => + val metricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) => + metric -> m.getMetricValue(metric) + } + jsonGenerator.writeObject(metricsMap) } - jsonGenerator.writeObject(metricsMap) } } diff --git a/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala new file mode 100644 index 0000000000000..286911bdfc19a --- /dev/null +++ b/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.status.api.v1 + +import java.util.Date + +import com.fasterxml.jackson.core.`type`.TypeReference +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule + +import org.apache.spark.SparkFunSuite + +class ExecutorSummarySuite extends SparkFunSuite { + + test("Check ExecutorSummary serialize and deserialize with empty peakMemoryMetrics") { + val mapper = new ObjectMapper().registerModule(DefaultScalaModule) + val executorSummary = new ExecutorSummary("id", "host:port", true, 1, + 10, 10, 1, 1, 1, + 0, 0, 1, 100, + 1, 100, 100, + 10, false, 20, new Date(1600984336352L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1) + val expectedJson = "{\"id\":\"id\",\"hostPort\":\"host:port\",\"isActive\":true," + + "\"rddBlocks\":1,\"memoryUsed\":10,\"diskUsed\":10,\"totalCores\":1,\"maxTasks\":1," + + "\"activeTasks\":1,\"failedTasks\":0,\"completedTasks\":0,\"totalTasks\":1," + + "\"totalDuration\":100,\"totalGCTime\":1,\"totalInputBytes\":100," + + "\"totalShuffleRead\":100,\"totalShuffleWrite\":10,\"isBlacklisted\":false," + + "\"maxMemory\":20,\"addTime\":1600984336352,\"removeTime\":null,\"removeReason\":null," + + "\"executorLogs\":{},\"memoryMetrics\":null,\"blacklistedInStages\":[]," + + "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1}" + val json = mapper.writeValueAsString(executorSummary) + assert(expectedJson.equals(json)) + val deserializeExecutorSummary = mapper.readValue(json, new TypeReference[ExecutorSummary] {}) + assert(deserializeExecutorSummary.peakMemoryMetrics == None) + } + +} From a53fc9b7ae2b96b302d72170db6572b337ec9894 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 29 Sep 2020 09:54:43 +0900 Subject: [PATCH 0121/1009] [SPARK-27951][SQL][FOLLOWUP] Improve the window function nth_value ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/29604 supports the ANSI SQL NTH_VALUE. We should override the `prettyName` and `sql`. ### Why are the changes needed? Make the name of nth_value correct. To show the ignoreNulls parameter correctly. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29886 from beliefer/improve-nth_value. 
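For reference, the expression can be exercised with the same query used in `sql-expression-schema.md`; after this change the generated column label and any analysis error text use the `nth_value` spelling instead of the internal `nthvalue` one. A quick spark-shell sketch (assumes a `SparkSession` named `spark`; the exact label text can vary by version):

```
val df = spark.sql(
  """SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b)
    |FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b)""".stripMargin)

// The window column is now labelled with the nth_value(...) spelling.
df.printSchema()

// With the default running frame, the column is NULL until the frame contains
// at least 2 rows, then holds the second value of the frame.
df.show()
```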
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/windowExpressions.scala | 4 +++- .../src/test/resources/sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/results/postgreSQL/window_part3.sql.out | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 8e3702c157a3c..0e15ff2904306 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -669,7 +669,9 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool override lazy val evaluateExpression: AttributeReference = result - override def toString: String = s"$prettyName($input, $offset)${if (ignoreNulls) " ignore nulls"}" + override def prettyName: String = "nth_value" + override def sql: String = + s"$prettyName(${input.sql}, ${offsetExpr.sql})${if (ignoreNulls) " ignore nulls" else ""}" } /** diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 742a2ffee83f7..473204c182a69 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -191,7 +191,7 @@ | org.apache.spark.sql.catalyst.expressions.Not | ! | SELECT ! true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Not | not | SELECT not true | struct<(NOT true):boolean> | | org.apache.spark.sql.catalyst.expressions.Now | now | SELECT now() | struct | -| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | +| org.apache.spark.sql.catalyst.expressions.NthValue | nth_value | SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.NullIf | nullif | SELECT nullif(2, 2) | struct | | org.apache.spark.sql.catalyst.expressions.Nvl | nvl | SELECT nvl(NULL, array('2')) | struct> | | org.apache.spark.sql.catalyst.expressions.Nvl2 | nvl2 | SELECT nvl2(NULL, 2, 1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index b63b5601715a8..553432e503d5c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -391,7 +391,7 @@ SELECT nth_value(four, 0) OVER (ORDER BY ten), ten, four FROM tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'nthvalue(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 +cannot resolve 'nth_value(spark_catalog.default.tenk1.`four`, 0)' due to data type mismatch: The 'offset' argument of nth_value must be greater than zero but it is 0.; line 1 pos 7 -- !query From 376ede130149e0fa2029da423f8d9c654b096921 Mon Sep 17 00:00:00 2001 From: HyukjinKwon 
Date: Mon, 28 Sep 2020 21:54:00 -0700 Subject: [PATCH 0122/1009] [SPARK-33021][PYTHON][TESTS] Move functions related test cases into test_functions.py ### What changes were proposed in this pull request? Move functions related test cases from `test_context.py` to `test_functions.py`. ### Why are the changes needed? To group the similar test cases. ### Does this PR introduce _any_ user-facing change? Nope, test-only. ### How was this patch tested? Jenkins and GitHub Actions should test. Closes #29898 from HyukjinKwon/SPARK-33021. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/tests/test_context.py | 101 -------------------- python/pyspark/sql/tests/test_functions.py | 102 ++++++++++++++++++++- 2 files changed, 101 insertions(+), 102 deletions(-) diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index d506908b784db..ff1db31072df9 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -26,7 +26,6 @@ from pyspark import SparkContext, SQLContext from pyspark.sql import Row, SparkSession from pyspark.sql.types import StructType, StringType, StructField -from pyspark.sql.window import Window from pyspark.testing.utils import ReusedPySparkTestCase @@ -108,99 +107,6 @@ def test_save_and_load_table(self): shutil.rmtree(tmpPath) - def test_window_functions(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - w = Window.partitionBy("value").orderBy("key") - from pyspark.sql import functions as F - sel = df.select(df.value, df.key, - F.max("key").over(w.rowsBetween(0, 1)), - F.min("key").over(w.rowsBetween(0, 1)), - F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), - F.row_number().over(w), - F.rank().over(w), - F.dense_rank().over(w), - F.ntile(2).over(w)) - rs = sorted(sel.collect()) - expected = [ - ("1", 1, 1, 1, 1, 1, 1, 1, 1), - ("2", 1, 1, 1, 3, 1, 1, 1, 1), - ("2", 1, 2, 1, 3, 2, 1, 1, 1), - ("2", 2, 2, 2, 3, 3, 3, 2, 2) - ] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_window_functions_without_partitionBy(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - w = Window.orderBy("key", df.value) - from pyspark.sql import functions as F - sel = df.select(df.value, df.key, - F.max("key").over(w.rowsBetween(0, 1)), - F.min("key").over(w.rowsBetween(0, 1)), - F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), - F.row_number().over(w), - F.rank().over(w), - F.dense_rank().over(w), - F.ntile(2).over(w)) - rs = sorted(sel.collect()) - expected = [ - ("1", 1, 1, 1, 4, 1, 1, 1, 1), - ("2", 1, 1, 1, 4, 2, 2, 2, 1), - ("2", 1, 2, 1, 4, 3, 2, 2, 2), - ("2", 2, 2, 2, 4, 4, 4, 3, 2) - ] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_window_functions_cumulative_sum(self): - df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"]) - from pyspark.sql import functions as F - - # Test cumulative sum - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding, 0))) - rs = sorted(sel.collect()) - expected = [("one", 1), ("two", 3)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding - 1, 0))) - rs = sorted(sel.collect()) - 
expected = [("one", 1), ("two", 3)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow - frame_end = Window.unboundedFollowing + 1 - sel = df.select( - df.key, - F.sum(df.value).over(Window.rowsBetween(Window.currentRow, frame_end))) - rs = sorted(sel.collect()) - expected = [("one", 3), ("two", 2)] - for r, ex in zip(rs, expected): - self.assertEqual(tuple(r), ex[:len(r)]) - - def test_collect_functions(self): - df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) - from pyspark.sql import functions - - self.assertEqual( - sorted(df.select(functions.collect_set(df.key).alias('r')).collect()[0].r), - [1, 2]) - self.assertEqual( - sorted(df.select(functions.collect_list(df.key).alias('r')).collect()[0].r), - [1, 1, 1, 2]) - self.assertEqual( - sorted(df.select(functions.collect_set(df.value).alias('r')).collect()[0].r), - ["1", "2"]) - self.assertEqual( - sorted(df.select(functions.collect_list(df.value).alias('r')).collect()[0].r), - ["1", "2", "2", "2"]) - def test_limit_and_take(self): df = self.spark.range(1, 1000, numPartitions=10) @@ -219,13 +125,6 @@ def assert_runs_only_one_job_stage_and_task(job_group_name, f): # Regression test for SPARK-17514: limit(n).collect() should the perform same as take(n) assert_runs_only_one_job_stage_and_task("collect_limit", lambda: df.limit(1).collect()) - def test_datetime_functions(self): - from pyspark.sql import functions - from datetime import date - df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") - parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() - self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) - def test_unbounded_frames(self): from pyspark.sql import functions as F from pyspark.sql import window diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 5638cad51b755..fdc5e247043de 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -19,7 +19,7 @@ from itertools import chain import re -from pyspark.sql import Row +from pyspark.sql import Row, Window from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, lit from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -390,6 +390,106 @@ def test_higher_order_function_failures(self): with self.assertRaises(ValueError): transform(col("foo"), lambda x: 1) + def test_window_functions(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + w = Window.partitionBy("value").orderBy("key") + from pyspark.sql import functions as F + sel = df.select(df.value, df.key, + F.max("key").over(w.rowsBetween(0, 1)), + F.min("key").over(w.rowsBetween(0, 1)), + F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), + F.row_number().over(w), + F.rank().over(w), + F.dense_rank().over(w), + F.ntile(2).over(w)) + rs = sorted(sel.collect()) + expected = [ + ("1", 1, 1, 1, 1, 1, 1, 1, 1), + ("2", 1, 1, 1, 3, 1, 1, 1, 1), + ("2", 1, 2, 1, 3, 2, 1, 1, 1), + ("2", 2, 2, 2, 3, 3, 3, 2, 2) + ] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_window_functions_without_partitionBy(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + w = Window.orderBy("key", df.value) + from pyspark.sql import functions as F + sel = df.select(df.value, 
df.key, + F.max("key").over(w.rowsBetween(0, 1)), + F.min("key").over(w.rowsBetween(0, 1)), + F.count("key").over(w.rowsBetween(float('-inf'), float('inf'))), + F.row_number().over(w), + F.rank().over(w), + F.dense_rank().over(w), + F.ntile(2).over(w)) + rs = sorted(sel.collect()) + expected = [ + ("1", 1, 1, 1, 4, 1, 1, 1, 1), + ("2", 1, 1, 1, 4, 2, 2, 2, 1), + ("2", 1, 2, 1, 4, 3, 2, 2, 2), + ("2", 2, 2, 2, 4, 4, 4, 3, 2) + ] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_window_functions_cumulative_sum(self): + df = self.spark.createDataFrame([("one", 1), ("two", 2)], ["key", "value"]) + from pyspark.sql import functions as F + + # Test cumulative sum + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding, 0))) + rs = sorted(sel.collect()) + expected = [("one", 1), ("two", 3)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + # Test boundary values less than JVM's Long.MinValue and make sure we don't overflow + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.unboundedPreceding - 1, 0))) + rs = sorted(sel.collect()) + expected = [("one", 1), ("two", 3)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + # Test boundary values greater than JVM's Long.MaxValue and make sure we don't overflow + frame_end = Window.unboundedFollowing + 1 + sel = df.select( + df.key, + F.sum(df.value).over(Window.rowsBetween(Window.currentRow, frame_end))) + rs = sorted(sel.collect()) + expected = [("one", 3), ("two", 2)] + for r, ex in zip(rs, expected): + self.assertEqual(tuple(r), ex[:len(r)]) + + def test_collect_functions(self): + df = self.spark.createDataFrame([(1, "1"), (2, "2"), (1, "2"), (1, "2")], ["key", "value"]) + from pyspark.sql import functions + + self.assertEqual( + sorted(df.select(functions.collect_set(df.key).alias('r')).collect()[0].r), + [1, 2]) + self.assertEqual( + sorted(df.select(functions.collect_list(df.key).alias('r')).collect()[0].r), + [1, 1, 1, 2]) + self.assertEqual( + sorted(df.select(functions.collect_set(df.value).alias('r')).collect()[0].r), + ["1", "2"]) + self.assertEqual( + sorted(df.select(functions.collect_list(df.value).alias('r')).collect()[0].r), + ["1", "2", "2", "2"]) + + def test_datetime_functions(self): + from pyspark.sql import functions + from datetime import date + df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") + parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() + self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + if __name__ == "__main__": import unittest From 68cd5677ae0e3891e6bb4938a64ff98810656ba8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Sep 2020 05:13:01 +0000 Subject: [PATCH 0123/1009] [SPARK-33015][SQL] Compute the current date only once ### What changes were proposed in this pull request? Compute the current date at the specified time zone using timestamp taken at the start of query evaluation. ### Why are the changes needed? According to the doc for [current_date()](http://spark.apache.org/docs/latest/api/sql/#current_date), the current date should be computed at the start of query evaluation but it can be computed multiple times. As a consequence of that, the function can return different values if the query is executed at the border of two dates. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? 
By existing test suites `ComputeCurrentTimeSuite` and `DateExpressionsSuite`. Closes #29889 from MaxGekk/fix-current_date. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/optimizer/finishAnalysis.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 6c9bb6db06d86..76b9bd03f216c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -79,10 +79,10 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { val currentTime = Literal.create(timestamp, timeExpr.dataType) plan transformAllExpressions { - case CurrentDate(Some(timeZoneId)) => + case currentDate @ CurrentDate(Some(timeZoneId)) => currentDates.getOrElseUpdate(timeZoneId, { Literal.create( - LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)), + DateTimeUtils.microsToDays(timestamp, currentDate.zoneId), DateType) }) case CurrentTimestamp() | Now() => currentTime From 6868b405171bfaa8d013bd938dbef6636a8c9845 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 28 Sep 2020 22:14:28 -0700 Subject: [PATCH 0124/1009] [SPARK-33020][PYTHON] Add nth_value as a PySpark function ### What changes were proposed in this pull request? `nth_value` was added at SPARK-27951. This PR adds the corresponding PySpark API. ### Why are the changes needed? To support the consistent APIs ### Does this PR introduce _any_ user-facing change? Yes, it introduces a new PySpark function API. ### How was this patch tested? Unittest was added. Closes #29899 from HyukjinKwon/SPARK-33020. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/docs/source/reference/pyspark.sql.rst | 1 + python/pyspark/sql/functions.py | 20 ++++++++++++ python/pyspark/sql/functions.pyi | 3 ++ python/pyspark/sql/tests/test_functions.py | 34 ++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index e5348c6c6e9aa..692d098c89cdc 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -409,6 +409,7 @@ Functions months_between nanvl next_day + nth_value ntile overlay pandas_udf diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 14d101a65252a..e6c7eb6edb904 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -934,6 +934,26 @@ def lead(col, offset=1, default=None): return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) +@since(3.1) +def nth_value(col, offset, ignoreNulls=False): + """ + Window function: returns the value that is the `offset`\\th row of the window frame + (counting from 1), and `null` if the size of window frame is less than `offset` rows. + + It will return the `offset`\\th non-null value it sees when `ignoreNulls` is set to + true. If all values are null, then null is returned. + + This is equivalent to the nth_value function in SQL. 
+ + :param col: name of column or expression + :param offset: number of row to use as the value + :param ignoreNulls: indicates the Nth value should skip null in the + determination of which row to use + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.nth_value(_to_java_column(col), offset, ignoreNulls)) + + @since(1.4) def ntile(n): """ diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 3b0b2030178ef..8efe65205315e 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -85,6 +85,9 @@ def lag( def lead( col: ColumnOrName, offset: int = ..., default: Optional[Any] = ... ) -> Column: ... +def nth_value( + col: ColumnOrName, offset: int, ignoreNulls: Optional[bool] = ... +) -> Column: ... def ntile(n: int) -> Column: ... def current_date() -> Column: ... def current_timestamp() -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index fdc5e247043de..8d05ed28b8d4e 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -367,6 +367,40 @@ def test_percentile_approx(self): self.assertListEqual(actual, expected) + def test_nth_value(self): + from pyspark.sql import Window + from pyspark.sql.functions import nth_value + + df = self.spark.createDataFrame([ + ("a", 0, None), + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z"), + ("a", 4, None), + ("b", 1, None), + ("b", 2, None)], schema=("key", "order", "value")) + w = Window.partitionBy("key").orderBy("order") + + rs = df.select( + df.key, + df.order, + nth_value("value", 2).over(w), + nth_value("value", 2, False).over(w), + nth_value("value", 2, True).over(w)).collect() + + expected = [ + ("a", 0, None, None, None), + ("a", 1, "x", "x", None), + ("a", 2, "x", "x", "y"), + ("a", 3, "x", "x", "y"), + ("a", 4, "x", "x", "y"), + ("b", 1, None, None, None), + ("b", 2, None, None, None) + ] + + for r, ex in zip(sorted(rs), sorted(expected)): + self.assertEqual(tuple(r), ex[:len(r)]) + def test_higher_order_function_failures(self): from pyspark.sql.functions import col, transform From 1b60ff5afea0637f74c5f064642225b35b13b069 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Sep 2020 05:20:12 +0000 Subject: [PATCH 0125/1009] [MINOR][DOCS] Document when `current_date` and `current_timestamp` are evaluated ### What changes were proposed in this pull request? Explicitly document that `current_date` and `current_timestamp` are executed at the start of query evaluation. And all calls of `current_date`/`current_timestamp` within the same query return the same value ### Why are the changes needed? Users could expect that `current_date` and `current_timestamp` return the current date/timestamp at the moment of query execution but in fact the functions are folded by the optimizer at the start of query evaluation: https://github.com/apache/spark/blob/0df8dd60733066076967f0525210bbdb5e12415a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala#L71-L91 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? by running `./dev/scalastyle`. Closes #29892 from MaxGekk/doc-current_date. 
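A minimal spark-shell sketch (not part of the patch) of the behaviour being documented here: both functions are constant-folded once at the start of query evaluation, so every occurrence inside a single query yields the same value, even when the query straddles a date boundary.
```
// d1/d2 and t1/t2 always match within one query, because ComputeCurrentTime
// replaces every occurrence with a single literal before execution.
spark.sql("SELECT current_date() AS d1, current_date() AS d2, " +
  "current_timestamp() AS t1, current_timestamp() AS t2").show(false)
```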
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- R/pkg/R/functions.R | 6 ++++-- python/pyspark/sql/functions.py | 6 ++++-- .../catalyst/expressions/datetimeExpressions.scala | 12 ++++++------ .../main/scala/org/apache/spark/sql/functions.scala | 6 ++++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 2d1667f563490..df221de4c7327 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4382,7 +4382,8 @@ setMethod("date_trunc", }) #' @details -#' \code{current_date}: Returns the current date as a date column. +#' \code{current_date}: Returns the current date at the start of query evaluation as a date column. +#' All calls of current_date within the same query return the same value. #' #' @rdname column_datetime_functions #' @aliases current_date current_date,missing-method @@ -4398,7 +4399,8 @@ setMethod("current_date", }) #' @details -#' \code{current_timestamp}: Returns the current timestamp as a timestamp column. +#' \code{current_timestamp}: Returns the current timestamp at the start of query evaluation as +#' a timestamp column. All calls of current_timestamp within the same query return the same value. #' #' @rdname column_datetime_functions #' @aliases current_timestamp current_timestamp,missing-method diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index e6c7eb6edb904..7007d505d048d 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -975,7 +975,8 @@ def ntile(n): @since(1.5) def current_date(): """ - Returns the current date as a :class:`DateType` column. + Returns the current date at the start of query evaluation as a :class:`DateType` column. + All calls of current_date within the same query return the same value. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.current_date()) @@ -983,7 +984,8 @@ def current_date(): def current_timestamp(): """ - Returns the current timestamp as a :class:`TimestampType` column. + Returns the current timestamp at the start of query evaluation as a :class:`TimestampType` + column. All calls of current_timestamp within the same query return the same value. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.current_timestamp()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index e889cfbec990f..571b0be40c6e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -75,13 +75,12 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { /** * Returns the current date at the start of query evaluation. - * All calls of current_date within the same query return the same value. - * * There is no code generation since this expression should get constant folded by the optimizer. */ +// scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_() - Returns the current date at the start of query evaluation. + _FUNC_() - Returns the current date at the start of query evaluation. All calls of current_date within the same query return the same value. _FUNC_ - Returns the current date at the start of query evaluation. 
""", @@ -97,6 +96,7 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.size.limit case class CurrentDate(timeZoneId: Option[String] = None) extends LeafExpression with TimeZoneAwareExpression with CodegenFallback { @@ -124,13 +124,12 @@ abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallbac /** * Returns the current timestamp at the start of query evaluation. - * All calls of current_timestamp within the same query return the same value. - * * There is no code generation since this expression should get constant folded by the optimizer. */ +// scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_() - Returns the current timestamp at the start of query evaluation. + _FUNC_() - Returns the current timestamp at the start of query evaluation. All calls of current_timestamp within the same query return the same value. _FUNC_ - Returns the current timestamp at the start of query evaluation. """, @@ -146,6 +145,7 @@ abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallbac """, group = "datetime_funcs", since = "1.5.0") +// scalastyle:on line.size.limit case class CurrentTimestamp() extends CurrentTimestampLike { override def prettyName: String = "current_timestamp" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index acf845d6eceaf..2c545fe762b6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2760,7 +2760,8 @@ object functions { } /** - * Returns the current date as a date column. + * Returns the current date at the start of query evaluation as a date column. + * All calls of current_date within the same query return the same value. * * @group datetime_funcs * @since 1.5.0 @@ -2768,7 +2769,8 @@ object functions { def current_date(): Column = withExpr { CurrentDate() } /** - * Returns the current timestamp as a timestamp column. + * Returns the current timestamp at the start of query evaluation as a timestamp column. + * All calls of current_timestamp within the same query return the same value. * * @group datetime_funcs * @since 1.5.0 From 202115e7cd0bc2b32c68274e625cded0d628a0c5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 28 Sep 2020 22:22:47 -0700 Subject: [PATCH 0126/1009] [SPARK-32948][SQL] Optimize to_json and from_json expression chain ### What changes were proposed in this pull request? This patch proposes to optimize from_json + to_json expression chain. ### Why are the changes needed? To optimize json expression chain that could be manually generated or generated automatically during query optimization. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29828 from viirya/SPARK-32948. 
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 43 ++++++ .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../optimizer/OptimizeJsonExprsSuite.scala | 144 ++++++++++++++++++ 3 files changed, 188 insertions(+) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala new file mode 100644 index 0000000000000..24df480208220 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * Simplify redundant json related expressions. + */ +object OptimizeJsonExprs extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case p => p.transformExpressions { + case jsonToStructs @ JsonToStructs(_, options1, + StructsToJson(options2, child, timeZoneId2), timeZoneId1) + if options1.isEmpty && options2.isEmpty && timeZoneId1 == timeZoneId2 && + jsonToStructs.dataType == child.dataType => + // `StructsToJson` only fails when `JacksonGenerator` encounters data types it + // cannot convert to JSON. But `StructsToJson.checkInputDataTypes` already + // verifies its child's data types is convertible to JSON. But in + // `StructsToJson(JsonToStructs(...))` case, we cannot verify input json string + // so `JsonToStructs` might throw error in runtime. Thus we cannot optimize + // this case similarly. 
+ child + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 6033c01a60f47..94970740d8d91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -111,6 +111,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveNoopOperators, CombineWithFields, SimplifyExtractValueOps, + OptimizeJsonExprs, CombineConcats) ++ extendedOperatorOptimizationRules diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala new file mode 100644 index 0000000000000..90397d4cabee8 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getZoneId +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { + + object Optimizer extends RuleExecutor[LogicalPlan] { + val batches = Batch("Json optimization", FixedPoint(10), OptimizeJsonExprs) :: Nil + } + + val schema = StructType.fromDDL("a int, b int") + + private val structAtt = 'struct.struct(schema).notNull + + private val testRelation = LocalRelation(structAtt) + + test("SPARK-32948: optimize from_json + to_json") { + val options = Map.empty[String, String] + + val query1 = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized1 = Optimizer.execute(query1.analyze) + + val expected = testRelation.select('struct.as("struct")).analyze + comparePlans(optimized1, expected) + + val query2 = testRelation + .select( + JsonToStructs(schema, options, + StructsToJson(options, + JsonToStructs(schema, options, + StructsToJson(options, 'struct)))).as("struct")) + val optimized2 = Optimizer.execute(query2.analyze) + + comparePlans(optimized2, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if schema is different") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: if user gives schema with different letter case under case-insensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int, B int") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + } + + test("SPARK-32948: not optimize from_json + to_json if nullability is different") { + val options = Map.empty[String, String] + val nonNullSchema = StructType( + StructField("a", IntegerType, false) :: StructField("b", IntegerType, false) :: Nil) + + val structAtt = 'struct.struct(nonNullSchema).notNull + val testRelationWithNonNullAttr = LocalRelation(structAtt) + + val schema = StructType.fromDDL("a int, b int") + + val query = testRelationWithNonNullAttr + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelationWithNonNullAttr.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if option is not 
empty") { + val options = Map("testOption" -> "test") + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + val expected = testRelation.select( + JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")).analyze + comparePlans(optimized, expected) + } + + test("SPARK-32948: not optimize from_json + to_json if timezone is different") { + val options = Map.empty[String, String] + val UTC_OPT = Option("UTC") + val PST = getZoneId("-08:00") + + val query1 = testRelation + .select(JsonToStructs(schema, options, + StructsToJson(options, 'struct, Option(PST.getId)), UTC_OPT).as("struct")) + val optimized1 = Optimizer.execute(query1.analyze) + + val expected1 = testRelation.select( + JsonToStructs(schema, options, + StructsToJson(options, 'struct, Option(PST.getId)), UTC_OPT).as("struct")).analyze + comparePlans(optimized1, expected1) + + val query2 = testRelation + .select(JsonToStructs(schema, options, + StructsToJson(options, 'struct, UTC_OPT), UTC_OPT).as("struct")) + val optimized2 = Optimizer.execute(query2.analyze) + val expected2 = testRelation.select('struct.as("struct")).analyze + comparePlans(optimized2, expected2) + } +} From 90e86f6fac8ac42cf61e523397dc1bcc01871744 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 29 Sep 2020 16:51:44 +0900 Subject: [PATCH 0127/1009] [SPARK-32970][SPARK-32019][SQL][TEST] Reduce the runtime of an UT for ### What changes were proposed in this pull request? The UT for SPARK-32019 (#28853) tries to write about 16GB of data do the disk. We must change the value of `spark.sql.files.maxPartitionBytes` to a smaller value do check the correct behavior with less data. By default it is `128MB`. The other parameters in this UT are also changed to smaller values to keep the behavior the same. ### Why are the changes needed? The runtime of this one UT can be over 7 minutes on Jenkins. After the change it is few seconds. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #29842 from tanelk/SPARK-32970. 
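An illustrative sketch (not part of the patch) of the knobs the rewritten test exercises; the path below is a placeholder. Read partitions are capped by `spark.sql.files.maxPartitionBytes`, and each file is padded by `spark.sql.files.openCostInBytes` before files are packed together, which is why shrinking both lets the suite cover the same code paths with megabytes instead of gigabytes of data.
```
// Placeholder path; the values mirror the ones the updated test uses.
spark.conf.set("spark.sql.files.maxPartitionBytes", "2MB")
spark.conf.set("spark.sql.files.openCostInBytes", (4 * 1024 * 1024).toString)
spark.read.parquet("/path/to/some/files").rdd.getNumPartitions
```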
Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../datasources/FileSourceStrategySuite.scala | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index a808546745817..dfd9ba03f5be0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -549,17 +549,22 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre assert(table.rdd.partitions.length == 3) } - withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "16") { - val partitions = (1 to 100).map(i => s"file$i" -> 128 * 1024 * 1024) - val table = createTable(files = partitions) - // partition is limited by filesMaxPartitionBytes(128MB) - assert(table.rdd.partitions.length == 100) - } + withSQLConf( + SQLConf.FILES_MAX_PARTITION_BYTES.key -> "2MB", + SQLConf.FILES_OPEN_COST_IN_BYTES.key -> String.valueOf(4 * 1024 * 1024)) { + + withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "8") { + val partitions = (1 to 12).map(i => s"file$i" -> 2 * 1024 * 1024) + val table = createTable(files = partitions) + // partition is limited by filesMaxPartitionBytes(2MB) + assert(table.rdd.partitions.length == 12) + } - withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "32") { - val partitions = (1 to 800).map(i => s"file$i" -> 4 * 1024 * 1024) - val table = createTable(files = partitions) - assert(table.rdd.partitions.length == 50) + withSQLConf(SQLConf.FILES_MIN_PARTITION_NUM.key -> "16") { + val partitions = (1 to 12).map(i => s"file$i" -> 4 * 1024 * 1024) + val table = createTable(files = partitions) + assert(table.rdd.partitions.length == 24) + } } } From f167002522d50eefb261c8ba2d66a23b781a38c4 Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Tue, 29 Sep 2020 13:05:33 +0200 Subject: [PATCH 0128/1009] [SPARK-32901][CORE] Do not allocate memory while spilling UnsafeExternalSorter ### What changes were proposed in this pull request? This PR changes `UnsafeExternalSorter` to no longer allocate any memory while spilling. In particular it removes the allocation of a new pointer array in `UnsafeInMemorySorter`. Instead the new pointer array is allocated whenever the next record is inserted into the sorter. ### Why are the changes needed? Without this change the `UnsafeExternalSorter` could throw an OOM while spilling. The following sequence of events would have triggered an OOM: 1. `UnsafeExternalSorter` runs out of space in its pointer array and attempts to allocate a new large array to replace the old one. 2. `TaskMemoryManager` tries to allocate the memory backing the new large array using `MemoryManager`, but `MemoryManager` is only willing to return most but not all of the memory requested. 3. `TaskMemoryManager` asks `UnsafeExternalSorter` to spill, which causes `UnsafeExternalSorter` to spill the current run to disk, to free its record pages and to reset its `UnsafeInMemorySorter`. 4. `UnsafeInMemorySorter` frees the old pointer array, and tries to allocate a new small pointer array. 5. 
`TaskMemoryManager` tries to allocate the memory backing the small array using `MemoryManager`, but `MemoryManager` is unwilling to give it any memory, as the `TaskMemoryManager` is still holding on to the memory it got for the new large array. 6. `TaskMemoryManager` again asks `UnsafeExternalSorter` to spill, but this time there is nothing to spill. 7. `UnsafeInMemorySorter` receives less memory than it requested, and causes a `SparkOutOfMemoryError` to be thrown, which causes the current task to fail. With the changes in the PR the following will happen instead: 1. `UnsafeExternalSorter` runs out of space in its pointer array and attempts to allocate a new large array to replace the old one. 2. `TaskMemoryManager` tries to allocate the memory backing the new large array using `MemoryManager`, but `MemoryManager` is only willing to return most but not all of the memory requested. 3. `TaskMemoryManager` asks `UnsafeExternalSorter` to spill, which causes `UnsafeExternalSorter` to spill the current run to disk, to free its record pages and to reset its `UnsafeInMemorySorter`. 4. `UnsafeInMemorySorter` frees the old pointer array. 5. `TaskMemoryManager` returns control to `UnsafeExternalSorter.growPointerArrayIfNecessary` (either by returning the the new large array or by throwing a `SparkOutOfMemoryError`). 6. `UnsafeExternalSorter` either frees the new large array or it ignores the `SparkOutOfMemoryError` depending on what happened in the previous step. 7. `UnsafeExternalSorter` successfully allocates a new small pointer array and operation continues as normal. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tests were added in `UnsafeExternalSorterSuite` and `UnsafeInMemorySorterSuite`. Closes #29785 from tomvanbussel/SPARK-32901. Authored-by: Tom van Bussel Signed-off-by: herman --- .../unsafe/sort/UnsafeExternalSorter.java | 96 ++++++++++++++----- .../unsafe/sort/UnsafeInMemorySorter.java | 55 +++++------ .../sort/UnsafeExternalSorterSuite.java | 46 ++++----- .../sort/UnsafeInMemorySorterSuite.java | 40 ++++---- .../spark/memory/TestMemoryManager.scala | 8 ++ 5 files changed, 143 insertions(+), 102 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index e4a882d609fc2..dda8ed4c239ae 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -203,6 +203,10 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { } if (inMemSorter == null || inMemSorter.numRecords() <= 0) { + // There could still be some memory allocated when there are no records in the in-memory + // sorter. We will not spill it however, to ensure that we can always process at least one + // record before spilling. See the comments in `allocateMemoryForRecordIfNecessary` for why + // this is necessary. return 0L; } @@ -224,7 +228,7 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { // Note that this is more-or-less going to be a multiple of the page size, so wasted space in // pages will currently be counted as memory spilled even though that space isn't actually // written to disk. This also counts the space needed to store the sorter's pointer array. 
- inMemSorter.reset(); + inMemSorter.freeMemory(); // Reset the in-memory sorter's pointer array only after freeing up the memory pages holding the // records. Otherwise, if the task is over allocated memory, then without freeing the memory // pages, we might not be able to get memory for the pointer array. @@ -325,7 +329,7 @@ public void cleanupResources() { deleteSpillFiles(); freeMemory(); if (inMemSorter != null) { - inMemSorter.free(); + inMemSorter.freeMemory(); inMemSorter = null; } } @@ -339,40 +343,53 @@ public void cleanupResources() { private void growPointerArrayIfNecessary() throws IOException { assert(inMemSorter != null); if (!inMemSorter.hasSpaceForAnotherRecord()) { + if (inMemSorter.numRecords() <= 0) { + // Spilling was triggered just before this method was called. The pointer array was freed + // during the spill, so a new pointer array needs to be allocated here. + LongArray array = allocateArray(inMemSorter.getInitialSize()); + inMemSorter.expandPointerArray(array); + return; + } + long used = inMemSorter.getMemoryUsage(); - LongArray array; + LongArray array = null; try { // could trigger spilling array = allocateArray(used / 8 * 2); } catch (TooLargePageException e) { // The pointer array is too big to fix in a single page, spill. spill(); - return; } catch (SparkOutOfMemoryError e) { - // should have trigger spilling - if (!inMemSorter.hasSpaceForAnotherRecord()) { + if (inMemSorter.numRecords() > 0) { logger.error("Unable to grow the pointer array"); throw e; } - return; + // The new array could not be allocated, but that is not an issue as it is longer needed, + // as all records were spilled. } - // check if spilling is triggered or not - if (inMemSorter.hasSpaceForAnotherRecord()) { - freeArray(array); - } else { - inMemSorter.expandPointerArray(array); + + if (inMemSorter.numRecords() <= 0) { + // Spilling was triggered while trying to allocate the new array. + if (array != null) { + // We succeeded in allocating the new array, but, since all records were spilled, a + // smaller array would also suffice. + freeArray(array); + } + // The pointer array was freed during the spill, so a new pointer array needs to be + // allocated here. + array = allocateArray(inMemSorter.getInitialSize()); } + inMemSorter.expandPointerArray(array); } } /** - * Allocates more memory in order to insert an additional record. This will request additional - * memory from the memory manager and spill if the requested memory can not be obtained. + * Allocates an additional page in order to insert an additional record. This will request + * additional memory from the memory manager and spill if the requested memory can not be + * obtained. * * @param required the required space in the data page, in bytes, including space for storing - * the record size. This must be less than or equal to the page size (records - * that exceed the page size are handled via a different code path which uses - * special overflow pages). + * the record size. */ private void acquireNewPageIfNecessary(int required) { if (currentPage == null || @@ -384,6 +401,37 @@ private void acquireNewPageIfNecessary(int required) { } } + /** + * Allocates more memory in order to insert an additional record. This will request additional + * memory from the memory manager and spill if the requested memory can not be obtained. + * + * @param required the required space in the data page, in bytes, including space for storing + * the record size. 
+ */ + private void allocateMemoryForRecordIfNecessary(int required) throws IOException { + // Step 1: + // Ensure that the pointer array has space for another record. This may cause a spill. + growPointerArrayIfNecessary(); + // Step 2: + // Ensure that the last page has space for another record. This may cause a spill. + acquireNewPageIfNecessary(required); + // Step 3: + // The allocation in step 2 could have caused a spill, which would have freed the pointer + // array allocated in step 1. Therefore we need to check again whether we have to allocate + // a new pointer array. + // + // If the allocation in this step causes a spill event then it will not cause the page + // allocated in the previous step to be freed. The function `spill` only frees memory if at + // least one record has been inserted in the in-memory sorter. This will not be the case if + // we have spilled in the previous step. + // + // If we did not spill in the previous step then `growPointerArrayIfNecessary` will be a + // no-op that does not allocate any memory, and therefore can't cause a spill event. + // + // Thus there is no need to call `acquireNewPageIfNecessary` again after this step. + growPointerArrayIfNecessary(); + } + /** * Write a record to the sorter. */ @@ -398,11 +446,10 @@ public void insertRecord( spill(); } - growPointerArrayIfNecessary(); - int uaoSize = UnsafeAlignedOffset.getUaoSize(); + final int uaoSize = UnsafeAlignedOffset.getUaoSize(); // Need 4 or 8 bytes to store the record length. final int required = length + uaoSize; - acquireNewPageIfNecessary(required); + allocateMemoryForRecordIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); @@ -425,10 +472,9 @@ public void insertKVRecord(Object keyBase, long keyOffset, int keyLen, Object valueBase, long valueOffset, int valueLen, long prefix, boolean prefixIsNull) throws IOException { - growPointerArrayIfNecessary(); - int uaoSize = UnsafeAlignedOffset.getUaoSize(); + final int uaoSize = UnsafeAlignedOffset.getUaoSize(); final int required = keyLen + valueLen + (2 * uaoSize); - acquireNewPageIfNecessary(required); + allocateMemoryForRecordIfNecessary(required); final Object base = currentPage.getBaseObject(); final long recordAddress = taskMemoryManager.encodePageNumberAndOffset(currentPage, pageCursor); @@ -572,7 +618,7 @@ public long spill() throws IOException { assert(inMemSorter != null); released += inMemSorter.getMemoryUsage(); totalSortTimeNanos += inMemSorter.getSortTimeNanos(); - inMemSorter.free(); + inMemSorter.freeMemory(); inMemSorter = null; taskContext.taskMetrics().incMemoryBytesSpilled(released); taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); @@ -669,7 +715,7 @@ public UnsafeSorterIterator getIterator(int startIndex) throws IOException { } i += spillWriter.recordsSpilled(); } - if (inMemSorter != null) { + if (inMemSorter != null && inMemSorter.numRecords() > 0) { UnsafeSorterIterator iter = inMemSorter.getSortedIterator(); moveOver(iter, startIndex - i); queue.add(iter); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java index ff641a24a7b3e..33be899b6b438 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java +++ 
b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorter.java @@ -159,32 +159,26 @@ private int getUsableCapacity() { return (int) (array.size() / (radixSortSupport != null ? 2 : 1.5)); } + public long getInitialSize() { + return initialSize; + } + /** * Free the memory used by pointer array. */ - public void free() { + public void freeMemory() { if (consumer != null) { if (array != null) { consumer.freeArray(array); } - array = null; - } - } - public void reset() { - if (consumer != null) { - consumer.freeArray(array); - // the call to consumer.allocateArray may trigger a spill which in turn access this instance - // and eventually re-enter this method and try to free the array again. by setting the array - // to null and its length to 0 we effectively make the spill code-path a no-op. setting the - // array to null also indicates that it has already been de-allocated which prevents a double - // de-allocation in free(). + // Set the array to null instead of allocating a new array. Allocating an array could have + // triggered another spill and this method already is called from UnsafeExternalSorter when + // spilling. Attempting to allocate while spilling is dangerous, as we could be holding onto + // a large partially complete allocation, which may prevent other memory from being allocated. + // Instead we will allocate the new array when it is necessary. array = null; usableCapacity = 0; - pos = 0; - nullBoundaryPos = 0; - array = consumer.allocateArray(initialSize); - usableCapacity = getUsableCapacity(); } pos = 0; nullBoundaryPos = 0; @@ -217,18 +211,20 @@ public boolean hasSpaceForAnotherRecord() { } public void expandPointerArray(LongArray newArray) { - if (newArray.size() < array.size()) { - // checkstyle.off: RegexpSinglelineJava - throw new SparkOutOfMemoryError("Not enough memory to grow pointer array"); - // checkstyle.on: RegexpSinglelineJava + if (array != null) { + if (newArray.size() < array.size()) { + // checkstyle.off: RegexpSinglelineJava + throw new SparkOutOfMemoryError("Not enough memory to grow pointer array"); + // checkstyle.on: RegexpSinglelineJava + } + Platform.copyMemory( + array.getBaseObject(), + array.getBaseOffset(), + newArray.getBaseObject(), + newArray.getBaseOffset(), + pos * 8L); + consumer.freeArray(array); } - Platform.copyMemory( - array.getBaseObject(), - array.getBaseOffset(), - newArray.getBaseObject(), - newArray.getBaseOffset(), - pos * 8L); - consumer.freeArray(array); array = newArray; usableCapacity = getUsableCapacity(); } @@ -347,6 +343,11 @@ public long getCurrentPageNumber() { * {@code next()} will return the same mutable object. */ public UnsafeSorterIterator getSortedIterator() { + if (numRecords() == 0) { + // `array` might be null, so make sure that it is not accessed by returning early. 
+ return new SortedIterator(0, 0); + } + int offset = 0; long start = System.nanoTime(); if (sortComparator != null) { diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java index a1b66ccfaef03..dc2b4814c8284 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorterSuite.java @@ -23,7 +23,6 @@ import java.util.LinkedList; import java.util.UUID; -import org.hamcrest.Matchers; import scala.Tuple2$; import org.junit.After; @@ -38,7 +37,6 @@ import org.apache.spark.executor.TaskMetrics; import org.apache.spark.internal.config.package$; import org.apache.spark.memory.TestMemoryManager; -import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.serializer.JavaSerializer; import org.apache.spark.serializer.SerializerInstance; @@ -581,40 +579,28 @@ public void testGetIterator() throws Exception { } @Test - public void testOOMDuringSpill() throws Exception { + public void testNoOOMDuringSpill() throws Exception { final UnsafeExternalSorter sorter = newSorter(); - // we assume that given default configuration, - // the size of the data we insert to the sorter (ints) - // and assuming we shouldn't spill before pointers array is exhausted - // (memory manager is not configured to throw at this point) - // - so this loop runs a reasonable number of iterations (<2000). - // test indeed completed within <30ms (on a quad i7 laptop). - for (int i = 0; sorter.hasSpaceForAnotherRecord(); ++i) { + for (int i = 0; i < 100; i++) { insertNumber(sorter, i); } - // we expect the next insert to attempt growing the pointerssArray first - // allocation is expected to fail, then a spill is triggered which - // attempts another allocation which also fails and we expect to see this - // OOM here. the original code messed with a released array within the - // spill code and ended up with a failed assertion. we also expect the - // location of the OOM to be - // org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset - memoryManager.markconsequentOOM(2); - try { - insertNumber(sorter, 1024); - fail("expected OutOfMmoryError but it seems operation surprisingly succeeded"); - } - // we expect an SparkOutOfMemoryError here, anything else (i.e the original NPE is a failure) - catch (SparkOutOfMemoryError oom){ - String oomStackTrace = Utils.exceptionString(oom); - assertThat("expected SparkOutOfMemoryError in " + - "org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset", - oomStackTrace, - Matchers.containsString( - "org.apache.spark.util.collection.unsafe.sort.UnsafeInMemorySorter.reset")); + + // Check that spilling still succeeds when the task is starved for memory. + memoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.spill(); + memoryManager.resetConsequentOOM(); + + // Ensure that records can be appended after spilling, i.e. check that the sorter will allocate + // the new pointer array that it could not allocate while spilling. 
+ for (int i = 0; i < 100; ++i) { + insertNumber(sorter, i); } + + sorter.cleanupResources(); + assertSpillFilesWereCleanedUp(); } + private void verifyIntIterator(UnsafeSorterIterator iter, int start, int end) throws IOException { for (int i = start; i < end; i++) { diff --git a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java index 2b8a0602730e1..9d4909ddce792 100644 --- a/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java +++ b/core/src/test/java/org/apache/spark/util/collection/unsafe/sort/UnsafeInMemorySorterSuite.java @@ -20,6 +20,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; +import org.apache.spark.unsafe.array.LongArray; import org.junit.Assert; import org.junit.Test; @@ -27,7 +28,6 @@ import org.apache.spark.SparkConf; import org.apache.spark.memory.TestMemoryConsumer; import org.apache.spark.memory.TestMemoryManager; -import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.memory.MemoryBlock; @@ -37,7 +37,6 @@ import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.isIn; import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import static org.mockito.Mockito.mock; public class UnsafeInMemorySorterSuite { @@ -147,7 +146,7 @@ public int compare( } @Test - public void freeAfterOOM() { + public void testNoOOMDuringReset() { final SparkConf sparkConf = new SparkConf(); sparkConf.set(package$.MODULE$.MEMORY_OFFHEAP_ENABLED(), false); @@ -156,12 +155,7 @@ public void freeAfterOOM() { final TaskMemoryManager memoryManager = new TaskMemoryManager( testMemoryManager, 0); final TestMemoryConsumer consumer = new TestMemoryConsumer(memoryManager); - final MemoryBlock dataPage = memoryManager.allocatePage(2048, consumer); - final Object baseObject = dataPage.getBaseObject(); - // Write the records into the data page: - long position = dataPage.getBaseOffset(); - final HashPartitioner hashPartitioner = new HashPartitioner(4); // Use integer comparison for comparing prefixes (which are partition ids, in this case) final PrefixComparator prefixComparator = PrefixComparators.LONG; final RecordComparator recordComparator = new RecordComparator() { @@ -179,18 +173,24 @@ public int compare( UnsafeInMemorySorter sorter = new UnsafeInMemorySorter(consumer, memoryManager, recordComparator, prefixComparator, 100, shouldUseRadixSort()); - testMemoryManager.markExecutionAsOutOfMemoryOnce(); - try { - sorter.reset(); - fail("expected SparkOutOfMemoryError but it seems operation surprisingly succeeded"); - } catch (SparkOutOfMemoryError oom) { - // as expected - } - // [SPARK-21907] this failed on NPE at - // org.apache.spark.memory.MemoryConsumer.freeArray(MemoryConsumer.java:108) - sorter.free(); - // simulate a 'back to back' free. - sorter.free(); + // Ensure that the sorter does not OOM while freeing its memory. + testMemoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.freeMemory(); + testMemoryManager.resetConsequentOOM(); + Assert.assertFalse(sorter.hasSpaceForAnotherRecord()); + + // Get the sorter in an usable state again by allocating a new pointer array. 
+ LongArray array = consumer.allocateArray(1000); + sorter.expandPointerArray(array); + + // Ensure that it is safe to call freeMemory() multiple times. + testMemoryManager.markconsequentOOM(Integer.MAX_VALUE); + sorter.freeMemory(); + sorter.freeMemory(); + testMemoryManager.resetConsequentOOM(); + Assert.assertFalse(sorter.hasSpaceForAnotherRecord()); + + assertEquals(0L, memoryManager.cleanUpAllAllocatedMemory()); } } diff --git a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala index 60f67699f81be..987f383c9c4fa 100644 --- a/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala +++ b/core/src/test/scala/org/apache/spark/memory/TestMemoryManager.scala @@ -119,6 +119,14 @@ class TestMemoryManager(conf: SparkConf) consequentOOM += n } + /** + * Undos the effects of [[markExecutionAsOutOfMemoryOnce]] and [[markconsequentOOM]] and lets + * calls to [[acquireExecutionMemory()]] (if there is enough memory available). + */ + def resetConsequentOOM(): Unit = synchronized { + consequentOOM = 0 + } + def limit(avail: Long): Unit = synchronized { require(avail >= 0) available = avail From 7766fd13c9e7cb72b97fdfee224d3958fbe882a0 Mon Sep 17 00:00:00 2001 From: Akshat Bordia Date: Tue, 29 Sep 2020 08:38:43 -0500 Subject: [PATCH 0129/1009] [MINOR][DOCS] Fixing log message for better clarity Fixing log message for better clarity. Closes #29870 from akshatb1/master. Lead-authored-by: Akshat Bordia Co-authored-by: Akshat Bordia Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/SparkConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index dbd89d646ae54..427e98e616515 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -568,7 +568,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria // If spark.executor.heartbeatInterval bigger than spark.network.timeout, // it will almost always cause ExecutorLostFailure. See SPARK-22754. require(executorTimeoutThresholdMs > executorHeartbeatIntervalMs, "The value of " + - s"${networkTimeout}=${executorTimeoutThresholdMs}ms must be no less than the value of " + + s"${networkTimeout}=${executorTimeoutThresholdMs}ms must be greater than the value of " + s"${EXECUTOR_HEARTBEAT_INTERVAL.key}=${executorHeartbeatIntervalMs}ms.") } From 711d8dd28afd9af92b025f9908534e5f1d575042 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 29 Sep 2020 16:46:04 +0000 Subject: [PATCH 0130/1009] [SPARK-33018][SQL] Fix estimate statistics issue if child has 0 bytes ### What changes were proposed in this pull request? This pr fix estimate statistics issue if child has 0 bytes. ### Why are the changes needed? The `sizeInBytes` can be `0` when AQE and CBO are enabled(`spark.sql.adaptive.enabled`=true, `spark.sql.cbo.enabled`=true and `spark.sql.cbo.planStats.enabled`=true). This will generate incorrect BroadcastJoin, resulting in Driver OOM. For example: ![SPARK-33018](https://user-images.githubusercontent.com/5399861/94457606-647e3d00-01e7-11eb-85ee-812ae6efe7bb.jpg) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Closes #29894 from wangyum/SPARK-33018. 
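A standalone sketch (not part of the patch) of the arithmetic behind the fix: the fallback size estimate of a multi-child operator is the product of its children's sizes, so a single child reporting 0 bytes zeroes the whole estimate and can make a large join look broadcastable. Filtering out non-positive sizes, as the fix does, keeps the estimate meaningful.
```
val childSizes = Seq(BigInt(100L * 1024 * 1024), BigInt(0))
childSizes.product                 // 0         -> looks tiny, may be wrongly broadcast
childSizes.filter(_ > 0L).product  // 104857600 -> closer to reality
```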
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../SizeInBytesOnlyStatsPlanVisitor.scala | 3 ++- .../statsEstimation/JoinEstimationSuite.scala | 22 +++++++++++++++++++ .../StatsEstimationTestBase.scala | 9 +++++--- 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index da36db7ae1f5f..a586988fd3253 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -53,7 +53,8 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { */ override def default(p: LogicalPlan): Statistics = p match { case p: LeafNode => p.computeStats() - case _: LogicalPlan => Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).product) + case _: LogicalPlan => + Statistics(sizeInBytes = p.children.map(_.stats.sizeInBytes).filter(_ > 0L).product) } override def visitAggregate(p: Aggregate): Statistics = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala index 6c5a2b247fc23..cdfc863cc0212 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/JoinEstimationSuite.scala @@ -551,4 +551,26 @@ class JoinEstimationSuite extends StatsEstimationTestBase { attributeStats = AttributeMap(Nil)) assert(join.stats == expectedStats) } + + test("SPARK-33018 Fix estimate statistics issue if child has 0 bytes") { + case class MyStatsTestPlan( + outputList: Seq[Attribute], + sizeInBytes: BigInt) extends LeafNode { + override def output: Seq[Attribute] = outputList + override def computeStats(): Statistics = Statistics(sizeInBytes = sizeInBytes) + } + + val left = MyStatsTestPlan( + outputList = Seq("key-1-2", "key-2-4").map(nameToAttr), + sizeInBytes = BigInt(100)) + + val right = MyStatsTestPlan( + outputList = Seq("key-1-2", "key-2-3").map(nameToAttr), + sizeInBytes = BigInt(0)) + + val join = Join(left, right, LeftOuter, + Some(EqualTo(nameToAttr("key-2-4"), nameToAttr("key-2-3"))), JoinHint.NONE) + + assert(join.stats == Statistics(sizeInBytes = 100)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala index 9dceca59f5b87..0a27e31b3c9f6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/StatsEstimationTestBase.scala @@ -26,17 +26,20 @@ import org.apache.spark.sql.types.{IntegerType, StringType} trait StatsEstimationTestBase extends SparkFunSuite { - var originalValue: Boolean = false + var originalCBOValue: Boolean = false + var originalPlanStatsValue: Boolean = false override def beforeAll(): Unit = { super.beforeAll() // Enable stats estimation based on CBO. 
- originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) + originalCBOValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) + originalPlanStatsValue = SQLConf.get.getConf(SQLConf.PLAN_STATS_ENABLED) SQLConf.get.setConf(SQLConf.CBO_ENABLED, true) } override def afterAll(): Unit = { - SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) + SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalCBOValue) + SQLConf.get.setConf(SQLConf.PLAN_STATS_ENABLED, originalPlanStatsValue) super.afterAll() } From cc06266ade5a4eb35089501a3b32736624208d4c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 29 Sep 2020 12:02:45 -0700 Subject: [PATCH 0131/1009] [SPARK-33019][CORE] Use spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=1 by default ### What changes were proposed in this pull request? Apache Spark 3.1's default Hadoop profile is `hadoop-3.2`. Instead of having a warning documentation, this PR aims to use a consistent and safer version of Apache Hadoop file output committer algorithm which is `v1`. This will prevent a silent correctness regression during migration from Apache Spark 2.4/3.0 to Apache Spark 3.1.0. Of course, if there is a user-provided configuration, `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2`, that will be used still. ### Why are the changes needed? Apache Spark provides multiple distributions with Hadoop 2.7 and Hadoop 3.2. `spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version` depends on the Hadoop version. Apache Hadoop 3.0 switches the default algorithm from `v1` to `v2` and now there exists a discussion to remove `v2`. We had better provide a consistent default behavior of `v1` across various Spark distributions. - [MAPREDUCE-7282](https://issues.apache.org/jira/browse/MAPREDUCE-7282) MR v2 commit algorithm should be deprecated and not the default ### Does this PR introduce _any_ user-facing change? Yes. This changes the default behavior. Users can override this conf. ### How was this patch tested? Manual. **BEFORE (spark-3.0.1-bin-hadoop3.2)** ```scala scala> sc.version res0: String = 3.0.1 scala> sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version") res1: String = 2 ``` **AFTER** ```scala scala> sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version") res0: String = 1 ``` Closes #29895 from dongjoon-hyun/SPARK-DEFAUT-COMMITTER. 
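A short note (not part of the patch) on checking and overriding the new default. The conf is only forced to 1 when the user has not set it, so opting back into v2 remains a one-flag change, e.g. `--conf spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version=2` at submit time; the effective value can be read back from a running session:
```
// Should print "1" by default after this change, or the user-provided override.
sc.hadoopConfiguration.get("mapreduce.fileoutputcommitter.algorithm.version")
```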
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/SparkHadoopUtil.scala | 3 +++ docs/configuration.md | 10 ++-------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 1180501e8c738..6f799a542bc1e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -462,6 +462,9 @@ private[spark] object SparkHadoopUtil { for ((key, value) <- conf.getAll if key.startsWith("spark.hadoop.")) { hadoopConf.set(key.substring("spark.hadoop.".length), value) } + if (conf.getOption("spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version").isEmpty) { + hadoopConf.set("mapreduce.fileoutputcommitter.algorithm.version", "1") + } } private def appendSparkHiveConfigs(conf: SparkConf, hadoopConf: Configuration): Unit = { diff --git a/docs/configuration.md b/docs/configuration.md index 8b6ae9d777cce..d825a589dfd31 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1761,16 +1761,10 @@ Apart from these, the following properties are also available, and may be useful spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version - Dependent on environment + 1 The file output committer algorithm version, valid algorithm version number: 1 or 2. - Version 2 may have better performance, but version 1 may handle failures better in certain situations, - as per MAPREDUCE-4815. - The default value depends on the Hadoop version used in an environment: - 1 for Hadoop versions lower than 3.0 - 2 for Hadoop versions 3.0 and higher - It's important to note that this can change back to 1 again in the future once MAPREDUCE-7282 - is fixed and merged. + Note that 2 may cause a correctness issue like MAPREDUCE-7282. 2.2.0 From 3a299aa6480ac22501512cd0310d31a441d7dfdc Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 30 Sep 2020 21:37:29 +0900 Subject: [PATCH 0132/1009] [SPARK-32741][SQL] Check if the same ExprId refers to the unique attribute in logical plans ### What changes were proposed in this pull request? Some plan transformations (e.g., `RemoveNoopOperators`) implicitly assume the same `ExprId` refers to the unique attribute. But, `RuleExecutor` does not check this integrity between logical plan transformations. So, this PR intends to add this check in `isPlanIntegral` of `Analyzer`/`Optimizer`. This PR comes from the talk with cloud-fan viirya in https://github.com/apache/spark/pull/29485#discussion_r475346278 ### Why are the changes needed? For better logical plan integrity checking. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29585 from maropu/PlanIntegrityTest. 
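As a rough illustration of what the new check catches — a sketch against the `LogicalPlanIntegrity` helpers introduced in the diff below, using the Catalyst test DSL (it mirrors the added suite rather than prescribing new behavior):

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlanIntegrity}

val t = LocalRelation('a.int, 'b.int)
val Seq(a, _) = t.output

// A healthy plan: the alias gets a fresh ExprId, so the integrity check passes.
assert(LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(
  t.select(Alias(a + 1, "a")())))

// A broken plan of the form `a#1 + 1 AS a#1`: the alias reuses the ExprId of
// its reference, exactly the situation rules like RemoveNoopOperators cannot
// tolerate, so the check fails.
assert(!LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(
  t.select(Alias(a + 1, "a")(exprId = a.exprId))))
```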
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 11 ++- .../sql/catalyst/optimizer/Optimizer.scala | 15 ++-- .../sql/catalyst/optimizer/subquery.scala | 51 ++++++++------ .../catalyst/plans/logical/LogicalPlan.scala | 70 +++++++++++++++++++ .../optimizer/FoldablePropagationSuite.scala | 4 +- .../logical/LogicalPlanIntegritySuite.scala | 51 ++++++++++++++ .../sql/execution/adaptive/AQEOptimizer.scala | 8 ++- .../spark/sql/streaming/StreamSuite.scala | 7 +- 8 files changed, 181 insertions(+), 36 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6e1f371b1a2b5..77a6631b250e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -48,6 +48,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils /** * A trivial [[Analyzer]] with a dummy [[SessionCatalog]] and [[EmptyFunctionRegistry]]. @@ -136,6 +137,10 @@ class Analyzer( private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog + override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { + !Utils.isTesting || LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan) + } + override def isView(nameParts: Seq[String]): Boolean = v1SessionCatalog.isView(nameParts) // Only for tests. @@ -2777,8 +2782,8 @@ class Analyzer( // a resolved Aggregate will not have Window Functions. case f @ UnresolvedHaving(condition, a @ Aggregate(groupingExprs, aggregateExprs, child)) if child.resolved && - hasWindowFunction(aggregateExprs) && - a.expressions.forall(_.resolved) => + hasWindowFunction(aggregateExprs) && + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) @@ -2795,7 +2800,7 @@ class Analyzer( // Aggregate without Having clause. case a @ Aggregate(groupingExprs, aggregateExprs, child) if hasWindowFunction(aggregateExprs) && - a.expressions.forall(_.resolved) => + a.expressions.forall(_.resolved) => val (windowExpressions, aggregateExpressions) = extract(aggregateExprs) // Create an Aggregate operator to evaluate aggregation functions. 
val withAggregate = Aggregate(groupingExprs, aggregateExpressions, child) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 94970740d8d91..f2360150e47b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ @@ -44,9 +43,11 @@ abstract class Optimizer(catalogManager: CatalogManager) // Currently we check after the execution of each rule if a plan: // - is still resolved // - only host special expressions in supported operators + // - has globally-unique attribute IDs override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { !Utils.isTesting || (plan.resolved && - plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty) + plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan)) } override protected val excludedOnceBatches: Set[String] = @@ -1585,14 +1586,14 @@ object ReplaceDistinctWithAggregate extends Rule[LogicalPlan] { * Replaces logical [[Deduplicate]] operator with an [[Aggregate]] operator. */ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case Deduplicate(keys, child) if !child.isStreaming => + def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { + case d @ Deduplicate(keys, child) if !child.isStreaming => val keyExprIds = keys.map(_.exprId) val aggCols = child.output.map { attr => if (keyExprIds.contains(attr.exprId)) { attr } else { - Alias(new First(attr).toAggregateExpression(), attr.name)(attr.exprId) + Alias(new First(attr).toAggregateExpression(), attr.name)() } } // SPARK-22951: Physical aggregate operators distinguishes global aggregation and grouping @@ -1601,7 +1602,9 @@ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { // we append a literal when the grouping key list is empty so that the result aggregate // operator is properly treated as a grouping aggregation. val nonemptyKeys = if (keys.isEmpty) Literal(1) :: Nil else keys - Aggregate(nonemptyKeys, aggCols, child) + val newAgg = Aggregate(nonemptyKeys, aggCols, child) + val attrMapping = d.output.zip(newAgg.output) + newAgg -> attrMapping } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 7b696912aa465..a168dcd7a83f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -338,15 +338,20 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using - * the given collector. 
The expression is rewritten and returned. + * the given collector. To avoid the reuse of `exprId`s, this method generates new `exprId` + * for the subqueries and rewrite references in the given `expression`. + * This method returns extracted subqueries and the corresponding `exprId`s and these values + * will be used later in `constructLeftJoins` for building the child plan that + * returns subquery output with the `exprId`s. */ private def extractCorrelatedScalarSubqueries[E <: Expression]( expression: E, - subqueries: ArrayBuffer[ScalarSubquery]): E = { + subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): E = { val newExpression = expression transform { case s: ScalarSubquery if s.children.nonEmpty => - subqueries += s - s.plan.output.head + val newExprId = NamedExpression.newExprId + subqueries += s -> newExprId + s.plan.output.head.withExprId(newExprId) } newExpression.asInstanceOf[E] } @@ -510,16 +515,16 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ private def constructLeftJoins( child: LogicalPlan, - subqueries: ArrayBuffer[ScalarSubquery]): LogicalPlan = { + subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): LogicalPlan = { subqueries.foldLeft(child) { - case (currentChild, ScalarSubquery(query, conditions, _)) => + case (currentChild, (ScalarSubquery(query, conditions, _), newExprId)) => val origOutput = query.output.head val resultWithZeroTups = evalSubqueryOnZeroTups(query) if (resultWithZeroTups.isEmpty) { // CASE 1: Subquery guaranteed not to have the COUNT bug Project( - currentChild.output :+ origOutput, + currentChild.output :+ Alias(origOutput, origOutput.name)(exprId = newExprId), Join(currentChild, query, LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // Subquery might have the COUNT bug. Add appropriate corrections. @@ -544,7 +549,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { Alias( If(IsNull(alwaysTrueRef), resultWithZeroTups.get, - aggValRef), origOutput.name)(exprId = origOutput.exprId), + aggValRef), origOutput.name)(exprId = newExprId), Join(currentChild, Project(query.output :+ alwaysTrueExpr, query), LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) @@ -571,7 +576,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { (IsNull(alwaysTrueRef), resultWithZeroTups.get), (Not(havingNode.get.condition), Literal.create(null, aggValRef.dataType))), aggValRef), - origOutput.name)(exprId = origOutput.exprId) + origOutput.name)(exprId = newExprId) Project( currentChild.output :+ caseExpr, @@ -588,36 +593,42 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { * Rewrite [[Filter]], [[Project]] and [[Aggregate]] plans containing correlated scalar * subqueries. */ - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { case a @ Aggregate(grouping, expressions, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { // We currently only allow correlated subqueries in an aggregate if they are part of the // grouping expressions. As a result we need to replace all the scalar subqueries in the // grouping expressions by their result. 
val newGrouping = grouping.map { e => - subqueries.find(_.semanticEquals(e)).map(_.plan.output.head).getOrElse(e) + subqueries.find(_._1.semanticEquals(e)).map(_._1.plan.output.head).getOrElse(e) } - Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val newAgg = Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val attrMapping = a.output.zip(newAgg.output) + newAgg -> attrMapping } else { - a + a -> Nil } case p @ Project(expressions, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { - Project(newExpressions, constructLeftJoins(child, subqueries)) + val newProj = Project(newExpressions, constructLeftJoins(child, subqueries)) + val attrMapping = p.output.zip(newProj.output) + newProj -> attrMapping } else { - p + p -> Nil } case f @ Filter(condition, child) => - val subqueries = ArrayBuffer.empty[ScalarSubquery] + val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] val newCondition = extractCorrelatedScalarSubqueries(condition, subqueries) if (subqueries.nonEmpty) { - Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val newProj = Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val attrMapping = f.output.zip(newProj.output) + newProj -> attrMapping } else { - f + f -> Nil } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 96c550616065a..48dfc5fd57e63 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -203,3 +203,73 @@ abstract class BinaryNode extends LogicalPlan { abstract class OrderPreservingUnaryNode extends UnaryNode { override final def outputOrdering: Seq[SortOrder] = child.outputOrdering } + +object LogicalPlanIntegrity { + + private def canGetOutputAttrs(p: LogicalPlan): Boolean = { + p.resolved && !p.expressions.exists { e => + e.collectFirst { + // We cannot call `output` in plans with a `ScalarSubquery` expr having no column, + // so, we filter out them in advance. + case s: ScalarSubquery if s.plan.schema.fields.isEmpty => true + }.isDefined + } + } + + /** + * Since some logical plans (e.g., `Union`) can build `AttributeReference`s in their `output`, + * this method checks if the same `ExprId` refers to attributes having the same data type + * in plan output. + */ + def hasUniqueExprIdsForOutput(plan: LogicalPlan): Boolean = { + val exprIds = plan.collect { case p if canGetOutputAttrs(p) => + // NOTE: we still need to filter resolved expressions here because the output of + // some resolved logical plans can have unresolved references, + // e.g., outer references in `ExistenceJoin`. + p.output.filter(_.resolved).map { a => (a.exprId, a.dataType) } + }.flatten + + val ignoredExprIds = plan.collect { + // NOTE: `Union` currently reuses input `ExprId`s for output references, but we cannot + // simply modify the code for assigning new `ExprId`s in `Union#output` because + // the modification will make breaking changes (See SPARK-32741(#29585)). + // So, this check just ignores the `exprId`s of `Union` output. 
+ case u: Union if u.resolved => u.output.map(_.exprId) + }.flatten.toSet + + val groupedDataTypesByExprId = exprIds.filterNot { case (exprId, _) => + ignoredExprIds.contains(exprId) + }.groupBy(_._1).values.map(_.distinct) + + groupedDataTypesByExprId.forall(_.length == 1) + } + + /** + * This method checks if reference `ExprId`s are not reused when assigning a new `ExprId`. + * For example, it returns false if plan transformers create an alias having the same `ExprId` + * with one of reference attributes, e.g., `a#1 + 1 AS a#1`. + */ + def checkIfSameExprIdNotReused(plan: LogicalPlan): Boolean = { + plan.collect { case p if p.resolved => + p.expressions.forall { + case a: Alias => + // Even if a plan is resolved, `a.references` can return unresolved references, + // e.g., in `Grouping`/`GroupingID`, so we need to filter out them and + // check if the same `exprId` in `Alias` does not exist + // among reference `exprId`s. + !a.references.filter(_.resolved).map(_.exprId).exists(_ == a.exprId) + case _ => + true + } + }.forall(identity) + } + + /** + * This method checks if the same `ExprId` refers to an unique attribute in a plan tree. + * Some plan transformers (e.g., `RemoveNoopOperators`) rewrite logical + * plans based on this assumption. + */ + def checkIfExprIdsAreGloballyUnique(plan: LogicalPlan): Boolean = { + checkIfSameExprIdNotReused(plan) && hasUniqueExprIdsForOutput(plan) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index fe43e8e288673..92e4fa345e2ad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -156,8 +156,8 @@ class FoldablePropagationSuite extends PlanTest { val query = expand.where(a1.isNotNull).select(a1, a2).analyze val optimized = Optimize.execute(query) val correctExpand = expand.copy(projections = Seq( - Seq(Literal(null), c2), - Seq(c1, Literal(null)))) + Seq(Literal(null), Literal(2)), + Seq(Literal(1), Literal(null)))) val correctAnswer = correctExpand.where(a1.isNotNull).select(a1, a2).analyze comparePlans(optimized, correctAnswer) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala new file mode 100644 index 0000000000000..6f342b8d94379 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanIntegritySuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.types.LongType + +class LogicalPlanIntegritySuite extends PlanTest { + import LogicalPlanIntegrity._ + + case class OutputTestPlan(child: LogicalPlan, output: Seq[Attribute]) extends UnaryNode { + override val analyzed = true + } + + test("Checks if the same `ExprId` refers to a semantically-equal attribute in a plan output") { + val t = LocalRelation('a.int, 'b.int) + assert(hasUniqueExprIdsForOutput(OutputTestPlan(t, t.output))) + assert(!hasUniqueExprIdsForOutput(OutputTestPlan(t, t.output.zipWithIndex.map { + case (a, i) => AttributeReference(s"c$i", LongType)(a.exprId) + }))) + } + + test("Checks if reference ExprIds are not reused when assigning a new ExprId") { + val t = LocalRelation('a.int, 'b.int) + val Seq(a, b) = t.output + assert(checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")()))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")(exprId = a.exprId)))) + assert(checkIfSameExprIdNotReused(t.select(Alias(a + 1, "a")(exprId = b.exprId)))) + assert(checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")()))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")(exprId = a.exprId)))) + assert(!checkIfSameExprIdNotReused(t.select(Alias(a + b, "ab")(exprId = b.exprId)))) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index c82b264a600ef..0170f8b2f71c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.adaptive -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, LogicalPlanIntegrity, PlanHelper} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils @@ -54,4 +54,10 @@ class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { } } } + + override protected def isPlanIntegral(plan: LogicalPlan): Boolean = { + !Utils.isTesting || (plan.resolved && + plan.find(PlanHelper.specialExpressionsInUnsupportedOperator(_).nonEmpty).isEmpty && + LogicalPlanIntegrity.checkIfExprIdsAreGloballyUnique(plan)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 9f3ff1a6708e4..8797e5ad64149 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -36,7 +36,6 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRelationV2} -import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_MILLIS import 
org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.{LocalLimitExec, SimpleMode, SparkPlan} import org.apache.spark.sql.execution.command.ExplainCommand @@ -47,7 +46,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.StreamSourceProvider import org.apache.spark.sql.streaming.util.{BlockOnStopSourceProvider, StreamManualClock} -import org.apache.spark.sql.types.{IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{IntegerType, LongType, StructField, StructType} import org.apache.spark.util.Utils class StreamSuite extends StreamTest { @@ -1268,7 +1267,7 @@ class StreamSuite extends StreamTest { } abstract class FakeSource extends StreamSourceProvider { - private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil) + private val fakeSchema = StructType(StructField("a", LongType) :: Nil) override def sourceSchema( spark: SQLContext, @@ -1290,7 +1289,7 @@ class FakeDefaultSource extends FakeSource { new Source { private var offset = -1L - override def schema: StructType = StructType(StructField("a", IntegerType) :: Nil) + override def schema: StructType = StructType(StructField("a", LongType) :: Nil) override def getOffset: Option[Offset] = { if (offset >= 10) { From ece8d8e22cf7e3924e44c16f58028c323dc54356 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 30 Sep 2020 09:27:57 -0700 Subject: [PATCH 0133/1009] [SPARK-33006][K8S][DOCS] Add dynamic PVC usage example into K8s doc ### What changes were proposed in this pull request? This updates K8s document to describe new dynamic PVC features. ### Why are the changes needed? This will help the user use the new features easily. ### Does this PR introduce _any_ user-facing change? Yes, but it's a doc updates. ### How was this patch tested? Manual. Screen Shot 2020-09-28 at 3 54 53 PM Screen Shot 2020-09-28 at 3 55 07 PM Closes #29897 from dongjoon-hyun/SPARK-33006. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index d0c6012e00aa6..e9c292d21fd47 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -307,7 +307,18 @@ And, the claim name of a `persistentVolumeClaim` with volume name `checkpointpvc spark.kubernetes.driver.volumes.persistentVolumeClaim.checkpointpvc.options.claimName=check-point-pvc-claim ``` -The configuration properties for mounting volumes into the executor pods use prefix `spark.kubernetes.executor.` instead of `spark.kubernetes.driver.`. For a complete list of available options for each supported type of volumes, please refer to the [Spark Properties](#spark-properties) section below. +The configuration properties for mounting volumes into the executor pods use prefix `spark.kubernetes.executor.` instead of `spark.kubernetes.driver.`. + +For example, you can mount a dynamically-created persistent volume claim per executor by using `OnDemand` as a claim name and `storageClass` and `sizeLimit` options like the following. This is useful in case of [Dynamic Allocation](configuration.html#dynamic-allocation). 
+``` +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.claimName=OnDemand +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.storageClass=gp +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.options.sizeLimit=500Gi +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.path=/data +spark.kubernetes.executor.volumes.persistentVolumeClaim.data.mount.readOnly=false +``` + +For a complete list of available options for each supported type of volumes, please refer to the [Spark Properties](#spark-properties) section below. ## Local Storage @@ -318,6 +329,15 @@ Spark supports using volumes to spill data during shuffles and other operations. --conf spark.kubernetes.driver.volumes.[VolumeType].spark-local-dir-[VolumeName].mount.readOnly=false ``` +Specifically, you can use persistent volume claims if the jobs require large shuffle and sorting operations in executors. + +``` +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.claimName=OnDemand +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.storageClass=gp +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.options.sizeLimit=500Gi +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.path=/data +spark.kubernetes.executor.volumes.persistentVolumeClaim.spark-local-dir-1.mount.readOnly=false +``` If no volume is set as local storage, Spark uses temporary scratch space to spill data to disk during shuffles and other operations. When using Kubernetes as the resource manager the pods will be created with an [emptyDir](https://kubernetes.io/docs/concepts/storage/volumes/#emptydir) volume mounted for each directory listed in `spark.local.dir` or the environment variable `SPARK_LOCAL_DIRS` . If no directories are explicitly specified then a default directory is created and configured appropriately. From 3bdbb5546d2517dda6f71613927cc1783c87f319 Mon Sep 17 00:00:00 2001 From: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com> Date: Thu, 1 Oct 2020 08:15:53 +0900 Subject: [PATCH 0134/1009] [SPARK-31753][SQL][DOCS][FOLLOW-UP] Add missing keywords in the SQL docs ### What changes were proposed in this pull request? update sql-ref docs, the following key words will be added in this PR. CLUSTERED BY SORTED BY INTO num_buckets BUCKETS ### Why are the changes needed? let more users know the sql key words usage ### Does this PR introduce _any_ user-facing change? No ![image](https://user-images.githubusercontent.com/46367746/94428281-0a6b8080-01c3-11eb-9ff3-899f8da602ca.png) ![image](https://user-images.githubusercontent.com/46367746/94428285-0d667100-01c3-11eb-8a54-90e7641d917b.png) ![image](https://user-images.githubusercontent.com/46367746/94428288-0f303480-01c3-11eb-9e1d-023538aa6e2d.png) ### How was this patch tested? generate html test Closes #29883 from GuoPhilipse/add-sql-missing-keywords. 
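For reference, a hedged sketch of a programmatic counterpart to the documented `CLUSTERED BY` / `SORTED BY` / `INTO num_buckets BUCKETS` clauses, using the `DataFrameWriter` bucketing API (this creates a Spark datasource bucketed table, not a Hive-bucketed one; the table name and a live `SparkSession` named `spark` are assumptions):

```scala
// Roughly equivalent to:
//   CREATE TABLE bucketed_ids (ID BIGINT, NAME STRING)
//     CLUSTERED BY (ID) SORTED BY (ID ASC) INTO 4 BUCKETS STORED AS PARQUET
spark.range(0, 100)
  .selectExpr("id AS ID", "CAST(id AS STRING) AS NAME")
  .write
  .bucketBy(4, "ID")   // CLUSTERED BY (ID) INTO 4 BUCKETS
  .sortBy("ID")        // SORTED BY (ID ASC)
  .format("parquet")
  .saveAsTable("bucketed_ids")
```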
Lead-authored-by: GuoPhilipse <46367746+GuoPhilipse@users.noreply.github.com> Co-authored-by: GuoPhilipse Signed-off-by: Takeshi Yamamuro --- ...-ref-syntax-ddl-create-table-datasource.md | 7 +++- ...-ref-syntax-ddl-create-table-hiveformat.md | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-ddl-create-table-datasource.md b/docs/sql-ref-syntax-ddl-create-table-datasource.md index d334447a91011..ba0516afbbfad 100644 --- a/docs/sql-ref-syntax-ddl-create-table-datasource.md +++ b/docs/sql-ref-syntax-ddl-create-table-datasource.md @@ -67,7 +67,12 @@ as any order. For example, you can write COMMENT table_comment after TBLPROPERTI * **SORTED BY** - Determines the order in which the data is stored in buckets. Default is Ascending order. + Specifies an ordering of bucket columns. Optionally, one can use ASC for an ascending order or DESC for a descending order after any column names in the SORTED BY clause. + If not specified, ASC is assumed by default. + +* **INTO num_buckets BUCKETS** + + Specifies buckets numbers, which is used in `CLUSTERED BY` clause. * **LOCATION** diff --git a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md index 7bf847df98150..3a8c8d5b1160a 100644 --- a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md +++ b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md @@ -31,6 +31,9 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier [ COMMENT table_comment ] [ PARTITIONED BY ( col_name2[:] col_type2 [ COMMENT col_comment2 ], ... ) | ( col_name1, col_name2, ... ) ] + [ CLUSTERED BY ( col_name1, col_name2, ...) + [ SORTED BY ( col_name1 [ ASC | DESC ], col_name2 [ ASC | DESC ], ... ) ] + INTO num_buckets BUCKETS ] [ ROW FORMAT row_format ] [ STORED AS file_format ] [ LOCATION path ] @@ -65,6 +68,21 @@ as any order. For example, you can write COMMENT table_comment after TBLPROPERTI Partitions are created on the table, based on the columns specified. +* **CLUSTERED BY** + + Partitions created on the table will be bucketed into fixed buckets based on the column specified for bucketing. + + **NOTE:** Bucketing is an optimization technique that uses buckets (and bucketing columns) to determine data partitioning and avoid data shuffle. + +* **SORTED BY** + + Specifies an ordering of bucket columns. Optionally, one can use ASC for an ascending order or DESC for a descending order after any column names in the SORTED BY clause. + If not specified, ASC is assumed by default. + +* **INTO num_buckets BUCKETS** + + Specifies buckets numbers, which is used in `CLUSTERED BY` clause. + * **row_format** Use the `SERDE` clause to specify a custom SerDe for one table. Otherwise, use the `DELIMITED` clause to use the native SerDe and specify the delimiter, escape character, null character and so on. 
@@ -203,6 +221,20 @@ CREATE EXTERNAL TABLE family (id INT, name STRING) STORED AS INPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleInputFormat' OUTPUTFORMAT 'com.ly.spark.example.serde.io.SerDeExampleOutputFormat' LOCATION '/tmp/family/'; + +--Use `CLUSTERED BY` clause to create bucket table without `SORTED BY` +CREATE TABLE clustered_by_test1 (ID INT, AGE STRING) + CLUSTERED BY (ID) + INTO 4 BUCKETS + STORED AS ORC + +--Use `CLUSTERED BY` clause to create bucket table with `SORTED BY` +CREATE TABLE clustered_by_test2 (ID INT, NAME STRING) + PARTITIONED BY (YEAR STRING) + CLUSTERED BY (ID, NAME) + SORTED BY (ID ASC) + INTO 3 BUCKETS + STORED AS PARQUET ``` ### Related Statements From d75222dd1b0cdaaa7c22964e974117923fd069bb Mon Sep 17 00:00:00 2001 From: jlafleche Date: Wed, 30 Sep 2020 19:00:18 -0700 Subject: [PATCH 0135/1009] [SPARK-33012][BUILD][K8S] Upgrade fabric8 to 4.10.3 ### What changes were proposed in this pull request? This PR aims to upgrade `kubernetes-client` library to track fabric8's declared compatibility for k8s 1.18.0: https://github.com/fabric8io/kubernetes-client#compatibility-matrix ### Why are the changes needed? According to fabric8, 4.9.2 is incompatible with k8s 1.18.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Not tested yet. Closes #29888 from laflechejonathan/jlf/fabric8Ugprade. Authored-by: jlafleche Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 28 +++++++++++++++---- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 28 +++++++++++++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 28 +++++++++++++++---- resource-managers/kubernetes/core/pom.xml | 2 +- .../kubernetes/integration-tests/pom.xml | 2 +- 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index 900ee6d18d06d..fef1a6442cd33 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -139,14 +139,31 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar 
+kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -159,9 +176,10 @@ metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 9e167256236c0..6d1934b46261b 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -154,14 +154,31 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar +kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar 
machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -174,9 +191,10 @@ metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10//orc-core-1.5.10.jar orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b44b461014cd7..2e29d831b9e66 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -166,14 +166,31 @@ kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar kerby-util/1.0.1//kerby-util-1.0.1.jar kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.9.2//kubernetes-client-4.9.2.jar -kubernetes-model-common/4.9.2//kubernetes-model-common-4.9.2.jar -kubernetes-model/4.9.2//kubernetes-model-4.9.2.jar +kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar +kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar +kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar +kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar +kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar +kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar +kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar +kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar +kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar +kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar +kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar +kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar +kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar +kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar +kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar +kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar +kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar +kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar +kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar +kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.6//logging-interceptor-3.12.6.jar +logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.7.1//lz4-java-1.7.1.jar machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar @@ -188,9 +205,10 @@ netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar okhttp/2.7.5//okhttp-2.7.5.jar -okhttp/3.12.6//okhttp-3.12.6.jar +okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar +openshift-model/4.10.3//openshift-model-4.10.3.jar orc-core/1.5.10//orc-core-1.5.10.jar orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar orc-shims/1.5.10//orc-shims-1.5.10.jar diff --git 
a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index c1a7dafb69c46..a4c80f551cdfc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -30,7 +30,7 @@ kubernetes - 4.9.2 + 4.10.3 diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 4a55ead38aae2..952081030f5f3 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -28,7 +28,7 @@ 1.3.0 - 4.9.2 + 4.10.3 kubernetes-integration-tests From 0b5a379c1fb87aa536ebe9433e501dcf4f80ea60 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 30 Sep 2020 19:24:50 -0700 Subject: [PATCH 0136/1009] [SPARK-33023][CORE] Judge path of Windows need add condition `Utils.isWindows` ### What changes were proposed in this pull request? according to https://github.com/apache/spark/pull/29881#discussion_r496648397 we need add condition `Utils.isWindows` ### Why are the changes needed? add strict condition of judging path is window path ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No Closes #29909 from AngersZhuuuu/SPARK-33023. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 409e3065492b0..501e865c4105a 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1899,7 +1899,7 @@ class SparkContext(config: SparkConf) extends Logging { if (path == null || path.isEmpty) { logWarning("null or empty path specified as parameter to addJar") } else { - val key = if (path.contains("\\")) { + val key = if (path.contains("\\") && Utils.isWindows) { // For local paths with backslashes on Windows, URI throws an exception addLocalJarFile(new File(path)) } else { From 28ed3a512ac6fcaafa885eb8092a68fe9e8f5c26 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 30 Sep 2020 21:30:17 -0700 Subject: [PATCH 0137/1009] [SPARK-32723][WEBUI] Upgrade to jQuery 3.5.1 ### What changes were proposed in this pull request? Upgrade to the latest available version of jQuery (3.5.1). ### Why are the changes needed? There are some CVE-s reported (CVE-2020-11022, CVE-2020-11023) affecting older versions of jQuery. Although Spark UI is read-only and those CVEs doesn't seem to affect Spark, using the latest version of this library can help to handle vulnerability reports of security scans. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual tests and checked the jQuery 3.5 upgrade guide. Closes #29902 from peter-toth/SPARK-32723-upgrade-to-jquery-3.5.1. 
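Regarding the `Utils.isWindows` guard added in the SPARK-33023 hunk above, a small plain-Scala illustration (the sample path is made up) of the reasoning: `java.net.URI` rejects backslashes, yet a backslash is also a legal character in a POSIX file name, so the backslash-based fallback should only trigger when actually running on Windows.

```scala
import java.net.{URI, URISyntaxException}

val windowsStylePath = """C:\tmp\libs\my.jar"""

// URI parsing fails on backslashes, which is why such paths are routed to
// addLocalJarFile(new File(path)) instead of the URI-based branch in addJar.
val parsable =
  try { new URI(windowsStylePath); true }
  catch { case _: URISyntaxException => false }

println(parsable) // false
```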
Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../resources/org/apache/spark/ui/static/jquery-3.4.1.min.js | 2 -- .../resources/org/apache/spark/ui/static/jquery-3.5.1.min.js | 2 ++ core/src/main/scala/org/apache/spark/ui/UIUtils.scala | 2 +- dev/.rat-excludes | 2 +- docs/_layouts/global.html | 2 +- docs/js/vendor/jquery-3.4.1.min.js | 2 -- docs/js/vendor/jquery-3.5.1.min.js | 2 ++ 7 files changed, 7 insertions(+), 7 deletions(-) delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js create mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery-3.5.1.min.js delete mode 100644 docs/js/vendor/jquery-3.4.1.min.js create mode 100644 docs/js/vendor/jquery-3.5.1.min.js diff --git a/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js b/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js deleted file mode 100644 index 07c00cd227da0..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/jquery-3.4.1.min.js +++ /dev/null @@ -1,2 +0,0 @@ -/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" 
":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return 
n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return 
e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return 
A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="

    ",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new 
RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in 
d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return 
e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function D(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||j,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,j=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var 
e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function qe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function Le(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function He(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Oe(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Ut,Xt=[],Vt=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Xt.pop()||S.expando+"_"+Ct.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Vt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Vt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Vt,"$1"+r):!1!==e.jsonp&&(e.url+=(Et.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Xt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Ut=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Ut.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):("number"==typeof f.top&&(f.top+="px"),"number"==typeof f.left&&(f.left+="px"),c.css(f))}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=$e(y.pixelPosition,function(e,t){if(t)return t=Be(e,n),Me.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 - +
diff --git a/dev/.rat-excludes b/dev/.rat-excludes
index 98786437f7b1c..0e892a927906a 100644
--- a/dev/.rat-excludes
+++ b/dev/.rat-excludes
@@ -25,7 +25,7 @@ bootstrap.bundle.min.js
 bootstrap.bundle.min.js.map
 bootstrap.min.css
 bootstrap.min.css.map
-jquery-3.4.1.min.js
+jquery-3.5.1.min.js
 d3.min.js
 dagre-d3.min.js
 graphlib-dot.min.js
diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html
index d6548f0fa9534..5f6cd7c6b7f20 100755
--- a/docs/_layouts/global.html
+++ b/docs/_layouts/global.html
@@ -168,7 +168,7 @@

    {{ page.title }}

    - + diff --git a/docs/js/vendor/jquery-3.4.1.min.js b/docs/js/vendor/jquery-3.4.1.min.js deleted file mode 100644 index 07c00cd227da0..0000000000000 --- a/docs/js/vendor/jquery-3.4.1.min.js +++ /dev/null @@ -1,2 +0,0 @@ -/*! jQuery v3.4.1 | (c) JS Foundation and other contributors | jquery.org/license */ -!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],E=C.document,r=Object.getPrototypeOf,s=t.slice,g=t.concat,u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType},x=function(e){return null!=e&&e===e.window},c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.4.1",k=function(e,t){return new k.fn.init(e,t)},p=/^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g;function d(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp($),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+$),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\([\\da-f]{1,6}"+M+"?|("+M+")|.)","ig"),ne=function(e,t,n){var r="0x"+t-65536;return r!=r||n?t:r<0?String.fromCharCode(r+65536):String.fromCharCode(r>>10|55296,1023&r|56320)},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(m.childNodes),m.childNodes),t[m.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&((e?e.ownerDocument||e:m)!==C&&T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!A[t+" 
"]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&U.test(t)){(s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=k),o=(l=h(t)).length;while(o--)l[o]="#"+s+" "+xe(l[o]);c=l.join(","),f=ee.test(t)&&ye(e.parentNode)||e}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){A(t,!0)}finally{s===k&&e.removeAttribute("id")}}}return g(t.replace(B,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[k]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:m;return r!==C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),m!==C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=k,!C.getElementsByName||!C.getElementsByName(k).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return 
t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+k+"-]").length||v.push("~="),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+k+"+*").length||v.push(".#.+[+~]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",$)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)===(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e===C||e.ownerDocument===m&&y(m,e)?-1:t===C||t.ownerDocument===m&&y(m,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e===C?-1:t===C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]===m?-1:s[r]===m?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if((e.ownerDocument||e)!==C&&T(e),d.matchesSelector&&E&&!A[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){A(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=p[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&p(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof 
e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?k.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?k.grep(e,function(e){return e===n!==r}):"string"!=typeof n?k.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(k.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||q,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:L.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof k?t[0]:t,k.merge(this,k.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),D.test(r[1])&&k.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(k):k.makeArray(e,this)}).prototype=k.fn,q=k(E);var H=/^(?:parents|prev(?:Until|All))/,O={children:!0,contents:!0,next:!0,prev:!0};function P(e,t){while((e=e[t])&&1!==e.nodeType);return e}k.fn.extend({has:function(e){var t=k(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i,ge={option:[1,""],thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?k.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;nx",y.noCloneChecked=!!me.cloneNode(!0).lastChild.defaultValue;var Te=/^key/,Ce=/^(?:mouse|pointer|contextmenu|drag|drop)|click/,Ee=/^([^.]*)(?:\.(.+)|)/;function ke(){return!0}function Se(){return!1}function Ne(e,t){return e===function(){try{return E.activeElement}catch(e){}}()==("focus"===t)}function Ae(e,t,n,r,i,o){var a,s;if("object"==typeof t){for(s in"string"!=typeof n&&(r=r||n,n=void 0),t)Ae(e,s,n,r,t[s],o);return e}if(null==r&&null==i?(i=n,r=n=void 0):null==i&&("string"==typeof n?(i=r,r=void 0):(i=r,r=n,n=void 0)),!1===i)i=Se;else if(!i)return e;return 1===o&&(a=i,(i=function(e){return k().off(e),a.apply(this,arguments)}).guid=a.guid||(a.guid=k.guid++)),e.each(function(){k.event.add(this,t,i,r,n)})}function De(e,i,o){o?(Q.set(e,i,!1),k.event.add(e,i,{namespace:!1,handler:function(e){var t,n,r=Q.get(this,i);if(1&e.isTrigger&&this[i]){if(r.length)(k.event.special[i]||{}).delegateType&&e.stopPropagation();else if(r=s.call(arguments),Q.set(this,i,r),t=o(this,i),this[i](),r!==(n=Q.get(this,i))||t?Q.set(this,i,!1):n={},r!==n)return e.stopImmediatePropagation(),e.preventDefault(),n.value}else r.length&&(Q.set(this,i,{value:k.event.trigger(k.extend(r[0],k.Event.prototype),r.slice(1),this)}),e.stopImmediatePropagation())}})):void 0===Q.get(e,i)&&k.event.add(e,i,ke)}k.event={global:{},add:function(t,e,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.get(t);if(v){n.handler&&(n=(o=n).handler,i=o.selector),i&&k.find.matchesSelector(ie,i),n.guid||(n.guid=k.guid++),(u=v.events)||(u=v.events={}),(a=v.handle)||(a=v.handle=function(e){return"undefined"!=typeof k&&k.event.triggered!==e.type?k.event.dispatch.apply(t,arguments):void 0}),l=(e=(e||"").match(R)||[""]).length;while(l--)d=g=(s=Ee.exec(e[l])||[])[1],h=(s[2]||"").split(".").sort(),d&&(f=k.event.special[d]||{},d=(i?f.delegateType:f.bindType)||d,f=k.event.special[d]||{},c=k.extend({type:d,origType:g,data:r,handler:n,guid:n.guid,selector:i,needsContext:i&&k.expr.match.needsContext.test(i),namespace:h.join(".")},o),(p=u[d])||((p=u[d]=[]).delegateCount=0,f.setup&&!1!==f.setup.call(t,r,h,a)||t.addEventListener&&t.addEventListener(d,a)),f.add&&(f.add.call(t,c),c.handler.guid||(c.handler.guid=n.guid)),i?p.splice(p.delegateCount++,0,c):p.push(c),k.event.global[d]=!0)}},remove:function(e,t,n,r,i){var o,a,s,u,l,c,f,p,d,h,g,v=Q.hasData(e)&&Q.get(e);if(v&&(u=v.events)){l=(t=(t||"").match(R)||[""]).length;while(l--)if(d=g=(s=Ee.exec(t[l])||[])[1],h=(s[2]||"").split(".").sort(),d){f=k.event.special[d]||{},p=u[d=(r?f.delegateType:f.bindType)||d]||[],s=s[2]&&new RegExp("(^|\\.)"+h.join("\\.(?:.*\\.|)")+"(\\.|$)"),a=o=p.length;while(o--)c=p[o],!i&&g!==c.origType||n&&n.guid!==c.guid||s&&!s.test(c.namespace)||r&&r!==c.selector&&("**"!==r||!c.selector)||(p.splice(o,1),c.selector&&p.delegateCount--,f.remove&&f.remove.call(e,c));a&&!p.length&&(f.teardown&&!1!==f.teardown.call(e,h,v.handle)||k.removeEvent(e,d,v.handle),delete u[d])}else for(d in u)k.event.remove(e,d+t[l],n,r,!0);k.isEmptyObject(u)&&Q.remove(e,"handle events")}},dispatch:function(e){var t,n,r,i,o,a,s=k.event.fix(e),u=new Array(arguments.length),l=(Q.get(this,"events")||{})[s.type]||[],c=k.event.special[s.type]||{};for(u[0]=s,t=1;t\x20\t\r\n\f]*)[^>]*)\/>/gi,qe=/\s*$/g;function Oe(e,t){return 
A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&k(e).children("tbody")[0]||e}function Pe(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function Re(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Me(e,t){var n,r,i,o,a,s,u,l;if(1===t.nodeType){if(Q.hasData(e)&&(o=Q.access(e),a=Q.set(t,o),l=o.events))for(i in delete a.handle,a.events={},l)for(n=0,r=l[i].length;n")},clone:function(e,t,n){var r,i,o,a,s,u,l,c=e.cloneNode(!0),f=oe(e);if(!(y.noCloneChecked||1!==e.nodeType&&11!==e.nodeType||k.isXMLDoc(e)))for(a=ve(c),r=0,i=(o=ve(e)).length;r").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Vt,Gt=[],Yt=/(=)\?(?=&|$)|\?\?/;k.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Gt.pop()||k.expando+"_"+kt++;return this[e]=!0,e}}),k.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Yt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Yt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Yt,"$1"+r):!1!==e.jsonp&&(e.url+=(St.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||k.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?k(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Gt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Vt=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Vt.childNodes.length),k.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=D.exec(e))?[t.createElement(i[1])]:(i=we([e],t,o),o&&o.length&&k(o).remove(),k.merge([],i.childNodes)));var r,i,o},k.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(k.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},k.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){k.fn[t]=function(e){return this.on(t,e)}}),k.expr.pseudos.animated=function(t){return k.grep(k.timers,function(e){return t===e.elem}).length},k.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=k.css(e,"position"),c=k(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=k.css(e,"top"),u=k.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,k.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},k.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){k.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===k.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===k.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=k(e).offset()).top+=k.css(e,"borderTopWidth",!0),i.left+=k.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-k.css(r,"marginTop",!0),left:t.left-i.left-k.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===k.css(e,"position"))e=e.offsetParent;return e||ie})}}),k.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;k.fn[t]=function(e){return _(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),k.each(["top","left"],function(e,n){k.cssHooks[n]=ze(y.pixelPosition,function(e,t){if(t)return t=_e(e,n),$e.test(t)?k(e).position()[n]+"px":t})}),k.each({Height:"height",Width:"width"},function(a,s){k.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){k.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return _(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?k.css(e,t,i):k.style(e,t,n,i)},s,n?e:void 0,n)}})}),k.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){k.fn[n]=function(e,t){return 0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new 
RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in 
d=se.support={},i=se.isXML=function(e){var t=e.namespaceURI,n=(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return 
e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},D=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," ":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function D(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||j,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,j=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var 
e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
    "],col:[2,"","
    "],tr:[2,"","
    "],td:[3,"","
    "],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function qe(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function Le(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function He(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Oe(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var Ut,Xt=[],Vt=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=Xt.pop()||S.expando+"_"+Ct.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Vt.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Vt.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Vt,"$1"+r):!1!==e.jsonp&&(e.url+=(Et.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,Xt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((Ut=E.implementation.createHTMLDocument("").body).innerHTML="
    ",2===Ut.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):("number"==typeof f.top&&(f.top+="px"),"number"==typeof f.left&&(f.left+="px"),c.css(f))}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=$e(y.pixelPosition,function(e,t){if(t)return t=Be(e,n),Me.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0 Date: Thu, 1 Oct 2020 14:50:32 +0900 Subject: [PATCH 0138/1009] [SPARK-32992][SQL] Map Oracle's ROWID type to StringType in read via JDBC ### What changes were proposed in this pull request? Convert the `ROWID` type in the Oracle JDBC dialect to Catalyst's `StringType`. The doc for Oracle 19c says explicitly that the type must be string: https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html#GUID-AEF1FE4C-2DE5-4BE7-BB53-83AD8F1E34EF ### Why are the changes needed? To avoid the exception showed in https://stackoverflow.com/questions/52244492/spark-jdbc-dataframereader-fails-to-read-oracle-table-with-datatype-as-rowid ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? N/A Closes #29884 from MaxGekk/jdbc-oracle-rowid-string. Authored-by: Max Gekk Signed-off-by: Takeshi Yamamuro --- .../spark/sql/jdbc/OracleIntegrationSuite.scala | 11 +++++++++++ .../org/apache/spark/sql/jdbc/OracleDialect.scala | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 9c59023cd8766..ce63d1df6f028 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -518,4 +518,15 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark """.stripMargin.replaceAll("\n", " ")) assert(sql("select id, d, t from queryOption").collect.toSet == expectedResult) } + + test("SPARK-32992: map Oracle's ROWID type to StringType") { + val rows = spark.read.format("jdbc") + .option("url", jdbcUrl) + .option("query", "SELECT ROWID from datetime") + .load() + .collect() + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types(0).equals("class java.lang.String")) + assert(!rows(0).getString(0).isEmpty) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 4c0623729e00d..3f12b9acd0fc4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -64,6 +64,12 @@ private case object OracleDialect extends JdbcDialect { => Some(TimestampType) // Value for Timestamp with Time Zone in Oracle case BINARY_FLOAT => Some(FloatType) // Value for OracleTypes.BINARY_FLOAT case BINARY_DOUBLE => Some(DoubleType) // Value for OracleTypes.BINARY_DOUBLE + // scalastyle:off line.size.limit + // According to the documentation for Oracle Database 19c: + // "Values of the ROWID pseudocolumn are strings representing the address of each row." 
+ // https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Data-Types.html#GUID-AEF1FE4C-2DE5-4BE7-BB53-83AD8F1E34EF + // scalastyle:on line.size.limit + case Types.ROWID => Some(StringType) case _ => None } } From d3dbe1a9076c8a76be0590ca071bfbec6114813b Mon Sep 17 00:00:00 2001 From: iRakson Date: Thu, 1 Oct 2020 20:50:16 +0900 Subject: [PATCH 0139/1009] [SQL][DOC][MINOR] Corrects input table names in the examples of CREATE FUNCTION doc ### What changes were proposed in this pull request? Fix Typo ### Why are the changes needed? To maintain consistency. Correct table name should be used for SELECT command. ### Does this PR introduce _any_ user-facing change? Yes. Now CREATE FUNCTION doc will show the correct name of table. ### How was this patch tested? Manually. Doc changes. Closes #29920 from iRakson/fixTypo. Authored-by: iRakson Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-syntax-ddl-create-function.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-ref-syntax-ddl-create-function.md b/docs/sql-ref-syntax-ddl-create-function.md index aa6c1fad7b56b..dfa4f4f8123d8 100644 --- a/docs/sql-ref-syntax-ddl-create-function.md +++ b/docs/sql-ref-syntax-ddl-create-function.md @@ -112,7 +112,7 @@ SHOW USER FUNCTIONS; +------------------+ -- Invoke the function. Every selected value should be incremented by 10. -SELECT simple_udf(c1) AS function_return_value FROM t1; +SELECT simple_udf(c1) AS function_return_value FROM test; +---------------------+ |function_return_value| +---------------------+ @@ -150,7 +150,7 @@ CREATE OR REPLACE FUNCTION simple_udf AS 'SimpleUdfR' USING JAR '/tmp/SimpleUdfR.jar'; -- Invoke the function. Every selected value should be incremented by 20. -SELECT simple_udf(c1) AS function_return_value FROM t1; +SELECT simple_udf(c1) AS function_return_value FROM test; +---------------------+ |function_return_value| +---------------------+ From 0963fcd848f62b4f2231dfcf67f9beabf927c21e Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 1 Oct 2020 08:37:07 -0500 Subject: [PATCH 0140/1009] [SPARK-33024][SQL] Fix CodeGen fallback issue of UDFSuite in Scala 2.13 ### What changes were proposed in this pull request? After `SPARK-32851` set `CODEGEN_FACTORY_MODE` to `CODEGEN_ONLY` of `sparkConf` in `SharedSparkSessionBase` to construction `SparkSession` in test, the test suite `SPARK-32459: UDF should not fail on WrappedArray` in s.sql.UDFSuite exposed a codegen fallback issue in Scala 2.13 as follow: ``` - SPARK-32459: UDF should not fail on WrappedArray *** FAILED *** Caused by: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 47, Column 99: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 47, Column 99: No applicable constructor/method found for zero actual parameters; candidates are: "public scala.collection.mutable.Builder scala.collection.mutable.ArraySeq$.newBuilder(java.lang.Object)", "public scala.collection.mutable.Builder scala.collection.mutable.ArraySeq$.newBuilder(scala.reflect.ClassTag)", "public abstract scala.collection.mutable.Builder scala.collection.EvidenceIterableFactory.newBuilder(java.lang.Object)" ``` The root cause is `WrappedArray` represent `mutable.ArraySeq` in Scala 2.13 and has a different constructor of `newBuilder` method. The main change of is pr is add Scala 2.13 only code part to deal with `case match WrappedArray` in Scala 2.13. ### Why are the changes needed? 
We need to support a Scala 2.13 build ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Scala 2.12: Pass the Jenkins or GitHub Action - Scala 2.13: All tests passed. Do the following: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl sql/core -Pscala-2.13 -am mvn test -pl sql/core -Pscala-2.13 ``` **Before** ``` Tests: succeeded 8540, failed 1, canceled 1, ignored 52, pending 0 *** 1 TEST FAILED *** ``` **After** ``` Tests: succeeded 8541, failed 0, canceled 1, ignored 52, pending 0 All tests passed. ``` Closes #29903 from LuciferYang/fix-udfsuite. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../expressions/objects/objects.scala | 53 ++++++++++++++----- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 7933d05c8dba4..9701420e65870 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -22,7 +22,7 @@ import java.lang.reflect.{Method, Modifier} import scala.collection.JavaConverters._ import scala.collection.mutable.{Builder, IndexedSeq, WrappedArray} import scala.reflect.ClassTag -import scala.util.Try +import scala.util.{Properties, Try} import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.serializer._ @@ -916,19 +916,44 @@ case class MapObjects private( val (initCollection, addElement, getResult): (String, String => String, String) = customCollectionCls match { case Some(cls) if classOf[WrappedArray[_]].isAssignableFrom(cls) => - // Scala WrappedArray - val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" - val builder = ctx.freshName("collectionBuilder") - ( - s""" - ${classOf[Builder[_, _]].getName} $builder = $getBuilder; - $builder.sizeHint($dataLength); - """, - (genValue: String) => s"$builder.$$plus$$eq($genValue);", - s"(${cls.getName}) ${classOf[WrappedArray[_]].getName}$$." + - s"MODULE$$.make(((${classOf[IndexedSeq[_]].getName})$builder" + - s".result()).toArray(scala.reflect.ClassTag$$.MODULE$$.Object()));" - ) + def doCodeGenForScala212 = { + // WrappedArray in Scala 2.12 + val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" + val builder = ctx.freshName("collectionBuilder") + ( + s""" + ${classOf[Builder[_, _]].getName} $builder = $getBuilder; + $builder.sizeHint($dataLength); + """, + (genValue: String) => s"$builder.$$plus$$eq($genValue);", + s"(${cls.getName}) ${classOf[WrappedArray[_]].getName}$$." 
+ + s"MODULE$$.make(((${classOf[IndexedSeq[_]].getName})$builder" + + s".result()).toArray(scala.reflect.ClassTag$$.MODULE$$.Object()));" + ) + } + + def doCodeGenForScala213 = { + // In Scala 2.13, WrappedArray is mutable.ArraySeq and newBuilder method need + // a ClassTag type construction parameter + val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder(" + + s"scala.reflect.ClassTag$$.MODULE$$.Object())" + val builder = ctx.freshName("collectionBuilder") + ( + s""" + ${classOf[Builder[_, _]].getName} $builder = $getBuilder; + $builder.sizeHint($dataLength); + """, + (genValue: String) => s"$builder.$$plus$$eq($genValue);", + s"(${cls.getName})$builder.result();" + ) + } + + val scalaVersion = Properties.versionNumberString + if (scalaVersion.startsWith("2.12")) { + doCodeGenForScala212 + } else { + doCodeGenForScala213 + } case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) || classOf[scala.collection.Set[_]].isAssignableFrom(cls) => // Scala sequence or set From 9c618b33084c8ff6f68e5183e2574ba368fb7758 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 1 Oct 2020 12:41:40 -0700 Subject: [PATCH 0141/1009] [SPARK-33047][BUILD] Upgrade hive-storage-api to 2.7.2 ### What changes were proposed in this pull request? This PR aims to upgrade Apache Hive `hive-storage-api` library from 2.7.1 to 2.7.2. ### Why are the changes needed? [storage-api 2.7.2](https://github.com/apache/hive/commits/rel/storage-release-2.7.2/storage-api) has the following extension and can be used when users uses a provided orc dependency. [HIVE-22959](https://github.com/apache/hive/commit/dade9919d904f8a4bff12a9130c150301a4713ed#diff-ccfc9dd7584117f531322cda3a29f3c3) : Extend storage-api to expose FilterContext [HIVE-23215](https://github.com/apache/hive/commit/361925d2f3675bb9c6566b615a4b53faee335385#diff-ccfc9dd7584117f531322cda3a29f3c3) : Make FilterContext and MutableFilterContext interfaces ### Does this PR introduce _any_ user-facing change? Yes. This is a dependency change. ### How was this patch tested? Pass the existing tests. Closes #29923 from dongjoon-hyun/SPARK-33047. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 6d1934b46261b..7b31bdd98ef26 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -92,7 +92,7 @@ hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar hive-shims/2.3.7//hive-shims-2.3.7.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 2e29d831b9e66..960ea5f836ddf 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -91,7 +91,7 @@ hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar hive-shims/2.3.7//hive-shims-2.3.7.jar -hive-storage-api/2.7.1//hive-storage-api-2.7.1.jar +hive-storage-api/2.7.2//hive-storage-api-2.7.2.jar hive-vector-code-gen/2.3.7//hive-vector-code-gen-2.3.7.jar hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar diff --git a/pom.xml b/pom.xml index 873daaa6161ee..421d932cef5fa 100644 --- a/pom.xml +++ b/pom.xml @@ -237,7 +237,7 @@ compile compile provided - 2.7.1 + 2.7.2 compile compile compile From e62d24717eb774f1c7adfd0fbe39640b96bc661d Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 1 Oct 2020 15:58:01 -0400 Subject: [PATCH 0142/1009] [SPARK-32585][SQL] Support scala enumeration in ScalaReflection ### What changes were proposed in this pull request? Add code in `ScalaReflection` to support scala enumeration and make enumeration type as string type in Spark. ### Why are the changes needed? We support java enum but failed with scala enum, it's better to keep the same behavior. Here is a example. ``` package test object TestEnum extends Enumeration { type TestEnum = Value val E1, E2, E3 = Value } import TestEnum._ case class TestClass(i: Int, e: TestEnum) { } import test._ Seq(TestClass(1, TestEnum.E1)).toDS ``` Before this PR ``` Exception in thread "main" java.lang.UnsupportedOperationException: No Encoder found for test.TestEnum.TestEnum - field (class: "scala.Enumeration.Value", name: "e") - root class: "test.TestClass" at org.apache.spark.sql.catalyst.ScalaReflection$.$anonfun$serializerFor$1(ScalaReflection.scala:567) at scala.reflect.internal.tpe.TypeConstraints$UndoLog.undo(TypeConstraints.scala:69) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects(ScalaReflection.scala:882) at org.apache.spark.sql.catalyst.ScalaReflection.cleanUpReflectionObjects$(ScalaReflection.scala:881) ``` After this PR `org.apache.spark.sql.Dataset[test.TestClass] = [i: int, e: string]` ### Does this PR introduce _any_ user-facing change? Yes, user can make case class which include scala enumeration field as dataset. ### How was this patch tested? Add test. Closes #29403 from ulysses-you/SPARK-32585. 
Authored-by: ulysses Signed-off-by: Tathagata Das --- .../spark/sql/catalyst/ScalaReflection.scala | 28 +++++++++++++++++++ .../sql/catalyst/ScalaReflectionSuite.scala | 15 ++++++++++ .../encoders/ExpressionEncoderSuite.scala | 10 ++++++- .../org/apache/spark/sql/DatasetSuite.scala | 15 +++++++++- 4 files changed, 66 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index a9c8b0bf4df2c..c65e181181e83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils /** @@ -377,6 +378,23 @@ object ScalaReflection extends ScalaReflection { expressions.Literal.create(null, ObjectType(cls)), newInstance ) + + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + // package example + // object Foo extends Enumeration { + // type Foo = Value + // val E1, E2 = Value + // } + // the fullName of tpe is example.Foo.Foo, but we need example.Foo so that + // we can call example.Foo.withName to deserialize string to enumeration. + val parent = t.asInstanceOf[TypeRef].pre.typeSymbol.asClass + val cls = mirror.runtimeClass(parent) + StaticInvoke( + cls, + ObjectType(getClassFromType(t)), + "withName", + createDeserializerForString(path, false) :: Nil, + returnNullable = false) } } @@ -561,6 +579,14 @@ object ScalaReflection extends ScalaReflection { } createSerializerForObject(inputObject, fields) + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + createSerializerForString( + Invoke( + inputObject, + "toString", + ObjectType(classOf[java.lang.String]), + returnNullable = false)) + case _ => throw new UnsupportedOperationException( s"No Encoder found for $tpe\n" + walkedTypePath) @@ -738,6 +764,8 @@ object ScalaReflection extends ScalaReflection { val Schema(dataType, nullable) = schemaFor(fieldType) StructField(fieldName, dataType, nullable) }), nullable = true) + case t if isSubtype(t, localTypeOf[Enumeration#Value]) => + Schema(StringType, nullable = true) case other => throw new UnsupportedOperationException(s"Schema for type $other is not supported") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index b981a50499bf5..e8c7aed6d72ce 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} import scala.reflect.runtime.universe.TypeTag import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.FooEnum.FooEnum import org.apache.spark.sql.catalyst.analysis.UnresolvedExtractValue import org.apache.spark.sql.catalyst.expressions.{CreateNamedStruct, Expression, If, SpecificInternalRow, UpCast} import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, NewInstance} @@ -90,6 +91,13 @@ case class FooWithAnnotation(f1: String @FooAnnotation, f2: Option[String] @FooA case class SpecialCharAsFieldData(`field.1`: 
String, `field 2`: String) +object FooEnum extends Enumeration { + type FooEnum = Value + val E1, E2 = Value +} + +case class FooClassWithEnum(i: Int, e: FooEnum) + object TestingUDT { @SQLUserDefinedType(udt = classOf[NestedStructUDT]) class NestedStruct(val a: Integer, val b: Long, val c: Double) @@ -437,4 +445,11 @@ class ScalaReflectionSuite extends SparkFunSuite { StructField("f2", StringType)))) assert(deserializerFor[FooWithAnnotation].dataType == ObjectType(classOf[FooWithAnnotation])) } + + test("SPARK-32585: Support scala enumeration in ScalaReflection") { + assert(serializerFor[FooClassWithEnum].dataType == StructType(Seq( + StructField("i", IntegerType, false), + StructField("e", StringType, true)))) + assert(deserializerFor[FooClassWithEnum].dataType == ObjectType(classOf[FooClassWithEnum])) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 6a094d4aaddae..f2598a925e08e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -25,7 +25,7 @@ import scala.collection.mutable.ArrayBuffer import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} -import org.apache.spark.sql.catalyst.{OptionalData, PrimitiveData} +import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData} import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.AttributeReference @@ -389,6 +389,14 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes assert(e.getMessage.contains("tuple with more than 22 elements are not supported")) } + encodeDecodeTest((1, FooEnum.E1), "Tuple with Int and scala Enum") + encodeDecodeTest((null, FooEnum.E1, FooEnum.E2), "Tuple with Null and scala Enum") + encodeDecodeTest(Seq(FooEnum.E1, null), "Seq with scala Enum") + encodeDecodeTest(Map("key" -> FooEnum.E1), "Map with String key and scala Enum") + encodeDecodeTest(Map(FooEnum.E1 -> "value"), "Map with scala Enum key and String value") + encodeDecodeTest(FooClassWithEnum(1, FooEnum.E1), "case class with Int and scala Enum") + encodeDecodeTest(FooEnum.E1, "scala Enum") + // Scala / Java big decimals ---------------------------------------------------------- encodeDecodeTest(BigDecimal(("9" * 20) + "." 
+ "9" * 18), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 4923e8b556907..3c914ae043677 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.TableDrivenPropertyChecks._ import org.apache.spark.{SparkException, TaskContext} -import org.apache.spark.sql.catalyst.ScroogeLikeExample +import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExample} import org.apache.spark.sql.catalyst.encoders.{OuterScopes, RowEncoder} import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} import org.apache.spark.sql.catalyst.util.sideBySide @@ -1926,6 +1926,19 @@ class DatasetSuite extends QueryTest } } } + + test("SPARK-32585: Support scala enumeration in ScalaReflection") { + checkDataset( + Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)).toDS(), + Seq(FooClassWithEnum(1, FooEnum.E1), FooClassWithEnum(2, FooEnum.E2)): _* + ) + + // test null + checkDataset( + Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)).toDS(), + Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)): _* + ) + } } object AssertExecutionId { From 005999721f103bce653c39a0001cba7f2e04b7c8 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 1 Oct 2020 18:01:23 -0500 Subject: [PATCH 0143/1009] [SPARK-33046][DOCS] Update how to build doc for Scala 2.13 with sbt ### What changes were proposed in this pull request? This PR fixes the description how to build Spark for Scala 2.13 with sbt. In the current doc, how to build Spark for Scala 2.13 with sbt is described like: ![scala-2 13-build-before](https://user-images.githubusercontent.com/4736016/94816248-80c3e900-0436-11eb-9bc2-99af5786971a.png) But build fails with this command because scala-2.13 profile is not enabled and scala-parallel-collections is absent. ``` [error] /home/kou/work/oss/spark-scala-2.13/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala:23: object parallel is not a member of package collection ``` The correct command should be: ``` build/sbt -Pspark-2.13 compile ``` ### Why are the changes needed? The build command is wrong. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I checked that `sbt -Pspark-2.13` is correct with the following command: ``` build/sbt -Dscala.version=2.13.3 -Phive -Phive-thriftserver -Pyarn -Pkubernetes compile ``` I also build the modified doc and checked the generated html: ![spark-scala-2 13-build-doc-after](https://user-images.githubusercontent.com/4736016/94869259-f2745500-047f-11eb-89e5-20816f3ed24d.png) Closes #29921 from sarutak/fix-scala-2.13-build-doc. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- docs/building-spark.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/docs/building-spark.md b/docs/building-spark.md index 3d12a60e2b974..73c527b7a5ed6 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -265,15 +265,13 @@ Change the major Scala version using (e.g. 2.13): ./dev/change-scala-version.sh 2.13 -For Maven, please enable the profile (e.g. 2.13): +Enable the profile (e.g. 2.13): + # For Maven ./build/mvn -Pscala-2.13 compile -For SBT, specify a complete scala version using (e.g. 
2.13.0): - - ./build/sbt -Dscala.version=2.13.0 - -Otherwise, the sbt-pom-reader plugin will use the `scala.version` specified in the spark-parent pom. + # For sbt + ./build/sbt -Pscala-2.13 compile ## Running Jenkins tests with Github Enterprise From 8657742ec7570c8292ed45629fc61b9791f28796 Mon Sep 17 00:00:00 2001 From: Shruti Gumma Date: Thu, 1 Oct 2020 16:33:19 -0700 Subject: [PATCH 0144/1009] [SPARK-32996][WEB-UI][FOLLOWUP] Move ExecutorSummarySuite to proper path ### What changes were proposed in this pull request? This change updates the test file location in #29872 to proper path. ### Why are the changes needed? ExecutorSummarySuite.scala should be in core/src/test/scala instead of core/src/test/java. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #29926 from shrutig/SPARK-32996. Authored-by: Shruti Gumma Signed-off-by: Liang-Chi Hsieh --- .../org/apache/spark/status/api/v1/ExecutorSummarySuite.scala | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/src/test/{java => scala}/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala (100%) diff --git a/core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala similarity index 100% rename from core/src/test/java/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala rename to core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala From d6f3138352042e33a2291e11c325b8eadb8dd5f2 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Fri, 2 Oct 2020 09:01:15 +0900 Subject: [PATCH 0145/1009] [SPARK-32859][SQL] Introduce physical rule to decide bucketing dynamically ### What changes were proposed in this pull request? This PR is to add support to decide bucketed table scan dynamically based on actual query plan. Currently bucketing is enabled by default (`spark.sql.sources.bucketing.enabled`=true), so for all bucketed tables in the query plan, we will use bucket table scan (all input files per the bucket will be read by same task). This has the drawback that if the bucket table scan is not benefitting at all (no join/groupby/etc in the query), we don't need to use bucket table scan as it would restrict the # of tasks to be # of buckets and might hurt parallelism. The feature is to add a physical plan rule right after `EnsureRequirements`: The rule goes through plan nodes. For all operators which has "interesting partition" (i.e., require `ClusteredDistribution` or `HashClusteredDistribution`), check if the sub-plan for operator has `Exchange` and bucketed table scan (and only allow certain operators in plan (i.e. `Scan/Filter/Project/Sort/PartialAgg/etc`.), see details in `DisableUnnecessaryBucketedScan.disableBucketWithInterestingPartition`). If yes, disable the bucketed table scan in the sub-plan. In addition, disabling bucketed table scan if there's operator with interesting partition along the sub-plan. Why the algorithm works is that if there's a shuffle between the bucketed table scan and operator with interesting partition, then bucketed table scan partitioning will be destroyed by the shuffle operator in the middle, and we don't need bucketed table scan for sure. The idea of "interesting partition" is inspired from "interesting order" in "Access Path Selection in a Relational Database Management System"(http://www.inf.ed.ac.uk/teaching/courses/adbs/AccessPath.pdf), after discussion with cloud-fan . ### Why are the changes needed? 
To avoid unnecessary bucketed scan in the query, and this is prerequisite for https://github.com/apache/spark/pull/29625 (decide bucketed sorted scan dynamically will be added later in that PR). ### Does this PR introduce _any_ user-facing change? A new config `spark.sql.sources.bucketing.autoBucketedScan.enabled` is introduced which set to false by default (the rule is disabled by default as it can regress cached bucketed table query, see discussion in https://github.com/apache/spark/pull/29804#issuecomment-701151447). User can opt-in/opt-out by enabling/disabling the config, as we found in prod, some users rely on assumption of # of tasks == # of buckets when reading bucket table to precisely control # of tasks. This is a bad assumption but it does happen on our side, so leave a config here to allow them opt-out for the feature. ### How was this patch tested? Added unit tests in `DisableUnnecessaryBucketedScanSuite.scala` Closes #29804 from c21/bucket-rule. Authored-by: Cheng Su Signed-off-by: Takeshi Yamamuro --- .../optimizer/CostBasedJoinReorder.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 13 ++ .../sql/execution/DataSourceScanExec.scala | 37 +-- .../spark/sql/execution/QueryExecution.scala | 3 +- .../DisableUnnecessaryBucketedScan.scala | 161 +++++++++++++ .../apache/spark/sql/DataFrameJoinSuite.scala | 2 +- .../org/apache/spark/sql/SubquerySuite.scala | 2 +- .../DisableUnnecessaryBucketedScanSuite.scala | 221 ++++++++++++++++++ ...saryBucketedScanWithHiveSupportSuite.scala | 31 +++ 9 files changed, 454 insertions(+), 18 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 8b019f35263f3..45541051a6b13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -114,7 +114,7 @@ case class OrderedJoin( /** * Reorder the joins using a dynamic programming algorithm. This implementation is based on the * paper: Access Path Selection in a Relational Database Management System. 
- * http://www.inf.ed.ac.uk/teaching/courses/adbs/AccessPath.pdf + * https://dl.acm.org/doi/10.1145/582095.582099 * * First we put all items (basic joined nodes) into level 0, then we build all two-way joins * at level 1 from plans at level 0 (single items), then build all 3-way joins from plans diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0d1a3e365c918..18ffc655b2174 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -951,6 +951,17 @@ object SQLConf { .checkValue(_ > 0, "the value of spark.sql.sources.bucketing.maxBuckets must be greater than 0") .createWithDefault(100000) + val AUTO_BUCKETED_SCAN_ENABLED = + buildConf("spark.sql.sources.bucketing.autoBucketedScan.enabled") + .doc("When true, decide whether to do bucketed scan on input tables based on query plan " + + "automatically. Do not use bucketed scan if 1. query does not have operators to utilize " + + "bucketing (e.g. join, group-by, etc), or 2. there's an exchange operator between these " + + s"operators and table scan. Note when '${BUCKETING_ENABLED.key}' is set to " + + "false, this configuration does not take any effect.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled") .internal() .doc("When false, we will throw an error if a query contains a cartesian product without " + @@ -3164,6 +3175,8 @@ class SQLConf extends Serializable with Logging { def bucketingMaxBuckets: Int = getConf(SQLConf.BUCKETING_MAX_BUCKETS) + def autoBucketedScanEnabled: Boolean = getConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED) + def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1b9ca63ea21d3..45d28ddb42fc3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -156,7 +156,9 @@ case class RowDataSourceScanExec( * @param optionalBucketSet Bucket ids for bucket pruning. * @param optionalNumCoalescedBuckets Number of coalesced buckets. * @param dataFilters Filters on non-partition columns. - * @param tableIdentifier identifier for the table in the metastore. + * @param tableIdentifier Identifier for the table in the metastore. + * @param disableBucketedScan Disable bucketed scan based on physical query plan, see rule + * [[DisableUnnecessaryBucketedScan]] for details. 
*/ case class FileSourceScanExec( @transient relation: HadoopFsRelation, @@ -166,7 +168,8 @@ case class FileSourceScanExec( optionalBucketSet: Option[BitSet], optionalNumCoalescedBuckets: Option[Int], dataFilters: Seq[Expression], - tableIdentifier: Option[TableIdentifier]) + tableIdentifier: Option[TableIdentifier], + disableBucketedScan: Boolean = false) extends DataSourceScanExec { // Note that some vals referring the file-based relation are lazy intentionally @@ -257,7 +260,8 @@ case class FileSourceScanExec( // exposed for testing lazy val bucketedScan: Boolean = { - if (relation.sparkSession.sessionState.conf.bucketingEnabled && relation.bucketSpec.isDefined) { + if (relation.sparkSession.sessionState.conf.bucketingEnabled && relation.bucketSpec.isDefined + && !disableBucketedScan) { val spec = relation.bucketSpec.get val bucketColumns = spec.bucketColumnNames.flatMap(n => toAttribute(n)) bucketColumns.size == spec.bucketColumnNames.size @@ -348,20 +352,23 @@ case class FileSourceScanExec( "DataFilters" -> seqToString(dataFilters), "Location" -> locationDesc) - val withSelectedBucketsCount = relation.bucketSpec.map { spec => - val numSelectedBuckets = optionalBucketSet.map { b => - b.cardinality() + // TODO(SPARK-32986): Add bucketed scan info in explain output of FileSourceScanExec + if (bucketedScan) { + relation.bucketSpec.map { spec => + val numSelectedBuckets = optionalBucketSet.map { b => + b.cardinality() + } getOrElse { + spec.numBuckets + } + metadata + ("SelectedBucketsCount" -> + (s"$numSelectedBuckets out of ${spec.numBuckets}" + + optionalNumCoalescedBuckets.map { b => s" (Coalesced to $b)"}.getOrElse(""))) } getOrElse { - spec.numBuckets + metadata } - metadata + ("SelectedBucketsCount" -> - (s"$numSelectedBuckets out of ${spec.numBuckets}" + - optionalNumCoalescedBuckets.map { b => s" (Coalesced to $b)"}.getOrElse(""))) - } getOrElse { + } else { metadata } - - withSelectedBucketsCount } override def verboseStringWithOperatorId(): String = { @@ -539,6 +546,7 @@ case class FileSourceScanExec( .getOrElse(sys.error(s"Invalid bucket file ${f.filePath}")) } + // TODO(SPARK-32985): Decouple bucket filter pruning and bucketed table scan val prunedFilesGroupedToBuckets = if (optionalBucketSet.isDefined) { val bucketSet = optionalBucketSet.get filesGroupedToBuckets.filter { @@ -624,6 +632,7 @@ case class FileSourceScanExec( optionalBucketSet, optionalNumCoalescedBuckets, QueryPlan.normalizePredicates(dataFilters, output), - None) + None, + disableBucketedScan) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index dca2c5b16e8d5..a056500fa361a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan} -import org.apache.spark.sql.execution.bucketing.CoalesceBucketsInJoin +import org.apache.spark.sql.execution.bucketing.{CoalesceBucketsInJoin, DisableUnnecessaryBucketedScan} import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseExchange} import 
org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata} @@ -344,6 +344,7 @@ object QueryExecution { PlanSubqueries(sparkSession), RemoveRedundantProjects(sparkSession.sessionState.conf), EnsureRequirements(sparkSession.sessionState.conf), + DisableUnnecessaryBucketedScan(sparkSession.sessionState.conf), ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, sparkSession.sessionState.columnarRules), CollapseCodegenStages(sparkSession.sessionState.conf), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala new file mode 100644 index 0000000000000..9b4f898df00b6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.bucketing + +import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashClusteredDistribution} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.execution.exchange.Exchange +import org.apache.spark.sql.internal.SQLConf + +/** + * Disable unnecessary bucketed table scan based on actual physical query plan. + * NOTE: this rule is designed to be applied right after [[EnsureRequirements]], + * where all [[ShuffleExchangeExec]] and [[SortExec]] have been added to plan properly. + * + * When BUCKETING_ENABLED and AUTO_BUCKETED_SCAN_ENABLED are set to true, go through + * query plan to check where bucketed table scan is unnecessary, and disable bucketed table + * scan if: + * + * 1. The sub-plan from root to bucketed table scan, does not contain + * [[hasInterestingPartition]] operator. + * + * 2. The sub-plan from the nearest downstream [[hasInterestingPartition]] operator + * to the bucketed table scan, contains only [[isAllowedUnaryExecNode]] operators + * and at least one [[Exchange]]. + * + * Examples: + * 1. no [[hasInterestingPartition]] operator: + * Project + * | + * Filter + * | + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * 2. join: + * SortMergeJoin(t1.i = t2.j) + * / \ + * Sort(i) Sort(j) + * / \ + * Shuffle(i) Scan(t2: i, j) + * / (bucketed on column j, enable bucketed scan) + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * 3. 
aggregate: + * HashAggregate(i, ..., Final) + * | + * Shuffle(i) + * | + * HashAggregate(i, ..., Partial) + * | + * Filter + * | + * Scan(t1: i, j) + * (bucketed on column j, DISABLE bucketed scan) + * + * The idea of [[hasInterestingPartition]] is inspired from "interesting order" in + * the paper "Access Path Selection in a Relational Database Management System" + * (https://dl.acm.org/doi/10.1145/582095.582099). + */ +case class DisableUnnecessaryBucketedScan(conf: SQLConf) extends Rule[SparkPlan] { + + /** + * Disable bucketed table scan with pre-order traversal of plan. + * + * @param withInterestingPartition The traversed plan has operator with interesting partition. + * @param withExchange The traversed plan has [[Exchange]] operator. + * @param withAllowedNode The traversed plan has only [[isAllowedUnaryExecNode]] operators. + */ + private def disableBucketWithInterestingPartition( + plan: SparkPlan, + withInterestingPartition: Boolean, + withExchange: Boolean, + withAllowedNode: Boolean): SparkPlan = { + plan match { + case p if hasInterestingPartition(p) => + // Operator with interesting partition, propagates `withInterestingPartition` as true + // to its children, and resets `withExchange` and `withAllowedNode`. + p.mapChildren(disableBucketWithInterestingPartition(_, true, false, true)) + case exchange: Exchange => + // Exchange operator propagates `withExchange` as true to its child. + exchange.mapChildren(disableBucketWithInterestingPartition( + _, withInterestingPartition, true, withAllowedNode)) + case scan: FileSourceScanExec => + if (isBucketedScanWithoutFilter(scan)) { + if (!withInterestingPartition || (withExchange && withAllowedNode)) { + scan.copy(disableBucketedScan = true) + } else { + scan + } + } else { + scan + } + case o => + o.mapChildren(disableBucketWithInterestingPartition( + _, + withInterestingPartition, + withExchange, + withAllowedNode && isAllowedUnaryExecNode(o))) + } + } + + private def hasInterestingPartition(plan: SparkPlan): Boolean = { + plan.requiredChildDistribution.exists { + case _: ClusteredDistribution | _: HashClusteredDistribution => true + case _ => false + } + } + + /** + * Check if the operator is allowed single-child operator. + * We may revisit this method later as we probably can + * remove this restriction to allow arbitrary operator between + * bucketed table scan and operator with interesting partition. + */ + private def isAllowedUnaryExecNode(plan: SparkPlan): Boolean = { + plan match { + case _: SortExec | _: ProjectExec | _: FilterExec => true + case partialAgg: BaseAggregateExec => + partialAgg.requiredChildDistributionExpressions.isEmpty + case _ => false + } + } + + private def isBucketedScanWithoutFilter(scan: FileSourceScanExec): Boolean = { + // Do not disable bucketed table scan if it has filter pruning, + // because bucketed table scan is still useful here to save CPU/IO cost with + // only reading selected bucket files. 
+ scan.bucketedScan && scan.optionalBucketSet.isEmpty + } + + def apply(plan: SparkPlan): SparkPlan = { + lazy val hasBucketedScanWithoutFilter = plan.find { + case scan: FileSourceScanExec => isBucketedScanWithoutFilter(scan) + case _ => false + }.isDefined + + if (!conf.bucketingEnabled || !conf.autoBucketedScanEnabled || !hasBucketedScanWithoutFilter) { + plan + } else { + disableBucketWithInterestingPartition(plan, false, false, true) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index b463a76a74026..14d03a30453ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -348,7 +348,7 @@ class DataFrameJoinSuite extends QueryTest } assert(broadcastExchanges.size == 1) val tables = broadcastExchanges.head.collect { - case FileSourceScanExec(_, _, _, _, _, _, _, Some(tableIdent)) => tableIdent + case FileSourceScanExec(_, _, _, _, _, _, _, Some(tableIdent), _) => tableIdent } assert(tables.size == 1) assert(tables.head === TableIdentifier(table1Name, Some(dbName))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index a21c461e84588..73b23496de515 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -1314,7 +1314,7 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // need to execute the query before we can examine fs.inputRDDs() assert(stripAQEPlan(df.queryExecution.executedPlan) match { case WholeStageCodegenExec(ColumnarToRowExec(InputAdapter( - fs @ FileSourceScanExec(_, _, _, partitionFilters, _, _, _, _)))) => + fs @ FileSourceScanExec(_, _, _, partitionFilters, _, _, _, _, _)))) => partitionFilters.exists(ExecSubqueryExpression.hasSubquery) && fs.inputRDDs().forall( _.asInstanceOf[FileScanRDD].filePartitions.forall( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala new file mode 100644 index 0000000000000..1c258bc0dadb9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.sources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite + extends DisableUnnecessaryBucketedScanSuite + with SharedSparkSession { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") + } +} + +abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTestUtils { + import testImplicits._ + + private lazy val df1 = + (0 until 50).map(i => (i % 5, i % 13, i.toString)).toDF("i", "j", "k").as("df1") + private lazy val df2 = + (0 until 50).map(i => (i % 7, i % 11, i.toString)).toDF("i", "j", "k").as("df2") + + private def checkDisableBucketedScan( + query: String, + expectedNumScanWithAutoScanEnabled: Int, + expectedNumScanWithAutoScanDisabled: Int): Unit = { + + def checkNumBucketedScan(query: String, expectedNumBucketedScan: Int): Unit = { + val plan = sql(query).queryExecution.executedPlan + val bucketedScan = plan.collect { case s: FileSourceScanExec if s.bucketedScan => s } + assert(bucketedScan.length == expectedNumBucketedScan) + } + + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "true") { + checkNumBucketedScan(query, expectedNumScanWithAutoScanEnabled) + val result = sql(query).collect() + + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + checkNumBucketedScan(query, expectedNumScanWithAutoScanDisabled) + checkAnswer(sql(query), result) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - basic test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i").saveAsTable("t3") + + Seq( + // Read bucketed table + ("SELECT * FROM t1", 0, 1), + ("SELECT i FROM t1", 0, 1), + ("SELECT j FROM t1", 0, 0), + // Filter on bucketed column + ("SELECT * FROM t1 WHERE i = 1", 1, 1), + // Filter on non-bucketed column + ("SELECT * FROM t1 WHERE j = 1", 0, 1), + // Join with same buckets + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 2, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 2, 2), + // Join with different buckets + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i", 1, 2), + // Join on non-bucketed column + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.j", 1, 2), + ("SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + ("SELECT /*+ shuffle_hash(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.j = t2.j", 0, 2), + // Aggregate on bucketed column + ("SELECT SUM(i) FROM t1 GROUP BY i", 1, 1), + // Aggregate on non-bucketed column + ("SELECT SUM(i) FROM t1 GROUP BY j", 0, 1), + ("SELECT j, SUM(i), COUNT(j) FROM t1 GROUP BY j", 0, 1) + ).foreach { case (query, 
numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - multiple joins test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i").saveAsTable("t3") + + Seq( + // Multiple joins on bucketed columns + (""" + SELECT /*+ broadcast(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 0, 3), + (""" + SELECT /*+ broadcast(t1) merge(t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + (""" + SELECT /*+ merge(t1) broadcast(t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.i AND t2.i = t3.i + """.stripMargin, 2, 3), + // Multiple joins on non-bucketed columns + (""" + SELECT /*+ broadcast(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.j AND t2.j = t3.i + """.stripMargin, 0, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.i = t2.j AND t2.j = t3.i + """.stripMargin, 1, 3), + (""" + SELECT /*+ merge(t1, t3)*/ * FROM t1 JOIN t2 JOIN t3 + ON t1.j = t2.j AND t2.j = t3.j + """.stripMargin, 0, 3) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - multiple bucketed columns test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("t2") + df2.write.format("parquet").bucketBy(4, "i", "j").saveAsTable("t3") + + Seq( + // Filter on bucketed columns + ("SELECT * FROM t1 WHERE i = 1", 0, 1), + ("SELECT * FROM t1 WHERE i = 1 AND j = 1", 0, 1), + // Join on bucketed columns + (""" + SELECT /*+ broadcast(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i AND t1.j = t2.j + """.stripMargin, 0, 2), + (""" + SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i AND t1.j = t2.j + """.stripMargin, 2, 2), + (""" + SELECT /*+ merge(t1)*/ * FROM t1 JOIN t3 ON t1.i = t3.i AND t1.j = t3.j + """.stripMargin, 1, 2), + ("SELECT /*+ merge(t1)*/ * FROM t1 JOIN t2 ON t1.i = t2.i", 0, 2), + // Aggregate on bucketed columns + ("SELECT i, j, COUNT(*) FROM t1 GROUP BY i, j", 1, 1), + ("SELECT i, COUNT(i) FROM t1 GROUP BY i", 0, 0), + ("SELECT i, COUNT(j) FROM t1 GROUP BY i", 0, 1) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } + + test("SPARK-32859: disable unnecessary bucketed table scan - other operators test") { + withTable("t1", "t2", "t3") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + df2.write.format("parquet").bucketBy(8, "i").saveAsTable("t2") + df1.write.format("parquet").saveAsTable("t3") + + Seq( + // Operator with interesting partition not in sub-plan + (""" + SELECT t1.i FROM t1 + UNION ALL + (SELECT t2.i FROM t2 GROUP BY t2.i) + """.stripMargin, 1, 2), + // Non-allowed operator in sub-plan + (""" + SELECT COUNT(*) + FROM (SELECT t1.i FROM t1 UNION ALL SELECT t2.i FROM t2) + GROUP BY i + """.stripMargin, 2, 2), + 
// Multiple [[Exchange]] in sub-plan + (""" + SELECT j, SUM(i), COUNT(*) FROM t1 GROUP BY j + DISTRIBUTE BY j + """.stripMargin, 0, 1), + (""" + SELECT j, COUNT(*) + FROM (SELECT i, j FROM t1 DISTRIBUTE BY i, j) + GROUP BY j + """.stripMargin, 0, 1), + // No bucketed table scan in plan + (""" + SELECT j, COUNT(*) + FROM (SELECT t1.j FROM t1 JOIN t3 ON t1.j = t3.j) + GROUP BY j + """.stripMargin, 0, 0) + ).foreach { case (query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) => + checkDisableBucketedScan(query, numScanWithAutoScanEnabled, numScanWithAutoScanDisabled) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala new file mode 100644 index 0000000000000..30eb93cb5c3e8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanWithHiveSupportSuite.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.sources + +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +class DisableUnnecessaryBucketedScanWithHiveSupportSuite + extends DisableUnnecessaryBucketedScanSuite + with TestHiveSingleton { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") + } +} From 991f7e81d46820f6e097fcf92c025689b677491f Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 2 Oct 2020 13:04:40 +0900 Subject: [PATCH 0146/1009] [SPARK-32001][SQL] Create JDBC authentication provider developer API ### What changes were proposed in this pull request? At the moment only the baked in JDBC connection providers can be used but there is a need to support additional databases and use-cases. In this PR I'm proposing a new developer API name `JdbcConnectionProvider`. To show how an external JDBC connection provider can be implemented I've created an example [here](https://github.com/gaborgsomogyi/spark-jdbc-connection-provider). The PR contains the following changes: * Added connection provider developer API * Made JDBC connection providers constructor to noarg => needed to load them w/ service loader * Connection providers are now loaded w/ service loader * Added tests to load providers independently * Moved `SecurityConfigurationLock` into a central place because other areas will change global JVM security config ### Why are the changes needed? No custom authentication possibility. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
* Existing + additional unit tests * Docker integration tests * Tested manually the newly created external JDBC connection provider Closes #29024 from gaborgsomogyi/SPARK-32001. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../security/SecurityConfigurationLock.scala | 24 ++++++ .../sql/jdbc/DB2KrbIntegrationSuite.scala | 2 +- ...ache.spark.sql.jdbc.JdbcConnectionProvider | 6 ++ .../datasources/jdbc/JDBCOptions.scala | 4 +- .../datasources/jdbc/JdbcUtils.scala | 2 +- .../connection/BasicConnectionProvider.scala | 30 +++++-- .../jdbc/connection/ConnectionProvider.scala | 83 ++++++++----------- .../connection/DB2ConnectionProvider.scala | 39 +++++---- .../connection/MSSQLConnectionProvider.scala | 41 +++++---- .../MariaDBConnectionProvider.scala | 18 ++-- .../connection/OracleConnectionProvider.scala | 39 +++++---- .../PostgresConnectionProvider.scala | 17 ++-- .../connection/SecureConnectionProvider.scala | 42 ++++++---- .../sql/jdbc/JdbcConnectionProvider.scala | 58 +++++++++++++ ...ache.spark.sql.jdbc.JdbcConnectionProvider | 1 + .../connection/ConnectionProviderSuite.scala | 39 ++++++--- .../ConnectionProviderSuiteBase.scala | 17 ++-- .../DB2ConnectionProviderSuite.scala | 6 +- ...ntentionallyFaultyConnectionProvider.scala | 33 ++++++++ .../MSSQLConnectionProviderSuite.scala | 42 +++++----- .../MariaDBConnectionProviderSuite.scala | 6 +- .../OracleConnectionProviderSuite.scala | 7 +- .../PostgresConnectionProviderSuite.scala | 16 ++-- 23 files changed, 355 insertions(+), 217 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala create mode 100644 sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala create mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala diff --git a/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala b/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala new file mode 100644 index 0000000000000..0741a8c1580df --- /dev/null +++ b/core/src/main/scala/org/apache/spark/security/SecurityConfigurationLock.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.security + +/** + * There are cases when global JVM security configuration must be modified. + * In order to avoid race the modification must be synchronized with this. 
+ */ +object SecurityConfigurationLock diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index fc88985cf2ec7..fa5ce2d106a10 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -54,7 +54,7 @@ class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { JDBCOptions.JDBC_KEYTAB -> keytabFileName, JDBCOptions.JDBC_PRINCIPAL -> principal )) - new DB2ConnectionProvider(null, options).getAdditionalProperties() + new DB2ConnectionProvider().getAdditionalProperties(options) } override def beforeContainerStart( diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider new file mode 100644 index 0000000000000..6e42517a6d40c --- /dev/null +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -0,0 +1,6 @@ +org.apache.spark.sql.execution.datasources.jdbc.connection.BasicConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.DB2ConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.MariaDBConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.MSSQLConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.PostgresConnectionProvider +org.apache.spark.sql.execution.datasources.jdbc.connection.OracleConnectionProvider \ No newline at end of file diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index 9e0438c0016bd..e6fff8dbdbd7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap * Options for the JDBC data source. 
*/ class JDBCOptions( - @transient val parameters: CaseInsensitiveMap[String]) + val parameters: CaseInsensitiveMap[String]) extends Serializable with Logging { import JDBCOptions._ @@ -209,7 +209,7 @@ class JDBCOptions( } class JdbcOptionsInWrite( - @transient override val parameters: CaseInsensitiveMap[String]) + override val parameters: CaseInsensitiveMap[String]) extends JDBCOptions(parameters) { import JDBCOptions._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 5831c35c7e301..202f2e03b68d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -63,7 +63,7 @@ object JdbcUtils extends Logging { throw new IllegalStateException( s"Did not find registered driver with class $driverClass") } - val connection = ConnectionProvider.create(driver, options).getConnection() + val connection = ConnectionProvider.create(driver, options.parameters) require(connection != null, s"The driver could not open a JDBC connection. Check the URL: ${options.url}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index 16b244cc617ce..a5f04649e6628 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -18,18 +18,30 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} +import java.util.Properties -import scala.collection.JavaConverters._ - +import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions +import org.apache.spark.sql.jdbc.JdbcConnectionProvider + +private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with Logging { + /** + * Additional properties for data connection (Data source property takes precedence). 
+ */ + def getAdditionalProperties(options: JDBCOptions): Properties = new Properties() + + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { + val jdbcOptions = new JDBCOptions(options) + jdbcOptions.keytab == null || jdbcOptions.principal == null + } -private[jdbc] class BasicConnectionProvider(driver: Driver, options: JDBCOptions) - extends ConnectionProvider { - def getConnection(): Connection = { - val properties = getAdditionalProperties() - options.asConnectionProperties.entrySet().asScala.foreach { e => - properties.put(e.getKey(), e.getValue()) + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + val properties = getAdditionalProperties(jdbcOptions) + options.foreach { case(k, v) => + properties.put(k, v) } - driver.connect(options.url, properties) + logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.url} and properties: $properties") + driver.connect(jdbcOptions.url, properties) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index ce45be442ccc3..546756677edce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -18,60 +18,45 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} -import java.util.Properties +import java.util.ServiceLoader -import org.apache.spark.internal.Logging -import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions - -/** - * Connection provider which opens connection toward various databases (database specific instance - * needed). If kerberos authentication required then it's the provider's responsibility to set all - * the parameters. - */ -private[jdbc] trait ConnectionProvider { - /** - * Additional properties for data connection (Data source property takes precedence). - */ - def getAdditionalProperties(): Properties = new Properties() +import scala.collection.mutable - /** - * Opens connection toward the database. 
- */ - def getConnection(): Connection -} +import org.apache.spark.internal.Logging +import org.apache.spark.security.SecurityConfigurationLock +import org.apache.spark.sql.jdbc.JdbcConnectionProvider +import org.apache.spark.util.Utils private[jdbc] object ConnectionProvider extends Logging { - def create(driver: Driver, options: JDBCOptions): ConnectionProvider = { - if (options.keytab == null || options.principal == null) { - logDebug("No authentication configuration found, using basic connection provider") - new BasicConnectionProvider(driver, options) - } else { - logDebug("Authentication configuration found, using database specific connection provider") - options.driverClass match { - case PostgresConnectionProvider.driverClass => - logDebug("Postgres connection provider found") - new PostgresConnectionProvider(driver, options) - - case MariaDBConnectionProvider.driverClass => - logDebug("MariaDB connection provider found") - new MariaDBConnectionProvider(driver, options) - - case DB2ConnectionProvider.driverClass => - logDebug("DB2 connection provider found") - new DB2ConnectionProvider(driver, options) - - case MSSQLConnectionProvider.driverClass => - logDebug("MS SQL connection provider found") - new MSSQLConnectionProvider(driver, options) - - case OracleConnectionProvider.driverClass => - logDebug("Oracle connection provider found") - new OracleConnectionProvider(driver, options) - - case _ => - throw new IllegalArgumentException(s"Driver ${options.driverClass} does not support " + - "Kerberos authentication") + private val providers = loadProviders() + + def loadProviders(): Seq[JdbcConnectionProvider] = { + val loader = ServiceLoader.load(classOf[JdbcConnectionProvider], + Utils.getContextOrSparkClassLoader) + val providers = mutable.ArrayBuffer[JdbcConnectionProvider]() + + val iterator = loader.iterator + while (iterator.hasNext) { + try { + val provider = iterator.next + logDebug(s"Loaded built in provider: $provider") + providers += provider + } catch { + case t: Throwable => + logError(s"Failed to load built in provider.", t) } } + // Seems duplicate but it's needed for Scala 2.13 + providers.toSeq + } + + def create(driver: Driver, options: Map[String, String]): Connection = { + val filteredProviders = providers.filter(_.canHandle(driver, options)) + require(filteredProviders.size == 1, + "JDBC connection initiated but not exactly one connection provider found which can handle " + + s"it. 
Found active providers: ${filteredProviders.mkString(", ")}") + SecurityConfigurationLock.synchronized { + filteredProviders.head.getConnection(driver, options) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala index 095821cf83890..ca82cdc561bef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala @@ -25,22 +25,25 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class DB2ConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = "JaasClient" - - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - DB2ConnectionProvider.super.getConnection() +private[sql] class DB2ConnectionProvider extends SecureConnectionProvider { + override val driverClass = "com.ibm.db2.jcc.DB2Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "JaasClient" + + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + DB2ConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // 11 is the integer value for kerberos result.put("securityMechanism", new String("11")) @@ -48,14 +51,10 @@ private[sql] class DB2ConnectionProvider(driver: Driver, options: JDBCOptions) result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object DB2ConnectionProvider { - val driverClass = "com.ibm.db2.jcc.DB2Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala index 2950aa9b4db94..4e405b2187e56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala @@ -25,12 +25,11 @@ import org.apache.hadoop.security.UserGroupInformation import 
org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class MSSQLConnectionProvider( - driver: Driver, - options: JDBCOptions, - parserMethod: String = "parseAndMergeProperties" - ) extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[sql] class MSSQLConnectionProvider extends SecureConnectionProvider { + override val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" + val parserMethod: String = "parseAndMergeProperties" + + override def appEntry(driver: Driver, options: JDBCOptions): String = { val configName = "jaasConfigurationName" val appEntryDefault = "SQLJDBCDriver" @@ -58,18 +57,20 @@ private[sql] class MSSQLConnectionProvider( } } - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - MSSQLConnectionProvider.super.getConnection() + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + MSSQLConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // These props needed to reach internal kerberos authentication in the JDBC driver result.put("integratedSecurity", "true") @@ -77,8 +78,8 @@ private[sql] class MSSQLConnectionProvider( result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) /** * Couple of things to mention here (v8.2.2 client): * 1. 
MS SQL supports JAAS application name configuration @@ -87,11 +88,7 @@ private[sql] class MSSQLConnectionProvider( val entryUsesKeytab = configEntry != null && configEntry.exists(_.getOptions().get("useKeyTab") == "true") if (configEntry == null || configEntry.isEmpty || !entryUsesKeytab) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object MSSQLConnectionProvider { - val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala index 3c0286654a8ec..d5fe13bf0ca19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala @@ -21,14 +21,14 @@ import java.sql.Driver import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[jdbc] class MariaDBConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[jdbc] class MariaDBConnectionProvider extends SecureConnectionProvider { + override val driverClass = "org.mariadb.jdbc.Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "Krb5ConnectorContext" - } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) /** * Couple of things to mention here (v2.5.4 client): * 1. 
MariaDB doesn't support JAAS application name configuration @@ -37,11 +37,7 @@ private[jdbc] class MariaDBConnectionProvider(driver: Driver, options: JDBCOptio val entryUsesKeytab = configEntry != null && configEntry.exists(_.getOptions().get("useKeyTab") == "true") if (configEntry == null || configEntry.isEmpty || !entryUsesKeytab) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object MariaDBConnectionProvider { - val driverClass = "org.mariadb.jdbc.Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala index c2b71b35b8128..3defda3871765 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala @@ -25,22 +25,25 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[sql] class OracleConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = "kprb5module" - - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - UserGroupInformation.loginUserFromKeytabAndReturnUGI(options.principal, options.keytab).doAs( - new PrivilegedExceptionAction[Connection]() { - override def run(): Connection = { - OracleConnectionProvider.super.getConnection() +private[sql] class OracleConnectionProvider extends SecureConnectionProvider { + override val driverClass = "oracle.jdbc.OracleDriver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = "kprb5module" + + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + UserGroupInformation.loginUserFromKeytabAndReturnUGI(jdbcOptions.principal, jdbcOptions.keytab) + .doAs( + new PrivilegedExceptionAction[Connection]() { + override def run(): Connection = { + OracleConnectionProvider.super.getConnection(driver, options) + } } - } - ) + ) } - override def getAdditionalProperties(): Properties = { + override def getAdditionalProperties(options: JDBCOptions): Properties = { val result = new Properties() // This prop is needed to turn on kerberos authentication in the JDBC driver. 
// The possible values can be found in AnoServices public interface @@ -49,14 +52,10 @@ private[sql] class OracleConnectionProvider(driver: Driver, options: JDBCOptions result } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object OracleConnectionProvider { - val driverClass = "oracle.jdbc.OracleDriver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala index fa9232e00bd88..dae8aea81f20a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala @@ -22,22 +22,19 @@ import java.util.Properties import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions -private[jdbc] class PostgresConnectionProvider(driver: Driver, options: JDBCOptions) - extends SecureConnectionProvider(driver, options) { - override val appEntry: String = { +private[jdbc] class PostgresConnectionProvider extends SecureConnectionProvider { + override val driverClass = "org.postgresql.Driver" + + override def appEntry(driver: Driver, options: JDBCOptions): String = { val parseURL = driver.getClass.getMethod("parseURL", classOf[String], classOf[Properties]) val properties = parseURL.invoke(driver, options.url, null).asInstanceOf[Properties] properties.getProperty("jaasApplicationName", "pgjdbc") } - override def setAuthenticationConfigIfNeeded(): Unit = SecurityConfigurationLock.synchronized { - val (parent, configEntry) = getConfigWithAppEntry() + override def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit = { + val (parent, configEntry) = getConfigWithAppEntry(driver, options) if (configEntry == null || configEntry.isEmpty) { - setAuthenticationConfig(parent) + setAuthenticationConfig(parent, driver, options) } } } - -private[sql] object PostgresConnectionProvider { - val driverClass = "org.postgresql.Driver" -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala index 24eec63a7244f..80c795957dac8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/SecureConnectionProvider.scala @@ -26,39 +26,49 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.util.SecurityUtils -/** - * Some of the secure connection providers modify global JVM security configuration. - * In order to avoid race the modification must be synchronized with this. 
- */ -private[connection] object SecurityConfigurationLock +private[jdbc] abstract class SecureConnectionProvider extends BasicConnectionProvider with Logging { + /** + * Returns the driver canonical class name which the connection provider supports. + */ + protected val driverClass: String + + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { + val jdbcOptions = new JDBCOptions(options) + jdbcOptions.keytab != null && jdbcOptions.principal != null && + driverClass.equalsIgnoreCase(jdbcOptions.driverClass) + } -private[jdbc] abstract class SecureConnectionProvider(driver: Driver, options: JDBCOptions) - extends BasicConnectionProvider(driver, options) with Logging { - override def getConnection(): Connection = { - setAuthenticationConfigIfNeeded() - super.getConnection() + override def getConnection(driver: Driver, options: Map[String, String]): Connection = { + val jdbcOptions = new JDBCOptions(options) + setAuthenticationConfigIfNeeded(driver, jdbcOptions) + super.getConnection(driver: Driver, options: Map[String, String]) } /** * Returns JAAS application name. This is sometimes configurable on the JDBC driver level. */ - val appEntry: String + def appEntry(driver: Driver, options: JDBCOptions): String /** * Sets database specific authentication configuration when needed. If configuration already set * then later calls must be no op. When the global JVM security configuration changed then the * related code parts must be synchronized properly. */ - def setAuthenticationConfigIfNeeded(): Unit + def setAuthenticationConfigIfNeeded(driver: Driver, options: JDBCOptions): Unit - protected def getConfigWithAppEntry(): (Configuration, Array[AppConfigurationEntry]) = { + protected def getConfigWithAppEntry( + driver: Driver, + options: JDBCOptions): (Configuration, Array[AppConfigurationEntry]) = { val parent = Configuration.getConfiguration - (parent, parent.getAppConfigurationEntry(appEntry)) + (parent, parent.getAppConfigurationEntry(appEntry(driver, options))) } - protected def setAuthenticationConfig(parent: Configuration) = { + protected def setAuthenticationConfig( + parent: Configuration, + driver: Driver, + options: JDBCOptions) = { val config = new SecureConnectionProvider.JDBCConfiguration( - parent, appEntry, options.keytab, options.principal) + parent, appEntry(driver, options), options.keytab, options.principal) logDebug("Adding database specific security configuration") Configuration.setConfiguration(config) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala new file mode 100644 index 0000000000000..caf574b0c2284 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +import java.sql.{Connection, Driver} + +import org.apache.spark.annotation.{DeveloperApi, Unstable} + +/** + * ::DeveloperApi:: + * Connection provider which opens connection toward various databases (database specific instance + * needed). If any authentication required then it's the provider's responsibility to set all + * the parameters. + * Important to mention connection providers within a JVM used from multiple threads so adding + * internal state is not advised. If any state added then it must be synchronized properly. + * + * @since 3.1.0 + */ +@DeveloperApi +@Unstable +abstract class JdbcConnectionProvider { + /** + * Checks if this connection provider instance can handle the connection initiated by the driver. + * There must be exactly one active connection provider which can handle the connection for a + * specific driver. If this requirement doesn't met then `IllegalArgumentException` + * will be thrown by the provider framework. + * + * @param driver Java driver which initiates the connection + * @param options Driver options which initiates the connection + * @return True if the connection provider can handle the driver with the given options. + */ + def canHandle(driver: Driver, options: Map[String, String]): Boolean + + /** + * Opens connection toward the database. Since global JVM security configuration change may needed + * this API is called synchronized by `SecurityConfigurationLock` to avoid race. + * + * @param driver Java driver which initiates the connection + * @param options Driver options which initiates the connection + * @return a `Connection` object that represents a connection to the URL + */ + def getConnection(driver: Driver, options: Map[String, String]): Connection +} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider new file mode 100644 index 0000000000000..afb48e1a3511f --- /dev/null +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -0,0 +1 @@ +org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider \ No newline at end of file diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala index ff5fe4f620a1d..a48dbdebea7e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala @@ -20,26 +20,43 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import javax.security.auth.login.Configuration class ConnectionProviderSuite extends ConnectionProviderSuiteBase { + test("All built-in provides must be loaded") { + IntentionallyFaultyConnectionProvider.constructed = false + val providers = ConnectionProvider.loadProviders() + assert(providers.exists(_.isInstanceOf[BasicConnectionProvider])) + assert(providers.exists(_.isInstanceOf[DB2ConnectionProvider])) + assert(providers.exists(_.isInstanceOf[MariaDBConnectionProvider])) + 
assert(providers.exists(_.isInstanceOf[MSSQLConnectionProvider])) + assert(providers.exists(_.isInstanceOf[PostgresConnectionProvider])) + assert(providers.exists(_.isInstanceOf[OracleConnectionProvider])) + assert(IntentionallyFaultyConnectionProvider.constructed) + assert(!providers.exists(_.isInstanceOf[IntentionallyFaultyConnectionProvider])) + assert(providers.size === 6) + } + test("Multiple security configs must be reachable") { Configuration.setConfiguration(null) - val postgresDriver = registerDriver(PostgresConnectionProvider.driverClass) - val postgresProvider = new PostgresConnectionProvider( - postgresDriver, options("jdbc:postgresql://localhost/postgres")) - val db2Driver = registerDriver(DB2ConnectionProvider.driverClass) - val db2Provider = new DB2ConnectionProvider(db2Driver, options("jdbc:db2://localhost/db2")) + val postgresProvider = new PostgresConnectionProvider() + val postgresDriver = registerDriver(postgresProvider.driverClass) + val postgresOptions = options("jdbc:postgresql://localhost/postgres") + val postgresAppEntry = postgresProvider.appEntry(postgresDriver, postgresOptions) + val db2Provider = new DB2ConnectionProvider() + val db2Driver = registerDriver(db2Provider.driverClass) + val db2Options = options("jdbc:db2://localhost/db2") + val db2AppEntry = db2Provider.appEntry(db2Driver, db2Options) // Make sure no authentication for the databases are set val oldConfig = Configuration.getConfiguration - assert(oldConfig.getAppConfigurationEntry(postgresProvider.appEntry) == null) - assert(oldConfig.getAppConfigurationEntry(db2Provider.appEntry) == null) + assert(oldConfig.getAppConfigurationEntry(postgresAppEntry) == null) + assert(oldConfig.getAppConfigurationEntry(db2AppEntry) == null) - postgresProvider.setAuthenticationConfigIfNeeded() - db2Provider.setAuthenticationConfigIfNeeded() + postgresProvider.setAuthenticationConfigIfNeeded(postgresDriver, postgresOptions) + db2Provider.setAuthenticationConfigIfNeeded(db2Driver, db2Options) // Make sure authentication for the databases are set val newConfig = Configuration.getConfiguration assert(oldConfig != newConfig) - assert(newConfig.getAppConfigurationEntry(postgresProvider.appEntry) != null) - assert(newConfig.getAppConfigurationEntry(db2Provider.appEntry) != null) + assert(newConfig.getAppConfigurationEntry(postgresAppEntry) != null) + assert(newConfig.getAppConfigurationEntry(db2AppEntry) != null) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala index d18a3088c4f2f..be08a3c2f7367 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala @@ -50,20 +50,25 @@ abstract class ConnectionProviderSuiteBase extends SparkFunSuite with BeforeAndA } } - protected def testSecureConnectionProvider(provider: SecureConnectionProvider): Unit = { + protected def testSecureConnectionProvider( + provider: SecureConnectionProvider, + driver: Driver, + options: JDBCOptions): Unit = { + val providerAppEntry = provider.appEntry(driver, options) + // Make sure no authentication for the database is set - assert(Configuration.getConfiguration.getAppConfigurationEntry(provider.appEntry) == null) + 
assert(Configuration.getConfiguration.getAppConfigurationEntry(providerAppEntry) == null) // Make sure the first call sets authentication properly val savedConfig = Configuration.getConfiguration - provider.setAuthenticationConfigIfNeeded() + provider.setAuthenticationConfigIfNeeded(driver, options) val config = Configuration.getConfiguration assert(savedConfig != config) - val appEntry = config.getAppConfigurationEntry(provider.appEntry) + val appEntry = config.getAppConfigurationEntry(providerAppEntry) assert(appEntry != null) // Make sure a second call is not modifying the existing authentication - provider.setAuthenticationConfigIfNeeded() - assert(config.getAppConfigurationEntry(provider.appEntry) === appEntry) + provider.setAuthenticationConfigIfNeeded(driver, options) + assert(config.getAppConfigurationEntry(providerAppEntry) === appEntry) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala index d656f83e2ebb9..5885af82532d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProviderSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class DB2ConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(DB2ConnectionProvider.driverClass) - val provider = new DB2ConnectionProvider(driver, options("jdbc:db2://localhost/db2")) + val provider = new DB2ConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:db2://localhost/db2")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala new file mode 100644 index 0000000000000..fbefcb91cccde --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.jdbc.connection + +import java.sql.{Connection, Driver} + +import org.apache.spark.sql.jdbc.JdbcConnectionProvider + +private class IntentionallyFaultyConnectionProvider extends JdbcConnectionProvider { + IntentionallyFaultyConnectionProvider.constructed = true + throw new IllegalArgumentException("Intentional Exception") + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = true + override def getConnection(driver: Driver, options: Map[String, String]): Connection = null +} + +private object IntentionallyFaultyConnectionProvider { + var constructed = false +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala index 249f1e36347ed..a5704e842e018 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProviderSuite.scala @@ -17,35 +17,35 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection +import java.sql.Driver + +import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions + class MSSQLConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded default parser must set authentication if not set") { - val driver = registerDriver(MSSQLConnectionProvider.driverClass) - val defaultProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql")) - val customProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) + val provider = new MSSQLConnectionProvider() + val driver = registerDriver(provider.driverClass) - testProviders(defaultProvider, customProvider) + testProviders(driver, provider, options("jdbc:sqlserver://localhost/mssql"), + options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) } test("setAuthenticationConfigIfNeeded custom parser must set authentication if not set") { - val parserMethod = "IntentionallyNotExistingMethod" - val driver = registerDriver(MSSQLConnectionProvider.driverClass) - val defaultProvider = new MSSQLConnectionProvider( - driver, options("jdbc:sqlserver://localhost/mssql"), parserMethod) - val customProvider = new MSSQLConnectionProvider( - driver, - options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql"), - parserMethod) - - testProviders(defaultProvider, customProvider) + val provider = new MSSQLConnectionProvider() { + override val parserMethod: String = "IntentionallyNotExistingMethod" + } + val driver = registerDriver(provider.driverClass) + + testProviders(driver, provider, options("jdbc:sqlserver://localhost/mssql"), + options("jdbc:sqlserver://localhost/mssql;jaasConfigurationName=custommssql")) } private def testProviders( - defaultProvider: SecureConnectionProvider, - customProvider: SecureConnectionProvider) = { - assert(defaultProvider.appEntry !== customProvider.appEntry) - testSecureConnectionProvider(defaultProvider) - testSecureConnectionProvider(customProvider) + driver: Driver, + provider: SecureConnectionProvider, + defaultOptions: JDBCOptions, + customOptions: JDBCOptions) = { + testSecureConnectionProvider(provider, driver, defaultOptions) + 
testSecureConnectionProvider(provider, driver, customOptions) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala index 70cad2097eb43..f450662fcbe74 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProviderSuite.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class MariaDBConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(MariaDBConnectionProvider.driverClass) - val provider = new MariaDBConnectionProvider(driver, options("jdbc:mysql://localhost/mysql")) + val provider = new MariaDBConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:mysql://localhost/mysql")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala index 13cde32ddbe4e..40e7f1191dccc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProviderSuite.scala @@ -19,10 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class OracleConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(OracleConnectionProvider.driverClass) - val provider = new OracleConnectionProvider(driver, - options("jdbc:oracle:thin:@//localhost/xe")) + val provider = new OracleConnectionProvider() + val driver = registerDriver(provider.driverClass) - testSecureConnectionProvider(provider) + testSecureConnectionProvider(provider, driver, options("jdbc:oracle:thin:@//localhost/xe")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala index 8cef7652f9c54..ee43a7d9708c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProviderSuite.scala @@ -19,14 +19,14 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection class PostgresConnectionProviderSuite extends ConnectionProviderSuiteBase { test("setAuthenticationConfigIfNeeded must set authentication if not set") { - val driver = registerDriver(PostgresConnectionProvider.driverClass) - val defaultProvider = new PostgresConnectionProvider( - driver, options("jdbc:postgresql://localhost/postgres")) - val customProvider = new PostgresConnectionProvider( - driver, 
options(s"jdbc:postgresql://localhost/postgres?jaasApplicationName=custompgjdbc")) + val provider = new PostgresConnectionProvider() + val defaultOptions = options("jdbc:postgresql://localhost/postgres") + val customOptions = + options(s"jdbc:postgresql://localhost/postgres?jaasApplicationName=custompgjdbc") + val driver = registerDriver(provider.driverClass) - assert(defaultProvider.appEntry !== customProvider.appEntry) - testSecureConnectionProvider(defaultProvider) - testSecureConnectionProvider(customProvider) + assert(provider.appEntry(driver, defaultOptions) !== provider.appEntry(driver, customOptions)) + testSecureConnectionProvider(provider, driver, defaultOptions) + testSecureConnectionProvider(provider, driver, customOptions) } }
From 9996e252ad3ef20760fcbc785e8d3a6f20b6acb5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 1 Oct 2020 23:01:31 -0700 Subject: [PATCH 0147/1009] [SPARK-33026][SQL] Add numRows to metric of BroadcastExchangeExec
### What changes were proposed in this pull request? This PR adds `numRows` to the metric and runtimeStatistics of `BroadcastExchangeExec`.
### Why are the changes needed? [`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count. The [ShuffleExchangeExec](https://github.com/apache/spark/blob/1c6dff7b5fc171c190feea0d8f7d323e330d9151/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala#L127) has already added the row count, but `BroadcastExchangeExec` is missing it.
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #29904 from wangyum/SPARK-33026.
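For illustration only, and not part of the patch itself, a rough sketch of how the new metric can be observed from an executed plan, along the lines of the test added in this commit; it assumes the tables are small enough for a broadcast join under the default threshold and that adaptive query execution is off:

```
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec

// Two tiny tables joined on `id` should be planned with a broadcast exchange by default.
val df = spark.range(2).join(spark.range(2), "id")
df.collect()

// Pull the numRows metric out of the executed plan. The same value feeds
// runtimeStatistics, which join estimation can use.
val broadcastRowCounts = df.queryExecution.executedPlan.collect {
  case b: BroadcastExchangeExec => b.metrics("numRows").value
}
// Expected to be Seq(2) for this query.
```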
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../exchange/BroadcastExchangeExec.scala | 5 ++++- .../execution/metric/SQLMetricsSuite.scala | 21 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala index 6d8d37022ea42..4b884dfe537e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala @@ -78,6 +78,7 @@ case class BroadcastExchangeExec( override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), + "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"), "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"), "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"), "broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast")) @@ -90,7 +91,8 @@ case class BroadcastExchangeExec( override def runtimeStatistics: Statistics = { val dataSize = metrics("dataSize").value - Statistics(dataSize) + val numRows = metrics("numRows").value + Statistics(dataSize, Some(numRows)) } @transient @@ -118,6 +120,7 @@ case class BroadcastExchangeExec( throw new SparkException( s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows") } + longMetric("numRows") += numRows val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 4e10c27edb0e9..e404e460fe611 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.execution.{FilterExec, RangeExec, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.aggregate.HashAggregateExec -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.ShuffledHashJoinExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -736,4 +736,23 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils Map("dataSize" -> 3200, "shuffleRecordsWritten" -> 100)) testMetricsInSparkPlanOperator(exchanges(1), Map("dataSize" -> 0, "shuffleRecordsWritten" -> 0)) } + + test("Add numRows to metric of BroadcastExchangeExec") { + withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "true") { + withTable("t1", "t2") { + spark.range(2).write.saveAsTable("t1") + spark.range(2).write.saveAsTable("t2") + val df = sql("SELECT t1.* FROM t1 JOIN t2 ON t1.id = t2.id") + df.collect() + val plan = df.queryExecution.executedPlan + + val exchanges = plan.collect { + case s: BroadcastExchangeExec => s + } + + assert(exchanges.size === 1) + testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2)) + } + } + } } From 
b205be5ff6926454b0afe76e4c3438cfa0f34832 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 2 Oct 2020 15:12:33 +0900 Subject: [PATCH 0148/1009] [SPARK-33051][INFRA][R] Uses setup-r to install R in GitHub Actions build ### What changes were proposed in this pull request? At SPARK-32493, the R installation was switched to manual installation because setup-r was broken. This seems fixed in the upstream so we should better switch it back. ### Why are the changes needed? To avoid maintaining the installation steps by ourselve. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this PR should test it. Closes #29931 from HyukjinKwon/recover-r-build. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 17c040323d515..667371dacf5dc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -168,12 +168,10 @@ jobs: python3.8 -m pip list # SparkR - name: Install R 4.0 + uses: r-lib/actions/setup-r@v1 if: contains(matrix.modules, 'sparkr') - run: | - sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" - curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add - sudo apt-get update - sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + with: + r-version: 4.0 - name: Install R packages if: contains(matrix.modules, 'sparkr') run: | @@ -232,11 +230,9 @@ jobs: # See also https://github.com/sphinx-doc/sphinx/issues/7551. pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx - name: Install R 4.0 - run: | - sudo sh -c "echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list" - curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0xE298A3A825C0D65DFD57CBB651716619E084DAB9" | sudo apt-key add - sudo apt-get update - sudo apt-get install -y r-base r-base-dev libcurl4-openssl-dev + uses: r-lib/actions/setup-r@v1 + with: + r-version: 4.0 - name: Install R linter dependencies and SparkR run: | sudo apt-get install -y libcurl4-openssl-dev From f7ba95264d38484f57c772e459bffb939c9c718e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 2 Oct 2020 15:17:44 +0900 Subject: [PATCH 0149/1009] [SPARK-33048][BUILD] Fix SparkBuild.scala to recognize build settings for Scala 2.13 ### What changes were proposed in this pull request? This PR fixes `SparkBuild.scala` to recognize build settings for Scala 2.13. In `SparkBuild.scala`, a variable `scalaBinaryVersion` is hardcoded as `2.12`. So, an environment variable `SPARK_SCALA_VERSION` is also to be `2.12`. This issue causes some test suites (e.g. `SparkSubmitSuite`) to be error. 
``` ===== TEST OUTPUT FOR o.a.s.deploy.SparkSubmitSuite: 'user classpath first in driver' ===== 20/10/02 08:55:30.234 redirect stderr for command /home/kou/work/oss/spark-scala-2.13/bin/spark-submit INFO Utils: Error: Could not find or load m ain class org.apache.spark.launcher.Main 20/10/02 08:55:30.235 redirect stderr for command /home/kou/work/oss/spark-scala-2.13/bin/spark-submit INFO Utils: /home/kou/work/oss/spark-scala- 2.13/bin/spark-class: line 96: CMD: bad array subscript ``` The reason of this error is that environment variables `SPARK_JARS_DIR` and `LAUNCH_CLASSPATH` is defined in `bin/spark-class` as follows. ``` SPARK_JARS_DIR="${SPARK_HOME}/assembly/target/scala-$SPARK_SCALA_VERSION/jars" LAUNCH_CLASSPATH="${SPARK_HOME}/launcher/target/scala-$SPARK_SCALA_VERSION/classes:$LAUNCH_CLASSPATH" ``` ### Why are the changes needed? To build for Scala 2.13 successfully. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests for `core` module finish successfully. ``` build/sbt -Pscala-2.13 clean "core/test" ``` Closes #29927 from sarutak/fix-sparkbuild-for-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 160b3b5e7edb3..6328daec027ef 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -94,21 +94,6 @@ object SparkBuild extends PomBuild { case Some(v) => v.split("(\\s+|,)").filterNot(_.isEmpty).map(_.trim.replaceAll("-P", "")).toSeq } - - // TODO: revisit for Scala 2.13 support - /* - Option(System.getProperty("scala.version")) - .filter(_.startsWith("2.11")) - .foreach { versionString => - System.setProperty("scala-2.11", "true") - } - if (System.getProperty("scala-2.11") == "") { - // To activate scala-2.10 profile, replace empty property value to non-empty value - // in the same way as Maven which handles -Dname as -Dname=true before executes build process. - // see: https://github.com/apache/maven/blob/maven-3.0.4/maven-embedder/src/main/java/org/apache/maven/cli/MavenCli.java#L1082 - System.setProperty("scala-2.11", "true") - } - */ profiles } @@ -965,17 +950,6 @@ object CopyDependencies { object TestSettings { import BuildCommons._ - - // TODO revisit for Scala 2.13 support - private val scalaBinaryVersion = "2.12" - /* - if (System.getProperty("scala-2.11") == "true") { - "2.11" - } else { - "2.12" - } - */ - private val defaultExcludedTags = Seq("org.apache.spark.tags.ChromeUITest") lazy val settings = Seq ( @@ -988,7 +962,7 @@ object TestSettings { (fullClasspath in Test).value.files.map(_.getAbsolutePath) .mkString(File.pathSeparator).stripSuffix(File.pathSeparator), "SPARK_PREPEND_CLASSES" -> "1", - "SPARK_SCALA_VERSION" -> scalaBinaryVersion, + "SPARK_SCALA_VERSION" -> scalaBinaryVersion.value, "SPARK_TESTING" -> "1", "JAVA_HOME" -> sys.env.get("JAVA_HOME").getOrElse(sys.props("java.home"))), javaOptions in Test += s"-Djava.io.tmpdir=$testTempDir", From aa6657981aefae8067672d2c99ca560b6179b723 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 2 Oct 2020 00:06:03 -0700 Subject: [PATCH 0150/1009] [SPARK-33050][BUILD] Upgrade Apache ORC to 1.5.12 ### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.5.12. ### Why are the changes needed? This brings us the latest bug patches like the followings. 
- ORC-644 nested struct evolution does not respect to orc.force.positional.evolution - ORC-667 Positional mapping for nested struct types should not applied by default ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #29930 from dongjoon-hyun/SPARK-ORC-1.5.12. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 6 +++--- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 6 +++--- pom.xml | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 index fef1a6442cd33..d07b04608328f 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 @@ -180,9 +180,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10/nohive/orc-core-1.5.10-nohive.jar -orc-mapreduce/1.5.10/nohive/orc-mapreduce-1.5.10-nohive.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12/nohive/orc-core-1.5.12-nohive.jar +orc-mapreduce/1.5.12/nohive/orc-mapreduce-1.5.12-nohive.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 7b31bdd98ef26..979bb1419ce7b 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -195,9 +195,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10//orc-core-1.5.10.jar -orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12//orc-core-1.5.12.jar +orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 960ea5f836ddf..ebaff6d1977c9 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -209,9 +209,9 @@ okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.10//orc-core-1.5.10.jar -orc-mapreduce/1.5.10//orc-mapreduce-1.5.10.jar -orc-shims/1.5.10//orc-shims-1.5.10.jar +orc-core/1.5.12//orc-core-1.5.12.jar +orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar +orc-shims/1.5.12//orc-shims-1.5.12.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 421d932cef5fa..5d6b0511ce458 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.6.0 10.12.1.1 1.10.1 - 1.5.10 + 1.5.12 com.twitter 1.6.0 From 9b88aca2954cd931c94a7cc788c3c3f7a33e99b7 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 2 Oct 2020 00:53:17 -0700 Subject: [PATCH 0151/1009] [SPARK-33030][R] Add nth_value to SparkR ### What changes were proposed in this pull request? Adds `nth_value` function to SparkR. ### Why are the changes needed? Feature parity. 
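For reference, a rough sketch of how the equivalent window function is used from the Scala API added under SPARK-27951 (the toy data and column names below are illustrative, not part of this patch):
```
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions.{col, nth_value}

val spark = SparkSession.builder().master("local[1]").appName("nth_value-sketch").getOrCreate()
import spark.implicits._

// Toy data: for each group, pick the value found in the 3rd row when ordered by `ord`;
// rows whose window frame has fewer than 3 rows get null.
val df = Seq(("a", 1, 10), ("a", 2, 20), ("a", 3, 30), ("b", 1, 40)).toDF("grp", "ord", "value")
val w = Window.partitionBy(col("grp")).orderBy(col("ord"))
df.withColumn("third_value", nth_value(col("value"), 3).over(w)).show()
```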
The function has been already added to [Scala](https://issues.apache.org/jira/browse/SPARK-27951) and [Python](https://issues.apache.org/jira/browse/SPARK-33020). ### Does this PR introduce _any_ user-facing change? Yes. New function is exposed to R users. ### How was this patch tested? New unit tests. Closes #29905 from zero323/SPARK-33030. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 34 ++++++++++++++++++++++++++- R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 2 ++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 6d28caff0d56f..4ea05b25ecc9e 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -348,6 +348,7 @@ exportMethods("%<=>%", "negate", "next_day", "not", + "nth_value", "ntile", "otherwise", "over", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index df221de4c7327..18206f6f67778 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -338,7 +338,8 @@ NULL #' tmp <- mutate(df, dist = over(cume_dist(), ws), dense_rank = over(dense_rank(), ws), #' lag = over(lag(df$mpg), ws), lead = over(lead(df$mpg, 1), ws), #' percent_rank = over(percent_rank(), ws), -#' rank = over(rank(), ws), row_number = over(row_number(), ws)) +#' rank = over(rank(), ws), row_number = over(row_number(), ws), +#' nth_value = over(nth_value(df$mpg, 3), ws)) #' # Get ntile group id (1-4) for hp #' tmp <- mutate(tmp, ntile = over(ntile(4), ws)) #' head(tmp)} @@ -3298,6 +3299,37 @@ setMethod("lead", column(jc) }) +#' @details +#' \code{nth_value}: Window function: returns the value that is the \code{offset}th +#' row of the window frame# (counting from 1), and \code{null} if the size of window +#' frame is less than \code{offset} rows. +#' +#' @param offset a numeric indicating number of row to use as the value +#' @param na.rm a logical which indicates that the Nth value should skip null in the +#' determination of which row to use +#' +#' @rdname column_window_functions +#' @aliases nth_value nth_value,characterOrColumn-method +#' @note nth_value since 3.1.0 +setMethod("nth_value", + signature(x = "characterOrColumn", offset = "numeric"), + function(x, offset, na.rm = FALSE) { + x <- if (is.character(x)) { + column(x) + } else { + x + } + offset <- as.integer(offset) + jc <- callJStatic( + "org.apache.spark.sql.functions", + "nth_value", + x@jc, + offset, + na.rm + ) + column(jc) + }) + #' @details #' \code{ntile}: Returns the ntile group id (from 1 to n inclusive) in an ordered window #' partition. For example, if n is 4, the first quarter of the rows will get value 1, the second diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index a7a9379b927b1..985678679dec8 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1164,6 +1164,10 @@ setGeneric("months_between", function(y, x, ...) { standardGeneric("months_betwe #' @rdname count setGeneric("n", function(x) { standardGeneric("n") }) +#' @rdname column_window_functions +#' @name NULL +setGeneric("nth_value", function(x, offset, ...) 
{ standardGeneric("nth_value") }) + #' @rdname column_nonaggregate_functions #' @name NULL setGeneric("nanvl", function(y, x) { standardGeneric("nanvl") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 1c65dabaf6656..c36620227593d 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1425,6 +1425,8 @@ test_that("column functions", { c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) c26 <- timestamp_seconds(c1) + c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From 82721ce00b6cf535abd3d9cd66445e452554d15d Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 2 Oct 2020 22:16:19 +0900 Subject: [PATCH 0152/1009] [SPARK-32741][SQL][FOLLOWUP] Run plan integrity check only for effective plan changes ### What changes were proposed in this pull request? (This is a followup PR of #29585) The PR modified `RuleExecutor#isPlanIntegral` code for checking if a plan has globally-unique attribute IDs, but this check made Jenkins maven test jobs much longer (See [the Dongjoon comment](https://github.com/apache/spark/pull/29585#issuecomment-702461314) and thanks, dongjoon-hyun !). To recover running time for the Jenkins tests, this PR intends to update the code to run plan integrity check only for effective plans. ### Why are the changes needed? To recover running time for Jenkins tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29928 from maropu/PR29585-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/catalyst/rules/RuleExecutor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 3bd8fa78ec92c..d5b0884f6ff13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -229,7 +229,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { tracker.foreach(_.recordRuleInvocation(rule.ruleName, runTime, effective)) // Run the structural integrity checker against the plan after each rule. - if (!isPlanIntegral(result)) { + if (effective && !isPlanIntegral(result)) { val message = s"After applying rule ${rule.ruleName} in batch ${batch.name}, " + "the structural integrity of the plan is broken." throw new TreeNodeException(result, message, null) From 1299c8a81ddba7f0fd8ff1f9afa223a4bb75f7f9 Mon Sep 17 00:00:00 2001 From: Bo Yang Date: Fri, 2 Oct 2020 20:26:46 -0700 Subject: [PATCH 0153/1009] [SPARK-33037][SHUFFLE] Remove knownManagers to support user's custom shuffle manager plugin ### What changes were proposed in this pull request? Spark has a hardcode list to contain known shuffle managers, which has two values now. It does not contain user's custom shuffle manager which is set through Spark config "spark.shuffle.manager". We hit issue when set "spark.shuffle.manager" with our own shuffle manager plugin (Uber Remote Shuffle Service implementation, https://github.com/uber/RemoteShuffleService). 
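For illustration, such a plugin is enabled through the ordinary `spark.shuffle.manager` setting; a minimal sketch (the class name below is hypothetical) looks like this:
```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Point spark.shuffle.manager at a custom ShuffleManager implementation on the classpath.
val conf = new SparkConf()
  .set("spark.shuffle.manager", "org.example.shuffle.RemoteShuffleManager")
val spark = SparkSession.builder().config(conf).getOrCreate()
```
The sketch only sets the configuration; the point of this change is that such a manager is no longer rejected merely because its class name is absent from the hardcoded list.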
Other users will hit same issue when they implement their own shuffle manager. It is better to remove that knownManagers hardcode list, to support user's custom shuffle manager implementation. ### Why are the changes needed? Spark has shuffle manager API to support custom shuffle manager implementation. The hardcoded known managers list does not consider that shuffle manager config value which could be set by user. Thus better to remove this hardcoded known managers list. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Current Spark unit test already covers the code path. Closes #29916 from boy-uber/knownManagers. Lead-authored-by: Bo Yang Co-authored-by: Bo Yang Signed-off-by: Liang-Chi Hsieh --- .../network/shuffle/ExternalShuffleBlockResolver.java | 8 -------- .../shuffle/ExternalShuffleBlockResolverSuite.java | 9 --------- .../network/shuffle/ExternalShuffleIntegrationSuite.java | 6 +++--- .../scala/org/apache/spark/shuffle/ShuffleManager.scala | 6 +++++- 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index a6bcbb8850566..a095bf2723418 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -92,10 +92,6 @@ public class ExternalShuffleBlockResolver { @VisibleForTesting final DB db; - private final List knownManagers = Arrays.asList( - "org.apache.spark.shuffle.sort.SortShuffleManager", - "org.apache.spark.shuffle.unsafe.UnsafeShuffleManager"); - public ExternalShuffleBlockResolver(TransportConf conf, File registeredExecutorFile) throws IOException { this(conf, registeredExecutorFile, Executors.newSingleThreadExecutor( @@ -148,10 +144,6 @@ public void registerExecutor( ExecutorShuffleInfo executorInfo) { AppExecId fullId = new AppExecId(appId, execId); logger.info("Registered executor {} with {}", fullId, executorInfo); - if (!knownManagers.contains(executorInfo.shuffleManager)) { - throw new UnsupportedOperationException( - "Unsupported shuffle manager of executor: " + executorInfo); - } try { if (db != null) { byte[] key = dbAppExecKey(fullId); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index 88bcf43c2371f..04d4bdf92bae7 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -71,15 +71,6 @@ public void testBadRequests() throws IOException { assertTrue("Bad error message: " + e, e.getMessage().contains("not registered")); } - // Invalid shuffle manager - try { - resolver.registerExecutor("app0", "exec2", dataContext.createExecutorInfo("foobar")); - resolver.getBlockData("app0", "exec2", 1, 1, 0); - fail("Should have failed"); - } catch (UnsupportedOperationException e) { - // pass - } - // Nonexistent shuffle block resolver.registerExecutor("app0", "exec3", dataContext.createExecutorInfo(SORT_MANAGER)); diff --git 
a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java index 9d398e372056b..49d02e5dc6fb4 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java @@ -233,9 +233,9 @@ public void testFetchThreeSort() throws Exception { exec0Fetch.releaseBuffers(); } - @Test (expected = RuntimeException.class) - public void testRegisterInvalidExecutor() throws Exception { - registerExecutor("exec-1", dataContext0.createExecutorInfo("unknown sort manager")); + @Test + public void testRegisterWithCustomShuffleManager() throws Exception { + registerExecutor("exec-1", dataContext0.createExecutorInfo("custom shuffle manager")); } @Test diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala index 400c4526f0114..4e2183451c258 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala @@ -24,8 +24,12 @@ import org.apache.spark.{ShuffleDependency, TaskContext} * and on each executor, based on the spark.shuffle.manager setting. The driver registers shuffles * with it, and executors (or tasks running locally in the driver) can ask to read and write data. * - * NOTE: this will be instantiated by SparkEnv so its constructor can take a SparkConf and + * NOTE: + * 1. This will be instantiated by SparkEnv so its constructor can take a SparkConf and * boolean isDriver as parameters. + * 2. This contains a method ShuffleBlockResolver which interacts with External Shuffle Service + * when it is enabled. Need to pay attention to that, if implementing a custom ShuffleManager, to + * make sure the custom ShuffleManager could co-exist with External Shuffle Service. */ private[spark] trait ShuffleManager { From 5af62a2ec74356ce1a97c1371321b3424b674289 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sat, 3 Oct 2020 23:37:01 +0900 Subject: [PATCH 0154/1009] [SPARK-33052][SQL][TEST] Make all the database versions up-to-date for integration tests ### What changes were proposed in this pull request? This PR intends to update database versions below for integration tests; - ibmcom/db2:11.5.0.0a => ibmcom/db2:11.5.4.0 in `DB2[Krb]IntegrationSuite` - mysql:5.7.28 => mysql:5.7.31 in `MySQLIntegrationSuite` - postgres:12.0 => postgres:13.0 in `Postgres[Krb]IntegrationSuite` - mariadb:10.4 => mariadb:10.5 in `MariaDBKrbIntegrationSuite` Also, this added environmental variables so that we can test with any database version and all the variables are as follows (see documents in the code for how to use all the variables); - DB2_DOCKER_IMAGE_NAME - MSSQLSERVER_DOCKER_IMAGE_NAME - MYSQL_DOCKER_IMAGE_NAME - POSTGRES_DOCKER_IMAGE_NAME ### Why are the changes needed? To improve tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #29932 from maropu/UpdateIntegrationTests. 
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../src/test/resources/mariadb_docker_entrypoint.sh | 2 +- .../apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala | 4 +++- .../spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 10 +++++++++- .../apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 11 +++++++++-- .../spark/sql/jdbc/PostgresIntegrationSuite.scala | 9 ++++++++- .../spark/sql/jdbc/PostgresKrbIntegrationSuite.scala | 9 ++++++++- 8 files changed, 54 insertions(+), 9 deletions(-) diff --git a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh index 343bc01651318..97c00a9d81b76 100755 --- a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh +++ b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh @@ -18,7 +18,7 @@ dpkg-divert --add /bin/systemctl && ln -sT /bin/true /bin/systemctl apt update -apt install -y mariadb-plugin-gssapi-server=1:10.4.12+maria~bionic +apt install -y mariadb-plugin-gssapi-server=1:10.5.5+maria~focal echo "gssapi_keytab_path=/docker-entrypoint-initdb.d/mariadb.keytab" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf echo "gssapi_principal_name=mariadb/__IP_ADDRESS_REPLACE_ME__@EXAMPLE.COM" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf docker-entrypoint.sh mysqld diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 02a7ff8f16073..91498493e78e2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -25,10 +25,17 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * {{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * }}} + */ @DockerTest class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "ibmcom/db2:11.5.0.0a" + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") override val env = Map( "DB2INST1_PASSWORD" -> "rootpass", "LICENSE" -> "accept", diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index fa5ce2d106a10..7ab544c17a5d8 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -29,13 +29,20 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.connection.{DB2ConnectionProvider, SecureConnectionProvider} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * 
{{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2KrbIntegrationSuite" + * }}} + */ @DockerTest class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"db2/$dockerIp" override protected val keytabFileName = "db2.keytab" override val db = new DatabaseOnDocker { - override val imageName = "ibmcom/db2:11.5.0.0a" + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") override val env = Map( "DB2INST1_PASSWORD" -> "rootpass", "LICENSE" -> "accept", diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index 9b9d15517d572..adee2bebe41ce 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -30,7 +30,9 @@ class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val keytabFileName = "mariadb.keytab" override val db = new DatabaseOnDocker { - override val imageName = "mariadb:10.4" + // If you change `imageName`, you need to update the version of `mariadb-plugin-gssapi-server` + // in `resources/mariadb_docker_entrypoint.sh` accordingly. + override val imageName = "mariadb:10.5" override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 6c633af1fde84..5d3deff9d2704 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -24,10 +24,18 @@ import java.util.Properties import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): + * {{{ + * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ./build/sbt -Pdocker-integration-tests "test-only *MsSqlServerIntegrationSuite" + * }}} + */ @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04" + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04") override val env = Map( "SA_PASSWORD" -> "Sapass123", "ACCEPT_EULA" -> "Y" diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 4cbcb59e02de1..4cd27f8b9fff2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -21,13 +21,20 @@ import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} import java.util.Properties -import org.apache.spark.sql.{Row, SaveMode} +import 
org.apache.spark.sql.Row import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., mysql:5.7.31): + * {{{ + * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ./build/sbt -Pdocker-integration-tests "test-only *MySQLIntegrationSuite" + * }}} + */ @DockerTest class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "mysql:5.7.28" + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.31") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 36d96a69ec659..ba71c942714da 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -26,10 +26,17 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.types.{ArrayType, DecimalType, FloatType, ShortType} import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "test-only *PostgresIntegrationSuite" + * }}} + */ @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = "postgres:12.0-alpine" + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index e94bf3dd588aa..6b215485247d9 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -24,13 +24,20 @@ import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnectionProvider import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "test-only *PostgresKrbIntegrationSuite" + * }}} + */ @DockerTest class PostgresKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"postgres/$dockerIp" override protected val keytabFileName = "postgres.keytab" override val db = new DatabaseOnDocker { - override val imageName = "postgres:12.0" + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) From f86171aea43479f54ac2bbbca8f128baa3fc4a8c Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 3 Oct 2020 13:12:55 -0500 Subject: [PATCH 0155/1009] [SPARK-33043][ML] Handle spark.driver.maxResultSize=0 in RowMatrix heuristic computation ### What changes were proposed in this pull request? 
RowMatrix contains a computation based on spark.driver.maxResultSize. However, when this value is set to 0, the computation fails (log of 0). The fix is simply to correctly handle this setting, which means unlimited result size, by using a tree depth of 1 in the RowMatrix method. ### Why are the changes needed? Simple bug fix to make several Spark ML functions which use RowMatrix run correctly in this case. ### Does this PR introduce _any_ user-facing change? Not other than the bug fix of course. ### How was this patch tested? Existing RowMatrix tests plus a new test. Closes #29925 from srowen/SPARK-33043. Authored-by: Sean Owen Signed-off-by: Sean Owen --- .../mllib/linalg/distributed/RowMatrix.scala | 6 +++++- .../mllib/linalg/distributed/RowMatrixSuite.scala | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 20e26cee9e0d6..07b9d91c1f59b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -786,11 +786,15 @@ class RowMatrix @Since("1.0.0") ( * Based on the formulae: (numPartitions)^(1/depth) * objectSize <= DriverMaxResultSize * @param aggregatedObjectSizeInBytes the size, in megabytes, of the object being tree aggregated */ - private[spark] def getTreeAggregateIdealDepth(aggregatedObjectSizeInBytes: Long) = { + private[spark] def getTreeAggregateIdealDepth(aggregatedObjectSizeInBytes: Long): Int = { require(aggregatedObjectSizeInBytes > 0, "Cannot compute aggregate depth heuristic based on a zero-size object to aggregate") val maxDriverResultSizeInBytes = rows.conf.get[Long](MAX_RESULT_SIZE) + if (maxDriverResultSizeInBytes <= 0) { + // Unlimited result size, so 1 is OK + return 1 + } require(maxDriverResultSizeInBytes > aggregatedObjectSizeInBytes, s"Cannot aggregate object of size $aggregatedObjectSizeInBytes Bytes, " diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index 0a4b11935580a..adc4eeef91bb1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -25,6 +25,7 @@ import breeze.linalg.{norm => brzNorm, svd => brzSvd, DenseMatrix => BDM, DenseV import breeze.numerics.abs import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.config.MAX_RESULT_SIZE import org.apache.spark.mllib.linalg.{Matrices, Vector, Vectors} import org.apache.spark.mllib.random.RandomRDDs import org.apache.spark.mllib.util.{LocalClusterSparkContext, MLlibTestSparkContext} @@ -121,6 +122,20 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { assert(objectBiggerThanResultSize.getMessage.contains("it's bigger than maxResultSize")) } + test("SPARK-33043: getTreeAggregateIdealDepth with unlimited driver size") { + val originalMaxResultSize = sc.conf.get[Long](MAX_RESULT_SIZE) + sc.conf.set(MAX_RESULT_SIZE, 0L) + try { + val nbPartitions = 100 + val vectors = sc.emptyRDD[Vector] + .repartition(nbPartitions) + val rowMat = new RowMatrix(vectors) + assert(rowMat.getTreeAggregateIdealDepth(700 * 1024 * 1024) === 1) + } finally { + sc.conf.set(MAX_RESULT_SIZE, originalMaxResultSize) + } + } + 
test("similar columns") { val colMags = Vectors.dense(math.sqrt(126), math.sqrt(66), math.sqrt(94)) val expected = BDM( From 9b21fdd731489b529a52cd2074f79dc7293eed3b Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 3 Oct 2020 13:50:38 -0700 Subject: [PATCH 0156/1009] [SPARK-32949][FOLLOW-UP][R][SQL] Reindent lines in SparkR timestamp_seconds ### What changes were proposed in this pull request? Re-indent lines of SparkR `timestamp_seconds`. ### Why are the changes needed? Current indentation is not aligned with the opening line. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29940 from zero323/SPARK-32949-FOLLOW-UP. Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/R/functions.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 18206f6f67778..b216f404a3ca5 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -4451,10 +4451,10 @@ setMethod("current_timestamp", #' @aliases timestamp_seconds timestamp_seconds,Column-method #' @note timestamp_seconds since 3.1.0 setMethod("timestamp_seconds", - signature(x = "Column"), - function(x) { - jc <- callJStatic( - "org.apache.spark.sql.functions", "timestamp_seconds", x@jc - ) - column(jc) - }) + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.sql.functions", "timestamp_seconds", x@jc + ) + column(jc) + }) From 37c806af2bd3fb4c1f25e02f4986226e5e8d994d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 3 Oct 2020 14:55:02 -0700 Subject: [PATCH 0157/1009] [SPARK-32958][SQL] Prune unnecessary columns from JsonToStructs ### What changes were proposed in this pull request? This patch proposes to do column pruning for `JsonToStructs` expression if we only require some fields from it. ### Why are the changes needed? `JsonToStructs` takes a schema parameter used to tell `JacksonParser` what fields are needed to parse. If `JsonToStructs` is followed by `GetStructField`. We can prune the schema to only parse certain field. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29900 from viirya/SPARK-32958. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 16 +++++ .../optimizer/OptimizeJsonExprsSuite.scala | 58 +++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index 24df480208220..59228904d84b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -20,9 +20,14 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.{ArrayType, StructType} /** * Simplify redundant json related expressions. + * + * The optimization includes: + * 1. JsonToStructs(StructsToJson(child)) => child. + * 2. Prune unnecessary columns from GetStructField/GetArrayStructFields + JsonToStructs. 
*/ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { @@ -38,6 +43,17 @@ object OptimizeJsonExprs extends Rule[LogicalPlan] { // so `JsonToStructs` might throw error in runtime. Thus we cannot optimize // this case similarly. child + + case g @ GetStructField(j @ JsonToStructs(schema: StructType, _, _, _), ordinal, _) + if schema.length > 1 => + val prunedSchema = StructType(Seq(schema(ordinal))) + g.copy(child = j.copy(schema = prunedSchema), ordinal = 0) + + case g @ GetArrayStructFields(j @ JsonToStructs(schema: ArrayType, _, _, _), _, _, _, _) + if schema.elementType.asInstanceOf[StructType].length > 1 => + val prunedSchema = ArrayType(StructType(Seq(g.field)), g.containsNull) + g.copy(child = j.copy(schema = prunedSchema), ordinal = 0, numFields = 1) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 90397d4cabee8..e47a141dfed1f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -36,8 +36,10 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { val schema = StructType.fromDDL("a int, b int") private val structAtt = 'struct.struct(schema).notNull + private val jsonAttr = 'json.string private val testRelation = LocalRelation(structAtt) + private val testRelation2 = LocalRelation(jsonAttr) test("SPARK-32948: optimize from_json + to_json") { val options = Map.empty[String, String] @@ -141,4 +143,60 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { val expected2 = testRelation.select('struct.as("struct")).analyze comparePlans(optimized2, expected2) } + + test("SPARK-32958: prune unnecessary columns from GetStructField + from_json") { + val options = Map.empty[String, String] + + val query1 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 0)) + val optimized1 = Optimizer.execute(query1.analyze) + + val prunedSchema1 = StructType.fromDDL("a int") + val expected1 = testRelation2 + .select(GetStructField(JsonToStructs(prunedSchema1, options, 'json), 0)).analyze + comparePlans(optimized1, expected1) + + val query2 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 1)) + val optimized2 = Optimizer.execute(query2.analyze) + + val prunedSchema2 = StructType.fromDDL("b int") + val expected2 = testRelation2 + .select(GetStructField(JsonToStructs(prunedSchema2, options, 'json), 0)).analyze + comparePlans(optimized2, expected2) + } + + test("SPARK-32958: prune unnecessary columns from GetArrayStructFields + from_json") { + val options = Map.empty[String, String] + val schema1 = ArrayType(StructType.fromDDL("a int, b int"), containsNull = true) + val field1 = schema1.elementType.asInstanceOf[StructType](0) + + val query1 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema1, options, 'json), field1, 0, 2, true).as("a")) + val optimized1 = Optimizer.execute(query1.analyze) + + val prunedSchema1 = ArrayType(StructType.fromDDL("a int"), containsNull = true) + val expected1 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(prunedSchema1, options, 'json), field1, 0, 1, true).as("a")).analyze + comparePlans(optimized1, expected1) + + val schema2 = ArrayType( + 
StructType( + StructField("a", IntegerType, false) :: + StructField("b", IntegerType, false) :: Nil), containsNull = false) + val field2 = schema2.elementType.asInstanceOf[StructType](1) + val query2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema2, options, 'json), field2, 1, 2, false).as("b")) + val optimized2 = Optimizer.execute(query2.analyze) + + val prunedSchema2 = ArrayType( + StructType(StructField("b", IntegerType, false) :: Nil), containsNull = false) + val expected2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(prunedSchema2, options, 'json), field2, 0, 1, false).as("b")).analyze + comparePlans(optimized2, expected2) + } } From db420f79cc588dc0f98b906accb34d63a1e4664c Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 3 Oct 2020 15:14:48 -0700 Subject: [PATCH 0158/1009] [SPARK-33049][CORE] Decommission shuffle block test is flaky ### What changes were proposed in this pull request? Increase the listener bus event queue capacity, and synchronize the addition of modified blocks to the array list. ### Why are the changes needed? This test appears to be flaky in Jenkins (it cannot be reproduced locally). Given that the index file made it through and the index file is only transferred after the data file, the only two reasons I could come up with for an intermittent failure here are the listener bus dropping a message or the two block change messages being received at the same time. ### Does this PR introduce _any_ user-facing change? No (test only). ### How was this patch tested? The tests still pass on my machine, but they did before as well, so we'll need to run this through Jenkins a few times first. Closes #29929 from holdenk/fix-.BlockManagerDecommissionIntegrationSuite. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../storage/BlockManagerDecommissionIntegrationSuite.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index 094b893cdda2e..dcf313f671d5e 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -69,6 +69,8 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS .set(config.STORAGE_DECOMMISSION_ENABLED, true) .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED, persist) .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, shuffle) + // Since we use the bus for testing we don't want to drop any messages + .set(config.LISTENER_BUS_EVENT_QUEUE_CAPACITY, 1000000) // Just replicate blocks quickly during testing, there isn't another // workload we need to worry about.
.set(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL, 10L) @@ -137,7 +139,7 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS taskEndEvents.add(taskEnd) } - override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { + override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = synchronized { blocksUpdated.append(blockUpdated) } From fab53212cb110a81696cee8546c35095332f6e09 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 4 Oct 2020 16:11:06 -0700 Subject: [PATCH 0159/1009] [SPARK-33065][TESTS] Expand the stack size of a thread in a test in LocalityPlacementStrategySuite for Java 11 with sbt ### What changes were proposed in this pull request? This PR fixes an issue that a test in `LocalityPlacementStrategySuite` fails with Java 11 due to `StackOverflowError`. ``` [info] - handle large number of containers and tasks (SPARK-18750) *** FAILED *** (170 milliseconds) [info] StackOverflowError should not be thrown; however, got: [info] [info] java.lang.StackOverflowError [info] at java.base/java.util.concurrent.ConcurrentHashMap.putVal(ConcurrentHashMap.java:1012) [info] at java.base/java.util.concurrent.ConcurrentHashMap.putIfAbsent(ConcurrentHashMap.java:1541) [info] at java.base/java.lang.ClassLoader.getClassLoadingLock(ClassLoader.java:668) [info] at java.base/jdk.internal.loader.BuiltinClassLoader.loadClassOrNull(BuiltinClassLoader.java:591) [info] at java.base/jdk.internal.loader.BuiltinClassLoader.loadClass(BuiltinClassLoader.java:579) [info] at java.base/jdk.internal.loader.ClassLoaders$AppClassLoader.loadClass(ClassLoaders.java:178) [info] at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:522) ``` The solution is to expand the stack size of a thread in the test from 32KB to 256KB. Currently, the stack size is specified as 32KB but the actual stack size can be greater than 32KB. According to the code of Hotspot, the minimum stack size is prefer to the specified size. Java 8: https://hg.openjdk.java.net/jdk8u/jdk8u/hotspot/file/c92ba514724d/src/os/linux/vm/os_linux.cpp#l900 Java 11: https://hg.openjdk.java.net/jdk-updates/jdk11u/file/73edf743a93a/src/hotspot/os/posix/os_posix.cpp#l1555 For Linux on x86_64, the minimum stack size seems to be 224KB and 136KB for Java 8 and Java 11 respectively. So, the actual stack size should be 224KB rather than 32KB for Java 8 on x86_64/Linux. As the test passes for Java 8 but doesn't for Java 11, 224KB is enough while 136KB is not. So I think specifing 256KB is reasonable for the new stack size. ### Why are the changes needed? To pass the test for Java 11. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Following command with Java 11. ``` build/sbt -Pyarn clean package "testOnly org.apache.spark.deploy.yarn.LocalityPlacementStrategySuite" ``` Closes #29943 from sarutak/fix-stack-size. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/deploy/yarn/LocalityPlacementStrategySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala index 3c9209c292418..d2397504ba140 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/LocalityPlacementStrategySuite.scala @@ -43,7 +43,7 @@ class LocalityPlacementStrategySuite extends SparkFunSuite { } } - val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 32 * 1024) + val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 256 * 1024) thread.start() thread.join() From 4ab9aa03055d3ad90137efacb2e00eff4ac3fbf1 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Mon, 5 Oct 2020 11:48:28 +0900 Subject: [PATCH 0160/1009] [SPARK-33017][PYTHON] Add getCheckpointDir method to PySpark Context ### What changes were proposed in this pull request? Adding a method to get the checkpoint directory from the PySpark context to match the Scala API ### Why are the changes needed? To make the Scala and Python APIs consistent and remove the need to use the JavaObject ### Does this PR introduce _any_ user-facing change? Yes, there is a new method which makes it easier to get the checkpoint directory directly rather than using the JavaObject #### Previous behaviour: ```python >>> spark.sparkContext.setCheckpointDir('/tmp/spark/checkpoint/') >>> sc._jsc.sc().getCheckpointDir().get() 'file:/tmp/spark/checkpoint/63f7b67c-e5dc-4d11-a70c-33554a71717a' ``` This method returns a confusing Scala error if it has not been set ```python >>> sc._jsc.sc().getCheckpointDir().get() Traceback (most recent call last): File "", line 1, in File "/home/paul/Desktop/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ File "/home/paul/Desktop/spark/python/pyspark/sql/utils.py", line 111, in deco return f(*a, **kw) File "/home/paul/Desktop/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o25.get. 
: java.util.NoSuchElementException: None.get at scala.None$.get(Option.scala:529) at scala.None$.get(Option.scala:527) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748) ``` #### New method: ```python >>> spark.sparkContext.setCheckpointDir('/tmp/spark/checkpoint/') >>> spark.sparkContext.getCheckpointDir() 'file:/tmp/spark/checkpoint/b38aca2e-8ace-44fc-a4c4-f4e36c2da2a7' ``` ``getCheckpointDir()`` returns ``None`` if it has not been set ```python >>> print(spark.sparkContext.getCheckpointDir()) None ``` ### How was this patch tested? Added to existing unit tests. But I'm not sure how to add a test for the case where ``getCheckpointDir()`` should return ``None`` since the existing checkpoint tests set the checkpoint directory in the ``setUp`` method before any tests are run as far as I can tell. Closes #29918 from reidy-p/SPARK-33017. Authored-by: reidy-p Signed-off-by: HyukjinKwon --- python/pyspark/__init__.py | 3 ++- python/pyspark/context.py | 12 +++++++++++- python/pyspark/context.pyi | 1 + python/pyspark/tests/test_context.py | 3 +++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index fb05819e74124..19269e4466507 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -50,7 +50,6 @@ import types from pyspark.conf import SparkConf -from pyspark.context import SparkContext from pyspark.rdd import RDD, RDDBarrier from pyspark.files import SparkFiles from pyspark.status import StatusTracker, SparkJobInfo, SparkStageInfo @@ -113,6 +112,8 @@ def wrapper(self, *args, **kwargs): return func(self, **kwargs) return wrapper +# To avoid circular dependencies +from pyspark.context import SparkContext # for back compatibility from pyspark.sql import SQLContext, HiveContext, Row # noqa: F401 diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 55a5657b64055..4213a742a1dc4 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -28,7 +28,7 @@ from py4j.protocol import Py4JError from py4j.java_gateway import is_instance_of -from pyspark import accumulators +from pyspark import accumulators, since from pyspark.accumulators import Accumulator from pyspark.broadcast import Broadcast, BroadcastPickleRegistry from pyspark.conf import SparkConf @@ -956,6 +956,16 @@ def setCheckpointDir(self, dirName): """ self._jsc.sc().setCheckpointDir(dirName) + @since(3.1) + def getCheckpointDir(self): + """ + Return the directory where RDDs are checkpointed. Returns None if no + checkpoint directory has been set. + """ + if not self._jsc.sc().getCheckpointDir().isEmpty(): + return self._jsc.sc().getCheckpointDir().get() + return None + def _getJavaStorageLevel(self, storageLevel): """ Returns a Java StorageLevel based on a pyspark.StorageLevel. 
diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi index 76ecf8911471a..2789a38b3be9f 100644 --- a/python/pyspark/context.pyi +++ b/python/pyspark/context.pyi @@ -152,6 +152,7 @@ class SparkContext: def addFile(self, path: str, recursive: bool = ...) -> None: ... def addPyFile(self, path: str) -> None: ... def setCheckpointDir(self, dirName: str) -> None: ... + def getCheckpointDir(self) -> Optional[str]: ... def setJobGroup( self, groupId: str, description: str, interruptOnCancel: bool = ... ) -> None: ... diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index 9b6b74a111288..d86f6c3c1571c 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -43,6 +43,7 @@ def test_basic_checkpointing(self): self.assertFalse(flatMappedRDD.isCheckpointed()) self.assertTrue(flatMappedRDD.getCheckpointFile() is None) + self.assertFalse(self.sc.getCheckpointDir() is None) flatMappedRDD.checkpoint() result = flatMappedRDD.collect() @@ -51,6 +52,8 @@ def test_basic_checkpointing(self): self.assertEqual(flatMappedRDD.collect(), result) self.assertEqual("file:" + self.checkpointDir.name, os.path.dirname(os.path.dirname(flatMappedRDD.getCheckpointFile()))) + self.assertEqual(self.sc.getCheckpointDir(), + os.path.dirname(flatMappedRDD.getCheckpointFile())) def test_checkpoint_and_restore(self): parCollection = self.sc.parallelize([1, 2, 3, 4]) From e83d03ca4861a69cd688beacc544b3f6dae32ae0 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 5 Oct 2020 13:18:12 +0900 Subject: [PATCH 0161/1009] [SPARK-33040][R][ML] Add SparkR wrapper for vector_to_array ### What changes were proposed in this pull request? Add SparkR wrapper for `o.a.s.ml.functions.vector_to_array` ### Why are the changes needed? - Currently ML vectors, including predictions, are almost inaccessible to R users. That's is a serious loss of functionality. - Feature parity. ### Does this PR introduce _any_ user-facing change? Yes, new R function is added. ### How was this patch tested? - New unit tests. - Manual verification. Closes #29917 from zero323/SPARK-33040. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 33 +++++++++++++++++++++++++++ R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 3 ++- 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 4ea05b25ecc9e..25162f3e23b38 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -427,6 +427,7 @@ exportMethods("%<=>%", "variance", "var_pop", "var_samp", + "vector_to_array", "weekofyear", "when", "window", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b216f404a3ca5..61ea90efb348d 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -345,6 +345,17 @@ NULL #' head(tmp)} NULL +#' ML functions for Column operations +#' +#' ML functions defined for \code{Column}. +#' +#' @param x Column to compute on. +#' @param ... additional argument(s). +#' @name column_ml_functions +#' @rdname column_ml_functions +#' @family ml functions +NULL + #' @details #' \code{lit}: A new Column is created to represent the literal value. #' If the parameter is a Column, it is returned unchanged. @@ -4458,3 +4469,25 @@ setMethod("timestamp_seconds", ) column(jc) }) + +#' @details +#' \code{vector_to_array} Converts a column of MLlib sparse/dense vectors into +#' a column of dense arrays. +#' +#' @param dtype The data type of the output array. Valid values: "float64" or "float32". 
+#' +#' @rdname column_ml_functions +#' @aliases vector_to_array vector_to_array,Column-method +#' @note vector_to_array since 3.1.0 +setMethod("vector_to_array", + signature(x = "Column"), + function(x, dtype = c("float32", "float64")) { + dtype <- match.arg(dtype) + jc <- callJStatic( + "org.apache.spark.ml.functions", + "vector_to_array", + x@jc, + dtype + ) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 985678679dec8..993fc758adbe5 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -1449,6 +1449,10 @@ setGeneric("var_pop", function(x) { standardGeneric("var_pop") }) #' @name NULL setGeneric("var_samp", function(x) { standardGeneric("var_samp") }) +#' @rdname column_ml_functions +#' @name NULL +setGeneric("vector_to_array", function(x, ...) { standardGeneric("vector_to_array") }) + #' @rdname column_datetime_functions #' @name NULL setGeneric("weekofyear", function(x) { standardGeneric("weekofyear") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c36620227593d..c3b271b1205c5 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1424,7 +1424,8 @@ test_that("column functions", { date_trunc("quarter", c) + current_date() + current_timestamp() c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) - c26 <- timestamp_seconds(c1) + c26 <- timestamp_seconds(c1) + vector_to_array(c) + + vector_to_array(c, "float32") + vector_to_array(c, "float64") c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) From 24f890e8e81ee03fe0d9ce4c8f232784e9fdaccd Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 5 Oct 2020 16:31:17 +0900 Subject: [PATCH 0162/1009] [SPARK-33040][FOLLOW-UP][R] Reorder argument choices and add examples ### What changes were proposed in this pull request? - Reorder choices of `dtype` to match Scala defaults. - Add example to ml_functions. ### Why are the changes needed? As requested: - https://github.com/apache/spark/pull/29917#pullrequestreview-501715344 - https://github.com/apache/spark/pull/29917#pullrequestreview-501716521 ### Does this PR introduce _any_ user-facing change? No (changes to newly added component). ### How was this patch tested? Existing tests. Closes #29944 from zero323/SPARK-33040-FOLLOW-UP. 
Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/R/functions.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 61ea90efb348d..959edf29e2429 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -354,6 +354,11 @@ NULL #' @name column_ml_functions #' @rdname column_ml_functions #' @family ml functions +#' @examples +#' \dontrun{ +#' df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm") +#' head(select(df, vector_to_array(df$features))) +#' } NULL #' @details @@ -4481,7 +4486,7 @@ setMethod("timestamp_seconds", #' @note vector_to_array since 3.1.0 setMethod("vector_to_array", signature(x = "Column"), - function(x, dtype = c("float32", "float64")) { + function(x, dtype = c("float64", "float32")) { dtype <- match.arg(dtype) jc <- callJStatic( "org.apache.spark.ml.functions", From 0fb2574d4e75fa4a545da1d53357c2359c0bffeb Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Mon, 5 Oct 2020 20:25:57 +0900 Subject: [PATCH 0163/1009] [SPARK-33042][SQL][TEST] Add a test case to ensure changes to spark.sql.optimizer.maxIterations take effect at runtime ### What changes were proposed in this pull request? Add a test case to ensure changes to `spark.sql.optimizer.maxIterations` take effect at runtime. ### Why are the changes needed? Currently, there is only one related test case: https://github.com/apache/spark/blob/master/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala#L156 However, this test case only checks the value of the conf can be changed at runtime. It does not check the updated value is actually used by the Optimizer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? unit test Closes #29919 from yuningzh-db/add_optimizer_test. Authored-by: Yuning Zhang Signed-off-by: HyukjinKwon --- .../catalyst/optimizer/OptimizerSuite.scala | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala new file mode 100644 index 0000000000000..b48555ec2fb28 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.errors.TreeNodeException +import org.apache.spark.sql.catalyst.expressions.{Alias, IntegerLiteral, Literal} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * A dummy optimizer rule for testing that decrements integer literals until 0. + */ +object DecrementLiterals extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformExpressions { + case IntegerLiteral(i) if i > 0 => Literal(i - 1) + } +} + +class OptimizerSuite extends PlanTest { + test("Optimizer exceeds max iterations") { + val iterations = 5 + val maxIterationsNotEnough = 3 + val maxIterationsEnough = 10 + val analyzed = Project(Alias(Literal(iterations), "attr")() :: Nil, OneRowRelation()).analyze + + withSQLConf(SQLConf.OPTIMIZER_MAX_ITERATIONS.key -> maxIterationsNotEnough.toString) { + val optimizer = new SimpleTestOptimizer() { + override def defaultBatches: Seq[Batch] = + Batch("test", fixedPoint, + DecrementLiterals) :: Nil + } + + val message1 = intercept[TreeNodeException[LogicalPlan]] { + optimizer.execute(analyzed) + }.getMessage + assert(message1.startsWith(s"Max iterations ($maxIterationsNotEnough) reached for batch " + + s"test, please set '${SQLConf.OPTIMIZER_MAX_ITERATIONS.key}' to a larger value.")) + + withSQLConf(SQLConf.OPTIMIZER_MAX_ITERATIONS.key -> maxIterationsEnough.toString) { + try { + optimizer.execute(analyzed) + } catch { + case ex: TreeNodeException[LogicalPlan] + if ex.getMessage.contains(SQLConf.OPTIMIZER_MAX_ITERATIONS.key) => + fail("optimizer.execute should not reach max iterations.") + } + } + + val message2 = intercept[TreeNodeException[LogicalPlan]] { + optimizer.execute(analyzed) + }.getMessage + assert(message2.startsWith(s"Max iterations ($maxIterationsNotEnough) reached for batch " + + s"test, please set '${SQLConf.OPTIMIZER_MAX_ITERATIONS.key}' to a larger value.")) + } + } +} From 023eb482b23b5d63d2157b3def9926673844e0a3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 5 Oct 2020 22:00:42 +0900 Subject: [PATCH 0164/1009] [SPARK-32914][SQL] Avoid constructing dataType multiple times ### What changes were proposed in this pull request? Some expression's data type not a static value. It needs to be constructed a new object when calling `dataType` function. E.g.: `CaseWhen`. We should avoid constructing dataType multiple times because it may be used many times. E.g.: [`HyperLogLogPlusPlus.update`](https://github.com/apache/spark/blob/10edeafc69250afef8c71ed7b3c77992f67aa4ff/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlus.scala#L122). ### Why are the changes needed? Improve query performance. 
for example: ```scala spark.range(100000000L).selectExpr("approx_count_distinct(case when id % 400 > 20 then id else 0 end)").show ``` Profiling result: ``` -- Execution profile --- Total samples : 18365 Frame buffer usage : 2.6688% --- 58443254327 ns (31.82%), 5844 samples [ 0] GenericTaskQueueSet, (MemoryType)1>::steal_best_of_2(unsigned int, int*, StarTask&) [ 1] StealTask::do_it(GCTaskManager*, unsigned int) [ 2] GCTaskThread::run() [ 3] java_start(Thread*) [ 4] start_thread --- 6140668667 ns (3.34%), 614 samples [ 0] GenericTaskQueueSet, (MemoryType)1>::peek() [ 1] ParallelTaskTerminator::offer_termination(TerminatorTerminator*) [ 2] StealTask::do_it(GCTaskManager*, unsigned int) [ 3] GCTaskThread::run() [ 4] java_start(Thread*) [ 5] start_thread --- 5679994036 ns (3.09%), 568 samples [ 0] scala.collection.generic.Growable.$plus$plus$eq [ 1] scala.collection.generic.Growable.$plus$plus$eq$ [ 2] scala.collection.mutable.ListBuffer.$plus$plus$eq [ 3] scala.collection.mutable.ListBuffer.$plus$plus$eq [ 4] scala.collection.generic.GenericTraversableTemplate.$anonfun$flatten$1 [ 5] scala.collection.generic.GenericTraversableTemplate$$Lambda$107.411506101.apply [ 6] scala.collection.immutable.List.foreach [ 7] scala.collection.generic.GenericTraversableTemplate.flatten [ 8] scala.collection.generic.GenericTraversableTemplate.flatten$ [ 9] scala.collection.AbstractTraversable.flatten [10] org.apache.spark.internal.config.ConfigEntry.readString [11] org.apache.spark.internal.config.ConfigEntryWithDefault.readFrom [12] org.apache.spark.sql.internal.SQLConf.getConf [13] org.apache.spark.sql.internal.SQLConf.caseSensitiveAnalysis [14] org.apache.spark.sql.types.DataType.sameType [15] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.$anonfun$haveSameType$1 [16] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.$anonfun$haveSameType$1$adapted [17] org.apache.spark.sql.catalyst.analysis.TypeCoercion$$$Lambda$1527.1975399904.apply [18] scala.collection.IndexedSeqOptimized.prefixLengthImpl [19] scala.collection.IndexedSeqOptimized.forall [20] scala.collection.IndexedSeqOptimized.forall$ [21] scala.collection.mutable.ArrayBuffer.forall [22] org.apache.spark.sql.catalyst.analysis.TypeCoercion$.haveSameType [23] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataTypeCheck [24] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataTypeCheck$ [25] org.apache.spark.sql.catalyst.expressions.CaseWhen.dataTypeCheck [26] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataType [27] org.apache.spark.sql.catalyst.expressions.ComplexTypeMergingExpression.dataType$ [28] org.apache.spark.sql.catalyst.expressions.CaseWhen.dataType [29] org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus.update [30] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1.$anonfun$applyOrElse$2 [31] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1.$anonfun$applyOrElse$2$adapted [32] org.apache.spark.sql.execution.aggregate.AggregationIterator$$anonfun$1$$Lambda$1534.1383512673.apply [33] org.apache.spark.sql.execution.aggregate.AggregationIterator.$anonfun$generateProcessRow$7 [34] org.apache.spark.sql.execution.aggregate.AggregationIterator.$anonfun$generateProcessRow$7$adapted [35] org.apache.spark.sql.execution.aggregate.AggregationIterator$$Lambda$1555.725788712.apply ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
Manual test and benchmark test: Benchmark code | Before this PR(Milliseconds) | After this PR(Milliseconds) --- | --- | --- spark.range(100000000L).selectExpr("approx_count_distinct(case when id % 400 > 20 then id else 0 end)").collect() | 56462 | 3794 Closes #29790 from wangyum/SPARK-32914. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/Expression.scala | 4 +++- .../aggregate/ApproximatePercentile.scala | 4 +++- .../catalyst/expressions/collectionOperations.scala | 13 ++++++++++--- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index ce4aa1c2b7c2f..35b192cc5544a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -1060,10 +1060,12 @@ trait ComplexTypeMergingExpression extends Expression { s" The input types found are\n\t${inputTypesForMerging.mkString("\n\t")}") } - override def dataType: DataType = { + private lazy val internalDataType: DataType = { dataTypeCheck inputTypesForMerging.reduceLeft(TypeCoercion.findCommonTypeDifferentOnlyInNullFlags(_, _).get) } + + override def dataType: DataType = internalDataType } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala index 3327f4ccf4461..2a5275e75d4f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala @@ -187,10 +187,12 @@ case class ApproximatePercentile( override def nullable: Boolean = true // The result type is the same as the input type. 
- override def dataType: DataType = { + private lazy val internalDataType: DataType = { if (returnPercentileArray) ArrayType(child.dataType, false) else child.dataType } + override def dataType: DataType = internalDataType + override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("percentile_approx") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 8555f63df986f..8719b2e065663 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -371,7 +371,7 @@ case class MapEntries(child: Expression) @transient private lazy val childDataType: MapType = child.dataType.asInstanceOf[MapType] - override def dataType: DataType = { + private lazy val internalDataType: DataType = { ArrayType( StructType( StructField("key", childDataType.keyType, false) :: @@ -380,6 +380,8 @@ case class MapEntries(child: Expression) false) } + override def dataType: DataType = internalDataType + override protected def nullSafeEval(input: Any): Any = { val childMap = input.asInstanceOf[MapData] val keys = childMap.keyArray() @@ -3504,13 +3506,16 @@ object ArrayUnion { since = "2.4.0") case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBinaryLike with ComplexTypeMergingExpression { - override def dataType: DataType = { + + private lazy val internalDataType: DataType = { dataTypeCheck ArrayType(elementType, left.dataType.asInstanceOf[ArrayType].containsNull && right.dataType.asInstanceOf[ArrayType].containsNull) } + override def dataType: DataType = internalDataType + @transient lazy val evalIntersect: (ArrayData, ArrayData) => ArrayData = { if (TypeUtils.typeWithProperEquals(elementType)) { (array1, array2) => @@ -3747,11 +3752,13 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina case class ArrayExcept(left: Expression, right: Expression) extends ArrayBinaryLike with ComplexTypeMergingExpression { - override def dataType: DataType = { + private lazy val internalDataType: DataType = { dataTypeCheck left.dataType } + override def dataType: DataType = internalDataType + @transient lazy val evalExcept: (ArrayData, ArrayData) => ArrayData = { if (TypeUtils.typeWithProperEquals(elementType)) { (array1, array2) => From a09747bf326677e212fbc284285cce822571c315 Mon Sep 17 00:00:00 2001 From: gschiavon Date: Mon, 5 Oct 2020 09:02:06 -0700 Subject: [PATCH 0165/1009] [SPARK-33063][K8S] Improve error message for insufficient K8s volume confs ### What changes were proposed in this pull request? Provide error handling when creating kubernetes volumes. Right now they keys are expected to be there and if not it fails with a `key not found` error, but not knowing why do you need that `key`. Also I renamed some tests that didn't indicate the kind of kubernetes volume ### Why are the changes needed? Easier for the users to understand why `spark-submit` command is failing if not providing they right kubernetes volumes properties. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It was tested with the current tests plus added one more. [Jira ticket](https://issues.apache.org/jira/browse/SPARK-33063) Closes #29941 from Gschiavon/SPARK-33063-provide-error-handling-k8s-volumes. 
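As a minimal, standalone sketch of the fail-fast validation this PR describes (not the actual Spark code, which follows in the diff below), the idea is to look up each required option explicitly and raise an error that names both the missing key and the volume type that needs it. The helper name `requireOption` and the example keys are illustrative only.

```scala
import scala.util.Try

// Look the key up explicitly; if it is absent, fail with a message that says which
// volume type required it instead of a bare "key not found" error.
def requireOption(options: Map[String, String], key: String, volumeType: String): String =
  options.getOrElse(key,
    throw new NoSuchElementException(s"$key is required for $volumeType"))

val opts = Map("test.hostPath.data.options.path" -> "/checkpoints")

// Present key: returns the configured value.
println(requireOption(opts, "test.hostPath.data.options.path", "hostPath"))

// Missing key: the failure message now points at both the key and the volume type.
println(Try(requireOption(opts, "test.persistentVolumeClaim.data.options.claimName",
  "persistentVolumeClaim")))
```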
Authored-by: gschiavon Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/KubernetesVolumeUtils.scala | 10 ++++++++++ .../spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala index 77921f6338c74..b2eacca042794 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala @@ -67,6 +67,7 @@ private[spark] object KubernetesVolumeUtils { volumeType match { case KUBERNETES_VOLUMES_HOSTPATH_TYPE => val pathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_PATH_KEY" + verifyOptionKey(options, pathKey, KUBERNETES_VOLUMES_HOSTPATH_TYPE) KubernetesHostPathVolumeConf(options(pathKey)) case KUBERNETES_VOLUMES_PVC_TYPE => @@ -74,6 +75,7 @@ private[spark] object KubernetesVolumeUtils { val storageClassKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_CLAIM_STORAGE_CLASS_KEY" val sizeLimitKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_SIZE_LIMIT_KEY" + verifyOptionKey(options, claimNameKey, KUBERNETES_VOLUMES_PVC_TYPE) KubernetesPVCVolumeConf( options(claimNameKey), options.get(storageClassKey), @@ -87,6 +89,8 @@ private[spark] object KubernetesVolumeUtils { case KUBERNETES_VOLUMES_NFS_TYPE => val pathKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_PATH_KEY" val serverKey = s"$volumeType.$volumeName.$KUBERNETES_VOLUMES_OPTIONS_SERVER_KEY" + verifyOptionKey(options, pathKey, KUBERNETES_VOLUMES_NFS_TYPE) + verifyOptionKey(options, serverKey, KUBERNETES_VOLUMES_NFS_TYPE) KubernetesNFSVolumeConf( options(pathKey), options(serverKey)) @@ -95,4 +99,10 @@ private[spark] object KubernetesVolumeUtils { throw new IllegalArgumentException(s"Kubernetes Volume type `$volumeType` is not supported") } } + + private def verifyOptionKey(options: Map[String, String], key: String, msg: String): Unit = { + if (!options.isDefinedAt(key)) { + throw new NoSuchElementException(key + s" is required for $msg") + } + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala index 6596c5e2ad2e7..349cbd04f6027 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala @@ -118,6 +118,17 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { assert(e.getMessage.contains("hostPath.volumeName.options.path")) } + test("SPARK-33063: Fails on missing option key in persistentVolumeClaim") { + val sparkConf = new SparkConf(false) + sparkConf.set("test.persistentVolumeClaim.volumeName.mount.path", "/path") + sparkConf.set("test.persistentVolumeClaim.volumeName.mount.readOnly", "true") + + val e = intercept[NoSuchElementException] { + KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.") + } + assert(e.getMessage.contains("persistentVolumeClaim.volumeName.options.claimName")) + } + test("Parses read-only nfs volumes correctly") { val sparkConf = new SparkConf(false) 
sparkConf.set("test.nfs.volumeName.mount.path", "/path") From 14aeab3b279b1c23cddb86b97afc048c195b9b75 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Mon, 5 Oct 2020 09:30:27 -0700 Subject: [PATCH 0166/1009] [SPARK-33038][SQL] Combine AQE initial and current plan string when two plans are the same ### What changes were proposed in this pull request? This PR combines the current plan and the initial plan in the AQE query plan string when the two plans are the same. It also removes the `== Current Plan ==` and `== Initial Plan ==` headers: Before ```scala AdaptiveSparkPlan isFinalPlan=false +- == Current Plan == SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... +- == Initial Plan == SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... ``` After ```scala AdaptiveSparkPlan isFinalPlan=false +- SortMergeJoin [key#13], [a#23], Inner :- Sort [key#13 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(key#13, 5), true, [id=#94] ... ``` For SQL `EXPLAIN` output: Before ```scala AdaptiveSparkPlan (8) +- == Current Plan == Sort (7) +- Exchange (6) ... +- == Initial Plan == Sort (7) +- Exchange (6) ... ``` After ```scala AdaptiveSparkPlan (8) +- Sort (7) +- Exchange (6) ... ``` ### Why are the changes needed? To simplify the AQE plan string by removing the redundant plan information. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Modified the existing unit test. Closes #29915 from allisonwang-db/aqe-explain. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Xiao Li --- .../adaptive/AdaptiveSparkPlanExec.scala | 50 ++++--- .../sql-tests/results/explain-aqe.sql.out | 123 ++---------------- .../adaptive/AdaptiveQueryExecSuite.scala | 4 +- 3 files changed, 47 insertions(+), 130 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 6c197fedd8c56..0e032569bb8a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -300,26 +300,40 @@ case class AdaptiveSparkPlanExec( maxFields, printNodeId, indent) - generateTreeStringWithHeader( - if (isFinalPlan) "Final Plan" else "Current Plan", - currentPhysicalPlan, - depth, - lastChildren, - append, - verbose, - maxFields, - printNodeId) - generateTreeStringWithHeader( - "Initial Plan", - initialPlan, - depth, - lastChildren, - append, - verbose, - maxFields, - printNodeId) + if (currentPhysicalPlan.fastEquals(initialPlan)) { + currentPhysicalPlan.generateTreeString( + depth + 1, + lastChildren :+ true, + append, + verbose, + prefix = "", + addSuffix = false, + maxFields, + printNodeId, + indent) + } else { + generateTreeStringWithHeader( + if (isFinalPlan) "Final Plan" else "Current Plan", + currentPhysicalPlan, + depth, + lastChildren, + append, + verbose, + maxFields, + printNodeId) + generateTreeStringWithHeader( + "Initial Plan", + initialPlan, + depth, + lastChildren, + append, + verbose, + maxFields, + printNodeId) + } } + private def generateTreeStringWithHeader( header: String, plan: SparkPlan, diff --git 
a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 3a850160b43e0..5435cde050fd1 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -54,16 +54,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (8) -+- == Current Plan == - Sort (7) - +- Exchange (6) - +- HashAggregate (5) - +- Exchange (4) - +- HashAggregate (3) - +- Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Sort (7) ++- Sort (7) +- Exchange (6) +- HashAggregate (5) +- Exchange (4) @@ -126,16 +117,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (8) -+- == Current Plan == - Project (7) - +- Filter (6) - +- HashAggregate (5) - +- Exchange (4) - +- HashAggregate (3) - +- Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Project (7) ++- Project (7) +- Filter (6) +- HashAggregate (5) +- Exchange (4) @@ -196,17 +178,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (9) -+- == Current Plan == - HashAggregate (8) - +- Exchange (7) - +- HashAggregate (6) - +- Union (5) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- Filter (4) - +- Scan parquet default.explain_temp1 (3) -+- == Initial Plan == - HashAggregate (8) ++- HashAggregate (8) +- Exchange (7) +- HashAggregate (6) +- Union (5) @@ -274,15 +246,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (6) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (5) - +- Filter (4) - +- Scan parquet default.explain_temp2 (3) -+- == Initial Plan == - BroadcastHashJoin Inner BuildRight (6) ++- BroadcastHashJoin Inner BuildRight (6) :- Filter (2) : +- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (5) @@ -337,14 +301,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (6) -+- == Current Plan == - BroadcastHashJoin LeftOuter BuildRight (5) - :- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (4) - +- Filter (3) - +- Scan parquet default.explain_temp2 (2) -+- == Initial Plan == - BroadcastHashJoin LeftOuter BuildRight (5) ++- BroadcastHashJoin LeftOuter BuildRight (5) :- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (4) +- Filter (3) @@ -398,11 +355,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Filter (2) ++- Filter (2) +- Scan parquet default.explain_temp1 (1) @@ -438,11 +391,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Filter (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Filter (2) ++- Filter (2) +- Scan parquet default.explain_temp1 (1) @@ -470,11 +419,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (3) -+- == Current Plan == - Project (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - Project (2) ++- Project (2) +- Scan parquet default.explain_temp1 (1) @@ -506,15 +451,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (6) - :- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (5) - +- Filter (4) - +- Scan parquet default.explain_temp1 (3) -+- == Initial Plan == - 
BroadcastHashJoin Inner BuildRight (6) ++- BroadcastHashJoin Inner BuildRight (6) :- Filter (2) : +- Scan parquet default.explain_temp1 (1) +- BroadcastExchange (5) @@ -572,21 +509,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (13) -+- == Current Plan == - BroadcastHashJoin Inner BuildRight (12) - :- HashAggregate (5) - : +- Exchange (4) - : +- HashAggregate (3) - : +- Filter (2) - : +- Scan parquet default.explain_temp1 (1) - +- BroadcastExchange (11) - +- HashAggregate (10) - +- Exchange (9) - +- HashAggregate (8) - +- Filter (7) - +- Scan parquet default.explain_temp1 (6) -+- == Initial Plan == - BroadcastHashJoin Inner BuildRight (12) ++- BroadcastHashJoin Inner BuildRight (12) :- HashAggregate (5) : +- Exchange (4) : +- HashAggregate (3) @@ -710,13 +633,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (5) -+- == Current Plan == - HashAggregate (4) - +- Exchange (3) - +- HashAggregate (2) - +- Scan parquet default.explain_temp1 (1) -+- == Initial Plan == - HashAggregate (4) ++- HashAggregate (4) +- Exchange (3) +- HashAggregate (2) +- Scan parquet default.explain_temp1 (1) @@ -761,13 +678,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (5) -+- == Current Plan == - ObjectHashAggregate (4) - +- Exchange (3) - +- ObjectHashAggregate (2) - +- Scan parquet default.explain_temp4 (1) -+- == Initial Plan == - ObjectHashAggregate (4) ++- ObjectHashAggregate (4) +- Exchange (3) +- ObjectHashAggregate (2) +- Scan parquet default.explain_temp4 (1) @@ -812,15 +723,7 @@ struct -- !query output == Physical Plan == AdaptiveSparkPlan (7) -+- == Current Plan == - SortAggregate (6) - +- Sort (5) - +- Exchange (4) - +- SortAggregate (3) - +- Sort (2) - +- Scan parquet default.explain_temp4 (1) -+- == Initial Plan == - SortAggregate (6) ++- SortAggregate (6) +- Sort (5) +- Exchange (4) +- SortAggregate (3) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 8799dbb14ef34..0dfb1d2fd9eda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -842,8 +842,8 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { val df = sql("SELECT * FROM testData join testData2 ON key = a where value = '1'") val planBefore = df.queryExecution.executedPlan - assert(planBefore.toString.contains("== Current Plan ==")) - assert(planBefore.toString.contains("== Initial Plan ==")) + assert(!planBefore.toString.contains("== Current Plan ==")) + assert(!planBefore.toString.contains("== Initial Plan ==")) df.collect() val planAfter = df.queryExecution.executedPlan assert(planAfter.toString.contains("== Final Plan ==")) From 008a2ad1f836ff04fafd51a9c94c355ef35f1692 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 5 Oct 2020 15:29:56 -0700 Subject: [PATCH 0167/1009] [SPARK-20202][BUILD][SQL] Remove references to org.spark-project.hive (Hive 1.2.1) ### What changes were proposed in this pull request? As of today, - SPARK-30034 Apache Spark 3.0.0 switched its default Hive execution engine from Hive 1.2 to Hive 2.3. This removes the direct dependency to the forked Hive 1.2.1 in maven repository. - SPARK-32981 Apache Spark 3.1.0(`master` branch) removed Hive 1.2 related artifacts from Apache Spark binary distributions. 
This PR(SPARK-20202) aims to remove the following usage of unofficial Apache Hive fork completely from Apache Spark master for Apache Spark 3.1.0. ``` org.spark-project.hive 1.2.1.spark2 ``` For the forked Hive 1.2.1.spark2 users, Apache Spark 2.4(LTS) and 3.0 (~ 2021.12) will provide it. ### Why are the changes needed? - First, Apache Spark community should not use the unofficial forked release of another Apache project. - Second, Apache Hive 1.2.1 was released at 2015-06-26 and the forked Hive `1.2.1.spark2` exposed many unfixable bugs in Apache because the forked `1.2.1.spark2` is not maintained at all. Apache Hive 2.3.0 was released at 2017-07-19 and it has been used with less number of bugs compared with `1.2.1.spark2`. Many bugs still exist in `hive-1.2` profile and new Apache Spark unit tests are added with `HiveUtils.isHive23` condition so far. ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. PRBuilder will not accept `[test-hive1.2]` on master and `branch-3.1`. ### How was this patch tested? 1. SBT/Hadoop 3.2/Hive 2.3 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129366) 2. SBT/Hadoop 2.7/Hive 2.3 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129382) 3. SBT/Hadoop 3.2/Hive 1.2 (This has not been supported already due to Hive 1.2 doesn't work with Hadoop 3.2.) 4. SBT/Hadoop 2.7/Hive 1.2 (https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129383, This is rejected) Closes #29936 from dongjoon-hyun/SPARK-REMOVE-HIVE1. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/run-tests.py | 1 - dev/test-dependencies.sh | 6 +- docs/sql-migration-guide.md | 2 + pom.xml | 25 - sql/core/pom.xml | 3 - .../datasources/orc/OrcColumnVector.java | 0 .../datasources/orc/OrcFilters.scala | 0 .../datasources/orc/OrcShimUtils.scala | 0 .../datasources/orc/OrcFilterSuite.scala | 0 .../datasources/orc/OrcColumnVector.java | 208 - .../datasources/orc/DaysWritable.scala | 79 - .../datasources/orc/OrcFilters.scala | 275 - .../datasources/orc/OrcShimUtils.scala | 66 - .../datasources/orc/OrcFilterSuite.scala | 676 - .../{v2.3 => }/if/TCLIService.thrift | 0 sql/hive-thriftserver/pom.xml | 4 +- .../service/rpc/thrift/TArrayTypeEntry.java | 0 .../service/rpc/thrift/TBinaryColumn.java | 0 .../hive/service/rpc/thrift/TBoolColumn.java | 0 .../hive/service/rpc/thrift/TBoolValue.java | 0 .../hive/service/rpc/thrift/TByteColumn.java | 0 .../hive/service/rpc/thrift/TByteValue.java | 0 .../hive/service/rpc/thrift/TCLIService.java | 0 .../rpc/thrift/TCLIServiceConstants.java | 0 .../rpc/thrift/TCancelDelegationTokenReq.java | 0 .../thrift/TCancelDelegationTokenResp.java | 0 .../rpc/thrift/TCancelOperationReq.java | 0 .../rpc/thrift/TCancelOperationResp.java | 0 .../rpc/thrift/TCloseOperationReq.java | 0 .../rpc/thrift/TCloseOperationResp.java | 0 .../service/rpc/thrift/TCloseSessionReq.java | 0 .../service/rpc/thrift/TCloseSessionResp.java | 0 .../hive/service/rpc/thrift/TColumn.java | 0 .../hive/service/rpc/thrift/TColumnDesc.java | 0 .../hive/service/rpc/thrift/TColumnValue.java | 0 .../service/rpc/thrift/TDoubleColumn.java | 0 .../hive/service/rpc/thrift/TDoubleValue.java | 0 .../rpc/thrift/TExecuteStatementReq.java | 0 .../rpc/thrift/TExecuteStatementResp.java | 0 .../service/rpc/thrift/TFetchOrientation.java | 0 .../service/rpc/thrift/TFetchResultsReq.java | 0 .../service/rpc/thrift/TFetchResultsResp.java | 0 .../service/rpc/thrift/TGetCatalogsReq.java | 0 .../service/rpc/thrift/TGetCatalogsResp.java | 0 
.../service/rpc/thrift/TGetColumnsReq.java | 0 .../service/rpc/thrift/TGetColumnsResp.java | 0 .../rpc/thrift/TGetCrossReferenceReq.java | 0 .../rpc/thrift/TGetCrossReferenceResp.java | 0 .../rpc/thrift/TGetDelegationTokenReq.java | 0 .../rpc/thrift/TGetDelegationTokenResp.java | 0 .../service/rpc/thrift/TGetFunctionsReq.java | 0 .../service/rpc/thrift/TGetFunctionsResp.java | 0 .../hive/service/rpc/thrift/TGetInfoReq.java | 0 .../hive/service/rpc/thrift/TGetInfoResp.java | 0 .../hive/service/rpc/thrift/TGetInfoType.java | 0 .../service/rpc/thrift/TGetInfoValue.java | 0 .../rpc/thrift/TGetOperationStatusReq.java | 0 .../rpc/thrift/TGetOperationStatusResp.java | 0 .../rpc/thrift/TGetPrimaryKeysReq.java | 0 .../rpc/thrift/TGetPrimaryKeysResp.java | 0 .../rpc/thrift/TGetResultSetMetadataReq.java | 0 .../rpc/thrift/TGetResultSetMetadataResp.java | 0 .../service/rpc/thrift/TGetSchemasReq.java | 0 .../service/rpc/thrift/TGetSchemasResp.java | 0 .../service/rpc/thrift/TGetTableTypesReq.java | 0 .../rpc/thrift/TGetTableTypesResp.java | 0 .../service/rpc/thrift/TGetTablesReq.java | 0 .../service/rpc/thrift/TGetTablesResp.java | 0 .../service/rpc/thrift/TGetTypeInfoReq.java | 0 .../service/rpc/thrift/TGetTypeInfoResp.java | 0 .../service/rpc/thrift/THandleIdentifier.java | 0 .../hive/service/rpc/thrift/TI16Column.java | 0 .../hive/service/rpc/thrift/TI16Value.java | 0 .../hive/service/rpc/thrift/TI32Column.java | 0 .../hive/service/rpc/thrift/TI32Value.java | 0 .../hive/service/rpc/thrift/TI64Column.java | 0 .../hive/service/rpc/thrift/TI64Value.java | 0 .../rpc/thrift/TJobExecutionStatus.java | 0 .../service/rpc/thrift/TMapTypeEntry.java | 0 .../service/rpc/thrift/TOpenSessionReq.java | 0 .../service/rpc/thrift/TOpenSessionResp.java | 0 .../service/rpc/thrift/TOperationHandle.java | 0 .../service/rpc/thrift/TOperationState.java | 0 .../service/rpc/thrift/TOperationType.java | 0 .../rpc/thrift/TPrimitiveTypeEntry.java | 0 .../rpc/thrift/TProgressUpdateResp.java | 0 .../service/rpc/thrift/TProtocolVersion.java | 0 .../rpc/thrift/TRenewDelegationTokenReq.java | 0 .../rpc/thrift/TRenewDelegationTokenResp.java | 0 .../apache/hive/service/rpc/thrift/TRow.java | 0 .../hive/service/rpc/thrift/TRowSet.java | 0 .../service/rpc/thrift/TSessionHandle.java | 0 .../hive/service/rpc/thrift/TStatus.java | 0 .../hive/service/rpc/thrift/TStatusCode.java | 0 .../service/rpc/thrift/TStringColumn.java | 0 .../hive/service/rpc/thrift/TStringValue.java | 0 .../service/rpc/thrift/TStructTypeEntry.java | 0 .../hive/service/rpc/thrift/TTableSchema.java | 0 .../hive/service/rpc/thrift/TTypeDesc.java | 0 .../hive/service/rpc/thrift/TTypeEntry.java | 0 .../hive/service/rpc/thrift/TTypeId.java | 0 .../rpc/thrift/TTypeQualifierValue.java | 0 .../service/rpc/thrift/TTypeQualifiers.java | 0 .../service/rpc/thrift/TUnionTypeEntry.java | 0 .../rpc/thrift/TUserDefinedTypeEntry.java | 0 .../apache/hive/service/AbstractService.java | 0 .../apache/hive/service/CompositeService.java | 0 .../org/apache/hive/service/CookieSigner.java | 0 .../hive/service/ServiceOperations.java | 0 .../org/apache/hive/service/ServiceUtils.java | 0 .../hive/service/auth/HiveAuthFactory.java | 0 .../hive/service/auth/HttpAuthUtils.java | 0 .../hive/service/auth/KerberosSaslHelper.java | 0 .../hive/service/auth/PlainSaslHelper.java | 0 .../service/auth/TSetIpAddressProcessor.java | 0 .../apache/hive/service/cli/CLIService.java | 0 .../hive/service/cli/ColumnBasedSet.java | 0 .../hive/service/cli/ColumnDescriptor.java | 0 
.../apache/hive/service/cli/ColumnValue.java | 0 .../hive/service/cli/FetchOrientation.java | 0 .../apache/hive/service/cli/GetInfoType.java | 0 .../apache/hive/service/cli/GetInfoValue.java | 0 .../org/apache/hive/service/cli/Handle.java | 0 .../hive/service/cli/HandleIdentifier.java | 0 .../hive/service/cli/HiveSQLException.java | 0 .../apache/hive/service/cli/ICLIService.java | 0 .../hive/service/cli/OperationHandle.java | 0 .../hive/service/cli/OperationState.java | 0 .../hive/service/cli/OperationType.java | 0 .../apache/hive/service/cli/RowBasedSet.java | 0 .../org/apache/hive/service/cli/RowSet.java | 0 .../hive/service/cli/RowSetFactory.java | 0 .../hive/service/cli/SessionHandle.java | 0 .../apache/hive/service/cli/TableSchema.java | 0 .../hive/service/cli/TypeDescriptor.java | 0 .../hive/service/cli/TypeQualifiers.java | 0 .../operation/ClassicTableTypeMapping.java | 0 .../operation/ExecuteStatementOperation.java | 0 .../cli/operation/GetCatalogsOperation.java | 0 .../cli/operation/GetColumnsOperation.java | 0 .../operation/GetCrossReferenceOperation.java | 0 .../cli/operation/GetFunctionsOperation.java | 0 .../operation/GetPrimaryKeysOperation.java | 0 .../cli/operation/GetSchemasOperation.java | 0 .../cli/operation/GetTableTypesOperation.java | 0 .../cli/operation/GetTablesOperation.java | 0 .../cli/operation/GetTypeInfoOperation.java | 0 .../cli/operation/HiveCommandOperation.java | 0 .../cli/operation/HiveTableTypeMapping.java | 0 .../cli/operation/MetadataOperation.java | 0 .../hive/service/cli/operation/Operation.java | 0 .../cli/operation/OperationManager.java | 0 .../service/cli/operation/SQLOperation.java | 0 .../cli/operation/TableTypeMapping.java | 0 .../hive/service/cli/session/HiveSession.java | 0 .../service/cli/session/HiveSessionBase.java | 0 .../cli/session/HiveSessionHookContext.java | 0 .../session/HiveSessionHookContextImpl.java | 0 .../service/cli/session/HiveSessionImpl.java | 0 .../cli/session/HiveSessionImplwithUGI.java | 0 .../service/cli/session/SessionManager.java | 0 .../cli/thrift/ThriftBinaryCLIService.java | 0 .../service/cli/thrift/ThriftCLIService.java | 0 .../cli/thrift/ThriftCLIServiceClient.java | 0 .../cli/thrift/ThriftHttpCLIService.java | 0 .../service/cli/thrift/ThriftHttpServlet.java | 0 .../hive/service/server/HiveServer2.java | 0 .../server/ThreadWithGarbageCleanup.java | 0 .../thriftserver/ThriftserverShimUtils.scala | 0 .../v1.2/if/TCLIService.thrift | 1173 -- .../service/cli/thrift/TArrayTypeEntry.java | 383 - .../service/cli/thrift/TBinaryColumn.java | 550 - .../hive/service/cli/thrift/TBoolColumn.java | 548 - .../hive/service/cli/thrift/TBoolValue.java | 386 - .../hive/service/cli/thrift/TByteColumn.java | 548 - .../hive/service/cli/thrift/TByteValue.java | 386 - .../hive/service/cli/thrift/TCLIService.java | 15414 ---------------- .../cli/thrift/TCLIServiceConstants.java | 103 - .../cli/thrift/TCancelDelegationTokenReq.java | 491 - .../thrift/TCancelDelegationTokenResp.java | 390 - .../cli/thrift/TCancelOperationReq.java | 390 - .../cli/thrift/TCancelOperationResp.java | 390 - .../cli/thrift/TCloseOperationReq.java | 390 - .../cli/thrift/TCloseOperationResp.java | 390 - .../service/cli/thrift/TCloseSessionReq.java | 390 - .../service/cli/thrift/TCloseSessionResp.java | 390 - .../hive/service/cli/thrift/TColumn.java | 732 - .../hive/service/cli/thrift/TColumnDesc.java | 700 - .../hive/service/cli/thrift/TColumnValue.java | 671 - .../service/cli/thrift/TDoubleColumn.java | 548 - .../hive/service/cli/thrift/TDoubleValue.java | 386 - 
.../cli/thrift/TExecuteStatementReq.java | 769 - .../cli/thrift/TExecuteStatementResp.java | 505 - .../service/cli/thrift/TFetchOrientation.java | 57 - .../service/cli/thrift/TFetchResultsReq.java | 710 - .../service/cli/thrift/TFetchResultsResp.java | 608 - .../service/cli/thrift/TGetCatalogsReq.java | 390 - .../service/cli/thrift/TGetCatalogsResp.java | 505 - .../service/cli/thrift/TGetColumnsReq.java | 818 - .../service/cli/thrift/TGetColumnsResp.java | 505 - .../cli/thrift/TGetDelegationTokenReq.java | 592 - .../cli/thrift/TGetDelegationTokenResp.java | 500 - .../service/cli/thrift/TGetFunctionsReq.java | 707 - .../service/cli/thrift/TGetFunctionsResp.java | 505 - .../hive/service/cli/thrift/TGetInfoReq.java | 503 - .../hive/service/cli/thrift/TGetInfoResp.java | 493 - .../hive/service/cli/thrift/TGetInfoType.java | 180 - .../service/cli/thrift/TGetInfoValue.java | 593 - .../cli/thrift/TGetOperationStatusReq.java | 390 - .../cli/thrift/TGetOperationStatusResp.java | 827 - .../cli/thrift/TGetResultSetMetadataReq.java | 390 - .../cli/thrift/TGetResultSetMetadataResp.java | 505 - .../service/cli/thrift/TGetSchemasReq.java | 606 - .../service/cli/thrift/TGetSchemasResp.java | 505 - .../service/cli/thrift/TGetTableTypesReq.java | 390 - .../cli/thrift/TGetTableTypesResp.java | 505 - .../service/cli/thrift/TGetTablesReq.java | 870 - .../service/cli/thrift/TGetTablesResp.java | 505 - .../service/cli/thrift/TGetTypeInfoReq.java | 390 - .../service/cli/thrift/TGetTypeInfoResp.java | 505 - .../service/cli/thrift/THandleIdentifier.java | 506 - .../hive/service/cli/thrift/TI16Column.java | 548 - .../hive/service/cli/thrift/TI16Value.java | 386 - .../hive/service/cli/thrift/TI32Column.java | 548 - .../hive/service/cli/thrift/TI32Value.java | 386 - .../hive/service/cli/thrift/TI64Column.java | 548 - .../hive/service/cli/thrift/TI64Value.java | 386 - .../service/cli/thrift/TMapTypeEntry.java | 478 - .../service/cli/thrift/TOpenSessionReq.java | 785 - .../service/cli/thrift/TOpenSessionResp.java | 790 - .../service/cli/thrift/TOperationHandle.java | 705 - .../service/cli/thrift/TOperationState.java | 63 - .../service/cli/thrift/TOperationType.java | 66 - .../cli/thrift/TPrimitiveTypeEntry.java | 512 - .../service/cli/thrift/TProtocolVersion.java | 63 - .../cli/thrift/TRenewDelegationTokenReq.java | 491 - .../cli/thrift/TRenewDelegationTokenResp.java | 390 - .../apache/hive/service/cli/thrift/TRow.java | 439 - .../hive/service/cli/thrift/TRowSet.java | 702 - .../service/cli/thrift/TSessionHandle.java | 390 - .../hive/service/cli/thrift/TStatus.java | 874 - .../hive/service/cli/thrift/TStatusCode.java | 54 - .../service/cli/thrift/TStringColumn.java | 548 - .../hive/service/cli/thrift/TStringValue.java | 389 - .../service/cli/thrift/TStructTypeEntry.java | 448 - .../hive/service/cli/thrift/TTableSchema.java | 439 - .../hive/service/cli/thrift/TTypeDesc.java | 439 - .../hive/service/cli/thrift/TTypeEntry.java | 610 - .../hive/service/cli/thrift/TTypeId.java | 105 - .../cli/thrift/TTypeQualifierValue.java | 361 - .../service/cli/thrift/TTypeQualifiers.java | 450 - .../service/cli/thrift/TUnionTypeEntry.java | 448 - .../cli/thrift/TUserDefinedTypeEntry.java | 385 - .../apache/hive/service/AbstractService.java | 184 - .../apache/hive/service/CompositeService.java | 133 - .../org/apache/hive/service/CookieSigner.java | 108 - .../hive/service/ServiceOperations.java | 141 - .../org/apache/hive/service/ServiceUtils.java | 44 - .../hive/service/auth/HiveAuthFactory.java | 419 - 
.../hive/service/auth/HttpAuthUtils.java | 189 - .../hive/service/auth/KerberosSaslHelper.java | 111 - .../hive/service/auth/PlainSaslHelper.java | 154 - .../service/auth/TSetIpAddressProcessor.java | 114 - .../apache/hive/service/cli/CLIService.java | 507 - .../org/apache/hive/service/cli/Column.java | 423 - .../hive/service/cli/ColumnBasedSet.java | 149 - .../hive/service/cli/ColumnDescriptor.java | 99 - .../apache/hive/service/cli/ColumnValue.java | 288 - .../service/cli/EmbeddedCLIServiceClient.java | 208 - .../hive/service/cli/FetchOrientation.java | 54 - .../apache/hive/service/cli/GetInfoType.java | 96 - .../apache/hive/service/cli/GetInfoValue.java | 82 - .../org/apache/hive/service/cli/Handle.java | 78 - .../hive/service/cli/HandleIdentifier.java | 113 - .../hive/service/cli/HiveSQLException.java | 249 - .../apache/hive/service/cli/ICLIService.java | 105 - .../hive/service/cli/OperationHandle.java | 102 - .../hive/service/cli/OperationState.java | 108 - .../hive/service/cli/OperationType.java | 58 - .../hive/service/cli/PatternOrIdentifier.java | 47 - .../apache/hive/service/cli/RowBasedSet.java | 140 - .../org/apache/hive/service/cli/RowSet.java | 38 - .../hive/service/cli/RowSetFactory.java | 41 - .../hive/service/cli/SessionHandle.java | 67 - .../apache/hive/service/cli/TableSchema.java | 102 - .../org/apache/hive/service/cli/Type.java | 349 - .../hive/service/cli/TypeDescriptor.java | 159 - .../hive/service/cli/TypeQualifiers.java | 133 - .../operation/ClassicTableTypeMapping.java | 86 - .../operation/ExecuteStatementOperation.java | 83 - .../cli/operation/GetCatalogsOperation.java | 81 - .../cli/operation/GetColumnsOperation.java | 234 - .../cli/operation/GetFunctionsOperation.java | 147 - .../cli/operation/GetSchemasOperation.java | 96 - .../cli/operation/GetTableTypesOperation.java | 93 - .../cli/operation/GetTablesOperation.java | 135 - .../cli/operation/GetTypeInfoOperation.java | 142 - .../cli/operation/HiveCommandOperation.java | 215 - .../cli/operation/HiveTableTypeMapping.java | 51 - .../cli/operation/MetadataOperation.java | 134 - .../hive/service/cli/operation/Operation.java | 328 - .../cli/operation/OperationManager.java | 284 - .../service/cli/operation/SQLOperation.java | 456 - .../cli/operation/TableTypeMapping.java | 44 - .../hive/service/cli/session/HiveSession.java | 156 - .../service/cli/session/HiveSessionBase.java | 90 - .../service/cli/session/HiveSessionImpl.java | 842 - .../cli/session/HiveSessionImplwithUGI.java | 182 - .../service/cli/session/SessionManager.java | 377 - .../cli/thrift/ThriftBinaryCLIService.java | 121 - .../service/cli/thrift/ThriftCLIService.java | 693 - .../cli/thrift/ThriftCLIServiceClient.java | 440 - .../cli/thrift/ThriftHttpCLIService.java | 194 - .../service/cli/thrift/ThriftHttpServlet.java | 545 - .../hive/service/server/HiveServer2.java | 277 - .../server/ThreadWithGarbageCleanup.java | 77 - .../thriftserver/ThriftserverShimUtils.scala | 77 - ...IntoHiveTableBenchmark-hive1.2-results.txt | 11 - .../sql/hive/client/HiveClientImpl.scala | 3 +- .../InsertIntoHiveTableBenchmark.scala | 7 +- 320 files changed, 7 insertions(+), 69240 deletions(-) rename sql/core/{v2.3 => }/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java (100%) rename sql/core/{v2.3 => }/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala (100%) rename sql/core/{v2.3 => }/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala (100%) rename sql/core/{v2.3 => 
}/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala (100%) delete mode 100644 sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala delete mode 100644 sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala delete mode 100644 sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename sql/hive-thriftserver/{v2.3 => }/if/TCLIService.thrift (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java (100%) rename 
sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/AbstractService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/CompositeService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/CookieSigner.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/ServiceOperations.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/ServiceUtils.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/CLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ColumnValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/FetchOrientation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/GetInfoType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/GetInfoValue.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/Handle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/HiveSQLException.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/ICLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationState.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/OperationType.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowBasedSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowSet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/RowSetFactory.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/SessionHandle.java (100%) rename sql/hive-thriftserver/{v2.3 => 
}/src/main/java/org/apache/hive/service/cli/TableSchema.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/Operation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSession.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/session/SessionManager.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => 
}/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/server/HiveServer2.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java (100%) rename sql/hive-thriftserver/{v2.3 => }/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala (100%) delete mode 100644 sql/hive-thriftserver/v1.2/if/TCLIService.thrift delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java delete mode 100644 sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java delete mode 100644 
sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala
 delete mode 100644 sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt

diff --git a/dev/run-tests.py b/dev/run-tests.py
index 3e118dcbc160d..48191e9bb024d 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -325,7 +325,6 @@ def get_hive_profiles(hive_version):
     """
 
     sbt_maven_hive_profiles = {
-        "hive1.2": ["-Phive-1.2"],
         "hive2.3": ["-Phive-2.3"],
     }
 
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 129b073d75254..e9e9227d239e1 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -32,7 +32,6 @@ export LC_ALL=C
 HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pkubernetes -Pyarn -Phive"
 MVN="build/mvn"
 HADOOP_HIVE_PROFILES=(
-    hadoop-2.7-hive-1.2
     hadoop-2.7-hive-2.3
     hadoop-3.2-hive-2.3
 )
@@ -71,12 +70,9 @@ for HADOOP_HIVE_PROFILE in "${HADOOP_HIVE_PROFILES[@]}"; do
   if [[ $HADOOP_HIVE_PROFILE == **hadoop-3.2-hive-2.3** ]]; then
     HADOOP_PROFILE=hadoop-3.2
     HIVE_PROFILE=hive-2.3
-  elif [[ $HADOOP_HIVE_PROFILE == **hadoop-2.7-hive-2.3** ]]; then
-    HADOOP_PROFILE=hadoop-2.7
-    HIVE_PROFILE=hive-2.3
   else
     HADOOP_PROFILE=hadoop-2.7
-    HIVE_PROFILE=hive-1.2
+    HIVE_PROFILE=hive-2.3
   fi
   echo "Performing Maven install for $HADOOP_HIVE_PROFILE"
   $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE -P$HIVE_PROFILE jar:jar jar:test-jar install:install clean -q
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index de60aed7483c7..feff2c7e9f543 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -42,6 +42,8 @@ license: |
 
   - In Spark 3.1, incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'` will fail with IllegalArgumentException. In Spark 3.0, they result `NULL`s.
 
+  - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details.
+
 ## Upgrading from Spark SQL 3.0 to 3.0.1
 
   - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
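For illustration only, a sketch of what a build looks like once the `hive-1.2` profile is gone: the profile names below come from the script change above, while the remaining flags (`-Phive-thriftserver`, `-DskipTests`, the `clean package` goals) are just a typical invocation and are not part of this patch. Both remaining Hadoop combinations now resolve to the `hive-2.3` profile.

```
# Sketch: both remaining HADOOP_HIVE_PROFILE combinations select Hive 2.3.
# Flags other than the -Phadoop-* / -Phive-2.3 profiles are illustrative.
./build/mvn -Phadoop-2.7 -Phive-2.3 -Phive-thriftserver -DskipTests clean package
./build/mvn -Phadoop-3.2 -Phive-2.3 -Phive-thriftserver -DskipTests clean package
```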
diff --git a/pom.xml b/pom.xml index 5d6b0511ce458..b13d5ab81856c 100644 --- a/pom.xml +++ b/pom.xml @@ -2970,13 +2970,9 @@ ${basedir}/src/main/java ${basedir}/src/main/scala - ${basedir}/v${hive.version.short}/src/main/java - ${basedir}/v${hive.version.short}/src/main/scala ${basedir}/src/test/java - ${basedir}/v${hive.version.short}/src/test/java - ${basedir}/v${hive.version.short}/src/test/scala dev/checkstyle.xml ${basedir}/target/checkstyle-output.xml @@ -3148,27 +3144,6 @@ - - hive-1.2 - - org.spark-project.hive - - - 1.2.1.spark2 - - 1.2 - ${hive.deps.scope} - 2.6.0 - provided - provided - provided - provided - provided - nohive - 3.2.10 - - - hive-2.3 diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c2ed4c079d3cf..0f5d3fd55c15d 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -221,8 +221,6 @@ - v${hive.version.short}/src/main/scala - v${hive.version.short}/src/main/java src/main/scala-${scala.binary.version} @@ -235,7 +233,6 @@ - v${hive.version.short}/src/test/scala src/test/gen-java diff --git a/sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java similarity index 100% rename from sql/core/v2.3/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java rename to sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala similarity index 100% rename from sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala diff --git a/sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala similarity index 100% rename from sql/core/v2.3/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala diff --git a/sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala similarity index 100% rename from sql/core/v2.3/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala diff --git a/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java b/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java deleted file mode 100644 index 6601bcb9018f4..0000000000000 --- a/sql/core/v1.2/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnVector.java +++ /dev/null @@ -1,208 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.orc; - -import java.math.BigDecimal; - -import org.apache.orc.storage.ql.exec.vector.*; - -import org.apache.spark.sql.catalyst.util.DateTimeUtils; -import org.apache.spark.sql.catalyst.util.RebaseDateTime; -import org.apache.spark.sql.types.DataType; -import org.apache.spark.sql.types.DateType; -import org.apache.spark.sql.types.Decimal; -import org.apache.spark.sql.types.TimestampType; -import org.apache.spark.sql.vectorized.ColumnarArray; -import org.apache.spark.sql.vectorized.ColumnarMap; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * A column vector class wrapping Hive's ColumnVector. Because Spark ColumnarBatch only accepts - * Spark's vectorized.ColumnVector, this column vector is used to adapt Hive ColumnVector with - * Spark ColumnarVector. - */ -public class OrcColumnVector extends org.apache.spark.sql.vectorized.ColumnVector { - private ColumnVector baseData; - private LongColumnVector longData; - private DoubleColumnVector doubleData; - private BytesColumnVector bytesData; - private DecimalColumnVector decimalData; - private TimestampColumnVector timestampData; - private final boolean isTimestamp; - private final boolean isDate; - - private int batchSize; - - OrcColumnVector(DataType type, ColumnVector vector) { - super(type); - - if (type instanceof TimestampType) { - isTimestamp = true; - } else { - isTimestamp = false; - } - - if (type instanceof DateType) { - isDate = true; - } else { - isDate = false; - } - - baseData = vector; - if (vector instanceof LongColumnVector) { - longData = (LongColumnVector) vector; - } else if (vector instanceof DoubleColumnVector) { - doubleData = (DoubleColumnVector) vector; - } else if (vector instanceof BytesColumnVector) { - bytesData = (BytesColumnVector) vector; - } else if (vector instanceof DecimalColumnVector) { - decimalData = (DecimalColumnVector) vector; - } else if (vector instanceof TimestampColumnVector) { - timestampData = (TimestampColumnVector) vector; - } else { - throw new UnsupportedOperationException(); - } - } - - public void setBatchSize(int batchSize) { - this.batchSize = batchSize; - } - - @Override - public void close() { - - } - - @Override - public boolean hasNull() { - return !baseData.noNulls; - } - - @Override - public int numNulls() { - if (baseData.isRepeating) { - if (baseData.isNull[0]) { - return batchSize; - } else { - return 0; - } - } else if (baseData.noNulls) { - return 0; - } else { - int count = 0; - for (int i = 0; i < batchSize; i++) { - if (baseData.isNull[i]) count++; - } - return count; - } - } - - /* A helper method to get the row index in a column. */ - private int getRowIndex(int rowId) { - return baseData.isRepeating ? 
0 : rowId; - } - - @Override - public boolean isNullAt(int rowId) { - return baseData.isNull[getRowIndex(rowId)]; - } - - @Override - public boolean getBoolean(int rowId) { - return longData.vector[getRowIndex(rowId)] == 1; - } - - @Override - public byte getByte(int rowId) { - return (byte) longData.vector[getRowIndex(rowId)]; - } - - @Override - public short getShort(int rowId) { - return (short) longData.vector[getRowIndex(rowId)]; - } - - @Override - public int getInt(int rowId) { - int value = (int) longData.vector[getRowIndex(rowId)]; - if (isDate) { - return RebaseDateTime.rebaseJulianToGregorianDays(value); - } else { - return value; - } - } - - @Override - public long getLong(int rowId) { - int index = getRowIndex(rowId); - if (isTimestamp) { - return DateTimeUtils.fromJavaTimestamp(timestampData.asScratchTimestamp(index)); - } else { - return longData.vector[index]; - } - } - - @Override - public float getFloat(int rowId) { - return (float) doubleData.vector[getRowIndex(rowId)]; - } - - @Override - public double getDouble(int rowId) { - return doubleData.vector[getRowIndex(rowId)]; - } - - @Override - public Decimal getDecimal(int rowId, int precision, int scale) { - if (isNullAt(rowId)) return null; - BigDecimal data = decimalData.vector[getRowIndex(rowId)].getHiveDecimal().bigDecimalValue(); - return Decimal.apply(data, precision, scale); - } - - @Override - public UTF8String getUTF8String(int rowId) { - if (isNullAt(rowId)) return null; - int index = getRowIndex(rowId); - BytesColumnVector col = bytesData; - return UTF8String.fromBytes(col.vector[index], col.start[index], col.length[index]); - } - - @Override - public byte[] getBinary(int rowId) { - if (isNullAt(rowId)) return null; - int index = getRowIndex(rowId); - byte[] binary = new byte[bytesData.length[index]]; - System.arraycopy(bytesData.vector[index], bytesData.start[index], binary, 0, binary.length); - return binary; - } - - @Override - public ColumnarArray getArray(int rowId) { - throw new UnsupportedOperationException(); - } - - @Override - public ColumnarMap getMap(int rowId) { - throw new UnsupportedOperationException(); - } - - @Override - public org.apache.spark.sql.vectorized.ColumnVector getChild(int ordinal) { - throw new UnsupportedOperationException(); - } -} diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala deleted file mode 100644 index 1dccf0ca1faef..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/DaysWritable.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.io.{DataInput, DataOutput, IOException} -import java.sql.Date - -import org.apache.hadoop.io.WritableUtils -import org.apache.orc.storage.serde2.io.DateWritable - -import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseJulianToGregorianDays} - -/** - * The class accepts/returns days in Gregorian calendar and rebase them - * via conversion to local date in Julian calendar for dates before 1582-10-15 - * in read/write for backward compatibility with Spark 2.4 and earlier versions. - * - * This is a clone of `org.apache.spark.sql.execution.datasources.DaysWritable`. - * The class is cloned because Hive ORC v1.2 uses different `DateWritable`: - * - v1.2: `org.apache.orc.storage.serde2.io.DateWritable` - * - v2.3 and `HiveInspectors`: `org.apache.hadoop.hive.serde2.io.DateWritable` - * - * @param gregorianDays The number of days since the epoch 1970-01-01 in - * Gregorian calendar. - * @param julianDays The number of days since the epoch 1970-01-01 in - * Julian calendar. - */ -class DaysWritable( - var gregorianDays: Int, - var julianDays: Int) - extends DateWritable { - - def this() = this(0, 0) - def this(gregorianDays: Int) = - this(gregorianDays, rebaseGregorianToJulianDays(gregorianDays)) - def this(dateWritable: DateWritable) = { - this( - gregorianDays = dateWritable match { - case daysWritable: DaysWritable => daysWritable.gregorianDays - case dateWritable: DateWritable => - rebaseJulianToGregorianDays(dateWritable.getDays) - }, - julianDays = dateWritable.getDays) - } - - override def getDays: Int = julianDays - override def get(): Date = new Date(DateWritable.daysToMillis(julianDays)) - - override def set(d: Int): Unit = { - gregorianDays = d - julianDays = rebaseGregorianToJulianDays(d) - } - - @throws[IOException] - override def write(out: DataOutput): Unit = { - WritableUtils.writeVInt(out, julianDays) - } - - @throws[IOException] - override def readFields(in: DataInput): Unit = { - julianDays = WritableUtils.readVInt(in) - gregorianDays = rebaseJulianToGregorianDays(julianDays) - } -} diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala deleted file mode 100644 index 0e657bfe66238..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ /dev/null @@ -1,275 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.time.{Instant, LocalDate} - -import org.apache.orc.storage.common.`type`.HiveDecimal -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.orc.storage.ql.io.sarg.SearchArgument.Builder -import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder -import org.apache.orc.storage.serde2.io.HiveDecimalWritable - -import org.apache.spark.SparkException -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{instantToMicros, localDateToDays, toJavaDate, toJavaTimestamp} -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types._ - -/** - * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. - * - * Due to limitation of ORC `SearchArgument` builder, we had to implement separate checking and - * conversion passes through the Filter to make sure we only convert predicates that are known - * to be convertible. - * - * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't - * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite - * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using - * existing simpler ones. - * - * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and - * `startNot()` mutate internal state of the builder instance. This forces us to translate all - * convertible filters with a single builder instance. However, if we try to translate a filter - * before checking whether it can be converted or not, we may end up with a builder whose internal - * state is inconsistent in the case of an inconvertible filter. - * - * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then - * try to convert its children. Say we convert `left` child successfully, but find that `right` - * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent - * now. - * - * The workaround employed here is to trim the Spark filters before trying to convert them. This - * way, we can only do the actual conversion on the part of the Filter that is known to be - * convertible. - * - * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of - * builder methods mentioned above can only be found in test code, where all tested filters are - * known to be convertible. - */ -private[sql] object OrcFilters extends OrcFiltersBase { - - /** - * Create ORC filter as a SearchArgument instance. - */ - def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = { - val dataTypeMap = OrcFilters.getSearchableTypeMap(schema, SQLConf.get.caseSensitiveAnalysis) - // Combines all convertible filters using `And` to produce a single conjunction - val conjunctionOptional = buildTree(convertibleFilters(schema, dataTypeMap, filters)) - conjunctionOptional.map { conjunction => - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. - // The input predicate is fully convertible. There should not be any empty result in the - // following recursive method call `buildSearchArgument`. 
- buildSearchArgument(dataTypeMap, conjunction, newBuilder).build() - } - } - - def convertibleFilters( - schema: StructType, - dataTypeMap: Map[String, OrcPrimitiveField], - filters: Seq[Filter]): Seq[Filter] = { - import org.apache.spark.sql.sources._ - - def convertibleFiltersHelper( - filter: Filter, - canPartialPushDown: Boolean): Option[Filter] = filter match { - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. - case And(left, right) => - val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) - val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) - (leftResultOptional, rightResultOptional) match { - case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult)) - case (Some(leftResult), None) if canPartialPushDown => Some(leftResult) - case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult) - case _ => None - } - - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). - case Or(left, right) => - for { - lhs <- convertibleFiltersHelper(left, canPartialPushDown) - rhs <- convertibleFiltersHelper(right, canPartialPushDown) - } yield Or(lhs, rhs) - case Not(pred) => - val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false) - childResultOptional.map(Not) - case other => - for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other - } - filters.flatMap { filter => - convertibleFiltersHelper(filter, true) - } - } - - /** - * Get PredicateLeafType which is corresponding to the given DataType. - */ - def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match { - case BooleanType => PredicateLeaf.Type.BOOLEAN - case ByteType | ShortType | IntegerType | LongType => PredicateLeaf.Type.LONG - case FloatType | DoubleType => PredicateLeaf.Type.FLOAT - case StringType => PredicateLeaf.Type.STRING - case DateType => PredicateLeaf.Type.DATE - case TimestampType => PredicateLeaf.Type.TIMESTAMP - case _: DecimalType => PredicateLeaf.Type.DECIMAL - case _ => throw new UnsupportedOperationException(s"DataType: ${dataType.catalogString}") - } - - /** - * Cast literal values for filters. - * - * We need to cast to long because ORC raises exceptions - * at 'checkLiteralType' of SearchArgumentImpl.java. 
- */ - private def castLiteralValue(value: Any, dataType: DataType): Any = dataType match { - case ByteType | ShortType | IntegerType | LongType => - value.asInstanceOf[Number].longValue - case FloatType | DoubleType => - value.asInstanceOf[Number].doubleValue() - case _: DecimalType => - new HiveDecimalWritable(HiveDecimal.create(value.asInstanceOf[java.math.BigDecimal])) - case _: DateType if value.isInstanceOf[LocalDate] => - toJavaDate(localDateToDays(value.asInstanceOf[LocalDate])) - case _: TimestampType if value.isInstanceOf[Instant] => - toJavaTimestamp(instantToMicros(value.asInstanceOf[Instant])) - case _ => value - } - - /** - * Build a SearchArgument and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input predicates, which should be fully convertible to SearchArgument. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Builder = { - import org.apache.spark.sql.sources._ - - expression match { - case And(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Or(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Not(child) => - buildSearchArgument(dataTypeMap, child, builder.startNot()).end() - - case other => - buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse { - throw new SparkException( - "The input filter of OrcFilters.buildSearchArgument should be fully convertible.") - } - } - } - - /** - * Build a SearchArgument for a leaf predicate and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input filter predicates. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildLeafSearchArgument( - dataTypeMap: Map[String, OrcPrimitiveField], - expression: Filter, - builder: Builder): Option[Builder] = { - def getType(attribute: String): PredicateLeaf.Type = - getPredicateLeafType(dataTypeMap(attribute).fieldType) - - import org.apache.spark.sql.sources._ - - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). 
- expression match { - case EqualTo(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .equals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case EqualNullSafe(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .nullSafeEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case LessThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startAnd() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThan(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThanEquals(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case GreaterThanOrEqual(name, value) if dataTypeMap.contains(name) => - val castedValue = castLiteralValue(value, dataTypeMap(name).fieldType) - Some(builder.startNot() - .lessThan(dataTypeMap(name).fieldName, getType(name), castedValue).end()) - - case IsNull(name) if dataTypeMap.contains(name) => - Some(builder.startAnd().isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case IsNotNull(name) if dataTypeMap.contains(name) => - Some(builder.startNot().isNull(dataTypeMap(name).fieldName, getType(name)).end()) - - case In(name, values) if dataTypeMap.contains(name) => - val castedValues = values.map(v => castLiteralValue(v, dataTypeMap(name).fieldType)) - Some(builder.startAnd().in(dataTypeMap(name).fieldName, getType(name), - castedValues.map(_.asInstanceOf[AnyRef]): _*).end()) - - case _ => None - } - } -} - diff --git a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala b/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala deleted file mode 100644 index 7fbc1cd205b13..0000000000000 --- a/sql/core/v1.2/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcShimUtils.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import org.apache.orc.storage.common.`type`.HiveDecimal -import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch -import org.apache.orc.storage.ql.io.sarg.{SearchArgument => OrcSearchArgument} -import org.apache.orc.storage.ql.io.sarg.PredicateLeaf.{Operator => OrcOperator} -import org.apache.orc.storage.serde2.io.{DateWritable, HiveDecimalWritable} - -import org.apache.spark.sql.catalyst.expressions.SpecializedGetters -import org.apache.spark.sql.types.Decimal - -/** - * Various utilities for ORC used to upgrade the built-in Hive. - */ -private[sql] object OrcShimUtils { - - class VectorizedRowBatchWrap(val batch: VectorizedRowBatch) {} - - private[sql] type Operator = OrcOperator - private[sql] type SearchArgument = OrcSearchArgument - - def getGregorianDays(value: Any): Int = { - new DaysWritable(value.asInstanceOf[DateWritable]).gregorianDays - } - - def getDecimal(value: Any): Decimal = { - val decimal = value.asInstanceOf[HiveDecimalWritable].getHiveDecimal() - Decimal(decimal.bigDecimalValue, decimal.precision(), decimal.scale()) - } - - def getDateWritable(reuseObj: Boolean): (SpecializedGetters, Int) => DateWritable = { - if (reuseObj) { - val result = new DaysWritable() - (getter, ordinal) => - result.set(getter.getInt(ordinal)) - result - } else { - (getter: SpecializedGetters, ordinal: Int) => - new DaysWritable(getter.getInt(ordinal)) - } - } - - def getHiveDecimalWritable(precision: Int, scale: Int): - (SpecializedGetters, Int) => HiveDecimalWritable = { - (getter, ordinal) => - val d = getter.getDecimal(ordinal, precision, scale) - new HiveDecimalWritable(HiveDecimal.create(d.toJavaBigDecimal)) - } -} diff --git a/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala deleted file mode 100644 index e159a0588dfff..0000000000000 --- a/sql/core/v1.2/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ /dev/null @@ -1,676 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import java.math.MathContext -import java.nio.charset.StandardCharsets -import java.sql.{Date, Timestamp} - -import scala.collection.JavaConverters._ - -import org.apache.orc.storage.ql.io.sarg.{PredicateLeaf, SearchArgument} -import org.apache.orc.storage.ql.io.sarg.SearchArgumentFactory.newBuilder - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.{AnalysisException, Column, DataFrame, Row} -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation -import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ - -/** - * A test suite that tests Apache ORC filter API based filter pushdown optimization. - * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. - * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. - * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. - * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. - */ -class OrcFilterSuite extends OrcTest with SharedSparkSession { - - override protected def sparkConf: SparkConf = - super - .sparkConf - .set(SQLConf.USE_V1_SOURCE_LIST, "") - - protected def checkFilterPredicate( - df: DataFrame, - predicate: Predicate, - checker: (SearchArgument) => Unit): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - .where(Column(predicate)) - - query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _)) => - assert(filters.nonEmpty, "No filter is analyzed from the given query") - assert(o.pushedFilters.nonEmpty, "No filter is pushed down") - val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for ${o.pushedFilters}") - checker(maybeFilter.get) - - case _ => - throw new AnalysisException("Can not match OrcTable in the query.") - } - } - - protected def checkFilterPredicate - (predicate: Predicate, filterOperator: PredicateLeaf.Operator) - (implicit df: DataFrame): Unit = { - def checkComparisonOperator(filter: SearchArgument) = { - val operator = filter.getLeaves.asScala - assert(operator.map(_.getOperator).contains(filterOperator)) - } - checkFilterPredicate(df, predicate, checkComparisonOperator) - } - - protected def checkFilterPredicate - (predicate: Predicate, stringExpr: String) - (implicit df: DataFrame): Unit = { - def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr) - } - checkFilterPredicate(df, predicate, checkLogicalOperator) - } - - test("filter pushdown - integer") { - withNestedOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val intAttr = df(colName).expr - assert(df(colName).expr.dataType === IntegerType) - - checkFilterPredicate(intAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(intAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(intAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - 
checkFilterPredicate(intAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(intAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(intAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(intAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === intAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> intAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > intAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < intAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= intAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= intAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - long") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toLong)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val longAttr = df(colName).expr - assert(df(colName).expr.dataType === LongType) - - checkFilterPredicate(longAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(longAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(longAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(longAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(longAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(longAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(longAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === longAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> longAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > longAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < longAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= longAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= longAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - float") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toFloat)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val floatAttr = df(colName).expr - assert(df(colName).expr.dataType === FloatType) - - checkFilterPredicate(floatAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(floatAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(floatAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(floatAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(floatAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(floatAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(floatAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === floatAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> floatAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > floatAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < floatAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= floatAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= floatAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - double") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1(Option(i.toDouble)))) { 
case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val doubleAttr = df(colName).expr - assert(df(colName).expr.dataType === DoubleType) - - checkFilterPredicate(doubleAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(doubleAttr === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(doubleAttr <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(doubleAttr < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(doubleAttr > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(doubleAttr <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(doubleAttr >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === doubleAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> doubleAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > doubleAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < doubleAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= doubleAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= doubleAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - string") { - withNestedOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val strAttr = df(colName).expr - assert(df(colName).expr.dataType === StringType) - - checkFilterPredicate(strAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(strAttr === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(strAttr <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(strAttr < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(strAttr > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(strAttr <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(strAttr >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === strAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> strAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > strAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < strAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= strAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= strAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - boolean") { - withNestedOrcDataFrame( - (true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val booleanAttr = df(colName).expr - assert(df(colName).expr.dataType === BooleanType) - - checkFilterPredicate(booleanAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(booleanAttr === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(booleanAttr <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(booleanAttr < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(booleanAttr > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(booleanAttr <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(booleanAttr >= false, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(false) === booleanAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> booleanAttr, - 
PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > booleanAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < booleanAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= booleanAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= booleanAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - decimal") { - withNestedOrcDataFrame( - (1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { case (inputDF, colName, _) => - implicit val df: DataFrame = inputDF - - val decimalAttr = df(colName).expr - assert(df(colName).expr.dataType === DecimalType(38, 18)) - - checkFilterPredicate(decimalAttr.isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate(decimalAttr === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(decimalAttr <=> BigDecimal.valueOf(1), - PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate(decimalAttr < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(decimalAttr > BigDecimal.valueOf(3), - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(decimalAttr <= BigDecimal.valueOf(1), - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(decimalAttr >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === decimalAttr, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) <=> decimalAttr, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > decimalAttr, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < decimalAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= decimalAttr, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= decimalAttr, PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - timestamp") { - val input = Seq( - "1000-01-01 01:02:03", - "1582-10-01 00:11:22", - "1900-01-01 23:59:59", - "2020-05-25 10:11:12").map(Timestamp.valueOf) - - withOrcFile(input.map(Tuple1(_))) { path => - Seq(false, true).foreach { java8Api => - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString) { - readFile(path) { implicit df => - val timestamps = input.map(Literal(_)) - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(timestamps(0)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(timestamps(2)) < $"_1", - PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(timestamps(0)) >= $"_1", - PredicateLeaf.Operator.LESS_THAN_EQUALS) - 
checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - } - } - } - - test("filter pushdown - combinations with logical operators") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate( - $"_1".isNotNull, - "leaf-0 = (IS_NULL _1), expr = (not leaf-0)" - ) - checkFilterPredicate( - $"_1" =!= 1, - "leaf-0 = (IS_NULL _1), leaf-1 = (EQUALS _1 1), expr = (and (not leaf-0) (not leaf-1))" - ) - checkFilterPredicate( - !($"_1" < 4), - "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 4), expr = (and (not leaf-0) (not leaf-1))" - ) - checkFilterPredicate( - $"_1" < 2 || $"_1" > 3, - "leaf-0 = (LESS_THAN _1 2), leaf-1 = (LESS_THAN_EQUALS _1 3), " + - "expr = (or leaf-0 (not leaf-1))" - ) - checkFilterPredicate( - $"_1" < 2 && $"_1" > 3, - "leaf-0 = (IS_NULL _1), leaf-1 = (LESS_THAN _1 2), leaf-2 = (LESS_THAN_EQUALS _1 3), " + - "expr = (and (not leaf-0) leaf-1 (not leaf-2))" - ) - } - } - - test("filter pushdown - date") { - val input = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21").map { day => - Date.valueOf(day) - } - withOrcFile(input.map(Tuple1(_))) { path => - Seq(false, true).foreach { java8Api => - withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString) { - readFile(path) { implicit df => - val dates = input.map(Literal(_)) - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === dates(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> dates(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < dates(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > dates(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= dates(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= dates(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(dates(0) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(dates(0) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(dates(1) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(dates(2) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(dates(0) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(dates(3) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - } - } - } - - test("no filter pushdown - non-supported types") { - implicit class IntToBinary(int: Int) { - def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) - } - // ArrayType - withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate($"_1".isNull, noneSupported = true) - } - // BinaryType - withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate($"_1" <=> 1.b, noneSupported = true) - } - // MapType - withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate($"_1".isNotNull, noneSupported = true) - } - } - - test("SPARK-12218 and SPARK-25699 Converting conjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - StringContains("b", "prefix") - )).get.toString - 
} - - // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - Not(And( - GreaterThan("a", 1), - StringContains("b", "prefix") - )) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate and push down `LessThan` - assertResult("leaf-0 = (LESS_THAN a 10), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. - assertResult("leaf-0 = (LESS_THAN a 10), leaf-1 = (LESS_THAN_EQUALS a 1)," + - " expr = (and leaf-0 (not leaf-1))") { - OrcFilters.createFilter(schema, Array( - And( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ), - GreaterThan("a", 1) - ) - )).get.toString - } - } - - test("SPARK-27699 Converting disjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - - // The predicate `StringContains` predicate is not able to be pushed down. - assertResult("leaf-0 = (LESS_THAN_EQUALS a 10), leaf-1 = (LESS_THAN a 1)," + - " expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - GreaterThan("a", 10), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assertResult("leaf-0 = (LESS_THAN_EQUALS a 10), leaf-1 = (LESS_THAN a 1)," + - " expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - And( - GreaterThan("a", 10), - StringContains("b", "foobar") - ), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assert(OrcFilters.createFilter(schema, Array( - Or( - StringContains("b", "foobar"), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).isEmpty) - } - - test("SPARK-27160: Fix casting of the DecimalType literal") { - import org.apache.spark.sql.sources._ - val schema = StructType(Array(StructField("a", DecimalType(3, 2)))) - assertResult("leaf-0 = (LESS_THAN a 3.14), expr = leaf-0") { - OrcFilters.createFilter(schema, Array( - LessThan( - "a", - new java.math.BigDecimal(3.14, MathContext.DECIMAL64).setScale(2))) - ).get.toString - } - } - - test("SPARK-32622: case sensitivity in predicate pushdown") { - withTempPath { dir => - val count = 10 - val tableName = "spark_32622" - val tableDir1 = dir.getAbsoluteFile + "/table1" - - // Physical ORC files have both `A` and `a` fields. - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - spark.range(count).repartition(count).selectExpr("id - 1 as A", "id as a") - .write.mode("overwrite").orc(tableDir1) - } - - // Metastore table has both `A` and `a` fields too. 
- withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - sql( - s""" - |CREATE TABLE $tableName (A LONG, a LONG) USING ORC LOCATION '$tableDir1' - """.stripMargin) - - checkAnswer(sql(s"select a, A from $tableName"), (0 until count).map(c => Row(c, c - 1))) - - val actual1 = stripSparkFilter(sql(s"select A from $tableName where A < 0")) - assert(actual1.count() == 1) - - val actual2 = stripSparkFilter(sql(s"select A from $tableName where a < 0")) - assert(actual2.count() == 0) - } - - // Exception thrown for ambiguous case. - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - val e = intercept[AnalysisException] { - sql(s"select a from $tableName where a < 0").collect() - } - assert(e.getMessage.contains( - "Reference 'a' is ambiguous")) - } - } - - // Metastore table has only `A` field. - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql( - s""" - |CREATE TABLE $tableName (A LONG) USING ORC LOCATION '$tableDir1' - """.stripMargin) - - val e = intercept[SparkException] { - sql(s"select A from $tableName where A < 0").collect() - } - assert(e.getCause.isInstanceOf[RuntimeException] && e.getCause.getMessage.contains( - """Found duplicate field(s) "A": [A, a] in case-insensitive mode""")) - } - } - - // Physical ORC files have only `A` field. - val tableDir2 = dir.getAbsoluteFile + "/table2" - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - spark.range(count).repartition(count).selectExpr("id - 1 as A") - .write.mode("overwrite").orc(tableDir2) - } - - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql( - s""" - |CREATE TABLE $tableName (a LONG) USING ORC LOCATION '$tableDir2' - """.stripMargin) - - checkAnswer(sql(s"select a from $tableName"), (0 until count).map(c => Row(c - 1))) - - val actual = stripSparkFilter(sql(s"select a from $tableName where a < 0")) - assert(actual.count() == 1) - } - } - - withTable(tableName) { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - sql( - s""" - |CREATE TABLE $tableName (A LONG) USING ORC LOCATION '$tableDir2' - """.stripMargin) - - checkAnswer(sql(s"select A from $tableName"), (0 until count).map(c => Row(c - 1))) - - val actual = stripSparkFilter(sql(s"select A from $tableName where A < 0")) - assert(actual.count() == 1) - } - } - } - } - - test("SPARK-32646: Case-insensitive field resolution for pushdown when reading ORC") { - import org.apache.spark.sql.sources._ - - def getOrcFilter( - schema: StructType, - filters: Seq[Filter], - caseSensitive: String): Option[SearchArgument] = { - var orcFilter: Option[SearchArgument] = None - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { - orcFilter = - OrcFilters.createFilter(schema, filters) - } - orcFilter - } - - def testFilter( - schema: StructType, - filters: Seq[Filter], - expected: SearchArgument): Unit = { - val caseSensitiveFilters = getOrcFilter(schema, filters, "true") - val caseInsensitiveFilters = getOrcFilter(schema, filters, "false") - - assert(caseSensitiveFilters.isEmpty) - assert(caseInsensitiveFilters.isDefined) - - assert(caseInsensitiveFilters.get.getLeaves().size() > 0) - assert(caseInsensitiveFilters.get.getLeaves().size() == expected.getLeaves().size()) - (0 until expected.getLeaves().size()).foreach { index => - assert(caseInsensitiveFilters.get.getLeaves().get(index) == expected.getLeaves().get(index)) - } - } - - val schema1 = StructType(Seq(StructField("cint", IntegerType))) - testFilter(schema1, Seq(GreaterThan("CINT", 1)), - newBuilder.startNot() - 
.lessThanEquals("cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema1, Seq(
-      And(GreaterThan("CINT", 1), EqualTo("Cint", 2))),
-      newBuilder.startAnd()
-        .startNot()
-        .lessThanEquals("cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`()
-        .equals("cint", OrcFilters.getPredicateLeafType(IntegerType), 2L)
-        .`end`().build())
-
-    // Nested column case
-    val schema2 = StructType(Seq(StructField("a",
-      StructType(Seq(StructField("cint", IntegerType))))))
-
-    testFilter(schema2, Seq(GreaterThan("A.CINT", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(GreaterThan("a.CINT", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(GreaterThan("A.cint", 1)),
-      newBuilder.startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`().build())
-    testFilter(schema2, Seq(
-      And(GreaterThan("a.CINT", 1), EqualTo("a.Cint", 2))),
-      newBuilder.startAnd()
-        .startNot()
-        .lessThanEquals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 1L).`end`()
-        .equals("a.cint", OrcFilters.getPredicateLeafType(IntegerType), 2L)
-        .`end`().build())
-  }
-}
-
diff --git a/sql/hive-thriftserver/v2.3/if/TCLIService.thrift b/sql/hive-thriftserver/if/TCLIService.thrift
similarity index 100%
rename from sql/hive-thriftserver/v2.3/if/TCLIService.thrift
rename to sql/hive-thriftserver/if/TCLIService.thrift
diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml
index 5bf20b209aff7..4a96afe9df20a 100644
--- a/sql/hive-thriftserver/pom.xml
+++ b/sql/hive-thriftserver/pom.xml
@@ -146,9 +146,7 @@
-                <source>v${hive.version.short}/src/gen/java</source>
-                <source>v${hive.version.short}/src/main/java</source>
-                <source>v${hive.version.short}/src/main/scala</source>
+                <source>src/gen/java</source>
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java
diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java
similarity index 100%
rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java
rename to
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java rename to 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java diff --git 
a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java 
rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java rename to sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/AbstractService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CompositeService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/CookieSigner.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceOperations.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/ServiceUtils.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/CLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ColumnValue.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnValue.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/FetchOrientation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/FetchOrientation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/FetchOrientation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoType.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/GetInfoValue.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/Handle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/Handle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/Handle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HiveSQLException.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/HiveSQLException.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/HiveSQLException.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/ICLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationHandle.java similarity index 100% rename from 
sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationHandle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationState.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationState.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationState.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationType.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/OperationType.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/OperationType.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowBasedSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowBasedSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowBasedSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSet.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSetFactory.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/RowSetFactory.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/RowSetFactory.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/SessionHandle.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/SessionHandle.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/SessionHandle.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TableSchema.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TableSchema.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TableSchema.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java 
similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetCrossReferenceOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetPrimaryKeysOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/Operation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSession.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSession.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSession.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContext.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionHookContextImpl.java diff --git 
a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/session/SessionManager.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java rename to 
sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/HiveServer2.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java diff --git a/sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java rename to sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java diff --git a/sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala similarity index 100% rename from sql/hive-thriftserver/v2.3/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala rename to sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala diff --git a/sql/hive-thriftserver/v1.2/if/TCLIService.thrift b/sql/hive-thriftserver/v1.2/if/TCLIService.thrift deleted file mode 100644 index 225e319737811..0000000000000 --- a/sql/hive-thriftserver/v1.2/if/TCLIService.thrift +++ /dev/null @@ -1,1173 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Coding Conventions for this file: -// -// Structs/Enums/Unions -// * Struct, Enum, and Union names begin with a "T", -// and use a capital letter for each new word, with no underscores. -// * All fields should be declared as either optional or required. -// -// Functions -// * Function names start with a capital letter and have a capital letter for -// each new word, with no underscores. -// * Each function should take exactly one parameter, named TFunctionNameReq, -// and should return either void or TFunctionNameResp. This convention allows -// incremental updates. -// -// Services -// * Service names begin with the letter "T", use a capital letter for each -// new word (with no underscores), and end with the word "Service". - -namespace java org.apache.hive.service.cli.thrift -namespace cpp apache.hive.service.cli.thrift - -// List of protocol versions. A new token should be -// added to the end of this list every time a change is made. 
-enum TProtocolVersion { - HIVE_CLI_SERVICE_PROTOCOL_V1, - - // V2 adds support for asynchronous execution - HIVE_CLI_SERVICE_PROTOCOL_V2 - - // V3 add varchar type, primitive type qualifiers - HIVE_CLI_SERVICE_PROTOCOL_V3 - - // V4 add decimal precision/scale, char type - HIVE_CLI_SERVICE_PROTOCOL_V4 - - // V5 adds error details when GetOperationStatus returns in error state - HIVE_CLI_SERVICE_PROTOCOL_V5 - - // V6 uses binary type for binary payload (was string) and uses columnar result set - HIVE_CLI_SERVICE_PROTOCOL_V6 - - // V7 adds support for delegation token based connection - HIVE_CLI_SERVICE_PROTOCOL_V7 - - // V8 adds support for interval types - HIVE_CLI_SERVICE_PROTOCOL_V8 -} - -enum TTypeId { - BOOLEAN_TYPE, - TINYINT_TYPE, - SMALLINT_TYPE, - INT_TYPE, - BIGINT_TYPE, - FLOAT_TYPE, - DOUBLE_TYPE, - STRING_TYPE, - TIMESTAMP_TYPE, - BINARY_TYPE, - ARRAY_TYPE, - MAP_TYPE, - STRUCT_TYPE, - UNION_TYPE, - USER_DEFINED_TYPE, - DECIMAL_TYPE, - NULL_TYPE, - DATE_TYPE, - VARCHAR_TYPE, - CHAR_TYPE, - INTERVAL_YEAR_MONTH_TYPE, - INTERVAL_DAY_TIME_TYPE -} - -const set<TTypeId> PRIMITIVE_TYPES = [ - TTypeId.BOOLEAN_TYPE, - TTypeId.TINYINT_TYPE, - TTypeId.SMALLINT_TYPE, - TTypeId.INT_TYPE, - TTypeId.BIGINT_TYPE, - TTypeId.FLOAT_TYPE, - TTypeId.DOUBLE_TYPE, - TTypeId.STRING_TYPE, - TTypeId.TIMESTAMP_TYPE, - TTypeId.BINARY_TYPE, - TTypeId.DECIMAL_TYPE, - TTypeId.NULL_TYPE, - TTypeId.DATE_TYPE, - TTypeId.VARCHAR_TYPE, - TTypeId.CHAR_TYPE, - TTypeId.INTERVAL_YEAR_MONTH_TYPE, - TTypeId.INTERVAL_DAY_TIME_TYPE -] - -const set<TTypeId> COMPLEX_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE - TTypeId.STRUCT_TYPE - TTypeId.UNION_TYPE - TTypeId.USER_DEFINED_TYPE -] - -const set<TTypeId> COLLECTION_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE -] - -const map<TTypeId,string> TYPE_NAMES = { - TTypeId.BOOLEAN_TYPE: "BOOLEAN", - TTypeId.TINYINT_TYPE: "TINYINT", - TTypeId.SMALLINT_TYPE: "SMALLINT", - TTypeId.INT_TYPE: "INT", - TTypeId.BIGINT_TYPE: "BIGINT", - TTypeId.FLOAT_TYPE: "FLOAT", - TTypeId.DOUBLE_TYPE: "DOUBLE", - TTypeId.STRING_TYPE: "STRING", - TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", - TTypeId.BINARY_TYPE: "BINARY", - TTypeId.ARRAY_TYPE: "ARRAY", - TTypeId.MAP_TYPE: "MAP", - TTypeId.STRUCT_TYPE: "STRUCT", - TTypeId.UNION_TYPE: "UNIONTYPE", - TTypeId.DECIMAL_TYPE: "DECIMAL", - TTypeId.NULL_TYPE: "NULL" - TTypeId.DATE_TYPE: "DATE" - TTypeId.VARCHAR_TYPE: "VARCHAR" - TTypeId.CHAR_TYPE: "CHAR" - TTypeId.INTERVAL_YEAR_MONTH_TYPE: "INTERVAL_YEAR_MONTH" - TTypeId.INTERVAL_DAY_TIME_TYPE: "INTERVAL_DAY_TIME" -} - -// Thrift does not support recursively defined types or forward declarations, -// which makes it difficult to represent Hive's nested types. -// To get around these limitations TTypeDesc employs a type list that maps -// integer "pointers" to TTypeEntry objects.
The following examples show -// how different types are represented using this scheme: -// -// "INT": -// TTypeDesc { -// types = [ -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "ARRAY<INT>": -// TTypeDesc { -// types = [ -// TTypeEntry.array_entry { -// object_type_ptr = 1 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "MAP<INT,STRING>": -// TTypeDesc { -// types = [ -// TTypeEntry.map_entry { -// key_type_ptr = 1 -// value_type_ptr = 2 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// }, -// TTypeEntry.primitive_entry { -// type = STRING_TYPE -// } -// ] -// } - -typedef i32 TTypeEntryPtr - -// Valid TTypeQualifiers key names -const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" - -// Type qualifier key name for decimal -const string PRECISION = "precision" -const string SCALE = "scale" - -union TTypeQualifierValue { - 1: optional i32 i32Value - 2: optional string stringValue -} - -// Type qualifiers for primitive type. -struct TTypeQualifiers { - 1: required map<string, TTypeQualifierValue> qualifiers -} - -// Type entry for a primitive type. -struct TPrimitiveTypeEntry { - // The primitive type token. This must satisfy the condition - // that type is in the PRIMITIVE_TYPES set. - 1: required TTypeId type - 2: optional TTypeQualifiers typeQualifiers -} - -// Type entry for an ARRAY type. -struct TArrayTypeEntry { - 1: required TTypeEntryPtr objectTypePtr -} - -// Type entry for a MAP type. -struct TMapTypeEntry { - 1: required TTypeEntryPtr keyTypePtr - 2: required TTypeEntryPtr valueTypePtr -} - -// Type entry for a STRUCT type. -struct TStructTypeEntry { - 1: required map<string, TTypeEntryPtr> nameToTypePtr -} - -// Type entry for a UNIONTYPE type. -struct TUnionTypeEntry { - 1: required map<string, TTypeEntryPtr> nameToTypePtr -} - -struct TUserDefinedTypeEntry { - // The fully qualified name of the class implementing this type. - 1: required string typeClassName -} - -// We use a union here since Thrift does not support inheritance. -union TTypeEntry { - 1: TPrimitiveTypeEntry primitiveEntry - 2: TArrayTypeEntry arrayEntry - 3: TMapTypeEntry mapEntry - 4: TStructTypeEntry structEntry - 5: TUnionTypeEntry unionEntry - 6: TUserDefinedTypeEntry userDefinedTypeEntry -} - -// Type descriptor for columns. -struct TTypeDesc { - // The "top" type is always the first element of the list. - // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE - // type, then subsequent elements represent nested types. - 1: required list<TTypeEntry> types -} - -// A result set column descriptor. -struct TColumnDesc { - // The name of the column - 1: required string columnName - - // The type descriptor for this column - 2: required TTypeDesc typeDesc - - // The ordinal position of this column in the schema - 3: required i32 position - - 4: optional string comment -} - -// Metadata used to describe the schema (column names, types, comments) -// of result sets. -struct TTableSchema { - 1: required list<TColumnDesc> columns -} - -// A Boolean column value. -struct TBoolValue { - // NULL if value is unset. - 1: optional bool value -} - -// A Byte column value. -struct TByteValue { - // NULL if value is unset. - 1: optional byte value -} - -// A signed, 16 bit column value.
-struct TI16Value { - // NULL if value is unset - 1: optional i16 value -} - -// A signed, 32 bit column value -struct TI32Value { - // NULL if value is unset - 1: optional i32 value -} - -// A signed 64 bit column value -struct TI64Value { - // NULL if value is unset - 1: optional i64 value -} - -// A floating point 64 bit column value -struct TDoubleValue { - // NULL if value is unset - 1: optional double value -} - -struct TStringValue { - // NULL if value is unset - 1: optional string value -} - -// A single column value in a result set. -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumnValue { - 1: TBoolValue boolVal // BOOLEAN - 2: TByteValue byteVal // TINYINT - 3: TI16Value i16Val // SMALLINT - 4: TI32Value i32Val // INT - 5: TI64Value i64Val // BIGINT, TIMESTAMP - 6: TDoubleValue doubleVal // FLOAT, DOUBLE - 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME -} - -// Represents a row in a rowset. -struct TRow { - 1: required list colVals -} - -struct TBoolColumn { - 1: required list values - 2: required binary nulls -} - -struct TByteColumn { - 1: required list values - 2: required binary nulls -} - -struct TI16Column { - 1: required list values - 2: required binary nulls -} - -struct TI32Column { - 1: required list values - 2: required binary nulls -} - -struct TI64Column { - 1: required list values - 2: required binary nulls -} - -struct TDoubleColumn { - 1: required list values - 2: required binary nulls -} - -struct TStringColumn { - 1: required list values - 2: required binary nulls -} - -struct TBinaryColumn { - 1: required list values - 2: required binary nulls -} - -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumn { - 1: TBoolColumn boolVal // BOOLEAN - 2: TByteColumn byteVal // TINYINT - 3: TI16Column i16Val // SMALLINT - 4: TI32Column i32Val // INT - 5: TI64Column i64Val // BIGINT, TIMESTAMP - 6: TDoubleColumn doubleVal // FLOAT, DOUBLE - 7: TStringColumn stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL - 8: TBinaryColumn binaryVal // BINARY -} - -// Represents a rowset -struct TRowSet { - // The starting row offset of this rowset. - 1: required i64 startRowOffset - 2: required list rows - 3: optional list columns -} - -// The return status code contained in each response. -enum TStatusCode { - SUCCESS_STATUS, - SUCCESS_WITH_INFO_STATUS, - STILL_EXECUTING_STATUS, - ERROR_STATUS, - INVALID_HANDLE_STATUS -} - -// The return status of a remote request -struct TStatus { - 1: required TStatusCode statusCode - - // If status is SUCCESS_WITH_INFO, info_msgs may be populated with - // additional diagnostic information. - 2: optional list infoMessages - - // If status is ERROR, then the following fields may be set - 3: optional string sqlState // as defined in the ISO/IEF CLI specification - 4: optional i32 errorCode // internal error code - 5: optional string errorMessage -} - -// The state of an operation (i.e. a query or other -// asynchronous operation that generates a result set) -// on the server. 
-enum TOperationState { - // The operation has been initialized - INITIALIZED_STATE, - - // The operation is running. In this state the result - // set is not available. - RUNNING_STATE, - - // The operation has completed. When an operation is in - // this state its result set may be fetched. - FINISHED_STATE, - - // The operation was canceled by a client - CANCELED_STATE, - - // The operation was closed by a client - CLOSED_STATE, - - // The operation failed due to an error - ERROR_STATE, - - // The operation is in an unrecognized state - UKNOWN_STATE, - - // The operation is in an pending state - PENDING_STATE, -} - -// A string identifier. This is interpreted literally. -typedef string TIdentifier - -// A search pattern. -// -// Valid search pattern characters: -// '_': Any single character. -// '%': Any sequence of zero or more characters. -// '\': Escape character used to include special characters, -// e.g. '_', '%', '\'. If a '\' precedes a non-special -// character it has no special meaning and is interpreted -// literally. -typedef string TPattern - - -// A search pattern or identifier. Used as input -// parameter for many of the catalog functions. -typedef string TPatternOrIdentifier - -struct THandleIdentifier { - // 16 byte globally unique identifier - // This is the public ID of the handle and - // can be used for reporting. - 1: required binary guid, - - // 16 byte secret generated by the server - // and used to verify that the handle is not - // being hijacked by another user. - 2: required binary secret, -} - -// Client-side handle to persistent -// session information on the server-side. -struct TSessionHandle { - 1: required THandleIdentifier sessionId -} - -// The subtype of an OperationHandle. -enum TOperationType { - EXECUTE_STATEMENT, - GET_TYPE_INFO, - GET_CATALOGS, - GET_SCHEMAS, - GET_TABLES, - GET_TABLE_TYPES, - GET_COLUMNS, - GET_FUNCTIONS, - UNKNOWN, -} - -// Client-side reference to a task running -// asynchronously on the server. -struct TOperationHandle { - 1: required THandleIdentifier operationId - 2: required TOperationType operationType - - // If hasResultSet = TRUE, then this operation - // generates a result set that can be fetched. - // Note that the result set may be empty. - // - // If hasResultSet = FALSE, then this operation - // does not generate a result set, and calling - // GetResultSetMetadata or FetchResults against - // this OperationHandle will generate an error. - 3: required bool hasResultSet - - // For operations that don't generate result sets, - // modifiedRowCount is either: - // - // 1) The number of rows that were modified by - // the DML operation (e.g. number of rows inserted, - // number of rows deleted, etc). - // - // 2) 0 for operations that don't modify or add rows. - // - // 3) < 0 if the operation is capable of modifiying rows, - // but Hive is unable to determine how many rows were - // modified. For example, Hive's LOAD DATA command - // doesn't generate row count information because - // Hive doesn't inspect the data as it is loaded. - // - // modifiedRowCount is unset if the operation generates - // a result set. - 4: optional double modifiedRowCount -} - - -// OpenSession() -// -// Open a session (connection) on the server against -// which operations may be executed. -struct TOpenSessionReq { - // The version of the HiveServer2 protocol that the client is using. - 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 - - // Username and password for authentication. 
- // Depending on the authentication scheme being used, - // this information may instead be provided by a lower - // protocol layer, in which case these fields may be - // left unset. - 2: optional string username - 3: optional string password - - // Configuration overlay which is applied when the session is - // first created. - 4: optional map configuration -} - -struct TOpenSessionResp { - 1: required TStatus status - - // The protocol version that the server is using. - 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8 - - // Session Handle - 3: optional TSessionHandle sessionHandle - - // The configuration settings for this session. - 4: optional map configuration -} - - -// CloseSession() -// -// Closes the specified session and frees any resources -// currently allocated to that session. Any open -// operations in that session will be canceled. -struct TCloseSessionReq { - 1: required TSessionHandle sessionHandle -} - -struct TCloseSessionResp { - 1: required TStatus status -} - - - -enum TGetInfoType { - CLI_MAX_DRIVER_CONNECTIONS = 0, - CLI_MAX_CONCURRENT_ACTIVITIES = 1, - CLI_DATA_SOURCE_NAME = 2, - CLI_FETCH_DIRECTION = 8, - CLI_SERVER_NAME = 13, - CLI_SEARCH_PATTERN_ESCAPE = 14, - CLI_DBMS_NAME = 17, - CLI_DBMS_VER = 18, - CLI_ACCESSIBLE_TABLES = 19, - CLI_ACCESSIBLE_PROCEDURES = 20, - CLI_CURSOR_COMMIT_BEHAVIOR = 23, - CLI_DATA_SOURCE_READ_ONLY = 25, - CLI_DEFAULT_TXN_ISOLATION = 26, - CLI_IDENTIFIER_CASE = 28, - CLI_IDENTIFIER_QUOTE_CHAR = 29, - CLI_MAX_COLUMN_NAME_LEN = 30, - CLI_MAX_CURSOR_NAME_LEN = 31, - CLI_MAX_SCHEMA_NAME_LEN = 32, - CLI_MAX_CATALOG_NAME_LEN = 34, - CLI_MAX_TABLE_NAME_LEN = 35, - CLI_SCROLL_CONCURRENCY = 43, - CLI_TXN_CAPABLE = 46, - CLI_USER_NAME = 47, - CLI_TXN_ISOLATION_OPTION = 72, - CLI_INTEGRITY = 73, - CLI_GETDATA_EXTENSIONS = 81, - CLI_NULL_COLLATION = 85, - CLI_ALTER_TABLE = 86, - CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, - CLI_SPECIAL_CHARACTERS = 94, - CLI_MAX_COLUMNS_IN_GROUP_BY = 97, - CLI_MAX_COLUMNS_IN_INDEX = 98, - CLI_MAX_COLUMNS_IN_ORDER_BY = 99, - CLI_MAX_COLUMNS_IN_SELECT = 100, - CLI_MAX_COLUMNS_IN_TABLE = 101, - CLI_MAX_INDEX_SIZE = 102, - CLI_MAX_ROW_SIZE = 104, - CLI_MAX_STATEMENT_LEN = 105, - CLI_MAX_TABLES_IN_SELECT = 106, - CLI_MAX_USER_NAME_LEN = 107, - CLI_OJ_CAPABILITIES = 115, - - CLI_XOPEN_CLI_YEAR = 10000, - CLI_CURSOR_SENSITIVITY = 10001, - CLI_DESCRIBE_PARAMETER = 10002, - CLI_CATALOG_NAME = 10003, - CLI_COLLATION_SEQ = 10004, - CLI_MAX_IDENTIFIER_LEN = 10005, -} - -union TGetInfoValue { - 1: string stringValue - 2: i16 smallIntValue - 3: i32 integerBitmask - 4: i32 integerFlag - 5: i32 binaryValue - 6: i64 lenValue -} - -// GetInfo() -// -// This function is based on ODBC's CLIGetInfo() function. -// The function returns general information about the data source -// using the same keys as ODBC. -struct TGetInfoReq { - // The session to run this request against - 1: required TSessionHandle sessionHandle - - 2: required TGetInfoType infoType -} - -struct TGetInfoResp { - 1: required TStatus status - - 2: required TGetInfoValue infoValue -} - - -// ExecuteStatement() -// -// Execute a statement. -// The returned OperationHandle can be used to check on the -// status of the statement, and to fetch results once the -// statement has finished executing. 
-struct TExecuteStatementReq { - // The session to execute the statement against - 1: required TSessionHandle sessionHandle - - // The statement to be executed (DML, DDL, SET, etc) - 2: required string statement - - // Configuration properties that are overlayed on top of the - // the existing session configuration before this statement - // is executed. These properties apply to this statement - // only and will not affect the subsequent state of the Session. - 3: optional map confOverlay - - // Execute asynchronously when runAsync is true - 4: optional bool runAsync = false -} - -struct TExecuteStatementResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetTypeInfo() -// -// Get information about types supported by the HiveServer instance. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// -// Refer to the documentation for ODBC's CLIGetTypeInfo function for -// the format of the result set. -struct TGetTypeInfoReq { - // The session to run this request against. - 1: required TSessionHandle sessionHandle -} - -struct TGetTypeInfoResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetCatalogs() -// -// Returns the list of catalogs (databases) -// Results are ordered by TABLE_CATALOG -// -// Resultset columns : -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -struct TGetCatalogsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetCatalogsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetSchemas() -// -// Retrieves the schema names available in this database. -// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. -// col1 -// name: TABLE_SCHEM -// type: STRING -// desc: schema name -// col2 -// name: TABLE_CATALOG -// type: STRING -// desc: catalog name -struct TGetSchemasReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // schema name or pattern - 3: optional TPatternOrIdentifier schemaName -} - -struct TGetSchemasResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTables() -// -// Returns a list of tables with catalog, schema, and table -// type information. The information is returned as a result -// set which can be fetched using the OperationHandle -// provided in the response. -// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME -// -// Result Set Columns: -// -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -// col2 -// name: TABLE_SCHEM -// type: STRING -// desc: Schema name. -// -// col3 -// name: TABLE_NAME -// type: STRING -// desc: Table name. -// -// col4 -// name: TABLE_TYPE -// type: STRING -// desc: The table type, e.g. "TABLE", "VIEW", etc. -// -// col5 -// name: REMARKS -// type: STRING -// desc: Comments about the table -// -struct TGetTablesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog or a search pattern. - 2: optional TPatternOrIdentifier catalogName - - // Name of the schema or a search pattern. - 3: optional TPatternOrIdentifier schemaName - - // Name of the table or a search pattern. 
- 4: optional TPatternOrIdentifier tableName - - // List of table types to match - // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", - // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. - 5: optional list tableTypes -} - -struct TGetTablesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTableTypes() -// -// Returns the table types available in this database. -// The results are ordered by table type. -// -// col1 -// name: TABLE_TYPE -// type: STRING -// desc: Table type name. -struct TGetTableTypesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetTableTypesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetColumns() -// -// Returns a list of columns in the specified tables. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, -// and ORDINAL_POSITION. -// -// Result Set Columns are the same as those for the ODBC CLIColumns -// function. -// -struct TGetColumnsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // Schema name or search pattern - 3: optional TPatternOrIdentifier schemaName - - // Table name or search pattern - 4: optional TPatternOrIdentifier tableName - - // Column name or search pattern - 5: optional TPatternOrIdentifier columnName -} - -struct TGetColumnsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetFunctions() -// -// Returns a list of functions supported by the data source. The -// behavior of this function matches -// java.sql.DatabaseMetaData.getFunctions() both in terms of -// inputs and outputs. -// -// Result Set Columns: -// -// col1 -// name: FUNCTION_CAT -// type: STRING -// desc: Function catalog (may be null) -// -// col2 -// name: FUNCTION_SCHEM -// type: STRING -// desc: Function schema (may be null) -// -// col3 -// name: FUNCTION_NAME -// type: STRING -// desc: Function name. This is the name used to invoke the function. -// -// col4 -// name: REMARKS -// type: STRING -// desc: Explanatory comment on the function. -// -// col5 -// name: FUNCTION_TYPE -// type: SMALLINT -// desc: Kind of function. One of: -// * functionResultUnknown - Cannot determine if a return value or a table -// will be returned. -// * functionNoTable - Does not a return a table. -// * functionReturnsTable - Returns a table. -// -// col6 -// name: SPECIFIC_NAME -// type: STRING -// desc: The name which uniquely identifies this function within its schema. -// In this case this is the fully qualified class name of the class -// that implements this function. -// -struct TGetFunctionsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // A catalog name; must match the catalog name as it is stored in the - // database; "" retrieves those without a catalog; null means - // that the catalog name should not be used to narrow the search. - 2: optional TIdentifier catalogName - - // A schema name pattern; must match the schema name as it is stored - // in the database; "" retrieves those without a schema; null means - // that the schema name should not be used to narrow the search. 
- 3: optional TPatternOrIdentifier schemaName - - // A function name pattern; must match the function name as it is stored - // in the database. - 4: required TPatternOrIdentifier functionName -} - -struct TGetFunctionsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetOperationStatus() -// -// Get the status of an operation running on the server. -struct TGetOperationStatusReq { - // Session to run this request against - 1: required TOperationHandle operationHandle -} - -struct TGetOperationStatusResp { - 1: required TStatus status - 2: optional TOperationState operationState - - // If operationState is ERROR_STATE, then the following fields may be set - // sqlState as defined in the ISO/IEF CLI specification - 3: optional string sqlState - - // Internal error code - 4: optional i32 errorCode - - // Error message - 5: optional string errorMessage -} - - -// CancelOperation() -// -// Cancels processing on the specified operation handle and -// frees any resources which were allocated. -struct TCancelOperationReq { - // Operation to cancel - 1: required TOperationHandle operationHandle -} - -struct TCancelOperationResp { - 1: required TStatus status -} - - -// CloseOperation() -// -// Given an operation in the FINISHED, CANCELED, -// or ERROR states, CloseOperation() will free -// all of the resources which were allocated on -// the server to service the operation. -struct TCloseOperationReq { - 1: required TOperationHandle operationHandle -} - -struct TCloseOperationResp { - 1: required TStatus status -} - - -// GetResultSetMetadata() -// -// Retrieves schema information for the specified operation -struct TGetResultSetMetadataReq { - // Operation for which to fetch result set schema information - 1: required TOperationHandle operationHandle -} - -struct TGetResultSetMetadataResp { - 1: required TStatus status - 2: optional TTableSchema schema -} - - -enum TFetchOrientation { - // Get the next rowset. The fetch offset is ignored. - FETCH_NEXT, - - // Get the previous rowset. The fetch offset is ignored. - FETCH_PRIOR, - - // Return the rowset at the given fetch offset relative - // to the current rowset. - // NOT SUPPORTED - FETCH_RELATIVE, - - // Return the rowset at the specified fetch offset. - // NOT SUPPORTED - FETCH_ABSOLUTE, - - // Get the first rowset in the result set. - FETCH_FIRST, - - // Get the last rowset in the result set. - // NOT SUPPORTED - FETCH_LAST -} - -// FetchResults() -// -// Fetch rows from the server corresponding to -// a particular OperationHandle. -struct TFetchResultsReq { - // Operation from which to fetch results. - 1: required TOperationHandle operationHandle - - // The fetch orientation. This must be either - // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. - 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT - - // Max number of rows that should be returned in - // the rowset. - 3: required i64 maxRows - - // The type of a fetch results request. 0 represents Query output. 1 represents Log - 4: optional i16 fetchType = 0 -} - -struct TFetchResultsResp { - 1: required TStatus status - - // TRUE if there are more rows left to fetch from the server. - 2: optional bool hasMoreRows - - // The rowset. This is optional so that we have the - // option in the future of adding alternate formats for - // representing result set data, e.g. delimited strings, - // binary encoded, etc. 
- 3: optional TRowSet results -} - -// GetDelegationToken() -// Retrieve delegation token for the current user -struct TGetDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // userid for the proxy user - 2: required string owner - - // designated renewer userid - 3: required string renewer -} - -struct TGetDelegationTokenResp { - // status of the request - 1: required TStatus status - - // delegation token string - 2: optional string delegationToken -} - -// CancelDelegationToken() -// Cancel the given delegation token -struct TCancelDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to cancel - 2: required string delegationToken -} - -struct TCancelDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -// RenewDelegationToken() -// Renew the given delegation token -struct TRenewDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to renew - 2: required string delegationToken -} - -struct TRenewDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -service TCLIService { - - TOpenSessionResp OpenSession(1:TOpenSessionReq req); - - TCloseSessionResp CloseSession(1:TCloseSessionReq req); - - TGetInfoResp GetInfo(1:TGetInfoReq req); - - TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); - - TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); - - TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); - - TGetSchemasResp GetSchemas(1:TGetSchemasReq req); - - TGetTablesResp GetTables(1:TGetTablesReq req); - - TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); - - TGetColumnsResp GetColumns(1:TGetColumnsReq req); - - TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); - - TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); - - TCancelOperationResp CancelOperation(1:TCancelOperationReq req); - - TCloseOperationResp CloseOperation(1:TCloseOperationReq req); - - TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); - - TFetchResultsResp FetchResults(1:TFetchResultsReq req); - - TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req); - - TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req); - - TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req); -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java deleted file mode 100644 index 6323d34eac734..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TArrayTypeEntry.java +++ /dev/null @@ -1,383 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; 
-import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TArrayTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TArrayTypeEntry"); - - private static final org.apache.thrift.protocol.TField OBJECT_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("objectTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TArrayTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TArrayTypeEntryTupleSchemeFactory()); - } - - private int objectTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OBJECT_TYPE_PTR((short)1, "objectTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OBJECT_TYPE_PTR - return OBJECT_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __OBJECTTYPEPTR_ISSET_ID = 0; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OBJECT_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("objectTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TArrayTypeEntry.class, metaDataMap); - } - - public TArrayTypeEntry() { - } - - public TArrayTypeEntry( - int objectTypePtr) - { - this(); - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TArrayTypeEntry(TArrayTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.objectTypePtr = other.objectTypePtr; - } - - public TArrayTypeEntry deepCopy() { - return new TArrayTypeEntry(this); - } - - @Override - public void clear() { - setObjectTypePtrIsSet(false); - this.objectTypePtr = 0; - } - - public int getObjectTypePtr() { - return this.objectTypePtr; - } - - public void setObjectTypePtr(int objectTypePtr) { - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - public void unsetObjectTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - /** Returns true if field objectTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetObjectTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - public void setObjectTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OBJECT_TYPE_PTR: - if (value == null) { - unsetObjectTypePtr(); - } else { - setObjectTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OBJECT_TYPE_PTR: - return Integer.valueOf(getObjectTypePtr()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OBJECT_TYPE_PTR: - return isSetObjectTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TArrayTypeEntry) - return this.equals((TArrayTypeEntry)that); - return false; - } - - public boolean equals(TArrayTypeEntry that) { - if (that == null) - return false; - - boolean this_present_objectTypePtr = true; - boolean that_present_objectTypePtr = true; - if (this_present_objectTypePtr || that_present_objectTypePtr) { - if (!(this_present_objectTypePtr && that_present_objectTypePtr)) - return false; - if (this.objectTypePtr != that.objectTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_objectTypePtr = true; - builder.append(present_objectTypePtr); - if (present_objectTypePtr) - builder.append(objectTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TArrayTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TArrayTypeEntry typedOther = (TArrayTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetObjectTypePtr()).compareTo(typedOther.isSetObjectTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetObjectTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.objectTypePtr, typedOther.objectTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - 
public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TArrayTypeEntry("); - boolean first = true; - - sb.append("objectTypePtr:"); - sb.append(this.objectTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetObjectTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'objectTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TArrayTypeEntryStandardSchemeFactory implements SchemeFactory { - public TArrayTypeEntryStandardScheme getScheme() { - return new TArrayTypeEntryStandardScheme(); - } - } - - private static class TArrayTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OBJECT_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(OBJECT_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.objectTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TArrayTypeEntryTupleSchemeFactory implements SchemeFactory { - public TArrayTypeEntryTupleScheme getScheme() { - return new TArrayTypeEntryTupleScheme(); - } - } - - private static class TArrayTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.objectTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) 
throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java deleted file mode 100644 index 6b1b054d1acad..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBinaryColumn.java +++ /dev/null @@ -1,550 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBinaryColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBinaryColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBinaryColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBinaryColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBinaryColumn.class, metaDataMap); - } - - public TBinaryColumn() { - } - - public TBinaryColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TBinaryColumn(TBinaryColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (ByteBuffer other_element : other.values) { - ByteBuffer temp_binary_element = org.apache.thrift.TBaseHelper.copyBinary(other_element); -; - __this__values.add(temp_binary_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TBinaryColumn deepCopy() { - return new TBinaryColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(ByteBuffer elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? 
null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBinaryColumn) - return this.equals((TBinaryColumn)that); - return false; - } - - public boolean equals(TBinaryColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TBinaryColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBinaryColumn typedOther = (TBinaryColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if 
(lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBinaryColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBinaryColumnStandardSchemeFactory implements SchemeFactory { - public TBinaryColumnStandardScheme getScheme() { - return new TBinaryColumnStandardScheme(); - } - } - - private static class TBinaryColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBinaryColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list110 = iprot.readListBegin(); - struct.values = new ArrayList(_list110.size); - for (int _i111 = 0; _i111 < _list110.size; ++_i111) - { - ByteBuffer _elem112; // optional - _elem112 = iprot.readBinary(); - struct.values.add(_elem112); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBinaryColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (ByteBuffer _iter113 : struct.values) - { - oprot.writeBinary(_iter113); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBinaryColumnTupleSchemeFactory implements SchemeFactory { - public TBinaryColumnTupleScheme getScheme() { - return new TBinaryColumnTupleScheme(); - } - } - - private static class TBinaryColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (ByteBuffer _iter114 : struct.values) - { - oprot.writeBinary(_iter114); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list115 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list115.size); - for (int _i116 = 0; _i116 < _list115.size; ++_i116) - { - ByteBuffer _elem117; // optional - _elem117 = iprot.readBinary(); - struct.values.add(_elem117); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java deleted file mode 100644 index efd571cfdfbbf..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBoolColumn implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolColumn.class, metaDataMap); - } - - public TBoolColumn() { - } - - public TBoolColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. 
- */ - public TBoolColumn(TBoolColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Boolean other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TBoolColumn deepCopy() { - return new TBoolColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(boolean elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolColumn) - return this.equals((TBoolColumn)that); - return false; - } - - public boolean equals(TBoolColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if 
(this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TBoolColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBoolColumn typedOther = (TBoolColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolColumnStandardSchemeFactory implements SchemeFactory { - public TBoolColumnStandardScheme getScheme() { - return new TBoolColumnStandardScheme(); - } - } - - private static class TBoolColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list54 = iprot.readListBegin(); - struct.values = new ArrayList(_list54.size); - for (int _i55 = 0; _i55 < _list54.size; ++_i55) - { - boolean _elem56; // optional - _elem56 = iprot.readBool(); - struct.values.add(_elem56); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, struct.values.size())); - for (boolean _iter57 : struct.values) - { - oprot.writeBool(_iter57); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolColumnTupleSchemeFactory implements SchemeFactory { - public TBoolColumnTupleScheme getScheme() { - return new TBoolColumnTupleScheme(); - } - } - - private static class TBoolColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (boolean _iter58 : struct.values) - { - oprot.writeBool(_iter58); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list59 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, iprot.readI32()); - struct.values = new ArrayList(_list59.size); - for (int _i60 = 0; _i60 < _list59.size; ++_i60) - { - boolean _elem61; // optional - _elem61 = iprot.readBool(); - struct.values.add(_elem61); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java deleted file mode 100644 index c7495ee79e4b5..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TBoolValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TBoolValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BOOL, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolValueTupleSchemeFactory()); - } - - private boolean value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolValue.class, metaDataMap); - } - - public TBoolValue() { - } - - /** - * Performs a deep copy on other. - */ - public TBoolValue(TBoolValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TBoolValue deepCopy() { - return new TBoolValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = false; - } - - public boolean isValue() { - return this.value; - } - - public void setValue(boolean value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Boolean.valueOf(isValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolValue) - return this.equals((TBoolValue)that); - return false; - } - - public boolean equals(TBoolValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) 
{ - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TBoolValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TBoolValue typedOther = (TBoolValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolValueStandardSchemeFactory implements SchemeFactory { - public TBoolValueStandardScheme getScheme() { - return new TBoolValueStandardScheme(); - } - } - - private static class TBoolValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeBool(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolValueTupleSchemeFactory implements SchemeFactory { - public TBoolValueTupleScheme getScheme() { - return new TBoolValueTupleScheme(); - } - } - - private static class TBoolValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeBool(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java deleted file mode 100644 index 169bfdeab3eea..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import 
org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TByteColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteColumn.class, metaDataMap); - } - - public TByteColumn() { - } - - public TByteColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TByteColumn(TByteColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Byte other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TByteColumn deepCopy() { - return new TByteColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(byte elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteColumn) - return this.equals((TByteColumn)that); - return false; - } - - public boolean equals(TByteColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TByteColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TByteColumn typedOther = (TByteColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - 
public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteColumnStandardSchemeFactory implements SchemeFactory { - public TByteColumnStandardScheme getScheme() { - return new TByteColumnStandardScheme(); - } - } - - private static class TByteColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list62 = iprot.readListBegin(); - struct.values = new ArrayList(_list62.size); - for (int _i63 = 0; _i63 < _list62.size; ++_i63) - { - byte _elem64; // optional - _elem64 = iprot.readByte(); - struct.values.add(_elem64); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteColumn struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, struct.values.size())); - for (byte _iter65 : struct.values) - { - oprot.writeByte(_iter65); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteColumnTupleSchemeFactory implements SchemeFactory { - public TByteColumnTupleScheme getScheme() { - return new TByteColumnTupleScheme(); - } - } - - private static class TByteColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (byte _iter66 : struct.values) - { - oprot.writeByte(_iter66); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list67 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, iprot.readI32()); - struct.values = new ArrayList(_list67.size); - for (int _i68 = 0; _i68 < _list67.size; ++_i68) - { - byte _elem69; // optional - _elem69 = iprot.readByte(); - struct.values.add(_elem69); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java deleted file mode 100644 index 23d9693759968..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TByteValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TByteValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new 
org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BYTE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteValueTupleSchemeFactory()); - } - - private byte value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteValue.class, metaDataMap); - } - - public TByteValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TByteValue(TByteValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TByteValue deepCopy() { - return new TByteValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public byte getValue() { - return this.value; - } - - public void setValue(byte value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Byte)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Byte.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteValue) - return this.equals((TByteValue)that); - return false; - } - - public boolean equals(TByteValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TByteValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TByteValue typedOther = (TByteValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); 
- sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteValueStandardSchemeFactory implements SchemeFactory { - public TByteValueStandardScheme getScheme() { - return new TByteValueStandardScheme(); - } - } - - private static class TByteValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BYTE) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeByte(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteValueTupleSchemeFactory implements SchemeFactory { - public TByteValueTupleScheme getScheme() { - return new TByteValueTupleScheme(); - } - } - - private static class TByteValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeByte(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java deleted file mode 100644 index 54851b8d51317..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIService.java +++ /dev/null @@ -1,15414 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCLIService { - - public interface Iface { - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException; - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException; - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException; - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException; - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException; - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException; - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException; - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException; - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException; - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException; - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException; - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException; - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException; - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException; - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException; - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException; - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException; - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException; - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException; - - } - - public interface AsyncIface { - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public 
void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - } - - public static class Client extends org.apache.thrift.TServiceClient implements Iface { - public static class Factory implements org.apache.thrift.TServiceClientFactory { - public Factory() {} - public Client getClient(org.apache.thrift.protocol.TProtocol prot) { - return new Client(prot); - } - public Client getClient(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - return new Client(iprot, oprot); - } - } - - public Client(org.apache.thrift.protocol.TProtocol prot) - { - super(prot, prot); - } - - public Client(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - super(iprot, oprot); - } - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - send_OpenSession(req); - return recv_OpenSession(); - } - - 
public void send_OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - sendBase("OpenSession", args); - } - - public TOpenSessionResp recv_OpenSession() throws org.apache.thrift.TException - { - OpenSession_result result = new OpenSession_result(); - receiveBase(result, "OpenSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "OpenSession failed: unknown result"); - } - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - send_CloseSession(req); - return recv_CloseSession(); - } - - public void send_CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - sendBase("CloseSession", args); - } - - public TCloseSessionResp recv_CloseSession() throws org.apache.thrift.TException - { - CloseSession_result result = new CloseSession_result(); - receiveBase(result, "CloseSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseSession failed: unknown result"); - } - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - send_GetInfo(req); - return recv_GetInfo(); - } - - public void send_GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - sendBase("GetInfo", args); - } - - public TGetInfoResp recv_GetInfo() throws org.apache.thrift.TException - { - GetInfo_result result = new GetInfo_result(); - receiveBase(result, "GetInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetInfo failed: unknown result"); - } - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - send_ExecuteStatement(req); - return recv_ExecuteStatement(); - } - - public void send_ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - sendBase("ExecuteStatement", args); - } - - public TExecuteStatementResp recv_ExecuteStatement() throws org.apache.thrift.TException - { - ExecuteStatement_result result = new ExecuteStatement_result(); - receiveBase(result, "ExecuteStatement"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "ExecuteStatement failed: unknown result"); - } - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - send_GetTypeInfo(req); - return recv_GetTypeInfo(); - } - - public void send_GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - sendBase("GetTypeInfo", args); - } - - public TGetTypeInfoResp recv_GetTypeInfo() throws org.apache.thrift.TException - { - GetTypeInfo_result result = new GetTypeInfo_result(); - receiveBase(result, "GetTypeInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new 
org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTypeInfo failed: unknown result"); - } - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - send_GetCatalogs(req); - return recv_GetCatalogs(); - } - - public void send_GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - sendBase("GetCatalogs", args); - } - - public TGetCatalogsResp recv_GetCatalogs() throws org.apache.thrift.TException - { - GetCatalogs_result result = new GetCatalogs_result(); - receiveBase(result, "GetCatalogs"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCatalogs failed: unknown result"); - } - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - send_GetSchemas(req); - return recv_GetSchemas(); - } - - public void send_GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - sendBase("GetSchemas", args); - } - - public TGetSchemasResp recv_GetSchemas() throws org.apache.thrift.TException - { - GetSchemas_result result = new GetSchemas_result(); - receiveBase(result, "GetSchemas"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetSchemas failed: unknown result"); - } - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - send_GetTables(req); - return recv_GetTables(); - } - - public void send_GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - GetTables_args args = new GetTables_args(); - args.setReq(req); - sendBase("GetTables", args); - } - - public TGetTablesResp recv_GetTables() throws org.apache.thrift.TException - { - GetTables_result result = new GetTables_result(); - receiveBase(result, "GetTables"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTables failed: unknown result"); - } - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - send_GetTableTypes(req); - return recv_GetTableTypes(); - } - - public void send_GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - sendBase("GetTableTypes", args); - } - - public TGetTableTypesResp recv_GetTableTypes() throws org.apache.thrift.TException - { - GetTableTypes_result result = new GetTableTypes_result(); - receiveBase(result, "GetTableTypes"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTableTypes failed: unknown result"); - } - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - send_GetColumns(req); - return recv_GetColumns(); - } - - public void send_GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - sendBase("GetColumns", args); - } - - public TGetColumnsResp recv_GetColumns() throws 
org.apache.thrift.TException - { - GetColumns_result result = new GetColumns_result(); - receiveBase(result, "GetColumns"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetColumns failed: unknown result"); - } - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - send_GetFunctions(req); - return recv_GetFunctions(); - } - - public void send_GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - sendBase("GetFunctions", args); - } - - public TGetFunctionsResp recv_GetFunctions() throws org.apache.thrift.TException - { - GetFunctions_result result = new GetFunctions_result(); - receiveBase(result, "GetFunctions"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetFunctions failed: unknown result"); - } - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - send_GetOperationStatus(req); - return recv_GetOperationStatus(); - } - - public void send_GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - sendBase("GetOperationStatus", args); - } - - public TGetOperationStatusResp recv_GetOperationStatus() throws org.apache.thrift.TException - { - GetOperationStatus_result result = new GetOperationStatus_result(); - receiveBase(result, "GetOperationStatus"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetOperationStatus failed: unknown result"); - } - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - send_CancelOperation(req); - return recv_CancelOperation(); - } - - public void send_CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - sendBase("CancelOperation", args); - } - - public TCancelOperationResp recv_CancelOperation() throws org.apache.thrift.TException - { - CancelOperation_result result = new CancelOperation_result(); - receiveBase(result, "CancelOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelOperation failed: unknown result"); - } - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - send_CloseOperation(req); - return recv_CloseOperation(); - } - - public void send_CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - sendBase("CloseOperation", args); - } - - public TCloseOperationResp recv_CloseOperation() throws org.apache.thrift.TException - { - CloseOperation_result result = new CloseOperation_result(); - receiveBase(result, "CloseOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseOperation 
failed: unknown result"); - } - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - send_GetResultSetMetadata(req); - return recv_GetResultSetMetadata(); - } - - public void send_GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - sendBase("GetResultSetMetadata", args); - } - - public TGetResultSetMetadataResp recv_GetResultSetMetadata() throws org.apache.thrift.TException - { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - receiveBase(result, "GetResultSetMetadata"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetResultSetMetadata failed: unknown result"); - } - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - send_FetchResults(req); - return recv_FetchResults(); - } - - public void send_FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - sendBase("FetchResults", args); - } - - public TFetchResultsResp recv_FetchResults() throws org.apache.thrift.TException - { - FetchResults_result result = new FetchResults_result(); - receiveBase(result, "FetchResults"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "FetchResults failed: unknown result"); - } - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - send_GetDelegationToken(req); - return recv_GetDelegationToken(); - } - - public void send_GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - sendBase("GetDelegationToken", args); - } - - public TGetDelegationTokenResp recv_GetDelegationToken() throws org.apache.thrift.TException - { - GetDelegationToken_result result = new GetDelegationToken_result(); - receiveBase(result, "GetDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetDelegationToken failed: unknown result"); - } - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - send_CancelDelegationToken(req); - return recv_CancelDelegationToken(); - } - - public void send_CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - sendBase("CancelDelegationToken", args); - } - - public TCancelDelegationTokenResp recv_CancelDelegationToken() throws org.apache.thrift.TException - { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - receiveBase(result, "CancelDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelDelegationToken failed: unknown result"); - } - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq 
req) throws org.apache.thrift.TException - { - send_RenewDelegationToken(req); - return recv_RenewDelegationToken(); - } - - public void send_RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - sendBase("RenewDelegationToken", args); - } - - public TRenewDelegationTokenResp recv_RenewDelegationToken() throws org.apache.thrift.TException - { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - receiveBase(result, "RenewDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "RenewDelegationToken failed: unknown result"); - } - - } - public static class AsyncClient extends org.apache.thrift.async.TAsyncClient implements AsyncIface { - public static class Factory implements org.apache.thrift.async.TAsyncClientFactory { - private org.apache.thrift.async.TAsyncClientManager clientManager; - private org.apache.thrift.protocol.TProtocolFactory protocolFactory; - public Factory(org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.protocol.TProtocolFactory protocolFactory) { - this.clientManager = clientManager; - this.protocolFactory = protocolFactory; - } - public AsyncClient getAsyncClient(org.apache.thrift.transport.TNonblockingTransport transport) { - return new AsyncClient(protocolFactory, clientManager, transport); - } - } - - public AsyncClient(org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.transport.TNonblockingTransport transport) { - super(protocolFactory, clientManager, transport); - } - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - OpenSession_call method_call = new OpenSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class OpenSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TOpenSessionReq req; - public OpenSession_call(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("OpenSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TOpenSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new 
Client(prot)).recv_OpenSession(); - } - } - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseSession_call method_call = new CloseSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseSessionReq req; - public CloseSession_call(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseSession(); - } - } - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetInfo_call method_call = new GetInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetInfoReq req; - public GetInfo_call(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetInfo(); - } - } - 
- public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - ExecuteStatement_call method_call = new ExecuteStatement_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class ExecuteStatement_call extends org.apache.thrift.async.TAsyncMethodCall { - private TExecuteStatementReq req; - public ExecuteStatement_call(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("ExecuteStatement", org.apache.thrift.protocol.TMessageType.CALL, 0)); - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TExecuteStatementResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_ExecuteStatement(); - } - } - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTypeInfo_call method_call = new GetTypeInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTypeInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTypeInfoReq req; - public GetTypeInfo_call(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTypeInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTypeInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTypeInfo(); - } - } - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCatalogs_call method_call = new GetCatalogs_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCatalogs_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCatalogsReq req; - public GetCatalogs_call(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCatalogs", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetCatalogsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCatalogs(); - } - } - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetSchemas_call method_call = new GetSchemas_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetSchemas_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetSchemasReq req; - public GetSchemas_call(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetSchemas", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetSchemasResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetSchemas(); - } - } - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTables_call method_call = new GetTables_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTables_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTablesReq req; - public GetTables_call(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTables", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTables_args args = new GetTables_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTablesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTables(); - } - } - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTableTypes_call method_call = new GetTableTypes_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTableTypes_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTableTypesReq req; - public GetTableTypes_call(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTableTypes", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTableTypesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTableTypes(); - } - } - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetColumns_call method_call = new GetColumns_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetColumns_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetColumnsReq req; - public GetColumns_call(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetColumns", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetColumnsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetColumns(); - } - } - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetFunctions_call method_call = new GetFunctions_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetFunctions_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetFunctionsReq req; - public GetFunctions_call(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetFunctions", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetFunctionsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = 
client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetFunctions(); - } - } - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetOperationStatus_call method_call = new GetOperationStatus_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetOperationStatus_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetOperationStatusReq req; - public GetOperationStatus_call(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetOperationStatus", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetOperationStatusResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetOperationStatus(); - } - } - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelOperation_call method_call = new CancelOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelOperationReq req; - public CancelOperation_call(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport 
memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelOperation(); - } - } - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseOperation_call method_call = new CloseOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseOperationReq req; - public CloseOperation_call(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseOperation(); - } - } - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetResultSetMetadata_call method_call = new GetResultSetMetadata_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetResultSetMetadata_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetResultSetMetadataReq req; - public GetResultSetMetadata_call(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetResultSetMetadata", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetResultSetMetadataResp getResult() throws org.apache.thrift.TException { - if (getState() != 
org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetResultSetMetadata(); - } - } - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - FetchResults_call method_call = new FetchResults_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class FetchResults_call extends org.apache.thrift.async.TAsyncMethodCall { - private TFetchResultsReq req; - public FetchResults_call(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("FetchResults", org.apache.thrift.protocol.TMessageType.CALL, 0)); - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TFetchResultsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_FetchResults(); - } - } - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetDelegationToken_call method_call = new GetDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetDelegationTokenReq req; - public GetDelegationToken_call(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - 
public TGetDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetDelegationToken(); - } - } - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelDelegationToken_call method_call = new CancelDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelDelegationTokenReq req; - public CancelDelegationToken_call(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelDelegationToken(); - } - } - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - RenewDelegationToken_call method_call = new RenewDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class RenewDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TRenewDelegationTokenReq req; - public RenewDelegationToken_call(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new 
org.apache.thrift.protocol.TMessage("RenewDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TRenewDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_RenewDelegationToken(); - } - } - - } - - public static class Processor extends org.apache.thrift.TBaseProcessor implements org.apache.thrift.TProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(Processor.class.getName()); - public Processor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected Processor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.ProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public OpenSession_result getResult(I iface, OpenSession_args args) throws org.apache.thrift.TException { - OpenSession_result result = new OpenSession_result(); - result.success = iface.OpenSession(args.req); - return result; - } - } - - public static class CloseSession extends org.apache.thrift.ProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseSession_result getResult(I iface, CloseSession_args args) throws org.apache.thrift.TException { - CloseSession_result result = new CloseSession_result(); - result.success = iface.CloseSession(args.req); - return result; - } - } - - public static class GetInfo extends org.apache.thrift.ProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - public 
GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetInfo_result getResult(I iface, GetInfo_args args) throws org.apache.thrift.TException { - GetInfo_result result = new GetInfo_result(); - result.success = iface.GetInfo(args.req); - return result; - } - } - - public static class ExecuteStatement extends org.apache.thrift.ProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - protected boolean isOneway() { - return false; - } - - public ExecuteStatement_result getResult(I iface, ExecuteStatement_args args) throws org.apache.thrift.TException { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = iface.ExecuteStatement(args.req); - return result; - } - } - - public static class GetTypeInfo extends org.apache.thrift.ProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTypeInfo_result getResult(I iface, GetTypeInfo_args args) throws org.apache.thrift.TException { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = iface.GetTypeInfo(args.req); - return result; - } - } - - public static class GetCatalogs extends org.apache.thrift.ProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCatalogs_result getResult(I iface, GetCatalogs_args args) throws org.apache.thrift.TException { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = iface.GetCatalogs(args.req); - return result; - } - } - - public static class GetSchemas extends org.apache.thrift.ProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetSchemas_result getResult(I iface, GetSchemas_args args) throws org.apache.thrift.TException { - GetSchemas_result result = new GetSchemas_result(); - result.success = iface.GetSchemas(args.req); - return result; - } - } - - public static class GetTables extends org.apache.thrift.ProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTables_result getResult(I iface, GetTables_args args) throws org.apache.thrift.TException { - GetTables_result result = new GetTables_result(); - result.success = iface.GetTables(args.req); - return result; - } - } - - public static class GetTableTypes extends org.apache.thrift.ProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTableTypes_result getResult(I iface, GetTableTypes_args args) throws org.apache.thrift.TException { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = iface.GetTableTypes(args.req); - return result; - } - } - - public static class GetColumns extends 
org.apache.thrift.ProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetColumns_result getResult(I iface, GetColumns_args args) throws org.apache.thrift.TException { - GetColumns_result result = new GetColumns_result(); - result.success = iface.GetColumns(args.req); - return result; - } - } - - public static class GetFunctions extends org.apache.thrift.ProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetFunctions_result getResult(I iface, GetFunctions_args args) throws org.apache.thrift.TException { - GetFunctions_result result = new GetFunctions_result(); - result.success = iface.GetFunctions(args.req); - return result; - } - } - - public static class GetOperationStatus extends org.apache.thrift.ProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetOperationStatus_result getResult(I iface, GetOperationStatus_args args) throws org.apache.thrift.TException { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = iface.GetOperationStatus(args.req); - return result; - } - } - - public static class CancelOperation extends org.apache.thrift.ProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelOperation_result getResult(I iface, CancelOperation_args args) throws org.apache.thrift.TException { - CancelOperation_result result = new CancelOperation_result(); - result.success = iface.CancelOperation(args.req); - return result; - } - } - - public static class CloseOperation extends org.apache.thrift.ProcessFunction { - public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseOperation_result getResult(I iface, CloseOperation_args args) throws org.apache.thrift.TException { - CloseOperation_result result = new CloseOperation_result(); - result.success = iface.CloseOperation(args.req); - return result; - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.ProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetResultSetMetadata_result getResult(I iface, GetResultSetMetadata_args args) throws org.apache.thrift.TException { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = iface.GetResultSetMetadata(args.req); - return result; - } - } - - public static class FetchResults extends org.apache.thrift.ProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - protected boolean isOneway() { - 
return false; - } - - public FetchResults_result getResult(I iface, FetchResults_args args) throws org.apache.thrift.TException { - FetchResults_result result = new FetchResults_result(); - result.success = iface.FetchResults(args.req); - return result; - } - } - - public static class GetDelegationToken extends org.apache.thrift.ProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetDelegationToken_result getResult(I iface, GetDelegationToken_args args) throws org.apache.thrift.TException { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = iface.GetDelegationToken(args.req); - return result; - } - } - - public static class CancelDelegationToken extends org.apache.thrift.ProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelDelegationToken_result getResult(I iface, CancelDelegationToken_args args) throws org.apache.thrift.TException { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = iface.CancelDelegationToken(args.req); - return result; - } - } - - public static class RenewDelegationToken extends org.apache.thrift.ProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public RenewDelegationToken_result getResult(I iface, RenewDelegationToken_args args) throws org.apache.thrift.TException { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = iface.RenewDelegationToken(args.req); - return result; - } - } - - } - - public static class OpenSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_argsTupleSchemeFactory()); - } - - private TOpenSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_args.class, metaDataMap); - } - - public OpenSession_args() { - } - - public OpenSession_args( - TOpenSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public OpenSession_args(OpenSession_args other) { - if (other.isSetReq()) { - this.req = new TOpenSessionReq(other.req); - } - } - - public OpenSession_args deepCopy() { - return new OpenSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TOpenSessionReq getReq() { - return this.req; - } - - public void setReq(TOpenSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TOpenSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_args) - return this.equals((OpenSession_args)that); - return false; - } - - public boolean equals(OpenSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(OpenSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - OpenSession_args typedOther = (OpenSession_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_argsStandardSchemeFactory implements SchemeFactory { - public OpenSession_argsStandardScheme getScheme() { - return new OpenSession_argsStandardScheme(); - } - } - - private static class OpenSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_argsTupleSchemeFactory implements SchemeFactory { - public OpenSession_argsTupleScheme getScheme() { - return new OpenSession_argsTupleScheme(); - } - } - - private static class OpenSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class OpenSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_resultTupleSchemeFactory()); - } - - private TOpenSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_result.class, metaDataMap); - } - - public OpenSession_result() { - } - - public OpenSession_result( - TOpenSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public OpenSession_result(OpenSession_result other) { - if (other.isSetSuccess()) { - this.success = new TOpenSessionResp(other.success); - } - } - - public OpenSession_result deepCopy() { - return new OpenSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TOpenSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TOpenSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TOpenSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_result) - return this.equals((OpenSession_result)that); - return false; - } - - public boolean equals(OpenSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if 
(present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(OpenSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - OpenSession_result typedOther = (OpenSession_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_resultStandardSchemeFactory implements SchemeFactory { - public OpenSession_resultStandardScheme getScheme() { - return new OpenSession_resultStandardScheme(); - } - } - - private static class OpenSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_resultTupleSchemeFactory implements SchemeFactory { - public OpenSession_resultTupleScheme getScheme() { - return new OpenSession_resultTupleScheme(); - } - } - - private static class OpenSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_argsTupleSchemeFactory()); - } - - private TCloseSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_args.class, metaDataMap); - } - - public CloseSession_args() { - } - - public CloseSession_args( - TCloseSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CloseSession_args(CloseSession_args other) { - if (other.isSetReq()) { - this.req = new TCloseSessionReq(other.req); - } - } - - public CloseSession_args deepCopy() { - return new CloseSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseSessionReq getReq() { - return this.req; - } - - public void setReq(TCloseSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_args) - return this.equals((CloseSession_args)that); - return false; - } - - public boolean equals(CloseSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CloseSession_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseSession_args typedOther = (CloseSession_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_argsStandardSchemeFactory implements SchemeFactory { - public CloseSession_argsStandardScheme getScheme() { - return new CloseSession_argsStandardScheme(); - } - } - - private static class CloseSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
CloseSession_argsTupleSchemeFactory implements SchemeFactory { - public CloseSession_argsTupleScheme getScheme() { - return new CloseSession_argsTupleScheme(); - } - } - - private static class CloseSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_resultTupleSchemeFactory()); - } - - private TCloseSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_result.class, metaDataMap); - } - - public CloseSession_result() { - } - - public CloseSession_result( - TCloseSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CloseSession_result(CloseSession_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseSessionResp(other.success); - } - } - - public CloseSession_result deepCopy() { - return new CloseSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_result) - return this.equals((CloseSession_result)that); - return false; - } - - public boolean equals(CloseSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CloseSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseSession_result typedOther = (CloseSession_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_resultStandardSchemeFactory implements SchemeFactory { - public CloseSession_resultStandardScheme getScheme() { - return new CloseSession_resultStandardScheme(); - } - } - - private static class CloseSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_resultTupleSchemeFactory implements SchemeFactory { - public CloseSession_resultTupleScheme getScheme() { - return new CloseSession_resultTupleScheme(); - } - } - - private static class CloseSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetInfo_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_argsTupleSchemeFactory()); - } - - private TGetInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_args.class, metaDataMap); - } - - public GetInfo_args() { - } - - public GetInfo_args( - TGetInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetInfo_args(GetInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetInfoReq(other.req); - } - } - - public GetInfo_args deepCopy() { - return new GetInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetInfoReq getReq() { - return this.req; - } - - public void setReq(TGetInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_args) - return this.equals((GetInfo_args)that); - return false; - } - - public boolean equals(GetInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - 
GetInfo_args typedOther = (GetInfo_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetInfo_argsStandardScheme getScheme() { - return new GetInfo_argsStandardScheme(); - } - } - - private static class GetInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetInfo_argsTupleScheme getScheme() { - return new GetInfo_argsTupleScheme(); - } - } - - private static class 
GetInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_resultTupleSchemeFactory()); - } - - private TGetInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_result.class, metaDataMap); - } - - public GetInfo_result() { - } - - public GetInfo_result( - TGetInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetInfo_result(GetInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetInfoResp(other.success); - } - } - - public GetInfo_result deepCopy() { - return new GetInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_result) - return this.equals((GetInfo_result)that); - return false; - } - - public boolean equals(GetInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); 
- } - - public int compareTo(GetInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetInfo_result typedOther = (GetInfo_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetInfo_resultStandardScheme getScheme() { - return new GetInfo_resultStandardScheme(); - } - } - - private static class GetInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - 
oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetInfo_resultTupleScheme getScheme() { - return new GetInfo_resultTupleScheme(); - } - } - - private static class GetInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_argsTupleSchemeFactory()); - } - - private TExecuteStatementReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_args.class, metaDataMap); - } - - public ExecuteStatement_args() { - } - - public ExecuteStatement_args( - TExecuteStatementReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public ExecuteStatement_args(ExecuteStatement_args other) { - if (other.isSetReq()) { - this.req = new TExecuteStatementReq(other.req); - } - } - - public ExecuteStatement_args deepCopy() { - return new ExecuteStatement_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TExecuteStatementReq getReq() { - return this.req; - } - - public void setReq(TExecuteStatementReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TExecuteStatementReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_args) - return this.equals((ExecuteStatement_args)that); - return false; - } - - public boolean equals(ExecuteStatement_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(ExecuteStatement_args other) 
{ - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - ExecuteStatement_args typedOther = (ExecuteStatement_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_argsStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsStandardScheme getScheme() { - return new ExecuteStatement_argsStandardScheme(); - } - } - - private static class ExecuteStatement_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - 
oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_argsTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsTupleScheme getScheme() { - return new ExecuteStatement_argsTupleScheme(); - } - } - - private static class ExecuteStatement_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_resultTupleSchemeFactory()); - } - - private TExecuteStatementResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_result.class, metaDataMap); - } - - public ExecuteStatement_result() { - } - - public ExecuteStatement_result( - TExecuteStatementResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public ExecuteStatement_result(ExecuteStatement_result other) { - if (other.isSetSuccess()) { - this.success = new TExecuteStatementResp(other.success); - } - } - - public ExecuteStatement_result deepCopy() { - return new ExecuteStatement_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TExecuteStatementResp getSuccess() { - return this.success; - } - - public void setSuccess(TExecuteStatementResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TExecuteStatementResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_result) - return this.equals((ExecuteStatement_result)that); - return false; - } - - public boolean equals(ExecuteStatement_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = 
[The remainder of this hunk removes the rest of the Thrift-generated argument/result wrapper classes: the tail of ExecuteStatement_result, then GetTypeInfo_args, GetTypeInfo_result, GetCatalogs_args, GetCatalogs_result, GetSchemas_args, GetSchemas_result, and the beginning of GetTables_args. Each *_args class wraps a single required `req` field (TGetTypeInfoReq, TGetCatalogsReq, TGetSchemasReq, TGetTablesReq) and each *_result class wraps a single `success` field (TExecuteStatementResp, TGetTypeInfoResp, TGetCatalogsResp, TGetSchemasResp). Every one of these classes carries the same generated members: a _Fields enum with findByThriftId/findByThriftIdOrThrow/findByName lookups, a static metaDataMap, constructors plus a deep-copy constructor, deepCopy and clear, getters/setters with unset/isSet helpers, setFieldValue/getFieldValue, equals and hashCode (via HashCodeBuilder), compareTo, fieldForId, read/write entry points, toString, validate, Java-serialization writeObject/readObject wrappers over TCompactProtocol, and StandardScheme/TupleScheme factories with their reader and writer implementations.]
} - - int lastComparison = 0; - GetTables_args typedOther = (GetTables_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_argsStandardSchemeFactory implements SchemeFactory { - public GetTables_argsStandardScheme getScheme() { - return new GetTables_argsStandardScheme(); - } - } - - private static class GetTables_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_argsTupleSchemeFactory implements SchemeFactory { - public GetTables_argsTupleScheme getScheme() { - return new 
GetTables_argsTupleScheme(); - } - } - - private static class GetTables_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTables_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_resultTupleSchemeFactory()); - } - - private TGetTablesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_result.class, metaDataMap); - } - - public GetTables_result() { - } - - public GetTables_result( - TGetTablesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetTables_result(GetTables_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTablesResp(other.success); - } - } - - public GetTables_result deepCopy() { - return new GetTables_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTablesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTablesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTablesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_result) - return this.equals((GetTables_result)that); - return false; - } - - public boolean equals(GetTables_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - 
- return builder.toHashCode(); - } - - public int compareTo(GetTables_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTables_result typedOther = (GetTables_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_resultStandardSchemeFactory implements SchemeFactory { - public GetTables_resultStandardScheme getScheme() { - return new GetTables_resultStandardScheme(); - } - } - - private static class GetTables_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - 
oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_resultTupleSchemeFactory implements SchemeFactory { - public GetTables_resultTupleScheme getScheme() { - return new GetTables_resultTupleScheme(); - } - } - - private static class GetTables_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTableTypes_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_argsTupleSchemeFactory()); - } - - private TGetTableTypesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_args.class, metaDataMap); - } - - public GetTableTypes_args() { - } - - public GetTableTypes_args( - TGetTableTypesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetTableTypes_args(GetTableTypes_args other) { - if (other.isSetReq()) { - this.req = new TGetTableTypesReq(other.req); - } - } - - public GetTableTypes_args deepCopy() { - return new GetTableTypes_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTableTypesReq getReq() { - return this.req; - } - - public void setReq(TGetTableTypesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTableTypesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_args) - return this.equals((GetTableTypes_args)that); - return false; - } - - public boolean equals(GetTableTypes_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetTableTypes_args other) { - if (!getClass().equals(other.getClass())) { - 
return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTableTypes_args typedOther = (GetTableTypes_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_argsStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_argsStandardScheme getScheme() { - return new GetTableTypes_argsStandardScheme(); - } - } - - private static class GetTableTypes_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
GetTableTypes_argsTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_argsTupleScheme getScheme() { - return new GetTableTypes_argsTupleScheme(); - } - } - - private static class GetTableTypes_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTableTypes_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_resultTupleSchemeFactory()); - } - - private TGetTableTypesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_result.class, metaDataMap); - } - - public GetTableTypes_result() { - } - - public GetTableTypes_result( - TGetTableTypesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetTableTypes_result(GetTableTypes_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTableTypesResp(other.success); - } - } - - public GetTableTypes_result deepCopy() { - return new GetTableTypes_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTableTypesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTableTypesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTableTypesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_result) - return this.equals((GetTableTypes_result)that); - return false; - } - - public boolean equals(GetTableTypes_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - 
builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetTableTypes_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetTableTypes_result typedOther = (GetTableTypes_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_resultStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_resultStandardScheme getScheme() { - return new GetTableTypes_resultStandardScheme(); - } - } - - private static class GetTableTypes_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_result struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_resultTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_resultTupleScheme getScheme() { - return new GetTableTypes_resultTupleScheme(); - } - } - - private static class GetTableTypes_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetColumns_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_argsTupleSchemeFactory()); - } - - private TGetColumnsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_args.class, metaDataMap); - } - - public GetColumns_args() { - } - - public GetColumns_args( - TGetColumnsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetColumns_args(GetColumns_args other) { - if (other.isSetReq()) { - this.req = new TGetColumnsReq(other.req); - } - } - - public GetColumns_args deepCopy() { - return new GetColumns_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetColumnsReq getReq() { - return this.req; - } - - public void setReq(TGetColumnsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetColumnsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_args) - return this.equals((GetColumns_args)that); - return false; - } - - public boolean equals(GetColumns_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetColumns_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetColumns_args typedOther = (GetColumns_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_argsStandardSchemeFactory implements SchemeFactory { - public GetColumns_argsStandardScheme getScheme() { - return new GetColumns_argsStandardScheme(); - } - } - - private static class GetColumns_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_argsTupleSchemeFactory implements 
SchemeFactory { - public GetColumns_argsTupleScheme getScheme() { - return new GetColumns_argsTupleScheme(); - } - } - - private static class GetColumns_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetColumns_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_resultTupleSchemeFactory()); - } - - private TGetColumnsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_result.class, metaDataMap); - } - - public GetColumns_result() { - } - - public GetColumns_result( - TGetColumnsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetColumns_result(GetColumns_result other) { - if (other.isSetSuccess()) { - this.success = new TGetColumnsResp(other.success); - } - } - - public GetColumns_result deepCopy() { - return new GetColumns_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetColumnsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetColumnsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetColumnsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_result) - return this.equals((GetColumns_result)that); - return false; - } - - public boolean equals(GetColumns_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - 
builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetColumns_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetColumns_result typedOther = (GetColumns_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_resultStandardSchemeFactory implements SchemeFactory { - public GetColumns_resultStandardScheme getScheme() { - return new GetColumns_resultStandardScheme(); - } - } - - private static class GetColumns_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success 
!= null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_resultTupleSchemeFactory implements SchemeFactory { - public GetColumns_resultTupleScheme getScheme() { - return new GetColumns_resultTupleScheme(); - } - } - - private static class GetColumns_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetFunctions_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_argsTupleSchemeFactory()); - } - - private TGetFunctionsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_args.class, metaDataMap); - } - - public GetFunctions_args() { - } - - public GetFunctions_args( - TGetFunctionsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetFunctions_args(GetFunctions_args other) { - if (other.isSetReq()) { - this.req = new TGetFunctionsReq(other.req); - } - } - - public GetFunctions_args deepCopy() { - return new GetFunctions_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetFunctionsReq getReq() { - return this.req; - } - - public void setReq(TGetFunctionsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetFunctionsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_args) - return this.equals((GetFunctions_args)that); - return false; - } - - public boolean equals(GetFunctions_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(GetFunctions_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetFunctions_args typedOther = (GetFunctions_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_argsStandardSchemeFactory implements SchemeFactory { - public GetFunctions_argsStandardScheme getScheme() { - return new GetFunctions_argsStandardScheme(); - } - } - - private static class GetFunctions_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
GetFunctions_argsTupleSchemeFactory implements SchemeFactory { - public GetFunctions_argsTupleScheme getScheme() { - return new GetFunctions_argsTupleScheme(); - } - } - - private static class GetFunctions_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetFunctions_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_resultTupleSchemeFactory()); - } - - private TGetFunctionsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_result.class, metaDataMap); - } - - public GetFunctions_result() { - } - - public GetFunctions_result( - TGetFunctionsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetFunctions_result(GetFunctions_result other) { - if (other.isSetSuccess()) { - this.success = new TGetFunctionsResp(other.success); - } - } - - public GetFunctions_result deepCopy() { - return new GetFunctions_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetFunctionsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetFunctionsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetFunctionsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_result) - return this.equals((GetFunctions_result)that); - return false; - } - - public boolean equals(GetFunctions_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetFunctions_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetFunctions_result typedOther = (GetFunctions_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_resultStandardSchemeFactory implements SchemeFactory { - public GetFunctions_resultStandardScheme getScheme() { - return new GetFunctions_resultStandardScheme(); - } - } - - private static class GetFunctions_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_resultTupleSchemeFactory implements SchemeFactory { - public GetFunctions_resultTupleScheme getScheme() { - return new GetFunctions_resultTupleScheme(); - } - } - - private static class GetFunctions_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_argsTupleSchemeFactory()); - } - - private TGetOperationStatusReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_args.class, metaDataMap); - } - - public GetOperationStatus_args() { - } - - public GetOperationStatus_args( - TGetOperationStatusReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetOperationStatus_args(GetOperationStatus_args other) { - if (other.isSetReq()) { - this.req = new TGetOperationStatusReq(other.req); - } - } - - public GetOperationStatus_args deepCopy() { - return new GetOperationStatus_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetOperationStatusReq getReq() { - return this.req; - } - - public void setReq(TGetOperationStatusReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetOperationStatusReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_args) - return this.equals((GetOperationStatus_args)that); - return false; - } - - public boolean equals(GetOperationStatus_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int 
compareTo(GetOperationStatus_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetOperationStatus_args typedOther = (GetOperationStatus_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_argsStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsStandardScheme getScheme() { - return new GetOperationStatus_argsStandardScheme(); - } - } - - private static class GetOperationStatus_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - 
struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_argsTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsTupleScheme getScheme() { - return new GetOperationStatus_argsTupleScheme(); - } - } - - private static class GetOperationStatus_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_resultTupleSchemeFactory()); - } - - private TGetOperationStatusResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_result.class, metaDataMap); - } - - public GetOperationStatus_result() { - } - - public GetOperationStatus_result( - TGetOperationStatusResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetOperationStatus_result(GetOperationStatus_result other) { - if (other.isSetSuccess()) { - this.success = new TGetOperationStatusResp(other.success); - } - } - - public GetOperationStatus_result deepCopy() { - return new GetOperationStatus_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetOperationStatusResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetOperationStatusResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetOperationStatusResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_result) - return this.equals((GetOperationStatus_result)that); - return false; - } - - public boolean equals(GetOperationStatus_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetOperationStatus_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetOperationStatus_result typedOther = (GetOperationStatus_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_resultStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultStandardScheme getScheme() { - return new GetOperationStatus_resultStandardScheme(); - } - } - - private static class GetOperationStatus_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } 
- - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_resultTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultTupleScheme getScheme() { - return new GetOperationStatus_resultTupleScheme(); - } - } - - private static class GetOperationStatus_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_argsTupleSchemeFactory()); - } - - private TCancelOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_args.class, metaDataMap); - } - - public CancelOperation_args() { - } - - public CancelOperation_args( - TCancelOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CancelOperation_args(CancelOperation_args other) { - if (other.isSetReq()) { - this.req = new TCancelOperationReq(other.req); - } - } - - public CancelOperation_args deepCopy() { - return new CancelOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelOperationReq getReq() { - return this.req; - } - - public void setReq(TCancelOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_args) - return this.equals((CancelOperation_args)that); - return false; - } - - public boolean equals(CancelOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CancelOperation_args other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelOperation_args typedOther = (CancelOperation_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_argsStandardSchemeFactory implements SchemeFactory { - public CancelOperation_argsStandardScheme getScheme() { - return new CancelOperation_argsStandardScheme(); - } - } - - private static class CancelOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - 
oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_argsTupleSchemeFactory implements SchemeFactory { - public CancelOperation_argsTupleScheme getScheme() { - return new CancelOperation_argsTupleScheme(); - } - } - - private static class CancelOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_resultTupleSchemeFactory()); - } - - private TCancelOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_result.class, metaDataMap); - } - - public CancelOperation_result() { - } - - public CancelOperation_result( - TCancelOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CancelOperation_result(CancelOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelOperationResp(other.success); - } - } - - public CancelOperation_result deepCopy() { - return new CancelOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_result) - return this.equals((CancelOperation_result)that); - return false; - } - - public boolean equals(CancelOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && 
(isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CancelOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelOperation_result typedOther = (CancelOperation_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_resultStandardSchemeFactory implements SchemeFactory { - public CancelOperation_resultStandardScheme getScheme() { - return new CancelOperation_resultStandardScheme(); - } - } - - private static class CancelOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, 
CancelOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_resultTupleSchemeFactory implements SchemeFactory { - public CancelOperation_resultTupleScheme getScheme() { - return new CancelOperation_resultTupleScheme(); - } - } - - private static class CancelOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_argsTupleSchemeFactory()); - } - - private TCloseOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_args.class, metaDataMap); - } - - public CloseOperation_args() { - } - - public CloseOperation_args( - TCloseOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CloseOperation_args(CloseOperation_args other) { - if (other.isSetReq()) { - this.req = new TCloseOperationReq(other.req); - } - } - - public CloseOperation_args deepCopy() { - return new CloseOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseOperationReq getReq() { - return this.req; - } - - public void setReq(TCloseOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_args) - return this.equals((CloseOperation_args)that); - return false; - } - - public boolean equals(CloseOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(CloseOperation_args other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseOperation_args typedOther = (CloseOperation_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_argsStandardSchemeFactory implements SchemeFactory { - public CloseOperation_argsStandardScheme getScheme() { - return new CloseOperation_argsStandardScheme(); - } - } - - private static class CloseOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); 
- } - - } - - private static class CloseOperation_argsTupleSchemeFactory implements SchemeFactory { - public CloseOperation_argsTupleScheme getScheme() { - return new CloseOperation_argsTupleScheme(); - } - } - - private static class CloseOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_resultTupleSchemeFactory()); - } - - private TCloseOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_result.class, metaDataMap); - } - - public CloseOperation_result() { - } - - public CloseOperation_result( - TCloseOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CloseOperation_result(CloseOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseOperationResp(other.success); - } - } - - public CloseOperation_result deepCopy() { - return new CloseOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_result) - return this.equals((CloseOperation_result)that); - return false; - } - - public boolean equals(CloseOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - 
builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CloseOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CloseOperation_result typedOther = (CloseOperation_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_resultStandardSchemeFactory implements SchemeFactory { - public CloseOperation_resultStandardScheme getScheme() { - return new CloseOperation_resultStandardScheme(); - } - } - - private static class CloseOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_result struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_resultTupleSchemeFactory implements SchemeFactory { - public CloseOperation_resultTupleScheme getScheme() { - return new CloseOperation_resultTupleScheme(); - } - } - - private static class CloseOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_argsTupleSchemeFactory()); - } - - private TGetResultSetMetadataReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_args.class, metaDataMap); - } - - public GetResultSetMetadata_args() { - } - - public GetResultSetMetadata_args( - TGetResultSetMetadataReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetResultSetMetadata_args(GetResultSetMetadata_args other) { - if (other.isSetReq()) { - this.req = new TGetResultSetMetadataReq(other.req); - } - } - - public GetResultSetMetadata_args deepCopy() { - return new GetResultSetMetadata_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetResultSetMetadataReq getReq() { - return this.req; - } - - public void setReq(TGetResultSetMetadataReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetResultSetMetadataReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_args) - return this.equals((GetResultSetMetadata_args)that); - return false; - } - - public boolean equals(GetResultSetMetadata_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(GetResultSetMetadata_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetResultSetMetadata_args typedOther = (GetResultSetMetadata_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_argsStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsStandardScheme getScheme() { - return new GetResultSetMetadata_argsStandardScheme(); - } - } - - private static class GetResultSetMetadata_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) 
{ - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_argsTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsTupleScheme getScheme() { - return new GetResultSetMetadata_argsTupleScheme(); - } - } - - private static class GetResultSetMetadata_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_resultTupleSchemeFactory()); - } - - private TGetResultSetMetadataResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_result.class, metaDataMap); - } - - public GetResultSetMetadata_result() { - } - - public GetResultSetMetadata_result( - TGetResultSetMetadataResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetResultSetMetadata_result(GetResultSetMetadata_result other) { - if (other.isSetSuccess()) { - this.success = new TGetResultSetMetadataResp(other.success); - } - } - - public GetResultSetMetadata_result deepCopy() { - return new GetResultSetMetadata_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetResultSetMetadataResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetResultSetMetadataResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetResultSetMetadataResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_result) - return this.equals((GetResultSetMetadata_result)that); - return false; - } - - public boolean equals(GetResultSetMetadata_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder 
builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetResultSetMetadata_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetResultSetMetadata_result typedOther = (GetResultSetMetadata_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_resultStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_resultStandardScheme getScheme() { - return new GetResultSetMetadata_resultStandardScheme(); - } - } - - private static class GetResultSetMetadata_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetResultSetMetadataResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_resultTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_resultTupleScheme getScheme() { - return new GetResultSetMetadata_resultTupleScheme(); - } - } - - private static class GetResultSetMetadata_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetResultSetMetadataResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class FetchResults_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("FetchResults_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new FetchResults_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new FetchResults_argsTupleSchemeFactory()); - } - - private TFetchResultsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TFetchResultsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(FetchResults_args.class, metaDataMap); - } - - public FetchResults_args() { - } - - public FetchResults_args( - TFetchResultsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public FetchResults_args(FetchResults_args other) { - if (other.isSetReq()) { - this.req = new TFetchResultsReq(other.req); - } - } - - public FetchResults_args deepCopy() { - return new FetchResults_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TFetchResultsReq getReq() { - return this.req; - } - - public void setReq(TFetchResultsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TFetchResultsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof FetchResults_args) - return this.equals((FetchResults_args)that); - return false; - } - - public boolean equals(FetchResults_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int compareTo(FetchResults_args other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - FetchResults_args typedOther = (FetchResults_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("FetchResults_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class FetchResults_argsStandardSchemeFactory implements SchemeFactory { - public FetchResults_argsStandardScheme getScheme() { - return new FetchResults_argsStandardScheme(); - } - } - - private static class FetchResults_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, FetchResults_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TFetchResultsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, FetchResults_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
FetchResults_argsTupleSchemeFactory implements SchemeFactory { - public FetchResults_argsTupleScheme getScheme() { - return new FetchResults_argsTupleScheme(); - } - } - - private static class FetchResults_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, FetchResults_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, FetchResults_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TFetchResultsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class FetchResults_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("FetchResults_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new FetchResults_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new FetchResults_resultTupleSchemeFactory()); - } - - private TFetchResultsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TFetchResultsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(FetchResults_result.class, metaDataMap); - } - - public FetchResults_result() { - } - - public FetchResults_result( - TFetchResultsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public FetchResults_result(FetchResults_result other) { - if (other.isSetSuccess()) { - this.success = new TFetchResultsResp(other.success); - } - } - - public FetchResults_result deepCopy() { - return new FetchResults_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TFetchResultsResp getSuccess() { - return this.success; - } - - public void setSuccess(TFetchResultsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TFetchResultsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof FetchResults_result) - return this.equals((FetchResults_result)that); - return false; - } - - public boolean equals(FetchResults_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - 
if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(FetchResults_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - FetchResults_result typedOther = (FetchResults_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("FetchResults_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class FetchResults_resultStandardSchemeFactory implements SchemeFactory { - public FetchResults_resultStandardScheme getScheme() { - return new FetchResults_resultStandardScheme(); - } - } - - private static class FetchResults_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, FetchResults_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TFetchResultsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, FetchResults_result struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class FetchResults_resultTupleSchemeFactory implements SchemeFactory { - public FetchResults_resultTupleScheme getScheme() { - return new FetchResults_resultTupleScheme(); - } - } - - private static class FetchResults_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, FetchResults_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, FetchResults_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TFetchResultsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_argsTupleSchemeFactory()); - } - - private TGetDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_args.class, metaDataMap); - } - - public GetDelegationToken_args() { - } - - public GetDelegationToken_args( - TGetDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public GetDelegationToken_args(GetDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TGetDelegationTokenReq(other.req); - } - } - - public GetDelegationToken_args deepCopy() { - return new GetDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TGetDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_args) - return this.equals((GetDelegationToken_args)that); - return false; - } - - public boolean equals(GetDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return builder.toHashCode(); - } - - public int 
compareTo(GetDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetDelegationToken_args typedOther = (GetDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_argsStandardScheme getScheme() { - return new GetDelegationToken_argsStandardScheme(); - } - } - - private static class GetDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - 
struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_argsTupleScheme getScheme() { - return new GetDelegationToken_argsTupleScheme(); - } - } - - private static class GetDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_resultTupleSchemeFactory()); - } - - private TGetDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_result.class, metaDataMap); - } - - public GetDelegationToken_result() { - } - - public GetDelegationToken_result( - TGetDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public GetDelegationToken_result(GetDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TGetDelegationTokenResp(other.success); - } - } - - public GetDelegationToken_result deepCopy() { - return new GetDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_result) - return this.equals((GetDelegationToken_result)that); - return false; - } - - public boolean equals(GetDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new 
HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(GetDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - GetDelegationToken_result typedOther = (GetDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultStandardScheme getScheme() { - return new GetDelegationToken_resultStandardScheme(); - } - } - - private static class GetDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } 
- - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultTupleScheme getScheme() { - return new GetDelegationToken_resultTupleScheme(); - } - } - - private static class GetDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelDelegationToken_argsTupleSchemeFactory()); - } - - private TCancelDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelDelegationToken_args.class, metaDataMap); - } - - public CancelDelegationToken_args() { - } - - public CancelDelegationToken_args( - TCancelDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public CancelDelegationToken_args(CancelDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TCancelDelegationTokenReq(other.req); - } - } - - public CancelDelegationToken_args deepCopy() { - return new CancelDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TCancelDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelDelegationToken_args) - return this.equals((CancelDelegationToken_args)that); - return false; - } - - public boolean equals(CancelDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(CancelDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelDelegationToken_args typedOther = (CancelDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public CancelDelegationToken_argsStandardScheme getScheme() { - return new CancelDelegationToken_argsStandardScheme(); - } - } - - private static class CancelDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if 
(struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public CancelDelegationToken_argsTupleScheme getScheme() { - return new CancelDelegationToken_argsTupleScheme(); - } - } - - private static class CancelDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelDelegationToken_resultTupleSchemeFactory()); - } - - private TCancelDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelDelegationToken_result.class, metaDataMap); - } - - public CancelDelegationToken_result() { - } - - public CancelDelegationToken_result( - TCancelDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public CancelDelegationToken_result(CancelDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelDelegationTokenResp(other.success); - } - } - - public CancelDelegationToken_result deepCopy() { - return new CancelDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelDelegationToken_result) - return this.equals((CancelDelegationToken_result)that); - return false; - } - - public boolean equals(CancelDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - 
HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(CancelDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - CancelDelegationToken_result typedOther = (CancelDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public CancelDelegationToken_resultStandardScheme getScheme() { - return new CancelDelegationToken_resultStandardScheme(); - } - } - - private static class CancelDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - 
iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public CancelDelegationToken_resultTupleScheme getScheme() { - return new CancelDelegationToken_resultTupleScheme(); - } - } - - private static class CancelDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_argsTupleSchemeFactory()); - } - - private TRenewDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_args.class, metaDataMap); - } - - public RenewDelegationToken_args() { - } - - public RenewDelegationToken_args( - TRenewDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. - */ - public RenewDelegationToken_args(RenewDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TRenewDelegationTokenReq(other.req); - } - } - - public RenewDelegationToken_args deepCopy() { - return new RenewDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TRenewDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TRenewDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TRenewDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_args) - return this.equals((RenewDelegationToken_args)that); - return false; - } - - public boolean equals(RenewDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_req = true && (isSetReq()); - builder.append(present_req); - if (present_req) - builder.append(req); - - return 
builder.toHashCode(); - } - - public int compareTo(RenewDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - RenewDelegationToken_args typedOther = (RenewDelegationToken_args)other; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(typedOther.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, typedOther.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsStandardScheme getScheme() { - return new RenewDelegationToken_argsStandardScheme(); - } - } - - private static class RenewDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) 
{ - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsTupleScheme getScheme() { - return new RenewDelegationToken_argsTupleScheme(); - } - } - - private static class RenewDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_resultTupleSchemeFactory()); - } - - private TRenewDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_result.class, metaDataMap); - } - - public RenewDelegationToken_result() { - } - - public RenewDelegationToken_result( - TRenewDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. - */ - public RenewDelegationToken_result(RenewDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TRenewDelegationTokenResp(other.success); - } - } - - public RenewDelegationToken_result deepCopy() { - return new RenewDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TRenewDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TRenewDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TRenewDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_result) - return this.equals((RenewDelegationToken_result)that); - return false; - } - - public boolean equals(RenewDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder 
builder = new HashCodeBuilder(); - - boolean present_success = true && (isSetSuccess()); - builder.append(present_success); - if (present_success) - builder.append(success); - - return builder.toHashCode(); - } - - public int compareTo(RenewDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - RenewDelegationToken_result typedOther = (RenewDelegationToken_result)other; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(typedOther.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, typedOther.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultStandardScheme getScheme() { - return new RenewDelegationToken_resultStandardScheme(); - } - } - - private static class RenewDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultTupleScheme getScheme() { - return new RenewDelegationToken_resultTupleScheme(); - } - } - - private static class RenewDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java deleted file mode 100644 index 25a38b178428a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCLIServiceConstants.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCLIServiceConstants { - - public static final Set PRIMITIVE_TYPES = new HashSet(); - static { - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BOOLEAN_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.TINYINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.SMALLINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BIGINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.FLOAT_TYPE); - 
PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DOUBLE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.STRING_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.TIMESTAMP_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.BINARY_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DECIMAL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.NULL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE); - } - - public static final Set COMPLEX_TYPES = new HashSet(); - static { - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.STRUCT_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.UNION_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.USER_DEFINED_TYPE); - } - - public static final Set COLLECTION_TYPES = new HashSet(); - static { - COLLECTION_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE); - COLLECTION_TYPES.add(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE); - } - - public static final Map TYPE_NAMES = new HashMap(); - static { - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BOOLEAN_TYPE, "BOOLEAN"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.TINYINT_TYPE, "TINYINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.SMALLINT_TYPE, "SMALLINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INT_TYPE, "INT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BIGINT_TYPE, "BIGINT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.FLOAT_TYPE, "FLOAT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DOUBLE_TYPE, "DOUBLE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.STRING_TYPE, "STRING"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.TIMESTAMP_TYPE, "TIMESTAMP"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.BINARY_TYPE, "BINARY"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.ARRAY_TYPE, "ARRAY"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.MAP_TYPE, "MAP"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.STRUCT_TYPE, "STRUCT"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.UNION_TYPE, "UNIONTYPE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DECIMAL_TYPE, "DECIMAL"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.NULL_TYPE, "NULL"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.DATE_TYPE, "DATE"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.VARCHAR_TYPE, "VARCHAR"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.CHAR_TYPE, "CHAR"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE, "INTERVAL_YEAR_MONTH"); - TYPE_NAMES.put(org.apache.hive.service.cli.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE, "INTERVAL_DAY_TIME"); - } - - public static final String CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; - - 
public static final String PRECISION = "precision"; - - public static final String SCALE = "scale"; - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java deleted file mode 100644 index e23fcdd77a1a4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenReq.java +++ /dev/null @@ -1,491 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenReq.class, metaDataMap); - } - - public TCancelDelegationTokenReq() { - } - - public TCancelDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelDelegationTokenReq(TCancelDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TCancelDelegationTokenReq deepCopy() { - return new TCancelDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenReq) - return this.equals((TCancelDelegationTokenReq)that); - return false; - } - - public boolean equals(TCancelDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - 
return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TCancelDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelDelegationTokenReq typedOther = (TCancelDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqStandardScheme getScheme() { - return new TCancelDelegationTokenReqStandardScheme(); - } - } - - private static class TCancelDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqTupleScheme getScheme() { - return new TCancelDelegationTokenReqTupleScheme(); - } - } - - private static class TCancelDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - 
TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java deleted file mode 100644 index 77c9ee77ec59b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelDelegationTokenResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenResp.class, metaDataMap); - } - - public TCancelDelegationTokenResp() { - } - - public TCancelDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelDelegationTokenResp(TCancelDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelDelegationTokenResp deepCopy() { - return new TCancelDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenResp) - return this.equals((TCancelDelegationTokenResp)that); - return false; - } - - public boolean equals(TCancelDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - 
builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TCancelDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelDelegationTokenResp typedOther = (TCancelDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespStandardScheme getScheme() { - return new TCancelDelegationTokenRespStandardScheme(); - } - } - - private static class TCancelDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - 
public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespTupleScheme getScheme() { - return new TCancelDelegationTokenRespTupleScheme(); - } - } - - private static class TCancelDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java deleted file mode 100644 index 45eac48ab12d3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationReq.class, metaDataMap); - } - - public TCancelOperationReq() { - } - - public TCancelOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelOperationReq(TCancelOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCancelOperationReq deepCopy() { - return new TCancelOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationReq) - return this.equals((TCancelOperationReq)that); - return false; - } - - public boolean equals(TCancelOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCancelOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelOperationReq typedOther = (TCancelOperationReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - 
schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationReqStandardSchemeFactory implements SchemeFactory { - public TCancelOperationReqStandardScheme getScheme() { - return new TCancelOperationReqStandardScheme(); - } - } - - private static class TCancelOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationReqTupleSchemeFactory implements SchemeFactory { - public TCancelOperationReqTupleScheme getScheme() { - return new TCancelOperationReqTupleScheme(); - } - } - - private static class TCancelOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java deleted file mode 100644 index 2a39414d601aa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCancelOperationResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCancelOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationResp.class, metaDataMap); - } - - public TCancelOperationResp() { - } - - public TCancelOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationResp(TCancelOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelOperationResp deepCopy() { - return new TCancelOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationResp) - return this.equals((TCancelOperationResp)that); - return false; - } - - public boolean equals(TCancelOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - 
return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TCancelOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCancelOperationResp typedOther = (TCancelOperationResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationRespStandardSchemeFactory implements SchemeFactory { - public TCancelOperationRespStandardScheme getScheme() { - return new TCancelOperationRespStandardScheme(); - } - } - - private static class TCancelOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationRespTupleSchemeFactory implements SchemeFactory { - public TCancelOperationRespTupleScheme getScheme() { - return new TCancelOperationRespTupleScheme(); - } - } - - private static class TCancelOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java deleted file mode 100644 index 0cbb7ccced073..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE 
SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationReq.class, metaDataMap); - } - - public TCloseOperationReq() { - } - - public TCloseOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationReq(TCloseOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCloseOperationReq deepCopy() { - return new TCloseOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationReq) - return this.equals((TCloseOperationReq)that); - return false; - } - - public boolean equals(TCloseOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) 
- return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCloseOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseOperationReq typedOther = (TCloseOperationReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationReqStandardSchemeFactory implements SchemeFactory { - public TCloseOperationReqStandardScheme getScheme() { - return new TCloseOperationReqStandardScheme(); - } - } - - private static class TCloseOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationReqTupleSchemeFactory implements SchemeFactory { - public TCloseOperationReqTupleScheme getScheme() { - return new TCloseOperationReqTupleScheme(); - } - } - - private static class TCloseOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java deleted file mode 100644 index 7334d67173d7b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseOperationResp.java 
+++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationResp.class, metaDataMap); - } - - public TCloseOperationResp() { - } - - public TCloseOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationResp(TCloseOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseOperationResp deepCopy() { - return new TCloseOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationResp) - return this.equals((TCloseOperationResp)that); - return false; - } - - public boolean equals(TCloseOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int 
compareTo(TCloseOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseOperationResp typedOther = (TCloseOperationResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationRespStandardSchemeFactory implements SchemeFactory { - public TCloseOperationRespStandardScheme getScheme() { - return new TCloseOperationRespStandardScheme(); - } - } - - private static class TCloseOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationRespTupleSchemeFactory implements SchemeFactory { - public TCloseOperationRespTupleScheme getScheme() { - return new TCloseOperationRespTupleScheme(); - } - } - - private static class TCloseOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java deleted file mode 100644 index 027e8295436b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionReq.class, metaDataMap); - } - - public TCloseSessionReq() { - } - - public TCloseSessionReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseSessionReq(TCloseSessionReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TCloseSessionReq deepCopy() { - return new TCloseSessionReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionReq) - return this.equals((TCloseSessionReq)that); - return false; - } - - public boolean equals(TCloseSessionReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TCloseSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseSessionReq typedOther = (TCloseSessionReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionReqStandardSchemeFactory implements SchemeFactory { - public TCloseSessionReqStandardScheme getScheme() { - return new TCloseSessionReqStandardScheme(); - } - } - - private static class TCloseSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionReqTupleSchemeFactory implements SchemeFactory { - public TCloseSessionReqTupleScheme getScheme() { - return new TCloseSessionReqTupleScheme(); - } - } - - private static class TCloseSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) 
throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java deleted file mode 100644 index 168c8fc775e33..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TCloseSessionResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TCloseSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionResp.class, metaDataMap); - } - - public TCloseSessionResp() { - } - - public TCloseSessionResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseSessionResp(TCloseSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseSessionResp deepCopy() { - return new TCloseSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionResp) - return this.equals((TCloseSessionResp)that); - return false; - } - - public boolean equals(TCloseSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - return builder.toHashCode(); - } - - public int 
compareTo(TCloseSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TCloseSessionResp typedOther = (TCloseSessionResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionRespStandardSchemeFactory implements SchemeFactory { - public TCloseSessionRespStandardScheme getScheme() { - return new TCloseSessionRespStandardScheme(); - } - } - - private static class TCloseSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionRespTupleSchemeFactory implements SchemeFactory { - public TCloseSessionRespTupleScheme getScheme() { - return new TCloseSessionRespTupleScheme(); - } - } - - private static class TCloseSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java deleted file mode 100644 index fc2171dc99e4c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumn.java +++ /dev/null @@ -1,732 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumn extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumn"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = 
new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - private static final org.apache.thrift.protocol.TField BINARY_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryVal", org.apache.thrift.protocol.TType.STRUCT, (short)8); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"), - BINARY_VAL((short)8, "binaryVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - case 8: // BINARY_VAL - return BINARY_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolColumn.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteColumn.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Column.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Column.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Column.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleColumn.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringColumn.class))); - tmpMap.put(_Fields.BINARY_VAL, new org.apache.thrift.meta_data.FieldMetaData("binaryVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBinaryColumn.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumn.class, metaDataMap); - } - - public TColumn() { - super(); - } - - public TColumn(TColumn._Fields setField, Object value) { - super(setField, value); - } - - public TColumn(TColumn other) { - super(other); - } - public TColumn deepCopy() { - return new TColumn(this); - } - - public static TColumn boolVal(TBoolColumn value) { - TColumn x = new TColumn(); - x.setBoolVal(value); - return x; - } - - public static TColumn byteVal(TByteColumn value) { - TColumn x = new TColumn(); - x.setByteVal(value); - return x; - } - - public static TColumn i16Val(TI16Column value) { - TColumn x = new TColumn(); - x.setI16Val(value); - return x; - } - - public static TColumn i32Val(TI32Column value) { - TColumn x = new TColumn(); - x.setI32Val(value); - return x; - } - - public static TColumn 
i64Val(TI64Column value) { - TColumn x = new TColumn(); - x.setI64Val(value); - return x; - } - - public static TColumn doubleVal(TDoubleColumn value) { - TColumn x = new TColumn(); - x.setDoubleVal(value); - return x; - } - - public static TColumn stringVal(TStringColumn value) { - TColumn x = new TColumn(); - x.setStringVal(value); - return x; - } - - public static TColumn binaryVal(TBinaryColumn value) { - TColumn x = new TColumn(); - x.setBinaryVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolColumn for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TByteColumn for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Column for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Column for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Column for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleColumn for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TStringColumn for field 'stringVal', but got " + value.getClass().getSimpleName()); - case BINARY_VAL: - if (value instanceof TBinaryColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBinaryColumn for field 'binaryVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); 
- return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VAL: - if (field.type == BINARY_VAL_FIELD_DESC.type) { - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - case BINARY_VAL: - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return 
binaryVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - case BINARY_VAL: - return BINARY_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolColumn getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteColumn getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Column getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Column getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Column)getFieldValue(); 
- } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Column getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleColumn getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringColumn getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public TBinaryColumn getBinaryVal() { - if (getSetField() == _Fields.BINARY_VAL) { - return (TBinaryColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryVal(TBinaryColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BINARY_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean isSetBinaryVal() { - return setField_ == _Fields.BINARY_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumn) { - return equals((TColumn)other); - } else { - return false; - } - } - - public boolean equals(TColumn other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumn other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java deleted file mode 100644 index 247db6489457f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnDesc.java +++ /dev/null @@ -1,700 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumnDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnDesc"); - - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_DESC_FIELD_DESC = new org.apache.thrift.protocol.TField("typeDesc", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField POSITION_FIELD_DESC = new org.apache.thrift.protocol.TField("position", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField COMMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("comment", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TColumnDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TColumnDescTupleSchemeFactory()); - } - - private String columnName; // required - private TTypeDesc 
typeDesc; // required - private int position; // required - private String comment; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMN_NAME((short)1, "columnName"), - TYPE_DESC((short)2, "typeDesc"), - POSITION((short)3, "position"), - COMMENT((short)4, "comment"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMN_NAME - return COLUMN_NAME; - case 2: // TYPE_DESC - return TYPE_DESC; - case 3: // POSITION - return POSITION; - case 4: // COMMENT - return COMMENT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __POSITION_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.COMMENT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TYPE_DESC, new org.apache.thrift.meta_data.FieldMetaData("typeDesc", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeDesc.class))); - tmpMap.put(_Fields.POSITION, new org.apache.thrift.meta_data.FieldMetaData("position", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.COMMENT, new org.apache.thrift.meta_data.FieldMetaData("comment", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnDesc.class, metaDataMap); - } - - public TColumnDesc() { - } - - public TColumnDesc( - String columnName, - TTypeDesc typeDesc, - int position) - { - this(); - this.columnName = columnName; - this.typeDesc = typeDesc; - this.position = position; - setPositionIsSet(true); - } - - /** - * 
Performs a deep copy on other. - */ - public TColumnDesc(TColumnDesc other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - if (other.isSetTypeDesc()) { - this.typeDesc = new TTypeDesc(other.typeDesc); - } - this.position = other.position; - if (other.isSetComment()) { - this.comment = other.comment; - } - } - - public TColumnDesc deepCopy() { - return new TColumnDesc(this); - } - - @Override - public void clear() { - this.columnName = null; - this.typeDesc = null; - setPositionIsSet(false); - this.position = 0; - this.comment = null; - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public TTypeDesc getTypeDesc() { - return this.typeDesc; - } - - public void setTypeDesc(TTypeDesc typeDesc) { - this.typeDesc = typeDesc; - } - - public void unsetTypeDesc() { - this.typeDesc = null; - } - - /** Returns true if field typeDesc is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeDesc() { - return this.typeDesc != null; - } - - public void setTypeDescIsSet(boolean value) { - if (!value) { - this.typeDesc = null; - } - } - - public int getPosition() { - return this.position; - } - - public void setPosition(int position) { - this.position = position; - setPositionIsSet(true); - } - - public void unsetPosition() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - /** Returns true if field position is set (has been assigned a value) and false otherwise */ - public boolean isSetPosition() { - return EncodingUtils.testBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - public void setPositionIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __POSITION_ISSET_ID, value); - } - - public String getComment() { - return this.comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public void unsetComment() { - this.comment = null; - } - - /** Returns true if field comment is set (has been assigned a value) and false otherwise */ - public boolean isSetComment() { - return this.comment != null; - } - - public void setCommentIsSet(boolean value) { - if (!value) { - this.comment = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - case TYPE_DESC: - if (value == null) { - unsetTypeDesc(); - } else { - setTypeDesc((TTypeDesc)value); - } - break; - - case POSITION: - if (value == null) { - unsetPosition(); - } else { - setPosition((Integer)value); - } - break; - - case COMMENT: - if (value == null) { - unsetComment(); - } else { - setComment((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMN_NAME: - return getColumnName(); - - case TYPE_DESC: - return getTypeDesc(); - - case POSITION: - return Integer.valueOf(getPosition()); - - case COMMENT: - return getComment(); - - } - throw new IllegalStateException(); - } - - /** 
Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMN_NAME: - return isSetColumnName(); - case TYPE_DESC: - return isSetTypeDesc(); - case POSITION: - return isSetPosition(); - case COMMENT: - return isSetComment(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TColumnDesc) - return this.equals((TColumnDesc)that); - return false; - } - - public boolean equals(TColumnDesc that) { - if (that == null) - return false; - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - boolean this_present_typeDesc = true && this.isSetTypeDesc(); - boolean that_present_typeDesc = true && that.isSetTypeDesc(); - if (this_present_typeDesc || that_present_typeDesc) { - if (!(this_present_typeDesc && that_present_typeDesc)) - return false; - if (!this.typeDesc.equals(that.typeDesc)) - return false; - } - - boolean this_present_position = true; - boolean that_present_position = true; - if (this_present_position || that_present_position) { - if (!(this_present_position && that_present_position)) - return false; - if (this.position != that.position) - return false; - } - - boolean this_present_comment = true && this.isSetComment(); - boolean that_present_comment = true && that.isSetComment(); - if (this_present_comment || that_present_comment) { - if (!(this_present_comment && that_present_comment)) - return false; - if (!this.comment.equals(that.comment)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_columnName = true && (isSetColumnName()); - builder.append(present_columnName); - if (present_columnName) - builder.append(columnName); - - boolean present_typeDesc = true && (isSetTypeDesc()); - builder.append(present_typeDesc); - if (present_typeDesc) - builder.append(typeDesc); - - boolean present_position = true; - builder.append(present_position); - if (present_position) - builder.append(position); - - boolean present_comment = true && (isSetComment()); - builder.append(present_comment); - if (present_comment) - builder.append(comment); - - return builder.toHashCode(); - } - - public int compareTo(TColumnDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TColumnDesc typedOther = (TColumnDesc)other; - - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(typedOther.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, typedOther.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeDesc()).compareTo(typedOther.isSetTypeDesc()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeDesc()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeDesc, typedOther.typeDesc); - if 
(lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPosition()).compareTo(typedOther.isSetPosition()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPosition()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.position, typedOther.position); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetComment()).compareTo(typedOther.isSetComment()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetComment()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.comment, typedOther.comment); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TColumnDesc("); - boolean first = true; - - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - if (!first) sb.append(", "); - sb.append("typeDesc:"); - if (this.typeDesc == null) { - sb.append("null"); - } else { - sb.append(this.typeDesc); - } - first = false; - if (!first) sb.append(", "); - sb.append("position:"); - sb.append(this.position); - first = false; - if (isSetComment()) { - if (!first) sb.append(", "); - sb.append("comment:"); - if (this.comment == null) { - sb.append("null"); - } else { - sb.append(this.comment); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumnName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columnName' is unset! Struct:" + toString()); - } - - if (!isSetTypeDesc()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeDesc' is unset! Struct:" + toString()); - } - - if (!isSetPosition()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'position' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeDesc != null) { - typeDesc.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TColumnDescStandardSchemeFactory implements SchemeFactory { - public TColumnDescStandardScheme getScheme() { - return new TColumnDescStandardScheme(); - } - } - - private static class TColumnDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TColumnDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_DESC - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // POSITION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // COMMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TColumnDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columnName != null) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - if (struct.typeDesc != null) { - oprot.writeFieldBegin(TYPE_DESC_FIELD_DESC); - struct.typeDesc.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(POSITION_FIELD_DESC); - oprot.writeI32(struct.position); - oprot.writeFieldEnd(); - if (struct.comment != null) { - if (struct.isSetComment()) { - oprot.writeFieldBegin(COMMENT_FIELD_DESC); - oprot.writeString(struct.comment); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TColumnDescTupleSchemeFactory implements SchemeFactory { - public TColumnDescTupleScheme getScheme() { - return new TColumnDescTupleScheme(); - } - } - - private static class TColumnDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.columnName); - struct.typeDesc.write(oprot); - oprot.writeI32(struct.position); - BitSet optionals = new BitSet(); - if (struct.isSetComment()) { - optionals.set(0); - } - 
oprot.writeBitSet(optionals, 1); - if (struct.isSetComment()) { - oprot.writeString(struct.comment); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java deleted file mode 100644 index 8504c6d608d42..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TColumnValue.java +++ /dev/null @@ -1,671 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TColumnValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnValue"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - - /** The set 
of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolValue.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteValue.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Value.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Value.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Value.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleValue.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnValue.class, metaDataMap); - } - - public TColumnValue() { - super(); - } - - public TColumnValue(TColumnValue._Fields setField, Object value) { - super(setField, value); - } - - public TColumnValue(TColumnValue other) { - super(other); - } - public TColumnValue deepCopy() { - return new TColumnValue(this); - } - - public static TColumnValue boolVal(TBoolValue value) { - TColumnValue x = new TColumnValue(); - x.setBoolVal(value); - return x; - } - - public static TColumnValue byteVal(TByteValue value) { - TColumnValue x = new TColumnValue(); - x.setByteVal(value); - return x; - } - - public static TColumnValue i16Val(TI16Value value) { - TColumnValue x = new TColumnValue(); - x.setI16Val(value); - return x; - } - - public static TColumnValue i32Val(TI32Value value) { - TColumnValue x = new TColumnValue(); - x.setI32Val(value); - return x; - } - - public static TColumnValue i64Val(TI64Value value) { - TColumnValue x = new TColumnValue(); - x.setI64Val(value); - return x; - } - - public static TColumnValue doubleVal(TDoubleValue value) { - TColumnValue x = new TColumnValue(); - x.setDoubleVal(value); - return x; - } - - public static TColumnValue stringVal(TStringValue value) { - TColumnValue x = new TColumnValue(); - x.setStringVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolValue) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolValue for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteValue) { - break; - } - throw new ClassCastException("Was expecting value of type TByteValue for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Value for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Value for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Value for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleValue) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleValue for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringValue) { - break; - } - throw new ClassCastException("Was expecting value of type TStringValue for field 'stringVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, 
org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - case 
I16_VAL: - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolValue getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteValue getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Value getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 
'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Value getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Value getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleValue getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringValue getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumnValue) { - return equals((TColumnValue)other); - } else { - return false; - } - } - - public boolean equals(TColumnValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumnValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - 
hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java deleted file mode 100644 index 4fc54544c1bea..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TDoubleColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleColumn.class, metaDataMap); - } - - public TDoubleColumn() { - } - - public TDoubleColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TDoubleColumn(TDoubleColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Double other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TDoubleColumn deepCopy() { - return new TDoubleColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(double elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleColumn) - return this.equals((TDoubleColumn)that); - return false; - } - - public boolean equals(TDoubleColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TDoubleColumn other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TDoubleColumn typedOther = (TDoubleColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleColumnStandardSchemeFactory implements SchemeFactory { - public TDoubleColumnStandardScheme getScheme() { - return new TDoubleColumnStandardScheme(); - } - } - - private static class TDoubleColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list94 = iprot.readListBegin(); - struct.values = new ArrayList(_list94.size); - for (int _i95 = 0; _i95 < _list94.size; ++_i95) - { - double _elem96; // optional - _elem96 = iprot.readDouble(); - struct.values.add(_elem96); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, struct.values.size())); - for (double _iter97 : struct.values) - { - oprot.writeDouble(_iter97); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleColumnTupleSchemeFactory implements SchemeFactory { - public TDoubleColumnTupleScheme getScheme() { - return new TDoubleColumnTupleScheme(); - } - } - - private static class TDoubleColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (double _iter98 : struct.values) - { - oprot.writeDouble(_iter98); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list99 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, iprot.readI32()); - struct.values = new ArrayList(_list99.size); - for (int _i100 = 0; _i100 < _list99.size; ++_i100) - { - double _elem101; // optional - _elem101 = iprot.readDouble(); - struct.values.add(_elem101); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java deleted file mode 100644 index d21573633ef51..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TDoubleValue.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TDoubleValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.DOUBLE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleValueTupleSchemeFactory()); - } - - private double value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleValue.class, metaDataMap); - } - - public TDoubleValue() { - } - - /** - * Performs a deep copy on other. - */ - public TDoubleValue(TDoubleValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TDoubleValue deepCopy() { - return new TDoubleValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0.0; - } - - public double getValue() { - return this.value; - } - - public void setValue(double value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Double.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleValue) - return this.equals((TDoubleValue)that); - return false; - } - - public boolean equals(TDoubleValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || 
that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TDoubleValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TDoubleValue typedOther = (TDoubleValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleValueStandardSchemeFactory implements SchemeFactory { - public TDoubleValueStandardScheme getScheme() { - return new TDoubleValueStandardScheme(); - } - } - - private static class TDoubleValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeDouble(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleValueTupleSchemeFactory implements SchemeFactory { - public TDoubleValueTupleScheme getScheme() { - return new TDoubleValueTupleScheme(); - } - } - - private static class TDoubleValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeDouble(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java deleted file mode 100644 index 4f157ad5a6450..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementReq.java +++ /dev/null @@ -1,769 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import 
org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TExecuteStatementReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField STATEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("statement", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField CONF_OVERLAY_FIELD_DESC = new org.apache.thrift.protocol.TField("confOverlay", org.apache.thrift.protocol.TType.MAP, (short)3); - private static final org.apache.thrift.protocol.TField RUN_ASYNC_FIELD_DESC = new org.apache.thrift.protocol.TField("runAsync", org.apache.thrift.protocol.TType.BOOL, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String statement; // required - private Map confOverlay; // optional - private boolean runAsync; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - STATEMENT((short)2, "statement"), - CONF_OVERLAY((short)3, "confOverlay"), - RUN_ASYNC((short)4, "runAsync"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // STATEMENT - return STATEMENT; - case 3: // CONF_OVERLAY - return CONF_OVERLAY; - case 4: // RUN_ASYNC - return RUN_ASYNC; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __RUNASYNC_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.CONF_OVERLAY,_Fields.RUN_ASYNC}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.STATEMENT, new org.apache.thrift.meta_data.FieldMetaData("statement", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONF_OVERLAY, new org.apache.thrift.meta_data.FieldMetaData("confOverlay", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.RUN_ASYNC, new org.apache.thrift.meta_data.FieldMetaData("runAsync", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementReq.class, metaDataMap); - } - - public TExecuteStatementReq() { - this.runAsync = false; - - } - - public TExecuteStatementReq( - TSessionHandle sessionHandle, - String statement) - { - this(); - this.sessionHandle = sessionHandle; - this.statement = statement; - } - - /** - * Performs a deep copy on other. 
- */ - public TExecuteStatementReq(TExecuteStatementReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetStatement()) { - this.statement = other.statement; - } - if (other.isSetConfOverlay()) { - Map __this__confOverlay = new HashMap(); - for (Map.Entry other_element : other.confOverlay.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__confOverlay_copy_key = other_element_key; - - String __this__confOverlay_copy_value = other_element_value; - - __this__confOverlay.put(__this__confOverlay_copy_key, __this__confOverlay_copy_value); - } - this.confOverlay = __this__confOverlay; - } - this.runAsync = other.runAsync; - } - - public TExecuteStatementReq deepCopy() { - return new TExecuteStatementReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.statement = null; - this.confOverlay = null; - this.runAsync = false; - - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getStatement() { - return this.statement; - } - - public void setStatement(String statement) { - this.statement = statement; - } - - public void unsetStatement() { - this.statement = null; - } - - /** Returns true if field statement is set (has been assigned a value) and false otherwise */ - public boolean isSetStatement() { - return this.statement != null; - } - - public void setStatementIsSet(boolean value) { - if (!value) { - this.statement = null; - } - } - - public int getConfOverlaySize() { - return (this.confOverlay == null) ? 
0 : this.confOverlay.size(); - } - - public void putToConfOverlay(String key, String val) { - if (this.confOverlay == null) { - this.confOverlay = new HashMap(); - } - this.confOverlay.put(key, val); - } - - public Map getConfOverlay() { - return this.confOverlay; - } - - public void setConfOverlay(Map confOverlay) { - this.confOverlay = confOverlay; - } - - public void unsetConfOverlay() { - this.confOverlay = null; - } - - /** Returns true if field confOverlay is set (has been assigned a value) and false otherwise */ - public boolean isSetConfOverlay() { - return this.confOverlay != null; - } - - public void setConfOverlayIsSet(boolean value) { - if (!value) { - this.confOverlay = null; - } - } - - public boolean isRunAsync() { - return this.runAsync; - } - - public void setRunAsync(boolean runAsync) { - this.runAsync = runAsync; - setRunAsyncIsSet(true); - } - - public void unsetRunAsync() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - /** Returns true if field runAsync is set (has been assigned a value) and false otherwise */ - public boolean isSetRunAsync() { - return EncodingUtils.testBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - public void setRunAsyncIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __RUNASYNC_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case STATEMENT: - if (value == null) { - unsetStatement(); - } else { - setStatement((String)value); - } - break; - - case CONF_OVERLAY: - if (value == null) { - unsetConfOverlay(); - } else { - setConfOverlay((Map)value); - } - break; - - case RUN_ASYNC: - if (value == null) { - unsetRunAsync(); - } else { - setRunAsync((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case STATEMENT: - return getStatement(); - - case CONF_OVERLAY: - return getConfOverlay(); - - case RUN_ASYNC: - return Boolean.valueOf(isRunAsync()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case STATEMENT: - return isSetStatement(); - case CONF_OVERLAY: - return isSetConfOverlay(); - case RUN_ASYNC: - return isSetRunAsync(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementReq) - return this.equals((TExecuteStatementReq)that); - return false; - } - - public boolean equals(TExecuteStatementReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_statement = true && this.isSetStatement(); - boolean that_present_statement = true && that.isSetStatement(); - if 
(this_present_statement || that_present_statement) { - if (!(this_present_statement && that_present_statement)) - return false; - if (!this.statement.equals(that.statement)) - return false; - } - - boolean this_present_confOverlay = true && this.isSetConfOverlay(); - boolean that_present_confOverlay = true && that.isSetConfOverlay(); - if (this_present_confOverlay || that_present_confOverlay) { - if (!(this_present_confOverlay && that_present_confOverlay)) - return false; - if (!this.confOverlay.equals(that.confOverlay)) - return false; - } - - boolean this_present_runAsync = true && this.isSetRunAsync(); - boolean that_present_runAsync = true && that.isSetRunAsync(); - if (this_present_runAsync || that_present_runAsync) { - if (!(this_present_runAsync && that_present_runAsync)) - return false; - if (this.runAsync != that.runAsync) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_statement = true && (isSetStatement()); - builder.append(present_statement); - if (present_statement) - builder.append(statement); - - boolean present_confOverlay = true && (isSetConfOverlay()); - builder.append(present_confOverlay); - if (present_confOverlay) - builder.append(confOverlay); - - boolean present_runAsync = true && (isSetRunAsync()); - builder.append(present_runAsync); - if (present_runAsync) - builder.append(runAsync); - - return builder.toHashCode(); - } - - public int compareTo(TExecuteStatementReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TExecuteStatementReq typedOther = (TExecuteStatementReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStatement()).compareTo(typedOther.isSetStatement()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatement()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statement, typedOther.statement); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfOverlay()).compareTo(typedOther.isSetConfOverlay()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfOverlay()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.confOverlay, typedOther.confOverlay); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRunAsync()).compareTo(typedOther.isSetRunAsync()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRunAsync()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.runAsync, typedOther.runAsync); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, 
this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("statement:"); - if (this.statement == null) { - sb.append("null"); - } else { - sb.append(this.statement); - } - first = false; - if (isSetConfOverlay()) { - if (!first) sb.append(", "); - sb.append("confOverlay:"); - if (this.confOverlay == null) { - sb.append("null"); - } else { - sb.append(this.confOverlay); - } - first = false; - } - if (isSetRunAsync()) { - if (!first) sb.append(", "); - sb.append("runAsync:"); - sb.append(this.runAsync); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetStatement()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statement' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementReqStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementReqStandardScheme getScheme() { - return new TExecuteStatementReqStandardScheme(); - } - } - - private static class TExecuteStatementReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // STATEMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // CONF_OVERLAY - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map162 = iprot.readMapBegin(); - struct.confOverlay = new HashMap(2*_map162.size); - for (int _i163 = 0; _i163 < _map162.size; ++_i163) - { - String _key164; // required - String _val165; // required - _key164 = iprot.readString(); - _val165 = iprot.readString(); - struct.confOverlay.put(_key164, _val165); - } - iprot.readMapEnd(); - } - struct.setConfOverlayIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // RUN_ASYNC - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.statement != null) { - oprot.writeFieldBegin(STATEMENT_FIELD_DESC); - oprot.writeString(struct.statement); - oprot.writeFieldEnd(); - } - if (struct.confOverlay != null) { - if (struct.isSetConfOverlay()) { - oprot.writeFieldBegin(CONF_OVERLAY_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.confOverlay.size())); - for (Map.Entry _iter166 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter166.getKey()); - oprot.writeString(_iter166.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.isSetRunAsync()) { - 
oprot.writeFieldBegin(RUN_ASYNC_FIELD_DESC); - oprot.writeBool(struct.runAsync); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementReqTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementReqTupleScheme getScheme() { - return new TExecuteStatementReqTupleScheme(); - } - } - - private static class TExecuteStatementReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.statement); - BitSet optionals = new BitSet(); - if (struct.isSetConfOverlay()) { - optionals.set(0); - } - if (struct.isSetRunAsync()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetConfOverlay()) { - { - oprot.writeI32(struct.confOverlay.size()); - for (Map.Entry _iter167 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter167.getKey()); - oprot.writeString(_iter167.getValue()); - } - } - } - if (struct.isSetRunAsync()) { - oprot.writeBool(struct.runAsync); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TMap _map168 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.confOverlay = new HashMap(2*_map168.size); - for (int _i169 = 0; _i169 < _map168.size; ++_i169) - { - String _key170; // required - String _val171; // required - _key170 = iprot.readString(); - _val171 = iprot.readString(); - struct.confOverlay.put(_key170, _val171); - } - } - struct.setConfOverlayIsSet(true); - } - if (incoming.get(1)) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java deleted file mode 100644 index fdde51e70f783..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TExecuteStatementResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import 
java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TExecuteStatementResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementResp.class, metaDataMap); - } - - public TExecuteStatementResp() { - } - - public TExecuteStatementResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TExecuteStatementResp(TExecuteStatementResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TExecuteStatementResp deepCopy() { - return new TExecuteStatementResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return 
getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementResp) - return this.equals((TExecuteStatementResp)that); - return false; - } - - public boolean equals(TExecuteStatementResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TExecuteStatementResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TExecuteStatementResp typedOther = (TExecuteStatementResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - 
sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementRespStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementRespStandardScheme getScheme() { - return new TExecuteStatementRespStandardScheme(); - } - } - - private static class TExecuteStatementRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementRespTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementRespTupleScheme getScheme() { - return new 
TExecuteStatementRespTupleScheme(); - } - } - - private static class TExecuteStatementRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java deleted file mode 100644 index b2a22effd91af..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchOrientation.java +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TFetchOrientation implements org.apache.thrift.TEnum { - FETCH_NEXT(0), - FETCH_PRIOR(1), - FETCH_RELATIVE(2), - FETCH_ABSOLUTE(3), - FETCH_FIRST(4), - FETCH_LAST(5); - - private final int value; - - private TFetchOrientation(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */
-  public static TFetchOrientation findByValue(int value) {
-    switch (value) {
-      case 0:
-        return FETCH_NEXT;
-      case 1:
-        return FETCH_PRIOR;
-      case 2:
-        return FETCH_RELATIVE;
-      case 3:
-        return FETCH_ABSOLUTE;
-      case 4:
-        return FETCH_FIRST;
-      case 5:
-        return FETCH_LAST;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java
deleted file mode 100644
index 068711fc44440..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsReq.java
+++ /dev/null
@@ -1,710 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class TFetchResultsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsReq");
-
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField ORIENTATION_FIELD_DESC = new org.apache.thrift.protocol.TField("orientation", org.apache.thrift.protocol.TType.I32, (short)2);
-  private static final org.apache.thrift.protocol.TField MAX_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("maxRows", org.apache.thrift.protocol.TType.I64, (short)3);
-  private static final org.apache.thrift.protocol.TField FETCH_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("fetchType", org.apache.thrift.protocol.TType.I16, (short)4);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TFetchResultsReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TFetchResultsReqTupleSchemeFactory());
-  }
-
-  private TOperationHandle operationHandle; // required
-  private TFetchOrientation orientation; // required
-  private long maxRows; // required
-  private short fetchType; // optional
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them.
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - /** - * - * @see TFetchOrientation - */ - ORIENTATION((short)2, "orientation"), - MAX_ROWS((short)3, "maxRows"), - FETCH_TYPE((short)4, "fetchType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // ORIENTATION - return ORIENTATION; - case 3: // MAX_ROWS - return MAX_ROWS; - case 4: // FETCH_TYPE - return FETCH_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __MAXROWS_ISSET_ID = 0; - private static final int __FETCHTYPE_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.FETCH_TYPE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.ORIENTATION, new org.apache.thrift.meta_data.FieldMetaData("orientation", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TFetchOrientation.class))); - tmpMap.put(_Fields.MAX_ROWS, new org.apache.thrift.meta_data.FieldMetaData("maxRows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.FETCH_TYPE, new org.apache.thrift.meta_data.FieldMetaData("fetchType", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsReq.class, metaDataMap); - } - - public TFetchResultsReq() { - this.orientation = org.apache.hive.service.cli.thrift.TFetchOrientation.FETCH_NEXT; - - this.fetchType = (short)0; - - } - - public TFetchResultsReq( - TOperationHandle operationHandle, - TFetchOrientation orientation, - long maxRows) - { - this(); - 
this.operationHandle = operationHandle; - this.orientation = orientation; - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TFetchResultsReq(TFetchResultsReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - if (other.isSetOrientation()) { - this.orientation = other.orientation; - } - this.maxRows = other.maxRows; - this.fetchType = other.fetchType; - } - - public TFetchResultsReq deepCopy() { - return new TFetchResultsReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - this.orientation = org.apache.hive.service.cli.thrift.TFetchOrientation.FETCH_NEXT; - - setMaxRowsIsSet(false); - this.maxRows = 0; - this.fetchType = (short)0; - - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - /** - * - * @see TFetchOrientation - */ - public TFetchOrientation getOrientation() { - return this.orientation; - } - - /** - * - * @see TFetchOrientation - */ - public void setOrientation(TFetchOrientation orientation) { - this.orientation = orientation; - } - - public void unsetOrientation() { - this.orientation = null; - } - - /** Returns true if field orientation is set (has been assigned a value) and false otherwise */ - public boolean isSetOrientation() { - return this.orientation != null; - } - - public void setOrientationIsSet(boolean value) { - if (!value) { - this.orientation = null; - } - } - - public long getMaxRows() { - return this.maxRows; - } - - public void setMaxRows(long maxRows) { - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - public void unsetMaxRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - /** Returns true if field maxRows is set (has been assigned a value) and false otherwise */ - public boolean isSetMaxRows() { - return EncodingUtils.testBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - public void setMaxRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MAXROWS_ISSET_ID, value); - } - - public short getFetchType() { - return this.fetchType; - } - - public void setFetchType(short fetchType) { - this.fetchType = fetchType; - setFetchTypeIsSet(true); - } - - public void unsetFetchType() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - /** Returns true if field fetchType is set (has been assigned a value) and false otherwise */ - public boolean isSetFetchType() { - return EncodingUtils.testBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - public void setFetchTypeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __FETCHTYPE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - 
case ORIENTATION: - if (value == null) { - unsetOrientation(); - } else { - setOrientation((TFetchOrientation)value); - } - break; - - case MAX_ROWS: - if (value == null) { - unsetMaxRows(); - } else { - setMaxRows((Long)value); - } - break; - - case FETCH_TYPE: - if (value == null) { - unsetFetchType(); - } else { - setFetchType((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case ORIENTATION: - return getOrientation(); - - case MAX_ROWS: - return Long.valueOf(getMaxRows()); - - case FETCH_TYPE: - return Short.valueOf(getFetchType()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case ORIENTATION: - return isSetOrientation(); - case MAX_ROWS: - return isSetMaxRows(); - case FETCH_TYPE: - return isSetFetchType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsReq) - return this.equals((TFetchResultsReq)that); - return false; - } - - public boolean equals(TFetchResultsReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_orientation = true && this.isSetOrientation(); - boolean that_present_orientation = true && that.isSetOrientation(); - if (this_present_orientation || that_present_orientation) { - if (!(this_present_orientation && that_present_orientation)) - return false; - if (!this.orientation.equals(that.orientation)) - return false; - } - - boolean this_present_maxRows = true; - boolean that_present_maxRows = true; - if (this_present_maxRows || that_present_maxRows) { - if (!(this_present_maxRows && that_present_maxRows)) - return false; - if (this.maxRows != that.maxRows) - return false; - } - - boolean this_present_fetchType = true && this.isSetFetchType(); - boolean that_present_fetchType = true && that.isSetFetchType(); - if (this_present_fetchType || that_present_fetchType) { - if (!(this_present_fetchType && that_present_fetchType)) - return false; - if (this.fetchType != that.fetchType) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - boolean present_orientation = true && (isSetOrientation()); - builder.append(present_orientation); - if (present_orientation) - builder.append(orientation.getValue()); - - boolean present_maxRows = true; - builder.append(present_maxRows); - if (present_maxRows) - builder.append(maxRows); - - boolean present_fetchType = true && (isSetFetchType()); - builder.append(present_fetchType); - if (present_fetchType) - builder.append(fetchType); - - return 
builder.toHashCode(); - } - - public int compareTo(TFetchResultsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TFetchResultsReq typedOther = (TFetchResultsReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOrientation()).compareTo(typedOther.isSetOrientation()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOrientation()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.orientation, typedOther.orientation); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetMaxRows()).compareTo(typedOther.isSetMaxRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetMaxRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.maxRows, typedOther.maxRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFetchType()).compareTo(typedOther.isSetFetchType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFetchType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.fetchType, typedOther.fetchType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("orientation:"); - if (this.orientation == null) { - sb.append("null"); - } else { - sb.append(this.orientation); - } - first = false; - if (!first) sb.append(", "); - sb.append("maxRows:"); - sb.append(this.maxRows); - first = false; - if (isSetFetchType()) { - if (!first) sb.append(", "); - sb.append("fetchType:"); - sb.append(this.fetchType); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - if (!isSetOrientation()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'orientation' is unset! Struct:" + toString()); - } - - if (!isSetMaxRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'maxRows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsReqStandardSchemeFactory implements SchemeFactory { - public TFetchResultsReqStandardScheme getScheme() { - return new TFetchResultsReqStandardScheme(); - } - } - - private static class TFetchResultsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ORIENTATION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.orientation = TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // MAX_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FETCH_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.orientation != null) { - oprot.writeFieldBegin(ORIENTATION_FIELD_DESC); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(MAX_ROWS_FIELD_DESC); - oprot.writeI64(struct.maxRows); - oprot.writeFieldEnd(); - if (struct.isSetFetchType()) { - 
oprot.writeFieldBegin(FETCH_TYPE_FIELD_DESC); - oprot.writeI16(struct.fetchType); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsReqTupleSchemeFactory implements SchemeFactory { - public TFetchResultsReqTupleScheme getScheme() { - return new TFetchResultsReqTupleScheme(); - } - } - - private static class TFetchResultsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeI64(struct.maxRows); - BitSet optionals = new BitSet(); - if (struct.isSetFetchType()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetFetchType()) { - oprot.writeI16(struct.fetchType); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - struct.orientation = TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java deleted file mode 100644 index 19991f1da3eb3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TFetchResultsResp.java +++ /dev/null @@ -1,608 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TFetchResultsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField HAS_MORE_ROWS_FIELD_DESC = new 
org.apache.thrift.protocol.TField("hasMoreRows", org.apache.thrift.protocol.TType.BOOL, (short)2); - private static final org.apache.thrift.protocol.TField RESULTS_FIELD_DESC = new org.apache.thrift.protocol.TField("results", org.apache.thrift.protocol.TType.STRUCT, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private boolean hasMoreRows; // optional - private TRowSet results; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - HAS_MORE_ROWS((short)2, "hasMoreRows"), - RESULTS((short)3, "results"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // HAS_MORE_ROWS - return HAS_MORE_ROWS; - case 3: // RESULTS - return RESULTS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASMOREROWS_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.HAS_MORE_ROWS,_Fields.RESULTS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.HAS_MORE_ROWS, new org.apache.thrift.meta_data.FieldMetaData("hasMoreRows", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.RESULTS, new org.apache.thrift.meta_data.FieldMetaData("results", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRowSet.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsResp.class, metaDataMap); - } - - public TFetchResultsResp() { - } - - public TFetchResultsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsResp(TFetchResultsResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - this.hasMoreRows = other.hasMoreRows; - if (other.isSetResults()) { - this.results = new TRowSet(other.results); - } - } - - public TFetchResultsResp deepCopy() { - return new TFetchResultsResp(this); - } - - @Override - public void clear() { - this.status = null; - setHasMoreRowsIsSet(false); - this.hasMoreRows = false; - this.results = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public boolean isHasMoreRows() { - return this.hasMoreRows; - } - - public void setHasMoreRows(boolean hasMoreRows) { - this.hasMoreRows = hasMoreRows; - setHasMoreRowsIsSet(true); - } - - public void unsetHasMoreRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - /** Returns true if field hasMoreRows is set (has been assigned a value) and false otherwise */ - public boolean isSetHasMoreRows() { - return EncodingUtils.testBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - public void setHasMoreRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASMOREROWS_ISSET_ID, value); - } - - public TRowSet getResults() { - return this.results; - } - - public void setResults(TRowSet results) { - this.results = results; - } - - public void unsetResults() { - this.results = null; - } - - /** Returns true if field results is set (has been assigned a value) and false otherwise */ - public boolean isSetResults() { - return this.results != null; - } - - public void setResultsIsSet(boolean value) { - if (!value) { - this.results = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case HAS_MORE_ROWS: - if (value == null) { - unsetHasMoreRows(); - } else { - setHasMoreRows((Boolean)value); - } - break; - - case RESULTS: - if (value == null) { - unsetResults(); - } else { - setResults((TRowSet)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case HAS_MORE_ROWS: - return Boolean.valueOf(isHasMoreRows()); - - case RESULTS: - return getResults(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case HAS_MORE_ROWS: - return isSetHasMoreRows(); - case RESULTS: - return isSetResults(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsResp) - return this.equals((TFetchResultsResp)that); - return false; - } - - public boolean equals(TFetchResultsResp that) { - if (that == null) - return false; - - boolean this_present_status = 
true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_hasMoreRows = true && this.isSetHasMoreRows(); - boolean that_present_hasMoreRows = true && that.isSetHasMoreRows(); - if (this_present_hasMoreRows || that_present_hasMoreRows) { - if (!(this_present_hasMoreRows && that_present_hasMoreRows)) - return false; - if (this.hasMoreRows != that.hasMoreRows) - return false; - } - - boolean this_present_results = true && this.isSetResults(); - boolean that_present_results = true && that.isSetResults(); - if (this_present_results || that_present_results) { - if (!(this_present_results && that_present_results)) - return false; - if (!this.results.equals(that.results)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_hasMoreRows = true && (isSetHasMoreRows()); - builder.append(present_hasMoreRows); - if (present_hasMoreRows) - builder.append(hasMoreRows); - - boolean present_results = true && (isSetResults()); - builder.append(present_results); - if (present_results) - builder.append(results); - - return builder.toHashCode(); - } - - public int compareTo(TFetchResultsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TFetchResultsResp typedOther = (TFetchResultsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasMoreRows()).compareTo(typedOther.isSetHasMoreRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasMoreRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasMoreRows, typedOther.hasMoreRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetResults()).compareTo(typedOther.isSetResults()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetResults()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.results, typedOther.results); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetHasMoreRows()) { - if (!first) sb.append(", "); - 
sb.append("hasMoreRows:"); - sb.append(this.hasMoreRows); - first = false; - } - if (isSetResults()) { - if (!first) sb.append(", "); - sb.append("results:"); - if (this.results == null) { - sb.append("null"); - } else { - sb.append(this.results); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (results != null) { - results.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsRespStandardSchemeFactory implements SchemeFactory { - public TFetchResultsRespStandardScheme getScheme() { - return new TFetchResultsRespStandardScheme(); - } - } - - private static class TFetchResultsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // HAS_MORE_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RESULTS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetHasMoreRows()) { - oprot.writeFieldBegin(HAS_MORE_ROWS_FIELD_DESC); - 
oprot.writeBool(struct.hasMoreRows); - oprot.writeFieldEnd(); - } - if (struct.results != null) { - if (struct.isSetResults()) { - oprot.writeFieldBegin(RESULTS_FIELD_DESC); - struct.results.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsRespTupleSchemeFactory implements SchemeFactory { - public TFetchResultsRespTupleScheme getScheme() { - return new TFetchResultsRespTupleScheme(); - } - } - - private static class TFetchResultsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetHasMoreRows()) { - optionals.set(0); - } - if (struct.isSetResults()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetHasMoreRows()) { - oprot.writeBool(struct.hasMoreRows); - } - if (struct.isSetResults()) { - struct.results.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } - if (incoming.get(1)) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java deleted file mode 100644 index cfd157f701b26..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetCatalogsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, 
SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsReq.class, metaDataMap); - } - - public TGetCatalogsReq() { - } - - public TGetCatalogsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsReq(TGetCatalogsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetCatalogsReq deepCopy() { - return new TGetCatalogsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsReq) - return this.equals((TGetCatalogsReq)that); - return false; - } - - public boolean equals(TGetCatalogsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetCatalogsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetCatalogsReq typedOther = (TGetCatalogsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsReqStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsReqStandardScheme getScheme() { - return new TGetCatalogsReqStandardScheme(); - } - } - - private static class TGetCatalogsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsReqTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsReqTupleScheme getScheme() { - return new TGetCatalogsReqTupleScheme(); - } - } - - private static class TGetCatalogsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java deleted file mode 100644 index 1c5a35437d416..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetCatalogsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetCatalogsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsResp.class, metaDataMap); - } - - public TGetCatalogsResp() { - } - - public TGetCatalogsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsResp(TGetCatalogsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCatalogsResp deepCopy() { - return new TGetCatalogsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsResp) - return this.equals((TGetCatalogsResp)that); - return false; - } - - public boolean equals(TGetCatalogsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetCatalogsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetCatalogsResp typedOther = (TGetCatalogsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsRespStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsRespStandardScheme getScheme() { - return new TGetCatalogsRespStandardScheme(); - } - } - - private static class TGetCatalogsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsRespTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsRespTupleScheme getScheme() { - return new TGetCatalogsRespTupleScheme(); - } - } - - private static class TGetCatalogsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java deleted file mode 100644 index a2c793bd95927..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsReq.java +++ /dev/null @@ -1,818 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetColumnsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private String columnName; // optional - - /** The set of fields this struct 
contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - COLUMN_NAME((short)5, "columnName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // COLUMN_NAME - return COLUMN_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.COLUMN_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsReq.class, metaDataMap); - } - - public TGetColumnsReq() { - } - - public TGetColumnsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetColumnsReq(TGetColumnsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - } - - public TGetColumnsReq deepCopy() { - return new TGetColumnsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.columnName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public void 
setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case COLUMN_NAME: - return getColumnName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case COLUMN_NAME: - return isSetColumnName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsReq) - return this.equals((TGetColumnsReq)that); - return false; - } - - public boolean equals(TGetColumnsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if 
(!this.columnName.equals(that.columnName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_tableName = true && (isSetTableName()); - builder.append(present_tableName); - if (present_tableName) - builder.append(tableName); - - boolean present_columnName = true && (isSetColumnName()); - builder.append(present_columnName); - if (present_columnName) - builder.append(columnName); - - return builder.toHashCode(); - } - - public int compareTo(TGetColumnsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetColumnsReq typedOther = (TGetColumnsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(typedOther.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, typedOther.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(typedOther.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, typedOther.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsReq("); - boolean first = 
true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetColumnName()) { - if (!first) sb.append(", "); - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsReqStandardSchemeFactory implements SchemeFactory { - public TGetColumnsReqStandardScheme getScheme() { - return new TGetColumnsReqStandardScheme(); - } - } - - private static class TGetColumnsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if 
(schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.columnName != null) { - if (struct.isSetColumnName()) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsReqTupleSchemeFactory implements SchemeFactory { - public TGetColumnsReqTupleScheme getScheme() { - return new TGetColumnsReqTupleScheme(); - } - } - - private static class TGetColumnsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetColumnName()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetColumnName()) { - oprot.writeString(struct.columnName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = 
iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java deleted file mode 100644 index d6cf1be6d304b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetColumnsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetColumnsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsResp.class, metaDataMap); - } - - public TGetColumnsResp() { - } - - public TGetColumnsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetColumnsResp(TGetColumnsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetColumnsResp deepCopy() { - return new TGetColumnsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsResp) - return this.equals((TGetColumnsResp)that); - return false; - } - - public boolean equals(TGetColumnsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetColumnsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetColumnsResp typedOther = (TGetColumnsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsRespStandardSchemeFactory implements SchemeFactory { - public TGetColumnsRespStandardScheme getScheme() { - return new TGetColumnsRespStandardScheme(); - } - } - - private static class TGetColumnsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsRespTupleSchemeFactory implements SchemeFactory { - public TGetColumnsRespTupleScheme getScheme() { - return new TGetColumnsRespTupleScheme(); - } - } - - private static class TGetColumnsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java deleted file mode 100644 index 6c6bb00e43e43..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenReq.java +++ /dev/null @@ -1,592 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OWNER_FIELD_DESC = new org.apache.thrift.protocol.TField("owner", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField RENEWER_FIELD_DESC = new org.apache.thrift.protocol.TField("renewer", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String owner; // required - private String renewer; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - OWNER((short)2, "owner"), - RENEWER((short)3, "renewer"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // OWNER - return OWNER; - case 3: // RENEWER - return RENEWER; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.OWNER, new org.apache.thrift.meta_data.FieldMetaData("owner", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.RENEWER, new org.apache.thrift.meta_data.FieldMetaData("renewer", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenReq.class, metaDataMap); - } - - public TGetDelegationTokenReq() { - } - - public TGetDelegationTokenReq( - TSessionHandle sessionHandle, - String owner, - String renewer) - { - this(); - this.sessionHandle = sessionHandle; - this.owner = owner; - this.renewer = renewer; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenReq(TGetDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetOwner()) { - this.owner = other.owner; - } - if (other.isSetRenewer()) { - this.renewer = other.renewer; - } - } - - public TGetDelegationTokenReq deepCopy() { - return new TGetDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.owner = null; - this.renewer = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getOwner() { - return this.owner; - } - - public void setOwner(String owner) { - this.owner = owner; - } - - public void unsetOwner() { - this.owner = null; - } - - /** Returns true if field owner is set (has been assigned a value) and false otherwise */ - public boolean isSetOwner() { - return this.owner != null; - } - - public void setOwnerIsSet(boolean value) { - if (!value) { - this.owner = null; - } - } - - public String getRenewer() { - return this.renewer; - } - - public void setRenewer(String renewer) { - this.renewer = renewer; - } - - public void unsetRenewer() { - this.renewer = null; - } - - /** Returns true if field renewer is set (has been assigned a value) and false otherwise */ - public boolean isSetRenewer() { - return this.renewer != null; - } - - public void setRenewerIsSet(boolean value) { - if (!value) { - this.renewer = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case OWNER: - if (value == null) { - unsetOwner(); - } else { - setOwner((String)value); - } - break; - - case RENEWER: - if (value == null) { - unsetRenewer(); - } else { - setRenewer((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case OWNER: - return getOwner(); - - case RENEWER: - return getRenewer(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case OWNER: - return isSetOwner(); - case RENEWER: - return isSetRenewer(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenReq) - return this.equals((TGetDelegationTokenReq)that); - return false; - } - - public boolean equals(TGetDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || 
that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_owner = true && this.isSetOwner(); - boolean that_present_owner = true && that.isSetOwner(); - if (this_present_owner || that_present_owner) { - if (!(this_present_owner && that_present_owner)) - return false; - if (!this.owner.equals(that.owner)) - return false; - } - - boolean this_present_renewer = true && this.isSetRenewer(); - boolean that_present_renewer = true && that.isSetRenewer(); - if (this_present_renewer || that_present_renewer) { - if (!(this_present_renewer && that_present_renewer)) - return false; - if (!this.renewer.equals(that.renewer)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_owner = true && (isSetOwner()); - builder.append(present_owner); - if (present_owner) - builder.append(owner); - - boolean present_renewer = true && (isSetRenewer()); - builder.append(present_renewer); - if (present_renewer) - builder.append(renewer); - - return builder.toHashCode(); - } - - public int compareTo(TGetDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetDelegationTokenReq typedOther = (TGetDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOwner()).compareTo(typedOther.isSetOwner()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOwner()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.owner, typedOther.owner); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRenewer()).compareTo(typedOther.isSetRenewer()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRenewer()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.renewer, typedOther.renewer); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("owner:"); - if (this.owner == null) { - sb.append("null"); - } else { - sb.append(this.owner); - } - first = 
false; - if (!first) sb.append(", "); - sb.append("renewer:"); - if (this.renewer == null) { - sb.append("null"); - } else { - sb.append(this.renewer); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetOwner()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'owner' is unset! Struct:" + toString()); - } - - if (!isSetRenewer()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'renewer' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqStandardScheme getScheme() { - return new TGetDelegationTokenReqStandardScheme(); - } - } - - private static class TGetDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OWNER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RENEWER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.owner != 
null) { - oprot.writeFieldBegin(OWNER_FIELD_DESC); - oprot.writeString(struct.owner); - oprot.writeFieldEnd(); - } - if (struct.renewer != null) { - oprot.writeFieldBegin(RENEWER_FIELD_DESC); - oprot.writeString(struct.renewer); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqTupleScheme getScheme() { - return new TGetDelegationTokenReqTupleScheme(); - } - } - - private static class TGetDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.owner); - oprot.writeString(struct.renewer); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java deleted file mode 100644 index d14c5e029a35d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetDelegationTokenResp.java +++ /dev/null @@ -1,500 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - 
schemes.put(StandardScheme.class, new TGetDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - private String delegationToken; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.DELEGATION_TOKEN}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenResp.class, metaDataMap); - } - - public TGetDelegationTokenResp() { - } - - public TGetDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenResp(TGetDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TGetDelegationTokenResp deepCopy() { - return new TGetDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - this.delegationToken = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenResp) - return this.equals((TGetDelegationTokenResp)that); - return false; - } - - public boolean equals(TGetDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TGetDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetDelegationTokenResp typedOther = (TGetDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetDelegationToken()) { - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespStandardScheme getScheme() { - return new TGetDelegationTokenRespStandardScheme(); - } - } - - private static class TGetDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - if (struct.isSetDelegationToken()) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespTupleScheme getScheme() { - return new TGetDelegationTokenRespTupleScheme(); - } - } - - private static class TGetDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetDelegationToken()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetDelegationToken()) { - oprot.writeString(struct.delegationToken); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java deleted file mode 100644 index ff45ee0386cb9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsReq.java +++ /dev/null @@ -1,707 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetFunctionsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField FUNCTION_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("functionName", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String functionName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - FUNCTION_NAME((short)4, "functionName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // FUNCTION_NAME - return FUNCTION_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.FUNCTION_NAME, new org.apache.thrift.meta_data.FieldMetaData("functionName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsReq.class, metaDataMap); - } - - public TGetFunctionsReq() { - } - - public TGetFunctionsReq( - TSessionHandle sessionHandle, - String functionName) - { - this(); - this.sessionHandle = sessionHandle; - this.functionName = functionName; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsReq(TGetFunctionsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetFunctionName()) { - this.functionName = other.functionName; - } - } - - public TGetFunctionsReq deepCopy() { - return new TGetFunctionsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.functionName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getFunctionName() { - return this.functionName; - } - - public void setFunctionName(String functionName) { - this.functionName = functionName; - } - - public void unsetFunctionName() { - this.functionName = null; - } - - /** Returns true if field functionName is set (has been assigned a value) and false otherwise */ - public boolean isSetFunctionName() { - return this.functionName != null; - } - - public void setFunctionNameIsSet(boolean value) { - if (!value) { - this.functionName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case FUNCTION_NAME: - if (value == null) { - unsetFunctionName(); - } else { - setFunctionName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case FUNCTION_NAME: - 
return getFunctionName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case FUNCTION_NAME: - return isSetFunctionName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsReq) - return this.equals((TGetFunctionsReq)that); - return false; - } - - public boolean equals(TGetFunctionsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_functionName = true && this.isSetFunctionName(); - boolean that_present_functionName = true && that.isSetFunctionName(); - if (this_present_functionName || that_present_functionName) { - if (!(this_present_functionName && that_present_functionName)) - return false; - if (!this.functionName.equals(that.functionName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_functionName = true && (isSetFunctionName()); - builder.append(present_functionName); - if (present_functionName) - builder.append(functionName); - - return builder.toHashCode(); - } - - public int compareTo(TGetFunctionsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetFunctionsReq typedOther = (TGetFunctionsReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFunctionName()).compareTo(typedOther.isSetFunctionName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFunctionName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.functionName, typedOther.functionName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (!first) sb.append(", "); - sb.append("functionName:"); - if (this.functionName == null) { - sb.append("null"); - } else { - sb.append(this.functionName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetFunctionName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'functionName' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsReqStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsReqStandardScheme getScheme() { - return new TGetFunctionsReqStandardScheme(); - } - } - - private static class TGetFunctionsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FUNCTION_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.functionName != null) { - oprot.writeFieldBegin(FUNCTION_NAME_FIELD_DESC); - 
oprot.writeString(struct.functionName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsReqTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsReqTupleScheme getScheme() { - return new TGetFunctionsReqTupleScheme(); - } - } - - private static class TGetFunctionsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.functionName); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java deleted file mode 100644 index 3adafdacb54ef..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetFunctionsResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetFunctionsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField 
OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsResp.class, metaDataMap); - } - - public TGetFunctionsResp() { - } - - public TGetFunctionsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsResp(TGetFunctionsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetFunctionsResp deepCopy() { - return new TGetFunctionsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsResp) - return this.equals((TGetFunctionsResp)that); - return false; - } - - public boolean equals(TGetFunctionsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetFunctionsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetFunctionsResp typedOther = (TGetFunctionsResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsRespStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsRespStandardScheme getScheme() { - return new TGetFunctionsRespStandardScheme(); - } - } - - private static class TGetFunctionsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsRespTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsRespTupleScheme getScheme() { - return new TGetFunctionsRespTupleScheme(); - } - } - - private static class TGetFunctionsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override 
- public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java deleted file mode 100644 index 0139bf04ec7db..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoReq.java +++ /dev/null @@ -1,503 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoType", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private TGetInfoType infoType; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - /** - * - * @see TGetInfoType - */ - INFO_TYPE((short)2, "infoType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // INFO_TYPE - return INFO_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.INFO_TYPE, new org.apache.thrift.meta_data.FieldMetaData("infoType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TGetInfoType.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoReq.class, metaDataMap); - } - - public TGetInfoReq() { - } - - public TGetInfoReq( - TSessionHandle sessionHandle, - TGetInfoType infoType) - { - this(); - this.sessionHandle = sessionHandle; - this.infoType = infoType; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoReq(TGetInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetInfoType()) { - this.infoType = other.infoType; - } - } - - public TGetInfoReq deepCopy() { - return new TGetInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.infoType = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - /** - * - * @see TGetInfoType - */ - public TGetInfoType getInfoType() { - return this.infoType; - } - - /** - * - * @see TGetInfoType - */ - public void setInfoType(TGetInfoType infoType) { - this.infoType = infoType; - } - - public void unsetInfoType() { - this.infoType = null; - } - - /** Returns true if field infoType is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoType() { - return this.infoType != null; - } - - public void setInfoTypeIsSet(boolean value) { - if (!value) { - this.infoType = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case INFO_TYPE: - if (value == null) { - unsetInfoType(); - } else { - setInfoType((TGetInfoType)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case INFO_TYPE: - return getInfoType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case INFO_TYPE: - return isSetInfoType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoReq) - return this.equals((TGetInfoReq)that); - return false; - } - - public boolean equals(TGetInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_infoType = true && this.isSetInfoType(); - boolean that_present_infoType = true && that.isSetInfoType(); - if (this_present_infoType || that_present_infoType) { - if (!(this_present_infoType && that_present_infoType)) - return false; - if (!this.infoType.equals(that.infoType)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = 
true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_infoType = true && (isSetInfoType()); - builder.append(present_infoType); - if (present_infoType) - builder.append(infoType.getValue()); - - return builder.toHashCode(); - } - - public int compareTo(TGetInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetInfoReq typedOther = (TGetInfoReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoType()).compareTo(typedOther.isSetInfoType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoType, typedOther.infoType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoType:"); - if (this.infoType == null) { - sb.append("null"); - } else { - sb.append(this.infoType); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetInfoType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoType' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetInfoReqStandardScheme getScheme() { - return new TGetInfoReqStandardScheme(); - } - } - - private static class TGetInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.infoType = TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoType != null) { - oprot.writeFieldBegin(INFO_TYPE_FIELD_DESC); - oprot.writeI32(struct.infoType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetInfoReqTupleScheme getScheme() { - return new TGetInfoReqTupleScheme(); - } - } - - private static class TGetInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeI32(struct.infoType.getValue()); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.infoType = 
TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java deleted file mode 100644 index 2faaa9211b3ba..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoResp.java +++ /dev/null @@ -1,493 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TGetInfoValue infoValue; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - INFO_VALUE((short)2, "infoValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // INFO_VALUE - return INFO_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.INFO_VALUE, new org.apache.thrift.meta_data.FieldMetaData("infoValue", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoResp.class, metaDataMap); - } - - public TGetInfoResp() { - } - - public TGetInfoResp( - TStatus status, - TGetInfoValue infoValue) - { - this(); - this.status = status; - this.infoValue = infoValue; - } - - /** - * Performs a deep copy on other. - */ - public TGetInfoResp(TGetInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetInfoValue()) { - this.infoValue = new TGetInfoValue(other.infoValue); - } - } - - public TGetInfoResp deepCopy() { - return new TGetInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.infoValue = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TGetInfoValue getInfoValue() { - return this.infoValue; - } - - public void setInfoValue(TGetInfoValue infoValue) { - this.infoValue = infoValue; - } - - public void unsetInfoValue() { - this.infoValue = null; - } - - /** Returns true if field infoValue is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoValue() { - return this.infoValue != null; - } - - public void setInfoValueIsSet(boolean value) { - if (!value) { - this.infoValue = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case INFO_VALUE: - if (value == null) { - unsetInfoValue(); - } else { - setInfoValue((TGetInfoValue)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case INFO_VALUE: - return getInfoValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { 
- throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case INFO_VALUE: - return isSetInfoValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoResp) - return this.equals((TGetInfoResp)that); - return false; - } - - public boolean equals(TGetInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_infoValue = true && this.isSetInfoValue(); - boolean that_present_infoValue = true && that.isSetInfoValue(); - if (this_present_infoValue || that_present_infoValue) { - if (!(this_present_infoValue && that_present_infoValue)) - return false; - if (!this.infoValue.equals(that.infoValue)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_infoValue = true && (isSetInfoValue()); - builder.append(present_infoValue); - if (present_infoValue) - builder.append(infoValue); - - return builder.toHashCode(); - } - - public int compareTo(TGetInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetInfoResp typedOther = (TGetInfoResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoValue()).compareTo(typedOther.isSetInfoValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoValue, typedOther.infoValue); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoValue:"); - if (this.infoValue == null) { - sb.append("null"); - } else { - sb.append(this.infoValue); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetInfoValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoValue' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetInfoRespStandardScheme getScheme() { - return new TGetInfoRespStandardScheme(); - } - } - - private static class TGetInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoValue != null) { - oprot.writeFieldBegin(INFO_VALUE_FIELD_DESC); - struct.infoValue.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetInfoRespTupleScheme getScheme() { - return new TGetInfoRespTupleScheme(); - } - } - - private static class TGetInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - struct.infoValue.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java deleted file mode 100644 index d9dd62414f001..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoType.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TGetInfoType implements org.apache.thrift.TEnum { - CLI_MAX_DRIVER_CONNECTIONS(0), - CLI_MAX_CONCURRENT_ACTIVITIES(1), - CLI_DATA_SOURCE_NAME(2), - CLI_FETCH_DIRECTION(8), - CLI_SERVER_NAME(13), - CLI_SEARCH_PATTERN_ESCAPE(14), - CLI_DBMS_NAME(17), - CLI_DBMS_VER(18), - CLI_ACCESSIBLE_TABLES(19), - CLI_ACCESSIBLE_PROCEDURES(20), - CLI_CURSOR_COMMIT_BEHAVIOR(23), - CLI_DATA_SOURCE_READ_ONLY(25), - CLI_DEFAULT_TXN_ISOLATION(26), - CLI_IDENTIFIER_CASE(28), - CLI_IDENTIFIER_QUOTE_CHAR(29), - CLI_MAX_COLUMN_NAME_LEN(30), - CLI_MAX_CURSOR_NAME_LEN(31), - CLI_MAX_SCHEMA_NAME_LEN(32), - CLI_MAX_CATALOG_NAME_LEN(34), - CLI_MAX_TABLE_NAME_LEN(35), - CLI_SCROLL_CONCURRENCY(43), - CLI_TXN_CAPABLE(46), - CLI_USER_NAME(47), - CLI_TXN_ISOLATION_OPTION(72), - CLI_INTEGRITY(73), - CLI_GETDATA_EXTENSIONS(81), - CLI_NULL_COLLATION(85), - CLI_ALTER_TABLE(86), - CLI_ORDER_BY_COLUMNS_IN_SELECT(90), - CLI_SPECIAL_CHARACTERS(94), - CLI_MAX_COLUMNS_IN_GROUP_BY(97), - CLI_MAX_COLUMNS_IN_INDEX(98), - CLI_MAX_COLUMNS_IN_ORDER_BY(99), - CLI_MAX_COLUMNS_IN_SELECT(100), - CLI_MAX_COLUMNS_IN_TABLE(101), - CLI_MAX_INDEX_SIZE(102), - CLI_MAX_ROW_SIZE(104), - CLI_MAX_STATEMENT_LEN(105), - CLI_MAX_TABLES_IN_SELECT(106), - CLI_MAX_USER_NAME_LEN(107), - CLI_OJ_CAPABILITIES(115), - CLI_XOPEN_CLI_YEAR(10000), - CLI_CURSOR_SENSITIVITY(10001), - CLI_DESCRIBE_PARAMETER(10002), - CLI_CATALOG_NAME(10003), - CLI_COLLATION_SEQ(10004), - CLI_MAX_IDENTIFIER_LEN(10005); - - private final int value; - - private TGetInfoType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TGetInfoType findByValue(int value) { - switch (value) { - case 0: - return CLI_MAX_DRIVER_CONNECTIONS; - case 1: - return CLI_MAX_CONCURRENT_ACTIVITIES; - case 2: - return CLI_DATA_SOURCE_NAME; - case 8: - return CLI_FETCH_DIRECTION; - case 13: - return CLI_SERVER_NAME; - case 14: - return CLI_SEARCH_PATTERN_ESCAPE; - case 17: - return CLI_DBMS_NAME; - case 18: - return CLI_DBMS_VER; - case 19: - return CLI_ACCESSIBLE_TABLES; - case 20: - return CLI_ACCESSIBLE_PROCEDURES; - case 23: - return CLI_CURSOR_COMMIT_BEHAVIOR; - case 25: - return CLI_DATA_SOURCE_READ_ONLY; - case 26: - return CLI_DEFAULT_TXN_ISOLATION; - case 28: - return CLI_IDENTIFIER_CASE; - case 29: - return CLI_IDENTIFIER_QUOTE_CHAR; - case 30: - return CLI_MAX_COLUMN_NAME_LEN; - case 31: - return CLI_MAX_CURSOR_NAME_LEN; - case 32: - return CLI_MAX_SCHEMA_NAME_LEN; - case 34: - return CLI_MAX_CATALOG_NAME_LEN; - case 35: - return CLI_MAX_TABLE_NAME_LEN; - case 43: - return CLI_SCROLL_CONCURRENCY; - case 46: - return CLI_TXN_CAPABLE; - case 47: - return CLI_USER_NAME; - case 72: - return CLI_TXN_ISOLATION_OPTION; - case 73: - return CLI_INTEGRITY; - case 81: - return CLI_GETDATA_EXTENSIONS; - case 85: - return CLI_NULL_COLLATION; - case 86: - return CLI_ALTER_TABLE; - case 90: - return CLI_ORDER_BY_COLUMNS_IN_SELECT; - case 94: - return CLI_SPECIAL_CHARACTERS; - case 97: - return CLI_MAX_COLUMNS_IN_GROUP_BY; - case 98: - return CLI_MAX_COLUMNS_IN_INDEX; - case 99: - return CLI_MAX_COLUMNS_IN_ORDER_BY; - case 100: - return CLI_MAX_COLUMNS_IN_SELECT; - case 101: - return CLI_MAX_COLUMNS_IN_TABLE; - case 102: - return CLI_MAX_INDEX_SIZE; - case 104: - return CLI_MAX_ROW_SIZE; - case 105: - return CLI_MAX_STATEMENT_LEN; - case 106: - return CLI_MAX_TABLES_IN_SELECT; - case 107: - return CLI_MAX_USER_NAME_LEN; - case 115: - return CLI_OJ_CAPABILITIES; - case 10000: - return CLI_XOPEN_CLI_YEAR; - case 10001: - return CLI_CURSOR_SENSITIVITY; - case 10002: - return CLI_DESCRIBE_PARAMETER; - case 10003: - return CLI_CATALOG_NAME; - case 10004: - return CLI_COLLATION_SEQ; - case 10005: - return CLI_MAX_IDENTIFIER_LEN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java deleted file mode 100644 index fe2a211c46309..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetInfoValue.java +++ /dev/null @@ -1,593 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import 
org.slf4j.LoggerFactory; - -public class TGetInfoValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoValue"); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SMALL_INT_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("smallIntValue", org.apache.thrift.protocol.TType.I16, (short)2); - private static final org.apache.thrift.protocol.TField INTEGER_BITMASK_FIELD_DESC = new org.apache.thrift.protocol.TField("integerBitmask", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField INTEGER_FLAG_FIELD_DESC = new org.apache.thrift.protocol.TField("integerFlag", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField BINARY_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryValue", org.apache.thrift.protocol.TType.I32, (short)5); - private static final org.apache.thrift.protocol.TField LEN_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("lenValue", org.apache.thrift.protocol.TType.I64, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STRING_VALUE((short)1, "stringValue"), - SMALL_INT_VALUE((short)2, "smallIntValue"), - INTEGER_BITMASK((short)3, "integerBitmask"), - INTEGER_FLAG((short)4, "integerFlag"), - BINARY_VALUE((short)5, "binaryValue"), - LEN_VALUE((short)6, "lenValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STRING_VALUE - return STRING_VALUE; - case 2: // SMALL_INT_VALUE - return SMALL_INT_VALUE; - case 3: // INTEGER_BITMASK - return INTEGER_BITMASK; - case 4: // INTEGER_FLAG - return INTEGER_FLAG; - case 5: // BINARY_VALUE - return BINARY_VALUE; - case 6: // LEN_VALUE - return LEN_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.SMALL_INT_VALUE, new org.apache.thrift.meta_data.FieldMetaData("smallIntValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - tmpMap.put(_Fields.INTEGER_BITMASK, new org.apache.thrift.meta_data.FieldMetaData("integerBitmask", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.INTEGER_FLAG, new org.apache.thrift.meta_data.FieldMetaData("integerFlag", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.BINARY_VALUE, new org.apache.thrift.meta_data.FieldMetaData("binaryValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.LEN_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lenValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoValue.class, metaDataMap); - } - - public TGetInfoValue() { - super(); - } - - public TGetInfoValue(TGetInfoValue._Fields setField, Object value) { - super(setField, value); - } - - public TGetInfoValue(TGetInfoValue other) { - super(other); - } - public TGetInfoValue deepCopy() { - return new TGetInfoValue(this); - } - - public static TGetInfoValue stringValue(String value) { - TGetInfoValue x = new TGetInfoValue(); - x.setStringValue(value); - return x; - } - - public static TGetInfoValue smallIntValue(short value) { - TGetInfoValue x = new TGetInfoValue(); - x.setSmallIntValue(value); - return x; - } - - public static TGetInfoValue integerBitmask(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerBitmask(value); - return x; - } - - public static TGetInfoValue integerFlag(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerFlag(value); - return x; - } - - public static TGetInfoValue binaryValue(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setBinaryValue(value); - return x; - } - - public static TGetInfoValue lenValue(long value) { - TGetInfoValue x = new TGetInfoValue(); - x.setLenValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case STRING_VALUE: - if (value instanceof 
String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - case SMALL_INT_VALUE: - if (value instanceof Short) { - break; - } - throw new ClassCastException("Was expecting value of type Short for field 'smallIntValue', but got " + value.getClass().getSimpleName()); - case INTEGER_BITMASK: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerBitmask', but got " + value.getClass().getSimpleName()); - case INTEGER_FLAG: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerFlag', but got " + value.getClass().getSimpleName()); - case BINARY_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'binaryValue', but got " + value.getClass().getSimpleName()); - case LEN_VALUE: - if (value instanceof Long) { - break; - } - throw new ClassCastException("Was expecting value of type Long for field 'lenValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case SMALL_INT_VALUE: - if (field.type == SMALL_INT_VALUE_FIELD_DESC.type) { - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_BITMASK: - if (field.type == INTEGER_BITMASK_FIELD_DESC.type) { - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_FLAG: - if (field.type == INTEGER_FLAG_FIELD_DESC.type) { - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VALUE: - if (field.type == BINARY_VALUE_FIELD_DESC.type) { - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case LEN_VALUE: - if (field.type == LEN_VALUE_FIELD_DESC.type) { - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; 
- oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - case SMALL_INT_VALUE: - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - case INTEGER_BITMASK: - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - case INTEGER_FLAG: - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - case BINARY_VALUE: - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - case LEN_VALUE: - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - case SMALL_INT_VALUE: - return SMALL_INT_VALUE_FIELD_DESC; - case INTEGER_BITMASK: - return INTEGER_BITMASK_FIELD_DESC; - case INTEGER_FLAG: - return INTEGER_FLAG_FIELD_DESC; - case BINARY_VALUE: - return BINARY_VALUE_FIELD_DESC; - case LEN_VALUE: - return LEN_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because 
union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public short getSmallIntValue() { - if (getSetField() == _Fields.SMALL_INT_VALUE) { - return (Short)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'smallIntValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setSmallIntValue(short value) { - setField_ = _Fields.SMALL_INT_VALUE; - value_ = value; - } - - public int getIntegerBitmask() { - if (getSetField() == _Fields.INTEGER_BITMASK) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerBitmask' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerBitmask(int value) { - setField_ = _Fields.INTEGER_BITMASK; - value_ = value; - } - - public int getIntegerFlag() { - if (getSetField() == _Fields.INTEGER_FLAG) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerFlag' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerFlag(int value) { - setField_ = _Fields.INTEGER_FLAG; - value_ = value; - } - - public int getBinaryValue() { - if (getSetField() == _Fields.BINARY_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryValue(int value) { - setField_ = _Fields.BINARY_VALUE; - value_ = value; - } - - public long getLenValue() { - if (getSetField() == _Fields.LEN_VALUE) { - return (Long)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'lenValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setLenValue(long value) { - setField_ = _Fields.LEN_VALUE; - value_ = value; - } - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean isSetSmallIntValue() { - return setField_ == _Fields.SMALL_INT_VALUE; - } - - - public boolean isSetIntegerBitmask() { - return setField_ == _Fields.INTEGER_BITMASK; - } - - - public boolean isSetIntegerFlag() { - return setField_ == _Fields.INTEGER_FLAG; - } - - - public boolean isSetBinaryValue() { - return setField_ == _Fields.BINARY_VALUE; - } - - - public boolean isSetLenValue() { - return setField_ == _Fields.LEN_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TGetInfoValue) { - return equals((TGetInfoValue)other); - } else { - return false; - } - } - - public boolean equals(TGetInfoValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TGetInfoValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = 
getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java deleted file mode 100644 index b88591ea1945b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetOperationStatusReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusReq.class, metaDataMap); - } - - public TGetOperationStatusReq() { - } - - public TGetOperationStatusReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetOperationStatusReq(TGetOperationStatusReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetOperationStatusReq deepCopy() { - return new TGetOperationStatusReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusReq) - return this.equals((TGetOperationStatusReq)that); - return false; - } - - public boolean equals(TGetOperationStatusReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetOperationStatusReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetOperationStatusReq typedOther = (TGetOperationStatusReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException 
{ - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusReqStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqStandardScheme getScheme() { - return new TGetOperationStatusReqStandardScheme(); - } - } - - private static class TGetOperationStatusReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusReqTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqTupleScheme getScheme() { - return new TGetOperationStatusReqTupleScheme(); - } - } - - private static class TGetOperationStatusReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, 
TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java deleted file mode 100644 index 94ba6bb1146de..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetOperationStatusResp.java +++ /dev/null @@ -1,827 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetOperationStatusResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationState", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationState operationState; // optional - private String sqlState; // optional - private int errorCode; // optional - private String 
errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TOperationState - */ - OPERATION_STATE((short)2, "operationState"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_STATE - return OPERATION_STATE; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.OPERATION_STATE,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_STATE, new org.apache.thrift.meta_data.FieldMetaData("operationState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationState.class))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); 
- metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusResp.class, metaDataMap); - } - - public TGetOperationStatusResp() { - } - - public TGetOperationStatusResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetOperationStatusResp(TGetOperationStatusResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationState()) { - this.operationState = other.operationState; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TGetOperationStatusResp deepCopy() { - return new TGetOperationStatusResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationState = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TOperationState - */ - public TOperationState getOperationState() { - return this.operationState; - } - - /** - * - * @see TOperationState - */ - public void setOperationState(TOperationState operationState) { - this.operationState = operationState; - } - - public void unsetOperationState() { - this.operationState = null; - } - - /** Returns true if field operationState is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationState() { - return this.operationState != null; - } - - public void setOperationStateIsSet(boolean value) { - if (!value) { - this.operationState = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - 
/** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_STATE: - if (value == null) { - unsetOperationState(); - } else { - setOperationState((TOperationState)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_STATE: - return getOperationState(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return Integer.valueOf(getErrorCode()); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_STATE: - return isSetOperationState(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusResp) - return this.equals((TGetOperationStatusResp)that); - return false; - } - - public boolean equals(TGetOperationStatusResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationState = true && this.isSetOperationState(); - boolean that_present_operationState = true && that.isSetOperationState(); - if (this_present_operationState || that_present_operationState) { - if (!(this_present_operationState && that_present_operationState)) - return false; - if (!this.operationState.equals(that.operationState)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage 
= true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationState = true && (isSetOperationState()); - builder.append(present_operationState); - if (present_operationState) - builder.append(operationState.getValue()); - - boolean present_sqlState = true && (isSetSqlState()); - builder.append(present_sqlState); - if (present_sqlState) - builder.append(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - builder.append(present_errorCode); - if (present_errorCode) - builder.append(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - builder.append(present_errorMessage); - if (present_errorMessage) - builder.append(errorMessage); - - return builder.toHashCode(); - } - - public int compareTo(TGetOperationStatusResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetOperationStatusResp typedOther = (TGetOperationStatusResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationState()).compareTo(typedOther.isSetOperationState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationState, typedOther.operationState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(typedOther.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, typedOther.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(typedOther.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, typedOther.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(typedOther.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, typedOther.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationState()) { - if (!first) sb.append(", "); - sb.append("operationState:"); - if (this.operationState == null) { - sb.append("null"); - } else { - sb.append(this.operationState); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusRespStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespStandardScheme getScheme() { - return new TGetOperationStatusRespStandardScheme(); - } - } - - private static class TGetOperationStatusRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationState = TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationState != null) { - if (struct.isSetOperationState()) { - oprot.writeFieldBegin(OPERATION_STATE_FIELD_DESC); - oprot.writeI32(struct.operationState.getValue()); - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - 
oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusRespTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespTupleScheme getScheme() { - return new TGetOperationStatusRespTupleScheme(); - } - } - - private static class TGetOperationStatusRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationState()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetOperationState()) { - oprot.writeI32(struct.operationState.getValue()); - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.operationState = TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java deleted file mode 100644 index 3bf363c958468..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetResultSetMetadataReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private 
static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataReq.class, metaDataMap); - } - - public TGetResultSetMetadataReq() { - } - - public TGetResultSetMetadataReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataReq(TGetResultSetMetadataReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetResultSetMetadataReq deepCopy() { - return new TGetResultSetMetadataReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataReq) - return this.equals((TGetResultSetMetadataReq)that); - return false; - } - - public boolean equals(TGetResultSetMetadataReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetResultSetMetadataReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetResultSetMetadataReq typedOther = (TGetResultSetMetadataReq)other; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws 
org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataReqStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqStandardScheme getScheme() { - return new TGetResultSetMetadataReqStandardScheme(); - } - } - - private static class TGetResultSetMetadataReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataReqTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqTupleScheme getScheme() { - return new TGetResultSetMetadataReqTupleScheme(); - } - } - - private static class TGetResultSetMetadataReqTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java deleted file mode 100644 index a9bef9f722c16..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetResultSetMetadataResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetResultSetMetadataResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SCHEMA_FIELD_DESC = new org.apache.thrift.protocol.TField("schema", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TTableSchema schema; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - SCHEMA((short)2, "schema"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SCHEMA - return SCHEMA; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.SCHEMA}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SCHEMA, new org.apache.thrift.meta_data.FieldMetaData("schema", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTableSchema.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataResp.class, metaDataMap); - } - - public TGetResultSetMetadataResp() { - } - - public TGetResultSetMetadataResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataResp(TGetResultSetMetadataResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetSchema()) { - this.schema = new TTableSchema(other.schema); - } - } - - public TGetResultSetMetadataResp deepCopy() { - return new TGetResultSetMetadataResp(this); - } - - @Override - public void clear() { - this.status = null; - this.schema = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TTableSchema getSchema() { - return this.schema; - } - - public void setSchema(TTableSchema schema) { - this.schema = schema; - } - - public void unsetSchema() { - this.schema = null; - } - - /** Returns true if field schema is set (has been assigned a value) and false otherwise */ - public boolean isSetSchema() { - return this.schema != null; - } - - public void setSchemaIsSet(boolean value) { - if (!value) { - this.schema = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SCHEMA: - if (value == null) { - unsetSchema(); - } else { - setSchema((TTableSchema)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SCHEMA: - return getSchema(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SCHEMA: - return isSetSchema(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataResp) - return this.equals((TGetResultSetMetadataResp)that); - return false; - } - - public boolean equals(TGetResultSetMetadataResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_schema = true && this.isSetSchema(); - boolean that_present_schema = true && that.isSetSchema(); - if (this_present_schema || that_present_schema) { - if (!(this_present_schema && that_present_schema)) - return false; - if (!this.schema.equals(that.schema)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_schema = true && (isSetSchema()); - builder.append(present_schema); - if (present_schema) - builder.append(schema); - - return builder.toHashCode(); - } - - public int 
compareTo(TGetResultSetMetadataResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetResultSetMetadataResp typedOther = (TGetResultSetMetadataResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchema()).compareTo(typedOther.isSetSchema()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchema()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schema, typedOther.schema); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetSchema()) { - if (!first) sb.append(", "); - sb.append("schema:"); - if (this.schema == null) { - sb.append("null"); - } else { - sb.append(this.schema); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (schema != null) { - schema.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataRespStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespStandardScheme getScheme() { - return new TGetResultSetMetadataRespStandardScheme(); - } - } - - private static class TGetResultSetMetadataRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SCHEMA - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.schema != null) { - if (struct.isSetSchema()) { - oprot.writeFieldBegin(SCHEMA_FIELD_DESC); - struct.schema.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataRespTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespTupleScheme getScheme() { - return new TGetResultSetMetadataRespTupleScheme(); - } - } - - private static class TGetResultSetMetadataRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetSchema()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSchema()) { - struct.schema.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java deleted file mode 100644 index c2aadaa49a1e9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasReq.java +++ /dev/null @@ -1,606 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetSchemasReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasReq.class, metaDataMap); - } - - public TGetSchemasReq() { - } - - public TGetSchemasReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasReq(TGetSchemasReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - } - - public TGetSchemasReq deepCopy() { - return new TGetSchemasReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasReq) - return this.equals((TGetSchemasReq)that); - return false; - } - - public boolean equals(TGetSchemasReq that) { - if (that == null) - return false; - - boolean 
this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - return builder.toHashCode(); - } - - public int compareTo(TGetSchemasReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetSchemasReq typedOther = (TGetSchemasReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasReq("); - boolean 
first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasReqStandardSchemeFactory implements SchemeFactory { - public TGetSchemasReqStandardScheme getScheme() { - return new TGetSchemasReqStandardScheme(); - } - } - - private static class TGetSchemasReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - 
oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasReqTupleSchemeFactory implements SchemeFactory { - public TGetSchemasReqTupleScheme getScheme() { - return new TGetSchemasReqTupleScheme(); - } - } - - private static class TGetSchemasReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java deleted file mode 100644 index ac1ea3e7cc7af..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetSchemasResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetSchemasResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TGetSchemasResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasResp.class, metaDataMap); - } - - public TGetSchemasResp() { - } - - public TGetSchemasResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasResp(TGetSchemasResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetSchemasResp deepCopy() { - return new TGetSchemasResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasResp) - return this.equals((TGetSchemasResp)that); - return false; - } - - public boolean equals(TGetSchemasResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetSchemasResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetSchemasResp typedOther = (TGetSchemasResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasRespStandardSchemeFactory implements SchemeFactory { - public TGetSchemasRespStandardScheme getScheme() { - return new TGetSchemasRespStandardScheme(); - } - } - - private static class TGetSchemasRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasRespTupleSchemeFactory implements SchemeFactory { - public TGetSchemasRespTupleScheme getScheme() { - return new TGetSchemasRespTupleScheme(); - } - } - - private static class TGetSchemasRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java deleted file mode 100644 index 6f2c713e0be6a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTableTypesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTableTypesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTableTypesReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesReq.class, metaDataMap); - } - - public TGetTableTypesReq() { - } - - public TGetTableTypesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetTableTypesReq(TGetTableTypesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTableTypesReq deepCopy() { - return new TGetTableTypesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesReq) - return this.equals((TGetTableTypesReq)that); - return false; - } - - public boolean equals(TGetTableTypesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && 
this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTableTypesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTableTypesReq typedOther = (TGetTableTypesReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesReqStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesReqStandardScheme getScheme() { - return new TGetTableTypesReqStandardScheme(); - } - } - - private static class TGetTableTypesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesReqTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesReqTupleScheme getScheme() { - return new TGetTableTypesReqTupleScheme(); - } - } - - private static class TGetTableTypesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java deleted file mode 100644 index 6f33fbcf5dadc..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTableTypesResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * 
Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTableTypesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTableTypesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTableTypesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesResp.class, metaDataMap); - } - - public TGetTableTypesResp() { - } - - public TGetTableTypesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTableTypesResp(TGetTableTypesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTableTypesResp deepCopy() { - return new TGetTableTypesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesResp) - return this.equals((TGetTableTypesResp)that); - return false; - } - - public boolean equals(TGetTableTypesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTableTypesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTableTypesResp typedOther = (TGetTableTypesResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if 
(isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesRespStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesRespStandardScheme getScheme() { - return new TGetTableTypesRespStandardScheme(); - } - } - - private static class TGetTableTypesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesRespTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesRespTupleScheme getScheme() { - return new TGetTableTypesRespTupleScheme(); - } - } - - private static class 
TGetTableTypesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java deleted file mode 100644 index c973fcc24cb10..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesReq.java +++ /dev/null @@ -1,870 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTablesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField TABLE_TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("tableTypes", org.apache.thrift.protocol.TType.LIST, (short)5); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private List tableTypes; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - TABLE_TYPES((short)5, "tableTypes"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // TABLE_TYPES - return TABLE_TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.TABLE_TYPES}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_TYPES, new org.apache.thrift.meta_data.FieldMetaData("tableTypes", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesReq.class, metaDataMap); - } - - public TGetTablesReq() { - } - - public TGetTablesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTablesReq(TGetTablesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetTableTypes()) { - List __this__tableTypes = new ArrayList(); - for (String other_element : other.tableTypes) { - __this__tableTypes.add(other_element); - } - this.tableTypes = __this__tableTypes; - } - } - - public TGetTablesReq deepCopy() { - return new TGetTablesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.tableTypes = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public int getTableTypesSize() { - return (this.tableTypes == null) ? 0 : this.tableTypes.size(); - } - - public java.util.Iterator getTableTypesIterator() { - return (this.tableTypes == null) ? 
null : this.tableTypes.iterator(); - } - - public void addToTableTypes(String elem) { - if (this.tableTypes == null) { - this.tableTypes = new ArrayList(); - } - this.tableTypes.add(elem); - } - - public List getTableTypes() { - return this.tableTypes; - } - - public void setTableTypes(List tableTypes) { - this.tableTypes = tableTypes; - } - - public void unsetTableTypes() { - this.tableTypes = null; - } - - /** Returns true if field tableTypes is set (has been assigned a value) and false otherwise */ - public boolean isSetTableTypes() { - return this.tableTypes != null; - } - - public void setTableTypesIsSet(boolean value) { - if (!value) { - this.tableTypes = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case TABLE_TYPES: - if (value == null) { - unsetTableTypes(); - } else { - setTableTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case TABLE_TYPES: - return getTableTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case TABLE_TYPES: - return isSetTableTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesReq) - return this.equals((TGetTablesReq)that); - return false; - } - - public boolean equals(TGetTablesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return 
false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_tableTypes = true && this.isSetTableTypes(); - boolean that_present_tableTypes = true && that.isSetTableTypes(); - if (this_present_tableTypes || that_present_tableTypes) { - if (!(this_present_tableTypes && that_present_tableTypes)) - return false; - if (!this.tableTypes.equals(that.tableTypes)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - builder.append(present_catalogName); - if (present_catalogName) - builder.append(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - builder.append(present_schemaName); - if (present_schemaName) - builder.append(schemaName); - - boolean present_tableName = true && (isSetTableName()); - builder.append(present_tableName); - if (present_tableName) - builder.append(tableName); - - boolean present_tableTypes = true && (isSetTableTypes()); - builder.append(present_tableTypes); - if (present_tableTypes) - builder.append(tableTypes); - - return builder.toHashCode(); - } - - public int compareTo(TGetTablesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTablesReq typedOther = (TGetTablesReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(typedOther.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, typedOther.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(typedOther.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, typedOther.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(typedOther.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, typedOther.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableTypes()).compareTo(typedOther.isSetTableTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableTypes()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.tableTypes, typedOther.tableTypes); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetTableTypes()) { - if (!first) sb.append(", "); - sb.append("tableTypes:"); - if (this.tableTypes == null) { - sb.append("null"); - } else { - sb.append(this.tableTypes); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesReqStandardSchemeFactory implements SchemeFactory { - public TGetTablesReqStandardScheme getScheme() { - return new TGetTablesReqStandardScheme(); - } - } - - private static class TGetTablesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // TABLE_TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list172 = iprot.readListBegin(); - struct.tableTypes = new ArrayList(_list172.size); - for (int _i173 = 0; _i173 < _list172.size; ++_i173) - { - String _elem174; // optional - _elem174 = iprot.readString(); - struct.tableTypes.add(_elem174); - } - iprot.readListEnd(); - } - struct.setTableTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - 
oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.tableTypes != null) { - if (struct.isSetTableTypes()) { - oprot.writeFieldBegin(TABLE_TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.tableTypes.size())); - for (String _iter175 : struct.tableTypes) - { - oprot.writeString(_iter175); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesReqTupleSchemeFactory implements SchemeFactory { - public TGetTablesReqTupleScheme getScheme() { - return new TGetTablesReqTupleScheme(); - } - } - - private static class TGetTablesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetTableTypes()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetTableTypes()) { - { - oprot.writeI32(struct.tableTypes.size()); - for (String _iter176 : struct.tableTypes) - { - oprot.writeString(_iter176); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - { - org.apache.thrift.protocol.TList _list177 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.tableTypes = new ArrayList(_list177.size); - for (int _i178 = 0; _i178 < _list177.size; ++_i178) - { - String _elem179; // optional - _elem179 = iprot.readString(); - struct.tableTypes.add(_elem179); - } - } - struct.setTableTypesIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java deleted file mode 100644 index d526f4478a24e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTablesResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTablesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesResp.class, metaDataMap); - } - - public TGetTablesResp() { - } - - public TGetTablesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesResp(TGetTablesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTablesResp deepCopy() { - return new TGetTablesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } 
- - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesResp) - return this.equals((TGetTablesResp)that); - return false; - } - - public boolean equals(TGetTablesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTablesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTablesResp typedOther = (TGetTablesResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - 
sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesRespStandardSchemeFactory implements SchemeFactory { - public TGetTablesRespStandardScheme getScheme() { - return new TGetTablesRespStandardScheme(); - } - } - - private static class TGetTablesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesRespTupleSchemeFactory implements SchemeFactory { - public TGetTablesRespTupleScheme getScheme() { - return new TGetTablesRespTupleScheme(); - } - } - - private static class TGetTablesRespTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java deleted file mode 100644 index d40115e83ec45..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoReq.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTypeInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoReq.class, metaDataMap); - } - - public TGetTypeInfoReq() { - } - - public TGetTypeInfoReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoReq(TGetTypeInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTypeInfoReq deepCopy() { - return new TGetTypeInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoReq) - return this.equals((TGetTypeInfoReq)that); - return false; - } - - public boolean equals(TGetTypeInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTypeInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTypeInfoReq typedOther = (TGetTypeInfoReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqStandardScheme getScheme() { - return new TGetTypeInfoReqStandardScheme(); - } - } - - private static class TGetTypeInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqTupleScheme getScheme() { - return new TGetTypeInfoReqTupleScheme(); - } - } - - private static class TGetTypeInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java deleted file mode 100644 index 59be1a33b55e2..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TGetTypeInfoResp.java +++ /dev/null @@ -1,505 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TGetTypeInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoResp.class, metaDataMap); - } - - public TGetTypeInfoResp() { - } - - public TGetTypeInfoResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoResp(TGetTypeInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTypeInfoResp deepCopy() { - return new TGetTypeInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoResp) - return this.equals((TGetTypeInfoResp)that); - return false; - } - - public boolean equals(TGetTypeInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - 
builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - builder.append(present_operationHandle); - if (present_operationHandle) - builder.append(operationHandle); - - return builder.toHashCode(); - } - - public int compareTo(TGetTypeInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TGetTypeInfoResp typedOther = (TGetTypeInfoResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(typedOther.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, typedOther.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespStandardScheme getScheme() { - return new TGetTypeInfoRespStandardScheme(); - } - } - - private static class TGetTypeInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespTupleScheme getScheme() { - return new TGetTypeInfoRespTupleScheme(); - } - } - - private static class TGetTypeInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java deleted file mode 100644 index 368273c341c7b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/THandleIdentifier.java +++ /dev/null @@ -1,506 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class THandleIdentifier implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("THandleIdentifier"); - - private static final org.apache.thrift.protocol.TField GUID_FIELD_DESC = new org.apache.thrift.protocol.TField("guid", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SECRET_FIELD_DESC = new org.apache.thrift.protocol.TField("secret", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new THandleIdentifierStandardSchemeFactory()); - schemes.put(TupleScheme.class, new THandleIdentifierTupleSchemeFactory()); - } - - private ByteBuffer guid; // required - private ByteBuffer secret; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - GUID((short)1, "guid"), - SECRET((short)2, "secret"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // GUID - return GUID; - case 2: // SECRET - return SECRET; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.GUID, new org.apache.thrift.meta_data.FieldMetaData("guid", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.SECRET, new org.apache.thrift.meta_data.FieldMetaData("secret", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(THandleIdentifier.class, metaDataMap); - } - - public THandleIdentifier() { - } - - public THandleIdentifier( - ByteBuffer guid, - ByteBuffer secret) - { - this(); - this.guid = guid; - this.secret = secret; - } - - /** - * Performs a deep copy on other. - */ - public THandleIdentifier(THandleIdentifier other) { - if (other.isSetGuid()) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(other.guid); -; - } - if (other.isSetSecret()) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(other.secret); -; - } - } - - public THandleIdentifier deepCopy() { - return new THandleIdentifier(this); - } - - @Override - public void clear() { - this.guid = null; - this.secret = null; - } - - public byte[] getGuid() { - setGuid(org.apache.thrift.TBaseHelper.rightSize(guid)); - return guid == null ? null : guid.array(); - } - - public ByteBuffer bufferForGuid() { - return guid; - } - - public void setGuid(byte[] guid) { - setGuid(guid == null ? (ByteBuffer)null : ByteBuffer.wrap(guid)); - } - - public void setGuid(ByteBuffer guid) { - this.guid = guid; - } - - public void unsetGuid() { - this.guid = null; - } - - /** Returns true if field guid is set (has been assigned a value) and false otherwise */ - public boolean isSetGuid() { - return this.guid != null; - } - - public void setGuidIsSet(boolean value) { - if (!value) { - this.guid = null; - } - } - - public byte[] getSecret() { - setSecret(org.apache.thrift.TBaseHelper.rightSize(secret)); - return secret == null ? null : secret.array(); - } - - public ByteBuffer bufferForSecret() { - return secret; - } - - public void setSecret(byte[] secret) { - setSecret(secret == null ? 
(ByteBuffer)null : ByteBuffer.wrap(secret)); - } - - public void setSecret(ByteBuffer secret) { - this.secret = secret; - } - - public void unsetSecret() { - this.secret = null; - } - - /** Returns true if field secret is set (has been assigned a value) and false otherwise */ - public boolean isSetSecret() { - return this.secret != null; - } - - public void setSecretIsSet(boolean value) { - if (!value) { - this.secret = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case GUID: - if (value == null) { - unsetGuid(); - } else { - setGuid((ByteBuffer)value); - } - break; - - case SECRET: - if (value == null) { - unsetSecret(); - } else { - setSecret((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case GUID: - return getGuid(); - - case SECRET: - return getSecret(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case GUID: - return isSetGuid(); - case SECRET: - return isSetSecret(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof THandleIdentifier) - return this.equals((THandleIdentifier)that); - return false; - } - - public boolean equals(THandleIdentifier that) { - if (that == null) - return false; - - boolean this_present_guid = true && this.isSetGuid(); - boolean that_present_guid = true && that.isSetGuid(); - if (this_present_guid || that_present_guid) { - if (!(this_present_guid && that_present_guid)) - return false; - if (!this.guid.equals(that.guid)) - return false; - } - - boolean this_present_secret = true && this.isSetSecret(); - boolean that_present_secret = true && that.isSetSecret(); - if (this_present_secret || that_present_secret) { - if (!(this_present_secret && that_present_secret)) - return false; - if (!this.secret.equals(that.secret)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_guid = true && (isSetGuid()); - builder.append(present_guid); - if (present_guid) - builder.append(guid); - - boolean present_secret = true && (isSetSecret()); - builder.append(present_secret); - if (present_secret) - builder.append(secret); - - return builder.toHashCode(); - } - - public int compareTo(THandleIdentifier other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - THandleIdentifier typedOther = (THandleIdentifier)other; - - lastComparison = Boolean.valueOf(isSetGuid()).compareTo(typedOther.isSetGuid()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGuid()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.guid, typedOther.guid); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSecret()).compareTo(typedOther.isSetSecret()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSecret()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secret, typedOther.secret); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return 
_Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("THandleIdentifier("); - boolean first = true; - - sb.append("guid:"); - if (this.guid == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.guid, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("secret:"); - if (this.secret == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.secret, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetGuid()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'guid' is unset! Struct:" + toString()); - } - - if (!isSetSecret()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'secret' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class THandleIdentifierStandardSchemeFactory implements SchemeFactory { - public THandleIdentifierStandardScheme getScheme() { - return new THandleIdentifierStandardScheme(); - } - } - - private static class THandleIdentifierStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, THandleIdentifier struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // GUID - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SECRET - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, THandleIdentifier struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.guid != null) { - 
oprot.writeFieldBegin(GUID_FIELD_DESC); - oprot.writeBinary(struct.guid); - oprot.writeFieldEnd(); - } - if (struct.secret != null) { - oprot.writeFieldBegin(SECRET_FIELD_DESC); - oprot.writeBinary(struct.secret); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class THandleIdentifierTupleSchemeFactory implements SchemeFactory { - public THandleIdentifierTupleScheme getScheme() { - return new THandleIdentifierTupleScheme(); - } - } - - private static class THandleIdentifierTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeBinary(struct.guid); - oprot.writeBinary(struct.secret); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java deleted file mode 100644 index c83663072f877..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI16Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and 
manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Column.class, metaDataMap); - } - - public TI16Column() { - } - - public TI16Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI16Column(TI16Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Short other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI16Column deepCopy() { - return new TI16Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(short elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Column) - return this.equals((TI16Column)that); - return false; - } - - public boolean equals(TI16Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI16Column other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI16Column typedOther = (TI16Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ColumnStandardSchemeFactory implements SchemeFactory { - public TI16ColumnStandardScheme getScheme() { - return new TI16ColumnStandardScheme(); - } - } - - private static class TI16ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list70 = iprot.readListBegin(); - struct.values = new ArrayList(_list70.size); - for (int _i71 = 0; _i71 < _list70.size; ++_i71) - { - short _elem72; // optional - _elem72 = iprot.readI16(); - struct.values.add(_elem72); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Column struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, struct.values.size())); - for (short _iter73 : struct.values) - { - oprot.writeI16(_iter73); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ColumnTupleSchemeFactory implements SchemeFactory { - public TI16ColumnTupleScheme getScheme() { - return new TI16ColumnTupleScheme(); - } - } - - private static class TI16ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (short _iter74 : struct.values) - { - oprot.writeI16(_iter74); - } - } - oprot.writeBinary(struct.nulls); - } - - 
@Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list75 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, iprot.readI32()); - struct.values = new ArrayList(_list75.size); - for (int _i76 = 0; _i76 < _list75.size; ++_i76) - { - short _elem77; // optional - _elem77 = iprot.readI16(); - struct.values.add(_elem77); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java deleted file mode 100644 index bb5ae9609de86..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI16Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI16Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I16, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ValueTupleSchemeFactory()); - } - - private short value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Value.class, metaDataMap); - } - - public TI16Value() { - } - - /** - * Performs a deep copy on other. - */ - public TI16Value(TI16Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI16Value deepCopy() { - return new TI16Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public short getValue() { - return this.value; - } - - public void setValue(short value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Short.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Value) - return this.equals((TI16Value)that); - return false; - } - - public boolean equals(TI16Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if 
(!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI16Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI16Value typedOther = (TI16Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ValueStandardSchemeFactory implements SchemeFactory { - public TI16ValueStandardScheme getScheme() { - return new TI16ValueStandardScheme(); - } - } - - private static class TI16ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI16(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ValueTupleSchemeFactory implements SchemeFactory { - public TI16ValueTupleScheme getScheme() { - return new TI16ValueTupleScheme(); - } - } - - private static class TI16ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI16(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java deleted file mode 100644 index 6c6c5f35b7c8e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import 
org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI32Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Column.class, metaDataMap); - } - - public TI32Column() { - } - - public TI32Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI32Column(TI32Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Integer other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI32Column deepCopy() { - return new TI32Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(int elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Column) - return this.equals((TI32Column)that); - return false; - } - - public boolean equals(TI32Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI32Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI32Column typedOther = (TI32Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public 
void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ColumnStandardSchemeFactory implements SchemeFactory { - public TI32ColumnStandardScheme getScheme() { - return new TI32ColumnStandardScheme(); - } - } - - private static class TI32ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list78 = iprot.readListBegin(); - struct.values = new ArrayList(_list78.size); - for (int _i79 = 0; _i79 < _list78.size; ++_i79) - { - int _elem80; // optional - _elem80 = iprot.readI32(); - struct.values.add(_elem80); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.values.size())); - for (int _iter81 : struct.values) - { - oprot.writeI32(_iter81); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ColumnTupleSchemeFactory implements SchemeFactory { - public TI32ColumnTupleScheme getScheme() { - return new TI32ColumnTupleScheme(); - } - } - - private static class TI32ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (int _iter82 : struct.values) - { - oprot.writeI32(_iter82); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list83 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.values = new ArrayList(_list83.size); - for (int _i84 = 0; _i84 < _list83.size; ++_i84) - { - int _elem85; // optional - _elem85 = iprot.readI32(); - struct.values.add(_elem85); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java deleted file mode 100644 index 059408b96c8ce..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI32Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI32Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", 
org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ValueTupleSchemeFactory()); - } - - private int value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Value.class, metaDataMap); - } - - public TI32Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI32Value(TI32Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI32Value deepCopy() { - return new TI32Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public int getValue() { - return this.value; - } - - public void setValue(int value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Integer.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Value) - return this.equals((TI32Value)that); - return false; - } - - public boolean equals(TI32Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI32Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI32Value typedOther = (TI32Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - 
sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ValueStandardSchemeFactory implements SchemeFactory { - public TI32ValueStandardScheme getScheme() { - return new TI32ValueStandardScheme(); - } - } - - private static class TI32ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI32(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ValueTupleSchemeFactory implements SchemeFactory { - public TI32ValueTupleScheme getScheme() { - return new TI32ValueTupleScheme(); - } - } - - private static class TI32ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI32(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java 
deleted file mode 100644 index cc383ed089fa4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI64Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Column.class, metaDataMap); - } - - public TI64Column() { - } - - public TI64Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TI64Column(TI64Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (Long other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TI64Column deepCopy() { - return new TI64Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(long elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Column) - return this.equals((TI64Column)that); - return false; - } - - public boolean equals(TI64Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TI64Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI64Column typedOther = (TI64Column)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public 
void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ColumnStandardSchemeFactory implements SchemeFactory { - public TI64ColumnStandardScheme getScheme() { - return new TI64ColumnStandardScheme(); - } - } - - private static class TI64ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list86 = iprot.readListBegin(); - struct.values = new ArrayList(_list86.size); - for (int _i87 = 0; _i87 < _list86.size; ++_i87) - { - long _elem88; // optional - _elem88 = iprot.readI64(); - struct.values.add(_elem88); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, struct.values.size())); - for (long _iter89 : struct.values) - { - oprot.writeI64(_iter89); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ColumnTupleSchemeFactory implements SchemeFactory { - public TI64ColumnTupleScheme getScheme() { - return new TI64ColumnTupleScheme(); - } - } - - private static class TI64ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (long _iter90 : struct.values) - { - oprot.writeI64(_iter90); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list91 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, iprot.readI32()); - struct.values = new ArrayList(_list91.size); - for (int _i92 = 0; _i92 < _list91.size; ++_i92) - { - long _elem93; // optional - _elem93 = iprot.readI64(); - struct.values.add(_elem93); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java deleted file mode 100644 index 9a941cce0c077..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TI64Value.java +++ /dev/null @@ -1,386 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TI64Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", 
org.apache.thrift.protocol.TType.I64, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ValueTupleSchemeFactory()); - } - - private long value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Value.class, metaDataMap); - } - - public TI64Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI64Value(TI64Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI64Value deepCopy() { - return new TI64Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public long getValue() { - return this.value; - } - - public void setValue(long value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return Long.valueOf(getValue()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Value) - return this.equals((TI64Value)that); - return false; - } - - public boolean equals(TI64Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && (isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TI64Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TI64Value typedOther = (TI64Value)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - 
sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ValueStandardSchemeFactory implements SchemeFactory { - public TI64ValueStandardScheme getScheme() { - return new TI64ValueStandardScheme(); - } - } - - private static class TI64ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI64(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ValueTupleSchemeFactory implements SchemeFactory { - public TI64ValueTupleScheme getScheme() { - return new TI64ValueTupleScheme(); - } - } - - private static class TI64ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI64(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java 
b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java deleted file mode 100644 index 425603cbdecbd..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TMapTypeEntry.java +++ /dev/null @@ -1,478 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TMapTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TMapTypeEntry"); - - private static final org.apache.thrift.protocol.TField KEY_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("keyTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField VALUE_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("valueTypePtr", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TMapTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TMapTypeEntryTupleSchemeFactory()); - } - - private int keyTypePtr; // required - private int valueTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - KEY_TYPE_PTR((short)1, "keyTypePtr"), - VALUE_TYPE_PTR((short)2, "valueTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // KEY_TYPE_PTR - return KEY_TYPE_PTR; - case 2: // VALUE_TYPE_PTR - return VALUE_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __KEYTYPEPTR_ISSET_ID = 0; - private static final int __VALUETYPEPTR_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.KEY_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("keyTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - tmpMap.put(_Fields.VALUE_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("valueTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TMapTypeEntry.class, metaDataMap); - } - - public TMapTypeEntry() { - } - - public TMapTypeEntry( - int keyTypePtr, - int valueTypePtr) - { - this(); - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TMapTypeEntry(TMapTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.keyTypePtr = other.keyTypePtr; - this.valueTypePtr = other.valueTypePtr; - } - - public TMapTypeEntry deepCopy() { - return new TMapTypeEntry(this); - } - - @Override - public void clear() { - setKeyTypePtrIsSet(false); - this.keyTypePtr = 0; - setValueTypePtrIsSet(false); - this.valueTypePtr = 0; - } - - public int getKeyTypePtr() { - return this.keyTypePtr; - } - - public void setKeyTypePtr(int keyTypePtr) { - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - } - - public void unsetKeyTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - /** Returns true if field keyTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetKeyTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - public void setKeyTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID, value); - } - - public int getValueTypePtr() { - return this.valueTypePtr; - } - - public void setValueTypePtr(int valueTypePtr) { - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - public void unsetValueTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - /** Returns true if field valueTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetValueTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - public void setValueTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID, value); - } - - public void 
setFieldValue(_Fields field, Object value) { - switch (field) { - case KEY_TYPE_PTR: - if (value == null) { - unsetKeyTypePtr(); - } else { - setKeyTypePtr((Integer)value); - } - break; - - case VALUE_TYPE_PTR: - if (value == null) { - unsetValueTypePtr(); - } else { - setValueTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case KEY_TYPE_PTR: - return Integer.valueOf(getKeyTypePtr()); - - case VALUE_TYPE_PTR: - return Integer.valueOf(getValueTypePtr()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case KEY_TYPE_PTR: - return isSetKeyTypePtr(); - case VALUE_TYPE_PTR: - return isSetValueTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TMapTypeEntry) - return this.equals((TMapTypeEntry)that); - return false; - } - - public boolean equals(TMapTypeEntry that) { - if (that == null) - return false; - - boolean this_present_keyTypePtr = true; - boolean that_present_keyTypePtr = true; - if (this_present_keyTypePtr || that_present_keyTypePtr) { - if (!(this_present_keyTypePtr && that_present_keyTypePtr)) - return false; - if (this.keyTypePtr != that.keyTypePtr) - return false; - } - - boolean this_present_valueTypePtr = true; - boolean that_present_valueTypePtr = true; - if (this_present_valueTypePtr || that_present_valueTypePtr) { - if (!(this_present_valueTypePtr && that_present_valueTypePtr)) - return false; - if (this.valueTypePtr != that.valueTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_keyTypePtr = true; - builder.append(present_keyTypePtr); - if (present_keyTypePtr) - builder.append(keyTypePtr); - - boolean present_valueTypePtr = true; - builder.append(present_valueTypePtr); - if (present_valueTypePtr) - builder.append(valueTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TMapTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TMapTypeEntry typedOther = (TMapTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetKeyTypePtr()).compareTo(typedOther.isSetKeyTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetKeyTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.keyTypePtr, typedOther.keyTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetValueTypePtr()).compareTo(typedOther.isSetValueTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValueTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.valueTypePtr, typedOther.valueTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TMapTypeEntry("); - boolean first = true; - - sb.append("keyTypePtr:"); - sb.append(this.keyTypePtr); - first = false; - if (!first) sb.append(", "); - sb.append("valueTypePtr:"); - sb.append(this.valueTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetKeyTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'keyTypePtr' is unset! Struct:" + toString()); - } - - if (!isSetValueTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'valueTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TMapTypeEntryStandardSchemeFactory implements SchemeFactory { - public TMapTypeEntryStandardScheme getScheme() { - return new TMapTypeEntryStandardScheme(); - } - } - - private static class TMapTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // KEY_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // VALUE_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(KEY_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.keyTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(VALUE_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.valueTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class 
TMapTypeEntryTupleSchemeFactory implements SchemeFactory { - public TMapTypeEntryTupleScheme getScheme() { - return new TMapTypeEntryTupleScheme(); - } - } - - private static class TMapTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.keyTypePtr); - oprot.writeI32(struct.valueTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java deleted file mode 100644 index c0481615b06d3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionReq.java +++ /dev/null @@ -1,785 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOpenSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionReq"); - - private static final org.apache.thrift.protocol.TField CLIENT_PROTOCOL_FIELD_DESC = new org.apache.thrift.protocol.TField("client_protocol", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField USERNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("username", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PASSWORD_FIELD_DESC = new org.apache.thrift.protocol.TField("password", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionReqTupleSchemeFactory()); - } - - private TProtocolVersion client_protocol; // required - private String 
username; // optional - private String password; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TProtocolVersion - */ - CLIENT_PROTOCOL((short)1, "client_protocol"), - USERNAME((short)2, "username"), - PASSWORD((short)3, "password"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // CLIENT_PROTOCOL - return CLIENT_PROTOCOL; - case 2: // USERNAME - return USERNAME; - case 3: // PASSWORD - return PASSWORD; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.USERNAME,_Fields.PASSWORD,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.CLIENT_PROTOCOL, new org.apache.thrift.meta_data.FieldMetaData("client_protocol", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.USERNAME, new org.apache.thrift.meta_data.FieldMetaData("username", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.PASSWORD, new org.apache.thrift.meta_data.FieldMetaData("password", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionReq.class, metaDataMap); - } - - public 
TOpenSessionReq() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - } - - public TOpenSessionReq( - TProtocolVersion client_protocol) - { - this(); - this.client_protocol = client_protocol; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionReq(TOpenSessionReq other) { - if (other.isSetClient_protocol()) { - this.client_protocol = other.client_protocol; - } - if (other.isSetUsername()) { - this.username = other.username; - } - if (other.isSetPassword()) { - this.password = other.password; - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(); - for (Map.Entry other_element : other.configuration.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__configuration_copy_key = other_element_key; - - String __this__configuration_copy_value = other_element_value; - - __this__configuration.put(__this__configuration_copy_key, __this__configuration_copy_value); - } - this.configuration = __this__configuration; - } - } - - public TOpenSessionReq deepCopy() { - return new TOpenSessionReq(this); - } - - @Override - public void clear() { - this.client_protocol = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - this.username = null; - this.password = null; - this.configuration = null; - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getClient_protocol() { - return this.client_protocol; - } - - /** - * - * @see TProtocolVersion - */ - public void setClient_protocol(TProtocolVersion client_protocol) { - this.client_protocol = client_protocol; - } - - public void unsetClient_protocol() { - this.client_protocol = null; - } - - /** Returns true if field client_protocol is set (has been assigned a value) and false otherwise */ - public boolean isSetClient_protocol() { - return this.client_protocol != null; - } - - public void setClient_protocolIsSet(boolean value) { - if (!value) { - this.client_protocol = null; - } - } - - public String getUsername() { - return this.username; - } - - public void setUsername(String username) { - this.username = username; - } - - public void unsetUsername() { - this.username = null; - } - - /** Returns true if field username is set (has been assigned a value) and false otherwise */ - public boolean isSetUsername() { - return this.username != null; - } - - public void setUsernameIsSet(boolean value) { - if (!value) { - this.username = null; - } - } - - public String getPassword() { - return this.password; - } - - public void setPassword(String password) { - this.password = password; - } - - public void unsetPassword() { - this.password = null; - } - - /** Returns true if field password is set (has been assigned a value) and false otherwise */ - public boolean isSetPassword() { - return this.password != null; - } - - public void setPasswordIsSet(boolean value) { - if (!value) { - this.password = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case CLIENT_PROTOCOL: - if (value == null) { - unsetClient_protocol(); - } else { - setClient_protocol((TProtocolVersion)value); - } - break; - - case USERNAME: - if (value == null) { - unsetUsername(); - } else { - setUsername((String)value); - } - break; - - case PASSWORD: - if (value == null) { - unsetPassword(); - } else { - setPassword((String)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case CLIENT_PROTOCOL: - return getClient_protocol(); - - case USERNAME: - return getUsername(); - - case PASSWORD: - return getPassword(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case CLIENT_PROTOCOL: - return isSetClient_protocol(); - case USERNAME: - return isSetUsername(); - case PASSWORD: - return isSetPassword(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionReq) - return this.equals((TOpenSessionReq)that); - return false; - } - - public boolean equals(TOpenSessionReq that) { - if (that == null) - return false; - - boolean this_present_client_protocol = true && this.isSetClient_protocol(); - boolean that_present_client_protocol = true && that.isSetClient_protocol(); - if (this_present_client_protocol || that_present_client_protocol) { - if (!(this_present_client_protocol && that_present_client_protocol)) - return false; - if (!this.client_protocol.equals(that.client_protocol)) - return false; - } - - boolean this_present_username = true && this.isSetUsername(); - boolean that_present_username = true && that.isSetUsername(); - if (this_present_username || that_present_username) { - if (!(this_present_username && that_present_username)) - return false; - if (!this.username.equals(that.username)) - return false; - } - - boolean this_present_password = true && this.isSetPassword(); - boolean that_present_password = true && that.isSetPassword(); - if (this_present_password || that_present_password) { - if (!(this_present_password && that_present_password)) - return false; - if (!this.password.equals(that.password)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration 
= true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_client_protocol = true && (isSetClient_protocol()); - builder.append(present_client_protocol); - if (present_client_protocol) - builder.append(client_protocol.getValue()); - - boolean present_username = true && (isSetUsername()); - builder.append(present_username); - if (present_username) - builder.append(username); - - boolean present_password = true && (isSetPassword()); - builder.append(present_password); - if (present_password) - builder.append(password); - - boolean present_configuration = true && (isSetConfiguration()); - builder.append(present_configuration); - if (present_configuration) - builder.append(configuration); - - return builder.toHashCode(); - } - - public int compareTo(TOpenSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOpenSessionReq typedOther = (TOpenSessionReq)other; - - lastComparison = Boolean.valueOf(isSetClient_protocol()).compareTo(typedOther.isSetClient_protocol()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetClient_protocol()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.client_protocol, typedOther.client_protocol); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetUsername()).compareTo(typedOther.isSetUsername()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetUsername()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.username, typedOther.username); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPassword()).compareTo(typedOther.isSetPassword()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPassword()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.password, typedOther.password); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(typedOther.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, typedOther.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionReq("); - boolean first = true; - - sb.append("client_protocol:"); - if (this.client_protocol == null) { - sb.append("null"); - } else { - sb.append(this.client_protocol); - } - first = false; - if (isSetUsername()) { - if (!first) sb.append(", "); - sb.append("username:"); - if (this.username == 
null) { - sb.append("null"); - } else { - sb.append(this.username); - } - first = false; - } - if (isSetPassword()) { - if (!first) sb.append(", "); - sb.append("password:"); - if (this.password == null) { - sb.append("null"); - } else { - sb.append(this.password); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetClient_protocol()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'client_protocol' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionReqStandardSchemeFactory implements SchemeFactory { - public TOpenSessionReqStandardScheme getScheme() { - return new TOpenSessionReqStandardScheme(); - } - } - - private static class TOpenSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // CLIENT_PROTOCOL - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.client_protocol = TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // USERNAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PASSWORD - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map142 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map142.size); - for (int _i143 = 0; _i143 < _map142.size; ++_i143) - { - String _key144; // required - String _val145; // required - _key144 = iprot.readString(); - _val145 = iprot.readString(); - struct.configuration.put(_key144, _val145); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.client_protocol != null) { - oprot.writeFieldBegin(CLIENT_PROTOCOL_FIELD_DESC); - oprot.writeI32(struct.client_protocol.getValue()); - oprot.writeFieldEnd(); - } - if (struct.username != null) { - if (struct.isSetUsername()) { - oprot.writeFieldBegin(USERNAME_FIELD_DESC); - oprot.writeString(struct.username); - oprot.writeFieldEnd(); - } - } - if (struct.password != null) { - if (struct.isSetPassword()) { - oprot.writeFieldBegin(PASSWORD_FIELD_DESC); - oprot.writeString(struct.password); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter146 : struct.configuration.entrySet()) - { - oprot.writeString(_iter146.getKey()); - oprot.writeString(_iter146.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionReqTupleSchemeFactory implements SchemeFactory { - public TOpenSessionReqTupleScheme getScheme() { - return new TOpenSessionReqTupleScheme(); - } - } - - private static class TOpenSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.client_protocol.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetUsername()) { - optionals.set(0); - } - if (struct.isSetPassword()) { - optionals.set(1); - } - if (struct.isSetConfiguration()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetUsername()) { - oprot.writeString(struct.username); - } - if (struct.isSetPassword()) { - oprot.writeString(struct.password); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter147 : struct.configuration.entrySet()) - { - oprot.writeString(_iter147.getKey()); - oprot.writeString(_iter147.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.client_protocol = TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } - if (incoming.get(1)) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } - if (incoming.get(2)) { - { - org.apache.thrift.protocol.TMap _map148 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map148.size); - for (int _i149 = 0; _i149 < _map148.size; ++_i149) - { - String _key150; // required - String 
_val151; // required - _key150 = iprot.readString(); - _val151 = iprot.readString(); - struct.configuration.put(_key150, _val151); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java deleted file mode 100644 index 351f78b2de20c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOpenSessionResp.java +++ /dev/null @@ -1,790 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOpenSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SERVER_PROTOCOL_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("serverProtocolVersion", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TProtocolVersion serverProtocolVersion; // required - private TSessionHandle sessionHandle; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TProtocolVersion - */ - SERVER_PROTOCOL_VERSION((short)2, "serverProtocolVersion"), - SESSION_HANDLE((short)3, "sessionHandle"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SERVER_PROTOCOL_VERSION - return SERVER_PROTOCOL_VERSION; - case 3: // SESSION_HANDLE - return SESSION_HANDLE; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.SESSION_HANDLE,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SERVER_PROTOCOL_VERSION, new org.apache.thrift.meta_data.FieldMetaData("serverProtocolVersion", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionResp.class, metaDataMap); - } - - public TOpenSessionResp() { - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - 
- } - - public TOpenSessionResp( - TStatus status, - TProtocolVersion serverProtocolVersion) - { - this(); - this.status = status; - this.serverProtocolVersion = serverProtocolVersion; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionResp(TOpenSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetServerProtocolVersion()) { - this.serverProtocolVersion = other.serverProtocolVersion; - } - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(); - for (Map.Entry other_element : other.configuration.entrySet()) { - - String other_element_key = other_element.getKey(); - String other_element_value = other_element.getValue(); - - String __this__configuration_copy_key = other_element_key; - - String __this__configuration_copy_value = other_element_value; - - __this__configuration.put(__this__configuration_copy_key, __this__configuration_copy_value); - } - this.configuration = __this__configuration; - } - } - - public TOpenSessionResp deepCopy() { - return new TOpenSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - this.serverProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V8; - - this.sessionHandle = null; - this.configuration = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getServerProtocolVersion() { - return this.serverProtocolVersion; - } - - /** - * - * @see TProtocolVersion - */ - public void setServerProtocolVersion(TProtocolVersion serverProtocolVersion) { - this.serverProtocolVersion = serverProtocolVersion; - } - - public void unsetServerProtocolVersion() { - this.serverProtocolVersion = null; - } - - /** Returns true if field serverProtocolVersion is set (has been assigned a value) and false otherwise */ - public boolean isSetServerProtocolVersion() { - return this.serverProtocolVersion != null; - } - - public void setServerProtocolVersionIsSet(boolean value) { - if (!value) { - this.serverProtocolVersion = null; - } - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SERVER_PROTOCOL_VERSION: - if (value == null) { - unsetServerProtocolVersion(); - } else { - setServerProtocolVersion((TProtocolVersion)value); - } - break; - - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SERVER_PROTOCOL_VERSION: - return getServerProtocolVersion(); - - case SESSION_HANDLE: - return getSessionHandle(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SERVER_PROTOCOL_VERSION: - return isSetServerProtocolVersion(); - case SESSION_HANDLE: - return isSetSessionHandle(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionResp) - return this.equals((TOpenSessionResp)that); - return false; - } - - public boolean equals(TOpenSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_serverProtocolVersion = true && this.isSetServerProtocolVersion(); - boolean that_present_serverProtocolVersion = true && that.isSetServerProtocolVersion(); - if (this_present_serverProtocolVersion || that_present_serverProtocolVersion) { - if (!(this_present_serverProtocolVersion && that_present_serverProtocolVersion)) - return false; - if (!this.serverProtocolVersion.equals(that.serverProtocolVersion)) - return false; - } - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if 
(!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration = true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - builder.append(status); - - boolean present_serverProtocolVersion = true && (isSetServerProtocolVersion()); - builder.append(present_serverProtocolVersion); - if (present_serverProtocolVersion) - builder.append(serverProtocolVersion.getValue()); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_configuration = true && (isSetConfiguration()); - builder.append(present_configuration); - if (present_configuration) - builder.append(configuration); - - return builder.toHashCode(); - } - - public int compareTo(TOpenSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOpenSessionResp typedOther = (TOpenSessionResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetServerProtocolVersion()).compareTo(typedOther.isSetServerProtocolVersion()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetServerProtocolVersion()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.serverProtocolVersion, typedOther.serverProtocolVersion); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(typedOther.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, typedOther.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionResp("); - boolean first = 
true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("serverProtocolVersion:"); - if (this.serverProtocolVersion == null) { - sb.append("null"); - } else { - sb.append(this.serverProtocolVersion); - } - first = false; - if (isSetSessionHandle()) { - if (!first) sb.append(", "); - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetServerProtocolVersion()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'serverProtocolVersion' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionRespStandardSchemeFactory implements SchemeFactory { - public TOpenSessionRespStandardScheme getScheme() { - return new TOpenSessionRespStandardScheme(); - } - } - - private static class TOpenSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SERVER_PROTOCOL_VERSION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.serverProtocolVersion = TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map152 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map152.size); - for (int _i153 = 0; _i153 < _map152.size; ++_i153) - { - String _key154; // required - String _val155; // required - _key154 = iprot.readString(); - _val155 = iprot.readString(); - struct.configuration.put(_key154, _val155); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.serverProtocolVersion != null) { - oprot.writeFieldBegin(SERVER_PROTOCOL_VERSION_FIELD_DESC); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - oprot.writeFieldEnd(); - } - if (struct.sessionHandle != null) { - if (struct.isSetSessionHandle()) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter156 : struct.configuration.entrySet()) - { - oprot.writeString(_iter156.getKey()); - oprot.writeString(_iter156.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionRespTupleSchemeFactory implements SchemeFactory { - public TOpenSessionRespTupleScheme getScheme() { - return new TOpenSessionRespTupleScheme(); - } - } - - private static class TOpenSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetSessionHandle()) { - optionals.set(0); - } - if (struct.isSetConfiguration()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetSessionHandle()) { - struct.sessionHandle.write(oprot); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter157 : struct.configuration.entrySet()) - { - oprot.writeString(_iter157.getKey()); - oprot.writeString(_iter157.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.serverProtocolVersion = 
TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - if (incoming.get(1)) { - { - org.apache.thrift.protocol.TMap _map158 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map158.size); - for (int _i159 = 0; _i159 < _map158.size; ++_i159) - { - String _key160; // required - String _val161; // required - _key160 = iprot.readString(); - _val161 = iprot.readString(); - struct.configuration.put(_key160, _val161); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java deleted file mode 100644 index 8fbd8752eaca6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationHandle.java +++ /dev/null @@ -1,705 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TOperationHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOperationHandle"); - - private static final org.apache.thrift.protocol.TField OPERATION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("operationId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationType", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)3); - private static final org.apache.thrift.protocol.TField MODIFIED_ROW_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("modifiedRowCount", org.apache.thrift.protocol.TType.DOUBLE, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOperationHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOperationHandleTupleSchemeFactory()); - } - - private THandleIdentifier operationId; // 
required - private TOperationType operationType; // required - private boolean hasResultSet; // required - private double modifiedRowCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_ID((short)1, "operationId"), - /** - * - * @see TOperationType - */ - OPERATION_TYPE((short)2, "operationType"), - HAS_RESULT_SET((short)3, "hasResultSet"), - MODIFIED_ROW_COUNT((short)4, "modifiedRowCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_ID - return OPERATION_ID; - case 2: // OPERATION_TYPE - return OPERATION_TYPE; - case 3: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 4: // MODIFIED_ROW_COUNT - return MODIFIED_ROW_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASRESULTSET_ISSET_ID = 0; - private static final int __MODIFIEDROWCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.MODIFIED_ROW_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_ID, new org.apache.thrift.meta_data.FieldMetaData("operationId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - tmpMap.put(_Fields.OPERATION_TYPE, new org.apache.thrift.meta_data.FieldMetaData("operationType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationType.class))); - tmpMap.put(_Fields.HAS_RESULT_SET, new org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.MODIFIED_ROW_COUNT, new org.apache.thrift.meta_data.FieldMetaData("modifiedRowCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOperationHandle.class, metaDataMap); - } - - public TOperationHandle() { - } - - public TOperationHandle( - THandleIdentifier operationId, - TOperationType operationType, - boolean hasResultSet) - { - this(); - this.operationId = operationId; - this.operationType = operationType; - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TOperationHandle(TOperationHandle other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationId()) { - this.operationId = new THandleIdentifier(other.operationId); - } - if (other.isSetOperationType()) { - this.operationType = other.operationType; - } - this.hasResultSet = other.hasResultSet; - this.modifiedRowCount = other.modifiedRowCount; - } - - public TOperationHandle deepCopy() { - return new TOperationHandle(this); - } - - @Override - public void clear() { - this.operationId = null; - this.operationType = null; - setHasResultSetIsSet(false); - this.hasResultSet = false; - setModifiedRowCountIsSet(false); - this.modifiedRowCount = 0.0; - } - - public THandleIdentifier getOperationId() { - return this.operationId; - } - - public void setOperationId(THandleIdentifier operationId) { - this.operationId = operationId; - } - - public void unsetOperationId() { - this.operationId = null; - } - - /** Returns true if field operationId is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationId() { - return this.operationId != null; - } - - public void setOperationIdIsSet(boolean value) { - if (!value) { - this.operationId = null; - } - } - - /** - * - * @see TOperationType - */ - public TOperationType getOperationType() { - return this.operationType; - } - - /** - * - * @see TOperationType - */ - public void setOperationType(TOperationType operationType) { - this.operationType = operationType; - } - - public void unsetOperationType() { - this.operationType = null; - } - - /** Returns true if field operationType is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationType() { - return this.operationType != null; - } - - public void setOperationTypeIsSet(boolean value) { - if (!value) { - this.operationType = null; - } - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public double getModifiedRowCount() { - return this.modifiedRowCount; - } - - public void setModifiedRowCount(double modifiedRowCount) { - this.modifiedRowCount = modifiedRowCount; - setModifiedRowCountIsSet(true); - } - - public void unsetModifiedRowCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - /** Returns true if field modifiedRowCount is set (has been assigned a value) and false otherwise */ - public boolean isSetModifiedRowCount() { - return 
EncodingUtils.testBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - public void setModifiedRowCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_ID: - if (value == null) { - unsetOperationId(); - } else { - setOperationId((THandleIdentifier)value); - } - break; - - case OPERATION_TYPE: - if (value == null) { - unsetOperationType(); - } else { - setOperationType((TOperationType)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case MODIFIED_ROW_COUNT: - if (value == null) { - unsetModifiedRowCount(); - } else { - setModifiedRowCount((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_ID: - return getOperationId(); - - case OPERATION_TYPE: - return getOperationType(); - - case HAS_RESULT_SET: - return Boolean.valueOf(isHasResultSet()); - - case MODIFIED_ROW_COUNT: - return Double.valueOf(getModifiedRowCount()); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_ID: - return isSetOperationId(); - case OPERATION_TYPE: - return isSetOperationType(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case MODIFIED_ROW_COUNT: - return isSetModifiedRowCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOperationHandle) - return this.equals((TOperationHandle)that); - return false; - } - - public boolean equals(TOperationHandle that) { - if (that == null) - return false; - - boolean this_present_operationId = true && this.isSetOperationId(); - boolean that_present_operationId = true && that.isSetOperationId(); - if (this_present_operationId || that_present_operationId) { - if (!(this_present_operationId && that_present_operationId)) - return false; - if (!this.operationId.equals(that.operationId)) - return false; - } - - boolean this_present_operationType = true && this.isSetOperationType(); - boolean that_present_operationType = true && that.isSetOperationType(); - if (this_present_operationType || that_present_operationType) { - if (!(this_present_operationType && that_present_operationType)) - return false; - if (!this.operationType.equals(that.operationType)) - return false; - } - - boolean this_present_hasResultSet = true; - boolean that_present_hasResultSet = true; - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_modifiedRowCount = true && this.isSetModifiedRowCount(); - boolean that_present_modifiedRowCount = true && that.isSetModifiedRowCount(); - if (this_present_modifiedRowCount || that_present_modifiedRowCount) { - if (!(this_present_modifiedRowCount && that_present_modifiedRowCount)) - return false; - if (this.modifiedRowCount != that.modifiedRowCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - 
- boolean present_operationId = true && (isSetOperationId()); - builder.append(present_operationId); - if (present_operationId) - builder.append(operationId); - - boolean present_operationType = true && (isSetOperationType()); - builder.append(present_operationType); - if (present_operationType) - builder.append(operationType.getValue()); - - boolean present_hasResultSet = true; - builder.append(present_hasResultSet); - if (present_hasResultSet) - builder.append(hasResultSet); - - boolean present_modifiedRowCount = true && (isSetModifiedRowCount()); - builder.append(present_modifiedRowCount); - if (present_modifiedRowCount) - builder.append(modifiedRowCount); - - return builder.toHashCode(); - } - - public int compareTo(TOperationHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TOperationHandle typedOther = (TOperationHandle)other; - - lastComparison = Boolean.valueOf(isSetOperationId()).compareTo(typedOther.isSetOperationId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationId, typedOther.operationId); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationType()).compareTo(typedOther.isSetOperationType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationType, typedOther.operationType); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(typedOther.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, typedOther.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetModifiedRowCount()).compareTo(typedOther.isSetModifiedRowCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetModifiedRowCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.modifiedRowCount, typedOther.modifiedRowCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOperationHandle("); - boolean first = true; - - sb.append("operationId:"); - if (this.operationId == null) { - sb.append("null"); - } else { - sb.append(this.operationId); - } - first = false; - if (!first) sb.append(", "); - sb.append("operationType:"); - if (this.operationType == null) { - sb.append("null"); - } else { - sb.append(this.operationType); - } - first = false; - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - if (isSetModifiedRowCount()) { - if (!first) sb.append(", "); - sb.append("modifiedRowCount:"); - sb.append(this.modifiedRowCount); - first 
= false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationId' is unset! Struct:" + toString()); - } - - if (!isSetOperationType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationType' is unset! Struct:" + toString()); - } - - if (!isSetHasResultSet()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'hasResultSet' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationId != null) { - operationId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOperationHandleStandardSchemeFactory implements SchemeFactory { - public TOperationHandleStandardScheme getScheme() { - return new TOperationHandleStandardScheme(); - } - } - - private static class TOperationHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOperationHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationType = TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // MODIFIED_ROW_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot, TOperationHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationId != null) { - oprot.writeFieldBegin(OPERATION_ID_FIELD_DESC); - struct.operationId.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationType != null) { - oprot.writeFieldBegin(OPERATION_TYPE_FIELD_DESC); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - if (struct.isSetModifiedRowCount()) { - oprot.writeFieldBegin(MODIFIED_ROW_COUNT_FIELD_DESC); - oprot.writeDouble(struct.modifiedRowCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOperationHandleTupleSchemeFactory implements SchemeFactory { - public TOperationHandleTupleScheme getScheme() { - return new TOperationHandleTupleScheme(); - } - } - - private static class TOperationHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationId.write(oprot); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeBool(struct.hasResultSet); - BitSet optionals = new BitSet(); - if (struct.isSetModifiedRowCount()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetModifiedRowCount()) { - oprot.writeDouble(struct.modifiedRowCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - struct.operationType = TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java deleted file mode 100644 index 219866223a6b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationState.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationState implements org.apache.thrift.TEnum { - INITIALIZED_STATE(0), - RUNNING_STATE(1), - FINISHED_STATE(2), - CANCELED_STATE(3), - CLOSED_STATE(4), - ERROR_STATE(5), - UKNOWN_STATE(6), - PENDING_STATE(7); - - private final int value; - - private TOperationState(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. 
- * @return null if the value is not found. - */ - public static TOperationState findByValue(int value) { - switch (value) { - case 0: - return INITIALIZED_STATE; - case 1: - return RUNNING_STATE; - case 2: - return FINISHED_STATE; - case 3: - return CANCELED_STATE; - case 4: - return CLOSED_STATE; - case 5: - return ERROR_STATE; - case 6: - return UKNOWN_STATE; - case 7: - return PENDING_STATE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java deleted file mode 100644 index b6d4b2fab9f96..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TOperationType.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationType implements org.apache.thrift.TEnum { - EXECUTE_STATEMENT(0), - GET_TYPE_INFO(1), - GET_CATALOGS(2), - GET_SCHEMAS(3), - GET_TABLES(4), - GET_TABLE_TYPES(5), - GET_COLUMNS(6), - GET_FUNCTIONS(7), - UNKNOWN(8); - - private final int value; - - private TOperationType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TOperationType findByValue(int value) { - switch (value) { - case 0: - return EXECUTE_STATEMENT; - case 1: - return GET_TYPE_INFO; - case 2: - return GET_CATALOGS; - case 3: - return GET_SCHEMAS; - case 4: - return GET_TABLES; - case 5: - return GET_TABLE_TYPES; - case 6: - return GET_COLUMNS; - case 7: - return GET_FUNCTIONS; - case 8: - return UNKNOWN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java deleted file mode 100644 index 9d2abf2b3b084..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java +++ /dev/null @@ -1,512 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class 
TPrimitiveTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPrimitiveTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("typeQualifiers", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TPrimitiveTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TPrimitiveTypeEntryTupleSchemeFactory()); - } - - private TTypeId type; // required - private TTypeQualifiers typeQualifiers; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TTypeId - */ - TYPE((short)1, "type"), - TYPE_QUALIFIERS((short)2, "typeQualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE - return TYPE; - case 2: // TYPE_QUALIFIERS - return TYPE_QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.TYPE_QUALIFIERS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE, new org.apache.thrift.meta_data.FieldMetaData("type", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TTypeId.class))); - tmpMap.put(_Fields.TYPE_QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("typeQualifiers", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifiers.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPrimitiveTypeEntry.class, metaDataMap); - } - - public TPrimitiveTypeEntry() { - } - - public TPrimitiveTypeEntry( - TTypeId type) - { - this(); - this.type = type; - } - - /** - * Performs a deep copy on other. - */ - public TPrimitiveTypeEntry(TPrimitiveTypeEntry other) { - if (other.isSetType()) { - this.type = other.type; - } - if (other.isSetTypeQualifiers()) { - this.typeQualifiers = new TTypeQualifiers(other.typeQualifiers); - } - } - - public TPrimitiveTypeEntry deepCopy() { - return new TPrimitiveTypeEntry(this); - } - - @Override - public void clear() { - this.type = null; - this.typeQualifiers = null; - } - - /** - * - * @see TTypeId - */ - public TTypeId getType() { - return this.type; - } - - /** - * - * @see TTypeId - */ - public void setType(TTypeId type) { - this.type = type; - } - - public void unsetType() { - this.type = null; - } - - /** Returns true if field type is set (has been assigned a value) and false otherwise */ - public boolean isSetType() { - return this.type != null; - } - - public void setTypeIsSet(boolean value) { - if (!value) { - this.type = null; - } - } - - public TTypeQualifiers getTypeQualifiers() { - return this.typeQualifiers; - } - - public void setTypeQualifiers(TTypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - public void unsetTypeQualifiers() { - this.typeQualifiers = null; - } - - /** Returns true if field typeQualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeQualifiers() { - return this.typeQualifiers != null; - } - - public void setTypeQualifiersIsSet(boolean value) { - if (!value) { - this.typeQualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE: - if (value == null) { - unsetType(); - } else { - setType((TTypeId)value); - } - break; - - case TYPE_QUALIFIERS: - if (value == null) { - unsetTypeQualifiers(); - } else { - setTypeQualifiers((TTypeQualifiers)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE: - return getType(); - - case TYPE_QUALIFIERS: - return getTypeQualifiers(); - - } - throw new IllegalStateException(); - } - 
- /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE: - return isSetType(); - case TYPE_QUALIFIERS: - return isSetTypeQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TPrimitiveTypeEntry) - return this.equals((TPrimitiveTypeEntry)that); - return false; - } - - public boolean equals(TPrimitiveTypeEntry that) { - if (that == null) - return false; - - boolean this_present_type = true && this.isSetType(); - boolean that_present_type = true && that.isSetType(); - if (this_present_type || that_present_type) { - if (!(this_present_type && that_present_type)) - return false; - if (!this.type.equals(that.type)) - return false; - } - - boolean this_present_typeQualifiers = true && this.isSetTypeQualifiers(); - boolean that_present_typeQualifiers = true && that.isSetTypeQualifiers(); - if (this_present_typeQualifiers || that_present_typeQualifiers) { - if (!(this_present_typeQualifiers && that_present_typeQualifiers)) - return false; - if (!this.typeQualifiers.equals(that.typeQualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_type = true && (isSetType()); - builder.append(present_type); - if (present_type) - builder.append(type.getValue()); - - boolean present_typeQualifiers = true && (isSetTypeQualifiers()); - builder.append(present_typeQualifiers); - if (present_typeQualifiers) - builder.append(typeQualifiers); - - return builder.toHashCode(); - } - - public int compareTo(TPrimitiveTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TPrimitiveTypeEntry typedOther = (TPrimitiveTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetType()).compareTo(typedOther.isSetType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.type, typedOther.type); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeQualifiers()).compareTo(typedOther.isSetTypeQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeQualifiers, typedOther.typeQualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TPrimitiveTypeEntry("); - boolean first = true; - - sb.append("type:"); - if (this.type == null) { - sb.append("null"); - } else { - sb.append(this.type); - } - first = false; - if (isSetTypeQualifiers()) { - if (!first) sb.append(", "); - sb.append("typeQualifiers:"); - if 
(this.typeQualifiers == null) { - sb.append("null"); - } else { - sb.append(this.typeQualifiers); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'type' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeQualifiers != null) { - typeQualifiers.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TPrimitiveTypeEntryStandardSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryStandardScheme getScheme() { - return new TPrimitiveTypeEntryStandardScheme(); - } - } - - private static class TPrimitiveTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.type = TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.type != null) { - oprot.writeFieldBegin(TYPE_FIELD_DESC); - oprot.writeI32(struct.type.getValue()); - oprot.writeFieldEnd(); - } - if (struct.typeQualifiers != null) { - if (struct.isSetTypeQualifiers()) { - oprot.writeFieldBegin(TYPE_QUALIFIERS_FIELD_DESC); - struct.typeQualifiers.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TPrimitiveTypeEntryTupleSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryTupleScheme getScheme() { - return new TPrimitiveTypeEntryTupleScheme(); - } - } - - private static class TPrimitiveTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws 
org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.type.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetTypeQualifiers()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetTypeQualifiers()) { - struct.typeQualifiers.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.type = TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java deleted file mode 100644 index a4279d29f662e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TProtocolVersion.java +++ /dev/null @@ -1,63 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TProtocolVersion implements org.apache.thrift.TEnum { - HIVE_CLI_SERVICE_PROTOCOL_V1(0), - HIVE_CLI_SERVICE_PROTOCOL_V2(1), - HIVE_CLI_SERVICE_PROTOCOL_V3(2), - HIVE_CLI_SERVICE_PROTOCOL_V4(3), - HIVE_CLI_SERVICE_PROTOCOL_V5(4), - HIVE_CLI_SERVICE_PROTOCOL_V6(5), - HIVE_CLI_SERVICE_PROTOCOL_V7(6), - HIVE_CLI_SERVICE_PROTOCOL_V8(7); - - private final int value; - - private TProtocolVersion(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TProtocolVersion findByValue(int value) { - switch (value) { - case 0: - return HIVE_CLI_SERVICE_PROTOCOL_V1; - case 1: - return HIVE_CLI_SERVICE_PROTOCOL_V2; - case 2: - return HIVE_CLI_SERVICE_PROTOCOL_V3; - case 3: - return HIVE_CLI_SERVICE_PROTOCOL_V4; - case 4: - return HIVE_CLI_SERVICE_PROTOCOL_V5; - case 5: - return HIVE_CLI_SERVICE_PROTOCOL_V6; - case 6: - return HIVE_CLI_SERVICE_PROTOCOL_V7; - case 7: - return HIVE_CLI_SERVICE_PROTOCOL_V8; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java deleted file mode 100644 index a3e39c8cdf321..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenReq.java +++ /dev/null @@ -1,491 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRenewDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenReq.class, metaDataMap); - } - - public TRenewDelegationTokenReq() { - } - - public TRenewDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TRenewDelegationTokenReq(TRenewDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TRenewDelegationTokenReq deepCopy() { - return new TRenewDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenReq) - return this.equals((TRenewDelegationTokenReq)that); - return false; - } - - public boolean equals(TRenewDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return 
false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - builder.append(present_sessionHandle); - if (present_sessionHandle) - builder.append(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - builder.append(present_delegationToken); - if (present_delegationToken) - builder.append(delegationToken); - - return builder.toHashCode(); - } - - public int compareTo(TRenewDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRenewDelegationTokenReq typedOther = (TRenewDelegationTokenReq)other; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(typedOther.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, typedOther.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(typedOther.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, typedOther.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqStandardScheme getScheme() { - return new TRenewDelegationTokenReqStandardScheme(); - } - } - - private static class TRenewDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqTupleScheme getScheme() { - return new TRenewDelegationTokenReqTupleScheme(); - } - } - - private static class TRenewDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java deleted file mode 100644 index 5f3eb6c4d4b90..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRenewDelegationTokenResp.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRenewDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
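(Not part of the original patch.) The deleted generated classes all follow the same Thrift pattern visible above: required fields are only enforced by `validate()`, and Java serialization is delegated to `TCompactProtocol` in the `writeObject`/`readObject` helpers. A minimal sketch of that round trip for `TRenewDelegationTokenResp`, using only constructors and methods shown in the deleted code; it assumes libthrift 0.9.x on the classpath and a `TStatusCode.SUCCESS_STATUS` constant, which is not shown in this hunk.

```java
// Illustrative sketch only -- mirrors the generated writeObject/readObject helpers.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hive.service.cli.thrift.TRenewDelegationTokenResp;
import org.apache.hive.service.cli.thrift.TStatus;
import org.apache.hive.service.cli.thrift.TStatusCode;   // assumed: enum not shown in this hunk
import org.apache.thrift.protocol.TCompactProtocol;
import org.apache.thrift.transport.TIOStreamTransport;

public class RenewRespRoundTrip {
  public static void main(String[] args) throws Exception {
    TRenewDelegationTokenResp resp = new TRenewDelegationTokenResp(
        new TStatus(TStatusCode.SUCCESS_STATUS));
    resp.validate();  // throws TProtocolException if the required 'status' field is unset

    // Serialize with the same protocol/transport pair the generated helper uses.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    resp.write(new TCompactProtocol(new TIOStreamTransport(bytes)));

    // Deserialize into a fresh instance and compare.
    TRenewDelegationTokenResp copy = new TRenewDelegationTokenResp();
    copy.read(new TCompactProtocol(new TIOStreamTransport(
        new ByteArrayInputStream(bytes.toByteArray()))));
    System.out.println(resp.equals(copy));  // true when 'status' survives the round trip
  }
}
```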
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenResp.class, metaDataMap); - } - - public TRenewDelegationTokenResp() { - } - - public TRenewDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TRenewDelegationTokenResp(TRenewDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TRenewDelegationTokenResp deepCopy() { - return new TRenewDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenResp) - return this.equals((TRenewDelegationTokenResp)that); - return false; - } - - public boolean equals(TRenewDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_status = true && (isSetStatus()); - builder.append(present_status); - if (present_status) - 
builder.append(status); - - return builder.toHashCode(); - } - - public int compareTo(TRenewDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRenewDelegationTokenResp typedOther = (TRenewDelegationTokenResp)other; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(typedOther.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, typedOther.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespStandardScheme getScheme() { - return new TRenewDelegationTokenRespStandardScheme(); - } - } - - private static class TRenewDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespTupleScheme getScheme() { - return new TRenewDelegationTokenRespTupleScheme(); - } - } - - private static class TRenewDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java deleted file mode 100644 index a44cfb08ff01a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRow.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRow implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRow"); - - private static final org.apache.thrift.protocol.TField COL_VALS_FIELD_DESC = new org.apache.thrift.protocol.TField("colVals", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowTupleSchemeFactory()); - } - - private List colVals; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COL_VALS((short)1, "colVals"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COL_VALS - return COL_VALS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COL_VALS, new org.apache.thrift.meta_data.FieldMetaData("colVals", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRow.class, metaDataMap); - } - - public TRow() { - } - - public TRow( - List colVals) - { - this(); - this.colVals = colVals; - } - - /** - * Performs a deep copy on other. - */ - public TRow(TRow other) { - if (other.isSetColVals()) { - List __this__colVals = new ArrayList(); - for (TColumnValue other_element : other.colVals) { - __this__colVals.add(new TColumnValue(other_element)); - } - this.colVals = __this__colVals; - } - } - - public TRow deepCopy() { - return new TRow(this); - } - - @Override - public void clear() { - this.colVals = null; - } - - public int getColValsSize() { - return (this.colVals == null) ? 0 : this.colVals.size(); - } - - public java.util.Iterator getColValsIterator() { - return (this.colVals == null) ? 
null : this.colVals.iterator(); - } - - public void addToColVals(TColumnValue elem) { - if (this.colVals == null) { - this.colVals = new ArrayList(); - } - this.colVals.add(elem); - } - - public List getColVals() { - return this.colVals; - } - - public void setColVals(List colVals) { - this.colVals = colVals; - } - - public void unsetColVals() { - this.colVals = null; - } - - /** Returns true if field colVals is set (has been assigned a value) and false otherwise */ - public boolean isSetColVals() { - return this.colVals != null; - } - - public void setColValsIsSet(boolean value) { - if (!value) { - this.colVals = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COL_VALS: - if (value == null) { - unsetColVals(); - } else { - setColVals((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COL_VALS: - return getColVals(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COL_VALS: - return isSetColVals(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRow) - return this.equals((TRow)that); - return false; - } - - public boolean equals(TRow that) { - if (that == null) - return false; - - boolean this_present_colVals = true && this.isSetColVals(); - boolean that_present_colVals = true && that.isSetColVals(); - if (this_present_colVals || that_present_colVals) { - if (!(this_present_colVals && that_present_colVals)) - return false; - if (!this.colVals.equals(that.colVals)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_colVals = true && (isSetColVals()); - builder.append(present_colVals); - if (present_colVals) - builder.append(colVals); - - return builder.toHashCode(); - } - - public int compareTo(TRow other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRow typedOther = (TRow)other; - - lastComparison = Boolean.valueOf(isSetColVals()).compareTo(typedOther.isSetColVals()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColVals()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.colVals, typedOther.colVals); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRow("); - boolean first = true; - - sb.append("colVals:"); - if (this.colVals == null) { - sb.append("null"); - } else { - sb.append(this.colVals); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required 
fields - if (!isSetColVals()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'colVals' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowStandardSchemeFactory implements SchemeFactory { - public TRowStandardScheme getScheme() { - return new TRowStandardScheme(); - } - } - - private static class TRowStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRow struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COL_VALS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list46 = iprot.readListBegin(); - struct.colVals = new ArrayList(_list46.size); - for (int _i47 = 0; _i47 < _list46.size; ++_i47) - { - TColumnValue _elem48; // optional - _elem48 = new TColumnValue(); - _elem48.read(iprot); - struct.colVals.add(_elem48); - } - iprot.readListEnd(); - } - struct.setColValsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRow struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.colVals != null) { - oprot.writeFieldBegin(COL_VALS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.colVals.size())); - for (TColumnValue _iter49 : struct.colVals) - { - _iter49.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowTupleSchemeFactory implements SchemeFactory { - public TRowTupleScheme getScheme() { - return new TRowTupleScheme(); - } - } - - private static class TRowTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.colVals.size()); - for (TColumnValue _iter50 : struct.colVals) - { - _iter50.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list51 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); 
- struct.colVals = new ArrayList(_list51.size); - for (int _i52 = 0; _i52 < _list51.size; ++_i52) - { - TColumnValue _elem53; // optional - _elem53 = new TColumnValue(); - _elem53.read(iprot); - struct.colVals.add(_elem53); - } - } - struct.setColValsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java deleted file mode 100644 index d16c8a4bb32da..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TRowSet.java +++ /dev/null @@ -1,702 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TRowSet implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRowSet"); - - private static final org.apache.thrift.protocol.TField START_ROW_OFFSET_FIELD_DESC = new org.apache.thrift.protocol.TField("startRowOffset", org.apache.thrift.protocol.TType.I64, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowSetStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowSetTupleSchemeFactory()); - } - - private long startRowOffset; // required - private List rows; // required - private List columns; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - START_ROW_OFFSET((short)1, "startRowOffset"), - ROWS((short)2, "rows"), - COLUMNS((short)3, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // START_ROW_OFFSET - return START_ROW_OFFSET; - case 2: // ROWS - return ROWS; - case 3: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __STARTROWOFFSET_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.COLUMNS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.START_ROW_OFFSET, new org.apache.thrift.meta_data.FieldMetaData("startRowOffset", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRow.class)))); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumn.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRowSet.class, metaDataMap); - } - - public TRowSet() { - } - - public TRowSet( - long startRowOffset, - List rows) - { - this(); - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - this.rows = rows; - } - - /** - * Performs a deep copy on other. 
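(Not part of the original patch.) For `TRowSet`, `startRowOffset` and `rows` are REQUIRED while `columns` is OPTIONAL: the setter for the primitive `long` flips an isset bit, `validate()` checks only the required fields, and the tuple scheme later encodes the optional field behind a `BitSet` flag. A minimal sketch under those assumptions, using only methods shown in the deleted code (the fields of `TColumnValue` are not shown here, so it is left empty for illustration).

```java
// Illustrative sketch only -- required vs. optional fields in the generated TRowSet.
import org.apache.hive.service.cli.thrift.TColumnValue;
import org.apache.hive.service.cli.thrift.TRow;
import org.apache.hive.service.cli.thrift.TRowSet;

public class RowSetSketch {
  public static void main(String[] args) throws Exception {
    TRow row = new TRow();
    row.addToColVals(new TColumnValue());   // contents elided; setters not shown in this hunk

    TRowSet rowSet = new TRowSet();
    rowSet.setStartRowOffset(0L);           // also sets the isset bit for the primitive field
    rowSet.addToRows(row);
    rowSet.validate();                      // passes: both required fields are set

    System.out.println(rowSet.isSetColumns());  // false -- optional 'columns' left unset
  }
}
```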
- */ - public TRowSet(TRowSet other) { - __isset_bitfield = other.__isset_bitfield; - this.startRowOffset = other.startRowOffset; - if (other.isSetRows()) { - List __this__rows = new ArrayList(); - for (TRow other_element : other.rows) { - __this__rows.add(new TRow(other_element)); - } - this.rows = __this__rows; - } - if (other.isSetColumns()) { - List __this__columns = new ArrayList(); - for (TColumn other_element : other.columns) { - __this__columns.add(new TColumn(other_element)); - } - this.columns = __this__columns; - } - } - - public TRowSet deepCopy() { - return new TRowSet(this); - } - - @Override - public void clear() { - setStartRowOffsetIsSet(false); - this.startRowOffset = 0; - this.rows = null; - this.columns = null; - } - - public long getStartRowOffset() { - return this.startRowOffset; - } - - public void setStartRowOffset(long startRowOffset) { - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - } - - public void unsetStartRowOffset() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - /** Returns true if field startRowOffset is set (has been assigned a value) and false otherwise */ - public boolean isSetStartRowOffset() { - return EncodingUtils.testBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - public void setStartRowOffsetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID, value); - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator getRowsIterator() { - return (this.rows == null) ? null : this.rows.iterator(); - } - - public void addToRows(TRow elem) { - if (this.rows == null) { - this.rows = new ArrayList(); - } - this.rows.add(elem); - } - - public List getRows() { - return this.rows; - } - - public void setRows(List rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumn elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case START_ROW_OFFSET: - if (value == null) { - unsetStartRowOffset(); - } else { - setStartRowOffset((Long)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List)value); - } - break; - - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case START_ROW_OFFSET: - return Long.valueOf(getStartRowOffset()); - - case ROWS: - return getRows(); - - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case START_ROW_OFFSET: - return isSetStartRowOffset(); - case ROWS: - return isSetRows(); - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRowSet) - return this.equals((TRowSet)that); - return false; - } - - public boolean equals(TRowSet that) { - if (that == null) - return false; - - boolean this_present_startRowOffset = true; - boolean that_present_startRowOffset = true; - if (this_present_startRowOffset || that_present_startRowOffset) { - if (!(this_present_startRowOffset && that_present_startRowOffset)) - return false; - if (this.startRowOffset != that.startRowOffset) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_startRowOffset = true; - builder.append(present_startRowOffset); - if (present_startRowOffset) - builder.append(startRowOffset); - - boolean present_rows = true && (isSetRows()); - builder.append(present_rows); - if (present_rows) - builder.append(rows); - - boolean present_columns = true && (isSetColumns()); - builder.append(present_columns); - if (present_columns) - builder.append(columns); - - return builder.toHashCode(); - } - - public int compareTo(TRowSet other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TRowSet typedOther = (TRowSet)other; - - lastComparison = Boolean.valueOf(isSetStartRowOffset()).compareTo(typedOther.isSetStartRowOffset()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartRowOffset()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startRowOffset, typedOther.startRowOffset); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(typedOther.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, typedOther.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(typedOther.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, typedOther.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRowSet("); - boolean first = true; - - sb.append("startRowOffset:"); - sb.append(this.startRowOffset); - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (isSetColumns()) { - if (!first) sb.append(", "); - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStartRowOffset()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startRowOffset' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowSetStandardSchemeFactory implements SchemeFactory { - public TRowSetStandardScheme getScheme() { - return new TRowSetStandardScheme(); - } - } - - private static class TRowSetStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRowSet struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // START_ROW_OFFSET - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list118 = iprot.readListBegin(); - struct.rows = new ArrayList(_list118.size); - for (int _i119 = 0; _i119 < _list118.size; ++_i119) - { - TRow _elem120; // optional - _elem120 = new TRow(); - _elem120.read(iprot); - struct.rows.add(_elem120); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list121 = iprot.readListBegin(); - struct.columns = new ArrayList(_list121.size); - for (int _i122 = 0; _i122 < _list121.size; ++_i122) - { - TColumn _elem123; // optional - _elem123 = new TColumn(); - _elem123.read(iprot); - struct.columns.add(_elem123); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRowSet struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(START_ROW_OFFSET_FIELD_DESC); - oprot.writeI64(struct.startRowOffset); - oprot.writeFieldEnd(); - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.rows.size())); - for (TRow _iter124 : struct.rows) - { - _iter124.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.columns != null) { - if (struct.isSetColumns()) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumn _iter125 : struct.columns) - { - _iter125.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowSetTupleSchemeFactory implements SchemeFactory { - public TRowSetTupleScheme getScheme() { - return new 
TRowSetTupleScheme(); - } - } - - private static class TRowSetTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI64(struct.startRowOffset); - { - oprot.writeI32(struct.rows.size()); - for (TRow _iter126 : struct.rows) - { - _iter126.write(oprot); - } - } - BitSet optionals = new BitSet(); - if (struct.isSetColumns()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetColumns()) { - { - oprot.writeI32(struct.columns.size()); - for (TColumn _iter127 : struct.columns) - { - _iter127.write(oprot); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - { - org.apache.thrift.protocol.TList _list128 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.rows = new ArrayList(_list128.size); - for (int _i129 = 0; _i129 < _list128.size; ++_i129) - { - TRow _elem130; // optional - _elem130 = new TRow(); - _elem130.read(iprot); - struct.rows.add(_elem130); - } - } - struct.setRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list131 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list131.size); - for (int _i132 = 0; _i132 < _list131.size; ++_i132) - { - TColumn _elem133; // optional - _elem133 = new TColumn(); - _elem133.read(iprot); - struct.columns.add(_elem133); - } - } - struct.setColumnsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java deleted file mode 100644 index 82c00dd68a98b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TSessionHandle.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TSessionHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSessionHandle"); - - private static final org.apache.thrift.protocol.TField 
SESSION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TSessionHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TSessionHandleTupleSchemeFactory()); - } - - private THandleIdentifier sessionId; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_ID((short)1, "sessionId"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_ID - return SESSION_ID; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_ID, new org.apache.thrift.meta_data.FieldMetaData("sessionId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSessionHandle.class, metaDataMap); - } - - public TSessionHandle() { - } - - public TSessionHandle( - THandleIdentifier sessionId) - { - this(); - this.sessionId = sessionId; - } - - /** - * Performs a deep copy on other. 
- */ - public TSessionHandle(TSessionHandle other) { - if (other.isSetSessionId()) { - this.sessionId = new THandleIdentifier(other.sessionId); - } - } - - public TSessionHandle deepCopy() { - return new TSessionHandle(this); - } - - @Override - public void clear() { - this.sessionId = null; - } - - public THandleIdentifier getSessionId() { - return this.sessionId; - } - - public void setSessionId(THandleIdentifier sessionId) { - this.sessionId = sessionId; - } - - public void unsetSessionId() { - this.sessionId = null; - } - - /** Returns true if field sessionId is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionId() { - return this.sessionId != null; - } - - public void setSessionIdIsSet(boolean value) { - if (!value) { - this.sessionId = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_ID: - if (value == null) { - unsetSessionId(); - } else { - setSessionId((THandleIdentifier)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_ID: - return getSessionId(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_ID: - return isSetSessionId(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TSessionHandle) - return this.equals((TSessionHandle)that); - return false; - } - - public boolean equals(TSessionHandle that) { - if (that == null) - return false; - - boolean this_present_sessionId = true && this.isSetSessionId(); - boolean that_present_sessionId = true && that.isSetSessionId(); - if (this_present_sessionId || that_present_sessionId) { - if (!(this_present_sessionId && that_present_sessionId)) - return false; - if (!this.sessionId.equals(that.sessionId)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_sessionId = true && (isSetSessionId()); - builder.append(present_sessionId); - if (present_sessionId) - builder.append(sessionId); - - return builder.toHashCode(); - } - - public int compareTo(TSessionHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TSessionHandle typedOther = (TSessionHandle)other; - - lastComparison = Boolean.valueOf(isSetSessionId()).compareTo(typedOther.isSetSessionId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionId, typedOther.sessionId); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new 
StringBuilder("TSessionHandle("); - boolean first = true; - - sb.append("sessionId:"); - if (this.sessionId == null) { - sb.append("null"); - } else { - sb.append(this.sessionId); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionId' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionId != null) { - sessionId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TSessionHandleStandardSchemeFactory implements SchemeFactory { - public TSessionHandleStandardScheme getScheme() { - return new TSessionHandleStandardScheme(); - } - } - - private static class TSessionHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TSessionHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TSessionHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionId != null) { - oprot.writeFieldBegin(SESSION_ID_FIELD_DESC); - struct.sessionId.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TSessionHandleTupleSchemeFactory implements SchemeFactory { - public TSessionHandleTupleScheme getScheme() { - return new TSessionHandleTupleScheme(); - } - } - - private static class TSessionHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionId.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } - } - -} - diff --git 
a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java deleted file mode 100644 index 24a746e94965d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatus.java +++ /dev/null @@ -1,874 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStatus implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStatus"); - - private static final org.apache.thrift.protocol.TField STATUS_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("statusCode", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField INFO_MESSAGES_FIELD_DESC = new org.apache.thrift.protocol.TField("infoMessages", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStatusStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStatusTupleSchemeFactory()); - } - - private TStatusCode statusCode; // required - private List infoMessages; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TStatusCode - */ - STATUS_CODE((short)1, "statusCode"), - INFO_MESSAGES((short)2, "infoMessages"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS_CODE - return STATUS_CODE; - case 2: // INFO_MESSAGES - return INFO_MESSAGES; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private _Fields optionals[] = {_Fields.INFO_MESSAGES,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS_CODE, new org.apache.thrift.meta_data.FieldMetaData("statusCode", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TStatusCode.class))); - tmpMap.put(_Fields.INFO_MESSAGES, new org.apache.thrift.meta_data.FieldMetaData("infoMessages", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - 
org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStatus.class, metaDataMap); - } - - public TStatus() { - } - - public TStatus( - TStatusCode statusCode) - { - this(); - this.statusCode = statusCode; - } - - /** - * Performs a deep copy on other. - */ - public TStatus(TStatus other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatusCode()) { - this.statusCode = other.statusCode; - } - if (other.isSetInfoMessages()) { - List __this__infoMessages = new ArrayList(); - for (String other_element : other.infoMessages) { - __this__infoMessages.add(other_element); - } - this.infoMessages = __this__infoMessages; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TStatus deepCopy() { - return new TStatus(this); - } - - @Override - public void clear() { - this.statusCode = null; - this.infoMessages = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - /** - * - * @see TStatusCode - */ - public TStatusCode getStatusCode() { - return this.statusCode; - } - - /** - * - * @see TStatusCode - */ - public void setStatusCode(TStatusCode statusCode) { - this.statusCode = statusCode; - } - - public void unsetStatusCode() { - this.statusCode = null; - } - - /** Returns true if field statusCode is set (has been assigned a value) and false otherwise */ - public boolean isSetStatusCode() { - return this.statusCode != null; - } - - public void setStatusCodeIsSet(boolean value) { - if (!value) { - this.statusCode = null; - } - } - - public int getInfoMessagesSize() { - return (this.infoMessages == null) ? 0 : this.infoMessages.size(); - } - - public java.util.Iterator getInfoMessagesIterator() { - return (this.infoMessages == null) ? 
null : this.infoMessages.iterator(); - } - - public void addToInfoMessages(String elem) { - if (this.infoMessages == null) { - this.infoMessages = new ArrayList(); - } - this.infoMessages.add(elem); - } - - public List getInfoMessages() { - return this.infoMessages; - } - - public void setInfoMessages(List infoMessages) { - this.infoMessages = infoMessages; - } - - public void unsetInfoMessages() { - this.infoMessages = null; - } - - /** Returns true if field infoMessages is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoMessages() { - return this.infoMessages != null; - } - - public void setInfoMessagesIsSet(boolean value) { - if (!value) { - this.infoMessages = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS_CODE: - if (value == null) { - unsetStatusCode(); - } else { - setStatusCode((TStatusCode)value); - } - break; - - case INFO_MESSAGES: - if (value == null) { - unsetInfoMessages(); - } else { - setInfoMessages((List)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS_CODE: - return getStatusCode(); - - case INFO_MESSAGES: - return getInfoMessages(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return Integer.valueOf(getErrorCode()); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - 
public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS_CODE: - return isSetStatusCode(); - case INFO_MESSAGES: - return isSetInfoMessages(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStatus) - return this.equals((TStatus)that); - return false; - } - - public boolean equals(TStatus that) { - if (that == null) - return false; - - boolean this_present_statusCode = true && this.isSetStatusCode(); - boolean that_present_statusCode = true && that.isSetStatusCode(); - if (this_present_statusCode || that_present_statusCode) { - if (!(this_present_statusCode && that_present_statusCode)) - return false; - if (!this.statusCode.equals(that.statusCode)) - return false; - } - - boolean this_present_infoMessages = true && this.isSetInfoMessages(); - boolean that_present_infoMessages = true && that.isSetInfoMessages(); - if (this_present_infoMessages || that_present_infoMessages) { - if (!(this_present_infoMessages && that_present_infoMessages)) - return false; - if (!this.infoMessages.equals(that.infoMessages)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_statusCode = true && (isSetStatusCode()); - builder.append(present_statusCode); - if (present_statusCode) - builder.append(statusCode.getValue()); - - boolean present_infoMessages = true && (isSetInfoMessages()); - builder.append(present_infoMessages); - if (present_infoMessages) - builder.append(infoMessages); - - boolean present_sqlState = true && (isSetSqlState()); - builder.append(present_sqlState); - if (present_sqlState) - builder.append(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - builder.append(present_errorCode); - if (present_errorCode) - builder.append(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - builder.append(present_errorMessage); - if (present_errorMessage) - builder.append(errorMessage); - - return builder.toHashCode(); - } - - public int compareTo(TStatus other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStatus typedOther = 
(TStatus)other; - - lastComparison = Boolean.valueOf(isSetStatusCode()).compareTo(typedOther.isSetStatusCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatusCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statusCode, typedOther.statusCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoMessages()).compareTo(typedOther.isSetInfoMessages()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoMessages()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoMessages, typedOther.infoMessages); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(typedOther.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, typedOther.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(typedOther.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, typedOther.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(typedOther.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, typedOther.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStatus("); - boolean first = true; - - sb.append("statusCode:"); - if (this.statusCode == null) { - sb.append("null"); - } else { - sb.append(this.statusCode); - } - first = false; - if (isSetInfoMessages()) { - if (!first) sb.append(", "); - sb.append("infoMessages:"); - if (this.infoMessages == null) { - sb.append("null"); - } else { - sb.append(this.infoMessages); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatusCode()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statusCode' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStatusStandardSchemeFactory implements SchemeFactory { - public TStatusStandardScheme getScheme() { - return new TStatusStandardScheme(); - } - } - - private static class TStatusStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStatus struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.statusCode = TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_MESSAGES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list134 = iprot.readListBegin(); - struct.infoMessages = new ArrayList(_list134.size); - for (int _i135 = 0; _i135 < _list134.size; ++_i135) - { - String _elem136; // optional - _elem136 = iprot.readString(); - struct.infoMessages.add(_elem136); - } - iprot.readListEnd(); - } - struct.setInfoMessagesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStatus struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.statusCode != null) { - oprot.writeFieldBegin(STATUS_CODE_FIELD_DESC); - 
oprot.writeI32(struct.statusCode.getValue()); - oprot.writeFieldEnd(); - } - if (struct.infoMessages != null) { - if (struct.isSetInfoMessages()) { - oprot.writeFieldBegin(INFO_MESSAGES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.infoMessages.size())); - for (String _iter137 : struct.infoMessages) - { - oprot.writeString(_iter137); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStatusTupleSchemeFactory implements SchemeFactory { - public TStatusTupleScheme getScheme() { - return new TStatusTupleScheme(); - } - } - - private static class TStatusTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.statusCode.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetInfoMessages()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetInfoMessages()) { - { - oprot.writeI32(struct.infoMessages.size()); - for (String _iter138 : struct.infoMessages) - { - oprot.writeString(_iter138); - } - } - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.statusCode = TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list139 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.infoMessages = new ArrayList(_list139.size); - for (int _i140 = 0; _i140 < _list139.size; ++_i140) - { - String _elem141; // optional - _elem141 = iprot.readString(); - struct.infoMessages.add(_elem141); - } - } - struct.setInfoMessagesIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java deleted file mode 100644 index 
e7fde45fd131a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStatusCode.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TStatusCode implements org.apache.thrift.TEnum { - SUCCESS_STATUS(0), - SUCCESS_WITH_INFO_STATUS(1), - STILL_EXECUTING_STATUS(2), - ERROR_STATUS(3), - INVALID_HANDLE_STATUS(4); - - private final int value; - - private TStatusCode(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TStatusCode findByValue(int value) { - switch (value) { - case 0: - return SUCCESS_STATUS; - case 1: - return SUCCESS_WITH_INFO_STATUS; - case 2: - return STILL_EXECUTING_STATUS; - case 3: - return ERROR_STATUS; - case 4: - return INVALID_HANDLE_STATUS; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java deleted file mode 100644 index 3dae460c8621d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStringColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringColumnTupleSchemeFactory()); - } - - private List values; // 
required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringColumn.class, metaDataMap); - } - - public TStringColumn() { - } - - public TStringColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = nulls; - } - - /** - * Performs a deep copy on other. - */ - public TStringColumn(TStringColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(); - for (String other_element : other.values) { - __this__values.add(other_element); - } - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); -; - } - } - - public TStringColumn deepCopy() { - return new TStringColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(String elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return nulls; - } - - public void setNulls(byte[] nulls) { - setNulls(nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(nulls)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = nulls; - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringColumn) - return this.equals((TStringColumn)that); - return false; - } - - public boolean equals(TStringColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_values = true && (isSetValues()); - builder.append(present_values); - if (present_values) - builder.append(values); - - boolean present_nulls = true && (isSetNulls()); - builder.append(present_nulls); - if (present_nulls) - builder.append(nulls); - - return builder.toHashCode(); - } - - public int compareTo(TStringColumn other) { - if (!getClass().equals(other.getClass())) { - return 
getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStringColumn typedOther = (TStringColumn)other; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(typedOther.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, typedOther.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(typedOther.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, typedOther.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringColumnStandardSchemeFactory implements SchemeFactory { - public TStringColumnStandardScheme getScheme() { - return new TStringColumnStandardScheme(); - } - } - - private static class TStringColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list102 = iprot.readListBegin(); - struct.values = new ArrayList(_list102.size); - for (int _i103 = 0; _i103 < _list102.size; ++_i103) - { - String _elem104; // optional - _elem104 = iprot.readString(); - struct.values.add(_elem104); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (String _iter105 : struct.values) - { - oprot.writeString(_iter105); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringColumnTupleSchemeFactory implements SchemeFactory { - public TStringColumnTupleScheme getScheme() { - return new TStringColumnTupleScheme(); - } - } - - private static class TStringColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (String _iter106 : struct.values) - { - 
oprot.writeString(_iter106); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list107 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list107.size); - for (int _i108 = 0; _i108 < _list107.size; ++_i108) - { - String _elem109; // optional - _elem109 = iprot.readString(); - struct.values.add(_elem109); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java deleted file mode 100644 index af7a109775a8b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStringValue.java +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStringValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringValueTupleSchemeFactory()); - } - - private String value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringValue.class, metaDataMap); - } - - public TStringValue() { - } - - /** - * Performs a deep copy on other. - */ - public TStringValue(TStringValue other) { - if (other.isSetValue()) { - this.value = other.value; - } - } - - public TStringValue deepCopy() { - return new TStringValue(this); - } - - @Override - public void clear() { - this.value = null; - } - - public String getValue() { - return this.value; - } - - public void setValue(String value) { - this.value = value; - } - - public void unsetValue() { - this.value = null; - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return this.value != null; - } - - public void setValueIsSet(boolean value) { - if (!value) { - this.value = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringValue) - return this.equals((TStringValue)that); - return false; - } - - public boolean equals(TStringValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (!this.value.equals(that.value)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_value = true && 
(isSetValue()); - builder.append(present_value); - if (present_value) - builder.append(value); - - return builder.toHashCode(); - } - - public int compareTo(TStringValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStringValue typedOther = (TStringValue)other; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(typedOther.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, typedOther.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - if (this.value == null) { - sb.append("null"); - } else { - sb.append(this.value); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringValueStandardSchemeFactory implements SchemeFactory { - public TStringValueStandardScheme getScheme() { - return new TStringValueStandardScheme(); - } - } - - private static class TStringValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.value != null) { - if (struct.isSetValue()) { - 
oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeString(struct.value); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringValueTupleSchemeFactory implements SchemeFactory { - public TStringValueTupleScheme getScheme() { - return new TStringValueTupleScheme(); - } - } - - private static class TStringValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeString(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java deleted file mode 100644 index 20f5fb6c29073..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TStructTypeEntry.java +++ /dev/null @@ -1,448 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TStructTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStructTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStructTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStructTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStructTypeEntry.class, metaDataMap); - } - - public TStructTypeEntry() { - } - - public TStructTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. - */ - public TStructTypeEntry(TStructTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TStructTypeEntry deepCopy() { - return new TStructTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 
0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStructTypeEntry) - return this.equals((TStructTypeEntry)that); - return false; - } - - public boolean equals(TStructTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - builder.append(present_nameToTypePtr); - if (present_nameToTypePtr) - builder.append(nameToTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TStructTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TStructTypeEntry typedOther = (TStructTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetNameToTypePtr()).compareTo(typedOther.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, typedOther.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - 
StringBuilder sb = new StringBuilder("TStructTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStructTypeEntryStandardSchemeFactory implements SchemeFactory { - public TStructTypeEntryStandardScheme getScheme() { - return new TStructTypeEntryStandardScheme(); - } - } - - private static class TStructTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map10 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map10.size); - for (int _i11 = 0; _i11 < _map10.size; ++_i11) - { - String _key12; // required - int _val13; // required - _key12 = iprot.readString(); - _val13 = iprot.readI32(); - struct.nameToTypePtr.put(_key12, _val13); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter14 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter14.getKey()); - oprot.writeI32(_iter14.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStructTypeEntryTupleSchemeFactory implements SchemeFactory { - public TStructTypeEntryTupleScheme getScheme() { - return new TStructTypeEntryTupleScheme(); - } - } - - private static class 
TStructTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter15 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter15.getKey()); - oprot.writeI32(_iter15.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map16 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map16.size); - for (int _i17 = 0; _i17 < _map16.size; ++_i17) - { - String _key18; // required - int _val19; // required - _key18 = iprot.readString(); - _val19 = iprot.readI32(); - struct.nameToTypePtr.put(_key18, _val19); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java deleted file mode 100644 index ff5e54db7c16c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTableSchema.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTableSchema implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTableSchema"); - - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTableSchemaStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTableSchemaTupleSchemeFactory()); - } - - private List columns; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMNS((short)1, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnDesc.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTableSchema.class, metaDataMap); - } - - public TTableSchema() { - } - - public TTableSchema( - List columns) - { - this(); - this.columns = columns; - } - - /** - * Performs a deep copy on other. - */ - public TTableSchema(TTableSchema other) { - if (other.isSetColumns()) { - List __this__columns = new ArrayList(); - for (TColumnDesc other_element : other.columns) { - __this__columns.add(new TColumnDesc(other_element)); - } - this.columns = __this__columns; - } - } - - public TTableSchema deepCopy() { - return new TTableSchema(this); - } - - @Override - public void clear() { - this.columns = null; - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumnDesc elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTableSchema) - return this.equals((TTableSchema)that); - return false; - } - - public boolean equals(TTableSchema that) { - if (that == null) - return false; - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_columns = true && (isSetColumns()); - builder.append(present_columns); - if (present_columns) - builder.append(columns); - - return builder.toHashCode(); - } - - public int compareTo(TTableSchema other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTableSchema typedOther = (TTableSchema)other; - - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(typedOther.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, typedOther.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTableSchema("); - boolean first = true; - - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws 
org.apache.thrift.TException { - // check for required fields - if (!isSetColumns()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columns' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTableSchemaStandardSchemeFactory implements SchemeFactory { - public TTableSchemaStandardScheme getScheme() { - return new TTableSchemaStandardScheme(); - } - } - - private static class TTableSchemaStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTableSchema struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list38 = iprot.readListBegin(); - struct.columns = new ArrayList(_list38.size); - for (int _i39 = 0; _i39 < _list38.size; ++_i39) - { - TColumnDesc _elem40; // optional - _elem40 = new TColumnDesc(); - _elem40.read(iprot); - struct.columns.add(_elem40); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTableSchema struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columns != null) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumnDesc _iter41 : struct.columns) - { - _iter41.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTableSchemaTupleSchemeFactory implements SchemeFactory { - public TTableSchemaTupleScheme getScheme() { - return new TTableSchemaTupleScheme(); - } - } - - private static class TTableSchemaTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.columns.size()); - for (TColumnDesc _iter42 : struct.columns) - { - _iter42.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; 
- { - org.apache.thrift.protocol.TList _list43 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list43.size); - for (int _i44 = 0; _i44 < _list43.size; ++_i44) - { - TColumnDesc _elem45; // optional - _elem45 = new TColumnDesc(); - _elem45.read(iprot); - struct.columns.add(_elem45); - } - } - struct.setColumnsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java deleted file mode 100644 index 251f86a914719..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeDesc.java +++ /dev/null @@ -1,439 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeDesc"); - - private static final org.apache.thrift.protocol.TField TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("types", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeDescTupleSchemeFactory()); - } - - private List types; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPES((short)1, "types"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPES - return TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPES, new org.apache.thrift.meta_data.FieldMetaData("types", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeEntry.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeDesc.class, metaDataMap); - } - - public TTypeDesc() { - } - - public TTypeDesc( - List types) - { - this(); - this.types = types; - } - - /** - * Performs a deep copy on other. - */ - public TTypeDesc(TTypeDesc other) { - if (other.isSetTypes()) { - List __this__types = new ArrayList(); - for (TTypeEntry other_element : other.types) { - __this__types.add(new TTypeEntry(other_element)); - } - this.types = __this__types; - } - } - - public TTypeDesc deepCopy() { - return new TTypeDesc(this); - } - - @Override - public void clear() { - this.types = null; - } - - public int getTypesSize() { - return (this.types == null) ? 0 : this.types.size(); - } - - public java.util.Iterator getTypesIterator() { - return (this.types == null) ? 
null : this.types.iterator(); - } - - public void addToTypes(TTypeEntry elem) { - if (this.types == null) { - this.types = new ArrayList(); - } - this.types.add(elem); - } - - public List getTypes() { - return this.types; - } - - public void setTypes(List types) { - this.types = types; - } - - public void unsetTypes() { - this.types = null; - } - - /** Returns true if field types is set (has been assigned a value) and false otherwise */ - public boolean isSetTypes() { - return this.types != null; - } - - public void setTypesIsSet(boolean value) { - if (!value) { - this.types = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPES: - if (value == null) { - unsetTypes(); - } else { - setTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPES: - return getTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPES: - return isSetTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeDesc) - return this.equals((TTypeDesc)that); - return false; - } - - public boolean equals(TTypeDesc that) { - if (that == null) - return false; - - boolean this_present_types = true && this.isSetTypes(); - boolean that_present_types = true && that.isSetTypes(); - if (this_present_types || that_present_types) { - if (!(this_present_types && that_present_types)) - return false; - if (!this.types.equals(that.types)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_types = true && (isSetTypes()); - builder.append(present_types); - if (present_types) - builder.append(types); - - return builder.toHashCode(); - } - - public int compareTo(TTypeDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTypeDesc typedOther = (TTypeDesc)other; - - lastComparison = Boolean.valueOf(isSetTypes()).compareTo(typedOther.isSetTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.types, typedOther.types); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeDesc("); - boolean first = true; - - sb.append("types:"); - if (this.types == null) { - sb.append("null"); - } else { - sb.append(this.types); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypes()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'types' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeDescStandardSchemeFactory implements SchemeFactory { - public TTypeDescStandardScheme getScheme() { - return new TTypeDescStandardScheme(); - } - } - - private static class TTypeDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list30 = iprot.readListBegin(); - struct.types = new ArrayList(_list30.size); - for (int _i31 = 0; _i31 < _list30.size; ++_i31) - { - TTypeEntry _elem32; // optional - _elem32 = new TTypeEntry(); - _elem32.read(iprot); - struct.types.add(_elem32); - } - iprot.readListEnd(); - } - struct.setTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.types != null) { - oprot.writeFieldBegin(TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.types.size())); - for (TTypeEntry _iter33 : struct.types) - { - _iter33.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeDescTupleSchemeFactory implements SchemeFactory { - public TTypeDescTupleScheme getScheme() { - return new TTypeDescTupleScheme(); - } - } - - private static class TTypeDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.types.size()); - for (TTypeEntry _iter34 : struct.types) - { - _iter34.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list35 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.types = 
new ArrayList(_list35.size); - for (int _i36 = 0; _i36 < _list35.size; ++_i36) - { - TTypeEntry _elem37; // optional - _elem37 = new TTypeEntry(); - _elem37.read(iprot); - struct.types.add(_elem37); - } - } - struct.setTypesIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java deleted file mode 100644 index d0d70c1279572..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeEntry.java +++ /dev/null @@ -1,610 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeEntry extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeEntry"); - private static final org.apache.thrift.protocol.TField PRIMITIVE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("primitiveEntry", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ARRAY_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("arrayEntry", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField MAP_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("mapEntry", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField STRUCT_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("structEntry", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField UNION_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("unionEntry", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField USER_DEFINED_TYPE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("userDefinedTypeEntry", org.apache.thrift.protocol.TType.STRUCT, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - PRIMITIVE_ENTRY((short)1, "primitiveEntry"), - ARRAY_ENTRY((short)2, "arrayEntry"), - MAP_ENTRY((short)3, "mapEntry"), - STRUCT_ENTRY((short)4, "structEntry"), - UNION_ENTRY((short)5, "unionEntry"), - USER_DEFINED_TYPE_ENTRY((short)6, "userDefinedTypeEntry"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // PRIMITIVE_ENTRY - return PRIMITIVE_ENTRY; - case 2: // ARRAY_ENTRY - return ARRAY_ENTRY; - case 3: // MAP_ENTRY - return MAP_ENTRY; - case 4: // STRUCT_ENTRY - return STRUCT_ENTRY; - case 5: // UNION_ENTRY - return UNION_ENTRY; - case 6: // USER_DEFINED_TYPE_ENTRY - return USER_DEFINED_TYPE_ENTRY; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.PRIMITIVE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("primitiveEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TPrimitiveTypeEntry.class))); - tmpMap.put(_Fields.ARRAY_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("arrayEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TArrayTypeEntry.class))); - tmpMap.put(_Fields.MAP_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("mapEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TMapTypeEntry.class))); - tmpMap.put(_Fields.STRUCT_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("structEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStructTypeEntry.class))); - tmpMap.put(_Fields.UNION_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("unionEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUnionTypeEntry.class))); - tmpMap.put(_Fields.USER_DEFINED_TYPE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("userDefinedTypeEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUserDefinedTypeEntry.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeEntry.class, metaDataMap); - } - - public TTypeEntry() { - super(); - } - - public TTypeEntry(TTypeEntry._Fields setField, Object value) { - super(setField, value); - } - - public TTypeEntry(TTypeEntry other) { - super(other); - } - public TTypeEntry deepCopy() { - return new TTypeEntry(this); - } - - public static TTypeEntry primitiveEntry(TPrimitiveTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setPrimitiveEntry(value); - return x; - } - - public static TTypeEntry arrayEntry(TArrayTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setArrayEntry(value); - return x; - } - - public static TTypeEntry mapEntry(TMapTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setMapEntry(value); - return x; - } - - public static TTypeEntry structEntry(TStructTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setStructEntry(value); - return x; - } - - public static TTypeEntry unionEntry(TUnionTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUnionEntry(value); - return x; - } - - public static TTypeEntry userDefinedTypeEntry(TUserDefinedTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUserDefinedTypeEntry(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case PRIMITIVE_ENTRY: - if (value instanceof TPrimitiveTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TPrimitiveTypeEntry for field 'primitiveEntry', but got " + value.getClass().getSimpleName()); - case ARRAY_ENTRY: - if (value instanceof TArrayTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TArrayTypeEntry for field 'arrayEntry', but got " + value.getClass().getSimpleName()); - case MAP_ENTRY: - if (value instanceof TMapTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TMapTypeEntry for field 'mapEntry', but got " + value.getClass().getSimpleName()); - case STRUCT_ENTRY: - if (value instanceof TStructTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TStructTypeEntry for field 'structEntry', but got " + value.getClass().getSimpleName()); - case UNION_ENTRY: - if (value instanceof TUnionTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUnionTypeEntry for field 'unionEntry', but got " + value.getClass().getSimpleName()); - case USER_DEFINED_TYPE_ENTRY: - if (value instanceof TUserDefinedTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUserDefinedTypeEntry for field 'userDefinedTypeEntry', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - if (field.type == PRIMITIVE_ENTRY_FIELD_DESC.type) { - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case ARRAY_ENTRY: - if (field.type == ARRAY_ENTRY_FIELD_DESC.type) { - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case MAP_ENTRY: - if (field.type == MAP_ENTRY_FIELD_DESC.type) { - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRUCT_ENTRY: - if (field.type == STRUCT_ENTRY_FIELD_DESC.type) { - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case UNION_ENTRY: - if (field.type == UNION_ENTRY_FIELD_DESC.type) { - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case USER_DEFINED_TYPE_ENTRY: - if (field.type == USER_DEFINED_TYPE_ENTRY_FIELD_DESC.type) { - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - case MAP_ENTRY: - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - case STRUCT_ENTRY: - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - case 
UNION_ENTRY: - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case PRIMITIVE_ENTRY: - return PRIMITIVE_ENTRY_FIELD_DESC; - case ARRAY_ENTRY: - return ARRAY_ENTRY_FIELD_DESC; - case MAP_ENTRY: - return MAP_ENTRY_FIELD_DESC; - case STRUCT_ENTRY: - return STRUCT_ENTRY_FIELD_DESC; - case UNION_ENTRY: - return UNION_ENTRY_FIELD_DESC; - case USER_DEFINED_TYPE_ENTRY: - return USER_DEFINED_TYPE_ENTRY_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TPrimitiveTypeEntry getPrimitiveEntry() { - if (getSetField() == _Fields.PRIMITIVE_ENTRY) { - return (TPrimitiveTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'primitiveEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setPrimitiveEntry(TPrimitiveTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.PRIMITIVE_ENTRY; - value_ = value; - } - - public TArrayTypeEntry getArrayEntry() { - if (getSetField() == _Fields.ARRAY_ENTRY) { - return (TArrayTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'arrayEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setArrayEntry(TArrayTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.ARRAY_ENTRY; - value_ = value; - } - - public TMapTypeEntry getMapEntry() { - if (getSetField() == _Fields.MAP_ENTRY) { - return (TMapTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'mapEntry' because union is 
currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setMapEntry(TMapTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.MAP_ENTRY; - value_ = value; - } - - public TStructTypeEntry getStructEntry() { - if (getSetField() == _Fields.STRUCT_ENTRY) { - return (TStructTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'structEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStructEntry(TStructTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRUCT_ENTRY; - value_ = value; - } - - public TUnionTypeEntry getUnionEntry() { - if (getSetField() == _Fields.UNION_ENTRY) { - return (TUnionTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'unionEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUnionEntry(TUnionTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.UNION_ENTRY; - value_ = value; - } - - public TUserDefinedTypeEntry getUserDefinedTypeEntry() { - if (getSetField() == _Fields.USER_DEFINED_TYPE_ENTRY) { - return (TUserDefinedTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'userDefinedTypeEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUserDefinedTypeEntry(TUserDefinedTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.USER_DEFINED_TYPE_ENTRY; - value_ = value; - } - - public boolean isSetPrimitiveEntry() { - return setField_ == _Fields.PRIMITIVE_ENTRY; - } - - - public boolean isSetArrayEntry() { - return setField_ == _Fields.ARRAY_ENTRY; - } - - - public boolean isSetMapEntry() { - return setField_ == _Fields.MAP_ENTRY; - } - - - public boolean isSetStructEntry() { - return setField_ == _Fields.STRUCT_ENTRY; - } - - - public boolean isSetUnionEntry() { - return setField_ == _Fields.UNION_ENTRY; - } - - - public boolean isSetUserDefinedTypeEntry() { - return setField_ == _Fields.USER_DEFINED_TYPE_ENTRY; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeEntry) { - return equals((TTypeEntry)other); - } else { - return false; - } - } - - public boolean equals(TTypeEntry other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeEntry other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch 
(org.apache.thrift.TException te) {
-      throw new java.io.IOException(te);
-    }
-  }
-
-
-  private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException {
-    try {
-      read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in)));
-    } catch (org.apache.thrift.TException te) {
-      throw new java.io.IOException(te);
-    }
-  }
-
-
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java
deleted file mode 100644
index 40f05894623c0..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeId.java
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TTypeId implements org.apache.thrift.TEnum {
-  BOOLEAN_TYPE(0),
-  TINYINT_TYPE(1),
-  SMALLINT_TYPE(2),
-  INT_TYPE(3),
-  BIGINT_TYPE(4),
-  FLOAT_TYPE(5),
-  DOUBLE_TYPE(6),
-  STRING_TYPE(7),
-  TIMESTAMP_TYPE(8),
-  BINARY_TYPE(9),
-  ARRAY_TYPE(10),
-  MAP_TYPE(11),
-  STRUCT_TYPE(12),
-  UNION_TYPE(13),
-  USER_DEFINED_TYPE(14),
-  DECIMAL_TYPE(15),
-  NULL_TYPE(16),
-  DATE_TYPE(17),
-  VARCHAR_TYPE(18),
-  CHAR_TYPE(19),
-  INTERVAL_YEAR_MONTH_TYPE(20),
-  INTERVAL_DAY_TIME_TYPE(21);
-
-  private final int value;
-
-  private TTypeId(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TTypeId findByValue(int value) {
-    switch (value) {
-      case 0:
-        return BOOLEAN_TYPE;
-      case 1:
-        return TINYINT_TYPE;
-      case 2:
-        return SMALLINT_TYPE;
-      case 3:
-        return INT_TYPE;
-      case 4:
-        return BIGINT_TYPE;
-      case 5:
-        return FLOAT_TYPE;
-      case 6:
-        return DOUBLE_TYPE;
-      case 7:
-        return STRING_TYPE;
-      case 8:
-        return TIMESTAMP_TYPE;
-      case 9:
-        return BINARY_TYPE;
-      case 10:
-        return ARRAY_TYPE;
-      case 11:
-        return MAP_TYPE;
-      case 12:
-        return STRUCT_TYPE;
-      case 13:
-        return UNION_TYPE;
-      case 14:
-        return USER_DEFINED_TYPE;
-      case 15:
-        return DECIMAL_TYPE;
-      case 16:
-        return NULL_TYPE;
-      case 17:
-        return DATE_TYPE;
-      case 18:
-        return VARCHAR_TYPE;
-      case 19:
-        return CHAR_TYPE;
-      case 20:
-        return INTERVAL_YEAR_MONTH_TYPE;
-      case 21:
-        return INTERVAL_DAY_TIME_TYPE;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java
deleted file mode 100644
index a3e3829372276..0000000000000
--- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifierValue.java
+++ /dev/null
@@ -1,361 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.0)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.cli.thrift;
-
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class TTypeQualifierValue extends org.apache.thrift.TUnion {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifierValue");
-  private static final org.apache.thrift.protocol.TField I32_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Value", org.apache.thrift.protocol.TType.I32, (short)1);
-  private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)2);
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    I32_VALUE((short)1, "i32Value"),
-    STRING_VALUE((short)2, "stringValue");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // I32_VALUE - return I32_VALUE; - case 2: // STRING_VALUE - return STRING_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.I32_VALUE, new org.apache.thrift.meta_data.FieldMetaData("i32Value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifierValue.class, metaDataMap); - } - - public TTypeQualifierValue() { - super(); - } - - public TTypeQualifierValue(TTypeQualifierValue._Fields setField, Object value) { - super(setField, value); - } - - public TTypeQualifierValue(TTypeQualifierValue other) { - super(other); - } - public TTypeQualifierValue deepCopy() { - return new TTypeQualifierValue(this); - } - - public static TTypeQualifierValue i32Value(int value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setI32Value(value); - return x; - } - - public static TTypeQualifierValue stringValue(String value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setStringValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case I32_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'i32Value', but got " + value.getClass().getSimpleName()); - case STRING_VALUE: - if (value instanceof String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case I32_VALUE: - if (field.type == I32_VALUE_FIELD_DESC.type) { - Integer i32Value; - i32Value = 
iprot.readI32(); - return i32Value; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case I32_VALUE: - Integer i32Value; - i32Value = iprot.readI32(); - return i32Value; - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case I32_VALUE: - return I32_VALUE_FIELD_DESC; - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public int getI32Value() { - if (getSetField() == _Fields.I32_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Value' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Value(int value) { - setField_ = _Fields.I32_VALUE; - value_ = value; - } - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public boolean 
isSetI32Value() { - return setField_ == _Fields.I32_VALUE; - } - - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeQualifierValue) { - return equals((TTypeQualifierValue)other); - } else { - return false; - } - } - - public boolean equals(TTypeQualifierValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeQualifierValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - HashCodeBuilder hcb = new HashCodeBuilder(); - hcb.append(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - hcb.append(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - hcb.append(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - hcb.append(value); - } - } - return hcb.toHashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java deleted file mode 100644 index 39355551d3722..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TTypeQualifiers.java +++ /dev/null @@ -1,450 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TTypeQualifiers implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TTypeQualifiers"); - - private static final org.apache.thrift.protocol.TField QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("qualifiers", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeQualifiersStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeQualifiersTupleSchemeFactory()); - } - - private Map qualifiers; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - QUALIFIERS((short)1, "qualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // QUALIFIERS - return QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("qualifiers", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifierValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifiers.class, metaDataMap); - } - - public TTypeQualifiers() { - } - - public TTypeQualifiers( - Map qualifiers) - { - this(); - this.qualifiers = qualifiers; - } - - /** - * Performs a deep copy on other. 
- */ - public TTypeQualifiers(TTypeQualifiers other) { - if (other.isSetQualifiers()) { - Map __this__qualifiers = new HashMap(); - for (Map.Entry other_element : other.qualifiers.entrySet()) { - - String other_element_key = other_element.getKey(); - TTypeQualifierValue other_element_value = other_element.getValue(); - - String __this__qualifiers_copy_key = other_element_key; - - TTypeQualifierValue __this__qualifiers_copy_value = new TTypeQualifierValue(other_element_value); - - __this__qualifiers.put(__this__qualifiers_copy_key, __this__qualifiers_copy_value); - } - this.qualifiers = __this__qualifiers; - } - } - - public TTypeQualifiers deepCopy() { - return new TTypeQualifiers(this); - } - - @Override - public void clear() { - this.qualifiers = null; - } - - public int getQualifiersSize() { - return (this.qualifiers == null) ? 0 : this.qualifiers.size(); - } - - public void putToQualifiers(String key, TTypeQualifierValue val) { - if (this.qualifiers == null) { - this.qualifiers = new HashMap(); - } - this.qualifiers.put(key, val); - } - - public Map getQualifiers() { - return this.qualifiers; - } - - public void setQualifiers(Map qualifiers) { - this.qualifiers = qualifiers; - } - - public void unsetQualifiers() { - this.qualifiers = null; - } - - /** Returns true if field qualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetQualifiers() { - return this.qualifiers != null; - } - - public void setQualifiersIsSet(boolean value) { - if (!value) { - this.qualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case QUALIFIERS: - if (value == null) { - unsetQualifiers(); - } else { - setQualifiers((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case QUALIFIERS: - return getQualifiers(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case QUALIFIERS: - return isSetQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeQualifiers) - return this.equals((TTypeQualifiers)that); - return false; - } - - public boolean equals(TTypeQualifiers that) { - if (that == null) - return false; - - boolean this_present_qualifiers = true && this.isSetQualifiers(); - boolean that_present_qualifiers = true && that.isSetQualifiers(); - if (this_present_qualifiers || that_present_qualifiers) { - if (!(this_present_qualifiers && that_present_qualifiers)) - return false; - if (!this.qualifiers.equals(that.qualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_qualifiers = true && (isSetQualifiers()); - builder.append(present_qualifiers); - if (present_qualifiers) - builder.append(qualifiers); - - return builder.toHashCode(); - } - - public int compareTo(TTypeQualifiers other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TTypeQualifiers typedOther = (TTypeQualifiers)other; - - lastComparison = Boolean.valueOf(isSetQualifiers()).compareTo(typedOther.isSetQualifiers()); - if (lastComparison != 0) { - 
return lastComparison; - } - if (isSetQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.qualifiers, typedOther.qualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeQualifiers("); - boolean first = true; - - sb.append("qualifiers:"); - if (this.qualifiers == null) { - sb.append("null"); - } else { - sb.append(this.qualifiers); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetQualifiers()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'qualifiers' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeQualifiersStandardSchemeFactory implements SchemeFactory { - public TTypeQualifiersStandardScheme getScheme() { - return new TTypeQualifiersStandardScheme(); - } - } - - private static class TTypeQualifiersStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map0 = iprot.readMapBegin(); - struct.qualifiers = new HashMap(2*_map0.size); - for (int _i1 = 0; _i1 < _map0.size; ++_i1) - { - String _key2; // required - TTypeQualifierValue _val3; // required - _key2 = iprot.readString(); - _val3 = new TTypeQualifierValue(); - _val3.read(iprot); - struct.qualifiers.put(_key2, _val3); - } - iprot.readMapEnd(); - } - struct.setQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.qualifiers != null) 
{ - oprot.writeFieldBegin(QUALIFIERS_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, struct.qualifiers.size())); - for (Map.Entry _iter4 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter4.getKey()); - _iter4.getValue().write(oprot); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeQualifiersTupleSchemeFactory implements SchemeFactory { - public TTypeQualifiersTupleScheme getScheme() { - return new TTypeQualifiersTupleScheme(); - } - } - - private static class TTypeQualifiersTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.qualifiers.size()); - for (Map.Entry _iter5 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter5.getKey()); - _iter5.getValue().write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map6 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.qualifiers = new HashMap(2*_map6.size); - for (int _i7 = 0; _i7 < _map6.size; ++_i7) - { - String _key8; // required - TTypeQualifierValue _val9; // required - _key8 = iprot.readString(); - _val9 = new TTypeQualifierValue(); - _val9.read(iprot); - struct.qualifiers.put(_key8, _val9); - } - } - struct.setQualifiersIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java deleted file mode 100644 index 73dd45d3dd01a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUnionTypeEntry.java +++ /dev/null @@ -1,448 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TUnionTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUnionTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC 
= new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUnionTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUnionTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUnionTypeEntry.class, metaDataMap); - } - - public TUnionTypeEntry() { - } - - public TUnionTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. 
- */ - public TUnionTypeEntry(TUnionTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TUnionTypeEntry deepCopy() { - return new TUnionTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUnionTypeEntry) - return this.equals((TUnionTypeEntry)that); - return false; - } - - public boolean equals(TUnionTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - builder.append(present_nameToTypePtr); - if (present_nameToTypePtr) - builder.append(nameToTypePtr); - - return builder.toHashCode(); - } - - public int compareTo(TUnionTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TUnionTypeEntry typedOther = (TUnionTypeEntry)other; - - lastComparison = 
Boolean.valueOf(isSetNameToTypePtr()).compareTo(typedOther.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, typedOther.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUnionTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUnionTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUnionTypeEntryStandardScheme getScheme() { - return new TUnionTypeEntryStandardScheme(); - } - } - - private static class TUnionTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map20 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map20.size); - for (int _i21 = 0; _i21 < _map20.size; ++_i21) - { - String _key22; // required - int _val23; // required - _key22 = iprot.readString(); - _val23 = iprot.readI32(); - struct.nameToTypePtr.put(_key22, _val23); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUnionTypeEntry struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter24 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter24.getKey()); - oprot.writeI32(_iter24.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUnionTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUnionTypeEntryTupleScheme getScheme() { - return new TUnionTypeEntryTupleScheme(); - } - } - - private static class TUnionTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter25 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter25.getKey()); - oprot.writeI32(_iter25.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map26 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map26.size); - for (int _i27 = 0; _i27 < _map26.size; ++_i27) - { - String _key28; // required - int _val29; // required - _key28 = iprot.readString(); - _val29 = iprot.readI32(); - struct.nameToTypePtr.put(_key28, _val29); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java deleted file mode 100644 index 3a111a2c8c2c6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/gen/java/org/apache/hive/service/cli/thrift/TUserDefinedTypeEntry.java +++ /dev/null @@ -1,385 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.0) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.cli.thrift; - -import org.apache.commons.lang.builder.HashCodeBuilder; -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -public class TUserDefinedTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable { - private static final org.apache.thrift.protocol.TStruct 
STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUserDefinedTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_CLASS_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("typeClassName", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUserDefinedTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUserDefinedTypeEntryTupleSchemeFactory()); - } - - private String typeClassName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPE_CLASS_NAME((short)1, "typeClassName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE_CLASS_NAME - return TYPE_CLASS_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE_CLASS_NAME, new org.apache.thrift.meta_data.FieldMetaData("typeClassName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUserDefinedTypeEntry.class, metaDataMap); - } - - public TUserDefinedTypeEntry() { - } - - public TUserDefinedTypeEntry( - String typeClassName) - { - this(); - this.typeClassName = typeClassName; - } - - /** - * Performs a deep copy on other. 
- */ - public TUserDefinedTypeEntry(TUserDefinedTypeEntry other) { - if (other.isSetTypeClassName()) { - this.typeClassName = other.typeClassName; - } - } - - public TUserDefinedTypeEntry deepCopy() { - return new TUserDefinedTypeEntry(this); - } - - @Override - public void clear() { - this.typeClassName = null; - } - - public String getTypeClassName() { - return this.typeClassName; - } - - public void setTypeClassName(String typeClassName) { - this.typeClassName = typeClassName; - } - - public void unsetTypeClassName() { - this.typeClassName = null; - } - - /** Returns true if field typeClassName is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeClassName() { - return this.typeClassName != null; - } - - public void setTypeClassNameIsSet(boolean value) { - if (!value) { - this.typeClassName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE_CLASS_NAME: - if (value == null) { - unsetTypeClassName(); - } else { - setTypeClassName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE_CLASS_NAME: - return getTypeClassName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE_CLASS_NAME: - return isSetTypeClassName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUserDefinedTypeEntry) - return this.equals((TUserDefinedTypeEntry)that); - return false; - } - - public boolean equals(TUserDefinedTypeEntry that) { - if (that == null) - return false; - - boolean this_present_typeClassName = true && this.isSetTypeClassName(); - boolean that_present_typeClassName = true && that.isSetTypeClassName(); - if (this_present_typeClassName || that_present_typeClassName) { - if (!(this_present_typeClassName && that_present_typeClassName)) - return false; - if (!this.typeClassName.equals(that.typeClassName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - HashCodeBuilder builder = new HashCodeBuilder(); - - boolean present_typeClassName = true && (isSetTypeClassName()); - builder.append(present_typeClassName); - if (present_typeClassName) - builder.append(typeClassName); - - return builder.toHashCode(); - } - - public int compareTo(TUserDefinedTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - TUserDefinedTypeEntry typedOther = (TUserDefinedTypeEntry)other; - - lastComparison = Boolean.valueOf(isSetTypeClassName()).compareTo(typedOther.isSetTypeClassName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeClassName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeClassName, typedOther.typeClassName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws 
org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUserDefinedTypeEntry("); - boolean first = true; - - sb.append("typeClassName:"); - if (this.typeClassName == null) { - sb.append("null"); - } else { - sb.append(this.typeClassName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypeClassName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeClassName' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUserDefinedTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryStandardScheme getScheme() { - return new TUserDefinedTypeEntryStandardScheme(); - } - } - - private static class TUserDefinedTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE_CLASS_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.typeClassName != null) { - oprot.writeFieldBegin(TYPE_CLASS_NAME_FIELD_DESC); - oprot.writeString(struct.typeClassName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUserDefinedTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryTupleScheme getScheme() { - return new TUserDefinedTypeEntryTupleScheme(); - } - } - - private static class TUserDefinedTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.typeClassName); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws 
org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java deleted file mode 100644 index 7e557aeccf5b0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/AbstractService.java +++ /dev/null @@ -1,184 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * AbstractService. - * - */ -public abstract class AbstractService implements Service { - - private static final Log LOG = LogFactory.getLog(AbstractService.class); - - /** - * Service state: initially {@link STATE#NOTINITED}. - */ - private Service.STATE state = STATE.NOTINITED; - - /** - * Service name. - */ - private final String name; - /** - * Service start time. Will be zero until the service is started. - */ - private long startTime; - - /** - * The configuration. Will be null until the service is initialized. - */ - private HiveConf hiveConf; - - /** - * List of state change listeners; it is final to ensure - * that it will never be null. - */ - private final List listeners = - new ArrayList(); - - /** - * Construct the service. 
- * - * @param name - * service name - */ - public AbstractService(String name) { - this.name = name; - } - - @Override - public synchronized Service.STATE getServiceState() { - return state; - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void init(HiveConf hiveConf) { - ensureCurrentState(STATE.NOTINITED); - this.hiveConf = hiveConf; - changeState(STATE.INITED); - LOG.info("Service:" + getName() + " is inited."); - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void start() { - startTime = System.currentTimeMillis(); - ensureCurrentState(STATE.INITED); - changeState(STATE.STARTED); - LOG.info("Service:" + getName() + " is started."); - } - - /** - * {@inheritDoc} - * - * @throws IllegalStateException - * if the current service state does not permit - * this action - */ - @Override - public synchronized void stop() { - if (state == STATE.STOPPED || - state == STATE.INITED || - state == STATE.NOTINITED) { - // already stopped, or else it was never - // started (eg another service failing canceled startup) - return; - } - ensureCurrentState(STATE.STARTED); - changeState(STATE.STOPPED); - LOG.info("Service:" + getName() + " is stopped."); - } - - @Override - public synchronized void register(ServiceStateChangeListener l) { - listeners.add(l); - } - - @Override - public synchronized void unregister(ServiceStateChangeListener l) { - listeners.remove(l); - } - - @Override - public String getName() { - return name; - } - - @Override - public synchronized HiveConf getHiveConf() { - return hiveConf; - } - - @Override - public long getStartTime() { - return startTime; - } - - /** - * Verify that a service is in a given state. - * - * @param currentState - * the desired state - * @throws IllegalStateException - * if the service state is different from - * the desired state - */ - private void ensureCurrentState(Service.STATE currentState) { - ServiceOperations.ensureCurrentState(state, currentState); - } - - /** - * Change to a new state and notify all listeners. - * This is a private method that is only invoked from synchronized methods, - * which avoid having to clone the listener list. It does imply that - * the state change listener methods should be short lived, as they - * will delay the state transition. - * - * @param newState - * new service state - */ - private void changeState(Service.STATE newState) { - state = newState; - // notify listeners - for (ServiceStateChangeListener l : listeners) { - l.stateChanged(this); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java deleted file mode 100644 index 897911872b80f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CompositeService.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * CompositeService. - * - */ -public class CompositeService extends AbstractService { - - private static final Log LOG = LogFactory.getLog(CompositeService.class); - - private final List serviceList = new ArrayList(); - - public CompositeService(String name) { - super(name); - } - - public Collection getServices() { - return Collections.unmodifiableList(serviceList); - } - - protected synchronized void addService(Service service) { - serviceList.add(service); - } - - protected synchronized boolean removeService(Service service) { - return serviceList.remove(service); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - for (Service service : serviceList) { - service.init(hiveConf); - } - super.init(hiveConf); - } - - @Override - public synchronized void start() { - int i = 0; - try { - for (int n = serviceList.size(); i < n; i++) { - Service service = serviceList.get(i); - service.start(); - } - super.start(); - } catch (Throwable e) { - LOG.error("Error starting services " + getName(), e); - // Note that the state of the failed service is still INITED and not - // STARTED. Even though the last service is not started completely, still - // call stop() on all services including failed service to make sure cleanup - // happens. - stop(i); - throw new ServiceException("Failed to Start " + getName(), e); - } - - } - - @Override - public synchronized void stop() { - if (this.getServiceState() == STATE.STOPPED) { - // The base composite-service is already stopped, don't do anything again. - return; - } - if (serviceList.size() > 0) { - stop(serviceList.size() - 1); - } - super.stop(); - } - - private synchronized void stop(int numOfServicesStarted) { - // stop in reserve order of start - for (int i = numOfServicesStarted; i >= 0; i--) { - Service service = serviceList.get(i); - try { - service.stop(); - } catch (Throwable t) { - LOG.info("Error stopping " + service.getName(), t); - } - } - } - - /** - * JVM Shutdown hook for CompositeService which will stop the given - * CompositeService gracefully in case of JVM shutdown. 
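The CompositeService removed above starts its children in registration order and, when any child fails to start, stops the already-started children (including the one that failed) in reverse order before rethrowing. A minimal standalone sketch of that pattern follows; the names here are illustrative and not taken from the deleted class.

```
import java.util.ArrayList;
import java.util.List;

interface ChildService {
  void start() throws Exception;
  void stop();
}

final class CompositeSketch {
  private final List<ChildService> children = new ArrayList<>();

  void add(ChildService child) {
    children.add(child);
  }

  void startAll() {
    int i = 0;
    try {
      for (int n = children.size(); i < n; i++) {
        children.get(i).start();   // start in registration order
      }
    } catch (Exception e) {
      // Unwind in reverse order, covering the child that failed as well,
      // then surface the original failure; cleanup errors are best-effort.
      for (int j = i; j >= 0; j--) {
        try {
          children.get(j).stop();
        } catch (RuntimeException ignored) {
          // swallow, as the deleted class logs and continues
        }
      }
      throw new RuntimeException("Failed to start composite", e);
    }
  }
}
```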
- */ - public static class CompositeServiceShutdownHook implements Runnable { - - private final CompositeService compositeService; - - public CompositeServiceShutdownHook(CompositeService compositeService) { - this.compositeService = compositeService; - } - - @Override - public void run() { - try { - // Stop the Composite Service - compositeService.stop(); - } catch (Throwable t) { - LOG.info("Error stopping " + compositeService.getName(), t); - } - } - } - - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java deleted file mode 100644 index f2a80c9d5ffbc..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/CookieSigner.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.Log; - -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; - -/** - * The cookie signer generates a signature based on SHA digest - * and appends it to the cookie value generated at the - * server side. It uses SHA digest algorithm to sign and verify signatures. - */ -public class CookieSigner { - private static final String SIGNATURE = "&s="; - private static final String SHA_STRING = "SHA"; - private byte[] secretBytes; - private static final Log LOG = LogFactory.getLog(CookieSigner.class); - - /** - * Constructor - * @param secret Secret Bytes - */ - public CookieSigner(byte[] secret) { - if (secret == null) { - throw new IllegalArgumentException(" NULL Secret Bytes"); - } - this.secretBytes = secret.clone(); - } - - /** - * Sign the cookie given the string token as input. - * @param str Input token - * @return Signed token that can be used to create a cookie - */ - public String signCookie(String str) { - if (str == null || str.isEmpty()) { - throw new IllegalArgumentException("NULL or empty string to sign"); - } - String signature = getSignature(str); - - if (LOG.isDebugEnabled()) { - LOG.debug("Signature generated for " + str + " is " + signature); - } - return str + SIGNATURE + signature; - } - - /** - * Verify a signed string and extracts the original string. 
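The CookieSigner removed above appends "&s=" plus a Base64-encoded SHA digest of the cookie value and the server-side secret, and verification recomputes that digest and compares it to the appended signature. A standalone sketch of the same scheme is shown below, using only JDK classes (java.util.Base64 instead of the commons-codec Base64 the deleted class imports); class and method names are illustrative.

```
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Base64;

final class CookieSignerSketch {
  private static final String SEPARATOR = "&s=";
  private final byte[] secret;

  CookieSignerSketch(byte[] secret) {
    this.secret = secret.clone();
  }

  String sign(String value) throws Exception {
    return value + SEPARATOR + signature(value);
  }

  String verifyAndExtract(String signed) throws Exception {
    int idx = signed.lastIndexOf(SEPARATOR);
    if (idx < 0) {
      throw new IllegalArgumentException("unsigned value: " + signed);
    }
    String raw = signed.substring(0, idx);
    String expected = signature(raw);
    String actual = signed.substring(idx + SEPARATOR.length());
    // Constant-time comparison of the recomputed and presented signatures.
    if (!MessageDigest.isEqual(actual.getBytes(StandardCharsets.UTF_8),
                               expected.getBytes(StandardCharsets.UTF_8))) {
      throw new IllegalArgumentException("signature mismatch");
    }
    return raw;
  }

  private String signature(String value) throws Exception {
    // "SHA" is the same algorithm name the deleted class requests.
    MessageDigest md = MessageDigest.getInstance("SHA");
    md.update(value.getBytes(StandardCharsets.UTF_8));
    md.update(secret);
    return Base64.getEncoder().encodeToString(md.digest());
  }
}
```

The appended signature never needs to be decoded: the verifier simply re-signs the raw portion and checks that the two strings match.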
- * @param signedStr The already signed string - * @return Raw Value of the string without the signature - */ - public String verifyAndExtract(String signedStr) { - int index = signedStr.lastIndexOf(SIGNATURE); - if (index == -1) { - throw new IllegalArgumentException("Invalid input sign: " + signedStr); - } - String originalSignature = signedStr.substring(index + SIGNATURE.length()); - String rawValue = signedStr.substring(0, index); - String currentSignature = getSignature(rawValue); - - if (LOG.isDebugEnabled()) { - LOG.debug("Signature generated for " + rawValue + " inside verify is " + currentSignature); - } - if (!MessageDigest.isEqual(originalSignature.getBytes(), currentSignature.getBytes())) { - throw new IllegalArgumentException("Invalid sign, original = " + originalSignature + - " current = " + currentSignature); - } - return rawValue; - } - - /** - * Get the signature of the input string based on SHA digest algorithm. - * @param str Input token - * @return Signed String - */ - private String getSignature(String str) { - try { - MessageDigest md = MessageDigest.getInstance(SHA_STRING); - md.update(str.getBytes()); - md.update(secretBytes); - byte[] digest = md.digest(); - return new Base64(0).encodeToString(digest); - } catch (NoSuchAlgorithmException ex) { - throw new RuntimeException("Invalid SHA digest String: " + SHA_STRING + - " " + ex.getMessage(), ex); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java deleted file mode 100644 index f16863c1b41aa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceOperations.java +++ /dev/null @@ -1,141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; - -/** - * ServiceOperations. - * - */ -public final class ServiceOperations { - private static final Log LOG = LogFactory.getLog(ServiceOperations.class); - - private ServiceOperations() { - } - - /** - * Verify that a service is in a given state. - * @param state the actual state a service is in - * @param expectedState the desired state - * @throws IllegalStateException if the service state is different from - * the desired state - */ - public static void ensureCurrentState(Service.STATE state, - Service.STATE expectedState) { - if (state != expectedState) { - throw new IllegalStateException("For this operation, the " + - "current service state must be " - + expectedState - + " instead of " + state); - } - } - - /** - * Initialize a service. 
- * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#NOTINITED} - * @param configuration the configuration to initialize the service with - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - - public static void init(Service service, HiveConf configuration) { - Service.STATE state = service.getServiceState(); - ensureCurrentState(state, Service.STATE.NOTINITED); - service.init(configuration); - } - - /** - * Start a service. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#INITED} - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - - public static void start(Service service) { - Service.STATE state = service.getServiceState(); - ensureCurrentState(state, Service.STATE.INITED); - service.start(); - } - - /** - * Initialize then start a service. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service that must be in the state - * {@link Service.STATE#NOTINITED} - * @param configuration the configuration to initialize the service with - * @throws RuntimeException on a state change failure - * @throws IllegalStateException if the service is in the wrong state - */ - public static void deploy(Service service, HiveConf configuration) { - init(service, configuration); - start(service); - } - - /** - * Stop a service. - * - * Do nothing if the service is null or not in a state in which it can be/needs to be stopped. - * - * The service state is checked before the operation begins. - * This process is not thread safe. - * @param service a service or null - */ - public static void stop(Service service) { - if (service != null) { - Service.STATE state = service.getServiceState(); - if (state == Service.STATE.STARTED) { - service.stop(); - } - } - } - - /** - * Stop a service; if it is null do nothing. Exceptions are caught and - * logged at warn level. (but not Throwables). This operation is intended to - * be used in cleanup operations - * - * @param service a service; may be null - * @return any exception that was caught; null if none was. - */ - public static Exception stopQuietly(Service service) { - try { - stop(service); - } catch (Exception e) { - LOG.warn("When stopping the service " + service.getName() - + " : " + e, - e); - return e; - } - return null; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java deleted file mode 100644 index edb5eff9615bf..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/ServiceUtils.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service; - -public class ServiceUtils { - - /* - * Get the index separating the user name from domain name (the user's name up - * to the first '/' or '@'). - * - * @param userName full user name. - * @return index of domain match or -1 if not found - */ - public static int indexOfDomainMatch(String userName) { - if (userName == null) { - return -1; - } - - int idx = userName.indexOf('/'); - int idx2 = userName.indexOf('@'); - int endIdx = Math.min(idx, idx2); // Use the earlier match. - // Unless at least one of '/' or '@' was not found, in - // which case, user the latter match. - if (endIdx == -1) { - endIdx = Math.max(idx, idx2); - } - return endIdx; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java deleted file mode 100644 index 10000f12ab329..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ /dev/null @@ -1,419 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
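Side note on the ServiceUtils.indexOfDomainMatch helper deleted above: Math.min yields -1 whenever either separator is absent, so the fallback to Math.max picks whichever of '/' or '@' exists; only when both are present does the earlier one win (the in-code comment's "user the latter match" reads as a typo for "use"). A small sanity check of that behavior, with the method body copied from the deleted file (run with java -ea):

```
public class IndexOfDomainMatchCheck {
  // Same logic as the deleted ServiceUtils.indexOfDomainMatch.
  static int indexOfDomainMatch(String userName) {
    if (userName == null) {
      return -1;
    }
    int idx = userName.indexOf('/');
    int idx2 = userName.indexOf('@');
    int endIdx = Math.min(idx, idx2);   // earlier match, but -1 if either is missing
    if (endIdx == -1) {
      endIdx = Math.max(idx, idx2);     // fall back to whichever separator exists
    }
    return endIdx;
  }

  public static void main(String[] args) {
    assert indexOfDomainMatch("alice@EXAMPLE.COM") == 5;      // only '@'
    assert indexOfDomainMatch("hive/host@EXAMPLE.COM") == 4;  // '/' comes first
    assert indexOfDomainMatch("alice") == -1;                 // no separator at all
    System.out.println("indexOfDomainMatch behaves as described");
  }
}
```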
- */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; -import java.net.InetSocketAddress; -import java.net.UnknownHostException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Objects; - -import javax.net.ssl.SSLServerSocket; -import javax.security.auth.login.LoginException; -import javax.security.sasl.Sasl; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.DBTokenStore; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server.ServerMode; -import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authorize.ProxyUsers; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSSLTransportFactory; -import org.apache.thrift.transport.TServerSocket; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportException; -import org.apache.thrift.transport.TTransportFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class helps in some aspects of authentication. It creates the proper Thrift classes for the - * given configuration as well as helps with authenticating requests. 
- */ -public class HiveAuthFactory { - private static final Logger LOG = LoggerFactory.getLogger(HiveAuthFactory.class); - - - public enum AuthTypes { - NOSASL("NOSASL"), - NONE("NONE"), - LDAP("LDAP"), - KERBEROS("KERBEROS"), - CUSTOM("CUSTOM"), - PAM("PAM"); - - private final String authType; - - AuthTypes(String authType) { - this.authType = authType; - } - - public String getAuthName() { - return authType; - } - - } - - private HadoopThriftAuthBridge.Server saslServer; - private String authTypeStr; - private final String transportMode; - private final HiveConf conf; - - public static final String HS2_PROXY_USER = "hive.server2.proxy.user"; - public static final String HS2_CLIENT_TOKEN = "hiveserver2ClientToken"; - - private static Field keytabFile = null; - private static Method getKeytab = null; - static { - Class clz = UserGroupInformation.class; - try { - keytabFile = clz.getDeclaredField("keytabFile"); - keytabFile.setAccessible(true); - } catch (NoSuchFieldException nfe) { - LOG.debug("Cannot find private field \"keytabFile\" in class: " + - UserGroupInformation.class.getCanonicalName(), nfe); - keytabFile = null; - } - - try { - getKeytab = clz.getDeclaredMethod("getKeytab"); - getKeytab.setAccessible(true); - } catch(NoSuchMethodException nme) { - LOG.debug("Cannot find private method \"getKeytab\" in class:" + - UserGroupInformation.class.getCanonicalName(), nme); - getKeytab = null; - } - } - - public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException { - this.conf = conf; - transportMode = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE); - authTypeStr = conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION); - - // In http mode we use NOSASL as the default auth type - if ("http".equalsIgnoreCase(transportMode)) { - if (authTypeStr == null) { - authTypeStr = AuthTypes.NOSASL.getAuthName(); - } - } else { - if (authTypeStr == null) { - authTypeStr = AuthTypes.NONE.getAuthName(); - } - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - String principal = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); - String keytab = conf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); - if (needUgiLogin(UserGroupInformation.getCurrentUser(), - SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keytab)) { - saslServer = ShimLoader.getHadoopThriftAuthBridge().createServer(principal, keytab); - } else { - // Using the default constructor to avoid unnecessary UGI login. 
- saslServer = new HadoopThriftAuthBridge.Server(); - } - - // start delegation token manager - try { - // rawStore is only necessary for DBTokenStore - Object rawStore = null; - String tokenStoreClass = conf.getVar(HiveConf.ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS); - - if (tokenStoreClass.equals(DBTokenStore.class.getName())) { - HMSHandler baseHandler = new HiveMetaStore.HMSHandler( - "new db based metaserver", conf, true); - rawStore = baseHandler.getMS(); - } - - saslServer.startDelegationTokenSecretManager(conf, rawStore, ServerMode.HIVESERVER2); - } - catch (MetaException|IOException e) { - throw new TTransportException("Failed to start token manager", e); - } - } - } - } - - public Map getSaslProperties() { - Map saslProps = new HashMap(); - SaslQOP saslQOP = SaslQOP.fromString(conf.getVar(ConfVars.HIVE_SERVER2_THRIFT_SASL_QOP)); - saslProps.put(Sasl.QOP, saslQOP.toString()); - saslProps.put(Sasl.SERVER_AUTH, "true"); - return saslProps; - } - - public TTransportFactory getAuthTransFactory() throws LoginException { - TTransportFactory transportFactory; - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - try { - transportFactory = saslServer.createTransportFactory(getSaslProperties()); - } catch (TTransportException e) { - throw new LoginException(e.getMessage()); - } - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.NONE.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.LDAP.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.PAM.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.NOSASL.getAuthName())) { - transportFactory = new TTransportFactory(); - } else if (authTypeStr.equalsIgnoreCase(AuthTypes.CUSTOM.getAuthName())) { - transportFactory = PlainSaslHelper.getPlainTransportFactory(authTypeStr); - } else { - throw new LoginException("Unsupported authentication type " + authTypeStr); - } - return transportFactory; - } - - /** - * Returns the thrift processor factory for HiveServer2 running in binary mode - * @param service - * @return - * @throws LoginException - */ - public TProcessorFactory getAuthProcFactory(ThriftCLIService service) throws LoginException { - if (authTypeStr.equalsIgnoreCase(AuthTypes.KERBEROS.getAuthName())) { - return KerberosSaslHelper.getKerberosProcessorFactory(saslServer, service); - } else { - return PlainSaslHelper.getPlainProcessorFactory(service); - } - } - - public String getRemoteUser() { - return saslServer == null ? 
null : saslServer.getRemoteUser(); - } - - public String getIpAddress() { - if (saslServer == null || saslServer.getRemoteAddress() == null) { - return null; - } else { - return saslServer.getRemoteAddress().getHostAddress(); - } - } - - // Perform kerberos login using the hadoop shim API if the configuration is available - public static void loginFromKeytab(HiveConf hiveConf) throws IOException { - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - throw new IOException("HiveServer2 Kerberos principal or keytab is not correctly configured"); - } else { - UserGroupInformation.loginUserFromKeytab(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); - } - } - - // Perform SPNEGO login using the hadoop shim API if the configuration is available - public static UserGroupInformation loginFromSpnegoKeytabAndReturnUGI(HiveConf hiveConf) - throws IOException { - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - throw new IOException("HiveServer2 SPNEGO principal or keytab is not correctly configured"); - } else { - return UserGroupInformation.loginUserFromKeytabAndReturnUGI(SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile); - } - } - - public static TTransport getSocketTransport(String host, int port, int loginTimeout) { - return new TSocket(host, port, loginTimeout); - } - - public static TTransport getSSLSocket(String host, int port, int loginTimeout) - throws TTransportException { - return TSSLTransportFactory.getClientSocket(host, port, loginTimeout); - } - - public static TTransport getSSLSocket(String host, int port, int loginTimeout, - String trustStorePath, String trustStorePassWord) throws TTransportException { - TSSLTransportFactory.TSSLTransportParameters params = - new TSSLTransportFactory.TSSLTransportParameters(); - params.setTrustStore(trustStorePath, trustStorePassWord); - params.requireClientAuth(true); - return TSSLTransportFactory.getClientSocket(host, port, loginTimeout, params); - } - - public static TServerSocket getServerSocket(String hiveHost, int portNum) - throws TTransportException { - InetSocketAddress serverAddress; - if (hiveHost == null || hiveHost.isEmpty()) { - // Wildcard bind - serverAddress = new InetSocketAddress(portNum); - } else { - serverAddress = new InetSocketAddress(hiveHost, portNum); - } - return new TServerSocket(serverAddress); - } - - public static TServerSocket getServerSSLSocket(String hiveHost, int portNum, String keyStorePath, - String keyStorePassWord, List sslVersionBlacklist) throws TTransportException, - UnknownHostException { - TSSLTransportFactory.TSSLTransportParameters params = - new TSSLTransportFactory.TSSLTransportParameters(); - params.setKeyStore(keyStorePath, keyStorePassWord); - InetSocketAddress serverAddress; - if (hiveHost == null || hiveHost.isEmpty()) { - // Wildcard bind - serverAddress = new InetSocketAddress(portNum); - } else { - serverAddress = new InetSocketAddress(hiveHost, portNum); - } - TServerSocket thriftServerSocket = - TSSLTransportFactory.getServerSocket(portNum, 0, serverAddress.getAddress(), params); - if (thriftServerSocket.getServerSocket() instanceof SSLServerSocket) { - List sslVersionBlacklistLocal = new ArrayList(); - for (String sslVersion : 
sslVersionBlacklist) { - sslVersionBlacklistLocal.add(sslVersion.trim().toLowerCase(Locale.ROOT)); - } - SSLServerSocket sslServerSocket = (SSLServerSocket) thriftServerSocket.getServerSocket(); - List enabledProtocols = new ArrayList(); - for (String protocol : sslServerSocket.getEnabledProtocols()) { - if (sslVersionBlacklistLocal.contains(protocol.toLowerCase(Locale.ROOT))) { - LOG.debug("Disabling SSL Protocol: " + protocol); - } else { - enabledProtocols.add(protocol); - } - } - sslServerSocket.setEnabledProtocols(enabledProtocols.toArray(new String[0])); - LOG.info("SSL Server Socket Enabled Protocols: " - + Arrays.toString(sslServerSocket.getEnabledProtocols())); - } - return thriftServerSocket; - } - - // retrieve delegation token for the given user - public String getDelegationToken(String owner, String renewer) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - - try { - String tokenStr = saslServer.getDelegationTokenWithService(owner, renewer, HS2_CLIENT_TOKEN); - if (tokenStr == null || tokenStr.isEmpty()) { - throw new HiveSQLException( - "Received empty retrieving delegation token for user " + owner, "08S01"); - } - return tokenStr; - } catch (IOException e) { - throw new HiveSQLException( - "Error retrieving delegation token for user " + owner, "08S01", e); - } catch (InterruptedException e) { - throw new HiveSQLException("delegation token retrieval interrupted", "08S01", e); - } - } - - // cancel given delegation token - public void cancelDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - saslServer.cancelDelegationToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error canceling delegation token " + delegationToken, "08S01", e); - } - } - - public void renewDelegationToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - saslServer.renewDelegationToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error renewing delegation token " + delegationToken, "08S01", e); - } - } - - public String getUserFromToken(String delegationToken) throws HiveSQLException { - if (saslServer == null) { - throw new HiveSQLException( - "Delegation token only supported over kerberos authentication", "08S01"); - } - try { - return saslServer.getUserFromToken(delegationToken); - } catch (IOException e) { - throw new HiveSQLException( - "Error extracting user from delegation token " + delegationToken, "08S01", e); - } - } - - public static void verifyProxyAccess(String realUser, String proxyUser, String ipAddress, - HiveConf hiveConf) throws HiveSQLException { - try { - UserGroupInformation sessionUgi; - if (UserGroupInformation.isSecurityEnabled()) { - KerberosNameShim kerbName = ShimLoader.getHadoopShims().getKerberosNameShim(realUser); - sessionUgi = UserGroupInformation.createProxyUser( - kerbName.getServiceName(), UserGroupInformation.getLoginUser()); - } else { - sessionUgi = UserGroupInformation.createRemoteUser(realUser); - } - if (!proxyUser.equalsIgnoreCase(realUser)) { - ProxyUsers.refreshSuperUserGroupsConfiguration(hiveConf); - ProxyUsers.authorize(UserGroupInformation.createProxyUser(proxyUser, 
sessionUgi), - ipAddress, hiveConf); - } - } catch (IOException e) { - throw new HiveSQLException( - "Failed to validate proxy privilege of " + realUser + " for " + proxyUser, "08S01", e); - } - } - - public static boolean needUgiLogin(UserGroupInformation ugi, String principal, String keytab) { - return null == ugi || !ugi.hasKerberosCredentials() || !ugi.getUserName().equals(principal) || - !Objects.equals(keytab, getKeytabFromUgi()); - } - - private static String getKeytabFromUgi() { - synchronized (UserGroupInformation.class) { - try { - if (keytabFile != null) { - return (String) keytabFile.get(null); - } else if (getKeytab != null) { - return (String) getKeytab.invoke(UserGroupInformation.getCurrentUser()); - } else { - return null; - } - } catch (Exception e) { - LOG.debug("Fail to get keytabFile path via reflection", e); - return null; - } - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java deleted file mode 100644 index f7375ee707830..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java +++ /dev/null @@ -1,189 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.auth; - -import java.security.AccessControlContext; -import java.security.AccessController; -import java.security.PrivilegedExceptionAction; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.StringTokenizer; - -import javax.security.auth.Subject; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.http.protocol.BasicHttpContext; -import org.apache.http.protocol.HttpContext; -import org.ietf.jgss.GSSContext; -import org.ietf.jgss.GSSManager; -import org.ietf.jgss.GSSName; -import org.ietf.jgss.Oid; - -/** - * Utility functions for HTTP mode authentication. 
- */ -public final class HttpAuthUtils { - public static final String WWW_AUTHENTICATE = "WWW-Authenticate"; - public static final String AUTHORIZATION = "Authorization"; - public static final String BASIC = "Basic"; - public static final String NEGOTIATE = "Negotiate"; - private static final Log LOG = LogFactory.getLog(HttpAuthUtils.class); - private static final String COOKIE_ATTR_SEPARATOR = "&"; - private static final String COOKIE_CLIENT_USER_NAME = "cu"; - private static final String COOKIE_CLIENT_RAND_NUMBER = "rn"; - private static final String COOKIE_KEY_VALUE_SEPARATOR = "="; - private static final Set COOKIE_ATTRIBUTES = - new HashSet(Arrays.asList(COOKIE_CLIENT_USER_NAME, COOKIE_CLIENT_RAND_NUMBER)); - - /** - * @return Stringified Base64 encoded kerberosAuthHeader on success - * @throws Exception - */ - public static String getKerberosServiceTicket(String principal, String host, - String serverHttpUrl, boolean assumeSubject) throws Exception { - String serverPrincipal = - ShimLoader.getHadoopThriftAuthBridge().getServerPrincipal(principal, host); - if (assumeSubject) { - // With this option, we're assuming that the external application, - // using the JDBC driver has done a JAAS kerberos login already - AccessControlContext context = AccessController.getContext(); - Subject subject = Subject.getSubject(context); - if (subject == null) { - throw new Exception("The Subject is not set"); - } - return Subject.doAs(subject, new HttpKerberosClientAction(serverPrincipal, serverHttpUrl)); - } else { - // JAAS login from ticket cache to setup the client UserGroupInformation - UserGroupInformation clientUGI = - ShimLoader.getHadoopThriftAuthBridge().getCurrentUGIWithConf("kerberos"); - return clientUGI.doAs(new HttpKerberosClientAction(serverPrincipal, serverHttpUrl)); - } - } - - /** - * Creates and returns a HS2 cookie token. - * @param clientUserName Client User name. - * @return An unsigned cookie token generated from input parameters. - * The final cookie generated is of the following format : - * {@code cu=&rn=&s=} - */ - public static String createCookieToken(String clientUserName) { - StringBuffer sb = new StringBuffer(); - sb.append(COOKIE_CLIENT_USER_NAME).append(COOKIE_KEY_VALUE_SEPARATOR).append(clientUserName) - .append(COOKIE_ATTR_SEPARATOR); - sb.append(COOKIE_CLIENT_RAND_NUMBER).append(COOKIE_KEY_VALUE_SEPARATOR) - .append((new Random(System.currentTimeMillis())).nextLong()); - return sb.toString(); - } - - /** - * Parses a cookie token to retrieve client user name. - * @param tokenStr Token String. - * @return A valid user name if input is of valid format, else returns null. - */ - public static String getUserNameFromCookieToken(String tokenStr) { - Map map = splitCookieToken(tokenStr); - - if (!map.keySet().equals(COOKIE_ATTRIBUTES)) { - LOG.error("Invalid token with missing attributes " + tokenStr); - return null; - } - return map.get(COOKIE_CLIENT_USER_NAME); - } - - /** - * Splits the cookie token into attributes pairs. - * @param str input token. - * @return a map with the attribute pairs of the token if the input is valid. - * Else, returns null. 
- */ - private static Map splitCookieToken(String tokenStr) { - Map map = new HashMap(); - StringTokenizer st = new StringTokenizer(tokenStr, COOKIE_ATTR_SEPARATOR); - - while (st.hasMoreTokens()) { - String part = st.nextToken(); - int separator = part.indexOf(COOKIE_KEY_VALUE_SEPARATOR); - if (separator == -1) { - LOG.error("Invalid token string " + tokenStr); - return null; - } - String key = part.substring(0, separator); - String value = part.substring(separator + 1); - map.put(key, value); - } - return map; - } - - - private HttpAuthUtils() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - /** - * We'll create an instance of this class within a doAs block so that the client's TGT credentials - * can be read from the Subject - */ - public static class HttpKerberosClientAction implements PrivilegedExceptionAction { - public static final String HTTP_RESPONSE = "HTTP_RESPONSE"; - public static final String SERVER_HTTP_URL = "SERVER_HTTP_URL"; - private final String serverPrincipal; - private final String serverHttpUrl; - private final Base64 base64codec; - private final HttpContext httpContext; - - public HttpKerberosClientAction(String serverPrincipal, String serverHttpUrl) { - this.serverPrincipal = serverPrincipal; - this.serverHttpUrl = serverHttpUrl; - base64codec = new Base64(0); - httpContext = new BasicHttpContext(); - httpContext.setAttribute(SERVER_HTTP_URL, serverHttpUrl); - } - - @Override - public String run() throws Exception { - // This Oid for Kerberos GSS-API mechanism. - Oid mechOid = new Oid("1.2.840.113554.1.2.2"); - // Oid for kerberos principal name - Oid krb5PrincipalOid = new Oid("1.2.840.113554.1.2.2.1"); - GSSManager manager = GSSManager.getInstance(); - // GSS name for server - GSSName serverName = manager.createName(serverPrincipal, krb5PrincipalOid); - // Create a GSSContext for authentication with the service. - // We're passing client credentials as null since we want them to be read from the Subject. - GSSContext gssContext = - manager.createContext(serverName, mechOid, null, GSSContext.DEFAULT_LIFETIME); - gssContext.requestMutualAuth(false); - // Establish context - byte[] inToken = new byte[0]; - byte[] outToken = gssContext.initSecContext(inToken, 0, inToken.length); - gssContext.dispose(); - // Base64 encoded and stringified token for server - return new String(base64codec.encode(outToken)); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java deleted file mode 100644 index 52eb752f1e026..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ /dev/null @@ -1,111 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
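For context, the HttpAuthUtils methods above produce an unsigned token of the form cu=<user>&rn=<random>, which CookieSigner then extends to cu=<user>&rn=<random>&s=<signature>. A minimal sketch of the create/parse pair (hypothetical names; SecureRandom is substituted for the time-seeded Random in the deleted code, and String.split loosely mirrors the StringTokenizer loop):

```
import java.security.SecureRandom;
import java.util.HashMap;
import java.util.Map;

public class CookieTokenSketch {
  // Mirrors the deleted createCookieToken: "cu=<user>&rn=<random>".
  static String createToken(String clientUserName) {
    return "cu=" + clientUserName + "&rn=" + new SecureRandom().nextLong();
  }

  // Mirrors the deleted splitCookieToken: "k=v&k=v" pairs, null on malformed input.
  static Map<String, String> splitToken(String token) {
    Map<String, String> map = new HashMap<>();
    for (String part : token.split("&")) {
      int sep = part.indexOf('=');
      if (sep == -1) {
        return null;
      }
      map.put(part.substring(0, sep), part.substring(sep + 1));
    }
    return map;
  }

  public static void main(String[] args) {
    String token = createToken("alice");
    System.out.println(token + " -> user=" + splitToken(token).get("cu"));
  }
}
```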
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.util.Map; -import javax.security.sasl.SaslException; - -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge; -import org.apache.hadoop.hive.thrift.HadoopThriftAuthBridge.Server; -import org.apache.hive.service.cli.thrift.TCLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessor; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TTransport; - -public final class KerberosSaslHelper { - - public static TProcessorFactory getKerberosProcessorFactory(Server saslServer, - ThriftCLIService service) { - return new CLIServiceProcessorFactory(saslServer, service); - } - - public static TTransport getKerberosTransport(String principal, String host, - TTransport underlyingTransport, Map saslProps, boolean assumeSubject) - throws SaslException { - try { - String[] names = principal.split("[/@]"); - if (names.length != 3) { - throw new IllegalArgumentException("Kerberos principal should have 3 parts: " + principal); - } - - if (assumeSubject) { - return createSubjectAssumedTransport(principal, underlyingTransport, saslProps); - } else { - HadoopThriftAuthBridge.Client authBridge = - ShimLoader.getHadoopThriftAuthBridge().createClientWithConf("kerberos"); - return authBridge.createClientTransport(principal, host, "KERBEROS", null, - underlyingTransport, saslProps); - } - } catch (IOException e) { - throw new SaslException("Failed to open client transport", e); - } - } - - public static TTransport createSubjectAssumedTransport(String principal, - TTransport underlyingTransport, Map saslProps) throws IOException { - String[] names = principal.split("[/@]"); - try { - TTransport saslTransport = - new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, - underlyingTransport); - return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); - } - } - - public static TTransport getTokenTransport(String tokenStr, String host, - TTransport underlyingTransport, Map saslProps) throws SaslException { - HadoopThriftAuthBridge.Client authBridge = - ShimLoader.getHadoopThriftAuthBridge().createClientWithConf("kerberos"); - - try { - return authBridge.createClientTransport(null, host, "DIGEST", tokenStr, underlyingTransport, - saslProps); - } catch (IOException e) { - throw new SaslException("Failed to open client transport", e); - } - } - - private KerberosSaslHelper() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - private static class CLIServiceProcessorFactory extends TProcessorFactory { - - private final ThriftCLIService service; - private final Server saslServer; - - CLIServiceProcessorFactory(Server saslServer, ThriftCLIService service) { - super(null); - this.service = service; - this.saslServer = saslServer; - } - - @Override - public TProcessor getProcessor(TTransport trans) { - TProcessor sqlProcessor = new TCLIService.Processor(service); - return saslServer.wrapNonAssumingProcessor(sqlProcessor); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java deleted file mode 100644 index afc144199f1e8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.auth; - -import java.io.IOException; -import java.security.Security; -import java.util.HashMap; -import javax.security.auth.callback.Callback; -import javax.security.auth.callback.CallbackHandler; -import javax.security.auth.callback.NameCallback; -import javax.security.auth.callback.PasswordCallback; -import javax.security.auth.callback.UnsupportedCallbackException; -import javax.security.auth.login.LoginException; -import javax.security.sasl.AuthenticationException; -import javax.security.sasl.AuthorizeCallback; -import javax.security.sasl.SaslException; - -import org.apache.hive.service.auth.AuthenticationProviderFactory.AuthMethods; -import org.apache.hive.service.auth.PlainSaslServer.SaslPlainProvider; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.thrift.TProcessor; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TTransport; -import org.apache.thrift.transport.TTransportFactory; - -public final class PlainSaslHelper { - - public static TProcessorFactory getPlainProcessorFactory(ThriftCLIService service) { - return new SQLPlainProcessorFactory(service); - } - - // Register Plain SASL server provider - static { - Security.addProvider(new SaslPlainProvider()); - } - - public static TTransportFactory getPlainTransportFactory(String authTypeStr) - throws LoginException { - TSaslServerTransport.Factory saslFactory = new TSaslServerTransport.Factory(); - try { - saslFactory.addServerDefinition("PLAIN", authTypeStr, null, new HashMap(), - new PlainServerCallbackHandler(authTypeStr)); - } catch (AuthenticationException e) { - throw new LoginException("Error setting callback handler" + e); - } - return saslFactory; - } - - public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException { - return new TSaslClientTransport("PLAIN", null, null, null, new HashMap(), - new PlainCallbackHandler(username, password), underlyingTransport); - } - - private PlainSaslHelper() { - throw new UnsupportedOperationException("Can't initialize class"); - } - - private static final class PlainServerCallbackHandler implements CallbackHandler { - - private final AuthMethods authMethod; - - 
PlainServerCallbackHandler(String authMethodStr) throws AuthenticationException { - authMethod = AuthMethods.getValidAuthMethod(authMethodStr); - } - - @Override - public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { - String username = null; - String password = null; - AuthorizeCallback ac = null; - - for (Callback callback : callbacks) { - if (callback instanceof NameCallback) { - NameCallback nc = (NameCallback) callback; - username = nc.getName(); - } else if (callback instanceof PasswordCallback) { - PasswordCallback pc = (PasswordCallback) callback; - password = new String(pc.getPassword()); - } else if (callback instanceof AuthorizeCallback) { - ac = (AuthorizeCallback) callback; - } else { - throw new UnsupportedCallbackException(callback); - } - } - PasswdAuthenticationProvider provider = - AuthenticationProviderFactory.getAuthenticationProvider(authMethod); - provider.Authenticate(username, password); - if (ac != null) { - ac.setAuthorized(true); - } - } - } - - public static class PlainCallbackHandler implements CallbackHandler { - - private final String username; - private final String password; - - public PlainCallbackHandler(String username, String password) { - this.username = username; - this.password = password; - } - - @Override - public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException { - for (Callback callback : callbacks) { - if (callback instanceof NameCallback) { - NameCallback nameCallback = (NameCallback) callback; - nameCallback.setName(username); - } else if (callback instanceof PasswordCallback) { - PasswordCallback passCallback = (PasswordCallback) callback; - passCallback.setPassword(password.toCharArray()); - } else { - throw new UnsupportedCallbackException(callback); - } - } - } - } - - private static final class SQLPlainProcessorFactory extends TProcessorFactory { - - private final ThriftCLIService service; - - SQLPlainProcessorFactory(ThriftCLIService service) { - super(null); - this.service = service; - } - - @Override - public TProcessor getProcessor(TTransport trans) { - return new TSetIpAddressProcessor(service); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java deleted file mode 100644 index 9a61ad49942c8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
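As a standalone illustration of the PLAIN SASL plumbing above: the server-side callback handler collects the user name and password from NameCallback/PasswordCallback, authenticates them, and marks the AuthorizeCallback as authorized. The sketch below keeps that shape but swaps in a hypothetical BiPredicate validator where the deleted code calls Hive's AuthenticationProviderFactory:

```
import java.io.IOException;
import java.util.function.BiPredicate;
import javax.security.auth.callback.Callback;
import javax.security.auth.callback.CallbackHandler;
import javax.security.auth.callback.NameCallback;
import javax.security.auth.callback.PasswordCallback;
import javax.security.auth.callback.UnsupportedCallbackException;
import javax.security.sasl.AuthorizeCallback;

public class PlainServerHandlerSketch implements CallbackHandler {
  // Hypothetical credential check standing in for PasswdAuthenticationProvider.Authenticate.
  private final BiPredicate<String, String> validator;

  public PlainServerHandlerSketch(BiPredicate<String, String> validator) {
    this.validator = validator;
  }

  @Override
  public void handle(Callback[] callbacks) throws IOException, UnsupportedCallbackException {
    String username = null;
    String password = null;
    AuthorizeCallback ac = null;
    for (Callback callback : callbacks) {
      if (callback instanceof NameCallback) {
        username = ((NameCallback) callback).getName();
      } else if (callback instanceof PasswordCallback) {
        password = new String(((PasswordCallback) callback).getPassword());
      } else if (callback instanceof AuthorizeCallback) {
        ac = (AuthorizeCallback) callback;
      } else {
        throw new UnsupportedCallbackException(callback);
      }
    }
    if (!validator.test(username, password)) {
      throw new IOException("Authentication failed for user " + username);
    }
    if (ac != null) {
      ac.setAuthorized(true);  // let the authenticated identity act as the authorization id
    }
  }
}
```

Such a handler would be registered the same way the deleted code registers its own, via TSaslServerTransport.Factory.addServerDefinition("PLAIN", ...).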
- */ - -package org.apache.hive.service.auth; - -import org.apache.hive.service.cli.thrift.TCLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.transport.TSaslClientTransport; -import org.apache.thrift.transport.TSaslServerTransport; -import org.apache.thrift.transport.TSocket; -import org.apache.thrift.transport.TTransport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * This class is responsible for setting the ipAddress for operations executed via HiveServer2. - * - * - IP address is only set for operations that calls listeners with hookContext - * - IP address is only set if the underlying transport mechanism is socket - * - * @see org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext - */ -public class TSetIpAddressProcessor extends TCLIService.Processor { - - private static final Logger LOGGER = LoggerFactory.getLogger(TSetIpAddressProcessor.class.getName()); - - public TSetIpAddressProcessor(Iface iface) { - super(iface); - } - - @Override - public boolean process(final TProtocol in, final TProtocol out) throws TException { - setIpAddress(in); - setUserName(in); - try { - return super.process(in, out); - } finally { - THREAD_LOCAL_USER_NAME.remove(); - THREAD_LOCAL_IP_ADDRESS.remove(); - } - } - - private void setUserName(final TProtocol in) { - TTransport transport = in.getTransport(); - if (transport instanceof TSaslServerTransport) { - String userName = ((TSaslServerTransport) transport).getSaslServer().getAuthorizationID(); - THREAD_LOCAL_USER_NAME.set(userName); - } - } - - protected void setIpAddress(final TProtocol in) { - TTransport transport = in.getTransport(); - TSocket tSocket = getUnderlyingSocketFromTransport(transport); - if (tSocket == null) { - LOGGER.warn("Unknown Transport, cannot determine ipAddress"); - } else { - THREAD_LOCAL_IP_ADDRESS.set(tSocket.getSocket().getInetAddress().getHostAddress()); - } - } - - private TSocket getUnderlyingSocketFromTransport(TTransport transport) { - while (transport != null) { - if (transport instanceof TSaslServerTransport) { - transport = ((TSaslServerTransport) transport).getUnderlyingTransport(); - } - if (transport instanceof TSaslClientTransport) { - transport = ((TSaslClientTransport) transport).getUnderlyingTransport(); - } - if (transport instanceof TSocket) { - return (TSocket) transport; - } - } - return null; - } - - private static final ThreadLocal THREAD_LOCAL_IP_ADDRESS = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - private static final ThreadLocal THREAD_LOCAL_USER_NAME = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static String getUserIpAddress() { - return THREAD_LOCAL_IP_ADDRESS.get(); - } - - public static String getUserName() { - return THREAD_LOCAL_USER_NAME.get(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java deleted file mode 100644 index 791ddcbd2c5b6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/CLIService.java +++ /dev/null @@ -1,507 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CancellationException; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -import javax.security.auth.login.LoginException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.operation.Operation; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.HiveServer2; - -/** - * CLIService. 
- * - */ -public class CLIService extends CompositeService implements ICLIService { - - public static final TProtocolVersion SERVER_VERSION; - - static { - TProtocolVersion[] protocols = TProtocolVersion.values(); - SERVER_VERSION = protocols[protocols.length - 1]; - } - - private final Log LOG = LogFactory.getLog(CLIService.class.getName()); - - private HiveConf hiveConf; - private SessionManager sessionManager; - private UserGroupInformation serviceUGI; - private UserGroupInformation httpUGI; - // The HiveServer2 instance running this service - private final HiveServer2 hiveServer2; - - public CLIService(HiveServer2 hiveServer2) { - super(CLIService.class.getSimpleName()); - this.hiveServer2 = hiveServer2; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - sessionManager = new SessionManager(hiveServer2); - addService(sessionManager); - // If the hadoop cluster is secure, do a kerberos login for the service from the keytab - if (UserGroupInformation.isSecurityEnabled()) { - try { - HiveAuthFactory.loginFromKeytab(hiveConf); - this.serviceUGI = Utils.getUGI(); - } catch (IOException e) { - throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); - } catch (LoginException e) { - throw new ServiceException("Unable to login to kerberos with given principal/keytab", e); - } - - // Also try creating a UGI object for the SPNego principal - String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL); - String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB); - if (principal.isEmpty() || keyTabFile.isEmpty()) { - LOG.info("SPNego httpUGI not created, spNegoPrincipal: " + principal + - ", ketabFile: " + keyTabFile); - } else { - try { - this.httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf); - LOG.info("SPNego httpUGI successfully created."); - } catch (IOException e) { - LOG.warn("SPNego httpUGI creation failed: ", e); - } - } - } - // creates connection to HMS and thus *must* occur after kerberos login above - try { - applyAuthorizationConfigPolicy(hiveConf); - } catch (Exception e) { - throw new RuntimeException("Error applying authorization policy on hive configuration: " - + e.getMessage(), e); - } - setupBlockedUdfs(); - super.init(hiveConf); - } - - private void applyAuthorizationConfigPolicy(HiveConf newHiveConf) throws HiveException, - MetaException { - // authorization setup using SessionState should be revisited eventually, as - // authorization and authentication are not session specific settings - SessionState ss = new SessionState(newHiveConf); - ss.setIsHiveServerQuery(true); - SessionState.start(ss); - ss.applyAuthorizationPolicy(); - } - - private void setupBlockedUdfs() { - FunctionRegistry.setupPermissionsForBuiltinUDFs( - hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_WHITELIST), - hiveConf.getVar(ConfVars.HIVE_SERVER2_BUILTIN_UDF_BLACKLIST)); - } - - public UserGroupInformation getServiceUGI() { - return this.serviceUGI; - } - - public UserGroupInformation getHttpUGI() { - return this.httpUGI; - } - - @Override - public synchronized void start() { - super.start(); - // Initialize and test a connection to the metastore - IMetaStoreClient metastoreClient = null; - try { - metastoreClient = new HiveMetaStoreClient(hiveConf); - metastoreClient.getDatabases("default"); - } catch (Exception e) { - throw new ServiceException("Unable to connect to MetaStore!", e); - } - finally { - if (metastoreClient != null) { - metastoreClient.close(); - } - } - 
} - - @Override - public synchronized void stop() { - super.stop(); - } - - /** - * @deprecated Use {@link #openSession(TProtocolVersion, String, String, String, Map)} - */ - @Deprecated - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, - Map configuration) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, null, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /** - * @deprecated Use {@link #openSessionWithImpersonation(TProtocolVersion, String, String, String, Map, String)} - */ - @Deprecated - public SessionHandle openSessionWithImpersonation(TProtocolVersion protocol, String username, - String password, Map configuration, String delegationToken) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, null, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSessionWithImpersonation()"); - return sessionHandle; - } - - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map configuration) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, ipAddress, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - public SessionHandle openSessionWithImpersonation(TProtocolVersion protocol, String username, - String password, String ipAddress, Map configuration, String delegationToken) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(protocol, username, password, ipAddress, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, Map configuration) - throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(SERVER_VERSION, username, password, null, configuration, false, null); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, Map configuration, - String delegationToken) throws HiveSQLException { - SessionHandle sessionHandle = sessionManager.openSession(SERVER_VERSION, username, password, null, configuration, - true, delegationToken); - LOG.debug(sessionHandle + ": openSession()"); - return sessionHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) - throws HiveSQLException { - sessionManager.closeSession(sessionHandle); - LOG.debug(sessionHandle + ": closeSession()"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType getInfoType) - throws HiveSQLException { - GetInfoValue infoValue = sessionManager.getSession(sessionHandle) - 
.getInfo(getInfoType); - LOG.debug(sessionHandle + ": getInfo()"); - return infoValue; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatement(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .executeStatement(statement, confOverlay); - LOG.debug(sessionHandle + ": executeStatement()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .executeStatementAsync(statement, confOverlay); - LOG.debug(sessionHandle + ": executeStatementAsync()"); - return opHandle; - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTypeInfo(); - LOG.debug(sessionHandle + ": getTypeInfo()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getCatalogs(); - LOG.debug(sessionHandle + ": getCatalogs()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, - String catalogName, String schemaName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getSchemas(catalogName, schemaName); - LOG.debug(sessionHandle + ": getSchemas()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, List tableTypes) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTables(catalogName, schemaName, tableName, tableTypes); - LOG.debug(sessionHandle + ": getTables()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getTableTypes(); - LOG.debug(sessionHandle + ": getTableTypes()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getColumns(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public 
OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getColumns(catalogName, schemaName, tableName, columnName); - LOG.debug(sessionHandle + ": getColumns()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getFunctions(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException { - OperationHandle opHandle = sessionManager.getSession(sessionHandle) - .getFunctions(catalogName, schemaName, functionName); - LOG.debug(sessionHandle + ": getFunctions()"); - return opHandle; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) - throws HiveSQLException { - Operation operation = sessionManager.getOperationManager().getOperation(opHandle); - /** - * If this is a background operation run asynchronously, - * we block for a configured duration, before we return - * (duration: HIVE_SERVER2_LONG_POLLING_TIMEOUT). - * However, if the background operation is complete, we return immediately. - */ - if (operation.shouldRunAsync()) { - HiveConf conf = operation.getParentSession().getHiveConf(); - long timeout = HiveConf.getTimeVar(conf, - HiveConf.ConfVars.HIVE_SERVER2_LONG_POLLING_TIMEOUT, TimeUnit.MILLISECONDS); - try { - operation.getBackgroundHandle().get(timeout, TimeUnit.MILLISECONDS); - } catch (TimeoutException e) { - // No Op, return to the caller since long polling timeout has expired - LOG.trace(opHandle + ": Long polling timed out"); - } catch (CancellationException e) { - // The background operation thread was cancelled - LOG.trace(opHandle + ": The background operation was cancelled", e); - } catch (ExecutionException e) { - // The background operation thread was aborted - LOG.warn(opHandle + ": The background operation was aborted", e); - } catch (InterruptedException e) { - // No op, this thread was interrupted - // In this case, the call might return sooner than long polling timeout - } - } - OperationStatus opStatus = operation.getStatus(); - LOG.debug(opHandle + ": getOperationStatus()"); - return opStatus; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle opHandle) - throws HiveSQLException { - sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().cancelOperation(opHandle); - LOG.debug(opHandle + ": cancelOperation()"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) - throws HiveSQLException { - sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().closeOperation(opHandle); - LOG.debug(opHandle + ": closeOperation"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException { - TableSchema 
tableSchema = sessionManager.getOperationManager() - .getOperation(opHandle).getParentSession().getResultSetMetadata(opHandle); - LOG.debug(opHandle + ": getResultSetMetadata()"); - return tableSchema; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#fetchResults(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public RowSet fetchResults(OperationHandle opHandle) - throws HiveSQLException { - return fetchResults(opHandle, Operation.DEFAULT_FETCH_ORIENTATION, - Operation.DEFAULT_FETCH_MAX_ROWS, FetchType.QUERY_OUTPUT); - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - RowSet rowSet = sessionManager.getOperationManager().getOperation(opHandle) - .getParentSession().fetchResults(opHandle, orientation, maxRows, fetchType); - LOG.debug(opHandle + ": fetchResults()"); - return rowSet; - } - - // obtain delegation token for the give user from metastore - public synchronized String getDelegationTokenFromMetaStore(String owner) - throws HiveSQLException, UnsupportedOperationException, LoginException, IOException { - if (!hiveConf.getBoolVar(HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL) || - !hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ENABLE_DOAS)) { - throw new UnsupportedOperationException( - "delegation token is can only be obtained for a secure remote metastore"); - } - - try { - Hive.closeCurrent(); - return Hive.get(hiveConf).getDelegationToken(owner, owner); - } catch (HiveException e) { - if (e.getCause() instanceof UnsupportedOperationException) { - throw (UnsupportedOperationException)e.getCause(); - } else { - throw new HiveSQLException("Error connect metastore to setup impersonation", e); - } - } - } - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - String delegationToken = sessionManager.getSession(sessionHandle) - .getDelegationToken(authFactory, owner, renewer); - LOG.info(sessionHandle + ": getDelegationToken()"); - return delegationToken; - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - sessionManager.getSession(sessionHandle).cancelDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": cancelDelegationToken()"); - } - - @Override - public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - sessionManager.getSession(sessionHandle).renewDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": renewDelegationToken()"); - } - - public SessionManager getSessionManager() { - return sessionManager; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java deleted file mode 100644 index 26d0f718f383a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Column.java +++ /dev/null @@ -1,423 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.nio.ByteBuffer; -import java.util.AbstractList; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.BitSet; -import java.util.List; - -import com.google.common.primitives.Booleans; -import com.google.common.primitives.Bytes; -import com.google.common.primitives.Doubles; -import com.google.common.primitives.Ints; -import com.google.common.primitives.Longs; -import com.google.common.primitives.Shorts; -import org.apache.hive.service.cli.thrift.TBinaryColumn; -import org.apache.hive.service.cli.thrift.TBoolColumn; -import org.apache.hive.service.cli.thrift.TByteColumn; -import org.apache.hive.service.cli.thrift.TColumn; -import org.apache.hive.service.cli.thrift.TDoubleColumn; -import org.apache.hive.service.cli.thrift.TI16Column; -import org.apache.hive.service.cli.thrift.TI32Column; -import org.apache.hive.service.cli.thrift.TI64Column; -import org.apache.hive.service.cli.thrift.TStringColumn; - -/** - * Column. - */ -public class Column extends AbstractList { - - private static final int DEFAULT_SIZE = 100; - - private final Type type; - - private BitSet nulls; - - private int size; - private boolean[] boolVars; - private byte[] byteVars; - private short[] shortVars; - private int[] intVars; - private long[] longVars; - private double[] doubleVars; - private List stringVars; - private List binaryVars; - - public Column(Type type, BitSet nulls, Object values) { - this.type = type; - this.nulls = nulls; - if (type == Type.BOOLEAN_TYPE) { - boolVars = (boolean[]) values; - size = boolVars.length; - } else if (type == Type.TINYINT_TYPE) { - byteVars = (byte[]) values; - size = byteVars.length; - } else if (type == Type.SMALLINT_TYPE) { - shortVars = (short[]) values; - size = shortVars.length; - } else if (type == Type.INT_TYPE) { - intVars = (int[]) values; - size = intVars.length; - } else if (type == Type.BIGINT_TYPE) { - longVars = (long[]) values; - size = longVars.length; - } else if (type == Type.DOUBLE_TYPE) { - doubleVars = (double[]) values; - size = doubleVars.length; - } else if (type == Type.BINARY_TYPE) { - binaryVars = (List) values; - size = binaryVars.size(); - } else if (type == Type.STRING_TYPE) { - stringVars = (List) values; - size = stringVars.size(); - } else { - throw new IllegalStateException("invalid union object"); - } - } - - public Column(Type type) { - nulls = new BitSet(); - switch (type) { - case BOOLEAN_TYPE: - boolVars = new boolean[DEFAULT_SIZE]; - break; - case TINYINT_TYPE: - byteVars = new byte[DEFAULT_SIZE]; - break; - case SMALLINT_TYPE: - shortVars = new short[DEFAULT_SIZE]; - break; - case INT_TYPE: - intVars = new int[DEFAULT_SIZE]; - break; - case BIGINT_TYPE: - longVars = new long[DEFAULT_SIZE]; - break; - case FLOAT_TYPE: - case DOUBLE_TYPE: - type = Type.DOUBLE_TYPE; - doubleVars = new double[DEFAULT_SIZE]; - break; - case BINARY_TYPE: - binaryVars = new ArrayList(); - break; - default: - 
type = Type.STRING_TYPE; - stringVars = new ArrayList(); - } - this.type = type; - } - - public Column(TColumn colValues) { - if (colValues.isSetBoolVal()) { - type = Type.BOOLEAN_TYPE; - nulls = toBitset(colValues.getBoolVal().getNulls()); - boolVars = Booleans.toArray(colValues.getBoolVal().getValues()); - size = boolVars.length; - } else if (colValues.isSetByteVal()) { - type = Type.TINYINT_TYPE; - nulls = toBitset(colValues.getByteVal().getNulls()); - byteVars = Bytes.toArray(colValues.getByteVal().getValues()); - size = byteVars.length; - } else if (colValues.isSetI16Val()) { - type = Type.SMALLINT_TYPE; - nulls = toBitset(colValues.getI16Val().getNulls()); - shortVars = Shorts.toArray(colValues.getI16Val().getValues()); - size = shortVars.length; - } else if (colValues.isSetI32Val()) { - type = Type.INT_TYPE; - nulls = toBitset(colValues.getI32Val().getNulls()); - intVars = Ints.toArray(colValues.getI32Val().getValues()); - size = intVars.length; - } else if (colValues.isSetI64Val()) { - type = Type.BIGINT_TYPE; - nulls = toBitset(colValues.getI64Val().getNulls()); - longVars = Longs.toArray(colValues.getI64Val().getValues()); - size = longVars.length; - } else if (colValues.isSetDoubleVal()) { - type = Type.DOUBLE_TYPE; - nulls = toBitset(colValues.getDoubleVal().getNulls()); - doubleVars = Doubles.toArray(colValues.getDoubleVal().getValues()); - size = doubleVars.length; - } else if (colValues.isSetBinaryVal()) { - type = Type.BINARY_TYPE; - nulls = toBitset(colValues.getBinaryVal().getNulls()); - binaryVars = colValues.getBinaryVal().getValues(); - size = binaryVars.size(); - } else if (colValues.isSetStringVal()) { - type = Type.STRING_TYPE; - nulls = toBitset(colValues.getStringVal().getNulls()); - stringVars = colValues.getStringVal().getValues(); - size = stringVars.size(); - } else { - throw new IllegalStateException("invalid union object"); - } - } - - public Column extractSubset(int start, int end) { - BitSet subNulls = nulls.get(start, end); - if (type == Type.BOOLEAN_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(boolVars, start, end)); - boolVars = Arrays.copyOfRange(boolVars, end, size); - nulls = nulls.get(start, size); - size = boolVars.length; - return subset; - } - if (type == Type.TINYINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(byteVars, start, end)); - byteVars = Arrays.copyOfRange(byteVars, end, size); - nulls = nulls.get(start, size); - size = byteVars.length; - return subset; - } - if (type == Type.SMALLINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(shortVars, start, end)); - shortVars = Arrays.copyOfRange(shortVars, end, size); - nulls = nulls.get(start, size); - size = shortVars.length; - return subset; - } - if (type == Type.INT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(intVars, start, end)); - intVars = Arrays.copyOfRange(intVars, end, size); - nulls = nulls.get(start, size); - size = intVars.length; - return subset; - } - if (type == Type.BIGINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(longVars, start, end)); - longVars = Arrays.copyOfRange(longVars, end, size); - nulls = nulls.get(start, size); - size = longVars.length; - return subset; - } - if (type == Type.DOUBLE_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(doubleVars, start, end)); - doubleVars = Arrays.copyOfRange(doubleVars, end, size); - nulls = nulls.get(start, size); - size = doubleVars.length; - return subset; - } - if (type 
== Type.BINARY_TYPE) { - Column subset = new Column(type, subNulls, binaryVars.subList(start, end)); - binaryVars = binaryVars.subList(end, binaryVars.size()); - nulls = nulls.get(start, size); - size = binaryVars.size(); - return subset; - } - if (type == Type.STRING_TYPE) { - Column subset = new Column(type, subNulls, stringVars.subList(start, end)); - stringVars = stringVars.subList(end, stringVars.size()); - nulls = nulls.get(start, size); - size = stringVars.size(); - return subset; - } - throw new IllegalStateException("invalid union object"); - } - - private static final byte[] MASKS = new byte[] { - 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte)0x80 - }; - - private static BitSet toBitset(byte[] nulls) { - BitSet bitset = new BitSet(); - int bits = nulls.length * 8; - for (int i = 0; i < bits; i++) { - bitset.set(i, (nulls[i / 8] & MASKS[i % 8]) != 0); - } - return bitset; - } - - private static byte[] toBinary(BitSet bitset) { - byte[] nulls = new byte[1 + (bitset.length() / 8)]; - for (int i = 0; i < bitset.length(); i++) { - nulls[i / 8] |= bitset.get(i) ? MASKS[i % 8] : 0; - } - return nulls; - } - - public Type getType() { - return type; - } - - @Override - public Object get(int index) { - if (nulls.get(index)) { - return null; - } - switch (type) { - case BOOLEAN_TYPE: - return boolVars[index]; - case TINYINT_TYPE: - return byteVars[index]; - case SMALLINT_TYPE: - return shortVars[index]; - case INT_TYPE: - return intVars[index]; - case BIGINT_TYPE: - return longVars[index]; - case DOUBLE_TYPE: - return doubleVars[index]; - case STRING_TYPE: - return stringVars.get(index); - case BINARY_TYPE: - return binaryVars.get(index).array(); - } - return null; - } - - @Override - public int size() { - return size; - } - - public TColumn toTColumn() { - TColumn value = new TColumn(); - ByteBuffer nullMasks = ByteBuffer.wrap(toBinary(nulls)); - switch (type) { - case BOOLEAN_TYPE: - value.setBoolVal(new TBoolColumn(Booleans.asList(Arrays.copyOfRange(boolVars, 0, size)), nullMasks)); - break; - case TINYINT_TYPE: - value.setByteVal(new TByteColumn(Bytes.asList(Arrays.copyOfRange(byteVars, 0, size)), nullMasks)); - break; - case SMALLINT_TYPE: - value.setI16Val(new TI16Column(Shorts.asList(Arrays.copyOfRange(shortVars, 0, size)), nullMasks)); - break; - case INT_TYPE: - value.setI32Val(new TI32Column(Ints.asList(Arrays.copyOfRange(intVars, 0, size)), nullMasks)); - break; - case BIGINT_TYPE: - value.setI64Val(new TI64Column(Longs.asList(Arrays.copyOfRange(longVars, 0, size)), nullMasks)); - break; - case DOUBLE_TYPE: - value.setDoubleVal(new TDoubleColumn(Doubles.asList(Arrays.copyOfRange(doubleVars, 0, size)), nullMasks)); - break; - case STRING_TYPE: - value.setStringVal(new TStringColumn(stringVars, nullMasks)); - break; - case BINARY_TYPE: - value.setBinaryVal(new TBinaryColumn(binaryVars, nullMasks)); - break; - } - return value; - } - - private static final ByteBuffer EMPTY_BINARY = ByteBuffer.allocate(0); - private static final String EMPTY_STRING = ""; - - public void addValue(Type type, Object field) { - switch (type) { - case BOOLEAN_TYPE: - nulls.set(size, field == null); - boolVars()[size] = field == null ? true : (Boolean)field; - break; - case TINYINT_TYPE: - nulls.set(size, field == null); - byteVars()[size] = field == null ? 0 : (Byte) field; - break; - case SMALLINT_TYPE: - nulls.set(size, field == null); - shortVars()[size] = field == null ? 0 : (Short)field; - break; - case INT_TYPE: - nulls.set(size, field == null); - intVars()[size] = field == null ? 
0 : (Integer)field; - break; - case BIGINT_TYPE: - nulls.set(size, field == null); - longVars()[size] = field == null ? 0 : (Long)field; - break; - case FLOAT_TYPE: - nulls.set(size, field == null); - doubleVars()[size] = field == null ? 0 : Double.valueOf(field.toString()); - break; - case DOUBLE_TYPE: - nulls.set(size, field == null); - doubleVars()[size] = field == null ? 0 : (Double)field; - break; - case BINARY_TYPE: - nulls.set(binaryVars.size(), field == null); - binaryVars.add(field == null ? EMPTY_BINARY : ByteBuffer.wrap((byte[])field)); - break; - default: - nulls.set(stringVars.size(), field == null); - stringVars.add(field == null ? EMPTY_STRING : String.valueOf(field)); - break; - } - size++; - } - - private boolean[] boolVars() { - if (boolVars.length == size) { - boolean[] newVars = new boolean[size << 1]; - System.arraycopy(boolVars, 0, newVars, 0, size); - return boolVars = newVars; - } - return boolVars; - } - - private byte[] byteVars() { - if (byteVars.length == size) { - byte[] newVars = new byte[size << 1]; - System.arraycopy(byteVars, 0, newVars, 0, size); - return byteVars = newVars; - } - return byteVars; - } - - private short[] shortVars() { - if (shortVars.length == size) { - short[] newVars = new short[size << 1]; - System.arraycopy(shortVars, 0, newVars, 0, size); - return shortVars = newVars; - } - return shortVars; - } - - private int[] intVars() { - if (intVars.length == size) { - int[] newVars = new int[size << 1]; - System.arraycopy(intVars, 0, newVars, 0, size); - return intVars = newVars; - } - return intVars; - } - - private long[] longVars() { - if (longVars.length == size) { - long[] newVars = new long[size << 1]; - System.arraycopy(longVars, 0, newVars, 0, size); - return longVars = newVars; - } - return longVars; - } - - private double[] doubleVars() { - if (doubleVars.length == size) { - double[] newVars = new double[size << 1]; - System.arraycopy(doubleVars, 0, newVars, 0, size); - return doubleVars = newVars; - } - return doubleVars; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java deleted file mode 100644 index 47a582e2223e4..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java +++ /dev/null @@ -1,149 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TColumn; -import org.apache.hive.service.cli.thrift.TRow; -import org.apache.hive.service.cli.thrift.TRowSet; - -/** - * ColumnBasedSet. 
- */ -public class ColumnBasedSet implements RowSet { - - private long startOffset; - - private final Type[] types; // non-null only for writing (server-side) - private final List columns; - - public ColumnBasedSet(TableSchema schema) { - types = schema.toTypes(); - columns = new ArrayList(); - for (ColumnDescriptor colDesc : schema.getColumnDescriptors()) { - columns.add(new Column(colDesc.getType())); - } - } - - public ColumnBasedSet(TRowSet tRowSet) { - types = null; - columns = new ArrayList(); - for (TColumn tvalue : tRowSet.getColumns()) { - columns.add(new Column(tvalue)); - } - startOffset = tRowSet.getStartRowOffset(); - } - - private ColumnBasedSet(Type[] types, List columns, long startOffset) { - this.types = types; - this.columns = columns; - this.startOffset = startOffset; - } - - @Override - public ColumnBasedSet addRow(Object[] fields) { - for (int i = 0; i < fields.length; i++) { - columns.get(i).addValue(types[i], fields[i]); - } - return this; - } - - public List getColumns() { - return columns; - } - - @Override - public int numColumns() { - return columns.size(); - } - - @Override - public int numRows() { - return columns.isEmpty() ? 0 : columns.get(0).size(); - } - - @Override - public ColumnBasedSet extractSubset(int maxRows) { - int numRows = Math.min(numRows(), maxRows); - - List subset = new ArrayList(); - for (int i = 0; i < columns.size(); i++) { - subset.add(columns.get(i).extractSubset(0, numRows)); - } - ColumnBasedSet result = new ColumnBasedSet(types, subset, startOffset); - startOffset += numRows; - return result; - } - - @Override - public long getStartOffset() { - return startOffset; - } - - @Override - public void setStartOffset(long startOffset) { - this.startOffset = startOffset; - } - - public TRowSet toTRowSet() { - TRowSet tRowSet = new TRowSet(startOffset, new ArrayList()); - for (int i = 0; i < columns.size(); i++) { - tRowSet.addToColumns(columns.get(i).toTColumn()); - } - return tRowSet; - } - - @Override - public Iterator iterator() { - return new Iterator() { - - private int index; - private final Object[] convey = new Object[numColumns()]; - - @Override - public boolean hasNext() { - return index < numRows(); - } - - @Override - public Object[] next() { - for (int i = 0; i < columns.size(); i++) { - convey[i] = columns.get(i).get(index); - } - index++; - return convey; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("remove"); - } - }; - } - - public Object[] fill(int index, Object[] convey) { - for (int i = 0; i < columns.size(); i++) { - convey[i] = columns.get(i).get(index); - } - return convey; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java deleted file mode 100644 index f0bbf14693160..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hive.service.cli.thrift.TColumnDesc; - - -/** - * ColumnDescriptor. - * - */ -public class ColumnDescriptor { - private final String name; - private final String comment; - private final TypeDescriptor type; - // ordinal position of this column in the schema - private final int position; - - public ColumnDescriptor(String name, String comment, TypeDescriptor type, int position) { - this.name = name; - this.comment = comment; - this.type = type; - this.position = position; - } - - public ColumnDescriptor(TColumnDesc tColumnDesc) { - name = tColumnDesc.getColumnName(); - comment = tColumnDesc.getComment(); - type = new TypeDescriptor(tColumnDesc.getTypeDesc()); - position = tColumnDesc.getPosition(); - } - - public ColumnDescriptor(FieldSchema column, int position) { - name = column.getName(); - comment = column.getComment(); - type = new TypeDescriptor(column.getType()); - this.position = position; - } - - public static ColumnDescriptor newPrimitiveColumnDescriptor(String name, String comment, Type type, int position) { - // Current usage looks like it's only for metadata columns, but if that changes then - // this method may need to require a type qualifiers aruments. - return new ColumnDescriptor(name, comment, new TypeDescriptor(type), position); - } - - public String getName() { - return name; - } - - public String getComment() { - return comment; - } - - public TypeDescriptor getTypeDescriptor() { - return type; - } - - public int getOrdinalPosition() { - return position; - } - - public TColumnDesc toTColumnDesc() { - TColumnDesc tColumnDesc = new TColumnDesc(); - tColumnDesc.setColumnName(name); - tColumnDesc.setComment(comment); - tColumnDesc.setTypeDesc(type.toTTypeDesc()); - tColumnDesc.setPosition(position); - return tColumnDesc; - } - - public Type getType() { - return type.getType(); - } - - public boolean isPrimitive() { - return type.getType().isPrimitiveType(); - } - - public String getTypeName() { - return type.getTypeName(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java deleted file mode 100644 index 462b93a0f09fe..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ColumnValue.java +++ /dev/null @@ -1,288 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.math.BigDecimal; -import java.sql.Date; -import java.sql.Timestamp; - -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hive.service.cli.thrift.TBoolValue; -import org.apache.hive.service.cli.thrift.TByteValue; -import org.apache.hive.service.cli.thrift.TColumnValue; -import org.apache.hive.service.cli.thrift.TDoubleValue; -import org.apache.hive.service.cli.thrift.TI16Value; -import org.apache.hive.service.cli.thrift.TI32Value; -import org.apache.hive.service.cli.thrift.TI64Value; -import org.apache.hive.service.cli.thrift.TStringValue; - -import org.apache.spark.unsafe.types.UTF8String; - -/** - * Protocols before HIVE_CLI_SERVICE_PROTOCOL_V6 (used by RowBasedSet) - * - */ -public class ColumnValue { - - private static TColumnValue booleanValue(Boolean value) { - TBoolValue tBoolValue = new TBoolValue(); - if (value != null) { - tBoolValue.setValue(value); - } - return TColumnValue.boolVal(tBoolValue); - } - - private static TColumnValue byteValue(Byte value) { - TByteValue tByteValue = new TByteValue(); - if (value != null) { - tByteValue.setValue(value); - } - return TColumnValue.byteVal(tByteValue); - } - - private static TColumnValue shortValue(Short value) { - TI16Value tI16Value = new TI16Value(); - if (value != null) { - tI16Value.setValue(value); - } - return TColumnValue.i16Val(tI16Value); - } - - private static TColumnValue intValue(Integer value) { - TI32Value tI32Value = new TI32Value(); - if (value != null) { - tI32Value.setValue(value); - } - return TColumnValue.i32Val(tI32Value); - } - - private static TColumnValue longValue(Long value) { - TI64Value tI64Value = new TI64Value(); - if (value != null) { - tI64Value.setValue(value); - } - return TColumnValue.i64Val(tI64Value); - } - - private static TColumnValue floatValue(Float value) { - TDoubleValue tDoubleValue = new TDoubleValue(); - if (value != null) { - tDoubleValue.setValue(value); - } - return TColumnValue.doubleVal(tDoubleValue); - } - - private static TColumnValue doubleValue(Double value) { - TDoubleValue tDoubleValue = new TDoubleValue(); - if (value != null) { - tDoubleValue.setValue(value); - } - return TColumnValue.doubleVal(tDoubleValue); - } - - private static TColumnValue stringValue(String value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value); - } - return TColumnValue.stringVal(tStringValue); - } - - private static TColumnValue stringValue(HiveChar value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStringValue); - } - - private static TColumnValue stringValue(HiveVarchar value) { - TStringValue tStringValue = new TStringValue(); - if (value != null) { - tStringValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStringValue); - } - - private 
static TColumnValue stringValue(HiveIntervalYearMonth value) { - TStringValue tStrValue = new TStringValue(); - if (value != null) { - tStrValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStrValue); - } - - private static TColumnValue stringValue(HiveIntervalDayTime value) { - TStringValue tStrValue = new TStringValue(); - if (value != null) { - tStrValue.setValue(value.toString()); - } - return TColumnValue.stringVal(tStrValue); - } - - public static TColumnValue toTColumnValue(Type type, Object value) { - switch (type) { - case BOOLEAN_TYPE: - return booleanValue((Boolean)value); - case TINYINT_TYPE: - return byteValue((Byte)value); - case SMALLINT_TYPE: - return shortValue((Short)value); - case INT_TYPE: - return intValue((Integer)value); - case BIGINT_TYPE: - return longValue((Long)value); - case FLOAT_TYPE: - return floatValue((Float)value); - case DOUBLE_TYPE: - return doubleValue((Double)value); - case STRING_TYPE: - return stringValue((String)value); - case CHAR_TYPE: - return stringValue((HiveChar)value); - case VARCHAR_TYPE: - return stringValue((HiveVarchar)value); - case DATE_TYPE: - case TIMESTAMP_TYPE: - // SPARK-31859, SPARK-31861: converted to string already in SparkExecuteStatementOperation - return stringValue((String)value); - case INTERVAL_YEAR_MONTH_TYPE: - return stringValue((HiveIntervalYearMonth) value); - case INTERVAL_DAY_TIME_TYPE: - return stringValue((HiveIntervalDayTime) value); - case DECIMAL_TYPE: - String plainStr = value == null ? null : ((BigDecimal)value).toPlainString(); - return stringValue(plainStr); - case BINARY_TYPE: - String strVal = value == null ? null : UTF8String.fromBytes((byte[])value).toString(); - return stringValue(strVal); - case ARRAY_TYPE: - case MAP_TYPE: - case STRUCT_TYPE: - case UNION_TYPE: - case USER_DEFINED_TYPE: - return stringValue((String)value); - case NULL_TYPE: - return stringValue((String)value); - default: - return null; - } - } - - private static Boolean getBooleanValue(TBoolValue tBoolValue) { - if (tBoolValue.isSetValue()) { - return tBoolValue.isValue(); - } - return null; - } - - private static Byte getByteValue(TByteValue tByteValue) { - if (tByteValue.isSetValue()) { - return tByteValue.getValue(); - } - return null; - } - - private static Short getShortValue(TI16Value tI16Value) { - if (tI16Value.isSetValue()) { - return tI16Value.getValue(); - } - return null; - } - - private static Integer getIntegerValue(TI32Value tI32Value) { - if (tI32Value.isSetValue()) { - return tI32Value.getValue(); - } - return null; - } - - private static Long getLongValue(TI64Value tI64Value) { - if (tI64Value.isSetValue()) { - return tI64Value.getValue(); - } - return null; - } - - private static Double getDoubleValue(TDoubleValue tDoubleValue) { - if (tDoubleValue.isSetValue()) { - return tDoubleValue.getValue(); - } - return null; - } - - private static String getStringValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return tStringValue.getValue(); - } - return null; - } - - private static Date getDateValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return Date.valueOf(tStringValue.getValue()); - } - return null; - } - - private static Timestamp getTimestampValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return Timestamp.valueOf(tStringValue.getValue()); - } - return null; - } - - private static byte[] getBinaryValue(TStringValue tString) { - if (tString.isSetValue()) { - return tString.getValue().getBytes(); - } - return null; - } - - 
private static BigDecimal getBigDecimalValue(TStringValue tStringValue) { - if (tStringValue.isSetValue()) { - return new BigDecimal(tStringValue.getValue()); - } - return null; - } - - public static Object toColumnValue(TColumnValue value) { - TColumnValue._Fields field = value.getSetField(); - switch (field) { - case BOOL_VAL: - return getBooleanValue(value.getBoolVal()); - case BYTE_VAL: - return getByteValue(value.getByteVal()); - case I16_VAL: - return getShortValue(value.getI16Val()); - case I32_VAL: - return getIntegerValue(value.getI32Val()); - case I64_VAL: - return getLongValue(value.getI64Val()); - case DOUBLE_VAL: - return getDoubleValue(value.getDoubleVal()); - case STRING_VAL: - return getStringValue(value.getStringVal()); - } - throw new IllegalArgumentException("never"); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java deleted file mode 100644 index 9cad5be198c06..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/EmbeddedCLIServiceClient.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.List; -import java.util.Map; - -import org.apache.hive.service.auth.HiveAuthFactory; - - -/** - * EmbeddedCLIServiceClient. 
- * - */ -public class EmbeddedCLIServiceClient extends CLIServiceClient { - private final ICLIService cliService; - - public EmbeddedCLIServiceClient(ICLIService cliService) { - this.cliService = cliService; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, - Map configuration) throws HiveSQLException { - return cliService.openSession(username, password, configuration); - } - - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) throws HiveSQLException { - throw new HiveSQLException("Impersonated session is not supported in the embedded mode"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - cliService.closeSession(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType getInfoType) - throws HiveSQLException { - return cliService.getInfo(sessionHandle, getInfoType); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#executeStatement(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - return cliService.executeStatement(sessionHandle, statement, confOverlay); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, - * java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) throws HiveSQLException { - return cliService.executeStatementAsync(sessionHandle, statement, confOverlay); - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getTypeInfo(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getCatalogs(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, String catalogName, - String schemaName) throws HiveSQLException { - return cliService.getSchemas(sessionHandle, catalogName, schemaName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, String catalogName, - String schemaName, String 
tableName, List tableTypes) throws HiveSQLException { - return cliService.getTables(sessionHandle, catalogName, schemaName, tableName, tableTypes); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) throws HiveSQLException { - return cliService.getTableTypes(sessionHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getColumns(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getColumns(SessionHandle sessionHandle, String catalogName, - String schemaName, String tableName, String columnName) throws HiveSQLException { - return cliService.getColumns(sessionHandle, catalogName, schemaName, tableName, columnName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getFunctions(org.apache.hive.service.cli.SessionHandle, java.lang.String) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException { - return cliService.getFunctions(sessionHandle, catalogName, schemaName, functionName); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) throws HiveSQLException { - return cliService.getOperationStatus(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - cliService.cancelOperation(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - cliService.closeOperation(opHandle); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.CLIServiceClient#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) throws HiveSQLException { - return cliService.getResultSetMetadata(opHandle); - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - return cliService.fetchResults(opHandle, orientation, maxRows, fetchType); - } - - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - return cliService.getDelegationToken(sessionHandle, authFactory, owner, renewer); - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - cliService.cancelDelegationToken(sessionHandle, authFactory, tokenStr); - } - - @Override - public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - cliService.renewDelegationToken(sessionHandle, authFactory, tokenStr); - } -} diff --git 
a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java deleted file mode 100644 index ffa6f2e1f3743..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/FetchOrientation.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TFetchOrientation; - -/** - * FetchOrientation. - * - */ -public enum FetchOrientation { - FETCH_NEXT(TFetchOrientation.FETCH_NEXT), - FETCH_PRIOR(TFetchOrientation.FETCH_PRIOR), - FETCH_RELATIVE(TFetchOrientation.FETCH_RELATIVE), - FETCH_ABSOLUTE(TFetchOrientation.FETCH_ABSOLUTE), - FETCH_FIRST(TFetchOrientation.FETCH_FIRST), - FETCH_LAST(TFetchOrientation.FETCH_LAST); - - private TFetchOrientation tFetchOrientation; - - FetchOrientation(TFetchOrientation tFetchOrientation) { - this.tFetchOrientation = tFetchOrientation; - } - - public static FetchOrientation getFetchOrientation(TFetchOrientation tFetchOrientation) { - for (FetchOrientation fetchOrientation : values()) { - if (tFetchOrientation.equals(fetchOrientation.toTFetchOrientation())) { - return fetchOrientation; - } - } - // TODO: Should this really default to FETCH_NEXT? - return FETCH_NEXT; - } - - public TFetchOrientation toTFetchOrientation() { - return tFetchOrientation; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java deleted file mode 100644 index 8dd33a88fdeb2..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoType.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TGetInfoType; - -/** - * GetInfoType. 
- * - */ -public enum GetInfoType { - CLI_MAX_DRIVER_CONNECTIONS(TGetInfoType.CLI_MAX_DRIVER_CONNECTIONS), - CLI_MAX_CONCURRENT_ACTIVITIES(TGetInfoType.CLI_MAX_CONCURRENT_ACTIVITIES), - CLI_DATA_SOURCE_NAME(TGetInfoType.CLI_DATA_SOURCE_NAME), - CLI_FETCH_DIRECTION(TGetInfoType.CLI_FETCH_DIRECTION), - CLI_SERVER_NAME(TGetInfoType.CLI_SERVER_NAME), - CLI_SEARCH_PATTERN_ESCAPE(TGetInfoType.CLI_SEARCH_PATTERN_ESCAPE), - CLI_DBMS_NAME(TGetInfoType.CLI_DBMS_NAME), - CLI_DBMS_VER(TGetInfoType.CLI_DBMS_VER), - CLI_ACCESSIBLE_TABLES(TGetInfoType.CLI_ACCESSIBLE_TABLES), - CLI_ACCESSIBLE_PROCEDURES(TGetInfoType.CLI_ACCESSIBLE_PROCEDURES), - CLI_CURSOR_COMMIT_BEHAVIOR(TGetInfoType.CLI_CURSOR_COMMIT_BEHAVIOR), - CLI_DATA_SOURCE_READ_ONLY(TGetInfoType.CLI_DATA_SOURCE_READ_ONLY), - CLI_DEFAULT_TXN_ISOLATION(TGetInfoType.CLI_DEFAULT_TXN_ISOLATION), - CLI_IDENTIFIER_CASE(TGetInfoType.CLI_IDENTIFIER_CASE), - CLI_IDENTIFIER_QUOTE_CHAR(TGetInfoType.CLI_IDENTIFIER_QUOTE_CHAR), - CLI_MAX_COLUMN_NAME_LEN(TGetInfoType.CLI_MAX_COLUMN_NAME_LEN), - CLI_MAX_CURSOR_NAME_LEN(TGetInfoType.CLI_MAX_CURSOR_NAME_LEN), - CLI_MAX_SCHEMA_NAME_LEN(TGetInfoType.CLI_MAX_SCHEMA_NAME_LEN), - CLI_MAX_CATALOG_NAME_LEN(TGetInfoType.CLI_MAX_CATALOG_NAME_LEN), - CLI_MAX_TABLE_NAME_LEN(TGetInfoType.CLI_MAX_TABLE_NAME_LEN), - CLI_SCROLL_CONCURRENCY(TGetInfoType.CLI_SCROLL_CONCURRENCY), - CLI_TXN_CAPABLE(TGetInfoType.CLI_TXN_CAPABLE), - CLI_USER_NAME(TGetInfoType.CLI_USER_NAME), - CLI_TXN_ISOLATION_OPTION(TGetInfoType.CLI_TXN_ISOLATION_OPTION), - CLI_INTEGRITY(TGetInfoType.CLI_INTEGRITY), - CLI_GETDATA_EXTENSIONS(TGetInfoType.CLI_GETDATA_EXTENSIONS), - CLI_NULL_COLLATION(TGetInfoType.CLI_NULL_COLLATION), - CLI_ALTER_TABLE(TGetInfoType.CLI_ALTER_TABLE), - CLI_ORDER_BY_COLUMNS_IN_SELECT(TGetInfoType.CLI_ORDER_BY_COLUMNS_IN_SELECT), - CLI_SPECIAL_CHARACTERS(TGetInfoType.CLI_SPECIAL_CHARACTERS), - CLI_MAX_COLUMNS_IN_GROUP_BY(TGetInfoType.CLI_MAX_COLUMNS_IN_GROUP_BY), - CLI_MAX_COLUMNS_IN_INDEX(TGetInfoType.CLI_MAX_COLUMNS_IN_INDEX), - CLI_MAX_COLUMNS_IN_ORDER_BY(TGetInfoType.CLI_MAX_COLUMNS_IN_ORDER_BY), - CLI_MAX_COLUMNS_IN_SELECT(TGetInfoType.CLI_MAX_COLUMNS_IN_SELECT), - CLI_MAX_COLUMNS_IN_TABLE(TGetInfoType.CLI_MAX_COLUMNS_IN_TABLE), - CLI_MAX_INDEX_SIZE(TGetInfoType.CLI_MAX_INDEX_SIZE), - CLI_MAX_ROW_SIZE(TGetInfoType.CLI_MAX_ROW_SIZE), - CLI_MAX_STATEMENT_LEN(TGetInfoType.CLI_MAX_STATEMENT_LEN), - CLI_MAX_TABLES_IN_SELECT(TGetInfoType.CLI_MAX_TABLES_IN_SELECT), - CLI_MAX_USER_NAME_LEN(TGetInfoType.CLI_MAX_USER_NAME_LEN), - CLI_OJ_CAPABILITIES(TGetInfoType.CLI_OJ_CAPABILITIES), - - CLI_XOPEN_CLI_YEAR(TGetInfoType.CLI_XOPEN_CLI_YEAR), - CLI_CURSOR_SENSITIVITY(TGetInfoType.CLI_CURSOR_SENSITIVITY), - CLI_DESCRIBE_PARAMETER(TGetInfoType.CLI_DESCRIBE_PARAMETER), - CLI_CATALOG_NAME(TGetInfoType.CLI_CATALOG_NAME), - CLI_COLLATION_SEQ(TGetInfoType.CLI_COLLATION_SEQ), - CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN); - - private final TGetInfoType tInfoType; - - GetInfoType(TGetInfoType tInfoType) { - this.tInfoType = tInfoType; - } - - public static GetInfoType getGetInfoType(TGetInfoType tGetInfoType) { - for (GetInfoType infoType : values()) { - if (tGetInfoType.equals(infoType.tInfoType)) { - return infoType; - } - } - throw new IllegalArgumentException("Unrecognized Thrift TGetInfoType value: " + tGetInfoType); - } - - public TGetInfoType toTGetInfoType() { - return tInfoType; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java 
b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java deleted file mode 100644 index ba92ff4ab5c11..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/GetInfoValue.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TGetInfoValue; - -/** - * GetInfoValue. - * - */ -public class GetInfoValue { - private String stringValue = null; - private short shortValue; - private int intValue; - private long longValue; - - public GetInfoValue(String stringValue) { - this.stringValue = stringValue; - } - - public GetInfoValue(short shortValue) { - this.shortValue = shortValue; - } - - public GetInfoValue(int intValue) { - this.intValue = intValue; - } - - public GetInfoValue(long longValue) { - this.longValue = longValue; - } - - public GetInfoValue(TGetInfoValue tGetInfoValue) { - switch (tGetInfoValue.getSetField()) { - case STRING_VALUE: - stringValue = tGetInfoValue.getStringValue(); - break; - default: - throw new IllegalArgumentException("Unreconigzed TGetInfoValue"); - } - } - - public TGetInfoValue toTGetInfoValue() { - TGetInfoValue tInfoValue = new TGetInfoValue(); - if (stringValue != null) { - tInfoValue.setStringValue(stringValue); - } - return tInfoValue; - } - - public String getStringValue() { - return stringValue; - } - - public short getShortValue() { - return shortValue; - } - - public int getIntValue() { - return intValue; - } - - public long getLongValue() { - return longValue; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java deleted file mode 100644 index cf3427ae20f3c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Handle.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.THandleIdentifier; - - - - -public abstract class Handle { - - private final HandleIdentifier handleId; - - public Handle() { - handleId = new HandleIdentifier(); - } - - public Handle(HandleIdentifier handleId) { - this.handleId = handleId; - } - - public Handle(THandleIdentifier tHandleIdentifier) { - this.handleId = new HandleIdentifier(tHandleIdentifier); - } - - public HandleIdentifier getHandleIdentifier() { - return handleId; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((handleId == null) ? 0 : handleId.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof Handle)) { - return false; - } - Handle other = (Handle) obj; - if (handleId == null) { - if (other.handleId != null) { - return false; - } - } else if (!handleId.equals(other.handleId)) { - return false; - } - return true; - } - - @Override - public abstract String toString(); - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java deleted file mode 100644 index 4dc80da8dc500..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HandleIdentifier.java +++ /dev/null @@ -1,113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.nio.ByteBuffer; -import java.util.UUID; - -import org.apache.hive.service.cli.thrift.THandleIdentifier; - -/** - * HandleIdentifier. 
- * - */ -public class HandleIdentifier { - private final UUID publicId; - private final UUID secretId; - - public HandleIdentifier() { - publicId = UUID.randomUUID(); - secretId = UUID.randomUUID(); - } - - public HandleIdentifier(UUID publicId, UUID secretId) { - this.publicId = publicId; - this.secretId = secretId; - } - - public HandleIdentifier(THandleIdentifier tHandleId) { - ByteBuffer bb = ByteBuffer.wrap(tHandleId.getGuid()); - this.publicId = new UUID(bb.getLong(), bb.getLong()); - bb = ByteBuffer.wrap(tHandleId.getSecret()); - this.secretId = new UUID(bb.getLong(), bb.getLong()); - } - - public UUID getPublicId() { - return publicId; - } - - public UUID getSecretId() { - return secretId; - } - - public THandleIdentifier toTHandleIdentifier() { - byte[] guid = new byte[16]; - byte[] secret = new byte[16]; - ByteBuffer guidBB = ByteBuffer.wrap(guid); - ByteBuffer secretBB = ByteBuffer.wrap(secret); - guidBB.putLong(publicId.getMostSignificantBits()); - guidBB.putLong(publicId.getLeastSignificantBits()); - secretBB.putLong(secretId.getMostSignificantBits()); - secretBB.putLong(secretId.getLeastSignificantBits()); - return new THandleIdentifier(ByteBuffer.wrap(guid), ByteBuffer.wrap(secret)); - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((publicId == null) ? 0 : publicId.hashCode()); - result = prime * result + ((secretId == null) ? 0 : secretId.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null) { - return false; - } - if (!(obj instanceof HandleIdentifier)) { - return false; - } - HandleIdentifier other = (HandleIdentifier) obj; - if (publicId == null) { - if (other.publicId != null) { - return false; - } - } else if (!publicId.equals(other.publicId)) { - return false; - } - if (secretId == null) { - if (other.secretId != null) { - return false; - } - } else if (!secretId.equals(other.secretId)) { - return false; - } - return true; - } - - @Override - public String toString() { - return publicId.toString(); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java deleted file mode 100644 index 86e57fbf31fe0..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/HiveSQLException.java +++ /dev/null @@ -1,249 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TStatus; -import org.apache.hive.service.cli.thrift.TStatusCode; - -/** - * HiveSQLException. - * - */ -public class HiveSQLException extends SQLException { - - /** - * - */ - private static final long serialVersionUID = -6095254671958748094L; - - /** - * - */ - public HiveSQLException() { - super(); - } - - /** - * @param reason - */ - public HiveSQLException(String reason) { - super(reason); - } - - /** - * @param cause - */ - public HiveSQLException(Throwable cause) { - super(cause); - } - - /** - * @param reason - * @param sqlState - */ - public HiveSQLException(String reason, String sqlState) { - super(reason, sqlState); - } - - /** - * @param reason - * @param cause - */ - public HiveSQLException(String reason, Throwable cause) { - super(reason, cause); - } - - /** - * @param reason - * @param sqlState - * @param vendorCode - */ - public HiveSQLException(String reason, String sqlState, int vendorCode) { - super(reason, sqlState, vendorCode); - } - - /** - * @param reason - * @param sqlState - * @param cause - */ - public HiveSQLException(String reason, String sqlState, Throwable cause) { - super(reason, sqlState, cause); - } - - /** - * @param reason - * @param sqlState - * @param vendorCode - * @param cause - */ - public HiveSQLException(String reason, String sqlState, int vendorCode, Throwable cause) { - super(reason, sqlState, vendorCode, cause); - } - - public HiveSQLException(TStatus status) { - // TODO: set correct vendorCode field - super(status.getErrorMessage(), status.getSqlState(), status.getErrorCode()); - if (status.getInfoMessages() != null) { - initCause(toCause(status.getInfoMessages())); - } - } - - /** - * Converts current object to a {@link TStatus} object - * @return a {@link TStatus} object - */ - public TStatus toTStatus() { - // TODO: convert sqlState, etc. - TStatus tStatus = new TStatus(TStatusCode.ERROR_STATUS); - tStatus.setSqlState(getSQLState()); - tStatus.setErrorCode(getErrorCode()); - tStatus.setErrorMessage(getMessage()); - tStatus.setInfoMessages(toString(this)); - return tStatus; - } - - /** - * Converts the specified {@link Exception} object into a {@link TStatus} object - * @param e a {@link Exception} object - * @return a {@link TStatus} object - */ - public static TStatus toTStatus(Exception e) { - if (e instanceof HiveSQLException) { - return ((HiveSQLException)e).toTStatus(); - } - TStatus tStatus = new TStatus(TStatusCode.ERROR_STATUS); - tStatus.setErrorMessage(e.getMessage()); - tStatus.setInfoMessages(toString(e)); - return tStatus; - } - - /** - * Converts a {@link Throwable} object into a flattened list of texts including its stack trace - * and the stack traces of the nested causes. - * @param ex a {@link Throwable} object - * @return a flattened list of texts including the {@link Throwable} object's stack trace - * and the stack traces of the nested causes. 
- */ - public static List toString(Throwable ex) { - return toString(ex, null); - } - - private static List toString(Throwable cause, StackTraceElement[] parent) { - StackTraceElement[] trace = cause.getStackTrace(); - int m = trace.length - 1; - if (parent != null) { - int n = parent.length - 1; - while (m >= 0 && n >= 0 && trace[m].equals(parent[n])) { - m--; - n--; - } - } - List detail = enroll(cause, trace, m); - cause = cause.getCause(); - if (cause != null) { - detail.addAll(toString(cause, trace)); - } - return detail; - } - - private static List enroll(Throwable ex, StackTraceElement[] trace, int max) { - List details = new ArrayList(); - StringBuilder builder = new StringBuilder(); - builder.append('*').append(ex.getClass().getName()).append(':'); - builder.append(ex.getMessage()).append(':'); - builder.append(trace.length).append(':').append(max); - details.add(builder.toString()); - for (int i = 0; i <= max; i++) { - builder.setLength(0); - builder.append(trace[i].getClassName()).append(':'); - builder.append(trace[i].getMethodName()).append(':'); - String fileName = trace[i].getFileName(); - builder.append(fileName == null ? "" : fileName).append(':'); - builder.append(trace[i].getLineNumber()); - details.add(builder.toString()); - } - return details; - } - - /** - * Converts a flattened list of texts including the stack trace and the stack - * traces of the nested causes into a {@link Throwable} object. - * @param details a flattened list of texts including the stack trace and the stack - * traces of the nested causes - * @return a {@link Throwable} object - */ - public static Throwable toCause(List details) { - return toStackTrace(details, null, 0); - } - - private static Throwable toStackTrace(List details, StackTraceElement[] parent, int index) { - String detail = details.get(index++); - if (!detail.startsWith("*")) { - return null; // should not be happened. 
ignore remaining - } - int i1 = detail.indexOf(':'); - int i3 = detail.lastIndexOf(':'); - int i2 = detail.substring(0, i3).lastIndexOf(':'); - String exceptionClass = detail.substring(1, i1); - String exceptionMessage = detail.substring(i1 + 1, i2); - Throwable ex = newInstance(exceptionClass, exceptionMessage); - - Integer length = Integer.valueOf(detail.substring(i2 + 1, i3)); - Integer unique = Integer.valueOf(detail.substring(i3 + 1)); - - int i = 0; - StackTraceElement[] trace = new StackTraceElement[length]; - for (; i <= unique; i++) { - detail = details.get(index++); - int j1 = detail.indexOf(':'); - int j3 = detail.lastIndexOf(':'); - int j2 = detail.substring(0, j3).lastIndexOf(':'); - String className = detail.substring(0, j1); - String methodName = detail.substring(j1 + 1, j2); - String fileName = detail.substring(j2 + 1, j3); - if (fileName.isEmpty()) { - fileName = null; - } - int lineNumber = Integer.valueOf(detail.substring(j3 + 1)); - trace[i] = new StackTraceElement(className, methodName, fileName, lineNumber); - } - int common = trace.length - i; - if (common > 0) { - System.arraycopy(parent, parent.length - common, trace, trace.length - common, common); - } - if (details.size() > index) { - ex.initCause(toStackTrace(details, trace, index)); - } - ex.setStackTrace(trace); - return ex; - } - - private static Throwable newInstance(String className, String message) { - try { - return (Throwable)Class.forName(className).getConstructor(String.class).newInstance(message); - } catch (Exception e) { - return new RuntimeException(className + ":" + message); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java deleted file mode 100644 index c9cc1f4da56f1..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/ICLIService.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
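HiveSQLException above ships exception chains over Thrift by flattening them into strings: per exception, a header line `*class:message:frameCount:lastUnsharedFrameIndex`, then one `class:method:file:line` entry for each stack frame not shared with the enclosing exception's trace; toCause/toStackTrace reverse the encoding. A standalone sketch of just the flattening half, under that reading of the code above (class and method names here are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

public class FlattenThrowable {
  // Flatten an exception chain into the header + per-frame format used by
  // HiveSQLException.toString/enroll above, eliding frames shared with the parent trace.
  static List<String> flatten(Throwable cause, StackTraceElement[] parent) {
    StackTraceElement[] trace = cause.getStackTrace();
    int m = trace.length - 1;
    if (parent != null) {
      int n = parent.length - 1;
      while (m >= 0 && n >= 0 && trace[m].equals(parent[n])) {
        m--;
        n--;
      }
    }
    List<String> out = new ArrayList<>();
    out.add("*" + cause.getClass().getName() + ":" + cause.getMessage()
        + ":" + trace.length + ":" + m);
    for (int i = 0; i <= m; i++) {
      StackTraceElement e = trace[i];
      String file = e.getFileName() == null ? "" : e.getFileName();
      out.add(e.getClassName() + ":" + e.getMethodName() + ":" + file + ":" + e.getLineNumber());
    }
    if (cause.getCause() != null) {
      out.addAll(flatten(cause.getCause(), trace));
    }
    return out;
  }

  public static void main(String[] args) {
    Exception e = new RuntimeException("outer", new IllegalStateException("inner"));
    flatten(e, null).forEach(System.out::println);
  }
}
```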
- */ -package org.apache.hive.service.cli; - -import java.util.List; -import java.util.Map; - - - - -import org.apache.hive.service.auth.HiveAuthFactory; - -public interface ICLIService { - - SessionHandle openSession(String username, String password, - Map configuration) - throws HiveSQLException; - - SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) - throws HiveSQLException; - - void closeSession(SessionHandle sessionHandle) - throws HiveSQLException; - - GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType infoType) - throws HiveSQLException; - - OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException; - - OperationHandle executeStatementAsync(SessionHandle sessionHandle, - String statement, Map confOverlay) - throws HiveSQLException; - - OperationHandle getTypeInfo(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getCatalogs(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getSchemas(SessionHandle sessionHandle, - String catalogName, String schemaName) - throws HiveSQLException; - - OperationHandle getTables(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, List tableTypes) - throws HiveSQLException; - - OperationHandle getTableTypes(SessionHandle sessionHandle) - throws HiveSQLException; - - OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException; - - OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) - throws HiveSQLException; - - OperationStatus getOperationStatus(OperationHandle opHandle) - throws HiveSQLException; - - void cancelOperation(OperationHandle opHandle) - throws HiveSQLException; - - void closeOperation(OperationHandle opHandle) - throws HiveSQLException; - - TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException; - - String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException; - - void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException; - - void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException; - - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java deleted file mode 100644 index 5426e28471239..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationHandle.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationHandle; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -public class OperationHandle extends Handle { - - private final OperationType opType; - private final TProtocolVersion protocol; - private boolean hasResultSet = false; - - public OperationHandle(OperationType opType, TProtocolVersion protocol) { - super(); - this.opType = opType; - this.protocol = protocol; - } - - // dummy handle for ThriftCLIService - public OperationHandle(TOperationHandle tOperationHandle) { - this(tOperationHandle, TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1); - } - - public OperationHandle(TOperationHandle tOperationHandle, TProtocolVersion protocol) { - super(tOperationHandle.getOperationId()); - this.opType = OperationType.getOperationType(tOperationHandle.getOperationType()); - this.hasResultSet = tOperationHandle.isHasResultSet(); - this.protocol = protocol; - } - - public OperationType getOperationType() { - return opType; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - } - - public boolean hasResultSet() { - return hasResultSet; - } - - public TOperationHandle toTOperationHandle() { - TOperationHandle tOperationHandle = new TOperationHandle(); - tOperationHandle.setOperationId(getHandleIdentifier().toTHandleIdentifier()); - tOperationHandle.setOperationType(opType.toTOperationType()); - tOperationHandle.setHasResultSet(hasResultSet); - return tOperationHandle; - } - - public TProtocolVersion getProtocolVersion() { - return protocol; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = super.hashCode(); - result = prime * result + ((opType == null) ? 0 : opType.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (!super.equals(obj)) { - return false; - } - if (!(obj instanceof OperationHandle)) { - return false; - } - OperationHandle other = (OperationHandle) obj; - if (opType != other.opType) { - return false; - } - return true; - } - - @Override - public String toString() { - return "OperationHandle [opType=" + opType + ", getHandleIdentifier()=" + getHandleIdentifier() - + "]"; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java deleted file mode 100644 index 1165180118413..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationState.java +++ /dev/null @@ -1,108 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationState; - -/** - * OperationState. - * - */ -public enum OperationState { - INITIALIZED(TOperationState.INITIALIZED_STATE, false), - RUNNING(TOperationState.RUNNING_STATE, false), - FINISHED(TOperationState.FINISHED_STATE, true), - CANCELED(TOperationState.CANCELED_STATE, true), - CLOSED(TOperationState.CLOSED_STATE, true), - ERROR(TOperationState.ERROR_STATE, true), - UNKNOWN(TOperationState.UKNOWN_STATE, false), - PENDING(TOperationState.PENDING_STATE, false); - - private final TOperationState tOperationState; - private final boolean terminal; - - OperationState(TOperationState tOperationState, boolean terminal) { - this.tOperationState = tOperationState; - this.terminal = terminal; - } - - // must be sync with TOperationState in order - public static OperationState getOperationState(TOperationState tOperationState) { - return OperationState.values()[tOperationState.getValue()]; - } - - public static void validateTransition(OperationState oldState, - OperationState newState) - throws HiveSQLException { - switch (oldState) { - case INITIALIZED: - switch (newState) { - case PENDING: - case RUNNING: - case CANCELED: - case CLOSED: - return; - } - break; - case PENDING: - switch (newState) { - case RUNNING: - case FINISHED: - case CANCELED: - case ERROR: - case CLOSED: - return; - } - break; - case RUNNING: - switch (newState) { - case FINISHED: - case CANCELED: - case ERROR: - case CLOSED: - return; - } - break; - case FINISHED: - case CANCELED: - case ERROR: - if (OperationState.CLOSED.equals(newState)) { - return; - } - break; - default: - // fall-through - } - throw new HiveSQLException("Illegal Operation state transition " + - "from " + oldState + " to " + newState); - } - - public void validateTransition(OperationState newState) - throws HiveSQLException { - validateTransition(this, newState); - } - - public TOperationState toTOperationState() { - return tOperationState; - } - - public boolean isTerminal() { - return terminal; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java deleted file mode 100644 index 429d9a4c25688..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/OperationType.java +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
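OperationState.validateTransition above encodes the operation lifecycle as nested switches. The same rules can be read as a small transition table; a JDK-only sketch of that reading, not part of this patch (state names follow the enum above, the EnumMap/EnumSet encoding is just one way to express it):

```java
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Map;

public class LifecycleCheck {
  enum State { INITIALIZED, PENDING, RUNNING, FINISHED, CANCELED, ERROR, CLOSED, UNKNOWN }

  // Allowed transitions, mirroring validateTransition above; states absent from the
  // map (CLOSED, UNKNOWN) allow no further transitions.
  static final Map<State, EnumSet<State>> ALLOWED = new EnumMap<>(State.class);
  static {
    ALLOWED.put(State.INITIALIZED,
        EnumSet.of(State.PENDING, State.RUNNING, State.CANCELED, State.CLOSED));
    ALLOWED.put(State.PENDING,
        EnumSet.of(State.RUNNING, State.FINISHED, State.CANCELED, State.ERROR, State.CLOSED));
    ALLOWED.put(State.RUNNING,
        EnumSet.of(State.FINISHED, State.CANCELED, State.ERROR, State.CLOSED));
    ALLOWED.put(State.FINISHED, EnumSet.of(State.CLOSED));
    ALLOWED.put(State.CANCELED, EnumSet.of(State.CLOSED));
    ALLOWED.put(State.ERROR, EnumSet.of(State.CLOSED));
  }

  static boolean canTransition(State from, State to) {
    return ALLOWED.getOrDefault(from, EnumSet.noneOf(State.class)).contains(to);
  }

  public static void main(String[] args) {
    System.out.println(canTransition(State.RUNNING, State.FINISHED)); // true
    System.out.println(canTransition(State.CLOSED, State.RUNNING));   // false
  }
}
```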
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TOperationType; - -/** - * OperationType. - * - */ -public enum OperationType { - - UNKNOWN_OPERATION(TOperationType.UNKNOWN), - EXECUTE_STATEMENT(TOperationType.EXECUTE_STATEMENT), - GET_TYPE_INFO(TOperationType.GET_TYPE_INFO), - GET_CATALOGS(TOperationType.GET_CATALOGS), - GET_SCHEMAS(TOperationType.GET_SCHEMAS), - GET_TABLES(TOperationType.GET_TABLES), - GET_TABLE_TYPES(TOperationType.GET_TABLE_TYPES), - GET_COLUMNS(TOperationType.GET_COLUMNS), - GET_FUNCTIONS(TOperationType.GET_FUNCTIONS); - - private TOperationType tOperationType; - - OperationType(TOperationType tOpType) { - this.tOperationType = tOpType; - } - - public static OperationType getOperationType(TOperationType tOperationType) { - // TODO: replace this with a Map? - for (OperationType opType : values()) { - if (tOperationType.equals(opType.tOperationType)) { - return opType; - } - } - return OperationType.UNKNOWN_OPERATION; - } - - public TOperationType toTOperationType() { - return tOperationType; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java deleted file mode 100644 index 6e4d43fd5df63..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/PatternOrIdentifier.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -/** - * PatternOrIdentifier. 
- * - */ -public class PatternOrIdentifier { - - boolean isPattern = false; - String text; - - public PatternOrIdentifier(String tpoi) { - text = tpoi; - isPattern = false; - } - - public boolean isPattern() { - return isPattern; - } - - public boolean isIdentifier() { - return !isPattern; - } - - @Override - public String toString() { - return text; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java deleted file mode 100644 index 7452137f077db..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowBasedSet.java +++ /dev/null @@ -1,140 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -import org.apache.hive.service.cli.thrift.TColumnValue; -import org.apache.hive.service.cli.thrift.TRow; -import org.apache.hive.service.cli.thrift.TRowSet; - -/** - * RowBasedSet - */ -public class RowBasedSet implements RowSet { - - private long startOffset; - - private final Type[] types; // non-null only for writing (server-side) - private final RemovableList rows; - - public RowBasedSet(TableSchema schema) { - types = schema.toTypes(); - rows = new RemovableList(); - } - - public RowBasedSet(TRowSet tRowSet) { - types = null; - rows = new RemovableList(tRowSet.getRows()); - startOffset = tRowSet.getStartRowOffset(); - } - - private RowBasedSet(Type[] types, List rows, long startOffset) { - this.types = types; - this.rows = new RemovableList(rows); - this.startOffset = startOffset; - } - - @Override - public RowBasedSet addRow(Object[] fields) { - TRow tRow = new TRow(); - for (int i = 0; i < fields.length; i++) { - tRow.addToColVals(ColumnValue.toTColumnValue(types[i], fields[i])); - } - rows.add(tRow); - return this; - } - - @Override - public int numColumns() { - return rows.isEmpty() ? 
0 : rows.get(0).getColVals().size(); - } - - @Override - public int numRows() { - return rows.size(); - } - - public RowBasedSet extractSubset(int maxRows) { - int numRows = Math.min(numRows(), maxRows); - RowBasedSet result = new RowBasedSet(types, rows.subList(0, numRows), startOffset); - rows.removeRange(0, numRows); - startOffset += numRows; - return result; - } - - public long getStartOffset() { - return startOffset; - } - - public void setStartOffset(long startOffset) { - this.startOffset = startOffset; - } - - public int getSize() { - return rows.size(); - } - - public TRowSet toTRowSet() { - TRowSet tRowSet = new TRowSet(); - tRowSet.setStartRowOffset(startOffset); - tRowSet.setRows(new ArrayList(rows)); - return tRowSet; - } - - @Override - public Iterator iterator() { - return new Iterator() { - - final Iterator iterator = rows.iterator(); - final Object[] convey = new Object[numColumns()]; - - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public Object[] next() { - TRow row = iterator.next(); - List values = row.getColVals(); - for (int i = 0; i < values.size(); i++) { - convey[i] = ColumnValue.toColumnValue(values.get(i)); - } - return convey; - } - - @Override - public void remove() { - throw new UnsupportedOperationException("remove"); - } - }; - } - - private static class RemovableList extends ArrayList { - RemovableList() { super(); } - RemovableList(List rows) { super(rows); } - @Override - public void removeRange(int fromIndex, int toIndex) { - super.removeRange(fromIndex, toIndex); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java deleted file mode 100644 index ab0787e1d389e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSet.java +++ /dev/null @@ -1,38 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
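RowBasedSet above pages results by extracting a prefix of the buffered rows, dropping it from the buffer, and advancing a start offset; it subclasses ArrayList solely to expose the protected removeRange. A generic sketch of that pattern, not part of this patch (the element type, class name, and method names are illustrative):

```java
import java.util.ArrayList;
import java.util.List;

public class PagingBuffer<T> {
  // Subclassing ArrayList only to make the protected removeRange callable,
  // as RowBasedSet.RemovableList does above.
  private static class RemovableList<E> extends ArrayList<E> {
    @Override
    public void removeRange(int fromIndex, int toIndex) {
      super.removeRange(fromIndex, toIndex);
    }
  }

  private final RemovableList<T> rows = new RemovableList<>();
  private long startOffset;

  public void add(T row) {
    rows.add(row);
  }

  // Return up to maxRows buffered rows, drop them from the buffer, advance the offset;
  // the sublist is copied before removeRange so the returned page stays valid.
  public List<T> extract(int maxRows) {
    int n = Math.min(rows.size(), maxRows);
    List<T> page = new ArrayList<>(rows.subList(0, n));
    rows.removeRange(0, n);
    startOffset += n;
    return page;
  }

  public long getStartOffset() {
    return startOffset;
  }

  public static void main(String[] args) {
    PagingBuffer<String> buf = new PagingBuffer<>();
    buf.add("a"); buf.add("b"); buf.add("c");
    System.out.println(buf.extract(2));       // [a, b]
    System.out.println(buf.getStartOffset()); // 2
  }
}
```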
- */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TRowSet; - -public interface RowSet extends Iterable { - - RowSet addRow(Object[] fields); - - RowSet extractSubset(int maxRows); - - int numColumns(); - - int numRows(); - - long getStartOffset(); - - void setStartOffset(long startOffset); - - TRowSet toTRowSet(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java deleted file mode 100644 index e8f68eaaf9063..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/RowSetFactory.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.cli.thrift.TRowSet; - -import static org.apache.hive.service.cli.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6; - -public class RowSetFactory { - - public static RowSet create(TableSchema schema, TProtocolVersion version) { - if (version.getValue() >= HIVE_CLI_SERVICE_PROTOCOL_V6.getValue()) { - return new ColumnBasedSet(schema); - } - return new RowBasedSet(schema); - } - - public static RowSet create(TRowSet results, TProtocolVersion version) { - if (version.getValue() >= HIVE_CLI_SERVICE_PROTOCOL_V6.getValue()) { - return new ColumnBasedSet(results); - } - return new RowBasedSet(results); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java deleted file mode 100644 index 52e0ad4834d8b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/SessionHandle.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli; - -import java.util.UUID; - -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.cli.thrift.TSessionHandle; - - -/** - * SessionHandle. - * - */ -public class SessionHandle extends Handle { - - private final TProtocolVersion protocol; - - public SessionHandle(TProtocolVersion protocol) { - this.protocol = protocol; - } - - // dummy handle for ThriftCLIService - public SessionHandle(TSessionHandle tSessionHandle) { - this(tSessionHandle, TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1); - } - - public SessionHandle(TSessionHandle tSessionHandle, TProtocolVersion protocol) { - super(tSessionHandle.getSessionId()); - this.protocol = protocol; - } - - public UUID getSessionId() { - return getHandleIdentifier().getPublicId(); - } - - public TSessionHandle toTSessionHandle() { - TSessionHandle tSessionHandle = new TSessionHandle(); - tSessionHandle.setSessionId(getHandleIdentifier().toTHandleIdentifier()); - return tSessionHandle; - } - - public TProtocolVersion getProtocolVersion() { - return protocol; - } - - @Override - public String toString() { - return "SessionHandle [" + getHandleIdentifier() + "]"; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java deleted file mode 100644 index ee019bc737101..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TableSchema.java +++ /dev/null @@ -1,102 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hive.service.cli.thrift.TColumnDesc; -import org.apache.hive.service.cli.thrift.TTableSchema; - -/** - * TableSchema. 
- * - */ -public class TableSchema { - private final List columns = new ArrayList(); - - public TableSchema() { - } - - public TableSchema(int numColumns) { - // TODO: remove this constructor - } - - public TableSchema(TTableSchema tTableSchema) { - for (TColumnDesc tColumnDesc : tTableSchema.getColumns()) { - columns.add(new ColumnDescriptor(tColumnDesc)); - } - } - - public TableSchema(List fieldSchemas) { - int pos = 1; - for (FieldSchema field : fieldSchemas) { - columns.add(new ColumnDescriptor(field, pos++)); - } - } - - public TableSchema(Schema schema) { - this(schema.getFieldSchemas()); - } - - public List getColumnDescriptors() { - return new ArrayList(columns); - } - - public ColumnDescriptor getColumnDescriptorAt(int pos) { - return columns.get(pos); - } - - public int getSize() { - return columns.size(); - } - - public void clear() { - columns.clear(); - } - - - public TTableSchema toTTableSchema() { - TTableSchema tTableSchema = new TTableSchema(); - for (ColumnDescriptor col : columns) { - tTableSchema.addToColumns(col.toTColumnDesc()); - } - return tTableSchema; - } - - public Type[] toTypes() { - Type[] types = new Type[columns.size()]; - for (int i = 0; i < types.length; i++) { - types[i] = columns.get(i).getType(); - } - return types; - } - - public TableSchema addPrimitiveColumn(String columnName, Type columnType, String columnComment) { - columns.add(ColumnDescriptor.newPrimitiveColumnDescriptor(columnName, columnComment, columnType, columns.size() + 1)); - return this; - } - - public TableSchema addStringColumn(String columnName, String columnComment) { - columns.add(ColumnDescriptor.newPrimitiveColumnDescriptor(columnName, columnComment, Type.STRING_TYPE, columns.size() + 1)); - return this; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java deleted file mode 100644 index 7752ec03a29b7..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/Type.java +++ /dev/null @@ -1,349 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.sql.DatabaseMetaData; -import java.util.Locale; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hive.service.cli.thrift.TTypeId; - -/** - * Type. 
- * - */ -public enum Type { - NULL_TYPE("VOID", - java.sql.Types.NULL, - TTypeId.NULL_TYPE), - BOOLEAN_TYPE("BOOLEAN", - java.sql.Types.BOOLEAN, - TTypeId.BOOLEAN_TYPE), - TINYINT_TYPE("TINYINT", - java.sql.Types.TINYINT, - TTypeId.TINYINT_TYPE), - SMALLINT_TYPE("SMALLINT", - java.sql.Types.SMALLINT, - TTypeId.SMALLINT_TYPE), - INT_TYPE("INT", - java.sql.Types.INTEGER, - TTypeId.INT_TYPE), - BIGINT_TYPE("BIGINT", - java.sql.Types.BIGINT, - TTypeId.BIGINT_TYPE), - FLOAT_TYPE("FLOAT", - java.sql.Types.FLOAT, - TTypeId.FLOAT_TYPE), - DOUBLE_TYPE("DOUBLE", - java.sql.Types.DOUBLE, - TTypeId.DOUBLE_TYPE), - STRING_TYPE("STRING", - java.sql.Types.VARCHAR, - TTypeId.STRING_TYPE), - CHAR_TYPE("CHAR", - java.sql.Types.CHAR, - TTypeId.CHAR_TYPE, - true, false, false), - VARCHAR_TYPE("VARCHAR", - java.sql.Types.VARCHAR, - TTypeId.VARCHAR_TYPE, - true, false, false), - DATE_TYPE("DATE", - java.sql.Types.DATE, - TTypeId.DATE_TYPE), - TIMESTAMP_TYPE("TIMESTAMP", - java.sql.Types.TIMESTAMP, - TTypeId.TIMESTAMP_TYPE), - INTERVAL_YEAR_MONTH_TYPE("INTERVAL_YEAR_MONTH", - java.sql.Types.OTHER, - TTypeId.INTERVAL_YEAR_MONTH_TYPE), - INTERVAL_DAY_TIME_TYPE("INTERVAL_DAY_TIME", - java.sql.Types.OTHER, - TTypeId.INTERVAL_DAY_TIME_TYPE), - BINARY_TYPE("BINARY", - java.sql.Types.BINARY, - TTypeId.BINARY_TYPE), - DECIMAL_TYPE("DECIMAL", - java.sql.Types.DECIMAL, - TTypeId.DECIMAL_TYPE, - true, false, false), - ARRAY_TYPE("ARRAY", - java.sql.Types.ARRAY, - TTypeId.ARRAY_TYPE, - true, true), - MAP_TYPE("MAP", - java.sql.Types.JAVA_OBJECT, - TTypeId.MAP_TYPE, - true, true), - STRUCT_TYPE("STRUCT", - java.sql.Types.STRUCT, - TTypeId.STRUCT_TYPE, - true, false), - UNION_TYPE("UNIONTYPE", - java.sql.Types.OTHER, - TTypeId.UNION_TYPE, - true, false), - USER_DEFINED_TYPE("USER_DEFINED", - java.sql.Types.OTHER, - TTypeId.USER_DEFINED_TYPE, - true, false); - - private final String name; - private final TTypeId tType; - private final int javaSQLType; - private final boolean isQualified; - private final boolean isComplex; - private final boolean isCollection; - - Type(String name, int javaSQLType, TTypeId tType, boolean isQualified, boolean isComplex, boolean isCollection) { - this.name = name; - this.javaSQLType = javaSQLType; - this.tType = tType; - this.isQualified = isQualified; - this.isComplex = isComplex; - this.isCollection = isCollection; - } - - Type(String name, int javaSQLType, TTypeId tType, boolean isComplex, boolean isCollection) { - this(name, javaSQLType, tType, false, isComplex, isCollection); - } - - Type(String name, int javaSqlType, TTypeId tType) { - this(name, javaSqlType, tType, false, false, false); - } - - public boolean isPrimitiveType() { - return !isComplex; - } - - public boolean isQualifiedType() { - return isQualified; - } - - public boolean isComplexType() { - return isComplex; - } - - public boolean isCollectionType() { - return isCollection; - } - - public static Type getType(TTypeId tType) { - for (Type type : values()) { - if (tType.equals(type.tType)) { - return type; - } - } - throw new IllegalArgumentException("Unregonized Thrift TTypeId value: " + tType); - } - - public static Type getType(String name) { - if (name == null) { - throw new IllegalArgumentException("Invalid type name: null"); - } - for (Type type : values()) { - if (name.equalsIgnoreCase(type.name)) { - return type; - } else if (type.isQualifiedType() || type.isComplexType()) { - if (name.toUpperCase(Locale.ROOT).startsWith(type.name)) { - return type; - } - } - } - throw new IllegalArgumentException("Unrecognized type 
name: " + name); - } - - /** - * Radix for this type (typically either 2 or 10) - * Null is returned for data types where this is not applicable. - */ - public Integer getNumPrecRadix() { - if (this.isNumericType()) { - return 10; - } - return null; - } - - /** - * Maximum precision for numeric types. - * Returns null for non-numeric types. - * @return - */ - public Integer getMaxPrecision() { - switch (this) { - case TINYINT_TYPE: - return 3; - case SMALLINT_TYPE: - return 5; - case INT_TYPE: - return 10; - case BIGINT_TYPE: - return 19; - case FLOAT_TYPE: - return 7; - case DOUBLE_TYPE: - return 15; - case DECIMAL_TYPE: - return HiveDecimal.MAX_PRECISION; - default: - return null; - } - } - - public boolean isNumericType() { - switch (this) { - case TINYINT_TYPE: - case SMALLINT_TYPE: - case INT_TYPE: - case BIGINT_TYPE: - case FLOAT_TYPE: - case DOUBLE_TYPE: - case DECIMAL_TYPE: - return true; - default: - return false; - } - } - - /** - * Prefix used to quote a literal of this type (may be null) - */ - public String getLiteralPrefix() { - return null; - } - - /** - * Suffix used to quote a literal of this type (may be null) - * @return - */ - public String getLiteralSuffix() { - return null; - } - - /** - * Can you use NULL for this type? - * @return - * DatabaseMetaData.typeNoNulls - does not allow NULL values - * DatabaseMetaData.typeNullable - allows NULL values - * DatabaseMetaData.typeNullableUnknown - nullability unknown - */ - public Short getNullable() { - // All Hive types are nullable - return DatabaseMetaData.typeNullable; - } - - /** - * Is the type case sensitive? - * @return - */ - public Boolean isCaseSensitive() { - switch (this) { - case STRING_TYPE: - return true; - default: - return false; - } - } - - /** - * Parameters used in creating the type (may be null) - * @return - */ - public String getCreateParams() { - return null; - } - - /** - * Can you use WHERE based on this type? - * @return - * DatabaseMetaData.typePredNone - No support - * DatabaseMetaData.typePredChar - Only support with WHERE .. LIKE - * DatabaseMetaData.typePredBasic - Supported except for WHERE .. LIKE - * DatabaseMetaData.typeSearchable - Supported for all WHERE .. - */ - public Short getSearchable() { - if (isPrimitiveType()) { - return DatabaseMetaData.typeSearchable; - } - return DatabaseMetaData.typePredNone; - } - - /** - * Is this type unsigned? - * @return - */ - public Boolean isUnsignedAttribute() { - if (isNumericType()) { - return false; - } - return true; - } - - /** - * Can this type represent money? - * @return - */ - public Boolean isFixedPrecScale() { - return false; - } - - /** - * Can this type be used for an auto-increment value? - * @return - */ - public Boolean isAutoIncrement() { - return false; - } - - /** - * Localized version of type name (may be null). 
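Type.getType(String) above resolves a type name by case-insensitive exact match, falling back to a prefix match for qualified and complex types so that names such as DECIMAL(10,2) or ARRAY<INT> still resolve to their base type. A trimmed-down, JDK-only sketch of that lookup, not part of this patch (only a few constants are reproduced and the names here are illustrative):

```java
import java.util.Locale;

public class TypeNameLookup {
  enum SimpleType {
    INT("INT", false),
    DECIMAL("DECIMAL", true),   // qualified: may carry (precision, scale)
    ARRAY("ARRAY", true);       // complex: may carry an element type

    final String sqlName;
    final boolean prefixMatch;  // qualified/complex types also match on prefix, as above

    SimpleType(String sqlName, boolean prefixMatch) {
      this.sqlName = sqlName;
      this.prefixMatch = prefixMatch;
    }

    static SimpleType fromName(String typeName) {
      String upper = typeName.toUpperCase(Locale.ROOT);
      for (SimpleType t : values()) {
        if (upper.equals(t.sqlName) || (t.prefixMatch && upper.startsWith(t.sqlName))) {
          return t;
        }
      }
      throw new IllegalArgumentException("Unrecognized type name: " + typeName);
    }
  }

  public static void main(String[] args) {
    System.out.println(SimpleType.fromName("int"));           // INT
    System.out.println(SimpleType.fromName("decimal(10,2)")); // DECIMAL
    System.out.println(SimpleType.fromName("array<int>"));    // ARRAY
  }
}
```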
- * @return - */ - public String getLocalizedName() { - return null; - } - - /** - * Minimum scale supported for this type - * @return - */ - public Short getMinimumScale() { - return 0; - } - - /** - * Maximum scale supported for this type - * @return - */ - public Short getMaximumScale() { - return 0; - } - - public TTypeId toTType() { - return tType; - } - - public int toJavaSQLType() { - return javaSQLType; - } - - public String getName() { - return name; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java deleted file mode 100644 index b80fd67884add..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeDescriptor.java +++ /dev/null @@ -1,159 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.List; - -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hive.service.cli.thrift.TPrimitiveTypeEntry; -import org.apache.hive.service.cli.thrift.TTypeDesc; -import org.apache.hive.service.cli.thrift.TTypeEntry; - -/** - * TypeDescriptor. 
- * - */ -public class TypeDescriptor { - - private final Type type; - private String typeName = null; - private TypeQualifiers typeQualifiers = null; - - public TypeDescriptor(Type type) { - this.type = type; - } - - public TypeDescriptor(TTypeDesc tTypeDesc) { - List tTypeEntries = tTypeDesc.getTypes(); - TPrimitiveTypeEntry top = tTypeEntries.get(0).getPrimitiveEntry(); - this.type = Type.getType(top.getType()); - if (top.isSetTypeQualifiers()) { - setTypeQualifiers(TypeQualifiers.fromTTypeQualifiers(top.getTypeQualifiers())); - } - } - - public TypeDescriptor(String typeName) { - this.type = Type.getType(typeName); - if (this.type.isComplexType()) { - this.typeName = typeName; - } else if (this.type.isQualifiedType()) { - PrimitiveTypeInfo pti = TypeInfoFactory.getPrimitiveTypeInfo(typeName); - setTypeQualifiers(TypeQualifiers.fromTypeInfo(pti)); - } - } - - public Type getType() { - return type; - } - - public TTypeDesc toTTypeDesc() { - TPrimitiveTypeEntry primitiveEntry = new TPrimitiveTypeEntry(type.toTType()); - if (getTypeQualifiers() != null) { - primitiveEntry.setTypeQualifiers(getTypeQualifiers().toTTypeQualifiers()); - } - TTypeEntry entry = TTypeEntry.primitiveEntry(primitiveEntry); - - TTypeDesc desc = new TTypeDesc(); - desc.addToTypes(entry); - return desc; - } - - public String getTypeName() { - if (typeName != null) { - return typeName; - } else { - return type.getName(); - } - } - - public TypeQualifiers getTypeQualifiers() { - return typeQualifiers; - } - - public void setTypeQualifiers(TypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - /** - * The column size for this type. - * For numeric data this is the maximum precision. - * For character data this is the length in characters. - * For datetime types this is the length in characters of the String representation - * (assuming the maximum allowed precision of the fractional seconds component). - * For binary data this is the length in bytes. - * Null is returned for data types where the column size is not applicable. - */ - public Integer getColumnSize() { - if (type.isNumericType()) { - return getPrecision(); - } - switch (type) { - case STRING_TYPE: - case BINARY_TYPE: - return Integer.MAX_VALUE; - case CHAR_TYPE: - case VARCHAR_TYPE: - return typeQualifiers.getCharacterMaximumLength(); - case DATE_TYPE: - return 10; - case TIMESTAMP_TYPE: - return 29; - default: - return null; - } - } - - /** - * Maximum precision for numeric types. - * Returns null for non-numeric types. - * @return - */ - public Integer getPrecision() { - if (this.type == Type.DECIMAL_TYPE) { - return typeQualifiers.getPrecision(); - } - return this.type.getMaxPrecision(); - } - - /** - * The number of fractional digits for this type. - * Null is returned for data types where this is not applicable. 
- */ - public Integer getDecimalDigits() { - switch (this.type) { - case BOOLEAN_TYPE: - case TINYINT_TYPE: - case SMALLINT_TYPE: - case INT_TYPE: - case BIGINT_TYPE: - return 0; - case FLOAT_TYPE: - return 7; - case DOUBLE_TYPE: - return 15; - case DECIMAL_TYPE: - return typeQualifiers.getScale(); - case TIMESTAMP_TYPE: - return 9; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java deleted file mode 100644 index c6da52c15a2b5..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/TypeQualifiers.java +++ /dev/null @@ -1,133 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli; - -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hive.service.cli.thrift.TCLIServiceConstants; -import org.apache.hive.service.cli.thrift.TTypeQualifierValue; -import org.apache.hive.service.cli.thrift.TTypeQualifiers; - -/** - * This class holds type qualifier information for a primitive type, - * such as char/varchar length or decimal precision/scale. 
- */ -public class TypeQualifiers { - private Integer characterMaximumLength; - private Integer precision; - private Integer scale; - - public TypeQualifiers() {} - - public Integer getCharacterMaximumLength() { - return characterMaximumLength; - } - public void setCharacterMaximumLength(int characterMaximumLength) { - this.characterMaximumLength = characterMaximumLength; - } - - public TTypeQualifiers toTTypeQualifiers() { - TTypeQualifiers ret = null; - - Map qMap = new HashMap(); - if (getCharacterMaximumLength() != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(getCharacterMaximumLength().intValue()); - qMap.put(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH, val); - } - - if (precision != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(precision.intValue()); - qMap.put(TCLIServiceConstants.PRECISION, val); - } - - if (scale != null) { - TTypeQualifierValue val = new TTypeQualifierValue(); - val.setI32Value(scale.intValue()); - qMap.put(TCLIServiceConstants.SCALE, val); - } - - if (qMap.size() > 0) { - ret = new TTypeQualifiers(qMap); - } - - return ret; - } - - public static TypeQualifiers fromTTypeQualifiers(TTypeQualifiers ttq) { - TypeQualifiers ret = null; - if (ttq != null) { - ret = new TypeQualifiers(); - Map tqMap = ttq.getQualifiers(); - - if (tqMap.containsKey(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH)) { - ret.setCharacterMaximumLength( - tqMap.get(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH).getI32Value()); - } - - if (tqMap.containsKey(TCLIServiceConstants.PRECISION)) { - ret.setPrecision(tqMap.get(TCLIServiceConstants.PRECISION).getI32Value()); - } - - if (tqMap.containsKey(TCLIServiceConstants.SCALE)) { - ret.setScale(tqMap.get(TCLIServiceConstants.SCALE).getI32Value()); - } - } - return ret; - } - - public static TypeQualifiers fromTypeInfo(PrimitiveTypeInfo pti) { - TypeQualifiers result = null; - if (pti instanceof VarcharTypeInfo) { - result = new TypeQualifiers(); - result.setCharacterMaximumLength(((VarcharTypeInfo)pti).getLength()); - } else if (pti instanceof CharTypeInfo) { - result = new TypeQualifiers(); - result.setCharacterMaximumLength(((CharTypeInfo)pti).getLength()); - } else if (pti instanceof DecimalTypeInfo) { - result = new TypeQualifiers(); - result.setPrecision(((DecimalTypeInfo)pti).precision()); - result.setScale(((DecimalTypeInfo)pti).scale()); - } - return result; - } - - public Integer getPrecision() { - return precision; - } - - public void setPrecision(Integer precision) { - this.precision = precision; - } - - public Integer getScale() { - return scale; - } - - public void setScale(Integer scale) { - this.scale = scale; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java deleted file mode 100644 index af36057bdaeca..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
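TypeQualifiers and TypeDescriptor above carry char/varchar length and decimal precision/scale alongside the base type, and getColumnSize/getDecimalDigits surface them as JDBC-style metadata. A small sketch of that mapping for DECIMAL, not part of this patch (plain JDK types; the Thrift qualifier map and the class/method names here are illustrative):

```java
public class DecimalMetadata {
  // Qualifiers carried next to the base type, as TypeQualifiers does above.
  static final class Qualifiers {
    final int precision;
    final int scale;
    Qualifiers(int precision, int scale) {
      this.precision = precision;
      this.scale = scale;
    }
  }

  // For DECIMAL, the column size is the precision and the decimal digits are the scale,
  // mirroring TypeDescriptor.getColumnSize/getDecimalDigits above.
  static int columnSize(Qualifiers q) {
    return q.precision;
  }

  static int decimalDigits(Qualifiers q) {
    return q.scale;
  }

  public static void main(String[] args) {
    Qualifiers decimalCol = new Qualifiers(10, 2); // e.g. a DECIMAL(10,2) column
    System.out.println(columnSize(decimalCol));    // 10
    System.out.println(decimalDigits(decimalCol)); // 2
  }
}
```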
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.TableType; - -/** - * ClassicTableTypeMapping. - * Classic table type mapping : - * Managed Table to Table - * External Table to Table - * Virtual View to View - */ -public class ClassicTableTypeMapping implements TableTypeMapping { - - public enum ClassicTableTypes { - TABLE, - VIEW, - } - - private final Map hiveToClientMap = new HashMap(); - private final Map clientToHiveMap = new HashMap(); - - public ClassicTableTypeMapping() { - hiveToClientMap.put(TableType.MANAGED_TABLE.toString(), - ClassicTableTypes.TABLE.toString()); - hiveToClientMap.put(TableType.EXTERNAL_TABLE.toString(), - ClassicTableTypes.TABLE.toString()); - hiveToClientMap.put(TableType.VIRTUAL_VIEW.toString(), - ClassicTableTypes.VIEW.toString()); - - clientToHiveMap.put(ClassicTableTypes.TABLE.toString(), - TableType.MANAGED_TABLE.toString()); - clientToHiveMap.put(ClassicTableTypes.VIEW.toString(), - TableType.VIRTUAL_VIEW.toString()); - } - - @Override - public String mapToHiveType(String clientTypeName) { - if (clientToHiveMap.containsKey(clientTypeName)) { - return clientToHiveMap.get(clientTypeName); - } else { - return clientTypeName; - } - } - - @Override - public String mapToClientType(String hiveTypeName) { - if (hiveToClientMap.containsKey(hiveTypeName)) { - return hiveToClientMap.get(hiveTypeName); - } else { - return hiveTypeName; - } - } - - @Override - public Set getTableTypeNames() { - Set typeNameSet = new HashSet(); - for (ClassicTableTypes typeNames : ClassicTableTypes.values()) { - typeNameSet.add(typeNames.toString()); - } - return typeNameSet; - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java deleted file mode 100644 index 6740d3bb59dc3..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
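ClassicTableTypeMapping above folds Hive's MANAGED_TABLE and EXTERNAL_TABLE into a single client-facing TABLE type, so the reverse mapping is necessarily lossy: TABLE maps back to MANAGED_TABLE only, and unknown names pass through unchanged. A compact sketch of that asymmetry, not part of this patch (plain strings stand in for the Hive TableType enum; names here are illustrative):

```java
import java.util.HashMap;
import java.util.Map;

public class TableTypeFold {
  static final Map<String, String> HIVE_TO_CLIENT = new HashMap<>();
  static final Map<String, String> CLIENT_TO_HIVE = new HashMap<>();
  static {
    HIVE_TO_CLIENT.put("MANAGED_TABLE", "TABLE");
    HIVE_TO_CLIENT.put("EXTERNAL_TABLE", "TABLE"); // folded into the same client type
    HIVE_TO_CLIENT.put("VIRTUAL_VIEW", "VIEW");

    CLIENT_TO_HIVE.put("TABLE", "MANAGED_TABLE");  // reverse direction picks one of the two
    CLIENT_TO_HIVE.put("VIEW", "VIRTUAL_VIEW");
  }

  // Unknown names pass through unchanged, as mapToHiveType/mapToClientType do above.
  static String toClient(String hiveType) {
    return HIVE_TO_CLIENT.getOrDefault(hiveType, hiveType);
  }

  static String toHive(String clientType) {
    return CLIENT_TO_HIVE.getOrDefault(clientType, clientType);
  }

  public static void main(String[] args) {
    System.out.println(toClient("EXTERNAL_TABLE")); // TABLE
    System.out.println(toHive("TABLE"));            // MANAGED_TABLE
  }
}
```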
- */ -package org.apache.hive.service.cli.operation; - -import java.sql.SQLException; -import java.util.HashMap; -import java.util.Map; - -import org.apache.hadoop.hive.ql.processors.CommandProcessor; -import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.session.HiveSession; - -public abstract class ExecuteStatementOperation extends Operation { - protected String statement = null; - protected Map confOverlay = new HashMap(); - - public ExecuteStatementOperation(HiveSession parentSession, String statement, - Map confOverlay, boolean runInBackground) { - super(parentSession, OperationType.EXECUTE_STATEMENT, runInBackground); - this.statement = statement; - setConfOverlay(confOverlay); - } - - public String getStatement() { - return statement; - } - - public static ExecuteStatementOperation newExecuteStatementOperation( - HiveSession parentSession, String statement, Map confOverlay, boolean runAsync) - throws HiveSQLException { - String[] tokens = statement.trim().split("\\s+"); - CommandProcessor processor = null; - try { - processor = CommandProcessorFactory.getForHiveCommand(tokens, parentSession.getHiveConf()); - } catch (SQLException e) { - throw new HiveSQLException(e.getMessage(), e.getSQLState(), e); - } - if (processor == null) { - return new SQLOperation(parentSession, statement, confOverlay, runAsync); - } - return new HiveCommandOperation(parentSession, statement, processor, confOverlay); - } - - protected Map getConfOverlay() { - return confOverlay; - } - - protected void setConfOverlay(Map confOverlay) { - if (confOverlay != null) { - this.confOverlay = confOverlay; - } - } - - protected void registerCurrentOperationLog() { - if (isOperationLogEnabled) { - if (operationLog == null) { - LOG.warn("Failed to get current OperationLog object of Operation: " + - getHandle().getHandleIdentifier()); - isOperationLogEnabled = false; - return; - } - OperationLog.setCurrentOperationLog(operationLog); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java deleted file mode 100644 index 581d975344060..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetCatalogsOperation.java +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetCatalogsOperation. - * - */ -public class GetCatalogsOperation extends MetadataOperation { - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_CAT", "Catalog name. NULL if not applicable."); - - protected final RowSet rowSet; - - protected GetCatalogsOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_CATALOGS); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_CATALOGS, null); - } - setState(OperationState.FINISHED); - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } - - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java deleted file mode 100644 index 96ba4890075ac..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ /dev/null @@ -1,234 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
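GetCatalogsOperation above also shows the fetch contract shared by all of these metadata operations: results are buffered in a RowSet, FETCH_FIRST rewinds it with setStartOffset(0), and each call pages out at most maxRows rows via extractSubset. A client-side paging sketch under the assumption that RowSet exposes numRows() as in the Hive 1.2 interface (the helper and its class name are illustrative):

```java
import org.apache.hive.service.cli.FetchOrientation;
import org.apache.hive.service.cli.HiveSQLException;
import org.apache.hive.service.cli.RowSet;
import org.apache.hive.service.cli.operation.Operation;

public class FetchPagingSketch {
  /**
   * Counts the rows of a finished operation page by page: FETCH_FIRST rewinds
   * the buffered RowSet to offset 0, every later FETCH_NEXT extracts at most
   * maxRows rows, and an empty page marks the end of the result set.
   */
  static int countRows(Operation op, long maxRows) throws HiveSQLException {
    int total = 0;
    RowSet page = op.getNextRowSet(FetchOrientation.FETCH_FIRST, maxRows);
    while (page.numRows() > 0) {
      total += page.numRows();
      page = op.getNextRowSet(FetchOrientation.FETCH_NEXT, maxRows);
    }
    return total;
  }
}
```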
- */ - -package org.apache.hive.service.cli.operation; - -import java.sql.DatabaseMetaData; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.regex.Pattern; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject.HivePrivilegeObjectType; -import org.apache.hive.service.cli.ColumnDescriptor; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetColumnsOperation. - * - */ -public class GetColumnsOperation extends MetadataOperation { - - protected static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("TABLE_CAT", Type.STRING_TYPE, - "Catalog name. NULL if not applicable") - .addPrimitiveColumn("TABLE_SCHEM", Type.STRING_TYPE, - "Schema name") - .addPrimitiveColumn("TABLE_NAME", Type.STRING_TYPE, - "Table name") - .addPrimitiveColumn("COLUMN_NAME", Type.STRING_TYPE, - "Column name") - .addPrimitiveColumn("DATA_TYPE", Type.INT_TYPE, - "SQL type from java.sql.Types") - .addPrimitiveColumn("TYPE_NAME", Type.STRING_TYPE, - "Data source dependent type name, for a UDT the type name is fully qualified") - .addPrimitiveColumn("COLUMN_SIZE", Type.INT_TYPE, - "Column size. For char or date types this is the maximum number of characters," - + " for numeric or decimal types this is precision.") - .addPrimitiveColumn("BUFFER_LENGTH", Type.TINYINT_TYPE, - "Unused") - .addPrimitiveColumn("DECIMAL_DIGITS", Type.INT_TYPE, - "The number of fractional digits") - .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, - "Radix (typically either 10 or 2)") - .addPrimitiveColumn("NULLABLE", Type.INT_TYPE, - "Is NULL allowed") - .addPrimitiveColumn("REMARKS", Type.STRING_TYPE, - "Comment describing column (may be null)") - .addPrimitiveColumn("COLUMN_DEF", Type.STRING_TYPE, - "Default value (may be null)") - .addPrimitiveColumn("SQL_DATA_TYPE", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("SQL_DATETIME_SUB", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("CHAR_OCTET_LENGTH", Type.INT_TYPE, - "For char types the maximum number of bytes in the column") - .addPrimitiveColumn("ORDINAL_POSITION", Type.INT_TYPE, - "Index of column in table (starting at 1)") - .addPrimitiveColumn("IS_NULLABLE", Type.STRING_TYPE, - "\"NO\" means column definitely does not allow NULL values; " - + "\"YES\" means the column might allow NULL values. 
An empty " - + "string means nobody knows.") - .addPrimitiveColumn("SCOPE_CATALOG", Type.STRING_TYPE, - "Catalog of table that is the scope of a reference attribute " - + "(null if DATA_TYPE isn't REF)") - .addPrimitiveColumn("SCOPE_SCHEMA", Type.STRING_TYPE, - "Schema of table that is the scope of a reference attribute " - + "(null if the DATA_TYPE isn't REF)") - .addPrimitiveColumn("SCOPE_TABLE", Type.STRING_TYPE, - "Table name that this the scope of a reference attribure " - + "(null if the DATA_TYPE isn't REF)") - .addPrimitiveColumn("SOURCE_DATA_TYPE", Type.SMALLINT_TYPE, - "Source type of a distinct type or user-generated Ref type, " - + "SQL type from java.sql.Types (null if DATA_TYPE isn't DISTINCT or user-generated REF)") - .addPrimitiveColumn("IS_AUTO_INCREMENT", Type.STRING_TYPE, - "Indicates whether this column is auto incremented."); - - private final String catalogName; - private final String schemaName; - private final String tableName; - private final String columnName; - - protected final RowSet rowSet; - - protected GetColumnsOperation(HiveSession parentSession, String catalogName, String schemaName, - String tableName, String columnName) { - super(parentSession, OperationType.GET_COLUMNS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.tableName = tableName; - this.columnName = columnName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - String tablePattern = convertIdentifierPattern(tableName, true); - - Pattern columnPattern = null; - if (columnName != null) { - columnPattern = Pattern.compile(convertIdentifierPattern(columnName, false)); - } - - List dbNames = metastoreClient.getDatabases(schemaPattern); - Collections.sort(dbNames); - Map> db2Tabs = new HashMap<>(); - - for (String dbName : dbNames) { - List tableNames = metastoreClient.getTables(dbName, tablePattern); - Collections.sort(tableNames); - db2Tabs.put(dbName, tableNames); - } - - if (isAuthV2Enabled()) { - List privObjs = getPrivObjs(db2Tabs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName - + ", tablePattern : " + tableName; - authorizeMetaGets(HiveOperationType.GET_COLUMNS, privObjs, cmdStr); - } - - for (Entry> dbTabs : db2Tabs.entrySet()) { - String dbName = dbTabs.getKey(); - List tableNames = dbTabs.getValue(); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { - TableSchema schema = new TableSchema(metastoreClient.getSchema(dbName, table.getTableName())); - for (ColumnDescriptor column : schema.getColumnDescriptors()) { - if (columnPattern != null && !columnPattern.matcher(column.getName()).matches()) { - continue; - } - Object[] rowData = new Object[] { - null, // TABLE_CAT - table.getDbName(), // TABLE_SCHEM - table.getTableName(), // TABLE_NAME - column.getName(), // COLUMN_NAME - column.getType().toJavaSQLType(), // DATA_TYPE - column.getTypeName(), // TYPE_NAME - column.getTypeDescriptor().getColumnSize(), // COLUMN_SIZE - null, // BUFFER_LENGTH, unused - column.getTypeDescriptor().getDecimalDigits(), // DECIMAL_DIGITS - column.getType().getNumPrecRadix(), // NUM_PREC_RADIX - DatabaseMetaData.columnNullable, // NULLABLE - column.getComment(), // REMARKS - null, // COLUMN_DEF - null, // SQL_DATA_TYPE - null, // 
SQL_DATETIME_SUB - null, // CHAR_OCTET_LENGTH - column.getOrdinalPosition(), // ORDINAL_POSITION - "YES", // IS_NULLABLE - null, // SCOPE_CATALOG - null, // SCOPE_SCHEMA - null, // SCOPE_TABLE - null, // SOURCE_DATA_TYPE - "NO", // IS_AUTO_INCREMENT - }; - rowSet.addRow(rowData); - } - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - - } - - - private List getPrivObjs(Map> db2Tabs) { - List privObjs = new ArrayList<>(); - for (Entry> dbTabs : db2Tabs.entrySet()) { - for (String tabName : dbTabs.getValue()) { - privObjs.add(new HivePrivilegeObject(HivePrivilegeObjectType.TABLE_OR_VIEW, dbTabs.getKey(), - tabName)); - } - } - return privObjs; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java deleted file mode 100644 index 5dec8bdbf45de..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetFunctionsOperation.java +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.sql.DatabaseMetaData; -import java.util.List; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.ql.exec.FunctionInfo; -import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils; -import org.apache.hive.service.cli.CLIServiceUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.thrift.TException; - -/** - * GetFunctionsOperation. - * - */ -public class GetFunctionsOperation extends MetadataOperation { - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("FUNCTION_CAT", Type.STRING_TYPE, - "Function catalog (may be null)") - .addPrimitiveColumn("FUNCTION_SCHEM", Type.STRING_TYPE, - "Function schema (may be null)") - .addPrimitiveColumn("FUNCTION_NAME", Type.STRING_TYPE, - "Function name. This is the name used to invoke the function") - .addPrimitiveColumn("REMARKS", Type.STRING_TYPE, - "Explanatory comment on the function") - .addPrimitiveColumn("FUNCTION_TYPE", Type.INT_TYPE, - "Kind of function.") - .addPrimitiveColumn("SPECIFIC_NAME", Type.STRING_TYPE, - "The name which uniquely identifies this function within its schema"); - - private final String catalogName; - private final String schemaName; - private final String functionName; - - protected final RowSet rowSet; - - public GetFunctionsOperation(HiveSession parentSession, - String catalogName, String schemaName, String functionName) { - super(parentSession, OperationType.GET_FUNCTIONS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.functionName = functionName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - // get databases for schema pattern - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - List matchingDbs; - try { - matchingDbs = metastoreClient.getDatabases(schemaPattern); - } catch (TException e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - // authorize this call on the schema objects - List privObjs = HivePrivilegeObjectUtils - .getHivePrivDbObjects(matchingDbs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdStr); - } - - try { - if ((null == catalogName || "".equals(catalogName)) - && (null == schemaName || "".equals(schemaName))) { - Set functionNames = FunctionRegistry - .getFunctionNames(CLIServiceUtils.patternToRegex(functionName)); - for (String functionName : functionNames) { - FunctionInfo functionInfo = FunctionRegistry.getFunctionInfo(functionName); - Object[] rowData = 
new Object[] { - null, // FUNCTION_CAT - null, // FUNCTION_SCHEM - functionInfo.getDisplayName(), // FUNCTION_NAME - "", // REMARKS - (functionInfo.isGenericUDTF() ? - DatabaseMetaData.functionReturnsTable - : DatabaseMetaData.functionNoTable), // FUNCTION_TYPE - functionInfo.getClass().getCanonicalName() - }; - rowSet.addRow(rowData); - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java deleted file mode 100644 index 3516bc2ba242c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetSchemasOperation.java +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetSchemasOperation. 
- * - */ -public class GetSchemasOperation extends MetadataOperation { - private final String catalogName; - private final String schemaName; - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_SCHEM", "Schema name.") - .addStringColumn("TABLE_CATALOG", "Catalog name."); - - protected RowSet rowSet; - - protected GetSchemasOperation(HiveSession parentSession, - String catalogName, String schemaName) { - super(parentSession, OperationType.GET_SCHEMAS); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_SCHEMAS, null, cmdStr); - } - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - for (String dbName : metastoreClient.getDatabases(schemaPattern)) { - rowSet.addRow(new Object[] {dbName, DEFAULT_HIVE_CATALOG}); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java deleted file mode 100644 index b372f55cedd1c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTableTypesOperation.java +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.TableType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTableTypesOperation. - * - */ -public class GetTableTypesOperation extends MetadataOperation { - - protected static TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_TYPE", "Table type name."); - - protected final RowSet rowSet; - private final TableTypeMapping tableTypeMapping; - - protected GetTableTypesOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_TABLE_TYPES); - String tableMappingStr = getParentSession().getHiveConf() - .getVar(HiveConf.ConfVars.HIVE_SERVER2_TABLE_TYPE_MAPPING); - tableTypeMapping = - TableTypeMappingFactory.getTableTypeMapping(tableMappingStr); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_TABLETYPES, null); - } - try { - for (TableType type : TableType.values()) { - rowSet.addRow(new String[] {tableTypeMapping.mapToClientType(type.toString())}); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java deleted file mode 100644 index 2af17a662a296..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTablesOperation.java +++ /dev/null @@ -1,135 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObjectUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTablesOperation. - * - */ -public class GetTablesOperation extends MetadataOperation { - - private final String catalogName; - private final String schemaName; - private final String tableName; - private final List tableTypes = new ArrayList(); - protected final RowSet rowSet; - private final TableTypeMapping tableTypeMapping; - - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addStringColumn("TABLE_CAT", "Catalog name. NULL if not applicable.") - .addStringColumn("TABLE_SCHEM", "Schema name.") - .addStringColumn("TABLE_NAME", "Table name.") - .addStringColumn("TABLE_TYPE", "The table type, e.g. 
\"TABLE\", \"VIEW\", etc.") - .addStringColumn("REMARKS", "Comments about the table."); - - protected GetTablesOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, - List tableTypes) { - super(parentSession, OperationType.GET_TABLES); - this.catalogName = catalogName; - this.schemaName = schemaName; - this.tableName = tableName; - String tableMappingStr = getParentSession().getHiveConf() - .getVar(HiveConf.ConfVars.HIVE_SERVER2_TABLE_TYPE_MAPPING); - tableTypeMapping = - TableTypeMappingFactory.getTableTypeMapping(tableMappingStr); - if (tableTypes != null) { - this.tableTypes.addAll(tableTypes); - } - this.rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - IMetaStoreClient metastoreClient = getParentSession().getMetaStoreClient(); - String schemaPattern = convertSchemaPattern(schemaName); - List matchingDbs = metastoreClient.getDatabases(schemaPattern); - if(isAuthV2Enabled()){ - List privObjs = HivePrivilegeObjectUtils.getHivePrivDbObjects(matchingDbs); - String cmdStr = "catalog : " + catalogName + ", schemaPattern : " + schemaName; - authorizeMetaGets(HiveOperationType.GET_TABLES, privObjs, cmdStr); - } - - String tablePattern = convertIdentifierPattern(tableName, true); - for (String dbName : metastoreClient.getDatabases(schemaPattern)) { - List tableNames = metastoreClient.getTables(dbName, tablePattern); - for (Table table : metastoreClient.getTableObjectsByName(dbName, tableNames)) { - Object[] rowData = new Object[] { - DEFAULT_HIVE_CATALOG, - table.getDbName(), - table.getTableName(), - tableTypeMapping.mapToClientType(table.getTableType()), - table.getParameters().get("comment") - }; - if (tableTypes.isEmpty() || tableTypes.contains( - tableTypeMapping.mapToClientType(table.getTableType()))) { - rowSet.addRow(rowData); - } - } - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java deleted file mode 100644 index 3e81f8afbd85f..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.Type; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * GetTypeInfoOperation. - * - */ -public class GetTypeInfoOperation extends MetadataOperation { - - private static final TableSchema RESULT_SET_SCHEMA = new TableSchema() - .addPrimitiveColumn("TYPE_NAME", Type.STRING_TYPE, - "Type name") - .addPrimitiveColumn("DATA_TYPE", Type.INT_TYPE, - "SQL data type from java.sql.Types") - .addPrimitiveColumn("PRECISION", Type.INT_TYPE, - "Maximum precision") - .addPrimitiveColumn("LITERAL_PREFIX", Type.STRING_TYPE, - "Prefix used to quote a literal (may be null)") - .addPrimitiveColumn("LITERAL_SUFFIX", Type.STRING_TYPE, - "Suffix used to quote a literal (may be null)") - .addPrimitiveColumn("CREATE_PARAMS", Type.STRING_TYPE, - "Parameters used in creating the type (may be null)") - .addPrimitiveColumn("NULLABLE", Type.SMALLINT_TYPE, - "Can you use NULL for this type") - .addPrimitiveColumn("CASE_SENSITIVE", Type.BOOLEAN_TYPE, - "Is it case sensitive") - .addPrimitiveColumn("SEARCHABLE", Type.SMALLINT_TYPE, - "Can you use \"WHERE\" based on this type") - .addPrimitiveColumn("UNSIGNED_ATTRIBUTE", Type.BOOLEAN_TYPE, - "Is it unsigned") - .addPrimitiveColumn("FIXED_PREC_SCALE", Type.BOOLEAN_TYPE, - "Can it be a money value") - .addPrimitiveColumn("AUTO_INCREMENT", Type.BOOLEAN_TYPE, - "Can it be used for an auto-increment value") - .addPrimitiveColumn("LOCAL_TYPE_NAME", Type.STRING_TYPE, - "Localized version of type name (may be null)") - .addPrimitiveColumn("MINIMUM_SCALE", Type.SMALLINT_TYPE, - "Minimum scale supported") - .addPrimitiveColumn("MAXIMUM_SCALE", Type.SMALLINT_TYPE, - "Maximum scale supported") - .addPrimitiveColumn("SQL_DATA_TYPE", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("SQL_DATETIME_SUB", Type.INT_TYPE, - "Unused") - .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, - "Usually 2 or 10"); - - protected final RowSet rowSet; - - protected GetTypeInfoOperation(HiveSession parentSession) { - super(parentSession, OperationType.GET_TYPE_INFO); - rowSet = RowSetFactory.create(RESULT_SET_SCHEMA, getProtocolVersion()); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - if (isAuthV2Enabled()) { - authorizeMetaGets(HiveOperationType.GET_TYPEINFO, null); - } - try { - for (Type type : Type.values()) { - Object[] rowData = new Object[] { - type.getName(), // TYPE_NAME - type.toJavaSQLType(), // DATA_TYPE - 
type.getMaxPrecision(), // PRECISION - type.getLiteralPrefix(), // LITERAL_PREFIX - type.getLiteralSuffix(), // LITERAL_SUFFIX - type.getCreateParams(), // CREATE_PARAMS - type.getNullable(), // NULLABLE - type.isCaseSensitive(), // CASE_SENSITIVE - type.getSearchable(), // SEARCHABLE - type.isUnsignedAttribute(), // UNSIGNED_ATTRIBUTE - type.isFixedPrecScale(), // FIXED_PREC_SCALE - type.isAutoIncrement(), // AUTO_INCREMENT - type.getLocalizedName(), // LOCAL_TYPE_NAME - type.getMinimumScale(), // MINIMUM_SCALE - type.getMaximumScale(), // MAXIMUM_SCALE - null, // SQL_DATA_TYPE, unused - null, // SQL_DATETIME_SUB, unused - type.getNumPrecRadix() //NUM_PREC_RADIX - }; - rowSet.addRow(rowData); - } - setState(OperationState.FINISHED); - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException(e); - } - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - return RESULT_SET_SCHEMA; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - assertState(OperationState.FINISHED); - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - rowSet.setStartOffset(0); - } - return rowSet.extractSubset((int)maxRows); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java deleted file mode 100644 index 5b6e6ad042412..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import static java.nio.charset.StandardCharsets.UTF_8; - -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.processors.CommandProcessor; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.io.IOUtils; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * Executes a HiveCommand - */ -public class HiveCommandOperation extends ExecuteStatementOperation { - private CommandProcessor commandProcessor; - private TableSchema resultSchema = null; - - /** - * For processors other than Hive queries (Driver), they output to session.out (a temp file) - * first and the fetchOne/fetchN/fetchAll functions get the output from pipeIn. - */ - private BufferedReader resultReader; - - - protected HiveCommandOperation(HiveSession parentSession, String statement, - CommandProcessor commandProcessor, Map confOverlay) { - super(parentSession, statement, confOverlay, false); - this.commandProcessor = commandProcessor; - setupSessionIO(parentSession.getSessionState()); - } - - private void setupSessionIO(SessionState sessionState) { - try { - LOG.info("Putting temp output to file " + sessionState.getTmpOutputFile().toString()); - sessionState.in = null; // hive server's session input stream is not used - // open a per-session file in auto-flush mode for writing temp results - sessionState.out = new PrintStream(new FileOutputStream(sessionState.getTmpOutputFile()), true, UTF_8.name()); - // TODO: for hadoop jobs, progress is printed out to session.err, - // we should find a way to feed back job progress to client - sessionState.err = new PrintStream(System.err, true, UTF_8.name()); - } catch (IOException e) { - LOG.error("Error in creating temp output file ", e); - try { - sessionState.in = null; - sessionState.out = new PrintStream(System.out, true, UTF_8.name()); - sessionState.err = new PrintStream(System.err, true, UTF_8.name()); - } catch (UnsupportedEncodingException ee) { - LOG.error("Error creating PrintStream", e); - ee.printStackTrace(); - sessionState.out = null; - sessionState.err = null; - } - } - } - - - private void tearDownSessionIO() { - IOUtils.cleanup(LOG, parentSession.getSessionState().out); - IOUtils.cleanup(LOG, parentSession.getSessionState().err); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.RUNNING); - try { - String command = getStatement().trim(); - String[] tokens = statement.split("\\s"); - String commandArgs = command.substring(tokens[0].length()).trim(); - - CommandProcessorResponse response = commandProcessor.run(commandArgs); - int returnCode = response.getResponseCode(); - if (returnCode != 0) { - throw toSQLException("Error while processing statement", response); - } - Schema schema = 
response.getSchema(); - if (schema != null) { - setHasResultSet(true); - resultSchema = new TableSchema(schema); - } else { - setHasResultSet(false); - resultSchema = new TableSchema(); - } - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - setState(OperationState.FINISHED); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#close() - */ - @Override - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - tearDownSessionIO(); - cleanTmpFile(); - cleanupOperationLog(); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#getResultSetSchema() - */ - @Override - public TableSchema getResultSetSchema() throws HiveSQLException { - return resultSchema; - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.operation.Operation#getNextRowSet(org.apache.hive.service.cli.FetchOrientation, long) - */ - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - validateDefaultFetchOrientation(orientation); - if (orientation.equals(FetchOrientation.FETCH_FIRST)) { - resetResultReader(); - } - List rows = readResults((int) maxRows); - RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion()); - - for (String row : rows) { - rowSet.addRow(new String[] {row}); - } - return rowSet; - } - - /** - * Reads the temporary results for non-Hive (non-Driver) commands to the - * resulting List of strings. - * @param nLines number of lines read at once. If it is <= 0, then read all lines. - */ - private List readResults(int nLines) throws HiveSQLException { - if (resultReader == null) { - SessionState sessionState = getParentSession().getSessionState(); - File tmp = sessionState.getTmpOutputFile(); - try { - resultReader = new BufferedReader(new FileReader(tmp)); - } catch (FileNotFoundException e) { - LOG.error("File " + tmp + " not found. ", e); - throw new HiveSQLException(e); - } - } - List results = new ArrayList(); - - for (int i = 0; i < nLines || nLines <= 0; ++i) { - try { - String line = resultReader.readLine(); - if (line == null) { - // reached the end of the result file - break; - } else { - results.add(line); - } - } catch (IOException e) { - LOG.error("Reading temp results encountered an exception: ", e); - throw new HiveSQLException(e); - } - } - return results; - } - - private void cleanTmpFile() { - resetResultReader(); - SessionState sessionState = getParentSession().getSessionState(); - File tmp = sessionState.getTmpOutputFile(); - tmp.delete(); - } - - private void resetResultReader() { - if (resultReader != null) { - IOUtils.cleanup(LOG, resultReader); - resultReader = null; - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java deleted file mode 100644 index b530f217125b8..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/HiveTableTypeMapping.java +++ /dev/null @@ -1,51 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
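HiveCommandOperation.runInternal above strips the leading command word before handing the remainder to the CommandProcessor. A standalone sketch of that tokenization using plain string handling only (no Hive dependencies; the class name is illustrative):

```java
public class CommandTokenizerSketch {
  public static void main(String[] args) {
    String statement = "set hive.exec.parallel=true";

    // Same splitting as runInternal: the first whitespace-delimited token is the
    // command name, the rest of the line is what the CommandProcessor receives.
    String command = statement.trim();
    String[] tokens = statement.split("\\s");
    String commandArgs = command.substring(tokens[0].length()).trim();

    System.out.println(tokens[0]);    // set
    System.out.println(commandArgs);  // hive.exec.parallel=true
  }
}
```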
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.HashSet; -import java.util.Set; - -import org.apache.hadoop.hive.metastore.TableType; - -/** - * HiveTableTypeMapping. - * Default table type mapping - * - */ -public class HiveTableTypeMapping implements TableTypeMapping { - - @Override - public String mapToHiveType(String clientTypeName) { - return clientTypeName; - } - - @Override - public String mapToClientType(String hiveTypeName) { - return hiveTypeName; - } - - @Override - public Set getTableTypeNames() { - Set typeNameSet = new HashSet(); - for (TableType typeNames : TableType.values()) { - typeNameSet.add(typeNames.toString()); - } - return typeNameSet; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java deleted file mode 100644 index 6c819876a556d..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/MetadataOperation.java +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzPluginException; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType; -import org.apache.hadoop.hive.ql.security.authorization.plugin.HivePrivilegeObject; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; - -/** - * MetadataOperation. 
- * - */ -public abstract class MetadataOperation extends Operation { - - protected static final String DEFAULT_HIVE_CATALOG = ""; - protected static TableSchema RESULT_SET_SCHEMA; - private static final char SEARCH_STRING_ESCAPE = '\\'; - - protected MetadataOperation(HiveSession parentSession, OperationType opType) { - super(parentSession, opType, false); - setHasResultSet(true); - } - - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.Operation#close() - */ - @Override - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - cleanupOperationLog(); - } - - /** - * Convert wildchars and escape sequence from JDBC format to datanucleous/regex - */ - protected String convertIdentifierPattern(final String pattern, boolean datanucleusFormat) { - if (pattern == null) { - return convertPattern("%", true); - } else { - return convertPattern(pattern, datanucleusFormat); - } - } - - /** - * Convert wildchars and escape sequence of schema pattern from JDBC format to datanucleous/regex - * The schema pattern treats empty string also as wildchar - */ - protected String convertSchemaPattern(final String pattern) { - if ((pattern == null) || pattern.isEmpty()) { - return convertPattern("%", true); - } else { - return convertPattern(pattern, true); - } - } - - /** - * Convert a pattern containing JDBC catalog search wildcards into - * Java regex patterns. - * - * @param pattern input which may contain '%' or '_' wildcard characters, or - * these characters escaped using {@link #getSearchStringEscape()}. - * @return replace %/_ with regex search characters, also handle escaped - * characters. - * - * The datanucleus module expects the wildchar as '*'. The columns search on the - * other hand is done locally inside the hive code and that requires the regex wildchar - * format '.*' This is driven by the datanucleusFormat flag. 
- */ - private String convertPattern(final String pattern, boolean datanucleusFormat) { - String wStr; - if (datanucleusFormat) { - wStr = "*"; - } else { - wStr = ".*"; - } - return pattern - .replaceAll("([^\\\\])%", "$1" + wStr).replaceAll("\\\\%", "%").replaceAll("^%", wStr) - .replaceAll("([^\\\\])_", "$1.").replaceAll("\\\\_", "_").replaceAll("^_", "."); - } - - protected boolean isAuthV2Enabled(){ - SessionState ss = SessionState.get(); - return (ss.isAuthorizationModeV2() && - HiveConf.getBoolVar(ss.getConf(), HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)); - } - - protected void authorizeMetaGets(HiveOperationType opType, List inpObjs) - throws HiveSQLException { - authorizeMetaGets(opType, inpObjs, null); - } - - protected void authorizeMetaGets(HiveOperationType opType, List inpObjs, - String cmdString) throws HiveSQLException { - SessionState ss = SessionState.get(); - HiveAuthzContext.Builder ctxBuilder = new HiveAuthzContext.Builder(); - ctxBuilder.setUserIpAddress(ss.getUserIpAddress()); - ctxBuilder.setCommandString(cmdString); - try { - ss.getAuthorizerV2().checkPrivileges(opType, inpObjs, null, - ctxBuilder.build()); - } catch (HiveAuthzPluginException | HiveAccessControlException e) { - throw new HiveSQLException(e.getMessage(), e); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java deleted file mode 100644 index 4b331423948fa..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/Operation.java +++ /dev/null @@ -1,328 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
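convertPattern above rewrites JDBC search patterns with a chain of replaceAll calls: an unescaped '%' becomes the engine wildcard ('*' for the datanucleus path, '.*' for the local regex path), an unescaped '_' always becomes '.', and backslash-escaped wildcards are unescaped back to literal characters. A standalone sketch that reproduces the conversion chain so the outputs can be checked outside MetadataOperation (the wrapper class name is illustrative):

```java
public class PatternConversionSketch {

  // Reproduction of MetadataOperation.convertPattern: JDBC '%'/'_' wildcards to
  // datanucleus ('*') or Java-regex ('.*') form, honouring '\' escapes.
  static String convertPattern(final String pattern, boolean datanucleusFormat) {
    String wStr = datanucleusFormat ? "*" : ".*";
    return pattern
        .replaceAll("([^\\\\])%", "$1" + wStr).replaceAll("\\\\%", "%").replaceAll("^%", wStr)
        .replaceAll("([^\\\\])_", "$1.").replaceAll("\\\\_", "_").replaceAll("^_", ".");
  }

  public static void main(String[] args) {
    System.out.println(convertPattern("%", true));           // *        (match-all schema/table pattern)
    System.out.println(convertPattern("emp%", true));        // emp*     (datanucleus prefix match)
    System.out.println(convertPattern("emp%", false));       // emp.*    (regex form used for column filtering)
    System.out.println(convertPattern("col_name", false));   // col.name ('_' = any single character)
    System.out.println(convertPattern("col\\_name", false)); // col_name (escaped '_' stays literal)
  }
}
```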
- */ -package org.apache.hive.service.cli.operation; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.EnumSet; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationStatus; -import org.apache.hive.service.cli.OperationType; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -public abstract class Operation { - protected final HiveSession parentSession; - private OperationState state = OperationState.INITIALIZED; - private final OperationHandle opHandle; - private HiveConf configuration; - public static final Log LOG = LogFactory.getLog(Operation.class.getName()); - public static final FetchOrientation DEFAULT_FETCH_ORIENTATION = FetchOrientation.FETCH_NEXT; - public static final long DEFAULT_FETCH_MAX_ROWS = 100; - protected boolean hasResultSet; - protected volatile HiveSQLException operationException; - protected final boolean runAsync; - protected volatile Future backgroundHandle; - protected OperationLog operationLog; - protected boolean isOperationLogEnabled; - - private long operationTimeout; - private long lastAccessTime; - - protected static final EnumSet DEFAULT_FETCH_ORIENTATION_SET = - EnumSet.of( - FetchOrientation.FETCH_NEXT, - FetchOrientation.FETCH_FIRST, - FetchOrientation.FETCH_PRIOR); - - protected Operation(HiveSession parentSession, OperationType opType, boolean runInBackground) { - this.parentSession = parentSession; - this.runAsync = runInBackground; - this.opHandle = new OperationHandle(opType, parentSession.getProtocolVersion()); - lastAccessTime = System.currentTimeMillis(); - operationTimeout = HiveConf.getTimeVar(parentSession.getHiveConf(), - HiveConf.ConfVars.HIVE_SERVER2_IDLE_OPERATION_TIMEOUT, TimeUnit.MILLISECONDS); - } - - public Future getBackgroundHandle() { - return backgroundHandle; - } - - protected void setBackgroundHandle(Future backgroundHandle) { - this.backgroundHandle = backgroundHandle; - } - - public boolean shouldRunAsync() { - return runAsync; - } - - public void setConfiguration(HiveConf configuration) { - this.configuration = new HiveConf(configuration); - } - - public HiveConf getConfiguration() { - return new HiveConf(configuration); - } - - public HiveSession getParentSession() { - return parentSession; - } - - public OperationHandle getHandle() { - return opHandle; - } - - public TProtocolVersion getProtocolVersion() { - return opHandle.getProtocolVersion(); - } - - public OperationType getType() { - return opHandle.getOperationType(); - } - - public OperationStatus getStatus() { - return new OperationStatus(state, operationException); - } - - public boolean hasResultSet() { - return hasResultSet; - } - - protected void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - opHandle.setHasResultSet(hasResultSet); - } - - public OperationLog getOperationLog() { - return operationLog; - } - - 
protected final OperationState setState(OperationState newState) throws HiveSQLException { - state.validateTransition(newState); - this.state = newState; - this.lastAccessTime = System.currentTimeMillis(); - return this.state; - } - - public boolean isTimedOut(long current) { - if (operationTimeout == 0) { - return false; - } - if (operationTimeout > 0) { - // check only when it's in terminal state - return state.isTerminal() && lastAccessTime + operationTimeout <= current; - } - return lastAccessTime + -operationTimeout <= current; - } - - public long getLastAccessTime() { - return lastAccessTime; - } - - public long getOperationTimeout() { - return operationTimeout; - } - - public void setOperationTimeout(long operationTimeout) { - this.operationTimeout = operationTimeout; - } - - protected void setOperationException(HiveSQLException operationException) { - this.operationException = operationException; - } - - protected final void assertState(OperationState state) throws HiveSQLException { - if (this.state != state) { - throw new HiveSQLException("Expected state " + state + ", but found " + this.state); - } - this.lastAccessTime = System.currentTimeMillis(); - } - - public boolean isRunning() { - return OperationState.RUNNING.equals(state); - } - - public boolean isFinished() { - return OperationState.FINISHED.equals(state); - } - - public boolean isCanceled() { - return OperationState.CANCELED.equals(state); - } - - public boolean isFailed() { - return OperationState.ERROR.equals(state); - } - - protected void createOperationLog() { - if (parentSession.isOperationLogEnabled()) { - File operationLogFile = new File(parentSession.getOperationLogSessionDir(), - opHandle.getHandleIdentifier().toString()); - isOperationLogEnabled = true; - - // create log file - try { - if (operationLogFile.exists()) { - LOG.warn("The operation log file should not exist, but it is already there: " + - operationLogFile.getAbsolutePath()); - operationLogFile.delete(); - } - if (!operationLogFile.createNewFile()) { - // the log file already exists and cannot be deleted. - // If it can be read/written, keep its contents and use it. - if (!operationLogFile.canRead() || !operationLogFile.canWrite()) { - LOG.warn("The already existed operation log file cannot be recreated, " + - "and it cannot be read or written: " + operationLogFile.getAbsolutePath()); - isOperationLogEnabled = false; - return; - } - } - } catch (Exception e) { - LOG.warn("Unable to create operation log file: " + operationLogFile.getAbsolutePath(), e); - isOperationLogEnabled = false; - return; - } - - // create OperationLog object with above log file - try { - operationLog = new OperationLog(opHandle.toString(), operationLogFile, parentSession.getHiveConf()); - } catch (FileNotFoundException e) { - LOG.warn("Unable to instantiate OperationLog object for operation: " + - opHandle, e); - isOperationLogEnabled = false; - return; - } - - // register this operationLog to current thread - OperationLog.setCurrentOperationLog(operationLog); - } - } - - protected void unregisterOperationLog() { - if (isOperationLogEnabled) { - OperationLog.removeCurrentOperationLog(); - } - } - - /** - * Invoked before runInternal(). - * Set up some preconditions, or configurations. - */ - protected void beforeRun() { - createOperationLog(); - } - - /** - * Invoked after runInternal(), even if an exception is thrown in runInternal(). - * Clean up resources, which was set up in beforeRun(). 
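isTimedOut above distinguishes three regimes: a timeout of 0 never expires the operation, a positive timeout only expires operations already in a terminal state, and a negative timeout expires the operation after the absolute value regardless of state. A distilled sketch of that rule with the terminal-state check passed in as a boolean (the real code calls state.isTerminal(); the class and method names are illustrative):

```java
public class OperationTimeoutSketch {

  // Same decision as Operation.isTimedOut, with the terminal-state check supplied by the caller.
  static boolean isTimedOut(long operationTimeout, long lastAccessTime,
                            boolean stateIsTerminal, long current) {
    if (operationTimeout == 0) {
      return false;                                             // never times out
    }
    if (operationTimeout > 0) {
      // positive timeout: only finished/canceled/closed operations expire
      return stateIsTerminal && lastAccessTime + operationTimeout <= current;
    }
    // negative timeout: expires after |timeout| ms no matter what state it is in
    return lastAccessTime + -operationTimeout <= current;
  }

  public static void main(String[] args) {
    long last = 1_000L;
    System.out.println(isTimedOut(0, last, true, 10_000L));       // false
    System.out.println(isTimedOut(5_000, last, false, 10_000L));  // false (still running)
    System.out.println(isTimedOut(5_000, last, true, 10_000L));   // true
    System.out.println(isTimedOut(-5_000, last, false, 10_000L)); // true
  }
}
```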
- */ - protected void afterRun() { - unregisterOperationLog(); - } - - /** - * Implemented by subclass of Operation class to execute specific behaviors. - * @throws HiveSQLException - */ - protected abstract void runInternal() throws HiveSQLException; - - public void run() throws HiveSQLException { - beforeRun(); - try { - runInternal(); - } finally { - afterRun(); - } - } - - protected void cleanupOperationLog() { - if (isOperationLogEnabled) { - if (operationLog == null) { - LOG.error("Operation [ " + opHandle.getHandleIdentifier() + " ] " - + "logging is enabled, but its OperationLog object cannot be found."); - } else { - operationLog.close(); - } - } - } - - // TODO: make this abstract and implement in subclasses. - public void cancel() throws HiveSQLException { - setState(OperationState.CANCELED); - throw new UnsupportedOperationException("SQLOperation.cancel()"); - } - - public void close() throws HiveSQLException { - setState(OperationState.CLOSED); - cleanupOperationLog(); - } - - public abstract TableSchema getResultSetSchema() throws HiveSQLException; - - public abstract RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException; - - public RowSet getNextRowSet() throws HiveSQLException { - return getNextRowSet(FetchOrientation.FETCH_NEXT, DEFAULT_FETCH_MAX_ROWS); - } - - /** - * Verify if the given fetch orientation is part of the default orientation types. - * @param orientation - * @throws HiveSQLException - */ - protected void validateDefaultFetchOrientation(FetchOrientation orientation) - throws HiveSQLException { - validateFetchOrientation(orientation, DEFAULT_FETCH_ORIENTATION_SET); - } - - /** - * Verify if the given fetch orientation is part of the supported orientation types. - * @param orientation - * @param supportedOrientations - * @throws HiveSQLException - */ - protected void validateFetchOrientation(FetchOrientation orientation, - EnumSet supportedOrientations) throws HiveSQLException { - if (!supportedOrientations.contains(orientation)) { - throw new HiveSQLException("The fetch type " + orientation.toString() + - " is not supported for this resultset", "HY106"); - } - } - - protected HiveSQLException toSQLException(String prefix, CommandProcessorResponse response) { - HiveSQLException ex = new HiveSQLException(prefix + ": " + response.getErrorMessage(), - response.getSQLState(), response.getResponseCode()); - if (response.getException() != null) { - ex.initCause(response.getException()); - } - return ex; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java deleted file mode 100644 index 92c340a29c107..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ /dev/null @@ -1,284 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.session.OperationLog; -import org.apache.hive.service.AbstractService; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.OperationStatus; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.log4j.Appender; -import org.apache.log4j.Logger; - -/** - * OperationManager. - * - */ -public class OperationManager extends AbstractService { - private final Log LOG = LogFactory.getLog(OperationManager.class.getName()); - - private final Map handleToOperation = - new HashMap(); - - public OperationManager() { - super(OperationManager.class.getSimpleName()); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { - initOperationLogCapture(hiveConf.getVar( - HiveConf.ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LEVEL)); - } else { - LOG.debug("Operation level logging is turned off"); - } - super.init(hiveConf); - } - - @Override - public synchronized void start() { - super.start(); - // TODO - } - - @Override - public synchronized void stop() { - // TODO - super.stop(); - } - - private void initOperationLogCapture(String loggingMode) { - // Register another Appender (with the same layout) that talks to us. 
- Appender ap = new LogDivertAppender(this, OperationLog.getLoggingLevel(loggingMode)); - Logger.getRootLogger().addAppender(ap); - } - - public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, - String statement, Map confOverlay, boolean runAsync) - throws HiveSQLException { - ExecuteStatementOperation executeStatementOperation = ExecuteStatementOperation - .newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync); - addOperation(executeStatementOperation); - return executeStatementOperation; - } - - public GetTypeInfoOperation newGetTypeInfoOperation(HiveSession parentSession) { - GetTypeInfoOperation operation = new GetTypeInfoOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetCatalogsOperation newGetCatalogsOperation(HiveSession parentSession) { - GetCatalogsOperation operation = new GetCatalogsOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetSchemasOperation newGetSchemasOperation(HiveSession parentSession, - String catalogName, String schemaName) { - GetSchemasOperation operation = new GetSchemasOperation(parentSession, catalogName, schemaName); - addOperation(operation); - return operation; - } - - public MetadataOperation newGetTablesOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, - List tableTypes) { - MetadataOperation operation = - new GetTablesOperation(parentSession, catalogName, schemaName, tableName, tableTypes); - addOperation(operation); - return operation; - } - - public GetTableTypesOperation newGetTableTypesOperation(HiveSession parentSession) { - GetTableTypesOperation operation = new GetTableTypesOperation(parentSession); - addOperation(operation); - return operation; - } - - public GetColumnsOperation newGetColumnsOperation(HiveSession parentSession, - String catalogName, String schemaName, String tableName, String columnName) { - GetColumnsOperation operation = new GetColumnsOperation(parentSession, - catalogName, schemaName, tableName, columnName); - addOperation(operation); - return operation; - } - - public GetFunctionsOperation newGetFunctionsOperation(HiveSession parentSession, - String catalogName, String schemaName, String functionName) { - GetFunctionsOperation operation = new GetFunctionsOperation(parentSession, - catalogName, schemaName, functionName); - addOperation(operation); - return operation; - } - - public Operation getOperation(OperationHandle operationHandle) throws HiveSQLException { - Operation operation = getOperationInternal(operationHandle); - if (operation == null) { - throw new HiveSQLException("Invalid OperationHandle: " + operationHandle); - } - return operation; - } - - private synchronized Operation getOperationInternal(OperationHandle operationHandle) { - return handleToOperation.get(operationHandle); - } - - private synchronized Operation removeTimedOutOperation(OperationHandle operationHandle) { - Operation operation = handleToOperation.get(operationHandle); - if (operation != null && operation.isTimedOut(System.currentTimeMillis())) { - handleToOperation.remove(operationHandle); - return operation; - } - return null; - } - - private synchronized void addOperation(Operation operation) { - handleToOperation.put(operation.getHandle(), operation); - } - - private synchronized Operation removeOperation(OperationHandle opHandle) { - return handleToOperation.remove(opHandle); - } - - public OperationStatus getOperationStatus(OperationHandle opHandle) - throws 
HiveSQLException { - return getOperation(opHandle).getStatus(); - } - - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - Operation operation = getOperation(opHandle); - OperationState opState = operation.getStatus().getState(); - if (opState == OperationState.CANCELED || - opState == OperationState.CLOSED || - opState == OperationState.FINISHED || - opState == OperationState.ERROR || - opState == OperationState.UNKNOWN) { - // Cancel should be a no-op in either cases - LOG.debug(opHandle + ": Operation is already aborted in state - " + opState); - } - else { - LOG.debug(opHandle + ": Attempting to cancel from state - " + opState); - operation.cancel(); - } - } - - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - Operation operation = removeOperation(opHandle); - if (operation == null) { - throw new HiveSQLException("Operation does not exist!"); - } - operation.close(); - } - - public TableSchema getOperationResultSetSchema(OperationHandle opHandle) - throws HiveSQLException { - return getOperation(opHandle).getResultSetSchema(); - } - - public RowSet getOperationNextRowSet(OperationHandle opHandle) - throws HiveSQLException { - return getOperation(opHandle).getNextRowSet(); - } - - public RowSet getOperationNextRowSet(OperationHandle opHandle, - FetchOrientation orientation, long maxRows) - throws HiveSQLException { - return getOperation(opHandle).getNextRowSet(orientation, maxRows); - } - - public RowSet getOperationLogRowSet(OperationHandle opHandle, - FetchOrientation orientation, long maxRows) - throws HiveSQLException { - // get the OperationLog object from the operation - OperationLog operationLog = getOperation(opHandle).getOperationLog(); - if (operationLog == null) { - throw new HiveSQLException("Couldn't find log associated with operation handle: " + opHandle); - } - - // read logs - List logs; - try { - logs = operationLog.readOperationLog(isFetchFirst(orientation), maxRows); - } catch (SQLException e) { - throw new HiveSQLException(e.getMessage(), e.getCause()); - } - - - // convert logs to RowSet - TableSchema tableSchema = new TableSchema(getLogSchema()); - RowSet rowSet = RowSetFactory.create(tableSchema, getOperation(opHandle).getProtocolVersion()); - for (String log : logs) { - rowSet.addRow(new String[] {log}); - } - - return rowSet; - } - - private boolean isFetchFirst(FetchOrientation fetchOrientation) { - //TODO: Since OperationLog is moved to package o.a.h.h.ql.session, - // we may add a Enum there and map FetchOrientation to it. 
- if (fetchOrientation.equals(FetchOrientation.FETCH_FIRST)) { - return true; - } - return false; - } - - private Schema getLogSchema() { - Schema schema = new Schema(); - FieldSchema fieldSchema = new FieldSchema(); - fieldSchema.setName("operation_log"); - fieldSchema.setType("string"); - schema.addToFieldSchemas(fieldSchema); - return schema; - } - - public OperationLog getOperationLogByThread() { - return OperationLog.getCurrentOperationLog(); - } - - public List removeExpiredOperations(OperationHandle[] handles) { - List removed = new ArrayList(); - for (OperationHandle handle : handles) { - Operation operation = removeTimedOutOperation(handle); - if (operation != null) { - LOG.warn("Operation " + handle + " is timed-out and will be closed"); - removed.add(operation); - } - } - return removed; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java deleted file mode 100644 index c7726f1fac07a..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ /dev/null @@ -1,456 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.operation; - -import java.io.IOException; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.security.PrivilegedExceptionAction; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.concurrent.Future; -import java.util.concurrent.RejectedExecutionException; - -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.Schema; -import org.apache.hadoop.hive.ql.CommandNeedRetryException; -import org.apache.hadoop.hive.ql.Driver; -import org.apache.hadoop.hive.ql.exec.ExplainTask; -import org.apache.hadoop.hive.ql.exec.Task; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; -import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationState; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.RowSetFactory; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.session.HiveSession; -import org.apache.hive.service.server.ThreadWithGarbageCleanup; - -/** - * SQLOperation. - * - */ -public class SQLOperation extends ExecuteStatementOperation { - - private Driver driver = null; - private CommandProcessorResponse response; - private TableSchema resultSchema = null; - private Schema mResultSchema = null; - private SerDe serde = null; - private boolean fetchStarted = false; - - public SQLOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runInBackground) { - // TODO: call setRemoteUser in ExecuteStatementOperation or higher. - super(parentSession, statement, confOverlay, runInBackground); - } - - /*** - * Compile the query and extract metadata - * @param sqlOperationConf - * @throws HiveSQLException - */ - public void prepare(HiveConf sqlOperationConf) throws HiveSQLException { - setState(OperationState.RUNNING); - - try { - driver = new Driver(sqlOperationConf, getParentSession().getUserName()); - - // set the operation handle information in Driver, so that thrift API users - // can use the operation handle they receive, to lookup query information in - // Yarn ATS - String guid64 = Base64.encodeBase64URLSafeString(getHandle().getHandleIdentifier() - .toTHandleIdentifier().getGuid()).trim(); - driver.setOperationId(guid64); - - // In Hive server mode, we are not able to retry in the FetchTask - // case, when calling fetch queries since execute() has returned. 
- // For now, we disable the test attempts. - driver.setTryCount(Integer.MAX_VALUE); - - String subStatement = new VariableSubstitution().substitute(sqlOperationConf, statement); - response = driver.compileAndRespond(subStatement); - if (0 != response.getResponseCode()) { - throw toSQLException("Error while compiling statement", response); - } - - mResultSchema = driver.getSchema(); - - // hasResultSet should be true only if the query has a FetchTask - // "explain" is an exception for now - if(driver.getPlan().getFetchTask() != null) { - //Schema has to be set - if (mResultSchema == null || !mResultSchema.isSetFieldSchemas()) { - throw new HiveSQLException("Error compiling query: Schema and FieldSchema " + - "should be set when query plan has a FetchTask"); - } - resultSchema = new TableSchema(mResultSchema); - setHasResultSet(true); - } else { - setHasResultSet(false); - } - // Set hasResultSet true if the plan has ExplainTask - // TODO explain should use a FetchTask for reading - for (Task task: driver.getPlan().getRootTasks()) { - if (task.getClass() == ExplainTask.class) { - resultSchema = new TableSchema(mResultSchema); - setHasResultSet(true); - break; - } - } - } catch (HiveSQLException e) { - setState(OperationState.ERROR); - throw e; - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - } - - private void runQuery(HiveConf sqlOperationConf) throws HiveSQLException { - try { - // In Hive server mode, we are not able to retry in the FetchTask - // case, when calling fetch queries since execute() has returned. - // For now, we disable the test attempts. - driver.setTryCount(Integer.MAX_VALUE); - response = driver.run(); - if (0 != response.getResponseCode()) { - throw toSQLException("Error while processing statement", response); - } - } catch (HiveSQLException e) { - // If the operation was cancelled by another thread, - // Driver#run will return a non-zero response code. - // We will simply return if the operation state is CANCELED, - // otherwise throw an exception - if (getStatus().getState() == OperationState.CANCELED) { - return; - } - else { - setState(OperationState.ERROR); - throw e; - } - } catch (Exception e) { - setState(OperationState.ERROR); - throw new HiveSQLException("Error running query: " + e.toString(), e); - } - setState(OperationState.FINISHED); - } - - @Override - public void runInternal() throws HiveSQLException { - setState(OperationState.PENDING); - final HiveConf opConfig = getConfigForOperation(); - prepare(opConfig); - if (!shouldRunAsync()) { - runQuery(opConfig); - } else { - // We'll pass ThreadLocals in the background thread from the foreground (handler) thread - final SessionState parentSessionState = SessionState.get(); - // ThreadLocal Hive object needs to be set in background thread. - // The metastore client in Hive is associated with right user. 
- final Hive parentHive = getSessionHive(); - // Current UGI will get used by metastore when metsatore is in embedded mode - // So this needs to get passed to the new background thread - final UserGroupInformation currentUGI = getCurrentUGI(opConfig); - // Runnable impl to call runInternal asynchronously, - // from a different thread - Runnable backgroundOperation = new Runnable() { - @Override - public void run() { - PrivilegedExceptionAction doAsAction = new PrivilegedExceptionAction() { - @Override - public Object run() throws HiveSQLException { - Hive.set(parentHive); - SessionState.setCurrentSessionState(parentSessionState); - // Set current OperationLog in this async thread for keeping on saving query log. - registerCurrentOperationLog(); - try { - runQuery(opConfig); - } catch (HiveSQLException e) { - setOperationException(e); - LOG.error("Error running hive query: ", e); - } finally { - unregisterOperationLog(); - } - return null; - } - }; - - try { - currentUGI.doAs(doAsAction); - } catch (Exception e) { - setOperationException(new HiveSQLException(e)); - LOG.error("Error running hive query as user : " + currentUGI.getShortUserName(), e); - } - finally { - /** - * We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup - * when this thread is garbage collected later. - * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize() - */ - if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) { - ThreadWithGarbageCleanup currentThread = - (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread(); - currentThread.cacheThreadLocalRawStore(); - } - } - } - }; - try { - // This submit blocks if no background threads are available to run this operation - Future backgroundHandle = - getParentSession().getSessionManager().submitBackgroundOperation(backgroundOperation); - setBackgroundHandle(backgroundHandle); - } catch (RejectedExecutionException rejected) { - setState(OperationState.ERROR); - throw new HiveSQLException("The background threadpool cannot accept" + - " new task for execution, please retry the operation", rejected); - } - } - } - - /** - * Returns the current UGI on the stack - * @param opConfig - * @return UserGroupInformation - * @throws HiveSQLException - */ - private UserGroupInformation getCurrentUGI(HiveConf opConfig) throws HiveSQLException { - try { - return Utils.getUGI(); - } catch (Exception e) { - throw new HiveSQLException("Unable to get current user", e); - } - } - - /** - * Returns the ThreadLocal Hive for the current thread - * @return Hive - * @throws HiveSQLException - */ - private Hive getSessionHive() throws HiveSQLException { - try { - return Hive.get(); - } catch (HiveException e) { - throw new HiveSQLException("Failed to get ThreadLocal Hive object", e); - } - } - - private void cleanup(OperationState state) throws HiveSQLException { - setState(state); - if (shouldRunAsync()) { - Future backgroundHandle = getBackgroundHandle(); - if (backgroundHandle != null) { - backgroundHandle.cancel(true); - } - } - if (driver != null) { - driver.close(); - driver.destroy(); - } - driver = null; - - SessionState ss = SessionState.get(); - if (ss.getTmpOutputFile() != null) { - ss.getTmpOutputFile().delete(); - } - } - - @Override - public void cancel() throws HiveSQLException { - cleanup(OperationState.CANCELED); - } - - @Override - public void close() throws HiveSQLException { - cleanup(OperationState.CLOSED); - cleanupOperationLog(); - } - - @Override - public TableSchema 
getResultSetSchema() throws HiveSQLException { - assertState(OperationState.FINISHED); - if (resultSchema == null) { - resultSchema = new TableSchema(driver.getSchema()); - } - return resultSchema; - } - - private final transient List convey = new ArrayList(); - - @Override - public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws HiveSQLException { - validateDefaultFetchOrientation(orientation); - assertState(OperationState.FINISHED); - - RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion()); - - try { - /* if client is requesting fetch-from-start and its not the first time reading from this operation - * then reset the fetch position to beginning - */ - if (orientation.equals(FetchOrientation.FETCH_FIRST) && fetchStarted) { - driver.resetFetch(); - } - fetchStarted = true; - driver.setMaxRows((int) maxRows); - if (driver.getResults(convey)) { - return decode(convey, rowSet); - } - return rowSet; - } catch (IOException e) { - throw new HiveSQLException(e); - } catch (CommandNeedRetryException e) { - throw new HiveSQLException(e); - } catch (Exception e) { - throw new HiveSQLException(e); - } finally { - convey.clear(); - } - } - - private RowSet decode(List rows, RowSet rowSet) throws Exception { - if (driver.isFetchingTable()) { - return prepareFromRow(rows, rowSet); - } - return decodeFromString(rows, rowSet); - } - - // already encoded to thrift-able object in ThriftFormatter - private RowSet prepareFromRow(List rows, RowSet rowSet) throws Exception { - for (Object row : rows) { - rowSet.addRow((Object[]) row); - } - return rowSet; - } - - private RowSet decodeFromString(List rows, RowSet rowSet) - throws SQLException, SerDeException { - getSerDe(); - StructObjectInspector soi = (StructObjectInspector) serde.getObjectInspector(); - List fieldRefs = soi.getAllStructFieldRefs(); - - Object[] deserializedFields = new Object[fieldRefs.size()]; - Object rowObj; - ObjectInspector fieldOI; - - int protocol = getProtocolVersion().getValue(); - for (Object rowString : rows) { - rowObj = serde.deserialize(new BytesWritable(((String)rowString).getBytes(StandardCharsets.UTF_8))); - for (int i = 0; i < fieldRefs.size(); i++) { - StructField fieldRef = fieldRefs.get(i); - fieldOI = fieldRef.getFieldObjectInspector(); - Object fieldData = soi.getStructFieldData(rowObj, fieldRef); - deserializedFields[i] = SerDeUtils.toThriftPayload(fieldData, fieldOI, protocol); - } - rowSet.addRow(deserializedFields); - } - return rowSet; - } - - private SerDe getSerDe() throws SQLException { - if (serde != null) { - return serde; - } - try { - List fieldSchemas = mResultSchema.getFieldSchemas(); - StringBuilder namesSb = new StringBuilder(); - StringBuilder typesSb = new StringBuilder(); - - if (fieldSchemas != null && !fieldSchemas.isEmpty()) { - for (int pos = 0; pos < fieldSchemas.size(); pos++) { - if (pos != 0) { - namesSb.append(","); - typesSb.append(","); - } - namesSb.append(fieldSchemas.get(pos).getName()); - typesSb.append(fieldSchemas.get(pos).getType()); - } - } - String names = namesSb.toString(); - String types = typesSb.toString(); - - serde = new LazySimpleSerDe(); - Properties props = new Properties(); - if (names.length() > 0) { - LOG.debug("Column names: " + names); - props.setProperty(serdeConstants.LIST_COLUMNS, names); - } - if (types.length() > 0) { - LOG.debug("Column types: " + types); - props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types); - } - SerDeUtils.initializeSerDe(serde, new HiveConf(), props, null); - - } catch (Exception ex) { - 
ex.printStackTrace(); - throw new SQLException("Could not create ResultSet: " + ex.getMessage(), ex); - } - return serde; - } - - /** - * If there are query specific settings to overlay, then create a copy of config - * There are two cases we need to clone the session config that's being passed to hive driver - * 1. Async query - - * If the client changes a config setting, that shouldn't reflect in the execution already underway - * 2. confOverlay - - * The query specific settings should only be applied to the query config and not session - * @return new configuration - * @throws HiveSQLException - */ - private HiveConf getConfigForOperation() throws HiveSQLException { - HiveConf sqlOperationConf = getParentSession().getHiveConf(); - if (!getConfOverlay().isEmpty() || shouldRunAsync()) { - // clone the parent session config for this query - sqlOperationConf = new HiveConf(sqlOperationConf); - - // apply overlay query specific settings, if any - for (Map.Entry confEntry : getConfOverlay().entrySet()) { - try { - sqlOperationConf.verifyAndSet(confEntry.getKey(), confEntry.getValue()); - } catch (IllegalArgumentException e) { - throw new HiveSQLException("Error applying statement specific settings", e); - } - } - } - return sqlOperationConf; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java deleted file mode 100644 index e59d19ea6be42..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/operation/TableTypeMapping.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.operation; - -import java.util.Set; - - -public interface TableTypeMapping { - /** - * Map client's table type name to hive's table type - * @param clientTypeName - * @return - */ - String mapToHiveType(String clientTypeName); - - /** - * Map hive's table type name to client's table type - * @param hiveTypeName - * @return - */ - String mapToClientType(String hiveTypeName); - - /** - * Get all the table types of this mapping - * @return - */ - Set getTableTypeNames(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java deleted file mode 100644 index 65f9b298bf4f6..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSession.java +++ /dev/null @@ -1,156 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.*; - -public interface HiveSession extends HiveSessionBase { - - void open(Map sessionConfMap) throws Exception; - - IMetaStoreClient getMetaStoreClient() throws HiveSQLException; - - /** - * getInfo operation handler - * @param getInfoType - * @return - * @throws HiveSQLException - */ - GetInfoValue getInfo(GetInfoType getInfoType) throws HiveSQLException; - - /** - * execute operation handler - * @param statement - * @param confOverlay - * @return - * @throws HiveSQLException - */ - OperationHandle executeStatement(String statement, - Map confOverlay) throws HiveSQLException; - - /** - * execute operation handler - * @param statement - * @param confOverlay - * @return - * @throws HiveSQLException - */ - OperationHandle executeStatementAsync(String statement, - Map confOverlay) throws HiveSQLException; - - /** - * getTypeInfo operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getTypeInfo() throws HiveSQLException; - - /** - * getCatalogs operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getCatalogs() throws HiveSQLException; - - /** - * getSchemas operation handler - * @param catalogName - * @param schemaName - * @return - * @throws HiveSQLException - */ - OperationHandle getSchemas(String catalogName, String schemaName) - throws HiveSQLException; - - /** - * getTables operation handler - * @param catalogName - * @param schemaName - * @param tableName - * @param tableTypes - * @return - * @throws HiveSQLException - */ - OperationHandle getTables(String catalogName, String schemaName, - String tableName, List tableTypes) throws HiveSQLException; - - /** - * getTableTypes operation handler - * @return - * @throws HiveSQLException - */ - OperationHandle getTableTypes() throws HiveSQLException ; - - /** - * getColumns operation handler - * @param catalogName - * @param schemaName - * @param tableName - * @param columnName - * @return - * @throws HiveSQLException - */ - OperationHandle getColumns(String catalogName, String schemaName, - String tableName, String columnName) throws HiveSQLException; - - /** - * getFunctions operation handler - * @param catalogName - * @param schemaName - * @param functionName - * @return - * @throws HiveSQLException - */ - OperationHandle getFunctions(String catalogName, String schemaName, - String functionName) throws HiveSQLException; - - /** - * close the session - * @throws HiveSQLException - */ - void close() throws HiveSQLException; - - void cancelOperation(OperationHandle opHandle) throws HiveSQLException; - - void closeOperation(OperationHandle opHandle) 
throws HiveSQLException; - - TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException; - - RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException; - - String getDelegationToken(HiveAuthFactory authFactory, String owner, - String renewer) throws HiveSQLException; - - void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException; - - void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException; - - void closeExpiredOperations(); - - long getNoOperationTime(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java deleted file mode 100644 index b72c18b2b2135..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionBase.java +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -import java.io.File; - -/** - * Methods that don't need to be executed under a doAs - * context are here. 
Rest of them in HiveSession interface - */ -public interface HiveSessionBase { - - TProtocolVersion getProtocolVersion(); - - /** - * Set the session manager for the session - * @param sessionManager - */ - void setSessionManager(SessionManager sessionManager); - - /** - * Get the session manager for the session - */ - SessionManager getSessionManager(); - - /** - * Set operation manager for the session - * @param operationManager - */ - void setOperationManager(OperationManager operationManager); - - /** - * Check whether operation logging is enabled and session dir is created successfully - */ - boolean isOperationLogEnabled(); - - /** - * Get the session dir, which is the parent dir of operation logs - * @return a file representing the parent directory of operation logs - */ - File getOperationLogSessionDir(); - - /** - * Set the session dir, which is the parent dir of operation logs - * @param operationLogRootDir the parent dir of the session dir - */ - void setOperationLogSessionDir(File operationLogRootDir); - - SessionHandle getSessionHandle(); - - String getUsername(); - - String getPassword(); - - HiveConf getHiveConf(); - - SessionState getSessionState(); - - String getUserName(); - - void setUserName(String userName); - - String getIpAddress(); - - void setIpAddress(String ipAddress); - - long getLastAccessTime(); -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java deleted file mode 100644 index e3fb54d9f47e9..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ /dev/null @@ -1,842 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.session; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.cli.HiveFileProcessor; -import org.apache.hadoop.hive.common.cli.IHiveFileProcessor; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.IMetaStoreClient; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.ql.exec.FetchFormatter; -import org.apache.hadoop.hive.ql.exec.ListSinkOperator; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.history.HiveHistory; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.parse.VariableSubstitution; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.common.util.HiveVersionInfo; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.FetchOrientation; -import org.apache.hive.service.cli.FetchType; -import org.apache.hive.service.cli.GetInfoType; -import org.apache.hive.service.cli.GetInfoValue; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.OperationHandle; -import org.apache.hive.service.cli.RowSet; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.TableSchema; -import org.apache.hive.service.cli.operation.ExecuteStatementOperation; -import org.apache.hive.service.cli.operation.GetCatalogsOperation; -import org.apache.hive.service.cli.operation.GetColumnsOperation; -import org.apache.hive.service.cli.operation.GetFunctionsOperation; -import org.apache.hive.service.cli.operation.GetSchemasOperation; -import org.apache.hive.service.cli.operation.GetTableTypesOperation; -import org.apache.hive.service.cli.operation.GetTypeInfoOperation; -import org.apache.hive.service.cli.operation.MetadataOperation; -import org.apache.hive.service.cli.operation.Operation; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.ThreadWithGarbageCleanup; - -import static org.apache.hadoop.hive.conf.SystemVariables.ENV_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.HIVECONF_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.HIVEVAR_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.METACONF_PREFIX; -import static org.apache.hadoop.hive.conf.SystemVariables.SYSTEM_PREFIX; - -/** - * HiveSession - * - */ -public class HiveSessionImpl implements HiveSession { - private final SessionHandle sessionHandle; - private String username; - private final String password; - private HiveConf hiveConf; - private SessionState sessionState; - private String ipAddress; - private static final String FETCH_WORK_SERDE_CLASS = - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; - private static final Log LOG = LogFactory.getLog(HiveSessionImpl.class); - private SessionManager sessionManager; - 
private OperationManager operationManager; - private final Set opHandleSet = new HashSet(); - private boolean isOperationLogEnabled; - private File sessionLogDir; - private volatile long lastAccessTime; - private volatile long lastIdleTime; - - public HiveSessionImpl(TProtocolVersion protocol, String username, String password, - HiveConf serverhiveConf, String ipAddress) { - this.username = username; - this.password = password; - this.sessionHandle = new SessionHandle(protocol); - this.hiveConf = new HiveConf(serverhiveConf); - this.ipAddress = ipAddress; - - try { - // In non-impersonation mode, map scheduler queue to current user - // if fair scheduler is configured. - if (! hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS) && - hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_MAP_FAIR_SCHEDULER_QUEUE)) { - ShimLoader.getHadoopShims().refreshDefaultQueue(hiveConf, username); - } - } catch (IOException e) { - LOG.warn("Error setting scheduler queue: " + e, e); - } - // Set an explicit session name to control the download directory name - hiveConf.set(ConfVars.HIVESESSIONID.varname, - sessionHandle.getHandleIdentifier().toString()); - // Use thrift transportable formatter - hiveConf.set(ListSinkOperator.OUTPUT_FORMATTER, - FetchFormatter.ThriftFormatter.class.getName()); - hiveConf.setInt(ListSinkOperator.OUTPUT_PROTOCOL, protocol.getValue()); - } - - @Override - /** - * Opens a new HiveServer2 session for the client connection. - * Creates a new SessionState object that will be associated with this HiveServer2 session. - * When the server executes multiple queries in the same session, - * this SessionState object is reused across multiple queries. - * Note that if doAs is true, this call goes through a proxy object, - * which wraps the method logic in a UserGroupInformation#doAs. - * That's why it is important to create SessionState here rather than in the constructor. - */ - public void open(Map sessionConfMap) throws HiveSQLException { - sessionState = new SessionState(hiveConf, username); - sessionState.setUserIpAddress(ipAddress); - sessionState.setIsHiveServerQuery(true); - SessionState.start(sessionState); - try { - sessionState.reloadAuxJars(); - } catch (IOException e) { - String msg = "Failed to load reloadable jar file path: " + e; - LOG.error(msg, e); - throw new HiveSQLException(msg, e); - } - // Process global init file: .hiverc - processGlobalInitFile(); - if (sessionConfMap != null) { - configureSession(sessionConfMap); - } - lastAccessTime = System.currentTimeMillis(); - lastIdleTime = lastAccessTime; - } - - /** - * It is used for processing hiverc file from HiveServer2 side. 
- */ - private class GlobalHivercFileProcessor extends HiveFileProcessor { - @Override - protected BufferedReader loadFile(String fileName) throws IOException { - FileInputStream initStream = null; - BufferedReader bufferedReader = null; - initStream = new FileInputStream(fileName); - bufferedReader = new BufferedReader(new InputStreamReader(initStream)); - return bufferedReader; - } - - @Override - protected int processCmd(String cmd) { - int rc = 0; - String cmd_trimed = cmd.trim(); - try { - executeStatementInternal(cmd_trimed, null, false); - } catch (HiveSQLException e) { - rc = -1; - LOG.warn("Failed to execute HQL command in global .hiverc file.", e); - } - return rc; - } - } - - private void processGlobalInitFile() { - IHiveFileProcessor processor = new GlobalHivercFileProcessor(); - - try { - String hiverc = hiveConf.getVar(ConfVars.HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION); - if (hiverc != null) { - File hivercFile = new File(hiverc); - if (hivercFile.isDirectory()) { - hivercFile = new File(hivercFile, SessionManager.HIVERCFILE); - } - if (hivercFile.isFile()) { - LOG.info("Running global init file: " + hivercFile); - int rc = processor.processFile(hivercFile.getAbsolutePath()); - if (rc != 0) { - LOG.error("Failed on initializing global .hiverc file"); - } - } else { - LOG.debug("Global init file " + hivercFile + " does not exist"); - } - } - } catch (IOException e) { - LOG.warn("Failed on initializing global .hiverc file", e); - } - } - - private void configureSession(Map sessionConfMap) throws HiveSQLException { - SessionState.setCurrentSessionState(sessionState); - for (Map.Entry entry : sessionConfMap.entrySet()) { - String key = entry.getKey(); - if (key.startsWith("set:")) { - try { - setVariable(key.substring(4), entry.getValue()); - } catch (Exception e) { - throw new HiveSQLException(e); - } - } else if (key.startsWith("use:")) { - SessionState.get().setCurrentDatabase(entry.getValue()); - } else { - hiveConf.verifyAndSet(key, entry.getValue()); - } - } - } - - // Copy from org.apache.hadoop.hive.ql.processors.SetProcessor, only change: - // setConf(varname, propName, varvalue, true) when varname.startsWith(HIVECONF_PREFIX) - public static int setVariable(String varname, String varvalue) throws Exception { - SessionState ss = SessionState.get(); - if (varvalue.contains("\n")){ - ss.err.println("Warning: Value had a \\n character in it."); - } - varname = varname.trim(); - if (varname.startsWith(ENV_PREFIX)){ - ss.err.println("env:* variables can not be set."); - return 1; - } else if (varname.startsWith(SYSTEM_PREFIX)){ - String propName = varname.substring(SYSTEM_PREFIX.length()); - System.getProperties().setProperty(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); - } else if (varname.startsWith(HIVECONF_PREFIX)){ - String propName = varname.substring(HIVECONF_PREFIX.length()); - setConf(varname, propName, varvalue, true); - } else if (varname.startsWith(HIVEVAR_PREFIX)) { - String propName = varname.substring(HIVEVAR_PREFIX.length()); - ss.getHiveVariables().put(propName, - new VariableSubstitution().substitute(ss.getConf(),varvalue)); - } else if (varname.startsWith(METACONF_PREFIX)) { - String propName = varname.substring(METACONF_PREFIX.length()); - Hive hive = Hive.get(ss.getConf()); - hive.setMetaConf(propName, new VariableSubstitution().substitute(ss.getConf(), varvalue)); - } else { - setConf(varname, varname, varvalue, true); - } - return 0; - } - - // returns non-null string for validation fail - private static void setConf(String 
varname, String key, String varvalue, boolean register) - throws IllegalArgumentException { - HiveConf conf = SessionState.get().getConf(); - String value = new VariableSubstitution().substitute(conf, varvalue); - if (conf.getBoolVar(HiveConf.ConfVars.HIVECONFVALIDATION)) { - HiveConf.ConfVars confVars = HiveConf.getConfVars(key); - if (confVars != null) { - if (!confVars.isType(value)) { - StringBuilder message = new StringBuilder(); - message.append("'SET ").append(varname).append('=').append(varvalue); - message.append("' FAILED because ").append(key).append(" expects "); - message.append(confVars.typeString()).append(" type value."); - throw new IllegalArgumentException(message.toString()); - } - String fail = confVars.validate(value); - if (fail != null) { - StringBuilder message = new StringBuilder(); - message.append("'SET ").append(varname).append('=').append(varvalue); - message.append("' FAILED in validation : ").append(fail).append('.'); - throw new IllegalArgumentException(message.toString()); - } - } else if (key.startsWith("hive.")) { - throw new IllegalArgumentException("hive configuration " + key + " does not exists."); - } - } - conf.verifyAndSet(key, value); - if (register) { - SessionState.get().getOverriddenConfigurations().put(key, value); - } - } - - @Override - public void setOperationLogSessionDir(File operationLogRootDir) { - if (!operationLogRootDir.exists()) { - LOG.warn("The operation log root directory is removed, recreating: " + - operationLogRootDir.getAbsolutePath()); - if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); - } - } - if (!operationLogRootDir.canWrite()) { - LOG.warn("The operation log root directory is not writable: " + - operationLogRootDir.getAbsolutePath()); - } - sessionLogDir = new File(operationLogRootDir, sessionHandle.getHandleIdentifier().toString()); - isOperationLogEnabled = true; - if (!sessionLogDir.exists()) { - if (!sessionLogDir.mkdir()) { - LOG.warn("Unable to create operation log session directory: " + - sessionLogDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - } - if (isOperationLogEnabled) { - LOG.info("Operation log session directory is created: " + sessionLogDir.getAbsolutePath()); - } - } - - @Override - public boolean isOperationLogEnabled() { - return isOperationLogEnabled; - } - - @Override - public File getOperationLogSessionDir() { - return sessionLogDir; - } - - @Override - public TProtocolVersion getProtocolVersion() { - return sessionHandle.getProtocolVersion(); - } - - @Override - public SessionManager getSessionManager() { - return sessionManager; - } - - @Override - public void setSessionManager(SessionManager sessionManager) { - this.sessionManager = sessionManager; - } - - private OperationManager getOperationManager() { - return operationManager; - } - - @Override - public void setOperationManager(OperationManager operationManager) { - this.operationManager = operationManager; - } - - protected synchronized void acquire(boolean userAccess) { - // Need to make sure that the this HiveServer2's session's SessionState is - // stored in the thread local for the handler thread. - SessionState.setCurrentSessionState(sessionState); - if (userAccess) { - lastAccessTime = System.currentTimeMillis(); - } - } - - /** - * 1. We'll remove the ThreadLocal SessionState as this thread might now serve - * other requests. - * 2. 
We'll cache the ThreadLocal RawStore object for this background thread for an orderly cleanup - * when this thread is garbage collected later. - * @see org.apache.hive.service.server.ThreadWithGarbageCleanup#finalize() - */ - protected synchronized void release(boolean userAccess) { - SessionState.detachSession(); - if (ThreadWithGarbageCleanup.currentThread() instanceof ThreadWithGarbageCleanup) { - ThreadWithGarbageCleanup currentThread = - (ThreadWithGarbageCleanup) ThreadWithGarbageCleanup.currentThread(); - currentThread.cacheThreadLocalRawStore(); - } - if (userAccess) { - lastAccessTime = System.currentTimeMillis(); - } - if (opHandleSet.isEmpty()) { - lastIdleTime = System.currentTimeMillis(); - } else { - lastIdleTime = 0; - } - } - - @Override - public SessionHandle getSessionHandle() { - return sessionHandle; - } - - @Override - public String getUsername() { - return username; - } - - @Override - public String getPassword() { - return password; - } - - @Override - public HiveConf getHiveConf() { - hiveConf.setVar(HiveConf.ConfVars.HIVEFETCHOUTPUTSERDE, FETCH_WORK_SERDE_CLASS); - return hiveConf; - } - - @Override - public IMetaStoreClient getMetaStoreClient() throws HiveSQLException { - try { - return Hive.get(getHiveConf()).getMSC(); - } catch (HiveException e) { - throw new HiveSQLException("Failed to get metastore connection", e); - } catch (MetaException e) { - throw new HiveSQLException("Failed to get metastore connection", e); - } - } - - @Override - public GetInfoValue getInfo(GetInfoType getInfoType) - throws HiveSQLException { - acquire(true); - try { - switch (getInfoType) { - case CLI_SERVER_NAME: - return new GetInfoValue("Hive"); - case CLI_DBMS_NAME: - return new GetInfoValue("Apache Hive"); - case CLI_DBMS_VER: - return new GetInfoValue(HiveVersionInfo.getVersion()); - case CLI_MAX_COLUMN_NAME_LEN: - return new GetInfoValue(128); - case CLI_MAX_SCHEMA_NAME_LEN: - return new GetInfoValue(128); - case CLI_MAX_TABLE_NAME_LEN: - return new GetInfoValue(128); - case CLI_TXN_CAPABLE: - default: - throw new HiveSQLException("Unrecognized GetInfoType value: " + getInfoType.toString()); - } - } finally { - release(true); - } - } - - @Override - public OperationHandle executeStatement(String statement, Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(statement, confOverlay, false); - } - - @Override - public OperationHandle executeStatementAsync(String statement, Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(statement, confOverlay, true); - } - - private OperationHandle executeStatementInternal(String statement, Map confOverlay, - boolean runAsync) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - ExecuteStatementOperation operation = operationManager - .newExecuteStatementOperation(getSession(), statement, confOverlay, runAsync); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - // Referring to SQLOperation.java, there is no chance that a HiveSQLException throws and the asyn - // background operation submits to thread pool successfully at the same time. 
So, Cleanup - // opHandle directly when got HiveSQLException - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTypeInfo() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetTypeInfoOperation operation = operationManager.newGetTypeInfoOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getCatalogs() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetCatalogsOperation operation = operationManager.newGetCatalogsOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getSchemas(String catalogName, String schemaName) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetSchemasOperation operation = - operationManager.newGetSchemasOperation(getSession(), catalogName, schemaName); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTables(String catalogName, String schemaName, String tableName, - List tableTypes) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - MetadataOperation operation = - operationManager.newGetTablesOperation(getSession(), catalogName, schemaName, tableName, tableTypes); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getTableTypes() - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetTableTypesOperation operation = operationManager.newGetTableTypesOperation(getSession()); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getColumns(String catalogName, String schemaName, - String tableName, String columnName) throws HiveSQLException { - acquire(true); - String addedJars = Utilities.getResourceFiles(hiveConf, SessionState.ResourceType.JAR); - if (StringUtils.isNotBlank(addedJars)) { - IMetaStoreClient metastoreClient = getSession().getMetaStoreClient(); - metastoreClient.setHiveAddedJars(addedJars); - } - OperationManager operationManager = getOperationManager(); - GetColumnsOperation operation = operationManager.newGetColumnsOperation(getSession(), - catalogName, schemaName, tableName, columnName); - OperationHandle opHandle = operation.getHandle(); 
- try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public OperationHandle getFunctions(String catalogName, String schemaName, String functionName) - throws HiveSQLException { - acquire(true); - - OperationManager operationManager = getOperationManager(); - GetFunctionsOperation operation = operationManager - .newGetFunctionsOperation(getSession(), catalogName, schemaName, functionName); - OperationHandle opHandle = operation.getHandle(); - try { - operation.run(); - opHandleSet.add(opHandle); - return opHandle; - } catch (HiveSQLException e) { - operationManager.closeOperation(opHandle); - throw e; - } finally { - release(true); - } - } - - @Override - public void close() throws HiveSQLException { - try { - acquire(true); - // Iterate through the opHandles and close their operations - for (OperationHandle opHandle : opHandleSet) { - try { - operationManager.closeOperation(opHandle); - } catch (Exception e) { - LOG.warn("Exception is thrown closing operation " + opHandle, e); - } - } - opHandleSet.clear(); - // Cleanup session log directory. - cleanupSessionLogDir(); - // Cleanup pipeout file. - cleanupPipeoutFile(); - HiveHistory hiveHist = sessionState.getHiveHistory(); - if (null != hiveHist) { - hiveHist.closeStream(); - } - try { - sessionState.close(); - } finally { - sessionState = null; - } - } catch (IOException ioe) { - throw new HiveSQLException("Failure to close", ioe); - } finally { - if (sessionState != null) { - try { - sessionState.close(); - } catch (Throwable t) { - LOG.warn("Error closing session", t); - } - sessionState = null; - } - release(true); - } - } - - private void cleanupPipeoutFile() { - String lScratchDir = hiveConf.getVar(ConfVars.LOCALSCRATCHDIR); - String sessionID = hiveConf.getVar(ConfVars.HIVESESSIONID); - - File[] fileAry = new File(lScratchDir).listFiles( - (dir, name) -> name.startsWith(sessionID) && name.endsWith(".pipeout")); - - if (fileAry == null) { - LOG.error("Unable to access pipeout files in " + lScratchDir); - } else { - for (File file : fileAry) { - try { - FileUtils.forceDelete(file); - } catch (Exception e) { - LOG.error("Failed to cleanup pipeout file: " + file, e); - } - } - } - } - - private void cleanupSessionLogDir() { - if (isOperationLogEnabled) { - try { - FileUtils.forceDelete(sessionLogDir); - } catch (Exception e) { - LOG.error("Failed to cleanup session log dir: " + sessionHandle, e); - } - } - } - - @Override - public SessionState getSessionState() { - return sessionState; - } - - @Override - public String getUserName() { - return username; - } - - @Override - public void setUserName(String userName) { - this.username = userName; - } - - @Override - public long getLastAccessTime() { - return lastAccessTime; - } - - @Override - public void closeExpiredOperations() { - OperationHandle[] handles = opHandleSet.toArray(new OperationHandle[opHandleSet.size()]); - if (handles.length > 0) { - List operations = operationManager.removeExpiredOperations(handles); - if (!operations.isEmpty()) { - closeTimedOutOperations(operations); - } - } - } - - @Override - public long getNoOperationTime() { - return lastIdleTime > 0 ? 
System.currentTimeMillis() - lastIdleTime : 0; - } - - private void closeTimedOutOperations(List operations) { - acquire(false); - try { - for (Operation operation : operations) { - opHandleSet.remove(operation.getHandle()); - try { - operation.close(); - } catch (Exception e) { - LOG.warn("Exception is thrown closing timed-out operation " + operation.getHandle(), e); - } - } - } finally { - release(false); - } - } - - @Override - public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - sessionManager.getOperationManager().cancelOperation(opHandle); - } finally { - release(true); - } - } - - @Override - public void closeOperation(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - operationManager.closeOperation(opHandle); - opHandleSet.remove(opHandle); - } finally { - release(true); - } - } - - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) throws HiveSQLException { - acquire(true); - try { - return sessionManager.getOperationManager().getOperationResultSetSchema(opHandle); - } finally { - release(true); - } - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, - long maxRows, FetchType fetchType) throws HiveSQLException { - acquire(true); - try { - if (fetchType == FetchType.QUERY_OUTPUT) { - return operationManager.getOperationNextRowSet(opHandle, orientation, maxRows); - } - return operationManager.getOperationLogRowSet(opHandle, orientation, maxRows); - } finally { - release(true); - } - } - - protected HiveSession getSession() { - return this; - } - - @Override - public String getIpAddress() { - return ipAddress; - } - - @Override - public void setIpAddress(String ipAddress) { - this.ipAddress = ipAddress; - } - - @Override - public String getDelegationToken(HiveAuthFactory authFactory, String owner, String renewer) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), owner, getIpAddress(), getHiveConf()); - return authFactory.getDelegationToken(owner, renewer); - } - - @Override - public void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), getUserFromToken(authFactory, tokenStr), - getIpAddress(), getHiveConf()); - authFactory.cancelDelegationToken(tokenStr); - } - - @Override - public void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - HiveAuthFactory.verifyProxyAccess(getUsername(), getUserFromToken(authFactory, tokenStr), - getIpAddress(), getHiveConf()); - authFactory.renewDelegationToken(tokenStr); - } - - // extract the real user from the given token string - private String getUserFromToken(HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException { - return authFactory.getUserFromToken(tokenStr); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java deleted file mode 100644 index 762dbb2faadec..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ /dev/null @@ -1,182 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.thrift.TProtocolVersion; - -/** - * - * HiveSessionImplwithUGI. - * HiveSession with connecting user's UGI and delegation token if required - */ -public class HiveSessionImplwithUGI extends HiveSessionImpl { - public static final String HS2TOKEN = "HiveServer2ImpersonationToken"; - - private UserGroupInformation sessionUgi = null; - private String delegationTokenStr = null; - private Hive sessionHive = null; - private HiveSession proxySession = null; - static final Log LOG = LogFactory.getLog(HiveSessionImplwithUGI.class); - - public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, - HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { - super(protocol, username, password, hiveConf, ipAddress); - setSessionUGI(username); - setDelegationToken(delegationToken); - - // create a new metastore connection for this particular user session - Hive.set(null); - try { - sessionHive = Hive.get(getHiveConf()); - } catch (HiveException e) { - throw new HiveSQLException("Failed to setup metastore connection", e); - } - } - - // setup appropriate UGI for the session - public void setSessionUGI(String owner) throws HiveSQLException { - if (owner == null) { - throw new HiveSQLException("No username provided for impersonation"); - } - if (UserGroupInformation.isSecurityEnabled()) { - try { - sessionUgi = UserGroupInformation.createProxyUser( - owner, UserGroupInformation.getLoginUser()); - } catch (IOException e) { - throw new HiveSQLException("Couldn't setup proxy user", e); - } - } else { - sessionUgi = UserGroupInformation.createRemoteUser(owner); - } - } - - public UserGroupInformation getSessionUgi() { - return this.sessionUgi; - } - - public String getDelegationToken() { - return this.delegationTokenStr; - } - - @Override - protected synchronized void acquire(boolean userAccess) { - super.acquire(userAccess); - // if we have a metastore connection with impersonation, then set it first - if (sessionHive != null) { - Hive.set(sessionHive); - } - } - - /** - * Close the file systems for the session and remove it from the FileSystem cache. 
- * Cancel the session's delegation token and close the metastore connection - */ - @Override - public void close() throws HiveSQLException { - try { - acquire(true); - cancelDelegationToken(); - } finally { - try { - super.close(); - } finally { - try { - FileSystem.closeAllForUGI(sessionUgi); - } catch (IOException ioe) { - throw new HiveSQLException("Could not clean up file-system handles for UGI: " - + sessionUgi, ioe); - } - } - } - } - - /** - * Enable delegation token for the session - * save the token string and set the token.signature in hive conf. The metastore client uses - * this token.signature to determine where to use kerberos or delegation token - * @throws HiveException - * @throws IOException - */ - private void setDelegationToken(String delegationTokenStr) throws HiveSQLException { - this.delegationTokenStr = delegationTokenStr; - if (delegationTokenStr != null) { - getHiveConf().set("hive.metastore.token.signature", HS2TOKEN); - try { - Utils.setTokenStr(sessionUgi, delegationTokenStr, HS2TOKEN); - } catch (IOException e) { - throw new HiveSQLException("Couldn't setup delegation token in the ugi", e); - } - } - } - - // If the session has a delegation token obtained from the metastore, then cancel it - private void cancelDelegationToken() throws HiveSQLException { - if (delegationTokenStr != null) { - try { - Hive.get(getHiveConf()).cancelDelegationToken(delegationTokenStr); - } catch (HiveException e) { - throw new HiveSQLException("Couldn't cancel delegation token", e); - } - // close the metastore connection created with this delegation token - Hive.closeCurrent(); - } - } - - @Override - protected HiveSession getSession() { - assert proxySession != null; - - return proxySession; - } - - public void setProxySession(HiveSession proxySession) { - this.proxySession = proxySession; - } - - @Override - public String getDelegationToken(HiveAuthFactory authFactory, String owner, - String renewer) throws HiveSQLException { - return authFactory.getDelegationToken(owner, renewer); - } - - @Override - public void cancelDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - authFactory.cancelDelegationToken(tokenStr); - } - - @Override - public void renewDelegationToken(HiveAuthFactory authFactory, String tokenStr) - throws HiveSQLException { - authFactory.renewDelegationToken(tokenStr); - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java deleted file mode 100644 index ad6fb3ba37a0e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/session/SessionManager.java +++ /dev/null @@ -1,377 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.session; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Date; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.cli.HiveSQLException; -import org.apache.hive.service.cli.SessionHandle; -import org.apache.hive.service.cli.operation.OperationManager; -import org.apache.hive.service.cli.thrift.TProtocolVersion; -import org.apache.hive.service.server.HiveServer2; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; - -/** - * SessionManager. - * - */ -public class SessionManager extends CompositeService { - - private static final Log LOG = LogFactory.getLog(SessionManager.class); - public static final String HIVERCFILE = ".hiverc"; - private HiveConf hiveConf; - private final Map handleToSession = - new ConcurrentHashMap(); - private final OperationManager operationManager = new OperationManager(); - private ThreadPoolExecutor backgroundOperationPool; - private boolean isOperationLogEnabled; - private File operationLogRootDir; - - private long checkInterval; - private long sessionTimeout; - private boolean checkOperation; - - private volatile boolean shutdown; - // The HiveServer2 instance running this service - private final HiveServer2 hiveServer2; - - public SessionManager(HiveServer2 hiveServer2) { - super(SessionManager.class.getSimpleName()); - this.hiveServer2 = hiveServer2; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - //Create operation log root directory, if operation logging is enabled - if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) { - initOperationLogRootDir(); - } - createBackgroundOperationPool(); - addService(operationManager); - super.init(hiveConf); - } - - private void createBackgroundOperationPool() { - int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS); - LOG.info("HiveServer2: Background operation thread pool size: " + poolSize); - int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE); - LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize); - long keepAliveTime = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS); - LOG.info( - "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds"); - - // Create a thread pool with #poolSize threads - // Threads terminate when they are idle for more than the keepAliveTime - // A bounded blocking queue is used to queue incoming operations, if #operations > poolSize - String threadPoolName = "HiveServer2-Background-Pool"; - backgroundOperationPool = new ThreadPoolExecutor(poolSize, poolSize, - keepAliveTime, TimeUnit.SECONDS, new LinkedBlockingQueue(poolQueueSize), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - 
backgroundOperationPool.allowCoreThreadTimeOut(true); - - checkInterval = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_SESSION_CHECK_INTERVAL, TimeUnit.MILLISECONDS); - sessionTimeout = HiveConf.getTimeVar( - hiveConf, ConfVars.HIVE_SERVER2_IDLE_SESSION_TIMEOUT, TimeUnit.MILLISECONDS); - checkOperation = HiveConf.getBoolVar(hiveConf, - ConfVars.HIVE_SERVER2_IDLE_SESSION_CHECK_OPERATION); - } - - private void initOperationLogRootDir() { - operationLogRootDir = new File( - hiveConf.getVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION)); - isOperationLogEnabled = true; - - if (operationLogRootDir.exists() && !operationLogRootDir.isDirectory()) { - LOG.warn("The operation log root directory exists, but it is not a directory: " + - operationLogRootDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - - if (!operationLogRootDir.exists()) { - if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); - isOperationLogEnabled = false; - } - } - - if (isOperationLogEnabled) { - LOG.info("Operation log root directory is created: " + operationLogRootDir.getAbsolutePath()); - try { - FileUtils.forceDeleteOnExit(operationLogRootDir); - } catch (IOException e) { - LOG.warn("Failed to schedule cleanup HS2 operation logging root dir: " + - operationLogRootDir.getAbsolutePath(), e); - } - } - } - - @Override - public synchronized void start() { - super.start(); - if (checkInterval > 0) { - startTimeoutChecker(); - } - } - - private final Object timeoutCheckerLock = new Object(); - - private void startTimeoutChecker() { - final long interval = Math.max(checkInterval, 3000L); // minimum 3 seconds - final Runnable timeoutChecker = new Runnable() { - @Override - public void run() { - sleepFor(interval); - while (!shutdown) { - long current = System.currentTimeMillis(); - for (HiveSession session : new ArrayList(handleToSession.values())) { - if (shutdown) { - break; - } - if (sessionTimeout > 0 && session.getLastAccessTime() + sessionTimeout <= current - && (!checkOperation || session.getNoOperationTime() > sessionTimeout)) { - SessionHandle handle = session.getSessionHandle(); - LOG.warn("Session " + handle + " is Timed-out (last access : " + - new Date(session.getLastAccessTime()) + ") and will be closed"); - try { - closeSession(handle); - } catch (HiveSQLException e) { - LOG.warn("Exception is thrown closing session " + handle, e); - } - } else { - session.closeExpiredOperations(); - } - } - sleepFor(interval); - } - } - - private void sleepFor(long interval) { - synchronized (timeoutCheckerLock) { - try { - timeoutCheckerLock.wait(interval); - } catch (InterruptedException e) { - // Ignore, and break. - } - } - } - }; - backgroundOperationPool.execute(timeoutChecker); - } - - private void shutdownTimeoutChecker() { - shutdown = true; - synchronized (timeoutCheckerLock) { - timeoutCheckerLock.notify(); - } - } - - @Override - public synchronized void stop() { - super.stop(); - shutdownTimeoutChecker(); - if (backgroundOperationPool != null) { - backgroundOperationPool.shutdown(); - long timeout = hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT, TimeUnit.SECONDS); - try { - backgroundOperationPool.awaitTermination(timeout, TimeUnit.SECONDS); - } catch (InterruptedException e) { - LOG.warn("HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT = " + timeout + - " seconds has been exceeded. 
RUNNING background operations will be shut down", e); - } - backgroundOperationPool = null; - } - cleanupLoggingRootDir(); - } - - private void cleanupLoggingRootDir() { - if (isOperationLogEnabled) { - try { - FileUtils.forceDelete(operationLogRootDir); - } catch (Exception e) { - LOG.warn("Failed to cleanup root dir of HS2 logging: " + operationLogRootDir - .getAbsolutePath(), e); - } - } - } - - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map sessionConf) throws HiveSQLException { - return openSession(protocol, username, password, ipAddress, sessionConf, false, null); - } - - /** - * Opens a new session and creates a session handle. - * The username passed to this method is the effective username. - * If withImpersonation is true (==doAs true) we wrap all the calls in HiveSession - * within a UGI.doAs, where UGI corresponds to the effective user. - * - * Please see {@code org.apache.hive.service.cli.thrift.ThriftCLIService.getUserName()} for - * more details. - * - * @param protocol - * @param username - * @param password - * @param ipAddress - * @param sessionConf - * @param withImpersonation - * @param delegationToken - * @return - * @throws HiveSQLException - */ - public SessionHandle openSession(TProtocolVersion protocol, String username, String password, String ipAddress, - Map sessionConf, boolean withImpersonation, String delegationToken) - throws HiveSQLException { - HiveSession session; - // If doAs is set to true for HiveServer2, we will create a proxy object for the session impl. - // Within the proxy object, we wrap the method call in a UserGroupInformation#doAs - if (withImpersonation) { - HiveSessionImplwithUGI sessionWithUGI = new HiveSessionImplwithUGI(protocol, username, password, - hiveConf, ipAddress, delegationToken); - session = HiveSessionProxy.getProxy(sessionWithUGI, sessionWithUGI.getSessionUgi()); - sessionWithUGI.setProxySession(session); - } else { - session = new HiveSessionImpl(protocol, username, password, hiveConf, ipAddress); - } - session.setSessionManager(this); - session.setOperationManager(operationManager); - try { - session.open(sessionConf); - } catch (Exception e) { - try { - session.close(); - } catch (Throwable t) { - LOG.warn("Error closing session", t); - } - session = null; - throw new HiveSQLException("Failed to open new session: " + e, e); - } - if (isOperationLogEnabled) { - session.setOperationLogSessionDir(operationLogRootDir); - } - handleToSession.put(session.getSessionHandle(), session); - return session.getSessionHandle(); - } - - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - HiveSession session = handleToSession.remove(sessionHandle); - if (session == null) { - throw new HiveSQLException("Session does not exist!"); - } - session.close(); - } - - public HiveSession getSession(SessionHandle sessionHandle) throws HiveSQLException { - HiveSession session = handleToSession.get(sessionHandle); - if (session == null) { - throw new HiveSQLException("Invalid SessionHandle: " + sessionHandle); - } - return session; - } - - public OperationManager getOperationManager() { - return operationManager; - } - - private static ThreadLocal threadLocalIpAddress = new ThreadLocal() { - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setIpAddress(String ipAddress) { - threadLocalIpAddress.set(ipAddress); - } - - public static void clearIpAddress() { - threadLocalIpAddress.remove(); - } - - 
public static String getIpAddress() { - return threadLocalIpAddress.get(); - } - - private static ThreadLocal threadLocalUserName = new ThreadLocal(){ - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setUserName(String userName) { - threadLocalUserName.set(userName); - } - - public static void clearUserName() { - threadLocalUserName.remove(); - } - - public static String getUserName() { - return threadLocalUserName.get(); - } - - private static ThreadLocal threadLocalProxyUserName = new ThreadLocal(){ - @Override - protected synchronized String initialValue() { - return null; - } - }; - - public static void setProxyUserName(String userName) { - LOG.debug("setting proxy user name based on query param to: " + userName); - threadLocalProxyUserName.set(userName); - } - - public static String getProxyUserName() { - return threadLocalProxyUserName.get(); - } - - public static void clearProxyUserName() { - threadLocalProxyUserName.remove(); - } - - public Future submitBackgroundOperation(Runnable r) { - return backgroundOperationPool.submit(r); - } - - public int getOpenSessionCount() { - return handleToSession.size(); - } -} - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java deleted file mode 100644 index 00bdf7e19126e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ /dev/null @@ -1,121 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.thrift; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; -import org.apache.thrift.TProcessorFactory; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.server.TThreadPoolServer; -import org.apache.thrift.transport.TServerSocket; -import org.apache.thrift.transport.TTransportFactory; - - -public class ThriftBinaryCLIService extends ThriftCLIService { - - public ThriftBinaryCLIService(CLIService cliService) { - super(cliService, ThriftBinaryCLIService.class.getSimpleName()); - } - - @Override - protected void initializeServer() { - try { - // Server thread pool - String threadPoolName = "HiveServer2-Handler-Pool"; - ExecutorService executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue(), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - - // Thrift configs - hiveAuthFactory = new HiveAuthFactory(hiveConf); - TTransportFactory transportFactory = hiveAuthFactory.getAuthTransFactory(); - TProcessorFactory processorFactory = hiveAuthFactory.getAuthProcFactory(this); - TServerSocket serverSocket = null; - List sslVersionBlacklist = new ArrayList(); - for (String sslVersion : hiveConf.getVar(ConfVars.HIVE_SSL_PROTOCOL_BLACKLIST).split(",")) { - sslVersionBlacklist.add(sslVersion); - } - if (!hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL)) { - serverSocket = HiveAuthFactory.getServerSocket(hiveHost, portNum); - } else { - String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); - if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname - + " Not configured for SSL connection"); - } - String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, - HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); - serverSocket = HiveAuthFactory.getServerSSLSocket(hiveHost, portNum, keyStorePath, - keyStorePassword, sslVersionBlacklist); - } - - // In case HIVE_SERVER2_THRIFT_PORT or hive.server2.thrift.port is configured with 0 which - // represents any free port, we should set it to the actual one - portNum = serverSocket.getServerSocket().getLocalPort(); - - // Server args - int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); - int requestTimeout = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); - int beBackoffSlotLength = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); - TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) - .processorFactory(processorFactory).transportFactory(transportFactory) - .protocolFactory(new TBinaryProtocol.Factory()) - .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) - 
.requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) - .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) - .executorService(executorService); - - // TCP Server - server = new TThreadPoolServer(sargs); - server.setServerEventHandler(serverEventHandler); - String msg = "Starting " + ThriftBinaryCLIService.class.getSimpleName() + " on port " - + serverSocket.getServerSocket().getLocalPort() + " with " + minWorkerThreads + "..." + maxWorkerThreads + " worker threads"; - LOG.info(msg); - } catch (Exception t) { - throw new ServiceException("Error initializing " + getName(), t); - } - } - - @Override - public void run() { - try { - server.serve(); - } catch (Throwable t) { - LOG.fatal( - "Error starting HiveServer2: could not start " - + ThriftBinaryCLIService.class.getSimpleName(), t); - System.exit(-1); - } - } - -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java deleted file mode 100644 index ff533769b5b84..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ /dev/null @@ -1,693 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import javax.security.auth.login.LoginException; -import java.io.IOException; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hive.service.AbstractService; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.ServiceUtils; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.auth.TSetIpAddressProcessor; -import org.apache.hive.service.cli.*; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.server.HiveServer2; -import org.apache.thrift.TException; -import org.apache.thrift.protocol.TProtocol; -import org.apache.thrift.server.ServerContext; -import org.apache.thrift.server.TServer; -import org.apache.thrift.server.TServerEventHandler; -import org.apache.thrift.transport.TTransport; - -/** - * ThriftCLIService. 
- * - */ -public abstract class ThriftCLIService extends AbstractService implements TCLIService.Iface, Runnable { - - public static final Log LOG = LogFactory.getLog(ThriftCLIService.class.getName()); - - protected CLIService cliService; - private static final TStatus OK_STATUS = new TStatus(TStatusCode.SUCCESS_STATUS); - protected static HiveAuthFactory hiveAuthFactory; - - protected int portNum; - protected InetAddress serverIPAddress; - protected String hiveHost; - protected TServer server; - protected org.eclipse.jetty.server.Server httpServer; - - private boolean isStarted = false; - protected boolean isEmbedded = false; - - protected HiveConf hiveConf; - - protected int minWorkerThreads; - protected int maxWorkerThreads; - protected long workerKeepAliveTime; - - protected TServerEventHandler serverEventHandler; - protected ThreadLocal currentServerContext; - - static class ThriftCLIServerContext implements ServerContext { - private SessionHandle sessionHandle = null; - - public void setSessionHandle(SessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public SessionHandle getSessionHandle() { - return sessionHandle; - } - } - - public ThriftCLIService(CLIService service, String serviceName) { - super(serviceName); - this.cliService = service; - currentServerContext = new ThreadLocal(); - serverEventHandler = new TServerEventHandler() { - @Override - public ServerContext createContext( - TProtocol input, TProtocol output) { - return new ThriftCLIServerContext(); - } - - @Override - public void deleteContext(ServerContext serverContext, - TProtocol input, TProtocol output) { - ThriftCLIServerContext context = (ThriftCLIServerContext)serverContext; - SessionHandle sessionHandle = context.getSessionHandle(); - if (sessionHandle != null) { - LOG.info("Session disconnected without closing properly, close it now"); - try { - cliService.closeSession(sessionHandle); - } catch (HiveSQLException e) { - LOG.warn("Failed to close session: " + e, e); - } - } - } - - @Override - public void preServe() { - } - - @Override - public void processContext(ServerContext serverContext, - TTransport input, TTransport output) { - currentServerContext.set(serverContext); - } - }; - } - - @Override - public synchronized void init(HiveConf hiveConf) { - this.hiveConf = hiveConf; - // Initialize common server configs needed in both binary & http modes - String portString; - hiveHost = System.getenv("HIVE_SERVER2_THRIFT_BIND_HOST"); - if (hiveHost == null) { - hiveHost = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST); - } - try { - if (hiveHost != null && !hiveHost.isEmpty()) { - serverIPAddress = InetAddress.getByName(hiveHost); - } else { - serverIPAddress = InetAddress.getLocalHost(); - } - } catch (UnknownHostException e) { - throw new ServiceException(e); - } - // HTTP mode - if (HiveServer2.isHTTPTransportMode(hiveConf)) { - workerKeepAliveTime = - hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_WORKER_KEEPALIVE_TIME, - TimeUnit.SECONDS); - portString = System.getenv("HIVE_SERVER2_THRIFT_HTTP_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT); - } - } - // Binary mode - else { - workerKeepAliveTime = - hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME, TimeUnit.SECONDS); - portString = System.getenv("HIVE_SERVER2_THRIFT_PORT"); - if (portString != null) { - portNum = Integer.valueOf(portString); - } else { - portNum = 
hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_PORT); - } - } - minWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS); - maxWorkerThreads = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS); - super.init(hiveConf); - } - - @Override - public synchronized void start() { - super.start(); - if (!isStarted && !isEmbedded) { - initializeServer(); - new Thread(this).start(); - isStarted = true; - } - } - - @Override - public synchronized void stop() { - if (isStarted && !isEmbedded) { - if(server != null) { - server.stop(); - LOG.info("Thrift server has stopped"); - } - if((httpServer != null) && httpServer.isStarted()) { - try { - httpServer.stop(); - LOG.info("Http server has stopped"); - } catch (Exception e) { - LOG.error("Error stopping Http server: ", e); - } - } - isStarted = false; - } - super.stop(); - } - - public int getPortNumber() { - return portNum; - } - - public InetAddress getServerIPAddress() { - return serverIPAddress; - } - - @Override - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) - throws TException { - TGetDelegationTokenResp resp = new TGetDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - @Override - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) - throws TException { - TCancelDelegationTokenResp resp = new TCancelDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - @Override - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) - throws TException { - TRenewDelegationTokenResp resp = new TRenewDelegationTokenResp(); - resp.setStatus(notSupportTokenErrorStatus()); - return resp; - } - - private TStatus notSupportTokenErrorStatus() { - TStatus errorStatus = new TStatus(TStatusCode.ERROR_STATUS); - errorStatus.setErrorMessage("Delegation token is not supported"); - return errorStatus; - } - - @Override - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { - LOG.info("Client protocol version: " + req.getClient_protocol()); - TOpenSessionResp resp = new TOpenSessionResp(); - try { - SessionHandle sessionHandle = getSessionHandle(req, resp); - resp.setSessionHandle(sessionHandle.toTSessionHandle()); - // TODO: set real configuration map - resp.setConfiguration(new HashMap()); - resp.setStatus(OK_STATUS); - ThriftCLIServerContext context = - (ThriftCLIServerContext)currentServerContext.get(); - if (context != null) { - context.setSessionHandle(sessionHandle); - } - } catch (Exception e) { - LOG.warn("Error opening session: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - private String getIpAddress() { - String clientIpAddress; - // Http transport mode. - // We set the thread local ip address, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - clientIpAddress = SessionManager.getIpAddress(); - } - else { - // Kerberos - if (isKerberosAuthMode()) { - clientIpAddress = hiveAuthFactory.getIpAddress(); - } - // Except kerberos, NOSASL - else { - clientIpAddress = TSetIpAddressProcessor.getUserIpAddress(); - } - } - LOG.debug("Client's IP Address: " + clientIpAddress); - return clientIpAddress; - } - - /** - * Returns the effective username. - * 1. If hive.server2.allow.user.substitution = false: the username of the connecting user - * 2. 
If hive.server2.allow.user.substitution = true: the username of the end user, - * that the connecting user is trying to proxy for. - * This includes a check whether the connecting user is allowed to proxy for the end user. - * @param req - * @return - * @throws HiveSQLException - */ - private String getUserName(TOpenSessionReq req) throws HiveSQLException { - String userName = null; - // Kerberos - if (isKerberosAuthMode()) { - userName = hiveAuthFactory.getRemoteUser(); - } - // Except kerberos, NOSASL - if (userName == null) { - userName = TSetIpAddressProcessor.getUserName(); - } - // Http transport mode. - // We set the thread local username, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - userName = SessionManager.getUserName(); - } - if (userName == null) { - userName = req.getUsername(); - } - - userName = getShortName(userName); - String effectiveClientUser = getProxyUser(userName, req.getConfiguration(), getIpAddress()); - LOG.debug("Client's username: " + effectiveClientUser); - return effectiveClientUser; - } - - private String getShortName(String userName) { - String ret = null; - if (userName != null) { - int indexOfDomainMatch = ServiceUtils.indexOfDomainMatch(userName); - ret = (indexOfDomainMatch <= 0) ? userName : - userName.substring(0, indexOfDomainMatch); - } - - return ret; - } - - /** - * Create a session handle - * @param req - * @param res - * @return - * @throws HiveSQLException - * @throws LoginException - * @throws IOException - */ - SessionHandle getSessionHandle(TOpenSessionReq req, TOpenSessionResp res) - throws HiveSQLException, LoginException, IOException { - String userName = getUserName(req); - String ipAddress = getIpAddress(); - TProtocolVersion protocol = getMinVersion(CLIService.SERVER_VERSION, - req.getClient_protocol()); - res.setServerProtocolVersion(protocol); - SessionHandle sessionHandle; - if (cliService.getHiveConf().getBoolVar(ConfVars.HIVE_SERVER2_ENABLE_DOAS) && - (userName != null)) { - String delegationTokenStr = getDelegationToken(userName); - sessionHandle = cliService.openSessionWithImpersonation(protocol, userName, - req.getPassword(), ipAddress, req.getConfiguration(), delegationTokenStr); - } else { - sessionHandle = cliService.openSession(protocol, userName, req.getPassword(), - ipAddress, req.getConfiguration()); - } - return sessionHandle; - } - - - private String getDelegationToken(String userName) - throws HiveSQLException, LoginException, IOException { - if (userName == null || !cliService.getHiveConf().getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION) - .equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString())) { - return null; - } - try { - return cliService.getDelegationTokenFromMetaStore(userName); - } catch (UnsupportedOperationException e) { - // The delegation token is not applicable in the given deployment mode - } - return null; - } - - private TProtocolVersion getMinVersion(TProtocolVersion... 
versions) { - TProtocolVersion[] values = TProtocolVersion.values(); - int current = values[values.length - 1].getValue(); - for (TProtocolVersion version : versions) { - if (current > version.getValue()) { - current = version.getValue(); - } - } - for (TProtocolVersion version : values) { - if (version.getValue() == current) { - return version; - } - } - throw new IllegalArgumentException("never"); - } - - @Override - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws TException { - TCloseSessionResp resp = new TCloseSessionResp(); - try { - SessionHandle sessionHandle = new SessionHandle(req.getSessionHandle()); - cliService.closeSession(sessionHandle); - resp.setStatus(OK_STATUS); - ThriftCLIServerContext context = - (ThriftCLIServerContext)currentServerContext.get(); - if (context != null) { - context.setSessionHandle(null); - } - } catch (Exception e) { - LOG.warn("Error closing session: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetInfoResp GetInfo(TGetInfoReq req) throws TException { - TGetInfoResp resp = new TGetInfoResp(); - try { - GetInfoValue getInfoValue = - cliService.getInfo(new SessionHandle(req.getSessionHandle()), - GetInfoType.getGetInfoType(req.getInfoType())); - resp.setInfoValue(getInfoValue.toTGetInfoValue()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting info: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws TException { - TExecuteStatementResp resp = new TExecuteStatementResp(); - try { - SessionHandle sessionHandle = new SessionHandle(req.getSessionHandle()); - String statement = req.getStatement(); - Map confOverlay = req.getConfOverlay(); - Boolean runAsync = req.isRunAsync(); - OperationHandle operationHandle = runAsync ? 
- cliService.executeStatementAsync(sessionHandle, statement, confOverlay) - : cliService.executeStatement(sessionHandle, statement, confOverlay); - resp.setOperationHandle(operationHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error executing statement: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws TException { - TGetTypeInfoResp resp = new TGetTypeInfoResp(); - try { - OperationHandle operationHandle = cliService.getTypeInfo(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(operationHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting type info: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws TException { - TGetCatalogsResp resp = new TGetCatalogsResp(); - try { - OperationHandle opHandle = cliService.getCatalogs(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting catalogs: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws TException { - TGetSchemasResp resp = new TGetSchemasResp(); - try { - OperationHandle opHandle = cliService.getSchemas( - new SessionHandle(req.getSessionHandle()), req.getCatalogName(), req.getSchemaName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting schemas: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTablesResp GetTables(TGetTablesReq req) throws TException { - TGetTablesResp resp = new TGetTablesResp(); - try { - OperationHandle opHandle = cliService - .getTables(new SessionHandle(req.getSessionHandle()), req.getCatalogName(), - req.getSchemaName(), req.getTableName(), req.getTableTypes()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting tables: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws TException { - TGetTableTypesResp resp = new TGetTableTypesResp(); - try { - OperationHandle opHandle = cliService.getTableTypes(new SessionHandle(req.getSessionHandle())); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting table types: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws TException { - TGetColumnsResp resp = new TGetColumnsResp(); - try { - OperationHandle opHandle = cliService.getColumns( - new SessionHandle(req.getSessionHandle()), - req.getCatalogName(), - req.getSchemaName(), - req.getTableName(), - req.getColumnName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting columns: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws 
TException { - TGetFunctionsResp resp = new TGetFunctionsResp(); - try { - OperationHandle opHandle = cliService.getFunctions( - new SessionHandle(req.getSessionHandle()), req.getCatalogName(), - req.getSchemaName(), req.getFunctionName()); - resp.setOperationHandle(opHandle.toTOperationHandle()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting functions: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws TException { - TGetOperationStatusResp resp = new TGetOperationStatusResp(); - try { - OperationStatus operationStatus = cliService.getOperationStatus( - new OperationHandle(req.getOperationHandle())); - resp.setOperationState(operationStatus.getState().toTOperationState()); - HiveSQLException opException = operationStatus.getOperationException(); - if (opException != null) { - resp.setSqlState(opException.getSQLState()); - resp.setErrorCode(opException.getErrorCode()); - resp.setErrorMessage(org.apache.hadoop.util.StringUtils - .stringifyException(opException)); - } - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting operation status: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws TException { - TCancelOperationResp resp = new TCancelOperationResp(); - try { - cliService.cancelOperation(new OperationHandle(req.getOperationHandle())); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error cancelling operation: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws TException { - TCloseOperationResp resp = new TCloseOperationResp(); - try { - cliService.closeOperation(new OperationHandle(req.getOperationHandle())); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error closing operation: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) - throws TException { - TGetResultSetMetadataResp resp = new TGetResultSetMetadataResp(); - try { - TableSchema schema = cliService.getResultSetMetadata(new OperationHandle(req.getOperationHandle())); - resp.setSchema(schema.toTTableSchema()); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error getting result set metadata: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - @Override - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws TException { - TFetchResultsResp resp = new TFetchResultsResp(); - try { - RowSet rowSet = cliService.fetchResults( - new OperationHandle(req.getOperationHandle()), - FetchOrientation.getFetchOrientation(req.getOrientation()), - req.getMaxRows(), - FetchType.getFetchType(req.getFetchType())); - resp.setResults(rowSet.toTRowSet()); - resp.setHasMoreRows(false); - resp.setStatus(OK_STATUS); - } catch (Exception e) { - LOG.warn("Error fetching results: ", e); - resp.setStatus(HiveSQLException.toTStatus(e)); - } - return resp; - } - - protected abstract void initializeServer(); - - @Override - public abstract void run(); - - /** - * If the proxy user name is provided then check privileges to substitute the user. 
- * @param realUser - * @param sessionConf - * @param ipAddress - * @return - * @throws HiveSQLException - */ - private String getProxyUser(String realUser, Map sessionConf, - String ipAddress) throws HiveSQLException { - String proxyUser = null; - // Http transport mode. - // We set the thread local proxy username, in ThriftHttpServlet. - if (cliService.getHiveConf().getVar( - ConfVars.HIVE_SERVER2_TRANSPORT_MODE).equalsIgnoreCase("http")) { - proxyUser = SessionManager.getProxyUserName(); - LOG.debug("Proxy user from query string: " + proxyUser); - } - - if (proxyUser == null && sessionConf != null && sessionConf.containsKey(HiveAuthFactory.HS2_PROXY_USER)) { - String proxyUserFromThriftBody = sessionConf.get(HiveAuthFactory.HS2_PROXY_USER); - LOG.debug("Proxy user from thrift body: " + proxyUserFromThriftBody); - proxyUser = proxyUserFromThriftBody; - } - - if (proxyUser == null) { - return realUser; - } - - // check whether substitution is allowed - if (!hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SERVER2_ALLOW_USER_SUBSTITUTION)) { - throw new HiveSQLException("Proxy user substitution is not allowed"); - } - - // If there's no authentication, then directly substitute the user - if (HiveAuthFactory.AuthTypes.NONE.toString() - .equalsIgnoreCase(hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION))) { - return proxyUser; - } - - // Verify proxy user privilege of the realUser for the proxyUser - HiveAuthFactory.verifyProxyAccess(realUser, proxyUser, ipAddress, hiveConf); - LOG.debug("Verified proxy user: " + proxyUser); - return proxyUser; - } - - private boolean isKerberosAuthMode() { - return cliService.getHiveConf().getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION) - .equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString()); - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java deleted file mode 100644 index 1af45398b895c..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java +++ /dev/null @@ -1,440 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import java.util.List; -import java.util.Map; - -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.*; -import org.apache.thrift.TException; - -/** - * ThriftCLIServiceClient. 
- * - */ -public class ThriftCLIServiceClient extends CLIServiceClient { - private final TCLIService.Iface cliService; - - public ThriftCLIServiceClient(TCLIService.Iface cliService) { - this.cliService = cliService; - } - - public void checkStatus(TStatus status) throws HiveSQLException { - if (TStatusCode.ERROR_STATUS.equals(status.getStatusCode())) { - throw new HiveSQLException(status); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#openSession(java.lang.String, java.lang.String, java.util.Map) - */ - @Override - public SessionHandle openSession(String username, String password, - Map configuration) - throws HiveSQLException { - try { - TOpenSessionReq req = new TOpenSessionReq(); - req.setUsername(username); - req.setPassword(password); - req.setConfiguration(configuration); - TOpenSessionResp resp = cliService.OpenSession(req); - checkStatus(resp.getStatus()); - return new SessionHandle(resp.getSessionHandle(), resp.getServerProtocolVersion()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public SessionHandle openSessionWithImpersonation(String username, String password, - Map configuration, String delegationToken) throws HiveSQLException { - throw new HiveSQLException("open with impersonation operation is not supported in the client"); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeSession(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public void closeSession(SessionHandle sessionHandle) throws HiveSQLException { - try { - TCloseSessionReq req = new TCloseSessionReq(sessionHandle.toTSessionHandle()); - TCloseSessionResp resp = cliService.CloseSession(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getInfo(org.apache.hive.service.cli.SessionHandle, java.util.List) - */ - @Override - public GetInfoValue getInfo(SessionHandle sessionHandle, GetInfoType infoType) - throws HiveSQLException { - try { - // FIXME extract the right info type - TGetInfoReq req = new TGetInfoReq(sessionHandle.toTSessionHandle(), infoType.toTGetInfoType()); - TGetInfoResp resp = cliService.GetInfo(req); - checkStatus(resp.getStatus()); - return new GetInfoValue(resp.getInfoValue()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatement(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatement(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(sessionHandle, statement, confOverlay, false); - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#executeStatementAsync(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.util.Map) - */ - @Override - public OperationHandle executeStatementAsync(SessionHandle sessionHandle, String statement, - Map confOverlay) - throws HiveSQLException { - return executeStatementInternal(sessionHandle, statement, confOverlay, true); - } - - private OperationHandle executeStatementInternal(SessionHandle 
sessionHandle, String statement, - Map confOverlay, boolean isAsync) - throws HiveSQLException { - try { - TExecuteStatementReq req = - new TExecuteStatementReq(sessionHandle.toTSessionHandle(), statement); - req.setConfOverlay(confOverlay); - req.setRunAsync(isAsync); - TExecuteStatementResp resp = cliService.ExecuteStatement(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTypeInfo(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTypeInfo(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetTypeInfoReq req = new TGetTypeInfoReq(sessionHandle.toTSessionHandle()); - TGetTypeInfoResp resp = cliService.GetTypeInfo(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getCatalogs(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getCatalogs(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetCatalogsReq req = new TGetCatalogsReq(sessionHandle.toTSessionHandle()); - TGetCatalogsResp resp = cliService.GetCatalogs(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getSchemas(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String) - */ - @Override - public OperationHandle getSchemas(SessionHandle sessionHandle, String catalogName, - String schemaName) - throws HiveSQLException { - try { - TGetSchemasReq req = new TGetSchemasReq(sessionHandle.toTSessionHandle()); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - TGetSchemasResp resp = cliService.GetSchemas(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTables(org.apache.hive.service.cli.SessionHandle, java.lang.String, java.lang.String, java.lang.String, java.util.List) - */ - @Override - public OperationHandle getTables(SessionHandle sessionHandle, String catalogName, - String schemaName, String tableName, List tableTypes) - throws HiveSQLException { - try { - TGetTablesReq req = new TGetTablesReq(sessionHandle.toTSessionHandle()); - req.setTableName(tableName); - req.setTableTypes(tableTypes); - req.setSchemaName(schemaName); - TGetTablesResp resp = cliService.GetTables(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) 
{ - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getTableTypes(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getTableTypes(SessionHandle sessionHandle) throws HiveSQLException { - try { - TGetTableTypesReq req = new TGetTableTypesReq(sessionHandle.toTSessionHandle()); - TGetTableTypesResp resp = cliService.GetTableTypes(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getColumns(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getColumns(SessionHandle sessionHandle, - String catalogName, String schemaName, String tableName, String columnName) - throws HiveSQLException { - try { - TGetColumnsReq req = new TGetColumnsReq(); - req.setSessionHandle(sessionHandle.toTSessionHandle()); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - req.setTableName(tableName); - req.setColumnName(columnName); - TGetColumnsResp resp = cliService.GetColumns(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getFunctions(org.apache.hive.service.cli.SessionHandle) - */ - @Override - public OperationHandle getFunctions(SessionHandle sessionHandle, - String catalogName, String schemaName, String functionName) throws HiveSQLException { - try { - TGetFunctionsReq req = new TGetFunctionsReq(sessionHandle.toTSessionHandle(), functionName); - req.setCatalogName(catalogName); - req.setSchemaName(schemaName); - TGetFunctionsResp resp = cliService.GetFunctions(req); - checkStatus(resp.getStatus()); - TProtocolVersion protocol = sessionHandle.getProtocolVersion(); - return new OperationHandle(resp.getOperationHandle(), protocol); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getOperationStatus(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public OperationStatus getOperationStatus(OperationHandle opHandle) throws HiveSQLException { - try { - TGetOperationStatusReq req = new TGetOperationStatusReq(opHandle.toTOperationHandle()); - TGetOperationStatusResp resp = cliService.GetOperationStatus(req); - // Checks the status of the RPC call, throws an exception in case of error - checkStatus(resp.getStatus()); - OperationState opState = OperationState.getOperationState(resp.getOperationState()); - HiveSQLException opException = null; - if (opState == OperationState.ERROR) { - opException = new HiveSQLException(resp.getErrorMessage(), resp.getSqlState(), resp.getErrorCode()); - } - return new OperationStatus(opState, opException); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#cancelOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void cancelOperation(OperationHandle 
opHandle) throws HiveSQLException { - try { - TCancelOperationReq req = new TCancelOperationReq(opHandle.toTOperationHandle()); - TCancelOperationResp resp = cliService.CancelOperation(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#closeOperation(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public void closeOperation(OperationHandle opHandle) - throws HiveSQLException { - try { - TCloseOperationReq req = new TCloseOperationReq(opHandle.toTOperationHandle()); - TCloseOperationResp resp = cliService.CloseOperation(req); - checkStatus(resp.getStatus()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#getResultSetMetadata(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public TableSchema getResultSetMetadata(OperationHandle opHandle) - throws HiveSQLException { - try { - TGetResultSetMetadataReq req = new TGetResultSetMetadataReq(opHandle.toTOperationHandle()); - TGetResultSetMetadataResp resp = cliService.GetResultSetMetadata(req); - checkStatus(resp.getStatus()); - return new TableSchema(resp.getSchema()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - @Override - public RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, long maxRows, - FetchType fetchType) throws HiveSQLException { - try { - TFetchResultsReq req = new TFetchResultsReq(); - req.setOperationHandle(opHandle.toTOperationHandle()); - req.setOrientation(orientation.toTFetchOrientation()); - req.setMaxRows(maxRows); - req.setFetchType(fetchType.toTFetchType()); - TFetchResultsResp resp = cliService.FetchResults(req); - checkStatus(resp.getStatus()); - return RowSetFactory.create(resp.getResults(), opHandle.getProtocolVersion()); - } catch (HiveSQLException e) { - throw e; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - /* (non-Javadoc) - * @see org.apache.hive.service.cli.ICLIService#fetchResults(org.apache.hive.service.cli.OperationHandle) - */ - @Override - public RowSet fetchResults(OperationHandle opHandle) throws HiveSQLException { - // TODO: set the correct default fetch size - return fetchResults(opHandle, FetchOrientation.FETCH_NEXT, 10000, FetchType.QUERY_OUTPUT); - } - - @Override - public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String owner, String renewer) throws HiveSQLException { - TGetDelegationTokenReq req = new TGetDelegationTokenReq( - sessionHandle.toTSessionHandle(), owner, renewer); - try { - TGetDelegationTokenResp tokenResp = cliService.GetDelegationToken(req); - checkStatus(tokenResp.getStatus()); - return tokenResp.getDelegationToken(); - } catch (Exception e) { - throw new HiveSQLException(e); - } - } - - @Override - public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - TCancelDelegationTokenReq cancelReq = new TCancelDelegationTokenReq( - sessionHandle.toTSessionHandle(), tokenStr); - try { - TCancelDelegationTokenResp cancelResp = - cliService.CancelDelegationToken(cancelReq); - checkStatus(cancelResp.getStatus()); - return; - } catch (TException e) { - throw new HiveSQLException(e); - } - } - - @Override - public void 
renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, - String tokenStr) throws HiveSQLException { - TRenewDelegationTokenReq cancelReq = new TRenewDelegationTokenReq( - sessionHandle.toTSessionHandle(), tokenStr); - try { - TRenewDelegationTokenResp renewResp = - cliService.RenewDelegationToken(cancelReq); - checkStatus(renewResp.getStatus()); - return; - } catch (Exception e) { - throw new HiveSQLException(e); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java deleted file mode 100644 index bd64c777c1d76..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ /dev/null @@ -1,194 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hive.service.cli.thrift; - -import java.util.Arrays; -import java.util.concurrent.SynchronousQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.Shell; -import org.apache.hive.service.ServiceException; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.cli.thrift.TCLIService.Iface; -import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.thrift.protocol.TProtocolFactory; -import org.apache.thrift.server.TServlet; -import org.eclipse.jetty.server.AbstractConnectionFactory; -import org.eclipse.jetty.server.ConnectionFactory; -import org.eclipse.jetty.server.HttpConnectionFactory; -import org.eclipse.jetty.server.ServerConnector; -import org.eclipse.jetty.servlet.ServletContextHandler; -import org.eclipse.jetty.servlet.ServletHolder; -import org.eclipse.jetty.util.ssl.SslContextFactory; -import org.eclipse.jetty.util.thread.ExecutorThreadPool; -import org.eclipse.jetty.util.thread.ScheduledExecutorScheduler; - - -public class ThriftHttpCLIService extends ThriftCLIService { - - public ThriftHttpCLIService(CLIService cliService) { - super(cliService, ThriftHttpCLIService.class.getSimpleName()); - } - - @Override - protected void initializeServer() { - try { - // Server thread pool - // Start with minWorkerThreads, expand till maxWorkerThreads and reject subsequent requests - String threadPoolName = "HiveServer2-HttpHandler-Pool"; - 
ThreadPoolExecutor executorService = new ThreadPoolExecutor(minWorkerThreads, maxWorkerThreads, - workerKeepAliveTime, TimeUnit.SECONDS, new SynchronousQueue(), - new ThreadFactoryWithGarbageCleanup(threadPoolName)); - ExecutorThreadPool threadPool = new ExecutorThreadPool(executorService); - - // HTTP Server - httpServer = new org.eclipse.jetty.server.Server(threadPool); - - // Connector configs - - ConnectionFactory[] connectionFactories; - boolean useSsl = hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_USE_SSL); - String schemeName = useSsl ? "https" : "http"; - // Change connector if SSL is used - if (useSsl) { - String keyStorePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH).trim(); - String keyStorePassword = ShimLoader.getHadoopShims().getPassword(hiveConf, - HiveConf.ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname); - if (keyStorePath.isEmpty()) { - throw new IllegalArgumentException(ConfVars.HIVE_SERVER2_SSL_KEYSTORE_PATH.varname - + " Not configured for SSL connection"); - } - SslContextFactory sslContextFactory = new SslContextFactory.Server(); - String[] excludedProtocols = hiveConf.getVar(ConfVars.HIVE_SSL_PROTOCOL_BLACKLIST).split(","); - LOG.info("HTTP Server SSL: adding excluded protocols: " + Arrays.toString(excludedProtocols)); - sslContextFactory.addExcludeProtocols(excludedProtocols); - LOG.info("HTTP Server SSL: SslContextFactory.getExcludeProtocols = " + - Arrays.toString(sslContextFactory.getExcludeProtocols())); - sslContextFactory.setKeyStorePath(keyStorePath); - sslContextFactory.setKeyStorePassword(keyStorePassword); - connectionFactories = AbstractConnectionFactory.getFactories( - sslContextFactory, new HttpConnectionFactory()); - } else { - connectionFactories = new ConnectionFactory[] { new HttpConnectionFactory() }; - } - ServerConnector connector = new ServerConnector( - httpServer, - null, - // Call this full constructor to set this, which forces daemon threads: - new ScheduledExecutorScheduler("HiveServer2-HttpHandler-JettyScheduler", true), - null, - -1, - -1, - connectionFactories); - - connector.setPort(portNum); - // Linux:yes, Windows:no - connector.setReuseAddress(!Shell.WINDOWS); - int maxIdleTime = (int) hiveConf.getTimeVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME, - TimeUnit.MILLISECONDS); - connector.setIdleTimeout(maxIdleTime); - - httpServer.addConnector(connector); - - // Thrift configs - hiveAuthFactory = new HiveAuthFactory(hiveConf); - TProcessor processor = new TCLIService.Processor(this); - TProtocolFactory protocolFactory = new TBinaryProtocol.Factory(); - // Set during the init phase of HiveServer2 if auth mode is kerberos - // UGI for the hive/_HOST (kerberos) principal - UserGroupInformation serviceUGI = cliService.getServiceUGI(); - // UGI for the http/_HOST (SPNego) principal - UserGroupInformation httpUGI = cliService.getHttpUGI(); - String authType = hiveConf.getVar(ConfVars.HIVE_SERVER2_AUTHENTICATION); - TServlet thriftHttpServlet = new ThriftHttpServlet(processor, protocolFactory, authType, - serviceUGI, httpUGI); - - // Context handler - final ServletContextHandler context = new ServletContextHandler( - ServletContextHandler.SESSIONS); - context.setContextPath("/"); - String httpPath = getHttpPath(hiveConf - .getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_HTTP_PATH)); - httpServer.setHandler(context); - context.addServlet(new ServletHolder(thriftHttpServlet), httpPath); - - // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyRecieveDuration, etc. 
- // Finally, start the server - httpServer.start(); - // In case HIVE_SERVER2_THRIFT_HTTP_PORT or hive.server2.thrift.http.port is configured with - // 0 which represents any free port, we should set it to the actual one - portNum = connector.getLocalPort(); - String msg = "Started " + ThriftHttpCLIService.class.getSimpleName() + " in " + schemeName - + " mode on port " + connector.getLocalPort()+ " path=" + httpPath + " with " + minWorkerThreads + "..." - + maxWorkerThreads + " worker threads"; - LOG.info(msg); - } catch (Exception t) { - throw new ServiceException("Error initializing " + getName(), t); - } - } - - /** - * Configure Jetty to serve http requests. Example of a client connection URL: - * http://localhost:10000/servlets/thrifths2/ A gateway may cause actual target URL to differ, - * e.g. http://gateway:port/hive2/servlets/thrifths2/ - */ - @Override - public void run() { - try { - httpServer.join(); - } catch (Throwable t) { - LOG.fatal( - "Error starting HiveServer2: could not start " - + ThriftHttpCLIService.class.getSimpleName(), t); - System.exit(-1); - } - } - - /** - * The config parameter can be like "path", "/path", "/path/", "path/*", "/path1/path2/*" and so on. - * httpPath should end up as "/*", "/path/*" or "/path1/../pathN/*" - * @param httpPath - * @return - */ - private String getHttpPath(String httpPath) { - if(httpPath == null || httpPath.equals("")) { - httpPath = "/*"; - } - else { - if(!httpPath.startsWith("/")) { - httpPath = "/" + httpPath; - } - if(httpPath.endsWith("/")) { - httpPath = httpPath + "*"; - } - if(!httpPath.endsWith("/*")) { - httpPath = httpPath + "/*"; - } - } - return httpPath; - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java deleted file mode 100644 index e15d2d0566d2b..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java +++ /dev/null @@ -1,545 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.cli.thrift; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.security.PrivilegedExceptionAction; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -import javax.servlet.ServletException; -import javax.servlet.http.Cookie; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import javax.ws.rs.core.NewCookie; - -import org.apache.commons.codec.binary.Base64; -import org.apache.commons.codec.binary.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hive.service.auth.AuthenticationProviderFactory; -import org.apache.hive.service.auth.AuthenticationProviderFactory.AuthMethods; -import org.apache.hive.service.auth.HiveAuthFactory; -import org.apache.hive.service.auth.HttpAuthUtils; -import org.apache.hive.service.auth.HttpAuthenticationException; -import org.apache.hive.service.auth.PasswdAuthenticationProvider; -import org.apache.hive.service.cli.session.SessionManager; -import org.apache.hive.service.CookieSigner; -import org.apache.thrift.TProcessor; -import org.apache.thrift.protocol.TProtocolFactory; -import org.apache.thrift.server.TServlet; -import org.ietf.jgss.GSSContext; -import org.ietf.jgss.GSSCredential; -import org.ietf.jgss.GSSException; -import org.ietf.jgss.GSSManager; -import org.ietf.jgss.GSSName; -import org.ietf.jgss.Oid; - -/** - * - * ThriftHttpServlet - * - */ -public class ThriftHttpServlet extends TServlet { - - private static final long serialVersionUID = 1L; - public static final Log LOG = LogFactory.getLog(ThriftHttpServlet.class.getName()); - private final String authType; - private final UserGroupInformation serviceUGI; - private final UserGroupInformation httpUGI; - private HiveConf hiveConf = new HiveConf(); - - // Class members for cookie based authentication. - private CookieSigner signer; - public static final String AUTH_COOKIE = "hive.server2.auth"; - private static final Random RAN = new Random(); - private boolean isCookieAuthEnabled; - private String cookieDomain; - private String cookiePath; - private int cookieMaxAge; - private boolean isCookieSecure; - private boolean isHttpOnlyCookie; - - public ThriftHttpServlet(TProcessor processor, TProtocolFactory protocolFactory, - String authType, UserGroupInformation serviceUGI, UserGroupInformation httpUGI) { - super(processor, protocolFactory); - this.authType = authType; - this.serviceUGI = serviceUGI; - this.httpUGI = httpUGI; - this.isCookieAuthEnabled = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_AUTH_ENABLED); - // Initialize the cookie based authentication related variables. - if (isCookieAuthEnabled) { - // Generate the signer with secret. 
- String secret = Long.toString(RAN.nextLong()); - LOG.debug("Using the random number as the secret for cookie generation " + secret); - this.signer = new CookieSigner(secret.getBytes()); - this.cookieMaxAge = (int) hiveConf.getTimeVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE, TimeUnit.SECONDS); - this.cookieDomain = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN); - this.cookiePath = hiveConf.getVar(ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH); - this.isCookieSecure = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE); - this.isHttpOnlyCookie = hiveConf.getBoolVar( - ConfVars.HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY); - } - } - - @Override - protected void doPost(HttpServletRequest request, HttpServletResponse response) - throws ServletException, IOException { - String clientUserName = null; - String clientIpAddress; - boolean requireNewCookie = false; - - try { - // If the cookie based authentication is already enabled, parse the - // request and validate the request cookies. - if (isCookieAuthEnabled) { - clientUserName = validateCookie(request); - requireNewCookie = (clientUserName == null); - if (requireNewCookie) { - LOG.info("Could not validate cookie sent, will try to generate a new cookie"); - } - } - // If the cookie based authentication is not enabled or the request does - // not have a valid cookie, use the kerberos or password based authentication - // depending on the server setup. - if (clientUserName == null) { - // For a kerberos setup - if (isKerberosAuthMode(authType)) { - clientUserName = doKerberosAuth(request); - } - // For password based authentication - else { - clientUserName = doPasswdAuth(request, authType); - } - } - LOG.debug("Client username: " + clientUserName); - - // Set the thread local username to be used for doAs if true - SessionManager.setUserName(clientUserName); - - // find proxy user if any from query param - String doAsQueryParam = getDoAsQueryParam(request.getQueryString()); - if (doAsQueryParam != null) { - SessionManager.setProxyUserName(doAsQueryParam); - } - - clientIpAddress = request.getRemoteAddr(); - LOG.debug("Client IP Address: " + clientIpAddress); - // Set the thread local ip address - SessionManager.setIpAddress(clientIpAddress); - // Generate new cookie and add it to the response - if (requireNewCookie && - !authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.NOSASL.toString())) { - String cookieToken = HttpAuthUtils.createCookieToken(clientUserName); - Cookie hs2Cookie = createCookie(signer.signCookie(cookieToken)); - - if (isHttpOnlyCookie) { - response.setHeader("SET-COOKIE", getHttpOnlyCookieHeader(hs2Cookie)); - } else { - response.addCookie(hs2Cookie); - } - LOG.info("Cookie added for clientUserName " + clientUserName); - } - super.doPost(request, response); - } - catch (HttpAuthenticationException e) { - LOG.error("Error: ", e); - // Send a 401 to the client - response.setStatus(HttpServletResponse.SC_UNAUTHORIZED); - if(isKerberosAuthMode(authType)) { - response.addHeader(HttpAuthUtils.WWW_AUTHENTICATE, HttpAuthUtils.NEGOTIATE); - } - response.getWriter().println("Authentication Error: " + e.getMessage()); - } - finally { - // Clear the thread locals - SessionManager.clearUserName(); - SessionManager.clearIpAddress(); - SessionManager.clearProxyUserName(); - } - } - - /** - * Retrieves the client name from cookieString. If the cookie does not - * correspond to a valid client, the function returns null. - * @param cookies HTTP Request cookies. 
- * @return Client Username if cookieString has a HS2 Generated cookie that is currently valid. - * Else, returns null. - */ - private String getClientNameFromCookie(Cookie[] cookies) { - // Current Cookie Name, Current Cookie Value - String currName, currValue; - - // Following is the main loop which iterates through all the cookies send by the client. - // The HS2 generated cookies are of the format hive.server2.auth= - // A cookie which is identified as a hiveserver2 generated cookie is validated - // by calling signer.verifyAndExtract(). If the validation passes, send the - // username for which the cookie is validated to the caller. If no client side - // cookie passes the validation, return null to the caller. - for (Cookie currCookie : cookies) { - // Get the cookie name - currName = currCookie.getName(); - if (!currName.equals(AUTH_COOKIE)) { - // Not a HS2 generated cookie, continue. - continue; - } - // If we reached here, we have match for HS2 generated cookie - currValue = currCookie.getValue(); - // Validate the value. - currValue = signer.verifyAndExtract(currValue); - // Retrieve the user name, do the final validation step. - if (currValue != null) { - String userName = HttpAuthUtils.getUserNameFromCookieToken(currValue); - - if (userName == null) { - LOG.warn("Invalid cookie token " + currValue); - continue; - } - //We have found a valid cookie in the client request. - if (LOG.isDebugEnabled()) { - LOG.debug("Validated the cookie for user " + userName); - } - return userName; - } - } - // No valid HS2 generated cookies found, return null - return null; - } - - /** - * Convert cookie array to human readable cookie string - * @param cookies Cookie Array - * @return String containing all the cookies separated by a newline character. - * Each cookie is of the format [key]=[value] - */ - private String toCookieStr(Cookie[] cookies) { - String cookieStr = ""; - - for (Cookie c : cookies) { - cookieStr += c.getName() + "=" + c.getValue() + " ;\n"; - } - return cookieStr; - } - - /** - * Validate the request cookie. This function iterates over the request cookie headers - * and finds a cookie that represents a valid client/server session. If it finds one, it - * returns the client name associated with the session. Else, it returns null. - * @param request The HTTP Servlet Request send by the client - * @return Client Username if the request has valid HS2 cookie, else returns null - * @throws UnsupportedEncodingException - */ - private String validateCookie(HttpServletRequest request) throws UnsupportedEncodingException { - // Find all the valid cookies associated with the request. - Cookie[] cookies = request.getCookies(); - - if (cookies == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("No valid cookies associated with the request " + request); - } - return null; - } - if (LOG.isDebugEnabled()) { - LOG.debug("Received cookies: " + toCookieStr(cookies)); - } - return getClientNameFromCookie(cookies); - } - - /** - * Generate a server side cookie given the cookie value as the input. - * @param str Input string token. - * @return The generated cookie. 
- * @throws UnsupportedEncodingException - */ - private Cookie createCookie(String str) throws UnsupportedEncodingException { - if (LOG.isDebugEnabled()) { - LOG.debug("Cookie name = " + AUTH_COOKIE + " value = " + str); - } - Cookie cookie = new Cookie(AUTH_COOKIE, str); - - cookie.setMaxAge(cookieMaxAge); - if (cookieDomain != null) { - cookie.setDomain(cookieDomain); - } - if (cookiePath != null) { - cookie.setPath(cookiePath); - } - cookie.setSecure(isCookieSecure); - return cookie; - } - - /** - * Generate httponly cookie from HS2 cookie - * @param cookie HS2 generated cookie - * @return The httponly cookie - */ - private static String getHttpOnlyCookieHeader(Cookie cookie) { - NewCookie newCookie = new NewCookie(cookie.getName(), cookie.getValue(), - cookie.getPath(), cookie.getDomain(), cookie.getVersion(), - cookie.getComment(), cookie.getMaxAge(), cookie.getSecure()); - return newCookie + "; HttpOnly"; - } - - /** - * Do the LDAP/PAM authentication - * @param request - * @param authType - * @throws HttpAuthenticationException - */ - private String doPasswdAuth(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String userName = getUsername(request, authType); - // No-op when authType is NOSASL - if (!authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.NOSASL.toString())) { - try { - AuthMethods authMethod = AuthMethods.getValidAuthMethod(authType); - PasswdAuthenticationProvider provider = - AuthenticationProviderFactory.getAuthenticationProvider(authMethod); - provider.Authenticate(userName, getPassword(request, authType)); - - } catch (Exception e) { - throw new HttpAuthenticationException(e); - } - } - return userName; - } - - /** - * Do the GSS-API kerberos authentication. - * We already have a logged in subject in the form of serviceUGI, - * which GSS-API will extract information from. - * In case of a SPNego request we use the httpUGI, - * for the authenticating service tickets. - * @param request - * @return - * @throws HttpAuthenticationException - */ - private String doKerberosAuth(HttpServletRequest request) - throws HttpAuthenticationException { - // Try authenticating with the http/_HOST principal - if (httpUGI != null) { - try { - return httpUGI.doAs(new HttpKerberosServerAction(request, httpUGI)); - } catch (Exception e) { - LOG.info("Failed to authenticate with http/_HOST kerberos principal, " + - "trying with hive/_HOST kerberos principal"); - } - } - // Now try with hive/_HOST principal - try { - return serviceUGI.doAs(new HttpKerberosServerAction(request, serviceUGI)); - } catch (Exception e) { - LOG.error("Failed to authenticate with hive/_HOST kerberos principal"); - throw new HttpAuthenticationException(e); - } - - } - - class HttpKerberosServerAction implements PrivilegedExceptionAction { - HttpServletRequest request; - UserGroupInformation serviceUGI; - - HttpKerberosServerAction(HttpServletRequest request, - UserGroupInformation serviceUGI) { - this.request = request; - this.serviceUGI = serviceUGI; - } - - @Override - public String run() throws HttpAuthenticationException { - // Get own Kerberos credentials for accepting connection - GSSManager manager = GSSManager.getInstance(); - GSSContext gssContext = null; - String serverPrincipal = getPrincipalWithoutRealm( - serviceUGI.getUserName()); - try { - // This Oid for Kerberos GSS-API mechanism. - Oid kerberosMechOid = new Oid("1.2.840.113554.1.2.2"); - // Oid for SPNego GSS-API mechanism. 
- Oid spnegoMechOid = new Oid("1.3.6.1.5.5.2"); - // Oid for kerberos principal name - Oid krb5PrincipalOid = new Oid("1.2.840.113554.1.2.2.1"); - - // GSS name for server - GSSName serverName = manager.createName(serverPrincipal, krb5PrincipalOid); - - // GSS credentials for server - GSSCredential serverCreds = manager.createCredential(serverName, - GSSCredential.DEFAULT_LIFETIME, - new Oid[]{kerberosMechOid, spnegoMechOid}, - GSSCredential.ACCEPT_ONLY); - - // Create a GSS context - gssContext = manager.createContext(serverCreds); - // Get service ticket from the authorization header - String serviceTicketBase64 = getAuthHeader(request, authType); - byte[] inToken = Base64.decodeBase64(serviceTicketBase64.getBytes()); - gssContext.acceptSecContext(inToken, 0, inToken.length); - // Authenticate or deny based on its context completion - if (!gssContext.isEstablished()) { - throw new HttpAuthenticationException("Kerberos authentication failed: " + - "unable to establish context with the service ticket " + - "provided by the client."); - } - else { - return getPrincipalWithoutRealmAndHost(gssContext.getSrcName().toString()); - } - } - catch (GSSException e) { - throw new HttpAuthenticationException("Kerberos authentication failed: ", e); - } - finally { - if (gssContext != null) { - try { - gssContext.dispose(); - } catch (GSSException e) { - // No-op - } - } - } - } - - private String getPrincipalWithoutRealm(String fullPrincipal) - throws HttpAuthenticationException { - KerberosNameShim fullKerberosName; - try { - fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal); - } catch (IOException e) { - throw new HttpAuthenticationException(e); - } - String serviceName = fullKerberosName.getServiceName(); - String hostName = fullKerberosName.getHostName(); - String principalWithoutRealm = serviceName; - if (hostName != null) { - principalWithoutRealm = serviceName + "/" + hostName; - } - return principalWithoutRealm; - } - - private String getPrincipalWithoutRealmAndHost(String fullPrincipal) - throws HttpAuthenticationException { - KerberosNameShim fullKerberosName; - try { - fullKerberosName = ShimLoader.getHadoopShims().getKerberosNameShim(fullPrincipal); - return fullKerberosName.getShortName(); - } catch (IOException e) { - throw new HttpAuthenticationException(e); - } - } - } - - private String getUsername(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String[] creds = getAuthHeaderTokens(request, authType); - // Username must be present - if (creds[0] == null || creds[0].isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain username."); - } - return creds[0]; - } - - private String getPassword(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String[] creds = getAuthHeaderTokens(request, authType); - // Password must be present - if (creds[1] == null || creds[1].isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain username."); - } - return creds[1]; - } - - private String[] getAuthHeaderTokens(HttpServletRequest request, - String authType) throws HttpAuthenticationException { - String authHeaderBase64 = getAuthHeader(request, authType); - String authHeaderString = StringUtils.newStringUtf8( - Base64.decodeBase64(authHeaderBase64.getBytes())); - String[] creds = authHeaderString.split(":"); - return creds; - } - - /** - * Returns the base64 encoded 
auth header payload - * @param request - * @param authType - * @return - * @throws HttpAuthenticationException - */ - private String getAuthHeader(HttpServletRequest request, String authType) - throws HttpAuthenticationException { - String authHeader = request.getHeader(HttpAuthUtils.AUTHORIZATION); - // Each http request must have an Authorization header - if (authHeader == null || authHeader.isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client is empty."); - } - - String authHeaderBase64String; - int beginIndex; - if (isKerberosAuthMode(authType)) { - beginIndex = (HttpAuthUtils.NEGOTIATE + " ").length(); - } - else { - beginIndex = (HttpAuthUtils.BASIC + " ").length(); - } - authHeaderBase64String = authHeader.substring(beginIndex); - // Authorization header must have a payload - if (authHeaderBase64String == null || authHeaderBase64String.isEmpty()) { - throw new HttpAuthenticationException("Authorization header received " + - "from the client does not contain any data."); - } - return authHeaderBase64String; - } - - private boolean isKerberosAuthMode(String authType) { - return authType.equalsIgnoreCase(HiveAuthFactory.AuthTypes.KERBEROS.toString()); - } - - private static String getDoAsQueryParam(String queryString) { - if (LOG.isDebugEnabled()) { - LOG.debug("URL query string:" + queryString); - } - if (queryString == null) { - return null; - } - Map params = javax.servlet.http.HttpUtils.parseQueryString( queryString ); - Set keySet = params.keySet(); - for (String key: keySet) { - if (key.equalsIgnoreCase("doAs")) { - return params.get(key)[0]; - } - } - return null; - } - -} - - diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java deleted file mode 100644 index 95233996cbbcb..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ /dev/null @@ -1,277 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hive.service.server; - -import java.util.Properties; - -import scala.runtime.AbstractFunction0; -import scala.runtime.BoxedUnit; - -import org.apache.commons.cli.GnuParser; -import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.Option; -import org.apache.commons.cli.OptionBuilder; -import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.shims.ShimLoader; -import org.apache.hive.common.util.HiveStringUtils; -import org.apache.hive.service.CompositeService; -import org.apache.hive.service.cli.CLIService; -import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; -import org.apache.hive.service.cli.thrift.ThriftCLIService; -import org.apache.hive.service.cli.thrift.ThriftHttpCLIService; - -import org.apache.spark.util.ShutdownHookManager; - -/** - * HiveServer2. - * - */ -public class HiveServer2 extends CompositeService { - private static final Log LOG = LogFactory.getLog(HiveServer2.class); - - private CLIService cliService; - private ThriftCLIService thriftCLIService; - - public HiveServer2() { - super(HiveServer2.class.getSimpleName()); - HiveConf.setLoadHiveServer2Config(true); - } - - @Override - public synchronized void init(HiveConf hiveConf) { - cliService = new CLIService(this); - addService(cliService); - if (isHTTPTransportMode(hiveConf)) { - thriftCLIService = new ThriftHttpCLIService(cliService); - } else { - thriftCLIService = new ThriftBinaryCLIService(cliService); - } - addService(thriftCLIService); - super.init(hiveConf); - - // Add a shutdown hook for catching SIGTERM & SIGINT - // this must be higher than the Hadoop Filesystem priority of 10, - // which the default priority is. 
- // The signature of the callback must match that of a scala () -> Unit - // function - ShutdownHookManager.addShutdownHook( - new AbstractFunction0() { - public BoxedUnit apply() { - try { - LOG.info("Hive Server Shutdown hook invoked"); - stop(); - } catch (Throwable e) { - LOG.warn("Ignoring Exception while stopping Hive Server from shutdown hook", - e); - } - return BoxedUnit.UNIT; - } - }); - } - - public static boolean isHTTPTransportMode(HiveConf hiveConf) { - String transportMode = System.getenv("HIVE_SERVER2_TRANSPORT_MODE"); - if (transportMode == null) { - transportMode = hiveConf.getVar(HiveConf.ConfVars.HIVE_SERVER2_TRANSPORT_MODE); - } - if (transportMode != null && (transportMode.equalsIgnoreCase("http"))) { - return true; - } - return false; - } - - @Override - public synchronized void start() { - super.start(); - } - - @Override - public synchronized void stop() { - LOG.info("Shutting down HiveServer2"); - super.stop(); - } - - private static void startHiveServer2() throws Throwable { - long attempts = 0, maxAttempts = 1; - while (true) { - LOG.info("Starting HiveServer2"); - HiveConf hiveConf = new HiveConf(); - maxAttempts = hiveConf.getLongVar(HiveConf.ConfVars.HIVE_SERVER2_MAX_START_ATTEMPTS); - HiveServer2 server = null; - try { - server = new HiveServer2(); - server.init(hiveConf); - server.start(); - ShimLoader.getHadoopShims().startPauseMonitor(hiveConf); - break; - } catch (Throwable throwable) { - if (server != null) { - try { - server.stop(); - } catch (Throwable t) { - LOG.info("Exception caught when calling stop of HiveServer2 before retrying start", t); - } finally { - server = null; - } - } - if (++attempts >= maxAttempts) { - throw new Error("Max start attempts " + maxAttempts + " exhausted", throwable); - } else { - LOG.warn("Error starting HiveServer2 on attempt " + attempts - + ", will retry in 60 seconds", throwable); - try { - Thread.sleep(60L * 1000L); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - } - } - } - - public static void main(String[] args) { - HiveConf.setLoadHiveServer2Config(true); - ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); - ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); - - HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); - - // Call the executor which will execute the appropriate command based on the parsed options - oprocResponse.getServerOptionsExecutor().execute(); - } - - /** - * ServerOptionsProcessor. - * Process arguments given to HiveServer2 (-hiveconf property=value) - * Set properties in System properties - * Create an appropriate response object, - * which has executor to execute the appropriate command based on the parsed options. 
- */ - public static class ServerOptionsProcessor { - private final Options options = new Options(); - private org.apache.commons.cli.CommandLine commandLine; - private final String serverName; - private final StringBuilder debugMessage = new StringBuilder(); - - @SuppressWarnings("static-access") - public ServerOptionsProcessor(String serverName) { - this.serverName = serverName; - // -hiveconf x=y - options.addOption(OptionBuilder - .withValueSeparator() - .hasArgs(2) - .withArgName("property=value") - .withLongOpt("hiveconf") - .withDescription("Use value for given property") - .create()); - options.addOption(new Option("H", "help", false, "Print help information")); - } - - public ServerOptionsProcessorResponse parse(String[] argv) { - try { - commandLine = new GnuParser().parse(options, argv); - // Process --hiveconf - // Get hiveconf param values and set the System property values - Properties confProps = commandLine.getOptionProperties("hiveconf"); - for (String propKey : confProps.stringPropertyNames()) { - // save logging message for log4j output latter after log4j initialize properly - debugMessage.append("Setting " + propKey + "=" + confProps.getProperty(propKey) + ";\n"); - System.setProperty(propKey, confProps.getProperty(propKey)); - } - - // Process --help - if (commandLine.hasOption('H')) { - return new ServerOptionsProcessorResponse(new HelpOptionExecutor(serverName, options)); - } - } catch (ParseException e) { - // Error out & exit - we were not able to parse the args successfully - System.err.println("Error starting HiveServer2 with given arguments: "); - System.err.println(e.getMessage()); - System.exit(-1); - } - // Default executor, when no option is specified - return new ServerOptionsProcessorResponse(new StartOptionExecutor()); - } - - StringBuilder getDebugMessage() { - return debugMessage; - } - } - - /** - * The response sent back from {@link ServerOptionsProcessor#parse(String[])} - */ - static class ServerOptionsProcessorResponse { - private final ServerOptionsExecutor serverOptionsExecutor; - - ServerOptionsProcessorResponse(ServerOptionsExecutor serverOptionsExecutor) { - this.serverOptionsExecutor = serverOptionsExecutor; - } - - ServerOptionsExecutor getServerOptionsExecutor() { - return serverOptionsExecutor; - } - } - - /** - * The executor interface for running the appropriate HiveServer2 command based on parsed options - */ - interface ServerOptionsExecutor { - void execute(); - } - - /** - * HelpOptionExecutor: executes the --help option by printing out the usage - */ - static class HelpOptionExecutor implements ServerOptionsExecutor { - private final Options options; - private final String serverName; - - HelpOptionExecutor(String serverName, Options options) { - this.options = options; - this.serverName = serverName; - } - - @Override - public void execute() { - new HelpFormatter().printHelp(serverName, options); - System.exit(0); - } - } - - /** - * StartOptionExecutor: starts HiveServer2. - * This is the default executor, when no option is specified. 
- */ - static class StartOptionExecutor implements ServerOptionsExecutor { - @Override - public void execute() { - try { - startHiveServer2(); - } catch (Throwable t) { - LOG.fatal("Error starting HiveServer2", t); - System.exit(-1); - } - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java deleted file mode 100644 index 8ee98103f7ef7..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -package org.apache.hive.service.server; - -import java.util.Map; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.RawStore; - -/** - * A HiveServer2 thread used to construct new server threads. - * In particular, this thread ensures an orderly cleanup, - * when killed by its corresponding ExecutorService. - */ -public class ThreadWithGarbageCleanup extends Thread { - private static final Log LOG = LogFactory.getLog(ThreadWithGarbageCleanup.class); - - Map threadRawStoreMap = - ThreadFactoryWithGarbageCleanup.getThreadRawStoreMap(); - - public ThreadWithGarbageCleanup(Runnable runnable) { - super(runnable); - } - - /** - * Add any Thread specific garbage cleanup code here. - * Currently, it shuts down the RawStore object for this thread if it is not null. - */ - @Override - public void finalize() throws Throwable { - cleanRawStore(); - super.finalize(); - } - - private void cleanRawStore() { - Long threadId = this.getId(); - RawStore threadLocalRawStore = threadRawStoreMap.get(threadId); - if (threadLocalRawStore != null) { - LOG.debug("RawStore: " + threadLocalRawStore + ", for the thread: " + - this.getName() + " will be closed now."); - threadLocalRawStore.shutdown(); - threadRawStoreMap.remove(threadId); - } - } - - /** - * Cache the ThreadLocal RawStore object. Called from the corresponding thread. 
- */ - public void cacheThreadLocalRawStore() { - Long threadId = this.getId(); - RawStore threadLocalRawStore = HiveMetaStore.HMSHandler.getRawStore(); - if (threadLocalRawStore != null && !threadRawStoreMap.containsKey(threadId)) { - LOG.debug("Adding RawStore: " + threadLocalRawStore + ", for the thread: " + - this.getName() + " to threadRawStoreMap for future cleanup."); - threadRawStoreMap.put(threadId, threadLocalRawStore); - } - } -} diff --git a/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala deleted file mode 100644 index 9a28dd6a31e6e..0000000000000 --- a/sql/hive-thriftserver/v1.2/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import org.apache.commons.logging.LogFactory -import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} -import org.apache.hive.service.cli.Type._ -import org.apache.hive.service.cli.thrift.TProtocolVersion._ - -/** - * Various utilities for hive-thriftserver used to upgrade the built-in Hive. 
- */ -private[thriftserver] object ThriftserverShimUtils { - - private[thriftserver] object TOperationType { - val GET_TYPE_INFO = org.apache.hive.service.cli.thrift.TOperationType.GET_TYPE_INFO - } - - private[thriftserver] type TProtocolVersion = org.apache.hive.service.cli.thrift.TProtocolVersion - private[thriftserver] type Client = org.apache.hive.service.cli.thrift.TCLIService.Client - private[thriftserver] type TOpenSessionReq = org.apache.hive.service.cli.thrift.TOpenSessionReq - private[thriftserver] type TGetSchemasReq = org.apache.hive.service.cli.thrift.TGetSchemasReq - private[thriftserver] type TGetTablesReq = org.apache.hive.service.cli.thrift.TGetTablesReq - private[thriftserver] type TGetColumnsReq = org.apache.hive.service.cli.thrift.TGetColumnsReq - private[thriftserver] type TGetInfoReq = org.apache.hive.service.cli.thrift.TGetInfoReq - private[thriftserver] type TExecuteStatementReq = - org.apache.hive.service.cli.thrift.TExecuteStatementReq - private[thriftserver] type THandleIdentifier = - org.apache.hive.service.cli.thrift.THandleIdentifier - private[thriftserver] type TOperationType = org.apache.hive.service.cli.thrift.TOperationType - private[thriftserver] type TOperationHandle = org.apache.hive.service.cli.thrift.TOperationHandle - - private[thriftserver] def getConsole: SessionState.LogHelper = { - val LOG = LogFactory.getLog(classOf[SparkSQLCLIDriver]) - new SessionState.LogHelper(LOG) - } - - private[thriftserver] def resultRowSet( - getResultSetSchema: TableSchema, - getProtocolVersion: TProtocolVersion): RowSet = { - RowSetFactory.create(getResultSetSchema, getProtocolVersion) - } - - private[thriftserver] def supportedType(): Seq[Type] = { - Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, - TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, - FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, - DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) - } - - private[thriftserver] val testedProtocolVersions = Seq( - HIVE_CLI_SERVICE_PROTOCOL_V1, - HIVE_CLI_SERVICE_PROTOCOL_V2, - HIVE_CLI_SERVICE_PROTOCOL_V3, - HIVE_CLI_SERVICE_PROTOCOL_V4, - HIVE_CLI_SERVICE_PROTOCOL_V5, - HIVE_CLI_SERVICE_PROTOCOL_V6, - HIVE_CLI_SERVICE_PROTOCOL_V7, - HIVE_CLI_SERVICE_PROTOCOL_V8) -} diff --git a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt deleted file mode 100644 index 85884a1aaf739..0000000000000 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive1.2-results.txt +++ /dev/null @@ -1,11 +0,0 @@ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_251-b08 on Mac OS X 10.15.4 -Intel(R) Core(TM) i9-9980HK CPU @ 2.40GHz -insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -INSERT INTO DYNAMIC 6812 7043 328 0.0 665204.8 1.0X -INSERT INTO HYBRID 817 852 32 0.0 79783.6 8.3X -INSERT INTO STATIC 231 246 21 0.0 22568.2 29.5X -INSERT OVERWRITE DYNAMIC 25947 26671 1024 0.0 2533910.2 0.3X -INSERT OVERWRITE HYBRID 2846 2884 54 0.0 277908.7 2.4X -INSERT OVERWRITE STATIC 232 247 26 0.0 22659.9 29.4X - diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 805bcb2bc3a60..1611a3da8a3da 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -296,8 +296,7 @@ private[hive] class HiveClientImpl( case e: NoClassDefFoundError if HiveUtils.isHive23 && e.getMessage.contains("org/apache/hadoop/hive/serde2/SerDe") => throw new ClassNotFoundException("The SerDe interface removed since Hive 2.3(HIVE-15167)." + - " Please migrate your custom SerDes to Hive 2.3 or build your own Spark with" + - " hive-1.2 profile. See HIVE-15167 for more details.", e) + " Please migrate your custom SerDes to Hive 2.3. See HIVE-15167 for more details.", e) } finally { state.getConf.setClassLoader(originalConfLoader) Thread.currentThread().setContextClassLoader(original) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala index 81eb5e2591f13..da34c54cb36a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala @@ -28,14 +28,11 @@ import org.apache.spark.sql.hive.test.TestHive * {{{ * 1. without sbt: bin/spark-submit --class * --jars ,, - * --packages org.spark-project.hive:hive-exec:1.2.1.spark2 * - * 2. build/sbt "hive/test:runMain " -Phive-1.2 or - * build/sbt "hive/test:runMain " -Phive-2.3 + * 2. build/sbt "hive/test:runMain " * 3. generate result: * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "hive/test:runMain " * Results will be written to "benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt". - * 4. -Phive-1.2 does not work for JDK 11 * }}} */ object InsertIntoHiveTableBenchmark extends SqlBasedBenchmark { @@ -136,5 +133,5 @@ object InsertIntoHiveTableBenchmark extends SqlBasedBenchmark { } } - override def suffix: String = if (HiveUtils.isHive23) "-hive2.3" else "-hive1.2" + override def suffix: String = "-hive2.3" } From a0aa8f33a9420feb9228b51a3dfad2e7e86d65a5 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 6 Oct 2020 09:09:58 +0900 Subject: [PATCH 0168/1009] [SPARK-33069][INFRA] Skip test result report if no JUnit XML files are found ### What changes were proposed in this pull request? This PR proposes to skip test reporting ("Report test results") if no JUnit XML files are found. Currently, we're running and skipping the tests dynamically. For example, - if there are only changes in SparkR at the underlying commit, it runs only the SparkR tests, skips the other tests, and generates JUnit XML files only for the SparkR test cases. - if there are only changes in `docs` at the underlying commit, the build skips all tests except linters and does not generate any JUnit XML files. When the test reporting ("Report test results") job is triggered after the main build ("Build and test ") is finished and no JUnit XML files are found, it reports the case as a failure. See https://github.com/apache/spark/runs/1196184007 as an example. This PR works around it by simply skipping the test report when no JUnit XML files are found. Please see https://github.com/apache/spark/pull/29906#issuecomment-702525542 for more details. ### Why are the changes needed? To avoid false alarms for test results. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested in my fork.
Positive case: https://github.com/HyukjinKwon/spark/runs/1208624679?check_suite_focus=true https://github.com/HyukjinKwon/spark/actions/runs/288996327 Negative case: https://github.com/HyukjinKwon/spark/runs/1208229838?check_suite_focus=true https://github.com/HyukjinKwon/spark/actions/runs/289000058 Closes #29946 from HyukjinKwon/test-junit-files. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/test_report.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml index 93cdb86687261..060a8795b6a77 100644 --- a/.github/workflows/test_report.yml +++ b/.github/workflows/test_report.yml @@ -15,7 +15,16 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} workflow: ${{ github.event.workflow_run.workflow_id }} commit: ${{ github.event.workflow_run.head_commit.id }} + - name: Check if JUnit report XML files exist + run: | + if ls **/target/test-reports/*.xml > /dev/null 2>&1; then + echo '::set-output name=FILE_EXISTS::true' + else + echo '::set-output name=FILE_EXISTS::false' + fi + id: check-junit-file - name: Publish test report + if: steps.check-junit-file.outputs.FILE_EXISTS == 'true' uses: scacap/action-surefire-report@v1 with: check_name: Report test results From 9870cf9c086172a390c80f5ef23aacfe2ce3f2cf Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 6 Oct 2020 13:01:57 +0900 Subject: [PATCH 0169/1009] [SPARK-33067][SQL][TESTS] Add negative checks to JDBC v2 Table Catalog tests ### What changes were proposed in this pull request? Add checks for the cases when JDBC v2 Table Catalog commands fail. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `JDBCTableCatalogSuite`. Closes #29945 from MaxGekk/jdbcv2-negative-tests. 
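All of the added negative checks follow one pattern: run a catalog command against a table or namespace that does not exist (or already exists) and assert that the expected exception surfaces. A minimal sketch of that pattern is below; it assumes a test session in which the `h2` JDBC catalog is registered the way the suite in the following diff does in its SparkConf, and the suite and table names here are invented for illustration.

```
import org.apache.spark.sql.{AnalysisException, QueryTest}
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.test.SharedSparkSession

// Hypothetical suite; it only illustrates the intercept-based negative checks.
class NegativeCatalogCheckSketch extends QueryTest with SharedSparkSession {
  test("commands against a missing table surface a clear error") {
    // Dropping a table that does not exist should raise NoSuchTableException.
    intercept[NoSuchTableException] {
      spark.sql("DROP TABLE h2.test.not_existing_table")
    }
    // Resolving a missing table should fail analysis instead of returning a schema.
    intercept[AnalysisException] {
      spark.table("h2.test.not_existing_table").schema
    }
  }
}
```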
Lead-authored-by: Max Gekk Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 114 +++++++++++++++++- 1 file changed, 111 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index b308934ba03c0..bf71f90779b71 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -20,7 +20,8 @@ import java.sql.{Connection, DriverManager} import java.util.Properties import org.apache.spark.SparkConf -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -63,6 +64,8 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("show tables") { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) + // Check not existing namespace + checkAnswer(sql("SHOW TABLES IN h2.bad_test"), Seq()) } test("drop a table and test whether the table exists") { @@ -72,6 +75,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "to_drop"), Row("test", "people"))) sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[NoSuchTableException] { + sql(s"DROP TABLE $table") + } + } } test("rename a table") { @@ -87,6 +95,26 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("SHOW TABLES IN h2.test"), Seq(Row("test", "dst_table"), Row("test", "people"))) } + // Rename not existing table or namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[org.h2.jdbc.JdbcSQLException] { + sql(s"ALTER TABLE $table RENAME TO test.dst_table") + } + } + // Rename to an existing table + withTable("h2.test.dst_table") { + withConnection { conn => + conn.prepareStatement("""CREATE TABLE "test"."dst_table" (id INTEGER)""").executeUpdate() + } + withTable("h2.test.src_table") { + withConnection { conn => + conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() + } + intercept[org.h2.jdbc.JdbcSQLException] { + sql("ALTER TABLE h2.test.src_table RENAME TO h2.test.dst_table") + } + } + } } test("load a table") { @@ -95,6 +123,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("NAME", StringType) .add("ID", IntegerType) assert(t.schema === expectedSchema) + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + spark.table(s"h2.$table").schema + } + } } test("create a table") { @@ -105,6 +138,15 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"), Row("test", "new_table"))) } + withTable("h2.test.new_table") { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + intercept[AnalysisException] { + sql("CREATE TABLE 
h2.test.new_table(i INT, j STRING) USING _") + } + } + intercept[org.h2.jdbc.JdbcSQLException] { + sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") + } } test("alter table ... add column") { @@ -121,16 +163,38 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { t = spark.table("h2.test.alt_table") expectedSchema = expectedSchema.add("C3", DoubleType) assert(t.schema === expectedSchema) + // Add already existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + } + } + // Add a column to not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ADD COLUMNS (C4 STRING)") + } } } test("alter table ... rename column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER, C0 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table RENAME COLUMN ID TO C") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("C", IntegerType) + val expectedSchema = new StructType() + .add("C", IntegerType) + .add("C0", IntegerType) assert(t.schema === expectedSchema) + // Rename to already existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") + } + } + // Rename a column in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table RENAME COLUMN ID TO C") + } } } @@ -141,6 +205,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) + // Drop not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") + } + } + // Drop a column to not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table DROP COLUMN C1") + } } } @@ -151,6 +225,20 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("ID", DoubleType) assert(t.schema === expectedSchema) + // Update not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + } + // Update column to wrong type + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") + } + } + // Update column type in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN id TYPE DOUBLE") + } } } @@ -161,6 +249,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) assert(t.schema === expectedSchema) + // Update nullability of not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") + } + } + // Update column nullability in not existing table and namespace + Seq("h2.test.not_existing_table", 
"h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN ID DROP NOT NULL") + } } } @@ -171,6 +269,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") } assert(thrown.getMessage.contains("Unsupported TableChange")) + // Update comment for not existing column + intercept[AnalysisException] { + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") + } + } + // Update column comments in not existing table and namespace + Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => + intercept[AnalysisException] { + sql(s"ALTER TABLE $table ALTER COLUMN ID COMMENT 'test'") + } } } } From 4adc2822a3c7b7552b436ffb61d5c134680e56b3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Tue, 6 Oct 2020 08:32:55 +0000 Subject: [PATCH 0170/1009] [SPARK-33035][SQL] Updates the obsoleted entries of attribute mapping in QueryPlan#transformUpWithNewOutput ### What changes were proposed in this pull request? This PR intends to fix corner-case bugs in the `QueryPlan#transformUpWithNewOutput` that is used to propagate updated `ExprId`s in a bottom-up way. Let's say we have a rule to simply assign new `ExprId`s in a projection list like this; ``` case class TestRule extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithNewOutput { case p Project(projList, _) => val newPlan = p.copy(projectList = projList.map { _.transform { // Assigns a new `ExprId` for references case a: AttributeReference => Alias(a, a.name)() }}.asInstanceOf[Seq[NamedExpression]]) val attrMapping = p.output.zip(newPlan.output) newPlan -> attrMapping } } ``` Then, this rule is applied into a plan below; ``` (3) Project [a#5, b#6] +- (2) Project [a#5, b#6] +- (1) Project [a#5, b#6] +- LocalRelation , [a#5, b#6] ``` In the first transformation, the rule assigns new `ExprId`s in `(1) Project` (e.g., a#5 AS a#7, b#6 AS b#8). In the second transformation, the rule corrects the input references of `(2) Project` first by using attribute mapping given from `(1) Project` (a#5->a#7 and b#6->b#8) and then assigns new `ExprId`s (e.g., a#7 AS a#9, b#8 AS b#10). But, in the third transformation, the rule fails because it tries to correct the references of `(3) Project` by using incorrect attribute mapping (a#7->a#9 and b#8->b#10) even though the correct one is a#5->a#9 and b#6->b#10. To fix this issue, this PR modified the code to update the attribute mapping entries that are obsoleted by generated entries in a given rule. ### Why are the changes needed? bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests in `QueryPlanSuite`. Closes #29911 from maropu/QueryPlanBug. 
Authored-by: Takeshi Yamamuro Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/plans/QueryPlan.scala | 26 ++++++++++++++----- .../sql/catalyst/plans/QueryPlanSuite.scala | 26 ++++++++++++++++--- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index a89f055e2ac80..3e8467bab0348 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -201,11 +201,6 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT case (oldAttr, _) => plan.references.contains(oldAttr) } - val (planAfterRule, newAttrMapping) = CurrentOrigin.withOrigin(origin) { - rule.applyOrElse(newPlan, (plan: PlanType) => plan -> Nil) - } - newPlan = planAfterRule - if (attrMappingForCurrentPlan.nonEmpty) { assert(!attrMappingForCurrentPlan.groupBy(_._1.exprId) .exists(_._2.map(_._2.exprId).distinct.length > 1), @@ -222,10 +217,27 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT } } - attrMapping ++= newAttrMapping.filter { + val (planAfterRule, newAttrMapping) = CurrentOrigin.withOrigin(origin) { + rule.applyOrElse(newPlan, (plan: PlanType) => plan -> Nil) + } + + val newValidAttrMapping = newAttrMapping.filter { case (a1, a2) => a1.exprId != a2.exprId } - newPlan -> attrMapping.toSeq + + // Updates the `attrMapping` entries that are obsoleted by generated entries in `rule`. + // For example, `attrMapping` has a mapping entry 'id#1 -> id#2' and `rule` + // generates a new entry 'id#2 -> id#3'. In this case, we need to update + // the corresponding old entry from 'id#1 -> id#2' to '#id#1 -> #id#3'. 
+ val updatedAttrMap = AttributeMap(newValidAttrMapping) + val transferAttrMapping = attrMapping.map { + case (a1, a2) => (a1, updatedAttrMap.getOrElse(a2, a2)) + } + val newOtherAttrMapping = { + val existingAttrMappingSet = transferAttrMapping.map(_._2).toSet + newValidAttrMapping.filterNot { case (_, a) => existingAttrMappingSet.contains(a) } + } + planAfterRule -> (transferAttrMapping ++ newOtherAttrMapping).toSeq } } rewrite(this)._1 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala index 91ce187f4d270..404c8895c4d11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/QueryPlanSuite.scala @@ -20,9 +20,11 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation -import org.apache.spark.sql.catalyst.dsl.plans -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, ListQuery, Literal, NamedExpression} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project, Union} +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression, ListQuery, Literal, NamedExpression} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan, Project, Union} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.types.IntegerType @@ -31,7 +33,7 @@ class QueryPlanSuite extends SparkFunSuite { test("origin remains the same after mapExpressions (SPARK-23823)") { CurrentOrigin.setPosition(0, 0) val column = AttributeReference("column", IntegerType)(NamedExpression.newExprId) - val query = plans.DslLogicalPlan(plans.table("table")).select(column) + val query = DslLogicalPlan(table("table")).select(column) CurrentOrigin.reset() val mappedQuery = query mapExpressions { @@ -83,4 +85,20 @@ class QueryPlanSuite extends SparkFunSuite { assert(countRelationsInPlan == 2) assert(countRelationsInPlanAndSubqueries == 5) } + + test("SPARK-33035: consecutive attribute updates in parent plan nodes") { + val testRule = new Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithNewOutput { + case p @ Project(projList, _) => + // Assigns new `ExprId`s for output references + val newPlan = p.copy(projectList = projList.map { ne => Alias(ne, ne.name)() }) + val attrMapping = p.output.zip(newPlan.output) + newPlan -> attrMapping + } + } + + val t = LocalRelation('a.int, 'b.int) + val plan = t.select($"a", $"b").select($"a", $"b").select($"a", $"b").analyze + assert(testRule(plan).resolved) + } } From 279334797234f5f83abd6879874b389e110920c2 Mon Sep 17 00:00:00 2001 From: "fqaiser94@gmail.com" Date: Tue, 6 Oct 2020 08:53:30 +0000 Subject: [PATCH 0171/1009] [SPARK-32511][SQL] Add dropFields method to Column class ### What changes were proposed in this pull request? 1. Refactored `WithFields` Expression to make it more extensible (now `UpdateFields`). 2. Added a new `dropFields` method to the `Column` class. 
This method should allow users to drop a `StructField` in a `StructType` column (with similar semantics to the `drop` method on `Dataset`). ### Why are the changes needed? Often Spark users have to work with deeply nested data e.g. to fix a data quality issue with an existing `StructField`. To do this with the existing Spark APIs, users have to rebuild the entire struct column. For example, let's say you have the following deeply nested data structure which has a data quality issue (`5` is missing): ``` import org.apache.spark.sql._ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ val data = spark.createDataFrame(sc.parallelize( Seq(Row(Row(Row(1, 2, 3), Row(Row(4, null, 6), Row(7, 8, 9), Row(10, 11, 12)), Row(13, 14, 15))))), StructType(Seq( StructField("a", StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("b", StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("b", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))), StructField("c", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))) ))), StructField("c", StructType(Seq( StructField("a", IntegerType), StructField("b", IntegerType), StructField("c", IntegerType)))) )))))).cache data.show(false) +---------------------------------+ |a | +---------------------------------+ |[[1, 2, 3], [[4,, 6], [7, 8, 9]]]| +---------------------------------+ ``` Currently, to drop the missing value users would have to do something like this: ``` val result = data.withColumn("a", struct( $"a.a", struct( struct( $"a.b.a.a", $"a.b.a.c" ).as("a"), $"a.b.b", $"a.b.c" ).as("b"), $"a.c" )) result.show(false) +---------------------------------------------------------------+ |a | +---------------------------------------------------------------+ |[[1, 2, 3], [[4, 6], [7, 8, 9], [10, 11, 12]], [13, 14, 15]]| +---------------------------------------------------------------+ ``` As you can see above, with the existing methods users must call the `struct` function and list all fields, including fields they don't want to change. This is not ideal as: >this leads to complex, fragile code that cannot survive schema evolution. [SPARK-16483](https://issues.apache.org/jira/browse/SPARK-16483) In contrast, with the method added in this PR, a user could simply do something like this to get the same result: ``` val result = data.withColumn("a", 'a.dropFields("b.a.b")) result.show(false) +---------------------------------------------------------------+ |a | +---------------------------------------------------------------+ |[[1, 2, 3], [[4, 6], [7, 8, 9], [10, 11, 12]], [13, 14, 15]]| +---------------------------------------------------------------+ ``` This is the second of maybe 3 methods that could be added to the `Column` class to make it easier to manipulate nested data. Other methods under discussion in [SPARK-22231](https://issues.apache.org/jira/browse/SPARK-22231) include `withFieldRenamed`. However, this should be added in a separate PR. ### Does this PR introduce _any_ user-facing change? The documentation for `Column.withField` method has changed to include an additional note about how to write optimized queries when adding multiple nested Column directly. ### How was this patch tested? New unit tests were added. 
Jenkins must pass them. ### Related JIRAs: More discussion on this topic can be found here: - https://issues.apache.org/jira/browse/SPARK-22231 - https://issues.apache.org/jira/browse/SPARK-16483 Closes #29795 from fqaiser94/SPARK-32511-dropFields-second-try. Authored-by: fqaiser94@gmail.com Signed-off-by: Wenchen Fan --- .../expressions/complexTypeCreator.scala | 127 ++- .../sql/catalyst/optimizer/ComplexTypes.scala | 20 +- .../sql/catalyst/optimizer/Optimizer.scala | 6 +- .../{WithFields.scala => UpdateFields.scala} | 16 +- ...e.scala => CombineUpdateFieldsSuite.scala} | 41 +- .../optimizer/complexTypesSuite.scala | 345 ++++++- .../UpdateFieldsBenchmark-results.txt | 26 + .../scala/org/apache/spark/sql/Column.scala | 119 ++- .../spark/sql/ColumnExpressionSuite.scala | 881 ++++++++++++++++-- .../spark/sql/UpdateFieldsBenchmark.scala | 224 +++++ 10 files changed, 1607 insertions(+), 198 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/{WithFields.scala => UpdateFields.scala} (68%) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{CombineWithFieldsSuite.scala => CombineUpdateFieldsSuite.scala} (65%) create mode 100644 sql/core/benchmarks/UpdateFieldsBenchmark-results.txt create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index c1471455b58c0..d5b1950e82c56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.catalyst.expressions +import scala.collection.mutable.ArrayBuffer + import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FUNC_ALIAS, FunctionBuilder} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ @@ -548,57 +550,114 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E } /** - * Adds/replaces field in struct by name. + * Represents an operation to be applied to the fields of a struct. */ -case class WithFields( - structExpr: Expression, - names: Seq[String], - valExprs: Seq[Expression]) extends Unevaluable { +trait StructFieldsOperation { + + val resolver: Resolver = SQLConf.get.resolver - assert(names.length == valExprs.length) + /** + * Returns an updated list of StructFields and Expressions that will ultimately be used + * as the fields argument for [[StructType]] and as the children argument for + * [[CreateNamedStruct]] respectively inside of [[UpdateFields]]. + */ + def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] +} + +/** + * Add or replace a field by name. + * + * We extend [[Unevaluable]] here to ensure that [[UpdateFields]] can include it as part of its + * children, and thereby enable the analyzer to resolve and transform valExpr as necessary. 
+ */ +case class WithField(name: String, valExpr: Expression) + extends Unevaluable with StructFieldsOperation { + + override def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] = { + val newFieldExpr = (StructField(name, valExpr.dataType, valExpr.nullable), valExpr) + val result = ArrayBuffer.empty[(StructField, Expression)] + var hasMatch = false + for (existingFieldExpr @ (existingField, _) <- values) { + if (resolver(existingField.name, name)) { + hasMatch = true + result += newFieldExpr + } else { + result += existingFieldExpr + } + } + if (!hasMatch) result += newFieldExpr + result + } + + override def children: Seq[Expression] = valExpr :: Nil + + override def dataType: DataType = throw new IllegalStateException( + "WithField.dataType should not be called.") + + override def nullable: Boolean = throw new IllegalStateException( + "WithField.nullable should not be called.") + + override def prettyName: String = "WithField" +} + +/** + * Drop a field by name. + */ +case class DropField(name: String) extends StructFieldsOperation { + override def apply(values: Seq[(StructField, Expression)]): Seq[(StructField, Expression)] = + values.filterNot { case (field, _) => resolver(field.name, name) } +} + +/** + * Updates fields in a struct. + */ +case class UpdateFields(structExpr: Expression, fieldOps: Seq[StructFieldsOperation]) + extends Unevaluable { override def checkInputDataTypes(): TypeCheckResult = { - if (!structExpr.dataType.isInstanceOf[StructType]) { - TypeCheckResult.TypeCheckFailure( - "struct argument should be struct type, got: " + structExpr.dataType.catalogString) + val dataType = structExpr.dataType + if (!dataType.isInstanceOf[StructType]) { + TypeCheckResult.TypeCheckFailure("struct argument should be struct type, got: " + + dataType.catalogString) + } else if (newExprs.isEmpty) { + TypeCheckResult.TypeCheckFailure("cannot drop all fields in struct") } else { TypeCheckResult.TypeCheckSuccess } } - override def children: Seq[Expression] = structExpr +: valExprs + override def children: Seq[Expression] = structExpr +: fieldOps.collect { + case e: Expression => e + } - override def dataType: StructType = evalExpr.dataType.asInstanceOf[StructType] + override def dataType: StructType = StructType(newFields) override def nullable: Boolean = structExpr.nullable - override def prettyName: String = "with_fields" + override def prettyName: String = "update_fields" - lazy val evalExpr: Expression = { - val existingExprs = structExpr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map { - case (name, i) => (name, GetStructField(KnownNotNull(structExpr), i).asInstanceOf[Expression]) - } + private lazy val newFieldExprs: Seq[(StructField, Expression)] = { + val existingFieldExprs: Seq[(StructField, Expression)] = + structExpr.dataType.asInstanceOf[StructType].fields.zipWithIndex.map { + case (field, i) => (field, GetStructField(structExpr, i)) + } - val addOrReplaceExprs = names.zip(valExprs) - - val resolver = SQLConf.get.resolver - val newExprs = addOrReplaceExprs.foldLeft(existingExprs) { - case (resultExprs, newExpr @ (newExprName, _)) => - if (resultExprs.exists(x => resolver(x._1, newExprName))) { - resultExprs.map { - case (name, _) if resolver(name, newExprName) => newExpr - case x => x - } - } else { - resultExprs :+ newExpr - } - }.flatMap { case (name, expr) => Seq(Literal(name), expr) } + fieldOps.foldLeft(existingFieldExprs)((exprs, op) => op(exprs)) + } + + private lazy val newFields: Seq[StructField] = newFieldExprs.map(_._1) + + 
lazy val newExprs: Seq[Expression] = newFieldExprs.map(_._2) + + lazy val evalExpr: Expression = { + val createNamedStructExpr = CreateNamedStruct(newFieldExprs.flatMap { + case (field, expr) => Seq(Literal(field.name), expr) + }) - val expr = CreateNamedStruct(newExprs) if (structExpr.nullable) { - If(IsNull(structExpr), Literal(null, expr.dataType), expr) + If(IsNull(structExpr), Literal(null, dataType), createNamedStructExpr) } else { - expr + createNamedStructExpr } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 2aba4bae397c7..860219e55b052 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.StructType /** * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. @@ -39,18 +40,13 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // Remove redundant field extraction. case GetStructField(createNamedStruct: CreateNamedStruct, ordinal, _) => createNamedStruct.valExprs(ordinal) - case GetStructField(w @ WithFields(struct, names, valExprs), ordinal, maybeName) => - val name = w.dataType(ordinal).name - val matches = names.zip(valExprs).filter(_._1 == name) - if (matches.nonEmpty) { - // return last matching element as that is the final value for the field being extracted. - // For example, if a user submits a query like this: - // `$"struct_col".withField("b", lit(1)).withField("b", lit(2)).getField("b")` - // we want to return `lit(2)` (and not `lit(1)`). - val expr = matches.last._2 - If(IsNull(struct), Literal(null, expr.dataType), expr) - } else { - GetStructField(struct, ordinal, maybeName) + case GetStructField(u: UpdateFields, ordinal, _)if !u.structExpr.isInstanceOf[UpdateFields] => + val structExpr = u.structExpr + u.newExprs(ordinal) match { + // if the struct itself is null, then any value extracted from it (expr) will be null + // so we don't need to wrap expr in If(IsNull(struct), Literal(null, expr.dataType), expr) + case expr: GetStructField if expr.child.semanticEquals(structExpr) => expr + case expr => If(IsNull(structExpr), Literal(null, expr.dataType), expr) } // Remove redundant array indexing. 
case GetArrayStructFields(CreateArray(elems, useStringTypeWhenEmpty), field, ordinal, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f2360150e47b5..5bdaa504a3beb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -110,7 +110,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveRedundantAliases, UnwrapCastInBinaryComparison, RemoveNoopOperators, - CombineWithFields, + CombineUpdateFields, SimplifyExtractValueOps, OptimizeJsonExprs, CombineConcats) ++ @@ -223,7 +223,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveNoopOperators) :+ // This batch must be executed after the `RewriteSubquery` batch, which creates joins. Batch("NormalizeFloatingNumbers", Once, NormalizeFloatingNumbers) :+ - Batch("ReplaceWithFieldsExpression", Once, ReplaceWithFieldsExpression) + Batch("ReplaceUpdateFieldsExpression", Once, ReplaceUpdateFieldsExpression) // remove any batches with no rules. this may happen when subclasses do not add optional rules. batches.filter(_.rules.nonEmpty) @@ -257,7 +257,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RewriteCorrelatedScalarSubquery.ruleName :: RewritePredicateSubquery.ruleName :: NormalizeFloatingNumbers.ruleName :: - ReplaceWithFieldsExpression.ruleName :: Nil + ReplaceUpdateFieldsExpression.ruleName :: Nil /** * Optimize all the subqueries inside expression. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala similarity index 68% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala index 05c90864e4bb0..c7154210e0c62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/WithFields.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala @@ -17,26 +17,26 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.WithFields +import org.apache.spark.sql.catalyst.expressions.UpdateFields import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule /** - * Combines all adjacent [[WithFields]] expression into a single [[WithFields]] expression. + * Combines all adjacent [[UpdateFields]] expression into a single [[UpdateFields]] expression. */ -object CombineWithFields extends Rule[LogicalPlan] { +object CombineUpdateFields extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case WithFields(WithFields(struct, names1, valExprs1), names2, valExprs2) => - WithFields(struct, names1 ++ names2, valExprs1 ++ valExprs2) + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => + UpdateFields(struct, fieldOps1 ++ fieldOps2) } } /** - * Replaces [[WithFields]] expression with an evaluable expression. + * Replaces [[UpdateFields]] expression with an evaluable expression. 
*/ -object ReplaceWithFieldsExpression extends Rule[LogicalPlan] { +object ReplaceUpdateFieldsExpression extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case w: WithFields => w.evalExpr + case u: UpdateFields => u.evalExpr } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala similarity index 65% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala index a3e0bbc57e639..ff9c60a2fa5bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineWithFieldsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala @@ -19,56 +19,53 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, WithFields} +import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -class CombineWithFieldsSuite extends PlanTest { +class CombineUpdateFieldsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("CombineWithFields", FixedPoint(10), CombineWithFields) :: Nil + val batches = Batch("CombineUpdateFields", FixedPoint(10), CombineUpdateFields) :: Nil } private val testRelation = LocalRelation('a.struct('a1.int)) - test("combines two WithFields") { + test("combines two adjacent UpdateFields Expressions") { val originalQuery = testRelation .select(Alias( - WithFields( - WithFields( + UpdateFields( + UpdateFields( 'a, - Seq("b1"), - Seq(Literal(4))), - Seq("c1"), - Seq(Literal(5))), "out")()) + WithField("b1", Literal(4)) :: Nil), + WithField("c1", Literal(5)) :: Nil), "out")()) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation - .select(Alias(WithFields('a, Seq("b1", "c1"), Seq(Literal(4), Literal(5))), "out")()) + .select(Alias(UpdateFields('a, WithField("b1", Literal(4)) :: WithField("c1", Literal(5)) :: + Nil), "out")()) .analyze comparePlans(optimized, correctAnswer) } - test("combines three WithFields") { + test("combines three adjacent UpdateFields Expressions") { val originalQuery = testRelation .select(Alias( - WithFields( - WithFields( - WithFields( + UpdateFields( + UpdateFields( + UpdateFields( 'a, - Seq("b1"), - Seq(Literal(4))), - Seq("c1"), - Seq(Literal(5))), - Seq("d1"), - Seq(Literal(6))), "out")()) + WithField("b1", Literal(4)) :: Nil), + WithField("c1", Literal(5)) :: Nil), + WithField("d1", Literal(6)) :: Nil), "out")()) val optimized = Optimize.execute(originalQuery.analyze) val correctAnswer = testRelation - .select(Alias(WithFields('a, Seq("b1", "c1", "d1"), Seq(4, 5, 6).map(Literal(_))), "out")()) + .select(Alias(UpdateFields('a, WithField("b1", Literal(4)) :: WithField("c1", Literal(5)) :: + WithField("d1", Literal(6)) :: Nil), "out")()) .analyze comparePlans(optimized, correctAnswer) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 00aed6a10cd64..d9cefdaf3fe70 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Range} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ @@ -37,14 +37,15 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { object Optimizer extends RuleExecutor[LogicalPlan] { val batches = Batch("collapse projections", FixedPoint(10), - CollapseProject) :: + CollapseProject) :: Batch("Constant Folding", FixedPoint(10), - NullPropagation, - ConstantFolding, - BooleanSimplification, - SimplifyConditionals, - SimplifyBinaryComparison, - SimplifyExtractValueOps) :: Nil + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyConditionals, + SimplifyBinaryComparison, + CombineUpdateFields, + SimplifyExtractValueOps) :: Nil } private val idAtt = ('id).long.notNull @@ -453,58 +454,182 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { checkEvaluation(GetMapValue(mb0, Literal(Array[Byte](3, 4))), null) } - private val structAttr = 'struct1.struct('a.int).withNullability(false) + private val structAttr = 'struct1.struct('a.int, 'b.int).withNullability(false) private val testStructRelation = LocalRelation(structAttr) - private val nullableStructAttr = 'struct1.struct('a.int) + private val nullableStructAttr = 'struct1.struct('a.int, 'b.int) private val testNullableStructRelation = LocalRelation(nullableStructAttr) - test("simplify GetStructField on WithFields that is not changing the attribute being extracted") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b"), Seq(Literal(1))), 0, Some("a")) as "outerAttr") + test("simplify GetStructField on basic UpdateFields") { + def check(fieldOps: Seq[StructFieldsOperation], ordinal: Int, expected: Expression): Unit = { + def query(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField(UpdateFields('struct1, fieldOps), ordinal).as("res")) + + checkRule( + query(testStructRelation), + testStructRelation.select(expected.as("res"))) + + checkRule( + query(testNullableStructRelation), + testNullableStructRelation.select((expected match { + case expr: GetStructField => expr + case expr => If(IsNull('struct1), Literal(null, expr.dataType), expr) + }).as("res"))) + } + + // scalastyle:off line.size.limit + + // add attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: Nil, 1, GetStructField('struct1, 1)) + // add attribute, extract added attribute + check(WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + + // replace attribute, extract an attribute from the original struct + check(WithField("a", Literal(1)) :: Nil, 1, GetStructField('struct1, 1)) + 
check(WithField("b", Literal(2)) :: Nil, 0, GetStructField('struct1, 0)) + // replace attribute, extract replaced attribute + check(WithField("a", Literal(1)) :: Nil, 0, Literal(1)) + check(WithField("b", Literal(2)) :: Nil, 1, Literal(2)) + + // add multiple attributes, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 1, GetStructField('struct1, 1)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 1, GetStructField('struct1, 1)) + // add multiple attributes, extract newly added attribute + check(WithField("c", Literal(3)) :: WithField("c", Literal(4)) :: Nil, 2, Literal(4)) + check(WithField("c", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 2, Literal(3)) + check(WithField("c", Literal(3)) :: WithField("d", Literal(4)) :: Nil, 3, Literal(4)) + check(WithField("d", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 2, Literal(4)) + check(WithField("d", Literal(4)) :: WithField("c", Literal(3)) :: Nil, 3, Literal(3)) + + // drop attribute, extract an attribute from the original struct + check(DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + + // drop attribute, add attribute, extract an attribute from the original struct + check(DropField("b") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 1)) + // drop attribute, add attribute, extract added attribute + check(DropField("b") :: WithField("c", Literal(3)) :: Nil, 1, Literal(3)) + check(DropField("a") :: WithField("c", Literal(3)) :: Nil, 1, Literal(3)) + + // add attribute, drop attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + check(WithField("c", Literal(3)) :: DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + // add attribute, drop attribute, extract added attribute + check(WithField("c", Literal(3)) :: DropField("a") :: Nil, 1, Literal(3)) + check(WithField("c", Literal(3)) :: DropField("b") :: Nil, 1, Literal(3)) + + // replace attribute, drop same attribute, extract an attribute from the original struct + check(WithField("b", Literal(3)) :: DropField("b") :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("a", Literal(3)) :: DropField("a") :: Nil, 0, GetStructField('struct1, 1)) + + // add attribute, drop same attribute, extract an attribute from the original struct + check(WithField("c", Literal(3)) :: DropField("c") :: Nil, 0, GetStructField('struct1, 0)) + check(WithField("c", Literal(3)) :: DropField("c") :: Nil, 1, GetStructField('struct1, 1)) + + // replace attribute, drop another attribute, extract added attribute + check(WithField("b", Literal(3)) :: DropField("a") :: Nil, 0, Literal(3)) + check(WithField("a", Literal(3)) :: DropField("b") :: Nil, 0, Literal(3)) + + // drop attribute, add same attribute, extract attribute from the original struct + check(DropField("b") :: WithField("b", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("a") :: WithField("a", Literal(3)) :: Nil, 0, GetStructField('struct1, 1)) + // drop 
attribute, add same attribute, extract added attribute + check(DropField("b") :: WithField("b", Literal(3)) :: Nil, 1, Literal(3)) + check(DropField("a") :: WithField("a", Literal(3)) :: Nil, 1, Literal(3)) + + // drop non-existent attribute, add same attribute, extract attribute from the original struct + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 0, GetStructField('struct1, 0)) + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 1, GetStructField('struct1, 1)) + // drop non-existent attribute, add same attribute, extract added attribute + check(DropField("c") :: WithField("c", Literal(3)) :: Nil, 2, Literal(3)) + + // scalastyle:on line.size.limit + } + + test("simplify GetStructField that is extracting a field nested inside a struct") { + val struct2 = 'struct2.struct('b.int) + val testStructRelation = LocalRelation(structAttr, struct2) + val testNullableStructRelation = LocalRelation(nullableStructAttr, struct2) + + // if the field being extracted is from the same struct that UpdateFields is modifying, + // we can just return GetStructField in both the non-nullable and nullable struct scenario + + def addFieldFromSameStructAndThenExtractIt(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField( + UpdateFields('struct1, WithField("b", GetStructField('struct1, 0)) :: Nil), 1).as("res")) checkRule( - query(testStructRelation), - testStructRelation.select(GetStructField('struct1, 0, Some("a")) as "outerAttr")) + addFieldFromSameStructAndThenExtractIt(testStructRelation), + testStructRelation.select(GetStructField('struct1, 0).as("res"))) checkRule( - query(testNullableStructRelation), - testNullableStructRelation.select(GetStructField('struct1, 0, Some("a")) as "outerAttr")) - } + addFieldFromSameStructAndThenExtractIt(testNullableStructRelation), + testNullableStructRelation.select(GetStructField('struct1, 0).as("res"))) - test("simplify GetStructField on WithFields that is changing the attribute being extracted") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b"), Seq(Literal(1))), 1, Some("b")) as "res") + // if the field being extracted is from a different struct than the one UpdateFields is + // modifying, we must return GetStructField wrapped in If(IsNull(struct), null, GetStructField) + // in the nullable struct scenario + + def addFieldFromAnotherStructAndThenExtractIt(relation: LocalRelation): LogicalPlan = + relation.select(GetStructField( + UpdateFields('struct1, WithField("b", GetStructField('struct2, 0)) :: Nil), 1).as("res")) checkRule( - query(testStructRelation), - testStructRelation.select(Literal(1) as "res")) + addFieldFromAnotherStructAndThenExtractIt(testStructRelation), + testStructRelation.select(GetStructField('struct2, 0).as("res"))) checkRule( - query(testNullableStructRelation), + addFieldFromAnotherStructAndThenExtractIt(testNullableStructRelation), testNullableStructRelation.select( - If(IsNull('struct1), Literal(null, IntegerType), Literal(1)) as "res")) + If(IsNull('struct1), Literal(null, IntegerType), GetStructField('struct2, 0)).as("res"))) } - test( - "simplify GetStructField on WithFields that is changing the attribute being extracted twice") { - def query(relation: LocalRelation): LogicalPlan = relation.select( - GetStructField(WithFields('struct1, Seq("b", "b"), Seq(Literal(1), Literal(2))), 1, Some("b")) - as "outerAtt") + test("simplify GetStructField on nested UpdateFields") { + def query(relation: LocalRelation, ordinal: Int): LogicalPlan = 
{ + val nestedUpdateFields = + UpdateFields( + UpdateFields( + UpdateFields( + UpdateFields( + 'struct1, + WithField("c", Literal(1)) :: Nil), + WithField("d", Literal(2)) :: Nil), + WithField("e", Literal(3)) :: Nil), + WithField("f", Literal(4)) :: Nil) + + relation.select(GetStructField(nestedUpdateFields, ordinal) as "res") + } + + // extract newly added field checkRule( - query(testStructRelation), - testStructRelation.select(Literal(2) as "outerAtt")) + query(testStructRelation, 5), + testStructRelation.select(Literal(4) as "res")) checkRule( - query(testNullableStructRelation), + query(testNullableStructRelation, 5), testNullableStructRelation.select( - If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "outerAtt")) + If(IsNull('struct1), Literal(null, IntegerType), Literal(4)) as "res")) + + // extract field from original struct + + checkRule( + query(testStructRelation, 0), + testStructRelation.select(GetStructField('struct1, 0) as "res")) + + checkRule( + query(testNullableStructRelation, 0), + testNullableStructRelation.select(GetStructField('struct1, 0) as "res")) } - test("collapse multiple GetStructField on the same WithFields") { + test("simplify multiple GetStructField on the same UpdateFields") { def query(relation: LocalRelation): LogicalPlan = relation - .select(WithFields('struct1, Seq("b"), Seq(Literal(2))) as "struct2") + .select(UpdateFields('struct1, WithField("b", Literal(2)) :: Nil) as "struct2") .select( GetStructField('struct2, 0, Some("a")) as "struct1A", GetStructField('struct2, 1, Some("b")) as "struct1B") @@ -512,21 +637,21 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { checkRule( query(testStructRelation), testStructRelation.select( - GetStructField('struct1, 0, Some("a")) as "struct1A", + GetStructField('struct1, 0) as "struct1A", Literal(2) as "struct1B")) checkRule( query(testNullableStructRelation), testNullableStructRelation.select( - GetStructField('struct1, 0, Some("a")) as "struct1A", + GetStructField('struct1, 0) as "struct1A", If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "struct1B")) } - test("collapse multiple GetStructField on different WithFields") { + test("simplify multiple GetStructField on different UpdateFields") { def query(relation: LocalRelation): LogicalPlan = relation .select( - WithFields('struct1, Seq("b"), Seq(Literal(2))) as "struct2", - WithFields('struct1, Seq("b"), Seq(Literal(3))) as "struct3") + UpdateFields('struct1, WithField("b", Literal(2)) :: Nil) as "struct2", + UpdateFields('struct1, WithField("b", Literal(3)) :: Nil) as "struct3") .select( GetStructField('struct2, 0, Some("a")) as "struct2A", GetStructField('struct2, 1, Some("b")) as "struct2B", @@ -537,18 +662,148 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { query(testStructRelation), testStructRelation .select( - GetStructField('struct1, 0, Some("a")) as "struct2A", + GetStructField('struct1, 0) as "struct2A", Literal(2) as "struct2B", - GetStructField('struct1, 0, Some("a")) as "struct3A", + GetStructField('struct1, 0) as "struct3A", Literal(3) as "struct3B")) checkRule( query(testNullableStructRelation), testNullableStructRelation .select( - GetStructField('struct1, 0, Some("a")) as "struct2A", + GetStructField('struct1, 0) as "struct2A", If(IsNull('struct1), Literal(null, IntegerType), Literal(2)) as "struct2B", - GetStructField('struct1, 0, Some("a")) as "struct3A", + GetStructField('struct1, 0) as "struct3A", If(IsNull('struct1), Literal(null, IntegerType), Literal(3)) as 
"struct3B")) } + + test("simplify add multiple nested fields to non-nullable struct") { + // this scenario is possible if users add multiple nested columns to a non-nullable struct + // using the Column.withField API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull)).notNull) + + val query = { + val addB3toA1A2 = UpdateFields('a1, Seq(WithField("a2", + UpdateFields(GetStructField('a1, 0), Seq(WithField("b3", Literal(2))))))) + + structLevel2.select( + UpdateFields( + addB3toA1A2, + Seq(WithField("a2", UpdateFields( + GetStructField(addB3toA1A2, 0), Seq(WithField("c3", Literal(3))))))).as("a1")) + } + + val expected = structLevel2.select( + UpdateFields('a1, Seq( + // scalastyle:off line.size.limit + WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: Nil)), + WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: WithField("c3", 3) :: Nil)) + // scalastyle:on line.size.limit + )).as("a1")) + + checkRule(query, expected) + } + + test("simplify add multiple nested fields to nullable struct") { + // this scenario is possible if users add multiple nested columns to a nullable struct + // using the Column.withField API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull))) + + val query = { + val addB3toA1A2 = UpdateFields('a1, Seq(WithField("a2", + UpdateFields(GetStructField('a1, 0), Seq(WithField("b3", Literal(2))))))) + + structLevel2.select( + UpdateFields( + addB3toA1A2, + Seq(WithField("a2", UpdateFields( + GetStructField(addB3toA1A2, 0), Seq(WithField("c3", Literal(3))))))).as("a1")) + } + + val expected = { + val repeatedExpr = UpdateFields(GetStructField('a1, 0), WithField("b3", Literal(2)) :: Nil) + val repeatedExprDataType = StructType(Seq( + StructField("a3", IntegerType, nullable = false), + StructField("b3", IntegerType, nullable = false))) + + structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", repeatedExpr), + WithField("a2", UpdateFields( + If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), + WithField("c3", Literal(3)) :: Nil)) + )).as("a1")) + } + + checkRule(query, expected) + } + + test("simplify drop multiple nested fields in non-nullable struct") { + // this scenario is possible if users drop multiple nested columns in a non-nullable struct + // using the Column.dropFields API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull, 'b3.int.notNull, 'c3.int.notNull).notNull + ).notNull) + + val query = { + val dropA1A2B = UpdateFields('a1, Seq(WithField("a2", UpdateFields( + GetStructField('a1, 0), Seq(DropField("b3")))))) + + structLevel2.select( + UpdateFields( + dropA1A2B, + Seq(WithField("a2", UpdateFields( + GetStructField(dropA1A2B, 0), Seq(DropField("c3")))))).as("a1")) + } + + val expected = structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3")))), + WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3"), DropField("c3")))) + )).as("a1")) + + checkRule(query, expected) + } + + test("simplify drop multiple nested fields in nullable struct") { + // this scenario is possible if users drop multiple nested columns in a nullable struct + // using the Column.dropFields API in a non-performant way + val structLevel2 = LocalRelation( + 'a1.struct( + 'a2.struct('a3.int.notNull, 'b3.int.notNull, 'c3.int.notNull) + )) + + val query = { + val dropA1A2B = UpdateFields('a1, 
Seq(WithField("a2", UpdateFields( + GetStructField('a1, 0), Seq(DropField("b3")))))) + + structLevel2.select( + UpdateFields( + dropA1A2B, + Seq(WithField("a2", UpdateFields( + GetStructField(dropA1A2B, 0), Seq(DropField("c3")))))).as("a1")) + } + + val expected = { + val repeatedExpr = UpdateFields(GetStructField('a1, 0), DropField("b3") :: Nil) + val repeatedExprDataType = StructType(Seq( + StructField("a3", IntegerType, nullable = false), + StructField("c3", IntegerType, nullable = false))) + + structLevel2.select( + UpdateFields('a1, Seq( + WithField("a2", repeatedExpr), + WithField("a2", UpdateFields( + If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), + DropField("c3") :: Nil)) + )).as("a1")) + } + + checkRule(query, expected) + } } diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt new file mode 100644 index 0000000000000..5feca0e100bb1 --- /dev/null +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt @@ -0,0 +1,26 @@ +================================================================================================ +Add 2 columns and drop 2 columns at 3 different depths of nesting +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_212-b03 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Add 2 columns and drop 2 columns at 3 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------------- +To non-nullable StructTypes using performant method 10 11 2 0.0 Infinity 1.0X +To nullable StructTypes using performant method 9 10 1 0.0 Infinity 1.0X +To non-nullable StructTypes using non-performant method 2457 2464 10 0.0 Infinity 0.0X +To nullable StructTypes using non-performant method 42641 43804 1644 0.0 Infinity 0.0X + + +================================================================================================ +Add 50 columns and drop 50 columns at 100 different depths of nesting +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_212-b03 on Mac OS X 10.14.6 +Intel(R) Core(TM) i7-7820HQ CPU @ 2.90GHz +Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------------------------- +To non-nullable StructTypes using performant method 4595 4927 470 0.0 Infinity 1.0X +To nullable StructTypes using performant method 5185 5516 468 0.0 Infinity 0.9X + + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index da542c67d9c51..a46d6c0bb2282 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -901,6 +901,23 @@ class Column(val expr: Expression) extends Logging { * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields * }}} * + * This method supports adding/replacing nested fields directly e.g. 
+ * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a.c", lit(3)).withField("a.d", lit(4))) + * // result: {"a":{"a":1,"b":2,"c":3,"d":4}} + * }}} + * + * However, if you are going to add/replace multiple nested fields, it is more optimal to extract + * out the nested struct before adding/replacing multiple fields e.g. + * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a", $"struct_col.a".withField("c", lit(3)).withField("d", lit(4)))) + * // result: {"a":{"a":1,"b":2,"c":3,"d":4}} + * }}} + * * @group expr_ops * @since 3.1.0 */ @@ -908,32 +925,102 @@ class Column(val expr: Expression) extends Logging { def withField(fieldName: String, col: Column): Column = withExpr { require(fieldName != null, "fieldName cannot be null") require(col != null, "col cannot be null") + updateFieldsHelper(expr, nameParts(fieldName), name => WithField(name, col.expr)) + } - val nameParts = if (fieldName.isEmpty) { + // scalastyle:off line.size.limit + /** + * An expression that drops fields in `StructType` by name. + * This is a no-op if schema doesn't contain field name(s). + * + * {{{ + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("c")) + * // result: {"a":1,"b":2} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'c', 3) struct_col") + * df.select($"struct_col".dropFields("b", "c")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + * df.select($"struct_col".dropFields("a", "b")) + * // result: org.apache.spark.sql.AnalysisException: cannot resolve 'update_fields(update_fields(`struct_col`))' due to data type mismatch: cannot drop all fields in struct + * + * val df = sql("SELECT CAST(NULL AS struct) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: null of type struct + * + * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col") + * df.select($"struct_col".dropFields("b")) + * // result: {"a":1} + * + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".dropFields("a.b")) + * // result: {"a":{"a":1}} + * + * val df = sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + * df.select($"struct_col".dropFields("a.c")) + * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields + * }}} + * + * This method supports dropping multiple nested fields directly e.g. + * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".dropFields("a.b", "a.c")) + * // result: {"a":{"a":1}} + * }}} + * + * However, if you are going to drop multiple nested fields, it is more optimal to extract + * out the nested struct before dropping multiple fields from it e.g. 
+ * + * {{{ + * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + * df.select($"struct_col".withField("a", $"struct_col.a".dropFields("b", "c"))) + * // result: {"a":{"a":1}} + * }}} + * + * @group expr_ops + * @since 3.1.0 + */ + // scalastyle:on line.size.limit + def dropFields(fieldNames: String*): Column = withExpr { + def dropField(structExpr: Expression, fieldName: String): UpdateFields = + updateFieldsHelper(structExpr, nameParts(fieldName), name => DropField(name)) + + fieldNames.tail.foldLeft(dropField(expr, fieldNames.head)) { + (resExpr, fieldName) => dropField(resExpr, fieldName) + } + } + + private def nameParts(fieldName: String): Seq[String] = { + require(fieldName != null, "fieldName cannot be null") + + if (fieldName.isEmpty) { fieldName :: Nil } else { CatalystSqlParser.parseMultipartIdentifier(fieldName) } - withFieldHelper(expr, nameParts, Nil, col.expr) } - private def withFieldHelper( - struct: Expression, + private def updateFieldsHelper( + structExpr: Expression, namePartsRemaining: Seq[String], - namePartsDone: Seq[String], - value: Expression) : WithFields = { - val name = namePartsRemaining.head + valueFunc: String => StructFieldsOperation): UpdateFields = { + + val fieldName = namePartsRemaining.head if (namePartsRemaining.length == 1) { - WithFields(struct, name :: Nil, value :: Nil) + UpdateFields(structExpr, valueFunc(fieldName) :: Nil) } else { - val newNamesRemaining = namePartsRemaining.tail - val newNamesDone = namePartsDone :+ name - val newValue = withFieldHelper( - struct = UnresolvedExtractValue(struct, Literal(name)), - namePartsRemaining = newNamesRemaining, - namePartsDone = newNamesDone, - value = value) - WithFields(struct, name :: Nil, newValue :: Nil) + val newValue = updateFieldsHelper( + structExpr = UnresolvedExtractValue(structExpr, Literal(fieldName)), + namePartsRemaining = namePartsRemaining.tail, + valueFunc = valueFunc) + UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 24419968c0472..b11f4c603dfd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.matchers.should.Matchers._ +import org.apache.spark.sql.UpdateFieldsBenchmark._ import org.apache.spark.sql.catalyst.expressions.{InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.functions._ @@ -922,11 +923,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { assert(inSet.sql === "('a' IN ('a', 'b'))") } - def checkAnswerAndSchema( + def checkAnswer( df: => DataFrame, expectedAnswer: Seq[Row], expectedSchema: StructType): Unit = { - checkAnswer(df, expectedAnswer) assert(df.schema == expectedSchema) } @@ -940,8 +940,8 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { sparkContext.parallelize(Row(Row(1, null, 3)) :: Nil), StructType(Seq(StructField("a", structType, nullable = false)))) - private lazy val nullStructLevel1: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(null) :: Nil), + private lazy val nullableStructLevel1: DataFrame = spark.createDataFrame( + 
sparkContext.parallelize(Row(null) :: Row(Row(1, null, 3)) :: Nil), StructType(Seq(StructField("a", structType, nullable = true)))) private lazy val structLevel2: DataFrame = spark.createDataFrame( @@ -951,12 +951,12 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("a", structType, nullable = false))), nullable = false)))) - private lazy val nullStructLevel2: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(Row(null)) :: Nil), + private lazy val nullableStructLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3))) :: Nil), StructType(Seq( StructField("a", StructType(Seq( StructField("a", structType, nullable = true))), - nullable = false)))) + nullable = true)))) private lazy val structLevel3: DataFrame = spark.createDataFrame( sparkContext.parallelize(Row(Row(Row(Row(1, null, 3)))) :: Nil), @@ -1018,7 +1018,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field with no name") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("", lit(4))), Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( @@ -1031,7 +1031,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4))), Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( @@ -1043,10 +1043,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } - test("withField should add field to null struct") { - checkAnswerAndSchema( - nullStructLevel1.withColumn("a", $"a".withField("d", lit(4))), - Row(null) :: Nil, + test("withField should add field to nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", $"a".withField("d", lit(4))), + Row(null) :: Row(Row(1, null, 3, 4)) :: Nil, StructType(Seq( StructField("a", StructType(Seq( StructField("a", IntegerType, nullable = false), @@ -1056,10 +1056,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = true)))) } - test("withField should add field to nested null struct") { - checkAnswerAndSchema( - nullStructLevel2.withColumn("a", $"a".withField("a.d", lit(4))), - Row(Row(null)) :: Nil, + test("withField should add field to nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".withField("a.d", lit(4))), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3, 4))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( StructField("a", StructType(Seq( @@ -1068,11 +1068,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("c", IntegerType, nullable = false), StructField("d", IntegerType, nullable = false))), nullable = true))), - nullable = false)))) + nullable = true)))) } test("withField should add null field to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(null).cast(IntegerType))), Row(Row(1, null, 3, null)) :: Nil, StructType(Seq( @@ -1085,7 +1085,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add multiple fields to struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), Row(Row(1, null, 3, 4, 5)) :: Nil, StructType(Seq( @@ -1098,12 +1098,26 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession { nullable = false)))) } + test("withField should add multiple fields to nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("e", lit(5))), + Row(null) :: Row(Row(1, null, 3, 4, 5)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = true)))) + } + test("withField should add field to nested struct") { Seq( structLevel2.withColumn("a", 'a.withField("a.d", lit(4))), structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("d", lit(4)))) ).foreach { df => - checkAnswerAndSchema( + checkAnswer( df, Row(Row(Row(1, null, 3, 4))) :: Nil, StructType( @@ -1118,8 +1132,50 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } + test("withField should add multiple fields to nested struct") { + Seq( + col("a").withField("a", $"a.a".withField("d", lit(4)).withField("e", lit(5))), + col("a").withField("a.d", lit(4)).withField("a.e", lit(5)) + ).foreach { column => + checkAnswer( + structLevel2.select(column.as("a")), + Row(Row(Row(1, null, 3, 4, 5))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("withField should add multiple fields to nested nullable struct") { + Seq( + col("a").withField("a", $"a.a".withField("d", lit(4)).withField("e", lit(5))), + col("a").withField("a.d", lit(4)).withField("a.e", lit(5)) + ).foreach { column => + checkAnswer( + nullableStructLevel2.select(column.as("a")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, null, 3, 4, 5))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + } + test("withField should add field to deeply nested struct") { - checkAnswerAndSchema( + checkAnswer( structLevel3.withColumn("a", 'a.withField("a.a.d", lit(4))), Row(Row(Row(Row(1, null, 3, 4)))) :: Nil, StructType(Seq( @@ -1136,7 +1192,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace field in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 2, 3)) :: Nil, StructType(Seq( @@ -1147,10 +1203,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } - test("withField should replace field in null struct") { - checkAnswerAndSchema( - nullStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))), - Row(null) :: Nil, + test("withField should replace field in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("b", lit("foo"))), + Row(null) :: Row(Row(1, "foo", 3)) :: Nil, StructType(Seq( StructField("a", 
StructType(Seq( StructField("a", IntegerType, nullable = false), @@ -1159,10 +1215,10 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = true)))) } - test("withField should replace field in nested null struct") { - checkAnswerAndSchema( - nullStructLevel2.withColumn("a", $"a".withField("a.b", lit("foo"))), - Row(Row(null)) :: Nil, + test("withField should replace field in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".withField("a.b", lit("foo"))), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, "foo", 3))) :: Nil, StructType( Seq(StructField("a", StructType(Seq( StructField("a", StructType(Seq( @@ -1170,11 +1226,11 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("b", StringType, nullable = false), StructField("c", IntegerType, nullable = false))), nullable = true))), - nullable = false)))) + nullable = true)))) } test("withField should replace field with null value in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("c", lit(null).cast(IntegerType))), Row(Row(1, null, null)) :: Nil, StructType(Seq( @@ -1186,7 +1242,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace multiple fields in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), Row(Row(10, 20, 3)) :: Nil, StructType(Seq( @@ -1197,12 +1253,24 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false)))) } + test("withField should replace multiple fields in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", 'a.withField("a", lit(10)).withField("b", lit(20))), + Row(null) :: Row(Row(10, 20, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true)))) + } + test("withField should replace field in nested struct") { Seq( structLevel2.withColumn("a", $"a".withField("a.b", lit(2))), structLevel2.withColumn("a", 'a.withField("a", $"a.a".withField("b", lit(2)))) ).foreach { df => - checkAnswerAndSchema( + checkAnswer( df, Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( @@ -1216,8 +1284,46 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } + test("withField should replace multiple fields in nested struct") { + Seq( + col("a").withField("a", $"a.a".withField("a", lit(10)).withField("b", lit(20))), + col("a").withField("a.a", lit(10)).withField("a.b", lit(20)) + ).foreach { column => + checkAnswer( + structLevel2.select(column.as("a")), + Row(Row(Row(10, 20, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("withField should replace multiple fields in nested nullable struct") { + Seq( + col("a").withField("a", $"a.a".withField("a", lit(10)).withField("b", lit(20))), + col("a").withField("a.a", lit(10)).withField("a.b", lit(20)) + ).foreach { column => + checkAnswer( + nullableStructLevel2.select(column.as("a")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(10, 20, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + 
StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + } + test("withField should replace field in deeply nested struct") { - checkAnswerAndSchema( + checkAnswer( structLevel3.withColumn("a", $"a".withField("a.a.b", lit(2))), Row(Row(Row(Row(1, 2, 3)))) :: Nil, StructType(Seq( @@ -1242,7 +1348,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("b", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(100))), Row(Row(1, 100, 100)) :: Nil, StructType(Seq( @@ -1254,7 +1360,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should replace fields in struct in given order") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("b", lit(2)).withField("b", lit(20))), Row(Row(1, 20, 3)) :: Nil, StructType(Seq( @@ -1266,7 +1372,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should add field and then replace same field in struct") { - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", 'a.withField("d", lit(4)).withField("d", lit(5))), Row(Row(1, null, 3, 5)) :: Nil, StructType(Seq( @@ -1290,7 +1396,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", 'a.withField("`a.b`.`e.f`", lit(2))), Row(Row(Row(1, 2, 3))) :: Nil, StructType(Seq( @@ -1317,7 +1423,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), Row(Row(2, 1)) :: Nil, StructType(Seq( @@ -1326,7 +1432,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("B", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 2)) :: Nil, StructType(Seq( @@ -1339,7 +1445,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should add field to struct because casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("A", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( @@ -1349,7 +1455,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructField("A", IntegerType, nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel1.withColumn("a", 'a.withField("b", lit(2))), Row(Row(1, 1, 2)) :: Nil, StructType(Seq( @@ -1377,7 +1483,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("withField should replace nested field in struct even if casing is different") { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel2.withColumn("a", 'a.withField("A.a", lit(2))), Row(Row(Row(2, 1), Row(1, 1))) :: Nil, StructType(Seq( @@ -1392,7 +1498,7 @@ class ColumnExpressionSuite extends QueryTest with 
SharedSparkSession { nullable = false))), nullable = false)))) - checkAnswerAndSchema( + checkAnswer( mixedCaseStructLevel2.withColumn("a", 'a.withField("b.a", lit(2))), Row(Row(Row(1, 1), Row(2, 1))) :: Nil, StructType(Seq( @@ -1451,30 +1557,41 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") .select($"struct_col".withField("a.c", lit(3))) }.getMessage should include("Ambiguous reference to fields") + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".withField("a.c", lit(3)).withField("a.d", lit(4))), + Row(Row(Row(1, 2, 3, 4)))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".withField("a", + $"struct_col.a".withField("c", lit(3)).withField("d", lit(4)))), + Row(Row(Row(1, 2, 3, 4)))) } test("SPARK-32641: extracting field from non-null struct column after withField should return " + "field value") { // extract newly added field - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(4) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(4) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(3):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( structLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(3):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = false)))) @@ -1482,26 +1599,30 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { test("SPARK-32641: extracting field from null struct column after withField should return " + "null if the original struct was null") { + val nullStructLevel1 = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + // extract newly added field - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(null) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( nullStructLevel1.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) @@ -1514,27 +1635,671 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { StructType(Seq(StructField("a", structType, nullable = true)))) // 
extract newly added field - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("d", lit(4)).getField("d")), Row(4) :: Row(null) :: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // extract newly replaced field - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("a", lit(4)).getField("a")), Row(4) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // add new field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("d", lit(4)).getField("c")), Row(3) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) // replace field, extract another field from original struct - checkAnswerAndSchema( + checkAnswer( df.withColumn("a", $"a".withField("a", lit(4)).getField("c")), Row(3) :: Row(null):: Nil, StructType(Seq(StructField("a", IntegerType, nullable = true)))) } + + + test("dropFields should throw an exception if called on a non-StructType column") { + intercept[AnalysisException] { + testData.withColumn("key", $"key".dropFields("a")) + }.getMessage should include("struct argument should be struct type, got: int") + } + + test("dropFields should throw an exception if fieldName argument is null") { + intercept[IllegalArgumentException] { + structLevel1.withColumn("a", $"a".dropFields(null)) + }.getMessage should include("fieldName cannot be null") + } + + test("dropFields should throw an exception if any intermediate structs don't exist") { + intercept[AnalysisException] { + structLevel2.withColumn("a", 'a.dropFields("x.b")) + }.getMessage should include("No such struct field x in a") + + intercept[AnalysisException] { + structLevel3.withColumn("a", 'a.dropFields("a.x.b")) + }.getMessage should include("No such struct field x in a") + } + + test("dropFields should throw an exception if intermediate field is not a struct") { + intercept[AnalysisException] { + structLevel1.withColumn("a", 'a.dropFields("b.a")) + }.getMessage should include("struct argument should be struct type, got: int") + } + + test("dropFields should throw an exception if intermediate field reference is ambiguous") { + intercept[AnalysisException] { + val structLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("a", structType, nullable = false))), + nullable = false)))) + + structLevel2.withColumn("a", 'a.dropFields("a.b")) + }.getMessage should include("Ambiguous reference to fields") + } + + test("dropFields should drop field in struct") { + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nullable struct") { + checkAnswer( + nullableStructLevel1.withColumn("a", $"a".dropFields("b")), + Row(null) :: Row(Row(1, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true)))) + } + + test("dropFields should drop multiple fields in struct") { + Seq( + structLevel1.withColumn("a", $"a".dropFields("b", "c")), + structLevel1.withColumn("a", 'a.dropFields("b").dropFields("c")) + 
).foreach { df => + checkAnswer( + df, + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should throw an exception if no fields will be left in struct") { + intercept[AnalysisException] { + structLevel1.withColumn("a", 'a.dropFields("a", "b", "c")) + }.getMessage should include("cannot drop all fields in struct") + } + + test("dropFields should drop field with no name in struct") { + val structType = StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("", IntegerType, nullable = false))) + + val structLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Nil), + StructType(Seq(StructField("a", structType, nullable = false)))) + + checkAnswer( + structLevel1.withColumn("a", $"a".dropFields("")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nested struct") { + checkAnswer( + structLevel2.withColumn("a", 'a.dropFields("a.b")), + Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop multiple fields in nested struct") { + checkAnswer( + structLevel2.withColumn("a", 'a.dropFields("a.b", "a.c")), + Row(Row(Row(1))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".dropFields("a.b")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + + test("dropFields should drop multiple fields in nested nullable struct") { + checkAnswer( + nullableStructLevel2.withColumn("a", $"a".dropFields("a.b", "a.c")), + Row(null) :: Row(Row(null)) :: Row(Row(Row(1))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = true))), + nullable = true)))) + } + + test("dropFields should drop field in deeply nested struct") { + checkAnswer( + structLevel3.withColumn("a", 'a.dropFields("a.a.b")), + Row(Row(Row(Row(1, 3)))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop all fields with given name in struct") { + val structLevel1 = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2, 3)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false)))) 
+ + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + + test("dropFields should drop field in struct even if casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should not drop field in struct because casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("A")), + Row(Row(1, 1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel1.withColumn("a", 'a.dropFields("b")), + Row(Row(1, 1)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("B", IntegerType, nullable = false))), + nullable = false)))) + } + } + + test("dropFields should drop nested field in struct even if casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + checkAnswer( + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")), + Row(Row(Row(1), Row(1, 1))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("A", StructType(Seq( + StructField("b", IntegerType, nullable = false))), + nullable = false), + StructField("B", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + + checkAnswer( + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")), + Row(Row(Row(1, 1), Row(1))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false), + StructField("b", StructType(Seq( + StructField("b", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + } + + test("dropFields should throw an exception because casing is different") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + intercept[AnalysisException] { + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("A.a")) + }.getMessage should include("No such struct field A in a, B") + + intercept[AnalysisException] { + mixedCaseStructLevel2.withColumn("a", 'a.dropFields("b.a")) + }.getMessage should include("No such struct field b in a, B") + } + } + + test("dropFields should drop only fields that exist") { + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("d")), + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + structLevel1.withColumn("a", 'a.dropFields("b", "d")), + Row(Row(1, 3)) 
:: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + structLevel2.withColumn("a", $"a".dropFields("a.b", "a.d")), + Row(Row(Row(1, 3))) :: Nil, + StructType( + Seq(StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + } + + test("dropFields should drop multiple fields at arbitrary levels of nesting in a single call") { + val df: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("b", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + df.withColumn("a", $"a".dropFields("a.b", "b")), + Row(Row(Row(1, 3))) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("c", IntegerType, nullable = false))), nullable = false))), + nullable = false)))) + } + + test("dropFields user-facing examples") { + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("b")), + Row(Row(1))) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("c")), + Row(Row(1, 2))) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2, 'c', 3) struct_col") + .select($"struct_col".dropFields("b", "c")), + Row(Row(1))) + + intercept[AnalysisException] { + sql("SELECT named_struct('a', 1, 'b', 2) struct_col") + .select($"struct_col".dropFields("a", "b")) + }.getMessage should include("cannot drop all fields in struct") + + checkAnswer( + sql("SELECT CAST(NULL AS struct) struct_col") + .select($"struct_col".dropFields("b")), + Row(null)) + + checkAnswer( + sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col") + .select($"struct_col".dropFields("b")), + Row(Row(1))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") + .select($"struct_col".dropFields("a.b")), + Row(Row(Row(1)))) + + intercept[AnalysisException] { + sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + .select($"struct_col".dropFields("a.c")) + }.getMessage should include("Ambiguous reference to fields") + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col") + .select($"struct_col".dropFields("a.b", "a.c")), + Row(Row(Row(1)))) + + checkAnswer( + sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col") + .select($"struct_col".withField("a", $"struct_col.a".dropFields("b", "c"))), + Row(Row(Row(1)))) + } + + test("should correctly handle different dropField + withField + getField combinations") { + val structType = StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = false))) + + val structLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Nil), + StructType(Seq(StructField("a", structType, nullable = false)))) + + val nullStructLevel1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + + val nullableStructLevel1: 
DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(1, 2)) :: Row(null) :: Nil), + StructType(Seq(StructField("a", structType, nullable = true)))) + + def check( + fieldOps: Column => Column, + getFieldName: String, + expectedValue: Option[Int]): Unit = { + + def query(df: DataFrame): DataFrame = + df.select(fieldOps(col("a")).getField(getFieldName).as("res")) + + checkAnswer( + query(structLevel1), + Row(expectedValue.orNull) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = expectedValue.isEmpty)))) + + checkAnswer( + query(nullStructLevel1), + Row(null) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + + checkAnswer( + query(nullableStructLevel1), + Row(expectedValue.orNull) :: Row(null) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + } + + // add attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)), "a", Some(1)) + check(_.withField("c", lit(3)), "b", Some(2)) + + // add attribute, extract added attribute + check(_.withField("c", lit(3)), "c", Some(3)) + check(_.withField("c", col("a.a")), "c", Some(1)) + check(_.withField("c", col("a.b")), "c", Some(2)) + check(_.withField("c", lit(null).cast(IntegerType)), "c", None) + + // replace attribute, extract an attribute from the original struct + check(_.withField("b", lit(3)), "a", Some(1)) + check(_.withField("a", lit(3)), "b", Some(2)) + + // replace attribute, extract replaced attribute + check(_.withField("b", lit(3)), "b", Some(3)) + check(_.withField("b", lit(null).cast(IntegerType)), "b", None) + check(_.withField("a", lit(3)), "a", Some(3)) + check(_.withField("a", lit(null).cast(IntegerType)), "a", None) + + // drop attribute, extract an attribute from the original struct + check(_.dropFields("b"), "a", Some(1)) + check(_.dropFields("a"), "b", Some(2)) + + // drop attribute, add attribute, extract an attribute from the original struct + check(_.dropFields("b").withField("c", lit(3)), "a", Some(1)) + check(_.dropFields("a").withField("c", lit(3)), "b", Some(2)) + + // drop attribute, add another attribute, extract added attribute + check(_.dropFields("a").withField("c", lit(3)), "c", Some(3)) + check(_.dropFields("b").withField("c", lit(3)), "c", Some(3)) + + // add attribute, drop attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)).dropFields("a"), "b", Some(2)) + check(_.withField("c", lit(3)).dropFields("b"), "a", Some(1)) + + // add attribute, drop another attribute, extract added attribute + check(_.withField("c", lit(3)).dropFields("a"), "c", Some(3)) + check(_.withField("c", lit(3)).dropFields("b"), "c", Some(3)) + + // replace attribute, drop same attribute, extract an attribute from the original struct + check(_.withField("b", lit(3)).dropFields("b"), "a", Some(1)) + check(_.withField("a", lit(3)).dropFields("a"), "b", Some(2)) + + // add attribute, drop same attribute, extract an attribute from the original struct + check(_.withField("c", lit(3)).dropFields("c"), "a", Some(1)) + check(_.withField("c", lit(3)).dropFields("c"), "b", Some(2)) + + // add attribute, drop another attribute, extract added attribute + check(_.withField("b", lit(3)).dropFields("a"), "b", Some(3)) + check(_.withField("a", lit(3)).dropFields("b"), "a", Some(3)) + check(_.withField("b", lit(null).cast(IntegerType)).dropFields("a"), "b", None) + check(_.withField("a", lit(null).cast(IntegerType)).dropFields("b"), "a", None) + + // drop attribute, add same attribute, extract added 
attribute + check(_.dropFields("b").withField("b", lit(3)), "b", Some(3)) + check(_.dropFields("a").withField("a", lit(3)), "a", Some(3)) + check(_.dropFields("b").withField("b", lit(null).cast(IntegerType)), "b", None) + check(_.dropFields("a").withField("a", lit(null).cast(IntegerType)), "a", None) + check(_.dropFields("c").withField("c", lit(3)), "c", Some(3)) + + // add attribute, drop same attribute, add same attribute again, extract added attribute + check(_.withField("c", lit(3)).dropFields("c").withField("c", lit(4)), "c", Some(4)) + } + + test("should move field up one level of nesting") { + // move a field up one level + checkAnswer( + nullableStructLevel2.select( + col("a").withField("c", col("a.a.c")).dropFields("a.c").as("res")), + Row(null) :: Row(Row(null, null)) :: Row(Row(Row(1, null), 3)) :: Nil, + StructType(Seq( + StructField("res", StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true))), + nullable = true), + StructField("c", IntegerType, nullable = true))), + nullable = true)))) + + // move a field up one level and then extract it + checkAnswer( + nullableStructLevel2.select( + col("a").withField("c", col("a.a.c")).dropFields("a.c").getField("c").as("res")), + Row(null) :: Row(null) :: Row(3) :: Nil, + StructType(Seq(StructField("res", IntegerType, nullable = true)))) + } + + test("should be able to refer to newly added nested column") { + intercept[AnalysisException] { + structLevel1.select($"a".withField("d", lit(4)).withField("e", $"a.d" + 1).as("a")) + }.getMessage should include("No such struct field d in a, b, c") + + checkAnswer( + structLevel1 + .select($"a".withField("d", lit(4)).as("a")) + .select($"a".withField("e", $"a.d" + 1).as("a")), + Row(Row(1, null, 3, 4, 5)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("c", IntegerType, nullable = false), + StructField("d", IntegerType, nullable = false), + StructField("e", IntegerType, nullable = false))), + nullable = false)))) + } + + test("should be able to drop newly added nested column") { + Seq( + structLevel1.select($"a".withField("d", lit(4)).dropFields("d").as("a")), + structLevel1 + .select($"a".withField("d", lit(4)).as("a")) + .select($"a".dropFields("d").as("a")) + ).foreach { query => + checkAnswer( + query, + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", structType, nullable = false)))) + } + } + + test("should still be able to refer to dropped column within the same select statement") { + // we can still access the nested column even after dropping it within the same select statement + checkAnswer( + structLevel1.select($"a".dropFields("c").withField("z", $"a.c").as("a")), + Row(Row(1, null, 3)) :: Nil, + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", IntegerType, nullable = false), + StructField("b", IntegerType, nullable = true), + StructField("z", IntegerType, nullable = false))), + nullable = false)))) + + // we can't access the nested column in subsequent select statement after dropping it in a + // previous select statement + intercept[AnalysisException]{ + structLevel1 + .select($"a".dropFields("c").as("a")) + .select($"a".withField("z", $"a.c")).as("a") + }.getMessage should include("No such struct field c in a, b;") + } + + test("nestedDf should generate nested DataFrames") { + checkAnswer( + emptyNestedDf(1, 1, nullable = 
false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(1, 2, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", IntegerType, nullable = false), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 1, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false))), + nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 2, nullable = false), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false), + StructField("nested2Col1", IntegerType, nullable = false))), + nullable = false), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = false)))) + + checkAnswer( + emptyNestedDf(2, 2, nullable = true), + Seq.empty[Row], + StructType(Seq(StructField("nested0Col0", StructType(Seq( + StructField("nested1Col0", StructType(Seq( + StructField("nested2Col0", IntegerType, nullable = false), + StructField("nested2Col1", IntegerType, nullable = false))), + nullable = true), + StructField("nested1Col1", IntegerType, nullable = false))), + nullable = true)))) + } + + Seq(Performant, NonPerformant).foreach { method => + Seq(false, true).foreach { nullable => + test(s"should add and drop 1 column at each depth of nesting using ${method.name} method, " + + s"nullable = $nullable") { + val maxDepth = 3 + + // dataframe with nested*Col0 to nested*Col2 at each depth + val inputDf = emptyNestedDf(maxDepth, 3, nullable) + + // add nested*Col3 and drop nested*Col2 + val modifiedColumn = method( + column = col(nestedColName(0, 0)), + numsToAdd = Seq(3), + numsToDrop = Seq(2), + maxDepth = maxDepth + ).as(nestedColName(0, 0)) + val resultDf = inputDf.select(modifiedColumn) + + // dataframe with nested*Col0, nested*Col1, nested*Col3 at each depth + val expectedDf = { + val colNums = Seq(0, 1, 3) + val nestedColumnDataType = nestedStructType(colNums, nullable, maxDepth) + + spark.createDataFrame( + spark.sparkContext.emptyRDD[Row], + StructType(Seq(StructField(nestedColName(0, 0), nestedColumnDataType, nullable)))) + } + + checkAnswer(resultDf, expectedDf.collect(), expectedDf.schema) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala new file mode 100644 index 0000000000000..28af552fe586b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/UpdateFieldsBenchmark.scala @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark +import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +/** + * Benchmark to measure Spark's performance analyzing and optimizing long UpdateFields chains. + * + * {{{ + * To run this benchmark: + * 1. without sbt: + * bin/spark-submit --class + * 2. with sbt: + * build/sbt "sql/test:runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to "benchmarks/UpdateFieldsBenchmark-results.txt". + * }}} + */ +object UpdateFieldsBenchmark extends SqlBasedBenchmark { + + def nestedColName(d: Int, colNum: Int): String = s"nested${d}Col$colNum" + + def nestedStructType( + colNums: Seq[Int], + nullable: Boolean, + maxDepth: Int, + currDepth: Int = 1): StructType = { + + if (currDepth == maxDepth) { + val fields = colNums.map { colNum => + val name = nestedColName(currDepth, colNum) + StructField(name, IntegerType, nullable = false) + } + StructType(fields) + } else { + val fields = colNums.foldLeft(Seq.empty[StructField]) { + case (structFields, colNum) if colNum == 0 => + val nested = nestedStructType(colNums, nullable, maxDepth, currDepth + 1) + structFields :+ StructField(nestedColName(currDepth, colNum), nested, nullable) + case (structFields, colNum) => + val name = nestedColName(currDepth, colNum) + structFields :+ StructField(name, IntegerType, nullable = false) + } + StructType(fields) + } + } + + /** + * Utility function for generating an empty DataFrame with nested columns. + * + * @param maxDepth: The depth to which to create nested columns. + * @param numColsAtEachDepth: The number of columns to create at each depth. + * @param nullable: This value is used to set the nullability of any StructType columns. 
+ */ + def emptyNestedDf(maxDepth: Int, numColsAtEachDepth: Int, nullable: Boolean): DataFrame = { + require(maxDepth > 0) + require(numColsAtEachDepth > 0) + + val nestedColumnDataType = nestedStructType(0 until numColsAtEachDepth, nullable, maxDepth) + spark.createDataFrame( + spark.sparkContext.emptyRDD[Row], + StructType(Seq(StructField(nestedColName(0, 0), nestedColumnDataType, nullable)))) + } + + trait ModifyNestedColumns { + val name: String + def apply(column: Column, numsToAdd: Seq[Int], numsToDrop: Seq[Int], maxDepth: Int): Column + } + + object Performant extends ModifyNestedColumns { + override val name: String = "performant" + + override def apply( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int): Column = helper(column, numsToAdd, numsToDrop, maxDepth, 1) + + private def helper( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int, + currDepth: Int): Column = { + + // drop columns at the current depth + val dropped = if (numsToDrop.nonEmpty) { + column.dropFields(numsToDrop.map(num => nestedColName(currDepth, num)): _*) + } else column + + // add columns at the current depth + val added = numsToAdd.foldLeft(dropped) { + (res, num) => res.withField(nestedColName(currDepth, num), lit(num)) + } + + if (currDepth == maxDepth) { + added + } else { + // add/drop columns at the next depth + val newValue = helper( + column = col((0 to currDepth).map(d => nestedColName(d, 0)).mkString(".")), + numsToAdd = numsToAdd, + numsToDrop = numsToDrop, + currDepth = currDepth + 1, + maxDepth = maxDepth) + added.withField(nestedColName(currDepth, 0), newValue) + } + } + } + + object NonPerformant extends ModifyNestedColumns { + override val name: String = "non-performant" + + override def apply( + column: Column, + numsToAdd: Seq[Int], + numsToDrop: Seq[Int], + maxDepth: Int): Column = { + + val dropped = if (numsToDrop.nonEmpty) { + val colsToDrop = (1 to maxDepth).flatMap { depth => + numsToDrop.map(num => s"${prefix(depth)}${nestedColName(depth, num)}") + } + column.dropFields(colsToDrop: _*) + } else column + + val added = { + val colsToAdd = (1 to maxDepth).flatMap { depth => + numsToAdd.map(num => (s"${prefix(depth)}${nestedColName(depth, num)}", lit(num))) + } + colsToAdd.foldLeft(dropped)((col, add) => col.withField(add._1, add._2)) + } + + added + } + + private def prefix(depth: Int): String = + if (depth == 1) "" + else (1 until depth).map(d => nestedColName(d, 0)).mkString("", ".", ".") + } + + private def updateFieldsBenchmark( + methods: Seq[ModifyNestedColumns], + maxDepth: Int, + initialNumberOfColumns: Int, + numsToAdd: Seq[Int] = Seq.empty, + numsToDrop: Seq[Int] = Seq.empty): Unit = { + + val name = s"Add ${numsToAdd.length} columns and drop ${numsToDrop.length} columns " + + s"at $maxDepth different depths of nesting" + + runBenchmark(name) { + val benchmark = new Benchmark( + name = name, + // The purpose of this benchmark is to ensure Spark is able to analyze and optimize long + // UpdateFields chains quickly so it runs over 0 rows of data. 
+        valuesPerIteration = 0,
+        output = output)
+
+      val nonNullableStructsDf = emptyNestedDf(maxDepth, initialNumberOfColumns, nullable = false)
+      val nullableStructsDf = emptyNestedDf(maxDepth, initialNumberOfColumns, nullable = true)
+
+      methods.foreach { method =>
+        val modifiedColumn = method(
+          column = col(nestedColName(0, 0)),
+          numsToAdd = numsToAdd,
+          numsToDrop = numsToDrop,
+          maxDepth = maxDepth
+        ).as(nestedColName(0, 0))
+
+        benchmark.addCase(s"To non-nullable StructTypes using ${method.name} method") { _ =>
+          nonNullableStructsDf.select(modifiedColumn).queryExecution.optimizedPlan
+        }
+
+        benchmark.addCase(s"To nullable StructTypes using ${method.name} method") { _ =>
+          nullableStructsDf.select(modifiedColumn).queryExecution.optimizedPlan
+        }
+      }
+
+      benchmark.run()
+    }
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    // This benchmark compares the performant and non-performant methods of writing the same query.
+    // We use small values for maxDepth, numsToAdd, and numsToDrop because the NonPerformant method
+    // scales extremely poorly with the number of nested columns being added/dropped.
+    updateFieldsBenchmark(
+      methods = Seq(Performant, NonPerformant),
+      maxDepth = 3,
+      initialNumberOfColumns = 5,
+      numsToAdd = 5 to 6,
+      numsToDrop = 3 to 4)
+
+    // This benchmark is to show that the performant method of writing a query when we want to add
+    // and drop a large number of nested columns scales nicely.
+    updateFieldsBenchmark(
+      methods = Seq(Performant),
+      maxDepth = 100,
+      initialNumberOfColumns = 51,
+      numsToAdd = 51 to 100,
+      numsToDrop = 1 to 50)
+  }
+}

From ddc7012b3d4cd05c6695378989c9d1a78102bbbd Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Tue, 6 Oct 2020 09:09:19 +0000
Subject: [PATCH 0172/1009] [SPARK-32243][SQL] HiveSessionCatalog call super.makeFunctionExpression should throw earlier when got Spark UDAF Invalid arguments number error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### What changes were proposed in this pull request?
When we create a UDAF using a class that extends `UserDefinedAggregateFunction` and then call that function with Hive support enabled, `HiveSessionCatalog` calls `super.makeFunctionExpression` but swallows the error it throws. For example, if the function needs 2 parameters and we only supply 1, the exception only shows

```
No handler for UDF/UDAF/UDTF xxxxxxxx
```

This is confusing for developers; we should also surface the error thrown by the super method.

For this PR's UT: before the change, the exception thrown is

```
No handler for UDF/UDAF/UDTF 'org.apache.spark.sql.hive.execution.LongProductSum'; line 1 pos 7
```

After this PR, the exception thrown is

```
Spark UDAF Error: Invalid number of arguments for function longProductSum. Expected: 2; Found: 1;
Hive UDF/UDAF/UDTF Error: No handler for UDF/UDAF/UDTF 'org.apache.spark.sql.hive.execution.LongProductSum'; line 1 pos 7
```

### Why are the changes needed?
Show a more detailed error message when a UDAF call fails to resolve.

### Does this PR introduce _any_ user-facing change?
Users will see a more detailed error message when using Spark SQL UDAFs in Hive support mode.

### How was this patch tested?
Added UT.

Closes #29054 from AngersZhuuuu/SPARK-32243.
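For context, a minimal sketch of the scenario this error-message change targets. It assumes a `SparkSession` built with `enableHiveSupport()` and a hypothetical two-argument Spark UDAF class `com.example.udaf.LongProductSum`; the class name and registration below are illustrative and not part of this patch:

```
// Hypothetical class: extends org.apache.spark.sql.expressions.UserDefinedAggregateFunction
// and declares an inputSchema with two LongType columns.
spark.sql("CREATE FUNCTION long_product_sum AS 'com.example.udaf.LongProductSum'")

// Calling it with only one argument is what previously produced just the generic
// "No handler for UDF/UDAF/UDTF ..." message; after this patch the Spark-side
// "Invalid number of arguments" error is reported as well.
spark.sql("SELECT long_product_sum(a) FROM VALUES (1L, 2L) AS t(a, b)").show()
```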
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../catalog/InvalidUDFClassException.scala | 28 +++++ .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../spark/sql/hive/HiveSessionCatalog.scala | 103 ++++++++++-------- .../sql/hive/execution/HiveUDAFSuite.scala | 14 +++ 4 files changed, 102 insertions(+), 45 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala new file mode 100644 index 0000000000000..bc02efd5113c2 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InvalidUDFClassException.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.sql.AnalysisException + +/** + * Thrown when a query failed for invalid function class, usually because a SQL + * function's class does not follow the rules of the UDF/UDAF/UDTF class definition. + */ +class InvalidUDFClassException private[sql](message: String) + extends AnalysisException(message, None, None, None, None) { +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index e9a02c15f7362..4865629329831 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1335,7 +1335,7 @@ class SessionCatalog( } e } else { - throw new AnalysisException(s"No handler for UDAF '${clazz.getCanonicalName}'. " + + throw new InvalidUDFClassException(s"No handler for UDAF '${clazz.getCanonicalName}'. " + s"Use sparkSession.udf.register(...) 
instead.") } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index bc7760c982aab..f24834b938a1e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, Gener import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry -import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, ExternalCatalog, FunctionResourceLoader, GlobalTempViewManager, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper @@ -57,6 +57,56 @@ private[sql] class HiveSessionCatalog( parser, functionResourceLoader) { + private def makeHiveFunctionExpression( + name: String, + clazz: Class[_], + input: Seq[Expression]): Expression = { + var udfExpr: Option[Expression] = None + try { + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number + // mismatch, etc. Here we catch the exception and throw AnalysisException instead. + if (classOf[UDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[UDAF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveUDAFFunction( + name, + new HiveFunctionWrapper(clazz.getName), + input, + isUDAFBridgeRequired = true)) + udfExpr.get.dataType // Force it to check input data types. + } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) + // Force it to check data types. + udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema + } + } catch { + case NonFatal(e) => + val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" + val errorMsg = + if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + s"$noHandlerMsg\nPlease make sure your function overrides " + + "`public StructObjectInspector initialize(ObjectInspector[] args)`." + } else { + noHandlerMsg + } + val analysisException = new AnalysisException(errorMsg) + analysisException.setStackTrace(e.getStackTrace) + throw analysisException + } + udfExpr.getOrElse { + throw new InvalidUDFClassException( + s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") + } + } + /** * Constructs a [[Expression]] based on the provided class that represents a function. * @@ -69,49 +119,14 @@ private[sql] class HiveSessionCatalog( // Current thread context classloader may not be the one loaded the class. 
Need to switch // context classloader to initialize instance properly. Utils.withContextClassLoader(clazz.getClassLoader) { - Try(super.makeFunctionExpression(name, clazz, input)).getOrElse { - var udfExpr: Option[Expression] = None - try { - // When we instantiate hive UDF wrapper class, we may throw exception if the input - // expressions don't satisfy the hive UDF, such as type mismatch, input number - // mismatch, etc. Here we catch the exception and throw AnalysisException instead. - if (classOf[UDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[UDAF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveUDAFFunction( - name, - new HiveFunctionWrapper(clazz.getName), - input, - isUDAFBridgeRequired = true)) - udfExpr.get.dataType // Force it to check input data types. - } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - udfExpr = Some(HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), input)) - udfExpr.get.asInstanceOf[HiveGenericUDTF].elementSchema // Force it to check data types. - } - } catch { - case NonFatal(e) => - val noHandlerMsg = s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}': $e" - val errorMsg = - if (classOf[GenericUDTF].isAssignableFrom(clazz)) { - s"$noHandlerMsg\nPlease make sure your function overrides " + - "`public StructObjectInspector initialize(ObjectInspector[] args)`." - } else { - noHandlerMsg - } - val analysisException = new AnalysisException(errorMsg) - analysisException.setStackTrace(e.getStackTrace) - throw analysisException - } - udfExpr.getOrElse { - throw new AnalysisException(s"No handler for UDF/UDAF/UDTF '${clazz.getCanonicalName}'") - } + try { + super.makeFunctionExpression(name, clazz, input) + } catch { + // If `super.makeFunctionExpression` throw `InvalidUDFClassException`, we construct + // Hive UDF/UDAF/UDTF with function definition. Otherwise, we just throw it earlier. 
+ case _: InvalidUDFClassException => + makeHiveFunctionExpression(name, clazz, input) + case e => throw e } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala index 9e33a8ee4cc5c..ed44dcd8d7a29 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDAFSuite.scala @@ -161,6 +161,20 @@ class HiveUDAFSuite extends QueryTest checkAnswer(sql("select histogram_numeric(a,2) from abc where a=3"), Row(null)) } } + + test("SPARK-32243: Spark UDAF Invalid arguments number error should throw earlier") { + // func need two arguments + val functionName = "longProductSum" + val functionClass = "org.apache.spark.sql.hive.execution.LongProductSum" + withUserDefinedFunction(functionName -> true) { + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$functionClass'") + val e = intercept[AnalysisException] { + sql(s"SELECT $functionName(100)") + }.getMessage + assert(e.contains( + s"Invalid number of arguments for function $functionName. Expected: 2; Found: 1;")) + } + } } /** From 0812d6c17cc4876bb87a9d1fec35ec8c7b2365f0 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 6 Oct 2020 18:11:24 +0900 Subject: [PATCH 0173/1009] [SPARK-33073][PYTHON] Improve error handling on Pandas to Arrow conversion failures ### What changes were proposed in this pull request? This improves error handling when a failure in conversion from Pandas to Arrow occurs. And fixes tests to be compatible with upcoming Arrow 2.0.0 release. ### Why are the changes needed? Current tests will fail with Arrow 2.0.0 because of a change in error message when the schema is invalid. For these cases, the current error message also includes information on disabling safe conversion config, which is mainly meant for floating point truncation and overflow. The tests have been updated to use a message that is show for past Arrow versions, and upcoming. If the user enters an invalid schema, the error produced by pyarrow is not consistent and either `TypeError` or `ArrowInvalid`, with the latter being caught, and raised as a `RuntimeError` with the extra info. The error handling is improved by: - narrowing the exception type to `TypeError`s, which `ArrowInvalid` is a subclass and what is raised on safe conversion failures. - The exception is only raised with additional information on disabling "spark.sql.execution.pandas.convertToArrowArraySafely" if it is enabled in the first place. - The original exception is chained to better show it to the user. ### Does this PR introduce _any_ user-facing change? Yes, the error re-raised changes from a RuntimeError to a ValueError, which better categorizes this type of error and in-line with the original Arrow error. ### How was this patch tested? Existing tests, using pyarrow 1.0.1 and 2.0.0-snapshot Closes #29951 from BryanCutler/arrow-better-handle-pandas-errors-SPARK-33073. 
Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- python/pyspark/sql/pandas/serializers.py | 17 ++++++++++------- python/pyspark/sql/tests/test_arrow.py | 9 +++++---- .../sql/tests/test_pandas_grouped_map.py | 15 ++++++++------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 4b91c6a0f8730..63fb8562799e3 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -153,13 +153,16 @@ def create_array(s, t): s = s.astype(s.dtypes.categories.dtype) try: array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck) - except pa.ArrowException as e: - error_msg = "Exception thrown when converting pandas.Series (%s) to Arrow " + \ - "Array (%s). It can be caused by overflows or other unsafe " + \ - "conversions warned by Arrow. Arrow safe type check can be " + \ - "disabled by using SQL config " + \ - "`spark.sql.execution.pandas.convertToArrowArraySafely`." - raise RuntimeError(error_msg % (s.dtype, t), e) + except ValueError as e: + if self._safecheck: + error_msg = "Exception thrown when converting pandas.Series (%s) to " + \ + "Arrow Array (%s). It can be caused by overflows or other " + \ + "unsafe conversions warned by Arrow. Arrow safe type check " + \ + "can be disabled by using SQL config " + \ + "`spark.sql.execution.pandas.convertToArrowArraySafely`." + raise ValueError(error_msg % (s.dtype, t)) from e + else: + raise e return array arrs = [] diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index c6497923d84fb..55d5e9017b345 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -264,11 +264,12 @@ def test_createDataFrame_with_schema(self): def test_createDataFrame_with_incorrect_schema(self): pdf = self.create_pandas_data_frame() fields = list(self.schema) - fields[0], fields[1] = fields[1], fields[0] # swap str with int + fields[5], fields[6] = fields[6], fields[5] # swap decimal with date wrong_schema = StructType(fields) - with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "integer.*required"): - self.spark.createDataFrame(pdf, schema=wrong_schema) + with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): + with QuietTest(self.sc): + with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + self.spark.createDataFrame(pdf, schema=wrong_schema) def test_createDataFrame_with_names(self): pdf = self.create_pandas_data_frame() diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index 81b6d5efb710a..93e37125eaa33 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -446,15 +446,16 @@ def int_index(pdf): def column_name_typo(pdf): return pd.DataFrame({'iid': pdf.id, 'v': pdf.v}) - @pandas_udf('id long, v int', PandasUDFType.GROUPED_MAP) + @pandas_udf('id long, v decimal', PandasUDFType.GROUPED_MAP) def invalid_positional_types(pdf): - return pd.DataFrame([(u'a', 1.2)]) + return pd.DataFrame([(1, datetime.date(2020, 10, 5))]) - with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): - grouped_df.apply(column_name_typo).collect() - with self.assertRaisesRegexp(Exception, "an integer is required"): - grouped_df.apply(invalid_positional_types).collect() + with 
self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): + with QuietTest(self.sc): + with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): + grouped_df.apply(column_name_typo).collect() + with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + grouped_df.apply(invalid_positional_types).collect() def test_positional_assignment_conf(self): with self.sql_conf({ From b5e4b8c73e10743eef4d35b6e82053a5a065b2ed Mon Sep 17 00:00:00 2001 From: Michael Munday Date: Tue, 6 Oct 2020 08:31:06 -0500 Subject: [PATCH 0174/1009] [SPARK-27428][CORE][TEST] Increase receive buffer size used in StatsdSinkSuite ### What changes were proposed in this pull request? Increase size of socket receive buffer in these tests. ### Why are the changes needed? The socket receive buffer size set in this test was too small for the StatsdSinkSuite tests to run reliably on some systems. For a test in this suite to run reliably the buffer needs to be large enough to hold all the data in the packets being sent in a test along with any additional kernel or protocol overhead. The amount of kernel overhead per packet can vary from system to system but is typically far higher than the protocol overhead. If the receive buffer is too small and fills up then packets are silently dropped. This leads to the test failing with a timeout. If the socket defaults to a larger receive buffer (normally true) then we should keep that size. As well as increasing the minimum buffer size I've also decoupled the datagram packet buffer size from the receive buffer size. The receive buffer should in general be far larger to account for the fact that multiple packets might be buffered, as well as the aforementioned overhead. Any truncated data in individual packets will be picked up by the tests. ### Does this PR introduce _any_ user-facing change? No, this only affects the tests. ### How was this patch tested? Existing tests on IBM Z and x86. Closes #29819 from mundaym/fix-statsd. Authored-by: Michael Munday Signed-off-by: Sean Owen --- .../spark/metrics/sink/StatsdSinkSuite.scala | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala b/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala index 0e21a36071c42..3d4b8c868d6fc 100644 --- a/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/sink/StatsdSinkSuite.scala @@ -35,12 +35,27 @@ class StatsdSinkSuite extends SparkFunSuite { STATSD_KEY_UNIT -> "seconds", STATSD_KEY_HOST -> "127.0.0.1" ) - private val socketTimeout = 30000 // milliseconds - private val socketBufferSize = 8192 + // The maximum size of a single datagram packet payload. Payloads + // larger than this will be truncated. + private val maxPayloadSize = 256 // bytes + + // The receive buffer must be large enough to hold all inflight + // packets. This includes any kernel and protocol overhead. + // This value was determined experimentally and should be + // increased if timeouts are seen. + private val socketMinRecvBufferSize = 16384 // bytes + private val socketTimeout = 30000 // milliseconds private def withSocketAndSink(testCode: (DatagramSocket, StatsdSink) => Any): Unit = { val socket = new DatagramSocket - socket.setReceiveBufferSize(socketBufferSize) + + // Leave the receive buffer size untouched unless it is too + // small. 
If the receive buffer is too small packets will be + // silently dropped and receive operations will timeout. + if (socket.getReceiveBufferSize() < socketMinRecvBufferSize) { + socket.setReceiveBufferSize(socketMinRecvBufferSize) + } + socket.setSoTimeout(socketTimeout) val props = new Properties defaultProps.foreach(e => props.put(e._1, e._2)) @@ -61,7 +76,7 @@ class StatsdSinkSuite extends SparkFunSuite { sink.registry.register("counter", counter) sink.report() - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) socket.receive(p) val result = new String(p.getData, 0, p.getLength, UTF_8) @@ -77,7 +92,7 @@ class StatsdSinkSuite extends SparkFunSuite { sink.registry.register("gauge", gauge) sink.report() - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) socket.receive(p) val result = new String(p.getData, 0, p.getLength, UTF_8) @@ -87,7 +102,7 @@ class StatsdSinkSuite extends SparkFunSuite { test("metrics StatsD sink with Histogram") { withSocketAndSink { (socket, sink) => - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) val histogram = new Histogram(new UniformReservoir) histogram.update(10) histogram.update(20) @@ -121,7 +136,7 @@ class StatsdSinkSuite extends SparkFunSuite { test("metrics StatsD sink with Timer") { withSocketAndSink { (socket, sink) => - val p = new DatagramPacket(new Array[Byte](socketBufferSize), socketBufferSize) + val p = new DatagramPacket(new Array[Byte](maxPayloadSize), maxPayloadSize) val timer = new Timer() timer.update(1, SECONDS) timer.update(2, SECONDS) From ec6fccb922f721e5a44d89c93f711f44ce9d6592 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 6 Oct 2020 14:33:34 +0000 Subject: [PATCH 0175/1009] [SPARK-32243][SQL][FOLLOWUP] Fix compilation in HiveSessionCatalog Fix a mistake when merging https://github.com/apache/spark/pull/29054 Closes #29955 from cloud-fan/hot-fix. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index f24834b938a1e..8a248a251820f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -126,7 +126,7 @@ private[sql] class HiveSessionCatalog( // Hive UDF/UDAF/UDTF with function definition. Otherwise, we just throw it earlier. case _: InvalidUDFClassException => makeHiveFunctionExpression(name, clazz, input) - case e => throw e + case NonFatal(e) => throw e } } } From 17d309dfacd4bdebbcd9609dd24a9e65a1a2b4f5 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 6 Oct 2020 16:01:10 +0000 Subject: [PATCH 0176/1009] [SPARK-32963][SQL] empty string should be consistent for schema name in SparkGetSchemasOperation ### What changes were proposed in this pull request? This PR makes the empty string for schema name pattern match the global temp view as same as it works for other databases. This PR also add new tests to covering different kinds of wildcards to verify the SparkGetSchemasOperation ### Why are the changes needed? 
When the schema name is empty string, it is considered as ".*" and can match all databases in the catalog. But when it can not match the global temp view as it is not converted to ".*" ### Does this PR introduce _any_ user-facing change? yes , JDBC operation like `statement.getConnection.getMetaData..getSchemas(null, "")` now also provides the global temp view in the result set. ### How was this patch tested? new tests Closes #29834 from yaooqinn/SPARK-32963. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../SparkGetSchemasOperation.scala | 3 +- .../SparkMetadataOperationSuite.scala | 35 ++++++++++++++----- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index 16fd502048e80..e58357a415545 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -77,7 +77,8 @@ private[hive] class SparkGetSchemasOperation( val globalTempViewDb = sqlContext.sessionState.catalog.globalTempViewManager.database val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) - if (databasePattern.matcher(globalTempViewDb).matches()) { + if (schemaName == null || schemaName.isEmpty || + databasePattern.matcher(globalTempViewDb).matches()) { rowSet.addRow(Array[AnyRef](globalTempViewDb, DEFAULT_HIVE_CATALOG)) } setState(OperationState.FINISHED) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 7369dbfcf7a51..818f387f131d6 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.{DatabaseMetaData, ResultSet} +import org.apache.hive.service.cli.HiveSQLException + import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ @@ -28,23 +30,40 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { test("Spark's own GetSchemasOperation(SparkGetSchemasOperation)") { def checkResult(rs: ResultSet, dbNames: Seq[String]): Unit = { - for (i <- dbNames.indices) { - assert(rs.next()) - assert(rs.getString("TABLE_SCHEM") === dbNames(i)) + val expected = dbNames.iterator + while(rs.next() || expected.hasNext) { + assert(rs.getString("TABLE_SCHEM") === expected.next) + assert(rs.getString("TABLE_CATALOG").isEmpty) } // Make sure there are no more elements assert(!rs.next()) + assert(!expected.hasNext, "All expected schemas should be visited") } - withDatabase("db1", "db2") { statement => - Seq("CREATE DATABASE db1", "CREATE DATABASE db2").foreach(statement.execute) - + val dbs = Seq("db1", "db2", "db33", "db44") + val dbDflts = Seq("default", "global_temp") + withDatabase(dbs: _*) { statement => + dbs.foreach( db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db")) val metaData = statement.getConnection.getMetaData - checkResult(metaData.getSchemas(null, "%"), Seq("db1", "db2", "default", "global_temp")) 
+ Seq("", "%", null, ".*", "_*", "_%", ".%") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs ++ dbDflts) + } + + Seq("db%", "db*") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs) + } + + Seq("db_", "db.") foreach { pattern => + checkResult(metaData.getSchemas(null, pattern), dbs.take(2)) + } + checkResult(metaData.getSchemas(null, "db1"), Seq("db1")) checkResult(metaData.getSchemas(null, "db_not_exist"), Seq.empty) - checkResult(metaData.getSchemas(null, "db*"), Seq("db1", "db2")) + + val e = intercept[HiveSQLException](metaData.getSchemas(null, "*")) + assert(e.getCause.getMessage === + "Error operating GET_SCHEMAS Dangling meta character '*' near index 0\n*\n^") } } From 3b2a38d73578e8760dbd6c34e427896a8cde00dd Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 6 Oct 2020 09:40:16 -0700 Subject: [PATCH 0177/1009] [SPARK-32511][SQL][FOLLOWUP] Fix the broken build for Scala 2.13 with Maven ### What changes were proposed in this pull request? This PR fixes the broken build for Scala 2.13 with Maven. https://github.com/apache/spark/pull/29913/checks?check_run_id=1187826966 #29795 was merged though it doesn't successfully finish the build for Scala 2.13 ### Why are the changes needed? To fix the build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `build/mvn -Pscala-2.13 -Phive -Phive-thriftserver -DskipTests package` Closes #29954 from sarutak/hotfix-seq. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index d5b1950e82c56..f6485a51f8fae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -586,7 +586,7 @@ case class WithField(name: String, valExpr: Expression) } } if (!hasMatch) result += newFieldExpr - result + result.toSeq } override def children: Seq[Expression] = valExpr :: Nil From 0b326d532752fd4e05b08dd16c096f80afe7d727 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Tue, 6 Oct 2020 14:18:37 -0700 Subject: [PATCH 0178/1009] [SPARK-32857][CORE] Fix flaky o.a.s.s.BarrierTaskContextSuite.throw exception if the number of barrier() calls are not the same on every task ### What changes were proposed in this pull request? Fix the flaky test. ### Why are the changes needed? The test is flaky: `Expected exception org.apache.spark.SparkException to be thrown, but no exception was thrown`. Check the full error stack [here](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/128548/testReport/org.apache.spark.scheduler/BarrierTaskContextSuite/throw_exception_if_the_number_of_barrier___calls_are_not_the_same_on_every_task/). By analyzing the log below, I found that task 0 hadn't reached the second `context.barrier()` when another three tasks already raised the sync timeout exceptions by the first `context.barrier()`. The timeout exceptions were caught by the `try...catch...`. Then, each task started another round barrier sync from the second `context.barrier()` and completed the sync successfully. 
```scala 20/09/10 20:54:48.821 dispatcher-event-loop-10 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.822 dispatcher-event-loop-10 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 2, current progress: 1/4. 20/09/10 20:54:48.826 dispatcher-BlockManagerMaster INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on localhost:38420 (size: 2.2 KiB, free: 546.3 MiB) 20/09/10 20:54:48.908 dispatcher-event-loop-12 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.909 dispatcher-event-loop-12 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 1, current progress: 2/4. 20/09/10 20:54:48.959 dispatcher-event-loop-11 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:48.960 dispatcher-event-loop-11 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 3, current progress: 3/4. 20/09/10 20:54:49.616 dispatcher-CoarseGrainedScheduler INFO TaskSchedulerImpl: Skip current round of resource offers for barrier stage 0 because the barrier taskSet requires 4 slots, while the total number of available slots is 0. 20/09/10 20:54:49.899 dispatcher-event-loop-15 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:49.900 dispatcher-event-loop-15 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 1, current progress: 1/4. 20/09/10 20:54:49.965 dispatcher-event-loop-13 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:49.966 dispatcher-event-loop-13 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 3, current progress: 2/4. 20/09/10 20:54:50.112 dispatcher-event-loop-16 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:50.113 dispatcher-event-loop-16 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 0, current progress: 3/4. 20/09/10 20:54:50.609 dispatcher-CoarseGrainedScheduler INFO TaskSchedulerImpl: Skip current round of resource offers for barrier stage 0 because the barrier taskSet requires 4 slots, while the total number of available slots is 0. 20/09/10 20:54:50.826 dispatcher-event-loop-17 INFO BarrierCoordinator: Current barrier epoch for Stage 0 (Attempt 0) is 0. 20/09/10 20:54:50.827 dispatcher-event-loop-17 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received update from Task 2, current progress: 4/4. 20/09/10 20:54:50.827 dispatcher-event-loop-17 INFO BarrierCoordinator: Barrier sync epoch 0 from Stage 0 (Attempt 0) received all updates from tasks, finished successfully. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated the test and tested a hundred times without failure(Previously, there could be several failures). Closes #29732 from Ngone51/fix-flaky-throw-exception. 
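To make the mismatched-`barrier()` scenario above concrete, here is a condensed, test-style sketch of the kind of barrier job this suite runs (simplified; the exact partition logic in the real test differs), assuming a `SparkContext` `sc` with at least four slots and ScalaTest's `intercept` in scope:

```scala
import org.apache.spark.{BarrierTaskContext, SparkException}

val rdd = sc.makeRDD(1 to 10, 4).barrier().mapPartitions { iter =>
  val context = BarrierTaskContext.get()
  context.barrier()
  if (context.partitionId() == 0) {
    // Only one task issues a second barrier() call, so the other three tasks never join
    // that sync round and the coordinator gives up after spark.barrier.sync.timeout.
    context.barrier()
  }
  iter
}

val error = intercept[SparkException] { rdd.collect() }.getMessage
assert(error.contains("The coordinator didn't get all barrier sync requests"))
```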
Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/scheduler/BarrierTaskContextSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index d18ca36f1fa60..e4ec62f8efc5b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -189,7 +189,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with test("throw exception if the number of barrier() calls are not the same on every task") { initLocalClusterSparkContext() - sc.conf.set("spark.barrier.sync.timeout", "1") + sc.conf.set("spark.barrier.sync.timeout", "5") val rdd = sc.makeRDD(1 to 10, 4) val rdd2 = rdd.barrier().mapPartitions { it => val context = BarrierTaskContext.get() @@ -212,7 +212,7 @@ class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with rdd2.collect() }.getMessage assert(error.contains("The coordinator didn't get all barrier sync requests")) - assert(error.contains("within 1 second(s)")) + assert(error.contains("within 5 second(s)")) } def testBarrierTaskKilled(interruptOnKill: Boolean): Unit = { From 57ed5a829b7dd8c92e5dfb7bb96373c8f464246c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 6 Oct 2020 16:59:23 -0700 Subject: [PATCH 0179/1009] [SPARK-33007][SQL] Simplify named_struct + get struct field + from_json expression chain ### What changes were proposed in this pull request? This proposes to simplify named_struct + get struct field + from_json expression chain from `struct(from_json.col1, from_json.col2, from_json.col3...)` to `struct(from_json)`. ### Why are the changes needed? Simplify complex expression tree that could be produced by query optimization or user. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29942 from viirya/SPARK-33007. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 36 ++++++++++ .../optimizer/OptimizeJsonExprsSuite.scala | 67 +++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index 59228904d84b7..fcd5412d66d41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -28,10 +28,46 @@ import org.apache.spark.sql.types.{ArrayType, StructType} * The optimization includes: * 1. JsonToStructs(StructsToJson(child)) => child. * 2. Prune unnecessary columns from GetStructField/GetArrayStructFields + JsonToStructs. + * 3. CreateNamedStruct(JsonToStructs(json).col1, JsonToStructs(json).col2, ...) => + * If(IsNull(json), nullStruct, KnownNotNull(JsonToStructs(prunedSchema, ..., json))) + * if JsonToStructs(json) is shared among all fields of CreateNamedStruct. `prunedSchema` + * contains all accessed fields in original CreateNamedStruct. 
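As a usage illustration, here is a sketch (assuming a `SparkSession` named `spark`) of the query shape this rule targets. Note from the rule's premises that it only fires when the struct field names match the JSON field names, all fields come from the same `from_json`, and there are no duplicate field names:

```scala
import spark.implicits._

Seq("""{"a":1, "b":2, "c":123, "d":"test"}""").toDF("json").createOrReplaceTempView("t")

// Both struct entries re-parse the same JSON column; with this rule the optimizer
// should collapse them into a single from_json call with a pruned schema (a, b).
val query = spark.sql("""
  SELECT named_struct(
           'a', from_json(json, 'a INT, b INT, c LONG, d STRING').a,
           'b', from_json(json, 'a INT, b INT, c LONG, d STRING').b) AS s
  FROM t
""")

query.explain(true) // inspect the optimized plan for the single, pruned from_json
```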
*/ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { case p => p.transformExpressions { + + case c: CreateNamedStruct + // If we create struct from various fields of the same `JsonToStructs`. + if c.valExprs.forall { v => + v.isInstanceOf[GetStructField] && + v.asInstanceOf[GetStructField].child.isInstanceOf[JsonToStructs] && + v.children.head.semanticEquals(c.valExprs.head.children.head) + } => + val jsonToStructs = c.valExprs.map(_.children.head) + val sameFieldName = c.names.zip(c.valExprs).forall { + case (name, valExpr: GetStructField) => + name.toString == valExpr.childSchema(valExpr.ordinal).name + case _ => false + } + + // Although `CreateNamedStruct` allows duplicated field names, e.g. "a int, a int", + // `JsonToStructs` does not support parsing json with duplicated field names. + val duplicateFields = c.names.map(_.toString).distinct.length != c.names.length + + // If we create struct from various fields of the same `JsonToStructs` and we don't + // alias field names and there is no duplicated field in the struct. + if (sameFieldName && !duplicateFields) { + val fromJson = jsonToStructs.head.asInstanceOf[JsonToStructs].copy(schema = c.dataType) + val nullFields = c.children.grouped(2).flatMap { + case Seq(name, value) => Seq(name, Literal(null, value.dataType)) + }.toSeq + + If(IsNull(fromJson.child), c.copy(children = nullFields), KnownNotNull(fromJson)) + } else { + c + } + case jsonToStructs @ JsonToStructs(_, options1, StructsToJson(options2, child, timeZoneId2), timeZoneId1) if options1.isEmpty && options2.isEmpty && timeZoneId1 == timeZoneId2 && diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index e47a141dfed1f..7d975a1b00466 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -199,4 +199,71 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { JsonToStructs(prunedSchema2, options, 'json), field2, 0, 1, false).as("b")).analyze comparePlans(optimized2, expected2) } + + test("SPARK-33007: simplify named_struct + from_json") { + val options = Map.empty[String, String] + val schema = StructType.fromDDL("a int, b int, c long, d string") + + val prunedSchema1 = StructType.fromDDL("a int, b int") + val nullStruct = namedStruct("a", Literal(null, IntegerType), "b", Literal(null, IntegerType)) + + val UTC_OPT = Option("UTC") + val json: BoundReference = 'json.string.canBeNull.at(0) + + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 1)).as("struct"), + If(IsNull(json), + nullStruct, + KnownNotNull(JsonToStructs(prunedSchema1, options, json, UTC_OPT))).as("struct")) + + val field1 = StructType.fromDDL("a int") + val field2 = StructType.fromDDL("b int") + + // Skip optimization if `namedStruct` aliases field name. 
+ assertEquivalent( + testRelation2, + namedStruct( + "a1", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 1)).as("struct"), + namedStruct( + "a1", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(field2, options, json, UTC_OPT), 0)).as("struct")) + + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0)).as("struct"), + namedStruct( + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0)).as("struct")) + + val PST = getZoneId("-08:00") + // Skip optimization if `JsonToStructs`s are not the same. + assertEquivalent( + testRelation2, + namedStruct( + "a", GetStructField(JsonToStructs(schema, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(schema, options, json, Option(PST.getId)), 1)) + .as("struct"), + namedStruct( + "a", GetStructField(JsonToStructs(field1, options, json, UTC_OPT), 0), + "b", GetStructField(JsonToStructs(field2, options, json, Option(PST.getId)), 0)) + .as("struct")) + } + + private def assertEquivalent(relation: LocalRelation, e1: Expression, e2: Expression): Unit = { + val plan = relation.select(e1).analyze + val actual = Optimizer.execute(plan) + val expected = relation.select(e2).analyze + comparePlans(actual, expected) + + Seq("""{"a":1, "b":2, "c": 123, "d": "test"}""", null).foreach(v => { + val row = create_row(v) + checkEvaluation(e1, e2.eval(row), row) + }) + } } From 584f90c82e8e47cdcaab50f95e6c709f460cd789 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 09:29:30 +0900 Subject: [PATCH 0180/1009] [SPARK-33067][SQL][TESTS][FOLLOWUP] Check error messages in JDBCTableCatalogSuite ### What changes were proposed in this pull request? Get error message from the expected exception, and check that they are reasonable. ### Why are the changes needed? To improve tests by expecting particular error messages. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `JDBCTableCatalogSuite`. Closes #29957 from MaxGekk/jdbcv2-negative-tests-followup. 
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 116 +++++++++++------- 1 file changed, 71 insertions(+), 45 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index bf71f90779b71..ca86a8f593621 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -22,6 +22,7 @@ import java.util.Properties import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -75,10 +76,14 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "to_drop"), Row("test", "people"))) sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) - Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[NoSuchTableException] { + Seq( + "h2.test.not_existing_table" -> "Table test.not_existing_table not found", + "h2.bad_test.not_existing_table" -> "Table bad_test.not_existing_table not found" + ).foreach { case (table, expectedMsg) => + val msg = intercept[NoSuchTableException] { sql(s"DROP TABLE $table") - } + }.getMessage + assert(msg.contains(expectedMsg)) } } @@ -96,10 +101,14 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[org.h2.jdbc.JdbcSQLException] { + Seq( + "h2.test.not_existing_table" -> "Table \"not_existing_table\" not found", + "h2.bad_test.not_existing_table" -> "Schema \"bad_test\" not found" + ).foreach { case (table, expectedMsg) => + val msg = intercept[org.h2.jdbc.JdbcSQLException] { sql(s"ALTER TABLE $table RENAME TO test.dst_table") - } + }.getMessage + assert(msg.contains(expectedMsg)) } // Rename to an existing table withTable("h2.test.dst_table") { @@ -110,9 +119,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withConnection { conn => conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() } - intercept[org.h2.jdbc.JdbcSQLException] { - sql("ALTER TABLE h2.test.src_table RENAME TO h2.test.dst_table") - } + val msg = intercept[org.h2.jdbc.JdbcSQLException] { + sql("ALTER TABLE h2.test.src_table RENAME TO test.dst_table") + }.getMessage + assert(msg.contains("Table \"dst_table\" already exists")) } } } @@ -124,9 +134,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("ID", IntegerType) assert(t.schema === expectedSchema) Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { - spark.table(s"h2.$table").schema - } + val msg = intercept[AnalysisException] { + spark.table(table).schema + }.getMessage + assert(msg.contains("Table or view not found")) } } @@ -140,13 +151,15 @@ class 
JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } withTable("h2.test.new_table") { sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") - } + }.getMessage + assert(msg.contains("Table test.new_table already exists")) } - intercept[org.h2.jdbc.JdbcSQLException] { + val msg = intercept[org.h2.jdbc.JdbcSQLException] { sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") - } + }.getMessage + assert(msg.contains("Schema \"bad_test\" not found")) } test("alter table ... add column") { @@ -164,15 +177,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { expectedSchema = expectedSchema.add("C3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") - } + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) } // Add a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ADD COLUMNS (C4 STRING)") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -186,15 +201,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("C0", IntegerType) assert(t.schema === expectedSchema) // Rename to already existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") - } + }.getMessage + assert(msg.contains("Cannot rename column, because C0 already exists")) } // Rename a column in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table RENAME COLUMN ID TO C") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -206,15 +223,17 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) // Drop not existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") - } + }.getMessage + assert(msg.contains("Cannot delete missing field bad_column in test.alt_table schema")) } // Drop a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table DROP COLUMN C1") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -226,19 +245,22 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("ID", DoubleType) assert(t.schema === expectedSchema) // Update not existing column - intercept[AnalysisException] { + val msg1 = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") - } + }.getMessage + assert(msg1.contains("Cannot update missing field bad_column in test.alt_table schema")) // Update column to wrong type - intercept[AnalysisException] { + val msg2 = 
intercept[ParseException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") - } + }.getMessage + assert(msg2.contains("DataType bad_type is not supported")) } // Update column type in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN id TYPE DOUBLE") - } + }.getMessage + assert(msg.contains("Table not found")) } } @@ -250,35 +272,39 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") - } + }.getMessage + assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column nullability in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN ID DROP NOT NULL") - } + }.getMessage + assert(msg.contains("Table not found")) } } test("alter table ... update column comment not supported") { withTable("h2.test.alt_table") { sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - val thrown = intercept[java.sql.SQLFeatureNotSupportedException] { + val msg1 = intercept[java.sql.SQLFeatureNotSupportedException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") - } - assert(thrown.getMessage.contains("Unsupported TableChange")) + }.getMessage + assert(msg1.contains("Unsupported TableChange")) // Update comment for not existing column - intercept[AnalysisException] { + val msg2 = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") - } + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column comments in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => - intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $table ALTER COLUMN ID COMMENT 'test'") - } + }.getMessage + assert(msg.contains("Table not found")) } } } From 5ce321dc80a699fa525ca5b69bf2c28e10f8a12a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 7 Oct 2020 13:00:59 +0900 Subject: [PATCH 0181/1009] [SPARK-33017][PYTHON][DOCS][FOLLOW-UP] Add getCheckpointDir into API documentation ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/29918. We should add it into the documentation as well. ### Why are the changes needed? To show users new APIs. ### Does this PR introduce _any_ user-facing change? Yes, `SparkContext.getCheckpointDir` will be documented. ### How was this patch tested? Manually built the PySpark documentation: ```bash cd python/docs make clean html cd build/html open index.html ``` Closes #29960 from HyukjinKwon/SPARK-33017. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/reference/pyspark.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/python/docs/source/reference/pyspark.rst b/python/docs/source/reference/pyspark.rst index c13df6ee2d2b4..fc0775eb7f8f5 100644 --- a/python/docs/source/reference/pyspark.rst +++ b/python/docs/source/reference/pyspark.rst @@ -64,6 +64,7 @@ Spark Context APIs SparkContext.defaultParallelism SparkContext.dump_profiles SparkContext.emptyRDD + SparkContext.getCheckpointDir SparkContext.getConf SparkContext.getLocalProperty SparkContext.getOrCreate From aea78d2c8cdf12f4978fa6a69107d096c07c6fec Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 04:48:57 +0000 Subject: [PATCH 0182/1009] [SPARK-33034][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (Oracle dialect) ### What changes were proposed in this pull request? 1. Override the default SQL strings in the Oracle Dialect for: - ALTER TABLE ADD COLUMN - ALTER TABLE UPDATE COLUMN TYPE - ALTER TABLE UPDATE COLUMN NULLABILITY 2. Add new docker integration test suite `jdbc/v2/OracleIntegrationSuite.scala` ### Why are the changes needed? In SPARK-24907, we implemented JDBC v2 Table Catalog but it doesn't support some `ALTER TABLE` at the moment. This PR supports Oracle specific `ALTER TABLE`. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running new integration test suite: ``` $ ./build/sbt -Pdocker-integration-tests "test-only *.OracleIntegrationSuite" ``` Closes #29912 from MaxGekk/jdbcv2-oracle-alter-table. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 15 +- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 152 ++++++++++++++++++ .../datasources/v2/jdbc/JDBCTable.scala | 1 - .../apache/spark/sql/jdbc/JdbcDialects.scala | 25 ++- .../apache/spark/sql/jdbc/OracleDialect.scala | 19 +++ 5 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 6d1a22dd22b65..24927da16d50c 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -98,7 +98,13 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu val connectionTimeout = timeout(2.minutes) private var docker: DockerClient = _ - protected var externalPort: Int = _ + // Configure networking (necessary for boot2docker / Docker Machine) + protected lazy val externalPort: Int = { + val sock = new ServerSocket(0) + val port = sock.getLocalPort + sock.close() + port + } private var containerId: String = _ protected var jdbcUrl: String = _ @@ -122,13 +128,6 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu log.warn(s"Docker image ${db.imageName} not found; pulling image from registry") docker.pull(db.imageName) } - // Configure networking (necessary for boot2docker / Docker Machine) - externalPort = { - val sock = new ServerSocket(0) - val port = sock.getLocalPort - sock.close() - port - } val hostConfigBuilder = 
HostConfig.builder() .privileged(db.privileged) .networkMode("bridge") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala new file mode 100644 index 0000000000000..400459c0ea17b --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * The following would be the steps to test this + * 1. Build Oracle database in Docker, please refer below link about how to. + * https://github.com/oracle/docker-images/blob/master/OracleDatabase/SingleInstance/README.md + * 2. export ORACLE_DOCKER_IMAGE_NAME=$ORACLE_DOCKER_IMAGE_NAME + * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME + * 3. Start docker - sudo service docker start + * 4. Run spark test - ./build/sbt -Pdocker-integration-tests + * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * + * An actual sequence of commands to run the test is as follows + * + * $ git clone https://github.com/oracle/docker-images.git + * // Head SHA: 3e352a22618070595f823977a0fd1a3a8071a83c + * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles + * $ ./buildDockerImage.sh -v 18.4.0 -x + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe + * $ cd $SPARK_HOME + * $ ./build/sbt -Pdocker-integration-tests + * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * + * It has been validated with 18.4.0 Express Edition. 
+ */ +@DockerTest +class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession { + override val db = new DatabaseOnDocker { + override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") + override val env = Map( + "ORACLE_PWD" -> "oracle" + ) + override val usesIpc = false + override val jdbcPort: Int = 1521 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:oracle:thin:system/oracle@//$ip:$port/xe" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.oracle", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.oracle.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + override def dataPreparation(conn: Connection): Unit = {} + + test("SPARK-33034: ALTER TABLE ... add new columns") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID STRING) USING _") + sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") + var t = spark.table("oracle.alt_table") + var expectedSchema = new StructType() + .add("ID", StringType) + .add("C1", StringType) + .add("C2", StringType) + assert(t.schema === expectedSchema) + sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C3 STRING)") + t = spark.table("oracle.alt_table") + expectedSchema = expectedSchema.add("C3", StringType) + assert(t.schema === expectedSchema) + // Add already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.alt_table ADD COLUMNS (C3 DOUBLE)") + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) + } + // Add a column to not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ADD COLUMNS (C4 STRING)") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... update column type") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID INTEGER) USING _") + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE STRING") + val t = spark.table("oracle.alt_table") + val expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + // Update not existing column + val msg2 = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column")) + // Update column to wrong type + val msg3 = intercept[ParseException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE bad_type") + }.getMessage + assert(msg3.contains("DataType bad_type is not supported")) + } + // Update column type in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN id TYPE DOUBLE") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { + withTable("oracle.alt_table") { + sql("CREATE TABLE oracle.alt_table (ID STRING NOT NULL) USING _") + sql("ALTER TABLE oracle.alt_table ALTER COLUMN ID DROP NOT NULL") + val t = spark.table("oracle.alt_table") + val expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + // Update column nullability in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN ID DROP NOT NULL") + }.getMessage + assert(msg.contains("Table not found")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala index 55759497bd910..5e11ea66be4c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTable.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap case class JDBCTable(ident: Identifier, schema: StructType, jdbcOptions: JDBCOptions) extends Table with SupportsRead with SupportsWrite { - assert(ident.namespace().length == 1) override def name(): String = ident.toString diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index cea5a20917532..a01720d1eefc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -212,7 +212,7 @@ abstract class JdbcDialect extends Serializable { case add: AddColumn if add.fieldNames.length == 1 => val dataType = JdbcUtils.getJdbcType(add.dataType(), this).databaseTypeDefinition val name = add.fieldNames - updateClause += s"ALTER TABLE $tableName ADD COLUMN ${name(0)} $dataType" + updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames updateClause += s"ALTER TABLE $tableName RENAME COLUMN ${name(0)} TO ${rename.newName}" @@ -223,17 +223,36 @@ abstract class JdbcDialect extends Serializable { val name = updateColumnType.fieldNames val dataType = JdbcUtils.getJdbcType(updateColumnType.newDataType(), this) .databaseTypeDefinition - updateClause += s"ALTER TABLE $tableName ALTER COLUMN ${name(0)} $dataType" + updateClause += getUpdateColumnTypeQuery(tableName, name(0), dataType) case updateNull: UpdateColumnNullability if updateNull.fieldNames.length == 1 => val name = updateNull.fieldNames val nullable = if (updateNull.nullable()) "NULL" else "NOT NULL" - updateClause += s"ALTER TABLE $tableName ALTER COLUMN ${name(0)} SET $nullable" + updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") } } updateClause.result() } + + def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = { + s"ALTER TABLE $tableName ADD COLUMN $columnName $dataType" + } + + def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + 
newDataType: String): String = { + s"ALTER TABLE $tableName ALTER COLUMN $columnName $newDataType" + } + + def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "NULL" else "NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 3f12b9acd0fc4..128b90a190481 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -118,4 +118,23 @@ private case object OracleDialect extends JdbcDialect { case _ => s"TRUNCATE TABLE $table" } } + + // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 + override def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = + s"ALTER TABLE $tableName ADD $columnName $dataType" + + // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = + s"ALTER TABLE $tableName MODIFY $columnName $newDataType" + + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "NULL" else "NOT NULL" + s"ALTER TABLE $tableName MODIFY $columnName $nullable" + } } From 7e99fcd64efa425f3c985df4fe957a3be274a49a Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 7 Oct 2020 06:33:20 +0000 Subject: [PATCH 0183/1009] [SPARK-33004][SQL] Migrate DESCRIBE column to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `DESCRIBE tbl colname` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") sql("USE testcat.ns") sql("DESCRIBE t i") // 't' is resolved to testcat.ns.t Describing columns is not supported for v2 tables.; org.apache.spark.sql.AnalysisException: Describing columns is not supported for v2 tables.; ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") sql("USE spark_catalog.test") sql("DESCRIBE t i").show // 't' is resolved to a temp view +---------+----------+ |info_name|info_value| +---------+----------+ | col_name| i| |data_type| int| | comment| NULL| +---------+----------+ ``` ### Does this PR introduce _any_ user-facing change? After this PR, `DESCRIBE t i` is resolved to a temp view `t` instead of `testcat.ns.t`. ### How was this patch tested? 
Added a new test Closes #29880 from imback82/describe_column_consistent. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../catalyst/analysis/ResolveCatalogs.scala | 4 --- .../sql/catalyst/parser/AstBuilder.scala | 6 ++-- ...hema.scala => DescribeCommandSchema.scala} | 10 +++++-- .../catalyst/plans/logical/statements.scala | 8 ------ .../catalyst/plans/logical/v2Commands.scala | 15 ++++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++++---------- .../analysis/ResolveSessionCatalog.scala | 12 +++++--- .../spark/sql/execution/command/tables.scala | 13 ++------- .../datasources/v2/DataSourceV2Strategy.scala | 3 ++ .../apache/spark/sql/SQLQueryTestSuite.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 24 ++++++++++++++++ .../hive/execution/HiveComparisonTest.scala | 2 +- 12 files changed, 78 insertions(+), 49 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/{DescribeTableSchema.scala => DescribeCommandSchema.scala} (74%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 0d0f80be359e7..65ddff8c44ed9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -142,10 +142,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) } RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) - case DescribeColumnStatement( - NonSessionCatalogAndTable(catalog, tbl), colNameParts, isExtended) => - throw new AnalysisException("Describing columns is not supported for v2 tables.") - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f133235a2636e..f29e7b11e02de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3183,7 +3183,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[DescribeColumnStatement]] or [[DescribeRelation]] commands. + * Create a [[DescribeColumn]] or [[DescribeRelation]] commands. 
*/ override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null @@ -3191,8 +3191,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging if (ctx.partitionSpec != null) { throw new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) } else { - DescribeColumnStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + DescribeColumn( + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, isExtended) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala similarity index 74% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala index ff35972b901f9..99d2ea7751959 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeTableSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/DescribeCommandSchema.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.sql.catalyst.expressions.AttributeReference -import org.apache.spark.sql.types.{MetadataBuilder, StringType, StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StringType} -private[sql] object DescribeTableSchema { +private[sql] object DescribeCommandSchema { def describeTableAttributes(): Seq[AttributeReference] = Seq( AttributeReference("col_name", StringType, nullable = false, new MetadataBuilder().putString("comment", "name of the column").build())(), @@ -28,4 +28,10 @@ private[sql] object DescribeTableSchema { new MetadataBuilder().putString("comment", "data type of the column").build())(), AttributeReference("comment", StringType, nullable = true, new MetadataBuilder().putString("comment", "comment of the column").build())()) + + def describeColumnAttributes(): Seq[AttributeReference] = Seq( + AttributeReference("info_name", StringType, nullable = false, + new MetadataBuilder().putString("comment", "name of the column info").build())(), + AttributeReference("info_value", StringType, nullable = false, + new MetadataBuilder().putString("comment", "value of the column info").build())()) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d09e08d105c21..d7c097af9120f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -298,14 +298,6 @@ case class DropViewStatement( viewName: Seq[String], ifExists: Boolean) extends ParsedStatement -/** - * A DESCRIBE TABLE tbl_name col_name statement, as parsed from SQL. - */ -case class DescribeColumnStatement( - tableName: Seq[String], - colNameParts: Seq[String], - isExtended: Boolean) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 475eb7d74773d..50af16ca276e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Unevaluable} -import org.apache.spark.sql.catalyst.plans.DescribeTableSchema +import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform @@ -312,7 +312,18 @@ case class DescribeRelation( partitionSpec: TablePartitionSpec, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = Seq(relation) - override def output: Seq[Attribute] = DescribeTableSchema.describeTableAttributes() + override def output: Seq[Attribute] = DescribeCommandSchema.describeTableAttributes() +} + +/** + * The logical plan of the DESCRIBE relation_name col_name command that works for v2 tables. + */ +case class DescribeColumn( + relation: LogicalPlan, + colNameParts: Seq[String], + isExtended: Boolean) extends Command { + override def children: Seq[LogicalPlan] = Seq(relation) + override def output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 378026b1ce9c6..8b8531b2bb3b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -780,27 +780,27 @@ class DDLParserSuite extends AnalysisTest { test("describe table column") { comparePlans(parsePlan("DESCRIBE t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), - DescribeColumnStatement( - Seq("t"), Seq("abc.xyz"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("abc.xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), - DescribeColumnStatement( - Seq("t"), Seq("abc", "xyz"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("abc", "xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), - DescribeColumnStatement( - Seq("t"), Seq("a.b", "x.y"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("a.b", "x.y"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = false)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = true)) + DescribeColumn( + 
UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), - DescribeColumnStatement( - Seq("t"), Seq("col"), isExtended = true)) + DescribeColumn( + UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 11493ad59a760..24382e07a2966 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -256,16 +256,20 @@ class ResolveSessionCatalog( case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) - case DescribeRelation(ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) => + case DescribeRelation(r @ ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) + if isSessionCatalog(r.catalog) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. case DescribeRelation(ResolvedView(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumnStatement(tbl, colNameParts, isExtended) => - val name = parseTempViewOrV1Table(tbl, "Describing columns") - DescribeColumnCommand(name.asTableIdentifier, colNameParts, isExtended) + case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) + if isSessionCatalog(r.catalog) => + DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) + + case DescribeColumn(ResolvedView(ident), colNameParts, isExtended) => + DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the // session catalog and the table provider is not v2. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index e4be2a8d3bb8e..206f952fed0ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} -import org.apache.spark.sql.catalyst.plans.DescribeTableSchema +import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} @@ -597,7 +597,7 @@ case class TruncateTableCommand( } abstract class DescribeCommandBase extends RunnableCommand { - override val output = DescribeTableSchema.describeTableAttributes() + override val output = DescribeCommandSchema.describeTableAttributes() protected def describeSchema( schema: StructType, @@ -760,14 +760,7 @@ case class DescribeColumnCommand( isExtended: Boolean) extends RunnableCommand { - override val output: Seq[Attribute] = { - Seq( - AttributeReference("info_name", StringType, nullable = false, - new MetadataBuilder().putString("comment", "name of the column info").build())(), - AttributeReference("info_value", StringType, nullable = false, - new MetadataBuilder().putString("comment", "value of the column info").build())() - ) - } + override val output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c5ddba43a56aa..3841bd0a66987 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -225,6 +225,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } DescribeTableExec(desc.output, r.table, isExtended) :: Nil + case DescribeColumn(_: ResolvedTable, _, _) => + throw new AnalysisException("Describing columns is not supported for v2 tables.") + case DropTable(catalog, ident, ifExists) => DropTableExec(catalog, ident, ifExists) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b7cf0798a9d4b..0bb1f5e20fc5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -502,7 +502,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper case _: DescribeCommandBase | _: DescribeColumnCommand | _: DescribeRelation - | _: DescribeColumnStatement => true + | _: DescribeColumn => true case PhysicalOperation(_, _, Sort(_, true, _)) => true case _ => plan.children.iterator.exists(isSorted) } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index e3782c7409198..e3618f1326941 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -168,7 +168,31 @@ class DataSourceV2SQLSuite Array("Provider", "foo", ""), Array(TableCatalog.PROP_OWNER.capitalize, defaultUser, ""), Array("Table Properties", "[bar=baz]", ""))) + } + test("Describe column is not supported for v2 catalog") { + withTable("testcat.tbl") { + spark.sql("CREATE TABLE testcat.tbl (id bigint) USING foo") + val ex = intercept[AnalysisException] { + spark.sql("DESCRIBE testcat.tbl id") + } + assert(ex.message.contains("Describing columns is not supported for v2 tables")) + } + } + + test("SPARK-33004: Describe column should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2 as i") + sql("USE testcat.ns") + checkAnswer( + sql("DESCRIBE t i"), + Seq(Row("col_name", "i"), + Row("data_type", "int"), + Row("comment", "NULL"))) + } + } } test("CreateTable: use v2 plan and session catalog when provider is v2") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala index 7f198632a1cd6..01cf214574eeb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala @@ -375,7 +375,7 @@ abstract class HiveComparisonTest extends SparkFunSuite with BeforeAndAfterAll { (!hiveQuery.logical.isInstanceOf[DescribeFunction]) && (!hiveQuery.logical.isInstanceOf[DescribeCommandBase]) && (!hiveQuery.logical.isInstanceOf[DescribeRelation]) && - (!hiveQuery.logical.isInstanceOf[DescribeColumnStatement]) && + (!hiveQuery.logical.isInstanceOf[DescribeColumn]) && preparedHive != catalyst) { val hivePrintOut = s"== HIVE - ${preparedHive.size} row(s) ==" +: preparedHive From 4e1ded67f88ffc869379319758d923aa538554b2 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 7 Oct 2020 16:39:25 +0900 Subject: [PATCH 0184/1009] [SPARK-32189][DOCS][PYTHON][FOLLOW-UP] Fixed broken link and typo in PySpark docs ### What changes were proposed in this pull request? This PR is a follow-up of #29781 to fix broken link and typo. Screen Shot 2020-10-07 at 3 56 28 PM Screen Shot 2020-10-07 at 3 55 36 PM ### Why are the changes needed? Current link is not working properly because of wrong path. ### Does this PR introduce _any_ user-facing change? Yes, the link is working properly now. ### How was this patch tested? Manually built the doc. Closes #29963 from itholic/SPARK-32189-FOLLOWUP. Authored-by: itholic Signed-off-by: HyukjinKwon --- python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/setting_ide.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index c5f3351527f11..bc141a6f44a6f 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -35,7 +35,7 @@ with JVM. Profiling and debugging JVM is described at `Useful Developer Tools `__. 
+- If you are running locally, you can directly debug the driver side via using your IDE without the remote debug feature. Setting PySpark with IDEs is documented `here `__. - *There are many other ways of debugging PySpark applications*. For example, you can remotely debug by using the open source `Remote Debugger `_ instead of using PyCharm Professional documented here. diff --git a/python/docs/source/development/setting_ide.rst b/python/docs/source/development/setting_ide.rst index dcb44c1483006..6e8f0148c6eb3 100644 --- a/python/docs/source/development/setting_ide.rst +++ b/python/docs/source/development/setting_ide.rst @@ -50,7 +50,7 @@ Let's go to the path ``python/pyspark/tests`` in PyCharm and try to run the any You might can see the ``KeyError: 'SPARK_HOME'`` because the environment variable has not been set yet. Go **Run -> Edit Configurations**, and set the environment variables as below. -Please make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After completing the variable, click **Okay** to apply the changes. +Please make sure to specify your own path for ``SPARK_HOME`` rather than ``/.../spark``. After completing the variable, click **OK** to apply the changes. .. image:: ../../../../docs/img/pycharm-with-pyspark2.png :alt: Setting up SPARK_HOME From 72da6f86cfbdd36dac3fc440c333bc1db1935edd Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 7 Oct 2020 19:53:59 +0900 Subject: [PATCH 0185/1009] [SPARK-33002][PYTHON] Remove non-API annotations ### What changes were proposed in this pull request? This PR: - removes annotations for modules which are not part of the public API. - removes `__init__.pyi` files, if no annotations, beyond exports, are present. ### Why are the changes needed? Primarily to reduce maintenance overhead and as requested in the comments to https://github.com/apache/spark/pull/29591 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and additional MyPy checks: ``` mypy --no-incremental --config python/mypy.ini python/pyspark MYPYPATH=python/ mypy --no-incremental --config python/mypy.ini examples/src/main/python/ml examples/src/main/python/sql examples/src/main/python/sql/streaming ``` Closes #29879 from zero323/SPARK-33002. 
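The trade-off this change removes is easiest to see side by side. The snippet below is a hypothetical, non-Spark example (the module and names `registry.py`, `_handlers`, `_private_cache`, `register` are made up for illustration) of the two styles the patch chooses between: a hand-maintained `.pyi` stub versus typing inline in the module itself, with `# type: ignore` used for values MyPy cannot infer, exactly as `serializers.py` and `shell.py` do in the diff below.

```python
# Hypothetical module `registry.py` (not Spark code), shown both ways.
#
# Stub style, removed for non-API modules by this patch: a sibling `registry.pyi`
# repeats the module's surface and must be kept in sync by hand:
#
#     from typing import Any, Callable, Dict
#     _handlers: Dict[str, Callable[..., Any]]
#     def register(name: str, fn: Callable[..., Any]) -> None: ...
#
# Inline style, kept instead: annotate (or opt out) in place.
from typing import Any, Callable, Dict

_handlers: Dict[str, Callable[..., Any]] = {}  # inline annotation, checked by MyPy

# For dynamic values MyPy cannot infer, opt out inline rather than stubbing.
_private_cache = {}  # type: ignore


def register(name: str, fn: Callable[..., Any]) -> None:
    """Record a handler under ``name``."""
    _handlers[name] = fn
```

Either style satisfies the MyPy checks listed above; inlining avoids maintaining a second copy of every private signature, which is the maintenance overhead the commit message refers to.
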
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/mypy.ini | 5 +- python/pyspark/_globals.pyi | 27 ----- python/pyspark/accumulators.pyi | 4 +- python/pyspark/broadcast.pyi | 4 +- python/pyspark/daemon.pyi | 29 ----- python/pyspark/find_spark_home.pyi | 17 --- python/pyspark/java_gateway.pyi | 24 ----- python/pyspark/join.pyi | 50 --------- python/pyspark/ml/__init__.pyi | 45 -------- python/pyspark/mllib/__init__.pyi | 32 ------ python/pyspark/rddsampler.pyi | 54 ---------- python/pyspark/resource/__init__.pyi | 31 ------ python/pyspark/serializers.py | 2 +- python/pyspark/serializers.pyi | 122 ---------------------- python/pyspark/shell.py | 4 +- python/pyspark/shell.pyi | 31 ------ python/pyspark/shuffle.pyi | 109 ------------------- python/pyspark/sql/avro/__init__.pyi | 22 ---- python/pyspark/sql/pandas/__init__.pyi | 17 --- python/pyspark/sql/pandas/serializers.pyi | 65 ------------ python/pyspark/sql/pandas/typehints.pyi | 33 ------ python/pyspark/sql/pandas/types.pyi | 41 -------- python/pyspark/sql/pandas/utils.pyi | 20 ---- python/pyspark/sql/utils.pyi | 55 ---------- python/pyspark/streaming/__init__.pyi | 23 ---- python/pyspark/streaming/util.pyi | 48 --------- python/pyspark/traceback_utils.pyi | 29 ----- python/pyspark/util.py | 2 +- python/pyspark/util.pyi | 35 ------- python/pyspark/worker.pyi | 73 ------------- 30 files changed, 14 insertions(+), 1039 deletions(-) delete mode 100644 python/pyspark/_globals.pyi delete mode 100644 python/pyspark/daemon.pyi delete mode 100644 python/pyspark/find_spark_home.pyi delete mode 100644 python/pyspark/java_gateway.pyi delete mode 100644 python/pyspark/join.pyi delete mode 100644 python/pyspark/ml/__init__.pyi delete mode 100644 python/pyspark/mllib/__init__.pyi delete mode 100644 python/pyspark/rddsampler.pyi delete mode 100644 python/pyspark/resource/__init__.pyi delete mode 100644 python/pyspark/serializers.pyi delete mode 100644 python/pyspark/shell.pyi delete mode 100644 python/pyspark/shuffle.pyi delete mode 100644 python/pyspark/sql/avro/__init__.pyi delete mode 100644 python/pyspark/sql/pandas/__init__.pyi delete mode 100644 python/pyspark/sql/pandas/serializers.pyi delete mode 100644 python/pyspark/sql/pandas/typehints.pyi delete mode 100644 python/pyspark/sql/pandas/types.pyi delete mode 100644 python/pyspark/sql/pandas/utils.pyi delete mode 100644 python/pyspark/sql/utils.pyi delete mode 100644 python/pyspark/streaming/__init__.pyi delete mode 100644 python/pyspark/streaming/util.pyi delete mode 100644 python/pyspark/traceback_utils.pyi delete mode 100644 python/pyspark/util.pyi delete mode 100644 python/pyspark/worker.pyi diff --git a/python/mypy.ini b/python/mypy.ini index a9523e622ca0d..4a5368a519097 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -32,5 +32,8 @@ ignore_missing_imports = True [mypy-pandas.*] ignore_missing_imports = True -[mypy-pyarrow] +[mypy-pyarrow.*] +ignore_missing_imports = True + +[mypy-psutil.*] ignore_missing_imports = True diff --git a/python/pyspark/_globals.pyi b/python/pyspark/_globals.pyi deleted file mode 100644 index 9453775621196..0000000000000 --- a/python/pyspark/_globals.pyi +++ /dev/null @@ -1,27 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -__ALL__: Any - -class _NoValueType: - def __new__(cls): ... - def __reduce__(self): ... diff --git a/python/pyspark/accumulators.pyi b/python/pyspark/accumulators.pyi index 94f8023d1102b..13a1792cd247d 100644 --- a/python/pyspark/accumulators.pyi +++ b/python/pyspark/accumulators.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Callable, Generic, Tuple, Type, TypeVar +from typing import Callable, Dict, Generic, Tuple, Type, TypeVar import socketserver.BaseRequestHandler # type: ignore @@ -27,6 +27,8 @@ U = TypeVar("U", bound=SupportsIAdd) import socketserver as SocketServer +_accumulatorRegistry: Dict[int, Accumulator] + class Accumulator(Generic[T]): aid: int accum_param: AccumulatorParam[T] diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi index c2ea3c6f7d8b4..4b019a509a003 100644 --- a/python/pyspark/broadcast.pyi +++ b/python/pyspark/broadcast.pyi @@ -17,10 +17,12 @@ # under the License. import threading -from typing import Any, Generic, Optional, TypeVar +from typing import Any, Dict, Generic, Optional, TypeVar T = TypeVar("T") +_broadcastRegistry: Dict[int, Broadcast] + class Broadcast(Generic[T]): def __init__( self, diff --git a/python/pyspark/daemon.pyi b/python/pyspark/daemon.pyi deleted file mode 100644 index dfacf30a9f8a7..0000000000000 --- a/python/pyspark/daemon.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - UTF8Deserializer as UTF8Deserializer, - read_int as read_int, - write_int as write_int, - write_with_length as write_with_length, -) -from typing import Any - -def compute_real_exit_code(exit_code: Any): ... -def worker(sock: Any, authenticated: Any): ... -def manager() -> None: ... 
diff --git a/python/pyspark/find_spark_home.pyi b/python/pyspark/find_spark_home.pyi deleted file mode 100644 index 217e5db960782..0000000000000 --- a/python/pyspark/find_spark_home.pyi +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyspark/java_gateway.pyi b/python/pyspark/java_gateway.pyi deleted file mode 100644 index 5b45206dc045c..0000000000000 --- a/python/pyspark/java_gateway.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import UTF8Deserializer as UTF8Deserializer, read_int as read_int, write_with_length as write_with_length # type: ignore[attr-defined] -from typing import Any, Optional - -def launch_gateway(conf: Optional[Any] = ..., popen_kwargs: Optional[Any] = ...): ... -def local_connect_and_auth(port: Any, auth_secret: Any): ... -def ensure_callback_server_started(gw: Any) -> None: ... diff --git a/python/pyspark/join.pyi b/python/pyspark/join.pyi deleted file mode 100644 index e89e0fbbcda9b..0000000000000 --- a/python/pyspark/join.pyi +++ /dev/null @@ -1,50 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import Hashable, Iterable, Optional, Tuple, TypeVar - -from pyspark.resultiterable import ResultIterable -import pyspark.rdd - -K = TypeVar("K", bound=Hashable) -V = TypeVar("V") -U = TypeVar("U") - -def python_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, U]]]: ... -def python_right_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[V, Optional[U]]]]: ... -def python_left_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], U]]]: ... -def python_full_outer_join( - rdd: pyspark.rdd.RDD[Tuple[K, V]], - other: pyspark.rdd.RDD[Tuple[K, U]], - numPartitions: int, -) -> pyspark.rdd.RDD[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... -def python_cogroup( - rdds: Iterable[pyspark.rdd.RDD[Tuple[K, V]]], numPartitions: int -) -> pyspark.rdd.RDD[Tuple[K, Tuple[ResultIterable[V], ...]]]: ... diff --git a/python/pyspark/ml/__init__.pyi b/python/pyspark/ml/__init__.pyi deleted file mode 100644 index 8e3b8a5daeb08..0000000000000 --- a/python/pyspark/ml/__init__.pyi +++ /dev/null @@ -1,45 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.ml import ( # noqa: F401 - classification as classification, - clustering as clustering, - evaluation as evaluation, - feature as feature, - fpm as fpm, - image as image, - linalg as linalg, - param as param, - recommendation as recommendation, - regression as regression, - stat as stat, - tuning as tuning, - util as util, -) -from pyspark.ml.base import ( # noqa: F401 - Estimator as Estimator, - Model as Model, - PredictionModel as PredictionModel, - Predictor as Predictor, - Transformer as Transformer, - UnaryTransformer as UnaryTransformer, -) -from pyspark.ml.pipeline import ( # noqa: F401 - Pipeline as Pipeline, - PipelineModel as PipelineModel, -) diff --git a/python/pyspark/mllib/__init__.pyi b/python/pyspark/mllib/__init__.pyi deleted file mode 100644 index 83032c4580fc8..0000000000000 --- a/python/pyspark/mllib/__init__.pyi +++ /dev/null @@ -1,32 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -# Names in __all__ with no definition: -# classification -# clustering -# feature -# fpm -# linalg -# random -# recommendation -# regression -# stat -# tree -# util diff --git a/python/pyspark/rddsampler.pyi b/python/pyspark/rddsampler.pyi deleted file mode 100644 index 8fbf72d90025c..0000000000000 --- a/python/pyspark/rddsampler.pyi +++ /dev/null @@ -1,54 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Dict, Iterator, Optional, Tuple, TypeVar - -T = TypeVar("T") -U = TypeVar("U") -K = TypeVar("K") -V = TypeVar("V") - -class RDDSamplerBase: - def __init__(self, withReplacement: bool, seed: Optional[int] = ...) -> None: ... - def initRandomGenerator(self, split: int) -> None: ... - def getUniformSample(self) -> float: ... - def getPoissonSample(self, mean: float) -> int: ... - def func(self, split: int, iterator: Iterator[Any]) -> Iterator[Any]: ... - -class RDDSampler(RDDSamplerBase): - def __init__( - self, withReplacement: bool, fraction: float, seed: Optional[int] = ... - ) -> None: ... - def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... - -class RDDRangeSampler(RDDSamplerBase): - def __init__( - self, lowerBound: T, upperBound: T, seed: Optional[Any] = ... - ) -> None: ... - def func(self, split: int, iterator: Iterator[T]) -> Iterator[T]: ... - -class RDDStratifiedSampler(RDDSamplerBase): - def __init__( - self, - withReplacement: bool, - fractions: Dict[K, float], - seed: Optional[int] = ..., - ) -> None: ... - def func( - self, split: int, iterator: Iterator[Tuple[K, V]] - ) -> Iterator[Tuple[K, V]]: ... diff --git a/python/pyspark/resource/__init__.pyi b/python/pyspark/resource/__init__.pyi deleted file mode 100644 index 87a9b53c268ac..0000000000000 --- a/python/pyspark/resource/__init__.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.resource.information import ( # noqa: F401 - ResourceInformation as ResourceInformation, -) -from pyspark.resource.profile import ( # noqa: F401 - ResourceProfile as ResourceProfile, - ResourceProfileBuilder as ResourceProfileBuilder, -) -from pyspark.resource.requests import ( # noqa: F401 - ExecutorResourceRequest as ExecutorResourceRequest, - ExecutorResourceRequests as ExecutorResourceRequests, - TaskResourceRequest as TaskResourceRequest, - TaskResourceRequests as TaskResourceRequests, -) diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py index 80ce9b8408d4e..e6033dd7505c1 100644 --- a/python/pyspark/serializers.py +++ b/python/pyspark/serializers.py @@ -342,7 +342,7 @@ def dumps(self, obj): # Hack namedtuple, make it picklable -__cls = {} +__cls = {} # type: ignore def _restore(name, fields, value): diff --git a/python/pyspark/serializers.pyi b/python/pyspark/serializers.pyi deleted file mode 100644 index 26ef17c38d227..0000000000000 --- a/python/pyspark/serializers.pyi +++ /dev/null @@ -1,122 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any - -class SpecialLengths: - END_OF_DATA_SECTION: int = ... - PYTHON_EXCEPTION_THROWN: int = ... - TIMING_DATA: int = ... - END_OF_STREAM: int = ... - NULL: int = ... - START_ARROW_STREAM: int = ... - -class Serializer: - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - def __eq__(self, other: Any) -> Any: ... - def __ne__(self, other: Any) -> Any: ... - def __hash__(self) -> Any: ... - -class FramedSerializer(Serializer): - def __init__(self) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - def dumps(self, obj: Any) -> None: ... - def loads(self, obj: Any) -> None: ... - -class BatchedSerializer(Serializer): - UNLIMITED_BATCH_SIZE: int = ... - UNKNOWN_BATCH_SIZE: int = ... - serializer: Any = ... - batchSize: Any = ... - def __init__(self, serializer: Any, batchSize: Any = ...) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class FlattenedValuesSerializer(BatchedSerializer): - def __init__(self, serializer: Any, batchSize: int = ...) -> None: ... - def load_stream(self, stream: Any): ... 
- -class AutoBatchedSerializer(BatchedSerializer): - bestSize: Any = ... - def __init__(self, serializer: Any, bestSize: Any = ...) -> None: ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - -class CartesianDeserializer(Serializer): - key_ser: Any = ... - val_ser: Any = ... - def __init__(self, key_ser: Any, val_ser: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class PairDeserializer(Serializer): - key_ser: Any = ... - val_ser: Any = ... - def __init__(self, key_ser: Any, val_ser: Any) -> None: ... - def load_stream(self, stream: Any): ... - -class NoOpSerializer(FramedSerializer): - def loads(self, obj: Any): ... - def dumps(self, obj: Any): ... - -class PickleSerializer(FramedSerializer): - def dumps(self, obj: Any): ... - def loads(self, obj: Any, encoding: str = ...): ... - -class CloudPickleSerializer(PickleSerializer): - def dumps(self, obj: Any): ... - -class MarshalSerializer(FramedSerializer): - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class AutoSerializer(FramedSerializer): - def __init__(self) -> None: ... - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class CompressedSerializer(FramedSerializer): - serializer: Any = ... - def __init__(self, serializer: Any) -> None: ... - def dumps(self, obj: Any): ... - def loads(self, obj: Any): ... - -class UTF8Deserializer(Serializer): - use_unicode: Any = ... - def __init__(self, use_unicode: bool = ...) -> None: ... - def loads(self, stream: Any): ... - def load_stream(self, stream: Any) -> None: ... - -class ChunkedStream: - buffer_size: Any = ... - buffer: Any = ... - current_pos: int = ... - wrapped: Any = ... - def __init__(self, wrapped: Any, buffer_size: Any) -> None: ... - def write(self, bytes: Any) -> None: ... - def close(self) -> None: ... - @property - def closed(self): ... - -def write_with_length(obj: Any, stream: Any): ... -def pack_long(value): ... -def read_int(stream): ... -def read_long(stream): ... -def read_bool(stream): ... -def write_int(value, stream): ... -def write_long(value, stream): ... diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index cde163bd2d73d..0c6cc1302ff62 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -32,10 +32,10 @@ if os.environ.get("SPARK_EXECUTOR_URI"): SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"]) -SparkContext._ensure_initialized() +SparkContext._ensure_initialized() # type: ignore try: - spark = SparkSession._create_shell_session() + spark = SparkSession._create_shell_session() # type: ignore except Exception: import sys import traceback diff --git a/python/pyspark/shell.pyi b/python/pyspark/shell.pyi deleted file mode 100644 index 0760309542f8d..0000000000000 --- a/python/pyspark/shell.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. 
See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark import SparkConf as SparkConf # noqa: F401 -from pyspark.context import SparkContext as SparkContext -from pyspark.sql import SQLContext as SQLContext, SparkSession as SparkSession -from typing import Any, Callable - -from pyspark.sql.dataframe import DataFrame - -spark: SparkSession -sc: SparkContext -sql: Callable[[str], DataFrame] -sqlContext: SQLContext -sqlCtx: SQLContext -code: Any diff --git a/python/pyspark/shuffle.pyi b/python/pyspark/shuffle.pyi deleted file mode 100644 index 10648c51dca8f..0000000000000 --- a/python/pyspark/shuffle.pyi +++ /dev/null @@ -1,109 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - AutoBatchedSerializer as AutoBatchedSerializer, - BatchedSerializer as BatchedSerializer, - CompressedSerializer as CompressedSerializer, - FlattenedValuesSerializer as FlattenedValuesSerializer, - PickleSerializer as PickleSerializer, -) -from pyspark.util import fail_on_stopiteration as fail_on_stopiteration # noqa: F401 -from typing import Any, Optional - -process: Any - -def get_used_memory(): ... - -MemoryBytesSpilled: int -DiskBytesSpilled: int - -class Aggregator: - createCombiner: Any = ... - mergeValue: Any = ... - mergeCombiners: Any = ... - def __init__( - self, createCombiner: Any, mergeValue: Any, mergeCombiners: Any - ) -> None: ... - -class SimpleAggregator(Aggregator): - def __init__(self, combiner: Any): ... - -class Merger: - agg: Any = ... - def __init__(self, aggregator: Any) -> None: ... - def mergeValues(self, iterator: Any) -> None: ... - def mergeCombiners(self, iterator: Any) -> None: ... - def items(self) -> None: ... - -class ExternalMerger(Merger): - MAX_TOTAL_PARTITIONS: int = ... - memory_limit: Any = ... - serializer: Any = ... - localdirs: Any = ... - partitions: Any = ... - batch: Any = ... - scale: Any = ... - data: Any = ... - pdata: Any = ... - spills: int = ... - def __init__( - self, - aggregator: Any, - memory_limit: int = ..., - serializer: Optional[Any] = ..., - localdirs: Optional[Any] = ..., - scale: int = ..., - partitions: int = ..., - batch: int = ..., - ) -> None: ... - def mergeValues(self, iterator: Any) -> None: ... - def mergeCombiners(self, iterator: Any, limit: Optional[Any] = ...) -> None: ... - def items(self): ... - -class ExternalSorter: - memory_limit: Any = ... - local_dirs: Any = ... - serializer: Any = ... - def __init__(self, memory_limit: Any, serializer: Optional[Any] = ...) -> None: ... - def sorted(self, iterator: Any, key: Optional[Any] = ..., reverse: bool = ...): ... - -class ExternalList: - LIMIT: int = ... - values: Any = ... - count: Any = ... - def __init__(self, values: Any) -> None: ... 
- def __iter__(self) -> Any: ... - def __len__(self): ... - def append(self, value: Any) -> None: ... - def __del__(self) -> None: ... - -class ExternalListOfList(ExternalList): - count: Any = ... - def __init__(self, values: Any) -> None: ... - def append(self, value: Any) -> None: ... - def __iter__(self) -> Any: ... - -class GroupByKey: - iterator: Any = ... - def __init__(self, iterator: Any) -> None: ... - def __iter__(self) -> Any: ... - -class ExternalGroupBy(ExternalMerger): - SORT_KEY_LIMIT: int = ... - def flattened_serializer(self): ... diff --git a/python/pyspark/sql/avro/__init__.pyi b/python/pyspark/sql/avro/__init__.pyi deleted file mode 100644 index 0d7871da4c100..0000000000000 --- a/python/pyspark/sql/avro/__init__.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -# Names in __all__ with no definition: -# functions diff --git a/python/pyspark/sql/pandas/__init__.pyi b/python/pyspark/sql/pandas/__init__.pyi deleted file mode 100644 index 217e5db960782..0000000000000 --- a/python/pyspark/sql/pandas/__init__.pyi +++ /dev/null @@ -1,17 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/python/pyspark/sql/pandas/serializers.pyi b/python/pyspark/sql/pandas/serializers.pyi deleted file mode 100644 index 8be3c0dcbc9ad..0000000000000 --- a/python/pyspark/sql/pandas/serializers.pyi +++ /dev/null @@ -1,65 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.serializers import ( # noqa: F401 - Serializer as Serializer, - UTF8Deserializer as UTF8Deserializer, - read_int as read_int, - write_int as write_int, -) -from typing import Any - -class SpecialLengths: - END_OF_DATA_SECTION: int = ... - PYTHON_EXCEPTION_THROWN: int = ... - TIMING_DATA: int = ... - END_OF_STREAM: int = ... - NULL: int = ... - START_ARROW_STREAM: int = ... - -class ArrowCollectSerializer(Serializer): - serializer: Any = ... - def __init__(self) -> None: ... - def dump_stream(self, iterator: Any, stream: Any): ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamSerializer(Serializer): - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamPandasSerializer(ArrowStreamSerializer): - def __init__( - self, timezone: Any, safecheck: Any, assign_cols_by_name: Any - ) -> None: ... - def arrow_to_pandas(self, arrow_column: Any): ... - def dump_stream(self, iterator: Any, stream: Any) -> None: ... - def load_stream(self, stream: Any) -> None: ... - -class ArrowStreamPandasUDFSerializer(ArrowStreamPandasSerializer): - def __init__( - self, - timezone: Any, - safecheck: Any, - assign_cols_by_name: Any, - df_for_struct: bool = ..., - ) -> None: ... - def arrow_to_pandas(self, arrow_column: Any): ... - def dump_stream(self, iterator: Any, stream: Any): ... - -class CogroupUDFSerializer(ArrowStreamPandasUDFSerializer): - def load_stream(self, stream: Any) -> None: ... diff --git a/python/pyspark/sql/pandas/typehints.pyi b/python/pyspark/sql/pandas/typehints.pyi deleted file mode 100644 index eea9c86225332..0000000000000 --- a/python/pyspark/sql/pandas/typehints.pyi +++ /dev/null @@ -1,33 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.sql.pandas.utils import ( # noqa: F401 - require_minimum_pandas_version as require_minimum_pandas_version, -) -from typing import Any, Optional - -def infer_eval_type(sig: Any): ... -def check_tuple_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... -def check_iterator_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... -def check_union_annotation( - annotation: Any, parameter_check_func: Optional[Any] = ... -): ... 
diff --git a/python/pyspark/sql/pandas/types.pyi b/python/pyspark/sql/pandas/types.pyi deleted file mode 100644 index 5ae29bd273180..0000000000000 --- a/python/pyspark/sql/pandas/types.pyi +++ /dev/null @@ -1,41 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.sql.types import ( # noqa: F401 - ArrayType as ArrayType, - BinaryType as BinaryType, - BooleanType as BooleanType, - ByteType as ByteType, - DateType as DateType, - DecimalType as DecimalType, - DoubleType as DoubleType, - FloatType as FloatType, - IntegerType as IntegerType, - LongType as LongType, - ShortType as ShortType, - StringType as StringType, - StructField as StructField, - StructType as StructType, - TimestampType as TimestampType, -) -from typing import Any - -def to_arrow_type(dt: Any): ... -def to_arrow_schema(schema: Any): ... -def from_arrow_type(at: Any): ... -def from_arrow_schema(arrow_schema: Any): ... diff --git a/python/pyspark/sql/pandas/utils.pyi b/python/pyspark/sql/pandas/utils.pyi deleted file mode 100644 index e4d315b0ce205..0000000000000 --- a/python/pyspark/sql/pandas/utils.pyi +++ /dev/null @@ -1,20 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -def require_minimum_pandas_version() -> None: ... -def require_minimum_pyarrow_version() -> None: ... diff --git a/python/pyspark/sql/utils.pyi b/python/pyspark/sql/utils.pyi deleted file mode 100644 index c11e4bed54e7f..0000000000000 --- a/python/pyspark/sql/utils.pyi +++ /dev/null @@ -1,55 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from pyspark import SparkContext as SparkContext # noqa: F401 -from typing import Any, Optional - -class CapturedException(Exception): - desc: Any = ... - stackTrace: Any = ... - cause: Any = ... - def __init__( - self, desc: Any, stackTrace: Any, cause: Optional[Any] = ... - ) -> None: ... - -class AnalysisException(CapturedException): ... -class ParseException(CapturedException): ... -class IllegalArgumentException(CapturedException): ... -class StreamingQueryException(CapturedException): ... -class QueryExecutionException(CapturedException): ... -class PythonException(CapturedException): ... -class UnknownException(CapturedException): ... - -def convert_exception(e: Any): ... -def capture_sql_exception(f: Any): ... -def install_exception_handler() -> None: ... -def toJArray(gateway: Any, jtype: Any, arr: Any): ... -def require_test_compiled() -> None: ... - -class ForeachBatchFunction: - sql_ctx: Any = ... - func: Any = ... - def __init__(self, sql_ctx: Any, func: Any) -> None: ... - error: Any = ... - def call(self, jdf: Any, batch_id: Any) -> None: ... - class Java: - implements: Any = ... - -def to_str(value: Any): ... diff --git a/python/pyspark/streaming/__init__.pyi b/python/pyspark/streaming/__init__.pyi deleted file mode 100644 index 281c06e51cc60..0000000000000 --- a/python/pyspark/streaming/__init__.pyi +++ /dev/null @@ -1,23 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark.streaming.context import StreamingContext as StreamingContext # noqa: F401 -from pyspark.streaming.dstream import DStream as DStream # noqa: F401 -from pyspark.streaming.listener import ( # noqa: F401 - StreamingListener as StreamingListener, -) diff --git a/python/pyspark/streaming/util.pyi b/python/pyspark/streaming/util.pyi deleted file mode 100644 index d552eb15f4818..0000000000000 --- a/python/pyspark/streaming/util.pyi +++ /dev/null @@ -1,48 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any, Optional - -class TransformFunction: - ctx: Any - func: Any - deserializers: Any - rdd_wrap_func: Any - failure: Any - def __init__(self, ctx, func, *deserializers) -> None: ... - def rdd_wrapper(self, func): ... - def call(self, milliseconds, jrdds): ... - def getLastFailure(self): ... - class Java: - implements: Any - -class TransformFunctionSerializer: - ctx: Any - serializer: Any - gateway: Any - failure: Any - def __init__(self, ctx, serializer, gateway: Optional[Any] = ...) -> None: ... - def dumps(self, id): ... - def loads(self, data): ... - def getLastFailure(self): ... - class Java: - implements: Any - -def rddToFileName(prefix, suffix, timestamp): ... diff --git a/python/pyspark/traceback_utils.pyi b/python/pyspark/traceback_utils.pyi deleted file mode 100644 index 33b1b7dc3227f..0000000000000 --- a/python/pyspark/traceback_utils.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from collections import namedtuple -from typing import Any - -CallSite = namedtuple("CallSite", "function file linenum") - -def first_spark_call(): ... - -class SCCallSiteSync: - def __init__(self, sc: Any) -> None: ... - def __enter__(self) -> None: ... - def __exit__(self, type: Any, value: Any, tb: Any) -> None: ... diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 86e5ab5a01585..d2ca484e8ace6 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -23,7 +23,7 @@ from py4j.clientserver import ClientServer -__all__ = [] +__all__ = [] # type: ignore def print_exec(stream): diff --git a/python/pyspark/util.pyi b/python/pyspark/util.pyi deleted file mode 100644 index 023b409831459..0000000000000 --- a/python/pyspark/util.pyi +++ /dev/null @@ -1,35 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -from typing import Any, Tuple -from pyspark._typing import F - -import threading - -def print_exec(stream: Any) -> None: ... - -class VersionUtils: - @staticmethod - def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ... - -def fail_on_stopiteration(f: F) -> F: ... - -class InheritableThread(threading.Thread): - def __init__(self, target: Any, *args: Any, **kwargs: Any): ... - def __del__(self) -> None: ... diff --git a/python/pyspark/worker.pyi b/python/pyspark/worker.pyi deleted file mode 100644 index cc264823cc867..0000000000000 --- a/python/pyspark/worker.pyi +++ /dev/null @@ -1,73 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from pyspark import shuffle as shuffle -from pyspark.broadcast import Broadcast as Broadcast -from pyspark.files import SparkFiles as SparkFiles -from pyspark.java_gateway import local_connect_and_auth as local_connect_and_auth -from pyspark.rdd import PythonEvalType as PythonEvalType -from pyspark.resource import ResourceInformation as ResourceInformation -from pyspark.serializers import ( - BatchedSerializer as BatchedSerializer, - PickleSerializer as PickleSerializer, - SpecialLengths as SpecialLengths, - UTF8Deserializer as UTF8Deserializer, - read_bool as read_bool, - read_int as read_int, - read_long as read_long, - write_int as write_int, - write_long as write_long, - write_with_length as write_with_length, -) -from pyspark.sql.pandas.serializers import ( - ArrowStreamPandasUDFSerializer as ArrowStreamPandasUDFSerializer, - CogroupUDFSerializer as CogroupUDFSerializer, -) -from pyspark.sql.pandas.types import to_arrow_type as to_arrow_type -from pyspark.sql.types import StructType as StructType -from pyspark.taskcontext import ( - BarrierTaskContext as BarrierTaskContext, - TaskContext as TaskContext, -) -from pyspark.util import fail_on_stopiteration as fail_on_stopiteration -from typing import Any - -has_resource_module: bool -pickleSer: Any -utf8_deserializer: Any - -def report_times(outfile: Any, boot: Any, init: Any, finish: Any) -> None: ... -def add_path(path: Any) -> None: ... -def read_command(serializer: Any, file: Any): ... -def chain(f: Any, g: Any): ... -def wrap_udf(f: Any, return_type: Any): ... -def wrap_scalar_pandas_udf(f: Any, return_type: Any): ... -def wrap_pandas_iter_udf(f: Any, return_type: Any): ... 
-def wrap_cogrouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... -def wrap_grouped_map_pandas_udf(f: Any, return_type: Any, argspec: Any): ... -def wrap_grouped_agg_pandas_udf(f: Any, return_type: Any): ... -def wrap_window_agg_pandas_udf( - f: Any, return_type: Any, runner_conf: Any, udf_index: Any -): ... -def wrap_unbounded_window_agg_pandas_udf(f: Any, return_type: Any): ... -def wrap_bounded_window_agg_pandas_udf(f: Any, return_type: Any): ... -def read_single_udf( - pickleSer: Any, infile: Any, eval_type: Any, runner_conf: Any, udf_index: Any -): ... -def read_udfs(pickleSer: Any, infile: Any, eval_type: Any): ... -def main(infile: Any, outfile: Any) -> None: ... From 94d648dff5f24b4dea3873fd8e6609b1a099d0a2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 7 Oct 2020 20:16:40 +0900 Subject: [PATCH 0186/1009] [SPARK-33036][SQL] Refactor RewriteCorrelatedScalarSubquery code to replace exprIds in a bottom-up manner ### What changes were proposed in this pull request? This PR intends to refactor code in `RewriteCorrelatedScalarSubquery` for replacing `ExprId`s in a bottom-up manner instead of doing in a top-down one. This PR comes from the talk with cloud-fan in https://github.com/apache/spark/pull/29585#discussion_r490371252. ### Why are the changes needed? To improve code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29913 from maropu/RefactorRewriteCorrelatedScalarSubquery. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/subquery.scala | 80 ++++++++++++------- 1 file changed, 51 insertions(+), 29 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index a168dcd7a83f5..f184253ef0595 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -338,20 +338,15 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using - * the given collector. To avoid the reuse of `exprId`s, this method generates new `exprId` - * for the subqueries and rewrite references in the given `expression`. - * This method returns extracted subqueries and the corresponding `exprId`s and these values - * will be used later in `constructLeftJoins` for building the child plan that - * returns subquery output with the `exprId`s. + * the given collector. The expression is rewritten and returned. */ private def extractCorrelatedScalarSubqueries[E <: Expression]( expression: E, - subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): E = { + subqueries: ArrayBuffer[ScalarSubquery]): E = { val newExpression = expression transform { case s: ScalarSubquery if s.children.nonEmpty => - val newExprId = NamedExpression.newExprId - subqueries += s -> newExprId - s.plan.output.head.withExprId(newExprId) + subqueries += s + s.plan.output.head } newExpression.asInstanceOf[E] } @@ -512,19 +507,23 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { /** * Construct a new child plan by left joining the given subqueries to a base plan. 
+ * This method returns the child plan and an attribute mapping + * for the updated `ExprId`s of subqueries. If the non-empty mapping returned, + * this rule will rewrite subquery references in a parent plan based on it. */ private def constructLeftJoins( child: LogicalPlan, - subqueries: ArrayBuffer[(ScalarSubquery, ExprId)]): LogicalPlan = { - subqueries.foldLeft(child) { - case (currentChild, (ScalarSubquery(query, conditions, _), newExprId)) => + subqueries: ArrayBuffer[ScalarSubquery]): (LogicalPlan, AttributeMap[Attribute]) = { + val subqueryAttrMapping = ArrayBuffer[(Attribute, Attribute)]() + val newChild = subqueries.foldLeft(child) { + case (currentChild, ScalarSubquery(query, conditions, _)) => val origOutput = query.output.head val resultWithZeroTups = evalSubqueryOnZeroTups(query) if (resultWithZeroTups.isEmpty) { // CASE 1: Subquery guaranteed not to have the COUNT bug Project( - currentChild.output :+ Alias(origOutput, origOutput.name)(exprId = newExprId), + currentChild.output :+ origOutput, Join(currentChild, query, LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) } else { // Subquery might have the COUNT bug. Add appropriate corrections. @@ -544,12 +543,13 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { if (havingNode.isEmpty) { // CASE 2: Subquery with no HAVING clause + val subqueryResultExpr = + Alias(If(IsNull(alwaysTrueRef), + resultWithZeroTups.get, + aggValRef), origOutput.name)() + subqueryAttrMapping += ((origOutput, subqueryResultExpr.toAttribute)) Project( - currentChild.output :+ - Alias( - If(IsNull(alwaysTrueRef), - resultWithZeroTups.get, - aggValRef), origOutput.name)(exprId = newExprId), + currentChild.output :+ subqueryResultExpr, Join(currentChild, Project(query.output :+ alwaysTrueExpr, query), LeftOuter, conditions.reduceOption(And), JoinHint.NONE)) @@ -576,7 +576,9 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { (IsNull(alwaysTrueRef), resultWithZeroTups.get), (Not(havingNode.get.condition), Literal.create(null, aggValRef.dataType))), aggValRef), - origOutput.name)(exprId = newExprId) + origOutput.name)() + + subqueryAttrMapping += ((origOutput, caseExpr.toAttribute)) Project( currentChild.output :+ caseExpr, @@ -587,6 +589,20 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { } } } + (newChild, AttributeMap(subqueryAttrMapping.toSeq)) + } + + private def updateAttrs[E <: Expression]( + exprs: Seq[E], + attrMap: AttributeMap[Attribute]): Seq[E] = { + if (attrMap.nonEmpty) { + val newExprs = exprs.map { _.transform { + case a: AttributeReference => attrMap.getOrElse(a, a) + }} + newExprs.asInstanceOf[Seq[E]] + } else { + exprs + } } /** @@ -595,36 +611,42 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { case a @ Aggregate(grouping, expressions, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteExprs = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { // We currently only allow correlated subqueries in an aggregate if they are part of the // grouping expressions. As a result we need to replace all the scalar subqueries in the // grouping expressions by their result. 
val newGrouping = grouping.map { e => - subqueries.find(_._1.semanticEquals(e)).map(_._1.plan.output.head).getOrElse(e) + subqueries.find(_.semanticEquals(e)).map(_.plan.output.head).getOrElse(e) } - val newAgg = Aggregate(newGrouping, newExpressions, constructLeftJoins(child, subqueries)) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newExprs = updateAttrs(rewriteExprs, subqueryAttrMapping) + val newAgg = Aggregate(newGrouping, newExprs, newChild) val attrMapping = a.output.zip(newAgg.output) newAgg -> attrMapping } else { a -> Nil } case p @ Project(expressions, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newExpressions = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteExprs = expressions.map(extractCorrelatedScalarSubqueries(_, subqueries)) if (subqueries.nonEmpty) { - val newProj = Project(newExpressions, constructLeftJoins(child, subqueries)) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newExprs = updateAttrs(rewriteExprs, subqueryAttrMapping) + val newProj = Project(newExprs, newChild) val attrMapping = p.output.zip(newProj.output) newProj -> attrMapping } else { p -> Nil } case f @ Filter(condition, child) => - val subqueries = ArrayBuffer.empty[(ScalarSubquery, ExprId)] - val newCondition = extractCorrelatedScalarSubqueries(condition, subqueries) + val subqueries = ArrayBuffer.empty[ScalarSubquery] + val rewriteCondition = extractCorrelatedScalarSubqueries(condition, subqueries) if (subqueries.nonEmpty) { - val newProj = Project(f.output, Filter(newCondition, constructLeftJoins(child, subqueries))) + val (newChild, subqueryAttrMapping) = constructLeftJoins(child, subqueries) + val newCondition = updateAttrs(Seq(rewriteCondition), subqueryAttrMapping).head + val newProj = Project(f.output, Filter(newCondition, newChild)) val attrMapping = f.output.zip(newProj.output) newProj -> attrMapping } else { From 3099fd9f9d576c96642c0e66c74797b8882b70bb Mon Sep 17 00:00:00 2001 From: Stijn De Haes Date: Wed, 7 Oct 2020 09:52:00 -0700 Subject: [PATCH 0187/1009] [SPARK-32067][K8S] Use unique ConfigMap name for executor pod template ### What changes were proposed in this pull request? The pod template configmap always had the same name. This PR makes it unique. ### Why are the changes needed? If you scheduled 2 spark jobs they will both use the same configmap name this will result in conflicts. This PR fixes that **BEFORE** ``` $ kubectl get cm --all-namespaces -w | grep podspec podspec-configmap 1 65s ``` **AFTER** ``` $ kubectl get cm --all-namespaces -w | grep podspec aaece65ef82e4a30b7b7800aad600d4f spark-test-app-aac9f37502b2ca55-driver-podspec-conf-map 1 0s ``` This can be seen when running the integration tests ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests and the integration tests test if this works Closes #29934 from stijndehaes/bugfix/SPARK-32067-unique-name-for-template-configmap. 
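For illustration, here is a minimal sketch of the naming scheme this patch introduces (the second prefix below is made up for the example; the real code builds the ConfigMap with fabric8 builders, as shown in the diff that follows):

```
// Illustrative sketch only, not the actual PodTemplateConfigMapStep code:
// the ConfigMap name is now derived from the per-application resource name
// prefix instead of the fixed constant "podspec-configmap".
object PodTemplateConfigMapNamingSketch {
  // Matches the updated constant in Constants.scala.
  val POD_TEMPLATE_CONFIGMAP = "driver-podspec-conf-map"

  def configMapName(resourceNamePrefix: String): String =
    s"$resourceNamePrefix-$POD_TEMPLATE_CONFIGMAP"

  def main(args: Array[String]): Unit = {
    // The first prefix comes from the output above; the second is hypothetical.
    val jobA = configMapName("spark-test-app-aac9f37502b2ca55")
    val jobB = configMapName("spark-pi-0123456789abcdef")
    assert(jobA != jobB) // previously both jobs shared the name "podspec-configmap"
    println(jobA) // spark-test-app-aac9f37502b2ca55-driver-podspec-conf-map
    println(jobB)
  }
}
```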
Authored-by: Stijn De Haes Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/k8s/Constants.scala | 2 +- .../deploy/k8s/features/PodTemplateConfigMapStep.scala | 6 ++++-- .../k8s/features/PodTemplateConfigMapStepSuite.scala | 8 +++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index c9c5aa606cf55..991205a47f846 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -77,7 +77,7 @@ private[spark] object Constants { val EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME = "pod-spec-template.yml" val EXECUTOR_POD_SPEC_TEMPLATE_MOUNTPATH = "/opt/spark/pod-template" val POD_TEMPLATE_VOLUME = "pod-template-volume" - val POD_TEMPLATE_CONFIGMAP = "podspec-configmap" + val POD_TEMPLATE_CONFIGMAP = "driver-podspec-conf-map" val POD_TEMPLATE_KEY = "podspec-configmap-key" // Miscellaneous diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala index 7f41ca43589b6..1040419a4a6e9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStep.scala @@ -31,6 +31,8 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) private val hasTemplate = conf.contains(KUBERNETES_EXECUTOR_PODTEMPLATE_FILE) + private val configmapName = s"${conf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP" + def configurePod(pod: SparkPod): SparkPod = { if (hasTemplate) { val podWithVolume = new PodBuilder(pod.pod) @@ -38,7 +40,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) .addNewVolume() .withName(POD_TEMPLATE_VOLUME) .withNewConfigMap() - .withName(POD_TEMPLATE_CONFIGMAP) + .withName(configmapName) .addNewItem() .withKey(POD_TEMPLATE_KEY) .withPath(EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME) @@ -76,7 +78,7 @@ private[spark] class PodTemplateConfigMapStep(conf: KubernetesConf) val podTemplateString = Files.toString(new File(podTemplateFile), StandardCharsets.UTF_8) Seq(new ConfigMapBuilder() .withNewMetadata() - .withName(POD_TEMPLATE_CONFIGMAP) + .withName(configmapName) .endMetadata() .addToData(POD_TEMPLATE_KEY, podTemplateString) .build()) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala index 051320fa44c5e..1b38fd6a0d2ab 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/PodTemplateConfigMapStepSuite.scala @@ -16,13 +16,14 @@ */ package org.apache.spark.deploy.k8s.features -import java.io.{File, PrintWriter} +import java.io.PrintWriter import java.nio.file.Files import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.{SparkConf, SparkFunSuite} import 
org.apache.spark.deploy.k8s._ +import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.util.Utils class PodTemplateConfigMapStepSuite extends SparkFunSuite { @@ -56,8 +57,9 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite { assert(configuredPod.pod.getSpec.getVolumes.size() === 1) val volume = configuredPod.pod.getSpec.getVolumes.get(0) + val generatedResourceName = s"${kubernetesConf.resourceNamePrefix}-$POD_TEMPLATE_CONFIGMAP" assert(volume.getName === Constants.POD_TEMPLATE_VOLUME) - assert(volume.getConfigMap.getName === Constants.POD_TEMPLATE_CONFIGMAP) + assert(volume.getConfigMap.getName === generatedResourceName) assert(volume.getConfigMap.getItems.size() === 1) assert(volume.getConfigMap.getItems.get(0).getKey === Constants.POD_TEMPLATE_KEY) assert(volume.getConfigMap.getItems.get(0).getPath === @@ -70,7 +72,7 @@ class PodTemplateConfigMapStepSuite extends SparkFunSuite { val resources = step.getAdditionalKubernetesResources() assert(resources.size === 1) - assert(resources.head.getMetadata.getName === Constants.POD_TEMPLATE_CONFIGMAP) + assert(resources.head.getMetadata.getName === generatedResourceName) assert(resources.head.isInstanceOf[ConfigMap]) val configMap = resources.head.asInstanceOf[ConfigMap] assert(configMap.getData.size() === 1) From a127387a53e1a24e76de83c5a1858fcdbd38c3a2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 7 Oct 2020 12:27:23 -0700 Subject: [PATCH 0188/1009] [SPARK-33082][SQL] Remove hive-1.2 workaround code ### What changes were proposed in this pull request? This PR removes old Hive-1.2 profile related workaround code. ### Why are the changes needed? To simply the code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #29961 from dongjoon-hyun/SPARK-HIVE12. 
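The pattern is the same across the files below: each `HiveUtils.isHive23` branch collapses into the Hive 2.3 code path. A minimal sketch of the before/after shape (the version literal is only a stand-in for `HiveVersionInfo.getVersion` in this sketch):

```
// Illustrative sketch only: the shape of the cleanup, not the real HiveUtils.
object HiveVersionSketch {
  // Stand-in for HiveVersionInfo.getVersion; the built-in Hive is 2.3.x now.
  val hiveVersion: String = "2.3.7"

  // Before (removed): val isHive23 = hiveVersion.startsWith("2.3")
  //                   val builtinHiveVersion = if (isHive23) hiveVersion else "1.2.1"
  // After (kept): a single code path.
  val builtinHiveVersion: String = hiveVersion

  def main(args: Array[String]): Unit =
    // Matches the updated "2.3.7" assertions in the Thrift server tests below.
    println(s"built-in Hive version: $builtinHiveVersion")
}
```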
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../BaseScriptTransformationSuite.scala | 12 +---- .../datasources/orc/OrcSourceSuite.scala | 7 ++- .../SparkGetTablesOperation.scala | 6 +-- .../thriftserver/SparkSQLCLIService.scala | 11 +---- .../HiveThriftServer2Suites.scala | 18 ++----- ...arkThriftServerProtocolVersionsSuite.scala | 8 +--- .../execution/HiveCompatibilitySuite.scala | 5 +- .../org/apache/spark/sql/hive/HiveShim.scala | 48 ++++++------------- .../org/apache/spark/sql/hive/HiveUtils.scala | 3 +- .../sql/hive/client/HiveClientImpl.scala | 11 ++--- .../org/apache/spark/sql/hive/hiveUDFs.scala | 18 ++----- .../spark/sql/hive/orc/OrcFilters.scala | 16 +------ .../sql/hive/ClasspathDependenciesSuite.scala | 25 +++------- .../sql/hive/HiveMetastoreCatalogSuite.scala | 18 ++----- .../apache/spark/sql/hive/HiveShimSuite.scala | 12 +---- .../spark/sql/hive/StatisticsSuite.scala | 34 +++---------- .../HiveScriptTransformationSuite.scala | 2 - .../sql/hive/execution/HiveUDFSuite.scala | 1 - .../sql/hive/execution/SQLQuerySuite.scala | 1 - .../sql/hive/orc/HiveOrcFilterSuite.scala | 19 +------- .../sql/hive/orc/HiveOrcQuerySuite.scala | 1 - .../sql/hive/orc/HiveOrcSourceSuite.scala | 10 +--- 22 files changed, 60 insertions(+), 226 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 02f447bd14339..c07ea0f12f94e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -63,16 +63,6 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU uncaughtExceptionHandler.cleanStatus() } - def isHive23OrSpark: Boolean - - // In Hive 1.2, the string representation of a decimal omits trailing zeroes. - // But in Hive 2.3, it is always padded to 18 digits with trailing zeroes if necessary. - val decimalToString: Column => Column = if (isHive23OrSpark) { - c => c.cast("string") - } else { - c => c.cast("decimal(1, 0)").cast("string") - } - def createScriptTransformationExec( input: Seq[Expression], script: String, @@ -142,7 +132,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'a.cast("string"), 'b.cast("string"), 'c.cast("string"), - decimalToString('d), + 'd.cast("string"), 'e.cast("string")).collect()) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index b70fd7476ed98..b6f41ab085fe1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -120,8 +120,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } - protected def testSelectiveDictionaryEncoding(isSelective: Boolean, - isHive23: Boolean = false): Unit = { + protected def testSelectiveDictionaryEncoding(isSelective: Boolean, isHiveOrc: Boolean): Unit = { val tableName = "orcTable" withTempDir { dir => @@ -174,7 +173,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { // Hive 0.11 and RLE v2 is introduced in Hive 0.12 ORC with more improvements. 
// For more details, see https://orc.apache.org/specification/ assert(stripe.getColumns(1).getKind === DICTIONARY_V2) - if (isSelective || isHive23) { + if (isSelective || isHiveOrc) { assert(stripe.getColumns(2).getKind === DIRECT_V2) } else { assert(stripe.getColumns(2).getKind === DICTIONARY_V2) @@ -581,7 +580,7 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession { } test("Enforce direct encoding column-wise selectively") { - testSelectiveDictionaryEncoding(isSelective = true) + testSelectiveDictionaryEncoding(isSelective = true, isHiveOrc = false) } test("SPARK-11412 read and merge orc schemas in parallel") { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index 0d4b9b392f074..bccad865be27a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -125,10 +125,6 @@ private[hive] class SparkGetTablesOperation( tableType, comment.getOrElse("")) // Since HIVE-7575(Hive 2.0.0), adds 5 additional columns to the ResultSet of GetTables. - if (HiveUtils.isHive23) { - rowSet.addRow(rowData ++ Array(null, null, null, null, null)) - } else { - rowSet.addRow(rowData) - } + rowSet.addRow(rowData ++ Array(null, null, null, null, null)) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index 984625c76e057..c39d2ecdd7923 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -113,17 +113,10 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC private[thriftserver] trait ReflectedCompositeService { this: AbstractService => - private val logInfo = (msg: String) => if (HiveUtils.isHive23) { - getAncestorField[Logger](this, 3, "LOG").info(msg) - } else { - getAncestorField[Log](this, 3, "LOG").info(msg) - } + private val logInfo = (msg: String) => getAncestorField[Logger](this, 3, "LOG").info(msg) - private val logError = (msg: String, e: Throwable) => if (HiveUtils.isHive23) { + private val logError = (msg: String, e: Throwable) => getAncestorField[Logger](this, 3, "LOG").error(msg, e) - } else { - getAncestorField[Log](this, 3, "LOG").error(msg, e) - } def initCompositeService(hiveConf: HiveConf): Unit = { // Emulating `CompositeService.init(hiveConf)` diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index ad0f97cae3f8e..27d4c4bc40bec 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -544,11 +544,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { conf += resultSet.getString(1) -> resultSet.getString(2) } - if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) - } 
else { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) - } + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) } } @@ -561,11 +557,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { conf += resultSet.getString(1) -> resultSet.getString(2) } - if (HiveUtils.isHive23) { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) - } else { - assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("1.2.1")) - } + assert(conf.get(HiveUtils.FAKE_HIVE_VERSION.key) === Some("2.3.7")) } } @@ -643,11 +635,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val sessionHandle = client.openSession(user, "") val sessionID = sessionHandle.getSessionId - if (HiveUtils.isHive23) { - assert(pipeoutFileList(sessionID).length == 2) - } else { - assert(pipeoutFileList(sessionID).length == 1) - } + assert(pipeoutFileList(sessionID).length == 2) client.closeSession(sessionHandle) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index fa001b11253f5..d5582077d6170 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -356,12 +356,8 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { assert(metaData.getColumnName(1) === "NULL") assert(metaData.getColumnTypeName(1) === "void") assert(metaData.getColumnType(1) === java.sql.Types.NULL) - if (HiveUtils.isHive23) { - // For Hive 1.2 the o.a.h.j.JdbcColumn.typeStringToHiveType can not recognize `null` as - // type name. - assert(metaData.getPrecision(1) === 0) - assert(metaData.getScale(1) === 0) - } + assert(metaData.getPrecision(1) === 0) + assert(metaData.getScale(1) === 0) } } diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index b7ea0630dd85f..a685549290f0e 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -1145,11 +1145,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { * The set of tests that are believed to be working in catalyst. Tests not on includeList or * excludeList are implicitly marked as ignored. 
*/ - override def includeList: Seq[String] = if (HiveUtils.isHive23) { + override def includeList: Seq[String] = commonIncludeList ++ Seq( "decimal_1_1" ) - } else { - commonIncludeList - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 04a6a8f8aa9a5..1f8ce04270a04 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -161,46 +161,26 @@ private[hive] object HiveShim { } def deserializePlan[UDFType](is: java.io.InputStream, clazz: Class[_]): UDFType = { - if (HiveUtils.isHive23) { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, - kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) - try { - deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } - } else { - val runtimeSerializationKryo = utilClass.getField("runtimeSerializationKryo") - val threadLocalValue = runtimeSerializationKryo.get(utilClass) - val getMethod = threadLocalValue.getClass.getMethod("get") - val kryo = getMethod.invoke(threadLocalValue) - val deserializeObjectByKryo = findMethod(utilClass, deserializeMethodName, - kryo.getClass, classOf[InputStream], classOf[Class[_]]) + val borrowKryo = serUtilClass.getMethod("borrowKryo") + val kryo = borrowKryo.invoke(serUtilClass) + val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, + kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) + try { deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] + } finally { + serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) } } def serializePlan(function: AnyRef, out: java.io.OutputStream): Unit = { - if (HiveUtils.isHive23) { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, - kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) - try { - serializeObjectByKryo.invoke(null, kryo, function, out) - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } - } else { - val runtimeSerializationKryo = utilClass.getField("runtimeSerializationKryo") - val threadLocalValue = runtimeSerializationKryo.get(utilClass) - val getMethod = threadLocalValue.getClass.getMethod("get") - val kryo = getMethod.invoke(threadLocalValue) - val serializeObjectByKryo = findMethod(utilClass, serializeMethodName, - kryo.getClass, classOf[Object], classOf[OutputStream]) + val borrowKryo = serUtilClass.getMethod("borrowKryo") + val kryo = borrowKryo.invoke(serUtilClass) + val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, + kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) + try { serializeObjectByKryo.invoke(null, kryo, function, out) + } finally { + serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 62ff2db2ecb3c..7d4bf7305546c 100644 --- 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -56,10 +56,9 @@ private[spark] object HiveUtils extends Logging { } private val hiveVersion = HiveVersionInfo.getVersion - val isHive23: Boolean = hiveVersion.startsWith("2.3") /** The version of hive used internally by Spark SQL. */ - val builtinHiveVersion: String = if (isHive23) hiveVersion else "1.2.1" + val builtinHiveVersion: String = hiveVersion val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 1611a3da8a3da..a78e1cebc588c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -169,9 +169,7 @@ private[hive] class HiveClientImpl( // since HIVE-11878, and ADDJarCommand will add jars to clientLoader.classLoader. // For this reason we cannot load the jars added by ADDJarCommand because of class loader // got changed. We reset it to clientLoader.ClassLoader here. - if (HiveUtils.isHive23) { - state.getConf.setClassLoader(clientLoader.classLoader) - } + state.getConf.setClassLoader(clientLoader.classLoader) SessionState.start(state) state.out = new PrintStream(outputBuffer, true, UTF_8.name()) state.err = new PrintStream(outputBuffer, true, UTF_8.name()) @@ -179,9 +177,7 @@ private[hive] class HiveClientImpl( } /** Returns the configuration for the current session. */ - def conf: HiveConf = if (!HiveUtils.isHive23) { - state.getConf - } else { + def conf: HiveConf = { val hiveConf = state.getConf // Hive changed the default of datanucleus.schema.autoCreateAll from true to false // and hive.metastore.schema.verification from false to true since Hive 2.0. @@ -293,8 +289,7 @@ private[hive] class HiveClientImpl( val ret = try { f } catch { - case e: NoClassDefFoundError - if HiveUtils.isHive23 && e.getMessage.contains("org/apache/hadoop/hive/serde2/SerDe") => + case e: NoClassDefFoundError if e.getMessage.contains("apache/hadoop/hive/serde2/SerDe") => throw new ClassNotFoundException("The SerDe interface removed since Hive 2.3(HIVE-15167)." + " Please migrate your custom SerDes to Hive 2.3. 
See HIVE-15167 for more details.", e) } finally { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 462e67c4ed35c..7fccb72fb913b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -350,19 +350,11 @@ private[hive] case class HiveUDAFFunction( } val clazz = Utils.classForName(classOf[SimpleGenericUDAFParameterInfo].getName) - if (HiveUtils.isHive23) { - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] - resolver.getEvaluator(parameterInfo) - } else { - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] - resolver.getEvaluator(parameterInfo) - } + val ctor = clazz.getDeclaredConstructor( + classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) + val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) + val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] + resolver.getEvaluator(parameterInfo) } private case class HiveEvaluator( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala index f9c514567c639..ea5c7ca15b065 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -71,21 +71,7 @@ private[orc] object OrcFilters extends Logging { } def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = { - if (HiveUtils.isHive23) { - DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] - } else { - val dataTypeMap = schema.map(f => quoteIfNeeded(f.name) -> f.dataType).toMap - // TODO (SPARK-25557): ORC doesn't support nested predicate pushdown, so they are removed. - val newFilters = filters.filter(!_.containsNestedColumn) - // Combines all convertible filters using `And` to produce a single conjunction - val conjunctionOptional = buildTree(convertibleFilters(schema, dataTypeMap, newFilters)) - conjunctionOptional.map { conjunction => - // Then tries to build a single ORC `SearchArgument` for the conjunction predicate. - // The input predicate is fully convertible. There should not be any empty result in the - // following recursive method call `buildSearchArgument`. 
- buildSearchArgument(dataTypeMap, conjunction, newBuilder).build() - } - } + DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] } def convertibleFilters( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala index a696d6aaff27b..c136c4c9790fd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala @@ -57,20 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } } - test("shaded Protobuf") { - if (HiveUtils.isHive23) { - assertLoads("com.google.protobuf.ServiceException") - } else { - assertLoads("org.apache.hive.com.google.protobuf.ServiceException") - } + test("protobuf") { + assertLoads("com.google.protobuf.ServiceException") } - test("shaded Kryo") { - if (HiveUtils.isHive23) { - assertLoads("com.esotericsoftware.kryo.Kryo") - } else { - assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") - } + test("kryo") { + assertLoads("com.esotericsoftware.kryo.Kryo") } test("hive-common") { @@ -89,12 +81,7 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } test("parquet-hadoop-bundle") { - if (HiveUtils.isHive23) { - assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") - assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") - } else { - assertLoads("parquet.hadoop.ParquetOutputFormat") - assertLoads("parquet.hadoop.ParquetInputFormat") - } + assertLoads("org.apache.parquet.hadoop.ParquetOutputFormat") + assertLoads("org.apache.parquet.hadoop.ParquetInputFormat") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 95e99c653d6f6..8f71ba3337aa2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -206,13 +206,8 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - if (HiveUtils.isHive23) { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.100\t1", "2.100\t2")) - } else { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) - } + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) } } @@ -244,13 +239,8 @@ class DataSourceWithHiveMetastoreCatalogSuite assert(columns.map(_.dataType) === Seq(DecimalType(10, 3), StringType)) checkAnswer(table("t"), testDF) - if (HiveUtils.isHive23) { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.100\t1", "2.100\t2")) - } else { - assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === - Seq("1.1\t1", "2.1\t2")) - } + assert(sparkSession.metadataHive.runSqlHive("SELECT * FROM t") === + Seq("1.100\t1", "2.100\t2")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index 14d07cdf8db08..54c64a4eeb190 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -35,18 +35,10 @@ class HiveShimSuite extends 
SparkFunSuite { // test when READ_COLUMN_NAMES_CONF_STR is empty HiveShim.appendReadColumns(conf, ids, names) - if (HiveUtils.isHive23) { - assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) - } else { - assert(names.asJava === ColumnProjectionUtils.getReadColumnNames(conf)) - } + assert(names === ColumnProjectionUtils.getReadColumnNames(conf)) // test when READ_COLUMN_NAMES_CONF_STR is non-empty HiveShim.appendReadColumns(conf, moreIds, moreNames) - if (HiveUtils.isHive23) { - assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) - } else { - assert((names ++ moreNames).asJava === ColumnProjectionUtils.getReadColumnNames(conf)) - } + assert((names ++ moreNames) === ColumnProjectionUtils.getReadColumnNames(conf)) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 1f3878ad2925d..52dd2b34a0e95 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -101,14 +101,9 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto .asInstanceOf[HiveTableRelation] val properties = relation.tableMeta.ignoredProperties - if (HiveUtils.isHive23) { - // Since HIVE-6727, Hive fixes table-level stats for external tables are incorrect. - assert(properties("totalSize").toLong == 6) - assert(properties.get("rawDataSize").isEmpty) - } else { - assert(properties("totalSize").toLong <= 0, "external table totalSize must be <= 0") - assert(properties("rawDataSize").toLong <= 0, "external table rawDataSize must be <= 0") - } + // Since HIVE-6727, Hive fixes table-level stats for external tables are incorrect. + assert(properties("totalSize").toLong == 6) + assert(properties.get("rawDataSize").isEmpty) val sizeInBytes = relation.stats.sizeInBytes assert(sizeInBytes === BigInt(file1.length() + file2.length())) @@ -872,25 +867,10 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(totalSize.isDefined && totalSize.get > 0, "totalSize is lost") val numRows = extractStatsPropValues(describeResult, "numRows") - if (HiveUtils.isHive23) { - // Since HIVE-15653(Hive 2.3.0), Hive fixs some ALTER TABLE commands drop table stats. - assert(numRows.isDefined && numRows.get == 500) - val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == 5312) - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } else { - // ALTER TABLE SET/UNSET TBLPROPERTIES invalidates some Hive specific statistics, but not - // Spark specific statistics. This is triggered by the Hive alterTable API. 
- assert(numRows.isDefined && numRows.get == -1, "numRows is lost") - val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") - assert(rawDataSize.isDefined && rawDataSize.get == -1, "rawDataSize is lost") - - if (analyzedBySpark) { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) - } else { - checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = None) - } - } + assert(numRows.isDefined && numRows.get == 500) + val rawDataSize = extractStatsPropValues(describeResult, "rawDataSize") + assert(rawDataSize.isDefined && rawDataSize.get == 5312) + checkTableStats(tabName, hasSizeInBytes = true, expectedRowCounts = Some(500)) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index bb87246acf4ca..d247f37130776 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -38,8 +38,6 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T import ScriptTransformationIOSchema._ - override def isHive23OrSpark: Boolean = HiveUtils.isHive23 - override def createScriptTransformationExec( input: Seq[Expression], script: String, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index f5cd4f9f843d8..dd797b39e0939 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -660,7 +660,6 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { } test("SPARK-32877: add test for Hive UDF complex decimal type") { - assume(HiveUtils.isHive23) withUserDefinedFunction("testArraySum" -> false) { sql(s"CREATE FUNCTION testArraySum AS '${classOf[ArraySumUDF].getName}'") checkAnswer( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 96bca5404831d..a69a949e3a3a2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2242,7 +2242,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-32889: ORC table column name supports special characters") { - assume(HiveUtils.isHive23) // " " "," is not allowed. 
Seq("$", ";", "{", "}", "(", ")", "\n", "\t", "=").foreach { name => val source = "ORC" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala index 5fc41067f661d..deb85f30463ae 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala @@ -81,21 +81,13 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { (predicate: Predicate, stringExpr: String) (implicit df: DataFrame): Unit = { def checkLogicalOperator(filter: SearchArgument) = { - if (HiveUtils.isHive23) { - assert(filter.toString == stringExpr.replace("\n", ", ")) - } else { - assert(filter.toString == stringExpr) - } + assert(filter.toString == stringExpr.replace("\n", ", ")) } checkFilterPredicate(df, predicate, checkLogicalOperator) } private def assertResultWithDiffHiveVersion(expected : String)(c : scala.Any) = { - if (HiveUtils.isHive23) { - assertResult(expected.replace("\n", ", "))(c) - } else { - assertResult(expected)(c) - } + assertResult(expected.replace("\n", ", "))(c) } private def checkNoFilterPredicate @@ -354,13 +346,6 @@ class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => checkNoFilterPredicate($"_1" <=> 1.b) } - // DateType - if (!HiveUtils.isHive23) { - val stringDate = "2015-01-01" - withOrcDataFrame(Seq(Tuple1(Date.valueOf(stringDate)))) { implicit df => - checkNoFilterPredicate($"_1" === Date.valueOf(stringDate)) - } - } // MapType withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => checkNoFilterPredicate($"_1".isNotNull) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 12ee5bea7c2f9..1901ed505197c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -224,7 +224,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } test("SPARK-26437 Can not query decimal type when value is 0") { - assume(HiveUtils.isHive23, "bad test: This bug fixed by HIVE-13083(Hive 2.0.1)") withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { withTable("spark_26437") { sql("CREATE TABLE spark_26437 STORED AS ORCFILE AS SELECT 0.00 AS c1") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 91fd8a47339fc..e94e0b39c859c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -149,12 +149,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { test("Check BloomFilter creation") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - if (HiveUtils.isHive23) { - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) - } else { - // Before ORC-101 - testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER) - } + testBloomFilterCreation(org.apache.orc.OrcProto.Stream.Kind.BLOOM_FILTER_UTF8) } } } @@ -162,7 +157,7 @@ class HiveOrcSourceSuite extends 
OrcSuite with TestHiveSingleton { test("Enforce direct encoding column-wise selectively") { Seq(true, false).foreach { convertMetastore => withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> s"$convertMetastore") { - testSelectiveDictionaryEncoding(isSelective = false, isHive23 = HiveUtils.isHive23) + testSelectiveDictionaryEncoding(isSelective = false, isHiveOrc = true) } } } @@ -322,7 +317,6 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { } test("SPARK-31580: Read a file written before ORC-569") { - assume(HiveUtils.isHive23) // Hive 1.2 doesn't use Apache ORC // Test ORC file came from ORC-621 val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc") assert(df.where("str < 'row 001000'").count() === 1000) From 23afc930ae2fb0f3d7fd214324351fc6a0b8253a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 7 Oct 2020 13:50:15 -0700 Subject: [PATCH 0189/1009] [SPARK-26499][SQL][FOLLOWUP] Print the loading provider exception starting from the INFO level ### What changes were proposed in this pull request? 1. Don't print the exception in the error message while loading a built-in provider. 2. Print the exception starting from the INFO level. Up to the INFO level, the output is: ``` 17:48:32.342 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider. ``` and starting from the INFO level: ``` 17:48:32.342 ERROR org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Failed to load built in provider. 17:48:32.342 INFO org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Loading of the provider failed with the exception: java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated at java.util.ServiceLoader.fail(ServiceLoader.java:232) at java.util.ServiceLoader.access$100(ServiceLoader.java:185) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384) at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404) at java.util.ServiceLoader$1.next(ServiceLoader.java:480) at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41) ``` ### Why are the changes needed? To avoid "noise" in logs while running tests. Currently, logs are blown up: ``` org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider: Loading of the provider failed with the exception: java.util.ServiceConfigurationError: org.apache.spark.sql.jdbc.JdbcConnectionProvider: Provider org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider could not be instantiated at java.util.ServiceLoader.fail(ServiceLoader.java:232) at java.util.ServiceLoader.access$100(ServiceLoader.java:185) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:384) at java.util.ServiceLoader$LazyIterator.next(ServiceLoader.java:404) at java.util.ServiceLoader$1.next(ServiceLoader.java:480) at org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider$.loadProviders(ConnectionProvider.scala:41) ... 
at java.lang.Thread.run(Thread.java:748) Caused by: java.lang.IllegalArgumentException: Intentional Exception at org.apache.spark.sql.execution.datasources.jdbc.connection.IntentionallyFaultyConnectionProvider.(IntentionallyFaultyConnectionProvider.scala:26) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:423) at java.lang.Class.newInstance(Class.java:442) at java.util.ServiceLoader$LazyIterator.nextService(ServiceLoader.java:380) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalogSuite" ``` Closes #29968 from MaxGekk/gaborgsomogyi-SPARK-32001-followup. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/jdbc/connection/ConnectionProvider.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index 546756677edce..649a0bda4236c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -39,11 +39,12 @@ private[jdbc] object ConnectionProvider extends Logging { while (iterator.hasNext) { try { val provider = iterator.next - logDebug(s"Loaded built in provider: $provider") + logDebug(s"Loaded built-in provider: $provider") providers += provider } catch { case t: Throwable => - logError(s"Failed to load built in provider.", t) + logError("Failed to load built-in provider.") + logInfo("Loading of the provider failed with the exception:", t) } } // Seems duplicate but it's needed for Scala 2.13 From 6daa2aeb0164277088396102897b2ea4426b9f1c Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Wed, 7 Oct 2020 15:28:00 -0700 Subject: [PATCH 0190/1009] [SPARK-21708][BUILD] Migrate build to sbt 1.x ### What changes were proposed in this pull request? Migrate sbt-launcher URL to download one for sbt 1.x. Update plugins versions where required by sbt update. Change sbt version to be used to latest released at the moment, 1.3.13 Adjust build settings according to plugins and sbt changes. ### Why are the changes needed? Migration to sbt 1.x: 1. enhances dev experience in development 2. updates build plugins to bring there new features/to fix bugs in them 3. enhances build performance on sbt side 4. eases movement to Scala 3 / dotty ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? All existing tests passed, both on Jenkins and via Github Actions, also manually for Scala 2.13 profile. Closes #29286 from gemelen/feature/sbt-1.x. 
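As a quick orientation for the SparkBuild.scala changes below, here is the recurring sbt 0.13 -> 1.x settings pattern this migration applies, shown as a small sketch rather than an excerpt of the patch (the helper name `forkWithTmpDir` and its arguments are illustrative only): sbt 1.x configuration objects such as `ForkOptions` and `PublishConfiguration` are rebuilt with `withX(...)` methods instead of positional constructors or case-class `copy(...)`.

```
// Illustrative sketch of the sbt 1.x builder style adopted throughout SparkBuild.scala.
// Only the withRunJVMOptions call mirrors the patch; the helper and its inputs are made up.
import sbt._

def forkWithTmpDir(base: ForkOptions, groupName: String, baseDir: File): ForkOptions = {
  // sbt 0.13 wrote: base.copy(runJVMOptions = base.runJVMOptions ++ extraOpts)
  // sbt 1.x builds an updated copy through a with* method instead:
  base.withRunJVMOptions(
    base.runJVMOptions ++ Seq(s"-Djava.io.tmpdir=$baseDir/target/tmp/$groupName"))
}
```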
Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- .sbtopts | 17 ++++ build/sbt-launch-lib.bash | 2 +- project/MimaBuild.scala | 17 ++-- project/MimaExcludes.scala | 30 ++++++ project/SparkBuild.scala | 96 ++++++++++++------- project/build.properties | 2 +- project/plugins.sbt | 30 ++---- .../spark/tools/GenerateMIMAIgnore.scala | 3 +- 8 files changed, 128 insertions(+), 69 deletions(-) create mode 100644 .sbtopts diff --git a/.sbtopts b/.sbtopts new file mode 100644 index 0000000000000..9afbdca6db1c7 --- /dev/null +++ b/.sbtopts @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +-J-Xmx4G +-J-Xss4m diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash index 162bfbf2257c7..423ba3b766e61 100755 --- a/build/sbt-launch-lib.bash +++ b/build/sbt-launch-lib.bash @@ -39,7 +39,7 @@ dlog () { acquire_sbt_jar () { SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` - URL1=https://dl.bintray.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar + URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar JAR=build/sbt-launch-${SBT_VERSION}.jar sbt_jar=$JAR diff --git a/project/MimaBuild.scala b/project/MimaBuild.scala index 10c02103aeddb..badcdf34a2ad0 100644 --- a/project/MimaBuild.scala +++ b/project/MimaBuild.scala @@ -22,9 +22,7 @@ import com.typesafe.tools.mima.core._ import com.typesafe.tools.mima.core.MissingClassProblem import com.typesafe.tools.mima.core.MissingTypesProblem import com.typesafe.tools.mima.core.ProblemFilters._ -import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts} -import com.typesafe.tools.mima.plugin.MimaPlugin.mimaDefaultSettings - +import com.typesafe.tools.mima.plugin.MimaKeys.{mimaBinaryIssueFilters, mimaPreviousArtifacts, mimaFailOnNoPrevious} object MimaBuild { @@ -86,14 +84,17 @@ object MimaBuild { ignoredMembers.flatMap(excludeMember) ++ MimaExcludes.excludes(currentSparkVersion) } - def mimaSettings(sparkHome: File, projectRef: ProjectRef) = { + def mimaSettings(sparkHome: File, projectRef: ProjectRef): Seq[Setting[_]] = { val organization = "org.apache.spark" - val previousSparkVersion = "2.4.0" + val previousSparkVersion = "3.0.0" val project = projectRef.project val fullId = "spark-" + project + "_2.12" - mimaDefaultSettings ++ - Seq(mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion), - mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value)) + + Seq( + mimaFailOnNoPrevious := true, + mimaPreviousArtifacts := Set(organization % fullId % previousSparkVersion), + mimaBinaryIssueFilters ++= ignoredABIProblems(sparkHome, version.value) + ) } } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 
d32d31daae8e7..98769d951b6ac 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,36 @@ object MimaExcludes { // Exclude rules for 3.1.x lazy val v31excludes = v30excludes ++ Seq( + // mima plugin update caused new incompatibilities to be detected + // core module + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + // mllib module + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.labels"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.truePositiveRateByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.falsePositiveRateByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.precisionByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.recallByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.fMeasureByLabel"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.accuracy"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedTruePositiveRate"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFalsePositiveRate"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedRecall"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedPrecision"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionSummary.weightedFMeasure"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.roc"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.areaUnderROC"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.pr"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.fMeasureByThreshold"), + ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.precisionByThreshold"), + 
ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.recallByThreshold"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.classification.FMClassifier.trainImpl"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.FMRegressor.trainImpl"), // [SPARK-31077] Remove ChiSqSelector dependency on mllib.ChiSqSelectorModel // private constructor ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.ml.feature.ChiSqSelectorModel.this"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6328daec027ef..6929342d2f539 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -28,13 +28,13 @@ import scala.collection.mutable.Stack import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ -import sbtunidoc.Plugin.UnidocKeys.unidocGenjavadocVersion import com.etsy.sbt.checkstyle.CheckstylePlugin.autoImport._ import com.simplytyped.Antlr4Plugin._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} import com.typesafe.tools.mima.plugin.MimaKeys import org.scalastyle.sbt.ScalastylePlugin.autoImport._ import org.scalastyle.sbt.Tasks +import sbtassembly.AssemblyPlugin.autoImport._ import spray.revolver.RevolverPlugin._ @@ -83,6 +83,8 @@ object BuildCommons { object SparkBuild extends PomBuild { import BuildCommons._ + import sbtunidoc.GenJavadocPlugin + import sbtunidoc.GenJavadocPlugin.autoImport._ import scala.collection.mutable.Map val projectsMap: Map[String, Seq[Setting[_]]] = Map.empty @@ -106,13 +108,10 @@ object SparkBuild extends PomBuild { override val userPropertiesMap = System.getProperties.asScala.toMap lazy val MavenCompile = config("m2r") extend(Compile) - lazy val publishLocalBoth = TaskKey[Unit]("publish-local", "publish local for m2 and ivy") + lazy val publishLocalBoth = TaskKey[Unit]("localPublish", "publish local for m2 and ivy", KeyRanks.ATask) - lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = Seq( - libraryDependencies += compilerPlugin( - "com.typesafe.genjavadoc" %% "genjavadoc-plugin" % unidocGenjavadocVersion.value cross CrossVersion.full), + lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = GenJavadocPlugin.projectSettings ++ Seq( scalacOptions ++= Seq( - "-P:genjavadoc:out=" + (target.value / "java"), "-P:genjavadoc:strictVisibility=true" // hide package private types ) ) @@ -157,7 +156,7 @@ object SparkBuild extends PomBuild { val scalaSourceV = Seq(file(scalaSource.in(config).value.getAbsolutePath)) val configV = (baseDirectory in ThisBuild).value / scalaStyleOnCompileConfig val configUrlV = scalastyleConfigUrl.in(config).value - val streamsV = streams.in(config).value + val streamsV = (streams.in(config).value: @sbtUnchecked) val failOnErrorV = true val failOnWarningV = false val scalastyleTargetV = scalastyleTarget.in(config).value @@ -204,7 +203,6 @@ object SparkBuild extends PomBuild { javaHome := sys.env.get("JAVA_HOME") .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() }) .map(file), - incOptions := incOptions.value.withNameHashing(true), publishMavenStyle := true, unidocGenjavadocVersion := "0.16", @@ -219,10 +217,12 @@ object SparkBuild extends PomBuild { ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, - publishLocalConfiguration in MavenCompile := - new PublishConfiguration(None, "dotM2", packagedArtifacts.value, Seq(), 
ivyLoggingLevel.value), + publishLocalConfiguration in MavenCompile := PublishConfiguration() + .withResolverName("dotM2") + .withArtifacts(packagedArtifacts.value.toVector) + .withLogging(ivyLoggingLevel.value), publishMavenStyle in MavenCompile := true, - publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile, deliverLocal).value, + publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile).value, publishLocalBoth := Seq(publishLocal in MavenCompile, publishLocal).dependOn.value, javacOptions in (Compile, doc) ++= { @@ -251,6 +251,8 @@ object SparkBuild extends PomBuild { "-sourcepath", (baseDirectory in ThisBuild).value.getAbsolutePath // Required for relative source links in scaladoc ), + SbtPomKeys.profiles := profiles, + // Remove certain packages from Scaladoc scalacOptions in (Compile, doc) := Seq( "-groups", @@ -273,14 +275,15 @@ object SparkBuild extends PomBuild { val out = streams.value def logProblem(l: (=> String) => Unit, f: File, p: xsbti.Problem) = { - l(f.toString + ":" + p.position.line.fold("")(_ + ":") + " " + p.message) + val jmap = new java.util.function.Function[Integer, String]() {override def apply(i: Integer): String = {i.toString}} + l(f.toString + ":" + p.position.line.map[String](jmap.apply).map(_ + ":").orElse("") + " " + p.message) l(p.position.lineContent) l("") } var failed = 0 - analysis.infos.allInfos.foreach { case (k, i) => - i.reportedProblems foreach { p => + analysis.asInstanceOf[sbt.internal.inc.Analysis].infos.allInfos.foreach { case (k, i) => + i.getReportedProblems foreach { p => val deprecation = p.message.contains("deprecated") if (!deprecation) { @@ -302,7 +305,10 @@ object SparkBuild extends PomBuild { sys.error(s"$failed fatal warnings") } analysis - } + }, + // disable Mima check for all modules, + // to be enabled in specific ones that have previous artifacts + MimaKeys.mimaFailOnNoPrevious := false ) def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { @@ -411,7 +417,7 @@ object SparkBuild extends PomBuild { } ))(assembly) - enable(Seq(sparkShell := sparkShell in LocalProject("assembly")))(spark) + enable(Seq(sparkShell := (sparkShell in LocalProject("assembly")).value))(spark) // TODO: move this to its upstream project. 
override def projectDefinitions(baseDirectory: File): Seq[Project] = { @@ -485,12 +491,12 @@ object SparkParallelTestGrouping { testGrouping in Test := { val tests: Seq[TestDefinition] = (definedTests in Test).value val defaultForkOptions = ForkOptions( - bootJars = Nil, javaHome = javaHome.value, - connectInput = connectInput.value, outputStrategy = outputStrategy.value, - runJVMOptions = (javaOptions in Test).value, + bootJars = Vector.empty[java.io.File], workingDirectory = Some(baseDirectory.value), + runJVMOptions = (javaOptions in Test).value.toVector, + connectInput = connectInput.value, envVars = (envVars in Test).value ) tests.groupBy(test => testNameToTestGroup(test.name)).map { case (groupName, groupTests) => @@ -498,7 +504,7 @@ object SparkParallelTestGrouping { if (groupName == DEFAULT_TEST_GROUP) { defaultForkOptions } else { - defaultForkOptions.copy(runJVMOptions = defaultForkOptions.runJVMOptions ++ + defaultForkOptions.withRunJVMOptions(defaultForkOptions.runJVMOptions ++ Seq(s"-Djava.io.tmpdir=${baseDirectory.value}/target/tmp/$groupName")) } } @@ -512,6 +518,7 @@ object SparkParallelTestGrouping { } object Core { + import scala.sys.process.Process lazy val settings = Seq( resourceGenerators in Compile += Def.task { val buildScript = baseDirectory.value + "/../build/spark-build-info" @@ -557,6 +564,7 @@ object DockerIntegrationTests { */ object KubernetesIntegrationTests { import BuildCommons._ + import scala.sys.process.Process val dockerBuild = TaskKey[Unit]("docker-imgs", "Build the docker images for ITs.") val runITs = TaskKey[Unit]("run-its", "Only run ITs, skip image build.") @@ -634,7 +642,9 @@ object ExcludedDependencies { */ object OldDeps { - lazy val project = Project("oldDeps", file("dev"), settings = oldDepsSettings) + lazy val project = Project("oldDeps", file("dev")) + .settings(oldDepsSettings) + .disablePlugins(com.typesafe.sbt.pom.PomReaderPlugin) lazy val allPreviousArtifactKeys = Def.settingDyn[Seq[Set[ModuleID]]] { SparkBuild.mimaProjects @@ -650,7 +660,10 @@ object OldDeps { } object Catalyst { - lazy val settings = antlr4Settings ++ Seq( + import com.simplytyped.Antlr4Plugin + import com.simplytyped.Antlr4Plugin.autoImport._ + + lazy val settings = Antlr4Plugin.projectSettings ++ Seq( antlr4Version in Antlr4 := SbtPomKeys.effectivePom.value.getProperties.get("antlr4.version").asInstanceOf[String], antlr4PackageName in Antlr4 := Some("org.apache.spark.sql.catalyst.parser"), antlr4GenListener in Antlr4 := true, @@ -660,6 +673,9 @@ object Catalyst { } object SQL { + + import sbtavro.SbtAvro.autoImport._ + lazy val settings = Seq( initialCommands in console := """ @@ -681,8 +697,10 @@ object SQL { |import sqlContext.implicits._ |import sqlContext._ """.stripMargin, - cleanupCommands in console := "sc.stop()" + cleanupCommands in console := "sc.stop()", + Test / avroGenerate := (Compile / avroGenerate).value ) + } object Hive { @@ -721,27 +739,27 @@ object Hive { object Assembly { import sbtassembly.AssemblyUtils._ - import sbtassembly.Plugin._ - import AssemblyKeys._ + import sbtassembly.AssemblyPlugin.autoImport._ val hadoopVersion = taskKey[String]("The version of hadoop that spark is compiled against.") - lazy val settings = assemblySettings ++ Seq( + lazy val settings = baseAssemblySettings ++ Seq( test in assembly := {}, hadoopVersion := { sys.props.get("hadoop.version") .getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String]) }, - jarName in assembly := { + assemblyJarName in assembly := { + lazy val 
hdpVersion = hadoopVersion.value if (moduleName.value.contains("streaming-kafka-0-10-assembly") || moduleName.value.contains("streaming-kinesis-asl-assembly")) { s"${moduleName.value}-${version.value}.jar" } else { - s"${moduleName.value}-${version.value}-hadoop${hadoopVersion.value}.jar" + s"${moduleName.value}-${version.value}-hadoop${hdpVersion}.jar" } }, - jarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", - mergeStrategy in assembly := { + assemblyJarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", + assemblyMergeStrategy in assembly := { case m if m.toLowerCase(Locale.ROOT).endsWith("manifest.mf") => MergeStrategy.discard case m if m.toLowerCase(Locale.ROOT).matches("meta-inf.*\\.sf$") @@ -756,8 +774,7 @@ object Assembly { } object PySparkAssembly { - import sbtassembly.Plugin._ - import AssemblyKeys._ + import sbtassembly.AssemblyPlugin.autoImport._ import java.util.zip.{ZipOutputStream, ZipEntry} lazy val settings = Seq( @@ -807,8 +824,13 @@ object PySparkAssembly { object Unidoc { import BuildCommons._ - import sbtunidoc.Plugin._ - import UnidocKeys._ + import sbtunidoc.BaseUnidocPlugin + import sbtunidoc.JavaUnidocPlugin + import sbtunidoc.ScalaUnidocPlugin + import sbtunidoc.BaseUnidocPlugin.autoImport._ + import sbtunidoc.GenJavadocPlugin.autoImport._ + import sbtunidoc.JavaUnidocPlugin.autoImport._ + import sbtunidoc.ScalaUnidocPlugin.autoImport._ private def ignoreUndocumentedPackages(packages: Seq[Seq[File]]): Seq[Seq[File]] = { packages @@ -838,6 +860,7 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalog/v2/utils"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hive"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/v2/avro"))) + .map(_.filterNot(_.getCanonicalPath.contains("SSLOptions"))) } private def ignoreClasspaths(classpaths: Seq[Classpath]): Seq[Classpath] = { @@ -848,7 +871,10 @@ object Unidoc { val unidocSourceBase = settingKey[String]("Base URL of source links in Scaladoc.") - lazy val settings = scalaJavaUnidocSettings ++ Seq ( + lazy val settings = BaseUnidocPlugin.projectSettings ++ + ScalaUnidocPlugin.projectSettings ++ + JavaUnidocPlugin.projectSettings ++ + Seq ( publish := {}, unidocProjectFilter in(ScalaUnidoc, unidoc) := diff --git a/project/build.properties b/project/build.properties index 23aa187fb35a7..b1e5e313d853f 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=0.13.18 +sbt.version=1.3.13 diff --git a/project/plugins.sbt b/project/plugins.sbt index 5f21d8126e48a..da466da9945c1 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -23,8 +23,7 @@ libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "8.25" // checkstyle uses guava 23.0. 
libraryDependencies += "com.google.guava" % "guava" % "23.0" -// need to make changes to uptake sbt 1.0 support in "com.eed3si9n" % "sbt-assembly" % "1.14.5" -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") @@ -32,19 +31,12 @@ addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") -// SPARK-29560 Only sbt-mima-plugin needs this repo -resolvers += Resolver.url("bintray", - new java.net.URL("https://dl.bintray.com/typesafe/sbt-plugins"))(Resolver.defaultIvyPatterns) -addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.3.0") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0") -// sbt 1.0.0 support: https://github.com/AlpineNow/junit_xml_listener/issues/6 -addSbtPlugin("com.alpinenow" % "junit_xml_listener" % "0.5.1") +addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.4.3") -// need to make changes to uptake sbt 1.0 support in "com.eed3si9n" % "sbt-unidoc" % "0.4.1" -addSbtPlugin("com.eed3si9n" % "sbt-unidoc" % "0.3.3") - -// need to make changes to uptake sbt 1.0 support in "com.cavorite" % "sbt-avro-1-7" % "1.1.2" -addSbtPlugin("com.cavorite" % "sbt-avro" % "0.3.2") +addSbtPlugin("com.cavorite" % "sbt-avro" % "2.1.1") +libraryDependencies += "org.apache.avro" % "avro-compiler" % "1.8.2" addSbtPlugin("io.spray" % "sbt-revolver" % "0.9.1") @@ -52,14 +44,6 @@ libraryDependencies += "org.ow2.asm" % "asm" % "7.2" libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" -// sbt 1.0.0 support: https://github.com/ihji/sbt-antlr4/issues/14 -addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.7.13") - -// Spark uses a custom fork of the sbt-pom-reader plugin which contains a patch to fix issues -// related to test-jar dependencies (https://github.com/sbt/sbt-pom-reader/pull/14). The source for -// this fork is published at https://github.com/JoshRosen/sbt-pom-reader/tree/v1.0.0-spark -// and corresponds to commit b160317fcb0b9d1009635a7c5aa05d0f3be61936 in that repository. -// In the long run, we should try to merge our patch upstream and switch to an upstream version of -// the plugin; this is tracked at SPARK-14401. +addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") -addSbtPlugin("org.spark-project" % "sbt-pom-reader" % "1.0.0-spark") +addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index f9bc499961ad7..a6fee8616df11 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -24,6 +24,7 @@ import scala.reflect.runtime.universe.runtimeMirror import scala.util.Try import org.clapper.classutil.ClassFinder +import org.objectweb.asm.Opcodes /** * A tool for generating classes to be excluded during binary checking with MIMA. It is expected @@ -146,7 +147,7 @@ object GenerateMIMAIgnore { * and subpackages both from directories and jars present on the classpath. 
*/ private def getClasses(packageName: String): Set[String] = { - val finder = ClassFinder() + val finder = ClassFinder(maybeOverrideAsmVersion = Some(Opcodes.ASM7)) finder .getClasses .map(_.name) From 37e1b0c4a5e999ba420cc6eacb2f5a7100fef029 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 8 Oct 2020 10:32:30 +0900 Subject: [PATCH 0191/1009] [SPARK-33086][PYTHON] Add static annotations for pyspark.resource ### What changes were proposed in this pull request? This PR replaces dynamically generated annotations for following modules: - `pyspark.resource.information` - `pyspark.resource.profile` - `pyspark.resource.requests` ### Why are the changes needed? These modules where not manually annotated in `pyspark-stubs`, but are part of the public API and we should provide more precise annotations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? MyPy tests: ``` mypy --no-incremental --config python/mypy.ini python/pyspark ``` Closes #29969 from zero323/SPARK-32714-FOLLOW-UP-RESOURCE. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/resource/information.pyi | 8 ++-- python/pyspark/resource/profile.pyi | 31 ++++++++----- python/pyspark/resource/requests.pyi | 60 +++++++++++++++---------- 3 files changed, 60 insertions(+), 39 deletions(-) diff --git a/python/pyspark/resource/information.pyi b/python/pyspark/resource/information.pyi index 7baa6ca8520bd..5474afa659746 100644 --- a/python/pyspark/resource/information.pyi +++ b/python/pyspark/resource/information.pyi @@ -16,11 +16,11 @@ # specific language governing permissions and limitations # under the License. -from typing import Any +from typing import List class ResourceInformation: - def __init__(self, name: Any, addresses: Any) -> None: ... + def __init__(self, name: str, addresses: List[str]) -> None: ... @property - def name(self): ... + def name(self) -> str: ... @property - def addresses(self): ... + def addresses(self) -> List[str]: ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 8ce7d93b29e93..d6ea340bb510f 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,30 +22,39 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import Any, Optional +from typing import overload, Dict, Optional, Union +from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: + @overload def __init__( self, - _java_resource_profile: Optional[Any] = ..., - _exec_req: Any = ..., - _task_req: Any = ..., + _java_resource_profile: JavaObject, + ) -> None: ... + @overload + def __init__( + self, + _java_resource_profile: None = ..., + _exec_req: Dict[str, ExecutorResourceRequest] = ..., + _task_req: Dict[str, TaskResourceRequest] = ..., ) -> None: ... @property - def id(self): ... + def id(self) -> int: ... @property - def taskResources(self): ... + def taskResources(self) -> Dict[str, TaskResourceRequest]: ... @property - def executorResources(self): ... + def executorResources(self) -> Dict[str, ExecutorResourceRequest]: ... class ResourceProfileBuilder: def __init__(self) -> None: ... - def require(self, resourceRequest: Any): ... + def require( + self, resourceRequest: Union[ExecutorResourceRequest, TaskResourceRequests] + ): ... def clearExecutorResourceRequests(self) -> None: ... def clearTaskResourceRequests(self) -> None: ... @property - def taskResources(self): ... 
+ def taskResources(self) -> Dict[str, TaskResourceRequest]: ... @property - def executorResources(self): ... + def executorResources(self) -> Dict[str, ExecutorResourceRequest]: ... @property - def build(self): ... + def build(self) -> ResourceProfile: ... diff --git a/python/pyspark/resource/requests.pyi b/python/pyspark/resource/requests.pyi index f9448d0780409..6ba14d65eb516 100644 --- a/python/pyspark/resource/requests.pyi +++ b/python/pyspark/resource/requests.pyi @@ -16,56 +16,68 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Optional +from typing import overload, Dict, Optional + +from py4j.java_gateway import JVMView # type: ignore[import] class ExecutorResourceRequest: def __init__( self, - resourceName: Any, - amount: Any, + resourceName: str, + amount: int, discoveryScript: str = ..., vendor: str = ..., ) -> None: ... @property - def resourceName(self): ... + def resourceName(self) -> str: ... @property - def amount(self): ... + def amount(self) -> int: ... @property - def discoveryScript(self): ... + def discoveryScript(self) -> str: ... @property - def vendor(self): ... + def vendor(self) -> str: ... class ExecutorResourceRequests: + @overload + def __init__(self, _jvm: JVMView) -> None: ... + @overload def __init__( - self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + self, + _jvm: None = ..., + _requests: Optional[Dict[str, ExecutorResourceRequest]] = ..., ) -> None: ... - def memory(self, amount: Any): ... - def memoryOverhead(self, amount: Any): ... - def pysparkMemory(self, amount: Any): ... - def offheapMemory(self, amount: Any): ... - def cores(self, amount: Any): ... + def memory(self, amount: str) -> ExecutorResourceRequests: ... + def memoryOverhead(self, amount: str) -> ExecutorResourceRequests: ... + def pysparkMemory(self, amount: str) -> ExecutorResourceRequests: ... + def offheapMemory(self, amount: str) -> ExecutorResourceRequests: ... + def cores(self, amount: int) -> ExecutorResourceRequests: ... def resource( self, - resourceName: Any, - amount: Any, + resourceName: str, + amount: int, discoveryScript: str = ..., vendor: str = ..., - ): ... + ) -> ExecutorResourceRequests: ... @property - def requests(self): ... + def requests(self) -> Dict[str, ExecutorResourceRequest]: ... class TaskResourceRequest: - def __init__(self, resourceName: Any, amount: Any) -> None: ... + def __init__(self, resourceName: str, amount: float) -> None: ... @property - def resourceName(self): ... + def resourceName(self) -> str: ... @property - def amount(self): ... + def amount(self) -> float: ... class TaskResourceRequests: + @overload + def __init__(self, _jvm: JVMView) -> None: ... + @overload def __init__( - self, _jvm: Optional[Any] = ..., _requests: Optional[Any] = ... + self, + _jvm: None = ..., + _requests: Optional[Dict[str, TaskResourceRequest]] = ..., ) -> None: ... - def cpus(self, amount: Any): ... - def resource(self, resourceName: Any, amount: Any): ... + def cpus(self, amount: int) -> TaskResourceRequests: ... + def resource(self, resourceName: str, amount: float) -> TaskResourceRequests: ... @property - def requests(self): ... + def requests(self) -> Dict[str, TaskResourceRequest]: ... From 473b3ba6aa3ead60c6f3d66c982b7883e39b7ad2 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 8 Oct 2020 10:37:42 +0900 Subject: [PATCH 0192/1009] [SPARK-32511][FOLLOW-UP][SQL][R][PYTHON] Add dropFields to SparkR and PySpark ### What changes were proposed in this pull request? 
This PR adds `dropFields` method to: - PySpark `Column` - SparkR `Column` ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? No, new API. ### How was this patch tested? - New unit tests. - Manual verification of examples / doctests. - Manual run of MyPy tests Closes #29967 from zero323/SPARK-32511-FOLLOW-UP-PYSPARK-SPARKR. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 1 + R/pkg/R/column.R | 69 +++++++++++++++++++++++++ R/pkg/R/generics.R | 3 ++ R/pkg/tests/fulltests/test_sparkSQL.R | 19 ++++++- python/pyspark/sql/column.py | 51 ++++++++++++++++++ python/pyspark/sql/column.pyi | 1 + python/pyspark/sql/tests/test_column.py | 22 ++++++++ 7 files changed, 165 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 25162f3e23b38..2fadf20da491c 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -272,6 +272,7 @@ exportMethods("%<=>%", "degrees", "dense_rank", "desc", + "dropFields", "element_at", "encode", "endsWith", diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 36d792c647e52..c5fcfaff94029 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -387,3 +387,72 @@ setMethod("withField", jc <- callJMethod(x@jc, "withField", fieldName, col@jc) column(jc) }) + +#' dropFields +#' +#' Drops fields in a struct \code{Column} by name. +#' +#' @param x a Column +#' @param ... names of the fields to be dropped. +#' +#' @rdname dropFields +#' @aliases dropFields dropFields,Column-method +#' @examples +#' \dontrun{ +#' df <- select( +#' createDataFrame(iris), +#' alias( +#' struct( +#' column("Sepal_Width"), column("Sepal_Length"), +#' alias( +#' struct( +#' column("Petal_Width"), column("Petal_Length"), +#' alias( +#' column("Petal_Width") * column("Petal_Length"), +#' "Petal_Product" +#' ) +#' ), +#' "Petal" +#' ) +#' ), +#' "dimensions" +#' ) +#' ) +#' head(withColumn(df, "dimensions", dropFields(df$dimensions, "Petal"))) +#' +#' head( +#' withColumn( +#' df, "dimensions", +#' dropFields(df$dimensions, "Sepal_Width", "Sepal_Length") +#' ) +#' ) +#' +#' # This method supports dropping multiple nested fields directly e.g. +#' head( +#' withColumn( +#' df, "dimensions", +#' dropFields(df$dimensions, "Petal.Petal_Width", "Petal.Petal_Length") +#' ) +#' ) +#' +#' # However, if you are going to add/replace multiple nested fields, +#' # it is preffered to extract out the nested struct before +#' # adding/replacing multiple fields e.g. +#' head( +#' withColumn( +#' df, "dimensions", +#' withField( +#' column("dimensions"), +#' "Petal", +#' dropFields(column("dimensions.Petal"), "Petal_Width", "Petal_Length") +#' ) +#' ) +#' ) +#' } +#' @note dropFields since 3.1.0 +setMethod("dropFields", + signature(x = "Column"), + function(x, ...) { + jc <- callJMethod(x@jc, "dropFields", list(...)) + column(jc) + }) diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 993fc758adbe5..b9cf0261adc28 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -732,6 +732,9 @@ setGeneric("%<=>%", function(x, value) { standardGeneric("%<=>%") }) #' @rdname withField setGeneric("withField", function(x, fieldName, col) { standardGeneric("withField") }) +#' @rdname dropFields +setGeneric("dropFields", function(x, ...) 
{ standardGeneric("dropFields") }) + ###################### WindowSpec Methods ########################## #' @rdname partitionBy diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c3b271b1205c5..2ac3093e77ea8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1809,7 +1809,7 @@ test_that("column functions", { expect_equal(actual, expected) # Test withField - lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24}}") + lines <- c("{\"Person\": {\"name\":\"Bob\", \"age\":24, \"height\": 170}}") jsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(lines, jsonPath) df <- read.df(jsonPath, "json") @@ -1820,6 +1820,23 @@ test_that("column functions", { ) ) expect_equal(result, data.frame(dummy = 42)) + + # Test dropFields + expect_setequal( + colnames(select( + withColumn(df, "Person", dropFields(df$Person, "age")), + column("Person.*") + )), + c("name", "height") + ) + + expect_equal( + colnames(select( + withColumn(df, "Person", dropFields(df$Person, "height", "name")), + column("Person.*") + )), + "age" + ) }) test_that("column binary mathfunctions", { diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 0e073d2a5da28..3cf7a033641d8 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -358,6 +358,57 @@ def withField(self, fieldName, col): return Column(self._jc.withField(fieldName, col._jc)) + @since(3.1) + def dropFields(self, *fieldNames): + """ + An expression that drops fields in :class:`StructType` by name. + + >>> from pyspark.sql import Row + >>> from pyspark.sql.functions import col, lit + >>> df = spark.createDataFrame([ + ... Row(a=Row(b=1, c=2, d=3, e=Row(f=4, g=5, h=6)))]) + >>> df.withColumn('a', df['a'].dropFields('b')).show() + +-----------------+ + | a| + +-----------------+ + |{2, 3, {4, 5, 6}}| + +-----------------+ + + >>> df.withColumn('a', df['a'].dropFields('b', 'c')).show() + +--------------+ + | a| + +--------------+ + |{3, {4, 5, 6}}| + +--------------+ + + This method supports dropping multiple nested fields directly e.g. + + >>> df.withColumn("a", col("a").dropFields("e.g", "e.h")).show() + +--------------+ + | a| + +--------------+ + |{1, 2, 3, {4}}| + +--------------+ + + However, if you are going to add/replace multiple nested fields, + it is preffered to extract out the nested struct before + adding/replacing multiple fields e.g. + + >>> df.select(col("a").withField( + ... "e", col("a.e").dropFields("g", "h")).alias("a") + ... ).show() + +--------------+ + | a| + +--------------+ + |{1, 2, 3, {4}}| + +--------------+ + + """ + sc = SparkContext._active_spark_context + + jc = self._jc.dropFields(_to_seq(sc, fieldNames)) + return Column(jc) + def __getattr__(self, item): if item.startswith("__"): raise AttributeError(item) diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi index 261fb6e5f3911..0fbb10053fdbf 100644 --- a/python/pyspark/sql/column.pyi +++ b/python/pyspark/sql/column.pyi @@ -80,6 +80,7 @@ class Column: def getItem(self, key: Any) -> Column: ... def getField(self, name: Any) -> Column: ... def withField(self, fieldName: str, col: Column) -> Column: ... + def dropFields(self, *fieldNames: str) -> Column: ... def __getattr__(self, item: Any) -> Column: ... def __iter__(self) -> None: ... def rlike(self, item: str) -> Column: ... 
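For comparison, a minimal sketch of the pre-existing Scala `Column.dropFields` API that the SparkR wrapper (`callJMethod(x@jc, "dropFields", ...)`) and the PySpark wrapper (`self._jc.dropFields(...)`) above delegate to. The session setup, data, and field names here are assumptions for illustration, not taken from the patch.

```
// Sketch only: Scala-side equivalent of the new SparkR/PySpark dropFields bindings.
// Assumes a local SparkSession; the example data and field names are made up.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.struct

val spark = SparkSession.builder().master("local[*]").appName("dropFields-sketch").getOrCreate()
import spark.implicits._

// Build a single struct column a with fields b, c, d.
val df = Seq((1, 2, 3)).toDF("b", "c", "d").select(struct($"b", $"c", $"d").as("a"))

// Drop one or more nested fields by name, same semantics as the wrappers above.
df.withColumn("a", $"a".dropFields("b", "c")).show()  // a keeps only field d: {3}

spark.stop()
```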
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 7e03e2ef3e6d0..4b4ac3bf9cd6c 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -156,6 +156,28 @@ def test_with_field(self): 'fieldName should be a string', lambda: df['a'].withField(col('b'), lit(3))) + def test_drop_fields(self): + df = self.spark.createDataFrame([Row(a=Row(b=1, c=2, d=Row(e=3, f=4)))]) + self.assertIsInstance(df["a"].dropFields("b"), Column) + self.assertIsInstance(df["a"].dropFields("b", "c"), Column) + self.assertIsInstance(df["a"].dropFields("d.e"), Column) + + result = df.select( + df["a"].dropFields("b").alias("a1"), + df["a"].dropFields("d.e").alias("a2"), + ).first().asDict(True) + + self.assertTrue( + "b" not in result["a1"] and + "c" in result["a1"] and + "d" in result["a1"] + ) + + self.assertTrue( + "e" not in result["a2"]["d"] and + "f" in result["a2"]["d"] + ) + if __name__ == "__main__": import unittest from pyspark.sql.tests.test_column import * # noqa: F401 From 39510b0e9b79ca59c073bed2219d35d4b81fb7f1 Mon Sep 17 00:00:00 2001 From: Karen Feng Date: Thu, 8 Oct 2020 12:05:39 +0900 Subject: [PATCH 0193/1009] [SPARK-32793][SQL] Add raise_error function, adds error message parameter to assert_true ## What changes were proposed in this pull request? Adds a SQL function `raise_error` which underlies the refactored `assert_true` function. `assert_true` now also (optionally) accepts a custom error message field. `raise_error` is exposed in SQL, Python, Scala, and R. `assert_true` was previously only exposed in SQL; it is now also exposed in Python, Scala, and R. ### Why are the changes needed? Improves usability of `assert_true` by clarifying error messaging, and adds the useful helper function `raise_error`. ### Does this PR introduce _any_ user-facing change? Yes: - Adds `raise_error` function to the SQL, Python, Scala, and R APIs. - Adds `assert_true` function to the SQL, Python and R APIs. ### How was this patch tested? Adds unit tests in SQL, Python, Scala, and R for `assert_true` and `raise_error`. Closes #29947 from karenfeng/spark-32793. 
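A minimal Scala sketch of how the new variants are expected to be used (illustrative only; the DataFrame, message strings, and generic exception handling below are assumptions, not taken from this patch):

```
// Sketch: exercising the Scala assert_true / raise_error variants added by this PR.
// Assumes a local SparkSession; the data and messages are made up for illustration.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{assert_true, col, lit, raise_error}

val spark = SparkSession.builder().master("local[*]").appName("raise-error-sketch").getOrCreate()
import spark.implicits._

val df = Seq((0, 1)).toDF("a", "b")

// Condition holds: assert_true evaluates to null and the query succeeds.
df.select(assert_true(col("a") < col("b"), lit("a must be smaller than b"))).collect()

// Condition fails: the custom message is carried by the exception thrown at execution time.
try {
  df.select(assert_true(col("a") > col("b"), lit("a must be greater than b"))).collect()
} catch {
  case e: Exception => println(s"assert_true failed: ${e.getMessage}")
}

// raise_error always fails the query with the given message.
try {
  df.select(raise_error(lit("unconditional error"))).collect()
} catch {
  case e: Exception => println(s"raise_error threw: ${e.getMessage}")
}

spark.stop()
```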
Lead-authored-by: Karen Feng Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- R/pkg/R/functions.R | 49 +++++++++++ R/pkg/R/generics.R | 8 ++ R/pkg/tests/fulltests/test_sparkSQL.R | 18 ++++ python/docs/source/reference/pyspark.sql.rst | 2 + python/pyspark/sql/functions.py | 55 +++++++++++- python/pyspark/sql/functions.pyi | 2 + python/pyspark/sql/tests/test_functions.py | 50 +++++++++++ .../catalyst/analysis/FunctionRegistry.scala | 1 + .../spark/sql/catalyst/expressions/misc.scala | 84 +++++++++++++------ .../expressions/CodeGenerationSuite.scala | 2 +- .../expressions/ExpressionEvalHelper.scala | 6 +- .../expressions/MiscExpressionsSuite.scala | 30 +++---- .../org/apache/spark/sql/functions.scala | 30 +++++++ .../sql-functions/sql-expression-schema.md | 5 +- .../sql-tests/inputs/misc-functions.sql | 12 +++ .../sql-tests/results/misc-functions.sql.out | 81 +++++++++++++++++- .../spark/sql/ColumnExpressionSuite.scala | 51 +++++++++++ .../sql/expressions/ExpressionInfoSuite.scala | 17 +++- 18 files changed, 450 insertions(+), 53 deletions(-) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 959edf29e2429..ce384a64bccaf 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -826,6 +826,55 @@ setMethod("xxhash64", column(jc) }) +#' @details +#' \code{assert_true}: Returns null if the input column is true; throws an exception +#' with the provided error message otherwise. +#' +#' @param errMsg (optional) The error message to be thrown. +#' +#' @rdname column_misc_functions +#' @aliases assert_true assert_true,Column-method +#' @examples +#' \dontrun{ +#' tmp <- mutate(df, v1 = assert_true(df$vs < 2), +#' v2 = assert_true(df$vs < 2, "custom error message"), +#' v3 = assert_true(df$vs < 2, df$vs)) +#' head(tmp)} +#' @note assert_true since 3.1.0 +setMethod("assert_true", + signature(x = "Column"), + function(x, errMsg = NULL) { + jc <- if (is.null(errMsg)) { + callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc) + } else { + if (is.character(errMsg) && length(errMsg) == 1) { + errMsg <- lit(errMsg) + } + callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc, errMsg@jc) + } + column(jc) + }) + +#' @details +#' \code{raise_error}: Throws an exception with the provided error message. +#' +#' @rdname column_misc_functions +#' @aliases raise_error raise_error,characterOrColumn-method +#' @examples +#' \dontrun{ +#' tmp <- mutate(df, v1 = raise_error("error message")) +#' head(tmp)} +#' @note raise_error since 3.1.0 +setMethod("raise_error", + signature(x = "characterOrColumn"), + function(x) { + if (is.character(x) && length(x) == 1) { + x <- lit(x) + } + jc <- callJStatic("org.apache.spark.sql.functions", "raise_error", x@jc) + column(jc) + }) + #' @details #' \code{dayofmonth}: Extracts the day of the month as an integer from a #' given date/timestamp/string. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index b9cf0261adc28..6b732e594cd3f 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -850,6 +850,10 @@ setGeneric("arrays_zip_with", function(x, y, f) { standardGeneric("arrays_zip_wi #' @name NULL setGeneric("ascii", function(x) { standardGeneric("ascii") }) +#' @rdname column_misc_functions +#' @name NULL +setGeneric("assert_true", function(x, errMsg = NULL) { standardGeneric("assert_true") }) + #' @param x Column to compute on or a GroupedData object. #' @param ... additional argument(s) when \code{x} is a GroupedData object. 
#' @rdname avg @@ -1223,6 +1227,10 @@ setGeneric("posexplode_outer", function(x) { standardGeneric("posexplode_outer") #' @name NULL setGeneric("quarter", function(x) { standardGeneric("quarter") }) +#' @rdname column_misc_functions +#' @name NULL +setGeneric("raise_error", function(x) { standardGeneric("raise_error") }) + #' @rdname column_nonaggregate_functions #' @name NULL setGeneric("rand", function(seed) { standardGeneric("rand") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 2ac3093e77ea8..268f5734813ba 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -3945,6 +3945,24 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", { dropTempView("cars") }) +test_that("assert_true, raise_error", { + df <- read.json(jsonPath) + filtered <- filter(df, "age < 20") + + expect_equal(collect(select(filtered, assert_true(filtered$age < 20)))$age, c(NULL)) + expect_equal(collect(select(filtered, assert_true(filtered$age < 20, "error message")))$age, + c(NULL)) + expect_equal(collect(select(filtered, assert_true(filtered$age < 20, filtered$name)))$age, + c(NULL)) + expect_error(collect(select(df, assert_true(df$age < 20))), "is not true!") + expect_error(collect(select(df, assert_true(df$age < 20, "error message"))), + "error message") + expect_error(collect(select(df, assert_true(df$age < 20, df$name))), "Michael") + + expect_error(collect(select(filtered, raise_error("error message"))), "error message") + expect_error(collect(select(filtered, raise_error(filtered$name))), "Justin") +}) + compare_list <- function(list1, list2) { # get testthat to show the diff by first making the 2 lists equal in length expect_equal(length(list1), length(list2)) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 692d098c89cdc..0ed2f1b86ada5 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -292,6 +292,7 @@ Functions asc_nulls_last ascii asin + assert_true atan atan2 avg @@ -420,6 +421,7 @@ Functions pow quarter radians + raise_error rand randn rank diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 7007d505d048d..97146fdb804ab 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1592,6 +1592,57 @@ def xxhash64(*cols): return Column(jc) +@since(3.1) +def assert_true(col, errMsg=None): + """ + Returns null if the input column is true; throws an exception with the provided error message + otherwise. 
+ + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b).alias('r')).collect() + [Row(r=None)] + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b, df.a).alias('r')).collect() + [Row(r=None)] + >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) + >>> df.select(assert_true(df.a < df.b, 'error').alias('r')).collect() + [Row(r=None)] + """ + sc = SparkContext._active_spark_context + if errMsg is None: + return Column(sc._jvm.functions.assert_true(_to_java_column(col))) + if not isinstance(errMsg, (str, Column)): + raise TypeError( + "errMsg should be a Column or a str, got {}".format(type(errMsg)) + ) + + errMsg = ( + _create_column_from_literal(errMsg) + if isinstance(errMsg, str) + else _to_java_column(errMsg) + ) + return Column(sc._jvm.functions.assert_true(_to_java_column(col), errMsg)) + + +@since(3.1) +def raise_error(errMsg): + """ + Throws an exception with the provided error message. + """ + if not isinstance(errMsg, (str, Column)): + raise TypeError( + "errMsg should be a Column or a str, got {}".format(type(errMsg)) + ) + + sc = SparkContext._active_spark_context + errMsg = ( + _create_column_from_literal(errMsg) + if isinstance(errMsg, str) + else _to_java_column(errMsg) + ) + return Column(sc._jvm.functions.raise_error(errMsg)) + + # ---------------------- String/Binary functions ------------------------------ _string_functions = { @@ -3448,14 +3499,14 @@ def bucket(numBuckets, col): ... ).createOrReplace() .. warning:: - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. """ if not isinstance(numBuckets, (int, Column)): raise TypeError( - "numBuckets should be a Column or and int, got {}".format(type(numBuckets)) + "numBuckets should be a Column or an int, got {}".format(type(numBuckets)) ) sc = SparkContext._active_spark_context diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 8efe65205315e..6249bca5cef68 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -137,6 +137,8 @@ def sha1(col: ColumnOrName) -> Column: ... def sha2(col: ColumnOrName, numBits: int) -> Column: ... def hash(*cols: ColumnOrName) -> Column: ... def xxhash64(*cols: ColumnOrName) -> Column: ... +def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...): ... +def raise_error(errMsg: Union[Column, str]): ... def concat(*cols: ColumnOrName) -> Column: ... def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... def decode(col: ColumnOrName, charset: str) -> Column: ... 
diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 8d05ed28b8d4e..26d260fe77b0c 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -19,6 +19,7 @@ from itertools import chain import re +from py4j.protocol import Py4JJavaError from pyspark.sql import Row, Window from pyspark.sql.functions import udf, input_file_name, col, percentile_approx, lit from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -524,6 +525,55 @@ def test_datetime_functions(self): parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + def test_assert_true(self): + from pyspark.sql.functions import assert_true + + df = self.spark.range(3) + + self.assertEquals( + df.select(assert_true(df.id < 3)).toDF("val").collect(), + [Row(val=None), Row(val=None), Row(val=None)], + ) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(assert_true(df.id < 2, 'too big')).toDF("val").collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("too big", str(cm.exception)) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(assert_true(df.id < 2, df.id * 1e6)).toDF("val").collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("2000000", str(cm.exception)) + + with self.assertRaises(TypeError) as cm: + df.select(assert_true(df.id < 2, 5)) + self.assertEquals( + "errMsg should be a Column or a str, got ", + str(cm.exception) + ) + + def test_raise_error(self): + from pyspark.sql.functions import raise_error + + df = self.spark.createDataFrame([Row(id="foobar")]) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(raise_error(df.id)).collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("foobar", str(cm.exception)) + + with self.assertRaises(Py4JJavaError) as cm: + df.select(raise_error("barfoo")).collect() + self.assertIn("java.lang.RuntimeException", str(cm.exception)) + self.assertIn("barfoo", str(cm.exception)) + + with self.assertRaises(TypeError) as cm: + df.select(raise_error(None)) + self.assertEquals( + "errMsg should be a Column or a str, got ", + str(cm.exception) + ) + if __name__ == "__main__": import unittest diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3fae34cbf00c2..508239077a70e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -479,6 +479,7 @@ object FunctionRegistry { // misc functions expression[AssertTrue]("assert_true"), + expression[RaiseError]("raise_error"), expression[Crc32]("crc32"), expression[Md5]("md5"), expression[Uuid]("uuid"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 1eec26c8e987a..4e71c8c103889 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -53,51 +53,81 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { } /** - * A function throws an exception if 'condition' is not true. 
+ * Throw with the result of an expression (used for debugging). */ @ExpressionDescription( - usage = "_FUNC_(expr) - Throws an exception if `expr` is not true.", + usage = "_FUNC_(expr) - Throws an exception with `expr`.", examples = """ Examples: - > SELECT _FUNC_(0 < 1); - NULL + > SELECT _FUNC_('custom error message'); + java.lang.RuntimeException + custom error message """, - since = "2.0.0") -case class AssertTrue(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + since = "3.1.0") +case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + override def foldable: Boolean = false override def nullable: Boolean = true - - override def inputTypes: Seq[DataType] = Seq(BooleanType) - override def dataType: DataType = NullType + override def inputTypes: Seq[AbstractDataType] = Seq(StringType) - override def prettyName: String = "assert_true" + override def prettyName: String = "raise_error" - private val errMsg = s"'${child.simpleString(SQLConf.get.maxToStringFields)}' is not true!" - - override def eval(input: InternalRow) : Any = { - val v = child.eval(input) - if (v == null || java.lang.Boolean.FALSE.equals(v)) { - throw new RuntimeException(errMsg) - } else { - null + override def eval(input: InternalRow): Any = { + val value = child.eval(input) + if (value == null) { + throw new RuntimeException() } + throw new RuntimeException(value.toString) } + // if (true) is to avoid codegen compilation exception that statement is unreachable override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) + ExprCode( + code = code"""${eval.code} + |if (true) { + | if (${eval.isNull}) { + | throw new RuntimeException(); + | } + | throw new RuntimeException(${eval.value}.toString()); + |}""".stripMargin, + isNull = TrueLiteral, + value = JavaCode.defaultLiteral(dataType) + ) + } +} - // Use unnamed reference that doesn't create a local field here to reduce the number of fields - // because errMsgField is used only when the value is null or false. - val errMsgField = ctx.addReferenceObj("errMsg", errMsg) - ExprCode(code = code"""${eval.code} - |if (${eval.isNull} || !${eval.value}) { - | throw new RuntimeException($errMsgField); - |}""".stripMargin, isNull = TrueLiteral, - value = JavaCode.defaultLiteral(dataType)) +/** + * A function that throws an exception if 'condition' is not true. 
+ */ +@ExpressionDescription( + usage = "_FUNC_(expr) - Throws an exception if `expr` is not true.", + examples = """ + Examples: + > SELECT _FUNC_(0 < 1); + NULL + """, + since = "2.0.0") +case class AssertTrue(left: Expression, right: Expression, child: Expression) + extends RuntimeReplaceable { + + override def prettyName: String = "assert_true" + + def this(left: Expression, right: Expression) = { + this(left, right, If(left, Literal(null), RaiseError(right))) } - override def sql: String = s"assert_true(${child.sql})" + def this(left: Expression) = { + this(left, Literal(s"'${left.simpleString(SQLConf.get.maxToStringFields)}' is not true!")) + } + + override def flatArguments: Iterator[Any] = Iterator(left, right) + override def exprsReplaced: Seq[Expression] = Seq(left, right) +} + +object AssertTrue { + def apply(left: Expression): AssertTrue = new AssertTrue(left) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index f1de63adc3d9a..adaabfe4d32bb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -332,7 +332,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-17160: field names are properly escaped by AssertTrue") { - GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)) :: Nil) + GenerateUnsafeProjection.generate(AssertTrue(Cast(Literal("\""), BooleanType)).child :: Nil) } test("should not apply common subexpression elimination on conditional expressions") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 341b26ddf6575..d0b0d04d1f719 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -164,7 +164,11 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB val errMsg = intercept[T] { eval }.getMessage - if (!errMsg.contains(expectedErrMsg)) { + if (errMsg == null) { + if (expectedErrMsg != null) { + fail(s"Expected null error message, but `$errMsg` found") + } + } else if (!errMsg.contains(expectedErrMsg)) { fail(s"Expected error message is `$expectedErrMsg`, but `$errMsg` found") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala index 4b2d153a28cc8..d42081024c1dd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MiscExpressionsSuite.scala @@ -26,21 +26,21 @@ import org.apache.spark.sql.types._ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { - test("assert_true") { - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) - } - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) - } - intercept[RuntimeException] { - 
checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) - } - intercept[RuntimeException] { - checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) - } - checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) - checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) + test("RaiseError") { + checkExceptionInExpression[RuntimeException]( + RaiseError(Literal("error message")), + EmptyRow, + "error message" + ) + + checkExceptionInExpression[RuntimeException]( + RaiseError(Literal.create(null, StringType)), + EmptyRow, + null + ) + + // Expects a string + assert(RaiseError(Literal(5)).checkInputDataTypes().isFailure) } test("uuid") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 2c545fe762b6d..2efe5aae09709 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2318,6 +2318,36 @@ object functions { new XxHash64(cols.map(_.expr)) } + /** + * Returns null if the condition is true, and throws an exception otherwise. + * + * @group misc_funcs + * @since 3.1.0 + */ + def assert_true(c: Column): Column = withExpr { + new AssertTrue(c.expr) + } + + /** + * Returns null if the condition is true; throws an exception with the error message otherwise. + * + * @group misc_funcs + * @since 3.1.0 + */ + def assert_true(c: Column, e: Column): Column = withExpr { + new AssertTrue(c.expr, e.expr) + } + + /** + * Throws an exception with the provided error message. + * + * @group misc_funcs + * @since 3.1.0 + */ + def raise_error(c: Column): Column = withExpr { + RaiseError(c.expr) + } + ////////////////////////////////////////////////////////////////////////////////////////////// // String functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 473204c182a69..1675fb1cc7c62 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 340 + - Number of queries: 341 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -34,7 +34,7 @@ | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | @@ -210,6 +210,7 @@ | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | | 
org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql index 95f71925e9294..907ff33000d8e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -8,3 +8,15 @@ select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); select typeof(x'ABCD'), typeof('SPARK'); select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index bd8ffb82ee129..bf45ec3d10215 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 7 +-- Number of queries: 16 -- !query @@ -56,3 +56,82 @@ select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', struct -- !query output array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +'cast(null as boolean)' is not true! 
+ + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +java.lang.RuntimeException +too big: 8 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index b11f4c603dfd6..937de92bcaba6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.mapreduce.lib.input.{TextInputFormat => NewTextInputFormat} import org.scalatest.matchers.should.Matchers._ +import org.apache.spark.SparkException import org.apache.spark.sql.UpdateFieldsBenchmark._ import org.apache.spark.sql.catalyst.expressions.{InSet, Literal, NamedExpression} import org.apache.spark.sql.execution.ProjectExec @@ -2302,4 +2303,54 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } } } + + test("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + assert(e1.getCause.getMessage == null) + + val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] { + nullDf.select(assert_true($"cond", $"n")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage == "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage == "'('a > 'b)' is not true!") + } + + test("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + assert(e1.getCause.getMessage == null) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage == "hello") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index f487a30c8dfa3..9f62ff8301ebc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -163,7 +163,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.InputFileBlockLength", // The example calls methods that return unstable results. "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection", - "org.apache.spark.sql.catalyst.expressions.SparkVersion") + "org.apache.spark.sql.catalyst.expressions.SparkVersion", + // Throws an error + "org.apache.spark.sql.catalyst.expressions.RaiseError") val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) parFuncs.foreach { funcId => @@ -197,9 +199,16 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { val exprTypesToCheck = Seq(classOf[UnaryExpression], classOf[BinaryExpression], classOf[TernaryExpression], classOf[QuaternaryExpression], classOf[SeptenaryExpression]) - // Do not check these expressions, because these expressions extend NullIntolerant - // and override the eval method to avoid evaluating input1 if input2 is 0. - val ignoreSet = Set(classOf[IntegralDivide], classOf[Divide], classOf[Remainder], classOf[Pmod]) + // Do not check these expressions, because these expressions override the eval method + val ignoreSet = Set( + // Extend NullIntolerant and avoid evaluating input1 if input2 is 0 + classOf[IntegralDivide], + classOf[Divide], + classOf[Remainder], + classOf[Pmod], + // Throws an exception, even if input is null + classOf[RaiseError] + ) val candidateExprsToCheck = spark.sessionState.functionRegistry.listFunction() .map(spark.sessionState.catalog.lookupFunctionInfo).map(_.getClassName) From bbc887bf73233b8c65ace05929290c0de4f63de8 Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Thu, 8 Oct 2020 12:18:06 +0900 Subject: [PATCH 0194/1009] [SPARK-33089][SQL] make avro format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? In `AvroUtils`'s `inferSchema()`, propagate Hadoop config from DS options to underlying HDFS file system. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("avro").options(conf).load(path) ``` The underlying file system will not receive the `conf` options. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? unit test added Closes #29971 from yuningzh-db/avro_options. 
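For illustration, a minimal Scala sketch of the behavior this enables (the path and option values below are placeholders): Hadoop configuration entries passed as data source options now also reach the file system used during schema inference.

```scala
// Placeholder Hadoop key mixed with a regular Avro data source option; before this
// change, inferSchema() built its Hadoop configuration without the supplied options.
val options = Map(
  "fs.defaultFS"    -> "hdfs://namenode:8020",  // illustrative Hadoop configuration key
  "ignoreExtension" -> "true"                   // ordinary Avro data source option
)

val df = spark.read
  .format("avro")
  .options(options)
  .load("/data/events")                         // placeholder path

df.printSchema()
```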
Authored-by: Yuning Zhang Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +- .../scala/org/apache/spark/sql/avro/AvroSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 4a38970812f9d..3583b38a01333 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -43,7 +43,7 @@ private[sql] object AvroUtils extends Logging { spark: SparkSession, options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { - val conf = spark.sessionState.newHadoopConf() + val conf = spark.sessionState.newHadoopConfWithOptions(options) val parsedOptions = new AvroOptions(options, conf) if (parsedOptions.parameters.contains(ignoreExtensionKey)) { diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b995a667be2b1..1005a274d0304 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1802,6 +1802,16 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa assert(version === SPARK_VERSION_SHORT) } } + + test("SPARK-33089: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + val conf = Map("ds_option" -> "value") + val path = "file:" + testAvro.stripPrefix("file:") + spark.read.format("avro").options(conf).load(path) + } + } } class AvroV1Suite extends AvroSuite { From 1c781a4354666bba4329e588a0e9a9fa8980303b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 8 Oct 2020 04:58:41 +0000 Subject: [PATCH 0195/1009] [SPARK-32282][SQL] Improve EnsureRequirement.reorderJoinKeys to handle more scenarios such as PartitioningCollection ### What changes were proposed in this pull request? This PR proposes to improve `EnsureRequirement.reorderJoinKeys` to handle the following scenarios: 1. If the keys cannot be reordered to match the left-side `HashPartitioning`, consider the right-side `HashPartitioning`. 2. Handle `PartitioningCollection`, which may contain `HashPartitioning`. ### Why are the changes needed? 1. For the scenario 1), the current behavior matches either the left-side `HashPartitioning` or the right-side `HashPartitioning`. This means that if both sides are `HashPartitioning`, it will try to match only the left side.
The following will not consider the right-side `HashPartitioning`: ``` val df1 = (0 until 10).map(i => (i % 5, i % 13)).toDF("i1", "j1") val df2 = (0 until 10).map(i => (i % 7, i % 11)).toDF("i2", "j2") df1.write.format("parquet").bucketBy(4, "i1", "j1").saveAsTable("t1")df2.write.format("parquet").bucketBy(4, "i2", "j2").saveAsTable("t2") val t1 = spark.table("t1") val t2 = spark.table("t2") val join = t1.join(t2, t1("i1") === t2("j2") && t1("i1") === t2("i2")) join.explain == Physical Plan == *(5) SortMergeJoin [i1#26, i1#26], [j2#31, i2#30], Inner :- *(2) Sort [i1#26 ASC NULLS FIRST, i1#26 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i1#26, i1#26, 4), true, [id=#69] : +- *(1) Project [i1#26, j1#27] : +- *(1) Filter isnotnull(i1#26) : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[i1#26,j1#27] Batched: true, DataFilters: [isnotnull(i1#26)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(i1)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 +- *(4) Sort [j2#31 ASC NULLS FIRST, i2#30 ASC NULLS FIRST], false, 0. +- Exchange hashpartitioning(j2#31, i2#30, 4), true, [id=#79]. <===== This can be removed +- *(3) Project [i2#30, j2#31] +- *(3) Filter (((j2#31 = i2#30) AND isnotnull(j2#31)) AND isnotnull(i2#30)) +- *(3) ColumnarToRow +- FileScan parquet default.t2[i2#30,j2#31] Batched: true, DataFilters: [(j2#31 = i2#30), isnotnull(j2#31), isnotnull(i2#30)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(j2), IsNotNull(i2)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 ``` 2. For the scenario 2), the current behavior does not handle `PartitioningCollection`: ``` val df1 = (0 until 100).map(i => (i % 5, i % 13)).toDF("i1", "j1") val df2 = (0 until 100).map(i => (i % 7, i % 11)).toDF("i2", "j2") val df3 = (0 until 100).map(i => (i % 5, i % 13)).toDF("i3", "j3") val join = df1.join(df2, df1("i1") === df2("i2") && df1("j1") === df2("j2")) // PartitioningCollection val join2 = join.join(df3, join("j1") === df3("j3") && join("i1") === df3("i3")) join2.explain == Physical Plan == *(9) SortMergeJoin [j1#8, i1#7], [j3#30, i3#29], Inner :- *(6) Sort [j1#8 ASC NULLS FIRST, i1#7 ASC NULLS FIRST], false, 0. <===== This can be removed : +- Exchange hashpartitioning(j1#8, i1#7, 5), true, [id=#58] <===== This can be removed : +- *(5) SortMergeJoin [i1#7, j1#8], [i2#18, j2#19], Inner : :- *(2) Sort [i1#7 ASC NULLS FIRST, j1#8 ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(i1#7, j1#8, 5), true, [id=#45] : : +- *(1) Project [_1#2 AS i1#7, _2#3 AS j1#8] : : +- *(1) LocalTableScan [_1#2, _2#3] : +- *(4) Sort [i2#18 ASC NULLS FIRST, j2#19 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i2#18, j2#19, 5), true, [id=#51] : +- *(3) Project [_1#13 AS i2#18, _2#14 AS j2#19] : +- *(3) LocalTableScan [_1#13, _2#14] +- *(8) Sort [j3#30 ASC NULLS FIRST, i3#29 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(j3#30, i3#29, 5), true, [id=#64] +- *(7) Project [_1#24 AS i3#29, _2#25 AS j3#30] +- *(7) LocalTableScan [_1#24, _2#25] ``` ### Does this PR introduce _any_ user-facing change? Yes, now from the above examples, the shuffle/sort nodes pointed by `This can be removed` are now removed: 1. 
Scenario 1): ``` == Physical Plan == *(4) SortMergeJoin [i1#26, i1#26], [i2#30, j2#31], Inner :- *(2) Sort [i1#26 ASC NULLS FIRST, i1#26 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i1#26, i1#26, 4), true, [id=#67] : +- *(1) Project [i1#26, j1#27] : +- *(1) Filter isnotnull(i1#26) : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[i1#26,j1#27] Batched: true, DataFilters: [isnotnull(i1#26)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(i1)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 +- *(3) Sort [i2#30 ASC NULLS FIRST, j2#31 ASC NULLS FIRST], false, 0 +- *(3) Project [i2#30, j2#31] +- *(3) Filter (((j2#31 = i2#30) AND isnotnull(j2#31)) AND isnotnull(i2#30)) +- *(3) ColumnarToRow +- FileScan parquet default.t2[i2#30,j2#31] Batched: true, DataFilters: [(j2#31 = i2#30), isnotnull(j2#31), isnotnull(i2#30)], Format: Parquet, Location: InMemoryFileIndex[..., PartitionFilters: [], PushedFilters: [IsNotNull(j2), IsNotNull(i2)], ReadSchema: struct, SelectedBucketsCount: 4 out of 4 ``` 2. Scenario 2): ``` == Physical Plan == *(8) SortMergeJoin [i1#7, j1#8], [i3#29, j3#30], Inner :- *(5) SortMergeJoin [i1#7, j1#8], [i2#18, j2#19], Inner : :- *(2) Sort [i1#7 ASC NULLS FIRST, j1#8 ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(i1#7, j1#8, 5), true, [id=#43] : : +- *(1) Project [_1#2 AS i1#7, _2#3 AS j1#8] : : +- *(1) LocalTableScan [_1#2, _2#3] : +- *(4) Sort [i2#18 ASC NULLS FIRST, j2#19 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(i2#18, j2#19, 5), true, [id=#49] : +- *(3) Project [_1#13 AS i2#18, _2#14 AS j2#19] : +- *(3) LocalTableScan [_1#13, _2#14] +- *(7) Sort [i3#29 ASC NULLS FIRST, j3#30 ASC NULLS FIRST], false, 0 +- Exchange hashpartitioning(i3#29, j3#30, 5), true, [id=#58] +- *(6) Project [_1#24 AS i3#29, _2#25 AS j3#30] +- *(6) LocalTableScan [_1#24, _2#25] ``` ### How was this patch tested? Added tests. Closes #29074 from imback82/reorder_keys. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../exchange/EnsureRequirements.scala | 58 +++++++-- .../exchange/EnsureRequirementsSuite.scala | 122 ++++++++++++++++++ 2 files changed, 168 insertions(+), 12 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index b176598ed8c2c..3641654b89b76 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -135,9 +135,14 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { leftKeys: IndexedSeq[Expression], rightKeys: IndexedSeq[Expression], expectedOrderOfKeys: Seq[Expression], - currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { + currentOrderOfKeys: Seq[Expression]): Option[(Seq[Expression], Seq[Expression])] = { if (expectedOrderOfKeys.size != currentOrderOfKeys.size) { - return (leftKeys, rightKeys) + return None + } + + // Check if the current order already satisfies the expected order. + if (expectedOrderOfKeys.zip(currentOrderOfKeys).forall(p => p._1.semanticEquals(p._2))) { + return Some(leftKeys, rightKeys) + } // Build a lookup between an expression and the positions its holds in the current key seq. 
@@ -164,10 +169,10 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { rightKeysBuffer += rightKeys(index) case _ => // The expression cannot be found, or we have exhausted all indices for that expression. - return (leftKeys, rightKeys) + return None } } - (leftKeysBuffer.toSeq, rightKeysBuffer.toSeq) + Some(leftKeysBuffer.toSeq, rightKeysBuffer.toSeq) } private def reorderJoinKeys( @@ -176,19 +181,48 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { leftPartitioning: Partitioning, rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { - (leftPartitioning, rightPartitioning) match { - case (HashPartitioning(leftExpressions, _), _) => - reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) - case (_, HashPartitioning(rightExpressions, _)) => - reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) - case _ => - (leftKeys, rightKeys) - } + reorderJoinKeysRecursively( + leftKeys, + rightKeys, + Some(leftPartitioning), + Some(rightPartitioning)) + .getOrElse((leftKeys, rightKeys)) } else { (leftKeys, rightKeys) } } + /** + * Recursively reorders the join keys based on partitioning. It starts reordering the + * join keys to match HashPartitioning on either side, followed by PartitioningCollection. + */ + private def reorderJoinKeysRecursively( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + leftPartitioning: Option[Partitioning], + rightPartitioning: Option[Partitioning]): Option[(Seq[Expression], Seq[Expression])] = { + (leftPartitioning, rightPartitioning) match { + case (Some(HashPartitioning(leftExpressions, _)), _) => + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leftExpressions, leftKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, None, rightPartitioning)) + case (_, Some(HashPartitioning(rightExpressions, _))) => + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, leftPartitioning, None)) + case (Some(PartitioningCollection(partitionings)), _) => + partitionings.foldLeft(Option.empty[(Seq[Expression], Seq[Expression])]) { (res, p) => + res.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, Some(p), rightPartitioning)) + }.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, None, rightPartitioning)) + case (_, Some(PartitioningCollection(partitionings))) => + partitionings.foldLeft(Option.empty[(Seq[Expression], Seq[Expression])]) { (res, p) => + res.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, leftPartitioning, Some(p))) + }.orElse(None) + case _ => + None + } + } + /** * When the physical operators are created for JOIN, the ordering of join keys is based on order * in which the join keys appear in the user query. That might not match with the output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala new file mode 100644 index 0000000000000..38e68cd2512e7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.exchange + +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, PartitioningCollection} +import org.apache.spark.sql.execution.{DummySparkPlan, SortExec} +import org.apache.spark.sql.execution.joins.SortMergeJoinExec +import org.apache.spark.sql.test.SharedSparkSession + +class EnsureRequirementsSuite extends SharedSparkSession { + private val exprA = Literal(1) + private val exprB = Literal(2) + private val exprC = Literal(3) + + test("reorder should handle PartitioningCollection") { + val plan1 = DummySparkPlan( + outputPartitioning = PartitioningCollection(Seq( + HashPartitioning(exprA :: exprB :: Nil, 5), + HashPartitioning(exprA :: Nil, 5)))) + val plan2 = DummySparkPlan() + + // Test PartitioningCollection on the left side of join. + val smjExec1 = SortMergeJoinExec( + exprB :: exprA :: Nil, exprA :: exprB :: Nil, Inner, None, plan1, plan2) + EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprA, exprB)) + assert(rightKeys === Seq(exprB, exprA)) + case other => fail(other.toString) + } + + // Test PartitioningCollection on the right side of join. + val smjExec2 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprB :: exprA :: Nil, Inner, None, plan2, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprA, exprB)) + case other => fail(other.toString) + } + + // Both sides are PartitioningCollection, but left side cannot be reorderd to match + // and it should fall back to the right side. 
+ val smjExec3 = SortMergeJoinExec( + exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprC, exprA)) + assert(rightKeys === Seq(exprA, exprB)) + case other => fail(other.toString) + } + } + + test("reorder should fallback to the other side partitioning") { + val plan1 = DummySparkPlan( + outputPartitioning = HashPartitioning(exprA :: exprB :: exprC :: Nil, 5)) + val plan2 = DummySparkPlan( + outputPartitioning = HashPartitioning(exprB :: exprC :: Nil, 5)) + + // Test fallback to the right side, which has HashPartitioning. + val smjExec1 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan2) + EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprB, exprC)) + case other => fail(other.toString) + } + + // Test fallback to the right side, which has PartitioningCollection. + val plan3 = DummySparkPlan( + outputPartitioning = PartitioningCollection(Seq(HashPartitioning(exprB :: exprC :: Nil, 5)))) + val smjExec2 = SortMergeJoinExec( + exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan3) + EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprA)) + assert(rightKeys === Seq(exprB, exprC)) + case other => fail(other.toString) + } + + // The right side has HashPartitioning, so it is matched first, but no reordering match is + // found, and it should fall back to the left side, which has a PartitioningCollection. + val smjExec3 = SortMergeJoinExec( + exprC :: exprB :: Nil, exprA :: exprB :: Nil, Inner, None, plan3, plan1) + EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprB, exprC)) + assert(rightKeys === Seq(exprB, exprA)) + case other => fail(other.toString) + } + } +} From 7d6e3fb998021b4873f3bee8a8218d2504ed88a0 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 8 Oct 2020 05:28:33 +0000 Subject: [PATCH 0196/1009] [SPARK-33074][SQL] Classify dialect exceptions in JDBC v2 Table Catalog ### What changes were proposed in this pull request? 1. Add new method to the `JdbcDialect` class - `classifyException()`. It converts dialect specific exception to Spark's `AnalysisException` or its sub-classes. 2. Replace H2 exception `org.h2.jdbc.JdbcSQLException` in `JDBCTableCatalogSuite` by `AnalysisException`. 3. Add `H2Dialect` ### Why are the changes needed? Currently JDBC v2 Table Catalog implementation throws dialect specific exception and ignores exceptions defined in the `TableCatalog` interface. 
This PR adds new method for converting dialect specific exception, and assumes that follow up PRs will implement `classifyException()`. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running existing test suites `JDBCTableCatalogSuite` and `JDBCV2Suite`. Closes #29952 from MaxGekk/jdbcv2-classify-exception. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/AlreadyExistException.scala | 3 +- .../analysis/NoSuchItemException.scala | 6 ++- .../v2/jdbc/JDBCTableCatalog.scala | 24 ++++++++-- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 48 +++++++++++++++++++ .../apache/spark/sql/jdbc/JdbcDialects.scala | 12 +++++ .../v2/jdbc/JDBCTableCatalogSuite.scala | 47 ++++++++++-------- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 11 +++-- .../spark/sql/jdbc/JDBCWriteSuite.scala | 37 +++++++------- 8 files changed, 142 insertions(+), 46 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala index bfc3b3d0ac966..c50ba623c27b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala @@ -37,7 +37,8 @@ class NamespaceAlreadyExistsException(message: String) extends AnalysisException } } -class TableAlreadyExistsException(message: String) extends AnalysisException(message) { +class TableAlreadyExistsException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(db: String, table: String) = { this(s"Table or view '$table' already exists in database '$db'") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala index 88be441d808db..8a1913b40b310 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/NoSuchItemException.scala @@ -32,13 +32,15 @@ import org.apache.spark.sql.types.StructType class NoSuchDatabaseException( val db: String) extends NoSuchNamespaceException(s"Database '$db' not found") -class NoSuchNamespaceException(message: String) extends AnalysisException(message) { +class NoSuchNamespaceException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(namespace: Array[String]) = { this(s"Namespace '${namespace.quoted}' not found") } } -class NoSuchTableException(message: String) extends AnalysisException(message) { +class NoSuchTableException(message: String, cause: Option[Throwable] = None) + extends AnalysisException(message, cause = cause) { def this(db: String, table: String) = { this(s"Table or view '$table' not found in database '$db'") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 41f650d1f2ff5..8edc2fe5585e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -70,7 +70,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { checkNamespace(ident.namespace()) val writeOptions = new JdbcOptionsInWrite( options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) - withConnection(JdbcUtils.tableExists(_, writeOptions)) + classifyException(s"Failed table existence check: $ident") { + withConnection(JdbcUtils.tableExists(_, writeOptions)) + } } override def dropTable(ident: Identifier): Boolean = { @@ -88,7 +90,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { override def renameTable(oldIdent: Identifier, newIdent: Identifier): Unit = { checkNamespace(oldIdent.namespace()) withConnection { conn => - JdbcUtils.renameTable(conn, getTableName(oldIdent), getTableName(newIdent), options) + classifyException(s"Failed table renaming from $oldIdent to $newIdent") { + JdbcUtils.renameTable(conn, getTableName(oldIdent), getTableName(newIdent), options) + } } } @@ -123,7 +127,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) val caseSensitive = SQLConf.get.caseSensitiveAnalysis withConnection { conn => - JdbcUtils.createTable(conn, getTableName(ident), schema, caseSensitive, writeOptions) + classifyException(s"Failed table creation: $ident") { + JdbcUtils.createTable(conn, getTableName(ident), schema, caseSensitive, writeOptions) + } } JDBCTable(ident, schema, writeOptions) @@ -132,7 +138,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { override def alterTable(ident: Identifier, changes: TableChange*): Table = { checkNamespace(ident.namespace()) withConnection { conn => - JdbcUtils.alterTable(conn, getTableName(ident), changes, options) + classifyException(s"Failed table altering: $ident") { + JdbcUtils.alterTable(conn, getTableName(ident), changes, options) + } loadTable(ident) } } @@ -156,4 +164,12 @@ class JDBCTableCatalog extends TableCatalog with Logging { private def getTableName(ident: Identifier): String = { (ident.namespace() :+ ident.name()).map(dialect.quoteIdentifier).mkString(".") } + + private def classifyException[T](message: String)(f: => T): T = { + try { + f + } catch { + case e: Throwable => throw dialect.classifyException(message, e) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala new file mode 100644 index 0000000000000..9c727957ffab8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc + +import java.sql.SQLException +import java.util.Locale + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} + +private object H2Dialect extends JdbcDialect { + override def canHandle(url: String): Boolean = + url.toLowerCase(Locale.ROOT).startsWith("jdbc:h2") + + override def classifyException(message: String, e: Throwable): AnalysisException = { + if (e.isInstanceOf[SQLException]) { + // Error codes are from https://www.h2database.com/javadoc/org/h2/api/ErrorCode.html + e.asInstanceOf[SQLException].getErrorCode match { + // TABLE_OR_VIEW_ALREADY_EXISTS_1 + case 42101 => + throw new TableAlreadyExistsException(message, cause = Some(e)) + // TABLE_OR_VIEW_NOT_FOUND_1 + case 42102 => + throw new NoSuchTableException(message, cause = Some(e)) + // SCHEMA_NOT_FOUND_1 + case 90079 => + throw new NoSuchNamespaceException(message, cause = Some(e)) + case _ => + } + } + super.classifyException(message, e) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index a01720d1eefc7..5f8d788bc7a22 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuilder import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils @@ -253,6 +254,16 @@ abstract class JdbcDialect extends Serializable { val nullable = if (isNullable) "NULL" else "NOT NULL" s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" } + + /** + * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. + * @param message The error message to be placed to the returned exception. + * @param e The dialect specific exception. + * @return `AnalysisException` or its sub-class. + */ + def classifyException(message: String, e: Throwable): AnalysisException = { + new AnalysisException(message, cause = Some(e)) + } } /** @@ -297,6 +308,7 @@ object JdbcDialects { registerDialect(DerbyDialect) registerDialect(OracleDialect) registerDialect(TeradataDialect) + registerDialect(H2Dialect) /** * Fetch the JdbcDialect class corresponding to a given database url. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index ca86a8f593621..8fe58e3a0a28a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -21,7 +21,7 @@ import java.util.Properties import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -101,15 +101,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - Seq( - "h2.test.not_existing_table" -> "Table \"not_existing_table\" not found", - "h2.bad_test.not_existing_table" -> "Schema \"bad_test\" not found" - ).foreach { case (table, expectedMsg) => - val msg = intercept[org.h2.jdbc.JdbcSQLException] { - sql(s"ALTER TABLE $table RENAME TO test.dst_table") - }.getMessage - assert(msg.contains(expectedMsg)) + val exp1 = intercept[NoSuchTableException] { + sql(s"ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") + } + assert(exp1.getMessage.contains( + "Failed table renaming from test.not_existing_table to test.dst_table")) + assert(exp1.cause.get.getMessage.contains("Table \"not_existing_table\" not found")) + val exp2 = intercept[NoSuchNamespaceException] { + sql(s"ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } + assert(exp2.getMessage.contains( + "Failed table renaming from bad_test.not_existing_table to test.dst_table")) + assert(exp2.cause.get.getMessage.contains("Schema \"bad_test\" not found")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => @@ -119,10 +122,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withConnection { conn => conn.prepareStatement("""CREATE TABLE "test"."src_table" (id INTEGER)""").executeUpdate() } - val msg = intercept[org.h2.jdbc.JdbcSQLException] { + val exp = intercept[TableAlreadyExistsException] { sql("ALTER TABLE h2.test.src_table RENAME TO test.dst_table") - }.getMessage - assert(msg.contains("Table \"dst_table\" already exists")) + } + assert(exp.getMessage.contains( + "Failed table renaming from test.src_table to test.dst_table")) + assert(exp.cause.get.getMessage.contains("Table \"dst_table\" already exists")) } } } @@ -156,10 +161,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { }.getMessage assert(msg.contains("Table test.new_table already exists")) } - val msg = intercept[org.h2.jdbc.JdbcSQLException] { + val exp = intercept[NoSuchNamespaceException] { sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") - }.getMessage - assert(msg.contains("Schema \"bad_test\" not found")) + } + assert(exp.getMessage.contains("Failed table creation: bad_test.new_table")) + assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) } test("alter table ... 
add column") { @@ -289,15 +295,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... update column comment not supported") { withTable("h2.test.alt_table") { sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - val msg1 = intercept[java.sql.SQLFeatureNotSupportedException] { + val exp = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") - }.getMessage - assert(msg1.contains("Unsupported TableChange")) + } + assert(exp.getMessage.contains("Failed table altering: test.alt_table")) + assert(exp.cause.get.getMessage.contains("Unsupported TableChange")) // Update comment for not existing column - val msg2 = intercept[AnalysisException] { + val msg = intercept[AnalysisException] { sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") }.getMessage - assert(msg2.contains("Cannot update missing field bad_column in test.alt_table")) + assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } // Update column comments in not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 7af55550a7736..f0b19071a969b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -770,9 +770,14 @@ class JDBCSuite extends QueryTest } test("Dialect unregister") { - JdbcDialects.registerDialect(testH2Dialect) - JdbcDialects.unregisterDialect(testH2Dialect) - assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + JdbcDialects.unregisterDialect(H2Dialect) + try { + JdbcDialects.registerDialect(testH2Dialect) + JdbcDialects.unregisterDialect(testH2Dialect) + assert(JdbcDialects.get(urlWithUserAndPass) == NoopDialect) + } finally { + JdbcDialects.registerDialect(H2Dialect) + } } test("Aggregated dialects") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index 3f621e04338a3..fb46c2ff4c0ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -194,24 +194,29 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { } test("Truncate") { - JdbcDialects.registerDialect(testH2Dialect) - val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) - val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) - val df3 = spark.createDataFrame(sparkContext.parallelize(arr2x3), schema3) - - df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) - df2.write.mode(SaveMode.Overwrite).option("truncate", true) - .jdbc(url1, "TEST.TRUNCATETEST", properties) - assert(1 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) - assert(2 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) + JdbcDialects.unregisterDialect(H2Dialect) + try { + JdbcDialects.registerDialect(testH2Dialect) + val df = spark.createDataFrame(sparkContext.parallelize(arr2x2), schema2) + val df2 = spark.createDataFrame(sparkContext.parallelize(arr1x2), schema2) + val df3 = spark.createDataFrame(sparkContext.parallelize(arr2x3), schema3) - val m = intercept[AnalysisException] { - 
df3.write.mode(SaveMode.Overwrite).option("truncate", true) + df.write.jdbc(url1, "TEST.TRUNCATETEST", properties) + df2.write.mode(SaveMode.Overwrite).option("truncate", true) .jdbc(url1, "TEST.TRUNCATETEST", properties) - }.getMessage - assert(m.contains("Column \"seq\" not found")) - assert(0 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) - JdbcDialects.unregisterDialect(testH2Dialect) + assert(1 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) + assert(2 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).collect()(0).length) + + val m = intercept[AnalysisException] { + df3.write.mode(SaveMode.Overwrite).option("truncate", true) + .jdbc(url1, "TEST.TRUNCATETEST", properties) + }.getMessage + assert(m.contains("Column \"seq\" not found")) + assert(0 === spark.read.jdbc(url1, "TEST.TRUNCATETEST", properties).count()) + } finally { + JdbcDialects.unregisterDialect(testH2Dialect) + JdbcDialects.registerDialect(H2Dialect) + } } test("createTableOptions") { From 5effa8ea261ba59214afedc2853d1b248b330ca6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 8 Oct 2020 16:29:15 +0900 Subject: [PATCH 0197/1009] [SPARK-33091][SQL] Avoid using map instead of foreach to avoid potential side effect at callers of OrcUtils.readCatalystSchema ### What changes were proposed in this pull request? This is a followup of SPARK-32646; a new JIRA was filed to control the fixed versions properly. When you use `map`, the body might be lazily evaluated and never executed. To avoid this, it is better to use `foreach`. See also SPARK-16694. The current code does not appear to cause any bug for now, but it is best to fix this to avoid potential issues. ### Why are the changes needed? To avoid potential issues from `map` being lazy and not executed. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran related tests. CI in this PR should verify. Closes #29974 from HyukjinKwon/SPARK-32646. 
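A minimal illustration (plain Scala, not the Spark code touched by this patch) of why `foreach` is the safer choice for side effects: on a lazy collection such as an `Iterator`, a side-effecting `map` body does not run until the result is consumed, whereas `foreach` runs immediately.

```scala
// map on a lazy collection defers the side effect; nothing is printed because the
// mapped iterator is never consumed.
Iterator(1, 2, 3).map(x => println(s"map saw $x"))

// foreach is eager and exists exactly for side effects; this prints all three lines.
Iterator(1, 2, 3).foreach(x => println(s"foreach saw $x"))
```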
Authored-by: HyukjinKwon Signed-off-by: Takeshi Yamamuro --- .../spark/sql/execution/datasources/orc/OrcFileFormat.scala | 2 +- .../datasources/v2/orc/OrcPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 8e9a566d45971..2671682e18f31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -185,7 +185,7 @@ class OrcFileFormat } else { // ORC predicate pushdown if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index 1f38128e98fa5..b0ddee0a6b336 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -69,7 +69,7 @@ case class OrcPartitionReaderFactory( private def pushDownPredicates(filePath: Path, conf: Configuration): Unit = { if (orcFilterPushDown) { - OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).map { fileSchema => + OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) } From 4a47b3e1103170eacf2fb910864c6db22a9a37e6 Mon Sep 17 00:00:00 2001 From: manubatham20 Date: Thu, 8 Oct 2020 07:52:00 -0500 Subject: [PATCH 0198/1009] [DOC][MINOR] pySpark usage - removed repeated keyword causing confusion ### What changes were proposed in this pull request? While explaining pySpark usage, use of repeated synonymous words were causing confusion. Removed "instead of a JAR" word, to keep it more readable. ### Why are the changes needed? To keep the docs more readable and easy to understand. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No code changes, minor documentation change only. No tests added. Closes #29956 from manubatham20/patch-1. Authored-by: manubatham20 Signed-off-by: Sean Owen --- docs/submitting-applications.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index b27cf36b863ee..7a0f17f5ad13f 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -76,7 +76,7 @@ locally on your laptop), it is common to use `cluster` mode to minimize network the drivers and the executors. Currently, the standalone mode does not support cluster mode for Python applications. -For Python applications, simply pass a `.py` file in the place of `` instead of a JAR, +For Python applications, simply pass a `.py` file in the place of ``, and add Python `.zip`, `.egg` or `.py` files to the search path with `--py-files`. 
There are a few options available that are specific to the From 4987db8c88b49a0c0d8503b6291455e92e114efa Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 8 Oct 2020 11:50:53 -0700 Subject: [PATCH 0199/1009] [SPARK-33096][K8S] Use LinkedHashMap instead of Map for newlyCreatedExecutors ### What changes were proposed in this pull request? This PR aims to use `LinkedHashMap` instead of `Map` for `newlyCreatedExecutors`. ### Why are the changes needed? This makes log messages (INFO/DEBUG) more readable. This is helpful when `spark.kubernetes.allocation.batch.size` is large and especially when K8s dynamic allocation is used. **BEFORE** ``` 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 8 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 2 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 5 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 4 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 7 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 10 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 9 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 3 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 DEBUG ExecutorPodsAllocator: Executor with id 6 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:24:21 INFO ExecutorPodsAllocator: Deleting 9 excess pod requests (5,10,6,9,2,7,3,8,4). ``` **AFTER** ``` 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 2 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 3 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 4 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 5 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 6 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 7 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 8 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 9 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 DEBUG ExecutorPodsAllocator: Executor with id 10 was not found in the Kubernetes cluster since it was created 0 milliseconds ago. 20/10/08 10:25:17 INFO ExecutorPodsAllocator: Deleting 9 excess pod requests (2,3,4,5,6,7,8,9,10). 
``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI or `build/sbt -Pkubernetes "kubernetes/test"` Closes #29979 from dongjoon-hyun/SPARK-K8S-LOG. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 2bf8685038cf5..774ef34f69e40 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -67,7 +67,7 @@ private[spark] class ExecutorPodsAllocator( // Executor IDs that have been requested from Kubernetes but have not been detected in any // snapshot yet. Mapped to the timestamp when they were created. - private val newlyCreatedExecutors = mutable.Map.empty[Long, Long] + private val newlyCreatedExecutors = mutable.LinkedHashMap.empty[Long, Long] private val dynamicAllocationEnabled = Utils.isDynamicAllocationEnabled(conf) From c5f6af9f17498bb0ec393c16616f2d99e5d3ee3d Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 8 Oct 2020 11:59:30 -0700 Subject: [PATCH 0200/1009] [SPARK-33094][SQL] Make ORC format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? Propagate ORC options to Hadoop configs in Hive `OrcFileFormat` and in the regular ORC datasource. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("orc").options(conf).load(path) ``` The underlying file system will not receive the conf options. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added UT to `OrcSourceSuite`. Closes #29976 from MaxGekk/orc-option-propagation. 
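A hedged usage sketch of what the change enables: per-read file-system settings passed as data source options now reach the `FileSystem` that backs the ORC files. The `fs.s3a.*` keys, credentials, and bucket path below are placeholders for illustration, not taken from this patch; `spark` is assumed to be an active `SparkSession`.

```scala
// Illustrative only: credentials and path are placeholders.
val fsOptions = Map(
  "fs.s3a.access.key" -> "<access-key>",
  "fs.s3a.secret.key" -> "<secret-key>")

// Before this change the options reached the ORC reader but not the underlying file system;
// after it, they are merged into the Hadoop configuration used to open the files.
val df = spark.read.format("orc").options(fsOptions).load("s3a://some-bucket/path/to/orc")
```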
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../execution/datasources/orc/OrcUtils.scala | 6 +++--- .../datasources/orc/OrcSourceSuite.scala | 17 ++++++++++++++++- .../spark/sql/hive/orc/OrcFileFormat.scala | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 264cf8165e13b..623f4f7a54d00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -81,10 +81,10 @@ object OrcUtils extends Logging { } } - def readSchema(sparkSession: SparkSession, files: Seq[FileStatus]) + def readSchema(sparkSession: SparkSession, files: Seq[FileStatus], options: Map[String, String]) : Option[StructType] = { val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles - val conf = sparkSession.sessionState.newHadoopConf() + val conf = sparkSession.sessionState.newHadoopConfWithOptions(options) files.toIterator.map(file => readSchema(file.getPath, conf, ignoreCorruptFiles)).collectFirst { case Some(schema) => logDebug(s"Reading schema from file $files, got Hive schema string: $schema") @@ -125,7 +125,7 @@ object OrcUtils extends Logging { SchemaMergeUtils.mergeSchemasInParallel( sparkSession, options, files, OrcUtils.readOrcSchemasInParallel) } else { - OrcUtils.readSchema(sparkSession, files) + OrcUtils.readSchema(sparkSession, files, options) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index b6f41ab085fe1..1242b8c693d64 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -32,7 +32,7 @@ import org.apache.orc.impl.RecordReaderImpl import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} -import org.apache.spark.sql.{Row, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.execution.datasources.SchemaMergeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -537,6 +537,21 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } } + + test("SPARK-33094: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + Seq(false, true).foreach { mergeSchema => + withTempPath { dir => + val path = dir.getAbsolutePath + val conf = Map("ds_option" -> "value", "mergeSchema" -> mergeSchema.toString) + spark.range(1).write.options(conf).orc(path) + checkAnswer(spark.read.options(conf).orc(path), Row(0)) + } + } + } + } } class OrcSourceSuite extends OrcSuite with SharedSparkSession { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index 356b92b4652b3..d1ee1baadcbce 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -75,7 +75,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles OrcFileOperator.readSchema( files.map(_.getPath.toString), - Some(sparkSession.sessionState.newHadoopConf()), + Some(sparkSession.sessionState.newHadoopConfWithOptions(options)), ignoreCorruptFiles ) } From a9077299d769bc9569a15f6500754661111fe9ab Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 9 Oct 2020 09:25:22 +0900 Subject: [PATCH 0201/1009] [SPARK-32743][SQL] Add distinct info at UnresolvedFunction toString ### What changes were proposed in this pull request? Add distinct info at `UnresolvedFunction.toString`. ### Why are the changes needed? Make `UnresolvedFunction` info complete. ``` create table test (c1 int, c2 int); explain extended select sum(distinct c1) from test; -- before this pr == Parsed Logical Plan == 'Project [unresolvedalias('sum('c1), None)] +- 'UnresolvedRelation [test] -- after this pr == Parsed Logical Plan == 'Project [unresolvedalias('sum(distinct 'c1), None)] +- 'UnresolvedRelation [test] ``` ### Does this PR introduce _any_ user-facing change? Yes, get distinct info during sql parse. ### How was this patch tested? manual test. Closes #29586 from ulysses-you/SPARK-32743. Authored-by: ulysses Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/unresolved.scala | 5 ++- .../sql-tests/inputs/explain-aqe.sql | 1 + .../resources/sql-tests/inputs/explain.sql | 6 ++++ .../sql-tests/results/explain-aqe.sql.out | 33 +++++++++++++++++++ .../sql-tests/results/explain.sql.out | 33 +++++++++++++++++++ 5 files changed, 77 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 9c7d572a12071..efc9e971df72a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -268,7 +268,10 @@ case class UnresolvedFunction( override lazy val resolved = false override def prettyName: String = name.unquotedString - override def toString: String = s"'$name(${children.mkString(", ")})" + override def toString: String = { + val distinct = if (isDistinct) "distinct " else "" + s"'$name($distinct${children.mkString(", ")})" + } } object UnresolvedFunction { diff --git a/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql b/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql index f4afa2b77a9d7..7aef901da4fb5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/explain-aqe.sql @@ -1,3 +1,4 @@ --IMPORT explain.sql --SET spark.sql.adaptive.enabled=true +--SET spark.sql.maxMetadataStringLength = 500 diff --git a/sql/core/src/test/resources/sql-tests/inputs/explain.sql b/sql/core/src/test/resources/sql-tests/inputs/explain.sql index 80bf258704c70..fdff1b4eef941 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/explain.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/explain.sql @@ -1,5 +1,6 @@ --SET spark.sql.codegen.wholeStage = true --SET spark.sql.adaptive.enabled = false +--SET spark.sql.maxMetadataStringLength = 500 -- Test tables CREATE table explain_temp1 (key int, val int) USING PARQUET; @@ -9,6 +10,11 @@ CREATE table explain_temp4 (key int, val 
string) USING PARQUET; SET spark.sql.codegen.wholeStage = true; +-- distinct func +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1; + -- single table EXPLAIN FORMATTED SELECT key, max(val) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 5435cde050fd1..567e0eabe1805 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -42,6 +42,39 @@ struct spark.sql.codegen.wholeStage true +-- !query +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1 +-- !query schema +struct +-- !query output +== Parsed Logical Plan == +'Project [unresolvedalias('sum(distinct 'val), None)] ++- 'UnresolvedRelation [explain_temp1], [], false + +== Analyzed Logical Plan == +sum(DISTINCT val): bigint +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- SubqueryAlias spark_catalog.default.explain_temp1 + +- Relation[key#x,val#x] parquet + +== Optimized Logical Plan == +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- Project [val#x] + +- Relation[key#x,val#x] parquet + +== Physical Plan == +AdaptiveSparkPlan isFinalPlan=false ++- HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) + +- Exchange SinglePartition, true, [id=#x] + +- HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) + +- HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + + -- !query EXPLAIN FORMATTED SELECT key, max(val) diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 6b3b71f85ced2..fcd69549f2c6e 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -42,6 +42,39 @@ struct spark.sql.codegen.wholeStage true +-- !query +EXPLAIN EXTENDED + SELECT sum(distinct val) + FROM explain_temp1 +-- !query schema +struct +-- !query output +== Parsed Logical Plan == +'Project [unresolvedalias('sum(distinct 'val), None)] ++- 'UnresolvedRelation [explain_temp1], [], false + +== Analyzed Logical Plan == +sum(DISTINCT val): bigint +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- SubqueryAlias spark_catalog.default.explain_temp1 + +- Relation[key#x,val#x] parquet + +== Optimized Logical Plan == +Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] ++- Project [val#x] + +- Relation[key#x,val#x] parquet + +== Physical Plan == +*HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) ++- Exchange SinglePartition, true, [id=#x] + +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) + +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- Exchange 
hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- *HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) + +- *ColumnarToRow + +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct + + -- !query EXPLAIN FORMATTED SELECT key, max(val) From 3beab8d8a8e2ed5e46e063d5a44face40c5fac90 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 9 Oct 2020 09:50:45 +0900 Subject: [PATCH 0202/1009] [SPARK-32793][FOLLOW-UP] Minor corrections for PySpark annotations and SparkR ### What changes were proposed in this pull request? - Annotated return types of `assert_true` and `raise_error` as discussed [here](https://github.com/apache/spark/pull/29947#pullrequestreview-504495801). - Add `assert_true` and `raise_error` to SparkR NAMESPACE. - Validating message vector size in SparkR as discussed [here](https://github.com/apache/spark/pull/29947#pullrequestreview-504539004). ### Why are the changes needed? As discussed in review for #29947. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Existing tests. - Validation of annotations using MyPy Closes #29978 from zero323/SPARK-32793-FOLLOW-UP. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 2 ++ R/pkg/R/functions.R | 6 ++++-- python/pyspark/sql/functions.pyi | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 2fadf20da491c..a9cca4bf6f6fc 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -230,6 +230,7 @@ exportMethods("%<=>%", "asc", "ascii", "asin", + "assert_true", "atan", "atan2", "avg", @@ -361,6 +362,7 @@ exportMethods("%<=>%", "posexplode_outer", "quarter", "radians", + "raise_error", "rand", "randn", "rank", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index ce384a64bccaf..bcd798a8c31e2 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -847,7 +847,8 @@ setMethod("assert_true", jc <- if (is.null(errMsg)) { callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc) } else { - if (is.character(errMsg) && length(errMsg) == 1) { + if (is.character(errMsg)) { + stopifnot(length(errMsg) == 1) errMsg <- lit(errMsg) } callJStatic("org.apache.spark.sql.functions", "assert_true", x@jc, errMsg@jc) @@ -868,7 +869,8 @@ setMethod("assert_true", setMethod("raise_error", signature(x = "characterOrColumn"), function(x) { - if (is.character(x) && length(x) == 1) { + if (is.character(x)) { + stopifnot(length(x) == 1) x <- lit(x) } jc <- callJStatic("org.apache.spark.sql.functions", "raise_error", x@jc) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 6249bca5cef68..779a29c086d5a 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -137,8 +137,8 @@ def sha1(col: ColumnOrName) -> Column: ... def sha2(col: ColumnOrName, numBits: int) -> Column: ... def hash(*cols: ColumnOrName) -> Column: ... def xxhash64(*cols: ColumnOrName) -> Column: ... -def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...): ... -def raise_error(errMsg: Union[Column, str]): ... +def assert_true(col: ColumnOrName, errMsg: Union[Column, str] = ...) -> Column: ... +def raise_error(errMsg: Union[Column, str]) -> Column: ... def concat(*cols: ColumnOrName) -> Column: ... 
def concat_ws(sep: str, *cols: ColumnOrName) -> Column: ... def decode(col: ColumnOrName, charset: str) -> Column: ... From 1234c66fa6b6d2c45edb40237788fa3bfdf96cf3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 9 Oct 2020 02:37:47 -0700 Subject: [PATCH 0203/1009] [SPARK-33101][ML] Make LibSVM format propagate Hadoop config from DS options to underlying HDFS file system ### What changes were proposed in this pull request? Propagate LibSVM options to Hadoop configs in the LibSVM datasource. ### Why are the changes needed? There is a bug that when running: ```scala spark.read.format("libsvm").options(conf).load(path) ``` The underlying file system will not receive the `conf` options. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, for example, users should read files from Azure Data Lake successfully: ```scala def hadoopConf1() = Map[String, String]( s"fs.adl.oauth2.access.token.provider.type" -> "ClientCredential", s"fs.adl.oauth2.client.id" -> dbutils.secrets.get(scope = "...", key = "..."), s"fs.adl.oauth2.credential" -> dbutils.secrets.get(scope = "...", key = "..."), s"fs.adl.oauth2.refresh.url" -> s"https://login.microsoftonline.com/.../oauth2/token") val df = spark.read.format("libsvm").options(hadoopConf1).load("adl://....azuredatalakestore.net/foldersp1/...") ``` and not get the following exception because the settings above are not propagated to the filesystem: ```java java.lang.IllegalArgumentException: No value for fs.adl.oauth2.access.token.provider found in conf file. at ....adl.AdlFileSystem.getNonEmptyVal(AdlFileSystem.java:820) at ....adl.AdlFileSystem.getCustomAccessTokenProvider(AdlFileSystem.java:220) at ....adl.AdlFileSystem.getAccessTokenProvider(AdlFileSystem.java:257) at ....adl.AdlFileSystem.initialize(AdlFileSystem.java:164) at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669) ``` ### How was this patch tested? Added UT to `LibSVMRelationSuite`. Closes #29984 from MaxGekk/ml-option-propagation. 
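Conceptually, the fix is the same as the ORC one above: the per-read data source options must be overlaid on the Hadoop `Configuration` that is used to create the `FileSystem`. A simplified sketch of that merging idea, not the actual Spark implementation:

```scala
import org.apache.hadoop.conf.Configuration

// Simplified sketch: copy the session's Hadoop settings, then overlay the per-read
// data source options so FileSystem instances created from this conf see them.
def hadoopConfWithOptions(sessionConf: Configuration, options: Map[String, String]): Configuration = {
  val conf = new Configuration(sessionConf) // do not mutate the shared session configuration
  options.foreach { case (k, v) => conf.set(k, v) }
  conf
}
```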
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/ml/source/libsvm/LibSVMRelation.scala | 2 +- .../org/apache/spark/mllib/util/MLUtils.scala | 6 ++++-- .../ml/source/libsvm/LibSVMRelationSuite.scala | 14 ++++++++++++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala index 11be1d85fbead..df64de4b10075 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala @@ -101,7 +101,7 @@ private[libsvm] class LibSVMFileFormat "'numFeatures' option to avoid the extra scan.") val paths = files.map(_.getPath.toString) - val parsed = MLUtils.parseLibSVMFile(sparkSession, paths) + val parsed = MLUtils.parseLibSVMFile(sparkSession, paths, options) MLUtils.computeNumFeatures(parsed) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index a20949910d25e..832f31323f546 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -105,13 +105,15 @@ object MLUtils extends Logging { } private[spark] def parseLibSVMFile( - sparkSession: SparkSession, paths: Seq[String]): RDD[(Double, Array[Int], Array[Double])] = { + sparkSession: SparkSession, + paths: Seq[String], + options: Map[String, String]): RDD[(Double, Array[Int], Array[Double])] = { val lines = sparkSession.baseRelationToDataFrame( DataSource.apply( sparkSession, paths = paths, className = classOf[TextFileFormat].getName, - options = Map(DataSource.GLOB_PATHS_KEY -> "false") + options = options ++ Map(DataSource.GLOB_PATHS_KEY -> "false") ).resolveRelation(checkFilesExist = false)) .select("value") diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index 0999892364e2c..cc0ca308cb668 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -27,12 +27,13 @@ import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SaveMode} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.apache.spark.util.Utils -class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { +class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with SQLHelper { // Path for dataset var path: String = _ @@ -211,4 +212,13 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { assert(v == Vectors.sparse(2, Seq((0, 2.0), (1, 3.0)))) } } + + test("SPARK-33101: should propagate Hadoop config from DS options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + val df = spark.read.option("ds_option", 
"value").format("libsvm").load(path) + assert(df.columns(0) == "label") + } + } } From e1909c96fbfc3d3f7808f6ddcadec88cc4d11fb9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 9 Oct 2020 02:50:38 -0700 Subject: [PATCH 0204/1009] [SPARK-33099][K8S] Respect executor idle timeout conf in ExecutorPodsAllocator ### What changes were proposed in this pull request? This PR aims to protect the executor pod request or pending pod during executor idle timeout. ### Why are the changes needed? In case of dynamic allocation, Apache Spark K8s `ExecutorPodsAllocator` cancels the pod requests or pending pods too eagerly. Like the following example, `ExecutorPodsAllocator` received the new total executor adjust request rapidly in two minutes. Sometimes, it's called 3 times in a single second. It repeats `request` and `delete` on that request or pending pod frequently. This PR is reusing `spark.dynamicAllocation.executorIdleTimeout (default: 60s)` to keep the pod request or pending pod. ``` 20/10/08 05:58:08 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:58:08 INFO ExecutorPodsAllocator: Going to request 3 executors from Kubernetes. 20/10/08 05:58:09 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:58:43 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 1 20/10/08 05:58:47 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:26 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:30 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:31 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:44 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:44 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:45 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 2 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 1 20/10/08 05:59:50 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 0 20/10/08 05:59:54 INFO ExecutorPodsAllocator: Set totalExpectedExecutors to 3 20/10/08 05:59:54 INFO ExecutorPodsAllocator: Going to request 1 executors from Kubernetes. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the newly added test case. Closes #29981 from dongjoon-hyun/SPARK-K8S-INITIAL. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../cluster/k8s/ExecutorPodsAllocator.scala | 30 +++++++++-- .../k8s/ExecutorLifecycleTestUtils.scala | 5 ++ .../k8s/ExecutorPodsAllocatorSuite.scala | 53 ++++++++++++++++++- ...ecutorPodsPollingSnapshotSourceSuite.scala | 8 +-- .../k8s/ExecutorPodsSnapshotSuite.scala | 5 +- ...ExecutorPodsWatchSnapshotSourceSuite.scala | 10 ++-- 6 files changed, 97 insertions(+), 14 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 774ef34f69e40..5e09de37f2848 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant +import java.time.format.DateTimeParseException import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicLong} import scala.collection.mutable @@ -30,6 +32,7 @@ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesConf import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( @@ -50,6 +53,8 @@ private[spark] class ExecutorPodsAllocator( private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000) + private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 + private val namespace = conf.get(KUBERNETES_NAMESPACE) private val kubernetesDriverPodName = conf @@ -87,6 +92,7 @@ private[spark] class ExecutorPodsAllocator( } def setTotalExpectedExecutors(total: Int): Unit = { + logDebug(s"Set totalExpectedExecutors to $total") totalExpectedExecutors.set(total) if (!hasPendingPods.get()) { snapshotsStore.notifySubscribers() @@ -149,7 +155,6 @@ private[spark] class ExecutorPodsAllocator( case (_, PodPending(_)) => true case _ => false } - .map { case (id, _) => id } // Make a local, non-volatile copy of the reference since it's used multiple times. This // is the only method that modifies the list, so this is safe. @@ -173,7 +178,8 @@ private[spark] class ExecutorPodsAllocator( // It's possible that we have outstanding pods that are outdated when dynamic allocation // decides to downscale the application. So check if we can release any pending pods early // instead of waiting for them to time out. Drop them first from the unacknowledged list, - // then from the pending. + // then from the pending. However, in order to prevent too frequent frunctuation, newly + // requested pods are protected during executorIdleTimeout period. // // TODO: with dynamic allocation off, handle edge cases if we end up with more running // executors than expected. 
@@ -181,8 +187,13 @@ private[spark] class ExecutorPodsAllocator( newlyCreatedExecutors.size if (knownPodCount > currentTotalExpectedExecutors) { val excess = knownPodCount - currentTotalExpectedExecutors - val knownPendingToDelete = currentPendingExecutors.take(excess - newlyCreatedExecutors.size) - val toDelete = newlyCreatedExecutors.keys.take(excess).toList ++ knownPendingToDelete + val knownPendingToDelete = currentPendingExecutors + .filter(x => isExecutorIdleTimedOut(x._2, currentTime)) + .map { case (id, _) => id } + .take(excess - newlyCreatedExecutors.size) + val toDelete = newlyCreatedExecutors + .filter(x => currentTime - x._2 > executorIdleTimeout) + .keys.take(excess).toList ++ knownPendingToDelete if (toDelete.nonEmpty) { logInfo(s"Deleting ${toDelete.size} excess pod requests (${toDelete.mkString(",")}).") @@ -268,4 +279,15 @@ private[spark] class ExecutorPodsAllocator( } } } + + private def isExecutorIdleTimedOut(state: ExecutorPodState, currentTime: Long): Boolean = { + try { + val startTime = Instant.parse(state.pod.getStatus.getStartTime).toEpochMilli() + currentTime - startTime > executorIdleTimeout + } catch { + case _: Exception => + logDebug(s"Cannot get startTime of pod ${state.pod}") + true + } + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 2e883623a4b1c..0377e54f3cd76 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant + import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} import org.apache.spark.deploy.k8s.Constants._ @@ -29,6 +31,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("failed") + .withStartTime(Instant.now.toString) .addNewContainerStatus() .withName("spark-executor") .withImage("k8s-spark") @@ -59,6 +62,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("pending") + .withStartTime(Instant.now.toString) .endStatus() .build() } @@ -67,6 +71,7 @@ object ExecutorLifecycleTestUtils { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() .withPhase("running") + .withStartTime(Instant.now.toString) .endStatus() .build() } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index e4b36e46594f6..c1c33b2a0f199 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.k8s +import java.time.Instant + import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.PodResource @@ -31,6 +33,7 
@@ import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSp import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ +import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import org.apache.spark.scheduler.cluster.k8s.ExecutorLifecycleTestUtils._ import org.apache.spark.util.ManualClock @@ -47,11 +50,14 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { .endMetadata() .build() - private val conf = new SparkConf().set(KUBERNETES_DRIVER_POD_NAME, driverPodName) + private val conf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, driverPodName) + .set(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, "10s") private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000L) + private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 private val secMgr = new SecurityManager(conf) private var waitForExecutorPodsClock: ManualClock = _ @@ -159,6 +165,9 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { .withLabelIn(meq(SPARK_EXECUTOR_ID_LABEL), any())) .thenReturn(podOperations) + val startTime = Instant.now.toEpochMilli + waitForExecutorPodsClock.setTime(startTime) + // Target 1 executor, make sure it's requested, even with an empty initial snapshot. podsAllocatorUnderTest.setTotalExpectedExecutors(1) verify(podOperations).create(podWithAttachedContainerForId(1)) @@ -184,6 +193,7 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { verify(podOperations, never()).delete() // Scale down to 1. Pending executors (both acknowledged and not) should be deleted. 
+ waitForExecutorPodsClock.advance(executorIdleTimeout * 2) podsAllocatorUnderTest.setTotalExpectedExecutors(1) snapshotsStore.notifySubscribers() verify(podOperations, times(4)).create(any()) @@ -202,6 +212,47 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { assert(!podsAllocatorUnderTest.isDeleted("4")) } + test("SPARK-33099: Respect executor idle timeout configuration") { + when(podOperations + .withField("status.phase", "Pending")) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)) + .thenReturn(podOperations) + when(podOperations + .withLabelIn(meq(SPARK_EXECUTOR_ID_LABEL), any())) + .thenReturn(podOperations) + + val startTime = Instant.now.toEpochMilli + waitForExecutorPodsClock.setTime(startTime) + + podsAllocatorUnderTest.setTotalExpectedExecutors(5) + verify(podOperations).create(podWithAttachedContainerForId(1)) + verify(podOperations).create(podWithAttachedContainerForId(2)) + verify(podOperations).create(podWithAttachedContainerForId(3)) + verify(podOperations).create(podWithAttachedContainerForId(4)) + verify(podOperations).create(podWithAttachedContainerForId(5)) + verify(podOperations, times(5)).create(any()) + + snapshotsStore.updatePod(pendingExecutor(1)) + snapshotsStore.updatePod(pendingExecutor(2)) + + // Newly created executors (both acknowledged and not) are protected by executorIdleTimeout + podsAllocatorUnderTest.setTotalExpectedExecutors(0) + snapshotsStore.notifySubscribers() + verify(podOperations, never()).withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1", "2", "3", "4", "5") + verify(podOperations, never()).delete() + + // Newly created executors (both acknowledged and not) are cleaned up. 
+ waitForExecutorPodsClock.advance(executorIdleTimeout * 2) + snapshotsStore.notifySubscribers() + verify(podOperations).withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1", "2", "3", "4", "5") + verify(podOperations).delete() + } + private def executorPodAnswer(): Answer[KubernetesExecutorSpec] = (invocation: InvocationOnMock) => { val k8sConf: KubernetesExecutorConf = invocation.getArgument(0) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala index 63e43bd40c728..a8e825678d1f5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsPollingSnapshotSourceSuite.scala @@ -77,13 +77,15 @@ class ExecutorPodsPollingSnapshotSourceSuite extends SparkFunSuite with BeforeAn } test("Items returned by the API should be pushed to the event queue") { + val exec1 = runningExecutor(1) + val exec2 = runningExecutor(2) when(activeExecutorPods.list()) .thenReturn(new PodListBuilder() .addToItems( - runningExecutor(1), - runningExecutor(2)) + exec1, + exec2) .build()) pollingExecutor.tick(pollingInterval, TimeUnit.MILLISECONDS) - verify(eventQueue).replaceSnapshot(Seq(runningExecutor(1), runningExecutor(2))) + verify(eventQueue).replaceSnapshot(Seq(exec1, exec2)) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index 70e19c904eddb..6ca1733bcd32b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -50,11 +50,12 @@ class ExecutorPodsSnapshotSuite extends SparkFunSuite { Map( 0L -> PodPending(originalPods(0)), 1L -> PodSucceeded(succeededExecutor(1)))) - val snapshotWithNewPod = snapshotWithUpdatedPod.withUpdate(pendingExecutor(2)) + val pendingExec = pendingExecutor(2) + val snapshotWithNewPod = snapshotWithUpdatedPod.withUpdate(pendingExec) assert(snapshotWithNewPod.executorPods === Map( 0L -> PodPending(originalPods(0)), 1L -> PodSucceeded(succeededExecutor(1)), - 2L -> PodPending(pendingExecutor(2)))) + 2L -> PodPending(pendingExec))) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala index ac1968b4ff810..e35fc83019b8d 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsWatchSnapshotSourceSuite.scala @@ -67,9 +67,11 @@ class ExecutorPodsWatchSnapshotSourceSuite extends SparkFunSuite with BeforeAndA } test("Watch events should be pushed to the snapshots store as snapshot updates.") { - watch.getValue.eventReceived(Action.ADDED, runningExecutor(1)) - 
watch.getValue.eventReceived(Action.MODIFIED, runningExecutor(2)) - verify(eventQueue).updatePod(runningExecutor(1)) - verify(eventQueue).updatePod(runningExecutor(2)) + val exec1 = runningExecutor(1) + val exec2 = runningExecutor(2) + watch.getValue.eventReceived(Action.ADDED, exec1) + watch.getValue.eventReceived(Action.MODIFIED, exec2) + verify(eventQueue).updatePod(exec1) + verify(eventQueue).updatePod(exec2) } } From edb140eb5cb7f20af3e2ee7d2f9fb72f3e20e796 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Fri, 9 Oct 2020 03:01:54 -0700 Subject: [PATCH 0205/1009] [SPARK-32896][SS] Add DataStreamWriter.table API ### What changes were proposed in this pull request? This PR proposes to add `DataStreamWriter.table` to specify the output "table" to write from the streaming query. ### Why are the changes needed? For now, there's no way to write to the table (especially catalog table) even the table is capable to handle streaming write, so even with Spark 3, writing to the catalog table via SS should go through the `DataStreamWriter.format(provider)` and wish the provider can handle it as same as we do with catalog table. With the new API, we can directly point to the catalog table which supports streaming write. Some of usages are covered with tests - simply saying, end users can do the following: ```scala // assuming `testcat` is a custom catalog, and `ns` is a namespace in the catalog spark.sql("CREATE TABLE testcat.ns.table1 (id bigint, data string) USING foo") val query = inputDF .writeStream .table("testcat.ns.table1") .option(...) .start() ``` ### Does this PR introduce _any_ user-facing change? Yes, as this adds a new public API in DataStreamWriter. This doesn't bring backward incompatible change. ### How was this patch tested? New unit tests. Closes #29767 from HeartSaVioR/SPARK-32896. 
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../spark/sql/connector/InMemoryTable.scala | 59 ++++++- .../sql/streaming/DataStreamWriter.scala | 138 +++++++++------ .../test/DataStreamTableAPISuite.scala | 162 ++++++++++++++++-- 3 files changed, 299 insertions(+), 60 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 616fc72320caf..6a78b9e2bddd0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, HoursTransform, IdentityTransform, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.write._ +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.sources.{And, EqualTo, Filter, IsNotNull} import org.apache.spark.sql.types.{DataType, DateType, StructType, TimestampType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -145,6 +146,7 @@ class InMemoryTable( override def capabilities: util.Set[TableCapability] = Set( TableCapability.BATCH_READ, TableCapability.BATCH_WRITE, + TableCapability.STREAMING_WRITE, TableCapability.OVERWRITE_BY_FILTER, TableCapability.OVERWRITE_DYNAMIC, TableCapability.TRUNCATE).asJava @@ -169,26 +171,35 @@ class InMemoryTable( new WriteBuilder with SupportsTruncate with SupportsOverwrite with SupportsDynamicOverwrite { private var writer: BatchWrite = Append + private var streamingWriter: StreamingWrite = StreamingAppend override def truncate(): WriteBuilder = { assert(writer == Append) writer = TruncateAndAppend + streamingWriter = StreamingTruncateAndAppend this } override def overwrite(filters: Array[Filter]): WriteBuilder = { assert(writer == Append) writer = new Overwrite(filters) + streamingWriter = new StreamingNotSupportedOperation(s"overwrite ($filters)") this } override def overwriteDynamicPartitions(): WriteBuilder = { assert(writer == Append) writer = DynamicOverwrite + streamingWriter = new StreamingNotSupportedOperation("overwriteDynamicPartitions") this } override def buildForBatch(): BatchWrite = writer + + override def buildForStreaming(): StreamingWrite = streamingWriter match { + case exc: StreamingNotSupportedOperation => exc.throwsException() + case s => s + } } } @@ -231,6 +242,45 @@ class InMemoryTable( } } + private abstract class TestStreamingWrite extends StreamingWrite { + def createStreamingWriterFactory(info: PhysicalWriteInfo): StreamingDataWriterFactory = { + BufferedRowsWriterFactory + } + + def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = {} + } + + private class StreamingNotSupportedOperation(operation: String) extends TestStreamingWrite { + override def createStreamingWriterFactory(info: PhysicalWriteInfo): StreamingDataWriterFactory = + throwsException() + + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = + throwsException() + + override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = + throwsException() + + def throwsException[T](): T = throw new IllegalStateException("The operation " + + s"${operation} isn't supported for streaming query.") + } + + private object 
StreamingAppend extends TestStreamingWrite { + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { + dataMap.synchronized { + withData(messages.map(_.asInstanceOf[BufferedRows])) + } + } + } + + private object StreamingTruncateAndAppend extends TestStreamingWrite { + override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { + dataMap.synchronized { + dataMap.clear + withData(messages.map(_.asInstanceOf[BufferedRows])) + } + } + } + override def deleteWhere(filters: Array[Filter]): Unit = dataMap.synchronized { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper dataMap --= InMemoryTable.filtersToKeys(dataMap.keys, partCols.map(_.toSeq.quoted), filters) @@ -310,10 +360,17 @@ private class BufferedRowsReader(partition: BufferedRows) extends PartitionReade override def close(): Unit = {} } -private object BufferedRowsWriterFactory extends DataWriterFactory { +private object BufferedRowsWriterFactory extends DataWriterFactory with StreamingDataWriterFactory { override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] = { new BufferWriter } + + override def createWriter( + partitionId: Int, + taskId: Long, + epochId: Long): DataWriter[InternalRow] = { + new BufferWriter + } } private class BufferWriter extends DataWriter[InternalRow] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index dda6dec9c4ebc..239b4fc2de374 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -27,7 +27,7 @@ import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsWrite, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -45,6 +45,7 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap */ @Evolving final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { + import DataStreamWriter._ private val df = ds.toDF() @@ -294,60 +295,75 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { @throws[TimeoutException] def start(): StreamingQuery = startInternal(None) + /** + * Starts the execution of the streaming query, which will continually output results to the given + * table as new data arrives. The returned [[StreamingQuery]] object can be used to interact with + * the stream. 
+ * + * @since 3.1.0 + */ + @throws[TimeoutException] + def saveAsTable(tableName: String): StreamingQuery = { + this.source = SOURCE_NAME_TABLE + this.tableName = tableName + startInternal(None) + } + private def startInternal(path: Option[String]): StreamingQuery = { if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, you can not " + "write files of Hive data source directly.") } - if (source == "memory") { - assertNotPartitioned("memory") + if (source == SOURCE_NAME_TABLE) { + assertNotPartitioned(SOURCE_NAME_TABLE) + + import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser + .parseMultipartIdentifier(tableName) + val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier + + // Currently we don't create a logical streaming writer node in logical plan, so cannot rely + // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. + // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. + if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { + throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") + } + + val tableInstance = catalog.asTableCatalog.loadTable(identifier) + + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + val sink = tableInstance match { + case t: SupportsWrite if t.supports(STREAMING_WRITE) => t + case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + + s"write - $t") + } + + startQuery(sink, extraOptions) + } else if (source == SOURCE_NAME_MEMORY) { + assertNotPartitioned(SOURCE_NAME_MEMORY) if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") } val sink = new MemorySink() val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink, df.schema.toAttributes)) - val chkpointLoc = extraOptions.get("checkpointLocation") val recoverFromChkpoint = outputMode == OutputMode.Complete() - val query = df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - chkpointLoc, - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - recoverFromCheckpointLocation = recoverFromChkpoint, - trigger = trigger) + val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromChkpoint) resultDf.createOrReplaceTempView(query.name) query - } else if (source == "foreach") { - assertNotPartitioned("foreach") + } else if (source == SOURCE_NAME_FOREACH) { + assertNotPartitioned(SOURCE_NAME_FOREACH) val sink = ForeachWriterTable[T](foreachWriter, ds.exprEnc) - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - trigger = trigger) - } else if (source == "foreachBatch") { - assertNotPartitioned("foreachBatch") + startQuery(sink, extraOptions) + } else if (source == SOURCE_NAME_FOREACH_BATCH) { + assertNotPartitioned(SOURCE_NAME_FOREACH_BATCH) if (trigger.isInstanceOf[ContinuousTrigger]) { - throw new AnalysisException("'foreachBatch' is not supported with continuous trigger") + throw new AnalysisException(s"'$source' is not supported 
with continuous trigger") } val sink = new ForeachBatchSink[T](foreachBatchWriter, ds.exprEnc) - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - extraOptions.toMap, - sink, - outputMode, - useTempCheckpointLocation = true, - trigger = trigger) + startQuery(sink, extraOptions) } else { val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) val disabledSources = df.sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") @@ -380,19 +396,28 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { createV1Sink(optionsWithPath) } - df.sparkSession.sessionState.streamingQueryManager.startQuery( - extraOptions.get("queryName"), - extraOptions.get("checkpointLocation"), - df, - optionsWithPath.originalMap, - sink, - outputMode, - useTempCheckpointLocation = source == "console" || source == "noop", - recoverFromCheckpointLocation = true, - trigger = trigger) + startQuery(sink, optionsWithPath) } } + private def startQuery( + sink: Table, + newOptions: CaseInsensitiveMap[String], + recoverFromCheckpoint: Boolean = true): StreamingQuery = { + val useTempCheckpointLocation = SOURCES_ALLOW_ONE_TIME_QUERY.contains(source) + + df.sparkSession.sessionState.streamingQueryManager.startQuery( + newOptions.get("queryName"), + newOptions.get("checkpointLocation"), + df, + newOptions.originalMap, + sink, + outputMode, + useTempCheckpointLocation = useTempCheckpointLocation, + recoverFromCheckpointLocation = recoverFromCheckpoint, + trigger = trigger) + } + private def createV1Sink(optionsWithPath: CaseInsensitiveMap[String]): Sink = { val ds = DataSource( df.sparkSession, @@ -409,7 +434,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 2.0.0 */ def foreach(writer: ForeachWriter[T]): DataStreamWriter[T] = { - this.source = "foreach" + this.source = SOURCE_NAME_FOREACH this.foreachWriter = if (writer != null) { ds.sparkSession.sparkContext.clean(writer) } else { @@ -433,7 +458,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { */ @Evolving def foreachBatch(function: (Dataset[T], Long) => Unit): DataStreamWriter[T] = { - this.source = "foreachBatch" + this.source = SOURCE_NAME_FOREACH_BATCH if (function == null) throw new IllegalArgumentException("foreachBatch function cannot be null") this.foreachBatchWriter = function this @@ -485,6 +510,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { private var source: String = df.sparkSession.sessionState.conf.defaultDataSourceName + private var tableName: String = null + private var outputMode: OutputMode = OutputMode.Append private var trigger: Trigger = Trigger.ProcessingTime(0L) @@ -497,3 +524,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { private var partitioningColumns: Option[Seq[String]] = None } + +object DataStreamWriter { + val SOURCE_NAME_MEMORY = "memory" + val SOURCE_NAME_FOREACH = "foreach" + val SOURCE_NAME_FOREACH_BATCH = "foreachBatch" + val SOURCE_NAME_CONSOLE = "console" + val SOURCE_NAME_TABLE = "table" + val SOURCE_NAME_NOOP = "noop" + + // these writer sources are also used for one-time query, hence allow temp checkpoint location + val SOURCES_ALLOW_ONE_TIME_QUERY = Seq(SOURCE_NAME_MEMORY, SOURCE_NAME_FOREACH, + SOURCE_NAME_FOREACH_BATCH, SOURCE_NAME_CONSOLE, SOURCE_NAME_NOOP) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 788452dace84b..062b1060bc601 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.streaming.test +import java.io.File import java.util import scala.collection.JavaConverters._ @@ -25,10 +26,10 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog} +import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog, InMemoryTableSessionCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, V2TableWithV1Fallback} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.ScanBuilder @@ -51,9 +52,10 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { after { spark.sessionState.catalogManager.reset() spark.sessionState.conf.clear() + sqlContext.streams.active.foreach(_.stop()) } - test("table API with file source") { + test("read: table API with file source") { Seq("parquet", "").foreach { source => withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> source) { withTempDir { tempDir => @@ -72,13 +74,13 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read non-exist table") { + test("read: read non-exist table") { intercept[AnalysisException] { spark.readStream.table("non_exist_table") }.message.contains("Table not found") } - test("stream table API with temp view") { + test("read: stream table API with temp view") { val tblName = "my_table" val stream = MemoryStream[Int] withTable(tblName) { @@ -93,7 +95,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("stream table API with non-streaming temp view") { + test("read: stream table API with non-streaming temp view") { val tblName = "my_table" withTable(tblName) { spark.range(3).createOrReplaceTempView(tblName) @@ -103,7 +105,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read table without streaming capability support") { + test("read: read table without streaming capability support") { val tableIdentifer = "testcat.table_name" spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") @@ -113,7 +115,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { }.message.contains("does not support either micro-batch or continuous scan") } - test("read table with custom catalog") { + test("read: read table with custom catalog") { val tblName = "teststream.table_name" withTable(tblName) { spark.sql(s"CREATE TABLE $tblName (data int) USING foo") @@ -131,7 +133,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("read table with custom catalog & namespace") { + test("read: read table with custom catalog & namespace") { spark.sql("CREATE NAMESPACE teststream.ns") val 
tblName = "teststream.ns.table_name" @@ -151,7 +153,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } - test("fallback to V1 relation") { + test("read: fallback to V1 relation") { val tblName = DataStreamTableAPISuite.V1FallbackTestTableName spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryStreamTableCatalog].getName) @@ -169,6 +171,146 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } } } + + test("write: write to table with custom catalog & no namespace") { + val tableIdentifier = "testcat.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to table with custom catalog & namespace") { + spark.sql("CREATE NAMESPACE testcat.ns") + + val tableIdentifier = "testcat.ns.table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to table with default session catalog") { + val v2Source = classOf[FakeV2Provider].getName + spark.conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, + classOf[InMemoryTableSessionCatalog].getName) + + spark.sql("CREATE NAMESPACE ns") + + val tableIdentifier = "ns.table_name" + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + runTestWithStreamAppend(tableIdentifier) + } + + test("write: write to non-exist table with custom catalog") { + val tableIdentifier = "testcat.nonexisttable" + spark.sql("CREATE NAMESPACE testcat.ns") + + withTempDir { checkpointDir => + val exc = intercept[NoSuchTableException] { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("nonexisttable")) + } + } + + test("write: write to file provider based table isn't allowed yet") { + val tableIdentifier = "table_name" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING parquet") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + test("write: write to temporary view isn't allowed yet") { + val tableIdentifier = "testcat.table_name" + val tempViewIdentifier = "temp_view" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + spark.table(tableIdentifier).createOrReplaceTempView(tempViewIdentifier) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tempViewIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + test("write: write to view shouldn't be allowed") { + val tableIdentifier = "testcat.table_name" + val viewIdentifier = "table_view" + + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) + + spark.sql(s"CREATE VIEW $viewIdentifier AS SELECT id, data FROM $tableIdentifier") + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + 
runStreamQueryAppendMode(viewIdentifier, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("doesn't support streaming write")) + } + } + + private def runTestWithStreamAppend(tableIdentifier: String) = { + withTempDir { checkpointDir => + val input1 = Seq((1L, "a"), (2L, "b"), (3L, "c")) + verifyStreamAppend(tableIdentifier, checkpointDir, Seq.empty, input1, input1) + + val input2 = Seq((4L, "d"), (5L, "e"), (6L, "f")) + verifyStreamAppend(tableIdentifier, checkpointDir, Seq(input1), input2, input1 ++ input2) + } + } + + private def runStreamQueryAppendMode( + tableIdentifier: String, + checkpointDir: File, + prevInputs: Seq[Seq[(Long, String)]], + newInputs: Seq[(Long, String)]): Unit = { + val inputData = MemoryStream[(Long, String)] + val inputDF = inputData.toDF().toDF("id", "data") + + prevInputs.foreach { inputsPerBatch => + inputData.addData(inputsPerBatch: _*) + } + + val query = inputDF + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .saveAsTable(tableIdentifier) + + inputData.addData(newInputs: _*) + + query.processAllAvailable() + query.stop() + } + + private def verifyStreamAppend( + tableIdentifier: String, + checkpointDir: File, + prevInputs: Seq[Seq[(Long, String)]], + newInputs: Seq[(Long, String)], + expectedOutputs: Seq[(Long, String)]): Unit = { + runStreamQueryAppendMode(tableIdentifier, checkpointDir, prevInputs, newInputs) + checkAnswer( + spark.table(tableIdentifier), + expectedOutputs.map { case (id, data) => Row(id, data) } + ) + } } object DataStreamTableAPISuite { From 2e07ed30418d45e89d108bc4bc020d2933c20a3a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 9 Oct 2020 03:04:26 -0700 Subject: [PATCH 0206/1009] [SPARK-33082][SPARK-20202][BUILD][SQL][FOLLOW-UP] Remove Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script ### What changes were proposed in this pull request? This PR removes the leftover of Hive 1.2 workarounds and Hive 1.2 profile in Jenkins script. - `test-hive1.2` title is not used anymore in Jenkins - Remove some comments related to Hive 1.2 - Remove unused codes in `OrcFilters.scala` Hive - Test `spark.sql.hive.convertMetastoreOrc` disabled case for the tests added at SPARK-19809 and SPARK-22267 ### Why are the changes needed? To remove unused codes & improve test coverage ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually ran the unit tests. Also It will be tested in CI in this PR. Closes #29973 from HyukjinKwon/SPARK-33082-SPARK-20202. 
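As a rough illustration of the extra coverage called out under "How was this patch tested?", the `spark.sql.hive.convertMetastoreOrc` case amounts to running the existing ORC assertions under both settings. A minimal sketch, assuming the usual Hive test helpers (`withSQLConf`, `withTempPath`, `withTable`, `checkAnswer`) available to suites like `HiveOrcQuerySuite`:

```
Seq(true, false).foreach { convertMetastoreOrc =>
  withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) {
    withTempPath { dir =>
      val path = dir.getCanonicalPath
      // Write a two-column ORC file, then read it back through a Hive table
      // whose column order is deliberately swapped (the SPARK-22267 scenario).
      Seq(1 -> 2).toDF("c1", "c2").write.orc(path)
      withTable("t") {
        sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'")
        checkAnswer(spark.table("t"), Row(2, 1))
      }
    }
  }
}
```

The actual change in `HiveOrcQuerySuite.scala` further down in this patch follows this shape for the SPARK-22267 test.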
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-1.2 | 230 --------- dev/run-tests-jenkins.py | 2 - .../datasources/orc/OrcFilterSuite.scala | 4 - .../datasources/orc/OrcQuerySuite.scala | 1 - .../execution/datasources/orc/OrcTest.scala | 1 - .../spark/sql/hive/orc/OrcFileFormat.scala | 6 +- .../spark/sql/hive/orc/OrcFilters.scala | 248 --------- .../sql/hive/orc/HiveOrcFilterSuite.scala | 469 ------------------ .../sql/hive/orc/HiveOrcQuerySuite.scala | 22 +- 9 files changed, 13 insertions(+), 970 deletions(-) delete mode 100644 dev/deps/spark-deps-hadoop-2.7-hive-1.2 delete mode 100644 sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 b/dev/deps/spark-deps-hadoop-2.7-hive-1.2 deleted file mode 100644 index d07b04608328f..0000000000000 --- a/dev/deps/spark-deps-hadoop-2.7-hive-1.2 +++ /dev/null @@ -1,230 +0,0 @@ -JLargeArrays/1.5//JLargeArrays-1.5.jar -JTransforms/3.1//JTransforms-3.1.jar -JavaEWAH/0.3.2//JavaEWAH-0.3.2.jar -RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar -ST4/4.0.4//ST4-4.0.4.jar -activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar -algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar -antlr-runtime/3.4//antlr-runtime-3.4.jar -antlr/2.7.7//antlr-2.7.7.jar -antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar -aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar -apache-log4j-extras/1.2.17//apache-log4j-extras-1.2.17.jar -apacheds-i18n/2.0.0-M15//apacheds-i18n-2.0.0-M15.jar -apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar -api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar -api-util/1.0.0-M20//api-util-1.0.0-M20.jar -arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar -audience-annotations/0.5.0//audience-annotations-0.5.0.jar -automaton/1.11-8//automaton-1.11-8.jar -avro-ipc/1.8.2//avro-ipc-1.8.2.jar -avro-mapred/1.8.2/hadoop2/avro-mapred-1.8.2-hadoop2.jar -avro/1.8.2//avro-1.8.2.jar -bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar -breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar -breeze_2.12/1.0//breeze_2.12-1.0.jar -cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar -chill-java/0.9.5//chill-java-0.9.5.jar -chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar -commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar -commons-collections/3.2.2//commons-collections-3.2.2.jar -commons-compiler/3.0.16//commons-compiler-3.0.16.jar -commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration/1.6//commons-configuration-1.6.jar -commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-dbcp/1.4//commons-dbcp-1.4.jar -commons-digester/1.8//commons-digester-1.8.jar -commons-httpclient/3.1//commons-httpclient-3.1.jar -commons-io/2.4//commons-io-2.4.jar -commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar -commons-logging/1.1.3//commons-logging-1.1.3.jar -commons-math3/3.4.1//commons-math3-3.4.1.jar -commons-net/3.1//commons-net-3.1.jar -commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.6//commons-text-1.6.jar -compress-lzf/1.0.3//compress-lzf-1.0.3.jar 
-core/1.1.2//core-1.1.2.jar -curator-client/2.7.1//curator-client-2.7.1.jar -curator-framework/2.7.1//curator-framework-2.7.1.jar -curator-recipes/2.7.1//curator-recipes-2.7.1.jar -datanucleus-api-jdo/3.2.6//datanucleus-api-jdo-3.2.6.jar -datanucleus-core/3.2.10//datanucleus-core-3.2.10.jar -datanucleus-rdbms/3.2.9//datanucleus-rdbms-3.2.9.jar -derby/10.12.1.1//derby-10.12.1.1.jar -flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar -generex/1.0.2//generex-1.0.2.jar -gson/2.2.4//gson-2.2.4.jar -guava/14.0.1//guava-14.0.1.jar -guice-servlet/3.0//guice-servlet-3.0.jar -guice/3.0//guice-3.0.jar -hadoop-annotations/2.7.4//hadoop-annotations-2.7.4.jar -hadoop-auth/2.7.4//hadoop-auth-2.7.4.jar -hadoop-client/2.7.4//hadoop-client-2.7.4.jar -hadoop-common/2.7.4//hadoop-common-2.7.4.jar -hadoop-hdfs/2.7.4//hadoop-hdfs-2.7.4.jar -hadoop-mapreduce-client-app/2.7.4//hadoop-mapreduce-client-app-2.7.4.jar -hadoop-mapreduce-client-common/2.7.4//hadoop-mapreduce-client-common-2.7.4.jar -hadoop-mapreduce-client-core/2.7.4//hadoop-mapreduce-client-core-2.7.4.jar -hadoop-mapreduce-client-jobclient/2.7.4//hadoop-mapreduce-client-jobclient-2.7.4.jar -hadoop-mapreduce-client-shuffle/2.7.4//hadoop-mapreduce-client-shuffle-2.7.4.jar -hadoop-yarn-api/2.7.4//hadoop-yarn-api-2.7.4.jar -hadoop-yarn-client/2.7.4//hadoop-yarn-client-2.7.4.jar -hadoop-yarn-common/2.7.4//hadoop-yarn-common-2.7.4.jar -hadoop-yarn-server-common/2.7.4//hadoop-yarn-server-common-2.7.4.jar -hadoop-yarn-server-web-proxy/2.7.4//hadoop-yarn-server-web-proxy-2.7.4.jar -hk2-api/2.6.1//hk2-api-2.6.1.jar -hk2-locator/2.6.1//hk2-locator-2.6.1.jar -hk2-utils/2.6.1//hk2-utils-2.6.1.jar -htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar -httpcore/4.4.12//httpcore-4.4.12.jar -istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar -ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar -jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar -jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar -jackson-xc/1.9.13//jackson-xc-1.9.13.jar -jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar -jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar -jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar -jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar -jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar -jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.0.16//janino-3.0.16.jar -javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar -javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar -javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar -jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar -jdo-api/3.0.1//jdo-api-3.0.1.jar -jersey-client/2.30//jersey-client-2.30.jar -jersey-common/2.30//jersey-common-2.30.jar -jersey-container-servlet-core/2.30//jersey-container-servlet-core-2.30.jar 
-jersey-container-servlet/2.30//jersey-container-servlet-2.30.jar -jersey-hk2/2.30//jersey-hk2-2.30.jar -jersey-media-jaxb/2.30//jersey-media-jaxb-2.30.jar -jersey-server/2.30//jersey-server-2.30.jar -jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar -jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty/6.1.26//jetty-6.1.26.jar -jline/2.14.6//jline-2.14.6.jar -joda-time/2.10.5//joda-time-2.10.5.jar -jodd-core/3.5.2//jodd-core-3.5.2.jar -jpam/1.1//jpam-1.1.jar -json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar -json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar -json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar -json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar -jsr305/3.0.0//jsr305-3.0.0.jar -jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar -kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar -leveldbjni-all/1.8//leveldbjni-all-1.8.jar -libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar -log4j/1.2.17//log4j-1.2.17.jar -logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar -lz4-java/1.7.1//lz4-java-1.7.1.jar -machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar -macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar -mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar -metrics-core/4.1.1//metrics-core-4.1.1.jar -metrics-graphite/4.1.1//metrics-graphite-4.1.1.jar -metrics-jmx/4.1.1//metrics-jmx-4.1.1.jar -metrics-json/4.1.1//metrics-json-4.1.1.jar -metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar -minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -objenesis/2.6//objenesis-2.6.jar -okhttp/3.12.12//okhttp-3.12.12.jar -okio/1.14.0//okio-1.14.0.jar -opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar -orc-core/1.5.12/nohive/orc-core-1.5.12-nohive.jar -orc-mapreduce/1.5.12/nohive/orc-mapreduce-1.5.12-nohive.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar -oro/2.0.8//oro-2.0.8.jar -osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar -paranamer/2.8//paranamer-2.8.jar 
-parquet-column/1.10.1//parquet-column-1.10.1.jar -parquet-common/1.10.1//parquet-common-1.10.1.jar -parquet-encoding/1.10.1//parquet-encoding-1.10.1.jar -parquet-format/2.4.0//parquet-format-2.4.0.jar -parquet-hadoop-bundle/1.6.0//parquet-hadoop-bundle-1.6.0.jar -parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar -parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar -protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar -pyrolite/4.30//pyrolite-4.30.jar -scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar -scala-compiler/2.12.10//scala-compiler-2.12.10.jar -scala-library/2.12.10//scala-library-2.12.10.jar -scala-parser-combinators_2.12/1.1.2//scala-parser-combinators_2.12-1.1.2.jar -scala-reflect/2.12.10//scala-reflect-2.12.10.jar -scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar -shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar -shims/0.9.0//shims-0.9.0.jar -slf4j-api/1.7.30//slf4j-api-1.7.30.jar -slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar -snappy/0.2//snappy-0.2.jar -spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar -spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar -spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar -spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar -stax-api/1.0.1//stax-api-1.0.1.jar -stream/2.9.6//stream-2.9.6.jar -stringtemplate/3.2.1//stringtemplate-3.2.1.jar -super-csv/2.2.0//super-csv-2.2.0.jar -threeten-extra/1.5.0//threeten-extra-1.5.0.jar -univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar -xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar -xercesImpl/2.12.0//xercesImpl-2.12.0.jar -xml-apis/1.4.01//xml-apis-1.4.01.jar -xmlenc/0.52//xmlenc-0.52.jar -xz/1.5//xz-1.5.jar -zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar -zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 4ff5b327e3325..610fb1fd27027 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -175,8 +175,6 @@ def main(): if "test-hadoop3.2" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop3.2" # Switch the Hive profile based on the PR title: - if "test-hive1.2" in ghprb_pull_title: - os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive1.2" if "test-hive2.3" in ghprb_pull_title: os.environ["AMPLAB_JENKINS_BUILD_HIVE_PROFILE"] = "hive2.3" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index afc83d7c395f0..681ed91afaa12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -39,10 +39,6 @@ import org.apache.spark.sql.types._ /** * A test suite that tests Apache ORC filter API based filter pushdown optimization. - * OrcFilterSuite and HiveOrcFilterSuite is logically duplicated to provide the same test coverage. - * The difference are the packages containing 'Predicate' and 'SearchArgument' classes. - * - OrcFilterSuite uses 'org.apache.orc.storage.ql.io.sarg' package. - * - HiveOrcFilterSuite uses 'org.apache.hadoop.hive.ql.io.sarg' package. 
*/ class OrcFilterSuite extends OrcTest with SharedSparkSession { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index d2970ef1bb63d..ead2c2cf1b70f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -217,7 +217,6 @@ abstract class OrcQueryTest extends OrcTest { } } - // Hive supports zlib, snappy and none for Hive 1.2.1. test("Compression options for writing to an ORC file (SNAPPY, ZLIB and NONE)") { withTempPath { file => spark.range(0, 10).write diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index aec61acda5444..4243318ac1dd8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -46,7 +46,6 @@ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION * -> OrcPartitionDiscoverySuite * -> HiveOrcPartitionDiscoverySuite * -> OrcFilterSuite - * -> HiveOrcFilterSuite */ abstract class OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfterAll { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index d1ee1baadcbce..2868bb4ba85d3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -47,7 +47,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources._ -import org.apache.spark.sql.execution.datasources.orc.OrcOptions +import org.apache.spark.sql.execution.datasources.orc.{OrcFilters, OrcOptions} import org.apache.spark.sql.hive.{HiveInspectors, HiveShim} import org.apache.spark.sql.sources.{Filter, _} import org.apache.spark.sql.types._ @@ -139,7 +139,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable if (sparkSession.sessionState.conf.orcFilterPushDown) { // Sets pushed predicates - OrcFilters.createFilter(requiredSchema, filters.toArray).foreach { f => + OrcFilters.createFilter(requiredSchema, filters).foreach { f => hadoopConf.set(OrcFileFormat.SARG_PUSHDOWN, toKryo(f)) hadoopConf.setBoolean(ConfVars.HIVEOPTINDEXFILTER.varname, true) } @@ -296,7 +296,7 @@ private[orc] class OrcOutputWriter( override def close(): Unit = { if (recordWriterInstantiated) { - // Hive 1.2.1 ORC initializes its private `writer` field at the first write. + // Hive ORC initializes its private `writer` field at the first write. 
OrcFileFormat.addSparkVersionMetadata(recordWriter) recordWriter.close(Reporter.NULL) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala deleted file mode 100644 index ea5c7ca15b065..0000000000000 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.orc - -import java.lang.reflect.Method - -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument -import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.Builder -import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory.newBuilder - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded -import org.apache.spark.sql.execution.datasources.orc.{OrcFilters => DatasourceOrcFilters} -import org.apache.spark.sql.execution.datasources.orc.OrcFilters.buildTree -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.sources._ -import org.apache.spark.sql.types._ - -/** - * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. - * - * Due to limitation of ORC `SearchArgument` builder, we had to end up with a pretty weird double- - * checking pattern when converting `And`/`Or`/`Not` filters. - * - * An ORC `SearchArgument` must be built in one pass using a single builder. For example, you can't - * build `a = 1` and `b = 2` first, and then combine them into `a = 1 AND b = 2`. This is quite - * different from the cases in Spark SQL or Parquet, where complex filters can be easily built using - * existing simpler ones. - * - * The annoying part is that, `SearchArgument` builder methods like `startAnd()`, `startOr()`, and - * `startNot()` mutate internal state of the builder instance. This forces us to translate all - * convertible filters with a single builder instance. However, before actually converting a filter, - * we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible filter is - * found, we may already end up with a builder whose internal state is inconsistent. - * - * For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and then - * try to convert its children. Say we convert `left` child successfully, but find that `right` - * child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is inconsistent - * now. 
- * - * The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their - * children with brand new builders, and only do the actual conversion with the right builder - * instance when the children are proven to be convertible. - * - * P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. Usage of - * builder methods mentioned above can only be found in test code, where all tested filters are - * known to be convertible. - */ -private[orc] object OrcFilters extends Logging { - - private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method = { - val method = klass.getMethod(name, args: _*) - method.setAccessible(true) - method - } - - def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = { - DatasourceOrcFilters.createFilter(schema, filters).asInstanceOf[Option[SearchArgument]] - } - - def convertibleFilters( - schema: StructType, - dataTypeMap: Map[String, DataType], - filters: Seq[Filter]): Seq[Filter] = { - import org.apache.spark.sql.sources._ - - def convertibleFiltersHelper( - filter: Filter, - canPartialPushDown: Boolean): Option[Filter] = filter match { - // At here, it is not safe to just convert one side and remove the other side - // if we do not understand what the parent filters are. - // - // Here is an example used to explain the reason. - // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to - // convert b in ('1'). If we only convert a = 2, we will end up with a filter - // NOT(a = 2), which will generate wrong results. - // - // Pushing one side of AND down is only safe to do at the top level or in the child - // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate - // can be safely removed. - case And(left, right) => - val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown) - val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown) - (leftResultOptional, rightResultOptional) match { - case (Some(leftResult), Some(rightResult)) => Some(And(leftResult, rightResult)) - case (Some(leftResult), None) if canPartialPushDown => Some(leftResult) - case (None, Some(rightResult)) if canPartialPushDown => Some(rightResult) - case _ => None - } - - // The Or predicate is convertible when both of its children can be pushed down. - // That is to say, if one/both of the children can be partially pushed down, the Or - // predicate can be partially pushed down as well. - // - // Here is an example used to explain the reason. - // Let's say we have - // (a1 AND a2) OR (b1 AND b2), - // a1 and b1 is convertible, while a2 and b2 is not. - // The predicate can be converted as - // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2) - // As per the logical in And predicate, we can push down (a1 OR b1). - case Or(left, right) => - for { - lhs <- convertibleFiltersHelper(left, canPartialPushDown) - rhs <- convertibleFiltersHelper(right, canPartialPushDown) - } yield Or(lhs, rhs) - case Not(pred) => - val childResultOptional = convertibleFiltersHelper(pred, canPartialPushDown = false) - childResultOptional.map(Not) - case other => - for (_ <- buildLeafSearchArgument(dataTypeMap, other, newBuilder())) yield other - } - filters.flatMap { filter => - convertibleFiltersHelper(filter, true) - } - } - - /** - * Build a SearchArgument and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. 
- * @param expression the input predicates, which should be fully convertible to SearchArgument. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildSearchArgument( - dataTypeMap: Map[String, DataType], - expression: Filter, - builder: Builder): Builder = { - expression match { - case And(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startAnd()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Or(left, right) => - val lhs = buildSearchArgument(dataTypeMap, left, builder.startOr()) - val rhs = buildSearchArgument(dataTypeMap, right, lhs) - rhs.end() - - case Not(child) => - buildSearchArgument(dataTypeMap, child, builder.startNot()).end() - - case other => - buildLeafSearchArgument(dataTypeMap, other, builder).getOrElse { - throw new SparkException( - "The input filter of OrcFilters.buildSearchArgument should be fully convertible.") - } - } - } - - /** - * Build a SearchArgument for a leaf predicate and return the builder so far. - * - * @param dataTypeMap a map from the attribute name to its data type. - * @param expression the input filter predicates. - * @param builder the input SearchArgument.Builder. - * @return the builder so far. - */ - private def buildLeafSearchArgument( - dataTypeMap: Map[String, DataType], - expression: Filter, - builder: Builder): Option[Builder] = { - def isSearchableType(dataType: DataType): Boolean = dataType match { - // Only the values in the Spark types below can be recognized by - // the `SearchArgumentImpl.BuilderImpl.boxLiteral()` method. - case ByteType | ShortType | FloatType | DoubleType => true - case IntegerType | LongType | StringType | BooleanType => true - case TimestampType | _: DecimalType => true - case _ => false - } - - import org.apache.spark.sql.sources._ - - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). - expression match { - // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` - // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be - // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). 
- - case EqualTo(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "equals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case EqualNullSafe(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "nullSafeEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case LessThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case LessThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case GreaterThan(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "lessThanEquals", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case GreaterThanOrEqual(attribute, value) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "lessThan", classOf[String], classOf[Object]) - Some(method.invoke(bd, attribute, value.asInstanceOf[AnyRef]).asInstanceOf[Builder].end()) - - case IsNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "isNull", classOf[String]) - Some(method.invoke(bd, attribute).asInstanceOf[Builder].end()) - - case IsNotNull(attribute) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startNot() - val method = findMethod(bd.getClass, "isNull", classOf[String]) - Some(method.invoke(bd, attribute).asInstanceOf[Builder].end()) - - case In(attribute, values) if isSearchableType(dataTypeMap(attribute)) => - val bd = builder.startAnd() - val method = findMethod(bd.getClass, "in", classOf[String], classOf[Array[Object]]) - Some(method.invoke(bd, attribute, values.map(_.asInstanceOf[AnyRef])) - .asInstanceOf[Builder].end()) - - case _ => None - } - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala deleted file mode 100644 index deb85f30463ae..0000000000000 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcFilterSuite.scala +++ /dev/null @@ -1,469 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.orc - -import java.nio.charset.StandardCharsets -import java.sql.{Date, Timestamp} - -import scala.collection.JavaConverters._ - -import org.apache.hadoop.hive.ql.io.sarg.{PredicateLeaf, SearchArgument} - -import org.apache.spark.sql.{Column, DataFrame} -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.planning.PhysicalOperation -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} -import org.apache.spark.sql.execution.datasources.orc.OrcTest -import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.types._ - -/** - * A test suite that tests Hive ORC filter API based filter pushdown optimization. - */ -class HiveOrcFilterSuite extends OrcTest with TestHiveSingleton { - - override val orcImp: String = "hive" - - private def checkFilterPredicate( - df: DataFrame, - predicate: Predicate, - checker: (SearchArgument) => Unit): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - .where(Column(predicate)) - - var maybeRelation: Option[HadoopFsRelation] = None - val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => - maybeRelation = Some(orcRelation) - filters - }.flatten.reduceLeftOption(_ && _) - assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") - - val (_, selectedFilters, _) = - DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) - assert(selectedFilters.nonEmpty, "No filter is pushed down") - - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) - assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $selectedFilters") - checker(maybeFilter.get) - } - - private def checkFilterPredicate - (predicate: Predicate, filterOperator: PredicateLeaf.Operator) - (implicit df: DataFrame): Unit = { - def checkComparisonOperator(filter: SearchArgument) = { - val operator = filter.getLeaves.asScala - assert(operator.map(_.getOperator).contains(filterOperator)) - } - checkFilterPredicate(df, predicate, checkComparisonOperator) - } - - private def checkFilterPredicateWithDiffHiveVersion - (predicate: Predicate, stringExpr: String) - (implicit df: DataFrame): Unit = { - def checkLogicalOperator(filter: SearchArgument) = { - assert(filter.toString == stringExpr.replace("\n", ", ")) - } - checkFilterPredicate(df, predicate, checkLogicalOperator) - } - - private def assertResultWithDiffHiveVersion(expected : String)(c : scala.Any) = { - assertResult(expected.replace("\n", ", "))(c) - } - - private def checkNoFilterPredicate - (predicate: Predicate) - (implicit df: DataFrame): Unit = { - val output = predicate.collect { case a: Attribute => a }.distinct - val query = df - .select(output.map(e => Column(e)): _*) - 
.where(Column(predicate)) - - var maybeRelation: Option[HadoopFsRelation] = None - val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { - case PhysicalOperation(_, filters, LogicalRelation(orcRelation: HadoopFsRelation, _, _, _)) => - maybeRelation = Some(orcRelation) - filters - }.flatten.reduceLeftOption(_ && _) - assert(maybeAnalyzedPredicate.isDefined, "No filter is analyzed from the given query") - - val (_, selectedFilters, _) = - DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) - assert(selectedFilters.nonEmpty, "No filter is pushed down") - - val maybeFilter = OrcFilters.createFilter(query.schema, selectedFilters.toArray) - assert(maybeFilter.isEmpty, s"Could generate filter predicate for $selectedFilters") - } - - test("filter pushdown - integer") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - long") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - float") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, 
PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - double") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === 1, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < 2, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= 4, PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(1) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(1) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(2) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(3) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(1) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(4) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - string") { - withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === "1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < "2", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= "4", PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal("1") === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal("1") <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal("2") > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal("3") < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("1") >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal("4") <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - boolean") { - withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === true, PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> true, PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < true, PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= false, PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= false, PredicateLeaf.Operator.LESS_THAN) - - 
checkFilterPredicate(Literal(false) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(false) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(false) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(true) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(true) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - decimal") { - withOrcDataFrame((1 to 4).map(i => Tuple1.apply(BigDecimal.valueOf(i)))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === BigDecimal.valueOf(1), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> BigDecimal.valueOf(1), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < BigDecimal.valueOf(2), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > BigDecimal.valueOf(3), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= BigDecimal.valueOf(1), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= BigDecimal.valueOf(4), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(2)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate( - Literal(BigDecimal.valueOf(3)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(1)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate( - Literal(BigDecimal.valueOf(4)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - timestamp") { - val timeString = "2015-08-20 14:57:00" - val timestamps = (1 to 4).map { i => - val milliseconds = Timestamp.valueOf(timeString).getTime + i * 3600 - new Timestamp(milliseconds) - } - withOrcDataFrame(timestamps.map(Tuple1(_))) { implicit df => - checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL) - - checkFilterPredicate($"_1" === timestamps(0), PredicateLeaf.Operator.EQUALS) - checkFilterPredicate($"_1" <=> timestamps(0), PredicateLeaf.Operator.NULL_SAFE_EQUALS) - - checkFilterPredicate($"_1" < timestamps(1), PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate($"_1" > timestamps(2), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" <= timestamps(0), PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate($"_1" >= timestamps(3), PredicateLeaf.Operator.LESS_THAN) - - checkFilterPredicate(Literal(timestamps(0)) === $"_1", PredicateLeaf.Operator.EQUALS) - checkFilterPredicate(Literal(timestamps(0)) <=> $"_1", - PredicateLeaf.Operator.NULL_SAFE_EQUALS) - checkFilterPredicate(Literal(timestamps(1)) > $"_1", PredicateLeaf.Operator.LESS_THAN) - checkFilterPredicate(Literal(timestamps(2)) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(0)) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS) - checkFilterPredicate(Literal(timestamps(3)) <= $"_1", PredicateLeaf.Operator.LESS_THAN) - } - } - - test("filter pushdown - combinations with logical operators") { - withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df => - // Because `ExpressionTree` is not accessible at Hive 1.2.x, this 
should be checked - // in string form in order to check filter creation including logical operators - // such as `and`, `or` or `not`. So, this function uses `SearchArgument.toString()` - // to produce string expression and then compare it to given string expression below. - // This might have to be changed after Hive version is upgraded. - checkFilterPredicateWithDiffHiveVersion( - $"_1".isNotNull, - """leaf-0 = (IS_NULL _1) - |expr = (not leaf-0)""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" =!= 1, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (EQUALS _1 1) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - !($"_1" < 4), - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 4) - |expr = (and (not leaf-0) (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" < 2 || $"_1" > 3, - """leaf-0 = (LESS_THAN _1 2) - |leaf-1 = (LESS_THAN_EQUALS _1 3) - |expr = (or leaf-0 (not leaf-1))""".stripMargin.trim - ) - checkFilterPredicateWithDiffHiveVersion( - $"_1" < 2 && $"_1" > 3, - """leaf-0 = (IS_NULL _1) - |leaf-1 = (LESS_THAN _1 2) - |leaf-2 = (LESS_THAN_EQUALS _1 3) - |expr = (and (not leaf-0) leaf-1 (not leaf-2))""".stripMargin.trim - ) - } - } - - test("no filter pushdown - non-supported types") { - implicit class IntToBinary(int: Int) { - def b: Array[Byte] = int.toString.getBytes(StandardCharsets.UTF_8) - } - // ArrayType - withOrcDataFrame((1 to 4).map(i => Tuple1(Array(i)))) { implicit df => - checkNoFilterPredicate($"_1".isNull) - } - // BinaryType - withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df => - checkNoFilterPredicate($"_1" <=> 1.b) - } - // MapType - withOrcDataFrame((1 to 4).map(i => Tuple1(Map(i -> i)))) { implicit df => - checkNoFilterPredicate($"_1".isNotNull) - } - } - - test("SPARK-12218 and SPARK-25699 Converting conjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - StringContains("b", "prefix") - )).get.toString - } - - // The `LessThan` should be converted while the whole inner `And` shouldn't - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - LessThan("a", 10), - Not(And( - GreaterThan("a", 1), - StringContains("b", "prefix") - )) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate and push down `LessThan` - assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |expr = leaf-0 - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ) - )).get.toString - } - - // Safely remove unsupported `StringContains` predicate, push down `LessThan` and `GreaterThan`. 
- assertResultWithDiffHiveVersion( - """leaf-0 = (LESS_THAN a 10) - |leaf-1 = (LESS_THAN_EQUALS a 1) - |expr = (and leaf-0 (not leaf-1)) - """.stripMargin.trim - ) { - OrcFilters.createFilter(schema, Array( - And( - And( - LessThan("a", 10), - StringContains("b", "prefix") - ), - GreaterThan("a", 1) - ) - )).get.toString - } - } - - test("SPARK-27699 Converting disjunctions into ORC SearchArguments") { - import org.apache.spark.sql.sources._ - // The `LessThan` should be converted while the `StringContains` shouldn't - val schema = new StructType( - Array( - StructField("a", IntegerType, nullable = true), - StructField("b", StringType, nullable = true))) - - // The predicate `StringContains` predicate is not able to be pushed down. - assertResultWithDiffHiveVersion("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" + - "expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - GreaterThan("a", 10), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assertResultWithDiffHiveVersion("leaf-0 = (LESS_THAN_EQUALS a 10)\nleaf-1 = (LESS_THAN a 1)\n" + - "expr = (or (not leaf-0) leaf-1)") { - OrcFilters.createFilter(schema, Array( - Or( - And( - GreaterThan("a", 10), - StringContains("b", "foobar") - ), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).get.toString - } - - assert(OrcFilters.createFilter(schema, Array( - Or( - StringContains("b", "foobar"), - And( - StringContains("b", "prefix"), - LessThan("a", 1) - ) - ) - )).isEmpty) - } -} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 1901ed505197c..fcf7febe33121 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -168,9 +168,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - // Since Hive 1.2.1 library code path still has this problem, users may hit this - // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, - // Apache Spark with the default configuration doesn't hit this bug. test("SPARK-22267 Spark SQL incorrectly reads ORC files when column order is different") { Seq("native", "hive").foreach { orcImpl => withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { @@ -179,10 +176,12 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { Seq(1 -> 2).toDF("c1", "c2").write.orc(path) checkAnswer(spark.read.orc(path), Row(1, 2)) - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - withTable("t") { - sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") - checkAnswer(spark.table("t"), Row(2, 1)) + Seq(true, false).foreach { convertMetastoreOrc => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) { + withTable("t") { + sql(s"CREATE EXTERNAL TABLE t(c2 INT, c1 INT) STORED AS ORC LOCATION '$path'") + checkAnswer(spark.table("t"), Row(2, 1)) + } } } } @@ -190,9 +189,6 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - // Since Hive 1.2.1 library code path still has this problem, users may hit this - // when spark.sql.hive.convertMetastoreOrc=false. However, after SPARK-22279, - // Apache Spark with the default configuration doesn't hit this bug. 
test("SPARK-19809 NullPointerException on zero-size ORC file") { Seq("native", "hive").foreach { orcImpl => withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl) { @@ -201,8 +197,10 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { sql(s"CREATE TABLE spark_19809(a int) STORED AS ORC LOCATION '$dir'") Files.touch(new File(s"${dir.getCanonicalPath}", "zero.orc")) - withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // default since 2.3.0 - checkAnswer(spark.table("spark_19809"), Seq.empty) + Seq(true, false).foreach { convertMetastoreOrc => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> convertMetastoreOrc.toString) { + checkAnswer(spark.table("spark_19809"), Seq.empty) + } } } } From 018811f9747d063a44543ceb265351377f0bc917 Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 10 Oct 2020 13:48:26 +0900 Subject: [PATCH 0207/1009] [SPARK-33105][INFRA] Change default R arch from i386 to x64 and parametrize BINPREF ### What changes were proposed in this pull request? - Change default R `arch` from `i386` to `x64`, to match Rtools version. - Parameterize `BINPREF` with `WIN` (https://stackoverflow.com/a/44035904) Reported on dev: http://apache-spark-developers-list.1001551.n3.nabble.com/Broken-rlang-installation-on-AppVeyor-td30294.html ### Why are the changes needed? It seems like update from rlang 0.4.7 to 0.4.8 exposed an issue, where build fails because of incompatible ddl ``` c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: skipping incompatible C:/R/bin/i386/R.dll when searching for -lR [00:01:52] c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: skipping incompatible C:/R/bin/i386/R.dll when searching for -lR [00:01:52] c:/Rtools40/mingw64/bin/../lib/gcc/x86_64-w64-mingw32/8.3.0/../../../../x86_64-w64-mingw32/bin/ld.exe: cannot find -lR [00:01:52] collect2.exe: error: ld returned 1 exit status ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #29991 from zero323/APPVEYOR-DEAFAULT-ARCH. Authored-by: zero323 Signed-off-by: HyukjinKwon --- dev/appveyor-install-dependencies.ps1 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index e344a7fc23191..fb4cc22de35f4 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -19,7 +19,7 @@ $CRAN = "https://cloud.r-project.org" Function InstallR { if ( -not(Test-Path Env:\R_ARCH) ) { - $arch = "i386" + $arch = "x64" } Else { $arch = $env:R_ARCH @@ -68,7 +68,7 @@ Function InstallRtools { $gccPath = $env:GCC_PATH } $env:PATH = $RtoolsDrive + '\Rtools40\bin;' + $RtoolsDrive + '\Rtools40\mingw64\bin;' + $RtoolsDrive + '\Rtools40\' + $gccPath + '\bin;' + $env:PATH - $env:BINPREF=$RtoolsDrive + '/Rtools40/mingw64/bin/' + $env:BINPREF=$RtoolsDrive + '/Rtools40/mingw$(WIN)/bin/' } # create tools directory outside of Spark directory From 1e63dcc8f0b267f3d835db5b59c60360c04d9c0a Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Sat, 10 Oct 2020 13:53:09 +0900 Subject: [PATCH 0208/1009] [SPARK-33102][SQL] Use stringToSeq on SQL list typed parameters ### What changes were proposed in this pull request? 
While implementing the JDBC provider disable functionality, it was pointed out [here](https://github.com/apache/spark/pull/29964#discussion_r501786746) that `Utils.stringToSeq` must be used when a string-list-typed SQL parameter is handled. In this PR I've fixed the problematic parameters. ### Why are the changes needed? `Utils.stringToSeq` must be used when a string-list-typed SQL parameter is handled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #29989 from gaborgsomogyi/SPARK-33102. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../spark/sql/execution/streaming/MicroBatchExecution.scala | 4 ++-- .../org/apache/spark/sql/streaming/DataStreamWriter.scala | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index aad212cc13486..c485d0f7d8b2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.execution.datasources.v2.{StreamingDataSourceV2Relat import org.apache.spark.sql.execution.streaming.sources.WriteToMicroBatchDataSource import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, Trigger} -import org.apache.spark.util.Clock +import org.apache.spark.util.{Clock, Utils} class MicroBatchExecution( sparkSession: SparkSession, @@ -76,7 +76,7 @@ class MicroBatchExecution( // transformation is responsible for replacing attributes with their final values. val disabledSources = - sparkSession.sqlContext.conf.disabledV2StreamingMicroBatchReaders.split(",") + Utils.stringToSeq(sparkSession.sqlContext.conf.disabledV2StreamingMicroBatchReaders) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ val _logicalPlan = analyzedPlan.transform { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 239b4fc2de374..2867bf581df81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils /** * Interface used to write a streaming `Dataset` to external storage systems (e.g. file systems, @@ -366,7 +367,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { startQuery(sink, extraOptions) } else { val cls = DataSource.lookupDataSource(source, df.sparkSession.sessionState.conf) - val disabledSources = df.sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") + val disabledSources = + Utils.stringToSeq(df.sparkSession.sqlContext.conf.disabledV2StreamingWriters) val useV1Source = disabledSources.contains(cls.getCanonicalName) || // file source v2 does not support streaming yet. 
classOf[FileDataSourceV2].isAssignableFrom(cls) From dfb7790a9dad8e98bd27001a613b4e13a5eb9d51 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 9 Oct 2020 22:35:12 -0700 Subject: [PATCH 0209/1009] [SPARK-33108][BUILD] Remove sbt-dependency-graph SBT plugin ### What changes were proposed in this pull request? This PR aims to remove the `sbt-dependency-graph` SBT plugin. ### Why are the changes needed? `sbt-dependency-graph` officially doesn't support SBT 1.3.x and is broken due to a `NoSuchMethodError`. This cannot be fixed on the `sbt-dependency-graph` side for SBT 1.3.x - https://github.com/sbt/sbt-dependency-graph > Note: Under sbt >= 1.3.x some features might currently not work as expected or not at all (like dependencyLicenses). ``` $ build/sbt dependencyTree Launching sbt from build/sbt-launch-1.3.13.jar [info] welcome to sbt 1.3.13 (AdoptOpenJDK Java 1.8.0_252) ... [error] java.lang.NoSuchMethodError: sbt.internal.LibraryManagement$.cachedUpdate(Lsbt/librarymanagement/DependencyResolution;Lsbt/librarymanagement/ModuleDescriptor;Lsbt/util/CacheStoreFactory;Ljava/lang/String;Lsbt/librarymanagement/UpdateConfiguration;Lscala/Function1;ZZZLsbt/librarymanagement/UnresolvedWarningConfiguration;Lsbt/librarymanagement/EvictionWarningOptions;ZLsbt/internal/librarymanagement/CompatibilityWarningOptions;Lsbt/util/Logger;)Lsbt/librarymanagement/UpdateReport; ``` **ALTERNATIVES** - One alternative is `coursier`, but it requires the `coursier-based sbt launcher`, which is more intrusive. - https://get-coursier.io/docs/sbt-coursier.html#sbt-13x > you'll have to use the coursier-based sbt launcher, via its custom sbt-extras launcher for example. - Another alternative is moving to `SBT 1.4.0`, which includes `sbt-dependency-graph` as a built-in, but it's still new and would require many changes. So, this PR simply removes the broken plugin. ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. ### How was this patch tested? Manual. ``` $ build/sbt dependencyTree ... [error] Not a valid command: dependencyTree [error] Not a valid project ID: dependencyTree [error] Not a valid key: dependencyTree (similar: dependencyOverrides, sbtDependency, dependencyResolution) [error] dependencyTree [error] ^ ``` Closes #29997 from dongjoon-hyun/remove_depedencyTree. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/plugins.sbt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index da466da9945c1..920aa677f9e92 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -27,8 +27,6 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.15.0") addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "5.2.4") -addSbtPlugin("net.virtual-void" % "sbt-dependency-graph" % "0.9.2") - addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.8.0") @@ -47,3 +45,5 @@ libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") + +// TODO(SPARK-33109) Upgrade to SBT 1.4 and support `dependencyTree` back From 7696ca56732166977642a777f1d94cfba67b4151 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Sat, 10 Oct 2020 16:24:50 -0700 Subject: [PATCH 0210/1009] [SPARK-32881][CORE] Catch some race condition errors and log them more clearly ### What changes were proposed in this pull request? 
Decommissioning can run out of time, resulting in race conditions. These race conditions produce confusing error messages but have no negative impact. ### Why are the changes needed? The NPE and missing-element errors in the log can create a misunderstanding. ### Does this PR introduce _any_ user-facing change? Logs change. ### How was this patch tested? Existing tests pass. Closes #29992 from holdenk/SPARK-32881-error-messaging-on-decom-race-messages. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/MapOutputTracker.scala | 21 +++++++++------- .../storage/BlockManagerMasterEndpoint.scala | 24 ++++++++++++------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 64102ccc05882..c3152d9225107 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -125,14 +125,19 @@ private class ShuffleStatus(numPartitions: Int) extends Logging { * Update the map output location (e.g. during migration). */ def updateMapOutput(mapId: Long, bmAddress: BlockManagerId): Unit = withWriteLock { - val mapStatusOpt = mapStatuses.find(_.mapId == mapId) - mapStatusOpt match { - case Some(mapStatus) => - logInfo(s"Updating map output for ${mapId} to ${bmAddress}") - mapStatus.updateLocation(bmAddress) - invalidateSerializedMapOutputStatusCache() - case None => - logError(s"Asked to update map output ${mapId} for untracked map status.") + try { + val mapStatusOpt = mapStatuses.find(_.mapId == mapId) + mapStatusOpt match { + case Some(mapStatus) => + logInfo(s"Updating map output for ${mapId} to ${bmAddress}") + mapStatus.updateLocation(bmAddress) + invalidateSerializedMapOutputStatusCache() + case None => + logWarning(s"Asked to update map output ${mapId} for untracked map status.") + } + } catch { + case e: java.lang.NullPointerException => + logWarning(s"Unable to update map output for ${mapId}, status removed in-flight") } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a3d42348befaa..61a88b4f26c00 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -380,16 +380,22 @@ class BlockManagerMasterEndpoint( * @return Seq of ReplicateBlock */ private def getReplicateInfoForRDDBlocks(blockManagerId: BlockManagerId): Seq[ReplicateBlock] = { - val info = blockManagerInfo(blockManagerId) + try { + val info = blockManagerInfo(blockManagerId) - val rddBlocks = info.blocks.keySet().asScala.filter(_.isRDD) - rddBlocks.map { blockId => - val currentBlockLocations = blockLocations.get(blockId) - val maxReplicas = currentBlockLocations.size + 1 - val remainingLocations = currentBlockLocations.toSeq.filter(bm => bm != blockManagerId) - val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas) - replicateMsg - }.toSeq + val rddBlocks = info.blocks.keySet().asScala.filter(_.isRDD) + rddBlocks.map { blockId => + val currentBlockLocations = blockLocations.get(blockId) + val maxReplicas = currentBlockLocations.size + 1 + val remainingLocations = currentBlockLocations.toSeq.filter(bm => bm != blockManagerId) + val replicateMsg = ReplicateBlock(blockId, remainingLocations, maxReplicas) + replicateMsg + }.toSeq + } 
catch { + // If the block manager has already exited, nothing to replicate. + case e: java.util.NoSuchElementException => + Seq.empty[ReplicateBlock] + } } // Remove a block from the workers that have it. This can only be used to remove From 5e170140b0374762087b204008da141febaacee3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 10 Oct 2020 16:41:42 -0700 Subject: [PATCH 0211/1009] [SPARK-33107][SQL] Remove hive-2.3 workaround code ### What changes were proposed in this pull request? This pr remove `hive-2.3` workaround code. ### Why are the changes needed? Make code more clear and readable. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #29996 from wangyum/SPARK-33107. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../SparkExecuteStatementOperation.scala | 3 +- .../SparkGetTypeInfoOperation.scala | 14 +++- .../hive/thriftserver/SparkSQLCLIDriver.scala | 5 +- .../thriftserver/SparkSQLSessionManager.scala | 3 +- .../thriftserver/ThriftserverShimUtils.scala | 80 ------------------- .../GetCatalogsOperationMock.scala | 3 +- .../thriftserver/HiveSessionImplSuite.scala | 3 +- .../HiveThriftServer2Suites.scala | 3 +- .../thriftserver/SharedThriftServer.scala | 3 +- .../SparkExecuteStatementOperationSuite.scala | 3 +- .../SparkMetadataOperationSuite.scala | 2 +- ...arkThriftServerProtocolVersionsSuite.scala | 33 ++++---- .../org/apache/spark/sql/hive/HiveShim.scala | 36 +-------- .../org/apache/spark/sql/hive/HiveUtils.scala | 4 +- .../org/apache/spark/sql/hive/hiveUDFs.scala | 8 +- 15 files changed, 51 insertions(+), 152 deletions(-) delete mode 100644 sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index d30951f89cf6b..ec2c795e95c83 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -118,8 +118,7 @@ private[hive] class SparkExecuteStatementOperation( validateDefaultFetchOrientation(order) assertState(OperationState.FINISHED) setHasResultSet(true) - val resultRowSet: RowSet = - ThriftserverShimUtils.resultRowSet(getResultSetSchema, getProtocolVersion) + val resultRowSet: RowSet = RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) // Reset iter when FETCH_FIRST or FETCH_PRIOR if ((order.equals(FetchOrientation.FETCH_FIRST) || diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index c2568ad4ada0a..26b5f8ad8cee1 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.hive.thriftserver import java.util.UUID import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType +import org.apache.hadoop.hive.serde2.thrift.Type +import org.apache.hadoop.hive.serde2.thrift.Type._ import 
org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.operation.GetTypeInfoOperation import org.apache.hive.service.cli.session.HiveSession @@ -61,7 +63,7 @@ private[hive] class SparkGetTypeInfoOperation( parentSession.getUsername) try { - ThriftserverShimUtils.supportedType().foreach(typeInfo => { + SparkGetTypeInfoUtil.supportedType.foreach(typeInfo => { val rowData = Array[AnyRef]( typeInfo.getName, // TYPE_NAME typeInfo.toJavaSQLType.asInstanceOf[AnyRef], // DATA_TYPE @@ -90,3 +92,13 @@ private[hive] class SparkGetTypeInfoOperation( HiveThriftServer2.eventManager.onStatementFinish(statementId) } } + +private[hive] object SparkGetTypeInfoUtil { + val supportedType: Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 6676223af4fce..965f28ebe0840 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.log4j.Level import org.apache.thrift.transport.TSocket +import org.slf4j.LoggerFactory import sun.misc.{Signal, SignalHandler} import org.apache.spark.SparkConf @@ -307,7 +308,9 @@ private[hive] object SparkSQLCLIDriver extends Logging { private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { private val sessionState = SessionState.get().asInstanceOf[CliSessionState] - private val console = ThriftserverShimUtils.getConsole + private val LOG = LoggerFactory.getLogger(classOf[SparkSQLCLIDriver]) + + private val console = new SessionState.LogHelper(LOG) private val isRemoteMode = { SparkSQLCLIDriver.isRemoteMode(sessionState) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 806b6146b2db1..e4559e69e7585 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.SessionHandle import org.apache.hive.service.cli.session.SessionManager +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext @@ -45,7 +46,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: } override def openSession( - protocol: ThriftserverShimUtils.TProtocolVersion, + protocol: TProtocolVersion, username: String, passwd: String, ipAddress: String, diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala deleted file mode 100644 index c8ac5226b296e..0000000000000 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.hive.thriftserver - -import org.apache.hadoop.hive.ql.session.SessionState -import org.apache.hadoop.hive.serde2.thrift.Type -import org.apache.hadoop.hive.serde2.thrift.Type._ -import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema} -import org.apache.hive.service.rpc.thrift.TProtocolVersion._ -import org.slf4j.LoggerFactory - -/** - * Various utilities for hive-thriftserver used to upgrade the built-in Hive. - */ -private[thriftserver] object ThriftserverShimUtils { - - private[thriftserver] object TOperationType { - val GET_TYPE_INFO = org.apache.hive.service.rpc.thrift.TOperationType.GET_TYPE_INFO - } - - private[thriftserver] type TProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion - private[thriftserver] type Client = org.apache.hive.service.rpc.thrift.TCLIService.Client - private[thriftserver] type TOpenSessionReq = org.apache.hive.service.rpc.thrift.TOpenSessionReq - private[thriftserver] type TGetSchemasReq = org.apache.hive.service.rpc.thrift.TGetSchemasReq - private[thriftserver] type TGetTablesReq = org.apache.hive.service.rpc.thrift.TGetTablesReq - private[thriftserver] type TGetColumnsReq = org.apache.hive.service.rpc.thrift.TGetColumnsReq - private[thriftserver] type TGetInfoReq = org.apache.hive.service.rpc.thrift.TGetInfoReq - private[thriftserver] type TExecuteStatementReq = - org.apache.hive.service.rpc.thrift.TExecuteStatementReq - private[thriftserver] type THandleIdentifier = - org.apache.hive.service.rpc.thrift.THandleIdentifier - private[thriftserver] type TOperationType = org.apache.hive.service.rpc.thrift.TOperationType - private[thriftserver] type TOperationHandle = org.apache.hive.service.rpc.thrift.TOperationHandle - - private[thriftserver] def getConsole: SessionState.LogHelper = { - val LOG = LoggerFactory.getLogger(classOf[SparkSQLCLIDriver]) - new SessionState.LogHelper(LOG) - } - - private[thriftserver] def resultRowSet( - getResultSetSchema: TableSchema, - getProtocolVersion: TProtocolVersion): RowSet = { - RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) - } - - private[thriftserver] def supportedType(): Seq[Type] = { - Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, - TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, - FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, - DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) - } - - private[thriftserver] val 
testedProtocolVersions = Seq( - HIVE_CLI_SERVICE_PROTOCOL_V1, - HIVE_CLI_SERVICE_PROTOCOL_V2, - HIVE_CLI_SERVICE_PROTOCOL_V3, - HIVE_CLI_SERVICE_PROTOCOL_V4, - HIVE_CLI_SERVICE_PROTOCOL_V5, - HIVE_CLI_SERVICE_PROTOCOL_V6, - HIVE_CLI_SERVICE_PROTOCOL_V7, - HIVE_CLI_SERVICE_PROTOCOL_V8, - HIVE_CLI_SERVICE_PROTOCOL_V9, - HIVE_CLI_SERVICE_PROTOCOL_V10) -} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala index 764f1690d5a66..1bc9aaf672c3b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/GetCatalogsOperationMock.scala @@ -22,8 +22,7 @@ import java.util.UUID import org.apache.hive.service.cli.OperationHandle import org.apache.hive.service.cli.operation.GetCatalogsOperation import org.apache.hive.service.cli.session.HiveSession - -import org.apache.spark.sql.hive.thriftserver.ThriftserverShimUtils.{THandleIdentifier, TOperationHandle, TOperationType} +import org.apache.hive.service.rpc.thrift.{THandleIdentifier, TOperationHandle, TOperationType} class GetCatalogsOperationMock(parentSession: HiveSession) extends GetCatalogsOperation(parentSession) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala index 47db7e34a5a2c..13dc74b92d4b3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.OperationHandle import org.apache.hive.service.cli.operation.{GetCatalogsOperation, Operation, OperationManager} import org.apache.hive.service.cli.session.{HiveSession, HiveSessionImpl, SessionManager} +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.spark.SparkFunSuite @@ -39,7 +40,7 @@ class HiveSessionImplSuite extends SparkFunSuite { operationManager = new OperationManagerMock() session = new HiveSessionImpl( - ThriftserverShimUtils.testedProtocolVersions.head, + TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1, "", "", new HiveConf(), diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 27d4c4bc40bec..75c00000dee47 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -37,6 +37,7 @@ import org.apache.hive.jdbc.HiveDriver import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.{FetchOrientation, FetchType, GetInfoType, RowSet} import org.apache.hive.service.cli.thrift.ThriftCLIServiceClient +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.scalatest.BeforeAndAfterAll @@ -67,7 +68,7 @@ class HiveThriftBinaryServerSuite extends 
HiveThriftJdbcTest { val user = System.getProperty("user.name") val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) val protocol = new TBinaryProtocol(transport) - val client = new ThriftCLIServiceClient(new ThriftserverShimUtils.Client(protocol)) + val client = new ThriftCLIServiceClient(new Client(protocol)) transport.open() try f(client) finally transport.close() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala index 5f17607585521..8f61268c838fe 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SharedThriftServer.scala @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.jdbc.HttpBasicAuthInterceptor import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.thrift.{ThriftCLIService, ThriftCLIServiceClient} +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.http.impl.client.HttpClientBuilder import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.{THttpClient, TSocket} @@ -115,7 +116,7 @@ trait SharedThriftServer extends SharedSparkSession { } val protocol = new TBinaryProtocol(transport) - val client = new ThriftCLIServiceClient(new ThriftserverShimUtils.Client(protocol)) + val client = new ThriftCLIServiceClient(new Client(protocol)) transport.open() try f(client) finally transport.close() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala index 4c2f29e0bf394..ca1f9a2f74244 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala @@ -25,6 +25,7 @@ import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.session.{HiveSession, HiveSessionImpl} +import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.mockito.Mockito.{doReturn, mock, spy, when, RETURNS_DEEP_STUBS} import org.mockito.invocation.InvocationOnMock @@ -64,7 +65,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark ).foreach { case (finalState, transition) => test("SPARK-32057 SparkExecuteStatementOperation should not transiently become ERROR " + s"before being set to $finalState") { - val hiveSession = new HiveSessionImpl(ThriftserverShimUtils.testedProtocolVersions.head, + val hiveSession = new HiveSessionImpl(TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V1, "username", "password", new HiveConf, "ip address") hiveSession.open(new util.HashMap) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 818f387f131d6..b94d819326d16 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -255,7 +255,7 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { withJdbcStatement() { statement => val metaData = statement.getConnection.getMetaData - checkResult(metaData.getTypeInfo, ThriftserverShimUtils.supportedType().map(_.getName)) + checkResult(metaData.getTypeInfo, SparkGetTypeInfoUtil.supportedType.map(_.getName)) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index d5582077d6170..52cf429441d16 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -23,11 +23,12 @@ import java.util.{List => JList, Properties} import org.apache.hive.jdbc.{HiveConnection, HiveQueryResultSet} import org.apache.hive.service.auth.PlainSaslHelper import org.apache.hive.service.cli.GetInfoType +import org.apache.hive.service.rpc.thrift.{TExecuteStatementReq, TGetInfoReq, TGetTablesReq, TOpenSessionReq, TProtocolVersion} +import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.unsafe.types.UTF8String class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { @@ -35,20 +36,20 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { override def mode: ServerMode.Value = ServerMode.binary def testExecuteStatementWithProtocolVersion( - version: ThriftserverShimUtils.TProtocolVersion, + version: TProtocolVersion, sql: String)(f: HiveQueryResultSet => Unit): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val user = System.getProperty("user.name") val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() var rs: HiveQueryResultSet = null try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val execReq = new ThriftserverShimUtils.TExecuteStatementReq(sessHandle, sql) + val execReq = new TExecuteStatementReq(sessHandle, sql) val execResp = client.ExecuteStatement(execReq) val stmtHandle = execResp.getOperationHandle @@ -73,23 +74,21 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - def testGetInfoWithProtocolVersion(version: ThriftserverShimUtils.TProtocolVersion): Unit = { + def testGetInfoWithProtocolVersion(version: TProtocolVersion): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val transport = 
PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val dbVersionReq = - new ThriftserverShimUtils.TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType) + val dbVersionReq = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_VER.toTGetInfoType) val dbVersion = client.GetInfo(dbVersionReq).getInfoValue.getStringValue - val dbNameReq = - new ThriftserverShimUtils.TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_NAME.toTGetInfoType) + val dbNameReq = new TGetInfoReq(sessHandle, GetInfoType.CLI_DBMS_NAME.toTGetInfoType) val dbName = client.GetInfo(dbNameReq).getInfoValue.getStringValue assert(dbVersion === org.apache.spark.SPARK_VERSION) @@ -102,21 +101,21 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } def testGetTablesWithProtocolVersion( - version: ThriftserverShimUtils.TProtocolVersion, + version: TProtocolVersion, schema: String, tableNamePattern: String, tableTypes: JList[String])(f: HiveQueryResultSet => Unit): Unit = { val rawTransport = new TSocket("localhost", serverPort) val connection = new HiveConnection(s"jdbc:hive2://localhost:$serverPort", new Properties) val transport = PlainSaslHelper.getPlainTransport(user, "anonymous", rawTransport) - val client = new ThriftserverShimUtils.Client(new TBinaryProtocol(transport)) + val client = new Client(new TBinaryProtocol(transport)) transport.open() var rs: HiveQueryResultSet = null try { - val clientProtocol = new ThriftserverShimUtils.TOpenSessionReq(version) + val clientProtocol = new TOpenSessionReq(version) val openResp = client.OpenSession(clientProtocol) val sessHandle = openResp.getSessionHandle - val getTableReq = new ThriftserverShimUtils.TGetTablesReq(sessHandle) + val getTableReq = new TGetTablesReq(sessHandle) getTableReq.setSchemaName(schema) getTableReq.setTableName(tableNamePattern) getTableReq.setTableTypes(tableTypes) @@ -144,7 +143,7 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - ThriftserverShimUtils.testedProtocolVersions.foreach { version => + TProtocolVersion.values().foreach { version => test(s"$version get byte type") { testExecuteStatementWithProtocolVersion(version, "SELECT cast(1 as byte)") { rs => assert(rs.next()) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index 1f8ce04270a04..3a53a2a8dadd8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -17,18 +17,16 @@ package org.apache.spark.sql.hive -import java.io.{InputStream, OutputStream} -import java.lang.reflect.Method import java.rmi.server.UID import scala.collection.JavaConverters._ import scala.language.implicitConversions -import scala.reflect.ClassTag import com.google.common.base.Objects import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.exec.SerializationUtilities import org.apache.hadoop.hive.ql.exec.UDF import org.apache.hadoop.hive.ql.plan.{FileSinkDesc, TableDesc} import 
org.apache.hadoop.hive.ql.udf.generic.GenericUDFMacro @@ -148,40 +146,12 @@ private[hive] object HiveShim { case _ => false } - private lazy val serUtilClass = - Utils.classForName("org.apache.hadoop.hive.ql.exec.SerializationUtilities") - private lazy val utilClass = Utils.classForName("org.apache.hadoop.hive.ql.exec.Utilities") - private val deserializeMethodName = "deserializeObjectByKryo" - private val serializeMethodName = "serializeObjectByKryo" - - private def findMethod(klass: Class[_], name: String, args: Class[_]*): Method = { - val method = klass.getDeclaredMethod(name, args: _*) - method.setAccessible(true) - method - } - def deserializePlan[UDFType](is: java.io.InputStream, clazz: Class[_]): UDFType = { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val deserializeObjectByKryo = findMethod(serUtilClass, deserializeMethodName, - kryo.getClass.getSuperclass, classOf[InputStream], classOf[Class[_]]) - try { - deserializeObjectByKryo.invoke(null, kryo, is, clazz).asInstanceOf[UDFType] - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } + SerializationUtilities.deserializePlan(is, clazz).asInstanceOf[UDFType] } def serializePlan(function: AnyRef, out: java.io.OutputStream): Unit = { - val borrowKryo = serUtilClass.getMethod("borrowKryo") - val kryo = borrowKryo.invoke(serUtilClass) - val serializeObjectByKryo = findMethod(serUtilClass, serializeMethodName, - kryo.getClass.getSuperclass, classOf[Object], classOf[OutputStream]) - try { - serializeObjectByKryo.invoke(null, kryo, function, out) - } finally { - serUtilClass.getMethod("releaseKryo", kryo.getClass.getSuperclass).invoke(null, kryo) - } + SerializationUtilities.serializePlan(function, out) } def writeExternal(out: java.io.ObjectOutput): Unit = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 7d4bf7305546c..96c207913d49a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -55,10 +55,8 @@ private[spark] object HiveUtils extends Logging { sc } - private val hiveVersion = HiveVersionInfo.getVersion - /** The version of hive used internally by Spark SQL. */ - val builtinHiveVersion: String = hiveVersion + val builtinHiveVersion: String = HiveVersionInfo.getVersion val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. 
Available options are " + diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 7fccb72fb913b..c7002853bed54 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive -import java.lang.{Boolean => JBoolean} import java.nio.ByteBuffer import scala.collection.JavaConverters._ @@ -39,7 +38,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * Here we cannot extends `ImplicitTypeCasts` to compatible with UDF input data type, the reason is: @@ -349,11 +347,7 @@ private[hive] case class HiveUDAFFunction( funcWrapper.createFunction[AbstractGenericUDAFResolver]() } - val clazz = Utils.classForName(classOf[SimpleGenericUDAFParameterInfo].getName) - val ctor = clazz.getDeclaredConstructor( - classOf[Array[ObjectInspector]], JBoolean.TYPE, JBoolean.TYPE, JBoolean.TYPE) - val args = Array[AnyRef](inputInspectors, JBoolean.FALSE, JBoolean.FALSE, JBoolean.FALSE) - val parameterInfo = ctor.newInstance(args: _*).asInstanceOf[SimpleGenericUDAFParameterInfo] + val parameterInfo = new SimpleGenericUDAFParameterInfo(inputInspectors, false, false, false) resolver.getEvaluator(parameterInfo) } From 83f8e13956d5602ff4d37b742da427aa07537c1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 12 Oct 2020 10:29:28 +0900 Subject: [PATCH 0212/1009] [SPARK-33086][FOLLOW-UP] Remove unused Optional import from pyspark.resource.profile stub ### What changes were proposed in this pull request? Remove unused `typing.Optional` import from `pyspark.resource.profile` stub. ### Why are the changes needed? Since SPARK-32319 we don't allow unused imports. However, this one slipped both local and CI tests for some reason. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and mypy check. Closes #30002 from zero323/SPARK-33086-FOLLOWUP. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/resource/profile.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index d6ea340bb510f..6763baf6590a3 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,7 +22,7 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import overload, Dict, Optional, Union +from typing import overload, Dict, Union from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: From c78971b1c7214357a275481a5af468259bcf406f Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Sun, 11 Oct 2020 18:51:00 -0700 Subject: [PATCH 0213/1009] [SPARK-33106][BUILD] Fix resolvers clash in SBT ### What changes were proposed in this pull request? Rename manually added resolver for local Ivy repo. Create configuration to publish to local Ivy repo similar to Maven one. Use `publishLocal` to publish both to local Maven and Ivy repos instead of custom task `localPublish` (renamed from `publish-local` of sbt 0.13.x). ### Why are the changes needed? 
There are two resolvers (bootResolvers' "local" and the manually added "local") that point to the same local Ivy repo, but have different configurations, which led to excessive warnings in the logs and, potentially, resolution issues. The changeset fixes that case, which is observable in the sbt output as ``` [warn] Multiple resolvers having different access mechanism configured with same name 'local'. To avoid conflict, Remove duplicate project resolvers (`resolvers`) or rename publishing resolver (`publishTo`). ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Executed `build/sbt`'s `publishLocal` task on an individual module and on the root project. Closes #30006 from gemelen/feature/local-resolvers. Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6929342d2f539..47369722ba9b2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -108,7 +108,7 @@ object SparkBuild extends PomBuild { override val userPropertiesMap = System.getProperties.asScala.toMap lazy val MavenCompile = config("m2r") extend(Compile) - lazy val publishLocalBoth = TaskKey[Unit]("localPublish", "publish local for m2 and ivy", KeyRanks.ATask) + lazy val SbtCompile = config("sbt") extend(Compile) lazy val sparkGenjavadocSettings: Seq[sbt.Def.Setting[_]] = GenJavadocPlugin.projectSettings ++ Seq( scalacOptions ++= Seq( @@ -213,7 +213,7 @@ object SparkBuild extends PomBuild { "gcs-maven-central-mirror" at "https://maven-central.storage-download.googleapis.com/maven2/", DefaultMavenRepository, Resolver.mavenLocal, - Resolver.file("local", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) + Resolver.file("ivyLocal", file(Path.userHome.absolutePath + "/.ivy2/local"))(Resolver.ivyStylePatterns) ), externalResolvers := resolvers.value, otherResolvers := SbtPomKeys.mvnLocalRepository(dotM2 => Seq(Resolver.file("dotM2", dotM2))).value, @@ -221,9 +221,15 @@ object SparkBuild extends PomBuild { .withResolverName("dotM2") .withArtifacts(packagedArtifacts.value.toVector) .withLogging(ivyLoggingLevel.value), + publishLocalConfiguration in SbtCompile := PublishConfiguration() + .withResolverName("ivyLocal") + .withArtifacts(packagedArtifacts.value.toVector) + .withLogging(ivyLoggingLevel.value), publishMavenStyle in MavenCompile := true, + publishMavenStyle in SbtCompile := false, publishLocal in MavenCompile := publishTask(publishLocalConfiguration in MavenCompile).value, - publishLocalBoth := Seq(publishLocal in MavenCompile, publishLocal).dependOn.value, + publishLocal in SbtCompile := publishTask(publishLocalConfiguration in SbtCompile).value, + publishLocal := Seq(publishLocal in MavenCompile, publishLocal in SbtCompile).dependOn.value, javacOptions in (Compile, doc) ++= { val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) From 50b2a497f37c7a51b34dee1c0cb80910687ad4a2 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 11 Oct 2020 20:05:40 -0700 Subject: [PATCH 0214/1009] [SPARK-21708][BUILD][FOLLOWUP] Rename hdpVersion to hadoopVersionValue This PR aims to rename hdpVersion to hadoopVersionValue. Use the more general variable name. No. Pass the CI. Closes #30008 from williamhyun/sbt. 
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 47369722ba9b2..f20a84451c5c5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -756,12 +756,12 @@ object Assembly { .getOrElse(SbtPomKeys.effectivePom.value.getProperties.get("hadoop.version").asInstanceOf[String]) }, assemblyJarName in assembly := { - lazy val hdpVersion = hadoopVersion.value + lazy val hadoopVersionValue = hadoopVersion.value if (moduleName.value.contains("streaming-kafka-0-10-assembly") || moduleName.value.contains("streaming-kinesis-asl-assembly")) { s"${moduleName.value}-${version.value}.jar" } else { - s"${moduleName.value}-${version.value}-hadoop${hdpVersion}.jar" + s"${moduleName.value}-${version.value}-hadoop${hadoopVersionValue}.jar" } }, assemblyJarName in (Test, assembly) := s"${moduleName.value}-test-${version.value}.jar", From 4af1ac93846a0dfdcc57ec7604ed51d7787bd6fd Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Mon, 12 Oct 2020 12:24:54 +0900 Subject: [PATCH 0215/1009] [SPARK-32047][SQL] Add JDBC connection provider disable possibility ### What changes were proposed in this pull request? At the moment there is no possibility to turn off JDBC authentication providers which exists on the classpath. This can be problematic because service providers are loaded with service loader. In this PR I've added `spark.sql.sources.disabledJdbcConnProviderList` configuration possibility (default: empty). ### Why are the changes needed? No possibility to turn off JDBC authentication providers. ### Does this PR introduce _any_ user-facing change? Yes, it introduces new configuration option. ### How was this patch tested? * Existing + newly added unit tests. * Existing integration tests. Closes #29964 from gaborgsomogyi/SPARK-32047. Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++ .../jdbc/connection/BasicConnectionProvider.scala | 2 ++ .../jdbc/connection/ConnectionProvider.scala | 7 +++++-- .../jdbc/connection/DB2ConnectionProvider.scala | 2 ++ .../jdbc/connection/MSSQLConnectionProvider.scala | 2 ++ .../connection/MariaDBConnectionProvider.scala | 2 ++ .../connection/OracleConnectionProvider.scala | 2 ++ .../connection/PostgresConnectionProvider.scala | 2 ++ .../spark/sql/jdbc/JdbcConnectionProvider.scala | 6 ++++++ .../jdbc/connection/ConnectionProviderSuite.scala | 15 +++++++++++++-- .../connection/ConnectionProviderSuiteBase.scala | 2 +- .../IntentionallyFaultyConnectionProvider.scala | 1 + 12 files changed, 49 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 18ffc655b2174..8cbdbfe16d2bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2775,6 +2775,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val DISABLED_JDBC_CONN_PROVIDER_LIST = + buildConf("spark.sql.sources.disabledJdbcConnProviderList") + .internal() + .doc("Configures a list of JDBC connection providers, which are disabled. 
" + + "The list contains the name of the JDBC connection providers separated by comma.") + .version("3.1.0") + .stringConf + .createWithDefault("") + /** * Holds information about keys that have been deprecated. * @@ -3399,6 +3408,8 @@ class SQLConf extends Serializable with Logging { def truncateTrashEnabled: Boolean = getConf(SQLConf.TRUNCATE_TRASH_ENABLED) + def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index a5f04649e6628..1c0513f982a1e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -30,6 +30,8 @@ private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with */ def getAdditionalProperties(options: JDBCOptions): Properties = new Properties() + override val name: String = "basic" + override def canHandle(driver: Driver, options: Map[String, String]): Boolean = { val jdbcOptions = new JDBCOptions(options) jdbcOptions.keytab == null || jdbcOptions.principal == null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala index 649a0bda4236c..e81add4df960a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala @@ -24,6 +24,7 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.security.SecurityConfigurationLock +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.jdbc.JdbcConnectionProvider import org.apache.spark.util.Utils @@ -47,8 +48,10 @@ private[jdbc] object ConnectionProvider extends Logging { logInfo("Loading of the provider failed with the exception:", t) } } - // Seems duplicate but it's needed for Scala 2.13 - providers.toSeq + + val disabledProviders = Utils.stringToSeq(SQLConf.get.disabledJdbcConnectionProviders) + // toSeq seems duplicate but it's needed for Scala 2.13 + providers.filterNot(p => disabledProviders.contains(p.name)).toSeq } def create(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala index ca82cdc561bef..775c3ae4a533a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/DB2ConnectionProvider.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[sql] class DB2ConnectionProvider extends SecureConnectionProvider { override val driverClass = "com.ibm.db2.jcc.DB2Driver" + override val 
name: String = "db2" + override def appEntry(driver: Driver, options: JDBCOptions): String = "JaasClient" override def getConnection(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala index 4e405b2187e56..e3d3e1a43d510 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MSSQLConnectionProvider.scala @@ -29,6 +29,8 @@ private[sql] class MSSQLConnectionProvider extends SecureConnectionProvider { override val driverClass = "com.microsoft.sqlserver.jdbc.SQLServerDriver" val parserMethod: String = "parseAndMergeProperties" + override val name: String = "mssql" + override def appEntry(driver: Driver, options: JDBCOptions): String = { val configName = "jaasConfigurationName" val appEntryDefault = "SQLJDBCDriver" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala index d5fe13bf0ca19..29a08d0b5f269 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/MariaDBConnectionProvider.scala @@ -24,6 +24,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[jdbc] class MariaDBConnectionProvider extends SecureConnectionProvider { override val driverClass = "org.mariadb.jdbc.Driver" + override val name: String = "mariadb" + override def appEntry(driver: Driver, options: JDBCOptions): String = "Krb5ConnectorContext" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala index 3defda3871765..0d43851bb255e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/OracleConnectionProvider.scala @@ -28,6 +28,8 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[sql] class OracleConnectionProvider extends SecureConnectionProvider { override val driverClass = "oracle.jdbc.OracleDriver" + override val name: String = "oracle" + override def appEntry(driver: Driver, options: JDBCOptions): String = "kprb5module" override def getConnection(driver: Driver, options: Map[String, String]): Connection = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala index dae8aea81f20a..f26a11e34dc38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/PostgresConnectionProvider.scala @@ -25,6 +25,8 @@ import 
org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions private[jdbc] class PostgresConnectionProvider extends SecureConnectionProvider { override val driverClass = "org.postgresql.Driver" + override val name: String = "postgres" + override def appEntry(driver: Driver, options: JDBCOptions): String = { val parseURL = driver.getClass.getMethod("parseURL", classOf[String], classOf[Properties]) val properties = parseURL.invoke(driver, options.url, null).asInstanceOf[Properties] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala index caf574b0c2284..1e8abca197025 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcConnectionProvider.scala @@ -34,6 +34,12 @@ import org.apache.spark.annotation.{DeveloperApi, Unstable} @DeveloperApi @Unstable abstract class JdbcConnectionProvider { + /** + * Name of the service to provide JDBC connections. This name should be unique. Spark will + * internally use this name to differentiate JDBC connection providers. + */ + val name: String + /** * Checks if this connection provider instance can handle the connection initiated by the driver. * There must be exactly one active connection provider which can handle the connection for a diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala index a48dbdebea7e9..0e9498b2681e2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuite.scala @@ -19,8 +19,11 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import javax.security.auth.login.Configuration -class ConnectionProviderSuite extends ConnectionProviderSuiteBase { - test("All built-in provides must be loaded") { +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class ConnectionProviderSuite extends ConnectionProviderSuiteBase with SharedSparkSession { + test("All built-in providers must be loaded") { IntentionallyFaultyConnectionProvider.constructed = false val providers = ConnectionProvider.loadProviders() assert(providers.exists(_.isInstanceOf[BasicConnectionProvider])) @@ -34,6 +37,14 @@ class ConnectionProviderSuite extends ConnectionProviderSuiteBase { assert(providers.size === 6) } + test("Disabled provider must not be loaded") { + withSQLConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST.key -> "db2") { + val providers = ConnectionProvider.loadProviders() + assert(!providers.exists(_.isInstanceOf[DB2ConnectionProvider])) + assert(providers.size === 5) + } + } + test("Multiple security configs must be reachable") { Configuration.setConfiguration(null) val postgresProvider = new PostgresConnectionProvider() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala index be08a3c2f7367..a299841b3c149 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala 
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProviderSuiteBase.scala @@ -42,7 +42,7 @@ abstract class ConnectionProviderSuiteBase extends SparkFunSuite with BeforeAndA JDBCOptions.JDBC_PRINCIPAL -> "principal" )) - override def afterEach(): Unit = { + protected override def afterEach(): Unit = { try { Configuration.setConfiguration(null) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala index fbefcb91cccde..329d79cae62e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/IntentionallyFaultyConnectionProvider.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.jdbc.JdbcConnectionProvider private class IntentionallyFaultyConnectionProvider extends JdbcConnectionProvider { IntentionallyFaultyConnectionProvider.constructed = true throw new IllegalArgumentException("Intentional Exception") + override val name: String = "IntentionallyFaultyConnectionProvider" override def canHandle(driver: Driver, options: Map[String, String]): Boolean = true override def getConnection(driver: Driver, options: Map[String, String]): Connection = null } From 543d59dfbffadeb4e11f06d6bbf857f21ac03f73 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 11 Oct 2020 21:54:56 -0700 Subject: [PATCH 0216/1009] [SPARK-33107][BUILD][FOLLOW-UP] Remove com.twitter:parquet-hadoop-bundle:1.6.0 and orc.classifier ### What changes were proposed in this pull request? This pr removes `com.twitter:parquet-hadoop-bundle:1.6.0` and `orc.classifier`. ### Why are the changes needed? To make code more clear and readable. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30005 from wangyum/SPARK-33107. 
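Circling back to the SPARK-32047 change above: the new abstract `name` member is what entries in `spark.sql.sources.disabledJdbcConnProviderList` are matched against. A rough sketch of what a third-party provider looks like after this change (the class, its `name`, and the H2 URL check are invented for illustration; only the overridden members follow the API shown in the diff):

```scala
import java.sql.{Connection, Driver}
import java.util.Properties

import org.apache.spark.sql.jdbc.JdbcConnectionProvider

class MyH2ConnectionProvider extends JdbcConnectionProvider {
  // Unique provider name; listing it (comma separated) in
  // spark.sql.sources.disabledJdbcConnProviderList makes loadProviders() filter it out.
  override val name: String = "myH2"

  override def canHandle(driver: Driver, options: Map[String, String]): Boolean =
    options.get("url").exists(_.startsWith("jdbc:h2:"))

  override def getConnection(driver: Driver, options: Map[String, String]): Connection =
    driver.connect(options("url"), new Properties())
}
```

Disabling it at runtime would then look like `spark.conf.set("spark.sql.sources.disabledJdbcConnProviderList", "myH2")`, keeping in mind that the config is marked internal in the diff.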
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- examples/pom.xml | 5 ----- pom.xml | 12 ------------ sql/core/pom.xml | 2 -- sql/hive/pom.xml | 5 ----- 4 files changed, 24 deletions(-) diff --git a/examples/pom.xml b/examples/pom.xml index c5bf2409964bb..8b632cef6d44d 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -109,11 +109,6 @@ scopt_${scala.binary.version} 3.7.1 - - ${hive.parquet.group} - parquet-hadoop-bundle - provided - diff --git a/pom.xml b/pom.xml index b13d5ab81856c..7f678ccf1e4e5 100644 --- a/pom.xml +++ b/pom.xml @@ -137,9 +137,6 @@ 10.12.1.1 1.10.1 1.5.12 - - com.twitter - 1.6.0 9.4.28.v20200408 3.1.0 0.9.5 @@ -236,7 +233,6 @@ --> compile compile - provided 2.7.2 compile compile @@ -2169,7 +2165,6 @@ org.apache.orc orc-core ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -2194,7 +2189,6 @@ org.apache.orc orc-mapreduce ${orc.version} - ${orc.classifier} ${orc.deps.scope} @@ -2243,12 +2237,6 @@ ${parquet.version} ${parquet.test.deps.scope} - - ${hive.parquet.group} - parquet-hadoop-bundle - ${hive.parquet.version} - ${hive.parquet.scope} - org.codehaus.janino janino diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0f5d3fd55c15d..3f088e420a9a3 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -88,12 +88,10 @@ org.apache.orc orc-core - ${orc.classifier} org.apache.orc orc-mapreduce - ${orc.classifier} org.apache.hive diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 474c6066ed040..0453094cf8b7b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -35,11 +35,6 @@ - - - ${hive.parquet.group} - parquet-hadoop-bundle - org.apache.spark spark-core_${scala.binary.version} From 9896288b881788660cfaa3f45e90496105889bde Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 12 Oct 2020 00:27:53 -0700 Subject: [PATCH 0217/1009] [SPARK-33117][BUILD] Update zstd-jni to 1.4.5-6 ### What changes were proposed in this pull request? This PR aims to upgrade ZStandard library for Apache Spark 3.1.0. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/luben/zstd-jni/commit/2662fbdc320ce482a24c20b8fcac8b1d5b79fe33 - https://github.com/luben/zstd-jni/commit/bbe140b758be2e0ba64566e16d44cafd6e4ba142 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #30010 from dongjoon-hyun/SPARK-33117. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 979bb1419ce7b..f049ad1f5bb74 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -241,4 +241,4 @@ xmlenc/0.52//xmlenc-0.52.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar +zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index ebaff6d1977c9..a4dbeb112473a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -255,4 +255,4 @@ xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-4//zstd-jni-1.4.5-4.jar +zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar diff --git a/pom.xml b/pom.xml index 7f678ccf1e4e5..75b6776cbe470 100644 --- a/pom.xml +++ b/pom.xml @@ -688,7 +688,7 @@ com.github.luben zstd-jni - 1.4.5-4 + 1.4.5-6 com.clearspring.analytics From 78c0967bbe27d3872aa73ff9e6fafb095fd149c1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 12 Oct 2020 16:54:21 +0900 Subject: [PATCH 0218/1009] [SPARK-33092][SQL] Support subexpression elimination in ProjectExec ### What changes were proposed in this pull request? This patch proposes to add subexpression elimination support into `ProjectExec`. It can be controlled by `spark.sql.subexpressionElimination.enabled` config. Before this change: ```scala val df = spark.read.option("header", true).csv("/tmp/test.csv") df.withColumn("my_map", expr("str_to_map(foo, '&', '=')")).select(col("my_map")("foo"), col("my_map")("bar"), col("my_map")("baz")).debugCodegen ``` L27-40: first `str_to_map`. L68:81: second `str_to_map`. L109-122: third `str_to_map`. ``` /* 024 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 025 */ boolean project_isNull_0 = true; /* 026 */ UTF8String project_value_0 = null; /* 027 */ boolean project_isNull_1 = true; /* 028 */ MapData project_value_1 = null; /* 029 */ /* 030 */ if (!project_exprIsNull_0_0) { /* 031 */ project_isNull_1 = false; // resultCode could change nullability. /* 032 */ /* 033 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 034 */ for(UTF8String kvEntry: project_kvs_0) { /* 035 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 036 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 037 */ } /* 038 */ project_value_1 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 039 */ /* 040 */ } /* 041 */ if (!project_isNull_1) { /* 042 */ project_isNull_0 = false; // resultCode could change nullability. 
/* 043 */ /* 044 */ final int project_length_0 = project_value_1.numElements(); /* 045 */ final ArrayData project_keys_0 = project_value_1.keyArray(); /* 046 */ final ArrayData project_values_0 = project_value_1.valueArray(); /* 047 */ /* 048 */ int project_index_0 = 0; /* 049 */ boolean project_found_0 = false; /* 050 */ while (project_index_0 < project_length_0 && !project_found_0) { /* 051 */ final UTF8String project_key_0 = project_keys_0.getUTF8String(project_index_0); /* 052 */ if (project_key_0.equals(((UTF8String) references[3] /* literal */))) { /* 053 */ project_found_0 = true; /* 054 */ } else { /* 055 */ project_index_0++; /* 056 */ } /* 057 */ } /* 058 */ /* 059 */ if (!project_found_0 || project_values_0.isNullAt(project_index_0)) { /* 060 */ project_isNull_0 = true; /* 061 */ } else { /* 062 */ project_value_0 = project_values_0.getUTF8String(project_index_0); /* 063 */ } /* 064 */ /* 065 */ } /* 066 */ boolean project_isNull_6 = true; /* 067 */ UTF8String project_value_6 = null; /* 068 */ boolean project_isNull_7 = true; /* 069 */ MapData project_value_7 = null; /* 070 */ /* 071 */ if (!project_exprIsNull_0_0) { /* 072 */ project_isNull_7 = false; // resultCode could change nullability. /* 073 */ /* 074 */ UTF8String[] project_kvs_1 = project_expr_0_0.split(((UTF8String) references[5] /* literal */), -1); /* 075 */ for(UTF8String kvEntry: project_kvs_1) { /* 076 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[6] /* literal */), 2); /* 077 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[4] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 078 */ } /* 079 */ project_value_7 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[4] /* mapBuilder */).build(); /* 080 */ /* 081 */ } /* 082 */ if (!project_isNull_7) { /* 083 */ project_isNull_6 = false; // resultCode could change nullability. /* 084 */ /* 085 */ final int project_length_1 = project_value_7.numElements(); /* 086 */ final ArrayData project_keys_1 = project_value_7.keyArray(); /* 087 */ final ArrayData project_values_1 = project_value_7.valueArray(); /* 088 */ /* 089 */ int project_index_1 = 0; /* 090 */ boolean project_found_1 = false; /* 091 */ while (project_index_1 < project_length_1 && !project_found_1) { /* 092 */ final UTF8String project_key_1 = project_keys_1.getUTF8String(project_index_1); /* 093 */ if (project_key_1.equals(((UTF8String) references[7] /* literal */))) { /* 094 */ project_found_1 = true; /* 095 */ } else { /* 096 */ project_index_1++; /* 097 */ } /* 098 */ } /* 099 */ /* 100 */ if (!project_found_1 || project_values_1.isNullAt(project_index_1)) { /* 101 */ project_isNull_6 = true; /* 102 */ } else { /* 103 */ project_value_6 = project_values_1.getUTF8String(project_index_1); /* 104 */ } /* 105 */ /* 106 */ } /* 107 */ boolean project_isNull_12 = true; /* 108 */ UTF8String project_value_12 = null; /* 109 */ boolean project_isNull_13 = true; /* 110 */ MapData project_value_13 = null; /* 111 */ /* 112 */ if (!project_exprIsNull_0_0) { /* 113 */ project_isNull_13 = false; // resultCode could change nullability. /* 114 */ /* 115 */ UTF8String[] project_kvs_2 = project_expr_0_0.split(((UTF8String) references[9] /* literal */), -1); /* 116 */ for(UTF8String kvEntry: project_kvs_2) { /* 117 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[10] /* literal */), 2); /* 118 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[8] /* mapBuilder */).put(kv[0], kv.length == 2 ? 
kv[1] : null); /* 119 */ } /* 120 */ project_value_13 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[8] /* mapBuilder */).build(); /* 121 */ /* 122 */ } ... ``` After this change: L27-40 evaluates the common map variable. ``` /* 024 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 025 */ // common sub-expressions /* 026 */ /* 027 */ boolean project_isNull_0 = true; /* 028 */ MapData project_value_0 = null; /* 029 */ /* 030 */ if (!project_exprIsNull_0_0) { /* 031 */ project_isNull_0 = false; // resultCode could change nullability. /* 032 */ /* 033 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 034 */ for(UTF8String kvEntry: project_kvs_0) { /* 035 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 036 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 037 */ } /* 038 */ project_value_0 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 039 */ /* 040 */ } /* 041 */ /* 042 */ boolean project_isNull_4 = true; /* 043 */ UTF8String project_value_4 = null; /* 044 */ /* 045 */ if (!project_isNull_0) { /* 046 */ project_isNull_4 = false; // resultCode could change nullability. /* 047 */ /* 048 */ final int project_length_0 = project_value_0.numElements(); /* 049 */ final ArrayData project_keys_0 = project_value_0.keyArray(); /* 050 */ final ArrayData project_values_0 = project_value_0.valueArray(); /* 051 */ /* 052 */ int project_index_0 = 0; /* 053 */ boolean project_found_0 = false; /* 054 */ while (project_index_0 < project_length_0 && !project_found_0) { /* 055 */ final UTF8String project_key_0 = project_keys_0.getUTF8String(project_index_0); /* 056 */ if (project_key_0.equals(((UTF8String) references[3] /* literal */))) { /* 057 */ project_found_0 = true; /* 058 */ } else { /* 059 */ project_index_0++; /* 060 */ } /* 061 */ } /* 062 */ /* 063 */ if (!project_found_0 || project_values_0.isNullAt(project_index_0)) { /* 064 */ project_isNull_4 = true; /* 065 */ } else { /* 066 */ project_value_4 = project_values_0.getUTF8String(project_index_0); /* 067 */ } /* 068 */ /* 069 */ } /* 070 */ boolean project_isNull_6 = true; /* 071 */ UTF8String project_value_6 = null; /* 072 */ /* 073 */ if (!project_isNull_0) { /* 074 */ project_isNull_6 = false; // resultCode could change nullability. 
/* 075 */ /* 076 */ final int project_length_1 = project_value_0.numElements(); /* 077 */ final ArrayData project_keys_1 = project_value_0.keyArray(); /* 078 */ final ArrayData project_values_1 = project_value_0.valueArray(); /* 079 */ /* 080 */ int project_index_1 = 0; /* 081 */ boolean project_found_1 = false; /* 082 */ while (project_index_1 < project_length_1 && !project_found_1) { /* 083 */ final UTF8String project_key_1 = project_keys_1.getUTF8String(project_index_1); /* 084 */ if (project_key_1.equals(((UTF8String) references[4] /* literal */))) { /* 085 */ project_found_1 = true; /* 086 */ } else { /* 087 */ project_index_1++; /* 088 */ } /* 089 */ } /* 090 */ /* 091 */ if (!project_found_1 || project_values_1.isNullAt(project_index_1)) { /* 092 */ project_isNull_6 = true; /* 093 */ } else { /* 094 */ project_value_6 = project_values_1.getUTF8String(project_index_1); /* 095 */ } /* 096 */ /* 097 */ } /* 098 */ boolean project_isNull_8 = true; /* 099 */ UTF8String project_value_8 = null; /* 100 */ ... ``` When the code is split into separated method: ``` /* 026 */ private void project_doConsume_0(InternalRow inputadapter_row_0, UTF8String project_expr_0_0, boolean project_exprIsNull_0_0) throws java.io.IOException { /* 027 */ // common sub-expressions /* 028 */ /* 029 */ MapData project_subExprValue_0 = project_subExpr_0(project_exprIsNull_0_0, project_expr_0_0); /* 030 */ ... /* 140 */ private MapData project_subExpr_0(boolean project_exprIsNull_0_0, org.apache.spark.unsafe.types.UTF8String project_expr_0_0) { /* 141 */ boolean project_isNull_0 = true; /* 142 */ MapData project_value_0 = null; /* 143 */ /* 144 */ if (!project_exprIsNull_0_0) { /* 145 */ project_isNull_0 = false; // resultCode could change nullability. /* 146 */ /* 147 */ UTF8String[] project_kvs_0 = project_expr_0_0.split(((UTF8String) references[1] /* literal */), -1); /* 148 */ for(UTF8String kvEntry: project_kvs_0) { /* 149 */ UTF8String[] kv = kvEntry.split(((UTF8String) references[2] /* literal */), 2); /* 150 */ ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).put(kv[0], kv.length == 2 ? kv[1] : null); /* 151 */ } /* 152 */ project_value_0 = ((org.apache.spark.sql.catalyst.util.ArrayBasedMapBuilder) references[0] /* mapBuilder */).build(); /* 153 */ /* 154 */ } /* 155 */ project_subExprIsNull_0 = project_isNull_0; /* 156 */ return project_value_0; /* 157 */ } ``` ### Why are the changes needed? Users occasionally write repeated expression in projection. It is also possibly that query optimizer optimizes a query to evaluate same expression many times in a Project. Currently in ProjectExec, we don't support subexpression elimination in Whole-stage codegen. We can support it to reduce redundant evaluation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `spark.sql.subexpressionElimination.enabled` is enabled by default. So that's said we should pass all tests with this change. Closes #29975 from viirya/SPARK-33092. 
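A quick way to reproduce the before/after comparison above from a spark-shell (a sketch: `spark` and its implicits are assumed to be in scope, and the sample input is made up):

```scala
import org.apache.spark.sql.execution.debug._   // brings in debugCodegen()
import org.apache.spark.sql.functions._
import spark.implicits._

val df = Seq("a=1&b=2&c=3").toDF("foo")
val projected = df
  .withColumn("my_map", expr("str_to_map(foo, '&', '=')"))
  .select(col("my_map")("a"), col("my_map")("b"), col("my_map")("c"))

// Enabled by default; with it on, ProjectExec's generated code builds the map once
// per row and reuses it for all three lookups, as in the "after" listing above.
spark.conf.get("spark.sql.subexpressionElimination.enabled")
projected.debugCodegen()
```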
Authored-by: Liang-Chi Hsieh Signed-off-by: Takeshi Yamamuro --- .../expressions/codegen/CodeGenerator.scala | 55 ++++++++++++++----- .../aggregate/HashAggregateExec.scala | 2 +- .../execution/basicPhysicalOperators.scala | 15 ++++- .../sql/connector/DataSourceV2Suite.scala | 2 +- 4 files changed, 56 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 71d36733464f6..9a26c388f59af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -90,8 +90,13 @@ case class SubExprEliminationState(isNull: ExprValue, value: ExprValue) * @param codes Strings representing the codes that evaluate common subexpressions. * @param states Foreach expression that is participating in subexpression elimination, * the state to use. + * @param exprCodesNeedEvaluate Some expression codes that need to be evaluated before + * calling common subexpressions. */ -case class SubExprCodes(codes: Seq[String], states: Map[Expression, SubExprEliminationState]) +case class SubExprCodes( + codes: Seq[String], + states: Map[Expression, SubExprEliminationState], + exprCodesNeedEvaluate: Seq[ExprCode]) /** * The main information about a new added function. @@ -1044,7 +1049,7 @@ class CodegenContext extends Logging { // Get all the expressions that appear at least twice and set up the state for subexpression // elimination. val commonExprs = equivalentExpressions.getAllEquivalentExprs.filter(_.size > 1) - val commonExprVals = commonExprs.map(_.head.genCode(this)) + lazy val commonExprVals = commonExprs.map(_.head.genCode(this)) lazy val nonSplitExprCode = { commonExprs.zip(commonExprVals).map { case (exprs, eval) => @@ -1055,10 +1060,17 @@ class CodegenContext extends Logging { } } - val codes = if (commonExprVals.map(_.code.length).sum > SQLConf.get.methodSplitThreshold) { - val inputVarsForAllFuncs = commonExprs.map { expr => - getLocalInputVariableValues(this, expr.head).toSeq - } + // For some operators, they do not require all its child's outputs to be evaluated in advance. + // Instead it only early evaluates part of outputs, for example, `ProjectExec` only early + // evaluate the outputs used more than twice. So we need to extract these variables used by + // subexpressions and evaluate them before subexpressions. + val (inputVarsForAllFuncs, exprCodesNeedEvaluate) = commonExprs.map { expr => + val (inputVars, exprCodes) = getLocalInputVariableValues(this, expr.head) + (inputVars.toSeq, exprCodes.toSeq) + }.unzip + + val splitThreshold = SQLConf.get.methodSplitThreshold + val codes = if (commonExprVals.map(_.code.length).sum > splitThreshold) { if (inputVarsForAllFuncs.map(calculateParamLengthFromExprValues).forall(isValidParamLength)) { commonExprs.zipWithIndex.map { case (exprs, i) => val expr = exprs.head @@ -1109,7 +1121,7 @@ class CodegenContext extends Logging { } else { nonSplitExprCode } - SubExprCodes(codes, localSubExprEliminationExprs.toMap) + SubExprCodes(codes, localSubExprEliminationExprs.toMap, exprCodesNeedEvaluate.flatten) } /** @@ -1732,15 +1744,23 @@ object CodeGenerator extends Logging { } /** - * Extracts all the input variables from references and subexpression elimination states - * for a given `expr`. 
This result will be used to split the generated code of - * expressions into multiple functions. + * This methods returns two values in a Tuple. + * + * First value: Extracts all the input variables from references and subexpression + * elimination states for a given `expr`. This result will be used to split the + * generated code of expressions into multiple functions. + * + * Second value: Returns the set of `ExprCodes`s which are necessary codes before + * evaluating subexpressions. */ def getLocalInputVariableValues( ctx: CodegenContext, expr: Expression, - subExprs: Map[Expression, SubExprEliminationState] = Map.empty): Set[VariableValue] = { + subExprs: Map[Expression, SubExprEliminationState] = Map.empty) + : (Set[VariableValue], Set[ExprCode]) = { val argSet = mutable.Set[VariableValue]() + val exprCodesNeedEvaluate = mutable.Set[ExprCode]() + if (ctx.INPUT_ROW != null) { argSet += JavaCode.variable(ctx.INPUT_ROW, classOf[InternalRow]) } @@ -1761,16 +1781,21 @@ object CodeGenerator extends Logging { case ref: BoundReference if ctx.currentVars != null && ctx.currentVars(ref.ordinal) != null => - val ExprCode(_, isNull, value) = ctx.currentVars(ref.ordinal) - collectLocalVariable(value) - collectLocalVariable(isNull) + val exprCode = ctx.currentVars(ref.ordinal) + // If the referred variable is not evaluated yet. + if (exprCode.code != EmptyBlock) { + exprCodesNeedEvaluate += exprCode.copy() + exprCode.code = EmptyBlock + } + collectLocalVariable(exprCode.value) + collectLocalVariable(exprCode.isNull) case e => stack.pushAll(e.children) } } - argSet.toSet + (argSet.toSet, exprCodesNeedEvaluate.toSet) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index dcb465707a0ed..52d0450afb181 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -263,7 +263,7 @@ case class HashAggregateExec( } else { val inputVars = aggBufferUpdatingExprs.map { aggExprsForOneFunc => val inputVarsForOneFunc = aggExprsForOneFunc.map( - CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)).reduce(_ ++ _).toSeq + CodeGenerator.getLocalInputVariableValues(ctx, _, subExprs)._1).reduce(_ ++ _).toSeq val paramLength = CodeGenerator.calculateParamLengthFromExprValues(inputVarsForOneFunc) // Checks if a parameter length for the `aggExprsForOneFunc` does not go over the JVM limit diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 1f70fde3f7654..7334ea1e27284 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -66,10 +66,23 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { val exprs = bindReferences[Expression](projectList, child.output) - val resultVars = exprs.map(_.genCode(ctx)) + val (subExprsCode, resultVars, localValInputs) = if (conf.subexpressionEliminationEnabled) { + // subexpression elimination + val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(exprs) + val genVars = 
ctx.withSubExprEliminationExprs(subExprs.states) { + exprs.map(_.genCode(ctx)) + } + (subExprs.codes.mkString("\n"), genVars, subExprs.exprCodesNeedEvaluate) + } else { + ("", exprs.map(_.genCode(ctx)), Seq.empty) + } + // Evaluation of non-deterministic expressions can't be deferred. val nonDeterministicAttrs = projectList.filterNot(_.deterministic).map(_.toAttribute) s""" + |// common sub-expressions + |${evaluateVariables(localValInputs)} + |$subExprsCode |${evaluateRequiredVariables(output, resultVars, AttributeSet(nonDeterministicAttrs))} |${consume(ctx, resultVars)} """.stripMargin diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index a9c521eb46499..ec1ac00d08bf8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -268,7 +268,7 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } // this input data will fail to read middle way. - val input = spark.range(10).select(failingUdf('id).as('i)).select('i, -'i as 'j) + val input = spark.range(15).select(failingUdf('id).as('i)).select('i, -'i as 'j) val e3 = intercept[SparkException] { input.write.format(cls.getName).option("path", path).mode("overwrite").save() } From a0e324460e5d05cc8beeba5b1b0d1887b71254ea Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 12 Oct 2020 22:54:31 +0900 Subject: [PATCH 0219/1009] [SPARK-32704][SQL][FOLLOWUP] Corrects version values of plan logging configs in SQLConf ### What changes were proposed in this pull request? This PR intends to correct version values (`3.0.0` -> `3.1.0`) of three configs below in `SQLConf`: - spark.sql.planChangeLog.level - spark.sql.planChangeLog.rules - spark.sql.planChangeLog.batches This PR comes from https://github.com/apache/spark/pull/29544#discussion_r503049350. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30015 from maropu/pr29544-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8cbdbfe16d2bc..99c10b38c53b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -221,7 +221,7 @@ object SQLConf { .doc("Configures the log level for logging the change from the original plan to the new " + "plan after a rule or batch is applied. The value can be 'trace', 'debug', 'info', " + "'warn', or 'error'. 
The default log level is 'trace'.") - .version("3.0.0") + .version("3.1.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel), @@ -233,7 +233,7 @@ object SQLConf { .internal() .doc("Configures a list of rules for logging plan changes, in which the rules are " + "specified by their rule names and separated by comma.") - .version("3.0.0") + .version("3.1.0") .stringConf .createOptional @@ -241,7 +241,7 @@ object SQLConf { .internal() .doc("Configures a list of batches for logging plan changes, in which the batches " + "are specified by their batch names and separated by comma.") - .version("3.0.0") + .version("3.1.0") .stringConf .createOptional From ed2fe8d80635014681ec18b29e33e6ecfaf883d7 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Mon, 12 Oct 2020 09:01:03 -0500 Subject: [PATCH 0220/1009] [SPARK-33111][ML] aft transform optimization ### What changes were proposed in this pull request? 1, when `predictionCol` and `quantilesCol` are both set, we only need one prediction for each row: prediction is just the variable `lambda` in `predictQuantiles`; 2, in the computation of variable `quantiles` in `predictQuantiles`, a pre-computed vector `val baseQuantiles = $(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale))` can be reused for each row; ### Why are the changes needed? avoid redundant computation in transform, like what we did in `ProbabilisticClassificationModel`, `GaussianMixtureModel`, etc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuite Closes #30000 from zhengruifeng/aft_predict_transform_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../spark/ml/regression/AFTSurvivalRegression.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index f301c349a2dc7..595a2f0e742df 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -421,9 +421,17 @@ class AFTSurvivalRegressionModel private[ml] ( } if (hasQuantilesCol) { - val predictQuantilesUDF = udf { features: Vector => predictQuantiles(features)} + val baseQuantiles = $(quantileProbabilities) + .map(q => math.exp(math.log(-math.log1p(-q)) * scale)) + val lambdaCol = if ($(predictionCol).nonEmpty) { + predictionColumns.head + } else { + udf { features: Vector => predict(features) }.apply(col($(featuresCol))) + } + val predictQuantilesUDF = + udf { lambda: Double => Vectors.dense(baseQuantiles.map(q => q * lambda)) } predictionColNames :+= $(quantilesCol) - predictionColumns :+= predictQuantilesUDF(col($(featuresCol))) + predictionColumns :+= predictQuantilesUDF(lambdaCol) .as($(quantilesCol), outputSchema($(quantilesCol)).metadata) } From b27a287ff293c02dcad0c45cca71a5244664d7f5 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 12 Oct 2020 14:48:40 +0000 Subject: [PATCH 0221/1009] [SPARK-33016][SQL] Potential SQLMetrics missed which might cause WEB UI display issue while AQE is on ### What changes were proposed in this pull request? With following scenario when AQE is on, SQLMetrics could be incorrect. 1. Stage A and B are created, and UI updated thru event onAdaptiveExecutionUpdate. 2. Stage A and B are running. 
Subquery in stage A keep updating metrics thru event onAdaptiveSQLMetricUpdate. 3. Stage B completes, while stage A's subquery is still running, updating metrics. 4. Completion of stage B triggers new stage creation and UI update thru event onAdaptiveExecutionUpdate again (just like step 1). So decided to make a trade off of keeping more duplicate SQLMetrics without deleting them when AQE with newPlan updated. ### Why are the changes needed? Make SQLMetrics behavior 100% correct. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Updated SQLAppStatusListenerSuite. Closes #29965 from leanken/leanken-SPARK-33016. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/execution/ui/SQLAppStatusListener.scala | 4 ++-- .../execution/ui/SQLAppStatusListenerSuite.scala | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala index 175340d2dfaa7..963aec7ca36c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala @@ -341,7 +341,7 @@ class SQLAppStatusListener( val exec = getOrCreateExecution(executionId) exec.physicalPlanDescription = physicalPlanDescription - exec.metrics = sqlPlanMetrics + exec.metrics ++= sqlPlanMetrics update(exec) } @@ -349,7 +349,7 @@ class SQLAppStatusListener( val SparkListenerSQLAdaptiveSQLMetricUpdates(executionId, sqlPlanMetrics) = event val exec = getOrCreateExecution(executionId) - exec.metrics = exec.metrics ++ sqlPlanMetrics + exec.metrics ++= sqlPlanMetrics update(exec) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index f49a3a384b450..00f23718a0e9e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -680,7 +680,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils assert(sparkPlanInfo.nodeName === "WholeStageCodegen (2)") } - test("SPARK-32615: SQLMetrics validation after sparkPlanInfo updated in AQE") { + test("SPARK-32615,SPARK-33016: SQLMetrics validation after sparkPlanInfo updated in AQE") { val statusStore = createStatusStore() val listener = statusStore.listener.get @@ -755,7 +755,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils .allNodes.flatMap(_.metrics.map(_.accumulatorId)) // Assume that AQE update sparkPlanInfo with newPlan - // ExecutionMetrics will be replaced using newPlan's SQLMetrics + // ExecutionMetrics will be appended using newPlan's SQLMetrics listener.onOtherEvent(SparkListenerSQLAdaptiveExecutionUpdate( executionId, "test", @@ -770,8 +770,8 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils listener.onStageSubmitted(SparkListenerStageSubmitted(createStageInfo(1, 0))) listener.onTaskStart(SparkListenerTaskStart(1, 0, createTaskInfo(0, 0))) - // live metrics will be override, and ExecutionMetrics should be empty as the newPlan updated. - assert(statusStore.executionMetrics(executionId).isEmpty) + // historical metrics will be kept despite of the newPlan updated. 
+ assert(statusStore.executionMetrics(executionId).size == 2) // update new metrics with Id 4 & 5, since 3 is timing metrics, // timing metrics has a complicated string presentation so we don't test it here. @@ -780,9 +780,9 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils (0L, 1, 0, createAccumulatorInfos(newMetricsValueMap)) ))) - assert(statusStore.executionMetrics(executionId).size == 2) + assert(statusStore.executionMetrics(executionId).size == 4) statusStore.executionMetrics(executionId).foreach { m => - assert(m._2 == "500") + assert(m._2 == "100" || m._2 == "500") } listener.onTaskEnd(SparkListenerTaskEnd( @@ -802,10 +802,10 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils JobSucceeded )) - // aggregateMetrics should ignore metrics from job 0 + // aggregateMetrics should contains all metrics from job 0 and job 1 val aggregateMetrics = listener.liveExecutionMetrics(executionId) if (aggregateMetrics.isDefined) { - oldAccumulatorIds.foreach(id => assert(!aggregateMetrics.get.contains(id))) + assert(aggregateMetrics.get.keySet.size == 4) } listener.onOtherEvent(SparkListenerSQLExecutionEnd( From 819f12ee2fe3cce0c59221c2b02831274c769b23 Mon Sep 17 00:00:00 2001 From: Pablo Date: Mon, 12 Oct 2020 14:18:34 -0700 Subject: [PATCH 0222/1009] [SPARK-33118][SQL] CREATE TEMPORARY TABLE fails with location ### What changes were proposed in this pull request? We have a problem when you use CREATE TEMPORARY TABLE with LOCATION ```scala spark.range(3).write.parquet("/tmp/testspark1") sql("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/tmp/testspark1')") sql("CREATE TEMPORARY TABLE t USING parquet LOCATION '/tmp/testspark1'") ``` ```scala org.apache.spark.sql.AnalysisException: Unable to infer schema for Parquet. 
It must be specified manually.; at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$12(DataSource.scala:200) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:200) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:408) at org.apache.spark.sql.execution.datasources.CreateTempViewUsing.run(ddl.scala:94) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3618) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3616) at org.apache.spark.sql.Dataset.(Dataset.scala:229) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602) ``` This bug was introduced by SPARK-30507. sparksqlparser --> visitCreateTable --> visitCreateTableClauses --> cleanTableOptions extract the path from the options but in this case CreateTempViewUsing need the path in the options map. ### Why are the changes needed? To fix the problem ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit testing and manual testing Closes #30014 from planga82/bugfix/SPARK-33118_create_temp_table_location. 
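For completeness, the repro from the description can be turned into a quick spark-shell sanity check after the fix (the path is just the example one from above):

```scala
spark.range(3).write.mode("overwrite").parquet("/tmp/testspark1")

// Both forms now reach CreateTempViewUsing with the path, so neither hits the
// "Unable to infer schema for Parquet" error shown above.
spark.sql("CREATE TEMPORARY TABLE t1 USING parquet OPTIONS (path '/tmp/testspark1')")
spark.sql("CREATE TEMPORARY TABLE t2 USING parquet LOCATION '/tmp/testspark1'")

spark.table("t2").show()
```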
Authored-by: Pablo Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/SparkSqlParser.scala | 6 ++++-- .../spark/sql/execution/SparkSqlParserSuite.scala | 11 ++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 11934c934e316..0a5f4c3ed4bcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -262,7 +262,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val (_, _, _, options, _, _) = visitCreateTableClauses(ctx.createTableClauses()) + val (_, _, _, options, location, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw new ParseException("CREATE TEMPORARY TABLE without a provider is not allowed.", ctx)) val schema = Option(ctx.colTypeList()).map(createSchema) @@ -271,7 +271,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { "CREATE TEMPORARY VIEW ... USING ... instead") val table = tableIdentifier(ident, "CREATE TEMPORARY VIEW", ctx) - CreateTempViewUsing(table, schema, replace = false, global = false, provider, options) + val optionsWithLocation = location.map(l => options + ("path" -> l)).getOrElse(options) + CreateTempViewUsing(table, schema, replace = false, global = false, provider, + optionsWithLocation) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index af9088003f3b0..5e6808eeba0f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, RefreshResource} +import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing, RefreshResource} import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} @@ -160,6 +160,15 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } + test("SPARK-33118 CREATE TMEPORARY TABLE with LOCATION") { + assertEqual("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/data/tmp/testspark1')", + CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", + Map("path" -> "/data/tmp/testspark1"))) + assertEqual("CREATE TEMPORARY TABLE t USING parquet LOCATION '/data/tmp/testspark1'", + CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", + Map("path" -> "/data/tmp/testspark1"))) + } + private def createTableUsing( table: String, database: Option[String] = None, From 86d26b46a53acf52b85ac990059be9e5a3ec0318 Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Tue, 13 Oct 2020 13:09:40 +0800 Subject: [PATCH 
0223/1009] [SPARK-32455][ML][FOLLOW-UP] LogisticRegressionModel prediction optimization - fix incorrect initialization ### What changes were proposed in this pull request? use `lazy array` instead of `var` for auxiliary variables in binary lor ### Why are the changes needed? In https://github.com/apache/spark/pull/29255, I made a mistake: the `private var _threshold` and `_rawThreshold` are initialized by defaut values of `threshold`, that is beacuse: 1, param `threshold` is set default value at first; 2, `_threshold` and `_rawThreshold` are initialized based on the default value; 3, param `threshold` is updated by the value from estimator, by `copyValues` method: ``` if (map.contains(param) && to.hasParam(param.name)) { to.set(param.name, map(param)) } ``` We can update `_threshold` and `_rawThreshold` in `setThreshold` and `setThresholds`, but we can not update them in `set`/`copyValues` so their values are kept until methods `setThreshold` and `setThresholds` are called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? test in repl Closes #30013 from zhengruifeng/lor_threshold_init. Authored-by: zhengruifeng Signed-off-by: zhengruifeng --- .../classification/LogisticRegression.scala | 26 +++++++++++-------- .../LogisticRegressionSuite.scala | 3 +-- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 4d763cbd29d3c..a43ad466a7c80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1100,20 +1100,24 @@ class LogisticRegressionModel private[spark] ( private lazy val _intercept = interceptVector(0) private lazy val _interceptVector = interceptVector.toDense - private var _threshold = Double.NaN - private var _rawThreshold = Double.NaN - - updateBinaryThreshold() + private lazy val _binaryThresholdArray = { + val array = Array(Double.NaN, Double.NaN) + updateBinaryThresholds(array) + array + } + private def _threshold: Double = _binaryThresholdArray(0) + private def _rawThreshold: Double = _binaryThresholdArray(1) - private def updateBinaryThreshold(): Unit = { + private def updateBinaryThresholds(array: Array[Double]): Unit = { if (!isMultinomial) { - _threshold = getThreshold + val _threshold = getThreshold + array(0) = _threshold if (_threshold == 0.0) { - _rawThreshold = Double.NegativeInfinity + array(1) = Double.NegativeInfinity } else if (_threshold == 1.0) { - _rawThreshold = Double.PositiveInfinity + array(1) = Double.PositiveInfinity } else { - _rawThreshold = math.log(_threshold / (1.0 - _threshold)) + array(1) = math.log(_threshold / (1.0 - _threshold)) } } } @@ -1121,7 +1125,7 @@ class LogisticRegressionModel private[spark] ( @Since("1.5.0") override def setThreshold(value: Double): this.type = { super.setThreshold(value) - updateBinaryThreshold() + updateBinaryThresholds(_binaryThresholdArray) this } @@ -1131,7 +1135,7 @@ class LogisticRegressionModel private[spark] ( @Since("1.5.0") override def setThresholds(value: Array[Double]): this.type = { super.setThresholds(value) - updateBinaryThreshold() + updateBinaryThresholds(_binaryThresholdArray) this } diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index 56eadff6df078..51a6ae3c7e49b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -400,10 +400,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { } test("thresholds prediction") { - val blr = new LogisticRegression().setFamily("binomial") + val blr = new LogisticRegression().setFamily("binomial").setThreshold(1.0) val binaryModel = blr.fit(smallBinaryDataset) - binaryModel.setThreshold(1.0) testTransformer[(Double, Vector)](smallBinaryDataset.toDF(), binaryModel, "prediction") { row => assert(row.getDouble(0) === 0.0) } From e34f2d8df222056e9c2195dec6138fa1af9ca4e1 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 13 Oct 2020 17:41:55 +0900 Subject: [PATCH 0224/1009] [SPARK-33119][SQL] ScalarSubquery should returns the first two rows to avoid Driver OOM ### What changes were proposed in this pull request? `ScalarSubquery` should returns the first two rows. ### Why are the changes needed? To avoid Driver OOM. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test: https://github.com/apache/spark/blob/d6f3138352042e33a2291e11c325b8eadb8dd5f2/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala#L147-L154 Closes #30016 from wangyum/SPARK-33119. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 1a6b99a455bf7..14cc76f0dbb78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -80,7 +80,8 @@ case class ScalarSubquery( @volatile private var updated: Boolean = false def updateResult(): Unit = { - val rows = plan.executeCollect() + // Only return the first two rows as an array to avoid Driver OOM. + val rows = plan.executeTake(2) if (rows.length > 1) { sys.error(s"more than one row returned by a subquery used as an expression:\n$plan") } From 17eebd72097ee65e22cdaddf375e868074251f5a Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 13 Oct 2020 20:11:04 +0900 Subject: [PATCH 0225/1009] [SPARK-32295][SQL] Add not null and size > 0 filters before inner explode/inline to benefit from predicate pushdown ### What changes were proposed in this pull request? Add `And(IsNotNull(e), GreaterThan(Size(e), Literal(0)))` filter before Explode, PosExplode and Inline, when `outer = false`. Removed unused `InferFiltersFromConstraints` from `operatorOptimizationRuleSet` to avoid confusion that happened during the review process. ### Why are the changes needed? Predicate pushdown will be able to move this new filter down through joins and into data sources for performance improvement. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #29092 from tanelk/SPARK-32295. 
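To see the filter inferred by SPARK-32295 in a plan, a small sketch (toy data and path, spark-shell assumed): the optimized plan should show a `Filter` carrying the `isnotnull(...)` and `size(...) > 0` predicates below the inner `Generate explode(...)`, from where predicate pushdown can carry them toward the scan.

```scala
import org.apache.spark.sql.functions._
import spark.implicits._

Seq((1, Seq("a", "b")), (2, Seq.empty[String]), (3, null: Seq[String]))
  .toDF("id", "arr")
  .write.mode("overwrite").parquet("/tmp/explode_demo")   // toy path

val df = spark.read.parquet("/tmp/explode_demo")
// Inner (outer = false) explode: null/empty arrays contribute no rows anyway,
// which is exactly why the extra filter is safe to infer.
val exploded = df.select($"id", explode($"arr").as("elem"))
exploded.explain(true)
```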
Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/Optimizer.scala | 43 +++++++++-- .../InferFiltersFromGenerateSuite.scala | 75 +++++++++++++++++++ 2 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5bdaa504a3beb..7586bdf4392f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -79,7 +79,6 @@ abstract class Optimizer(catalogManager: CatalogManager) PushLeftSemiLeftAntiThroughJoin, LimitPushDown, ColumnPruning, - InferFiltersFromConstraints, // Operator combine CollapseRepartition, CollapseProject, @@ -117,14 +116,13 @@ abstract class Optimizer(catalogManager: CatalogManager) extendedOperatorOptimizationRules val operatorOptimizationBatch: Seq[Batch] = { - val rulesWithoutInferFiltersFromConstraints = - operatorOptimizationRuleSet.filterNot(_ == InferFiltersFromConstraints) Batch("Operator Optimization before Inferring Filters", fixedPoint, - rulesWithoutInferFiltersFromConstraints: _*) :: + operatorOptimizationRuleSet: _*) :: Batch("Infer Filters", Once, + InferFiltersFromGenerate, InferFiltersFromConstraints) :: Batch("Operator Optimization after Inferring Filters", fixedPoint, - rulesWithoutInferFiltersFromConstraints: _*) :: + operatorOptimizationRuleSet: _*) :: // Set strategy to Once to avoid pushing filter every time because we do not change the // join condition. Batch("Push extra predicate through join", fixedPoint, @@ -868,6 +866,41 @@ object TransposeWindow extends Rule[LogicalPlan] { } } +/** + * Infers filters from [[Generate]], such that rows that would have been removed + * by this [[Generate]] can be removed earlier - before joins and in data sources. + */ +object InferFiltersFromGenerate extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { + // This rule does not infer filters from foldable expressions to avoid constant filters + // like 'size([1, 2, 3]) > 0'. These do not show up in child's constraints and + // then the idempotence will break. 
+ case generate @ Generate(e, _, _, _, _, _) + if !e.deterministic || e.children.forall(_.foldable) => generate + + case generate @ Generate(g, _, false, _, _, _) if canInferFilters(g) => + // Exclude child's constraints to guarantee idempotency + val inferredFilters = ExpressionSet( + Seq( + GreaterThan(Size(g.children.head), Literal(0)), + IsNotNull(g.children.head) + ) + ) -- generate.child.constraints + + if (inferredFilters.nonEmpty) { + generate.copy(child = Filter(inferredFilters.reduce(And), generate.child)) + } else { + generate + } + } + + private def canInferFilters(g: Generator): Boolean = g match { + case _: ExplodeBase => true + case _: Inline => true + case _ => false + } +} + /** * Generate a list of additional filters from an operator's existing constraint but remove those * that are either already part of the operator's condition or are part of the operator's child diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala new file mode 100644 index 0000000000000..3f83971aa9821 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{IntegerType, StructField, StructType} + +class InferFiltersFromGenerateSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("Infer Filters", Once, InferFiltersFromGenerate) :: Nil + } + + val testRelation = LocalRelation('a.array(StructType(Seq( + StructField("x", IntegerType), + StructField("y", IntegerType) + )))) + + Seq(Explode(_), PosExplode(_), Inline(_)).foreach { f => + val generator = f('a) + test("Infer filters from " + generator) { + val originalQuery = testRelation.generate(generator).analyze + val correctAnswer = testRelation + .where(IsNotNull('a) && Size('a) > 0) + .generate(generator) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, correctAnswer) + } + + test("Don't infer duplicate filters from " + generator) { + val originalQuery = testRelation + .where(IsNotNull('a) && Size('a) > 0) + .generate(generator) + .analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + + test("Don't infer filters from outer " + generator) { + val originalQuery = testRelation.generate(generator, outer = true).analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + + val foldableExplode = f(CreateArray(Seq( + CreateStruct(Seq(Literal(0), Literal(1))), + CreateStruct(Seq(Literal(2), Literal(3))) + ))) + test("Don't infer filters from " + foldableExplode) { + val originalQuery = testRelation.generate(foldableExplode).analyze + val optimized = Optimize.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } +} From 1b0875b6924b4f29aa3cdecc26f8103fcae3dc55 Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Tue, 13 Oct 2020 21:37:26 +0900 Subject: [PATCH 0226/1009] [SPARK-33115][BUILD][DOCS] Fix javadoc errors in `kvstore` and `unsafe` modules ### What changes were proposed in this pull request? Fix Javadoc generation errors in `kvstore` and `unsafe` modules according to error message hints. ### Why are the changes needed? Fixes `doc` task failures which prevented other tasks successful executions (eg `publishLocal` task depends on `doc` task). ### Does this PR introduce _any_ user-facing change? No. Meaning of text in Javadoc is stayed the same. ### How was this patch tested? Run `build/sbt kvstore/Compile/doc`, `build/sbt unsafe/Compile/doc` and `build/sbt doc` without errors. Closes #30007 from gemelen/feature/doc-task-fix. 
Authored-by: Denis Pyshev Signed-off-by: HyukjinKwon --- .../java/org/apache/spark/util/kvstore/InMemoryStore.java | 5 +++-- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java index 42e090bc83ed1..431c7e42774e4 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java @@ -164,8 +164,9 @@ public void clear() { } /** - * An alias class for the type "ConcurrentHashMap, Boolean>", which is used - * as a concurrent hashset for storing natural keys and the boolean value doesn't matter. + * An alias class for the type "{@literal ConcurrentHashMap, Boolean>}", + * which is used as a concurrent hashset for storing natural keys + * and the boolean value doesn't matter. */ private static class NaturalKeys extends ConcurrentHashMap, Boolean> {} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 43bd7976c5d33..b8dda22240042 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -563,7 +563,7 @@ public UTF8String trim() { } /** - * Trims whitespaces (<= ASCII 32) from both ends of this string. + * Trims whitespaces ({@literal <=} ASCII 32) from both ends of this string. * * Note that, this method is the same as java's {@link String#trim}, and different from * {@link UTF8String#trim()} which remove only spaces(= ASCII 32) from both ends. From feee8da14bf506cda30506780fbcf0b8723123f9 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 13 Oct 2020 12:44:20 +0000 Subject: [PATCH 0227/1009] [SPARK-32858][SQL] UnwrapCastInBinaryComparison: support other numeric types ### What changes were proposed in this pull request? In SPARK-24994 we implemented unwrapping cast for **integral types**. This extends it to support **numeric types** such as float/double/decimal, so that filters involving these types can be better pushed down to data sources. Unlike the cases of integral types, conversions between numeric types can result in rounding up or down. Consider the following case: ```sql cast(e as double) < 1.9 ``` Assume the type of `e` is short; since 1.9 is not representable in that type, the cast will either truncate or round. Now suppose the literal is truncated: we cannot convert the expression to: ```sql e < cast(1.9 as short) ``` as in the previous implementation, since if `e` is 1, the original expression evaluates to true, but the converted expression will evaluate to false. To resolve the above, this PR first finds out whether casting from the wider type to the narrower type will result in truncation or rounding, by comparing a _roundtrip value_ derived from **converting the literal first to the narrower type, and then to the wider type**, versus the original literal value. For instance, in the above, we'll first obtain a roundtrip value via the conversion (double) 1.9 -> (short) 1 -> (double) 1.0, and then compare it against 1.9. Now, in the case of truncation, we'd convert the original expression to: ```sql e <= cast(1.9 as short) ``` instead, so that the conversion is also valid when `e` is 1. 
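As a rough sketch of that check (illustrative code modelled on the description above, with simplified names rather than the exact implementation in `UnwrapCastInBinaryComparison`):

```scala
// Minimal sketch, assuming `value` is the literal of the wider `toType` and `fromType`
// is the type of the cast's child expression: cast the literal down, cast it back up,
// and compare the roundtrip value against the original literal.
val narrowed  = Cast(Literal(value), fromType).eval()             // e.g. (double) 1.9 -> (short) 1
val roundTrip = Cast(Literal(narrowed, fromType), toType).eval()  // (short) 1 -> (double) 1.0
val cmp = toType.ordering.asInstanceOf[Ordering[Any]].compare(value, roundTrip)
// cmp == 0: no precision loss, simply move the cast to the literal side
// cmp > 0 : the literal was rounded down (truncated), so `>`/`>=` become `>` and `<`/`<=` become `<=`
// cmp < 0 : the literal was rounded up, so `>`/`>=` become `>=` and `<`/`<=` become `<`
// (equality comparisons become `if(isnull(fromExp), null, false)` in either case)
```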
For more details, please check [this blog post](https://prestosql.io/blog/2019/05/21/optimizing-the-casts-away.html) by Presto which offers a very good explanation on how it works. ### Why are the changes needed? For queries such as: ```sql SELECT * FROM tbl WHERE short_col < 100.5 ``` The predicate `short_col < 100.5` can't be pushed down to data sources because it involves casts. This eliminates the cast so these queries can run more efficiently. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #29792 from sunchao/SPARK-32858. Lead-authored-by: Chao Sun Co-authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../UnwrapCastInBinaryComparison.scala | 202 +++++++++++------- .../UnwrapCastInBinaryComparisonSuite.scala | 166 +++++++++++--- .../UnwrapCastInComparisonEndToEndSuite.scala | 194 +++++++++++++++++ 3 files changed, 454 insertions(+), 108 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index d0acfe036d443..fe325f00e0baf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -35,18 +35,32 @@ import org.apache.spark.sql.types._ * to be optimized away later and pushed down to data sources. * * Currently this only handles cases where: - * 1). `fromType` (of `fromExp`) and `toType` are of integral types (i.e., byte, short, int and - * long) + * 1). `fromType` (of `fromExp`) and `toType` are of numeric types (i.e., short, int, float, + * decimal, etc) * 2). `fromType` can be safely coerced to `toType` without precision loss (e.g., short to int, * int to long, but not long to int) * * If the above conditions are satisfied, the rule checks to see if the literal `value` is within * range `(min, max)`, where `min` and `max` are the minimum and maximum value of `fromType`, - * respectively. If this is true then it means we can safely cast `value` to `fromType` and thus + * respectively. If this is true then it means we may safely cast `value` to `fromType` and thus * able to move the cast to the literal side. That is: * * `cast(fromExp, toType) op value` ==> `fromExp op cast(value, fromType)` * + * Note there are some exceptions to the above: if casting from `value` to `fromType` causes + * rounding up or down, the above conversion will no longer be valid. Instead, the rule does the + * following: + * + * if casting `value` to `fromType` causes rounding up: + * - `cast(fromExp, toType) > value` ==> `fromExp >= cast(value, fromType)` + * - `cast(fromExp, toType) >= value` ==> `fromExp >= cast(value, fromType)` + * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) + * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) + * - `cast(fromExp, toType) <= value` ==> `fromExp < cast(value, fromType)` + * - `cast(fromExp, toType) < value` ==> `fromExp < cast(value, fromType)` + * + * Similarly for the case when casting `value` to `fromType` causes rounding down. + * * If the `value` is not within range `(min, max)`, the rule breaks the scenario into different * cases and try to replace each with simpler constructs. 
* @@ -55,8 +69,6 @@ import org.apache.spark.sql.types._ * - `cast(fromExp, toType) >= value` ==> if(isnull(fromExp), null, false) * - `cast(fromExp, toType) === value` ==> if(isnull(fromExp), null, false) * - `cast(fromExp, toType) <=> value` ==> false (if `fromExp` is deterministic) - * - `cast(fromExp, toType) <=> value` ==> cast(fromExp, toType) <=> value (if `fromExp` is - * non-deterministic) * - `cast(fromExp, toType) <= value` ==> if(isnull(fromExp), null, true) * - `cast(fromExp, toType) < value` ==> if(isnull(fromExp), null, true) * @@ -100,12 +112,12 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { swap(unwrapCast(swap(exp))) - // In case both sides have integral type, optimize the comparison by removing casts or + // In case both sides have numeric type, optimize the comparison by removing casts or // moving cast to the literal side. case be @ BinaryComparison( - Cast(fromExp, toType: IntegralType, _), Literal(value, literalType)) + Cast(fromExp, toType: NumericType, _), Literal(value, literalType)) if canImplicitlyCast(fromExp, toType, literalType) => - simplifyIntegralComparison(be, fromExp, toType, value) + simplifyNumericComparison(be, fromExp, toType, value) case _ => exp } @@ -116,82 +128,118 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { * optimizes the expression by moving the cast to the literal side. Otherwise if result is not * true, this replaces the input binary comparison `exp` with simpler expressions. */ - private def simplifyIntegralComparison( + private def simplifyNumericComparison( exp: BinaryComparison, fromExp: Expression, - toType: IntegralType, + toType: NumericType, value: Any): Expression = { val fromType = fromExp.dataType - val (min, max) = getRange(fromType) - val (minInToType, maxInToType) = { - (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) - } val ordering = toType.ordering.asInstanceOf[Ordering[Any]] - val minCmp = ordering.compare(value, minInToType) - val maxCmp = ordering.compare(value, maxInToType) + val range = getRange(fromType) - if (maxCmp > 0) { - exp match { - case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => - falseIfNotNull(fromExp) - case LessThan(_, _) | LessThanOrEqual(_, _) => - trueIfNotNull(fromExp) - // make sure the expression is evaluated if it is non-deterministic - case EqualNullSafe(_, _) if exp.deterministic => - FalseLiteral - case _ => exp + if (range.isDefined) { + val (min, max) = range.get + val (minInToType, maxInToType) = { + (Cast(Literal(min), toType).eval(), Cast(Literal(max), toType).eval()) } - } else if (maxCmp == 0) { - exp match { - case GreaterThan(_, _) => - falseIfNotNull(fromExp) - case LessThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case LessThan(_, _) => - Not(EqualTo(fromExp, Literal(max, fromType))) - case GreaterThanOrEqual(_, _) | EqualTo(_, _) => - EqualTo(fromExp, Literal(max, fromType)) - case EqualNullSafe(_, _) => - EqualNullSafe(fromExp, Literal(max, fromType)) - case _ => exp + val minCmp = ordering.compare(value, minInToType) + val maxCmp = ordering.compare(value, maxInToType) + + if (maxCmp >= 0 || minCmp <= 0) { + return if (maxCmp > 0) { + exp match { + case EqualTo(_, _) | GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + falseIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else if (maxCmp == 0) { + 
exp match { + case GreaterThan(_, _) => + falseIfNotNull(fromExp) + case LessThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) => + Not(EqualTo(fromExp, Literal(max, fromType))) + case GreaterThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(max, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(max, fromType)) + case _ => exp + } + } else if (minCmp < 0) { + exp match { + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => + falseIfNotNull(fromExp) + // make sure the expression is evaluated if it is non-deterministic + case EqualNullSafe(_, _) if exp.deterministic => + FalseLiteral + case _ => exp + } + } else { // minCmp == 0 + exp match { + case LessThan(_, _) => + falseIfNotNull(fromExp) + case GreaterThanOrEqual(_, _) => + trueIfNotNull(fromExp) + case GreaterThan(_, _) => + Not(EqualTo(fromExp, Literal(min, fromType))) + case LessThanOrEqual(_, _) | EqualTo(_, _) => + EqualTo(fromExp, Literal(min, fromType)) + case EqualNullSafe(_, _) => + EqualNullSafe(fromExp, Literal(min, fromType)) + case _ => exp + } + } } - } else if (minCmp < 0) { + } + + // When we reach to this point, it means either there is no min/max for the `fromType` (e.g., + // decimal type), or that the literal `value` is within range `(min, max)`. For these, we + // optimize by moving the cast to the literal side. + + val newValue = Cast(Literal(value), fromType).eval() + if (newValue == null) { + // This means the cast failed, for instance, due to the value is not representable in the + // narrower type. In this case we simply return the original expression. + return exp + } + val valueRoundTrip = Cast(Literal(newValue, fromType), toType).eval() + val lit = Literal(newValue, fromType) + val cmp = ordering.compare(value, valueRoundTrip) + if (cmp == 0) { exp match { - case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case LessThan(_, _) | LessThanOrEqual(_, _) | EqualTo(_, _) => - falseIfNotNull(fromExp) - // make sure the expression is evaluated if it is non-deterministic - case EqualNullSafe(_, _) if exp.deterministic => - FalseLiteral + case GreaterThan(_, _) => GreaterThan(fromExp, lit) + case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case EqualTo(_, _) => EqualTo(fromExp, lit) + case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) + case LessThan(_, _) => LessThan(fromExp, lit) + case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) case _ => exp } - } else if (minCmp == 0) { + } else if (cmp < 0) { + // This means the literal value is rounded up after casting to `fromType` exp match { - case LessThan(_, _) => - falseIfNotNull(fromExp) - case GreaterThanOrEqual(_, _) => - trueIfNotNull(fromExp) - case GreaterThan(_, _) => - Not(EqualTo(fromExp, Literal(min, fromType))) - case LessThanOrEqual(_, _) | EqualTo(_, _) => - EqualTo(fromExp, Literal(min, fromType)) - case EqualNullSafe(_, _) => - EqualNullSafe(fromExp, Literal(min, fromType)) + case EqualTo(_, _) => falseIfNotNull(fromExp) + case EqualNullSafe(_, _) if fromExp.deterministic => FalseLiteral + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) + case LessThan(_, _) | LessThanOrEqual(_, _) => LessThan(fromExp, lit) case _ => exp } } else { - // This means `value` is within range `(min, max)`. Optimize this by moving the cast to the - // literal side. 
- val lit = Literal(Cast(Literal(value), fromType).eval(), fromType) + // This means the literal value is rounded down after casting to `fromType` exp match { - case GreaterThan(_, _) => GreaterThan(fromExp, lit) - case GreaterThanOrEqual(_, _) => GreaterThanOrEqual(fromExp, lit) - case EqualTo(_, _) => EqualTo(fromExp, lit) - case EqualNullSafe(_, _) => EqualNullSafe(fromExp, lit) - case LessThan(_, _) => LessThan(fromExp, lit) - case LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) + case EqualTo(_, _) => falseIfNotNull(fromExp) + case EqualNullSafe(_, _) => FalseLiteral + case GreaterThan(_, _) | GreaterThanOrEqual(_, _) => GreaterThan(fromExp, lit) + case LessThan(_, _) | LessThanOrEqual(_, _) => LessThanOrEqual(fromExp, lit) case _ => exp } } @@ -200,7 +248,7 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { /** * Check if the input `fromExp` can be safely cast to `toType` without any loss of precision, * i.e., the conversion is injective. Note this only handles the case when both sides are of - * integral type. + * numeric type. */ private def canImplicitlyCast( fromExp: Expression, @@ -208,17 +256,19 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { literalType: DataType): Boolean = { toType.sameType(literalType) && !fromExp.foldable && - fromExp.dataType.isInstanceOf[IntegralType] && - toType.isInstanceOf[IntegralType] && + fromExp.dataType.isInstanceOf[NumericType] && + toType.isInstanceOf[NumericType] && Cast.canUpCast(fromExp.dataType, toType) } - private def getRange(dt: DataType): (Any, Any) = dt match { - case ByteType => (Byte.MinValue, Byte.MaxValue) - case ShortType => (Short.MinValue, Short.MaxValue) - case IntegerType => (Int.MinValue, Int.MaxValue) - case LongType => (Long.MinValue, Long.MaxValue) - case other => throw new IllegalArgumentException(s"Unsupported type: ${other.catalogString}") + private[optimizer] def getRange(dt: DataType): Option[(Any, Any)] = dt match { + case ByteType => Some((Byte.MinValue, Byte.MaxValue)) + case ShortType => Some((Short.MinValue, Short.MaxValue)) + case IntegerType => Some((Int.MinValue, Int.MaxValue)) + case LongType => Some((Long.MinValue, Long.MaxValue)) + case FloatType => Some((Float.NegativeInfinity, Float.NaN)) + case DoubleType => Some((Double.NegativeInfinity, Double.NaN)) + case _ => None } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index 373c1febd2488..0afb166b80ca5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -36,8 +36,10 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp NullPropagation, UnwrapCastInBinaryComparison) :: Nil } - val testRelation: LocalRelation = LocalRelation('a.short, 'b.float) + val testRelation: LocalRelation = LocalRelation('a.short, 'b.float, 'c.decimal(5, 2)) val f: BoundReference = 'a.short.canBeNull.at(0) + val f2: BoundReference = 'b.float.canBeNull.at(1) + val f3: BoundReference = 'c.decimal(5, 2).canBeNull.at(2) test("unwrap casts when literal == max") { val v = Short.MaxValue @@ -47,6 +49,14 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) <=> v.toInt, f <=> v) 
assertEquivalent(castInt(f) <= v.toInt, trueIfNotNull(f)) assertEquivalent(castInt(f) < v.toInt, f =!= v) + + val d = Float.NaN + assertEquivalent(castDouble(f2) > d.toDouble, falseIfNotNull(f2)) + assertEquivalent(castDouble(f2) >= d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) === d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) <=> d.toDouble, f2 <=> d) + assertEquivalent(castDouble(f2) <= d.toDouble, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) < d.toDouble, f2 =!= d) } test("unwrap casts when literal > max") { @@ -67,6 +77,23 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) <=> v.toInt, f <=> v) assertEquivalent(castInt(f) <= v.toInt, f === v) assertEquivalent(castInt(f) < v.toInt, falseIfNotNull(f)) + + val d = Float.NegativeInfinity + assertEquivalent(castDouble(f2) > d.toDouble, f2 =!= d) + assertEquivalent(castDouble(f2) >= d.toDouble, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) === d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) <=> d.toDouble, f2 <=> d) + assertEquivalent(castDouble(f2) <= d.toDouble, f2 === d) + assertEquivalent(castDouble(f2) < d.toDouble, falseIfNotNull(f2)) + + // Double.NegativeInfinity == Float.NegativeInfinity + val d2 = Double.NegativeInfinity + assertEquivalent(castDouble(f2) > d2, f2 =!= d) + assertEquivalent(castDouble(f2) >= d2, trueIfNotNull(f2)) + assertEquivalent(castDouble(f2) === d2, f2 === d) + assertEquivalent(castDouble(f2) <=> d2, f2 <=> d) + assertEquivalent(castDouble(f2) <= d2, f2 === d) + assertEquivalent(castDouble(f2) < d2, falseIfNotNull(f2)) } test("unwrap casts when literal < min") { @@ -79,13 +106,65 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) < v, falseIfNotNull(f)) } - test("unwrap casts when literal is within range (min, max)") { - assertEquivalent(castInt(f) > 300, f > 300.toShort) - assertEquivalent(castInt(f) >= 500, f >= 500.toShort) - assertEquivalent(castInt(f) === 32766, f === 32766.toShort) - assertEquivalent(castInt(f) <=> 32766, f <=> 32766.toShort) - assertEquivalent(castInt(f) <= -6000, f <= -6000.toShort) - assertEquivalent(castInt(f) < -32767, f < -32767.toShort) + test("unwrap casts when literal is within range (min, max) or fromType has no range") { + Seq(300, 500, 32766, -6000, -32767).foreach(v => { + assertEquivalent(castInt(f) > v, f > v.toShort) + assertEquivalent(castInt(f) >= v, f >= v.toShort) + assertEquivalent(castInt(f) === v, f === v.toShort) + assertEquivalent(castInt(f) <=> v, f <=> v.toShort) + assertEquivalent(castInt(f) <= v, f <= v.toShort) + assertEquivalent(castInt(f) < v, f < v.toShort) + }) + + Seq(3.14.toFloat.toDouble, -1000.0.toFloat.toDouble, + 20.0.toFloat.toDouble, -2.414.toFloat.toDouble, + Float.MinValue.toDouble, Float.MaxValue.toDouble, Float.PositiveInfinity.toDouble + ).foreach(v => { + assertEquivalent(castDouble(f2) > v, f2 > v.toFloat) + assertEquivalent(castDouble(f2) >= v, f2 >= v.toFloat) + assertEquivalent(castDouble(f2) === v, f2 === v.toFloat) + assertEquivalent(castDouble(f2) <=> v, f2 <=> v.toFloat) + assertEquivalent(castDouble(f2) <= v, f2 <= v.toFloat) + assertEquivalent(castDouble(f2) < v, f2 < v.toFloat) + }) + + Seq(decimal2(100.20), decimal2(-200.50)).foreach(v => { + assertEquivalent(castDecimal2(f3) > v, f3 > decimal(v)) + assertEquivalent(castDecimal2(f3) >= v, f3 >= decimal(v)) + assertEquivalent(castDecimal2(f3) === v, f3 === decimal(v)) + assertEquivalent(castDecimal2(f3) <=> v, f3 <=> 
decimal(v)) + assertEquivalent(castDecimal2(f3) <= v, f3 <= decimal(v)) + assertEquivalent(castDecimal2(f3) < v, f3 < decimal(v)) + }) + } + + test("unwrap cast when literal is within range (min, max) AND has round up or down") { + // Cases for rounding down + var doubleValue = 100.6 + assertEquivalent(castDouble(f) > doubleValue, f > doubleValue.toShort) + assertEquivalent(castDouble(f) >= doubleValue, f > doubleValue.toShort) + assertEquivalent(castDouble(f) === doubleValue, falseIfNotNull(f)) + assertEquivalent(castDouble(f) <=> doubleValue, false) + assertEquivalent(castDouble(f) <= doubleValue, f <= doubleValue.toShort) + assertEquivalent(castDouble(f) < doubleValue, f <= doubleValue.toShort) + + // Cases for rounding up: 3.14 will be rounded to 3.14000010... after casting to float + doubleValue = 3.14 + assertEquivalent(castDouble(f2) > doubleValue, f2 >= doubleValue.toFloat) + assertEquivalent(castDouble(f2) >= doubleValue, f2 >= doubleValue.toFloat) + assertEquivalent(castDouble(f2) === doubleValue, falseIfNotNull(f2)) + assertEquivalent(castDouble(f2) <=> doubleValue, false) + assertEquivalent(castDouble(f2) <= doubleValue, f2 < doubleValue.toFloat) + assertEquivalent(castDouble(f2) < doubleValue, f2 < doubleValue.toFloat) + + // Another case: 400.5678 is rounded up to 400.57 + val decimalValue = decimal2(400.5678) + assertEquivalent(castDecimal2(f3) > decimalValue, f3 >= decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) >= decimalValue, f3 >= decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) === decimalValue, falseIfNotNull(f3)) + assertEquivalent(castDecimal2(f3) <=> decimalValue, false) + assertEquivalent(castDecimal2(f3) <= decimalValue, f3 < decimal(decimalValue)) + assertEquivalent(castDecimal2(f3) < decimalValue, f3 < decimal(decimalValue)) } test("unwrap casts when cast is on rhs") { @@ -100,27 +179,8 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(Literal(30) <= castInt(f), Literal(30.toShort, ShortType) <= f) } - test("unwrap cast should have no effect when input is not integral type") { - Seq( - castDouble('b) > 42.0, - castDouble('b) >= 42.0, - castDouble('b) === 42.0, - castDouble('b) <=> 42.0, - castDouble('b) <= 42.0, - castDouble('b) < 42.0, - Literal(42.0) > castDouble('b), - Literal(42.0) >= castDouble('b), - Literal(42.0) === castDouble('b), - Literal(42.0) <=> castDouble('b), - Literal(42.0) <= castDouble('b), - Literal(42.0) < castDouble('b) - ).foreach(e => - assertEquivalent(e, e, evaluate = false) - ) - } - - test("unwrap cast should skip when expression is non-deterministic or foldable") { - Seq(positiveInt, negativeInt).foreach (v => { + test("unwrap cast should skip when expression is non-deterministic or foldable") { + Seq(positiveInt, negativeInt).foreach(v => { val e = Cast(First(f, ignoreNulls = true), IntegerType) <=> v assertEquivalent(e, e, evaluate = false) val e2 = Cast(Literal(30.toShort), IntegerType) >= v @@ -139,13 +199,46 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp assertEquivalent(castInt(f) < intLit, nullLit) } + test("unwrap casts should skip if downcast failed") { + val decimalValue = decimal2(123456.1234) + assertEquivalent(castDecimal2(f3) === decimalValue, castDecimal2(f3) === decimalValue) + } + test("unwrap cast should skip if cannot coerce type") { assertEquivalent(Cast(f, ByteType) > 100.toByte, Cast(f, ByteType) > 100.toByte) } - private def castInt(e: Expression): Expression = Cast(e, IntegerType) + test("test 
getRange()") { + assert(Some((Byte.MinValue, Byte.MaxValue)) === getRange(ByteType)) + assert(Some((Short.MinValue, Short.MaxValue)) === getRange(ShortType)) + assert(Some((Int.MinValue, Int.MaxValue)) === getRange(IntegerType)) + assert(Some((Long.MinValue, Long.MaxValue)) === getRange(LongType)) + val floatRange = getRange(FloatType) + assert(floatRange.isDefined) + val (floatMin, floatMax) = floatRange.get + assert(floatMin.isInstanceOf[Float]) + assert(floatMin.asInstanceOf[Float].isNegInfinity) + assert(floatMax.isInstanceOf[Float]) + assert(floatMax.asInstanceOf[Float].isNaN) + + val doubleRange = getRange(DoubleType) + assert(doubleRange.isDefined) + val (doubleMin, doubleMax) = doubleRange.get + assert(doubleMin.isInstanceOf[Double]) + assert(doubleMin.asInstanceOf[Double].isNegInfinity) + assert(doubleMax.isInstanceOf[Double]) + assert(doubleMax.asInstanceOf[Double].isNaN) + + assert(getRange(DecimalType(5, 2)).isEmpty) + } + + private def castInt(e: Expression): Expression = Cast(e, IntegerType) private def castDouble(e: Expression): Expression = Cast(e, DoubleType) + private def castDecimal2(e: Expression): Expression = Cast(e, DecimalType(10, 4)) + + private def decimal(v: Decimal): Decimal = Decimal(v.toJavaBigDecimal, 5, 2) + private def decimal2(v: BigDecimal): Decimal = Decimal(v, 10, 4) private def assertEquivalent(e1: Expression, e2: Expression, evaluate: Boolean = true): Unit = { val plan = testRelation.where(e1).analyze @@ -154,8 +247,17 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp comparePlans(actual, expected) if (evaluate) { - Seq(100.toShort, -300.toShort, null).foreach(v => { - val row = create_row(v) + Seq( + (100.toShort, 3.14.toFloat, decimal2(100)), + (-300.toShort, 3.1415927.toFloat, decimal2(-3000.50)), + (null, Float.NaN, decimal2(12345.6789)), + (null, null, null), + (Short.MaxValue, Float.PositiveInfinity, decimal2(Short.MaxValue)), + (Short.MinValue, Float.NegativeInfinity, decimal2(Short.MinValue)), + (0.toShort, Float.MaxValue, decimal2(0)), + (0.toShort, Float.MinValue, decimal2(0.01)) + ).foreach(v => { + val row = create_row(v._1, v._2, v._3) checkEvaluation(e1, e2.eval(row), row) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala new file mode 100644 index 0000000000000..e6f0426428bd4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -0,0 +1,194 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.Decimal + +class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + val t = "test_table" + + test("cases when literal is max") { + withTable(t) { + Seq[(Integer, java.lang.Short, java.lang.Float)]( + (1, 100.toShort, 3.14.toFloat), + (2, Short.MaxValue, Float.NaN), + (3, Short.MinValue, Float.PositiveInfinity), + (4, 0.toShort, Float.MaxValue), + (5, null, null)) + .toDF("c1", "c2", "c3").write.saveAsTable(t) + val df = spark.table(t) + + val lit = Short.MaxValue.toInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(1) :: Row(2) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + + checkAnswer(df.where(s"c3 > double('nan')").select("c1"), Seq.empty) + checkAnswer(df.where(s"c3 >= double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 == double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 <=> double('nan')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 != double('nan')").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c3 <= double('nan')").select("c1"), + Row(1) :: Row(2) :: Row(3) :: Row(4) :: Nil) + checkAnswer(df.where(s"c3 < double('nan')").select("c1"), Row(1) :: Row(3) :: Row(4) :: Nil) + } + } + + test("cases when literal is > max") { + withTable(t) { + Seq[(Integer, java.lang.Short)]( + (1, 100.toShort), + (2, Short.MaxValue), + (3, null)) + .toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + val lit = positiveInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Row(1) :: Row(2) :: Nil) + + // No test for float case since NaN is greater than any other numeric value + } + } + + test("cases when literal is min") { + withTable(t) { + Seq[(Integer, java.lang.Short, java.lang.Float)]( + (1, 100.toShort, 3.14.toFloat), + (2, Short.MinValue, Float.NegativeInfinity), + (3, Short.MaxValue, Float.MinValue), + (4, null, null)) + .toDF("c1", "c2", "c3").write.saveAsTable(t) + val df = spark.table(t) + + val lit = Short.MinValue.toInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(1) :: Row(2) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(2)) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Seq.empty) + + checkAnswer(df.where(s"c3 > 
double('-inf')").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 >= double('-inf')").select("c1"), Row(1) :: Row(2) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 == double('-inf')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 <=> double('-inf')").select("c1"), Row(2)) + checkAnswer(df.where(s"c3 != double('-inf')").select("c1"), Row(1) :: Row(3) :: Nil) + checkAnswer(df.where(s"c3 <= double('-inf')").select("c1"), Row(2) :: Nil) + checkAnswer(df.where(s"c3 < double('-inf')").select("c1"), Seq.empty) + } + } + + test("cases when literal is < min") { + val t = "test_table" + withTable(t) { + Seq[(Integer, java.lang.Short)]( + (1, 100.toShort), + (2, Short.MinValue), + (3, null)) + .toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + + val lit = negativeInt + checkAnswer(df.where(s"c2 > $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 == $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where(s"c2 <= $lit").select("c1"), Seq.empty) + checkAnswer(df.where(s"c2 < $lit").select("c1"), Seq.empty) + } + } + + test("cases when literal is within range (min, max)") { + withTable(t) { + Seq((1, 300.toShort), (2, 500.toShort)).toDF("c1", "c2").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer(df.where("c2 < 200").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 400").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 < 600").select("c1"), Row(1) :: Row(2) :: Nil) + + checkAnswer(df.where("c2 <= 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <= 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 <= 500").select("c1"), Row(1) :: Row(2) :: Nil) + + checkAnswer(df.where("c2 == 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 == 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 == 500").select("c1"), Row(2) :: Nil) + + checkAnswer(df.where("c2 <=> 100").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <=> 300").select("c1"), Row(1) :: Nil) + checkAnswer(df.where("c2 <=> 500").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 <=> null").select("c1"), Seq.empty) + + checkAnswer(df.where("c2 >= 200").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where("c2 >= 400").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 >= 600").select("c1"), Seq.empty) + + checkAnswer(df.where("c2 > 100").select("c1"), Row(1) :: Row(2) :: Nil) + checkAnswer(df.where("c2 > 300").select("c1"), Row(2) :: Nil) + checkAnswer(df.where("c2 > 500").select("c1"), Seq.empty) + } + } + + test("cases when literal is within range (min, max) and has rounding up or down") { + withTable(t) { + Seq((1, 100, 3.14.toFloat, decimal(200.12))) + .toDF("c1", "c2", "c3", "c4").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer(df.where("c2 > 99.6").select("c1"), Row(1)) + checkAnswer(df.where("c2 > 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 == 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 <=> 100.4").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 99.6").select("c1"), Seq.empty) + checkAnswer(df.where("c2 < 100.4").select("c1"), Row(1)) + + checkAnswer(df.where("c3 >= 3.14").select("c1"), Row(1)) + // float(3.14) is casted to double(3.140000104904175) + checkAnswer(df.where("c3 >= 3.14000010").select("c1"), Row(1)) + checkAnswer(df.where("c3 == 
3.14").select("c1"), Seq.empty) + checkAnswer(df.where("c3 <=> 3.14").select("c1"), Seq.empty) + checkAnswer(df.where("c3 < 3.14000010").select("c1"), Seq.empty) + checkAnswer(df.where("c3 <= 3.14").select("c1"), Seq.empty) + + checkAnswer(df.where("c4 > cast(200.1199 as decimal(10, 4))").select("c1"), Row(1)) + checkAnswer(df.where("c4 >= cast(200.1201 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 == cast(200.1156 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 <=> cast(200.1201 as decimal(10, 4))").select("c1"), Seq.empty) + checkAnswer(df.where("c4 <= cast(200.1201 as decimal(10, 4))").select("c1"), Row(1)) + checkAnswer(df.where("c4 < cast(200.1159 as decimal(10, 4))").select("c1"), Seq.empty) + } + } + + private def decimal(v: BigDecimal): Decimal = Decimal(v, 5, 2) +} From af3e2f7d58507a47e2d767552209c309637a3170 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 13 Oct 2020 12:57:54 +0000 Subject: [PATCH 0228/1009] [SPARK-33081][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: update type and nullability of columns (DB2 dialect) ### What changes were proposed in this pull request? - Override the default SQL strings in the DB2 Dialect for: * ALTER TABLE UPDATE COLUMN TYPE * ALTER TABLE UPDATE COLUMN NULLABILITY - Add new docker integration test suite jdbc/v2/DB2IntegrationSuite.scala ### Why are the changes needed? In SPARK-24907, we implemented JDBC v2 Table Catalog but it doesn't support some ALTER TABLE at the moment. This PR supports DB2 specific ALTER TABLE. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running new integration test suite: $ ./build/sbt -Pdocker-integration-tests "test-only *.DB2IntegrationSuite" Closes #29972 from huaxingao/db2_docker. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 76 ++++++++++++++ .../sql/jdbc/v2/OracleIntegrationSuite.scala | 92 +++-------------- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 98 +++++++++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 7 ++ .../apache/spark/sql/jdbc/DB2Dialect.scala | 20 ++++ 5 files changed, 216 insertions(+), 77 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala new file mode 100644 index 0000000000000..82f9f978c5da2 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): + * {{{ + * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 + * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * }}} + */ +@DockerTest +class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "db2" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("DB2_DOCKER_IMAGE_NAME", "ibmcom/db2:11.5.4.0") + override val env = Map( + "DB2INST1_PASSWORD" -> "rootpass", + "LICENSE" -> "accept", + "DBNAME" -> "foo", + "ARCHIVE_LOGS" -> "false", + "AUTOCONFIG" -> "false" + ) + override val usesIpc = false + override val jdbcPort: Int = 50000 + override val privileged = true + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.db2.url", db.getJdbcUrl(dockerIp, externalPort)) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE DOUBLE") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", DoubleType) + assert(t.schema === expectedSchema) + // Update column type from DOUBLE to STRING + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE VARCHAR(10)") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: double cannot be cast to varchar")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 400459c0ea17b..1b51d43c1d139 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -23,10 +23,8 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.{DatabaseOnDocker, 
DockerJDBCIntegrationSuite} -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -54,7 +52,8 @@ import org.apache.spark.tags.DockerTest * It has been validated with 18.4.0 Express Edition. */ @DockerTest -class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession { +class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "oracle" override val db = new DatabaseOnDocker { override val imageName = sys.env("ORACLE_DOCKER_IMAGE_NAME") override val env = Map( @@ -73,80 +72,19 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark override val connectionTimeout = timeout(7.minutes) override def dataPreparation(conn: Connection): Unit = {} - test("SPARK-33034: ALTER TABLE ... add new columns") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID STRING) USING _") - sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") - var t = spark.table("oracle.alt_table") - var expectedSchema = new StructType() - .add("ID", StringType) - .add("C1", StringType) - .add("C2", StringType) - assert(t.schema === expectedSchema) - sql("ALTER TABLE oracle.alt_table ADD COLUMNS (C3 STRING)") - t = spark.table("oracle.alt_table") - expectedSchema = expectedSchema.add("C3", StringType) - assert(t.schema === expectedSchema) - // Add already existing column - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.alt_table ADD COLUMNS (C3 DOUBLE)") - }.getMessage - assert(msg.contains("Cannot add column, because C3 already exists")) - } - // Add a column to not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ADD COLUMNS (C4 STRING)") + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", DecimalType(10, 0)) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") }.getMessage - assert(msg.contains("Table not found")) - } - - test("SPARK-33034: ALTER TABLE ... 
update column type") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID INTEGER) USING _") - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE STRING") - val t = spark.table("oracle.alt_table") - val expectedSchema = new StructType().add("ID", StringType) - assert(t.schema === expectedSchema) - // Update column type from STRING to INTEGER - val msg1 = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE INTEGER") - }.getMessage - assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) - // Update not existing column - val msg2 = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column TYPE DOUBLE") - }.getMessage - assert(msg2.contains("Cannot update missing field bad_column")) - // Update column to wrong type - val msg3 = intercept[ParseException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN id TYPE bad_type") - }.getMessage - assert(msg3.contains("DataType bad_type is not supported")) - } - // Update column type in not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN id TYPE DOUBLE") - }.getMessage - assert(msg.contains("Table not found")) - } - - test("SPARK-33034: ALTER TABLE ... update column nullability") { - withTable("oracle.alt_table") { - sql("CREATE TABLE oracle.alt_table (ID STRING NOT NULL) USING _") - sql("ALTER TABLE oracle.alt_table ALTER COLUMN ID DROP NOT NULL") - val t = spark.table("oracle.alt_table") - val expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - // Update nullability of not existing column - val msg = intercept[AnalysisException] { - sql("ALTER TABLE oracle.alt_table ALTER COLUMN bad_column DROP NOT NULL") - }.getMessage - assert(msg.contains("Cannot update missing field bad_column")) - } - // Update column nullability in not existing table - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE oracle.not_existing_table ALTER COLUMN ID DROP NOT NULL") - }.getMessage - assert(msg.contains("Table not found")) + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala new file mode 100644 index 0000000000000..384bcc22f27d8 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +@DockerTest +trait V2JDBCTest extends SharedSparkSession { + val catalogName: String + // dialect specific update column type test + def testUpdateColumnType(tbl: String): Unit + + test("SPARK-33034: ALTER TABLE ... add new columns") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") + var t = spark.table(s"$catalogName.alt_table") + var expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C1 STRING, C2 STRING)") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = expectedSchema.add("C1", StringType).add("C2", StringType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C3 STRING)") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = expectedSchema.add("C3", StringType) + assert(t.schema === expectedSchema) + // Add already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C3 DOUBLE)") + }.getMessage + assert(msg.contains("Cannot add column, because C3 already exists")) + } + // Add a column to not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ADD COLUMNS (C4 STRING)") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... update column type") { + withTable(s"$catalogName.alt_table") { + testUpdateColumnType(s"$catalogName.alt_table") + // Update not existing column + val msg2 = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + }.getMessage + assert(msg2.contains("Cannot update missing field bad_column")) + } + // Update column type in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN id TYPE DOUBLE") + }.getMessage + assert(msg.contains("Table not found")) + } + + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + var t = spark.table(s"$catalogName.alt_table") + // nullable is true in the expecteSchema because Spark always sets nullable to true + // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 + var expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + // Update column nullability in not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN ID DROP NOT NULL") + }.getMessage + assert(msg.contains("Table not found")) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 8b8531b2bb3b1..621d416c55457 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -591,6 +591,13 @@ class DDLParserSuite extends AnalysisTest { None)) } + test("alter table: update column type invalid type") { + val msg = intercept[ParseException] { + parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c TYPE bad_type") + }.getMessage + assert(msg.contains("DataType bad_type is not supported")) + } + test("alter table: update column type") { comparePlans( parsePlan("ALTER TABLE table_name CHANGE COLUMN a.b.c TYPE bigint"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 430ca9edab799..908e03726d887 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -58,4 +58,24 @@ private object DB2Dialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"RENAME TABLE $oldTable TO $newTable" } + + // scalastyle:off line.size.limit + // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html + // scalastyle:on line.size.limit + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = + s"ALTER TABLE $tableName ALTER COLUMN $columnName SET DATA TYPE $newDataType" + + // scalastyle:off line.size.limit + // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html + // scalastyle:on line.size.limit + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN $columnName $nullable" + } } From 2b7239edfb02dc74415f6c9e6a675e1ba46ac195 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 13 Oct 2020 13:12:17 
+0000 Subject: [PATCH 0229/1009] [SPARK-33125][SQL] Improve the error when Lead and Lag are not allowed to specify window frame ### What changes were proposed in this pull request? Except for PostgreSQL, other data sources (for example: Vertica, Oracle, Redshift, MySQL, Presto) do not allow specifying a window frame for the Lead and Lag functions. But the current error message is not clear enough: `Window Frame $f must match the required frame`. This PR uses the following error message instead: `Cannot specify window frame for lead function` ### Why are the changes needed? To make the error message clearer. ### Does this PR introduce _any_ user-facing change? Yes. Users will see a clearer error message. ### How was this patch tested? Jenkins test. Closes #30021 from beliefer/SPARK-33125. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 +++ .../spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 77a6631b250e8..337cf1c0bdc50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2974,6 +2974,9 @@ class Analyzer( */ object ResolveWindowFrame extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + case WindowExpression(wf: OffsetWindowFunction, + WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => + failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != UnspecifiedFrame && wf.frame != f => failAnalysis(s"Window Frame $f must match the required frame ${wf.frame}") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index d3a14e511cdc2..44128c4419951 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -231,7 +231,7 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedAttribute("a") :: Nil, SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, SpecifiedWindowFrame(RangeFrame, Literal(1), Literal(2)))).as("window")), - "window frame" :: "must match the required frame" :: Nil) + "Cannot specify window frame for lead function" :: Nil) errorTest( "the offset of nth_value window function is negative or zero", From dc697a8b598aea922ee6620d87f3ace2f7947231 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 13 Oct 2020 13:21:45 +0000 Subject: [PATCH 0230/1009] [SPARK-13860][SQL] Change statistical aggregate function to return null instead of Double.NaN when divideByZero ### What changes were proposed in this pull request? As [SPARK-13860](https://issues.apache.org/jira/browse/SPARK-13860) stated, TPCDS Query 39 returns wrong results using SparkSQL. The root cause is that when stddev_samp is applied to a single-element set, the TPCDS answer expects null, whereas SparkSQL returns Double.NaN, which causes the wrong result.
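For illustration only, a minimal spark-shell style sketch of the behavior this patch targets (the single-row DataFrame and the column name `a` are made up here, and the commented results are expectations under this change rather than captured output):

```
import org.apache.spark.sql.functions.stddev_samp

// A single-element set: the sample standard deviation is undefined.
val df = Seq(1.0).toDF("a")

// New default (spark.sql.legacy.statisticalAggregate = false): expect NULL.
df.agg(stddev_samp($"a")).show()

// Opting back into the legacy behavior should yield Double.NaN again.
spark.conf.set("spark.sql.legacy.statisticalAggregate", true)
df.agg(stddev_samp($"a")).show()
```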
Add an extra legacy config to fall back to the NaN logic, and return null by default to align with the TPCDS standard. ### Why are the changes needed? SQL correctness issue. ### Does this PR introduce any user-facing change? Yes. See the sql-migration-guide: In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. ### How was this patch tested? Updated DataFrameAggregateSuite/DataFrameWindowFunctionsSuite to test both default and legacy behavior. Adjusted DataFrameWindowFunctionsSuite/SQLQueryTestSuite and some R cases to the default return-null behavior. Closes #29983 from leanken/leanken-SPARK-13860. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 +- docs/sql-migration-guide.md | 2 + .../sql/catalyst/analysis/TypeCoercion.scala | 18 +- .../aggregate/CentralMomentAgg.scala | 60 ++++- .../catalyst/expressions/aggregate/Corr.scala | 22 +- .../expressions/aggregate/Covariance.scala | 32 ++- .../apache/spark/sql/internal/SQLConf.scala | 12 + .../postgreSQL/aggregates_part1.sql.out | 4 +- .../results/postgreSQL/window_part4.sql.out | 32 +-- .../native/promoteStrings.sql.out | 8 +- .../postgreSQL/udf-aggregates_part1.sql.out | 4 +- .../sql-tests/results/udf/udf-window.sql.out | 8 +- .../sql-tests/results/window.sql.out | 10 +- .../spark/sql/DataFrameAggregateSuite.scala | 62 +++-- .../sql/DataFrameWindowFunctionsSuite.scala | 252 ++++++++++++------ .../execution/AggregationQuerySuite.scala | 12 +- .../sql/hive/execution/WindowQuerySuite.scala | 9 +- 17 files changed, 385 insertions(+), 166 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 268f5734813ba..077dfc6770d94 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2147,7 +2147,7 @@ test_that("group by, agg functions", { df3 <- agg(gd, age = "stddev") expect_is(df3, "SparkDataFrame") df3_local <- collect(df3) - expect_true(is.nan(df3_local[df3_local$name == "Andy", ][1, 2])) + expect_true(is.na(df3_local[df3_local$name == "Andy", ][1, 2])) df4 <- agg(gd, sumAge = sum(df$age)) expect_is(df4, "SparkDataFrame") @@ -2178,7 +2178,7 @@ test_that("group by, agg functions", { df7 <- agg(gd2, value = "stddev") df7_local <- collect(df7) expect_true(abs(df7_local[df7_local$name == "ID1", ][1, 2] - 6.928203) < 1e-6) - expect_true(is.nan(df7_local[df7_local$name == "ID2", ][1, 2])) + expect_true(is.na(df7_local[df7_local$name == "ID2", ][1, 2])) mockLines3 <- c("{\"name\":\"Andy\", \"age\":30}", "{\"name\":\"Andy\", \"age\":30}", diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index feff2c7e9f543..c1de58d85d5bf 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,6 +24,8 @@ license: | ## Upgrading from Spark SQL 3.0 to 3.1 + - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when
`stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. + - In Spark 3.1, grouping_id() returns long values. In Spark version 3.0 and earlier, this function returns int values. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.integerGroupingId` to `true`. - In Spark 3.1, SQL UI data adopts the `formatted` mode for the query plan explain results. To restore the behavior before Spark 3.1, you can set `spark.sql.ui.explainMode` to `extended`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index deaa49bf423b1..f72d9be205df3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -450,14 +450,20 @@ object TypeCoercion { case Abs(e @ StringType()) => Abs(Cast(e, DoubleType)) case Sum(e @ StringType()) => Sum(Cast(e, DoubleType)) case Average(e @ StringType()) => Average(Cast(e, DoubleType)) - case StddevPop(e @ StringType()) => StddevPop(Cast(e, DoubleType)) - case StddevSamp(e @ StringType()) => StddevSamp(Cast(e, DoubleType)) + case s @ StddevPop(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) + case s @ StddevSamp(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) case UnaryMinus(e @ StringType()) => UnaryMinus(Cast(e, DoubleType)) case UnaryPositive(e @ StringType()) => UnaryPositive(Cast(e, DoubleType)) - case VariancePop(e @ StringType()) => VariancePop(Cast(e, DoubleType)) - case VarianceSamp(e @ StringType()) => VarianceSamp(Cast(e, DoubleType)) - case Skewness(e @ StringType()) => Skewness(Cast(e, DoubleType)) - case Kurtosis(e @ StringType()) => Kurtosis(Cast(e, DoubleType)) + case v @ VariancePop(e @ StringType(), _) => + v.withNewChildren(Seq(Cast(e, DoubleType))) + case v @ VarianceSamp(e @ StringType(), _) => + v.withNewChildren(Seq(Cast(e, DoubleType))) + case s @ Skewness(e @ StringType(), _) => + s.withNewChildren(Seq(Cast(e, DoubleType))) + case k @ Kurtosis(e @ StringType(), _) => + k.withNewChildren(Seq(Cast(e, DoubleType))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala index 53759ca3d9165..2cc9adb5aa06e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -43,7 +44,7 @@ import org.apache.spark.sql.types._ * * @param child to compute central moments of. 
*/ -abstract class CentralMomentAgg(child: Expression) +abstract class CentralMomentAgg(child: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { /** @@ -62,6 +63,13 @@ abstract class CentralMomentAgg(child: Expression) protected val m3 = AttributeReference("m3", DoubleType, nullable = false)() protected val m4 = AttributeReference("m4", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + private def trimHigherOrder[T](expressions: Seq[T]) = expressions.take(momentOrder + 1) override val aggBufferAttributes = trimHigherOrder(Seq(n, avg, m2, m3, m4)) @@ -145,7 +153,12 @@ abstract class CentralMomentAgg(child: Expression) group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class StddevPop(child: Expression) extends CentralMomentAgg(child) { +case class StddevPop( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 @@ -168,13 +181,18 @@ case class StddevPop(child: Expression) extends CentralMomentAgg(child) { group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { +case class StddevSamp( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, sqrt(m2 / (n - 1.0)))) + If(n === 1.0, divideByZeroEvalResult, sqrt(m2 / (n - 1.0)))) } override def prettyName: String = @@ -191,7 +209,12 @@ case class StddevSamp(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class VariancePop(child: Expression) extends CentralMomentAgg(child) { +case class VariancePop( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 @@ -212,13 +235,18 @@ case class VariancePop(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class VarianceSamp(child: Expression) extends CentralMomentAgg(child) { +case class VarianceSamp( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 2 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, m2 / (n - 1.0))) + If(n === 1.0, divideByZeroEvalResult, m2 / (n - 1.0))) } override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("var_samp") @@ -235,7 +263,12 @@ case class VarianceSamp(child: Expression) extends 
CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class Skewness(child: Expression) extends CentralMomentAgg(child) { +case class Skewness( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override def prettyName: String = "skewness" @@ -243,7 +276,7 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) { override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(m2 === 0.0, Double.NaN, sqrt(n) * m3 / sqrt(m2 * m2 * m2))) + If(m2 === 0.0, divideByZeroEvalResult, sqrt(n) * m3 / sqrt(m2 * m2 * m2))) } } @@ -258,13 +291,18 @@ case class Skewness(child: Expression) extends CentralMomentAgg(child) { """, group = "agg_funcs", since = "1.6.0") -case class Kurtosis(child: Expression) extends CentralMomentAgg(child) { +case class Kurtosis( + child: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends CentralMomentAgg(child, nullOnDivideByZero) { + + def this(child: Expression) = this(child, !SQLConf.get.legacyStatisticalAggregate) override protected def momentOrder = 4 override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(m2 === 0.0, Double.NaN, n * m4 / (m2 * m2) - 3.0)) + If(m2 === 0.0, divideByZeroEvalResult, n * m4 / (m2 * m2) - 3.0)) } override def prettyName: String = "kurtosis" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala index 9ef05bb5d4fec..737e8cd3ffa41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Corr.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** @@ -28,7 +29,7 @@ import org.apache.spark.sql.types._ * Definition of Pearson correlation can be found at * http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient */ -abstract class PearsonCorrelation(x: Expression, y: Expression) +abstract class PearsonCorrelation(x: Expression, y: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(x, y) @@ -43,6 +44,13 @@ abstract class PearsonCorrelation(x: Expression, y: Expression) protected val xMk = AttributeReference("xMk", DoubleType, nullable = false)() protected val yMk = AttributeReference("yMk", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + override val aggBufferAttributes: Seq[AttributeReference] = Seq(n, xAvg, yAvg, ck, xMk, yMk) override val initialValues: Seq[Expression] = Array.fill(6)(Literal(0.0)) @@ -102,12 +110,18 @@ abstract class PearsonCorrelation(x: Expression, y: Expression) group = "agg_funcs", since = "1.6.0") // scalastyle:on line.size.limit -case class Corr(x: Expression, y: 
Expression) - extends PearsonCorrelation(x, y) { +case class Corr( + x: Expression, + y: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends PearsonCorrelation(x, y, nullOnDivideByZero) { + + def this(x: Expression, y: Expression) = + this(x, y, !SQLConf.get.legacyStatisticalAggregate) override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, ck / sqrt(xMk * yMk))) + If(n === 1.0, divideByZeroEvalResult, ck / sqrt(xMk * yMk))) } override def prettyName: String = "corr" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala index f03c2f2710a04..7c4d6ded6559e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala @@ -19,13 +19,14 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** * Compute the covariance between two expressions. * When applied on empty data (i.e., count is zero), it returns NULL. */ -abstract class Covariance(x: Expression, y: Expression) +abstract class Covariance(x: Expression, y: Expression, nullOnDivideByZero: Boolean) extends DeclarativeAggregate with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(x, y) @@ -38,6 +39,13 @@ abstract class Covariance(x: Expression, y: Expression) protected val yAvg = AttributeReference("yAvg", DoubleType, nullable = false)() protected val ck = AttributeReference("ck", DoubleType, nullable = false)() + protected def divideByZeroEvalResult: Expression = { + if (nullOnDivideByZero) Literal.create(null, DoubleType) else Double.NaN + } + + override def stringArgs: Iterator[Any] = + super.stringArgs.filter(_.isInstanceOf[Expression]) + override val aggBufferAttributes: Seq[AttributeReference] = Seq(n, xAvg, yAvg, ck) override val initialValues: Seq[Expression] = Array.fill(4)(Literal(0.0)) @@ -88,7 +96,15 @@ abstract class Covariance(x: Expression, y: Expression) """, group = "agg_funcs", since = "2.0.0") -case class CovPopulation(left: Expression, right: Expression) extends Covariance(left, right) { +case class CovPopulation( + left: Expression, + right: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends Covariance(left, right, nullOnDivideByZero) { + + def this(left: Expression, right: Expression) = + this(left, right, !SQLConf.get.legacyStatisticalAggregate) + override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), ck / n) } @@ -105,10 +121,18 @@ case class CovPopulation(left: Expression, right: Expression) extends Covariance """, group = "agg_funcs", since = "2.0.0") -case class CovSample(left: Expression, right: Expression) extends Covariance(left, right) { +case class CovSample( + left: Expression, + right: Expression, + nullOnDivideByZero: Boolean = !SQLConf.get.legacyStatisticalAggregate) + extends Covariance(left, right, nullOnDivideByZero) { + + def this(left: Expression, right: Expression) = + this(left, right, !SQLConf.get.legacyStatisticalAggregate) + override val evaluateExpression: Expression = { If(n === 0.0, 
Literal.create(null, DoubleType), - If(n === 1.0, Double.NaN, ck / (n - 1.0))) + If(n === 1.0, divideByZeroEvalResult, ck / (n - 1.0))) } override def prettyName: String = "covar_samp" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 99c10b38c53b1..d4c7dd7f3160c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2342,6 +2342,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_STATISTICAL_AGGREGATE = + buildConf("spark.sql.legacy.statisticalAggregate") + .internal() + .doc("When set to true, statistical aggregate function returns Double.NaN " + + "if divide by zero occurred during expression evaluation, otherwise, it returns null. " + + "Before version 3.1.0, it returns NaN in divideByZero case by default.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val TRUNCATE_TABLE_IGNORE_PERMISSION_ACL = buildConf("spark.sql.truncateTable.ignorePermissionAcl.enabled") .internal() @@ -3364,6 +3374,8 @@ class SQLConf extends Serializable with Logging { def allowNegativeScaleOfDecimalEnabled: Boolean = getConf(SQLConf.LEGACY_ALLOW_NEGATIVE_SCALE_OF_DECIMAL_ENABLED) + def legacyStatisticalAggregate: Boolean = getConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE) + def truncateTableIgnorePermissionAcl: Boolean = getConf(SQLConf.TRUNCATE_TABLE_IGNORE_PERMISSION_ACL) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index f7bba96738eab..212365f92946c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -143,7 +143,7 @@ SELECT var_pop(1.0), var_samp(2.0) -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query @@ -151,7 +151,7 @@ SELECT stddev_pop(CAST(3.0 AS Decimal(38,0))), stddev_samp(CAST(4.0 AS Decimal(3 -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 4dd4712345a89..f7439d873b4eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -195,7 +195,7 @@ struct -- !query output -NaN +NULL -- !query @@ -2558,7 +2558,7 @@ SELECT var_samp('1') FROM t -- !query schema struct -- !query output -NaN +NULL -- !query @@ -2566,7 +2566,7 @@ SELECT skewness('1') FROM t -- !query schema struct -- !query output -NaN +NULL -- !query @@ -2574,4 +2574,4 @@ SELECT kurtosis('1') FROM t -- !query schema struct -- !query output -NaN +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index 76637bf578e6f..a428a7a9c923b 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -143,7 +143,7 @@ SELECT udf(var_pop(1.0)), var_samp(udf(2.0)) -- !query schema struct -- !query output -0.0 NaN +0.0 
NULL -- !query @@ -151,7 +151,7 @@ SELECT stddev_pop(udf(CAST(3.0 AS Decimal(38,0)))), stddev_samp(CAST(udf(4.0) AS -- !query schema struct -- !query output -0.0 NaN +0.0 NULL -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index a84070535b658..928b9ebb12364 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -289,13 +289,13 @@ ORDER BY cate, udf(val) struct,collect_set:array,skewness:double,kurtosis:double> -- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1 NULL NULL NaN 0.0 [1] [1] NaN NaN -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NaN 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index a8875fd449bad..028dd7a12d25d 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 29 +-- Number of queries: 32 -- !query @@ -313,13 +313,13 @@ ORDER BY cate, val struct,collect_set:array,skewness:double,kurtosis:double> -- !query output NULL NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.5 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL -3 NULL 3 3 3 1 3 3.0 NaN NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NaN 1 0.0 NaN NaN 0.0 [3] [3] NaN NaN -NULL a NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NaN NaN +3 NULL 3 3 3 1 3 3.0 NULL NULL 3 NULL 3 3 3 2 2 1.0 1.0 2 2 0.0 NULL 1 0.0 NULL NULL 0.0 [3] [3] NULL NULL +NULL a NULL NULL NULL 0 NULL 
NULL NULL NULL NULL NULL NULL NULL NULL 1 1 0.25 0.0 1 1 NULL NULL 0 NULL NULL NULL NULL [] [] NULL NULL 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 1 2 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 1 a 1 1 1 2 2 1.0 0.0 NULL 1 NULL 1 1 1 2 2 0.75 0.3333333333333333 2 3 0.0 0.0 1 0.0 NULL 0.0 0.0 [1,1] [1] 0.7071067811865476 -1.5 2 a 2 1 1 3 4 1.3333333333333333 0.5773502691896258 NULL 1 NULL 2 2 2 4 3 1.0 1.0 2 4 0.22222222222222224 0.33333333333333337 2 4.772185885555555E8 1.0 0.5773502691896258 0.4714045207910317 [1,1,2] [1,2] 1.1539890888012805 -0.6672217220327235 -1 b 1 1 1 1 1 1.0 NaN 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NaN 1 NULL NULL NaN 0.0 [1] [1] NaN NaN -2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NaN 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 +1 b 1 1 1 1 1 1.0 NULL 1 1 1 1 1 1 1 1 0.3333333333333333 0.0 1 1 0.0 NULL 1 NULL NULL NULL 0.0 [1] [1] NULL NULL +2 b 2 1 1 2 3 1.5 0.7071067811865476 1 1 1 2 2 2 2 2 0.6666666666666666 0.5 1 2 0.25 0.5 2 0.0 NULL 0.7071067811865476 0.5 [1,2] [1,2] 0.0 -2.0000000000000013 3 b 3 1 1 3 6 2.0 1.0 1 1 1 3 3 3 3 3 1.0 1.0 2 3 0.6666666666666666 1.0 3 5.3687091175E8 1.0 1.0 0.816496580927726 [1,2,3] [1,2,3] 0.7057890433107311 -1.4999999999999984 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 353444b664412..d4e64aa03df0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -456,25 +456,51 @@ class DataFrameAggregateSuite extends QueryTest } test("zero moments") { - val input = Seq((1, 2)).toDF("a", "b") - checkAnswer( - input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), - var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), - Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, - Double.NaN, Double.NaN)) + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val input = Seq((1, 2)).toDF("a", "b") + checkAnswer( + input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), + var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), + Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, + Double.NaN, Double.NaN)) - checkAnswer( - input.agg( - expr("stddev(a)"), - expr("stddev_samp(a)"), - expr("stddev_pop(a)"), - expr("variance(a)"), - expr("var_samp(a)"), - expr("var_pop(a)"), - expr("skewness(a)"), - expr("kurtosis(a)")), - Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, - Double.NaN, Double.NaN)) + checkAnswer( + input.agg( + expr("stddev(a)"), + expr("stddev_samp(a)"), + expr("stddev_pop(a)"), + expr("variance(a)"), + expr("var_samp(a)"), + expr("var_pop(a)"), + expr("skewness(a)"), + expr("kurtosis(a)")), + Row(Double.NaN, Double.NaN, 0.0, Double.NaN, Double.NaN, 0.0, + Double.NaN, Double.NaN)) + } + } + + test("SPARK-13860: zero moments LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val input = Seq((1, 2)).toDF("a", "b") + checkAnswer( + input.agg(stddev($"a"), stddev_samp($"a"), stddev_pop($"a"), variance($"a"), + var_samp($"a"), var_pop($"a"), skewness($"a"), kurtosis($"a")), + Row(null, null, 0.0, null, null, 0.0, + null, null)) + + checkAnswer( + input.agg( + expr("stddev(a)"), + expr("stddev_samp(a)"), + 
expr("stddev_pop(a)"), + expr("variance(a)"), + expr("var_samp(a)"), + expr("var_pop(a)"), + expr("skewness(a)"), + expr("kurtosis(a)")), + Row(null, null, 0.0, null, null, 0.0, + null, null)) + } } test("null moments") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index c5dcdc44cc64f..616e333033aa9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -94,89 +94,187 @@ class DataFrameWindowFunctionsSuite extends QueryTest } test("corr, covar_pop, stddev_pop functions in specific window") { - val df = Seq( - ("a", "p1", 10.0, 20.0), - ("b", "p1", 20.0, 10.0), - ("c", "p2", 20.0, 20.0), - ("d", "p2", 20.0, 20.0), - ("e", "p3", 0.0, 0.0), - ("f", "p3", 6.0, 12.0), - ("g", "p3", 6.0, 12.0), - ("h", "p3", 8.0, 16.0), - ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") - checkAnswer( - df.select( - $"key", - corr("value1", "value2").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - covar_pop("value1", "value2") - .over(Window.partitionBy("partitionId") + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + corr("value1", "value2").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + covar_pop("value1", "value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), + + // As stddev_pop(expr) = sqrt(var_pop(expr)) + // the "stddev_pop" column can be calculated from the "var_pop" column. 
+ // + // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) + // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns + Seq( + Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("i", Double.NaN, 0.0, 0.0, 0.0, 0.0, 0.0))) + } + } + + test("SPARK-13860: " + + "corr, covar_pop, stddev_pop functions in specific window " + + "LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + corr("value1", "value2").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + covar_pop("value1", "value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value1") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_pop("value2") + .over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), + + // As stddev_pop(expr) = sqrt(var_pop(expr)) + // the "stddev_pop" column can be calculated from the "var_pop" column. 
+ // + // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) + // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns + Seq( + Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), + Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), + Row("i", null, 0.0, 0.0, 0.0, 0.0, 0.0))) + } + } + + test("covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "true") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + covar_samp("value1", "value2").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_pop("value1") - .over(Window.partitionBy("partitionId") + var_samp("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_pop("value1") - .over(Window.partitionBy("partitionId") + variance("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_pop("value2") - .over(Window.partitionBy("partitionId") + stddev_samp("value1").over(Window.partitionBy("partitionId") .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_pop("value2") - .over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing))), - - // As stddev_pop(expr) = sqrt(var_pop(expr)) - // the "stddev_pop" column can be calculated from the "var_pop" column. - // - // As corr(expr1, expr2) = covar_pop(expr1, expr2) / (stddev_pop(expr1) * stddev_pop(expr2)) - // the "corr" column can be calculated from the "covar_pop" and the two "stddev_pop" columns. 
- Seq( - Row("a", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), - Row("b", -1.0, -25.0, 25.0, 5.0, 25.0, 5.0), - Row("c", null, 0.0, 0.0, 0.0, 0.0, 0.0), - Row("d", null, 0.0, 0.0, 0.0, 0.0, 0.0), - Row("e", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("f", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("g", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("h", 1.0, 18.0, 9.0, 3.0, 36.0, 6.0), - Row("i", Double.NaN, 0.0, 0.0, 0.0, 0.0, 0.0))) + stddev("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) + ), + Seq( + Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("i", Double.NaN, Double.NaN, Double.NaN, Double.NaN, Double.NaN))) + } } - test("covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window") { - val df = Seq( - ("a", "p1", 10.0, 20.0), - ("b", "p1", 20.0, 10.0), - ("c", "p2", 20.0, 20.0), - ("d", "p2", 20.0, 20.0), - ("e", "p3", 0.0, 0.0), - ("f", "p3", 6.0, 12.0), - ("g", "p3", 6.0, 12.0), - ("h", "p3", 8.0, 16.0), - ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") - checkAnswer( - df.select( - $"key", - covar_samp("value1", "value2").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - var_samp("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - variance("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev_samp("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), - stddev("value1").over(Window.partitionBy("partitionId") - .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) - ), - Seq( - Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), - Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), - Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), - Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), - Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), - Row("i", Double.NaN, Double.NaN, Double.NaN, Double.NaN, Double.NaN))) + test("SPARK-13860: " + + "covar_samp, var_samp (variance), stddev_samp (stddev) functions in specific window " + + "LEGACY_STATISTICAL_AGGREGATE off") { + withSQLConf(SQLConf.LEGACY_STATISTICAL_AGGREGATE.key -> "false") { + val df = Seq( + ("a", "p1", 10.0, 20.0), + ("b", "p1", 20.0, 10.0), + ("c", "p2", 20.0, 20.0), + ("d", "p2", 20.0, 20.0), + ("e", "p3", 0.0, 0.0), + ("f", "p3", 6.0, 12.0), + ("g", "p3", 6.0, 12.0), + ("h", "p3", 8.0, 16.0), + ("i", "p4", 5.0, 5.0)).toDF("key", "partitionId", "value1", "value2") + checkAnswer( + df.select( + $"key", + covar_samp("value1", "value2").over(Window.partitionBy("partitionId") + 
.orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + var_samp("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + variance("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev_samp("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)), + stddev("value1").over(Window.partitionBy("partitionId") + .orderBy("key").rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing)) + ), + Seq( + Row("a", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("b", -50.0, 50.0, 50.0, 7.0710678118654755, 7.0710678118654755), + Row("c", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("d", 0.0, 0.0, 0.0, 0.0, 0.0), + Row("e", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("f", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("g", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("h", 24.0, 12.0, 12.0, 3.4641016151377544, 3.4641016151377544), + Row("i", null, null, null, null, null))) + } } test("collect_list in ascending ordered window") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala index 87771eed17b1b..70dcfb05c2ba9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/AggregationQuerySuite.scala @@ -825,7 +825,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te """ |SELECT corr(b, c) FROM covar_tab WHERE a = 3 """.stripMargin), - Row(Double.NaN) :: Nil) + Row(null) :: Nil) checkAnswer( spark.sql( @@ -834,10 +834,10 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te """.stripMargin), Row(1, null) :: Row(2, null) :: - Row(3, Double.NaN) :: - Row(4, Double.NaN) :: - Row(5, Double.NaN) :: - Row(6, Double.NaN) :: Nil) + Row(3, null) :: + Row(4, null) :: + Row(5, null) :: + Row(6, null) :: Nil) val corr7 = spark.sql("SELECT corr(b, c) FROM covar_tab").collect()(0).getDouble(0) assert(math.abs(corr7 - 0.6633880657639323) < 1e-12) @@ -869,7 +869,7 @@ abstract class AggregationQuerySuite extends QueryTest with SQLTestUtils with Te // one row test val df3 = Seq.tabulate(1)(x => (1 * x, x * x * x - 2)).toDF("a", "b") - checkAnswer(df3.groupBy().agg(covar_samp("a", "b")), Row(Double.NaN)) + checkAnswer(df3.groupBy().agg(covar_samp("a", "b")), Row(null)) checkAnswer(df3.groupBy().agg(covar_pop("a", "b")), Row(0.0)) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala index 15712a18ce751..6bf7bd6cbb90e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala @@ -62,7 +62,6 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Moved because: // - Spark uses a different default stddev (sample instead of pop) // - Tiny numerical differences in stddev results. 
- // - Different StdDev behavior when n=1 (NaN instead of 0) checkAnswer(sql(s""" |select p_mfgr,p_name, p_size, |rank() over(distribute by p_mfgr sort by p_name) as r, @@ -88,22 +87,22 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto Row("Manufacturer#1", "almond antique salmon chartreuse burlywood", 6, 4, 3, 0.6666666666666666, 0.6, 2, 4, 11.0, 15.448840301675292, 2, 6, 2), Row("Manufacturer#1", "almond aquamarine burnished black steel", 28, 5, 4, 0.8333333333333334, 0.8, 3, 5, 14.4, 15.388307249337076, 2, 28, 34), Row("Manufacturer#1", "almond aquamarine pink moccasin thistle", 42, 6, 5, 1.0, 1.0, 3, 6, 19.0, 17.787636155487327, 2, 42, 6), - Row("Manufacturer#2", "almond antique violet chocolate turquoise", 14, 1, 1, 0.2, 0.0, 1, 1, 14.0, Double.NaN, 4, 14, 14), + Row("Manufacturer#2", "almond antique violet chocolate turquoise", 14, 1, 1, 0.2, 0.0, 1, 1, 14.0, null, 4, 14, 14), Row("Manufacturer#2", "almond antique violet turquoise frosted", 40, 2, 2, 0.4, 0.25, 1, 2, 27.0, 18.384776310850235, 4, 40, 14), Row("Manufacturer#2", "almond aquamarine midnight light salmon", 2, 3, 3, 0.6, 0.5, 2, 3, 18.666666666666668, 19.42506971244462, 4, 2, 14), Row("Manufacturer#2", "almond aquamarine rose maroon antique", 25, 4, 4, 0.8, 0.75, 2, 4, 20.25, 16.17353805861084, 4, 25, 40), Row("Manufacturer#2", "almond aquamarine sandy cyan gainsboro", 18, 5, 5, 1.0, 1.0, 3, 5, 19.8, 14.042791745233567, 4, 18, 2), - Row("Manufacturer#3", "almond antique chartreuse khaki white", 17, 1, 1, 0.2, 0.0, 1, 1, 17.0,Double.NaN, 2, 17, 17), + Row("Manufacturer#3", "almond antique chartreuse khaki white", 17, 1, 1, 0.2, 0.0, 1, 1, 17.0, null, 2, 17, 17), Row("Manufacturer#3", "almond antique forest lavender goldenrod", 14, 2, 2, 0.4, 0.25, 1, 2, 15.5, 2.1213203435596424, 2, 14, 17), Row("Manufacturer#3", "almond antique metallic orange dim", 19, 3, 3, 0.6, 0.5, 2, 3, 16.666666666666668, 2.516611478423583, 2, 19, 17), Row("Manufacturer#3", "almond antique misty red olive", 1, 4, 4, 0.8, 0.75, 2, 4, 12.75, 8.098353742170895, 2, 1, 14), Row("Manufacturer#3", "almond antique olive coral navajo", 45, 5, 5, 1.0, 1.0, 3, 5, 19.2, 16.037456157383566, 2, 45, 19), - Row("Manufacturer#4", "almond antique gainsboro frosted violet", 10, 1, 1, 0.2, 0.0, 1, 1, 10.0, Double.NaN, 0, 10, 10), + Row("Manufacturer#4", "almond antique gainsboro frosted violet", 10, 1, 1, 0.2, 0.0, 1, 1, 10.0, null, 0, 10, 10), Row("Manufacturer#4", "almond antique violet mint lemon", 39, 2, 2, 0.4, 0.25, 1, 2, 24.5, 20.506096654409877, 0, 39, 10), Row("Manufacturer#4", "almond aquamarine floral ivory bisque", 27, 3, 3, 0.6, 0.5, 2, 3, 25.333333333333332, 14.571661996262929, 0, 27, 10), Row("Manufacturer#4", "almond aquamarine yellow dodger mint", 7, 4, 4, 0.8, 0.75, 2, 4, 20.75, 15.01943185787443, 0, 7, 39), Row("Manufacturer#4", "almond azure aquamarine papaya violet", 12, 5, 5, 1.0, 1.0, 3, 5, 19.0, 13.583077707206124, 0, 12, 27), - Row("Manufacturer#5", "almond antique blue firebrick mint", 31, 1, 1, 0.2, 0.0, 1, 1, 31.0, Double.NaN, 1, 31, 31), + Row("Manufacturer#5", "almond antique blue firebrick mint", 31, 1, 1, 0.2, 0.0, 1, 1, 31.0, null, 1, 31, 31), Row("Manufacturer#5", "almond antique medium spring khaki", 6, 2, 2, 0.4, 0.25, 1, 2, 18.5, 17.67766952966369, 1, 6, 31), Row("Manufacturer#5", "almond antique sky peru orange", 2, 3, 3, 0.6, 0.5, 2, 3, 13.0, 15.716233645501712, 1, 2, 31), Row("Manufacturer#5", "almond aquamarine dodger light gainsboro", 46, 4, 4, 0.8, 0.75, 2, 4, 21.25, 20.902551678363736, 1, 46, 
6), From 304ca1ec93e299ebb32f961eafcaac249a45585c Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 13 Oct 2020 09:21:06 -0700 Subject: [PATCH 0231/1009] [SPARK-33129][BUILD][DOCS] Updating the build/sbt references to test-only with testOnly for SBT 1.3.x ### What changes were proposed in this pull request? test-only - > testOnly in docs across the project. ### Why are the changes needed? Since the sbt version is updated, the older way or running i.e. `test-only` is no longer valid. ### Does this PR introduce _any_ user-facing change? docs update. ### How was this patch tested? Manually. Closes #30028 from ScrapCodes/fix-build/sbt-sample. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 2 +- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 4 ++-- .../apache/spark/sql/jdbc/PostgresIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/PostgresKrbIntegrationSuite.scala | 2 +- .../apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala | 4 ++-- .../org/apache/spark/sql/ExpressionsSchemaSuite.scala | 4 ++-- .../scala/org/apache/spark/sql/PlanStabilitySuite.scala | 8 ++++---- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++---- .../hive/thriftserver/ThriftServerQueryTestSuite.scala | 4 ++-- 12 files changed, 22 insertions(+), 22 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 91498493e78e2..4b9acd0d39f3f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index 7ab544c17a5d8..9c3a609b98bbe 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2KrbIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *DB2KrbIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 5d3deff9d2704..f1ffc8f0f3dc7 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 - * ./build/sbt -Pdocker-integration-tests "test-only *MsSqlServerIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *MsSqlServerIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 4cd27f8b9fff2..6f96ab33d0fee 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 - * ./build/sbt -Pdocker-integration-tests "test-only *MySQLIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *MySQLIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index ce63d1df6f028..60eb1c055a38e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.tags.DockerTest * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME * 3. Start docker - sudo service docker start * 4. Run spark test - ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" * * An actual sequence of commands to run the test is as follows * @@ -51,7 +51,7 @@ import org.apache.spark.tags.DockerTest * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" * * It has been validated with 18.4.0 Express Edition. 
*/ diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index ba71c942714da..de9c0660c51c1 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "test-only *PostgresIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index 6b215485247d9..984890f22f492 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "test-only *PostgresKrbIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresKrbIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 1b51d43c1d139..403f16aac6356 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -36,7 +36,7 @@ import org.apache.spark.tags.DockerTest * Pull oracle $ORACLE_DOCKER_IMAGE_NAME image - docker pull $ORACLE_DOCKER_IMAGE_NAME * 3. Start docker - sudo service docker start * 4. Run spark test - ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" * * An actual sequence of commands to run the test is as follows * @@ -47,7 +47,7 @@ import org.apache.spark.tags.DockerTest * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe * $ cd $SPARK_HOME * $ ./build/sbt -Pdocker-integration-tests - * "test-only org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" + * "testOnly org.apache.spark.sql.jdbc.v2.OracleIntegrationSuite" * * It has been validated with 18.4.0 Express Edition. 
*/ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 37ef04d9cb02f..f3db4d811dd86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -32,12 +32,12 @@ import org.apache.spark.tags.ExtendedSQLTest * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *ExpressionsSchemaSuite" + * build/sbt "sql/testOnly *ExpressionsSchemaSuite" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *ExpressionsSchemaSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *ExpressionsSchemaSuite" * }}} * * For example: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index f78fc269986b5..c2aee0ad4c9a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -51,22 +51,22 @@ import org.apache.spark.tags.ExtendedSQLTest * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * build/sbt "sql/testOnly *PlanStability[WithStats]Suite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" * }}} */ // scalastyle:on line.size.limit diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 0bb1f5e20fc5b..36e55c0994f18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -49,22 +49,22 @@ import org.apache.spark.util.Utils * * To run the entire test suite: * {{{ - * build/sbt "sql/test-only *SQLQueryTestSuite" + * build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "~sql/test-only *SQLQueryTestSuite -- -z inline-table.sql" + * build/sbt "~sql/testOnly *SQLQueryTestSuite -- -z inline-table.sql" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite -- -z describe.sql" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite -- -z describe.sql" * }}} * * The format for input files is simple: diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index ecc7ce71d950e..be42497113469 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -39,12 +39,12 @@ import org.apache.spark.sql.types._ * * To run the entire test suite: * {{{ - * build/sbt "hive-thriftserver/test-only *ThriftServerQueryTestSuite" -Phive-thriftserver + * build/sbt "hive-thriftserver/testOnly *ThriftServerQueryTestSuite" -Phive-thriftserver * }}} * * This test suite won't generate golden files. To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *SQLQueryTestSuite" * }}} * * TODO: From 1bfcb51eebf074588ce84cc2143113ab05f07392 Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 13 Oct 2020 09:29:05 -0700 Subject: [PATCH 0232/1009] [SPARK-33132][WEBUI] Make `formatBytes` return `0.0 B` for negative input instead of `NaN` ### What changes were proposed in this pull request? When the bytesRead metric is negative, `formatBytes` in `ui.js` should just return `0.0 B` to avoid a `NaN Undefined` result. ### Why are the changes needed? Strengthen the parameter validation to improve the metric display on the Summary Metrics of the Spark Stage UI. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It's a small change; tested manually. Closes #30030 from akiyamaneko/formatBytes_NaN. Authored-by: neko Signed-off-by: Dongjoon Hyun --- core/src/main/resources/org/apache/spark/ui/static/utils.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 4571fc1aec4dd..4cd83332cde5f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -39,7 +39,7 @@ function formatDuration(milliseconds) { function formatBytes(bytes, type) { if (type !== 'display') return bytes; - if (bytes == 0) return '0.0 B'; + if (bytes <= 0) return '0.0 B'; var k = 1024; var dm = 1; var sizes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']; From 05a62dcada0176301307b0af194b50c383f496ff Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 14 Oct 2020 12:13:54 +0900 Subject: [PATCH 0233/1009] [SPARK-33134][SQL] Return partial results only for root JSON objects ### What changes were proposed in this pull request? In this PR, I propose to restrict the partial result feature to root JSON objects only. The JSON datasource as well as `from_json()` will return `null` for malformed nested JSON objects. ### Why are the changes needed? 1. To not raise an exception to users in the PERMISSIVE mode 2. To fix a regression and to have the same behavior as Spark 2.4.x has 3. The current implementation of partial results is supposed to work only for root (top-level) JSON objects, and is not tested for bad nested complex JSON fields. ### Does this PR introduce _any_ user-facing change? Yes.
Before the changes, the code below: ```scala val pokerhand_raw = Seq("""[{"cards": [19], "playerId": 123456}]""").toDF("events") val event = new StructType().add("playerId", LongType).add("cards", ArrayType(new StructType().add("id", LongType).add("rank", StringType))) val pokerhand_events = pokerhand_raw.select(from_json($"events", ArrayType(event)).as("event")) pokerhand_events.show ``` throws the exception even in the default **PERMISSIVE** mode: ```java java.lang.ClassCastException: java.lang.Long cannot be cast to org.apache.spark.sql.catalyst.util.ArrayData at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.getArray(rows.scala:48) at org.apache.spark.sql.catalyst.expressions.BaseGenericInternalRow.getArray$(rows.scala:48) at org.apache.spark.sql.catalyst.expressions.GenericInternalRow.getArray(rows.scala:195) ``` After the changes: ``` +-----+ |event| +-----+ | null| +-----+ ``` ### How was this patch tested? Added a test to `JsonFunctionsSuite`. Closes #30031 from MaxGekk/json-skip-row-wrong-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/json/JacksonParser.scala | 7 ++++--- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 14 ++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 0da2baf24fbcb..bbcff4949ae87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -93,7 +93,7 @@ class JacksonParser( new NoopFilters } (parser: JsonParser) => parseJsonToken[Iterable[InternalRow]](parser, st) { - case START_OBJECT => convertObject(parser, st, fieldConverters, jsonFilters) + case START_OBJECT => convertObject(parser, st, fieldConverters, jsonFilters, isRoot = true) // SPARK-3308: support reading top level JSON arrays and take every element // in such an array as a row // @@ -383,7 +383,8 @@ class JacksonParser( parser: JsonParser, schema: StructType, fieldConverters: Array[ValueConverter], - structFilters: StructFilters = new NoopFilters()): Option[InternalRow] = { + structFilters: StructFilters = new NoopFilters(), + isRoot: Boolean = false): Option[InternalRow] = { val row = new GenericInternalRow(schema.length) var badRecordException: Option[Throwable] = None var skipRow = false @@ -397,7 +398,7 @@ class JacksonParser( skipRow = structFilters.skipRow(row, index) } catch { case e: SparkUpgradeException => throw e - case NonFatal(e) => + case NonFatal(e) if isRoot => badRecordException = badRecordException.orElse(Some(e)) parser.skipChildren() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 03b48451c7495..5a1a3550d855b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -733,4 +733,18 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { | """.stripMargin) checkAnswer(toDF("yyyy-MM-dd'T'HH:mm:ss.SSSXXX"), toDF("yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]")) } + + test("SPARK-33134: return partial results only for root JSON objects") { + val st = new StructType() + .add("c1", LongType) + .add("c2", ArrayType(new StructType().add("c3", LongType).add("c4", StringType))) + val df1 = 
Seq("""{"c2": [19], "c1": 123456}""").toDF("c0") + checkAnswer(df1.select(from_json($"c0", st)), Row(Row(123456, null))) + val df2 = Seq("""{"data": {"c2": [19], "c1": 123456}}""").toDF("c0") + checkAnswer(df2.select(from_json($"c0", new StructType().add("data", st))), Row(Row(null))) + val df3 = Seq("""[{"c2": [19], "c1": 123456}]""").toDF("c0") + checkAnswer(df3.select(from_json($"c0", ArrayType(st))), Row(null)) + val df4 = Seq("""{"c2": [19]}""").toDF("c0") + checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null)) + } } From d8c4a47ea19d18b0aad22263d002267d663c2f66 Mon Sep 17 00:00:00 2001 From: Richard Penney Date: Wed, 14 Oct 2020 08:48:55 -0500 Subject: [PATCH 0234/1009] [SPARK-33061][SQL] Expose inverse hyperbolic trig functions through sql.functions API This patch is a small extension to change-request SPARK-28133, which added inverse hyperbolic functions to the SQL interpreter, but did not include those methods within the Scala `sql.functions._` API. This patch makes `acosh`, `asinh` and `atanh` functions available through the Scala API. Unit-tests have been added to `sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala`. Manual testing has been done via `spark-shell`, using the following recipe: ``` val df = spark.range(0, 11) .toDF("x") .withColumn("x", ($"x" - 5) / 2.0) val hyps = df.withColumn("tanh", tanh($"x")) .withColumn("sinh", sinh($"x")) .withColumn("cosh", cosh($"x")) val invhyps = hyps.withColumn("atanh", atanh($"tanh")) .withColumn("asinh", asinh($"sinh")) .withColumn("acosh", acosh($"cosh")) invhyps.show ``` which produces the following output: ``` +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ | x| tanh| sinh| cosh| atanh| asinh| acosh| +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ |-2.5| -0.9866142981514303|-6.0502044810397875| 6.132289479663686| -2.500000000000001|-2.4999999999999956| 2.5| |-2.0| -0.9640275800758169| -3.626860407847019|3.7621956910836314|-2.0000000000000004|-1.9999999999999991| 2.0| |-1.5| -0.9051482536448664|-2.1292794550948173| 2.352409615243247|-1.4999999999999998|-1.4999999999999998| 1.5| |-1.0| -0.7615941559557649|-1.1752011936438014| 1.543080634815244| -1.0| -1.0| 1.0| |-0.5|-0.46211715726000974|-0.5210953054937474|1.1276259652063807| -0.5|-0.5000000000000002|0.4999999999999998| | 0.0| 0.0| 0.0| 1.0| 0.0| 0.0| 0.0| | 0.5| 0.46211715726000974| 0.5210953054937474|1.1276259652063807| 0.5| 0.5|0.4999999999999998| | 1.0| 0.7615941559557649| 1.1752011936438014| 1.543080634815244| 1.0| 1.0| 1.0| | 1.5| 0.9051482536448664| 2.1292794550948173| 2.352409615243247| 1.4999999999999998| 1.5| 1.5| | 2.0| 0.9640275800758169| 3.626860407847019|3.7621956910836314| 2.0000000000000004| 2.0| 2.0| | 2.5| 0.9866142981514303| 6.0502044810397875| 6.132289479663686| 2.500000000000001| 2.5| 2.5| +----+--------------------+-------------------+------------------+-------------------+-------------------+------------------+ ``` Closes #29938 from rwpenney/fix/inverse-hyperbolics. 
Authored-by: Richard Penney Signed-off-by: Sean Owen --- .../org/apache/spark/sql/functions.scala | 50 ++++++++++++++++++- .../apache/spark/sql/MathFunctionsSuite.scala | 15 ++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 2efe5aae09709..21e22d90f0f80 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1427,6 +1427,22 @@ object functions { */ def acos(columnName: String): Column = acos(Column(columnName)) + /** + * @return inverse hyperbolic cosine of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def acosh(e: Column): Column = withExpr { Acosh(e.expr) } + + /** + * @return inverse hyperbolic cosine of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def acosh(columnName: String): Column = acosh(Column(columnName)) + /** * @return inverse sine of `e` in radians, as if computed by `java.lang.Math.asin` * @@ -1444,7 +1460,23 @@ object functions { def asin(columnName: String): Column = asin(Column(columnName)) /** - * @return inverse tangent of `e`, as if computed by `java.lang.Math.atan` + * @return inverse hyperbolic sine of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def asinh(e: Column): Column = withExpr { Asinh(e.expr) } + + /** + * @return inverse hyperbolic sine of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def asinh(columnName: String): Column = asinh(Column(columnName)) + + /** + * @return inverse tangent of `e` as if computed by `java.lang.Math.atan` * * @group math_funcs * @since 1.4.0 @@ -1572,6 +1604,22 @@ object functions { */ def atan2(yValue: Double, xName: String): Column = atan2(yValue, Column(xName)) + /** + * @return inverse hyperbolic tangent of `e` + * + * @group math_funcs + * @since 3.1.0 + */ + def atanh(e: Column): Column = withExpr { Atanh(e.expr) } + + /** + * @return inverse hyperbolic tangent of `columnName` + * + * @group math_funcs + * @since 3.1.0 + */ + def atanh(columnName: String): Column = atanh(Column(columnName)) + /** * An expression that returns the string representation of the binary value of the given long * column. For example, bin("12") returns "1100". 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index bd86c2ec075b0..cd92976571230 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -125,6 +125,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(sinh, math.sinh) } + test("asinh") { + testOneToOneMathFunction(asinh, + (x: Double) => math.log(x + math.sqrt(x * x + 1)) ) + } + test("cos") { testOneToOneMathFunction(cos, math.cos) } @@ -137,6 +142,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(cosh, math.cosh) } + test("acosh") { + testOneToOneMathFunction(acosh, + (x: Double) => math.log(x + math.sqrt(x * x - 1)) ) + } + test("tan") { testOneToOneMathFunction(tan, math.tan) } @@ -149,6 +159,11 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { testOneToOneMathFunction(tanh, math.tanh) } + test("atanh") { + testOneToOneMathFunction(atanh, + (x: Double) => (0.5 * (math.log1p(x) - math.log1p(-x))) ) + } + test("degrees") { testOneToOneMathFunction(degrees, math.toDegrees) checkAnswer( From 8e5cb1d276686ec428e4e6aa1c3cfd6bb99e4e9a Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 14 Oct 2020 08:30:03 -0700 Subject: [PATCH 0235/1009] [SPARK-33136][SQL] Fix mistakenly swapped parameter in V2WriteCommand.outputResolved ### What changes were proposed in this pull request? This PR proposes to fix a bug on calling `DataType.equalsIgnoreCompatibleNullability` with mistakenly swapped parameters in `V2WriteCommand.outputResolved`. The order of parameters for `DataType.equalsIgnoreCompatibleNullability` are `from` and `to`, which says that the right order of matching variables are `inAttr` and `outAttr`. ### Why are the changes needed? Spark throws AnalysisException due to unresolved operator in v2 write, while the operator is unresolved due to a bug that parameters to call `DataType.equalsIgnoreCompatibleNullability` in `outputResolved` have been swapped. ### Does this PR introduce _any_ user-facing change? Yes, end users no longer suffer on unresolved operator in v2 write if they're trying to write dataframe containing non-nullable complex types against table matching complex types as nullable. ### How was this patch tested? New UT added. Closes #30033 from HeartSaVioR/SPARK-33136. 
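To make the from/to direction concrete: `equalsIgnoreCompatibleNullability(from, to)` asks whether values of type `from` can safely be written into a column of type `to`, so nullability may only be relaxed in that direction. The sketch below uses the types from the new test and is Spark-internal illustration only (the method is not public API), not part of the patch:
```scala
import org.apache.spark.sql.types._

val writeType = ArrayType(LongType, containsNull = false) // incoming data: no null elements
val tableType = ArrayType(LongType, containsNull = true)  // target column: nullable elements

// Correct order (this patch): "can writeType be written into tableType?" -> true
DataType.equalsIgnoreCompatibleNullability(writeType, tableType)

// Swapped order (the bug): "can tableType be written into writeType?" -> false,
// which made outputResolved reject a perfectly valid write
DataType.equalsIgnoreCompatibleNullability(tableType, writeType)
```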
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../catalyst/plans/logical/v2Commands.scala | 2 +- .../spark/sql/DataFrameWriterV2Suite.scala | 87 ++++++++++++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 50af16ca276e1..272c19b98512b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -45,7 +45,7 @@ trait V2WriteCommand extends Command { case (inAttr, outAttr) => // names and types must match, nullability must be compatible inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(outAttr.dataType, inAttr.dataType) && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && (outAttr.nullable || !inAttr.nullable) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 508eefafd0754..ff5c6242987de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,16 +23,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamedRelation, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, V2WriteCommand} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} -import org.apache.spark.sql.types.TimestampType +import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -101,6 +100,86 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo assert(v2.catalog.exists(_ == catalogPlugin)) } + case class FakeV2WriteCommand(table: NamedRelation, query: LogicalPlan) extends V2WriteCommand + + test("SPARK-33136 output resolved on complex types for V2 write commands") { + val tableCatalog = catalog("testcat") + + def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { + val fromTableName = s"from_table_$name" + tableCatalog.createTable( + Identifier.of(Array(), 
fromTableName), + StructType(Array(StructField("col", fromType))), + Array.empty, + new java.util.HashMap[String, String]()) + + val toTable = tableCatalog.createTable( + Identifier.of(Array(), s"to_table_$name"), + StructType(Array(StructField("col", toType))), + Array.empty, + new java.util.HashMap[String, String]()) + + val df = spark.table(s"testcat.$fromTableName") + + val relation = DataSourceV2Relation.create(toTable, Some(tableCatalog), None) + val writeCommand = FakeV2WriteCommand(relation, df.queryExecution.analyzed) + + assert(writeCommand.outputResolved, s"Unable to write from type $fromType to type $toType.") + } + + // The major difference between `from` and `to` is that `from` is a complex type + // with non-nullable, whereas `to` is same data type with flipping nullable. + + // nested struct type + val fromStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType, nullable = false), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType, nullable = false)))))) + + val toStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType)))))) + + assertTypeCompatibility("struct", fromStructType, toStructType) + + // array type + assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), + ArrayType(LongType, containsNull = true)) + + // array type with struct type + val fromArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType, nullable = false))), + containsNull = false) + + val toArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType))), + containsNull = true) + + assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) + + // map type + assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), + MapType(IntegerType, StringType, valueContainsNull = true)) + + // map type with struct type + val fromMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType, nullable = false))), + valueContainsNull = false) + + val toMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType))), + valueContainsNull = true) + + assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) + } + test("Append: basic append") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") From f3ad32f4b6fc55e89e7fb222ed565ad3e32d47c6 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 14 Oct 2020 16:17:28 +0000 Subject: [PATCH 0236/1009] [SPARK-33026][SQL][FOLLOWUP] metrics name should be numOutputRows ### What changes were proposed in this pull request? Follow the convention and rename the metrics `numRows` to `numOutputRows` ### Why are the changes needed? `FilterExec`, `HashAggregateExec`, etc. all use `numOutputRows` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30039 from cloud-fan/minor. 
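For anyone inspecting these metrics programmatically, the lookup key changes accordingly. A minimal spark-shell sketch (illustrative, not part of the patch):
```scala
import org.apache.spark.sql.functions.broadcast
import org.apache.spark.sql.execution.exchange.BroadcastExchangeExec

val df = spark.range(5).join(broadcast(spark.range(5)), "id")
df.collect()

// The broadcast row count is now keyed "numOutputRows", consistent with
// FilterExec, HashAggregateExec, etc.
df.queryExecution.executedPlan.collectFirst { case b: BroadcastExchangeExec => b }
  .foreach(b => println(b.metrics("numOutputRows").value))
```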
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/execution/exchange/BroadcastExchangeExec.scala | 8 ++++---- .../spark/sql/execution/metric/SQLMetricsSuite.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala index 4b884dfe537e8..0c5fee20385e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala @@ -78,7 +78,7 @@ case class BroadcastExchangeExec( override lazy val metrics = Map( "dataSize" -> SQLMetrics.createSizeMetric(sparkContext, "data size"), - "numRows" -> SQLMetrics.createMetric(sparkContext, "number of rows"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "collectTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to collect"), "buildTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to build"), "broadcastTime" -> SQLMetrics.createTimingMetric(sparkContext, "time to broadcast")) @@ -91,8 +91,8 @@ case class BroadcastExchangeExec( override def runtimeStatistics: Statistics = { val dataSize = metrics("dataSize").value - val numRows = metrics("numRows").value - Statistics(dataSize, Some(numRows)) + val rowCount = metrics("numOutputRows").value + Statistics(dataSize, Some(rowCount)) } @transient @@ -116,11 +116,11 @@ case class BroadcastExchangeExec( val beforeCollect = System.nanoTime() // Use executeCollect/executeCollectIterator to avoid conversion to Scala types val (numRows, input) = child.executeCollectIterator() + longMetric("numOutputRows") += numRows if (numRows >= MAX_BROADCAST_TABLE_ROWS) { throw new SparkException( s"Cannot broadcast the table over $MAX_BROADCAST_TABLE_ROWS rows: $numRows rows") } - longMetric("numRows") += numRows val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index e404e460fe611..4872906dbfec3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -751,7 +751,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils } assert(exchanges.size === 1) - testMetricsInSparkPlanOperator(exchanges.head, Map("numRows" -> 2)) + testMetricsInSparkPlanOperator(exchanges.head, Map("numOutputRows" -> 2)) } } } From 9ab0ec4e38e5df0537b38cb0f89e004ad57bec90 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Thu, 15 Oct 2020 11:59:29 +0900 Subject: [PATCH 0237/1009] [SPARK-33146][CORE] Check for non-fatal errors when loading new applications in SHS ### What changes were proposed in this pull request? Adds an additional check for non-fatal errors when attempting to add a new entry to the history server application listing. ### Why are the changes needed? A bad rolling event log folder (missing appstatus file or no log files) would cause no applications to be loaded by the Spark history server. 
Figuring out why invalid event log folders are created in the first place will be addressed in separate issues, this just lets the history server skip the invalid folder and successfully load all the valid applications. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT Closes #30037 from Kimahriman/bug/rolling-log-crashing-history. Authored-by: Adam Binford Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../deploy/history/FsHistoryProvider.scala | 3 ++ .../history/FsHistoryProviderSuite.scala | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index fe8be0b3b20d3..168bd1e68a304 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -538,6 +538,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) reader.fileSizeForLastIndex > 0 } catch { case _: FileNotFoundException => false + case NonFatal(e) => + logWarning(s"Error while reading new log ${reader.rootPath}", e) + false } case NonFatal(e) => diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 3f8c875f5a552..1578b908b1b55 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -1475,6 +1475,55 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } + test("SPARK-33146: don't let one bad rolling log folder prevent loading other applications") { + withTempDir { dir => + val conf = createTestConf(true) + conf.set(HISTORY_LOG_DIR, dir.getAbsolutePath) + val hadoopConf = SparkHadoopUtil.newConfiguration(conf) + val fs = new Path(dir.getAbsolutePath).getFileSystem(hadoopConf) + + val provider = new FsHistoryProvider(conf) + + val writer = new RollingEventLogFilesWriter("app", None, dir.toURI, conf, hadoopConf) + writer.start() + + writeEventsToRollingWriter(writer, Seq( + SparkListenerApplicationStart("app", Some("app"), 0, "user", None), + SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) + provider.checkForLogs() + provider.cleanLogs() + assert(dir.listFiles().size === 1) + assert(provider.getListing.length === 1) + + // Manually delete the appstatus file to make an invalid rolling event log + val appStatusPath = RollingEventLogFilesWriter.getAppStatusFilePath(new Path(writer.logPath), + "app", None, true) + fs.delete(appStatusPath, false) + provider.checkForLogs() + provider.cleanLogs() + assert(provider.getListing.length === 0) + + // Create a new application + val writer2 = new RollingEventLogFilesWriter("app2", None, dir.toURI, conf, hadoopConf) + writer2.start() + writeEventsToRollingWriter(writer2, Seq( + SparkListenerApplicationStart("app2", Some("app2"), 0, "user", None), + SparkListenerJobStart(1, 0, Seq.empty)), rollFile = false) + + // Both folders exist but only one application found + provider.checkForLogs() + provider.cleanLogs() + assert(provider.getListing.length === 1) + assert(dir.listFiles().size === 2) + + // Make sure a new provider sees the valid application + provider.stop() + val newProvider = new FsHistoryProvider(conf) + newProvider.checkForLogs() + 
assert(newProvider.getListing.length === 1) + } + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: From ec34a001ad0ef57a496f29a6523d905128875b17 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 14 Oct 2020 20:48:13 -0700 Subject: [PATCH 0238/1009] [SPARK-33153][SQL][TESTS] Ignore Spark 2.4 in HiveExternalCatalogVersionsSuite on Python 3.8/3.9 ### What changes were proposed in this pull request? This PR aims to ignore Apache Spark 2.4.x distribution in HiveExternalCatalogVersionsSuite if Python version is 3.8 or 3.9. ### Why are the changes needed? Currently, `HiveExternalCatalogVersionsSuite` is broken on the latest OS like `Ubuntu 20.04` because its default Python version is 3.8. PySpark 2.4.x doesn't work on Python 3.8 due to SPARK-29536. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. ``` $ python3 --version Python 3.8.5 $ build/sbt "hive/testOnly *.HiveExternalCatalogVersionsSuite" ... [info] All tests passed. [info] Passed: Total 1, Failed 0, Errors 0, Passed 1 ``` Closes #30044 from dongjoon-hyun/SPARK-33153. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/TestUtils.scala | 13 +++++++++++++ .../sql/hive/HiveExternalCatalogVersionsSuite.scala | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 6947d1c72f12b..bc3644df0ebb5 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -255,6 +255,19 @@ private[spark] object TestUtils { attempt.isSuccess && attempt.get == 0 } + def isPythonVersionAtLeast38(): Boolean = { + val attempt = if (Utils.isWindows) { + Try(Process(Seq("cmd.exe", "/C", "python3 --version")) + .run(ProcessLogger(s => s.startsWith("Python 3.8") || s.startsWith("Python 3.9"))) + .exitValue()) + } else { + Try(Process(Seq("sh", "-c", "python3 --version")) + .run(ProcessLogger(s => s.startsWith("Python 3.8") || s.startsWith("Python 3.9"))) + .exitValue()) + } + attempt.isSuccess && attempt.get == 0 + } + /** * Returns the response code from an HTTP(S) URL. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index cbfdb7fac88d8..b81b7e8ec0c0f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -234,7 +234,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // Tests the latest version of every release line. val testingVersions: Seq[String] = { import scala.io.Source - try { + val versions: Seq[String] = try { Source.fromURL(s"${releaseMirror}/spark").mkString .split("\n") .filter(_.contains("""
  3. Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } + versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) } protected var spark: SparkSession = _ From 77a8efbc05cb4ecc40dd050c363429e71a9f23c1 Mon Sep 17 00:00:00 2001 From: manuzhang Date: Thu, 15 Oct 2020 05:53:32 +0000 Subject: [PATCH 0239/1009] [SPARK-32932][SQL] Do not use local shuffle reader at final stage on write command ### What changes were proposed in this pull request? Do not use local shuffle reader at final stage if the root node is write command. ### Why are the changes needed? Users usually repartition with partition column on dynamic partition overwrite. AQE could break it by removing physical shuffle with local shuffle reader. That could lead to a large number of output files, even exceeding the file system limit. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #29797 from manuzhang/spark-32932. Authored-by: manuzhang Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 14 ++++- .../adaptive/AdaptiveQueryExecSuite.scala | 51 ++++++++++++++++++- 2 files changed, 63 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 0e032569bb8a7..5e75e26e6d074 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -36,6 +36,8 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ +import org.apache.spark.sql.execution.command.DataWritingCommandExec +import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf @@ -102,6 +104,16 @@ case class AdaptiveSparkPlanExec( OptimizeLocalShuffleReader(conf) ) + private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = + context.qe.sparkPlan match { + case _: DataWritingCommandExec | _: V2TableWriteExec => + // SPARK-32932: Local shuffle reader could break partitioning that works best + // for the following writing command + queryStageOptimizerRules.filterNot(_.isInstanceOf[OptimizeLocalShuffleReader]) + case _ => + queryStageOptimizerRules + } + // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. @transient private val postStageCreationRules = Seq( @@ -235,7 +247,7 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. 
currentPhysicalPlan = applyPhysicalRules( result.newPlan, - queryStageOptimizerRules ++ postStageCreationRules, + finalStageOptimizerRules ++ postStageCreationRules, Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 0dfb1d2fd9eda..38a323b1c057e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -26,15 +26,19 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListe import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} -import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, ReusedSubqueryExec, ShuffledRowRDD, SparkPlan} +import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.command.DataWritingCommandExec +import org.apache.spark.sql.execution.datasources.noop.NoopDataSource +import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, ReusedExchangeExec, ShuffleExchangeExec} import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.util.Utils class AdaptiveQueryExecSuite @@ -1258,4 +1262,49 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-32932: Do not use local shuffle reader at final stage on write command") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> PartitionOverwriteMode.DYNAMIC.toString, + SQLConf.SHUFFLE_PARTITIONS.key -> "5", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val data = for ( + i <- 1L to 10L; + j <- 1L to 3L + ) yield (i, j) + + val df = data.toDF("i", "j").repartition($"j") + var noLocalReader: Boolean = false + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + qe.executedPlan match { + case plan@(_: DataWritingCommandExec | _: V2TableWriteExec) => + assert(plan.asInstanceOf[UnaryExecNode].child.isInstanceOf[AdaptiveSparkPlanExec]) + noLocalReader = collect(plan) { + case exec: CustomShuffleReaderExec if exec.isLocalReader => exec + }.isEmpty + case _ => // ignore other events + } + } + override def onFailure(funcName: String, qe: QueryExecution, + exception: Exception): Unit = {} + } + spark.listenerManager.register(listener) + + withTable("t") { + df.write.partitionBy("j").saveAsTable("t") + sparkContext.listenerBus.waitUntilEmpty() + assert(noLocalReader) + noLocalReader = false + } + + // Test DataSource v2 + val 
format = classOf[NoopDataSource].getName + df.write.format(format).mode("overwrite").save() + sparkContext.listenerBus.waitUntilEmpty() + assert(noLocalReader) + noLocalReader = false + + spark.listenerManager.unregister(listener) + } + } } From 8e7c39089f885413f5e5e1bdafc2d426291a8719 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 01:51:01 -0700 Subject: [PATCH 0240/1009] [SPARK-33155][K8S] spark.kubernetes.pyspark.pythonVersion allows only '3' ### What changes were proposed in this pull request? This PR makes `spark.kubernetes.pyspark.pythonVersion` allow only `3`. In other words, it will reject `2` for `Python 2`. - [x] Configuration description and check is updated. - [x] Documentation is updated - [x] Unit test cases are updated. - [x] Docker image script is updated. ### Why are the changes needed? After SPARK-32138, Apache Spark 3.1 dropped Python 2 support. ### Does this PR introduce _any_ user-facing change? Yes, but Python 2 support is already dropped officially. ### How was this patch tested? Pass the CI. Closes #30049 from dongjoon-hyun/SPARK-DROP-PYTHON2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 2 +- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 6 +++--- .../k8s/features/DriverCommandFeatureStepSuite.scala | 4 +--- .../docker/src/main/dockerfiles/spark/entrypoint.sh | 7 +------ .../deploy/k8s/integrationtest/DecommissionSuite.scala | 1 - .../deploy/k8s/integrationtest/PythonTestsSuite.scala | 4 +--- .../kubernetes/integration-tests/tests/pyfiles.py | 2 +- 7 files changed, 8 insertions(+), 18 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e9c292d21fd47..3bd1c410e8433 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1087,7 +1087,7 @@ See the [configuration page](configuration.html) for information on Spark config spark.kubernetes.pyspark.pythonVersion "3" - This sets the major Python version of the docker image used to run the driver and executor containers. Can either be 2 or 3. + This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index d6dc56f9d9d1b..00eaff452ba45 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -285,11 +285,11 @@ private[spark] object Config extends Logging { val PYSPARK_MAJOR_PYTHON_VERSION = ConfigBuilder("spark.kubernetes.pyspark.pythonVersion") - .doc("This sets the major Python version. Either 2 or 3. (Python2 or Python3)") + .doc("This sets the major Python version. 
Only 3 is available for Python3.") .version("2.4.0") .stringConf - .checkValue(pv => List("2", "3").contains(pv), - "Ensure that major Python version is either Python2 or Python3") + .checkValue(pv => List("3").contains(pv), + "Ensure that major Python version is Python3") .createWithDefault("3") val KUBERNETES_KERBEROS_KRB5_FILE = diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 829943f16beac..6a7366e9c6b7a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -43,7 +43,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { test("python resource") { val mainResource = "local:/main.py" val sparkConf = new SparkConf(false) - .set(PYSPARK_MAJOR_PYTHON_VERSION, "2") val spec = applyFeatureStep( PythonMainAppResource(mainResource), conf = sparkConf, @@ -58,7 +57,7 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { val envs = spec.pod.container.getEnv.asScala .map { env => (env.getName, env.getValue) } .toMap - val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "2") + val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3") assert(envs === expected) } @@ -93,7 +92,6 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { test("SPARK-25355: python resource args with proxy-user") { val mainResource = "local:/main.py" val sparkConf = new SparkConf(false) - .set(PYSPARK_MAJOR_PYTHON_VERSION, "2") val spec = applyFeatureStep( PythonMainAppResource(mainResource), conf = sparkConf, diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index 813a70c6e7ec3..d605ae43c024f 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -44,12 +44,7 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" fi -if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "2" ]; then - pyv="$(python -V 2>&1)" - export PYTHON_VERSION="${pyv:7}" - export PYSPARK_PYTHON="python" - export PYSPARK_DRIVER_PYTHON="python" -elif [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then +if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then pyv3="$(python3 -V 2>&1)" export PYTHON_VERSION="${pyv3:7}" export PYSPARK_PYTHON="python3" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 6e42819b1779e..fd14b12b112d3 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -26,7 +26,6 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => test("Test basic decommissioning", k8sTestTag) { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") - 
.set("spark.kubernetes.pyspark.pythonVersion", "3") .set("spark.kubernetes.container.image", pyImage) .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala index b16ccb429074f..bad6f1c1021ba 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala @@ -35,10 +35,9 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => isJVM = false) } - test("Run PySpark with Python3 to test a pyfiles example", k8sTestTag) { + test("Run PySpark to test a pyfiles example", k8sTestTag) { sparkAppConf .set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_FILES, mainClass = "", @@ -57,7 +56,6 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => test("Run PySpark with memory customization", k8sTestTag) { sparkAppConf .set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") .set("spark.kubernetes.memoryOverheadFactor", s"$memOverheadConstant") .set("spark.executor.pyspark.memory", s"${additionalMemory}m") runSparkApplicationAndVerifyCompletion( diff --git a/resource-managers/kubernetes/integration-tests/tests/pyfiles.py b/resource-managers/kubernetes/integration-tests/tests/pyfiles.py index 51c0160554866..73c53be482c03 100644 --- a/resource-managers/kubernetes/integration-tests/tests/pyfiles.py +++ b/resource-managers/kubernetes/integration-tests/tests/pyfiles.py @@ -31,7 +31,7 @@ from py_container_checks import version_check # Begin of Python container checks - version_check(sys.argv[1], 2 if sys.argv[1] == "python" else 3) + version_check(sys.argv[1], 3) # Check python executable at executors spark.udf.register("get_sys_ver", From e85ed8a14c7766ea0fafc32fd9c6ac95c86c8c8f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 02:24:49 -0700 Subject: [PATCH 0241/1009] [SPARK-33156][INFRA] Upgrade GithubAction image from 18.04 to 20.04 ### What changes were proposed in this pull request? This PR aims to upgrade `Github Action` runner image from `Ubuntu 18.04 (LTS)` to `Ubuntu 20.04 (LTS)`. ### Why are the changes needed? `ubuntu-latest` in `GitHub Action` is still `Ubuntu 18.04 (LTS)`. - https://github.com/actions/virtual-environments#available-environments This upgrade will help Apache Spark 3.1+ preparation for vote and release on the latest OS. This is tested here. - https://github.com/dongjoon-hyun/spark/pull/36 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the `Github Action` in this PR. Closes #30050 from dongjoon-hyun/ubuntu_20.04. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 667371dacf5dc..cd2f01ba7e846 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -17,7 +17,8 @@ jobs: # Build: build Spark and run the tests for specified modules. build: name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})" - runs-on: ubuntu-latest + # Ubuntu 20.04 is the latest LTS. The next LTS is 22.04. + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -204,7 +205,7 @@ jobs: # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -271,7 +272,7 @@ jobs: java11: name: Java 11 build - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 @@ -296,7 +297,7 @@ jobs: scala-213: name: Scala 2.13 build - runs-on: ubuntu-latest + runs-on: ubuntu-20.04 steps: - name: Checkout Spark repository uses: actions/checkout@v2 From 513b6f5af2b873ca8737fd7f0c42fdfd4fa24292 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 15 Oct 2020 20:51:20 +0900 Subject: [PATCH 0242/1009] [SPARK-33079][TESTS] Replace the existing Maven job for Scala 2.13 in Github Actions with SBT job ### What changes were proposed in this pull request? SPARK-32926 added a build test to GitHub Action for Scala 2.13 but it's only with Maven. As SPARK-32873 reported, some compilation error happens only with SBT so I think we need to add another build test to GitHub Action for SBT. Unfortunately, we don't have abundant resources for GitHub Actions so instead of just adding the new SBT job, let's replace the existing Maven job with the new SBT job for Scala 2.13. ### Why are the changes needed? To ensure build test passes even with SBT for Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GitHub Actions' job. Closes #29958 from sarutak/add-sbt-job-for-scala-2.13. 
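The small Kinesis change bundled below (dropping the empty parameter list from the `isValid` override) is presumably the kind of issue that only shows up on Scala 2.13, which flags overrides whose parameter-list shape differs from the overridden member. A minimal illustration with made-up class names (an assumption about the motivation, not Spark code):
```scala
class Base {
  def isValid: Boolean = true // declared without a parameter list
}

class Child extends Base {
  // Scala 2.13 rejects `override def isValid(): Boolean` here because the
  // empty parameter list no longer matches the parameterless declaration,
  // so the override keeps the same shape:
  override def isValid: Boolean = false
}
```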
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 16 ++++++---------- .../kinesis/KinesisBackedBlockRDD.scala | 2 +- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cd2f01ba7e846..5f2dc52cc7893 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -301,22 +301,18 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v2 - - name: Cache Maven local repository + - name: Cache Ivy local repository uses: actions/cache@v2 with: - path: ~/.m2/repository - key: scala-213-maven-${{ hashFiles('**/pom.xml') }} + path: ~/.ivy2/cache + key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - scala-213-maven- + scala-213-ivy- - name: Install Java 11 uses: actions/setup-java@v1 with: java-version: 11 - - name: Build with Maven + - name: Build with SBT run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - mkdir -p ~/.m2 ./dev/change-scala-version.sh 2.13 - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 -Pscala-2.13 install - rm -rf ~/.m2/repository/org/apache/spark + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala index 5072b3ae21d87..ab55d545770e9 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala @@ -91,7 +91,7 @@ class KinesisBackedBlockRDD[T: ClassTag]( require(_blockIds.length == arrayOfseqNumberRanges.length, "Number of blockIds is not equal to the number of sequence number ranges") - override def isValid(): Boolean = true + override def isValid: Boolean = true override def getPartitions: Array[Partition] = { Array.tabulate(_blockIds.length) { i => From 31f7097ce0d7eade17a96fe01184e62a88fd2bbd Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Thu, 15 Oct 2020 15:33:23 +0000 Subject: [PATCH 0243/1009] [SPARK-32402][SQL][FOLLOW-UP] Use quoted column name for JDBCTableCatalog.alterTable ### What changes were proposed in this pull request? I currently have unquoted column names in alter table, e.g. ```ALTER TABLE "test"."alt_table" DROP COLUMN c1``` should change to quoted column name ```ALTER TABLE "test"."alt_table" DROP COLUMN "c1"``` ### Why are the changes needed? We should always use quoted identifiers in JDBC SQLs, e.g. ```CREATE TABLE "test"."abc" ("col" INTEGER ) ``` or ```INSERT INTO "test"."abc" ("col") VALUES (?)```. Using unquoted column name in alterTable causes problems, for example: ``` sql("CREATE TABLE h2.test.alt_table (c1 INTEGER, c2 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table DROP COLUMN c1") org.apache.spark.sql.AnalysisException: Failed table altering: test.alt_table; ...... Caused by: org.h2.jdbc.JdbcSQLException: Column "C1" not found; SQL statement: ALTER TABLE "test"."alt_table" DROP COLUMN c1 [42122-195] ``` ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Existing tests Closes #30041 from huaxingao/alter_table_followup. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../apache/spark/sql/jdbc/DB2Dialect.scala | 5 ++-- .../apache/spark/sql/jdbc/JdbcDialects.scala | 25 ++++++++++-------- .../apache/spark/sql/jdbc/OracleDialect.scala | 11 +++++--- .../v2/jdbc/JDBCTableCatalogSuite.scala | 26 +++++++++++-------- 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 908e03726d887..0b394db5c8932 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -66,7 +66,8 @@ private object DB2Dialect extends JdbcDialect { tableName: String, columnName: String, newDataType: String): String = - s"ALTER TABLE $tableName ALTER COLUMN $columnName SET DATA TYPE $newDataType" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)}" + + s" SET DATA TYPE $newDataType" // scalastyle:off line.size.limit // See https://www.ibm.com/support/knowledgecenter/en/SSEPGG_11.5.0/com.ibm.db2.luw.sql.ref.doc/doc/r0000888.html @@ -76,6 +77,6 @@ private object DB2Dialect extends JdbcDialect { columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" - s"ALTER TABLE $tableName ALTER COLUMN $columnName $nullable" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $nullable" } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 5f8d788bc7a22..e0703195051dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -200,7 +200,6 @@ abstract class JdbcDialect extends Serializable { /** * Alter an existing table. - * TODO (SPARK-32523): Override this method in the dialects that have different syntax. * * @param tableName The name of the table to be altered. * @param changes Changes to apply to the table. 
@@ -216,10 +215,10 @@ abstract class JdbcDialect extends Serializable { updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames - updateClause += s"ALTER TABLE $tableName RENAME COLUMN ${name(0)} TO ${rename.newName}" + updateClause += getRenameColumnQuery(tableName, name(0), rename.newName) case delete: DeleteColumn if delete.fieldNames.length == 1 => val name = delete.fieldNames - updateClause += s"ALTER TABLE $tableName DROP COLUMN ${name(0)}" + updateClause += getDeleteColumnQuery(tableName, name(0)) case updateColumnType: UpdateColumnType if updateColumnType.fieldNames.length == 1 => val name = updateColumnType.fieldNames val dataType = JdbcUtils.getJdbcType(updateColumnType.newDataType(), this) @@ -227,7 +226,6 @@ abstract class JdbcDialect extends Serializable { updateClause += getUpdateColumnTypeQuery(tableName, name(0), dataType) case updateNull: UpdateColumnNullability if updateNull.fieldNames.length == 1 => val name = updateNull.fieldNames - val nullable = if (updateNull.nullable()) "NULL" else "NOT NULL" updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") @@ -236,23 +234,28 @@ abstract class JdbcDialect extends Serializable { updateClause.result() } - def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = { - s"ALTER TABLE $tableName ADD COLUMN $columnName $dataType" - } + def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = + s"ALTER TABLE $tableName ADD COLUMN ${quoteIdentifier(columnName)} $dataType" + + def getRenameColumnQuery(tableName: String, columnName: String, newName: String): String = + s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + + s" ${quoteIdentifier(newName)}" + + def getDeleteColumnQuery(tableName: String, columnName: String): String = + s"ALTER TABLE $tableName DROP COLUMN ${quoteIdentifier(columnName)}" def getUpdateColumnTypeQuery( tableName: String, columnName: String, - newDataType: String): String = { - s"ALTER TABLE $tableName ALTER COLUMN $columnName $newDataType" - } + newDataType: String): String = + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $newDataType" def getUpdateColumnNullabilityQuery( tableName: String, columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "NULL" else "NOT NULL" - s"ALTER TABLE $tableName ALTER COLUMN $columnName SET $nullable" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} SET $nullable" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 128b90a190481..491b6e29ecf2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -120,21 +120,24 @@ private case object OracleDialect extends JdbcDialect { } // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 - override def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = - s"ALTER TABLE $tableName ADD $columnName $dataType" + override def getAddColumnQuery( + tableName: String, + columnName: String, + dataType: String): String = + s"ALTER TABLE $tableName ADD ${quoteIdentifier(columnName)} 
$dataType" // see https://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_3001.htm#SQLRF01001 override def getUpdateColumnTypeQuery( tableName: String, columnName: String, newDataType: String): String = - s"ALTER TABLE $tableName MODIFY $columnName $newDataType" + s"ALTER TABLE $tableName MODIFY ${quoteIdentifier(columnName)} $newDataType" override def getUpdateColumnNullabilityQuery( tableName: String, columnName: String, isNullable: Boolean): String = { val nullable = if (isNullable) "NULL" else "NOT NULL" - s"ALTER TABLE $tableName MODIFY $columnName $nullable" + s"ALTER TABLE $tableName MODIFY ${quoteIdentifier(columnName)} $nullable" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 8fe58e3a0a28a..209f5609e447f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -178,15 +178,15 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { .add("C1", IntegerType) .add("C2", StringType) assert(t.schema === expectedSchema) - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") t = spark.table("h2.test.alt_table") - expectedSchema = expectedSchema.add("C3", DoubleType) + expectedSchema = expectedSchema.add("c3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C3 DOUBLE)") + sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") }.getMessage - assert(msg.contains("Cannot add column, because C3 already exists")) + assert(msg.contains("Cannot add column, because c3 already exists")) } // Add a column to not existing table and namespace Seq("h2.test.not_existing_table", "h2.bad_test.not_existing_table").foreach { table => @@ -199,8 +199,8 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... rename column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER, C0 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN ID TO C") + sql("CREATE TABLE h2.test.alt_table (id INTEGER, C0 INTEGER) USING _") + sql("ALTER TABLE h2.test.alt_table RENAME COLUMN id TO C") val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType() .add("C", IntegerType) @@ -223,8 +223,9 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... drop column") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table DROP COLUMN C1") + sql("ALTER TABLE h2.test.alt_table DROP COLUMN c3") val t = spark.table("h2.test.alt_table") val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) @@ -245,10 +246,11 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... 
update column type") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER, deptno INTEGER) USING _") sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE DOUBLE") + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno TYPE DOUBLE") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("ID", DoubleType) + val expectedSchema = new StructType().add("ID", DoubleType).add("deptno", DoubleType) assert(t.schema === expectedSchema) // Update not existing column val msg1 = intercept[AnalysisException] { @@ -272,10 +274,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("alter table ... update column nullability") { withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL) USING _") + sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID DROP NOT NULL") + sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno DROP NOT NULL") val t = spark.table("h2.test.alt_table") - val expectedSchema = new StructType().add("ID", IntegerType, nullable = true) + val expectedSchema = new StructType() + .add("ID", IntegerType, nullable = true).add("deptno", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column val msg = intercept[AnalysisException] { From b089fe5376d72ccd0a6724ac9aa2386c5a81b06b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 15 Oct 2020 09:08:14 -0700 Subject: [PATCH 0244/1009] [SPARK-32247][INFRA] Install and test scipy with PyPy in GitHub Actions ### What changes were proposed in this pull request? This PR proposes to install `scipy` as well in PyPy. It will test several ML specific test cases in PyPy as well. For example, https://github.com/apache/spark/blob/31a16fbb405a19dc3eb732347e0e1f873b16971d/python/pyspark/mllib/tests/test_linalg.py#L487 It was not installed when GitHub Actions build was added because it failed to install for an unknown reason. Seems like it's fixed in the latest scipy. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this PR will test it out. Closes #30054 from HyukjinKwon/SPARK-32247. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5f2dc52cc7893..9b7026eeca4c8 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -155,12 +155,11 @@ jobs: - name: Install Python packages (Python 3.6 and PyPy3) if: contains(matrix.modules, 'pyspark') # PyArrow is not supported in PyPy yet, see ARROW-2651. - # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason. 
run: | python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner python3.6 -m pip list # PyPy does not have xmlrunner - pypy3 -m pip install numpy pandas + pypy3 -m pip install numpy pandas scipy pypy3 -m pip list - name: Install Python packages (Python 3.8) if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) From 82eea13c7686fb4bfbe8fb4185db81438d2ea884 Mon Sep 17 00:00:00 2001 From: Min Shen Date: Thu, 15 Oct 2020 12:34:52 -0500 Subject: [PATCH 0245/1009] [SPARK-32915][CORE] Network-layer and shuffle RPC layer changes to support push shuffle blocks ### What changes were proposed in this pull request? This is the first patch for SPIP SPARK-30602 for push-based shuffle. Summary of changes: * Introduce a new API in ExternalBlockStoreClient to push blocks to a remote shuffle service. * Leveraging the streaming upload functionality in SPARK-6237, this patch also enables ExternalBlockHandler to delegate the handling of block push requests to MergedShuffleFileManager. * Propose the API for MergedShuffleFileManager, where the core logic on the shuffle service side to handle block push requests is defined. The actual implementation of this API is deferred to a follow-up PR to restrict the size of this one. * Introduce OneForOneBlockPusher to enable pushing blocks to remote shuffle services in the shuffle RPC layer. * New protocols in the shuffle RPC layer to support these functionalities. ### Why are the changes needed? Refer to the SPIP in SPARK-30602. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests. The reference PR with the consolidated changes covering the complete implementation is also provided in SPARK-30602. We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Closes #29855 from Victsm/SPARK-32915.
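To make the client-side flow concrete, here is a minimal, hypothetical Scala sketch of how the new `pushBlocks` API introduced above could be driven. Only the `pushBlocks` signature and the `BlockFetchingListener` callbacks come from this patch; the helper function, block IDs, and payloads are made-up placeholders, not code from the PR:

```
import java.nio.ByteBuffer

import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.shuffle.{BlockFetchingListener, ExternalBlockStoreClient}

// Assumes `client` has already been created and initialized for the application.
def pushExample(client: ExternalBlockStoreClient, host: String, port: Int): Unit = {
  // Hypothetical block IDs and dummy payloads; real callers would pass shuffle data buffers.
  val blockIds = Array("shufflePush_0_1_2", "shufflePush_0_1_3")
  val payload = ByteBuffer.wrap(Array[Byte](1, 2, 3))
  val buffers: Array[ManagedBuffer] =
    Array(new NioManagedBuffer(payload), new NioManagedBuffer(payload))

  // Pushes are best-effort: per-block success or failure is reported through the
  // same BlockFetchingListener interface that block fetches use.
  client.pushBlocks(host, port, blockIds, buffers, new BlockFetchingListener {
    override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit =
      println(s"pushed $blockId")
    override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit =
      println(s"failed to push $blockId: $exception")
  })
}
```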
Lead-authored-by: Min Shen Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Co-authored-by: Chandni Singh Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- common/network-common/pom.xml | 4 + .../spark/network/protocol/Encoders.java | 63 +++++++ common/network-shuffle/pom.xml | 9 + .../network/shuffle/BlockStoreClient.java | 21 +++ .../spark/network/shuffle/ErrorHandler.java | 85 ++++++++++ .../network/shuffle/ExternalBlockHandler.java | 104 +++++++++++- .../shuffle/ExternalBlockStoreClient.java | 52 +++++- .../network/shuffle/MergedBlockMeta.java | 64 +++++++ .../shuffle/MergedShuffleFileManager.java | 116 +++++++++++++ .../network/shuffle/OneForOneBlockPusher.java | 123 ++++++++++++++ .../network/shuffle/RetryingBlockFetcher.java | 27 ++- .../protocol/BlockTransferMessage.java | 6 +- .../protocol/FinalizeShuffleMerge.java | 84 +++++++++ .../shuffle/protocol/MergeStatuses.java | 118 +++++++++++++ .../shuffle/protocol/PushBlockStream.java | 95 +++++++++++ .../network/shuffle/ErrorHandlerSuite.java | 51 ++++++ .../shuffle/ExternalBlockHandlerSuite.java | 40 ++++- .../shuffle/OneForOneBlockPusherSuite.java | 159 ++++++++++++++++++ .../ExternalShuffleServiceMetricsSuite.scala | 3 +- .../yarn/YarnShuffleServiceMetricsSuite.scala | 2 +- .../yarn/YarnShuffleServiceSuite.scala | 1 + 21 files changed, 1212 insertions(+), 15 deletions(-) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9d5bc9aae0719..d328a7de0a762 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -91,6 +91,10 @@ org.apache.commons commons-crypto + + org.roaringbitmap + RoaringBitmap + diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 490915f6de4b3..4fa191b3917e3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -17,9 +17,11 @@ package org.apache.spark.network.protocol; +import java.io.IOException; import java.nio.charset.StandardCharsets; import io.netty.buffer.ByteBuf; +import org.roaringbitmap.RoaringBitmap; /** Provides a canonical set of Encoders for simple types. 
*/ public class Encoders { @@ -44,6 +46,40 @@ public static String decode(ByteBuf buf) { } } + /** Bitmaps are encoded with their serialization length followed by the serialization bytes. */ + public static class Bitmaps { + public static int encodedLength(RoaringBitmap b) { + // Compress the bitmap before serializing it. Note that since BlockTransferMessage + // needs to invoke encodedLength first to figure out the length for the ByteBuf, it + // guarantees that the bitmap will always be compressed before being serialized. + b.trim(); + b.runOptimize(); + return b.serializedSizeInBytes(); + } + + public static void encode(ByteBuf buf, RoaringBitmap b) { + int encodedLength = b.serializedSizeInBytes(); + // RoaringBitmap requires nio ByteBuffer for serde. We expose the netty ByteBuf as a nio + // ByteBuffer. Here, we need to explicitly manage the index so we can write into the + // ByteBuffer, and the write is reflected in the underneath ByteBuf. + b.serialize(buf.nioBuffer(buf.writerIndex(), encodedLength)); + buf.writerIndex(buf.writerIndex() + encodedLength); + } + + public static RoaringBitmap decode(ByteBuf buf) { + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(buf.nioBuffer()); + // RoaringBitmap deserialize does not advance the reader index of the underlying ByteBuf. + // Manually update the index here. + buf.readerIndex(buf.readerIndex() + bitmap.serializedSizeInBytes()); + } catch (IOException e) { + throw new RuntimeException("Exception while decoding bitmap", e); + } + return bitmap; + } + } + /** Byte arrays are encoded with their length followed by bytes. */ public static class ByteArrays { public static int encodedLength(byte[] arr) { @@ -135,4 +171,31 @@ public static long[] decode(ByteBuf buf) { return longs; } } + + /** Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. 
*/ + public static class BitmapArrays { + public static int encodedLength(RoaringBitmap[] bitmaps) { + int totalLength = 4; + for (RoaringBitmap b : bitmaps) { + totalLength += Bitmaps.encodedLength(b); + } + return totalLength; + } + + public static void encode(ByteBuf buf, RoaringBitmap[] bitmaps) { + buf.writeInt(bitmaps.length); + for (RoaringBitmap b : bitmaps) { + Bitmaps.encode(buf, b); + } + } + + public static RoaringBitmap[] decode(ByteBuf buf) { + int numBitmaps = buf.readInt(); + RoaringBitmap[] bitmaps = new RoaringBitmap[numBitmaps]; + for (int i = 0; i < bitmaps.length; i ++) { + bitmaps[i] = Bitmaps.decode(buf); + } + return bitmaps; + } + } } diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 00f1defbb0093..a4a1ff92ef9a0 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -57,6 +57,10 @@ com.google.guava guava + + org.roaringbitmap + RoaringBitmap + @@ -93,6 +97,11 @@ mockito-core test + + commons-io + commons-io + test + diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index e762bd2071632..37befcd4b67fa 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -29,6 +29,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientFactory; @@ -135,4 +136,24 @@ public void onFailure(Throwable t) { hostLocalDirsCompletable.completeExceptionally(e); } } + + /** + * Push a sequence of shuffle blocks in a best-effort manner to a remote node asynchronously. + * These shuffle blocks, along with blocks pushed by other clients, will be merged into + * per-shuffle partition merged shuffle files on the destination node. + * + * @param host the host of the remote node. + * @param port the port of the remote node. + * @param blockIds block ids to be pushed + * @param buffers buffers to be pushed + * @param listener the listener to receive block push status. + */ + public void pushBlocks( + String host, + int port, + String[] blockIds, + ManagedBuffer[] buffers, + BlockFetchingListener listener) { + throw new UnsupportedOperationException(); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java new file mode 100644 index 0000000000000..308b0b7a6b33b --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.net.ConnectException; + +import com.google.common.base.Throwables; + +/** + * Plugs into {@link RetryingBlockFetcher} to further control when an exception should be retried + * and logged. + * Note: {@link RetryingBlockFetcher} will delegate the exception to this handler only when + * - remaining retries < max retries + * - exception is an IOException + */ + +public interface ErrorHandler { + + boolean shouldRetryError(Throwable t); + + default boolean shouldLogError(Throwable t) { + return true; + } + + /** + * A no-op error handler instance. + */ + ErrorHandler NOOP_ERROR_HANDLER = t -> true; + + /** + * The error handler for pushing shuffle blocks to remote shuffle services. + */ + class BlockPushErrorHandler implements ErrorHandler { + /** + * String constant used for generating exception messages indicating a block to be merged + * arrives too late on the server side, and also for later checking such exceptions on the + * client side. When we get a block push failure because of the block arrives too late, we + * will not retry pushing the block nor log the exception on the client side. + */ + public static final String TOO_LATE_MESSAGE_SUFFIX = + "received after merged shuffle is finalized"; + + /** + * String constant used for generating exception messages indicating the server couldn't + * append a block after all available attempts due to collision with other blocks belonging + * to the same shuffle partition, and also for later checking such exceptions on the client + * side. When we get a block push failure because of the block couldn't be written due to + * this reason, we will not log the exception on the client side. + */ + public static final String BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX = + "Couldn't find an opportunity to write block"; + + @Override + public boolean shouldRetryError(Throwable t) { + // If it is a connection time out or a connection closed exception, no need to retry. 
+ if (t.getCause() != null && t.getCause() instanceof ConnectException) { + return false; + } + // If the block is too late, there is no need to retry it + return !Throwables.getStackTraceAsString(t).contains(TOO_LATE_MESSAGE_SUFFIX); + } + + @Override + public boolean shouldLogError(Throwable t) { + String errorStackTrace = Throwables.getStackTraceAsString(t); + return !errorStackTrace.contains(BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX) && + !errorStackTrace.contains(TOO_LATE_MESSAGE_SUFFIX); + } + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 33865a21ea914..321b25305c504 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -32,6 +32,7 @@ import com.codahale.metrics.Timer; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; +import org.apache.spark.network.client.StreamCallbackWithID; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,11 +62,21 @@ public class ExternalBlockHandler extends RpcHandler { final ExternalShuffleBlockResolver blockManager; private final OneForOneStreamManager streamManager; private final ShuffleMetrics metrics; + private final MergedShuffleFileManager mergeManager; public ExternalBlockHandler(TransportConf conf, File registeredExecutorFile) throws IOException { this(new OneForOneStreamManager(), - new ExternalShuffleBlockResolver(conf, registeredExecutorFile)); + new ExternalShuffleBlockResolver(conf, registeredExecutorFile), + new NoOpMergedShuffleFileManager()); + } + + public ExternalBlockHandler( + TransportConf conf, + File registeredExecutorFile, + MergedShuffleFileManager mergeManager) throws IOException { + this(new OneForOneStreamManager(), + new ExternalShuffleBlockResolver(conf, registeredExecutorFile), mergeManager); } @VisibleForTesting @@ -78,9 +89,19 @@ public ExternalShuffleBlockResolver getBlockResolver() { public ExternalBlockHandler( OneForOneStreamManager streamManager, ExternalShuffleBlockResolver blockManager) { + this(streamManager, blockManager, new NoOpMergedShuffleFileManager()); + } + + /** Enables mocking out the StreamManager, BlockManager, and MergeManager. 
*/ + @VisibleForTesting + public ExternalBlockHandler( + OneForOneStreamManager streamManager, + ExternalShuffleBlockResolver blockManager, + MergedShuffleFileManager mergeManager) { this.metrics = new ShuffleMetrics(); this.streamManager = streamManager; this.blockManager = blockManager; + this.mergeManager = mergeManager; } @Override @@ -89,6 +110,21 @@ public void receive(TransportClient client, ByteBuffer message, RpcResponseCallb handleMessage(msgObj, client, callback); } + @Override + public StreamCallbackWithID receiveStream( + TransportClient client, + ByteBuffer messageHeader, + RpcResponseCallback callback) { + BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(messageHeader); + if (msgObj instanceof PushBlockStream) { + PushBlockStream message = (PushBlockStream) msgObj; + checkAuth(client, message.appId); + return mergeManager.receiveBlockDataAsStream(message); + } else { + throw new UnsupportedOperationException("Unexpected message with #receiveStream: " + msgObj); + } + } + protected void handleMessage( BlockTransferMessage msgObj, TransportClient client, @@ -139,6 +175,7 @@ protected void handleMessage( RegisterExecutor msg = (RegisterExecutor) msgObj; checkAuth(client, msg.appId); blockManager.registerExecutor(msg.appId, msg.execId, msg.executorInfo); + mergeManager.registerExecutor(msg.appId, msg.executorInfo.localDirs); callback.onSuccess(ByteBuffer.wrap(new byte[0])); } finally { responseDelayContext.stop(); @@ -156,6 +193,20 @@ protected void handleMessage( Map localDirs = blockManager.getLocalDirs(msg.appId, msg.execIds); callback.onSuccess(new LocalDirsForExecutors(localDirs).toByteBuffer()); + } else if (msgObj instanceof FinalizeShuffleMerge) { + final Timer.Context responseDelayContext = + metrics.finalizeShuffleMergeLatencyMillis.time(); + FinalizeShuffleMerge msg = (FinalizeShuffleMerge) msgObj; + try { + checkAuth(client, msg.appId); + MergeStatuses statuses = mergeManager.finalizeShuffleMerge(msg); + callback.onSuccess(statuses.toByteBuffer()); + } catch(IOException e) { + throw new RuntimeException(String.format("Error while finalizing shuffle merge " + + "for application %s shuffle %d", msg.appId, msg.shuffleId), e); + } finally { + responseDelayContext.stop(); + } } else { throw new UnsupportedOperationException("Unexpected message: " + msgObj); } @@ -225,6 +276,8 @@ public class ShuffleMetrics implements MetricSet { private final Timer openBlockRequestLatencyMillis = new Timer(); // Time latency for executor registration latency in ms private final Timer registerExecutorRequestLatencyMillis = new Timer(); + // Time latency for processing finalize shuffle merge request latency in ms + private final Timer finalizeShuffleMergeLatencyMillis = new Timer(); // Block transfer rate in byte per second private final Meter blockTransferRateBytes = new Meter(); // Number of active connections to the shuffle service @@ -236,6 +289,7 @@ public ShuffleMetrics() { allMetrics = new HashMap<>(); allMetrics.put("openBlockRequestLatencyMillis", openBlockRequestLatencyMillis); allMetrics.put("registerExecutorRequestLatencyMillis", registerExecutorRequestLatencyMillis); + allMetrics.put("finalizeShuffleMergeLatencyMillis", finalizeShuffleMergeLatencyMillis); allMetrics.put("blockTransferRateBytes", blockTransferRateBytes); allMetrics.put("registeredExecutorsSize", (Gauge) () -> blockManager.getRegisteredExecutorsSize()); @@ -373,6 +427,54 @@ public ManagedBuffer next() { } } + /** + * Dummy implementation of merged shuffle file manager. 
Suitable for when push-based shuffle + * is not enabled. + */ + private static class NoOpMergedShuffleFileManager implements MergedShuffleFileManager { + + @Override + public StreamCallbackWithID receiveBlockDataAsStream(PushBlockStream msg) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOException { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public void registerApplication(String appId, String user) { + // No-op. Do nothing. + } + + @Override + public void registerExecutor(String appId, String[] localDirs) { + // No-Op. Do nothing. + } + + @Override + public void applicationRemoved(String appId, boolean cleanupLocalDirs) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public ManagedBuffer getMergedBlockData( + String appId, int shuffleId, int reduceId, int chunkId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + + @Override + public String[] getMergedBlockDirs(String appId) { + throw new UnsupportedOperationException("Cannot handle shuffle block merge"); + } + } + @Override public void channelActive(TransportClient client) { metrics.activeConnections.inc(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index 76e23e7c69d2d..eca35ed290467 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -20,21 +20,24 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import com.codahale.metrics.MetricSet; import com.google.common.collect.Lists; + +import org.apache.spark.network.TransportContext; +import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientBootstrap; -import org.apache.spark.network.shuffle.protocol.*; - -import org.apache.spark.network.TransportContext; import org.apache.spark.network.crypto.AuthClientBootstrap; import org.apache.spark.network.sasl.SecretKeyHolder; import org.apache.spark.network.server.NoOpRpcHandler; +import org.apache.spark.network.shuffle.protocol.*; import org.apache.spark.network.util.TransportConf; /** @@ -43,6 +46,8 @@ * (via BlockTransferService), which has the downside of losing the data if we lose the executors. 
*/ public class ExternalBlockStoreClient extends BlockStoreClient { + private static final ErrorHandler PUSH_ERROR_HANDLER = new ErrorHandler.BlockPushErrorHandler(); + private final TransportConf conf; private final boolean authEnabled; private final SecretKeyHolder secretKeyHolder; @@ -90,12 +95,12 @@ public void fetchBlocks( try { int maxRetries = conf.maxIORetries(); RetryingBlockFetcher.BlockFetchStarter blockFetchStarter = - (blockIds1, listener1) -> { + (inputBlockId, inputListener) -> { // Unless this client is closed. if (clientFactory != null) { TransportClient client = clientFactory.createClient(host, port, maxRetries > 0); new OneForOneBlockFetcher(client, appId, execId, - blockIds1, listener1, conf, downloadFileManager).start(); + inputBlockId, inputListener, conf, downloadFileManager).start(); } else { logger.info("This clientFactory was closed. Skipping further block fetch retries."); } @@ -116,6 +121,43 @@ public void fetchBlocks( } } + @Override + public void pushBlocks( + String host, + int port, + String[] blockIds, + ManagedBuffer[] buffers, + BlockFetchingListener listener) { + checkInit(); + assert blockIds.length == buffers.length : "Number of block ids and buffers do not match."; + + Map buffersWithId = new HashMap<>(); + for (int i = 0; i < blockIds.length; i++) { + buffersWithId.put(blockIds[i], buffers[i]); + } + logger.debug("Push {} shuffle blocks to {}:{}", blockIds.length, host, port); + try { + RetryingBlockFetcher.BlockFetchStarter blockPushStarter = + (inputBlockId, inputListener) -> { + TransportClient client = clientFactory.createClient(host, port); + new OneForOneBlockPusher(client, appId, inputBlockId, inputListener, buffersWithId) + .start(); + }; + int maxRetries = conf.maxIORetries(); + if (maxRetries > 0) { + new RetryingBlockFetcher( + conf, blockPushStarter, blockIds, listener, PUSH_ERROR_HANDLER).start(); + } else { + blockPushStarter.createAndStart(blockIds, listener); + } + } catch (Exception e) { + logger.error("Exception while beginning pushBlocks", e); + for (String blockId : blockIds) { + listener.onBlockFetchFailure(blockId, e); + } + } + } + @Override public MetricSet shuffleMetrics() { checkInit(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java new file mode 100644 index 0000000000000..e9d9e53495469 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import com.google.common.base.Preconditions; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import org.roaringbitmap.RoaringBitmap; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.protocol.Encoders; + +/** + * Contains meta information for a merged block. Currently this information constitutes: + * 1. Number of chunks in a merged shuffle block. + * 2. Bitmaps for each chunk in the merged block. A chunk bitmap contains all the mapIds that were + * merged to that merged block chunk. + */ +public class MergedBlockMeta { + private final int numChunks; + private final ManagedBuffer chunksBitmapBuffer; + + public MergedBlockMeta(int numChunks, ManagedBuffer chunksBitmapBuffer) { + this.numChunks = numChunks; + this.chunksBitmapBuffer = Preconditions.checkNotNull(chunksBitmapBuffer); + } + + public int getNumChunks() { + return numChunks; + } + + public ManagedBuffer getChunksBitmapBuffer() { + return chunksBitmapBuffer; + } + + public RoaringBitmap[] readChunkBitmaps() throws IOException { + ByteBuf buf = Unpooled.wrappedBuffer(chunksBitmapBuffer.nioByteBuffer()); + List bitmaps = new ArrayList<>(); + while(buf.isReadable()) { + bitmaps.add(Encoders.Bitmaps.decode(buf)); + } + assert (bitmaps.size() == numChunks); + return bitmaps.toArray(new RoaringBitmap[0]); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java new file mode 100644 index 0000000000000..ef4dbb2bd0059 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedShuffleFileManager.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.io.IOException; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.client.StreamCallbackWithID; +import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; +import org.apache.spark.network.shuffle.protocol.MergeStatuses; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + + +/** + * The MergedShuffleFileManager is used to process push based shuffle when enabled. It works + * along side {@link ExternalBlockHandler} and serves as an RPCHandler for + * {@link org.apache.spark.network.server.RpcHandler#receiveStream}, where it processes the + * remotely pushed streams of shuffle blocks to merge them into merged shuffle files. 
Right + * now, support for push based shuffle is only implemented for external shuffle service in + * YARN mode. + */ +public interface MergedShuffleFileManager { + /** + * Provides the stream callback used to process a remotely pushed block. The callback is + * used by the {@link org.apache.spark.network.client.StreamInterceptor} installed on the + * channel to process the block data in the channel outside of the message frame. + * + * @param msg metadata of the remotely pushed blocks. This is processed inside the message frame + * @return A stream callback to process the block data in streaming fashion as it arrives + */ + StreamCallbackWithID receiveBlockDataAsStream(PushBlockStream msg); + + /** + * Handles the request to finalize shuffle merge for a given shuffle. + * + * @param msg contains appId and shuffleId to uniquely identify a shuffle to be finalized + * @return The statuses of the merged shuffle partitions for the given shuffle on this + * shuffle service + * @throws IOException + */ + MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOException; + + /** + * Registers an application when it starts. It also stores the username which is necessary + * for generating the host local directories for merged shuffle files. + * Right now, this is invoked by YarnShuffleService. + * + * @param appId application ID + * @param user username + */ + void registerApplication(String appId, String user); + + /** + * Registers an executor with its local dir list when it starts. This provides the specific path + * so MergedShuffleFileManager knows where to store and look for shuffle data for a + * given application. It is invoked by the RPC call when executor tries to register with the + * local shuffle service. + * + * @param appId application ID + * @param localDirs The list of local dirs that this executor gets granted from NodeManager + */ + void registerExecutor(String appId, String[] localDirs); + + /** + * Invoked when an application finishes. This cleans up any remaining metadata associated with + * this application, and optionally deletes the application specific directory path. + * + * @param appId application ID + * @param cleanupLocalDirs flag indicating whether MergedShuffleFileManager should handle + * deletion of local dirs itself. + */ + void applicationRemoved(String appId, boolean cleanupLocalDirs); + + /** + * Get the buffer for a given merged shuffle chunk when serving merged shuffle to reducers + * + * @param appId application ID + * @param shuffleId shuffle ID + * @param reduceId reducer ID + * @param chunkId merged shuffle file chunk ID + * @return The {@link ManagedBuffer} for the given merged shuffle chunk + */ + ManagedBuffer getMergedBlockData(String appId, int shuffleId, int reduceId, int chunkId); + + /** + * Get the meta information of a merged block. + * + * @param appId application ID + * @param shuffleId shuffle ID + * @param reduceId reducer ID + * @return meta information of a merged block + */ + MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId); + + /** + * Get the local directories which stores the merged shuffle files. 
+ * + * @param appId application ID + */ + String[] getMergedBlockDirs(String appId); +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java new file mode 100644 index 0000000000000..407b248170a46 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + +/** + * Similar to {@link OneForOneBlockFetcher}, but for pushing blocks to remote shuffle service to + * be merged instead of for fetching them from remote shuffle services. This is used by + * ShuffleWriter when the block push process is initiated. The supplied BlockFetchingListener + * is used to handle the success or failure in pushing each blocks. + */ +public class OneForOneBlockPusher { + private static final Logger logger = LoggerFactory.getLogger(OneForOneBlockPusher.class); + private static final ErrorHandler PUSH_ERROR_HANDLER = new ErrorHandler.BlockPushErrorHandler(); + + private final TransportClient client; + private final String appId; + private final String[] blockIds; + private final BlockFetchingListener listener; + private final Map buffers; + + public OneForOneBlockPusher( + TransportClient client, + String appId, + String[] blockIds, + BlockFetchingListener listener, + Map buffers) { + this.client = client; + this.appId = appId; + this.blockIds = blockIds; + this.listener = listener; + this.buffers = buffers; + } + + private class BlockPushCallback implements RpcResponseCallback { + + private int index; + private String blockId; + + BlockPushCallback(int index, String blockId) { + this.index = index; + this.blockId = blockId; + } + + @Override + public void onSuccess(ByteBuffer response) { + // On receipt of a successful block push + listener.onBlockFetchSuccess(blockId, new NioManagedBuffer(ByteBuffer.allocate(0))); + } + + @Override + public void onFailure(Throwable e) { + // Since block push is best effort, i.e., if we encountered a block push failure that's not + // retriable or exceeding the max retires, we should not fail all remaining block pushes. 
+ // The best effort nature makes block push tolerable of a partial completion. Thus, we only + // fail the block that's actually failed. Not that, on the RetryingBlockFetcher side, once + // retry is initiated, it would still invalidate the previous active retry listener, and + // retry all outstanding blocks. We are preventing forwarding unnecessary block push failures + // to the parent listener of the retry listener. The only exceptions would be if the block + // push failure is due to block arriving on the server side after merge finalization, or the + // client fails to establish connection to the server side. In both cases, we would fail all + // remaining blocks. + if (PUSH_ERROR_HANDLER.shouldRetryError(e)) { + String[] targetBlockId = Arrays.copyOfRange(blockIds, index, index + 1); + failRemainingBlocks(targetBlockId, e); + } else { + String[] targetBlockId = Arrays.copyOfRange(blockIds, index, blockIds.length); + failRemainingBlocks(targetBlockId, e); + } + } + } + + private void failRemainingBlocks(String[] failedBlockIds, Throwable e) { + for (String blockId : failedBlockIds) { + try { + listener.onBlockFetchFailure(blockId, e); + } catch (Exception e2) { + logger.error("Error in block push failure callback", e2); + } + } + } + + /** + * Begins the block pushing process, calling the listener with every block pushed. + */ + public void start() { + logger.debug("Start pushing {} blocks", blockIds.length); + for (int i = 0; i < blockIds.length; i++) { + assert buffers.containsKey(blockIds[i]) : "Could not find the block buffer for block " + + blockIds[i]; + ByteBuffer header = new PushBlockStream(appId, blockIds[i], i).toByteBuffer(); + client.uploadStream(new NioManagedBuffer(header), buffers.get(blockIds[i]), + new BlockPushCallback(i, blockIds[i])); + } + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java index 6bf3da94030d4..43bde1610e41e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockFetcher.java @@ -99,11 +99,14 @@ void createAndStart(String[] blockIds, BlockFetchingListener listener) */ private RetryingBlockFetchListener currentListener; + private final ErrorHandler errorHandler; + public RetryingBlockFetcher( TransportConf conf, RetryingBlockFetcher.BlockFetchStarter fetchStarter, String[] blockIds, - BlockFetchingListener listener) { + BlockFetchingListener listener, + ErrorHandler errorHandler) { this.fetchStarter = fetchStarter; this.listener = listener; this.maxRetries = conf.maxIORetries(); @@ -111,6 +114,15 @@ public RetryingBlockFetcher( this.outstandingBlocksIds = Sets.newLinkedHashSet(); Collections.addAll(outstandingBlocksIds, blockIds); this.currentListener = new RetryingBlockFetchListener(); + this.errorHandler = errorHandler; + } + + public RetryingBlockFetcher( + TransportConf conf, + BlockFetchStarter fetchStarter, + String[] blockIds, + BlockFetchingListener listener) { + this(conf, fetchStarter, blockIds, listener, ErrorHandler.NOOP_ERROR_HANDLER); } /** @@ -178,7 +190,7 @@ private synchronized boolean shouldRetry(Throwable e) { boolean isIOException = e instanceof IOException || (e.getCause() != null && e.getCause() instanceof IOException); boolean hasRemainingRetries = retryCount < maxRetries; - return isIOException && hasRemainingRetries; + 
return isIOException && hasRemainingRetries && errorHandler.shouldRetryError(e); } /** @@ -215,8 +227,15 @@ public void onBlockFetchFailure(String blockId, Throwable exception) { if (shouldRetry(exception)) { initiateRetry(); } else { - logger.error(String.format("Failed to fetch block %s, and will not retry (%s retries)", - blockId, retryCount), exception); + if (errorHandler.shouldLogError(exception)) { + logger.error( + String.format("Failed to fetch block %s, and will not retry (%s retries)", + blockId, retryCount), exception); + } else { + logger.debug( + String.format("Failed to fetch block %s, and will not retry (%s retries)", + blockId, retryCount), exception); + } outstandingBlocksIds.remove(blockId); shouldForwardFailure = true; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java index 89d8dfe8716b8..7f5058124988f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java @@ -47,7 +47,8 @@ public abstract class BlockTransferMessage implements Encodable { public enum Type { OPEN_BLOCKS(0), UPLOAD_BLOCK(1), REGISTER_EXECUTOR(2), STREAM_HANDLE(3), REGISTER_DRIVER(4), HEARTBEAT(5), UPLOAD_BLOCK_STREAM(6), REMOVE_BLOCKS(7), BLOCKS_REMOVED(8), - FETCH_SHUFFLE_BLOCKS(9), GET_LOCAL_DIRS_FOR_EXECUTORS(10), LOCAL_DIRS_FOR_EXECUTORS(11); + FETCH_SHUFFLE_BLOCKS(9), GET_LOCAL_DIRS_FOR_EXECUTORS(10), LOCAL_DIRS_FOR_EXECUTORS(11), + PUSH_BLOCK_STREAM(12), FINALIZE_SHUFFLE_MERGE(13), MERGE_STATUSES(14); private final byte id; @@ -78,6 +79,9 @@ public static BlockTransferMessage fromByteBuffer(ByteBuffer msg) { case 9: return FetchShuffleBlocks.decode(buf); case 10: return GetLocalDirsForExecutors.decode(buf); case 11: return LocalDirsForExecutors.decode(buf); + case 12: return PushBlockStream.decode(buf); + case 13: return FinalizeShuffleMerge.decode(buf); + case 14: return MergeStatuses.decode(buf); default: throw new IllegalArgumentException("Unknown message type: " + type); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java new file mode 100644 index 0000000000000..9058575df57ef --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle.protocol; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; + +import org.apache.spark.network.protocol.Encoders; + +/** + * Request to finalize merge for a given shuffle. + * Returns {@link MergeStatuses} + */ +public class FinalizeShuffleMerge extends BlockTransferMessage { + public final String appId; + public final int shuffleId; + + public FinalizeShuffleMerge( + String appId, + int shuffleId) { + this.appId = appId; + this.shuffleId = shuffleId; + } + + @Override + protected BlockTransferMessage.Type type() { + return Type.FINALIZE_SHUFFLE_MERGE; + } + + @Override + public int hashCode() { + return Objects.hashCode(appId, shuffleId); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("appId", appId) + .add("shuffleId", shuffleId) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof FinalizeShuffleMerge) { + FinalizeShuffleMerge o = (FinalizeShuffleMerge) other; + return Objects.equal(appId, o.appId) + && shuffleId == o.shuffleId; + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.Strings.encodedLength(appId) + 4; + } + + @Override + public void encode(ByteBuf buf) { + Encoders.Strings.encode(buf, appId); + buf.writeInt(shuffleId); + } + + public static FinalizeShuffleMerge decode(ByteBuf buf) { + String appId = Encoders.Strings.decode(buf); + int shuffleId = buf.readInt(); + return new FinalizeShuffleMerge(appId, shuffleId); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java new file mode 100644 index 0000000000000..f57e8b326e5e2 --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import java.util.Arrays; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; +import org.roaringbitmap.RoaringBitmap; + +import org.apache.spark.network.protocol.Encoders; + +/** + * Result returned by an ExternalShuffleService to the DAGScheduler. This represents the result + * of all the remote shuffle block merge operations performed by an ExternalShuffleService + * for a given shuffle ID. It includes the shuffle ID, an array of bitmaps each representing + * the set of mapper partition blocks that are merged for a given reducer partition, an array + * of reducer IDs, and an array of merged shuffle partition sizes. 
The 3 arrays list information + * about all the reducer partitions merged by the ExternalShuffleService in the same order. + */ +public class MergeStatuses extends BlockTransferMessage { + /** Shuffle ID **/ + public final int shuffleId; + /** + * Array of bitmaps tracking the set of mapper partition blocks merged for each + * reducer partition + */ + public final RoaringBitmap[] bitmaps; + /** Array of reducer IDs **/ + public final int[] reduceIds; + /** + * Array of merged shuffle partition block size. Each represents the total size of all + * merged shuffle partition blocks for one reducer partition. + * **/ + public final long[] sizes; + + public MergeStatuses( + int shuffleId, + RoaringBitmap[] bitmaps, + int[] reduceIds, + long[] sizes) { + this.shuffleId = shuffleId; + this.bitmaps = bitmaps; + this.reduceIds = reduceIds; + this.sizes = sizes; + } + + @Override + protected Type type() { + return Type.MERGE_STATUSES; + } + + @Override + public int hashCode() { + int objectHashCode = Objects.hashCode(shuffleId); + return (objectHashCode * 41 + Arrays.hashCode(reduceIds) * 41 + + Arrays.hashCode(bitmaps) * 41 + Arrays.hashCode(sizes)); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("shuffleId", shuffleId) + .add("reduceId size", reduceIds.length) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof MergeStatuses) { + MergeStatuses o = (MergeStatuses) other; + return Objects.equal(shuffleId, o.shuffleId) + && Arrays.equals(bitmaps, o.bitmaps) + && Arrays.equals(reduceIds, o.reduceIds) + && Arrays.equals(sizes, o.sizes); + } + return false; + } + + @Override + public int encodedLength() { + return 4 // int + + Encoders.BitmapArrays.encodedLength(bitmaps) + + Encoders.IntArrays.encodedLength(reduceIds) + + Encoders.LongArrays.encodedLength(sizes); + } + + @Override + public void encode(ByteBuf buf) { + buf.writeInt(shuffleId); + Encoders.BitmapArrays.encode(buf, bitmaps); + Encoders.IntArrays.encode(buf, reduceIds); + Encoders.LongArrays.encode(buf, sizes); + } + + public static MergeStatuses decode(ByteBuf buf) { + int shuffleId = buf.readInt(); + RoaringBitmap[] bitmaps = Encoders.BitmapArrays.decode(buf); + int[] reduceIds = Encoders.IntArrays.decode(buf); + long[] sizes = Encoders.LongArrays.decode(buf); + return new MergeStatuses(shuffleId, bitmaps, reduceIds, sizes); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java new file mode 100644 index 0000000000000..7eab5a644783c --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.protocol; + +import com.google.common.base.Objects; +import io.netty.buffer.ByteBuf; + +import org.apache.spark.network.protocol.Encoders; + +// Needed by ScalaDoc. See SPARK-7726 +import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; + + +/** + * Request to push a block to a remote shuffle service to be merged in push based shuffle. + * The remote shuffle service will also include this message when responding the push requests. + */ +public class PushBlockStream extends BlockTransferMessage { + public final String appId; + public final String blockId; + // Similar to the chunkIndex in StreamChunkId, indicating the index of a block in a batch of + // blocks to be pushed. + public final int index; + + public PushBlockStream(String appId, String blockId, int index) { + this.appId = appId; + this.blockId = blockId; + this.index = index; + } + + @Override + protected Type type() { + return Type.PUSH_BLOCK_STREAM; + } + + @Override + public int hashCode() { + return Objects.hashCode(appId, blockId, index); + } + + @Override + public String toString() { + return Objects.toStringHelper(this) + .add("appId", appId) + .add("blockId", blockId) + .add("index", index) + .toString(); + } + + @Override + public boolean equals(Object other) { + if (other != null && other instanceof PushBlockStream) { + PushBlockStream o = (PushBlockStream) other; + return Objects.equal(appId, o.appId) + && Objects.equal(blockId, o.blockId) + && index == o.index; + } + return false; + } + + @Override + public int encodedLength() { + return Encoders.Strings.encodedLength(appId) + + Encoders.Strings.encodedLength(blockId) + 4; + } + + @Override + public void encode(ByteBuf buf) { + Encoders.Strings.encode(buf, appId); + Encoders.Strings.encode(buf, blockId); + buf.writeInt(index); + } + + public static PushBlockStream decode(ByteBuf buf) { + String appId = Encoders.Strings.decode(buf); + String blockId = Encoders.Strings.decode(buf); + int index = buf.readInt(); + return new PushBlockStream(appId, blockId, index); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java new file mode 100644 index 0000000000000..992e7762c5a54 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ErrorHandlerSuite.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.net.ConnectException; + +import org.junit.Test; + +import static org.junit.Assert.*; + +/** + * Test suite for {@link ErrorHandler} + */ +public class ErrorHandlerSuite { + + @Test + public void testPushErrorRetry() { + ErrorHandler.BlockPushErrorHandler handler = new ErrorHandler.BlockPushErrorHandler(); + assertFalse(handler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))); + assertFalse(handler.shouldRetryError(new RuntimeException(new ConnectException()))); + assertTrue(handler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))); + assertTrue(handler.shouldRetryError(new Throwable())); + } + + @Test + public void testPushErrorLogging() { + ErrorHandler.BlockPushErrorHandler handler = new ErrorHandler.BlockPushErrorHandler(); + assertFalse(handler.shouldLogError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))); + assertFalse(handler.shouldLogError(new RuntimeException(new IllegalArgumentException( + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))); + assertTrue(handler.shouldLogError(new Throwable())); + } +} diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index 455351fcf767c..680b8d74a2eea 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.network.shuffle; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Iterator; @@ -25,6 +26,7 @@ import org.junit.Before; import org.junit.Test; import org.mockito.ArgumentCaptor; +import org.roaringbitmap.RoaringBitmap; import static org.junit.Assert.*; import static org.mockito.ArgumentMatchers.any; @@ -39,6 +41,8 @@ import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.shuffle.protocol.FetchShuffleBlocks; +import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; +import org.apache.spark.network.shuffle.protocol.MergeStatuses; import org.apache.spark.network.shuffle.protocol.OpenBlocks; import org.apache.spark.network.shuffle.protocol.RegisterExecutor; import org.apache.spark.network.shuffle.protocol.StreamHandle; @@ -50,6 +54,7 @@ public class ExternalBlockHandlerSuite { OneForOneStreamManager streamManager; ExternalShuffleBlockResolver blockResolver; RpcHandler handler; + MergedShuffleFileManager mergedShuffleManager; ManagedBuffer[] blockMarkers = { new NioManagedBuffer(ByteBuffer.wrap(new byte[3])), new NioManagedBuffer(ByteBuffer.wrap(new byte[7])) @@ -59,17 +64,20 @@ public class ExternalBlockHandlerSuite { public void beforeEach() { streamManager = mock(OneForOneStreamManager.class); blockResolver = mock(ExternalShuffleBlockResolver.class); - handler = new ExternalBlockHandler(streamManager, blockResolver); + mergedShuffleManager = mock(MergedShuffleFileManager.class); + handler = new ExternalBlockHandler(streamManager, blockResolver, mergedShuffleManager); } @Test public 
void testRegisterExecutor() { RpcResponseCallback callback = mock(RpcResponseCallback.class); - ExecutorShuffleInfo config = new ExecutorShuffleInfo(new String[] {"/a", "/b"}, 16, "sort"); + String[] localDirs = new String[] {"/a", "/b"}; + ExecutorShuffleInfo config = new ExecutorShuffleInfo(localDirs, 16, "sort"); ByteBuffer registerMessage = new RegisterExecutor("app0", "exec1", config).toByteBuffer(); handler.receive(client, registerMessage, callback); verify(blockResolver, times(1)).registerExecutor("app0", "exec1", config); + verify(mergedShuffleManager, times(1)).registerExecutor("app0", localDirs); verify(callback, times(1)).onSuccess(any(ByteBuffer.class)); verify(callback, never()).onFailure(any(Throwable.class)); @@ -222,4 +230,32 @@ public void testBadMessages() { verify(callback, never()).onSuccess(any(ByteBuffer.class)); verify(callback, never()).onFailure(any(Throwable.class)); } + + @Test + public void testFinalizeShuffleMerge() throws IOException { + RpcResponseCallback callback = mock(RpcResponseCallback.class); + + FinalizeShuffleMerge req = new FinalizeShuffleMerge("app0", 0); + RoaringBitmap bitmap = RoaringBitmap.bitmapOf(0, 1, 2); + MergeStatuses statuses = new MergeStatuses(0, new RoaringBitmap[]{bitmap}, + new int[]{3}, new long[]{30}); + when(mergedShuffleManager.finalizeShuffleMerge(req)).thenReturn(statuses); + + ByteBuffer reqBuf = req.toByteBuffer(); + handler.receive(client, reqBuf, callback); + verify(mergedShuffleManager, times(1)).finalizeShuffleMerge(req); + ArgumentCaptor response = ArgumentCaptor.forClass(ByteBuffer.class); + verify(callback, times(1)).onSuccess(response.capture()); + verify(callback, never()).onFailure(any()); + + MergeStatuses mergeStatuses = + (MergeStatuses) BlockTransferMessage.Decoder.fromByteBuffer(response.getValue()); + assertEquals(mergeStatuses, statuses); + + Timer finalizeShuffleMergeLatencyMillis = (Timer) ((ExternalBlockHandler) handler) + .getAllMetrics() + .getMetrics() + .get("finalizeShuffleMergeLatencyMillis"); + assertEquals(1, finalizeShuffleMergeLatencyMillis.getCount()); + } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java new file mode 100644 index 0000000000000..ebcdba72aa1a8 --- /dev/null +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.shuffle; + +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; + +import com.google.common.collect.Maps; +import io.netty.buffer.Unpooled; +import org.junit.Test; + +import static org.junit.Assert.*; +import static org.mockito.AdditionalMatchers.*; +import static org.mockito.Mockito.*; + +import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.buffer.NettyManagedBuffer; +import org.apache.spark.network.buffer.NioManagedBuffer; +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; +import org.apache.spark.network.shuffle.protocol.PushBlockStream; + + +public class OneForOneBlockPusherSuite { + + @Test + public void testPushOne() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "shuffle_0_0_0", 0))); + + verify(listener).onBlockFetchSuccess(eq("shuffle_0_0_0"), any()); + } + + @Test + public void testPushThree() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); + blocks.put("b2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + for (int i = 0; i < 3; i ++) { + verify(listener, times(1)).onBlockFetchSuccess(eq("b" + i), any()); + } + } + + @Test + public void testServerFailures() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + verify(listener, times(1)).onBlockFetchSuccess(eq("b0"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b1"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b2"), any()); + } + + @Test + public void testHandlingRetriableFailures() { + LinkedHashMap blocks = Maps.newLinkedHashMap(); + blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); + blocks.put("b1", null); + blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); + String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); + + BlockFetchingListener listener = pushBlocks( + blocks, + blockIds, + Arrays.asList(new PushBlockStream("app-id", "b0", 0), + new PushBlockStream("app-id", "b1", 1), + new PushBlockStream("app-id", "b2", 2))); + + verify(listener, times(1)).onBlockFetchSuccess(eq("b0"), any()); + verify(listener, 
times(0)).onBlockFetchSuccess(not(eq("b0")), any()); + verify(listener, times(0)).onBlockFetchFailure(eq("b0"), any()); + verify(listener, times(1)).onBlockFetchFailure(eq("b1"), any()); + verify(listener, times(2)).onBlockFetchFailure(eq("b2"), any()); + } + + /** + * Begins a push on the given set of blocks by mocking the response from server side. + * If a block is an empty byte, a server side retriable exception will be thrown. + * If a block is null, a non-retriable exception will be thrown. + */ + private static BlockFetchingListener pushBlocks( + LinkedHashMap blocks, + String[] blockIds, + Iterable expectMessages) { + TransportClient client = mock(TransportClient.class); + BlockFetchingListener listener = mock(BlockFetchingListener.class); + OneForOneBlockPusher pusher = + new OneForOneBlockPusher(client, "app-id", blockIds, listener, blocks); + + Iterator> blockIterator = blocks.entrySet().iterator(); + Iterator msgIterator = expectMessages.iterator(); + doAnswer(invocation -> { + ByteBuffer header = ((ManagedBuffer) invocation.getArguments()[0]).nioByteBuffer(); + BlockTransferMessage message = BlockTransferMessage.Decoder.fromByteBuffer(header); + RpcResponseCallback callback = (RpcResponseCallback) invocation.getArguments()[2]; + Map.Entry entry = blockIterator.next(); + ManagedBuffer block = entry.getValue(); + if (block != null && block.nioByteBuffer().capacity() > 0) { + callback.onSuccess(header); + } else if (block != null) { + callback.onFailure(new RuntimeException("Failed " + entry.getKey() + + ErrorHandler.BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)); + } else { + callback.onFailure(new RuntimeException("Quick fail " + entry.getKey() + + ErrorHandler.BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)); + } + assertEquals(msgIterator.next(), message); + return null; + }).when(client).uploadStream(any(ManagedBuffer.class), any(), any(RpcResponseCallback.class)); + + pusher.start(); + return listener; + } +} diff --git a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala index d681c13337e0d..ea4d252f0dbae 100644 --- a/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/ExternalShuffleServiceMetricsSuite.scala @@ -61,7 +61,8 @@ class ExternalShuffleServiceMetricsSuite extends SparkFunSuite { "registeredExecutorsSize", "registerExecutorRequestLatencyMillis", "shuffle-server.usedDirectMemory", - "shuffle-server.usedHeapMemory") + "shuffle-server.usedHeapMemory", + "finalizeShuffleMergeLatencyMillis") ) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala index 63ac1af8a9127..9239d891aae3b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceMetricsSuite.scala @@ -40,7 +40,7 @@ class YarnShuffleServiceMetricsSuite extends SparkFunSuite with Matchers { val allMetrics = Set( "openBlockRequestLatencyMillis", "registerExecutorRequestLatencyMillis", "blockTransferRateBytes", "registeredExecutorsSize", "numActiveConnections", - "numCaughtExceptions") + "numCaughtExceptions", "finalizeShuffleMergeLatencyMillis") 
metrics.getMetrics.keySet().asScala should be (allMetrics) } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala index 46e596575533d..a6a302ad5df95 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala @@ -405,6 +405,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd "openBlockRequestLatencyMillis", "registeredExecutorsSize", "registerExecutorRequestLatencyMillis", + "finalizeShuffleMergeLatencyMillis", "shuffle-server.usedDirectMemory", "shuffle-server.usedHeapMemory" )) From 9e3746469c23fd88f6dacc5082a157ca6970414e Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 15 Oct 2020 12:38:10 -0700 Subject: [PATCH 0246/1009] [SPARK-33078][SQL] Add config for json expression optimization ### What changes were proposed in this pull request? This proposes to add a config for json expression optimization. ### Why are the changes needed? For the new Json expression optimization rules, it is safer if we can disable it using SQL config. ### Does this PR introduce _any_ user-facing change? Yes, users can disable json expression optimization rule. ### How was this patch tested? Unit test Closes #30047 from viirya/SPARK-33078. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../optimizer/OptimizeJsonExprs.scala | 3 ++- .../apache/spark/sql/internal/SQLConf.scala | 11 ++++++++++ .../optimizer/OptimizeJsonExprsSuite.scala | 21 +++++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala index fcd5412d66d41..ce86d8cdd4999 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprs.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{ArrayType, StructType} /** @@ -35,7 +36,7 @@ import org.apache.spark.sql.types.{ArrayType, StructType} */ object OptimizeJsonExprs extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan transform { - case p => p.transformExpressions { + case p if SQLConf.get.jsonExpressionOptimization => p.transformExpressions { case c: CreateNamedStruct // If we create struct from various fields of the same `JsonToStructs`. 
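As a rough illustration of the guard above (a sketch only, not code from this patch), the new flag can be toggled from a session to compare optimized plans for a `from_json(to_json(...))` round trip. It assumes a running `SparkSession` named `spark`; the schema and column names are made up for the example.

```
// Hedged sketch, not part of this patch: toggling the new JSON optimization flag.
// Assumes a SparkSession `spark`; schema and column names are illustrative only.
import org.apache.spark.sql.functions.{col, from_json, struct, to_json}
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}

val schema = new StructType().add("a", IntegerType).add("b", StringType)
val df = spark.range(3).select(
  struct(col("id").cast("int").as("a"), col("id").cast("string").as("b")).as("s"))

// Rule enabled (default): the to_json + from_json round trip is expected to collapse.
spark.conf.set("spark.sql.optimizer.enableJsonExpressionOptimization", "true")
df.select(from_json(to_json(col("s")), schema).as("roundTrip")).explain(true)

// Rule disabled: the JSON serialization/deserialization pair stays in the optimized plan.
spark.conf.set("spark.sql.optimizer.enableJsonExpressionOptimization", "false")
df.select(from_json(to_json(col("s")), schema).as("roundTrip")).explain(true)
```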
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d4c7dd7f3160c..79d78088f51a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1458,6 +1458,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val JSON_EXPRESSION_OPTIMIZATION = + buildConf("spark.sql.optimizer.enableJsonExpressionOptimization") + .doc("Whether to optimize JSON expressions in SQL optimizer. It includes pruning " + + "unnecessary columns from from_json, simplifing from_json + to_json, to_json + " + + "named_struct(from_json.col1, from_json.col2, ....).") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion") .internal() .doc("Whether to delete the expired log files in file stream sink.") @@ -3232,6 +3241,8 @@ class SQLConf extends Serializable with Logging { def jsonGeneratorIgnoreNullFields: Boolean = getConf(SQLConf.JSON_GENERATOR_IGNORE_NULL_FIELDS) + def jsonExpressionOptimization: Boolean = getConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION) + def parallelFileListingInStatsComputation: Boolean = getConf(SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 7d975a1b00466..4129a37eb69a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -29,6 +29,15 @@ import org.apache.spark.sql.types._ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { + private var jsonExpressionOptimizeEnabled: Boolean = _ + protected override def beforeAll(): Unit = { + jsonExpressionOptimizeEnabled = SQLConf.get.jsonExpressionOptimization + } + + protected override def afterAll(): Unit = { + SQLConf.get.setConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION, jsonExpressionOptimizeEnabled) + } + object Optimizer extends RuleExecutor[LogicalPlan] { val batches = Batch("Json optimization", FixedPoint(10), OptimizeJsonExprs) :: Nil } @@ -266,4 +275,16 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { checkEvaluation(e1, e2.eval(row), row) }) } + + test("SPARK-33078: disable json optimization") { + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { + val options = Map.empty[String, String] + + val query = testRelation + .select(JsonToStructs(schema, options, StructsToJson(options, 'struct)).as("struct")) + val optimized = Optimizer.execute(query.analyze) + + comparePlans(optimized, query.analyze) + } + } } From ba69d68d91eed2773c56a1cd82043aba42cecea3 Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Thu, 15 Oct 2020 14:49:43 -0500 Subject: [PATCH 0247/1009] [SPARK-33080][BUILD] Replace fatal warnings snippet ### What changes were proposed in this pull request? Current solution in build file to enable build failure on compilation warnings with exclusion of deprecation ones is not portable after SBT version 1.3.13 (build import fails with compilation error with SBT 1.4) and could be replaced with more robust and maintainable, especially since Scala 2.13.2 with similar built-in functionality. 
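For illustration only, a minimal sketch of the Scala 2.13.2+ built-in mechanism referred to above, written as hypothetical sbt settings rather than the exact configuration this patch adds (the real changes are in the `SparkBuild.scala` diff below):

```
// Hedged sketch of the built-in warning configuration available since Scala 2.13.2.
// Illustrative sbt settings only; not the precise snippet introduced by this patch.
scalacOptions ++= Seq(
  // report deprecations as verbose warnings, escalate every other warning category to an error
  "-Wconf:cat=deprecation:wv,any:e"
)
```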
Additionally, warnings were fixed to pass the build, with as few changes as possible: warnings in 2.12 compilation fixed in code, warnings in 2.13 compilation covered by configuration to be addressed separately ### Why are the changes needed? Unblocks upgrade to SBT after 1.3.13. Enhances build file maintainability. Allows fine tune of warnings configuration in scope of Scala 2.13 compilation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `build/sbt`'s `compile` and `Test/compile` for both Scala 2.12 and 2.13 profiles. Closes #29995 from gemelen/feature/warnings-reporter. Authored-by: Denis Pyshev Signed-off-by: Sean Owen --- .../HostLocalShuffleReadingSuite.scala | 1 + .../spark/storage/BlockManagerSuite.scala | 4 +- project/SparkBuild.scala | 84 ++++++++++--------- .../catalyst/optimizer/OptimizerSuite.scala | 2 +- .../sql/catalyst/util/UnsafeArraySuite.scala | 3 +- .../spark/sql/connector/InMemoryTable.scala | 8 ++ .../sql/streaming/StreamingQuerySuite.scala | 2 +- .../sql/hive/thriftserver/CliSuite.scala | 6 +- 8 files changed, 62 insertions(+), 48 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala index 12c40f4462c7c..8f0c4da88feb2 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/HostLocalShuffleReadingSuite.scala @@ -58,6 +58,7 @@ class HostLocalShuffleReadingSuite extends SparkFunSuite with Matchers with Loca val conf = new SparkConf() .set(SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, true) + import scala.language.existentials val (essStatus, blockStoreClientClass) = if (isESSEnabled) { // LocalSparkCluster will disable the ExternalShuffleService by default. 
Therefore, // we have to manually setup an server which embedded with ExternalBlockHandler to diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 861c16269583a..5450a4b67c00b 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -240,7 +240,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val driverEndpoint = rpcEnv.setupEndpoint(CoarseGrainedSchedulerBackend.ENDPOINT_NAME, new RpcEndpoint { private val executorSet = mutable.HashSet[String]() - override val rpcEnv: RpcEnv = this.rpcEnv + override val rpcEnv: RpcEnv = BlockManagerSuite.this.rpcEnv override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case CoarseGrainedClusterMessages.RegisterExecutor(executorId, _, _, _, _, _, _, _) => executorSet += executorId @@ -254,7 +254,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE def createAndRegisterBlockManager(timeout: Boolean): BlockManagerId = { val id = if (timeout) "timeout" else "normal" val bmRef = rpcEnv.setupEndpoint(s"bm-$id", new RpcEndpoint { - override val rpcEnv: RpcEnv = this.rpcEnv + override val rpcEnv: RpcEnv = BlockManagerSuite.this.rpcEnv private def reply[T](context: RpcCallContext, response: T): Unit = { if (timeout) { Thread.sleep(conf.getTimeAsMs(Network.RPC_ASK_TIMEOUT.key) + 1000) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f20a84451c5c5..5f2ef480f8de5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -28,6 +28,7 @@ import scala.collection.mutable.Stack import sbt._ import sbt.Classpaths.publishTask import sbt.Keys._ +import sbt.librarymanagement.{ VersionNumber, SemanticSelector } import com.etsy.sbt.checkstyle.CheckstylePlugin.autoImport._ import com.simplytyped.Antlr4Plugin._ import com.typesafe.sbt.pom.{PomBuild, SbtPomKeys} @@ -196,7 +197,52 @@ object SparkBuild extends PomBuild { } ) + // Silencer: Scala compiler plugin for warning suppression + // Aim: enable fatal warnings, but supress ones related to using of deprecated APIs + // depends on scala version: + // <2.13 - silencer 1.6.0 and compiler settings to enable fatal warnings + // 2.13.0,2.13.1 - silencer 1.7.1 and compiler settings to enable fatal warnings + // 2.13.2+ - no silencer and configured warnings to achieve the same + lazy val compilerWarningSettings: Seq[sbt.Def.Setting[_]] = Seq( + libraryDependencies ++= { + if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { + val silencerVersion = if (scalaBinaryVersion.value == "2.13") "1.7.1" else "1.6.0" + Seq( + "org.scala-lang.modules" %% "scala-collection-compat" % "2.2.0", + compilerPlugin("com.github.ghik" % "silencer-plugin" % silencerVersion cross CrossVersion.full), + "com.github.ghik" % "silencer-lib" % silencerVersion % Provided cross CrossVersion.full + ) + } else { + Seq.empty + } + }, + scalacOptions in Compile ++= { + if (VersionNumber(scalaVersion.value).matchesSemVer(SemanticSelector("<2.13.2"))) { + Seq( + "-Xfatal-warnings", + "-deprecation", + "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them + ) + } else { + Seq( + // replace -Xfatal-warnings with fine-grained configuration, since 2.13.2 + // verbose warning on deprecation, error on all others + // see `scalac -Wconf:help` for details + 
"-Wconf:cat=deprecation:wv,any:e", + // 2.13-specific warning hits to be muted (as narrowly as possible) and addressed separately + "-Wconf:cat=lint-multiarg-infix:wv", + "-Wconf:cat=other-nullary-override:wv", + "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite..futureAction:wv", + "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv" + ) + } + } + ) + lazy val sharedSettings = sparkGenjavadocSettings ++ + compilerWarningSettings ++ (if (sys.env.contains("NOLINT_ON_COMPILE")) Nil else enableScalaStyle) ++ Seq( exportJars in Compile := true, exportJars in Test := false, @@ -274,44 +320,6 @@ object SparkBuild extends PomBuild { if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments") else Seq.empty }, - // Implements -Xfatal-warnings, ignoring deprecation warnings. - // Code snippet taken from https://issues.scala-lang.org/browse/SI-8410. - compile in Compile := { - val analysis = (compile in Compile).value - val out = streams.value - - def logProblem(l: (=> String) => Unit, f: File, p: xsbti.Problem) = { - val jmap = new java.util.function.Function[Integer, String]() {override def apply(i: Integer): String = {i.toString}} - l(f.toString + ":" + p.position.line.map[String](jmap.apply).map(_ + ":").orElse("") + " " + p.message) - l(p.position.lineContent) - l("") - } - - var failed = 0 - analysis.asInstanceOf[sbt.internal.inc.Analysis].infos.allInfos.foreach { case (k, i) => - i.getReportedProblems foreach { p => - val deprecation = p.message.contains("deprecated") - - if (!deprecation) { - failed = failed + 1 - } - - val printer: (=> String) => Unit = s => if (deprecation) { - out.log.warn(s) - } else { - out.log.error("[warn] " + s) - } - - logProblem(printer, k, p) - - } - } - - if (failed > 0) { - sys.error(s"$failed fatal warnings") - } - analysis - }, // disable Mima check for all modules, // to be enabled in specific ones that have previous artifacts MimaKeys.mimaFailOnNoPrevious := false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala index b48555ec2fb28..48c62fe2990e9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerSuite.scala @@ -58,7 +58,7 @@ class OptimizerSuite extends PlanTest { try { optimizer.execute(analyzed) } catch { - case ex: TreeNodeException[LogicalPlan] + case ex: TreeNodeException[_] if ex.getMessage.contains(SQLConf.OPTIMIZER_MAX_ITERATIONS.key) => fail("optimizer.execute should not reach max iterations.") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala index 6d8ef68473778..2e190c6ba6d4b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala @@ -72,9 +72,8 @@ class UnsafeArraySuite extends SparkFunSuite { arrayData } - private def 
toUnsafeArray[T : TypeTag](array: Array[T]): ArrayData = { + private def toUnsafeArray[T: TypeTag](array: Array[T]): ArrayData = { val converted = ExpressionEncoder[Array[T]].createSerializer().apply(array).getArray(0) - assert(converted.isInstanceOf[T]) assert(converted.numElements == array.length) converted } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 6a78b9e2bddd0..b0325600e7530 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -107,6 +107,8 @@ class InMemoryTable( case (micros: Long, TimestampType) => val localDate = DateTimeUtils.microsToInstant(micros).atZone(UTC).toLocalDate ChronoUnit.YEARS.between(EPOCH_LOCAL_DATE, localDate) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case MonthsTransform(ref) => extractor(ref.fieldNames, schema, row) match { @@ -115,6 +117,8 @@ class InMemoryTable( case (micros: Long, TimestampType) => val localDate = DateTimeUtils.microsToInstant(micros).atZone(UTC).toLocalDate ChronoUnit.MONTHS.between(EPOCH_LOCAL_DATE, localDate) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case DaysTransform(ref) => extractor(ref.fieldNames, schema, row) match { @@ -122,11 +126,15 @@ class InMemoryTable( days case (micros: Long, TimestampType) => ChronoUnit.DAYS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case HoursTransform(ref) => extractor(ref.fieldNames, schema, row) match { case (micros: Long, TimestampType) => ChronoUnit.HOURS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) + case (v, t) => + throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => (extractor(ref.fieldNames, schema, row).hashCode() & Integer.MAX_VALUE) % numBuckets diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 1f408d55fd811..9c2403dffbb1a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -705,7 +705,7 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi val q2 = startQuery(input(1).toDS.map { i => // Emulate that `StreamingQuery` get captured with normal usage unintentionally. // It should not fail the query. 
- q1 + val q = q1 i }, "stream_serializable_test_2") val q3 = startQuery(input(2).toDS.map { i => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 2064a99137bf9..f5ce21f2af335 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -98,10 +98,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { Seq(answer) } else { // spark-sql echoes the submitted queries - val queryEcho = query.split("\n").toList match { - case firstLine :: tail => - s"spark-sql> $firstLine" :: tail.map(l => s" > $l") - } + val xs = query.split("\n").toList + val queryEcho = s"spark-sql> ${xs.head}" :: xs.tail.map(l => s" > $l") // longer lines sometimes get split in the output, // match the first 60 characters of each query line queryEcho.map(_.take(60)) :+ answer From 81d3a8eeca80e6cef0415c5fd1a8c5b8852962a3 Mon Sep 17 00:00:00 2001 From: Chuliang Xiao Date: Thu, 15 Oct 2020 17:24:22 -0700 Subject: [PATCH 0248/1009] [MINOR][PYTHON] Fix the typo in the docstring of method agg() ### What changes were proposed in this pull request? Change `df.groupBy.agg()` to `df.groupBy().agg()` in the docstring of `agg()` ### Why are the changes needed? Fix typo in a docstring ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No Closes #30060 from ChuliangXiao/patch-1. Authored-by: Chuliang Xiao Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 94a7df33f335e..487135cd2329a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1514,7 +1514,7 @@ def cube(self, *cols): @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups - (shorthand for ``df.groupBy.agg()``). + (shorthand for ``df.groupBy().agg()``). >>> df.agg({"age": "max"}).collect() [Row(max(age)=5)] From 9f5eff0ae1fbf526bbb5ae7a6582325279aaa3cd Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Oct 2020 17:58:58 -0700 Subject: [PATCH 0249/1009] [SPARK-33162][INFRA] Use pre-built image at GitHub Action PySpark jobs ### What changes were proposed in this pull request? This PR aims to use `pre-built image` at Github Action PySpark jobs. To isolate the changes, `pyspark` jobs are split from the main job. The docker image is built by the following. | Item | URL | | --------------- | ------------- | | Dockerfile | https://github.com/dongjoon-hyun/ApacheSparkGitHubActionImage/blob/main/Dockerfile | | Builder | https://github.com/dongjoon-hyun/ApacheSparkGitHubActionImage/blob/main/.github/workflows/build.yml | | Image Location | https://hub.docker.com/r/dongjoon/apache-spark-github-action-image | Please note that. 1. The community still will use `build_and_test.yml` to add new features like as we did until now. The `Dockerfile` will be updated regularly. 2. When Apache Spark gets an official docker repository location, we will use it. 3. Also, it's the best if we keep this docker file and builder script at a new Apache Spark dev branch instead of outside GitHub repository. ### Why are the changes needed? Currently, two `pyspark` test jobs take over one and half hour always. 
In total, 3 hours 14 minutes. - https://github.com/apache/spark/runs/1240470628 (1 hour 35 mins) - https://github.com/apache/spark/runs/1240470634 (1 hour 39 mins) This PR will remove the package installation steps which takes 16 minutes and causes flakiness. Note that `Python 3.6 package installation` is not included in the pre-built image and it only takes `20s`. **BEFORE** ![Screen Shot 2020-10-15 at 10 32 17 AM](https://user-images.githubusercontent.com/9700541/96165634-be625080-0ed1-11eb-974b-940c112152e9.png) **AFTER** ![Screen Shot 2020-10-15 at 10 58 17 AM](https://user-images.githubusercontent.com/9700541/96168262-5d3c7c00-0ed5-11eb-83c5-e9dc189a156b.png) In short, `pyspark` GitHub jobs take shorter time. In total, 2 hours 23 minutes (<- 3 hours 14 minutes, previously). - https://github.com/apache/spark/pull/30059/checks?check_run_id=1260512568 (1 hour 18 mins) - https://github.com/apache/spark/pull/30059/checks?check_run_id=1260512582 (1 hour 5 mins) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action on this PR without `package installation steps`. Closes #30059 from dongjoon-hyun/SPARK-33162. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 119 ++++++++++++++++++++------- 1 file changed, 89 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9b7026eeca4c8..cdbe34129637e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -42,10 +42,6 @@ jobs: streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - >- - pyspark-sql, pyspark-mllib, pyspark-resource - - >- - pyspark-core, pyspark-streaming, pyspark-ml - >- sparkr # Here, we split Hive and SQL tests into some of slow ones and the rest of them. @@ -128,41 +124,17 @@ jobs: uses: actions/setup-java@v1 with: java-version: ${{ matrix.java }} - # PySpark - - name: Install PyPy3 - # Note that order of Python installations here matters because default python3 is - # overridden by pypy3. - uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') - with: - python-version: pypy3 - architecture: x64 - - name: Install Python 3.6 - uses: actions/setup-python@v2 - if: contains(matrix.modules, 'pyspark') - with: - python-version: 3.6 - architecture: x64 - name: Install Python 3.8 uses: actions/setup-python@v2 # We should install one Python that is higher then 3+ for SQL and Yarn because: # - SQL component also has Python related tests, for example, IntegratedUDFTestUtils. # - Yarn has a Python specific test too, for example, YarnClusterSuite. - if: contains(matrix.modules, 'yarn') || contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) with: python-version: 3.8 architecture: x64 - - name: Install Python packages (Python 3.6 and PyPy3) - if: contains(matrix.modules, 'pyspark') - # PyArrow is not supported in PyPy yet, see ARROW-2651. 
- run: | - python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner - python3.6 -m pip list - # PyPy does not have xmlrunner - pypy3 -m pip install numpy pandas scipy - pypy3 -m pip list - name: Install Python packages (Python 3.8) - if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) + if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner python3.8 -m pip list @@ -201,6 +173,93 @@ jobs: name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }} path: "**/target/unit-tests.log" + pyspark: + name: "Build modules: ${{ matrix.modules }}" + runs-on: ubuntu-20.04 + container: + image: dongjoon/apache-spark-github-action-image:20201015 + strategy: + fail-fast: false + matrix: + modules: + - >- + pyspark-sql, pyspark-mllib, pyspark-resource + - >- + pyspark-core, pyspark-streaming, pyspark-ml + env: + MODULES_TO_TEST: ${{ matrix.modules }} + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda + GITHUB_PREV_SHA: ${{ github.event.before }} + GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: + fetch-depth: 0 + - name: Merge dispatched input branch + if: ${{ github.event.inputs.target != '' }} + run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/zinc-* + build/scala-* + build/*.jar + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: pyspark-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + pyspark-maven- + - name: Cache Ivy local repository + uses: actions/cache@v2 + with: + path: ~/.ivy2/cache + key: pyspark-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + pyspark-ivy- + - name: Install Python 3.6 + uses: actions/setup-python@v2 + with: + python-version: 3.6 + architecture: x64 + # This step takes much less time (~30s) than other Python versions so it is not included + # in the Docker image being used. There is also a technical issue to install Python 3.6 on + # Ubuntu 20.04. See also SPARK-33162. + - name: Install Python packages (Python 3.6) + run: | + python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.6 -m pip list + # Run the tests. 
+ - name: Run tests + run: | + mkdir -p ~/.m2 + ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + - name: Upload unit tests log files + if: failure() + uses: actions/upload-artifact@v2 + with: + name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 + path: "**/target/unit-tests.log" + # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation From 38c05af1d5538fc6ad00cdb57c1a90e90d04e25d Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 16 Oct 2020 10:28:15 +0900 Subject: [PATCH 0250/1009] [SPARK-33163][SQL][TESTS] Check the metadata key 'org.apache.spark.legacyDateTime' in Avro/Parquet files ### What changes were proposed in this pull request? Added a couple tests to `AvroSuite` and to `ParquetIOSuite` to check that the metadata key 'org.apache.spark.legacyDateTime' is written correctly depending on the SQL configs: - spark.sql.legacy.avro.datetimeRebaseModeInWrite - spark.sql.legacy.parquet.datetimeRebaseModeInWrite This is a follow up https://github.com/apache/spark/pull/28137. ### Why are the changes needed? 1. To improve test coverage 2. To make sure that the metadata key is actually saved to Avro/Parquet files ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the added tests: ``` $ build/sbt "testOnly org.apache.spark.sql.execution.datasources.parquet.ParquetIOSuite" $ build/sbt "avro/test:testOnly org.apache.spark.sql.avro.AvroV1Suite" $ build/sbt "avro/test:testOnly org.apache.spark.sql.avro.AvroV2Suite" ``` Closes #30061 from MaxGekk/parquet-test-metakey. 
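As a rough, manual illustration of what the new tests assert (a sketch only, not part of this patch, assuming a `SparkSession` named `spark` and an illustrative local output path):

```
// Hedged sketch: write a timestamp in LEGACY rebase mode and inspect the Parquet footer
// for the 'org.apache.spark.legacyDateTime' marker. Assumes a SparkSession `spark`;
// the output path is illustrative.
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.HadoopReadOptions
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.util.HadoopInputFile
import spark.implicits._

spark.conf.set("spark.sql.legacy.parquet.datetimeRebaseModeInWrite", "LEGACY")
Seq(java.sql.Timestamp.valueOf("2020-10-15 01:02:03")).toDF("ts")
  .repartition(1)
  .write.mode("overwrite").parquet("/tmp/legacy-ts")

val conf = new Configuration()
val dir = new Path("/tmp/legacy-ts")
val partFile = dir.getFileSystem(conf).listStatus(dir)
  .map(_.getPath).filter(_.getName.endsWith(".parquet")).head
val reader = ParquetFileReader.open(
  HadoopInputFile.fromPath(partFile, conf), HadoopReadOptions.builder(conf).build())
try {
  // Expected to contain the key "org.apache.spark.legacyDateTime" in LEGACY mode,
  // and to omit it in CORRECTED or EXCEPTION mode.
  println(reader.getFileMetaData.getKeyValueMetaData)
} finally {
  reader.close()
}
```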
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroSuite.scala | 40 ++++++++++++--- .../datasources/parquet/ParquetIOSuite.scala | 51 ++++++++++++++----- 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 1005a274d0304..b0f2f8ed09a96 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1791,15 +1791,19 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa } } + private def checkMetaData(path: java.io.File, key: String, expectedValue: String): Unit = { + val avroFiles = path.listFiles() + .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_")) + assert(avroFiles.length === 1) + val reader = DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]()) + val value = reader.asInstanceOf[DataFileReader[_]].getMetaString(key) + assert(value === expectedValue) + } + test("SPARK-31327: Write Spark version into Avro file metadata") { withTempPath { path => spark.range(1).repartition(1).write.format("avro").save(path.getCanonicalPath) - val avroFiles = path.listFiles() - .filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_")) - assert(avroFiles.length === 1) - val reader = DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]()) - val version = reader.asInstanceOf[DataFileReader[_]].getMetaString(SPARK_VERSION_METADATA_KEY) - assert(version === SPARK_VERSION_SHORT) + checkMetaData(path, SPARK_VERSION_METADATA_KEY, SPARK_VERSION_SHORT) } } @@ -1812,6 +1816,30 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa spark.read.format("avro").options(conf).load(path) } } + + test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() + .repartition(1) + .write + .format("avro") + .save(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + checkMetaData(dir, SPARK_LEGACY_DATETIME, "") + } + } + Seq(CORRECTED, EXCEPTION).foreach { mode => + withSQLConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> mode.toString) { + withTempPath { dir => + saveTs(dir) + checkMetaData(dir, SPARK_LEGACY_DATETIME, null) + } + } + } + } } class AvroV1Suite extends AvroSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 2dc8a062bb73d..ff406f7bc62de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -859,20 +859,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } - test("Write Spark version into Parquet metadata") { - withTempPath { dir => - val path = dir.getAbsolutePath - spark.range(1).repartition(1).write.parquet(path) - val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) - - val conf = new Configuration() - val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) - 
val parquetReadOptions = HadoopReadOptions.builder(conf).build() - val m = ParquetFileReader.open(hadoopInputFile, parquetReadOptions) - val metaData = m.getFileMetaData.getKeyValueMetaData + private def getMetaData(dir: java.io.File): Map[String, String] = { + val file = SpecificParquetRecordReaderBase.listDirectory(dir).get(0) + val conf = new Configuration() + val hadoopInputFile = HadoopInputFile.fromPath(new Path(file), conf) + val parquetReadOptions = HadoopReadOptions.builder(conf).build() + val m = ParquetFileReader.open(hadoopInputFile, parquetReadOptions) + val metadata = try { + m.getFileMetaData.getKeyValueMetaData + } finally { m.close() + } + metadata.asScala.toMap + } - assert(metaData.get(SPARK_VERSION_METADATA_KEY) === SPARK_VERSION_SHORT) + test("Write Spark version into Parquet metadata") { + withTempPath { dir => + spark.range(1).repartition(1).write.parquet(dir.getAbsolutePath) + assert(getMetaData(dir)(SPARK_VERSION_METADATA_KEY) === SPARK_VERSION_SHORT) } } @@ -1109,6 +1113,29 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } } + + test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() + .repartition(1) + .write + .parquet(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir)(SPARK_LEGACY_DATETIME) === "") + } + } + Seq(CORRECTED, EXCEPTION).foreach { mode => + withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir).get(SPARK_LEGACY_DATETIME).isEmpty) + } + } + } + } } class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext) From bf594a978812419e5905a47535b50167dbad532f Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 16 Oct 2020 11:04:35 +0900 Subject: [PATCH 0251/1009] [SPARK-32402][SQL][FOLLOW-UP] Add case sensitivity tests for column resolution in ALTER TABLE ### What changes were proposed in this pull request? Add case sensitivity tests for column resolution in ALTER TABLE ### Why are the changes needed? To make sure `spark.sql.caseSensitive` works for `ResolveAlterTableChanges` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test Closes #30063 from huaxingao/caseSensitivity. 
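As a rough illustration of the behaviour the new tests pin down (a sketch only, not part of this patch, assuming the `h2` JDBC catalog configured as in the suite and an existing `test.alt_table` with a lower-case column `c2`):

```
// Hedged sketch: how spark.sql.caseSensitive affects column resolution in ALTER TABLE.
// Assumes the h2 JDBC catalog and the test.alt_table table set up as in the suite below.
spark.conf.set("spark.sql.caseSensitive", "true")
try {
  // Case-sensitive resolution: `C2` does not match a column declared as `c2`, so this is
  // expected to fail analysis with "Cannot rename missing field C2 in test.alt_table schema".
  spark.sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C2 TO c3")
} catch {
  case e: org.apache.spark.sql.AnalysisException => println(e.getMessage)
}

spark.conf.set("spark.sql.caseSensitive", "false")
// Case-insensitive resolution (the default): `C2` resolves to `c2` and the rename succeeds.
spark.sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C2 TO c3")
```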
Authored-by: Huaxin Gao Signed-off-by: HyukjinKwon --- .../v2/jdbc/JDBCTableCatalogSuite.scala | 155 +++++++++++++----- 1 file changed, 114 insertions(+), 41 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 209f5609e447f..d99ccf85683ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -168,23 +169,24 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) } - test("alter table ... add column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (C1 INTEGER, C2 STRING)") - var t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... add column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"ALTER TABLE $tableName ADD COLUMNS (C1 INTEGER, C2 STRING)") + var t = spark.table(tableName) var expectedSchema = new StructType() .add("ID", IntegerType) .add("C1", IntegerType) .add("C2", StringType) assert(t.schema === expectedSchema) - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") - t = spark.table("h2.test.alt_table") + sql(s"ALTER TABLE $tableName ADD COLUMNS (c3 DOUBLE)") + t = spark.table(tableName) expectedSchema = expectedSchema.add("c3", DoubleType) assert(t.schema === expectedSchema) // Add already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ADD COLUMNS (c3 DOUBLE)") + sql(s"ALTER TABLE $tableName ADD COLUMNS (c3 DOUBLE)") }.getMessage assert(msg.contains("Cannot add column, because c3 already exists")) } @@ -197,18 +199,19 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... rename column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (id INTEGER, C0 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN id TO C") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... 
rename column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER) USING _") + sql(s"ALTER TABLE $tableName RENAME COLUMN id TO C") + val t = spark.table(tableName) val expectedSchema = new StructType() .add("C", IntegerType) .add("C0", IntegerType) assert(t.schema === expectedSchema) // Rename to already existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table RENAME COLUMN C TO C0") + sql(s"ALTER TABLE $tableName RENAME COLUMN C TO C0") }.getMessage assert(msg.contains("Cannot rename column, because C0 already exists")) } @@ -221,17 +224,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... drop column") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table DROP COLUMN C1") - sql("ALTER TABLE h2.test.alt_table DROP COLUMN c3") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... drop column") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") + sql(s"ALTER TABLE $tableName DROP COLUMN C1") + sql(s"ALTER TABLE $tableName DROP COLUMN c3") + val t = spark.table(tableName) val expectedSchema = new StructType().add("C2", IntegerType) assert(t.schema === expectedSchema) // Drop not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table DROP COLUMN bad_column") + sql(s"ALTER TABLE $tableName DROP COLUMN bad_column") }.getMessage assert(msg.contains("Cannot delete missing field bad_column in test.alt_table schema")) } @@ -244,22 +248,23 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... update column type") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER, deptno INTEGER) USING _") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE DOUBLE") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno TYPE DOUBLE") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... update column type") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER) USING _") + sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE DOUBLE") + sql(s"ALTER TABLE $tableName ALTER COLUMN deptno TYPE DOUBLE") + val t = spark.table(tableName) val expectedSchema = new StructType().add("ID", DoubleType).add("deptno", DoubleType) assert(t.schema === expectedSchema) // Update not existing column val msg1 = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column TYPE DOUBLE") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column TYPE DOUBLE") }.getMessage assert(msg1.contains("Cannot update missing field bad_column in test.alt_table schema")) // Update column to wrong type val msg2 = intercept[ParseException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN id TYPE bad_type") + sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE bad_type") }.getMessage assert(msg2.contains("DataType bad_type is not supported")) } @@ -272,18 +277,19 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... 
update column nullability") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID DROP NOT NULL") - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN deptno DROP NOT NULL") - val t = spark.table("h2.test.alt_table") + test("ALTER TABLE ... update column nullability") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") + sql(s"ALTER TABLE $tableName ALTER COLUMN ID DROP NOT NULL") + sql(s"ALTER TABLE $tableName ALTER COLUMN deptno DROP NOT NULL") + val t = spark.table(tableName) val expectedSchema = new StructType() .add("ID", IntegerType, nullable = true).add("deptno", IntegerType, nullable = true) assert(t.schema === expectedSchema) // Update nullability of not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column DROP NOT NULL") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column DROP NOT NULL") }.getMessage assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } @@ -296,17 +302,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } - test("alter table ... update column comment not supported") { - withTable("h2.test.alt_table") { - sql("CREATE TABLE h2.test.alt_table (ID INTEGER) USING _") + test("ALTER TABLE ... update column comment not supported") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") val exp = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN ID COMMENT 'test'") + sql(s"ALTER TABLE $tableName ALTER COLUMN ID COMMENT 'test'") } assert(exp.getMessage.contains("Failed table altering: test.alt_table")) assert(exp.cause.get.getMessage.contains("Unsupported TableChange")) // Update comment for not existing column val msg = intercept[AnalysisException] { - sql("ALTER TABLE h2.test.alt_table ALTER COLUMN bad_column COMMENT 'test'") + sql(s"ALTER TABLE $tableName ALTER COLUMN bad_column COMMENT 'test'") }.getMessage assert(msg.contains("Cannot update missing field bad_column in test.alt_table")) } @@ -318,4 +325,70 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { assert(msg.contains("Table not found")) } } + + test("ALTER TABLE case sensitivity") { + val tableName = "h2.test.alt_table" + withTable(tableName) { + sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER) USING _") + var t = spark.table(tableName) + var expectedSchema = new StructType().add("c1", IntegerType).add("c2", IntegerType) + assert(t.schema === expectedSchema) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName RENAME COLUMN C2 TO c3") + }.getMessage + assert(msg.contains("Cannot rename missing field C2 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName RENAME COLUMN C2 TO c3") + expectedSchema = new StructType().add("c1", IntegerType).add("c3", IntegerType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName DROP COLUMN C3") + }.getMessage + assert(msg.contains("Cannot delete missing field C3 in test.alt_table schema")) + } + + 
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName DROP COLUMN C3") + expectedSchema = new StructType().add("c1", IntegerType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 TYPE DOUBLE") + }.getMessage + assert(msg.contains("Cannot update missing field C1 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 TYPE DOUBLE") + expectedSchema = new StructType().add("c1", DoubleType) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field C1 in test.alt_table schema")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $tableName ALTER COLUMN C1 DROP NOT NULL") + expectedSchema = new StructType().add("c1", DoubleType, nullable = true) + t = spark.table(tableName) + assert(t.schema === expectedSchema) + } + } + } } From a5c17de24148ac3ef290e091dcf2978e26afa58c Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Fri, 16 Oct 2020 11:39:09 +0900 Subject: [PATCH 0252/1009] [SPARK-33165][SQL][TEST] Remove dependencies(scalatest,scalactic) from Benchmark ### What changes were proposed in this pull request? This PR proposes to remove `assert` from `Benchmark` for making it easier to run benchmark codes via `spark-submit`. ### Why are the changes needed? Since the current `Benchmark` (`master` and `branch-3.0`) has `assert`, we need to pass the proper jars of `scalatest` and `scalactic`; - scalatest-core_2.12-3.2.0.jar - scalatest-compatible-3.2.0.jar - scalactic_2.12-3.0.jar ``` ./bin/spark-submit --jars scalatest-core_2.12-3.2.0.jar,scalatest-compatible-3.2.0.jar,scalactic_2.12-3.0.jar,./sql/catalyst/target/spark-catalyst_2.12-3.1.0-SNAPSHOT-tests.jar,./core/target/spark-core_2.12-3.1.0-SNAPSHOT-tests.jar --class org.apache.spark.sql.execution.benchmark.TPCDSQueryBenchmark ./sql/core/target/spark-sql_2.12-3.1.0-SNAPSHOT-tests.jar --data-location /tmp/tpcds-sf1 ``` This update can make developers submit benchmark codes without these dependencies; ``` ./bin/spark-submit --jars ./sql/catalyst/target/spark-catalyst_2.12-3.1.0-SNAPSHOT-tests.jar,./core/target/spark-core_2.12-3.1.0-SNAPSHOT-tests.jar --class org.apache.spark.sql.execution.benchmark.TPCDSQueryBenchmark ./sql/core/target/spark-sql_2.12-3.1.0-SNAPSHOT-tests.jar --data-location /tmp/tpcds-sf1 ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #30064 from maropu/RemoveDepInBenchmark. 
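As an aside (not part of the patch): the assertions removed by the diff below resolve to scalatest only because of the `import org.scalatest.Assertions._` in `Benchmark.scala`; a bare `assert` without that import is Scala's built-in `scala.Predef.assert`, which needs no extra jars and is how the follow-up commit later in this series restores the checks. A standalone sketch of the timer pattern involved:

```scala
// Standalone sketch, not the real org.apache.spark.benchmark.Benchmark.Timer:
// the same timing checks written against scala.Predef.assert, which ships with
// the Scala standard library and adds no classpath requirements.
object TimerSketch {
  private var accumulatedTime: Long = 0L
  private var timeStart: Long = 0L

  def startTiming(): Unit = {
    assert(timeStart == 0L, "Already started timing.") // scala.Predef.assert
    timeStart = System.nanoTime
  }

  def stopTiming(): Unit = {
    assert(timeStart != 0L, "Have not started timing.")
    accumulatedTime += System.nanoTime - timeStart
    timeStart = 0L
  }

  def totalTime(): Long = {
    assert(timeStart == 0L, "Have not stopped timing.")
    accumulatedTime
  }
}
```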
Authored-by: Takeshi Yamamuro Signed-off-by: HyukjinKwon --- .../test/scala/org/apache/spark/benchmark/Benchmark.scala | 5 ----- .../spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 72c05a92848ff..0b2f512b947e1 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -26,7 +26,6 @@ import scala.util.Try import org.apache.commons.io.output.TeeOutputStream import org.apache.commons.lang3.SystemUtils -import org.scalatest.Assertions._ import org.apache.spark.util.Utils @@ -162,7 +161,6 @@ private[spark] class Benchmark( // scalastyle:off println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms") // scalastyle:on - assert(runTimes.nonEmpty) val best = runTimes.min val avg = runTimes.sum / runTimes.size val stdev = if (runTimes.size > 1) { @@ -184,18 +182,15 @@ private[spark] object Benchmark { private var timeStart: Long = 0L def startTiming(): Unit = { - assert(timeStart == 0L, "Already started timing.") timeStart = System.nanoTime } def stopTiming(): Unit = { - assert(timeStart != 0L, "Have not started timing.") accumulatedTime += System.nanoTime - timeStart timeStart = 0L } def totalTime(): Long = { - assert(timeStart == 0L, "Have not stopped timing.") accumulatedTime } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index ad3d79760adf0..7bbf0795eb052 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * To run this: * {{{ * 1. without sbt: - * bin/spark-submit --class --data-location + * bin/spark-submit --jars , + * --class --data-location * 2. build/sbt "sql/test:runMain --data-location " * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt * "sql/test:runMain --data-location " From 8f4fc22dc460eb05c47e0d61facf116c60b1be37 Mon Sep 17 00:00:00 2001 From: Samuel Souza Date: Thu, 15 Oct 2020 22:12:41 -0500 Subject: [PATCH 0253/1009] [SPARK-33088][CORE] Enhance ExecutorPlugin API to include callbacks on task start and end events ### What changes were proposed in this pull request? Proposing a new set of APIs for ExecutorPlugins, to provide callbacks invoked at the start and end of each task of a job. Not very opinionated on the shape of the API, tried to be as minimal as possible for now. ### Why are the changes needed? Changes described in detail on [SPARK-33088](https://issues.apache.org/jira/browse/SPARK-33088), but mostly this boils down to: 1. This feature was considered when the ExecutorPlugin API was initially introduced in #21923, but never implemented. 2. The use-case which **requires** this feature is to propagate tracing information from the driver to the executor, such that calls from the same job can all be traced. a. Tracing frameworks usually are setup in thread locals, therefore it's important for the setup to happen in the same thread which runs the tasks. b. 
Executors can run tasks from multiple jobs, therefore it's not sufficient to set tracing information at executor startup time -- it needs to happen every time a task starts or ends. ### Does this PR introduce _any_ user-facing change? No. This PR introduces new features for future developers to use. ### How was this patch tested? Unit tests on `PluginContainerSuite`. Closes #29977 from fsamuel-bs/SPARK-33088. Authored-by: Samuel Souza Signed-off-by: Mridul Muralidharan --- .../spark/api/plugin/ExecutorPlugin.java | 42 ++++++++++++++++ .../org/apache/spark/executor/Executor.scala | 32 +++++++----- .../internal/plugin/PluginContainer.scala | 49 ++++++++++++++++++- .../org/apache/spark/scheduler/Task.scala | 6 ++- .../plugin/PluginContainerSuite.scala | 47 ++++++++++++++++++ .../spark/scheduler/TaskContextSuite.scala | 4 +- 6 files changed, 163 insertions(+), 17 deletions(-) diff --git a/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java index 4961308035163..481bf985f1c6c 100644 --- a/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java +++ b/core/src/main/java/org/apache/spark/api/plugin/ExecutorPlugin.java @@ -19,6 +19,7 @@ import java.util.Map; +import org.apache.spark.TaskFailedReason; import org.apache.spark.annotation.DeveloperApi; /** @@ -54,4 +55,45 @@ default void init(PluginContext ctx, Map<String, String> extraConf) {} */ default void shutdown() {} + /** + * Perform any action before the task is run. + * <p>
+ * This method is invoked from the same thread the task will be executed. + * Task-specific information can be accessed via {@link org.apache.spark.TaskContext#get}. + * <p> + * Plugin authors should avoid expensive operations here, as this method will be called + * on every task, and doing something expensive can significantly slow down a job. + * It is not recommended for a user to call a remote service, for example. + * <p> + * Exceptions thrown from this method do not propagate - they're caught, + * logged, and suppressed. Therefore exceptions when executing this method won't + * make the job fail. + * + * @since 3.1.0 + */ + default void onTaskStart() {} + + /** + * Perform an action after a task completes without exceptions. + * <p>
+ * As {@link #onTaskStart() onTaskStart} exceptions are suppressed, this method + * will still be invoked even if the corresponding {@link #onTaskStart} call for this + * task failed. + * <p> + * Same warnings of {@link #onTaskStart() onTaskStart} apply here. + * + * @since 3.1.0 + */ + default void onTaskSucceeded() {} + + /** + * Perform an action after a task completes with exceptions. + * <p>
    + * Same warnings of {@link #onTaskStart() onTaskStart} apply here. + * + * @param failureReason the exception thrown from the failed task. + * + * @since 3.1.0 + */ + default void onTaskFailed(TaskFailedReason failureReason) {} } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 27addd8fc12e2..6653650615192 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -253,7 +253,7 @@ private[spark] class Executor( } def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = { - val tr = new TaskRunner(context, taskDescription) + val tr = new TaskRunner(context, taskDescription, plugins) runningTasks.put(taskDescription.taskId, tr) threadPool.execute(tr) if (decommissioned) { @@ -332,7 +332,8 @@ private[spark] class Executor( class TaskRunner( execBackend: ExecutorBackend, - private val taskDescription: TaskDescription) + private val taskDescription: TaskDescription, + private val plugins: Option[PluginContainer]) extends Runnable { val taskId = taskDescription.taskId @@ -479,7 +480,8 @@ private[spark] class Executor( taskAttemptId = taskId, attemptNumber = taskDescription.attemptNumber, metricsSystem = env.metricsSystem, - resources = taskDescription.resources) + resources = taskDescription.resources, + plugins = plugins) threwException = false res } { @@ -614,6 +616,7 @@ private[spark] class Executor( executorSource.SUCCEEDED_TASKS.inc(1L) setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskSucceeded()) execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) } catch { case t: TaskKilledException => @@ -623,9 +626,9 @@ private[spark] class Executor( // Here and below, put task metric peaks in a WrappedArray to expose them as a Seq // without requiring a copy. 
val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTK = ser.serialize( - TaskKilled(t.reason, accUpdates, accums, metricPeaks.toSeq)) - execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK) + val reason = TaskKilled(t.reason, accUpdates, accums, metricPeaks.toSeq) + plugins.foreach(_.onTaskFailed(reason)) + execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case _: InterruptedException | NonFatal(_) if task != null && task.reasonIfKilled.isDefined => @@ -634,9 +637,9 @@ private[spark] class Executor( val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTK = ser.serialize( - TaskKilled(killReason, accUpdates, accums, metricPeaks.toSeq)) - execBackend.statusUpdate(taskId, TaskState.KILLED, serializedTK) + val reason = TaskKilled(killReason, accUpdates, accums, metricPeaks.toSeq) + plugins.foreach(_.onTaskFailed(reason)) + execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) => val reason = task.context.fetchFailed.get.toTaskFailedReason @@ -650,11 +653,13 @@ private[spark] class Executor( s"other exception: $t") } setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) case CausedBy(cDE: CommitDeniedException) => val reason = cDE.toTaskCommitDeniedReason setTaskFinishedAndClearInterruptStatus() + plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) case t: Throwable if env.isStopped => @@ -677,21 +682,22 @@ private[spark] class Executor( val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) val metricPeaks = WrappedArray.make(metricsPoller.getTaskMetricPeaks(taskId)) - val serializedTaskEndReason = { + val (taskFailureReason, serializedTaskFailureReason) = { try { val ef = new ExceptionFailure(t, accUpdates).withAccums(accums) .withMetricPeaks(metricPeaks.toSeq) - ser.serialize(ef) + (ef, ser.serialize(ef)) } catch { case _: NotSerializableException => // t is not serializable so just send the stacktrace val ef = new ExceptionFailure(t, accUpdates, false).withAccums(accums) .withMetricPeaks(metricPeaks.toSeq) - ser.serialize(ef) + (ef, ser.serialize(ef)) } } setTaskFinishedAndClearInterruptStatus() - execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskEndReason) + plugins.foreach(_.onTaskFailed(taskFailureReason)) + execBackend.statusUpdate(taskId, TaskState.FAILED, serializedTaskFailureReason) } else { logInfo("Not reporting error to driver during JVM shutdown.") } diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala index 4eda4767094ad..f78ec250f7173 100644 --- a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala @@ -20,7 +20,7 @@ package org.apache.spark.internal.plugin import scala.collection.JavaConverters._ import scala.util.{Either, Left, Right} -import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.{SparkContext, SparkEnv, TaskFailedReason} import org.apache.spark.api.plugin._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ 
-31,6 +31,9 @@ sealed abstract class PluginContainer { def shutdown(): Unit def registerMetrics(appId: String): Unit + def onTaskStart(): Unit + def onTaskSucceeded(): Unit + def onTaskFailed(failureReason: TaskFailedReason): Unit } @@ -85,6 +88,17 @@ private class DriverPluginContainer( } } + override def onTaskStart(): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } + + override def onTaskSucceeded(): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + throw new IllegalStateException("Should not be called for the driver container.") + } } private class ExecutorPluginContainer( @@ -134,6 +148,39 @@ private class ExecutorPluginContainer( } } } + + override def onTaskStart(): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskStart() + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskStart on plugin $name.", t) + } + } + } + + override def onTaskSucceeded(): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskSucceeded() + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskSucceeded on plugin $name.", t) + } + } + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + executorPlugins.foreach { case (name, plugin) => + try { + plugin.onTaskFailed(failureReason) + } catch { + case t: Throwable => + logInfo(s"Exception while calling onTaskFailed on plugin $name.", t) + } + } + } } object PluginContainer { diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index ebc1c05435fee..81f984bb2b511 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -23,6 +23,7 @@ import java.util.Properties import org.apache.spark._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.config.APP_CALLER_CONTEXT +import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.memory.{MemoryMode, TaskMemoryManager} import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rdd.InputFileBlockHolder @@ -82,7 +83,8 @@ private[spark] abstract class Task[T]( taskAttemptId: Long, attemptNumber: Int, metricsSystem: MetricsSystem, - resources: Map[String, ResourceInformation]): T = { + resources: Map[String, ResourceInformation], + plugins: Option[PluginContainer]): T = { SparkEnv.get.blockManager.registerTask(taskAttemptId) // TODO SPARK-24874 Allow create BarrierTaskContext based on partitions, instead of whether // the stage is barrier. 
@@ -123,6 +125,8 @@ private[spark] abstract class Task[T]( Option(taskAttemptId), Option(attemptNumber)).setCurrentContext() + plugins.foreach(_.onTaskStart()) + try { runTask(context) } catch { diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index 7888796dd55e6..e7fbe5b998a88 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -129,6 +129,38 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo assert(TestSparkPlugin.driverPlugin != null) } + test("SPARK-33088: executor tasks trigger plugin calls") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + sc = new SparkContext(conf) + sc.parallelize(1 to 10, 2).count() + + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 0) + } + + test("SPARK-33088: executor failed tasks trigger plugin calls") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + sc = new SparkContext(conf) + try { + sc.parallelize(1 to 10, 2).foreach(i => throw new RuntimeException) + } catch { + case t: Throwable => // ignore exception + } + + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + } + test("plugin initialization in non-local mode") { val path = Utils.createTempDir() @@ -309,6 +341,10 @@ private class TestDriverPlugin extends DriverPlugin { private class TestExecutorPlugin extends ExecutorPlugin { + var numOnTaskStart: Int = 0 + var numOnTaskSucceeded: Int = 0 + var numOnTaskFailed: Int = 0 + override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { ctx.metricRegistry().register("executorMetric", new Gauge[Int] { override def getValue(): Int = 84 @@ -316,6 +352,17 @@ private class TestExecutorPlugin extends ExecutorPlugin { TestSparkPlugin.executorContext = ctx } + override def onTaskStart(): Unit = { + numOnTaskStart += 1 + } + + override def onTaskSucceeded(): Unit = { + numOnTaskSucceeded += 1 + } + + override def onTaskFailed(failureReason: TaskFailedReason): Unit = { + numOnTaskFailed += 1 + } } private object TestSparkPlugin { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index 394a2a9fbf7cb..8a7ff9eb6dcd3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -70,7 +70,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark 0, 0, taskBinary, rdd.partitions(0), Seq.empty, 0, new Properties, closureSerializer.serialize(TaskMetrics.registered).array()) intercept[RuntimeException] { - task.run(0, 0, null, null) + task.run(0, 0, null, null, Option.empty) } assert(TaskContextSuite.completed) } @@ -92,7 +92,7 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark 0, 0, taskBinary, 
rdd.partitions(0), Seq.empty, 0, new Properties, closureSerializer.serialize(TaskMetrics.registered).array()) intercept[RuntimeException] { - task.run(0, 0, null, null) + task.run(0, 0, null, null, Option.empty) } assert(TaskContextSuite.lastError.getMessage == "damn error") } From bf52fa83b2a031cfa66bdf00a0710c7d6d2b326b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 16 Oct 2020 13:50:57 +0900 Subject: [PATCH 0254/1009] [SPARK-33165][SQL][TESTS][FOLLOW-UP] Use scala.Predef.assert instead ### What changes were proposed in this pull request? This PR proposes to use `scala.Predef.assert` instead of `org.scalatest.Assertions.assert` removed at https://github.com/apache/spark/pull/30064 ### Why are the changes needed? Just to keep the same behaviour. ### Does this PR introduce _any_ user-facing change? No, dev-only ### How was this patch tested? Recover the existing asserts. Closes #30065 from HyukjinKwon/SPARK-33165. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../src/test/scala/org/apache/spark/benchmark/Benchmark.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala index 0b2f512b947e1..5511852ca176e 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/Benchmark.scala @@ -161,6 +161,7 @@ private[spark] class Benchmark( // scalastyle:off println(s" Stopped after $i iterations, ${NANOSECONDS.toMillis(runTimes.sum)} ms") // scalastyle:on + assert(runTimes.nonEmpty) val best = runTimes.min val avg = runTimes.sum / runTimes.size val stdev = if (runTimes.size > 1) { @@ -182,15 +183,18 @@ private[spark] object Benchmark { private var timeStart: Long = 0L def startTiming(): Unit = { + assert(timeStart == 0L, "Already started timing.") timeStart = System.nanoTime } def stopTiming(): Unit = { + assert(timeStart != 0L, "Have not started timing.") accumulatedTime += System.nanoTime - timeStart timeStart = 0L } def totalTime(): Long = { + assert(timeStart == 0L, "Have not stopped timing.") accumulatedTime } } From 306872eefaa9228eaed1e797be11c8c5fa1705cd Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Fri, 16 Oct 2020 06:05:17 +0000 Subject: [PATCH 0255/1009] [SPARK-33139][SQL] protect setActionSession and clearActiveSession ### What changes were proposed in this pull request? This PR is a sub-task of [SPARK-33138](https://issues.apache.org/jira/browse/SPARK-33138). In order to make SQLConf.get reliable and stable, we need to make sure user can't pollute the SQLConf and SparkSession Context via calling setActiveSession and clearActiveSession. Change of the PR: * add legacy config spark.sql.legacy.allowModifyActiveSession to fallback to old behavior if user do need to call these two API. * by default, if user call these two API, it will throw exception * add extra two internal and private API setActiveSessionInternal and clearActiveSessionInternal for current internal usage * change all internal reference to new internal API except for SQLContext.setActive and SQLContext.clearActive ### Why are the changes needed? Make SQLConf.get reliable and stable. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? * Add UT in SparkSessionBuilderSuite to test the legacy config * Existing test Closes #30042 from leanken/leanken-SPARK-33139. 
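A short sketch of the resulting behavior (mirroring the new test in `SparkSessionBuilderSuite` below; the object name is made up):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: with this patch, modifying the active session from user
// code throws unless the legacy static conf is enabled at session build time.
object ActiveSessionSketch extends App {
  val spark = SparkSession.builder().master("local[1]").getOrCreate()

  try {
    SparkSession.setActiveSession(spark.newSession()) // deprecated by this patch
  } catch {
    case _: UnsupportedOperationException =>
      println("Modifying the active session is rejected by default.")
  }

  // Opting back into the old behavior requires the static conf, e.g.:
  // SparkSession.builder()
  //   .config("spark.sql.legacy.allowModifyActiveSession", "true")
  //   .getOrCreate()

  spark.stop()
}
```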
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 + .../kafka010/KafkaMicroBatchSourceSuite.scala | 2 +- .../org/apache/spark/SharedSparkSession.java | 3 +- .../mllib/util/MLlibTestSparkContext.scala | 2 +- python/pyspark/sql/session.py | 15 ++++++-- .../apache/spark/sql/internal/SQLConf.scala | 3 ++ .../spark/sql/internal/StaticSQLConf.scala | 9 +++++ .../org/apache/spark/sql/SparkSession.scala | 26 +++++++++++-- .../spark/sql/execution/SQLExecution.scala | 6 +-- .../spark/sql/execution/SparkPlan.scala | 2 +- .../execution/streaming/StreamExecution.scala | 2 +- .../apache/spark/sql/DeprecatedAPISuite.scala | 6 +-- .../apache/spark/sql/LocalSparkSession.scala | 4 +- .../apache/spark/sql/SQLContextSuite.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 2 +- .../apache/spark/sql/SessionStateSuite.scala | 2 +- .../spark/sql/SparkSessionBuilderSuite.scala | 37 ++++++++++++++----- .../sql/SparkSessionExtensionSuite.scala | 2 +- .../sql/connector/V1WriteFallbackSuite.scala | 4 +- .../CoalesceShufflePartitionsSuite.scala | 4 +- .../adaptive/AdaptiveQueryExecSuite.scala | 4 +- .../state/StateStoreCoordinatorSuite.scala | 2 +- .../streaming/state/StateStoreSuite.scala | 2 +- .../SymmetricHashJoinStateManagerSuite.scala | 2 +- .../sql/streaming/StreamingJoinSuite.scala | 4 +- .../apache/spark/sql/test/SQLTestUtils.scala | 2 +- .../spark/sql/test/SharedSparkSession.scala | 2 +- .../spark/sql/test/TestSQLContext.scala | 2 +- .../hive/thriftserver/SparkOperation.scala | 6 +-- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 30 files changed, 112 insertions(+), 51 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index c1de58d85d5bf..cc69e78108ffd 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,6 +24,8 @@ license: | ## Upgrading from Spark SQL 3.0 to 3.1 + - In Spark 3.1, `SparkSession.setActiveSession` and `SparkSession.clearActiveSession` are deprecated and unsupported, it will throw `UnsupportedOperationException` if called. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.allowModifyActiveSession` to true if you really need to use these APIs. + - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. - In Spark 3.1, grouping_id() returns long values. In Spark version 3.0 and earlier, this function returns int values. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.integerGroupingId` to `true`. 
diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 63659989dec1b..853d201ba7ea5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1189,7 +1189,7 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { numPartitionsGenerated: Int, reusesConsumers: Boolean): Unit = { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) withTempDir { dir => val provider = new KafkaSourceProvider() val options = Map( diff --git a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java index 35a250955b282..49bd0a43a16d6 100644 --- a/mllib/src/test/java/org/apache/spark/SharedSparkSession.java +++ b/mllib/src/test/java/org/apache/spark/SharedSparkSession.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.Serializable; +import org.apache.spark.sql.SparkSession$; import org.junit.After; import org.junit.Before; @@ -47,7 +48,7 @@ public void tearDown() { spark = null; } finally { SparkSession.clearDefaultSession(); - SparkSession.clearActiveSession(); + SparkSession$.MODULE$.clearActiveSessionInternal(); } } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 5eb128abacdb9..840ca6f8af0b1 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -48,7 +48,7 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => override def afterAll(): Unit = { try { Utils.deleteRecursively(new File(checkpointDir)) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() if (spark != null) { spark.stop() } diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 8ca6e41a9b940..e6ab1ea3878f3 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -230,7 +230,10 @@ def __init__(self, sparkContext, jsparkSession=None): SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) - self._jvm.SparkSession.setActiveSession(self._jsparkSession) + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .setActiveSessionInternal(self._jsparkSession) def _repr_html_(self): return """ @@ -561,7 +564,10 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr Py4JJavaError: ... """ SparkSession._activeSession = self - self._jvm.SparkSession.setActiveSession(self._jsparkSession) + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .setActiveSessionInternal(self._jsparkSession) if isinstance(data, DataFrame): raise TypeError("data is already a DataFrame") @@ -683,7 +689,10 @@ def stop(self): self._sc.stop() # We should clean the default session up. See SPARK-23228. 
self._jvm.SparkSession.clearDefaultSession() - self._jvm.SparkSession.clearActiveSession() + self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ + .getDeclaredField("MODULE$")\ + .get(None)\ + .clearActiveSessionInternal() SparkSession._instantiatedSession = None SparkSession._activeSession = None SQLContext._instantiatedContext = None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 79d78088f51a0..319387fe854cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3414,6 +3414,9 @@ class SQLConf extends Serializable with Logging { def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID) + def legacyAllowModifyActiveSession: Boolean = + getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) + def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index ca1074fcf6fc0..b9446465e1f79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -249,4 +249,13 @@ object StaticSQLConf { .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(-1) + + val LEGACY_ALLOW_MODIFY_ACTIVE_SESSION = + buildStaticConf("spark.sql.legacy.allowModifyActiveSession") + .internal() + .doc("When set to true, user is allowed to use setActiveSession or clearActiveSession " + + "to modify the current active SparkSession, otherwise an exception will be thrown.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 5704414df2d0d..b15d6f981291c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -765,9 +765,9 @@ class SparkSession private( // set and not the default session. This to prevent that we promote the default session to the // active session once we are done. 
val old = SparkSession.activeThreadSession.get() - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) try block finally { - SparkSession.setActiveSession(old) + SparkSession.setActiveSessionInternal(old) } } } @@ -946,7 +946,7 @@ object SparkSession extends Logging { session = new SparkSession(sparkContext, None, None, extensions, options.toMap) setDefaultSession(session) - setActiveSession(session) + setActiveSessionInternal(session) registerContextListener(sparkContext) } @@ -984,7 +984,16 @@ object SparkSession extends Logging { * * @since 2.0.0 */ + @deprecated("This method is deprecated and will be removed in future versions.", "3.1.0") def setActiveSession(session: SparkSession): Unit = { + if (SQLConf.get.legacyAllowModifyActiveSession) { + setActiveSessionInternal(session) + } else { + throw new UnsupportedOperationException("Not allowed to modify active Spark session.") + } + } + + private[sql] def setActiveSessionInternal(session: SparkSession): Unit = { activeThreadSession.set(session) } @@ -994,7 +1003,16 @@ object SparkSession extends Logging { * * @since 2.0.0 */ + @deprecated("This method is deprecated and will be removed in future versions.", "3.1.0") def clearActiveSession(): Unit = { + if (SQLConf.get.legacyAllowModifyActiveSession) { + clearActiveSessionInternal() + } else { + throw new UnsupportedOperationException("Not allowed to modify active Spark session.") + } + } + + private[spark] def clearActiveSessionInternal(): Unit = { activeThreadSession.remove() } @@ -1149,7 +1167,7 @@ object SparkSession extends Logging { | """.stripMargin) session.get.stop() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index c62670b227bcc..1465e57743323 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -180,15 +180,15 @@ object SQLExecution { exec.submit(() => { val originalSession = SparkSession.getActiveSession val originalLocalProps = sc.getLocalProperties - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) sc.setLocalProperties(localProps) val res = body // reset active session and local props. sc.setLocalProperties(originalLocalProps) if (originalSession.nonEmpty) { - SparkSession.setActiveSession(originalSession.get) + SparkSession.setActiveSessionInternal(originalSession.get) } else { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() } res }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ead8c00031112..42eb131b8e4ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -82,7 +82,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ /** Overridden make copy also propagates sqlContext to copied plan. 
*/ override def makeCopy(newArgs: Array[AnyRef]): SparkPlan = { if (sqlContext != null) { - SparkSession.setActiveSession(sqlContext.sparkSession) + SparkSession.setActiveSessionInternal(sqlContext.sparkSession) } super.makeCopy(newArgs) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index aba0463f56cd7..09c0d2148307c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -315,7 +315,7 @@ abstract class StreamExecution( startLatch.countDown() // While active, repeatedly attempt to run batches. - SparkSession.setActiveSession(sparkSession) + SparkSession.setActiveSessionInternal(sparkSession) updateStatusMessage("Initializing sources") // force initialization of the logical plan so that the sources can be created diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala index 25b8849d61248..d27333ec727d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DeprecatedAPISuite.scala @@ -130,10 +130,10 @@ class DeprecatedAPISuite extends QueryTest with SharedSparkSession { test("SQLContext.setActive/clearActive") { val sc = spark.sparkContext val sqlContext = new SQLContext(sc) - SQLContext.setActive(sqlContext) + intercept[UnsupportedOperationException](SQLContext.setActive(sqlContext)) + assert(SparkSession.getActiveSession === Some(spark)) + intercept[UnsupportedOperationException](SQLContext.clearActive()) assert(SparkSession.getActiveSession === Some(spark)) - SQLContext.clearActive() - assert(SparkSession.getActiveSession === None) } test("SQLContext.applySchema") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala index 36db95ff8a31b..8fdf55aeae6d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/LocalSparkSession.scala @@ -30,14 +30,14 @@ trait LocalSparkSession extends BeforeAndAfterEach with BeforeAndAfterAll { self override def beforeAll(): Unit = { super.beforeAll() InternalLoggerFactory.setDefaultFactory(Slf4JLoggerFactory.INSTANCE) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } override def afterEach(): Unit = { try { LocalSparkSession.stop(spark) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() spark = null } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala index a1799829932b8..aec124de81049 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLContextSuite.scala @@ -43,7 +43,7 @@ class SQLContextSuite extends SparkFunSuite with SharedSparkContext { val newSession = sqlContext.newSession() assert(SQLContext.getOrCreate(sc).eq(sqlContext), "SQLContext.getOrCreate after explicitly created SQLContext did not return the context") - SparkSession.setActiveSession(newSession.sparkSession) + 
SparkSession.setActiveSessionInternal(newSession.sparkSession) assert(SQLContext.getOrCreate(sc).eq(newSession), "SQLContext.getOrCreate after explicitly setActive() did not return the active context") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b86df4db816b3..a002f720a3c4a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3468,7 +3468,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark // problem before the fix. withSQLConf(SQLConf.CODEGEN_FALLBACK.key -> "true") { val cloned = spark.cloneSession() - SparkSession.setActiveSession(cloned) + SparkSession.setActiveSessionInternal(cloned) assert(SQLConf.get.getConf(SQLConf.CODEGEN_FALLBACK) === true) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala index 003f5bc835d5f..2f766a270ad73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala @@ -48,7 +48,7 @@ class SessionStateSuite extends SparkFunSuite { if (activeSession != null) { activeSession.stop() activeSession = null - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 9da32d02aa723..e1f7b6f455e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -22,7 +22,7 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.{SparkConf, SparkContext, SparkException, SparkFunSuite} import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT import org.apache.spark.internal.config.UI.UI_ENABLED -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.StaticSQLConf._ /** @@ -33,7 +33,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { override def afterEach(): Unit = { // This suite should not interfere with the other test suites. 
SparkSession.getActiveSession.foreach(_.stop()) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.getDefaultSession.foreach(_.stop()) SparkSession.clearDefaultSession() } @@ -64,7 +64,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("get active or default session") { val session = SparkSession.builder().master("local").getOrCreate() assert(SparkSession.active == session) - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() assert(SparkSession.active == session) SparkSession.clearDefaultSession() intercept[IllegalStateException](SparkSession.active) @@ -82,7 +82,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("use session from active thread session and propagate config options") { val defaultSession = SparkSession.builder().master("local").getOrCreate() val activeSession = defaultSession.newSession() - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) val session = SparkSession.builder().config("spark-config2", "a").getOrCreate() assert(activeSession != defaultSession) @@ -90,7 +90,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(session.conf.get("spark-config2") == "a") assert(session.sessionState.conf == SQLConf.get) assert(SQLConf.get.getConfString("spark-config2") == "a") - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() assert(SparkSession.builder().getOrCreate() == defaultSession) } @@ -105,7 +105,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { test("create a new session if the active thread session has been stopped") { val activeSession = SparkSession.builder().master("local").getOrCreate() - SparkSession.setActiveSession(activeSession) + SparkSession.setActiveSessionInternal(activeSession) activeSession.stop() val newSession = SparkSession.builder().master("local").getOrCreate() assert(newSession != activeSession) @@ -181,7 +181,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { .master("local") .getOrCreate() val postFirstCreation = context.listenerBus.listeners.size() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() SparkSession @@ -190,7 +190,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { .master("local") .getOrCreate() val postSecondCreation = context.listenerBus.listeners.size() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() assert(postFirstCreation == postSecondCreation) } @@ -211,7 +211,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(session1.conf.get(GLOBAL_TEMP_DATABASE) === "globaltempdb-spark-31532") // do not propagate static sql configs to the existing default session - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() val session2 = SparkSession .builder() .config(WAREHOUSE_PATH.key, "SPARK-31532-db") @@ -281,4 +281,23 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { () } } + + test("SPARK-33139: Test SparkSession.setActiveSession/clearActiveSession") { + Seq(true, false).foreach { allowModifyActiveSession => + val session = SparkSession.builder() + .master("local") + .config(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION.key, allowModifyActiveSession) + 
.getOrCreate() + + val newSession = session.newSession() + if (!allowModifyActiveSession) { + intercept[UnsupportedOperationException](SparkSession.setActiveSession(newSession)) + intercept[UnsupportedOperationException](SparkSession.clearActiveSession()) + } else { + SparkSession.setActiveSession(newSession) + SparkSession.clearActiveSession() + } + session.stop() + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index e5e8bc6917799..ebe4e8dea97e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -51,7 +51,7 @@ class SparkSessionExtensionSuite extends SparkFunSuite { private def stop(spark: SparkSession): Unit = { spark.stop() - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 4b52a4cbf4116..0a86a41e86255 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -130,7 +130,7 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before } test("fallback writes should only analyze plan once") { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() try { val session = SparkSession.builder() @@ -141,7 +141,7 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before val df = session.createDataFrame(Seq((1, "x"), (2, "y"), (3, "z"))) df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") } finally { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) SparkSession.setDefaultSession(spark) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala index 22c5b651f7e12..fd55ad69ed386 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala @@ -38,14 +38,14 @@ class CoalesceShufflePartitionsSuite extends SparkFunSuite with BeforeAndAfterAl originalActiveSparkSession = SparkSession.getActiveSession originalInstantiatedSparkSession = SparkSession.getDefaultSession - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } override protected def afterAll(): Unit = { try { // Set these states back. 
- originalActiveSparkSession.foreach(ctx => SparkSession.setActiveSession(ctx)) + originalActiveSparkSession.foreach(ctx => SparkSession.setActiveSessionInternal(ctx)) originalInstantiatedSparkSession.foreach(ctx => SparkSession.setDefaultSession(ctx)) } finally { super.afterAll() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 38a323b1c057e..fc95ab53dade8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -999,9 +999,9 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { val df = spark.range(10).select(sum('id)) assert(df.queryExecution.executedPlan.isInstanceOf[AdaptiveSparkPlanExec]) - SparkSession.setActiveSession(null) + SparkSession.setActiveSessionInternal(null) checkAnswer(df, Seq(Row(45))) - SparkSession.setActiveSession(spark) // recover the active session. + SparkSession.setActiveSessionInternal(spark) // recover the active session. } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala index 7bca225dfdd8f..20e488960fa59 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCoordinatorSuite.scala @@ -121,7 +121,7 @@ class StateStoreCoordinatorSuite extends SparkFunSuite with SharedSparkContext { var coordRef: StateStoreCoordinatorRef = null try { val spark = SparkSession.builder().sparkContext(sc).getOrCreate() - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) import spark.implicits._ coordRef = spark.streams.stateStoreCoordinator implicit val sqlContext = spark.sqlContext diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 488879938339d..5dbc6723a3ff9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -567,7 +567,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] try { val checkpointLocation = Utils.createTempDir().getAbsoluteFile val spark = SparkSession.builder().master("local[2]").getOrCreate() - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) implicit val sqlContext = spark.sqlContext spark.conf.set(SQLConf.SHUFFLE_PARTITIONS.key, "1") import spark.implicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala index ce1eabeb932fb..5df47e1d5faa0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala @@ -34,7 +34,7 @@ import 
org.apache.spark.sql.types._ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter { before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b182727408bbf..b235bf7c3180a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -45,7 +45,7 @@ import org.apache.spark.util.Utils class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } @@ -492,7 +492,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with import org.apache.spark.sql.functions._ before { - SparkSession.setActiveSession(spark) // set this before force initializing 'joinExec' + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' spark.streams.stateStoreCoordinator // initialize the lazy coordinator } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 7be15e9d87004..d15dc8c6bccd5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -242,7 +242,7 @@ private[sql] trait SQLTestUtilsBase } protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { - SparkSession.setActiveSession(spark) + SparkSession.setActiveSessionInternal(spark) super.withSQLConf(pairs: _*)(f) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index cfc92a780308d..a38b360b79c05 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -144,7 +144,7 @@ trait SharedSparkSessionBase } } } finally { - SparkSession.clearActiveSession() + SparkSession.clearActiveSessionInternal() SparkSession.clearDefaultSession() } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index ac06e1f41bfb3..a477eed4478e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -35,7 +35,7 @@ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) } SparkSession.setDefaultSession(this) - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) @transient override lazy val sessionState: SessionState = { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala index bbfc1b83379aa..be9c024f9ca64 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala @@ -65,7 +65,7 @@ private[hive] trait SparkOperation extends Operation with Logging { try { // Set active SparkSession - SparkSession.setActiveSession(sqlContext.sparkSession) + SparkSession.setActiveSessionInternal(sqlContext.sparkSession) // Set scheduler pool sqlContext.sparkSession.conf.getOption(SQLConf.THRIFTSERVER_POOL.key) match { @@ -81,8 +81,8 @@ private[hive] trait SparkOperation extends Operation with Logging { sqlContext.sparkContext.setLocalProperties(originalProps) originalSession match { - case Some(session) => SparkSession.setActiveSession(session) - case None => SparkSession.clearActiveSession() + case Some(session) => SparkSession.setActiveSessionInternal(session) + case None => SparkSession.clearActiveSessionInternal() } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index accfcb8d9deff..0c601ef798dcc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -195,7 +195,7 @@ private[hive] class TestHiveSparkSession( } SparkSession.setDefaultSession(this) - SparkSession.setActiveSession(this) + SparkSession.setActiveSessionInternal(this) { // set the metastore temporary configuration val metastoreTempConf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false) ++ Map( From b69e0651fee0b8f3ae97ffdca713fb6578b9a0da Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 16 Oct 2020 11:11:57 +0000 Subject: [PATCH 0256/1009] [SPARK-33126][SQL] Simplify offset window function(Remove direction field) ### What changes were proposed in this pull request? The current `Lead`/`Lag` extends `OffsetWindowFunction`. `OffsetWindowFunction` contains field `direction` and use `direction` to calculates the `boundary`. We can use single literal expression unify the two properties. For example: 3 means `direction` is Asc and `boundary` is 3. -3 means `direction` is Desc and `boundary` is -3. ### Why are the changes needed? Improve the current implement of `Lead`/`Lag`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30023 from beliefer/SPARK-33126. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/windowExpressions.scala | 33 +++++++------------ 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 0e15ff2904306..bc0b4ac018f9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -348,16 +348,13 @@ abstract class OffsetWindowFunction /** * (Foldable) expression that contains the number of rows between the current row and the row - * where the input expression is evaluated. + * where the input expression is evaluated. 
If `offset` is a positive integer, it means that + * the direction of the `offset` is from front to back. If it is a negative integer, the direction + * of the `offset` is from back to front. If it is zero, it means that the offset is ignored and + * use current row. */ val offset: Expression - /** - * Direction of the number of rows between the current row and the row where the input expression - * is evaluated. - */ - val direction: SortDirection - override def children: Seq[Expression] = Seq(input, offset, default) /* @@ -373,16 +370,7 @@ abstract class OffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override lazy val frame: WindowFrame = { - val boundary = direction match { - case Ascending => offset - case Descending => UnaryMinus(offset) match { - case e: Expression if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) - case o => o - } - } - SpecifiedWindowFrame(RowFrame, boundary, boundary) - } + override lazy val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, offset, offset) override def checkInputDataTypes(): TypeCheckResult = { val check = super.checkInputDataTypes() @@ -444,8 +432,6 @@ case class Lead(input: Expression, offset: Expression, default: Expression) def this(input: Expression) = this(input, Literal(1)) def this() = this(Literal(null)) - - override val direction = Ascending } /** @@ -480,7 +466,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lag(input: Expression, offset: Expression, default: Expression) +case class Lag(input: Expression, inputOffset: Expression, default: Expression) extends OffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -489,7 +475,12 @@ case class Lag(input: Expression, offset: Expression, default: Expression) def this() = this(Literal(null)) - override val direction = Descending + override def children: Seq[Expression] = Seq(input, inputOffset, default) + + override val offset: Expression = UnaryMinus(inputOffset) match { + case e: Expression if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) + case o => o + } } abstract class AggregateWindowFunction extends DeclarativeAggregate with WindowFunction { From 3ae1520185e2d96d1bdbd08c989f0d48ad3ba578 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 16 Oct 2020 11:26:27 +0000 Subject: [PATCH 0257/1009] [SPARK-33131][SQL] Fix grouping sets with having clause can not resolve qualified col name ### What changes were proposed in this pull request? Correct the resolution of having clause. ### Why are the changes needed? Grouping sets construct new aggregate lost the qualified name of grouping expression. Here is a example: ``` -- Works resolved by `ResolveReferences` select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having c1 = 1 -- Works because of the extra expression c1 select c1 as c2 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 -- Failed select c1 from values (1) as t1(c1) group by grouping sets(t1.c1) having t1.c1 = 1 ``` It wroks with `Aggregate` without grouping sets through `ResolveReferences`, but Grouping sets not works since the exprId has been changed. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? add test. Closes #30029 from ulysses-you/SPARK-33131. 
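For reference, the failing case above can be reproduced from a spark-shell with a few lines of Scala. This is only an illustrative sketch and assumes an active `SparkSession` named `spark`; before this change the second statement failed to resolve `t1.c1`, and with the qualifier preserved it returns a single row containing 1.

```
// Sketch of the behavior described above, run against an existing SparkSession `spark`.

// Works before and after the fix: the unqualified c1 is resolved by ResolveReferences.
spark.sql(
  "SELECT c1 FROM VALUES (1) as t1(c1) GROUP BY GROUPING SETS(t1.c1) HAVING c1 = 1").show()

// Previously failed to resolve t1.c1 because the grouping-sets rewrite dropped the
// qualifier; with this change it resolves and returns a single row with value 1.
spark.sql(
  "SELECT c1 FROM VALUES (1) as t1(c1) GROUP BY GROUPING SETS(t1.c1) HAVING t1.c1 = 1").show()
```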
Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../resources/sql-tests/inputs/having.sql | 6 ++++ .../sql-tests/results/having.sql.out | 32 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 337cf1c0bdc50..0ba150ec1efb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -470,7 +470,7 @@ class Analyzer( */ private def constructGroupByAlias(groupByExprs: Seq[Expression]): Seq[Alias] = { groupByExprs.map { - case e: NamedExpression => Alias(e, e.name)() + case e: NamedExpression => Alias(e, e.name)(qualifier = e.qualifier) case other => Alias(other, other.toString)() } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql index 3b75be19b5677..2799b1a94d085 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql @@ -24,3 +24,9 @@ SELECT SUM(a) AS b, CAST('2020-01-01' AS DATE) AS fake FROM VALUES (1, 10), (2, SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY GROUPING SETS ((b), (a, b)) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY CUBE(a, b) HAVING b > 10; SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) HAVING b > 10; + +-- SPARK-33131: Grouping sets with having clause can not resolve qualified col name. +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1; +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1; diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 1b3ac7865159f..6508143e6f9fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -81,3 +81,35 @@ SELECT SUM(a) AS b FROM VALUES (1, 10), (2, 20) AS T(a, b) GROUP BY ROLLUP(a, b) struct -- !query output 2 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY GROUPING SETS(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY CUBE(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY ROLLUP(t.c1) HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT c1 FROM VALUES (1, 2) as t(c1, c2) GROUP BY t.c1 HAVING t.c1 = 1 +-- !query schema +struct +-- !query output +1 From e029e891abeb37f383e4d5237edf693c8ad53bed Mon Sep 17 00:00:00 2001 From: neko Date: Fri, 16 Oct 2020 23:13:22 +0800 Subject: [PATCH 0258/1009] [SPARK-33145][WEBUI] Fix when `Succeeded Jobs` has many child url elements,they will extend over the edge of the page ### What changes were proposed in this pull request? In Execution web page, when `Succeeded Job`(or Failed Jobs) has many child url elements,they will extend over the edge of the page. 
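As a simplified sketch of how the fix works (not the exact UI code): the generated job list is wrapped in a `<div>` that now carries a `job-url` class, and a matching CSS rule lets a long run of links wrap instead of overflowing. The `jobLinks` helper and URL pattern below are illustrative assumptions; the real code lives in `ExecutionPage.scala` and `spark-sql-viz.css` as shown in the diff that follows.

```
import scala.xml.Node

// Illustrative sketch only (assumes the scala-xml module is on the classpath, as it is
// for Spark's own UI code). The real page builds links with a jobURL(request, jobId) helper.
def jobLinks(label: String, jobIds: Seq[Int]): Node =
  <div class="job-url">
    {label}
    {jobIds.sorted.map(id => <a href={s"/jobs/job/?id=$id"}>{id.toString}</a>)}
  </div>

// Paired with the new rule in spark-sql-viz.css:
//   .job-url { word-wrap: break-word; }
// a long sequence of job links now breaks across lines instead of extending past the page edge.
```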
### Why are the changes needed? To make the page more friendly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Munual test result shows as below: ![fixed](https://user-images.githubusercontent.com/52202080/95977319-50734600-0e4b-11eb-93c0-b8deb565bcd8.png) Closes #30035 from akiyamaneko/sql_execution_job_overflow. Authored-by: neko Signed-off-by: Gengliang Wang --- .../apache/spark/sql/execution/ui/static/spark-sql-viz.css | 5 +++++ .../org/apache/spark/sql/execution/ui/ExecutionPage.scala | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css index 9a32b79cd070f..dbdbf9fbf57b1 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.css @@ -52,3 +52,8 @@ .tooltip-inner { word-wrap:break-word; } + +/* Breaks the long job url list when showing Details for Query in SQL */ +.job-url { + word-wrap: break-word; +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 76bc7faf18d01..b15c70a7eba75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -45,7 +45,7 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging if (jobStatus == status) Some(jobId) else None } if (jobs.nonEmpty) { -
<div>
+      <div class="job-url">
  5. {label} {jobs.toSeq.sorted.map { jobId => {jobId.toString}  From 250730170570140788819765ee7519bd823c173d Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 16 Oct 2020 09:37:54 -0700 Subject: [PATCH 0259/1009] [SPARK-33159][SQL] Use hive-service-rpc as dependency instead of inlining the generated code ### What changes were proposed in this pull request? Hive's `hive-service-rpc` module started since hive-2.1.0 and it contains only the thrift IDL file and the code generated by it. Removing the inlined code will help maintain and upgrade builtin hive versions ### Why are the changes needed? to simply the code. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? passing CI Closes #30055 from yaooqinn/SPARK-33159. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 1 + dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + pom.xml | 19 +- sql/hive-thriftserver/if/TCLIService.thrift | 1269 -- sql/hive-thriftserver/pom.xml | 24 +- .../service/rpc/thrift/TArrayTypeEntry.java | 387 - .../service/rpc/thrift/TBinaryColumn.java | 548 - .../hive/service/rpc/thrift/TBoolColumn.java | 548 - .../hive/service/rpc/thrift/TBoolValue.java | 390 - .../hive/service/rpc/thrift/TByteColumn.java | 548 - .../hive/service/rpc/thrift/TByteValue.java | 390 - .../hive/service/rpc/thrift/TCLIService.java | 18138 ---------------- .../rpc/thrift/TCLIServiceConstants.java | 106 - .../rpc/thrift/TCancelDelegationTokenReq.java | 495 - .../thrift/TCancelDelegationTokenResp.java | 394 - .../rpc/thrift/TCancelOperationReq.java | 394 - .../rpc/thrift/TCancelOperationResp.java | 394 - .../rpc/thrift/TCloseOperationReq.java | 394 - .../rpc/thrift/TCloseOperationResp.java | 394 - .../service/rpc/thrift/TCloseSessionReq.java | 394 - .../service/rpc/thrift/TCloseSessionResp.java | 394 - .../hive/service/rpc/thrift/TColumn.java | 736 - .../hive/service/rpc/thrift/TColumnDesc.java | 704 - .../hive/service/rpc/thrift/TColumnValue.java | 675 - .../service/rpc/thrift/TDoubleColumn.java | 548 - .../hive/service/rpc/thrift/TDoubleValue.java | 390 - .../rpc/thrift/TExecuteStatementReq.java | 863 - .../rpc/thrift/TExecuteStatementResp.java | 509 - .../service/rpc/thrift/TFetchOrientation.java | 57 - .../service/rpc/thrift/TFetchResultsReq.java | 714 - .../service/rpc/thrift/TFetchResultsResp.java | 612 - .../service/rpc/thrift/TGetCatalogsReq.java | 394 - .../service/rpc/thrift/TGetCatalogsResp.java | 509 - .../service/rpc/thrift/TGetColumnsReq.java | 822 - .../service/rpc/thrift/TGetColumnsResp.java | 509 - .../rpc/thrift/TGetCrossReferenceReq.java | 1034 - .../rpc/thrift/TGetCrossReferenceResp.java | 509 - .../rpc/thrift/TGetDelegationTokenReq.java | 596 - .../rpc/thrift/TGetDelegationTokenResp.java | 504 - .../service/rpc/thrift/TGetFunctionsReq.java | 711 - .../service/rpc/thrift/TGetFunctionsResp.java | 509 - .../hive/service/rpc/thrift/TGetInfoReq.java | 507 - .../hive/service/rpc/thrift/TGetInfoResp.java | 497 - .../hive/service/rpc/thrift/TGetInfoType.java | 180 - .../service/rpc/thrift/TGetInfoValue.java | 597 - .../rpc/thrift/TGetOperationStatusReq.java | 501 - .../rpc/thrift/TGetOperationStatusResp.java | 1342 -- .../rpc/thrift/TGetPrimaryKeysReq.java | 716 - .../rpc/thrift/TGetPrimaryKeysResp.java | 509 - .../rpc/thrift/TGetResultSetMetadataReq.java | 394 - .../rpc/thrift/TGetResultSetMetadataResp.java | 509 - .../service/rpc/thrift/TGetSchemasReq.java | 610 - .../service/rpc/thrift/TGetSchemasResp.java | 509 - 
.../service/rpc/thrift/TGetTableTypesReq.java | 394 - .../rpc/thrift/TGetTableTypesResp.java | 509 - .../service/rpc/thrift/TGetTablesReq.java | 871 - .../service/rpc/thrift/TGetTablesResp.java | 509 - .../service/rpc/thrift/TGetTypeInfoReq.java | 394 - .../service/rpc/thrift/TGetTypeInfoResp.java | 509 - .../service/rpc/thrift/THandleIdentifier.java | 508 - .../hive/service/rpc/thrift/TI16Column.java | 548 - .../hive/service/rpc/thrift/TI16Value.java | 390 - .../hive/service/rpc/thrift/TI32Column.java | 548 - .../hive/service/rpc/thrift/TI32Value.java | 390 - .../hive/service/rpc/thrift/TI64Column.java | 548 - .../hive/service/rpc/thrift/TI64Value.java | 390 - .../rpc/thrift/TJobExecutionStatus.java | 48 - .../service/rpc/thrift/TMapTypeEntry.java | 482 - .../service/rpc/thrift/TOpenSessionReq.java | 778 - .../service/rpc/thrift/TOpenSessionResp.java | 783 - .../service/rpc/thrift/TOperationHandle.java | 709 - .../service/rpc/thrift/TOperationState.java | 66 - .../service/rpc/thrift/TOperationType.java | 66 - .../rpc/thrift/TPrimitiveTypeEntry.java | 516 - .../rpc/thrift/TProgressUpdateResp.java | 1033 - .../service/rpc/thrift/TProtocolVersion.java | 69 - .../rpc/thrift/TRenewDelegationTokenReq.java | 495 - .../rpc/thrift/TRenewDelegationTokenResp.java | 394 - .../apache/hive/service/rpc/thrift/TRow.java | 443 - .../hive/service/rpc/thrift/TRowSet.java | 920 - .../service/rpc/thrift/TSessionHandle.java | 394 - .../hive/service/rpc/thrift/TStatus.java | 875 - .../hive/service/rpc/thrift/TStatusCode.java | 54 - .../service/rpc/thrift/TStringColumn.java | 548 - .../hive/service/rpc/thrift/TStringValue.java | 393 - .../service/rpc/thrift/TStructTypeEntry.java | 452 - .../hive/service/rpc/thrift/TTableSchema.java | 443 - .../hive/service/rpc/thrift/TTypeDesc.java | 443 - .../hive/service/rpc/thrift/TTypeEntry.java | 614 - .../hive/service/rpc/thrift/TTypeId.java | 105 - .../rpc/thrift/TTypeQualifierValue.java | 365 - .../service/rpc/thrift/TTypeQualifiers.java | 454 - .../service/rpc/thrift/TUnionTypeEntry.java | 452 - .../rpc/thrift/TUserDefinedTypeEntry.java | 389 - 94 files changed, 17 insertions(+), 63672 deletions(-) delete mode 100644 sql/hive-thriftserver/if/TCLIService.thrift delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java 
delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java delete mode 100644 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java delete mode 100644 
sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java delete mode 100644 sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index f049ad1f5bb74..c389c885cb0e5 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -88,6 +88,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index a4dbeb112473a..ed0db42828301 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -87,6 +87,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar +hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/pom.xml b/pom.xml index 75b6776cbe470..96406d9bcef13 100644 --- a/pom.xml +++ b/pom.xml @@ -1533,7 +1533,6 @@ hive-service - ${hive.group} hive-service-rpc @@ -1593,11 +1592,6 @@ ${hive.group} hive-service - - - ${hive.group} - hive-service-rpc - ${hive.group} hive-shims @@ -1852,7 +1846,6 @@ hive-service - ${hive.group} hive-service-rpc @@ -2007,7 +2000,6 @@ - ${hive.group} hive-service-rpc @@ -2029,6 +2021,17 @@ + + ${hive.group} + hive-service-rpc + ${hive.version} + + + * + * + + + net.sf.jpam jpam diff --git a/sql/hive-thriftserver/if/TCLIService.thrift b/sql/hive-thriftserver/if/TCLIService.thrift deleted file mode 100644 index 9026cd25df5b3..0000000000000 --- a/sql/hive-thriftserver/if/TCLIService.thrift +++ /dev/null @@ -1,1269 +0,0 @@ -// Licensed to the Apache 
Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Coding Conventions for this file: -// -// Structs/Enums/Unions -// * Struct, Enum, and Union names begin with a "T", -// and use a capital letter for each new word, with no underscores. -// * All fields should be declared as either optional or required. -// -// Functions -// * Function names start with a capital letter and have a capital letter for -// each new word, with no underscores. -// * Each function should take exactly one parameter, named TFunctionNameReq, -// and should return either void or TFunctionNameResp. This convention allows -// incremental updates. -// -// Services -// * Service names begin with the letter "T", use a capital letter for each -// new word (with no underscores), and end with the word "Service". - -namespace java org.apache.hive.service.rpc.thrift -namespace cpp apache.hive.service.rpc.thrift - -// List of protocol versions. A new token should be -// added to the end of this list every time a change is made. -enum TProtocolVersion { - HIVE_CLI_SERVICE_PROTOCOL_V1, - - // V2 adds support for asynchronous execution - HIVE_CLI_SERVICE_PROTOCOL_V2 - - // V3 add varchar type, primitive type qualifiers - HIVE_CLI_SERVICE_PROTOCOL_V3 - - // V4 add decimal precision/scale, char type - HIVE_CLI_SERVICE_PROTOCOL_V4 - - // V5 adds error details when GetOperationStatus returns in error state - HIVE_CLI_SERVICE_PROTOCOL_V5 - - // V6 uses binary type for binary payload (was string) and uses columnar result set - HIVE_CLI_SERVICE_PROTOCOL_V6 - - // V7 adds support for delegation token based connection - HIVE_CLI_SERVICE_PROTOCOL_V7 - - // V8 adds support for interval types - HIVE_CLI_SERVICE_PROTOCOL_V8 - - // V9 adds support for serializing ResultSets in SerDe - HIVE_CLI_SERVICE_PROTOCOL_V9 - - // V10 adds support for in place updates via GetOperationStatus - HIVE_CLI_SERVICE_PROTOCOL_V10 -} - -enum TTypeId { - BOOLEAN_TYPE, - TINYINT_TYPE, - SMALLINT_TYPE, - INT_TYPE, - BIGINT_TYPE, - FLOAT_TYPE, - DOUBLE_TYPE, - STRING_TYPE, - TIMESTAMP_TYPE, - BINARY_TYPE, - ARRAY_TYPE, - MAP_TYPE, - STRUCT_TYPE, - UNION_TYPE, - USER_DEFINED_TYPE, - DECIMAL_TYPE, - NULL_TYPE, - DATE_TYPE, - VARCHAR_TYPE, - CHAR_TYPE, - INTERVAL_YEAR_MONTH_TYPE, - INTERVAL_DAY_TIME_TYPE -} - -const set PRIMITIVE_TYPES = [ - TTypeId.BOOLEAN_TYPE, - TTypeId.TINYINT_TYPE, - TTypeId.SMALLINT_TYPE, - TTypeId.INT_TYPE, - TTypeId.BIGINT_TYPE, - TTypeId.FLOAT_TYPE, - TTypeId.DOUBLE_TYPE, - TTypeId.STRING_TYPE, - TTypeId.TIMESTAMP_TYPE, - TTypeId.BINARY_TYPE, - TTypeId.DECIMAL_TYPE, - TTypeId.NULL_TYPE, - TTypeId.DATE_TYPE, - TTypeId.VARCHAR_TYPE, - TTypeId.CHAR_TYPE, - TTypeId.INTERVAL_YEAR_MONTH_TYPE, - TTypeId.INTERVAL_DAY_TIME_TYPE -] - -const set COMPLEX_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE - TTypeId.STRUCT_TYPE - 
TTypeId.UNION_TYPE - TTypeId.USER_DEFINED_TYPE -] - -const set COLLECTION_TYPES = [ - TTypeId.ARRAY_TYPE - TTypeId.MAP_TYPE -] - -const map TYPE_NAMES = { - TTypeId.BOOLEAN_TYPE: "BOOLEAN", - TTypeId.TINYINT_TYPE: "TINYINT", - TTypeId.SMALLINT_TYPE: "SMALLINT", - TTypeId.INT_TYPE: "INT", - TTypeId.BIGINT_TYPE: "BIGINT", - TTypeId.FLOAT_TYPE: "FLOAT", - TTypeId.DOUBLE_TYPE: "DOUBLE", - TTypeId.STRING_TYPE: "STRING", - TTypeId.TIMESTAMP_TYPE: "TIMESTAMP", - TTypeId.BINARY_TYPE: "BINARY", - TTypeId.ARRAY_TYPE: "ARRAY", - TTypeId.MAP_TYPE: "MAP", - TTypeId.STRUCT_TYPE: "STRUCT", - TTypeId.UNION_TYPE: "UNIONTYPE", - TTypeId.DECIMAL_TYPE: "DECIMAL", - TTypeId.NULL_TYPE: "NULL" - TTypeId.DATE_TYPE: "DATE" - TTypeId.VARCHAR_TYPE: "VARCHAR" - TTypeId.CHAR_TYPE: "CHAR" - TTypeId.INTERVAL_YEAR_MONTH_TYPE: "INTERVAL_YEAR_MONTH" - TTypeId.INTERVAL_DAY_TIME_TYPE: "INTERVAL_DAY_TIME" -} - -// Thrift does not support recursively defined types or forward declarations, -// which makes it difficult to represent Hive's nested types. -// To get around these limitations TTypeDesc employs a type list that maps -// integer "pointers" to TTypeEntry objects. The following examples show -// how different types are represented using this scheme: -// -// "INT": -// TTypeDesc { -// types = [ -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "ARRAY": -// TTypeDesc { -// types = [ -// TTypeEntry.array_entry { -// object_type_ptr = 1 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// } -// ] -// } -// -// "MAP": -// TTypeDesc { -// types = [ -// TTypeEntry.map_entry { -// key_type_ptr = 1 -// value_type_ptr = 2 -// }, -// TTypeEntry.primitive_entry { -// type = INT_TYPE -// }, -// TTypeEntry.primitive_entry { -// type = STRING_TYPE -// } -// ] -// } - -typedef i32 TTypeEntryPtr - -// Valid TTypeQualifiers key names -const string CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength" - -// Type qualifier key name for decimal -const string PRECISION = "precision" -const string SCALE = "scale" - -union TTypeQualifierValue { - 1: optional i32 i32Value - 2: optional string stringValue -} - -// Type qualifiers for primitive type. -struct TTypeQualifiers { - 1: required map qualifiers -} - -// Type entry for a primitive type. -struct TPrimitiveTypeEntry { - // The primitive type token. This must satisfy the condition - // that type is in the PRIMITIVE_TYPES set. - 1: required TTypeId type - 2: optional TTypeQualifiers typeQualifiers -} - -// Type entry for an ARRAY type. -struct TArrayTypeEntry { - 1: required TTypeEntryPtr objectTypePtr -} - -// Type entry for a MAP type. -struct TMapTypeEntry { - 1: required TTypeEntryPtr keyTypePtr - 2: required TTypeEntryPtr valueTypePtr -} - -// Type entry for a STRUCT type. -struct TStructTypeEntry { - 1: required map nameToTypePtr -} - -// Type entry for a UNIONTYPE type. -struct TUnionTypeEntry { - 1: required map nameToTypePtr -} - -struct TUserDefinedTypeEntry { - // The fully qualified name of the class implementing this type. - 1: required string typeClassName -} - -// We use a union here since Thrift does not support inheritance. -union TTypeEntry { - 1: TPrimitiveTypeEntry primitiveEntry - 2: TArrayTypeEntry arrayEntry - 3: TMapTypeEntry mapEntry - 4: TStructTypeEntry structEntry - 5: TUnionTypeEntry unionEntry - 6: TUserDefinedTypeEntry userDefinedTypeEntry -} - -// Type descriptor for columns. -struct TTypeDesc { - // The "top" type is always the first element of the list. 
- // If the top type is an ARRAY, MAP, STRUCT, or UNIONTYPE - // type, then subsequent elements represent nested types. - 1: required list types -} - -// A result set column descriptor. -struct TColumnDesc { - // The name of the column - 1: required string columnName - - // The type descriptor for this column - 2: required TTypeDesc typeDesc - - // The ordinal position of this column in the schema - 3: required i32 position - - 4: optional string comment -} - -// Metadata used to describe the schema (column names, types, comments) -// of result sets. -struct TTableSchema { - 1: required list columns -} - -// A Boolean column value. -struct TBoolValue { - // NULL if value is unset. - 1: optional bool value -} - -// A Byte column value. -struct TByteValue { - // NULL if value is unset. - 1: optional byte value -} - -// A signed, 16 bit column value. -struct TI16Value { - // NULL if value is unset - 1: optional i16 value -} - -// A signed, 32 bit column value -struct TI32Value { - // NULL if value is unset - 1: optional i32 value -} - -// A signed 64 bit column value -struct TI64Value { - // NULL if value is unset - 1: optional i64 value -} - -// A floating point 64 bit column value -struct TDoubleValue { - // NULL if value is unset - 1: optional double value -} - -struct TStringValue { - // NULL if value is unset - 1: optional string value -} - -// A single column value in a result set. -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumnValue { - 1: TBoolValue boolVal // BOOLEAN - 2: TByteValue byteVal // TINYINT - 3: TI16Value i16Val // SMALLINT - 4: TI32Value i32Val // INT - 5: TI64Value i64Val // BIGINT, TIMESTAMP - 6: TDoubleValue doubleVal // FLOAT, DOUBLE - 7: TStringValue stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, BINARY, DECIMAL, NULL, INTERVAL_YEAR_MONTH, INTERVAL_DAY_TIME -} - -// Represents a row in a rowset. -struct TRow { - 1: required list colVals -} - -struct TBoolColumn { - 1: required list values - 2: required binary nulls -} - -struct TByteColumn { - 1: required list values - 2: required binary nulls -} - -struct TI16Column { - 1: required list values - 2: required binary nulls -} - -struct TI32Column { - 1: required list values - 2: required binary nulls -} - -struct TI64Column { - 1: required list values - 2: required binary nulls -} - -struct TDoubleColumn { - 1: required list values - 2: required binary nulls -} - -struct TStringColumn { - 1: required list values - 2: required binary nulls -} - -struct TBinaryColumn { - 1: required list values - 2: required binary nulls -} - -// Note that Hive's type system is richer than Thrift's, -// so in some cases we have to map multiple Hive types -// to the same Thrift type. On the client-side this is -// disambiguated by looking at the Schema of the -// result set. -union TColumn { - 1: TBoolColumn boolVal // BOOLEAN - 2: TByteColumn byteVal // TINYINT - 3: TI16Column i16Val // SMALLINT - 4: TI32Column i32Val // INT - 5: TI64Column i64Val // BIGINT, TIMESTAMP - 6: TDoubleColumn doubleVal // FLOAT, DOUBLE - 7: TStringColumn stringVal // STRING, LIST, MAP, STRUCT, UNIONTYPE, DECIMAL, NULL - 8: TBinaryColumn binaryVal // BINARY -} - -// Represents a rowset -struct TRowSet { - // The starting row offset of this rowset. 
- 1: required i64 startRowOffset - 2: required list rows - 3: optional list columns - 4: optional binary binaryColumns - 5: optional i32 columnCount -} - -// The return status code contained in each response. -enum TStatusCode { - SUCCESS_STATUS, - SUCCESS_WITH_INFO_STATUS, - STILL_EXECUTING_STATUS, - ERROR_STATUS, - INVALID_HANDLE_STATUS -} - -// The return status of a remote request -struct TStatus { - 1: required TStatusCode statusCode - - // If status is SUCCESS_WITH_INFO, info_msgs may be populated with - // additional diagnostic information. - 2: optional list infoMessages - - // If status is ERROR, then the following fields may be set - 3: optional string sqlState // as defined in the ISO/IEF CLI specification - 4: optional i32 errorCode // internal error code - 5: optional string errorMessage -} - -// The state of an operation (i.e. a query or other -// asynchronous operation that generates a result set) -// on the server. -enum TOperationState { - // The operation has been initialized - INITIALIZED_STATE, - - // The operation is running. In this state the result - // set is not available. - RUNNING_STATE, - - // The operation has completed. When an operation is in - // this state its result set may be fetched. - FINISHED_STATE, - - // The operation was canceled by a client - CANCELED_STATE, - - // The operation was closed by a client - CLOSED_STATE, - - // The operation failed due to an error - ERROR_STATE, - - // The operation is in an unrecognized state - UKNOWN_STATE, - - // The operation is in an pending state - PENDING_STATE, - - // The operation is in an timedout state - TIMEDOUT_STATE, -} - -// A string identifier. This is interpreted literally. -typedef string TIdentifier - -// A search pattern. -// -// Valid search pattern characters: -// '_': Any single character. -// '%': Any sequence of zero or more characters. -// '\': Escape character used to include special characters, -// e.g. '_', '%', '\'. If a '\' precedes a non-special -// character it has no special meaning and is interpreted -// literally. -typedef string TPattern - - -// A search pattern or identifier. Used as input -// parameter for many of the catalog functions. -typedef string TPatternOrIdentifier - -struct THandleIdentifier { - // 16 byte globally unique identifier - // This is the public ID of the handle and - // can be used for reporting. - 1: required binary guid, - - // 16 byte secret generated by the server - // and used to verify that the handle is not - // being hijacked by another user. - 2: required binary secret, -} - -// Client-side handle to persistent -// session information on the server-side. -struct TSessionHandle { - 1: required THandleIdentifier sessionId -} - -// The subtype of an OperationHandle. -enum TOperationType { - EXECUTE_STATEMENT, - GET_TYPE_INFO, - GET_CATALOGS, - GET_SCHEMAS, - GET_TABLES, - GET_TABLE_TYPES, - GET_COLUMNS, - GET_FUNCTIONS, - UNKNOWN, -} - -// Client-side reference to a task running -// asynchronously on the server. -struct TOperationHandle { - 1: required THandleIdentifier operationId - 2: required TOperationType operationType - - // If hasResultSet = TRUE, then this operation - // generates a result set that can be fetched. - // Note that the result set may be empty. - // - // If hasResultSet = FALSE, then this operation - // does not generate a result set, and calling - // GetResultSetMetadata or FetchResults against - // this OperationHandle will generate an error. 
- 3: required bool hasResultSet - - // For operations that don't generate result sets, - // modifiedRowCount is either: - // - // 1) The number of rows that were modified by - // the DML operation (e.g. number of rows inserted, - // number of rows deleted, etc). - // - // 2) 0 for operations that don't modify or add rows. - // - // 3) < 0 if the operation is capable of modifiying rows, - // but Hive is unable to determine how many rows were - // modified. For example, Hive's LOAD DATA command - // doesn't generate row count information because - // Hive doesn't inspect the data as it is loaded. - // - // modifiedRowCount is unset if the operation generates - // a result set. - 4: optional double modifiedRowCount -} - - -// OpenSession() -// -// Open a session (connection) on the server against -// which operations may be executed. -struct TOpenSessionReq { - // The version of the HiveServer2 protocol that the client is using. - 1: required TProtocolVersion client_protocol = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10 - - // Username and password for authentication. - // Depending on the authentication scheme being used, - // this information may instead be provided by a lower - // protocol layer, in which case these fields may be - // left unset. - 2: optional string username - 3: optional string password - - // Configuration overlay which is applied when the session is - // first created. - 4: optional map configuration -} - -struct TOpenSessionResp { - 1: required TStatus status - - // The protocol version that the server is using. - 2: required TProtocolVersion serverProtocolVersion = TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10 - - // Session Handle - 3: optional TSessionHandle sessionHandle - - // The configuration settings for this session. - 4: optional map configuration -} - - -// CloseSession() -// -// Closes the specified session and frees any resources -// currently allocated to that session. Any open -// operations in that session will be canceled. 
-struct TCloseSessionReq { - 1: required TSessionHandle sessionHandle -} - -struct TCloseSessionResp { - 1: required TStatus status -} - - - -enum TGetInfoType { - CLI_MAX_DRIVER_CONNECTIONS = 0, - CLI_MAX_CONCURRENT_ACTIVITIES = 1, - CLI_DATA_SOURCE_NAME = 2, - CLI_FETCH_DIRECTION = 8, - CLI_SERVER_NAME = 13, - CLI_SEARCH_PATTERN_ESCAPE = 14, - CLI_DBMS_NAME = 17, - CLI_DBMS_VER = 18, - CLI_ACCESSIBLE_TABLES = 19, - CLI_ACCESSIBLE_PROCEDURES = 20, - CLI_CURSOR_COMMIT_BEHAVIOR = 23, - CLI_DATA_SOURCE_READ_ONLY = 25, - CLI_DEFAULT_TXN_ISOLATION = 26, - CLI_IDENTIFIER_CASE = 28, - CLI_IDENTIFIER_QUOTE_CHAR = 29, - CLI_MAX_COLUMN_NAME_LEN = 30, - CLI_MAX_CURSOR_NAME_LEN = 31, - CLI_MAX_SCHEMA_NAME_LEN = 32, - CLI_MAX_CATALOG_NAME_LEN = 34, - CLI_MAX_TABLE_NAME_LEN = 35, - CLI_SCROLL_CONCURRENCY = 43, - CLI_TXN_CAPABLE = 46, - CLI_USER_NAME = 47, - CLI_TXN_ISOLATION_OPTION = 72, - CLI_INTEGRITY = 73, - CLI_GETDATA_EXTENSIONS = 81, - CLI_NULL_COLLATION = 85, - CLI_ALTER_TABLE = 86, - CLI_ORDER_BY_COLUMNS_IN_SELECT = 90, - CLI_SPECIAL_CHARACTERS = 94, - CLI_MAX_COLUMNS_IN_GROUP_BY = 97, - CLI_MAX_COLUMNS_IN_INDEX = 98, - CLI_MAX_COLUMNS_IN_ORDER_BY = 99, - CLI_MAX_COLUMNS_IN_SELECT = 100, - CLI_MAX_COLUMNS_IN_TABLE = 101, - CLI_MAX_INDEX_SIZE = 102, - CLI_MAX_ROW_SIZE = 104, - CLI_MAX_STATEMENT_LEN = 105, - CLI_MAX_TABLES_IN_SELECT = 106, - CLI_MAX_USER_NAME_LEN = 107, - CLI_OJ_CAPABILITIES = 115, - - CLI_XOPEN_CLI_YEAR = 10000, - CLI_CURSOR_SENSITIVITY = 10001, - CLI_DESCRIBE_PARAMETER = 10002, - CLI_CATALOG_NAME = 10003, - CLI_COLLATION_SEQ = 10004, - CLI_MAX_IDENTIFIER_LEN = 10005, -} - -union TGetInfoValue { - 1: string stringValue - 2: i16 smallIntValue - 3: i32 integerBitmask - 4: i32 integerFlag - 5: i32 binaryValue - 6: i64 lenValue -} - -// GetInfo() -// -// This function is based on ODBC's CLIGetInfo() function. -// The function returns general information about the data source -// using the same keys as ODBC. -struct TGetInfoReq { - // The sesssion to run this request against - 1: required TSessionHandle sessionHandle - - 2: required TGetInfoType infoType -} - -struct TGetInfoResp { - 1: required TStatus status - - 2: required TGetInfoValue infoValue -} - - -// ExecuteStatement() -// -// Execute a statement. -// The returned OperationHandle can be used to check on the -// status of the statement, and to fetch results once the -// statement has finished executing. -struct TExecuteStatementReq { - // The session to execute the statement against - 1: required TSessionHandle sessionHandle - - // The statement to be executed (DML, DDL, SET, etc) - 2: required string statement - - // Configuration properties that are overlayed on top of the - // the existing session configuration before this statement - // is executed. These properties apply to this statement - // only and will not affect the subsequent state of the Session. - 3: optional map confOverlay - - // Execute asynchronously when runAsync is true - 4: optional bool runAsync = false - - // The number of seconds after which the query will timeout on the server - 5: optional i64 queryTimeout = 0 -} - -struct TExecuteStatementResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetTypeInfo() -// -// Get information about types supported by the HiveServer instance. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// -// Refer to the documentation for ODBC's CLIGetTypeInfo function for -// the format of the result set. 
-struct TGetTypeInfoReq { - // The session to run this request against. - 1: required TSessionHandle sessionHandle -} - -struct TGetTypeInfoResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetCatalogs() -// -// Returns the list of catalogs (databases) -// Results are ordered by TABLE_CATALOG -// -// Resultset columns : -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -struct TGetCatalogsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetCatalogsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetSchemas() -// -// Retrieves the schema names available in this database. -// The results are ordered by TABLE_CATALOG and TABLE_SCHEM. -// col1 -// name: TABLE_SCHEM -// type: STRING -// desc: schema name -// col2 -// name: TABLE_CATALOG -// type: STRING -// desc: catalog name -struct TGetSchemasReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // schema name or pattern - 3: optional TPatternOrIdentifier schemaName -} - -struct TGetSchemasResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTables() -// -// Returns a list of tables with catalog, schema, and table -// type information. The information is returned as a result -// set which can be fetched using the OperationHandle -// provided in the response. -// Results are ordered by TABLE_TYPE, TABLE_CAT, TABLE_SCHEM, and TABLE_NAME -// -// Result Set Columns: -// -// col1 -// name: TABLE_CAT -// type: STRING -// desc: Catalog name. NULL if not applicable. -// -// col2 -// name: TABLE_SCHEM -// type: STRING -// desc: Schema name. -// -// col3 -// name: TABLE_NAME -// type: STRING -// desc: Table name. -// -// col4 -// name: TABLE_TYPE -// type: STRING -// desc: The table type, e.g. "TABLE", "VIEW", etc. -// -// col5 -// name: REMARKS -// type: STRING -// desc: Comments about the table -// -struct TGetTablesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog or a search pattern. - 2: optional TPatternOrIdentifier catalogName - - // Name of the schema or a search pattern. - 3: optional TPatternOrIdentifier schemaName - - // Name of the table or a search pattern. - 4: optional TPatternOrIdentifier tableName - - // List of table types to match - // e.g. "TABLE", "VIEW", "SYSTEM TABLE", "GLOBAL TEMPORARY", - // "LOCAL TEMPORARY", "ALIAS", "SYNONYM", etc. - 5: optional list tableTypes -} - -struct TGetTablesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetTableTypes() -// -// Returns the table types available in this database. -// The results are ordered by table type. -// -// col1 -// name: TABLE_TYPE -// type: STRING -// desc: Table type name. -struct TGetTableTypesReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle -} - -struct TGetTableTypesResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetColumns() -// -// Returns a list of columns in the specified tables. -// The information is returned as a result set which can be fetched -// using the OperationHandle provided in the response. -// Results are ordered by TABLE_CAT, TABLE_SCHEM, TABLE_NAME, -// and ORDINAL_POSITION. 
-// -// Result Set Columns are the same as those for the ODBC CLIColumns -// function. -// -struct TGetColumnsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. Must not contain a search pattern. - 2: optional TIdentifier catalogName - - // Schema name or search pattern - 3: optional TPatternOrIdentifier schemaName - - // Table name or search pattern - 4: optional TPatternOrIdentifier tableName - - // Column name or search pattern - 5: optional TPatternOrIdentifier columnName -} - -struct TGetColumnsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - - -// GetFunctions() -// -// Returns a list of functions supported by the data source. The -// behavior of this function matches -// java.sql.DatabaseMetaData.getFunctions() both in terms of -// inputs and outputs. -// -// Result Set Columns: -// -// col1 -// name: FUNCTION_CAT -// type: STRING -// desc: Function catalog (may be null) -// -// col2 -// name: FUNCTION_SCHEM -// type: STRING -// desc: Function schema (may be null) -// -// col3 -// name: FUNCTION_NAME -// type: STRING -// desc: Function name. This is the name used to invoke the function. -// -// col4 -// name: REMARKS -// type: STRING -// desc: Explanatory comment on the function. -// -// col5 -// name: FUNCTION_TYPE -// type: SMALLINT -// desc: Kind of function. One of: -// * functionResultUnknown - Cannot determine if a return value or a table -// will be returned. -// * functionNoTable - Does not a return a table. -// * functionReturnsTable - Returns a table. -// -// col6 -// name: SPECIFIC_NAME -// type: STRING -// desc: The name which uniquely identifies this function within its schema. -// In this case this is the fully qualified class name of the class -// that implements this function. -// -struct TGetFunctionsReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // A catalog name; must match the catalog name as it is stored in the - // database; "" retrieves those without a catalog; null means - // that the catalog name should not be used to narrow the search. - 2: optional TIdentifier catalogName - - // A schema name pattern; must match the schema name as it is stored - // in the database; "" retrieves those without a schema; null means - // that the schema name should not be used to narrow the search. - 3: optional TPatternOrIdentifier schemaName - - // A function name pattern; must match the function name as it is stored - // in the database. - 4: required TPatternOrIdentifier functionName -} - -struct TGetFunctionsResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -struct TGetPrimaryKeysReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the catalog. - 2: optional TIdentifier catalogName - - // Name of the schema. - 3: optional TIdentifier schemaName - - // Name of the table. - 4: optional TIdentifier tableName -} - -struct TGetPrimaryKeysResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -struct TGetCrossReferenceReq { - // Session to run this request against - 1: required TSessionHandle sessionHandle - - // Name of the parent catalog. - 2: optional TIdentifier parentCatalogName - - // Name of the parent schema. - 3: optional TIdentifier parentSchemaName - - // Name of the parent table. - 4: optional TIdentifier parentTableName - - // Name of the foreign catalog. 
- 5: optional TIdentifier foreignCatalogName - - // Name of the foreign schema. - 6: optional TIdentifier foreignSchemaName - - // Name of the foreign table. - 7: optional TIdentifier foreignTableName -} - -struct TGetCrossReferenceResp { - 1: required TStatus status - 2: optional TOperationHandle operationHandle -} - -// GetOperationStatus() -// -// Get the status of an operation running on the server. -struct TGetOperationStatusReq { - // Session to run this request against - 1: required TOperationHandle operationHandle - // optional arguments to get progress information - 2: optional bool getProgressUpdate -} - -struct TGetOperationStatusResp { - 1: required TStatus status - 2: optional TOperationState operationState - - // If operationState is ERROR_STATE, then the following fields may be set - // sqlState as defined in the ISO/IEF CLI specification - 3: optional string sqlState - - // Internal error code - 4: optional i32 errorCode - - // Error message - 5: optional string errorMessage - - // List of statuses of sub tasks - 6: optional string taskStatus - - // When was the operation started - 7: optional i64 operationStarted - - // When was the operation completed - 8: optional i64 operationCompleted - - // If the operation has the result - 9: optional bool hasResultSet - - 10: optional TProgressUpdateResp progressUpdateResponse - -} - - -// CancelOperation() -// -// Cancels processing on the specified operation handle and -// frees any resources which were allocated. -struct TCancelOperationReq { - // Operation to cancel - 1: required TOperationHandle operationHandle -} - -struct TCancelOperationResp { - 1: required TStatus status -} - - -// CloseOperation() -// -// Given an operation in the FINISHED, CANCELED, -// or ERROR states, CloseOperation() will free -// all of the resources which were allocated on -// the server to service the operation. -struct TCloseOperationReq { - 1: required TOperationHandle operationHandle -} - -struct TCloseOperationResp { - 1: required TStatus status -} - - -// GetResultSetMetadata() -// -// Retrieves schema information for the specified operation -struct TGetResultSetMetadataReq { - // Operation for which to fetch result set schema information - 1: required TOperationHandle operationHandle -} - -struct TGetResultSetMetadataResp { - 1: required TStatus status - 2: optional TTableSchema schema -} - - -enum TFetchOrientation { - // Get the next rowset. The fetch offset is ignored. - FETCH_NEXT, - - // Get the previous rowset. The fetch offset is ignored. - FETCH_PRIOR, - - // Return the rowset at the given fetch offset relative - // to the curren rowset. - // NOT SUPPORTED - FETCH_RELATIVE, - - // Return the rowset at the specified fetch offset. - // NOT SUPPORTED - FETCH_ABSOLUTE, - - // Get the first rowset in the result set. - FETCH_FIRST, - - // Get the last rowset in the result set. - // NOT SUPPORTED - FETCH_LAST -} - -// FetchResults() -// -// Fetch rows from the server corresponding to -// a particular OperationHandle. -struct TFetchResultsReq { - // Operation from which to fetch results. - 1: required TOperationHandle operationHandle - - // The fetch orientation. This must be either - // FETCH_NEXT, FETCH_PRIOR or FETCH_FIRST. Defaults to FETCH_NEXT. - 2: required TFetchOrientation orientation = TFetchOrientation.FETCH_NEXT - - // Max number of rows that should be returned in - // the rowset. - 3: required i64 maxRows - - // The type of a fetch results request. 0 represents Query output. 
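To make the operation-handle lifecycle above concrete, here is a hedged sketch that polls GetOperationStatus() until an operation settles, reads its result schema, and then releases it with CloseOperation(). It is not part of the patch: `client` is again an assumed, already-connected `TCLIService.Client`, and it assumes the TOperationState values (INITIALIZED_STATE, PENDING_STATE, RUNNING_STATE, and the ERROR_STATE case mentioned in the comments) defined earlier in this IDL.

```
import org.apache.hive.service.rpc.thrift.TCLIService;
import org.apache.hive.service.rpc.thrift.TCloseOperationReq;
import org.apache.hive.service.rpc.thrift.TGetOperationStatusReq;
import org.apache.hive.service.rpc.thrift.TGetOperationStatusResp;
import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataReq;
import org.apache.hive.service.rpc.thrift.TGetResultSetMetadataResp;
import org.apache.hive.service.rpc.thrift.TOperationHandle;
import org.apache.hive.service.rpc.thrift.TOperationState;

public final class OperationLifecycleSketch {
  // Polls GetOperationStatus() until the operation leaves its transient states,
  // prints the result schema, then frees the server-side resources.
  static void awaitAndClose(TCLIService.Client client, TOperationHandle op)
      throws org.apache.thrift.TException, InterruptedException {
    TGetOperationStatusResp status;
    do {
      TGetOperationStatusReq req = new TGetOperationStatusReq(op);
      req.setGetProgressUpdate(true);                 // ask for optional progress information
      status = client.GetOperationStatus(req);
      Thread.sleep(100);                              // naive fixed backoff, good enough for a sketch
    } while (status.getOperationState() == TOperationState.INITIALIZED_STATE
        || status.getOperationState() == TOperationState.PENDING_STATE
        || status.getOperationState() == TOperationState.RUNNING_STATE);

    if (status.getOperationState() == TOperationState.ERROR_STATE) {
      // sqlState, errorCode and errorMessage are only populated in the error case.
      throw new RuntimeException(status.getErrorMessage());
    }

    // Schema of the (possibly empty) result set backing this operation.
    TGetResultSetMetadataResp meta =
        client.GetResultSetMetadata(new TGetResultSetMetadataReq(op));
    System.out.println("result columns: " + meta.getSchema().getColumns().size());

    // CloseOperation() frees everything the server allocated for this handle.
    client.CloseOperation(new TCloseOperationReq(op));
  }
}
```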
1 represents Log - 4: optional i16 fetchType = 0 -} - -struct TFetchResultsResp { - 1: required TStatus status - - // TRUE if there are more rows left to fetch from the server. - 2: optional bool hasMoreRows - - // The rowset. This is optional so that we have the - // option in the future of adding alternate formats for - // representing result set data, e.g. delimited strings, - // binary encoded, etc. - 3: optional TRowSet results -} - -// GetDelegationToken() -// Retrieve delegation token for the current user -struct TGetDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // userid for the proxy user - 2: required string owner - - // designated renewer userid - 3: required string renewer -} - -struct TGetDelegationTokenResp { - // status of the request - 1: required TStatus status - - // delegation token string - 2: optional string delegationToken -} - -// CancelDelegationToken() -// Cancel the given delegation token -struct TCancelDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to cancel - 2: required string delegationToken -} - -struct TCancelDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -// RenewDelegationToken() -// Renew the given delegation token -struct TRenewDelegationTokenReq { - // session handle - 1: required TSessionHandle sessionHandle - - // delegation token to renew - 2: required string delegationToken -} - -struct TRenewDelegationTokenResp { - // status of the request - 1: required TStatus status -} - -enum TJobExecutionStatus { - IN_PROGRESS, - COMPLETE, - NOT_AVAILABLE -} - -struct TProgressUpdateResp { - 1: required list headerNames - 2: required list> rows - 3: required double progressedPercentage - 4: required TJobExecutionStatus status - 5: required string footerSummary - 6: required i64 startTime -} - -service TCLIService { - - TOpenSessionResp OpenSession(1:TOpenSessionReq req); - - TCloseSessionResp CloseSession(1:TCloseSessionReq req); - - TGetInfoResp GetInfo(1:TGetInfoReq req); - - TExecuteStatementResp ExecuteStatement(1:TExecuteStatementReq req); - - TGetTypeInfoResp GetTypeInfo(1:TGetTypeInfoReq req); - - TGetCatalogsResp GetCatalogs(1:TGetCatalogsReq req); - - TGetSchemasResp GetSchemas(1:TGetSchemasReq req); - - TGetTablesResp GetTables(1:TGetTablesReq req); - - TGetTableTypesResp GetTableTypes(1:TGetTableTypesReq req); - - TGetColumnsResp GetColumns(1:TGetColumnsReq req); - - TGetFunctionsResp GetFunctions(1:TGetFunctionsReq req); - - TGetPrimaryKeysResp GetPrimaryKeys(1:TGetPrimaryKeysReq req); - - TGetCrossReferenceResp GetCrossReference(1:TGetCrossReferenceReq req); - - TGetOperationStatusResp GetOperationStatus(1:TGetOperationStatusReq req); - - TCancelOperationResp CancelOperation(1:TCancelOperationReq req); - - TCloseOperationResp CloseOperation(1:TCloseOperationReq req); - - TGetResultSetMetadataResp GetResultSetMetadata(1:TGetResultSetMetadataReq req); - - TFetchResultsResp FetchResults(1:TFetchResultsReq req); - - TGetDelegationTokenResp GetDelegationToken(1:TGetDelegationTokenReq req); - - TCancelDelegationTokenResp CancelDelegationToken(1:TCancelDelegationTokenReq req); - - TRenewDelegationTokenResp RenewDelegationToken(1:TRenewDelegationTokenReq req); -} diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 4a96afe9df20a..9cd8adb6cb4df 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -77,6 +77,10 @@ ${hive.group} hive-beeline + + ${hive.group} + 
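Before the build changes below, one last illustrative sketch: the fetch loop implied by TFetchResultsReq/TFetchResultsResp and the TCLIService definition above. `client` is again an assumed, already-connected `TCLIService.Client`; the TRowSet accessors follow the standard generated-bean conventions, and hasMoreRows is handled per its documented meaning even though a defensive client may also stop on an empty rowset.

```
import org.apache.hive.service.rpc.thrift.TCLIService;
import org.apache.hive.service.rpc.thrift.TFetchOrientation;
import org.apache.hive.service.rpc.thrift.TFetchResultsReq;
import org.apache.hive.service.rpc.thrift.TFetchResultsResp;
import org.apache.hive.service.rpc.thrift.TOperationHandle;
import org.apache.hive.service.rpc.thrift.TRowSet;

public final class FetchLoopSketch {
  // Drains the result set of a finished operation, one rowset at a time.
  static void drain(TCLIService.Client client, TOperationHandle op)
      throws org.apache.thrift.TException {
    boolean more = true;
    while (more) {
      // FETCH_NEXT is the default orientation; maxRows caps the rowset size.
      TFetchResultsReq req = new TFetchResultsReq(op, TFetchOrientation.FETCH_NEXT, 1000);
      TFetchResultsResp resp = client.FetchResults(req);

      TRowSet rowSet = resp.getResults();
      if (rowSet != null && rowSet.isSetColumns()) {
        // Column-oriented layout: one TColumn per result column.
        System.out.println("fetched " + rowSet.getColumns().size() + " columns");
      }

      // hasMoreRows is optional; a server that never sets it effectively ends the
      // loop after the first batch, so real clients often also stop on an empty rowset.
      more = resp.isSetHasMoreRows() && resp.isHasMoreRows();
    }
  }
}
```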
hive-service-rpc + org.eclipse.jetty jetty-server @@ -133,25 +137,5 @@ target/scala-${scala.binary.version}/classes target/scala-${scala.binary.version}/test-classes - - - org.codehaus.mojo - build-helper-maven-plugin - - - add-source - generate-sources - - add-source - - - - src/gen/java - - - - - - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java deleted file mode 100644 index 358e322632144..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TArrayTypeEntry.java +++ /dev/null @@ -1,387 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TArrayTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TArrayTypeEntry"); - - private static final org.apache.thrift.protocol.TField OBJECT_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("objectTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TArrayTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TArrayTypeEntryTupleSchemeFactory()); - } - - private int objectTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OBJECT_TYPE_PTR((short)1, "objectTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OBJECT_TYPE_PTR - return OBJECT_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __OBJECTTYPEPTR_ISSET_ID = 0; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OBJECT_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("objectTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TArrayTypeEntry.class, metaDataMap); - } - - public TArrayTypeEntry() { - } - - public TArrayTypeEntry( - int objectTypePtr) - { - this(); - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TArrayTypeEntry(TArrayTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.objectTypePtr = other.objectTypePtr; - } - - public TArrayTypeEntry deepCopy() { - return new TArrayTypeEntry(this); - } - - @Override - public void clear() { - setObjectTypePtrIsSet(false); - this.objectTypePtr = 0; - } - - public int getObjectTypePtr() { - return this.objectTypePtr; - } - - public void setObjectTypePtr(int objectTypePtr) { - this.objectTypePtr = objectTypePtr; - setObjectTypePtrIsSet(true); - } - - public void unsetObjectTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - /** Returns true if field objectTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetObjectTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID); - } - - public void setObjectTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OBJECTTYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OBJECT_TYPE_PTR: - if (value == null) { - unsetObjectTypePtr(); - } else { - setObjectTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OBJECT_TYPE_PTR: - return getObjectTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OBJECT_TYPE_PTR: - return isSetObjectTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - 
if (that instanceof TArrayTypeEntry) - return this.equals((TArrayTypeEntry)that); - return false; - } - - public boolean equals(TArrayTypeEntry that) { - if (that == null) - return false; - - boolean this_present_objectTypePtr = true; - boolean that_present_objectTypePtr = true; - if (this_present_objectTypePtr || that_present_objectTypePtr) { - if (!(this_present_objectTypePtr && that_present_objectTypePtr)) - return false; - if (this.objectTypePtr != that.objectTypePtr) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_objectTypePtr = true; - list.add(present_objectTypePtr); - if (present_objectTypePtr) - list.add(objectTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TArrayTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetObjectTypePtr()).compareTo(other.isSetObjectTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetObjectTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.objectTypePtr, other.objectTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TArrayTypeEntry("); - boolean first = true; - - sb.append("objectTypePtr:"); - sb.append(this.objectTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetObjectTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'objectTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TArrayTypeEntryStandardSchemeFactory implements SchemeFactory { - public TArrayTypeEntryStandardScheme getScheme() { - return new TArrayTypeEntryStandardScheme(); - } - } - - private static class TArrayTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OBJECT_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(OBJECT_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.objectTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TArrayTypeEntryTupleSchemeFactory implements SchemeFactory { - public TArrayTypeEntryTupleScheme getScheme() { - return new TArrayTypeEntryTupleScheme(); - } - } - - private static class TArrayTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.objectTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TArrayTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.objectTypePtr = iprot.readI32(); - struct.setObjectTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java deleted file mode 100644 index a869cee007c0b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBinaryColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import 
java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBinaryColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBinaryColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBinaryColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBinaryColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBinaryColumn.class, metaDataMap); - } - - public TBinaryColumn() { - } - - public TBinaryColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TBinaryColumn(TBinaryColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TBinaryColumn deepCopy() { - return new TBinaryColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(ByteBuffer elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBinaryColumn) - return this.equals((TBinaryColumn)that); - return false; - } - - public boolean equals(TBinaryColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TBinaryColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBinaryColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.values, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBinaryColumnStandardSchemeFactory implements SchemeFactory { - public TBinaryColumnStandardScheme getScheme() { - return new TBinaryColumnStandardScheme(); - } - } - - private static class TBinaryColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBinaryColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list110 = iprot.readListBegin(); - struct.values = new ArrayList(_list110.size); - ByteBuffer _elem111; - for (int _i112 = 0; _i112 < _list110.size; ++_i112) - { - _elem111 = iprot.readBinary(); - struct.values.add(_elem111); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, 
TBinaryColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (ByteBuffer _iter113 : struct.values) - { - oprot.writeBinary(_iter113); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBinaryColumnTupleSchemeFactory implements SchemeFactory { - public TBinaryColumnTupleScheme getScheme() { - return new TBinaryColumnTupleScheme(); - } - } - - private static class TBinaryColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (ByteBuffer _iter114 : struct.values) - { - oprot.writeBinary(_iter114); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBinaryColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list115 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list115.size); - ByteBuffer _elem116; - for (int _i117 = 0; _i117 < _list115.size; ++_i117) - { - _elem116 = iprot.readBinary(); - struct.values.add(_elem116); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java deleted file mode 100644 index 9bb636672aa1e..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBoolColumn implements 
org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolColumn.class, metaDataMap); - } - - public TBoolColumn() { - } - - public TBoolColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TBoolColumn(TBoolColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TBoolColumn deepCopy() { - return new TBoolColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(boolean elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolColumn) - return this.equals((TBoolColumn)that); - return false; - } - - public boolean equals(TBoolColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TBoolColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolColumnStandardSchemeFactory implements SchemeFactory { - public TBoolColumnStandardScheme getScheme() { - return new TBoolColumnStandardScheme(); - } - } - - private static class TBoolColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list54 = iprot.readListBegin(); - struct.values = new ArrayList(_list54.size); - boolean _elem55; - for (int _i56 = 0; _i56 < _list54.size; ++_i56) - { - _elem55 = iprot.readBool(); - struct.values.add(_elem55); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolColumn struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, struct.values.size())); - for (boolean _iter57 : struct.values) - { - oprot.writeBool(_iter57); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolColumnTupleSchemeFactory implements SchemeFactory { - public TBoolColumnTupleScheme getScheme() { - return new TBoolColumnTupleScheme(); - } - } - - private static class TBoolColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (boolean _iter58 : struct.values) - { - oprot.writeBool(_iter58); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list59 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BOOL, iprot.readI32()); - struct.values = new ArrayList(_list59.size); - boolean _elem60; - for (int _i61 = 0; _i61 < _list59.size; ++_i61) - { - _elem60 = iprot.readBool(); - struct.values.add(_elem60); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java deleted file mode 100644 index 87b3070a89b11..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TBoolValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TBoolValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TBoolValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BOOL, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TBoolValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TBoolValueTupleSchemeFactory()); - } - - private boolean value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TBoolValue.class, metaDataMap); - } - - public TBoolValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TBoolValue(TBoolValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TBoolValue deepCopy() { - return new TBoolValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = false; - } - - public boolean isValue() { - return this.value; - } - - public void setValue(boolean value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return isValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TBoolValue) - return this.equals((TBoolValue)that); - return false; - } - - public boolean equals(TBoolValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TBoolValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TBoolValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return 
sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TBoolValueStandardSchemeFactory implements SchemeFactory { - public TBoolValueStandardScheme getScheme() { - return new TBoolValueStandardScheme(); - } - } - - private static class TBoolValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TBoolValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TBoolValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeBool(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TBoolValueTupleSchemeFactory implements SchemeFactory { - public TBoolValueTupleScheme getScheme() { - return new TBoolValueTupleScheme(); - } - } - - private static class TBoolValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeBool(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TBoolValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readBool(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java deleted file mode 100644 index 68b3d3c31eb03..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TByteColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteColumn.class, metaDataMap); - } - - public TByteColumn() { - } - - public TByteColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TByteColumn(TByteColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TByteColumn deepCopy() { - return new TByteColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(byte elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteColumn) - return this.equals((TByteColumn)that); - return false; - } - - public boolean equals(TByteColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TByteColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteColumnStandardSchemeFactory implements SchemeFactory { - public TByteColumnStandardScheme getScheme() { - return new TByteColumnStandardScheme(); - } - } - - private static class TByteColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list62 = iprot.readListBegin(); - struct.values = new ArrayList(_list62.size); - byte _elem63; - for (int _i64 = 0; _i64 < _list62.size; ++_i64) - { - _elem63 = iprot.readByte(); - struct.values.add(_elem63); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteColumn struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, struct.values.size())); - for (byte _iter65 : struct.values) - { - oprot.writeByte(_iter65); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteColumnTupleSchemeFactory implements SchemeFactory { - public TByteColumnTupleScheme getScheme() { - return new TByteColumnTupleScheme(); - } - } - - private static class TByteColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (byte _iter66 : struct.values) - { - oprot.writeByte(_iter66); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list67 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.BYTE, iprot.readI32()); - struct.values = new ArrayList(_list67.size); - byte _elem68; - for (int _i69 = 0; _i69 < _list67.size; ++_i69) - { - _elem68 = iprot.readByte(); - struct.values.add(_elem68); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java deleted file mode 100644 index a3d5951335fa7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TByteValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TByteValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TByteValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.BYTE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TByteValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TByteValueTupleSchemeFactory()); - } - - private byte value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BYTE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TByteValue.class, metaDataMap); - } - - public TByteValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TByteValue(TByteValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TByteValue deepCopy() { - return new TByteValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public byte getValue() { - return this.value; - } - - public void setValue(byte value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Byte)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TByteValue) - return this.equals((TByteValue)that); - return false; - } - - public boolean equals(TByteValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TByteValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TByteValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - 
- public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TByteValueStandardSchemeFactory implements SchemeFactory { - public TByteValueStandardScheme getScheme() { - return new TByteValueStandardScheme(); - } - } - - private static class TByteValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TByteValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.BYTE) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TByteValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeByte(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TByteValueTupleSchemeFactory implements SchemeFactory { - public TByteValueTupleScheme getScheme() { - return new TByteValueTupleScheme(); - } - } - - private static class TByteValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeByte(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TByteValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readByte(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java deleted file mode 100644 index 6584c24a0142a..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIService.java +++ /dev/null @@ -1,18138 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCLIService { - - public interface Iface { - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException; - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException; - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException; - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException; - - public TGetTypeInfoResp GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException; - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException; - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException; - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException; - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException; - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException; - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException; - - public TGetPrimaryKeysResp GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException; - - public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException; - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException; - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException; - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException; - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException; - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException; - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException; - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException; - - public TRenewDelegationTokenResp 
RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException; - - } - - public interface AsyncIface { - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetPrimaryKeys(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetCrossReference(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException; - - } - - public static class Client extends org.apache.thrift.TServiceClient implements Iface { - public static class Factory implements org.apache.thrift.TServiceClientFactory { - public Factory() {} - public Client 
getClient(org.apache.thrift.protocol.TProtocol prot) { - return new Client(prot); - } - public Client getClient(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - return new Client(iprot, oprot); - } - } - - public Client(org.apache.thrift.protocol.TProtocol prot) - { - super(prot, prot); - } - - public Client(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TProtocol oprot) { - super(iprot, oprot); - } - - public TOpenSessionResp OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - send_OpenSession(req); - return recv_OpenSession(); - } - - public void send_OpenSession(TOpenSessionReq req) throws org.apache.thrift.TException - { - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - sendBase("OpenSession", args); - } - - public TOpenSessionResp recv_OpenSession() throws org.apache.thrift.TException - { - OpenSession_result result = new OpenSession_result(); - receiveBase(result, "OpenSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "OpenSession failed: unknown result"); - } - - public TCloseSessionResp CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - send_CloseSession(req); - return recv_CloseSession(); - } - - public void send_CloseSession(TCloseSessionReq req) throws org.apache.thrift.TException - { - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - sendBase("CloseSession", args); - } - - public TCloseSessionResp recv_CloseSession() throws org.apache.thrift.TException - { - CloseSession_result result = new CloseSession_result(); - receiveBase(result, "CloseSession"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseSession failed: unknown result"); - } - - public TGetInfoResp GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - send_GetInfo(req); - return recv_GetInfo(); - } - - public void send_GetInfo(TGetInfoReq req) throws org.apache.thrift.TException - { - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - sendBase("GetInfo", args); - } - - public TGetInfoResp recv_GetInfo() throws org.apache.thrift.TException - { - GetInfo_result result = new GetInfo_result(); - receiveBase(result, "GetInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetInfo failed: unknown result"); - } - - public TExecuteStatementResp ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - send_ExecuteStatement(req); - return recv_ExecuteStatement(); - } - - public void send_ExecuteStatement(TExecuteStatementReq req) throws org.apache.thrift.TException - { - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - sendBase("ExecuteStatement", args); - } - - public TExecuteStatementResp recv_ExecuteStatement() throws org.apache.thrift.TException - { - ExecuteStatement_result result = new ExecuteStatement_result(); - receiveBase(result, "ExecuteStatement"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "ExecuteStatement failed: unknown result"); - } - - public TGetTypeInfoResp 
GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - send_GetTypeInfo(req); - return recv_GetTypeInfo(); - } - - public void send_GetTypeInfo(TGetTypeInfoReq req) throws org.apache.thrift.TException - { - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - sendBase("GetTypeInfo", args); - } - - public TGetTypeInfoResp recv_GetTypeInfo() throws org.apache.thrift.TException - { - GetTypeInfo_result result = new GetTypeInfo_result(); - receiveBase(result, "GetTypeInfo"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTypeInfo failed: unknown result"); - } - - public TGetCatalogsResp GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - send_GetCatalogs(req); - return recv_GetCatalogs(); - } - - public void send_GetCatalogs(TGetCatalogsReq req) throws org.apache.thrift.TException - { - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - sendBase("GetCatalogs", args); - } - - public TGetCatalogsResp recv_GetCatalogs() throws org.apache.thrift.TException - { - GetCatalogs_result result = new GetCatalogs_result(); - receiveBase(result, "GetCatalogs"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCatalogs failed: unknown result"); - } - - public TGetSchemasResp GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - send_GetSchemas(req); - return recv_GetSchemas(); - } - - public void send_GetSchemas(TGetSchemasReq req) throws org.apache.thrift.TException - { - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - sendBase("GetSchemas", args); - } - - public TGetSchemasResp recv_GetSchemas() throws org.apache.thrift.TException - { - GetSchemas_result result = new GetSchemas_result(); - receiveBase(result, "GetSchemas"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetSchemas failed: unknown result"); - } - - public TGetTablesResp GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - send_GetTables(req); - return recv_GetTables(); - } - - public void send_GetTables(TGetTablesReq req) throws org.apache.thrift.TException - { - GetTables_args args = new GetTables_args(); - args.setReq(req); - sendBase("GetTables", args); - } - - public TGetTablesResp recv_GetTables() throws org.apache.thrift.TException - { - GetTables_result result = new GetTables_result(); - receiveBase(result, "GetTables"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTables failed: unknown result"); - } - - public TGetTableTypesResp GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - send_GetTableTypes(req); - return recv_GetTableTypes(); - } - - public void send_GetTableTypes(TGetTableTypesReq req) throws org.apache.thrift.TException - { - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - sendBase("GetTableTypes", args); - } - - public TGetTableTypesResp recv_GetTableTypes() throws org.apache.thrift.TException - { - GetTableTypes_result result = new GetTableTypes_result(); - receiveBase(result, "GetTableTypes"); - if (result.isSetSuccess()) { - return 
result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetTableTypes failed: unknown result"); - } - - public TGetColumnsResp GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - send_GetColumns(req); - return recv_GetColumns(); - } - - public void send_GetColumns(TGetColumnsReq req) throws org.apache.thrift.TException - { - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - sendBase("GetColumns", args); - } - - public TGetColumnsResp recv_GetColumns() throws org.apache.thrift.TException - { - GetColumns_result result = new GetColumns_result(); - receiveBase(result, "GetColumns"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetColumns failed: unknown result"); - } - - public TGetFunctionsResp GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - send_GetFunctions(req); - return recv_GetFunctions(); - } - - public void send_GetFunctions(TGetFunctionsReq req) throws org.apache.thrift.TException - { - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - sendBase("GetFunctions", args); - } - - public TGetFunctionsResp recv_GetFunctions() throws org.apache.thrift.TException - { - GetFunctions_result result = new GetFunctions_result(); - receiveBase(result, "GetFunctions"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetFunctions failed: unknown result"); - } - - public TGetPrimaryKeysResp GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException - { - send_GetPrimaryKeys(req); - return recv_GetPrimaryKeys(); - } - - public void send_GetPrimaryKeys(TGetPrimaryKeysReq req) throws org.apache.thrift.TException - { - GetPrimaryKeys_args args = new GetPrimaryKeys_args(); - args.setReq(req); - sendBase("GetPrimaryKeys", args); - } - - public TGetPrimaryKeysResp recv_GetPrimaryKeys() throws org.apache.thrift.TException - { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - receiveBase(result, "GetPrimaryKeys"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetPrimaryKeys failed: unknown result"); - } - - public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException - { - send_GetCrossReference(req); - return recv_GetCrossReference(); - } - - public void send_GetCrossReference(TGetCrossReferenceReq req) throws org.apache.thrift.TException - { - GetCrossReference_args args = new GetCrossReference_args(); - args.setReq(req); - sendBase("GetCrossReference", args); - } - - public TGetCrossReferenceResp recv_GetCrossReference() throws org.apache.thrift.TException - { - GetCrossReference_result result = new GetCrossReference_result(); - receiveBase(result, "GetCrossReference"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetCrossReference failed: unknown result"); - } - - public TGetOperationStatusResp GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - send_GetOperationStatus(req); - return recv_GetOperationStatus(); - } - - public void 
send_GetOperationStatus(TGetOperationStatusReq req) throws org.apache.thrift.TException - { - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - sendBase("GetOperationStatus", args); - } - - public TGetOperationStatusResp recv_GetOperationStatus() throws org.apache.thrift.TException - { - GetOperationStatus_result result = new GetOperationStatus_result(); - receiveBase(result, "GetOperationStatus"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetOperationStatus failed: unknown result"); - } - - public TCancelOperationResp CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - send_CancelOperation(req); - return recv_CancelOperation(); - } - - public void send_CancelOperation(TCancelOperationReq req) throws org.apache.thrift.TException - { - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - sendBase("CancelOperation", args); - } - - public TCancelOperationResp recv_CancelOperation() throws org.apache.thrift.TException - { - CancelOperation_result result = new CancelOperation_result(); - receiveBase(result, "CancelOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelOperation failed: unknown result"); - } - - public TCloseOperationResp CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - send_CloseOperation(req); - return recv_CloseOperation(); - } - - public void send_CloseOperation(TCloseOperationReq req) throws org.apache.thrift.TException - { - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - sendBase("CloseOperation", args); - } - - public TCloseOperationResp recv_CloseOperation() throws org.apache.thrift.TException - { - CloseOperation_result result = new CloseOperation_result(); - receiveBase(result, "CloseOperation"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CloseOperation failed: unknown result"); - } - - public TGetResultSetMetadataResp GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - send_GetResultSetMetadata(req); - return recv_GetResultSetMetadata(); - } - - public void send_GetResultSetMetadata(TGetResultSetMetadataReq req) throws org.apache.thrift.TException - { - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - sendBase("GetResultSetMetadata", args); - } - - public TGetResultSetMetadataResp recv_GetResultSetMetadata() throws org.apache.thrift.TException - { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - receiveBase(result, "GetResultSetMetadata"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetResultSetMetadata failed: unknown result"); - } - - public TFetchResultsResp FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - send_FetchResults(req); - return recv_FetchResults(); - } - - public void send_FetchResults(TFetchResultsReq req) throws org.apache.thrift.TException - { - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - sendBase("FetchResults", args); - } - - public 
TFetchResultsResp recv_FetchResults() throws org.apache.thrift.TException - { - FetchResults_result result = new FetchResults_result(); - receiveBase(result, "FetchResults"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "FetchResults failed: unknown result"); - } - - public TGetDelegationTokenResp GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - send_GetDelegationToken(req); - return recv_GetDelegationToken(); - } - - public void send_GetDelegationToken(TGetDelegationTokenReq req) throws org.apache.thrift.TException - { - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - sendBase("GetDelegationToken", args); - } - - public TGetDelegationTokenResp recv_GetDelegationToken() throws org.apache.thrift.TException - { - GetDelegationToken_result result = new GetDelegationToken_result(); - receiveBase(result, "GetDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "GetDelegationToken failed: unknown result"); - } - - public TCancelDelegationTokenResp CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - send_CancelDelegationToken(req); - return recv_CancelDelegationToken(); - } - - public void send_CancelDelegationToken(TCancelDelegationTokenReq req) throws org.apache.thrift.TException - { - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - sendBase("CancelDelegationToken", args); - } - - public TCancelDelegationTokenResp recv_CancelDelegationToken() throws org.apache.thrift.TException - { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - receiveBase(result, "CancelDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "CancelDelegationToken failed: unknown result"); - } - - public TRenewDelegationTokenResp RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - send_RenewDelegationToken(req); - return recv_RenewDelegationToken(); - } - - public void send_RenewDelegationToken(TRenewDelegationTokenReq req) throws org.apache.thrift.TException - { - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - sendBase("RenewDelegationToken", args); - } - - public TRenewDelegationTokenResp recv_RenewDelegationToken() throws org.apache.thrift.TException - { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - receiveBase(result, "RenewDelegationToken"); - if (result.isSetSuccess()) { - return result.success; - } - throw new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.MISSING_RESULT, "RenewDelegationToken failed: unknown result"); - } - - } - public static class AsyncClient extends org.apache.thrift.async.TAsyncClient implements AsyncIface { - public static class Factory implements org.apache.thrift.async.TAsyncClientFactory { - private org.apache.thrift.async.TAsyncClientManager clientManager; - private org.apache.thrift.protocol.TProtocolFactory protocolFactory; - public Factory(org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.protocol.TProtocolFactory protocolFactory) { - this.clientManager 
= clientManager; - this.protocolFactory = protocolFactory; - } - public AsyncClient getAsyncClient(org.apache.thrift.transport.TNonblockingTransport transport) { - return new AsyncClient(protocolFactory, clientManager, transport); - } - } - - public AsyncClient(org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.async.TAsyncClientManager clientManager, org.apache.thrift.transport.TNonblockingTransport transport) { - super(protocolFactory, clientManager, transport); - } - - public void OpenSession(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - OpenSession_call method_call = new OpenSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class OpenSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TOpenSessionReq req; - public OpenSession_call(TOpenSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("OpenSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - OpenSession_args args = new OpenSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TOpenSessionResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_OpenSession(); - } - } - - public void CloseSession(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseSession_call method_call = new CloseSession_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseSession_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseSessionReq req; - public CloseSession_call(TCloseSessionReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseSession", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseSession_args args = new CloseSession_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseSessionResp 
getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseSession(); - } - } - - public void GetInfo(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetInfo_call method_call = new GetInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetInfoReq req; - public GetInfo_call(TGetInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetInfo_args args = new GetInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetInfo(); - } - } - - public void ExecuteStatement(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - ExecuteStatement_call method_call = new ExecuteStatement_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class ExecuteStatement_call extends org.apache.thrift.async.TAsyncMethodCall { - private TExecuteStatementReq req; - public ExecuteStatement_call(TExecuteStatementReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("ExecuteStatement", org.apache.thrift.protocol.TMessageType.CALL, 0)); - ExecuteStatement_args args = new ExecuteStatement_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public 
TExecuteStatementResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_ExecuteStatement(); - } - } - - public void GetTypeInfo(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTypeInfo_call method_call = new GetTypeInfo_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTypeInfo_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTypeInfoReq req; - public GetTypeInfo_call(TGetTypeInfoReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTypeInfo", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTypeInfo_args args = new GetTypeInfo_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTypeInfoResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTypeInfo(); - } - } - - public void GetCatalogs(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCatalogs_call method_call = new GetCatalogs_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCatalogs_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCatalogsReq req; - public GetCatalogs_call(TGetCatalogsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCatalogs", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCatalogs_args args = new GetCatalogs_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - 
public TGetCatalogsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCatalogs(); - } - } - - public void GetSchemas(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetSchemas_call method_call = new GetSchemas_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetSchemas_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetSchemasReq req; - public GetSchemas_call(TGetSchemasReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetSchemas", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetSchemas_args args = new GetSchemas_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetSchemasResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetSchemas(); - } - } - - public void GetTables(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTables_call method_call = new GetTables_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTables_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTablesReq req; - public GetTables_call(TGetTablesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTables", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTables_args args = new GetTables_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTablesResp getResult() 
throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTables(); - } - } - - public void GetTableTypes(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetTableTypes_call method_call = new GetTableTypes_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetTableTypes_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetTableTypesReq req; - public GetTableTypes_call(TGetTableTypesReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetTableTypes", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetTableTypes_args args = new GetTableTypes_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetTableTypesResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetTableTypes(); - } - } - - public void GetColumns(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetColumns_call method_call = new GetColumns_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetColumns_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetColumnsReq req; - public GetColumns_call(TGetColumnsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetColumns", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetColumns_args args = new GetColumns_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetColumnsResp 
getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetColumns(); - } - } - - public void GetFunctions(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetFunctions_call method_call = new GetFunctions_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetFunctions_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetFunctionsReq req; - public GetFunctions_call(TGetFunctionsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetFunctions", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetFunctions_args args = new GetFunctions_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetFunctionsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetFunctions(); - } - } - - public void GetPrimaryKeys(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetPrimaryKeys_call method_call = new GetPrimaryKeys_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetPrimaryKeys_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetPrimaryKeysReq req; - public GetPrimaryKeys_call(TGetPrimaryKeysReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetPrimaryKeys", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetPrimaryKeys_args args = new GetPrimaryKeys_args(); - args.setReq(req); - args.write(prot); - 
prot.writeMessageEnd(); - } - - public TGetPrimaryKeysResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetPrimaryKeys(); - } - } - - public void GetCrossReference(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetCrossReference_call method_call = new GetCrossReference_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetCrossReference_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetCrossReferenceReq req; - public GetCrossReference_call(TGetCrossReferenceReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetCrossReference", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetCrossReference_args args = new GetCrossReference_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetCrossReferenceResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetCrossReference(); - } - } - - public void GetOperationStatus(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetOperationStatus_call method_call = new GetOperationStatus_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetOperationStatus_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetOperationStatusReq req; - public GetOperationStatus_call(TGetOperationStatusReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetOperationStatus", 
org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetOperationStatus_args args = new GetOperationStatus_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetOperationStatusResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetOperationStatus(); - } - } - - public void CancelOperation(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelOperation_call method_call = new CancelOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelOperationReq req; - public CancelOperation_call(TCancelOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelOperation_args args = new CancelOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelOperation(); - } - } - - public void CloseOperation(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CloseOperation_call method_call = new CloseOperation_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CloseOperation_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCloseOperationReq req; - public CloseOperation_call(TCloseOperationReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws 
org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CloseOperation", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CloseOperation_args args = new CloseOperation_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCloseOperationResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CloseOperation(); - } - } - - public void GetResultSetMetadata(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetResultSetMetadata_call method_call = new GetResultSetMetadata_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetResultSetMetadata_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetResultSetMetadataReq req; - public GetResultSetMetadata_call(TGetResultSetMetadataReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetResultSetMetadata", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetResultSetMetadata_args args = new GetResultSetMetadata_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetResultSetMetadataResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetResultSetMetadata(); - } - } - - public void FetchResults(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - FetchResults_call method_call = new FetchResults_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class FetchResults_call extends org.apache.thrift.async.TAsyncMethodCall { - private TFetchResultsReq req; - public FetchResults_call(TFetchResultsReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, 
protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("FetchResults", org.apache.thrift.protocol.TMessageType.CALL, 0)); - FetchResults_args args = new FetchResults_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TFetchResultsResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_FetchResults(); - } - } - - public void GetDelegationToken(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - GetDelegationToken_call method_call = new GetDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class GetDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TGetDelegationTokenReq req; - public GetDelegationToken_call(TGetDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("GetDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - GetDelegationToken_args args = new GetDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TGetDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_GetDelegationToken(); - } - } - - public void CancelDelegationToken(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - CancelDelegationToken_call method_call = new CancelDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class CancelDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TCancelDelegationTokenReq req; - public CancelDelegationToken_call(TCancelDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, 
org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("CancelDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - CancelDelegationToken_args args = new CancelDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TCancelDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_CancelDelegationToken(); - } - } - - public void RenewDelegationToken(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws org.apache.thrift.TException { - checkReady(); - RenewDelegationToken_call method_call = new RenewDelegationToken_call(req, resultHandler, this, ___protocolFactory, ___transport); - this.___currentMethod = method_call; - ___manager.call(method_call); - } - - public static class RenewDelegationToken_call extends org.apache.thrift.async.TAsyncMethodCall { - private TRenewDelegationTokenReq req; - public RenewDelegationToken_call(TRenewDelegationTokenReq req, org.apache.thrift.async.AsyncMethodCallback resultHandler, org.apache.thrift.async.TAsyncClient client, org.apache.thrift.protocol.TProtocolFactory protocolFactory, org.apache.thrift.transport.TNonblockingTransport transport) throws org.apache.thrift.TException { - super(client, protocolFactory, transport, resultHandler, false); - this.req = req; - } - - public void write_args(org.apache.thrift.protocol.TProtocol prot) throws org.apache.thrift.TException { - prot.writeMessageBegin(new org.apache.thrift.protocol.TMessage("RenewDelegationToken", org.apache.thrift.protocol.TMessageType.CALL, 0)); - RenewDelegationToken_args args = new RenewDelegationToken_args(); - args.setReq(req); - args.write(prot); - prot.writeMessageEnd(); - } - - public TRenewDelegationTokenResp getResult() throws org.apache.thrift.TException { - if (getState() != org.apache.thrift.async.TAsyncMethodCall.State.RESPONSE_READ) { - throw new IllegalStateException("Method call not finished!"); - } - org.apache.thrift.transport.TMemoryInputTransport memoryTransport = new org.apache.thrift.transport.TMemoryInputTransport(getFrameBuffer().array()); - org.apache.thrift.protocol.TProtocol prot = client.getProtocolFactory().getProtocol(memoryTransport); - return (new Client(prot)).recv_RenewDelegationToken(); - } - } - - } - - public static class Processor extends org.apache.thrift.TBaseProcessor implements org.apache.thrift.TProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(Processor.class.getName()); - public Processor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected Processor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - 
processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetPrimaryKeys", new GetPrimaryKeys()); - processMap.put("GetCrossReference", new GetCrossReference()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.ProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public OpenSession_result getResult(I iface, OpenSession_args args) throws org.apache.thrift.TException { - OpenSession_result result = new OpenSession_result(); - result.success = iface.OpenSession(args.req); - return result; - } - } - - public static class CloseSession extends org.apache.thrift.ProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseSession_result getResult(I iface, CloseSession_args args) throws org.apache.thrift.TException { - CloseSession_result result = new CloseSession_result(); - result.success = iface.CloseSession(args.req); - return result; - } - } - - public static class GetInfo extends org.apache.thrift.ProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - public GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetInfo_result getResult(I iface, GetInfo_args args) throws org.apache.thrift.TException { - GetInfo_result result = new GetInfo_result(); - result.success = iface.GetInfo(args.req); - return result; - } - } - - public static class ExecuteStatement extends org.apache.thrift.ProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - protected boolean isOneway() { - return false; - } - - public ExecuteStatement_result getResult(I iface, ExecuteStatement_args args) throws org.apache.thrift.TException { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = iface.ExecuteStatement(args.req); - return result; - } - } - - public static class GetTypeInfo extends org.apache.thrift.ProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - 
protected boolean isOneway() { - return false; - } - - public GetTypeInfo_result getResult(I iface, GetTypeInfo_args args) throws org.apache.thrift.TException { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = iface.GetTypeInfo(args.req); - return result; - } - } - - public static class GetCatalogs extends org.apache.thrift.ProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCatalogs_result getResult(I iface, GetCatalogs_args args) throws org.apache.thrift.TException { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = iface.GetCatalogs(args.req); - return result; - } - } - - public static class GetSchemas extends org.apache.thrift.ProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetSchemas_result getResult(I iface, GetSchemas_args args) throws org.apache.thrift.TException { - GetSchemas_result result = new GetSchemas_result(); - result.success = iface.GetSchemas(args.req); - return result; - } - } - - public static class GetTables extends org.apache.thrift.ProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTables_result getResult(I iface, GetTables_args args) throws org.apache.thrift.TException { - GetTables_result result = new GetTables_result(); - result.success = iface.GetTables(args.req); - return result; - } - } - - public static class GetTableTypes extends org.apache.thrift.ProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetTableTypes_result getResult(I iface, GetTableTypes_args args) throws org.apache.thrift.TException { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = iface.GetTableTypes(args.req); - return result; - } - } - - public static class GetColumns extends org.apache.thrift.ProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetColumns_result getResult(I iface, GetColumns_args args) throws org.apache.thrift.TException { - GetColumns_result result = new GetColumns_result(); - result.success = iface.GetColumns(args.req); - return result; - } - } - - public static class GetFunctions extends org.apache.thrift.ProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetFunctions_result getResult(I iface, GetFunctions_args args) throws org.apache.thrift.TException { - GetFunctions_result result = new GetFunctions_result(); - result.success = iface.GetFunctions(args.req); - return result; - } - } - - public static class GetPrimaryKeys extends org.apache.thrift.ProcessFunction { - public GetPrimaryKeys() { - super("GetPrimaryKeys"); - } - - public GetPrimaryKeys_args 
getEmptyArgsInstance() { - return new GetPrimaryKeys_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetPrimaryKeys_result getResult(I iface, GetPrimaryKeys_args args) throws org.apache.thrift.TException { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - result.success = iface.GetPrimaryKeys(args.req); - return result; - } - } - - public static class GetCrossReference extends org.apache.thrift.ProcessFunction { - public GetCrossReference() { - super("GetCrossReference"); - } - - public GetCrossReference_args getEmptyArgsInstance() { - return new GetCrossReference_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetCrossReference_result getResult(I iface, GetCrossReference_args args) throws org.apache.thrift.TException { - GetCrossReference_result result = new GetCrossReference_result(); - result.success = iface.GetCrossReference(args.req); - return result; - } - } - - public static class GetOperationStatus extends org.apache.thrift.ProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetOperationStatus_result getResult(I iface, GetOperationStatus_args args) throws org.apache.thrift.TException { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = iface.GetOperationStatus(args.req); - return result; - } - } - - public static class CancelOperation extends org.apache.thrift.ProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelOperation_result getResult(I iface, CancelOperation_args args) throws org.apache.thrift.TException { - CancelOperation_result result = new CancelOperation_result(); - result.success = iface.CancelOperation(args.req); - return result; - } - } - - public static class CloseOperation extends org.apache.thrift.ProcessFunction { - public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - protected boolean isOneway() { - return false; - } - - public CloseOperation_result getResult(I iface, CloseOperation_args args) throws org.apache.thrift.TException { - CloseOperation_result result = new CloseOperation_result(); - result.success = iface.CloseOperation(args.req); - return result; - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.ProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetResultSetMetadata_result getResult(I iface, GetResultSetMetadata_args args) throws org.apache.thrift.TException { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = iface.GetResultSetMetadata(args.req); - return result; - } - } - - public static class FetchResults extends org.apache.thrift.ProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - protected boolean isOneway() { - return false; - } - - public 
FetchResults_result getResult(I iface, FetchResults_args args) throws org.apache.thrift.TException { - FetchResults_result result = new FetchResults_result(); - result.success = iface.FetchResults(args.req); - return result; - } - } - - public static class GetDelegationToken extends org.apache.thrift.ProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public GetDelegationToken_result getResult(I iface, GetDelegationToken_args args) throws org.apache.thrift.TException { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = iface.GetDelegationToken(args.req); - return result; - } - } - - public static class CancelDelegationToken extends org.apache.thrift.ProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public CancelDelegationToken_result getResult(I iface, CancelDelegationToken_args args) throws org.apache.thrift.TException { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = iface.CancelDelegationToken(args.req); - return result; - } - } - - public static class RenewDelegationToken extends org.apache.thrift.ProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - protected boolean isOneway() { - return false; - } - - public RenewDelegationToken_result getResult(I iface, RenewDelegationToken_args args) throws org.apache.thrift.TException { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = iface.RenewDelegationToken(args.req); - return result; - } - } - - } - - public static class AsyncProcessor extends org.apache.thrift.TBaseAsyncProcessor { - private static final Logger LOGGER = LoggerFactory.getLogger(AsyncProcessor.class.getName()); - public AsyncProcessor(I iface) { - super(iface, getProcessMap(new HashMap>())); - } - - protected AsyncProcessor(I iface, Map> processMap) { - super(iface, getProcessMap(processMap)); - } - - private static Map> getProcessMap(Map> processMap) { - processMap.put("OpenSession", new OpenSession()); - processMap.put("CloseSession", new CloseSession()); - processMap.put("GetInfo", new GetInfo()); - processMap.put("ExecuteStatement", new ExecuteStatement()); - processMap.put("GetTypeInfo", new GetTypeInfo()); - processMap.put("GetCatalogs", new GetCatalogs()); - processMap.put("GetSchemas", new GetSchemas()); - processMap.put("GetTables", new GetTables()); - processMap.put("GetTableTypes", new GetTableTypes()); - processMap.put("GetColumns", new GetColumns()); - processMap.put("GetFunctions", new GetFunctions()); - processMap.put("GetPrimaryKeys", new GetPrimaryKeys()); - processMap.put("GetCrossReference", new GetCrossReference()); - processMap.put("GetOperationStatus", new GetOperationStatus()); - processMap.put("CancelOperation", new CancelOperation()); - processMap.put("CloseOperation", new CloseOperation()); - processMap.put("GetResultSetMetadata", new GetResultSetMetadata()); - processMap.put("FetchResults", new FetchResults()); - processMap.put("GetDelegationToken", new GetDelegationToken()); - 
processMap.put("CancelDelegationToken", new CancelDelegationToken()); - processMap.put("RenewDelegationToken", new RenewDelegationToken()); - return processMap; - } - - public static class OpenSession extends org.apache.thrift.AsyncProcessFunction { - public OpenSession() { - super("OpenSession"); - } - - public OpenSession_args getEmptyArgsInstance() { - return new OpenSession_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TOpenSessionResp o) { - OpenSession_result result = new OpenSession_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - OpenSession_result result = new OpenSession_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, OpenSession_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.OpenSession(args.req,resultHandler); - } - } - - public static class CloseSession extends org.apache.thrift.AsyncProcessFunction { - public CloseSession() { - super("CloseSession"); - } - - public CloseSession_args getEmptyArgsInstance() { - return new CloseSession_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCloseSessionResp o) { - CloseSession_result result = new CloseSession_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CloseSession_result result = new CloseSession_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CloseSession_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CloseSession(args.req,resultHandler); - } - } - - public static class GetInfo extends org.apache.thrift.AsyncProcessFunction { - public GetInfo() { - super("GetInfo"); - } - - 
public GetInfo_args getEmptyArgsInstance() { - return new GetInfo_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetInfoResp o) { - GetInfo_result result = new GetInfo_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetInfo_result result = new GetInfo_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetInfo_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetInfo(args.req,resultHandler); - } - } - - public static class ExecuteStatement extends org.apache.thrift.AsyncProcessFunction { - public ExecuteStatement() { - super("ExecuteStatement"); - } - - public ExecuteStatement_args getEmptyArgsInstance() { - return new ExecuteStatement_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TExecuteStatementResp o) { - ExecuteStatement_result result = new ExecuteStatement_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - ExecuteStatement_result result = new ExecuteStatement_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, ExecuteStatement_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.ExecuteStatement(args.req,resultHandler); - } - } - - public static class GetTypeInfo extends org.apache.thrift.AsyncProcessFunction { - public GetTypeInfo() { - super("GetTypeInfo"); - } - - public GetTypeInfo_args getEmptyArgsInstance() { - return new GetTypeInfo_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new 
AsyncMethodCallback() { - public void onComplete(TGetTypeInfoResp o) { - GetTypeInfo_result result = new GetTypeInfo_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTypeInfo_result result = new GetTypeInfo_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTypeInfo_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTypeInfo(args.req,resultHandler); - } - } - - public static class GetCatalogs extends org.apache.thrift.AsyncProcessFunction { - public GetCatalogs() { - super("GetCatalogs"); - } - - public GetCatalogs_args getEmptyArgsInstance() { - return new GetCatalogs_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetCatalogsResp o) { - GetCatalogs_result result = new GetCatalogs_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetCatalogs_result result = new GetCatalogs_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetCatalogs_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetCatalogs(args.req,resultHandler); - } - } - - public static class GetSchemas extends org.apache.thrift.AsyncProcessFunction { - public GetSchemas() { - super("GetSchemas"); - } - - public GetSchemas_args getEmptyArgsInstance() { - return new GetSchemas_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetSchemasResp o) { - GetSchemas_result result = new GetSchemas_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing 
to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetSchemas_result result = new GetSchemas_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetSchemas_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetSchemas(args.req,resultHandler); - } - } - - public static class GetTables extends org.apache.thrift.AsyncProcessFunction { - public GetTables() { - super("GetTables"); - } - - public GetTables_args getEmptyArgsInstance() { - return new GetTables_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetTablesResp o) { - GetTables_result result = new GetTables_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTables_result result = new GetTables_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTables_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTables(args.req,resultHandler); - } - } - - public static class GetTableTypes extends org.apache.thrift.AsyncProcessFunction { - public GetTableTypes() { - super("GetTableTypes"); - } - - public GetTableTypes_args getEmptyArgsInstance() { - return new GetTableTypes_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetTableTypesResp o) { - GetTableTypes_result result = new GetTableTypes_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetTableTypes_result result = new GetTableTypes_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg 
= (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetTableTypes_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetTableTypes(args.req,resultHandler); - } - } - - public static class GetColumns extends org.apache.thrift.AsyncProcessFunction { - public GetColumns() { - super("GetColumns"); - } - - public GetColumns_args getEmptyArgsInstance() { - return new GetColumns_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetColumnsResp o) { - GetColumns_result result = new GetColumns_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetColumns_result result = new GetColumns_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetColumns_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetColumns(args.req,resultHandler); - } - } - - public static class GetFunctions extends org.apache.thrift.AsyncProcessFunction { - public GetFunctions() { - super("GetFunctions"); - } - - public GetFunctions_args getEmptyArgsInstance() { - return new GetFunctions_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetFunctionsResp o) { - GetFunctions_result result = new GetFunctions_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetFunctions_result result = new GetFunctions_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - 
} - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetFunctions_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetFunctions(args.req,resultHandler); - } - } - - public static class GetPrimaryKeys extends org.apache.thrift.AsyncProcessFunction { - public GetPrimaryKeys() { - super("GetPrimaryKeys"); - } - - public GetPrimaryKeys_args getEmptyArgsInstance() { - return new GetPrimaryKeys_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetPrimaryKeysResp o) { - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetPrimaryKeys_result result = new GetPrimaryKeys_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetPrimaryKeys_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetPrimaryKeys(args.req,resultHandler); - } - } - - public static class GetCrossReference extends org.apache.thrift.AsyncProcessFunction { - public GetCrossReference() { - super("GetCrossReference"); - } - - public GetCrossReference_args getEmptyArgsInstance() { - return new GetCrossReference_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetCrossReferenceResp o) { - GetCrossReference_result result = new GetCrossReference_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetCrossReference_result result = new GetCrossReference_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetCrossReference_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) 
throws TException { - iface.GetCrossReference(args.req,resultHandler); - } - } - - public static class GetOperationStatus extends org.apache.thrift.AsyncProcessFunction { - public GetOperationStatus() { - super("GetOperationStatus"); - } - - public GetOperationStatus_args getEmptyArgsInstance() { - return new GetOperationStatus_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetOperationStatusResp o) { - GetOperationStatus_result result = new GetOperationStatus_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetOperationStatus_result result = new GetOperationStatus_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetOperationStatus_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetOperationStatus(args.req,resultHandler); - } - } - - public static class CancelOperation extends org.apache.thrift.AsyncProcessFunction { - public CancelOperation() { - super("CancelOperation"); - } - - public CancelOperation_args getEmptyArgsInstance() { - return new CancelOperation_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCancelOperationResp o) { - CancelOperation_result result = new CancelOperation_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CancelOperation_result result = new CancelOperation_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CancelOperation_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CancelOperation(args.req,resultHandler); - } - } - - public static class CloseOperation extends org.apache.thrift.AsyncProcessFunction { - 
public CloseOperation() { - super("CloseOperation"); - } - - public CloseOperation_args getEmptyArgsInstance() { - return new CloseOperation_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TCloseOperationResp o) { - CloseOperation_result result = new CloseOperation_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CloseOperation_result result = new CloseOperation_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CloseOperation_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CloseOperation(args.req,resultHandler); - } - } - - public static class GetResultSetMetadata extends org.apache.thrift.AsyncProcessFunction { - public GetResultSetMetadata() { - super("GetResultSetMetadata"); - } - - public GetResultSetMetadata_args getEmptyArgsInstance() { - return new GetResultSetMetadata_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetResultSetMetadataResp o) { - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetResultSetMetadata_result result = new GetResultSetMetadata_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetResultSetMetadata_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetResultSetMetadata(args.req,resultHandler); - } - } - - public static class FetchResults extends org.apache.thrift.AsyncProcessFunction { - public FetchResults() { - super("FetchResults"); - } - - public FetchResults_args getEmptyArgsInstance() { - return new FetchResults_args(); - } - - public 
AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TFetchResultsResp o) { - FetchResults_result result = new FetchResults_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - FetchResults_result result = new FetchResults_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, FetchResults_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.FetchResults(args.req,resultHandler); - } - } - - public static class GetDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public GetDelegationToken() { - super("GetDelegationToken"); - } - - public GetDelegationToken_args getEmptyArgsInstance() { - return new GetDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TGetDelegationTokenResp o) { - GetDelegationToken_result result = new GetDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - GetDelegationToken_result result = new GetDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, GetDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.GetDelegationToken(args.req,resultHandler); - } - } - - public static class CancelDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public CancelDelegationToken() { - super("CancelDelegationToken"); - } - - public CancelDelegationToken_args getEmptyArgsInstance() { - return new CancelDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = 
this; - return new AsyncMethodCallback() { - public void onComplete(TCancelDelegationTokenResp o) { - CancelDelegationToken_result result = new CancelDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - CancelDelegationToken_result result = new CancelDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, CancelDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.CancelDelegationToken(args.req,resultHandler); - } - } - - public static class RenewDelegationToken extends org.apache.thrift.AsyncProcessFunction { - public RenewDelegationToken() { - super("RenewDelegationToken"); - } - - public RenewDelegationToken_args getEmptyArgsInstance() { - return new RenewDelegationToken_args(); - } - - public AsyncMethodCallback getResultHandler(final AbstractNonblockingServer.AsyncFrameBuffer fb, final int seqid) { - final org.apache.thrift.AsyncProcessFunction fcall = this; - return new AsyncMethodCallback() { - public void onComplete(TRenewDelegationTokenResp o) { - RenewDelegationToken_result result = new RenewDelegationToken_result(); - result.success = o; - try { - fcall.sendResponse(fb,result, org.apache.thrift.protocol.TMessageType.REPLY,seqid); - return; - } catch (Exception e) { - LOGGER.error("Exception writing to internal frame buffer", e); - } - fb.close(); - } - public void onError(Exception e) { - byte msgType = org.apache.thrift.protocol.TMessageType.REPLY; - org.apache.thrift.TBase msg; - RenewDelegationToken_result result = new RenewDelegationToken_result(); - { - msgType = org.apache.thrift.protocol.TMessageType.EXCEPTION; - msg = (org.apache.thrift.TBase)new org.apache.thrift.TApplicationException(org.apache.thrift.TApplicationException.INTERNAL_ERROR, e.getMessage()); - } - try { - fcall.sendResponse(fb,msg,msgType,seqid); - return; - } catch (Exception ex) { - LOGGER.error("Exception writing to internal frame buffer", ex); - } - fb.close(); - } - }; - } - - protected boolean isOneway() { - return false; - } - - public void start(I iface, RenewDelegationToken_args args, org.apache.thrift.async.AsyncMethodCallback resultHandler) throws TException { - iface.RenewDelegationToken(args.req,resultHandler); - } - } - - } - - public static class OpenSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { 
- schemes.put(StandardScheme.class, new OpenSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_argsTupleSchemeFactory()); - } - - private TOpenSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_args.class, metaDataMap); - } - - public OpenSession_args() { - } - - public OpenSession_args( - TOpenSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public OpenSession_args(OpenSession_args other) { - if (other.isSetReq()) { - this.req = new TOpenSessionReq(other.req); - } - } - - public OpenSession_args deepCopy() { - return new OpenSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TOpenSessionReq getReq() { - return this.req; - } - - public void setReq(TOpenSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TOpenSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_args) - return this.equals((OpenSession_args)that); - return false; - } - - public boolean equals(OpenSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(OpenSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_argsStandardSchemeFactory implements SchemeFactory { - public OpenSession_argsStandardScheme getScheme() { - return new OpenSession_argsStandardScheme(); - } - } - - private static class OpenSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_argsTupleSchemeFactory implements SchemeFactory { - public OpenSession_argsTupleScheme getScheme() { - return new OpenSession_argsTupleScheme(); - } - } - - private static class OpenSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TOpenSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class OpenSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("OpenSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new OpenSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new OpenSession_resultTupleSchemeFactory()); - } - - private TOpenSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOpenSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(OpenSession_result.class, metaDataMap); - } - - public OpenSession_result() { - } - - public OpenSession_result( - TOpenSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public OpenSession_result(OpenSession_result other) { - if (other.isSetSuccess()) { - this.success = new TOpenSessionResp(other.success); - } - } - - public OpenSession_result deepCopy() { - return new OpenSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TOpenSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TOpenSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TOpenSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof OpenSession_result) - return this.equals((OpenSession_result)that); - return false; - } - - public boolean equals(OpenSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(OpenSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("OpenSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class OpenSession_resultStandardSchemeFactory implements SchemeFactory { - public OpenSession_resultStandardScheme getScheme() { - return new OpenSession_resultStandardScheme(); - } - } - - private static class OpenSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, OpenSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, OpenSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class OpenSession_resultTupleSchemeFactory implements SchemeFactory { - public OpenSession_resultTupleScheme getScheme() { - return new OpenSession_resultTupleScheme(); - } - } - - private static class OpenSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, OpenSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TOpenSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CloseSession_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_argsTupleSchemeFactory()); - } - - private TCloseSessionReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_args.class, metaDataMap); - } - - public CloseSession_args() { - } - - public CloseSession_args( - TCloseSessionReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseSession_args(CloseSession_args other) { - if (other.isSetReq()) { - this.req = new TCloseSessionReq(other.req); - } - } - - public CloseSession_args deepCopy() { - return new CloseSession_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseSessionReq getReq() { - return this.req; - } - - public void setReq(TCloseSessionReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseSessionReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_args) - return this.equals((CloseSession_args)that); - return false; - } - - public boolean equals(CloseSession_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseSession_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity 
- if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_argsStandardSchemeFactory implements SchemeFactory { - public CloseSession_argsStandardScheme getScheme() { - return new CloseSession_argsStandardScheme(); - } - } - - private static class CloseSession_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_argsTupleSchemeFactory implements SchemeFactory { - public CloseSession_argsTupleScheme getScheme() { - return new CloseSession_argsTupleScheme(); - } - } - - private static class CloseSession_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseSessionReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseSession_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseSession_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseSession_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseSession_resultTupleSchemeFactory()); - } - - private TCloseSessionResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseSessionResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseSession_result.class, metaDataMap); - } - - public CloseSession_result() { - } - - public CloseSession_result( - TCloseSessionResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseSession_result(CloseSession_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseSessionResp(other.success); - } - } - - public CloseSession_result deepCopy() { - return new CloseSession_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseSessionResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseSessionResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseSessionResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseSession_result) - return this.equals((CloseSession_result)that); - return false; - } - - public boolean equals(CloseSession_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseSession_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseSession_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseSession_resultStandardSchemeFactory implements SchemeFactory { - public CloseSession_resultStandardScheme getScheme() { - return new CloseSession_resultStandardScheme(); - } - } - - private static class CloseSession_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseSession_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseSession_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseSession_resultTupleSchemeFactory implements SchemeFactory { - public CloseSession_resultTupleScheme getScheme() { - return new CloseSession_resultTupleScheme(); - } - } - - private static class CloseSession_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseSession_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseSessionResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetInfo_args implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_argsTupleSchemeFactory()); - } - - private TGetInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_args.class, metaDataMap); - } - - public GetInfo_args() { - } - - public GetInfo_args( - TGetInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetInfo_args(GetInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetInfoReq(other.req); - } - } - - public GetInfo_args deepCopy() { - return new GetInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetInfoReq getReq() { - return this.req; - } - - public void setReq(TGetInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_args) - return this.equals((GetInfo_args)that); - return false; - } - - public boolean equals(GetInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void 
writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetInfo_argsStandardScheme getScheme() { - return new GetInfo_argsStandardScheme(); - } - } - - private static class GetInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetInfo_argsTupleScheme getScheme() { - return new GetInfo_argsTupleScheme(); - } - } - - private static class GetInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { 
- schemes.put(StandardScheme.class, new GetInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetInfo_resultTupleSchemeFactory()); - } - - private TGetInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetInfo_result.class, metaDataMap); - } - - public GetInfo_result() { - } - - public GetInfo_result( - TGetInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetInfo_result(GetInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetInfoResp(other.success); - } - } - - public GetInfo_result deepCopy() { - return new GetInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetInfo_result) - return this.equals((GetInfo_result)that); - return false; - } - - public boolean equals(GetInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return 
sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetInfo_resultStandardScheme getScheme() { - return new GetInfo_resultStandardScheme(); - } - } - - private static class GetInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetInfo_resultTupleScheme getScheme() { - return new GetInfo_resultTupleScheme(); - } - } - - private static class GetInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("ExecuteStatement_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_argsTupleSchemeFactory()); - } - - private TExecuteStatementReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_args.class, metaDataMap); - } - - public ExecuteStatement_args() { - } - - public ExecuteStatement_args( - TExecuteStatementReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public ExecuteStatement_args(ExecuteStatement_args other) { - if (other.isSetReq()) { - this.req = new TExecuteStatementReq(other.req); - } - } - - public ExecuteStatement_args deepCopy() { - return new ExecuteStatement_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TExecuteStatementReq getReq() { - return this.req; - } - - public void setReq(TExecuteStatementReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TExecuteStatementReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_args) - return this.equals((ExecuteStatement_args)that); - return false; - } - - public boolean equals(ExecuteStatement_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(ExecuteStatement_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for 
required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_argsStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsStandardScheme getScheme() { - return new ExecuteStatement_argsStandardScheme(); - } - } - - private static class ExecuteStatement_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_argsTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_argsTupleScheme getScheme() { - return new ExecuteStatement_argsTupleScheme(); - } - } - - private static class ExecuteStatement_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TExecuteStatementReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class ExecuteStatement_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("ExecuteStatement_result"); - - private static 
final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new ExecuteStatement_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new ExecuteStatement_resultTupleSchemeFactory()); - } - - private TExecuteStatementResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TExecuteStatementResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ExecuteStatement_result.class, metaDataMap); - } - - public ExecuteStatement_result() { - } - - public ExecuteStatement_result( - TExecuteStatementResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public ExecuteStatement_result(ExecuteStatement_result other) { - if (other.isSetSuccess()) { - this.success = new TExecuteStatementResp(other.success); - } - } - - public ExecuteStatement_result deepCopy() { - return new ExecuteStatement_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TExecuteStatementResp getSuccess() { - return this.success; - } - - public void setSuccess(TExecuteStatementResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TExecuteStatementResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof ExecuteStatement_result) - return this.equals((ExecuteStatement_result)that); - return false; - } - - public boolean equals(ExecuteStatement_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(ExecuteStatement_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("ExecuteStatement_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == 
null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class ExecuteStatement_resultStandardSchemeFactory implements SchemeFactory { - public ExecuteStatement_resultStandardScheme getScheme() { - return new ExecuteStatement_resultStandardScheme(); - } - } - - private static class ExecuteStatement_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TExecuteStatementResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class ExecuteStatement_resultTupleSchemeFactory implements SchemeFactory { - public ExecuteStatement_resultTupleScheme getScheme() { - return new ExecuteStatement_resultTupleScheme(); - } - } - - private static class ExecuteStatement_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, ExecuteStatement_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TExecuteStatementResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - 
} - } - - } - - public static class GetTypeInfo_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTypeInfo_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTypeInfo_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTypeInfo_argsTupleSchemeFactory()); - } - - private TGetTypeInfoReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTypeInfoReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTypeInfo_args.class, metaDataMap); - } - - public GetTypeInfo_args() { - } - - public GetTypeInfo_args( - TGetTypeInfoReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTypeInfo_args(GetTypeInfo_args other) { - if (other.isSetReq()) { - this.req = new TGetTypeInfoReq(other.req); - } - } - - public GetTypeInfo_args deepCopy() { - return new GetTypeInfo_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTypeInfoReq getReq() { - return this.req; - } - - public void setReq(TGetTypeInfoReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTypeInfoReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTypeInfo_args) - return this.equals((GetTypeInfo_args)that); - return false; - } - - public boolean equals(GetTypeInfo_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTypeInfo_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTypeInfo_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTypeInfo_argsStandardSchemeFactory implements SchemeFactory { - public GetTypeInfo_argsStandardScheme getScheme() { - return new GetTypeInfo_argsStandardScheme(); - } - } - - private static class GetTypeInfo_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTypeInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTypeInfo_argsTupleSchemeFactory implements SchemeFactory { - public GetTypeInfo_argsTupleScheme getScheme() { - return new GetTypeInfo_argsTupleScheme(); - } - } - - private static class GetTypeInfo_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTypeInfoReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTypeInfo_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTypeInfo_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTypeInfo_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTypeInfo_resultTupleSchemeFactory()); - } - - private TGetTypeInfoResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTypeInfoResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTypeInfo_result.class, metaDataMap); - } - - public GetTypeInfo_result() { - } - - public GetTypeInfo_result( - TGetTypeInfoResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTypeInfo_result(GetTypeInfo_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTypeInfoResp(other.success); - } - } - - public GetTypeInfo_result deepCopy() { - return new GetTypeInfo_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTypeInfoResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTypeInfoResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTypeInfoResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTypeInfo_result) - return this.equals((GetTypeInfo_result)that); - return false; - } - - public boolean equals(GetTypeInfo_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTypeInfo_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTypeInfo_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTypeInfo_resultStandardSchemeFactory implements SchemeFactory { - public GetTypeInfo_resultStandardScheme getScheme() { - return new GetTypeInfo_resultStandardScheme(); - } - } - - private static class GetTypeInfo_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTypeInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTypeInfo_resultTupleSchemeFactory implements SchemeFactory { - public GetTypeInfo_resultTupleScheme getScheme() { - return new GetTypeInfo_resultTupleScheme(); - } - } - - private static class GetTypeInfo_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTypeInfo_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTypeInfoResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetCatalogs_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCatalogs_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCatalogs_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCatalogs_argsTupleSchemeFactory()); - } - - private TGetCatalogsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCatalogsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCatalogs_args.class, metaDataMap); - } - - public GetCatalogs_args() { - } - - public GetCatalogs_args( - TGetCatalogsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCatalogs_args(GetCatalogs_args other) { - if (other.isSetReq()) { - this.req = new TGetCatalogsReq(other.req); - } - } - - public GetCatalogs_args deepCopy() { - return new GetCatalogs_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetCatalogsReq getReq() { - return this.req; - } - - public void setReq(TGetCatalogsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetCatalogsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCatalogs_args) - return this.equals((GetCatalogs_args)that); - return false; - } - - public boolean equals(GetCatalogs_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCatalogs_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCatalogs_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != 
null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCatalogs_argsStandardSchemeFactory implements SchemeFactory { - public GetCatalogs_argsStandardScheme getScheme() { - return new GetCatalogs_argsStandardScheme(); - } - } - - private static class GetCatalogs_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCatalogs_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetCatalogsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCatalogs_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCatalogs_argsTupleSchemeFactory implements SchemeFactory { - public GetCatalogs_argsTupleScheme getScheme() { - return new GetCatalogs_argsTupleScheme(); - } - } - - private static class GetCatalogs_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetCatalogsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetCatalogs_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCatalogs_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCatalogs_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCatalogs_resultTupleSchemeFactory()); - } - - private TGetCatalogsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCatalogsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCatalogs_result.class, metaDataMap); - } - - public GetCatalogs_result() { - } - - public GetCatalogs_result( - TGetCatalogsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCatalogs_result(GetCatalogs_result other) { - if (other.isSetSuccess()) { - this.success = new TGetCatalogsResp(other.success); - } - } - - public GetCatalogs_result deepCopy() { - return new GetCatalogs_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetCatalogsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetCatalogsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetCatalogsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCatalogs_result) - return this.equals((GetCatalogs_result)that); - return false; - } - - public boolean equals(GetCatalogs_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCatalogs_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCatalogs_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - 
} - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCatalogs_resultStandardSchemeFactory implements SchemeFactory { - public GetCatalogs_resultStandardScheme getScheme() { - return new GetCatalogs_resultStandardScheme(); - } - } - - private static class GetCatalogs_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCatalogs_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetCatalogsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCatalogs_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCatalogs_resultTupleSchemeFactory implements SchemeFactory { - public GetCatalogs_resultTupleScheme getScheme() { - return new GetCatalogs_resultTupleScheme(); - } - } - - private static class GetCatalogs_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCatalogs_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetCatalogsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetSchemas_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - 
private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetSchemas_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetSchemas_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetSchemas_argsTupleSchemeFactory()); - } - - private TGetSchemasReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetSchemasReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetSchemas_args.class, metaDataMap); - } - - public GetSchemas_args() { - } - - public GetSchemas_args( - TGetSchemasReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetSchemas_args(GetSchemas_args other) { - if (other.isSetReq()) { - this.req = new TGetSchemasReq(other.req); - } - } - - public GetSchemas_args deepCopy() { - return new GetSchemas_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetSchemasReq getReq() { - return this.req; - } - - public void setReq(TGetSchemasReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetSchemasReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetSchemas_args) - return this.equals((GetSchemas_args)that); - return false; - } - - public boolean equals(GetSchemas_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetSchemas_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetSchemas_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - 
req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetSchemas_argsStandardSchemeFactory implements SchemeFactory { - public GetSchemas_argsStandardScheme getScheme() { - return new GetSchemas_argsStandardScheme(); - } - } - - private static class GetSchemas_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetSchemas_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetSchemasReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetSchemas_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetSchemas_argsTupleSchemeFactory implements SchemeFactory { - public GetSchemas_argsTupleScheme getScheme() { - return new GetSchemas_argsTupleScheme(); - } - } - - private static class GetSchemas_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetSchemas_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetSchemas_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetSchemasReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetSchemas_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetSchemas_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetSchemas_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetSchemas_resultTupleSchemeFactory()); - } - - private TGetSchemasResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetSchemasResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetSchemas_result.class, metaDataMap); - } - - public GetSchemas_result() { - } - - public GetSchemas_result( - TGetSchemasResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetSchemas_result(GetSchemas_result other) { - if (other.isSetSuccess()) { - this.success = new TGetSchemasResp(other.success); - } - } - - public GetSchemas_result deepCopy() { - return new GetSchemas_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetSchemasResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetSchemasResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetSchemasResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetSchemas_result) - return this.equals((GetSchemas_result)that); - return false; - } - - public boolean equals(GetSchemas_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetSchemas_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetSchemas_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = 
false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetSchemas_resultStandardSchemeFactory implements SchemeFactory { - public GetSchemas_resultStandardScheme getScheme() { - return new GetSchemas_resultStandardScheme(); - } - } - - private static class GetSchemas_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetSchemas_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetSchemasResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetSchemas_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetSchemas_resultTupleSchemeFactory implements SchemeFactory { - public GetSchemas_resultTupleScheme getScheme() { - return new GetSchemas_resultTupleScheme(); - } - } - - private static class GetSchemas_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetSchemas_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetSchemas_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetSchemasResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTables_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_argsTupleSchemeFactory()); - } - - private TGetTablesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_args.class, metaDataMap); - } - - public GetTables_args() { - } - - public GetTables_args( - TGetTablesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTables_args(GetTables_args other) { - if (other.isSetReq()) { - this.req = new TGetTablesReq(other.req); - } - } - - public GetTables_args deepCopy() { - return new GetTables_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTablesReq getReq() { - return this.req; - } - - public void setReq(TGetTablesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTablesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_args) - return this.equals((GetTables_args)that); - return false; - } - - public boolean equals(GetTables_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTables_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); 
- } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_argsStandardSchemeFactory implements SchemeFactory { - public GetTables_argsStandardScheme getScheme() { - return new GetTables_argsStandardScheme(); - } - } - - private static class GetTables_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_argsTupleSchemeFactory implements SchemeFactory { - public GetTables_argsTupleScheme getScheme() { - return new GetTables_argsTupleScheme(); - } - } - - private static class GetTables_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTablesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTables_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTables_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, 
SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTables_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTables_resultTupleSchemeFactory()); - } - - private TGetTablesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTablesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTables_result.class, metaDataMap); - } - - public GetTables_result() { - } - - public GetTables_result( - TGetTablesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTables_result(GetTables_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTablesResp(other.success); - } - } - - public GetTables_result deepCopy() { - return new GetTables_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTablesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTablesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTablesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTables_result) - return this.equals((GetTables_result)that); - return false; - } - - public boolean equals(GetTables_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTables_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTables_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - 
sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTables_resultStandardSchemeFactory implements SchemeFactory { - public GetTables_resultStandardScheme getScheme() { - return new GetTables_resultStandardScheme(); - } - } - - private static class GetTables_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTables_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTables_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTables_resultTupleSchemeFactory implements SchemeFactory { - public GetTables_resultTupleScheme getScheme() { - return new GetTables_resultTupleScheme(); - } - } - - private static class GetTables_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTables_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTablesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetTableTypes_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_argsTupleSchemeFactory()); - } - - private TGetTableTypesReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_args.class, metaDataMap); - } - - public GetTableTypes_args() { - } - - public GetTableTypes_args( - TGetTableTypesReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTableTypes_args(GetTableTypes_args other) { - if (other.isSetReq()) { - this.req = new TGetTableTypesReq(other.req); - } - } - - public GetTableTypes_args deepCopy() { - return new GetTableTypes_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetTableTypesReq getReq() { - return this.req; - } - - public void setReq(TGetTableTypesReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetTableTypesReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_args) - return this.equals((GetTableTypes_args)that); - return false; - } - - public boolean equals(GetTableTypes_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTableTypes_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for 
sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_argsStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_argsStandardScheme getScheme() { - return new GetTableTypes_argsStandardScheme(); - } - } - - private static class GetTableTypes_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_argsTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_argsTupleScheme getScheme() { - return new GetTableTypes_argsTupleScheme(); - } - } - - private static class GetTableTypes_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetTableTypesReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetTableTypes_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetTableTypes_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new 
org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetTableTypes_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetTableTypes_resultTupleSchemeFactory()); - } - - private TGetTableTypesResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetTableTypesResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetTableTypes_result.class, metaDataMap); - } - - public GetTableTypes_result() { - } - - public GetTableTypes_result( - TGetTableTypesResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetTableTypes_result(GetTableTypes_result other) { - if (other.isSetSuccess()) { - this.success = new TGetTableTypesResp(other.success); - } - } - - public GetTableTypes_result deepCopy() { - return new GetTableTypes_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetTableTypesResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetTableTypesResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetTableTypesResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetTableTypes_result) - return this.equals((GetTableTypes_result)that); - return false; - } - - public boolean equals(GetTableTypes_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetTableTypes_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetTableTypes_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetTableTypes_resultStandardSchemeFactory implements SchemeFactory { - public GetTableTypes_resultStandardScheme getScheme() { - return new GetTableTypes_resultStandardScheme(); - } - } - - private static class GetTableTypes_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetTableTypes_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetTableTypes_resultTupleSchemeFactory implements SchemeFactory { - public GetTableTypes_resultTupleScheme getScheme() { - return new GetTableTypes_resultTupleScheme(); - } - } - - private static class GetTableTypes_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetTableTypes_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetTableTypesResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetColumns_args implements 
org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_argsTupleSchemeFactory()); - } - - private TGetColumnsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_args.class, metaDataMap); - } - - public GetColumns_args() { - } - - public GetColumns_args( - TGetColumnsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetColumns_args(GetColumns_args other) { - if (other.isSetReq()) { - this.req = new TGetColumnsReq(other.req); - } - } - - public GetColumns_args deepCopy() { - return new GetColumns_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetColumnsReq getReq() { - return this.req; - } - - public void setReq(TGetColumnsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetColumnsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_args) - return this.equals((GetColumns_args)that); - return false; - } - - public boolean equals(GetColumns_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetColumns_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - 
req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_argsStandardSchemeFactory implements SchemeFactory { - public GetColumns_argsStandardScheme getScheme() { - return new GetColumns_argsStandardScheme(); - } - } - - private static class GetColumns_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_argsTupleSchemeFactory implements SchemeFactory { - public GetColumns_argsTupleScheme getScheme() { - return new GetColumns_argsTupleScheme(); - } - } - - private static class GetColumns_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetColumnsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetColumns_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetColumns_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - 
private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetColumns_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetColumns_resultTupleSchemeFactory()); - } - - private TGetColumnsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetColumnsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetColumns_result.class, metaDataMap); - } - - public GetColumns_result() { - } - - public GetColumns_result( - TGetColumnsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetColumns_result(GetColumns_result other) { - if (other.isSetSuccess()) { - this.success = new TGetColumnsResp(other.success); - } - } - - public GetColumns_result deepCopy() { - return new GetColumns_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetColumnsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetColumnsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetColumnsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetColumns_result) - return this.equals((GetColumns_result)that); - return false; - } - - public boolean equals(GetColumns_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetColumns_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetColumns_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = 
false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetColumns_resultStandardSchemeFactory implements SchemeFactory { - public GetColumns_resultStandardScheme getScheme() { - return new GetColumns_resultStandardScheme(); - } - } - - private static class GetColumns_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetColumns_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetColumns_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetColumns_resultTupleSchemeFactory implements SchemeFactory { - public GetColumns_resultTupleScheme getScheme() { - return new GetColumns_resultTupleScheme(); - } - } - - private static class GetColumns_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetColumns_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetColumnsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetFunctions_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_argsTupleSchemeFactory()); - } - - private TGetFunctionsReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_args.class, metaDataMap); - } - - public GetFunctions_args() { - } - - public GetFunctions_args( - TGetFunctionsReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetFunctions_args(GetFunctions_args other) { - if (other.isSetReq()) { - this.req = new TGetFunctionsReq(other.req); - } - } - - public GetFunctions_args deepCopy() { - return new GetFunctions_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetFunctionsReq getReq() { - return this.req; - } - - public void setReq(TGetFunctionsReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetFunctionsReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_args) - return this.equals((GetFunctions_args)that); - return false; - } - - public boolean equals(GetFunctions_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetFunctions_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity 
- if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_argsStandardSchemeFactory implements SchemeFactory { - public GetFunctions_argsStandardScheme getScheme() { - return new GetFunctions_argsStandardScheme(); - } - } - - private static class GetFunctions_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_argsTupleSchemeFactory implements SchemeFactory { - public GetFunctions_argsTupleScheme getScheme() { - return new GetFunctions_argsTupleScheme(); - } - } - - private static class GetFunctions_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetFunctionsReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetFunctions_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetFunctions_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", 
org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetFunctions_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetFunctions_resultTupleSchemeFactory()); - } - - private TGetFunctionsResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetFunctionsResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetFunctions_result.class, metaDataMap); - } - - public GetFunctions_result() { - } - - public GetFunctions_result( - TGetFunctionsResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetFunctions_result(GetFunctions_result other) { - if (other.isSetSuccess()) { - this.success = new TGetFunctionsResp(other.success); - } - } - - public GetFunctions_result deepCopy() { - return new GetFunctions_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetFunctionsResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetFunctionsResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetFunctionsResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetFunctions_result) - return this.equals((GetFunctions_result)that); - return false; - } - - public boolean equals(GetFunctions_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetFunctions_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetFunctions_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - 
sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetFunctions_resultStandardSchemeFactory implements SchemeFactory { - public GetFunctions_resultStandardScheme getScheme() { - return new GetFunctions_resultStandardScheme(); - } - } - - private static class GetFunctions_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetFunctions_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetFunctions_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetFunctions_resultTupleSchemeFactory implements SchemeFactory { - public GetFunctions_resultTupleScheme getScheme() { - return new GetFunctions_resultTupleScheme(); - } - } - - private static class GetFunctions_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetFunctions_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetFunctionsResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetPrimaryKeys_args implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetPrimaryKeys_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetPrimaryKeys_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetPrimaryKeys_argsTupleSchemeFactory()); - } - - private TGetPrimaryKeysReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetPrimaryKeysReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetPrimaryKeys_args.class, metaDataMap); - } - - public GetPrimaryKeys_args() { - } - - public GetPrimaryKeys_args( - TGetPrimaryKeysReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetPrimaryKeys_args(GetPrimaryKeys_args other) { - if (other.isSetReq()) { - this.req = new TGetPrimaryKeysReq(other.req); - } - } - - public GetPrimaryKeys_args deepCopy() { - return new GetPrimaryKeys_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetPrimaryKeysReq getReq() { - return this.req; - } - - public void setReq(TGetPrimaryKeysReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetPrimaryKeysReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetPrimaryKeys_args) - return this.equals((GetPrimaryKeys_args)that); - return false; - } - - public boolean equals(GetPrimaryKeys_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetPrimaryKeys_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetPrimaryKeys_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // 
check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetPrimaryKeys_argsStandardSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_argsStandardScheme getScheme() { - return new GetPrimaryKeys_argsStandardScheme(); - } - } - - private static class GetPrimaryKeys_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetPrimaryKeysReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetPrimaryKeys_argsTupleSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_argsTupleScheme getScheme() { - return new GetPrimaryKeys_argsTupleScheme(); - } - } - - private static class GetPrimaryKeys_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetPrimaryKeysReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetPrimaryKeys_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetPrimaryKeys_result"); - - private static final org.apache.thrift.protocol.TField 
SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetPrimaryKeys_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetPrimaryKeys_resultTupleSchemeFactory()); - } - - private TGetPrimaryKeysResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetPrimaryKeysResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetPrimaryKeys_result.class, metaDataMap); - } - - public GetPrimaryKeys_result() { - } - - public GetPrimaryKeys_result( - TGetPrimaryKeysResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetPrimaryKeys_result(GetPrimaryKeys_result other) { - if (other.isSetSuccess()) { - this.success = new TGetPrimaryKeysResp(other.success); - } - } - - public GetPrimaryKeys_result deepCopy() { - return new GetPrimaryKeys_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetPrimaryKeysResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetPrimaryKeysResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetPrimaryKeysResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetPrimaryKeys_result) - return this.equals((GetPrimaryKeys_result)that); - return false; - } - - public boolean equals(GetPrimaryKeys_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetPrimaryKeys_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetPrimaryKeys_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); 
- } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetPrimaryKeys_resultStandardSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_resultStandardScheme getScheme() { - return new GetPrimaryKeys_resultStandardScheme(); - } - } - - private static class GetPrimaryKeys_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetPrimaryKeysResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetPrimaryKeys_resultTupleSchemeFactory implements SchemeFactory { - public GetPrimaryKeys_resultTupleScheme getScheme() { - return new GetPrimaryKeys_resultTupleScheme(); - } - } - - private static class GetPrimaryKeys_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetPrimaryKeys_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetPrimaryKeysResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class 
GetCrossReference_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetCrossReference_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCrossReference_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCrossReference_argsTupleSchemeFactory()); - } - - private TGetCrossReferenceReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCrossReferenceReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCrossReference_args.class, metaDataMap); - } - - public GetCrossReference_args() { - } - - public GetCrossReference_args( - TGetCrossReferenceReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCrossReference_args(GetCrossReference_args other) { - if (other.isSetReq()) { - this.req = new TGetCrossReferenceReq(other.req); - } - } - - public GetCrossReference_args deepCopy() { - return new GetCrossReference_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetCrossReferenceReq getReq() { - return this.req; - } - - public void setReq(TGetCrossReferenceReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetCrossReferenceReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCrossReference_args) - return this.equals((GetCrossReference_args)that); - return false; - } - - public boolean equals(GetCrossReference_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCrossReference_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCrossReference_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - 
// check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCrossReference_argsStandardSchemeFactory implements SchemeFactory { - public GetCrossReference_argsStandardScheme getScheme() { - return new GetCrossReference_argsStandardScheme(); - } - } - - private static class GetCrossReference_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCrossReference_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetCrossReferenceReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCrossReference_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCrossReference_argsTupleSchemeFactory implements SchemeFactory { - public GetCrossReference_argsTupleScheme getScheme() { - return new GetCrossReference_argsTupleScheme(); - } - } - - private static class GetCrossReference_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetCrossReferenceReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetCrossReference_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("GetCrossReference_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetCrossReference_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetCrossReference_resultTupleSchemeFactory()); - } - - private TGetCrossReferenceResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetCrossReferenceResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetCrossReference_result.class, metaDataMap); - } - - public GetCrossReference_result() { - } - - public GetCrossReference_result( - TGetCrossReferenceResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetCrossReference_result(GetCrossReference_result other) { - if (other.isSetSuccess()) { - this.success = new TGetCrossReferenceResp(other.success); - } - } - - public GetCrossReference_result deepCopy() { - return new GetCrossReference_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetCrossReferenceResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetCrossReferenceResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetCrossReferenceResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetCrossReference_result) - return this.equals((GetCrossReference_result)that); - return false; - } - - public boolean equals(GetCrossReference_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetCrossReference_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetCrossReference_result("); - boolean first = true; - - sb.append("success:"); - if 
(this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetCrossReference_resultStandardSchemeFactory implements SchemeFactory { - public GetCrossReference_resultStandardScheme getScheme() { - return new GetCrossReference_resultStandardScheme(); - } - } - - private static class GetCrossReference_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetCrossReference_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetCrossReferenceResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetCrossReference_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetCrossReference_resultTupleSchemeFactory implements SchemeFactory { - public GetCrossReference_resultTupleScheme getScheme() { - return new GetCrossReference_resultTupleScheme(); - } - } - - private static class GetCrossReference_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetCrossReference_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetCrossReferenceResp(); - struct.success.read(iprot); - 
struct.setSuccessIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetOperationStatus_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_argsTupleSchemeFactory()); - } - - private TGetOperationStatusReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_args.class, metaDataMap); - } - - public GetOperationStatus_args() { - } - - public GetOperationStatus_args( - TGetOperationStatusReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetOperationStatus_args(GetOperationStatus_args other) { - if (other.isSetReq()) { - this.req = new TGetOperationStatusReq(other.req); - } - } - - public GetOperationStatus_args deepCopy() { - return new GetOperationStatus_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetOperationStatusReq getReq() { - return this.req; - } - - public void setReq(TGetOperationStatusReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetOperationStatusReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_args) - return this.equals((GetOperationStatus_args)that); - return false; - } - - public boolean equals(GetOperationStatus_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetOperationStatus_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws 
org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_argsStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsStandardScheme getScheme() { - return new GetOperationStatus_argsStandardScheme(); - } - } - - private static class GetOperationStatus_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_argsTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_argsTupleScheme getScheme() { - return new GetOperationStatus_argsTupleScheme(); - } - } - - private static class GetOperationStatus_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetOperationStatusReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetOperationStatus_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("GetOperationStatus_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetOperationStatus_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetOperationStatus_resultTupleSchemeFactory()); - } - - private TGetOperationStatusResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetOperationStatusResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetOperationStatus_result.class, metaDataMap); - } - - public GetOperationStatus_result() { - } - - public GetOperationStatus_result( - TGetOperationStatusResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetOperationStatus_result(GetOperationStatus_result other) { - if (other.isSetSuccess()) { - this.success = new TGetOperationStatusResp(other.success); - } - } - - public GetOperationStatus_result deepCopy() { - return new GetOperationStatus_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetOperationStatusResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetOperationStatusResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetOperationStatusResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetOperationStatus_result) - return this.equals((GetOperationStatus_result)that); - return false; - } - - public boolean equals(GetOperationStatus_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetOperationStatus_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetOperationStatus_result("); - boolean first = true; - - sb.append("success:"); 
- if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetOperationStatus_resultStandardSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultStandardScheme getScheme() { - return new GetOperationStatus_resultStandardScheme(); - } - } - - private static class GetOperationStatus_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetOperationStatusResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetOperationStatus_resultTupleSchemeFactory implements SchemeFactory { - public GetOperationStatus_resultTupleScheme getScheme() { - return new GetOperationStatus_resultTupleScheme(); - } - } - - private static class GetOperationStatus_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetOperationStatus_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetOperationStatusResp(); - 
struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class CancelOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_argsTupleSchemeFactory()); - } - - private TCancelOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_args.class, metaDataMap); - } - - public CancelOperation_args() { - } - - public CancelOperation_args( - TCancelOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CancelOperation_args(CancelOperation_args other) { - if (other.isSetReq()) { - this.req = new TCancelOperationReq(other.req); - } - } - - public CancelOperation_args deepCopy() { - return new CancelOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCancelOperationReq getReq() { - return this.req; - } - - public void setReq(TCancelOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCancelOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_args) - return this.equals((CancelOperation_args)that); - return false; - } - - public boolean equals(CancelOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CancelOperation_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required 
fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_argsStandardSchemeFactory implements SchemeFactory { - public CancelOperation_argsStandardScheme getScheme() { - return new CancelOperation_argsStandardScheme(); - } - } - - private static class CancelOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_argsTupleSchemeFactory implements SchemeFactory { - public CancelOperation_argsTupleScheme getScheme() { - return new CancelOperation_argsTupleScheme(); - } - } - - private static class CancelOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCancelOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CancelOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CancelOperation_result"); - - private static final 
org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CancelOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CancelOperation_resultTupleSchemeFactory()); - } - - private TCancelOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCancelOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CancelOperation_result.class, metaDataMap); - } - - public CancelOperation_result() { - } - - public CancelOperation_result( - TCancelOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CancelOperation_result(CancelOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCancelOperationResp(other.success); - } - } - - public CancelOperation_result deepCopy() { - return new CancelOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCancelOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCancelOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCancelOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CancelOperation_result) - return this.equals((CancelOperation_result)that); - return false; - } - - public boolean equals(CancelOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CancelOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CancelOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - 
sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CancelOperation_resultStandardSchemeFactory implements SchemeFactory { - public CancelOperation_resultStandardScheme getScheme() { - return new CancelOperation_resultStandardScheme(); - } - } - - private static class CancelOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CancelOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CancelOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CancelOperation_resultTupleSchemeFactory implements SchemeFactory { - public CancelOperation_resultTupleScheme getScheme() { - return new CancelOperation_resultTupleScheme(); - } - } - - private static class CancelOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CancelOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCancelOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public 
static class CloseOperation_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_argsTupleSchemeFactory()); - } - - private TCloseOperationReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_args.class, metaDataMap); - } - - public CloseOperation_args() { - } - - public CloseOperation_args( - TCloseOperationReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseOperation_args(CloseOperation_args other) { - if (other.isSetReq()) { - this.req = new TCloseOperationReq(other.req); - } - } - - public CloseOperation_args deepCopy() { - return new CloseOperation_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TCloseOperationReq getReq() { - return this.req; - } - - public void setReq(TCloseOperationReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TCloseOperationReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_args) - return this.equals((CloseOperation_args)that); - return false; - } - - public boolean equals(CloseOperation_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseOperation_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // 
check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_argsStandardSchemeFactory implements SchemeFactory { - public CloseOperation_argsStandardScheme getScheme() { - return new CloseOperation_argsStandardScheme(); - } - } - - private static class CloseOperation_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_argsTupleSchemeFactory implements SchemeFactory { - public CloseOperation_argsTupleScheme getScheme() { - return new CloseOperation_argsTupleScheme(); - } - } - - private static class CloseOperation_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TCloseOperationReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class CloseOperation_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("CloseOperation_result"); - - private static final org.apache.thrift.protocol.TField 
SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new CloseOperation_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new CloseOperation_resultTupleSchemeFactory()); - } - - private TCloseOperationResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TCloseOperationResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(CloseOperation_result.class, metaDataMap); - } - - public CloseOperation_result() { - } - - public CloseOperation_result( - TCloseOperationResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public CloseOperation_result(CloseOperation_result other) { - if (other.isSetSuccess()) { - this.success = new TCloseOperationResp(other.success); - } - } - - public CloseOperation_result deepCopy() { - return new CloseOperation_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TCloseOperationResp getSuccess() { - return this.success; - } - - public void setSuccess(TCloseOperationResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TCloseOperationResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof CloseOperation_result) - return this.equals((CloseOperation_result)that); - return false; - } - - public boolean equals(CloseOperation_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(CloseOperation_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("CloseOperation_result("); - boolean first = true; - - sb.append("success:"); - if (this.success == null) { - sb.append("null"); 
- } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class CloseOperation_resultStandardSchemeFactory implements SchemeFactory { - public CloseOperation_resultStandardScheme getScheme() { - return new CloseOperation_resultStandardScheme(); - } - } - - private static class CloseOperation_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, CloseOperation_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, CloseOperation_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class CloseOperation_resultTupleSchemeFactory implements SchemeFactory { - public CloseOperation_resultTupleScheme getScheme() { - return new CloseOperation_resultTupleScheme(); - } - } - - private static class CloseOperation_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, CloseOperation_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TCloseOperationResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class 
GetResultSetMetadata_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_argsTupleSchemeFactory()); - } - - private TGetResultSetMetadataReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_args.class, metaDataMap); - } - - public GetResultSetMetadata_args() { - } - - public GetResultSetMetadata_args( - TGetResultSetMetadataReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public GetResultSetMetadata_args(GetResultSetMetadata_args other) { - if (other.isSetReq()) { - this.req = new TGetResultSetMetadataReq(other.req); - } - } - - public GetResultSetMetadata_args deepCopy() { - return new GetResultSetMetadata_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TGetResultSetMetadataReq getReq() { - return this.req; - } - - public void setReq(TGetResultSetMetadataReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TGetResultSetMetadataReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetResultSetMetadata_args) - return this.equals((GetResultSetMetadata_args)that); - return false; - } - - public boolean equals(GetResultSetMetadata_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(GetResultSetMetadata_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetResultSetMetadata_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() 
throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetResultSetMetadata_argsStandardSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsStandardScheme getScheme() { - return new GetResultSetMetadata_argsStandardScheme(); - } - } - - private static class GetResultSetMetadata_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetResultSetMetadata_argsTupleSchemeFactory implements SchemeFactory { - public GetResultSetMetadata_argsTupleScheme getScheme() { - return new GetResultSetMetadata_argsTupleScheme(); - } - } - - private static class GetResultSetMetadata_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetResultSetMetadata_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TGetResultSetMetadataReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class GetResultSetMetadata_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("GetResultSetMetadata_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetResultSetMetadata_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetResultSetMetadata_resultTupleSchemeFactory()); - } - - private TGetResultSetMetadataResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetResultSetMetadataResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetResultSetMetadata_result.class, metaDataMap); - } - - public GetResultSetMetadata_result() { - } - - public GetResultSetMetadata_result( - TGetResultSetMetadataResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
[Removed generated Thrift code: the remainder of GetResultSetMetadata_result — the deep-copy constructor and deepCopy(), clear(), accessors for the single TGetResultSetMetadataResp success field (get/set/unset/isSet/setSuccessIsSet), setFieldValue/getFieldValue/isSet dispatch over _Fields, equals/hashCode/compareTo, fieldForId, the scheme-dispatching read/write methods, toString, validate, the writeObject/readObject Java-serialization bridge, and the GetResultSetMetadata_resultStandardScheme and GetResultSetMetadata_resultTupleScheme readers and writers.]
[Removed generated code, continued: the tail of GetResultSetMetadata_result's tuple reader, followed by the complete FetchResults_args wrapper — STRUCT_DESC and REQ_FIELD_DESC constants, a single TFetchResultsReq req field, the _Fields enum (REQ = 1) with byName/findByThriftId lookups, metaDataMap registration, constructors and deep copy, req accessors, field dispatch, equals/hashCode/compareTo, toString, validate, the Java-serialization bridge, and the FetchResults_argsStandardScheme / FetchResults_argsTupleScheme serializers.]
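The removed StandardScheme readers all share one shape: read field headers until a STOP marker, dispatch on the field id, and skip anything unrecognised so that an old reader tolerates fields a newer writer might add. The sketch below mimics that loop with plain Java; `FieldHeader`, `readReq`, and the string "type tags" are invented stand-ins, not libthrift types.

```
import java.util.Iterator;
import java.util.List;

// Plain-Java stand-in for the generated StandardScheme read loop:
// walk field headers until STOP, dispatch on field id, skip the rest.
public class StandardSchemeLoopSketch {

  static final class FieldHeader {
    final short id; final String type; final Object value;
    FieldHeader(short id, String type, Object value) { this.id = id; this.type = type; this.value = value; }
  }
  static final FieldHeader STOP = new FieldHeader((short) 0, "STOP", null);

  static String readReq(List<FieldHeader> frame) {
    String req = null;
    Iterator<FieldHeader> it = frame.iterator();
    while (true) {
      FieldHeader f = it.next();
      if (f == STOP) {                    // schemeField.type == TType.STOP
        break;
      }
      switch (f.id) {
        case 1:                           // case 1: // REQ
          if ("STRUCT".equals(f.type)) {
            req = (String) f.value;       // stands in for struct.req.read(iprot)
          }                               // a mismatched type would be skipped
          break;
        default:
          // unknown field id: ignored, as TProtocolUtil.skip(...) does
          break;
      }
    }
    return req;
  }

  public static void main(String[] args) {
    List<FieldHeader> frame = List.of(
        new FieldHeader((short) 1, "STRUCT", "TFetchResultsReq payload"),
        new FieldHeader((short) 99, "I32", Integer.valueOf(42)),  // future field, skipped
        STOP);
    System.out.println("req = " + readReq(frame));
  }
}
```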
[Removed generated code, continued: the complete FetchResults_result wrapper — a single TFetchResultsResp success field at Thrift id 0, with the same _Fields enum, metaDataMap, constructors, deep copy, accessors, equals/hashCode/compareTo, toString, validate, serialization bridge, and FetchResults_resultStandardScheme / FetchResults_resultTupleScheme serializers as the wrappers above.]
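Each of these wrappers also plugs into plain java.io serialization: its private writeObject/readObject hooks re-encode the struct through its own Thrift protocol (a TCompactProtocol over a TIOStreamTransport in the removed code) instead of default field dumping. The sketch below shows the hook shape with a trivial hand-rolled encoding; `SerializationBridgeSketch` and its string field are illustrative assumptions, not the generated class.

```
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

// Sketch of the writeObject/readObject bridge: intercept Java serialization
// and re-encode the payload by hand, the way the generated code delegates to
// a TCompactProtocol wrapped around the same streams.
public class SerializationBridgeSketch implements Serializable {
  private transient String success;   // stands in for the nested Thrift response struct

  public SerializationBridgeSketch(String success) { this.success = success; }

  private void writeObject(ObjectOutputStream out) throws IOException {
    // stands in for: write(new TCompactProtocol(new TIOStreamTransport(out)))
    out.writeBoolean(success != null);
    if (success != null) {
      out.writeUTF(success);
    }
  }

  private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
    // stands in for: read(new TCompactProtocol(new TIOStreamTransport(in)))
    if (in.readBoolean()) {
      success = in.readUTF();
    }
  }

  public static void main(String[] args) throws Exception {
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(bytes);
    oos.writeObject(new SerializationBridgeSketch("TFetchResultsResp stand-in"));
    oos.flush();
    ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()));
    SerializationBridgeSketch copy = (SerializationBridgeSketch) ois.readObject();
    System.out.println(copy.success);   // round-trips through the custom hooks
  }
}
```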
[Removed generated code, continued: the complete GetDelegationToken_args wrapper around a single TGetDelegationTokenReq req field (Thrift id 1), following the same generated layout — _Fields enum, metaDataMap, constructors, deep copy, accessors, field dispatch, equals/hashCode/compareTo, toString, validate, serialization bridge, and GetDelegationToken_argsStandardScheme / GetDelegationToken_argsTupleScheme serializers.]
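The generated value semantics treat "unset" as a state of its own: equals first compares the isSet flags, hashCode hashes a (present, value) list, and compareTo orders an unset wrapper before a set one before comparing the wrapped values. A plain-Java sketch of that contract follows; `WrapperOrderingSketch` is illustrative and a String stands in for the nested Thrift struct (TBaseHelper.compareTo becomes String.compareTo).

```
import java.util.ArrayList;
import java.util.List;

// Sketch of the equals/hashCode/compareTo pattern emitted for a
// single-field wrapper: presence first, then the wrapped value.
public class WrapperOrderingSketch implements Comparable<WrapperOrderingSketch> {
  private final String req;                          // like the generated 'req' field

  public WrapperOrderingSketch(String req) { this.req = req; }
  public boolean isSetReq() { return req != null; }

  @Override public boolean equals(Object that) {
    if (!(that instanceof WrapperOrderingSketch)) return false;
    WrapperOrderingSketch o = (WrapperOrderingSketch) that;
    if (isSetReq() != o.isSetReq()) return false;    // one set, one unset -> not equal
    return !isSetReq() || req.equals(o.req);
  }

  @Override public int hashCode() {
    List<Object> list = new ArrayList<>();           // mirrors the generated list-based hash
    list.add(isSetReq());
    if (isSetReq()) list.add(req);
    return list.hashCode();
  }

  @Override public int compareTo(WrapperOrderingSketch other) {
    int c = Boolean.compare(isSetReq(), other.isSetReq());
    if (c != 0) return c;                            // unset sorts before set
    return isSetReq() ? req.compareTo(other.req) : 0;
  }

  public static void main(String[] args) {
    System.out.println(new WrapperOrderingSketch("a").compareTo(new WrapperOrderingSketch(null))); // 1
    System.out.println(new WrapperOrderingSketch("a").equals(new WrapperOrderingSketch("a")));     // true
  }
}
```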
org.apache.thrift.protocol.TStruct("GetDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new GetDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new GetDelegationToken_resultTupleSchemeFactory()); - } - - private TGetDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(GetDelegationToken_result.class, metaDataMap); - } - - public GetDelegationToken_result() { - } - - public GetDelegationToken_result( - TGetDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public GetDelegationToken_result(GetDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TGetDelegationTokenResp(other.success); - } - } - - public GetDelegationToken_result deepCopy() { - return new GetDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TGetDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TGetDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TGetDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof GetDelegationToken_result) - return this.equals((GetDelegationToken_result)that); - return false; - } - - public boolean equals(GetDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(GetDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("GetDelegationToken_result("); - boolean first = true; - - sb.append("success:"); 
- if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class GetDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultStandardScheme getScheme() { - return new GetDelegationToken_resultStandardScheme(); - } - } - - private static class GetDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TGetDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class GetDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public GetDelegationToken_resultTupleScheme getScheme() { - return new GetDelegationToken_resultTupleScheme(); - } - } - - private static class GetDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, GetDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.success = new TGetDelegationTokenResp(); - 
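Every wrapper also repeats the same _Fields bookkeeping — an enum of Thrift field ids and names with lookup by id and by name — which drives setFieldValue/getFieldValue and the metaDataMap registration. The sketch below condenses that pattern into one standalone class; `FieldsEnumSketch` is a hypothetical container, while the REQ = 1 numbering copies the removed *_args wrappers.

```
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;

// Condensed version of the generated _Fields enum: field ids/names plus
// lookup by Thrift id and by name.
public class FieldsEnumSketch {
  public enum _Fields {
    REQ((short) 1, "req");

    private static final Map<String, _Fields> byName = new HashMap<>();
    static {
      for (_Fields field : EnumSet.allOf(_Fields.class)) {
        byName.put(field.getFieldName(), field);
      }
    }

    public static _Fields findByThriftId(int fieldId) {
      switch (fieldId) {
        case 1:  return REQ;
        default: return null;     // unknown id: caller decides whether to skip or throw
      }
    }

    public static _Fields findByThriftIdOrThrow(int fieldId) {
      _Fields f = findByThriftId(fieldId);
      if (f == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!");
      return f;
    }

    public static _Fields findByName(String name) { return byName.get(name); }

    private final short thriftId;
    private final String fieldName;
    _Fields(short thriftId, String fieldName) { this.thriftId = thriftId; this.fieldName = fieldName; }
    public short getThriftFieldId() { return thriftId; }
    public String getFieldName() { return fieldName; }
  }

  public static void main(String[] args) {
    System.out.println(_Fields.findByThriftId(1));   // REQ
    System.out.println(_Fields.findByName("req"));   // REQ
  }
}
```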
[Removed generated code, continued: the close of GetDelegationToken_result's tuple reader and the complete CancelDelegationToken_args wrapper around a single TCancelDelegationTokenReq req field (Thrift id 1), again with the standard generated members and CancelDelegationToken_argsStandardScheme / CancelDelegationToken_argsTupleScheme serializers.]
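One last recurring piece is the copy constructor behind deepCopy(): copying a wrapper clones the nested request/response struct rather than aliasing it, so the copy cannot be mutated through the original. A small sketch of that behaviour follows; `DeepCopySketch` and `NestedReq` are invented stand-ins (NestedReq plays the role of TCancelDelegationTokenReq).

```
// Sketch of the copy-constructor / deepCopy pattern each generated wrapper repeats.
public class DeepCopySketch {
  static class NestedReq {
    String token;
    NestedReq(String token) { this.token = token; }
    NestedReq(NestedReq other) { this.token = other.token; }   // struct-level copy ctor
  }

  private NestedReq req;   // like the generated 'req' field

  public DeepCopySketch() {}
  public DeepCopySketch(DeepCopySketch other) {
    if (other.isSetReq()) {
      this.req = new NestedReq(other.req);   // mirrors new TCancelDelegationTokenReq(other.req)
    }
  }
  public DeepCopySketch deepCopy() { return new DeepCopySketch(this); }
  public boolean isSetReq() { return req != null; }

  public static void main(String[] args) {
    DeepCopySketch a = new DeepCopySketch();
    a.req = new NestedReq("tok");
    DeepCopySketch b = a.deepCopy();
    b.req.token = "changed";
    System.out.println(a.req.token);   // still "tok": the nested struct was copied, not shared
  }
}
```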
[Removed generated code, continued: the CancelDelegationToken_result wrapper around a single TCancelDelegationTokenResp success field (Thrift id 0) — constants, _Fields enum, metaDataMap, constructors, deep copy, accessors, equals/hashCode/compareTo, toString, validate, serialization bridge, and the CancelDelegationToken_resultStandardScheme / CancelDelegationToken_resultTupleScheme serializers; the excerpt breaks off inside the tuple reader's readBitSet check.]
(incoming.get(0)) { - struct.success = new TCancelDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_args implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_args"); - - private static final org.apache.thrift.protocol.TField REQ_FIELD_DESC = new org.apache.thrift.protocol.TField("req", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_argsStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_argsTupleSchemeFactory()); - } - - private TRenewDelegationTokenReq req; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - REQ((short)1, "req"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // REQ - return REQ; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.REQ, new org.apache.thrift.meta_data.FieldMetaData("req", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenReq.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_args.class, metaDataMap); - } - - public RenewDelegationToken_args() { - } - - public RenewDelegationToken_args( - TRenewDelegationTokenReq req) - { - this(); - this.req = req; - } - - /** - * Performs a deep copy on other. 
- */ - public RenewDelegationToken_args(RenewDelegationToken_args other) { - if (other.isSetReq()) { - this.req = new TRenewDelegationTokenReq(other.req); - } - } - - public RenewDelegationToken_args deepCopy() { - return new RenewDelegationToken_args(this); - } - - @Override - public void clear() { - this.req = null; - } - - public TRenewDelegationTokenReq getReq() { - return this.req; - } - - public void setReq(TRenewDelegationTokenReq req) { - this.req = req; - } - - public void unsetReq() { - this.req = null; - } - - /** Returns true if field req is set (has been assigned a value) and false otherwise */ - public boolean isSetReq() { - return this.req != null; - } - - public void setReqIsSet(boolean value) { - if (!value) { - this.req = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case REQ: - if (value == null) { - unsetReq(); - } else { - setReq((TRenewDelegationTokenReq)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case REQ: - return getReq(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case REQ: - return isSetReq(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_args) - return this.equals((RenewDelegationToken_args)that); - return false; - } - - public boolean equals(RenewDelegationToken_args that) { - if (that == null) - return false; - - boolean this_present_req = true && this.isSetReq(); - boolean that_present_req = true && that.isSetReq(); - if (this_present_req || that_present_req) { - if (!(this_present_req && that_present_req)) - return false; - if (!this.req.equals(that.req)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_req = true && (isSetReq()); - list.add(present_req); - if (present_req) - list.add(req); - - return list.hashCode(); - } - - @Override - public int compareTo(RenewDelegationToken_args other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetReq()).compareTo(other.isSetReq()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetReq()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.req, other.req); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_args("); - boolean first = true; - - sb.append("req:"); - if (this.req == null) { - sb.append("null"); - } else { - sb.append(this.req); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() 
throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (req != null) { - req.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_argsStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsStandardScheme getScheme() { - return new RenewDelegationToken_argsStandardScheme(); - } - } - - private static class RenewDelegationToken_argsStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // REQ - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.req != null) { - oprot.writeFieldBegin(REQ_FIELD_DESC); - struct.req.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_argsTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_argsTupleScheme getScheme() { - return new RenewDelegationToken_argsTupleScheme(); - } - } - - private static class RenewDelegationToken_argsTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetReq()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetReq()) { - struct.req.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_args struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.req = new TRenewDelegationTokenReq(); - struct.req.read(iprot); - struct.setReqIsSet(true); - } - } - } - - } - - public static class RenewDelegationToken_result implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("RenewDelegationToken_result"); - - private static final org.apache.thrift.protocol.TField SUCCESS_FIELD_DESC = new org.apache.thrift.protocol.TField("success", org.apache.thrift.protocol.TType.STRUCT, (short)0); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new RenewDelegationToken_resultStandardSchemeFactory()); - schemes.put(TupleScheme.class, new RenewDelegationToken_resultTupleSchemeFactory()); - } - - private TRenewDelegationTokenResp success; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SUCCESS((short)0, "success"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 0: // SUCCESS - return SUCCESS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SUCCESS, new org.apache.thrift.meta_data.FieldMetaData("success", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRenewDelegationTokenResp.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(RenewDelegationToken_result.class, metaDataMap); - } - - public RenewDelegationToken_result() { - } - - public RenewDelegationToken_result( - TRenewDelegationTokenResp success) - { - this(); - this.success = success; - } - - /** - * Performs a deep copy on other. 
- */ - public RenewDelegationToken_result(RenewDelegationToken_result other) { - if (other.isSetSuccess()) { - this.success = new TRenewDelegationTokenResp(other.success); - } - } - - public RenewDelegationToken_result deepCopy() { - return new RenewDelegationToken_result(this); - } - - @Override - public void clear() { - this.success = null; - } - - public TRenewDelegationTokenResp getSuccess() { - return this.success; - } - - public void setSuccess(TRenewDelegationTokenResp success) { - this.success = success; - } - - public void unsetSuccess() { - this.success = null; - } - - /** Returns true if field success is set (has been assigned a value) and false otherwise */ - public boolean isSetSuccess() { - return this.success != null; - } - - public void setSuccessIsSet(boolean value) { - if (!value) { - this.success = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SUCCESS: - if (value == null) { - unsetSuccess(); - } else { - setSuccess((TRenewDelegationTokenResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SUCCESS: - return getSuccess(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SUCCESS: - return isSetSuccess(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof RenewDelegationToken_result) - return this.equals((RenewDelegationToken_result)that); - return false; - } - - public boolean equals(RenewDelegationToken_result that) { - if (that == null) - return false; - - boolean this_present_success = true && this.isSetSuccess(); - boolean that_present_success = true && that.isSetSuccess(); - if (this_present_success || that_present_success) { - if (!(this_present_success && that_present_success)) - return false; - if (!this.success.equals(that.success)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_success = true && (isSetSuccess()); - list.add(present_success); - if (present_success) - list.add(success); - - return list.hashCode(); - } - - @Override - public int compareTo(RenewDelegationToken_result other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSuccess()).compareTo(other.isSetSuccess()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSuccess()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.success, other.success); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("RenewDelegationToken_result("); - boolean first = true; 
- - sb.append("success:"); - if (this.success == null) { - sb.append("null"); - } else { - sb.append(this.success); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - if (success != null) { - success.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class RenewDelegationToken_resultStandardSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultStandardScheme getScheme() { - return new RenewDelegationToken_resultStandardScheme(); - } - } - - private static class RenewDelegationToken_resultStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 0: // SUCCESS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.success != null) { - oprot.writeFieldBegin(SUCCESS_FIELD_DESC); - struct.success.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class RenewDelegationToken_resultTupleSchemeFactory implements SchemeFactory { - public RenewDelegationToken_resultTupleScheme getScheme() { - return new RenewDelegationToken_resultTupleScheme(); - } - } - - private static class RenewDelegationToken_resultTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetSuccess()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSuccess()) { - struct.success.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, RenewDelegationToken_result struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - 
struct.success = new TRenewDelegationTokenResp(); - struct.success.read(iprot); - struct.setSuccessIsSet(true); - } - } - } - - } - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java deleted file mode 100644 index 930bed731ed2a..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCLIServiceConstants.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TCLIServiceConstants { - - public static final Set PRIMITIVE_TYPES = new HashSet(); - static { - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BOOLEAN_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.TINYINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.SMALLINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BIGINT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.FLOAT_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DOUBLE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.STRING_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.TIMESTAMP_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.BINARY_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DECIMAL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.NULL_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.DATE_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.VARCHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.CHAR_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE); - PRIMITIVE_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE); - } - - public static final Set COMPLEX_TYPES = new HashSet(); - static { - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.STRUCT_TYPE); - COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.UNION_TYPE); - 
COMPLEX_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.USER_DEFINED_TYPE); - } - - public static final Set COLLECTION_TYPES = new HashSet(); - static { - COLLECTION_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE); - COLLECTION_TYPES.add(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE); - } - - public static final Map TYPE_NAMES = new HashMap(); - static { - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BOOLEAN_TYPE, "BOOLEAN"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.TINYINT_TYPE, "TINYINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.SMALLINT_TYPE, "SMALLINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INT_TYPE, "INT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BIGINT_TYPE, "BIGINT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.FLOAT_TYPE, "FLOAT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DOUBLE_TYPE, "DOUBLE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.STRING_TYPE, "STRING"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.TIMESTAMP_TYPE, "TIMESTAMP"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.BINARY_TYPE, "BINARY"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.ARRAY_TYPE, "ARRAY"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.MAP_TYPE, "MAP"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.STRUCT_TYPE, "STRUCT"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.UNION_TYPE, "UNIONTYPE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DECIMAL_TYPE, "DECIMAL"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.NULL_TYPE, "NULL"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.DATE_TYPE, "DATE"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.VARCHAR_TYPE, "VARCHAR"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.CHAR_TYPE, "CHAR"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_YEAR_MONTH_TYPE, "INTERVAL_YEAR_MONTH"); - TYPE_NAMES.put(org.apache.hive.service.rpc.thrift.TTypeId.INTERVAL_DAY_TIME_TYPE, "INTERVAL_DAY_TIME"); - } - - public static final String CHARACTER_MAXIMUM_LENGTH = "characterMaximumLength"; - - public static final String PRECISION = "precision"; - - public static final String SCALE = "scale"; - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java deleted file mode 100644 index a7d4e7de1f60d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenReq.java +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; 
-import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenReq.class, metaDataMap); - } - - public TCancelDelegationTokenReq() { - } - - public TCancelDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. - */ - public TCancelDelegationTokenReq(TCancelDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TCancelDelegationTokenReq deepCopy() { - return new TCancelDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); 
- } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenReq) - return this.equals((TCancelDelegationTokenReq)that); - return false; - } - - public boolean equals(TCancelDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
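The `read`/`write` methods just above follow the pattern every struct in these generated files uses: a per-struct `schemes` map, filled in a static block, maps a scheme class (`StandardScheme` or `TupleScheme`) to a factory, and `read`/`write` simply dispatch through `schemes.get(protocol.getScheme()).getScheme()`. The sketch below is illustrative only; it is not part of this patch or of the Thrift runtime, and all names in it (`Scheme`, `WireFormat`, `SchemeRegistrySketch`, ...) are hypothetical stand-ins for the corresponding generated machinery.

```java
// Minimal sketch of the scheme-dispatch pattern used by the generated structs.
// Hypothetical names throughout; only the shape of the dispatch is taken from
// the code above (a map from scheme family to serializer, consulted at call time).
import java.util.HashMap;
import java.util.Map;

public class SchemeRegistrySketch {

  // A scheme knows how to serialize one struct for one wire layout.
  interface Scheme<T> {
    void write(StringBuilder out, T value);
  }

  // Stand-in for a Thrift protocol: it advertises which scheme family it wants.
  enum WireFormat { FIELD_TAGGED, TUPLE }

  // Verbose layout: every field carries an explicit tag, like StandardScheme.
  static final class FieldTaggedScheme implements Scheme<String> {
    public void write(StringBuilder out, String value) {
      out.append("field(1):").append(value);
    }
  }

  // Dense layout: fields are written positionally, like TupleScheme.
  static final class TupleLikeScheme implements Scheme<String> {
    public void write(StringBuilder out, String value) {
      out.append(value);
    }
  }

  // Mirrors the per-struct `schemes` map that the static block populates.
  private static final Map<WireFormat, Scheme<String>> SCHEMES = new HashMap<>();
  static {
    SCHEMES.put(WireFormat.FIELD_TAGGED, new FieldTaggedScheme());
    SCHEMES.put(WireFormat.TUPLE, new TupleLikeScheme());
  }

  // Analogue of schemes.get(oprot.getScheme()).getScheme().write(oprot, this).
  static String write(WireFormat format, String value) {
    StringBuilder out = new StringBuilder();
    SCHEMES.get(format).write(out, value);
    return out.toString();
  }

  public static void main(String[] args) {
    System.out.println(write(WireFormat.FIELD_TAGGED, "token")); // field(1):token
    System.out.println(write(WireFormat.TUPLE, "token"));        // token
  }
}
```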
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqStandardScheme getScheme() { - return new TCancelDelegationTokenReqStandardScheme(); - } - } - - private static class TCancelDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - 
oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenReqTupleScheme getScheme() { - return new TCancelDelegationTokenReqTupleScheme(); - } - } - - private static class TCancelDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java deleted file mode 100644 index 611e92ca2af30..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelDelegationTokenResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelDelegationTokenRespTupleSchemeFactory()); - } 
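The tuple schemes in the hunks above come in two flavours: structs whose fields are all required, such as `TCancelDelegationTokenReq`, write their fields positionally with no preamble, while wrappers with optional fields (the `*_args`/`*_result` classes) first write a `BitSet` recording which fields follow, matched by `readBitSet(1)` on the reading side. The following sketch is illustrative only, not part of the patch; it models the wire frame as a plain `List` purely to show the two encodings side by side, and its method names are hypothetical.

```java
// Contrast of the two tuple encodings visible in the generated code:
// all-required structs are written bare; optional fields sit behind a presence BitSet.
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class TupleEncodingSketch {

  // All-required struct: both values are always present, so write them in order.
  static List<Object> writeAllRequired(String sessionHandle, String delegationToken) {
    List<Object> frame = new ArrayList<>();
    frame.add(sessionHandle);
    frame.add(delegationToken);
    return frame;
  }

  // Optional field: write a presence bitmap first, then only the fields that are set.
  static List<Object> writeWithOptional(String successOrNull) {
    BitSet optionals = new BitSet();
    if (successOrNull != null) {
      optionals.set(0);        // bit 0 <=> the "success" field is present
    }
    List<Object> frame = new ArrayList<>();
    frame.add(optionals);      // the reader consumes this first, like readBitSet(1)
    if (successOrNull != null) {
      frame.add(successOrNull);
    }
    return frame;
  }

  public static void main(String[] args) {
    System.out.println(writeAllRequired("session-1", "token-1")); // [session-1, token-1]
    System.out.println(writeWithOptional(null));                  // [{}]
    System.out.println(writeWithOptional("resp"));                // [{0}, resp]
  }
}
```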
- - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelDelegationTokenResp.class, metaDataMap); - } - - public TCancelDelegationTokenResp() { - } - - public TCancelDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TCancelDelegationTokenResp(TCancelDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelDelegationTokenResp deepCopy() { - return new TCancelDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelDelegationTokenResp) - return this.equals((TCancelDelegationTokenResp)that); - return false; - } - - public boolean equals(TCancelDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first 
= false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespStandardScheme getScheme() { - return new TCancelDelegationTokenRespStandardScheme(); - } - } - - private static class TCancelDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TCancelDelegationTokenRespTupleScheme getScheme() { - return new TCancelDelegationTokenRespTupleScheme(); - } - } - - private static class TCancelDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java 
b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java deleted file mode 100644 index 4076c573fafb7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationReq.class, metaDataMap); - } - - public TCancelOperationReq() { - } - - public TCancelOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationReq(TCancelOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCancelOperationReq deepCopy() { - return new TCancelOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationReq) - return this.equals((TCancelOperationReq)that); - return false; - } - - public boolean equals(TCancelOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && 
that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCancelOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationReqStandardSchemeFactory implements SchemeFactory { - public TCancelOperationReqStandardScheme getScheme() { - return new TCancelOperationReqStandardScheme(); - } - } - - private static class TCancelOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationReqTupleSchemeFactory implements SchemeFactory { - public TCancelOperationReqTupleScheme getScheme() { - return new TCancelOperationReqTupleScheme(); - } - } - - private static class TCancelOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java deleted file mode 100644 index 7bcc765c85daa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCancelOperationResp.java 
+++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCancelOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCancelOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCancelOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCancelOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCancelOperationResp.class, metaDataMap); - } - - public TCancelOperationResp() { - } - - public TCancelOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCancelOperationResp(TCancelOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCancelOperationResp deepCopy() { - return new TCancelOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCancelOperationResp) - return this.equals((TCancelOperationResp)that); - return false; - } - - public boolean equals(TCancelOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TCancelOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCancelOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCancelOperationRespStandardSchemeFactory implements SchemeFactory { - public TCancelOperationRespStandardScheme getScheme() { - return new TCancelOperationRespStandardScheme(); - } - } - - private static class TCancelOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCancelOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status 
!= null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCancelOperationRespTupleSchemeFactory implements SchemeFactory { - public TCancelOperationRespTupleScheme getScheme() { - return new TCancelOperationRespTupleScheme(); - } - } - - private static class TCancelOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCancelOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java deleted file mode 100644 index 47a6b8329c05b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseOperationReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationReq.class, metaDataMap); - } - - public TCloseOperationReq() { - } - - public TCloseOperationReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseOperationReq(TCloseOperationReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TCloseOperationReq deepCopy() { - return new TCloseOperationReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationReq) - return this.equals((TCloseOperationReq)that); - return false; - } - - public boolean equals(TCloseOperationReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseOperationReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) 
throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationReqStandardSchemeFactory implements SchemeFactory { - public TCloseOperationReqStandardScheme getScheme() { - return new TCloseOperationReqStandardScheme(); - } - } - - private static class TCloseOperationReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationReqTupleSchemeFactory implements SchemeFactory { - public TCloseOperationReqTupleScheme getScheme() { - return new TCloseOperationReqTupleScheme(); - } - } - - private static class TCloseOperationReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java deleted file mode 100644 index 0860a2b1c5bac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseOperationResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseOperationResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseOperationResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseOperationRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseOperationRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseOperationResp.class, metaDataMap); - } - - public TCloseOperationResp() { - } - - public TCloseOperationResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseOperationResp(TCloseOperationResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseOperationResp deepCopy() { - return new TCloseOperationResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseOperationResp) - return this.equals((TCloseOperationResp)that); - return false; - } - - public boolean equals(TCloseOperationResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return 
false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseOperationResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseOperationResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseOperationRespStandardSchemeFactory implements SchemeFactory { - public TCloseOperationRespStandardScheme getScheme() { - return new TCloseOperationRespStandardScheme(); - } - } - - private static class TCloseOperationRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - 
iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseOperationResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseOperationRespTupleSchemeFactory implements SchemeFactory { - public TCloseOperationRespTupleScheme getScheme() { - return new TCloseOperationRespTupleScheme(); - } - } - - private static class TCloseOperationRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseOperationResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java deleted file mode 100644 index 43ee87f487a67..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new 
TCloseSessionReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionReq.class, metaDataMap); - } - - public TCloseSessionReq() { - } - - public TCloseSessionReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TCloseSessionReq(TCloseSessionReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TCloseSessionReq deepCopy() { - return new TCloseSessionReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionReq) - return this.equals((TCloseSessionReq)that); - return false; - } - - public boolean equals(TCloseSessionReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionReqStandardSchemeFactory implements SchemeFactory { - public TCloseSessionReqStandardScheme getScheme() { - return new TCloseSessionReqStandardScheme(); - } - } - - private static class TCloseSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionReqTupleSchemeFactory implements SchemeFactory { - public TCloseSessionReqTupleScheme getScheme() { - return new TCloseSessionReqTupleScheme(); - } - } - - private static class TCloseSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - 
struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java deleted file mode 100644 index 38f82ac8d3cd2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TCloseSessionResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TCloseSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TCloseSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TCloseSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TCloseSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TCloseSessionResp.class, metaDataMap); - } - - public TCloseSessionResp() { - } - - public TCloseSessionResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TCloseSessionResp(TCloseSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TCloseSessionResp deepCopy() { - return new TCloseSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TCloseSessionResp) - return this.equals((TCloseSessionResp)that); - return false; - } - - public boolean equals(TCloseSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TCloseSessionResp other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TCloseSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TCloseSessionRespStandardSchemeFactory implements SchemeFactory { - public TCloseSessionRespStandardScheme getScheme() { - return new TCloseSessionRespStandardScheme(); - } - } - - private static class TCloseSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TCloseSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - 
struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TCloseSessionRespTupleSchemeFactory implements SchemeFactory { - public TCloseSessionRespTupleScheme getScheme() { - return new TCloseSessionRespTupleScheme(); - } - } - - private static class TCloseSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TCloseSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java deleted file mode 100644 index dd79482200961..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumn.java +++ /dev/null @@ -1,736 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TColumn extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumn"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final 
org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - private static final org.apache.thrift.protocol.TField BINARY_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryVal", org.apache.thrift.protocol.TType.STRUCT, (short)8); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"), - BINARY_VAL((short)8, "binaryVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - case 8: // BINARY_VAL - return BINARY_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolColumn.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteColumn.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Column.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Column.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Column.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleColumn.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringColumn.class))); - tmpMap.put(_Fields.BINARY_VAL, new org.apache.thrift.meta_data.FieldMetaData("binaryVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBinaryColumn.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumn.class, metaDataMap); - } - - public TColumn() { - super(); - } - - public TColumn(TColumn._Fields setField, Object value) { - super(setField, value); - } - - public TColumn(TColumn other) { - super(other); - } - public TColumn deepCopy() { - return new TColumn(this); - } - - public static TColumn boolVal(TBoolColumn value) { - TColumn x = new TColumn(); - x.setBoolVal(value); - return x; - } - - public static TColumn byteVal(TByteColumn value) { - TColumn x = new TColumn(); - x.setByteVal(value); - return x; - } - - public static TColumn i16Val(TI16Column value) { - TColumn x = new TColumn(); - x.setI16Val(value); - return x; - } - - public static TColumn i32Val(TI32Column value) { - TColumn x = new TColumn(); - x.setI32Val(value); - return x; - } - - public static TColumn 
i64Val(TI64Column value) { - TColumn x = new TColumn(); - x.setI64Val(value); - return x; - } - - public static TColumn doubleVal(TDoubleColumn value) { - TColumn x = new TColumn(); - x.setDoubleVal(value); - return x; - } - - public static TColumn stringVal(TStringColumn value) { - TColumn x = new TColumn(); - x.setStringVal(value); - return x; - } - - public static TColumn binaryVal(TBinaryColumn value) { - TColumn x = new TColumn(); - x.setBinaryVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolColumn for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TByteColumn for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Column for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Column for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Column) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Column for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleColumn for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TStringColumn for field 'stringVal', but got " + value.getClass().getSimpleName()); - case BINARY_VAL: - if (value instanceof TBinaryColumn) { - break; - } - throw new ClassCastException("Was expecting value of type TBinaryColumn for field 'binaryVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); 
- return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VAL: - if (field.type == BINARY_VAL_FIELD_DESC.type) { - TBinaryColumn binaryVal; - binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolColumn boolVal; - boolVal = new TBoolColumn(); - boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteColumn byteVal; - byteVal = new TByteColumn(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Column i16Val; - i16Val = new TI16Column(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Column i32Val; - i32Val = new TI32Column(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Column i64Val; - i64Val = new TI64Column(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleColumn doubleVal; - doubleVal = new TDoubleColumn(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringColumn stringVal; - stringVal = new TStringColumn(); - stringVal.read(iprot); - return stringVal; - case BINARY_VAL: - TBinaryColumn binaryVal; - 
binaryVal = new TBinaryColumn(); - binaryVal.read(iprot); - return binaryVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolColumn boolVal = (TBoolColumn)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteColumn byteVal = (TByteColumn)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Column i16Val = (TI16Column)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Column i32Val = (TI32Column)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Column i64Val = (TI64Column)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleColumn doubleVal = (TDoubleColumn)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringColumn stringVal = (TStringColumn)value_; - stringVal.write(oprot); - return; - case BINARY_VAL: - TBinaryColumn binaryVal = (TBinaryColumn)value_; - binaryVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - case BINARY_VAL: - return BINARY_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolColumn getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteColumn getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public TI16Column getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Column getI32Val() { - if 
(getSetField() == _Fields.I32_VAL) { - return (TI32Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Column getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Column)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Column value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleColumn getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringColumn getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public TBinaryColumn getBinaryVal() { - if (getSetField() == _Fields.BINARY_VAL) { - return (TBinaryColumn)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryVal(TBinaryColumn value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BINARY_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean isSetBinaryVal() { - return setField_ == _Fields.BINARY_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumn) { - return equals((TColumn)other); - } else { - return false; - } - } - - public boolean equals(TColumn other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumn other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = 
getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java deleted file mode 100644 index 31472c8f54b94..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnDesc.java +++ /dev/null @@ -1,704 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TColumnDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnDesc"); - - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_DESC_FIELD_DESC = new org.apache.thrift.protocol.TField("typeDesc", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField POSITION_FIELD_DESC = new org.apache.thrift.protocol.TField("position", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField COMMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("comment", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new 
HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TColumnDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TColumnDescTupleSchemeFactory()); - } - - private String columnName; // required - private TTypeDesc typeDesc; // required - private int position; // required - private String comment; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMN_NAME((short)1, "columnName"), - TYPE_DESC((short)2, "typeDesc"), - POSITION((short)3, "position"), - COMMENT((short)4, "comment"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMN_NAME - return COLUMN_NAME; - case 2: // TYPE_DESC - return TYPE_DESC; - case 3: // POSITION - return POSITION; - case 4: // COMMENT - return COMMENT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __POSITION_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.COMMENT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TYPE_DESC, new org.apache.thrift.meta_data.FieldMetaData("typeDesc", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeDesc.class))); - tmpMap.put(_Fields.POSITION, new org.apache.thrift.meta_data.FieldMetaData("position", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.COMMENT, new org.apache.thrift.meta_data.FieldMetaData("comment", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnDesc.class, metaDataMap); 
- } - - public TColumnDesc() { - } - - public TColumnDesc( - String columnName, - TTypeDesc typeDesc, - int position) - { - this(); - this.columnName = columnName; - this.typeDesc = typeDesc; - this.position = position; - setPositionIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TColumnDesc(TColumnDesc other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - if (other.isSetTypeDesc()) { - this.typeDesc = new TTypeDesc(other.typeDesc); - } - this.position = other.position; - if (other.isSetComment()) { - this.comment = other.comment; - } - } - - public TColumnDesc deepCopy() { - return new TColumnDesc(this); - } - - @Override - public void clear() { - this.columnName = null; - this.typeDesc = null; - setPositionIsSet(false); - this.position = 0; - this.comment = null; - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public TTypeDesc getTypeDesc() { - return this.typeDesc; - } - - public void setTypeDesc(TTypeDesc typeDesc) { - this.typeDesc = typeDesc; - } - - public void unsetTypeDesc() { - this.typeDesc = null; - } - - /** Returns true if field typeDesc is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeDesc() { - return this.typeDesc != null; - } - - public void setTypeDescIsSet(boolean value) { - if (!value) { - this.typeDesc = null; - } - } - - public int getPosition() { - return this.position; - } - - public void setPosition(int position) { - this.position = position; - setPositionIsSet(true); - } - - public void unsetPosition() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - /** Returns true if field position is set (has been assigned a value) and false otherwise */ - public boolean isSetPosition() { - return EncodingUtils.testBit(__isset_bitfield, __POSITION_ISSET_ID); - } - - public void setPositionIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __POSITION_ISSET_ID, value); - } - - public String getComment() { - return this.comment; - } - - public void setComment(String comment) { - this.comment = comment; - } - - public void unsetComment() { - this.comment = null; - } - - /** Returns true if field comment is set (has been assigned a value) and false otherwise */ - public boolean isSetComment() { - return this.comment != null; - } - - public void setCommentIsSet(boolean value) { - if (!value) { - this.comment = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - case TYPE_DESC: - if (value == null) { - unsetTypeDesc(); - } else { - setTypeDesc((TTypeDesc)value); - } - break; - - case POSITION: - if (value == null) { - unsetPosition(); - } else { - setPosition((Integer)value); - } - break; - - case COMMENT: - if (value == null) { - unsetComment(); - } else { - setComment((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - 
switch (field) { - case COLUMN_NAME: - return getColumnName(); - - case TYPE_DESC: - return getTypeDesc(); - - case POSITION: - return getPosition(); - - case COMMENT: - return getComment(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMN_NAME: - return isSetColumnName(); - case TYPE_DESC: - return isSetTypeDesc(); - case POSITION: - return isSetPosition(); - case COMMENT: - return isSetComment(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TColumnDesc) - return this.equals((TColumnDesc)that); - return false; - } - - public boolean equals(TColumnDesc that) { - if (that == null) - return false; - - boolean this_present_columnName = true && this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - boolean this_present_typeDesc = true && this.isSetTypeDesc(); - boolean that_present_typeDesc = true && that.isSetTypeDesc(); - if (this_present_typeDesc || that_present_typeDesc) { - if (!(this_present_typeDesc && that_present_typeDesc)) - return false; - if (!this.typeDesc.equals(that.typeDesc)) - return false; - } - - boolean this_present_position = true; - boolean that_present_position = true; - if (this_present_position || that_present_position) { - if (!(this_present_position && that_present_position)) - return false; - if (this.position != that.position) - return false; - } - - boolean this_present_comment = true && this.isSetComment(); - boolean that_present_comment = true && that.isSetComment(); - if (this_present_comment || that_present_comment) { - if (!(this_present_comment && that_present_comment)) - return false; - if (!this.comment.equals(that.comment)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_columnName = true && (isSetColumnName()); - list.add(present_columnName); - if (present_columnName) - list.add(columnName); - - boolean present_typeDesc = true && (isSetTypeDesc()); - list.add(present_typeDesc); - if (present_typeDesc) - list.add(typeDesc); - - boolean present_position = true; - list.add(present_position); - if (present_position) - list.add(position); - - boolean present_comment = true && (isSetComment()); - list.add(present_comment); - if (present_comment) - list.add(comment); - - return list.hashCode(); - } - - @Override - public int compareTo(TColumnDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(other.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, other.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeDesc()).compareTo(other.isSetTypeDesc()); - if (lastComparison != 0) { - return lastComparison; - } - if 
(isSetTypeDesc()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeDesc, other.typeDesc); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPosition()).compareTo(other.isSetPosition()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPosition()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.position, other.position); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetComment()).compareTo(other.isSetComment()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetComment()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.comment, other.comment); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TColumnDesc("); - boolean first = true; - - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - if (!first) sb.append(", "); - sb.append("typeDesc:"); - if (this.typeDesc == null) { - sb.append("null"); - } else { - sb.append(this.typeDesc); - } - first = false; - if (!first) sb.append(", "); - sb.append("position:"); - sb.append(this.position); - first = false; - if (isSetComment()) { - if (!first) sb.append(", "); - sb.append("comment:"); - if (this.comment == null) { - sb.append("null"); - } else { - sb.append(this.comment); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumnName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'columnName' is unset! Struct:" + toString()); - } - - if (!isSetTypeDesc()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeDesc' is unset! Struct:" + toString()); - } - - if (!isSetPosition()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'position' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeDesc != null) { - typeDesc.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TColumnDescStandardSchemeFactory implements SchemeFactory { - public TColumnDescStandardScheme getScheme() { - return new TColumnDescStandardScheme(); - } - } - - private static class TColumnDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TColumnDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_DESC - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // POSITION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // COMMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TColumnDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columnName != null) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - if (struct.typeDesc != null) { - oprot.writeFieldBegin(TYPE_DESC_FIELD_DESC); - struct.typeDesc.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(POSITION_FIELD_DESC); - oprot.writeI32(struct.position); - oprot.writeFieldEnd(); - if (struct.comment != null) { - if (struct.isSetComment()) { - oprot.writeFieldBegin(COMMENT_FIELD_DESC); - oprot.writeString(struct.comment); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TColumnDescTupleSchemeFactory implements SchemeFactory { - public TColumnDescTupleScheme getScheme() { - return new TColumnDescTupleScheme(); - } - } - - private static class TColumnDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.columnName); - struct.typeDesc.write(oprot); - oprot.writeI32(struct.position); - BitSet optionals = new BitSet(); - if (struct.isSetComment()) { - optionals.set(0); - } - 
oprot.writeBitSet(optionals, 1); - if (struct.isSetComment()) { - oprot.writeString(struct.comment); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TColumnDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - struct.typeDesc = new TTypeDesc(); - struct.typeDesc.read(iprot); - struct.setTypeDescIsSet(true); - struct.position = iprot.readI32(); - struct.setPositionIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.comment = iprot.readString(); - struct.setCommentIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java deleted file mode 100644 index d1cc8e919bc0c..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TColumnValue.java +++ /dev/null @@ -1,675 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TColumnValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TColumnValue"); - private static final org.apache.thrift.protocol.TField BOOL_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("boolVal", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField BYTE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("byteVal", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField I16_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i16Val", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField I32_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Val", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField I64_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("i64Val", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField DOUBLE_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("doubleVal", org.apache.thrift.protocol.TType.STRUCT, (short)6); - private static final org.apache.thrift.protocol.TField 
STRING_VAL_FIELD_DESC = new org.apache.thrift.protocol.TField("stringVal", org.apache.thrift.protocol.TType.STRUCT, (short)7); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - BOOL_VAL((short)1, "boolVal"), - BYTE_VAL((short)2, "byteVal"), - I16_VAL((short)3, "i16Val"), - I32_VAL((short)4, "i32Val"), - I64_VAL((short)5, "i64Val"), - DOUBLE_VAL((short)6, "doubleVal"), - STRING_VAL((short)7, "stringVal"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // BOOL_VAL - return BOOL_VAL; - case 2: // BYTE_VAL - return BYTE_VAL; - case 3: // I16_VAL - return I16_VAL; - case 4: // I32_VAL - return I32_VAL; - case 5: // I64_VAL - return I64_VAL; - case 6: // DOUBLE_VAL - return DOUBLE_VAL; - case 7: // STRING_VAL - return STRING_VAL; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.BOOL_VAL, new org.apache.thrift.meta_data.FieldMetaData("boolVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TBoolValue.class))); - tmpMap.put(_Fields.BYTE_VAL, new org.apache.thrift.meta_data.FieldMetaData("byteVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TByteValue.class))); - tmpMap.put(_Fields.I16_VAL, new org.apache.thrift.meta_data.FieldMetaData("i16Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI16Value.class))); - tmpMap.put(_Fields.I32_VAL, new org.apache.thrift.meta_data.FieldMetaData("i32Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI32Value.class))); - tmpMap.put(_Fields.I64_VAL, new org.apache.thrift.meta_data.FieldMetaData("i64Val", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TI64Value.class))); - tmpMap.put(_Fields.DOUBLE_VAL, new 
org.apache.thrift.meta_data.FieldMetaData("doubleVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TDoubleValue.class))); - tmpMap.put(_Fields.STRING_VAL, new org.apache.thrift.meta_data.FieldMetaData("stringVal", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStringValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TColumnValue.class, metaDataMap); - } - - public TColumnValue() { - super(); - } - - public TColumnValue(TColumnValue._Fields setField, Object value) { - super(setField, value); - } - - public TColumnValue(TColumnValue other) { - super(other); - } - public TColumnValue deepCopy() { - return new TColumnValue(this); - } - - public static TColumnValue boolVal(TBoolValue value) { - TColumnValue x = new TColumnValue(); - x.setBoolVal(value); - return x; - } - - public static TColumnValue byteVal(TByteValue value) { - TColumnValue x = new TColumnValue(); - x.setByteVal(value); - return x; - } - - public static TColumnValue i16Val(TI16Value value) { - TColumnValue x = new TColumnValue(); - x.setI16Val(value); - return x; - } - - public static TColumnValue i32Val(TI32Value value) { - TColumnValue x = new TColumnValue(); - x.setI32Val(value); - return x; - } - - public static TColumnValue i64Val(TI64Value value) { - TColumnValue x = new TColumnValue(); - x.setI64Val(value); - return x; - } - - public static TColumnValue doubleVal(TDoubleValue value) { - TColumnValue x = new TColumnValue(); - x.setDoubleVal(value); - return x; - } - - public static TColumnValue stringVal(TStringValue value) { - TColumnValue x = new TColumnValue(); - x.setStringVal(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case BOOL_VAL: - if (value instanceof TBoolValue) { - break; - } - throw new ClassCastException("Was expecting value of type TBoolValue for field 'boolVal', but got " + value.getClass().getSimpleName()); - case BYTE_VAL: - if (value instanceof TByteValue) { - break; - } - throw new ClassCastException("Was expecting value of type TByteValue for field 'byteVal', but got " + value.getClass().getSimpleName()); - case I16_VAL: - if (value instanceof TI16Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI16Value for field 'i16Val', but got " + value.getClass().getSimpleName()); - case I32_VAL: - if (value instanceof TI32Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI32Value for field 'i32Val', but got " + value.getClass().getSimpleName()); - case I64_VAL: - if (value instanceof TI64Value) { - break; - } - throw new ClassCastException("Was expecting value of type TI64Value for field 'i64Val', but got " + value.getClass().getSimpleName()); - case DOUBLE_VAL: - if (value instanceof TDoubleValue) { - break; - } - throw new ClassCastException("Was expecting value of type TDoubleValue for field 'doubleVal', but got " + value.getClass().getSimpleName()); - case STRING_VAL: - if (value instanceof TStringValue) { - break; - } - throw new ClassCastException("Was expecting value of type TStringValue for field 'stringVal', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - 
protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - if (field.type == BOOL_VAL_FIELD_DESC.type) { - TBoolValue boolVal; - boolVal = new TBoolValue(); - boolVal.read(iprot); - return boolVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BYTE_VAL: - if (field.type == BYTE_VAL_FIELD_DESC.type) { - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I16_VAL: - if (field.type == I16_VAL_FIELD_DESC.type) { - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I32_VAL: - if (field.type == I32_VAL_FIELD_DESC.type) { - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case I64_VAL: - if (field.type == I64_VAL_FIELD_DESC.type) { - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case DOUBLE_VAL: - if (field.type == DOUBLE_VAL_FIELD_DESC.type) { - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VAL: - if (field.type == STRING_VAL_FIELD_DESC.type) { - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case BOOL_VAL: - TBoolValue boolVal; - boolVal = new TBoolValue(); - 
boolVal.read(iprot); - return boolVal; - case BYTE_VAL: - TByteValue byteVal; - byteVal = new TByteValue(); - byteVal.read(iprot); - return byteVal; - case I16_VAL: - TI16Value i16Val; - i16Val = new TI16Value(); - i16Val.read(iprot); - return i16Val; - case I32_VAL: - TI32Value i32Val; - i32Val = new TI32Value(); - i32Val.read(iprot); - return i32Val; - case I64_VAL: - TI64Value i64Val; - i64Val = new TI64Value(); - i64Val.read(iprot); - return i64Val; - case DOUBLE_VAL: - TDoubleValue doubleVal; - doubleVal = new TDoubleValue(); - doubleVal.read(iprot); - return doubleVal; - case STRING_VAL: - TStringValue stringVal; - stringVal = new TStringValue(); - stringVal.read(iprot); - return stringVal; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case BOOL_VAL: - TBoolValue boolVal = (TBoolValue)value_; - boolVal.write(oprot); - return; - case BYTE_VAL: - TByteValue byteVal = (TByteValue)value_; - byteVal.write(oprot); - return; - case I16_VAL: - TI16Value i16Val = (TI16Value)value_; - i16Val.write(oprot); - return; - case I32_VAL: - TI32Value i32Val = (TI32Value)value_; - i32Val.write(oprot); - return; - case I64_VAL: - TI64Value i64Val = (TI64Value)value_; - i64Val.write(oprot); - return; - case DOUBLE_VAL: - TDoubleValue doubleVal = (TDoubleValue)value_; - doubleVal.write(oprot); - return; - case STRING_VAL: - TStringValue stringVal = (TStringValue)value_; - stringVal.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case BOOL_VAL: - return BOOL_VAL_FIELD_DESC; - case BYTE_VAL: - return BYTE_VAL_FIELD_DESC; - case I16_VAL: - return I16_VAL_FIELD_DESC; - case I32_VAL: - return I32_VAL_FIELD_DESC; - case I64_VAL: - return I64_VAL_FIELD_DESC; - case DOUBLE_VAL: - return DOUBLE_VAL_FIELD_DESC; - case STRING_VAL: - return STRING_VAL_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TBoolValue getBoolVal() { - if (getSetField() == _Fields.BOOL_VAL) { - return (TBoolValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'boolVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBoolVal(TBoolValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BOOL_VAL; - value_ = value; - } - - public TByteValue getByteVal() { - if (getSetField() == _Fields.BYTE_VAL) { - return (TByteValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'byteVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setByteVal(TByteValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.BYTE_VAL; - value_ = value; - } - - public 
TI16Value getI16Val() { - if (getSetField() == _Fields.I16_VAL) { - return (TI16Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i16Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI16Val(TI16Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I16_VAL; - value_ = value; - } - - public TI32Value getI32Val() { - if (getSetField() == _Fields.I32_VAL) { - return (TI32Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Val(TI32Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I32_VAL; - value_ = value; - } - - public TI64Value getI64Val() { - if (getSetField() == _Fields.I64_VAL) { - return (TI64Value)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i64Val' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI64Val(TI64Value value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.I64_VAL; - value_ = value; - } - - public TDoubleValue getDoubleVal() { - if (getSetField() == _Fields.DOUBLE_VAL) { - return (TDoubleValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'doubleVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setDoubleVal(TDoubleValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.DOUBLE_VAL; - value_ = value; - } - - public TStringValue getStringVal() { - if (getSetField() == _Fields.STRING_VAL) { - return (TStringValue)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringVal' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringVal(TStringValue value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VAL; - value_ = value; - } - - public boolean isSetBoolVal() { - return setField_ == _Fields.BOOL_VAL; - } - - - public boolean isSetByteVal() { - return setField_ == _Fields.BYTE_VAL; - } - - - public boolean isSetI16Val() { - return setField_ == _Fields.I16_VAL; - } - - - public boolean isSetI32Val() { - return setField_ == _Fields.I32_VAL; - } - - - public boolean isSetI64Val() { - return setField_ == _Fields.I64_VAL; - } - - - public boolean isSetDoubleVal() { - return setField_ == _Fields.DOUBLE_VAL; - } - - - public boolean isSetStringVal() { - return setField_ == _Fields.STRING_VAL; - } - - - public boolean equals(Object other) { - if (other instanceof TColumnValue) { - return equals((TColumnValue)other); - } else { - return false; - } - } - - public boolean equals(TColumnValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TColumnValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java deleted file mode 100644 index f93c9b4f0edc3..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TDoubleColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleColumn.class, metaDataMap); - } - - public TDoubleColumn() { - } - - public TDoubleColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TDoubleColumn(TDoubleColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TDoubleColumn deepCopy() { - return new TDoubleColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(double elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleColumn) - return this.equals((TDoubleColumn)that); - return false; - } - - public boolean equals(TDoubleColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TDoubleColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleColumnStandardSchemeFactory implements SchemeFactory { - public TDoubleColumnStandardScheme getScheme() { - return new TDoubleColumnStandardScheme(); - } - } - - private static class TDoubleColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list94 = iprot.readListBegin(); - struct.values = new ArrayList(_list94.size); - double _elem95; - for (int _i96 = 0; _i96 < _list94.size; ++_i96) - { - _elem95 = iprot.readDouble(); - struct.values.add(_elem95); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, struct.values.size())); - for (double _iter97 : struct.values) - { - oprot.writeDouble(_iter97); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleColumnTupleSchemeFactory implements SchemeFactory { - public TDoubleColumnTupleScheme getScheme() { - return new TDoubleColumnTupleScheme(); - } - } - - private static class TDoubleColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (double _iter98 : struct.values) - { - oprot.writeDouble(_iter98); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list99 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.DOUBLE, iprot.readI32()); - struct.values = new ArrayList(_list99.size); - double _elem100; - for (int _i101 = 0; _i101 < _list99.size; ++_i101) - { - _elem100 = iprot.readDouble(); - struct.values.add(_elem100); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java deleted file mode 100644 index 5700355aad94d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TDoubleValue.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TDoubleValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TDoubleValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.DOUBLE, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TDoubleValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TDoubleValueTupleSchemeFactory()); - } - - private double value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TDoubleValue.class, metaDataMap); - } - - public TDoubleValue() { - } - - /** - * Performs a deep copy on other. 
- */ - public TDoubleValue(TDoubleValue other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TDoubleValue deepCopy() { - return new TDoubleValue(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0.0; - } - - public double getValue() { - return this.value; - } - - public void setValue(double value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TDoubleValue) - return this.equals((TDoubleValue)that); - return false; - } - - public boolean equals(TDoubleValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TDoubleValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TDoubleValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - 
return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TDoubleValueStandardSchemeFactory implements SchemeFactory { - public TDoubleValueStandardScheme getScheme() { - return new TDoubleValueStandardScheme(); - } - } - - private static class TDoubleValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TDoubleValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TDoubleValue struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeDouble(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TDoubleValueTupleSchemeFactory implements SchemeFactory { - public TDoubleValueTupleScheme getScheme() { - return new TDoubleValueTupleScheme(); - } - } - - private static class TDoubleValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeDouble(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TDoubleValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readDouble(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java deleted 
file mode 100644 index 1f73cec61af78..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementReq.java +++ /dev/null @@ -1,863 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TExecuteStatementReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField STATEMENT_FIELD_DESC = new org.apache.thrift.protocol.TField("statement", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField CONF_OVERLAY_FIELD_DESC = new org.apache.thrift.protocol.TField("confOverlay", org.apache.thrift.protocol.TType.MAP, (short)3); - private static final org.apache.thrift.protocol.TField RUN_ASYNC_FIELD_DESC = new org.apache.thrift.protocol.TField("runAsync", org.apache.thrift.protocol.TType.BOOL, (short)4); - private static final org.apache.thrift.protocol.TField QUERY_TIMEOUT_FIELD_DESC = new org.apache.thrift.protocol.TField("queryTimeout", org.apache.thrift.protocol.TType.I64, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String statement; // required - private Map confOverlay; // optional - private boolean runAsync; // optional - private long queryTimeout; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - STATEMENT((short)2, "statement"), - CONF_OVERLAY((short)3, "confOverlay"), - RUN_ASYNC((short)4, "runAsync"), - QUERY_TIMEOUT((short)5, "queryTimeout"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // STATEMENT - return STATEMENT; - case 3: // CONF_OVERLAY - return CONF_OVERLAY; - case 4: // RUN_ASYNC - return RUN_ASYNC; - case 5: // QUERY_TIMEOUT - return QUERY_TIMEOUT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __RUNASYNC_ISSET_ID = 0; - private static final int __QUERYTIMEOUT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.CONF_OVERLAY,_Fields.RUN_ASYNC,_Fields.QUERY_TIMEOUT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.STATEMENT, new org.apache.thrift.meta_data.FieldMetaData("statement", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONF_OVERLAY, new org.apache.thrift.meta_data.FieldMetaData("confOverlay", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.RUN_ASYNC, new org.apache.thrift.meta_data.FieldMetaData("runAsync", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.QUERY_TIMEOUT, new org.apache.thrift.meta_data.FieldMetaData("queryTimeout", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new 
org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementReq.class, metaDataMap); - } - - public TExecuteStatementReq() { - this.runAsync = false; - - this.queryTimeout = 0L; - - } - - public TExecuteStatementReq( - TSessionHandle sessionHandle, - String statement) - { - this(); - this.sessionHandle = sessionHandle; - this.statement = statement; - } - - /** - * Performs a deep copy on other. - */ - public TExecuteStatementReq(TExecuteStatementReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetStatement()) { - this.statement = other.statement; - } - if (other.isSetConfOverlay()) { - Map __this__confOverlay = new HashMap(other.confOverlay); - this.confOverlay = __this__confOverlay; - } - this.runAsync = other.runAsync; - this.queryTimeout = other.queryTimeout; - } - - public TExecuteStatementReq deepCopy() { - return new TExecuteStatementReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.statement = null; - this.confOverlay = null; - this.runAsync = false; - - this.queryTimeout = 0L; - - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getStatement() { - return this.statement; - } - - public void setStatement(String statement) { - this.statement = statement; - } - - public void unsetStatement() { - this.statement = null; - } - - /** Returns true if field statement is set (has been assigned a value) and false otherwise */ - public boolean isSetStatement() { - return this.statement != null; - } - - public void setStatementIsSet(boolean value) { - if (!value) { - this.statement = null; - } - } - - public int getConfOverlaySize() { - return (this.confOverlay == null) ? 
0 : this.confOverlay.size(); - } - - public void putToConfOverlay(String key, String val) { - if (this.confOverlay == null) { - this.confOverlay = new HashMap(); - } - this.confOverlay.put(key, val); - } - - public Map getConfOverlay() { - return this.confOverlay; - } - - public void setConfOverlay(Map confOverlay) { - this.confOverlay = confOverlay; - } - - public void unsetConfOverlay() { - this.confOverlay = null; - } - - /** Returns true if field confOverlay is set (has been assigned a value) and false otherwise */ - public boolean isSetConfOverlay() { - return this.confOverlay != null; - } - - public void setConfOverlayIsSet(boolean value) { - if (!value) { - this.confOverlay = null; - } - } - - public boolean isRunAsync() { - return this.runAsync; - } - - public void setRunAsync(boolean runAsync) { - this.runAsync = runAsync; - setRunAsyncIsSet(true); - } - - public void unsetRunAsync() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - /** Returns true if field runAsync is set (has been assigned a value) and false otherwise */ - public boolean isSetRunAsync() { - return EncodingUtils.testBit(__isset_bitfield, __RUNASYNC_ISSET_ID); - } - - public void setRunAsyncIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __RUNASYNC_ISSET_ID, value); - } - - public long getQueryTimeout() { - return this.queryTimeout; - } - - public void setQueryTimeout(long queryTimeout) { - this.queryTimeout = queryTimeout; - setQueryTimeoutIsSet(true); - } - - public void unsetQueryTimeout() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID); - } - - /** Returns true if field queryTimeout is set (has been assigned a value) and false otherwise */ - public boolean isSetQueryTimeout() { - return EncodingUtils.testBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID); - } - - public void setQueryTimeoutIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __QUERYTIMEOUT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case STATEMENT: - if (value == null) { - unsetStatement(); - } else { - setStatement((String)value); - } - break; - - case CONF_OVERLAY: - if (value == null) { - unsetConfOverlay(); - } else { - setConfOverlay((Map)value); - } - break; - - case RUN_ASYNC: - if (value == null) { - unsetRunAsync(); - } else { - setRunAsync((Boolean)value); - } - break; - - case QUERY_TIMEOUT: - if (value == null) { - unsetQueryTimeout(); - } else { - setQueryTimeout((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case STATEMENT: - return getStatement(); - - case CONF_OVERLAY: - return getConfOverlay(); - - case RUN_ASYNC: - return isRunAsync(); - - case QUERY_TIMEOUT: - return getQueryTimeout(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case STATEMENT: - return isSetStatement(); - case CONF_OVERLAY: - return isSetConfOverlay(); - case RUN_ASYNC: - return isSetRunAsync(); - case 
QUERY_TIMEOUT: - return isSetQueryTimeout(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementReq) - return this.equals((TExecuteStatementReq)that); - return false; - } - - public boolean equals(TExecuteStatementReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_statement = true && this.isSetStatement(); - boolean that_present_statement = true && that.isSetStatement(); - if (this_present_statement || that_present_statement) { - if (!(this_present_statement && that_present_statement)) - return false; - if (!this.statement.equals(that.statement)) - return false; - } - - boolean this_present_confOverlay = true && this.isSetConfOverlay(); - boolean that_present_confOverlay = true && that.isSetConfOverlay(); - if (this_present_confOverlay || that_present_confOverlay) { - if (!(this_present_confOverlay && that_present_confOverlay)) - return false; - if (!this.confOverlay.equals(that.confOverlay)) - return false; - } - - boolean this_present_runAsync = true && this.isSetRunAsync(); - boolean that_present_runAsync = true && that.isSetRunAsync(); - if (this_present_runAsync || that_present_runAsync) { - if (!(this_present_runAsync && that_present_runAsync)) - return false; - if (this.runAsync != that.runAsync) - return false; - } - - boolean this_present_queryTimeout = true && this.isSetQueryTimeout(); - boolean that_present_queryTimeout = true && that.isSetQueryTimeout(); - if (this_present_queryTimeout || that_present_queryTimeout) { - if (!(this_present_queryTimeout && that_present_queryTimeout)) - return false; - if (this.queryTimeout != that.queryTimeout) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_statement = true && (isSetStatement()); - list.add(present_statement); - if (present_statement) - list.add(statement); - - boolean present_confOverlay = true && (isSetConfOverlay()); - list.add(present_confOverlay); - if (present_confOverlay) - list.add(confOverlay); - - boolean present_runAsync = true && (isSetRunAsync()); - list.add(present_runAsync); - if (present_runAsync) - list.add(runAsync); - - boolean present_queryTimeout = true && (isSetQueryTimeout()); - list.add(present_queryTimeout); - if (present_queryTimeout) - list.add(queryTimeout); - - return list.hashCode(); - } - - @Override - public int compareTo(TExecuteStatementReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - 
lastComparison = Boolean.valueOf(isSetStatement()).compareTo(other.isSetStatement()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatement()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statement, other.statement); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfOverlay()).compareTo(other.isSetConfOverlay()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfOverlay()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.confOverlay, other.confOverlay); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRunAsync()).compareTo(other.isSetRunAsync()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRunAsync()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.runAsync, other.runAsync); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetQueryTimeout()).compareTo(other.isSetQueryTimeout()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetQueryTimeout()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.queryTimeout, other.queryTimeout); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("statement:"); - if (this.statement == null) { - sb.append("null"); - } else { - sb.append(this.statement); - } - first = false; - if (isSetConfOverlay()) { - if (!first) sb.append(", "); - sb.append("confOverlay:"); - if (this.confOverlay == null) { - sb.append("null"); - } else { - sb.append(this.confOverlay); - } - first = false; - } - if (isSetRunAsync()) { - if (!first) sb.append(", "); - sb.append("runAsync:"); - sb.append(this.runAsync); - first = false; - } - if (isSetQueryTimeout()) { - if (!first) sb.append(", "); - sb.append("queryTimeout:"); - sb.append(this.queryTimeout); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetStatement()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statement' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementReqStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementReqStandardScheme getScheme() { - return new TExecuteStatementReqStandardScheme(); - } - } - - private static class TExecuteStatementReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // STATEMENT - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // CONF_OVERLAY - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map162 = iprot.readMapBegin(); - struct.confOverlay = new HashMap(2*_map162.size); - String _key163; - String _val164; - for (int _i165 = 0; _i165 < _map162.size; ++_i165) - { - _key163 = iprot.readString(); - _val164 = iprot.readString(); - struct.confOverlay.put(_key163, _val164); - } - iprot.readMapEnd(); - } - struct.setConfOverlayIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // RUN_ASYNC - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // QUERY_TIMEOUT - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.queryTimeout = iprot.readI64(); - struct.setQueryTimeoutIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementReq struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.statement != null) { - oprot.writeFieldBegin(STATEMENT_FIELD_DESC); - oprot.writeString(struct.statement); - oprot.writeFieldEnd(); - } - if (struct.confOverlay != null) { - if (struct.isSetConfOverlay()) { - oprot.writeFieldBegin(CONF_OVERLAY_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.confOverlay.size())); - for (Map.Entry _iter166 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter166.getKey()); - oprot.writeString(_iter166.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.isSetRunAsync()) { - oprot.writeFieldBegin(RUN_ASYNC_FIELD_DESC); - oprot.writeBool(struct.runAsync); - oprot.writeFieldEnd(); - } - if (struct.isSetQueryTimeout()) { - oprot.writeFieldBegin(QUERY_TIMEOUT_FIELD_DESC); - oprot.writeI64(struct.queryTimeout); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementReqTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementReqTupleScheme getScheme() { - return new TExecuteStatementReqTupleScheme(); - } - } - - private static class TExecuteStatementReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.statement); - BitSet optionals = new BitSet(); - if (struct.isSetConfOverlay()) { - optionals.set(0); - } - if (struct.isSetRunAsync()) { - optionals.set(1); - } - if (struct.isSetQueryTimeout()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetConfOverlay()) { - { - oprot.writeI32(struct.confOverlay.size()); - for (Map.Entry _iter167 : struct.confOverlay.entrySet()) - { - oprot.writeString(_iter167.getKey()); - oprot.writeString(_iter167.getValue()); - } - } - } - if (struct.isSetRunAsync()) { - oprot.writeBool(struct.runAsync); - } - if (struct.isSetQueryTimeout()) { - oprot.writeI64(struct.queryTimeout); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.statement = iprot.readString(); - struct.setStatementIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TMap _map168 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.confOverlay = new HashMap(2*_map168.size); - String _key169; - String _val170; - for (int _i171 = 0; _i171 < _map168.size; ++_i171) - { - _key169 = iprot.readString(); - _val170 = iprot.readString(); - struct.confOverlay.put(_key169, _val170); - } - } - struct.setConfOverlayIsSet(true); - } - if (incoming.get(1)) { - struct.runAsync = iprot.readBool(); - struct.setRunAsyncIsSet(true); - } - if (incoming.get(2)) { - struct.queryTimeout = 
iprot.readI64(); - struct.setQueryTimeoutIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java deleted file mode 100644 index 7101fa5bdb84c..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TExecuteStatementResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TExecuteStatementResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TExecuteStatementResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TExecuteStatementRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TExecuteStatementRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TExecuteStatementResp.class, metaDataMap); - } - - public TExecuteStatementResp() { - } - - public TExecuteStatementResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TExecuteStatementResp(TExecuteStatementResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TExecuteStatementResp deepCopy() { - return new TExecuteStatementResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TExecuteStatementResp) - return this.equals((TExecuteStatementResp)that); - return false; - } - - public boolean equals(TExecuteStatementResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - 
list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TExecuteStatementResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TExecuteStatementResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TExecuteStatementRespStandardSchemeFactory implements SchemeFactory { - public TExecuteStatementRespStandardScheme getScheme() { - return new TExecuteStatementRespStandardScheme(); - } - } - - private static class TExecuteStatementRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TExecuteStatementRespTupleSchemeFactory implements SchemeFactory { - public TExecuteStatementRespTupleScheme getScheme() { - return new TExecuteStatementRespTupleScheme(); - } - } - - private static class TExecuteStatementRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - 
struct.operationHandle.write(oprot);
-      }
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TExecuteStatementResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java
deleted file mode 100644
index 159be45259434..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchOrientation.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TFetchOrientation implements org.apache.thrift.TEnum {
-  FETCH_NEXT(0),
-  FETCH_PRIOR(1),
-  FETCH_RELATIVE(2),
-  FETCH_ABSOLUTE(3),
-  FETCH_FIRST(4),
-  FETCH_LAST(5);
-
-  private final int value;
-
-  private TFetchOrientation(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TFetchOrientation findByValue(int value) {
-    switch (value) {
-      case 0:
-        return FETCH_NEXT;
-      case 1:
-        return FETCH_PRIOR;
-      case 2:
-        return FETCH_RELATIVE;
-      case 3:
-        return FETCH_ABSOLUTE;
-      case 4:
-        return FETCH_FIRST;
-      case 5:
-        return FETCH_LAST;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java
deleted file mode 100644
index 2c93339d0c68b..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsReq.java
+++ /dev/null
@@ -1,714 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TFetchResultsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ORIENTATION_FIELD_DESC = new org.apache.thrift.protocol.TField("orientation", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField MAX_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("maxRows", org.apache.thrift.protocol.TType.I64, (short)3); - private static final org.apache.thrift.protocol.TField FETCH_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("fetchType", org.apache.thrift.protocol.TType.I16, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - private TFetchOrientation orientation; // required - private long maxRows; // required - private short fetchType; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - /** - * - * @see TFetchOrientation - */ - ORIENTATION((short)2, "orientation"), - MAX_ROWS((short)3, "maxRows"), - FETCH_TYPE((short)4, "fetchType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // ORIENTATION - return ORIENTATION; - case 3: // MAX_ROWS - return MAX_ROWS; - case 4: // FETCH_TYPE - return FETCH_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __MAXROWS_ISSET_ID = 0; - private static final int __FETCHTYPE_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.FETCH_TYPE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.ORIENTATION, new org.apache.thrift.meta_data.FieldMetaData("orientation", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TFetchOrientation.class))); - tmpMap.put(_Fields.MAX_ROWS, new org.apache.thrift.meta_data.FieldMetaData("maxRows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.FETCH_TYPE, new org.apache.thrift.meta_data.FieldMetaData("fetchType", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsReq.class, metaDataMap); - } - - public TFetchResultsReq() { - this.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.FETCH_NEXT; - - this.fetchType = (short)0; - - } - - public TFetchResultsReq( - TOperationHandle operationHandle, - TFetchOrientation orientation, - long maxRows) - { - this(); - this.operationHandle = operationHandle; - this.orientation = orientation; - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsReq(TFetchResultsReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - if (other.isSetOrientation()) { - this.orientation = other.orientation; - } - this.maxRows = other.maxRows; - this.fetchType = other.fetchType; - } - - public TFetchResultsReq deepCopy() { - return new TFetchResultsReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - this.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.FETCH_NEXT; - - setMaxRowsIsSet(false); - this.maxRows = 0; - this.fetchType = (short)0; - - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - /** - * - * @see TFetchOrientation - */ - public TFetchOrientation getOrientation() { - return this.orientation; - } - - /** - * - * @see TFetchOrientation - */ - public void setOrientation(TFetchOrientation orientation) { - this.orientation = orientation; - } - - public void unsetOrientation() { - this.orientation = null; - } - - /** Returns true if field orientation is set (has been assigned a value) and false otherwise */ - public boolean isSetOrientation() { - return this.orientation != null; - } - - public void setOrientationIsSet(boolean value) { - if (!value) { - this.orientation = null; - } - } - - public long getMaxRows() { - return this.maxRows; - } - - public void setMaxRows(long maxRows) { - this.maxRows = maxRows; - setMaxRowsIsSet(true); - } - - public void unsetMaxRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - /** Returns true if field maxRows is set (has been assigned a value) and false otherwise */ - public boolean isSetMaxRows() { - return EncodingUtils.testBit(__isset_bitfield, __MAXROWS_ISSET_ID); - } - - public void setMaxRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MAXROWS_ISSET_ID, value); - } - - public short getFetchType() { - return this.fetchType; - } - - public void setFetchType(short fetchType) { - this.fetchType = fetchType; - setFetchTypeIsSet(true); - } - - public void unsetFetchType() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - /** Returns true if field fetchType is set (has been assigned a value) and false otherwise */ - public boolean isSetFetchType() { - return EncodingUtils.testBit(__isset_bitfield, __FETCHTYPE_ISSET_ID); - } - - public void setFetchTypeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __FETCHTYPE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - case ORIENTATION: - if (value == null) { - unsetOrientation(); - } else { - setOrientation((TFetchOrientation)value); - } - break; - - case MAX_ROWS: - if (value == null) { - 
unsetMaxRows(); - } else { - setMaxRows((Long)value); - } - break; - - case FETCH_TYPE: - if (value == null) { - unsetFetchType(); - } else { - setFetchType((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case ORIENTATION: - return getOrientation(); - - case MAX_ROWS: - return getMaxRows(); - - case FETCH_TYPE: - return getFetchType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case ORIENTATION: - return isSetOrientation(); - case MAX_ROWS: - return isSetMaxRows(); - case FETCH_TYPE: - return isSetFetchType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsReq) - return this.equals((TFetchResultsReq)that); - return false; - } - - public boolean equals(TFetchResultsReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_orientation = true && this.isSetOrientation(); - boolean that_present_orientation = true && that.isSetOrientation(); - if (this_present_orientation || that_present_orientation) { - if (!(this_present_orientation && that_present_orientation)) - return false; - if (!this.orientation.equals(that.orientation)) - return false; - } - - boolean this_present_maxRows = true; - boolean that_present_maxRows = true; - if (this_present_maxRows || that_present_maxRows) { - if (!(this_present_maxRows && that_present_maxRows)) - return false; - if (this.maxRows != that.maxRows) - return false; - } - - boolean this_present_fetchType = true && this.isSetFetchType(); - boolean that_present_fetchType = true && that.isSetFetchType(); - if (this_present_fetchType || that_present_fetchType) { - if (!(this_present_fetchType && that_present_fetchType)) - return false; - if (this.fetchType != that.fetchType) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - boolean present_orientation = true && (isSetOrientation()); - list.add(present_orientation); - if (present_orientation) - list.add(orientation.getValue()); - - boolean present_maxRows = true; - list.add(present_maxRows); - if (present_maxRows) - list.add(maxRows); - - boolean present_fetchType = true && (isSetFetchType()); - list.add(present_fetchType); - if (present_fetchType) - list.add(fetchType); - - return list.hashCode(); - } - - @Override - public int compareTo(TFetchResultsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = 
Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOrientation()).compareTo(other.isSetOrientation()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOrientation()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.orientation, other.orientation); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetMaxRows()).compareTo(other.isSetMaxRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetMaxRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.maxRows, other.maxRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFetchType()).compareTo(other.isSetFetchType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFetchType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.fetchType, other.fetchType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("orientation:"); - if (this.orientation == null) { - sb.append("null"); - } else { - sb.append(this.orientation); - } - first = false; - if (!first) sb.append(", "); - sb.append("maxRows:"); - sb.append(this.maxRows); - first = false; - if (isSetFetchType()) { - if (!first) sb.append(", "); - sb.append("fetchType:"); - sb.append(this.fetchType); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - if (!isSetOrientation()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'orientation' is unset! Struct:" + toString()); - } - - if (!isSetMaxRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'maxRows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsReqStandardSchemeFactory implements SchemeFactory { - public TFetchResultsReqStandardScheme getScheme() { - return new TFetchResultsReqStandardScheme(); - } - } - - private static class TFetchResultsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ORIENTATION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // MAX_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FETCH_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.orientation != null) { - oprot.writeFieldBegin(ORIENTATION_FIELD_DESC); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(MAX_ROWS_FIELD_DESC); - oprot.writeI64(struct.maxRows); - oprot.writeFieldEnd(); - if 
(struct.isSetFetchType()) { - oprot.writeFieldBegin(FETCH_TYPE_FIELD_DESC); - oprot.writeI16(struct.fetchType); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsReqTupleSchemeFactory implements SchemeFactory { - public TFetchResultsReqTupleScheme getScheme() { - return new TFetchResultsReqTupleScheme(); - } - } - - private static class TFetchResultsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - oprot.writeI32(struct.orientation.getValue()); - oprot.writeI64(struct.maxRows); - BitSet optionals = new BitSet(); - if (struct.isSetFetchType()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetFetchType()) { - oprot.writeI16(struct.fetchType); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - struct.orientation = org.apache.hive.service.rpc.thrift.TFetchOrientation.findByValue(iprot.readI32()); - struct.setOrientationIsSet(true); - struct.maxRows = iprot.readI64(); - struct.setMaxRowsIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.fetchType = iprot.readI16(); - struct.setFetchTypeIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java deleted file mode 100644 index 8f86cee3ad468..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TFetchResultsResp.java +++ /dev/null @@ -1,612 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TFetchResultsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TFetchResultsResp"); - - private static final 
org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField HAS_MORE_ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("hasMoreRows", org.apache.thrift.protocol.TType.BOOL, (short)2); - private static final org.apache.thrift.protocol.TField RESULTS_FIELD_DESC = new org.apache.thrift.protocol.TField("results", org.apache.thrift.protocol.TType.STRUCT, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TFetchResultsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TFetchResultsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private boolean hasMoreRows; // optional - private TRowSet results; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - HAS_MORE_ROWS((short)2, "hasMoreRows"), - RESULTS((short)3, "results"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // HAS_MORE_ROWS - return HAS_MORE_ROWS; - case 3: // RESULTS - return RESULTS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASMOREROWS_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.HAS_MORE_ROWS,_Fields.RESULTS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.HAS_MORE_ROWS, new org.apache.thrift.meta_data.FieldMetaData("hasMoreRows", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.RESULTS, new org.apache.thrift.meta_data.FieldMetaData("results", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRowSet.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TFetchResultsResp.class, metaDataMap); - } - - public TFetchResultsResp() { - } - - public TFetchResultsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TFetchResultsResp(TFetchResultsResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - this.hasMoreRows = other.hasMoreRows; - if (other.isSetResults()) { - this.results = new TRowSet(other.results); - } - } - - public TFetchResultsResp deepCopy() { - return new TFetchResultsResp(this); - } - - @Override - public void clear() { - this.status = null; - setHasMoreRowsIsSet(false); - this.hasMoreRows = false; - this.results = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public boolean isHasMoreRows() { - return this.hasMoreRows; - } - - public void setHasMoreRows(boolean hasMoreRows) { - this.hasMoreRows = hasMoreRows; - setHasMoreRowsIsSet(true); - } - - public void unsetHasMoreRows() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - /** Returns true if field hasMoreRows is set (has been assigned a value) and false otherwise */ - public boolean isSetHasMoreRows() { - return EncodingUtils.testBit(__isset_bitfield, __HASMOREROWS_ISSET_ID); - } - - public void setHasMoreRowsIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASMOREROWS_ISSET_ID, value); - } - - public TRowSet getResults() { - return this.results; - } - - public void setResults(TRowSet results) { - this.results = results; - } - - public void unsetResults() { - this.results = null; - } - - /** Returns true if field results is set (has been assigned a value) and false otherwise */ - public boolean isSetResults() { - return this.results != null; - } - - public void setResultsIsSet(boolean value) { - if (!value) { - this.results = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case HAS_MORE_ROWS: - if (value == null) { - unsetHasMoreRows(); - } else { - setHasMoreRows((Boolean)value); - } - break; - - case RESULTS: - if (value == null) { - unsetResults(); - } else { - setResults((TRowSet)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case HAS_MORE_ROWS: - return isHasMoreRows(); - - case RESULTS: - return getResults(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case HAS_MORE_ROWS: - return isSetHasMoreRows(); - case RESULTS: - return isSetResults(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TFetchResultsResp) - return this.equals((TFetchResultsResp)that); - return false; - } - - public boolean equals(TFetchResultsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && 
this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_hasMoreRows = true && this.isSetHasMoreRows(); - boolean that_present_hasMoreRows = true && that.isSetHasMoreRows(); - if (this_present_hasMoreRows || that_present_hasMoreRows) { - if (!(this_present_hasMoreRows && that_present_hasMoreRows)) - return false; - if (this.hasMoreRows != that.hasMoreRows) - return false; - } - - boolean this_present_results = true && this.isSetResults(); - boolean that_present_results = true && that.isSetResults(); - if (this_present_results || that_present_results) { - if (!(this_present_results && that_present_results)) - return false; - if (!this.results.equals(that.results)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_hasMoreRows = true && (isSetHasMoreRows()); - list.add(present_hasMoreRows); - if (present_hasMoreRows) - list.add(hasMoreRows); - - boolean present_results = true && (isSetResults()); - list.add(present_results); - if (present_results) - list.add(results); - - return list.hashCode(); - } - - @Override - public int compareTo(TFetchResultsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasMoreRows()).compareTo(other.isSetHasMoreRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasMoreRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasMoreRows, other.hasMoreRows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetResults()).compareTo(other.isSetResults()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetResults()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.results, other.results); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TFetchResultsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetHasMoreRows()) { - if (!first) sb.append(", "); - sb.append("hasMoreRows:"); - sb.append(this.hasMoreRows); - first = false; - } - if (isSetResults()) { - if (!first) sb.append(", "); - 
sb.append("results:"); - if (this.results == null) { - sb.append("null"); - } else { - sb.append(this.results); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (results != null) { - results.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TFetchResultsRespStandardSchemeFactory implements SchemeFactory { - public TFetchResultsRespStandardScheme getScheme() { - return new TFetchResultsRespStandardScheme(); - } - } - - private static class TFetchResultsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // HAS_MORE_ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RESULTS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TFetchResultsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetHasMoreRows()) { - oprot.writeFieldBegin(HAS_MORE_ROWS_FIELD_DESC); - oprot.writeBool(struct.hasMoreRows); - oprot.writeFieldEnd(); - } - if (struct.results != null) { - if (struct.isSetResults()) { - 
oprot.writeFieldBegin(RESULTS_FIELD_DESC); - struct.results.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TFetchResultsRespTupleSchemeFactory implements SchemeFactory { - public TFetchResultsRespTupleScheme getScheme() { - return new TFetchResultsRespTupleScheme(); - } - } - - private static class TFetchResultsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetHasMoreRows()) { - optionals.set(0); - } - if (struct.isSetResults()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetHasMoreRows()) { - oprot.writeBool(struct.hasMoreRows); - } - if (struct.isSetResults()) { - struct.results.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TFetchResultsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.hasMoreRows = iprot.readBool(); - struct.setHasMoreRowsIsSet(true); - } - if (incoming.get(1)) { - struct.results = new TRowSet(); - struct.results.read(iprot); - struct.setResultsIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java deleted file mode 100644 index b8a2ca6648069..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCatalogsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", 
org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsReq.class, metaDataMap); - } - - public TGetCatalogsReq() { - } - - public TGetCatalogsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsReq(TGetCatalogsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetCatalogsReq deepCopy() { - return new TGetCatalogsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsReq) - return this.equals((TGetCatalogsReq)that); - return false; - } - - public boolean equals(TGetCatalogsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCatalogsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsReqStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsReqStandardScheme getScheme() { - return new TGetCatalogsReqStandardScheme(); - } - } - - private static class TGetCatalogsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsReqTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsReqTupleScheme getScheme() { - return new TGetCatalogsReqTupleScheme(); - } - } - - private static class TGetCatalogsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new 
TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java deleted file mode 100644 index eeeac9a1f9292..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCatalogsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCatalogsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCatalogsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCatalogsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCatalogsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCatalogsResp.class, metaDataMap); - } - - public TGetCatalogsResp() { - } - - public TGetCatalogsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCatalogsResp(TGetCatalogsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCatalogsResp deepCopy() { - return new TGetCatalogsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCatalogsResp) - return this.equals((TGetCatalogsResp)that); - return false; - } - - public boolean equals(TGetCatalogsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCatalogsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCatalogsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCatalogsRespStandardSchemeFactory implements SchemeFactory { - public TGetCatalogsRespStandardScheme getScheme() { - return new TGetCatalogsRespStandardScheme(); - } - } - - private static class TGetCatalogsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCatalogsRespTupleSchemeFactory implements SchemeFactory { - public TGetCatalogsRespTupleScheme getScheme() { - return new TGetCatalogsRespTupleScheme(); - } - } - - private static class TGetCatalogsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetCatalogsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java deleted file mode 100644 index ba80279294957..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsReq.java +++ /dev/null @@ -1,822 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetColumnsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("columnName", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // 
required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private String columnName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - COLUMN_NAME((short)5, "columnName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // COLUMN_NAME - return COLUMN_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.COLUMN_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.COLUMN_NAME, new org.apache.thrift.meta_data.FieldMetaData("columnName", 
org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsReq.class, metaDataMap); - } - - public TGetColumnsReq() { - } - - public TGetColumnsReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetColumnsReq(TGetColumnsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetColumnName()) { - this.columnName = other.columnName; - } - } - - public TGetColumnsReq deepCopy() { - return new TGetColumnsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.columnName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public String getColumnName() { - return this.columnName; - } - - public void setColumnName(String columnName) { - this.columnName = columnName; - } - - public void unsetColumnName() { - this.columnName = null; - } - - /** Returns true if field columnName is set (has been 
assigned a value) and false otherwise */ - public boolean isSetColumnName() { - return this.columnName != null; - } - - public void setColumnNameIsSet(boolean value) { - if (!value) { - this.columnName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case COLUMN_NAME: - if (value == null) { - unsetColumnName(); - } else { - setColumnName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case COLUMN_NAME: - return getColumnName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case COLUMN_NAME: - return isSetColumnName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsReq) - return this.equals((TGetColumnsReq)that); - return false; - } - - public boolean equals(TGetColumnsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_columnName = true && 
this.isSetColumnName(); - boolean that_present_columnName = true && that.isSetColumnName(); - if (this_present_columnName || that_present_columnName) { - if (!(this_present_columnName && that_present_columnName)) - return false; - if (!this.columnName.equals(that.columnName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - boolean present_columnName = true && (isSetColumnName()); - list.add(present_columnName); - if (present_columnName) - list.add(columnName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetColumnsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnName()).compareTo(other.isSetColumnName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnName, other.columnName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb 
= new StringBuilder("TGetColumnsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetColumnName()) { - if (!first) sb.append(", "); - sb.append("columnName:"); - if (this.columnName == null) { - sb.append("null"); - } else { - sb.append(this.columnName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsReqStandardSchemeFactory implements SchemeFactory { - public TGetColumnsReqStandardScheme getScheme() { - return new TGetColumnsReqStandardScheme(); - } - } - - private static class TGetColumnsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.columnName != null) { - if (struct.isSetColumnName()) { - oprot.writeFieldBegin(COLUMN_NAME_FIELD_DESC); - oprot.writeString(struct.columnName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsReqTupleSchemeFactory implements SchemeFactory { - public TGetColumnsReqTupleScheme getScheme() { - return new TGetColumnsReqTupleScheme(); - } - } - - private static class TGetColumnsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetColumnName()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetColumnName()) { - oprot.writeString(struct.columnName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - 
struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - struct.columnName = iprot.readString(); - struct.setColumnNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java deleted file mode 100644 index c68aac9042fc1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetColumnsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetColumnsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetColumnsResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetColumnsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetColumnsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetColumnsResp.class, metaDataMap); - } - - public TGetColumnsResp() { - } - - public TGetColumnsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetColumnsResp(TGetColumnsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetColumnsResp deepCopy() { - return new TGetColumnsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetColumnsResp) - return this.equals((TGetColumnsResp)that); - return false; - } - - public boolean equals(TGetColumnsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetColumnsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetColumnsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetColumnsRespStandardSchemeFactory implements SchemeFactory { - public TGetColumnsRespStandardScheme getScheme() { - return new TGetColumnsRespStandardScheme(); - } - } - - private static class TGetColumnsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetColumnsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetColumnsRespTupleSchemeFactory implements SchemeFactory { - public TGetColumnsRespTupleScheme getScheme() { - return new TGetColumnsRespTupleScheme(); - } - } - - private static class TGetColumnsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetColumnsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java deleted file mode 100644 index 972957063b297..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceReq.java +++ /dev/null @@ -1,1034 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetCrossReferenceReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCrossReferenceReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField PARENT_CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentCatalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PARENT_SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentSchemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField PARENT_TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("parentTableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField FOREIGN_CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignCatalogName", org.apache.thrift.protocol.TType.STRING, (short)5); - private static final org.apache.thrift.protocol.TField FOREIGN_SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignSchemaName", org.apache.thrift.protocol.TType.STRING, (short)6); - private 
static final org.apache.thrift.protocol.TField FOREIGN_TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("foreignTableName", org.apache.thrift.protocol.TType.STRING, (short)7); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetCrossReferenceReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetCrossReferenceReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String parentCatalogName; // optional - private String parentSchemaName; // optional - private String parentTableName; // optional - private String foreignCatalogName; // optional - private String foreignSchemaName; // optional - private String foreignTableName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - PARENT_CATALOG_NAME((short)2, "parentCatalogName"), - PARENT_SCHEMA_NAME((short)3, "parentSchemaName"), - PARENT_TABLE_NAME((short)4, "parentTableName"), - FOREIGN_CATALOG_NAME((short)5, "foreignCatalogName"), - FOREIGN_SCHEMA_NAME((short)6, "foreignSchemaName"), - FOREIGN_TABLE_NAME((short)7, "foreignTableName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // PARENT_CATALOG_NAME - return PARENT_CATALOG_NAME; - case 3: // PARENT_SCHEMA_NAME - return PARENT_SCHEMA_NAME; - case 4: // PARENT_TABLE_NAME - return PARENT_TABLE_NAME; - case 5: // FOREIGN_CATALOG_NAME - return FOREIGN_CATALOG_NAME; - case 6: // FOREIGN_SCHEMA_NAME - return FOREIGN_SCHEMA_NAME; - case 7: // FOREIGN_TABLE_NAME - return FOREIGN_TABLE_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.PARENT_CATALOG_NAME,_Fields.PARENT_SCHEMA_NAME,_Fields.PARENT_TABLE_NAME,_Fields.FOREIGN_CATALOG_NAME,_Fields.FOREIGN_SCHEMA_NAME,_Fields.FOREIGN_TABLE_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.PARENT_CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentCatalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.PARENT_SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentSchemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.PARENT_TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("parentTableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignCatalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignSchemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.FOREIGN_TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("foreignTableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCrossReferenceReq.class, metaDataMap); - } - - public TGetCrossReferenceReq() { - } - - public TGetCrossReferenceReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCrossReferenceReq(TGetCrossReferenceReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetParentCatalogName()) { - this.parentCatalogName = other.parentCatalogName; - } - if (other.isSetParentSchemaName()) { - this.parentSchemaName = other.parentSchemaName; - } - if (other.isSetParentTableName()) { - this.parentTableName = other.parentTableName; - } - if (other.isSetForeignCatalogName()) { - this.foreignCatalogName = other.foreignCatalogName; - } - if (other.isSetForeignSchemaName()) { - this.foreignSchemaName = other.foreignSchemaName; - } - if (other.isSetForeignTableName()) { - this.foreignTableName = other.foreignTableName; - } - } - - public TGetCrossReferenceReq deepCopy() { - return new TGetCrossReferenceReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.parentCatalogName = null; - this.parentSchemaName = null; - this.parentTableName = null; - this.foreignCatalogName = null; - this.foreignSchemaName = null; - this.foreignTableName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getParentCatalogName() { - return this.parentCatalogName; - } - - public void setParentCatalogName(String parentCatalogName) { - this.parentCatalogName = parentCatalogName; - } - - public void unsetParentCatalogName() { - this.parentCatalogName = null; - } - - /** Returns true if field parentCatalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentCatalogName() { - return this.parentCatalogName != null; - } - - public void setParentCatalogNameIsSet(boolean value) { - if (!value) { - this.parentCatalogName = null; - } - } - - public String getParentSchemaName() { - return this.parentSchemaName; - } - - public void setParentSchemaName(String parentSchemaName) { - this.parentSchemaName = parentSchemaName; - } - - public void unsetParentSchemaName() { - this.parentSchemaName = null; - } - - /** Returns true if field parentSchemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentSchemaName() { - return this.parentSchemaName != null; - } - - public void setParentSchemaNameIsSet(boolean value) { - if (!value) { - this.parentSchemaName = null; - } - } - - public String getParentTableName() { - return this.parentTableName; - } - - public void setParentTableName(String parentTableName) { - this.parentTableName = parentTableName; - } - - public void unsetParentTableName() { - this.parentTableName = null; - } - - /** Returns true if field parentTableName is set (has been assigned a value) and false otherwise */ - public boolean isSetParentTableName() { - return this.parentTableName != null; - } - - public void setParentTableNameIsSet(boolean value) { - if (!value) { - this.parentTableName = null; - } - } - - public String getForeignCatalogName() { - return this.foreignCatalogName; - } - - public void setForeignCatalogName(String foreignCatalogName) { - this.foreignCatalogName = 
foreignCatalogName; - } - - public void unsetForeignCatalogName() { - this.foreignCatalogName = null; - } - - /** Returns true if field foreignCatalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignCatalogName() { - return this.foreignCatalogName != null; - } - - public void setForeignCatalogNameIsSet(boolean value) { - if (!value) { - this.foreignCatalogName = null; - } - } - - public String getForeignSchemaName() { - return this.foreignSchemaName; - } - - public void setForeignSchemaName(String foreignSchemaName) { - this.foreignSchemaName = foreignSchemaName; - } - - public void unsetForeignSchemaName() { - this.foreignSchemaName = null; - } - - /** Returns true if field foreignSchemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignSchemaName() { - return this.foreignSchemaName != null; - } - - public void setForeignSchemaNameIsSet(boolean value) { - if (!value) { - this.foreignSchemaName = null; - } - } - - public String getForeignTableName() { - return this.foreignTableName; - } - - public void setForeignTableName(String foreignTableName) { - this.foreignTableName = foreignTableName; - } - - public void unsetForeignTableName() { - this.foreignTableName = null; - } - - /** Returns true if field foreignTableName is set (has been assigned a value) and false otherwise */ - public boolean isSetForeignTableName() { - return this.foreignTableName != null; - } - - public void setForeignTableNameIsSet(boolean value) { - if (!value) { - this.foreignTableName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case PARENT_CATALOG_NAME: - if (value == null) { - unsetParentCatalogName(); - } else { - setParentCatalogName((String)value); - } - break; - - case PARENT_SCHEMA_NAME: - if (value == null) { - unsetParentSchemaName(); - } else { - setParentSchemaName((String)value); - } - break; - - case PARENT_TABLE_NAME: - if (value == null) { - unsetParentTableName(); - } else { - setParentTableName((String)value); - } - break; - - case FOREIGN_CATALOG_NAME: - if (value == null) { - unsetForeignCatalogName(); - } else { - setForeignCatalogName((String)value); - } - break; - - case FOREIGN_SCHEMA_NAME: - if (value == null) { - unsetForeignSchemaName(); - } else { - setForeignSchemaName((String)value); - } - break; - - case FOREIGN_TABLE_NAME: - if (value == null) { - unsetForeignTableName(); - } else { - setForeignTableName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case PARENT_CATALOG_NAME: - return getParentCatalogName(); - - case PARENT_SCHEMA_NAME: - return getParentSchemaName(); - - case PARENT_TABLE_NAME: - return getParentTableName(); - - case FOREIGN_CATALOG_NAME: - return getForeignCatalogName(); - - case FOREIGN_SCHEMA_NAME: - return getForeignSchemaName(); - - case FOREIGN_TABLE_NAME: - return getForeignTableName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case PARENT_CATALOG_NAME: - return 
isSetParentCatalogName(); - case PARENT_SCHEMA_NAME: - return isSetParentSchemaName(); - case PARENT_TABLE_NAME: - return isSetParentTableName(); - case FOREIGN_CATALOG_NAME: - return isSetForeignCatalogName(); - case FOREIGN_SCHEMA_NAME: - return isSetForeignSchemaName(); - case FOREIGN_TABLE_NAME: - return isSetForeignTableName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCrossReferenceReq) - return this.equals((TGetCrossReferenceReq)that); - return false; - } - - public boolean equals(TGetCrossReferenceReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_parentCatalogName = true && this.isSetParentCatalogName(); - boolean that_present_parentCatalogName = true && that.isSetParentCatalogName(); - if (this_present_parentCatalogName || that_present_parentCatalogName) { - if (!(this_present_parentCatalogName && that_present_parentCatalogName)) - return false; - if (!this.parentCatalogName.equals(that.parentCatalogName)) - return false; - } - - boolean this_present_parentSchemaName = true && this.isSetParentSchemaName(); - boolean that_present_parentSchemaName = true && that.isSetParentSchemaName(); - if (this_present_parentSchemaName || that_present_parentSchemaName) { - if (!(this_present_parentSchemaName && that_present_parentSchemaName)) - return false; - if (!this.parentSchemaName.equals(that.parentSchemaName)) - return false; - } - - boolean this_present_parentTableName = true && this.isSetParentTableName(); - boolean that_present_parentTableName = true && that.isSetParentTableName(); - if (this_present_parentTableName || that_present_parentTableName) { - if (!(this_present_parentTableName && that_present_parentTableName)) - return false; - if (!this.parentTableName.equals(that.parentTableName)) - return false; - } - - boolean this_present_foreignCatalogName = true && this.isSetForeignCatalogName(); - boolean that_present_foreignCatalogName = true && that.isSetForeignCatalogName(); - if (this_present_foreignCatalogName || that_present_foreignCatalogName) { - if (!(this_present_foreignCatalogName && that_present_foreignCatalogName)) - return false; - if (!this.foreignCatalogName.equals(that.foreignCatalogName)) - return false; - } - - boolean this_present_foreignSchemaName = true && this.isSetForeignSchemaName(); - boolean that_present_foreignSchemaName = true && that.isSetForeignSchemaName(); - if (this_present_foreignSchemaName || that_present_foreignSchemaName) { - if (!(this_present_foreignSchemaName && that_present_foreignSchemaName)) - return false; - if (!this.foreignSchemaName.equals(that.foreignSchemaName)) - return false; - } - - boolean this_present_foreignTableName = true && this.isSetForeignTableName(); - boolean that_present_foreignTableName = true && that.isSetForeignTableName(); - if (this_present_foreignTableName || that_present_foreignTableName) { - if (!(this_present_foreignTableName && that_present_foreignTableName)) - return false; - if (!this.foreignTableName.equals(that.foreignTableName)) - return false; - } - - return true; - } - - @Override - public int 
hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_parentCatalogName = true && (isSetParentCatalogName()); - list.add(present_parentCatalogName); - if (present_parentCatalogName) - list.add(parentCatalogName); - - boolean present_parentSchemaName = true && (isSetParentSchemaName()); - list.add(present_parentSchemaName); - if (present_parentSchemaName) - list.add(parentSchemaName); - - boolean present_parentTableName = true && (isSetParentTableName()); - list.add(present_parentTableName); - if (present_parentTableName) - list.add(parentTableName); - - boolean present_foreignCatalogName = true && (isSetForeignCatalogName()); - list.add(present_foreignCatalogName); - if (present_foreignCatalogName) - list.add(foreignCatalogName); - - boolean present_foreignSchemaName = true && (isSetForeignSchemaName()); - list.add(present_foreignSchemaName); - if (present_foreignSchemaName) - list.add(foreignSchemaName); - - boolean present_foreignTableName = true && (isSetForeignTableName()); - list.add(present_foreignTableName); - if (present_foreignTableName) - list.add(foreignTableName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCrossReferenceReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentCatalogName()).compareTo(other.isSetParentCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentCatalogName, other.parentCatalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentSchemaName()).compareTo(other.isSetParentSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentSchemaName, other.parentSchemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetParentTableName()).compareTo(other.isSetParentTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetParentTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.parentTableName, other.parentTableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignCatalogName()).compareTo(other.isSetForeignCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.foreignCatalogName, other.foreignCatalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignSchemaName()).compareTo(other.isSetForeignSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignSchemaName()) { - lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.foreignSchemaName, other.foreignSchemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetForeignTableName()).compareTo(other.isSetForeignTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetForeignTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.foreignTableName, other.foreignTableName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCrossReferenceReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetParentCatalogName()) { - if (!first) sb.append(", "); - sb.append("parentCatalogName:"); - if (this.parentCatalogName == null) { - sb.append("null"); - } else { - sb.append(this.parentCatalogName); - } - first = false; - } - if (isSetParentSchemaName()) { - if (!first) sb.append(", "); - sb.append("parentSchemaName:"); - if (this.parentSchemaName == null) { - sb.append("null"); - } else { - sb.append(this.parentSchemaName); - } - first = false; - } - if (isSetParentTableName()) { - if (!first) sb.append(", "); - sb.append("parentTableName:"); - if (this.parentTableName == null) { - sb.append("null"); - } else { - sb.append(this.parentTableName); - } - first = false; - } - if (isSetForeignCatalogName()) { - if (!first) sb.append(", "); - sb.append("foreignCatalogName:"); - if (this.foreignCatalogName == null) { - sb.append("null"); - } else { - sb.append(this.foreignCatalogName); - } - first = false; - } - if (isSetForeignSchemaName()) { - if (!first) sb.append(", "); - sb.append("foreignSchemaName:"); - if (this.foreignSchemaName == null) { - sb.append("null"); - } else { - sb.append(this.foreignSchemaName); - } - first = false; - } - if (isSetForeignTableName()) { - if (!first) sb.append(", "); - sb.append("foreignTableName:"); - if (this.foreignTableName == null) { - sb.append("null"); - } else { - sb.append(this.foreignTableName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCrossReferenceReqStandardSchemeFactory implements SchemeFactory { - public TGetCrossReferenceReqStandardScheme getScheme() { - return new TGetCrossReferenceReqStandardScheme(); - } - } - - private static class TGetCrossReferenceReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // PARENT_CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentCatalogName = iprot.readString(); - struct.setParentCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PARENT_SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentSchemaName = iprot.readString(); - struct.setParentSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // PARENT_TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.parentTableName = iprot.readString(); - struct.setParentTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // FOREIGN_CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignCatalogName = iprot.readString(); - struct.setForeignCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // FOREIGN_SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignSchemaName = iprot.readString(); - struct.setForeignSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 7: // FOREIGN_TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.foreignTableName = iprot.readString(); - struct.setForeignTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, 
schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.parentCatalogName != null) { - if (struct.isSetParentCatalogName()) { - oprot.writeFieldBegin(PARENT_CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.parentCatalogName); - oprot.writeFieldEnd(); - } - } - if (struct.parentSchemaName != null) { - if (struct.isSetParentSchemaName()) { - oprot.writeFieldBegin(PARENT_SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.parentSchemaName); - oprot.writeFieldEnd(); - } - } - if (struct.parentTableName != null) { - if (struct.isSetParentTableName()) { - oprot.writeFieldBegin(PARENT_TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.parentTableName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignCatalogName != null) { - if (struct.isSetForeignCatalogName()) { - oprot.writeFieldBegin(FOREIGN_CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.foreignCatalogName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignSchemaName != null) { - if (struct.isSetForeignSchemaName()) { - oprot.writeFieldBegin(FOREIGN_SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.foreignSchemaName); - oprot.writeFieldEnd(); - } - } - if (struct.foreignTableName != null) { - if (struct.isSetForeignTableName()) { - oprot.writeFieldBegin(FOREIGN_TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.foreignTableName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCrossReferenceReqTupleSchemeFactory implements SchemeFactory { - public TGetCrossReferenceReqTupleScheme getScheme() { - return new TGetCrossReferenceReqTupleScheme(); - } - } - - private static class TGetCrossReferenceReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetParentCatalogName()) { - optionals.set(0); - } - if (struct.isSetParentSchemaName()) { - optionals.set(1); - } - if (struct.isSetParentTableName()) { - optionals.set(2); - } - if (struct.isSetForeignCatalogName()) { - optionals.set(3); - } - if (struct.isSetForeignSchemaName()) { - optionals.set(4); - } - if (struct.isSetForeignTableName()) { - optionals.set(5); - } - oprot.writeBitSet(optionals, 6); - if (struct.isSetParentCatalogName()) { - oprot.writeString(struct.parentCatalogName); - } - if (struct.isSetParentSchemaName()) { - oprot.writeString(struct.parentSchemaName); - } - if (struct.isSetParentTableName()) { - oprot.writeString(struct.parentTableName); - } - if (struct.isSetForeignCatalogName()) { - oprot.writeString(struct.foreignCatalogName); - } - if (struct.isSetForeignSchemaName()) { - oprot.writeString(struct.foreignSchemaName); - } - if (struct.isSetForeignTableName()) { - oprot.writeString(struct.foreignTableName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - 
struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-      BitSet incoming = iprot.readBitSet(6);
-      if (incoming.get(0)) {
-        struct.parentCatalogName = iprot.readString();
-        struct.setParentCatalogNameIsSet(true);
-      }
-      if (incoming.get(1)) {
-        struct.parentSchemaName = iprot.readString();
-        struct.setParentSchemaNameIsSet(true);
-      }
-      if (incoming.get(2)) {
-        struct.parentTableName = iprot.readString();
-        struct.setParentTableNameIsSet(true);
-      }
-      if (incoming.get(3)) {
-        struct.foreignCatalogName = iprot.readString();
-        struct.setForeignCatalogNameIsSet(true);
-      }
-      if (incoming.get(4)) {
-        struct.foreignSchemaName = iprot.readString();
-        struct.setForeignSchemaNameIsSet(true);
-      }
-      if (incoming.get(5)) {
-        struct.foreignTableName = iprot.readString();
-        struct.setForeignTableNameIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java
deleted file mode 100644
index 1bfe6d192df06..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetCrossReferenceResp.java
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetCrossReferenceResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetCrossReferenceResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetCrossReferenceRespStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetCrossReferenceRespTupleSchemeFactory());
-  }
-
-  private TStatus status; // required
-  private TOperationHandle operationHandle; // optional
-
-  /** The set of fields this struct
contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetCrossReferenceResp.class, metaDataMap); - } - - public TGetCrossReferenceResp() { - } - - public TGetCrossReferenceResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetCrossReferenceResp(TGetCrossReferenceResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetCrossReferenceResp deepCopy() { - return new TGetCrossReferenceResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetCrossReferenceResp) - return this.equals((TGetCrossReferenceResp)that); - return false; - } - - public boolean equals(TGetCrossReferenceResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); 
- list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetCrossReferenceResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetCrossReferenceResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetCrossReferenceRespStandardSchemeFactory implements SchemeFactory { - public TGetCrossReferenceRespStandardScheme getScheme() { - return new TGetCrossReferenceRespStandardScheme(); - } - } - - private static class TGetCrossReferenceRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetCrossReferenceRespTupleSchemeFactory implements SchemeFactory { - public TGetCrossReferenceRespTupleScheme getScheme() { - return new TGetCrossReferenceRespTupleScheme(); - } - } - - private static class TGetCrossReferenceRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - 
struct.operationHandle.write(oprot);
-      }
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TGetCrossReferenceResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java
deleted file mode 100644
index e3e28c5860522..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenReq.java
+++ /dev/null
@@ -1,596 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OWNER_FIELD_DESC = new org.apache.thrift.protocol.TField("owner", org.apache.thrift.protocol.TType.STRING, (short)2);
-  private static final org.apache.thrift.protocol.TField RENEWER_FIELD_DESC = new org.apache.thrift.protocol.TField("renewer", org.apache.thrift.protocol.TType.STRING, (short)3);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetDelegationTokenReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetDelegationTokenReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-  private String owner; // required
-  private String renewer; // required
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them.
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - OWNER((short)2, "owner"), - RENEWER((short)3, "renewer"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // OWNER - return OWNER; - case 3: // RENEWER - return RENEWER; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.OWNER, new org.apache.thrift.meta_data.FieldMetaData("owner", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.RENEWER, new org.apache.thrift.meta_data.FieldMetaData("renewer", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenReq.class, metaDataMap); - } - - public TGetDelegationTokenReq() { - } - - public TGetDelegationTokenReq( - TSessionHandle sessionHandle, - String owner, - String renewer) - { - this(); - this.sessionHandle = sessionHandle; - this.owner = owner; - this.renewer = renewer; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenReq(TGetDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetOwner()) { - this.owner = other.owner; - } - if (other.isSetRenewer()) { - this.renewer = other.renewer; - } - } - - public TGetDelegationTokenReq deepCopy() { - return new TGetDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.owner = null; - this.renewer = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getOwner() { - return this.owner; - } - - public void setOwner(String owner) { - this.owner = owner; - } - - public void unsetOwner() { - this.owner = null; - } - - /** Returns true if field owner is set (has been assigned a value) and false otherwise */ - public boolean isSetOwner() { - return this.owner != null; - } - - public void setOwnerIsSet(boolean value) { - if (!value) { - this.owner = null; - } - } - - public String getRenewer() { - return this.renewer; - } - - public void setRenewer(String renewer) { - this.renewer = renewer; - } - - public void unsetRenewer() { - this.renewer = null; - } - - /** Returns true if field renewer is set (has been assigned a value) and false otherwise */ - public boolean isSetRenewer() { - return this.renewer != null; - } - - public void setRenewerIsSet(boolean value) { - if (!value) { - this.renewer = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case OWNER: - if (value == null) { - unsetOwner(); - } else { - setOwner((String)value); - } - break; - - case RENEWER: - if (value == null) { - unsetRenewer(); - } else { - setRenewer((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case OWNER: - return getOwner(); - - case RENEWER: - return getRenewer(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case OWNER: - return isSetOwner(); - case RENEWER: - return isSetRenewer(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenReq) - return this.equals((TGetDelegationTokenReq)that); - return false; - } - - public boolean equals(TGetDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || 
that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_owner = true && this.isSetOwner(); - boolean that_present_owner = true && that.isSetOwner(); - if (this_present_owner || that_present_owner) { - if (!(this_present_owner && that_present_owner)) - return false; - if (!this.owner.equals(that.owner)) - return false; - } - - boolean this_present_renewer = true && this.isSetRenewer(); - boolean that_present_renewer = true && that.isSetRenewer(); - if (this_present_renewer || that_present_renewer) { - if (!(this_present_renewer && that_present_renewer)) - return false; - if (!this.renewer.equals(that.renewer)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_owner = true && (isSetOwner()); - list.add(present_owner); - if (present_owner) - list.add(owner); - - boolean present_renewer = true && (isSetRenewer()); - list.add(present_renewer); - if (present_renewer) - list.add(renewer); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOwner()).compareTo(other.isSetOwner()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOwner()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.owner, other.owner); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRenewer()).compareTo(other.isSetRenewer()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRenewer()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.renewer, other.renewer); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("owner:"); - if (this.owner == null) { - sb.append("null"); - } else { - sb.append(this.owner); - } - first = false; - if (!first) sb.append(", "); - sb.append("renewer:"); - if (this.renewer == null) { - sb.append("null"); - } else { - 
sb.append(this.renewer); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetOwner()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'owner' is unset! Struct:" + toString()); - } - - if (!isSetRenewer()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'renewer' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenReqStandardScheme getScheme() { - return new TGetDelegationTokenReqStandardScheme(); - } - } - - private static class TGetDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OWNER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.owner = iprot.readString(); - struct.setOwnerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // RENEWER - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.renewer = iprot.readString(); - struct.setRenewerIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.owner != null) { - oprot.writeFieldBegin(OWNER_FIELD_DESC); - oprot.writeString(struct.owner); - oprot.writeFieldEnd(); - } - if 
(struct.renewer != null) {
-        oprot.writeFieldBegin(RENEWER_FIELD_DESC);
-        oprot.writeString(struct.renewer);
-        oprot.writeFieldEnd();
-      }
-      oprot.writeFieldStop();
-      oprot.writeStructEnd();
-    }
-
-  }
-
-  private static class TGetDelegationTokenReqTupleSchemeFactory implements SchemeFactory {
-    public TGetDelegationTokenReqTupleScheme getScheme() {
-      return new TGetDelegationTokenReqTupleScheme();
-    }
-  }
-
-  private static class TGetDelegationTokenReqTupleScheme extends TupleScheme {
-
-    @Override
-    public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException {
-      TTupleProtocol oprot = (TTupleProtocol) prot;
-      struct.sessionHandle.write(oprot);
-      oprot.writeString(struct.owner);
-      oprot.writeString(struct.renewer);
-    }
-
-    @Override
-    public void read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenReq struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-      struct.owner = iprot.readString();
-      struct.setOwnerIsSet(true);
-      struct.renewer = iprot.readString();
-      struct.setRenewerIsSet(true);
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java
deleted file mode 100644
index 6ef2acbbd9435..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetDelegationTokenResp.java
+++ /dev/null
@@ -1,504 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetDelegationTokenResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2);
-
-  private static final
Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - private String delegationToken; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.DELEGATION_TOKEN}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetDelegationTokenResp.class, metaDataMap); - } - - public TGetDelegationTokenResp() { - } - - public TGetDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetDelegationTokenResp(TGetDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TGetDelegationTokenResp deepCopy() { - return new TGetDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - this.delegationToken = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetDelegationTokenResp) - return this.equals((TGetDelegationTokenResp)that); - return false; - } - - public boolean equals(TGetDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetDelegationToken()) { - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespStandardScheme getScheme() { - return new TGetDelegationTokenRespStandardScheme(); - } - } - - private static class TGetDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - if (struct.isSetDelegationToken()) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TGetDelegationTokenRespTupleScheme getScheme() { - return new TGetDelegationTokenRespTupleScheme(); - } - } - - private static class TGetDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetDelegationToken()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetDelegationToken()) { - oprot.writeString(struct.delegationToken); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetDelegationTokenResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.delegationToken = iprot.readString();
-        struct.setDelegationTokenIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
deleted file mode 100644
index ad4f8a5b031e8..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsReq.java
+++ /dev/null
@@ -1,711 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetFunctionsReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2);
-  private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3);
-  private static final org.apache.thrift.protocol.TField FUNCTION_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("functionName", org.apache.thrift.protocol.TType.STRING, (short)4);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetFunctionsReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetFunctionsReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-  private String catalogName; // optional
-  private String schemaName; // optional
-  private String functionName; // required
-
-  /** The set of fields this struct contains, along with convenience
methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - FUNCTION_NAME((short)4, "functionName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // FUNCTION_NAME - return FUNCTION_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.FUNCTION_NAME, new org.apache.thrift.meta_data.FieldMetaData("functionName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsReq.class, metaDataMap); - } - - public TGetFunctionsReq() { - } - - public TGetFunctionsReq( - TSessionHandle sessionHandle, - String functionName) - { - this(); - this.sessionHandle = sessionHandle; - this.functionName = functionName; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsReq(TGetFunctionsReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetFunctionName()) { - this.functionName = other.functionName; - } - } - - public TGetFunctionsReq deepCopy() { - return new TGetFunctionsReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.functionName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getFunctionName() { - return this.functionName; - } - - public void setFunctionName(String functionName) { - this.functionName = functionName; - } - - public void unsetFunctionName() { - this.functionName = null; - } - - /** Returns true if field functionName is set (has been assigned a value) and false otherwise */ - public boolean isSetFunctionName() { - return this.functionName != null; - } - - public void setFunctionNameIsSet(boolean value) { - if (!value) { - this.functionName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case FUNCTION_NAME: - if (value == null) { - unsetFunctionName(); - } else { - setFunctionName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case FUNCTION_NAME: - 
return getFunctionName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case FUNCTION_NAME: - return isSetFunctionName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsReq) - return this.equals((TGetFunctionsReq)that); - return false; - } - - public boolean equals(TGetFunctionsReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_functionName = true && this.isSetFunctionName(); - boolean that_present_functionName = true && that.isSetFunctionName(); - if (this_present_functionName || that_present_functionName) { - if (!(this_present_functionName && that_present_functionName)) - return false; - if (!this.functionName.equals(that.functionName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_functionName = true && (isSetFunctionName()); - list.add(present_functionName); - if (present_functionName) - list.add(functionName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetFunctionsReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return 
lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFunctionName()).compareTo(other.isSetFunctionName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFunctionName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.functionName, other.functionName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (!first) sb.append(", "); - sb.append("functionName:"); - if (this.functionName == null) { - sb.append("null"); - } else { - sb.append(this.functionName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetFunctionName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'functionName' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsReqStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsReqStandardScheme getScheme() { - return new TGetFunctionsReqStandardScheme(); - } - } - - private static class TGetFunctionsReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // FUNCTION_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.functionName != null) { - oprot.writeFieldBegin(FUNCTION_NAME_FIELD_DESC); - 
oprot.writeString(struct.functionName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsReqTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsReqTupleScheme getScheme() { - return new TGetFunctionsReqTupleScheme(); - } - } - - private static class TGetFunctionsReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.functionName); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.functionName = iprot.readString(); - struct.setFunctionNameIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java deleted file mode 100644 index ead37fb91cc2f..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetFunctionsResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetFunctionsResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetFunctionsResp"); - - private static 
final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetFunctionsRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetFunctionsRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetFunctionsResp.class, metaDataMap); - } - - public TGetFunctionsResp() { - } - - public TGetFunctionsResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetFunctionsResp(TGetFunctionsResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetFunctionsResp deepCopy() { - return new TGetFunctionsResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetFunctionsResp) - return this.equals((TGetFunctionsResp)that); - return false; - } - - public boolean equals(TGetFunctionsResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetFunctionsResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetFunctionsResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetFunctionsRespStandardSchemeFactory implements SchemeFactory { - public TGetFunctionsRespStandardScheme getScheme() { - return new TGetFunctionsRespStandardScheme(); - } - } - - private static class TGetFunctionsRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetFunctionsRespTupleSchemeFactory implements SchemeFactory { - public TGetFunctionsRespTupleScheme getScheme() { - return new TGetFunctionsRespTupleScheme(); - } - } - - private static class TGetFunctionsRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override 
- public void read(org.apache.thrift.protocol.TProtocol prot, TGetFunctionsResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java deleted file mode 100644 index b319b70e5eba5..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoReq.java +++ /dev/null @@ -1,507 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoType", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private TGetInfoType infoType; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - /** - * - * @see TGetInfoType - */ - INFO_TYPE((short)2, "infoType"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // INFO_TYPE - return INFO_TYPE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.INFO_TYPE, new org.apache.thrift.meta_data.FieldMetaData("infoType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TGetInfoType.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoReq.class, metaDataMap); - } - - public TGetInfoReq() { - } - - public TGetInfoReq( - TSessionHandle sessionHandle, - TGetInfoType infoType) - { - this(); - this.sessionHandle = sessionHandle; - this.infoType = infoType; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoReq(TGetInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetInfoType()) { - this.infoType = other.infoType; - } - } - - public TGetInfoReq deepCopy() { - return new TGetInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.infoType = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - /** - * - * @see TGetInfoType - */ - public TGetInfoType getInfoType() { - return this.infoType; - } - - /** - * - * @see TGetInfoType - */ - public void setInfoType(TGetInfoType infoType) { - this.infoType = infoType; - } - - public void unsetInfoType() { - this.infoType = null; - } - - /** Returns true if field infoType is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoType() { - return this.infoType != null; - } - - public void setInfoTypeIsSet(boolean value) { - if (!value) { - this.infoType = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case INFO_TYPE: - if (value == null) { - unsetInfoType(); - } else { - setInfoType((TGetInfoType)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case INFO_TYPE: - return getInfoType(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case INFO_TYPE: - return isSetInfoType(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoReq) - return this.equals((TGetInfoReq)that); - return false; - } - - public boolean equals(TGetInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_infoType = true && this.isSetInfoType(); - boolean that_present_infoType = true && that.isSetInfoType(); - if (this_present_infoType || that_present_infoType) { - if (!(this_present_infoType && that_present_infoType)) - return false; - if (!this.infoType.equals(that.infoType)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && 
(isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_infoType = true && (isSetInfoType()); - list.add(present_infoType); - if (present_infoType) - list.add(infoType.getValue()); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoType()).compareTo(other.isSetInfoType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoType, other.infoType); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoType:"); - if (this.infoType == null) { - sb.append("null"); - } else { - sb.append(this.infoType); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetInfoType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoType' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetInfoReqStandardScheme getScheme() { - return new TGetInfoReqStandardScheme(); - } - } - - private static class TGetInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.infoType = org.apache.hive.service.rpc.thrift.TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoType != null) { - oprot.writeFieldBegin(INFO_TYPE_FIELD_DESC); - oprot.writeI32(struct.infoType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetInfoReqTupleScheme getScheme() { - return new TGetInfoReqTupleScheme(); - } - } - - private static class TGetInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeI32(struct.infoType.getValue()); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - 
struct.setSessionHandleIsSet(true); - struct.infoType = org.apache.hive.service.rpc.thrift.TGetInfoType.findByValue(iprot.readI32()); - struct.setInfoTypeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java deleted file mode 100644 index 9be810b024987..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoResp.java +++ /dev/null @@ -1,497 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField INFO_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("infoValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TGetInfoValue infoValue; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - INFO_VALUE((short)2, "infoValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // INFO_VALUE - return INFO_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.INFO_VALUE, new org.apache.thrift.meta_data.FieldMetaData("infoValue", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TGetInfoValue.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoResp.class, metaDataMap); - } - - public TGetInfoResp() { - } - - public TGetInfoResp( - TStatus status, - TGetInfoValue infoValue) - { - this(); - this.status = status; - this.infoValue = infoValue; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetInfoResp(TGetInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetInfoValue()) { - this.infoValue = new TGetInfoValue(other.infoValue); - } - } - - public TGetInfoResp deepCopy() { - return new TGetInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.infoValue = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TGetInfoValue getInfoValue() { - return this.infoValue; - } - - public void setInfoValue(TGetInfoValue infoValue) { - this.infoValue = infoValue; - } - - public void unsetInfoValue() { - this.infoValue = null; - } - - /** Returns true if field infoValue is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoValue() { - return this.infoValue != null; - } - - public void setInfoValueIsSet(boolean value) { - if (!value) { - this.infoValue = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case INFO_VALUE: - if (value == null) { - unsetInfoValue(); - } else { - setInfoValue((TGetInfoValue)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case INFO_VALUE: - return getInfoValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case INFO_VALUE: - return isSetInfoValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetInfoResp) - return this.equals((TGetInfoResp)that); - return false; - } - - public boolean equals(TGetInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_infoValue = true && this.isSetInfoValue(); - boolean that_present_infoValue = true && that.isSetInfoValue(); - if (this_present_infoValue || that_present_infoValue) { - if (!(this_present_infoValue && that_present_infoValue)) - return false; - if (!this.infoValue.equals(that.infoValue)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_infoValue = true && (isSetInfoValue()); - list.add(present_infoValue); - if (present_infoValue) - list.add(infoValue); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TGetInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoValue()).compareTo(other.isSetInfoValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoValue, other.infoValue); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("infoValue:"); - if (this.infoValue == null) { - sb.append("null"); - } else { - sb.append(this.infoValue); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetInfoValue()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'infoValue' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetInfoRespStandardScheme getScheme() { - return new TGetInfoRespStandardScheme(); - } - } - - private static class TGetInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.infoValue != null) { - oprot.writeFieldBegin(INFO_VALUE_FIELD_DESC); - struct.infoValue.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetInfoRespTupleScheme getScheme() { - return new TGetInfoRespTupleScheme(); - } - } - - private static class TGetInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - struct.infoValue.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.infoValue = new TGetInfoValue(); - struct.infoValue.read(iprot); - struct.setInfoValueIsSet(true); - } - } - -} - diff 
--git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java deleted file mode 100644 index 5b219b62656d7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoType.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TGetInfoType implements org.apache.thrift.TEnum { - CLI_MAX_DRIVER_CONNECTIONS(0), - CLI_MAX_CONCURRENT_ACTIVITIES(1), - CLI_DATA_SOURCE_NAME(2), - CLI_FETCH_DIRECTION(8), - CLI_SERVER_NAME(13), - CLI_SEARCH_PATTERN_ESCAPE(14), - CLI_DBMS_NAME(17), - CLI_DBMS_VER(18), - CLI_ACCESSIBLE_TABLES(19), - CLI_ACCESSIBLE_PROCEDURES(20), - CLI_CURSOR_COMMIT_BEHAVIOR(23), - CLI_DATA_SOURCE_READ_ONLY(25), - CLI_DEFAULT_TXN_ISOLATION(26), - CLI_IDENTIFIER_CASE(28), - CLI_IDENTIFIER_QUOTE_CHAR(29), - CLI_MAX_COLUMN_NAME_LEN(30), - CLI_MAX_CURSOR_NAME_LEN(31), - CLI_MAX_SCHEMA_NAME_LEN(32), - CLI_MAX_CATALOG_NAME_LEN(34), - CLI_MAX_TABLE_NAME_LEN(35), - CLI_SCROLL_CONCURRENCY(43), - CLI_TXN_CAPABLE(46), - CLI_USER_NAME(47), - CLI_TXN_ISOLATION_OPTION(72), - CLI_INTEGRITY(73), - CLI_GETDATA_EXTENSIONS(81), - CLI_NULL_COLLATION(85), - CLI_ALTER_TABLE(86), - CLI_ORDER_BY_COLUMNS_IN_SELECT(90), - CLI_SPECIAL_CHARACTERS(94), - CLI_MAX_COLUMNS_IN_GROUP_BY(97), - CLI_MAX_COLUMNS_IN_INDEX(98), - CLI_MAX_COLUMNS_IN_ORDER_BY(99), - CLI_MAX_COLUMNS_IN_SELECT(100), - CLI_MAX_COLUMNS_IN_TABLE(101), - CLI_MAX_INDEX_SIZE(102), - CLI_MAX_ROW_SIZE(104), - CLI_MAX_STATEMENT_LEN(105), - CLI_MAX_TABLES_IN_SELECT(106), - CLI_MAX_USER_NAME_LEN(107), - CLI_OJ_CAPABILITIES(115), - CLI_XOPEN_CLI_YEAR(10000), - CLI_CURSOR_SENSITIVITY(10001), - CLI_DESCRIBE_PARAMETER(10002), - CLI_CATALOG_NAME(10003), - CLI_COLLATION_SEQ(10004), - CLI_MAX_IDENTIFIER_LEN(10005); - - private final int value; - - private TGetInfoType(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TGetInfoType findByValue(int value) { - switch (value) { - case 0: - return CLI_MAX_DRIVER_CONNECTIONS; - case 1: - return CLI_MAX_CONCURRENT_ACTIVITIES; - case 2: - return CLI_DATA_SOURCE_NAME; - case 8: - return CLI_FETCH_DIRECTION; - case 13: - return CLI_SERVER_NAME; - case 14: - return CLI_SEARCH_PATTERN_ESCAPE; - case 17: - return CLI_DBMS_NAME; - case 18: - return CLI_DBMS_VER; - case 19: - return CLI_ACCESSIBLE_TABLES; - case 20: - return CLI_ACCESSIBLE_PROCEDURES; - case 23: - return CLI_CURSOR_COMMIT_BEHAVIOR; - case 25: - return CLI_DATA_SOURCE_READ_ONLY; - case 26: - return CLI_DEFAULT_TXN_ISOLATION; - case 28: - return CLI_IDENTIFIER_CASE; - case 29: - return CLI_IDENTIFIER_QUOTE_CHAR; - case 30: - return CLI_MAX_COLUMN_NAME_LEN; - case 31: - return CLI_MAX_CURSOR_NAME_LEN; - case 32: - return CLI_MAX_SCHEMA_NAME_LEN; - case 34: - return CLI_MAX_CATALOG_NAME_LEN; - case 35: - return CLI_MAX_TABLE_NAME_LEN; - case 43: - return CLI_SCROLL_CONCURRENCY; - case 46: - return CLI_TXN_CAPABLE; - case 47: - return CLI_USER_NAME; - case 72: - return CLI_TXN_ISOLATION_OPTION; - case 73: - return CLI_INTEGRITY; - case 81: - return CLI_GETDATA_EXTENSIONS; - case 85: - return CLI_NULL_COLLATION; - case 86: - return CLI_ALTER_TABLE; - case 90: - return CLI_ORDER_BY_COLUMNS_IN_SELECT; - case 94: - return CLI_SPECIAL_CHARACTERS; - case 97: - return CLI_MAX_COLUMNS_IN_GROUP_BY; - case 98: - return CLI_MAX_COLUMNS_IN_INDEX; - case 99: - return CLI_MAX_COLUMNS_IN_ORDER_BY; - case 100: - return CLI_MAX_COLUMNS_IN_SELECT; - case 101: - return CLI_MAX_COLUMNS_IN_TABLE; - case 102: - return CLI_MAX_INDEX_SIZE; - case 104: - return CLI_MAX_ROW_SIZE; - case 105: - return CLI_MAX_STATEMENT_LEN; - case 106: - return CLI_MAX_TABLES_IN_SELECT; - case 107: - return CLI_MAX_USER_NAME_LEN; - case 115: - return CLI_OJ_CAPABILITIES; - case 10000: - return CLI_XOPEN_CLI_YEAR; - case 10001: - return CLI_CURSOR_SENSITIVITY; - case 10002: - return CLI_DESCRIBE_PARAMETER; - case 10003: - return CLI_CATALOG_NAME; - case 10004: - return CLI_COLLATION_SEQ; - case 10005: - return CLI_MAX_IDENTIFIER_LEN; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java deleted file mode 100644 index 8e3045a58e5ac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetInfoValue.java +++ /dev/null @@ -1,597 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import 
javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TGetInfoValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetInfoValue"); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SMALL_INT_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("smallIntValue", org.apache.thrift.protocol.TType.I16, (short)2); - private static final org.apache.thrift.protocol.TField INTEGER_BITMASK_FIELD_DESC = new org.apache.thrift.protocol.TField("integerBitmask", org.apache.thrift.protocol.TType.I32, (short)3); - private static final org.apache.thrift.protocol.TField INTEGER_FLAG_FIELD_DESC = new org.apache.thrift.protocol.TField("integerFlag", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField BINARY_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryValue", org.apache.thrift.protocol.TType.I32, (short)5); - private static final org.apache.thrift.protocol.TField LEN_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("lenValue", org.apache.thrift.protocol.TType.I64, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STRING_VALUE((short)1, "stringValue"), - SMALL_INT_VALUE((short)2, "smallIntValue"), - INTEGER_BITMASK((short)3, "integerBitmask"), - INTEGER_FLAG((short)4, "integerFlag"), - BINARY_VALUE((short)5, "binaryValue"), - LEN_VALUE((short)6, "lenValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STRING_VALUE - return STRING_VALUE; - case 2: // SMALL_INT_VALUE - return SMALL_INT_VALUE; - case 3: // INTEGER_BITMASK - return INTEGER_BITMASK; - case 4: // INTEGER_FLAG - return INTEGER_FLAG; - case 5: // BINARY_VALUE - return BINARY_VALUE; - case 6: // LEN_VALUE - return LEN_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.SMALL_INT_VALUE, new org.apache.thrift.meta_data.FieldMetaData("smallIntValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - tmpMap.put(_Fields.INTEGER_BITMASK, new org.apache.thrift.meta_data.FieldMetaData("integerBitmask", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.INTEGER_FLAG, new org.apache.thrift.meta_data.FieldMetaData("integerFlag", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.BINARY_VALUE, new org.apache.thrift.meta_data.FieldMetaData("binaryValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.LEN_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lenValue", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetInfoValue.class, metaDataMap); - } - - public TGetInfoValue() { - super(); - } - - public TGetInfoValue(TGetInfoValue._Fields setField, Object value) { - super(setField, value); - } - - public TGetInfoValue(TGetInfoValue other) { - super(other); - } - public TGetInfoValue deepCopy() { - return new TGetInfoValue(this); - } - - public static TGetInfoValue stringValue(String value) { - TGetInfoValue x = new TGetInfoValue(); - x.setStringValue(value); - return x; - } - - public static TGetInfoValue smallIntValue(short value) { - TGetInfoValue x = new TGetInfoValue(); - x.setSmallIntValue(value); - return x; - } - - public static TGetInfoValue integerBitmask(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerBitmask(value); - return x; - } - - public static TGetInfoValue integerFlag(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setIntegerFlag(value); - return x; - } - - public static TGetInfoValue binaryValue(int value) { - TGetInfoValue x = new TGetInfoValue(); - x.setBinaryValue(value); - return x; - } - - public static TGetInfoValue lenValue(long value) { - TGetInfoValue x = new TGetInfoValue(); - x.setLenValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case STRING_VALUE: - if (value instanceof 
String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - case SMALL_INT_VALUE: - if (value instanceof Short) { - break; - } - throw new ClassCastException("Was expecting value of type Short for field 'smallIntValue', but got " + value.getClass().getSimpleName()); - case INTEGER_BITMASK: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerBitmask', but got " + value.getClass().getSimpleName()); - case INTEGER_FLAG: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'integerFlag', but got " + value.getClass().getSimpleName()); - case BINARY_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'binaryValue', but got " + value.getClass().getSimpleName()); - case LEN_VALUE: - if (value instanceof Long) { - break; - } - throw new ClassCastException("Was expecting value of type Long for field 'lenValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case SMALL_INT_VALUE: - if (field.type == SMALL_INT_VALUE_FIELD_DESC.type) { - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_BITMASK: - if (field.type == INTEGER_BITMASK_FIELD_DESC.type) { - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case INTEGER_FLAG: - if (field.type == INTEGER_FLAG_FIELD_DESC.type) { - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case BINARY_VALUE: - if (field.type == BINARY_VALUE_FIELD_DESC.type) { - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case LEN_VALUE: - if (field.type == LEN_VALUE_FIELD_DESC.type) { - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - 
return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - case SMALL_INT_VALUE: - Short smallIntValue; - smallIntValue = iprot.readI16(); - return smallIntValue; - case INTEGER_BITMASK: - Integer integerBitmask; - integerBitmask = iprot.readI32(); - return integerBitmask; - case INTEGER_FLAG: - Integer integerFlag; - integerFlag = iprot.readI32(); - return integerFlag; - case BINARY_VALUE: - Integer binaryValue; - binaryValue = iprot.readI32(); - return binaryValue; - case LEN_VALUE: - Long lenValue; - lenValue = iprot.readI64(); - return lenValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - case SMALL_INT_VALUE: - Short smallIntValue = (Short)value_; - oprot.writeI16(smallIntValue); - return; - case INTEGER_BITMASK: - Integer integerBitmask = (Integer)value_; - oprot.writeI32(integerBitmask); - return; - case INTEGER_FLAG: - Integer integerFlag = (Integer)value_; - oprot.writeI32(integerFlag); - return; - case BINARY_VALUE: - Integer binaryValue = (Integer)value_; - oprot.writeI32(binaryValue); - return; - case LEN_VALUE: - Long lenValue = (Long)value_; - oprot.writeI64(lenValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - case SMALL_INT_VALUE: - return SMALL_INT_VALUE_FIELD_DESC; - case INTEGER_BITMASK: - return INTEGER_BITMASK_FIELD_DESC; - case INTEGER_FLAG: - return INTEGER_FLAG_FIELD_DESC; - case BINARY_VALUE: - return BINARY_VALUE_FIELD_DESC; - case LEN_VALUE: - return LEN_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { 
- throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRING_VALUE; - value_ = value; - } - - public short getSmallIntValue() { - if (getSetField() == _Fields.SMALL_INT_VALUE) { - return (Short)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'smallIntValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setSmallIntValue(short value) { - setField_ = _Fields.SMALL_INT_VALUE; - value_ = value; - } - - public int getIntegerBitmask() { - if (getSetField() == _Fields.INTEGER_BITMASK) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerBitmask' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerBitmask(int value) { - setField_ = _Fields.INTEGER_BITMASK; - value_ = value; - } - - public int getIntegerFlag() { - if (getSetField() == _Fields.INTEGER_FLAG) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'integerFlag' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setIntegerFlag(int value) { - setField_ = _Fields.INTEGER_FLAG; - value_ = value; - } - - public int getBinaryValue() { - if (getSetField() == _Fields.BINARY_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'binaryValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setBinaryValue(int value) { - setField_ = _Fields.BINARY_VALUE; - value_ = value; - } - - public long getLenValue() { - if (getSetField() == _Fields.LEN_VALUE) { - return (Long)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'lenValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setLenValue(long value) { - setField_ = _Fields.LEN_VALUE; - value_ = value; - } - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean isSetSmallIntValue() { - return setField_ == _Fields.SMALL_INT_VALUE; - } - - - public boolean isSetIntegerBitmask() { - return setField_ == _Fields.INTEGER_BITMASK; - } - - - public boolean isSetIntegerFlag() { - return setField_ == _Fields.INTEGER_FLAG; - } - - - public boolean isSetBinaryValue() { - return setField_ == _Fields.BINARY_VALUE; - } - - - public boolean isSetLenValue() { - return setField_ == _Fields.LEN_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TGetInfoValue) { - return equals((TGetInfoValue)other); - } else { - return false; - } - } - - public boolean equals(TGetInfoValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TGetInfoValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - 
list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java deleted file mode 100644 index af31ce2b22819..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusReq.java +++ /dev/null @@ -1,501 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetOperationStatusReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField GET_PROGRESS_UPDATE_FIELD_DESC = new org.apache.thrift.protocol.TField("getProgressUpdate", org.apache.thrift.protocol.TType.BOOL, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - private boolean getProgressUpdate; // optional - 
- /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"), - GET_PROGRESS_UPDATE((short)2, "getProgressUpdate"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - case 2: // GET_PROGRESS_UPDATE - return GET_PROGRESS_UPDATE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __GETPROGRESSUPDATE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.GET_PROGRESS_UPDATE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - tmpMap.put(_Fields.GET_PROGRESS_UPDATE, new org.apache.thrift.meta_data.FieldMetaData("getProgressUpdate", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusReq.class, metaDataMap); - } - - public TGetOperationStatusReq() { - } - - public TGetOperationStatusReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetOperationStatusReq(TGetOperationStatusReq other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - this.getProgressUpdate = other.getProgressUpdate; - } - - public TGetOperationStatusReq deepCopy() { - return new TGetOperationStatusReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - setGetProgressUpdateIsSet(false); - this.getProgressUpdate = false; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public boolean isGetProgressUpdate() { - return this.getProgressUpdate; - } - - public void setGetProgressUpdate(boolean getProgressUpdate) { - this.getProgressUpdate = getProgressUpdate; - setGetProgressUpdateIsSet(true); - } - - public void unsetGetProgressUpdate() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID); - } - - /** Returns true if field getProgressUpdate is set (has been assigned a value) and false otherwise */ - public boolean isSetGetProgressUpdate() { - return EncodingUtils.testBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID); - } - - public void setGetProgressUpdateIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __GETPROGRESSUPDATE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - case GET_PROGRESS_UPDATE: - if (value == null) { - unsetGetProgressUpdate(); - } else { - setGetProgressUpdate((Boolean)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - case GET_PROGRESS_UPDATE: - return isGetProgressUpdate(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - case GET_PROGRESS_UPDATE: - return isSetGetProgressUpdate(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusReq) - return this.equals((TGetOperationStatusReq)that); - return false; - } - - public boolean equals(TGetOperationStatusReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if 
(!this.operationHandle.equals(that.operationHandle)) - return false; - } - - boolean this_present_getProgressUpdate = true && this.isSetGetProgressUpdate(); - boolean that_present_getProgressUpdate = true && that.isSetGetProgressUpdate(); - if (this_present_getProgressUpdate || that_present_getProgressUpdate) { - if (!(this_present_getProgressUpdate && that_present_getProgressUpdate)) - return false; - if (this.getProgressUpdate != that.getProgressUpdate) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - boolean present_getProgressUpdate = true && (isSetGetProgressUpdate()); - list.add(present_getProgressUpdate); - if (present_getProgressUpdate) - list.add(getProgressUpdate); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetOperationStatusReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetGetProgressUpdate()).compareTo(other.isSetGetProgressUpdate()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGetProgressUpdate()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.getProgressUpdate, other.getProgressUpdate); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - if (isSetGetProgressUpdate()) { - if (!first) sb.append(", "); - sb.append("getProgressUpdate:"); - sb.append(this.getProgressUpdate); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusReqStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqStandardScheme getScheme() { - return new TGetOperationStatusReqStandardScheme(); - } - } - - private static class TGetOperationStatusReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // GET_PROGRESS_UPDATE - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.getProgressUpdate = iprot.readBool(); - struct.setGetProgressUpdateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.isSetGetProgressUpdate()) { - oprot.writeFieldBegin(GET_PROGRESS_UPDATE_FIELD_DESC); - oprot.writeBool(struct.getProgressUpdate); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusReqTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusReqTupleScheme getScheme() { - return new TGetOperationStatusReqTupleScheme(); - } - } - - private static class TGetOperationStatusReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - BitSet optionals = new BitSet(); - if 
(struct.isSetGetProgressUpdate()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetGetProgressUpdate()) { - oprot.writeBool(struct.getProgressUpdate); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.getProgressUpdate = iprot.readBool(); - struct.setGetProgressUpdateIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java deleted file mode 100644 index dbfbb44aa6986..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetOperationStatusResp.java +++ /dev/null @@ -1,1342 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetOperationStatusResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetOperationStatusResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationState", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - private static final 
org.apache.thrift.protocol.TField TASK_STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("taskStatus", org.apache.thrift.protocol.TType.STRING, (short)6); - private static final org.apache.thrift.protocol.TField OPERATION_STARTED_FIELD_DESC = new org.apache.thrift.protocol.TField("operationStarted", org.apache.thrift.protocol.TType.I64, (short)7); - private static final org.apache.thrift.protocol.TField OPERATION_COMPLETED_FIELD_DESC = new org.apache.thrift.protocol.TField("operationCompleted", org.apache.thrift.protocol.TType.I64, (short)8); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)9); - private static final org.apache.thrift.protocol.TField PROGRESS_UPDATE_RESPONSE_FIELD_DESC = new org.apache.thrift.protocol.TField("progressUpdateResponse", org.apache.thrift.protocol.TType.STRUCT, (short)10); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetOperationStatusRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetOperationStatusRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationState operationState; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - private String taskStatus; // optional - private long operationStarted; // optional - private long operationCompleted; // optional - private boolean hasResultSet; // optional - private TProgressUpdateResp progressUpdateResponse; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TOperationState - */ - OPERATION_STATE((short)2, "operationState"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"), - TASK_STATUS((short)6, "taskStatus"), - OPERATION_STARTED((short)7, "operationStarted"), - OPERATION_COMPLETED((short)8, "operationCompleted"), - HAS_RESULT_SET((short)9, "hasResultSet"), - PROGRESS_UPDATE_RESPONSE((short)10, "progressUpdateResponse"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_STATE - return OPERATION_STATE; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - case 6: // TASK_STATUS - return TASK_STATUS; - case 7: // OPERATION_STARTED - return OPERATION_STARTED; - case 8: // OPERATION_COMPLETED - return OPERATION_COMPLETED; - case 9: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 10: // PROGRESS_UPDATE_RESPONSE - return PROGRESS_UPDATE_RESPONSE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private static final int __OPERATIONSTARTED_ISSET_ID = 1; - private static final int __OPERATIONCOMPLETED_ISSET_ID = 2; - private static final int __HASRESULTSET_ISSET_ID = 3; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.OPERATION_STATE,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE,_Fields.TASK_STATUS,_Fields.OPERATION_STARTED,_Fields.OPERATION_COMPLETED,_Fields.HAS_RESULT_SET,_Fields.PROGRESS_UPDATE_RESPONSE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_STATE, new org.apache.thrift.meta_data.FieldMetaData("operationState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationState.class))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.TASK_STATUS, new org.apache.thrift.meta_data.FieldMetaData("taskStatus", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.OPERATION_STARTED, new org.apache.thrift.meta_data.FieldMetaData("operationStarted", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.OPERATION_COMPLETED, new org.apache.thrift.meta_data.FieldMetaData("operationCompleted", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.HAS_RESULT_SET, new 
org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.PROGRESS_UPDATE_RESPONSE, new org.apache.thrift.meta_data.FieldMetaData("progressUpdateResponse", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRUCT , "TProgressUpdateResp"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetOperationStatusResp.class, metaDataMap); - } - - public TGetOperationStatusResp() { - } - - public TGetOperationStatusResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetOperationStatusResp(TGetOperationStatusResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationState()) { - this.operationState = other.operationState; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - if (other.isSetTaskStatus()) { - this.taskStatus = other.taskStatus; - } - this.operationStarted = other.operationStarted; - this.operationCompleted = other.operationCompleted; - this.hasResultSet = other.hasResultSet; - if (other.isSetProgressUpdateResponse()) { - this.progressUpdateResponse = other.progressUpdateResponse; - } - } - - public TGetOperationStatusResp deepCopy() { - return new TGetOperationStatusResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationState = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - this.taskStatus = null; - setOperationStartedIsSet(false); - this.operationStarted = 0; - setOperationCompletedIsSet(false); - this.operationCompleted = 0; - setHasResultSetIsSet(false); - this.hasResultSet = false; - this.progressUpdateResponse = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TOperationState - */ - public TOperationState getOperationState() { - return this.operationState; - } - - /** - * - * @see TOperationState - */ - public void setOperationState(TOperationState operationState) { - this.operationState = operationState; - } - - public void unsetOperationState() { - this.operationState = null; - } - - /** Returns true if field operationState is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationState() { - return this.operationState != null; - } - - public void setOperationStateIsSet(boolean value) { - if (!value) { - this.operationState = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a 
value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public String getTaskStatus() { - return this.taskStatus; - } - - public void setTaskStatus(String taskStatus) { - this.taskStatus = taskStatus; - } - - public void unsetTaskStatus() { - this.taskStatus = null; - } - - /** Returns true if field taskStatus is set (has been assigned a value) and false otherwise */ - public boolean isSetTaskStatus() { - return this.taskStatus != null; - } - - public void setTaskStatusIsSet(boolean value) { - if (!value) { - this.taskStatus = null; - } - } - - public long getOperationStarted() { - return this.operationStarted; - } - - public void setOperationStarted(long operationStarted) { - this.operationStarted = operationStarted; - setOperationStartedIsSet(true); - } - - public void unsetOperationStarted() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID); - } - - /** Returns true if field operationStarted is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationStarted() { - return EncodingUtils.testBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID); - } - - public void setOperationStartedIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OPERATIONSTARTED_ISSET_ID, value); - } - - public long getOperationCompleted() { - return this.operationCompleted; - } - - public void setOperationCompleted(long operationCompleted) { - this.operationCompleted = operationCompleted; - setOperationCompletedIsSet(true); - } - - public void unsetOperationCompleted() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID); - } - - /** Returns true if field operationCompleted is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationCompleted() { - return EncodingUtils.testBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID); - } - - public void setOperationCompletedIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __OPERATIONCOMPLETED_ISSET_ID, value); - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - 
setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public TProgressUpdateResp getProgressUpdateResponse() { - return this.progressUpdateResponse; - } - - public void setProgressUpdateResponse(TProgressUpdateResp progressUpdateResponse) { - this.progressUpdateResponse = progressUpdateResponse; - } - - public void unsetProgressUpdateResponse() { - this.progressUpdateResponse = null; - } - - /** Returns true if field progressUpdateResponse is set (has been assigned a value) and false otherwise */ - public boolean isSetProgressUpdateResponse() { - return this.progressUpdateResponse != null; - } - - public void setProgressUpdateResponseIsSet(boolean value) { - if (!value) { - this.progressUpdateResponse = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_STATE: - if (value == null) { - unsetOperationState(); - } else { - setOperationState((TOperationState)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - case TASK_STATUS: - if (value == null) { - unsetTaskStatus(); - } else { - setTaskStatus((String)value); - } - break; - - case OPERATION_STARTED: - if (value == null) { - unsetOperationStarted(); - } else { - setOperationStarted((Long)value); - } - break; - - case OPERATION_COMPLETED: - if (value == null) { - unsetOperationCompleted(); - } else { - setOperationCompleted((Long)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case PROGRESS_UPDATE_RESPONSE: - if (value == null) { - unsetProgressUpdateResponse(); - } else { - setProgressUpdateResponse((TProgressUpdateResp)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_STATE: - return getOperationState(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return getErrorCode(); - - case ERROR_MESSAGE: - return getErrorMessage(); - - case TASK_STATUS: - return getTaskStatus(); - - case OPERATION_STARTED: - return getOperationStarted(); - - case OPERATION_COMPLETED: - return getOperationCompleted(); - - case HAS_RESULT_SET: - return isHasResultSet(); - - case PROGRESS_UPDATE_RESPONSE: - return getProgressUpdateResponse(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case 
OPERATION_STATE: - return isSetOperationState(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - case TASK_STATUS: - return isSetTaskStatus(); - case OPERATION_STARTED: - return isSetOperationStarted(); - case OPERATION_COMPLETED: - return isSetOperationCompleted(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case PROGRESS_UPDATE_RESPONSE: - return isSetProgressUpdateResponse(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetOperationStatusResp) - return this.equals((TGetOperationStatusResp)that); - return false; - } - - public boolean equals(TGetOperationStatusResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationState = true && this.isSetOperationState(); - boolean that_present_operationState = true && that.isSetOperationState(); - if (this_present_operationState || that_present_operationState) { - if (!(this_present_operationState && that_present_operationState)) - return false; - if (!this.operationState.equals(that.operationState)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - boolean this_present_taskStatus = true && this.isSetTaskStatus(); - boolean that_present_taskStatus = true && that.isSetTaskStatus(); - if (this_present_taskStatus || that_present_taskStatus) { - if (!(this_present_taskStatus && that_present_taskStatus)) - return false; - if (!this.taskStatus.equals(that.taskStatus)) - return false; - } - - boolean this_present_operationStarted = true && this.isSetOperationStarted(); - boolean that_present_operationStarted = true && that.isSetOperationStarted(); - if (this_present_operationStarted || that_present_operationStarted) { - if (!(this_present_operationStarted && that_present_operationStarted)) - return false; - if (this.operationStarted != that.operationStarted) - return false; - } - - boolean this_present_operationCompleted = true && this.isSetOperationCompleted(); - boolean that_present_operationCompleted = true && that.isSetOperationCompleted(); - if (this_present_operationCompleted || that_present_operationCompleted) { - if 
(!(this_present_operationCompleted && that_present_operationCompleted)) - return false; - if (this.operationCompleted != that.operationCompleted) - return false; - } - - boolean this_present_hasResultSet = true && this.isSetHasResultSet(); - boolean that_present_hasResultSet = true && that.isSetHasResultSet(); - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_progressUpdateResponse = true && this.isSetProgressUpdateResponse(); - boolean that_present_progressUpdateResponse = true && that.isSetProgressUpdateResponse(); - if (this_present_progressUpdateResponse || that_present_progressUpdateResponse) { - if (!(this_present_progressUpdateResponse && that_present_progressUpdateResponse)) - return false; - if (!this.progressUpdateResponse.equals(that.progressUpdateResponse)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationState = true && (isSetOperationState()); - list.add(present_operationState); - if (present_operationState) - list.add(operationState.getValue()); - - boolean present_sqlState = true && (isSetSqlState()); - list.add(present_sqlState); - if (present_sqlState) - list.add(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - list.add(present_errorCode); - if (present_errorCode) - list.add(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - list.add(present_errorMessage); - if (present_errorMessage) - list.add(errorMessage); - - boolean present_taskStatus = true && (isSetTaskStatus()); - list.add(present_taskStatus); - if (present_taskStatus) - list.add(taskStatus); - - boolean present_operationStarted = true && (isSetOperationStarted()); - list.add(present_operationStarted); - if (present_operationStarted) - list.add(operationStarted); - - boolean present_operationCompleted = true && (isSetOperationCompleted()); - list.add(present_operationCompleted); - if (present_operationCompleted) - list.add(operationCompleted); - - boolean present_hasResultSet = true && (isSetHasResultSet()); - list.add(present_hasResultSet); - if (present_hasResultSet) - list.add(hasResultSet); - - boolean present_progressUpdateResponse = true && (isSetProgressUpdateResponse()); - list.add(present_progressUpdateResponse); - if (present_progressUpdateResponse) - list.add(progressUpdateResponse); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetOperationStatusResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationState()).compareTo(other.isSetOperationState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationState, other.operationState); - if (lastComparison != 0) { - return 
lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(other.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, other.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(other.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, other.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(other.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, other.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTaskStatus()).compareTo(other.isSetTaskStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTaskStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.taskStatus, other.taskStatus); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationStarted()).compareTo(other.isSetOperationStarted()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationStarted()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationStarted, other.operationStarted); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationCompleted()).compareTo(other.isSetOperationCompleted()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationCompleted()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationCompleted, other.operationCompleted); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(other.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, other.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetProgressUpdateResponse()).compareTo(other.isSetProgressUpdateResponse()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetProgressUpdateResponse()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.progressUpdateResponse, other.progressUpdateResponse); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetOperationStatusResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if 
(isSetOperationState()) { - if (!first) sb.append(", "); - sb.append("operationState:"); - if (this.operationState == null) { - sb.append("null"); - } else { - sb.append(this.operationState); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - if (isSetTaskStatus()) { - if (!first) sb.append(", "); - sb.append("taskStatus:"); - if (this.taskStatus == null) { - sb.append("null"); - } else { - sb.append(this.taskStatus); - } - first = false; - } - if (isSetOperationStarted()) { - if (!first) sb.append(", "); - sb.append("operationStarted:"); - sb.append(this.operationStarted); - first = false; - } - if (isSetOperationCompleted()) { - if (!first) sb.append(", "); - sb.append("operationCompleted:"); - sb.append(this.operationCompleted); - first = false; - } - if (isSetHasResultSet()) { - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - } - if (isSetProgressUpdateResponse()) { - if (!first) sb.append(", "); - sb.append("progressUpdateResponse:"); - if (this.progressUpdateResponse == null) { - sb.append("null"); - } else { - sb.append(this.progressUpdateResponse); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetOperationStatusRespStandardSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespStandardScheme getScheme() { - return new TGetOperationStatusRespStandardScheme(); - } - } - - private static class TGetOperationStatusRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationState = org.apache.hive.service.rpc.thrift.TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // TASK_STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.taskStatus = iprot.readString(); - struct.setTaskStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 7: // OPERATION_STARTED - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.operationStarted = iprot.readI64(); - struct.setOperationStartedIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 8: // OPERATION_COMPLETED - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.operationCompleted = iprot.readI64(); - struct.setOperationCompletedIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 9: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 10: // PROGRESS_UPDATE_RESPONSE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - 
struct.progressUpdateResponse = new TProgressUpdateResp(); - struct.progressUpdateResponse.read(iprot); - struct.setProgressUpdateResponseIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationState != null) { - if (struct.isSetOperationState()) { - oprot.writeFieldBegin(OPERATION_STATE_FIELD_DESC); - oprot.writeI32(struct.operationState.getValue()); - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); - oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - if (struct.taskStatus != null) { - if (struct.isSetTaskStatus()) { - oprot.writeFieldBegin(TASK_STATUS_FIELD_DESC); - oprot.writeString(struct.taskStatus); - oprot.writeFieldEnd(); - } - } - if (struct.isSetOperationStarted()) { - oprot.writeFieldBegin(OPERATION_STARTED_FIELD_DESC); - oprot.writeI64(struct.operationStarted); - oprot.writeFieldEnd(); - } - if (struct.isSetOperationCompleted()) { - oprot.writeFieldBegin(OPERATION_COMPLETED_FIELD_DESC); - oprot.writeI64(struct.operationCompleted); - oprot.writeFieldEnd(); - } - if (struct.isSetHasResultSet()) { - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - } - if (struct.progressUpdateResponse != null) { - if (struct.isSetProgressUpdateResponse()) { - oprot.writeFieldBegin(PROGRESS_UPDATE_RESPONSE_FIELD_DESC); - struct.progressUpdateResponse.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetOperationStatusRespTupleSchemeFactory implements SchemeFactory { - public TGetOperationStatusRespTupleScheme getScheme() { - return new TGetOperationStatusRespTupleScheme(); - } - } - - private static class TGetOperationStatusRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationState()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - if (struct.isSetTaskStatus()) { - optionals.set(4); - } - if (struct.isSetOperationStarted()) { - optionals.set(5); - } - if (struct.isSetOperationCompleted()) { - optionals.set(6); - } - if (struct.isSetHasResultSet()) { - optionals.set(7); - } - if (struct.isSetProgressUpdateResponse()) 
{ - optionals.set(8); - } - oprot.writeBitSet(optionals, 9); - if (struct.isSetOperationState()) { - oprot.writeI32(struct.operationState.getValue()); - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - if (struct.isSetTaskStatus()) { - oprot.writeString(struct.taskStatus); - } - if (struct.isSetOperationStarted()) { - oprot.writeI64(struct.operationStarted); - } - if (struct.isSetOperationCompleted()) { - oprot.writeI64(struct.operationCompleted); - } - if (struct.isSetHasResultSet()) { - oprot.writeBool(struct.hasResultSet); - } - if (struct.isSetProgressUpdateResponse()) { - struct.progressUpdateResponse.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetOperationStatusResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(9); - if (incoming.get(0)) { - struct.operationState = org.apache.hive.service.rpc.thrift.TOperationState.findByValue(iprot.readI32()); - struct.setOperationStateIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - if (incoming.get(4)) { - struct.taskStatus = iprot.readString(); - struct.setTaskStatusIsSet(true); - } - if (incoming.get(5)) { - struct.operationStarted = iprot.readI64(); - struct.setOperationStartedIsSet(true); - } - if (incoming.get(6)) { - struct.operationCompleted = iprot.readI64(); - struct.setOperationCompletedIsSet(true); - } - if (incoming.get(7)) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } - if (incoming.get(8)) { - struct.progressUpdateResponse = new TProgressUpdateResp(); - struct.progressUpdateResponse.read(iprot); - struct.setProgressUpdateResponseIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java deleted file mode 100644 index 1bec9b51c72d8..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysReq.java +++ /dev/null @@ -1,716 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import 
java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetPrimaryKeysReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetPrimaryKeysReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetPrimaryKeysReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetPrimaryKeysReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetPrimaryKeysReq.class, metaDataMap); - } - - public TGetPrimaryKeysReq() { - } - - public TGetPrimaryKeysReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetPrimaryKeysReq(TGetPrimaryKeysReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - } - - public TGetPrimaryKeysReq deepCopy() { - return new TGetPrimaryKeysReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetPrimaryKeysReq) - return this.equals((TGetPrimaryKeysReq)that); - return false; - } - - public boolean equals(TGetPrimaryKeysReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetPrimaryKeysReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = 
Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetPrimaryKeysReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetPrimaryKeysReqStandardSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysReqStandardScheme getScheme() { - return new TGetPrimaryKeysReqStandardScheme(); - } - } - - private static class TGetPrimaryKeysReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - 
oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetPrimaryKeysReqTupleSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysReqTupleScheme getScheme() { - return new TGetPrimaryKeysReqTupleScheme(); - } - } - - private static class TGetPrimaryKeysReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java deleted file mode 100644 index 72d9507fe1031..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetPrimaryKeysResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetPrimaryKeysResp implements org.apache.thrift.TBase, 
java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetPrimaryKeysResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetPrimaryKeysRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetPrimaryKeysRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetPrimaryKeysResp.class, metaDataMap); - } - - public TGetPrimaryKeysResp() { - } - - public TGetPrimaryKeysResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetPrimaryKeysResp(TGetPrimaryKeysResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetPrimaryKeysResp deepCopy() { - return new TGetPrimaryKeysResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return 
getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetPrimaryKeysResp) - return this.equals((TGetPrimaryKeysResp)that); - return false; - } - - public boolean equals(TGetPrimaryKeysResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetPrimaryKeysResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetPrimaryKeysResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - 
if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetPrimaryKeysRespStandardSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysRespStandardScheme getScheme() { - return new TGetPrimaryKeysRespStandardScheme(); - } - } - - private static class TGetPrimaryKeysRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetPrimaryKeysRespTupleSchemeFactory implements SchemeFactory { - public TGetPrimaryKeysRespTupleScheme getScheme() { - return new TGetPrimaryKeysRespTupleScheme(); - } - } - - private static class TGetPrimaryKeysRespTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetPrimaryKeysResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java deleted file mode 100644 index b94d827de264d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetResultSetMetadataReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataReq"); - - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataReqTupleSchemeFactory()); - } - - private TOperationHandle operationHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_HANDLE((short)1, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataReq.class, metaDataMap); - } - - public TGetResultSetMetadataReq() { - } - - public TGetResultSetMetadataReq( - TOperationHandle operationHandle) - { - this(); - this.operationHandle = operationHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataReq(TGetResultSetMetadataReq other) { - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetResultSetMetadataReq deepCopy() { - return new TGetResultSetMetadataReq(this); - } - - @Override - public void clear() { - this.operationHandle = null; - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataReq) - return this.equals((TGetResultSetMetadataReq)that); - return false; - } - - public boolean equals(TGetResultSetMetadataReq that) { - if (that == null) - return false; - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetResultSetMetadataReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void 
write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataReq("); - boolean first = true; - - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetOperationHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataReqStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqStandardScheme getScheme() { - return new TGetResultSetMetadataReqStandardScheme(); - } - } - - private static class TGetResultSetMetadataReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationHandle != null) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataReqTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataReqTupleScheme getScheme() { - return new TGetResultSetMetadataReqTupleScheme(); - } - } - - private static class TGetResultSetMetadataReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - 
TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java deleted file mode 100644 index ae2021ebd5a10..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetResultSetMetadataResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetResultSetMetadataResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetResultSetMetadataResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SCHEMA_FIELD_DESC = new org.apache.thrift.protocol.TField("schema", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetResultSetMetadataRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetResultSetMetadataRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TTableSchema schema; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - SCHEMA((short)2, "schema"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SCHEMA - return SCHEMA; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.SCHEMA}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SCHEMA, new org.apache.thrift.meta_data.FieldMetaData("schema", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTableSchema.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetResultSetMetadataResp.class, metaDataMap); - } - - public TGetResultSetMetadataResp() { - } - - public TGetResultSetMetadataResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetResultSetMetadataResp(TGetResultSetMetadataResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetSchema()) { - this.schema = new TTableSchema(other.schema); - } - } - - public TGetResultSetMetadataResp deepCopy() { - return new TGetResultSetMetadataResp(this); - } - - @Override - public void clear() { - this.status = null; - this.schema = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TTableSchema getSchema() { - return this.schema; - } - - public void setSchema(TTableSchema schema) { - this.schema = schema; - } - - public void unsetSchema() { - this.schema = null; - } - - /** Returns true if field schema is set (has been assigned a value) and false otherwise */ - public boolean isSetSchema() { - return this.schema != null; - } - - public void setSchemaIsSet(boolean value) { - if (!value) { - this.schema = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SCHEMA: - if (value == null) { - unsetSchema(); - } else { - setSchema((TTableSchema)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SCHEMA: - return getSchema(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SCHEMA: - return isSetSchema(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetResultSetMetadataResp) - return this.equals((TGetResultSetMetadataResp)that); - return false; - } - - public boolean equals(TGetResultSetMetadataResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_schema = true && this.isSetSchema(); - boolean that_present_schema = true && that.isSetSchema(); - if (this_present_schema || that_present_schema) { - if (!(this_present_schema && that_present_schema)) - return false; - if (!this.schema.equals(that.schema)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_schema = true && (isSetSchema()); - list.add(present_schema); - if (present_schema) - list.add(schema); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetResultSetMetadataResp other) { - if 
(!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchema()).compareTo(other.isSetSchema()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchema()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schema, other.schema); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetResultSetMetadataResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetSchema()) { - if (!first) sb.append(", "); - sb.append("schema:"); - if (this.schema == null) { - sb.append("null"); - } else { - sb.append(this.schema); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (schema != null) { - schema.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetResultSetMetadataRespStandardSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespStandardScheme getScheme() { - return new TGetResultSetMetadataRespStandardScheme(); - } - } - - private static class TGetResultSetMetadataRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SCHEMA - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.schema != null) { - if (struct.isSetSchema()) { - oprot.writeFieldBegin(SCHEMA_FIELD_DESC); - struct.schema.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetResultSetMetadataRespTupleSchemeFactory implements SchemeFactory { - public TGetResultSetMetadataRespTupleScheme getScheme() { - return new TGetResultSetMetadataRespTupleScheme(); - } - } - - private static class TGetResultSetMetadataRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetSchema()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetSchema()) { - struct.schema.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetResultSetMetadataResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.schema = new TTableSchema(); - struct.schema.read(iprot); - struct.setSchemaIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java deleted file mode 100644 index 17eed87ae096f..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasReq.java +++ /dev/null @@ -1,610 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetSchemasReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasReq.class, metaDataMap); - } - - public TGetSchemasReq() { - } - - public TGetSchemasReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasReq(TGetSchemasReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - } - - public TGetSchemasReq deepCopy() { - return new TGetSchemasReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasReq) - return this.equals((TGetSchemasReq)that); - return false; - } - - public boolean equals(TGetSchemasReq that) { - if (that == null) - return false; - - boolean 
this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if (this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetSchemasReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - 
sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasReqStandardSchemeFactory implements SchemeFactory { - public TGetSchemasReqStandardScheme getScheme() { - return new TGetSchemasReqStandardScheme(); - } - } - - private static class TGetSchemasReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.catalogName != null) { - if 
(struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasReqTupleSchemeFactory implements SchemeFactory { - public TGetSchemasReqTupleScheme getScheme() { - return new TGetSchemasReqTupleScheme(); - } - } - - private static class TGetSchemasReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java deleted file mode 100644 index e5317f7ff5046..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetSchemasResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetSchemasResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final 
org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetSchemasResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetSchemasRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetSchemasRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetSchemasResp.class, metaDataMap); - } - - public TGetSchemasResp() { - } - - public TGetSchemasResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetSchemasResp(TGetSchemasResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetSchemasResp deepCopy() { - return new TGetSchemasResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetSchemasResp) - return this.equals((TGetSchemasResp)that); - return false; - } - - public boolean equals(TGetSchemasResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetSchemasResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetSchemasResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetSchemasRespStandardSchemeFactory implements SchemeFactory { - public TGetSchemasRespStandardScheme getScheme() { - return new TGetSchemasRespStandardScheme(); - } - } - - private static class TGetSchemasRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetSchemasResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetSchemasRespTupleSchemeFactory implements SchemeFactory { - public TGetSchemasRespTupleScheme getScheme() { - return new TGetSchemasRespTupleScheme(); - } - } - - private static class TGetSchemasRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void 
read(org.apache.thrift.protocol.TProtocol prot, TGetSchemasResp struct) throws org.apache.thrift.TException {
-      TTupleProtocol iprot = (TTupleProtocol) prot;
-      struct.status = new TStatus();
-      struct.status.read(iprot);
-      struct.setStatusIsSet(true);
-      BitSet incoming = iprot.readBitSet(1);
-      if (incoming.get(0)) {
-        struct.operationHandle = new TOperationHandle();
-        struct.operationHandle.read(iprot);
-        struct.setOperationHandleIsSet(true);
-      }
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java
deleted file mode 100644
index c027748a336e6..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesReq.java
+++ /dev/null
@@ -1,394 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetTableTypesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesReq");
-
-  private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetTableTypesReqStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetTableTypesReqTupleSchemeFactory());
-  }
-
-  private TSessionHandle sessionHandle; // required
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    SESSION_HANDLE((short)1, "sessionHandle");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesReq.class, metaDataMap); - } - - public TGetTableTypesReq() { - } - - public TGetTableTypesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTableTypesReq(TGetTableTypesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTableTypesReq deepCopy() { - return new TGetTableTypesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesReq) - return this.equals((TGetTableTypesReq)that); - return false; - } - - public boolean equals(TGetTableTypesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTableTypesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); 
- } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesReqStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesReqStandardScheme getScheme() { - return new TGetTableTypesReqStandardScheme(); - } - } - - private static class TGetTableTypesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesReqTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesReqTupleScheme getScheme() { - return new TGetTableTypesReqTupleScheme(); - } - } - - private static class TGetTableTypesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) 
prot;
-      struct.sessionHandle = new TSessionHandle();
-      struct.sessionHandle.read(iprot);
-      struct.setSessionHandleIsSet(true);
-    }
-  }
-
-}
-
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java
deleted file mode 100644
index c6ce0d4368fdd..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTableTypesResp.java
+++ /dev/null
@@ -1,509 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"})
-@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)")
-public class TGetTableTypesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable {
-  private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTableTypesResp");
-
-  private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1);
-  private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2);
-
-  private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>();
-  static {
-    schemes.put(StandardScheme.class, new TGetTableTypesRespStandardSchemeFactory());
-    schemes.put(TupleScheme.class, new TGetTableTypesRespTupleSchemeFactory());
-  }
-
-  private TStatus status; // required
-  private TOperationHandle operationHandle; // optional
-
-  /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
-  public enum _Fields implements org.apache.thrift.TFieldIdEnum {
-    STATUS((short)1, "status"),
-    OPERATION_HANDLE((short)2, "operationHandle");
-
-    private static final Map byName = new HashMap();
-
-    static {
-      for (_Fields field : EnumSet.allOf(_Fields.class)) {
-        byName.put(field.getFieldName(), field);
-      }
-    }
-
-    /**
-     * Find the _Fields constant that matches fieldId, or null if its not found.
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTableTypesResp.class, metaDataMap); - } - - public TGetTableTypesResp() { - } - - public TGetTableTypesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTableTypesResp(TGetTableTypesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTableTypesResp deepCopy() { - return new TGetTableTypesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTableTypesResp) - return this.equals((TGetTableTypesResp)that); - return false; - } - - public boolean equals(TGetTableTypesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); 
- if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTableTypesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTableTypesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTableTypesRespStandardSchemeFactory implements SchemeFactory { - public TGetTableTypesRespStandardScheme getScheme() { - return new TGetTableTypesRespStandardScheme(); - } - } - - private static class TGetTableTypesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTableTypesRespTupleSchemeFactory implements SchemeFactory { - public TGetTableTypesRespTupleScheme getScheme() { - return new TGetTableTypesRespTupleScheme(); - } - } - - private static class TGetTableTypesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - 
- @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTableTypesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java deleted file mode 100644 index 1aa3f946727b6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesReq.java +++ /dev/null @@ -1,871 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTablesReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField CATALOG_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("catalogName", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField SCHEMA_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("schemaName", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField TABLE_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("tableName", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField TABLE_TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("tableTypes", org.apache.thrift.protocol.TType.LIST, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesReqTupleSchemeFactory()); - } - - private TSessionHandle 
sessionHandle; // required - private String catalogName; // optional - private String schemaName; // optional - private String tableName; // optional - private List tableTypes; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - CATALOG_NAME((short)2, "catalogName"), - SCHEMA_NAME((short)3, "schemaName"), - TABLE_NAME((short)4, "tableName"), - TABLE_TYPES((short)5, "tableTypes"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // CATALOG_NAME - return CATALOG_NAME; - case 3: // SCHEMA_NAME - return SCHEMA_NAME; - case 4: // TABLE_NAME - return TABLE_NAME; - case 5: // TABLE_TYPES - return TABLE_TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.CATALOG_NAME,_Fields.SCHEMA_NAME,_Fields.TABLE_NAME,_Fields.TABLE_TYPES}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CATALOG_NAME, new org.apache.thrift.meta_data.FieldMetaData("catalogName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.SCHEMA_NAME, new org.apache.thrift.meta_data.FieldMetaData("schemaName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_NAME, new org.apache.thrift.meta_data.FieldMetaData("tableName", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , "TPatternOrIdentifier"))); - tmpMap.put(_Fields.TABLE_TYPES, new org.apache.thrift.meta_data.FieldMetaData("tableTypes", 
org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesReq.class, metaDataMap); - } - - public TGetTablesReq() { - } - - public TGetTablesReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesReq(TGetTablesReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetCatalogName()) { - this.catalogName = other.catalogName; - } - if (other.isSetSchemaName()) { - this.schemaName = other.schemaName; - } - if (other.isSetTableName()) { - this.tableName = other.tableName; - } - if (other.isSetTableTypes()) { - List __this__tableTypes = new ArrayList(other.tableTypes); - this.tableTypes = __this__tableTypes; - } - } - - public TGetTablesReq deepCopy() { - return new TGetTablesReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.catalogName = null; - this.schemaName = null; - this.tableName = null; - this.tableTypes = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getCatalogName() { - return this.catalogName; - } - - public void setCatalogName(String catalogName) { - this.catalogName = catalogName; - } - - public void unsetCatalogName() { - this.catalogName = null; - } - - /** Returns true if field catalogName is set (has been assigned a value) and false otherwise */ - public boolean isSetCatalogName() { - return this.catalogName != null; - } - - public void setCatalogNameIsSet(boolean value) { - if (!value) { - this.catalogName = null; - } - } - - public String getSchemaName() { - return this.schemaName; - } - - public void setSchemaName(String schemaName) { - this.schemaName = schemaName; - } - - public void unsetSchemaName() { - this.schemaName = null; - } - - /** Returns true if field schemaName is set (has been assigned a value) and false otherwise */ - public boolean isSetSchemaName() { - return this.schemaName != null; - } - - public void setSchemaNameIsSet(boolean value) { - if (!value) { - this.schemaName = null; - } - } - - public String getTableName() { - return this.tableName; - } - - public void setTableName(String tableName) { - this.tableName = tableName; - } - - public void unsetTableName() { - this.tableName = null; - } - - /** Returns true if field tableName is set (has been assigned a value) and false otherwise */ - public boolean isSetTableName() { - return this.tableName != null; - } - - public void setTableNameIsSet(boolean value) { - if (!value) { - this.tableName = null; - } - } - - public int getTableTypesSize() { - return (this.tableTypes == null) ? 
0 : this.tableTypes.size(); - } - - public java.util.Iterator getTableTypesIterator() { - return (this.tableTypes == null) ? null : this.tableTypes.iterator(); - } - - public void addToTableTypes(String elem) { - if (this.tableTypes == null) { - this.tableTypes = new ArrayList(); - } - this.tableTypes.add(elem); - } - - public List getTableTypes() { - return this.tableTypes; - } - - public void setTableTypes(List tableTypes) { - this.tableTypes = tableTypes; - } - - public void unsetTableTypes() { - this.tableTypes = null; - } - - /** Returns true if field tableTypes is set (has been assigned a value) and false otherwise */ - public boolean isSetTableTypes() { - return this.tableTypes != null; - } - - public void setTableTypesIsSet(boolean value) { - if (!value) { - this.tableTypes = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CATALOG_NAME: - if (value == null) { - unsetCatalogName(); - } else { - setCatalogName((String)value); - } - break; - - case SCHEMA_NAME: - if (value == null) { - unsetSchemaName(); - } else { - setSchemaName((String)value); - } - break; - - case TABLE_NAME: - if (value == null) { - unsetTableName(); - } else { - setTableName((String)value); - } - break; - - case TABLE_TYPES: - if (value == null) { - unsetTableTypes(); - } else { - setTableTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case CATALOG_NAME: - return getCatalogName(); - - case SCHEMA_NAME: - return getSchemaName(); - - case TABLE_NAME: - return getTableName(); - - case TABLE_TYPES: - return getTableTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case CATALOG_NAME: - return isSetCatalogName(); - case SCHEMA_NAME: - return isSetSchemaName(); - case TABLE_NAME: - return isSetTableName(); - case TABLE_TYPES: - return isSetTableTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesReq) - return this.equals((TGetTablesReq)that); - return false; - } - - public boolean equals(TGetTablesReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_catalogName = true && this.isSetCatalogName(); - boolean that_present_catalogName = true && that.isSetCatalogName(); - if (this_present_catalogName || that_present_catalogName) { - if (!(this_present_catalogName && that_present_catalogName)) - return false; - if (!this.catalogName.equals(that.catalogName)) - return false; - } - - boolean this_present_schemaName = true && this.isSetSchemaName(); - boolean that_present_schemaName = true && that.isSetSchemaName(); - if 
(this_present_schemaName || that_present_schemaName) { - if (!(this_present_schemaName && that_present_schemaName)) - return false; - if (!this.schemaName.equals(that.schemaName)) - return false; - } - - boolean this_present_tableName = true && this.isSetTableName(); - boolean that_present_tableName = true && that.isSetTableName(); - if (this_present_tableName || that_present_tableName) { - if (!(this_present_tableName && that_present_tableName)) - return false; - if (!this.tableName.equals(that.tableName)) - return false; - } - - boolean this_present_tableTypes = true && this.isSetTableTypes(); - boolean that_present_tableTypes = true && that.isSetTableTypes(); - if (this_present_tableTypes || that_present_tableTypes) { - if (!(this_present_tableTypes && that_present_tableTypes)) - return false; - if (!this.tableTypes.equals(that.tableTypes)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_catalogName = true && (isSetCatalogName()); - list.add(present_catalogName); - if (present_catalogName) - list.add(catalogName); - - boolean present_schemaName = true && (isSetSchemaName()); - list.add(present_schemaName); - if (present_schemaName) - list.add(schemaName); - - boolean present_tableName = true && (isSetTableName()); - list.add(present_tableName); - if (present_tableName) - list.add(tableName); - - boolean present_tableTypes = true && (isSetTableTypes()); - list.add(present_tableTypes); - if (present_tableTypes) - list.add(tableTypes); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTablesReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetCatalogName()).compareTo(other.isSetCatalogName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetCatalogName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.catalogName, other.catalogName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSchemaName()).compareTo(other.isSetSchemaName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSchemaName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.schemaName, other.schemaName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableName()).compareTo(other.isSetTableName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableName, other.tableName); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTableTypes()).compareTo(other.isSetTableTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTableTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.tableTypes, 
other.tableTypes); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (isSetCatalogName()) { - if (!first) sb.append(", "); - sb.append("catalogName:"); - if (this.catalogName == null) { - sb.append("null"); - } else { - sb.append(this.catalogName); - } - first = false; - } - if (isSetSchemaName()) { - if (!first) sb.append(", "); - sb.append("schemaName:"); - if (this.schemaName == null) { - sb.append("null"); - } else { - sb.append(this.schemaName); - } - first = false; - } - if (isSetTableName()) { - if (!first) sb.append(", "); - sb.append("tableName:"); - if (this.tableName == null) { - sb.append("null"); - } else { - sb.append(this.tableName); - } - first = false; - } - if (isSetTableTypes()) { - if (!first) sb.append(", "); - sb.append("tableTypes:"); - if (this.tableTypes == null) { - sb.append("null"); - } else { - sb.append(this.tableTypes); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesReqStandardSchemeFactory implements SchemeFactory { - public TGetTablesReqStandardScheme getScheme() { - return new TGetTablesReqStandardScheme(); - } - } - - private static class TGetTablesReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // CATALOG_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SCHEMA_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // TABLE_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // TABLE_TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list172 = iprot.readListBegin(); - struct.tableTypes = new ArrayList(_list172.size); - String _elem173; - for (int _i174 = 0; _i174 < _list172.size; ++_i174) - { - _elem173 = iprot.readString(); - struct.tableTypes.add(_elem173); - } - iprot.readListEnd(); - } - struct.setTableTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } 
- if (struct.catalogName != null) { - if (struct.isSetCatalogName()) { - oprot.writeFieldBegin(CATALOG_NAME_FIELD_DESC); - oprot.writeString(struct.catalogName); - oprot.writeFieldEnd(); - } - } - if (struct.schemaName != null) { - if (struct.isSetSchemaName()) { - oprot.writeFieldBegin(SCHEMA_NAME_FIELD_DESC); - oprot.writeString(struct.schemaName); - oprot.writeFieldEnd(); - } - } - if (struct.tableName != null) { - if (struct.isSetTableName()) { - oprot.writeFieldBegin(TABLE_NAME_FIELD_DESC); - oprot.writeString(struct.tableName); - oprot.writeFieldEnd(); - } - } - if (struct.tableTypes != null) { - if (struct.isSetTableTypes()) { - oprot.writeFieldBegin(TABLE_TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.tableTypes.size())); - for (String _iter175 : struct.tableTypes) - { - oprot.writeString(_iter175); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesReqTupleSchemeFactory implements SchemeFactory { - public TGetTablesReqTupleScheme getScheme() { - return new TGetTablesReqTupleScheme(); - } - } - - private static class TGetTablesReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetCatalogName()) { - optionals.set(0); - } - if (struct.isSetSchemaName()) { - optionals.set(1); - } - if (struct.isSetTableName()) { - optionals.set(2); - } - if (struct.isSetTableTypes()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetCatalogName()) { - oprot.writeString(struct.catalogName); - } - if (struct.isSetSchemaName()) { - oprot.writeString(struct.schemaName); - } - if (struct.isSetTableName()) { - oprot.writeString(struct.tableName); - } - if (struct.isSetTableTypes()) { - { - oprot.writeI32(struct.tableTypes.size()); - for (String _iter176 : struct.tableTypes) - { - oprot.writeString(_iter176); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - struct.catalogName = iprot.readString(); - struct.setCatalogNameIsSet(true); - } - if (incoming.get(1)) { - struct.schemaName = iprot.readString(); - struct.setSchemaNameIsSet(true); - } - if (incoming.get(2)) { - struct.tableName = iprot.readString(); - struct.setTableNameIsSet(true); - } - if (incoming.get(3)) { - { - org.apache.thrift.protocol.TList _list177 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.tableTypes = new ArrayList(_list177.size); - String _elem178; - for (int _i179 = 0; _i179 < _list177.size; ++_i179) - { - _elem178 = iprot.readString(); - struct.tableTypes.add(_elem178); - } - } - struct.setTableTypesIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java deleted file mode 100644 index 
0b7c3825d35a5..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTablesResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTablesResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTablesResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTablesRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTablesRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTablesResp.class, metaDataMap); - } - - public TGetTablesResp() { - } - - public TGetTablesResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TGetTablesResp(TGetTablesResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTablesResp deepCopy() { - return new TGetTablesResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTablesResp) - return this.equals((TGetTablesResp)that); - return false; - } - - public boolean equals(TGetTablesResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTablesResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTablesResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else 
{ - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTablesRespStandardSchemeFactory implements SchemeFactory { - public TGetTablesRespStandardScheme getScheme() { - return new TGetTablesRespStandardScheme(); - } - } - - private static class TGetTablesRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTablesResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTablesResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTablesRespTupleSchemeFactory implements SchemeFactory { - public TGetTablesRespTupleScheme getScheme() { - return new TGetTablesRespTupleScheme(); - } - } - - private static class TGetTablesRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot 
= (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTablesResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java deleted file mode 100644 index 2e0ec60e4bc3d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoReq.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTypeInfoReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoReq.class, metaDataMap); - } - - public TGetTypeInfoReq() { - } - - public TGetTypeInfoReq( - TSessionHandle sessionHandle) - { - this(); - this.sessionHandle = sessionHandle; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoReq(TGetTypeInfoReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - } - - public TGetTypeInfoReq deepCopy() { - return new TGetTypeInfoReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoReq) - return this.equals((TGetTypeInfoReq)that); - return false; - } - - public boolean equals(TGetTypeInfoReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTypeInfoReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoReqStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqStandardScheme getScheme() { - return new TGetTypeInfoReqStandardScheme(); - } - } - - private static class TGetTypeInfoReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoReqTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoReqTupleScheme getScheme() { - return new TGetTypeInfoReqTupleScheme(); - } - } - - private static class TGetTypeInfoReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionHandle = new 
TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java deleted file mode 100644 index cc2910ef29feb..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TGetTypeInfoResp.java +++ /dev/null @@ -1,509 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TGetTypeInfoResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TGetTypeInfoResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationHandle", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TGetTypeInfoRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TGetTypeInfoRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TOperationHandle operationHandle; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - OPERATION_HANDLE((short)2, "operationHandle"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // OPERATION_HANDLE - return OPERATION_HANDLE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.OPERATION_HANDLE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.OPERATION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("operationHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TOperationHandle.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TGetTypeInfoResp.class, metaDataMap); - } - - public TGetTypeInfoResp() { - } - - public TGetTypeInfoResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. 
- */ - public TGetTypeInfoResp(TGetTypeInfoResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetOperationHandle()) { - this.operationHandle = new TOperationHandle(other.operationHandle); - } - } - - public TGetTypeInfoResp deepCopy() { - return new TGetTypeInfoResp(this); - } - - @Override - public void clear() { - this.status = null; - this.operationHandle = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public TOperationHandle getOperationHandle() { - return this.operationHandle; - } - - public void setOperationHandle(TOperationHandle operationHandle) { - this.operationHandle = operationHandle; - } - - public void unsetOperationHandle() { - this.operationHandle = null; - } - - /** Returns true if field operationHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationHandle() { - return this.operationHandle != null; - } - - public void setOperationHandleIsSet(boolean value) { - if (!value) { - this.operationHandle = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case OPERATION_HANDLE: - if (value == null) { - unsetOperationHandle(); - } else { - setOperationHandle((TOperationHandle)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case OPERATION_HANDLE: - return getOperationHandle(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case OPERATION_HANDLE: - return isSetOperationHandle(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TGetTypeInfoResp) - return this.equals((TGetTypeInfoResp)that); - return false; - } - - public boolean equals(TGetTypeInfoResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_operationHandle = true && this.isSetOperationHandle(); - boolean that_present_operationHandle = true && that.isSetOperationHandle(); - if (this_present_operationHandle || that_present_operationHandle) { - if (!(this_present_operationHandle && that_present_operationHandle)) - return false; - if (!this.operationHandle.equals(that.operationHandle)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if 
(present_status) - list.add(status); - - boolean present_operationHandle = true && (isSetOperationHandle()); - list.add(present_operationHandle); - if (present_operationHandle) - list.add(operationHandle); - - return list.hashCode(); - } - - @Override - public int compareTo(TGetTypeInfoResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationHandle()).compareTo(other.isSetOperationHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationHandle, other.operationHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TGetTypeInfoResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (isSetOperationHandle()) { - if (!first) sb.append(", "); - sb.append("operationHandle:"); - if (this.operationHandle == null) { - sb.append("null"); - } else { - sb.append(this.operationHandle); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (operationHandle != null) { - operationHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TGetTypeInfoRespStandardSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespStandardScheme getScheme() { - return new TGetTypeInfoRespStandardScheme(); - } - } - - private static class TGetTypeInfoRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationHandle != null) { - if (struct.isSetOperationHandle()) { - oprot.writeFieldBegin(OPERATION_HANDLE_FIELD_DESC); - struct.operationHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TGetTypeInfoRespTupleSchemeFactory implements SchemeFactory { - public TGetTypeInfoRespTupleScheme getScheme() { - return new TGetTypeInfoRespTupleScheme(); - } - } - - private static class TGetTypeInfoRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - BitSet optionals = new BitSet(); - if (struct.isSetOperationHandle()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetOperationHandle()) { - struct.operationHandle.write(oprot); - } - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TGetTypeInfoResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.operationHandle = new TOperationHandle(); - struct.operationHandle.read(iprot); - struct.setOperationHandleIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java deleted file mode 100644 index a3879d830000b..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/THandleIdentifier.java +++ /dev/null @@ -1,508 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class THandleIdentifier implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("THandleIdentifier"); - - private static final org.apache.thrift.protocol.TField GUID_FIELD_DESC = new org.apache.thrift.protocol.TField("guid", org.apache.thrift.protocol.TType.STRING, (short)1); - private static final org.apache.thrift.protocol.TField SECRET_FIELD_DESC = new org.apache.thrift.protocol.TField("secret", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new THandleIdentifierStandardSchemeFactory()); - schemes.put(TupleScheme.class, new THandleIdentifierTupleSchemeFactory()); - } - - private ByteBuffer guid; // required - private ByteBuffer secret; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - GUID((short)1, "guid"), - SECRET((short)2, "secret"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // GUID - return GUID; - case 2: // SECRET - return SECRET; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.GUID, new org.apache.thrift.meta_data.FieldMetaData("guid", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.SECRET, new org.apache.thrift.meta_data.FieldMetaData("secret", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(THandleIdentifier.class, metaDataMap); - } - - public THandleIdentifier() { - } - - public THandleIdentifier( - ByteBuffer guid, - ByteBuffer secret) - { - this(); - this.guid = org.apache.thrift.TBaseHelper.copyBinary(guid); - this.secret = org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - /** - * Performs a deep copy on other. - */ - public THandleIdentifier(THandleIdentifier other) { - if (other.isSetGuid()) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(other.guid); - } - if (other.isSetSecret()) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(other.secret); - } - } - - public THandleIdentifier deepCopy() { - return new THandleIdentifier(this); - } - - @Override - public void clear() { - this.guid = null; - this.secret = null; - } - - public byte[] getGuid() { - setGuid(org.apache.thrift.TBaseHelper.rightSize(guid)); - return guid == null ? null : guid.array(); - } - - public ByteBuffer bufferForGuid() { - return org.apache.thrift.TBaseHelper.copyBinary(guid); - } - - public void setGuid(byte[] guid) { - this.guid = guid == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(guid, guid.length)); - } - - public void setGuid(ByteBuffer guid) { - this.guid = org.apache.thrift.TBaseHelper.copyBinary(guid); - } - - public void unsetGuid() { - this.guid = null; - } - - /** Returns true if field guid is set (has been assigned a value) and false otherwise */ - public boolean isSetGuid() { - return this.guid != null; - } - - public void setGuidIsSet(boolean value) { - if (!value) { - this.guid = null; - } - } - - public byte[] getSecret() { - setSecret(org.apache.thrift.TBaseHelper.rightSize(secret)); - return secret == null ? 
null : secret.array(); - } - - public ByteBuffer bufferForSecret() { - return org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - public void setSecret(byte[] secret) { - this.secret = secret == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(secret, secret.length)); - } - - public void setSecret(ByteBuffer secret) { - this.secret = org.apache.thrift.TBaseHelper.copyBinary(secret); - } - - public void unsetSecret() { - this.secret = null; - } - - /** Returns true if field secret is set (has been assigned a value) and false otherwise */ - public boolean isSetSecret() { - return this.secret != null; - } - - public void setSecretIsSet(boolean value) { - if (!value) { - this.secret = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case GUID: - if (value == null) { - unsetGuid(); - } else { - setGuid((ByteBuffer)value); - } - break; - - case SECRET: - if (value == null) { - unsetSecret(); - } else { - setSecret((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case GUID: - return getGuid(); - - case SECRET: - return getSecret(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case GUID: - return isSetGuid(); - case SECRET: - return isSetSecret(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof THandleIdentifier) - return this.equals((THandleIdentifier)that); - return false; - } - - public boolean equals(THandleIdentifier that) { - if (that == null) - return false; - - boolean this_present_guid = true && this.isSetGuid(); - boolean that_present_guid = true && that.isSetGuid(); - if (this_present_guid || that_present_guid) { - if (!(this_present_guid && that_present_guid)) - return false; - if (!this.guid.equals(that.guid)) - return false; - } - - boolean this_present_secret = true && this.isSetSecret(); - boolean that_present_secret = true && that.isSetSecret(); - if (this_present_secret || that_present_secret) { - if (!(this_present_secret && that_present_secret)) - return false; - if (!this.secret.equals(that.secret)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_guid = true && (isSetGuid()); - list.add(present_guid); - if (present_guid) - list.add(guid); - - boolean present_secret = true && (isSetSecret()); - list.add(present_secret); - if (present_secret) - list.add(secret); - - return list.hashCode(); - } - - @Override - public int compareTo(THandleIdentifier other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetGuid()).compareTo(other.isSetGuid()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetGuid()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.guid, other.guid); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSecret()).compareTo(other.isSetSecret()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSecret()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secret, 
other.secret); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("THandleIdentifier("); - boolean first = true; - - sb.append("guid:"); - if (this.guid == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.guid, sb); - } - first = false; - if (!first) sb.append(", "); - sb.append("secret:"); - if (this.secret == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.secret, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetGuid()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'guid' is unset! Struct:" + toString()); - } - - if (!isSetSecret()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'secret' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class THandleIdentifierStandardSchemeFactory implements SchemeFactory { - public THandleIdentifierStandardScheme getScheme() { - return new THandleIdentifierStandardScheme(); - } - } - - private static class THandleIdentifierStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, THandleIdentifier struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // GUID - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SECRET - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, THandleIdentifier struct) throws 
org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.guid != null) { - oprot.writeFieldBegin(GUID_FIELD_DESC); - oprot.writeBinary(struct.guid); - oprot.writeFieldEnd(); - } - if (struct.secret != null) { - oprot.writeFieldBegin(SECRET_FIELD_DESC); - oprot.writeBinary(struct.secret); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class THandleIdentifierTupleSchemeFactory implements SchemeFactory { - public THandleIdentifierTupleScheme getScheme() { - return new THandleIdentifierTupleScheme(); - } - } - - private static class THandleIdentifierTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeBinary(struct.guid); - oprot.writeBinary(struct.secret); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, THandleIdentifier struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.guid = iprot.readBinary(); - struct.setGuidIsSet(true); - struct.secret = iprot.readBinary(); - struct.setSecretIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java deleted file mode 100644 index 3c44b602b4ff7..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI16Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, 
SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Column.class, metaDataMap); - } - - public TI16Column() { - } - - public TI16Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI16Column(TI16Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI16Column deepCopy() { - return new TI16Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 
0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(short elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Column) - return this.equals((TI16Column)that); - return false; - } - - public boolean equals(TI16Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if 
(present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI16Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ColumnStandardSchemeFactory implements SchemeFactory { - public TI16ColumnStandardScheme getScheme() { - return new TI16ColumnStandardScheme(); - } - } - - private static class TI16ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list70 = iprot.readListBegin(); - struct.values = new ArrayList(_list70.size); - short _elem71; - for (int _i72 = 0; _i72 < _list70.size; ++_i72) - { - _elem71 = iprot.readI16(); - struct.values.add(_elem71); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Column struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, struct.values.size())); - for (short _iter73 : struct.values) - { - oprot.writeI16(_iter73); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ColumnTupleSchemeFactory implements SchemeFactory { - public TI16ColumnTupleScheme getScheme() { - return new TI16ColumnTupleScheme(); - } - } - - private static class TI16ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (short _iter74 : struct.values) - { - oprot.writeI16(_iter74); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public 
void read(org.apache.thrift.protocol.TProtocol prot, TI16Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list75 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I16, iprot.readI32()); - struct.values = new ArrayList(_list75.size); - short _elem76; - for (int _i77 = 0; _i77 < _list75.size; ++_i77) - { - _elem76 = iprot.readI16(); - struct.values.add(_elem76); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java deleted file mode 100644 index 29fb4cb85201d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI16Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI16Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI16Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I16, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI16ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI16ValueTupleSchemeFactory()); - } - - private short value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I16))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI16Value.class, metaDataMap); - } - - public TI16Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI16Value(TI16Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI16Value deepCopy() { - return new TI16Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public short getValue() { - return this.value; - } - - public void setValue(short value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Short)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI16Value) - return this.equals((TI16Value)that); - return false; - } - - public boolean equals(TI16Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI16Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI16Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - 
public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI16ValueStandardSchemeFactory implements SchemeFactory { - public TI16ValueStandardScheme getScheme() { - return new TI16ValueStandardScheme(); - } - } - - private static class TI16ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI16Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I16) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI16Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI16(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI16ValueTupleSchemeFactory implements SchemeFactory { - public TI16ValueTupleScheme getScheme() { - return new TI16ValueTupleScheme(); - } - } - - private static class TI16ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI16(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI16Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI16(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java deleted file mode 100644 index 9834f1ce8f01b..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI32Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI32Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Column.class, metaDataMap); - } - - public TI32Column() { - } - - public TI32Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI32Column(TI32Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI32Column deepCopy() { - return new TI32Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(int elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Column) - return this.equals((TI32Column)that); - return false; - } - - public boolean equals(TI32Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI32Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ColumnStandardSchemeFactory implements SchemeFactory { - public TI32ColumnStandardScheme getScheme() { - return new TI32ColumnStandardScheme(); - } - } - - private static class TI32ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list78 = iprot.readListBegin(); - struct.values = new ArrayList(_list78.size); - int _elem79; - for (int _i80 = 0; _i80 < _list78.size; ++_i80) - { - _elem79 = iprot.readI32(); - struct.values.add(_elem79); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, struct.values.size())); - for (int _iter81 : struct.values) - { - oprot.writeI32(_iter81); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ColumnTupleSchemeFactory implements SchemeFactory { - public TI32ColumnTupleScheme getScheme() { - return new TI32ColumnTupleScheme(); - } - } - - private static class TI32ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (int _iter82 : struct.values) - { - oprot.writeI32(_iter82); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list83 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.values = new ArrayList(_list83.size); - int _elem84; - for (int _i85 = 0; _i85 < _list83.size; ++_i85) - { - _elem84 = iprot.readI32(); - struct.values.add(_elem84); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java deleted file mode 100644 index 8a69632b2d76e..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI32Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI32Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = 
new org.apache.thrift.protocol.TStruct("TI32Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I32, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI32ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI32ValueTupleSchemeFactory()); - } - - private int value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI32Value.class, metaDataMap); - } - - public TI32Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI32Value(TI32Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI32Value deepCopy() { - return new TI32Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public int getValue() { - return this.value; - } - - public void setValue(int value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI32Value) - return this.equals((TI32Value)that); - return false; - } - - public boolean equals(TI32Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI32Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI32Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - 
public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI32ValueStandardSchemeFactory implements SchemeFactory { - public TI32ValueStandardScheme getScheme() { - return new TI32ValueStandardScheme(); - } - } - - private static class TI32ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI32Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI32Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI32(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI32ValueTupleSchemeFactory implements SchemeFactory { - public TI32ValueTupleScheme getScheme() { - return new TI32ValueTupleScheme(); - } - } - - private static class TI32ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI32(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI32Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI32(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java deleted file mode 100644 index cd5ef2d7a9ed9..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Column.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI64Column implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Column"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Column.class, metaDataMap); - } - - public TI64Column() { - } - - public TI64Column( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TI64Column(TI64Column other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TI64Column deepCopy() { - return new TI64Column(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? null : this.values.iterator(); - } - - public void addToValues(long elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? 
(ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Column) - return this.equals((TI64Column)that); - return false; - } - - public boolean equals(TI64Column that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int compareTo(TI64Column other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void 
read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Column("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ColumnStandardSchemeFactory implements SchemeFactory { - public TI64ColumnStandardScheme getScheme() { - return new TI64ColumnStandardScheme(); - } - } - - private static class TI64ColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Column struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list86 = iprot.readListBegin(); - struct.values = new ArrayList(_list86.size); - long _elem87; - for (int _i88 = 0; _i88 < _list86.size; ++_i88) - { - _elem87 = iprot.readI64(); - struct.values.add(_elem87); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Column struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, struct.values.size())); - for (long _iter89 : struct.values) - { - oprot.writeI64(_iter89); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ColumnTupleSchemeFactory implements SchemeFactory { - public TI64ColumnTupleScheme getScheme() { - return new TI64ColumnTupleScheme(); - } - } - - private static class TI64ColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (long _iter90 : struct.values) - { - oprot.writeI64(_iter90); - } - } - oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Column struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list91 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.I64, iprot.readI32()); - struct.values = new ArrayList(_list91.size); - long _elem92; - for (int _i93 = 0; _i93 < _list91.size; ++_i93) - { - _elem92 = iprot.readI64(); - struct.values.add(_elem92); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java deleted file mode 100644 index 393c0bd28610d..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TI64Value.java +++ /dev/null @@ -1,390 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TI64Value implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct 
STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TI64Value"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.I64, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TI64ValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TI64ValueTupleSchemeFactory()); - } - - private long value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __VALUE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TI64Value.class, metaDataMap); - } - - public TI64Value() { - } - - /** - * Performs a deep copy on other. 
- */ - public TI64Value(TI64Value other) { - __isset_bitfield = other.__isset_bitfield; - this.value = other.value; - } - - public TI64Value deepCopy() { - return new TI64Value(this); - } - - @Override - public void clear() { - setValueIsSet(false); - this.value = 0; - } - - public long getValue() { - return this.value; - } - - public void setValue(long value) { - this.value = value; - setValueIsSet(true); - } - - public void unsetValue() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return EncodingUtils.testBit(__isset_bitfield, __VALUE_ISSET_ID); - } - - public void setValueIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUE_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TI64Value) - return this.equals((TI64Value)that); - return false; - } - - public boolean equals(TI64Value that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if (!(this_present_value && that_present_value)) - return false; - if (this.value != that.value) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TI64Value other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TI64Value("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - sb.append(this.value); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public 
void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TI64ValueStandardSchemeFactory implements SchemeFactory { - public TI64ValueStandardScheme getScheme() { - return new TI64ValueStandardScheme(); - } - } - - private static class TI64ValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TI64Value struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TI64Value struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeI64(struct.value); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TI64ValueTupleSchemeFactory implements SchemeFactory { - public TI64ValueTupleScheme getScheme() { - return new TI64ValueTupleScheme(); - } - } - - private static class TI64ValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeI64(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TI64Value struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readI64(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java deleted file mode 100644 index b39f208c1b878..0000000000000 --- 
a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TJobExecutionStatus.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TJobExecutionStatus implements org.apache.thrift.TEnum { - IN_PROGRESS(0), - COMPLETE(1), - NOT_AVAILABLE(2); - - private final int value; - - private TJobExecutionStatus(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TJobExecutionStatus findByValue(int value) { - switch (value) { - case 0: - return IN_PROGRESS; - case 1: - return COMPLETE; - case 2: - return NOT_AVAILABLE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java deleted file mode 100644 index 7ebc15c9432be..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TMapTypeEntry.java +++ /dev/null @@ -1,482 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TMapTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TMapTypeEntry"); - - private static final org.apache.thrift.protocol.TField KEY_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("keyTypePtr", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField VALUE_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("valueTypePtr", org.apache.thrift.protocol.TType.I32, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TMapTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new 
TMapTypeEntryTupleSchemeFactory()); - } - - private int keyTypePtr; // required - private int valueTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - KEY_TYPE_PTR((short)1, "keyTypePtr"), - VALUE_TYPE_PTR((short)2, "valueTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // KEY_TYPE_PTR - return KEY_TYPE_PTR; - case 2: // VALUE_TYPE_PTR - return VALUE_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __KEYTYPEPTR_ISSET_ID = 0; - private static final int __VALUETYPEPTR_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.KEY_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("keyTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - tmpMap.put(_Fields.VALUE_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("valueTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr"))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TMapTypeEntry.class, metaDataMap); - } - - public TMapTypeEntry() { - } - - public TMapTypeEntry( - int keyTypePtr, - int valueTypePtr) - { - this(); - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - /** - * Performs a deep copy on other. 
- */ - public TMapTypeEntry(TMapTypeEntry other) { - __isset_bitfield = other.__isset_bitfield; - this.keyTypePtr = other.keyTypePtr; - this.valueTypePtr = other.valueTypePtr; - } - - public TMapTypeEntry deepCopy() { - return new TMapTypeEntry(this); - } - - @Override - public void clear() { - setKeyTypePtrIsSet(false); - this.keyTypePtr = 0; - setValueTypePtrIsSet(false); - this.valueTypePtr = 0; - } - - public int getKeyTypePtr() { - return this.keyTypePtr; - } - - public void setKeyTypePtr(int keyTypePtr) { - this.keyTypePtr = keyTypePtr; - setKeyTypePtrIsSet(true); - } - - public void unsetKeyTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - /** Returns true if field keyTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetKeyTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID); - } - - public void setKeyTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __KEYTYPEPTR_ISSET_ID, value); - } - - public int getValueTypePtr() { - return this.valueTypePtr; - } - - public void setValueTypePtr(int valueTypePtr) { - this.valueTypePtr = valueTypePtr; - setValueTypePtrIsSet(true); - } - - public void unsetValueTypePtr() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - /** Returns true if field valueTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetValueTypePtr() { - return EncodingUtils.testBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID); - } - - public void setValueTypePtrIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __VALUETYPEPTR_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case KEY_TYPE_PTR: - if (value == null) { - unsetKeyTypePtr(); - } else { - setKeyTypePtr((Integer)value); - } - break; - - case VALUE_TYPE_PTR: - if (value == null) { - unsetValueTypePtr(); - } else { - setValueTypePtr((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case KEY_TYPE_PTR: - return getKeyTypePtr(); - - case VALUE_TYPE_PTR: - return getValueTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case KEY_TYPE_PTR: - return isSetKeyTypePtr(); - case VALUE_TYPE_PTR: - return isSetValueTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TMapTypeEntry) - return this.equals((TMapTypeEntry)that); - return false; - } - - public boolean equals(TMapTypeEntry that) { - if (that == null) - return false; - - boolean this_present_keyTypePtr = true; - boolean that_present_keyTypePtr = true; - if (this_present_keyTypePtr || that_present_keyTypePtr) { - if (!(this_present_keyTypePtr && that_present_keyTypePtr)) - return false; - if (this.keyTypePtr != that.keyTypePtr) - return false; - } - - boolean this_present_valueTypePtr = true; - boolean that_present_valueTypePtr = true; - if (this_present_valueTypePtr || that_present_valueTypePtr) { - if (!(this_present_valueTypePtr && that_present_valueTypePtr)) - return false; - if (this.valueTypePtr != that.valueTypePtr) - return 
false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_keyTypePtr = true; - list.add(present_keyTypePtr); - if (present_keyTypePtr) - list.add(keyTypePtr); - - boolean present_valueTypePtr = true; - list.add(present_valueTypePtr); - if (present_valueTypePtr) - list.add(valueTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TMapTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetKeyTypePtr()).compareTo(other.isSetKeyTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetKeyTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.keyTypePtr, other.keyTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetValueTypePtr()).compareTo(other.isSetValueTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValueTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.valueTypePtr, other.valueTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TMapTypeEntry("); - boolean first = true; - - sb.append("keyTypePtr:"); - sb.append(this.keyTypePtr); - first = false; - if (!first) sb.append(", "); - sb.append("valueTypePtr:"); - sb.append(this.valueTypePtr); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetKeyTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'keyTypePtr' is unset! Struct:" + toString()); - } - - if (!isSetValueTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'valueTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TMapTypeEntryStandardSchemeFactory implements SchemeFactory { - public TMapTypeEntryStandardScheme getScheme() { - return new TMapTypeEntryStandardScheme(); - } - } - - private static class TMapTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // KEY_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // VALUE_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TMapTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(KEY_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.keyTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldBegin(VALUE_TYPE_PTR_FIELD_DESC); - oprot.writeI32(struct.valueTypePtr); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TMapTypeEntryTupleSchemeFactory implements SchemeFactory { - public TMapTypeEntryTupleScheme getScheme() { - return new TMapTypeEntryTupleScheme(); - } - } - - private static class TMapTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.keyTypePtr); - oprot.writeI32(struct.valueTypePtr); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TMapTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.keyTypePtr = iprot.readI32(); - struct.setKeyTypePtrIsSet(true); - struct.valueTypePtr = iprot.readI32(); - struct.setValueTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java deleted file mode 100644 index e47abbb862cf1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionReq.java +++ /dev/null @@ -1,778 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import 
org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOpenSessionReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionReq"); - - private static final org.apache.thrift.protocol.TField CLIENT_PROTOCOL_FIELD_DESC = new org.apache.thrift.protocol.TField("client_protocol", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField USERNAME_FIELD_DESC = new org.apache.thrift.protocol.TField("username", org.apache.thrift.protocol.TType.STRING, (short)2); - private static final org.apache.thrift.protocol.TField PASSWORD_FIELD_DESC = new org.apache.thrift.protocol.TField("password", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionReqTupleSchemeFactory()); - } - - private TProtocolVersion client_protocol; // required - private String username; // optional - private String password; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TProtocolVersion - */ - CLIENT_PROTOCOL((short)1, "client_protocol"), - USERNAME((short)2, "username"), - PASSWORD((short)3, "password"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // CLIENT_PROTOCOL - return CLIENT_PROTOCOL; - case 2: // USERNAME - return USERNAME; - case 3: // PASSWORD - return PASSWORD; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.USERNAME,_Fields.PASSWORD,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.CLIENT_PROTOCOL, new org.apache.thrift.meta_data.FieldMetaData("client_protocol", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.USERNAME, new org.apache.thrift.meta_data.FieldMetaData("username", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.PASSWORD, new org.apache.thrift.meta_data.FieldMetaData("password", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionReq.class, metaDataMap); - } - - public TOpenSessionReq() { - this.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - } - - public TOpenSessionReq( - TProtocolVersion client_protocol) - { - this(); - this.client_protocol = client_protocol; - } - - /** - * Performs a deep copy on other. 
- */ - public TOpenSessionReq(TOpenSessionReq other) { - if (other.isSetClient_protocol()) { - this.client_protocol = other.client_protocol; - } - if (other.isSetUsername()) { - this.username = other.username; - } - if (other.isSetPassword()) { - this.password = other.password; - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(other.configuration); - this.configuration = __this__configuration; - } - } - - public TOpenSessionReq deepCopy() { - return new TOpenSessionReq(this); - } - - @Override - public void clear() { - this.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - this.username = null; - this.password = null; - this.configuration = null; - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getClient_protocol() { - return this.client_protocol; - } - - /** - * - * @see TProtocolVersion - */ - public void setClient_protocol(TProtocolVersion client_protocol) { - this.client_protocol = client_protocol; - } - - public void unsetClient_protocol() { - this.client_protocol = null; - } - - /** Returns true if field client_protocol is set (has been assigned a value) and false otherwise */ - public boolean isSetClient_protocol() { - return this.client_protocol != null; - } - - public void setClient_protocolIsSet(boolean value) { - if (!value) { - this.client_protocol = null; - } - } - - public String getUsername() { - return this.username; - } - - public void setUsername(String username) { - this.username = username; - } - - public void unsetUsername() { - this.username = null; - } - - /** Returns true if field username is set (has been assigned a value) and false otherwise */ - public boolean isSetUsername() { - return this.username != null; - } - - public void setUsernameIsSet(boolean value) { - if (!value) { - this.username = null; - } - } - - public String getPassword() { - return this.password; - } - - public void setPassword(String password) { - this.password = password; - } - - public void unsetPassword() { - this.password = null; - } - - /** Returns true if field password is set (has been assigned a value) and false otherwise */ - public boolean isSetPassword() { - return this.password != null; - } - - public void setPasswordIsSet(boolean value) { - if (!value) { - this.password = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case CLIENT_PROTOCOL: - if (value == null) { - unsetClient_protocol(); - } else { - setClient_protocol((TProtocolVersion)value); - } - break; - - case USERNAME: - if (value == null) { - unsetUsername(); - } else { - setUsername((String)value); - } - break; - - case PASSWORD: - if (value == null) { - unsetPassword(); - } else { - setPassword((String)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case CLIENT_PROTOCOL: - return getClient_protocol(); - - case USERNAME: - return getUsername(); - - case PASSWORD: - return getPassword(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case CLIENT_PROTOCOL: - return isSetClient_protocol(); - case USERNAME: - return isSetUsername(); - case PASSWORD: - return isSetPassword(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionReq) - return this.equals((TOpenSessionReq)that); - return false; - } - - public boolean equals(TOpenSessionReq that) { - if (that == null) - return false; - - boolean this_present_client_protocol = true && this.isSetClient_protocol(); - boolean that_present_client_protocol = true && that.isSetClient_protocol(); - if (this_present_client_protocol || that_present_client_protocol) { - if (!(this_present_client_protocol && that_present_client_protocol)) - return false; - if (!this.client_protocol.equals(that.client_protocol)) - return false; - } - - boolean this_present_username = true && this.isSetUsername(); - boolean that_present_username = true && that.isSetUsername(); - if (this_present_username || that_present_username) { - if (!(this_present_username && that_present_username)) - return false; - if (!this.username.equals(that.username)) - return false; - } - - boolean this_present_password = true && this.isSetPassword(); - boolean that_present_password = true && that.isSetPassword(); - if (this_present_password || that_present_password) { - if (!(this_present_password && that_present_password)) - return false; - if (!this.password.equals(that.password)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration 
= true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_client_protocol = true && (isSetClient_protocol()); - list.add(present_client_protocol); - if (present_client_protocol) - list.add(client_protocol.getValue()); - - boolean present_username = true && (isSetUsername()); - list.add(present_username); - if (present_username) - list.add(username); - - boolean present_password = true && (isSetPassword()); - list.add(present_password); - if (present_password) - list.add(password); - - boolean present_configuration = true && (isSetConfiguration()); - list.add(present_configuration); - if (present_configuration) - list.add(configuration); - - return list.hashCode(); - } - - @Override - public int compareTo(TOpenSessionReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetClient_protocol()).compareTo(other.isSetClient_protocol()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetClient_protocol()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.client_protocol, other.client_protocol); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetUsername()).compareTo(other.isSetUsername()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetUsername()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.username, other.username); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetPassword()).compareTo(other.isSetPassword()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetPassword()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.password, other.password); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(other.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, other.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionReq("); - boolean first = true; - - sb.append("client_protocol:"); - if (this.client_protocol == null) { - sb.append("null"); - } else { - sb.append(this.client_protocol); - } - first = false; - if (isSetUsername()) { - if (!first) sb.append(", "); - sb.append("username:"); - if (this.username == null) { - sb.append("null"); - } else { - sb.append(this.username); - } - first = false; - } - if (isSetPassword()) { - if (!first) sb.append(", "); - 
sb.append("password:"); - if (this.password == null) { - sb.append("null"); - } else { - sb.append(this.password); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetClient_protocol()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'client_protocol' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionReqStandardSchemeFactory implements SchemeFactory { - public TOpenSessionReqStandardScheme getScheme() { - return new TOpenSessionReqStandardScheme(); - } - } - - private static class TOpenSessionReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // CLIENT_PROTOCOL - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // USERNAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PASSWORD - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map142 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map142.size); - String _key143; - String _val144; - for (int _i145 = 0; _i145 < _map142.size; ++_i145) - { - _key143 = iprot.readString(); - _val144 = iprot.readString(); - struct.configuration.put(_key143, _val144); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - 
iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.client_protocol != null) { - oprot.writeFieldBegin(CLIENT_PROTOCOL_FIELD_DESC); - oprot.writeI32(struct.client_protocol.getValue()); - oprot.writeFieldEnd(); - } - if (struct.username != null) { - if (struct.isSetUsername()) { - oprot.writeFieldBegin(USERNAME_FIELD_DESC); - oprot.writeString(struct.username); - oprot.writeFieldEnd(); - } - } - if (struct.password != null) { - if (struct.isSetPassword()) { - oprot.writeFieldBegin(PASSWORD_FIELD_DESC); - oprot.writeString(struct.password); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter146 : struct.configuration.entrySet()) - { - oprot.writeString(_iter146.getKey()); - oprot.writeString(_iter146.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionReqTupleSchemeFactory implements SchemeFactory { - public TOpenSessionReqTupleScheme getScheme() { - return new TOpenSessionReqTupleScheme(); - } - } - - private static class TOpenSessionReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.client_protocol.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetUsername()) { - optionals.set(0); - } - if (struct.isSetPassword()) { - optionals.set(1); - } - if (struct.isSetConfiguration()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetUsername()) { - oprot.writeString(struct.username); - } - if (struct.isSetPassword()) { - oprot.writeString(struct.password); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter147 : struct.configuration.entrySet()) - { - oprot.writeString(_iter147.getKey()); - oprot.writeString(_iter147.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.client_protocol = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setClient_protocolIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - struct.username = iprot.readString(); - struct.setUsernameIsSet(true); - } - if (incoming.get(1)) { - struct.password = iprot.readString(); - struct.setPasswordIsSet(true); - } - if (incoming.get(2)) { - { - org.apache.thrift.protocol.TMap _map148 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map148.size); - String _key149; - String _val150; - for (int _i151 = 0; _i151 < _map148.size; ++_i151) - { - _key149 = iprot.readString(); - _val150 = iprot.readString(); - struct.configuration.put(_key149, _val150); - } - } - 
struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java deleted file mode 100644 index ee1c87bfd76fa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOpenSessionResp.java +++ /dev/null @@ -1,783 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOpenSessionResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOpenSessionResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField SERVER_PROTOCOL_VERSION_FIELD_DESC = new org.apache.thrift.protocol.TField("serverProtocolVersion", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField CONFIGURATION_FIELD_DESC = new org.apache.thrift.protocol.TField("configuration", org.apache.thrift.protocol.TType.MAP, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOpenSessionRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOpenSessionRespTupleSchemeFactory()); - } - - private TStatus status; // required - private TProtocolVersion serverProtocolVersion; // required - private TSessionHandle sessionHandle; // optional - private Map configuration; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"), - /** - * - * @see TProtocolVersion - */ - SERVER_PROTOCOL_VERSION((short)2, "serverProtocolVersion"), - SESSION_HANDLE((short)3, "sessionHandle"), - CONFIGURATION((short)4, "configuration"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - case 2: // SERVER_PROTOCOL_VERSION - return SERVER_PROTOCOL_VERSION; - case 3: // SESSION_HANDLE - return SESSION_HANDLE; - case 4: // CONFIGURATION - return CONFIGURATION; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.SESSION_HANDLE,_Fields.CONFIGURATION}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - tmpMap.put(_Fields.SERVER_PROTOCOL_VERSION, new org.apache.thrift.meta_data.FieldMetaData("serverProtocolVersion", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TProtocolVersion.class))); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.CONFIGURATION, new org.apache.thrift.meta_data.FieldMetaData("configuration", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOpenSessionResp.class, metaDataMap); - } - - public TOpenSessionResp() { - this.serverProtocolVersion = 
org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - } - - public TOpenSessionResp( - TStatus status, - TProtocolVersion serverProtocolVersion) - { - this(); - this.status = status; - this.serverProtocolVersion = serverProtocolVersion; - } - - /** - * Performs a deep copy on other. - */ - public TOpenSessionResp(TOpenSessionResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - if (other.isSetServerProtocolVersion()) { - this.serverProtocolVersion = other.serverProtocolVersion; - } - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetConfiguration()) { - Map __this__configuration = new HashMap(other.configuration); - this.configuration = __this__configuration; - } - } - - public TOpenSessionResp deepCopy() { - return new TOpenSessionResp(this); - } - - @Override - public void clear() { - this.status = null; - this.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V10; - - this.sessionHandle = null; - this.configuration = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - /** - * - * @see TProtocolVersion - */ - public TProtocolVersion getServerProtocolVersion() { - return this.serverProtocolVersion; - } - - /** - * - * @see TProtocolVersion - */ - public void setServerProtocolVersion(TProtocolVersion serverProtocolVersion) { - this.serverProtocolVersion = serverProtocolVersion; - } - - public void unsetServerProtocolVersion() { - this.serverProtocolVersion = null; - } - - /** Returns true if field serverProtocolVersion is set (has been assigned a value) and false otherwise */ - public boolean isSetServerProtocolVersion() { - return this.serverProtocolVersion != null; - } - - public void setServerProtocolVersionIsSet(boolean value) { - if (!value) { - this.serverProtocolVersion = null; - } - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public int getConfigurationSize() { - return (this.configuration == null) ? 
0 : this.configuration.size(); - } - - public void putToConfiguration(String key, String val) { - if (this.configuration == null) { - this.configuration = new HashMap(); - } - this.configuration.put(key, val); - } - - public Map getConfiguration() { - return this.configuration; - } - - public void setConfiguration(Map configuration) { - this.configuration = configuration; - } - - public void unsetConfiguration() { - this.configuration = null; - } - - /** Returns true if field configuration is set (has been assigned a value) and false otherwise */ - public boolean isSetConfiguration() { - return this.configuration != null; - } - - public void setConfigurationIsSet(boolean value) { - if (!value) { - this.configuration = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - case SERVER_PROTOCOL_VERSION: - if (value == null) { - unsetServerProtocolVersion(); - } else { - setServerProtocolVersion((TProtocolVersion)value); - } - break; - - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case CONFIGURATION: - if (value == null) { - unsetConfiguration(); - } else { - setConfiguration((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - case SERVER_PROTOCOL_VERSION: - return getServerProtocolVersion(); - - case SESSION_HANDLE: - return getSessionHandle(); - - case CONFIGURATION: - return getConfiguration(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - case SERVER_PROTOCOL_VERSION: - return isSetServerProtocolVersion(); - case SESSION_HANDLE: - return isSetSessionHandle(); - case CONFIGURATION: - return isSetConfiguration(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOpenSessionResp) - return this.equals((TOpenSessionResp)that); - return false; - } - - public boolean equals(TOpenSessionResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_serverProtocolVersion = true && this.isSetServerProtocolVersion(); - boolean that_present_serverProtocolVersion = true && that.isSetServerProtocolVersion(); - if (this_present_serverProtocolVersion || that_present_serverProtocolVersion) { - if (!(this_present_serverProtocolVersion && that_present_serverProtocolVersion)) - return false; - if (!this.serverProtocolVersion.equals(that.serverProtocolVersion)) - return false; - } - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if 
(!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_configuration = true && this.isSetConfiguration(); - boolean that_present_configuration = true && that.isSetConfiguration(); - if (this_present_configuration || that_present_configuration) { - if (!(this_present_configuration && that_present_configuration)) - return false; - if (!this.configuration.equals(that.configuration)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - boolean present_serverProtocolVersion = true && (isSetServerProtocolVersion()); - list.add(present_serverProtocolVersion); - if (present_serverProtocolVersion) - list.add(serverProtocolVersion.getValue()); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_configuration = true && (isSetConfiguration()); - list.add(present_configuration); - if (present_configuration) - list.add(configuration); - - return list.hashCode(); - } - - @Override - public int compareTo(TOpenSessionResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetServerProtocolVersion()).compareTo(other.isSetServerProtocolVersion()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetServerProtocolVersion()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.serverProtocolVersion, other.serverProtocolVersion); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetConfiguration()).compareTo(other.isSetConfiguration()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetConfiguration()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.configuration, other.configuration); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOpenSessionResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) 
sb.append(", "); - sb.append("serverProtocolVersion:"); - if (this.serverProtocolVersion == null) { - sb.append("null"); - } else { - sb.append(this.serverProtocolVersion); - } - first = false; - if (isSetSessionHandle()) { - if (!first) sb.append(", "); - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - } - if (isSetConfiguration()) { - if (!first) sb.append(", "); - sb.append("configuration:"); - if (this.configuration == null) { - sb.append("null"); - } else { - sb.append(this.configuration); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetServerProtocolVersion()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'serverProtocolVersion' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOpenSessionRespStandardSchemeFactory implements SchemeFactory { - public TOpenSessionRespStandardScheme getScheme() { - return new TOpenSessionRespStandardScheme(); - } - } - - private static class TOpenSessionRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // SERVER_PROTOCOL_VERSION - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // CONFIGURATION - if (schemeField.type == 
org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map152 = iprot.readMapBegin(); - struct.configuration = new HashMap(2*_map152.size); - String _key153; - String _val154; - for (int _i155 = 0; _i155 < _map152.size; ++_i155) - { - _key153 = iprot.readString(); - _val154 = iprot.readString(); - struct.configuration.put(_key153, _val154); - } - iprot.readMapEnd(); - } - struct.setConfigurationIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOpenSessionResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.serverProtocolVersion != null) { - oprot.writeFieldBegin(SERVER_PROTOCOL_VERSION_FIELD_DESC); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - oprot.writeFieldEnd(); - } - if (struct.sessionHandle != null) { - if (struct.isSetSessionHandle()) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - } - if (struct.configuration != null) { - if (struct.isSetConfiguration()) { - oprot.writeFieldBegin(CONFIGURATION_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, struct.configuration.size())); - for (Map.Entry _iter156 : struct.configuration.entrySet()) - { - oprot.writeString(_iter156.getKey()); - oprot.writeString(_iter156.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOpenSessionRespTupleSchemeFactory implements SchemeFactory { - public TOpenSessionRespTupleScheme getScheme() { - return new TOpenSessionRespTupleScheme(); - } - } - - private static class TOpenSessionRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - oprot.writeI32(struct.serverProtocolVersion.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetSessionHandle()) { - optionals.set(0); - } - if (struct.isSetConfiguration()) { - optionals.set(1); - } - oprot.writeBitSet(optionals, 2); - if (struct.isSetSessionHandle()) { - struct.sessionHandle.write(oprot); - } - if (struct.isSetConfiguration()) { - { - oprot.writeI32(struct.configuration.size()); - for (Map.Entry _iter157 : struct.configuration.entrySet()) - { - oprot.writeString(_iter157.getKey()); - oprot.writeString(_iter157.getValue()); - } - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOpenSessionResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - struct.serverProtocolVersion = org.apache.hive.service.rpc.thrift.TProtocolVersion.findByValue(iprot.readI32()); - struct.setServerProtocolVersionIsSet(true); - BitSet incoming = iprot.readBitSet(2); - if 
(incoming.get(0)) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } - if (incoming.get(1)) { - { - org.apache.thrift.protocol.TMap _map158 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.configuration = new HashMap(2*_map158.size); - String _key159; - String _val160; - for (int _i161 = 0; _i161 < _map158.size; ++_i161) - { - _key159 = iprot.readString(); - _val160 = iprot.readString(); - struct.configuration.put(_key159, _val160); - } - } - struct.setConfigurationIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java deleted file mode 100644 index 9eaf2be3ed5ea..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationHandle.java +++ /dev/null @@ -1,709 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TOperationHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TOperationHandle"); - - private static final org.apache.thrift.protocol.TField OPERATION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("operationId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField OPERATION_TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("operationType", org.apache.thrift.protocol.TType.I32, (short)2); - private static final org.apache.thrift.protocol.TField HAS_RESULT_SET_FIELD_DESC = new org.apache.thrift.protocol.TField("hasResultSet", org.apache.thrift.protocol.TType.BOOL, (short)3); - private static final org.apache.thrift.protocol.TField MODIFIED_ROW_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("modifiedRowCount", org.apache.thrift.protocol.TType.DOUBLE, (short)4); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TOperationHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TOperationHandleTupleSchemeFactory()); - 
} - - private THandleIdentifier operationId; // required - private TOperationType operationType; // required - private boolean hasResultSet; // required - private double modifiedRowCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - OPERATION_ID((short)1, "operationId"), - /** - * - * @see TOperationType - */ - OPERATION_TYPE((short)2, "operationType"), - HAS_RESULT_SET((short)3, "hasResultSet"), - MODIFIED_ROW_COUNT((short)4, "modifiedRowCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // OPERATION_ID - return OPERATION_ID; - case 2: // OPERATION_TYPE - return OPERATION_TYPE; - case 3: // HAS_RESULT_SET - return HAS_RESULT_SET; - case 4: // MODIFIED_ROW_COUNT - return MODIFIED_ROW_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __HASRESULTSET_ISSET_ID = 0; - private static final int __MODIFIEDROWCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.MODIFIED_ROW_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.OPERATION_ID, new org.apache.thrift.meta_data.FieldMetaData("operationId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - tmpMap.put(_Fields.OPERATION_TYPE, new org.apache.thrift.meta_data.FieldMetaData("operationType", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TOperationType.class))); - tmpMap.put(_Fields.HAS_RESULT_SET, new org.apache.thrift.meta_data.FieldMetaData("hasResultSet", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.BOOL))); - tmpMap.put(_Fields.MODIFIED_ROW_COUNT, new org.apache.thrift.meta_data.FieldMetaData("modifiedRowCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - metaDataMap = 
Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TOperationHandle.class, metaDataMap); - } - - public TOperationHandle() { - } - - public TOperationHandle( - THandleIdentifier operationId, - TOperationType operationType, - boolean hasResultSet) - { - this(); - this.operationId = operationId; - this.operationType = operationType; - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - /** - * Performs a deep copy on other. - */ - public TOperationHandle(TOperationHandle other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetOperationId()) { - this.operationId = new THandleIdentifier(other.operationId); - } - if (other.isSetOperationType()) { - this.operationType = other.operationType; - } - this.hasResultSet = other.hasResultSet; - this.modifiedRowCount = other.modifiedRowCount; - } - - public TOperationHandle deepCopy() { - return new TOperationHandle(this); - } - - @Override - public void clear() { - this.operationId = null; - this.operationType = null; - setHasResultSetIsSet(false); - this.hasResultSet = false; - setModifiedRowCountIsSet(false); - this.modifiedRowCount = 0.0; - } - - public THandleIdentifier getOperationId() { - return this.operationId; - } - - public void setOperationId(THandleIdentifier operationId) { - this.operationId = operationId; - } - - public void unsetOperationId() { - this.operationId = null; - } - - /** Returns true if field operationId is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationId() { - return this.operationId != null; - } - - public void setOperationIdIsSet(boolean value) { - if (!value) { - this.operationId = null; - } - } - - /** - * - * @see TOperationType - */ - public TOperationType getOperationType() { - return this.operationType; - } - - /** - * - * @see TOperationType - */ - public void setOperationType(TOperationType operationType) { - this.operationType = operationType; - } - - public void unsetOperationType() { - this.operationType = null; - } - - /** Returns true if field operationType is set (has been assigned a value) and false otherwise */ - public boolean isSetOperationType() { - return this.operationType != null; - } - - public void setOperationTypeIsSet(boolean value) { - if (!value) { - this.operationType = null; - } - } - - public boolean isHasResultSet() { - return this.hasResultSet; - } - - public void setHasResultSet(boolean hasResultSet) { - this.hasResultSet = hasResultSet; - setHasResultSetIsSet(true); - } - - public void unsetHasResultSet() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - /** Returns true if field hasResultSet is set (has been assigned a value) and false otherwise */ - public boolean isSetHasResultSet() { - return EncodingUtils.testBit(__isset_bitfield, __HASRESULTSET_ISSET_ID); - } - - public void setHasResultSetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __HASRESULTSET_ISSET_ID, value); - } - - public double getModifiedRowCount() { - return this.modifiedRowCount; - } - - public void setModifiedRowCount(double modifiedRowCount) { - this.modifiedRowCount = modifiedRowCount; - setModifiedRowCountIsSet(true); - } - - public void unsetModifiedRowCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - /** Returns true if field modifiedRowCount is set (has been assigned a value) and false otherwise */ - public boolean isSetModifiedRowCount() { - 
return EncodingUtils.testBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID); - } - - public void setModifiedRowCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __MODIFIEDROWCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case OPERATION_ID: - if (value == null) { - unsetOperationId(); - } else { - setOperationId((THandleIdentifier)value); - } - break; - - case OPERATION_TYPE: - if (value == null) { - unsetOperationType(); - } else { - setOperationType((TOperationType)value); - } - break; - - case HAS_RESULT_SET: - if (value == null) { - unsetHasResultSet(); - } else { - setHasResultSet((Boolean)value); - } - break; - - case MODIFIED_ROW_COUNT: - if (value == null) { - unsetModifiedRowCount(); - } else { - setModifiedRowCount((Double)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case OPERATION_ID: - return getOperationId(); - - case OPERATION_TYPE: - return getOperationType(); - - case HAS_RESULT_SET: - return isHasResultSet(); - - case MODIFIED_ROW_COUNT: - return getModifiedRowCount(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case OPERATION_ID: - return isSetOperationId(); - case OPERATION_TYPE: - return isSetOperationType(); - case HAS_RESULT_SET: - return isSetHasResultSet(); - case MODIFIED_ROW_COUNT: - return isSetModifiedRowCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TOperationHandle) - return this.equals((TOperationHandle)that); - return false; - } - - public boolean equals(TOperationHandle that) { - if (that == null) - return false; - - boolean this_present_operationId = true && this.isSetOperationId(); - boolean that_present_operationId = true && that.isSetOperationId(); - if (this_present_operationId || that_present_operationId) { - if (!(this_present_operationId && that_present_operationId)) - return false; - if (!this.operationId.equals(that.operationId)) - return false; - } - - boolean this_present_operationType = true && this.isSetOperationType(); - boolean that_present_operationType = true && that.isSetOperationType(); - if (this_present_operationType || that_present_operationType) { - if (!(this_present_operationType && that_present_operationType)) - return false; - if (!this.operationType.equals(that.operationType)) - return false; - } - - boolean this_present_hasResultSet = true; - boolean that_present_hasResultSet = true; - if (this_present_hasResultSet || that_present_hasResultSet) { - if (!(this_present_hasResultSet && that_present_hasResultSet)) - return false; - if (this.hasResultSet != that.hasResultSet) - return false; - } - - boolean this_present_modifiedRowCount = true && this.isSetModifiedRowCount(); - boolean that_present_modifiedRowCount = true && that.isSetModifiedRowCount(); - if (this_present_modifiedRowCount || that_present_modifiedRowCount) { - if (!(this_present_modifiedRowCount && that_present_modifiedRowCount)) - return false; - if (this.modifiedRowCount != that.modifiedRowCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_operationId = true && 
(isSetOperationId()); - list.add(present_operationId); - if (present_operationId) - list.add(operationId); - - boolean present_operationType = true && (isSetOperationType()); - list.add(present_operationType); - if (present_operationType) - list.add(operationType.getValue()); - - boolean present_hasResultSet = true; - list.add(present_hasResultSet); - if (present_hasResultSet) - list.add(hasResultSet); - - boolean present_modifiedRowCount = true && (isSetModifiedRowCount()); - list.add(present_modifiedRowCount); - if (present_modifiedRowCount) - list.add(modifiedRowCount); - - return list.hashCode(); - } - - @Override - public int compareTo(TOperationHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetOperationId()).compareTo(other.isSetOperationId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationId, other.operationId); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetOperationType()).compareTo(other.isSetOperationType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetOperationType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.operationType, other.operationType); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetHasResultSet()).compareTo(other.isSetHasResultSet()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHasResultSet()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.hasResultSet, other.hasResultSet); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetModifiedRowCount()).compareTo(other.isSetModifiedRowCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetModifiedRowCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.modifiedRowCount, other.modifiedRowCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TOperationHandle("); - boolean first = true; - - sb.append("operationId:"); - if (this.operationId == null) { - sb.append("null"); - } else { - sb.append(this.operationId); - } - first = false; - if (!first) sb.append(", "); - sb.append("operationType:"); - if (this.operationType == null) { - sb.append("null"); - } else { - sb.append(this.operationType); - } - first = false; - if (!first) sb.append(", "); - sb.append("hasResultSet:"); - sb.append(this.hasResultSet); - first = false; - if (isSetModifiedRowCount()) { - if (!first) sb.append(", "); - sb.append("modifiedRowCount:"); - sb.append(this.modifiedRowCount); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if 
(!isSetOperationId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationId' is unset! Struct:" + toString()); - } - - if (!isSetOperationType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'operationType' is unset! Struct:" + toString()); - } - - if (!isSetHasResultSet()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'hasResultSet' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (operationId != null) { - operationId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TOperationHandleStandardSchemeFactory implements SchemeFactory { - public TOperationHandleStandardScheme getScheme() { - return new TOperationHandleStandardScheme(); - } - } - - private static class TOperationHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TOperationHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // OPERATION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // OPERATION_TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.operationType = org.apache.hive.service.rpc.thrift.TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // HAS_RESULT_SET - if (schemeField.type == org.apache.thrift.protocol.TType.BOOL) { - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // MODIFIED_ROW_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TOperationHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - 
oprot.writeStructBegin(STRUCT_DESC); - if (struct.operationId != null) { - oprot.writeFieldBegin(OPERATION_ID_FIELD_DESC); - struct.operationId.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.operationType != null) { - oprot.writeFieldBegin(OPERATION_TYPE_FIELD_DESC); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(HAS_RESULT_SET_FIELD_DESC); - oprot.writeBool(struct.hasResultSet); - oprot.writeFieldEnd(); - if (struct.isSetModifiedRowCount()) { - oprot.writeFieldBegin(MODIFIED_ROW_COUNT_FIELD_DESC); - oprot.writeDouble(struct.modifiedRowCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TOperationHandleTupleSchemeFactory implements SchemeFactory { - public TOperationHandleTupleScheme getScheme() { - return new TOperationHandleTupleScheme(); - } - } - - private static class TOperationHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.operationId.write(oprot); - oprot.writeI32(struct.operationType.getValue()); - oprot.writeBool(struct.hasResultSet); - BitSet optionals = new BitSet(); - if (struct.isSetModifiedRowCount()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetModifiedRowCount()) { - oprot.writeDouble(struct.modifiedRowCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TOperationHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.operationId = new THandleIdentifier(); - struct.operationId.read(iprot); - struct.setOperationIdIsSet(true); - struct.operationType = org.apache.hive.service.rpc.thrift.TOperationType.findByValue(iprot.readI32()); - struct.setOperationTypeIsSet(true); - struct.hasResultSet = iprot.readBool(); - struct.setHasResultSetIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.modifiedRowCount = iprot.readDouble(); - struct.setModifiedRowCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java deleted file mode 100644 index 4390b4b887583..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationState.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TOperationState implements org.apache.thrift.TEnum { - INITIALIZED_STATE(0), - RUNNING_STATE(1), - FINISHED_STATE(2), - CANCELED_STATE(3), - CLOSED_STATE(4), - ERROR_STATE(5), - UKNOWN_STATE(6), - PENDING_STATE(7), - TIMEDOUT_STATE(8); - - private final int value; - - private TOperationState(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */
-  public static TOperationState findByValue(int value) {
-    switch (value) {
-      case 0:
-        return INITIALIZED_STATE;
-      case 1:
-        return RUNNING_STATE;
-      case 2:
-        return FINISHED_STATE;
-      case 3:
-        return CANCELED_STATE;
-      case 4:
-        return CLOSED_STATE;
-      case 5:
-        return ERROR_STATE;
-      case 6:
-        return UKNOWN_STATE;
-      case 7:
-        return PENDING_STATE;
-      case 8:
-        return TIMEDOUT_STATE;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java
deleted file mode 100644
index 08002ad1dc8e8..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TOperationType.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-
-import java.util.Map;
-import java.util.HashMap;
-import org.apache.thrift.TEnum;
-
-public enum TOperationType implements org.apache.thrift.TEnum {
-  EXECUTE_STATEMENT(0),
-  GET_TYPE_INFO(1),
-  GET_CATALOGS(2),
-  GET_SCHEMAS(3),
-  GET_TABLES(4),
-  GET_TABLE_TYPES(5),
-  GET_COLUMNS(6),
-  GET_FUNCTIONS(7),
-  UNKNOWN(8);
-
-  private final int value;
-
-  private TOperationType(int value) {
-    this.value = value;
-  }
-
-  /**
-   * Get the integer value of this enum value, as defined in the Thrift IDL.
-   */
-  public int getValue() {
-    return value;
-  }
-
-  /**
-   * Find a the enum type by its integer value, as defined in the Thrift IDL.
-   * @return null if the value is not found.
-   */
-  public static TOperationType findByValue(int value) {
-    switch (value) {
-      case 0:
-        return EXECUTE_STATEMENT;
-      case 1:
-        return GET_TYPE_INFO;
-      case 2:
-        return GET_CATALOGS;
-      case 3:
-        return GET_SCHEMAS;
-      case 4:
-        return GET_TABLES;
-      case 5:
-        return GET_TABLE_TYPES;
-      case 6:
-        return GET_COLUMNS;
-      case 7:
-        return GET_FUNCTIONS;
-      case 8:
-        return UNKNOWN;
-      default:
-        return null;
-    }
-  }
-}
diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java
deleted file mode 100644
index 910c90967f614..0000000000000
--- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TPrimitiveTypeEntry.java
+++ /dev/null
@@ -1,516 +0,0 @@
-/**
- * Autogenerated by Thrift Compiler (0.9.3)
- *
- * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
- * @generated
- */
-package org.apache.hive.service.rpc.thrift;
-
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import org.apache.thrift.async.AsyncMethodCallback;
-import org.apache.thrift.server.AbstractNonblockingServer.*;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import javax.annotation.Generated;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TPrimitiveTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPrimitiveTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField TYPE_QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("typeQualifiers", org.apache.thrift.protocol.TType.STRUCT, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TPrimitiveTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TPrimitiveTypeEntryTupleSchemeFactory()); - } - - private TTypeId type; // required - private TTypeQualifiers typeQualifiers; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TTypeId - */ - TYPE((short)1, "type"), - TYPE_QUALIFIERS((short)2, "typeQualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE - return TYPE; - case 2: // TYPE_QUALIFIERS - return TYPE_QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.TYPE_QUALIFIERS}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE, new org.apache.thrift.meta_data.FieldMetaData("type", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TTypeId.class))); - tmpMap.put(_Fields.TYPE_QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("typeQualifiers", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifiers.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPrimitiveTypeEntry.class, metaDataMap); - } - - public TPrimitiveTypeEntry() { - } - - public TPrimitiveTypeEntry( - TTypeId type) - { - this(); - this.type = type; - } - - /** - * Performs a deep copy on other. - */ - public TPrimitiveTypeEntry(TPrimitiveTypeEntry other) { - if (other.isSetType()) { - this.type = other.type; - } - if (other.isSetTypeQualifiers()) { - this.typeQualifiers = new TTypeQualifiers(other.typeQualifiers); - } - } - - public TPrimitiveTypeEntry deepCopy() { - return new TPrimitiveTypeEntry(this); - } - - @Override - public void clear() { - this.type = null; - this.typeQualifiers = null; - } - - /** - * - * @see TTypeId - */ - public TTypeId getType() { - return this.type; - } - - /** - * - * @see TTypeId - */ - public void setType(TTypeId type) { - this.type = type; - } - - public void unsetType() { - this.type = null; - } - - /** Returns true if field type is set (has been assigned a value) and false otherwise */ - public boolean isSetType() { - return this.type != null; - } - - public void setTypeIsSet(boolean value) { - if (!value) { - this.type = null; - } - } - - public TTypeQualifiers getTypeQualifiers() { - return this.typeQualifiers; - } - - public void setTypeQualifiers(TTypeQualifiers typeQualifiers) { - this.typeQualifiers = typeQualifiers; - } - - public void unsetTypeQualifiers() { - this.typeQualifiers = null; - } - - /** Returns true if field typeQualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeQualifiers() { - return this.typeQualifiers != null; - } - - public void setTypeQualifiersIsSet(boolean value) { - if (!value) { - this.typeQualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE: - if (value == null) { - unsetType(); - } else { - setType((TTypeId)value); - } - break; - - case TYPE_QUALIFIERS: - if (value == null) { - unsetTypeQualifiers(); - } else { - setTypeQualifiers((TTypeQualifiers)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE: - return getType(); - - case TYPE_QUALIFIERS: - return getTypeQualifiers(); - - } - throw new 
IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE: - return isSetType(); - case TYPE_QUALIFIERS: - return isSetTypeQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TPrimitiveTypeEntry) - return this.equals((TPrimitiveTypeEntry)that); - return false; - } - - public boolean equals(TPrimitiveTypeEntry that) { - if (that == null) - return false; - - boolean this_present_type = true && this.isSetType(); - boolean that_present_type = true && that.isSetType(); - if (this_present_type || that_present_type) { - if (!(this_present_type && that_present_type)) - return false; - if (!this.type.equals(that.type)) - return false; - } - - boolean this_present_typeQualifiers = true && this.isSetTypeQualifiers(); - boolean that_present_typeQualifiers = true && that.isSetTypeQualifiers(); - if (this_present_typeQualifiers || that_present_typeQualifiers) { - if (!(this_present_typeQualifiers && that_present_typeQualifiers)) - return false; - if (!this.typeQualifiers.equals(that.typeQualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_type = true && (isSetType()); - list.add(present_type); - if (present_type) - list.add(type.getValue()); - - boolean present_typeQualifiers = true && (isSetTypeQualifiers()); - list.add(present_typeQualifiers); - if (present_typeQualifiers) - list.add(typeQualifiers); - - return list.hashCode(); - } - - @Override - public int compareTo(TPrimitiveTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetType()).compareTo(other.isSetType()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetType()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.type, other.type); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetTypeQualifiers()).compareTo(other.isSetTypeQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeQualifiers()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeQualifiers, other.typeQualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TPrimitiveTypeEntry("); - boolean first = true; - - sb.append("type:"); - if (this.type == null) { - sb.append("null"); - } else { - sb.append(this.type); - } - first = false; - if (isSetTypeQualifiers()) { - if (!first) sb.append(", "); - sb.append("typeQualifiers:"); - if (this.typeQualifiers == null) { - sb.append("null"); - } else { - sb.append(this.typeQualifiers); - 
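The read() and write() methods shown a little above do not serialize the struct directly; they pick an entry from the static schemes map keyed by the protocol's preferred scheme class (StandardScheme for field-tagged protocols, TupleScheme for TTupleProtocol) and delegate to it. A rough, self-contained sketch of that registry-and-delegate pattern; the types and names here are illustrative stand-ins, not the real libthrift API:

```java
import java.util.HashMap;
import java.util.Map;

// One serialization strategy per "scheme"; the struct looks the strategy up
// at write time, much like schemes.get(prot.getScheme()) in the generated code.
interface DemoScheme<T> {
    void write(StringBuilder out, T value);
}

final class DemoStruct {
    // Hypothetical registry keyed by a scheme marker class.
    private static final Map<Class<?>, DemoScheme<DemoStruct>> SCHEMES = new HashMap<>();
    static {
        SCHEMES.put(VerboseScheme.class, (out, v) -> out.append("name:").append(v.name));
        SCHEMES.put(CompactScheme.class, (out, v) -> out.append(v.name));
    }

    final String name;

    DemoStruct(String name) {
        this.name = name;
    }

    // The caller's "protocol" decides which scheme is preferred.
    String write(Class<?> preferredScheme) {
        StringBuilder sb = new StringBuilder();
        SCHEMES.get(preferredScheme).write(sb, this);
        return sb.toString();
    }

    // Marker types standing in for StandardScheme / TupleScheme.
    static final class VerboseScheme {}
    static final class CompactScheme {}
}
```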
} - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetType()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'type' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - if (typeQualifiers != null) { - typeQualifiers.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TPrimitiveTypeEntryStandardSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryStandardScheme getScheme() { - return new TPrimitiveTypeEntryStandardScheme(); - } - } - - private static class TPrimitiveTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.type = org.apache.hive.service.rpc.thrift.TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // TYPE_QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.type != null) { - oprot.writeFieldBegin(TYPE_FIELD_DESC); - oprot.writeI32(struct.type.getValue()); - oprot.writeFieldEnd(); - } - if (struct.typeQualifiers != null) { - if (struct.isSetTypeQualifiers()) { - oprot.writeFieldBegin(TYPE_QUALIFIERS_FIELD_DESC); - struct.typeQualifiers.write(oprot); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TPrimitiveTypeEntryTupleSchemeFactory implements SchemeFactory { - public TPrimitiveTypeEntryTupleScheme getScheme() { - return new TPrimitiveTypeEntryTupleScheme(); - } - } - - private static class TPrimitiveTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) 
prot; - oprot.writeI32(struct.type.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetTypeQualifiers()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetTypeQualifiers()) { - struct.typeQualifiers.write(oprot); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.type = org.apache.hive.service.rpc.thrift.TTypeId.findByValue(iprot.readI32()); - struct.setTypeIsSet(true); - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.typeQualifiers = new TTypeQualifiers(); - struct.typeQualifiers.read(iprot); - struct.setTypeQualifiersIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java deleted file mode 100644 index ecc413aad4cdc..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProgressUpdateResp.java +++ /dev/null @@ -1,1033 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TProgressUpdateResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TProgressUpdateResp"); - - private static final org.apache.thrift.protocol.TField HEADER_NAMES_FIELD_DESC = new org.apache.thrift.protocol.TField("headerNames", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField PROGRESSED_PERCENTAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("progressedPercentage", org.apache.thrift.protocol.TType.DOUBLE, (short)3); - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField FOOTER_SUMMARY_FIELD_DESC = new org.apache.thrift.protocol.TField("footerSummary", 
org.apache.thrift.protocol.TType.STRING, (short)5); - private static final org.apache.thrift.protocol.TField START_TIME_FIELD_DESC = new org.apache.thrift.protocol.TField("startTime", org.apache.thrift.protocol.TType.I64, (short)6); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TProgressUpdateRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TProgressUpdateRespTupleSchemeFactory()); - } - - private List headerNames; // required - private List> rows; // required - private double progressedPercentage; // required - private TJobExecutionStatus status; // required - private String footerSummary; // required - private long startTime; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - HEADER_NAMES((short)1, "headerNames"), - ROWS((short)2, "rows"), - PROGRESSED_PERCENTAGE((short)3, "progressedPercentage"), - /** - * - * @see TJobExecutionStatus - */ - STATUS((short)4, "status"), - FOOTER_SUMMARY((short)5, "footerSummary"), - START_TIME((short)6, "startTime"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // HEADER_NAMES - return HEADER_NAMES; - case 2: // ROWS - return ROWS; - case 3: // PROGRESSED_PERCENTAGE - return PROGRESSED_PERCENTAGE; - case 4: // STATUS - return STATUS; - case 5: // FOOTER_SUMMARY - return FOOTER_SUMMARY; - case 6: // START_TIME - return START_TIME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __PROGRESSEDPERCENTAGE_ISSET_ID = 0; - private static final int __STARTTIME_ISSET_ID = 1; - private byte __isset_bitfield = 0; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.HEADER_NAMES, new org.apache.thrift.meta_data.FieldMetaData("headerNames", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))))); - tmpMap.put(_Fields.PROGRESSED_PERCENTAGE, new org.apache.thrift.meta_data.FieldMetaData("progressedPercentage", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.DOUBLE))); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TJobExecutionStatus.class))); - tmpMap.put(_Fields.FOOTER_SUMMARY, new org.apache.thrift.meta_data.FieldMetaData("footerSummary", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.START_TIME, new org.apache.thrift.meta_data.FieldMetaData("startTime", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TProgressUpdateResp.class, metaDataMap); - } - - public TProgressUpdateResp() { - } - - public TProgressUpdateResp( - List headerNames, - List> rows, - double progressedPercentage, - TJobExecutionStatus status, - String footerSummary, - long startTime) - { - this(); - this.headerNames = headerNames; - this.rows = rows; - this.progressedPercentage = progressedPercentage; - setProgressedPercentageIsSet(true); - this.status = status; - this.footerSummary = footerSummary; - this.startTime = startTime; - setStartTimeIsSet(true); - } - - /** - * Performs a deep copy on other. 
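The two primitive fields of TProgressUpdateResp (progressedPercentage and startTime) cannot use null to mark "unset", so the isset id assignments above pack one presence bit per field into the single __isset_bitfield byte, flipped through EncodingUtils in the setters further down. A small stand-alone sketch of that bookkeeping with the bit operations written out (a hypothetical class, not the generated one):

```java
// Minimal illustration of the __isset_bitfield bookkeeping used for primitive
// fields: one presence bit per field, all stored in a single byte.
final class IssetDemo {
    private static final int PROGRESSED_PERCENTAGE_ISSET_ID = 0;
    private static final int START_TIME_ISSET_ID = 1;

    private byte issetBitfield = 0;
    private double progressedPercentage;
    private long startTime;

    void setProgressedPercentage(double value) {
        progressedPercentage = value;
        issetBitfield |= (1 << PROGRESSED_PERCENTAGE_ISSET_ID);   // mark as set
    }

    void unsetStartTime() {
        issetBitfield &= ~(1 << START_TIME_ISSET_ID);             // clear the flag
    }

    boolean isSetProgressedPercentage() {
        return (issetBitfield & (1 << PROGRESSED_PERCENTAGE_ISSET_ID)) != 0;
    }

    boolean isSetStartTime() {
        return (issetBitfield & (1 << START_TIME_ISSET_ID)) != 0;
    }
}
```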
- */ - public TProgressUpdateResp(TProgressUpdateResp other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetHeaderNames()) { - List __this__headerNames = new ArrayList(other.headerNames); - this.headerNames = __this__headerNames; - } - if (other.isSetRows()) { - List> __this__rows = new ArrayList>(other.rows.size()); - for (List other_element : other.rows) { - List __this__rows_copy = new ArrayList(other_element); - __this__rows.add(__this__rows_copy); - } - this.rows = __this__rows; - } - this.progressedPercentage = other.progressedPercentage; - if (other.isSetStatus()) { - this.status = other.status; - } - if (other.isSetFooterSummary()) { - this.footerSummary = other.footerSummary; - } - this.startTime = other.startTime; - } - - public TProgressUpdateResp deepCopy() { - return new TProgressUpdateResp(this); - } - - @Override - public void clear() { - this.headerNames = null; - this.rows = null; - setProgressedPercentageIsSet(false); - this.progressedPercentage = 0.0; - this.status = null; - this.footerSummary = null; - setStartTimeIsSet(false); - this.startTime = 0; - } - - public int getHeaderNamesSize() { - return (this.headerNames == null) ? 0 : this.headerNames.size(); - } - - public java.util.Iterator getHeaderNamesIterator() { - return (this.headerNames == null) ? null : this.headerNames.iterator(); - } - - public void addToHeaderNames(String elem) { - if (this.headerNames == null) { - this.headerNames = new ArrayList(); - } - this.headerNames.add(elem); - } - - public List getHeaderNames() { - return this.headerNames; - } - - public void setHeaderNames(List headerNames) { - this.headerNames = headerNames; - } - - public void unsetHeaderNames() { - this.headerNames = null; - } - - /** Returns true if field headerNames is set (has been assigned a value) and false otherwise */ - public boolean isSetHeaderNames() { - return this.headerNames != null; - } - - public void setHeaderNamesIsSet(boolean value) { - if (!value) { - this.headerNames = null; - } - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator> getRowsIterator() { - return (this.rows == null) ? 
null : this.rows.iterator(); - } - - public void addToRows(List elem) { - if (this.rows == null) { - this.rows = new ArrayList>(); - } - this.rows.add(elem); - } - - public List> getRows() { - return this.rows; - } - - public void setRows(List> rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public double getProgressedPercentage() { - return this.progressedPercentage; - } - - public void setProgressedPercentage(double progressedPercentage) { - this.progressedPercentage = progressedPercentage; - setProgressedPercentageIsSet(true); - } - - public void unsetProgressedPercentage() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID); - } - - /** Returns true if field progressedPercentage is set (has been assigned a value) and false otherwise */ - public boolean isSetProgressedPercentage() { - return EncodingUtils.testBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID); - } - - public void setProgressedPercentageIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __PROGRESSEDPERCENTAGE_ISSET_ID, value); - } - - /** - * - * @see TJobExecutionStatus - */ - public TJobExecutionStatus getStatus() { - return this.status; - } - - /** - * - * @see TJobExecutionStatus - */ - public void setStatus(TJobExecutionStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public String getFooterSummary() { - return this.footerSummary; - } - - public void setFooterSummary(String footerSummary) { - this.footerSummary = footerSummary; - } - - public void unsetFooterSummary() { - this.footerSummary = null; - } - - /** Returns true if field footerSummary is set (has been assigned a value) and false otherwise */ - public boolean isSetFooterSummary() { - return this.footerSummary != null; - } - - public void setFooterSummaryIsSet(boolean value) { - if (!value) { - this.footerSummary = null; - } - } - - public long getStartTime() { - return this.startTime; - } - - public void setStartTime(long startTime) { - this.startTime = startTime; - setStartTimeIsSet(true); - } - - public void unsetStartTime() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTTIME_ISSET_ID); - } - - /** Returns true if field startTime is set (has been assigned a value) and false otherwise */ - public boolean isSetStartTime() { - return EncodingUtils.testBit(__isset_bitfield, __STARTTIME_ISSET_ID); - } - - public void setStartTimeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTTIME_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case HEADER_NAMES: - if (value == null) { - unsetHeaderNames(); - } else { - setHeaderNames((List)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List>)value); - } - break; - - case PROGRESSED_PERCENTAGE: - if (value == null) { - unsetProgressedPercentage(); - } else { - 
setProgressedPercentage((Double)value); - } - break; - - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TJobExecutionStatus)value); - } - break; - - case FOOTER_SUMMARY: - if (value == null) { - unsetFooterSummary(); - } else { - setFooterSummary((String)value); - } - break; - - case START_TIME: - if (value == null) { - unsetStartTime(); - } else { - setStartTime((Long)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case HEADER_NAMES: - return getHeaderNames(); - - case ROWS: - return getRows(); - - case PROGRESSED_PERCENTAGE: - return getProgressedPercentage(); - - case STATUS: - return getStatus(); - - case FOOTER_SUMMARY: - return getFooterSummary(); - - case START_TIME: - return getStartTime(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case HEADER_NAMES: - return isSetHeaderNames(); - case ROWS: - return isSetRows(); - case PROGRESSED_PERCENTAGE: - return isSetProgressedPercentage(); - case STATUS: - return isSetStatus(); - case FOOTER_SUMMARY: - return isSetFooterSummary(); - case START_TIME: - return isSetStartTime(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TProgressUpdateResp) - return this.equals((TProgressUpdateResp)that); - return false; - } - - public boolean equals(TProgressUpdateResp that) { - if (that == null) - return false; - - boolean this_present_headerNames = true && this.isSetHeaderNames(); - boolean that_present_headerNames = true && that.isSetHeaderNames(); - if (this_present_headerNames || that_present_headerNames) { - if (!(this_present_headerNames && that_present_headerNames)) - return false; - if (!this.headerNames.equals(that.headerNames)) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_progressedPercentage = true; - boolean that_present_progressedPercentage = true; - if (this_present_progressedPercentage || that_present_progressedPercentage) { - if (!(this_present_progressedPercentage && that_present_progressedPercentage)) - return false; - if (this.progressedPercentage != that.progressedPercentage) - return false; - } - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return false; - if (!this.status.equals(that.status)) - return false; - } - - boolean this_present_footerSummary = true && this.isSetFooterSummary(); - boolean that_present_footerSummary = true && that.isSetFooterSummary(); - if (this_present_footerSummary || that_present_footerSummary) { - if (!(this_present_footerSummary && that_present_footerSummary)) - return false; - if (!this.footerSummary.equals(that.footerSummary)) - return false; - } - - boolean this_present_startTime = true; - boolean that_present_startTime = true; - if (this_present_startTime || that_present_startTime) { - if 
(!(this_present_startTime && that_present_startTime)) - return false; - if (this.startTime != that.startTime) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_headerNames = true && (isSetHeaderNames()); - list.add(present_headerNames); - if (present_headerNames) - list.add(headerNames); - - boolean present_rows = true && (isSetRows()); - list.add(present_rows); - if (present_rows) - list.add(rows); - - boolean present_progressedPercentage = true; - list.add(present_progressedPercentage); - if (present_progressedPercentage) - list.add(progressedPercentage); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status.getValue()); - - boolean present_footerSummary = true && (isSetFooterSummary()); - list.add(present_footerSummary); - if (present_footerSummary) - list.add(footerSummary); - - boolean present_startTime = true; - list.add(present_startTime); - if (present_startTime) - list.add(startTime); - - return list.hashCode(); - } - - @Override - public int compareTo(TProgressUpdateResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetHeaderNames()).compareTo(other.isSetHeaderNames()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetHeaderNames()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.headerNames, other.headerNames); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(other.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, other.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetProgressedPercentage()).compareTo(other.isSetProgressedPercentage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetProgressedPercentage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.progressedPercentage, other.progressedPercentage); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetFooterSummary()).compareTo(other.isSetFooterSummary()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetFooterSummary()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.footerSummary, other.footerSummary); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetStartTime()).compareTo(other.isSetStartTime()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartTime()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startTime, other.startTime); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - 
schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TProgressUpdateResp("); - boolean first = true; - - sb.append("headerNames:"); - if (this.headerNames == null) { - sb.append("null"); - } else { - sb.append(this.headerNames); - } - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (!first) sb.append(", "); - sb.append("progressedPercentage:"); - sb.append(this.progressedPercentage); - first = false; - if (!first) sb.append(", "); - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - if (!first) sb.append(", "); - sb.append("footerSummary:"); - if (this.footerSummary == null) { - sb.append("null"); - } else { - sb.append(this.footerSummary); - } - first = false; - if (!first) sb.append(", "); - sb.append("startTime:"); - sb.append(this.startTime); - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetHeaderNames()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'headerNames' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! Struct:" + toString()); - } - - if (!isSetProgressedPercentage()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'progressedPercentage' is unset! Struct:" + toString()); - } - - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! Struct:" + toString()); - } - - if (!isSetFooterSummary()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'footerSummary' is unset! Struct:" + toString()); - } - - if (!isSetStartTime()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startTime' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
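The private writeObject/readObject methods above are the standard Java serialization hooks; the generated class uses them to funnel java.io serialization through a Thrift protocol and, on the read side, to re-initialize the isset bitfield by hand because deserialization bypasses the no-arg constructor. A simplified sketch of the same hook pattern, where defaultWriteObject/defaultReadObject stand in for the TCompactProtocol round-trip the generated code performs:

```java
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

// Custom Java-serialization hooks: redirect the wire format and manually
// restore transient bookkeeping, since no constructor or field initializer
// of this class runs during deserialization.
final class SerializationHookDemo implements Serializable {
    private static final long serialVersionUID = 1L;

    private transient byte issetBitfield = 0;
    private long startTime;

    private void writeObject(ObjectOutputStream out) throws IOException {
        out.defaultWriteObject();          // stand-in for writing via a compact protocol
    }

    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        issetBitfield = 0;                 // reset state the constructor would normally set
        in.defaultReadObject();            // stand-in for reading via a compact protocol
    }
}
```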
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TProgressUpdateRespStandardSchemeFactory implements SchemeFactory { - public TProgressUpdateRespStandardScheme getScheme() { - return new TProgressUpdateRespStandardScheme(); - } - } - - private static class TProgressUpdateRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // HEADER_NAMES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list180 = iprot.readListBegin(); - struct.headerNames = new ArrayList(_list180.size); - String _elem181; - for (int _i182 = 0; _i182 < _list180.size; ++_i182) - { - _elem181 = iprot.readString(); - struct.headerNames.add(_elem181); - } - iprot.readListEnd(); - } - struct.setHeaderNamesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list183 = iprot.readListBegin(); - struct.rows = new ArrayList>(_list183.size); - List _elem184; - for (int _i185 = 0; _i185 < _list183.size; ++_i185) - { - { - org.apache.thrift.protocol.TList _list186 = iprot.readListBegin(); - _elem184 = new ArrayList(_list186.size); - String _elem187; - for (int _i188 = 0; _i188 < _list186.size; ++_i188) - { - _elem187 = iprot.readString(); - _elem184.add(_elem187); - } - iprot.readListEnd(); - } - struct.rows.add(_elem184); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // PROGRESSED_PERCENTAGE - if (schemeField.type == org.apache.thrift.protocol.TType.DOUBLE) { - struct.progressedPercentage = iprot.readDouble(); - struct.setProgressedPercentageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.status = org.apache.hive.service.rpc.thrift.TJobExecutionStatus.findByValue(iprot.readI32()); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // FOOTER_SUMMARY - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.footerSummary = iprot.readString(); - struct.setFooterSummaryIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 6: // START_TIME - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startTime = iprot.readI64(); - struct.setStartTimeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol 
oprot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.headerNames != null) { - oprot.writeFieldBegin(HEADER_NAMES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.headerNames.size())); - for (String _iter189 : struct.headerNames) - { - oprot.writeString(_iter189); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.LIST, struct.rows.size())); - for (List _iter190 : struct.rows) - { - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, _iter190.size())); - for (String _iter191 : _iter190) - { - oprot.writeString(_iter191); - } - oprot.writeListEnd(); - } - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(PROGRESSED_PERCENTAGE_FIELD_DESC); - oprot.writeDouble(struct.progressedPercentage); - oprot.writeFieldEnd(); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - oprot.writeI32(struct.status.getValue()); - oprot.writeFieldEnd(); - } - if (struct.footerSummary != null) { - oprot.writeFieldBegin(FOOTER_SUMMARY_FIELD_DESC); - oprot.writeString(struct.footerSummary); - oprot.writeFieldEnd(); - } - oprot.writeFieldBegin(START_TIME_FIELD_DESC); - oprot.writeI64(struct.startTime); - oprot.writeFieldEnd(); - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TProgressUpdateRespTupleSchemeFactory implements SchemeFactory { - public TProgressUpdateRespTupleScheme getScheme() { - return new TProgressUpdateRespTupleScheme(); - } - } - - private static class TProgressUpdateRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.headerNames.size()); - for (String _iter192 : struct.headerNames) - { - oprot.writeString(_iter192); - } - } - { - oprot.writeI32(struct.rows.size()); - for (List _iter193 : struct.rows) - { - { - oprot.writeI32(_iter193.size()); - for (String _iter194 : _iter193) - { - oprot.writeString(_iter194); - } - } - } - } - oprot.writeDouble(struct.progressedPercentage); - oprot.writeI32(struct.status.getValue()); - oprot.writeString(struct.footerSummary); - oprot.writeI64(struct.startTime); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TProgressUpdateResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list195 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.headerNames = new ArrayList(_list195.size); - String _elem196; - for (int _i197 = 0; _i197 < _list195.size; ++_i197) - { - _elem196 = iprot.readString(); - struct.headerNames.add(_elem196); - } - } - struct.setHeaderNamesIsSet(true); - { - org.apache.thrift.protocol.TList _list198 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.LIST, iprot.readI32()); - struct.rows = new ArrayList>(_list198.size); - List _elem199; - for (int _i200 = 0; _i200 < _list198.size; ++_i200) - { - { - org.apache.thrift.protocol.TList _list201 = new 
org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - _elem199 = new ArrayList(_list201.size); - String _elem202; - for (int _i203 = 0; _i203 < _list201.size; ++_i203) - { - _elem202 = iprot.readString(); - _elem199.add(_elem202); - } - } - struct.rows.add(_elem199); - } - } - struct.setRowsIsSet(true); - struct.progressedPercentage = iprot.readDouble(); - struct.setProgressedPercentageIsSet(true); - struct.status = org.apache.hive.service.rpc.thrift.TJobExecutionStatus.findByValue(iprot.readI32()); - struct.setStatusIsSet(true); - struct.footerSummary = iprot.readString(); - struct.setFooterSummaryIsSet(true); - struct.startTime = iprot.readI64(); - struct.setStartTimeIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java deleted file mode 100644 index 18a782513c500..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TProtocolVersion.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TProtocolVersion implements org.apache.thrift.TEnum { - HIVE_CLI_SERVICE_PROTOCOL_V1(0), - HIVE_CLI_SERVICE_PROTOCOL_V2(1), - HIVE_CLI_SERVICE_PROTOCOL_V3(2), - HIVE_CLI_SERVICE_PROTOCOL_V4(3), - HIVE_CLI_SERVICE_PROTOCOL_V5(4), - HIVE_CLI_SERVICE_PROTOCOL_V6(5), - HIVE_CLI_SERVICE_PROTOCOL_V7(6), - HIVE_CLI_SERVICE_PROTOCOL_V8(7), - HIVE_CLI_SERVICE_PROTOCOL_V9(8), - HIVE_CLI_SERVICE_PROTOCOL_V10(9); - - private final int value; - - private TProtocolVersion(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
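TProtocolVersion is a plain int-valued Thrift enum: each HIVE_CLI_SERVICE_PROTOCOL_V* constant carries its IDL value, and findByValue below returns null for integers it does not recognize rather than throwing. An equivalent stand-alone sketch, using a precomputed array instead of the generated switch (the names here are illustrative):

```java
// Illustrative value-to-enum lookup, matching the behavior of the generated
// findByValue switch: unknown values map to null instead of an exception.
enum DemoProtocolVersion {
    V1(0), V2(1), V3(2);

    private static final DemoProtocolVersion[] BY_VALUE =
        new DemoProtocolVersion[values().length];
    static {
        for (DemoProtocolVersion v : values()) {
            BY_VALUE[v.value] = v;
        }
    }

    private final int value;

    DemoProtocolVersion(int value) {
        this.value = value;
    }

    int getValue() {
        return value;
    }

    static DemoProtocolVersion findByValue(int value) {
        return (value >= 0 && value < BY_VALUE.length) ? BY_VALUE[value] : null;
    }
}
```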
- */ - public static TProtocolVersion findByValue(int value) { - switch (value) { - case 0: - return HIVE_CLI_SERVICE_PROTOCOL_V1; - case 1: - return HIVE_CLI_SERVICE_PROTOCOL_V2; - case 2: - return HIVE_CLI_SERVICE_PROTOCOL_V3; - case 3: - return HIVE_CLI_SERVICE_PROTOCOL_V4; - case 4: - return HIVE_CLI_SERVICE_PROTOCOL_V5; - case 5: - return HIVE_CLI_SERVICE_PROTOCOL_V6; - case 6: - return HIVE_CLI_SERVICE_PROTOCOL_V7; - case 7: - return HIVE_CLI_SERVICE_PROTOCOL_V8; - case 8: - return HIVE_CLI_SERVICE_PROTOCOL_V9; - case 9: - return HIVE_CLI_SERVICE_PROTOCOL_V10; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java deleted file mode 100644 index 8957ebc8d2fff..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenReq.java +++ /dev/null @@ -1,495 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRenewDelegationTokenReq implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenReq"); - - private static final org.apache.thrift.protocol.TField SESSION_HANDLE_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionHandle", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField DELEGATION_TOKEN_FIELD_DESC = new org.apache.thrift.protocol.TField("delegationToken", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenReqStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenReqTupleSchemeFactory()); - } - - private TSessionHandle sessionHandle; // required - private String delegationToken; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_HANDLE((short)1, "sessionHandle"), - DELEGATION_TOKEN((short)2, "delegationToken"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_HANDLE - return SESSION_HANDLE; - case 2: // DELEGATION_TOKEN - return DELEGATION_TOKEN; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_HANDLE, new org.apache.thrift.meta_data.FieldMetaData("sessionHandle", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TSessionHandle.class))); - tmpMap.put(_Fields.DELEGATION_TOKEN, new org.apache.thrift.meta_data.FieldMetaData("delegationToken", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenReq.class, metaDataMap); - } - - public TRenewDelegationTokenReq() { - } - - public TRenewDelegationTokenReq( - TSessionHandle sessionHandle, - String delegationToken) - { - this(); - this.sessionHandle = sessionHandle; - this.delegationToken = delegationToken; - } - - /** - * Performs a deep copy on other. 
- */ - public TRenewDelegationTokenReq(TRenewDelegationTokenReq other) { - if (other.isSetSessionHandle()) { - this.sessionHandle = new TSessionHandle(other.sessionHandle); - } - if (other.isSetDelegationToken()) { - this.delegationToken = other.delegationToken; - } - } - - public TRenewDelegationTokenReq deepCopy() { - return new TRenewDelegationTokenReq(this); - } - - @Override - public void clear() { - this.sessionHandle = null; - this.delegationToken = null; - } - - public TSessionHandle getSessionHandle() { - return this.sessionHandle; - } - - public void setSessionHandle(TSessionHandle sessionHandle) { - this.sessionHandle = sessionHandle; - } - - public void unsetSessionHandle() { - this.sessionHandle = null; - } - - /** Returns true if field sessionHandle is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionHandle() { - return this.sessionHandle != null; - } - - public void setSessionHandleIsSet(boolean value) { - if (!value) { - this.sessionHandle = null; - } - } - - public String getDelegationToken() { - return this.delegationToken; - } - - public void setDelegationToken(String delegationToken) { - this.delegationToken = delegationToken; - } - - public void unsetDelegationToken() { - this.delegationToken = null; - } - - /** Returns true if field delegationToken is set (has been assigned a value) and false otherwise */ - public boolean isSetDelegationToken() { - return this.delegationToken != null; - } - - public void setDelegationTokenIsSet(boolean value) { - if (!value) { - this.delegationToken = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_HANDLE: - if (value == null) { - unsetSessionHandle(); - } else { - setSessionHandle((TSessionHandle)value); - } - break; - - case DELEGATION_TOKEN: - if (value == null) { - unsetDelegationToken(); - } else { - setDelegationToken((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_HANDLE: - return getSessionHandle(); - - case DELEGATION_TOKEN: - return getDelegationToken(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_HANDLE: - return isSetSessionHandle(); - case DELEGATION_TOKEN: - return isSetDelegationToken(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenReq) - return this.equals((TRenewDelegationTokenReq)that); - return false; - } - - public boolean equals(TRenewDelegationTokenReq that) { - if (that == null) - return false; - - boolean this_present_sessionHandle = true && this.isSetSessionHandle(); - boolean that_present_sessionHandle = true && that.isSetSessionHandle(); - if (this_present_sessionHandle || that_present_sessionHandle) { - if (!(this_present_sessionHandle && that_present_sessionHandle)) - return false; - if (!this.sessionHandle.equals(that.sessionHandle)) - return false; - } - - boolean this_present_delegationToken = true && this.isSetDelegationToken(); - boolean that_present_delegationToken = true && that.isSetDelegationToken(); - if (this_present_delegationToken || that_present_delegationToken) { - if (!(this_present_delegationToken && that_present_delegationToken)) - return 
false; - if (!this.delegationToken.equals(that.delegationToken)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionHandle = true && (isSetSessionHandle()); - list.add(present_sessionHandle); - if (present_sessionHandle) - list.add(sessionHandle); - - boolean present_delegationToken = true && (isSetDelegationToken()); - list.add(present_delegationToken); - if (present_delegationToken) - list.add(delegationToken); - - return list.hashCode(); - } - - @Override - public int compareTo(TRenewDelegationTokenReq other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionHandle()).compareTo(other.isSetSessionHandle()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionHandle()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionHandle, other.sessionHandle); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetDelegationToken()).compareTo(other.isSetDelegationToken()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetDelegationToken()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.delegationToken, other.delegationToken); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenReq("); - boolean first = true; - - sb.append("sessionHandle:"); - if (this.sessionHandle == null) { - sb.append("null"); - } else { - sb.append(this.sessionHandle); - } - first = false; - if (!first) sb.append(", "); - sb.append("delegationToken:"); - if (this.delegationToken == null) { - sb.append("null"); - } else { - sb.append(this.delegationToken); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionHandle()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionHandle' is unset! Struct:" + toString()); - } - - if (!isSetDelegationToken()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'delegationToken' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionHandle != null) { - sessionHandle.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenReqStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqStandardScheme getScheme() { - return new TRenewDelegationTokenReqStandardScheme(); - } - } - - private static class TRenewDelegationTokenReqStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_HANDLE - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // DELEGATION_TOKEN - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionHandle != null) { - oprot.writeFieldBegin(SESSION_HANDLE_FIELD_DESC); - struct.sessionHandle.write(oprot); - oprot.writeFieldEnd(); - } - if (struct.delegationToken != null) { - oprot.writeFieldBegin(DELEGATION_TOKEN_FIELD_DESC); - oprot.writeString(struct.delegationToken); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenReqTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenReqTupleScheme getScheme() { - return new TRenewDelegationTokenReqTupleScheme(); - } - } - - private static class TRenewDelegationTokenReqTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionHandle.write(oprot); - oprot.writeString(struct.delegationToken); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenReq struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = 
(TTupleProtocol) prot; - struct.sessionHandle = new TSessionHandle(); - struct.sessionHandle.read(iprot); - struct.setSessionHandleIsSet(true); - struct.delegationToken = iprot.readString(); - struct.setDelegationTokenIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java deleted file mode 100644 index 6f5004ccc38e4..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRenewDelegationTokenResp.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRenewDelegationTokenResp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRenewDelegationTokenResp"); - - private static final org.apache.thrift.protocol.TField STATUS_FIELD_DESC = new org.apache.thrift.protocol.TField("status", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRenewDelegationTokenRespStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRenewDelegationTokenRespTupleSchemeFactory()); - } - - private TStatus status; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - STATUS((short)1, "status"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS - return STATUS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS, new org.apache.thrift.meta_data.FieldMetaData("status", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStatus.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRenewDelegationTokenResp.class, metaDataMap); - } - - public TRenewDelegationTokenResp() { - } - - public TRenewDelegationTokenResp( - TStatus status) - { - this(); - this.status = status; - } - - /** - * Performs a deep copy on other. - */ - public TRenewDelegationTokenResp(TRenewDelegationTokenResp other) { - if (other.isSetStatus()) { - this.status = new TStatus(other.status); - } - } - - public TRenewDelegationTokenResp deepCopy() { - return new TRenewDelegationTokenResp(this); - } - - @Override - public void clear() { - this.status = null; - } - - public TStatus getStatus() { - return this.status; - } - - public void setStatus(TStatus status) { - this.status = status; - } - - public void unsetStatus() { - this.status = null; - } - - /** Returns true if field status is set (has been assigned a value) and false otherwise */ - public boolean isSetStatus() { - return this.status != null; - } - - public void setStatusIsSet(boolean value) { - if (!value) { - this.status = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS: - if (value == null) { - unsetStatus(); - } else { - setStatus((TStatus)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS: - return getStatus(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS: - return isSetStatus(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRenewDelegationTokenResp) - return this.equals((TRenewDelegationTokenResp)that); - return false; - } - - public boolean equals(TRenewDelegationTokenResp that) { - if (that == null) - return false; - - boolean this_present_status = true && this.isSetStatus(); - boolean that_present_status = true && that.isSetStatus(); - if (this_present_status || that_present_status) { - if (!(this_present_status && that_present_status)) - return 
false; - if (!this.status.equals(that.status)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_status = true && (isSetStatus()); - list.add(present_status); - if (present_status) - list.add(status); - - return list.hashCode(); - } - - @Override - public int compareTo(TRenewDelegationTokenResp other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatus()).compareTo(other.isSetStatus()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatus()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.status, other.status); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRenewDelegationTokenResp("); - boolean first = true; - - sb.append("status:"); - if (this.status == null) { - sb.append("null"); - } else { - sb.append(this.status); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatus()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'status' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (status != null) { - status.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRenewDelegationTokenRespStandardSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespStandardScheme getScheme() { - return new TRenewDelegationTokenRespStandardScheme(); - } - } - - private static class TRenewDelegationTokenRespStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.status != null) { - oprot.writeFieldBegin(STATUS_FIELD_DESC); - struct.status.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRenewDelegationTokenRespTupleSchemeFactory implements SchemeFactory { - public TRenewDelegationTokenRespTupleScheme getScheme() { - return new TRenewDelegationTokenRespTupleScheme(); - } - } - - private static class TRenewDelegationTokenRespTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.status.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRenewDelegationTokenResp struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.status = new TStatus(); - struct.status.read(iprot); - struct.setStatusIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java deleted file mode 100644 index e95299df97c3a..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRow.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU 
ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRow implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRow"); - - private static final org.apache.thrift.protocol.TField COL_VALS_FIELD_DESC = new org.apache.thrift.protocol.TField("colVals", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowTupleSchemeFactory()); - } - - private List colVals; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COL_VALS((short)1, "colVals"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COL_VALS - return COL_VALS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. 
- */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COL_VALS, new org.apache.thrift.meta_data.FieldMetaData("colVals", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRow.class, metaDataMap); - } - - public TRow() { - } - - public TRow( - List colVals) - { - this(); - this.colVals = colVals; - } - - /** - * Performs a deep copy on other. - */ - public TRow(TRow other) { - if (other.isSetColVals()) { - List __this__colVals = new ArrayList(other.colVals.size()); - for (TColumnValue other_element : other.colVals) { - __this__colVals.add(new TColumnValue(other_element)); - } - this.colVals = __this__colVals; - } - } - - public TRow deepCopy() { - return new TRow(this); - } - - @Override - public void clear() { - this.colVals = null; - } - - public int getColValsSize() { - return (this.colVals == null) ? 0 : this.colVals.size(); - } - - public java.util.Iterator getColValsIterator() { - return (this.colVals == null) ? 
null : this.colVals.iterator(); - } - - public void addToColVals(TColumnValue elem) { - if (this.colVals == null) { - this.colVals = new ArrayList(); - } - this.colVals.add(elem); - } - - public List getColVals() { - return this.colVals; - } - - public void setColVals(List colVals) { - this.colVals = colVals; - } - - public void unsetColVals() { - this.colVals = null; - } - - /** Returns true if field colVals is set (has been assigned a value) and false otherwise */ - public boolean isSetColVals() { - return this.colVals != null; - } - - public void setColValsIsSet(boolean value) { - if (!value) { - this.colVals = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COL_VALS: - if (value == null) { - unsetColVals(); - } else { - setColVals((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COL_VALS: - return getColVals(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COL_VALS: - return isSetColVals(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRow) - return this.equals((TRow)that); - return false; - } - - public boolean equals(TRow that) { - if (that == null) - return false; - - boolean this_present_colVals = true && this.isSetColVals(); - boolean that_present_colVals = true && that.isSetColVals(); - if (this_present_colVals || that_present_colVals) { - if (!(this_present_colVals && that_present_colVals)) - return false; - if (!this.colVals.equals(that.colVals)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_colVals = true && (isSetColVals()); - list.add(present_colVals); - if (present_colVals) - list.add(colVals); - - return list.hashCode(); - } - - @Override - public int compareTo(TRow other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColVals()).compareTo(other.isSetColVals()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColVals()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.colVals, other.colVals); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRow("); - boolean first = true; - - sb.append("colVals:"); - if (this.colVals == null) { - sb.append("null"); - } else { - sb.append(this.colVals); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColVals()) { - throw new 
org.apache.thrift.protocol.TProtocolException("Required field 'colVals' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowStandardSchemeFactory implements SchemeFactory { - public TRowStandardScheme getScheme() { - return new TRowStandardScheme(); - } - } - - private static class TRowStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRow struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COL_VALS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list46 = iprot.readListBegin(); - struct.colVals = new ArrayList(_list46.size); - TColumnValue _elem47; - for (int _i48 = 0; _i48 < _list46.size; ++_i48) - { - _elem47 = new TColumnValue(); - _elem47.read(iprot); - struct.colVals.add(_elem47); - } - iprot.readListEnd(); - } - struct.setColValsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TRow struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.colVals != null) { - oprot.writeFieldBegin(COL_VALS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.colVals.size())); - for (TColumnValue _iter49 : struct.colVals) - { - _iter49.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowTupleSchemeFactory implements SchemeFactory { - public TRowTupleScheme getScheme() { - return new TRowTupleScheme(); - } - } - - private static class TRowTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.colVals.size()); - for (TColumnValue _iter50 : struct.colVals) - { - _iter50.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRow struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list51 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.colVals = new ArrayList(_list51.size); - 
TColumnValue _elem52; - for (int _i53 = 0; _i53 < _list51.size; ++_i53) - { - _elem52 = new TColumnValue(); - _elem52.read(iprot); - struct.colVals.add(_elem52); - } - } - struct.setColValsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java deleted file mode 100644 index da3d9d3ca8820..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TRowSet.java +++ /dev/null @@ -1,920 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TRowSet implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TRowSet"); - - private static final org.apache.thrift.protocol.TField START_ROW_OFFSET_FIELD_DESC = new org.apache.thrift.protocol.TField("startRowOffset", org.apache.thrift.protocol.TType.I64, (short)1); - private static final org.apache.thrift.protocol.TField ROWS_FIELD_DESC = new org.apache.thrift.protocol.TField("rows", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)3); - private static final org.apache.thrift.protocol.TField BINARY_COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryColumns", org.apache.thrift.protocol.TType.STRING, (short)4); - private static final org.apache.thrift.protocol.TField COLUMN_COUNT_FIELD_DESC = new org.apache.thrift.protocol.TField("columnCount", org.apache.thrift.protocol.TType.I32, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TRowSetStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TRowSetTupleSchemeFactory()); - } - - private long startRowOffset; // required - private List rows; // required - private List columns; // optional - private ByteBuffer binaryColumns; // optional - private int columnCount; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - START_ROW_OFFSET((short)1, "startRowOffset"), - ROWS((short)2, "rows"), - COLUMNS((short)3, "columns"), - BINARY_COLUMNS((short)4, "binaryColumns"), - COLUMN_COUNT((short)5, "columnCount"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // START_ROW_OFFSET - return START_ROW_OFFSET; - case 2: // ROWS - return ROWS; - case 3: // COLUMNS - return COLUMNS; - case 4: // BINARY_COLUMNS - return BINARY_COLUMNS; - case 5: // COLUMN_COUNT - return COLUMN_COUNT; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __STARTROWOFFSET_ISSET_ID = 0; - private static final int __COLUMNCOUNT_ISSET_ID = 1; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.COLUMNS,_Fields.BINARY_COLUMNS,_Fields.COLUMN_COUNT}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.START_ROW_OFFSET, new org.apache.thrift.meta_data.FieldMetaData("startRowOffset", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); - tmpMap.put(_Fields.ROWS, new org.apache.thrift.meta_data.FieldMetaData("rows", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TRow.class)))); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumn.class)))); - tmpMap.put(_Fields.BINARY_COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("binaryColumns", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - tmpMap.put(_Fields.COLUMN_COUNT, new org.apache.thrift.meta_data.FieldMetaData("columnCount", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new 
org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TRowSet.class, metaDataMap); - } - - public TRowSet() { - } - - public TRowSet( - long startRowOffset, - List rows) - { - this(); - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - this.rows = rows; - } - - /** - * Performs a deep copy on other. - */ - public TRowSet(TRowSet other) { - __isset_bitfield = other.__isset_bitfield; - this.startRowOffset = other.startRowOffset; - if (other.isSetRows()) { - List __this__rows = new ArrayList(other.rows.size()); - for (TRow other_element : other.rows) { - __this__rows.add(new TRow(other_element)); - } - this.rows = __this__rows; - } - if (other.isSetColumns()) { - List __this__columns = new ArrayList(other.columns.size()); - for (TColumn other_element : other.columns) { - __this__columns.add(new TColumn(other_element)); - } - this.columns = __this__columns; - } - if (other.isSetBinaryColumns()) { - this.binaryColumns = org.apache.thrift.TBaseHelper.copyBinary(other.binaryColumns); - } - this.columnCount = other.columnCount; - } - - public TRowSet deepCopy() { - return new TRowSet(this); - } - - @Override - public void clear() { - setStartRowOffsetIsSet(false); - this.startRowOffset = 0; - this.rows = null; - this.columns = null; - this.binaryColumns = null; - setColumnCountIsSet(false); - this.columnCount = 0; - } - - public long getStartRowOffset() { - return this.startRowOffset; - } - - public void setStartRowOffset(long startRowOffset) { - this.startRowOffset = startRowOffset; - setStartRowOffsetIsSet(true); - } - - public void unsetStartRowOffset() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - /** Returns true if field startRowOffset is set (has been assigned a value) and false otherwise */ - public boolean isSetStartRowOffset() { - return EncodingUtils.testBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID); - } - - public void setStartRowOffsetIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __STARTROWOFFSET_ISSET_ID, value); - } - - public int getRowsSize() { - return (this.rows == null) ? 0 : this.rows.size(); - } - - public java.util.Iterator getRowsIterator() { - return (this.rows == null) ? null : this.rows.iterator(); - } - - public void addToRows(TRow elem) { - if (this.rows == null) { - this.rows = new ArrayList(); - } - this.rows.add(elem); - } - - public List getRows() { - return this.rows; - } - - public void setRows(List rows) { - this.rows = rows; - } - - public void unsetRows() { - this.rows = null; - } - - /** Returns true if field rows is set (has been assigned a value) and false otherwise */ - public boolean isSetRows() { - return this.rows != null; - } - - public void setRowsIsSet(boolean value) { - if (!value) { - this.rows = null; - } - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumn elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public byte[] getBinaryColumns() { - setBinaryColumns(org.apache.thrift.TBaseHelper.rightSize(binaryColumns)); - return binaryColumns == null ? null : binaryColumns.array(); - } - - public ByteBuffer bufferForBinaryColumns() { - return org.apache.thrift.TBaseHelper.copyBinary(binaryColumns); - } - - public void setBinaryColumns(byte[] binaryColumns) { - this.binaryColumns = binaryColumns == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(binaryColumns, binaryColumns.length)); - } - - public void setBinaryColumns(ByteBuffer binaryColumns) { - this.binaryColumns = org.apache.thrift.TBaseHelper.copyBinary(binaryColumns); - } - - public void unsetBinaryColumns() { - this.binaryColumns = null; - } - - /** Returns true if field binaryColumns is set (has been assigned a value) and false otherwise */ - public boolean isSetBinaryColumns() { - return this.binaryColumns != null; - } - - public void setBinaryColumnsIsSet(boolean value) { - if (!value) { - this.binaryColumns = null; - } - } - - public int getColumnCount() { - return this.columnCount; - } - - public void setColumnCount(int columnCount) { - this.columnCount = columnCount; - setColumnCountIsSet(true); - } - - public void unsetColumnCount() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID); - } - - /** Returns true if field columnCount is set (has been assigned a value) and false otherwise */ - public boolean isSetColumnCount() { - return EncodingUtils.testBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID); - } - - public void setColumnCountIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __COLUMNCOUNT_ISSET_ID, value); - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case START_ROW_OFFSET: - if (value == null) { - unsetStartRowOffset(); - } else { - setStartRowOffset((Long)value); - } - break; - - case ROWS: - if (value == null) { - unsetRows(); - } else { - setRows((List)value); - } - break; - - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - case BINARY_COLUMNS: - if (value == null) { - unsetBinaryColumns(); - } else { - setBinaryColumns((ByteBuffer)value); - } - break; - - case COLUMN_COUNT: - if (value == null) { - unsetColumnCount(); - } else { - setColumnCount((Integer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case START_ROW_OFFSET: - return getStartRowOffset(); - - case ROWS: - return getRows(); - - case COLUMNS: - return getColumns(); - - case BINARY_COLUMNS: - return getBinaryColumns(); - - case COLUMN_COUNT: - return getColumnCount(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new 
IllegalArgumentException(); - } - - switch (field) { - case START_ROW_OFFSET: - return isSetStartRowOffset(); - case ROWS: - return isSetRows(); - case COLUMNS: - return isSetColumns(); - case BINARY_COLUMNS: - return isSetBinaryColumns(); - case COLUMN_COUNT: - return isSetColumnCount(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TRowSet) - return this.equals((TRowSet)that); - return false; - } - - public boolean equals(TRowSet that) { - if (that == null) - return false; - - boolean this_present_startRowOffset = true; - boolean that_present_startRowOffset = true; - if (this_present_startRowOffset || that_present_startRowOffset) { - if (!(this_present_startRowOffset && that_present_startRowOffset)) - return false; - if (this.startRowOffset != that.startRowOffset) - return false; - } - - boolean this_present_rows = true && this.isSetRows(); - boolean that_present_rows = true && that.isSetRows(); - if (this_present_rows || that_present_rows) { - if (!(this_present_rows && that_present_rows)) - return false; - if (!this.rows.equals(that.rows)) - return false; - } - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - boolean this_present_binaryColumns = true && this.isSetBinaryColumns(); - boolean that_present_binaryColumns = true && that.isSetBinaryColumns(); - if (this_present_binaryColumns || that_present_binaryColumns) { - if (!(this_present_binaryColumns && that_present_binaryColumns)) - return false; - if (!this.binaryColumns.equals(that.binaryColumns)) - return false; - } - - boolean this_present_columnCount = true && this.isSetColumnCount(); - boolean that_present_columnCount = true && that.isSetColumnCount(); - if (this_present_columnCount || that_present_columnCount) { - if (!(this_present_columnCount && that_present_columnCount)) - return false; - if (this.columnCount != that.columnCount) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_startRowOffset = true; - list.add(present_startRowOffset); - if (present_startRowOffset) - list.add(startRowOffset); - - boolean present_rows = true && (isSetRows()); - list.add(present_rows); - if (present_rows) - list.add(rows); - - boolean present_columns = true && (isSetColumns()); - list.add(present_columns); - if (present_columns) - list.add(columns); - - boolean present_binaryColumns = true && (isSetBinaryColumns()); - list.add(present_binaryColumns); - if (present_binaryColumns) - list.add(binaryColumns); - - boolean present_columnCount = true && (isSetColumnCount()); - list.add(present_columnCount); - if (present_columnCount) - list.add(columnCount); - - return list.hashCode(); - } - - @Override - public int compareTo(TRowSet other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStartRowOffset()).compareTo(other.isSetStartRowOffset()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStartRowOffset()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.startRowOffset, other.startRowOffset); - if (lastComparison != 0) 
{ - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetRows()).compareTo(other.isSetRows()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetRows()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.rows, other.rows); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(other.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, other.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetBinaryColumns()).compareTo(other.isSetBinaryColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetBinaryColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.binaryColumns, other.binaryColumns); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetColumnCount()).compareTo(other.isSetColumnCount()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumnCount()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columnCount, other.columnCount); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TRowSet("); - boolean first = true; - - sb.append("startRowOffset:"); - sb.append(this.startRowOffset); - first = false; - if (!first) sb.append(", "); - sb.append("rows:"); - if (this.rows == null) { - sb.append("null"); - } else { - sb.append(this.rows); - } - first = false; - if (isSetColumns()) { - if (!first) sb.append(", "); - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - } - if (isSetBinaryColumns()) { - if (!first) sb.append(", "); - sb.append("binaryColumns:"); - if (this.binaryColumns == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.binaryColumns, sb); - } - first = false; - } - if (isSetColumnCount()) { - if (!first) sb.append(", "); - sb.append("columnCount:"); - sb.append(this.columnCount); - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStartRowOffset()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'startRowOffset' is unset! Struct:" + toString()); - } - - if (!isSetRows()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'rows' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. - __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TRowSetStandardSchemeFactory implements SchemeFactory { - public TRowSetStandardScheme getScheme() { - return new TRowSetStandardScheme(); - } - } - - private static class TRowSetStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TRowSet struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // START_ROW_OFFSET - if (schemeField.type == org.apache.thrift.protocol.TType.I64) { - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // ROWS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list118 = iprot.readListBegin(); - struct.rows = new ArrayList(_list118.size); - TRow _elem119; - for (int _i120 = 0; _i120 < _list118.size; ++_i120) - { - _elem119 = new TRow(); - _elem119.read(iprot); - struct.rows.add(_elem119); - } - iprot.readListEnd(); - } - struct.setRowsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list121 = iprot.readListBegin(); - struct.columns = new ArrayList(_list121.size); - TColumn _elem122; - for (int _i123 = 0; _i123 < _list121.size; ++_i123) - { - _elem122 = new TColumn(); - _elem122.read(iprot); - struct.columns.add(_elem122); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // BINARY_COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.binaryColumns = iprot.readBinary(); - struct.setBinaryColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // COLUMN_COUNT - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.columnCount = iprot.readI32(); - struct.setColumnCountIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol 
oprot, TRowSet struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - oprot.writeFieldBegin(START_ROW_OFFSET_FIELD_DESC); - oprot.writeI64(struct.startRowOffset); - oprot.writeFieldEnd(); - if (struct.rows != null) { - oprot.writeFieldBegin(ROWS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.rows.size())); - for (TRow _iter124 : struct.rows) - { - _iter124.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.columns != null) { - if (struct.isSetColumns()) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumn _iter125 : struct.columns) - { - _iter125.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.binaryColumns != null) { - if (struct.isSetBinaryColumns()) { - oprot.writeFieldBegin(BINARY_COLUMNS_FIELD_DESC); - oprot.writeBinary(struct.binaryColumns); - oprot.writeFieldEnd(); - } - } - if (struct.isSetColumnCount()) { - oprot.writeFieldBegin(COLUMN_COUNT_FIELD_DESC); - oprot.writeI32(struct.columnCount); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TRowSetTupleSchemeFactory implements SchemeFactory { - public TRowSetTupleScheme getScheme() { - return new TRowSetTupleScheme(); - } - } - - private static class TRowSetTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI64(struct.startRowOffset); - { - oprot.writeI32(struct.rows.size()); - for (TRow _iter126 : struct.rows) - { - _iter126.write(oprot); - } - } - BitSet optionals = new BitSet(); - if (struct.isSetColumns()) { - optionals.set(0); - } - if (struct.isSetBinaryColumns()) { - optionals.set(1); - } - if (struct.isSetColumnCount()) { - optionals.set(2); - } - oprot.writeBitSet(optionals, 3); - if (struct.isSetColumns()) { - { - oprot.writeI32(struct.columns.size()); - for (TColumn _iter127 : struct.columns) - { - _iter127.write(oprot); - } - } - } - if (struct.isSetBinaryColumns()) { - oprot.writeBinary(struct.binaryColumns); - } - if (struct.isSetColumnCount()) { - oprot.writeI32(struct.columnCount); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TRowSet struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.startRowOffset = iprot.readI64(); - struct.setStartRowOffsetIsSet(true); - { - org.apache.thrift.protocol.TList _list128 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.rows = new ArrayList(_list128.size); - TRow _elem129; - for (int _i130 = 0; _i130 < _list128.size; ++_i130) - { - _elem129 = new TRow(); - _elem129.read(iprot); - struct.rows.add(_elem129); - } - } - struct.setRowsIsSet(true); - BitSet incoming = iprot.readBitSet(3); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list131 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list131.size); - TColumn _elem132; - for (int _i133 = 0; _i133 < _list131.size; ++_i133) - { - _elem132 = new TColumn(); - _elem132.read(iprot); - 
struct.columns.add(_elem132); - } - } - struct.setColumnsIsSet(true); - } - if (incoming.get(1)) { - struct.binaryColumns = iprot.readBinary(); - struct.setBinaryColumnsIsSet(true); - } - if (incoming.get(2)) { - struct.columnCount = iprot.readI32(); - struct.setColumnCountIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java deleted file mode 100644 index b5cb6e7b15aa6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TSessionHandle.java +++ /dev/null @@ -1,394 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TSessionHandle implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TSessionHandle"); - - private static final org.apache.thrift.protocol.TField SESSION_ID_FIELD_DESC = new org.apache.thrift.protocol.TField("sessionId", org.apache.thrift.protocol.TType.STRUCT, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TSessionHandleStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TSessionHandleTupleSchemeFactory()); - } - - private THandleIdentifier sessionId; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - SESSION_ID((short)1, "sessionId"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // SESSION_ID - return SESSION_ID; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.SESSION_ID, new org.apache.thrift.meta_data.FieldMetaData("sessionId", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, THandleIdentifier.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TSessionHandle.class, metaDataMap); - } - - public TSessionHandle() { - } - - public TSessionHandle( - THandleIdentifier sessionId) - { - this(); - this.sessionId = sessionId; - } - - /** - * Performs a deep copy on other. - */ - public TSessionHandle(TSessionHandle other) { - if (other.isSetSessionId()) { - this.sessionId = new THandleIdentifier(other.sessionId); - } - } - - public TSessionHandle deepCopy() { - return new TSessionHandle(this); - } - - @Override - public void clear() { - this.sessionId = null; - } - - public THandleIdentifier getSessionId() { - return this.sessionId; - } - - public void setSessionId(THandleIdentifier sessionId) { - this.sessionId = sessionId; - } - - public void unsetSessionId() { - this.sessionId = null; - } - - /** Returns true if field sessionId is set (has been assigned a value) and false otherwise */ - public boolean isSetSessionId() { - return this.sessionId != null; - } - - public void setSessionIdIsSet(boolean value) { - if (!value) { - this.sessionId = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case SESSION_ID: - if (value == null) { - unsetSessionId(); - } else { - setSessionId((THandleIdentifier)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case SESSION_ID: - return getSessionId(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case SESSION_ID: - return isSetSessionId(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TSessionHandle) - return this.equals((TSessionHandle)that); - return false; - } - - public boolean equals(TSessionHandle that) { - if (that == null) - return false; - - boolean this_present_sessionId = true && this.isSetSessionId(); - boolean that_present_sessionId = true && that.isSetSessionId(); - if (this_present_sessionId || that_present_sessionId) { - 
if (!(this_present_sessionId && that_present_sessionId)) - return false; - if (!this.sessionId.equals(that.sessionId)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_sessionId = true && (isSetSessionId()); - list.add(present_sessionId); - if (present_sessionId) - list.add(sessionId); - - return list.hashCode(); - } - - @Override - public int compareTo(TSessionHandle other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetSessionId()).compareTo(other.isSetSessionId()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSessionId()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sessionId, other.sessionId); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TSessionHandle("); - boolean first = true; - - sb.append("sessionId:"); - if (this.sessionId == null) { - sb.append("null"); - } else { - sb.append(this.sessionId); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetSessionId()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'sessionId' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - if (sessionId != null) { - sessionId.validate(); - } - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TSessionHandleStandardSchemeFactory implements SchemeFactory { - public TSessionHandleStandardScheme getScheme() { - return new TSessionHandleStandardScheme(); - } - } - - private static class TSessionHandleStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TSessionHandle struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // SESSION_ID - if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TSessionHandle struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.sessionId != null) { - oprot.writeFieldBegin(SESSION_ID_FIELD_DESC); - struct.sessionId.write(oprot); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TSessionHandleTupleSchemeFactory implements SchemeFactory { - public TSessionHandleTupleScheme getScheme() { - return new TSessionHandleTupleScheme(); - } - } - - private static class TSessionHandleTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - struct.sessionId.write(oprot); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TSessionHandle struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.sessionId = new THandleIdentifier(); - struct.sessionId.read(iprot); - struct.setSessionIdIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java deleted file mode 100644 index 50f4531b0a209..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatus.java +++ /dev/null @@ -1,875 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ 
-package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStatus implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStatus"); - - private static final org.apache.thrift.protocol.TField STATUS_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("statusCode", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField INFO_MESSAGES_FIELD_DESC = new org.apache.thrift.protocol.TField("infoMessages", org.apache.thrift.protocol.TType.LIST, (short)2); - private static final org.apache.thrift.protocol.TField SQL_STATE_FIELD_DESC = new org.apache.thrift.protocol.TField("sqlState", org.apache.thrift.protocol.TType.STRING, (short)3); - private static final org.apache.thrift.protocol.TField ERROR_CODE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorCode", org.apache.thrift.protocol.TType.I32, (short)4); - private static final org.apache.thrift.protocol.TField ERROR_MESSAGE_FIELD_DESC = new org.apache.thrift.protocol.TField("errorMessage", org.apache.thrift.protocol.TType.STRING, (short)5); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStatusStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStatusTupleSchemeFactory()); - } - - private TStatusCode statusCode; // required - private List infoMessages; // optional - private String sqlState; // optional - private int errorCode; // optional - private String errorMessage; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - /** - * - * @see TStatusCode - */ - STATUS_CODE((short)1, "statusCode"), - INFO_MESSAGES((short)2, "infoMessages"), - SQL_STATE((short)3, "sqlState"), - ERROR_CODE((short)4, "errorCode"), - ERROR_MESSAGE((short)5, "errorMessage"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // STATUS_CODE - return STATUS_CODE; - case 2: // INFO_MESSAGES - return INFO_MESSAGES; - case 3: // SQL_STATE - return SQL_STATE; - case 4: // ERROR_CODE - return ERROR_CODE; - case 5: // ERROR_MESSAGE - return ERROR_MESSAGE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final int __ERRORCODE_ISSET_ID = 0; - private byte __isset_bitfield = 0; - private static final _Fields optionals[] = {_Fields.INFO_MESSAGES,_Fields.SQL_STATE,_Fields.ERROR_CODE,_Fields.ERROR_MESSAGE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.STATUS_CODE, new org.apache.thrift.meta_data.FieldMetaData("statusCode", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TStatusCode.class))); - tmpMap.put(_Fields.INFO_MESSAGES, new org.apache.thrift.meta_data.FieldMetaData("infoMessages", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.SQL_STATE, new org.apache.thrift.meta_data.FieldMetaData("sqlState", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - tmpMap.put(_Fields.ERROR_CODE, new org.apache.thrift.meta_data.FieldMetaData("errorCode", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.ERROR_MESSAGE, new org.apache.thrift.meta_data.FieldMetaData("errorMessage", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStatus.class, metaDataMap); - } - - public TStatus() { - } - - public TStatus( - TStatusCode statusCode) - { - this(); - this.statusCode = statusCode; - } - - /** - * Performs a deep copy on other. 
- */ - public TStatus(TStatus other) { - __isset_bitfield = other.__isset_bitfield; - if (other.isSetStatusCode()) { - this.statusCode = other.statusCode; - } - if (other.isSetInfoMessages()) { - List __this__infoMessages = new ArrayList(other.infoMessages); - this.infoMessages = __this__infoMessages; - } - if (other.isSetSqlState()) { - this.sqlState = other.sqlState; - } - this.errorCode = other.errorCode; - if (other.isSetErrorMessage()) { - this.errorMessage = other.errorMessage; - } - } - - public TStatus deepCopy() { - return new TStatus(this); - } - - @Override - public void clear() { - this.statusCode = null; - this.infoMessages = null; - this.sqlState = null; - setErrorCodeIsSet(false); - this.errorCode = 0; - this.errorMessage = null; - } - - /** - * - * @see TStatusCode - */ - public TStatusCode getStatusCode() { - return this.statusCode; - } - - /** - * - * @see TStatusCode - */ - public void setStatusCode(TStatusCode statusCode) { - this.statusCode = statusCode; - } - - public void unsetStatusCode() { - this.statusCode = null; - } - - /** Returns true if field statusCode is set (has been assigned a value) and false otherwise */ - public boolean isSetStatusCode() { - return this.statusCode != null; - } - - public void setStatusCodeIsSet(boolean value) { - if (!value) { - this.statusCode = null; - } - } - - public int getInfoMessagesSize() { - return (this.infoMessages == null) ? 0 : this.infoMessages.size(); - } - - public java.util.Iterator getInfoMessagesIterator() { - return (this.infoMessages == null) ? null : this.infoMessages.iterator(); - } - - public void addToInfoMessages(String elem) { - if (this.infoMessages == null) { - this.infoMessages = new ArrayList(); - } - this.infoMessages.add(elem); - } - - public List getInfoMessages() { - return this.infoMessages; - } - - public void setInfoMessages(List infoMessages) { - this.infoMessages = infoMessages; - } - - public void unsetInfoMessages() { - this.infoMessages = null; - } - - /** Returns true if field infoMessages is set (has been assigned a value) and false otherwise */ - public boolean isSetInfoMessages() { - return this.infoMessages != null; - } - - public void setInfoMessagesIsSet(boolean value) { - if (!value) { - this.infoMessages = null; - } - } - - public String getSqlState() { - return this.sqlState; - } - - public void setSqlState(String sqlState) { - this.sqlState = sqlState; - } - - public void unsetSqlState() { - this.sqlState = null; - } - - /** Returns true if field sqlState is set (has been assigned a value) and false otherwise */ - public boolean isSetSqlState() { - return this.sqlState != null; - } - - public void setSqlStateIsSet(boolean value) { - if (!value) { - this.sqlState = null; - } - } - - public int getErrorCode() { - return this.errorCode; - } - - public void setErrorCode(int errorCode) { - this.errorCode = errorCode; - setErrorCodeIsSet(true); - } - - public void unsetErrorCode() { - __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - /** Returns true if field errorCode is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorCode() { - return EncodingUtils.testBit(__isset_bitfield, __ERRORCODE_ISSET_ID); - } - - public void setErrorCodeIsSet(boolean value) { - __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __ERRORCODE_ISSET_ID, value); - } - - public String getErrorMessage() { - return this.errorMessage; - } - - public void setErrorMessage(String errorMessage) { - this.errorMessage = errorMessage; - } - - 
public void unsetErrorMessage() { - this.errorMessage = null; - } - - /** Returns true if field errorMessage is set (has been assigned a value) and false otherwise */ - public boolean isSetErrorMessage() { - return this.errorMessage != null; - } - - public void setErrorMessageIsSet(boolean value) { - if (!value) { - this.errorMessage = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case STATUS_CODE: - if (value == null) { - unsetStatusCode(); - } else { - setStatusCode((TStatusCode)value); - } - break; - - case INFO_MESSAGES: - if (value == null) { - unsetInfoMessages(); - } else { - setInfoMessages((List)value); - } - break; - - case SQL_STATE: - if (value == null) { - unsetSqlState(); - } else { - setSqlState((String)value); - } - break; - - case ERROR_CODE: - if (value == null) { - unsetErrorCode(); - } else { - setErrorCode((Integer)value); - } - break; - - case ERROR_MESSAGE: - if (value == null) { - unsetErrorMessage(); - } else { - setErrorMessage((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case STATUS_CODE: - return getStatusCode(); - - case INFO_MESSAGES: - return getInfoMessages(); - - case SQL_STATE: - return getSqlState(); - - case ERROR_CODE: - return getErrorCode(); - - case ERROR_MESSAGE: - return getErrorMessage(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case STATUS_CODE: - return isSetStatusCode(); - case INFO_MESSAGES: - return isSetInfoMessages(); - case SQL_STATE: - return isSetSqlState(); - case ERROR_CODE: - return isSetErrorCode(); - case ERROR_MESSAGE: - return isSetErrorMessage(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStatus) - return this.equals((TStatus)that); - return false; - } - - public boolean equals(TStatus that) { - if (that == null) - return false; - - boolean this_present_statusCode = true && this.isSetStatusCode(); - boolean that_present_statusCode = true && that.isSetStatusCode(); - if (this_present_statusCode || that_present_statusCode) { - if (!(this_present_statusCode && that_present_statusCode)) - return false; - if (!this.statusCode.equals(that.statusCode)) - return false; - } - - boolean this_present_infoMessages = true && this.isSetInfoMessages(); - boolean that_present_infoMessages = true && that.isSetInfoMessages(); - if (this_present_infoMessages || that_present_infoMessages) { - if (!(this_present_infoMessages && that_present_infoMessages)) - return false; - if (!this.infoMessages.equals(that.infoMessages)) - return false; - } - - boolean this_present_sqlState = true && this.isSetSqlState(); - boolean that_present_sqlState = true && that.isSetSqlState(); - if (this_present_sqlState || that_present_sqlState) { - if (!(this_present_sqlState && that_present_sqlState)) - return false; - if (!this.sqlState.equals(that.sqlState)) - return false; - } - - boolean this_present_errorCode = true && this.isSetErrorCode(); - boolean that_present_errorCode = true && that.isSetErrorCode(); - if (this_present_errorCode || that_present_errorCode) { - if (!(this_present_errorCode && that_present_errorCode)) - return false; - if (this.errorCode != that.errorCode) - return false; - } - - 
boolean this_present_errorMessage = true && this.isSetErrorMessage(); - boolean that_present_errorMessage = true && that.isSetErrorMessage(); - if (this_present_errorMessage || that_present_errorMessage) { - if (!(this_present_errorMessage && that_present_errorMessage)) - return false; - if (!this.errorMessage.equals(that.errorMessage)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_statusCode = true && (isSetStatusCode()); - list.add(present_statusCode); - if (present_statusCode) - list.add(statusCode.getValue()); - - boolean present_infoMessages = true && (isSetInfoMessages()); - list.add(present_infoMessages); - if (present_infoMessages) - list.add(infoMessages); - - boolean present_sqlState = true && (isSetSqlState()); - list.add(present_sqlState); - if (present_sqlState) - list.add(sqlState); - - boolean present_errorCode = true && (isSetErrorCode()); - list.add(present_errorCode); - if (present_errorCode) - list.add(errorCode); - - boolean present_errorMessage = true && (isSetErrorMessage()); - list.add(present_errorMessage); - if (present_errorMessage) - list.add(errorMessage); - - return list.hashCode(); - } - - @Override - public int compareTo(TStatus other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetStatusCode()).compareTo(other.isSetStatusCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetStatusCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.statusCode, other.statusCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetInfoMessages()).compareTo(other.isSetInfoMessages()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetInfoMessages()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.infoMessages, other.infoMessages); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetSqlState()).compareTo(other.isSetSqlState()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetSqlState()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.sqlState, other.sqlState); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorCode()).compareTo(other.isSetErrorCode()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorCode()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorCode, other.errorCode); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetErrorMessage()).compareTo(other.isSetErrorMessage()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetErrorMessage()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.errorMessage, other.errorMessage); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - 
public String toString() { - StringBuilder sb = new StringBuilder("TStatus("); - boolean first = true; - - sb.append("statusCode:"); - if (this.statusCode == null) { - sb.append("null"); - } else { - sb.append(this.statusCode); - } - first = false; - if (isSetInfoMessages()) { - if (!first) sb.append(", "); - sb.append("infoMessages:"); - if (this.infoMessages == null) { - sb.append("null"); - } else { - sb.append(this.infoMessages); - } - first = false; - } - if (isSetSqlState()) { - if (!first) sb.append(", "); - sb.append("sqlState:"); - if (this.sqlState == null) { - sb.append("null"); - } else { - sb.append(this.sqlState); - } - first = false; - } - if (isSetErrorCode()) { - if (!first) sb.append(", "); - sb.append("errorCode:"); - sb.append(this.errorCode); - first = false; - } - if (isSetErrorMessage()) { - if (!first) sb.append(", "); - sb.append("errorMessage:"); - if (this.errorMessage == null) { - sb.append("null"); - } else { - sb.append(this.errorMessage); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetStatusCode()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'statusCode' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
- __isset_bitfield = 0; - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStatusStandardSchemeFactory implements SchemeFactory { - public TStatusStandardScheme getScheme() { - return new TStatusStandardScheme(); - } - } - - private static class TStatusStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStatus struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // STATUS_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.statusCode = org.apache.hive.service.rpc.thrift.TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // INFO_MESSAGES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list134 = iprot.readListBegin(); - struct.infoMessages = new ArrayList(_list134.size); - String _elem135; - for (int _i136 = 0; _i136 < _list134.size; ++_i136) - { - _elem135 = iprot.readString(); - struct.infoMessages.add(_elem135); - } - iprot.readListEnd(); - } - struct.setInfoMessagesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 3: // SQL_STATE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 4: // ERROR_CODE - if (schemeField.type == org.apache.thrift.protocol.TType.I32) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 5: // ERROR_MESSAGE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStatus struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.statusCode != null) { - oprot.writeFieldBegin(STATUS_CODE_FIELD_DESC); - oprot.writeI32(struct.statusCode.getValue()); - oprot.writeFieldEnd(); - } - if (struct.infoMessages != null) { - if (struct.isSetInfoMessages()) { - oprot.writeFieldBegin(INFO_MESSAGES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.infoMessages.size())); - for (String _iter137 : struct.infoMessages) - { - oprot.writeString(_iter137); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - } - if (struct.sqlState != null) { - if (struct.isSetSqlState()) { - oprot.writeFieldBegin(SQL_STATE_FIELD_DESC); - oprot.writeString(struct.sqlState); 
- oprot.writeFieldEnd(); - } - } - if (struct.isSetErrorCode()) { - oprot.writeFieldBegin(ERROR_CODE_FIELD_DESC); - oprot.writeI32(struct.errorCode); - oprot.writeFieldEnd(); - } - if (struct.errorMessage != null) { - if (struct.isSetErrorMessage()) { - oprot.writeFieldBegin(ERROR_MESSAGE_FIELD_DESC); - oprot.writeString(struct.errorMessage); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStatusTupleSchemeFactory implements SchemeFactory { - public TStatusTupleScheme getScheme() { - return new TStatusTupleScheme(); - } - } - - private static class TStatusTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeI32(struct.statusCode.getValue()); - BitSet optionals = new BitSet(); - if (struct.isSetInfoMessages()) { - optionals.set(0); - } - if (struct.isSetSqlState()) { - optionals.set(1); - } - if (struct.isSetErrorCode()) { - optionals.set(2); - } - if (struct.isSetErrorMessage()) { - optionals.set(3); - } - oprot.writeBitSet(optionals, 4); - if (struct.isSetInfoMessages()) { - { - oprot.writeI32(struct.infoMessages.size()); - for (String _iter138 : struct.infoMessages) - { - oprot.writeString(_iter138); - } - } - } - if (struct.isSetSqlState()) { - oprot.writeString(struct.sqlState); - } - if (struct.isSetErrorCode()) { - oprot.writeI32(struct.errorCode); - } - if (struct.isSetErrorMessage()) { - oprot.writeString(struct.errorMessage); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStatus struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.statusCode = org.apache.hive.service.rpc.thrift.TStatusCode.findByValue(iprot.readI32()); - struct.setStatusCodeIsSet(true); - BitSet incoming = iprot.readBitSet(4); - if (incoming.get(0)) { - { - org.apache.thrift.protocol.TList _list139 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.infoMessages = new ArrayList(_list139.size); - String _elem140; - for (int _i141 = 0; _i141 < _list139.size; ++_i141) - { - _elem140 = iprot.readString(); - struct.infoMessages.add(_elem140); - } - } - struct.setInfoMessagesIsSet(true); - } - if (incoming.get(1)) { - struct.sqlState = iprot.readString(); - struct.setSqlStateIsSet(true); - } - if (incoming.get(2)) { - struct.errorCode = iprot.readI32(); - struct.setErrorCodeIsSet(true); - } - if (incoming.get(3)) { - struct.errorMessage = iprot.readString(); - struct.setErrorMessageIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java deleted file mode 100644 index fbf14184fa9a8..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStatusCode.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TStatusCode implements org.apache.thrift.TEnum { - SUCCESS_STATUS(0), - SUCCESS_WITH_INFO_STATUS(1), - STILL_EXECUTING_STATUS(2), - ERROR_STATUS(3), - INVALID_HANDLE_STATUS(4); - 
- private final int value; - - private TStatusCode(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. - */ - public static TStatusCode findByValue(int value) { - switch (value) { - case 0: - return SUCCESS_STATUS; - case 1: - return SUCCESS_WITH_INFO_STATUS; - case 2: - return STILL_EXECUTING_STATUS; - case 3: - return ERROR_STATUS; - case 4: - return INVALID_HANDLE_STATUS; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java deleted file mode 100644 index c83a1fd0de3c2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringColumn.java +++ /dev/null @@ -1,548 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStringColumn implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringColumn"); - - private static final org.apache.thrift.protocol.TField VALUES_FIELD_DESC = new org.apache.thrift.protocol.TField("values", org.apache.thrift.protocol.TType.LIST, (short)1); - private static final org.apache.thrift.protocol.TField NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("nulls", org.apache.thrift.protocol.TType.STRING, (short)2); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringColumnStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringColumnTupleSchemeFactory()); - } - - private List values; // required - private ByteBuffer nulls; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUES((short)1, "values"), - NULLS((short)2, "nulls"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUES - return VALUES; - case 2: // NULLS - return NULLS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUES, new org.apache.thrift.meta_data.FieldMetaData("values", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING)))); - tmpMap.put(_Fields.NULLS, new org.apache.thrift.meta_data.FieldMetaData("nulls", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringColumn.class, metaDataMap); - } - - public TStringColumn() { - } - - public TStringColumn( - List values, - ByteBuffer nulls) - { - this(); - this.values = values; - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - /** - * Performs a deep copy on other. - */ - public TStringColumn(TStringColumn other) { - if (other.isSetValues()) { - List __this__values = new ArrayList(other.values); - this.values = __this__values; - } - if (other.isSetNulls()) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(other.nulls); - } - } - - public TStringColumn deepCopy() { - return new TStringColumn(this); - } - - @Override - public void clear() { - this.values = null; - this.nulls = null; - } - - public int getValuesSize() { - return (this.values == null) ? 0 : this.values.size(); - } - - public java.util.Iterator getValuesIterator() { - return (this.values == null) ? 
null : this.values.iterator(); - } - - public void addToValues(String elem) { - if (this.values == null) { - this.values = new ArrayList(); - } - this.values.add(elem); - } - - public List getValues() { - return this.values; - } - - public void setValues(List values) { - this.values = values; - } - - public void unsetValues() { - this.values = null; - } - - /** Returns true if field values is set (has been assigned a value) and false otherwise */ - public boolean isSetValues() { - return this.values != null; - } - - public void setValuesIsSet(boolean value) { - if (!value) { - this.values = null; - } - } - - public byte[] getNulls() { - setNulls(org.apache.thrift.TBaseHelper.rightSize(nulls)); - return nulls == null ? null : nulls.array(); - } - - public ByteBuffer bufferForNulls() { - return org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void setNulls(byte[] nulls) { - this.nulls = nulls == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(nulls, nulls.length)); - } - - public void setNulls(ByteBuffer nulls) { - this.nulls = org.apache.thrift.TBaseHelper.copyBinary(nulls); - } - - public void unsetNulls() { - this.nulls = null; - } - - /** Returns true if field nulls is set (has been assigned a value) and false otherwise */ - public boolean isSetNulls() { - return this.nulls != null; - } - - public void setNullsIsSet(boolean value) { - if (!value) { - this.nulls = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUES: - if (value == null) { - unsetValues(); - } else { - setValues((List)value); - } - break; - - case NULLS: - if (value == null) { - unsetNulls(); - } else { - setNulls((ByteBuffer)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUES: - return getValues(); - - case NULLS: - return getNulls(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUES: - return isSetValues(); - case NULLS: - return isSetNulls(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringColumn) - return this.equals((TStringColumn)that); - return false; - } - - public boolean equals(TStringColumn that) { - if (that == null) - return false; - - boolean this_present_values = true && this.isSetValues(); - boolean that_present_values = true && that.isSetValues(); - if (this_present_values || that_present_values) { - if (!(this_present_values && that_present_values)) - return false; - if (!this.values.equals(that.values)) - return false; - } - - boolean this_present_nulls = true && this.isSetNulls(); - boolean that_present_nulls = true && that.isSetNulls(); - if (this_present_nulls || that_present_nulls) { - if (!(this_present_nulls && that_present_nulls)) - return false; - if (!this.nulls.equals(that.nulls)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_values = true && (isSetValues()); - list.add(present_values); - if (present_values) - list.add(values); - - boolean present_nulls = true && (isSetNulls()); - list.add(present_nulls); - if (present_nulls) - list.add(nulls); - - return list.hashCode(); - } - - @Override - public int 
compareTo(TStringColumn other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValues()).compareTo(other.isSetValues()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValues()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.values, other.values); - if (lastComparison != 0) { - return lastComparison; - } - } - lastComparison = Boolean.valueOf(isSetNulls()).compareTo(other.isSetNulls()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNulls()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nulls, other.nulls); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringColumn("); - boolean first = true; - - sb.append("values:"); - if (this.values == null) { - sb.append("null"); - } else { - sb.append(this.values); - } - first = false; - if (!first) sb.append(", "); - sb.append("nulls:"); - if (this.nulls == null) { - sb.append("null"); - } else { - org.apache.thrift.TBaseHelper.toString(this.nulls, sb); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetValues()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'values' is unset! Struct:" + toString()); - } - - if (!isSetNulls()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nulls' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringColumnStandardSchemeFactory implements SchemeFactory { - public TStringColumnStandardScheme getScheme() { - return new TStringColumnStandardScheme(); - } - } - - private static class TStringColumnStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringColumn struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list102 = iprot.readListBegin(); - struct.values = new ArrayList(_list102.size); - String _elem103; - for (int _i104 = 0; _i104 < _list102.size; ++_i104) - { - _elem103 = iprot.readString(); - struct.values.add(_elem103); - } - iprot.readListEnd(); - } - struct.setValuesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - case 2: // NULLS - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringColumn struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.values != null) { - oprot.writeFieldBegin(VALUES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, struct.values.size())); - for (String _iter105 : struct.values) - { - oprot.writeString(_iter105); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - if (struct.nulls != null) { - oprot.writeFieldBegin(NULLS_FIELD_DESC); - oprot.writeBinary(struct.nulls); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringColumnTupleSchemeFactory implements SchemeFactory { - public TStringColumnTupleScheme getScheme() { - return new TStringColumnTupleScheme(); - } - } - - private static class TStringColumnTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.values.size()); - for (String _iter106 : struct.values) - { - oprot.writeString(_iter106); - } - } - 
oprot.writeBinary(struct.nulls); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringColumn struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list107 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRING, iprot.readI32()); - struct.values = new ArrayList(_list107.size); - String _elem108; - for (int _i109 = 0; _i109 < _list107.size; ++_i109) - { - _elem108 = iprot.readString(); - struct.values.add(_elem108); - } - } - struct.setValuesIsSet(true); - struct.nulls = iprot.readBinary(); - struct.setNullsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java deleted file mode 100644 index 13874e5516632..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStringValue.java +++ /dev/null @@ -1,393 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStringValue implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStringValue"); - - private static final org.apache.thrift.protocol.TField VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("value", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStringValueStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TStringValueTupleSchemeFactory()); - } - - private String value; // optional - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - VALUE((short)1, "value"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // VALUE - return VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - private static final _Fields optionals[] = {_Fields.VALUE}; - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.VALUE, new org.apache.thrift.meta_data.FieldMetaData("value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStringValue.class, metaDataMap); - } - - public TStringValue() { - } - - /** - * Performs a deep copy on other. - */ - public TStringValue(TStringValue other) { - if (other.isSetValue()) { - this.value = other.value; - } - } - - public TStringValue deepCopy() { - return new TStringValue(this); - } - - @Override - public void clear() { - this.value = null; - } - - public String getValue() { - return this.value; - } - - public void setValue(String value) { - this.value = value; - } - - public void unsetValue() { - this.value = null; - } - - /** Returns true if field value is set (has been assigned a value) and false otherwise */ - public boolean isSetValue() { - return this.value != null; - } - - public void setValueIsSet(boolean value) { - if (!value) { - this.value = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case VALUE: - if (value == null) { - unsetValue(); - } else { - setValue((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case VALUE: - return getValue(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case VALUE: - return isSetValue(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStringValue) - return this.equals((TStringValue)that); - return false; - } - - public boolean equals(TStringValue that) { - if (that == null) - return false; - - boolean this_present_value = true && this.isSetValue(); - boolean that_present_value = true && that.isSetValue(); - if (this_present_value || that_present_value) { - if 
(!(this_present_value && that_present_value)) - return false; - if (!this.value.equals(that.value)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_value = true && (isSetValue()); - list.add(present_value); - if (present_value) - list.add(value); - - return list.hashCode(); - } - - @Override - public int compareTo(TStringValue other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetValue()).compareTo(other.isSetValue()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetValue()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.value, other.value); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStringValue("); - boolean first = true; - - if (isSetValue()) { - sb.append("value:"); - if (this.value == null) { - sb.append("null"); - } else { - sb.append(this.value); - } - first = false; - } - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStringValueStandardSchemeFactory implements SchemeFactory { - public TStringValueStandardScheme getScheme() { - return new TStringValueStandardScheme(); - } - } - - private static class TStringValueStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStringValue struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // VALUE - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStringValue 
struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.value != null) { - if (struct.isSetValue()) { - oprot.writeFieldBegin(VALUE_FIELD_DESC); - oprot.writeString(struct.value); - oprot.writeFieldEnd(); - } - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStringValueTupleSchemeFactory implements SchemeFactory { - public TStringValueTupleScheme getScheme() { - return new TStringValueTupleScheme(); - } - } - - private static class TStringValueTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - BitSet optionals = new BitSet(); - if (struct.isSetValue()) { - optionals.set(0); - } - oprot.writeBitSet(optionals, 1); - if (struct.isSetValue()) { - oprot.writeString(struct.value); - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStringValue struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - BitSet incoming = iprot.readBitSet(1); - if (incoming.get(0)) { - struct.value = iprot.readString(); - struct.setValueIsSet(true); - } - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java deleted file mode 100644 index 6c2c4f5dd2ddf..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TStructTypeEntry.java +++ /dev/null @@ -1,452 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TStructTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TStructTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TStructTypeEntryStandardSchemeFactory()); - 
schemes.put(TupleScheme.class, new TStructTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TStructTypeEntry.class, metaDataMap); - } - - public TStructTypeEntry() { - } - - public TStructTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. - */ - public TStructTypeEntry(TStructTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(other.nameToTypePtr.size()); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TStructTypeEntry deepCopy() { - return new TStructTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 
0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TStructTypeEntry) - return this.equals((TStructTypeEntry)that); - return false; - } - - public boolean equals(TStructTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - list.add(present_nameToTypePtr); - if (present_nameToTypePtr) - list.add(nameToTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TStructTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetNameToTypePtr()).compareTo(other.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, other.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TStructTypeEntry("); - boolean first = true; - - 
sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TStructTypeEntryStandardSchemeFactory implements SchemeFactory { - public TStructTypeEntryStandardScheme getScheme() { - return new TStructTypeEntryStandardScheme(); - } - } - - private static class TStructTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map10 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map10.size); - String _key11; - int _val12; - for (int _i13 = 0; _i13 < _map10.size; ++_i13) - { - _key11 = iprot.readString(); - _val12 = iprot.readI32(); - struct.nameToTypePtr.put(_key11, _val12); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TStructTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter14 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter14.getKey()); - oprot.writeI32(_iter14.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TStructTypeEntryTupleSchemeFactory implements SchemeFactory { - public TStructTypeEntryTupleScheme getScheme() { - return new TStructTypeEntryTupleScheme(); - } - } - - private static class TStructTypeEntryTupleScheme extends TupleScheme { - - @Override - public void 
write(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter15 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter15.getKey()); - oprot.writeI32(_iter15.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TStructTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map16 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map16.size); - String _key17; - int _val18; - for (int _i19 = 0; _i19 < _map16.size; ++_i19) - { - _key17 = iprot.readString(); - _val18 = iprot.readI32(); - struct.nameToTypePtr.put(_key17, _val18); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java deleted file mode 100644 index 007b1603546ac..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTableSchema.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TTableSchema implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTableSchema"); - - private static final org.apache.thrift.protocol.TField COLUMNS_FIELD_DESC = new org.apache.thrift.protocol.TField("columns", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTableSchemaStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTableSchemaTupleSchemeFactory()); - } - - private List columns; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - COLUMNS((short)1, "columns"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // COLUMNS - return COLUMNS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.COLUMNS, new org.apache.thrift.meta_data.FieldMetaData("columns", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TColumnDesc.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTableSchema.class, metaDataMap); - } - - public TTableSchema() { - } - - public TTableSchema( - List columns) - { - this(); - this.columns = columns; - } - - /** - * Performs a deep copy on other. - */ - public TTableSchema(TTableSchema other) { - if (other.isSetColumns()) { - List __this__columns = new ArrayList(other.columns.size()); - for (TColumnDesc other_element : other.columns) { - __this__columns.add(new TColumnDesc(other_element)); - } - this.columns = __this__columns; - } - } - - public TTableSchema deepCopy() { - return new TTableSchema(this); - } - - @Override - public void clear() { - this.columns = null; - } - - public int getColumnsSize() { - return (this.columns == null) ? 0 : this.columns.size(); - } - - public java.util.Iterator getColumnsIterator() { - return (this.columns == null) ? 
null : this.columns.iterator(); - } - - public void addToColumns(TColumnDesc elem) { - if (this.columns == null) { - this.columns = new ArrayList(); - } - this.columns.add(elem); - } - - public List getColumns() { - return this.columns; - } - - public void setColumns(List columns) { - this.columns = columns; - } - - public void unsetColumns() { - this.columns = null; - } - - /** Returns true if field columns is set (has been assigned a value) and false otherwise */ - public boolean isSetColumns() { - return this.columns != null; - } - - public void setColumnsIsSet(boolean value) { - if (!value) { - this.columns = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case COLUMNS: - if (value == null) { - unsetColumns(); - } else { - setColumns((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case COLUMNS: - return getColumns(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case COLUMNS: - return isSetColumns(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTableSchema) - return this.equals((TTableSchema)that); - return false; - } - - public boolean equals(TTableSchema that) { - if (that == null) - return false; - - boolean this_present_columns = true && this.isSetColumns(); - boolean that_present_columns = true && that.isSetColumns(); - if (this_present_columns || that_present_columns) { - if (!(this_present_columns && that_present_columns)) - return false; - if (!this.columns.equals(that.columns)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_columns = true && (isSetColumns()); - list.add(present_columns); - if (present_columns) - list.add(columns); - - return list.hashCode(); - } - - @Override - public int compareTo(TTableSchema other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetColumns()).compareTo(other.isSetColumns()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetColumns()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.columns, other.columns); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTableSchema("); - boolean first = true; - - sb.append("columns:"); - if (this.columns == null) { - sb.append("null"); - } else { - sb.append(this.columns); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetColumns()) { 
- throw new org.apache.thrift.protocol.TProtocolException("Required field 'columns' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTableSchemaStandardSchemeFactory implements SchemeFactory { - public TTableSchemaStandardScheme getScheme() { - return new TTableSchemaStandardScheme(); - } - } - - private static class TTableSchemaStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTableSchema struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // COLUMNS - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list38 = iprot.readListBegin(); - struct.columns = new ArrayList(_list38.size); - TColumnDesc _elem39; - for (int _i40 = 0; _i40 < _list38.size; ++_i40) - { - _elem39 = new TColumnDesc(); - _elem39.read(iprot); - struct.columns.add(_elem39); - } - iprot.readListEnd(); - } - struct.setColumnsIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTableSchema struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.columns != null) { - oprot.writeFieldBegin(COLUMNS_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.columns.size())); - for (TColumnDesc _iter41 : struct.columns) - { - _iter41.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTableSchemaTupleSchemeFactory implements SchemeFactory { - public TTableSchemaTupleScheme getScheme() { - return new TTableSchemaTupleScheme(); - } - } - - private static class TTableSchemaTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.columns.size()); - for (TColumnDesc _iter42 : struct.columns) - { - _iter42.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTableSchema struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list43 = new 
org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.columns = new ArrayList(_list43.size); - TColumnDesc _elem44; - for (int _i45 = 0; _i45 < _list43.size; ++_i45) - { - _elem44 = new TColumnDesc(); - _elem44.read(iprot); - struct.columns.add(_elem44); - } - } - struct.setColumnsIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java deleted file mode 100644 index 055a14d06a2d6..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeDesc.java +++ /dev/null @@ -1,443 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TTypeDesc implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeDesc"); - - private static final org.apache.thrift.protocol.TField TYPES_FIELD_DESC = new org.apache.thrift.protocol.TField("types", org.apache.thrift.protocol.TType.LIST, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeDescStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeDescTupleSchemeFactory()); - } - - private List types; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPES((short)1, "types"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPES - return TYPES; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. 
- */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPES, new org.apache.thrift.meta_data.FieldMetaData("types", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.ListMetaData(org.apache.thrift.protocol.TType.LIST, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeEntry.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeDesc.class, metaDataMap); - } - - public TTypeDesc() { - } - - public TTypeDesc( - List types) - { - this(); - this.types = types; - } - - /** - * Performs a deep copy on other. - */ - public TTypeDesc(TTypeDesc other) { - if (other.isSetTypes()) { - List __this__types = new ArrayList(other.types.size()); - for (TTypeEntry other_element : other.types) { - __this__types.add(new TTypeEntry(other_element)); - } - this.types = __this__types; - } - } - - public TTypeDesc deepCopy() { - return new TTypeDesc(this); - } - - @Override - public void clear() { - this.types = null; - } - - public int getTypesSize() { - return (this.types == null) ? 0 : this.types.size(); - } - - public java.util.Iterator getTypesIterator() { - return (this.types == null) ? 
null : this.types.iterator(); - } - - public void addToTypes(TTypeEntry elem) { - if (this.types == null) { - this.types = new ArrayList(); - } - this.types.add(elem); - } - - public List getTypes() { - return this.types; - } - - public void setTypes(List types) { - this.types = types; - } - - public void unsetTypes() { - this.types = null; - } - - /** Returns true if field types is set (has been assigned a value) and false otherwise */ - public boolean isSetTypes() { - return this.types != null; - } - - public void setTypesIsSet(boolean value) { - if (!value) { - this.types = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPES: - if (value == null) { - unsetTypes(); - } else { - setTypes((List)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPES: - return getTypes(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPES: - return isSetTypes(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeDesc) - return this.equals((TTypeDesc)that); - return false; - } - - public boolean equals(TTypeDesc that) { - if (that == null) - return false; - - boolean this_present_types = true && this.isSetTypes(); - boolean that_present_types = true && that.isSetTypes(); - if (this_present_types || that_present_types) { - if (!(this_present_types && that_present_types)) - return false; - if (!this.types.equals(that.types)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_types = true && (isSetTypes()); - list.add(present_types); - if (present_types) - list.add(types); - - return list.hashCode(); - } - - @Override - public int compareTo(TTypeDesc other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetTypes()).compareTo(other.isSetTypes()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypes()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.types, other.types); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeDesc("); - boolean first = true; - - sb.append("types:"); - if (this.types == null) { - sb.append("null"); - } else { - sb.append(this.types); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypes()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'types' is unset! 
Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeDescStandardSchemeFactory implements SchemeFactory { - public TTypeDescStandardScheme getScheme() { - return new TTypeDescStandardScheme(); - } - } - - private static class TTypeDescStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeDesc struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPES - if (schemeField.type == org.apache.thrift.protocol.TType.LIST) { - { - org.apache.thrift.protocol.TList _list30 = iprot.readListBegin(); - struct.types = new ArrayList(_list30.size); - TTypeEntry _elem31; - for (int _i32 = 0; _i32 < _list30.size; ++_i32) - { - _elem31 = new TTypeEntry(); - _elem31.read(iprot); - struct.types.add(_elem31); - } - iprot.readListEnd(); - } - struct.setTypesIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeDesc struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.types != null) { - oprot.writeFieldBegin(TYPES_FIELD_DESC); - { - oprot.writeListBegin(new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, struct.types.size())); - for (TTypeEntry _iter33 : struct.types) - { - _iter33.write(oprot); - } - oprot.writeListEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeDescTupleSchemeFactory implements SchemeFactory { - public TTypeDescTupleScheme getScheme() { - return new TTypeDescTupleScheme(); - } - } - - private static class TTypeDescTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.types.size()); - for (TTypeEntry _iter34 : struct.types) - { - _iter34.write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeDesc struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TList _list35 = new org.apache.thrift.protocol.TList(org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.types = new ArrayList(_list35.size); - TTypeEntry _elem36; - for (int _i37 = 0; _i37 < 
_list35.size; ++_i37) - { - _elem36 = new TTypeEntry(); - _elem36.read(iprot); - struct.types.add(_elem36); - } - } - struct.setTypesIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java deleted file mode 100644 index b609151b8fbee..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeEntry.java +++ /dev/null @@ -1,614 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TTypeEntry extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeEntry"); - private static final org.apache.thrift.protocol.TField PRIMITIVE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("primitiveEntry", org.apache.thrift.protocol.TType.STRUCT, (short)1); - private static final org.apache.thrift.protocol.TField ARRAY_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("arrayEntry", org.apache.thrift.protocol.TType.STRUCT, (short)2); - private static final org.apache.thrift.protocol.TField MAP_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("mapEntry", org.apache.thrift.protocol.TType.STRUCT, (short)3); - private static final org.apache.thrift.protocol.TField STRUCT_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("structEntry", org.apache.thrift.protocol.TType.STRUCT, (short)4); - private static final org.apache.thrift.protocol.TField UNION_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("unionEntry", org.apache.thrift.protocol.TType.STRUCT, (short)5); - private static final org.apache.thrift.protocol.TField USER_DEFINED_TYPE_ENTRY_FIELD_DESC = new org.apache.thrift.protocol.TField("userDefinedTypeEntry", org.apache.thrift.protocol.TType.STRUCT, (short)6); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. 
*/ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - PRIMITIVE_ENTRY((short)1, "primitiveEntry"), - ARRAY_ENTRY((short)2, "arrayEntry"), - MAP_ENTRY((short)3, "mapEntry"), - STRUCT_ENTRY((short)4, "structEntry"), - UNION_ENTRY((short)5, "unionEntry"), - USER_DEFINED_TYPE_ENTRY((short)6, "userDefinedTypeEntry"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // PRIMITIVE_ENTRY - return PRIMITIVE_ENTRY; - case 2: // ARRAY_ENTRY - return ARRAY_ENTRY; - case 3: // MAP_ENTRY - return MAP_ENTRY; - case 4: // STRUCT_ENTRY - return STRUCT_ENTRY; - case 5: // UNION_ENTRY - return UNION_ENTRY; - case 6: // USER_DEFINED_TYPE_ENTRY - return USER_DEFINED_TYPE_ENTRY; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.PRIMITIVE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("primitiveEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TPrimitiveTypeEntry.class))); - tmpMap.put(_Fields.ARRAY_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("arrayEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TArrayTypeEntry.class))); - tmpMap.put(_Fields.MAP_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("mapEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TMapTypeEntry.class))); - tmpMap.put(_Fields.STRUCT_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("structEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TStructTypeEntry.class))); - tmpMap.put(_Fields.UNION_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("unionEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUnionTypeEntry.class))); - tmpMap.put(_Fields.USER_DEFINED_TYPE_ENTRY, new org.apache.thrift.meta_data.FieldMetaData("userDefinedTypeEntry", org.apache.thrift.TFieldRequirementType.DEFAULT, - new 
org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TUserDefinedTypeEntry.class))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeEntry.class, metaDataMap); - } - - public TTypeEntry() { - super(); - } - - public TTypeEntry(TTypeEntry._Fields setField, Object value) { - super(setField, value); - } - - public TTypeEntry(TTypeEntry other) { - super(other); - } - public TTypeEntry deepCopy() { - return new TTypeEntry(this); - } - - public static TTypeEntry primitiveEntry(TPrimitiveTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setPrimitiveEntry(value); - return x; - } - - public static TTypeEntry arrayEntry(TArrayTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setArrayEntry(value); - return x; - } - - public static TTypeEntry mapEntry(TMapTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setMapEntry(value); - return x; - } - - public static TTypeEntry structEntry(TStructTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setStructEntry(value); - return x; - } - - public static TTypeEntry unionEntry(TUnionTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUnionEntry(value); - return x; - } - - public static TTypeEntry userDefinedTypeEntry(TUserDefinedTypeEntry value) { - TTypeEntry x = new TTypeEntry(); - x.setUserDefinedTypeEntry(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case PRIMITIVE_ENTRY: - if (value instanceof TPrimitiveTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TPrimitiveTypeEntry for field 'primitiveEntry', but got " + value.getClass().getSimpleName()); - case ARRAY_ENTRY: - if (value instanceof TArrayTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TArrayTypeEntry for field 'arrayEntry', but got " + value.getClass().getSimpleName()); - case MAP_ENTRY: - if (value instanceof TMapTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TMapTypeEntry for field 'mapEntry', but got " + value.getClass().getSimpleName()); - case STRUCT_ENTRY: - if (value instanceof TStructTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TStructTypeEntry for field 'structEntry', but got " + value.getClass().getSimpleName()); - case UNION_ENTRY: - if (value instanceof TUnionTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUnionTypeEntry for field 'unionEntry', but got " + value.getClass().getSimpleName()); - case USER_DEFINED_TYPE_ENTRY: - if (value instanceof TUserDefinedTypeEntry) { - break; - } - throw new ClassCastException("Was expecting value of type TUserDefinedTypeEntry for field 'userDefinedTypeEntry', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - if (field.type == PRIMITIVE_ENTRY_FIELD_DESC.type) { - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - } else { - 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case ARRAY_ENTRY: - if (field.type == ARRAY_ENTRY_FIELD_DESC.type) { - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case MAP_ENTRY: - if (field.type == MAP_ENTRY_FIELD_DESC.type) { - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRUCT_ENTRY: - if (field.type == STRUCT_ENTRY_FIELD_DESC.type) { - TStructTypeEntry structEntry; - structEntry = new TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case UNION_ENTRY: - if (field.type == UNION_ENTRY_FIELD_DESC.type) { - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case USER_DEFINED_TYPE_ENTRY: - if (field.type == USER_DEFINED_TYPE_ENTRY_FIELD_DESC.type) { - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry; - primitiveEntry = new TPrimitiveTypeEntry(); - primitiveEntry.read(iprot); - return primitiveEntry; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry; - arrayEntry = new TArrayTypeEntry(); - arrayEntry.read(iprot); - return arrayEntry; - case MAP_ENTRY: - TMapTypeEntry mapEntry; - mapEntry = new TMapTypeEntry(); - mapEntry.read(iprot); - return mapEntry; - case STRUCT_ENTRY: - TStructTypeEntry structEntry; - structEntry = new 
TStructTypeEntry(); - structEntry.read(iprot); - return structEntry; - case UNION_ENTRY: - TUnionTypeEntry unionEntry; - unionEntry = new TUnionTypeEntry(); - unionEntry.read(iprot); - return unionEntry; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry; - userDefinedTypeEntry = new TUserDefinedTypeEntry(); - userDefinedTypeEntry.read(iprot); - return userDefinedTypeEntry; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case PRIMITIVE_ENTRY: - TPrimitiveTypeEntry primitiveEntry = (TPrimitiveTypeEntry)value_; - primitiveEntry.write(oprot); - return; - case ARRAY_ENTRY: - TArrayTypeEntry arrayEntry = (TArrayTypeEntry)value_; - arrayEntry.write(oprot); - return; - case MAP_ENTRY: - TMapTypeEntry mapEntry = (TMapTypeEntry)value_; - mapEntry.write(oprot); - return; - case STRUCT_ENTRY: - TStructTypeEntry structEntry = (TStructTypeEntry)value_; - structEntry.write(oprot); - return; - case UNION_ENTRY: - TUnionTypeEntry unionEntry = (TUnionTypeEntry)value_; - unionEntry.write(oprot); - return; - case USER_DEFINED_TYPE_ENTRY: - TUserDefinedTypeEntry userDefinedTypeEntry = (TUserDefinedTypeEntry)value_; - userDefinedTypeEntry.write(oprot); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case PRIMITIVE_ENTRY: - return PRIMITIVE_ENTRY_FIELD_DESC; - case ARRAY_ENTRY: - return ARRAY_ENTRY_FIELD_DESC; - case MAP_ENTRY: - return MAP_ENTRY_FIELD_DESC; - case STRUCT_ENTRY: - return STRUCT_ENTRY_FIELD_DESC; - case UNION_ENTRY: - return UNION_ENTRY_FIELD_DESC; - case USER_DEFINED_TYPE_ENTRY: - return USER_DEFINED_TYPE_ENTRY_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public TPrimitiveTypeEntry getPrimitiveEntry() { - if (getSetField() == _Fields.PRIMITIVE_ENTRY) { - return (TPrimitiveTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'primitiveEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setPrimitiveEntry(TPrimitiveTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.PRIMITIVE_ENTRY; - value_ = value; - } - - public TArrayTypeEntry getArrayEntry() { - if (getSetField() == _Fields.ARRAY_ENTRY) { - return (TArrayTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'arrayEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setArrayEntry(TArrayTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.ARRAY_ENTRY; - value_ = value; - } - - public TMapTypeEntry getMapEntry() { - if (getSetField() == _Fields.MAP_ENTRY) { - return (TMapTypeEntry)getFieldValue(); - } else 
{ - throw new RuntimeException("Cannot get field 'mapEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setMapEntry(TMapTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.MAP_ENTRY; - value_ = value; - } - - public TStructTypeEntry getStructEntry() { - if (getSetField() == _Fields.STRUCT_ENTRY) { - return (TStructTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'structEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStructEntry(TStructTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.STRUCT_ENTRY; - value_ = value; - } - - public TUnionTypeEntry getUnionEntry() { - if (getSetField() == _Fields.UNION_ENTRY) { - return (TUnionTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'unionEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUnionEntry(TUnionTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.UNION_ENTRY; - value_ = value; - } - - public TUserDefinedTypeEntry getUserDefinedTypeEntry() { - if (getSetField() == _Fields.USER_DEFINED_TYPE_ENTRY) { - return (TUserDefinedTypeEntry)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'userDefinedTypeEntry' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setUserDefinedTypeEntry(TUserDefinedTypeEntry value) { - if (value == null) throw new NullPointerException(); - setField_ = _Fields.USER_DEFINED_TYPE_ENTRY; - value_ = value; - } - - public boolean isSetPrimitiveEntry() { - return setField_ == _Fields.PRIMITIVE_ENTRY; - } - - - public boolean isSetArrayEntry() { - return setField_ == _Fields.ARRAY_ENTRY; - } - - - public boolean isSetMapEntry() { - return setField_ == _Fields.MAP_ENTRY; - } - - - public boolean isSetStructEntry() { - return setField_ == _Fields.STRUCT_ENTRY; - } - - - public boolean isSetUnionEntry() { - return setField_ == _Fields.UNION_ENTRY; - } - - - public boolean isSetUserDefinedTypeEntry() { - return setField_ == _Fields.USER_DEFINED_TYPE_ENTRY; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeEntry) { - return equals((TTypeEntry)other); - } else { - return false; - } - } - - public boolean equals(TTypeEntry other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeEntry other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new 
org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java deleted file mode 100644 index a3735ebf3ec07..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeId.java +++ /dev/null @@ -1,105 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - - -import java.util.Map; -import java.util.HashMap; -import org.apache.thrift.TEnum; - -public enum TTypeId implements org.apache.thrift.TEnum { - BOOLEAN_TYPE(0), - TINYINT_TYPE(1), - SMALLINT_TYPE(2), - INT_TYPE(3), - BIGINT_TYPE(4), - FLOAT_TYPE(5), - DOUBLE_TYPE(6), - STRING_TYPE(7), - TIMESTAMP_TYPE(8), - BINARY_TYPE(9), - ARRAY_TYPE(10), - MAP_TYPE(11), - STRUCT_TYPE(12), - UNION_TYPE(13), - USER_DEFINED_TYPE(14), - DECIMAL_TYPE(15), - NULL_TYPE(16), - DATE_TYPE(17), - VARCHAR_TYPE(18), - CHAR_TYPE(19), - INTERVAL_YEAR_MONTH_TYPE(20), - INTERVAL_DAY_TIME_TYPE(21); - - private final int value; - - private TTypeId(int value) { - this.value = value; - } - - /** - * Get the integer value of this enum value, as defined in the Thrift IDL. - */ - public int getValue() { - return value; - } - - /** - * Find a the enum type by its integer value, as defined in the Thrift IDL. - * @return null if the value is not found. 
- */ - public static TTypeId findByValue(int value) { - switch (value) { - case 0: - return BOOLEAN_TYPE; - case 1: - return TINYINT_TYPE; - case 2: - return SMALLINT_TYPE; - case 3: - return INT_TYPE; - case 4: - return BIGINT_TYPE; - case 5: - return FLOAT_TYPE; - case 6: - return DOUBLE_TYPE; - case 7: - return STRING_TYPE; - case 8: - return TIMESTAMP_TYPE; - case 9: - return BINARY_TYPE; - case 10: - return ARRAY_TYPE; - case 11: - return MAP_TYPE; - case 12: - return STRUCT_TYPE; - case 13: - return UNION_TYPE; - case 14: - return USER_DEFINED_TYPE; - case 15: - return DECIMAL_TYPE; - case 16: - return NULL_TYPE; - case 17: - return DATE_TYPE; - case 18: - return VARCHAR_TYPE; - case 19: - return CHAR_TYPE; - case 20: - return INTERVAL_YEAR_MONTH_TYPE; - case 21: - return INTERVAL_DAY_TIME_TYPE; - default: - return null; - } - } -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java deleted file mode 100644 index 1720c0e9a72c2..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifierValue.java +++ /dev/null @@ -1,365 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -public class TTypeQualifierValue extends org.apache.thrift.TUnion { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifierValue"); - private static final org.apache.thrift.protocol.TField I32_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("i32Value", org.apache.thrift.protocol.TType.I32, (short)1); - private static final org.apache.thrift.protocol.TField STRING_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("stringValue", org.apache.thrift.protocol.TType.STRING, (short)2); - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - I32_VALUE((short)1, "i32Value"), - STRING_VALUE((short)2, "stringValue"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. 
- */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // I32_VALUE - return I32_VALUE; - case 2: // STRING_VALUE - return STRING_VALUE; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.I32_VALUE, new org.apache.thrift.meta_data.FieldMetaData("i32Value", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); - tmpMap.put(_Fields.STRING_VALUE, new org.apache.thrift.meta_data.FieldMetaData("stringValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifierValue.class, metaDataMap); - } - - public TTypeQualifierValue() { - super(); - } - - public TTypeQualifierValue(TTypeQualifierValue._Fields setField, Object value) { - super(setField, value); - } - - public TTypeQualifierValue(TTypeQualifierValue other) { - super(other); - } - public TTypeQualifierValue deepCopy() { - return new TTypeQualifierValue(this); - } - - public static TTypeQualifierValue i32Value(int value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setI32Value(value); - return x; - } - - public static TTypeQualifierValue stringValue(String value) { - TTypeQualifierValue x = new TTypeQualifierValue(); - x.setStringValue(value); - return x; - } - - - @Override - protected void checkType(_Fields setField, Object value) throws ClassCastException { - switch (setField) { - case I32_VALUE: - if (value instanceof Integer) { - break; - } - throw new ClassCastException("Was expecting value of type Integer for field 'i32Value', but got " + value.getClass().getSimpleName()); - case STRING_VALUE: - if (value instanceof String) { - break; - } - throw new ClassCastException("Was expecting value of type String for field 'stringValue', but got " + value.getClass().getSimpleName()); - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, org.apache.thrift.protocol.TField field) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(field.id); - if (setField != null) { - switch (setField) { - case I32_VALUE: - if (field.type == I32_VALUE_FIELD_DESC.type) { - Integer i32Value; - i32Value = 
iprot.readI32(); - return i32Value; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - case STRING_VALUE: - if (field.type == STRING_VALUE_FIELD_DESC.type) { - String stringValue; - stringValue = iprot.readString(); - return stringValue; - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); - return null; - } - } - - @Override - protected void standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot, short fieldID) throws org.apache.thrift.TException { - _Fields setField = _Fields.findByThriftId(fieldID); - if (setField != null) { - switch (setField) { - case I32_VALUE: - Integer i32Value; - i32Value = iprot.readI32(); - return i32Value; - case STRING_VALUE: - String stringValue; - stringValue = iprot.readString(); - return stringValue; - default: - throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); - } - } else { - throw new TProtocolException("Couldn't find a field with field id " + fieldID); - } - } - - @Override - protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - switch (setField_) { - case I32_VALUE: - Integer i32Value = (Integer)value_; - oprot.writeI32(i32Value); - return; - case STRING_VALUE: - String stringValue = (String)value_; - oprot.writeString(stringValue); - return; - default: - throw new IllegalStateException("Cannot write union with unknown field " + setField_); - } - } - - @Override - protected org.apache.thrift.protocol.TField getFieldDesc(_Fields setField) { - switch (setField) { - case I32_VALUE: - return I32_VALUE_FIELD_DESC; - case STRING_VALUE: - return STRING_VALUE_FIELD_DESC; - default: - throw new IllegalArgumentException("Unknown field id " + setField); - } - } - - @Override - protected org.apache.thrift.protocol.TStruct getStructDesc() { - return STRUCT_DESC; - } - - @Override - protected _Fields enumForId(short id) { - return _Fields.findByThriftIdOrThrow(id); - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - - public int getI32Value() { - if (getSetField() == _Fields.I32_VALUE) { - return (Integer)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'i32Value' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setI32Value(int value) { - setField_ = _Fields.I32_VALUE; - value_ = value; - } - - public String getStringValue() { - if (getSetField() == _Fields.STRING_VALUE) { - return (String)getFieldValue(); - } else { - throw new RuntimeException("Cannot get field 'stringValue' because union is currently set to " + getFieldDesc(getSetField()).name); - } - } - - public void setStringValue(String value) { - if (value == null) throw new NullPointerException(); - setField_ = 
_Fields.STRING_VALUE; - value_ = value; - } - - public boolean isSetI32Value() { - return setField_ == _Fields.I32_VALUE; - } - - - public boolean isSetStringValue() { - return setField_ == _Fields.STRING_VALUE; - } - - - public boolean equals(Object other) { - if (other instanceof TTypeQualifierValue) { - return equals((TTypeQualifierValue)other); - } else { - return false; - } - } - - public boolean equals(TTypeQualifierValue other) { - return other != null && getSetField() == other.getSetField() && getFieldValue().equals(other.getFieldValue()); - } - - @Override - public int compareTo(TTypeQualifierValue other) { - int lastComparison = org.apache.thrift.TBaseHelper.compareTo(getSetField(), other.getSetField()); - if (lastComparison == 0) { - return org.apache.thrift.TBaseHelper.compareTo(getFieldValue(), other.getFieldValue()); - } - return lastComparison; - } - - - @Override - public int hashCode() { - List list = new ArrayList(); - list.add(this.getClass().getName()); - org.apache.thrift.TFieldIdEnum setField = getSetField(); - if (setField != null) { - list.add(setField.getThriftFieldId()); - Object value = getFieldValue(); - if (value instanceof org.apache.thrift.TEnum) { - list.add(((org.apache.thrift.TEnum)getFieldValue()).getValue()); - } else { - list.add(value); - } - } - return list.hashCode(); - } - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - -} diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java deleted file mode 100644 index f46d2ceb79caa..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TTypeQualifiers.java +++ /dev/null @@ -1,454 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift 
Compiler (0.9.3)") -public class TTypeQualifiers implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifiers"); - - private static final org.apache.thrift.protocol.TField QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("qualifiers", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TTypeQualifiersStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TTypeQualifiersTupleSchemeFactory()); - } - - private Map qualifiers; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - QUALIFIERS((short)1, "qualifiers"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // QUALIFIERS - return QUALIFIERS; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("qualifiers", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifierValue.class)))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifiers.class, metaDataMap); - } - - public TTypeQualifiers() { - } - - public TTypeQualifiers( - Map qualifiers) - { - this(); - this.qualifiers = qualifiers; - } - - /** - * Performs a deep copy on other. 
- */ - public TTypeQualifiers(TTypeQualifiers other) { - if (other.isSetQualifiers()) { - Map __this__qualifiers = new HashMap(other.qualifiers.size()); - for (Map.Entry other_element : other.qualifiers.entrySet()) { - - String other_element_key = other_element.getKey(); - TTypeQualifierValue other_element_value = other_element.getValue(); - - String __this__qualifiers_copy_key = other_element_key; - - TTypeQualifierValue __this__qualifiers_copy_value = new TTypeQualifierValue(other_element_value); - - __this__qualifiers.put(__this__qualifiers_copy_key, __this__qualifiers_copy_value); - } - this.qualifiers = __this__qualifiers; - } - } - - public TTypeQualifiers deepCopy() { - return new TTypeQualifiers(this); - } - - @Override - public void clear() { - this.qualifiers = null; - } - - public int getQualifiersSize() { - return (this.qualifiers == null) ? 0 : this.qualifiers.size(); - } - - public void putToQualifiers(String key, TTypeQualifierValue val) { - if (this.qualifiers == null) { - this.qualifiers = new HashMap(); - } - this.qualifiers.put(key, val); - } - - public Map getQualifiers() { - return this.qualifiers; - } - - public void setQualifiers(Map qualifiers) { - this.qualifiers = qualifiers; - } - - public void unsetQualifiers() { - this.qualifiers = null; - } - - /** Returns true if field qualifiers is set (has been assigned a value) and false otherwise */ - public boolean isSetQualifiers() { - return this.qualifiers != null; - } - - public void setQualifiersIsSet(boolean value) { - if (!value) { - this.qualifiers = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case QUALIFIERS: - if (value == null) { - unsetQualifiers(); - } else { - setQualifiers((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case QUALIFIERS: - return getQualifiers(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case QUALIFIERS: - return isSetQualifiers(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TTypeQualifiers) - return this.equals((TTypeQualifiers)that); - return false; - } - - public boolean equals(TTypeQualifiers that) { - if (that == null) - return false; - - boolean this_present_qualifiers = true && this.isSetQualifiers(); - boolean that_present_qualifiers = true && that.isSetQualifiers(); - if (this_present_qualifiers || that_present_qualifiers) { - if (!(this_present_qualifiers && that_present_qualifiers)) - return false; - if (!this.qualifiers.equals(that.qualifiers)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_qualifiers = true && (isSetQualifiers()); - list.add(present_qualifiers); - if (present_qualifiers) - list.add(qualifiers); - - return list.hashCode(); - } - - @Override - public int compareTo(TTypeQualifiers other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetQualifiers()).compareTo(other.isSetQualifiers()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetQualifiers()) { - 
lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.qualifiers, other.qualifiers); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TTypeQualifiers("); - boolean first = true; - - sb.append("qualifiers:"); - if (this.qualifiers == null) { - sb.append("null"); - } else { - sb.append(this.qualifiers); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetQualifiers()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'qualifiers' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TTypeQualifiersStandardSchemeFactory implements SchemeFactory { - public TTypeQualifiersStandardScheme getScheme() { - return new TTypeQualifiersStandardScheme(); - } - } - - private static class TTypeQualifiersStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // QUALIFIERS - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map0 = iprot.readMapBegin(); - struct.qualifiers = new HashMap(2*_map0.size); - String _key1; - TTypeQualifierValue _val2; - for (int _i3 = 0; _i3 < _map0.size; ++_i3) - { - _key1 = iprot.readString(); - _val2 = new TTypeQualifierValue(); - _val2.read(iprot); - struct.qualifiers.put(_key1, _val2); - } - iprot.readMapEnd(); - } - struct.setQualifiersIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeQualifiers struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.qualifiers != null) { - oprot.writeFieldBegin(QUALIFIERS_FIELD_DESC); - { - oprot.writeMapBegin(new 
org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, struct.qualifiers.size())); - for (Map.Entry _iter4 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter4.getKey()); - _iter4.getValue().write(oprot); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TTypeQualifiersTupleSchemeFactory implements SchemeFactory { - public TTypeQualifiersTupleScheme getScheme() { - return new TTypeQualifiersTupleScheme(); - } - } - - private static class TTypeQualifiersTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.qualifiers.size()); - for (Map.Entry _iter5 : struct.qualifiers.entrySet()) - { - oprot.writeString(_iter5.getKey()); - _iter5.getValue().write(oprot); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map6 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.STRUCT, iprot.readI32()); - struct.qualifiers = new HashMap(2*_map6.size); - String _key7; - TTypeQualifierValue _val8; - for (int _i9 = 0; _i9 < _map6.size; ++_i9) - { - _key7 = iprot.readString(); - _val8 = new TTypeQualifierValue(); - _val8.read(iprot); - struct.qualifiers.put(_key7, _val8); - } - } - struct.setQualifiersIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java deleted file mode 100644 index d53f74cb8eff1..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUnionTypeEntry.java +++ /dev/null @@ -1,452 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class TUnionTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new 
org.apache.thrift.protocol.TStruct("TUnionTypeEntry"); - - private static final org.apache.thrift.protocol.TField NAME_TO_TYPE_PTR_FIELD_DESC = new org.apache.thrift.protocol.TField("nameToTypePtr", org.apache.thrift.protocol.TType.MAP, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUnionTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUnionTypeEntryTupleSchemeFactory()); - } - - private Map nameToTypePtr; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - NAME_TO_TYPE_PTR((short)1, "nameToTypePtr"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // NAME_TO_TYPE_PTR - return NAME_TO_TYPE_PTR; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.NAME_TO_TYPE_PTR, new org.apache.thrift.meta_data.FieldMetaData("nameToTypePtr", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.MapMetaData(org.apache.thrift.protocol.TType.MAP, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING), - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32 , "TTypeEntryPtr")))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUnionTypeEntry.class, metaDataMap); - } - - public TUnionTypeEntry() { - } - - public TUnionTypeEntry( - Map nameToTypePtr) - { - this(); - this.nameToTypePtr = nameToTypePtr; - } - - /** - * Performs a deep copy on other. 
- */ - public TUnionTypeEntry(TUnionTypeEntry other) { - if (other.isSetNameToTypePtr()) { - Map __this__nameToTypePtr = new HashMap(other.nameToTypePtr.size()); - for (Map.Entry other_element : other.nameToTypePtr.entrySet()) { - - String other_element_key = other_element.getKey(); - Integer other_element_value = other_element.getValue(); - - String __this__nameToTypePtr_copy_key = other_element_key; - - Integer __this__nameToTypePtr_copy_value = other_element_value; - - __this__nameToTypePtr.put(__this__nameToTypePtr_copy_key, __this__nameToTypePtr_copy_value); - } - this.nameToTypePtr = __this__nameToTypePtr; - } - } - - public TUnionTypeEntry deepCopy() { - return new TUnionTypeEntry(this); - } - - @Override - public void clear() { - this.nameToTypePtr = null; - } - - public int getNameToTypePtrSize() { - return (this.nameToTypePtr == null) ? 0 : this.nameToTypePtr.size(); - } - - public void putToNameToTypePtr(String key, int val) { - if (this.nameToTypePtr == null) { - this.nameToTypePtr = new HashMap(); - } - this.nameToTypePtr.put(key, val); - } - - public Map getNameToTypePtr() { - return this.nameToTypePtr; - } - - public void setNameToTypePtr(Map nameToTypePtr) { - this.nameToTypePtr = nameToTypePtr; - } - - public void unsetNameToTypePtr() { - this.nameToTypePtr = null; - } - - /** Returns true if field nameToTypePtr is set (has been assigned a value) and false otherwise */ - public boolean isSetNameToTypePtr() { - return this.nameToTypePtr != null; - } - - public void setNameToTypePtrIsSet(boolean value) { - if (!value) { - this.nameToTypePtr = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case NAME_TO_TYPE_PTR: - if (value == null) { - unsetNameToTypePtr(); - } else { - setNameToTypePtr((Map)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case NAME_TO_TYPE_PTR: - return getNameToTypePtr(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case NAME_TO_TYPE_PTR: - return isSetNameToTypePtr(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUnionTypeEntry) - return this.equals((TUnionTypeEntry)that); - return false; - } - - public boolean equals(TUnionTypeEntry that) { - if (that == null) - return false; - - boolean this_present_nameToTypePtr = true && this.isSetNameToTypePtr(); - boolean that_present_nameToTypePtr = true && that.isSetNameToTypePtr(); - if (this_present_nameToTypePtr || that_present_nameToTypePtr) { - if (!(this_present_nameToTypePtr && that_present_nameToTypePtr)) - return false; - if (!this.nameToTypePtr.equals(that.nameToTypePtr)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_nameToTypePtr = true && (isSetNameToTypePtr()); - list.add(present_nameToTypePtr); - if (present_nameToTypePtr) - list.add(nameToTypePtr); - - return list.hashCode(); - } - - @Override - public int compareTo(TUnionTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = 
Boolean.valueOf(isSetNameToTypePtr()).compareTo(other.isSetNameToTypePtr()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetNameToTypePtr()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.nameToTypePtr, other.nameToTypePtr); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder("TUnionTypeEntry("); - boolean first = true; - - sb.append("nameToTypePtr:"); - if (this.nameToTypePtr == null) { - sb.append("null"); - } else { - sb.append(this.nameToTypePtr); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetNameToTypePtr()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'nameToTypePtr' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUnionTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUnionTypeEntryStandardScheme getScheme() { - return new TUnionTypeEntryStandardScheme(); - } - } - - private static class TUnionTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // NAME_TO_TYPE_PTR - if (schemeField.type == org.apache.thrift.protocol.TType.MAP) { - { - org.apache.thrift.protocol.TMap _map20 = iprot.readMapBegin(); - struct.nameToTypePtr = new HashMap(2*_map20.size); - String _key21; - int _val22; - for (int _i23 = 0; _i23 < _map20.size; ++_i23) - { - _key21 = iprot.readString(); - _val22 = iprot.readI32(); - struct.nameToTypePtr.put(_key21, _val22); - } - iprot.readMapEnd(); - } - struct.setNameToTypePtrIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - 
struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.nameToTypePtr != null) { - oprot.writeFieldBegin(NAME_TO_TYPE_PTR_FIELD_DESC); - { - oprot.writeMapBegin(new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, struct.nameToTypePtr.size())); - for (Map.Entry _iter24 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter24.getKey()); - oprot.writeI32(_iter24.getValue()); - } - oprot.writeMapEnd(); - } - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUnionTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUnionTypeEntryTupleScheme getScheme() { - return new TUnionTypeEntryTupleScheme(); - } - } - - private static class TUnionTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - { - oprot.writeI32(struct.nameToTypePtr.size()); - for (Map.Entry _iter25 : struct.nameToTypePtr.entrySet()) - { - oprot.writeString(_iter25.getKey()); - oprot.writeI32(_iter25.getValue()); - } - } - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUnionTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - { - org.apache.thrift.protocol.TMap _map26 = new org.apache.thrift.protocol.TMap(org.apache.thrift.protocol.TType.STRING, org.apache.thrift.protocol.TType.I32, iprot.readI32()); - struct.nameToTypePtr = new HashMap(2*_map26.size); - String _key27; - int _val28; - for (int _i29 = 0; _i29 < _map26.size; ++_i29) - { - _key27 = iprot.readString(); - _val28 = iprot.readI32(); - struct.nameToTypePtr.put(_key27, _val28); - } - } - struct.setNameToTypePtrIsSet(true); - } - } - -} - diff --git a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java b/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java deleted file mode 100644 index b80c4dd5c6302..0000000000000 --- a/sql/hive-thriftserver/src/gen/java/org/apache/hive/service/rpc/thrift/TUserDefinedTypeEntry.java +++ /dev/null @@ -1,389 +0,0 @@ -/** - * Autogenerated by Thrift Compiler (0.9.3) - * - * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING - * @generated - */ -package org.apache.hive.service.rpc.thrift; - -import org.apache.thrift.scheme.IScheme; -import org.apache.thrift.scheme.SchemeFactory; -import org.apache.thrift.scheme.StandardScheme; - -import org.apache.thrift.scheme.TupleScheme; -import org.apache.thrift.protocol.TTupleProtocol; -import org.apache.thrift.protocol.TProtocolException; -import org.apache.thrift.EncodingUtils; -import org.apache.thrift.TException; -import org.apache.thrift.async.AsyncMethodCallback; -import org.apache.thrift.server.AbstractNonblockingServer.*; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; -import java.util.EnumMap; -import java.util.Set; -import java.util.HashSet; -import java.util.EnumSet; -import java.util.Collections; -import java.util.BitSet; -import java.nio.ByteBuffer; -import java.util.Arrays; -import javax.annotation.Generated; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) -@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") -public class 
TUserDefinedTypeEntry implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { - private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TUserDefinedTypeEntry"); - - private static final org.apache.thrift.protocol.TField TYPE_CLASS_NAME_FIELD_DESC = new org.apache.thrift.protocol.TField("typeClassName", org.apache.thrift.protocol.TType.STRING, (short)1); - - private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); - static { - schemes.put(StandardScheme.class, new TUserDefinedTypeEntryStandardSchemeFactory()); - schemes.put(TupleScheme.class, new TUserDefinedTypeEntryTupleSchemeFactory()); - } - - private String typeClassName; // required - - /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ - public enum _Fields implements org.apache.thrift.TFieldIdEnum { - TYPE_CLASS_NAME((short)1, "typeClassName"); - - private static final Map byName = new HashMap(); - - static { - for (_Fields field : EnumSet.allOf(_Fields.class)) { - byName.put(field.getFieldName(), field); - } - } - - /** - * Find the _Fields constant that matches fieldId, or null if its not found. - */ - public static _Fields findByThriftId(int fieldId) { - switch(fieldId) { - case 1: // TYPE_CLASS_NAME - return TYPE_CLASS_NAME; - default: - return null; - } - } - - /** - * Find the _Fields constant that matches fieldId, throwing an exception - * if it is not found. - */ - public static _Fields findByThriftIdOrThrow(int fieldId) { - _Fields fields = findByThriftId(fieldId); - if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); - return fields; - } - - /** - * Find the _Fields constant that matches name, or null if its not found. - */ - public static _Fields findByName(String name) { - return byName.get(name); - } - - private final short _thriftId; - private final String _fieldName; - - _Fields(short thriftId, String fieldName) { - _thriftId = thriftId; - _fieldName = fieldName; - } - - public short getThriftFieldId() { - return _thriftId; - } - - public String getFieldName() { - return _fieldName; - } - } - - // isset id assignments - public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; - static { - Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); - tmpMap.put(_Fields.TYPE_CLASS_NAME, new org.apache.thrift.meta_data.FieldMetaData("typeClassName", org.apache.thrift.TFieldRequirementType.REQUIRED, - new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING))); - metaDataMap = Collections.unmodifiableMap(tmpMap); - org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TUserDefinedTypeEntry.class, metaDataMap); - } - - public TUserDefinedTypeEntry() { - } - - public TUserDefinedTypeEntry( - String typeClassName) - { - this(); - this.typeClassName = typeClassName; - } - - /** - * Performs a deep copy on other. 
- */ - public TUserDefinedTypeEntry(TUserDefinedTypeEntry other) { - if (other.isSetTypeClassName()) { - this.typeClassName = other.typeClassName; - } - } - - public TUserDefinedTypeEntry deepCopy() { - return new TUserDefinedTypeEntry(this); - } - - @Override - public void clear() { - this.typeClassName = null; - } - - public String getTypeClassName() { - return this.typeClassName; - } - - public void setTypeClassName(String typeClassName) { - this.typeClassName = typeClassName; - } - - public void unsetTypeClassName() { - this.typeClassName = null; - } - - /** Returns true if field typeClassName is set (has been assigned a value) and false otherwise */ - public boolean isSetTypeClassName() { - return this.typeClassName != null; - } - - public void setTypeClassNameIsSet(boolean value) { - if (!value) { - this.typeClassName = null; - } - } - - public void setFieldValue(_Fields field, Object value) { - switch (field) { - case TYPE_CLASS_NAME: - if (value == null) { - unsetTypeClassName(); - } else { - setTypeClassName((String)value); - } - break; - - } - } - - public Object getFieldValue(_Fields field) { - switch (field) { - case TYPE_CLASS_NAME: - return getTypeClassName(); - - } - throw new IllegalStateException(); - } - - /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ - public boolean isSet(_Fields field) { - if (field == null) { - throw new IllegalArgumentException(); - } - - switch (field) { - case TYPE_CLASS_NAME: - return isSetTypeClassName(); - } - throw new IllegalStateException(); - } - - @Override - public boolean equals(Object that) { - if (that == null) - return false; - if (that instanceof TUserDefinedTypeEntry) - return this.equals((TUserDefinedTypeEntry)that); - return false; - } - - public boolean equals(TUserDefinedTypeEntry that) { - if (that == null) - return false; - - boolean this_present_typeClassName = true && this.isSetTypeClassName(); - boolean that_present_typeClassName = true && that.isSetTypeClassName(); - if (this_present_typeClassName || that_present_typeClassName) { - if (!(this_present_typeClassName && that_present_typeClassName)) - return false; - if (!this.typeClassName.equals(that.typeClassName)) - return false; - } - - return true; - } - - @Override - public int hashCode() { - List list = new ArrayList(); - - boolean present_typeClassName = true && (isSetTypeClassName()); - list.add(present_typeClassName); - if (present_typeClassName) - list.add(typeClassName); - - return list.hashCode(); - } - - @Override - public int compareTo(TUserDefinedTypeEntry other) { - if (!getClass().equals(other.getClass())) { - return getClass().getName().compareTo(other.getClass().getName()); - } - - int lastComparison = 0; - - lastComparison = Boolean.valueOf(isSetTypeClassName()).compareTo(other.isSetTypeClassName()); - if (lastComparison != 0) { - return lastComparison; - } - if (isSetTypeClassName()) { - lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeClassName, other.typeClassName); - if (lastComparison != 0) { - return lastComparison; - } - } - return 0; - } - - public _Fields fieldForId(int fieldId) { - return _Fields.findByThriftId(fieldId); - } - - public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { - schemes.get(iprot.getScheme()).getScheme().read(iprot, this); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { - schemes.get(oprot.getScheme()).getScheme().write(oprot, this); - } - - 
@Override - public String toString() { - StringBuilder sb = new StringBuilder("TUserDefinedTypeEntry("); - boolean first = true; - - sb.append("typeClassName:"); - if (this.typeClassName == null) { - sb.append("null"); - } else { - sb.append(this.typeClassName); - } - first = false; - sb.append(")"); - return sb.toString(); - } - - public void validate() throws org.apache.thrift.TException { - // check for required fields - if (!isSetTypeClassName()) { - throw new org.apache.thrift.protocol.TProtocolException("Required field 'typeClassName' is unset! Struct:" + toString()); - } - - // check for sub-struct validity - } - - private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { - try { - write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { - try { - read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); - } catch (org.apache.thrift.TException te) { - throw new java.io.IOException(te); - } - } - - private static class TUserDefinedTypeEntryStandardSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryStandardScheme getScheme() { - return new TUserDefinedTypeEntryStandardScheme(); - } - } - - private static class TUserDefinedTypeEntryStandardScheme extends StandardScheme { - - public void read(org.apache.thrift.protocol.TProtocol iprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - org.apache.thrift.protocol.TField schemeField; - iprot.readStructBegin(); - while (true) - { - schemeField = iprot.readFieldBegin(); - if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { - break; - } - switch (schemeField.id) { - case 1: // TYPE_CLASS_NAME - if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { - struct.typeClassName = iprot.readString(); - struct.setTypeClassNameIsSet(true); - } else { - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - break; - default: - org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); - } - iprot.readFieldEnd(); - } - iprot.readStructEnd(); - struct.validate(); - } - - public void write(org.apache.thrift.protocol.TProtocol oprot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - struct.validate(); - - oprot.writeStructBegin(STRUCT_DESC); - if (struct.typeClassName != null) { - oprot.writeFieldBegin(TYPE_CLASS_NAME_FIELD_DESC); - oprot.writeString(struct.typeClassName); - oprot.writeFieldEnd(); - } - oprot.writeFieldStop(); - oprot.writeStructEnd(); - } - - } - - private static class TUserDefinedTypeEntryTupleSchemeFactory implements SchemeFactory { - public TUserDefinedTypeEntryTupleScheme getScheme() { - return new TUserDefinedTypeEntryTupleScheme(); - } - } - - private static class TUserDefinedTypeEntryTupleScheme extends TupleScheme { - - @Override - public void write(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol oprot = (TTupleProtocol) prot; - oprot.writeString(struct.typeClassName); - } - - @Override - public void read(org.apache.thrift.protocol.TProtocol prot, TUserDefinedTypeEntry struct) throws org.apache.thrift.TException { - TTupleProtocol iprot = (TTupleProtocol) prot; - struct.typeClassName = 
iprot.readString(); - struct.setTypeClassNameIsSet(true); - } - } - -} - From ab0bad9544367727fc017a9a43e4c5bf86da0445 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 16 Oct 2020 12:52:45 -0700 Subject: [PATCH 0260/1009] [SPARK-33171][INFRA] Mark ParquetV*FilterSuite/ParquetV*SchemaPruningSuite as ExtendedSQLTest ### What changes were proposed in this pull request? This PR aims to mark ParquetV1FilterSuite and ParquetV2FilterSuite as `ExtendedSQLTest`. - ParquetV1FilterSuite/ParquetV2FilterSuite - ParquetV1SchemaPruningSuite/ParquetV2SchemaPruningSuite ### Why are the changes needed? Currently, `sql - other tests` is the longest job. This PR will move the above tests to `sql - slow tests` job. **BEFORE** - https://github.com/apache/spark/runs/1264150802 (1 hour 37 minutes) **AFTER** - https://github.com/apache/spark/pull/30068/checks?check_run_id=1265879896 (1 hour 21 minutes) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the Github Action with the reduced time. Closes #30068 from dongjoon-hyun/MOVE3. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../sql/execution/datasources/parquet/ParquetFilterSuite.scala | 3 +++ .../datasources/parquet/ParquetSchemaPruningSuite.scala | 3 +++ 2 files changed, 6 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 5689b9d05d7bb..763f9315bfc5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -45,6 +45,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.tags.ExtendedSQLTest import org.apache.spark.util.{AccumulatorContext, AccumulatorV2} /** @@ -1571,6 +1572,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } +@ExtendedSQLTest class ParquetV1FilterSuite extends ParquetFilterSuite { override protected def sparkConf: SparkConf = super @@ -1650,6 +1652,7 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { } } +@ExtendedSQLTest class ParquetV2FilterSuite extends ParquetFilterSuite { // TODO: enable Parquet V2 write path after file source V2 writers are workable. 
override protected def sparkConf: SparkConf = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index c64e95078e916..cab93bd96fff4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.execution.datasources.SchemaPruningSuite import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.tags.ExtendedSQLTest abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanHelper { override protected val dataSourceName: String = "parquet" @@ -33,6 +34,7 @@ abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with Adaptiv } +@ExtendedSQLTest class ParquetV1SchemaPruningSuite extends ParquetSchemaPruningSuite { override protected def sparkConf: SparkConf = super @@ -40,6 +42,7 @@ class ParquetV1SchemaPruningSuite extends ParquetSchemaPruningSuite { .set(SQLConf.USE_V1_SOURCE_LIST, "parquet") } +@ExtendedSQLTest class ParquetV2SchemaPruningSuite extends ParquetSchemaPruningSuite { // TODO: enable Parquet V2 write path after file source V2 writers are workable. override protected def sparkConf: SparkConf = From acb79f52db6f2b7e84fda005e3a38ea2aa3fc5ce Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 16 Oct 2020 14:27:27 -0700 Subject: [PATCH 0261/1009] [MINOR][SQL] Re-use `binaryToSQLTimestamp()` in `ParquetRowConverter` ### What changes were proposed in this pull request? The function `binaryToSQLTimestamp()` is used by Parquet Vectorized reader. Parquet MR reader has similar code for de-serialization of INT96 timestamps. In this PR, I propose to de-duplicate code and re-use `binaryToSQLTimestamp()`. ### Why are the changes needed? This should improve maintenance, and should allow to avoid errors while changing Vectorized and regular parquet readers. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By existing test suites, for instance `ParquetIOSuite`. Closes #30069 from MaxGekk/int96-common-serde. 
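For readers who want the decoding spelled out: the logic being de-duplicated boils down to the INT96 layout handled below. This is only a sketch reconstructed from the converter code removed in this patch (a 12-byte binary holding nanos-of-day followed by a Julian day, little-endian); `int96ToMicros` is a hypothetical standalone name, not the signature of `ParquetRowConverter.binaryToSQLTimestamp` itself.

```scala
import java.nio.ByteOrder

import org.apache.parquet.io.api.Binary
import org.apache.spark.sql.catalyst.util.DateTimeUtils

// Sketch of the shared INT96 decoding: 8 bytes of nanos-of-day, then a 4-byte
// Julian day, both little-endian, combined into microseconds since the epoch.
def int96ToMicros(value: Binary): Long = {
  assert(value.length() == 12,
    s"Timestamps (with nanoseconds) are expected in 12-byte binaries, got ${value.length()} bytes.")
  val buf = value.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN)
  val timeOfDayNanos = buf.getLong
  val julianDay = buf.getInt
  DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos)
}
```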
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/parquet/ParquetRowConverter.scala | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 9a010d7192081..e0008ed16d56d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -300,15 +300,7 @@ private[parquet] class ParquetRowConverter( new ParquetPrimitiveConverter(updater) { // Converts nanosecond timestamps stored as INT96 override def addBinary(value: Binary): Unit = { - assert( - value.length() == 12, - "Timestamps (with nanoseconds) are expected to be stored in 12-byte long binaries, " + - s"but got a ${value.length()}-byte binary.") - - val buf = value.toByteBuffer.order(ByteOrder.LITTLE_ENDIAN) - val timeOfDayNanos = buf.getLong - val julianDay = buf.getInt - val rawTime = DateTimeUtils.fromJulianDay(julianDay, timeOfDayNanos) + val rawTime = ParquetRowConverter.binaryToSQLTimestamp(value) val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, ZoneOffset.UTC)) .getOrElse(rawTime) updater.setLong(adjTime)

From ce6180c8c3b67a09b32735a2f5f9154d7d9aa14e Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 16 Oct 2020 14:47:46 -0700 Subject: [PATCH 0262/1009] [SPARK-33154][CORE][K8S] Handle cleaned shuffles during migration

### What changes were proposed in this pull request? If a block is removed between its discovery and its transfer, we short-circuit that block: it is removed from the list to transfer and the count of transferred blocks is incremented. This is complicated since both RPC errors and local read errors may be reported with the same exception class.

### Why are the changes needed? Slow shuffle refreshes could waste time when decommissioning has already finished. Decommissioning might avoid transferring some blocks to an otherwise live host which is marked as "full" if a deleted block fails to transfer to that host.

### Does this PR introduce _any_ user-facing change? No.

### How was this patch tested? New unit and integration tests.

Closes #30046 from holdenk/handle-cleaned-shuffles-during0migration.
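The disambiguation described above works roughly like this: when an upload throws an `IOException`, the migrator re-resolves the shuffle block locally; if nothing resolves any more, the block was deleted and is skipped, otherwise the error is rethrown. The following is only a minimal, self-contained sketch of that pattern; `resolveBlocks` and `upload` are hypothetical stand-ins for the resolver and block transfer service used in the actual change.

```scala
import java.io.IOException

// Hedged sketch of the short-circuit pattern used during block migration.
// `resolveBlocks` stands in for the migratable resolver (an empty result means the
// block no longer exists locally) and `upload` for the block transfer call.
def migrateOrSkip[B](
    block: B,
    resolveBlocks: B => Seq[Array[Byte]],
    upload: Array[Byte] => Unit): Unit = {
  try {
    resolveBlocks(block).foreach(upload)
  } catch {
    case e: IOException =>
      // An IOException can mean "peer failed" or "file deleted under us"; re-resolving
      // locally disambiguates. Nothing left to resolve => the block was cleaned up
      // (e.g. by the shuffle TTL cleaner), so skip it instead of failing the migration.
      if (resolveBlocks(block).isEmpty) {
        println(s"Skipping $block, block deleted.") // stands in for logWarning
      } else {
        throw e
      }
  }
}
```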
Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/network/BlockTransferService.scala | 1 + .../shuffle/IndexShuffleBlockResolver.scala | 39 +++++--- .../storage/BlockManagerDecommissioner.scala | 52 ++++++---- .../BlockManagerDecommissionUnitSuite.scala | 99 +++++++++++++++++-- .../integrationtest/DecommissionSuite.scala | 71 +++++++++++++ .../k8s/integrationtest/ProcessUtils.scala | 4 +- .../integration-tests/tests/autoscale.py | 49 +++++++++ .../tests/decommissioning.py | 2 +- .../tests/decommissioning_cleanup.py | 59 +++++++++++ 9 files changed, 334 insertions(+), 42 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/tests/autoscale.py create mode 100644 resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index 98129b62b53df..c7f5a97e35612 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -110,6 +110,7 @@ abstract class BlockTransferService extends BlockStoreClient { * This method is similar to [[uploadBlock]], except this one blocks the thread * until the upload finishes. */ + @throws[java.io.IOException] def uploadBlockSync( hostname: String, port: Int, diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index a019a3382d5b2..9496918760298 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -225,19 +225,32 @@ private[spark] class IndexShuffleBlockResolver( * Get the index & data block for migration. 
*/ def getMigrationBlocks(shuffleBlockInfo: ShuffleBlockInfo): List[(BlockId, ManagedBuffer)] = { - val shuffleId = shuffleBlockInfo.shuffleId - val mapId = shuffleBlockInfo.mapId - // Load the index block - val indexFile = getIndexFile(shuffleId, mapId) - val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) - val indexFileSize = indexFile.length() - val indexBlockData = new FileSegmentManagedBuffer(transportConf, indexFile, 0, indexFileSize) - - // Load the data block - val dataFile = getDataFile(shuffleId, mapId) - val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) - val dataBlockData = new FileSegmentManagedBuffer(transportConf, dataFile, 0, dataFile.length()) - List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + try { + val shuffleId = shuffleBlockInfo.shuffleId + val mapId = shuffleBlockInfo.mapId + // Load the index block + val indexFile = getIndexFile(shuffleId, mapId) + val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + val indexFileSize = indexFile.length() + val indexBlockData = new FileSegmentManagedBuffer( + transportConf, indexFile, 0, indexFileSize) + + // Load the data block + val dataFile = getDataFile(shuffleId, mapId) + val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + val dataBlockData = new FileSegmentManagedBuffer( + transportConf, dataFile, 0, dataFile.length()) + + // Make sure the files exist + assert(indexFile.exists() && dataFile.exists()) + + List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + } catch { + case e: Exception => // If we can't load the blocks ignore them. + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration" + + "this is expected to occure if a block is removed after decommissioning has started.") + List.empty[(BlockId, ManagedBuffer)] + } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index f0a8e47aa3200..3377b357a9231 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -17,6 +17,7 @@ package org.apache.spark.storage +import java.io.IOException import java.util.concurrent.ExecutorService import java.util.concurrent.atomic.AtomicInteger @@ -82,23 +83,38 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") - val blocks = - bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) + logDebug(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") + val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) logDebug(s"Got migration sub-blocks ${blocks}") - blocks.foreach { case (blockId, buffer) => - logDebug(s"Migrating sub-block ${blockId}") - bm.blockTransferService.uploadBlockSync( - peer.host, - peer.port, - peer.executorId, - blockId, - buffer, - StorageLevel.DISK_ONLY, - null)// class tag, we don't need for shuffle - logDebug(s"Migrated sub block ${blockId}") + + // Migrate the components of the blocks. 
+ try { + blocks.foreach { case (blockId, buffer) => + logDebug(s"Migrating sub-block ${blockId}") + bm.blockTransferService.uploadBlockSync( + peer.host, + peer.port, + peer.executorId, + blockId, + buffer, + StorageLevel.DISK_ONLY, + null)// class tag, we don't need for shuffle + logDebug(s"Migrated sub block ${blockId}") + } + logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") + } catch { + case e: IOException => + // If a block got deleted before netty opened the file handle, then trying to + // load the blocks now will fail. This is most likely to occur if we start + // migrating blocks and then the shuffle TTL cleaner kicks in. However this + // could also happen with manually managed shuffles or a GC event on the driver + // a no longer referenced RDD with shuffle files. + if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { + logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else { + throw e + } } - logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") } else { logError(s"Skipping block ${shuffleBlockInfo} because it has failed ${retryCount}") } @@ -121,11 +137,11 @@ private[storage] class BlockManagerDecommissioner( } // Shuffles which are either in queue for migrations or migrated - private val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() + protected[storage] val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() // Shuffles which have migrated. This used to know when we are "done", being done can change // if a new shuffle file is created by a running task. - private val numMigratedShuffles = new AtomicInteger(0) + private[storage] val numMigratedShuffles = new AtomicInteger(0) // Shuffles which are queued for migration & number of retries so far. // Visible in storage for testing. @@ -225,7 +241,7 @@ private[storage] class BlockManagerDecommissioner( // Update the queue of shuffles to be migrated logInfo("Offloading shuffle blocks") val localShuffles = bm.migratableResolver.getStoredShuffles().toSet - val newShufflesToMigrate = localShuffles.diff(migratingShuffles).toSeq + val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala index 74ad8bd2bcf9d..a87fc1835f6b5 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala @@ -63,9 +63,14 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { * a constant Long.MaxValue timestamp. 
*/ private def validateDecommissionTimestamps(conf: SparkConf, bm: BlockManager, - migratableShuffleBlockResolver: MigratableResolver, fail: Boolean = false) = { + fail: Boolean = false) = { // Verify the decommissioning manager timestamps and status val bmDecomManager = new BlockManagerDecommissioner(conf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail) + } + + private def validateDecommissionTimestampsOnManager(bmDecomManager: BlockManagerDecommissioner, + fail: Boolean = false, numShuffles: Option[Int] = None) = { var previousTime: Option[Long] = None try { bmDecomManager.start() @@ -85,6 +90,9 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // If we expect migration to fail we should get the max value quickly. assert(currentTime === Long.MaxValue) } + numShuffles.foreach { s => + assert(bmDecomManager.numMigratedShuffles.get() === s) + } } if (!fail) { // Wait 5 seconds and assert times keep moving forward. @@ -110,7 +118,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver) + validateDecommissionTimestamps(sparkConf, bm) } test("block decom manager with no migrations configured") { @@ -128,8 +136,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED, false) .set(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL, 10L) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(badConf, bm, migratableShuffleBlockResolver, - fail = true) + validateDecommissionTimestamps(badConf, bm, fail = true) } test("block decom manager with no peers") { @@ -144,8 +151,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq()) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver, - fail = true) + validateDecommissionTimestamps(sparkConf, bm, fail = true) } @@ -161,7 +167,83 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) // Verify the decom manager handles this correctly - validateDecommissionTimestamps(sparkConf, bm, migratableShuffleBlockResolver) + validateDecommissionTimestamps(sparkConf, bm) + } + + test("block decom manager does not re-add removed shuffle files") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + registerShuffleBlocks(migratableShuffleBlockResolver, Set()) + when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + bmDecomManager.migratingShuffles += ShuffleBlockInfo(10, 10) + + validateDecommissionTimestampsOnManager(bmDecomManager) + } + + test("block decom manager handles IO failures") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + registerShuffleBlocks(migratableShuffleBlockResolver, Set((1, 1L, 1))) + 
when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + + val blockTransferService = mock(classOf[BlockTransferService]) + // Simulate an ambiguous IO error (e.g. block could be gone, connection failed, etc.) + when(blockTransferService.uploadBlockSync( + mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.isNull())).thenThrow( + new java.io.IOException("boop") + ) + + when(bm.blockTransferService).thenReturn(blockTransferService) + + // Verify the decom manager handles this correctly + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false) + } + + test("block decom manager short circuits removed blocks") { + // Set up the mocks so we return one shuffle block + val bm = mock(classOf[BlockManager]) + val migratableShuffleBlockResolver = mock(classOf[MigratableResolver]) + // First call get blocks, then empty list simulating a delete. + when(migratableShuffleBlockResolver.getStoredShuffles()) + .thenReturn(Seq(ShuffleBlockInfo(1, 1))) + .thenReturn(Seq()) + when(migratableShuffleBlockResolver.getMigrationBlocks(mc.any())) + .thenReturn(List( + (ShuffleIndexBlockId(1, 1, 1), mock(classOf[ManagedBuffer])), + (ShuffleDataBlockId(1, 1, 1), mock(classOf[ManagedBuffer])))) + .thenReturn(List()) + + when(bm.migratableResolver).thenReturn(migratableShuffleBlockResolver) + when(bm.getMigratableRDDBlocks()) + .thenReturn(Seq()) + when(bm.getPeers(mc.any())) + .thenReturn(Seq(BlockManagerId("exec2", "host2", 12345))) + + val blockTransferService = mock(classOf[BlockTransferService]) + // Simulate an ambiguous IO error (e.g. block could be gone, connection failed, etc.) + when(blockTransferService.uploadBlockSync( + mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.isNull())).thenThrow( + new java.io.IOException("boop") + ) + + when(bm.blockTransferService).thenReturn(blockTransferService) + + // Verify the decom manager handles this correctly + val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false, + numShuffles = Some(1)) } test("test shuffle and cached rdd migration without any error") { @@ -192,7 +274,8 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // We don't check that all blocks are migrated because out mock is always returning an RDD. 
eventually(timeout(100.second), interval(10.milliseconds)) { - assert(bmDecomManager.shufflesToMigrate.isEmpty == true) + assert(bmDecomManager.shufflesToMigrate.isEmpty === true) + assert(bmDecomManager.numMigratedShuffles.get() === 1) verify(bm, least(1)).replicateBlock( mc.eq(storedBlockId1), mc.any(), mc.any(), mc.eq(Some(3))) verify(blockTransferService, times(2)) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index fd14b12b112d3..cdde8411d8b7b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -53,9 +53,80 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => executorPatience = None, decommissioningTest = true) } + + test("Test basic decommissioning with shuffle cleanup", k8sTestTag) { + sparkAppConf + .set(config.DECOMMISSION_ENABLED.key, "true") + .set("spark.kubernetes.container.image", pyImage) + .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_TIMEOUT.key, "400") + // Ensure we have somewhere to migrate our data too + .set("spark.executor.instances", "3") + // The default of 30 seconds is fine, but for testing we just want to get this done fast. + .set("spark.storage.decommission.replicationReattemptInterval", "1") + + runSparkApplicationAndVerifyCompletion( + appResource = PYSPARK_DECOMISSIONING_CLEANUP, + mainClass = "", + expectedLogOnCompletion = Seq( + "Finished waiting, stopping Spark", + "Received decommission executor message", + "Acknowledged decommissioning block manager", + ": Executor decommission."), + appArgs = Array.empty[String], + driverPodChecker = doBasicDriverPyPodCheck, + executorPodChecker = doBasicExecutorPyPodCheck, + appLocator = appLocator, + isJVM = false, + pyFiles = None, + executorPatience = None, + decommissioningTest = true) + } + + test("Test decommissioning with dynamic allocation & shuffle cleanups", k8sTestTag) { + sparkAppConf + .set(config.DECOMMISSION_ENABLED.key, "true") + .set("spark.kubernetes.container.image", pyImage) + .set(config.STORAGE_DECOMMISSION_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key, "true") + .set(config.STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED.key, "true") + .set(config.DYN_ALLOCATION_SHUFFLE_TRACKING_TIMEOUT.key, "30") + .set(config.DYN_ALLOCATION_CACHED_EXECUTOR_IDLE_TIMEOUT.key, "30") + .set(config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT.key, "5") + .set(config.DYN_ALLOCATION_MIN_EXECUTORS.key, "1") + .set(config.DYN_ALLOCATION_INITIAL_EXECUTORS.key, "2") + .set(config.DYN_ALLOCATION_ENABLED.key, "true") + // The default of 30 seconds is fine, but for testing we just want to get this done fast. 
+ .set("spark.storage.decommission.replicationReattemptInterval", "1") + + var execLogs: String = "" + + runSparkApplicationAndVerifyCompletion( + appResource = PYSPARK_SCALE, + mainClass = "", + expectedLogOnCompletion = Seq( + "Finished waiting, stopping Spark", + "Received decommission executor message", + "Acknowledged decommissioning block manager", + ": Executor decommission."), + appArgs = Array.empty[String], + driverPodChecker = doBasicDriverPyPodCheck, + executorPodChecker = doBasicExecutorPyPodCheck, + appLocator = appLocator, + isJVM = false, + pyFiles = None, + executorPatience = None, + decommissioningTest = false) + } } private[spark] object DecommissionSuite { val TEST_LOCAL_PYSPARK: String = "local:///opt/spark/tests/" val PYSPARK_DECOMISSIONING: String = TEST_LOCAL_PYSPARK + "decommissioning.py" + val PYSPARK_DECOMISSIONING_CLEANUP: String = TEST_LOCAL_PYSPARK + "decommissioning_cleanup.py" + val PYSPARK_SCALE: String = TEST_LOCAL_PYSPARK + "autoscale.py" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala index cce842ce62f01..a1ecd48e747ea 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala @@ -32,7 +32,7 @@ object ProcessUtils extends Logging { def executeProcess( fullCommand: Array[String], timeout: Long, - dumpErrors: Boolean = false): Seq[String] = { + dumpErrors: Boolean = true): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) pb.redirectErrorStream(true) val proc = pb.start() @@ -45,7 +45,7 @@ object ProcessUtils extends Logging { assert(proc.waitFor(timeout, TimeUnit.SECONDS), s"Timed out while executing ${fullCommand.mkString(" ")}") assert(proc.exitValue == 0, - s"Failed to execute ${fullCommand.mkString(" ")}" + + s"Failed to execute -- ${fullCommand.mkString(" ")} --" + s"${if (dumpErrors) "\n" + outputLines.mkString("\n")}") outputLines.toSeq } diff --git a/resource-managers/kubernetes/integration-tests/tests/autoscale.py b/resource-managers/kubernetes/integration-tests/tests/autoscale.py new file mode 100644 index 0000000000000..809b698fcdd8c --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/autoscale.py @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import sys +import time + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: autoscale + """ + print("Starting autoscale test") + spark = SparkSession \ + .builder \ + .appName("AutoScale") \ + .getOrCreate() + sc = spark._sc + + initialRdd = sc.parallelize(range(100), 5) + # Trigger a shuffle so there are shuffle blocks to migrate + rdd = initialRdd.map(lambda x: (x, x)).groupByKey() + rdd.collect() + numCores = sc._jsc.sc().getExecutorMemoryStatus().size() + print("Have " + str(numCores)) + print("Waiting for dynamic alloc") + time.sleep(150) + print("Finished waiting!") + rdd.count() + rdd.collect() + print("Finished waiting, stopping Spark.") + spark.stop() + print("Done, exiting Python") + sys.exit(0) diff --git a/resource-managers/kubernetes/integration-tests/tests/decommissioning.py b/resource-managers/kubernetes/integration-tests/tests/decommissioning.py index 5fcad083b007c..0880e8ab275b3 100644 --- a/resource-managers/kubernetes/integration-tests/tests/decommissioning.py +++ b/resource-managers/kubernetes/integration-tests/tests/decommissioning.py @@ -28,7 +28,7 @@ print("Starting decom test") spark = SparkSession \ .builder \ - .appName("PyMemoryTest") \ + .appName("DecomTest") \ .getOrCreate() sc = spark._sc acc = sc.accumulator(0) diff --git a/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py b/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py new file mode 100644 index 0000000000000..8af558ee5214e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/decommissioning_cleanup.py @@ -0,0 +1,59 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import time + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: decommissioning + """ + print("Starting decom test") + spark = SparkSession \ + .builder \ + .appName("DecomTest") \ + .getOrCreate() + sc = spark._sc + acc = sc.accumulator(0) + + def addToAcc(x): + acc.add(1) + return x + + initialRdd = sc.parallelize(range(100), 5) + accRdd = initialRdd.map(addToAcc) + # Trigger a shuffle so there are shuffle blocks to migrate + rdd = accRdd.map(lambda x: (x, x)).groupByKey() + # Make enough shuffle files to increase the chance of the race condition. 
+ for i in range(1, 2): + shuffleRdd = sc.parallelize(range(1, 10), 5).map(lambda x: (x, x)).groupByKey() + shuffleRdd.collect() + rdd.collect() + print("1st accumulator value is: " + str(acc.value)) + print("Waiting to give nodes time to finish migration, decom exec 1.") + print("...") + time.sleep(30) + rdd.count() + rdd.collect() + print("Final accumulator value is: " + str(acc.value)) + print("Finished waiting, stopping Spark.") + spark.stop() + print("Done, exiting Python") + sys.exit(0) From e574fcd23021cc94f043981d84287f3bb1308b5f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 16 Oct 2020 14:48:14 -0700 Subject: [PATCH 0263/1009] [SPARK-32376][SQL] Make unionByName null-filling behavior work with struct columns ### What changes were proposed in this pull request? SPARK-29358 added support for `unionByName` to work when the two datasets didn't necessarily have the same schema, but it does not work with nested columns like structs. This patch adds the support to work with struct columns. The behavior before this PR: ```scala scala> val df1 = spark.range(1).selectExpr("id c0", "named_struct('c', id + 1, 'b', id + 2, 'a', id + 3) c1") scala> val df2 = spark.range(1).selectExpr("id c0", "named_struct('c', id + 1, 'b', id + 2) c1") scala> df1.unionByName(df2, true).printSchema org.apache.spark.sql.AnalysisException: Union can only be performed on tables with the compatible column types. struct <> struct at the second column of the second table;; 'Union false, false :- Project [id#0L AS c0#2L, named_struct(c, (id#0L + cast(1 as bigint)), b, (id#0L + cast(2 as bigint)), a, (id#0L + cast(3 as bigint))) AS c1#3] : +- Range (0, 1, step=1, splits=Some(12)) +- Project [c0#8L, c1#9] +- Project [id#6L AS c0#8L, named_struct(c, (id#6L + cast(1 as bigint)), b, (id#6L + cast(2 as bigint))) AS c1#9] +- Range (0, 1, step=1, splits=Some(12)) ``` The behavior after this PR: ```scala scala> df1.unionByName(df2, true).printSchema root |-- c0: long (nullable = false) |-- c1: struct (nullable = false) | |-- a: long (nullable = true) | |-- b: long (nullable = false) | |-- c: long (nullable = false) scala> df1.unionByName(df2, true).show() +---+-------------+ | c0| c1| +---+-------------+ | 0| {3, 2, 1}| | 0|{ null, 2, 1}| +---+-------------+ ``` ### Why are the changes needed? The `allowMissingColumns` of `unionByName` is a feature allowing merging different schema from two datasets when unioning them together. Nested column support makes the feature more general and flexible for usage. ### Does this PR introduce _any_ user-facing change? Yes, after this change users can union two datasets with different schema with different structs. ### How was this patch tested? Unit tests. Closes #29587 from viirya/SPARK-32376. 
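As a pointer for reviewers, the missing-field discovery underlying this behavior is the new `StructType.findMissingFields` helper added in this patch. The snippet below only illustrates its intended contract under made-up schemas; the new `StructTypeSuite` cases in the diff are the authoritative examples.

```scala
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType

val source = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT>")
val target = StructType.fromDDL("c1 INT, c2 STRUCT<c3: INT, c4: STRING>")

// Expected to report the nested field present in `target` but absent from `source`,
// i.e. roughly "c2 STRUCT<c4: STRING>". Structs nested inside array or map types are
// intentionally not traversed.
val missing: Option[StructType] =
  StructType.findMissingFields(source, target, SQLConf.get.resolver)
```

When missing fields do get added on one side of the union, the struct fields on both sides are sorted by name so that the two children end up with an identical schema.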
Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../sql/catalyst/analysis/ResolveUnion.scala | 192 +++++++++++++++++- .../expressions/complexTypeCreator.scala | 52 ++++- .../expressions/complexTypeExtractors.scala | 4 +- .../apache/spark/sql/types/StructType.scala | 35 ++++ .../spark/sql/types/StructTypeSuite.scala | 96 ++++++++- .../scala/org/apache/spark/sql/Column.scala | 36 +--- .../scala/org/apache/spark/sql/Dataset.scala | 6 + .../sql/DataFrameSetOperationsSuite.scala | 181 +++++++++++++++++ 8 files changed, 555 insertions(+), 47 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala index 693a5a4e75443..c1a9c9d3d9bab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala @@ -17,29 +17,188 @@ package org.apache.spark.sql.catalyst.analysis +import scala.collection.mutable + import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.optimizer.CombineUnions import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils +import org.apache.spark.unsafe.types.UTF8String /** * Resolves different children of Union to a common set of columns. */ object ResolveUnion extends Rule[LogicalPlan] { - private def unionTwoSides( + /** + * This method sorts columns recursively in a struct expression based on column names. + */ + private def sortStructFields(expr: Expression): Expression = { + val existingExprs = expr.dataType.asInstanceOf[StructType].fieldNames.zipWithIndex.map { + case (name, i) => + val fieldExpr = GetStructField(KnownNotNull(expr), i) + if (fieldExpr.dataType.isInstanceOf[StructType]) { + (name, sortStructFields(fieldExpr)) + } else { + (name, fieldExpr) + } + }.sortBy(_._1).flatMap(pair => Seq(Literal(pair._1), pair._2)) + + val newExpr = CreateNamedStruct(existingExprs) + if (expr.nullable) { + If(IsNull(expr), Literal(null, newExpr.dataType), newExpr) + } else { + newExpr + } + } + + /** + * Assumes input expressions are field expression of `CreateNamedStruct`. This method + * sorts the expressions based on field names. + */ + private def sortFieldExprs(fieldExprs: Seq[Expression]): Seq[Expression] = { + fieldExprs.grouped(2).map { e => + Seq(e.head, e.last) + }.toSeq.sortBy { pair => + assert(pair.head.isInstanceOf[Literal]) + pair.head.eval().asInstanceOf[UTF8String].toString + }.flatten + } + + /** + * This helper method sorts fields in a `UpdateFields` expression by field name. 
+ */ + private def sortStructFieldsInWithFields(expr: Expression): Expression = expr transformUp { + case u: UpdateFields if u.resolved => + u.evalExpr match { + case i @ If(IsNull(_), _, CreateNamedStruct(fieldExprs)) => + val sorted = sortFieldExprs(fieldExprs) + val newStruct = CreateNamedStruct(sorted) + i.copy(trueValue = Literal(null, newStruct.dataType), falseValue = newStruct) + case CreateNamedStruct(fieldExprs) => + val sorted = sortFieldExprs(fieldExprs) + val newStruct = CreateNamedStruct(sorted) + newStruct + case other => + throw new IllegalStateException(s"`UpdateFields` has incorrect expression: $other. " + + "Please file a bug report with this error message, stack trace, and the query.") + } + } + + def simplifyWithFields(expr: Expression): Expression = { + expr.transformUp { + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => + UpdateFields(struct, fieldOps1 ++ fieldOps2) + } + } + + /** + * Adds missing fields recursively into given `col` expression, based on the target `StructType`. + * This is called by `compareAndAddFields` when we find two struct columns with same name but + * different nested fields. This method will find out the missing nested fields from `col` to + * `target` struct and add these missing nested fields. Currently we don't support finding out + * missing nested fields of struct nested in array or struct nested in map. + */ + private def addFields(col: NamedExpression, target: StructType): Expression = { + assert(col.dataType.isInstanceOf[StructType], "Only support StructType.") + + val resolver = SQLConf.get.resolver + val missingFieldsOpt = + StructType.findMissingFields(col.dataType.asInstanceOf[StructType], target, resolver) + + // We need to sort columns in result, because we might add another column in other side. + // E.g., we want to union two structs "a int, b long" and "a int, c string". + // If we don't sort, we will have "a int, b long, c string" and + // "a int, c string, b long", which are not compatible. + if (missingFieldsOpt.isEmpty) { + sortStructFields(col) + } else { + missingFieldsOpt.map { s => + val struct = addFieldsInto(col, s.fields) + // Combines `WithFields`s to reduce expression tree. + val reducedStruct = simplifyWithFields(struct) + val sorted = sortStructFieldsInWithFields(reducedStruct) + sorted + }.get + } + } + + /** + * Adds missing fields recursively into given `col` expression. The missing fields are given + * in `fields`. For example, given `col` as "z struct, x int", and `fields` is + * "z struct, w string". This method will add a nested `z.w` field and a top-level + * `w` field to `col` and fill null values for them. Note that because we might also add missing + * fields at other side of Union, we must make sure corresponding attributes at two sides have + * same field order in structs, so when we adding missing fields, we will sort the fields based on + * field names. So the data type of returned expression will be + * "w string, x int, z struct". + */ + private def addFieldsInto( + col: Expression, + fields: Seq[StructField]): Expression = { + fields.foldLeft(col) { case (currCol, field) => + field.dataType match { + case st: StructType => + val resolver = SQLConf.get.resolver + val colField = currCol.dataType.asInstanceOf[StructType] + .find(f => resolver(f.name, field.name)) + if (colField.isEmpty) { + // The whole struct is missing. Add a null. 
+ UpdateFields(currCol, field.name, Literal(null, st)) + } else { + UpdateFields(currCol, field.name, + addFieldsInto(ExtractValue(currCol, Literal(field.name), resolver), st.fields)) + } + case dt => + UpdateFields(currCol, field.name, Literal(null, dt)) + } + } + } + + /** + * This method will compare right to left plan's outputs. If there is one struct attribute + * at right side has same name with left side struct attribute, but two structs are not the + * same data type, i.e., some missing (nested) fields at right struct attribute, then this + * method will try to add missing (nested) fields into the right attribute with null values. + */ + private def compareAndAddFields( left: LogicalPlan, right: LogicalPlan, - allowMissingCol: Boolean): LogicalPlan = { + allowMissingCol: Boolean): (Seq[NamedExpression], Seq[NamedExpression]) = { val resolver = SQLConf.get.resolver val leftOutputAttrs = left.output val rightOutputAttrs = right.output - // Builds a project list for `right` based on `left` output names + val aliased = mutable.ArrayBuffer.empty[Attribute] + val rightProjectList = leftOutputAttrs.map { lattr => - rightOutputAttrs.find { rattr => resolver(lattr.name, rattr.name) }.getOrElse { + val found = rightOutputAttrs.find { rattr => resolver(lattr.name, rattr.name) } + if (found.isDefined) { + val foundAttr = found.get + val foundDt = foundAttr.dataType + (foundDt, lattr.dataType) match { + case (source: StructType, target: StructType) + if allowMissingCol && !source.sameType(target) => + // Having an output with same name, but different struct type. + // We need to add missing fields. Note that if there are deeply nested structs such as + // nested struct of array in struct, we don't support to add missing deeply nested field + // like that. We will sort columns in the struct expression to make sure two sides of + // union have consistent schema. + aliased += foundAttr + Alias(addFields(foundAttr, target), foundAttr.name)() + case _ => + // We don't need/try to add missing fields if: + // 1. The attributes of left and right side are the same struct type + // 2. The attributes are not struct types. They might be primitive types, or array, map + // types. We don't support adding missing fields of nested structs in array or map + // types now. + // 3. `allowMissingCol` is disabled. + foundAttr + } + } else { if (allowMissingCol) { Alias(Literal(null, lattr.dataType), lattr.name)() } else { @@ -50,18 +209,29 @@ object ResolveUnion extends Rule[LogicalPlan] { } } + (rightProjectList, aliased.toSeq) + } + + private def unionTwoSides( + left: LogicalPlan, + right: LogicalPlan, + allowMissingCol: Boolean): LogicalPlan = { + val rightOutputAttrs = right.output + + // Builds a project list for `right` based on `left` output names + val (rightProjectList, aliased) = compareAndAddFields(left, right, allowMissingCol) + // Delegates failure checks to `CheckAnalysis` - val notFoundAttrs = rightOutputAttrs.diff(rightProjectList) + val notFoundAttrs = rightOutputAttrs.diff(rightProjectList ++ aliased) val rightChild = Project(rightProjectList ++ notFoundAttrs, right) // Builds a project for `logicalPlan` based on `right` output names, if allowing // missing columns. val leftChild = if (allowMissingCol) { - val missingAttrs = notFoundAttrs.map { attr => - Alias(Literal(null, attr.dataType), attr.name)() - } - if (missingAttrs.nonEmpty) { - Project(leftOutputAttrs ++ missingAttrs, left) + // Add missing (nested) fields to left plan. 
+ val (leftProjectList, _) = compareAndAddFields(rightChild, left, allowMissingCol) + if (leftProjectList.map(_.toAttribute) != left.output) { + Project(leftProjectList, left) } else { left } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f6485a51f8fae..3958cfd0af2a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -20,10 +20,11 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCheckResult, TypeCoercion, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.{FUNC_ALIAS, FunctionBuilder} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -661,3 +662,52 @@ case class UpdateFields(structExpr: Expression, fieldOps: Seq[StructFieldsOperat } } } + +object UpdateFields { + private def nameParts(fieldName: String): Seq[String] = { + require(fieldName != null, "fieldName cannot be null") + + if (fieldName.isEmpty) { + fieldName :: Nil + } else { + CatalystSqlParser.parseMultipartIdentifier(fieldName) + } + } + + /** + * Adds/replaces field of `StructType` into `col` expression by name. + */ + def apply(col: Expression, fieldName: String, expr: Expression): UpdateFields = { + updateFieldsHelper(col, nameParts(fieldName), name => WithField(name, expr)) + } + + /** + * Drops fields of `StructType` in `col` expression by name. 
+ */ + def apply(col: Expression, fieldName: String): UpdateFields = { + updateFieldsHelper(col, nameParts(fieldName), name => DropField(name)) + } + + private def updateFieldsHelper( + structExpr: Expression, + namePartsRemaining: Seq[String], + valueFunc: String => StructFieldsOperation) : UpdateFields = { + val fieldName = namePartsRemaining.head + if (namePartsRemaining.length == 1) { + UpdateFields(structExpr, valueFunc(fieldName) :: Nil) + } else { + val newStruct = if (structExpr.resolved) { + val resolver = SQLConf.get.resolver + ExtractValue(structExpr, Literal(fieldName), resolver) + } else { + UnresolvedExtractValue(structExpr, Literal(fieldName)) + } + + val newValue = updateFieldsHelper( + structExpr = newStruct, + namePartsRemaining = namePartsRemaining.tail, + valueFunc = valueFunc) + UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 89ff4facd25a9..60afe140960cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -116,8 +116,10 @@ case class GetStructField(child: Expression, ordinal: Int, name: Option[String] s"$child.${name.getOrElse(fieldName)}" } + def extractFieldName: String = name.getOrElse(childSchema(ordinal).name) + override def sql: String = - child.sql + s".${quoteIdentifier(name.getOrElse(childSchema(ordinal).name))}" + child.sql + s".${quoteIdentifier(extractFieldName)}" protected override def nullSafeEval(input: Any): Any = input.asInstanceOf[InternalRow].get(ordinal, childSchema(ordinal).dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index b14fb04cc4539..c5e76c160ff46 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -641,4 +641,39 @@ object StructType extends AbstractDataType { fields.foreach(s => map.put(s.name, s)) map } + + /** + * Returns a `StructType` that contains missing fields recursively from `source` to `target`. + * Note that this doesn't support looking into array type and map type recursively. + */ + def findMissingFields( + source: StructType, + target: StructType, + resolver: Resolver): Option[StructType] = { + def bothStructType(dt1: DataType, dt2: DataType): Boolean = + dt1.isInstanceOf[StructType] && dt2.isInstanceOf[StructType] + + val newFields = mutable.ArrayBuffer.empty[StructField] + + target.fields.foreach { field => + val found = source.fields.find(f => resolver(field.name, f.name)) + if (found.isEmpty) { + // Found a missing field in `source`. + newFields += field + } else if (bothStructType(found.get.dataType, field.dataType) && + !found.get.dataType.sameType(field.dataType)) { + // Found a field with same name, but different data type. 
+ findMissingFields(found.get.dataType.asInstanceOf[StructType], + field.dataType.asInstanceOf[StructType], resolver).map { missingType => + newFields += found.get.copy(dataType = missingType) + } + } + } + + if (newFields.isEmpty) { + None + } else { + Some(StructType(newFields.toSeq)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 6824a64badc10..645e65f06508d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -18,9 +18,11 @@ package org.apache.spark.sql.types import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType.fromDDL -class StructTypeSuite extends SparkFunSuite { +class StructTypeSuite extends SparkFunSuite with SQLHelper { private val s = StructType.fromDDL("a INT, b STRING") @@ -103,4 +105,96 @@ class StructTypeSuite extends SparkFunSuite { val interval = "`a` INTERVAL" assert(fromDDL(interval).toDDL === interval) } + + test("find missing (nested) fields") { + val schema = StructType.fromDDL("c1 INT, c2 STRUCT>") + val resolver = SQLConf.get.resolver + + val source1 = StructType.fromDDL("c1 INT") + val missing1 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source1, schema, resolver) + .exists(_.sameType(missing1))) + + val source2 = StructType.fromDDL("c1 INT, c3 STRING") + val missing2 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source2, schema, resolver) + .exists(_.sameType(missing2))) + + val source3 = StructType.fromDDL("c1 INT, c2 STRUCT") + val missing3 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source3, schema, resolver) + .exists(_.sameType(missing3))) + + val source4 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing4 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source4, schema, resolver) + .exists(_.sameType(missing4))) + } + + test("find missing (nested) fields: array and map") { + val resolver = SQLConf.get.resolver + + val schemaWithArray = StructType.fromDDL("c1 INT, c2 ARRAY>") + val source5 = StructType.fromDDL("c1 INT") + val missing5 = StructType.fromDDL("c2 ARRAY>") + assert( + StructType.findMissingFields(source5, schemaWithArray, resolver) + .exists(_.sameType(missing5))) + + val schemaWithMap1 = StructType.fromDDL( + "c1 INT, c2 MAP, STRING>, c3 LONG") + val source6 = StructType.fromDDL("c1 INT, c3 LONG") + val missing6 = StructType.fromDDL("c2 MAP, STRING>") + assert( + StructType.findMissingFields(source6, schemaWithMap1, resolver) + .exists(_.sameType(missing6))) + + val schemaWithMap2 = StructType.fromDDL( + "c1 INT, c2 MAP>, c3 STRING") + val source7 = StructType.fromDDL("c1 INT, c3 STRING") + val missing7 = StructType.fromDDL("c2 MAP>") + assert( + StructType.findMissingFields(source7, schemaWithMap2, resolver) + .exists(_.sameType(missing7))) + + // Unsupported: nested struct in array, map + val source8 = StructType.fromDDL("c1 INT, c2 ARRAY>") + // `findMissingFields` doesn't support looking into nested struct in array type. 
+ assert(StructType.findMissingFields(source8, schemaWithArray, resolver).isEmpty) + + val source9 = StructType.fromDDL("c1 INT, c2 MAP, STRING>, c3 LONG") + // `findMissingFields` doesn't support looking into nested struct in map type. + assert(StructType.findMissingFields(source9, schemaWithMap1, resolver).isEmpty) + + val source10 = StructType.fromDDL("c1 INT, c2 MAP>, c3 STRING") + // `findMissingFields` doesn't support looking into nested struct in map type. + assert(StructType.findMissingFields(source10, schemaWithMap2, resolver).isEmpty) + } + + test("find missing (nested) fields: case sensitive cases") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val schema = StructType.fromDDL("c1 INT, c2 STRUCT>") + val resolver = SQLConf.get.resolver + + val source1 = StructType.fromDDL("c1 INT, C2 LONG") + val missing1 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source1, schema, resolver) + .exists(_.sameType(missing1))) + + val source2 = StructType.fromDDL("c2 LONG") + val missing2 = StructType.fromDDL("c1 INT") + assert(StructType.findMissingFields(source2, schema, resolver) + .exists(_.sameType(missing2))) + + val source3 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing3 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source3, schema, resolver) + .exists(_.sameType(missing3))) + + val source4 = StructType.fromDDL("c1 INT, c2 STRUCT>") + val missing4 = StructType.fromDDL("c2 STRUCT>") + assert(StructType.findMissingFields(source4, schema, resolver) + .exists(_.sameType(missing4))) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index a46d6c0bb2282..30792c9bacd53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -925,7 +925,7 @@ class Column(val expr: Expression) extends Logging { def withField(fieldName: String, col: Column): Column = withExpr { require(fieldName != null, "fieldName cannot be null") require(col != null, "col cannot be null") - updateFieldsHelper(expr, nameParts(fieldName), name => WithField(name, col.expr)) + UpdateFields(expr, fieldName, col.expr) } // scalastyle:off line.size.limit @@ -989,38 +989,8 @@ class Column(val expr: Expression) extends Logging { */ // scalastyle:on line.size.limit def dropFields(fieldNames: String*): Column = withExpr { - def dropField(structExpr: Expression, fieldName: String): UpdateFields = - updateFieldsHelper(structExpr, nameParts(fieldName), name => DropField(name)) - - fieldNames.tail.foldLeft(dropField(expr, fieldNames.head)) { - (resExpr, fieldName) => dropField(resExpr, fieldName) - } - } - - private def nameParts(fieldName: String): Seq[String] = { - require(fieldName != null, "fieldName cannot be null") - - if (fieldName.isEmpty) { - fieldName :: Nil - } else { - CatalystSqlParser.parseMultipartIdentifier(fieldName) - } - } - - private def updateFieldsHelper( - structExpr: Expression, - namePartsRemaining: Seq[String], - valueFunc: String => StructFieldsOperation): UpdateFields = { - - val fieldName = namePartsRemaining.head - if (namePartsRemaining.length == 1) { - UpdateFields(structExpr, valueFunc(fieldName) :: Nil) - } else { - val newValue = updateFieldsHelper( - structExpr = UnresolvedExtractValue(structExpr, Literal(fieldName)), - namePartsRemaining = namePartsRemaining.tail, - valueFunc = valueFunc) - UpdateFields(structExpr, WithField(fieldName, newValue) :: Nil) + 
fieldNames.tail.foldLeft(UpdateFields(expr, fieldNames.head)) { + (resExpr, fieldName) => UpdateFields(resExpr, fieldName) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 87b9aea80c823..3d431d6ff13a9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2067,6 +2067,12 @@ class Dataset[T] private[sql]( * // +----+----+----+----+ * }}} * + * Note that `allowMissingColumns` supports nested column in struct types. Missing nested columns + * of struct columns with same name will also be filled with null values. This currently does not + * support nested columns in array and map types. Note that if there is any missing nested columns + * to be filled, in order to make consistent schema between two sides of union, the nested fields + * of structs will be sorted after merging schema. + * * @group typedrel * @since 3.1.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala index e72b8ce860b28..5f28dc60962ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala @@ -536,4 +536,185 @@ class DataFrameSetOperationsSuite extends QueryTest with SharedSparkSession { assert(union2.schema.fieldNames === Array("a", "B", "C", "c")) } } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - simple") { + val df1 = Seq(((1, 2, 3), 0), ((2, 3, 4), 1), ((3, 4, 5), 2)).toDF("a", "idx") + val df2 = Seq(((3, 4), 0), ((1, 2), 1), ((2, 3), 2)).toDF("a", "idx") + val df3 = Seq(((100, 101, 102, 103), 0), ((110, 111, 112, 113), 1), ((120, 121, 122, 123), 2)) + .toDF("a", "idx") + + var unionDf = df1.unionByName(df2, true) + + checkAnswer(unionDf, + Row(Row(1, 2, 3), 0) :: Row(Row(2, 3, 4), 1) :: Row(Row(3, 4, 5), 2) :: + Row(Row(3, 4, null), 0) :: Row(Row(1, 2, null), 1) :: Row(Row(2, 3, null), 2) :: Nil + ) + + assert(unionDf.schema.toDDL == "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT>,`idx` INT") + + unionDf = df1.unionByName(df2, true).unionByName(df3, true) + + checkAnswer(unionDf, + Row(Row(1, 2, 3, null), 0) :: + Row(Row(2, 3, 4, null), 1) :: + Row(Row(3, 4, 5, null), 2) :: // df1 + Row(Row(3, 4, null, null), 0) :: + Row(Row(1, 2, null, null), 1) :: + Row(Row(2, 3, null, null), 2) :: // df2 + Row(Row(100, 101, 102, 103), 0) :: + Row(Row(110, 111, 112, 113), 1) :: + Row(Row(120, 121, 122, 123), 2) :: Nil // df3 + ) + assert(unionDf.schema.toDDL == + "`a` STRUCT<`_1`: INT, `_2`: INT, `_3`: INT, `_4`: INT>,`idx` INT") + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - nested") { + val df1 = Seq((0, UnionClass1a(0, 1L, UnionClass2(1, "2")))).toDF("id", "a") + val df2 = Seq((1, UnionClass1b(1, 2L, UnionClass3(2, 3L)))).toDF("id", "a") + + val expectedSchema = "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`a`: INT, `b`: BIGINT, `c`: STRING>>" + + var unionDf = df1.unionByName(df2, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(1, null, "2"))) :: + Row(1, Row(1, 2, Row(2, 3L, null))) :: Nil) + assert(unionDf.schema.toDDL == expectedSchema) + + unionDf = df2.unionByName(df1, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, 3L, null))) :: + Row(0, Row(0, 1, Row(1, null, "2"))) :: Nil) + 
assert(unionDf.schema.toDDL == expectedSchema) + + val df3 = Seq((2, UnionClass1b(2, 3L, null))).toDF("id", "a") + unionDf = df1.unionByName(df3, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(1, null, "2"))) :: + Row(2, Row(2, 3, null)) :: Nil) + assert(unionDf.schema.toDDL == expectedSchema) + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns" + + " - case-sensitive cases") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val df1 = Seq((0, UnionClass1a(0, 1L, UnionClass2(1, "2")))).toDF("id", "a") + val df2 = Seq((1, UnionClass1c(1, 2L, UnionClass4(2, 3L)))).toDF("id", "a") + + var unionDf = df1.unionByName(df2, true) + checkAnswer(unionDf, + Row(0, Row(0, 1, Row(null, 1, null, "2"))) :: + Row(1, Row(1, 2, Row(2, null, 3L, null))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT, `c`: STRING>>") + + unionDf = df2.unionByName(df1, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, null, 3L, null))) :: + Row(0, Row(0, 1, Row(null, 1, null, "2"))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT, `c`: STRING>>") + + val df3 = Seq((2, UnionClass1b(2, 3L, UnionClass3(4, 5L)))).toDF("id", "a") + unionDf = df2.unionByName(df3, true) + checkAnswer(unionDf, + Row(1, Row(1, 2, Row(2, null, 3L))) :: + Row(2, Row(2, 3, Row(null, 4, 5L))) :: Nil) + assert(unionDf.schema.toDDL == + "`id` INT,`a` STRUCT<`a`: INT, `b`: BIGINT, " + + "`nested`: STRUCT<`A`: INT, `a`: INT, `b`: BIGINT>>") + } + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - edge case") { + val nestedStructType1 = StructType(Seq( + StructField("b", StringType))) + val nestedStructValues1 = Row("b") + + val nestedStructType2 = StructType(Seq( + StructField("b", StringType), + StructField("a", StringType))) + val nestedStructValues2 = Row("b", "a") + + val df1: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(nestedStructValues1) :: Nil), + StructType(Seq(StructField("topLevelCol", nestedStructType1)))) + + val df2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(nestedStructValues2) :: Nil), + StructType(Seq(StructField("topLevelCol", nestedStructType2)))) + + val union = df1.unionByName(df2, allowMissingColumns = true) + checkAnswer(union, Row(Row(null, "b")) :: Row(Row("a", "b")) :: Nil) + assert(union.schema.toDDL == "`topLevelCol` STRUCT<`a`: STRING, `b`: STRING>") + } + + test("SPARK-32376: Make unionByName null-filling behavior work with struct columns - deep expr") { + def nestedDf(depth: Int, numColsAtEachDepth: Int): DataFrame = { + val initialNestedStructType = StructType( + (0 to numColsAtEachDepth).map(i => + StructField(s"nested${depth}Col$i", IntegerType, nullable = false)) + ) + val initialNestedValues = Row(0 to numColsAtEachDepth: _*) + + var depthCounter = depth - 1 + var structType = initialNestedStructType + var struct = initialNestedValues + while (depthCounter != 0) { + struct = Row((struct +: (1 to numColsAtEachDepth)): _*) + structType = StructType( + StructField(s"nested${depthCounter}Col0", structType, nullable = false) +: + (1 to numColsAtEachDepth).map(i => + StructField(s"nested${depthCounter}Col$i", IntegerType, nullable = false)) + ) + depthCounter -= 1 + } + + val df: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(struct) :: Nil), + StructType(Seq(StructField("nested0Col0", 
structType)))) + + df + } + + val df1 = nestedDf(depth = 10, numColsAtEachDepth = 1) + val df2 = nestedDf(depth = 10, numColsAtEachDepth = 20) + val union = df1.unionByName(df2, allowMissingColumns = true) + // scalastyle:off + val row1 = Row(Row(Row(Row(Row(Row(Row(Row(Row(Row( + Row(0, 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null), + 1, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null)) + val row2 = Row(Row(Row(Row(Row(Row(Row(Row(Row(Row( + Row(0, 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9), + 1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 2, 20, 3, 4, 5, 6, 7, 8, 9)) + // scalastyle:on + checkAnswer(union, row1 :: row2 :: Nil) + } } + +case class UnionClass1a(a: Int, b: Long, nested: UnionClass2) +case class UnionClass1b(a: Int, b: Long, nested: UnionClass3) +case class UnionClass1c(a: Int, b: Long, nested: UnionClass4) + +case class UnionClass2(a: Int, c: String) +case class UnionClass3(a: Int, b: Long) +case class UnionClass4(A: Int, b: Long) From 0411def0b1da22d3c15fe7c85f6296da41d5d2cd Mon Sep 17 00:00:00 2001 From: Denis Pyshev Date: Fri, 16 Oct 2020 16:32:09 -0700 Subject: [PATCH 0264/1009] [SPARK-33109][BUILD] Upgrade to sbt 1.4.0 ### What changes were proposed in this pull request? Upgrade sbt to release 1.4.0 ### Why are the changes needed? Bring built-in `dependencyTree` instead of removed `sbt-dependency-graph` plugin, that doesn't work with sbt used in build. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Should pass all the tests. Closes #30070 from gemelen/feature/sbt-1.4. 
Authored-by: Denis Pyshev Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index b1e5e313d853f..e391883fbbc2d 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.3.13 +sbt.version=1.4.0 From 911dcd39838eab742f1993776cada57e1c0f6f6c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 16 Oct 2020 21:23:21 -0700 Subject: [PATCH 0265/1009] [SPARK-33173][CORE][TESTS] Use `eventually` to check `numOnTaskFailed` in PluginContainerSuite ### What changes were proposed in this pull request? This PR aims to use `eventually` to fix the flakiness of the test case `SPARK-33088: executor failed tasks trigger plugin calls`. ### Why are the changes needed? The test case checks like the following. ```scala assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) ``` Although first and second passed, the third can fail. - https://amplab.cs.berkeley.edu/jenkins/view/Spark%20QA%20Test%20(Dashboard)/job/spark-master-test-maven-hadoop-3.2-hive-2.3-jdk-11/lastCompletedBuild/testReport/org.apache.spark.internal.plugin/PluginContainerSuite/SPARK_33088__executor_failed_tasks_trigger_plugin_calls/ - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/129919/testReport/ ``` sbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: 1 did not equal 2 at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) at org.apache.spark.internal.plugin.PluginContainerSuite.$anonfun$new$8(PluginContainerSuite.scala:161) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This only improves the robustness. Closes #30072 from dongjoon-hyun/SPARK-33173. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/internal/plugin/PluginContainerSuite.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index e7fbe5b998a88..15966e2744491 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -156,9 +156,11 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo case t: Throwable => // ignore exception } - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + eventually(timeout(10.seconds), interval(100.millis)) { + assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + } } test("plugin initialization in non-local mode") { From 2c4599db4b5de8f7b86af4b4c4b4a43b80e82d1a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 16 Oct 2020 22:18:12 -0700 Subject: [PATCH 0266/1009] [MINOR][SS][DOCS] Update Structured Streaming guide doc and update code typo ### What changes were proposed in this pull request? This is a minor change to update structured-streaming-programming-guide and typos in code. ### Why are the changes needed? Keep the user-facing document correct and updated. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. Closes #30074 from viirya/ss-minor. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- docs/structured-streaming-programming-guide.md | 6 ++++-- .../analysis/UnsupportedOperationChecker.scala | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index f137915eaa57c..ccd6f41f5c664 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1763,7 +1763,9 @@ Here is the compatibility matrix. Queries with mapGroupsWithState Update - + + Aggregations not allowed in a query with mapGroupsWithState. + Queries with flatMapGroupsWithState @@ -1777,7 +1779,7 @@ Here is the compatibility matrix. Update operation mode Update - Aggregations not allowed after flatMapGroupsWithState. + Aggregations not allowed in a query with flatMapGroupsWithState. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 46f178f3a9ce2..44e8602ba7e81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -212,11 +212,11 @@ object UnsupportedOperationChecker extends Logging { case m: FlatMapGroupsWithState if m.isStreaming => // Check compatibility with output modes and aggregations in query - val aggsAfterFlatMapGroups = collectStreamingAggregates(plan) + val aggsInQuery = collectStreamingAggregates(plan) if (m.isMapGroupsWithState) { // check mapGroupsWithState // allowed only in update query output mode and without aggregation - if (aggsAfterFlatMapGroups.nonEmpty) { + if (aggsInQuery.nonEmpty) { throwError( "mapGroupsWithState is not supported with aggregation " + "on a streaming DataFrame/Dataset") @@ -225,8 +225,8 @@ object UnsupportedOperationChecker extends Logging { "mapGroupsWithState is not supported with " + s"$outputMode output mode on a streaming DataFrame/Dataset") } - } else { // check latMapGroupsWithState - if (aggsAfterFlatMapGroups.isEmpty) { + } else { // check flatMapGroupsWithState + if (aggsInQuery.isEmpty) { // flatMapGroupsWithState without aggregation: operation's output mode must // match query output mode m.outputMode match { @@ -252,7 +252,7 @@ object UnsupportedOperationChecker extends Logging { } else if (collectStreamingAggregates(m).nonEmpty) { throwError( "flatMapGroupsWithState in append mode is not supported after " + - s"aggregation on a streaming DataFrame/Dataset") + "aggregation on a streaming DataFrame/Dataset") } } } From 3010e9044e068216d7a7a9ec510453ecbb159f95 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 17 Oct 2020 21:02:25 -0700 Subject: [PATCH 0267/1009] [SPARK-33170][SQL] Add SQL config to control fast-fail behavior in FileFormatWriter ### What changes were proposed in this pull request? This patch proposes to add a config we can control fast-fail behavior in FileFormatWriter and set it false by default. ### Why are the changes needed? In SPARK-29649, we catch `FileAlreadyExistsException` in `FileFormatWriter` and fail fast for the task set to prevent task retry. Due to latest discussion, it is important to be able to keep original behavior that is to retry tasks even `FileAlreadyExistsException` is thrown, because `FileAlreadyExistsException` could be recoverable in some cases. We are going to add a config we can control this behavior and set it false for fast-fail by default. ### Does this PR introduce _any_ user-facing change? Yes. By default the task in FileFormatWriter will retry even if `FileAlreadyExistsException` is thrown. This is the behavior before Spark 3.0. User can control fast-fail behavior by enabling it. ### How was this patch tested? Unit test. Closes #30073 from viirya/SPARK-33170. 
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 16 ++++++++++ .../datasources/FileFormatWriter.scala | 3 +- .../spark/sql/sources/InsertSuite.scala | 32 ++++++++++++------- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 319387fe854cf..0497c9b7e80b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2052,6 +2052,20 @@ object SQLConf { .stringConf .createWithDefault("") + val FASTFAIL_ON_FILEFORMAT_OUTPUT = + buildConf("spark.sql.execution.fastFailOnFileFormatOutput") + .internal() + .doc("Whether to fast fail task execution when writing output to FileFormat datasource. " + + "If this is enabled, in `FileFormatWriter` we will catch `FileAlreadyExistsException` " + + "and fast fail output task without further task retry. Only enabling this if you know " + + "the `FileAlreadyExistsException` of the output task is unrecoverable, i.e., further " + + "task attempts won't be able to success. If the `FileAlreadyExistsException` might be " + + "recoverable, you should keep this as disabled and let Spark to retry output tasks. " + + "This is disabled by default.") + .version("3.0.2") + .booleanConf + .createWithDefault(false) + object PartitionOverwriteMode extends Enumeration { val STATIC, DYNAMIC = Value } @@ -3336,6 +3350,8 @@ class SQLConf extends Serializable with Logging { def disabledV2StreamingMicroBatchReaders: String = getConf(DISABLED_V2_STREAMING_MICROBATCH_READERS) + def fastFailFileFormatOutput: Boolean = getConf(FASTFAIL_ON_FILEFORMAT_OUTPUT) + def concatBinaryAsString: Boolean = getConf(CONCAT_BINARY_AS_STRING) def eltOutputAsString: Boolean = getConf(ELT_OUTPUT_AS_STRING) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 219c778b9164a..abb88ae73cabf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan, SQLExecution} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StringType import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -283,7 +284,7 @@ object FileFormatWriter extends Logging { } catch { case e: FetchFailedException => throw e - case f: FileAlreadyExistsException => + case f: FileAlreadyExistsException if SQLConf.get.fastFailFileFormatOutput => // If any output file to write already exists, it does not make sense to re-run this task. // We throw the exception and let Executor throw ExceptionFailure to abort the job. 
throw new TaskOutputFileAlreadyExistException(f) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 32c4fb60b8c54..9b5466e8a68f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -826,21 +826,29 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } test("Stop task set if FileAlreadyExistsException was thrown") { - withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, - "fs.file.impl.disable.cache" -> "true") { - withTable("t") { - sql( - """ - |CREATE TABLE t(i INT, part1 INT) USING PARQUET - |PARTITIONED BY (part1) + Seq(true, false).foreach { fastFail => + withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, + "fs.file.impl.disable.cache" -> "true", + SQLConf.FASTFAIL_ON_FILEFORMAT_OUTPUT.key -> fastFail.toString) { + withTable("t") { + sql( + """ + |CREATE TABLE t(i INT, part1 INT) USING PARQUET + |PARTITIONED BY (part1) """.stripMargin) - val df = Seq((1, 1)).toDF("i", "part1") - val err = intercept[SparkException] { - df.write.mode("overwrite").format("parquet").insertInto("t") + val df = Seq((1, 1)).toDF("i", "part1") + val err = intercept[SparkException] { + df.write.mode("overwrite").format("parquet").insertInto("t") + } + + if (fastFail) { + assert(err.getCause.getMessage.contains("can not write to output file: " + + "org.apache.hadoop.fs.FileAlreadyExistsException")) + } else { + assert(err.getCause.getMessage.contains("Task failed while writing rows")) + } } - assert(err.getCause.getMessage.contains("can not write to output file: " + - "org.apache.hadoop.fs.FileAlreadyExistsException")) } } } From 7766a6fb5f66c6b339909ae25d7f01769f580b18 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Sun, 18 Oct 2020 16:47:04 +0900 Subject: [PATCH 0268/1009] [MINOR][DOCS][EXAMPLE] Fix the Python manual_load_options_csv example ### What changes were proposed in this pull request? This pull request changes the `sep` parameter's value from `:` to `;` in the example of `examples/src/main/python/sql/datasource.py`. This code snippet is shown on the Spark SQL Guide documentation. The `sep` parameter's value should be `;` since the data in https://github.com/apache/spark/blob/master/examples/src/main/resources/people.csv is separated by `;`. ### Why are the changes needed? To fix the example code so that it can be executed properly. ### Does this PR introduce _any_ user-facing change? Yes. This code snippet is shown on the Spark SQL Guide documentation: https://spark.apache.org/docs/latest/sql-data-sources-load-save-functions.html#manually-specifying-options ### How was this patch tested? By building the documentation and checking the Spark SQL Guide documentation manually in the local environment. Closes #30082 from kjmrknsn/fix-example-python-datasource. 
Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- examples/src/main/python/sql/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index 94a41a7e5e7b4..eecd8c2d84788 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -94,7 +94,7 @@ def basic_datasource_example(spark): # $example on:manual_load_options_csv$ df = spark.read.load("examples/src/main/resources/people.csv", - format="csv", sep=":", inferSchema="true", header="true") + format="csv", sep=";", inferSchema="true", header="true") # $example off:manual_load_options_csv$ # $example on:manual_save_options_orc$ From d2f328aba6f1d218425fe5d41bdec66dcaa33c85 Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Sun, 18 Oct 2020 17:13:55 +0900 Subject: [PATCH 0269/1009] [MINOR][DOCS] Fix the link to the pickle module page in RDD Programming Guide ### What changes were proposed in this pull request? This pull request changes the link to the pickle module page from https://docs.python.org/2/library/pickle.html to https://docs.python.org/3/library/pickle.html in RDD Programming Guide. ### Why are the changes needed? Since Python 2 is no longer supported and it is preferable to refer to the pickle module page of Python 3. ### Does this PR introduce _any_ user-facing change? Yes. Before: the `Pickle` link's destination page was https://docs.python.org/2/library/pickle.html After: the `Pickle` link's destination page is https://docs.python.org/3/library/pickle.html ### How was this patch tested? By building the documentation site and check the link's destination page is changed correctly in the local environment. Closes #30081 from kjmrknsn/docs-fix-pickle-link. Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- docs/rdd-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index b48540dc09ece..acc682b27681b 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -1254,7 +1254,7 @@ storage levels is: -**Note:** *In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/2/library/pickle.html) library, +**Note:** *In Python, stored objects will always be serialized with the [Pickle](https://docs.python.org/3/library/pickle.html) library, so it does not matter whether you choose a serialized level. The available storage levels in Python include `MEMORY_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK`, `MEMORY_AND_DISK_2`, `DISK_ONLY`, `DISK_ONLY_2`, and `DISK_ONLY_3`.* From 20b7b923abc2266cf280b8623d6b5b9b277177ec Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 18 Oct 2020 09:21:07 -0700 Subject: [PATCH 0270/1009] [SPARK-33176][K8S] Use 11-jre-slim as default in K8s Dockerfile ### What changes were proposed in this pull request? This PR aims to use `openjdk:11-jre-slim` as default in K8s Dockerfile. ### Why are the changes needed? Although Apache Spark supports both Java8/Java11, there is a difference. 1. Java8-built distribution can run both Java8/Java11 2. Java11-built distribution can run on Java11, but not Java8. In short, we had better use Java11 in Dockerfile to embrace both cases without any issues. ### Does this PR introduce _any_ user-facing change? Yes. This will remove the change of user frustration when they build with JDK11 and build the image without overriding Java base image. 
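As a minimal sketch of that override path (illustrative only: it assumes the `-b` build-arg option of `bin/docker-image-tool.sh` and relies on the `java_image_tag` build arg visible in the Dockerfile diff below; the repository and tag names are placeholders), a Java 8 base image can still be selected explicitly at image build time:

```
# Build the Spark image on top of a JRE 8 base image instead of the new default
./bin/docker-image-tool.sh -r <repo> -t <tag> -b java_image_tag=8-jre-slim build
```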
### How was this patch tested? Pass the K8s IT. Closes #30083 from dongjoon-hyun/SPARK-33176. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index a13fe67c9eb72..8c3db7e243d8b 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # -ARG java_image_tag=8-jre-slim +ARG java_image_tag=11-jre-slim FROM openjdk:${java_image_tag} From ad99f14b4277616b681c91778eba4d9184f8eecf Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 18 Oct 2020 09:24:44 -0700 Subject: [PATCH 0271/1009] [SPARK-33109][BUILD][FOLLOW-UP] Remove the obsolete comment about bringing sbt-dependency-graph back ### What changes were proposed in this pull request? This PR proposes to remove an obsolete comment about adding the `sbt-dependency-graph` back in SBT plugins. ### Why are the changes needed? sbt-dependency-graph is now built-in from SBT 1.4.0, see https://github.com/sbt/sbt/releases/tag/v1.4.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually tested `./build/sbt dependencyTree`. Closes #30085 from HyukjinKwon/SPARK-33109. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- project/plugins.sbt | 2 -- 1 file changed, 2 deletions(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index 920aa677f9e92..c33a96772d5a1 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -45,5 +45,3 @@ libraryDependencies += "org.ow2.asm" % "asm-commons" % "7.2" addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.2") addSbtPlugin("com.typesafe.sbt" % "sbt-pom-reader" % "2.2.0") - -// TODO(SPARK-33109) Upgrade to SBT 1.4 and support `dependencyTree` back From 97605cd1269987ed5ba3013a5f8497375ce8913e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 18 Oct 2020 09:59:50 -0700 Subject: [PATCH 0272/1009] [SPARK-33175][K8S] Detect duplicated mountPaths and fail at Spark side ### What changes were proposed in this pull request? This PR aims to detect duplicate `mountPath`s and stop the job. ### Why are the changes needed? If there is a conflict on `mountPath`, the pod is created and repeats the following error messages and keeps running. Spark job should not keep running and wasting the cluster resources. We had better fail at Spark side. ``` $ k get pod -l 'spark-role in (driver,executor)' NAME READY STATUS RESTARTS AGE tpcds 1/1 Running 0 33m ``` ``` 20/10/18 05:09:26 WARN ExecutorPodsSnapshotsStoreImpl: Exception when notifying snapshot subscriber. io.fabric8.kubernetes.client.KubernetesClientException: Failure executing: POST at: ... Message: Pod "tpcds-exec-1" is invalid: spec.containers[0].volumeMounts[1].mountPath: Invalid value: "/data1": must be unique. ... ``` **AFTER THIS PR** The job will stop with the following error message instead of keeping running. ``` 20/10/18 06:58:45 ERROR ExecutorPodsSnapshotsStoreImpl: Going to stop due to IllegalArgumentException java.lang.IllegalArgumentException: requirement failed: Found duplicated mountPath: `/data1` ``` ### Does this PR introduce _any_ user-facing change? Yes, but this is a bug fix. 
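As an illustrative sketch of how such a conflict arises (assuming the standard `spark.kubernetes.executor.volumes.*` configuration scheme; the volume names `hpVolume` and `checkpointVolume` simply mirror the new test case below), a submission that maps two executor volumes onto the same path now fails fast with the `IllegalArgumentException` shown above instead of leaving the job running:

```
--conf spark.kubernetes.executor.volumes.hostPath.hpVolume.mount.path=/data \
--conf spark.kubernetes.executor.volumes.hostPath.hpVolume.options.path=/hostPath/tmp \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointVolume.mount.path=/data \
--conf spark.kubernetes.executor.volumes.persistentVolumeClaim.checkpointVolume.options.claimName=pvcClaim
```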
### How was this patch tested? Pass the CI with the newly added test case. Closes #30084 from dongjoon-hyun/SPARK-33175-2. Lead-authored-by: Dongjoon Hyun Co-authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../features/MountVolumesFeatureStep.scala | 5 ++++ .../k8s/ExecutorPodsSnapshotsStoreImpl.scala | 3 +++ .../MountVolumesFeatureStepSuite.scala | 25 +++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index e297656520200..c66756fd69116 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -49,6 +49,11 @@ private[spark] class MountVolumesFeatureStep(conf: KubernetesConf) private def constructVolumes( volumeSpecs: Iterable[KubernetesVolumeSpec] ): Iterable[(VolumeMount, Volume)] = { + val duplicateMountPaths = volumeSpecs.map(_.mountPath).toSeq.groupBy(identity).collect { + case (x, ys) if ys.length > 1 => s"'$x'" + } + require(duplicateMountPaths.isEmpty, + s"Found duplicated mountPath: ${duplicateMountPaths.mkString(", ")}") volumeSpecs.zipWithIndex.map { case (spec, i) => val volumeMount = new VolumeMountBuilder() .withMountPath(spec.mountPath) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala index 5c192c690eba5..3f2cb485bbb31 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala @@ -133,6 +133,9 @@ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: Schedul snapshotsBuffer.drainTo(snapshots) onNewSnapshots(snapshots.asScala.toSeq) } catch { + case e: IllegalArgumentException => + logError("Going to stop due to IllegalArgumentException", e) + System.exit(1) case NonFatal(e) => logWarning("Exception when notifying snapshot subscriber.", e) } finally { lock.unlock() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index e95af264d09ec..bbb89fd0a1c24 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -236,6 +236,31 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(configuredPod.container.getVolumeMounts.size() === 2) } + test("mountPath should be unique") { + val hpVolumeConf = KubernetesVolumeSpec( + "hpVolume", + "/data", + "", + false, + KubernetesHostPathVolumeConf("/hostPath/tmp") + ) + val pvcVolumeConf = KubernetesVolumeSpec( + "checkpointVolume", + "/data", + "", + true, + KubernetesPVCVolumeConf("pvcClaim") + ) + val kubernetesConf = 
KubernetesTestConf.createDriverConf( + volumes = Seq(hpVolumeConf, pvcVolumeConf)) + + val step = new MountVolumesFeatureStep(kubernetesConf) + val m = intercept[IllegalArgumentException] { + step.configurePod(SparkPod.initialPod()) + }.getMessage + assert(m.contains("Found duplicated mountPath: '/data'")) + } + test("Mounts subpath on emptyDir") { val volumeConf = KubernetesVolumeSpec( "testVolume", From ce498943d23e1660ba2b724e8831739f3b8a0bbf Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Mon, 19 Oct 2020 09:50:59 +0900 Subject: [PATCH 0273/1009] [SPARK-33177][SQL] CollectList and CollectSet should not be nullable ### What changes were proposed in this pull request? Mark `CollectList` and `CollectSet` as non-nullable. ### Why are the changes needed? `CollectList` and `CollectSet` SQL expressions never return null value. Marking them as non-nullable can have some performance benefits, because some optimizer rules apply only to non-nullable expressions ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Did not find any existing tests on the nullability of aggregate functions. Closes #30087 from tanelk/SPARK-33177_collect. Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/expressions/aggregate/collect.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index 0a3d87623be8b..f95f44c808092 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -39,7 +39,7 @@ abstract class Collect[T <: Growable[Any] with Iterable[Any]] extends TypedImper override def children: Seq[Expression] = child :: Nil - override def nullable: Boolean = true + override def nullable: Boolean = false override def dataType: DataType = ArrayType(child.dataType, false) From f8277d3aa308d267ff0423f85ffd884480cedf59 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 18 Oct 2020 19:02:21 -0700 Subject: [PATCH 0274/1009] [SPARK-32069][CORE][SQL] Improve error message on reading unexpected directory ### What changes were proposed in this pull request? Improve error message on reading unexpected directory ### Why are the changes needed? Improve error message on reading unexpected directory ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Ut Closes #30027 from AngersZhuuuu/SPARK-32069. 
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/rdd/HadoopRDD.scala | 4 ++++ .../sql/hive/HiveParquetSourceSuite.scala | 16 +++++++++---- .../hive/execution/HiveTableScanSuite.scala | 23 +++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index d5f21112c0c9e..5fc0b4f736d55 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -232,6 +232,10 @@ class HadoopRDD[K, V]( logWarning(s"${jobConf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" + s" partitions returned from this path.", e) Array.empty[Partition] + case e: IOException if e.getMessage.startsWith("Not a file:") => + val path = e.getMessage.split(":").map(_.trim).apply(2) + throw new IOException(s"Path: ${path} is a directory, which is not supported by the " + + s"record reader when `mapreduce.input.fileinputformat.input.dir.recursive` is false.") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index b557fe73f1154..86fc32cd8ca63 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -230,6 +230,12 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { withTempPath { path => withTable("parq_tbl1", "parq_tbl2", "parq_tbl3", "tbl1", "tbl2", "tbl3", "tbl4", "tbl5", "tbl6") { + + def checkErrorMsg(path: String): String = { + s"Path: ${path} is a directory, which is not supported by the record reader " + + s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false." 
+ } + val parquetTblStatement1 = s""" |CREATE EXTERNAL TABLE parq_tbl1( @@ -287,7 +293,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl1").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1"))) } val l1DirStatement = @@ -305,7 +311,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl2").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2"))) } val l2DirStatement = @@ -323,7 +329,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl3").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2/l3"))) } val wildcardTopDirStatement = @@ -341,7 +347,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl4").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2"))) } val wildcardL1DirStatement = @@ -359,7 +365,7 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest { val msg = intercept[IOException] { sql("SELECT * FROM tbl5").show() }.getMessage - assert(msg.contains("Not a file:")) + assert(msg.contains(checkErrorMsg(s"$path/l1/l2/l3"))) } val wildcardL2DirStatement = diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index bdccfccbc5bdb..ba6dbb01d5901 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.execution +import java.io.{File, IOException} + import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col import org.apache.spark.sql.hive.HiveUtils @@ -248,6 +250,27 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH } } + test("SPARK-32069: Improve error message on reading unexpected directory") { + withTable("t") { + withTempDir { f => + sql(s"CREATE TABLE t(i LONG) LOCATION '${f.getAbsolutePath}'") + sql("INSERT INTO t VALUES(1)") + val dir = new File(f.getCanonicalPath + "/data") + dir.mkdir() + sql("set mapreduce.input.fileinputformat.input.dir.recursive=true") + assert(sql("select * from t").collect().head.getLong(0) == 1) + sql("set mapreduce.input.fileinputformat.input.dir.recursive=false") + val e = intercept[IOException] { + sql("SELECT * FROM t").collect() + } + assert(e.getMessage.contains(s"Path: ${dir.getAbsoluteFile} is a directory, " + + s"which is not supported by the record reader " + + s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false.")) + dir.delete() + } + } + } + private def getHiveTableScanExec(query: String): HiveTableScanExec = { sql(query).queryExecution.sparkPlan.collectFirst { case p: HiveTableScanExec => p From e6c53c2c1b538d6272df4d1ca294d04f8b49bd6c Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 19 Oct 2020 14:13:37 +0900 Subject: [PATCH 0275/1009] [SPARK-33123][INFRA] Ignore GitHub only changes in Amplab Jenkins build ### What changes were proposed in this pull request? This PR aims to ignore GitHub only changes in Amplab Jenkins build. 
### Why are the changes needed? This will save server resources. ### Does this PR introduce _any_ user-facing change? No, this is a dev-only change. ### How was this patch tested? Manually. I used the following doctest during testing and removed it at the clean-up. E2E tests: ``` cd dev cat test.py ``` ```python import importlib runtests = importlib.import_module("run-tests") print([x.name for x in runtests.determine_modules_for_files([".github/workflows/build_and_test.yml"])]) ``` ```python $ GITHUB_ACTIONS=1 python test.py ['root'] $ python test.py [] ``` Unittests: ```bash $ GITHUN_ACTIONS=1 python3 -m doctest dev/run-tests.py $ python3 -m doctest dev/run-tests.py ``` Closes #30020 from williamhyun/SPARK-33123. Lead-authored-by: William Hyun Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 48191e9bb024d..662ac2d6311dd 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -42,7 +42,8 @@ def determine_modules_for_files(filenames): """ Given a list of filenames, return the set of modules that contain those files. If a file is not associated with a more specific submodule, then this method will consider that - file to belong to the 'root' module. GitHub Action and Appveyor files are ignored. + file to belong to the 'root' module. `.github` directory is counted only in GitHub Actions, + and `appveyor.yml` is always ignored because this file is dedicated only to AppVeyor builds. >>> sorted(x.name for x in determine_modules_for_files(["python/pyspark/a.py", "sql/core/foo"])) ['pyspark-core', 'sql'] @@ -55,6 +56,8 @@ def determine_modules_for_files(filenames): for filename in filenames: if filename in ("appveyor.yml",): continue + if ("GITHUB_ACTIONS" not in os.environ) and filename.startswith(".github"): + continue matched_at_least_one_module = False for module in modules.all_modules: if module.contains_file(filename): From 53783e706dde943adee978a8eeee95a6f60687bd Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 19 Oct 2020 15:54:52 +0900 Subject: [PATCH 0276/1009] [SPARK-33179][TESTS] Switch default Hadoop profile in run-tests.py ### What changes were proposed in this pull request? This PR aims to switch the default Hadoop profile from `hadoop2.7` to `hadoop3.2` in `dev/run-tests.py` when it's running in local or GitHub Action environments. ### Why are the changes needed? The default Hadoop version is 3.2. We had better be consistent. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. **BEFORE** ``` % dev/run-tests.py Cannot install SparkR as R was not found in PATH [info] Using build tool sbt with Hadoop profile hadoop2.7 and Hive profile hive2.3 under environment local ``` **AFTER** ``` % dev/run-tests.py Cannot install SparkR as R was not found in PATH [info] Using build tool sbt with Hadoop profile hadoop3.2 and Hive profile hive2.3 under environment local ``` Closes #30090 from williamhyun/SPARK-33179. Authored-by: William Hyun Signed-off-by: HyukjinKwon --- dev/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 662ac2d6311dd..5bdbc0ffb850c 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -638,7 +638,7 @@ def main(): else: # else we're running locally or Github Actions. 
build_tool = "sbt" - hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7") + hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop3.2") hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") if "GITHUB_ACTIONS" in os.environ: test_env = "github_actions" From 388e067a909516a9a509399fe17d79ce1fb54d31 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 19 Oct 2020 16:40:48 +0900 Subject: [PATCH 0277/1009] [SPARK-33139][SQL][FOLLOW-UP] Avoid using reflect call on session.py ### What changes were proposed in this pull request? In [SPARK-33139](https://github.com/apache/spark/pull/30042), I was using reflect "Class.forName" in python code to invoke method in SparkSession which is not recommended. using getattr to access "SparkSession$.Module$" instead. ### Why are the changes needed? Code refine. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing tests. Closes #30092 from leanken/leanken-SPARK-33139-followup. Authored-by: xuewei.linxuewei Signed-off-by: HyukjinKwon --- python/pyspark/sql/session.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index e6ab1ea3878f3..d724b76e3bfc3 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -230,9 +230,7 @@ def __init__(self, sparkContext, jsparkSession=None): SparkSession._instantiatedSession = self SparkSession._activeSession = self self._jvm.SparkSession.setDefaultSession(self._jsparkSession) - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$")\ .setActiveSessionInternal(self._jsparkSession) def _repr_html_(self): @@ -564,9 +562,7 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr Py4JJavaError: ... """ SparkSession._activeSession = self - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$")\ .setActiveSessionInternal(self._jsparkSession) if isinstance(data, DataFrame): raise TypeError("data is already a DataFrame") @@ -689,10 +685,7 @@ def stop(self): self._sc.stop() # We should clean the default session up. See SPARK-23228. self._jvm.SparkSession.clearDefaultSession() - self._jvm.java.lang.Class.forName("org.apache.spark.sql.SparkSession$")\ - .getDeclaredField("MODULE$")\ - .get(None)\ - .clearActiveSessionInternal() + getattr(getattr(self._jvm, "SparkSession$"), "MODULE$").clearActiveSessionInternal() SparkSession._instantiatedSession = None SparkSession._activeSession = None SQLContext._instantiatedContext = None From a7a8dae4836f455a26ba6cb3c7d733775b6af0f6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 19 Oct 2020 17:13:47 +0900 Subject: [PATCH 0278/1009] Revert "[SPARK-33069][INFRA] Skip test result report if no JUnit XML files are found" This reverts commit a0aa8f33a9420feb9228b51a3dfad2e7e86d65a5. 
--- .github/workflows/test_report.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml index 060a8795b6a77..93cdb86687261 100644 --- a/.github/workflows/test_report.yml +++ b/.github/workflows/test_report.yml @@ -15,16 +15,7 @@ jobs: github_token: ${{ secrets.GITHUB_TOKEN }} workflow: ${{ github.event.workflow_run.workflow_id }} commit: ${{ github.event.workflow_run.head_commit.id }} - - name: Check if JUnit report XML files exist - run: | - if ls **/target/test-reports/*.xml > /dev/null 2>&1; then - echo '::set-output name=FILE_EXISTS::true' - else - echo '::set-output name=FILE_EXISTS::false' - fi - id: check-junit-file - name: Publish test report - if: steps.check-junit-file.outputs.FILE_EXISTS == 'true' uses: scacap/action-surefire-report@v1 with: check_name: Report test results From 26b13c70c312147e42db27cd986e970115a55cdd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 19 Oct 2020 17:47:49 +0900 Subject: [PATCH 0279/1009] [SPARK-33169][SQL][TESTS] Check propagation of datasource options to underlying file system for built-in file-based datasources ### What changes were proposed in this pull request? 1. Add the common trait `CommonFileDataSourceSuite` with tests that can be executed for all built-in file-based datasources. 2. Add a test `CommonFileDataSourceSuite` to check that datasource options are propagated to underlying file systems as Hadoop configs. 3. Mix `CommonFileDataSourceSuite` to `AvroSuite`, `OrcSourceSuite`, `TextSuite`, `JsonSuite`, CSVSuite` and to `ParquetFileFormatSuite`. 4. Remove duplicated tests from `AvroSuite` and from `OrcSourceSuite`. ### Why are the changes needed? To improve test coverage and test all built-in file-based datasources. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suites. Closes #30067 from MaxGekk/ds-options-common-test. 
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroSuite.scala | 20 +++--- .../source/libsvm/LibSVMRelationSuite.scala | 28 +++++---- .../spark/sql/FileBasedDataSourceSuite.scala | 16 ----- .../CommonFileDataSourceSuite.scala | 62 +++++++++++++++++++ .../execution/datasources/csv/CSVSuite.scala | 10 ++- .../datasources/json/JsonSuite.scala | 11 +++- .../datasources/orc/OrcSourceSuite.scala | 23 ++----- .../parquet/ParquetFileFormatSuite.scala | 25 +++++++- .../datasources/text/TextSuite.scala | 5 +- 9 files changed, 136 insertions(+), 64 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index b0f2f8ed09a96..52cab880ab897 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{DataSource, FilePartition} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf @@ -50,9 +50,15 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.v2.avro.AvroScan import org.apache.spark.util.Utils -abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDataSourceSuiteBase { +abstract class AvroSuite + extends QueryTest + with SharedSparkSession + with CommonFileDataSourceSuite + with NestedDataSourceSuiteBase { + import testImplicits._ + override protected def dataSourceFormat = "avro" override val nestedDataSources = Seq("avro") val episodesAvro = testFile("episodes.avro") val testAvro = testFile("test.avro") @@ -1807,16 +1813,6 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession with NestedDa } } - test("SPARK-33089: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - val conf = Map("ds_option" -> "value") - val path = "file:" + testAvro.stripPrefix("file:") - spark.read.format("avro").options(conf).load(path) - } - } - test("SPARK-33163: write the metadata key 'org.apache.spark.legacyDateTime'") { def saveTs(dir: java.io.File): Unit = { Seq(Timestamp.valueOf("2020-10-15 01:02:03")).toDF() diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala index cc0ca308cb668..a456409cfe3bc 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala @@ -27,13 +27,26 @@ import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.linalg.SQLDataTypes.VectorType 
import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SaveMode} -import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.types.{DoubleType, StructField, StructType} import org.apache.spark.util.Utils +class LibSVMRelationSuite + extends SparkFunSuite + with MLlibTestSparkContext + with CommonFileDataSourceSuite { + + override protected def dataSourceFormat = "libsvm" + override protected def inputDataset = { + val rawData = new java.util.ArrayList[Row]() + rawData.add(Row(1.0, Vectors.sparse(1, Seq((0, 1.0))))) + val struct = new StructType() + .add("labelFoo", DoubleType, false) + .add("featuresBar", VectorType, false) + spark.createDataFrame(rawData, struct) + } -class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with SQLHelper { // Path for dataset var path: String = _ @@ -212,13 +225,4 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext with assert(v == Vectors.sparse(2, Seq((0, 2.0), (1, 3.0)))) } } - - test("SPARK-33101: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - val df = spark.read.option("ds_option", "value").format("libsvm").load(path) - assert(df.columns(0) == "label") - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 77e07e5550f35..b27c1145181bd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -842,22 +842,6 @@ class FileBasedDataSourceSuite extends QueryTest } } - test("SPARK-31935: Hadoop file system config should be effective in data source options") { - Seq("parquet", "").foreach { format => - withSQLConf( - SQLConf.USE_V1_SOURCE_LIST.key -> format, - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - withTempDir { dir => - val path = "file:" + dir.getCanonicalPath.stripPrefix("file:") - spark.range(10).write.option("ds_option", "value").mode("overwrite").parquet(path) - checkAnswer( - spark.read.option("ds_option", "value").parquet(path), spark.range(10).toDF()) - } - } - } - } - test("SPARK-31116: Select nested schema with case insensitive mode") { // This test case failed at only Parquet. ORC is added for test coverage parity. Seq("orc", "parquet").foreach { format => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala new file mode 100644 index 0000000000000..b7d0a7fc306ad --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/CommonFileDataSourceSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.scalatest.funsuite.AnyFunSuite + +import org.apache.spark.sql.{Dataset, Encoders, FakeFileSystemRequiringDSOption, SparkSession} +import org.apache.spark.sql.catalyst.plans.SQLHelper + +/** + * The trait contains tests for all file-based data sources. + * The tests that are not applicable to all file-based data sources should be placed to + * [[org.apache.spark.sql.FileBasedDataSourceSuite]]. + */ +trait CommonFileDataSourceSuite extends SQLHelper { self: AnyFunSuite => + + protected def spark: SparkSession + protected def dataSourceFormat: String + protected def inputDataset: Dataset[_] = spark.createDataset(Seq("abc"))(Encoders.STRING) + + test(s"Propagate Hadoop configs from $dataSourceFormat options to underlying file system") { + withSQLConf( + "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, + "fs.file.impl.disable.cache" -> "true") { + Seq(false, true).foreach { mergeSchema => + withTempPath { dir => + val path = dir.getAbsolutePath + val conf = Map("ds_option" -> "value", "mergeSchema" -> mergeSchema.toString) + inputDataset + .write + .options(conf) + .format(dataSourceFormat) + .save(path) + Seq(path, "file:" + path.stripPrefix("file:")).foreach { p => + val readback = spark + .read + .options(conf) + .format(dataSourceFormat) + .load(p) + // Checks that read doesn't throw the exception from `FakeFileSystemRequiringDSOption` + readback.write.mode("overwrite").format("noop").save() + } + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 066259075d6bf..a236814fdcdcd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -36,13 +36,21 @@ import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ -abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvData { +abstract class CSVSuite + extends QueryTest + with SharedSparkSession + with TestCsvData + with CommonFileDataSourceSuite { + import testImplicits._ + override protected def dataSourceFormat = "csv" + private val carsFile = "test-data/cars.csv" private val carsMalformedFile = "test-data/cars-malformed.csv" private val carsFile8859 = "test-data/cars_iso-8859-1.csv" diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index d9270024d5b28..76e05a2ed6ed7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.ExternalRDD -import org.apache.spark.sql.execution.datasources.{DataSource, InMemoryFileIndex, NoopCache} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, InMemoryFileIndex, NoopCache} import org.apache.spark.sql.execution.datasources.v2.json.JsonScanBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -49,9 +49,16 @@ class TestFileFilter extends PathFilter { override def accept(path: Path): Boolean = path.getParent.getName != "p=2" } -abstract class JsonSuite extends QueryTest with SharedSparkSession with TestJsonData { +abstract class JsonSuite + extends QueryTest + with SharedSparkSession + with TestJsonData + with CommonFileDataSourceSuite { + import testImplicits._ + override protected def dataSourceFormat = "json" + test("Type promotion") { def checkTypePromotion(expected: Any, actual: Any): Unit = { assert(expected.getClass == actual.getClass, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 1242b8c693d64..4c489bdcc649e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -32,8 +32,8 @@ import org.apache.orc.impl.RecordReaderImpl import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} -import org.apache.spark.sql.{FakeFileSystemRequiringDSOption, Row, SPARK_VERSION_METADATA_KEY} -import org.apache.spark.sql.execution.datasources.SchemaMergeUtils +import org.apache.spark.sql.{Row, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, SchemaMergeUtils} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{LongType, StructField, StructType} @@ -41,9 +41,11 @@ import org.apache.spark.util.Utils case class OrcData(intField: Int, stringField: String) -abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { +abstract class OrcSuite extends OrcTest with BeforeAndAfterAll with CommonFileDataSourceSuite { import testImplicits._ + override protected def dataSourceFormat = "orc" + var orcTableDir: File = null var orcTableAsDir: File = null @@ -537,21 +539,6 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll { } } } - - test("SPARK-33094: should propagate Hadoop config from DS options to underlying file system") { - withSQLConf( - "fs.file.impl" -> classOf[FakeFileSystemRequiringDSOption].getName, - "fs.file.impl.disable.cache" -> "true") { - Seq(false, true).foreach { mergeSchema => - withTempPath { dir => - val path = dir.getAbsolutePath - val conf = Map("ds_option" -> "value", "mergeSchema" -> 
mergeSchema.toString) - spark.range(1).write.options(conf).orc(path) - checkAnswer(spark.read.options(conf).orc(path), Row(0)) - } - } - } - } } class OrcSourceSuite extends OrcSuite with SharedSparkSession { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala index e65f4d12bf7f2..c52b57eb31e4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala @@ -19,12 +19,19 @@ package org.apache.spark.sql.execution.datasources.parquet import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -class ParquetFileFormatSuite extends QueryTest with ParquetTest with SharedSparkSession { +abstract class ParquetFileFormatSuite + extends QueryTest + with ParquetTest + with SharedSparkSession + with CommonFileDataSourceSuite { + + override protected def dataSourceFormat = "parquet" test("read parquet footers in parallel") { def testReadFooters(ignoreCorruptFiles: Boolean): Unit = { @@ -57,3 +64,17 @@ class ParquetFileFormatSuite extends QueryTest with ParquetTest with SharedSpark assert(exception.getMessage().contains("Could not read footer for file")) } } + +class ParquetFileFormatV1Suite extends ParquetFileFormatSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "parquet") +} + +class ParquetFileFormatV2Suite extends ParquetFileFormatSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala index 7e97994476694..1eb32ed285799 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/text/TextSuite.scala @@ -26,14 +26,17 @@ import org.apache.hadoop.io.compress.GzipCodec import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode} +import org.apache.spark.sql.execution.datasources.CommonFileDataSourceSuite import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.util.Utils -abstract class TextSuite extends QueryTest with SharedSparkSession { +abstract class TextSuite extends QueryTest with SharedSparkSession with CommonFileDataSourceSuite { import testImplicits._ + override protected def dataSourceFormat = "text" + test("reading text file") { verifyFrame(spark.read.format("text").load(testFile)) } From 66c5e0132209a5a94f9d7efb5e895f143b0ef53b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 19 Oct 2020 10:35:34 -0700 Subject: [PATCH 0280/1009] [SPARK-32941][SQL] Optimize UpdateFields expression chain and put the rule early in Analysis phase ### What changes were 
proposed in this pull request? This patch proposes to add more optimization to `UpdateFields` expression chain. And optimize `UpdateFields` early in analysis phase. ### Why are the changes needed? `UpdateFields` can manipulate complex nested data, but using `UpdateFields` can easily create inefficient expression chain. We should optimize it further. Because when manipulating deeply nested schema, the `UpdateFields` expression tree could be too complex to analyze, this change optimizes `UpdateFields` early in analysis phase. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29812 from viirya/SPARK-32941. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 6 ++ .../sql/catalyst/analysis/ResolveUnion.scala | 11 +--- .../sql/catalyst/optimizer/ComplexTypes.scala | 7 +- .../sql/catalyst/optimizer/Optimizer.scala | 2 +- .../sql/catalyst/optimizer/UpdateFields.scala | 57 +++++++++++++++-- ...te.scala => OptimizeWithFieldsSuite.scala} | 64 +++++++++++++++++-- .../optimizer/complexTypesSuite.scala | 6 +- 7 files changed, 129 insertions(+), 24 deletions(-) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{CombineUpdateFieldsSuite.scala => OptimizeWithFieldsSuite.scala} (51%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0ba150ec1efb4..4264627e0d9bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects._ +import org.apache.spark.sql.catalyst.optimizer.OptimizeUpdateFields import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -207,6 +208,11 @@ class Analyzer( lazy val batches: Seq[Batch] = Seq( Batch("Substitution", fixedPoint, + // This rule optimizes `UpdateFields` expression chains so looks more like optimization rule. + // However, when manipulating deeply nested schema, `UpdateFields` expression tree could be + // very complex and make analysis impossible. Thus we need to optimize `UpdateFields` early + // at the beginning of analysis. 
+ OptimizeUpdateFields, CTESubstitution, WindowsSubstitution, EliminateUnions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala index c1a9c9d3d9bab..b08e116642ece 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUnion.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.optimizer.CombineUnions +import org.apache.spark.sql.catalyst.optimizer.{CombineUnions, OptimizeUpdateFields} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Union} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf @@ -88,13 +88,6 @@ object ResolveUnion extends Rule[LogicalPlan] { } } - def simplifyWithFields(expr: Expression): Expression = { - expr.transformUp { - case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => - UpdateFields(struct, fieldOps1 ++ fieldOps2) - } - } - /** * Adds missing fields recursively into given `col` expression, based on the target `StructType`. * This is called by `compareAndAddFields` when we find two struct columns with same name but @@ -119,7 +112,7 @@ object ResolveUnion extends Rule[LogicalPlan] { missingFieldsOpt.map { s => val struct = addFieldsInto(col, s.fields) // Combines `WithFields`s to reduce expression tree. - val reducedStruct = simplifyWithFields(struct) + val reducedStruct = struct.transformUp(OptimizeUpdateFields.optimizeUpdateFields) val sorted = sortStructFieldsInWithFields(reducedStruct) sorted }.get diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 860219e55b052..2ac8f62b67b3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -46,7 +46,12 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { // if the struct itself is null, then any value extracted from it (expr) will be null // so we don't need to wrap expr in If(IsNull(struct), Literal(null, expr.dataType), expr) case expr: GetStructField if expr.child.semanticEquals(structExpr) => expr - case expr => If(IsNull(structExpr), Literal(null, expr.dataType), expr) + case expr => + if (structExpr.nullable) { + If(IsNull(structExpr), Literal(null, expr.dataType), expr) + } else { + expr + } } // Remove redundant array indexing. 
case GetArrayStructFields(CreateArray(elems, useStringTypeWhenEmpty), field, ordinal, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 7586bdf4392f5..3e9a97419682d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -109,7 +109,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveRedundantAliases, UnwrapCastInBinaryComparison, RemoveNoopOperators, - CombineUpdateFields, + OptimizeUpdateFields, SimplifyExtractValueOps, OptimizeJsonExprs, CombineConcats) ++ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala index c7154210e0c62..465d2efe2775c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UpdateFields.scala @@ -17,19 +17,68 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.UpdateFields +import java.util.Locale + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.{Expression, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf /** - * Combines all adjacent [[UpdateFields]] expression into a single [[UpdateFields]] expression. + * Optimizes [[UpdateFields]] expression chains. */ -object CombineUpdateFields extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { +object OptimizeUpdateFields extends Rule[LogicalPlan] { + private def canOptimize(names: Seq[String]): Boolean = { + if (SQLConf.get.caseSensitiveAnalysis) { + names.distinct.length != names.length + } else { + names.map(_.toLowerCase(Locale.ROOT)).distinct.length != names.length + } + } + + val optimizeUpdateFields: PartialFunction[Expression, Expression] = { + case UpdateFields(structExpr, fieldOps) + if fieldOps.forall(_.isInstanceOf[WithField]) && + canOptimize(fieldOps.map(_.asInstanceOf[WithField].name)) => + val caseSensitive = SQLConf.get.caseSensitiveAnalysis + + val withFields = fieldOps.map(_.asInstanceOf[WithField]) + val names = withFields.map(_.name) + val values = withFields.map(_.valExpr) + + val newNames = mutable.ArrayBuffer.empty[String] + val newValues = mutable.ArrayBuffer.empty[Expression] + + if (caseSensitive) { + names.zip(values).reverse.foreach { case (name, value) => + if (!newNames.contains(name)) { + newNames += name + newValues += value + } + } + } else { + val nameSet = mutable.HashSet.empty[String] + names.zip(values).reverse.foreach { case (name, value) => + val lowercaseName = name.toLowerCase(Locale.ROOT) + if (!nameSet.contains(lowercaseName)) { + newNames += name + newValues += value + nameSet += lowercaseName + } + } + } + + val newWithFields = newNames.reverse.zip(newValues.reverse).map(p => WithField(p._1, p._2)) + UpdateFields(structExpr, newWithFields.toSeq) + case UpdateFields(UpdateFields(struct, fieldOps1), fieldOps2) => UpdateFields(struct, fieldOps1 ++ fieldOps2) } + + def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions(optimizeUpdateFields) } /** diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala similarity index 51% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala index ff9c60a2fa5bd..b093b39cc4b88 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombineUpdateFieldsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWithFieldsSuite.scala @@ -19,19 +19,21 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Literal, UpdateFields, WithField} +import org.apache.spark.sql.catalyst.expressions.{Alias, GetStructField, Literal, UpdateFields, WithField} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.internal.SQLConf - -class CombineUpdateFieldsSuite extends PlanTest { +class OptimizeWithFieldsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { - val batches = Batch("CombineUpdateFields", FixedPoint(10), CombineUpdateFields) :: Nil + val batches = Batch("OptimizeUpdateFields", FixedPoint(10), + OptimizeUpdateFields, SimplifyExtractValueOps) :: Nil } private val testRelation = LocalRelation('a.struct('a1.int)) + private val testRelation2 = LocalRelation('a.struct('a1.int).notNull) test("combines two adjacent UpdateFields Expressions") { val originalQuery = testRelation @@ -70,4 +72,58 @@ class CombineUpdateFieldsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-32941: optimize WithFields followed by GetStructField") { + val originalQuery = testRelation2 + .select(Alias( + GetStructField(UpdateFields('a, + WithField("b1", Literal(4)) :: Nil), 1), "out")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation2 + .select(Alias(Literal(4), "out")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("SPARK-32941: optimize WithFields chain - case insensitive") { + val originalQuery = testRelation + .select( + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .select( + Alias(UpdateFields('a, WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, WithField("B1", Literal(5)) :: Nil), "out2")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("SPARK-32941: optimize WithFields chain - case sensitive") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val originalQuery = testRelation + .select( + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("b1", Literal(5)) :: Nil), "out1")(), + Alias(UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .select( + Alias(UpdateFields('a, WithField("b1", Literal(5)) :: Nil), 
"out1")(), + Alias( + UpdateFields('a, + WithField("b1", Literal(4)) :: WithField("B1", Literal(5)) :: Nil), "out2")()) + .analyze + + comparePlans(optimized, correctAnswer) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index d9cefdaf3fe70..9878969959bfd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -44,7 +44,7 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { BooleanSimplification, SimplifyConditionals, SimplifyBinaryComparison, - CombineUpdateFields, + OptimizeUpdateFields, SimplifyExtractValueOps) :: Nil } @@ -698,7 +698,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { val expected = structLevel2.select( UpdateFields('a1, Seq( // scalastyle:off line.size.limit - WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: Nil)), WithField("a2", UpdateFields(GetStructField('a1, 0), WithField("b3", 2) :: WithField("c3", 3) :: Nil)) // scalastyle:on line.size.limit )).as("a1")) @@ -732,7 +731,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", repeatedExpr), WithField("a2", UpdateFields( If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), WithField("c3", Literal(3)) :: Nil)) @@ -763,7 +761,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { val expected = structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3")))), WithField("a2", UpdateFields(GetStructField('a1, 0), Seq(DropField("b3"), DropField("c3")))) )).as("a1")) @@ -797,7 +794,6 @@ class ComplexTypesSuite extends PlanTest with ExpressionEvalHelper { structLevel2.select( UpdateFields('a1, Seq( - WithField("a2", repeatedExpr), WithField("a2", UpdateFields( If(IsNull('a1), Literal(null, repeatedExprDataType), repeatedExpr), DropField("c3") :: Nil)) From 6ad75cda1eb9704ca1fd1539ea80454d66681965 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Mon, 19 Oct 2020 12:50:01 -0700 Subject: [PATCH 0281/1009] [SPARK-17333][PYSPARK] Enable mypy ### What changes were proposed in this pull request? Add MyPy to the CI. Once this is installed on the CI: https://issues.apache.org/jira/browse/SPARK-32797?jql=project%20%3D%20SPARK%20AND%20text%20~%20mypy this wil automatically check the types. ### Why are the changes needed? We should check if the types are still correct on the CI. ``` MacBook-Pro-van-Fokko:spark fokkodriesprong$ ./dev/lint-python starting python compilation test... python compilation succeeded. starting pycodestyle test... pycodestyle checks passed. starting flake8 test... flake8 checks passed. starting mypy test... mypy checks passed. The sphinx-build command was not found. Skipping Sphinx build for now. all lint-python tests passed! ``` ### Does this PR introduce _any_ user-facing change? No :) ### How was this patch tested? By running `./dev/lint-python` locally. Closes #30088 from Fokko/SPARK-17333. 
Authored-by: Fokko Driesprong Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- .gitignore | 1 + dev/lint-python | 27 ++++++++++++++++++++++++++- 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index cdbe34129637e..762e22f24cc2b 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -287,7 +287,7 @@ jobs: run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx + pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy - name: Install R 4.0 uses: r-lib/actions/setup-r@v1 with: diff --git a/.gitignore b/.gitignore index 2e4f77ad6fb42..82414d1fa35bf 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ python/docs/source/reference/api/ python/test_coverage/coverage_data python/test_coverage/htmlcov python/pyspark/python +.mypy_cache/ reports/ scalastyle-on-compile.generated.xml scalastyle-output.xml diff --git a/dev/lint-python b/dev/lint-python index 21949e5d8e4e7..62664818dc106 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -18,7 +18,7 @@ # define test binaries + versions FLAKE8_BUILD="flake8" MINIMUM_FLAKE8="3.5.0" - +MYPY_BUILD="mypy" PYCODESTYLE_BUILD="pycodestyle" MINIMUM_PYCODESTYLE="2.6.0" @@ -122,6 +122,30 @@ function pycodestyle_test { fi } +function mypy_test { + local MYPY_REPORT= + local MYPY_STATUS= + + if ! hash "$MYPY_BUILD" 2> /dev/null; then + echo "The $MYPY_BUILD command was not found. Skipping for now." + return + fi + + echo "starting $MYPY_BUILD test..." + MYPY_REPORT=$( ($MYPY_BUILD --config-file python/mypy.ini python/pyspark) 2>&1) + MYPY_STATUS=$? + + if [ "$MYPY_STATUS" -ne 0 ]; then + echo "mypy checks failed:" + echo "$MYPY_REPORT" + echo "$MYPY_STATUS" + exit "$MYPY_STATUS" + else + echo "mypy checks passed." + echo + fi +} + function flake8_test { local FLAKE8_VERSION= local EXPECTED_FLAKE8= @@ -246,6 +270,7 @@ PYTHON_SOURCE="$(find . -name "*.py")" compile_python_test "$PYTHON_SOURCE" pycodestyle_test "$PYTHON_SOURCE" flake8_test +mypy_test sphinx_test echo From f65a24412b6691ecdb4254e70d6e7abc846edb66 Mon Sep 17 00:00:00 2001 From: liaoaoyuan97 Date: Tue, 20 Oct 2020 10:23:58 +0900 Subject: [PATCH 0282/1009] [SPARK-33181][SQL][DOCS] Document Load Table Directly from File in SQL Select Reference ### What changes were proposed in this pull request? Add the link to the feature: "Run SQL on files directly" to SQL reference documentation page ### Why are the changes needed? To make SQL Reference complete ### Does this PR introduce _any_ user-facing change? yes. Previously, reading in sql from file directly is not included in the documentation: https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select.html, not listed in from_items. The new link is added to the select statement documentation, like the below: ![image](https://user-images.githubusercontent.com/16770242/96517999-c34f3900-121e-11eb-8d56-c4ba0432855e.png) ![image](https://user-images.githubusercontent.com/16770242/96518808-8126f700-1220-11eb-8c98-fb398eee0330.png) ### How was this patch tested? Manually built and tested Closes #30095 from liaoaoyuan97/master. 
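For readers following the new link, a minimal sketch of the "Run SQL on files directly" feature it points to (the file path below is a placeholder, not part of this patch): ``` // Query a Parquet file directly by qualifying the path with its format, // as documented on the newly linked page. Any existing Parquet file works. val df = spark.sql("SELECT * FROM parquet.`/tmp/example-data.parquet`") df.show() ```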
Authored-by: liaoaoyuan97 Signed-off-by: HyukjinKwon --- docs/sql-ref-syntax-qry-select.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index 453737a2f36b8..655766d4c6d22 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -85,6 +85,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Table-value function](sql-ref-syntax-qry-select-tvf.html) * [Inline table](sql-ref-syntax-qry-select-inline-table.html) * Subquery + * [File](sql-data-sources-load-save-functions.html#run-sql-on-files-directly) * **PIVOT** From 35133901f79209bd5e6e3e17531095d0ecae737d Mon Sep 17 00:00:00 2001 From: Nan Zhu Date: Tue, 20 Oct 2020 11:13:16 +0900 Subject: [PATCH 0283/1009] [SPARK-32351][SQL] Show partially pushed down partition filters in explain() ### What changes were proposed in this pull request? Currently, actual non-dynamic partition pruning is executed in the optimizer phase (PruneFileSourcePartitions) if an input relation has a catalog file index. The current code assumes the same partition filters are generated again in FileSourceStrategy and passed into FileSourceScanExec. FileSourceScanExec uses the partition filters when listing files, but these non-dynamic partition filters do nothing because unnecessary partitions are already pruned in advance, so the filters are mainly used for explain output in this case. If a WHERE clause has DNF-ed predicates, FileSourceStrategy cannot extract the same filters with PruneFileSourcePartitions and then PartitionFilters is not shown in explain output. This patch proposes to extract partition filters in FileSourceStrategy and HiveStrategy with `extractPredicatesWithinOutputSet` added in https://github.com/apache/spark/pull/29101/files#diff-6be42cfa3c62a7536b1eb1d6447c073c again, then It will show the partially pushed down partition filter in explain(). ### Why are the changes needed? 
Without the patch, the explained plan is inconsistent with what is actually executed. Without the change, the explained plans of `"SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)"` for data source and Hive tables look like the following, respectively (the pushed-down partition filters are missing): ``` == Physical Plan == *(1) Filter ((p#21 = 1) OR ((p#21 = 2) AND (i#20 = 1))) +- *(1) ColumnarToRow +- FileScan parquet default.t[i#20,p#21] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/nanzhu/code/spark/sql/hive/target/tmp/hive_execution_test_group/war..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ``` == Physical Plan == *(1) Filter ((p#33 = 1) OR ((p#33 = 2) AND (i#32 = 1))) +- Scan hive default.t [i#32, p#33], HiveTableRelation [`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [i#32], Partition Cols: [p#33], Pruned Partitions: [(p=1), (p=2)]] ``` With the change, the plans look like the following (the actually executed partition filters are shown): ``` == Physical Plan == *(1) Filter ((p#21 = 1) OR ((p#21 = 2) AND (i#20 = 1))) +- *(1) ColumnarToRow +- FileScan parquet default.t[i#20,p#21] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/nanzhu/code/spark/sql/hive/target/tmp/hive_execution_test_group/war..., PartitionFilters: [((p#21 = 1) OR (p#21 = 2))], PushedFilters: [], ReadSchema: struct ``` ``` == Physical Plan == *(1) Filter ((p#37 = 1) OR ((p#37 = 2) AND (i#36 = 1))) +- Scan hive default.t [i#36, p#37], HiveTableRelation [`default`.`t`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [i#36], Partition Cols: [p#37], Pruned Partitions: [(p=1), (p=2)]], [((p#37 = 1) OR (p#37 = 2))] ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #29831 from CodingCat/SPARK-32351.
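For reference, a minimal sketch that reproduces the plans above; only the query and the column names `i`/`p` come from this description, the table setup is illustrative: ``` // Create a small partitioned data source table and inspect the plan. // With this change, PartitionFilters should contain the partially pushed-down // predicate ((p = '1') OR (p = '2')) extracted from the DNF-ed WHERE clause. spark.sql("CREATE TABLE t (i INT, p STRING) USING parquet PARTITIONED BY (p)") spark.sql("INSERT INTO t VALUES (1, '1'), (1, '2'), (2, '3')") spark.sql("SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)").explain() ```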
Lead-authored-by: Nan Zhu Co-authored-by: Nan Zhu Signed-off-by: HyukjinKwon --- .../datasources/DataSourceStrategy.scala | 17 +++++- .../datasources/FileSourceStrategy.scala | 12 ++-- .../spark/sql/hive/HiveStrategies.scala | 17 +++--- .../PruneHiveTablePartitionsSuite.scala | 17 +++++- .../execution/PrunePartitionSuiteBase.scala | 59 +++++++++++++++---- 5 files changed, 93 insertions(+), 29 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 86e85719272e8..9d666fc3a063e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -42,6 +42,7 @@ import org.apache.spark.sql.connector.catalog.SupportsRead import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ +import org.apache.spark.sql.execution.datasources.FileSourceStrategy.{extractPredicatesWithinOutputSet, logInfo} import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy @@ -467,7 +468,7 @@ case class DataSourceStrategy(conf: SQLConf) extends Strategy with Logging with } } -object DataSourceStrategy { +object DataSourceStrategy extends PredicateHelper { /** * The attribute name may differ from the one in the schema if the query analyzer * is case insensitive. We should change attribute names to match the ones in the schema, @@ -484,6 +485,20 @@ object DataSourceStrategy { } } + def getPushedDownFilters( + partitionColumns: Seq[Expression], + normalizedFilters: Seq[Expression]): ExpressionSet = { + if (partitionColumns.isEmpty) { + ExpressionSet(Nil) + } else { + val partitionSet = AttributeSet(partitionColumns) + val predicates = ExpressionSet(normalizedFilters + .flatMap(extractPredicatesWithinOutputSet(_, partitionSet))) + logInfo(s"Pruning directories with: ${predicates.mkString(",")}") + predicates + } + } + private def translateLeafNodeFilter( predicate: Expression, pushableColumn: PushableColumnBase): Option[Filter] = predicate match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 7928f6e0f9324..1191f99cc98a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -154,15 +154,11 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { l.resolve( fsRelation.partitionSchema, fsRelation.sparkSession.sessionState.analyzer.resolver) val partitionSet = AttributeSet(partitionColumns) - val partitionKeyFilters = if (partitionColumns.isEmpty) { - ExpressionSet(Nil) - } else { - val predicates = ExpressionSet(normalizedFilters - .filter(_.references.subsetOf(partitionSet))) - logInfo(s"Pruning directories with: ${predicates.mkString(",")}") - predicates - } + // this partitionKeyFilters should be the same with the ones being executed in + // PruneFileSourcePartitions + val partitionKeyFilters = DataSourceStrategy.getPushedDownFilters(partitionColumns, + 
normalizedFilters) // subquery expressions are filtered out because they can't be used to prune buckets or pushed // down as data filters, yet they would be executed diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 97e1dee5913a4..2ace96583d9cc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSchema import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} -import org.apache.spark.sql.execution.datasources.CreateTable +import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy} import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -256,20 +256,21 @@ private[hive] trait HiveStrategies { */ object HiveTableScans extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case ScanOperation(projectList, predicates, relation: HiveTableRelation) => + case ScanOperation(projectList, filters, relation: HiveTableRelation) => // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionCols) - val (pruningPredicates, otherPredicates) = predicates.partition { predicate => - !predicate.references.isEmpty && - predicate.references.subsetOf(partitionKeyIds) - } + val normalizedFilters = DataSourceStrategy.normalizeExprs( + filters.filter(_.deterministic), relation.output) + + val partitionKeyFilters = DataSourceStrategy.getPushedDownFilters(relation.partitionCols, + normalizedFilters) pruneFilterProject( projectList, - otherPredicates, + filters.filter(f => f.references.isEmpty || !f.references.subsetOf(partitionKeyIds)), identity[Seq[Expression]], - HiveTableScanExec(_, relation, pruningPredicates)(sparkSession)) :: Nil + HiveTableScanExec(_, relation, partitionKeyFilters.toSeq)(sparkSession)) :: Nil case _ => Nil } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala index 06aea084330fa..018df35403be5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala @@ -75,7 +75,22 @@ class PruneHiveTablePartitionsSuite extends PrunePartitionSuiteBase { } val scale = 20 val predicate = (1 to scale).map(i => s"(p0 = '$i' AND p1 = '$i')").mkString(" OR ") - assertPrunedPartitions(s"SELECT * FROM t WHERE $predicate", scale) + val expectedStr = { + // left + "(((((((`p0` = 1) && (`p1` = 1)) || ((`p0` = 2) && (`p1` = 2))) ||" + + " ((`p0` = 3) && (`p1` = 3))) || (((`p0` = 4) && (`p1` = 4)) ||" + + " ((`p0` = 5) && (`p1` = 5)))) || (((((`p0` = 6) && (`p1` = 6)) ||" + + " ((`p0` = 7) && (`p1` = 7))) || ((`p0` = 8) && (`p1` = 8))) ||" + + " (((`p0` = 9) && (`p1` = 9)) || ((`p0` = 10) && (`p1` = 10))))) ||" + + // right + " ((((((`p0` = 11) && (`p1` = 11)) || ((`p0` = 12) && (`p1` = 12))) 
||" + + " ((`p0` = 13) && (`p1` = 13))) || (((`p0` = 14) && (`p1` = 14)) ||" + + " ((`p0` = 15) && (`p1` = 15)))) || (((((`p0` = 16) && (`p1` = 16)) ||" + + " ((`p0` = 17) && (`p1` = 17))) || ((`p0` = 18) && (`p1` = 18))) ||" + + " (((`p0` = 19) && (`p1` = 19)) || ((`p0` = 20) && (`p1` = 20))))))" + } + assertPrunedPartitions(s"SELECT * FROM t WHERE $predicate", scale, + expectedStr) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index d088061cdc6e5..993a730524f6f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, EqualTo, Expression, IsNotNull, Literal} +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils @@ -46,30 +47,66 @@ abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with } assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2) + "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, + "((`p` = '1') || (`p` = '2'))") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, + "") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, + "((`p` = '1') || (`p` = '3'))") assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3) + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, + "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") assertPrunedPartitions( - "SELECT * FROM t", 4) + "SELECT * FROM t", 4, + "") assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' AND i = 2", 1) + "SELECT * FROM t WHERE p = '1' AND i = 2", 1, + "(`p` = '1')") assertPrunedPartitions( """ |SELECT i, COUNT(1) FROM ( |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) |) tmp GROUP BY i - """.stripMargin, 2) + """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") } } } - protected def assertPrunedPartitions(query: String, expected: Long): Unit = { - val plan = sql(query).queryExecution.sparkPlan - assert(getScanExecPartitionSize(plan) == expected) + private def getCleanStringRepresentation(exp: Expression): String = exp match { + case attr: AttributeReference => + attr.sql.replaceAll("spark_catalog.default.t.", "") + case l: Literal => + l.sql + case e: BinaryOperator => + s"(${getCleanStringRepresentation(e.left)} ${e.symbol} " + + s"${getCleanStringRepresentation(e.right)})" + } + + protected def assertPrunedPartitions( + query: String, + expectedPartitionCount: Long, + expectedPushedDownFilters: String): Unit = { + val qe = sql(query).queryExecution + val plan = qe.sparkPlan + assert(getScanExecPartitionSize(plan) == expectedPartitionCount) + + val pushedDownPartitionFilters = qe.executedPlan.collectFirst { + case scan: FileSourceScanExec => scan.partitionFilters + case scan: HiveTableScanExec => 
scan.partitionPruningPred + }.map(exps => exps.filterNot(e => e.isInstanceOf[IsNotNull])) + val pushedFilters = pushedDownPartitionFilters.map(filters => { + filters.foldLeft("")((currentStr, exp) => { + if (currentStr == "") { + s"${getCleanStringRepresentation(exp)}" + } else { + s"$currentStr AND ${getCleanStringRepresentation(exp)}" + } + }) + }) + + assert(pushedFilters == Some(expectedPushedDownFilters)) } protected def getScanExecPartitionSize(plan: SparkPlan): Long From a44e008de3ae5aecad9e0f1a7af6a1e8b0d97f4e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 20 Oct 2020 14:58:59 +0900 Subject: [PATCH 0284/1009] [SPARK-33160][SQL] Allow saving/loading INT96 in parquet w/o rebasing ### What changes were proposed in this pull request? 1. Add the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` to control timestamp rebasing when saving timestamps as INT96. It supports the same set of values as `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` but the default value is `LEGACY` to preserve backward compatibility with Spark <= 3.0. 2. Write the metadata key `org.apache.spark.int96NoRebase` to parquet files if the files are saved with `spark.sql.legacy.parquet.int96RebaseModeInWrite` set to a value other than `LEGACY`. 3. Add the SQL config `spark.sql.legacy.parquet.int96RebaseModeInRead` to control loading INT96 timestamps when parquet metadata doesn't have enough info (the `org.apache.spark.int96NoRebase` tag) about the parquet writer - whether INT96 was written by a Proleptic Gregorian system or a Julian one. 4. Modified Vectorized and Parquet-mr Readers to support loading/saving INT96 timestamps w/o rebasing depending on the SQL config and the metadata tag: - **No rebasing** in testing when the SQL config `spark.test.forceNoRebase` is set to `true` - **No rebasing** if parquet metadata contains the tag `org.apache.spark.int96NoRebase`. This is the case when parquet files are saved by Spark >= 3.1 with `spark.sql.legacy.parquet.int96RebaseModeInWrite` set to `CORRECTED`, or saved by other systems with the tag `org.apache.spark.int96NoRebase`. - **With rebasing** if parquet files were saved by Spark (any version) without the metadata tag `org.apache.spark.int96NoRebase`. - Rebasing depends on the SQL config `spark.sql.legacy.parquet.int96RebaseModeInRead` if there are no metadata tags `org.apache.spark.version` and `org.apache.spark.int96NoRebase`. New SQL configs are added instead of re-using the existing `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` and `spark.sql.legacy.parquet.datetimeRebaseModeInRead` for the following reasons: - To allow users to have different modes for INT96 and for TIMESTAMP_MICROS (MILLIS). For example, users might want to save INT96 as LEGACY but TIMESTAMP_MICROS as CORRECTED. - To have different modes for INT96 and DATE in load (or in save). - To be backward compatible with Spark 2.4. For now, `spark.sql.legacy.parquet.datetimeRebaseModeInWrite/Read` are set to `EXCEPTION` by default. ### Why are the changes needed? 1. The Parquet spec says that INT96 must be stored as Julian days (see https://github.com/apache/parquet-format/pull/49). This doesn't mean that a reader (or a writer) is based on the Julian calendar. So, rebasing from the Proleptic Gregorian to the Julian calendar may not be needed. 2. Rebasing from/to the Julian calendar can lose information because dates in one calendar don't exist in the other.
For example, 1582-10-04..1582-10-15 exist in the Proleptic Gregorian calendar but not in the hybrid calendar (Julian + Gregorian), and vice versa, the Julian date 1000-02-29 doesn't exist in the Proleptic Gregorian calendar. We should allow users to save timestamps without losing such dates (rebasing shifts such dates to the next valid date). 3. It would also make Spark compatible with other systems such as Impala and newer versions of Hive that write Proleptic Gregorian-based INT96 timestamps. ### Does this PR introduce _any_ user-facing change? It can when `spark.sql.legacy.parquet.int96RebaseModeInWrite` is set to a value other than the default `LEGACY`. ### How was this patch tested? - Added a test to check the metadata key `org.apache.spark.int96NoRebase` - By `ParquetIOSuite` Closes #30056 from MaxGekk/parquet-rebase-int96.
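A minimal sketch of exercising the write-side config added here; the config keys and the metadata key come from this patch, while the path, the session setup, and the use of `spark.sql.parquet.outputTimestampType` are illustrative: ``` // Write INT96 timestamps without rebasing them to the hybrid Julian calendar. spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96") spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED") import spark.implicits._ Seq(java.sql.Timestamp.valueOf("1001-01-01 01:02:03.123")) .toDF("ts") .write.mode("overwrite").parquet("/tmp/int96_no_rebase") // Files written this way carry the `org.apache.spark.int96NoRebase` metadata // key, so they are read back without rebasing regardless of // `spark.sql.legacy.parquet.int96RebaseModeInRead`. spark.read.parquet("/tmp/int96_no_rebase").show(false) ```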
*/ def toJulianDay(micros: Long): (Int, Long) = { - val julianUs = rebaseGregorianToJulianMicros(micros) + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY + val julianUs = micros + JULIAN_DAY_OF_EPOCH * MICROS_PER_DAY val days = julianUs / MICROS_PER_DAY val us = julianUs % MICROS_PER_DAY (days.toInt, MICROSECONDS.toNanos(us)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0497c9b7e80b8..3648615a1eaee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2654,6 +2654,20 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE = + buildConf("spark.sql.legacy.parquet.int96RebaseModeInWrite") + .internal() + .doc("When LEGACY, which is the default, Spark will rebase INT96 timestamps from " + + "Proleptic Gregorian calendar to the legacy hybrid (Julian + Gregorian) calendar when " + + "writing Parquet files. When CORRECTED, Spark will not do rebase and write the timestamps" + + " as it is. When EXCEPTION, Spark will fail the writing if it sees ancient timestamps " + + "that are ambiguous between the two calendars.") + .version("3.1.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + val LEGACY_PARQUET_REBASE_MODE_IN_READ = buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") .internal() @@ -2669,6 +2683,21 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ = + buildConf("spark.sql.legacy.parquet.int96RebaseModeInRead") + .internal() + .doc("When LEGACY, which is the default, Spark will rebase INT96 timestamps from " + + "the legacy hybrid (Julian + Gregorian) calendar to Proleptic Gregorian calendar when " + + "reading Parquet files. When CORRECTED, Spark will not do rebase and read the timestamps " + + "as it is. When EXCEPTION, Spark will fail the reading if it sees ancient timestamps " + + "that are ambiguous between the two calendars. 
This config is only effective if the " + + "writer info (like Spark, Hive) of the Parquet files is unknown.") + .version("3.1.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") .internal() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index fe761f672c041..7bbdf44d78c3c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ +import org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseJulianToGregorianMicros import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { @@ -70,17 +71,17 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } test("us and julian day") { - val (d, ns) = toJulianDay(0) + val (d, ns) = toJulianDay(RebaseDateTime.rebaseGregorianToJulianMicros(0)) assert(d === JULIAN_DAY_OF_EPOCH) assert(ns === 0) - assert(fromJulianDay(d, ns) == 0L) + assert(rebaseJulianToGregorianMicros(fromJulianDay(d, ns)) == 0L) Seq(Timestamp.valueOf("2015-06-11 10:10:10.100"), Timestamp.valueOf("2015-06-11 20:10:10.100"), Timestamp.valueOf("1900-06-11 20:10:10.100")).foreach { t => - val (d, ns) = toJulianDay(fromJavaTimestamp(t)) + val (d, ns) = toJulianDay(RebaseDateTime.rebaseGregorianToJulianMicros(fromJavaTimestamp(t))) assert(ns > 0) - val t1 = toJavaTimestamp(fromJulianDay(d, ns)) + val t1 = toJavaTimestamp(rebaseJulianToGregorianMicros(fromJulianDay(d, ns))) assert(t.equals(t1)) } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 3e409ab9a50a1..1b8b18d4d8735 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -104,13 +104,15 @@ public class VectorizedColumnReader { private final ZoneId convertTz; private static final ZoneId UTC = ZoneOffset.UTC; private final String datetimeRebaseMode; + private final String int96RebaseMode; public VectorizedColumnReader( ColumnDescriptor descriptor, OriginalType originalType, PageReader pageReader, ZoneId convertTz, - String datetimeRebaseMode) throws IOException { + String datetimeRebaseMode, + String int96RebaseMode) throws IOException { this.descriptor = descriptor; this.pageReader = pageReader; this.convertTz = convertTz; @@ -136,6 +138,9 @@ public VectorizedColumnReader( assert "LEGACY".equals(datetimeRebaseMode) || "EXCEPTION".equals(datetimeRebaseMode) || "CORRECTED".equals(datetimeRebaseMode); this.datetimeRebaseMode = datetimeRebaseMode; + assert "LEGACY".equals(int96RebaseMode) || "EXCEPTION".equals(int96RebaseMode) || + 
"CORRECTED".equals(int96RebaseMode); + this.int96RebaseMode = int96RebaseMode; } /** @@ -189,10 +194,13 @@ static int rebaseDays(int julianDays, final boolean failIfRebase) { } } - static long rebaseMicros(long julianMicros, final boolean failIfRebase) { + private static long rebaseTimestamp( + long julianMicros, + final boolean failIfRebase, + final String format) { if (failIfRebase) { if (julianMicros < RebaseDateTime.lastSwitchJulianTs()) { - throw DataSourceUtils.newRebaseExceptionInRead("Parquet"); + throw DataSourceUtils.newRebaseExceptionInRead(format); } else { return julianMicros; } @@ -201,6 +209,14 @@ static long rebaseMicros(long julianMicros, final boolean failIfRebase) { } } + static long rebaseMicros(long julianMicros, final boolean failIfRebase) { + return rebaseTimestamp(julianMicros, failIfRebase, "Parquet"); + } + + static long rebaseInt96(long julianMicros, final boolean failIfRebase) { + return rebaseTimestamp(julianMicros, failIfRebase, "Parquet INT96"); + } + /** * Reads `total` values from this columnReader into column. */ @@ -399,20 +415,44 @@ private void decodeDictionaryIds( break; case INT96: if (column.dataType() == DataTypes.TimestampType) { + final boolean failIfRebase = "EXCEPTION".equals(int96RebaseMode); if (!shouldConvertTimestamps()) { - for (int i = rowId; i < rowId + num; ++i) { - if (!column.isNullAt(i)) { - Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); - column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + column.putLong(i, ParquetRowConverter.binaryToSQLTimestamp(v)); + } + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + column.putLong(i, gregorianMicros); + } } } } else { - for (int i = rowId; i < rowId + num; ++i) { - if (!column.isNullAt(i)) { - Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(v); - long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); - column.putLong(i, adjTime); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(i, adjTime); + } + } + } else { + for (int i = rowId; i < rowId + num; ++i) { + if (!column.isNullAt(i)) { + Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i)); + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(v); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(i, adjTime); + } } } } @@ -577,25 +617,53 @@ private void readBinaryBatch(int rowId, int num, WritableColumnVector column) th || DecimalType.isByteArrayDecimalType(column.dataType())) { defColumn.readBinarys(num, column, rowId, maxDefLevel, data); } else if (column.dataType() == DataTypes.TimestampType) { + final boolean failIfRebase = "EXCEPTION".equals(int96RebaseMode); if 
(!shouldConvertTimestamps()) { - for (int i = 0; i < num; i++) { - if (defColumn.readInteger() == maxDefLevel) { - // Read 12 bytes for INT96 - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); - column.putLong(rowId + i, rawTime); - } else { - column.putNull(rowId + i); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + column.putLong(rowId + i, gregorianMicros); + } else { + column.putNull(rowId + i); + } + } + } else { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + column.putLong(rowId + i, gregorianMicros); + } else { + column.putNull(rowId + i); + } } } } else { - for (int i = 0; i < num; i++) { - if (defColumn.readInteger() == maxDefLevel) { - // Read 12 bytes for INT96 - long rawTime = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); - long adjTime = DateTimeUtils.convertTz(rawTime, convertTz, UTC); - column.putLong(rowId + i, adjTime); - } else { - column.putNull(rowId + i); + if ("CORRECTED".equals(int96RebaseMode)) { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long gregorianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(rowId + i, adjTime); + } else { + column.putNull(rowId + i); + } + } + } else { + for (int i = 0; i < num; i++) { + if (defColumn.readInteger() == maxDefLevel) { + // Read 12 bytes for INT96 + long julianMicros = ParquetRowConverter.binaryToSQLTimestamp(data.readBinary(12)); + long gregorianMicros = rebaseInt96(julianMicros, failIfRebase); + long adjTime = DateTimeUtils.convertTz(gregorianMicros, convertTz, UTC); + column.putLong(rowId + i, adjTime); + } else { + column.putNull(rowId + i); + } } } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index b40cc154d76fe..9d38a74a2956a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -93,6 +93,11 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa */ private final String datetimeRebaseMode; + /** + * The mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian calendar. + */ + private final String int96RebaseMode; + /** * columnBatch object that is used for batch decoding. This is created on first use and triggers * batched decoding. 
It is not valid to interleave calls to the batched interface with the row @@ -122,16 +127,21 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa private final MemoryMode MEMORY_MODE; public VectorizedParquetRecordReader( - ZoneId convertTz, String datetimeRebaseMode, boolean useOffHeap, int capacity) { + ZoneId convertTz, + String datetimeRebaseMode, + String int96RebaseMode, + boolean useOffHeap, + int capacity) { this.convertTz = convertTz; this.datetimeRebaseMode = datetimeRebaseMode; + this.int96RebaseMode = int96RebaseMode; MEMORY_MODE = useOffHeap ? MemoryMode.OFF_HEAP : MemoryMode.ON_HEAP; this.capacity = capacity; } // For test only. public VectorizedParquetRecordReader(boolean useOffHeap, int capacity) { - this(null, "CORRECTED", useOffHeap, capacity); + this(null, "CORRECTED", "LEGACY", useOffHeap, capacity); } /** @@ -320,8 +330,13 @@ private void checkEndOfRowGroup() throws IOException { columnReaders = new VectorizedColumnReader[columns.size()]; for (int i = 0; i < columns.size(); ++i) { if (missingColumns[i]) continue; - columnReaders[i] = new VectorizedColumnReader(columns.get(i), types.get(i).getOriginalType(), - pages.getPageReader(columns.get(i)), convertTz, datetimeRebaseMode); + columnReaders[i] = new VectorizedColumnReader( + columns.get(i), + types.get(i).getOriginalType(), + pages.getPageReader(columns.get(i)), + convertTz, + datetimeRebaseMode, + int96RebaseMode); } totalCountLoadedSoFar += pages.getRowCount(); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index b4308a872bb39..f8068a634977b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -26,7 +26,7 @@ import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException -import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime @@ -111,13 +111,26 @@ object DataSourceUtils { }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) } - def newRebaseExceptionInRead(format: String): SparkUpgradeException = { - val config = if (format == "Parquet") { - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key - } else if (format == "Avro") { - SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + def int96RebaseMode( + lookupFileMeta: String => String, + modeByConfig: String): LegacyBehaviorPolicy.Value = { + if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { + LegacyBehaviorPolicy.CORRECTED + } else if (lookupFileMeta(SPARK_INT96_NO_REBASE) != null) { + LegacyBehaviorPolicy.CORRECTED + } else if (lookupFileMeta(SPARK_VERSION_METADATA_KEY) != null) { + LegacyBehaviorPolicy.LEGACY } else { - throw new IllegalStateException("unrecognized format " + format) + LegacyBehaviorPolicy.withName(modeByConfig) + } + } + + def newRebaseExceptionInRead(format: String): SparkUpgradeException = { + val config = format match { + case "Parquet INT96" => SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key + case "Parquet" => 
SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key + case "Avro" => SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ.key + case _ => throw new IllegalStateException("unrecognized format " + format) } new SparkUpgradeException("3.0", "reading dates before 1582-10-15 or timestamps before " + s"1900-01-01T00:00:00Z from $format files can be ambiguous, as the files may be written by " + @@ -129,12 +142,11 @@ object DataSourceUtils { } def newRebaseExceptionInWrite(format: String): SparkUpgradeException = { - val config = if (format == "Parquet") { - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key - } else if (format == "Avro") { - SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key - } else { - throw new IllegalStateException("unrecognized format " + format) + val config = format match { + case "Parquet INT96" => SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key + case "Parquet" => SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key + case "Avro" => SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key + case _ => throw new IllegalStateException("unrecognized format " + format) } new SparkUpgradeException("3.0", "writing dates before 1582-10-15 or timestamps before " + s"1900-01-01T00:00:00Z into $format files can be dangerous, as the files may be read by " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 68f49f9442579..95f19f9dcee64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -303,6 +303,9 @@ class ParquetFileFormat val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode( footerFileMetaData.getKeyValueMetaData.get, SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ)) + val int96RebaseMode = DataSourceUtils.int96RebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ)) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = @@ -318,6 +321,7 @@ class ParquetFileFormat val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, datetimeRebaseMode.toString, + int96RebaseMode.toString, enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) @@ -336,7 +340,10 @@ class ParquetFileFormat logDebug(s"Falling back to parquet-mr") // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport( - convertTz, enableVectorizedReader = false, datetimeRebaseMode) + convertTz, + enableVectorizedReader = false, + datetimeRebaseMode, + int96RebaseMode) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) new ParquetRecordReader[InternalRow](readSupport, parquetFilter) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index a30d1c26b3b2d..e74872da0829d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -54,7 +54,8 @@ import org.apache.spark.sql.types._ class ParquetReadSupport( 
val convertTz: Option[ZoneId], enableVectorizedReader: Boolean, - datetimeRebaseMode: LegacyBehaviorPolicy.Value) + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value) extends ReadSupport[InternalRow] with Logging { private var catalystRequestedSchema: StructType = _ @@ -62,7 +63,11 @@ class ParquetReadSupport( // We need a zero-arg constructor for SpecificParquetRecordReaderBase. But that is only // used in the vectorized reader, where we get the convertTz/rebaseDateTime value directly, // and the values here are ignored. - this(None, enableVectorizedReader = true, datetimeRebaseMode = LegacyBehaviorPolicy.CORRECTED) + this( + None, + enableVectorizedReader = true, + datetimeRebaseMode = LegacyBehaviorPolicy.CORRECTED, + int96RebaseMode = LegacyBehaviorPolicy.LEGACY) } /** @@ -131,7 +136,8 @@ class ParquetReadSupport( ParquetReadSupport.expandUDT(catalystRequestedSchema), new ParquetToSparkSchemaConverter(conf), convertTz, - datetimeRebaseMode) + datetimeRebaseMode, + int96RebaseMode) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala index bb528d548b6ef..80763ef019b01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala @@ -35,17 +35,26 @@ import org.apache.spark.sql.types.StructType * @param convertTz the optional time zone to convert to int96 data * @param datetimeRebaseMode the mode of rebasing date/timestamp from Julian to Proleptic Gregorian * calendar + * @param int96RebaseMode the mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian + * calendar */ private[parquet] class ParquetRecordMaterializer( parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetToSparkSchemaConverter, convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value) + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value) extends RecordMaterializer[InternalRow] { private val rootConverter = new ParquetRowConverter( - schemaConverter, parquetSchema, catalystSchema, convertTz, datetimeRebaseMode, NoopUpdater) + schemaConverter, + parquetSchema, + catalystSchema, + convertTz, + datetimeRebaseMode, + int96RebaseMode, + NoopUpdater) override def getCurrentRecord: InternalRow = rootConverter.currentRecord diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index e0008ed16d56d..6ef56af927129 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -123,6 +123,8 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd * @param convertTz the optional time zone to convert to int96 data * @param datetimeRebaseMode the mode of rebasing date/timestamp from Julian to Proleptic Gregorian * calendar + * @param int96RebaseMode the mode of rebasing INT96 timestamp from Julian to Proleptic Gregorian + * calendar * @param updater An updater which propagates converted field values to the 
parent container */ private[parquet] class ParquetRowConverter( @@ -131,6 +133,7 @@ private[parquet] class ParquetRowConverter( catalystType: StructType, convertTz: Option[ZoneId], datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value, updater: ParentContainerUpdater) extends ParquetGroupConverter(updater) with Logging { @@ -187,6 +190,9 @@ private[parquet] class ParquetRowConverter( private val timestampRebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInRead( datetimeRebaseMode, "Parquet") + private val int96RebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInRead( + int96RebaseMode, "Parquet INT96") + // Converters for each field. private[this] val fieldConverters: Array[Converter with HasParentContainerUpdater] = { // (SPARK-31116) Use case insensitive map if spark.sql.caseSensitive is false @@ -300,9 +306,10 @@ private[parquet] class ParquetRowConverter( new ParquetPrimitiveConverter(updater) { // Converts nanosecond timestamps stored as INT96 override def addBinary(value: Binary): Unit = { - val rawTime = ParquetRowConverter.binaryToSQLTimestamp(value) - val adjTime = convertTz.map(DateTimeUtils.convertTz(rawTime, _, ZoneOffset.UTC)) - .getOrElse(rawTime) + val julianMicros = ParquetRowConverter.binaryToSQLTimestamp(value) + val gregorianMicros = int96RebaseFunc(julianMicros) + val adjTime = convertTz.map(DateTimeUtils.convertTz(gregorianMicros, _, ZoneOffset.UTC)) + .getOrElse(gregorianMicros) updater.setLong(adjTime) } } @@ -363,6 +370,7 @@ private[parquet] class ParquetRowConverter( t, convertTz, datetimeRebaseMode, + int96RebaseMode, wrappedUpdater) case t => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index 6c333671d59cb..b538c2f2493d0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -31,7 +31,7 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer} import org.apache.spark.SPARK_VERSION_SHORT import org.apache.spark.internal.Logging -import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -88,6 +88,12 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { private val timestampRebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInWrite( datetimeRebaseMode, "Parquet") + private val int96RebaseMode = LegacyBehaviorPolicy.withName( + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE)) + + private val int96RebaseFunc = DataSourceUtils.creteTimestampRebaseFuncInWrite( + int96RebaseMode, "Parquet INT96") + override def init(configuration: Configuration): WriteContext = { val schemaString = configuration.get(ParquetWriteSupport.SPARK_ROW_SCHEMA) this.schema = StructType.fromString(schemaString) @@ -115,6 +121,12 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } else { None } + } ++ { + if (int96RebaseMode == LegacyBehaviorPolicy.LEGACY) { + None + } else { + Some(SPARK_INT96_NO_REBASE -> "") + } } logInfo( @@ -193,7 +205,8 
@@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { outputTimestampType match { case SQLConf.ParquetOutputTimestampType.INT96 => (row: SpecializedGetters, ordinal: Int) => - val (julianDay, timeOfDayNanos) = DateTimeUtils.toJulianDay(row.getLong(ordinal)) + val micros = int96RebaseFunc(row.getLong(ordinal)) + val (julianDay, timeOfDayNanos) = DateTimeUtils.toJulianDay(micros) val buf = ByteBuffer.wrap(timestampBuffer) buf.order(ByteOrder.LITTLE_ENDIAN).putLong(timeOfDayNanos).putInt(julianDay) recordConsumer.addBinary(Binary.fromReusedByteArray(timestampBuffer)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index 3b482b0c8ab62..e4d5e9b2d9f6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -119,6 +119,7 @@ case class ParquetPartitionReaderFactory( buildReaderFunc: ( ParquetInputSplit, InternalRow, TaskAttemptContextImpl, Option[FilterPredicate], Option[ZoneId], + LegacyBehaviorPolicy.Value, LegacyBehaviorPolicy.Value) => RecordReader[Void, T]): RecordReader[Void, T] = { val conf = broadcastedConf.value.value @@ -174,8 +175,17 @@ case class ParquetPartitionReaderFactory( val datetimeRebaseMode = DataSourceUtils.datetimeRebaseMode( footerFileMetaData.getKeyValueMetaData.get, SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ)) + val int96RebaseMode = DataSourceUtils.int96RebaseMode( + footerFileMetaData.getKeyValueMetaData.get, + SQLConf.get.getConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ)) val reader = buildReaderFunc( - split, file.partitionValues, hadoopAttemptContext, pushed, convertTz, datetimeRebaseMode) + split, + file.partitionValues, + hadoopAttemptContext, + pushed, + convertTz, + datetimeRebaseMode, + int96RebaseMode) reader.initialize(split, hadoopAttemptContext) reader } @@ -190,12 +200,16 @@ case class ParquetPartitionReaderFactory( hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value): RecordReader[Void, InternalRow] = { + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value): RecordReader[Void, InternalRow] = { logDebug(s"Falling back to parquet-mr") val taskContext = Option(TaskContext.get()) // ParquetRecordReader returns InternalRow val readSupport = new ParquetReadSupport( - convertTz, enableVectorizedReader = false, datetimeRebaseMode) + convertTz, + enableVectorizedReader = false, + datetimeRebaseMode, + int96RebaseMode) val reader = if (pushed.isDefined && enableRecordFilter) { val parquetFilter = FilterCompat.get(pushed.get, null) new ParquetRecordReader[InternalRow](readSupport, parquetFilter) @@ -221,11 +235,13 @@ case class ParquetPartitionReaderFactory( hadoopAttemptContext: TaskAttemptContextImpl, pushed: Option[FilterPredicate], convertTz: Option[ZoneId], - datetimeRebaseMode: LegacyBehaviorPolicy.Value): VectorizedParquetRecordReader = { + datetimeRebaseMode: LegacyBehaviorPolicy.Value, + int96RebaseMode: LegacyBehaviorPolicy.Value): VectorizedParquetRecordReader = { val taskContext = Option(TaskContext.get()) val vectorizedReader = new VectorizedParquetRecordReader( convertTz.orNull, 
datetimeRebaseMode.toString, + int96RebaseMode.toString, enableOffHeapColumnVector && taskContext.isDefined, capacity) val iter = new RecordReaderIterator(vectorizedReader) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index c0397010acba3..011be6d69c576 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -60,4 +60,10 @@ package object sql { * values. */ private[sql] val SPARK_LEGACY_DATETIME = "org.apache.spark.legacyDateTime" + + /** + * Parquet file metadata key to indicate that the file with INT96 column type was written + * without rebasing. + */ + private[sql] val SPARK_INT96_NO_REBASE = "org.apache.spark.int96NoRebase" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index ff406f7bc62de..214f36a2df713 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -951,7 +951,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession rowFunc: Int => (String, String), toJavaType: String => T, checkDefaultLegacyRead: String => Unit, - tsOutputType: String = "TIMESTAMP_MICROS"): Unit = { + tsOutputType: String = "TIMESTAMP_MICROS", + inWriteConf: String = SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + inReadConf: String = SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key): Unit = { withTempPaths(2) { paths => paths.foreach(_.delete()) val path2_4 = getResourceParquetFilePath("test-data/" + fileName) @@ -962,18 +964,20 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> tsOutputType) { checkDefaultLegacyRead(path2_4) // By default we should fail to write ancient datetime values. - val e = intercept[SparkException](df.write.parquet(path3_0)) - assert(e.getCause.getCause.getCause.isInstanceOf[SparkUpgradeException]) - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { + if (tsOutputType != "INT96") { + val e = intercept[SparkException](df.write.parquet(path3_0)) + assert(e.getCause.getCause.getCause.isInstanceOf[SparkUpgradeException]) + } + withSQLConf(inWriteConf -> CORRECTED.toString) { df.write.mode("overwrite").parquet(path3_0) } - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withSQLConf(inWriteConf -> LEGACY.toString) { df.write.parquet(path3_0_rebase) } } // For Parquet files written by Spark 3.0, we know the writer info and don't need the // config to guide the rebase behavior. - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key -> LEGACY.toString) { + withSQLConf(inReadConf -> LEGACY.toString) { checkAnswer( spark.read.format("parquet").load(path2_4, path3_0, path3_0_rebase), (0 until N).flatMap { i => @@ -1015,15 +1019,22 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession java.sql.Timestamp.valueOf, checkDefaultRead, tsOutputType = "TIMESTAMP_MILLIS") - // INT96 is a legacy timestamp format and we always rebase the seconds for it. 
+ } + } + Seq( + "2_4_5" -> successInRead _, + "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) => + withAllParquetReaders { Seq("plain", "dict").foreach { enc => - checkAnswer(readResourceParquetFile( - s"test-data/before_1582_timestamp_int96_${enc}_v$version.snappy.parquet"), - Seq.tabulate(N) { i => - Row( - java.sql.Timestamp.valueOf("1001-01-01 01:02:03.123456"), - java.sql.Timestamp.valueOf(s"1001-01-0${i + 1} 01:02:03.123456")) - }) + checkReadMixedFiles( + s"before_1582_timestamp_int96_${enc}_v$version.snappy.parquet", + "timestamp", + (i: Int) => ("1001-01-01 01:02:03.123456", s"1001-01-0${i + 1} 01:02:03.123456"), + java.sql.Timestamp.valueOf, + checkDefaultRead, + tsOutputType = "INT96", + inWriteConf = SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key, + inReadConf = SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key) } } } @@ -1033,15 +1044,31 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession val N = 8 Seq(false, true).foreach { dictionaryEncoding => Seq( - ("TIMESTAMP_MILLIS", "1001-01-01 01:02:03.123", "1001-01-07 01:09:05.123"), - ("TIMESTAMP_MICROS", "1001-01-01 01:02:03.123456", "1001-01-07 01:09:05.123456"), - ("INT96", "1001-01-01 01:02:03.123456", "1001-01-01 01:02:03.123456") - ).foreach { case (outType, tsStr, nonRebased) => + ( + "TIMESTAMP_MILLIS", + "1001-01-01 01:02:03.123", + "1001-01-07 01:09:05.123", + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key), + ( + "TIMESTAMP_MICROS", + "1001-01-01 01:02:03.123456", + "1001-01-07 01:09:05.123456", + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key), + ( + "INT96", + "1001-01-01 01:02:03.123456", + "1001-01-07 01:09:05.123456", + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key + ) + ).foreach { case (outType, tsStr, nonRebased, inWriteConf, inReadConf) => withClue(s"output type $outType") { withSQLConf(SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> outType) { withTempPath { dir => val path = dir.getAbsolutePath - withSQLConf(SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withSQLConf(inWriteConf -> LEGACY.toString) { Seq.tabulate(N)(_ => tsStr).toDF("tsS") .select($"tsS".cast("timestamp").as("ts")) .repartition(1) @@ -1054,8 +1081,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession // The file metadata indicates if it needs rebase or not, so we can always get the // correct result regardless of the "rebase mode" config. 
Seq(LEGACY, CORRECTED, EXCEPTION).foreach { mode => - withSQLConf( - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_READ.key -> mode.toString) { + withSQLConf(inReadConf -> mode.toString) { checkAnswer( spark.read.parquet(path), Seq.tabulate(N)(_ => Row(Timestamp.valueOf(tsStr)))) @@ -1136,6 +1162,30 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } } + + test("SPARK-33160: write the metadata key 'org.apache.spark.int96NoRebase'") { + def saveTs(dir: java.io.File): Unit = { + Seq(Timestamp.valueOf("1000-01-01 01:02:03")).toDF() + .repartition(1) + .write + .parquet(dir.getAbsolutePath) + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir).get(SPARK_INT96_NO_REBASE).isEmpty) + } + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { + withTempPath { dir => + saveTs(dir) + assert(getMetaData(dir)(SPARK_INT96_NO_REBASE) === "") + } + } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { + withTempPath { dir => intercept[SparkException] { saveTs(dir) } } + } + } } class JobCommitFailureParquetOutputCommitter(outputPath: Path, context: TaskAttemptContext) From fbb68436203627186e4070cac674707283c9dcc2 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 20 Oct 2020 15:14:38 +0900 Subject: [PATCH 0285/1009] [SPARK-32229][SQL] Fix PostgresConnectionProvider and MSSQLConnectionProvider by accessing wrapped driver ### What changes were proposed in this pull request? Postgres and MSSQL connection providers are not able to get custom `appEntry` because under some circumstances the driver is wrapped with `DriverWrapper`. Such case is not handled in the mentioned providers. In this PR I've added this edge case handling by passing unwrapped `Driver` from `JdbcUtils`. ### Why are the changes needed? `DriverWrapper` is not considered. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing + additional unit tests. Closes #30024 from gaborgsomogyi/SPARK-32229. 
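For readers skimming the diff below, a minimal self-contained sketch of the lookup-and-unwrap idea follows. The `Wrapper` trait and `resolve` helper are hypothetical stand-ins for Spark's internal `DriverWrapper` and the new `DriverRegistry.get`, and `registered` stands in for `DriverManager.getDrivers`; the authoritative change is the diff itself.

```scala
import java.sql.Driver

// Hypothetical stand-in for Spark's internal DriverWrapper.
trait Wrapper { def wrapped: Driver }

// When a driver is registered through a wrapper, matching on the registered object's
// class name never hits the user-supplied driver class, so the lookup must unwrap first.
def resolve(registered: Seq[AnyRef], className: String): Option[Driver] =
  registered.collectFirst {
    case w: Wrapper if w.wrapped.getClass.getCanonicalName == className => w.wrapped
    case d: Driver if d.getClass.getCanonicalName == className => d
  }
```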
Authored-by: Gabor Somogyi Signed-off-by: Takeshi Yamamuro --- .../datasources/jdbc/DriverRegistry.scala | 11 +++++++ .../datasources/jdbc/JdbcUtils.scala | 12 ++----- .../jdbc/DriverRegistrySuite.scala | 29 ++++++++++++++++ .../jdbc/connection/TestDriver.scala | 33 +++++++++++++++++++ 4 files changed, 75 insertions(+), 10 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala index 530d836d9fde3..3444d03beff5d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistry.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Driver, DriverManager} +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.internal.Logging @@ -58,5 +59,15 @@ object DriverRegistry extends Logging { } } } + + def get(className: String): Driver = { + DriverManager.getDrivers.asScala.collectFirst { + case d: DriverWrapper if d.wrapped.getClass.getCanonicalName == className => d.wrapped + case d if d.getClass.getCanonicalName == className => d + }.getOrElse { + throw new IllegalStateException( + s"Did not find registered driver with class $className") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 202f2e03b68d8..24e380e3be3e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, Driver, DriverManager, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} +import java.sql.{Connection, Driver, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} import java.util.Locale -import scala.collection.JavaConverters._ import scala.util.Try import scala.util.control.NonFatal @@ -56,17 +55,10 @@ object JdbcUtils extends Logging { val driverClass: String = options.driverClass () => { DriverRegistry.register(driverClass) - val driver: Driver = DriverManager.getDrivers.asScala.collectFirst { - case d: DriverWrapper if d.wrapped.getClass.getCanonicalName == driverClass => d - case d if d.getClass.getCanonicalName == driverClass => d - }.getOrElse { - throw new IllegalStateException( - s"Did not find registered driver with class $driverClass") - } + val driver: Driver = DriverRegistry.get(driverClass) val connection = ConnectionProvider.create(driver, options.parameters) require(connection != null, s"The driver could not open a JDBC connection. 
Check the URL: ${options.url}") - connection } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala new file mode 100644 index 0000000000000..51dbdacb5e0fe --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/DriverRegistrySuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.jdbc + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.execution.datasources.jdbc.connection.TestDriver + +class DriverRegistrySuite extends SparkFunSuite { + test("SPARK-32229: get must give back wrapped driver if wrapped") { + val className = classOf[TestDriver].getName + DriverRegistry.register(className) + assert(DriverRegistry.get(className).isInstanceOf[TestDriver]) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala new file mode 100644 index 0000000000000..6b57a95ed458b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/TestDriver.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.jdbc.connection + +import java.sql.{Connection, Driver, DriverPropertyInfo} +import java.util.Properties +import java.util.logging.Logger + +private[jdbc] class TestDriver() extends Driver { + override def connect(url: String, info: Properties): Connection = null + override def acceptsURL(url: String): Boolean = false + override def getPropertyInfo(url: String, info: Properties): Array[DriverPropertyInfo] = + Array.empty + override def getMajorVersion: Int = 0 + override def getMinorVersion: Int = 0 + override def jdbcCompliant(): Boolean = false + override def getParentLogger: Logger = null +} From eb9966b70055a67dd02451c78ec205d913a38a42 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 20 Oct 2020 17:35:09 +0900 Subject: [PATCH 0286/1009] [SPARK-33190][INFRA][TESTS] Set upper bound of PyArrow version in GitHub Actions ### What changes were proposed in this pull request? PyArrow is uploaded into PyPI today (https://pypi.org/project/pyarrow/), and some tests fail with PyArrow 2.0.0+: ``` ====================================================================== ERROR [0.774s]: test_grouped_over_window_with_key (pyspark.sql.tests.test_pandas_grouped_map.GroupedMapInPandasTests) ---------------------------------------------------------------------- Traceback (most recent call last): File "/__w/spark/spark/python/pyspark/sql/tests/test_pandas_grouped_map.py", line 595, in test_grouped_over_window_with_key .select('id', 'result').collect() File "/__w/spark/spark/python/pyspark/sql/dataframe.py", line 588, in collect sock_info = self._jdf.collectToPython() File "/__w/spark/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ answer, self.gateway_client, self.target_id, self.name) File "/__w/spark/spark/python/pyspark/sql/utils.py", line 117, in deco raise converted from None pyspark.sql.utils.PythonException: An exception was thrown from the Python worker. Please see the stack trace below. 
Traceback (most recent call last): File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 601, in main process() File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 593, in process serializer.dump_stream(out_iter, outfile) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 255, in dump_stream return ArrowStreamSerializer.dump_stream(self, init_stream_yield_batches(), stream) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 81, in dump_stream for batch in iterator: File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py", line 248, in init_stream_yield_batches for series in iterator: File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 426, in mapper return f(keys, vals) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 170, in return lambda k, v: [(wrapped(k, v), to_arrow_type(return_type))] File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/worker.py", line 158, in wrapped result = f(key, pd.concat(value_series, axis=1)) File "/__w/spark/spark/python/lib/pyspark.zip/pyspark/util.py", line 68, in wrapper return f(*args, **kwargs) File "/__w/spark/spark/python/pyspark/sql/tests/test_pandas_grouped_map.py", line 590, in f "{} != {}".format(expected_key[i][1], window_range) AssertionError: {'start': datetime.datetime(2018, 3, 15, 0, 0), 'end': datetime.datetime(2018, 3, 20, 0, 0)} != {'start': datetime.datetime(2018, 3, 15, 0, 0, tzinfo=), 'end': datetime.datetime(2018, 3, 20, 0, 0, tzinfo=)} ``` https://github.com/apache/spark/runs/1278917457 This PR proposes to set the upper bound of PyArrow in GitHub Actions build. This should be removed when we properly support PyArrow 2.0.0+ (SPARK-33189). ### Why are the changes needed? To make build pass. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions in this build will test it out. Closes #30098 from HyukjinKwon/hot-fix-test. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 762e22f24cc2b..a1c99fd21a466 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -136,7 +136,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.8 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -239,7 +239,7 @@ jobs: # Ubuntu 20.04. See also SPARK-33162. - name: Install Python packages (Python 3.6) run: | - python3.6 -m pip install numpy pyarrow pandas scipy xmlrunner + python3.6 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner python3.6 -m pip list # Run the tests. - name: Run tests From 2cfd215dc4fb1ff6865644fec8284ba93dcddd5c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 21 Oct 2020 00:31:58 +0900 Subject: [PATCH 0287/1009] [SPARK-33191][YARN][TESTS] Fix PySpark test cases in YarnClusterSuite ### What changes were proposed in this pull request? 
This PR proposes to fix: ``` org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-client mode org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-cluster mode org.apache.spark.deploy.yarn.YarnClusterSuite.run Python application in yarn-cluster mode using spark.yarn.appMasterEnv to override local envvar ``` it currently fails as below: ``` 20/10/16 19:20:36 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0) (amp-jenkins-worker-03.amp executor 1): org.apache.spark.SparkException: Error from python worker: Traceback (most recent call last): File "/usr/lib64/python2.6/runpy.py", line 104, in _run_module_as_main loader, code, fname = _get_module_details(mod_name) File "/usr/lib64/python2.6/runpy.py", line 79, in _get_module_details loader = get_loader(mod_name) File "/usr/lib64/python2.6/pkgutil.py", line 456, in get_loader return find_loader(fullname) File "/usr/lib64/python2.6/pkgutil.py", line 466, in find_loader for importer in iter_importers(fullname): File "/usr/lib64/python2.6/pkgutil.py", line 422, in iter_importers __import__(pkg) File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/__init__.py", line 53, in from pyspark.rdd import RDD, RDDBarrier File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/rdd.py", line 34, in from pyspark.java_gateway import local_connect_and_auth File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/pyspark/java_gateway.py", line 29, in from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters File "/home/jenkins/workspace/SparkPullRequestBuilder2/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 60 PY4J_TRUE = {"yes", "y", "t", "true"} ^ SyntaxError: invalid syntax ``` I think this was broken when Python 2 was dropped but was not caught because this specific test does not run when there's no change in YARN codes. See also https://github.com/apache/spark/pull/29843#issuecomment-712540024 The root cause seems like the paths are different, see https://github.com/apache/spark/pull/29843#pullrequestreview-502595199. I _think_ Jenkins uses a different Python executable via Anaconda and the executor side does not know where it is for some reasons. This PR proposes to fix it just by explicitly specifying the absolute path for Python executable so the tests should pass in any environment. ### Why are the changes needed? To make tests pass. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? This issue looks specific to Jenkins. It should run the tests on Jenkins. Closes #30099 from HyukjinKwon/SPARK-33191. 
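To make the fix easier to follow, here is a simplified sketch of resolving an executable name to an absolute path by scanning `PATH`, in the spirit of `dev/sparktestsupport/shellutils.py`. It is illustrative only; the helper actually added by this patch (`TestUtils.getAbsolutePathFromExecutable`, shown in the diff below) additionally handles the Windows `.exe` suffix and quoted `PATH` entries.

```scala
import java.io.File
import java.nio.file.{Files, Paths}

// Return the first regular, executable file named `executable` found on PATH.
def findOnPath(executable: String): Option[String] =
  sys.env.getOrElse("PATH", "")
    .split(File.pathSeparator)
    .iterator
    .map(dir => Paths.get(dir, executable))
    .find(p => Files.isRegularFile(p) && Files.isExecutable(p))
    .map(_.toAbsolutePath.toString)

// e.g. findOnPath("python3") might yield Some("/usr/bin/python3") on a typical Linux box.
```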
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/TestUtils.scala | 22 ++++++++++++++++++- .../spark/deploy/yarn/YarnClusterSuite.scala | 16 +++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index bc3644df0ebb5..9632d6c691085 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -20,13 +20,14 @@ package org.apache.spark import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} import java.net.{HttpURLConnection, URI, URL} import java.nio.charset.StandardCharsets -import java.nio.file.{Files => JavaFiles} +import java.nio.file.{Files => JavaFiles, Paths} import java.nio.file.attribute.PosixFilePermission.{OWNER_EXECUTE, OWNER_READ, OWNER_WRITE} import java.security.SecureRandom import java.security.cert.X509Certificate import java.util.{Arrays, EnumSet, Locale, Properties} import java.util.concurrent.{TimeoutException, TimeUnit} import java.util.jar.{JarEntry, JarOutputStream, Manifest} +import java.util.regex.Pattern import javax.net.ssl._ import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} @@ -37,6 +38,7 @@ import scala.sys.process.{Process, ProcessLogger} import scala.util.Try import com.google.common.io.{ByteStreams, Files} +import org.apache.commons.lang3.StringUtils import org.apache.log4j.PropertyConfigurator import org.json4s.JsonAST.JValue import org.json4s.jackson.JsonMethods.{compact, render} @@ -268,6 +270,24 @@ private[spark] object TestUtils { attempt.isSuccess && attempt.get == 0 } + /** + * Get the absolute path from the executable. This implementation was borrowed from + * `spark/dev/sparktestsupport/shellutils.py`. + */ + def getAbsolutePathFromExecutable(executable: String): Option[String] = { + val command = if (Utils.isWindows) s"$executable.exe" else executable + if (command.split(File.separator, 2).length == 1 && + JavaFiles.isRegularFile(Paths.get(command)) && + JavaFiles.isExecutable(Paths.get(command))) { + Some(Paths.get(command).toAbsolutePath.toString) + } else { + sys.env("PATH").split(Pattern.quote(File.pathSeparator)) + .map(path => Paths.get(s"${StringUtils.strip(path, "\"")}${File.separator}$command")) + .find(p => JavaFiles.isRegularFile(p) && JavaFiles.isExecutable(p)) + .map(_.toString) + } + } + /** * Returns the response code from an HTTP(S) URL. */ diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 14438bc141056..cf754cca315f0 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -52,6 +52,13 @@ import org.apache.spark.util.{Utils, YarnContainerInfoHelper} @ExtendedYarnTest class YarnClusterSuite extends BaseYarnClusterSuite { + private val pythonExecutablePath = { + // To make sure to use the same Python executable. 
+ val maybePath = TestUtils.getAbsolutePathFromExecutable("python3") + assert(maybePath.isDefined) + maybePath.get + } + override def newYarnConfig(): YarnConfiguration = new YarnConfiguration() private val TEST_PYFILE = """ @@ -175,9 +182,9 @@ class YarnClusterSuite extends BaseYarnClusterSuite { clientMode = false, extraConf = Map( "spark.yarn.appMasterEnv.PYSPARK_DRIVER_PYTHON" - -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", "python"), + -> sys.env.getOrElse("PYSPARK_DRIVER_PYTHON", pythonExecutablePath), "spark.yarn.appMasterEnv.PYSPARK_PYTHON" - -> sys.env.getOrElse("PYSPARK_PYTHON", "python")), + -> sys.env.getOrElse("PYSPARK_PYTHON", pythonExecutablePath)), extraEnv = Map( "PYSPARK_DRIVER_PYTHON" -> "not python", "PYSPARK_PYTHON" -> "not python")) @@ -275,7 +282,10 @@ class YarnClusterSuite extends BaseYarnClusterSuite { s"$sparkHome/python") val extraEnvVars = Map( "PYSPARK_ARCHIVES_PATH" -> pythonPath.map("local:" + _).mkString(File.pathSeparator), - "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator)) ++ extraEnv + "PYTHONPATH" -> pythonPath.mkString(File.pathSeparator), + "PYSPARK_DRIVER_PYTHON" -> pythonExecutablePath, + "PYSPARK_PYTHON" -> pythonExecutablePath + ) ++ extraEnv val moduleDir = { val subdir = new File(tempDir, "pyModules") From 46ad325e56abd95c0ffdbe64aad78582da8c725d Mon Sep 17 00:00:00 2001 From: Keiji Yoshida Date: Wed, 21 Oct 2020 00:36:45 +0900 Subject: [PATCH 0288/1009] [MINOR][DOCS] Fix the description about to_avro and from_avro functions ### What changes were proposed in this pull request? This pull request changes the description about `to_avro` and `from_avro` functions to include Python as a supported language as the functions have been supported in Python since Apache Spark 3.0.0 [[SPARK-26856](https://issues.apache.org/jira/browse/SPARK-26856)]. ### Why are the changes needed? Same as above. ### Does this PR introduce _any_ user-facing change? Yes. The description changed by this pull request is on https://spark.apache.org/docs/latest/sql-data-sources-avro.html#to_avro-and-from_avro. ### How was this patch tested? Tested manually by building and checking the document in the local environment. Closes #30105 from kjmrknsn/fix-docs-sql-data-sources-avro. Authored-by: Keiji Yoshida Signed-off-by: HyukjinKwon --- docs/sql-data-sources-avro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index d926ae7703268..69b165ed28bae 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -88,7 +88,7 @@ Kafka key-value record will be augmented with some metadata, such as the ingesti * If the "value" field that contains your data is in Avro, you could use `from_avro()` to extract your data, enrich it, clean it, and then push it downstream to Kafka again or write it out to a file. * `to_avro()` can be used to turn structs into Avro records. This method is particularly useful when you would like to re-encode multiple columns into a single one when writing data out to Kafka. -Both functions are currently only available in Scala and Java. +Both functions are currently only available in Scala, Java, and Python.
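As an illustrative aside (not part of the patch), the following Scala sketch shows the two functions the page documents. It assumes a SparkSession with the external `spark-avro` module on the classpath and a hypothetical DataFrame `df` holding Avro-encoded bytes in a binary column named `value`; the schema string is made up for the example.

```scala
import org.apache.spark.sql.avro.functions.{from_avro, to_avro}
import org.apache.spark.sql.functions.col

// Avro schema of the payload, in JSON form (illustrative).
val jsonFormatSchema =
  """{"type":"record","name":"User","fields":[{"name":"name","type":"string"}]}"""

// Decode the binary Avro column into a struct, then re-encode the struct back to Avro.
val decoded = df.select(from_avro(col("value"), jsonFormatSchema).as("user"))
val reencoded = decoded.select(to_avro(col("user")).as("value"))
```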
    From c824db2d8b154acf51637844f5f268e988bd0081 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 20 Oct 2020 14:55:08 -0700 Subject: [PATCH 0289/1009] [MINOR][CORE] Improve log message during storage decommission ### What changes were proposed in this pull request? This PR aims to improve the log message for better analysis. ### Why are the changes needed? Good logs are crucial always. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. Closes #30109 from dongjoon-hyun/k8s_log. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerDecommissioner.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 3377b357a9231..66df72921acb2 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -83,9 +83,10 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logDebug(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer}") + logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + + "($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) - logDebug(s"Got migration sub-blocks ${blocks}") + logInfo(s"Got migration sub-blocks ${blocks}") // Migrate the components of the blocks. try { @@ -101,7 +102,7 @@ private[storage] class BlockManagerDecommissioner( null)// class tag, we don't need for shuffle logDebug(s"Migrated sub block ${blockId}") } - logDebug(s"Migrated ${shuffleBlockInfo} to ${peer}") + logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") } catch { case e: IOException => // If a block got deleted before netty opened the file handle, then trying to @@ -244,6 +245,8 @@ private[storage] class BlockManagerDecommissioner( val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate + logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + + "are added. In total, ${migratingShuffles.size} shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet From 385d5db9413a7f23c8a4c2d802541e88ce3a4633 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 20 Oct 2020 15:02:36 -0700 Subject: [PATCH 0290/1009] [SPARK-33198][CORE] getMigrationBlocks should not fail at missing files ### What changes were proposed in this pull request? This PR aims to fix `getMigrationBlocks` error handling and to add test coverage. 1. `getMigrationBlocks` should not fail at indexFile only case. 2. `assert` causes `java.lang.AssertionError` which is not an `Exception`. ### Why are the changes needed? To handle the exception correctly. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the newly added test case. Closes #30110 from dongjoon-hyun/SPARK-33198. 
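A small illustration of the second point (not patch code): `assert` raises `java.lang.AssertionError`, which extends `Error` rather than `Exception`, so the existing `catch { case _: Exception => ... }` block could not swallow it. The method and values below are hypothetical.

```scala
def loadOrSkip(indexExists: Boolean): List[String] =
  try {
    assert(indexExists)       // throws AssertionError when indexExists is false
    List("index", "data")
  } catch {
    case _: Exception => Nil  // never reached for an assertion failure
  }

// loadOrSkip(false) propagates AssertionError instead of returning Nil, which is why
// the patch below replaces the assert with an explicit check that throws
// FileNotFoundException (an Exception) when the index file is gone.
```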
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../shuffle/IndexShuffleBlockResolver.scala | 19 ++++++++++++------- .../sort/IndexShuffleBlockResolverSuite.scala | 5 +++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 9496918760298..525b8fd3f6923 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -241,14 +241,19 @@ private[spark] class IndexShuffleBlockResolver( val dataBlockData = new FileSegmentManagedBuffer( transportConf, dataFile, 0, dataFile.length()) - // Make sure the files exist - assert(indexFile.exists() && dataFile.exists()) - - List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + // Make sure the index exist. + if (!indexFile.exists()) { + throw new FileNotFoundException("Index file is deleted already.") + } + if (dataFile.exists()) { + List((indexBlockId, indexBlockData), (dataBlockId, dataBlockData)) + } else { + List((indexBlockId, indexBlockData)) + } } catch { - case e: Exception => // If we can't load the blocks ignore them. - logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration" + - "this is expected to occure if a block is removed after decommissioning has started.") + case _: Exception => // If we can't load the blocks ignore them. + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration. " + + "This is expected to occur if a block is removed after decommissioning has started.") List.empty[(BlockId, ManagedBuffer)] } } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 725a1d90557a2..91260d01eb8b6 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -156,4 +156,9 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite with BeforeAndAfterEa indexIn2.close() } } + + test("SPARK-33198 getMigrationBlocks should not fail at missing files") { + val resolver = new IndexShuffleBlockResolver(conf, blockManager) + assert(resolver.getMigrationBlocks(ShuffleBlockInfo(Int.MaxValue, Long.MaxValue)).isEmpty) + } } From 47a6568265525002021c1e5cfa4330f5b1a91469 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Wed, 21 Oct 2020 09:13:33 +0900 Subject: [PATCH 0291/1009] [SPARK-33189][PYTHON][TESTS] Add env var to tests for legacy nested timestamps in pyarrow ### What changes were proposed in this pull request? Add an environment variable `PYARROW_IGNORE_TIMEZONE` to pyspark tests in run-tests.py to use legacy nested timestamp behavior. This means that when converting arrow to pandas, nested timestamps with timezones will have the timezone localized during conversion. ### Why are the changes needed? The default behavior was changed in PyArrow 2.0.0 to propagate timezone information. Using the environment variable enables testing with newer versions of pyarrow until the issue can be fixed in SPARK-32285. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30111 from BryanCutler/arrow-enable-legacy-nested-timestamps-SPARK-33189. 
Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 ++-- python/run-tests.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a1c99fd21a466..27607a799d038 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -136,7 +136,7 @@ jobs: - name: Install Python packages (Python 3.8) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) run: | - python3.8 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner + python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list # SparkR - name: Install R 4.0 @@ -239,7 +239,7 @@ jobs: # Ubuntu 20.04. See also SPARK-33162. - name: Install Python packages (Python 3.6) run: | - python3.6 -m pip install numpy 'pyarrow<2.0.0' pandas scipy xmlrunner + python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.6 -m pip list # Run the tests. - name: Run tests diff --git a/python/run-tests.py b/python/run-tests.py index ad2e90edad2bc..712f38fb81b83 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -72,6 +72,8 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): 'SPARK_PREPEND_CLASSES': '1', 'PYSPARK_PYTHON': which(pyspark_python), 'PYSPARK_DRIVER_PYTHON': which(pyspark_python), + # Preserve legacy nested timezone behavior for pyarrow>=2, remove after SPARK-32285 + 'PYARROW_IGNORE_TIMEZONE': '1', }) # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is From dcb08204339e2291727be8e1a206e272652f9ae4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 21 Oct 2020 15:51:16 +0900 Subject: [PATCH 0292/1009] [SPARK-32785][SQL][DOCS][FOLLOWUP] Update migaration guide for incomplete interval literals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Address comments https://github.com/apache/spark/pull/29635#discussion_r507241899 to improve migration guide ### Why are the changes needed? improve migration guide ### Does this PR introduce _any_ user-facing change? NO,only doc update ### How was this patch tested? passing GitHub action Closes #30113 from yaooqinn/SPARK-32785-F. Authored-by: Kent Yao Signed-off-by: Takeshi Yamamuro --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index cc69e78108ffd..5612e4f1453f1 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -44,7 +44,7 @@ license: | - In Spark 3.1, `path` option cannot coexist when the following methods are called with path parameter(s): `DataFrameReader.load()`, `DataFrameWriter.save()`, `DataStreamReader.load()`, or `DataStreamWriter.start()`. In addition, `paths` option cannot coexist for `DataFrameReader.load()`. For example, `spark.read.format("csv").option("path", "/tmp").load("/tmp2")` or `spark.read.option("path", "/tmp").csv("/tmp2")` will throw `org.apache.spark.sql.AnalysisException`. In Spark version 3.0 and below, `path` option is overwritten if one path parameter is passed to above methods; `path` option is added to the overall paths if multiple path parameters are passed to `DataFrameReader.load()`. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.pathOptionBehavior.enabled` to `true`. 
- - In Spark 3.1, incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'` will fail with IllegalArgumentException. In Spark 3.0, they result `NULL`s. + - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s. - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. From 618695b78fe93ae6506650ecfbebe807a43c5f0c Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Wed, 21 Oct 2020 08:49:25 -0500 Subject: [PATCH 0293/1009] [SPARK-33111][ML][FOLLOW-UP] aft transform optimization - predictQuantiles ### What changes were proposed in this pull request? 1, optimize `predictQuantiles` by pre-computing an auxiliary var. ### Why are the changes needed? In https://github.com/apache/spark/pull/30000, I optimized the `transform` method. I find that we can also optimize `predictQuantiles` by pre-computing an auxiliary var. It is about 56% faster than existing impl. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30034 from zhengruifeng/aft_quantiles_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../ml/regression/AFTSurvivalRegression.scala | 42 +++++++++++-------- .../AFTSurvivalRegressionSuite.scala | 2 +- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 595a2f0e742df..3870a71a91a20 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -383,22 +383,32 @@ class AFTSurvivalRegressionModel private[ml] ( /** @group setParam */ @Since("1.6.0") - def setQuantileProbabilities(value: Array[Double]): this.type = set(quantileProbabilities, value) + def setQuantileProbabilities(value: Array[Double]): this.type = { + set(quantileProbabilities, value) + _quantiles(0) = $(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale)) + this + } /** @group setParam */ @Since("1.6.0") def setQuantilesCol(value: String): this.type = set(quantilesCol, value) + private lazy val _quantiles = { + Array($(quantileProbabilities).map(q => math.exp(math.log(-math.log1p(-q)) * scale))) + } + + private def lambda2Quantiles(lambda: Double): Vector = { + val quantiles = _quantiles(0).clone() + var i = 0 + while (i < quantiles.length) { quantiles(i) *= lambda; i += 1 } + Vectors.dense(quantiles) + } + @Since("2.0.0") def predictQuantiles(features: Vector): Vector = { // scale parameter for the Weibull distribution of lifetime - val lambda = math.exp(BLAS.dot(coefficients, features) + intercept) - // shape parameter for the Weibull distribution of lifetime - val k = 1 / scale - val quantiles = $(quantileProbabilities).map { - q => lambda * math.exp(math.log(-math.log1p(-q)) / k) - } - Vectors.dense(quantiles) + val lambda = predict(features) + lambda2Quantiles(lambda) } @Since("2.0.0") @@ -414,24 +424,20 @@ class AFTSurvivalRegressionModel private[ml] ( var predictionColumns = Seq.empty[Column] if ($(predictionCol).nonEmpty) { - val predictUDF = udf { features: Vector => predict(features) } + val predCol = udf(predict _).apply(col($(featuresCol))) 
predictionColNames :+= $(predictionCol) - predictionColumns :+= predictUDF(col($(featuresCol))) + predictionColumns :+= predCol .as($(predictionCol), outputSchema($(predictionCol)).metadata) } if (hasQuantilesCol) { - val baseQuantiles = $(quantileProbabilities) - .map(q => math.exp(math.log(-math.log1p(-q)) * scale)) - val lambdaCol = if ($(predictionCol).nonEmpty) { - predictionColumns.head + val quanCol = if ($(predictionCol).nonEmpty) { + udf(lambda2Quantiles _).apply(predictionColumns.head) } else { - udf { features: Vector => predict(features) }.apply(col($(featuresCol))) + udf(predictQuantiles _).apply(col($(featuresCol))) } - val predictQuantilesUDF = - udf { lambda: Double => Vectors.dense(baseQuantiles.map(q => q * lambda)) } predictionColNames :+= $(quantilesCol) - predictionColumns :+= predictQuantilesUDF(lambdaCol) + predictionColumns :+= quanCol .as($(quantilesCol), outputSchema($(quantilesCol)).metadata) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala index a66143ab12e49..63ccfa3834624 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/AFTSurvivalRegressionSuite.scala @@ -130,9 +130,9 @@ class AFTSurvivalRegressionSuite extends MLTest with DefaultReadWriteTest { test("aft survival regression with univariate") { val quantileProbabilities = Array(0.1, 0.5, 0.9) val trainer = new AFTSurvivalRegression() - .setQuantileProbabilities(quantileProbabilities) .setQuantilesCol("quantiles") val model = trainer.fit(datasetUnivariate) + model.setQuantileProbabilities(quantileProbabilities) /* Using the following R code to load the data and train the model using survival package. From 1b7367ccd7cdcbfc9ff9a3893693a3261a5eb7c1 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 21 Oct 2020 13:04:39 -0700 Subject: [PATCH 0294/1009] [SPARK-33205][BUILD] Bump snappy-java version to 1.1.8 ### What changes were proposed in this pull request? This PR intends to upgrade snappy-java from 1.1.7.5 to 1.1.8. ### Why are the changes needed? For performance improvements; the released `snappy-java` bundles the latest `Snappy` v1.1.8 binaries with small performance improvements. - snappy-java release note: https://github.com/xerial/snappy-java/releases/tag/1.1.8 - snappy release note: https://github.com/google/snappy/releases/tag/1.1.8 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? GA tests. Closes #30120 from maropu/Snappy1.1.8. 
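For anyone who wants to double-check a dependency bump like this locally, the following spark-shell snippet is a minimal sketch (not part of this patch; it assumes the standard `spark`/`sc` shell bindings) that reports which snappy-java build the driver actually loaded and which compression codec Spark is configured to use:

```scala
// Minimal sketch, not from this patch: confirm the snappy-java build on the driver classpath.
// getImplementationVersion reads the jar manifest and may be null for locally built jars,
// hence the defensive Option handling.
val snappyJavaVersion =
  Option(classOf[org.xerial.snappy.Snappy].getPackage)
    .flatMap(p => Option(p.getImplementationVersion))
println(s"snappy-java on the driver: ${snappyJavaVersion.getOrElse("unknown")}")

// The bumped library is only exercised for block/shuffle compression if the codec is snappy
// (Spark's default codec is lz4).
println(s"spark.io.compression.codec = ${sc.getConf.get("spark.io.compression.codec", "lz4")}")
```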
Authored-by: Takeshi Yamamuro Signed-off-by: Liang-Chi Hsieh --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c389c885cb0e5..e365559ed8cbf 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -222,7 +222,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar +snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index ed0db42828301..0c050d62db3da 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -237,7 +237,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.24//snakeyaml-1.24.jar -snappy-java/1.1.7.5//snappy-java-1.1.7.5.jar +snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 96406d9bcef13..2fd002e91751f 100644 --- a/pom.xml +++ b/pom.xml @@ -170,7 +170,7 @@ true 1.9.13 2.10.0 - 1.1.7.5 + 1.1.8 1.1.2 1.10 2.5 From 7aed81d4926c8f13ffb38f7ff90162b15c876016 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 21 Oct 2020 14:37:56 -0700 Subject: [PATCH 0295/1009] [SPARK-33202][CORE] Fix BlockManagerDecommissioner to return the correct migration status ### What changes were proposed in this pull request? This PR changes `<` into `>` in the following to fix data loss during storage migrations. ```scala // If we found any new shuffles to migrate or otherwise have not migrated everything. - newShufflesToMigrate.nonEmpty || migratingShuffles.size < numMigratedShuffles.get() + newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get() ``` ### Why are the changes needed? `refreshOffloadingShuffleBlocks` should return `true` when the migration is still on-going. Since `migratingShuffles` is defined like the following, `migratingShuffles.size > numMigratedShuffles.get()` means the migration is not finished. ```scala // Shuffles which are either in queue for migrations or migrated protected[storage] val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]() ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the updated test cases. Closes #30116 from dongjoon-hyun/SPARK-33202. 
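To make the corrected condition concrete, here is a small self-contained Scala sketch; the names mirror the fields quoted above, but these are illustrative stand-ins rather than the actual Spark classes:

```scala
// Self-contained illustration of the fixed check; stand-in definitions, not Spark code.
import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable

object MigrationStatusSketch {
  final case class ShuffleBlockInfo(shuffleId: Int, mapId: Long)

  // Shuffles which are either in queue for migration or already migrated.
  val migratingShuffles = mutable.HashSet[ShuffleBlockInfo]()
  // Counts only shuffles whose migration has finished.
  val numMigratedShuffles = new AtomicInteger(0)

  // Mirrors the corrected predicate: migration is still on-going while more shuffles
  // have been enqueued than have finished migrating.
  def stillMigrating(newShufflesToMigrate: Seq[ShuffleBlockInfo]): Boolean =
    newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get()

  def main(args: Array[String]): Unit = {
    migratingShuffles += ShuffleBlockInfo(10, 10)
    println(stillMigrating(Nil)) // true: 1 enqueued, 0 finished
    numMigratedShuffles.incrementAndGet()
    println(stillMigrating(Nil)) // false: everything enqueued has finished
  }
}
```

With the old `<` comparison, `stillMigrating(Nil)` would have returned `false` in the first state as well, reporting an in-flight migration as already finished.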
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerDecommissioner.scala | 7 ++++--- .../storage/BlockManagerDecommissionUnitSuite.scala | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 66df72921acb2..89d12406365dc 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -84,7 +84,7 @@ private[storage] class BlockManagerDecommissioner( case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + - "($retryCount / $maxReplicationFailuresForDecommission)") + s"($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) logInfo(s"Got migration sub-blocks ${blocks}") @@ -130,6 +130,7 @@ private[storage] class BlockManagerDecommissioner( case Some((shuffleMap, retryCount)) => logError(s"Error during migration, adding ${shuffleMap} back to migration queue", e) shufflesToMigrate.add((shuffleMap, retryCount + 1)) + running = false case None => logError(s"Error while waiting for block to migrate", e) } @@ -246,7 +247,7 @@ private[storage] class BlockManagerDecommissioner( shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + - "are added. In total, ${migratingShuffles.size} shuffles are remained.") + s"are added. In total, ${migratingShuffles.size} shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet @@ -268,7 +269,7 @@ private[storage] class BlockManagerDecommissioner( stoppedShuffle = true } // If we found any new shuffles to migrate or otherwise have not migrated everything. - newShufflesToMigrate.nonEmpty || migratingShuffles.size < numMigratedShuffles.get() + newShufflesToMigrate.nonEmpty || migratingShuffles.size > numMigratedShuffles.get() } /** diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala index a87fc1835f6b5..b7ac378b4c6cd 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionUnitSuite.scala @@ -63,20 +63,20 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { * a constant Long.MaxValue timestamp. 
*/ private def validateDecommissionTimestamps(conf: SparkConf, bm: BlockManager, - fail: Boolean = false) = { + fail: Boolean = false, assertDone: Boolean = true) = { // Verify the decommissioning manager timestamps and status val bmDecomManager = new BlockManagerDecommissioner(conf, bm) - validateDecommissionTimestampsOnManager(bmDecomManager, fail) + validateDecommissionTimestampsOnManager(bmDecomManager, fail, assertDone) } private def validateDecommissionTimestampsOnManager(bmDecomManager: BlockManagerDecommissioner, - fail: Boolean = false, numShuffles: Option[Int] = None) = { + fail: Boolean = false, assertDone: Boolean = true, numShuffles: Option[Int] = None) = { var previousTime: Option[Long] = None try { bmDecomManager.start() eventually(timeout(100.second), interval(10.milliseconds)) { val (currentTime, done) = bmDecomManager.lastMigrationInfo() - assert(done) + assert(!assertDone || done) // Make sure the time stamp starts moving forward. if (!fail) { previousTime match { @@ -98,7 +98,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { // Wait 5 seconds and assert times keep moving forward. Thread.sleep(5000) val (currentTime, done) = bmDecomManager.lastMigrationInfo() - assert(done && currentTime > previousTime.get) + assert((!assertDone || done) && currentTime > previousTime.get) } } finally { bmDecomManager.stop() @@ -183,7 +183,7 @@ class BlockManagerDecommissionUnitSuite extends SparkFunSuite with Matchers { val bmDecomManager = new BlockManagerDecommissioner(sparkConf, bm) bmDecomManager.migratingShuffles += ShuffleBlockInfo(10, 10) - validateDecommissionTimestampsOnManager(bmDecomManager) + validateDecommissionTimestampsOnManager(bmDecomManager, fail = false, assertDone = false) } test("block decom manager handles IO failures") { From 66005a323625fc8c7346d28e9a8c52f91ae8d1a0 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 21 Oct 2020 14:46:47 -0700 Subject: [PATCH 0296/1009] [SPARK-31964][PYTHON][FOLLOW-UP] Use is_categorical_dtype instead of deprecated is_categorical ### What changes were proposed in this pull request? This PR is a small followup of https://github.com/apache/spark/pull/28793 and proposes to use `is_categorical_dtype` instead of deprecated `is_categorical`. `is_categorical_dtype` exists from minimum pandas version we support (https://github.com/pandas-dev/pandas/blob/v0.23.2/pandas/core/dtypes/api.py), and `is_categorical` was deprecated from pandas 1.1.0 (https://github.com/pandas-dev/pandas/commit/87a1cc21cab751c16fda4e6f0a95988a8d90462b). ### Why are the changes needed? To avoid using deprecated APIs, and remove warnings. ### Does this PR introduce _any_ user-facing change? Yes, it will remove warnings that says `is_categorical` is deprecated. ### How was this patch tested? By running any pandas UDF with pandas 1.1.0+: ```python import pandas as pd from pyspark.sql.functions import pandas_udf def func(x: pd.Series) -> pd.Series: return x spark.range(10).select(pandas_udf(func, "long")("id")).show() ``` Before: ``` /.../python/lib/pyspark.zip/pyspark/sql/pandas/serializers.py:151: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead ... ``` After: ``` ... ``` Closes #30114 from HyukjinKwon/replace-deprecated-is_categorical. 
Authored-by: HyukjinKwon Signed-off-by: Bryan Cutler --- python/pyspark/sql/pandas/serializers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 63fb8562799e3..09c7cf1b312bc 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -136,7 +136,7 @@ def _create_batch(self, series): import pandas as pd import pyarrow as pa from pyspark.sql.pandas.types import _check_series_convert_timestamps_internal - from pandas.api.types import is_categorical + from pandas.api.types import is_categorical_dtype # Make input conform to [(series1, type1), (series2, type2), ...] if not isinstance(series, (list, tuple)) or \ (len(series) == 2 and isinstance(series[1], pa.DataType)): @@ -148,7 +148,7 @@ def create_array(s, t): # Ensure timestamp series are in expected form for Spark internal representation if t is not None and pa.types.is_timestamp(t): s = _check_series_convert_timestamps_internal(s, self._timezone) - elif is_categorical(s.dtype): + elif is_categorical_dtype(s.dtype): # Note: This can be removed once minimum pyarrow version is >= 0.16.1 s = s.astype(s.dtypes.categories.dtype) try: From bbf2d6f6df0011c3035d829a56b035a2b094295c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 10:03:41 +0900 Subject: [PATCH 0297/1009] [SPARK-33160][SQL][FOLLOWUP] Update benchmarks of INT96 type rebasing ### What changes were proposed in this pull request? 1. Turn off/on the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` which was added by https://github.com/apache/spark/pull/30056 in `DateTimeRebaseBenchmark`. The parquet readers should infer correct rebasing mode automatically from metadata. 2. Regenerate benchmark results of `DateTimeRebaseBenchmark` in the environment: | Item | Description | | ---- | ----| | Region | us-west-2 (Oregon) | | Instance | r3.xlarge (spot instance) | | AMI | ami-06f2f779464715dc5 (ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190722.1) | | Java | OpenJDK8/11 installed by`sudo add-apt-repository ppa:openjdk-r/ppa` & `sudo apt install openjdk-11-jdk`| ### Why are the changes needed? To have up-to-date info about INT96 performance which is the default type for Catalyst's timestamp type. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By updating benchmark results: ``` $ SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.DateTimeRebaseBenchmark" ``` Closes #30118 from MaxGekk/int96-rebase-benchmark. 
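For a quick manual check of the same write path outside the benchmark, the sketch below (not part of this patch; it assumes a spark-shell session with the usual `spark` binding and an arbitrary scratch path) writes an ancient timestamp as INT96 under an explicit rebase mode and reads it back:

```scala
// Sketch only: write one pre-Gregorian-switch timestamp as INT96 with an explicit
// rebase mode, then read it back. The config keys are the ones discussed in this
// patch series; "/tmp/int96_rebase_demo" is just a scratch location.
spark.conf.set("spark.sql.parquet.outputTimestampType", "INT96")
spark.conf.set("spark.sql.legacy.parquet.int96RebaseModeInWrite", "CORRECTED")

spark.sql("SELECT timestamp'1001-01-01 01:02:03' AS ts")
  .write.mode("overwrite").parquet("/tmp/int96_rebase_demo")

// Per the note above, the readers infer the rebase mode from the file metadata,
// so no spark.sql.legacy.parquet.int96RebaseModeInRead setting is needed here.
spark.read.parquet("/tmp/int96_rebase_demo").show(false)
```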
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../DateTimeRebaseBenchmark-jdk11-results.txt | 206 +++++++++--------- .../DateTimeRebaseBenchmark-results.txt | 206 +++++++++--------- .../benchmark/DateTimeRebaseBenchmark.scala | 3 +- 3 files changed, 208 insertions(+), 207 deletions(-) diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt index 05896a4d69b47..74b19f2eef6a8 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk11-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 20023 20023 0 5.0 200.2 1.0X -before 1582, noop 10729 10729 0 9.3 107.3 1.9X -after 1582, rebase EXCEPTION 31834 31834 0 3.1 318.3 0.6X -after 1582, rebase LEGACY 31997 31997 0 3.1 320.0 0.6X -after 1582, rebase CORRECTED 31712 31712 0 3.2 317.1 0.6X -before 1582, rebase LEGACY 23663 23663 0 4.2 236.6 0.8X -before 1582, rebase CORRECTED 22749 22749 0 4.4 227.5 0.9X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1582, noop 21041 21041 0 4.8 210.4 1.0X +before 1582, noop 11202 11202 0 8.9 112.0 1.9X +after 1582, rebase EXCEPTION 32810 32810 0 3.0 328.1 0.6X +after 1582, rebase LEGACY 32530 32530 0 3.1 325.3 0.6X +after 1582, rebase CORRECTED 32849 32849 0 3.0 328.5 0.6X +before 1582, rebase LEGACY 23537 23537 0 4.2 235.4 0.9X +before 1582, rebase CORRECTED 22870 22870 0 4.4 228.7 0.9X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 12984 13262 257 7.7 129.8 1.0X -after 1582, vec off, rebase LEGACY 13278 13330 50 7.5 132.8 1.0X -after 1582, vec off, rebase CORRECTED 13202 13255 50 7.6 132.0 1.0X -after 1582, vec on, rebase EXCEPTION 3823 3853 40 26.2 38.2 3.4X -after 1582, vec on, rebase LEGACY 3846 3876 27 26.0 38.5 3.4X -after 1582, vec on, rebase CORRECTED 3775 3838 62 26.5 37.7 3.4X -before 1582, vec off, rebase LEGACY 13671 13692 26 7.3 136.7 0.9X -before 1582, vec off, rebase CORRECTED 13387 13476 106 7.5 133.9 1.0X -before 1582, vec on, rebase LEGACY 4477 4484 7 22.3 44.8 2.9X -before 1582, vec on, rebase CORRECTED 3729 3773 50 26.8 37.3 3.5X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1582, vec off, rebase EXCEPTION 13114 13225 104 7.6 131.1 1.0X +after 1582, vec off, rebase LEGACY 13175 13189 15 7.6 131.8 1.0X +after 1582, vec off, rebase CORRECTED 13080 13115 34 7.6 130.8 1.0X +after 1582, vec on, rebase EXCEPTION 3698 3726 29 27.0 37.0 3.5X +after 1582, vec on, rebase LEGACY 3730 3745 17 26.8 37.3 3.5X +after 1582, vec on, rebase CORRECTED 3714 3758 75 
26.9 37.1 3.5X +before 1582, vec off, rebase LEGACY 13519 13575 63 7.4 135.2 1.0X +before 1582, vec off, rebase CORRECTED 13210 13309 108 7.6 132.1 1.0X +before 1582, vec on, rebase LEGACY 4459 4488 44 22.4 44.6 2.9X +before 1582, vec on, rebase CORRECTED 3661 3718 88 27.3 36.6 3.6X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 3020 3020 0 33.1 30.2 1.0X -before 1900, noop 3013 3013 0 33.2 30.1 1.0X -after 1900, rebase EXCEPTION 28796 28796 0 3.5 288.0 0.1X -after 1900, rebase LEGACY 28869 28869 0 3.5 288.7 0.1X -after 1900, rebase CORRECTED 28522 28522 0 3.5 285.2 0.1X -before 1900, rebase LEGACY 30594 30594 0 3.3 305.9 0.1X -before 1900, rebase CORRECTED 30743 30743 0 3.3 307.4 0.1X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 2900 2900 0 34.5 29.0 1.0X +before 1900, noop 2848 2848 0 35.1 28.5 1.0X +after 1900, rebase EXCEPTION 27623 27623 0 3.6 276.2 0.1X +after 1900, rebase LEGACY 27305 27305 0 3.7 273.0 0.1X +after 1900, rebase CORRECTED 27715 27715 0 3.6 277.2 0.1X +before 1900, rebase LEGACY 30911 30911 0 3.2 309.1 0.1X +before 1900, rebase CORRECTED 27944 27944 0 3.6 279.4 0.1X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 19325 19468 135 5.2 193.3 1.0X -after 1900, vec off, rebase LEGACY 19568 19602 30 5.1 195.7 1.0X -after 1900, vec off, rebase CORRECTED 19532 19538 6 5.1 195.3 1.0X -after 1900, vec on, rebase EXCEPTION 9884 9990 94 10.1 98.8 2.0X -after 1900, vec on, rebase LEGACY 9933 9985 49 10.1 99.3 1.9X -after 1900, vec on, rebase CORRECTED 9967 10043 76 10.0 99.7 1.9X -before 1900, vec off, rebase LEGACY 24162 24198 37 4.1 241.6 0.8X -before 1900, vec off, rebase CORRECTED 24034 24056 20 4.2 240.3 0.8X -before 1900, vec on, rebase LEGACY 12548 12625 72 8.0 125.5 1.5X -before 1900, vec on, rebase CORRECTED 12580 12660 115 7.9 125.8 1.5X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 16853 16885 41 5.9 168.5 1.0X +after 1900, vec off, rebase LEGACY 16804 16816 21 6.0 168.0 1.0X +after 1900, vec off, rebase CORRECTED 16985 17020 58 5.9 169.9 1.0X +after 1900, vec on, rebase EXCEPTION 7044 7063 19 14.2 70.4 2.4X +after 1900, vec on, rebase LEGACY 7183 7255 94 13.9 71.8 2.3X +after 1900, vec on, rebase CORRECTED 7047 7137 86 14.2 70.5 2.4X +before 1900, vec off, rebase LEGACY 20371 20458 81 4.9 203.7 0.8X +before 1900, vec off, rebase CORRECTED 17484 17541 54 5.7 174.8 1.0X +before 1900, vec on, rebase LEGACY 10284 10327 45 9.7 102.8 1.6X +before 1900, vec on, rebase CORRECTED 7044 7073 37 14.2 70.4 2.4X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 3159 3159 0 31.7 31.6 1.0X -before 1900, noop 3038 3038 0 32.9 30.4 1.0X -after 1900, rebase EXCEPTION 16885 16885 0 5.9 168.8 0.2X -after 1900, rebase LEGACY 17171 17171 0 5.8 171.7 0.2X -after 1900, rebase CORRECTED 17353 17353 0 5.8 173.5 0.2X -before 1900, rebase LEGACY 20579 20579 0 4.9 205.8 0.2X -before 1900, rebase CORRECTED 17544 17544 0 5.7 175.4 0.2X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 2848 2848 0 35.1 28.5 1.0X +before 1900, noop 2855 2855 0 35.0 28.6 1.0X +after 1900, rebase EXCEPTION 15622 15622 0 6.4 156.2 0.2X +after 1900, rebase LEGACY 16148 16148 0 6.2 161.5 0.2X +after 1900, rebase CORRECTED 16946 16946 0 5.9 169.5 0.2X +before 1900, rebase LEGACY 19486 19486 0 5.1 194.9 0.1X +before 1900, rebase CORRECTED 17029 17029 0 5.9 170.3 0.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 16304 16345 58 6.1 163.0 1.0X -after 1900, vec off, rebase LEGACY 16503 16585 75 6.1 165.0 1.0X -after 1900, vec off, rebase CORRECTED 16413 16463 44 6.1 164.1 1.0X -after 1900, vec on, rebase EXCEPTION 5017 5034 29 19.9 50.2 3.2X -after 1900, vec on, rebase LEGACY 5060 5094 30 19.8 50.6 3.2X -after 1900, vec on, rebase CORRECTED 4969 4971 1 20.1 49.7 3.3X -before 1900, vec off, rebase LEGACY 19767 20001 203 5.1 197.7 0.8X -before 1900, vec off, rebase CORRECTED 16421 16465 38 6.1 164.2 1.0X -before 1900, vec on, rebase LEGACY 8535 8608 64 11.7 85.4 1.9X -before 1900, vec on, rebase CORRECTED 5044 5077 32 19.8 50.4 3.2X - -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 15785 15848 56 6.3 157.9 1.0X +after 1900, vec off, rebase LEGACY 15935 15954 17 6.3 159.3 1.0X +after 1900, vec off, rebase CORRECTED 15976 16046 62 6.3 159.8 1.0X +after 1900, vec on, rebase EXCEPTION 4925 4941 20 20.3 49.3 3.2X +after 1900, vec on, rebase LEGACY 5033 5041 11 19.9 50.3 3.1X +after 1900, vec on, rebase CORRECTED 4946 4972 29 20.2 49.5 3.2X +before 1900, vec off, rebase LEGACY 18619 18782 176 5.4 186.2 0.8X +before 1900, vec off, rebase CORRECTED 15956 16018 56 6.3 159.6 1.0X +before 1900, vec on, rebase LEGACY 8461 8472 14 11.8 84.6 1.9X +before 1900, vec on, rebase CORRECTED 4953 4962 12 20.2 49.5 3.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2995 2995 0 33.4 29.9 1.0X -before 1900, noop 2981 2981 0 33.5 29.8 1.0X -after 1900, rebase EXCEPTION 16196 16196 0 6.2 162.0 0.2X -after 1900, rebase LEGACY 16550 16550 0 6.0 165.5 0.2X -after 1900, rebase CORRECTED 16908 16908 0 5.9 169.1 0.2X -before 1900, rebase LEGACY 20087 20087 0 5.0 200.9 0.1X -before 1900, rebase CORRECTED 17171 17171 0 5.8 171.7 0.2X - -OpenJDK 64-Bit Server VM 
11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +after 1900, noop 3019 3019 0 33.1 30.2 1.0X +before 1900, noop 2896 2896 0 34.5 29.0 1.0X +after 1900, rebase EXCEPTION 15525 15525 0 6.4 155.2 0.2X +after 1900, rebase LEGACY 15903 15903 0 6.3 159.0 0.2X +after 1900, rebase CORRECTED 16468 16468 0 6.1 164.7 0.2X +before 1900, rebase LEGACY 19620 19620 0 5.1 196.2 0.2X +before 1900, rebase CORRECTED 16470 16470 0 6.1 164.7 0.2X + +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 16688 16787 88 6.0 166.9 1.0X -after 1900, vec off, rebase LEGACY 17383 17462 73 5.8 173.8 1.0X -after 1900, vec off, rebase CORRECTED 17317 17329 11 5.8 173.2 1.0X -after 1900, vec on, rebase EXCEPTION 6342 6348 6 15.8 63.4 2.6X -after 1900, vec on, rebase LEGACY 6500 6521 18 15.4 65.0 2.6X -after 1900, vec on, rebase CORRECTED 6164 6172 11 16.2 61.6 2.7X -before 1900, vec off, rebase LEGACY 20575 20665 81 4.9 205.7 0.8X -before 1900, vec off, rebase CORRECTED 17239 17290 61 5.8 172.4 1.0X -before 1900, vec on, rebase LEGACY 9310 9373 60 10.7 93.1 1.8X -before 1900, vec on, rebase CORRECTED 6091 6105 16 16.4 60.9 2.7X +after 1900, vec off, rebase EXCEPTION 16329 16357 26 6.1 163.3 1.0X +after 1900, vec off, rebase LEGACY 16609 16659 51 6.0 166.1 1.0X +after 1900, vec off, rebase CORRECTED 16659 16765 91 6.0 166.6 1.0X +after 1900, vec on, rebase EXCEPTION 6132 6162 28 16.3 61.3 2.7X +after 1900, vec on, rebase LEGACY 6344 6397 61 15.8 63.4 2.6X +after 1900, vec on, rebase CORRECTED 6023 6024 2 16.6 60.2 2.7X +before 1900, vec off, rebase LEGACY 19611 19626 13 5.1 196.1 0.8X +before 1900, vec off, rebase CORRECTED 16765 16784 19 6.0 167.7 1.0X +before 1900, vec on, rebase LEGACY 9136 9158 19 10.9 91.4 1.8X +before 1900, vec on, rebase CORRECTED 6023 6042 30 16.6 60.2 2.7X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 19583 19583 0 5.1 195.8 1.0X -before 1582, noop 10711 10711 0 9.3 107.1 1.8X -after 1582 27864 27864 0 3.6 278.6 0.7X -before 1582 19648 19648 0 5.1 196.5 1.0X +after 1582, noop 20934 20934 0 4.8 209.3 1.0X +before 1582, noop 11098 11098 0 9.0 111.0 1.9X +after 1582 29249 29249 0 3.4 292.5 0.7X +before 1582 20059 20059 0 5.0 200.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 10383 10560 192 9.6 103.8 1.0X -after 1582, vec on 3844 3864 33 26.0 38.4 2.7X -before 1582, vec off 10867 10916 48 9.2 108.7 1.0X -before 1582, vec on 4158 4170 12 24.0 41.6 2.5X +after 1582, vec off 10751 10802 56 9.3 107.5 1.0X +after 1582, vec on 3815 3870 62 26.2 38.1 2.8X +before 1582, vec off 11144 11174 37 9.0 111.4 1.0X +before 1582, vec on 4120 4126 8 24.3 41.2 2.6X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2989 2989 0 33.5 29.9 1.0X -before 1900, noop 3000 3000 0 33.3 30.0 1.0X -after 1900 19426 19426 0 5.1 194.3 0.2X -before 1900 23282 23282 0 4.3 232.8 0.1X +after 1900, noop 2858 2858 0 35.0 28.6 1.0X +before 1900, noop 2859 2859 0 35.0 28.6 1.0X +after 1900 17098 17098 0 5.8 171.0 0.2X +before 1900 20639 20639 0 4.8 206.4 0.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.8+10-post-Ubuntu-0ubuntu118.04.1 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 12089 12102 15 8.3 120.9 1.0X -after 1900, vec on 5210 5325 100 19.2 52.1 2.3X -before 1900, vec off 15320 15373 46 6.5 153.2 0.8X -before 1900, vec on 7937 7970 48 12.6 79.4 1.5X +after 1900, vec off 12292 12318 23 8.1 122.9 1.0X +after 1900, vec on 5198 5271 95 19.2 52.0 2.4X +before 1900, vec off 15108 15145 53 6.6 151.1 0.8X +before 1900, vec on 8085 8277 245 12.4 80.8 1.5X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt index 3e94d6c6fcfa7..07b156a62e2ec 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 23300 23300 0 4.3 233.0 1.0X -before 1582, noop 10585 10585 0 9.4 105.9 2.2X -after 1582, rebase EXCEPTION 35215 35215 0 2.8 352.1 0.7X -after 1582, rebase LEGACY 34927 34927 0 2.9 349.3 0.7X -after 1582, rebase CORRECTED 35479 35479 0 2.8 354.8 0.7X -before 1582, rebase LEGACY 22767 22767 0 4.4 227.7 1.0X -before 1582, rebase CORRECTED 22527 22527 0 4.4 225.3 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1582, noop 22736 22736 0 4.4 227.4 1.0X +before 1582, noop 
10512 10512 0 9.5 105.1 2.2X +after 1582, rebase EXCEPTION 35759 35759 0 2.8 357.6 0.6X +after 1582, rebase LEGACY 36229 36229 0 2.8 362.3 0.6X +after 1582, rebase CORRECTED 35489 35489 0 2.8 354.9 0.6X +before 1582, rebase LEGACY 23514 23514 0 4.3 235.1 1.0X +before 1582, rebase CORRECTED 23234 23234 0 4.3 232.3 1.0X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 13480 13577 94 7.4 134.8 1.0X -after 1582, vec off, rebase LEGACY 13466 13586 118 7.4 134.7 1.0X -after 1582, vec off, rebase CORRECTED 13526 13558 41 7.4 135.3 1.0X -after 1582, vec on, rebase EXCEPTION 3759 3778 28 26.6 37.6 3.6X -after 1582, vec on, rebase LEGACY 3957 4004 57 25.3 39.6 3.4X -after 1582, vec on, rebase CORRECTED 3739 3755 25 26.7 37.4 3.6X -before 1582, vec off, rebase LEGACY 13986 14038 67 7.1 139.9 1.0X -before 1582, vec off, rebase CORRECTED 13453 13491 49 7.4 134.5 1.0X -before 1582, vec on, rebase LEGACY 4716 4724 10 21.2 47.2 2.9X -before 1582, vec on, rebase CORRECTED 3701 3750 50 27.0 37.0 3.6X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1582, vec off, rebase EXCEPTION 13036 13121 85 7.7 130.4 1.0X +after 1582, vec off, rebase LEGACY 13567 13631 55 7.4 135.7 1.0X +after 1582, vec off, rebase CORRECTED 13476 13498 28 7.4 134.8 1.0X +after 1582, vec on, rebase EXCEPTION 3676 3679 3 27.2 36.8 3.5X +after 1582, vec on, rebase LEGACY 3842 3863 19 26.0 38.4 3.4X +after 1582, vec on, rebase CORRECTED 3706 3756 69 27.0 37.1 3.5X +before 1582, vec off, rebase LEGACY 13781 13832 68 7.3 137.8 0.9X +before 1582, vec off, rebase CORRECTED 13414 13445 28 7.5 134.1 1.0X +before 1582, vec on, rebase LEGACY 4774 4788 14 20.9 47.7 2.7X +before 1582, vec on, rebase CORRECTED 3650 3691 38 27.4 36.5 3.6X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2790 2790 0 35.8 27.9 1.0X -before 1900, noop 2812 2812 0 35.6 28.1 1.0X -after 1900, rebase EXCEPTION 24789 24789 0 4.0 247.9 0.1X -after 1900, rebase LEGACY 24539 24539 0 4.1 245.4 0.1X -after 1900, rebase CORRECTED 24543 24543 0 4.1 245.4 0.1X -before 1900, rebase LEGACY 30496 30496 0 3.3 305.0 0.1X -before 1900, rebase CORRECTED 30428 30428 0 3.3 304.3 0.1X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2696 2696 0 37.1 27.0 1.0X +before 1900, noop 2687 2687 0 37.2 26.9 1.0X +after 1900, rebase EXCEPTION 29085 29085 0 3.4 290.9 0.1X +after 1900, rebase LEGACY 29789 29789 0 3.4 297.9 0.1X +after 1900, rebase CORRECTED 29563 29563 0 3.4 295.6 0.1X +before 1900, rebase LEGACY 34033 34033 0 2.9 340.3 0.1X +before 1900, rebase CORRECTED 29687 29687 0 3.4 296.9 0.1X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 17106 17192 75 5.8 171.1 1.0X -after 1900, vec off, rebase LEGACY 17273 17337 55 5.8 172.7 1.0X -after 1900, vec off, rebase CORRECTED 17073 17215 128 5.9 170.7 1.0X -after 1900, vec on, rebase EXCEPTION 8903 8976 117 11.2 89.0 1.9X -after 1900, vec on, rebase LEGACY 8793 8876 84 11.4 87.9 1.9X -after 1900, vec on, rebase CORRECTED 8820 8878 53 11.3 88.2 1.9X -before 1900, vec off, rebase LEGACY 20997 21069 82 4.8 210.0 0.8X -before 1900, vec off, rebase CORRECTED 20874 20946 90 4.8 208.7 0.8X -before 1900, vec on, rebase LEGACY 12024 12090 58 8.3 120.2 1.4X -before 1900, vec on, rebase CORRECTED 12020 12069 64 8.3 120.2 1.4X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 16623 16711 78 6.0 166.2 1.0X +after 1900, vec off, rebase LEGACY 16525 16641 103 6.1 165.3 1.0X +after 1900, vec off, rebase CORRECTED 16698 16847 133 6.0 167.0 1.0X +after 1900, vec on, rebase EXCEPTION 8614 8723 97 11.6 86.1 1.9X +after 1900, vec on, rebase LEGACY 9790 9812 20 10.2 97.9 1.7X +after 1900, vec on, rebase CORRECTED 8607 8671 73 11.6 86.1 1.9X +before 1900, vec off, rebase LEGACY 21389 21553 142 4.7 213.9 0.8X +before 1900, vec off, rebase CORRECTED 17539 17545 6 5.7 175.4 0.9X +before 1900, vec on, rebase LEGACY 13594 13627 40 7.4 135.9 1.2X +before 1900, vec on, rebase CORRECTED 8620 8666 73 11.6 86.2 1.9X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2939 2939 0 34.0 29.4 1.0X -before 1900, noop 2917 2917 0 34.3 29.2 1.0X -after 1900, rebase EXCEPTION 15954 15954 0 6.3 159.5 0.2X -after 1900, rebase LEGACY 16402 16402 0 6.1 164.0 0.2X -after 1900, rebase CORRECTED 16541 16541 0 6.0 165.4 0.2X -before 1900, rebase LEGACY 20500 20500 0 4.9 205.0 0.1X -before 1900, rebase CORRECTED 16764 16764 0 6.0 167.6 0.2X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2755 2755 0 36.3 27.5 1.0X +before 1900, noop 2819 2819 0 35.5 28.2 1.0X +after 1900, rebase EXCEPTION 16742 16742 0 6.0 167.4 0.2X +after 1900, rebase LEGACY 16978 16978 0 5.9 169.8 0.2X +after 1900, rebase CORRECTED 17508 17508 0 5.7 175.1 0.2X +before 1900, rebase LEGACY 21961 21961 0 4.6 219.6 0.1X +before 1900, rebase CORRECTED 17770 17770 0 5.6 177.7 0.2X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15607 15655 81 6.4 156.1 1.0X -after 1900, vec off, rebase LEGACY 15616 15676 54 6.4 156.2 1.0X -after 1900, vec off, rebase CORRECTED 15634 15732 108 6.4 156.3 1.0X -after 1900, vec on, rebase EXCEPTION 5041 5057 16 19.8 50.4 3.1X -after 1900, vec on, rebase LEGACY 5516 5539 29 18.1 55.2 2.8X -after 1900, vec on, rebase CORRECTED 5087 5104 28 19.7 50.9 3.1X -before 1900, vec off, rebase LEGACY 
19262 19338 79 5.2 192.6 0.8X -before 1900, vec off, rebase CORRECTED 15718 15755 53 6.4 157.2 1.0X -before 1900, vec on, rebase LEGACY 10147 10240 114 9.9 101.5 1.5X -before 1900, vec on, rebase CORRECTED 5062 5080 21 19.8 50.6 3.1X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, vec off, rebase EXCEPTION 15311 15405 82 6.5 153.1 1.0X +after 1900, vec off, rebase LEGACY 15501 15578 73 6.5 155.0 1.0X +after 1900, vec off, rebase CORRECTED 15331 15472 123 6.5 153.3 1.0X +after 1900, vec on, rebase EXCEPTION 4976 5008 38 20.1 49.8 3.1X +after 1900, vec on, rebase LEGACY 5366 5443 67 18.6 53.7 2.9X +after 1900, vec on, rebase CORRECTED 4977 4982 9 20.1 49.8 3.1X +before 1900, vec off, rebase LEGACY 19205 19281 65 5.2 192.1 0.8X +before 1900, vec off, rebase CORRECTED 15458 15490 28 6.5 154.6 1.0X +before 1900, vec on, rebase LEGACY 9878 9933 79 10.1 98.8 1.5X +before 1900, vec on, rebase CORRECTED 4886 4961 66 20.5 48.9 3.1X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2915 2915 0 34.3 29.2 1.0X -before 1900, noop 2894 2894 0 34.6 28.9 1.0X -after 1900, rebase EXCEPTION 15545 15545 0 6.4 155.4 0.2X -after 1900, rebase LEGACY 15840 15840 0 6.3 158.4 0.2X -after 1900, rebase CORRECTED 16324 16324 0 6.1 163.2 0.2X -before 1900, rebase LEGACY 20359 20359 0 4.9 203.6 0.1X -before 1900, rebase CORRECTED 16292 16292 0 6.1 162.9 0.2X - -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +after 1900, noop 2836 2836 0 35.3 28.4 1.0X +before 1900, noop 2813 2813 0 35.6 28.1 1.0X +after 1900, rebase EXCEPTION 16549 16549 0 6.0 165.5 0.2X +after 1900, rebase LEGACY 16296 16296 0 6.1 163.0 0.2X +after 1900, rebase CORRECTED 16913 16913 0 5.9 169.1 0.2X +before 1900, rebase LEGACY 21150 21150 0 4.7 211.5 0.1X +before 1900, rebase CORRECTED 17090 17090 0 5.9 170.9 0.2X + +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15857 16015 223 6.3 158.6 1.0X -after 1900, vec off, rebase LEGACY 16174 16231 63 6.2 161.7 1.0X -after 1900, vec off, rebase CORRECTED 16353 16400 67 6.1 163.5 1.0X -after 1900, vec on, rebase EXCEPTION 6449 6459 9 15.5 64.5 2.5X -after 1900, vec on, rebase LEGACY 7028 7035 6 14.2 70.3 2.3X -after 1900, vec on, rebase CORRECTED 6585 6623 37 15.2 65.8 2.4X -before 1900, vec off, rebase LEGACY 19929 20027 95 5.0 199.3 0.8X -before 1900, vec off, rebase CORRECTED 16401 16451 49 6.1 164.0 1.0X -before 1900, vec on, rebase LEGACY 10517 10563 40 9.5 105.2 1.5X -before 1900, vec on, rebase CORRECTED 6659 6675 26 15.0 66.6 2.4X +after 1900, vec off, rebase EXCEPTION 15706 15823 132 6.4 157.1 1.0X +after 1900, vec off, rebase LEGACY 16100 16194 88 6.2 161.0 1.0X +after 1900, vec off, rebase CORRECTED 16227 16282 81 6.2 162.3 1.0X +after 1900, vec on, rebase EXCEPTION 6383 6404 26 15.7 63.8 2.5X +after 1900, vec on, rebase LEGACY 6994 7006 15 14.3 69.9 2.2X 
+after 1900, vec on, rebase CORRECTED 6580 6597 15 15.2 65.8 2.4X +before 1900, vec off, rebase LEGACY 19601 19674 82 5.1 196.0 0.8X +before 1900, vec off, rebase CORRECTED 16188 16215 25 6.2 161.9 1.0X +before 1900, vec on, rebase LEGACY 10305 10360 51 9.7 103.1 1.5X +before 1900, vec on, rebase CORRECTED 6573 6600 28 15.2 65.7 2.4X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 22782 22782 0 4.4 227.8 1.0X -before 1582, noop 10555 10555 0 9.5 105.6 2.2X -after 1582 31497 31497 0 3.2 315.0 0.7X -before 1582 19803 19803 0 5.0 198.0 1.2X +after 1582, noop 22766 22766 0 4.4 227.7 1.0X +before 1582, noop 10535 10535 0 9.5 105.3 2.2X +after 1582 31037 31037 0 3.2 310.4 0.7X +before 1582 19755 19755 0 5.1 197.6 1.2X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 10180 10214 44 9.8 101.8 1.0X -after 1582, vec on 3785 3804 24 26.4 37.8 2.7X -before 1582, vec off 10537 10582 39 9.5 105.4 1.0X -before 1582, vec on 4117 4146 25 24.3 41.2 2.5X +after 1582, vec off 11137 11165 37 9.0 111.4 1.0X +after 1582, vec on 3701 3734 51 27.0 37.0 3.0X +before 1582, vec off 11379 11409 50 8.8 113.8 1.0X +before 1582, vec on 4110 4160 57 24.3 41.1 2.7X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2853 2853 0 35.1 28.5 1.0X -before 1900, noop 2999 2999 0 33.3 30.0 1.0X -after 1900 16757 16757 0 6.0 167.6 0.2X -before 1900 21542 21542 0 4.6 215.4 0.1X +after 1900, noop 2830 2830 0 35.3 28.3 1.0X +before 1900, noop 2867 2867 0 34.9 28.7 1.0X +after 1900 17867 17867 0 5.6 178.7 0.2X +before 1900 21555 21555 0 4.6 215.6 0.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_265-8u265-b01-0ubuntu2~18.04-b01 on Linux 5.3.0-1034-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 12212 12254 39 8.2 122.1 1.0X -after 1900, vec on 5369 5390 35 18.6 53.7 2.3X -before 1900, vec off 15661 15705 73 6.4 156.6 0.8X -before 1900, vec on 8720 
8744 29 11.5 87.2 1.4X +after 1900, vec off 12245 12269 24 8.2 122.5 1.0X +after 1900, vec on 5258 5303 63 19.0 52.6 2.3X +before 1900, vec off 15698 15777 119 6.4 157.0 0.8X +before 1900, vec on 8568 8674 138 11.7 85.7 1.4X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala index 7caaa5376db7f..bc94d1f235800 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeRebaseBenchmark.scala @@ -165,7 +165,8 @@ object DateTimeRebaseBenchmark extends SqlBasedBenchmark { benchmark.addCase(caseName(modernDates, dateTime, Some(mode)), 1) { _ => withSQLConf( SQLConf.PARQUET_OUTPUT_TIMESTAMP_TYPE.key -> getOutputType(dateTime), - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString) { + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> mode.toString, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> mode.toString) { genDF(rowsNum, dateTime, modernDates) .write .mode("overwrite") From 4a33cd928df4739e69ae9530aae23964e470d2f8 Mon Sep 17 00:00:00 2001 From: Alessandro Patti Date: Wed, 21 Oct 2020 18:14:21 -0700 Subject: [PATCH 0298/1009] [SPARK-33203][PYTHON][TEST] Fix tests failing with rounding errors ### What changes were proposed in this pull request? Increase tolerance for two tests that fail in some environments and fail in others (flaky? Pass/fail is constant within the same environment) ### Why are the changes needed? The tests `pyspark.ml.recommendation` and `pyspark.ml.tests.test_algorithms` fail with ``` File "/home/jenkins/python/pyspark/ml/tests/test_algorithms.py", line 96, in test_raw_and_probability_prediction self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1)) AssertionError: False is not true ``` ``` File "/home/jenkins/python/pyspark/ml/recommendation.py", line 256, in _main_.ALS Failed example: predictions[0] Expected: Row(user=0, item=2, newPrediction=0.6929101347923279) Got: Row(user=0, item=2, newPrediction=0.6929104924201965) ... ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This path changes a test target. Just executed the tests to verify they pass. Closes #30104 from AlessandroPatti/apatti/rounding-errors. Authored-by: Alessandro Patti Signed-off-by: Dongjoon Hyun --- python/pyspark/ml/recommendation.py | 6 +++--- python/pyspark/ml/tests/test_algorithms.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index 4f39c5abec785..4ef38534444cd 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -254,11 +254,11 @@ class ALS(JavaEstimator, _ALSParams, JavaMLWritable, JavaMLReadable): >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", "item"]) >>> predictions = sorted(model.transform(test).collect(), key=lambda r: r[0]) >>> predictions[0] - Row(user=0, item=2, newPrediction=0.6929101347923279) + Row(user=0, item=2, newPrediction=0.692910...) >>> predictions[1] - Row(user=1, item=0, newPrediction=3.47356915473938) + Row(user=1, item=0, newPrediction=3.473569...) >>> predictions[2] - Row(user=2, item=0, newPrediction=-0.8991986513137817) + Row(user=2, item=0, newPrediction=-0.899198...) 
>>> user_recs = model.recommendForAllUsers(3) >>> user_recs.where(user_recs.user == 0)\ .select("recommendations.item", "recommendations.rating").collect() diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index 03653c25b4ad4..f8b61b7c57919 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -86,7 +86,7 @@ def test_raw_and_probability_prediction(self): expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045] self.assertTrue(result.prediction, expected_prediction) self.assertTrue(np.allclose(result.probability, expected_probability, atol=1E-4)) - self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, atol=1)) + self.assertTrue(np.allclose(result.rawPrediction, expected_rawPrediction, rtol=0.1)) class OneVsRestTests(SparkSessionTestCase): From ba13b94f6b2b477a93c0849c1fc776ffd5f1a0e6 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 03:04:29 +0000 Subject: [PATCH 0299/1009] [SPARK-33210][SQL] Set the rebasing mode for parquet INT96 type to `EXCEPTION` by default ### What changes were proposed in this pull request? 1. Set the default value for the SQL configs `spark.sql.legacy.parquet.int96RebaseModeInWrite` and `spark.sql.legacy.parquet.int96RebaseModeInRead` to `EXCEPTION`. 2. Update the SQL migration guide. ### Why are the changes needed? Current default value `LEGACY` may lead to shifting timestamps in read or in write. We should leave the decision about rebasing to users. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By existing test suites like `ParquetIOSuite`. Closes #30121 from MaxGekk/int96-exception-by-default. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ .../scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../datasources/parquet/ParquetFilterSuite.scala | 3 ++- .../execution/datasources/parquet/ParquetIOSuite.scala | 2 +- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 9 +++++---- .../apache/spark/sql/sources/HadoopFsRelationTest.scala | 1 + 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 5612e4f1453f1..124b04fb2bede 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -47,6 +47,8 @@ license: | - In Spark 3.1, `IllegalArgumentException` is returned for the incomplete interval literals, e.g. `INTERVAL '1'`, `INTERVAL '1 DAY 2'`, which are invalid. In Spark 3.0, these literals result in `NULL`s. - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. + + - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3648615a1eaee..65d976958ffdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2666,7 +2666,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_PARQUET_REBASE_MODE_IN_READ = buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") @@ -2696,7 +2696,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.LEGACY.toString) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 763f9315bfc5b..24a1ba124e56b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -586,7 +586,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared Seq(true, false).foreach { java8Api => withSQLConf( SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString, - SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED") { + SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> "CORRECTED", + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> "CORRECTED") { // spark.sql.parquet.outputTimestampType = TIMESTAMP_MILLIS val millisData = Seq( "1000-06-14 08:28:53.123", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 214f36a2df713..dac4e950a7823 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -1022,7 +1022,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } Seq( - "2_4_5" -> successInRead _, + "2_4_5" -> failInRead _, "2_4_6" -> successInRead _).foreach { case (version, checkDefaultRead) => withAllParquetReaders { Seq("plain", "dict").foreach { enc => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 52dd2b34a0e95..db0e93787338e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1513,26 +1513,27 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") + val expectedSize = 636 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") 
var tableStats = getTableStats(tblName) - assert(tableStats.sizeInBytes == 601) + assert(tableStats.sizeInBytes == expectedSize) assert(tableStats.rowCount.isEmpty) sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS") tableStats = getTableStats(tblName) - assert(tableStats.sizeInBytes == 601) + assert(tableStats.sizeInBytes == expectedSize) assert(tableStats.rowCount.get == 1) // analyze a single partition sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS NOSCAN") var partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) - assert(partStats.sizeInBytes == 601) + assert(partStats.sizeInBytes == expectedSize) assert(partStats.rowCount.isEmpty) sql(s"ANALYZE TABLE $tblName PARTITION (ds='2019-12-13') COMPUTE STATISTICS") partStats = getPartitionStats(tblName, Map("ds" -> "2019-12-13")) - assert(partStats.sizeInBytes == 601) + assert(partStats.sizeInBytes == expectedSize) assert(partStats.rowCount.get == 1) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala index cbea74103343e..b65a00457c72c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/HadoopFsRelationTest.scala @@ -155,6 +155,7 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes withSQLConf( SQLConf.DATETIME_JAVA8API_ENABLED.key -> java8Api.toString, SQLConf.LEGACY_PARQUET_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString, + SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString, SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { val dataGenerator = RandomDataGenerator.forType( dataType = dataType, From cb3fa6c9368e64184a5f7b19688181d11de9511c Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 22 Oct 2020 03:21:34 +0000 Subject: [PATCH 0300/1009] [SPARK-33212][BUILD] Move to shaded clients for Hadoop 3.x profile ### What changes were proposed in this pull request? This switches Spark to use shaded Hadoop clients, namely hadoop-client-api and hadoop-client-runtime, for Hadoop 3.x. For Hadoop 2.7, we'll still use the same modules such as hadoop-client. To keep the default Hadoop profile as hadoop-3.2, this defines the following Maven properties: ``` hadoop-client-api.artifact hadoop-client-runtime.artifact hadoop-client-minicluster.artifact ``` which default to: ``` hadoop-client-api hadoop-client-runtime hadoop-client-minicluster ``` but all switch to `hadoop-client` when the Hadoop profile is hadoop-2.7. A side effect of this is that we'll import the same dependency multiple times, so the Maven enforcer rule `banDuplicatePomDependencyVersions` has to be disabled. Besides the above, there are the following changes: - explicitly added a few dependencies which are imported via transitive dependencies from Hadoop jars, but are removed from the shaded client jars. - removed the use of `ProxyUriUtils.getPath` from `ApplicationMaster`, which is a server-side/private API. - modified `IsolatedClientLoader` to exclude `hadoop-auth` jars when Hadoop version is 3.x. This change should only matter when we're not sharing Hadoop classes with Spark (which is _mostly_ relevant in tests). ### Why are the changes needed? This serves two purposes: - to unblock Spark from upgrading to Hadoop 3.2.2/3.3.0+.
Latest Hadoop versions have upgraded to use Guava 27+ and in order to adopt the latest Hadoop versions in Spark, we'll need to resolve the Guava conflicts. This takes the approach by switching to shaded client jars provided by Hadoop. - avoid pulling 3rd party dependencies from Hadoop and avoid potential future conflicts. ### Does this PR introduce _any_ user-facing change? When people use Spark with `hadoop-provided` option, they should make sure class path contains `hadoop-client-api` and `hadoop-client-runtime` jars. In addition, they may need to make sure these jars appear before other Hadoop jars in the order. Otherwise, classes may be loaded from the other non-shaded Hadoop jars and cause potential conflicts. ### How was this patch tested? Relying on existing tests. Closes #29843 from sunchao/SPARK-29250. Authored-by: Chao Sun Signed-off-by: DB Tsai --- common/network-yarn/pom.xml | 8 ++- core/pom.xml | 16 +++++- .../org/apache/spark/deploy/SparkSubmit.scala | 8 ++- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 3 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 52 +---------------- external/kafka-0-10-assembly/pom.xml | 8 ++- external/kafka-0-10-sql/pom.xml | 4 ++ external/kafka-0-10-token-provider/pom.xml | 5 ++ external/kinesis-asl-assembly/pom.xml | 8 ++- hadoop-cloud/pom.xml | 7 ++- launcher/pom.xml | 9 ++- pom.xml | 57 +++++++++++++++---- resource-managers/yarn/pom.xml | 53 +++++++++++------ .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../deploy/yarn/BaseYarnClusterSuite.scala | 10 ++++ sql/catalyst/pom.xml | 4 ++ sql/hive/pom.xml | 5 ++ .../hive/client/IsolatedClientLoader.scala | 19 ++++++- 18 files changed, 186 insertions(+), 96 deletions(-) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c5..9938e5d769e12 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,7 +65,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 14b217d7fb22e..7a56c4ca3c638 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,7 +66,13 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} org.apache.spark @@ -177,6 +183,14 @@ org.apache.commons commons-text + + commons-io + commons-io + + + commons-collections + commons-collections + com.google.code.findbugs jsr305 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 8363d570d7320..93370f5dae72e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1182,10 +1182,12 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { artifactInfo => - val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + artifacts.map { ai => + val artifactInfo = ai.asInstanceOf[Artifact] + val artifact = artifactInfo.getModuleRevisionId + val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" 
}.mkString(",") } diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index e365559ed8cbf..b0b215a316df2 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -127,7 +127,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.2//jaxb-api-2.2.2.jar +jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -227,7 +227,6 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar -stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0c050d62db3da..b64c7989a4e02 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,14 +3,12 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar -accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar -aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar arrow-format/1.0.1//arrow-format-1.0.1.jar arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar @@ -27,15 +25,12 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar -commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.8.1//commons-compress-1.8.1.jar -commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.0.0//commons-crypto-1.0.0.jar -commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -55,30 +50,13 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar -dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar -geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -guice-servlet/4.0//guice-servlet-4.0.jar -guice/4.0//guice-4.0.jar -hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar -hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar -hadoop-client/3.2.0//hadoop-client-3.2.0.jar 
-hadoop-common/3.2.0//hadoop-common-3.2.0.jar -hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar -hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar -hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar -hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar -hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar -hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar -hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar -hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar -hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar -hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar +hadoop-client-api/3.2.0//hadoop-client-api-3.2.0.jar +hadoop-client-runtime/3.2.0//hadoop-client-runtime-3.2.0.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -108,8 +86,6 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar -jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar -jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -122,13 +98,11 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar -javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -142,30 +116,14 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar -json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar -jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar -kerb-admin/1.0.1//kerb-admin-1.0.1.jar -kerb-client/1.0.1//kerb-client-1.0.1.jar -kerb-common/1.0.1//kerb-common-1.0.1.jar -kerb-core/1.0.1//kerb-core-1.0.1.jar -kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar -kerb-identity/1.0.1//kerb-identity-1.0.1.jar -kerb-server/1.0.1//kerb-server-1.0.1.jar -kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar -kerb-util/1.0.1//kerb-util-1.0.1.jar -kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar -kerby-config/1.0.1//kerby-config-1.0.1.jar -kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar -kerby-util/1.0.1//kerby-util-1.0.1.jar -kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar @@ -203,9 +161,7 @@ 
metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar -nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar -okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -225,7 +181,6 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar -re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -243,15 +198,12 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar -stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar -token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar -woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c77..b1e306c499385 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,9 +71,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412e..06a6bef005e69 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,6 +79,10 @@ kafka-clients ${kafka.version} + + com.google.code.findbugs + jsr305 + org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96f..1b0d6d322917f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,6 +58,11 @@ mockito-core test + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f85..5a49358a84241 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,9 +91,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea8..a5642a5a68fe4 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,10 +58,15 @@ org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} ${hadoop.version} provided + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.version} + org.apache.hadoop - hadoop-client + ${hadoop-client-api.artifact} + ${hadoop.version} + test + + + org.apache.hadoop + 
${hadoop-client-runtime.artifact} + ${hadoop.version} test diff --git a/pom.xml b/pom.xml index 2fd002e91751f..8b2130f2d9f56 100644 --- a/pom.xml +++ b/pom.xml @@ -243,6 +243,15 @@ compile test + + hadoop-client-api + hadoop-client-runtime + hadoop-client-minicluster + + + org.apache.hadoop + hadoop-client-api + ${hadoop.version} + ${hadoop.deps.scope} + + + org.apache.hadoop + hadoop-client-runtime + ${hadoop.version} + runtime + + + org.apache.hadoop + hadoop-client-minicluster + ${yarn.version} + test + + org.apache.hadoop hadoop-client @@ -1632,6 +1666,14 @@ org.apache.ant ant + + org.apache.hadoop + hadoop-common + + + org.apache.hadoop + hadoop-auth + org.apache.zookeeper zookeeper @@ -2396,17 +2438,6 @@ - - enforce-no-duplicate-dependencies - - enforce - - - - - - - @@ -2866,6 +2897,7 @@ maven-shade-plugin false + false org.spark-project.spark:unused @@ -3127,6 +3159,9 @@ 2.7.4 2.7.1 2.4 + hadoop-client + hadoop-client + hadoop-client diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index bc80769be2390..da715c6bdc59f 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,6 +40,33 @@ true + + hadoop-2.7 + + + org.apache.hadoop + hadoop-yarn-api + + + org.apache.hadoop + hadoop-yarn-common + + + org.apache.hadoop + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-yarn-server-tests + tests + test + + + @@ -69,23 +96,20 @@ org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy + ${hadoop-client-api.artifact} + ${hadoop.version} org.apache.hadoop - hadoop-yarn-client + ${hadoop-client-runtime.artifact} + ${hadoop.version} + ${hadoop.deps.scope} org.apache.hadoop - hadoop-client + ${hadoop-client-minicluster.artifact} + ${hadoop.version} + test @@ -142,13 +166,6 @@ test - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 5f632fbb259ff..9b99e8ff9265c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL} +import java.net.{URI, URL, URLEncoder} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,7 +36,6 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException -import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -308,7 +307,8 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. 
registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) + val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") + addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index 20f5339c46fef..a813b9913f23b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,6 +80,16 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") + // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround + yarnConf.set("yarn.scheduler.capacity.root.queues", "default") + yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) + yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) + yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) + yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") + yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") + yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) + yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcdd..af976fa1fa983 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,6 +104,10 @@ org.antlr antlr4-runtime + + javax.xml.bind + jaxb-api + commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0453094cf8b7b..4fca6264c0594 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,6 +162,11 @@ org.datanucleus datanucleus-core + + org.apache.hadoop + ${hadoop-client-runtime.artifact} + ${hadoop.deps.scope} + org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 42a0ec0253b85..f9946fe8e0616 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -118,11 +118,24 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { + val hadoopJarNames = if (hadoopVersion.startsWith("3")) { + Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", + s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") + } else { + Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") + } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1", - s"org.apache.hadoop:hadoop-client:$hadoopVersion") + 
Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames + + val extraExclusions = if (hadoopVersion.startsWith("3")) { + // this introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so + // exclude here in favor of the ones in Hadoop 3.2+ + Seq("org.apache.hadoop:hadoop-auth") + } else { + Seq.empty + } val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -130,7 +143,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), - exclusions = version.exclusions) + exclusions = version.exclusions ++ extraExclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet From eb33bcb4b2db2a13b3da783e58feb8852e04637b Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 22 Oct 2020 07:59:49 +0000 Subject: [PATCH 0301/1009] [SPARK-30796][SQL] Add parameter position for REGEXP_REPLACE ### What changes were proposed in this pull request? `REGEXP_REPLACE` could replace all substrings of string that match regexp with replacement string. But `REGEXP_REPLACE` lost some flexibility. such as: converts camel case strings to a string containing lower case words separated by an underscore: AddressLine1 -> address_line_1 If we support the parameter position, we can do like this(e.g. Oracle): ``` WITH strings as ( SELECT 'AddressLine1' s FROM dual union all SELECT 'ZipCode' s FROM dual union all SELECT 'Country' s FROM dual ) SELECT s "STRING", lower(regexp_replace(s, '([A-Z0-9])', '_\1', 2)) "MODIFIED_STRING" FROM strings; ``` The output: ``` STRING MODIFIED_STRING -------------------- -------------------- AddressLine1 address_line_1 ZipCode zip_code Country country ``` There are some mainstream database support the syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/REGEXP_REPLACE.html#GUID-EA80A33C-441A-4692-A959-273B5A224490 **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/RegularExpressions/REGEXP_REPLACE.htm?zoom_highlight=regexp_replace **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/REGEXP_REPLACE.html ### Why are the changes needed? The parameter position for `REGEXP_REPLACE` is very useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? Jenkins test. Closes #29891 from beliefer/add-position-for-regex_replace. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/regexpExpressions.scala | 101 ++++++++++++++---- .../expressions/RegexpExpressionsSuite.scala | 14 +++ .../sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/inputs/regexp-functions.sql | 12 +++ .../results/regexp-functions.sql.out | 84 ++++++++++++++- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 8eb7f463e049c..c9dd7c7acddde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -24,6 +24,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{GenericArrayData, StringUtils} @@ -318,7 +320,24 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(str, regexp, rep) - Replaces all substrings of `str` that match `regexp` with `rep`.", + usage = "_FUNC_(str, regexp, rep[, position]) - Replaces all substrings of `str` that match `regexp` with `rep`.", + arguments = """ + Arguments: + * str - a string expression to search for a regular expression pattern match. + * regexp - a string representing a regular expression. The regex string should be a + Java regular expression. + + Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL + parser. For example, to match "\abc", a regular expression for `regexp` can be + "^\\abc$". + + There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to + fallback to the Spark 1.6 behavior regarding string literal parsing. For example, + if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". + * rep - a string expression to replace matched substrings. + * position - a positive integer literal that indicates the position within `str` to begin searching. + The default is 1. If position is greater than the number of characters in `str`, the result is `str`. 
+ """, examples = """ Examples: > SELECT _FUNC_('100-200', '(\\d+)', 'num'); @@ -326,8 +345,24 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) """, since = "1.5.0") // scalastyle:on line.size.limit -case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression) - extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { +case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression, pos: Expression) + extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { + + def this(subject: Expression, regexp: Expression, rep: Expression) = + this(subject, regexp, rep, Literal(1)) + + override def checkInputDataTypes(): TypeCheckResult = { + if (!pos.foldable) { + return TypeCheckFailure(s"Position expression must be foldable, but got $pos") + } + + val posEval = pos.eval() + if (posEval == null || posEval.asInstanceOf[Int] > 0) { + TypeCheckSuccess + } else { + TypeCheckFailure(s"Position expression must be positive, but got: $posEval") + } + } // last regex in string, we will update the pattern iff regexp value changed. @transient private var lastRegex: UTF8String = _ @@ -339,7 +374,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio // result buffer write by Matcher @transient private lazy val result: StringBuffer = new StringBuffer - override def nullSafeEval(s: Any, p: Any, r: Any): Any = { + override def nullSafeEval(s: Any, p: Any, r: Any, i: Any): Any = { if (!p.equals(lastRegex)) { // regex value changed lastRegex = p.asInstanceOf[UTF8String].clone() @@ -350,20 +385,26 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio lastReplacementInUTF8 = r.asInstanceOf[UTF8String].clone() lastReplacement = lastReplacementInUTF8.toString } - val m = pattern.matcher(s.toString()) - result.delete(0, result.length()) - - while (m.find) { - m.appendReplacement(result, lastReplacement) + val source = s.toString() + val position = i.asInstanceOf[Int] - 1 + if (position < source.length) { + val m = pattern.matcher(source) + m.region(position, source.length) + result.delete(0, result.length()) + while (m.find) { + m.appendReplacement(result, lastReplacement) + } + m.appendTail(result) + UTF8String.fromString(result.toString) + } else { + s } - m.appendTail(result) - - UTF8String.fromString(result.toString) } override def dataType: DataType = StringType - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) - override def children: Seq[Expression] = subject :: regexp :: rep :: Nil + override def inputTypes: Seq[AbstractDataType] = + Seq(StringType, StringType, StringType, IntegerType) + override def children: Seq[Expression] = subject :: regexp :: rep :: pos :: Nil override def prettyName: String = "regexp_replace" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -373,6 +414,8 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio val classNameStringBuffer = classOf[java.lang.StringBuffer].getCanonicalName val matcher = ctx.freshName("matcher") + val source = ctx.freshName("source") + val position = ctx.freshName("position") val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") val termPattern = ctx.addMutableState(classNamePattern, "pattern") @@ -385,7 +428,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio "" } - nullSafeCodeGen(ctx, ev, (subject, regexp, rep) => { + nullSafeCodeGen(ctx, 
ev, (subject, regexp, rep, pos) => { s""" if (!$regexp.equals($termLastRegex)) { // regex value changed @@ -397,21 +440,33 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio $termLastReplacementInUTF8 = $rep.clone(); $termLastReplacement = $termLastReplacementInUTF8.toString(); } - $classNameStringBuffer $termResult = new $classNameStringBuffer(); - java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); - - while ($matcher.find()) { - $matcher.appendReplacement($termResult, $termLastReplacement); + String $source = $subject.toString(); + int $position = $pos - 1; + if ($position < $source.length()) { + $classNameStringBuffer $termResult = new $classNameStringBuffer(); + java.util.regex.Matcher $matcher = $termPattern.matcher($source); + $matcher.region($position, $source.length()); + + while ($matcher.find()) { + $matcher.appendReplacement($termResult, $termLastReplacement); + } + $matcher.appendTail($termResult); + ${ev.value} = UTF8String.fromString($termResult.toString()); + $termResult = null; + } else { + ${ev.value} = $subject; } - $matcher.appendTail($termResult); - ${ev.value} = UTF8String.fromString($termResult.toString()); - $termResult = null; $setEvNotNull """ }) } } +object RegExpReplace { + def apply(subject: Expression, regexp: Expression, rep: Expression): RegExpReplace = + new RegExpReplace(subject, regexp, rep) +} + object RegExpExtractBase { def checkGroupIndex(groupCount: Int, groupIndex: Int): Unit = { if (groupIndex < 0) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 205dc10efc8a8..77a32a735f76d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -253,6 +253,20 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(expr, null, row4) checkEvaluation(expr, null, row5) checkEvaluation(expr, null, row6) + // test position + val exprWithPos = RegExpReplace(s, p, r, 4) + checkEvaluation(exprWithPos, "100-num", row1) + checkEvaluation(exprWithPos, "100-###", row2) + checkEvaluation(exprWithPos, "100###200", row3) + checkEvaluation(exprWithPos, null, row4) + checkEvaluation(exprWithPos, null, row5) + checkEvaluation(exprWithPos, null, row6) + val exprWithLargePos = RegExpReplace(s, p, r, 7) + checkEvaluation(exprWithLargePos, "100-20num", row1) + checkEvaluation(exprWithLargePos, "100-20###", row2) + val exprWithExceedLength = RegExpReplace(s, p, r, 8) + checkEvaluation(exprWithExceedLength, "100-200", row1) + checkEvaluation(exprWithExceedLength, "100-200", row2) val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num")) checkEvaluation(nonNullExpr, "num-num", row1) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1675fb1cc7c62..da83df4994d8d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -217,7 +217,7 @@ | org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | 
org.apache.spark.sql.catalyst.expressions.RegExpExtract | regexp_extract | SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpExtractAll | regexp_extract_all | SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1) | struct> | -| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | +| org.apache.spark.sql.catalyst.expressions.RegExpReplace | regexp_replace | SELECT regexp_replace('100-200', '(\\d+)', 'num') | struct | | org.apache.spark.sql.catalyst.expressions.Remainder | % | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | | org.apache.spark.sql.catalyst.expressions.Remainder | mod | SELECT 2 % 1.8 | struct<(CAST(CAST(2 AS DECIMAL(1,0)) AS DECIMAL(2,1)) % CAST(1.8 AS DECIMAL(2,1))):decimal(2,1)> | | org.apache.spark.sql.catalyst.expressions.Reverse | reverse | SELECT reverse('Spark SQL') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql index 7128dee0a00d7..3f3eaaae9ee4e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -31,3 +31,15 @@ SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 3); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', -1); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)?([a-z]+)', 1); SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1); + +-- regexp_replace +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something'); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 1); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 2); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30); +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index 2eef926f63e37..8d471a5bb1c87 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 30 +-- Number of queries: 37 -- !query @@ -252,3 +252,85 @@ SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1) struct> -- !query output ["","2","14"] + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something') +-- !query schema +struct +-- !query output +something, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', -2)' due to data type mismatch: 
Position expression must be positive, but got: -2; line 1 pos 7 + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 0)' due to data type mismatch: Position expression must be positive, but got: 0; line 1 pos 7 + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 1) +-- !query schema +struct +-- !query output +something, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 2) +-- !query schema +struct +-- !query output +hsomething, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8) +-- !query schema +struct +-- !query output +healthy, something, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26) +-- !query schema +struct +-- !query output +healthy, wealthy, and wissomething + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27) +-- !query schema +struct +-- !query output +healthy, wealthy, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30) +-- !query schema +struct +-- !query output +healthy, wealthy, and wise + + +-- !query +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) +-- !query schema +struct +-- !query output +NULL \ No newline at end of file From a908b67502164d5b1409aca912dac7042e825586 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 22 Oct 2020 01:10:24 -0700 Subject: [PATCH 0302/1009] [SPARK-33218][CORE] Update misleading log messages for removed shuffle blocks ### What changes were proposed in this pull request? This updates the misleading log messages for removed shuffle block during migration. ### Why are the changes needed? 1. For the deleted shuffle blocks, `IndexShuffleBlockResolver` shows users WARN message saying `skipping migration`. However, `BlockManagerDecommissioner` shows users INFO message including `Migrated ShuffleBlockInfo(...)` inconsistently. Technically, we didn't migrated. We should not show `Migrated` message in this case. ``` INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (2 / 3) WARN IndexShuffleBlockResolver: Failed to resolve shuffle block ShuffleBlockInfo(109,18924), skipping migration. This is expected to occur if a block is removed after decommissioning has started. INFO BlockManagerDecommissioner: Got migration sub-blocks List() ... INFO BlockManagerDecommissioner: Migrated ShuffleBlockInfo(109,18924) to BlockManagerId(...) ``` 2. In addition, if the shuffle file is deleted while the information is in the queue, the above messages are repeated multiple times, `spark.storage.decommission.maxReplicationFailuresPerBlock`. We had better use one line instead of the group of messages for that case. ``` INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (0 / 3) ... INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (1 / 3) ... INFO BlockManagerDecommissioner: Trying to migrate shuffle ShuffleBlockInfo(109,18924) to BlockManagerId(...) (2 / 3) ``` 3. Skipping or not is a role of `BlockManagerDecommissioner` class. 
`IndexShuffleBlockResolver.getMigrationBlocks` is used twice differently like the following. We had better inform users at `BlockManagerDecommissioner` once. - At the beginning, to get the sub-blocks. - In case of `IOException`, to determine whether ignoring it or re-throwing. And, `BlockManagerDecommissioner` shows WARN message (`Skipping block ...`) again. ### Does this PR introduce _any_ user-facing change? No. This is an update for log message info to be consistent. ### How was this patch tested? Manually. Closes #30129 from dongjoon-hyun/SPARK-33218. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../shuffle/IndexShuffleBlockResolver.scala | 2 +- .../storage/BlockManagerDecommissioner.scala | 64 ++++++++++--------- 2 files changed, 35 insertions(+), 31 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index 525b8fd3f6923..e5df27c0d3c7a 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -252,7 +252,7 @@ private[spark] class IndexShuffleBlockResolver( } } catch { case _: Exception => // If we can't load the blocks ignore them. - logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}, skipping migration. " + + logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}. " + "This is expected to occur if a block is removed after decommissioning has started.") List.empty[(BlockId, ManagedBuffer)] } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 89d12406365dc..d1e89418a4897 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -83,38 +83,42 @@ private[storage] class BlockManagerDecommissioner( Thread.sleep(SLEEP_TIME_SECS * 1000L) case Some((shuffleBlockInfo, retryCount)) => if (retryCount < maxReplicationFailuresForDecommission) { - logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + - s"($retryCount / $maxReplicationFailuresForDecommission)") val blocks = bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo) - logInfo(s"Got migration sub-blocks ${blocks}") - - // Migrate the components of the blocks. - try { - blocks.foreach { case (blockId, buffer) => - logDebug(s"Migrating sub-block ${blockId}") - bm.blockTransferService.uploadBlockSync( - peer.host, - peer.port, - peer.executorId, - blockId, - buffer, - StorageLevel.DISK_ONLY, - null)// class tag, we don't need for shuffle - logDebug(s"Migrated sub block ${blockId}") - } - logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") - } catch { - case e: IOException => - // If a block got deleted before netty opened the file handle, then trying to - // load the blocks now will fail. This is most likely to occur if we start - // migrating blocks and then the shuffle TTL cleaner kicks in. However this - // could also happen with manually managed shuffles or a GC event on the driver - // a no longer referenced RDD with shuffle files. 
- if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { - logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") - } else { - throw e + if (blocks.isEmpty) { + logInfo(s"Ignore empty shuffle block $shuffleBlockInfo") + } else { + logInfo(s"Got migration sub-blocks ${blocks}") + logInfo(s"Trying to migrate shuffle ${shuffleBlockInfo} to ${peer} " + + s"($retryCount / $maxReplicationFailuresForDecommission)") + + // Migrate the components of the blocks. + try { + blocks.foreach { case (blockId, buffer) => + logDebug(s"Migrating sub-block ${blockId}") + bm.blockTransferService.uploadBlockSync( + peer.host, + peer.port, + peer.executorId, + blockId, + buffer, + StorageLevel.DISK_ONLY, + null) // class tag, we don't need for shuffle + logDebug(s"Migrated sub block ${blockId}") } + logInfo(s"Migrated ${shuffleBlockInfo} to ${peer}") + } catch { + case e: IOException => + // If a block got deleted before netty opened the file handle, then trying to + // load the blocks now will fail. This is most likely to occur if we start + // migrating blocks and then the shuffle TTL cleaner kicks in. However this + // could also happen with manually managed shuffles or a GC event on the + // driver a no longer referenced RDD with shuffle files. + if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { + logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else { + throw e + } + } } } else { logError(s"Skipping block ${shuffleBlockInfo} because it has failed ${retryCount}") From d9ee33cfb95e1f05878e498c93c5cc65ce449f0e Mon Sep 17 00:00:00 2001 From: Xuedong Luan Date: Thu, 22 Oct 2020 17:23:10 +0900 Subject: [PATCH 0303/1009] [SPARK-26533][SQL] Support query auto timeout cancel on thriftserver ### What changes were proposed in this pull request? Support query auto cancelling when running too long on thriftserver. This is the rework of #28991 and the credit should be the original author, leoluan2009. Closes #28991 ### Why are the changes needed? For some cases, we use thriftserver as long-running applications. Some times we want all the query need not to run more than given time. In these cases, we can enable auto cancel for time-consumed query.Which can let us release resources for other queries to run. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #29933 from maropu/pr28991. 
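For illustration, a minimal JDBC usage sketch of the timeout behaviour described above (the host, port, and query are placeholders borrowed from the tests added below, not values mandated by this patch): a per-statement timeout set via `java.sql.Statement.setQueryTimeout` and the global `spark.sql.thriftServer.queryTimeout` config can both cancel a long-running query, and when both are positive the smaller value takes effect.

```
import java.sql.{DriverManager, SQLException}

// Minimal sketch assuming a Thrift Server is reachable at localhost:10000 and the
// Hive JDBC driver is on the classpath; adjust the URL and query for your setup.
object QueryTimeoutSketch {
  def main(args: Array[String]): Unit = {
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
    val stmt = conn.createStatement()
    try {
      // Global timeout for the session, in seconds (0 disables it).
      stmt.execute("SET spark.sql.thriftServer.queryTimeout=30")
      // Per-statement timeout; since 5 < 30, the smaller value applies.
      stmt.setQueryTimeout(5)
      try {
        stmt.executeQuery("SELECT java_method('java.lang.Thread', 'sleep', 10000L)")
      } catch {
        case e: SQLException =>
          // The long-running query is cancelled and reported as timed out.
          println(s"Cancelled: ${e.getMessage}")
      }
    } finally {
      stmt.close()
      conn.close()
    }
  }
}
```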
Lead-authored-by: Xuedong Luan Co-authored-by: Takeshi Yamamuro Co-authored-by: Luan Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/internal/SQLConf.scala | 11 ++++ .../cli/operation/OperationManager.java | 4 +- .../service/cli/operation/SQLOperation.java | 7 ++- .../hive/thriftserver/HiveThriftServer2.scala | 2 +- .../SparkExecuteStatementOperation.scala | 45 ++++++++++++++- .../server/SparkSQLOperationManager.scala | 5 +- .../ui/HiveThriftServer2AppStatusStore.scala | 1 + .../ui/HiveThriftServer2EventManager.scala | 7 +++ .../ui/HiveThriftServer2Listener.scala | 10 ++++ .../HiveThriftServer2Suites.scala | 55 ++++++++++++++++++- .../SparkExecuteStatementOperationSuite.scala | 3 +- .../ui/HiveThriftServer2ListenerSuite.scala | 1 + 12 files changed, 140 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 65d976958ffdd..dad59ba0e7327 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -893,6 +893,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val THRIFTSERVER_QUERY_TIMEOUT = + buildConf("spark.sql.thriftServer.queryTimeout") + .doc("Set a query duration timeout in seconds in Thrift Server. If the timeout is set to " + + "a positive value, a running query will be cancelled automatically when the timeout is " + + "exceeded, otherwise the query continues to run till completion. If timeout values are " + + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + + "than this configuration value, they take precedence.") + .version("3.1.0") + .timeConf(TimeUnit.SECONDS) + .createWithDefault(0L) + val THRIFTSERVER_UI_STATEMENT_LIMIT = buildConf("spark.sql.thriftserver.ui.retainedStatements") .doc("The number of SQL statements kept in the JDBC/ODBC web UI history.") diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java index 75edc5763ce44..3df842d2b4af9 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -97,7 +97,8 @@ public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parent public ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runAsync, long queryTimeout) throws HiveSQLException { - return newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync); + return newExecuteStatementOperation(parentSession, statement, confOverlay, runAsync, + queryTimeout); } public GetTypeInfoOperation newGetTypeInfoOperation(HiveSession parentSession) { @@ -207,6 +208,7 @@ public void cancelOperation(OperationHandle opHandle) throws HiveSQLException { Operation operation = getOperation(opHandle); OperationState opState = operation.getStatus().getState(); if (opState == OperationState.CANCELED || + opState == OperationState.TIMEDOUT || opState == OperationState.CLOSED || opState == OperationState.FINISHED || opState == OperationState.ERROR || diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index e2ac1ea78c1ab..894793152f409 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -155,11 +155,12 @@ private void runQuery(HiveConf sqlOperationConf) throws HiveSQLException { throw toSQLException("Error while processing statement", response); } } catch (HiveSQLException e) { - // If the operation was cancelled by another thread, + // If the operation was cancelled by another thread or timed out, // Driver#run will return a non-zero response code. - // We will simply return if the operation state is CANCELED, + // We will simply return if the operation state is CANCELED or TIMEDOUT, // otherwise throw an exception - if (getStatus().getState() == OperationState.CANCELED) { + if (getStatus().getState() == OperationState.CANCELED || + getStatus().getState() == OperationState.TIMEDOUT) { return; } else { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 4e6729faced43..a1f2d62a0b72c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -116,7 +116,7 @@ object HiveThriftServer2 extends Logging { } private[thriftserver] object ExecutionState extends Enumeration { - val STARTED, COMPILED, CANCELED, FAILED, FINISHED, CLOSED = Value + val STARTED, COMPILED, CANCELED, TIMEDOUT, FAILED, FINISHED, CLOSED = Value type ExecutionState = Value } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index ec2c795e95c83..bc8cc16746a30 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.hive.thriftserver import java.security.PrivilegedExceptionAction import java.util.{Arrays, Map => JMap} -import java.util.concurrent.RejectedExecutionException +import java.util.concurrent.{Executors, RejectedExecutionException, TimeUnit} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -45,11 +45,24 @@ private[hive] class SparkExecuteStatementOperation( parentSession: HiveSession, statement: String, confOverlay: JMap[String, String], - runInBackground: Boolean = true) + runInBackground: Boolean = true, + queryTimeout: Long) extends ExecuteStatementOperation(parentSession, statement, confOverlay, runInBackground) with SparkOperation with Logging { + // If a timeout value `queryTimeout` is specified by users and it is smaller than + // a global timeout value, we use the user-specified value. + // This code follows the Hive timeout behaviour (See #29933 for details). 
+ private val timeout = { + val globalTimeout = sqlContext.conf.getConf(SQLConf.THRIFTSERVER_QUERY_TIMEOUT) + if (globalTimeout > 0 && (queryTimeout <= 0 || globalTimeout < queryTimeout)) { + globalTimeout + } else { + queryTimeout + } + } + private var result: DataFrame = _ // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. @@ -200,6 +213,23 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getUsername) setHasResultSet(true) // avoid no resultset for async run + if (timeout > 0) { + val timeoutExecutor = Executors.newSingleThreadScheduledExecutor() + timeoutExecutor.schedule(new Runnable { + override def run(): Unit = { + try { + timeoutCancel() + } catch { + case NonFatal(e) => + setOperationException(new HiveSQLException(e)) + logError(s"Error cancelling the query after timeout: $timeout seconds") + } finally { + timeoutExecutor.shutdown() + } + } + }, timeout, TimeUnit.SECONDS) + } + if (!runInBackground) { execute() } else { @@ -328,6 +358,17 @@ private[hive] class SparkExecuteStatementOperation( } } + def timeoutCancel(): Unit = { + synchronized { + if (!getStatus.getState.isTerminal) { + logInfo(s"Query with $statementId timed out after $timeout seconds") + setState(OperationState.TIMEDOUT) + cleanup() + HiveThriftServer2.eventManager.onStatementTimeout(statementId) + } + } + } + override def cancel(): Unit = { synchronized { if (!getStatus.getState.isTerminal) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index bc9c13eb0d4f8..ba42eefed2a22 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -44,14 +44,15 @@ private[thriftserver] class SparkSQLOperationManager() parentSession: HiveSession, statement: String, confOverlay: JMap[String, String], - async: Boolean): ExecuteStatementOperation = synchronized { + async: Boolean, + queryTimeout: Long): ExecuteStatementOperation = synchronized { val sqlContext = sessionToContexts.get(parentSession.getSessionHandle) require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + s" initialized or had already closed.") val conf = sqlContext.sessionState.conf val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC) val operation = new SparkExecuteStatementOperation( - sqlContext, parentSession, statement, confOverlay, runInBackground) + sqlContext, parentSession, statement, confOverlay, runInBackground, queryTimeout) handleToOperation.put(operation.getHandle, operation) logDebug(s"Created Operation for $statement with session=$parentSession, " + s"runInBackground=$runInBackground") diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala index 5cb78f6e64650..8bd8f29a4b9ec 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2AppStatusStore.scala @@ -119,6 +119,7 @@ 
private[thriftserver] class ExecutionInfo( def isExecutionActive: Boolean = { !(state == ExecutionState.FAILED || state == ExecutionState.CANCELED || + state == ExecutionState.TIMEDOUT || state == ExecutionState.CLOSED) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala index fa04c67896a69..202fdf33c0dd9 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2EventManager.scala @@ -57,6 +57,10 @@ private[thriftserver] class HiveThriftServer2EventManager(sc: SparkContext) { postLiveListenerBus(SparkListenerThriftServerOperationCanceled(id, System.currentTimeMillis())) } + def onStatementTimeout(id: String): Unit = { + postLiveListenerBus(SparkListenerThriftServerOperationTimeout(id, System.currentTimeMillis())) + } + def onStatementError(id: String, errorMsg: String, errorTrace: String): Unit = { postLiveListenerBus(SparkListenerThriftServerOperationError(id, errorMsg, errorTrace, System.currentTimeMillis())) @@ -96,6 +100,9 @@ private[thriftserver] case class SparkListenerThriftServerOperationParsed( private[thriftserver] case class SparkListenerThriftServerOperationCanceled( id: String, finishTime: Long) extends SparkListenerEvent +private[thriftserver] case class SparkListenerThriftServerOperationTimeout( + id: String, finishTime: Long) extends SparkListenerEvent + private[thriftserver] case class SparkListenerThriftServerOperationError( id: String, errorMsg: String, diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala index 6b7e5ee611417..4cf672e3d9d9e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala @@ -119,6 +119,7 @@ private[thriftserver] class HiveThriftServer2Listener( case e: SparkListenerThriftServerOperationStart => onOperationStart(e) case e: SparkListenerThriftServerOperationParsed => onOperationParsed(e) case e: SparkListenerThriftServerOperationCanceled => onOperationCanceled(e) + case e: SparkListenerThriftServerOperationTimeout => onOperationTimeout(e) case e: SparkListenerThriftServerOperationError => onOperationError(e) case e: SparkListenerThriftServerOperationFinish => onOperationFinished(e) case e: SparkListenerThriftServerOperationClosed => onOperationClosed(e) @@ -181,6 +182,15 @@ private[thriftserver] class HiveThriftServer2Listener( case None => logWarning(s"onOperationCanceled called with unknown operation id: ${e.id}") } + private def onOperationTimeout(e: SparkListenerThriftServerOperationTimeout): Unit = + Option(executionList.get(e.id)) match { + case Some(executionData) => + executionData.finishTimestamp = e.finishTime + executionData.state = ExecutionState.TIMEDOUT + updateLiveStore(executionData) + case None => logWarning(s"onOperationCanceled called with unknown operation id: ${e.id}") + } + private def onOperationError(e: SparkListenerThriftServerOperationError): Unit = Option(executionList.get(e.id)) match { case 
Some(executionData) => diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 75c00000dee47..7cc60bb505089 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -46,6 +46,7 @@ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.HiveTestJars +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -285,7 +286,6 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } test("test multiple session") { - import org.apache.spark.sql.internal.SQLConf var defaultV1: String = null var defaultV2: String = null var data: ArrayBuffer[Int] = null @@ -880,6 +880,59 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { assert(rs.getString(1) === expected.toString) } } + + test("SPARK-26533: Support query auto timeout cancel on thriftserver - setQueryTimeout") { + withJdbcStatement() { statement => + statement.setQueryTimeout(1) + val e = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e.contains("Query timed out after")) + + statement.setQueryTimeout(0) + val rs1 = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs1.next() + assert(rs1.getString(1) == "test") + + statement.setQueryTimeout(-1) + val rs2 = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs2.next() + assert(rs2.getString(1) == "test") + } + } + + test("SPARK-26533: Support query auto timeout cancel on thriftserver - SQLConf") { + withJdbcStatement() { statement => + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + val e1 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e1.contains("Query timed out after")) + + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=0") + val rs = statement.executeQuery( + "select 'test', java_method('java.lang.Thread', 'sleep', 3000L)") + rs.next() + assert(rs.getString(1) == "test") + + // Uses a smaller timeout value of a config value and an a user-specified one + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + statement.setQueryTimeout(30) + val e2 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e2.contains("Query timed out after")) + + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=30") + statement.setQueryTimeout(1) + val e3 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 10000L)") + }.getMessage + assert(e3.contains("Query timed out after")) + } + } } class SingleSessionSuite extends HiveThriftJdbcTest { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala index ca1f9a2f74244..c8bb6d9ee0821 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperationSuite.scala @@ -61,6 +61,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark Seq( (OperationState.CANCELED, (_: SparkExecuteStatementOperation).cancel()), + (OperationState.TIMEDOUT, (_: SparkExecuteStatementOperation).timeoutCancel()), (OperationState.CLOSED, (_: SparkExecuteStatementOperation).close()) ).foreach { case (finalState, transition) => test("SPARK-32057 SparkExecuteStatementOperation should not transiently become ERROR " + @@ -109,7 +110,7 @@ class SparkExecuteStatementOperationSuite extends SparkFunSuite with SharedSpark signal: Semaphore, finalState: OperationState) extends SparkExecuteStatementOperation(sqlContext, hiveSession, statement, - new util.HashMap, false) { + new util.HashMap, false, 0) { override def cleanup(): Unit = { super.cleanup() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala index 9a9f574153a0a..3f0538dd1c943 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala @@ -151,6 +151,7 @@ class HiveThriftServer2ListenerSuite extends SparkFunSuite with BeforeAndAfter { "stmt", "groupId", 0)) listener.onOtherEvent(SparkListenerThriftServerOperationParsed(unknownOperation, "query")) listener.onOtherEvent(SparkListenerThriftServerOperationCanceled(unknownOperation, 0)) + listener.onOtherEvent(SparkListenerThriftServerOperationTimeout(unknownOperation, 0)) listener.onOtherEvent(SparkListenerThriftServerOperationError(unknownOperation, "msg", "trace", 0)) listener.onOtherEvent(SparkListenerThriftServerOperationFinish(unknownOperation, 0)) From 8cae7f88b011939473fc9a6373012e23398bbc07 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 22 Oct 2020 13:51:42 +0000 Subject: [PATCH 0304/1009] [SPARK-33095][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MySQL dialect) ### What changes were proposed in this pull request? Override the default SQL strings for: ALTER TABLE UPDATE COLUMN TYPE ALTER TABLE UPDATE COLUMN NULLABILITY in the following MySQL JDBC dialect according to official documentation. Write MySQL integration tests for JDBC. ### Why are the changes needed? Improved code coverage and support mysql dialect for jdbc. ### Does this PR introduce _any_ user-facing change? Yes, Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MySQL dialect) ### How was this patch tested? Added tests. Closes #30025 from ScrapCodes/mysql-dialect. 
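For illustration, a minimal sketch (with a made-up JDBC URL, table name, and column type) of how the two overridden dialect hooks behave once this change is in place:
```
// Minimal sketch: URL/table/column names below are made up for illustration.
import org.apache.spark.sql.jdbc.JdbcDialects

val mysql = JdbcDialects.get("jdbc:mysql://localhost:3306/test")

// Column type changes are rewritten to MySQL's MODIFY COLUMN form:
mysql.getUpdateColumnTypeQuery("alt_table", "ID", "VARCHAR(255)")
// => ALTER TABLE alt_table MODIFY COLUMN `ID` VARCHAR(255)

// Changing nullability alone needs the full column definition in MySQL, which the
// dialect does not have, so it fails fast instead of emitting broken SQL:
// mysql.getUpdateColumnNullabilityQuery("alt_table", "ID", isNullable = true)
// => throws java.sql.SQLFeatureNotSupportedException("UpdateColumnNullability is not supported")
```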
Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 89 +++++++++++++++++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 34 +++---- .../apache/spark/sql/jdbc/MySQLDialect.scala | 24 ++++- 3 files changed, 131 insertions(+), 16 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala new file mode 100644 index 0000000000000..ec958cd55c943 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.{Connection, SQLFeatureNotSupportedException} + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * + * To run this test suite for a specific version (e.g., mysql:5.7.31): + * {{{ + * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MySQLIntegrationSuite" + * + * }}} + * + */ +@DockerTest +class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "mysql" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:5.7.31") + override val env = Map( + "MYSQL_ROOT_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort: Int = 3306 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.mysql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.mysql.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new 
StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } + + override def testUpdateColumnNullability(tbl: String): Unit = { + sql("CREATE TABLE mysql.alt_table (ID STRING NOT NULL) USING _") + // Update nullability is unsupported for mysql db. + val msg = intercept[AnalysisException] { + sql("ALTER TABLE mysql.alt_table ALTER COLUMN ID DROP NOT NULL") + }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + + assert(msg.contains("UpdateColumnNullability is not supported")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 384bcc22f27d8..942c6237fd358 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -28,6 +28,24 @@ trait V2JDBCTest extends SharedSparkSession { // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit + def testUpdateColumnNullability(tbl: String): Unit = { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + var t = spark.table(s"$catalogName.alt_table") + // nullable is true in the expecteSchema because Spark always sets nullable to true + // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 + var expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") + t = spark.table(s"$catalogName.alt_table") + expectedSchema = new StructType().add("ID", StringType, nullable = true) + assert(t.schema === expectedSchema) + // Update nullability of not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") + }.getMessage + assert(msg.contains("Cannot update missing field bad_column")) + } + test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -73,21 +91,7 @@ trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... 
update column nullability") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") - var t = spark.table(s"$catalogName.alt_table") - // nullable is true in the expecteSchema because Spark always sets nullable to true - // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 - var expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN ID DROP NOT NULL") - t = spark.table(s"$catalogName.alt_table") - expectedSchema = new StructType().add("ID", StringType, nullable = true) - assert(t.schema === expectedSchema) - // Update nullability of not existing column - val msg = intercept[AnalysisException] { - sql(s"ALTER TABLE $catalogName.alt_table ALTER COLUMN bad_column DROP NOT NULL") - }.getMessage - assert(msg.contains("Cannot update missing field bad_column")) + testUpdateColumnNullability(s"$catalogName.alt_table") } // Update column nullability in not existing table val msg = intercept[AnalysisException] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 24b31b14d9427..a516e9e76ef31 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.Types +import java.sql.{SQLFeatureNotSupportedException, Types} import java.util.Locale import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder} @@ -48,4 +48,26 @@ private case object MySQLDialect extends JdbcDialect { } override def isCascadingTruncateTable(): Option[Boolean] = Some(false) + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = { + s"ALTER TABLE $tableName MODIFY COLUMN ${quoteIdentifier(columnName)} $newDataType" + } + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + // require to have column data type to change the column nullability + // ALTER TABLE tbl_name MODIFY [COLUMN] col_name column_definition + // column_definition: + // data_type [NOT NULL | NULL] + // e.g. ALTER TABLE t1 MODIFY b INT NOT NULL; + // We don't have column data type here, so throw Exception for now + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") + } } From a1629b4a5790dce1a57e2c2bad9e04c627b88d29 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 22 Oct 2020 13:53:01 +0000 Subject: [PATCH 0305/1009] [SPARK-32852][SQL] spark.sql.hive.metastore.jars support HDFS location ### What changes were proposed in this pull request? Support `spark.sql.hive.metastore.jars` use HDFS location. When user need to use path to set hive metastore jars, you should set `spark.sql.hive.metasstore.jars=path` and set real path in `spark.sql.hive.metastore.jars.path` since we use `File.pathSeperator` to split path, but `FIle.pathSeparator` is `:` in unix, it will split hdfs location `hdfs://nameservice/xx`. So add new config `spark.sql.hive.metastore.jars.path` to set comma separated paths. To keep both two way supported ### Why are the changes needed? 
All spark app can fetch internal version hive jars in HDFS location, not need distribute to all node. ### Does this PR introduce _any_ user-facing change? User can use HDFS location to store hive metastore jars ### How was this patch tested? Manuel tested. Closes #29881 from AngersZhuuuu/SPARK-32852. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/hive/HiveUtils.scala | 92 ++++++++++++++++--- 1 file changed, 77 insertions(+), 15 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 96c207913d49a..399f8911ef679 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -40,6 +40,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf._ @@ -77,7 +78,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_JARS = buildStaticConf("spark.sql.hive.metastore.jars") .doc(s""" | Location of the jars that should be used to instantiate the HiveMetastoreClient. - | This property can be one of three options: " + | This property can be one of four options: " | 1. "builtin" | Use Hive ${builtinHiveVersion}, which is bundled with the Spark assembly when | -Phive is enabled. When this option is chosen, @@ -85,12 +86,32 @@ private[spark] object HiveUtils extends Logging { | ${builtinHiveVersion} or not defined. | 2. "maven" | Use Hive jars of specified version downloaded from Maven repositories. - | 3. A classpath in the standard format for both Hive and Hadoop. + | 3. "path" + | Use Hive jars configured by `spark.sql.hive.metastore.jars.path` + | in comma separated format. Support both local or remote paths. + | 4. A classpath in the standard format for both Hive and Hadoop. """.stripMargin) .version("1.4.0") .stringConf .createWithDefault("builtin") + val HIVE_METASTORE_JARS_PATH = buildStaticConf("spark.sql.hive.metastore.jars.path") + .doc(s"Comma separated URL of Hive jars, support both local and remote paths," + + s"Such as: " + + s" 1. file://path/to/jar/xxx.jar\n" + + s" 2. hdfs://nameservice/path/to/jar/xxx.jar\n" + + s" 3. /path/to/jar/ (path without URI scheme follow conf `fs.defaultFS`'s URI schema)\n" + + s" 4. [http/https/ftp]://path/to/jar/xxx.jar\n" + + s"Notice: `http/https/ftp` doesn't support wildcard, but other URLs support" + + s"nested path wildcard, Such as: " + + s" 1. file://path/to/jar/*, file://path/to/jar/*/*\n" + + s" 2. hdfs://nameservice/path/to/jar/*, hdfs://nameservice/path/to/jar/*/*\n" + + s"When ${HIVE_METASTORE_JARS.key} is set to `path`, we will use Hive jars configured by this") + .version("3.1.0") + .stringConf + .toSequence + .createWithDefault(Nil) + val CONVERT_METASTORE_PARQUET = buildConf("spark.sql.hive.convertMetastoreParquet") .doc("When set to true, the built-in Parquet reader and writer are used to process " + "parquet tables created by using the HiveQL syntax, instead of Hive serde.") @@ -175,6 +196,7 @@ private[spark] object HiveUtils extends Logging { * The location of the jars that should be used to instantiate the HiveMetastoreClient. 
This * property can be one of three options: * - a classpath in the standard format for both hive and hadoop. + * - path - attempt to discover the jars with paths configured by `HIVE_METASTORE_JARS_PATH`. * - builtin - attempt to discover the jars that were used to load Spark SQL and use those. This * option is only valid when using the execution version of Hive. * - maven - download the correct version of hive on demand from maven. @@ -183,6 +205,13 @@ private[spark] object HiveUtils extends Logging { conf.getConf(HIVE_METASTORE_JARS) } + /** + * Hive jars paths, only work when `HIVE_METASTORE_JARS` is `path`. + */ + private def hiveMetastoreJarsPath(conf: SQLConf): Seq[String] = { + conf.getConf(HIVE_METASTORE_JARS_PATH) + } + /** * A comma separated list of class prefixes that should be loaded using the classloader that * is shared between Spark SQL and a specific version of Hive. An example of classes that should @@ -333,6 +362,20 @@ private[spark] object HiveUtils extends Logging { val hiveMetastoreBarrierPrefixes = HiveUtils.hiveMetastoreBarrierPrefixes(sqlConf) val metaVersion = IsolatedClientLoader.hiveVersion(hiveMetastoreVersion) + def addLocalHiveJars(file: File): Seq[URL] = { + if (file.getName == "*") { + val files = file.getParentFile.listFiles() + if (files == null) { + logWarning(s"Hive jar path '${file.getPath}' does not exist.") + Nil + } else { + files.filter(_.getName.toLowerCase(Locale.ROOT).endsWith(".jar")).map(_.toURL).toSeq + } + } else { + file.toURL :: Nil + } + } + val isolatedLoader = if (hiveMetastoreJars == "builtin") { if (builtinHiveVersion != hiveMetastoreVersion) { throw new IllegalArgumentException( @@ -393,24 +436,43 @@ private[spark] object HiveUtils extends Logging { config = configurations, barrierPrefixes = hiveMetastoreBarrierPrefixes, sharedPrefixes = hiveMetastoreSharedPrefixes) + } else if (hiveMetastoreJars == "path") { + // Convert to files and expand any directories. + val jars = + HiveUtils.hiveMetastoreJarsPath(sqlConf) + .flatMap { + case path if path.contains("\\") && Utils.isWindows => + addLocalHiveJars(new File(path)) + case path => + DataSource.checkAndGlobPathIfNecessary( + pathStrings = Seq(path), + hadoopConf = hadoopConf, + checkEmptyGlobPath = true, + checkFilesExist = false, + enableGlobbing = true + ).map(_.toUri.toURL) + } + + logInfo( + s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + + s"using path: ${jars.mkString(";")}") + new IsolatedClientLoader( + version = metaVersion, + sparkConf = conf, + hadoopConf = hadoopConf, + execJars = jars.toSeq, + config = configurations, + isolationOn = true, + barrierPrefixes = hiveMetastoreBarrierPrefixes, + sharedPrefixes = hiveMetastoreSharedPrefixes) } else { // Convert to files and expand any directories. 
val jars = hiveMetastoreJars .split(File.pathSeparator) - .flatMap { - case path if new File(path).getName == "*" => - val files = new File(path).getParentFile.listFiles() - if (files == null) { - logWarning(s"Hive jar path '$path' does not exist.") - Nil - } else { - files.filter(_.getName.toLowerCase(Locale.ROOT).endsWith(".jar")).toSeq - } - case path => - new File(path) :: Nil - } - .map(_.toURI.toURL) + .flatMap { path => + addLocalHiveJars(new File(path)) + } logInfo( s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + From b38f3a5557b45503e0f8d67bc77c5d390a67a42f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 22 Oct 2020 14:01:07 +0000 Subject: [PATCH 0306/1009] [SPARK-32978][SQL] Make sure the number of dynamic part metric is correct ### What changes were proposed in this pull request? The purpose of this pr is to resolve SPARK-32978. The main reason of bad case describe in SPARK-32978 is the `BasicWriteTaskStatsTracker` directly reports the new added partition number of each task, which makes it impossible to remove duplicate data in driver side. The main of this pr is change to report partitionValues to driver and remove duplicate data at driver side to make sure the number of dynamic part metric is correct. ### Why are the changes needed? The the number of dynamic part metric we display on the UI should be correct. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new test case refer to described in SPARK-32978 Closes #30026 from LuciferYang/SPARK-32978. Authored-by: yangjie01 Signed-off-by: Wenchen Fan --- ...namicPartitionsBenchmark-jdk11-results.txt | 8 ++ ...WithDynamicPartitionsBenchmark-results.txt | 8 ++ .../datasources/BasicWriteStatsTracker.scala | 16 +-- ...tTableWithDynamicPartitionsBenchmark.scala | 103 ++++++++++++++++++ ...BasicWriteJobStatsTrackerMetricSuite.scala | 59 ++++++++++ 5 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt create mode 100644 sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..12fe0e1f5a7ce --- /dev/null +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk11-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 11.0.8+10-LTS on Mac OS X 10.15.7 +Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz +dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +one partition column, 100 partitions 16396 16688 413 0.0 81978.3 1.0X +two partition columns, 500 partitions 50356 50924 804 0.0 251777.9 0.3X +three partition columns, 2000 partitions 144342 144850 718 0.0 721710.9 0.1X + diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt new file mode 100644 index 0000000000000..c042d74091a3b 
--- /dev/null +++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 1.8.0_232-b18 on Mac OS X 10.15.7 +Intel(R) Core(TM) i5-7360U CPU @ 2.30GHz +dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +one partition column, 100 partitions 23370 23588 309 0.0 116848.3 1.0X +two partition columns, 500 partitions 37686 38079 555 0.0 188432.2 0.6X +three partition columns, 2000 partitions 112489 113049 792 0.0 562446.1 0.2X + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala index b71c2d12f02b8..6babbb465a3fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources import java.io.FileNotFoundException +import scala.collection.mutable + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -35,7 +37,7 @@ import org.apache.spark.util.SerializableConfiguration * These were first introduced in https://github.com/apache/spark/pull/18159 (SPARK-20703). */ case class BasicWriteTaskStats( - numPartitions: Int, + partitions: Seq[InternalRow], numFiles: Int, numBytes: Long, numRows: Long) @@ -48,7 +50,7 @@ case class BasicWriteTaskStats( class BasicWriteTaskStatsTracker(hadoopConf: Configuration) extends WriteTaskStatsTracker with Logging { - private[this] var numPartitions: Int = 0 + private[this] val partitions: mutable.ArrayBuffer[InternalRow] = mutable.ArrayBuffer.empty private[this] var numFiles: Int = 0 private[this] var submittedFiles: Int = 0 private[this] var numBytes: Long = 0L @@ -76,7 +78,7 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) override def newPartition(partitionValues: InternalRow): Unit = { - numPartitions += 1 + partitions.append(partitionValues) } override def newBucket(bucketId: Int): Unit = { @@ -117,7 +119,7 @@ class BasicWriteTaskStatsTracker(hadoopConf: Configuration) "This could be due to the output format not writing empty files, " + "or files being not immediately visible in the filesystem.") } - BasicWriteTaskStats(numPartitions, numFiles, numBytes, numRows) + BasicWriteTaskStats(partitions.toSeq, numFiles, numBytes, numRows) } } @@ -139,7 +141,7 @@ class BasicWriteJobStatsTracker( override def processStats(stats: Seq[WriteTaskStats]): Unit = { val sparkContext = SparkContext.getActive.get - var numPartitions: Long = 0L + var partitionsSet: mutable.Set[InternalRow] = mutable.HashSet.empty var numFiles: Long = 0L var totalNumBytes: Long = 0L var totalNumOutput: Long = 0L @@ -147,7 +149,7 @@ class BasicWriteJobStatsTracker( val basicStats = stats.map(_.asInstanceOf[BasicWriteTaskStats]) basicStats.foreach { summary => - numPartitions += summary.numPartitions + partitionsSet ++= summary.partitions numFiles += summary.numFiles totalNumBytes += summary.numBytes totalNumOutput += summary.numRows @@ -156,7 +158,7 @@ class BasicWriteJobStatsTracker( metrics(BasicWriteJobStatsTracker.NUM_FILES_KEY).add(numFiles) metrics(BasicWriteJobStatsTracker.NUM_OUTPUT_BYTES_KEY).add(totalNumBytes) 
metrics(BasicWriteJobStatsTracker.NUM_OUTPUT_ROWS_KEY).add(totalNumOutput) - metrics(BasicWriteJobStatsTracker.NUM_PARTS_KEY).add(numPartitions) + metrics(BasicWriteJobStatsTracker.NUM_PARTS_KEY).add(partitionsSet.size) val executionId = sparkContext.getLocalProperty(SQLExecution.EXECUTION_ID_KEY) SQLMetrics.postDriverMetricUpdates(sparkContext, executionId, metrics.values.toList) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala new file mode 100644 index 0000000000000..81a29cefd0045 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertTableWithDynamicPartitionsBenchmark.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark + +/** + * Benchmark to measure insert into table with dynamic partition columns. + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class + * 2. build/sbt "sql/test:runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * Results will be written to + * "benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt". 
+ * }}} + */ +object InsertTableWithDynamicPartitionsBenchmark extends DataSourceWriteBenchmark { + + def prepareSourceTableAndGetTotalRows(numberRows: Long, sourceTable: String, + part1Step: Int, part2Step: Int, part3Step: Int): Long = { + val dataFrame = spark.range(0, numberRows, 1, 4) + val dataFrame1 = spark.range(0, numberRows, part1Step, 4) + val dataFrame2 = spark.range(0, numberRows, part2Step, 4) + val dataFrame3 = spark.range(0, numberRows, part3Step, 4) + + val data = dataFrame.join(dataFrame1).join(dataFrame2).join(dataFrame3) + .toDF("id", "part1", "part2", "part3") + data.write.saveAsTable(sourceTable) + data.count() + } + + def writeOnePartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part bigint) " + + "using parquet partitioned by (part)") + benchmark.addCase(s"one partition column, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part) " + + "select id, part1 as part from sourceTable") + } + } + + def writeTwoPartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part1 bigint, part2 bigint) " + + "using parquet partitioned by (part1, part2)") + benchmark.addCase(s"two partition columns, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part1, part2) " + + "select id, part1, part2 from sourceTable") + } + } + + def writeThreePartitionColumnTable(tableName: String, + partitionNumber: Long, benchmark: Benchmark): Unit = { + spark.sql(s"create table $tableName(i bigint, part1 bigint, part2 bigint, part3 bigint) " + + "using parquet partitioned by (part1, part2, part3)") + benchmark.addCase(s"three partition columns, $partitionNumber partitions") { _ => + spark.sql(s"insert overwrite table $tableName partition(part1, part2, part3) " + + "select id, part1, part2, part3 from sourceTable") + } + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val sourceTable = "sourceTable" + val onePartColTable = "onePartColTable" + val twoPartColTable = "twoPartColTable" + val threePartColTable = "threePartColTable" + val numberRows = 100L + val part1Step = 1 + val part2Step = 20 + val part3Step = 25 + val part1Number = numberRows / part1Step + val part2Number = numberRows / part2Step * part1Number + val part3Number = numberRows / part3Step * part2Number + + withTable(sourceTable, onePartColTable, twoPartColTable, threePartColTable) { + val totalRows = + prepareSourceTableAndGetTotalRows(numberRows, sourceTable, part1Step, part2Step, part3Step) + val benchmark = + new Benchmark(s"dynamic insert table benchmark, totalRows = $totalRows", + totalRows, output = output) + writeOnePartitionColumnTable(onePartColTable, part1Number, benchmark) + writeTwoPartitionColumnTable(twoPartColTable, part2Number, benchmark) + writeThreePartitionColumnTable(threePartColTable, part3Number, benchmark) + benchmark.run() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala new file mode 100644 index 0000000000000..3e58c225d8c7a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/BasicWriteJobStatsTrackerMetricSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.{LocalSparkSession, SparkSession} + +class BasicWriteJobStatsTrackerMetricSuite extends SparkFunSuite with LocalSparkSession { + + test("SPARK-32978: make sure the number of dynamic part metric is correct") { + try { + val partitions = "50" + spark = SparkSession.builder().master("local[4]").getOrCreate() + val statusStore = spark.sharedState.statusStore + + spark.sql("create table dynamic_partition(i bigint, part bigint) " + + "using parquet partitioned by (part)") + val oldExecutionsSize = statusStore.executionsList().size + spark.sql("insert overwrite table dynamic_partition partition(part) " + + s"select id, id % $partitions as part from range(10000)") + + // Wait for listener to finish computing the metrics for the executions. + while (statusStore.executionsList().size - oldExecutionsSize < 1 || + statusStore.executionsList().last.metricValues == null) { + Thread.sleep(100) + } + + // There should be 2 SQLExecutionUIData in executionsList and the 2nd item is we need, + // but the executionId is indeterminate in maven test, + // so the `statusStore.execution(executionId)` API is not used. + assert(statusStore.executionsCount() == 2) + val executionData = statusStore.executionsList()(1) + val accumulatorIdOpt = + executionData.metrics.find(_.name == "number of dynamic part").map(_.accumulatorId) + assert(accumulatorIdOpt.isDefined) + val numPartsOpt = executionData.metricValues.get(accumulatorIdOpt.get) + assert(numPartsOpt.isDefined && numPartsOpt.get == partitions) + + } finally { + spark.sql("drop table if exists dynamic_partition") + spark.stop() + } + } +} From a03d77d32696f5a33770e9bee654acde904da7d4 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 22 Oct 2020 15:57:03 +0000 Subject: [PATCH 0307/1009] [SPARK-33160][SQL][FOLLOWUP] Replace the parquet metadata key `org.apache.spark.int96NoRebase` by `org.apache.spark.legacyINT96` ### What changes were proposed in this pull request? 1. Replace the metadata key `org.apache.spark.int96NoRebase` by `org.apache.spark.legacyINT96`. 2. Change the condition when new key should be saved to parquet metadata: it should be saved when the SQL config `spark.sql.legacy.parquet.int96RebaseModeInWrite` is set to `LEGACY`. 3. Change handling the metadata key in read: - If there is no the key in parquet metadata, take the rebase mode from the SQL config: `spark.sql.legacy.parquet.int96RebaseModeInRead` - If parquet files were saved by Spark < 3.1.0, use the `LEGACY` rebasing mode for INT96 type. - For files written by Spark >= 3.1.0, if the `org.apache.spark.legacyINT96` presents in metadata, perform rebasing otherwise don't. ### Why are the changes needed? 
- To not increase parquet size by default when `spark.sql.legacy.parquet.int96RebaseModeInWrite` is `EXCEPTION` after https://github.com/apache/spark/pull/30121. - To have the implementation similar to `org.apache.spark.legacyDateTime` - To minimise impact on other subsystems that are based on file sizes like gathering statistics. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modified test in `ParquetIOSuite` Closes #30132 from MaxGekk/int96-flip-metadata-rebase-key. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../datasources/DataSourceUtils.scala | 22 ++++++++++++------- .../parquet/ParquetWriteSupport.scala | 6 ++--- .../scala/org/apache/spark/sql/package.scala | 4 ++-- .../datasources/parquet/ParquetIOSuite.scala | 16 +++++++++----- .../spark/sql/hive/StatisticsSuite.scala | 2 +- 5 files changed, 31 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index f8068a634977b..b54747a25d5a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -26,7 +26,7 @@ import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.SparkUpgradeException -import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_LEGACY_INT96, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime @@ -115,14 +115,20 @@ object DataSourceUtils { lookupFileMeta: String => String, modeByConfig: String): LegacyBehaviorPolicy.Value = { if (Utils.isTesting && SQLConf.get.getConfString("spark.test.forceNoRebase", "") == "true") { - LegacyBehaviorPolicy.CORRECTED - } else if (lookupFileMeta(SPARK_INT96_NO_REBASE) != null) { - LegacyBehaviorPolicy.CORRECTED - } else if (lookupFileMeta(SPARK_VERSION_METADATA_KEY) != null) { - LegacyBehaviorPolicy.LEGACY - } else { - LegacyBehaviorPolicy.withName(modeByConfig) + return LegacyBehaviorPolicy.CORRECTED } + // If there is no version, we return the mode specified by the config. + Option(lookupFileMeta(SPARK_VERSION_METADATA_KEY)).map { version => + // Files written by Spark 3.0 and earlier follow the legacy hybrid calendar and we need to + // rebase the INT96 timestamp values. + // Files written by Spark 3.1 and latter may also need the rebase if they were written with + // the "LEGACY" rebase mode. 
+ if (version < "3.1.0" || lookupFileMeta(SPARK_LEGACY_INT96) != null) { + LegacyBehaviorPolicy.LEGACY + } else { + LegacyBehaviorPolicy.CORRECTED + } + }.getOrElse(LegacyBehaviorPolicy.withName(modeByConfig)) } def newRebaseExceptionInRead(format: String): SparkUpgradeException = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index b538c2f2493d0..26074719364a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -31,7 +31,7 @@ import org.apache.parquet.io.api.{Binary, RecordConsumer} import org.apache.spark.SPARK_VERSION_SHORT import org.apache.spark.internal.Logging -import org.apache.spark.sql.{SPARK_INT96_NO_REBASE, SPARK_LEGACY_DATETIME, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{SPARK_LEGACY_DATETIME, SPARK_LEGACY_INT96, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.SpecializedGetters import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -123,9 +123,9 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { } } ++ { if (int96RebaseMode == LegacyBehaviorPolicy.LEGACY) { - None + Some(SPARK_LEGACY_INT96 -> "") } else { - Some(SPARK_INT96_NO_REBASE -> "") + None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 011be6d69c576..022fecf1ae412 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -63,7 +63,7 @@ package object sql { /** * Parquet file metadata key to indicate that the file with INT96 column type was written - * without rebasing. + * with rebasing. 
*/ - private[sql] val SPARK_INT96_NO_REBASE = "org.apache.spark.int96NoRebase" + private[sql] val SPARK_LEGACY_INT96 = "org.apache.spark.legacyINT96" } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index dac4e950a7823..34bdef7bdb402 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -1163,9 +1163,9 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } - test("SPARK-33160: write the metadata key 'org.apache.spark.int96NoRebase'") { - def saveTs(dir: java.io.File): Unit = { - Seq(Timestamp.valueOf("1000-01-01 01:02:03")).toDF() + test("SPARK-33160: write the metadata key 'org.apache.spark.legacyINT96'") { + def saveTs(dir: java.io.File, ts: String = "1000-01-01 01:02:03"): Unit = { + Seq(Timestamp.valueOf(ts)).toDF() .repartition(1) .write .parquet(dir.getAbsolutePath) @@ -1173,18 +1173,24 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> LEGACY.toString) { withTempPath { dir => saveTs(dir) - assert(getMetaData(dir).get(SPARK_INT96_NO_REBASE).isEmpty) + assert(getMetaData(dir)(SPARK_LEGACY_INT96) === "") } } withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) { withTempPath { dir => saveTs(dir) - assert(getMetaData(dir)(SPARK_INT96_NO_REBASE) === "") + assert(getMetaData(dir).get(SPARK_LEGACY_INT96).isEmpty) } } withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { withTempPath { dir => intercept[SparkException] { saveTs(dir) } } } + withSQLConf(SQLConf.LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key -> EXCEPTION.toString) { + withTempPath { dir => + saveTs(dir, "2020-10-22 01:02:03") + assert(getMetaData(dir).get(SPARK_LEGACY_INT96).isEmpty) + } + } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index db0e93787338e..7d5a200606356 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1513,7 +1513,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 636 + val expectedSize = 601 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName) From 3819d39607392aa968595e3d97b84fedf83d08d9 Mon Sep 17 00:00:00 2001 From: Ankit Srivastava Date: Thu, 22 Oct 2020 16:35:55 -0700 Subject: [PATCH 0308/1009] [SPARK-32998][BUILD] Add ability to override default remote repos with internal one ### What changes were proposed in this pull request? - Building spark internally in orgs where access to outside internet is not allowed takes a long time because unsuccessful attempts are made to download artifacts from repositories which are not accessible. The unsuccessful attempts unnecessarily add significant amount of time to the build. I have seen a difference of up-to 1hr for some runs. 
- Adding 1 environment variables that should be present that the start of the build and if they exist, override the default repos defined in the code and scripts. envVariables: - DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/ ### Why are the changes needed? To allow orgs to build spark internally without relying on external repositories for artifact downloads. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Multiple builds with and without env variables set. Closes #29874 from ankits/SPARK-32998. Authored-by: Ankit Srivastava Signed-off-by: Dongjoon Hyun --- build/sbt-launch-lib.bash | 6 +++++- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 ++++- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash index 423ba3b766e61..1d79989f3c3c3 100755 --- a/build/sbt-launch-lib.bash +++ b/build/sbt-launch-lib.bash @@ -39,7 +39,11 @@ dlog () { acquire_sbt_jar () { SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties` - URL1=https://repo1.maven.org/maven2/org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar + # DEFAULT_ARTIFACT_REPOSITORY env variable can be used to only fetch + # artifacts from internal repos only. + # Ex: + # DEFAULT_ARTIFACT_REPOSITORY=https://artifacts.internal.com/libs-release/ + URL1=${DEFAULT_ARTIFACT_REPOSITORY:-https://repo1.maven.org/maven2/}org/scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch-${SBT_VERSION}.jar JAR=build/sbt-launch-${SBT_VERSION}.jar sbt_jar=$JAR diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 93370f5dae72e..9a316e8c5b5a9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1160,13 +1160,16 @@ private[spark] object SparkSubmitUtils { val br: IBiblioResolver = new IBiblioResolver br.setM2compatible(true) br.setUsepoms(true) + val defaultInternalRepo : Option[String] = sys.env.get("DEFAULT_ARTIFACT_REPOSITORY") + br.setRoot(defaultInternalRepo.getOrElse("https://repo1.maven.org/maven2/")) br.setName("central") cr.add(br) val sp: IBiblioResolver = new IBiblioResolver sp.setM2compatible(true) sp.setUsepoms(true) - sp.setRoot("https://dl.bintray.com/spark-packages/maven") + sp.setRoot(sys.env.getOrElse( + "DEFAULT_ARTIFACT_REPOSITORY", "https://dl.bintray.com/spark-packages/maven")) sp.setName("spark-packages") cr.add(sp) cr diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index dad59ba0e7327..952785b9a3e65 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2566,7 +2566,8 @@ object SQLConf { .version("3.0.0") .stringConf .createWithDefault( - "https://maven-central.storage-download.googleapis.com/maven2/") + sys.env.getOrElse("DEFAULT_ARTIFACT_REPOSITORY", + "https://maven-central.storage-download.googleapis.com/maven2/")) val LEGACY_FROM_DAYTIME_STRING = buildConf("spark.sql.legacy.fromDayTimeString.enabled") From 87b32f65ef907707a5f76777ecd4570a8c34eedd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 23 Oct 2020 13:35:46 +0900 Subject: [PATCH 0309/1009] [MINOR][DOCS][TESTS] Fix 
PLAN_CHANGE_LOG_LEVEL document ### What changes were proposed in this pull request? The validation error message for the `PLAN_CHANGE_LOG_LEVEL` config refers to the wrong key (`spark.sql.optimizer.planChangeLog.level`); this fixes it to `spark.sql.planChangeLog.level`. ### Why are the changes needed? Fix the wrong config key in the error message. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Only doc change. Closes #30136 from viirya/minor-sqlconf. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 952785b9a3e65..35ef24c1c3ba6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -225,7 +225,7 @@ object SQLConf { .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValue(logLevel => Set("TRACE", "DEBUG", "INFO", "WARN", "ERROR").contains(logLevel), - "Invalid value for 'spark.sql.optimizer.planChangeLog.level'. Valid values are " + + "Invalid value for 'spark.sql.planChangeLog.level'. Valid values are " + "'trace', 'debug', 'info', 'warn' and 'error'.") .createWithDefault("trace") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala index 68c5e2e2f7694..1187950c04240 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizerLoggingSuite.scala @@ -100,7 +100,7 @@ class OptimizerLoggingSuite extends PlanTest { withSQLConf(SQLConf.PLAN_CHANGE_LOG_LEVEL.key -> level) {} } assert(error.getMessage.contains( - "Invalid value for 'spark.sql.optimizer.planChangeLog.level'.")) + "Invalid value for 'spark.sql.planChangeLog.level'.")) } } From edeecada665e4974bb9e0f125dc30d71bd0a54ee Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Fri, 23 Oct 2020 13:58:44 +0900 Subject: [PATCH 0310/1009] [SPARK-32850][CORE][K8S] Simplify the RPC message flow of decommission ### What changes were proposed in this pull request? This PR cleans up the RPC message flow among the multiple decommission use cases. It includes the following changes: * Keep `Worker`'s decommission status consistent between the case where decommission starts from `Worker` and the case where decommission starts from the `MasterWebUI`: send `DecommissionWorker` from `Master` to `Worker` in the latter case. * Change from two-way communication to one-way communication when notifying decommission between driver and executor: it is unnecessary for the executor to acknowledge the decommission status to the driver since the decommission request comes from the driver, and the same holds in the reverse direction. * Send only one message instead of two (`DecommissionSelf`/`DecommissionBlockManager`) when decommissioning the executor: the executor and the `BlockManager` are in the same JVM. * Clean up the surrounding code. ### Why are the changes needed? Before: [diagram of the RPC message flow before this change, omitted] After: [diagram of the RPC message flow after this change, omitted] (Note: the diagrams only count RPC calls that need to go through the network; local RPC calls are not counted.)
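To make the one-way vs. two-way distinction concrete, a toy sketch (deliberately not Spark's real RPC classes; the endpoint and message names are invented) of the two notification styles:
```
// Toy model only: invented names, no Spark dependency.
object DecommissionFlowSketch {
  case object DecommissionExecutor

  class Endpoint(name: String) {
    // Old style (two-way): the sender blocks until the receiver acknowledges.
    def askSync(msg: Any): Boolean = {
      println(s"$name handles $msg and sends an acknowledgement back")
      true
    }
    // New style (one-way): fire-and-forget, no acknowledgement travels back.
    def send(msg: Any): Unit =
      println(s"$name handles $msg; the sender does not wait")
  }

  def main(args: Array[String]): Unit = {
    val executor = new Endpoint("executor")
    val acked = executor.askSync(DecommissionExecutor) // extra round trip over the network
    executor.send(DecommissionExecutor)                // single one-way message
    println(s"ack from old-style flow: $acked")
  }
}
```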
After this change, We reduced 6 original RPC calls and added one more RPC call for keeping the consistent decommission status for the Worker. And the RPC flow becomes more clear. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #29817 from Ngone51/simplify-decommission-rpc. Authored-by: yi.wu Signed-off-by: HyukjinKwon --- .../spark/ExecutorAllocationClient.scala | 19 +++- .../spark/ExecutorAllocationManager.scala | 5 +- .../apache/spark/deploy/DeployMessage.scala | 32 ++++++- .../apache/spark/deploy/master/Master.scala | 21 +++-- .../apache/spark/deploy/worker/Worker.scala | 29 ++++-- .../CoarseGrainedExecutorBackend.scala | 70 ++++++++------ .../cluster/CoarseGrainedClusterMessage.scala | 16 +++- .../CoarseGrainedSchedulerBackend.scala | 93 +++++++------------ .../cluster/StandaloneSchedulerBackend.scala | 7 +- .../apache/spark/storage/BlockManager.scala | 9 +- .../storage/BlockManagerMasterEndpoint.scala | 25 ++--- .../storage/BlockManagerStorageEndpoint.scala | 2 +- .../deploy/DecommissionWorkerSuite.scala | 4 +- .../spark/deploy/client/AppClientSuite.scala | 8 +- .../spark/deploy/master/MasterSuite.scala | 18 ++-- .../scheduler/WorkerDecommissionSuite.scala | 7 +- ...kManagerDecommissionIntegrationSuite.scala | 40 ++++++++ .../integrationtest/DecommissionSuite.scala | 12 +-- .../ExecutorAllocationManagerSuite.scala | 6 +- 19 files changed, 257 insertions(+), 166 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala index ce47f3fd32203..cdba1c44034c0 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala @@ -91,11 +91,13 @@ private[spark] trait ExecutorAllocationClient { * @param executorsAndDecomInfo identifiers of executors & decom info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ def decommissionExecutors( - executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { + executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = { killExecutors(executorsAndDecomInfo.map(_._1), adjustTargetNumExecutors, countFailures = false) @@ -109,14 +111,21 @@ private[spark] trait ExecutorAllocationClient { * @param executorId identifiers of executor to decommission * @param decommissionInfo information about the decommission (reason, host loss) * @param adjustTargetNumExecutors if we should adjust the target number of executors. + * @param triggeredByExecutor whether the decommission is triggered at executor. + * (TODO: add a new type like `ExecutorDecommissionInfo` for the + * case where executor is decommissioned at executor first, so we + * don't need this extra parameter.) * @return whether the request is acknowledged by the cluster manager. 
*/ - final def decommissionExecutor(executorId: String, + final def decommissionExecutor( + executorId: String, decommissionInfo: ExecutorDecommissionInfo, - adjustTargetNumExecutors: Boolean): Boolean = { + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean = false): Boolean = { val decommissionedExecutors = decommissionExecutors( Array((executorId, decommissionInfo)), - adjustTargetNumExecutors = adjustTargetNumExecutors) + adjustTargetNumExecutors = adjustTargetNumExecutors, + triggeredByExecutor = triggeredByExecutor) decommissionedExecutors.nonEmpty && decommissionedExecutors(0).equals(executorId) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 596508a2cf8c8..1dd64df106bc2 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -580,7 +580,10 @@ private[spark] class ExecutorAllocationManager( if (decommissionEnabled) { val executorIdsWithoutHostLoss = executorIdsToBeRemoved.toSeq.map( id => (id, ExecutorDecommissionInfo("spark scale down"))).toArray - client.decommissionExecutors(executorIdsWithoutHostLoss, adjustTargetNumExecutors = false) + client.decommissionExecutors( + executorIdsWithoutHostLoss, + adjustTargetNumExecutors = false, + triggeredByExecutor = false) } else { client.killExecutors(executorIdsToBeRemoved.toSeq, adjustTargetNumExecutors = false, countFailures = false, force = false) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index 83f373d526e90..d5b5375d64f4d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -61,13 +61,35 @@ private[deploy] object DeployMessages { } /** + * An internal message that used by Master itself, in order to handle the + * `DecommissionWorkersOnHosts` request from `MasterWebUI` asynchronously. + * @param ids A collection of Worker ids, which should be decommissioned. + */ + case class DecommissionWorkers(ids: Seq[String]) extends DeployMessage + + /** + * A message that sent from Master to Worker to decommission the Worker. + * It's used for the case where decommission is triggered at MasterWebUI. + * + * Note that decommission a Worker will cause all the executors on that Worker + * to be decommissioned as well. + */ + object DecommissionWorker extends DeployMessage + + /** + * A message that sent by the Worker to itself when it receives PWR signal, + * indicating the Worker starts to decommission. + */ + object WorkerSigPWRReceived extends DeployMessage + + /** + * A message sent from Worker to Master to tell Master that the Worker has started + * decommissioning. It's used for the case where decommission is triggered at Worker. 
+ * * @param id the worker id - * @param worker the worker endpoint ref + * @param workerRef the worker endpoint ref */ - case class WorkerDecommission( - id: String, - worker: RpcEndpointRef) - extends DeployMessage + case class WorkerDecommissioning(id: String, workerRef: RpcEndpointRef) extends DeployMessage case class ExecutorStateChanged( appId: String, diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 48516cdf83291..ceeb01149f5db 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -245,8 +245,7 @@ private[deploy] class Master( logError("Leadership has been revoked -- master shutting down.") System.exit(0) - case WorkerDecommission(id, workerRef) => - logInfo("Recording worker %s decommissioning".format(id)) + case WorkerDecommissioning(id, workerRef) => if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else { @@ -254,6 +253,19 @@ private[deploy] class Master( idToWorker.get(id).foreach(decommissionWorker) } + case DecommissionWorkers(ids) => + // The caller has already checked the state when handling DecommissionWorkersOnHosts, + // so it should not be the STANDBY + assert(state != RecoveryState.STANDBY) + ids.foreach ( id => + // We use foreach since get gives us an option and we can skip the failures. + idToWorker.get(id).foreach { w => + decommissionWorker(w) + // Also send a message to the worker node to notify. + w.endpoint.send(DecommissionWorker) + } + ) + case RegisterWorker( id, workerHost, workerPort, workerRef, cores, memory, workerWebUiUrl, masterAddress, resources) => @@ -891,10 +903,7 @@ private[deploy] class Master( logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") // The workers are removed async to avoid blocking the receive loop for the entire batch - workersToRemove.foreach(wi => { - logInfo(s"Sending the worker decommission to ${wi.id} and ${wi.endpoint}") - self.send(WorkerDecommission(wi.id, wi.endpoint)) - }) + self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) // Return the count of workers actually removed workersToRemove.size diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 7649bc37c30b6..0660dbdafd605 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -70,7 +70,10 @@ private[deploy] class Worker( if (conf.get(config.DECOMMISSION_ENABLED)) { logInfo("Registering SIGPWR handler to trigger decommissioning.") SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling worker decommission feature.")(decommissionSelf) + "disabling worker decommission feature.") { + self.send(WorkerSigPWRReceived) + true + } } else { logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") } @@ -137,7 +140,8 @@ private[deploy] class Worker( private var registered = false private var connected = false private var decommissioned = false - private val workerId = generateWorkerId() + // expose for test + private[spark] val workerId = generateWorkerId() private val sparkHome = if (sys.props.contains(IS_TESTING.key)) { assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!") @@ -668,8 +672,14 @@ private[deploy] class Worker( finishedApps += id 
maybeCleanupApplication(id) - case WorkerDecommission(_, _) => + case DecommissionWorker => + decommissionSelf() + + case WorkerSigPWRReceived => decommissionSelf() + // Tell the Master that we are starting decommissioning + // so it stops trying to launch executor/driver on us + sendToMaster(WorkerDecommissioning(workerId, self)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { @@ -768,16 +778,15 @@ private[deploy] class Worker( } } - private[deploy] def decommissionSelf(): Boolean = { - if (conf.get(config.DECOMMISSION_ENABLED)) { - logDebug("Decommissioning self") + private[deploy] def decommissionSelf(): Unit = { + if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { decommissioned = true - sendToMaster(WorkerDecommission(workerId, self)) + logInfo(s"Decommission worker $workerId.") + } else if (decommissioned) { + logWarning(s"Worker $workerId already started decommissioning.") } else { - logWarning("Asked to decommission self, but decommissioning not enabled") + logWarning(s"Receive decommission request, but decommission feature is disabled.") } - // Return true since can be called as a signal handler - true } private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = { diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 48045bafe6e3f..b2bc6b3b68007 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -40,7 +40,7 @@ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.rpc._ -import org.apache.spark.scheduler.{ExecutorDecommissionInfo, ExecutorLossReason, TaskDescription} +import org.apache.spark.scheduler.{ExecutorLossReason, TaskDescription} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.{ChildFirstURLClassLoader, MutableURLClassLoader, SignalUtils, ThreadUtils, Utils} @@ -79,12 +79,14 @@ private[spark] class CoarseGrainedExecutorBackend( */ private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] - @volatile private var decommissioned = false + private var decommissioned = false override def onStart(): Unit = { - logInfo("Registering PWR handler.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling decommission feature.")(decommissionSelf) + if (env.conf.get(DECOMMISSION_ENABLED)) { + logInfo("Registering PWR handler to trigger decommissioning.") + SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + "disabling executor decommission feature.") (self.askSync[Boolean](ExecutorSigPWRReceived)) + } logInfo("Connecting to driver: " + driverUrl) try { @@ -166,17 +168,6 @@ private[spark] class CoarseGrainedExecutorBackend( if (executor == null) { exitExecutor(1, "Received LaunchTask command but executor was null") } else { - if (decommissioned) { - val msg = "Asked to launch a task while decommissioned." 
- logError(msg) - driver match { - case Some(endpoint) => - logInfo("Sending DecommissionExecutor to driver.") - endpoint.send(DecommissionExecutor(executorId, ExecutorDecommissionInfo(msg))) - case _ => - logError("No registered driver to send Decommission to.") - } - } val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) taskResources(taskDesc.taskId) = taskDesc.resources @@ -213,11 +204,31 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo(s"Received tokens of ${tokenBytes.length} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) - case DecommissionSelf => - logInfo("Received decommission self") + case DecommissionExecutor => decommissionSelf() } + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { + case ExecutorSigPWRReceived => + var driverNotified = false + try { + driver.foreach { driverRef => + // Tell driver that we are starting decommissioning so it stops trying to schedule us + driverNotified = driverRef.askSync[Boolean](ExecutorDecommissioning(executorId)) + if (driverNotified) decommissionSelf() + } + } catch { + case e: Exception => + if (driverNotified) { + logError("Fail to decommission self (but driver has been notified).", e) + } else { + logError("Fail to tell driver that we are starting decommissioning", e) + } + decommissioned = false + } + context.reply(decommissioned) + } + override def onDisconnected(remoteAddress: RpcAddress): Unit = { if (stopping.get()) { logInfo(s"Driver from $remoteAddress disconnected during shutdown") @@ -264,17 +275,20 @@ private[spark] class CoarseGrainedExecutorBackend( System.exit(code) } - private def decommissionSelf(): Boolean = { - val msg = "Decommissioning self w/sync" + private def decommissionSelf(): Unit = { + if (!env.conf.get(DECOMMISSION_ENABLED)) { + logWarning(s"Receive decommission request, but decommission feature is disabled.") + return + } else if (decommissioned) { + logWarning(s"Executor $executorId already started decommissioning.") + return + } + val msg = s"Decommission executor $executorId." 
logInfo(msg) try { decommissioned = true - // Tell master we are are decommissioned so it stops trying to schedule us - if (driver.nonEmpty) { - driver.get.askSync[Boolean](DecommissionExecutor( - executorId, ExecutorDecommissionInfo(msg))) - } else { - logError("No driver to message decommissioning.") + if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { + env.blockManager.decommissionBlockManager() } if (executor != null) { executor.decommission() @@ -333,12 +347,10 @@ private[spark] class CoarseGrainedExecutorBackend( shutdownThread.start() logInfo("Will exit when finished decommissioning") - // Return true since we are handling a signal - true } catch { case e: Exception => + decommissioned = false logError("Unexpected error while decommissioning self", e) - false } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 7242ab7786061..d1b0e798c51be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -95,8 +95,17 @@ private[spark] object CoarseGrainedClusterMessages { case class RemoveExecutor(executorId: String, reason: ExecutorLossReason) extends CoarseGrainedClusterMessage - case class DecommissionExecutor(executorId: String, decommissionInfo: ExecutorDecommissionInfo) - extends CoarseGrainedClusterMessage + // A message that sent from executor to driver to tell driver that the executor has started + // decommissioning. It's used for the case where decommission is triggered at executor (e.g., K8S) + case class ExecutorDecommissioning(executorId: String) extends CoarseGrainedClusterMessage + + // A message that sent from driver to executor to decommission that executor. + // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. + object DecommissionExecutor extends CoarseGrainedClusterMessage + + // A message that sent to the executor itself when it receives PWR signal, + // indicating the executor starts to decommission. + object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage @@ -136,7 +145,4 @@ private[spark] object CoarseGrainedClusterMessages { // The message to check if `CoarseGrainedSchedulerBackend` thinks the executor is alive or not. case class IsExecutorAlive(executorId: String) extends CoarseGrainedClusterMessage - - // Used to ask an executor to decommission itself. 
(Can be an internal message) - case object DecommissionSelf extends CoarseGrainedClusterMessage } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 37ea648d80048..1d2689034f1ff 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -191,11 +191,6 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorDataMap.get(executorId).foreach(_.executorEndpoint.send(StopExecutor)) removeExecutor(executorId, reason) - // Do not change this code without running the K8s integration suites - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: $decommissionInfo") - decommissionExecutor(executorId, decommissionInfo, adjustTargetNumExecutors = false) - case RemoveWorker(workerId, host, message) => removeWorker(workerId, host, message) @@ -274,10 +269,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp context.reply(true) // Do not change this code without running the K8s integration suites - case DecommissionExecutor(executorId, decommissionInfo) => - logError(s"Received decommission executor message ${executorId}: ${decommissionInfo}.") - context.reply(decommissionExecutor(executorId, decommissionInfo, - adjustTargetNumExecutors = false)) + case ExecutorDecommissioning(executorId) => + logWarning(s"Received executor $executorId decommissioned message") + context.reply( + decommissionExecutor( + executorId, + ExecutorDecommissionInfo(s"Executor $executorId is decommissioned."), + adjustTargetNumExecutors = false, + triggeredByExecutor = true)) case RetrieveSparkAppConfig(resourceProfileId) => val rp = scheduler.sc.resourceProfileManager.resourceProfileFromId(resourceProfileId) @@ -465,72 +464,50 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @param executorsAndDecomInfo Identifiers of executors & decommission info. * @param adjustTargetNumExecutors whether the target number of executors will be adjusted down * after these executors have been decommissioned. + * @param triggeredByExecutor whether the decommission is triggered at executor. * @return the ids of the executors acknowledged by the cluster manager to be removed. */ override def decommissionExecutors( executorsAndDecomInfo: Array[(String, ExecutorDecommissionInfo)], - adjustTargetNumExecutors: Boolean): Seq[String] = { - + adjustTargetNumExecutors: Boolean, + triggeredByExecutor: Boolean): Seq[String] = withLock { // Do not change this code without running the K8s integration suites - val executorsToDecommission = executorsAndDecomInfo.filter { case (executorId, decomInfo) => - CoarseGrainedSchedulerBackend.this.synchronized { - // Only bother decommissioning executors which are alive. - if (isExecutorActive(executorId)) { - executorsPendingDecommission(executorId) = decomInfo.workerHost - true - } else { - false - } + val executorsToDecommission = executorsAndDecomInfo.flatMap { case (executorId, decomInfo) => + // Only bother decommissioning executors which are alive. 
+ if (isExecutorActive(executorId)) { + scheduler.executorDecommission(executorId, decomInfo) + executorsPendingDecommission(executorId) = decomInfo.workerHost + Some(executorId) + } else { + None } } + logInfo(s"Decommission executors: ${executorsToDecommission.mkString(", ")}") // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { - adjustExecutors(executorsToDecommission.map(_._1)) + adjustExecutors(executorsToDecommission) } - executorsToDecommission.filter { case (executorId, decomInfo) => - doDecommission(executorId, decomInfo) - }.map(_._1) - } - - // Do not change this code without running the K8s integration suites - private def doDecommission(executorId: String, - decomInfo: ExecutorDecommissionInfo): Boolean = { - - logInfo(s"Asking executor $executorId to decommissioning.") - scheduler.executorDecommission(executorId, decomInfo) - // Send decommission message to the executor (it could have originated on the executor - // but not necessarily). - CoarseGrainedSchedulerBackend.this.synchronized { - executorDataMap.get(executorId) match { - case Some(executorInfo) => - executorInfo.executorEndpoint.send(DecommissionSelf) - case None => - // Ignoring the executor since it is not registered. - logWarning(s"Attempted to decommission unknown executor $executorId.") - return false + // Mark those corresponding BlockManagers as decommissioned first before we sending + // decommission notification to executors. So, it's less likely to lead to the race + // condition where `getPeer` request from the decommissioned executor comes first + // before the BlockManagers are marked as decommissioned. + // Note that marking BlockManager as decommissioned doesn't need depend on + // `spark.storage.decommission.enabled`. Because it's meaningless to save more blocks + // for the BlockManager since the executor will be shutdown soon. 
+ scheduler.sc.env.blockManager.master.decommissionBlockManagers(executorsToDecommission) + + if (!triggeredByExecutor) { + executorsToDecommission.foreach { executorId => + logInfo(s"Notify executor $executorId to decommissioning.") + executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) } } - logInfo(s"Asked executor $executorId to decommission.") - - if (conf.get(STORAGE_DECOMMISSION_ENABLED)) { - try { - logInfo(s"Asking block manager corresponding to executor $executorId to decommission.") - scheduler.sc.env.blockManager.master.decommissionBlockManagers(Seq(executorId)) - } catch { - case e: Exception => - logError("Unexpected error during block manager " + - s"decommissioning for executor $executorId: ${e.toString}", e) - return false - } - logInfo(s"Acknowledged decommissioning block manager corresponding to $executorId.") - } - true + executorsToDecommission } - override def start(): Unit = { if (UserGroupInformation.isSecurityEnabled()) { delegationTokenManager = createTokenManager() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 34b03dfec9e80..b9ac8d2ba2784 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -178,9 +178,12 @@ private[spark] class StandaloneSchedulerBackend( } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { - logInfo("Asked to decommission executor") + logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) - decommissionExecutors(Array((execId, decommissionInfo)), adjustTargetNumExecutors = false) + decommissionExecutors( + Array((execId, decommissionInfo)), + adjustTargetNumExecutors = false, + triggeredByExecutor = false) logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index ff0f38a2479b0..3909c02c5bb1f 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -56,7 +56,7 @@ import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock +import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform import org.apache.spark.util._ @@ -243,8 +243,9 @@ private[spark] class BlockManager( private var blockReplicationPolicy: BlockReplicationPolicy = _ + // visible for test // This is volatile since if it's defined we should not accept remote blocks. - @volatile private var decommissioner: Option[BlockManagerDecommissioner] = None + @volatile private[spark] var decommissioner: Option[BlockManagerDecommissioner] = None // A DownloadFileManager used to track all the files of remote blocks which are above the // specified memory threshold. Files will be deleted automatically based on weak reference. 
@@ -1809,7 +1810,9 @@ private[spark] class BlockManager( blocksToRemove.size } - def decommissionBlockManager(): Unit = synchronized { + def decommissionBlockManager(): Unit = storageEndpoint.ask(DecommissionBlockManager) + + private[spark] def decommissionSelf(): Unit = synchronized { decommissioner match { case None => logInfo("Starting block manager decommissioning process...") diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 61a88b4f26c00..569d7d32284bc 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -163,8 +163,14 @@ class BlockManagerMasterEndpoint( context.reply(true) case DecommissionBlockManagers(executorIds) => - val bmIds = executorIds.flatMap(blockManagerIdByExecutor.get) - decommissionBlockManagers(bmIds) + // Mark corresponding BlockManagers as being decommissioning by adding them to + // decommissioningBlockManagerSet, so they won't be used to replicate or migrate blocks. + // Note that BlockManagerStorageEndpoint will be notified about decommissioning when the + // executor is notified(see BlockManager.decommissionSelf), so we don't need to send the + // notification here. + val bms = executorIds.flatMap(blockManagerIdByExecutor.get) + logInfo(s"Mark BlockManagers (${bms.mkString(", ")}) as being decommissioning.") + decommissioningBlockManagerSet ++= bms context.reply(true) case GetReplicateInfoForRDDBlocks(blockManagerId) => @@ -359,21 +365,6 @@ class BlockManagerMasterEndpoint( blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } - /** - * Decommission the given Seq of blockmanagers - * - Adds these block managers to decommissioningBlockManagerSet Set - * - Sends the DecommissionBlockManager message to each of the [[BlockManagerReplicaEndpoint]] - */ - def decommissionBlockManagers(blockManagerIds: Seq[BlockManagerId]): Future[Seq[Unit]] = { - val newBlockManagersToDecommission = blockManagerIds.toSet.diff(decommissioningBlockManagerSet) - val futures = newBlockManagersToDecommission.map { blockManagerId => - decommissioningBlockManagerSet.add(blockManagerId) - val info = blockManagerInfo(blockManagerId) - info.storageEndpoint.ask[Unit](DecommissionBlockManager) - } - Future.sequence{ futures.toSeq } - } - /** * Returns a Seq of ReplicateBlock for each RDD block stored by given blockManagerId * @param blockManagerId - block manager id for which ReplicateBlock info is needed diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index a69bebc23c661..54a72568b18fa 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -62,7 +62,7 @@ class BlockManagerStorageEndpoint( } case DecommissionBlockManager => - context.reply(blockManager.decommissionBlockManager()) + context.reply(blockManager.decommissionSelf()) case RemoveBroadcast(broadcastId, _) => doAsync[Int]("removing broadcast " + broadcastId, context) { diff --git a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala index 9c5e460854053..abe5b7a71ca63 100644 --- 
a/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/DecommissionWorkerSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually._ import org.apache.spark._ -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommission} +import org.apache.spark.deploy.DeployMessages.{DecommissionWorkers, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.{ApplicationInfo, Master, WorkerInfo} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -414,7 +414,7 @@ class DecommissionWorkerSuite def decommissionWorkerOnMaster(workerInfo: WorkerInfo, reason: String): Unit = { logInfo(s"Trying to decommission worker ${workerInfo.id} for reason `$reason`") - master.self.send(WorkerDecommission(workerInfo.id, workerInfo.endpoint)) + master.self.send(DecommissionWorkers(Seq(workerInfo.id))) } def killWorkerAfterTimeout(workerInfo: WorkerInfo, secondsToWait: Int): Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index fe88822bb46b5..93c0aa000e207 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.concurrent.{Eventually, ScalaFutures} import org.apache.spark._ import org.apache.spark.deploy.{ApplicationDescription, Command} -import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState, WorkerDecommissioning} import org.apache.spark.deploy.master.{ApplicationInfo, Master} import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.{config, Logging} @@ -122,7 +122,11 @@ class AppClientSuite // Send a decommission self to all the workers // Note: normally the worker would send this on their own. - workers.foreach(worker => worker.decommissionSelf()) + workers.foreach { worker => + worker.decommissionSelf() + // send the notice to Master to tell the decommission of Workers + master.self.send(WorkerDecommissioning(worker.workerId, worker.self)) + } // Decommissioning is async. 
eventually(timeout(1.seconds), interval(10.millis)) { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 8f19fb5cc80bd..3329300b64d13 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -72,6 +72,7 @@ class MockWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) extend }) } + var decommissioned = false var appDesc = DeployTestUtils.createAppDesc() val drivers = mutable.HashSet[String]() val driverResources = new mutable.HashMap[String, Map[String, Set[String]]] @@ -96,6 +97,8 @@ class MockWorker(master: RpcEndpointRef, conf: SparkConf = new SparkConf) extend case None => } driverIdToAppId.remove(driverId) + case DecommissionWorker => + decommissioned = true } } @@ -742,9 +745,9 @@ class MasterSuite extends SparkFunSuite hostnames: Seq[String]): Unit = { val conf = new SparkConf() val master = makeAliveMaster(conf) - val workerRegs = (1 to numWorkers).map{idx => + val workers = (1 to numWorkers).map { idx => val worker = new MockWorker(master.self, conf) - worker.rpcEnv.setupEndpoint("worker", worker) + worker.rpcEnv.setupEndpoint(s"worker-$idx", worker) val workerReg = RegisterWorker( worker.id, "localhost", @@ -755,14 +758,14 @@ class MasterSuite extends SparkFunSuite "http://localhost:8080", RpcAddress("localhost", 10000)) master.self.send(workerReg) - workerReg + worker } eventually(timeout(10.seconds)) { val masterState = master.self.askSync[MasterStateResponse](RequestMasterState) assert(masterState.workers.length === numWorkers) assert(masterState.workers.forall(_.state == WorkerState.ALIVE)) - assert(masterState.workers.map(_.id).toSet == workerRegs.map(_.id).toSet) + assert(masterState.workers.map(_.id).toSet == workers.map(_.id).toSet) } val decomWorkersCount = master.self.askSync[Integer](DecommissionWorkersOnHosts(hostnames)) @@ -773,8 +776,11 @@ class MasterSuite extends SparkFunSuite eventually(timeout(30.seconds)) { val masterState = master.self.askSync[MasterStateResponse](RequestMasterState) assert(masterState.workers.length === numWorkers) - val workersActuallyDecomed = masterState.workers.count(_.state == WorkerState.DECOMMISSIONED) - assert(workersActuallyDecomed === numWorkersExpectedToDecom) + val workersActuallyDecomed = masterState.workers + .filter(_.state == WorkerState.DECOMMISSIONED).map(_.id) + val decommissionedWorkers = workers.filter(w => workersActuallyDecomed.contains(w.id)) + assert(workersActuallyDecomed.length === numWorkersExpectedToDecom) + assert(decommissionedWorkers.forall(_.decommissioned)) } // Decommissioning a worker again should return the same answer since we want this call to be diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 83bb66efdac9e..4a92cbcb85847 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { override def beforeEach(): Unit = { - val conf = new SparkConf().setAppName("test").setMaster("local") + val conf = new SparkConf().setAppName("test") .set(config.DECOMMISSION_ENABLED, true) sc = new 
SparkContext("local-cluster[2, 1, 1024]", "test", conf) @@ -78,7 +78,10 @@ class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { val execs = sched.getExecutorIds() // Make the executors decommission, finish, exit, and not be replaced. val execsAndDecomInfo = execs.map((_, ExecutorDecommissionInfo("", None))).toArray - sched.decommissionExecutors(execsAndDecomInfo, adjustTargetNumExecutors = true) + sched.decommissionExecutors( + execsAndDecomInfo, + adjustTargetNumExecutors = true, + triggeredByExecutor = false) val asyncCountResult = ThreadUtils.awaitResult(asyncCount, 20.seconds) assert(asyncCountResult === 10) } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index dcf313f671d5e..bb685cd353ddc 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -40,6 +40,46 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS val TaskEnded = "TASK_ENDED" val JobEnded = "JOB_ENDED" + Seq(false, true).foreach { isEnabled => + test(s"SPARK-32850: BlockManager decommission should respect the configuration " + + s"(enabled=${isEnabled})") { + val conf = new SparkConf() + .setAppName("test-blockmanager-decommissioner") + .setMaster("local-cluster[2, 1, 1024]") + .set(config.DECOMMISSION_ENABLED, true) + .set(config.STORAGE_DECOMMISSION_ENABLED, isEnabled) + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 6000) + val executors = sc.getExecutorIds().toArray + val decommissionListener = new SparkListener { + override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { + // ensure Tasks launched at executors before they're marked as decommissioned by driver + Thread.sleep(3000) + sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + .decommissionExecutors( + executors.map { id => (id, ExecutorDecommissionInfo("test")) }, + true, + false) + } + } + sc.addSparkListener(decommissionListener) + + val decommissionStatus: Seq[Boolean] = sc.parallelize(1 to 100, 2).mapPartitions { _ => + val startTime = System.currentTimeMillis() + while (SparkEnv.get.blockManager.decommissioner.isEmpty && + // wait at most 6 seconds for BlockManager to start to decommission (if enabled) + System.currentTimeMillis() - startTime < 6000) { + Thread.sleep(300) + } + val blockManagerDecommissionStatus = + if (SparkEnv.get.blockManager.decommissioner.isEmpty) false else true + Iterator.single(blockManagerDecommissionStatus) + }.collect() + assert(decommissionStatus.forall(_ == isEnabled)) + sc.removeSparkListener(decommissionListener) + } + } + testRetry(s"verify that an already running task which is going to cache data succeeds " + s"on a decommissioned executor after task start") { runDecomTest(true, false, TaskStarted) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index cdde8411d8b7b..9d7db04bb72b0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -40,9 +40,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission.", + "Decommission executors", "Final accumulator value is: 100"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, @@ -73,9 +71,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission."), + "Decommission executors"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, @@ -110,9 +106,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => mainClass = "", expectedLogOnCompletion = Seq( "Finished waiting, stopping Spark", - "Received decommission executor message", - "Acknowledged decommissioning block manager", - ": Executor decommission."), + "Decommission executors"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index f1870718c6730..293498ae5c37b 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler -import org.mockito.ArgumentMatchers.{eq => meq} +import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually.{eventually, timeout} @@ -101,12 +101,12 @@ class ExecutorAllocationManagerSuite extends TestSuiteBase val decomInfo = ExecutorDecommissionInfo("spark scale down", None) if (decommissioning) { verify(allocationClient, times(1)).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) verify(allocationClient, never).killExecutor(meq(expectedExec.get)) } else { verify(allocationClient, times(1)).killExecutor(meq(expectedExec.get)) verify(allocationClient, never).decommissionExecutor( - meq(expectedExec.get), meq(decomInfo), meq(true)) + meq(expectedExec.get), meq(decomInfo), meq(true), any()) } } else { if (decommissioning) { From e21bb710e5473831ca7f1aba6081a217067789a8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 23 Oct 2020 05:52:38 +0000 Subject: [PATCH 0311/1009] [SPARK-32991][SQL] Use conf in shared state as the original configuraion for RESET MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 
#### case the case here covers the static and dynamic SQL configs behavior in `sharedState` and `sessionState`, and the specially handled config `spark.sql.warehouse.dir` the case can be found here - https://github.com/yaooqinn/sugar/blob/master/src/main/scala/com/netease/mammut/spark/training/sql/WarehouseSCBeforeSS.scala ```scala import java.lang.reflect.Field import org.apache.spark.sql.SparkSession import org.apache.spark.{SparkConf, SparkContext} object WarehouseSCBeforeSS extends App { val wh = "spark.sql.warehouse.dir" val td = "spark.sql.globalTempDatabase" val custom = "spark.sql.custom" val conf = new SparkConf() .setMaster("local") .setAppName("SPARK-32991") .set(wh, "./data1") .set(td, "bob") val sc = new SparkContext(conf) val spark = SparkSession.builder() .config(wh, "./data2") .config(td, "alice") .config(custom, "kyao") .getOrCreate() val confField: Field = spark.sharedState.getClass.getDeclaredField("conf") confField.setAccessible(true) private val shared: SparkConf = confField.get(spark.sharedState).asInstanceOf[SparkConf] println() println(s"=====> SharedState: $wh=${shared.get(wh)}") println(s"=====> SharedState: $td=${shared.get(td)}") println(s"=====> SharedState: $custom=${shared.get(custom, "")}") println(s"=====> SessionState: $wh=${spark.conf.get(wh)}") println(s"=====> SessionState: $td=${spark.conf.get(td)}") println(s"=====> SessionState: $custom=${spark.conf.get(custom, "")}") val spark2 = SparkSession.builder().config(td, "fred").getOrCreate() println(s"=====> SessionState 2: $wh=${spark2.conf.get(wh)}") println(s"=====> SessionState 2: $td=${spark2.conf.get(td)}") println(s"=====> SessionState 2: $custom=${spark2.conf.get(custom, "")}") SparkSession.setActiveSession(spark) spark.sql("RESET") println(s"=====> SessionState RESET: $wh=${spark.conf.get(wh)}") println(s"=====> SessionState RESET: $td=${spark.conf.get(td)}") println(s"=====> SessionState RESET: $custom=${spark.conf.get(custom, "")}") val spark3 = SparkSession.builder().getOrCreate() println(s"=====> SessionState 3: $wh=${spark2.conf.get(wh)}") println(s"=====> SessionState 3: $td=${spark2.conf.get(td)}") println(s"=====> SessionState 3: $custom=${spark2.conf.get(custom, "")}") } ``` #### outputs and analysis ``` // 1. Make the cloned spark conf in shared state respect the warehouse dir from the 1st SparkSession //=====> SharedState: spark.sql.warehouse.dir=./data1 // 2. ⏬ //=====> SharedState: spark.sql.globalTempDatabase=alice //=====> SharedState: spark.sql.custom=kyao //=====> SessionState: spark.sql.warehouse.dir=./data2 //=====> SessionState: spark.sql.globalTempDatabase=alice //=====> SessionState: spark.sql.custom=kyao //=====> SessionState 2: spark.sql.warehouse.dir=./data2 //=====> SessionState 2: spark.sql.globalTempDatabase=alice //=====> SessionState 2: spark.sql.custom=kyao // 2'.🔼 OK until here // 3. Make the below 3 ones respect the cloned spark conf in shared state with issue 1 fixed //=====> SessionState RESET: spark.sql.warehouse.dir=./data1 //=====> SessionState RESET: spark.sql.globalTempDatabase=bob //=====> SessionState RESET: spark.sql.custom= // 4. Then the SparkSessions created after RESET will be corrected. //=====> SessionState 3: spark.sql.warehouse.dir=./data1 //=====> SessionState 3: spark.sql.globalTempDatabase=bob //=====> SessionState 3: spark.sql.custom= ``` In this PR, we gather all valid config to the cloned conf of `sharedState` during being constructed, well, actually only `spark.sql.warehouse.dir` is missing. 
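Concretely, the fix boils down to two small pieces, shown here as condensed, illustrative excerpts of the `SharedState` and `ResetCommand` changes in this patch rather than complete code:

```scala
// In SharedState.loadHiveConfFile: prefer the warehouse dir passed in the session's
// initial options, then fall back to the SparkContext conf.
val sparkWarehouseOption =
  initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key))

// In ResetCommand.run: reset against the conf captured in the shared state,
// not the raw SparkContext conf.
val defaults = sparkSession.sharedState.conf
```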
Then we use this conf as defaults for `RESET` Command. `SparkSession.clearActiveSession/clearDefaultSession` will make the shared state invisible and unsharable. They will be internal only soon (confirmed with Wenchen), so cases with them called will not be a problem. ### Why are the changes needed? bugfix for programming API to call RESET while users creating SparkContext first and config SparkSession later. ### Does this PR introduce _any_ user-facing change? yes, before this change when you use programming API and call RESET, all configs will be reset to SparkContext.conf, now they go to SparkSession.sharedState.conf ### How was this patch tested? new tests Closes #30045 from yaooqinn/SPARK-32991. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/execution/command/SetCommand.scala | 2 +- .../spark/sql/internal/SharedState.scala | 15 ++++-- .../spark/sql/SparkSessionBuilderSuite.scala | 54 +++++++++++++++++++ .../spark/sql/hive/HiveSharedStateSuite.scala | 46 +++++++++++----- 4 files changed, 99 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index fd89e361fe3d1..61ee6d7f4a299 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -172,7 +172,7 @@ object SetCommand { case class ResetCommand(config: Option[String]) extends RunnableCommand with IgnoreCachedData { override def run(sparkSession: SparkSession): Seq[Row] = { - val defaults = sparkSession.sparkContext.conf + val defaults = sparkSession.sharedState.conf config match { case Some(key) => sparkSession.conf.unset(key) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index ce4385d88f1e9..1acdc4bd5f0e3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -55,10 +55,11 @@ private[sql] class SharedState( SharedState.setFsUrlStreamHandlerFactory(sparkContext.conf, sparkContext.hadoopConfiguration) - private val (conf, hadoopConf) = { + private[sql] val (conf, hadoopConf) = { // Load hive-site.xml into hadoopConf and determine the warehouse path which will be set into // both spark conf and hadoop conf avoiding be affected by any SparkSession level options - SharedState.loadHiveConfFile(sparkContext.conf, sparkContext.hadoopConfiguration) + SharedState.loadHiveConfFile( + sparkContext.conf, sparkContext.hadoopConfiguration, initialConfigs) val confClone = sparkContext.conf.clone() val hadoopConfClone = new Configuration(sparkContext.hadoopConfiguration) // If `SparkSession` is instantiated using an existing `SparkContext` instance and no existing @@ -227,7 +228,8 @@ object SharedState extends Logging { */ def loadHiveConfFile( sparkConf: SparkConf, - hadoopConf: Configuration): Unit = { + hadoopConf: Configuration, + initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { val hiveWarehouseKey = "hive.metastore.warehouse.dir" val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml") if (configFile != null) { @@ -238,11 +240,13 @@ object SharedState extends Logging { hadoopConf.setIfUnset(entry.getKey, entry.getValue) } } + val sparkWarehouseOption = + 
initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key)) // hive.metastore.warehouse.dir only stay in hadoopConf sparkConf.remove(hiveWarehouseKey) // Set the Hive metastore warehouse path to the one we use val hiveWarehouseDir = hadoopConf.get(hiveWarehouseKey) - val warehousePath = if (hiveWarehouseDir != null && !sparkConf.contains(WAREHOUSE_PATH.key)) { + val warehousePath = if (hiveWarehouseDir != null && sparkWarehouseOption.isEmpty) { // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set, // we will respect the value of hive.metastore.warehouse.dir. sparkConf.set(WAREHOUSE_PATH.key, hiveWarehouseDir) @@ -254,9 +258,10 @@ object SharedState extends Logging { // the value of spark.sql.warehouse.dir. // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set // we will set hive.metastore.warehouse.dir to the default value of spark.sql.warehouse.dir. - val sparkWarehouseDir = sparkConf.get(WAREHOUSE_PATH) + val sparkWarehouseDir = sparkWarehouseOption.getOrElse(WAREHOUSE_PATH.defaultValueString) logInfo(s"Setting $hiveWarehouseKey ('$hiveWarehouseDir') to the value of " + s"${WAREHOUSE_PATH.key} ('$sparkWarehouseDir').") + sparkConf.set(WAREHOUSE_PATH.key, sparkWarehouseDir) hadoopConf.set(hiveWarehouseKey, sparkWarehouseDir) sparkWarehouseDir } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index e1f7b6f455e14..23695af0f59c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -300,4 +300,58 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { session.stop() } } + + test("SPARK-32991: Use conf in shared state as the original configuration for RESET") { + val wh = "spark.sql.warehouse.dir" + val td = "spark.sql.globalTempDatabase" + val custom = "spark.sql.custom" + + val conf = new SparkConf() + .setMaster("local") + .setAppName("SPARK-32991") + .set(wh, "./data1") + .set(td, "bob") + + val sc = new SparkContext(conf) + + val spark = SparkSession.builder() + .config(wh, "./data2") + .config(td, "alice") + .config(custom, "kyao") + .getOrCreate() + + // When creating the first session like above, we will update the shared spark conf to the + // newly specified values + val sharedWH = spark.sharedState.conf.get(wh) + val sharedTD = spark.sharedState.conf.get(td) + val sharedCustom = spark.sharedState.conf.get(custom) + assert(sharedWH === "./data2", + "The warehouse dir in shared state should be determined by the 1st created spark session") + assert(sharedTD === "alice", + "Static sql configs in shared state should be determined by the 1st created spark session") + assert(sharedCustom === "kyao", + "Dynamic sql configs in shared state should be determined by the 1st created spark session") + + assert(spark.conf.get(wh) === sharedWH, + "The warehouse dir in session conf and shared state conf should be consistent") + assert(spark.conf.get(td) === sharedTD, + "Static sql configs in session conf and shared state conf should be consistent") + assert(spark.conf.get(custom) === sharedCustom, + "Dynamic sql configs in session conf and shared state conf should be consistent before" + + " setting to new ones") + + spark.sql("RESET") + + assert(spark.conf.get(wh) === sharedWH, + "The warehouse dir in shared state should be respect after RESET") + 
assert(spark.conf.get(td) === sharedTD, + "Static sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === sharedCustom, + "Dynamic sql configs in shared state should be respect after RESET") + + val spark2 = SparkSession.builder().getOrCreate() + assert(spark2.conf.get(wh) === sharedWH) + assert(spark2.conf.get(td) === sharedTD) + assert(spark2.conf.get(custom) === sharedCustom) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala index 78535b094b83d..d2d4546ea18ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala @@ -20,35 +20,46 @@ package org.apache.spark.sql.hive import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} -import org.apache.spark.sql.internal.SharedState +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.util.Utils class HiveSharedStateSuite extends SparkFunSuite { + override def beforeEach(): Unit = { + SparkSession.clearActiveSessionInternal() + SparkSession.clearDefaultSession() + super.beforeEach() + } + test("initial configs should be passed to SharedState but not SparkContext") { val conf = new SparkConf().setMaster("local").setAppName("SharedState Test") val sc = SparkContext.getOrCreate(conf) + val wareHouseDir = Utils.createTempDir().toString val invalidPath = "invalid/path" val metastorePath = Utils.createTempDir() val tmpDb = "tmp_db" // The initial configs used to generate SharedState, none of these should affect the global - // shared SparkContext's configurations. Especially, all these configs are passed to the cloned - // confs inside SharedState except metastore warehouse dir. + // shared SparkContext's configurations, except spark.sql.warehouse.dir. + // Especially, all these configs are passed to the cloned confs inside SharedState for sharing + // cross sessions. 
val initialConfigs = Map("spark.foo" -> "bar", - WAREHOUSE_PATH.key -> invalidPath, - ConfVars.METASTOREWAREHOUSE.varname -> invalidPath, + WAREHOUSE_PATH.key -> wareHouseDir, + ConfVars.METASTOREWAREHOUSE.varname -> wareHouseDir, CATALOG_IMPLEMENTATION.key -> "hive", ConfVars.METASTORECONNECTURLKEY.varname -> s"jdbc:derby:;databaseName=$metastorePath/metastore_db;create=true", GLOBAL_TEMP_DATABASE.key -> tmpDb) - val state = new SharedState(sc, initialConfigs) - assert(sc.conf.get(WAREHOUSE_PATH.key) !== invalidPath, - "warehouse conf in session options can't affect application wide spark conf") - assert(sc.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) !== invalidPath, - "warehouse conf in session options can't affect application wide hadoop conf") + val builder = SparkSession.builder() + initialConfigs.foreach { case (k, v) => builder.config(k, v) } + val ss = builder.getOrCreate() + val state = ss.sharedState + assert(sc.conf.get(WAREHOUSE_PATH.key) === wareHouseDir, + "initial warehouse conf in session options can affect application wide spark conf") + assert(sc.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) === wareHouseDir, + "initial warehouse conf in session options can affect application wide hadoop conf") assert(!state.sparkContext.conf.contains("spark.foo"), "static spark conf should not be affected by session") @@ -57,9 +68,20 @@ class HiveSharedStateSuite extends SparkFunSuite { val client = state.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client assert(client.getConf("spark.foo", "") === "bar", "session level conf should be passed to catalog") - assert(client.getConf(ConfVars.METASTOREWAREHOUSE.varname, invalidPath) !== invalidPath, - "session level conf should be passed to catalog except warehouse dir") + assert(client.getConf(ConfVars.METASTOREWAREHOUSE.varname, "") === wareHouseDir, + "session level conf should be passed to catalog") assert(state.globalTempViewManager.database === tmpDb) + + val ss2 = + builder.config("spark.foo", "bar2222").config(WAREHOUSE_PATH.key, invalidPath).getOrCreate() + + assert(ss2.sparkContext.conf.get(WAREHOUSE_PATH.key) !== invalidPath, + "warehouse conf in session options can't affect application wide spark conf") + assert(ss2.sparkContext.hadoopConfiguration.get(ConfVars.METASTOREWAREHOUSE.varname) !== + invalidPath, "warehouse conf in session options can't affect application wide hadoop conf") + assert(ss.conf.get("spark.foo") === "bar2222", "session level conf should be passed to catalog") + assert(ss.conf.get(WAREHOUSE_PATH) !== invalidPath, + "session level conf should be passed to catalog") } } From 5e5b48d9a8a65c23d5abd0ea973e9d515731f17e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 22 Oct 2020 22:53:24 -0700 Subject: [PATCH 0312/1009] [SPARK-33226][BUILD] Upgrade to SBT 1.4.1 ### What changes were proposed in this pull request? This PR aims to upgrade SBT from 1.4.0 to 1.4.1. ### Why are the changes needed? SBT 1.4.1 is a maintenance release at 1.4.x line. There are many bug fixes already. - https://github.com/sbt/sbt/releases/tag/v1.4.1 (Released on 2020-10-19) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI and check [the Jenkins log](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/130185/testReport). 
``` ======================================================================== Building Spark ======================================================================== [info] Building Spark using SBT with these arguments: -Phadoop-3.2 -Phive-2.3 -Phive -Pspark-ganglia-lgpl -Pkinesis-asl -Pyarn -Phadoop-cloud -Phive-thriftserver -Pkubernetes -Pmesos test:package streaming-kinesis-asl-assembly/assembly Using /usr/java/jdk1.8.0_191 as default JAVA_HOME. Note, this will be overridden by -java-home if it is set. Attempting to fetch sbt Launching sbt from build/sbt-launch-1.4.1.jar [info] [launcher] getting org.scala-sbt sbt 1.4.1 (this may take some time)... downloading https://repo1.maven.org/maven2/org/scala-sbt/sbt/1.4.1/sbt-1.4.1.jar ... ``` Closes #30137 from dongjoon-hyun/SBT_1.4.1. Authored-by: Dongjoon Hyun Signed-off-by: Liang-Chi Hsieh --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index e391883fbbc2d..d70d98448e4ca 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.0 +sbt.version=1.4.1 From 10bd42cd475eea8d5e5689e770e4773cebf62374 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 23 Oct 2020 19:19:02 +0900 Subject: [PATCH 0313/1009] [SPARK-33104][BUILD] Exclude 'org.apache.hadoop:hadoop-yarn-server-resourcemanager:jar:tests' ### What changes were proposed in this pull request? This PR proposes to exclude `org.apache.hadoop:hadoop-yarn-server-resourcemanager:jar:tests` from `hadoop-yarn-server-tests` when we use Hadoop 2 profile. For some reasons, after SBT 1.3 upgrade at SPARK-21708, SBT starts to pull the dependencies of 'hadoop-yarn-server-tests' with 'tests' classifier: ``` org/apache/hadoop/hadoop-common/2.7.4/hadoop-common-2.7.4-tests.jar org/apache/hadoop/hadoop-yarn-common/2.7.4/hadoop-yarn-common-2.7.4-tests.jar org/apache/hadoop/hadoop-yarn-server-resourcemanager/2.7.4/hadoop-yarn-server-resourcemanager-2.7.4-tests.jar ``` these were not pulled before the upgrade. This specific `hadoop-yarn-server-resourcemanager-2.7.4-tests.jar` causes the problem (SPARK-33104) 1. When the test case creates the Hadoop configuration here, https://github.com/apache/spark/blob/cc06266ade5a4eb35089501a3b32736624208d4c/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala#L122 2. Such jars above have higher precedence in the class path, instead of the specified custom `core-site.xml` in the test: https://github.com/apache/spark/blob/e93b8f02cd706bedc47c9b55a73f632fe9e61ec3/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala#L1375 3. Later, `core-site.xml` in the jar is picked instead in Hadoop's `Configuration`: Before this fix: ``` jar:file:/.../https/maven-central.storage-download.googleapis.com/maven2/org/apache/hadoop/ hadoop-yarn-server-resourcemanager/2.7.4/hadoop-yarn-server-resourcemanager-2.7.4-tests.jar!/core-site.xml ``` After this fix: ``` file:/.../spark/resource-managers/yarn/target/org.apache.spark.deploy.yarn.YarnClusterSuite/ org.apache.spark.deploy.yarn.YarnClusterSuite-localDir-nm-0_0/ usercache/.../filecache/10/__spark_conf__.zip/__hadoop_conf__/core-site.xml ``` 4. 
the `core-site.xml` in the jar of course does not contain: https://github.com/apache/spark/blob/2cfd215dc4fb1ff6865644fec8284ba93dcddd5c/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala#L133-L141 and the specific test fails. This PR uses some kind of hacky approach. It was excluded from 'hadoop-yarn-server-tests' with 'tests' classifier, and then added back as a proper dependency (when Hadoop 2 profile is used). In this way, SBT does not pull `hadoop-yarn-server-resourcemanager` with `tests` classifier anymore. ### Why are the changes needed? To make the build pass. This is a blocker. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually tested and debugged: ```bash build/sbt clean "yarn/testOnly *.YarnClusterSuite -- -z SparkHadoopUtil" -Pyarn -Phadoop-2.7 -Phive -Phive-2.3 ``` Closes #30133 from HyukjinKwon/SPARK-33104. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- pom.xml | 16 ++++++++++++++++ resource-managers/yarn/pom.xml | 9 +++++++++ 2 files changed, 25 insertions(+) diff --git a/pom.xml b/pom.xml index 8b2130f2d9f56..2c6f458ee25fd 100644 --- a/pom.xml +++ b/pom.xml @@ -1399,8 +1399,24 @@ com.sun.jersey.contribs * + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + ${yarn.version} + test + org.apache.hadoop hadoop-yarn-server-web-proxy diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index da715c6bdc59f..f6d6ddccc99c3 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -65,6 +65,15 @@ tests test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + From 82d500a05cb81019107376e5a9e7d1d3d27ff808 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 23 Oct 2020 13:34:33 -0700 Subject: [PATCH 0314/1009] [SPARK-33193][SQL][TEST] Hive ThriftServer JDBC Database MetaData API Behavior Auditing ### What changes were proposed in this pull request? Add a test case to audit all JDBC metadata behaviors to check and prevent potential APIs silent changing from both the upstream hive-jdbc module or the Spark thrift server side. Forked from my kyuubi project here https://github.com/yaooqinn/kyuubi/blob/master/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/operation/SparkOperationSuite.scala ### Why are the changes needed? Make the SparkThriftServer safer to evolve. ### Does this PR introduce _any_ user-facing change? dev only ### How was this patch tested? new tests Closes #30101 from yaooqinn/SPARK-33193. 
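For illustration only (not part of this patch), here is a minimal client-side sketch of how these DatabaseMetaData calls reach the Spark Thrift Server through the Hive JDBC driver. The connection URL, port, and credentials below are assumptions; the expected values in the comments mirror the assertions added in the test suite below.

```scala
// Hedged sketch: probe a few DatabaseMetaData methods over Hive JDBC.
// Assumes hive-jdbc is on the classpath and a Spark Thrift Server listens on localhost:10000.
import java.sql.{DriverManager, SQLFeatureNotSupportedException}

object MetadataProbe {
  def main(args: Array[String]): Unit = {
    Class.forName("org.apache.hive.jdbc.HiveDriver")
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "anonymous", "")
    try {
      val metaData = conn.getMetaData
      // Calls answered by the server, as asserted in the "Method supported" test below.
      println(metaData.getDatabaseProductName)    // expected: "Spark SQL"
      println(metaData.getDatabaseProductVersion) // expected: the Spark version string
      // Calls rejected on the hive-jdbc client side, as in the "Method not supported" test.
      try {
        metaData.getURL
      } catch {
        case e: SQLFeatureNotSupportedException => println(e.getMessage) // "Method not supported"
      }
    } finally {
      conn.close()
    }
  }
}
```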
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../SparkMetadataOperationSuite.scala | 206 +++++++++++++++++- 1 file changed, 205 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index b94d819326d16..b413b46adcaa1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -17,12 +17,15 @@ package org.apache.spark.sql.hive.thriftserver -import java.sql.{DatabaseMetaData, ResultSet} +import java.sql.{DatabaseMetaData, ResultSet, SQLFeatureNotSupportedException} +import org.apache.hive.common.util.HiveVersionInfo import org.apache.hive.service.cli.HiveSQLException +import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ +import org.apache.spark.util.VersionUtils class SparkMetadataOperationSuite extends HiveThriftJdbcTest { @@ -396,4 +399,205 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { } } } + + test("Hive ThriftServer JDBC Database MetaData API Auditing - Method not supported") { + // These APIs belong to the upstream Apache Hive's hive-jdbc artifact where defines the hive + // behavior. Users can also use it to interact with Spark ThriftServer directly. Some behaviors + // are not fully consistent with Spark e.g. we support correlated subqueries but the hive-jdbc + // now fail directly at client side. There is nothing we can do but accept the current + // condition and highlight the difference and make it perspective in future changes both from + // upstream and inside Spark. 
+ withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + Seq( + () => metaData.allProceduresAreCallable, + () => metaData.getURL, + () => metaData.getUserName, + () => metaData.isReadOnly, + () => metaData.nullsAreSortedHigh, + () => metaData.nullsAreSortedLow, + () => metaData.nullsAreSortedAtStart, + () => metaData.nullsAreSortedAtEnd, + () => metaData.usesLocalFiles, + () => metaData.usesLocalFilePerTable, + () => metaData.supportsMixedCaseIdentifiers, + () => metaData.supportsMixedCaseQuotedIdentifiers, + () => metaData.storesUpperCaseIdentifiers, + () => metaData.storesUpperCaseQuotedIdentifiers, + () => metaData.storesLowerCaseIdentifiers, + () => metaData.storesLowerCaseQuotedIdentifiers, + () => metaData.storesMixedCaseIdentifiers, + () => metaData.storesMixedCaseQuotedIdentifiers, + () => metaData.getSQLKeywords, + () => metaData.nullPlusNonNullIsNull, + () => metaData.supportsConvert, + () => metaData.supportsTableCorrelationNames, + () => metaData.supportsDifferentTableCorrelationNames, + () => metaData.supportsExpressionsInOrderBy, + () => metaData.supportsOrderByUnrelated, + () => metaData.supportsGroupByUnrelated, + () => metaData.supportsGroupByBeyondSelect, + () => metaData.supportsLikeEscapeClause, + () => metaData.supportsMultipleTransactions, + () => metaData.supportsMinimumSQLGrammar, + () => metaData.supportsCoreSQLGrammar, + () => metaData.supportsExtendedSQLGrammar, + () => metaData.supportsANSI92EntryLevelSQL, + () => metaData.supportsANSI92IntermediateSQL, + () => metaData.supportsANSI92FullSQL, + () => metaData.supportsIntegrityEnhancementFacility, + () => metaData.isCatalogAtStart, + () => metaData.supportsSubqueriesInComparisons, + () => metaData.supportsSubqueriesInExists, + () => metaData.supportsSubqueriesInIns, + () => metaData.supportsSubqueriesInQuantifieds, + // Spark support this, see https://issues.apache.org/jira/browse/SPARK-18455 + () => metaData.supportsCorrelatedSubqueries, + () => metaData.supportsOpenCursorsAcrossCommit, + () => metaData.supportsOpenCursorsAcrossRollback, + () => metaData.supportsOpenStatementsAcrossCommit, + () => metaData.supportsOpenStatementsAcrossRollback, + () => metaData.getMaxBinaryLiteralLength, + () => metaData.getMaxCharLiteralLength, + () => metaData.getMaxColumnsInGroupBy, + () => metaData.getMaxColumnsInIndex, + () => metaData.getMaxColumnsInOrderBy, + () => metaData.getMaxColumnsInSelect, + () => metaData.getMaxColumnsInTable, + () => metaData.getMaxConnections, + () => metaData.getMaxCursorNameLength, + () => metaData.getMaxIndexLength, + () => metaData.getMaxSchemaNameLength, + () => metaData.getMaxProcedureNameLength, + () => metaData.getMaxCatalogNameLength, + () => metaData.getMaxRowSize, + () => metaData.doesMaxRowSizeIncludeBlobs, + () => metaData.getMaxStatementLength, + () => metaData.getMaxStatements, + () => metaData.getMaxTableNameLength, + () => metaData.getMaxTablesInSelect, + () => metaData.getMaxUserNameLength, + () => metaData.supportsTransactionIsolationLevel(1), + () => metaData.supportsDataDefinitionAndDataManipulationTransactions, + () => metaData.supportsDataManipulationTransactionsOnly, + () => metaData.dataDefinitionCausesTransactionCommit, + () => metaData.dataDefinitionIgnoredInTransactions, + () => metaData.getColumnPrivileges("", "%", "%", "%"), + () => metaData.getTablePrivileges("", "%", "%"), + () => metaData.getBestRowIdentifier("", "%", "%", 0, true), + () => metaData.getVersionColumns("", "%", "%"), + () => metaData.getExportedKeys("", "default", 
""), + () => metaData.supportsResultSetConcurrency(ResultSet.TYPE_FORWARD_ONLY, 2), + () => metaData.ownUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.ownInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersUpdatesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersDeletesAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.othersInsertsAreVisible(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.updatesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.deletesAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.insertsAreDetected(ResultSet.TYPE_FORWARD_ONLY), + () => metaData.supportsNamedParameters, + () => metaData.supportsMultipleOpenResults, + () => metaData.supportsGetGeneratedKeys, + () => metaData.getSuperTypes("", "%", "%"), + () => metaData.getSuperTables("", "%", "%"), + () => metaData.getAttributes("", "%", "%", "%"), + () => metaData.getResultSetHoldability, + () => metaData.locatorsUpdateCopy, + () => metaData.supportsStatementPooling, + () => metaData.getRowIdLifetime, + () => metaData.supportsStoredFunctionsUsingCallSyntax, + () => metaData.autoCommitFailureClosesAllResultSets, + () => metaData.getClientInfoProperties, + () => metaData.getFunctionColumns("", "%", "%", "%"), + () => metaData.getPseudoColumns("", "%", "%", "%"), + () => metaData.generatedKeyAlwaysReturned).foreach { func => + val e = intercept[SQLFeatureNotSupportedException](func()) + assert(e.getMessage === "Method not supported") + } + } + } + + test("Hive ThriftServer JDBC Database MetaData API Auditing - Method supported") { + // These APIs belong to the upstream Apache Hive's hive-jdbc artifact where defines the hive + // behavior. Users can also use it to interact with Spark ThriftServer directly. Some behaviors + // are not fully consistent with Spark e.g. we can work with multiple catalogs. + // There is nothing we can do but accept the current condition and highlight the difference + // and make it perspective in future changes both from upstream and inside Spark. 
+ withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + assert(metaData.allTablesAreSelectable) + assert(metaData.getDatabaseProductName === "Spark SQL") + assert(metaData.getDatabaseProductVersion === SPARK_VERSION) + assert(metaData.getDriverName === "Hive JDBC") + assert(metaData.getDriverVersion === HiveVersionInfo.getVersion) + assert(metaData.getDatabaseMajorVersion === VersionUtils.majorVersion(SPARK_VERSION)) + assert(metaData.getDatabaseMinorVersion === VersionUtils.minorVersion(SPARK_VERSION)) + assert(metaData.getIdentifierQuoteString === " ", + "This method returns a space \" \" if identifier quoting is not supported") + assert(metaData.getNumericFunctions === "") + assert(metaData.getStringFunctions === "") + assert(metaData.getSystemFunctions === "") + assert(metaData.getTimeDateFunctions === "") + assert(metaData.getSearchStringEscape === "\\") + assert(metaData.getExtraNameCharacters === "") + assert(metaData.supportsAlterTableWithAddColumn()) + assert(!metaData.supportsAlterTableWithDropColumn()) + assert(metaData.supportsColumnAliasing()) + assert(metaData.supportsGroupBy) + assert(!metaData.supportsMultipleResultSets) + assert(!metaData.supportsNonNullableColumns) + assert(metaData.supportsOuterJoins) + assert(metaData.supportsFullOuterJoins) + assert(metaData.supportsLimitedOuterJoins) + assert(metaData.getSchemaTerm === "database") + assert(metaData.getProcedureTerm === "UDF") + assert(metaData.getCatalogTerm === "instance") + assert(metaData.getCatalogSeparator === ".") + assert(metaData.supportsSchemasInDataManipulation) + assert(!metaData.supportsSchemasInProcedureCalls) + assert(metaData.supportsSchemasInTableDefinitions) + assert(!metaData.supportsSchemasInIndexDefinitions) + assert(!metaData.supportsSchemasInPrivilegeDefinitions) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsCatalogsInDataManipulation) + assert(!metaData.supportsCatalogsInProcedureCalls) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsCatalogsInTableDefinitions) + assert(!metaData.supportsCatalogsInIndexDefinitions) + assert(!metaData.supportsCatalogsInPrivilegeDefinitions) + assert(!metaData.supportsPositionedDelete) + assert(!metaData.supportsPositionedUpdate) + assert(!metaData.supportsSelectForUpdate) + assert(!metaData.supportsStoredProcedures) + // This is actually supported, but hive jdbc package return false + assert(!metaData.supportsUnion) + assert(metaData.supportsUnionAll) + assert(metaData.getMaxColumnNameLength === 128) + assert(metaData.getDefaultTransactionIsolation === java.sql.Connection.TRANSACTION_NONE) + assert(!metaData.supportsTransactions) + assert(!metaData.getProcedureColumns("", "%", "%", "%").next()) + assert(!metaData.getImportedKeys("", "default", "").next()) + + // TODO: SPARK-33219 Disable GetPrimaryKeys and GetCrossReference APIs explicitly + // for Spark ThriftServer + assert(!metaData.getPrimaryKeys("", "default", "").next()) + assert(!metaData.getCrossReference("", "default", "src", "", "default", "src2").next()) + + assert(!metaData.getIndexInfo("", "default", "src", true, true).next()) + assert(metaData.supportsResultSetType(ResultSet.TYPE_FORWARD_ONLY)) + assert(metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_INSENSITIVE)) + assert(metaData.supportsResultSetType(ResultSet.TYPE_SCROLL_SENSITIVE)) + assert(!metaData.supportsBatchUpdates) + assert(!metaData.getUDTs(",", "%", "%", null).next()) + 
assert(!metaData.supportsSavepoints) + assert(!metaData.supportsResultSetHoldability(ResultSet.HOLD_CURSORS_OVER_COMMIT)) + assert(metaData.getJDBCMajorVersion === 3) + assert(metaData.getJDBCMinorVersion === 0) + assert(metaData.getSQLStateType === DatabaseMetaData.sqlStateSQL) + assert(metaData.getMaxLogicalLobSize === 0) + assert(!metaData.supportsRefCursors) + } + } } From d7f15b025b16c99768516cfb7fd96ab2e6ee1c2b Mon Sep 17 00:00:00 2001 From: zero323 Date: Sat, 24 Oct 2020 10:00:04 +0900 Subject: [PATCH 0315/1009] [SPARK-33003][PYTHON][DOCS] Add type hints guidelines to the documentation ### What changes were proposed in this pull request? Add type hints guidelines to developer docs. ### Why are the changes needed? Since it is a new and still somewhat evolving feature, we should provided clear guidelines for potential contributors. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Closes #30094 from zero323/SPARK-33003. Authored-by: zero323 Signed-off-by: HyukjinKwon --- .../docs/source/development/contributing.rst | 45 ++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index cb4e47a6b4197..2b62c953e0786 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -77,6 +77,50 @@ There are a couple of additional notes to keep in mind when contributing to code * Be Pythonic. * APIs are matched with Scala and Java sides in general. * PySpark specific APIs can still be considered as long as they are Pythonic and do not conflict with other existent APIs, for example, decorator usage of UDFs. +* If you extend or modify public API, please adjust corresponding type hints. See `Contributing and Maintaining Type Hints`_ for details. + +Contributing and Maintaining Type Hints +---------------------------------------- + +PySpark type hints are provided using stub files, placed in the same directory as the annotated module, with exception to ``# type: ignore`` in modules which don't have their own stubs (tests, examples and non-public API). +As a rule of thumb, only public API is annotated. + +Annotations should, when possible: + +* Reflect expectations of the underlying JVM API, to help avoid type related failures outside Python interpreter. +* In case of conflict between too broad (``Any``) and too narrow argument annotations, prefer the latter as one, as long as it is covering most of the typical use cases. +* Indicate nonsensical combinations of arguments using ``@overload`` annotations. For example, to indicate that ``*Col`` and ``*Cols`` arguments are mutually exclusive: + + .. code-block:: python + + @overload + def __init__( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ... + ) -> None: ... + @overload + def __init__( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ... + ) -> None: ... + +* Be compatible with the current stable MyPy release. + + +Complex supporting type definitions, should be placed in dedicated ``_typing.pyi`` stubs. See for example `pyspark.sql._typing.pyi `_. + +Annotations can be validated using ``dev/lint-python`` script or by invoking mypy directly: + +.. code-block:: bash + + mypy --config python/mypy.ini python/pyspark + Code Style Guide @@ -90,4 +134,3 @@ the APIs were inspired by Java. 
PySpark also follows `camelCase` for exposed API There is an exception ``functions.py`` that uses `snake_case`. It was in order to make APIs SQL (and Python) friendly. PySpark leverages linters such as `pycodestyle `_ and `flake8 `_, which ``dev/lint-python`` runs. Therefore, make sure to run that script to double check. - From f65952772702f0a8772c93b79f562f35c337f5a5 Mon Sep 17 00:00:00 2001 From: Shiqi Sun Date: Sat, 24 Oct 2020 09:55:57 -0700 Subject: [PATCH 0316/1009] [SPARK-30821][K8S] Handle executor failure with multiple containers Handle executor failure with multiple containers Added a spark property spark.kubernetes.executor.checkAllContainers, with default being false. When it's true, the executor snapshot will take all containers in the executor into consideration when deciding whether the executor is in "Running" state, if the pod restart policy is "Never". Also, added the new spark property to the doc. ### What changes were proposed in this pull request? Checking of all containers in the executor pod when reporting executor status, if the `spark.kubernetes.executor.checkAllContainers` property is set to true. ### Why are the changes needed? Currently, a pod remains "running" as long as there is at least one running container. This prevents Spark from noticing when a container has failed in an executor pod with multiple containers. With this change, user can configure the behavior to be different. Namely, if any container in the executor pod has failed, either the executor process or one of its sidecars, the pod is considered to be failed, and it will be rescheduled. ### Does this PR introduce _any_ user-facing change? Yes, new spark property added. User is now able to choose whether to turn on this feature using the `spark.kubernetes.executor.checkAllContainers` property. ### How was this patch tested? Unit test was added and all passed. I tried to run integration test by following the instruction [here](https://spark.apache.org/developer-tools.html) (section "Testing K8S") and also [here](https://github.com/apache/spark/blob/master/resource-managers/kubernetes/integration-tests/README.md), but I wasn't able to run it smoothly as it fails to talk with minikube cluster. Maybe it's because my minikube version is too new (I'm using v1.13.1)...? Since I've been trying it for two days and still can't make it work, I decided to submit this PR and hopefully the Jenkins test will pass. Closes #29924 from huskysun/exec-sidecar-failure. Authored-by: Shiqi Sun Signed-off-by: Holden Karau --- docs/running-on-kubernetes.md | 8 +++ .../org/apache/spark/deploy/k8s/Config.scala | 8 +++ .../cluster/k8s/ExecutorPodsSnapshot.scala | 16 +++++- .../k8s/KubernetesClusterManager.scala | 3 + ...erministicExecutorPodsSnapshotsStore.scala | 2 + .../k8s/ExecutorLifecycleTestUtils.scala | 32 ++++++++++- .../k8s/ExecutorPodsSnapshotSuite.scala | 56 +++++++++++++------ .../k8s/ExecutorPodsSnapshotsStoreSuite.scala | 1 + 8 files changed, 108 insertions(+), 18 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3bd1c410e8433..4714e3517f16e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1181,6 +1181,14 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 + + spark.kubernetes.executor.checkAllContainers + false + + Specify whether executor pods should be check all containers (including sidecars) or only the executor container when determining the pod status. 
+ + 3.1.0 + spark.kubernetes.submission.connectionTimeout 10000 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 00eaff452ba45..d399f66b45981 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -417,6 +417,14 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS = + ConfigBuilder("spark.kubernetes.executor.checkAllContainers") + .doc("If set to true, all containers in the executor pod will be checked when reporting" + + "executor status.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val KUBERNETES_DRIVER_LABEL_PREFIX = "spark.kubernetes.driver.label." val KUBERNETES_DRIVER_ANNOTATION_PREFIX = "spark.kubernetes.driver.annotation." val KUBERNETES_DRIVER_SERVICE_ANNOTATION_PREFIX = "spark.kubernetes.driver.service.annotation." diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 30030ab539048..be75311bc3d4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.Locale +import io.fabric8.kubernetes.api.model.ContainerStateTerminated import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.deploy.k8s.Constants._ @@ -37,6 +38,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP } object ExecutorPodsSnapshot extends Logging { + private var shouldCheckAllContainers: Boolean = _ def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -44,6 +46,10 @@ object ExecutorPodsSnapshot extends Logging { def apply(): ExecutorPodsSnapshot = ExecutorPodsSnapshot(Map.empty[Long, ExecutorPodState]) + def setShouldCheckAllContainers(watchAllContainers: Boolean): Unit = { + shouldCheckAllContainers = watchAllContainers + } + private def toStatesByExecutorId(executorPods: Seq[Pod]): Map[Long, ExecutorPodState] = { executorPods.map { pod => (pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL).toLong, toState(pod)) @@ -59,7 +65,15 @@ object ExecutorPodsSnapshot extends Logging { case "pending" => PodPending(pod) case "running" => - PodRunning(pod) + if (shouldCheckAllContainers && + "Never" == pod.getSpec.getRestartPolicy && + pod.getStatus.getContainerStatuses.stream + .map[ContainerStateTerminated](cs => cs.getState.getTerminated) + .anyMatch(t => t != null && t.getExitCode != 0)) { + PodFailed(pod) + } else { + PodRunning(pod) + } case "failed" => PodFailed(pod) case "succeeded" => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index fcaa3687b14b4..cc5c2f4b6325d 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -95,10 +95,13 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit val schedulerExecutorService = ThreadUtils.newDaemonSingleThreadScheduledExecutor( "kubernetes-executor-maintenance") + ExecutorPodsSnapshot.setShouldCheckAllContainers( + sc.conf.get(KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS)) val subscribersExecutor = ThreadUtils .newDaemonThreadPoolScheduledExecutor( "kubernetes-executor-snapshots-subscribers", 2) val snapshotsStore = new ExecutorPodsSnapshotsStoreImpl(subscribersExecutor) + val removedExecutorsCache = CacheBuilder.newBuilder() .expireAfterWrite(3, TimeUnit.MINUTES) .build[java.lang.Long, java.lang.Long]() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala index 6dc052a5e6894..6e989316310e6 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala @@ -21,6 +21,8 @@ import scala.collection.mutable class DeterministicExecutorPodsSnapshotsStore extends ExecutorPodsSnapshotsStore { + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + private val snapshotsBuffer = mutable.Buffer.empty[ExecutorPodsSnapshot] private val subscribers = mutable.Buffer.empty[Seq[ExecutorPodsSnapshot] => Unit] diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 0377e54f3cd76..62c79e6f7cba5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -76,6 +76,33 @@ object ExecutorLifecycleTestUtils { .build() } + /** + * [SPARK-30821] + * This creates a pod with one container in running state and one container in failed + * state (terminated with non-zero exit code). This pod is used for unit-testing the + * spark.kubernetes.executor.checkAllContainers Spark Conf. 
+ */ + def runningExecutorWithFailedContainer(executorId: Long): Pod = { + new PodBuilder(podWithAttachedContainerForId(executorId)) + .editOrNewStatus() + .withPhase("running") + .addNewContainerStatus() + .withNewState() + .withNewTerminated() + .withExitCode(1) + .endTerminated() + .endState() + .endContainerStatus() + .addNewContainerStatus() + .withNewState() + .withNewRunning() + .endRunning() + .endState() + .endContainerStatus() + .endStatus() + .build() + } + def succeededExecutor(executorId: Long): Pod = { new PodBuilder(podWithAttachedContainerForId(executorId)) .editOrNewStatus() @@ -117,7 +144,10 @@ object ExecutorLifecycleTestUtils { .addToLabels(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID) .addToLabels(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .addToLabels(SPARK_EXECUTOR_ID_LABEL, executorId.toString) - .endMetadata() + .endMetadata() + .editOrNewSpec() + .withRestartPolicy("Never") + .endSpec() .build() val container = new ContainerBuilder() .withName("spark-executor") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index 6ca1733bcd32b..ad12461bfaf8c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -16,31 +16,55 @@ */ package org.apache.spark.scheduler.cluster.k8s +import io.fabric8.kubernetes.api.model.Pod + import org.apache.spark.SparkFunSuite import org.apache.spark.scheduler.cluster.k8s.ExecutorLifecycleTestUtils._ class ExecutorPodsSnapshotSuite extends SparkFunSuite { + def testCase(pod: Pod, state: Pod => ExecutorPodState): (Pod, ExecutorPodState) = + (pod, state(pod)) + + def doTest(testCases: Seq[(Pod, ExecutorPodState)]): Unit = { + val snapshot = ExecutorPodsSnapshot(testCases.map(_._1)) + for (((_, state), i) <- testCases.zipWithIndex) { + assertResult(state.getClass.getName, s"executor ID $i") { + snapshot.executorPods(i).getClass.getName + } + } + } + test("States are interpreted correctly from pod metadata.") { - val pods = Seq( - pendingExecutor(0), - runningExecutor(1), - succeededExecutor(2), - failedExecutorWithoutDeletion(3), - deletedExecutor(4), - unknownExecutor(5)) - val snapshot = ExecutorPodsSnapshot(pods) - assert(snapshot.executorPods === - Map( - 0L -> PodPending(pods(0)), - 1L -> PodRunning(pods(1)), - 2L -> PodSucceeded(pods(2)), - 3L -> PodFailed(pods(3)), - 4L -> PodDeleted(pods(4)), - 5L -> PodUnknown(pods(5)))) + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + val testCases = Seq( + testCase(pendingExecutor(0), PodPending), + testCase(runningExecutor(1), PodRunning), + testCase(succeededExecutor(2), PodSucceeded), + testCase(failedExecutorWithoutDeletion(3), PodFailed), + testCase(deletedExecutor(4), PodDeleted), + testCase(unknownExecutor(5), PodUnknown) + ) + doTest(testCases) + } + + test("SPARK-30821: States are interpreted correctly from pod metadata" + + " when configured to check all containers.") { + ExecutorPodsSnapshot.setShouldCheckAllContainers(true) + val testCases = Seq( + testCase(pendingExecutor(0), PodPending), + testCase(runningExecutor(1), PodRunning), + testCase(runningExecutorWithFailedContainer(2), PodFailed), + testCase(succeededExecutor(3), PodSucceeded), + 
testCase(failedExecutorWithoutDeletion(4), PodFailed), + testCase(deletedExecutor(5), PodDeleted), + testCase(unknownExecutor(6), PodUnknown) + ) + doTest(testCases) } test("Updates add new pods for non-matching ids and edit existing pods for matching ids") { + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) val originalPods = Seq( pendingExecutor(0), runningExecutor(1)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala index cf54b3c4eb329..614c198bd9caf 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreSuite.scala @@ -35,6 +35,7 @@ class ExecutorPodsSnapshotsStoreSuite extends SparkFunSuite with BeforeAndAfter before { eventBufferScheduler = new DeterministicScheduler() eventQueueUnderTest = new ExecutorPodsSnapshotsStoreImpl(eventBufferScheduler) + ExecutorPodsSnapshot.setShouldCheckAllContainers(false) } test("Subscribers get notified of events periodically.") { From 0c66a88d1d1336c9f3b474622315254952cbd56e Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Sat, 24 Oct 2020 15:36:41 -0700 Subject: [PATCH 0317/1009] [SPARK-29438][SS][FOLLOWUP] Add regression tests for Streaming Aggregation and flatMapGroupsWithState ### What changes were proposed in this pull request? This patch adds new UTs to prevent SPARK-29438 for streaming aggregation as well as flatMapGroupsWithState, as we agree about the review comment quote here: https://github.com/apache/spark/pull/26162#issuecomment-576929692 > LGTM for this PR. But on a additional note, this is a very subtle and easy-to-make bug with TaskContext.getPartitionId. I wonder if this bug is present in any other stateful operation. I wonder if this bug is present in any other stateful operation. Can you please verify how partitionId is used in the other stateful operations? For now they're not broken, but even better if we have UTs to prevent the case for the future. ### Why are the changes needed? New UTs will prevent streaming aggregation and flatMapGroupsWithState to be broken in future where it is placed on the right side of UNION and the number of partition is changing on the left side of UNION. Please refer SPARK-29438 for more details. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UTs. Closes #27333 from HeartSaVioR/SPARK-29438-add-regression-test. 
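To make the quoted concern concrete, here is a small self-contained sketch (illustration only, not part of this patch; the data and slice counts are made up) showing how a union shifts `TaskContext.partitionId()` on its right side while the index passed to `mapPartitionsWithIndex` stays local to that RDD. This is why stateful operators must not derive their state store partition from the task's partition ID.

```scala
// Hedged sketch of the partition ID shift under UNION (illustration only).
import org.apache.spark.TaskContext
import org.apache.spark.sql.SparkSession

object PartitionIdShift {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[4]").appName("partition-id-shift").getOrCreate()
    val sc = spark.sparkContext

    val left = sc.parallelize(Seq("l1", "l2", "l3"), numSlices = 3)
    val right = sc.parallelize(Seq("r1", "r2"), numSlices = 2)
      .mapPartitionsWithIndex { (localIndex, iter) =>
        // localIndex is the partition index within this RDD; TaskContext.getPartitionId()
        // is the ID of the task's partition in the final (unioned) RDD.
        iter.map(v => s"$v localIndex=$localIndex taskPartitionId=${TaskContext.getPartitionId()}")
      }

    left.union(right).collect().foreach(println)
    // Expected under these assumptions: the right-side rows report localIndex 0 and 1 but
    // taskPartitionId 3 and 4, because the union's first three partitions belong to `left`.
    spark.stop()
  }
}
```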
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Liang-Chi Hsieh --- .../FlatMapGroupsWithStateSuite.scala | 52 ++++++++++++++++++- .../streaming/StreamingAggregationSuite.scala | 45 +++++++++++++++- .../StreamingDeduplicationSuite.scala | 42 +++++++++++++++ 3 files changed, 137 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index e2887e78b0508..2efd715b7731c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -26,7 +26,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkException import org.apache.spark.api.java.function.FlatMapGroupsWithStateFunction -import org.apache.spark.sql.Encoder +import org.apache.spark.sql.{DataFrame, Encoder} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical.FlatMapGroupsWithState @@ -1020,6 +1020,56 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { spark.createDataset(Seq(("a", 2), ("b", 1))).toDF) } + testWithAllStateVersions("SPARK-29438: ensure UNION doesn't lead (flat)MapGroupsWithState" + + " to use shifted partition IDs") { + val stateFunc = (key: String, values: Iterator[String], state: GroupState[RunningCount]) => { + val count = state.getOption.map(_.count).getOrElse(0L) + values.size + state.update(RunningCount(count)) + (key, count.toString) + } + + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[String], MemoryStream[String], DataFrame) = { + val input1 = MemoryStream[String](desiredPartitionsForInput1) + val input2 = MemoryStream[String] + val df1 = input1.toDF() + .select($"value", $"value") + val df2 = input2.toDS() + .groupByKey(x => x) + .mapGroupsWithState(stateFunc) // Types = State: MyState, Out: (Str, Str) + .toDF() + + // Unioned DF would have columns as (String, String) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, "input1-a")(input2, "input2-a"), + CheckNewAnswer(("input1-a", "input1-a"), ("input2-a", "1")), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify (flat)MapGroupsWithState doesn't have + // such issue. 
+ + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData("input1-a") + newInput2.addData("input2-a") + + testStream(newUnionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, "input1-a")(newInput2, "input2-a", "input2-b"), + CheckNewAnswer(("input1-a", "input1-a"), ("input2-a", "2"), ("input2-b", "1")) + ) + } + } + testQuietly("StateStore.abort on task failure handling") { val stateFunc = (key: String, values: Iterator[String], state: GroupState[RunningCount]) => { if (FlatMapGroupsWithStateSuite.failInTask) throw new Exception("expected failure") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index cb69460ca1580..4a57cc27b1d59 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.Assertions import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.rdd.BlockRDD -import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -337,6 +337,49 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } + testWithAllStateVersions("SPARK-29438: ensure UNION doesn't lead streaming aggregation to use" + + " shifted partition IDs") { + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + val input1 = MemoryStream[Int](desiredPartitionsForInput1) + val input2 = MemoryStream[Int] + val df1 = input1.toDF() + .select($"value", $"value" + 1) + val df2 = input2.toDF() + .groupBy($"value") + .agg(count("*")) + + // Unioned DF would have columns as (Int, Int) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, 11, 12)(input2, 21, 22), + CheckNewAnswer(Row(11, 12), Row(12, 13), Row(21, 1), Row(22, 1)), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify streaming aggregation doesn't have + // such issue. 
+ + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData(11, 12) + newInput2.addData(21, 22) + + testStream(newUnionDf, Update)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, 13, 14)(newInput2, 22, 23), + CheckNewAnswer(Row(13, 14), Row(14, 15), Row(22, 2), Row(23, 1)) + ) + } + } + testQuietlyWithAllStateVersions("midbatch failure") { val inputData = MemoryStream[Int] FailureSingleton.firstTime = true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index 1f346aac8d2c2..e1505acf3ecda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.streaming import org.scalatest.BeforeAndAfterAll +import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} @@ -294,4 +295,45 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { testWithFlag(true) testWithFlag(false) } + + test("SPARK-29438: ensure UNION doesn't lead streaming deduplication to use" + + " shifted partition IDs") { + def constructUnionDf(desiredPartitionsForInput1: Int) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + val input1 = MemoryStream[Int](desiredPartitionsForInput1) + val input2 = MemoryStream[Int] + val df1 = input1.toDF().select($"value") + val df2 = input2.toDF().dropDuplicates("value") + + // Unioned DF would have columns as (Int) + (input1, input2, df1.union(df2)) + } + + withTempDir { checkpointDir => + val (input1, input2, unionDf) = constructUnionDf(2) + testStream(unionDf, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(input1, 11, 12)(input2, 21, 22), + CheckNewAnswer(11, 12, 21, 22), + StopStream + ) + + // We're restoring the query with different number of partitions in left side of UNION, + // which may lead right side of union to have mismatched partition IDs (e.g. if it relies on + // TaskContext.partitionId()). This test will verify streaming deduplication doesn't have + // such issue. + + val (newInput1, newInput2, newUnionDf) = constructUnionDf(3) + + newInput1.addData(11, 12) + newInput2.addData(21, 22) + + testStream(newUnionDf, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + MultiAddData(newInput1, 13, 14)(newInput2, 22, 23), + CheckNewAnswer(13, 14, 23) + ) + } + } + } From 87b498462b82fce02dd50286887092cf7858d2e8 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 25 Oct 2020 16:15:55 -0700 Subject: [PATCH 0318/1009] [SPARK-33228][SQL] Don't uncache data when replacing a view having the same logical plan ### What changes were proposed in this pull request? SPARK-30494's updated the `CreateViewCommand` code to implicitly drop cache when replacing an existing view. But, this change drops cache even when replacing a view having the same logical plan. 
A sequence of queries to reproduce this is as follows:
```
// Spark v2.4.6+
scala> val df = spark.range(1).selectExpr("id a", "id b")
scala> df.cache()
scala> df.explain()
== Physical Plan ==
*(1) ColumnarToRow
+- InMemoryTableScan [a#2L, b#3L]
      +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
            +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
               +- *(1) Range (0, 1, step=1, splits=4)

scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) ColumnarToRow
+- InMemoryTableScan [a#2L, b#3L]
      +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
            +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
               +- *(1) Range (0, 1, step=1, splits=4)

// If one re-runs the same query `df.createOrReplaceTempView("t")`, the cache is swept away
scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) Project [id#0L AS a#2L, id#0L AS b#3L]
+- *(1) Range (0, 1, step=1, splits=4)

// Until v2.4.6
scala> val df = spark.range(1).selectExpr("id a", "id b")
scala> df.cache()
scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
20/10/23 22:33:42 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
== Physical Plan ==
*(1) InMemoryTableScan [a#2L, b#3L]
   +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
         +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
            +- *(1) Range (0, 1, step=1, splits=4)

scala> df.createOrReplaceTempView("t")
scala> sql("select * from t").explain()
== Physical Plan ==
*(1) InMemoryTableScan [a#2L, b#3L]
   +- InMemoryRelation [a#2L, b#3L], StorageLevel(disk, memory, deserialized, 1 replicas)
         +- *(1) Project [id#0L AS a#2L, id#0L AS b#3L]
            +- *(1) Range (0, 1, step=1, splits=4)
```

### Why are the changes needed?

Bug fix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added tests.

Closes #30140 from maropu/FixBugInReplaceView.
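As a quick user-level check of the fixed behavior, a hedged sketch follows (it assumes a spark-shell session where `spark` is predefined; it is not the test added by this patch):

```scala
// The cache registered for df should survive re-registering the same plan under the same name.
val df = spark.range(1).selectExpr("id a", "id b")
df.cache()
df.createOrReplaceTempView("t")
assert(spark.catalog.isCached("t"))  // cached after the first registration
df.createOrReplaceTempView("t")      // replace with the very same logical plan
assert(spark.catalog.isCached("t"))  // with this change, the cache entry is kept
```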
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/views.scala | 10 ++++---- .../apache/spark/sql/CachedTableSuite.scala | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 94f34a9b39b28..bcc0e1fd82d7a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -110,17 +110,19 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog) if (viewType == LocalTempView) { - if (replace && catalog.getTempView(name.table).isDefined) { - logDebug(s"Try to uncache ${name.quotedString} before replacing.") + if (replace && catalog.getTempView(name.table).isDefined && + !catalog.getTempView(name.table).get.sameResult(child)) { + logInfo(s"Try to uncache ${name.quotedString} before replacing.") CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) catalog.createTempView(name.table, aliasedPlan, overrideIfExists = replace) } else if (viewType == GlobalTempView) { - if (replace && catalog.getGlobalTempView(name.table).isDefined) { + if (replace && catalog.getGlobalTempView(name.table).isDefined && + !catalog.getGlobalTempView(name.table).get.sameResult(child)) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val globalTempView = TableIdentifier(name.table, Option(db)) - logDebug(s"Try to uncache ${globalTempView.quotedString} before replacing.") + logInfo(s"Try to uncache ${globalTempView.quotedString} before replacing.") CommandUtils.uncacheTableOrView(sparkSession, globalTempView.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 20f2a7f947b81..adc725ed9b062 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1184,4 +1184,28 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.isEmpty) } } + + test("SPARK-33228: Don't uncache data when replacing an existing view having the same plan") { + withTempView("tempView") { + spark.catalog.clearCache() + val df = spark.range(1).selectExpr("id a", "id b") + df.cache() + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceTempView("tempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceTempView("tempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + } + + withTempView("tempGlobalTempView") { + spark.catalog.clearCache() + val df = spark.range(1).selectExpr("id a", "id b") + df.cache() + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceGlobalTempView("tempGlobalTempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + df.createOrReplaceGlobalTempView("tempGlobalTempView") + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) + } + } } From ce0ebf5f023b1d2230bbd4b9ffad294edef3bca7 Mon Sep 17 00:00:00 2001 From: Emi Date: Sun, 25 Oct 2020 17:06:06 -0700 Subject: [PATCH 0319/1009] 
[SPARK-33234][INFRA] Generates SHA-512 using shasum

### What changes were proposed in this pull request?

I am generating the SHA-512 using the standard `shasum`, which also has better output compared to GPG.

### Why are the changes needed?

This makes the hash much easier to verify for users that don't have GPG. A user with GPG can check the keys, but a user without GPG will have a hard time validating the SHA-512 based on the 'pretty printed' format. Apache Spark is the only project where I've seen this format. Most other Apache projects have a one-line hash file.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

This patch assumes the build system has shasum (it should, but I can't test this).

Closes #30123 from emilianbold/master.

Authored-by: Emi
Signed-off-by: Dongjoon Hyun
---
 dev/create-release/release-build.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index c7fee13d39c6b..240f4c8dfd371 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -182,8 +182,7 @@ if [[ "$1" == "package" ]]; then
   tar cvzf spark-$SPARK_VERSION.tgz --exclude spark-$SPARK_VERSION/.git spark-$SPARK_VERSION
   echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour --output spark-$SPARK_VERSION.tgz.asc \
     --detach-sig spark-$SPARK_VERSION.tgz
-  echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \
-    SHA512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
+  shasum -a 512 spark-$SPARK_VERSION.tgz > spark-$SPARK_VERSION.tgz.sha512
   rm -rf spark-$SPARK_VERSION

 ZINC_PORT=3035

From 56ab60fb7ae37ca64d668bc4a1f18216cc7186fd Mon Sep 17 00:00:00 2001
From: angerszhu
Date: Mon, 26 Oct 2020 11:20:29 +0900
Subject: [PATCH 0320/1009] [SPARK-32388][SQL] TRANSFORM with schema-less mode
 should keep the same with hive

### What changes were proposed in this pull request?

In the current Spark script transformation with Hive serde mode, the schema-less case produces a result that is different from Hive's. This PR keeps the result the same as Hive's script transform with serde.

#### Hive script transform with serde in schema-less mode

```
hive> create table t (c0 int, c1 int, c2 int);
hive> INSERT INTO t VALUES (1, 1, 1);
hive> INSERT INTO t VALUES (2, 2, 2);
hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t;

hive> DESCRIBE v;
key      string
value    string

hive> SELECT * FROM v;
1  1  1
2  2  2

hive> SELECT key FROM v;
1
2

hive> SELECT value FROM v;
1  1
2  2
```

#### Spark script transform with Hive serde in schema-less mode

```
hive> create table t (c0 int, c1 int, c2 int);
hive> INSERT INTO t VALUES (1, 1, 1);
hive> INSERT INTO t VALUES (2, 2, 2);
hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t;

hive> SELECT * FROM v;
1  1
2  2
```

**No serde mode in hive (ROW FORMAT DELIMITED)**

![image](https://user-images.githubusercontent.com/46485123/90088770-55841e00-dd52-11ea-92dd-7fe52d93f0b3.png)
Authored-by: angerszhu Signed-off-by: HyukjinKwon --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 ++++- .../HiveScriptTransformationSuite.scala | 159 ++++++++++++++++-- 4 files changed, 189 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index c5107645f46f8..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,15 +111,14 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive default serde will choose first two output column as output - // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. - // Here we change spark's behavior same as hive's default serde. - // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this - // to keep spark and hive behavior same in SPARK-32388 + // In schema less mode, hive will choose first two output column as output. + // If output column size less then 2, it will return NULL for columns with missing values. + // Here we split row string and choose first 2 values, if values's size less then 2, + // we pad NULL value until 2 to make behavior same with hive. val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 0a5f4c3ed4bcb..f46526d419158 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,7 +785,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. 
val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq("field.delim" -> "\t") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index c07ea0f12f94e..e6029400997a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,10 +137,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile - + test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -157,7 +154,24 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = s"python $scriptFilePath", + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr, + df.col("b").expr), + script = "cat", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -167,6 +181,22 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr), + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index d247f37130776..0af0563715e12 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,10 +156,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath - + test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { withTempView("v") { val df = Seq( (1, "1", 
1.0, BigDecimal(1.0), new Timestamp(1)), @@ -168,21 +165,157 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - val query = sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - |USING 'python ${scriptFilePath}' - |FROM v - """.stripMargin) + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and don't specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | USING 'cat' + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + decimalToString('d), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and just specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde with + // 'serialization.last.column.takes.rest=true', + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + decimalToString('d), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde + // with 'serialization.last.column.takes.rest=false', + // it will choose first two column as output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) - // In hive default serde mode, if we don't define output schema, it will choose first - // two column as output schema (key: String, value: String) + // In 
hive default serde mode, if we don't define output schema, + // when output column size = 2 and specify serde, it will these two column as + // output schema (key: String, value: String) checkAnswer( - query, + sql( + s""" + |SELECT TRANSFORM(a, b) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size < 2 and specify serde, it will return null for deficiency + // output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } From 369cc614f369f9fd9be5b13a3f047a261c8e8d90 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 26 Oct 2020 11:38:48 +0900 Subject: [PATCH 0321/1009] Revert "[SPARK-32388][SQL] TRANSFORM with schema-less mode should keep the same with hive" This reverts commit 56ab60fb7ae37ca64d668bc4a1f18216cc7186fd. --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 +---- .../HiveScriptTransformationSuite.scala | 159 ++---------------- 4 files changed, 25 insertions(+), 189 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..c5107645f46f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,14 +111,15 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive will choose first two output column as output. - // If output column size less then 2, it will return NULL for columns with missing values. - // Here we split row string and choose first 2 values, if values's size less then 2, - // we pad NULL value until 2 to make behavior same with hive. + // In schema less mode, hive default serde will choose first two output column as output + // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. + // Here we change spark's behavior same as hive's default serde. 
+ // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this + // to keep spark and hive behavior same in SPARK-32388 val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) + prevLine.split(outputRowFormat).slice(0, 2) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index f46526d419158..0a5f4c3ed4bcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,9 +785,7 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq( - "field.delim" -> "\t", - "serialization.last.column.takes.rest" -> "true") + val props = Seq("field.delim" -> "\t") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index e6029400997a2..c07ea0f12f94e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,7 +137,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { + test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile + withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -154,24 +157,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = "cat", - output = Seq( - AttributeReference("key", StringType)(), - AttributeReference("value", StringType)()), - child = child, - ioschema = defaultIOSchema.copy(schemaLess = true) - ), - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) - - checkAnswer( - df, - (child: SparkPlan) => createScriptTransformationExec( - input = Seq( - df.col("a").expr, - df.col("b").expr), - script = "cat", + script = s"python $scriptFilePath", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -181,22 +167,6 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) - - checkAnswer( - df, - (child: SparkPlan) => createScriptTransformationExec( - input = Seq( - df.col("a").expr), - script = "cat", - output = Seq( - AttributeReference("key", StringType)(), - AttributeReference("value", StringType)()), - child = child, - ioschema = defaultIOSchema.copy(schemaLess = true) - ), - df.select( - 'a.cast("string").as("key"), - lit(null)).collect()) } } diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 0af0563715e12..d247f37130776 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,7 +156,10 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { + test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath + withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -165,157 +168,21 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and don't specify serde, - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | USING 'cat' - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - concat_ws("\t", - 'b.cast("string"), - 'c.cast("string"), - decimalToString('d), - 'e.cast("string")).as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and just specify serde, - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) - - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and specify serde with - // 'serialization.last.column.takes.rest=true', - // it will choose take rest columns in second column as output schema - // (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - concat_ws("\t", - 'b.cast("string"), - 'c.cast("string"), - decimalToString('d), - 'e.cast("string")).as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size > 2 and specify serde - // with 'serialization.last.column.takes.rest=false', - // 
it will choose first two column as output schema (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'false' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'false' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - 'b.cast("string").as("value")).collect()) + val query = sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + |USING 'python ${scriptFilePath}' + |FROM v + """.stripMargin) - // In hive default serde mode, if we don't define output schema, - // when output column size = 2 and specify serde, it will these two column as - // output schema (key: String, value: String) + // In hive default serde mode, if we don't define output schema, it will choose first + // two column as output schema (key: String, value: String) checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a, b) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), + query, identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) - - // In hive default serde mode, if we don't define output schema, - // when output column size < 2 and specify serde, it will return null for deficiency - // output schema (key: String, value: String) - checkAnswer( - sql( - s""" - |SELECT TRANSFORM(a) - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - | USING 'cat' - | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - | WITH SERDEPROPERTIES ( - | 'field.delim' = '\t', - | 'serialization.last.column.takes.rest' = 'true' - | ) - |FROM v - """.stripMargin), - identity, - df.select( - 'a.cast("string").as("key"), - lit(null)).collect()) } } From d87a0bb2caa6804d59130c41a4c005acb2e4aad2 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 26 Oct 2020 13:33:06 +0900 Subject: [PATCH 0322/1009] [SPARK-32862][SS] Left semi stream-stream join ### What changes were proposed in this pull request? This is to support left semi join in stream-stream join. The implementation of left semi join is (mostly in `StreamingSymmetricHashJoinExec` and `SymmetricHashJoinStateManager`): * For left side input row, check if there's a match on right side state store. * if there's a match, output the left side row, but do not put the row in left side state store (no need to put in state store). * if there's no match, output nothing, but put the row in left side state store (with "matched" field to set to false in state store). * For right side input row, check if there's a match on left side state store. * For all matched left rows in state store, output the rows with "matched" field as false. Set all left rows with "matched" field to be true. Only output the left side rows matched for the first time to guarantee left semi join semantics. 
* State store eviction: evict rows from left/right side state store below watermark, same as inner join. Note a followup optimization can be to evict matched left side rows from state store earlier, even when the rows are still above watermark. However this needs more change in `SymmetricHashJoinStateManager`, so will leave this as a followup. ### Why are the changes needed? Current stream-stream join supports inner, left outer and right outer join (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L166 ). We do see internally a lot of users are using left semi stream-stream join (not spark structured streaming), e.g. I want to get the ad impression (join left side) which has click (joint right side), but I don't care how many clicks per ad (left semi semantics). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests in `UnsupportedOperationChecker.scala` and `StreamingJoinSuite.scala`. Closes #30076 from c21/stream-join. Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../UnsupportedOperationChecker.scala | 15 +- .../sql/catalyst/expressions/JoinedRow.scala | 10 + .../analysis/UnsupportedOperationsSuite.scala | 66 ++- .../StreamingSymmetricHashJoinExec.scala | 121 +++-- .../state/SymmetricHashJoinStateManager.scala | 11 +- .../sql/streaming/StreamingJoinSuite.scala | 502 +++++++++++++----- 6 files changed, 545 insertions(+), 180 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 44e8602ba7e81..809323455652e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -291,17 +291,17 @@ object UnsupportedOperationChecker extends Logging { throwError("Full outer joins with streaming DataFrames/Datasets are not supported") } - case LeftSemi | LeftAnti => + case LeftAnti => if (right.isStreaming) { - throwError("Left semi/anti joins with a streaming DataFrame/Dataset " + + throwError("Left anti joins with a streaming DataFrame/Dataset " + "on the right are not supported") } - // We support streaming left outer joins with static on the right always, and with - // stream on both sides under the appropriate conditions. - case LeftOuter => + // We support streaming left outer and left semi joins with static on the right always, + // and with stream on both sides under the appropriate conditions. 
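(Editorial sketch, not part of the patch: what "the appropriate conditions" look like from the user side, following the ad-impression/click motivation in the commit message — both inputs watermarked and an event-time range in the join condition. The source, column names, and the active `SparkSession` named `spark` below are assumed for illustration only.)

```scala
import org.apache.spark.sql.functions.expr

// Illustrative inputs: impressions on the left, clicks on the right.
val impressions = spark.readStream.format("rate").load()
  .selectExpr("value AS adId", "timestamp AS impressionTime")
val clicks = spark.readStream.format("rate").load()
  .selectExpr("value AS clickAdId", "timestamp AS clickTime")

// Emit each impression at most once, as soon as a matching click arrives.
val matchedImpressions = impressions
  .withWatermark("impressionTime", "10 seconds")
  .join(
    clicks.withWatermark("clickTime", "10 seconds"),
    expr("adId = clickAdId AND " +
      "clickTime BETWEEN impressionTime AND impressionTime + interval 1 minute"),
    "left_semi")
```

As the new tests below spell out, such a stream-stream left semi query is supported only in Append output mode; Update and Complete are rejected.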
+ case LeftOuter | LeftSemi => if (!left.isStreaming && right.isStreaming) { - throwError("Left outer join with a streaming DataFrame/Dataset " + + throwError(s"$joinType join with a streaming DataFrame/Dataset " + "on the right and a static DataFrame/Dataset on the left is not supported") } else if (left.isStreaming && right.isStreaming) { val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) @@ -311,7 +311,8 @@ object UnsupportedOperationChecker extends Logging { left.outputSet, right.outputSet, condition, Some(1000000)).isDefined if (!watermarkInJoinKeys && !hasValidWatermarkRange) { - throwError("Stream-stream outer join between two streaming DataFrame/Datasets " + + throwError( + s"Stream-stream $joinType join between two streaming DataFrame/Datasets " + "is not supported without a watermark in the join keys, or a watermark on " + "the nullable side and an appropriate range condition") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala index 7770684a5b399..86871223d66ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/JoinedRow.scala @@ -55,6 +55,16 @@ class JoinedRow extends InternalRow { this } + /** Gets this JoinedRow's left base row. */ + def getLeft: InternalRow = { + row1 + } + + /** Gets this JoinedRow's right base row. */ + def getRight: InternalRow = { + row2 + } + override def toSeq(fieldTypes: Seq[DataType]): Seq[Any] = { assert(fieldTypes.length == row1.numFields + row2.numFields) val (left, right) = fieldTypes.splitAt(row1.numFields) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 3ec6fdeedd4b8..b9943a9744985 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -490,7 +490,69 @@ class UnsupportedOperationsSuite extends SparkFunSuite { _.join(_, joinType = LeftSemi), streamStreamSupported = false, batchStreamSupported = false, - expectedMsg = "left semi/anti joins") + expectedMsg = "LeftSemi join") + + // Left semi joins: update and complete mode not allowed + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and update mode", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute)), + OutputMode.Update(), + Seq("is not supported in Update output mode")) + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and complete mode", + Aggregate(Nil, aggExprs("d"), streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute))), + OutputMode.Complete(), + Seq("is not supported in Complete output mode")) + + // Left semi joins: stream-stream allowed with join on watermark attribute + // Note that the attribute need not be watermarked on both sides. 
+ assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on attribute with left watermark", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attributeWithWatermark === attribute)), + OutputMode.Append()) + assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on attribute with right watermark", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attributeWithWatermark)), + OutputMode.Append()) + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and join on non-watermarked attribute", + streamRelation.join(streamRelation, joinType = LeftSemi, + condition = Some(attribute === attribute)), + OutputMode.Append(), + Seq("without a watermark in the join keys")) + + // Left semi joins: stream-stream allowed with range condition yielding state value watermark + assertSupportedInStreamingPlan( + "left semi join with stream-stream relations and state value watermark", { + val leftRelation = streamRelation + val rightTimeWithWatermark = + AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) + val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) + leftRelation.join( + rightRelation, + joinType = LeftSemi, + condition = Some(attribute > rightTimeWithWatermark + 10)) + }, + OutputMode.Append()) + + // Left semi joins: stream-stream not allowed with insufficient range condition + assertNotSupportedInStreamingPlan( + "left semi join with stream-stream relations and state value watermark", { + val leftRelation = streamRelation + val rightTimeWithWatermark = + AttributeReference("b", IntegerType)().withMetadata(watermarkMetadata) + val rightRelation = new TestStreamingRelation(rightTimeWithWatermark) + leftRelation.join( + rightRelation, + joinType = LeftSemi, + condition = Some(attribute < rightTimeWithWatermark + 10)) + }, + OutputMode.Append(), + Seq("appropriate range condition")) // Left anti joins: stream-* not allowed testBinaryOperationInStreamingPlan( @@ -498,7 +560,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { _.join(_, joinType = LeftAnti), streamStreamSupported = false, batchStreamSupported = false, - expectedMsg = "left semi/anti joins") + expectedMsg = "Left anti join") // Right outer joins: stream-* not allowed testBinaryOperationInStreamingPlan( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index a52f5f4ac94ae..8b69205530769 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -152,7 +152,8 @@ case class StreamingSymmetricHashJoinExec( } if (stateFormatVersion < 2 && joinType != Inner) { - throw new IllegalArgumentException("The query is using stream-stream outer join with state" + + throw new IllegalArgumentException( + s"The query is using stream-stream $joinType join with state" + s" format version ${stateFormatVersion} - correctness issue is discovered. Please discard" + " the checkpoint and rerun the query. 
See SPARK-26154 for more details.") } @@ -165,7 +166,7 @@ case class StreamingSymmetricHashJoinExec( } require( - joinType == Inner || joinType == LeftOuter || joinType == RightOuter, + joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == LeftSemi, errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) @@ -185,6 +186,7 @@ case class StreamingSymmetricHashJoinExec( case _: InnerLike => left.output ++ right.output case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) case RightOuter => left.output.map(_.withNullability(true)) ++ right.output + case LeftSemi => left.output case _ => throwBadJoinTypeException() } @@ -193,6 +195,7 @@ case class StreamingSymmetricHashJoinExec( PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) case LeftOuter => left.outputPartitioning case RightOuter => right.outputPartitioning + case LeftSemi => left.outputPartitioning case _ => throwBadJoinTypeException() } @@ -246,14 +249,21 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // - // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` generates all rows from - // matching new left input with stored right input, and also stores all the left input + // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` + // - Inner, Left Outer, Right Outer Join: generates all rows from matching new left input + // with stored right input, and also stores all the left input. + // - Left Semi Join: generates all new left input rows from matching new left input with + // stored right input, and also stores all the non-matched left input. // - // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` generates all rows from - // matching new right input with stored left input, and also stores all the right input. - // It also generates all rows from matching new left input with new right input, since - // the new left input has become stored by that point. This tiny asymmetry is necessary - // to avoid duplication. + // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` + // - Inner, Left Outer, Right Outer Join: generates all rows from matching new right input + // with stored left input, and also stores all the right input. + // It also generates all rows from matching new left input with new right input, since + // the new left input has become stored by that point. This tiny asymmetry is necessary + // to avoid duplication. + // - Left Semi Join: generates all stored left input rows, from matching new right input + // with stored left input, and also stores all the right input. Note only first-time + // matched left input rows will be generated, this is to guarantee left semi semantics. val leftOutputIter = leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner) { (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(input).withRight(matched) } @@ -261,22 +271,21 @@ case class StreamingSymmetricHashJoinExec( (input: InternalRow, matched: InternalRow) => joinedRow.withLeft(matched).withRight(input) } - // We need to save the time that the inner join output iterator completes, since outer join - // output counts as both update and removal time. 
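(Editorial sketch, not part of the patch: a tiny in-memory simulation of the per-side behaviour described in the `storeAndJoinWithOtherSide` comments above. The real operator keeps this state in `SymmetricHashJoinStateManager` and evicts it by watermark; both are omitted here, and all names are illustrative.)

```scala
import scala.collection.mutable

final case class LeftEntry(value: String, var matched: Boolean)

val leftState  = mutable.Map.empty[Int, mutable.Buffer[LeftEntry]] // key -> buffered left rows (matched = already emitted)
val rightState = mutable.Map.empty[Int, Int]                       // key -> count of buffered right rows

def onLeftRow(key: Int, value: String): Seq[String] =
  if (rightState.getOrElse(key, 0) > 0) {
    Seq(value) // match exists: emit the left row now and do not buffer it
  } else {
    leftState.getOrElseUpdate(key, mutable.Buffer.empty[LeftEntry]) += LeftEntry(value, matched = false)
    Seq.empty  // no match yet: buffer as unmatched, emit nothing
  }

def onRightRow(key: Int): Seq[String] = {
  rightState.update(key, rightState.getOrElse(key, 0) + 1) // the right row is always buffered
  val firstTimeMatches = leftState.getOrElse(key, mutable.Buffer.empty[LeftEntry]).filterNot(_.matched)
  firstTimeMatches.foreach(_.matched = true)               // each buffered left row is emitted at most once
  firstTimeMatches.map(_.value).toSeq
}
```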
- var innerOutputCompletionTimeNs: Long = 0 - def onInnerOutputCompletion = { - innerOutputCompletionTimeNs = System.nanoTime + // We need to save the time that the one side hash join output iterator completes, since + // other join output counts as both update and removal time. + var hashJoinOutputCompletionTimeNs: Long = 0 + def onHashJoinOutputCompletion(): Unit = { + hashJoinOutputCompletionTimeNs = System.nanoTime } - // This is the iterator which produces the inner join rows. For outer joins, this will be - // prepended to a second iterator producing outer join rows; for inner joins, this is the full - // output. - val innerOutputIter = CompletionIterator[InternalRow, Iterator[InternalRow]]( - (leftOutputIter ++ rightOutputIter), onInnerOutputCompletion) - + // This is the iterator which produces the inner and left semi join rows. For other joins, + // this will be prepended to a second iterator producing other rows; for inner and left semi + // joins, this is the full output. + val hashJoinOutputIter = CompletionIterator[InternalRow, Iterator[InternalRow]]( + leftOutputIter ++ rightOutputIter, onHashJoinOutputCompletion()) val outputIter: Iterator[InternalRow] = joinType match { - case Inner => - innerOutputIter + case Inner | LeftSemi => + hashJoinOutputIter case LeftOuter => // We generate the outer join input by: // * Getting an iterator over the rows that have aged out on the left side. These rows are @@ -311,7 +320,7 @@ case class StreamingSymmetricHashJoinExec( } }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) - innerOutputIter ++ outerOutputIter + hashJoinOutputIter ++ outerOutputIter case RightOuter => // See comments for left outer case. def matchesWithLeftSideState(rightKeyValue: UnsafeRowPair) = { @@ -330,11 +339,15 @@ case class StreamingSymmetricHashJoinExec( } }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) - innerOutputIter ++ outerOutputIter + hashJoinOutputIter ++ outerOutputIter case _ => throwBadJoinTypeException() } - val outputProjection = UnsafeProjection.create(left.output ++ right.output, output) + val outputProjection = if (joinType == LeftSemi) { + UnsafeProjection.create(output, output) + } else { + UnsafeProjection.create(left.output ++ right.output, output) + } val outputIterWithMetrics = outputIter.map { row => numOutputRows += 1 outputProjection(row) @@ -345,24 +358,28 @@ case class StreamingSymmetricHashJoinExec( // All processing time counts as update time. allUpdatesTimeMs += math.max(NANOSECONDS.toMillis(System.nanoTime - updateStartTimeNs), 0) - // Processing time between inner output completion and here comes from the outer portion of a - // join, and thus counts as removal time as we remove old state from one side while iterating. - if (innerOutputCompletionTimeNs != 0) { + // Processing time between one side hash join output completion and here comes from the + // outer portion of a join, and thus counts as removal time as we remove old state from + // one side while iterating. + if (hashJoinOutputCompletionTimeNs != 0) { allRemovalsTimeMs += - math.max(NANOSECONDS.toMillis(System.nanoTime - innerOutputCompletionTimeNs), 0) + math.max(NANOSECONDS.toMillis(System.nanoTime - hashJoinOutputCompletionTimeNs), 0) } allRemovalsTimeMs += timeTakenMs { // Remove any remaining state rows which aren't needed because they're below the watermark. // - // For inner joins, we have to remove unnecessary state rows from both sides if possible. 
+ // For inner and left semi joins, we have to remove unnecessary state rows from both sides + // if possible. + // // For outer joins, we have already removed unnecessary state rows from the outer side // (e.g., left side for left outer join) while generating the outer "null" outputs. Now, we // have to remove unnecessary state rows from the other side (e.g., right side for the left // outer join) if possible. In all cases, nothing needs to be outputted, hence the removal // needs to be done greedily by immediately consuming the returned iterator. val cleanupIter = joinType match { - case Inner => leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() + case Inner | LeftSemi => + leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() case LeftOuter => rightSideJoiner.removeOldState() case RightOuter => leftSideJoiner.removeOldState() case _ => throwBadJoinTypeException() @@ -481,6 +498,26 @@ case class StreamingSymmetricHashJoinExec( case _ => (_: InternalRow) => Iterator.empty } + val excludeRowsAlreadyMatched = joinType == LeftSemi && joinSide == RightSide + + val generateOutputIter: (InternalRow, Iterator[JoinedRow]) => Iterator[InternalRow] = + joinSide match { + case LeftSide if joinType == LeftSemi => + (input: InternalRow, joinedRowIter: Iterator[JoinedRow]) => + // For left side of left semi join, generate one left row if there is matched + // rows from right side. Otherwise, generate nothing. + if (joinedRowIter.nonEmpty) { + Iterator.single(input) + } else { + Iterator.empty + } + case RightSide if joinType == LeftSemi => + (_: InternalRow, joinedRowIter: Iterator[JoinedRow]) => + // For right side of left semi join, generate matched left rows only. + joinedRowIter.map(_.getLeft) + case _ => (_: InternalRow, joinedRowIter: Iterator[JoinedRow]) => joinedRowIter + } + nonLateRows.flatMap { row => val thisRow = row.asInstanceOf[UnsafeRow] // If this row fails the pre join filter, that means it can never satisfy the full join @@ -489,8 +526,12 @@ case class StreamingSymmetricHashJoinExec( // the case of inner join). if (preJoinFilter(thisRow)) { val key = keyGenerator(thisRow) - val outputIter: Iterator[JoinedRow] = otherSideJoiner.joinStateManager - .getJoinedRows(key, thatRow => generateJoinedRow(thisRow, thatRow), postJoinFilter) + val joinedRowIter: Iterator[JoinedRow] = otherSideJoiner.joinStateManager.getJoinedRows( + key, + thatRow => generateJoinedRow(thisRow, thatRow), + postJoinFilter, + excludeRowsAlreadyMatched) + val outputIter = generateOutputIter(thisRow, joinedRowIter) new AddingProcessedRowToStateCompletionIterator(key, thisRow, outputIter) } else { generateFilteredJoinedRow(thisRow) @@ -501,13 +542,19 @@ case class StreamingSymmetricHashJoinExec( private class AddingProcessedRowToStateCompletionIterator( key: UnsafeRow, thisRow: UnsafeRow, - subIter: Iterator[JoinedRow]) - extends CompletionIterator[JoinedRow, Iterator[JoinedRow]](subIter) { + subIter: Iterator[InternalRow]) + extends CompletionIterator[InternalRow, Iterator[InternalRow]](subIter) { + private val iteratorNotEmpty: Boolean = super.hasNext override def completion(): Unit = { - val shouldAddToState = // add only if both removal predicates do not match - !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) + val isLeftSemiWithMatch = + joinType == LeftSemi && joinSide == LeftSide && iteratorNotEmpty + // Add to state store only if both removal predicates do not match, + // and the row is not matched for left side of left semi join. 
+ val shouldAddToState = + !stateKeyWatermarkPredicateFunc(key) && !stateValueWatermarkPredicateFunc(thisRow) && + !isLeftSemiWithMatch if (shouldAddToState) { joinStateManager.append(key, thisRow, matched = iteratorNotEmpty) updatedStateRowsCount += 1 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 2aa2a18b9eaf4..3fae3979757fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -99,13 +99,20 @@ class SymmetricHashJoinStateManager( /** * Get all the matched values for given join condition, with marking matched. * This method is designed to mark joined rows properly without exposing internal index of row. + * + * @param excludeRowsAlreadyMatched Do not join with rows already matched previously. + * This is used for right side of left semi join in + * [[StreamingSymmetricHashJoinExec]] only. */ def getJoinedRows( key: UnsafeRow, generateJoinedRow: InternalRow => JoinedRow, - predicate: JoinedRow => Boolean): Iterator[JoinedRow] = { + predicate: JoinedRow => Boolean, + excludeRowsAlreadyMatched: Boolean = false): Iterator[JoinedRow] = { val numValues = keyToNumValues.get(key) - keyWithIndexToValue.getAll(key, numValues).map { keyIdxToValue => + keyWithIndexToValue.getAll(key, numValues).filterNot { keyIdxToValue => + excludeRowsAlreadyMatched && keyIdxToValue.matched + }.map { keyIdxToValue => val joinedRow = generateJoinedRow(keyIdxToValue.value) if (predicate(joinedRow)) { if (!keyIdxToValue.matched) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b235bf7c3180a..91d1f5de3f211 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -41,18 +41,174 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.util.Utils +abstract class StreamingJoinSuite + extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { -class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { + import testImplicits._ before { - SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' - spark.streams.stateStoreCoordinator // initialize the lazy coordinator + SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' + spark.streams.stateStoreCoordinator // initialize the lazy coordinator } after { StateStore.stop() } + protected def setupStream(prefix: String, multiplier: Int): (MemoryStream[Int], DataFrame) = { + val input = MemoryStream[Int] + val df = input.toDF + .select( + 'value as "key", + timestamp_seconds($"value") as s"${prefix}Time", + ('value * multiplier) as s"${prefix}Value") + .withWatermark(s"${prefix}Time", "10 seconds") + + (input, df) + } + + protected def setupWindowedJoin(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (input1, df1) = setupStream("left", 2) + val (input2, df2) = setupStream("right", 3) + val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) 
+ val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) + val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) + val select = if (joinType == "left_semi") { + joined.select('key, $"window.end".cast("long"), 'leftValue) + } else { + joined.select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) + } + + (input1, input2, select) + } + + protected def setupWindowedJoinWithLeftCondition(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (leftInput, df1) = setupStream("left", 2) + val (rightInput, df2) = setupStream("right", 3) + // Use different schemas to ensure the null row is being generated from the correct side. + val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) + val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + + val joined = left.join( + right, + left("key") === right("key") + && left("window") === right("window") + && 'leftValue > 4, + joinType) + + val select = if (joinType == "left_semi") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + } else if (joinType == "left_outer") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + } else if (joinType == "right_outer") { + joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + } else { + joined + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedJoinWithRightCondition(joinType: String) + : (MemoryStream[Int], MemoryStream[Int], DataFrame) = { + + val (leftInput, df1) = setupStream("left", 2) + val (rightInput, df2) = setupStream("right", 3) + // Use different schemas to ensure the null row is being generated from the correct side. 
+ val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) + val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) + + val joined = left.join( + right, + left("key") === right("key") + && left("window") === right("window") + && 'rightValue.cast("int") > 7, + joinType) + + val select = if (joinType == "left_semi") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue) + } else if (joinType == "left_outer") { + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + } else if (joinType == "right_outer") { + joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + } else { + joined + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedJoinWithRangeCondition(joinType: String) + : (MemoryStream[(Int, Int)], MemoryStream[(Int, Int)], DataFrame) = { + + val leftInput = MemoryStream[(Int, Int)] + val rightInput = MemoryStream[(Int, Int)] + + val df1 = leftInput.toDF.toDF("leftKey", "time") + .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") + .withWatermark("leftTime", "10 seconds") + + val df2 = rightInput.toDF.toDF("rightKey", "time") + .select('rightKey, timestamp_seconds($"time") as "rightTime", + ('rightKey * 3) as "rightValue") + .withWatermark("rightTime", "10 seconds") + + val joined = + df1.join( + df2, + expr("leftKey = rightKey AND " + + "leftTime BETWEEN rightTime - interval 5 seconds AND rightTime + interval 5 seconds"), + joinType) + + val select = if (joinType == "left_semi") { + joined.select('leftKey, 'leftTime.cast("int")) + } else { + joined.select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + } + + (leftInput, rightInput, select) + } + + protected def setupWindowedSelfJoin(joinType: String) + : (MemoryStream[(Int, Long)], DataFrame) = { + + val inputStream = MemoryStream[(Int, Long)] + + val df = inputStream.toDS() + .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp")) + + val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), col("timestamp").as("rightTime")) + + val joined = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("leftId = rightId AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType) + + val select = if (joinType == "left_semi") { + joined.select(col("leftId"), col("leftTime").cast("int")) + } else { + joined.select(col("leftId"), col("leftTime").cast("int"), + col("rightId"), col("rightTime").cast("int")) + } + + (inputStream, select) + } +} + +class StreamingInnerJoinSuite extends StreamingJoinSuite { + import testImplicits._ test("stream stream inner join on non-time column") { val input1 = MemoryStream[Int] @@ -486,58 +642,13 @@ class StreamingInnerJoinSuite extends StreamTest with StateStoreMetricsTest with } -class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with BeforeAndAfter { +class StreamingOuterJoinSuite extends StreamingJoinSuite { import testImplicits._ import org.apache.spark.sql.functions._ - before { - SparkSession.setActiveSessionInternal(spark) // set this before force initializing 'joinExec' - spark.streams.stateStoreCoordinator // initialize the lazy coordinator - } - - after { - StateStore.stop() - } 
- - private def setupStream(prefix: String, multiplier: Int): (MemoryStream[Int], DataFrame) = { - val input = MemoryStream[Int] - val df = input.toDF - .select( - 'value as "key", - timestamp_seconds($"value") as s"${prefix}Time", - ('value * multiplier) as s"${prefix}Value") - .withWatermark(s"${prefix}Time", "10 seconds") - - return (input, df) - } - - private def setupWindowedJoin(joinType: String): - (MemoryStream[Int], MemoryStream[Int], DataFrame) = { - val (input1, df1) = setupStream("left", 2) - val (input2, df2) = setupStream("right", 3) - val windowed1 = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val windowed2 = df2.select('key, window('rightTime, "10 second"), 'rightValue) - val joined = windowed1.join(windowed2, Seq("key", "window"), joinType) - .select('key, $"window.end".cast("long"), 'leftValue, 'rightValue) - - (input1, input2, joined) - } - test("left outer early state exclusion on left") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'leftValue > 4, - "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("left_outer") testStream(joined)( MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), @@ -554,19 +665,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("left outer early state exclusion on right") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'rightValue.cast("int") > 7, - "left_outer") - .select(left("key"), left("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("left_outer") testStream(joined)( MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), @@ -583,19 +682,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("right outer early state exclusion on left") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. 
- val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'leftValue > 4, - "right_outer") - .select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("right_outer") testStream(joined)( MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), @@ -612,19 +699,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("right outer early state exclusion on right") { - val (leftInput, df1) = setupStream("left", 2) - val (rightInput, df2) = setupStream("right", 3) - // Use different schemas to ensure the null row is being generated from the correct side. - val left = df1.select('key, window('leftTime, "10 second"), 'leftValue) - val right = df2.select('key, window('rightTime, "10 second"), 'rightValue.cast("string")) - - val joined = left.join( - right, - left("key") === right("key") - && left("window") === right("window") - && 'rightValue.cast("int") > 7, - "right_outer") - .select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("right_outer") testStream(joined)( MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), @@ -681,27 +756,8 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with ("right_outer", Row(null, 2, null, 5)) ).foreach { case (joinType: String, outerResult) => test(s"${joinType.replaceAllLiterally("_", " ")} with watermark range condition") { - import org.apache.spark.sql.functions._ - - val leftInput = MemoryStream[(Int, Int)] - val rightInput = MemoryStream[(Int, Int)] - - val df1 = leftInput.toDF.toDF("leftKey", "time") - .select('leftKey, timestamp_seconds($"time") as "leftTime", ('leftKey * 2) as "leftValue") - .withWatermark("leftTime", "10 seconds") - - val df2 = rightInput.toDF.toDF("rightKey", "time") - .select('rightKey, timestamp_seconds($"time") as "rightTime", - ('rightKey * 3) as "rightValue") - .withWatermark("rightTime", "10 seconds") - - val joined = - df1.join( - df2, - expr("leftKey = rightKey AND " + - "leftTime BETWEEN rightTime - interval 5 seconds AND rightTime + interval 5 seconds"), - joinType) - .select('leftKey, 'rightKey, 'leftTime.cast("int"), 'rightTime.cast("int")) + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition(joinType) + testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), CheckAnswer(), @@ -780,27 +836,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with } test("SPARK-26187 self left outer join should not return outer nulls for already matched rows") { - val inputStream = MemoryStream[(Int, Long)] - - val df = inputStream.toDS() - .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp")) - - val leftStream = df.select(col("value").as("leftId"), col("timestamp").as("leftTime")) - - val rightStream = df - // Introduce misses for ease of debugging - .where(col("value") % 2 === 0) - .select(col("value").as("rightId"), col("timestamp").as("rightTime")) - - val query = leftStream - .withWatermark("leftTime", "5 seconds") - .join( - rightStream.withWatermark("rightTime", "5 seconds"), - expr("leftId = rightId AND rightTime >= leftTime AND " + - "rightTime <= leftTime + interval 5 seconds"), - 
joinType = "leftOuter") - .select(col("leftId"), col("leftTime").cast("int"), - col("rightId"), col("rightTime").cast("int")) + val (inputStream, query) = setupWindowedSelfJoin("left_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -938,7 +974,7 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with throw writer.exception.get } assert(e.getMessage.toLowerCase(Locale.ROOT) - .contains("the query is using stream-stream outer join with state format version 1")) + .contains("the query is using stream-stream leftouter join with state format version 1")) } test("SPARK-29438: ensure UNION doesn't lead stream-stream join to use shifted partition IDs") { @@ -1041,3 +1077,205 @@ class StreamingOuterJoinSuite extends StreamTest with StateStoreMetricsTest with ) } } + +class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { + + import testImplicits._ + + test("windowed left semi join") { + val (leftInput, rightInput, joined) = setupWindowedJoin("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3, 4, 5)(rightInput, 3, 4, 5, 6, 7), + CheckNewAnswer(Row(3, 10, 6), Row(4, 10, 8), Row(5, 10, 10)), + // states + // left: 1, 2, 3, 4 ,5 + // right: 3, 4, 5, 6, 7 + assertNumStateRows(total = 10, updated = 10), + MultiAddData(leftInput, 21)(rightInput, 22), + // Watermark = 11, should remove rows having window=[0,10]. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + // + // states evicted + // left: 1, 2, 3, 4 ,5 (below watermark) + // right: 3, 4, 5, 6, 7 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(leftInput, 22), + CheckNewAnswer(Row(22, 30, 44)), + // Unlike inner/outer joins, given left input row matches with right input row, + // we don't buffer the matched left input row to the state store. + // + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0), + StopStream, + StartStream(), + + AddData(leftInput, 1), + // Row not add as 1 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0, droppedByWatermark = 1), + AddData(rightInput, 5), + // Row not add as 5 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21 + // right: 22 + assertNumStateRows(total = 2, updated = 0, droppedByWatermark = 1) + ) + } + + test("left semi early state exclusion on left") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), + // The left rows with leftValue <= 4 should not generate their semi join rows and + // not get added to the state. + CheckNewAnswer(Row(3, 10, 6)), + // states + // left: 3 + // right: 3, 4, 5 + assertNumStateRows(total = 4, updated = 4), + // We shouldn't get more semi join rows when the watermark advances. 
+ MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3 (below watermark) + // right: 3, 4, 5 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer((20, 30, 40)), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("left semi early state exclusion on right") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("left_semi") + + testStream(joined)( + MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), + // The right rows with rightValue <= 7 should never be added to the state. + // The right row with rightValue = 9 > 7, hence joined and added to state. + CheckNewAnswer(Row(3, 10, 6)), + // states + // left: 3, 4, 5 + // right: 3 + assertNumStateRows(total = 4, updated = 4), + // We shouldn't get more semi join rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3, 4, 5 (below watermark) + // right: 3 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer((20, 30, 40)), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("left semi join with watermark range condition") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("left_semi") + + testStream(joined)( + AddData(leftInput, (1, 5), (3, 5)), + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: nothing + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, (1, 10), (2, 5)), + // Match left row in the state. + CheckNewAnswer((1, 5)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5) + assertNumStateRows(total = 4, updated = 2), + AddData(rightInput, (1, 9)), + // No match as left row is already matched. + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 5, updated = 1), + // Increase event time watermark to 20s by adding data with time = 30s on both inputs. + AddData(leftInput, (1, 7), (1, 30)), + CheckNewAnswer((1, 7)), + // states + // left: (1, 5), (3, 5), (1, 30) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 6, updated = 1), + // Watermark = 30 - 10 = 20, no matched row. 
+ AddData(rightInput, (0, 30)), + CheckNewAnswer(), + // states + // left: (1, 30) + // right: (0, 30) + // + // states evicted + // left: (1, 5), (3, 5) (below watermark = 20) + // right: (1, 10), (2, 5), (1, 9) (below watermark = 20) + assertNumStateRows(total = 2, updated = 1) + ) + } + + test("self left semi join") { + val (inputStream, query) = setupWindowedSelfJoin("left_semi") + + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + CheckNewAnswer((2, 2), (4, 4)), + // batch 1 - global watermark = 0 + // states + // left: (2, 2L), (4, 4L) + // (left rows with value % 2 != 0 is filtered per [[PushPredicateThroughJoin]]) + // right: (2, 2L), (4, 4L) + // (right rows with value % 2 != 0 is filtered per [[PushPredicateThroughJoin]]) + assertNumStateRows(total = 4, updated = 4), + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + CheckNewAnswer((6, 6), (8, 8), (10, 10)), + // batch 2 - global watermark = 5 + // states + // left: (2, 2L), (4, 4L), (6, 6L), (8, 8L), (10, 10L) + // right: (6, 6L), (8, 8L), (10, 10L) + // + // states evicted + // left: nothing (it waits for 5 seconds more than watermark due to join condition) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 8, updated = 6), + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + CheckNewAnswer((12, 12), (14, 14)), + // batch 3 - global watermark = 9 + // states + // left: (4, 4L), (6, 6L), (8, 8L), (10, 10L), (12, 12L), (14, 14L) + // right: (10, 10L), (12, 12L), (14, 14L) + // + // states evicted + // left: (2, 2L) + // right: (6, 6L), (8, 8L) + assertNumStateRows(total = 9, updated = 4) + ) + } +} From a21945ce6c725896d19647891d1f9fa9ef74bd87 Mon Sep 17 00:00:00 2001 From: Yuning Zhang Date: Mon, 26 Oct 2020 16:19:06 +0900 Subject: [PATCH 0323/1009] [SPARK-33197][SQL] Make changes to spark.sql.analyzer.maxIterations take effect at runtime ### What changes were proposed in this pull request? Make changes to `spark.sql.analyzer.maxIterations` take effect at runtime. ### Why are the changes needed? `spark.sql.analyzer.maxIterations` is not a static conf. However, before this patch, changing `spark.sql.analyzer.maxIterations` at runtime does not take effect. ### Does this PR introduce _any_ user-facing change? Yes. Before this patch, changing `spark.sql.analyzer.maxIterations` at runtime does not take effect. ### How was this patch tested? modified unit test Closes #30108 from yuningzh-db/dynamic-analyzer-max-iterations. 
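A sketch of the resulting behavior (illustration only, not part of the patch): because the analyzer now reads `conf.analyzerMaxIterations` each time the fixed point is built, updating the conf on a live session affects the next analysis run, e.g. in spark-shell:

```
spark.conf.set("spark.sql.analyzer.maxIterations", "5")
spark.sql("SELECT 1").collect()   // analysis now runs with the lower limit
spark.conf.set("spark.sql.analyzer.maxIterations", "100")
spark.sql("SELECT 1").collect()   // subsequent queries pick up the new value
```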
Authored-by: Yuning Zhang Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 16 +++----- .../sql/catalyst/analysis/AnalysisSuite.scala | 41 +++++++++++++++++++ 2 files changed, 46 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4264627e0d9bd..457c41c39a196 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -132,8 +132,7 @@ object AnalysisContext { */ class Analyzer( override val catalogManager: CatalogManager, - conf: SQLConf, - maxIterations: Int) + conf: SQLConf) extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog { private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog @@ -148,12 +147,7 @@ class Analyzer( def this(catalog: SessionCatalog, conf: SQLConf) = { this( new CatalogManager(conf, FakeV2SessionCatalog, catalog), - conf, - conf.analyzerMaxIterations) - } - - def this(catalogManager: CatalogManager, conf: SQLConf) = { - this(catalogManager, conf, conf.analyzerMaxIterations) + conf) } def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { @@ -188,9 +182,9 @@ class Analyzer( * If the plan cannot be resolved within maxIterations, analyzer will throw exception to inform * user to increase the value of SQLConf.ANALYZER_MAX_ITERATIONS. */ - protected val fixedPoint = + protected def fixedPoint = FixedPoint( - maxIterations, + conf.analyzerMaxIterations, errorOnExceed = true, maxIterationsSetting = SQLConf.ANALYZER_MAX_ITERATIONS.key) @@ -206,7 +200,7 @@ class Analyzer( */ val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil - lazy val batches: Seq[Batch] = Seq( + override def batches: Seq[Batch] = Seq( Batch("Substitution", fixedPoint, // This rule optimizes `UpdateFields` expression chains so looks more like optimization rule. // However, when manipulating deeply nested schema, `UpdateFields` expression tree could be diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 3a5c4b9769685..4f51b77d8ece0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -926,4 +926,45 @@ class AnalysisSuite extends AnalysisTest with Matchers { ) assertAnalysisSuccess(plan) } + + test("SPARK-33197: Make sure changes to ANALYZER_MAX_ITERATIONS take effect at runtime") { + // RuleExecutor only throw exception or log warning when the rule is supposed to run + // more than once. 
+ val maxIterations = 2 + val maxIterationsEnough = 5 + withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterations.toString) { + val conf = SQLConf.get + val testAnalyzer = new Analyzer( + new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf), conf) + + val plan = testRelation2.select( + $"a" / Literal(2) as "div1", + $"a" / $"b" as "div2", + $"a" / $"c" as "div3", + $"a" / $"d" as "div4", + $"e" / $"e" as "div5") + + val message1 = intercept[TreeNodeException[LogicalPlan]] { + testAnalyzer.execute(plan) + }.getMessage + assert(message1.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + + withSQLConf(SQLConf.ANALYZER_MAX_ITERATIONS.key -> maxIterationsEnough.toString) { + try { + testAnalyzer.execute(plan) + } catch { + case ex: TreeNodeException[_] + if ex.getMessage.contains(SQLConf.ANALYZER_MAX_ITERATIONS.key) => + fail("analyzer.execute should not reach max iterations.") + } + } + + val message2 = intercept[TreeNodeException[LogicalPlan]] { + testAnalyzer.execute(plan) + }.getMessage + assert(message2.startsWith(s"Max iterations ($maxIterations) reached for batch Resolution, " + + s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) + } + } } From 850adeb0fd188cc3cb6319758d58a12554cb6149 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 26 Oct 2020 01:50:23 -0700 Subject: [PATCH 0324/1009] [SPARK-33239][INFRA] Use pre-built image at GitHub Action SparkR job ### What changes were proposed in this pull request? This PR aims to use a pre-built image for Github Action SparkR job. ### Why are the changes needed? This will reduce the execution time and the flakiness. **BEFORE (21 minutes 39 seconds)** ![Screen Shot 2020-10-16 at 1 24 43 PM](https://user-images.githubusercontent.com/9700541/96305593-fbeada80-0fb2-11eb-9b8e-86d8abaad9ef.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action `sparkr` job in this PR. Closes #30066 from dongjoon-hyun/SPARKR. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 77 ++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 27607a799d038..5b06485b9959e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -42,8 +42,6 @@ jobs: streaming, sql-kafka-0-10, streaming-kafka-0-10, mllib-local, mllib, yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl - - >- - sparkr # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] excluded-tags: [""] @@ -138,20 +136,6 @@ jobs: run: | python3.8 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner python3.8 -m pip list - # SparkR - - name: Install R 4.0 - uses: r-lib/actions/setup-r@v1 - if: contains(matrix.modules, 'sparkr') - with: - r-version: 4.0 - - name: Install R packages - if: contains(matrix.modules, 'sparkr') - run: | - # qpdf is required to reduce the size of PDFs to make CRAN check pass. See SPARK-32497. - sudo apt-get install -y libcurl4-openssl-dev qpdf - sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')" - # Show installed packages in R. 
- sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]' # Run the tests. - name: Run tests run: | @@ -260,6 +244,67 @@ jobs: name: unit-tests-log-${{ matrix.modules }}--1.8-hadoop3.2-hive2.3 path: "**/target/unit-tests.log" + sparkr: + name: Build modules - sparkr + runs-on: ubuntu-20.04 + container: + image: dongjoon/apache-spark-github-action-image:20201025 + env: + HADOOP_PROFILE: hadoop3.2 + HIVE_PROFILE: hive2.3 + GITHUB_PREV_SHA: ${{ github.event.before }} + GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v2 + # In order to fetch changed files + with: + fetch-depth: 0 + - name: Merge dispatched input branch + if: ${{ github.event.inputs.target != '' }} + run: git merge --progress --ff-only origin/${{ github.event.inputs.target }} + # Cache local repositories. Note that GitHub Actions cache has a 2G limit. + - name: Cache Scala, SBT, Maven and Zinc + uses: actions/cache@v2 + with: + path: | + build/apache-maven-* + build/zinc-* + build/scala-* + build/*.jar + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: sparkr-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + sparkr-maven- + - name: Cache Ivy local repository + uses: actions/cache@v2 + with: + path: ~/.ivy2/cache + key: sparkr-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + sparkr-ivy- + - name: Run tests + run: | + mkdir -p ~/.m2 + # The followings are also used by `r-lib/actions/setup-r` to avoid + # R issues at docker environment + export TZ=UTC + export _R_CHECK_SYSTEM_CLOCK_=FALSE + ./dev/run-tests --parallelism 2 --modules sparkr + rm -rf ~/.m2/repository/org/apache/spark + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v2 + with: + name: test-results-sparkr--1.8-hadoop3.2-hive2.3 + path: "**/target/test-reports/*.xml" + # Static analysis, and documentation build lint: name: Linters, licenses, dependencies and documentation generation From 1042d49bf9d7bb5162215e981e2f8e98164b2aff Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 26 Oct 2020 20:23:24 +0900 Subject: [PATCH 0325/1009] [SPARK-33075][SQL] Enable auto bucketed scan by default (disable only for cached query) ### What changes were proposed in this pull request? This PR is to enable auto bucketed table scan by default, with exception to only disable for cached query (similar to AQE). The reason why disabling auto scan for cached query is that, the cached query output partitioning can be leveraged later to avoid shuffle and sort when doing join and aggregate. ### Why are the changes needed? Enable auto bucketed table scan by default is useful as it can optimize query automatically under the hood, without users interaction. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test for cached query in `DisableUnnecessaryBucketedScanSuite.scala`. Also change a bunch of unit tests which should disable auto bucketed scan to make them work. Closes #30138 from c21/enable-auto-bucket. 
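For illustration only (a sketch, not taken from the patch; it assumes the `withSQLConf` test helper used in the touched suites and an existing bucketed table, here called `bucketed_table`), code that still needs the old behavior can disable the optimization locally:

```
withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") {
  // plans built inside this scope skip the automatic bucketed scan optimization
  val plan = spark.table("bucketed_table").queryExecution.executedPlan
}
```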
Authored-by: Cheng Su Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../org/apache/spark/sql/SparkSession.scala | 21 +++++++++++- .../spark/sql/execution/CacheManager.scala | 29 +++++++++++----- .../adaptive/AdaptiveSparkPlanHelper.scala | 16 --------- .../datasources/FileSourceStrategySuite.scala | 28 ++++++++------- .../execution/joins/BroadcastJoinSuite.scala | 34 ++++++++++--------- .../spark/sql/sources/BucketedReadSuite.scala | 34 ++++++++++--------- .../DisableUnnecessaryBucketedScanSuite.scala | 23 +++++++++++++ 8 files changed, 116 insertions(+), 71 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 35ef24c1c3ba6..3024398399962 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -971,7 +971,7 @@ object SQLConf { "false, this configuration does not take any effect.") .version("3.1.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val CROSS_JOINS_ENABLED = buildConf("spark.sql.crossJoin.enabled") .internal() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index b15d6f981291c..b33557dbfdb27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -29,7 +29,7 @@ import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT +import org.apache.spark.internal.config.{ConfigEntry, EXECUTOR_ALLOW_SPARK_CONTEXT} import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.catalog.Catalog @@ -1077,6 +1077,25 @@ object SparkSession extends Logging { throw new IllegalStateException("No active or default Spark session found"))) } + /** + * Returns a cloned SparkSession with all specified configurations disabled, or + * the original SparkSession if all configurations are already disabled. 
+ */ + private[sql] def getOrCloneSessionWithConfigsOff( + session: SparkSession, + configurations: Seq[ConfigEntry[Boolean]]): SparkSession = { + val configsEnabled = configurations.filter(session.sessionState.conf.getConf(_)) + if (configsEnabled.isEmpty) { + session + } else { + val newSession = session.cloneSession() + configsEnabled.foreach(conf => { + newSession.sessionState.conf.setConf(conf, false) + }) + newSession + } + } + //////////////////////////////////////////////////////////////////////////////////////// // Private methods from now on //////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 7201026b11b6b..5f72d6005a8dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -22,6 +22,7 @@ import scala.collection.immutable.IndexedSeq import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint @@ -31,6 +32,7 @@ import org.apache.spark.sql.execution.columnar.{DefaultCachedBatchSerializer, In import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK @@ -55,6 +57,17 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { @transient @volatile private var cachedData = IndexedSeq[CachedData]() + /** + * Configurations needs to be turned off, to avoid regression for cached query, so that the + * outputPartitioning of the underlying cached query plan can be leveraged later. + * Configurations include: + * 1. AQE + * 2. Automatic bucketed table scan + */ + private val forceDisableConfigs: Seq[ConfigEntry[Boolean]] = Seq( + SQLConf.ADAPTIVE_EXECUTION_ENABLED, + SQLConf.AUTO_BUCKETED_SCAN_ENABLED) + /** Clears all cached tables. */ def clearCache(): Unit = this.synchronized { cachedData.foreach(_.cachedRepresentation.cacheBuilder.clearCache()) @@ -79,10 +92,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { - // Turn off AQE so that the outputPartitioning of the underlying plan can be leveraged. 
- val sessionWithAqeOff = getOrCloneSessionWithAqeOff(query.sparkSession) - val inMemoryRelation = sessionWithAqeOff.withActive { - val qe = sessionWithAqeOff.sessionState.executePlan(planToCache) + val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( + query.sparkSession, forceDisableConfigs) + val inMemoryRelation = sessionWithConfigsOff.withActive { + val qe = sessionWithConfigsOff.sessionState.executePlan(planToCache) InMemoryRelation( storageLevel, qe, @@ -188,10 +201,10 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } needToRecache.map { cd => cd.cachedRepresentation.cacheBuilder.clearCache() - // Turn off AQE so that the outputPartitioning of the underlying plan can be leveraged. - val sessionWithAqeOff = getOrCloneSessionWithAqeOff(spark) - val newCache = sessionWithAqeOff.withActive { - val qe = sessionWithAqeOff.sessionState.executePlan(cd.plan) + val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( + spark, forceDisableConfigs) + val newCache = sessionWithConfigsOff.withActive { + val qe = sessionWithConfigsOff.sessionState.executePlan(cd.plan) InMemoryRelation(cd.cachedRepresentation.cacheBuilder, qe) } val recomputedPlan = cd.copy(cachedRepresentation = newCache) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala index 8d7a2c95081c4..6ba375910a4eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql.execution.adaptive -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.internal.SQLConf /** * This class provides utility methods related to tree traversal of an [[AdaptiveSparkPlanExec]] @@ -137,18 +135,4 @@ trait AdaptiveSparkPlanHelper { case a: AdaptiveSparkPlanExec => a.executedPlan case other => other } - - /** - * Returns a cloned [[SparkSession]] with adaptive execution disabled, or the original - * [[SparkSession]] if its adaptive execution is already disabled. 
- */ - def getOrCloneSessionWithAqeOff[T](session: SparkSession): SparkSession = { - if (!session.sessionState.conf.adaptiveExecutionEnabled) { - session - } else { - val newSession = session.cloneSession() - newSession.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, false) - newSession - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala index dfd9ba03f5be0..50f32126e5dec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategySuite.scala @@ -262,20 +262,22 @@ class FileSourceStrategySuite extends QueryTest with SharedSparkSession with Pre "p1=2/file7_0000" -> 1), buckets = 3) - // No partition pruning - checkScan(table) { partitions => - assert(partitions.size == 3) - assert(partitions(0).files.size == 5) - assert(partitions(1).files.size == 0) - assert(partitions(2).files.size == 2) - } + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + // No partition pruning + checkScan(table) { partitions => + assert(partitions.size == 3) + assert(partitions(0).files.size == 5) + assert(partitions(1).files.size == 0) + assert(partitions(2).files.size == 2) + } - // With partition pruning - checkScan(table.where("p1=2")) { partitions => - assert(partitions.size == 3) - assert(partitions(0).files.size == 3) - assert(partitions(1).files.size == 0) - assert(partitions(2).files.size == 1) + // With partition pruning + checkScan(table.where("p1=2")) { partitions => + assert(partitions.size == 3) + assert(partitions(0).files.size == 3) + assert(partitions(1).files.size == 0) + assert(partitions(2).files.size == 1) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 7ff945f5cbfb4..b6d1baf6e7902 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -432,22 +432,24 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils // join1 is a broadcast join where df2 is broadcasted. Note that output partitioning on the // streamed side (t1) is HashPartitioning (bucketed files). val join1 = t1.join(df2, t1("i1") === df2("i2") && t1("j1") === df2("j2")) - val plan1 = join1.queryExecution.executedPlan - assert(collect(plan1) { case e: ShuffleExchangeExec => e }.isEmpty) - val broadcastJoins = collect(plan1) { case b: BroadcastHashJoinExec => b } - assert(broadcastJoins.size == 1) - assert(broadcastJoins(0).outputPartitioning.isInstanceOf[PartitioningCollection]) - val p = broadcastJoins(0).outputPartitioning.asInstanceOf[PartitioningCollection] - assert(p.partitionings.size == 4) - // Verify all the combinations of output partitioning. 
- Seq(Seq(t1("i1"), t1("j1")), - Seq(t1("i1"), df2("j2")), - Seq(df2("i2"), t1("j1")), - Seq(df2("i2"), df2("j2"))).foreach { expected => - val expectedExpressions = expected.map(_.expr) - assert(p.partitionings.exists { - case h: HashPartitioning => expressionsEqual(h.expressions, expectedExpressions) - }) + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + val plan1 = join1.queryExecution.executedPlan + assert(collect(plan1) { case e: ShuffleExchangeExec => e }.isEmpty) + val broadcastJoins = collect(plan1) { case b: BroadcastHashJoinExec => b } + assert(broadcastJoins.size == 1) + assert(broadcastJoins(0).outputPartitioning.isInstanceOf[PartitioningCollection]) + val p = broadcastJoins(0).outputPartitioning.asInstanceOf[PartitioningCollection] + assert(p.partitionings.size == 4) + // Verify all the combinations of output partitioning. + Seq(Seq(t1("i1"), t1("j1")), + Seq(t1("i1"), df2("j2")), + Seq(df2("i2"), t1("j1")), + Seq(df2("i2"), df2("j2"))).foreach { expected => + val expectedExpressions = expected.map(_.expr) + assert(p.partitionings.exists { + case h: HashPartitioning => expressionsEqual(h.expressions, expectedExpressions) + }) + } } // Join on the column from the broadcasted side (i2, j2) and make sure output partitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index f8276b143c1e6..a188e4d9d6d90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -81,22 +81,24 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { .bucketBy(8, "j", "k") .saveAsTable("bucketed_table") - val bucketValue = Random.nextInt(maxI) - val table = spark.table("bucketed_table").filter($"i" === bucketValue) - val query = table.queryExecution - val output = query.analyzed.output - val rdd = query.toRdd - - assert(rdd.partitions.length == 8) - - val attrs = table.select("j", "k").queryExecution.analyzed.output - val checkBucketId = rdd.mapPartitionsWithIndex((index, rows) => { - val getBucketId = UnsafeProjection.create( - HashPartitioning(attrs, 8).partitionIdExpression :: Nil, - output) - rows.map(row => getBucketId(row).getInt(0) -> index) - }) - checkBucketId.collect().foreach(r => assert(r._1 == r._2)) + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "false") { + val bucketValue = Random.nextInt(maxI) + val table = spark.table("bucketed_table").filter($"i" === bucketValue) + val query = table.queryExecution + val output = query.analyzed.output + val rdd = query.toRdd + + assert(rdd.partitions.length == 8) + + val attrs = table.select("j", "k").queryExecution.analyzed.output + val checkBucketId = rdd.mapPartitionsWithIndex((index, rows) => { + val getBucketId = UnsafeProjection.create( + HashPartitioning(attrs, 8).partitionIdExpression :: Nil, + output) + rows.map(row => getBucketId(row).getInt(0) -> index) + }) + checkBucketId.collect().foreach(r => assert(r._1 == r._2)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 1c258bc0dadb9..70b74aed40eca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -18,7 +18,10 @@ package org.apache.spark.sql.sources import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -218,4 +221,24 @@ abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTes } } } + + test("SPARK-33075: not disable bucketed table scan for cached query") { + withTable("t1") { + withSQLConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED.key -> "true") { + df1.write.format("parquet").bucketBy(8, "i").saveAsTable("t1") + spark.catalog.cacheTable("t1") + assertCached(spark.table("t1")) + + // Verify cached bucketed table scan not disabled + val partitioning = spark.table("t1").queryExecution.executedPlan + .outputPartitioning + assert(partitioning match { + case HashPartitioning(Seq(column: AttributeReference), 8) if column.name == "i" => true + case _ => false + }) + val aggregateQueryPlan = sql("SELECT SUM(i) FROM t1 GROUP BY i").queryExecution.executedPlan + assert(aggregateQueryPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + } + } + } } From 11bbb130df7b083f42acf0207531efe3912d89eb Mon Sep 17 00:00:00 2001 From: neko Date: Mon, 26 Oct 2020 20:41:56 +0800 Subject: [PATCH 0326/1009] [SPARK-33204][UI] The 'Event Timeline' area cannot be opened when a spark application has some failed jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The page returned by /jobs in Spark UI will store the detail information of each job in javascript like this: ```javascript { 'className': 'executor added', 'group': 'executors', 'start': new Date(1602834008978), 'content': '
    Executor 3 added
' } ``` If an application has a failed job, the failure reason corresponding to the job will be stored in the `content` field in the javascript. If the failure reason contains the character **'**, the javascript code will throw an exception and the `event timeline` URL will stop responding. The following is an example of the broken json: ```javascript { 'className': 'executor removed', 'group': 'executors', 'start': new Date(1602925908654), 'content': '
    Executor 2 removed
' } ``` So we need to consider this special case: if the returned job info contains the character **'**, it needs to be escaped. ### Why are the changes needed? Ensure that the UI page can function normally ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This PR only fixes an exception in a special case; the manual test result is as follows: ![fixed](https://user-images.githubusercontent.com/52202080/96711638-74490580-13d0-11eb-93e0-b44d9ed5da5c.gif) Closes #30119 from akiyamaneko/timeline_view_cannot_open. Authored-by: neko Signed-off-by: Gengliang Wang --- core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala | 3 ++- core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 4e76ea289ede6..5f5a08fe0e574 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -147,7 +147,8 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We | 'Removed at ${UIUtils.formatDate(removeTime)}' + | '${ e.removeReason.map { reason => - s"""
    Reason: ${reason.replace("\n", " ")}""" + s"""
    Reason: ${StringEscapeUtils.escapeEcmaScript( + reason.replace("\n", " "))}""" }.getOrElse("") }"' + | 'data-html="true">Executor ${e.id} removed
    ' diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index df239d6d0e187..19eccc5209b8e 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -127,7 +127,8 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP | 'Removed at ${UIUtils.formatDate(removeTime)}' + | '${ e.removeReason.map { reason => - s"""
    Reason: ${reason.replace("\n", " ")}""" + s"""
    Reason: ${StringEscapeUtils.escapeEcmaScript( + reason.replace("\n", " "))}""" }.getOrElse("") }"' + | 'data-html="true">Executor ${e.id} removed
    ' From 02fa19f102122f06e4358cf86c5e903fda28b289 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 26 Oct 2020 12:31:05 -0700 Subject: [PATCH 0327/1009] [SPARK-33230][SQL] Hadoop committers to get unique job ID in "spark.sql.sources.writeJobUUID" ### What changes were proposed in this pull request? This reinstates the old option `spark.sql.sources.write.jobUUID` to set a unique jobId in the jobconf so that hadoop MR committers have a unique ID which is (a) consistent across tasks and workers and (b) not brittle compared to generated-timestamp job IDs. The latter matches that of what JobID requires, but as they are generated per-thread, may not always be unique within a cluster. ### Why are the changes needed? If a committer (e.g s3a staging committer) uses job-attempt-ID as a unique ID then any two jobs started within the same second have the same ID, so can clash. ### Does this PR introduce _any_ user-facing change? Good Q. It is "developer-facing" in the context of anyone writing a committer. But it reinstates a property which was in Spark 1.x and "went away" ### How was this patch tested? Testing: no test here. You'd have to create a new committer which extracted the value in both job and task(s) and verified consistency. That is possible (with a task output whose records contained the UUID), but it would be pretty convoluted and a high maintenance cost. Because it's trying to address a race condition, it's hard to regenerate the problem downstream and so verify a fix in a test run...I'll just look at the logs to see what temporary dir is being used in the cluster FS and verify it's a UUID Closes #30141 from steveloughran/SPARK-33230-jobId. Authored-by: Steve Loughran Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/datasources/FileFormatWriter.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index abb88ae73cabf..a71aeb47872ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -133,7 +133,7 @@ object FileFormatWriter extends Logging { fileFormat.prepareWrite(sparkSession, job, caseInsensitiveOptions, dataSchema) val description = new WriteJobDescription( - uuid = UUID.randomUUID().toString, + uuid = UUID.randomUUID.toString, serializableHadoopConf = new SerializableConfiguration(job.getConfiguration), outputWriterFactory = outputWriterFactory, allColumns = outputSpec.outputColumns, @@ -164,6 +164,10 @@ object FileFormatWriter extends Logging { SQLExecution.checkSQLExecutionId(sparkSession) + // propagate the decription UUID into the jobs, so that committers + // get an ID guaranteed to be unique. + job.getConfiguration.set("spark.sql.sources.writeJobUUID", description.uuid) + // This call shouldn't be put into the `try` block below because it only initializes and // prepares the job, any exception thrown from here shouldn't cause abortJob() to be called. committer.setupJob(job) From afa6aee4f5ea270db5331e48ad08e0b176cdd2a0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 26 Oct 2020 15:29:12 -0700 Subject: [PATCH 0328/1009] [SPARK-33237][K8S][TESTS] Use default Hadoop-3.2 profile from K8s IT Jenkins job ### What changes were proposed in this pull request? 
This PR aims to use `hadoop-3.2` profile in K8s IT Jenkins jobs. - [x] Switch the default value of `HADOOP_PROFILE` from `hadoop-2.7` to `hadoop-3.2`. - [x] Remove `-Phadoop2.7` from Jenkins K8s IT job. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/configure **BEFORE** ``` ./dev/make-distribution.sh --name ${DATE}-${REVISION} --r --pip --tgz -DzincPort=${ZINC_PORT} \ -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver ``` **AFTER** ``` ./dev/make-distribution.sh --name ${DATE}-${REVISION} --r --pip --tgz -DzincPort=${ZINC_PORT} \ -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver ``` ### Why are the changes needed? Since Apache Spark 3.1.0, Hadoop 3 is the default. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Check the Jenkins K8s IT log and result. - https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/34899/ ``` + /home/jenkins/workspace/SparkPullRequestBuilder-K8s/build/mvn clean package -DskipTests -DzincPort=4021 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver Using `mvn` from path: /home/jenkins/tools/hudson.tasks.Maven_MavenInstallation/Maven_3.6.3/bin/mvn [INFO] Scanning for projects... [INFO] ------------------------------------------------------------------------ [INFO] Reactor Build Order: [INFO] ``` Closes #30153 from dongjoon-hyun/SPARK-33237. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../integration-tests/dev/dev-run-integration-tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh index 9c03a97ef15d5..b72a4f74918ba 100755 --- a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh +++ b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh @@ -35,7 +35,7 @@ CONTEXT= INCLUDE_TAGS="k8s" EXCLUDE_TAGS= JAVA_VERSION="8" -HADOOP_PROFILE="hadoop-2.7" +HADOOP_PROFILE="hadoop-3.2" MVN="$TEST_ROOT_DIR/build/mvn" SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version 2>/dev/null\ From e43cd8ccef153ed504200c9f52966cb6a96e73bf Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 27 Oct 2020 09:25:53 +0900 Subject: [PATCH 0329/1009] [SPARK-32388][SQL] TRANSFORM with schema-less mode should keep the same with hive ### What changes were proposed in this pull request? In current Spark script transformation with hive serde mode, in case of schema less, result is different with hive. This pr to keep result same with hive script transform serde. #### Hive Scrip Transform with serde in schemaless ``` hive> create table t (c0 int, c1 int, c2 int); hive> INSERT INTO t VALUES (1, 1, 1); hive> INSERT INTO t VALUES (2, 2, 2); hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t; hive> DESCRIBE v; key string value string hive> SELECT * FROM v; 1 1 1 2 2 2 hive> SELECT key FROM v; 1 2 hive> SELECT value FROM v; 1 1 2 2 ``` #### Spark script transform with hive serde in schema less. ``` hive> create table t (c0 int, c1 int, c2 int); hive> INSERT INTO t VALUES (1, 1, 1); hive> INSERT INTO t VALUES (2, 2, 2); hive> CREATE VIEW v AS SELECT TRANSFORM(c0, c1, c2) USING 'cat' FROM t; hive> SELECT * FROM v; 1 1 2 2 ``` **No serde mode in hive (ROW FORMATTED DELIMITED)** ![image](https://user-images.githubusercontent.com/46485123/90088770-55841e00-dd52-11ea-92dd-7fe52d93f0b3.png) ### Why are the changes needed? 
Keep same behavior with hive script transform ### Does this PR introduce _any_ user-facing change? Before this pr with hive serde script transform ``` select transform(*) USING 'cat' from ( select 1, 2, 3, 4 ) tmp key value 1 2 ``` After ``` select transform(*) USING 'cat' from ( select 1, 2, 3, 4 ) tmp key value 1 2 3 4 ``` ### How was this patch tested? UT Closes #29421 from AngersZhuuuu/SPARK-32388. Authored-by: angerszhu Signed-off-by: HyukjinKwon --- .../BaseScriptTransformationExec.scala | 11 +- .../spark/sql/execution/SparkSqlParser.scala | 4 +- .../BaseScriptTransformationSuite.scala | 40 ++++- .../HiveScriptTransformationSuite.scala | 159 ++++++++++++++++-- 4 files changed, 189 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index c5107645f46f8..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -111,15 +111,14 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { - // In schema less mode, hive default serde will choose first two output column as output - // if output column size less then 2, it will throw ArrayIndexOutOfBoundsException. - // Here we change spark's behavior same as hive's default serde. - // But in hive, TRANSFORM with schema less behavior like origin spark, we will fix this - // to keep spark and hive behavior same in SPARK-32388 + // In schema less mode, hive will choose first two output column as output. + // If output column size less then 2, it will return NULL for columns with missing values. + // Here we split row string and choose first 2 values, if values's size less then 2, + // we pad NULL value until 2 to make behavior same with hive. val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 0a5f4c3ed4bcb..f46526d419158 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -785,7 +785,9 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { // Use default (serde) format. 
val name = conf.getConfString("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq("field.delim" -> "\t") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) (Nil, Option(name), props, recordHandler) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index c07ea0f12f94e..e6029400997a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -137,10 +137,7 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } - test("SPARK-25990: TRANSFORM should handle schema less correctly (no serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile - + test("SPARK-32388: TRANSFORM should handle schema less correctly (no serde)") { withTempView("v") { val df = Seq( (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), @@ -157,7 +154,24 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.col("c").expr, df.col("d").expr, df.col("e").expr), - script = s"python $scriptFilePath", + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr, + df.col("b").expr), + script = "cat", output = Seq( AttributeReference("key", StringType)(), AttributeReference("value", StringType)()), @@ -167,6 +181,22 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + checkAnswer( + df, + (child: SparkPlan) => createScriptTransformationExec( + input = Seq( + df.col("a").expr), + script = "cat", + output = Seq( + AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + child = child, + ioschema = defaultIOSchema.copy(schemaLess = true) + ), + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index d247f37130776..a8b10fc94d880 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -156,10 +156,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T assert(uncaughtExceptionHandler.exception.isEmpty) } - test("SPARK-25990: TRANSFORM should handle schema less correctly (hive serde)") { - assume(TestUtils.testCommandAvailable("python")) - val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsolutePath - + test("SPARK-32388: TRANSFORM should handle schema less correctly (hive serde)") { withTempView("v") { val df = Seq( (1, "1", 
1.0, BigDecimal(1.0), new Timestamp(1)), @@ -168,21 +165,157 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) df.createTempView("v") - val query = sql( - s""" - |SELECT TRANSFORM(a, b, c, d, e) - |USING 'python ${scriptFilePath}' - |FROM v - """.stripMargin) + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and don't specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | USING 'cat' + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and just specify serde, + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) + + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde with + // 'serialization.last.column.takes.rest=true', + // it will choose take rest columns in second column as output schema + // (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + concat_ws("\t", + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size > 2 and specify serde + // with 'serialization.last.column.takes.rest=false', + // it will choose first two column as output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'false' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + 'b.cast("string").as("value")).collect()) - // In hive default serde mode, if we don't define output schema, it will choose first - // two column as output schema (key: String, value: String) + // In hive 
default serde mode, if we don't define output schema, + // when output column size = 2 and specify serde, it will these two column as + // output schema (key: String, value: String) checkAnswer( - query, + sql( + s""" + |SELECT TRANSFORM(a, b) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), identity, df.select( 'a.cast("string").as("key"), 'b.cast("string").as("value")).collect()) + + // In hive default serde mode, if we don't define output schema, + // when output column size < 2 and specify serde, it will return null for deficiency + // output schema (key: String, value: String) + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + | USING 'cat' + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '\t', + | 'serialization.last.column.takes.rest' = 'true' + | ) + |FROM v + """.stripMargin), + identity, + df.select( + 'a.cast("string").as("key"), + lit(null)).collect()) } } From 7cdc921bc07c3d627a8fcbc81cd9c320bda0b873 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 27 Oct 2020 09:52:09 +0900 Subject: [PATCH 0330/1009] [SPARK-32188][PYTHON][DOCS][FOLLOW-UP] Document Column APIs in API reference ### What changes were proposed in this pull request? This PR proposes to document the APIs in `Column` as well in API reference of PySpark documentation. ### Why are the changes needed? To document common APIs in PySpark. ### Does this PR introduce _any_ user-facing change? Yes, `Column.*` will be shown in API reference page. ### How was this patch tested? Manually tested via `cd python` and `make clean html`. Closes #30150 from HyukjinKwon/SPARK-32188. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/reference/pyspark.sql.rst | 39 ++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 0ed2f1b86ada5..f067b5500c1f4 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -217,6 +217,45 @@ DataFrame APIs DataFrameStatFunctions.freqItems DataFrameStatFunctions.sampleBy +Column APIs +----------- + +.. currentmodule:: pyspark.sql + +.. 
autosummary:: + :toctree: api/ + + Column.alias + Column.asc + Column.asc_nulls_first + Column.asc_nulls_last + Column.astype + Column.between + Column.bitwiseAND + Column.bitwiseOR + Column.bitwiseXOR + Column.cast + Column.contains + Column.desc + Column.desc_nulls_first + Column.desc_nulls_last + Column.dropFields + Column.endswith + Column.eqNullSafe + Column.getField + Column.getItem + Column.isNotNull + Column.isNull + Column.isin + Column.like + Column.name + Column.otherwise + Column.over + Column.rlike + Column.startswith + Column.substr + Column.when + Column.withField Data Types ---------- From 4e6a310f8062102ea6a022fb21171f896c8296ae Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 27 Oct 2020 11:05:53 +0900 Subject: [PATCH 0331/1009] [SPARK-32084][PYTHON][SQL] Expand dictionary functions ### What changes were proposed in this pull request? - [x] Expand dictionary definitions into standalone functions. - [x] Fix annotations for ordering functions. ### Why are the changes needed? To simplify further maintenance of docstrings. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30143 from zero323/SPARK-32084. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 870 +++++++++++++++------ python/pyspark/sql/functions.pyi | 12 +- python/pyspark/sql/tests/test_functions.py | 10 +- 3 files changed, 626 insertions(+), 266 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 97146fdb804ab..22941ab6f1157 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -24,8 +24,7 @@ from pyspark import since, SparkContext from pyspark.rdd import PythonEvalType -from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal, \ - _create_column_from_name +from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import StringType, DataType # Keep UserDefinedFunction import for backwards compatible import; moved in SPARK-22409 @@ -42,154 +41,457 @@ # since it requires to make every single overridden definition. -def _create_function(name, doc=""): - """Create a PySpark function by its name""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(col._jc if isinstance(col, Column) else col) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ +def _get_get_jvm_function(name, sc): + """ + Retrieves JVM function identified by name from + Java gateway associated with sc. + """ + return getattr(sc._jvm.functions, name) -def _create_function_over_column(name, doc=""): - """Similar with `_create_function` but creates a PySpark function that takes a column - (as string as well). This is mainly for PySpark functions to take strings as - column names. +def _invoke_function(name, *args): + """ + Invokes JVM function identified by name with args + and wraps the result with :class:`Column`. 
""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)(_to_java_column(col)) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ + jf = _get_get_jvm_function(name, SparkContext._active_spark_context) + return Column(jf(*args)) -def _wrap_deprecated_function(func, message): - """ Wrap the deprecated function to print out deprecation warnings""" - def _(col): - warnings.warn(message, DeprecationWarning) - return func(col) - return functools.wraps(func)(_) +def _invoke_function_over_column(name, col): + """ + Invokes unary JVM function identified by name + and wraps the result with :class:`Column`. + """ + return _invoke_function(name, _to_java_column(col)) -def _create_binary_mathfunction(name, doc=""): - """ Create a binary mathfunction by name""" - def _(col1, col2): - sc = SparkContext._active_spark_context +def _invoke_binary_math_function(name, col1, col2): + """ + Invokes binary JVM math function identified by name + and wraps the result with :class:`Column`. + """ + return _invoke_function( + name, # For legacy reasons, the arguments here can be implicitly converted into floats, # if they are not columns or strings. - if isinstance(col1, Column): - arg1 = col1._jc - elif isinstance(col1, str): - arg1 = _create_column_from_name(col1) - else: - arg1 = float(col1) - - if isinstance(col2, Column): - arg2 = col2._jc - elif isinstance(col2, str): - arg2 = _create_column_from_name(col2) - else: - arg2 = float(col2) - - jc = getattr(sc._jvm.functions, name)(arg1, arg2) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -def _create_window_function(name, doc=''): - """ Create a window function by name """ - def _(): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.functions, name)() - return Column(jc) - _.__name__ = name - _.__doc__ = 'Window function: ' + doc - return _ + _to_java_column(col1) if isinstance(col1, (str, Column)) else float(col1), + _to_java_column(col2) if isinstance(col2, (str, Column)) else float(col2) + ) def _options_to_str(options): return {key: to_str(value) for (key, value) in options.items()} -_lit_doc = """ + +@since(1.3) +def lit(col): + """ Creates a :class:`Column` of literal value. 
>>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1) [Row(height=5, spark_user=True)] """ -_functions = { - 'lit': _lit_doc, - 'col': 'Returns a :class:`Column` based on the given column name.', - 'column': 'Returns a :class:`Column` based on the given column name.', - 'asc': 'Returns a sort expression based on the ascending order of the given column name.', - 'desc': 'Returns a sort expression based on the descending order of the given column name.', -} - -_functions_over_column = { - 'sqrt': 'Computes the square root of the specified float value.', - 'abs': 'Computes the absolute value.', - - 'max': 'Aggregate function: returns the maximum value of the expression in a group.', - 'min': 'Aggregate function: returns the minimum value of the expression in a group.', - 'count': 'Aggregate function: returns the number of items in a group.', - 'sum': 'Aggregate function: returns the sum of all values in the expression.', - 'avg': 'Aggregate function: returns the average of the values in a group.', - 'mean': 'Aggregate function: returns the average of the values in a group.', - 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', -} - -_functions_1_4_over_column = { - # unary math functions - 'acos': ':return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()`', - 'asin': ':return: inverse sine of `col`, as if computed by `java.lang.Math.asin()`', - 'atan': ':return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()`', - 'cbrt': 'Computes the cube-root of the given value.', - 'ceil': 'Computes the ceiling of the given value.', - 'cos': """:param col: angle in radians - :return: cosine of the angle, as if computed by `java.lang.Math.cos()`.""", - 'cosh': """:param col: hyperbolic angle - :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()`""", - 'exp': 'Computes the exponential of the given value.', - 'expm1': 'Computes the exponential of the given value minus one.', - 'floor': 'Computes the floor of the given value.', - 'log': 'Computes the natural logarithm of the given value.', - 'log10': 'Computes the logarithm of the given value in Base 10.', - 'log1p': 'Computes the natural logarithm of the given value plus one.', - 'rint': 'Returns the double value that is closest in value to the argument and' + - ' is equal to a mathematical integer.', - 'signum': 'Computes the signum of the given value.', - 'sin': """:param col: angle in radians - :return: sine of the angle, as if computed by `java.lang.Math.sin()`""", - 'sinh': """:param col: hyperbolic angle - :return: hyperbolic sine of the given value, - as if computed by `java.lang.Math.sinh()`""", - 'tan': """:param col: angle in radians - :return: tangent of the given value, as if computed by `java.lang.Math.tan()`""", - 'tanh': """:param col: hyperbolic angle - :return: hyperbolic tangent of the given value, - as if computed by `java.lang.Math.tanh()`""", - 'toDegrees': '.. note:: Deprecated in 2.1, use :func:`degrees` instead.', - 'toRadians': '.. 
note:: Deprecated in 2.1, use :func:`radians` instead.', - 'bitwiseNOT': 'Computes bitwise not.', -} - -_functions_2_4 = { - 'asc_nulls_first': 'Returns a sort expression based on the ascending order of the given' + - ' column name, and null values return before non-null values.', - 'asc_nulls_last': 'Returns a sort expression based on the ascending order of the given' + - ' column name, and null values appear after non-null values.', - 'desc_nulls_first': 'Returns a sort expression based on the descending order of the given' + - ' column name, and null values appear before non-null values.', - 'desc_nulls_last': 'Returns a sort expression based on the descending order of the given' + - ' column name, and null values appear after non-null values', -} - -_collect_list_doc = """ + return col if isinstance(col, Column) else _invoke_function("lit", col) + + +@since(1.3) +def col(col): + """ + Returns a :class:`Column` based on the given column name.' + """ + return _invoke_function("col", col) + + +@since(1.3) +def column(col): + """ + Returns a :class:`Column` based on the given column name.' + """ + return col(col) + + +@since(1.3) +def asc(col): + """ + Returns a sort expression based on the ascending order of the given column name. + """ + return _invoke_function("asc", col) + + +@since(1.3) +def desc(col): + """ + Returns a sort expression based on the descending order of the given column name. + """ + return _invoke_function("desc", col) + + +@since(1.3) +def sqrt(col): + """ + Computes the square root of the specified float value. + """ + return _invoke_function_over_column("sqrt", col) + + +@since(1.3) +def abs(col): + """ + Computes the absolute value. + """ + return _invoke_function_over_column("abs", col) + + +@since(1.3) +def max(col): + """ + Aggregate function: returns the maximum value of the expression in a group. + """ + return _invoke_function_over_column("max", col) + + +@since(1.3) +def min(col): + """ + Aggregate function: returns the minimum value of the expression in a group. + """ + return _invoke_function_over_column("min", col) + + +@since(1.3) +def count(col): + """ + Aggregate function: returns the number of items in a group. + """ + return _invoke_function_over_column("count", col) + + +@since(1.3) +def sum(col): + """ + Aggregate function: returns the sum of all values in the expression. + """ + return _invoke_function_over_column("sum", col) + + +@since(1.3) +def avg(col): + """ + Aggregate function: returns the average of the values in a group. + """ + return _invoke_function_over_column("avg", col) + + +@since(1.3) +def mean(col): + """ + Aggregate function: returns the average of the values in a group. + """ + return _invoke_function_over_column("mean", col) + + +@since(1.3) +def sumDistinct(col): + """ + Aggregate function: returns the sum of distinct values in the expression. + """ + return _invoke_function_over_column("sumDistinct", col) + + +@since(1.4) +def acos(col): + """ + :return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()` + """ + return _invoke_function_over_column("acos", col) + + +@since(1.4) +def asin(col): + """ + :return: inverse sine of `col`, as if computed by `java.lang.Math.asin()` + """ + return _invoke_function_over_column("asin", col) + + +@since(1.4) +def atan(col): + """ + :return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()` + """ + return _invoke_function_over_column("atan", col) + + +@since(1.4) +def cbrt(col): + """ + Computes the cube-root of the given value. 
+ """ + return _invoke_function_over_column("cbrt", col) + + +@since(1.4) +def ceil(col): + """ + Computes the ceiling of the given value. + """ + return _invoke_function_over_column("ceil", col) + + +@since(1.4) +def cos(col): + """ + :param col: angle in radians + :return: cosine of the angle, as if computed by `java.lang.Math.cos()`. + """ + return _invoke_function_over_column("cos", col) + + +@since(1.4) +def cosh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` + """ + return _invoke_function_over_column("cosh", col) + + +@since(1.4) +def exp(col): + """ + Computes the exponential of the given value. + """ + return _invoke_function_over_column("exp", col) + + +@since(1.4) +def expm1(col): + """ + Computes the exponential of the given value minus one. + """ + return _invoke_function_over_column("expm1", col) + + +@since(1.4) +def floor(col): + """ + Computes the floor of the given value. + """ + return _invoke_function_over_column("floor", col) + + +@since(1.4) +def log(col): + """ + Computes the natural logarithm of the given value. + """ + return _invoke_function_over_column("log", col) + + +@since(1.4) +def log10(col): + """ + Computes the logarithm of the given value in Base 10. + """ + return _invoke_function_over_column("log10", col) + + +@since(1.4) +def log1p(col): + """ + Computes the natural logarithm of the given value plus one. + """ + return _invoke_function_over_column("log1p", col) + + +@since(1.4) +def rint(col): + """ + Returns the double value that is closest in value to the argument and + is equal to a mathematical integer. + """ + return _invoke_function_over_column("rint", col) + + +@since(1.4) +def signum(col): + """ + Computes the signum of the given value. + """ + return _invoke_function_over_column("signum", col) + + +@since(1.4) +def sin(col): + """ + :param col: angle in radians + :return: sine of the angle, as if computed by `java.lang.Math.sin()` + """ + return _invoke_function_over_column("sin", col) + + +@since(1.4) +def sinh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic sine of the given value, + as if computed by `java.lang.Math.sinh()` + """ + return _invoke_function_over_column("sinh", col) + + +@since(1.4) +def tan(col): + """ + :param col: angle in radians + :return: tangent of the given value, as if computed by `java.lang.Math.tan()` + """ + return _invoke_function_over_column("tan", col) + + +@since(1.4) +def tanh(col): + """ + :param col: hyperbolic angle + :return: hyperbolic tangent of the given value + as if computed by `java.lang.Math.tanh()` + """ + return _invoke_function_over_column("tanh", col) + + +@since(1.4) +def toDegrees(col): + """ + .. note:: Deprecated in 2.1, use :func:`degrees` instead. + """ + warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning) + return degrees(col) + + +@since(1.4) +def toRadians(col): + """ + .. note:: Deprecated in 2.1, use :func:`radians` instead. + """ + warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning) + return radians(col) + + +@since(1.4) +def bitwiseNOT(col): + """ + Computes bitwise not. + """ + return _invoke_function_over_column("bitwiseNOT", col) + + +@since(2.4) +def asc_nulls_first(col): + """ + Returns a sort expression based on the ascending order of the given + column name, and null values return before non-null values. 
+ """ + return _invoke_function("asc_nulls_first", col) + + +@since(2.4) +def asc_nulls_last(col): + """ + Returns a sort expression based on the ascending order of the given + column name, and null values appear after non-null values. + """ + return _invoke_function("asc_nulls_last", col) + + +@since(2.4) +def desc_nulls_first(col): + """ + Returns a sort expression based on the descending order of the given + column name, and null values appear before non-null values. + """ + return _invoke_function("desc_nulls_first", col) + + +@since(2.4) +def desc_nulls_last(col): + """ + Returns a sort expression based on the descending order of the given + column name, and null values appear after non-null values. + """ + return _invoke_function("desc_nulls_last", col) + + +@since(1.6) +def stddev(col): + """ + Aggregate function: alias for stddev_samp. + """ + return _invoke_function_over_column("stddev", col) + + +@since(1.6) +def stddev_samp(col): + """ + Aggregate function: returns the unbiased sample standard deviation of + the expression in a group. + """ + return _invoke_function_over_column("stddev_samp", col) + + +@since(1.6) +def stddev_pop(col): + """ + Aggregate function: returns population standard deviation of + the expression in a group. + """ + return _invoke_function_over_column("stddev_pop", col) + + +@since(1.6) +def variance(col): + """ + Aggregate function: alias for var_samp + """ + return _invoke_function_over_column("variance", col) + + +@since(1.6) +def var_samp(col): + """ + Aggregate function: returns the unbiased sample variance of + the values in a group. + """ + return _invoke_function_over_column("var_samp", col) + + +@since(1.6) +def var_pop(col): + """ + Aggregate function: returns the population variance of the values in a group. + """ + return _invoke_function_over_column("var_pop", col) + + +@since(1.6) +def skewness(col): + """ + Aggregate function: returns the skewness of the values in a group. + """ + return _invoke_function_over_column("skewness", col) + + +@since(1.6) +def kurtosis(col): + """ + Aggregate function: returns the kurtosis of the values in a group. + """ + return _invoke_function_over_column("kurtosis", col) + + +@since(1.6) +def collect_list(col): + """ Aggregate function: returns a list of objects with duplicates. .. note:: The function is non-deterministic because the order of collected results depends @@ -199,7 +501,12 @@ def _options_to_str(options): >>> df2.agg(collect_list('age')).collect() [Row(collect_list(age)=[2, 5, 5])] """ -_collect_set_doc = """ + return _invoke_function_over_column("collect_list", col) + + +@since(1.6) +def collect_set(col): + """ Aggregate function: returns a set of objects with duplicate elements eliminated. .. 
note:: The function is non-deterministic because the order of collected results depends @@ -209,111 +516,118 @@ def _options_to_str(options): >>> df2.agg(collect_set('age')).collect() [Row(collect_set(age)=[5, 2])] """ -_functions_1_6_over_column = { - # unary math functions - 'stddev': 'Aggregate function: alias for stddev_samp.', - 'stddev_samp': 'Aggregate function: returns the unbiased sample standard deviation of' + - ' the expression in a group.', - 'stddev_pop': 'Aggregate function: returns population standard deviation of' + - ' the expression in a group.', - 'variance': 'Aggregate function: alias for var_samp.', - 'var_samp': 'Aggregate function: returns the unbiased sample variance of' + - ' the values in a group.', - 'var_pop': 'Aggregate function: returns the population variance of the values in a group.', - 'skewness': 'Aggregate function: returns the skewness of the values in a group.', - 'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.', - 'collect_list': _collect_list_doc, - 'collect_set': _collect_set_doc -} - -_functions_2_1_over_column = { - # unary math functions - 'degrees': """ - Converts an angle measured in radians to an approximately equivalent angle - measured in degrees. - - :param col: angle in radians - :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` - """, - 'radians': """ - Converts an angle measured in degrees to an approximately equivalent angle - measured in radians. - - :param col: angle in degrees - :return: angle in radians, as if computed by `java.lang.Math.toRadians()` - """, -} - -# math functions that take two arguments as input -_binary_mathfunctions = { - 'atan2': """ - :param col1: coordinate on y-axis - :param col2: coordinate on x-axis - :return: the `theta` component of the point - (`r`, `theta`) - in polar coordinates that corresponds to the point - (`x`, `y`) in Cartesian coordinates, - as if computed by `java.lang.Math.atan2()` - """, - 'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.', - 'pow': 'Returns the value of the first argument raised to the power of the second argument.', -} - -_window_functions = { - 'row_number': - """returns a sequential number starting at 1 within a window partition.""", - 'dense_rank': - """returns the rank of rows within a window partition, without any gaps. - - The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking - sequence when there are ties. That is, if you were ranking a competition using dense_rank - and had three people tie for second place, you would say that all three were in second - place and that the next person came in third. Rank would give me sequential numbers, making - the person that came in third place (after the ties) would register as coming in fifth. - - This is equivalent to the DENSE_RANK function in SQL.""", - 'rank': - """returns the rank of rows within a window partition. - - The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking - sequence when there are ties. That is, if you were ranking a competition using dense_rank - and had three people tie for second place, you would say that all three were in second - place and that the next person came in third. Rank would give me sequential numbers, making - the person that came in third place (after the ties) would register as coming in fifth. 
- - This is equivalent to the RANK function in SQL.""", - 'cume_dist': - """returns the cumulative distribution of values within a window partition, - i.e. the fraction of rows that are below the current row.""", - 'percent_rank': - """returns the relative rank (i.e. percentile) of rows within a window partition.""", -} - -# Wraps deprecated functions (keys) with the messages (values). -_functions_deprecated = { - 'toDegrees': 'Deprecated in 2.1, use degrees instead.', - 'toRadians': 'Deprecated in 2.1, use radians instead.', -} - -for _name, _doc in _functions.items(): - globals()[_name] = since(1.3)(_create_function(_name, _doc)) -for _name, _doc in _functions_over_column.items(): - globals()[_name] = since(1.3)(_create_function_over_column(_name, _doc)) -for _name, _doc in _functions_1_4_over_column.items(): - globals()[_name] = since(1.4)(_create_function_over_column(_name, _doc)) -for _name, _doc in _binary_mathfunctions.items(): - globals()[_name] = since(1.4)(_create_binary_mathfunction(_name, _doc)) -for _name, _doc in _window_functions.items(): - globals()[_name] = since(1.6)(_create_window_function(_name, _doc)) -for _name, _doc in _functions_1_6_over_column.items(): - globals()[_name] = since(1.6)(_create_function_over_column(_name, _doc)) -for _name, _doc in _functions_2_1_over_column.items(): - globals()[_name] = since(2.1)(_create_function_over_column(_name, _doc)) -for _name, _message in _functions_deprecated.items(): - globals()[_name] = _wrap_deprecated_function(globals()[_name], _message) -for _name, _doc in _functions_2_4.items(): - globals()[_name] = since(2.4)(_create_function(_name, _doc)) -del _name, _doc + return _invoke_function_over_column("collect_set", col) + + +@since(2.1) +def degrees(col): + """ + Converts an angle measured in radians to an approximately equivalent angle + measured in degrees. + + :param col: angle in radians + :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` + """ + return _invoke_function_over_column("degrees", col) + + +@since(2.1) +def radians(col): + """ + Converts an angle measured in degrees to an approximately equivalent angle + measured in radians. + + :param col: angle in degrees + :return: angle in radians, as if computed by `java.lang.Math.toRadians()` + """ + return _invoke_function_over_column("radians", col) + + +@since(1.4) +def atan2(col1, col2): + """ + :param col1: coordinate on y-axis + :param col2: coordinate on x-axis + :return: the `theta` component of the point + (`r`, `theta`) + in polar coordinates that corresponds to the point + (`x`, `y`) in Cartesian coordinates, + as if computed by `java.lang.Math.atan2()` + """ + return _invoke_binary_math_function("atan2", col1, col2) + + +@since(1.4) +def hypot(col1, col2): + """ + Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow. + """ + return _invoke_binary_math_function("hypot", col1, col2) + + +@since(1.4) +def pow(col1, col2): + """ + Returns the value of the first argument raised to the power of the second argument. + """ + return _invoke_binary_math_function("pow", col1, col2) + + +@since(1.6) +def row_number(): + """ + Window function: returns a sequential number starting at 1 within a window partition. + """ + return _invoke_function("row_number") + + +@since(1.6) +def dense_rank(): + """ + Window function: returns the rank of rows within a window partition, without any gaps. + + The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking + sequence when there are ties. 
That is, if you were ranking a competition using dense_rank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. Rank would give me sequential numbers, making + the person that came in third place (after the ties) would register as coming in fifth. + + This is equivalent to the DENSE_RANK function in SQL. + """ + return _invoke_function("dense_rank") + + +@since(1.6) +def rank(): + """ + Window function: returns the rank of rows within a window partition. + + The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking + sequence when there are ties. That is, if you were ranking a competition using dense_rank + and had three people tie for second place, you would say that all three were in second + place and that the next person came in third. Rank would give me sequential numbers, making + the person that came in third place (after the ties) would register as coming in fifth. + + This is equivalent to the RANK function in SQL. + """ + return _invoke_function("rank") + + +@since(1.6) +def cume_dist(): + """ + Window function: returns the cumulative distribution of values within a window partition, + i.e. the fraction of rows that are below the current row. + """ + return _invoke_function("cume_dist") + + +@since(1.6) +def percent_rank(): + """ + Window function: returns the relative rank (i.e. percentile) of rows within a window partition. + """ + return _invoke_function("percent_rank") @since(1.3) @@ -1645,21 +1959,68 @@ def raise_error(errMsg): # ---------------------- String/Binary functions ------------------------------ -_string_functions = { - 'upper': 'Converts a string expression to upper case.', - 'lower': 'Converts a string expression to lower case.', - 'ascii': 'Computes the numeric value of the first character of the string column.', - 'base64': 'Computes the BASE64 encoding of a binary column and returns it as a string column.', - 'unbase64': 'Decodes a BASE64 encoded string column and returns it as a binary column.', - 'ltrim': 'Trim the spaces from left end for the specified string value.', - 'rtrim': 'Trim the spaces from right end for the specified string value.', - 'trim': 'Trim the spaces from both ends for the specified string column.', -} +@since(1.5) +def upper(col): + """ + Converts a string expression to upper case. + """ + return _invoke_function_over_column("upper", col) + + +@since(1.5) +def lower(col): + """ + Converts a string expression to lower case. + """ + return _invoke_function_over_column("lower", col) + +@since(1.5) +def ascii(col): + """ + Computes the numeric value of the first character of the string column. + """ + return _invoke_function_over_column("ascii", col) -for _name, _doc in _string_functions.items(): - globals()[_name] = since(1.5)(_create_function_over_column(_name, _doc)) -del _name, _doc + +@since(1.5) +def base64(col): + """ + Computes the BASE64 encoding of a binary column and returns it as a string column. + """ + return _invoke_function_over_column("base64", col) + + +@since(1.5) +def unbase64(col): + """ + Decodes a BASE64 encoded string column and returns it as a binary column. + """ + return _invoke_function_over_column("unbase64", col) + + +@since(1.5) +def ltrim(col): + """ + Trim the spaces from left end for the specified string value. + """ + return _invoke_function_over_column("ltrim", col) + + +@since(1.5) +def rtrim(col): + """ + Trim the spaces from right end for the specified string value. 
+ """ + return _invoke_function_over_column("rtrim", col) + + +@since(1.5) +def trim(col): + """ + Trim the spaces from both ends for the specified string column. + """ + return _invoke_function_over_column("trim", col) @since(1.5) @@ -2231,7 +2592,7 @@ def element_at(col, extraction): """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.element_at( - _to_java_column(col), lit(extraction)._jc)) # noqa: F821 'lit' is dynamically defined. + _to_java_column(col), lit(extraction)._jc)) @since(2.4) @@ -3607,13 +3968,6 @@ def udf(f=None, returnType=StringType()): evalType=PythonEvalType.SQL_BATCHED_UDF) -ignored_fns = ['map', 'since'] -__all__ = [k for k, v in globals().items() - if not k.startswith('_') and k[0].islower() and callable(v) and k not in ignored_fns] -__all__ += ["PandasUDFType"] -__all__.sort() - - def _test(): import doctest from pyspark.sql import Row, SparkSession diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 779a29c086d5a..1d048efcc3ca5 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -258,9 +258,9 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... -def asc(col: ColumnOrName) -> Column: ... -def asc_nulls_first(col: ColumnOrName) -> Column: ... -def asc_nulls_last(col: ColumnOrName) -> Column: ... +def asc(col: str) -> Column: ... +def asc_nulls_first(col: str) -> Column: ... +def asc_nulls_last(col: str) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... @@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... -def desc(col: ColumnOrName) -> Column: ... -def desc_nulls_first(col: ColumnOrName) -> Column: ... -def desc_nulls_last(col: ColumnOrName) -> Column: ... +def desc(col: str) -> Column: ... +def desc_nulls_first(col: str) -> Column: ... +def desc_nulls_last(col: str) -> Column: ... def exp(col: ColumnOrName) -> Column: ... def expm1(col: ColumnOrName) -> Column: ... def floor(col: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 26d260fe77b0c..cc77b8d5dfe3e 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -161,14 +161,20 @@ def test_rand_functions(self): def test_string_functions(self): from pyspark.sql import functions - from pyspark.sql.functions import col, lit, _string_functions + from pyspark.sql.functions import col, lit + string_functions = [ + "upper", "lower", "ascii", + "base64", "unbase64", + "ltrim", "rtrim", "trim" + ] + df = self.spark.createDataFrame([['nick']], schema=['name']) self.assertRaisesRegexp( TypeError, "must be the same type", lambda: df.select(col('name').substr(0, lit(1)))) - for name in _string_functions.keys(): + for name in string_functions: self.assertEqual( df.select(getattr(functions, name)("name")).first()[0], df.select(getattr(functions, name)(col("name"))).first()[0]) From 9818f079aa00a390c1cbd267022f42e05db6d67b Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 27 Oct 2020 14:03:57 +0900 Subject: [PATCH 0332/1009] [SPARK-33243][PYTHON][BUILD] Add numpydoc into documentation dependency ### What changes were proposed in this pull request? 
This PR proposes to initiate the migration to NumPy documentation style (from reST style) in PySpark docstrings. This PR also adds one migration example of `SparkContext`. - **Before:** ... ![Screen Shot 2020-10-26 at 7 02 05 PM](https://user-images.githubusercontent.com/6477701/97161090-a8ea0200-17c0-11eb-8204-0e70d18fc571.png) ... ![Screen Shot 2020-10-26 at 7 02 09 PM](https://user-images.githubusercontent.com/6477701/97161100-aab3c580-17c0-11eb-92ad-f5ad4441ce16.png) ... - **After:** ... ![Screen Shot 2020-10-26 at 7 24 08 PM](https://user-images.githubusercontent.com/6477701/97161219-d636b000-17c0-11eb-80ab-d17a570ecb4b.png) ... See also https://numpydoc.readthedocs.io/en/latest/format.html ### Why are the changes needed? There are many reasons for switching to NumPy documentation style. 1. Arguably reST style doesn't fit well when the docstring grows large because it provides (arguably) less structures and syntax. 2. NumPy documentation style provides a better human readable docstring format. For example, notebook users often just do `help(...)` by `pydoc`. 3. NumPy documentation style is pretty commonly used in data science libraries, for example, pandas, numpy, Dask, Koalas, matplotlib, ... Using NumPy documentation style can give users a consistent documentation style. ### Does this PR introduce _any_ user-facing change? The dependency itself doesn't change anything user-facing. The documentation change in `SparkContext` does, as shown above. ### How was this patch tested? Manually tested via running `cd python` and `make clean html`. Closes #30149 from HyukjinKwon/SPARK-33243. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 4 +- dev/create-release/spark-rm/Dockerfile | 2 +- dev/lint-python | 9 ++ dev/requirements.txt | 1 + docs/README.md | 2 +- .../source/_templates/autosummary/class.rst | 38 ++++++++ .../{ => autosummary}/class_with_docs.rst | 0 python/docs/source/conf.py | 5 +- python/docs/source/reference/pyspark.ml.rst | 28 +++--- .../docs/source/reference/pyspark.mllib.rst | 26 +++--- python/docs/source/reference/pyspark.sql.rst | 2 +- python/pyspark/context.py | 93 +++++++++++-------- 12 files changed, 137 insertions(+), 73 deletions(-) create mode 100644 python/docs/source/_templates/autosummary/class.rst rename python/docs/source/_templates/{ => autosummary}/class_with_docs.rst (100%) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5b06485b9959e..55c578e15724a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -332,7 +332,7 @@ jobs: run: | # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. - pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy + pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc - name: Install R 4.0 uses: r-lib/actions/setup-r@v1 with: @@ -353,7 +353,7 @@ jobs: sudo apt-get install -y libcurl4-openssl-dev pandoc # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. 
- pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx + pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc gem install jekyll jekyll-redirect-from rouge sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')" - name: Scala linter diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 4e007a5eeb93a..6b32f10490719 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -36,7 +36,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # We should use the latest Sphinx version once this is fixed. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1 numpydoc==1.1.0" ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" # Install extra needed repos and refresh. diff --git a/dev/lint-python b/dev/lint-python index 62664818dc106..2c244e0c0b297 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -126,6 +126,7 @@ function mypy_test { local MYPY_REPORT= local MYPY_STATUS= + # TODO(SPARK-32797): Install mypy on the Jenkins CI workers if ! hash "$MYPY_BUILD" 2> /dev/null; then echo "The $MYPY_BUILD command was not found. Skipping for now." return @@ -236,6 +237,14 @@ function sphinx_test { return fi + # TODO(SPARK-33242): Install numpydoc in Jenkins machines + PYTHON_HAS_NUMPYDOC=$("$PYTHON_EXECUTABLE" -c 'import importlib.util; print(importlib.util.find_spec("numpydoc") is not None)') + if [[ "$PYTHON_HAS_NUMPYDOC" == "False" ]]; then + echo "$PYTHON_EXECUTABLE does not have numpydoc installed. Skipping Sphinx build for now." + echo + return + fi + echo "starting $SPHINX_BUILD tests..." pushd python/docs &> /dev/null make clean &> /dev/null diff --git a/dev/requirements.txt b/dev/requirements.txt index b11f24fdbd4b2..c1546c8b8d4d3 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -6,3 +6,4 @@ sphinx pydata_sphinx_theme ipython nbsphinx +numpydoc diff --git a/docs/README.md b/docs/README.md index 09982c1301163..af51dca6180a9 100644 --- a/docs/README.md +++ b/docs/README.md @@ -63,7 +63,7 @@ See also https://github.com/sphinx-doc/sphinx/issues/7551. --> ```sh -$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx +$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc ``` ## Generating the Documentation HTML diff --git a/python/docs/source/_templates/autosummary/class.rst b/python/docs/source/_templates/autosummary/class.rst new file mode 100644 index 0000000000000..d794f797ee2ad --- /dev/null +++ b/python/docs/source/_templates/autosummary/class.rst @@ -0,0 +1,38 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +.. Workaround to avoud documenting __init__. + +{% extends "!autosummary/class.rst" %} + +{% if '__init__' in methods %} +{% set caught_result = methods.remove('__init__') %} +{% endif %} + +{% block methods %} +{% if methods %} + + .. rubric:: Methods + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + +{% endif %} +{% endblock %} + diff --git a/python/docs/source/_templates/class_with_docs.rst b/python/docs/source/_templates/autosummary/class_with_docs.rst similarity index 100% rename from python/docs/source/_templates/class_with_docs.rst rename to python/docs/source/_templates/autosummary/class_with_docs.rst diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 9d87bbe27df2a..a1bcd3d502a97 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -47,9 +47,12 @@ 'sphinx.ext.autosummary', 'nbsphinx', # Converts Jupyter Notebook to reStructuredText files for Sphinx. # For ipython directive in reStructuredText files. It is generated by the notebook. - 'IPython.sphinxext.ipython_console_highlighting' + 'IPython.sphinxext.ipython_console_highlighting', + 'numpydoc', # handle NumPy documentation formatted docstrings. ] +numpydoc_show_class_members = False + # Links used globally in the RST files. # These are defined here to allow link substitutions dynamically. rst_epilog = """ diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 00d0e44e92715..5fafe5899f20b 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -25,7 +25,7 @@ ML Pipeline APIs .. currentmodule:: pyspark.ml .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Transformer @@ -44,7 +44,7 @@ Parameters .. currentmodule:: pyspark.ml.param .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Param @@ -58,7 +58,7 @@ Feature .. currentmodule:: pyspark.ml.feature .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ANOVASelector @@ -126,7 +126,7 @@ Classification .. currentmodule:: pyspark.ml.classification .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LinearSVC @@ -169,7 +169,7 @@ Clustering .. currentmodule:: pyspark.ml.clustering .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BisectingKMeans @@ -205,7 +205,7 @@ Vector and Matrix .. currentmodule:: pyspark.ml.linalg .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Vector @@ -224,7 +224,7 @@ Recommendation .. currentmodule:: pyspark.ml.recommendation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ALS @@ -237,7 +237,7 @@ Regression .. currentmodule:: pyspark.ml.regression .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ AFTSurvivalRegression @@ -268,7 +268,7 @@ Statistics .. currentmodule:: pyspark.ml.stat .. 
autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ANOVATest @@ -287,7 +287,7 @@ Tuning .. currentmodule:: pyspark.ml.tuning .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ParamGridBuilder @@ -303,7 +303,7 @@ Evaluation .. currentmodule:: pyspark.ml.evaluation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Evaluator @@ -321,7 +321,7 @@ Frequency Pattern Mining .. currentmodule:: pyspark.ml.fpm .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ FPGrowth @@ -335,7 +335,7 @@ Image .. currentmodule:: pyspark.ml.image .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ImageSchema @@ -348,7 +348,7 @@ Utilities .. currentmodule:: pyspark.ml.util .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BaseReadWrite diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index 1251b1df752c7..acc834c065ac3 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -25,7 +25,7 @@ Classification .. currentmodule:: pyspark.mllib.classification .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LogisticRegressionModel @@ -44,7 +44,7 @@ Clustering .. currentmodule:: pyspark.mllib.clustering .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ @@ -68,7 +68,7 @@ Evaluation .. currentmodule:: pyspark.mllib.evaluation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BinaryClassificationMetrics @@ -83,7 +83,7 @@ Feature .. currentmodule:: pyspark.mllib.feature .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Normalizer @@ -105,7 +105,7 @@ Frequency Pattern Mining .. currentmodule:: pyspark.mllib.fpm .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ FPGrowth @@ -120,7 +120,7 @@ Vector and Matrix .. currentmodule:: pyspark.mllib.linalg .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Vector @@ -140,7 +140,7 @@ Distributed Representation .. currentmodule:: pyspark.mllib.linalg.distributed .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ BlockMatrix @@ -159,7 +159,7 @@ Random .. currentmodule:: pyspark.mllib.random .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ RandomRDDs @@ -171,7 +171,7 @@ Recommendation .. currentmodule:: pyspark.mllib.recommendation .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ MatrixFactorizationModel @@ -185,7 +185,7 @@ Regression .. currentmodule:: pyspark.mllib.regression .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ LabeledPoint @@ -208,7 +208,7 @@ Statistics .. currentmodule:: pyspark.mllib.stat .. 
autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ Statistics @@ -224,7 +224,7 @@ Tree .. currentmodule:: pyspark.mllib.tree .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ DecisionTreeModel @@ -241,7 +241,7 @@ Utilities .. currentmodule:: pyspark.mllib.util .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ JavaLoader diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index f067b5500c1f4..3f903fe8c7acd 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -263,7 +263,7 @@ Data Types .. currentmodule:: pyspark.sql.types .. autosummary:: - :template: class_with_docs.rst + :template: autosummary/class_with_docs.rst :toctree: api/ ArrayType diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 4213a742a1dc4..3f1643e2d21ac 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -63,12 +63,59 @@ class SparkContext(object): connection to a Spark cluster, and can be used to create :class:`RDD` and broadcast variables on that cluster. - .. note:: Only one :class:`SparkContext` should be active per JVM. You must `stop()` - the active :class:`SparkContext` before creating a new one. - - .. note:: :class:`SparkContext` instance is not supported to share across multiple - processes out of the box, and PySpark does not guarantee multi-processing execution. - Use threads instead for concurrent processing purpose. + When you create a new SparkContext, at least the master and app name should + be set, either through the named parameters here or through `conf`. + + Parameters + ---------- + master : str, optional + Cluster URL to connect to (e.g. mesos://host:port, spark://host:port, local[4]). + appName : str, optional + A name for your job, to display on the cluster web UI. + sparkHome : str, optional + Location where Spark is installed on cluster nodes. + pyFiles : str, optional + Collection of .zip or .py files to send to the cluster + and add to PYTHONPATH. These can be paths on the local file + system or HDFS, HTTP, HTTPS, or FTP URLs. + environment : dict, optional + A dictionary of environment variables to set on + worker nodes. + batchSize : int, optional + The number of Python objects represented as a single + Java object. Set 1 to disable batching, 0 to automatically choose + the batch size based on object sizes, or -1 to use an unlimited + batch size + serializer : :class:`pyspark.serializers.Serializer`, optional + The serializer for RDDs. + conf : dict, optional + A :class:`SparkConf` object setting Spark properties. + gateway : optional + Use an existing gateway and JVM, otherwise a new JVM + will be instantiated. This is only used internally. + jsc : optional + The JavaSparkContext instance. This is only used internally. + profiler_cls : :class:`pyspark.profiler.Profiler`, optional + A class of custom Profiler used to do profiling + (default is :class:`pyspark.profiler.BasicProfiler`). + + Notes + ----- + Only one :class:`SparkContext` should be active per JVM. You must `stop()` + the active :class:`SparkContext` before creating a new one. + + :class:`SparkContext` instance is not supported to share across multiple + processes out of the box, and PySpark does not guarantee multi-processing execution. + Use threads instead for concurrent processing purpose. 
+ + Examples + -------- + >>> from pyspark.context import SparkContext + >>> sc = SparkContext('local', 'test') + >>> sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + ValueError:... """ _gateway = None @@ -83,40 +130,6 @@ class SparkContext(object): def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None, environment=None, batchSize=0, serializer=PickleSerializer(), conf=None, gateway=None, jsc=None, profiler_cls=BasicProfiler): - """ - Create a new SparkContext. At least the master and app name should be set, - either through the named parameters here or through `conf`. - - :param master: Cluster URL to connect to - (e.g. mesos://host:port, spark://host:port, local[4]). - :param appName: A name for your job, to display on the cluster web UI. - :param sparkHome: Location where Spark is installed on cluster nodes. - :param pyFiles: Collection of .zip or .py files to send to the cluster - and add to PYTHONPATH. These can be paths on the local file - system or HDFS, HTTP, HTTPS, or FTP URLs. - :param environment: A dictionary of environment variables to set on - worker nodes. - :param batchSize: The number of Python objects represented as a single - Java object. Set 1 to disable batching, 0 to automatically choose - the batch size based on object sizes, or -1 to use an unlimited - batch size - :param serializer: The serializer for RDDs. - :param conf: A :class:`SparkConf` object setting Spark properties. - :param gateway: Use an existing gateway and JVM, otherwise a new JVM - will be instantiated. - :param jsc: The JavaSparkContext instance (optional). - :param profiler_cls: A class of custom Profiler used to do profiling - (default is pyspark.profiler.BasicProfiler). - - - >>> from pyspark.context import SparkContext - >>> sc = SparkContext('local', 'test') - - >>> sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL - Traceback (most recent call last): - ... - ValueError:... - """ if (conf is None or conf.get("spark.executor.allowSparkContext", "false").lower() != "true"): # In order to prevent SparkContext from being created in executors. From 4b0e23e646b579b852056ffc87164b16adef5a09 Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Tue, 27 Oct 2020 14:28:20 +0900 Subject: [PATCH 0333/1009] [SPARK-33215][WEBUI] Speed up event log download by skipping UI rebuild ### What changes were proposed in this pull request? This patch separates the view permission checks from the getAppUi in FsHistoryServerProvider, thus enabling SHS to do view permissions check of a given attempt for a given user without rebuilding the UI. This is achieved by adding a method "checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean" to many layers of history server components. Currently, this feature is useful for event log download. ### Why are the changes needed? Right now, when we want to download the event logs from the spark history server, SHS will need to parse entire the event log to rebuild UI, and this is just for view permission checks. UI rebuilding is a time-consuming and memory-intensive task, especially for large logs. However, this process is unnecessary for event log download. With this patch, UI rebuild can be skipped when downloading event logs from the history server. Thus the time of downloading a GB scale event log can be reduced from several minutes to several seconds, and the memory consumption of UI rebuilding can be avoided. 
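For illustration, the permission check added here can be exercised directly against the provider, much like the new `FsHistoryProviderSuite` test further down; the application/attempt IDs, user name, and log directory below are made up, and the provider APIs are `private[history]`, so a sketch like this only compiles inside the `org.apache.spark.deploy.history` package:

```
import org.apache.spark.SparkConf

// Minimal sketch, assuming the log directory already contains the given application.
val conf = new SparkConf().set("spark.history.fs.logDirectory", "/tmp/spark-events")
val provider = new FsHistoryProvider(conf)
provider.checkForLogs()  // scan and index the event logs
// ACL check only: no event log replay and no SparkUI rebuild.
val allowed: Boolean =
  provider.checkUIViewPermissions("app-20201027123456-0001", Some("1"), "alice")
provider.stop()
```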
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added test cases to confirm the view permission checks work properly and download event logs won't trigger UI loading. Also did some manual tests to verify the download speed can be drastically improved and the authentication works properly. Closes #30126 from baohe-zhang/bypass_ui_rebuild_for_log_download. Authored-by: Baohe Zhang Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../history/ApplicationHistoryProvider.scala | 7 +++ .../deploy/history/FsHistoryProvider.scala | 34 ++++++++---- .../spark/deploy/history/HistoryServer.scala | 5 ++ .../spark/status/api/v1/ApiRootResource.scala | 15 ++++++ .../api/v1/OneApplicationResource.scala | 9 ++-- .../scala/org/apache/spark/ui/SparkUI.scala | 5 ++ .../history/FsHistoryProviderSuite.scala | 54 ++++++++++++++++++- .../deploy/history/HistoryServerSuite.scala | 18 +++++++ 8 files changed, 132 insertions(+), 15 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index 472b52957ed7f..f3f7db6bb0aba 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -150,4 +150,11 @@ private[history] abstract class ApplicationHistoryProvider { */ def onUIDetached(appId: String, attemptId: Option[String], ui: SparkUI): Unit = { } + /** + * Returns true if the given user has permission to view the UI of the given attempt. + * + * @throws NoSuchElementException if the given attempt doesn't exist + */ + def checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean + } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 168bd1e68a304..400c82c1f9e63 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -359,15 +359,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } val conf = this.conf.clone() - val secManager = new SecurityManager(conf) - - secManager.setAcls(historyUiAclsEnable) - // make sure to set admin acls before view acls so they are properly picked up - secManager.setAdminAcls(historyUiAdminAcls ++ stringToSeq(attempt.adminAcls.getOrElse(""))) - secManager.setViewAcls(attempt.info.sparkUser, stringToSeq(attempt.viewAcls.getOrElse(""))) - secManager.setAdminAclsGroups(historyUiAdminAclsGroups ++ - stringToSeq(attempt.adminAclsGroups.getOrElse(""))) - secManager.setViewAclsGroups(stringToSeq(attempt.viewAclsGroups.getOrElse(""))) + val secManager = createSecurityManager(conf, attempt) val kvstore = try { diskManager match { @@ -461,6 +453,17 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + val app = load(appId) + val attempt = app.attempts.find(_.info.attemptId == attemptId).orNull + if (attempt == null) { + throw new NoSuchElementException() + } + val secManager = createSecurityManager(this.conf.clone(), attempt) + secManager.checkUIViewPermissions(user) + } + /** * Builds the application list based on the current contents of the log directory. 
* Tries to reuse as much of the data already in memory as possible, by not reading @@ -1376,6 +1379,19 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) endProcessing(rootPath) } } + + private def createSecurityManager(conf: SparkConf, + attempt: AttemptInfoWrapper): SecurityManager = { + val secManager = new SecurityManager(conf) + secManager.setAcls(historyUiAclsEnable) + // make sure to set admin acls before view acls so they are properly picked up + secManager.setAdminAcls(historyUiAdminAcls ++ stringToSeq(attempt.adminAcls.getOrElse(""))) + secManager.setViewAcls(attempt.info.sparkUser, stringToSeq(attempt.viewAcls.getOrElse(""))) + secManager.setAdminAclsGroups(historyUiAdminAclsGroups ++ + stringToSeq(attempt.adminAclsGroups.getOrElse(""))) + secManager.setViewAclsGroups(stringToSeq(attempt.viewAclsGroups.getOrElse(""))) + secManager + } } private[history] object FsHistoryProvider { diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index ca21a8056d1b5..bb13f34818a62 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -128,6 +128,11 @@ class HistoryServer( appCache.withSparkUI(appId, attemptId)(fn) } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + provider.checkUIViewPermissions(appId, attemptId, user) + } + initialize() /** diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala index 83f76db7e89da..cc21c1488f67c 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/ApiRootResource.scala @@ -95,6 +95,8 @@ private[spark] trait UIRoot { .build() } def securityManager: SecurityManager + + def checkUIViewPermissions(appId: String, attemptId: Option[String], user: String): Boolean } private[v1] object UIRootFromServletContext { @@ -145,6 +147,19 @@ private[v1] trait BaseAppResource extends ApiRequestContext { throw new NotFoundException(s"no such app: $appKey") } } + + protected def checkUIViewPermissions(): Unit = { + try { + val user = httpRequest.getRemoteUser() + if (!uiRoot.checkUIViewPermissions(appId, Option(attemptId), user)) { + throw new ForbiddenException(raw"""user "$user" is not authorized""") + } + } catch { + case _: NoSuchElementException => + val appKey = Option(attemptId).map(appId + "/" + _).getOrElse(appId) + throw new NotFoundException(s"no such app: $appKey") + } + } } private[v1] class ForbiddenException(msg: String) extends WebApplicationException( diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala index 536a1fcd59cd0..fb64ff5e60247 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/OneApplicationResource.scala @@ -115,15 +115,14 @@ private[v1] class AbstractApplicationResource extends BaseAppResource { @Path("logs") @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) def getEventLogs(): Response = { - // Retrieve the UI for the application just to do access permission checks. 
For backwards - // compatibility, this code also tries with attemptId "1" if the UI without an attempt ID does - // not exist. + // For backwards compatibility, this code also tries with attemptId "1" if the UI + // without an attempt ID does not exist. try { - withUI { _ => } + checkUIViewPermissions() } catch { case _: NotFoundException if attemptId == null => attemptId = "1" - withUI { _ => } + checkUIViewPermissions() attemptId = null } diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 8ae9828c3fee1..b1769a8a9c9ee 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -110,6 +110,11 @@ private[spark] class SparkUI private ( } } + override def checkUIViewPermissions(appId: String, attemptId: Option[String], + user: String): Boolean = { + securityManager.checkUIViewPermissions(user) + } + def getApplicationInfoList: Iterator[ApplicationInfo] = { Iterator(new ApplicationInfo( id = appId, diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 1578b908b1b55..0b0754be2f56f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.deploy.history.EventLogTestHelper._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DRIVER_LOG_DFS_DIR import org.apache.spark.internal.config.History._ -import org.apache.spark.internal.config.UI.{ADMIN_ACLS, ADMIN_ACLS_GROUPS, USER_GROUPS_MAPPING} +import org.apache.spark.internal.config.UI.{ADMIN_ACLS, ADMIN_ACLS_GROUPS, UI_VIEW_ACLS, UI_VIEW_ACLS_GROUPS, USER_GROUPS_MAPPING} import org.apache.spark.io._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo @@ -1524,6 +1524,58 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { } } + test("SPARK-33215: check ui view permissions without retrieving ui") { + val conf = createTestConf() + .set(HISTORY_SERVER_UI_ACLS_ENABLE, true) + .set(HISTORY_SERVER_UI_ADMIN_ACLS, Seq("user1", "user2")) + .set(HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS, Seq("group1")) + .set(USER_GROUPS_MAPPING, classOf[TestGroupsMappingProvider].getName) + + val provider = new FsHistoryProvider(conf) + val log = newLogFile("app1", Some("attempt1"), inProgress = false) + writeFile(log, None, + SparkListenerApplicationStart("app1", Some("app1"), System.currentTimeMillis(), + "test", Some("attempt1")), + SparkListenerEnvironmentUpdate(Map( + "Spark Properties" -> List((UI_VIEW_ACLS.key, "user"), (UI_VIEW_ACLS_GROUPS.key, "group")), + "Hadoop Properties" -> Seq.empty, + "JVM Information" -> Seq.empty, + "System Properties" -> Seq.empty, + "Classpath Entries" -> Seq.empty + )), + SparkListenerApplicationEnd(System.currentTimeMillis())) + + provider.checkForLogs() + + // attempt2 doesn't exist + intercept[NoSuchElementException] { + provider.checkUIViewPermissions("app1", Some("attempt2"), "user1") + } + // app2 doesn't exist + intercept[NoSuchElementException] { + provider.checkUIViewPermissions("app2", Some("attempt1"), "user1") + } + + // user1 and user2 are admins + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user1")) + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user2")) + 
// user3 is a member of admin group "group1" + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user3")) + // test is the app owner + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "test")) + // user is in the app's view acls + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user")) + // user5 is a member of the app's view acls group "group" + assert(provider.checkUIViewPermissions("app1", Some("attempt1"), "user5")) + + // abc, user6, user7 don't have permissions + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "abc")) + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "user6")) + assert(!provider.checkUIViewPermissions("app1", Some("attempt1"), "user7")) + + provider.stop() + } + /** * Asks the provider to check for logs and calls a function to perform checks on the updated * app list. Example: diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 51e38f9cdcd2d..e4c23d3d1b1c3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -584,6 +584,24 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers } } + test("SPARK-33215: speed up event log download by skipping UI rebuild") { + val appId = "local-1430917381535" + + stop() + init() + + val port = server.boundPort + val testUrls = Seq( + s"http://localhost:$port/api/v1/applications/$appId/logs", + s"http://localhost:$port/api/v1/applications/$appId/1/logs", + s"http://localhost:$port/api/v1/applications/$appId/2/logs") + + testUrls.foreach { url => + TestUtils.httpResponseCode(new URL(url)) + } + assert(server.cacheMetrics.loadCount.getCount === 0, "downloading event log shouldn't load ui") + } + test("access history application defaults to the last attempt id") { def getRedirectUrl(url: URL): (Int, String) = { From 537a49fc0966b0b289b67ac9c6ea20093165b0da Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 27 Oct 2020 12:40:57 +0000 Subject: [PATCH 0334/1009] [SPARK-33140][SQL] remove SQLConf and SparkSession in all sub-class of Rule[QueryPlan] ### What changes were proposed in this pull request? Since Issue [SPARK-33139](https://issues.apache.org/jira/browse/SPARK-33139) has been done, and SQLConf.get and SparkSession.active are more reliable. We are trying to refine the existing code usage of passing SQLConf and SparkSession into sub-class of Rule[QueryPlan]. In this PR. * remove SQLConf from ctor-parameter of all sub-class of Rule[QueryPlan]. * using SQLConf.get to replace the original SQLConf instance. * remove SparkSession from ctor-parameter of all sub-class of Rule[QueryPlan]. * using SparkSession.active to replace the original SparkSession instance. ### Why are the changes needed? Code refine. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing test Closes #30097 from leanken/leanken-SPARK-33140. 
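As a hedged sketch of the pattern this change moves the rules toward (the rule name and the conf flag it reads are invented for illustration; only `SQLConf.get`, `SparkSession.active`, and the `Rule` base class come from Spark):

```
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf

// Before: class MyCustomRule(conf: SQLConf) extends Rule[LogicalPlan] { ... }
// After: no constructor parameter; the active conf is read when the rule runs.
object MyCustomRule extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = {
    // SQLConf.get resolves to the SQLConf of the active session on the current thread;
    // SparkSession.active plays the same role where a SparkSession used to be injected.
    if (SQLConf.get.caseSensitiveAnalysis) plan else plan  // no-op body, illustration only
  }
}
```

The diff below applies exactly this substitution to rules such as `ResolveJoinStrategyHints` and `ResolveCoalesceHints`, which become plain objects.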
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 18 ++--- .../sql/catalyst/analysis/ResolveHints.scala | 14 ++-- .../analysis/ResolveInlineTables.scala | 3 +- .../SubstituteUnresolvedOrdinals.scala | 3 +- .../sql/catalyst/analysis/TypeCoercion.scala | 18 ++--- .../analysis/higherOrderFunctions.scala | 2 +- .../catalyst/analysis/timeZoneAnalysis.scala | 2 +- .../optimizer/CostBasedJoinReorder.scala | 2 - .../spark/sql/catalyst/rules/Rule.scala | 3 + .../sql/catalyst/analysis/AnalysisSuite.scala | 6 +- .../sql/catalyst/analysis/AnalysisTest.scala | 76 +++++++++---------- .../analysis/DataSourceV2AnalysisSuite.scala | 46 +++++++---- .../ResolveGroupingAnalyticsSuite.scala | 2 +- .../analysis/ResolveInlineTablesSuite.scala | 26 +++---- .../ResolveLambdaVariablesSuite.scala | 2 +- .../ResolvedUuidExpressionsSuite.scala | 2 +- .../SubstituteUnresolvedOrdinalsSuite.scala | 20 +++-- .../catalyst/analysis/TypeCoercionSuite.scala | 10 +-- .../expressions/ExpressionEvalHelper.scala | 2 +- .../expressions/ObjectExpressionsSuite.scala | 2 +- .../expressions/SelectedFieldSuite.scala | 2 +- .../optimizer/AggregateOptimizeSuite.scala | 22 +++--- .../optimizer/EliminateSortsSuite.scala | 36 ++++----- .../analysis/ResolveSessionCatalog.scala | 1 - .../apache/spark/sql/execution/Columnar.scala | 2 - .../spark/sql/execution/QueryExecution.scala | 21 +++-- .../execution/RemoveRedundantProjects.scala | 2 +- .../sql/execution/WholeStageCodegenExec.scala | 1 - .../sql/execution/adaptive/AQEOptimizer.scala | 2 +- .../adaptive/AdaptiveSparkPlanExec.scala | 18 ++--- .../adaptive/CoalesceShufflePartitions.scala | 6 +- .../adaptive/DemoteBroadcastHashJoin.scala | 2 +- .../adaptive/InsertAdaptiveSparkPlan.scala | 2 - .../adaptive/OptimizeLocalShuffleReader.scala | 8 +- .../adaptive/OptimizeSkewedJoin.scala | 4 +- .../adaptive/ReuseAdaptiveSubquery.scala | 1 - .../analysis/DetectAmbiguousSelfJoin.scala | 2 +- .../bucketing/CoalesceBucketsInJoin.scala | 2 +- .../DisableUnnecessaryBucketedScan.scala | 2 +- .../datasources/DataSourceStrategy.scala | 10 +-- .../datasources/FallBackFileSourceV2.scala | 4 +- .../sql/execution/datasources/rules.scala | 31 ++++---- .../PlanDynamicPruningFilters.scala | 7 +- .../exchange/EnsureRequirements.scala | 2 +- .../sql/execution/exchange/Exchange.scala | 2 +- .../apache/spark/sql/execution/subquery.scala | 8 +- .../internal/BaseSessionStateBuilder.scala | 16 ++-- .../V2CommandsCaseSensitivitySuite.scala | 2 +- .../sql/execution/ColumnarRulesSuite.scala | 4 +- .../spark/sql/execution/PlannerSuite.scala | 24 +++--- .../RemoveRedundantProjectsSuite.scala | 2 +- .../CoalesceBucketsInJoinSuite.scala | 2 +- .../command/PlanResolutionSuite.scala | 4 +- .../exchange/EnsureRequirementsSuite.scala | 12 +-- .../execution/joins/BroadcastJoinSuite.scala | 6 +- .../execution/joins/ExistenceJoinSuite.scala | 20 ++--- .../sql/execution/joins/InnerJoinSuite.scala | 6 +- .../sql/execution/joins/OuterJoinSuite.scala | 4 +- .../sql/sources/DataSourceAnalysisSuite.scala | 40 +++++----- .../sql/hive/HiveSessionStateBuilder.scala | 24 +++--- .../spark/sql/hive/HiveStrategies.scala | 12 ++- .../execution/PruneHiveTablePartitions.scala | 11 ++- .../PruneHiveTablePartitionsSuite.scala | 2 +- 63 files changed, 327 insertions(+), 323 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 457c41c39a196..39816f499944b 
100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -210,12 +210,12 @@ class Analyzer( CTESubstitution, WindowsSubstitution, EliminateUnions, - new SubstituteUnresolvedOrdinals(conf)), + SubstituteUnresolvedOrdinals), Batch("Disable Hints", Once, - new ResolveHints.DisableHints(conf)), + new ResolveHints.DisableHints), Batch("Hints", fixedPoint, - new ResolveHints.ResolveJoinStrategyHints(conf), - new ResolveHints.ResolveCoalesceHints(conf)), + ResolveHints.ResolveJoinStrategyHints, + ResolveHints.ResolveCoalesceHints), Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Resolution", fixedPoint, @@ -249,19 +249,19 @@ class Analyzer( GlobalAggregates :: ResolveAggregateFunctions :: TimeWindowing :: - ResolveInlineTables(conf) :: + ResolveInlineTables :: ResolveHigherOrderFunctions(v1SessionCatalog) :: - ResolveLambdaVariables(conf) :: - ResolveTimeZone(conf) :: + ResolveLambdaVariables :: + ResolveTimeZone :: ResolveRandomSeed :: ResolveBinaryArithmetic :: ResolveUnion :: - TypeCoercion.typeCoercionRules(conf) ++ + TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, - new ResolveHints.RemoveAllHints(conf)), + new ResolveHints.RemoveAllHints), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index c0a9414d61f8f..f1706c11e92ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -50,7 +50,7 @@ object ResolveHints { * * This rule must happen before common table expressions. */ - class ResolveJoinStrategyHints(conf: SQLConf) extends Rule[LogicalPlan] { + object ResolveJoinStrategyHints extends Rule[LogicalPlan] { private val STRATEGY_HINT_NAMES = JoinStrategyHint.strategies.flatMap(_.hintAliases) private val hintErrorHandler = conf.hintErrorHandler @@ -171,7 +171,9 @@ object ResolveHints { /** * COALESCE Hint accepts names "COALESCE", "REPARTITION", and "REPARTITION_BY_RANGE". */ - class ResolveCoalesceHints(conf: SQLConf) extends Rule[LogicalPlan] { + object ResolveCoalesceHints extends Rule[LogicalPlan] { + + val COALESCE_HINT_NAMES: Set[String] = Set("COALESCE", "REPARTITION", "REPARTITION_BY_RANGE") /** * This function handles hints for "COALESCE" and "REPARTITION". @@ -260,15 +262,11 @@ object ResolveHints { } } - object ResolveCoalesceHints { - val COALESCE_HINT_NAMES: Set[String] = Set("COALESCE", "REPARTITION", "REPARTITION_BY_RANGE") - } - /** * Removes all the hints, used to remove invalid hints provided by the user. * This must be executed after all the other hint rules are executed. */ - class RemoveAllHints(conf: SQLConf) extends Rule[LogicalPlan] { + class RemoveAllHints extends Rule[LogicalPlan] { private val hintErrorHandler = conf.hintErrorHandler @@ -284,7 +282,7 @@ object ResolveHints { * This is executed at the very beginning of the Analyzer to disable * the hint functionality. 
*/ - class DisableHints(conf: SQLConf) extends RemoveAllHints(conf: SQLConf) { + class DisableHints extends RemoveAllHints { override def apply(plan: LogicalPlan): LogicalPlan = { if (conf.getConf(SQLConf.DISABLE_HINTS)) super.apply(plan) else plan } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 4edfe507a7580..ab735c74ced9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -22,13 +22,12 @@ import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StructField, StructType} /** * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[LocalRelation]]. */ -case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport { +object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case table: UnresolvedInlineTable if table.expressionsResolved => validateInputDimension(table) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala index 860d20f897690..1e7480a69e40f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinals.scala @@ -21,13 +21,12 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, SortOrder import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType /** * Replaces ordinal in 'order by' or 'group by' with UnresolvedOrdinal expression. 
*/ -class SubstituteUnresolvedOrdinals(conf: SQLConf) extends Rule[LogicalPlan] { +object SubstituteUnresolvedOrdinals extends Rule[LogicalPlan] { private def isIntLiteral(e: Expression) = e match { case Literal(_, IntegerType) => true case _ => false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index f72d9be205df3..b69cb6091f02c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -47,16 +47,16 @@ import org.apache.spark.sql.types._ */ object TypeCoercion { - def typeCoercionRules(conf: SQLConf): List[Rule[LogicalPlan]] = - InConversion(conf) :: + def typeCoercionRules: List[Rule[LogicalPlan]] = + InConversion :: WidenSetOperationTypes :: - PromoteStrings(conf) :: + PromoteStrings :: DecimalPrecision :: BooleanEquality :: FunctionArgumentConversion :: - ConcatCoercion(conf) :: + ConcatCoercion :: MapZipWithCoercion :: - EltCoercion(conf) :: + EltCoercion :: CaseWhenCoercion :: IfCoercion :: StackCoercion :: @@ -414,7 +414,7 @@ object TypeCoercion { /** * Promotes strings that appear in arithmetic expressions. */ - case class PromoteStrings(conf: SQLConf) extends TypeCoercionRule { + object PromoteStrings extends TypeCoercionRule { private def castExpr(expr: Expression, targetType: DataType): Expression = { (expr.dataType, targetType) match { case (NullType, dt) => Literal.create(null, targetType) @@ -481,7 +481,7 @@ object TypeCoercion { * operator type is found the original expression will be returned and an * Analysis Exception will be raised at the type checking phase. */ - case class InConversion(conf: SQLConf) extends TypeCoercionRule { + object InConversion extends TypeCoercionRule { override protected def coerceTypes( plan: LogicalPlan): LogicalPlan = plan resolveExpressions { // Skip nodes who's children have not been resolved yet. @@ -786,7 +786,7 @@ object TypeCoercion { * If `spark.sql.function.concatBinaryAsString` is false and all children types are binary, * the expected types are binary. Otherwise, the expected ones are strings. */ - case class ConcatCoercion(conf: SQLConf) extends TypeCoercionRule { + object ConcatCoercion extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { case p => @@ -834,7 +834,7 @@ object TypeCoercion { * If `spark.sql.function.eltOutputAsString` is false and all children types are binary, * the expected types are binary. Otherwise, the expected ones are strings. 
*/ - case class EltCoercion(conf: SQLConf) extends TypeCoercionRule { + object EltCoercion extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { case p => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 11f94762d43e0..e10af3d5cc68d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -70,7 +70,7 @@ case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[Log * be a lambda function defined in an outer scope, or a attribute in produced by the plan's * child. If names are duplicate, the name defined in the most inner scope is used. */ -case class ResolveLambdaVariables(conf: SQLConf) extends Rule[LogicalPlan] { +object ResolveLambdaVariables extends Rule[LogicalPlan] { type LambdaVariableMap = Map[String, NamedExpression] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala index a27aa845bf0ae..d8062744a4264 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/timeZoneAnalysis.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.types.DataType * Replace [[TimeZoneAwareExpression]] without timezone id by its copy with session local * time zone. */ -case class ResolveTimeZone(conf: SQLConf) extends Rule[LogicalPlan] { +object ResolveTimeZone extends Rule[LogicalPlan] { private val transformTimeZoneExprs: PartialFunction[Expression, Expression] = { case e: TimeZoneAwareExpression if e.timeZoneId.isEmpty => e.withTimeZone(conf.sessionLocalTimeZone) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 45541051a6b13..11b675e75869e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -34,8 +34,6 @@ import org.apache.spark.sql.internal.SQLConf */ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { - private def conf = SQLConf.get - def apply(plan: LogicalPlan): LogicalPlan = { if (!conf.cboEnabled || !conf.joinReorderEnabled) { plan diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala index 7eb72724d7663..a774217ecc832 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/Rule.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.rules import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.internal.SQLConf abstract class Rule[TreeType <: TreeNode[_]] extends Logging { @@ -29,4 +30,6 @@ abstract class Rule[TreeType <: TreeNode[_]] extends Logging { } def apply(plan: TreeType): TreeType + + def conf: SQLConf = SQLConf.get } 
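The test-suite changes below follow the same idea: instead of constructing a rule with a modified `SQLConf` copy, they wrap the assertion in `withSQLConf` so the object rule observes the setting through `SQLConf.get`. A minimal sketch of that pattern, assuming a hypothetical suite (the suite name, relation, and test title are illustrative, not part of this PR):

```
import org.apache.spark.sql.catalyst.analysis.SubstituteUnresolvedOrdinals
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.internal.SQLConf

class ExampleOrdinalSuite extends PlanTest {
  private val testRelation = LocalRelation('a.int, 'b.int) // hypothetical relation

  test("ordinal substitution can be toggled through the session conf") {
    val plan = testRelation.orderBy(Literal(1).asc)
    // Previously: new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.ORDER_BY_ORDINAL -> false))
    // Now: flip the flag with withSQLConf and call the object rule directly.
    withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") {
      comparePlans(SubstituteUnresolvedOrdinals(plan), plan)
    }
  }
}
```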
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 4f51b77d8ece0..37dcee1e59ee8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -197,7 +197,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { } test("divide should be casted into fractional types") { - val plan = caseInsensitiveAnalyzer.execute( + val plan = getAnalyzer.execute( testRelation2.select( $"a" / Literal(2) as "div1", $"a" / $"b" as "div2", @@ -258,13 +258,13 @@ class AnalysisSuite extends AnalysisTest with Matchers { CreateStruct(Seq(att1, ((att1.as("aa")) + 1).as("a_plus_1"))).as("col"), att1 ) - val prevPlan = getAnalyzer(true).execute(plan) + val prevPlan = getAnalyzer.execute(plan) plan = prevPlan.select(CreateArray(Seq( CreateStruct(Seq(att1, (att1 + 1).as("a_plus_1"))).as("col1"), /** alias should be eliminated by [[CleanupAliases]] */ "col".attr.as("col2") )).as("arr")) - plan = getAnalyzer(true).execute(plan) + plan = getAnalyzer.execute(plan) val expectedPlan = prevPlan.select( CreateArray(Seq( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 4473c20b2cca6..8c14ffffa17a5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -31,13 +31,9 @@ import org.apache.spark.sql.internal.SQLConf trait AnalysisTest extends PlanTest { - protected lazy val caseSensitiveAnalyzer = makeAnalyzer(caseSensitive = true) - protected lazy val caseInsensitiveAnalyzer = makeAnalyzer(caseSensitive = false) - protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = Nil - private def makeAnalyzer(caseSensitive: Boolean): Analyzer = { - val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) + protected def getAnalyzer: Analyzer = { val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), @@ -52,17 +48,15 @@ trait AnalysisTest extends PlanTest { } } - protected def getAnalyzer(caseSensitive: Boolean) = { - if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer - } - protected def checkAnalysis( inputPlan: LogicalPlan, expectedPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val actualPlan = analyzer.executeAndCheck(inputPlan, new QueryPlanningTracker) - comparePlans(actualPlan, expectedPlan) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val actualPlan = analyzer.executeAndCheck(inputPlan, new QueryPlanningTracker) + comparePlans(actualPlan, expectedPlan) + } } protected override def comparePlans( @@ -76,18 +70,20 @@ trait AnalysisTest extends PlanTest { protected def assertAnalysisSuccess( inputPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val analysisAttempt = analyzer.execute(inputPlan) - try analyzer.checkAnalysis(analysisAttempt) catch { - case a: AnalysisException => - fail( - s""" - |Failed to Analyze Plan - |$inputPlan - | - |Partial Analysis - 
|$analysisAttempt - """.stripMargin, a) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val analysisAttempt = analyzer.execute(inputPlan) + try analyzer.checkAnalysis(analysisAttempt) catch { + case a: AnalysisException => + fail( + s""" + |Failed to Analyze Plan + |$inputPlan + | + |Partial Analysis + |$analysisAttempt + """.stripMargin, a) + } } } @@ -95,22 +91,24 @@ trait AnalysisTest extends PlanTest { inputPlan: LogicalPlan, expectedErrors: Seq[String], caseSensitive: Boolean = true): Unit = { - val analyzer = getAnalyzer(caseSensitive) - val e = intercept[AnalysisException] { - analyzer.checkAnalysis(analyzer.execute(inputPlan)) - } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val analyzer = getAnalyzer + val e = intercept[AnalysisException] { + analyzer.checkAnalysis(analyzer.execute(inputPlan)) + } - if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall( - e.getMessage.toLowerCase(Locale.ROOT).contains)) { - fail( - s"""Exception message should contain the following substrings: - | - | ${expectedErrors.mkString("\n ")} - | - |Actual exception message: - | - | ${e.getMessage} - """.stripMargin) + if (!expectedErrors.map(_.toLowerCase(Locale.ROOT)).forall( + e.getMessage.toLowerCase(Locale.ROOT).contains)) { + fail( + s"""Exception message should contain the following substrings: + | + | ${expectedErrors.mkString("\n ")} + | + |Actual exception message: + | + | ${e.getMessage} + """.stripMargin) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index e466d558db1ef..7a2320f931da3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -114,28 +114,50 @@ case class TestRelationAcceptAnySchema(output: Seq[AttributeReference]) } abstract class DataSourceV2ANSIAnalysisSuite extends DataSourceV2AnalysisBaseSuite { - override def getSQLConf(caseSensitive: Boolean): SQLConf = - super.getSQLConf(caseSensitive) - .copy(SQLConf.STORE_ASSIGNMENT_POLICY -> StoreAssignmentPolicy.ANSI) - // For Ansi store assignment policy, expression `AnsiCast` is used instead of `Cast`. 
override def checkAnalysis( inputPlan: LogicalPlan, expectedPlan: LogicalPlan, - caseSensitive: Boolean): Unit = { + caseSensitive: Boolean = true): Unit = { val expectedPlanWithAnsiCast = expectedPlan transformAllExpressions { case c: Cast => AnsiCast(c.child, c.dataType, c.timeZoneId) case other => other } - super.checkAnalysis(inputPlan, expectedPlanWithAnsiCast, caseSensitive) + + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.ANSI.toString) { + super.checkAnalysis(inputPlan, expectedPlanWithAnsiCast, caseSensitive) + } + } + + override def assertAnalysisError( + inputPlan: LogicalPlan, + expectedErrors: Seq[String], + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.ANSI.toString) { + super.assertAnalysisError(inputPlan, expectedErrors, caseSensitive) + } } } abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseSuite { - override def getSQLConf(caseSensitive: Boolean): SQLConf = - super.getSQLConf(caseSensitive) - .copy(SQLConf.STORE_ASSIGNMENT_POLICY -> StoreAssignmentPolicy.STRICT) + override def checkAnalysis( + inputPlan: LogicalPlan, + expectedPlan: LogicalPlan, + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString) { + super.checkAnalysis(inputPlan, expectedPlan, caseSensitive) + } + } + + override def assertAnalysisError( + inputPlan: LogicalPlan, + expectedErrors: Seq[String], + caseSensitive: Boolean = true): Unit = { + withSQLConf(SQLConf.STORE_ASSIGNMENT_POLICY.key -> StoreAssignmentPolicy.STRICT.toString) { + super.assertAnalysisError(inputPlan, expectedErrors, caseSensitive) + } + } test("byName: fail canWrite check") { val parsedPlan = byName(table, widerTable) @@ -200,11 +222,7 @@ abstract class DataSourceV2StrictAnalysisSuite extends DataSourceV2AnalysisBaseS abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { - protected def getSQLConf(caseSensitive: Boolean): SQLConf = - new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) - - override def getAnalyzer(caseSensitive: Boolean): Analyzer = { - val conf = getSQLConf(caseSensitive) + override def getAnalyzer: Analyzer = { val catalog = new SessionCatalog(new InMemoryCatalog, FunctionRegistry.builtin, conf) catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 7284a6a30ef7e..249e7a49a0a90 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -110,7 +110,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { Seq(UnresolvedAlias(Multiply(unresolved_a, Literal(2))), unresolved_b, UnresolvedAlias(count(unresolved_c)))) - val resultPlan = getAnalyzer(true).executeAndCheck(originalPlan2, new QueryPlanningTracker) + val resultPlan = getAnalyzer.executeAndCheck(originalPlan2, new QueryPlanningTracker) val gExpressions = resultPlan.asInstanceOf[Aggregate].groupingExpressions assert(gExpressions.size == 3) val firstGroupingExprAttrName = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala index 9e99c8e11cdfe..16d23153c1c53 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala @@ -35,53 +35,53 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { private def lit(v: Any): Literal = Literal(v) test("validate inputs are foldable") { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1))))) // nondeterministic (rand) should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(Rand(1))))) } // aggregate should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(Count(lit(1)))))) } // unresolved attribute should not work intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputEvaluable( + ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1"), Seq(Seq(UnresolvedAttribute("A"))))) } } test("validate input dimensions") { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2))))) // num alias != data dimension intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1)), Seq(lit(2))))) } // num alias == data dimension, but data themselves are inconsistent intercept[AnalysisException] { - ResolveInlineTables(conf).validateInputDimension( + ResolveInlineTables.validateInputDimension( UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(21), lit(22))))) } } test("do not fire the rule if not all expressions are resolved") { val table = UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(UnresolvedAttribute("A")))) - assert(ResolveInlineTables(conf)(table) == table) + assert(ResolveInlineTables(table) == table) } test("convert") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val converted = ResolveInlineTables(conf).convert(table) + val converted = ResolveInlineTables.convert(table) assert(converted.output.map(_.dataType) == Seq(LongType)) assert(converted.data.size == 2) @@ -92,8 +92,8 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { test("convert TimeZoneAwareExpression") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(Cast(lit("1991-12-06 00:00:00.0"), TimestampType)))) - val withTimeZone = ResolveTimeZone(conf).apply(table) - val LocalRelation(output, data, _) = ResolveInlineTables(conf).apply(withTimeZone) + val withTimeZone = ResolveTimeZone.apply(table) + val LocalRelation(output, data, _) = ResolveInlineTables.apply(withTimeZone) val correct = Cast(lit("1991-12-06 00:00:00.0"), TimestampType) .withTimeZone(conf.sessionLocalTimeZone).eval().asInstanceOf[Long] assert(output.map(_.dataType) == Seq(TimestampType)) @@ -103,11 +103,11 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { test("nullability inference in convert") { val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val 
converted1 = ResolveInlineTables(conf).convert(table1) + val converted1 = ResolveInlineTables.convert(table1) assert(!converted1.schema.fields(0).nullable) val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType)))) - val converted2 = ResolveInlineTables(conf).convert(table2) + val converted2 = ResolveInlineTables.convert(table2) assert(converted2.schema.fields(0).nullable) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala index a5847ba7c522d..b9233a27f3d7a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveLambdaVariablesSuite.scala @@ -32,7 +32,7 @@ class ResolveLambdaVariablesSuite extends PlanTest { import org.apache.spark.sql.catalyst.dsl.plans._ object Analyzer extends RuleExecutor[LogicalPlan] { - val batches = Batch("Resolution", FixedPoint(4), ResolveLambdaVariables(conf)) :: Nil + val batches = Batch("Resolution", FixedPoint(4), ResolveLambdaVariables) :: Nil } private val key = 'key.int diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala index 64bd07534b19b..5ddfa9f2191e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolvedUuidExpressionsSuite.scala @@ -36,7 +36,7 @@ class ResolvedUuidExpressionsSuite extends AnalysisTest { private lazy val uuid1Ref = uuid1.toAttribute private val tracker = new QueryPlanningTracker - private val analyzer = getAnalyzer(caseSensitive = true) + private val analyzer = getAnalyzer private def getUuidExpressions(plan: LogicalPlan): Seq[Uuid] = { plan.flatMap { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index 2331346f325aa..c0312282c76c8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -36,31 +36,35 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { // Tests order by ordinal, apply single rule. val plan = testRelation2.orderBy(Literal(1).asc, Literal(2).asc) comparePlans( - new SubstituteUnresolvedOrdinals(conf).apply(plan), + SubstituteUnresolvedOrdinals.apply(plan), testRelation2.orderBy(UnresolvedOrdinal(1).asc, UnresolvedOrdinal(2).asc)) // Tests order by ordinal, do full analysis checkAnalysis(plan, testRelation2.orderBy(a.asc, b.asc)) // order by ordinal can be turned off by config - comparePlans( - new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.ORDER_BY_ORDINAL -> false)).apply(plan), - testRelation2.orderBy(Literal(1).asc, Literal(2).asc)) + withSQLConf(SQLConf.ORDER_BY_ORDINAL.key -> "false") { + comparePlans( + SubstituteUnresolvedOrdinals.apply(plan), + testRelation2.orderBy(Literal(1).asc, Literal(2).asc)) + } } test("group by ordinal") { // Tests group by ordinal, apply single rule. 
val plan2 = testRelation2.groupBy(Literal(1), Literal(2))('a, 'b) comparePlans( - new SubstituteUnresolvedOrdinals(conf).apply(plan2), + SubstituteUnresolvedOrdinals.apply(plan2), testRelation2.groupBy(UnresolvedOrdinal(1), UnresolvedOrdinal(2))('a, 'b)) // Tests group by ordinal, do full analysis checkAnalysis(plan2, testRelation2.groupBy(a, b)(a, b)) // group by ordinal can be turned off by config - comparePlans( - new SubstituteUnresolvedOrdinals(conf.copy(SQLConf.GROUP_BY_ORDINAL -> false)).apply(plan2), - testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)) + withSQLConf(SQLConf.GROUP_BY_ORDINAL.key -> "false") { + comparePlans( + SubstituteUnresolvedOrdinals.apply(plan2), + testRelation2.groupBy(Literal(1), Literal(2))('a, 'b)) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 7b80de908fa08..1e5bc271ab270 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -1103,7 +1103,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("type coercion for Concat") { - val rule = TypeCoercion.ConcatCoercion(conf) + val rule = TypeCoercion.ConcatCoercion ruleTest(rule, Concat(Seq(Literal("ab"), Literal("cde"))), @@ -1157,7 +1157,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("type coercion for Elt") { - val rule = TypeCoercion.EltCoercion(conf) + val rule = TypeCoercion.EltCoercion ruleTest(rule, Elt(Seq(Literal(1), Literal("ab"), Literal("cde"))), @@ -1284,7 +1284,7 @@ class TypeCoercionSuite extends AnalysisTest { } } - private val timeZoneResolver = ResolveTimeZone(new SQLConf) + private val timeZoneResolver = ResolveTimeZone private def widenSetOperationTypes(plan: LogicalPlan): LogicalPlan = { timeZoneResolver(TypeCoercion.WidenSetOperationTypes(plan)) @@ -1437,7 +1437,7 @@ class TypeCoercionSuite extends AnalysisTest { */ test("make sure rules do not fire early") { // InConversion - val inConversion = TypeCoercion.InConversion(conf) + val inConversion = TypeCoercion.InConversion ruleTest(inConversion, In(UnresolvedAttribute("a"), Seq(Literal(1))), In(UnresolvedAttribute("a"), Seq(Literal(1))) @@ -1481,7 +1481,7 @@ class TypeCoercionSuite extends AnalysisTest { } test("binary comparison with string promotion") { - val rule = TypeCoercion.PromoteStrings(conf) + val rule = TypeCoercion.PromoteStrings ruleTest(rule, GreaterThan(Literal("123"), Literal(1)), GreaterThan(Cast(Literal("123"), IntegerType), Literal(1))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index d0b0d04d1f719..60ab98eeb410a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -74,7 +74,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB private def prepareEvaluation(expression: Expression): Expression = { val serializer = new JavaSerializer(new SparkConf()).newInstance - val resolver = ResolveTimeZone(new SQLConf) + val resolver = ResolveTimeZone val expr = resolver.resolveTimeZones(expression) assert(expr.resolved) 
serializer.deserialize(serializer.serialize(expr)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index 02c5c9ab89088..ff33324c3bb18 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -270,7 +270,7 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { private def checkObjectExprEvaluation( expression: => Expression, expected: Any, inputRow: InternalRow = EmptyRow): Unit = { val serializer = new JavaSerializer(new SparkConf()).newInstance - val resolver = ResolveTimeZone(new SQLConf) + val resolver = ResolveTimeZone val expr = resolver.resolveTimeZones(serializer.deserialize(serializer.serialize(expression))) checkEvaluationWithoutCodegen(expr, expected, inputRow) checkEvaluationWithMutableProjection(expr, expected, inputRow) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala index 76d6890cc8f6f..cf5463be1faa1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SelectedFieldSuite.scala @@ -534,7 +534,7 @@ class SelectedFieldSuite extends AnalysisTest { private def unapplySelect(expr: String, relation: LocalRelation) = { val parsedExpr = parseAsCatalystExpression(Seq(expr)).head val select = relation.select(parsedExpr) - val analyzed = caseSensitiveAnalyzer.execute(select) + val analyzed = getAnalyzer.execute(select) SelectedField.unapply(analyzed.expressions.head) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala index f8ddc93597070..8984bad479a6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala @@ -17,21 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, GROUP_BY_ORDINAL} -class AggregateOptimizeSuite extends PlanTest { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> false, GROUP_BY_ORDINAL -> false) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) +class AggregateOptimizeSuite extends AnalysisTest { + val analyzer = getAnalyzer object Optimize extends 
RuleExecutor[LogicalPlan] { val batches = Batch("Aggregate", FixedPoint(100), @@ -51,11 +46,14 @@ class AggregateOptimizeSuite extends PlanTest { } test("do not remove all grouping expressions if they are all literals") { - val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b)) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b))) + withSQLConf(CASE_SENSITIVE.key -> "false", GROUP_BY_ORDINAL.key -> "false") { + val analyzer = getAnalyzer + val query = testRelation.groupBy(Literal("1"), Literal(1) + Literal(2))(sum('b)) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(testRelation.groupBy(Literal(0))(sum('b))) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Remove aliased literals") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 265f0a9936759..cc351e365113d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.api.python.PythonEvalType -import org.apache.spark.sql.catalyst.analysis.{Analyzer, EmptyFunctionRegistry} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -27,14 +26,11 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{CASE_SENSITIVE, ORDER_BY_ORDINAL} import org.apache.spark.sql.types.IntegerType -class EliminateSortsSuite extends PlanTest { - override val conf = new SQLConf().copy(CASE_SENSITIVE -> true, ORDER_BY_ORDINAL -> false) - val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, conf) +class EliminateSortsSuite extends AnalysisTest { + val analyzer = getAnalyzer object Optimize extends RuleExecutor[LogicalPlan] { val batches = @@ -66,23 +62,29 @@ class EliminateSortsSuite extends PlanTest { } test("All the SortOrder are no-op") { - val x = testRelation + withSQLConf(CASE_SENSITIVE.key -> "true", ORDER_BY_ORDINAL.key -> "false") { + val x = testRelation + val analyzer = getAnalyzer - val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending)) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(x) + val query = x.orderBy(SortOrder(3, Ascending), SortOrder(-1, Ascending)) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(x) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Partial order-by clauses contain no-op SortOrder") { - val x = testRelation + withSQLConf(CASE_SENSITIVE.key -> "true", ORDER_BY_ORDINAL.key -> "false") { + val x = testRelation + val analyzer = 
getAnalyzer - val query = x.orderBy(SortOrder(3, Ascending), 'a.asc) - val optimized = Optimize.execute(analyzer.execute(query)) - val correctAnswer = analyzer.execute(x.orderBy('a.asc)) + val query = x.orderBy(SortOrder(3, Ascending), 'a.asc) + val optimized = Optimize.execute(analyzer.execute(query)) + val correctAnswer = analyzer.execute(x.orderBy('a.asc)) - comparePlans(optimized, correctAnswer) + comparePlans(optimized, correctAnswer) + } } test("Remove no-op alias") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 24382e07a2966..c4fd84cd978d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -38,7 +38,6 @@ import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBui */ class ResolveSessionCatalog( val catalogManager: CatalogManager, - conf: SQLConf, isTempView: Seq[String] => Boolean, isTempFunction: String => Boolean) extends Rule[LogicalPlan] with LookupCatalog { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala index e47ec9ab9b61b..8d542792a0e28 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector, WritableColumnVector} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} @@ -494,7 +493,6 @@ case class RowToColumnarExec(child: SparkPlan) extends RowToColumnarTransition { * to/from columnar formatted data. */ case class ApplyColumnarRulesAndInsertTransitions( - conf: SQLConf, columnarRules: Seq[ColumnarRule]) extends Rule[SparkPlan] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index a056500fa361a..c37e1e92c8576 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -339,17 +339,16 @@ object QueryExecution { // as the original plan is hidden behind `AdaptiveSparkPlanExec`. 
adaptiveExecutionRule.toSeq ++ Seq( - CoalesceBucketsInJoin(sparkSession.sessionState.conf), - PlanDynamicPruningFilters(sparkSession), - PlanSubqueries(sparkSession), - RemoveRedundantProjects(sparkSession.sessionState.conf), - EnsureRequirements(sparkSession.sessionState.conf), - DisableUnnecessaryBucketedScan(sparkSession.sessionState.conf), - ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.conf, - sparkSession.sessionState.columnarRules), - CollapseCodegenStages(sparkSession.sessionState.conf), - ReuseExchange(sparkSession.sessionState.conf), - ReuseSubquery(sparkSession.sessionState.conf) + CoalesceBucketsInJoin, + PlanDynamicPruningFilters, + PlanSubqueries, + RemoveRedundantProjects, + EnsureRequirements, + DisableUnnecessaryBucketedScan, + ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), + CollapseCodegenStages(), + ReuseExchange, + ReuseSubquery ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index 2bcf86edbea37..8746cc6f650d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.internal.SQLConf * optimization to prune data. During physical planning, redundant project nodes can be removed * to simplify the query plan. */ -case class RemoveRedundantProjects(conf: SQLConf) extends Rule[SparkPlan] { +object RemoveRedundantProjects extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.getConf(SQLConf.REMOVE_REDUNDANT_PROJECTS_ENABLED)) { plan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index bcd31c4c1d775..a8905ca530005 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -873,7 +873,6 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) * failed to generate/compile code. 
*/ case class CollapseCodegenStages( - conf: SQLConf, codegenStageCounter: AtomicInteger = new AtomicInteger(0)) extends Rule[SparkPlan] { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index 0170f8b2f71c2..04b8ade8ac629 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -28,7 +28,7 @@ import org.apache.spark.util.Utils class AQEOptimizer(conf: SQLConf) extends RuleExecutor[LogicalPlan] { private val defaultBatches = Seq( Batch("Demote BroadcastHashJoin", Once, - DemoteBroadcastHashJoin(conf)), + DemoteBroadcastHashJoin), Batch("Eliminate Join to Empty Relation", Once, EliminateJoinToEmptyRelation) ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 5e75e26e6d074..d30e16276b9f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -82,8 +82,8 @@ case class AdaptiveSparkPlanExec( // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) - @transient private val removeRedundantProjects = RemoveRedundantProjects(conf) - @transient private val ensureRequirements = EnsureRequirements(conf) + @transient private val removeRedundantProjects = RemoveRedundantProjects + @transient private val ensureRequirements = EnsureRequirements // A list of physical plan rules to be applied before creation of query stages. The physical // plan should reach a final status of query stages (i.e., no more addition or removal of @@ -96,12 +96,12 @@ case class AdaptiveSparkPlanExec( // A list of physical optimizer rules to be applied to a new stage before its execution. These // optimizations should be stage-independent. @transient private val queryStageOptimizerRules: Seq[Rule[SparkPlan]] = Seq( - ReuseAdaptiveSubquery(conf, context.subqueryCache), - CoalesceShufflePartitions(context.session), + ReuseAdaptiveSubquery(context.subqueryCache), + CoalesceShufflePartitions, // The following two rules need to make use of 'CustomShuffleReaderExec.partitionSpecs' // added by `CoalesceShufflePartitions`. So they must be executed after it. - OptimizeSkewedJoin(conf), - OptimizeLocalShuffleReader(conf) + OptimizeSkewedJoin, + OptimizeLocalShuffleReader ) private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = @@ -109,7 +109,7 @@ case class AdaptiveSparkPlanExec( case _: DataWritingCommandExec | _: V2TableWriteExec => // SPARK-32932: Local shuffle reader could break partitioning that works best // for the following writing command - queryStageOptimizerRules.filterNot(_.isInstanceOf[OptimizeLocalShuffleReader]) + queryStageOptimizerRules.filterNot(_ == OptimizeLocalShuffleReader) case _ => queryStageOptimizerRules } @@ -117,8 +117,8 @@ case class AdaptiveSparkPlanExec( // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. 
@transient private val postStageCreationRules = Seq( - ApplyColumnarRulesAndInsertTransitions(conf, context.session.sessionState.columnarRules), - CollapseCodegenStages(conf) + ApplyColumnarRulesAndInsertTransitions(context.session.sessionState.columnarRules), + CollapseCodegenStages() ) @transient private val costEvaluator = SimpleCostEvaluator diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 84c65df31a7c5..ecf908a737442 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -26,9 +26,7 @@ import org.apache.spark.sql.internal.SQLConf * A rule to coalesce the shuffle partitions based on the map output statistics, which can * avoid many small reduce tasks that hurt performance. */ -case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPlan] { - private def conf = session.sessionState.conf - +object CoalesceShufflePartitions extends Rule[SparkPlan] { override def apply(plan: SparkPlan): SparkPlan = { if (!conf.coalesceShufflePartitionsEnabled) { return plan @@ -65,7 +63,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .getOrElse(session.sparkContext.defaultParallelism) + .getOrElse(SparkSession.active.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala index aba83b1337109..011acbf1b22a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.internal.SQLConf * This optimization rule detects a join child that has a high ratio of empty partitions and * adds a no-broadcast-hash-join hint to avoid it being broadcast. 
*/ -case class DemoteBroadcastHashJoin(conf: SQLConf) extends Rule[LogicalPlan] { +object DemoteBroadcastHashJoin extends Rule[LogicalPlan] { private def shouldDemote(plan: LogicalPlan): Boolean = plan match { case LogicalQueryStage(_, stage: ShuffleQueryStageExec) if stage.resultOption.get().isDefined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 754225dd3fe95..f8478f860b2d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -39,8 +39,6 @@ import org.apache.spark.sql.internal.SQLConf case class InsertAdaptiveSparkPlan( adaptiveExecutionContext: AdaptiveExecutionContext) extends Rule[SparkPlan] { - private val conf = adaptiveExecutionContext.session.sessionState.conf - override def apply(plan: SparkPlan): SparkPlan = applyInternal(plan, false) private def applyInternal(plan: SparkPlan, isSubquery: Boolean): SparkPlan = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 7bb9265e1717a..8db2827beaf43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -33,10 +33,9 @@ import org.apache.spark.sql.internal.SQLConf * then run `EnsureRequirements` to check whether additional shuffle introduced. * If introduced, we will revert all the local readers. */ -case class OptimizeLocalShuffleReader(conf: SQLConf) extends Rule[SparkPlan] { - import OptimizeLocalShuffleReader._ +object OptimizeLocalShuffleReader extends Rule[SparkPlan] { - private val ensureRequirements = EnsureRequirements(conf) + private val ensureRequirements = EnsureRequirements // The build side is a broadcast query stage which should have been optimized using local reader // already. So we only need to deal with probe side here. @@ -118,9 +117,6 @@ case class OptimizeLocalShuffleReader(conf: SQLConf) extends Rule[SparkPlan] { createProbeSideLocalReader(s) } } -} - -object OptimizeLocalShuffleReader { object BroadcastJoinWithShuffleLeft { def unapply(plan: SparkPlan): Option[(SparkPlan, BuildSide)] = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index a85b188727ba4..582d586c59358 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -53,9 +53,9 @@ import org.apache.spark.sql.internal.SQLConf * Note that, when this rule is enabled, it also coalesces non-skewed partitions like * `CoalesceShufflePartitions` does. 
*/ -case class OptimizeSkewedJoin(conf: SQLConf) extends Rule[SparkPlan] { +object OptimizeSkewedJoin extends Rule[SparkPlan] { - private val ensureRequirements = EnsureRequirements(conf) + private val ensureRequirements = EnsureRequirements private val supportedJoinTypes = Inner :: Cross :: LeftSemi :: LeftAnti :: LeftOuter :: RightOuter :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala index 432f7e204791b..c3c7358641fcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, import org.apache.spark.sql.internal.SQLConf case class ReuseAdaptiveSubquery( - conf: SQLConf, reuseMap: TrieMap[SparkPlan, BaseSubqueryExec]) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala index 136f7c47f5341..ef657ba35455f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/analysis/DetectAmbiguousSelfJoin.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.internal.SQLConf * Note that, this rule removes all the Dataset id related metadata from `AttributeReference`, so * that they don't exist after analyzer. */ -class DetectAmbiguousSelfJoin(conf: SQLConf) extends Rule[LogicalPlan] { +object DetectAmbiguousSelfJoin extends Rule[LogicalPlan] { // Dataset column reference is an `AttributeReference` with 2 special metadata. private def isColumnReference(a: AttributeReference): Boolean = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala index 22f308f331449..40a2a7a2359e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.internal.SQLConf * - The ratio of the number of buckets is less than the value set in * COALESCE_BUCKETS_IN_JOIN_MAX_BUCKET_RATIO. */ -case class CoalesceBucketsInJoin(conf: SQLConf) extends Rule[SparkPlan] { +object CoalesceBucketsInJoin extends Rule[SparkPlan] { private def updateNumCoalescedBucketsInScan( plan: SparkPlan, numCoalescedBuckets: Int): SparkPlan = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index 9b4f898df00b6..2bbd5f5d969dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -75,7 +75,7 @@ import org.apache.spark.sql.internal.SQLConf * the paper "Access Path Selection in a Relational Database Management System" * (https://dl.acm.org/doi/10.1145/582095.582099). 
*/ -case class DisableUnnecessaryBucketedScan(conf: SQLConf) extends Rule[SparkPlan] { +object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] { /** * Disable bucketed table scan with pre-order traversal of plan. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 9d666fc3a063e..02dd4e549f93b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -58,7 +58,7 @@ import org.apache.spark.unsafe.types.UTF8String * Note that, this rule must be run after `PreprocessTableCreation` and * `PreprocessTableInsertion`. */ -case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with CastSupport { +object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { def resolver: Resolver = conf.resolver @@ -243,16 +243,16 @@ case class DataSourceAnalysis(conf: SQLConf) extends Rule[LogicalPlan] with Cast * TODO: we should remove the special handling for hive tables after completely making hive as a * data source. */ -class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object FindDataSourceTable extends Rule[LogicalPlan] { private def readDataSourceTable( table: CatalogTable, extraOptions: CaseInsensitiveStringMap): LogicalPlan = { val qualifiedTableName = QualifiedTableName(table.database, table.identifier.table) - val catalog = sparkSession.sessionState.catalog + val catalog = SparkSession.active.sessionState.catalog val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) catalog.getCachedPlan(qualifiedTableName, () => { val dataSource = DataSource( - sparkSession, + SparkSession.active, // In older version(prior to 2.1) of Spark, the table schema can be empty and should be // inferred at runtime. We should still support it. userSpecifiedSchema = if (table.schema.isEmpty) None else Some(table.schema), @@ -270,7 +270,7 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] extraOptions: CaseInsensitiveStringMap): StreamingRelation = { val dsOptions = DataSourceUtils.generateDatasourceOptions(extraOptions, table) val dataSource = DataSource( - sparkSession, + SparkSession.active, className = table.provider.get, userSpecifiedSchema = Some(table.schema), options = dsOptions) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 28a63c26604ec..0244175f1a1bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, File * This is a temporary hack for making current data source V2 work. It should be * removed when Catalog support of file data source v2 is finished. 
*/ -class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object FallBackFileSourceV2 extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case i @ InsertIntoStatement(d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _) => @@ -42,7 +42,7 @@ class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] table.schema, None, v1FileFormat, - d.options.asScala.toMap)(sparkSession) + d.options.asScala.toMap)(SparkSession.active) i.copy(table = LogicalRelation(relation)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 5fb1a4d249070..5c46a36cf91f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -38,16 +38,16 @@ import org.apache.spark.sql.util.SchemaUtils /** * Replaces [[UnresolvedRelation]]s if the plan is for direct query on files. */ -class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object ResolveSQLOnFile extends Rule[LogicalPlan] { private def maybeSQLFile(u: UnresolvedRelation): Boolean = { - sparkSession.sessionState.conf.runSQLonFile && u.multipartIdentifier.size == 2 + conf.runSQLonFile && u.multipartIdentifier.size == 2 } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case u: UnresolvedRelation if maybeSQLFile(u) => try { val dataSource = DataSource( - sparkSession, + SparkSession.active, paths = u.multipartIdentifier.last :: Nil, className = u.multipartIdentifier.head) @@ -73,9 +73,9 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { /** * Preprocess [[CreateTable]], to do some normalization and checking. */ -case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] { +object PreprocessTableCreation extends Rule[LogicalPlan] { // catalog is a def and not a val/lazy val as the latter would introduce a circular reference - private def catalog = sparkSession.sessionState.catalog + private def catalog = SparkSession.active.sessionState.catalog def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { // When we CREATE TABLE without specifying the table schema, we should fail the query if @@ -112,7 +112,6 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi } // Check if the specified data source match the data source of the existing table. 
- val conf = sparkSession.sessionState.conf val existingProvider = DataSource.lookupDataSource(existingTable.provider.get, conf) val specifiedProvider = DataSource.lookupDataSource(tableDesc.provider.get, conf) // TODO: Check that options from the resolved relation match the relation that we are @@ -140,7 +139,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi s"(${query.schema.catalogString})") } - val resolver = sparkSession.sessionState.conf.resolver + val resolver = conf.resolver val tableCols = existingTable.schema.map(_.name) // As we are inserting into an existing table, we should respect the existing schema and @@ -245,7 +244,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi val schema = create.tableSchema val partitioning = create.partitioning val identifier = create.tableName - val isCaseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + val isCaseSensitive = conf.caseSensitiveAnalysis // Check that columns are not duplicated in the schema val flattenedSchema = SchemaUtils.explodeNestedFieldNames(schema) SchemaUtils.checkColumnNameDuplication( @@ -266,7 +265,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi create } else { // Resolve and normalize partition columns as necessary - val resolver = sparkSession.sessionState.conf.resolver + val resolver = conf.resolver val normalizedPartitions = partitioning.map { case transform: RewritableTransform => val rewritten = transform.references().map { ref => @@ -291,7 +290,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi SchemaUtils.checkSchemaColumnNameDuplication( schema, "in the table definition of " + table.identifier, - sparkSession.sessionState.conf.caseSensitiveAnalysis) + conf.caseSensitiveAnalysis) assertNoNullTypeInSchema(schema) @@ -317,12 +316,12 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi tableName = table.identifier.unquotedString, tableCols = schema.map(_.name), partCols = table.partitionColumnNames, - resolver = sparkSession.sessionState.conf.resolver) + resolver = conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedPartitionCols, "in the partition schema", - sparkSession.sessionState.conf.resolver) + conf.resolver) if (schema.nonEmpty && normalizedPartitionCols.length == schema.length) { if (DDLUtils.isHiveTable(table)) { @@ -351,16 +350,16 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi tableName = table.identifier.unquotedString, tableCols = schema.map(_.name), bucketSpec = bucketSpec, - resolver = sparkSession.sessionState.conf.resolver) + resolver = conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedBucketSpec.bucketColumnNames, "in the bucket definition", - sparkSession.sessionState.conf.resolver) + conf.resolver) SchemaUtils.checkColumnNameDuplication( normalizedBucketSpec.sortColumnNames, "in the sort definition", - sparkSession.sessionState.conf.resolver) + conf.resolver) normalizedBucketSpec.sortColumnNames.map(schema(_)).map(_.dataType).foreach { case dt if RowOrdering.isOrderable(dt) => // OK @@ -382,7 +381,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi * table. It also does data type casting and field renaming, to make sure that the columns to be * inserted have the correct data type and fields have the correct names. 
*/ -case class PreprocessTableInsertion(conf: SQLConf) extends Rule[LogicalPlan] { +object PreprocessTableInsertion extends Rule[LogicalPlan] { private def preprocess( insert: InsertIntoStatement, tblName: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 6973f55e8dca0..e1e996a857521 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -34,8 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * results of broadcast. For joins that are not planned as broadcast hash joins we keep * the fallback mechanism with subquery duplicate. */ -case class PlanDynamicPruningFilters(sparkSession: SparkSession) - extends Rule[SparkPlan] with PredicateHelper { +object PlanDynamicPruningFilters extends Rule[SparkPlan] with PredicateHelper { /** * Identify the shape in which keys of a given plan are broadcasted. @@ -54,7 +53,7 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) case DynamicPruningSubquery( value, buildPlan, buildKeys, broadcastKeyIndex, onlyInBroadcast, exprId) => val sparkPlan = QueryExecution.createSparkPlan( - sparkSession, sparkSession.sessionState.planner, buildPlan) + SparkSession.active, SparkSession.active.sessionState.planner, buildPlan) // Using `sparkPlan` is a little hacky as it is based on the assumption that this rule is // the first to be applied (apart from `InsertAdaptiveSparkPlan`). val canReuseExchange = SQLConf.get.exchangeReuseEnabled && buildKeys.nonEmpty && @@ -67,7 +66,7 @@ case class PlanDynamicPruningFilters(sparkSession: SparkSession) }.isDefined if (canReuseExchange) { - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, sparkPlan) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, sparkPlan) val mode = broadcastMode(buildKeys, executedPlan.output) // plan a broadcast exchange of the build side of the join val exchange = BroadcastExchangeExec(mode, executedPlan) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 3641654b89b76..cf38fee055ca5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.internal.SQLConf * each operator by inserting [[ShuffleExchangeExec]] Operators where required. Also ensure that * the input partition ordering requirements are met. 
*/ -case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { +object EnsureRequirements extends Rule[SparkPlan] { private def ensureDistributionAndOrdering(operator: SparkPlan): SparkPlan = { val requiredChildDistributions: Seq[Distribution] = operator.requiredChildDistribution diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index c4062879c2727..aeaf59b7f0f4a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -100,7 +100,7 @@ case class ReusedExchangeExec(override val output: Seq[Attribute], child: Exchan * Find out duplicated exchanges in the spark plan, then use the same exchange for all the * references. */ -case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { +object ReuseExchange extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 14cc76f0dbb78..7cf9af67aaa36 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -172,11 +172,11 @@ case class InSubqueryExec( /** * Plans subqueries that are present in the given [[SparkPlan]]. */ -case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { +object PlanSubqueries extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { plan.transformAllExpressions { case subquery: expressions.ScalarSubquery => - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, subquery.plan) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, subquery.plan) ScalarSubquery( SubqueryExec(s"scalar-subquery#${subquery.exprId.id}", executedPlan), subquery.exprId) @@ -190,7 +190,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { } ) } - val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, query) + val executedPlan = QueryExecution.prepareExecutedPlan(SparkSession.active, query) InSubqueryExec(expr, SubqueryExec(s"subquery#${exprId.id}", executedPlan), exprId) } } @@ -200,7 +200,7 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { * Find out duplicated subqueries in the spark plan, then use the same subquery result for all the * references. 
*/ -case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { +object ReuseSubquery extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.subqueryReuseEnabled) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 4ca1ac863addc..3cef9f9df0daa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -177,19 +177,19 @@ abstract class BaseSessionStateBuilder( */ protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = - new FindDataSourceTable(session) +: - new ResolveSQLOnFile(session) +: - new FallBackFileSourceV2(session) +: + FindDataSourceTable +: + ResolveSQLOnFile +: + FallBackFileSourceV2 +: ResolveEncodersInScalaAgg +: new ResolveSessionCatalog( - catalogManager, conf, catalog.isTempView, catalog.isTempFunction) +: + catalogManager, catalog.isTempView, catalog.isTempFunction) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - new DetectAmbiguousSelfJoin(conf) +: - PreprocessTableCreation(session) +: - PreprocessTableInsertion(conf) +: - DataSourceAnalysis(conf) +: + DetectAmbiguousSelfJoin +: + PreprocessTableCreation +: + PreprocessTableInsertion +: + DataSourceAnalysis +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index dd95ceb59bdc4..e5f46eb9b1098 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -34,7 +34,7 @@ class V2CommandsCaseSensitivitySuite extends SharedSparkSession with AnalysisTes import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ override protected def extendedAnalysisRules: Seq[Rule[LogicalPlan]] = { - Seq(PreprocessTableCreation(spark)) + Seq(PreprocessTableCreation) } test("CreateTableAsSelect: using top level field for partitioning") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala index d5d534eb5f878..dd2790040b9e8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ColumnarRulesSuite.scala @@ -27,7 +27,7 @@ class ColumnarRulesSuite extends PlanTest with SharedSparkSession { test("Idempotency of columnar rules - RowToColumnar/ColumnarToRow") { val rules = ApplyColumnarRulesAndInsertTransitions( - spark.sessionState.conf, spark.sessionState.columnarRules) + spark.sessionState.columnarRules) val plan = UnaryOp(UnaryOp(LeafOp(false), true), false) val expected = @@ -40,7 +40,7 @@ class ColumnarRulesSuite extends PlanTest with SharedSparkSession { test("Idempotency of columnar rules - ColumnarToRow/RowToColumnar") { val rules = ApplyColumnarRulesAndInsertTransitions( - spark.sessionState.conf, spark.sessionState.columnarRules) + spark.sessionState.columnarRules) val plan = UnaryOp(UnaryOp(LeafOp(true), false), true) val 
expected = ColumnarToRowExec( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index ca52e51c87ea7..048466b3d8637 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -342,7 +342,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) } @@ -360,7 +360,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.isEmpty) { fail(s"Exchange should have been added:\n$outputPlan") @@ -380,7 +380,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(Seq.empty, Seq.empty) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.nonEmpty) { fail(s"Exchange should not have been added:\n$outputPlan") @@ -403,7 +403,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution, distribution), requiredChildOrdering = Seq(outputOrdering, outputOrdering) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.nonEmpty) { fail(s"No Exchanges should have been added:\n$outputPlan") @@ -418,7 +418,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val inputPlan = ShuffleExchangeExec( partitioning, DummySparkPlan(outputPartitioning = partitioning)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 2) { fail(s"Topmost Exchange should have been eliminated:\n$outputPlan") @@ -433,7 +433,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val inputPlan = ShuffleExchangeExec( partitioning, DummySparkPlan(outputPartitioning = partitioning)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (outputPlan.collect { case e: ShuffleExchangeExec => true }.size == 1) { fail(s"Topmost Exchange should not have been eliminated:\n$outputPlan") @@ -451,7 +451,7 @@ class PlannerSuite extends SharedSparkSession with 
AdaptiveSparkPlanHelper { requiredChildDistribution = Seq(distribution), requiredChildOrdering = Seq(Seq.empty)) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) val shuffle = outputPlan.collect { case e: ShuffleExchangeExec => e } assert(shuffle.size === 1) assert(shuffle.head.outputPartitioning === finalPartitioning) @@ -476,7 +476,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { shuffle, shuffle) - val outputPlan = ReuseExchange(spark.sessionState.conf).apply(inputPlan) + val outputPlan = ReuseExchange.apply(inputPlan) if (outputPlan.collect { case e: ReusedExchangeExec => true }.size != 1) { fail(s"Should re-use the shuffle:\n$outputPlan") } @@ -493,7 +493,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { ShuffleExchangeExec(finalPartitioning, inputPlan), ShuffleExchangeExec(finalPartitioning, inputPlan)) - val outputPlan2 = ReuseExchange(spark.sessionState.conf).apply(inputPlan2) + val outputPlan2 = ReuseExchange.apply(inputPlan2) if (outputPlan2.collect { case e: ReusedExchangeExec => true }.size != 2) { fail(s"Should re-use the two shuffles:\n$outputPlan2") } @@ -530,7 +530,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { requiredChildOrdering = Seq(requiredOrdering), requiredChildDistribution = Seq(UnspecifiedDistribution) ) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(inputPlan) + val outputPlan = EnsureRequirements.apply(inputPlan) assertDistributionRequirementsAreSatisfied(outputPlan) if (shouldHaveSort) { if (outputPlan.collect { case s: SortExec => true }.isEmpty) { @@ -691,7 +691,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val smjExec = SortMergeJoinExec( exprA :: exprA :: Nil, exprB :: exprC :: Nil, Inner, None, plan1, plan2) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(smjExec) + val outputPlan = EnsureRequirements.apply(smjExec) outputPlan match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, _, _, _) => assert(leftKeys == Seq(exprA, exprA)) @@ -711,7 +711,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { condition = None, left = plan1, right = plan2) - val outputPlan = EnsureRequirements(spark.sessionState.conf).apply(smjExec) + val outputPlan = EnsureRequirements.apply(smjExec) outputPlan match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index 930935f077665..2de9d21abca82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -141,7 +141,7 @@ abstract class RemoveRedundantProjectsSuiteBase } // Re-apply remove redundant project rule. - val rule = RemoveRedundantProjects(spark.sessionState.conf) + val rule = RemoveRedundantProjects val newExecutedPlan = rule.apply(newPlan) // The manually added ProjectExec node shouldn't be removed. 
assert(collectWithSubqueries(newExecutedPlan) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala index 89aee37a4246f..63964665fc81c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoinSuite.scala @@ -99,7 +99,7 @@ class CoalesceBucketsInJoinSuite extends SQLTestUtils with SharedSparkSession { s.leftKeys, s.rightKeys, Inner, BuildLeft, None, lScan, rScan) } - val plan = CoalesceBucketsInJoin(spark.sessionState.conf)(join) + val plan = CoalesceBucketsInJoin(join) def verify(expected: Option[Int], subPlan: SparkPlan): Unit = { val coalesced = subPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 2d6a5da6d67f7..8782295e5d33b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -155,10 +155,10 @@ class PlanResolutionSuite extends AnalysisTest { // TODO: run the analyzer directly. val rules = Seq( CTESubstitution, - ResolveInlineTables(conf), + ResolveInlineTables, analyzer.ResolveRelations, new ResolveCatalogs(catalogManager), - new ResolveSessionCatalog(catalogManager, conf, _ == Seq("v"), _ => false), + new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false), analyzer.ResolveTables, analyzer.ResolveReferences, analyzer.ResolveSubqueryColumnAliases, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 38e68cd2512e7..296cbc3f3ad52 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -39,7 +39,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test PartitioningCollection on the left side of join. val smjExec1 = SortMergeJoinExec( exprB :: exprA :: Nil, exprA :: exprB :: Nil, Inner, None, plan1, plan2) - EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + EnsureRequirements.apply(smjExec1) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => @@ -51,7 +51,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test PartitioningCollection on the right side of join. val smjExec2 = SortMergeJoinExec( exprA :: exprB :: Nil, exprB :: exprA :: Nil, Inner, None, plan2, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + EnsureRequirements.apply(smjExec2) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -64,7 +64,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // and it should fall back to the right side. 
val smjExec3 = SortMergeJoinExec( exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + EnsureRequirements.apply(smjExec3) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -83,7 +83,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // Test fallback to the right side, which has HashPartitioning. val smjExec1 = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan2) - EnsureRequirements(spark.sessionState.conf).apply(smjExec1) match { + EnsureRequirements.apply(smjExec1) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), _) => @@ -97,7 +97,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { outputPartitioning = PartitioningCollection(Seq(HashPartitioning(exprB :: exprC :: Nil, 5)))) val smjExec2 = SortMergeJoinExec( exprA :: exprB :: Nil, exprC :: exprB :: Nil, Inner, None, plan1, plan3) - EnsureRequirements(spark.sessionState.conf).apply(smjExec2) match { + EnsureRequirements.apply(smjExec2) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => @@ -110,7 +110,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { // found, and it should fall back to the left side, which has a PartitioningCollection. val smjExec3 = SortMergeJoinExec( exprC :: exprB :: Nil, exprA :: exprB :: Nil, Inner, None, plan3, plan1) - EnsureRequirements(spark.sessionState.conf).apply(smjExec3) match { + EnsureRequirements.apply(smjExec3) match { case SortMergeJoinExec(leftKeys, rightKeys, _, _, SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index b6d1baf6e7902..044e9ace6243f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -91,7 +91,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils } else { df1.join(df2, joinExpression, joinType) } - val plan = EnsureRequirements(spark.sessionState.conf).apply(df3.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(df3.queryExecution.sparkPlan) assert(plan.collect { case p: T => p }.size === 1) plan } @@ -171,7 +171,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils val df4 = Seq((1, "5"), (2, "5")).toDF("key", "value") val df5 = df4.join(df3, Seq("key"), "inner") - val plan = EnsureRequirements(spark.sessionState.conf).apply(df5.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(df5.queryExecution.sparkPlan) assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) assert(plan.collect { case p: SortMergeJoinExec => p }.size === 1) @@ -182,7 +182,7 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils val df1 = Seq((1, "4"), (2, 
"2")).toDF("key", "value") val joined = df1.join(df, Seq("key"), "inner") - val plan = EnsureRequirements(spark.sessionState.conf).apply(joined.queryExecution.sparkPlan) + val plan = EnsureRequirements.apply(joined.queryExecution.sparkPlan) assert(plan.collect { case p: BroadcastHashJoinExec => p }.size === 1) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala index e8ac09fdb634e..fcbc0da9d5551 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/ExistenceJoinSuite.scala @@ -107,13 +107,13 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( ShuffledHashJoinExec( leftKeys, rightKeys, joinType, BuildRight, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(ShuffledHashJoinExec( leftKeys, rightKeys, leftSemiPlus, BuildRight, boundCondition, left, right))), expectedAnswer, @@ -126,13 +126,13 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( BroadcastHashJoinExec( leftKeys, rightKeys, joinType, BuildRight, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastHashJoinExec( leftKeys, rightKeys, leftSemiPlus, BuildRight, boundCondition, left, right))), expectedAnswer, @@ -145,12 +145,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( SortMergeJoinExec(leftKeys, rightKeys, joinType, boundCondition, left, right)), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(SortMergeJoinExec( leftKeys, rightKeys, leftSemiPlus, boundCondition, left, right))), expectedAnswer, @@ -162,12 +162,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { test(s"$testName using BroadcastNestedLoopJoin build left") { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + 
EnsureRequirements.apply( BroadcastNestedLoopJoinExec(left, right, BuildLeft, joinType, Some(condition))), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastNestedLoopJoinExec( left, right, BuildLeft, leftSemiPlus, Some(condition)))), expectedAnswer, @@ -178,12 +178,12 @@ class ExistenceJoinSuite extends SparkPlanTest with SharedSparkSession { test(s"$testName using BroadcastNestedLoopJoin build right") { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( BroadcastNestedLoopJoinExec(left, right, BuildRight, joinType, Some(condition))), expectedAnswer, sortAnswers = true) checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(left.sqlContext.sessionState.conf).apply( + EnsureRequirements.apply( createLeftSemiPlusJoin(BroadcastNestedLoopJoinExec( left, right, BuildRight, leftSemiPlus, Some(condition)))), expectedAnswer, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala index 44ab3f7d023d3..f476c15f59983 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala @@ -101,7 +101,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { boundCondition, leftPlan, rightPlan) - EnsureRequirements(spark.sessionState.conf).apply(broadcastJoin) + EnsureRequirements.apply(broadcastJoin) } def makeShuffledHashJoin( @@ -115,7 +115,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { side, None, leftPlan, rightPlan) val filteredJoin = boundCondition.map(FilterExec(_, shuffledHashJoin)).getOrElse(shuffledHashJoin) - EnsureRequirements(spark.sessionState.conf).apply(filteredJoin) + EnsureRequirements.apply(filteredJoin) } def makeSortMergeJoin( @@ -126,7 +126,7 @@ class InnerJoinSuite extends SparkPlanTest with SharedSparkSession { rightPlan: SparkPlan) = { val sortMergeJoin = joins.SortMergeJoinExec(leftKeys, rightKeys, Inner, boundCondition, leftPlan, rightPlan) - EnsureRequirements(spark.sessionState.conf).apply(sortMergeJoin) + EnsureRequirements.apply(sortMergeJoin) } testWithWholeStageCodegenOnAndOff(s"$testName using BroadcastHashJoin (build=left)") { _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala index a466e05816ad8..9f7e0a14f6a5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala @@ -110,7 +110,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { val buildSide = if (joinType == LeftOuter) BuildRight else BuildLeft checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(spark.sessionState.conf).apply( + EnsureRequirements.apply( ShuffledHashJoinExec( leftKeys, rightKeys, joinType, buildSide, boundCondition, left, right)), expectedAnswer.map(Row.fromTuple), @@ 
-143,7 +143,7 @@ class OuterJoinSuite extends SparkPlanTest with SharedSparkSession { extractJoinParts().foreach { case (_, leftKeys, rightKeys, boundCondition, _, _, _) => withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "1") { checkAnswer2(leftRows, rightRows, (left: SparkPlan, right: SparkPlan) => - EnsureRequirements(spark.sessionState.conf).apply( + EnsureRequirements.apply( SortMergeJoinExec(leftKeys, rightKeys, joinType, boundCondition, left, right)), expectedAnswer.map(Row.fromTuple), sortAnswers = true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala index a6c50904d395b..81ce979ef0b62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DataSourceAnalysisSuite.scala @@ -23,12 +23,13 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, Attribute, Cast, Expression, Literal} +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.datasources.DataSourceAnalysis import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.types.{DataType, IntegerType, StructType} -class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { +class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll with SQLHelper { private var targetAttributes: Seq[Attribute] = _ private var targetPartitionSchema: StructType = _ @@ -51,19 +52,26 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } Seq(true, false).foreach { caseSensitive => - val conf = new SQLConf().copy(SQLConf.CASE_SENSITIVE -> caseSensitive) + def testRule(testName: String, caseSensitive: Boolean)(func: => Unit): Unit = { + test(s"$testName (caseSensitive: $caseSensitive)") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + func + } + } + } + def cast(e: Expression, dt: DataType): Expression = { - conf.storeAssignmentPolicy match { + SQLConf.get.storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI | StoreAssignmentPolicy.STRICT => - AnsiCast(e, dt, Option(conf.sessionLocalTimeZone)) + AnsiCast(e, dt, Option(SQLConf.get.sessionLocalTimeZone)) case _ => - Cast(e, dt, Option(conf.sessionLocalTimeZone)) + Cast(e, dt, Option(SQLConf.get.sessionLocalTimeZone)) } } - val rule = DataSourceAnalysis(conf) - test( - s"convertStaticPartitions only handle INSERT having at least static partitions " + - s"(caseSensitive: $caseSensitive)") { + val rule = DataSourceAnalysis + testRule( + "convertStaticPartitions only handle INSERT having at least static partitions", + caseSensitive) { intercept[AssertionError] { rule.convertStaticPartitions( sourceAttributes = Seq('e.int, 'f.int), @@ -73,7 +81,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Missing columns (caseSensitive: $caseSensitive)") { + testRule("Missing columns", caseSensitive) { // Missing columns. 
intercept[AnalysisException] { rule.convertStaticPartitions( @@ -84,7 +92,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Missing partitioning columns (caseSensitive: $caseSensitive)") { + testRule("Missing partitioning columns", caseSensitive) { // Missing partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -113,7 +121,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Wrong partitioning columns (caseSensitive: $caseSensitive)") { + testRule("Wrong partitioning columns", caseSensitive) { // Wrong partitioning columns. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -144,9 +152,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test( - s"Static partitions need to appear before dynamic partitions" + - s" (caseSensitive: $caseSensitive)") { + testRule("Static partitions need to appear before dynamic partitions", caseSensitive) { // Static partitions need to appear before dynamic partitions. intercept[AnalysisException] { rule.convertStaticPartitions( @@ -157,7 +163,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"All static partitions (caseSensitive: $caseSensitive)") { + testRule("All static partitions", caseSensitive) { if (!caseSensitive) { val nonPartitionedAttributes = Seq('e.int, 'f.int) val expected = nonPartitionedAttributes ++ @@ -195,7 +201,7 @@ class DataSourceAnalysisSuite extends SparkFunSuite with BeforeAndAfterAll { } } - test(s"Static partition and dynamic partition (caseSensitive: $caseSensitive)") { + testRule("Static partition and dynamic partition", caseSensitive) { val nonPartitionedAttributes = Seq('e.int, 'f.int) val dynamicPartitionAttributes = Seq('g.int) val expected = diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index b9135733856a5..345f0288de4b1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -75,22 +75,22 @@ class HiveSessionStateBuilder( */ override protected def analyzer: Analyzer = new Analyzer(catalogManager, conf) { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = - new ResolveHiveSerdeTable(session) +: - new FindDataSourceTable(session) +: - new ResolveSQLOnFile(session) +: - new FallBackFileSourceV2(session) +: + ResolveHiveSerdeTable +: + FindDataSourceTable +: + ResolveSQLOnFile +: + FallBackFileSourceV2 +: ResolveEncodersInScalaAgg +: new ResolveSessionCatalog( - catalogManager, conf, catalog.isTempView, catalog.isTempFunction) +: + catalogManager, catalog.isTempView, catalog.isTempFunction) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = - new DetectAmbiguousSelfJoin(conf) +: - new DetermineTableStats(session) +: - RelationConversions(conf, catalog) +: - PreprocessTableCreation(session) +: - PreprocessTableInsertion(conf) +: - DataSourceAnalysis(conf) +: + DetectAmbiguousSelfJoin +: + DetermineTableStats +: + RelationConversions(catalog) +: + PreprocessTableCreation +: + PreprocessTableInsertion +: + DataSourceAnalysis +: HiveAnalysis +: customPostHocResolutionRules @@ -103,7 +103,7 @@ class HiveSessionStateBuilder( } override def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = - Seq(new 
PruneHiveTablePartitions(session)) + Seq(PruneHiveTablePartitions) /** * Planner that takes into account Hive-specific strategies. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 2ace96583d9cc..f91f78616abf5 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} * Determine the database, serde/format and schema of the Hive serde table, according to the storage * properties. */ -class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { +object ResolveHiveSerdeTable extends Rule[LogicalPlan] { private def determineHiveSerde(table: CatalogTable): CatalogTable = { if (table.storage.serde.nonEmpty) { table @@ -50,7 +50,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { throw new AnalysisException("Creating bucketed Hive serde table is not supported yet.") } - val defaultStorage = HiveSerDe.getDefaultStorage(session.sessionState.conf) + val defaultStorage = HiveSerDe.getDefaultStorage(conf) val options = new HiveOptions(table.storage.properties) val fileStorage = if (options.fileFormat.isDefined) { @@ -90,7 +90,7 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case c @ CreateTable(t, _, query) if DDLUtils.isHiveTable(t) => // Finds the database name if the name does not exist. - val dbName = t.identifier.database.getOrElse(session.catalog.currentDatabase) + val dbName = t.identifier.database.getOrElse(SparkSession.active.catalog.currentDatabase) val table = t.copy(identifier = t.identifier.copy(database = Some(dbName))) // Determines the serde/format of Hive tables @@ -113,16 +113,15 @@ class ResolveHiveSerdeTable(session: SparkSession) extends Rule[LogicalPlan] { } } -class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { +object DetermineTableStats extends Rule[LogicalPlan] { private def hiveTableWithStats(relation: HiveTableRelation): HiveTableRelation = { val table = relation.tableMeta val partitionCols = relation.partitionCols - val conf = session.sessionState.conf // For partitioned tables, the partition directory may be outside of the table directory. // Which is expensive to get table size. Please see how we implemented it in the AnalyzeTable. val sizeInBytes = if (conf.fallBackToHdfsForStatsEnabled && partitionCols.isEmpty) { try { - val hadoopConf = session.sessionState.newHadoopConf() + val hadoopConf = SparkSession.active.sessionState.newHadoopConf() val tablePath = new Path(table.location) val fs: FileSystem = tablePath.getFileSystem(hadoopConf) fs.getContentSummary(tablePath).getLength @@ -191,7 +190,6 @@ object HiveAnalysis extends Rule[LogicalPlan] { * `PreprocessTableCreation`, `PreprocessTableInsertion`, `DataSourceAnalysis` and `HiveAnalysis`. 
*/ case class RelationConversions( - conf: SQLConf, sessionCatalog: HiveSessionCatalog) extends Rule[LogicalPlan] { private def isConvertible(relation: HiveTableRelation): Boolean = { isConvertible(relation.tableMeta) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala index f6aff10cbc147..50ced7870d9ed 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.DataSourceStrategy -import org.apache.spark.sql.internal.SQLConf /** * Prune hive table partitions using partition filters on [[HiveTableRelation]]. The pruned @@ -40,10 +39,10 @@ import org.apache.spark.sql.internal.SQLConf * * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. */ -private[sql] class PruneHiveTablePartitions(session: SparkSession) - extends Rule[LogicalPlan] with CastSupport with PredicateHelper { +private[sql] class PruneHiveTablePartitions - override val conf: SQLConf = session.sessionState.conf +private[sql] object PruneHiveTablePartitions + extends Rule[LogicalPlan] with CastSupport with PredicateHelper { /** * Extract the partition filters from the filters on the table. @@ -65,11 +64,11 @@ private[sql] class PruneHiveTablePartitions(session: SparkSession) relation: HiveTableRelation, partitionFilters: ExpressionSet): Seq[CatalogTablePartition] = { if (conf.metastorePartitionPruning) { - session.sessionState.catalog.listPartitionsByFilter( + SparkSession.active.sessionState.catalog.listPartitionsByFilter( relation.tableMeta.identifier, partitionFilters.toSeq) } else { ExternalCatalogUtils.prunePartitionsByFilter(relation.tableMeta, - session.sessionState.catalog.listPartitions(relation.tableMeta.identifier), + SparkSession.active.sessionState.catalog.listPartitions(relation.tableMeta.identifier), partitionFilters.toSeq, conf.sessionLocalTimeZone) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala index 018df35403be5..6b35928067b50 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitionsSuite.scala @@ -29,7 +29,7 @@ class PruneHiveTablePartitionsSuite extends PrunePartitionSuiteBase { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneHiveTablePartitions", Once, - EliminateSubqueryAliases, new PruneHiveTablePartitions(spark)) :: Nil + EliminateSubqueryAliases, PruneHiveTablePartitions) :: Nil } test("SPARK-15616: statistics pruned after going through PruneHiveTablePartitions") { From 281f99c70b2fab2839495638d07acc1e534e5ad6 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 27 Oct 2020 22:53:05 +0900 Subject: [PATCH 0335/1009] [SPARK-33225][SQL] Extract AliasHelper trait ### What changes were proposed in this pull request? Extract methods related to handling Aliases to a trait. 
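For reference, a rule can now mix in the trait instead of calling `CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]` at each call site. Below is a minimal sketch of the intended usage; the rule `MyCleanupRule` and its transformation are hypothetical and only illustrate the trait, while the helper names come from the `AliasHelper` added in this patch:

```scala
import org.apache.spark.sql.catalyst.expressions.{AliasHelper, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical rule, for illustration only: mixing in AliasHelper gives direct
// access to trimAliases / trimNonTopLevelAliases / getAliasMap / replaceAlias.
object MyCleanupRule extends Rule[LogicalPlan] with AliasHelper {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Project(projectList, child) =>
      // trimNonTopLevelAliases is generic in the trait, so the previous
      // asInstanceOf[NamedExpression] casts at the callers are no longer needed.
      val cleanedProjectList: Seq[NamedExpression] = projectList.map(trimNonTopLevelAliases)
      Project(cleanedProjectList, child)
  }
}
```

Since `PredicateHelper` now extends `AliasHelper`, rules that already mix in `PredicateHelper` (such as the pushdown rules touched in this patch) pick up these helpers without further changes.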
### Why are the changes needed? Avoid code duplication ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs cover this Closes #30134 from tanelk/SPARK-33225_aliasHelper. Lead-authored-by: tanel.kiis@gmail.com Co-authored-by: Tanel Kiis Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 43 ++------ .../catalyst/expressions/AliasHelper.scala | 100 ++++++++++++++++++ .../sql/catalyst/expressions/predicates.scala | 31 ++---- .../sql/catalyst/optimizer/Optimizer.scala | 46 +------- .../optimizer/PushDownLeftSemiAntiJoin.scala | 4 +- .../sql/catalyst/optimizer/subquery.scala | 4 +- 6 files changed, 125 insertions(+), 103 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 39816f499944b..52c96f4a8f014 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2193,7 +2193,7 @@ class Analyzer( * those in a HAVING clause or ORDER BY clause. These expressions are pushed down to the * underlying aggregate operator and then projected away after the original operator. */ - object ResolveAggregateFunctions extends Rule[LogicalPlan] { + object ResolveAggregateFunctions extends Rule[LogicalPlan] with AliasHelper { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { // Resolve aggregate with having clause to Filter(..., Aggregate()). Note, to avoid wrongly // resolve the having condition expression, here we skip resolving it in ResolveReferences @@ -2226,8 +2226,7 @@ class Analyzer( // Aggregate. checkAnalysis(resolvedAggregate) - val originalAggExprs = aggregate.aggregateExpressions.map( - CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val originalAggExprs = aggregate.aggregateExpressions.map(trimNonTopLevelAliases) // If the ordering expression is same with original aggregate expression, we don't need // to push down this ordering expression and can reference the original aggregate @@ -2370,7 +2369,7 @@ class Analyzer( case _ => false }.isDefined } } - CleanupAliases.trimNonTopLevelAliases(expr) match { + trimNonTopLevelAliases(expr) match { case UnresolvedAlias(g: Generator, _) => hasInnerGenerator(g) case Alias(g: Generator, _) => hasInnerGenerator(g) case MultiAlias(g: Generator, _) => hasInnerGenerator(g) @@ -2440,7 +2439,7 @@ class Analyzer( val projectExprs = Array.ofDim[NamedExpression](aggList.length) val newAggList = aggList - .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(trimNonTopLevelAliases) .zipWithIndex .flatMap { case (AliasedGenerator(generator, names, outer), idx) => @@ -2483,7 +2482,7 @@ class Analyzer( var resolvedGenerator: Generate = null val newProjectList = projectList - .map(CleanupAliases.trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + .map(trimNonTopLevelAliases) .flatMap { case AliasedGenerator(generator, names, outer) if generator.childrenResolved => // It's a sanity check, this should not happen as the previous case will throw @@ -3495,45 +3494,23 @@ object EliminateUnions extends Rule[LogicalPlan] { * are not in its `children`, e.g. `RuntimeReplaceable`, the transformation for Aliases in this * rule can't work for those parameters. 
*/ -object CleanupAliases extends Rule[LogicalPlan] { - def trimAliases(e: Expression): Expression = { - e.transformDown { - case Alias(child, _) => child - case MultiAlias(child, _) => child - } - } - - def trimNonTopLevelAliases(e: Expression): Expression = e match { - case a: Alias => - a.copy(child = trimAliases(a.child))( - exprId = a.exprId, - qualifier = a.qualifier, - explicitMetadata = Some(a.metadata)) - case a: MultiAlias => - a.copy(child = trimAliases(a.child)) - case other => trimAliases(other) - } - +object CleanupAliases extends Rule[LogicalPlan] with AliasHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case Project(projectList, child) => - val cleanedProjectList = - projectList.map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val cleanedProjectList = projectList.map(trimNonTopLevelAliases) Project(cleanedProjectList, child) case Aggregate(grouping, aggs, child) => - val cleanedAggs = aggs.map(trimNonTopLevelAliases(_).asInstanceOf[NamedExpression]) + val cleanedAggs = aggs.map(trimNonTopLevelAliases) Aggregate(grouping.map(trimAliases), cleanedAggs, child) case Window(windowExprs, partitionSpec, orderSpec, child) => - val cleanedWindowExprs = - windowExprs.map(e => trimNonTopLevelAliases(e).asInstanceOf[NamedExpression]) + val cleanedWindowExprs = windowExprs.map(trimNonTopLevelAliases) Window(cleanedWindowExprs, partitionSpec.map(trimAliases), orderSpec.map(trimAliases(_).asInstanceOf[SortOrder]), child) case CollectMetrics(name, metrics, child) => - val cleanedMetrics = metrics.map { - e => trimNonTopLevelAliases(e).asInstanceOf[NamedExpression] - } + val cleanedMetrics = metrics.map(trimNonTopLevelAliases) CollectMetrics(name, cleanedMetrics, child) // Operators that operate on objects should only have expressions from encoders, which should diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala new file mode 100644 index 0000000000000..ec47875754a6f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.analysis.MultiAlias +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Project} + +/** + * Helper methods for collecting and replacing aliases. + */ +trait AliasHelper { + + protected def getAliasMap(plan: Project): AttributeMap[Alias] = { + // Create a map of Aliases to their values from the child projection. 
+ // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> Alias(a + b, c)). + getAliasMap(plan.projectList) + } + + protected def getAliasMap(plan: Aggregate): AttributeMap[Alias] = { + // Find all the aliased expressions in the aggregate list that don't include any actual + // AggregateExpression or PythonUDF, and create a map from the alias to the expression + val aliasMap = plan.aggregateExpressions.collect { + case a: Alias if a.child.find(e => e.isInstanceOf[AggregateExpression] || + PythonUDF.isGroupedAggPandasUDF(e)).isEmpty => + (a.toAttribute, a) + } + AttributeMap(aliasMap) + } + + protected def getAliasMap(exprs: Seq[NamedExpression]): AttributeMap[Alias] = { + // Create a map of Aliases to their values from the child projection. + // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> Alias(a + b, c)). + AttributeMap(exprs.collect { case a: Alias => (a.toAttribute, a) }) + } + + /** + * Replace all attributes, that reference an alias, with the aliased expression + */ + protected def replaceAlias( + expr: Expression, + aliasMap: AttributeMap[Alias]): Expression = { + // Use transformUp to prevent infinite recursion when the replacement expression + // redefines the same ExprId, + trimAliases(expr.transformUp { + case a: Attribute => aliasMap.getOrElse(a, a) + }) + } + + /** + * Replace all attributes, that reference an alias, with the aliased expression, + * but keep the name of the outmost attribute. + */ + protected def replaceAliasButKeepName( + expr: NamedExpression, + aliasMap: AttributeMap[Alias]): NamedExpression = { + // Use transformUp to prevent infinite recursion when the replacement expression + // redefines the same ExprId, + trimNonTopLevelAliases(expr.transformUp { + case a: Attribute => aliasMap.getOrElse(a, a) + }).asInstanceOf[NamedExpression] + } + + protected def trimAliases(e: Expression): Expression = { + e.transformDown { + case Alias(child, _) => child + case MultiAlias(child, _) => child + } + } + + protected def trimNonTopLevelAliases[T <: Expression](e: T): T = { + val res = e match { + case a: Alias => + a.copy(child = trimAliases(a.child))( + exprId = a.exprId, + qualifier = a.qualifier, + explicitMetadata = Some(a.metadata)) + case a: MultiAlias => + a.copy(child = trimAliases(a.child)) + case other => trimAliases(other) + } + + res.asInstanceOf[T] + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 1f55045dbca74..f440534745ba1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -97,7 +97,7 @@ object Predicate extends CodeGeneratorWithInterpretedFallback[Expression, BasePr } } -trait PredicateHelper extends Logging { +trait PredicateHelper extends AliasHelper with Logging { protected def splitConjunctivePredicates(condition: Expression): Seq[Expression] = { condition match { case And(cond1, cond2) => @@ -117,18 +117,13 @@ trait PredicateHelper extends Logging { plan: LogicalPlan): Option[(Expression, LogicalPlan)] = { plan match { - case Project(projectList, child) => - val aliases = AttributeMap(projectList.collect { - case a @ Alias(child, _) => (a.toAttribute, child) - }) - findExpressionAndTrackLineageDown(replaceAlias(exp, aliases), child) + case p: Project => + val aliases = getAliasMap(p) + findExpressionAndTrackLineageDown(replaceAlias(exp, aliases), 
p.child) // we can unwrap only if there are row projections, and no aggregation operation - case Aggregate(_, aggregateExpressions, child) => - val aliasMap = AttributeMap(aggregateExpressions.collect { - case a: Alias if a.child.find(_.isInstanceOf[AggregateExpression]).isEmpty => - (a.toAttribute, a.child) - }) - findExpressionAndTrackLineageDown(replaceAlias(exp, aliasMap), child) + case a: Aggregate => + val aliasMap = getAliasMap(a) + findExpressionAndTrackLineageDown(replaceAlias(exp, aliasMap), a.child) case l: LeafNode if exp.references.subsetOf(l.outputSet) => Some((exp, l)) case other => @@ -150,18 +145,6 @@ trait PredicateHelper extends Logging { } } - // Substitute any known alias from a map. - protected def replaceAlias( - condition: Expression, - aliases: AttributeMap[Expression]): Expression = { - // Use transformUp to prevent infinite recursion when the replacement expression - // redefines the same ExprId, - condition.transformUp { - case a: Attribute => - aliases.getOrElse(a, a) - } - } - /** * Returns true if `expr` can be evaluated using only the output of `plan`. This method * can be used to determine when it is acceptable to move expression evaluation within a query diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 3e9a97419682d..f3f64031843e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -729,7 +729,7 @@ object ColumnPruning extends Rule[LogicalPlan] { * and the upper project consists of the same number of columns which is equal or aliasing. * `GlobalLimit(LocalLimit)` pattern is also considered. */ -object CollapseProject extends Rule[LogicalPlan] { +object CollapseProject extends Rule[LogicalPlan] with AliasHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case p1 @ Project(_, p2: Project) => @@ -758,17 +758,9 @@ object CollapseProject extends Rule[LogicalPlan] { s.copy(child = p2.copy(projectList = buildCleanedProjectList(l1, p2.projectList))) } - private def collectAliases(projectList: Seq[NamedExpression]): AttributeMap[Alias] = { - AttributeMap(projectList.collect { - case a: Alias => a.toAttribute -> a - }) - } - private def haveCommonNonDeterministicOutput( upper: Seq[NamedExpression], lower: Seq[NamedExpression]): Boolean = { - // Create a map of Aliases to their values from the lower projection. - // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). - val aliases = collectAliases(lower) + val aliases = getAliasMap(lower) // Collapse upper and lower Projects if and only if their overlapped expressions are all // deterministic. @@ -780,21 +772,8 @@ object CollapseProject extends Rule[LogicalPlan] { private def buildCleanedProjectList( upper: Seq[NamedExpression], lower: Seq[NamedExpression]): Seq[NamedExpression] = { - // Create a map of Aliases to their values from the lower projection. - // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)). - val aliases = collectAliases(lower) - - // Substitute any attributes that are produced by the lower projection, so that we safely - // eliminate it. - // e.g., 'SELECT c + 1 FROM (SELECT a + b AS C ...' produces 'SELECT a + b + 1 ...' - // Use transformUp to prevent infinite recursion. 
- val rewrittenUpper = upper.map(_.transformUp { - case a: Attribute => aliases.getOrElse(a, a) - }) - // collapse upper and lower Projects may introduce unnecessary Aliases, trim them here. - rewrittenUpper.map { p => - CleanupAliases.trimNonTopLevelAliases(p).asInstanceOf[NamedExpression] - } + val aliases = getAliasMap(lower) + upper.map(replaceAliasButKeepName(_, aliases)) } private def isRenaming(list1: Seq[NamedExpression], list2: Seq[NamedExpression]): Boolean = { @@ -1271,23 +1250,6 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe } } - def getAliasMap(plan: Project): AttributeMap[Expression] = { - // Create a map of Aliases to their values from the child projection. - // e.g., 'SELECT a + b AS c, d ...' produces Map(c -> a + b). - AttributeMap(plan.projectList.collect { case a: Alias => (a.toAttribute, a.child) }) - } - - def getAliasMap(plan: Aggregate): AttributeMap[Expression] = { - // Find all the aliased expressions in the aggregate list that don't include any actual - // AggregateExpression or PythonUDF, and create a map from the alias to the expression - val aliasMap = plan.aggregateExpressions.collect { - case a: Alias if a.child.find(e => e.isInstanceOf[AggregateExpression] || - PythonUDF.isGroupedAggPandasUDF(e)).isEmpty => - (a.toAttribute, a.child) - } - AttributeMap(aliasMap) - } - def canPushThrough(p: UnaryNode): Boolean = p match { // Note that some operators (e.g. project, aggregate, union) are being handled separately // (earlier in this rule). diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala index 606db85fcdea6..50fe0192d6f26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala @@ -42,7 +42,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { // No join condition, just push down the Join below Project p.copy(child = Join(gChild, rightOp, joinType, joinCond, hint)) } else { - val aliasMap = PushPredicateThroughNonJoin.getAliasMap(p) + val aliasMap = getAliasMap(p) val newJoinCond = if (aliasMap.nonEmpty) { Option(replaceAlias(joinCond.get, aliasMap)) } else { @@ -55,7 +55,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { case join @ Join(agg: Aggregate, rightOp, LeftSemiOrAnti(_), _, _) if agg.aggregateExpressions.forall(_.deterministic) && agg.groupingExpressions.nonEmpty && !agg.aggregateExpressions.exists(ScalarSubquery.hasCorrelatedScalarSubquery) => - val aliasMap = PushPredicateThroughNonJoin.getAliasMap(agg) + val aliasMap = getAliasMap(agg) val canPushDownPredicate = (predicate: Expression) => { val replaced = replaceAlias(predicate, aliasMap) predicate.references.nonEmpty && diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index f184253ef0595..cb076f6e35184 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -335,7 +335,7 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper /** * This rule rewrites correlated [[ScalarSubquery]] 
expressions into LEFT OUTER joins. */ -object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { +object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelper { /** * Extract all correlated scalar subqueries from an expression. The subqueries are collected using * the given collector. The expression is rewritten and returned. @@ -357,7 +357,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] { */ private def tryEvalExpr(expr: Expression): Expression = { // Removes Alias over given expression, because Alias is not foldable. - if (!CleanupAliases.trimAliases(expr).foldable) { + if (!trimAliases(expr).foldable) { // SPARK-28441: Some expressions, like PythonUDF, can't be statically evaluated. // Needs to evaluate them on query runtime. expr From f284218dae23bf91e72e221943188cdb85e13dac Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 27 Oct 2020 15:04:53 +0000 Subject: [PATCH 0336/1009] [SPARK-33137][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: update type and nullability of columns (Postgres dialect) ### What changes were proposed in this pull request? Override the default SQL strings in Postgres Dialect for: - ALTER TABLE UPDATE COLUMN TYPE - ALTER TABLE UPDATE COLUMN NULLABILITY Add new docker integration test suite `jdbc/v2/PostgreSQLIntegrationSuite.scala` ### Why are the changes needed? supports Postgres specific ALTER TABLE syntax. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test `PostgreSQLIntegrationSuite` Closes #30089 from huaxingao/postgres_docker. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 2 +- .../jdbc/v2/PostgresIntegrationSuite.scala | 69 +++++++++++++++++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 21 ++++++ .../spark/sql/jdbc/PostgresDialect.scala | 16 +++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 82f9f978c5da2..5c1442283aaed 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "test-only *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala new file mode 100644 index 0000000000000..45994a5093748 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite" + * }}} + */ +@DockerTest +class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + override val catalogName: String = "postgresql" + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.postgresql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.postgresql.url", db.getJdbcUrl(dockerIp, externalPort)) + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 942c6237fd358..8419db7784e88 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -73,6 +73,27 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } + test("SPARK-33034: ALTER TABLE ... 
drop column") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER) USING _") + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN c3") + val t = spark.table(s"$catalogName.alt_table") + val expectedSchema = new StructType().add("C2", StringType) + assert(t.schema === expectedSchema) + // Drop not existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN bad_column") + }.getMessage + assert(msg.contains("Cannot delete missing field bad_column in alt_table schema")) + } + // Drop a column from a not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table DROP COLUMN C1") + }.getMessage + assert(msg.contains("Table not found")) + } + test("SPARK-33034: ALTER TABLE ... update column type") { withTable(s"$catalogName.alt_table") { testUpdateColumnType(s"$catalogName.alt_table") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index a1ce25a0464c3..ee8cbed1ff7a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -126,4 +126,20 @@ private object PostgresDialect extends JdbcDialect { } } + // See https://www.postgresql.org/docs/12/sql-altertable.html + override def getUpdateColumnTypeQuery( + tableName: String, + columnName: String, + newDataType: String): String = { + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} TYPE $newDataType" + } + + // See https://www.postgresql.org/docs/12/sql-altertable.html + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + val nullable = if (isNullable) "DROP NOT NULL" else "SET NOT NULL" + s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} $nullable" + } } From 98f0a219915dc9ed696602b9bfad82d9cf6c4113 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 27 Oct 2020 11:54:08 -0700 Subject: [PATCH 0337/1009] [SPARK-33231][SPARK-33262][CORE] Make pod allocation executor timeouts configurable & allow scheduling with pending pods ### What changes were proposed in this pull request? Make pod allocation executor timeouts configurable. Keep all known pods in mind when allocating executors to avoid over-allocating if the pending time is much higher than the allocation interval. This PR increases the default wait time to 600s from the current 60s. Since nodes can now remain "pending" for long periods of time, we allow additional batches to be scheduled during pending allocation but keep the total number of pods in account. ### Why are the changes needed? The current executor timeouts do not match that of all real world clusters especially under load. While this can be worked around by increasing the allocation batch delay, that will decrease the speed at which the total number of executors will be able to be requested. The increase in default timeout is needed to handle real-world testing environments I've encountered on moderately busy clusters and K8s clusters with their own underlying dynamic scale-up of hardware (e.g. GKE, EKS, etc.) ### Does this PR introduce _any_ user-facing change? Yes new configuration property ### How was this patch tested? 
Updated existing test to use the timeout from the new configuration property. Verified test failed without the update. Closes #30155 from holdenk/SPARK-33231-make-pod-creation-timeout-configurable. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 8 ++++++++ .../scheduler/cluster/k8s/ExecutorPodsAllocator.scala | 9 +++++---- .../cluster/k8s/ExecutorPodsAllocatorSuite.scala | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index d399f66b45981..e3af1ccc24f1c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -227,6 +227,14 @@ private[spark] object Config extends Logging { .checkValue(value => value > 0, "Allocation batch delay must be a positive time value.") .createWithDefaultString("1s") + val KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT = + ConfigBuilder("spark.kubernetes.allocation.executor.timeout") + .doc("Time to wait before considering a pending executor timedout.") + .version("3.1.0") + .timeConf(TimeUnit.MILLISECONDS) + .checkValue(value => value > 0, "Allocation executor timeout must be a positive time value.") + .createWithDefaultString("600s") + val KUBERNETES_EXECUTOR_LOST_REASON_CHECK_MAX_ATTEMPTS = ConfigBuilder("spark.kubernetes.executor.lostCheck.maxAttempts") .doc("Maximum number of attempts allowed for checking the reason of an executor loss " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 5e09de37f2848..4e8ca47b8dd02 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -51,7 +51,9 @@ private[spark] class ExecutorPodsAllocator( private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) - private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000) + private val podCreationTimeout = math.max( + podAllocationDelay * 5, + conf.get(KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT)) private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 @@ -214,10 +216,9 @@ private[spark] class ExecutorPodsAllocator( } if (newlyCreatedExecutors.isEmpty - && currentPendingExecutors.isEmpty - && currentRunningCount < currentTotalExpectedExecutors) { + && knownPodCount < currentTotalExpectedExecutors) { val numExecutorsToAllocate = math.min( - currentTotalExpectedExecutors - currentRunningCount, podAllocationSize) + currentTotalExpectedExecutors - knownPodCount, podAllocationSize) logInfo(s"Going to request $numExecutorsToAllocate executors from Kubernetes.") for ( _ <- 0 until numExecutorsToAllocate) { val newExecutorId = EXECUTOR_ID_COUNTER.incrementAndGet() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 
c1c33b2a0f199..84c07bc588b06 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -56,8 +56,10 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) private val podAllocationDelay = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) - private val podCreationTimeout = math.max(podAllocationDelay * 5, 60000L) private val executorIdleTimeout = conf.get(DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT) * 1000 + private val podCreationTimeout = math.max(podAllocationDelay * 5, + conf.get(KUBERNETES_ALLOCATION_EXECUTOR_TIMEOUT)) + private val secMgr = new SecurityManager(conf) private var waitForExecutorPodsClock: ManualClock = _ From 3f2a2b5fe6ada37ef86f00737387e6cf2496df74 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Tue, 27 Oct 2020 13:20:22 -0700 Subject: [PATCH 0338/1009] [SPARK-33260][SQL] Fix incorrect results from SortExec when sortOrder is Stream ### What changes were proposed in this pull request? The following query produces incorrect results. The query has two essential features: (1) it contains a string aggregate, resulting in a `SortExec` node, and (2) it contains a duplicate grouping key, causing `RemoveRepetitionFromGroupExpressions` to produce a sort order stored as a `Stream`. ```sql SELECT bigint_col_1, bigint_col_9, MAX(CAST(bigint_col_1 AS string)) FROM table_4 GROUP BY bigint_col_1, bigint_col_9, bigint_col_9 ``` When the sort order is stored as a `Stream`, the line `ordering.map(_.child.genCode(ctx))` in `GenerateOrdering#createOrderKeys()` produces unpredictable side effects to `ctx`. This is because `genCode(ctx)` modifies `ctx`. When ordering is a `Stream`, the modifications will not happen immediately as intended, but will instead occur lazily when the returned `Stream` is used later. Similar bugs have occurred at least three times in the past: https://issues.apache.org/jira/browse/SPARK-24500, https://issues.apache.org/jira/browse/SPARK-25767, https://issues.apache.org/jira/browse/SPARK-26680. The fix is to check if `ordering` is a `Stream` and force the modifications to happen immediately if so. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test for `SortExec` where `sortOrder` is a `Stream`. The test previously failed and now passes. Closes #30160 from ankurdave/SPARK-33260. 
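For reference, the deferred side effects at the heart of the bug can be seen with a minimal standalone sketch (plain Scala 2.12 `Stream`, independent of Spark; nothing beyond the standard library is assumed):

```scala
var sideEffects = 0
val mapped = Stream(1, 2, 3).map { i => sideEffects += 1; i * 10 }
// Stream.map only evaluates the head eagerly; the remaining side effects are
// deferred until the Stream is actually traversed.
assert(sideEffects == 1)
mapped.toList // forces the tail
assert(sideEffects == 3)

// Converting to a strict collection first (as the fix does via toIndexedSeq)
// runs all side effects up front.
var eager = 0
Stream(1, 2, 3).toIndexedSeq.map { i => eager += 1; i * 10 }
assert(eager == 3)
```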
Authored-by: Ankur Dave Signed-off-by: Dongjoon Hyun --- .../expressions/codegen/GenerateOrdering.scala | 4 +++- .../org/apache/spark/sql/execution/SortSuite.scala | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 63bd59e7628b2..5d00519d27c53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -71,7 +71,9 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], BaseOrdering] with ctx.INPUT_ROW = row // to use INPUT_ROW we must make sure currentVars is null ctx.currentVars = null - ordering.map(_.child.genCode(ctx)) + // SPARK-33260: To avoid unpredictable modifications to `ctx` when `ordering` is a Stream, we + // use `toIndexedSeq` to make the transformation eager. + ordering.toIndexedSeq.map(_.child.genCode(ctx)) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index 7654a9d982059..6a4f3f62641f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -97,6 +97,19 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { } } + test("SPARK-33260: sort order is a Stream") { + val input = Seq( + ("Hello", 4, 2.0), + ("Hello", 1, 1.0), + ("World", 8, 3.0) + ) + checkAnswer( + input.toDF("a", "b", "c"), + (child: SparkPlan) => SortExec(Stream('a.asc, 'b.asc, 'c.asc), global = true, child = child), + input.sortBy(t => (t._1, t._2, t._3)).map(Row.fromTuple), + sortAnswers = false) + } + // Test sorting on different data types for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); From 7d11d972c356140d21909c6a62cdb8d813bd015e Mon Sep 17 00:00:00 2001 From: Stuart White Date: Wed, 28 Oct 2020 08:36:14 +0900 Subject: [PATCH 0339/1009] [SPARK-33246][SQL][DOCS] Correct documentation for null semantics of "NULL AND False" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The documentation of the Spark SQL null semantics states that "NULL AND False" yields NULL. This is incorrect. "NULL AND False" yields False. ``` Seq[(java.lang.Boolean, java.lang.Boolean)]( (null, false) ) .toDF("left_operand", "right_operand") .withColumn("AND", 'left_operand && 'right_operand) .show(truncate = false) +------------+-------------+-----+ |left_operand|right_operand|AND | +------------+-------------+-----+ |null |false |false| +------------+-------------+-----+ ``` I propose the documentation be updated to reflect that "NULL AND False" yields False. This contribution is my original work and I license it to the project under the project’s open source license. ### Why are the changes needed? This change improves the accuracy of the documentation. ### Does this PR introduce _any_ user-facing change? Yes. This PR introduces a fix to the documentation. ### How was this patch tested? Since this is only a documentation change, no tests were added. Closes #30161 from stwhit/SPARK-33246. 
Authored-by: Stuart White Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-null-semantics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-null-semantics.md b/docs/sql-ref-null-semantics.md index fb5d2a312d0e1..3c12e7a28b64e 100644 --- a/docs/sql-ref-null-semantics.md +++ b/docs/sql-ref-null-semantics.md @@ -125,7 +125,7 @@ The following tables illustrate the behavior of logical operators when one or bo |True|NULL|True|NULL| |False|NULL|NULL|False| |NULL|True|True|NULL| -|NULL|False|NULL|NULL| +|NULL|False|NULL|False| |NULL|NULL|NULL|NULL| |operand|NOT| From ea709d67486dd6329977df6c3ed7a443b835dd48 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 28 Oct 2020 09:46:13 +0900 Subject: [PATCH 0340/1009] [SPARK-33258][R][SQL] Add asc_nulls_* and desc_nulls_* methods to SparkR ### What changes were proposed in this pull request? This PR adds the following `Column` methods to R API: - asc_nulls_first - asc_nulls_last - desc_nulls_first - desc_nulls_last ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? No, new methods. ### How was this patch tested? New unit tests. Closes #30159 from zero323/SPARK-33258. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 4 ++++ R/pkg/R/column.R | 6 +++++- R/pkg/R/generics.R | 12 ++++++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index a9cca4bf6f6fc..404a6968ea429 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -228,6 +228,8 @@ exportMethods("%<=>%", "arrays_zip", "arrays_zip_with", "asc", + "asc_nulls_first", + "asc_nulls_last", "ascii", "asin", "assert_true", @@ -273,6 +275,8 @@ exportMethods("%<=>%", "degrees", "dense_rank", "desc", + "desc_nulls_first", + "desc_nulls_last", "dropFields", "element_at", "encode", diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index c5fcfaff94029..835178990b485 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -67,7 +67,11 @@ operators <- list( # we can not override `&&` and `||`, so use `&` and `|` instead "&" = "and", "|" = "or", "^" = "pow" ) -column_functions1 <- c("asc", "desc", "isNaN", "isNull", "isNotNull") +column_functions1 <- c( + "asc", "asc_nulls_first", "asc_nulls_last", + "desc", "desc_nulls_first", "desc_nulls_last", + "isNaN", "isNull", "isNotNull" +) column_functions2 <- c("like", "rlike", "getField", "getItem", "contains") createOperator <- function(op) { diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 6b732e594cd3f..e372ae27e315a 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -675,6 +675,12 @@ setGeneric("broadcast", function(x) { standardGeneric("broadcast") }) #' @rdname columnfunctions setGeneric("asc", function(x) { standardGeneric("asc") }) +#' @rdname columnfunctions +setGeneric("asc_nulls_first", function(x) { standardGeneric("asc_nulls_first") }) + +#' @rdname columnfunctions +setGeneric("asc_nulls_last", function(x) { standardGeneric("asc_nulls_last") }) + #' @rdname between setGeneric("between", function(x, bounds) { standardGeneric("between") }) @@ -689,6 +695,12 @@ setGeneric("contains", function(x, ...) 
{ standardGeneric("contains") }) #' @rdname columnfunctions setGeneric("desc", function(x) { standardGeneric("desc") }) +#' @rdname columnfunctions +setGeneric("desc_nulls_first", function(x) { standardGeneric("desc_nulls_first") }) + +#' @rdname columnfunctions +setGeneric("desc_nulls_last", function(x) { standardGeneric("desc_nulls_last") }) + #' @rdname endsWith setGeneric("endsWith", function(x, suffix) { standardGeneric("endsWith") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 077dfc6770d94..685e6e672bdf9 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1428,6 +1428,8 @@ test_that("column functions", { vector_to_array(c, "float32") + vector_to_array(c, "float64") c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) + c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + + desc_nulls_first(c1) + desc_nulls_last(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) From c2bea045e3628081bca1ba752669a5bc009ebd00 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 28 Oct 2020 11:21:35 +0900 Subject: [PATCH 0341/1009] [SPARK-33264][SQL][DOCS] Add a dedicated page for SQL-on-file in SQL documents ### What changes were proposed in this pull request? This PR intends to add a dedicated page for SQL-on-file in SQL documents. This comes from the comment: https://github.com/apache/spark/pull/30095/files#r508965149 ### Why are the changes needed? For better documentations. ### Does this PR introduce _any_ user-facing change? Screen Shot 2020-10-28 at 9 56 59 ### How was this patch tested? N/A Closes #30165 from maropu/DocForFile. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- docs/_data/menu-sql.yaml | 2 + docs/sql-ref-syntax-qry-select-file.md | 76 ++++++++++++++++++++++++++ docs/sql-ref-syntax-qry-select.md | 3 +- docs/sql-ref-syntax-qry.md | 1 + docs/sql-ref-syntax.md | 1 + 5 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 docs/sql-ref-syntax-qry-select-file.md diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 63f6b4a0a204b..2207bd6a17656 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -175,6 +175,8 @@ url: sql-ref-syntax-qry-select-hints.html - text: Inline Table url: sql-ref-syntax-qry-select-inline-table.html + - text: File + url: sql-ref-syntax-qry-select-file.html - text: JOIN url: sql-ref-syntax-qry-select-join.html - text: LIKE Predicate diff --git a/docs/sql-ref-syntax-qry-select-file.md b/docs/sql-ref-syntax-qry-select-file.md new file mode 100644 index 0000000000000..c3dc406ee79e6 --- /dev/null +++ b/docs/sql-ref-syntax-qry-select-file.md @@ -0,0 +1,76 @@ +--- +layout: global +title: File +displayTitle: File +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +--- + +### Description + +You can query a file with a specified format directly with SQL. + +### Syntax + +```sql +file_format.`file_path` +``` + +### Parameters + +* **file_format** + + Specifies a file format for a given file path, could be TEXTFILE, ORC, PARQUET, etc. + +* **file_path** + + Specifies a file path with a given format. + +### Examples + +```sql +-- PARQUET file +SELECT * FROM parquet.`examples/src/main/resources/users.parquet`; ++------+--------------+----------------+ +| name|favorite_color|favorite_numbers| ++------+--------------+----------------+ +|Alyssa| null| [3, 9, 15, 20]| +| Ben| red| []| ++------+--------------+----------------+ + +-- ORC file +SELECT * FROM orc.`examples/src/main/resources/users.orc`; ++------+--------------+----------------+ +| name|favorite_color|favorite_numbers| ++------+--------------+----------------+ +|Alyssa| null| [3, 9, 15, 20]| +| Ben| red| []| ++------+--------------+----------------+ + +-- JSON file +SELECT * FROM json.`examples/src/main/resources/people.json`; ++----+-------+ +| age| name| ++----+-------+ +|null|Michael| +| 30| Andy| +| 19| Justin| ++----+-------+ +``` + +### Related Statements + +* [SELECT](sql-ref-syntax-qry-select.html) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index 655766d4c6d22..bac7c2bc6a06d 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -85,7 +85,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Table-value function](sql-ref-syntax-qry-select-tvf.html) * [Inline table](sql-ref-syntax-qry-select-inline-table.html) * Subquery - * [File](sql-data-sources-load-save-functions.html#run-sql-on-files-directly) + * [File](sql-ref-syntax-qry-select-file.html) * **PIVOT** @@ -165,6 +165,7 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } * [Common Table Expression](sql-ref-syntax-qry-select-cte.html) * [Hints](sql-ref-syntax-qry-select-hints.html) * [Inline Table](sql-ref-syntax-qry-select-inline-table.html) +* [File](sql-ref-syntax-qry-select-file.html) * [JOIN](sql-ref-syntax-qry-select-join.html) * [LIKE Predicate](sql-ref-syntax-qry-select-like.html) * [Set Operators](sql-ref-syntax-qry-select-setops.html) diff --git a/docs/sql-ref-syntax-qry.md b/docs/sql-ref-syntax-qry.md index d55ea43d15036..6751b90e12443 100644 --- a/docs/sql-ref-syntax-qry.md +++ b/docs/sql-ref-syntax-qry.md @@ -39,6 +39,7 @@ ability to generate logical and physical plan for a given query using * [Common Table Expression](sql-ref-syntax-qry-select-cte.html) * [Hints](sql-ref-syntax-qry-select-hints.html) * [Inline Table](sql-ref-syntax-qry-select-inline-table.html) + * [File](sql-ref-syntax-qry-select-file.html) * [JOIN](sql-ref-syntax-qry-select-join.html) * [LIKE Predicate](sql-ref-syntax-qry-select-like.html) * [Set Operators](sql-ref-syntax-qry-select-setops.html) diff --git a/docs/sql-ref-syntax.md b/docs/sql-ref-syntax.md index 4e58abb2a8596..f3d35b57d90cd 100644 --- a/docs/sql-ref-syntax.md +++ b/docs/sql-ref-syntax.md @@ -56,6 +56,7 @@ Spark SQL is Apache Spark's module for working with structured data. 
The SQL Syn
 * [HAVING Clause](sql-ref-syntax-qry-select-having.html)
 * [Hints](sql-ref-syntax-qry-select-hints.html)
 * [Inline Table](sql-ref-syntax-qry-select-inline-table.html)
+ * [File](sql-ref-syntax-qry-select-file.html)
 * [JOIN](sql-ref-syntax-qry-select-join.html)
 * [LIKE Predicate](sql-ref-syntax-qry-select-like.html)
 * [LIMIT Clause](sql-ref-syntax-qry-select-limit.html)

From fcf8aa59b5025dde9b4af36953146894659967e2 Mon Sep 17 00:00:00 2001
From: "Jungtaek Lim (HeartSaVioR)"
Date: Wed, 28 Oct 2020 03:31:11 +0000
Subject: [PATCH 0342/1009] [SPARK-33240][SQL] Fail fast when fails to instantiate configured v2 session catalog

### What changes were proposed in this pull request?

This patch changes the behavior to fail fast when Spark cannot instantiate the configured v2 session catalog.

### Why are the changes needed?

The current behavior works against the intention of end users: if they configure a session catalog that Spark fails to initialize, Spark swallows the error, only logs the error message, and silently falls back to the default catalog implementation. This follows the consensus on the [discussion thread](https://lists.apache.org/thread.html/rdfa22a5ebdc4ac66e2c5c8ff0cd9d750e8a1690cd6fb456d119c2400%40%3Cdev.spark.apache.org%3E) in the dev mailing list.

### Does this PR introduce _any_ user-facing change?

Yes. After this PR, Spark fails immediately if it cannot instantiate the configured session catalog.

### How was this patch tested?

New UT added.

Closes #30147 from HeartSaVioR/SPARK-33240.

Authored-by: Jungtaek Lim (HeartSaVioR)
Signed-off-by: Wenchen Fan
---
 .../sql/connector/catalog/CatalogManager.scala  | 12 ++----------
 .../connector/SupportsCatalogOptionsSuite.scala | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
index c6d21540f27d5..8e8cd786b70c3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogManager.scala
@@ -18,7 +18,6 @@ package org.apache.spark.sql.connector.catalog
 import scala.collection.mutable
-import scala.util.control.NonFatal
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException
@@ -82,15 +81,8 @@ class CatalogManager(
   * in the fallback configuration, spark.sql.sources.write.useV1SourceList
   */
  private[sql] def v2SessionCatalog: CatalogPlugin = {
-    conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).map { customV2SessionCatalog =>
-      try {
-        catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog())
-      } catch {
-        case NonFatal(_) =>
-          logError(
-            "Fail to instantiate the custom v2 session catalog: " + customV2SessionCatalog)
-          defaultSessionCatalog
-      }
+    conf.getConf(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION).map { _ =>
+      catalogs.getOrElseUpdate(SESSION_CATALOG_NAME, loadV2SessionCatalog())
     }.getOrElse(defaultSessionCatalog)
   }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala
index 550bec7505422..eacdb9e2fcd7b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -22,6 +22,7 @@ import scala.util.Try import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkException import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} @@ -254,6 +255,22 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with } } + test("SPARK-33240: fail the query when instantiation on session catalog fails") { + try { + spark.sessionState.catalogManager.reset() + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, "InvalidCatalogClass") + val e = intercept[SparkException] { + sql(s"create table t1 (id bigint) using $format") + } + + assert(e.getMessage.contains("Cannot find catalog plugin class")) + assert(e.getMessage.contains("InvalidCatalogClass")) + } finally { + spark.sessionState.catalogManager.reset() + } + } + private def checkV2Identifiers( plan: LogicalPlan, identifier: String = "t1", From 528160f0014206eaceb01ae0f3ad316bfbdc6885 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 28 Oct 2020 05:44:55 +0000 Subject: [PATCH 0343/1009] [SPARK-33174][SQL] Migrate DROP TABLE to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `DROP TABLE` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? The current behavior is not consistent between v1 and v2 commands when resolving a temp view. In v2, the `t` in the following example is resolved to a table: ```scala sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE testcat.ns") sql("DROP TABLE t") // 't' is resolved to testcat.ns.t ``` whereas in v1, the `t` is resolved to a temp view: ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint) USING csv") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("DROP TABLE t") // 't' is resolved to a temp view ``` ### Does this PR introduce _any_ user-facing change? After this PR, for v2, `DROP TABLE t` is resolved to a temp view `t` instead of `testcat.ns.t`, consistent with v1 behavior. ### How was this patch tested? Added a new test Closes #30079 from imback82/drop_table_consistent. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++++--- .../catalyst/analysis/ResolveCatalogs.scala | 3 -- .../analysis/ResolveNoopDropTable.scala | 33 +++++++++++++++++++ .../catalyst/analysis/v2ResolutionPlans.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 7 ++-- .../catalyst/plans/logical/statements.scala | 8 ----- .../catalyst/plans/logical/v2Commands.scala | 13 ++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 23 +++++++++---- .../analysis/ResolveSessionCatalog.scala | 16 ++++++--- .../datasources/v2/DataSourceV2Strategy.scala | 9 +++-- .../sql/connector/DataSourceV2SQLSuite.scala | 29 ++++++++++++++-- .../connector/TestV2SessionCatalogBase.scala | 17 ++++++++-- .../command/PlanResolutionSuite.scala | 16 ++++----- .../v2/jdbc/JDBCTableCatalogSuite.scala | 6 ++-- 14 files changed, 142 insertions(+), 53 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 52c96f4a8f014..61c077fd12aa2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -258,7 +258,9 @@ class Analyzer( ResolveUnion :: TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), - Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), + Batch("Post-Hoc Resolution", Once, + Seq(ResolveNoopDropTable) ++ + postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, new ResolveHints.RemoveAllHints), @@ -864,7 +866,9 @@ class Analyzer( } u case u @ UnresolvedTableOrView(ident) => - lookupTempView(ident).map(_ => ResolvedView(ident.asIdentifier)).getOrElse(u) + lookupTempView(ident) + .map(_ => ResolvedView(ident.asIdentifier, isTemp = true)) + .getOrElse(u) } def lookupTempView( @@ -1017,7 +1021,8 @@ class Analyzer( case u @ UnresolvedTable(identifier) => lookupTableOrView(identifier).map { case v: ResolvedView => - u.failAnalysis(s"${v.identifier.quoted} is a view not table.") + val viewStr = if (v.isTemp) "temp view" else "view" + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr not table.") case table => table }.getOrElse(u) @@ -1030,7 +1035,7 @@ class Analyzer( case SessionCatalogAndIdentifier(catalog, ident) => CatalogV2Util.loadTable(catalog, ident).map { case v1Table: V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => - ResolvedView(ident) + ResolvedView(ident, isTemp = false) case table => ResolvedTable(catalog.asTableCatalog, ident, table) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 65ddff8c44ed9..d3bb72badeb13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -198,9 +198,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, orCreate = c.orCreate) - case DropTableStatement(NonSessionCatalogAndTable(catalog, tbl), ifExists, _) => - DropTable(catalog.asTableCatalog, tbl.asIdentifier, ifExists) - case 
DropViewStatement(NonSessionCatalogAndTable(catalog, viewName), _) => throw new AnalysisException( s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala new file mode 100644 index 0000000000000..f9da9174f85e6 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan, NoopDropTable} +import org.apache.spark.sql.catalyst.rules.Rule + +/** + * A rule for handling [[DropTable]] logical plan when the table or temp view is not resolved. + * If "ifExists" flag is set to true, the plan is resolved to [[NoopDropTable]], + * which is a no-op command. + */ +object ResolveNoopDropTable extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { + case DropTable(u: UnresolvedTableOrView, ifExists, _) if ifExists => + NoopDropTable(u.multipartIdentifier) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index a16763f2cf943..1344d78838e1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -81,7 +81,7 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T */ // TODO: create a generic representation for temp view, v1 view and v2 view, after we add view // support to v2 catalog. For now we only need the identifier to fallback to v1 command. -case class ResolvedView(identifier: Identifier) extends LeafNode { +case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNode { override def output: Seq[Attribute] = Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f29e7b11e02de..f28375c8d7a4a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2878,11 +2878,12 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[DropTableStatement]] command. + * Create a [[DropTable]] command. 
*/ override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { - DropTableStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + // DROP TABLE works with either a table or a temporary view. + DropTable( + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), ctx.EXISTS != null, ctx.PURGE != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d7c097af9120f..3a534b2eb8ceb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -283,14 +283,6 @@ case class RenameTableStatement( newName: Seq[String], isView: Boolean) extends ParsedStatement -/** - * A DROP TABLE statement, as parsed from SQL. - */ -case class DropTableStatement( - tableName: Seq[String], - ifExists: Boolean, - purge: Boolean) extends ParsedStatement - /** * A DROP VIEW statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 272c19b98512b..96cb096ff97c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -388,9 +388,16 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit * The logical plan of the DROP TABLE command that works for v2 tables. */ case class DropTable( - catalog: TableCatalog, - ident: Identifier, - ifExists: Boolean) extends Command + child: LogicalPlan, + ifExists: Boolean, + purge: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan for handling non-existing table for DROP TABLE command. + */ +case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command /** * The logical plan of the ALTER TABLE command that works for v2 tables. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 621d416c55457..a81f9e16083d6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -381,19 +381,28 @@ class DDLParserSuite extends AnalysisTest { test("drop table") { parseCompare("DROP TABLE testcat.ns1.ns2.tbl", - DropTableStatement(Seq("testcat", "ns1", "ns2", "tbl"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl")), + ifExists = false, + purge = false)) parseCompare(s"DROP TABLE db.tab", - DropTableStatement(Seq("db", "tab"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("db", "tab")), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS db.tab", - DropTableStatement(Seq("db", "tab"), ifExists = true, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("db", "tab")), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab", - DropTableStatement(Seq("tab"), ifExists = false, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS tab", - DropTableStatement(Seq("tab"), ifExists = true, purge = false)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab PURGE", - DropTableStatement(Seq("tab"), ifExists = false, purge = true)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = true)) parseCompare(s"DROP TABLE IF EXISTS tab PURGE", - DropTableStatement(Seq("tab"), ifExists = true, purge = true)) + DropTable( + UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = true)) } test("drop view") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index c4fd84cd978d4..f35eb41fe2ce1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -260,14 +260,14 @@ class ResolveSessionCatalog( DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. 
- case DescribeRelation(ResolvedView(ident), partitionSpec, isExtended) => + case DescribeRelation(ResolvedView(ident, _), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) if isSessionCatalog(r.catalog) => DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) - case DescribeColumn(ResolvedView(ident), colNameParts, isExtended) => + case DescribeColumn(ResolvedView(ident, _), colNameParts, isExtended) => DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the @@ -367,9 +367,17 @@ class ResolveSessionCatalog( orCreate = c.orCreate) } + case DropTable( + r @ ResolvedTable(_, _, _: V1Table), ifExists, purge) if isSessionCatalog(r.catalog) => + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) + // v1 DROP TABLE supports temp view. - case DropTableStatement(TempViewOrV1Table(name), ifExists, purge) => - DropTableCommand(name.asTableIdentifier, ifExists, isView = false, purge = purge) + case DropTable(r: ResolvedView, ifExists, purge) => + if (!r.isTemp) { + throw new AnalysisException( + "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") + } + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) // v1 DROP TABLE supports temp view. case DropViewStatement(TempViewOrV1Table(name), ifExists) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 3841bd0a66987..81a36dee58389 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} -import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, ProjectExec, RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.sources.{BaseRelation, TableScan} @@ -228,8 +228,11 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DescribeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("Describing columns is not supported for v2 tables.") - case DropTable(catalog, ident, ifExists) => - DropTableExec(catalog, ident, ifExists) :: Nil + case DropTable(r: ResolvedTable, ifExists, _) => + DropTableExec(r.catalog, r.identifier, ifExists) :: Nil + + case NoopDropTable(multipartIdentifier) => + LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index e3618f1326941..298c07059ff44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -744,10 +744,33 @@ class DataSourceV2SQLSuite } test("DropTable: if exists") { - intercept[NoSuchTableException] { - sql(s"DROP TABLE testcat.db.notbl") + val ex = intercept[AnalysisException] { + sql("DROP TABLE testcat.db.notbl") + } + assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) + sql("DROP TABLE IF EXISTS testcat.db.notbl") + } + + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { + withTable("testcat.ns.t") { + withTempView("t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql("USE testcat.ns") + + // Check the temporary view 't' exists. + runShowTablesSql( + "SHOW TABLES FROM spark_catalog.default LIKE 't'", + Seq(Row("", "t", true)), + expectV2Catalog = false) + sql("DROP TABLE t") + // Verify that the temporary view 't' is resolved first and dropped. + runShowTablesSql( + "SHOW TABLES FROM spark_catalog.default LIKE 't'", + Nil, + expectV2Catalog = false) + } } - sql(s"DROP TABLE IF EXISTS testcat.db.notbl") } test("Relation: basic") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index 4e741ff35c29f..f57edb9eb220c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.connector import java.util import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ @@ -36,6 +37,13 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating protected val tables: util.Map[Identifier, T] = new ConcurrentHashMap[Identifier, T]() + private val tableCreated: AtomicBoolean = new AtomicBoolean(false) + + private def addTable(ident: Identifier, table: T): Unit = { + tableCreated.set(true) + tables.put(ident, table) + } + protected def newTable( name: String, schema: StructType, @@ -51,7 +59,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating case v1Table: V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => v1Table case t => val table = newTable(t.name(), t.schema(), t.partitioning(), t.properties()) - tables.put(ident, table) + addTable(ident, table) table } } @@ -64,7 +72,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating properties: util.Map[String, String]): Table = { val created = super.createTable(ident, schema, partitions, properties) val t = newTable(created.name(), schema, partitions, properties) - tables.put(ident, t) + addTable(ident, t) t } @@ -74,8 +82,11 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating } def clearTables(): Unit = { - assert(!tables.isEmpty, "Tables were empty, maybe didn't use the session catalog code path?") + assert( + tableCreated.get, + "Tables are not created, maybe didn't use the session catalog code path?") 
tables.keySet().asScala.foreach(super.dropTable) tables.clear() + tableCreated.set(false) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 8782295e5d33b..d5820b016736a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -630,10 +630,10 @@ class PlanResolutionSuite extends AnalysisTest { } test("drop table") { - val tableName1 = "db.tab" - val tableIdent1 = TableIdentifier("tab", Option("db")) - val tableName2 = "tab" - val tableIdent2 = TableIdentifier("tab", Some("default")) + val tableName1 = "db.v1Table" + val tableIdent1 = TableIdentifier("v1Table", Option("db")) + val tableName2 = "v1Table" + val tableIdent2 = TableIdentifier("v1Table", Some("default")) parseResolveCompare(s"DROP TABLE $tableName1", DropTableCommand(tableIdent1, ifExists = false, isView = false, purge = false)) @@ -656,13 +656,13 @@ class PlanResolutionSuite extends AnalysisTest { val tableIdent2 = Identifier.of(Array.empty, "tab") parseResolveCompare(s"DROP TABLE $tableName1", - DropTable(testCat, tableIdent1, ifExists = false)) + DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName1", - DropTable(testCat, tableIdent1, ifExists = true)) + DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = true, purge = false)) parseResolveCompare(s"DROP TABLE $tableName2", - DropTable(testCat, tableIdent2, ifExists = false)) + DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName2", - DropTable(testCat, tableIdent2, ifExists = true)) + DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = true, purge = false)) } test("drop view") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index d99ccf85683ed..51316b464ab34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -78,10 +78,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( - "h2.test.not_existing_table" -> "Table test.not_existing_table not found", - "h2.bad_test.not_existing_table" -> "Table bad_test.not_existing_table not found" + "h2.test.not_existing_table" -> "Table or view not found: h2.test.not_existing_table", + "h2.bad_test.not_existing_table" -> "Table or view not found: h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => - val msg = intercept[NoSuchTableException] { + val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") }.getMessage assert(msg.contains(expectedMsg)) From 9fb45361fd00b046e04748e1a1c8add3fa09f01c Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Wed, 28 Oct 2020 05:51:47 +0000 Subject: [PATCH 0344/1009] [SPARK-33183][SQL] Fix Optimizer rule EliminateSorts and add a physical rule to remove redundant 
sorts ### What changes were proposed in this pull request? This PR aims to fix a correctness bug in the optimizer rule `EliminateSorts`. It also adds a new physical rule to remove redundant sorts that cannot be eliminated in the Optimizer rule after the bugfix. ### Why are the changes needed? A global sort should not be eliminated even if its child is ordered since we don't know if its child ordering is global or local. For example, in the following scenario, the first sort shouldn't be removed because it has a stronger guarantee than the second sort even if the sort orders are the same for both sorts. ``` Sort(orders, global = True, ...) Sort(orders, global = False, ...) ``` Since there is no straightforward way to identify whether a node's output ordering is local or global, we should not remove a global sort even if its child is already ordered. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Unit tests Closes #30093 from allisonwang-db/fix-sort. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 16 +- .../apache/spark/sql/internal/SQLConf.scala | 7 + .../optimizer/EliminateSortsSuite.scala | 102 +++++++++++-- .../spark/sql/execution/QueryExecution.scala | 1 + .../sql/execution/RemoveRedundantSorts.scala | 46 ++++++ .../adaptive/AdaptiveSparkPlanExec.scala | 2 + .../spark/sql/execution/PlannerSuite.scala | 13 -- .../execution/RemoveRedundantSortsSuite.scala | 144 ++++++++++++++++++ 8 files changed, 303 insertions(+), 28 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index f3f64031843e0..9519a56c2817a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1020,7 +1020,7 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * Note that changes in the final output ordering may affect the file size (SPARK-32318). 
* This rule handles the following cases: * 1) if the sort order is empty or the sort order does not have any reference - * 2) if the child is already sorted + * 2) if the Sort operator is a local sort and the child is already sorted * 3) if there is another Sort operator separated by 0...n Project, Filter, Repartition or * RepartitionByExpression (with deterministic expressions) operators * 4) if the Sort operator is within Join separated by 0...n Project, Filter, Repartition or @@ -1031,12 +1031,18 @@ object CombineFilters extends Rule[LogicalPlan] with PredicateHelper { * function is order irrelevant */ object EliminateSorts extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transform applyLocally + + private val applyLocally: PartialFunction[LogicalPlan, LogicalPlan] = { case s @ Sort(orders, _, child) if orders.isEmpty || orders.exists(_.child.foldable) => val newOrders = orders.filterNot(_.child.foldable) - if (newOrders.isEmpty) child else s.copy(order = newOrders) - case Sort(orders, true, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => - child + if (newOrders.isEmpty) { + applyLocally.lift(child).getOrElse(child) + } else { + s.copy(order = newOrders) + } + case Sort(orders, false, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => + applyLocally.lift(child).getOrElse(child) case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child)) case j @ Join(originLeft, originRight, _, cond, _) if cond.forall(_.deterministic) => j.copy(left = recursiveRemoveSort(originLeft), right = recursiveRemoveSort(originRight)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3024398399962..d84dfcc8f3086 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1253,6 +1253,13 @@ object SQLConf { .booleanConf .createWithDefault(true) + val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts") + .internal() + .doc("Whether to remove redundant physical sort node") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATE_STORE_PROVIDER_CLASS = buildConf("spark.sql.streaming.stateStore.providerClass") .internal() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index cc351e365113d..62deebd930752 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -99,12 +99,34 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("remove redundant order by") { + test("SPARK-33183: remove consecutive no-op sorts") { + val plan = testRelation.orderBy().orderBy().orderBy() + val optimized = Optimize.execute(plan.analyze) + val correctAnswer = testRelation.analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove redundant sort by") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) - val unnecessaryReordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst) + val unnecessaryReordered = 
orderedPlan.limit(2).select('a).sortBy('a.asc, 'b.desc_nullsFirst) val optimized = Optimize.execute(unnecessaryReordered.analyze) val correctAnswer = orderedPlan.limit(2).select('a).analyze - comparePlans(Optimize.execute(optimized), correctAnswer) + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove all redundant local sorts") { + val orderedPlan = testRelation.sortBy('a.asc).orderBy('a.asc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.orderBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: should not remove global sort") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) + val reordered = orderedPlan.limit(2).select('a).orderBy('a.asc, 'b.desc_nullsFirst) + val optimized = Optimize.execute(reordered.analyze) + val correctAnswer = reordered.analyze + comparePlans(optimized, correctAnswer) } test("do not remove sort if the order is different") { @@ -115,22 +137,39 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("filters don't affect order") { + test("SPARK-33183: remove top level local sort with filter operators") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) - val filteredAndReordered = orderedPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.where('a > Literal(10)).sortBy('a.asc, 'b.desc) val optimized = Optimize.execute(filteredAndReordered.analyze) val correctAnswer = orderedPlan.where('a > Literal(10)).analyze comparePlans(optimized, correctAnswer) } - test("limits don't affect order") { + test("SPARK-33183: keep top level global sort with filter operators") { + val projectPlan = testRelation.select('a, 'b) + val orderedPlan = projectPlan.orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = projectPlan.where('a > Literal(10)).orderBy('a.asc, 'b.desc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: limits should not affect order for local sort") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) - val filteredAndReordered = orderedPlan.limit(Literal(10)).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.limit(Literal(10)).sortBy('a.asc, 'b.desc) val optimized = Optimize.execute(filteredAndReordered.analyze) val correctAnswer = orderedPlan.limit(Literal(10)).analyze comparePlans(optimized, correctAnswer) } + test("SPARK-33183: should not remove global sort with limit operators") { + val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) + val filteredAndReordered = orderedPlan.limit(Literal(10)).orderBy('a.asc, 'b.desc) + val optimized = Optimize.execute(filteredAndReordered.analyze) + val correctAnswer = filteredAndReordered.analyze + comparePlans(optimized, correctAnswer) + } + test("different sorts are not simplified if limit is in between") { val orderedPlan = testRelation.select('a, 'b).orderBy('b.desc).limit(Literal(10)) .orderBy('a.asc) @@ -139,11 +178,11 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("range is already sorted") { + test("SPARK-33183: should not remove global sort with range operator") { val inputPlan = Range(1L, 1000L, 1, 10) val orderedPlan = inputPlan.orderBy('id.asc) val optimized = 
Optimize.execute(orderedPlan.analyze) - val correctAnswer = inputPlan.analyze + val correctAnswer = orderedPlan.analyze comparePlans(optimized, correctAnswer) val reversedPlan = inputPlan.orderBy('id.desc) @@ -154,10 +193,18 @@ class EliminateSortsSuite extends AnalysisTest { val negativeStepInputPlan = Range(10L, 1L, -1, 10) val negativeStepOrderedPlan = negativeStepInputPlan.orderBy('id.desc) val negativeStepOptimized = Optimize.execute(negativeStepOrderedPlan.analyze) - val negativeStepCorrectAnswer = negativeStepInputPlan.analyze + val negativeStepCorrectAnswer = negativeStepOrderedPlan.analyze comparePlans(negativeStepOptimized, negativeStepCorrectAnswer) } + test("SPARK-33183: remove local sort with range operator") { + val inputPlan = Range(1L, 1000L, 1, 10) + val orderedPlan = inputPlan.sortBy('id.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = inputPlan.analyze + comparePlans(optimized, correctAnswer) + } + test("sort should not be removed when there is a node which doesn't guarantee any order") { val orderedPlan = testRelation.select('a, 'b) val groupedAndResorted = orderedPlan.groupBy('a)(sum('a)).orderBy('a.asc) @@ -333,4 +380,39 @@ class EliminateSortsSuite extends AnalysisTest { val correctAnswer = PushDownOptimizer.execute(noOrderByPlan.analyze) comparePlans(optimized, correctAnswer) } + + test("SPARK-33183: remove consecutive global sorts with the same ordering") { + Seq( + (testRelation.orderBy('a.asc).orderBy('a.asc), testRelation.orderBy('a.asc)), + (testRelation.orderBy('a.asc, 'b.desc).orderBy('a.asc), testRelation.orderBy('a.asc)) + ).foreach { case (ordered, answer) => + val optimized = Optimize.execute(ordered.analyze) + comparePlans(optimized, answer.analyze) + } + } + + test("SPARK-33183: remove consecutive local sorts with the same ordering") { + val orderedPlan = testRelation.sortBy('a.asc).sortBy('a.asc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.sortBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: remove consecutive local sorts with different ordering") { + val orderedPlan = testRelation.sortBy('b.asc).sortBy('a.desc).sortBy('a.asc) + val optimized = Optimize.execute(orderedPlan.analyze) + val correctAnswer = testRelation.sortBy('a.asc).analyze + comparePlans(optimized, correctAnswer) + } + + test("SPARK-33183: should keep global sort when child is a local sort with the same ordering") { + val correctAnswer = testRelation.orderBy('a.asc).analyze + Seq( + testRelation.sortBy('a.asc).orderBy('a.asc), + testRelation.orderBy('a.asc).sortBy('a.asc).orderBy('a.asc) + ).foreach { ordered => + val optimized = Optimize.execute(ordered.analyze) + comparePlans(optimized, correctAnswer) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index c37e1e92c8576..b998430c1602d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -343,6 +343,7 @@ object QueryExecution { PlanDynamicPruningFilters, PlanSubqueries, RemoveRedundantProjects, + RemoveRedundantSorts, EnsureRequirements, DisableUnnecessaryBucketedScan, ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala new file mode 100644 index 0000000000000..87c08ec865fe9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantSorts.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.catalyst.expressions.SortOrder +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.internal.SQLConf + +/** + * Remove redundant SortExec node from the spark plan. A sort node is redundant when + * its child satisfies both its sort orders and its required child distribution. Note + * this rule differs from the Optimizer rule EliminateSorts in that this rule also checks + * if the child satisfies the required distribution so that it is safe to remove not only a + * local sort but also a global sort when its child already satisfies required sort orders. + */ +object RemoveRedundantSorts extends Rule[SparkPlan] { + def apply(plan: SparkPlan): SparkPlan = { + if (!conf.getConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED)) { + plan + } else { + removeSorts(plan) + } + } + + private def removeSorts(plan: SparkPlan): SparkPlan = plan transform { + case s @ SortExec(orders, _, child, _) + if SortOrder.orderingSatisfies(child.outputOrdering, orders) && + child.outputPartitioning.satisfies(s.requiredChildDistribution.head) => + child + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index d30e16276b9f3..a4a58dfe1de53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -83,6 +83,7 @@ case class AdaptiveSparkPlanExec( @transient private val optimizer = new AQEOptimizer(conf) @transient private val removeRedundantProjects = RemoveRedundantProjects + @transient private val removeRedundantSorts = RemoveRedundantSorts @transient private val ensureRequirements = EnsureRequirements // A list of physical plan rules to be applied before creation of query stages. The physical @@ -90,6 +91,7 @@ case class AdaptiveSparkPlanExec( // Exchange nodes) after running these rules. 
private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( removeRedundantProjects, + removeRedundantSorts, ensureRequirements ) ++ context.session.sessionState.queryStagePrepRules diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 048466b3d8637..be29acb6d3a7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -234,19 +234,6 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } - test("SPARK-23375: Cached sorted data doesn't need to be re-sorted") { - val query = testData.select('key, 'value).sort('key.desc).cache() - assert(query.queryExecution.optimizedPlan.isInstanceOf[InMemoryRelation]) - val resorted = query.sort('key.desc) - assert(resorted.queryExecution.optimizedPlan.collect { case s: Sort => s}.isEmpty) - assert(resorted.select('key).collect().map(_.getInt(0)).toSeq == - (1 to 100).reverse) - // with a different order, the sort is needed - val sortedAsc = query.sort('key) - assert(sortedAsc.queryExecution.optimizedPlan.collect { case s: Sort => s}.size == 1) - assert(sortedAsc.select('key).collect().map(_.getInt(0)).toSeq == (1 to 100)) - } - test("PartitioningCollection") { withTempView("normal", "small", "tiny") { testData.createOrReplaceTempView("normal") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala new file mode 100644 index 0000000000000..54c5a33441900 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + + +abstract class RemoveRedundantSortsSuiteBase + extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { + import testImplicits._ + + private def checkNumSorts(df: DataFrame, count: Int): Unit = { + val plan = df.queryExecution.executedPlan + assert(collectWithSubqueries(plan) { case s: SortExec => s }.length == count) + } + + private def checkSorts(query: String, enabledCount: Int, disabledCount: Int): Unit = { + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { + val df = sql(query) + checkNumSorts(df, enabledCount) + val result = df.collect() + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "false") { + val df = sql(query) + checkNumSorts(df, disabledCount) + checkAnswer(df, result) + } + } + } + + test("remove redundant sorts with limit") { + withTempView("t") { + spark.range(100).select('id as "key").createOrReplaceTempView("t") + val query = + """ + |SELECT key FROM + | (SELECT key FROM t WHERE key > 10 ORDER BY key DESC LIMIT 10) + |ORDER BY key DESC + |""".stripMargin + checkSorts(query, 0, 1) + } + } + + test("remove redundant sorts with broadcast hash join") { + withTempView("t1", "t2") { + spark.range(1000).select('id as "key").createOrReplaceTempView("t1") + spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + + val queryTemplate = """ + |SELECT /*+ BROADCAST(%s) */ t1.key FROM + | (SELECT key FROM t1 WHERE key > 10 ORDER BY key DESC LIMIT 10) t1 + |JOIN + | (SELECT key FROM t2 WHERE key > 50 ORDER BY key DESC LIMIT 100) t2 + |ON t1.key = t2.key + |ORDER BY %s + """.stripMargin + + // No sort should be removed since the stream side (t2) order DESC + // does not satisfy the required sort order ASC. + val buildLeftOrderByRightAsc = queryTemplate.format("t1", "t2.key ASC") + checkSorts(buildLeftOrderByRightAsc, 1, 1) + + // The top sort node should be removed since the stream side (t2) order DESC already + // satisfies the required sort order DESC. + val buildLeftOrderByRightDesc = queryTemplate.format("t1", "t2.key DESC") + checkSorts(buildLeftOrderByRightDesc, 0, 1) + + // No sort should be removed since the sort ordering from broadcast-hash join is based + // on the stream side (t2) and the required sort order is from t1. + val buildLeftOrderByLeftDesc = queryTemplate.format("t1", "t1.key DESC") + checkSorts(buildLeftOrderByLeftDesc, 1, 1) + + // The top sort node should be removed since the stream side (t1) order DESC already + // satisfies the required sort order DESC. 
+ val buildRightOrderByLeftDesc = queryTemplate.format("t2", "t1.key DESC") + checkSorts(buildRightOrderByLeftDesc, 0, 1) + } + } + + test("remove redundant sorts with sort merge join") { + withTempView("t1", "t2") { + spark.range(1000).select('id as "key").createOrReplaceTempView("t1") + spark.range(1000).select('id as "key").createOrReplaceTempView("t2") + val query = """ + |SELECT /*+ MERGE(t1) */ t1.key FROM + | (SELECT key FROM t1 WHERE key > 10 ORDER BY key DESC LIMIT 10) t1 + |JOIN + | (SELECT key FROM t2 WHERE key > 50 ORDER BY key DESC LIMIT 100) t2 + |ON t1.key = t2.key + |ORDER BY t1.key + """.stripMargin + + val queryAsc = query + " ASC" + checkSorts(queryAsc, 2, 3) + + // The top level sort should not be removed since the child output ordering is ASC and + // the required ordering is DESC. + val queryDesc = query + " DESC" + checkSorts(queryDesc, 3, 3) + } + } + + test("cached sorted data doesn't need to be re-sorted") { + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "true") { + val df = spark.range(1000).select('id as "key").sort('key.desc).cache() + val resorted = df.sort('key.desc) + val sortedAsc = df.sort('key.asc) + checkNumSorts(df, 0) + checkNumSorts(resorted, 0) + checkNumSorts(sortedAsc, 1) + val result = resorted.collect() + withSQLConf(SQLConf.REMOVE_REDUNDANT_SORTS_ENABLED.key -> "false") { + val resorted = df.sort('key.desc) + checkNumSorts(resorted, 1) + checkAnswer(resorted, result) + } + } + } +} + +class RemoveRedundantSortsSuite extends RemoveRedundantSortsSuiteBase + with DisableAdaptiveExecutionSuite + +class RemoveRedundantSortsSuiteAE extends RemoveRedundantSortsSuiteBase + with EnableAdaptiveExecutionSuite From 3c3ad5f7c00f6f68bc659d4cf7020fa944b7bc69 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 28 Oct 2020 06:40:23 +0000 Subject: [PATCH 0345/1009] [SPARK-32934][SQL] Improve the performance for NTH_VALUE and refactor the OffsetWindowFunction ### What changes were proposed in this pull request? Spark SQL supports some window functions like `NTH_VALUE`. If we specify a window frame like `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`, we can eliminate some calculations. For example, if we execute the SQL shown below: ``` SELECT NTH_VALUE(col, 2) OVER(ORDER BY rank ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM tab; ``` the output for rows whose row number is greater than 1 is a fixed value, and it is null otherwise. So we only need to calculate the value once and check whether the row number is less than 2. The `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING` case is even simpler. ### Why are the changes needed? To improve the performance of `NTH_VALUE`, `FIRST_VALUE` and `LAST_VALUE`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29800 from beliefer/optimize-nth_value.
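For illustration, a minimal sketch of the two frame shapes this optimization targets, mirroring the queries added to `window.sql` in this patch (the `basic_pays` table is the test fixture used there; per the `WindowExecBase` change, only ROW frames starting at UNBOUNDED PRECEDING with `ignoreNulls` disabled take the new code paths):
```
-- Planned with UnboundedPrecedingOffsetWindowFunctionFrame: once the row number reaches the
-- offset (2 here), every later row reuses the row selected in prepare(); earlier rows get null.
SELECT
  employee_name,
  salary,
  nth_value(employee_name, 2) OVER (
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary
FROM basic_pays;

-- Planned with UnboundedOffsetWindowFunctionFrame: the value is the same for every row in the
-- partition, so it is computed once in prepare() and write() does no per-row work.
SELECT
  employee_name,
  salary,
  nth_value(employee_name, 2) OVER (
    ORDER BY salary DESC
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary
FROM basic_pays;
```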
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +- .../expressions/windowExpressions.scala | 86 +++++++----- .../sql/execution/window/WindowExec.scala | 8 +- .../sql/execution/window/WindowExecBase.scala | 43 +++++- .../window/WindowFunctionFrame.scala | 123 ++++++++++++++++-- .../resources/sql-tests/inputs/window.sql | 30 +++++ .../sql-tests/results/window.sql.out | 98 +++++++++++++- .../sql/DataFrameWindowFunctionsSuite.scala | 17 +-- 9 files changed, 353 insertions(+), 59 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 61c077fd12aa2..c2116a2b8f471 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2978,7 +2978,7 @@ class Analyzer( */ object ResolveWindowFrame extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { - case WindowExpression(wf: OffsetWindowFunction, + case WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 351be32ee438e..d261f26072bcc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -166,7 +166,7 @@ trait CheckAnalysis extends PredicateHelper { case w @ WindowExpression(AggregateExpression(_, _, true, _, _), _) => failAnalysis(s"Distinct window functions are not supported: $w") - case w @ WindowExpression(_: OffsetWindowFunction, + case w @ WindowExpression(_: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, order, frame: SpecifiedWindowFrame)) if order.isEmpty || !frame.isOffset => failAnalysis("An offset window function can only be evaluated in an ordered " + @@ -176,7 +176,8 @@ trait CheckAnalysis extends PredicateHelper { // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. 
e match { - case _: AggregateExpression | _: OffsetWindowFunction | _: AggregateWindowFunction => + case _: AggregateExpression | _: FrameLessOffsetWindowFunction | + _: AggregateWindowFunction => w case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => w diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index bc0b4ac018f9e..168585dc3de00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -327,25 +327,14 @@ object WindowFunctionType { } } - -/** - * An offset window function is a window function that returns the value of the input column offset - * by a number of rows within the partition. For instance: an OffsetWindowfunction for value x with - * offset -2, will get the value of x 2 rows back in the partition. - */ -abstract class OffsetWindowFunction - extends Expression with WindowFunction with Unevaluable with ImplicitCastInputTypes { +trait OffsetWindowSpec extends Expression { /** * Input expression to evaluate against a row which a number of rows below or above (depending on - * the value and sign of the offset) the current row. + * the value and sign of the offset) the starting row (current row if isRelative=true, or the + * first row of the window frame otherwise). */ val input: Expression - /** - * Default result value for the function when the `offset`th row does not exist. - */ - val default: Expression - /** * (Foldable) expression that contains the number of rows between the current row and the row * where the input expression is evaluated. If `offset` is a positive integer, it means that @@ -355,6 +344,36 @@ abstract class OffsetWindowFunction */ val offset: Expression + /** + * Default result value for the function when the `offset`th row does not exist. + */ + val default: Expression + + /** + * An optional specification that indicates the offset window function should skip null values in + * the determination of which row to use. + */ + val ignoreNulls: Boolean + + /** + * Whether the offset is starts with the current row. If `isRelative` is true, `offset` means + * the offset is start with the current row. otherwise, the offset is starts with the first + * row of the entire window frame. + */ + val isRelative: Boolean + + lazy val fakeFrame = SpecifiedWindowFrame(RowFrame, offset, offset) +} + +/** + * A frameless offset window function is a window function that cannot specify window frame and + * returns the value of the input column offset by a number of rows within the partition. + * For instance: a FrameLessOffsetWindowFunction for value x with offset -2, will get the value of + * x 2 rows back in the partition. 
+ */ +abstract class FrameLessOffsetWindowFunction + extends WindowFunction with OffsetWindowSpec with Unevaluable with ImplicitCastInputTypes { + override def children: Seq[Expression] = Seq(input, offset, default) /* @@ -370,7 +389,11 @@ abstract class OffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override lazy val frame: WindowFrame = SpecifiedWindowFrame(RowFrame, offset, offset) + override val ignoreNulls = false + + override val isRelative = true + + override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { val check = super.checkInputDataTypes() @@ -425,7 +448,7 @@ abstract class OffsetWindowFunction group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class Lead(input: Expression, offset: Expression, default: Expression) - extends OffsetWindowFunction { + extends FrameLessOffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -467,7 +490,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class Lag(input: Expression, inputOffset: Expression, default: Expression) - extends OffsetWindowFunction { + extends FrameLessOffsetWindowFunction { def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) @@ -579,7 +602,6 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { } // scalastyle:off line.size.limit line.contains.tab - @ExpressionDescription( usage = """ _FUNC_(input[, offset]) - Returns the value of `input` at the row that is the `offset`th row @@ -607,12 +629,16 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { since = "3.1.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Boolean) - extends AggregateWindowFunction with ImplicitCastInputTypes { +case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean) + extends AggregateWindowFunction with OffsetWindowSpec with ImplicitCastInputTypes { def this(child: Expression, offset: Expression) = this(child, offset, false) - override def children: Seq[Expression] = input :: offsetExpr :: Nil + override lazy val default = Literal.create(null, input.dataType) + + override val isRelative = false + + override def children: Seq[Expression] = input :: offset :: Nil override val frame: WindowFrame = UnspecifiedFrame @@ -624,35 +650,35 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool val check = super.checkInputDataTypes() if (check.isFailure) { check - } else if (!offsetExpr.foldable) { - TypeCheckFailure(s"Offset expression '$offsetExpr' must be a literal.") - } else if (offset <= 0) { + } else if (!offset.foldable) { + TypeCheckFailure(s"Offset expression '$offset' must be a literal.") + } else if (offsetVal <= 0) { TypeCheckFailure( - s"The 'offset' argument of nth_value must be greater than zero but it is $offset.") + s"The 'offset' argument of nth_value must be greater than zero but it is $offsetVal.") } else { TypeCheckSuccess } } - private lazy val offset = offsetExpr.eval().asInstanceOf[Int].toLong + private lazy val offsetVal = offset.eval().asInstanceOf[Int].toLong private lazy val result = AttributeReference("result", input.dataType)() private lazy val count = AttributeReference("count", LongType)() 
override lazy val aggBufferAttributes: Seq[AttributeReference] = result :: count :: Nil override lazy val initialValues: Seq[Literal] = Seq( - /* result = */ Literal.create(null, input.dataType), + /* result = */ default, /* count = */ Literal(1L) ) override lazy val updateExpressions: Seq[Expression] = { if (ignoreNulls) { Seq( - /* result = */ If(count === offset && input.isNotNull, input, result), + /* result = */ If(count === offsetVal && input.isNotNull, input, result), /* count = */ If(input.isNull, count, count + 1L) ) } else { Seq( - /* result = */ If(count === offset, input, result), + /* result = */ If(count === offsetVal, input, result), /* count = */ count + 1L ) } @@ -662,7 +688,7 @@ case class NthValue(input: Expression, offsetExpr: Expression, ignoreNulls: Bool override def prettyName: String = "nth_value" override def sql: String = - s"$prettyName(${input.sql}, ${offsetExpr.sql})${if (ignoreNulls) " ignore nulls" else ""}" + s"$prettyName(${input.sql}, ${offset.sql})${if (ignoreNulls) " ignore nulls" else ""}" } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index eaca55df08d06..439c31a47fd3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -57,8 +57,12 @@ import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, * 3. CURRENT ROW AND 1 FOLLOWING * 4. 1 PRECEDING AND 1 FOLLOWING * 5. 1 FOLLOWING AND 2 FOLLOWING - * - Offset frame: The frame consist of one row, which is an offset number of rows away from the - * current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame. + * - Offset frame: The frame consist of one row, which is an offset number of rows. There are three + * implement of offset frame. + * 1. [[FrameLessOffsetWindowFunction]] returns the value of the input column offset by a number + * of rows according to the current row. + * 2. [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]] + * returns the value of the input column offset by a number of rows within the frame. * * Different frame boundaries can be used in Growing, Shrinking and Moving frames. 
A frame * boundary can be either Row or Range based: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index ed055bb801ae5..f0b99c1522aa1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -136,8 +136,15 @@ trait WindowExecBase extends UnaryExecNode { val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] function match { case AggregateExpression(f, _, _, _, _) => collect("AGGREGATE", frame, e, f) + case f: FrameLessOffsetWindowFunction => collect("FRAME_LESS_OFFSET", frame, e, f) + case f: OffsetWindowSpec if !f.ignoreNulls && + frame.frameType == RowFrame && frame.lower == UnboundedPreceding => + frame.upper match { + case UnboundedFollowing => collect("UNBOUNDED_OFFSET", f.fakeFrame, e, f) + case CurrentRow => collect("UNBOUNDED_PRECEDING_OFFSET", f.fakeFrame, e, f) + case _ => collect("AGGREGATE", frame, e, f) + } case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) - case f: OffsetWindowFunction => collect("OFFSET", frame, e, f) case f: PythonUDF => collect("AGGREGATE", frame, e, f) case f => sys.error(s"Unsupported window function: $f") } @@ -171,18 +178,42 @@ trait WindowExecBase extends UnaryExecNode { // Create the factory to produce WindowFunctionFrame. val factory = key match { - // Offset Frame - case ("OFFSET", _, IntegerLiteral(offset), _) => + // Frameless offset Frame + case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _) => target: InternalRow => - new OffsetWindowFunctionFrame( + new FrameLessOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowFunctions. - functions.map(_.asInstanceOf[OffsetWindowFunction]), + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), offset) + case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _) => + target: InternalRow => { + new UnboundedOffsetWindowFunctionFrame( + target, + ordinal, + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), + child.output, + (expressions, schema) => + MutableProjection.create(expressions, schema), + offset) + } + case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _) => + target: InternalRow => { + new UnboundedPrecedingOffsetWindowFunctionFrame( + target, + ordinal, + // OFFSET frame functions are guaranteed be OffsetWindowSpec. + functions.map(_.asInstanceOf[OffsetWindowSpec]), + child.output, + (expressions, schema) => + MutableProjection.create(expressions, schema), + offset) + } // Entire Partition Frame. case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index dc1b919feefe4..e8a83f9772d35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -77,31 +77,31 @@ object WindowFunctionFrame { * @param newMutableProjection function used to create the projection. 
* @param offset by which rows get moved within a partition. */ -final class OffsetWindowFunctionFrame( +abstract class OffsetWindowFunctionFrameBase( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowFunction], + expressions: Array[OffsetWindowSpec], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends WindowFunctionFrame { /** Rows of the partition currently being processed. */ - private[this] var input: ExternalAppendOnlyUnsafeRowArray = null + protected var input: ExternalAppendOnlyUnsafeRowArray = null /** * An iterator over the [[input]] */ - private[this] var inputIterator: Iterator[UnsafeRow] = _ + protected var inputIterator: Iterator[UnsafeRow] = _ /** Index of the input row currently used for output. */ - private[this] var inputIndex = 0 + protected var inputIndex = 0 /** * Create the projection used when the offset row exists. * Please note that this project always respect null input values (like PostgreSQL). */ - private[this] val projection = { + protected val projection = { // Collect the expressions and bind them. val inputAttrs = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ bindReferences( @@ -112,7 +112,7 @@ final class OffsetWindowFunctionFrame( } /** Create the projection used when the offset row DOES NOT exists. */ - private[this] val fillDefaultValue = { + protected val fillDefaultValue = { // Collect the expressions and bind them. val inputAttrs: AttributeSeq = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ expressions.toSeq.map { e => @@ -129,6 +129,28 @@ final class OffsetWindowFunctionFrame( newMutableProjection(boundExpressions, Nil).target(target) } + override def currentLowerBound(): Int = throw new UnsupportedOperationException() + + override def currentUpperBound(): Int = throw new UnsupportedOperationException() +} + +/** + * The frameless offset window frame is an internal window frame just used to optimize the + * performance for the window function that returns the value of the input column offset + * by a number of rows according to the current row. The internal window frame is not a popular + * window frame cannot be specified and used directly by the users. This window frame + * calculates frames containing LEAD/LAG statements. + */ +class FrameLessOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows inputIterator = input.generateIterator() @@ -151,10 +173,93 @@ final class OffsetWindowFunctionFrame( } inputIndex += 1 } +} - override def currentLowerBound(): Int = throw new UnsupportedOperationException() +/** + * The unbounded offset window frame is an internal window frame just used to optimize the + * performance for the window function that returns the value of the input column offset + * by a number of rows within the frame and has specified ROWS BETWEEN UNBOUNDED PRECEDING + * AND UNBOUNDED FOLLOWING. The internal window frame is not a popular window frame cannot be + * specified and used directly by the users. 
+ * The unbounded offset window frame calculates frames containing NTH_VALUE statements. + * The unbounded offset window frame return the same value for all rows in the window partition. + */ +class UnboundedOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { - override def currentUpperBound(): Int = throw new UnsupportedOperationException() + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { + input = rows + if (offset > input.length) { + fillDefaultValue(EmptyRow) + } else { + inputIterator = input.generateIterator() + // drain the first few rows if offset is larger than one + inputIndex = 0 + while (inputIndex < offset - 1) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + projection(r) + } + } + + override def write(index: Int, current: InternalRow): Unit = { + // The results are the same for each row in the partition, and have been evaluated in prepare. + // Don't need to recalculate here. + } +} + +/** + * The unbounded preceding offset window frame is an internal window frame just used to optimize + * the performance for the window function that returns the value of the input column offset + * by a number of rows within the frame and has specified ROWS BETWEEN UNBOUNDED PRECEDING + * AND CURRENT ROW. The internal window frame is not a popular window frame cannot be specified + * and used directly by the users. + * The unbounded preceding offset window frame calculates frames containing NTH_VALUE statements. + * The unbounded preceding offset window frame return the same value for rows which index + * (starting from 1) equal to or greater than offset in the window partition. 
+ */ +class UnboundedPrecedingOffsetWindowFunctionFrame( + target: InternalRow, + ordinal: Int, + expressions: Array[OffsetWindowSpec], + inputSchema: Seq[Attribute], + newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, + offset: Int) + extends OffsetWindowFunctionFrameBase( + target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + + var selectedRow: UnsafeRow = null + + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { + input = rows + inputIterator = input.generateIterator() + // drain the first few rows if offset is larger than one + inputIndex = 0 + while (inputIndex < offset - 1) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + if (inputIndex < input.length) { + selectedRow = WindowFunctionFrame.getNextOrNull(inputIterator) + } + } + + override def write(index: Int, current: InternalRow): Unit = { + if (index >= inputIndex && selectedRow != null) { + projection(selectedRow) + } else { + fillDefaultValue(EmptyRow) + } + } } /** diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 5de6db210ce36..c1be5fb27e6fa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -165,6 +165,16 @@ FROM basic_pays ORDER BY salary DESC; +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + SELECT employee_name, salary, @@ -205,6 +215,26 @@ FROM basic_pays ORDER BY salary DESC; +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC; + SELECT employee_name, department, diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 028dd7a12d25d..f6506a77e239c 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 32 +-- Number of queries: 35 -- !query @@ -479,6 +479,38 @@ Anthony Bow 6627 Gerard Bondur Leslie Thompson 5186 Gerard Bondur +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 NULL +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + -- !query 
SELECT employee_name, @@ -607,6 +639,70 @@ Anthony Bow 6627 Gerard Bondur Leslie Thompson 5186 Gerard Bondur +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + +-- !query +SELECT + employee_name, + salary, + nth_value(employee_name, 2) OVER ( + ORDER BY salary DESC + ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary +FROM + basic_pays +ORDER BY salary DESC +-- !query schema +struct +-- !query output +Larry Bott 11798 Gerard Bondur +Gerard Bondur 11472 Gerard Bondur +Pamela Castillo 11303 Gerard Bondur +Barry Jones 10586 Gerard Bondur +George Vanauf 10563 Gerard Bondur +Loui Bondur 10449 Gerard Bondur +Mary Patterson 9998 Gerard Bondur +Steve Patterson 9441 Gerard Bondur +Julie Firrelli 9181 Gerard Bondur +Jeff Firrelli 8992 Gerard Bondur +William Patterson 8870 Gerard Bondur +Diane Murphy 8435 Gerard Bondur +Leslie Jennings 8113 Gerard Bondur +Gerard Hernandez 6949 Gerard Bondur +Foon Yue Tseng 6660 Gerard Bondur +Anthony Bow 6627 Gerard Bondur +Leslie Thompson 5186 Gerard Bondur + + -- !query SELECT employee_name, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 616e333033aa9..207b2963f0b3b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -657,15 +657,16 @@ class DataFrameWindowFunctionsSuite extends QueryTest $"order", nth_value($"value", 2).over(window), nth_value($"value", 2, ignoreNulls = false).over(window), - nth_value($"value", 2, ignoreNulls = true).over(window)), + nth_value($"value", 2, ignoreNulls = true).over(window), + nth_value($"value", 3, ignoreNulls = false).over(window)), Seq( - Row("a", 0, null, null, null), - Row("a", 1, "x", "x", null), - Row("a", 2, "x", "x", "y"), - Row("a", 3, "x", "x", "y"), - Row("a", 4, "x", "x", "y"), - Row("b", 1, null, null, null), - Row("b", 2, null, null, null))) + Row("a", 0, null, null, null, null), + Row("a", 1, "x", "x", null, null), + Row("a", 2, "x", "x", "y", "y"), + Row("a", 3, "x", "x", "y", "y"), + Row("a", 4, "x", "x", "y", "y"), + Row("b", 1, null, null, null, null), + Row("b", 2, null, null, null, null))) } test("nth_value on descending ordered window") { From 2b8fe6d9ae2fe31d1545da98003f931ee1aa11d5 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 28 Oct 2020 21:32:09 +0900 Subject: [PATCH 0346/1009] [SPARK-33269][INFRA] Ignore ".bsp/" directory in Git ### What changes were proposed in this pull request? After SBT upgrade into 1.4.0 and above. 
there is always a ".bsp" directory after sbt starts: https://github.com/sbt/sbt/releases/tag/v1.4.0 This PR is to put the directory into `.gitignore`. ### Why are the changes needed? The ".bsp" directory shows up as an untracked file in git during development, which is annoying. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual local test Closes #30171 from gengliangwang/ignoreBSP. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 82414d1fa35bf..9c145fba1bee9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ *.swp *~ .DS_Store +.bsp/ .cache .classpath .ensime From b26ae98407c6c017a4061c0c420f48685ddd6163 Mon Sep 17 00:00:00 2001 From: "zky.zhoukeyong" Date: Wed, 28 Oct 2020 13:17:28 +0000 Subject: [PATCH 0347/1009] [SPARK-33208][SQL] Update the document of SparkSession#sql Change-Id: I82db1f9e8f667573aa3a03e05152cbed0ea7686b ### What changes were proposed in this pull request? Update the document of SparkSession#sql, mentioning that this API eagerly runs DDL/DML commands, but not for SELECT queries. ### Why are the changes needed? To clarify the behavior of SparkSession#sql. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Not needed. Closes #30168 from waitinfuture/master. Authored-by: zky.zhoukeyong Signed-off-by: Wenchen Fan --- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 68ce82d5badda..7cf0b6bb70364 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -641,8 +641,8 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Executes a SQL query using Spark, returning the result as a `DataFrame`. The dialect that is - * used for SQL parsing can be configured with 'spark.sql.dialect'. + * Executes a SQL query using Spark, returning the result as a `DataFrame`. + * This API eagerly runs DDL/DML commands, but not for SELECT queries. * * @group basic * @since 1.3.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index b33557dbfdb27..c4aadfb1d66bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -600,7 +600,7 @@ class SparkSession private( /** * Executes a SQL query using Spark, returning the result as a `DataFrame`. - * The dialect that is used for SQL parsing can be configured with 'spark.sql.dialect'. + * This API eagerly runs DDL/DML commands, but not for SELECT queries. * * @since 2.0.0 */ From a6216e2446b6befc3f6d6b370e694421aadda9dd Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 28 Oct 2020 08:33:02 -0700 Subject: [PATCH 0348/1009] [SPARK-33268][SQL][PYTHON] Fix bugs for casting data from/to PythonUserDefinedType ### What changes were proposed in this pull request? This PR intends to fix bugs for casting data from/to PythonUserDefinedType.
A sequence of queries to reproduce this issue is as follows: ``` >>> from pyspark.sql import Row >>> from pyspark.sql.functions import col >>> from pyspark.sql.types import * >>> from pyspark.testing.sqlutils import * >>> >>> row = Row(point=ExamplePoint(1.0, 2.0)) >>> df = spark.createDataFrame([row]) >>> df.select(col("point").cast(PythonOnlyUDT())) Traceback (most recent call last): File "", line 1, in File "/Users/maropu/Repositories/spark/spark-master/python/pyspark/sql/dataframe.py", line 1402, in select jdf = self._jdf.select(self._jcols(*cols)) File "/Users/maropu/Repositories/spark/spark-master/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1305, in __call__ File "/Users/maropu/Repositories/spark/spark-master/python/pyspark/sql/utils.py", line 111, in deco return f(*a, **kw) File "/Users/maropu/Repositories/spark/spark-master/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 328, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o44.select. : java.lang.NullPointerException at org.apache.spark.sql.types.UserDefinedType.acceptsType(UserDefinedType.scala:84) at org.apache.spark.sql.catalyst.expressions.Cast$.canCast(Cast.scala:96) at org.apache.spark.sql.catalyst.expressions.CastBase.checkInputDataTypes(Cast.scala:267) at org.apache.spark.sql.catalyst.expressions.CastBase.resolved$lzycompute(Cast.scala:290) at org.apache.spark.sql.catalyst.expressions.CastBase.resolved(Cast.scala:290) ``` The root cause of this issue is that, since `PythonUserDefinedType#userClass` is always null, `isAssignableFrom` in `UserDefinedType#acceptsType` throws a NullPointerException. To fix it, this PR defines `acceptsType` in `PythonUserDefinedType` and filters out the null case in `UserDefinedType#acceptsType`. ### Why are the changes needed? Bug fixes. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #30169 from maropu/FixPythonUDTCast.
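Before reading the diff, the shape of the fix can be summarized with a small standalone sketch; the method and parameter names below are illustrative only and are not part of the patch:

```scala
// Minimal sketch of the null guard described above: a Python-only UDT has no JVM user class
// (its userClass is null), so both classes must be checked before calling isAssignableFrom,
// otherwise the call site throws a NullPointerException.
def acceptsUserClass(thisUserClass: Class[_], otherUserClass: Class[_]): Boolean =
  if (thisUserClass != null && otherUserClass != null) {
    thisUserClass.isAssignableFrom(otherUserClass)
  } else {
    false
  }
```

The actual patch additionally overrides `acceptsType` in `PythonUserDefinedType` so that two Python-only UDTs are compared by their `pyUDT` values instead.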
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/tests/test_types.py | 9 +++++++++ .../org/apache/spark/sql/types/UserDefinedType.scala | 9 +++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index e85e8a6e6d1ee..6b5c1ad6c4e46 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -27,6 +27,7 @@ from pyspark.sql import Row from pyspark.sql.functions import col from pyspark.sql.udf import UserDefinedFunction +from pyspark.sql.utils import AnalysisException from pyspark.sql.types import ByteType, ShortType, IntegerType, FloatType, DateType, \ TimestampType, MapType, StringType, StructType, StructField, ArrayType, DoubleType, LongType, \ DecimalType, BinaryType, BooleanType, NullType @@ -441,6 +442,14 @@ def test_cast_to_string_with_udt(self): result = df.select(col('point').cast('string'), col('pypoint').cast('string')).head() self.assertEqual(result, Row(point=u'(1.0, 2.0)', pypoint=u'[3.0, 4.0]')) + def test_cast_to_udt_with_udt(self): + from pyspark.sql.functions import col + row = Row(point=ExamplePoint(1.0, 2.0), python_only_point=PythonOnlyPoint(1.0, 2.0)) + df = self.spark.createDataFrame([row]) + self.assertRaises(AnalysisException, lambda: df.select(col("point").cast(PythonOnlyUDT()))) + self.assertRaises(AnalysisException, + lambda: df.select(col("python_only_point").cast(ExamplePointUDT()))) + def test_struct_type(self): struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None) struct2 = StructType([StructField("f1", StringType(), True), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala index 592ce03606d4b..689c30f6b7fa9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/UserDefinedType.scala @@ -78,8 +78,8 @@ abstract class UserDefinedType[UserType >: Null] extends DataType with Serializa */ override private[spark] def asNullable: UserDefinedType[UserType] = this - override private[sql] def acceptsType(dataType: DataType) = dataType match { - case other: UserDefinedType[_] => + override private[sql] def acceptsType(dataType: DataType): Boolean = dataType match { + case other: UserDefinedType[_] if this.userClass != null && other.userClass != null => this.getClass == other.getClass || this.userClass.isAssignableFrom(other.userClass) case _ => false @@ -131,6 +131,11 @@ private[sql] class PythonUserDefinedType( ("sqlType" -> sqlType.jsonValue) } + override private[sql] def acceptsType(dataType: DataType): Boolean = dataType match { + case other: PythonUserDefinedType => pyUDT == other.pyUDT + case _ => false + } + override def equals(other: Any): Boolean = other match { case that: PythonUserDefinedType => pyUDT == that.pyUDT case _ => false From a744fea3be12f1a53ab553040b95da730210bc88 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 28 Oct 2020 10:00:29 -0700 Subject: [PATCH 0349/1009] [SPARK-33267][SQL] Fix NPE issue on 'In' filter when one of values contains null ### What changes were proposed in this pull request? This PR proposes to fix the NPE issue on `In` filter when one of values contain null. In real case, you can trigger this issue when you try to push down the filter with `in (..., null)` against V2 source table. 
`DataSourceStrategy` caches the mapping (filter instance -> expression) in HashMap, which leverages hash code on the key, hence it could trigger the NPE issue. ### Why are the changes needed? This is an obvious bug as `In` filter doesn't care about null value when calculating hash code. ### Does this PR introduce _any_ user-facing change? Yes, previously the query with having `null` in "in" condition against data source V2 source table supporting push down filter failed with NPE, whereas after the PR the query will not fail. ### How was this patch tested? UT added. The new UT fails without the PR and passes with the PR. Closes #30170 from HeartSaVioR/SPARK-33267. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/sources/filters.scala | 2 +- .../apache/spark/sql/connector/DataSourceV2Suite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index 7533793253513..2b44a3a861ed9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -164,7 +164,7 @@ case class In(attribute: String, values: Array[Any]) extends Filter { var h = attribute.hashCode values.foreach { v => h *= 41 - h += v.hashCode() + h += (if (v != null) v.hashCode() else 0) } h } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index ec1ac00d08bf8..ce28e615702db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -413,6 +413,16 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } } + + test("SPARK-33267: push down with condition 'in (..., null)' should not throw NPE") { + Seq(classOf[AdvancedDataSourceV2], classOf[JavaAdvancedDataSourceV2]).foreach { cls => + withClue(cls.getName) { + val df = spark.read.format(cls.getName).load() + // before SPARK-33267 below query just threw NPE + df.select('i).where("i in (1, null)").collect() + } + } + } } From 2639ad43cb8357db235e7fc9ce24930cca7f2525 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 29 Oct 2020 07:37:16 +0900 Subject: [PATCH 0350/1009] [SPARK-33272][SQL] prune the attributes mapping in QueryPlan.transformUpWithNewOutput ### What changes were proposed in this pull request? For complex query plans, `QueryPlan.transformUpWithNewOutput` will keep accumulating the attributes mapping to be propagated, which may hurt performance. This PR prunes the attributes mapping before propagating. ### Why are the changes needed? A simple perf improvement. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30173 from cloud-fan/bug. 
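The pruning idea itself is small and can be shown in isolation; the following is a minimal Scala sketch with illustrative stand-in types (none of these names come from the patch):

```scala
// Stand-in for a Catalyst attribute; only its identity matters for this sketch.
case class Attr(name: String, exprId: Long)

// Keep only the mapping entries whose new attribute is still part of the current node's
// output, because only those can be referenced and rewritten by parent plan nodes.
def pruneMapping(mapping: Seq[(Attr, Attr)], output: Set[Attr]): Seq[(Attr, Attr)] =
  mapping.filter { case (_, newAttr) => output.contains(newAttr) }

val oldA = Attr("a", 1); val newA = Attr("a", 2)
val oldB = Attr("b", 3); val newB = Attr("b", 4)
// Only oldA -> newA survives, since newA appears in the node's output.
assert(pruneMapping(Seq(oldA -> newA, oldB -> newB), Set(newA)) == Seq(oldA -> newA))
```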
Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/plans/QueryPlan.scala | 17 +++++++++++++++-- .../catalyst/plans/logical/AnalysisHelper.scala | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 3e8467bab0348..b1884eac27f73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -180,10 +180,14 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT * rewrite attribute references in the parent nodes. * @param skipCond a boolean condition to indicate if we can skip transforming a plan node to save * time. + * @param canGetOutput a boolean condition to indicate if we can get the output of a plan node + * to prune the attributes mapping to be propagated. The default value is true + * as only unresolved logical plan can't get output. */ def transformUpWithNewOutput( rule: PartialFunction[PlanType, (PlanType, Seq[(Attribute, Attribute)])], - skipCond: PlanType => Boolean = _ => false): PlanType = { + skipCond: PlanType => Boolean = _ => false, + canGetOutput: PlanType => Boolean = _ => true): PlanType = { def rewrite(plan: PlanType): (PlanType, Seq[(Attribute, Attribute)]) = { if (skipCond(plan)) { plan -> Nil @@ -237,7 +241,16 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT val existingAttrMappingSet = transferAttrMapping.map(_._2).toSet newValidAttrMapping.filterNot { case (_, a) => existingAttrMappingSet.contains(a) } } - planAfterRule -> (transferAttrMapping ++ newOtherAttrMapping).toSeq + val resultAttrMapping = if (canGetOutput(plan)) { + // We propagate the attributes mapping to the parent plan node to update attributes, so + // the `newAttr` must be part of this plan's output. + (transferAttrMapping ++ newOtherAttrMapping).filter { + case (_, newAttr) => planAfterRule.outputSet.contains(newAttr) + } + } else { + transferAttrMapping ++ newOtherAttrMapping + } + planAfterRule -> resultAttrMapping.toSeq } } rewrite(this)._1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index 30447db1acc04..d8d18b46bcc74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -127,7 +127,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => rule: PartialFunction[LogicalPlan, (LogicalPlan, Seq[(Attribute, Attribute)])]) : LogicalPlan = { if (!analyzed) { - transformUpWithNewOutput(rule, skipCond = _.analyzed) + transformUpWithNewOutput(rule, skipCond = _.analyzed, canGetOutput = _.resolved) } else { self } From c592ae6ed81cf381512749c43ed688411ef1b431 Mon Sep 17 00:00:00 2001 From: Nathan Wreggit Date: Thu, 29 Oct 2020 10:28:50 +0900 Subject: [PATCH 0351/1009] [SQL][MINOR] Update from_unixtime doc ### What changes were proposed in this pull request? This PR fixes from_unixtime documentation to show that fmt is optional parameter. ### Does this PR introduce _any_ user-facing change? Yes, documentation update. 
**Before change:** ![image](https://user-images.githubusercontent.com/4176173/97497659-18c6cc80-1928-11eb-93d8-453ef627ac7c.png) **After change:** ![image](https://user-images.githubusercontent.com/4176173/97496153-c5537f00-1925-11eb-8102-457e85e019d5.png) ### How was this patch tested? Style check using: ./dev/run-tests Manual check and screenshotting with: ./sql/create-docs.sh Manual verification of behavior with latest spark-sql binary. Closes #30176 from Obbay2/from_unixtime_doc. Authored-by: Nathan Wreggit Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/datetimeExpressions.scala | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 571b0be40c6e6..223d0e661ed3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -895,17 +895,20 @@ abstract class UnixTime extends ToTimestamp { */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(unix_time, fmt) - Returns `unix_time` in the specified `fmt`.", + usage = "_FUNC_(unix_time[, fmt]) - Returns `unix_time` in the specified `fmt`.", arguments = """ Arguments: * unix_time - UNIX Timestamp to be converted to the provided format. * fmt - Date/time format pattern to follow. See Datetime Patterns - for valid date and time format patterns. + for valid date and time format patterns. The 'yyyy-MM-dd HH:mm:ss' pattern is used if omitted. """, examples = """ Examples: > SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss'); 1969-12-31 16:00:00 + + > SELECT _FUNC_(0); + 1969-12-31 16:00:00 """, group = "datetime_funcs", since = "1.5.0") From 9d5e48ea95d1c3017a51ff69584f32a18901b2b5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 29 Oct 2020 10:30:41 +0900 Subject: [PATCH 0352/1009] [SPARK-33270][SQL] Return SQL schema instead of Catalog string from the `SchemaOfJson` expression ### What changes were proposed in this pull request? Return schema in SQL format instead of Catalog string from the `SchemaOfJson` expression. ### Why are the changes needed? In some cases, `from_json()` cannot parse schemas returned by `schema_of_json`, for instance, when JSON fields have spaces (gaps). Such fields will be quoted after the changes, and can be parsed by `from_json()`. Here is the example: ```scala val in = Seq("""{"a b": 1}""").toDS() in.select(from_json('value, schema_of_json("""{"a b": 100}""")) as "parsed") ``` raises the exception: ``` == SQL == struct ------^^^ at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parseTableSchema(ParseDriver.scala:76) at org.apache.spark.sql.types.DataType$.fromDDL(DataType.scala:131) at org.apache.spark.sql.catalyst.expressions.ExprUtils$.evalTypeExpr(ExprUtils.scala:33) at org.apache.spark.sql.catalyst.expressions.JsonToStructs.(jsonExpressions.scala:537) at org.apache.spark.sql.functions$.from_json(functions.scala:4141) ``` ### Does this PR introduce _any_ user-facing change? Yes. For example, `schema_of_json` for the input `{"col":0}`. Before: `struct` After: `STRUCT<`col`: BIGINT>` ### How was this patch tested? 
By existing test suites `JsonFunctionsSuite` and `JsonExpressionsSuite`. Closes #30172 from MaxGekk/schema_of_json-sql-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 ++-- docs/sql-migration-guide.md | 2 ++ python/pyspark/sql/functions.py | 4 ++-- .../expressions/jsonExpressions.scala | 6 +++--- .../expressions/JsonExpressionsSuite.scala | 8 ++++---- .../sql-tests/results/json-functions.sql.out | 6 +++--- .../apache/spark/sql/JsonFunctionsSuite.scala | 19 +++++++++++++------ 7 files changed, 29 insertions(+), 20 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 685e6e672bdf9..22bd4133d46a8 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1717,9 +1717,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_json('{"name":"Bob"}'))) - expect_equal(c[[1]], "struct") + expect_equal(c[[1]], "STRUCT<`name`: STRING>") c <- collect(select(df, schema_of_json(lit('{"name":"Bob"}')))) - expect_equal(c[[1]], "struct") + expect_equal(c[[1]], "STRUCT<`name`: STRING>") # Test to_json() supports arrays of primitive types and arrays df <- sql("SELECT array(19, 42, 70) as age") diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 124b04fb2bede..ee82d9ac4724b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -49,6 +49,8 @@ license: | - In Spark 3.1, we remove the built-in Hive 1.2. You need to migrate your custom SerDes to Hive 2.3. See [HIVE-15167](https://issues.apache.org/jira/browse/HIVE-15167) for more details. - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. + + - In Spark 3.1, the `schema_of_json` function returns the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 22941ab6f1157..68639ff7b6320 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2937,10 +2937,10 @@ def schema_of_json(json, options={}): >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() - [Row(json='struct')] + [Row(json='STRUCT<`a`: BIGINT>')] >>> schema = schema_of_json('{a: 1}', {'allowUnquotedFieldNames':'true'}) >>> df.select(schema.alias("json")).collect() - [Row(json='struct')] + [Row(json='STRUCT<`a`: BIGINT>')] """ if isinstance(json, str): col = _create_column_from_literal(json) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index ef02d2db97a3f..39d9eb5a36964 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -741,9 +741,9 @@ case class StructsToJson( examples = """ Examples: > SELECT _FUNC_('[{"col":0}]'); - array> + ARRAY> > SELECT _FUNC_('[{"col":01}]', map('allowNumericLeadingZeros', 'true')); - array> + ARRAY> """, group = "json_funcs", since = "2.4.0") @@ -801,7 +801,7 @@ case class SchemaOfJson( } } - UTF8String.fromString(dt.catalogString) + UTF8String.fromString(dt.sql) } override def prettyName: String = "schema_of_json" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index 6f062dcc9a1ce..b3666936e5855 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -735,17 +735,17 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with test("SPARK-24709: infer schema of json strings") { checkEvaluation(new SchemaOfJson(Literal.create("""{"col":0}""")), - "struct") + "STRUCT<`col`: BIGINT>") checkEvaluation( new SchemaOfJson(Literal.create("""{"col0":["a"], "col1": {"col2": "b"}}""")), - "struct,col1:struct>") + "STRUCT<`col0`: ARRAY, `col1`: STRUCT<`col2`: STRING>>") } test("infer schema of JSON strings by using options") { checkEvaluation( new SchemaOfJson(Literal.create("""{"col":01}"""), CreateMap(Seq(Literal.create("allowNumericLeadingZeros"), Literal.create("true")))), - "struct") + "STRUCT<`col`: BIGINT>") } test("parse date with locale") { @@ -810,7 +810,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with } Seq("en-US", "ko-KR", "ru-RU", "de-DE").foreach { - checkDecimalInfer(_, """struct""") + checkDecimalInfer(_, """STRUCT<`d`: DECIMAL(7,3)>""") } } diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 34a329627f5dd..3cc45890cf089 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -213,7 +213,7 @@ select schema_of_json('{"c1":0, "c2":[1]}') -- !query schema struct -- !query output -struct> +STRUCT<`c1`: BIGINT, `c2`: ARRAY> -- !query @@ -352,7 +352,7 @@ select 
schema_of_json('{"c1":1}', map('primitivesAsString', 'true')) -- !query schema struct -- !query output -struct +STRUCT<`c1`: STRING> -- !query @@ -360,7 +360,7 @@ select schema_of_json('{"c1":01, "c2":0.1}', map('allowNumericLeadingZeros', 'tr -- !query schema struct -- !query output -struct +STRUCT<`c1`: BIGINT, `c2`: DECIMAL(1,1)> -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 5a1a3550d855b..e2a9cf536d154 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -411,7 +411,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { test("infers schemas using options") { val df = spark.range(1) .select(schema_of_json(lit("{a:1}"), Map("allowUnquotedFieldNames" -> "true").asJava)) - checkAnswer(df, Seq(Row("struct"))) + checkAnswer(df, Seq(Row("STRUCT<`a`: BIGINT>"))) } test("from_json - array of primitive types") { @@ -684,14 +684,14 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val input = regexp_replace(lit("""{"item_id": 1, "item_price": 0.1}"""), "item_", "") checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: BIGINT, `price`: DOUBLE>"))) } test("SPARK-31065: schema_of_json - null and empty strings as strings") { Seq("""{"id": null}""", """{"id": ""}""").foreach { input => checkAnswer( spark.range(1).select(schema_of_json(input)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: STRING>"))) } } @@ -703,7 +703,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""{"id": "a", "drop": {"drop": null}}"""), options.asJava)), - Seq(Row("struct"))) + Seq(Row("STRUCT<`id`: STRING>"))) // Array of structs checkAnswer( @@ -711,7 +711,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""[{"id": "a", "drop": {"drop": null}}]"""), options.asJava)), - Seq(Row("array>"))) + Seq(Row("ARRAY>"))) // Other types are not affected. checkAnswer( @@ -719,7 +719,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { schema_of_json( lit("""null"""), options.asJava)), - Seq(Row("string"))) + Seq(Row("STRING"))) } test("optional datetime parser does not affect json time formatting") { @@ -747,4 +747,11 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val df4 = Seq("""{"c2": [19]}""").toDF("c0") checkAnswer(df4.select(from_json($"c0", MapType(StringType, st))), Row(null)) } + + test("SPARK-33270: infers schema for JSON field with spaces and pass them to from_json") { + val in = Seq("""{"a b": 1}""").toDS() + val out = in.select(from_json('value, schema_of_json("""{"a b": 100}""")) as "parsed") + val expected = new StructType().add("parsed", new StructType().add("a b", LongType)) + assert(out.schema == expected) + } } From b409025641133fe7f352de4beaa2c0b037be3f56 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 29 Oct 2020 21:02:10 +0900 Subject: [PATCH 0353/1009] [SPARK-33281][SQL] Return SQL schema instead of Catalog string from the `SchemaOfCsv` expression ### What changes were proposed in this pull request? Return schema in SQL format instead of Catalog string from the SchemaOfCsv expression. ### Why are the changes needed? To unify output of the `schema_of_json()` and `schema_of_csv()`. ### Does this PR introduce _any_ user-facing change? 
Yes, they can but `schema_of_csv()` is usually used in combination with `from_csv()`, so, the format of schema shouldn't be much matter. Before: ``` > SELECT schema_of_csv('1,abc'); struct<_c0:int,_c1:string> ``` After: ``` > SELECT schema_of_csv('1,abc'); STRUCT<`_c0`: INT, `_c1`: STRING> ``` ### How was this patch tested? By existing test suites `CsvFunctionsSuite` and `CsvExpressionsSuite`. Closes #30180 from MaxGekk/schema_of_csv-sql-schema. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- R/pkg/tests/fulltests/test_sparkSQL.R | 4 ++-- docs/sql-migration-guide.md | 2 +- python/pyspark/sql/functions.py | 4 ++-- .../spark/sql/catalyst/expressions/csvExpressions.scala | 4 ++-- .../sql/catalyst/expressions/CsvExpressionsSuite.scala | 4 ++-- .../resources/sql-tests/results/csv-functions.sql.out | 2 +- .../scala/org/apache/spark/sql/CsvFunctionsSuite.scala | 8 ++++---- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 22bd4133d46a8..3a0d359e2ae79 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1682,9 +1682,9 @@ test_that("column functions", { df <- as.DataFrame(list(list("col" = "1"))) c <- collect(select(df, schema_of_csv("Amsterdam,2018"))) - expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") c <- collect(select(df, schema_of_csv(lit("Amsterdam,2018")))) - expect_equal(c[[1]], "struct<_c0:string,_c1:int>") + expect_equal(c[[1]], "STRUCT<`_c0`: STRING, `_c1`: INT>") # Test to_json(), from_json(), schema_of_json() df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people") diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index ee82d9ac4724b..fdc764a93424b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -50,7 +50,7 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - - In Spark 3.1, the `schema_of_json` function returns the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. + - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 68639ff7b6320..69fdf220f19fe 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2964,9 +2964,9 @@ def schema_of_csv(csv, options={}): >>> df = spark.range(1) >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() - [Row(csv='struct<_c0:int,_c1:string>')] + [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] >>> df.select(schema_of_csv('1|a', {'sep':'|'}).alias("csv")).collect() - [Row(csv='struct<_c0:int,_c1:string>')] + [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] """ if isinstance(csv, str): col = _create_column_from_literal(csv) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index f9ccf3c8c811f..6fad272aa4557 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -144,7 +144,7 @@ case class CsvToStructs( examples = """ Examples: > SELECT _FUNC_('1,abc'); - struct<_c0:int,_c1:string> + STRUCT<`_c0`: INT, `_c1`: STRING> """, since = "3.0.0") case class SchemaOfCsv( @@ -186,7 +186,7 @@ case class SchemaOfCsv( val inferSchema = new CSVInferSchema(parsedOptions) val fieldTypes = inferSchema.inferRowType(startType, row) val st = StructType(inferSchema.toStructFields(fieldTypes, header)) - UTF8String.fromString(st.catalogString) + UTF8String.fromString(st.sql) } override def prettyName: String = "schema_of_csv" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala index 4a19add23fc58..7945974a1f3dc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CsvExpressionsSuite.scala @@ -158,13 +158,13 @@ class CsvExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with P } test("infer schema of CSV strings") { - checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "struct<_c0:int,_c1:string>") + checkEvaluation(new SchemaOfCsv(Literal.create("1,abc")), "STRUCT<`_c0`: INT, `_c1`: STRING>") } test("infer schema of CSV strings by using options") { checkEvaluation( new SchemaOfCsv(Literal.create("1|abc"), Map("delimiter" -> "|")), - "struct<_c0:int,_c1:string>") + "STRUCT<`_c0`: INT, `_c1`: STRING>") } test("to_csv - struct") { diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 1e3173172a528..7ba3f712363fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -82,7 +82,7 @@ select schema_of_csv('1|abc', map('delimiter', '|')) -- !query schema struct -- !query output -struct<_c0:int,_c1:string> +STRUCT<`_c0`: INT, `_c1`: STRING> -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 800e294cca8c4..abccaf19084b2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -80,16 +80,16 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { test("schema_of_csv - infers schemas") { checkAnswer( spark.range(1).select(schema_of_csv(lit("0.1,1"))), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) checkAnswer( spark.range(1).select(schema_of_csv("0.1,1")), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("schema_of_csv - infers schemas using options") { val df = spark.range(1) .select(schema_of_csv(lit("0.1 1"), Map("sep" -> " ").asJava)) - checkAnswer(df, Seq(Row("struct<_c0:double,_c1:int>"))) + checkAnswer(df, Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("to_csv - struct") { @@ -236,7 +236,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { val input = concat_ws(",", lit(0.1), lit(1)) checkAnswer( spark.range(1).select(schema_of_csv(input)), - Seq(Row("struct<_c0:double,_c1:int>"))) + Seq(Row("STRUCT<`_c0`: DOUBLE, `_c1`: INT>"))) } test("optional datetime parser does not affect csv time formatting") { From 056b62264b024c83840f2bf23f4bb9cabd13e136 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 29 Oct 2020 07:44:44 -0700 Subject: [PATCH 0354/1009] [SPARK-33263][SS] Configurable StateStore compression codec ### What changes were proposed in this pull request? This patch proposes to make StateStore compression codec configurable. ### Why are the changes needed? Currently the compression codec of StateStore is not configurable and hard-coded to be lz4. It is better if we can follow Spark other modules to configure the compression codec of StateStore. For example, we can choose zstd codec and zstd is configurable with different compression level. ### Does this PR introduce _any_ user-facing change? Yes, after this change users can config different codec for StateStore. ### How was this patch tested? Unit test. Closes #30162 from viirya/SPARK-33263. 
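As a usage note, the new option is an ordinary SQL conf; here is a minimal spark-shell sketch (assuming a `spark` session is in scope) of switching the codec before starting a streaming query:

```scala
// Use zstd instead of the default lz4 for StateStore delta and snapshot files.
// The chosen value is recorded in the query's checkpoint (offset log), so it should be
// set before the query is started for the first time.
spark.conf.set("spark.sql.streaming.stateStore.compression.codec", "zstd")
```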
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 12 +++ .../sql/execution/streaming/OffsetSeq.scala | 10 ++- .../state/HDFSBackedStateStoreProvider.scala | 8 +- .../streaming/state/StateStoreConf.scala | 3 + .../commits/0 | 2 + .../metadata | 1 + .../offsets/0 | 3 + .../state/0/0/1.delta | Bin 0 -> 85 bytes .../state/StateStoreCompatibilitySuite.scala | 84 ++++++++++++++++++ .../streaming/state/StateStoreSuite.scala | 13 +-- 10 files changed, 125 insertions(+), 11 deletions(-) create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d84dfcc8f3086..21357a492e39e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1331,6 +1331,16 @@ object SQLConf { .intConf .createWithDefault(2) + val STATE_STORE_COMPRESSION_CODEC = + buildConf("spark.sql.streaming.stateStore.compression.codec") + .internal() + .doc("The codec used to compress delta and snapshot files generated by StateStore. " + + "By default, Spark provides four codecs: lz4, lzf, snappy, and zstd. You can also " + + "use fully qualified class names to specify the codec. Default codec is lz4.") + .version("3.1.0") + .stringConf + .createWithDefault("lz4") + val STREAMING_AGGREGATION_STATE_FORMAT_VERSION = buildConf("spark.sql.streaming.aggregation.stateFormatVersion") .internal() @@ -3089,6 +3099,8 @@ class SQLConf extends Serializable with Logging { def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY) + def stateStoreCompressionCodec: String = getConf(STATE_STORE_COMPRESSION_CODEC) + def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED) def parquetFilterPushDownDate: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_DATE_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index 1c59464268444..7d7ec76467836 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -89,10 +89,15 @@ case class OffsetSeqMetadata( object OffsetSeqMetadata extends Logging { private implicit val format = Serialization.formats(NoTypeHints) + /** + * These configs are related to streaming query execution and should not be changed across + * batches of a streaming query. The values of these configs are persisted into the offset + * log in the checkpoint position. 
+ */ private val relevantSQLConfs = Seq( SHUFFLE_PARTITIONS, STATE_STORE_PROVIDER_CLASS, STREAMING_MULTIPLE_WATERMARK_POLICY, FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, STREAMING_AGGREGATION_STATE_FORMAT_VERSION, - STREAMING_JOIN_STATE_FORMAT_VERSION) + STREAMING_JOIN_STATE_FORMAT_VERSION, STATE_STORE_COMPRESSION_CODEC) /** * Default values of relevant configurations that are used for backward compatibility. @@ -111,7 +116,8 @@ object OffsetSeqMetadata extends Logging { STREAMING_AGGREGATION_STATE_FORMAT_VERSION.key -> StreamingAggregationStateManager.legacyVersion.toString, STREAMING_JOIN_STATE_FORMAT_VERSION.key -> - SymmetricHashJoinStateManager.legacyVersion.toString + SymmetricHashJoinStateManager.legacyVersion.toString, + STATE_STORE_COMPRESSION_CODEC.key -> "lz4" ) def apply(json: String): OffsetSeqMetadata = Serialization.read[OffsetSeqMetadata](json) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 31618922e44cf..0a25d51666321 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.fs._ import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.internal.Logging -import org.apache.spark.io.LZ4CompressionCodec +import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.execution.streaming.CheckpointFileManager import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream @@ -696,12 +696,14 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit } private def compressStream(outputStream: DataOutputStream): DataOutputStream = { - val compressed = new LZ4CompressionCodec(sparkConf).compressedOutputStream(outputStream) + val compressed = CompressionCodec.createCodec(sparkConf, storeConf.compressionCodec) + .compressedOutputStream(outputStream) new DataOutputStream(compressed) } private def decompressStream(inputStream: DataInputStream): DataInputStream = { - val compressed = new LZ4CompressionCodec(sparkConf).compressedInputStream(inputStream) + val compressed = CompressionCodec.createCodec(sparkConf, storeConf.compressionCodec) + .compressedInputStream(inputStream) new DataInputStream(compressed) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 84d0b76ac9158..11043bc81ae3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -52,6 +52,9 @@ class StateStoreConf( val formatValidationCheckValue: Boolean = extraOptions.getOrElse(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG, "true") == "true" + /** The compression codec used to compress delta and snapshot files. */ + val compressionCodec: String = sqlConf.stateStoreCompressionCodec + /** * Additional configurations related to state store. 
This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata new file mode 100644 index 0000000000000..df5937f800382 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/metadata @@ -0,0 +1 @@ +{"id":"6bcf6671-d23e-4ad8-824f-98aa5924ce6d"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 new file mode 100644 index 0000000000000..d12f52147dd6a --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1603918440918,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"1"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..8de7bc89a5de82e3aa620251416c3b72f55d5936 GIT binary patch literal 85 zcmeZ?GI7euPtI1=W?*2b0b;cUSscMYT7ZF(L70()fnS7ySCC1Wfsu)U--dxpkQvP6 T(O?K*_zwg=Q2nAnEpPw;QR5Aq literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala new file mode 100644 index 0000000000000..b189de8d2a21e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreCompatibilitySuite.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import java.io.File + +import org.apache.commons.io.FileUtils + +import org.apache.spark.SparkFunSuite +import org.apache.spark.io.CompressionCodec +import org.apache.spark.sql.catalyst.plans.PlanTestBase +import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Update +import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.functions.count +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.StreamTest +import org.apache.spark.util.Utils + +class StateStoreCompatibilitySuite extends StreamTest with StateStoreCodecsTest { + testWithAllCodec( + "SPARK-33263: Recovery from checkpoint before codec config introduced") { + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.0.0-streaming-statestore-codec/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + import testImplicits._ + + val inputData = MemoryStream[Int] + val aggregated = inputData.toDF().groupBy("value").agg(count("*")) + inputData.addData(1, 2, 3) + + /** + * Note: The checkpoint was generated using the following input in Spark version 3.0.0: + * AddData(inputData, 1, 2, 3) + */ + + testStream(aggregated, Update)( + StartStream( + checkpointLocation = checkpointDir.getAbsolutePath, + additionalConfs = Map(SQLConf.SHUFFLE_PARTITIONS.key -> "1")), + AddData(inputData, 1, 2), + CheckNewAnswer((1, 2), (2, 2)) + ) + } +} + +trait StateStoreCodecsTest extends SparkFunSuite with PlanTestBase { + private val codecsInShortName = + CompressionCodec.ALL_COMPRESSION_CODECS.map { c => CompressionCodec.getShortName(c) } + + protected def testWithAllCodec(name: String)(func: => Any): Unit = { + codecsInShortName.foreach { codecShortName => + test(s"$name - with codec $codecShortName") { + withSQLConf(SQLConf.STATE_STORE_COMPRESSION_CODEC.key -> codecShortName) { + func + } + } + } + + CompressionCodec.ALL_COMPRESSION_CODECS.foreach { codecShortName => + test(s"$name - with codec $codecShortName") { + withSQLConf(SQLConf.STATE_STORE_COMPRESSION_CODEC.key -> codecShortName) { + func + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 5dbc6723a3ff9..9dc6c0a760d7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -767,6 +767,7 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] sqlConf.setConf(SQLConf.STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT, minDeltasForSnapshot) sqlConf.setConf(SQLConf.MAX_BATCHES_TO_RETAIN_IN_MEMORY, numOfVersToRetainInMemory) sqlConf.setConf(SQLConf.MIN_BATCHES_TO_RETAIN, 2) + sqlConf.setConf(SQLConf.STATE_STORE_COMPRESSION_CODEC, SQLConf.get.stateStoreCompressionCodec) val provider = new 
HDFSBackedStateStoreProvider() provider.init( StateStoreId(dir, opId, partition), @@ -815,10 +816,10 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] - extends SparkFunSuite { + extends StateStoreCodecsTest { import StateStoreTestsHelper._ - test("get, put, remove, commit, and all data iterator") { + testWithAllCodec("get, put, remove, commit, and all data iterator") { val provider = newStoreProvider() // Verify state before starting a new set of updates @@ -870,7 +871,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] assert(getData(provider, version = 1) === Set("b" -> 2)) } - test("removing while iterating") { + testWithAllCodec("removing while iterating") { val provider = newStoreProvider() // Verify state before starting a new set of updates @@ -892,7 +893,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] assert(get(store, "b") === None) } - test("abort") { + testWithAllCodec("abort") { val provider = newStoreProvider() val store = provider.getStore(0) put(store, "a", 1) @@ -905,7 +906,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] store1.abort() } - test("getStore with invalid versions") { + testWithAllCodec("getStore with invalid versions") { val provider = newStoreProvider() def checkInvalidVersion(version: Int): Unit = { @@ -939,7 +940,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] checkInvalidVersion(3) } - test("two concurrent StateStores - one for read-only and one for read-write") { + testWithAllCodec("two concurrent StateStores - one for read-only and one for read-write") { // During Streaming Aggregation, we have two StateStores per task, one used as read-only in // `StateStoreRestoreExec`, and one read-write used in `StateStoreSaveExec`. `StateStore.abort` // will be called for these StateStores if they haven't committed their results. We need to From fa6311731be8643f047d2a85faf16e82300883b0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 29 Oct 2020 08:00:23 -0700 Subject: [PATCH 0355/1009] [SPARK-33283][CORE] Remove useless externalBlockStoreSize from RDDInfo ### What changes were proposed in this pull request? "external block store" API was removed after SPARK-12667, `externalBlockStoreSize` in `RDDInfo` looks like always 0 and useless. So this pr just to remove this useless variable. ### Why are the changes needed? remove useless variable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30179 from LuciferYang/SPARK-12667-FOLLOWUP. 
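For context, a minimal spark-shell sketch (illustrative only, not taken from this PR) of where the removed field used to be visible: the storage line printed by `RDD.toDebugString` for a cached RDD now reports only memory and disk sizes.

```
// Minimal sketch, not from the PR itself: the storage line of a cached RDD's debug string
// now reads " CachedPartitions: 4; MemorySize: ...; DiskSize: ..." with no
// ExternalBlockStoreSize entry.
val rdd = spark.sparkContext.parallelize(1 to 1000, 4).cache()
rdd.count()                 // materialize the cache
println(rdd.toDebugString)  // lineage plus the storage info line shown above
```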
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/rdd/RDD.scala | 5 ++--- .../apache/spark/storage/BlockManagerMasterEndpoint.scala | 1 - core/src/main/scala/org/apache/spark/storage/RDDInfo.scala | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 6095042de7f0c..15b00a4496da6 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1919,9 +1919,8 @@ abstract class RDD[T: ClassTag]( val persistence = if (storageLevel != StorageLevel.NONE) storageLevel.description else "" val storageInfo = rdd.context.getRDDStorageInfo(_.id == rdd.id).map(info => - " CachedPartitions: %d; MemorySize: %s; ExternalBlockStoreSize: %s; DiskSize: %s".format( - info.numCachedPartitions, bytesToString(info.memSize), - bytesToString(info.externalBlockStoreSize), bytesToString(info.diskSize))) + " CachedPartitions: %d; MemorySize: %s; DiskSize: %s".format( + info.numCachedPartitions, bytesToString(info.memSize), bytesToString(info.diskSize))) s"$rdd [$persistence]" +: storageInfo } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 569d7d32284bc..b8c5cbd121861 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -737,7 +737,6 @@ private[spark] class BlockManagerInfo( if (storageLevel.isValid) { /* isValid means it is either stored in-memory or on-disk. * The memSize here indicates the data size in or dropped from memory, - * externalBlockStoreSize here indicates the data size in or dropped from externalBlockStore, * and the diskSize here indicates the data size in or dropped to disk. * They can be both larger than 0, when a block is dropped from memory to disk. * Therefore, a safe way to set BlockStatus is to set its info in accurate modes. */ diff --git a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala index 27a4d4b64175e..f3575c4e43eb0 100644 --- a/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala +++ b/core/src/main/scala/org/apache/spark/storage/RDDInfo.scala @@ -38,7 +38,6 @@ class RDDInfo( var numCachedPartitions = 0 var memSize = 0L var diskSize = 0L - var externalBlockStoreSize = 0L def isCached: Boolean = (memSize + diskSize > 0) && numCachedPartitions > 0 From cbd3fdea62dab73fc4a96702de8fd1f07722da66 Mon Sep 17 00:00:00 2001 From: luluorta Date: Thu, 29 Oct 2020 16:44:17 +0000 Subject: [PATCH 0356/1009] [SPARK-33008][SQL] Division by zero on divide-like operations returns incorrect result ### What changes were proposed in this pull request? In ANSI mode, when a division by zero occurs performing a divide-like operation (Divide, IntegralDivide, Remainder or Pmod), we are returning an incorrect value. Instead, we should throw an exception, as stated in the SQL standard. ### Why are the changes needed? Result corrupt. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? added UT + existing UTs (improved) Closes #29882 from luluorta/SPARK-33008. 
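To make the behavior change concrete, here is a rough spark-shell sketch (illustrative only, not taken from this PR's description) of the four divide-like operators under ANSI mode after this change:

```
// Rough sketch assuming a spark-shell session; spark.sql.ansi.enabled turns on ANSI mode.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("SELECT 1 / 0").collect()       // Divide: java.lang.ArithmeticException: divide by zero
spark.sql("SELECT 1 div 0").collect()     // IntegralDivide: same exception
spark.sql("SELECT 1 % 0").collect()       // Remainder: same exception
spark.sql("SELECT pmod(1, 0)").collect()  // Pmod: same exception

// With ANSI mode off, the non-ANSI behavior is unchanged: all four return NULL.
spark.conf.set("spark.sql.ansi.enabled", false)
spark.sql("SELECT 1 / 0").collect()       // Array([null])
```

The same "divide by zero" error is raised from both the interpreted and the generated-code paths, so interpreted evaluation and codegen stay consistent.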
Authored-by: luluorta Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 22 +-- .../catalyst/analysis/DecimalPrecision.scala | 40 ++-- .../analysis/StreamingJoinHelper.scala | 8 +- .../sql/catalyst/analysis/TypeCoercion.scala | 10 +- .../catalyst/expressions/Canonicalize.scala | 7 +- .../expressions/aggregate/Average.scala | 6 +- .../sql/catalyst/expressions/arithmetic.scala | 159 +++++++++++----- .../expressions/bitwiseExpressions.scala | 6 + .../expressions/intervalExpressions.scala | 12 +- .../expressions/windowExpressions.scala | 2 +- .../sql/catalyst/optimizer/expressions.scala | 17 +- .../ArithmeticExpressionSuite.scala | 174 +++++++++++++----- .../expressions/ExpressionEvalHelper.scala | 15 ++ .../sql-tests/inputs/postgreSQL/case.sql | 2 +- .../inputs/postgreSQL/select_having.sql | 1 + .../inputs/udf/postgreSQL/udf-case.sql | 2 +- .../udf/postgreSQL/udf-select_having.sql | 1 + .../sql-tests/results/postgreSQL/case.sql.out | 18 +- .../sql-tests/results/postgreSQL/int8.sql.out | 15 +- .../results/postgreSQL/numeric.sql.out | 8 +- .../results/postgreSQL/select_having.sql.out | 5 +- .../results/udf/postgreSQL/udf-case.sql.out | 18 +- .../udf/postgreSQL/udf-select_having.sql.out | 5 +- .../sql/SparkSessionExtensionSuite.scala | 7 +- 24 files changed, 379 insertions(+), 181 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c2116a2b8f471..10fe5314b0ef9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -307,35 +307,35 @@ class Analyzer( object ResolveBinaryArithmetic extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case p: LogicalPlan => p.transformExpressionsUp { - case a @ Add(l, r) if a.childrenResolved => (l.dataType, r.dataType) match { + case a @ Add(l, r, f) if a.childrenResolved => (l.dataType, r.dataType) match { case (CalendarIntervalType, CalendarIntervalType) => a - case (DateType, CalendarIntervalType) => DateAddInterval(l, r) + case (DateType, CalendarIntervalType) => DateAddInterval(l, r, ansiEnabled = f) case (_, CalendarIntervalType) => Cast(TimeAdd(l, r), l.dataType) - case (CalendarIntervalType, DateType) => DateAddInterval(r, l) + case (CalendarIntervalType, DateType) => DateAddInterval(r, l, ansiEnabled = f) case (CalendarIntervalType, _) => Cast(TimeAdd(r, l), r.dataType) case (DateType, dt) if dt != StringType => DateAdd(l, r) case (dt, DateType) if dt != StringType => DateAdd(r, l) case _ => a } - case s @ Subtract(l, r) if s.childrenResolved => (l.dataType, r.dataType) match { + case s @ Subtract(l, r, f) if s.childrenResolved => (l.dataType, r.dataType) match { case (CalendarIntervalType, CalendarIntervalType) => s case (DateType, CalendarIntervalType) => - DatetimeSub(l, r, DateAddInterval(l, UnaryMinus(r))) + DatetimeSub(l, r, DateAddInterval(l, UnaryMinus(r, f), ansiEnabled = f)) case (_, CalendarIntervalType) => - Cast(DatetimeSub(l, r, TimeAdd(l, UnaryMinus(r))), l.dataType) + Cast(DatetimeSub(l, r, TimeAdd(l, UnaryMinus(r, f))), l.dataType) case (TimestampType, _) => SubtractTimestamps(l, r) case (_, TimestampType) => SubtractTimestamps(l, r) case (_, DateType) => SubtractDates(l, r) case (DateType, dt) if dt != StringType => DateSub(l, r) case _ => s } - case m @ Multiply(l, r) if m.childrenResolved 
=> (l.dataType, r.dataType) match { - case (CalendarIntervalType, _) => MultiplyInterval(l, r) - case (_, CalendarIntervalType) => MultiplyInterval(r, l) + case m @ Multiply(l, r, f) if m.childrenResolved => (l.dataType, r.dataType) match { + case (CalendarIntervalType, _) => MultiplyInterval(l, r, f) + case (_, CalendarIntervalType) => MultiplyInterval(r, l, f) case _ => m } - case d @ Divide(l, r) if d.childrenResolved => (l.dataType, r.dataType) match { - case (CalendarIntervalType, _) => DivideInterval(l, r) + case d @ Divide(l, r, f) if d.childrenResolved => (l.dataType, r.dataType) match { + case (CalendarIntervalType, _) => DivideInterval(l, r, f) case _ => d } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index f2d607e5b737c..6eed152e6dd77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -98,7 +98,7 @@ object DecimalPrecision extends TypeCoercionRule { // Skip nodes who is already promoted case e: BinaryArithmetic if e.left.isInstanceOf[PromotePrecision] => e - case Add(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case a @ Add(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultScale = max(s1, s2) val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1, @@ -106,10 +106,12 @@ object DecimalPrecision extends TypeCoercionRule { } else { DecimalType.bounded(max(p1 - s1, p2 - s2) + resultScale + 1, resultScale) } - CheckOverflow(Add(promotePrecision(e1, resultType), promotePrecision(e2, resultType)), + CheckOverflow( + a.withNewChildren(Seq(promotePrecision(e1, resultType), promotePrecision(e2, resultType))), resultType, nullOnOverflow) - case Subtract(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case s @ Subtract(e1 @ DecimalType.Expression(p1, s1), + e2 @ DecimalType.Expression(p2, s2), _) => val resultScale = max(s1, s2) val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(max(p1 - s1, p2 - s2) + resultScale + 1, @@ -117,20 +119,23 @@ object DecimalPrecision extends TypeCoercionRule { } else { DecimalType.bounded(max(p1 - s1, p2 - s2) + resultScale + 1, resultScale) } - CheckOverflow(Subtract(promotePrecision(e1, resultType), promotePrecision(e2, resultType)), + CheckOverflow( + s.withNewChildren(Seq(promotePrecision(e1, resultType), promotePrecision(e2, resultType))), resultType, nullOnOverflow) - case Multiply(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case m @ Multiply( + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(p1 + p2 + 1, s1 + s2) } else { DecimalType.bounded(p1 + p2 + 1, s1 + s2) } val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Multiply(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + m.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Divide(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case d @ 
Divide(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { // Precision: p1 - s1 + s2 + max(6, s1 + p2 + 1) // Scale: max(6, s1 + p2 + 1) @@ -149,10 +154,12 @@ object DecimalPrecision extends TypeCoercionRule { DecimalType.bounded(intDig + decDig, decDig) } val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Divide(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + d.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Remainder(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case r @ Remainder( + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2)) } else { @@ -160,10 +167,11 @@ object DecimalPrecision extends TypeCoercionRule { } // resultType may have lower precision, so we cast them into wider type first. val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Remainder(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + r.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) - case Pmod(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + case p @ Pmod(e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val resultType = if (SQLConf.get.decimalOperationsAllowPrecisionLoss) { DecimalType.adjustPrecisionScale(min(p1 - s1, p2 - s2) + max(s1, s2), max(s1, s2)) } else { @@ -171,15 +179,15 @@ object DecimalPrecision extends TypeCoercionRule { } // resultType may have lower precision, so we cast them into wider type first. 
val widerType = widerDecimalType(p1, s1, p2, s2) - CheckOverflow(Pmod(promotePrecision(e1, widerType), promotePrecision(e2, widerType)), + CheckOverflow( + p.withNewChildren(Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))), resultType, nullOnOverflow) case expr @ IntegralDivide( - e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2)) => + e1 @ DecimalType.Expression(p1, s1), e2 @ DecimalType.Expression(p2, s2), _) => val widerType = widerDecimalType(p1, s1, p2, s2) - val promotedExpr = IntegralDivide( - promotePrecision(e1, widerType), - promotePrecision(e2, widerType)) + val promotedExpr = expr.withNewChildren( + Seq(promotePrecision(e1, widerType), promotePrecision(e2, widerType))) if (expr.dataType.isInstanceOf[DecimalType]) { // This follows division rule val intDig = p1 - s1 + s2 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index 6a2ff4b91e68d..cddc3a44f4d9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -189,7 +189,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { if attributesWithEventWatermark.contains(a) && metadata.contains(delayKey) => Multiply(Literal(eventWatermark.get.toDouble), Literal(1000.0)) } - }.reduceLeft(Add) + }.reduceLeft(Add(_, _)) // Calculate the constraint value logInfo(s"Final expression to evaluate constraint:\t$exprWithWatermarkSubstituted") @@ -226,14 +226,14 @@ object StreamingJoinHelper extends PredicateHelper with Logging { */ def collect(expr: Expression, negate: Boolean): Seq[Expression] = { expr match { - case Add(left, right) => + case Add(left, right, _) => collect(left, negate) ++ collect(right, negate) - case Subtract(left, right) => + case Subtract(left, right, _) => collect(left, negate) ++ collect(right, !negate) case TimeAdd(left, right, _) => collect(left, negate) ++ collect(right, negate) case DatetimeSub(_, _, child) => collect(child, negate) - case UnaryMinus(child) => + case UnaryMinus(child, _) => collect(child, !negate) case CheckOverflow(child, _, _) => collect(child, negate) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index b69cb6091f02c..becdef8b9c603 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -454,7 +454,7 @@ object TypeCoercion { s.withNewChildren(Seq(Cast(e, DoubleType))) case s @ StddevSamp(e @ StringType(), _) => s.withNewChildren(Seq(Cast(e, DoubleType))) - case UnaryMinus(e @ StringType()) => UnaryMinus(Cast(e, DoubleType)) + case m @ UnaryMinus(e @ StringType(), _) => m.withNewChildren(Seq(Cast(e, DoubleType))) case UnaryPositive(e @ StringType()) => UnaryPositive(Cast(e, DoubleType)) case v @ VariancePop(e @ StringType(), _) => v.withNewChildren(Seq(Cast(e, DoubleType))) @@ -698,8 +698,8 @@ object TypeCoercion { // Decimal and Double remain the same case d: Divide if d.dataType == DoubleType => d case d: Divide if d.dataType.isInstanceOf[DecimalType] => d - case Divide(left, right) if isNumericOrNull(left) && isNumericOrNull(right) => - Divide(Cast(left, 
DoubleType), Cast(right, DoubleType)) + case d @ Divide(left, right, _) if isNumericOrNull(left) && isNumericOrNull(right) => + d.withNewChildren(Seq(Cast(left, DoubleType), Cast(right, DoubleType))) } private def isNumericOrNull(ex: Expression): Boolean = { @@ -715,8 +715,8 @@ object TypeCoercion { object IntegralDivision extends TypeCoercionRule { override protected def coerceTypes(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case e if !e.childrenResolved => e - case d @ IntegralDivide(left, right) => - IntegralDivide(mayCastToLong(left), mayCastToLong(right)) + case d @ IntegralDivide(left, right, _) => + d.withNewChildren(Seq(mayCastToLong(left), mayCastToLong(right))) } private def mayCastToLong(expr: Expression): Expression = expr.dataType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index 1ecf4372cfb58..ae201359a762c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -70,8 +70,11 @@ object Canonicalize { /** Rearrange expressions that are commutative or associative. */ private def expressionReorder(e: Expression): Expression = e match { - case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add) - case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply) + // TODO: do not reorder consecutive `Add`s or `Multiply`s with different `failOnError` flags + case a @ Add(_, _, f) => + orderCommutative(a, { case Add(l, r, _) => Seq(l, r) }).reduce(Add(_, _, f)) + case m @ Multiply(_, _, f) => + orderCommutative(m, { case Multiply(l, r, _) => Seq(l, r) }).reduce(Multiply(_, _, f)) case o: Or => orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index d3ce1f8d331ab..13f38ac7c9ae5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -77,11 +77,13 @@ case class Average(child: Expression) extends DeclarativeAggregate with Implicit ) // If all input are nulls, count will be 0 and we will get null after the division. + // We can't directly use `/` as it throws an exception under ansi mode. 
override lazy val evaluateExpression = child.dataType match { case _: DecimalType => - DecimalPrecision.decimalAndDecimal(sum / count.cast(DecimalType.LongDecimal)).cast(resultType) + DecimalPrecision.decimalAndDecimal( + Divide(sum, count.cast(DecimalType.LongDecimal), failOnError = false)).cast(resultType) case _ => - sum.cast(resultType) / count.cast(resultType) + Divide(sum.cast(resultType), count.cast(resultType), failOnError = false) } override lazy val updateExpressions: Seq[Expression] = Seq( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index f25fd9b672e8b..c69edccc696bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -34,9 +34,12 @@ import org.apache.spark.unsafe.types.CalendarInterval -1 """, since = "1.0.0") -case class UnaryMinus(child: Expression) extends UnaryExpression - with ExpectsInputTypes with NullIntolerant { - private val checkOverflow = SQLConf.get.ansiEnabled +case class UnaryMinus( + child: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + + def this(child: Expression) = this(child, SQLConf.get.ansiEnabled) override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval) @@ -44,11 +47,11 @@ case class UnaryMinus(child: Expression) extends UnaryExpression override def toString: String = s"-$child" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = dataType match { case _: DecimalType => defineCodeGen(ctx, ev, c => s"$c.unary_$$minus()") - case ByteType | ShortType if checkOverflow => + case ByteType | ShortType if failOnError => nullSafeCodeGen(ctx, ev, eval => { val javaBoxedType = CodeGenerator.boxedType(dataType) val javaType = CodeGenerator.javaType(dataType) @@ -61,7 +64,7 @@ case class UnaryMinus(child: Expression) extends UnaryExpression |${ev.value} = ($javaType)(-($originValue)); """.stripMargin }) - case IntegerType | LongType if checkOverflow => + case IntegerType | LongType if failOnError => nullSafeCodeGen(ctx, ev, eval => { val mathClass = classOf[Math].getName s"${ev.value} = $mathClass.negateExact($eval);" @@ -76,12 +79,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression """}) case _: CalendarIntervalType => val iu = IntervalUtils.getClass.getCanonicalName.stripSuffix("$") - val method = if (checkOverflow) "negateExact" else "negate" + val method = if (failOnError) "negateExact" else "negate" defineCodeGen(ctx, ev, c => s"$iu.$method($c)") } protected override def nullSafeEval(input: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.negateExact(input.asInstanceOf[CalendarInterval]) case CalendarIntervalType => IntervalUtils.negate(input.asInstanceOf[CalendarInterval]) case _ => numeric.negate(input) @@ -104,7 +107,8 @@ case class UnaryMinus(child: Expression) extends UnaryExpression """, since = "1.5.0") case class UnaryPositive(child: Expression) - extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + 
override def prettyName: String = "positive" override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection.NumericAndInterval) @@ -131,7 +135,7 @@ case class UnaryPositive(child: Expression) """, since = "1.2.0") case class Abs(child: Expression) - extends UnaryExpression with ExpectsInputTypes with NullIntolerant { + extends UnaryExpression with ExpectsInputTypes with NullIntolerant { override def inputTypes: Seq[AbstractDataType] = Seq(NumericType) @@ -151,7 +155,7 @@ case class Abs(child: Expression) abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { - protected val checkOverflow = SQLConf.get.ansiEnabled + protected val failOnError: Boolean override def dataType: DataType = left.dataType @@ -181,7 +185,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { case ByteType | ShortType => nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val tmpResult = ctx.freshName("tmpResult") - val overflowCheck = if (checkOverflow) { + val overflowCheck = if (failOnError) { val javaType = CodeGenerator.boxedType(dataType) s""" |if ($tmpResult < $javaType.MIN_VALUE || $tmpResult > $javaType.MAX_VALUE) { @@ -199,7 +203,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { }) case IntegerType | LongType => nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val operation = if (checkOverflow && exactMathMethod.isDefined) { + val operation = if (failOnError && exactMathMethod.isDefined) { val mathClass = classOf[Math].getName s"$mathClass.${exactMathMethod.get}($eval1, $eval2)" } else { @@ -233,7 +237,12 @@ object BinaryArithmetic { 3 """, since = "1.0.0") -case class Add(left: Expression, right: Expression) extends BinaryArithmetic { +case class Add( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -241,12 +250,12 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic { override def decimalMethod: String = "$plus" - override def calendarIntervalMethod: String = if (checkOverflow) "addExact" else "add" + override def calendarIntervalMethod: String = if (failOnError) "addExact" else "add" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.addExact( input1.asInstanceOf[CalendarInterval], input2.asInstanceOf[CalendarInterval]) case CalendarIntervalType => @@ -266,7 +275,12 @@ case class Add(left: Expression, right: Expression) extends BinaryArithmetic { 1 """, since = "1.0.0") -case class Subtract(left: Expression, right: Expression) extends BinaryArithmetic { +case class Subtract( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection.NumericAndInterval @@ -274,12 +288,12 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti override def decimalMethod: String = "$minus" - override def calendarIntervalMethod: String = if (checkOverflow) 
"subtractExact" else "subtract" + override def calendarIntervalMethod: String = if (failOnError) "subtractExact" else "subtract" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = dataType match { - case CalendarIntervalType if checkOverflow => + case CalendarIntervalType if failOnError => IntervalUtils.subtractExact( input1.asInstanceOf[CalendarInterval], input2.asInstanceOf[CalendarInterval]) case CalendarIntervalType => @@ -299,14 +313,19 @@ case class Subtract(left: Expression, right: Expression) extends BinaryArithmeti 6 """, since = "1.0.0") -case class Multiply(left: Expression, right: Expression) extends BinaryArithmetic { +case class Multiply( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = NumericType override def symbol: String = "*" override def decimalMethod: String = "$times" - private lazy val numeric = TypeUtils.getNumeric(dataType, checkOverflow) + private lazy val numeric = TypeUtils.getNumeric(dataType, failOnError) protected override def nullSafeEval(input1: Any, input2: Any): Any = numeric.times(input1, input2) @@ -320,15 +339,25 @@ trait DivModLike extends BinaryArithmetic { override def nullable: Boolean = true + private lazy val isZero: Any => Boolean = right.dataType match { + case _: DecimalType => x => x.asInstanceOf[Decimal].isZero + case _ => x => x == 0 + } + final override def eval(input: InternalRow): Any = { + // evaluate right first as we have a chance to skip left if right is 0 val input2 = right.eval(input) - if (input2 == null || input2 == 0) { + if (input2 == null || (!failOnError && isZero(input2))) { null } else { val input1 = left.eval(input) if (input1 == null) { null } else { + if (isZero(input2)) { + // when we reach here, failOnError must bet true. + throw new ArithmeticException("divide by zero") + } evalOperation(input1, input2) } } @@ -337,7 +366,7 @@ trait DivModLike extends BinaryArithmetic { def evalOperation(left: Any, right: Any): Any /** - * Special case handling due to division/remainder by 0 => null. + * Special case handling due to division/remainder by 0 => null or ArithmeticException. 
*/ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval1 = left.genCode(ctx) @@ -354,29 +383,42 @@ trait DivModLike extends BinaryArithmetic { } else { s"($javaType)(${eval1.value} $symbol ${eval2.value})" } + // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { + val divByZero = if (failOnError) { + "throw new ArithmeticException(\"divide by zero\");" + } else { + s"${ev.isNull} = true;" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if ($isZero) { - ${ev.isNull} = true; + $divByZero } else { ${eval1.code} ${ev.value} = $operation; }""") } else { + val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError) { + s"""if ($isZero) throw new ArithmeticException("divide by zero");""" + } else { + "" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (${eval2.isNull} || $isZero) { + if (${eval2.isNull}$nullOnErrorCondition) { ${ev.isNull} = true; } else { ${eval1.code} if (${eval1.isNull}) { ${ev.isNull} = true; } else { + $failOnErrorBranch ${ev.value} = $operation; } }""") @@ -396,7 +438,12 @@ trait DivModLike extends BinaryArithmetic { """, since = "1.0.0") // scalastyle:on line.size.limit -case class Divide(left: Expression, right: Expression) extends DivModLike { +case class Divide( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection(DoubleType, DecimalType) @@ -422,7 +469,10 @@ case class Divide(left: Expression, right: Expression) extends DivModLike { // scalastyle:on line.size.limit case class IntegralDivide( left: Expression, - right: Expression) extends DivModLike { + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = TypeCollection(LongType, DecimalType) @@ -453,12 +503,6 @@ case class IntegralDivide( override def evalOperation(left: Any, right: Any): Any = div(left, right) } -object IntegralDivide { - def apply(left: Expression, right: Expression): IntegralDivide = { - new IntegralDivide(left, right) - } -} - @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.", examples = """ @@ -469,7 +513,12 @@ object IntegralDivide { 0.2 """, since = "1.0.0") -case class Remainder(left: Expression, right: Expression) extends DivModLike { +case class Remainder( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends DivModLike { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def inputType: AbstractDataType = NumericType @@ -517,7 +566,12 @@ case class Remainder(left: Expression, right: Expression) extends DivModLike { 2 """, since = "1.5.0") -case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { +case class Pmod( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) override def 
toString: String = s"pmod($left, $right)" @@ -530,15 +584,25 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { override def nullable: Boolean = true - override def eval(input: InternalRow): Any = { + private lazy val isZero: Any => Boolean = right.dataType match { + case _: DecimalType => x => x.asInstanceOf[Decimal].isZero + case _ => x => x == 0 + } + + final override def eval(input: InternalRow): Any = { + // evaluate right first as we have a chance to skip left if right is 0 val input2 = right.eval(input) - if (input2 == null || input2 == 0) { + if (input2 == null || (!failOnError && isZero(input2))) { null } else { val input1 = left.eval(input) if (input1 == null) { null } else { + if (isZero(input2)) { + // when we reach here, failOnError must bet true. + throw new ArithmeticException("divide by zero") + } input1 match { case i: Integer => pmod(i, input2.asInstanceOf[java.lang.Integer]) case l: Long => pmod(l, input2.asInstanceOf[java.lang.Long]) @@ -595,29 +659,42 @@ case class Pmod(left: Expression, right: Expression) extends BinaryArithmetic { """ } + // evaluate right first as we have a chance to skip left if right is 0 if (!left.nullable && !right.nullable) { + val divByZero = if (failOnError) { + "throw new ArithmeticException(\"divide by zero\");" + } else { + s"${ev.isNull} = true;" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; if ($isZero) { - ${ev.isNull} = true; + $divByZero } else { ${eval1.code} $result }""") } else { + val nullOnErrorCondition = if (failOnError) "" else s" || $isZero" + val failOnErrorBranch = if (failOnError) { + s"""if ($isZero) throw new ArithmeticException("divide by zero");""" + } else { + "" + } ev.copy(code = code""" ${eval2.code} boolean ${ev.isNull} = false; $javaType ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; - if (${eval2.isNull} || $isZero) { + if (${eval2.isNull}$nullOnErrorCondition) { ${ev.isNull} = true; } else { ${eval1.code} if (${eval1.isNull}) { ${ev.isNull} = true; } else { + $failOnErrorBranch $result } }""") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index aa3993dccd1c5..33ce60875c600 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -36,6 +36,8 @@ import org.apache.spark.sql.types._ since = "1.4.0") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType override def symbol: String = "&" @@ -69,6 +71,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme since = "1.4.0") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType override def symbol: String = "|" @@ -102,6 +106,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet since = "1.4.0") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { + protected override val failOnError: Boolean = false + override def inputType: AbstractDataType = IntegralType 
override def symbol: String = "^" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index db3b2a38fece0..8b92c619df626 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -109,25 +109,25 @@ abstract class IntervalNumOperation( case class MultiplyInterval( interval: Expression, num: Expression, - checkOverflow: Boolean = SQLConf.get.ansiEnabled) + failOnError: Boolean = SQLConf.get.ansiEnabled) extends IntervalNumOperation(interval, num) { override protected val operation: (CalendarInterval, Double) => CalendarInterval = - if (checkOverflow) multiplyExact else multiply + if (failOnError) multiplyExact else multiply - override protected def operationName: String = if (checkOverflow) "multiplyExact" else "multiply" + override protected def operationName: String = if (failOnError) "multiplyExact" else "multiply" } case class DivideInterval( interval: Expression, num: Expression, - checkOverflow: Boolean = SQLConf.get.ansiEnabled) + failOnError: Boolean = SQLConf.get.ansiEnabled) extends IntervalNumOperation(interval, num) { override protected val operation: (CalendarInterval, Double) => CalendarInterval = - if (checkOverflow) divideExact else divide + if (failOnError) divideExact else divide - override protected def operationName: String = if (checkOverflow) "divideExact" else "divide" + override protected def operationName: String = if (failOnError) "divideExact" else "divide" } // scalastyle:off line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 168585dc3de00..1a57afa8d9aae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -235,7 +235,7 @@ case class SpecifiedWindowFrame( private def boundarySql(expr: Expression): String = expr match { case e: SpecialFrameBoundary => e.sql - case UnaryMinus(n) => n.sql + " PRECEDING" + case UnaryMinus(n, _) => n.sql + " PRECEDING" case e: Expression => e.sql + " FOLLOWING" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 0e7a39c54050e..55a45f4410b34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -177,7 +177,7 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { private def flattenAdd( expression: Expression, groupSet: ExpressionSet): Seq[Expression] = expression match { - case expr @ Add(l, r) if !groupSet.contains(expr) => + case expr @ Add(l, r, _) if !groupSet.contains(expr) => flattenAdd(l, groupSet) ++ flattenAdd(r, groupSet) case other => other :: Nil } @@ -185,7 +185,7 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { private def flattenMultiply( expression: Expression, groupSet: ExpressionSet): Seq[Expression] = expression match { - case expr @ Multiply(l, r) if !groupSet.contains(expr) => + case 
expr @ Multiply(l, r, _) if !groupSet.contains(expr) => flattenMultiply(l, groupSet) ++ flattenMultiply(r, groupSet) case other => other :: Nil } @@ -201,23 +201,24 @@ object ReorderAssociativeOperator extends Rule[LogicalPlan] { // We have to respect aggregate expressions which exists in grouping expressions when plan // is an Aggregate operator, otherwise the optimized expression could not be derived from // grouping expressions. + // TODO: do not reorder consecutive `Add`s or `Multiply`s with different `failOnError` flags val groupingExpressionSet = collectGroupingExpressions(q) q transformExpressionsDown { - case a: Add if a.deterministic && a.dataType.isInstanceOf[IntegralType] => + case a @ Add(_, _, f) if a.deterministic && a.dataType.isInstanceOf[IntegralType] => val (foldables, others) = flattenAdd(a, groupingExpressionSet).partition(_.foldable) if (foldables.size > 1) { - val foldableExpr = foldables.reduce((x, y) => Add(x, y)) + val foldableExpr = foldables.reduce((x, y) => Add(x, y, f)) val c = Literal.create(foldableExpr.eval(EmptyRow), a.dataType) - if (others.isEmpty) c else Add(others.reduce((x, y) => Add(x, y)), c) + if (others.isEmpty) c else Add(others.reduce((x, y) => Add(x, y, f)), c, f) } else { a } - case m: Multiply if m.deterministic && m.dataType.isInstanceOf[IntegralType] => + case m @ Multiply(_, _, f) if m.deterministic && m.dataType.isInstanceOf[IntegralType] => val (foldables, others) = flattenMultiply(m, groupingExpressionSet).partition(_.foldable) if (foldables.size > 1) { - val foldableExpr = foldables.reduce((x, y) => Multiply(x, y)) + val foldableExpr = foldables.reduce((x, y) => Multiply(x, y, f)) val c = Literal.create(foldableExpr.eval(EmptyRow), m.dataType) - if (others.isEmpty) c else Multiply(others.reduce((x, y) => Multiply(x, y)), c) + if (others.isEmpty) c else Multiply(others.reduce((x, y) => Multiply(x, y, f)), c, f) } else { m } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index f05598aeb5353..14dd04afebe28 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -60,10 +60,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Add(positiveIntLit, negativeIntLit), -1) checkEvaluation(Add(positiveLongLit, negativeLongLit), -1L) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Add, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Add(_, _), tpe, tpe) } } } @@ -103,8 +103,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(UnaryMinus(positiveLongLit), - positiveLong) checkEvaluation(UnaryMinus(negativeLongLit), - negativeLong) - DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(UnaryMinus, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericAndInterval.foreach { tpe => + 
checkConsistencyBetweenInterpretedAndCodegenAllowingException(UnaryMinus(_), tpe) + } + } } } @@ -121,10 +125,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Subtract(positiveIntLit, negativeIntLit), positiveInt - negativeInt) checkEvaluation(Subtract(positiveLongLit, negativeLongLit), positiveLong - negativeLong) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericAndInterval.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Subtract, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Subtract(_, _), tpe, tpe) } } } @@ -143,10 +147,10 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Multiply(positiveIntLit, negativeIntLit), positiveInt * negativeInt) checkEvaluation(Multiply(positiveLongLit, negativeLongLit), positiveLong * negativeLong) - Seq("true", "false").foreach { checkOverflow => - withSQLConf(SQLConf.ANSI_ENABLED.key -> checkOverflow) { + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegenAllowingException(Multiply, tpe, tpe) + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Multiply(_, _), tpe, tpe) } } } @@ -161,21 +165,45 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper testDecimalAndDoubleType { convert => val left = Literal(convert(2)) val right = Literal(convert(1)) - val dataType = left.dataType checkEvaluation(Divide(left, right), convert(2)) - checkEvaluation(Divide(Literal.create(null, dataType), right), null) + checkEvaluation(Divide(Literal.create(null, left.dataType), right), null) checkEvaluation(Divide(left, Literal.create(null, right.dataType)), null) checkEvaluation(Divide(left, Literal(convert(0))), null) // divide by zero } - Seq(DoubleType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(Divide, tpe, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + Seq(DoubleType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Divide(_, _), tpe, tpe) + } + } } } - test("/ (Divide) for Long type") { - checkEvaluation(IntegralDivide(Literal(1.toLong), Literal(2.toLong)), 0L) + private def testDecimalAndLongType(testFunc: (Int => Any) => Unit): Unit = { + testFunc(_.toLong) + testFunc(Decimal(_)) + } + + test("/ (Divide) for Long and Decimal type") { + testDecimalAndLongType { convert => + val left = Literal(convert(1)) + val right = Literal(convert(2)) + checkEvaluation(IntegralDivide(left, right), 0L) + checkEvaluation(IntegralDivide(Literal.create(null, left.dataType), right), null) + checkEvaluation(IntegralDivide(left, Literal.create(null, right.dataType)), null) + checkEvaluation(IntegralDivide(left, Literal(convert(0))), null) // divide by zero + } checkEvaluation(IntegralDivide(positiveLongLit, negativeLongLit), 0L) + + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + Seq(LongType, DecimalType.SYSTEM_DEFAULT).foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException( + IntegralDivide(_, 
_), tpe, tpe) + } + } + } } test("% (Remainder)") { @@ -194,8 +222,12 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Remainder(positiveLongLit, positiveLongLit), 0L) checkEvaluation(Remainder(negativeLongLit, negativeLongLit), 0L) - DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => - checkConsistencyBetweenInterpretedAndCodegen(Remainder, tpe, tpe) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Remainder(_, _), tpe, tpe) + } + } } } @@ -248,12 +280,13 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(Pmod(positiveInt, negativeInt), positiveInt) checkEvaluation(Pmod(positiveLong, negativeLong), positiveLong) - // mod by 0 - checkEvaluation(Pmod(Literal(-7), Literal(0)), null) - checkEvaluation(Pmod(Literal(7.2D), Literal(0D)), null) - checkEvaluation(Pmod(Literal(7.2F), Literal(0F)), null) - checkEvaluation(Pmod(Literal(2.toByte), Literal(0.toByte)), null) - checkEvaluation(Pmod(positiveShort, 0.toShort), null) + Seq("true", "false").foreach { failOnError => + withSQLConf(SQLConf.ANSI_ENABLED.key -> failOnError) { + DataTypeTestUtils.numericTypeWithoutDecimal.foreach { tpe => + checkConsistencyBetweenInterpretedAndCodegenAllowingException(Pmod(_, _), tpe, tpe) + } + } + } } test("function least") { @@ -408,18 +441,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on long returns wrong result") { val maxLongLiteral = Literal(Long.MaxValue) val minLongLiteral = Literal(Long.MinValue) - val e1 = Add(maxLongLiteral, Literal(1L)) - val e2 = Subtract(maxLongLiteral, Literal(-1L)) - val e3 = Multiply(maxLongLiteral, Literal(2L)) - val e4 = Add(minLongLiteral, minLongLiteral) - val e5 = Subtract(minLongLiteral, maxLongLiteral) - val e6 = Multiply(minLongLiteral, minLongLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxLongLiteral, Literal(1L)) + val e2 = Subtract(maxLongLiteral, Literal(-1L)) + val e3 = Multiply(maxLongLiteral, Literal(2L)) + val e4 = Add(minLongLiteral, minLongLiteral) + val e5 = Subtract(minLongLiteral, maxLongLiteral) + val e6 = Multiply(minLongLiteral, minLongLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxLongLiteral, Literal(1L)) + val e2 = Subtract(maxLongLiteral, Literal(-1L)) + val e3 = Multiply(maxLongLiteral, Literal(2L)) + val e4 = Add(minLongLiteral, minLongLiteral) + val e5 = Subtract(minLongLiteral, maxLongLiteral) + val e6 = Multiply(minLongLiteral, minLongLiteral) checkEvaluation(e1, Long.MinValue) checkEvaluation(e2, Long.MinValue) checkEvaluation(e3, -2L) @@ -432,18 +471,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on integer returns wrong result") { val maxIntLiteral = Literal(Int.MaxValue) val minIntLiteral = Literal(Int.MinValue) - val e1 = Add(maxIntLiteral, Literal(1)) - val e2 = Subtract(maxIntLiteral, Literal(-1)) - val e3 = Multiply(maxIntLiteral, Literal(2)) - val e4 = Add(minIntLiteral, minIntLiteral) - val e5 = Subtract(minIntLiteral, maxIntLiteral) - val e6 = Multiply(minIntLiteral, minIntLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = 
Add(maxIntLiteral, Literal(1)) + val e2 = Subtract(maxIntLiteral, Literal(-1)) + val e3 = Multiply(maxIntLiteral, Literal(2)) + val e4 = Add(minIntLiteral, minIntLiteral) + val e5 = Subtract(minIntLiteral, maxIntLiteral) + val e6 = Multiply(minIntLiteral, minIntLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxIntLiteral, Literal(1)) + val e2 = Subtract(maxIntLiteral, Literal(-1)) + val e3 = Multiply(maxIntLiteral, Literal(2)) + val e4 = Add(minIntLiteral, minIntLiteral) + val e5 = Subtract(minIntLiteral, maxIntLiteral) + val e6 = Multiply(minIntLiteral, minIntLiteral) checkEvaluation(e1, Int.MinValue) checkEvaluation(e2, Int.MinValue) checkEvaluation(e3, -2) @@ -456,18 +501,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on short returns wrong result") { val maxShortLiteral = Literal(Short.MaxValue) val minShortLiteral = Literal(Short.MinValue) - val e1 = Add(maxShortLiteral, Literal(1.toShort)) - val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) - val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) - val e4 = Add(minShortLiteral, minShortLiteral) - val e5 = Subtract(minShortLiteral, maxShortLiteral) - val e6 = Multiply(minShortLiteral, minShortLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxShortLiteral, Literal(1.toShort)) + val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) + val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) + val e4 = Add(minShortLiteral, minShortLiteral) + val e5 = Subtract(minShortLiteral, maxShortLiteral) + val e6 = Multiply(minShortLiteral, minShortLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxShortLiteral, Literal(1.toShort)) + val e2 = Subtract(maxShortLiteral, Literal((-1).toShort)) + val e3 = Multiply(maxShortLiteral, Literal(2.toShort)) + val e4 = Add(minShortLiteral, minShortLiteral) + val e5 = Subtract(minShortLiteral, maxShortLiteral) + val e6 = Multiply(minShortLiteral, minShortLiteral) checkEvaluation(e1, Short.MinValue) checkEvaluation(e2, Short.MinValue) checkEvaluation(e3, (-2).toShort) @@ -480,18 +531,24 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("SPARK-24598: overflow on byte returns wrong result") { val maxByteLiteral = Literal(Byte.MaxValue) val minByteLiteral = Literal(Byte.MinValue) - val e1 = Add(maxByteLiteral, Literal(1.toByte)) - val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) - val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) - val e4 = Add(minByteLiteral, minByteLiteral) - val e5 = Subtract(minByteLiteral, maxByteLiteral) - val e6 = Multiply(minByteLiteral, minByteLiteral) withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val e1 = Add(maxByteLiteral, Literal(1.toByte)) + val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) + val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) + val e4 = Add(minByteLiteral, minByteLiteral) + val e5 = Subtract(minByteLiteral, maxByteLiteral) + val e6 = Multiply(minByteLiteral, minByteLiteral) Seq(e1, e2, e3, e4, e5, e6).foreach { e => checkExceptionInExpression[ArithmeticException](e, "overflow") } } withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val e1 = Add(maxByteLiteral, Literal(1.toByte)) + val e2 = Subtract(maxByteLiteral, Literal((-1).toByte)) 
+ val e3 = Multiply(maxByteLiteral, Literal(2.toByte)) + val e4 = Add(minByteLiteral, minByteLiteral) + val e5 = Subtract(minByteLiteral, maxByteLiteral) + val e6 = Multiply(minByteLiteral, minByteLiteral) checkEvaluation(e1, Byte.MinValue) checkEvaluation(e2, Byte.MinValue) checkEvaluation(e3, (-2).toByte) @@ -500,4 +557,23 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(e6, 0.toByte) } } + + test("SPARK-33008: division by zero on divide-like operations returns incorrect result") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val operators: Seq[((Expression, Expression) => Expression, ((Int => Any) => Unit) => Unit)] = + Seq((Divide(_, _), testDecimalAndDoubleType), + (IntegralDivide(_, _), testDecimalAndLongType), + (Remainder(_, _), testNumericDataTypes), + (Pmod(_, _), testNumericDataTypes)) + operators.foreach { case (operator, testTypesFn) => + testTypesFn { convert => + val one = Literal(convert(1)) + val zero = Literal(convert(0)) + checkEvaluation(operator(Literal.create(null, one.dataType), zero), null) + checkEvaluation(operator(one, Literal.create(null, zero.dataType)), null) + checkExceptionInExpression[ArithmeticException](operator(one, zero), "divide by zero") + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 60ab98eeb410a..842c8f3243f2a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -349,6 +349,21 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB } } + /** + * Test evaluation results between Interpreted mode and Codegen mode, making sure we have + * consistent result regardless of the evaluation method we use. If an exception is thrown, + * it checks that both modes throw the same exception. + * + * This method test against unary expressions by feeding them arbitrary literals of `dataType`. + */ + def checkConsistencyBetweenInterpretedAndCodegenAllowingException( + c: Expression => Expression, + dataType: DataType): Unit = { + forAll (LiteralGenerator.randomGen(dataType)) { (l: Literal) => + cmpInterpretWithCodegen(EmptyRow, c(l), true) + } + } + /** * Test evaluation results between Interpreted mode and Codegen mode, making sure we have * consistent result regardless of the evaluation method we use. 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql index 6d9c44c67a96b..b39ccb85fb366 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql @@ -65,11 +65,11 @@ SELECT '7' AS `None`, CASE WHEN rand() < 0 THEN 1 END AS `NULL on no matches`; +-- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END; --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL -- However we do not currently suppress folding of potentially -- reachable subexpressions SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql index 2edde8df08047..0efe0877e9b3e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/select_having.sql @@ -49,6 +49,7 @@ SELECT 1 AS one FROM test_having HAVING a > 1; SELECT 1 AS one FROM test_having HAVING 1 > 2; SELECT 1 AS one FROM test_having HAVING 1 < 2; +-- [SPARK-33008] Spark SQL throws an exception -- and just to prove that we aren't scanning the table: SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql index 8fa3c0a6dfec9..5322c1b502439 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql @@ -67,11 +67,11 @@ SELECT '7' AS `None`, CASE WHEN rand() < udf(0) THEN 1 END AS `NULL on no matches`; +-- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END; --- [SPARK-27923] PostgreSQL throws an exception but Spark SQL is NULL -- However we do not currently suppress folding of potentially -- reachable subexpressions SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql index 412d45b49a184..76c0b198aa439 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-select_having.sql @@ -51,6 +51,7 @@ SELECT 1 AS one FROM test_having HAVING udf(a) > 1; SELECT 1 AS one FROM test_having HAVING udf(udf(1) > udf(2)); SELECT 1 AS one FROM test_having HAVING udf(udf(1) < udf(2)); +-- [SPARK-33008] Spark SQL throws an exception -- and just to prove that we aren't scanning the table: SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2; diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out index 1b002c3f48ae2..0006768dbcb0f 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out @@ -176,28 +176,28 @@ struct -- !query SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl -- !query schema -struct 100) THEN (CAST(1 AS DOUBLE) / CAST(0 AS DOUBLE)) ELSE CAST(0 AS DOUBLE) END:double> +struct<> -- !query output -0.0 -0.0 -0.0 -0.0 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 18b0c821ae70f..6f98e2f9eeee7 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -569,25 +569,28 @@ struct -- !query select bigint('9223372036854775800') / bigint('0') -- !query schema -struct<(CAST(CAST(9223372036854775800 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query select bigint('-9223372036854775808') / smallint('0') -- !query schema -struct<(CAST(CAST(-9223372036854775808 AS BIGINT) AS DOUBLE) / CAST(CAST(0 AS SMALLINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query select smallint('100') / bigint('0') -- !query schema -struct<(CAST(CAST(100 AS SMALLINT) AS DOUBLE) / CAST(CAST(0 AS BIGINT) AS DOUBLE)):double> +struct<> -- !query output -NULL +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index d97853d5fc6d0..fc2961a072e9f 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -4673,7 +4673,7 @@ struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div -- !query select mod(cast(999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST(CAST(999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +struct -- !query output 999999999999999999999 @@ -4689,7 +4689,7 @@ struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) di -- !query select mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))):decimal(22,0)> +struct -- !query output -999999999999999999999 @@ -4697,7 +4697,7 @@ struct<(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % -- !query select div(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000)*1000000000000000000000 + mod(cast(-9999999999999999999999 as decimal(38, 0)),1000000000000000000000) -- !query schema -struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div 
CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(20,0)) AS DECIMAL(22,0)) * CAST(1000000000000000000000 AS DECIMAL(22,0))) AS DECIMAL(38,0)) + CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) % CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> +struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)) div CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(20,0)) AS DECIMAL(22,0)) * CAST(1000000000000000000000 AS DECIMAL(22,0))) AS DECIMAL(38,0)) + CAST(mod(CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS DECIMAL(38,0)), CAST(1000000000000000000000 AS DECIMAL(38,0))) AS DECIMAL(38,0))):decimal(38,0)> -- !query output -9999999999999999999999 @@ -4705,7 +4705,7 @@ struct<(CAST((CAST(CAST((CAST(CAST(-9999999999999999999999 AS DECIMAL(38,0)) AS -- !query select mod (70.0,70) -- !query schema -struct<(CAST(70.0 AS DECIMAL(3,1)) % CAST(CAST(70 AS DECIMAL(2,0)) AS DECIMAL(3,1))):decimal(3,1)> +struct -- !query output 0.0 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index d8d33d92a7cc4..e4b7f3b1f5e88 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -174,9 +174,10 @@ struct -- !query SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 -- !query schema -struct +struct<> -- !query output -1 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out index 6c733e916d734..2f31d2684ca22 100755 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out @@ -176,28 +176,28 @@ struct -- !query SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct +struct<> -- !query output -1.0 +java.lang.ArithmeticException +divide by zero -- !query SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl -- !query schema -struct 100) THEN CAST(udf(ansi_cast((ansi_cast(1 as double) / ansi_cast(0 as double)) as string)) AS DOUBLE) ELSE CAST(CAST(udf(ansi_cast(0 as string)) AS INT) AS DOUBLE) END:double> +struct<> -- !query output -0.0 -0.0 -0.0 -0.0 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 50b6e60086747..89fc36a0da827 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -174,9 +174,10 @@ struct -- !query SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 -- !query schema -struct +struct<> -- !query output -1 +java.lang.ArithmeticException +divide by zero -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index ebe4e8dea97e3..cc88f9ad3da40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -643,8 +643,11 @@ class ColumnarProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) * A version of add that supports columnar processing for longs. This version is broken * on purpose so it adds the numbers plus 1 so that the tests can show that it was replaced. */ -class BrokenColumnarAdd(left: ColumnarExpression, right: ColumnarExpression) - extends Add(left, right) with ColumnarExpression { +class BrokenColumnarAdd( + left: ColumnarExpression, + right: ColumnarExpression, + failOnError: Boolean = false) + extends Add(left, right, failOnError) with ColumnarExpression { override def supportsColumnar(): Boolean = left.supportsColumnar && right.supportsColumnar From 838791bf0b8290143001fe8f94b1fbbd53a181d2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 29 Oct 2020 19:10:01 -0700 Subject: [PATCH 0357/1009] [SPARK-33292][SQL] Make Literal ArrayBasedMapData string representation disambiguous ### What changes were proposed in this pull request? This PR aims to wrap `ArrayBasedMapData` literal representation with `map(...)`. ### Why are the changes needed? Literal ArrayBasedMapData has inconsistent string representation from `LogicalPlan` to `Optimized Logical Plan/Physical Plan`. Also, the representation at `Optimized Logical Plan` and `Physical Plan` is ambiguous like `[1 AS a#0, keys: [key1], values: [value1] AS b#1]`. **BEFORE** ```scala scala> spark.version res0: String = 2.4.7 scala> sql("SELECT 1 a, map('key1', 'value1') b").explain(true) == Parsed Logical Plan == 'Project [1 AS a#0, 'map(key1, value1) AS b#1] +- OneRowRelation == Analyzed Logical Plan == a: int, b: map Project [1 AS a#0, map(key1, value1) AS b#1] +- OneRowRelation == Optimized Logical Plan == Project [1 AS a#0, keys: [key1], values: [value1] AS b#1] +- OneRowRelation == Physical Plan == *(1) Project [1 AS a#0, keys: [key1], values: [value1] AS b#1] +- Scan OneRowRelation[] ``` **AFTER** ```scala scala> spark.version res0: String = 3.1.0-SNAPSHOT scala> sql("SELECT 1 a, map('key1', 'value1') b").explain(true) == Parsed Logical Plan == 'Project [1 AS a#4, 'map(key1, value1) AS b#5] +- OneRowRelation == Analyzed Logical Plan == a: int, b: map Project [1 AS a#4, map(key1, value1) AS b#5] +- OneRowRelation == Optimized Logical Plan == Project [1 AS a#4, map(keys: [key1], values: [value1]) AS b#5] +- OneRowRelation == Physical Plan == *(1) Project [1 AS a#4, map(keys: [key1], values: [value1]) AS b#5] +- *(1) Scan OneRowRelation[] ``` ### Does this PR introduce _any_ user-facing change? Yes. This changes the query plan's string representation in `explain` command and UI. However, this is a bug fix. ### How was this patch tested? Pass the CI with the newly added test case. Closes #30190 from dongjoon-hyun/SPARK-33292. 
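A quick way to see the new rendering directly, outside of `explain`, is to print the literal itself. This is only a sketch (it assumes the spark-catalyst classes are on the classpath) and simply exercises the `Literal.toString` path covered by the assertion added to `LiteralExpressionSuite` below:

```scala
// Sketch only: shows the string representation this patch changes.
import org.apache.spark.sql.catalyst.expressions.Literal

val mapLiteral = Literal.create(Map("a" -> 1))

// Before the patch this printed the bare ArrayBasedMapData payload,
// "keys: [a], values: [1]"; with the added `case d: ArrayBasedMapData`
// branch it prints "map(keys: [a], values: [1])".
println(mapLiteral.toString)
```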
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/expressions/literals.scala | 1 + .../spark/sql/catalyst/expressions/LiteralExpressionSuite.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 213a58a3244e2..9e96ab8a9b6ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -297,6 +297,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { override def toString: String = value match { case null => "null" case binary: Array[Byte] => s"0x" + DatatypeConverter.printHexBinary(binary) + case d: ArrayBasedMapData => s"map(${d.toString})" case other => other.toString } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 4714635a3370b..bb86135021b91 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -209,6 +209,7 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { } checkMapLiteral(Map("a" -> 1, "b" -> 2, "c" -> 3)) checkMapLiteral(Map("1" -> 1.0, "2" -> 2.0, "3" -> 3.0)) + assert(Literal.create(Map("a" -> 1)).toString === "map(keys: [a], values: [1])") } test("struct") { From 343e0bb3adae465547e1423ea79f07d0e79adee7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 30 Oct 2020 11:18:47 +0900 Subject: [PATCH 0358/1009] [SPARK-33286][SQL] Improve the error message about schema parsing by `from_json/from_csv` # What changes were proposed in this pull request? In the PR, I propose to improve the error message from `from_json`/`from_csv` by combining errors from all schema parsers: - DataType.fromJson (except CSV) - CatalystSqlParser.parseDataType - CatalystSqlParser.parseTableSchema Before the changes, `from_json` does not show error messages from the first parser in the chain that could mislead users. ### Why are the changes needed? Currently, `from_json` outputs the error message from the fallback schema parser which can confuse end-users. For example: ```scala val invalidJsonSchema = """{"fields": [{"a":123}], "type": "struct"}""" df.select(from_json($"json", invalidJsonSchema, Map.empty[String, String])).show() ``` The JSON schema has an issue in `{"a":123}` but the error message doesn't point it out: ``` mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ org.apache.spark.sql.catalyst.parser.ParseException: mismatched input '{' expecting {'ADD', 'AFTER', ... }(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ ``` ### Does this PR introduce _any_ user-facing change? Yes, after the changes for the example above: ``` Cannot parse the schema in JSON format: Failed to convert the JSON string '{"a":123}' to a field. 
Failed fallback parsing: Cannot parse the data type: mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ Failed fallback parsing: mismatched input '{' expecting {'ADD', 'AFTER', ...}(line 1, pos 0) == SQL == {"fields": [{"a":123}], "type": "struct"} ^^^ ``` ### How was this patch tested? - By existing tests suites like `JsonFunctionsSuite` and `JsonExpressionsSuite`. - Add new test to `JsonFunctionsSuite`. - Re-gen results for `json-functions.sql`. Closes #30183 from MaxGekk/fromDDL-error-msg. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/types/DataType.scala | 38 +++++++++++++++++-- .../org/apache/spark/sql/functions.scala | 12 +++--- .../sql-tests/results/csv-functions.sql.out | 9 ++++- .../sql-tests/results/json-functions.sql.out | 9 ++++- .../apache/spark/sql/JsonFunctionsSuite.scala | 21 ++++++++++ 5 files changed, 78 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 3f70b7647f195..043c88f88843c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -28,9 +28,10 @@ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.annotation.Stable +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf @@ -125,10 +126,41 @@ object DataType { private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r def fromDDL(ddl: String): DataType = { + parseTypeWithFallback( + ddl, + CatalystSqlParser.parseDataType, + "Cannot parse the data type: ", + fallbackParser = CatalystSqlParser.parseTableSchema) + } + + /** + * Parses data type from a string with schema. It calls `parser` for `schema`. + * If it fails, calls `fallbackParser`. If the fallback function fails too, combines error message + * from `parser` and `fallbackParser`. + * + * @param schema The schema string to parse by `parser` or `fallbackParser`. + * @param parser The function that should be invoke firstly. + * @param errorMsg The error message for `parser`. + * @param fallbackParser The function that is called when `parser` fails. + * @return The data type parsed from the `schema` schema. 
+ */ + def parseTypeWithFallback( + schema: String, + parser: String => DataType, + errorMsg: String, + fallbackParser: String => DataType): DataType = { try { - CatalystSqlParser.parseDataType(ddl) + parser(schema) } catch { - case NonFatal(_) => CatalystSqlParser.parseTableSchema(ddl) + case NonFatal(e1) => + try { + fallbackParser(schema) + } catch { + case NonFatal(e2) => + throw new AnalysisException( + message = s"$errorMsg${e1.getMessage}\nFailed fallback parsing: ${e2.getMessage}", + cause = Some(e1.getCause)) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 21e22d90f0f80..ffa97c20c397c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -21,7 +21,6 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.reflect.runtime.universe.{typeTag, TypeTag} import scala.util.Try -import scala.util.control.NonFatal import org.apache.spark.annotation.Stable import org.apache.spark.sql.api.java._ @@ -36,6 +35,7 @@ import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.DataType.parseTypeWithFallback import org.apache.spark.util.Utils /** @@ -4101,11 +4101,11 @@ object functions { * @since 2.3.0 */ def from_json(e: Column, schema: String, options: Map[String, String]): Column = { - val dataType = try { - DataType.fromJson(schema) - } catch { - case NonFatal(_) => DataType.fromDDL(schema) - } + val dataType = parseTypeWithFallback( + schema, + DataType.fromJson, + "Cannot parse the schema in JSON format: ", + fallbackParser = DataType.fromDDL) from_json(e, dataType, options) } diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 7ba3f712363fe..ed2341f71a1b0 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -33,13 +33,20 @@ select from_csv('1', 'a InvalidType') struct<> -- !query output org.apache.spark.sql.AnalysisException +Cannot parse the data type: +extraneous input 'InvalidType' expecting (line 1, pos 2) +== SQL == +a InvalidType +--^^^ + +Failed fallback parsing: DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -; line 1 pos 7 +;; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 3cc45890cf089..838e4607d0324 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -124,13 +124,20 @@ select from_json('{"a":1}', 'a InvalidType') struct<> -- !query output org.apache.spark.sql.AnalysisException +Cannot parse the data type: +extraneous input 'InvalidType' expecting (line 1, pos 2) +== SQL == +a InvalidType +--^^^ + +Failed fallback parsing: DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -; line 1 pos 7 +;; line 1 pos 7 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index e2a9cf536d154..2e515ee92bceb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -754,4 +754,25 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { val expected = new StructType().add("parsed", new StructType().add("a b", LongType)) assert(out.schema == expected) } + + test("SPARK-33286: from_json - combined error messages") { + val df = Seq("""{"a":1}""").toDF("json") + val invalidJsonSchema = """{"fields": [{"a":123}], "type": "struct"}""" + val errMsg1 = intercept[AnalysisException] { + df.select(from_json($"json", invalidJsonSchema, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg1.contains("""Failed to convert the JSON string '{"a":123}' to a field""")) + + val invalidDataType = "MAP<INT, cow>" + val errMsg2 = intercept[AnalysisException] { + df.select(from_json($"json", invalidDataType, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg2.contains("DataType cow is not supported")) + + val invalidTableSchema = "x INT, a cow" + val errMsg3 = intercept[AnalysisException] { + df.select(from_json($"json", invalidTableSchema, Map.empty[String, String])).collect() + }.getMessage + assert(errMsg3.contains("DataType cow is not supported")) + } }

From 0c943cd2fbc6f2d25588991613abf469ace0153e Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 30 Oct 2020 14:11:25 +0900 Subject: [PATCH 0359/1009] [SPARK-33248][SQL] Add a configuration to control the legacy behavior of whether to pad null values when the value size is less than the schema size

### What changes were proposed in this pull request? Add a configuration to control the legacy behavior of whether to pad null values when the value size is less than the schema size. We can't decide whether the old behavior is a bug, and some users need the behavior to stay the same as Hive's.

### Why are the changes needed? Provides a compatibility choice between the historical behavior and Hive's.

### Does this PR introduce _any_ user-facing change? No

### How was this patch tested? Existing UTs.

Closes #30156 from AngersZhuuuu/SPARK-33284. Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: HyukjinKwon --- docs/sql-migration-guide.md | 2 ++ .../org/apache/spark/sql/internal/SQLConf.scala | 15 +++++++++++++++ .../execution/BaseScriptTransformationExec.scala | 10 ++++++++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdc764a93424b..319e72172d597 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,6 +51,8 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. 
+ + - In Spark 3.1, when `spark.sql.legacy.transformationPadNullWhenValueLessThenSchema` is true, Spark will pad NULL value when script transformation's output value size less then schema size in default-serde mode(script transformation with row format of `ROW FORMAT DELIMITED`). If false, Spark will keep original behavior to throw `ArrayIndexOutOfBoundsException`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 21357a492e39e..8825f4f96378d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2765,6 +2765,18 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) + val LEGACY_SCRIPT_TRANSFORM_PAD_NULL = + buildConf("spark.sql.legacy.transformationPadNullWhenValueLessThenSchema") + .internal() + .doc("Whether pad null value when transformation output's value size less then " + + "schema size in default-serde mode(script transformation with row format of " + + "`ROW FORMAT DELIMITED`)." + + "When true, Spark will pad NULL value to keep same behavior with hive." + + "When false, Spark keep original behavior to throw `ArrayIndexOutOfBoundsException`") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = buildConf("spark.sql.legacy.allowCastNumericToTimestamp") .internal() @@ -3493,6 +3505,9 @@ class SQLConf extends Serializable with Logging { def legacyAllowModifyActiveSession: Boolean = getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) + def legacyPadNullWhenValueLessThenSchema: Boolean = + getConf(SQLConf.LEGACY_SCRIPT_TRANSFORM_PAD_NULL) + def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..f2cddc7ba7290 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -104,10 +104,16 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)) val outputRowFormat = ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD") + + val padNull = if (conf.legacyPadNullWhenValueLessThenSchema) { + (arr: Array[String], size: Int) => arr.padTo(size, null) + } else { + (arr: Array[String], size: Int) => arr + } val processRowWithoutSerde = if (!ioschema.schemaLess) { prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).padTo(outputFieldWriters.size, null) + padNull(prevLine.split(outputRowFormat), outputFieldWriters.size) .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { @@ -118,7 +124,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) + padNull(prevLine.split(outputRowFormat).slice(0, 2), 2) .map(kvWriter)) } From d59f6a709586ff0d1bfbfda50c4e4cf17d5a50ff Mon Sep 17 00:00:00 2001 From: ulysses 
Date: Fri, 30 Oct 2020 08:18:10 +0000 Subject: [PATCH 0360/1009] [SPARK-33294][SQL] Add query resolved check before analyze InsertIntoDir ### What changes were proposed in this pull request? Add `query.resolved` before analyze `InsertIntoDir`. ### Why are the changes needed? For better error msg. ``` INSERT OVERWRITE DIRECTORY '/tmp/file' USING PARQUET SELECT * FROM ( SELECT c3 FROM ( SELECT c1, c2 from values(1,2) t(c1, c2) ) ) ``` Before this PR, we get such error msg ``` org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to toAttribute on unresolved object, tree: * at org.apache.spark.sql.catalyst.analysis.Star.toAttribute(unresolved.scala:244) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:392) ``` ### Does this PR introduce _any_ user-facing change? Yes, error msg changed. ### How was this patch tested? New test. Closes #30197 from ulysses-you/SPARK-33294. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../datasources/DataSourceStrategy.scala | 4 ++-- .../apache/spark/sql/sources/InsertSuite.scala | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 02dd4e549f93b..b1600a639a9bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -163,8 +163,8 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { InsertIntoDataSourceCommand(l, query, overwrite) case InsertIntoDir(_, storage, provider, query, overwrite) - if provider.isDefined && provider.get.toLowerCase(Locale.ROOT) != DDLUtils.HIVE_PROVIDER => - + if query.resolved && provider.isDefined && + provider.get.toLowerCase(Locale.ROOT) != DDLUtils.HIVE_PROVIDER => val outputPath = new Path(storage.locationUri.get) if (overwrite) DDLUtils.verifyNotReadPath(query, outputPath) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 9b5466e8a68f1..4686a0c69de63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -896,6 +896,23 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { sql("INSERT INTO TABLE insertTable PARTITION(part1='1', part2) SELECT 1 ,'' AS part2") } } + + test("SPARK-33294: Add query resolved check before analyze InsertIntoDir") { + withTempPath { path => + val msg = intercept[AnalysisException] { + sql( + s""" + |INSERT OVERWRITE DIRECTORY '${path.getAbsolutePath}' USING PARQUET + |SELECT * FROM ( + | SELECT c3 FROM ( + | SELECT c1, c2 from values(1,2) t(c1, c2) + | ) + |) + """.stripMargin) + }.getMessage + assert(msg.contains("cannot resolve '`c3`' given input columns")) + } + } } class FileExistingTestFileSystem extends RawLocalFileSystem { From 3af1651e50be3bc2e441be8827441f87d34d99cc 
Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 30 Oct 2020 17:53:30 +0900 Subject: [PATCH 0361/1009] [SPARK-33297][BUILD] Switch to use flat class loader strategy in SBT ### What changes were proposed in this pull request? This PR proposes to switch the class loader strategy from `ScalaLibrary` to `Flat` (see https://www.scala-sbt.org/1.x/docs/In-Process-Classloaders.html): https://github.com/apache/spark/runs/1314691686 ``` Error: java.util.MissingResourceException: Can't find bundle for base name org.scalactic.ScalacticBundle, locale en Error: at java.util.ResourceBundle.throwMissingResourceException(ResourceBundle.java:1581) Error: at java.util.ResourceBundle.getBundleImpl(ResourceBundle.java:1396) Error: at java.util.ResourceBundle.getBundle(ResourceBundle.java:782) Error: at org.scalactic.Resources$.resourceBundle$lzycompute(Resources.scala:8) Error: at org.scalactic.Resources$.resourceBundle(Resources.scala:8) Error: at org.scalactic.Resources$.pleaseDefineScalacticFillFilePathnameEnvVar(Resources.scala:256) Error: at org.scalactic.source.PositionMacro$PositionMacroImpl.apply(PositionMacro.scala:65) Error: at org.scalactic.source.PositionMacro$.genPosition(PositionMacro.scala:85) Error: at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) Error: at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) Error: at java.lang.reflect.Method.invoke(Method.java:498) ``` See also https://github.com/sbt/sbt/issues/5736 ### Why are the changes needed? To make the build unflaky. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions build in this test. Closes #30198 from HyukjinKwon/SPARK-33297. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f2ef480f8de5..55c87fcb3aaa2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -322,7 +322,11 @@ object SparkBuild extends PomBuild { // disable Mima check for all modules, // to be enabled in specific ones that have previous artifacts - MimaKeys.mimaFailOnNoPrevious := false + MimaKeys.mimaFailOnNoPrevious := false, + + // To prevent intermittent compliation failures, see also SPARK-33297 + // Apparently we can remove this when we use JDK 11. + Test / classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat ) def enable(settings: Seq[Setting[_]])(projectRef: ProjectRef) = { From 7c897c1216dd23e4a973bd82063a88ea9a6f7ca5 Mon Sep 17 00:00:00 2001 From: Dmitry Sabanin Date: Fri, 30 Oct 2020 11:14:42 -0700 Subject: [PATCH 0362/1009] [MINOR][CORE][DOCS] Fix typo in "spark.storage.decommission.shuffleBlocks.enabled" description ### What changes were proposed in this pull request? Small typo fix in the description of `spark.storage.decommission.shuffleBlocks.enabled` property. Closes #30208 from dsabanin/patch-1. 
Authored-by: Dmitry Sabanin Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/internal/config/package.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 9a7039a9cfe93..491395c3cbcde 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -423,7 +423,7 @@ package object config { private[spark] val STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED = ConfigBuilder("spark.storage.decommission.shuffleBlocks.enabled") .doc("Whether to transfer shuffle blocks during block manager decommissioning. Requires " + - "a migratable shuffle resolver (like sort based shuffe)") + "a migratable shuffle resolver (like sort based shuffle)") .version("3.1.0") .booleanConf .createWithDefault(false) From 491a0fb08b0c57a99894a0b33c5814854db8de3d Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Fri, 30 Oct 2020 11:26:30 -0700 Subject: [PATCH 0363/1009] [SPARK-33262][K8S][FOLLOWUP] Verify pod allocation does not stall ### What changes were proposed in this pull request? Add a test that pending executor does not stall pod allocation. ### Why are the changes needed? Better test coverage ### Does this PR introduce _any_ user-facing change? Test only change. ### How was this patch tested? New test passes. Closes #30205 from holdenk/verify-pod-allocation-does-not-stall. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../k8s/ExecutorPodsAllocatorSuite.scala | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 84c07bc588b06..37f9caef656d0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -255,6 +255,40 @@ class ExecutorPodsAllocatorSuite extends SparkFunSuite with BeforeAndAfter { verify(podOperations).delete() } + test("SPARK-33262: pod allocator does not stall with pending pods") { + when(podOperations + .withLabel(SPARK_APP_ID_LABEL, TEST_SPARK_APP_ID)) + .thenReturn(podOperations) + when(podOperations + .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE)) + .thenReturn(podOperations) + when(podOperations + .withLabelIn(SPARK_EXECUTOR_ID_LABEL, "1")) + .thenReturn(labeledPods) + when(podOperations + .withLabelIn(SPARK_EXECUTOR_ID_LABEL, "2", "3", "4", "5", "6")) + .thenReturn(podOperations) + + podsAllocatorUnderTest.setTotalExpectedExecutors(6) + // Initial request of pods + verify(podOperations).create(podWithAttachedContainerForId(1)) + verify(podOperations).create(podWithAttachedContainerForId(2)) + verify(podOperations).create(podWithAttachedContainerForId(3)) + verify(podOperations).create(podWithAttachedContainerForId(4)) + verify(podOperations).create(podWithAttachedContainerForId(5)) + // 4 come up, 1 pending + snapshotsStore.updatePod(pendingExecutor(1)) + snapshotsStore.updatePod(runningExecutor(2)) + snapshotsStore.updatePod(runningExecutor(3)) + snapshotsStore.updatePod(runningExecutor(4)) + snapshotsStore.updatePod(runningExecutor(5)) + // We move 
forward one allocation cycle + waitForExecutorPodsClock.setTime(podAllocationDelay + 1) + snapshotsStore.notifySubscribers() + // We request pod 6 + verify(podOperations).create(podWithAttachedContainerForId(6)) + } + private def executorPodAnswer(): Answer[KubernetesExecutorSpec] = (invocation: InvocationOnMock) => { val k8sConf: KubernetesExecutorConf = invocation.getArgument(0)

From 72ad9dcd5d484a8dd64c08889de85ef9de2a6077 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Fri, 30 Oct 2020 17:16:53 -0500 Subject: [PATCH 0364/1009] [SPARK-32037][CORE] Rename blacklisting feature

### What changes were proposed in this pull request? This PR renames the blacklisting feature. I ended up using "excludeOnFailure" or "excluded" in most cases, but there is a mix. I renamed BlacklistTracker to HealthTracker, but for TaskSetBlacklist, HealthTracker didn't make sense to me since it's not the health of the taskset itself but rather a record of what the taskset has excluded, so I renamed it to TaskSetExcludeList. Everywhere else I tried to follow the context, and in most cases "excluded" made sense. It made more sense to me than "blocked" since you are basically excluding those executors and nodes from having tasks scheduled on them; they can be unexcluded later after timeouts and such. For the configs, I changed the names to use excludeOnFailure, which I thought explained it. I unfortunately couldn't get rid of some of the old names because they are part of the event listener and history files. To keep backwards compatibility I kept the events and some of the parsing so that the history server still reads older history files properly. It is not forward compatible, though: a new application writes the "Excluded" events, so an older history server won't properly read and display them as blacklisted. A few of the files below show up as deleted and recreated even though I did a git mv on them; I'm not sure why.

### Why are the changes needed? To get rid of problematic language.

### Does this PR introduce _any_ user-facing change? Config names change, but the old configs still work and are deprecated.

### How was this patch tested? Updated tests, and also manually tested the UI changes and the history server reading older versions of history files and vice versa.

Closes #29906 from tgravescs/SPARK-32037. 
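For users migrating configurations after this rename, a small illustrative sketch follows. The keys are the ones added and deprecated by this patch; the `SparkConf` snippet itself is not part of the change:

```scala
import org.apache.spark.SparkConf

// Sketch only: the new excludeOnFailure keys introduced by this patch.
val conf = new SparkConf()
  .set("spark.excludeOnFailure.enabled", "true")
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1")
  .set("spark.excludeOnFailure.timeout", "1h")

// The old spark.blacklist.* keys are still accepted (they are registered as
// alternatives) but log a deprecation warning pointing at the new names, e.g.
// "Please use spark.excludeOnFailure.enabled".
// conf.set("spark.blacklist.enabled", "true")
```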
Lead-authored-by: Thomas Graves Co-authored-by: Thomas Graves Signed-off-by: Thomas Graves --- .../apache/spark/SparkFirehoseListener.java | 38 ++ .../ui/static/executorspage-template.html | 4 +- .../apache/spark/ui/static/executorspage.js | 28 +- .../org/apache/spark/ui/static/stagepage.js | 2 +- .../spark/ui/static/stagespage-template.html | 4 +- .../spark/ExecutorAllocationManager.scala | 12 +- .../scala/org/apache/spark/SparkConf.scala | 28 +- .../org/apache/spark/TaskEndReason.scala | 9 +- .../history/BasicEventFilterBuilder.scala | 2 + .../history/HistoryAppStatusStore.scala | 2 +- .../spark/internal/config/package.scala | 75 ++- .../spark/scheduler/BlacklistTracker.scala | 477 -------------- .../apache/spark/scheduler/DAGScheduler.scala | 4 +- .../scheduler/EventLoggingListener.scala | 26 + .../scheduler/ExecutorFailuresInTaskSet.scala | 2 +- .../spark/scheduler/HealthTracker.scala | 491 ++++++++++++++ .../spark/scheduler/SparkListener.scala | 117 +++- .../spark/scheduler/SparkListenerBus.scala | 12 + .../spark/scheduler/TaskSchedulerImpl.scala | 59 +- ...acklist.scala => TaskSetExcludeList.scala} | 78 ++- .../spark/scheduler/TaskSetManager.scala | 77 +-- .../cluster/CoarseGrainedClusterMessage.scala | 2 +- .../CoarseGrainedSchedulerBackend.scala | 25 +- .../spark/status/AppStatusListener.scala | 131 +++- .../apache/spark/status/AppStatusSource.scala | 16 + .../org/apache/spark/status/LiveEntity.scala | 21 +- .../org/apache/spark/status/api/v1/api.scala | 10 +- .../scala/org/apache/spark/ui/ToolTips.scala | 3 - ...cludeOnFailure_for_stage_expectation.json} | 6 +- ...OnFailure_node_for_stage_expectation.json} | 15 +- .../executor_list_json_expectation.json | 4 +- ...ith_executor_metrics_json_expectation.json | 16 +- .../executor_memory_usage_expectation.json | 20 +- ...or_node_excludeOnFailure_expectation.json} | 20 +- ...udeOnFailure_unexcluding_expectation.json} | 20 +- ...utor_resource_information_expectation.json | 12 +- .../one_stage_attempt_json_expectation.json | 3 +- .../one_stage_json_expectation.json | 3 +- ...age_with_accumulable_json_expectation.json | 3 +- .../stage_with_peak_metrics_expectation.json | 6 +- .../ExecutorAllocationManagerSuite.scala | 6 +- .../apache/spark/HeartbeatReceiverSuite.scala | 2 +- .../StandaloneDynamicAllocationSuite.scala | 8 +- .../history/BasicEventFilterSuite.scala | 10 +- .../history/EventLogFileCompactorSuite.scala | 12 +- .../deploy/history/HistoryServerSuite.scala | 9 +- .../scheduler/BlacklistTrackerSuite.scala | 608 ----------------- .../CoarseGrainedSchedulerBackendSuite.scala | 2 +- ...la => HealthTrackerIntegrationSuite.scala} | 24 +- .../spark/scheduler/HealthTrackerSuite.scala | 615 ++++++++++++++++++ .../scheduler/TaskSchedulerImplSuite.scala | 301 ++++----- .../scheduler/TaskSetBlacklistSuite.scala | 287 -------- .../scheduler/TaskSetExcludelistSuite.scala | 310 +++++++++ .../spark/scheduler/TaskSetManagerSuite.scala | 82 +-- .../KryoSerializerDistributedSuite.scala | 2 +- .../spark/status/AppStatusListenerSuite.scala | 43 +- .../status/api/v1/ExecutorSummarySuite.scala | 6 +- .../apache/spark/util/JsonProtocolSuite.scala | 68 +- docs/configuration.md | 64 +- docs/monitoring.md | 6 +- docs/running-on-yarn.md | 8 +- .../KubernetesClusterSchedulerBackend.scala | 2 +- .../MesosCoarseGrainedSchedulerBackend.scala | 6 +- ...osCoarseGrainedSchedulerBackendSuite.scala | 2 +- .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../spark/deploy/yarn/YarnAllocator.scala | 20 +- ...a => YarnAllocatorNodeHealthTracker.scala} | 85 +-- 
.../org/apache/spark/deploy/yarn/config.scala | 11 +- .../cluster/YarnSchedulerBackend.scala | 8 +- ... => YarnAllocatorHealthTrackerSuite.scala} | 90 +-- .../deploy/yarn/YarnAllocatorSuite.scala | 29 +- .../cluster/YarnSchedulerBackendSuite.scala | 17 +- 72 files changed, 2557 insertions(+), 2075 deletions(-) delete mode 100644 core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala create mode 100644 core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala rename core/src/main/scala/org/apache/spark/scheduler/{TaskSetBlacklist.scala => TaskSetExcludeList.scala} (63%) rename core/src/test/resources/HistoryServerExpectations/{blacklisting_for_stage_expectation.json => excludeOnFailure_for_stage_expectation.json} (99%) rename core/src/test/resources/HistoryServerExpectations/{blacklisting_node_for_stage_expectation.json => excludeOnFailure_node_for_stage_expectation.json} (98%) rename core/src/test/resources/HistoryServerExpectations/{executor_node_blacklisting_expectation.json => executor_node_excludeOnFailure_expectation.json} (92%) rename core/src/test/resources/HistoryServerExpectations/{executor_node_blacklisting_unblacklisting_expectation.json => executor_node_excludeOnFailure_unexcluding_expectation.json} (90%) delete mode 100644 core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala rename core/src/test/scala/org/apache/spark/scheduler/{BlacklistIntegrationSuite.scala => HealthTrackerIntegrationSuite.scala} (86%) create mode 100644 core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala delete mode 100644 core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala create mode 100644 core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala rename resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/{YarnAllocatorBlacklistTracker.scala => YarnAllocatorNodeHealthTracker.scala} (63%) rename resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/{YarnAllocatorBlacklistTrackerSuite.scala => YarnAllocatorHealthTrackerSuite.scala} (54%) diff --git a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java index c0e72b57d48bd..7cb2455affe48 100644 --- a/core/src/main/java/org/apache/spark/SparkFirehoseListener.java +++ b/core/src/main/java/org/apache/spark/SparkFirehoseListener.java @@ -17,6 +17,7 @@ package org.apache.spark; +import org.apache.spark.annotation.DeveloperApi; import org.apache.spark.scheduler.*; /** @@ -27,7 +28,11 @@ * new methods to SparkListener: forgetting to add a method will result in a compilation error (if * this was a concrete Scala class, default implementations of new event handlers would be inherited * from the SparkListener trait). + * + * Please note until Spark 3.1.0 this was missing the DevelopApi annotation, this needs to be + * taken into account if changing this API before a major release. 
*/ +@DeveloperApi public class SparkFirehoseListener implements SparkListenerInterface { public void onEvent(SparkListenerEvent event) { } @@ -124,34 +129,67 @@ public final void onExecutorBlacklisted(SparkListenerExecutorBlacklisted executo onEvent(executorBlacklisted); } + @Override + public final void onExecutorExcluded(SparkListenerExecutorExcluded executorExcluded) { + onEvent(executorExcluded); + } + @Override public void onExecutorBlacklistedForStage( SparkListenerExecutorBlacklistedForStage executorBlacklistedForStage) { onEvent(executorBlacklistedForStage); } + @Override + public void onExecutorExcludedForStage( + SparkListenerExecutorExcludedForStage executorExcludedForStage) { + onEvent(executorExcludedForStage); + } + @Override public void onNodeBlacklistedForStage( SparkListenerNodeBlacklistedForStage nodeBlacklistedForStage) { onEvent(nodeBlacklistedForStage); } + @Override + public void onNodeExcludedForStage( + SparkListenerNodeExcludedForStage nodeExcludedForStage) { + onEvent(nodeExcludedForStage); + } + @Override public final void onExecutorUnblacklisted( SparkListenerExecutorUnblacklisted executorUnblacklisted) { onEvent(executorUnblacklisted); } + @Override + public final void onExecutorUnexcluded( + SparkListenerExecutorUnexcluded executorUnexcluded) { + onEvent(executorUnexcluded); + } + @Override public final void onNodeBlacklisted(SparkListenerNodeBlacklisted nodeBlacklisted) { onEvent(nodeBlacklisted); } + @Override + public final void onNodeExcluded(SparkListenerNodeExcluded nodeExcluded) { + onEvent(nodeExcluded); + } + @Override public final void onNodeUnblacklisted(SparkListenerNodeUnblacklisted nodeUnblacklisted) { onEvent(nodeUnblacklisted); } + @Override + public final void onNodeUnexcluded(SparkListenerNodeUnexcluded nodeUnexcluded) { + onEvent(nodeUnexcluded); + } + @Override public void onBlockUpdated(SparkListenerBlockUpdated blockUpdated) { onEvent(blockUpdated); diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 0729dfe1cef72..5e835c053eb6c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html @@ -56,8 +56,8 @@
    Summary
    - Blacklisted + title="Number of executors excluded by the scheduler due to task failures."> + Excluded diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 520edb9cc3e34..d4eaea9103771 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -26,15 +26,15 @@ function getThreadDumpEnabled() { } function formatStatus(status, type, row) { - if (row.isBlacklisted) { - return "Blacklisted"; + if (row.isExcluded) { + return "Excluded"; } if (status) { - if (row.blacklistedInStages.length == 0) { + if (row.excludedInStages.length == 0) { return "Active" } - return "Active (Blacklisted in Stages: [" + row.blacklistedInStages.join(", ") + "])"; + return "Active (Excluded in Stages: [" + row.excludedInStages.join(", ") + "])"; } return "Dead" } @@ -168,7 +168,7 @@ $(document).ready(function () { var allTotalInputBytes = 0; var allTotalShuffleRead = 0; var allTotalShuffleWrite = 0; - var allTotalBlacklisted = 0; + var allTotalExcluded = 0; var activeExecCnt = 0; var activeRDDBlocks = 0; @@ -190,7 +190,7 @@ $(document).ready(function () { var activeTotalInputBytes = 0; var activeTotalShuffleRead = 0; var activeTotalShuffleWrite = 0; - var activeTotalBlacklisted = 0; + var activeTotalExcluded = 0; var deadExecCnt = 0; var deadRDDBlocks = 0; @@ -212,7 +212,7 @@ $(document).ready(function () { var deadTotalInputBytes = 0; var deadTotalShuffleRead = 0; var deadTotalShuffleWrite = 0; - var deadTotalBlacklisted = 0; + var deadTotalExcluded = 0; response.forEach(function (exec) { var memoryMetrics = { @@ -246,7 +246,7 @@ $(document).ready(function () { allTotalInputBytes += exec.totalInputBytes; allTotalShuffleRead += exec.totalShuffleRead; allTotalShuffleWrite += exec.totalShuffleWrite; - allTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + allTotalExcluded += exec.isExcluded ? 1 : 0; if (exec.isActive) { activeExecCnt += 1; activeRDDBlocks += exec.rddBlocks; @@ -268,7 +268,7 @@ $(document).ready(function () { activeTotalInputBytes += exec.totalInputBytes; activeTotalShuffleRead += exec.totalShuffleRead; activeTotalShuffleWrite += exec.totalShuffleWrite; - activeTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + activeTotalExcluded += exec.isExcluded ? 1 : 0; } else { deadExecCnt += 1; deadRDDBlocks += exec.rddBlocks; @@ -290,7 +290,7 @@ $(document).ready(function () { deadTotalInputBytes += exec.totalInputBytes; deadTotalShuffleRead += exec.totalShuffleRead; deadTotalShuffleWrite += exec.totalShuffleWrite; - deadTotalBlacklisted += exec.isBlacklisted ? 1 : 0; + deadTotalExcluded += exec.isExcluded ? 1 : 0; // todo - TEST BACKWARDS compatibility history? 
} }); @@ -315,7 +315,7 @@ $(document).ready(function () { "allTotalInputBytes": allTotalInputBytes, "allTotalShuffleRead": allTotalShuffleRead, "allTotalShuffleWrite": allTotalShuffleWrite, - "allTotalBlacklisted": allTotalBlacklisted + "allTotalExcluded": allTotalExcluded }; var activeSummary = { "execCnt": ( "Active(" + activeExecCnt + ")"), @@ -338,7 +338,7 @@ $(document).ready(function () { "allTotalInputBytes": activeTotalInputBytes, "allTotalShuffleRead": activeTotalShuffleRead, "allTotalShuffleWrite": activeTotalShuffleWrite, - "allTotalBlacklisted": activeTotalBlacklisted + "allTotalExcluded": activeTotalExcluded }; var deadSummary = { "execCnt": ( "Dead(" + deadExecCnt + ")" ), @@ -361,7 +361,7 @@ $(document).ready(function () { "allTotalInputBytes": deadTotalInputBytes, "allTotalShuffleRead": deadTotalShuffleRead, "allTotalShuffleWrite": deadTotalShuffleWrite, - "allTotalBlacklisted": deadTotalBlacklisted + "allTotalExcluded": deadTotalExcluded }; var data = {executors: response, "execSummary": [activeSummary, deadSummary, totalSummary]}; @@ -547,7 +547,7 @@ $(document).ready(function () { {data: 'allTotalInputBytes', render: formatBytes}, {data: 'allTotalShuffleRead', render: formatBytes}, {data: 'allTotalShuffleWrite', render: formatBytes}, - {data: 'allTotalBlacklisted'} + {data: 'allTotalExcluded'} ], "paging": false, "searching": false, diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 93b37c296271b..ee1115868f69b 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -433,7 +433,7 @@ $(document).ready(function () { {data : "failedTasks"}, {data : "killedTasks"}, {data : "succeededTasks"}, - {data : "isBlacklistedForStage"}, + {data : "isExcludedForStage"}, { data : function (row, type) { return row.inputRecords != 0 ? formatBytes(row.inputBytes, type) + " / " + row.inputRecords : ""; diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html index 77ea70e4ad966..9b40d0dc4a230 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html @@ -50,8 +50,8 @@
    Aggregated Metrics by Executor
    Succeeded Tasks - Blacklisted + title="Shows if this executor has been excluded by the scheduler due to task failures."> + Excluded Input Size / Records Output Size / Records diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 1dd64df106bc2..e445f188e1eed 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -312,8 +312,8 @@ private[spark] class ExecutorAllocationManager( if (unschedulableTaskSets > 0) { // Request additional executors to account for task sets having tasks that are unschedulable - // due to blacklisting when the active executor count has already reached the max needed - // which we would normally get. + // due to executors excluded for failures when the active executor count has already reached + // the max needed which we would normally get. val maxNeededForUnschedulables = math.ceil(unschedulableTaskSets * executorAllocationRatio / tasksPerExecutor).toInt math.max(maxNeededWithSpeculationLocalityOffset, @@ -662,10 +662,10 @@ private[spark] class ExecutorAllocationManager( private val resourceProfileIdToStageAttempt = new mutable.HashMap[Int, mutable.Set[StageAttempt]] - // Keep track of unschedulable task sets due to blacklisting. This is a Set of StageAttempt's - // because we'll only take the last unschedulable task in a taskset although there can be more. - // This is done in order to avoid costly loops in the scheduling. - // Check TaskSetManager#getCompletelyBlacklistedTaskIfAny for more details. + // Keep track of unschedulable task sets because of executor/node exclusions from too many task + // failures. This is a Set of StageAttempt's because we'll only take the last unschedulable task + // in a taskset although there can be more. This is done in order to avoid costly loops in the + // scheduling. Check TaskSetManager#getCompletelyExcludedTaskIfAny for more details. private val unschedulableTaskSets = new mutable.HashSet[StageAttempt] // stageAttempt to tuple (the number of task with locality preferences, a map where each pair diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 427e98e616515..5f37a1abb1909 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -603,7 +603,7 @@ private[spark] object SparkConf extends Logging { "are no longer accepted. To specify the equivalent now, one may use '64k'."), DeprecatedConfig("spark.rpc", "2.0", "Not used anymore."), DeprecatedConfig("spark.scheduler.executorTaskBlacklistTime", "2.1.0", - "Please use the new blacklisting options, spark.blacklist.*"), + "Please use the new excludedOnFailure options, spark.excludeOnFailure.*"), DeprecatedConfig("spark.yarn.am.port", "2.0.0", "Not used anymore"), DeprecatedConfig("spark.executor.port", "2.0.0", "Not used anymore"), DeprecatedConfig("spark.shuffle.service.index.cache.entries", "2.3.0", @@ -612,7 +612,31 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.yarn.credentials.file.retention.days", "2.4.0", "Not used anymore."), DeprecatedConfig("spark.yarn.services", "3.0.0", "Feature no longer available."), DeprecatedConfig("spark.executor.plugins", "3.0.0", - "Feature replaced with new plugin API. See Monitoring documentation.") + "Feature replaced with new plugin API. 
See Monitoring documentation."), + DeprecatedConfig("spark.blacklist.enabled", "3.1.0", + "Please use spark.excludeOnFailure.enabled"), + DeprecatedConfig("spark.blacklist.task.maxTaskAttemptsPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor"), + DeprecatedConfig("spark.blacklist.task.maxTaskAttemptsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.task.maxTaskAttemptsPerNode"), + DeprecatedConfig("spark.blacklist.application.maxFailedTasksPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.application.maxFailedTasksPerExecutor"), + DeprecatedConfig("spark.blacklist.stage.maxFailedTasksPerExecutor", "3.1.0", + "Please use spark.excludeOnFailure.stage.maxFailedTasksPerExecutor"), + DeprecatedConfig("spark.blacklist.application.maxFailedExecutorsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.application.maxFailedExecutorsPerNode"), + DeprecatedConfig("spark.blacklist.stage.maxFailedExecutorsPerNode", "3.1.0", + "Please use spark.excludeOnFailure.stage.maxFailedExecutorsPerNode"), + DeprecatedConfig("spark.blacklist.timeout", "3.1.0", + "Please use spark.excludeOnFailure.timeout"), + DeprecatedConfig("spark.blacklist.application.fetchFailure.enabled", "3.1.0", + "Please use spark.excludeOnFailure.application.fetchFailure.enabled"), + DeprecatedConfig("spark.scheduler.blacklist.unschedulableTaskSetTimeout", "3.1.0", + "Please use spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout"), + DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", + "Please use spark.excludeOnFailure.killExcludedExecutors"), + DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 6606d317e7b86..b304eb97fbdf6 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -98,10 +98,11 @@ case class FetchFailed( /** * Fetch failures lead to a different failure handling path: (1) we don't abort the stage after * 4 task failures, instead we immediately go back to the stage which generated the map output, - * and regenerate the missing data. (2) we don't count fetch failures for blacklisting, since - * presumably its not the fault of the executor where the task ran, but the executor which - * stored the data. This is especially important because we might rack up a bunch of - * fetch-failures in rapid succession, on all nodes of the cluster, due to one bad node. + * and regenerate the missing data. (2) we don't count fetch failures from executors excluded + * due to too many task failures, since presumably its not the fault of the executor where + * the task ran, but the executor which stored the data. This is especially important because + * we might rack up a bunch of fetch-failures in rapid succession, on all nodes of the cluster, + * due to one bad node. 
*/ override def countTowardsTaskFailures: Boolean = false } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala index b18bf2665d6ce..c659d32d16314 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -160,6 +160,8 @@ private[spark] class BasicEventFilter( case e: SparkListenerExecutorRemoved => liveExecutors.contains(e.executorId) case e: SparkListenerExecutorBlacklisted => liveExecutors.contains(e.executorId) case e: SparkListenerExecutorUnblacklisted => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorExcluded => liveExecutors.contains(e.executorId) + case e: SparkListenerExecutorUnexcluded => liveExecutors.contains(e.executorId) case e: SparkListenerStageExecutorMetrics => liveExecutors.contains(e.execId) case e: SparkListenerBlockManagerAdded => acceptBlockManagerEvent(e.blockManagerId) case e: SparkListenerBlockManagerRemoved => acceptBlockManagerEvent(e.blockManagerId) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala index 7973652b3e254..ac0f102d81a6a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryAppStatusStore.scala @@ -73,7 +73,7 @@ private[spark] class HistoryAppStatusStore( source.totalShuffleWrite, source.isBlacklisted, source.maxMemory, source.addTime, source.removeTime, source.removeReason, newExecutorLogs, source.memoryMetrics, source.blacklistedInStages, source.peakMemoryMetrics, source.attributes, source.resources, - source.resourceProfileId) + source.resourceProfileId, source.isExcluded, source.excludedInStages) } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 491395c3cbcde..6239ef0491a6f 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -722,74 +722,83 @@ package object config { .booleanConf .createWithDefault(true) - // Blacklist confs - private[spark] val BLACKLIST_ENABLED = - ConfigBuilder("spark.blacklist.enabled") - .version("2.1.0") + private[spark] val EXCLUDE_ON_FAILURE_ENABLED = + ConfigBuilder("spark.excludeOnFailure.enabled") + .version("3.1.0") + .withAlternative("spark.blacklist.enabled") .booleanConf .createOptional private[spark] val MAX_TASK_ATTEMPTS_PER_EXECUTOR = - ConfigBuilder("spark.blacklist.task.maxTaskAttemptsPerExecutor") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.task.maxTaskAttemptsPerExecutor") .intConf .createWithDefault(1) private[spark] val MAX_TASK_ATTEMPTS_PER_NODE = - ConfigBuilder("spark.blacklist.task.maxTaskAttemptsPerNode") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.task.maxTaskAttemptsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.task.maxTaskAttemptsPerNode") .intConf .createWithDefault(2) private[spark] val MAX_FAILURES_PER_EXEC = - ConfigBuilder("spark.blacklist.application.maxFailedTasksPerExecutor") - .version("2.2.0") + 
ConfigBuilder("spark.excludeOnFailure.application.maxFailedTasksPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.application.maxFailedTasksPerExecutor") .intConf .createWithDefault(2) private[spark] val MAX_FAILURES_PER_EXEC_STAGE = - ConfigBuilder("spark.blacklist.stage.maxFailedTasksPerExecutor") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.stage.maxFailedTasksPerExecutor") + .version("3.1.0") + .withAlternative("spark.blacklist.stage.maxFailedTasksPerExecutor") .intConf .createWithDefault(2) private[spark] val MAX_FAILED_EXEC_PER_NODE = - ConfigBuilder("spark.blacklist.application.maxFailedExecutorsPerNode") - .version("2.2.0") + ConfigBuilder("spark.excludeOnFailure.application.maxFailedExecutorsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.application.maxFailedExecutorsPerNode") .intConf .createWithDefault(2) private[spark] val MAX_FAILED_EXEC_PER_NODE_STAGE = - ConfigBuilder("spark.blacklist.stage.maxFailedExecutorsPerNode") - .version("2.1.0") + ConfigBuilder("spark.excludeOnFailure.stage.maxFailedExecutorsPerNode") + .version("3.1.0") + .withAlternative("spark.blacklist.stage.maxFailedExecutorsPerNode") .intConf .createWithDefault(2) - private[spark] val BLACKLIST_TIMEOUT_CONF = - ConfigBuilder("spark.blacklist.timeout") - .version("2.1.0") + private[spark] val EXCLUDE_ON_FAILURE_TIMEOUT_CONF = + ConfigBuilder("spark.excludeOnFailure.timeout") + .version("3.1.0") + .withAlternative("spark.blacklist.timeout") .timeConf(TimeUnit.MILLISECONDS) .createOptional - private[spark] val BLACKLIST_KILL_ENABLED = - ConfigBuilder("spark.blacklist.killBlacklistedExecutors") - .version("2.2.0") + private[spark] val EXCLUDE_ON_FAILURE_KILL_ENABLED = + ConfigBuilder("spark.excludeOnFailure.killExcludedExecutors") + .version("3.1.0") + .withAlternative("spark.blacklist.killBlacklistedExecutors") .booleanConf .createWithDefault(false) - private[spark] val BLACKLIST_LEGACY_TIMEOUT_CONF = - ConfigBuilder("spark.scheduler.executorTaskBlacklistTime") + private[spark] val EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF = + ConfigBuilder("spark.scheduler.executorTaskExcludeOnFailureTime") .internal() - .version("1.0.0") + .version("3.1.0") + .withAlternative("spark.scheduler.executorTaskBlacklistTime") .timeConf(TimeUnit.MILLISECONDS) .createOptional - private[spark] val BLACKLIST_FETCH_FAILURE_ENABLED = - ConfigBuilder("spark.blacklist.application.fetchFailure.enabled") - .version("2.3.0") + private[spark] val EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED = + ConfigBuilder("spark.excludeOnFailure.application.fetchFailure.enabled") + .version("3.1.0") + .withAlternative("spark.blacklist.application.fetchFailure.enabled") .booleanConf .createWithDefault(false) - // End blacklist confs private[spark] val UNREGISTER_OUTPUT_ON_HOST_ON_FETCH_FAILURE = ConfigBuilder("spark.files.fetchFailure.unRegisterOutputOnHost") @@ -1453,10 +1462,12 @@ package object config { .createWithDefaultString("365d") private[spark] val UNSCHEDULABLE_TASKSET_TIMEOUT = - ConfigBuilder("spark.scheduler.blacklist.unschedulableTaskSetTimeout") + ConfigBuilder("spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout") .doc("The timeout in seconds to wait to acquire a new executor and schedule a task " + - "before aborting a TaskSet which is unschedulable because of being completely blacklisted.") - .version("2.4.1") + "before aborting a TaskSet which is unschedulable because all executors are " + + "excluded due to failures.") + .version("3.1.0") + 
.withAlternative("spark.scheduler.blacklist.unschedulableTaskSetTimeout") .timeConf(TimeUnit.SECONDS) .checkValue(v => v >= 0, "The value should be a non negative time value.") .createWithDefault(120) diff --git a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala deleted file mode 100644 index 9e524c52267be..0000000000000 --- a/core/src/main/scala/org/apache/spark/scheduler/BlacklistTracker.scala +++ /dev/null @@ -1,477 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.scheduler - -import java.util.concurrent.atomic.AtomicReference - -import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} - -import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkContext} -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config -import org.apache.spark.util.{Clock, SystemClock, Utils} - -/** - * BlacklistTracker is designed to track problematic executors and nodes. It supports blacklisting - * executors and nodes across an entire application (with a periodic expiry). TaskSetManagers add - * additional blacklisting of executors and nodes for individual tasks and stages which works in - * concert with the blacklisting here. - * - * The tracker needs to deal with a variety of workloads, eg.: - * - * * bad user code -- this may lead to many task failures, but that should not count against - * individual executors - * * many small stages -- this may prevent a bad executor for having many failures within one - * stage, but still many failures over the entire application - * * "flaky" executors -- they don't fail every task, but are still faulty enough to merit - * blacklisting - * - * See the design doc on SPARK-8425 for a more in-depth discussion. - * - * THREADING: As with most helpers of TaskSchedulerImpl, this is not thread-safe. Though it is - * called by multiple threads, callers must already have a lock on the TaskSchedulerImpl. The - * one exception is [[nodeBlacklist()]], which can be called without holding a lock. 
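
The renamed configs above keep the old `spark.blacklist.*` keys wired in through `withAlternative`, so a job that still sets a legacy key keeps working and only triggers a deprecation warning. A minimal sketch of the migration from the user side, assuming a placeholder app name; only the key names come from this patch:

```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Exclusion settings are scheduler configs, so set them before the context starts.
val conf = new SparkConf()
  .setAppName("exclude-on-failure-demo")                              // placeholder name
  .set("spark.excludeOnFailure.enabled", "true")                      // new key
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1") // new key
  // Legacy key: still resolved through the withAlternative fallback registered above,
  // but SparkConf now reports it as deprecated.
  .set("spark.blacklist.timeout", "1h")

val spark = SparkSession.builder().config(conf).getOrCreate()
```
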
- */ -private[scheduler] class BlacklistTracker ( - private val listenerBus: LiveListenerBus, - conf: SparkConf, - allocationClient: Option[ExecutorAllocationClient], - clock: Clock = new SystemClock()) extends Logging { - - def this(sc: SparkContext, allocationClient: Option[ExecutorAllocationClient]) = { - this(sc.listenerBus, sc.conf, allocationClient) - } - - BlacklistTracker.validateBlacklistConfs(conf) - private val MAX_FAILURES_PER_EXEC = conf.get(config.MAX_FAILURES_PER_EXEC) - private val MAX_FAILED_EXEC_PER_NODE = conf.get(config.MAX_FAILED_EXEC_PER_NODE) - val BLACKLIST_TIMEOUT_MILLIS = BlacklistTracker.getBlacklistTimeout(conf) - private val BLACKLIST_FETCH_FAILURE_ENABLED = conf.get(config.BLACKLIST_FETCH_FAILURE_ENABLED) - - /** - * A map from executorId to information on task failures. Tracks the time of each task failure, - * so that we can avoid blacklisting executors due to failures that are very far apart. We do not - * actively remove from this as soon as tasks hit their timeouts, to avoid the time it would take - * to do so. But it will not grow too large, because as soon as an executor gets too many - * failures, we blacklist the executor and remove its entry here. - */ - private val executorIdToFailureList = new HashMap[String, ExecutorFailureList]() - val executorIdToBlacklistStatus = new HashMap[String, BlacklistedExecutor]() - val nodeIdToBlacklistExpiryTime = new HashMap[String, Long]() - /** - * An immutable copy of the set of nodes that are currently blacklisted. Kept in an - * AtomicReference to make [[nodeBlacklist()]] thread-safe. - */ - private val _nodeBlacklist = new AtomicReference[Set[String]](Set()) - /** - * Time when the next blacklist will expire. Used as a - * shortcut to avoid iterating over all entries in the blacklist when none will have expired. - */ - var nextExpiryTime: Long = Long.MaxValue - /** - * Mapping from nodes to all of the executors that have been blacklisted on that node. We do *not* - * remove from this when executors are removed from spark, so we can track when we get multiple - * successive blacklisted executors on one node. Nonetheless, it will not grow too large because - * there cannot be many blacklisted executors on one node, before we stop requesting more - * executors on that node, and we clean up the list of blacklisted executors once an executor has - * been blacklisted for BLACKLIST_TIMEOUT_MILLIS. - */ - val nodeToBlacklistedExecs = new HashMap[String, HashSet[String]]() - - /** - * Un-blacklists executors and nodes that have been blacklisted for at least - * BLACKLIST_TIMEOUT_MILLIS - */ - def applyBlacklistTimeout(): Unit = { - val now = clock.getTimeMillis() - // quickly check if we've got anything to expire from blacklist -- if not, avoid doing any work - if (now > nextExpiryTime) { - // Apply the timeout to blacklisted nodes and executors - val execsToUnblacklist = executorIdToBlacklistStatus.filter(_._2.expiryTime < now).keys - if (execsToUnblacklist.nonEmpty) { - // Un-blacklist any executors that have been blacklisted longer than the blacklist timeout. 
- logInfo(s"Removing executors $execsToUnblacklist from blacklist because the blacklist " + - s"for those executors has timed out") - execsToUnblacklist.foreach { exec => - val status = executorIdToBlacklistStatus.remove(exec).get - val failedExecsOnNode = nodeToBlacklistedExecs(status.node) - listenerBus.post(SparkListenerExecutorUnblacklisted(now, exec)) - failedExecsOnNode.remove(exec) - if (failedExecsOnNode.isEmpty) { - nodeToBlacklistedExecs.remove(status.node) - } - } - } - val nodesToUnblacklist = nodeIdToBlacklistExpiryTime.filter(_._2 < now).keys - if (nodesToUnblacklist.nonEmpty) { - // Un-blacklist any nodes that have been blacklisted longer than the blacklist timeout. - logInfo(s"Removing nodes $nodesToUnblacklist from blacklist because the blacklist " + - s"has timed out") - nodesToUnblacklist.foreach { node => - nodeIdToBlacklistExpiryTime.remove(node) - listenerBus.post(SparkListenerNodeUnblacklisted(now, node)) - } - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - } - updateNextExpiryTime() - } - } - - private def updateNextExpiryTime(): Unit = { - val execMinExpiry = if (executorIdToBlacklistStatus.nonEmpty) { - executorIdToBlacklistStatus.map{_._2.expiryTime}.min - } else { - Long.MaxValue - } - val nodeMinExpiry = if (nodeIdToBlacklistExpiryTime.nonEmpty) { - nodeIdToBlacklistExpiryTime.values.min - } else { - Long.MaxValue - } - nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) - } - - private def killExecutor(exec: String, msg: String): Unit = { - allocationClient match { - case Some(a) => - logInfo(msg) - a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, - force = true) - case None => - logInfo(s"Not attempting to kill blacklisted executor id $exec " + - s"since allocation client is not defined.") - } - } - - private def killBlacklistedExecutor(exec: String): Unit = { - if (conf.get(config.BLACKLIST_KILL_ENABLED)) { - killExecutor(exec, - s"Killing blacklisted executor id $exec since ${config.BLACKLIST_KILL_ENABLED.key} is set.") - } - } - - private[scheduler] def killBlacklistedIdleExecutor(exec: String): Unit = { - killExecutor(exec, - s"Killing blacklisted idle executor id $exec because of task unschedulability and trying " + - "to acquire a new executor.") - } - - private def killExecutorsOnBlacklistedNode(node: String): Unit = { - if (conf.get(config.BLACKLIST_KILL_ENABLED)) { - allocationClient match { - case Some(a) => - logInfo(s"Killing all executors on blacklisted host $node " + - s"since ${config.BLACKLIST_KILL_ENABLED.key} is set.") - if (a.killExecutorsOnHost(node) == false) { - logError(s"Killing executors on node $node failed.") - } - case None => - logWarning(s"Not attempting to kill executors on blacklisted host $node " + - s"since allocation client is not defined.") - } - } - } - - def updateBlacklistForFetchFailure(host: String, exec: String): Unit = { - if (BLACKLIST_FETCH_FAILURE_ENABLED) { - // If we blacklist on fetch failures, we are implicitly saying that we believe the failure is - // non-transient, and can't be recovered from (even if this is the first fetch failure, - // stage is retried after just one failure, so we don't always get a chance to collect - // multiple fetch failures). - // If the external shuffle-service is on, then every other executor on this node would - // be suffering from the same issue, so we should blacklist (and potentially kill) all - // of them immediately. 
- - val now = clock.getTimeMillis() - val expiryTimeForNewBlacklists = now + BLACKLIST_TIMEOUT_MILLIS - - if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { - if (!nodeIdToBlacklistExpiryTime.contains(host)) { - logInfo(s"blacklisting node $host due to fetch failure of external shuffle service") - - nodeIdToBlacklistExpiryTime.put(host, expiryTimeForNewBlacklists) - listenerBus.post(SparkListenerNodeBlacklisted(now, host, 1)) - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - killExecutorsOnBlacklistedNode(host) - updateNextExpiryTime() - } - } else if (!executorIdToBlacklistStatus.contains(exec)) { - logInfo(s"Blacklisting executor $exec due to fetch failure") - - executorIdToBlacklistStatus.put(exec, BlacklistedExecutor(host, expiryTimeForNewBlacklists)) - // We hardcoded number of failure tasks to 1 for fetch failure, because there's no - // reattempt for such failure. - listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, 1)) - updateNextExpiryTime() - killBlacklistedExecutor(exec) - - val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(host, HashSet[String]()) - blacklistedExecsOnNode += exec - } - } - } - - def updateBlacklistForSuccessfulTaskSet( - stageId: Int, - stageAttemptId: Int, - failuresByExec: HashMap[String, ExecutorFailuresInTaskSet]): Unit = { - // if any tasks failed, we count them towards the overall failure count for the executor at - // this point. - val now = clock.getTimeMillis() - failuresByExec.foreach { case (exec, failuresInTaskSet) => - val appFailuresOnExecutor = - executorIdToFailureList.getOrElseUpdate(exec, new ExecutorFailureList) - appFailuresOnExecutor.addFailures(stageId, stageAttemptId, failuresInTaskSet) - appFailuresOnExecutor.dropFailuresWithTimeoutBefore(now) - val newTotal = appFailuresOnExecutor.numUniqueTaskFailures - - val expiryTimeForNewBlacklists = now + BLACKLIST_TIMEOUT_MILLIS - // If this pushes the total number of failures over the threshold, blacklist the executor. - // If its already blacklisted, we avoid "re-blacklisting" (which can happen if there were - // other tasks already running in another taskset when it got blacklisted), because it makes - // some of the logic around expiry times a little more confusing. But it also wouldn't be a - // problem to re-blacklist, with a later expiry time. - if (newTotal >= MAX_FAILURES_PER_EXEC && !executorIdToBlacklistStatus.contains(exec)) { - logInfo(s"Blacklisting executor id: $exec because it has $newTotal" + - s" task failures in successful task sets") - val node = failuresInTaskSet.node - executorIdToBlacklistStatus.put(exec, BlacklistedExecutor(node, expiryTimeForNewBlacklists)) - listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, newTotal)) - executorIdToFailureList.remove(exec) - updateNextExpiryTime() - killBlacklistedExecutor(exec) - - // In addition to blacklisting the executor, we also update the data for failures on the - // node, and potentially put the entire node into a blacklist as well. - val blacklistedExecsOnNode = nodeToBlacklistedExecs.getOrElseUpdate(node, HashSet[String]()) - blacklistedExecsOnNode += exec - // If the node is already in the blacklist, we avoid adding it again with a later expiry - // time. 
- if (blacklistedExecsOnNode.size >= MAX_FAILED_EXEC_PER_NODE && - !nodeIdToBlacklistExpiryTime.contains(node)) { - logInfo(s"Blacklisting node $node because it has ${blacklistedExecsOnNode.size} " + - s"executors blacklisted: ${blacklistedExecsOnNode}") - nodeIdToBlacklistExpiryTime.put(node, expiryTimeForNewBlacklists) - listenerBus.post(SparkListenerNodeBlacklisted(now, node, blacklistedExecsOnNode.size)) - _nodeBlacklist.set(nodeIdToBlacklistExpiryTime.keySet.toSet) - killExecutorsOnBlacklistedNode(node) - } - } - } - } - - def isExecutorBlacklisted(executorId: String): Boolean = { - executorIdToBlacklistStatus.contains(executorId) - } - - /** - * Get the full set of nodes that are blacklisted. Unlike other methods in this class, this *IS* - * thread-safe -- no lock required on a taskScheduler. - */ - def nodeBlacklist(): Set[String] = { - _nodeBlacklist.get() - } - - def isNodeBlacklisted(node: String): Boolean = { - nodeIdToBlacklistExpiryTime.contains(node) - } - - def handleRemovedExecutor(executorId: String): Unit = { - // We intentionally do not clean up executors that are already blacklisted in - // nodeToBlacklistedExecs, so that if another executor on the same node gets blacklisted, we can - // blacklist the entire node. We also can't clean up executorIdToBlacklistStatus, so we can - // eventually remove the executor after the timeout. Despite not clearing those structures - // here, we don't expect they will grow too big since you won't get too many executors on one - // node, and the timeout will clear it up periodically in any case. - executorIdToFailureList -= executorId - } - - - /** - * Tracks all failures for one executor (that have not passed the timeout). - * - * In general we actually expect this to be extremely small, since it won't contain more than the - * maximum number of task failures before an executor is failed (default 2). - */ - private[scheduler] final class ExecutorFailureList extends Logging { - - private case class TaskId(stage: Int, stageAttempt: Int, taskIndex: Int) - - /** - * All failures on this executor in successful task sets. - */ - private var failuresAndExpiryTimes = ArrayBuffer[(TaskId, Long)]() - /** - * As an optimization, we track the min expiry time over all entries in failuresAndExpiryTimes - * so its quick to tell if there are any failures with expiry before the current time. - */ - private var minExpiryTime = Long.MaxValue - - def addFailures( - stage: Int, - stageAttempt: Int, - failuresInTaskSet: ExecutorFailuresInTaskSet): Unit = { - failuresInTaskSet.taskToFailureCountAndFailureTime.foreach { - case (taskIdx, (_, failureTime)) => - val expiryTime = failureTime + BLACKLIST_TIMEOUT_MILLIS - failuresAndExpiryTimes += ((TaskId(stage, stageAttempt, taskIdx), expiryTime)) - if (expiryTime < minExpiryTime) { - minExpiryTime = expiryTime - } - } - } - - /** - * The number of unique tasks that failed on this executor. Only counts failures within the - * timeout, and in successful tasksets. - */ - def numUniqueTaskFailures: Int = failuresAndExpiryTimes.size - - def isEmpty: Boolean = failuresAndExpiryTimes.isEmpty - - /** - * Apply the timeout to individual tasks. This is to prevent one-off failures that are very - * spread out in time (and likely have nothing to do with problems on the executor) from - * triggering blacklisting. However, note that we do *not* remove executors and nodes from - * the blacklist as we expire individual task failures -- each have their own timeout. 
Eg., - * suppose: - * * timeout = 10, maxFailuresPerExec = 2 - * * Task 1 fails on exec 1 at time 0 - * * Task 2 fails on exec 1 at time 5 - * --> exec 1 is blacklisted from time 5 - 15. - * This is to simplify the implementation, as well as keep the behavior easier to understand - * for the end user. - */ - def dropFailuresWithTimeoutBefore(dropBefore: Long): Unit = { - if (minExpiryTime < dropBefore) { - var newMinExpiry = Long.MaxValue - val newFailures = new ArrayBuffer[(TaskId, Long)] - failuresAndExpiryTimes.foreach { case (task, expiryTime) => - if (expiryTime >= dropBefore) { - newFailures += ((task, expiryTime)) - if (expiryTime < newMinExpiry) { - newMinExpiry = expiryTime - } - } - } - failuresAndExpiryTimes = newFailures - minExpiryTime = newMinExpiry - } - } - - override def toString(): String = { - s"failures = $failuresAndExpiryTimes" - } - } - -} - -private[spark] object BlacklistTracker extends Logging { - - private val DEFAULT_TIMEOUT = "1h" - - /** - * Returns true if the blacklist is enabled, based on checking the configuration in the following - * order: - * 1. Is it specifically enabled or disabled? - * 2. Is it enabled via the legacy timeout conf? - * 3. Default is off - */ - def isBlacklistEnabled(conf: SparkConf): Boolean = { - conf.get(config.BLACKLIST_ENABLED) match { - case Some(enabled) => - enabled - case None => - // if they've got a non-zero setting for the legacy conf, always enable the blacklist, - // otherwise, use the default. - val legacyKey = config.BLACKLIST_LEGACY_TIMEOUT_CONF.key - conf.get(config.BLACKLIST_LEGACY_TIMEOUT_CONF).exists { legacyTimeout => - if (legacyTimeout == 0) { - logWarning(s"Turning off blacklisting due to legacy configuration: $legacyKey == 0") - false - } else { - logWarning(s"Turning on blacklisting due to legacy configuration: $legacyKey > 0") - true - } - } - } - } - - def getBlacklistTimeout(conf: SparkConf): Long = { - conf.get(config.BLACKLIST_TIMEOUT_CONF).getOrElse { - conf.get(config.BLACKLIST_LEGACY_TIMEOUT_CONF).getOrElse { - Utils.timeStringAsMs(DEFAULT_TIMEOUT) - } - } - } - - /** - * Verify that blacklist configurations are consistent; if not, throw an exception. Should only - * be called if blacklisting is enabled. - * - * The configuration for the blacklist is expected to adhere to a few invariants. Default - * values follow these rules of course, but users may unwittingly change one configuration - * without making the corresponding adjustment elsewhere. This ensures we fail-fast when - * there are such misconfigurations. - */ - def validateBlacklistConfs(conf: SparkConf): Unit = { - - def mustBePos(k: String, v: String): Unit = { - throw new IllegalArgumentException(s"$k was $v, but must be > 0.") - } - - Seq( - config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, - config.MAX_TASK_ATTEMPTS_PER_NODE, - config.MAX_FAILURES_PER_EXEC_STAGE, - config.MAX_FAILED_EXEC_PER_NODE_STAGE, - config.MAX_FAILURES_PER_EXEC, - config.MAX_FAILED_EXEC_PER_NODE - ).foreach { config => - val v = conf.get(config) - if (v <= 0) { - mustBePos(config.key, v.toString) - } - } - - val timeout = getBlacklistTimeout(conf) - if (timeout <= 0) { - // first, figure out where the timeout came from, to include the right conf in the message. 
- conf.get(config.BLACKLIST_TIMEOUT_CONF) match { - case Some(t) => - mustBePos(config.BLACKLIST_TIMEOUT_CONF.key, timeout.toString) - case None => - mustBePos(config.BLACKLIST_LEGACY_TIMEOUT_CONF.key, timeout.toString) - } - } - - val maxTaskFailures = conf.get(config.TASK_MAX_FAILURES) - val maxNodeAttempts = conf.get(config.MAX_TASK_ATTEMPTS_PER_NODE) - - if (maxNodeAttempts >= maxTaskFailures) { - throw new IllegalArgumentException(s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + - s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + - s"( = ${maxTaskFailures} ). Though blacklisting is enabled, with this configuration, " + - s"Spark will not be robust to one bad node. Decrease " + - s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + - s"or disable blacklisting with ${config.BLACKLIST_ENABLED.key}") - } - } -} - -private final case class BlacklistedExecutor(node: String, expiryTime: Long) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 080e0e7f1552f..13b766e654832 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -333,8 +333,8 @@ private[spark] class DAGScheduler( } /** - * Called by the TaskSetManager when a taskset becomes unschedulable due to blacklisting and - * dynamic allocation is enabled. + * Called by the TaskSetManager when a taskset becomes unschedulable due to executors being + * excluded because of too many task failures and dynamic allocation is enabled. */ def unschedulableTaskSetAdded( stageId: Int, diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index b2e9a0b2a04e8..1fda03f732636 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -191,27 +191,53 @@ private[spark] class EventLoggingListener( logEvent(event, flushLogger = true) } + override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = { + logEvent(event, flushLogger = true) + } + override def onExecutorBlacklistedForStage( event: SparkListenerExecutorBlacklistedForStage): Unit = { logEvent(event, flushLogger = true) } + override def onExecutorExcludedForStage( + event: SparkListenerExecutorExcludedForStage): Unit = { + logEvent(event, flushLogger = true) + } + override def onNodeBlacklistedForStage(event: SparkListenerNodeBlacklistedForStage): Unit = { logEvent(event, flushLogger = true) } + override def onNodeExcludedForStage(event: SparkListenerNodeExcludedForStage): Unit = { + logEvent(event, flushLogger = true) + } + override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onExecutorUnexcluded(event: SparkListenerExecutorUnexcluded): Unit = { + logEvent(event, flushLogger = true) + } + + override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = { + logEvent(event, flushLogger = true) + } + override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = { logEvent(event, flushLogger = true) } + override def onNodeUnexcluded(event: SparkListenerNodeUnexcluded): Unit = { + 
logEvent(event, flushLogger = true) + } + override def onBlockUpdated(event: SparkListenerBlockUpdated): Unit = { if (shouldLogBlockUpdates) { logEvent(event, flushLogger = true) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala index 70553d8be28b5..f27c1560f8272 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorFailuresInTaskSet.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler import scala.collection.mutable.HashMap /** - * Small helper for tracking failed tasks for blacklisting purposes. Info on all failures on one + * Small helper for tracking failed tasks for exclusion purposes. Info on all failures on one * executor, within one task set. */ private[scheduler] class ExecutorFailuresInTaskSet(val node: String) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala new file mode 100644 index 0000000000000..9bbacea94bf68 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -0,0 +1,491 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +import java.util.concurrent.atomic.AtomicReference + +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} + +import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkContext} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config +import org.apache.spark.util.{Clock, SystemClock, Utils} + +/** + * HealthTracker is designed to track problematic executors and nodes. It supports excluding + * executors and nodes across an entire application (with a periodic expiry). TaskSetManagers add + * additional logic for exclusion of executors and nodes for individual tasks and stages which + * works in concert with the logic here. + * + * The tracker needs to deal with a variety of workloads, eg.: + * + * * bad user code -- this may lead to many task failures, but that should not count against + * individual executors + * * many small stages -- this may prevent a bad executor for having many failures within one + * stage, but still many failures over the entire application + * * "flaky" executors -- they don't fail every task, but are still faulty enough to merit + * excluding + * + * See the design doc on SPARK-8425 for a more in-depth discussion. Note SPARK-32037 renamed + * the feature. + * + * THREADING: As with most helpers of TaskSchedulerImpl, this is not thread-safe. 
Though it is + * called by multiple threads, callers must already have a lock on the TaskSchedulerImpl. The + * one exception is [[excludedNodeList()]], which can be called without holding a lock. + */ +private[scheduler] class HealthTracker ( + private val listenerBus: LiveListenerBus, + conf: SparkConf, + allocationClient: Option[ExecutorAllocationClient], + clock: Clock = new SystemClock()) extends Logging { + + def this(sc: SparkContext, allocationClient: Option[ExecutorAllocationClient]) = { + this(sc.listenerBus, sc.conf, allocationClient) + } + + HealthTracker.validateExcludeOnFailureConfs(conf) + private val MAX_FAILURES_PER_EXEC = conf.get(config.MAX_FAILURES_PER_EXEC) + private val MAX_FAILED_EXEC_PER_NODE = conf.get(config.MAX_FAILED_EXEC_PER_NODE) + val EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS = HealthTracker.getExludeOnFailureTimeout(conf) + private val EXCLUDE_FETCH_FAILURE_ENABLED = + conf.get(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED) + + /** + * A map from executorId to information on task failures. Tracks the time of each task failure, + * so that we can avoid excluding executors due to failures that are very far apart. We do not + * actively remove from this as soon as tasks hit their timeouts, to avoid the time it would take + * to do so. But it will not grow too large, because as soon as an executor gets too many + * failures, we exclude the executor and remove its entry here. + */ + private val executorIdToFailureList = new HashMap[String, ExecutorFailureList]() + val executorIdToExcludedStatus = new HashMap[String, ExcludedExecutor]() + val nodeIdToExcludedExpiryTime = new HashMap[String, Long]() + /** + * An immutable copy of the set of nodes that are currently excluded. Kept in an + * AtomicReference to make [[excludedNodeList()]] thread-safe. + */ + private val _excludedNodeList = new AtomicReference[Set[String]](Set()) + /** + * Time when the next excluded node will expire. Used as a shortcut to + * avoid iterating over all entries in the excludedNodeList when none will have expired. + */ + var nextExpiryTime: Long = Long.MaxValue + /** + * Mapping from nodes to all of the executors that have been excluded on that node. We do *not* + * remove from this when executors are removed from spark, so we can track when we get multiple + * successive excluded executors on one node. Nonetheless, it will not grow too large because + * there cannot be many excluded executors on one node, before we stop requesting more + * executors on that node, and we clean up the list of exluded executors once an executor has + * been excluded for EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS. + */ + val nodeToExcludedExecs = new HashMap[String, HashSet[String]]() + + /** + * Include executors and nodes that have been excluded for at least + * EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + */ + def applyExcludeOnFailureTimeout(): Unit = { + val now = clock.getTimeMillis() + // quickly check if we've got anything to expire that is excluded -- if not, + // avoid doing any work + if (now > nextExpiryTime) { + // Apply the timeout to excluded nodes and executors + val execsToInclude = executorIdToExcludedStatus.filter(_._2.expiryTime < now).keys + if (execsToInclude.nonEmpty) { + // Include any executors that have been exluded longer than the excludeOnFailure timeout. 
+ logInfo(s"Removing executors $execsToInclude from exclude list because the " + s"exclusion for those executors has timed out") + execsToInclude.foreach { exec => + val status = executorIdToExcludedStatus.remove(exec).get + val failedExecsOnNode = nodeToExcludedExecs(status.node) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorUnblacklisted(now, exec)) + listenerBus.post(SparkListenerExecutorUnexcluded(now, exec)) + failedExecsOnNode.remove(exec) + if (failedExecsOnNode.isEmpty) { + nodeToExcludedExecs.remove(status.node) + } + } + } + val nodesToInclude = nodeIdToExcludedExpiryTime.filter(_._2 < now).keys + if (nodesToInclude.nonEmpty) { + // Include any nodes that have been excluded longer than the excludeOnFailure timeout. + logInfo(s"Removing nodes $nodesToInclude from exclude list because the " + + s"exclusion for those nodes has timed out") + nodesToInclude.foreach { node => + nodeIdToExcludedExpiryTime.remove(node) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeUnblacklisted(now, node)) + listenerBus.post(SparkListenerNodeUnexcluded(now, node)) + } + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + } + updateNextExpiryTime() + } + } + + private def updateNextExpiryTime(): Unit = { + val execMinExpiry = if (executorIdToExcludedStatus.nonEmpty) { + executorIdToExcludedStatus.map{_._2.expiryTime}.min + } else { + Long.MaxValue + } + val nodeMinExpiry = if (nodeIdToExcludedExpiryTime.nonEmpty) { + nodeIdToExcludedExpiryTime.values.min + } else { + Long.MaxValue + } + nextExpiryTime = math.min(execMinExpiry, nodeMinExpiry) + } + + private def killExecutor(exec: String, msg: String): Unit = { + allocationClient match { + case Some(a) => + logInfo(msg) + a.killExecutors(Seq(exec), adjustTargetNumExecutors = false, countFailures = false, + force = true) + case None => + logInfo(s"Not attempting to kill excluded executor id $exec " + + s"since allocation client is not defined.") + } + } + + private def killExcludedExecutor(exec: String): Unit = { + if (conf.get(config.EXCLUDE_ON_FAILURE_KILL_ENABLED)) { + killExecutor(exec, s"Killing excluded executor id $exec since " + + s"${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + } + } + + private[scheduler] def killExcludedIdleExecutor(exec: String): Unit = { + killExecutor(exec, + s"Killing excluded idle executor id $exec because of task unschedulability and trying " + + "to acquire a new executor.") + } + + private def killExecutorsOnExcludedNode(node: String): Unit = { + if (conf.get(config.EXCLUDE_ON_FAILURE_KILL_ENABLED)) { + allocationClient match { + case Some(a) => + logInfo(s"Killing all executors on excluded host $node " + + s"since ${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + if (a.killExecutorsOnHost(node) == false) { + logError(s"Killing executors on node $node failed.") + } + case None => + logWarning(s"Not attempting to kill executors on excluded host $node " + + s"since allocation client is not defined.") + } + } + } + + def updateExcludedForFetchFailure(host: String, exec: String): Unit = { + if (EXCLUDE_FETCH_FAILURE_ENABLED) { + // If we exclude on fetch failures, we are implicitly saying that we believe the failure is + // non-transient, and can't be recovered from (even if this is the first fetch failure, + // stage is retried after just one failure, so we don't always get a chance to collect + // multiple fetch failures). 
+ // If the external shuffle-service is on, then every other executor on this node would + // be suffering from the same issue, so we should exclude (and potentially kill) all + // of them immediately. + + val now = clock.getTimeMillis() + val expiryTimeForNewExcludes = now + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + + if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { + if (!nodeIdToExcludedExpiryTime.contains(host)) { + logInfo(s"excluding node $host due to fetch failure of external shuffle service") + + nodeIdToExcludedExpiryTime.put(host, expiryTimeForNewExcludes) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeBlacklisted(now, host, 1)) + listenerBus.post(SparkListenerNodeExcluded(now, host, 1)) + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + killExecutorsOnExcludedNode(host) + updateNextExpiryTime() + } + } else if (!executorIdToExcludedStatus.contains(exec)) { + logInfo(s"Excluding executor $exec due to fetch failure") + + executorIdToExcludedStatus.put(exec, ExcludedExecutor(host, expiryTimeForNewExcludes)) + // We hardcoded number of failure tasks to 1 for fetch failure, because there's no + // reattempt for such failure. + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, 1)) + listenerBus.post(SparkListenerExecutorExcluded(now, exec, 1)) + updateNextExpiryTime() + killExcludedExecutor(exec) + + val excludedExecsOnNode = nodeToExcludedExecs.getOrElseUpdate(host, HashSet[String]()) + excludedExecsOnNode += exec + } + } + } + + def updateExcludedForSuccessfulTaskSet( + stageId: Int, + stageAttemptId: Int, + failuresByExec: HashMap[String, ExecutorFailuresInTaskSet]): Unit = { + // if any tasks failed, we count them towards the overall failure count for the executor at + // this point. + val now = clock.getTimeMillis() + failuresByExec.foreach { case (exec, failuresInTaskSet) => + val appFailuresOnExecutor = + executorIdToFailureList.getOrElseUpdate(exec, new ExecutorFailureList) + appFailuresOnExecutor.addFailures(stageId, stageAttemptId, failuresInTaskSet) + appFailuresOnExecutor.dropFailuresWithTimeoutBefore(now) + val newTotal = appFailuresOnExecutor.numUniqueTaskFailures + + val expiryTimeForNewExcludes = now + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + // If this pushes the total number of failures over the threshold, exclude the executor. + // If its already excluded, we avoid "re-excluding" (which can happen if there were + // other tasks already running in another taskset when it got excluded), because it makes + // some of the logic around expiry times a little more confusing. But it also wouldn't be a + // problem to re-exclude, with a later expiry time. + if (newTotal >= MAX_FAILURES_PER_EXEC && !executorIdToExcludedStatus.contains(exec)) { + logInfo(s"Excluding executor id: $exec because it has $newTotal" + + s" task failures in successful task sets") + val node = failuresInTaskSet.node + executorIdToExcludedStatus.put(exec, ExcludedExecutor(node, expiryTimeForNewExcludes)) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerExecutorBlacklisted(now, exec, newTotal)) + listenerBus.post(SparkListenerExecutorExcluded(now, exec, newTotal)) + executorIdToFailureList.remove(exec) + updateNextExpiryTime() + killExcludedExecutor(exec) + + // In addition to excluding the executor, we also update the data for failures on the + // node, and potentially exclude the entire node as well. 
+ val excludedExecsOnNode = nodeToExcludedExecs.getOrElseUpdate(node, HashSet[String]()) + excludedExecsOnNode += exec + // If the node is already excluded, we avoid adding it again with a later expiry + // time. + if (excludedExecsOnNode.size >= MAX_FAILED_EXEC_PER_NODE && + !nodeIdToExcludedExpiryTime.contains(node)) { + logInfo(s"Excluding node $node because it has ${excludedExecsOnNode.size} " + + s"executors excluded: ${excludedExecsOnNode}") + nodeIdToExcludedExpiryTime.put(node, expiryTimeForNewExcludes) + // post both to keep backwards compatibility + listenerBus.post(SparkListenerNodeBlacklisted(now, node, excludedExecsOnNode.size)) + listenerBus.post(SparkListenerNodeExcluded(now, node, excludedExecsOnNode.size)) + _excludedNodeList.set(nodeIdToExcludedExpiryTime.keySet.toSet) + killExecutorsOnExcludedNode(node) + } + } + } + } + + def isExecutorExcluded(executorId: String): Boolean = { + executorIdToExcludedStatus.contains(executorId) + } + + /** + * Get the full set of nodes that are excluded. Unlike other methods in this class, this *IS* + * thread-safe -- no lock required on a taskScheduler. + */ + def excludedNodeList(): Set[String] = { + _excludedNodeList.get() + } + + def isNodeExcluded(node: String): Boolean = { + nodeIdToExcludedExpiryTime.contains(node) + } + + def handleRemovedExecutor(executorId: String): Unit = { + // We intentionally do not clean up executors that are already excluded in + // nodeToExcludedExecs, so that if another executor on the same node gets excluded, we can + // exclude the entire node. We also can't clean up executorIdToExcludedStatus, so we can + // eventually remove the executor after the timeout. Despite not clearing those structures + // here, we don't expect they will grow too big since you won't get too many executors on one + // node, and the timeout will clear it up periodically in any case. + executorIdToFailureList -= executorId + } + + /** + * Tracks all failures for one executor (that have not passed the timeout). + * + * In general we actually expect this to be extremely small, since it won't contain more than the + * maximum number of task failures before an executor is failed (default 2). + */ + private[scheduler] final class ExecutorFailureList extends Logging { + + private case class TaskId(stage: Int, stageAttempt: Int, taskIndex: Int) + + /** + * All failures on this executor in successful task sets. + */ + private var failuresAndExpiryTimes = ArrayBuffer[(TaskId, Long)]() + /** + * As an optimization, we track the min expiry time over all entries in failuresAndExpiryTimes + * so its quick to tell if there are any failures with expiry before the current time. + */ + private var minExpiryTime = Long.MaxValue + + def addFailures( + stage: Int, + stageAttempt: Int, + failuresInTaskSet: ExecutorFailuresInTaskSet): Unit = { + failuresInTaskSet.taskToFailureCountAndFailureTime.foreach { + case (taskIdx, (_, failureTime)) => + val expiryTime = failureTime + EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + failuresAndExpiryTimes += ((TaskId(stage, stageAttempt, taskIdx), expiryTime)) + if (expiryTime < minExpiryTime) { + minExpiryTime = expiryTime + } + } + } + + /** + * The number of unique tasks that failed on this executor. Only counts failures within the + * timeout, and in successful tasksets. + */ + def numUniqueTaskFailures: Int = failuresAndExpiryTimes.size + + def isEmpty: Boolean = failuresAndExpiryTimes.isEmpty + + /** + * Apply the timeout to individual tasks. 
This is to prevent one-off failures that are very + * spread out in time (and likely have nothing to do with problems on the executor) from + * triggering exlusion. However, note that we do *not* remove executors and nodes from + * being excluded as we expire individual task failures -- each have their own timeout. Eg., + * suppose: + * * timeout = 10, maxFailuresPerExec = 2 + * * Task 1 fails on exec 1 at time 0 + * * Task 2 fails on exec 1 at time 5 + * --> exec 1 is excluded from time 5 - 15. + * This is to simplify the implementation, as well as keep the behavior easier to understand + * for the end user. + */ + def dropFailuresWithTimeoutBefore(dropBefore: Long): Unit = { + if (minExpiryTime < dropBefore) { + var newMinExpiry = Long.MaxValue + val newFailures = new ArrayBuffer[(TaskId, Long)] + failuresAndExpiryTimes.foreach { case (task, expiryTime) => + if (expiryTime >= dropBefore) { + newFailures += ((task, expiryTime)) + if (expiryTime < newMinExpiry) { + newMinExpiry = expiryTime + } + } + } + failuresAndExpiryTimes = newFailures + minExpiryTime = newMinExpiry + } + } + + override def toString(): String = { + s"failures = $failuresAndExpiryTimes" + } + } + +} + +private[spark] object HealthTracker extends Logging { + + private val DEFAULT_TIMEOUT = "1h" + + /** + * Returns true if the excludeOnFailure is enabled, based on checking the configuration + * in the following order: + * 1. Is it specifically enabled or disabled? + * 2. Is it enabled via the legacy timeout conf? + * 3. Default is off + */ + def isExcludeOnFailureEnabled(conf: SparkConf): Boolean = { + conf.get(config.EXCLUDE_ON_FAILURE_ENABLED) match { + case Some(enabled) => + enabled + case None => + // if they've got a non-zero setting for the legacy conf, always enable it, + // otherwise, use the default. + val legacyKey = config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF.key + conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).exists { legacyTimeout => + if (legacyTimeout == 0) { + logWarning(s"Turning off excludeOnFailure due to legacy configuration: $legacyKey == 0") + false + } else { + logWarning(s"Turning on excludeOnFailure due to legacy configuration: $legacyKey > 0") + true + } + } + } + } + + def getExludeOnFailureTimeout(conf: SparkConf): Long = { + conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF).getOrElse { + conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).getOrElse { + Utils.timeStringAsMs(DEFAULT_TIMEOUT) + } + } + } + + /** + * Verify that exclude on failure configurations are consistent; if not, throw an exception. + * Should only be called if excludeOnFailure is enabled. + * + * The configuration is expected to adhere to a few invariants. Default values + * follow these rules of course, but users may unwittingly change one configuration + * without making the corresponding adjustment elsewhere. This ensures we fail-fast when + * there are such misconfigurations. 
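
To make the invariant described above concrete, here is a hypothetical misconfiguration that the validation below would reject; the key names come from this patch, but the values are made up for illustration:

```
import org.apache.spark.SparkConf

// With 4 allowed attempts per node and only 4 task failures allowed overall,
// a single bad node could exhaust every attempt before exclusion kicks in,
// so validation fails fast with an IllegalArgumentException at scheduler start-up.
val badConf = new SparkConf()
  .set("spark.excludeOnFailure.enabled", "true")
  .set("spark.task.maxFailures", "4")
  .set("spark.excludeOnFailure.task.maxTaskAttemptsPerNode", "4")
```
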
+ */ + def validateExcludeOnFailureConfs(conf: SparkConf): Unit = { + + def mustBePos(k: String, v: String): Unit = { + throw new IllegalArgumentException(s"$k was $v, but must be > 0.") + } + + Seq( + config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, + config.MAX_TASK_ATTEMPTS_PER_NODE, + config.MAX_FAILURES_PER_EXEC_STAGE, + config.MAX_FAILED_EXEC_PER_NODE_STAGE, + config.MAX_FAILURES_PER_EXEC, + config.MAX_FAILED_EXEC_PER_NODE + ).foreach { config => + val v = conf.get(config) + if (v <= 0) { + mustBePos(config.key, v.toString) + } + } + + val timeout = getExludeOnFailureTimeout(conf) + if (timeout <= 0) { + // first, figure out where the timeout came from, to include the right conf in the message. + conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF) match { + case Some(t) => + mustBePos(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF.key, timeout.toString) + case None => + mustBePos(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF.key, timeout.toString) + } + } + + val maxTaskFailures = conf.get(config.TASK_MAX_FAILURES) + val maxNodeAttempts = conf.get(config.MAX_TASK_ATTEMPTS_PER_NODE) + + if (maxNodeAttempts >= maxTaskFailures) { + throw new IllegalArgumentException(s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + + s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + + s"( = ${maxTaskFailures} ). Though excludeOnFailure is enabled, with this configuration, " + + s"Spark will not be robust to one bad node. Decrease " + + s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + + s"or disable excludeOnFailure with ${config.EXCLUDE_ON_FAILURE_ENABLED.key}") + } + } +} + +private final case class ExcludedExecutor(node: String, expiryTime: Long) diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 8119215b8b74f..3fcb35b604ef6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -118,12 +118,21 @@ case class SparkListenerExecutorRemoved(time: Long, executorId: String, reason: extends SparkListenerEvent @DeveloperApi +@deprecated("use SparkListenerExecutorExcluded instead", "3.1.0") case class SparkListenerExecutorBlacklisted( time: Long, executorId: String, taskFailures: Int) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerExecutorExcluded( + time: Long, + executorId: String, + taskFailures: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerExecutorExcludedForStage instead", "3.1.0") @DeveloperApi case class SparkListenerExecutorBlacklistedForStage( time: Long, @@ -133,6 +142,17 @@ case class SparkListenerExecutorBlacklistedForStage( stageAttemptId: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerExecutorExcludedForStage( + time: Long, + executorId: String, + taskFailures: Int, + stageId: Int, + stageAttemptId: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeExcludedForStage instead", "3.1.0") @DeveloperApi case class SparkListenerNodeBlacklistedForStage( time: Long, @@ -142,10 +162,27 @@ case class SparkListenerNodeBlacklistedForStage( stageAttemptId: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerNodeExcludedForStage( + time: Long, + hostId: String, + executorFailures: Int, + stageId: Int, + stageAttemptId: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerExecutorUnexcluded instead", "3.1.0") @DeveloperApi case class 
SparkListenerExecutorUnblacklisted(time: Long, executorId: String) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerExecutorUnexcluded(time: Long, executorId: String) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeExcluded instead", "3.1.0") @DeveloperApi case class SparkListenerNodeBlacklisted( time: Long, @@ -153,10 +190,23 @@ case class SparkListenerNodeBlacklisted( executorFailures: Int) extends SparkListenerEvent + +@DeveloperApi +case class SparkListenerNodeExcluded( + time: Long, + hostId: String, + executorFailures: Int) + extends SparkListenerEvent + +@deprecated("use SparkListenerNodeUnexcluded instead", "3.1.0") @DeveloperApi case class SparkListenerNodeUnblacklisted(time: Long, hostId: String) extends SparkListenerEvent +@DeveloperApi +case class SparkListenerNodeUnexcluded(time: Long, hostId: String) + extends SparkListenerEvent + @DeveloperApi case class SparkListenerUnschedulableTaskSetAdded( stageId: Int, @@ -319,38 +369,75 @@ private[spark] trait SparkListenerInterface { def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit /** - * Called when the driver blacklists an executor for a Spark application. + * Called when the driver excludes an executor for a Spark application. */ + @deprecated("use onExecutorExcluded instead", "3.1.0") def onExecutorBlacklisted(executorBlacklisted: SparkListenerExecutorBlacklisted): Unit /** - * Called when the driver blacklists an executor for a stage. + * Called when the driver excludes an executor for a Spark application. */ + def onExecutorExcluded(executorExcluded: SparkListenerExecutorExcluded): Unit + + /** + * Called when the driver excludes an executor for a stage. + */ + @deprecated("use onExecutorExcludedForStage instead", "3.1.0") def onExecutorBlacklistedForStage( executorBlacklistedForStage: SparkListenerExecutorBlacklistedForStage): Unit /** - * Called when the driver blacklists a node for a stage. + * Called when the driver excludes an executor for a stage. + */ + def onExecutorExcludedForStage( + executorExcludedForStage: SparkListenerExecutorExcludedForStage): Unit + + /** + * Called when the driver excludes a node for a stage. */ + @deprecated("use onNodeExcludedForStage instead", "3.1.0") def onNodeBlacklistedForStage(nodeBlacklistedForStage: SparkListenerNodeBlacklistedForStage): Unit /** - * Called when the driver re-enables a previously blacklisted executor. + * Called when the driver excludes a node for a stage. + */ + def onNodeExcludedForStage(nodeExcludedForStage: SparkListenerNodeExcludedForStage): Unit + + /** + * Called when the driver re-enables a previously excluded executor. */ + @deprecated("use onExecutorUnexcluded instead", "3.1.0") def onExecutorUnblacklisted(executorUnblacklisted: SparkListenerExecutorUnblacklisted): Unit /** - * Called when the driver blacklists a node for a Spark application. + * Called when the driver re-enables a previously excluded executor. + */ + def onExecutorUnexcluded(executorUnexcluded: SparkListenerExecutorUnexcluded): Unit + + /** + * Called when the driver excludes a node for a Spark application. */ + @deprecated("use onNodeExcluded instead", "3.1.0") def onNodeBlacklisted(nodeBlacklisted: SparkListenerNodeBlacklisted): Unit /** - * Called when the driver re-enables a previously blacklisted node. + * Called when the driver excludes a node for a Spark application. */ + def onNodeExcluded(nodeExcluded: SparkListenerNodeExcluded): Unit + + /** + * Called when the driver re-enables a previously excluded node. 
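
For listener authors, the new `*Excluded` events are drop-in replacements for the deprecated `*Blacklisted` ones, and the tracker posts both variants for backwards compatibility. A minimal sketch of a listener written against the new callbacks; the log output format is illustrative only:

```
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded,
  SparkListenerExecutorUnexcluded, SparkListenerNodeExcluded, SparkListenerNodeUnexcluded}

class ExclusionLoggingListener extends SparkListener {
  override def onExecutorExcluded(e: SparkListenerExecutorExcluded): Unit =
    println(s"executor ${e.executorId} excluded after ${e.taskFailures} task failures")

  override def onExecutorUnexcluded(e: SparkListenerExecutorUnexcluded): Unit =
    println(s"executor ${e.executorId} is schedulable again")

  override def onNodeExcluded(e: SparkListenerNodeExcluded): Unit =
    println(s"node ${e.hostId} excluded (${e.executorFailures} executors failed on it)")

  override def onNodeUnexcluded(e: SparkListenerNodeUnexcluded): Unit =
    println(s"node ${e.hostId} is schedulable again")
}

// Registered like any other listener, e.g. sc.addSparkListener(new ExclusionLoggingListener)
```
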
+ */ + @deprecated("use onNodeUnexcluded instead", "3.1.0") def onNodeUnblacklisted(nodeUnblacklisted: SparkListenerNodeUnblacklisted): Unit /** - * Called when a taskset becomes unschedulable due to blacklisting and dynamic allocation + * Called when the driver re-enables a previously excluded node. + */ + def onNodeUnexcluded(nodeUnexcluded: SparkListenerNodeUnexcluded): Unit + + /** + * Called when a taskset becomes unschedulable due to exludeOnFailure and dynamic allocation * is enabled. */ def onUnschedulableTaskSetAdded( @@ -433,21 +520,33 @@ abstract class SparkListener extends SparkListenerInterface { override def onExecutorBlacklisted( executorBlacklisted: SparkListenerExecutorBlacklisted): Unit = { } + override def onExecutorExcluded( + executorExcluded: SparkListenerExecutorExcluded): Unit = { } - def onExecutorBlacklistedForStage( + override def onExecutorBlacklistedForStage( executorBlacklistedForStage: SparkListenerExecutorBlacklistedForStage): Unit = { } + override def onExecutorExcludedForStage( + executorExcludedForStage: SparkListenerExecutorExcludedForStage): Unit = { } - def onNodeBlacklistedForStage( + override def onNodeBlacklistedForStage( nodeBlacklistedForStage: SparkListenerNodeBlacklistedForStage): Unit = { } + override def onNodeExcludedForStage( + nodeExcludedForStage: SparkListenerNodeExcludedForStage): Unit = { } override def onExecutorUnblacklisted( executorUnblacklisted: SparkListenerExecutorUnblacklisted): Unit = { } + override def onExecutorUnexcluded( + executorUnexcluded: SparkListenerExecutorUnexcluded): Unit = { } override def onNodeBlacklisted( nodeBlacklisted: SparkListenerNodeBlacklisted): Unit = { } + override def onNodeExcluded( + nodeExcluded: SparkListenerNodeExcluded): Unit = { } override def onNodeUnblacklisted( nodeUnblacklisted: SparkListenerNodeUnblacklisted): Unit = { } + override def onNodeUnexcluded( + nodeUnexcluded: SparkListenerNodeUnexcluded): Unit = { } override def onUnschedulableTaskSetAdded( unschedulableTaskSetAdded: SparkListenerUnschedulableTaskSetAdded): Unit = { } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala index 13e65f4291fd0..ec0c0cf3cf82b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -75,6 +75,18 @@ private[spark] trait SparkListenerBus listener.onNodeBlacklisted(nodeBlacklisted) case nodeUnblacklisted: SparkListenerNodeUnblacklisted => listener.onNodeUnblacklisted(nodeUnblacklisted) + case executorExcludedForStage: SparkListenerExecutorExcludedForStage => + listener.onExecutorExcludedForStage(executorExcludedForStage) + case nodeExcludedForStage: SparkListenerNodeExcludedForStage => + listener.onNodeExcludedForStage(nodeExcludedForStage) + case executorExcluded: SparkListenerExecutorExcluded => + listener.onExecutorExcluded(executorExcluded) + case executorUnexcluded: SparkListenerExecutorUnexcluded => + listener.onExecutorUnexcluded(executorUnexcluded) + case nodeExcluded: SparkListenerNodeExcluded => + listener.onNodeExcluded(nodeExcluded) + case nodeUnexcluded: SparkListenerNodeUnexcluded => + listener.onNodeUnexcluded(nodeUnexcluded) case blockUpdated: SparkListenerBlockUpdated => listener.onBlockUpdated(blockUpdated) case speculativeTaskSubmitted: SparkListenerSpeculativeTaskSubmitted => diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 2fcf13d5268f8..57e219999b0d0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -91,9 +91,9 @@ private[spark] class TaskSchedulerImpl( this(sc, sc.conf.get(config.TASK_MAX_FAILURES)) } - // Lazily initializing blacklistTrackerOpt to avoid getting empty ExecutorAllocationClient, + // Lazily initializing healthTrackerOpt to avoid getting empty ExecutorAllocationClient, // because ExecutorAllocationClient is created after this TaskSchedulerImpl. - private[scheduler] lazy val blacklistTrackerOpt = maybeCreateBlacklistTracker(sc) + private[scheduler] lazy val healthTrackerOpt = maybeCreateHealthTracker(sc) val conf = sc.conf @@ -281,7 +281,7 @@ private[spark] class TaskSchedulerImpl( private[scheduler] def createTaskSetManager( taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } override def cancelTasks(stageId: Int, interruptThread: Boolean): Unit = synchronized { @@ -381,7 +381,7 @@ private[spark] class TaskSchedulerImpl( : (Boolean, Option[TaskLocality]) = { var noDelayScheduleRejects = true var minLaunchedLocality: Option[TaskLocality] = None - // nodes and executors that are blacklisted for the entire application have already been + // nodes and executors that are excluded for the entire application have already been // filtered out by this point for (i <- 0 until shuffledOffers.size) { val execId = shuffledOffers(i).executorId @@ -515,15 +515,15 @@ private[spark] class TaskSchedulerImpl( hostsByRack.getOrElseUpdate(rack, new HashSet[String]()) += host } - // Before making any offers, remove any nodes from the blacklist whose blacklist has expired. Do + // Before making any offers, include any nodes whose expireOnFailure timeout has expired. Do // this here to avoid a separate thread and added synchronization overhead, and also because - // updating the blacklist is only relevant when task offers are being made. - blacklistTrackerOpt.foreach(_.applyBlacklistTimeout()) + // updating the excluded executors and nodes is only relevant when task offers are being made. + healthTrackerOpt.foreach(_.applyExcludeOnFailureTimeout()) - val filteredOffers = blacklistTrackerOpt.map { blacklistTracker => + val filteredOffers = healthTrackerOpt.map { healthTracker => offers.filter { offer => - !blacklistTracker.isNodeBlacklisted(offer.host) && - !blacklistTracker.isExecutorBlacklisted(offer.executorId) + !healthTracker.isNodeExcluded(offer.host) && + !healthTracker.isExecutorExcluded(offer.executorId) } }.getOrElse(offers) @@ -602,15 +602,15 @@ private[spark] class TaskSchedulerImpl( } if (!launchedAnyTask) { - taskSet.getCompletelyBlacklistedTaskIfAny(hostToExecutors).foreach { taskIndex => - // If the taskSet is unschedulable we try to find an existing idle blacklisted + taskSet.getCompletelyExcludedTaskIfAny(hostToExecutors).foreach { taskIndex => + // If the taskSet is unschedulable we try to find an existing idle excluded // executor and kill the idle executor and kick off an abortTimer which if it doesn't // schedule a task within the the timeout will abort the taskSet if we were unable to // schedule any task from the taskSet. // Note 1: We keep track of schedulability on a per taskSet basis rather than on a per // task basis. 
// Note 2: The taskSet can still be aborted when there are more than one idle - // blacklisted executors and dynamic allocation is on. This can happen when a killed + // excluded executors and dynamic allocation is on. This can happen when a killed // idle executor isn't replaced in time by ExecutorAllocationManager as it relies on // pending tasks and doesn't kill executors on idle timeouts, resulting in the abort // timer to expire and abort the taskSet. @@ -621,7 +621,7 @@ private[spark] class TaskSchedulerImpl( executorIdToRunningTaskIds.find(x => !isExecutorBusy(x._1)) match { case Some ((executorId, _)) => if (!unschedulableTaskSetToExpiryTime.contains(taskSet)) { - blacklistTrackerOpt.foreach(blt => blt.killBlacklistedIdleExecutor(executorId)) + healthTrackerOpt.foreach(blt => blt.killExcludedIdleExecutor(executorId)) updateUnschedulableTaskSetTimeoutAndStartAbortTimer(taskSet, taskIndex) } case None => @@ -638,18 +638,19 @@ private[spark] class TaskSchedulerImpl( } } else { // Abort Immediately - logInfo("Cannot schedule any task because of complete blacklisting. No idle" + - s" executors can be found to kill. Aborting stage ${taskSet.stageId}.") - taskSet.abortSinceCompletelyBlacklisted(taskIndex) + logInfo("Cannot schedule any task because all executors excluded from " + + "failures. No idle executors can be found to kill. Aborting stage " + + s"${taskSet.stageId}.") + taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } } } } else { - // We want to defer killing any taskSets as long as we have a non blacklisted executor + // We want to defer killing any taskSets as long as we have a non excluded executor // which can be used to schedule a task from any active taskSets. This ensures that the // job can make progress. // Note: It is theoretically possible that a taskSet never gets scheduled on a - // non-blacklisted executor and the abort timer doesn't kick in because of a constant + // non-excluded executor and the abort timer doesn't kick in because of a constant // submission of new TaskSets. See the PR for more details. if (unschedulableTaskSetToExpiryTime.nonEmpty) { logInfo("Clearing the expiry times for all unschedulable taskSets as a task was " + @@ -710,7 +711,7 @@ private[spark] class TaskSchedulerImpl( val timeout = conf.get(config.UNSCHEDULABLE_TASKSET_TIMEOUT) * 1000 unschedulableTaskSetToExpiryTime(taskSet) = clock.getTimeMillis() + timeout logInfo(s"Waiting for $timeout ms for completely " + - s"blacklisted task to be schedulable again before aborting stage ${taskSet.stageId}.") + s"excluded task to be schedulable again before aborting stage ${taskSet.stageId}.") abortTimer.schedule( createUnschedulableTaskSetAbortTimer(taskSet, taskIndex), timeout) } @@ -722,9 +723,9 @@ private[spark] class TaskSchedulerImpl( override def run(): Unit = TaskSchedulerImpl.this.synchronized { if (unschedulableTaskSetToExpiryTime.contains(taskSet) && unschedulableTaskSetToExpiryTime(taskSet) <= clock.getTimeMillis()) { - logInfo("Cannot schedule any task because of complete blacklisting. " + + logInfo("Cannot schedule any task because all executors excluded due to failures. " + s"Wait time for scheduling expired. 
Aborting stage ${taskSet.stageId}.") - taskSet.abortSinceCompletelyBlacklisted(taskIndex) + taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } else { this.cancel() } @@ -1019,7 +1020,7 @@ private[spark] class TaskSchedulerImpl( executorIdToHost -= executorId rootPool.executorLost(executorId, host, reason) } - blacklistTrackerOpt.foreach(_.handleRemovedExecutor(executorId)) + healthTrackerOpt.foreach(_.handleRemovedExecutor(executorId)) } def executorAdded(execId: String, host: String): Unit = { @@ -1060,11 +1061,11 @@ private[spark] class TaskSchedulerImpl( } /** - * Get a snapshot of the currently blacklisted nodes for the entire application. This is + * Get a snapshot of the currently excluded nodes for the entire application. This is * thread-safe -- it can be called without a lock on the TaskScheduler. */ - def nodeBlacklist(): Set[String] = { - blacklistTrackerOpt.map(_.nodeBlacklist()).getOrElse(Set.empty) + def excludedNodes(): Set[String] = { + healthTrackerOpt.map(_.excludedNodeList()).getOrElse(Set.empty) } /** @@ -1223,13 +1224,13 @@ private[spark] object TaskSchedulerImpl { retval.toList } - private def maybeCreateBlacklistTracker(sc: SparkContext): Option[BlacklistTracker] = { - if (BlacklistTracker.isBlacklistEnabled(sc.conf)) { + private def maybeCreateHealthTracker(sc: SparkContext): Option[HealthTracker] = { + if (HealthTracker.isExcludeOnFailureEnabled(sc.conf)) { val executorAllocClient: Option[ExecutorAllocationClient] = sc.schedulerBackend match { case b: ExecutorAllocationClient => Some(b) case _ => None } - Some(new BlacklistTracker(sc, executorAllocClient)) + Some(new HealthTracker(sc, executorAllocClient)) } else { None } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala similarity index 63% rename from core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala rename to core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala index 4df2889089ee9..d8c46db166fc5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetBlacklist.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala @@ -24,19 +24,19 @@ import org.apache.spark.internal.config import org.apache.spark.util.Clock /** - * Handles blacklisting executors and nodes within a taskset. This includes blacklisting specific - * (task, executor) / (task, nodes) pairs, and also completely blacklisting executors and nodes + * Handles excluding executors and nodes within a taskset. This includes excluding specific + * (task, executor) / (task, nodes) pairs, and also completely excluding executors and nodes * for the entire taskset. * - * It also must store sufficient information in task failures for application level blacklisting, - * which is handled by [[BlacklistTracker]]. Note that BlacklistTracker does not know anything + * It also must store sufficient information in task failures for application level exclusion, + * which is handled by [[HealthTracker]]. Note that HealthTracker does not know anything * about task failures until a taskset completes successfully. * * THREADING: This class is a helper to [[TaskSetManager]]; as with the methods in * [[TaskSetManager]] this class is designed only to be called from code with a lock on the * TaskScheduler (e.g. its event handlers). It should not be called from other threads. 
*/ -private[scheduler] class TaskSetBlacklist( +private[scheduler] class TaskSetExcludelist( private val listenerBus: LiveListenerBus, val conf: SparkConf, val stageId: Int, @@ -49,9 +49,9 @@ private[scheduler] class TaskSetBlacklist( private val MAX_FAILED_EXEC_PER_NODE_STAGE = conf.get(config.MAX_FAILED_EXEC_PER_NODE_STAGE) /** - * A map from each executor to the task failures on that executor. This is used for blacklisting - * within this taskset, and it is also relayed onto [[BlacklistTracker]] for app-level - * blacklisting if this taskset completes successfully. + * A map from each executor to the task failures on that executor. This is used for exclusion + * within this taskset, and it is also relayed onto [[HealthTracker]] for app-level + * exclusion if this taskset completes successfully. */ val execToFailures = new HashMap[String, ExecutorFailuresInTaskSet]() @@ -61,9 +61,9 @@ private[scheduler] class TaskSetBlacklist( * node -> execs mapping in the usual case when there aren't any failures). */ private val nodeToExecsWithFailures = new HashMap[String, HashSet[String]]() - private val nodeToBlacklistedTaskIndexes = new HashMap[String, HashSet[Int]]() - private val blacklistedExecs = new HashSet[String]() - private val blacklistedNodes = new HashSet[String]() + private val nodeToExcludedTaskIndexes = new HashMap[String, HashSet[Int]]() + private val excludedExecs = new HashSet[String]() + private val excludedNodes = new HashSet[String]() private var latestFailureReason: String = null @@ -75,36 +75,36 @@ private[scheduler] class TaskSetBlacklist( } /** - * Return true if this executor is blacklisted for the given task. This does *not* - * need to return true if the executor is blacklisted for the entire stage, or blacklisted + * Return true if this executor is excluded for the given task. This does *not* + * need to return true if the executor is excluded for the entire stage, or excluded * for the entire application. That is to keep this method as fast as possible in the inner-loop * of the scheduler, where those filters will have already been applied. */ - def isExecutorBlacklistedForTask(executorId: String, index: Int): Boolean = { + def isExecutorExcludedForTask(executorId: String, index: Int): Boolean = { execToFailures.get(executorId).exists { execFailures => execFailures.getNumTaskFailures(index) >= MAX_TASK_ATTEMPTS_PER_EXECUTOR } } - def isNodeBlacklistedForTask(node: String, index: Int): Boolean = { - nodeToBlacklistedTaskIndexes.get(node).exists(_.contains(index)) + def isNodeExcludedForTask(node: String, index: Int): Boolean = { + nodeToExcludedTaskIndexes.get(node).exists(_.contains(index)) } /** - * Return true if this executor is blacklisted for the given stage. Completely ignores whether - * the executor is blacklisted for the entire application (or anything to do with the node the + * Return true if this executor is excluded for the given stage. Completely ignores whether + * the executor is excluded for the entire application (or anything to do with the node the * executor is on). That is to keep this method as fast as possible in the inner-loop of the * scheduler, where those filters will already have been applied. 
*/ - def isExecutorBlacklistedForTaskSet(executorId: String): Boolean = { - blacklistedExecs.contains(executorId) + def isExecutorExcludedForTaskSet(executorId: String): Boolean = { + excludedExecs.contains(executorId) } - def isNodeBlacklistedForTaskSet(node: String): Boolean = { - blacklistedNodes.contains(node) + def isNodeExcludedForTaskSet(node: String): Boolean = { + excludedNodes.contains(node) } - private[scheduler] def updateBlacklistForFailedTask( + private[scheduler] def updateExcludedForFailedTask( host: String, exec: String, index: Int, @@ -114,7 +114,7 @@ private[scheduler] class TaskSetBlacklist( execFailures.updateWithFailure(index, clock.getTimeMillis()) // check if this task has also failed on other executors on the same host -- if its gone - // over the limit, blacklist this task from the entire host. + // over the limit, exclude this task from the entire host. val execsWithFailuresOnNode = nodeToExecsWithFailures.getOrElseUpdate(host, new HashSet()) execsWithFailuresOnNode += exec val failuresOnHost = execsWithFailuresOnNode.toIterator.flatMap { exec => @@ -127,27 +127,35 @@ } }.sum if (failuresOnHost >= MAX_TASK_ATTEMPTS_PER_NODE) { - nodeToBlacklistedTaskIndexes.getOrElseUpdate(host, new HashSet()) += index + nodeToExcludedTaskIndexes.getOrElseUpdate(host, new HashSet()) += index } - // Check if enough tasks have failed on the executor to blacklist it for the entire stage. + // Check if enough tasks have failed on the executor to exclude it for the entire stage. val numFailures = execFailures.numUniqueTasksWithFailures if (numFailures >= MAX_FAILURES_PER_EXEC_STAGE) { - if (blacklistedExecs.add(exec)) { - logInfo(s"Blacklisting executor ${exec} for stage $stageId") - // This executor has been pushed into the blacklist for this stage. Let's check if it - // pushes the whole node into the blacklist. - val blacklistedExecutorsOnNode = - execsWithFailuresOnNode.filter(blacklistedExecs.contains(_)) + if (excludedExecs.add(exec)) { + logInfo(s"Excluding executor ${exec} for stage $stageId") + // This executor has been excluded for this stage. Let's check whether + // the whole node should be excluded as well. 
+ val excludedExecutorsOnNode = + execsWithFailuresOnNode.filter(excludedExecs.contains(_)) val now = clock.getTimeMillis() + // SparkListenerExecutorBlacklistedForStage is deprecated but post both events + // to keep backward compatibility listenerBus.post( SparkListenerExecutorBlacklistedForStage(now, exec, numFailures, stageId, stageAttemptId)) - val numFailExec = blacklistedExecutorsOnNode.size + listenerBus.post( + SparkListenerExecutorExcludedForStage(now, exec, numFailures, stageId, stageAttemptId)) + val numFailExec = excludedExecutorsOnNode.size if (numFailExec >= MAX_FAILED_EXEC_PER_NODE_STAGE) { - if (blacklistedNodes.add(host)) { - logInfo(s"Blacklisting ${host} for stage $stageId") + if (excludedNodes.add(host)) { + logInfo(s"Excluding ${host} for stage $stageId") + // SparkListenerNodeBlacklistedForStage is deprecated but post both events + // to keep backward compatibility listenerBus.post( SparkListenerNodeBlacklistedForStage(now, host, numFailExec, stageId, stageAttemptId)) + listenerBus.post( + SparkListenerNodeExcludedForStage(now, host, numFailExec, stageId, stageAttemptId)) } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 78fd412ef154c..0cfa76583bfbb 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -55,7 +55,7 @@ private[spark] class TaskSetManager( sched: TaskSchedulerImpl, val taskSet: TaskSet, val maxTaskFailures: Int, - blacklistTracker: Option[BlacklistTracker] = None, + healthTracker: Option[HealthTracker] = None, clock: Clock = new SystemClock()) extends Schedulable with Logging { private val conf = sched.sc.conf @@ -130,9 +130,9 @@ private[spark] class TaskSetManager( private var totalResultSize = 0L private var calculatedTasks = 0 - private[scheduler] val taskSetBlacklistHelperOpt: Option[TaskSetBlacklist] = { - blacklistTracker.map { _ => - new TaskSetBlacklist(sched.sc.listenerBus, conf, stageId, taskSet.stageAttemptId, clock) + private[scheduler] val taskSetExcludelistHelperOpt: Option[TaskSetExcludelist] = { + healthTracker.map { _ => + new TaskSetExcludelist(sched.sc.listenerBus, conf, stageId, taskSet.stageAttemptId, clock) } } @@ -294,7 +294,7 @@ private[spark] class TaskSetManager( while (indexOffset > 0) { indexOffset -= 1 val index = list(indexOffset) - if (!isTaskBlacklistedOnExecOrNode(index, execId, host) && + if (!isTaskExcludededOnExecOrNode(index, execId, host) && !(speculative && hasAttemptOnHost(index, host))) { // This should almost always be list.trimEnd(1) to remove tail list.remove(indexOffset) @@ -317,10 +317,10 @@ private[spark] class TaskSetManager( taskAttempts(taskIndex).exists(_.host == host) } - private def isTaskBlacklistedOnExecOrNode(index: Int, execId: String, host: String): Boolean = { - taskSetBlacklistHelperOpt.exists { blacklist => - blacklist.isNodeBlacklistedForTask(host, index) || - blacklist.isExecutorBlacklistedForTask(execId, index) + private def isTaskExcludededOnExecOrNode(index: Int, execId: String, host: String): Boolean = { + taskSetExcludelistHelperOpt.exists { excludeList => + excludeList.isNodeExcludedForTask(host, index) || + excludeList.isExecutorExcludedForTask(execId, index) } } @@ -421,11 +421,11 @@ private[spark] class TaskSetManager( taskResourceAssignments: Map[String, ResourceInformation] = Map.empty) : (Option[TaskDescription], Boolean) = { - val offerBlacklisted = 
taskSetBlacklistHelperOpt.exists { blacklist => - blacklist.isNodeBlacklistedForTaskSet(host) || - blacklist.isExecutorBlacklistedForTaskSet(execId) + val offerExcluded = taskSetExcludelistHelperOpt.exists { excludeList => + excludeList.isNodeExcludedForTaskSet(host) || + excludeList.isExecutorExcludedForTaskSet(execId) } - if (!isZombie && !offerBlacklisted) { + if (!isZombie && !offerExcluded) { val curTime = clock.getTimeMillis() var allowedLocality = maxLocality @@ -518,10 +518,10 @@ private[spark] class TaskSetManager( if (isZombie && runningTasks == 0) { sched.taskSetFinished(this) if (tasksSuccessful == numTasks) { - blacklistTracker.foreach(_.updateBlacklistForSuccessfulTaskSet( + healthTracker.foreach(_.updateExcludedForSuccessfulTaskSet( taskSet.stageId, taskSet.stageAttemptId, - taskSetBlacklistHelperOpt.get.execToFailures)) + taskSetExcludelistHelperOpt.get.execToFailures)) } } } @@ -606,12 +606,13 @@ private[spark] class TaskSetManager( } /** - * Check whether the given task set has been blacklisted to the point that it can't run anywhere. + * Check whether the given task set has been excluded to the point that it can't run anywhere. * * It is possible that this taskset has become impossible to schedule *anywhere* due to the - * blacklist. The most common scenario would be if there are fewer executors than - * spark.task.maxFailures. We need to detect this so we can avoid the job from being hung. - * We try to acquire new executor/s by killing an existing idle blacklisted executor. + * failures that lead executors being excluded from the ones we can run on. The most common + * scenario would be if there are fewer executors than spark.task.maxFailures. + * We need to detect this so we can avoid the job from being hung. We try to acquire new + * executor/s by killing an existing idle excluded executor. * * There's a tradeoff here: we could make sure all tasks in the task set are schedulable, but that * would add extra time to each iteration of the scheduling loop. Here, we take the approach of @@ -620,12 +621,12 @@ private[spark] class TaskSetManager( * method is faster in the typical case. In the worst case, this method can take * O(maxTaskFailures + numTasks) time, but it will be faster when there haven't been any task * failures (this is because the method picks one unscheduled task, and then iterates through each - * executor until it finds one that the task isn't blacklisted on). + * executor until it finds one that the task isn't excluded on). */ - private[scheduler] def getCompletelyBlacklistedTaskIfAny( + private[scheduler] def getCompletelyExcludedTaskIfAny( hostToExecutors: HashMap[String, HashSet[String]]): Option[Int] = { - taskSetBlacklistHelperOpt.flatMap { taskSetBlacklist => - val appBlacklist = blacklistTracker.get + taskSetExcludelistHelperOpt.flatMap { taskSetExcludelist => + val appHealthTracker = healthTracker.get // Only look for unschedulable tasks when at least one executor has registered. Otherwise, // task sets will be (unnecessarily) aborted in cases when no executors have registered yet. if (hostToExecutors.nonEmpty) { @@ -651,18 +652,18 @@ private[spark] class TaskSetManager( // when that unschedulable task is the last task remaining. 
hostToExecutors.forall { case (host, execsOnHost) => // Check if the task can run on the node - val nodeBlacklisted = - appBlacklist.isNodeBlacklisted(host) || - taskSetBlacklist.isNodeBlacklistedForTaskSet(host) || - taskSetBlacklist.isNodeBlacklistedForTask(host, indexInTaskSet) - if (nodeBlacklisted) { + val nodeExcluded = + appHealthTracker.isNodeExcluded(host) || + taskSetExcludelist.isNodeExcludedForTaskSet(host) || + taskSetExcludelist.isNodeExcludedForTask(host, indexInTaskSet) + if (nodeExcluded) { true } else { // Check if the task can run on any of the executors execsOnHost.forall { exec => - appBlacklist.isExecutorBlacklisted(exec) || - taskSetBlacklist.isExecutorBlacklistedForTaskSet(exec) || - taskSetBlacklist.isExecutorBlacklistedForTask(exec, indexInTaskSet) + appHealthTracker.isExecutorExcluded(exec) || + taskSetExcludelist.isExecutorExcludedForTaskSet(exec) || + taskSetExcludelist.isExecutorExcludedForTask(exec, indexInTaskSet) } } } @@ -673,16 +674,16 @@ private[spark] class TaskSetManager( } } - private[scheduler] def abortSinceCompletelyBlacklisted(indexInTaskSet: Int): Unit = { - taskSetBlacklistHelperOpt.foreach { taskSetBlacklist => + private[scheduler] def abortSinceCompletelyExcludedOnFailure(indexInTaskSet: Int): Unit = { + taskSetExcludelistHelperOpt.foreach { taskSetExcludelist => val partition = tasks(indexInTaskSet).partitionId abort(s""" |Aborting $taskSet because task $indexInTaskSet (partition $partition) - |cannot run anywhere due to node and executor blacklist. + |cannot run anywhere due to node and executor excludeOnFailure. |Most recent failure: - |${taskSetBlacklist.getLatestFailureReason} + |${taskSetExcludelist.getLatestFailureReason} | - |Blacklisting behavior can be configured via spark.blacklist.*. + |ExcludeOnFailure behavior can be configured via spark.excludeOnFailure.*. |""".stripMargin) } } @@ -821,7 +822,7 @@ private[spark] class TaskSetManager( isZombie = true if (fetchFailed.bmAddress != null) { - blacklistTracker.foreach(_.updateBlacklistForFetchFailure( + healthTracker.foreach(_.updateExcludedForFetchFailure( fetchFailed.bmAddress.host, fetchFailed.bmAddress.executorId)) } @@ -899,7 +900,7 @@ private[spark] class TaskSetManager( if (!isZombie && reason.countTowardsTaskFailures) { assert (null != failureReason) - taskSetBlacklistHelperOpt.foreach(_.updateBlacklistForFailedTask( + taskSetExcludelistHelperOpt.foreach(_.updateExcludedForFailedTask( info.host, info.executorId, index, failureReason)) numFailures(index) += 1 if (numFailures(index) >= maxTaskFailures) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index d1b0e798c51be..eda1cb52d4abc 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -131,7 +131,7 @@ private[spark] object CoarseGrainedClusterMessages { resourceProfileToTotalExecs: Map[ResourceProfile, Int], numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], hostToLocalTaskCount: Map[Int, Map[String, Int]], - nodeBlacklist: Set[String]) + excludedNodes: Set[String]) extends CoarseGrainedClusterMessage // Check if an executor was force-killed but for a reason unrelated to the running tasks. 
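The listener hunks earlier in this patch only rename the events and callbacks; as a quick orientation for code that consumes them, here is a minimal sketch (not part of the patch) of a listener written against the new API. The class name, the println logging, and the register helper are illustrative assumptions; the event classes, their fields, and SparkContext.addSparkListener come from the surrounding Spark code.

```
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded, SparkListenerNodeExcluded}

// Hypothetical listener, used only to illustrate the renamed application-level callbacks.
class ExclusionLoggingListener extends SparkListener {

  // Invoked when the driver excludes an executor for the entire application.
  override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = {
    println(s"Executor ${event.executorId} excluded after ${event.taskFailures} task failures")
  }

  // Invoked when the driver excludes a node for the entire application.
  override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = {
    println(s"Node ${event.hostId} excluded after ${event.executorFailures} executor failures")
  }
}

object ExclusionLoggingListener {
  // Attach the listener to a running SparkContext; addSparkListener is the standard hook.
  def register(sc: SparkContext): Unit = sc.addSparkListener(new ExclusionLoggingListener())
}
```

Existing listeners that override onExecutorBlacklisted or onNodeBlacklisted keep compiling, since those callbacks remain (deprecated) and both the old and the new events are posted for backward compatibility, as noted in the TaskSetExcludelist changes above.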
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 1d2689034f1ff..2bd0b4cc4b7d0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -209,13 +209,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp attributes, resources, resourceProfileId) => if (executorDataMap.contains(executorId)) { context.sendFailure(new IllegalStateException(s"Duplicate executor ID: $executorId")) - } else if (scheduler.nodeBlacklist.contains(hostname) || - isBlacklisted(executorId, hostname)) { - // If the cluster manager gives us an executor on a blacklisted node (because it - // already started allocating those resources before we informed it of our blacklist, - // or if it ignored our blacklist), then we reject that executor immediately. - logInfo(s"Rejecting $executorId as it has been blacklisted.") - context.sendFailure(new IllegalStateException(s"Executor is blacklisted: $executorId")) + } else if (scheduler.excludedNodes.contains(hostname) || + isExecutorExcluded(executorId, hostname)) { + // If the cluster manager gives us an executor on an excluded node (because it + // already started allocating those resources before we informed it of our exclusion, + // or if it ignored our exclusion), then we reject that executor immediately. + logInfo(s"Rejecting $executorId as it has been excluded.") + context.sendFailure( + new IllegalStateException(s"Executor is excluded due to failures: $executorId")) } else { // If the executor's rpc env is not listening for incoming connections, `hostPort` // will be null, and the client connection should be used to contact the executor. @@ -852,7 +853,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp final override def killExecutorsOnHost(host: String): Boolean = { logInfo(s"Requesting to kill any and all executors on host ${host}") // A potential race exists if a new executor attempts to register on a host - // that is on the blacklist and is no no longer valid. To avoid this race, + // that is on the exclude list and is no no longer valid. To avoid this race, // all executor registration and killing happens in the event loop. This way, either // an executor will fail to register, or will be killed when all executors on a host // are killed. @@ -884,13 +885,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp protected def currentDelegationTokens: Array[Byte] = delegationTokens.get() /** - * Checks whether the executor is blacklisted. This is called when the executor tries to - * register with the scheduler, and will deny registration if this method returns true. + * Checks whether the executor is excluded due to failure(s). This is called when the executor + * tries to register with the scheduler, and will deny registration if this method returns true. * - * This is in addition to the blacklist kept by the task scheduler, so custom implementations + * This is in addition to the exclude list kept by the task scheduler, so custom implementations * don't need to check there. 
*/ - protected def isBlacklisted(executorId: String, hostname: String): Boolean = false + protected def isExecutorExcluded(executorId: String, hostname: String): Boolean = false // SPARK-27112: We need to ensure that there is ordering of lock acquisition // between TaskSchedulerImpl and CoarseGrainedSchedulerBackend objects in order to fix diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 7ae9117137caa..5b0c1dc389af0 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -283,82 +283,141 @@ private[spark] class AppStatusListener( } } + // Note, the blacklisted functions are left here for backwards compatibility to allow + // new history server to properly read and display older event logs. override def onExecutorBlacklisted(event: SparkListenerExecutorBlacklisted): Unit = { - updateBlackListStatus(event.executorId, true) + updateExecExclusionStatus(event.executorId, true) + } + + override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = { + updateExecExclusionStatus(event.executorId, true) } override def onExecutorBlacklistedForStage( event: SparkListenerExecutorBlacklistedForStage): Unit = { - val now = System.nanoTime() + updateExclusionStatusForStage(event.stageId, event.stageAttemptId, event.executorId) + } - Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage => - setStageBlackListStatus(stage, now, event.executorId) - } - liveExecutors.get(event.executorId).foreach { exec => - addBlackListedStageTo(exec, event.stageId, now) - } + override def onExecutorExcludedForStage( + event: SparkListenerExecutorExcludedForStage): Unit = { + updateExclusionStatusForStage(event.stageId, event.stageAttemptId, event.executorId) } override def onNodeBlacklistedForStage(event: SparkListenerNodeBlacklistedForStage): Unit = { - val now = System.nanoTime() + updateNodeExclusionStatusForStage(event.stageId, event.stageAttemptId, event.hostId) + } - // Implicitly blacklist every available executor for the stage associated with this node - Option(liveStages.get((event.stageId, event.stageAttemptId))).foreach { stage => - val executorIds = liveExecutors.values.filter(_.host == event.hostId).map(_.executorId).toSeq - setStageBlackListStatus(stage, now, executorIds: _*) - } - liveExecutors.values.filter(_.hostname == event.hostId).foreach { exec => - addBlackListedStageTo(exec, event.stageId, now) - } + override def onNodeExcludedForStage(event: SparkListenerNodeExcludedForStage): Unit = { + updateNodeExclusionStatusForStage(event.stageId, event.stageAttemptId, event.hostId) } - private def addBlackListedStageTo(exec: LiveExecutor, stageId: Int, now: Long): Unit = { - exec.blacklistedInStages += stageId + private def addExcludedStageTo(exec: LiveExecutor, stageId: Int, now: Long): Unit = { + exec.excludedInStages += stageId liveUpdate(exec, now) } private def setStageBlackListStatus(stage: LiveStage, now: Long, executorIds: String*): Unit = { executorIds.foreach { executorId => val executorStageSummary = stage.executorSummary(executorId) - executorStageSummary.isBlacklisted = true + executorStageSummary.isExcluded = true maybeUpdate(executorStageSummary, now) } - stage.blackListedExecutors ++= executorIds + stage.excludedExecutors ++= executorIds + maybeUpdate(stage, now) + } + + private def setStageExcludedStatus(stage: LiveStage, now: Long, executorIds: 
String*): Unit = { + executorIds.foreach { executorId => + val executorStageSummary = stage.executorSummary(executorId) + executorStageSummary.isExcluded = true + maybeUpdate(executorStageSummary, now) + } + stage.excludedExecutors ++= executorIds maybeUpdate(stage, now) } override def onExecutorUnblacklisted(event: SparkListenerExecutorUnblacklisted): Unit = { - updateBlackListStatus(event.executorId, false) + updateExecExclusionStatus(event.executorId, false) + } + + override def onExecutorUnexcluded(event: SparkListenerExecutorUnexcluded): Unit = { + updateExecExclusionStatus(event.executorId, false) } override def onNodeBlacklisted(event: SparkListenerNodeBlacklisted): Unit = { - updateNodeBlackList(event.hostId, true) + updateNodeExcluded(event.hostId, true) + } + + override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = { + updateNodeExcluded(event.hostId, true) } override def onNodeUnblacklisted(event: SparkListenerNodeUnblacklisted): Unit = { - updateNodeBlackList(event.hostId, false) + updateNodeExcluded(event.hostId, false) + } + + override def onNodeUnexcluded(event: SparkListenerNodeUnexcluded): Unit = { + updateNodeExcluded(event.hostId, false) } - private def updateBlackListStatus(execId: String, blacklisted: Boolean): Unit = { + private def updateNodeExclusionStatusForStage(stageId: Int, stageAttemptId: Int, + hostId: String): Unit = { + val now = System.nanoTime() + + // Implicitly exclude every available executor for the stage associated with this node + Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => + val executorIds = liveExecutors.values.filter(_.host == hostId).map(_.executorId).toSeq + setStageExcludedStatus(stage, now, executorIds: _*) + } + liveExecutors.values.filter(_.hostname == hostId).foreach { exec => + addExcludedStageTo(exec, stageId, now) + } + } + + private def updateExclusionStatusForStage(stageId: Int, stageAttemptId: Int, + execId: String): Unit = { + val now = System.nanoTime() + + Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => + setStageExcludedStatus(stage, now, execId) + } + liveExecutors.get(execId).foreach { exec => + addExcludedStageTo(exec, stageId, now) + } + } + + private def updateExecExclusionStatus(execId: String, excluded: Boolean): Unit = { liveExecutors.get(execId).foreach { exec => - exec.isBlacklisted = blacklisted - if (blacklisted) { + updateExecExclusionStatus(exec, excluded, System.nanoTime()) + } + } + + private def updateExecExclusionStatus(exec: LiveExecutor, excluded: Boolean, now: Long): Unit = { + // Since we are sending both blacklisted and excluded events for backwards compatibility + // we need to protect against double counting so don't increment if already in + // that state. Also protects against executor being excluded and then node being + // separately excluded which could result in this being called twice for same + // executor. + if (exec.isExcluded != excluded) { + if (excluded) { appStatusSource.foreach(_.BLACKLISTED_EXECUTORS.inc()) + appStatusSource.foreach(_.EXCLUDED_EXECUTORS.inc()) } else { appStatusSource.foreach(_.UNBLACKLISTED_EXECUTORS.inc()) + appStatusSource.foreach(_.UNEXCLUDED_EXECUTORS.inc()) } - liveUpdate(exec, System.nanoTime()) + exec.isExcluded = excluded + liveUpdate(exec, now) } } - private def updateNodeBlackList(host: String, blacklisted: Boolean): Unit = { + private def updateNodeExcluded(host: String, excluded: Boolean): Unit = { val now = System.nanoTime() - // Implicitly (un)blacklist every executor associated with the node. 
+ // Implicitly (un)exclude every executor associated with the node. liveExecutors.values.foreach { exec => if (exec.hostname == host) { - exec.isBlacklisted = blacklisted - liveUpdate(exec, now) + updateExecExclusionStatus(exec, excluded, now) } } } @@ -759,10 +818,10 @@ private[spark] class AppStatusListener( update(pool, now) } - val executorIdsForStage = stage.blackListedExecutors + val executorIdsForStage = stage.excludedExecutors executorIdsForStage.foreach { executorId => liveExecutors.get(executorId).foreach { exec => - removeBlackListedStageFrom(exec, event.stageInfo.stageId, now) + removeExcludedStageFrom(exec, event.stageInfo.stageId, now) } } @@ -782,8 +841,8 @@ private[spark] class AppStatusListener( deadExecutors.retain((execId, exec) => isExecutorActiveForLiveStages(exec)) } - private def removeBlackListedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = { - exec.blacklistedInStages -= stageId + private def removeExcludedStageFrom(exec: LiveExecutor, stageId: Int, now: Long) = { + exec.excludedInStages -= stageId liveUpdate(exec, now) } diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala index 20f171bd3c375..d19744db089ba 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusSource.scala @@ -59,9 +59,25 @@ private[spark] class AppStatusSource extends Source { val SKIPPED_TASKS = getCounter("tasks", "skippedTasks") + // This is the count of how many executors have been blacklisted at the application level, + // does not include stage level blacklisting. + // this is private but user visible from metrics so just deprecate + @deprecated("use excludedExecutors instead", "3.1.0") val BLACKLISTED_EXECUTORS = getCounter("tasks", "blackListedExecutors") + // This is the count of how many executors have been unblacklisted at the application level, + // does not include stage level unblacklisting. + @deprecated("use unexcludedExecutors instead", "3.1.0") val UNBLACKLISTED_EXECUTORS = getCounter("tasks", "unblackListedExecutors") + + // This is the count of how many executors have been excluded at the application level, + // does not include stage level exclusion. + val EXCLUDED_EXECUTORS = getCounter("tasks", "excludedExecutors") + + // This is the count of how many executors have been unexcluded at the application level, + // does not include stage level unexclusion. 
+ val UNEXCLUDED_EXECUTORS = getCounter("tasks", "unexcludedExecutors") + } private[spark] object AppStatusSource { diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index 0fadd330a01ad..38f1f25f2fcaa 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -286,8 +286,8 @@ private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extend var totalInputBytes = 0L var totalShuffleRead = 0L var totalShuffleWrite = 0L - var isBlacklisted = false - var blacklistedInStages: Set[Int] = TreeSet() + var isExcluded = false + var excludedInStages: Set[Int] = TreeSet() var executorLogs = Map[String, String]() var attributes = Map[String, String]() @@ -334,18 +334,20 @@ private[spark] class LiveExecutor(val executorId: String, _addTime: Long) extend totalInputBytes, totalShuffleRead, totalShuffleWrite, - isBlacklisted, + isExcluded, maxMemory, addTime, Option(removeTime), Option(removeReason), executorLogs, memoryMetrics, - blacklistedInStages, + excludedInStages, Some(peakExecutorMetrics).filter(_.isSet), attributes, resources, - resourceProfileId) + resourceProfileId, + isExcluded, + excludedInStages) new ExecutorSummaryWrapper(info) } } @@ -361,7 +363,7 @@ private class LiveExecutorStageSummary( var succeededTasks = 0 var failedTasks = 0 var killedTasks = 0 - var isBlacklisted = false + var isExcluded = false var metrics = createMetrics(default = 0L) @@ -383,8 +385,9 @@ private class LiveExecutorStageSummary( metrics.shuffleWriteMetrics.recordsWritten, metrics.memoryBytesSpilled, metrics.diskBytesSpilled, - isBlacklisted, - Some(peakExecutorMetrics).filter(_.isSet)) + isExcluded, + Some(peakExecutorMetrics).filter(_.isSet), + isExcluded) new ExecutorStageSummaryWrapper(stageId, attemptId, executorId, info) } @@ -421,7 +424,7 @@ private class LiveStage extends LiveEntity { val activeTasksPerExecutor = new HashMap[String, Int]().withDefaultValue(0) - var blackListedExecutors = new HashSet[String]() + var excludedExecutors = new HashSet[String]() val peakExecutorMetrics = new ExecutorMetrics() diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index 5a8cf09e1cba6..96f5b7b5cf27e 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -82,10 +82,12 @@ class ExecutorStageSummary private[spark]( val shuffleWriteRecords : Long, val memoryBytesSpilled : Long, val diskBytesSpilled : Long, + @deprecated("use isExcludedForStage instead", "3.1.0") val isBlacklistedForStage: Boolean, @JsonSerialize(using = classOf[ExecutorMetricsJsonSerializer]) @JsonDeserialize(using = classOf[ExecutorMetricsJsonDeserializer]) - val peakMemoryMetrics: Option[ExecutorMetrics]) + val peakMemoryMetrics: Option[ExecutorMetrics], + val isExcludedForStage: Boolean) class ExecutorSummary private[spark]( val id: String, @@ -105,6 +107,7 @@ class ExecutorSummary private[spark]( val totalInputBytes: Long, val totalShuffleRead: Long, val totalShuffleWrite: Long, + @deprecated("use isExcluded instead", "3.1.0") val isBlacklisted: Boolean, val maxMemory: Long, val addTime: Date, @@ -112,13 +115,16 @@ class ExecutorSummary private[spark]( val removeReason: Option[String], val executorLogs: Map[String, String], val memoryMetrics: Option[MemoryMetrics], + @deprecated("use 
excludedInStages instead", "3.1.0") val blacklistedInStages: Set[Int], @JsonSerialize(using = classOf[ExecutorMetricsJsonSerializer]) @JsonDeserialize(using = classOf[ExecutorMetricsJsonDeserializer]) val peakMemoryMetrics: Option[ExecutorMetrics], val attributes: Map[String, String], val resources: Map[String, ResourceInformation], - val resourceProfileId: Int) + val resourceProfileId: Int, + val isExcluded: Boolean, + val excludedInStages: Set[Int]) class MemoryMetrics private[spark]( val usedOnHeapStorageMemory: Long, diff --git a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala index aefd001e573f9..a7c42b86468b2 100644 --- a/core/src/main/scala/org/apache/spark/ui/ToolTips.scala +++ b/core/src/main/scala/org/apache/spark/ui/ToolTips.scala @@ -91,9 +91,6 @@ private[spark] object ToolTips { val TASK_TIME = "Shaded red when garbage collection (GC) time is over 10% of task time" - val BLACKLISTED = - "Shows if this executor has been blacklisted by the scheduler due to task failures." - val APPLICATION_EXECUTOR_LIMIT = """Maximum number of executors that this application will use. This limit is finite only when dynamic allocation is enabled. The number of granted executors may exceed the limit diff --git a/core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json similarity index 99% rename from core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json rename to core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index 0d197eab0e25d..a69940fa5a1a5 100644 --- a/core/src/test/resources/HistoryServerExpectations/blacklisting_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -697,7 +697,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "1" : { "taskTime" : 708, @@ -714,7 +715,8 @@ "shuffleWriteRecords" : 10, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json similarity index 98% rename from core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json rename to core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index 24d73faa45021..bda9caedbbe81 100644 --- a/core/src/test/resources/HistoryServerExpectations/blacklisting_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -805,7 +805,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "5" : { "taskTime" : 1579, @@ -822,7 +823,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true }, "1" : { "taskTime" : 2411, @@ -839,7 +841,8 @@ 
"shuffleWriteRecords" : 12, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false }, "2" : { "taskTime" : 2446, @@ -856,7 +859,8 @@ "shuffleWriteRecords" : 15, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false }, "3" : { "taskTime" : 1774, @@ -873,7 +877,8 @@ "shuffleWriteRecords" : 3, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : true + "isBlacklistedForStage" : true, + "isExcludedForStage" : true } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json index 67425676a62d6..c18a2e31dff3c 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -23,5 +23,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json index d052a27385f66..bf3e93f3d3783 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_with_executor_metrics_json_expectation.json @@ -51,7 +51,9 @@ }, "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "test-3.vpc.company.com:37641", @@ -118,7 +120,9 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000004" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "test-4.vpc.company.com:33179", @@ -185,7 +189,9 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000003" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "test-2.vpc.company.com:43764", @@ -252,5 +258,7 @@ "CONTAINER_ID" : "container_1553914137147_0018_01_000002" }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 91574ca8266b2..9adda275b5609 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.167:51485", @@ -64,7 +66,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + 
"isExcluded" : true, + "excludedInStages" : [ ] } ,{ "id" : "2", "hostPort" : "172.22.0.167:51487", @@ -99,7 +103,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.167:51490", @@ -134,7 +140,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.167:51491", @@ -169,5 +177,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json similarity index 92% rename from core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json rename to core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index f14b9a5085a42..65bd309c1025e 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.167:51485", @@ -64,7 +66,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "172.22.0.167:51487", @@ -99,7 +103,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.167:51490", @@ -134,7 +140,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.167:51491", @@ -169,5 +177,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : true, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json similarity index 90% rename from core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json rename to core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json index 3645387317ca1..46e8f81d0e245 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_blacklisting_unblacklisting_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json @@ -23,7 +23,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : 
false, + "excludedInStages" : [ ] }, { "id" : "3", "hostPort" : "172.22.0.111:64543", @@ -52,7 +54,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "172.22.0.111:64539", @@ -81,7 +85,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "172.22.0.111:64541", @@ -110,7 +116,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "0", "hostPort" : "172.22.0.111:64540", @@ -139,5 +147,7 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json index 165389cf25027..53ae9a0c7909e 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_resource_information_expectation.json @@ -29,7 +29,9 @@ "blacklistedInStages" : [ ], "attributes" : { }, "resources" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "2", "hostPort" : "tomg-test:46005", @@ -79,7 +81,9 @@ "addresses" : [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12" ] } }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] }, { "id" : "1", "hostPort" : "tomg-test:44873", @@ -129,5 +133,7 @@ "addresses" : [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12" ] } }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "isExcluded" : false, + "excludedInStages" : [ ] } ] diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index 3db7d551b6130..41e54c68858ad 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -459,7 +459,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 8ef3769c1ca6b..7a6685a609523 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -459,7 +459,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git 
a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index 3b5476ae8b160..066b6a4f884a7 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -503,7 +503,8 @@ "shuffleWriteRecords" : 0, "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, - "isBlacklistedForStage" : false + "isBlacklistedForStage" : false, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json index 373510d23058e..20a958073245a 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json @@ -929,7 +929,8 @@ "MinorGCTime" : 0, "MajorGCCount" : 0, "MajorGCTime" : 0 - } + }, + "isExcludedForStage" : false }, "driver" : { "taskTime" : 0, @@ -968,7 +969,8 @@ "MinorGCTime" : 115, "MajorGCCount" : 4, "MajorGCTime" : 339 - } + }, + "isExcludedForStage" : false } }, "killedTasksSummary" : { }, diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 6a38bba5dd0e5..d1edb80e40b21 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -524,7 +524,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { assert(numExecutorsTarget(manager, defaultProfile.id) === 1) assert(maxNumExecutorsNeededPerResourceProfile(manager, defaultProfile) == 1) - // Stage 0 becomes unschedulable due to blacklisting + // Stage 0 becomes unschedulable due to excludeOnFailure post(SparkListenerUnschedulableTaskSetAdded(0, 0)) clock.advance(1000) manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) @@ -580,7 +580,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { post(SparkListenerTaskEnd(0, 0, null, Success, t2Info, new ExecutorMetrics, null)) post(SparkListenerStageCompleted(createStageInfo(0, 2))) - // Stage 1 and 2 becomes unschedulable now due to blacklisting + // Stage 1 and 2 becomes unschedulable now due to excludeOnFailure post(SparkListenerUnschedulableTaskSetAdded(1, 0)) post(SparkListenerUnschedulableTaskSetAdded(2, 0)) @@ -637,7 +637,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { (0 to 3).foreach { i => assert(removeExecutorDefaultProfile(manager, i.toString)) } (0 to 3).foreach { i => onExecutorRemoved(manager, i.toString) } - // Now due to blacklisting, the task becomes unschedulable + // Now due to executor being excluded, the task becomes unschedulable post(SparkListenerUnschedulableTaskSetAdded(0, 0)) clock.advance(1000) manager invokePrivate _updateAndSyncNumExecutorsTarget(clock.nanoTime()) diff --git a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala index a2e70b23a3e5d..c9d43f517afba 100644 --- a/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala +++ b/core/src/test/scala/org/apache/spark/HeartbeatReceiverSuite.scala @@ -76,7 
+76,7 @@ class HeartbeatReceiverSuite sc = spy(new SparkContext(conf)) scheduler = mock(classOf[TaskSchedulerImpl]) when(sc.taskScheduler).thenReturn(scheduler) - when(scheduler.nodeBlacklist).thenReturn(Predef.Set[String]()) + when(scheduler.excludedNodes).thenReturn(Predef.Set[String]()) when(scheduler.sc).thenReturn(sc) heartbeatReceiverClock = new ManualClock heartbeatReceiver = new HeartbeatReceiver(sc, heartbeatReceiverClock) diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index c7c3ad27675fa..e1d4eff0a62cb 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -497,19 +497,19 @@ class StandaloneDynamicAllocationSuite } } - test("executor registration on a blacklisted host must fail") { + test("executor registration on a excluded host must fail") { // The context isn't really used by the test, but it helps with creating a test scheduler, // since CoarseGrainedSchedulerBackend makes a lot of calls to the context instance. - sc = new SparkContext(appConf.set(config.BLACKLIST_ENABLED.key, "true")) + sc = new SparkContext(appConf.set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true")) val endpointRef = mock(classOf[RpcEndpointRef]) val mockAddress = mock(classOf[RpcAddress]) when(endpointRef.address).thenReturn(mockAddress) - val message = RegisterExecutor("one", endpointRef, "blacklisted-host", 10, Map.empty, + val message = RegisterExecutor("one", endpointRef, "excluded-host", 10, Map.empty, Map.empty, Map.empty, ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) val taskScheduler = mock(classOf[TaskSchedulerImpl]) - when(taskScheduler.nodeBlacklist()).thenReturn(Set("blacklisted-host")) + when(taskScheduler.excludedNodes()).thenReturn(Set("excluded-host")) when(taskScheduler.resourceOffers(any(), any[Boolean])).thenReturn(Nil) when(taskScheduler.sc).thenReturn(sc) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala index 2da40dccba53e..5d40a0610eb6c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/BasicEventFilterSuite.scala @@ -135,6 +135,8 @@ class BasicEventFilterSuite extends SparkFunSuite { SparkListenerStageExecutorMetrics(1.toString, 0, 0, new ExecutorMetrics))) assert(Some(false) === acceptFn(SparkListenerExecutorBlacklisted(0, 1.toString, 1))) assert(Some(false) === acceptFn(SparkListenerExecutorUnblacklisted(0, 1.toString))) + assert(Some(false) === acceptFn(SparkListenerExecutorExcluded(0, 1.toString, 1))) + assert(Some(false) === acceptFn(SparkListenerExecutorUnexcluded(0, 1.toString))) assert(Some(false) === acceptFn(createExecutorRemovedEvent(1))) val bmId = BlockManagerId(1.toString, "host1", 1) assert(Some(false) === acceptFn(SparkListenerBlockManagerAdded(0, bmId, 1))) @@ -148,6 +150,10 @@ class BasicEventFilterSuite extends SparkFunSuite { SparkListenerStageExecutorMetrics(2.toString, 0, 0, new ExecutorMetrics))) assert(Some(true) === acceptFn(SparkListenerExecutorBlacklisted(0, 2.toString, 1))) assert(Some(true) === acceptFn(SparkListenerExecutorUnblacklisted(0, 2.toString))) + assert(None === acceptFn(SparkListenerNodeBlacklisted(0, "host1", 1))) + assert(None === 
acceptFn(SparkListenerNodeUnblacklisted(0, "host1"))) + assert(Some(true) === acceptFn(SparkListenerExecutorExcluded(0, 2.toString, 1))) + assert(Some(true) === acceptFn(SparkListenerExecutorUnexcluded(0, 2.toString))) assert(Some(true) === acceptFn(createExecutorRemovedEvent(2))) val bmId2 = BlockManagerId(2.toString, "host1", 1) assert(Some(true) === acceptFn(SparkListenerBlockManagerAdded(0, bmId2, 1))) @@ -164,8 +170,8 @@ class BasicEventFilterSuite extends SparkFunSuite { assert(None === acceptFn(SparkListenerEnvironmentUpdate(Map.empty))) assert(None === acceptFn(SparkListenerApplicationStart("1", Some("1"), 0, "user", None))) assert(None === acceptFn(SparkListenerApplicationEnd(1))) - assert(None === acceptFn(SparkListenerNodeBlacklisted(0, "host1", 1))) - assert(None === acceptFn(SparkListenerNodeUnblacklisted(0, "host1"))) + assert(None === acceptFn(SparkListenerNodeExcluded(0, "host1", 1))) + assert(None === acceptFn(SparkListenerNodeUnexcluded(0, "host1"))) assert(None === acceptFn(SparkListenerLogStart("testVersion"))) } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala index 2a914023ec821..ac39f022d5ca6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -219,10 +219,10 @@ class EventLogFileCompactorSuite extends SparkFunSuite { override def acceptFn(): PartialFunction[SparkListenerEvent, Boolean] = { case _: SparkListenerApplicationEnd => true case _: SparkListenerEnvironmentUpdate => true - case _: SparkListenerNodeBlacklisted => true + case _: SparkListenerNodeExcluded => true case _: SparkListenerBlockManagerAdded => false case _: SparkListenerApplicationStart => false - case _: SparkListenerNodeUnblacklisted => false + case _: SparkListenerNodeUnexcluded => false } override def statistics(): Option[EventFilter.FilterStatistics] = None @@ -254,11 +254,11 @@ class EventLogFileCompactorSuite extends SparkFunSuite { // filterApplicationStart: Some(false) & Some(false) => filter out writeEventToWriter(writer, SparkListenerApplicationStart("app", None, 0, "user", None)) - // filterNodeBlacklisted: None & Some(true) => filter in - expectedLines += writeEventToWriter(writer, SparkListenerNodeBlacklisted(0, "host1", 1)) + // filterNodeExcluded: None & Some(true) => filter in + expectedLines += writeEventToWriter(writer, SparkListenerNodeExcluded(0, "host1", 1)) - // filterNodeUnblacklisted: None & Some(false) => filter out - writeEventToWriter(writer, SparkListenerNodeUnblacklisted(0, "host1")) + // filterNodeUnexcluded: None & Some(false) => filter out + writeEventToWriter(writer, SparkListenerNodeUnexcluded(0, "host1")) // other events: None & None => filter in expectedLines += writeEventToWriter(writer, SparkListenerUnpersistRDD(0)) diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index e4c23d3d1b1c3..08b2118065521 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -169,12 +169,13 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "applications/local-1426533911241/1/stages/0/0/taskList", "stage task list from multi-attempt app json(2)" 
-> "applications/local-1426533911241/2/stages/0/0/taskList", - "blacklisting for stage" -> "applications/app-20180109111548-0000/stages/0/0", - "blacklisting node for stage" -> "applications/application_1516285256255_0012/stages/0/0", + "excludeOnFailure for stage" -> "applications/app-20180109111548-0000/stages/0/0", + "excludeOnFailure node for stage" -> "applications/application_1516285256255_0012/stages/0/0", "rdd list storage json" -> "applications/local-1422981780767/storage/rdd", - "executor node blacklisting" -> "applications/app-20161116163331-0000/executors", - "executor node blacklisting unblacklisting" -> "applications/app-20161115172038-0000/executors", + "executor node excludeOnFailure" -> "applications/app-20161116163331-0000/executors", + "executor node excludeOnFailure unexcluding" -> + "applications/app-20161115172038-0000/executors", "executor memory usage" -> "applications/app-20161116163331-0000/executors", "executor resource information" -> "applications/application_1555004656427_0144/executors", "multiple resource profiles" -> "applications/application_1578436911597_0052/environment", diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala deleted file mode 100644 index a1671a58f0d9b..0000000000000 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistTrackerSuite.scala +++ /dev/null @@ -1,608 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.scheduler - -import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{never, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.scalatest.BeforeAndAfterEach -import org.scalatestplus.mockito.MockitoSugar - -import org.apache.spark._ -import org.apache.spark.internal.config -import org.apache.spark.util.ManualClock - -class BlacklistTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar - with LocalSparkContext { - - private val clock = new ManualClock(0) - - private var blacklist: BlacklistTracker = _ - private var listenerBusMock: LiveListenerBus = _ - private var scheduler: TaskSchedulerImpl = _ - private var conf: SparkConf = _ - - override def beforeEach(): Unit = { - conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - scheduler = mockTaskSchedWithConf(conf) - - clock.setTime(0) - - listenerBusMock = mock[LiveListenerBus] - blacklist = new BlacklistTracker(listenerBusMock, conf, None, clock) - } - - override def afterEach(): Unit = { - if (blacklist != null) { - blacklist = null - } - if (scheduler != null) { - scheduler.stop() - scheduler = null - } - super.afterEach() - } - - // All executors and hosts used in tests should be in this set, so that [[assertEquivalentToSet]] - // works. Its OK if its got extraneous entries - val allExecutorAndHostIds = { - (('A' to 'Z')++ (1 to 100).map(_.toString)) - .flatMap{ suffix => - Seq(s"host$suffix", s"host-$suffix") - } - }.toSet - - /** - * Its easier to write our tests as if we could directly look at the sets of nodes & executors in - * the blacklist. However the api doesn't expose a set, so this is a simple way to test - * something similar, since we know the universe of values that might appear in these sets. - */ - def assertEquivalentToSet(f: String => Boolean, expected: Set[String]): Unit = { - allExecutorAndHostIds.foreach { id => - val actual = f(id) - val exp = expected.contains(id) - assert(actual === exp, raw"""for string "$id" """) - } - } - - def mockTaskSchedWithConf(conf: SparkConf): TaskSchedulerImpl = { - sc = new SparkContext(conf) - val scheduler = mock[TaskSchedulerImpl] - when(scheduler.sc).thenReturn(sc) - when(scheduler.mapOutputTracker).thenReturn( - SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]) - scheduler - } - - def createTaskSetBlacklist(stageId: Int = 0): TaskSetBlacklist = { - new TaskSetBlacklist(listenerBusMock, conf, stageId, stageAttemptId = 0, clock = clock) - } - - test("executors can be blacklisted with only a few failures per stage") { - // For many different stages, executor 1 fails a task, then executor 2 succeeds the task, - // and then the task set is done. Not enough failures to blacklist the executor *within* - // any particular taskset, but we still blacklist the executor overall eventually. - // Also, we intentionally have a mix of task successes and failures -- there are even some - // successes after the executor is blacklisted. The idea here is those tasks get scheduled - // before the executor is blacklisted. We might get successes after blacklisting (because the - // executor might be flaky but not totally broken). But successes should not unblacklist the - // executor. 
- val failuresUntilBlacklisted = conf.get(config.MAX_FAILURES_PER_EXEC) - var failuresSoFar = 0 - (0 until failuresUntilBlacklisted * 10).foreach { stageId => - val taskSetBlacklist = createTaskSetBlacklist(stageId) - if (stageId % 2 == 0) { - // fail one task in every other taskset - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - failuresSoFar += 1 - } - blacklist.updateBlacklistForSuccessfulTaskSet(stageId, 0, taskSetBlacklist.execToFailures) - assert(failuresSoFar == stageId / 2 + 1) - if (failuresSoFar < failuresUntilBlacklisted) { - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } else { - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklisted(0, "1", failuresUntilBlacklisted)) - } - } - } - - // If an executor has many task failures, but the task set ends up failing, it shouldn't be - // counted against the executor. - test("executors aren't blacklisted as a result of tasks in failed task sets") { - val failuresUntilBlacklisted = conf.get(config.MAX_FAILURES_PER_EXEC) - // for many different stages, executor 1 fails a task, and then the taskSet fails. - (0 until failuresUntilBlacklisted * 10).foreach { stage => - val taskSetBlacklist = createTaskSetBlacklist(stage) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - } - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - Seq(true, false).foreach { succeedTaskSet => - val label = if (succeedTaskSet) "success" else "failure" - test(s"stage blacklist updates correctly on stage $label") { - // Within one taskset, an executor fails a few times, so it's blacklisted for the taskset. - // But if the taskset fails, we shouldn't blacklist the executor after the stage. - val taskSetBlacklist = createTaskSetBlacklist(0) - // We trigger enough failures for both the taskset blacklist, and the application blacklist. - val numFailures = math.max(conf.get(config.MAX_FAILURES_PER_EXEC), - conf.get(config.MAX_FAILURES_PER_EXEC_STAGE)) - (0 until numFailures).foreach { index => - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = index, failureReason = "testing") - } - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - if (succeedTaskSet) { - // The task set succeeded elsewhere, so we should count those failures against our executor, - // and it should be blacklisted for the entire application. - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", numFailures)) - } else { - // The task set failed, so we don't count these failures against the executor for other - // stages. - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - } - } - - test("blacklisted executors and nodes get recovered with time") { - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. 
- (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist1.execToFailures) - assert(blacklist.nodeBlacklist() === Set("hostA")) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 4)) - - // Advance the clock and then make sure hostA and executors 1 and 2 have been removed from the - // blacklist. - val timeout = blacklist.BLACKLIST_TIMEOUT_MILLIS + 1 - clock.advance(timeout) - blacklist.applyBlacklistTimeout() - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "2")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "1")) - verify(listenerBusMock).post(SparkListenerNodeUnblacklisted(timeout, "hostA")) - - // Fail one more task, but executor isn't put back into blacklist since the count of failures - // on that executor should have been reset to 0. - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 2) - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(2, 0, taskSetBlacklist2.execToFailures) - assert(blacklist.nodeBlacklist() === Set()) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - test("blacklist can handle lost executors") { - // The blacklist should still work if an executor is killed completely. We should still - // be able to blacklist the entire node. - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Lets say that executor 1 dies completely. We get some task failures, but - // the taskset then finishes successfully (elsewhere). 
- (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.handleRemovedExecutor("1") - blacklist.updateBlacklistForSuccessfulTaskSet( - stageId = 0, - stageAttemptId = 0, - taskSetBlacklist0.execToFailures) - assert(blacklist.isExecutorBlacklisted("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) - val t1 = blacklist.BLACKLIST_TIMEOUT_MILLIS / 2 - clock.advance(t1) - - // Now another executor gets spun up on that host, but it also dies. - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.handleRemovedExecutor("2") - blacklist.updateBlacklistForSuccessfulTaskSet( - stageId = 1, - stageAttemptId = 0, - taskSetBlacklist1.execToFailures) - // We've now had two bad executors on the hostA, so we should blacklist the entire node. - assert(blacklist.isExecutorBlacklisted("1")) - assert(blacklist.isExecutorBlacklisted("2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t1, "2", 4)) - assert(blacklist.isNodeBlacklisted("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(t1, "hostA", 2)) - - // Advance the clock so that executor 1 should no longer be explicitly blacklisted, but - // everything else should still be blacklisted. - val t2 = blacklist.BLACKLIST_TIMEOUT_MILLIS / 2 + 1 - clock.advance(t2) - blacklist.applyBlacklistTimeout() - assert(!blacklist.isExecutorBlacklisted("1")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(t1 + t2, "1")) - assert(blacklist.isExecutorBlacklisted("2")) - assert(blacklist.isNodeBlacklisted("hostA")) - // make sure we don't leak memory - assert(!blacklist.executorIdToBlacklistStatus.contains("1")) - assert(!blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - // Advance the timeout again so now hostA should be removed from the blacklist. - clock.advance(t1) - blacklist.applyBlacklistTimeout() - assert(!blacklist.nodeIdToBlacklistExpiryTime.contains("hostA")) - verify(listenerBusMock).post(SparkListenerNodeUnblacklisted(t1 + t2 + t1, "hostA")) - // Even though unblacklisting a node implicitly unblacklists all of its executors, - // there will be no SparkListenerExecutorUnblacklisted sent here. - } - - test("task failures expire with time") { - // Verifies that 2 failures within the timeout period cause an executor to be blacklisted, but - // if task failures are spaced out by more than the timeout period, the first failure is timed - // out, and the executor isn't blacklisted. - var stageId = 0 - - def failOneTaskInTaskSet(exec: String): Unit = { - val taskSetBlacklist = createTaskSetBlacklist(stageId = stageId) - taskSetBlacklist.updateBlacklistForFailedTask("host-" + exec, exec, 0, "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(stageId, 0, taskSetBlacklist.execToFailures) - stageId += 1 - } - - failOneTaskInTaskSet(exec = "1") - // We have one sporadic failure on exec 2, but that's it. Later checks ensure that we never - // blacklist executor 2 despite this one failure. - failOneTaskInTaskSet(exec = "2") - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - assert(blacklist.nextExpiryTime === Long.MaxValue) - - // We advance the clock past the expiry time. 
- clock.advance(blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - val t0 = clock.getTimeMillis() - blacklist.applyBlacklistTimeout() - assert(blacklist.nextExpiryTime === Long.MaxValue) - failOneTaskInTaskSet(exec = "1") - - // Because the 2nd failure on executor 1 happened past the expiry time, nothing should have been - // blacklisted. - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - - // Now we add one more failure, within the timeout, and it should be counted. - clock.setTime(t0 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - val t1 = clock.getTimeMillis() - failOneTaskInTaskSet(exec = "1") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t1, "1", 2)) - assert(blacklist.nextExpiryTime === t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Add failures on executor 3, make sure it gets put on the blacklist. - clock.setTime(t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - val t2 = clock.getTimeMillis() - failOneTaskInTaskSet(exec = "3") - failOneTaskInTaskSet(exec = "3") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "3")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(t2, "3", 2)) - assert(blacklist.nextExpiryTime === t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Now we go past the timeout for executor 1, so it should be dropped from the blacklist. - clock.setTime(t1 + blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("3")) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(clock.getTimeMillis(), "1")) - assert(blacklist.nextExpiryTime === t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - // Make sure that we update correctly when we go from having blacklisted executors to - // just having tasks with timeouts. - clock.setTime(t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS - 1) - failOneTaskInTaskSet(exec = "4") - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("3")) - assert(blacklist.nextExpiryTime === t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - - clock.setTime(t2 + blacklist.BLACKLIST_TIMEOUT_MILLIS + 1) - blacklist.applyBlacklistTimeout() - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(clock.getTimeMillis(), "3")) - // we've got one task failure still, but we don't bother setting nextExpiryTime to it, to - // avoid wasting time checking for expiry of individual task failures. - assert(blacklist.nextExpiryTime === Long.MaxValue) - } - - test("task failure timeout works as expected for long-running tasksets") { - // This ensures that we don't trigger spurious blacklisting for long tasksets, when the taskset - // finishes long after the task failures. We create two tasksets, each with one failure. - // Individually they shouldn't cause any blacklisting since there is only one failure. - // Furthermore, we space the failures out so far that even when both tasksets have completed, - // we still don't trigger any blacklisting. 
- val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 2) - // Taskset1 has one failure immediately - taskSetBlacklist1.updateBlacklistForFailedTask("host-1", "1", 0, "testing") - // Then we have a *long* delay, much longer than the timeout, before any other failures or - // taskset completion - clock.advance(blacklist.BLACKLIST_TIMEOUT_MILLIS * 5) - // After the long delay, we have one failure on taskset 2, on the same executor - taskSetBlacklist2.updateBlacklistForFailedTask("host-1", "1", 0, "testing") - // Finally, we complete both tasksets. Its important here to complete taskset2 *first*. We - // want to make sure that when taskset 1 finishes, even though we've now got two task failures, - // we realize that the task failure we just added was well before the timeout. - clock.advance(1) - blacklist.updateBlacklistForSuccessfulTaskSet(stageId = 2, 0, taskSetBlacklist2.execToFailures) - clock.advance(1) - blacklist.updateBlacklistForSuccessfulTaskSet(stageId = 1, 0, taskSetBlacklist1.execToFailures) - - // Make sure nothing was blacklisted - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set()) - } - - test("only blacklist nodes for the application when enough executors have failed on that " + - "specific host") { - // we blacklist executors on two different hosts -- make sure that doesn't lead to any - // node blacklisting - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 0, failureReason = "testing") - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(1, 0, taskSetBlacklist1.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set()) - - // Finally, blacklist another executor on the same node as the original blacklisted executor, - // and make sure this time we *do* blacklist the node. 
- val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 0) - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 0, failureReason = "testing") - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 1, failureReason = "testing") - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist2.execToFailures) - assertEquivalentToSet(blacklist.isExecutorBlacklisted(_), Set("1", "2", "3")) - verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "3", 2)) - assertEquivalentToSet(blacklist.isNodeBlacklisted(_), Set("hostA")) - verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) - } - - test("blacklist still respects legacy configs") { - val conf = new SparkConf().setMaster("local") - assert(!BlacklistTracker.isBlacklistEnabled(conf)) - conf.set(config.BLACKLIST_LEGACY_TIMEOUT_CONF, 5000L) - assert(BlacklistTracker.isBlacklistEnabled(conf)) - assert(5000 === BlacklistTracker.getBlacklistTimeout(conf)) - // the new conf takes precedence, though - conf.set(config.BLACKLIST_TIMEOUT_CONF, 1000L) - assert(1000 === BlacklistTracker.getBlacklistTimeout(conf)) - - // if you explicitly set the legacy conf to 0, that also would disable blacklisting - conf.set(config.BLACKLIST_LEGACY_TIMEOUT_CONF, 0L) - assert(!BlacklistTracker.isBlacklistEnabled(conf)) - // but again, the new conf takes precedence - conf.set(config.BLACKLIST_ENABLED, true) - assert(BlacklistTracker.isBlacklistEnabled(conf)) - assert(1000 === BlacklistTracker.getBlacklistTimeout(conf)) - } - - test("check blacklist configuration invariants") { - val conf = new SparkConf().setMaster("yarn").set(config.SUBMIT_DEPLOY_MODE, "cluster") - Seq( - (2, 2), - (2, 3) - ).foreach { case (maxTaskFailures, maxNodeAttempts) => - conf.set(config.TASK_MAX_FAILURES, maxTaskFailures) - conf.set(config.MAX_TASK_ATTEMPTS_PER_NODE.key, maxNodeAttempts.toString) - val excMsg = intercept[IllegalArgumentException] { - BlacklistTracker.validateBlacklistConfs(conf) - }.getMessage() - assert(excMsg === s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + - s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + - s"( = ${maxTaskFailures} ). Though blacklisting is enabled, with this configuration, " + - s"Spark will not be robust to one bad node. 
Decrease " + - s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + - s"or disable blacklisting with ${config.BLACKLIST_ENABLED.key}") - } - - conf.remove(config.TASK_MAX_FAILURES) - conf.remove(config.MAX_TASK_ATTEMPTS_PER_NODE) - - Seq( - config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, - config.MAX_TASK_ATTEMPTS_PER_NODE, - config.MAX_FAILURES_PER_EXEC_STAGE, - config.MAX_FAILED_EXEC_PER_NODE_STAGE, - config.MAX_FAILURES_PER_EXEC, - config.MAX_FAILED_EXEC_PER_NODE, - config.BLACKLIST_TIMEOUT_CONF - ).foreach { config => - conf.set(config.key, "0") - val excMsg = intercept[IllegalArgumentException] { - BlacklistTracker.validateBlacklistConfs(conf) - }.getMessage() - assert(excMsg.contains(s"${config.key} was 0, but must be > 0.")) - conf.remove(config) - } - } - - test("blacklisting kills executors, configured by BLACKLIST_KILL_ENABLED") { - val allocationClientMock = mock[ExecutorAllocationClient] - when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) - when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => - // To avoid a race between blacklisting and killing, it is important that the nodeBlacklist - // is updated before we ask the executor allocation client to kill all the executors - // on a particular host. - if (blacklist.nodeBlacklist.contains("hostA")) { - true - } else { - throw new IllegalStateException("hostA should be on the blacklist") - } - } - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - // Disable auto-kill. Blacklist an executor and make sure killExecutors is not called. - conf.set(config.BLACKLIST_KILL_ENABLED, false) - - val taskSetBlacklist0 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. - (0 until 4).foreach { partition => - taskSetBlacklist0.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist0.execToFailures) - - verify(allocationClientMock, never).killExecutor(any()) - - val taskSetBlacklist1 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist1.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist1.execToFailures) - - verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - // Enable auto-kill. Blacklist an executor and make sure killExecutors is called. - conf.set(config.BLACKLIST_KILL_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - val taskSetBlacklist2 = createTaskSetBlacklist(stageId = 0) - // Fail 4 tasks in one task set on executor 1, so that executor gets blacklisted for the whole - // application. 
- (0 until 4).foreach { partition => - taskSetBlacklist2.updateBlacklistForFailedTask( - "hostA", exec = "1", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist2.execToFailures) - - verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) - - val taskSetBlacklist3 = createTaskSetBlacklist(stageId = 1) - // Fail 4 tasks in one task set on executor 2, so that executor gets blacklisted for the whole - // application. Since that's the second executor that is blacklisted on the same node, we also - // blacklist that node. - (0 until 4).foreach { partition => - taskSetBlacklist3.updateBlacklistForFailedTask( - "hostA", exec = "2", index = partition, failureReason = "testing") - } - blacklist.updateBlacklistForSuccessfulTaskSet(0, 0, taskSetBlacklist3.execToFailures) - - verify(allocationClientMock).killExecutors(Seq("2"), false, false, true) - verify(allocationClientMock).killExecutorsOnHost("hostA") - } - - test("fetch failure blacklisting kills executors, configured by BLACKLIST_KILL_ENABLED") { - val allocationClientMock = mock[ExecutorAllocationClient] - when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) - when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => - // To avoid a race between blacklisting and killing, it is important that the nodeBlacklist - // is updated before we ask the executor allocation client to kill all the executors - // on a particular host. - if (blacklist.nodeBlacklist.contains("hostA")) { - true - } else { - throw new IllegalStateException("hostA should be on the blacklist") - } - } - - conf.set(config.BLACKLIST_FETCH_FAILURE_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - - // Disable auto-kill. Blacklist an executor and make sure killExecutors is not called. - conf.set(config.BLACKLIST_KILL_ENABLED, false) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "1") - - verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - assert(blacklist.nodeToBlacklistedExecs.contains("hostA")) - assert(blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - - // Enable auto-kill. Blacklist an executor and make sure killExecutors is called. - conf.set(config.BLACKLIST_KILL_ENABLED, true) - blacklist = new BlacklistTracker(listenerBusMock, conf, Some(allocationClientMock), clock) - clock.advance(1000) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "1") - - verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) - verify(allocationClientMock, never).killExecutorsOnHost(any()) - - assert(blacklist.executorIdToBlacklistStatus.contains("1")) - assert(blacklist.executorIdToBlacklistStatus("1").node === "hostA") - assert(blacklist.executorIdToBlacklistStatus("1").expiryTime === - 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nextExpiryTime === 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nodeIdToBlacklistExpiryTime.isEmpty) - assert(blacklist.nodeToBlacklistedExecs.contains("hostA")) - assert(blacklist.nodeToBlacklistedExecs("hostA").contains("1")) - - // Enable external shuffle service to see if all the executors on this node will be killed. 
- conf.set(config.SHUFFLE_SERVICE_ENABLED, true) - clock.advance(1000) - blacklist.updateBlacklistForFetchFailure("hostA", exec = "2") - - verify(allocationClientMock, never).killExecutors(Seq("2"), true, true) - verify(allocationClientMock).killExecutorsOnHost("hostA") - - assert(blacklist.nodeIdToBlacklistExpiryTime.contains("hostA")) - assert(blacklist.nodeIdToBlacklistExpiryTime("hostA") === - 2000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - assert(blacklist.nextExpiryTime === 1000 + blacklist.BLACKLIST_TIMEOUT_MILLIS) - } -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index d648293fdbe06..47e37fc55cefe 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -300,7 +300,7 @@ private class CSMockExternalClusterManager extends ExternalClusterManager { when(ts.applicationId()).thenReturn("appid1") when(ts.applicationAttemptId()).thenReturn(Some("attempt1")) when(ts.schedulingMode).thenReturn(SchedulingMode.FIFO) - when(ts.nodeBlacklist()).thenReturn(Set.empty[String]) + when(ts.excludedNodes()).thenReturn(Set.empty[String]) ts } diff --git a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala similarity index 86% rename from core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala rename to core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala index 246d4b2f56ec9..29a8f4be8b72b 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BlacklistIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerIntegrationSuite.scala @@ -20,7 +20,7 @@ import org.apache.spark._ import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests._ -class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorMockBackend]{ +class HealthTrackerIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorMockBackend]{ val badHost = "host-0" @@ -40,9 +40,9 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM // Test demonstrating the issue -- without a config change, the scheduler keeps scheduling // according to locality preferences, and so the job fails - testScheduler("If preferred node is bad, without blacklist job will fail", + testScheduler("If preferred node is bad, without excludeOnFailure job will fail", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "false" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "false" )) { val rdd = new MockRDDWithLocalityPrefs(sc, 10, Nil, badHost) withBackend(badHostBackend _) { @@ -55,19 +55,19 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM testScheduler( "With default settings, job can succeed despite multiple bad executors on node", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", config.TASK_MAX_FAILURES.key -> "4", TEST_N_HOSTS.key -> "2", TEST_N_EXECUTORS_HOST.key -> "5", TEST_N_CORES_EXECUTOR.key -> "10" ) ) { - // To reliably reproduce the failure that would occur without blacklisting, we have to use 1 + // To reliably reproduce the failure that would occur without exludeOnFailure, we have to use 1 // task. 
That way, we ensure this 1 task gets rotated through enough bad executors on the host // to fail the taskSet, before we have a bunch of different tasks fail in the executors so we - // blacklist them. - // But the point here is -- without blacklisting, we would never schedule anything on the good - // host-1 before we hit too many failures trying our preferred host-0. + // exclude them. + // But the point here is -- without excludeOnFailure, we would never schedule anything on the + // good host-1 before we hit too many failures trying our preferred host-0. val rdd = new MockRDDWithLocalityPrefs(sc, 1, Nil, badHost) withBackend(badHostBackend _) { val jobFuture = submit(rdd, (0 until 1).toArray) @@ -76,12 +76,12 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM assertDataStructuresEmpty(noFailure = true) } - // Here we run with the blacklist on, and the default config takes care of having this + // Here we run with the excludeOnFailure on, and the default config takes care of having this // robust to one bad node. testScheduler( "Bad node with multiple executors, job will still succeed with the right confs", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", // just to avoid this test taking too long config.LOCALITY_WAIT.key -> "10ms" ) @@ -100,7 +100,7 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM testScheduler( "SPARK-15865 Progress with fewer executors than maxTaskFailures", extraConfs = Seq( - config.BLACKLIST_ENABLED.key -> "true", + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true", TEST_N_HOSTS.key -> "2", TEST_N_EXECUTORS_HOST.key -> "1", TEST_N_CORES_EXECUTOR.key -> "1", @@ -116,7 +116,7 @@ class BlacklistIntegrationSuite extends SchedulerIntegrationSuite[MultiExecutorM awaitJobTermination(jobFuture, duration) val pattern = ( s"""|Aborting TaskSet 0.0 because task .* - |cannot run anywhere due to node and executor blacklist""".stripMargin).r + |cannot run anywhere due to node and executor excludeOnFailure""".stripMargin).r assert(pattern.findFirstIn(failure.getMessage).isDefined, s"Couldn't find $pattern in ${failure.getMessage()}") } diff --git a/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala new file mode 100644 index 0000000000000..7ecc1f51ce236 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala @@ -0,0 +1,615 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler + +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.{never, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.scalatest.BeforeAndAfterEach +import org.scalatestplus.mockito.MockitoSugar + +import org.apache.spark._ +import org.apache.spark.internal.config +import org.apache.spark.util.ManualClock + +class HealthTrackerSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar + with LocalSparkContext { + + private val clock = new ManualClock(0) + + private var healthTracker: HealthTracker = _ + private var listenerBusMock: LiveListenerBus = _ + private var scheduler: TaskSchedulerImpl = _ + private var conf: SparkConf = _ + + override def beforeEach(): Unit = { + conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + scheduler = mockTaskSchedWithConf(conf) + + clock.setTime(0) + + listenerBusMock = mock[LiveListenerBus] + healthTracker = new HealthTracker(listenerBusMock, conf, None, clock) + } + + override def afterEach(): Unit = { + if (healthTracker != null) { + healthTracker = null + } + if (scheduler != null) { + scheduler.stop() + scheduler = null + } + super.afterEach() + } + + // All executors and hosts used in tests should be in this set, so that [[assertEquivalentToSet]] + // works. Its OK if its got extraneous entries + val allExecutorAndHostIds = { + (('A' to 'Z')++ (1 to 100).map(_.toString)) + .flatMap{ suffix => + Seq(s"host$suffix", s"host-$suffix") + } + }.toSet + + /** + * Its easier to write our tests as if we could directly look at the sets of nodes & executors in + * the exclude. However the api doesn't expose a set, so this is a simple way to test + * something similar, since we know the universe of values that might appear in these sets. + */ + def assertEquivalentToSet(f: String => Boolean, expected: Set[String]): Unit = { + allExecutorAndHostIds.foreach { id => + val actual = f(id) + val exp = expected.contains(id) + assert(actual === exp, raw"""for string "$id" """) + } + } + + def mockTaskSchedWithConf(conf: SparkConf): TaskSchedulerImpl = { + sc = new SparkContext(conf) + val scheduler = mock[TaskSchedulerImpl] + when(scheduler.sc).thenReturn(sc) + when(scheduler.mapOutputTracker).thenReturn( + SparkEnv.get.mapOutputTracker.asInstanceOf[MapOutputTrackerMaster]) + scheduler + } + + def createTaskSetExcludelist(stageId: Int = 0): TaskSetExcludelist = { + new TaskSetExcludelist(listenerBusMock, conf, stageId, stageAttemptId = 0, clock = clock) + } + + test("executors can be excluded with only a few failures per stage") { + // For many different stages, executor 1 fails a task, then executor 2 succeeds the task, + // and then the task set is done. Not enough failures to exclude the executor *within* + // any particular taskset, but we still exclude the executor overall eventually. + // Also, we intentionally have a mix of task successes and failures -- there are even some + // successes after the executor is excluded. The idea here is those tasks get scheduled + // before the executor is excluded. We might get successes after excluding (because the + // executor might be flaky but not totally broken). But successes should not unexclude the + // executor. 
+ val failuresUntilExcludeed = conf.get(config.MAX_FAILURES_PER_EXEC) + var failuresSoFar = 0 + (0 until failuresUntilExcludeed * 10).foreach { stageId => + val taskSetExclude = createTaskSetExcludelist(stageId) + if (stageId % 2 == 0) { + // fail one task in every other taskset + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + failuresSoFar += 1 + } + healthTracker.updateExcludedForSuccessfulTaskSet(stageId, 0, taskSetExclude.execToFailures) + assert(failuresSoFar == stageId / 2 + 1) + if (failuresSoFar < failuresUntilExcludeed) { + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } else { + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post( + SparkListenerExecutorExcluded(0, "1", failuresUntilExcludeed)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklisted(0, "1", failuresUntilExcludeed)) + } + } + } + + // If an executor has many task failures, but the task set ends up failing, it shouldn't be + // counted against the executor. + test("executors aren't excluded as a result of tasks in failed task sets") { + val failuresUntilExcludeed = conf.get(config.MAX_FAILURES_PER_EXEC) + // for many different stages, executor 1 fails a task, and then the taskSet fails. + (0 until failuresUntilExcludeed * 10).foreach { stage => + val taskSetExclude = createTaskSetExcludelist(stage) + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + } + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + Seq(true, false).foreach { succeedTaskSet => + val label = if (succeedTaskSet) "success" else "failure" + test(s"stage exclude updates correctly on stage $label") { + // Within one taskset, an executor fails a few times, so it's excluded for the taskset. + // But if the taskset fails, we shouldn't exclude the executor after the stage. + val taskSetExclude = createTaskSetExcludelist(0) + // We trigger enough failures for both the taskset exclude, and the application exclude. + val numFailures = math.max(conf.get(config.MAX_FAILURES_PER_EXEC), + conf.get(config.MAX_FAILURES_PER_EXEC_STAGE)) + (0 until numFailures).foreach { index => + taskSetExclude.updateExcludedForFailedTask( + "hostA", exec = "1", index = index, failureReason = "testing") + } + assert(taskSetExclude.isExecutorExcludedForTaskSet("1")) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + if (succeedTaskSet) { + // The task set succeeded elsewhere, so we should count those failures against our executor, + // and it should be excluded for the entire application. + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", numFailures)) + } else { + // The task set failed, so we don't count these failures against the executor for other + // stages. + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + } + } + + test("excluded executors and nodes get recovered with time") { + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. 
+ (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 4)) + verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "1", 4)) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude1.execToFailures) + assert(healthTracker.excludedNodeList() === Set("hostA")) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(0, "hostA", 2)) + verify(listenerBusMock).post(SparkListenerNodeBlacklisted(0, "hostA", 2)) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "2", 4)) + verify(listenerBusMock).post(SparkListenerExecutorBlacklisted(0, "2", 4)) + + // Advance the clock and then make sure hostA and executors 1 and 2 have been removed from the + // exclude. + val timeout = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1 + clock.advance(timeout) + healthTracker.applyExcludeOnFailureTimeout() + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(timeout, "2")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(timeout, "1")) + verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "2")) + verify(listenerBusMock).post(SparkListenerExecutorUnblacklisted(timeout, "1")) + verify(listenerBusMock).post(SparkListenerNodeUnexcluded(timeout, "hostA")) + + // Fail one more task, but executor isn't put back into exclude since the count of failures + // on that executor should have been reset to 0. + val taskSetExclude2 = createTaskSetExcludelist(stageId = 2) + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(2, 0, taskSetExclude2.execToFailures) + assert(healthTracker.excludedNodeList() === Set()) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + test("exclude can handle lost executors") { + // The exclude should still work if an executor is killed completely. We should still + // be able to exclude the entire node. + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Lets say that executor 1 dies completely. We get some task failures, but + // the taskset then finishes successfully (elsewhere). 
+ (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.handleRemovedExecutor("1") + healthTracker.updateExcludedForSuccessfulTaskSet( + stageId = 0, + stageAttemptId = 0, + taskSetExclude0.execToFailures) + assert(healthTracker.isExecutorExcluded("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 4)) + val t1 = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS / 2 + clock.advance(t1) + + // Now another executor gets spun up on that host, but it also dies. + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.handleRemovedExecutor("2") + healthTracker.updateExcludedForSuccessfulTaskSet( + stageId = 1, + stageAttemptId = 0, + taskSetExclude1.execToFailures) + // We've now had two bad executors on the hostA, so we should exclude the entire node. + assert(healthTracker.isExecutorExcluded("1")) + assert(healthTracker.isExecutorExcluded("2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t1, "2", 4)) + assert(healthTracker.isNodeExcluded("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(t1, "hostA", 2)) + + // Advance the clock so that executor 1 should no longer be explicitly excluded, but + // everything else should still be excluded. + val t2 = healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS / 2 + 1 + clock.advance(t2) + healthTracker.applyExcludeOnFailureTimeout() + assert(!healthTracker.isExecutorExcluded("1")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(t1 + t2, "1")) + assert(healthTracker.isExecutorExcluded("2")) + assert(healthTracker.isNodeExcluded("hostA")) + // make sure we don't leak memory + assert(!healthTracker.executorIdToExcludedStatus.contains("1")) + assert(!healthTracker.nodeToExcludedExecs("hostA").contains("1")) + // Advance the timeout again so now hostA should be removed from the exclude. + clock.advance(t1) + healthTracker.applyExcludeOnFailureTimeout() + assert(!healthTracker.nodeIdToExcludedExpiryTime.contains("hostA")) + verify(listenerBusMock).post(SparkListenerNodeUnexcluded(t1 + t2 + t1, "hostA")) + // Even though unexcluding a node implicitly unexcludes all of its executors, + // there will be no SparkListenerExecutorUnexcluded sent here. + } + + test("task failures expire with time") { + // Verifies that 2 failures within the timeout period cause an executor to be excluded, but + // if task failures are spaced out by more than the timeout period, the first failure is timed + // out, and the executor isn't excluded. + var stageId = 0 + + def failOneTaskInTaskSet(exec: String): Unit = { + val taskSetExclude = createTaskSetExcludelist(stageId = stageId) + taskSetExclude.updateExcludedForFailedTask("host-" + exec, exec, 0, "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(stageId, 0, taskSetExclude.execToFailures) + stageId += 1 + } + + failOneTaskInTaskSet(exec = "1") + // We have one sporadic failure on exec 2, but that's it. Later checks ensure that we never + // exclude executor 2 despite this one failure. + failOneTaskInTaskSet(exec = "2") + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + assert(healthTracker.nextExpiryTime === Long.MaxValue) + + // We advance the clock past the expiry time. 
+ clock.advance(healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + val t0 = clock.getTimeMillis() + healthTracker.applyExcludeOnFailureTimeout() + assert(healthTracker.nextExpiryTime === Long.MaxValue) + failOneTaskInTaskSet(exec = "1") + + // Because the 2nd failure on executor 1 happened past the expiry time, nothing should have been + // excluded. + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + + // Now we add one more failure, within the timeout, and it should be counted. + clock.setTime(t0 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + val t1 = clock.getTimeMillis() + failOneTaskInTaskSet(exec = "1") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t1, "1", 2)) + assert(healthTracker.nextExpiryTime === t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Add failures on executor 3, make sure it gets put on the exclude. + clock.setTime(t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + val t2 = clock.getTimeMillis() + failOneTaskInTaskSet(exec = "3") + failOneTaskInTaskSet(exec = "3") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "3")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(t2, "3", 2)) + assert(healthTracker.nextExpiryTime === t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Now we go past the timeout for executor 1, so it should be dropped from the exclude. + clock.setTime(t1 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("3")) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(clock.getTimeMillis(), "1")) + assert(healthTracker.nextExpiryTime === t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + // Make sure that we update correctly when we go from having excluded executors to + // just having tasks with timeouts. + clock.setTime(t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS - 1) + failOneTaskInTaskSet(exec = "4") + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("3")) + assert(healthTracker.nextExpiryTime === t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + + clock.setTime(t2 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS + 1) + healthTracker.applyExcludeOnFailureTimeout() + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + verify(listenerBusMock).post(SparkListenerExecutorUnexcluded(clock.getTimeMillis(), "3")) + // we've got one task failure still, but we don't bother setting nextExpiryTime to it, to + // avoid wasting time checking for expiry of individual task failures. + assert(healthTracker.nextExpiryTime === Long.MaxValue) + } + + test("task failure timeout works as expected for long-running tasksets") { + // This ensures that we don't trigger spurious excluding for long tasksets, when the taskset + // finishes long after the task failures. We create two tasksets, each with one failure. + // Individually they shouldn't cause any excluding since there is only one failure. + // Furthermore, we space the failures out so far that even when both tasksets have completed, + // we still don't trigger any excluding. 
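+    // Task failures are timestamped when they occur, not when their taskset completes, so a
+    // failure recorded long ago should already have aged out by the time its taskset finishes.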
+ val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + val taskSetExclude2 = createTaskSetExcludelist(stageId = 2) + // Taskset1 has one failure immediately + taskSetExclude1.updateExcludedForFailedTask("host-1", "1", 0, "testing") + // Then we have a *long* delay, much longer than the timeout, before any other failures or + // taskset completion + clock.advance(healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS * 5) + // After the long delay, we have one failure on taskset 2, on the same executor + taskSetExclude2.updateExcludedForFailedTask("host-1", "1", 0, "testing") + // Finally, we complete both tasksets. Its important here to complete taskset2 *first*. We + // want to make sure that when taskset 1 finishes, even though we've now got two task failures, + // we realize that the task failure we just added was well before the timeout. + clock.advance(1) + healthTracker.updateExcludedForSuccessfulTaskSet(stageId = 2, 0, taskSetExclude2.execToFailures) + clock.advance(1) + healthTracker.updateExcludedForSuccessfulTaskSet(stageId = 1, 0, taskSetExclude1.execToFailures) + + // Make sure nothing was excluded + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set()) + } + + test("only exclude nodes for the application when enough executors have failed on that " + + "specific host") { + // we exclude executors on two different hosts -- make sure that doesn't lead to any + // node excluding + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "1", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + taskSetExclude1.updateExcludedForFailedTask( + "hostB", exec = "2", index = 0, failureReason = "testing") + taskSetExclude1.updateExcludedForFailedTask( + "hostB", exec = "2", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(1, 0, taskSetExclude1.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "2", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set()) + + // Finally, exclude another executor on the same node as the original excluded executor, + // and make sure this time we *do* exclude the node. 
+ val taskSetExclude2 = createTaskSetExcludelist(stageId = 0) + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "3", index = 0, failureReason = "testing") + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "3", index = 1, failureReason = "testing") + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude2.execToFailures) + assertEquivalentToSet(healthTracker.isExecutorExcluded(_), Set("1", "2", "3")) + verify(listenerBusMock).post(SparkListenerExecutorExcluded(0, "3", 2)) + assertEquivalentToSet(healthTracker.isNodeExcluded(_), Set("hostA")) + verify(listenerBusMock).post(SparkListenerNodeExcluded(0, "hostA", 2)) + } + + test("exclude still respects legacy configs") { + val conf = new SparkConf().setMaster("local") + assert(!HealthTracker.isExcludeOnFailureEnabled(conf)) + conf.set(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF, 5000L) + assert(HealthTracker.isExcludeOnFailureEnabled(conf)) + assert(5000 === HealthTracker.getExludeOnFailureTimeout(conf)) + // the new conf takes precedence, though + conf.set(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF, 1000L) + assert(1000 === HealthTracker.getExludeOnFailureTimeout(conf)) + + // if you explicitly set the legacy conf to 0, that also would disable excluding + conf.set(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF, 0L) + assert(!HealthTracker.isExcludeOnFailureEnabled(conf)) + // but again, the new conf takes precedence + conf.set(config.EXCLUDE_ON_FAILURE_ENABLED, true) + assert(HealthTracker.isExcludeOnFailureEnabled(conf)) + assert(1000 === HealthTracker.getExludeOnFailureTimeout(conf)) + } + + test("check exclude configuration invariants") { + val conf = new SparkConf().setMaster("yarn").set(config.SUBMIT_DEPLOY_MODE, "cluster") + Seq( + (2, 2), + (2, 3) + ).foreach { case (maxTaskFailures, maxNodeAttempts) => + conf.set(config.TASK_MAX_FAILURES, maxTaskFailures) + conf.set(config.MAX_TASK_ATTEMPTS_PER_NODE.key, maxNodeAttempts.toString) + val excMsg = intercept[IllegalArgumentException] { + HealthTracker.validateExcludeOnFailureConfs(conf) + }.getMessage() + assert(excMsg === s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key} " + + s"( = ${maxNodeAttempts}) was >= ${config.TASK_MAX_FAILURES.key} " + + s"( = ${maxTaskFailures} ). Though excludeOnFailure is enabled, with this " + + s"configuration, Spark will not be robust to one bad node. 
Decrease " + + s"${config.MAX_TASK_ATTEMPTS_PER_NODE.key}, increase ${config.TASK_MAX_FAILURES.key}, " + + s"or disable excludeOnFailure with ${config.EXCLUDE_ON_FAILURE_ENABLED.key}") + } + + conf.remove(config.TASK_MAX_FAILURES) + conf.remove(config.MAX_TASK_ATTEMPTS_PER_NODE) + + Seq( + config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, + config.MAX_TASK_ATTEMPTS_PER_NODE, + config.MAX_FAILURES_PER_EXEC_STAGE, + config.MAX_FAILED_EXEC_PER_NODE_STAGE, + config.MAX_FAILURES_PER_EXEC, + config.MAX_FAILED_EXEC_PER_NODE, + config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF + ).foreach { config => + conf.set(config.key, "0") + val excMsg = intercept[IllegalArgumentException] { + HealthTracker.validateExcludeOnFailureConfs(conf) + }.getMessage() + assert(excMsg.contains(s"${config.key} was 0, but must be > 0.")) + conf.remove(config) + } + } + + test("excluding kills executors, configured by EXCLUDE_ON_FAILURE_KILL_ENABLED") { + val allocationClientMock = mock[ExecutorAllocationClient] + when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) + when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => + // To avoid a race between excluding and killing, it is important that the nodeExclude + // is updated before we ask the executor allocation client to kill all the executors + // on a particular host. + if (healthTracker.excludedNodeList().contains("hostA")) { + true + } else { + throw new IllegalStateException("hostA should be on the exclude") + } + } + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + // Disable auto-kill. Exclude an executor and make sure killExecutors is not called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, false) + + val taskSetExclude0 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. + (0 until 4).foreach { partition => + taskSetExclude0.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude0.execToFailures) + + verify(allocationClientMock, never).killExecutor(any()) + + val taskSetExclude1 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude1.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude1.execToFailures) + + verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + // Enable auto-kill. Exclude an executor and make sure killExecutors is called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + val taskSetExclude2 = createTaskSetExcludelist(stageId = 0) + // Fail 4 tasks in one task set on executor 1, so that executor gets excluded for the whole + // application. 
+ (0 until 4).foreach { partition => + taskSetExclude2.updateExcludedForFailedTask( + "hostA", exec = "1", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude2.execToFailures) + + verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) + + val taskSetExclude3 = createTaskSetExcludelist(stageId = 1) + // Fail 4 tasks in one task set on executor 2, so that executor gets excluded for the whole + // application. Since that's the second executor that is excluded on the same node, we also + // exclude that node. + (0 until 4).foreach { partition => + taskSetExclude3.updateExcludedForFailedTask( + "hostA", exec = "2", index = partition, failureReason = "testing") + } + healthTracker.updateExcludedForSuccessfulTaskSet(0, 0, taskSetExclude3.execToFailures) + + verify(allocationClientMock).killExecutors(Seq("2"), false, false, true) + verify(allocationClientMock).killExecutorsOnHost("hostA") + } + + test("fetch failure excluding kills executors, configured by EXCLUDE_ON_FAILURE_KILL_ENABLED") { + val allocationClientMock = mock[ExecutorAllocationClient] + when(allocationClientMock.killExecutors(any(), any(), any(), any())).thenReturn(Seq("called")) + when(allocationClientMock.killExecutorsOnHost("hostA")).thenAnswer { (_: InvocationOnMock) => + // To avoid a race between excluding and killing, it is important that the nodeExclude + // is updated before we ask the executor allocation client to kill all the executors + // on a particular host. + if (healthTracker.excludedNodeList().contains("hostA")) { + true + } else { + throw new IllegalStateException("hostA should be on the exclude") + } + } + + conf.set(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + + // Disable auto-kill. Exclude an executor and make sure killExecutors is not called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, false) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "1") + + verify(allocationClientMock, never).killExecutors(any(), any(), any(), any()) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + assert(healthTracker.nodeToExcludedExecs.contains("hostA")) + assert(healthTracker.nodeToExcludedExecs("hostA").contains("1")) + + // Enable auto-kill. Exclude an executor and make sure killExecutors is called. + conf.set(config.EXCLUDE_ON_FAILURE_KILL_ENABLED, true) + healthTracker = new HealthTracker(listenerBusMock, conf, Some(allocationClientMock), clock) + clock.advance(1000) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "1") + + verify(allocationClientMock).killExecutors(Seq("1"), false, false, true) + verify(allocationClientMock, never).killExecutorsOnHost(any()) + + assert(healthTracker.executorIdToExcludedStatus.contains("1")) + assert(healthTracker.executorIdToExcludedStatus("1").node === "hostA") + assert(healthTracker.executorIdToExcludedStatus("1").expiryTime === + 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nextExpiryTime === 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nodeIdToExcludedExpiryTime.isEmpty) + assert(healthTracker.nodeToExcludedExecs.contains("hostA")) + assert(healthTracker.nodeToExcludedExecs("hostA").contains("1")) + + // Enable external shuffle service to see if all the executors on this node will be killed. 
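+    // With the external shuffle service enabled, shuffle data is served by the host rather than
+    // by individual executors, so a fetch failure implicates the whole node: the node is excluded
+    // and every executor on it is killed.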
+ conf.set(config.SHUFFLE_SERVICE_ENABLED, true) + clock.advance(1000) + healthTracker.updateExcludedForFetchFailure("hostA", exec = "2") + + verify(allocationClientMock, never).killExecutors(Seq("2"), true, true) + verify(allocationClientMock).killExecutorsOnHost("hostA") + + assert(healthTracker.nodeIdToExcludedExpiryTime.contains("hostA")) + assert(healthTracker.nodeIdToExcludedExpiryTime("hostA") === + 2000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + assert(healthTracker.nextExpiryTime === 1000 + healthTracker.EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS) + } +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index f29eb70eb3628..0c60c42c054cf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -51,11 +51,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B var failedTaskSetReason: String = null var failedTaskSet = false - var blacklist: BlacklistTracker = null + var healthTracker: HealthTracker = null var taskScheduler: TaskSchedulerImpl = null var dagScheduler: DAGScheduler = null - val stageToMockTaskSetBlacklist = new HashMap[Int, TaskSetBlacklist]() + val stageToMockTaskSetExcludelist = new HashMap[Int, TaskSetExcludelist]() val stageToMockTaskSetManager = new HashMap[Int, TaskSetManager]() override def beforeEach(): Unit = { @@ -63,7 +63,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B failedTaskSet = false failedTaskSetException = None failedTaskSetReason = null - stageToMockTaskSetBlacklist.clear() + stageToMockTaskSetExcludelist.clear() stageToMockTaskSetManager.clear() } @@ -95,10 +95,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B setupHelper() } - def setupSchedulerWithMockTaskSetBlacklist(confs: (String, String)*): TaskSchedulerImpl = { - blacklist = mock[BlacklistTracker] + def setupSchedulerWithMockTaskSetExcludelist(confs: (String, String)*): TaskSchedulerImpl = { + healthTracker = mock[HealthTracker] val conf = new SparkConf().setMaster("local").setAppName("TaskSchedulerImplSuite") - conf.set(config.BLACKLIST_ENABLED, true) + conf.set(config.EXCLUDE_ON_FAILURE_ENABLED, true) confs.foreach { case (k, v) => conf.set(k, v) } sc = new SparkContext(conf) @@ -106,16 +106,16 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new TaskSchedulerImpl(sc, sc.conf.get(config.TASK_MAX_FAILURES)) { override def createTaskSetManager(taskSet: TaskSet, maxFailures: Int): TaskSetManager = { val tsm = super.createTaskSetManager(taskSet, maxFailures) - // we need to create a spied tsm just so we can set the TaskSetBlacklist + // we need to create a spied tsm just so we can set the TaskSetExcludelist val tsmSpy = spy(tsm) - val taskSetBlacklist = mock[TaskSetBlacklist] - when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(taskSetBlacklist)) + val taskSetExcludelist = mock[TaskSetExcludelist] + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(taskSetExcludelist)) stageToMockTaskSetManager(taskSet.stageId) = tsmSpy - stageToMockTaskSetBlacklist(taskSet.stageId) = taskSetBlacklist + stageToMockTaskSetExcludelist(taskSet.stageId) = taskSetExcludelist tsmSpy } - override private[scheduler] lazy val blacklistTrackerOpt = Some(blacklist) + override private[scheduler] lazy val healthTrackerOpt = 
Some(healthTracker) } setupHelper() } @@ -230,7 +230,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B sc.conf.get(config.TASK_MAX_FAILURES), clock = clock) { override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } override def shuffleOffers(offers: IndexedSeq[WorkerOffer]): IndexedSeq[WorkerOffer] = { // Don't shuffle the offers around for this test. Instead, we'll just pass in all @@ -678,22 +678,22 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!failedTaskSet) } - test("scheduled tasks obey task and stage blacklists") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("scheduled tasks obey task and stage excludelist") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() (0 to 2).foreach {stageId => val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = stageId, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) } - // Setup our mock blacklist: - // * stage 0 is blacklisted on node "host1" - // * stage 1 is blacklisted on executor "executor3" - // * stage 0, partition 0 is blacklisted on executor 0 - // (mocked methods default to returning false, ie. no blacklisting) - when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet("host1")).thenReturn(true) - when(stageToMockTaskSetBlacklist(1).isExecutorBlacklistedForTaskSet("executor3")) + // Setup our mock excludelist: + // * stage 0 is excluded on node "host1" + // * stage 1 is excluded on executor "executor3" + // * stage 0, partition 0 is excluded on executor 0 + // (mocked methods default to returning false, ie. no excluding) + when(stageToMockTaskSetExcludelist(0).isNodeExcludedForTaskSet("host1")).thenReturn(true) + when(stageToMockTaskSetExcludelist(1).isExecutorExcludedForTaskSet("executor3")) .thenReturn(true) - when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTask("executor0", 0)) + when(stageToMockTaskSetExcludelist(0).isExecutorExcludedForTask("executor0", 0)) .thenReturn(true) val offers = IndexedSeq( @@ -705,21 +705,21 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten // We should schedule all tasks. assert(firstTaskAttempts.size === 6) - // Whenever we schedule a task, we must consult the node and executor blacklist. (The test + // Whenever we schedule a task, we must consult the node and executor excludelist. (The test // doesn't check exactly what checks are made because the offers get shuffled.) 
(0 to 2).foreach { stageId => - verify(stageToMockTaskSetBlacklist(stageId), atLeast(1)) - .isNodeBlacklistedForTaskSet(anyString()) - verify(stageToMockTaskSetBlacklist(stageId), atLeast(1)) - .isExecutorBlacklistedForTaskSet(anyString()) + verify(stageToMockTaskSetExcludelist(stageId), atLeast(1)) + .isNodeExcludedForTaskSet(anyString()) + verify(stageToMockTaskSetExcludelist(stageId), atLeast(1)) + .isExecutorExcludedForTaskSet(anyString()) } def tasksForStage(stageId: Int): Seq[TaskDescription] = { firstTaskAttempts.filter{_.name.contains(s"stage $stageId")} } tasksForStage(0).foreach { task => - // executors 1 & 2 blacklisted for node - // executor 0 blacklisted just for partition 0 + // executors 1 & 2 excluded for node + // executor 0 excluded just for partition 0 if (task.index == 0) { assert(task.executorId === "executor3") } else { @@ -727,12 +727,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } } tasksForStage(1).foreach { task => - // executor 3 blacklisted + // executor 3 excluded assert("executor3" != task.executorId) } // no restrictions on stage 2 - // Finally, just make sure that we can still complete tasks as usual with blacklisting + // Finally, just make sure that we can still complete tasks as usual with exclusion // in effect. Finish each of the tasksets -- taskset 0 & 1 complete successfully, taskset 2 // fails. (0 to 2).foreach { stageId => @@ -770,23 +770,23 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } // the tasksSets complete, so the tracker should be notified of the successful ones - verify(blacklist, times(1)).updateBlacklistForSuccessfulTaskSet( + verify(healthTracker, times(1)).updateExcludedForSuccessfulTaskSet( stageId = 0, stageAttemptId = 0, - failuresByExec = stageToMockTaskSetBlacklist(0).execToFailures) - verify(blacklist, times(1)).updateBlacklistForSuccessfulTaskSet( + failuresByExec = stageToMockTaskSetExcludelist(0).execToFailures) + verify(healthTracker, times(1)).updateExcludedForSuccessfulTaskSet( stageId = 1, stageAttemptId = 0, - failuresByExec = stageToMockTaskSetBlacklist(1).execToFailures) + failuresByExec = stageToMockTaskSetExcludelist(1).execToFailures) // but we shouldn't update for the failed taskset - verify(blacklist, never).updateBlacklistForSuccessfulTaskSet( + verify(healthTracker, never).updateExcludedForSuccessfulTaskSet( stageId = meq(2), stageAttemptId = anyInt(), failuresByExec = any()) } - test("scheduled tasks obey node and executor blacklists") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("scheduled tasks obey node and executor excludelists") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() (0 to 2).foreach { stageId => val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = stageId, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) @@ -800,13 +800,13 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor4", "host3", 1) ) - // setup our mock blacklist: - // host1, executor0 & executor3 are completely blacklisted + // setup our mock excludelist: + // host1, executor0 & executor3 are completely excluded // This covers everything *except* one core on executor4 / host3, so that everything is still // schedulable. 
- when(blacklist.isNodeBlacklisted("host1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor0")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor3")).thenReturn(true) + when(healthTracker.isNodeExcluded("host1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor0")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor3")).thenReturn(true) val stageToTsm = (0 to 2).map { stageId => val tsm = taskScheduler.taskSetManagerForAttempt(stageId, 0).get @@ -818,12 +818,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(firstTaskAttempts.size === 1) assert(firstTaskAttempts.head.executorId === "executor4") ('0' until '2').foreach { hostNum => - verify(blacklist, atLeast(1)).isNodeBlacklisted("host" + hostNum) + verify(healthTracker, atLeast(1)).isNodeExcluded("host" + hostNum) } } - test("abort stage when all executors are blacklisted and we cannot acquire new executor") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + test("abort stage when all executors are excluded and we cannot acquire new executor") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() val taskSet = FakeTask.createTaskSet(numTasks = 10) taskScheduler.submitTasks(taskSet) val tsm = stageToMockTaskSetManager(0) @@ -836,11 +836,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B WorkerOffer("executor3", "host1", 2) )) - // now say our blacklist updates to blacklist a bunch of resources, but *not* everything - when(blacklist.isNodeBlacklisted("host1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor0")).thenReturn(true) + // now say our health tracker updates to exclude a bunch of resources, but *not* everything + when(healthTracker.isNodeExcluded("host1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor0")).thenReturn(true) - // make an offer on the blacklisted resources. We won't schedule anything, but also won't + // make an offer on the excluded resources. 
We won't schedule anything, but also won't // abort yet, since we know of other resources that work assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 2), @@ -848,9 +848,9 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B )).flatten.size === 0) assert(!tsm.isZombie) - // now update the blacklist so that everything really is blacklisted - when(blacklist.isExecutorBlacklisted("executor1")).thenReturn(true) - when(blacklist.isExecutorBlacklisted("executor2")).thenReturn(true) + // now update the health tracker so that everything really is excluded + when(healthTracker.isExecutorExcluded("executor1")).thenReturn(true) + when(healthTracker.isExecutorExcluded("executor2")).thenReturn(true) assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 2), WorkerOffer("executor3", "host1", 2) @@ -859,10 +859,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B verify(tsm).abort(anyString(), any()) } - test("SPARK-22148 abort timer should kick in when task is completely blacklisted & no new " + + test("SPARK-22148 abort timer should kick in when task is completely excluded & no new " + "executor can be acquired") { // set the abort timer to fail immediately - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0") // We have only 1 task remaining with 1 executor @@ -878,10 +878,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.find(_.executorId == "executor0").get failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer to kick in immediately assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -894,7 +894,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } test("SPARK-22148 try to acquire a new executor when task is unschedulable with 1 executor") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "10") // We have only 1 task remaining with 1 executor @@ -910,11 +910,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort - // timer to expire if no new executors could be acquired. We kill the existing idle blacklisted + // make an offer on the excluded executor. We won't schedule anything, and set the abort + // timer to expire if no new executors could be acquired. We kill the existing idle excluded // executor and try to acquire a new one. 
assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -930,12 +930,12 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!tsm.isZombie) } - // This is to test a scenario where we have two taskSets completely blacklisted and on acquiring + // This is to test a scenario where we have two taskSets completely excluded and on acquiring // a new executor we don't want the abort timer for the second taskSet to expire and abort the job test("SPARK-22148 abort timer should clear unschedulableTaskSetToExpiryTime for all TaskSets") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() - // We have 2 taskSets with 1 task remaining in each with 1 executor completely blacklisted + // We have 2 taskSets with 1 task remaining in each with 1 executor completely excluded val taskSet1 = FakeTask.createTaskSet(numTasks = 1, stageId = 0, stageAttemptId = 0) taskScheduler.submitTasks(taskSet1) val taskSet2 = FakeTask.createTaskSet(numTasks = 1, stageId = 1, stageAttemptId = 0) @@ -952,7 +952,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = firstTaskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) // make an offer. We will schedule the task from the second taskSet. Since a task was scheduled @@ -966,10 +966,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val tsm2 = stageToMockTaskSetManager(1) val failedTask2 = secondTaskAttempts.head failTask(failedTask2.taskId, TaskState.FAILED, UnknownReason, tsm2) - when(tsm2.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm2.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask2.index)).thenReturn(true) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer for taskSet1 and taskSet2 assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -991,9 +991,9 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // this test is to check that we don't abort a taskSet which is not being scheduled on other // executors as it is waiting on locality timeout and not being aborted because it is still not - // completely blacklisted. - test("SPARK-22148 Ensure we don't abort the taskSet if we haven't been completely blacklisted") { - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + // completely excluded. 
+ test("SPARK-22148 Ensure we don't abort the taskSet if we haven't been completely excluded") { + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0", // This is to avoid any potential flakiness in the test because of large pauses in jenkins config.LOCALITY_WAIT.key -> "30s" @@ -1014,7 +1014,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task val failedTask = taskAttempts.head failTask(failedTask.taskId, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", failedTask.index)).thenReturn(true) // make an offer but we won't schedule anything yet as scheduler locality is still PROCESS_LOCAL @@ -1027,10 +1027,10 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!tsm.isZombie) } - test("SPARK-31418 abort timer should kick in when task is completely blacklisted &" + + test("SPARK-31418 abort timer should kick in when task is completely excluded &" + "allocation manager could not acquire a new executor before the timeout") { // set the abort timer to fail immediately - taskScheduler = setupSchedulerWithMockTaskSetBlacklist( + taskScheduler = setupSchedulerWithMockTaskSetExcludelist( config.UNSCHEDULABLE_TASKSET_TIMEOUT.key -> "0", config.DYN_ALLOCATION_ENABLED.key -> "true") @@ -1044,14 +1044,14 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // Fail the running task failTask(0, TaskState.FAILED, UnknownReason, tsm) - when(tsm.taskSetBlacklistHelperOpt.get.isExecutorBlacklistedForTask( + when(tsm.taskSetExcludelistHelperOpt.get.isExecutorExcludedForTask( "executor0", 0)).thenReturn(true) // If the executor is busy, then dynamic allocation should kick in and try - // to acquire additional executors to schedule the blacklisted task + // to acquire additional executors to schedule the excluded task assert(taskScheduler.isExecutorBusy("executor0")) - // make an offer on the blacklisted executor. We won't schedule anything, and set the abort + // make an offer on the excluded executor. We won't schedule anything, and set the abort // timer to kick in immediately assert(taskScheduler.resourceOffers(IndexedSeq( WorkerOffer("executor0", "host0", 1) @@ -1064,31 +1064,31 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B } /** - * Helper for performance tests. Takes the explicitly blacklisted nodes and executors; verifies - * that the blacklists are used efficiently to ensure scheduling is not O(numPendingTasks). + * Helper for performance tests. Takes the explicitly excluded nodes and executors; verifies + * that the excluded are used efficiently to ensure scheduling is not O(numPendingTasks). * Creates 1 offer on executor[1-3]. Executor1 & 2 are on host1, executor3 is on host2. Passed * in nodes and executors should be on that list. */ - private def testBlacklistPerformance( + private def testExcludelistPerformance( testName: String, - nodeBlacklist: Seq[String], - execBlacklist: Seq[String]): Unit = { + nodeExcludelist: Seq[String], + execExcludelist: Seq[String]): Unit = { // Because scheduling involves shuffling the order of offers around, we run this test a few // times to cover more possibilities. There are only 3 offers, which means 6 permutations, // so 10 iterations is pretty good. 
(0 until 10).foreach { testItr => test(s"$testName: iteration $testItr") { - // When an executor or node is blacklisted, we want to make sure that we don't try - // scheduling each pending task, one by one, to discover they are all blacklisted. This is + // When an executor or node is excluded, we want to make sure that we don't try + // scheduling each pending task, one by one, to discover they are all excluded. This is // important for performance -- if we did check each task one-by-one, then responding to a // resource offer (which is usually O(1)-ish) would become O(numPendingTasks), which would // slow down scheduler throughput and slow down scheduling even on healthy executors. // Here, we check a proxy for the runtime -- we make sure the scheduling is short-circuited - // at the node or executor blacklist, so we never check the per-task blacklist. We also - // make sure we don't check the node & executor blacklist for the entire taskset + // at the node or executor excludelist, so we never check the per-task excludelist. We also + // make sure we don't check the node & executor excludelist for the entire taskset // O(numPendingTasks) times. - taskScheduler = setupSchedulerWithMockTaskSetBlacklist() + taskScheduler = setupSchedulerWithMockTaskSetExcludelist() // we schedule 500 tasks so we can clearly distinguish anything that is O(numPendingTasks) val taskSet = FakeTask.createTaskSet(numTasks = 500, stageId = 0, stageAttemptId = 0) taskScheduler.submitTasks(taskSet) @@ -1098,91 +1098,92 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor2", "host1", 1), new WorkerOffer("executor3", "host2", 1) ) - // We should check the node & exec blacklists, but only O(numOffers), not O(numPendingTasks) - // times. In the worst case, after shuffling, we offer our blacklisted resource first, and - // then offer other resources which do get used. The taskset blacklist is consulted - // repeatedly as we offer resources to the taskset -- each iteration either schedules - // something, or it terminates that locality level, so the maximum number of checks is - // numCores + numLocalityLevels + // We should check the node & exec excludelists, but only O(numOffers), + // not O(numPendingTasks) times. In the worst case, after shuffling, + // we offer our excluded resource first, and then offer other resources + // which do get used. The taskset excludelist is consulted repeatedly + // as we offer resources to the taskset -- each iteration either schedules + // something, or it terminates that locality level, so the maximum number of + // checks is numCores + numLocalityLevels val numCoresOnAllOffers = offers.map(_.cores).sum val numLocalityLevels = TaskLocality.values.size - val maxBlacklistChecks = numCoresOnAllOffers + numLocalityLevels + val maxExcludelistChecks = numCoresOnAllOffers + numLocalityLevels - // Setup the blacklist - nodeBlacklist.foreach { node => - when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet(node)).thenReturn(true) + // Setup the excludelist + nodeExcludelist.foreach { node => + when(stageToMockTaskSetExcludelist(0).isNodeExcludedForTaskSet(node)).thenReturn(true) } - execBlacklist.foreach { exec => - when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTaskSet(exec)) + execExcludelist.foreach { exec => + when(stageToMockTaskSetExcludelist(0).isExecutorExcludedForTaskSet(exec)) .thenReturn(true) } - // Figure out which nodes have any effective blacklisting on them. 
This means all nodes - // that are explicitly blacklisted, plus those that have *any* executors blacklisted. - val nodesForBlacklistedExecutors = offers.filter { offer => - execBlacklist.contains(offer.executorId) + // Figure out which nodes have any effective exclusions on them. This means all nodes + // that are explicitly excluded, plus those that have *any* executors excluded. + val nodesForExcludedExecutors = offers.filter { offer => + execExcludelist.contains(offer.executorId) }.map(_.host).distinct - val nodesWithAnyBlacklisting = (nodeBlacklist ++ nodesForBlacklistedExecutors).toSet - // Similarly, figure out which executors have any blacklisting. This means all executors - // that are explicitly blacklisted, plus all executors on nodes that are blacklisted. - val execsForBlacklistedNodes = offers.filter { offer => - nodeBlacklist.contains(offer.host) + val nodesWithAnyExclusions = (nodeExcludelist ++ nodesForExcludedExecutors).toSet + // Similarly, figure out which executors have any exclusions. This means all executors + // that are explicitly excluded, plus all executors on nodes that are excluded. + val execsForExcludedNodes = offers.filter { offer => + nodeExcludelist.contains(offer.host) }.map(_.executorId).toSeq - val executorsWithAnyBlacklisting = (execBlacklist ++ execsForBlacklistedNodes).toSet + val executorsWithAnyExclusions = (execExcludelist ++ execsForExcludedNodes).toSet // Schedule a taskset, and make sure our test setup is correct -- we are able to schedule - // a task on all executors that aren't blacklisted (whether that executor is a explicitly - // blacklisted, or implicitly blacklisted via the node blacklist). + // a task on all executors that aren't excluded (whether that executor is a explicitly + // excluded, or implicitly excluded via the node excludeOnFailures). val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten - assert(firstTaskAttempts.size === offers.size - executorsWithAnyBlacklisting.size) + assert(firstTaskAttempts.size === offers.size - executorsWithAnyExclusions.size) - // Now check that we haven't made too many calls to any of the blacklist methods. - // We should be checking our node blacklist, but it should be within the bound we defined + // Now check that we haven't made too many calls to any of the excludelist methods. + // We should be checking our node excludelist, but it should be within the bound we defined // above. - verify(stageToMockTaskSetBlacklist(0), atMost(maxBlacklistChecks)) - .isNodeBlacklistedForTaskSet(anyString()) - // We shouldn't ever consult the per-task blacklist for the nodes that have been blacklisted - // for the entire taskset, since the taskset level blacklisting should prevent scheduling + verify(stageToMockTaskSetExcludelist(0), atMost(maxExcludelistChecks)) + .isNodeExcludedForTaskSet(anyString()) + // We shouldn't ever consult the per-task excludelist for the nodes that have been excluded + // for the entire taskset, since the taskset level exclusions should prevent scheduling // from ever looking at specific tasks. - nodesWithAnyBlacklisting.foreach { node => - verify(stageToMockTaskSetBlacklist(0), never) - .isNodeBlacklistedForTask(meq(node), anyInt()) + nodesWithAnyExclusions.foreach { node => + verify(stageToMockTaskSetExcludelist(0), never) + .isNodeExcludedForTask(meq(node), anyInt()) } - executorsWithAnyBlacklisting.foreach { exec => - // We should be checking our executor blacklist, but it should be within the bound defined - // above. 
Its possible that this will be significantly fewer calls, maybe even 0, if - // there is also a node-blacklist which takes effect first. But this assert is all we - // need to avoid an O(numPendingTask) slowdown. - verify(stageToMockTaskSetBlacklist(0), atMost(maxBlacklistChecks)) - .isExecutorBlacklistedForTaskSet(exec) - // We shouldn't ever consult the per-task blacklist for executors that have been - // blacklisted for the entire taskset, since the taskset level blacklisting should prevent + executorsWithAnyExclusions.foreach { exec => + // We should be checking our executor excludelist, but it should be within the bound + // defined above. Its possible that this will be significantly fewer calls, maybe even + // 0, if there is also a node-excludelist which takes effect first. But this assert is + // all we need to avoid an O(numPendingTask) slowdown. + verify(stageToMockTaskSetExcludelist(0), atMost(maxExcludelistChecks)) + .isExecutorExcludedForTaskSet(exec) + // We shouldn't ever consult the per-task excludelist for executors that have been + // excluded for the entire taskset, since the taskset level exclusions should prevent // scheduling from ever looking at specific tasks. - verify(stageToMockTaskSetBlacklist(0), never) - .isExecutorBlacklistedForTask(meq(exec), anyInt()) + verify(stageToMockTaskSetExcludelist(0), never) + .isExecutorExcludedForTask(meq(exec), anyInt()) } } } } - testBlacklistPerformance( - testName = "Blacklisted node for entire task set prevents per-task blacklist checks", - nodeBlacklist = Seq("host1"), - execBlacklist = Seq()) + testExcludelistPerformance( + testName = "Excluded node for entire task set prevents per-task exclusion checks", + nodeExcludelist = Seq("host1"), + execExcludelist = Seq()) - testBlacklistPerformance( - testName = "Blacklisted executor for entire task set prevents per-task blacklist checks", - nodeBlacklist = Seq(), - execBlacklist = Seq("executor3") + testExcludelistPerformance( + testName = "Excluded executor for entire task set prevents per-task exclusion checks", + nodeExcludelist = Seq(), + execExcludelist = Seq("executor3") ) test("abort stage if executor loss results in unschedulability from previously failed tasks") { - // Make sure we can detect when a taskset becomes unschedulable from a blacklisting. This + // Make sure we can detect when a taskset becomes unschedulable from excludeOnFailure. This // test explores a particular corner case -- you may have one task fail, but still be // schedulable on another executor. However, that executor may fail later on, leaving the // first task with no place to run. val taskScheduler = setupScheduler( - config.BLACKLIST_ENABLED.key -> "true" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true" ) val taskSet = FakeTask.createTaskSet(2) @@ -1215,7 +1216,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(nextTaskAttempts.head.index != failedTask.index) // Now we should definitely realize that our task set is unschedulable, because the only - // task left can't be scheduled on any executors due to the blacklist. + // task left can't be scheduled on any executors due to the excludelist. 
taskScheduler.resourceOffers(IndexedSeq(new WorkerOffer("executor0", "host0", 1))) sc.listenerBus.waitUntilEmpty(100000) assert(tsm.isZombie) @@ -1223,11 +1224,11 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val idx = failedTask.index assert(failedTaskSetReason === s""" |Aborting $taskSet because task $idx (partition $idx) - |cannot run anywhere due to node and executor blacklist. + |cannot run anywhere due to node and executor excludeOnFailure. |Most recent failure: - |${tsm.taskSetBlacklistHelperOpt.get.getLatestFailureReason} + |${tsm.taskSetExcludelistHelperOpt.get.getLatestFailureReason} | - |Blacklisting behavior can be configured via spark.blacklist.*. + |ExcludeOnFailure behavior can be configured via spark.excludeOnFailure.*. |""".stripMargin) } @@ -1238,7 +1239,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B // available and not bail on the job val taskScheduler = setupScheduler( - config.BLACKLIST_ENABLED.key -> "true" + config.EXCLUDE_ON_FAILURE_ENABLED.key -> "true" ) val taskSet = FakeTask.createTaskSet(2, (0 until 2).map { _ => Seq(TaskLocation("host0")) }: _*) @@ -1306,7 +1307,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(taskScheduler.getExecutorsAliveOnHost("host1") === Some(Set("executor1", "executor3"))) } - test("scheduler checks for executors that can be expired from blacklist") { + test("scheduler checks for executors that can be expired from excludeOnFailure") { taskScheduler = setupScheduler() taskScheduler.submitTasks(FakeTask.createTaskSet(1, stageId = 0, stageAttemptId = 0)) @@ -1314,7 +1315,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B new WorkerOffer("executor0", "host0", 1) )).flatten - verify(blacklist).applyBlacklistTimeout() + verify(healthTracker).applyExcludeOnFailureTimeout() } test("if an executor is lost then the state for its running tasks is cleaned up (SPARK-18553)") { @@ -1400,7 +1401,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B offers } override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. @@ -1440,7 +1441,7 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B val clock = new ManualClock() val taskScheduler = new TaskSchedulerImpl(sc) { override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { - new TaskSetManager(this, taskSet, maxTaskFailures, blacklistTrackerOpt, clock) + new TaskSetManager(this, taskSet, maxTaskFailures, healthTrackerOpt, clock) } } // Need to initialize a DAGScheduler for the taskScheduler to use for callbacks. diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala deleted file mode 100644 index ed97a4c206ca3..0000000000000 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetBlacklistSuite.scala +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler - -import org.mockito.ArgumentMatchers.isA -import org.mockito.Mockito.{never, verify} -import org.scalatest.BeforeAndAfterEach -import org.scalatestplus.mockito.MockitoSugar - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.internal.config -import org.apache.spark.util.ManualClock - -class TaskSetBlacklistSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar { - - private var listenerBusMock: LiveListenerBus = _ - - override def beforeEach(): Unit = { - listenerBusMock = mock[LiveListenerBus] - super.beforeEach() - } - - test("Blacklisting tasks, executors, and nodes") { - val conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - val clock = new ManualClock - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - - clock.setTime(0) - // We will mark task 0 & 1 failed on both executor 1 & 2. - // We should blacklist all executors on that host, for all tasks for the stage. Note the API - // will return false for isExecutorBacklistedForTaskSet even when the node is blacklisted, so - // the executor is implicitly blacklisted (this makes sense with how the scheduler uses the - // blacklist) - - // First, mark task 0 as failed on exec1. - // task 0 should be blacklisted on exec1, and nowhere else - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec1", index = 0, failureReason = "testing") - for { - executor <- (1 to 4).map(_.toString) - index <- 0 until 10 - } { - val shouldBeBlacklisted = (executor == "exec1" && index == 0) - assert(taskSetBlacklist.isExecutorBlacklistedForTask(executor, index) === shouldBeBlacklisted) - } - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerExecutorBlacklistedForStage])) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark task 1 failed on exec1 -- this pushes the executor into the blacklist - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, "exec1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark one task as failed on exec2 -- not enough for any further blacklisting yet. 
- taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec2", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec2")) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Mark another task as failed on exec2 -- now we blacklist exec2, which also leads to - // blacklisting the entire node. - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "exec2", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec1")) - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("exec2")) - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, "exec2", 2, 0, attemptId)) - - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock).post( - SparkListenerNodeBlacklistedForStage(0, "hostA", 2, 0, attemptId)) - - // Make sure the blacklist has the correct per-task && per-executor responses, over a wider - // range of inputs. - for { - executor <- (1 to 4).map(e => s"exec$e") - index <- 0 until 10 - } { - withClue(s"exec = $executor; index = $index") { - val badExec = (executor == "exec1" || executor == "exec2") - val badIndex = (index == 0 || index == 1) - assert( - // this ignores whether the executor is blacklisted entirely for the taskset -- that is - // intentional, it keeps it fast and is sufficient for usage in the scheduler. - taskSetBlacklist.isExecutorBlacklistedForTask(executor, index) === (badExec && badIndex)) - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet(executor) === badExec) - if (badExec) { - verify(listenerBusMock).post( - SparkListenerExecutorBlacklistedForStage(0, executor, 2, 0, attemptId)) - } - } - } - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - val execToFailures = taskSetBlacklist.execToFailures - assert(execToFailures.keySet === Set("exec1", "exec2")) - - Seq("exec1", "exec2").foreach { exec => - assert( - execToFailures(exec).taskToFailureCountAndFailureTime === Map( - 0 -> ((1, 0)), - 1 -> ((1, 0)) - ) - ) - } - } - - test("multiple attempts for the same task count once") { - // Make sure that for blacklisting tasks, the node counts task attempts, not executors. But for - // stage-level blacklisting, we count unique tasks. The reason for this difference is, with - // task-attempt blacklisting, we want to make it easy to configure so that you ensure a node - // is blacklisted before the taskset is completely aborted because of spark.task.maxFailures. - // But with stage-blacklisting, we want to make sure we're not just counting one bad task - // that has failed many times. 
- - val conf = new SparkConf().setMaster("local").setAppName("test") - .set(config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, 2) - .set(config.MAX_TASK_ATTEMPTS_PER_NODE, 3) - .set(config.MAX_FAILURES_PER_EXEC_STAGE, 2) - .set(config.MAX_FAILED_EXEC_PER_NODE_STAGE, 3) - val clock = new ManualClock - - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - - var time = 0 - clock.setTime(time) - // Fail a task twice on hostA, exec:1 - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTask("1", 0)) - assert(!taskSetBlacklist.isNodeBlacklistedForTask("hostA", 0)) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock, never()).post( - SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - // Fail the same task once more on hostA, exec:2 - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "2", index = 0, failureReason = "testing") - assert(taskSetBlacklist.isNodeBlacklistedForTask("hostA", 0)) - - assert(!taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock, never()).post( - SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - // Fail another task on hostA, exec:1. Now that executor has failures on two different tasks, - // so its blacklisted - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Fail a third task on hostA, exec:2, so that exec is blacklisted for the whole task set - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "2", index = 2, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - - // Fail a fourth & fifth task on hostA, exec:3. Now we've got three executors that are - // blacklisted for the taskset, so blacklist the whole node. 
- time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 3, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "3", index = 4, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("3")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "3", 2, 0, attemptId)) - - assert(taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 3, 0, attemptId)) - } - - test("only blacklist nodes for the task set when all the blacklisted executors are all on " + - "same host") { - // we blacklist executors on two different hosts within one taskSet -- make sure that doesn't - // lead to any node blacklisting - val conf = new SparkConf().setAppName("test").setMaster("local") - .set(config.BLACKLIST_ENABLED.key, "true") - val clock = new ManualClock - - val attemptId = 0 - val taskSetBlacklist = new TaskSetBlacklist( - listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) - var time = 0 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostA", exec = "1", index = 1, failureReason = "testing") - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - verify(listenerBusMock, never()).post( - SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) - - time += 1 - clock.setTime(time) - taskSetBlacklist.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 0, failureReason = "testing") - taskSetBlacklist.updateBlacklistForFailedTask( - "hostB", exec = "2", index = 1, failureReason = "testing") - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("1")) - - assert(taskSetBlacklist.isExecutorBlacklistedForTaskSet("2")) - verify(listenerBusMock) - .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) - - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostA")) - assert(!taskSetBlacklist.isNodeBlacklistedForTaskSet("hostB")) - verify(listenerBusMock, never()) - .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) - } - -} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala new file mode 100644 index 0000000000000..d20768d7cd12b --- /dev/null +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetExcludelistSuite.scala @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler + +import org.mockito.ArgumentMatchers.isA +import org.mockito.Mockito.{never, verify} +import org.scalatest.BeforeAndAfterEach +import org.scalatestplus.mockito.MockitoSugar + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.internal.config +import org.apache.spark.util.ManualClock + +class TaskSetExcludelistSuite extends SparkFunSuite with BeforeAndAfterEach with MockitoSugar { + + private var listenerBusMock: LiveListenerBus = _ + + override def beforeEach(): Unit = { + listenerBusMock = mock[LiveListenerBus] + super.beforeEach() + } + + test("Excluding tasks, executors, and nodes") { + val conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + val clock = new ManualClock + val attemptId = 0 + val taskSetExcludelist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + + clock.setTime(0) + // We will mark task 0 & 1 failed on both executor 1 & 2. + // We should exclude all executors on that host, for all tasks for the stage. Note the API + // will return false for isExecutorBacklistedForTaskSet even when the node is excluded, so + // the executor is implicitly excluded (this makes sense with how the scheduler uses the + // exclude) + + // First, mark task 0 as failed on exec1. + // task 0 should be excluded on exec1, and nowhere else + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec1", index = 0, failureReason = "testing") + for { + executor <- (1 to 4).map(_.toString) + index <- 0 until 10 + } { + val shouldBeExcluded = (executor == "exec1" && index == 0) + assert(taskSetExcludelist.isExecutorExcludedForTask(executor, index) === shouldBeExcluded) + } + + assert(!taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerExecutorExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerExecutorBlacklistedForStage])) + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Mark task 1 failed on exec1 -- this pushes the executor into the exclude + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec1", index = 1, failureReason = "testing") + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, "exec1", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, "exec1", 2, 0, attemptId)) + + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + + // Mark one task as failed on exec2 -- not enough for any further excluding yet. 
+ taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec2", index = 0, failureReason = "testing") + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + + assert(!taskSetExcludelist.isExecutorExcludedForTaskSet("exec2")) + + assert(!taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + + // Mark another task as failed on exec2 -- now we exclude exec2, which also leads to + // excluding the entire node. + taskSetExcludelist.updateExcludedForFailedTask( + "hostA", exec = "exec2", index = 1, failureReason = "testing") + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec1")) + + assert(taskSetExcludelist.isExecutorExcludedForTaskSet("exec2")) + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, "exec2", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, "exec2", 2, 0, attemptId)) + + assert(taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock).post( + SparkListenerNodeExcludedForStage(0, "hostA", 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerNodeBlacklistedForStage(0, "hostA", 2, 0, attemptId)) + + // Make sure the exclude has the correct per-task && per-executor responses, over a wider + // range of inputs. + for { + executor <- (1 to 4).map(e => s"exec$e") + index <- 0 until 10 + } { + withClue(s"exec = $executor; index = $index") { + val badExec = (executor == "exec1" || executor == "exec2") + val badIndex = (index == 0 || index == 1) + assert( + // this ignores whether the executor is excluded entirely for the taskset -- that is + // intentional, it keeps it fast and is sufficient for usage in the scheduler. + taskSetExcludelist.isExecutorExcludedForTask(executor, index) === (badExec && badIndex)) + assert(taskSetExcludelist.isExecutorExcludedForTaskSet(executor) === badExec) + if (badExec) { + verify(listenerBusMock).post( + SparkListenerExecutorExcludedForStage(0, executor, 2, 0, attemptId)) + verify(listenerBusMock).post( + SparkListenerExecutorBlacklistedForStage(0, executor, 2, 0, attemptId)) + } + } + } + assert(taskSetExcludelist.isNodeExcludedForTaskSet("hostA")) + val execToFailures = taskSetExcludelist.execToFailures + assert(execToFailures.keySet === Set("exec1", "exec2")) + + Seq("exec1", "exec2").foreach { exec => + assert( + execToFailures(exec).taskToFailureCountAndFailureTime === Map( + 0 -> ((1, 0)), + 1 -> ((1, 0)) + ) + ) + } + } + + test("multiple attempts for the same task count once") { + // Make sure that for excluding tasks, the node counts task attempts, not executors. But for + // stage-level excluding, we count unique tasks. The reason for this difference is, with + // task-attempt excluding, we want to make it easy to configure so that you ensure a node + // is excluded before the taskset is completely aborted because of spark.task.maxFailures. + // But with stage-excluding, we want to make sure we're not just counting one bad task + // that has failed many times. 
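For context, the task- and stage-level thresholds that this test exercises through the internal config constants map to the user-facing `spark.excludeOnFailure.*` keys documented in the `docs/configuration.md` changes later in this patch. A minimal, hypothetical sketch of setting them on a `SparkConf` (the object name and the values are made up for illustration, not recommendations):

```
import org.apache.spark.SparkConf

object ExcludeOnFailureConfSketch {
  def main(args: Array[String]): Unit = {
    // Example values only; the defaults are listed in docs/configuration.md.
    val conf = new SparkConf()
      .setAppName("exclude-on-failure-sketch")
      .set("spark.excludeOnFailure.enabled", "true")
      // Task-level limits count attempts, per executor and per node.
      .set("spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor", "1")
      .set("spark.excludeOnFailure.task.maxTaskAttemptsPerNode", "2")
      // Stage-level limits count distinct failed tasks per executor and
      // failed executors per node.
      .set("spark.excludeOnFailure.stage.maxFailedTasksPerExecutor", "2")
      .set("spark.excludeOnFailure.stage.maxFailedExecutorsPerNode", "2")
    println(conf.toDebugString)
  }
}
```

These are the same limits the test sets via `config.MAX_TASK_ATTEMPTS_PER_EXECUTOR`, `config.MAX_TASK_ATTEMPTS_PER_NODE`, `config.MAX_FAILURES_PER_EXEC_STAGE` and `config.MAX_FAILED_EXEC_PER_NODE_STAGE` below.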
+ + val conf = new SparkConf().setMaster("local").setAppName("test") + .set(config.MAX_TASK_ATTEMPTS_PER_EXECUTOR, 2) + .set(config.MAX_TASK_ATTEMPTS_PER_NODE, 3) + .set(config.MAX_FAILURES_PER_EXEC_STAGE, 2) + .set(config.MAX_FAILED_EXEC_PER_NODE_STAGE, 3) + val clock = new ManualClock + + val attemptId = 0 + val taskSetExcludlist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + + var time = 0 + clock.setTime(time) + // Fail a task twice on hostA, exec:1 + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + assert(taskSetExcludlist.isExecutorExcludedForTask("1", 0)) + assert(!taskSetExcludlist.isNodeExcludedForTask("hostA", 0)) + + assert(!taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock, never()).post( + SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + + // Fail the same task once more on hostA, exec:2 + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "2", index = 0, failureReason = "testing") + assert(taskSetExcludlist.isNodeExcludedForTask("hostA", 0)) + + assert(!taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock, never()).post( + SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + + // Fail another task on hostA, exec:1. Now that executor has failures on two different tasks, + // so its excluded + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Fail a third task on hostA, exec:2, so that exec is excluded for the whole task set + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "2", index = 2, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + + // Fail a fourth & fifth task on hostA, exec:3. Now we've got three executors that are + // excluded for the taskset, so exclude the whole node. 
+ time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "3", index = 3, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "3", index = 4, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("3")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "3", 2, 0, attemptId)) + + assert(taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock).post( + SparkListenerNodeExcludedForStage(time, "hostA", 3, 0, attemptId)) + } + + test("only exclude nodes for the task set when all the excluded executors are all on " + + "same host") { + // we exclude executors on two different hosts within one taskSet -- make sure that doesn't + // lead to any node excluding + val conf = new SparkConf().setAppName("test").setMaster("local") + .set(config.EXCLUDE_ON_FAILURE_ENABLED.key, "true") + val clock = new ManualClock + + val attemptId = 0 + val taskSetExcludlist = new TaskSetExcludelist( + listenerBusMock, conf, stageId = 0, stageAttemptId = attemptId, clock = clock) + var time = 0 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostA", exec = "1", index = 1, failureReason = "testing") + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "1", 2, 0, attemptId)) + verify(listenerBusMock) + .post(SparkListenerExecutorBlacklistedForStage(time, "1", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + verify(listenerBusMock, never()).post( + SparkListenerNodeExcludedForStage(time, "hostA", 2, 0, attemptId)) + verify(listenerBusMock, never()).post( + SparkListenerNodeBlacklistedForStage(time, "hostA", 2, 0, attemptId)) + + time += 1 + clock.setTime(time) + taskSetExcludlist.updateExcludedForFailedTask( + "hostB", exec = "2", index = 0, failureReason = "testing") + taskSetExcludlist.updateExcludedForFailedTask( + "hostB", exec = "2", index = 1, failureReason = "testing") + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("1")) + + assert(taskSetExcludlist.isExecutorExcludedForTaskSet("2")) + verify(listenerBusMock) + .post(SparkListenerExecutorExcludedForStage(time, "2", 2, 0, attemptId)) + verify(listenerBusMock) + .post(SparkListenerExecutorBlacklistedForStage(time, "2", 2, 0, attemptId)) + + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostA")) + assert(!taskSetExcludlist.isNodeExcludedForTaskSet("hostB")) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeExcludedForStage])) + verify(listenerBusMock, never()) + .post(isA(classOf[SparkListenerNodeBlacklistedForStage])) + } + +} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index c389fd2ffa8b1..e01e278f60205 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -382,14 +382,14 @@ class TaskSetManagerSuite assert(delayReject === false) manager.isZombie = false - // offers not accepted due to blacklisting are not delay schedule rejects + // offers not accepted due to excludelist are not delay schedule rejects val tsmSpy = spy(manager) - val blacklist = mock(classOf[TaskSetBlacklist]) - 
when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(blacklist)) - when(blacklist.isNodeBlacklistedForTaskSet(any())).thenReturn(true) - val (blacklistTask, blackListReject) = tsmSpy.resourceOffer("exec2", "host2", ANY) - assert(blacklistTask.isEmpty) - assert(blackListReject === false) + val excludelist = mock(classOf[TaskSetExcludelist]) + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(excludelist)) + when(excludelist.isNodeExcludedForTaskSet(any())).thenReturn(true) + val (task, taskReject) = tsmSpy.resourceOffer("exec2", "host2", ANY) + assert(task.isEmpty) + assert(taskReject === false) // After another delay, we can go ahead and launch that task non-locally assert(manager.resourceOffer("exec2", "host2", ANY)._1.get.index === 3) @@ -479,11 +479,11 @@ class TaskSetManagerSuite } } - test("executors should be blacklisted after task failure, in spite of locality preferences") { + test("executors should be excluded after task failure, in spite of locality preferences") { val rescheduleDelay = 300L val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true). - set(config.BLACKLIST_TIMEOUT_CONF, rescheduleDelay). + set(config.EXCLUDE_ON_FAILURE_ENABLED, true). + set(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF, rescheduleDelay). // don't wait to jump locality levels in this test set(config.LOCALITY_WAIT.key, "0") @@ -495,11 +495,11 @@ class TaskSetManagerSuite val taskSet = FakeTask.createTaskSet(1, Seq(TaskLocation("host1", "exec1"))) val clock = new ManualClock clock.advance(1) - // We don't directly use the application blacklist, but its presence triggers blacklisting + // We don't directly use the application excludelist, but its presence triggers exclusion // within the taskset. val mockListenerBus = mock(classOf[LiveListenerBus]) - val blacklistTrackerOpt = Some(new BlacklistTracker(mockListenerBus, conf, None, clock)) - val manager = new TaskSetManager(sched, taskSet, 4, blacklistTrackerOpt, clock) + val healthTrackerOpt = Some(new HealthTracker(mockListenerBus, conf, None, clock)) + val manager = new TaskSetManager(sched, taskSet, 4, healthTrackerOpt, clock) { val offerResult = manager.resourceOffer("exec1", "host1", PROCESS_LOCAL)._1 @@ -512,7 +512,7 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec1 fails after failure 1 due to blacklist + // Ensure scheduling on exec1 fails after failure 1 due to executor being excluded assert(manager.resourceOffer("exec1", "host1", PROCESS_LOCAL)._1.isEmpty) assert(manager.resourceOffer("exec1", "host1", NODE_LOCAL)._1.isEmpty) assert(manager.resourceOffer("exec1", "host1", RACK_LOCAL)._1.isEmpty) @@ -532,7 +532,7 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec1.1 fails after failure 2 due to blacklist + // Ensure scheduling on exec1.1 fails after failure 2 due to executor being excluded assert(manager.resourceOffer("exec1.1", "host1", NODE_LOCAL)._1.isEmpty) } @@ -548,12 +548,12 @@ class TaskSetManagerSuite manager.handleFailedTask(offerResult.get.taskId, TaskState.FINISHED, TaskResultLost) assert(!sched.taskSetsFailed.contains(taskSet.id)) - // Ensure scheduling on exec2 fails after failure 3 due to blacklist + // Ensure scheduling on exec2 fails after failure 3 due to executor being excluded assert(manager.resourceOffer("exec2", "host2", 
ANY)._1.isEmpty) } - // Despite advancing beyond the time for expiring executors from within the blacklist, - // we *never* expire from *within* the stage blacklist + // Despite advancing beyond the time for expiring executors from within the excludelist, + // we *never* expire from *within* the stage excludelist clock.advance(rescheduleDelay) { @@ -1358,20 +1358,20 @@ class TaskSetManagerSuite assert(manager3.name === "TaskSet_1.1") } - test("don't update blacklist for shuffle-fetch failures, preemption, denied commits, " + + test("don't update excludelist for shuffle-fetch failures, preemption, denied commits, " + "or killed tasks") { // Setup a taskset, and fail some tasks for a fetch failure, preemption, denied commit, // and killed task. val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true) + set(config.EXCLUDE_ON_FAILURE_ENABLED, true) sc = new SparkContext("local", "test", conf) sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) val taskSet = FakeTask.createTaskSet(4) val tsm = new TaskSetManager(sched, taskSet, 4) - // we need a spy so we can attach our mock blacklist + // we need a spy so we can attach our mock excludelist val tsmSpy = spy(tsm) - val blacklist = mock(classOf[TaskSetBlacklist]) - when(tsmSpy.taskSetBlacklistHelperOpt).thenReturn(Some(blacklist)) + val excludelist = mock(classOf[TaskSetExcludelist]) + when(tsmSpy.taskSetExcludelistHelperOpt).thenReturn(Some(excludelist)) // make some offers to our taskset, to get tasks we will fail val taskDescs = Seq( @@ -1392,23 +1392,23 @@ class TaskSetManagerSuite TaskCommitDenied(0, 2, 0)) tsmSpy.handleFailedTask(taskDescs(3).taskId, TaskState.KILLED, TaskKilled("test")) - // Make sure that the blacklist ignored all of the task failures above, since they aren't + // Make sure that the excludelist ignored all of the task failures above, since they aren't // the fault of the executor where the task was running. - verify(blacklist, never()) - .updateBlacklistForFailedTask(anyString(), anyString(), anyInt(), anyString()) + verify(excludelist, never()) + .updateExcludedForFailedTask(anyString(), anyString(), anyInt(), anyString()) } - test("update application blacklist for shuffle-fetch") { + test("update application healthTracker for shuffle-fetch") { // Setup a taskset, and fail some one task for fetch failure. 
val conf = new SparkConf() - .set(config.BLACKLIST_ENABLED, true) + .set(config.EXCLUDE_ON_FAILURE_ENABLED, true) .set(config.SHUFFLE_SERVICE_ENABLED, true) - .set(config.BLACKLIST_FETCH_FAILURE_ENABLED, true) + .set(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED, true) sc = new SparkContext("local", "test", conf) sched = new FakeTaskScheduler(sc, ("exec1", "host1"), ("exec2", "host2")) val taskSet = FakeTask.createTaskSet(4) - val blacklistTracker = new BlacklistTracker(sc, None) - val tsm = new TaskSetManager(sched, taskSet, 4, Some(blacklistTracker)) + val healthTracker = new HealthTracker(sc, None) + val tsm = new TaskSetManager(sched, taskSet, 4, Some(healthTracker)) // make some offers to our taskset, to get tasks we will fail val taskDescs = Seq( @@ -1420,22 +1420,22 @@ class TaskSetManagerSuite } assert(taskDescs.size === 4) - assert(!blacklistTracker.isExecutorBlacklisted(taskDescs(0).executorId)) - assert(!blacklistTracker.isNodeBlacklisted("host1")) + assert(!healthTracker.isExecutorExcluded(taskDescs(0).executorId)) + assert(!healthTracker.isNodeExcluded("host1")) // Fail the task with fetch failure tsm.handleFailedTask(taskDescs(0).taskId, TaskState.FAILED, FetchFailed(BlockManagerId(taskDescs(0).executorId, "host1", 12345), 0, 0L, 0, 0, "ignored")) - assert(blacklistTracker.isNodeBlacklisted("host1")) + assert(healthTracker.isNodeExcluded("host1")) } - test("update blacklist before adding pending task to avoid race condition") { - // When a task fails, it should apply the blacklist policy prior to + test("update healthTracker before adding pending task to avoid race condition") { + // When a task fails, it should apply the excludeOnFailure policy prior to // retrying the task otherwise there's a race condition where run on // the same executor that it was intended to be black listed from. val conf = new SparkConf(). - set(config.BLACKLIST_ENABLED, true) + set(config.EXCLUDE_ON_FAILURE_ENABLED, true) // Create a task with two executors. sc = new SparkContext("local", "test", conf) @@ -1448,8 +1448,8 @@ class TaskSetManagerSuite val clock = new ManualClock val mockListenerBus = mock(classOf[LiveListenerBus]) - val blacklistTracker = new BlacklistTracker(mockListenerBus, conf, None, clock) - val taskSetManager = new TaskSetManager(sched, taskSet, 1, Some(blacklistTracker)) + val healthTracker = new HealthTracker(mockListenerBus, conf, None, clock) + val taskSetManager = new TaskSetManager(sched, taskSet, 1, Some(healthTracker)) val taskSetManagerSpy = spy(taskSetManager) val taskDesc = taskSetManagerSpy.resourceOffer(exec, host, TaskLocality.ANY)._1 @@ -1458,8 +1458,8 @@ class TaskSetManagerSuite when(taskSetManagerSpy.addPendingTask(anyInt(), anyBoolean(), anyBoolean())).thenAnswer( (invocationOnMock: InvocationOnMock) => { val task: Int = invocationOnMock.getArgument(0) - assert(taskSetManager.taskSetBlacklistHelperOpt.get. - isExecutorBlacklistedForTask(exec, task)) + assert(taskSetManager.taskSetExcludelistHelperOpt.get. 
+ isExecutorExcludedForTask(exec, task)) } ) diff --git a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala index 397fdce8ae6e3..4acb4bbc779c3 100644 --- a/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/KryoSerializerDistributedSuite.scala @@ -31,7 +31,7 @@ class KryoSerializerDistributedSuite extends SparkFunSuite with LocalSparkContex .set(config.SERIALIZER, "org.apache.spark.serializer.KryoSerializer") .set(config.Kryo.KRYO_USER_REGISTRATORS, Seq(classOf[AppJarRegistrator].getName)) .set(config.TASK_MAX_FAILURES, 1) - .set(config.BLACKLIST_ENABLED, false) + .set(config.EXCLUDE_ON_FAILURE_ENABLED, false) val jar = TestUtils.createJarWithClasses(List(AppJarRegistrator.customClassName)) conf.setJars(List(jar.getPath)) diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index d5829c352be9b..6ca1109791c35 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -256,9 +256,9 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { } } - // Blacklisting executor for stage + // Excluding executor for stage time += 1 - listener.onExecutorBlacklistedForStage(SparkListenerExecutorBlacklistedForStage( + listener.onExecutorExcludedForStage(SparkListenerExecutorExcludedForStage( time = time, executorId = execIds.head, taskFailures = 2, @@ -273,18 +273,21 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(executorStageSummaryWrappers.nonEmpty) executorStageSummaryWrappers.foreach { exec => - // only the first executor is expected to be blacklisted - val expectedBlacklistedFlag = exec.executorId == execIds.head - assert(exec.info.isBlacklistedForStage === expectedBlacklistedFlag) + // only the first executor is expected to be excluded + val expectedExcludedFlag = exec.executorId == execIds.head + assert(exec.info.isBlacklistedForStage === expectedExcludedFlag) + assert(exec.info.isExcludedForStage === expectedExcludedFlag) } check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set(stages.head.stageId)) + assert(exec.info.excludedInStages === Set(stages.head.stageId)) + } - // Blacklisting node for stage + // Excluding node for stage time += 1 - listener.onNodeBlacklistedForStage(SparkListenerNodeBlacklistedForStage( + listener.onNodeExcludedForStage(SparkListenerNodeExcludedForStage( time = time, hostId = "2.example.com", // this is where the second executor is hosted executorFailures = 1, @@ -299,8 +302,10 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(executorStageSummaryWrappersForNode.nonEmpty) executorStageSummaryWrappersForNode.foreach { exec => - // both executor is expected to be blacklisted + // both executor is expected to be excluded assert(exec.info.isBlacklistedForStage) + assert(exec.info.isExcludedForStage) + } // Fail one of the tasks, re-start it. @@ -450,6 +455,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set()) + assert(exec.info.excludedInStages === Set()) } // Submit stage 2. 
@@ -466,9 +472,9 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(stage.info.submissionTime === Some(new Date(stages.last.submissionTime.get))) } - // Blacklisting node for stage + // Excluding node for stage time += 1 - listener.onNodeBlacklistedForStage(SparkListenerNodeBlacklistedForStage( + listener.onNodeExcludedForStage(SparkListenerNodeExcludedForStage( time = time, hostId = "1.example.com", executorFailures = 1, @@ -477,6 +483,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { check[ExecutorSummaryWrapper](execIds.head) { exec => assert(exec.info.blacklistedInStages === Set(stages.last.stageId)) + assert(exec.info.excludedInStages === Set(stages.last.stageId)) } // Start and fail all tasks of stage 2. @@ -628,30 +635,34 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { assert(job.info.numSkippedTasks === s1Tasks.size) } - // Blacklist an executor. + // Exclude an executor. time += 1 - listener.onExecutorBlacklisted(SparkListenerExecutorBlacklisted(time, "1", 42)) + listener.onExecutorExcluded(SparkListenerExecutorExcluded(time, "1", 42)) check[ExecutorSummaryWrapper]("1") { exec => assert(exec.info.isBlacklisted) + assert(exec.info.isExcluded) } time += 1 - listener.onExecutorUnblacklisted(SparkListenerExecutorUnblacklisted(time, "1")) + listener.onExecutorUnexcluded(SparkListenerExecutorUnexcluded(time, "1")) check[ExecutorSummaryWrapper]("1") { exec => assert(!exec.info.isBlacklisted) + assert(!exec.info.isExcluded) } - // Blacklist a node. + // Exclude a node. time += 1 - listener.onNodeBlacklisted(SparkListenerNodeBlacklisted(time, "1.example.com", 2)) + listener.onNodeExcluded(SparkListenerNodeExcluded(time, "1.example.com", 2)) check[ExecutorSummaryWrapper]("1") { exec => assert(exec.info.isBlacklisted) + assert(exec.info.isExcluded) } time += 1 - listener.onNodeUnblacklisted(SparkListenerNodeUnblacklisted(time, "1.example.com")) + listener.onNodeUnexcluded(SparkListenerNodeUnexcluded(time, "1.example.com")) check[ExecutorSummaryWrapper]("1") { exec => assert(!exec.info.isBlacklisted) + assert(!exec.info.isExcluded) } // Stop executors. 
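The suite above drives the new exclusion events through the listener bus via `onExecutorExcluded`, `onNodeExcluded` and their counterparts. A minimal, hypothetical sketch of an application-side `SparkListener` consuming `SparkListenerExecutorExcluded` and `SparkListenerNodeExcluded`; the class and object names are made up, and the event fields match the JSON fixtures in `JsonProtocolSuite` below:

```
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorExcluded, SparkListenerNodeExcluded}

// Prints exclusion decisions as they arrive on the listener bus.
class ExclusionLoggingListener extends SparkListener {
  override def onExecutorExcluded(event: SparkListenerExecutorExcluded): Unit = {
    println(s"Executor ${event.executorId} excluded at ${event.time} " +
      s"after ${event.taskFailures} task failures")
  }

  override def onNodeExcluded(event: SparkListenerNodeExcluded): Unit = {
    println(s"Node ${event.hostId} excluded at ${event.time} " +
      s"after ${event.executorFailures} executor failures")
  }
}

object ExclusionListenerSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("exclusion-listener-sketch")
      .setMaster("local[2]")
      .set("spark.excludeOnFailure.enabled", "true")
    val sc = new SparkContext(conf)
    sc.addSparkListener(new ExclusionLoggingListener)
    // ... run jobs here; the callbacks fire only if exclusion actually triggers.
    sc.stop()
  }
}
```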
diff --git a/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala index 286911bdfc19a..541a7821a51fb 100644 --- a/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala +++ b/core/src/test/scala/org/apache/spark/status/api/v1/ExecutorSummarySuite.scala @@ -33,7 +33,8 @@ class ExecutorSummarySuite extends SparkFunSuite { 0, 0, 1, 100, 1, 100, 100, 10, false, 20, new Date(1600984336352L), - Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1) + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) val expectedJson = "{\"id\":\"id\",\"hostPort\":\"host:port\",\"isActive\":true," + "\"rddBlocks\":1,\"memoryUsed\":10,\"diskUsed\":10,\"totalCores\":1,\"maxTasks\":1," + "\"activeTasks\":1,\"failedTasks\":0,\"completedTasks\":0,\"totalTasks\":1," + @@ -41,7 +42,8 @@ class ExecutorSummarySuite extends SparkFunSuite { "\"totalShuffleRead\":100,\"totalShuffleWrite\":10,\"isBlacklisted\":false," + "\"maxMemory\":20,\"addTime\":1600984336352,\"removeTime\":null,\"removeReason\":null," + "\"executorLogs\":{},\"memoryMetrics\":null,\"blacklistedInStages\":[]," + - "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1}" + "\"peakMemoryMetrics\":null,\"attributes\":{},\"resources\":{},\"resourceProfileId\":1," + + "\"isExcluded\":false,\"excludedInStages\":[]}" val json = mapper.writeValueAsString(executorSummary) assert(expectedJson.equals(json)) val deserializeExecutorSummary = mapper.readValue(json, new TypeReference[ExecutorSummary] {}) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 2ae51f425dcb5..4cd1fc19f1484 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -94,12 +94,18 @@ class JsonProtocolSuite extends SparkFunSuite { val executorAdded = SparkListenerExecutorAdded(executorAddedTime, "exec1", new ExecutorInfo("Hostee.awesome.com", 11, logUrlMap, attributes, resources.toMap, 4)) val executorRemoved = SparkListenerExecutorRemoved(executorRemovedTime, "exec2", "test reason") - val executorBlacklisted = SparkListenerExecutorBlacklisted(executorBlacklistedTime, "exec1", 22) + val executorBlacklisted = SparkListenerExecutorBlacklisted(executorExcludedTime, "exec1", 22) val executorUnblacklisted = - SparkListenerExecutorUnblacklisted(executorUnblacklistedTime, "exec1") - val nodeBlacklisted = SparkListenerNodeBlacklisted(nodeBlacklistedTime, "node1", 33) + SparkListenerExecutorUnblacklisted(executorUnexcludedTime, "exec1") + val nodeBlacklisted = SparkListenerNodeBlacklisted(nodeExcludedTime, "node1", 33) + val executorExcluded = SparkListenerExecutorExcluded(executorExcludedTime, "exec1", 22) + val executorUnexcluded = + SparkListenerExecutorUnexcluded(executorUnexcludedTime, "exec1") + val nodeExcluded = SparkListenerNodeExcluded(nodeExcludedTime, "node1", 33) val nodeUnblacklisted = - SparkListenerNodeUnblacklisted(nodeUnblacklistedTime, "node1") + SparkListenerNodeUnblacklisted(nodeUnexcludedTime, "node1") + val nodeUnexcluded = + SparkListenerNodeUnexcluded(nodeUnexcludedTime, "node1") val executorMetricsUpdate = { // Use custom accum ID for determinism val accumUpdates = @@ -147,8 +153,12 @@ class JsonProtocolSuite extends SparkFunSuite { 
testEvent(executorRemoved, executorRemovedJsonString) testEvent(executorBlacklisted, executorBlacklistedJsonString) testEvent(executorUnblacklisted, executorUnblacklistedJsonString) + testEvent(executorExcluded, executorExcludedJsonString) + testEvent(executorUnexcluded, executorUnexcludedJsonString) testEvent(nodeBlacklisted, nodeBlacklistedJsonString) testEvent(nodeUnblacklisted, nodeUnblacklistedJsonString) + testEvent(nodeExcluded, nodeExcludedJsonString) + testEvent(nodeUnexcluded, nodeUnexcludedJsonString) testEvent(executorMetricsUpdate, executorMetricsUpdateJsonString) testEvent(blockUpdated, blockUpdatedJsonString) testEvent(stageExecutorMetrics, stageExecutorMetricsJsonString) @@ -598,10 +608,10 @@ private[spark] object JsonProtocolSuite extends Assertions { private val jobCompletionTime = 1421191296660L private val executorAddedTime = 1421458410000L private val executorRemovedTime = 1421458922000L - private val executorBlacklistedTime = 1421458932000L - private val executorUnblacklistedTime = 1421458942000L - private val nodeBlacklistedTime = 1421458952000L - private val nodeUnblacklistedTime = 1421458962000L + private val executorExcludedTime = 1421458932000L + private val executorUnexcludedTime = 1421458942000L + private val nodeExcludedTime = 1421458952000L + private val nodeUnexcludedTime = 1421458962000L private def testEvent(event: SparkListenerEvent, jsonString: String): Unit = { val actualJsonString = compact(render(JsonProtocol.sparkEventToJson(event))) @@ -2415,36 +2425,70 @@ private[spark] object JsonProtocolSuite extends Assertions { s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorBlacklisted", - | "time" : ${executorBlacklistedTime}, + | "time" : ${executorExcludedTime}, | "executorId" : "exec1", | "taskFailures" : 22 |} """.stripMargin + private val executorExcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorExcluded", + | "time" : ${executorExcludedTime}, + | "executorId" : "exec1", + | "taskFailures" : 22 + |} + """.stripMargin private val executorUnblacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorUnblacklisted", - | "time" : ${executorUnblacklistedTime}, + | "time" : ${executorUnexcludedTime}, | "executorId" : "exec1" |} """.stripMargin + private val executorUnexcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerExecutorUnexcluded", + | "time" : ${executorUnexcludedTime}, + | "executorId" : "exec1" + |} + """.stripMargin private val nodeBlacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerNodeBlacklisted", - | "time" : ${nodeBlacklistedTime}, + | "time" : ${nodeExcludedTime}, | "hostId" : "node1", | "executorFailures" : 33 |} """.stripMargin + private val nodeExcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerNodeExcluded", + | "time" : ${nodeExcludedTime}, + | "hostId" : "node1", + | "executorFailures" : 33 + |} + """.stripMargin private val nodeUnblacklistedJsonString = s""" |{ | "Event" : "org.apache.spark.scheduler.SparkListenerNodeUnblacklisted", - | "time" : ${nodeUnblacklistedTime}, + | "time" : ${nodeUnexcludedTime}, | "hostId" : "node1" |} """.stripMargin + private val nodeUnexcludedJsonString = + s""" + |{ + | "Event" : "org.apache.spark.scheduler.SparkListenerNodeUnexcluded", + | "time" : ${nodeUnexcludedTime}, + | "hostId" : "node1" + |} + """.stripMargin private val resourceProfileJsonString = """ |{ diff --git a/docs/configuration.md 
b/docs/configuration.md index d825a589dfd31..232ea4079d436 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2146,113 +2146,113 @@ Apart from these, the following properties are also available, and may be useful 3.1.0 - spark.scheduler.blacklist.unschedulableTaskSetTimeout + spark.scheduler.excludeOnFailure.unschedulableTaskSetTimeout 120s The timeout in seconds to wait to acquire a new executor and schedule a task before aborting a - TaskSet which is unschedulable because of being completely blacklisted. + TaskSet which is unschedulable because all executors are excluded due to task failures. 2.4.1 - spark.blacklist.enabled + spark.excludeOnFailure.enabled false - If set to "true", prevent Spark from scheduling tasks on executors that have been blacklisted - due to too many task failures. The blacklisting algorithm can be further controlled by the - other "spark.blacklist" configuration options. + If set to "true", prevent Spark from scheduling tasks on executors that have been excluded + due to too many task failures. The algorithm used to exclude executors and nodes can be further + controlled by the other "spark.excludeOnFailure" configuration options. 2.1.0 - spark.blacklist.timeout + spark.excludeOnFailure.timeout 1h - (Experimental) How long a node or executor is blacklisted for the entire application, before it - is unconditionally removed from the blacklist to attempt running new tasks. + (Experimental) How long a node or executor is excluded for the entire application, before it + is unconditionally removed from the excludelist to attempt running new tasks. 2.1.0 - spark.blacklist.task.maxTaskAttemptsPerExecutor + spark.excludeOnFailure.task.maxTaskAttemptsPerExecutor 1 (Experimental) For a given task, how many times it can be retried on one executor before the - executor is blacklisted for that task. + executor is excluded for that task. 2.1.0 - spark.blacklist.task.maxTaskAttemptsPerNode + spark.excludeOnFailure.task.maxTaskAttemptsPerNode 2 (Experimental) For a given task, how many times it can be retried on one node, before the entire - node is blacklisted for that task. + node is excluded for that task. 2.1.0 - spark.blacklist.stage.maxFailedTasksPerExecutor + spark.excludeOnFailure.stage.maxFailedTasksPerExecutor 2 (Experimental) How many different tasks must fail on one executor, within one stage, before the - executor is blacklisted for that stage. + executor is excluded for that stage. 2.1.0 - spark.blacklist.stage.maxFailedExecutorsPerNode + spark.excludeOnFailure.stage.maxFailedExecutorsPerNode 2 - (Experimental) How many different executors are marked as blacklisted for a given stage, before + (Experimental) How many different executors are marked as excluded for a given stage, before the entire node is marked as failed for the stage. 2.1.0 - spark.blacklist.application.maxFailedTasksPerExecutor + spark.excludeOnFailure.application.maxFailedTasksPerExecutor 2 (Experimental) How many different tasks must fail on one executor, in successful task sets, - before the executor is blacklisted for the entire application. Blacklisted executors will + before the executor is excluded for the entire application. Excluded executors will be automatically added back to the pool of available resources after the timeout specified by - spark.blacklist.timeout. Note that with dynamic allocation, though, the executors + spark.excludeOnFailure.timeout. Note that with dynamic allocation, though, the executors may get marked as idle and be reclaimed by the cluster manager.
2.2.0 - spark.blacklist.application.maxFailedExecutorsPerNode + spark.excludeOnFailure.application.maxFailedExecutorsPerNode 2 - (Experimental) How many different executors must be blacklisted for the entire application, - before the node is blacklisted for the entire application. Blacklisted nodes will + (Experimental) How many different executors must be excluded for the entire application, + before the node is excluded for the entire application. Excluded nodes will be automatically added back to the pool of available resources after the timeout specified by - spark.blacklist.timeout. Note that with dynamic allocation, though, the executors - on the node may get marked as idle and be reclaimed by the cluster manager. + spark.excludeOnFailure.timeout. Note that with dynamic allocation, though, the + executors on the node may get marked as idle and be reclaimed by the cluster manager. 2.2.0 - spark.blacklist.killBlacklistedExecutors + spark.excludeOnFailure.killExcludedExecutors false (Experimental) If set to "true", allow Spark to automatically kill the executors - when they are blacklisted on fetch failure or blacklisted for the entire application, - as controlled by spark.blacklist.application.*. Note that, when an entire node is added - to the blacklist, all of the executors on that node will be killed. + when they are excluded on fetch failure or excluded for the entire application, + as controlled by spark.excludeOnFailure.application.*. Note that, when an entire node is added + to the excludelist, all of the executors on that node will be killed. 2.2.0 - spark.blacklist.application.fetchFailure.enabled + spark.excludeOnFailure.application.fetchFailure.enabled false - (Experimental) If set to "true", Spark will blacklist the executor immediately when a fetch + (Experimental) If set to "true", Spark will exclude the executor immediately when a fetch failure happens. If external shuffle service is enabled, then the whole node will be - blacklisted. + excluded. 2.3.0 diff --git a/docs/monitoring.md b/docs/monitoring.md index 97948f6fac4d9..3513fed7b3d78 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1125,12 +1125,14 @@ This is the component with the largest amount of instrumented metrics - stages.failedStages.count - stages.skippedStages.count - stages.completedStages.count - - tasks.blackListedExecutors.count + - tasks.blackListedExecutors.count // deprecated, use excludedExecutors instead + - tasks.excludedExecutors.count - tasks.completedTasks.count - tasks.failedTasks.count - tasks.killedTasks.count - tasks.skippedTasks.count - - tasks.unblackListedExecutors.count + - tasks.unblackListedExecutors.count // deprecated, use unexcludedExecutors instead + - tasks.unexcludedExecutors.count - jobs.succeededJobs - jobs.failedJobs - jobDuration diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 6f7aaf2baeccd..5e8eb48093c8a 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -551,12 +551,12 @@ To use a custom metrics.properties for the application master and executors, upd 2.0.0 - spark.yarn.blacklist.executor.launch.blacklisting.enabled + spark.yarn.executor.launch.excludeOnFailure.enabled false - Flag to enable blacklisting of nodes having YARN resource allocation problems. - The error limit for blacklisting can be configured by - spark.blacklist.application.maxFailedExecutorsPerNode. + Flag to enable exclusion of nodes having YARN resource allocation problems.
+ The error limit for excluding can be configured by + spark.excludeOnFailure.application.maxFailedExecutorsPerNode. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 5655ef50d214f..4ea22ebd93eef 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -185,7 +185,7 @@ private[spark] class KubernetesClusterSchedulerBackend( Some(new HadoopDelegationTokenManager(conf, sc.hadoopConfiguration, driverEndpoint)) } - override protected def isBlacklisted(executorId: String, hostname: String): Boolean = { + override protected def isExecutorExcluded(executorId: String, hostname: String): Boolean = { podAllocator.isDeleted(executorId) } diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index 32cd50298bc6c..bbe1ff495d8a6 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -63,7 +63,7 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( with MesosScheduler with MesosSchedulerUtils { - // Blacklist a agent after this many failures + // Exclude an agent after this many failures private val MAX_AGENT_FAILURES = 2 private val maxCoresOption = conf.get(config.CORES_MAX) @@ -667,12 +667,12 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( totalGpusAcquired -= gpus gpusByTaskId -= taskId } - // If it was a failure, mark the agent as failed for blacklisting purposes + // If it was a failure, mark the agent as failed for excluding purposes if (TaskState.isFailed(state)) { agent.taskFailures += 1 if (agent.taskFailures >= MAX_AGENT_FAILURES) { - logInfo(s"Blacklisting Mesos agent $agentId due to too many failures; " + + logInfo(s"Excluding Mesos agent $agentId due to too many failures; " + "is Spark installed on it?") } } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala index 4d7f6441020b7..2b7272a490376 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala @@ -833,7 +833,7 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite when(driver.start()).thenReturn(Protos.Status.DRIVER_RUNNING) taskScheduler = mock[TaskSchedulerImpl] - when(taskScheduler.nodeBlacklist).thenReturn(Set[String]()) + when(taskScheduler.excludedNodes).thenReturn(Set[String]()) when(taskScheduler.sc).thenReturn(sc) externalShuffleClient = mock[MesosExternalBlockStoreClient] diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index 9b99e8ff9265c..e23773229c560 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -567,10 +567,10 @@ private[spark] class ApplicationMaster( finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES, s"Max number of executor failures ($maxNumExecutorFailures) reached") - } else if (allocator.isAllNodeBlacklisted) { + } else if (allocator.isAllNodeExcluded) { finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_MAX_EXECUTOR_FAILURES, - "Due to executor failures all available nodes are blacklisted") + "Due to executor failures all available nodes are excluded") } else { logDebug("Sending progress") allocator.allocateResources() @@ -792,7 +792,7 @@ private[spark] class ApplicationMaster( r.resourceProfileToTotalExecs, r.numLocalityAwareTasksPerResourceProfileId, r.hostToLocalTaskCount, - r.nodeBlacklist)) { + r.excludedNodes)) { resetAllocatorInterval() } context.reply(true) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index adbbbc01a0bd5..ef01a2ad95483 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -159,8 +159,8 @@ private[yarn] class YarnAllocator( private[spark] val failureTracker = new FailureTracker(sparkConf, clock) - private val allocatorBlacklistTracker = - new YarnAllocatorBlacklistTracker(sparkConf, amClient, failureTracker) + private val allocatorNodeHealthTracker = + new YarnAllocatorNodeHealthTracker(sparkConf, amClient, failureTracker) // Executor memory in MiB. protected val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toInt @@ -238,7 +238,7 @@ private[yarn] class YarnAllocator( def getNumExecutorsFailed: Int = failureTracker.numFailedExecutors - def isAllNodeBlacklisted: Boolean = allocatorBlacklistTracker.isAllNodeBlacklisted + def isAllNodeExcluded: Boolean = allocatorNodeHealthTracker.isAllNodeExcluded /** * A sequence of pending container requests that have not yet been fulfilled. @@ -358,15 +358,15 @@ private[yarn] class YarnAllocator( * placement hint. * @param hostToLocalTaskCount a map of preferred hostname to possible task counts for each * ResourceProfile id to be used as container placement hint. - * @param nodeBlacklist blacklisted nodes, which is passed in to avoid allocating new containers - * on them. It will be used to update the application master's blacklist. + * @param excludedNodes excluded nodes, which is passed in to avoid allocating new containers + * on them. It will be used to update the applications excluded node list. * @return Whether the new requested total is different than the old value. 
*/ def requestTotalExecutorsWithPreferredLocalities( resourceProfileToTotalExecs: Map[ResourceProfile, Int], numLocalityAwareTasksPerResourceProfileId: Map[Int, Int], hostToLocalTaskCountPerResourceProfileId: Map[Int, Map[String, Int]], - nodeBlacklist: Set[String]): Boolean = synchronized { + excludedNodes: Set[String]): Boolean = synchronized { this.numLocalityAwareTasksPerResourceProfileId = numLocalityAwareTasksPerResourceProfileId this.hostToLocalTaskCountPerResourceProfileId = hostToLocalTaskCountPerResourceProfileId @@ -377,7 +377,7 @@ private[yarn] class YarnAllocator( logInfo(s"Driver requested a total number of $numExecs executor(s) " + s"for resource profile id: ${rp.id}.") targetNumExecutorsPerResourceProfileId(rp.id) = numExecs - allocatorBlacklistTracker.setSchedulerBlacklistedNodes(nodeBlacklist) + allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) true } else { false @@ -416,7 +416,7 @@ private[yarn] class YarnAllocator( val allocateResponse = amClient.allocate(progressIndicator) val allocatedContainers = allocateResponse.getAllocatedContainers() - allocatorBlacklistTracker.setNumClusterNodes(allocateResponse.getNumClusterNodes) + allocatorNodeHealthTracker.setNumClusterNodes(allocateResponse.getNumClusterNodes) if (allocatedContainers.size > 0) { logDebug(("Allocated containers: %d. Current executor count: %d. " + @@ -827,7 +827,7 @@ private[yarn] class YarnAllocator( s"$diag Consider boosting ${EXECUTOR_MEMORY_OVERHEAD.key}." (true, message) case other_exit_status => - // SPARK-26269: follow YARN's blacklisting behaviour(see https://github + // SPARK-26269: follow YARN's behaviour(see https://github // .com/apache/hadoop/blob/228156cfd1b474988bc4fedfbf7edddc87db41e3/had // oop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/ap // ache/hadoop/yarn/util/Apps.java#L273 for details) @@ -837,7 +837,7 @@ private[yarn] class YarnAllocator( s". Diagnostics: ${completedContainer.getDiagnostics}.") } else { // completed container from a bad node - allocatorBlacklistTracker.handleResourceAllocationFailure(hostOpt) + allocatorNodeHealthTracker.handleResourceAllocationFailure(hostOpt) (true, s"Container from a bad node: $containerId$onHostStr" + s". Exit status: ${completedContainer.getExitStatus}" + s". Diagnostics: ${completedContainer.getDiagnostics}.") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala similarity index 63% rename from resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala rename to resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala index 339d3715a7316..de9e190361428 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala @@ -27,42 +27,43 @@ import org.apache.spark.SparkConf import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ -import org.apache.spark.scheduler.BlacklistTracker +import org.apache.spark.scheduler.HealthTracker import org.apache.spark.util.{Clock, SystemClock} /** - * YarnAllocatorBlacklistTracker is responsible for tracking the blacklisted nodes - * and synchronizing the node list to YARN. 
+ * YarnAllocatorNodeHealthTracker is responsible for tracking the health of nodes + * and synchronizing the node list to YARN as to which nodes are excluded. * - * Blacklisted nodes are coming from two different sources: + * Excluding nodes are coming from two different sources: * *
 * <ul>
- *   <li> from the scheduler as task level blacklisted nodes
+ *   <li> from the scheduler as task level excluded nodes
 *   <li> from this class (tracked here) as YARN resource allocation problems
 * </ul>
    * * The reason to realize this logic here (and not in the driver) is to avoid possible delays - * between synchronizing the blacklisted nodes with YARN and resource allocations. + * between synchronizing the excluded nodes with YARN and resource allocations. */ -private[spark] class YarnAllocatorBlacklistTracker( +private[spark] class YarnAllocatorNodeHealthTracker( sparkConf: SparkConf, amClient: AMRMClient[ContainerRequest], failureTracker: FailureTracker) extends Logging { - private val blacklistTimeoutMillis = BlacklistTracker.getBlacklistTimeout(sparkConf) + private val excludeOnFailureTimeoutMillis = HealthTracker.getExludeOnFailureTimeout(sparkConf) - private val launchBlacklistEnabled = sparkConf.get(YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED) + private val launchExcludeOnFailureEnabled = + sparkConf.get(YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED) private val maxFailuresPerHost = sparkConf.get(MAX_FAILED_EXEC_PER_NODE) private val excludeNodes = sparkConf.get(YARN_EXCLUDE_NODES).toSet - private val allocatorBlacklist = new HashMap[String, Long]() + private val allocatorExcludedNodeList = new HashMap[String, Long]() - private var currentBlacklistedYarnNodes = Set.empty[String] + private var currentExcludededYarnNodes = Set.empty[String] - private var schedulerBlacklist = Set.empty[String] + private var schedulerExcludedNodeList = Set.empty[String] private var numClusterNodes = Int.MaxValue @@ -72,72 +73,76 @@ private[spark] class YarnAllocatorBlacklistTracker( def handleResourceAllocationFailure(hostOpt: Option[String]): Unit = { hostOpt match { - case Some(hostname) if launchBlacklistEnabled => - // failures on an already blacklisted nodes are not even tracked. + case Some(hostname) if launchExcludeOnFailureEnabled => + // failures on an already excluded node are not even tracked. 
// otherwise, such failures could shutdown the application // as resource requests are asynchronous // and a late failure response could exceed MAX_EXECUTOR_FAILURES - if (!schedulerBlacklist.contains(hostname) && - !allocatorBlacklist.contains(hostname)) { + if (!schedulerExcludedNodeList.contains(hostname) && + !allocatorExcludedNodeList.contains(hostname)) { failureTracker.registerFailureOnHost(hostname) - updateAllocationBlacklistedNodes(hostname) + updateAllocationExcludedNodes(hostname) } case _ => failureTracker.registerExecutorFailure() } } - private def updateAllocationBlacklistedNodes(hostname: String): Unit = { + private def updateAllocationExcludedNodes(hostname: String): Unit = { val failuresOnHost = failureTracker.numFailuresOnHost(hostname) if (failuresOnHost > maxFailuresPerHost) { - logInfo(s"blacklisting $hostname as YARN allocation failed $failuresOnHost times") - allocatorBlacklist.put( + logInfo(s"excluding $hostname as YARN allocation failed $failuresOnHost times") + allocatorExcludedNodeList.put( hostname, - failureTracker.clock.getTimeMillis() + blacklistTimeoutMillis) - refreshBlacklistedNodes() + failureTracker.clock.getTimeMillis() + excludeOnFailureTimeoutMillis) + refreshExcludedNodes() } } - def setSchedulerBlacklistedNodes(schedulerBlacklistedNodesWithExpiry: Set[String]): Unit = { - this.schedulerBlacklist = schedulerBlacklistedNodesWithExpiry - refreshBlacklistedNodes() + def setSchedulerExcludedNodes(schedulerExcludedNodesWithExpiry: Set[String]): Unit = { + this.schedulerExcludedNodeList = schedulerExcludedNodesWithExpiry + refreshExcludedNodes() } - def isAllNodeBlacklisted: Boolean = { + def isAllNodeExcluded: Boolean = { if (numClusterNodes <= 0) { logWarning("No available nodes reported, please check Resource Manager.") false } else { - currentBlacklistedYarnNodes.size >= numClusterNodes + currentExcludededYarnNodes.size >= numClusterNodes } } - private def refreshBlacklistedNodes(): Unit = { - removeExpiredYarnBlacklistedNodes() - val allBlacklistedNodes = excludeNodes ++ schedulerBlacklist ++ allocatorBlacklist.keySet - synchronizeBlacklistedNodeWithYarn(allBlacklistedNodes) + private def refreshExcludedNodes(): Unit = { + removeExpiredYarnExcludedNodes() + val allExcludedNodes = + excludeNodes ++ schedulerExcludedNodeList ++ allocatorExcludedNodeList.keySet + synchronizeExcludedNodesWithYarn(allExcludedNodes) } - private def synchronizeBlacklistedNodeWithYarn(nodesToBlacklist: Set[String]): Unit = { - // Update blacklist information to YARN ResourceManager for this application, + private def synchronizeExcludedNodesWithYarn(nodesToExclude: Set[String]): Unit = { + // Update YARN with the nodes that are excluded for this application, // in order to avoid allocating new Containers on the problematic nodes. 
- val additions = (nodesToBlacklist -- currentBlacklistedYarnNodes).toList.sorted - val removals = (currentBlacklistedYarnNodes -- nodesToBlacklist).toList.sorted + val additions = (nodesToExclude -- currentExcludededYarnNodes).toList.sorted + val removals = (currentExcludededYarnNodes -- nodesToExclude).toList.sorted if (additions.nonEmpty) { - logInfo(s"adding nodes to YARN application master's blacklist: $additions") + logInfo(s"adding nodes to YARN application master's excluded node list: $additions") } if (removals.nonEmpty) { - logInfo(s"removing nodes from YARN application master's blacklist: $removals") + logInfo(s"removing nodes from YARN application master's excluded node list: $removals") } if (additions.nonEmpty || removals.nonEmpty) { + // Note YARNs api for excluding nodes is updateBlacklist. + // TODO - We need to update once Hadoop changes - + // https://issues.apache.org/jira/browse/HADOOP-17169 amClient.updateBlacklist(additions.asJava, removals.asJava) } - currentBlacklistedYarnNodes = nodesToBlacklist + currentExcludededYarnNodes = nodesToExclude } - private def removeExpiredYarnBlacklistedNodes(): Unit = { + private def removeExpiredYarnExcludedNodes(): Unit = { val now = failureTracker.clock.getTimeMillis() - allocatorBlacklist.retain { (_, expiryTime) => expiryTime > now } + allocatorExcludedNodeList.retain { (_, expiryTime) => expiryTime > now } } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index 1b0bf295db499..f2e838f6270c9 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -379,14 +379,15 @@ package object config extends Logging { .stringConf .createOptional - /* YARN allocator-level blacklisting related config entries. */ - private[spark] val YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED = - ConfigBuilder("spark.yarn.blacklist.executor.launch.blacklisting.enabled") - .version("2.4.0") + /* YARN allocator-level excludeOnFailure related config entries. */ + private[spark] val YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED = + ConfigBuilder("spark.yarn.executor.launch.excludeOnFailure.enabled") + .version("3.1.0") + .withAlternative("spark.yarn.blacklist.executor.launch.blacklisting.enabled") .booleanConf .createWithDefault(false) - /* Initially blacklisted YARN nodes. */ + /* Initially excluded YARN nodes. 
*/ private[spark] val YARN_EXCLUDE_NODES = ConfigBuilder("spark.yarn.exclude.nodes") .version("3.0.0") .stringConf diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 3f2e8846e85b3..b42bdb9816600 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -132,13 +132,13 @@ private[spark] abstract class YarnSchedulerBackend( private[cluster] def prepareRequestExecutors( resourceProfileToTotalExecs: Map[ResourceProfile, Int]): RequestExecutors = { - val nodeBlacklist: Set[String] = scheduler.nodeBlacklist() - // For locality preferences, ignore preferences for nodes that are blacklisted + val excludedNodes: Set[String] = scheduler.excludedNodes() + // For locality preferences, ignore preferences for nodes that are excluded val filteredRPHostToLocalTaskCount = rpHostToLocalTaskCount.map { case (rpid, v) => - (rpid, v.filter { case (host, count) => !nodeBlacklist.contains(host) }) + (rpid, v.filter { case (host, count) => !excludedNodes.contains(host) }) } RequestExecutors(resourceProfileToTotalExecs, numLocalityAwareTasksPerResourceProfileId, - filteredRPHostToLocalTaskCount, nodeBlacklist) + filteredRPHostToLocalTaskCount, excludedNodes) } /** diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala similarity index 54% rename from resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala rename to resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala index 97615f5c936b0..c2fd5ff316592 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorBlacklistTrackerSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorHealthTrackerSuite.scala @@ -26,14 +26,14 @@ import org.scalatest.BeforeAndAfterEach import org.scalatest.matchers.must.Matchers import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.yarn.config.{YARN_EXCLUDE_NODES, YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED} -import org.apache.spark.internal.config.{BLACKLIST_TIMEOUT_CONF, MAX_FAILED_EXEC_PER_NODE} +import org.apache.spark.deploy.yarn.config.{YARN_EXCLUDE_NODES, YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED} +import org.apache.spark.internal.config.{EXCLUDE_ON_FAILURE_TIMEOUT_CONF, MAX_FAILED_EXEC_PER_NODE} import org.apache.spark.util.ManualClock -class YarnAllocatorBlacklistTrackerSuite extends SparkFunSuite with Matchers +class YarnAllocatorHealthTrackerSuite extends SparkFunSuite with Matchers with BeforeAndAfterEach { - val BLACKLIST_TIMEOUT = 100L + val EXCLUDE_TIMEOUT = 100L val MAX_FAILED_EXEC_PER_NODE_VALUE = 2 var sparkConf: SparkConf = _ @@ -42,117 +42,117 @@ class YarnAllocatorBlacklistTrackerSuite extends SparkFunSuite with Matchers override def beforeEach(): Unit = { sparkConf = new SparkConf() - sparkConf.set(BLACKLIST_TIMEOUT_CONF, BLACKLIST_TIMEOUT) - sparkConf.set(YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED, true) + sparkConf.set(EXCLUDE_ON_FAILURE_TIMEOUT_CONF, EXCLUDE_TIMEOUT) + 
sparkConf.set(YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED, true) sparkConf.set(MAX_FAILED_EXEC_PER_NODE, MAX_FAILED_EXEC_PER_NODE_VALUE) clock = new ManualClock() amClientMock = mock(classOf[AMRMClient[ContainerRequest]]) super.beforeEach() } - private def createYarnAllocatorBlacklistTracker( - sparkConf: SparkConf = sparkConf): YarnAllocatorBlacklistTracker = { + private def createYarnAllocatorHealthTracker( + sparkConf: SparkConf = sparkConf): YarnAllocatorNodeHealthTracker = { val failureTracker = new FailureTracker(sparkConf, clock) - val yarnBlacklistTracker = - new YarnAllocatorBlacklistTracker(sparkConf, amClientMock, failureTracker) - yarnBlacklistTracker.setNumClusterNodes(4) - yarnBlacklistTracker + val yarnHealthTracker = + new YarnAllocatorNodeHealthTracker(sparkConf, amClientMock, failureTracker) + yarnHealthTracker.setNumClusterNodes(4) + yarnHealthTracker } - test("expiring its own blacklisted nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() + test("expiring its own excluded nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host")) - // host should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host")) + // host should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host"), Collections.emptyList()) } } - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host")) - // the third failure on the host triggers the blacklisting + yarnHealthTracker.handleResourceAllocationFailure(Some("host")) + // the third failure on the host triggers the exclusion verify(amClientMock).updateBlacklist(Arrays.asList("host"), Collections.emptyList()) - clock.advance(BLACKLIST_TIMEOUT) + clock.advance(EXCLUDE_TIMEOUT) - // trigger synchronisation of blacklisted nodes with YARN - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set()) + // trigger synchronisation of excluded nodes with YARN + yarnHealthTracker.setSchedulerExcludedNodes(Set()) verify(amClientMock).updateBlacklist(Collections.emptyList(), Arrays.asList("host")) } - test("not handling the expiry of scheduler blacklisted nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() + test("not handling the expiry of scheduler excluded nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1", "host2"), Collections.emptyList()) // advance timer more then host1, host2 expiry time clock.advance(200L) - // expired blacklisted nodes (simulating a resource request) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2")) - // no change is communicated to YARN regarding the blacklisting + // expired excluded nodes (simulating a resource request) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2")) + // no change is communicated to YARN regarding the exclusion verify(amClientMock, times(0)).updateBlacklist(Collections.emptyList(), Collections.emptyList()) } - test("combining scheduler and allocation blacklist") { + test("combining scheduler and allocation excluded node list") { sparkConf.set(YARN_EXCLUDE_NODES, Seq("initial1", "initial2")) - val 
yarnBlacklistTracker = createYarnAllocatorBlacklistTracker(sparkConf) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set()) + val yarnHealthTracker = createYarnAllocatorHealthTracker(sparkConf) + yarnHealthTracker.setSchedulerExcludedNodes(Set()) - // initial1 and initial2 is added as blacklisted nodes at the very first updateBlacklist call + // initial1 and initial2 is added as excluded nodes at the very first updateBlacklist call // and they are never removed verify(amClientMock) .updateBlacklist(Arrays.asList("initial1", "initial2"), Collections.emptyList()) (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host1")) - // host1 should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host1")) + // host1 should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host1"), Collections.emptyList()) } } - // as this is the third failure on host1 the node will be blacklisted - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host1")) + // as this is the third failure on host1 the node will be excluded + yarnHealthTracker.handleResourceAllocationFailure(Some("host1")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1"), Collections.emptyList()) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host2", "host3")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host2", "host3")) verify(amClientMock) .updateBlacklist(Arrays.asList("host2", "host3"), Collections.emptyList()) clock.advance(10L) - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host3", "host4")) + yarnHealthTracker.setSchedulerExcludedNodes(Set("host3", "host4")) verify(amClientMock) .updateBlacklist(Arrays.asList("host4"), Arrays.asList("host2")) } - test("blacklist all available nodes") { - val yarnBlacklistTracker = createYarnAllocatorBlacklistTracker() - yarnBlacklistTracker.setSchedulerBlacklistedNodes(Set("host1", "host2", "host3")) + test("exclude all available nodes") { + val yarnHealthTracker = createYarnAllocatorHealthTracker() + yarnHealthTracker.setSchedulerExcludedNodes(Set("host1", "host2", "host3")) verify(amClientMock) .updateBlacklist(Arrays.asList("host1", "host2", "host3"), Collections.emptyList()) clock.advance(60L) (1 to MAX_FAILED_EXEC_PER_NODE_VALUE).foreach { _ => { - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host4")) - // host4 should not be blacklisted at these failures as MAX_FAILED_EXEC_PER_NODE is 2 + yarnHealthTracker.handleResourceAllocationFailure(Some("host4")) + // host4 should not be excluded at these failures as MAX_FAILED_EXEC_PER_NODE is 2 verify(amClientMock, never()) .updateBlacklist(Arrays.asList("host4"), Collections.emptyList()) } } - // the third failure on the host triggers the blacklisting - yarnBlacklistTracker.handleResourceAllocationFailure(Some("host4")) + // the third failure on the host triggers the exclusion + yarnHealthTracker.handleResourceAllocationFailure(Some("host4")) verify(amClientMock).updateBlacklist(Arrays.asList("host4"), Collections.emptyList()) - assert(yarnBlacklistTracker.isAllNodeBlacklisted) + assert(yarnHealthTracker.isAllNodeExcluded) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index 63e2b97e0ecab..6b5c72ad7f7aa 100644 --- 
a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -523,9 +523,10 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter handler.getNumUnexpectedContainerRelease should be (2) } - test("blacklisted nodes reflected in amClient requests") { - // Internally we track the set of blacklisted nodes, but yarn wants us to send *changes* - // to the blacklist. This makes sure we are sending the right updates. + test("excluded nodes reflected in amClient requests") { + // Internally we track the set of excluded nodes, but yarn wants us to send *changes* + // to it. Note the YARN api uses the term blacklist for excluded nodes. + // This makes sure we are sending the right updates. val mockAmClient = mock(classOf[AMRMClient[ContainerRequest]]) val (handler, _) = createAllocator(4, mockAmClient) val resourceProfileToTotalExecs = mutable.HashMap(defaultRP -> 1) @@ -534,14 +535,14 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter numLocalityAwareTasksPerResourceProfileId.toMap, Map(), Set("hostA")) verify(mockAmClient).updateBlacklist(Seq("hostA").asJava, Seq[String]().asJava) - val blacklistedNodes = Set( + val excludedNodes = Set( "hostA", "hostB" ) resourceProfileToTotalExecs(defaultRP) = 2 handler.requestTotalExecutorsWithPreferredLocalities(resourceProfileToTotalExecs.toMap, - numLocalityAwareTasksPerResourceProfileId.toMap, Map(), blacklistedNodes) + numLocalityAwareTasksPerResourceProfileId.toMap, Map(), excludedNodes) verify(mockAmClient).updateBlacklist(Seq("hostB").asJava, Seq[String]().asJava) resourceProfileToTotalExecs(defaultRP) = 3 handler.requestTotalExecutorsWithPreferredLocalities(resourceProfileToTotalExecs.toMap, @@ -592,7 +593,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter handler.getNumExecutorsFailed should be (0) } - test("SPARK-26269: YarnAllocator should have same blacklist behaviour with YARN") { + test("SPARK-26269: YarnAllocator should have same excludeOnFailure behaviour with YARN") { val rmClientSpy = spy(rmClient) val maxExecutors = 11 @@ -600,7 +601,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter maxExecutors, rmClientSpy, Map( - YARN_EXECUTOR_LAUNCH_BLACKLIST_ENABLED.key -> "true", + YARN_EXECUTOR_LAUNCH_EXCLUDE_ON_FAILURE_ENABLED.key -> "true", MAX_FAILED_EXEC_PER_NODE.key -> "0")) handler.updateResourceRequests() @@ -608,7 +609,7 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter val ids = 0 to maxExecutors val containers = createContainers(hosts, ids) - val nonBlacklistedStatuses = Seq( + val nonExcludedStatuses = Seq( ContainerExitStatus.SUCCESS, ContainerExitStatus.PREEMPTED, ContainerExitStatus.KILLED_EXCEEDED_VMEM, @@ -619,24 +620,24 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter ContainerExitStatus.ABORTED, ContainerExitStatus.DISKS_FAILED) - val nonBlacklistedContainerStatuses = nonBlacklistedStatuses.zipWithIndex.map { + val nonExcludedContainerStatuses = nonExcludedStatuses.zipWithIndex.map { case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) } - val BLACKLISTED_EXIT_CODE = 1 - val blacklistedStatuses = Seq(ContainerExitStatus.INVALID, BLACKLISTED_EXIT_CODE) + val EXCLUDED_EXIT_CODE = 1 + val excludedStatuses = Seq(ContainerExitStatus.INVALID, EXCLUDED_EXIT_CODE) - val 
blacklistedContainerStatuses = blacklistedStatuses.zip(9 until maxExecutors).map { + val excludedContainerStatuses = excludedStatuses.zip(9 until maxExecutors).map { case (exitStatus, idx) => createContainerStatus(containers(idx).getId, exitStatus) } handler.handleAllocatedContainers(containers.slice(0, 9)) - handler.processCompletedContainers(nonBlacklistedContainerStatuses) + handler.processCompletedContainers(nonExcludedContainerStatuses) verify(rmClientSpy, never()) .updateBlacklist(hosts.slice(0, 9).asJava, Collections.emptyList()) handler.handleAllocatedContainers(containers.slice(9, 11)) - handler.processCompletedContainers(blacklistedContainerStatuses) + handler.processCompletedContainers(excludedContainerStatuses) verify(rmClientSpy) .updateBlacklist(hosts.slice(9, 10).asJava, Collections.emptyList()) verify(rmClientSpy) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala index 9003c2f630975..7959bb55d7ffc 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackendSuite.scala @@ -44,9 +44,10 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc } private class TestTaskSchedulerImpl(sc: SparkContext) extends TaskSchedulerImpl(sc) { - val blacklistedNodes = new AtomicReference[Set[String]]() - def setNodeBlacklist(nodeBlacklist: Set[String]): Unit = blacklistedNodes.set(nodeBlacklist) - override def nodeBlacklist(): Set[String] = blacklistedNodes.get() + val excludedNodesList = new AtomicReference[Set[String]]() + def setNodeExcludeList(nodeExcludeList: Set[String]): Unit = + excludedNodesList.set(nodeExcludeList) + override def excludedNodes(): Set[String] = excludedNodesList.get() } private class TestYarnSchedulerBackend(scheduler: TaskSchedulerImpl, sc: SparkContext) @@ -56,7 +57,7 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc } } - test("RequestExecutors reflects node blacklist and is serializable") { + test("RequestExecutors reflects node excludelist and is serializable") { sc = new SparkContext("local", "YarnSchedulerBackendSuite") // Subclassing the TaskSchedulerImpl here instead of using Mockito. For details see SPARK-26891. 
val sched = new TestTaskSchedulerImpl(sc) @@ -65,7 +66,7 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc val ser = new JavaSerializer(sc.conf).newInstance() val defaultResourceProf = ResourceProfile.getOrCreateDefaultProfile(sc.getConf) for { - blacklist <- IndexedSeq(Set[String](), Set("a", "b", "c")) + excludelist <- IndexedSeq(Set[String](), Set("a", "b", "c")) numRequested <- 0 until 10 hostToLocalCount <- IndexedSeq( Map(defaultResourceProf.id -> Map.empty[String, Int]), @@ -73,14 +74,14 @@ class YarnSchedulerBackendSuite extends SparkFunSuite with MockitoSugar with Loc ) } { yarnSchedulerBackendExtended.setHostToLocalTaskCount(hostToLocalCount) - sched.setNodeBlacklist(blacklist) + sched.setNodeExcludeList(excludelist) val request = Map(defaultResourceProf -> numRequested) val req = yarnSchedulerBackendExtended.prepareRequestExecutors(request) assert(req.resourceProfileToTotalExecs(defaultResourceProf) === numRequested) - assert(req.nodeBlacklist === blacklist) + assert(req.excludedNodes === excludelist) val hosts = req.hostToLocalTaskCount(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID).keySet - assert(hosts.intersect(blacklist).isEmpty) + assert(hosts.intersect(excludelist).isEmpty) // Serialize to make sure serialization doesn't throw an error ser.serialize(req) } From 32b78d3795d5c4fd533b0267647977ed4f02ee49 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 09:49:18 -0700 Subject: [PATCH 0365/1009] [SPARK-33290][SQL] REFRESH TABLE should invalidate cache even though the table itself may not be cached ### What changes were proposed in this pull request? In `CatalogImpl.refreshTable`, this moves the `uncacheQuery` call out of the condition `if (cache.nonEmpty)` so that it will be called whether the table itself is cached or not. ### Why are the changes needed? In the case like the following: ```sql CREATE TABLE t ...; CREATE VIEW t1 AS SELECT * FROM t; REFRESH TABLE t; ``` If the table `t` is refreshed, the view `t1` which is depending on `t` will not be invalidated. This could lead to incorrect result and is similar to [SPARK-19765](https://issues.apache.org/jira/browse/SPARK-19765). On the other hand, if we have: ```sql CREATE TABLE t ...; CACHE TABLE t; CREATE VIEW t1 AS SELECT * FROM t; REFRESH TABLE t; ``` Then the view `t1` will be refreshed. The behavior is somewhat inconsistent. ### Does this PR introduce _any_ user-facing change? Yes, with the change any cache that are depending on the table refreshed will be invalidated with the change. Previously this only happens if the table itself is cached. ### How was this patch tested? Added a new UT for the case. Closes #30187 from sunchao/SPARK-33290. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/CatalogImpl.scala | 12 ++++-- .../apache/spark/sql/CachedTableSuite.scala | 42 +++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 795775dd07561..3e216415c2815 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -504,6 +504,9 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { * If this table is cached as an InMemoryRelation, drop the original cached version and make the * new version cached lazily. 
* + * In addition, refreshing a table also invalidate all caches that have reference to the table + * in a cascading manner. This is to prevent incorrect result from the otherwise staled caches. + * * @group cachemgmt * @since 2.0.0 */ @@ -524,14 +527,17 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { // If this table is cached as an InMemoryRelation, drop the original // cached version and make the new version cached lazily. val cache = sparkSession.sharedState.cacheManager.lookupCachedData(table) + + // uncache the logical plan. + // note this is a no-op for the table itself if it's not cached, but will invalidate all + // caches referencing this table. + sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade = true) + if (cache.nonEmpty) { // save the cache name and cache level for recreation val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel - // uncache the logical plan. - sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade = true) - // recache with the same name and cache level. sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index adc725ed9b062..6313370476c93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1208,4 +1208,46 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) } } + + test("SPARK-33290: REFRESH TABLE should invalidate all caches referencing the table") { + withTable("t") { + withTempPath { path => + withTempView("tempView1", "tempView2") { + Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") + sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") + sql("CACHE TABLE tempView2 AS SELECT i FROM tempView1") + checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) + checkAnswer(sql("SELECT * FROM tempView2"), Seq(Row(1))) + + Utils.deleteRecursively(path) + sql("REFRESH TABLE tempView1") + checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) + checkAnswer(sql("SELECT * FROM tempView2"), Seq.empty) + } + } + } + } + + test("SPARK-33290: querying temporary view after REFRESH TABLE fails with FNFE") { + withTable("t") { + withTempPath { path => + withTempView("tempView1") { + Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") + sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") + checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) + + Utils.deleteRecursively(path) + sql("REFRESH TABLE t") + checkAnswer(sql("SELECT * FROM t"), Seq.empty) + val exception = intercept[Exception] { + checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) + } + assert(exception.getMessage.contains("FileNotFoundException")) + assert(exception.getMessage.contains("REFRESH TABLE")) + } + } + } + } } From c51e5fc14b9d1d120afcf0e53714ccba5063b71e Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 31 Oct 2020 10:01:31 -0700 Subject: [PATCH 0366/1009] [SPARK-33293][SQL] Refactor WriteToDataSourceV2Exec and reduce code duplication ### What changes were proposed in this pull request? 
Refactor `WriteToDataSourceV2Exec` via removing code duplication around write to table logic: - renamed `AtomicTableWriteExec` to `TableWriteExec` so that the table write logic in this trait can be modified and shared with `CreateTableAsSelectExec`, `ReplaceTableAsSelectExec`, `AtomicCreateTableAsSelectExec ` and `AtomicReplaceTableAsSelectExec`. - similar to the above, renamed `writeToStagedTable` to `writeToTable` in `TableWriteExec`. - extended `writeToTable` so that it can handle both staged table as well as non-staged table. ### Why are the changes needed? Simplify the logic and remove duplication, to make this piece of code easier to maintain. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass CIs with the existing test coverage. Closes #30193 from sunchao/SPARK-33293. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../v2/WriteToDataSourceV2Exec.scala | 98 ++++++------------- 1 file changed, 30 insertions(+), 68 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 616e18ee85a6b..efa2c31e07602 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} @@ -66,9 +66,7 @@ case class CreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends V2TableWriteExec with SupportsV1Write { - - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + ifNotExists: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -79,30 +77,9 @@ case class CreateTableAsSelectExec( throw new TableAlreadyExistsException(ident) } - Utils.tryWithSafeFinallyAndFailureCallbacks({ - val schema = query.schema.asNullable - catalog.createTable( - ident, schema, partitioning.toArray, properties.asJava) match { - case table: SupportsWrite => - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema, - writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) - } - - case _ => - // table does not support writes - throw new SparkException( - s"Table implementation does not support 
writes: ${ident.quoted}") - } - })(catchBlock = { - catalog.dropTable(ident) - }) + val table = catalog.createTable(ident, query.schema.asNullable, + partitioning.toArray, properties.asJava) + writeToTable(catalog, table, writeOptions, ident) } } @@ -123,7 +100,7 @@ case class AtomicCreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends AtomicTableWriteExec { + ifNotExists: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -135,7 +112,7 @@ case class AtomicCreateTableAsSelectExec( } val stagedTable = catalog.stageCreate( ident, query.schema.asNullable, partitioning.toArray, properties.asJava) - writeToStagedTable(stagedTable, writeOptions, ident) + writeToTable(catalog, stagedTable, writeOptions, ident) } } @@ -157,9 +134,7 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends V2TableWriteExec with SupportsV1Write { - - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + orCreate: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -175,31 +150,9 @@ case class ReplaceTableAsSelectExec( } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) } - val schema = query.schema.asNullable - val createdTable = catalog.createTable( - ident, schema, partitioning.toArray, properties.asJava) - Utils.tryWithSafeFinallyAndFailureCallbacks({ - createdTable match { - case table: SupportsWrite => - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema, - writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) - } - - case _ => - // table does not support writes - throw new SparkException( - s"Table implementation does not support writes: ${ident.quoted}") - } - })(catchBlock = { - catalog.dropTable(ident) - }) + val table = catalog.createTable( + ident, query.schema.asNullable, partitioning.toArray, properties.asJava) + writeToTable(catalog, table, writeOptions, ident) } } @@ -223,7 +176,7 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends AtomicTableWriteExec { + orCreate: Boolean) extends TableWriteExec { override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable @@ -241,7 +194,7 @@ case class AtomicReplaceTableAsSelectExec( } else { throw new CannotReplaceMissingTableException(ident) } - writeToStagedTable(staged, writeOptions, ident) + writeToTable(catalog, staged, writeOptions, ident) } } @@ -479,15 +432,16 @@ object DataWritingSparkTask extends Logging { } } -private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1Write { +private[v2] trait TableWriteExec extends V2TableWriteExec with SupportsV1Write { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper - protected def writeToStagedTable( - stagedTable: StagedTable, + protected def writeToTable( + catalog: TableCatalog, + table: Table, writeOptions: CaseInsensitiveStringMap, ident: Identifier): Seq[InternalRow] = { 
Utils.tryWithSafeFinallyAndFailureCallbacks({ - stagedTable match { + table match { case table: SupportsWrite => val info = LogicalWriteInfoImpl( queryId = UUID.randomUUID().toString, @@ -499,17 +453,25 @@ private[v2] trait AtomicTableWriteExec extends V2TableWriteExec with SupportsV1W case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) case v2 => writeWithV2(v2.buildForBatch()) } - stagedTable.commitStagedChanges() + + table match { + case st: StagedTable => st.commitStagedChanges() + case _ => + } writtenRows case _ => - // Table does not support writes - staged changes are also rolled back below. + // Table does not support writes - staged changes are also rolled back below if table + // is staging. throw new SparkException( s"Table implementation does not support writes: ${ident.quoted}") } })(catchBlock = { - // Failure rolls back the staged writes and metadata changes. - stagedTable.abortStagedChanges() + table match { + // Failure rolls back the staged writes and metadata changes. + case st: StagedTable => st.abortStagedChanges() + case _ => catalog.dropTable(ident) + } }) } } From 69c27f49acf2fe6fbc8335bde2aac4afd4188678 Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Sat, 31 Oct 2020 15:14:46 -0700 Subject: [PATCH 0367/1009] [SPARK-33306][SQL] Timezone is needed when cast date to string ### What changes were proposed in this pull request? When `spark.sql.legacy.typeCoercion.datetimeToString.enabled` is enabled, spark will cast date to string when compare date with string. In Spark3, timezone is needed when casting date to string as https://github.com/apache/spark/blob/72ad9dcd5d484a8dd64c08889de85ef9de2a6077/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala#L309. Howerver, the timezone may not be set because `CastBase.needsTimeZone` returns false for this kind of casting. A simple way to reproduce this is ``` spark-shell --conf spark.sql.legacy.typeCoercion.datetimeToString.enabled=true ``` when we execute the following sql, ``` select a.d1 from (select to_date(concat('2000-01-0', id)) as d1 from range(1, 2)) a join (select concat('2000-01-0', id) as d2 from range(1, 2)) b on a.d1 = b.d2 ``` it will throw ``` java.util.NoSuchElementException: None.get at scala.None$.get(Option.scala:529) at scala.None$.get(Option.scala:527) at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId(datetimeExpressions.scala:56) at org.apache.spark.sql.catalyst.expressions.TimeZoneAwareExpression.zoneId$(datetimeExpressions.scala:56) at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId$lzycompute(Cast.scala:253) at org.apache.spark.sql.catalyst.expressions.CastBase.zoneId(Cast.scala:253) at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter$lzycompute(Cast.scala:287) at org.apache.spark.sql.catalyst.expressions.CastBase.dateFormatter(Cast.scala:287) ``` ### Why are the changes needed? As described above, it's a bug here. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add more UT Closes #30213 from WangGuangxin/SPARK-33306. 
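A minimal sanity check of the new behavior (an editor's sketch against the `Cast` object touched in this patch, not code that is part of the change):
```scala
// Sketch only: after this fix, a date-to-string cast is reported as
// time-zone dependent, so the analyzer's time-zone resolution attaches a
// zoneId to the Cast and the date formatter no longer hits None.get when the
// legacy datetimeToString coercion kicks in.
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types.{DateType, StringType}

object DateToStringNeedsTimeZone {
  def main(args: Array[String]): Unit = {
    assert(Cast.needsTimeZone(DateType, StringType)) // was false before this fix
    assert(Cast.needsTimeZone(StringType, DateType)) // unchanged: already true
  }
}
```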
Authored-by: wangguangxin.cn Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/Cast.scala | 1 + .../org/apache/spark/sql/SQLQuerySuite.scala | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index bf759db59f3e6..610297cfd50b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -108,6 +108,7 @@ object Cast { */ def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match { case (StringType, TimestampType | DateType) => true + case (DateType, StringType) => true case (DateType, TimestampType) => true case (TimestampType, StringType) => true case (TimestampType, DateType) => true diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index a002f720a3c4a..0dd2a286772a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3691,6 +3691,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("SELECT id FROM t WHERE (SELECT true)"), Row(0L)) } } + + test("SPARK-33306: Timezone is needed when cast Date to String") { + withTempView("t1", "t2") { + spark.sql("select to_date(concat('2000-01-0', id)) as d from range(1, 2)") + .createOrReplaceTempView("t1") + spark.sql("select concat('2000-01-0', id) as d from range(1, 2)") + .createOrReplaceTempView("t2") + val result = Date.valueOf("2000-01-01") + + checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result)) + withSQLConf(SQLConf.LEGACY_CAST_DATETIME_TO_STRING.key -> "true") { + checkAnswer(sql("select t1.d from t1 join t2 on t1.d = t2.d"), Row(result)) + } + } + } } case class Foo(bar: Option[String]) From 56587f076d282ec96c4779faa63d7d9764cf0c3c Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sun, 1 Nov 2020 19:09:12 +0900 Subject: [PATCH 0368/1009] [SPARK-33310][PYTHON] Relax pyspark typing for sql str functions ### What changes were proposed in this pull request? Relax pyspark typing for sql str functions. These functions all pass the first argument through `_to_java_column`, such that a string or Column object is acceptable. ### Why are the changes needed? Convenience & ensuring the typing reflects the functionality ### Does this PR introduce _any_ user-facing change? Yes, a backwards-compatible increase in functionality. But I think typing support is unreleased, so possibly no change to released versions. ### How was this patch tested? Not tested. I am newish to Python typing with stubs, so someone should confirm this is the correct way to fix this. Closes #30209 from dhimmel/patch-1. Authored-by: Daniel Himmelstein Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.pyi | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 1d048efcc3ca5..7ba3f07e17c19 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -155,11 +155,11 @@ def overlay( def substring(str: ColumnOrName, pos: int, len: int) -> Column: ... def substring_index(str: ColumnOrName, delim: str, count: int) -> Column: ... 
def levenshtein(left: ColumnOrName, right: ColumnOrName) -> Column: ... -def locate(substr: str, str: Column, pos: int = ...) -> Column: ... -def lpad(col: Column, len: int, pad: str) -> Column: ... -def rpad(col: Column, len: int, pad: str) -> Column: ... -def repeat(col: Column, n: int) -> Column: ... -def split(str: Column, pattern: str, limit: int = ...) -> Column: ... +def locate(substr: str, str: ColumnOrName, pos: int = ...) -> Column: ... +def lpad(col: ColumnOrName, len: int, pad: str) -> Column: ... +def rpad(col: ColumnOrName, len: int, pad: str) -> Column: ... +def repeat(col: ColumnOrName, n: int) -> Column: ... +def split(str: ColumnOrName, pattern: str, limit: int = ...) -> Column: ... def regexp_extract(str: ColumnOrName, pattern: str, idx: int) -> Column: ... def regexp_replace(str: ColumnOrName, pattern: str, replacement: str) -> Column: ... def initcap(col: ColumnOrName) -> Column: ... From b8a440f09880c596325dd9e6caae6b470be76a8f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Sun, 1 Nov 2020 20:28:12 +0900 Subject: [PATCH 0369/1009] [SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends ### What changes were proposed in this pull request? As the Python evaluation consumes the parent iterator in a separate thread, it could consume more data from the parent even after the task ends and the parent is closed. Thus, we should use `ContextAwareIterator` to stop consuming after the task ends. ### Why are the changes needed? Python/Pandas UDF right after off-heap vectorized reader could cause executor crash. E.g.,: ```py spark.range(0, 100000, 1, 1).write.parquet(path) spark.conf.set("spark.sql.columnVector.offheap.enabled", True) def f(x): return 0 fUdf = udf(f, LongType()) spark.read.parquet(path).select(fUdf('id')).head() ``` This is because, the Python evaluation consumes the parent iterator in a separate thread and it consumes more data from the parent even after the task ends and the parent is closed. If an off-heap column vector exists in the parent iterator, it could cause segmentation fault which crashes the executor. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests, and manually. Closes #30177 from ueshin/issues/SPARK-33277/python_pandas_udf. Authored-by: Takuya UESHIN Signed-off-by: HyukjinKwon --- python/pyspark/sql/tests/test_pandas_map.py | 22 +++++++++++++++++++ .../sql/tests/test_pandas_udf_scalar.py | 19 ++++++++++++++++ python/pyspark/sql/tests/test_udf.py | 20 +++++++++++++++++ .../sql/execution/python/EvalPythonExec.scala | 18 ++++++++++++++- .../execution/python/MapInPandasExec.scala | 7 +++--- 5 files changed, 82 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 3ca437f75fc23..2cad30c7294d4 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -15,9 +15,12 @@ # limitations under the License. 
# import os +import shutil +import tempfile import time import unittest +from pyspark.sql import Row from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -112,6 +115,25 @@ def func(iterator): expected = df.collect() self.assertEquals(actual, expected) + # SPARK-33277 + def test_map_in_pandas_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 200000, 1, 1).write.parquet(path) + + def func(iterator): + for pdf in iterator: + yield pd.DataFrame({'id': [0] * len(pdf)}) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).mapInPandas(func, 'id long').head(), Row(0)) + finally: + shutil.rmtree(path) + if __name__ == "__main__": from pyspark.sql.tests.test_pandas_map import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 6d325c9085ce1..c2c8f6f697c4b 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -1137,6 +1137,25 @@ def test_datasource_with_udf(self): finally: shutil.rmtree(path) + # SPARK-33277 + def test_pandas_udf_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 200000, 1, 1).write.parquet(path) + + @pandas_udf(LongType()) + def udf(x): + return pd.Series([0] * len(x)) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).select(udf('id')).head(), Row(0)) + finally: + shutil.rmtree(path) + if __name__ == "__main__": from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index a7dcbfd32ac1c..c2e95fd41c5b4 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -674,6 +674,26 @@ def test_udf_cache(self): self.assertEqual(df.select(udf(func)("id"))._jdf.queryExecution() .withCachedData().getClass().getSimpleName(), 'InMemoryRelation') + # SPARK-33277 + def test_udf_with_column_vector(self): + path = tempfile.mkdtemp() + shutil.rmtree(path) + + try: + self.spark.range(0, 100000, 1, 1).write.parquet(path) + + def f(x): + return 0 + + fUdf = udf(f, LongType()) + + for offheap in ["true", "false"]: + with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): + self.assertEquals( + self.spark.read.parquet(path).select(fUdf('id')).head(), Row(0)) + finally: + shutil.rmtree(path) + class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 298d63478b63e..89c7716f7c1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -89,6 +89,7 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(iter, context) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. 
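For readers following the hunks in this file, an editor's condensed sketch of the guard the new `contextAwareIterator` relies on (assembled from the changes in this patch, not an additional change):
```scala
// Sketch duplicating the ContextAwareIterator introduced by this patch: the
// wrapper stops the Python feeder thread from pulling more rows from the
// parent iterator once the task has completed or been interrupted, so freed
// off-heap column vectors are never touched again.
import org.apache.spark.TaskContext

class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] {
  override def hasNext: Boolean =
    !context.isCompleted() && !context.isInterrupted() && iter.hasNext

  override def next(): IN = iter.next()
}

// Wrapping mirrors the change above: new ContextAwareIterator(iter, TaskContext.get())
```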
@@ -120,7 +121,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. - val projectedRowIter = iter.map { inputRow => + val projectedRowIter = contextAwareIterator.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } @@ -137,3 +138,18 @@ trait EvalPythonExec extends UnaryExecNode { } } } + +/** + * A TaskContext aware iterator. + * + * As the Python evaluation consumes the parent iterator in a separate thread, + * it could consume more data from the parent even after the task ends and the parent is closed. + * Thus, we should use ContextAwareIterator to stop consuming after the task ends. + */ +class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] { + + override def hasNext: Boolean = + !context.isCompleted() && !context.isInterrupted() && iter.hasNext + + override def next(): IN = iter.next() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 2bb808119c0ae..7fc18f885a2d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -61,16 +61,17 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema + val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(inputIter, context) + // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = inputIter.map(InternalRow(_)) + val wrappedIter = contextAwareIterator.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) - val context = TaskContext.get() - val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From 2b6dfa5f7bdd2f2ae7b4d53bb811ccb8563377c5 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Sun, 1 Nov 2020 23:57:57 +0800 Subject: [PATCH 0370/1009] [SPARK-20044][UI] Support Spark UI behind front-end reverse proxy using a path prefix Revert proxy url MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Allow to run the Spark web UI behind a reverse proxy with URLs prefixed by a context root, like www.mydomain.com/spark. In particular, this allows to access multiple Spark clusters through the same virtual host, only distinguishing them by context root, like www.mydomain.com/cluster1, www.mydomain.com/cluster2, and it allows to run the Spark UI in a common cookie domain (for SSO) with other services. ### Why are the changes needed? This PR is to take over https://github.com/apache/spark/pull/17455. After changes, Spark allows showing customized prefix URL in all the `href` links of the HTML pages. ### Does this PR introduce _any_ user-facing change? Yes, all the links of UI pages will be contains the value of `spark.ui.reverseProxyUrl` if it is configurated. ### How was this patch tested? 
New HTML Unit tests in MasterSuite Manual UI testing for master, worker and app UI with an nginx proxy Spark config: ``` spark.ui.port 8080 spark.ui.reverseProxy=true spark.ui.reverseProxyUrl=/path/to/spark/ ``` nginx config: ``` server { listen 9000; set $SPARK_MASTER http://127.0.0.1:8080; # split spark UI path into prefix and local path within master UI location ~ ^(/path/to/spark/) { # strip prefix when forwarding request rewrite /path/to/spark(/.*) $1 break; #rewrite /path/to/spark/ "/" ; # forward to spark master UI proxy_pass $SPARK_MASTER; proxy_intercept_errors on; error_page 301 302 307 = handle_redirects; } location handle_redirects { set $saved_redirect_location '$upstream_http_location'; proxy_pass $saved_redirect_location; } } ``` Closes #29820 from gengliangwang/revertProxyURL. Lead-authored-by: Gengliang Wang Co-authored-by: Oliver Köth Signed-off-by: Gengliang Wang --- .../scala/org/apache/spark/SparkContext.scala | 4 +- .../apache/spark/deploy/master/Master.scala | 8 +- .../spark/deploy/worker/ExecutorRunner.scala | 3 +- .../apache/spark/deploy/worker/Worker.scala | 9 +- .../scala/org/apache/spark/ui/UIUtils.scala | 3 +- .../spark/deploy/master/MasterSuite.scala | 101 ++++++++++++++++-- docs/configuration.md | 25 ++++- 7 files changed, 140 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 501e865c4105a..b35768222437c 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -570,7 +570,9 @@ class SparkContext(config: SparkConf) extends Logging { _applicationAttemptId = _taskScheduler.applicationAttemptId() _conf.set("spark.app.id", _applicationId) if (_conf.get(UI_REVERSE_PROXY)) { - System.setProperty("spark.ui.proxyBase", "/proxy/" + _applicationId) + val proxyUrl = _conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + "/proxy/" + _applicationId + System.setProperty("spark.ui.proxyBase", proxyUrl) } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index ceeb01149f5db..a582a5d045855 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -147,7 +147,13 @@ private[deploy] class Master( webUi.bind() masterWebUiUrl = s"${webUi.scheme}$masterPublicAddress:${webUi.boundPort}" if (reverseProxy) { - masterWebUiUrl = conf.get(UI_REVERSE_PROXY_URL).orElse(Some(masterWebUiUrl)).get + val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/")) + if (uiReverseProxyUrl.nonEmpty) { + System.setProperty("spark.ui.proxyBase", uiReverseProxyUrl.get) + // If the master URL has a path component, it must end with a slash. + // Otherwise the browser generates incorrect relative links + masterWebUiUrl = uiReverseProxyUrl.get + "/" + } webUi.addProxy() logInfo(s"Spark Master is acting as a reverse proxy. 
Master, Workers and " + s"Applications UIs are available at $masterWebUiUrl") diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index e4fcae13a2f89..2e26ccf671d88 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -171,7 +171,8 @@ private[deploy] class ExecutorRunner( // Add webUI log urls val baseUrl = if (conf.get(UI_REVERSE_PROXY)) { - s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" + conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + + s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType=" } else { s"$webUiScheme$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType=" } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0660dbdafd605..a6092f637a9cb 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -276,7 +276,14 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (reverseProxy) { - logInfo(s"WorkerWebUI is available at $activeMasterWebUiUrl/proxy/$workerId") + logInfo("WorkerWebUI is available at %s/proxy/%s".format( + activeMasterWebUiUrl.stripSuffix("/"), workerId)) + // if reverseProxyUrl is not set, then we continue to generate relative URLs + // starting with "/" throughout the UI and do not use activeMasterWebUiUrl + val proxyUrl = conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") + // In the method `UIUtils.makeHref`, the URL segment "/proxy/$worker_id" will be appended + // after `proxyUrl`, so no need to set the worker ID in the `spark.ui.proxyBase` here. + System.setProperty("spark.ui.proxyBase", proxyUrl) } // Cancel any outstanding re-registration attempts because we found a new master cancelLastRegistrationRetry() diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index dba6f8e8440cb..5e3406037a72b 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -639,7 +639,8 @@ private[spark] object UIUtils extends Logging { */ def makeHref(proxy: Boolean, id: String, origHref: String): String = { if (proxy) { - s"/proxy/$id" + val proxyPrefix = sys.props.getOrElse("spark.ui.proxyBase", "") + proxyPrefix + "/proxy/" + id } else { origHref } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 3329300b64d13..a46799df069d6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -143,6 +143,10 @@ class MockExecutorLaunchFailWorker(master: Master, conf: SparkConf = new SparkCo class MasterSuite extends SparkFunSuite with Matchers with Eventually with PrivateMethodTester with BeforeAndAfter { + // regex to extract worker links from the master webui HTML + // groups represent URL and worker ID + val WORKER_LINK_RE = """
    \s*(worker-.+?)\s*""".r + private var _master: Master = _ after { @@ -320,10 +324,10 @@ class MasterSuite extends SparkFunSuite val conf = new SparkConf() val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") - .getLines().mkString("\n") + val json = Source.fromURL(s"$masterUrl/json").getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => @@ -332,6 +336,16 @@ class MasterSuite extends SparkFunSuite .getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + val workerLinks = (WORKER_LINK_RE findAllMatchIn html).toList + workerLinks.size should be (2) + workerLinks foreach { case WORKER_LINK_RE(workerUrl, workerId) => + val workerHtml = Source.fromURL(workerUrl).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + } } } finally { localCluster.stop() @@ -340,31 +354,106 @@ class MasterSuite extends SparkFunSuite test("master/worker web ui available with reverseProxy") { implicit val formats = org.json4s.DefaultFormats - val reverseProxyUrl = "http://localhost:8080" + val conf = new SparkConf() + conf.set(UI_REVERSE_PROXY, true) + val localCluster = new LocalSparkCluster(2, 2, 512, conf) + localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" + try { + eventually(timeout(5.seconds), interval(100.milliseconds)) { + val json = Source.fromURL(s"$masterUrl/json") + .getLines().mkString("\n") + val JArray(workers) = (parse(json) \ "workers") + workers.size should be (2) + workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. 
+ // explicitly construct reverse proxy url targeting the master + val JString(workerId) = workerSummaryJson \ "id" + val url = s"$masterUrl/proxy/${workerId}/json" + val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) + (workerResponse \ "cores").extract[Int] should be (2) + } + + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + html should include ("""href="/static""") + html should include ("""src="/static""") + verifyWorkerUI(html, masterUrl) + } + } finally { + localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + test("master/worker web ui available behind front-end reverseProxy") { + implicit val formats = org.json4s.DefaultFormats + val reverseProxyUrl = "http://proxyhost:8080/path/to/spark" val conf = new SparkConf() conf.set(UI_REVERSE_PROXY, true) conf.set(UI_REVERSE_PROXY_URL, reverseProxyUrl) val localCluster = new LocalSparkCluster(2, 2, 512, conf) localCluster.start() + val masterUrl = s"http://localhost:${localCluster.masterWebUIPort}" try { eventually(timeout(5.seconds), interval(100.milliseconds)) { - val json = Source.fromURL(s"http://localhost:${localCluster.masterWebUIPort}/json") + val json = Source.fromURL(s"$masterUrl/json") .getLines().mkString("\n") val JArray(workers) = (parse(json) \ "workers") workers.size should be (2) workers.foreach { workerSummaryJson => + // the webuiaddress intentionally points to the local web ui. + // explicitly construct reverse proxy url targeting the master val JString(workerId) = workerSummaryJson \ "id" - val url = s"http://localhost:${localCluster.masterWebUIPort}/proxy/${workerId}/json" + val url = s"$masterUrl/proxy/${workerId}/json" val workerResponse = parse(Source.fromURL(url).getLines().mkString("\n")) (workerResponse \ "cores").extract[Int] should be (2) - (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl) + (workerResponse \ "masterwebuiurl").extract[String] should be (reverseProxyUrl + "/") } + + // with LocalCluster, we have masters and workers in the same JVM, each overwriting + // system property spark.ui.proxyBase. 
+ // so we need to manage this property explicitly for test + System.getProperty("spark.ui.proxyBase") should startWith + (s"$reverseProxyUrl/proxy/worker-") + System.setProperty("spark.ui.proxyBase", reverseProxyUrl) + val html = Source.fromURL(s"$masterUrl/").getLines().mkString("\n") + html should include ("Spark Master at spark://") + verifyStaticResourcesServedByProxy(html, reverseProxyUrl) + verifyWorkerUI(html, masterUrl, reverseProxyUrl) } } finally { localCluster.stop() + System.getProperties().remove("spark.ui.proxyBase") + } + } + + private def verifyWorkerUI(masterHtml: String, masterUrl: String, + reverseProxyUrl: String = ""): Unit = { + val workerLinks = (WORKER_LINK_RE findAllMatchIn masterHtml).toList + workerLinks.size should be (2) + workerLinks foreach { + case WORKER_LINK_RE(workerUrl, workerId) => + workerUrl should be (s"$reverseProxyUrl/proxy/$workerId") + // there is no real front-end proxy as defined in $reverseProxyUrl + // construct url directly targeting the master + val url = s"$masterUrl/proxy/$workerId/" + System.setProperty("spark.ui.proxyBase", workerUrl) + val workerHtml = Source.fromURL(url).getLines().mkString("\n") + workerHtml should include ("Spark Worker at") + workerHtml should include ("Running Executors (0)") + verifyStaticResourcesServedByProxy(workerHtml, workerUrl) + case _ => fail // make sure we don't accidentially skip the tests } } + private def verifyStaticResourcesServedByProxy(html: String, proxyUrl: String): Unit = { + html should not include ("""href="/static""") + html should include (s"""href="$proxyUrl/static""") + html should not include ("""src="/static""") + html should include (s"""src="$proxyUrl/static""") + } + test("basic scheduling - spread out") { basicScheduling(spreadOut = true) } diff --git a/docs/configuration.md b/docs/configuration.md index 232ea4079d436..aab18f23a083f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1193,8 +1193,29 @@ Apart from these, the following properties are also available, and may be useful spark.ui.reverseProxyUrl - This is the URL where your proxy is running. This URL is for proxy which is running in front of Spark Master. This is useful when running proxy for authentication e.g. OAuth proxy. Make sure this is a complete URL including scheme (http/https) and port to reach your proxy. - + If the Spark UI should be served through another front-end reverse proxy, this is the URL + for accessing the Spark master UI through that reverse proxy. + This is useful when running proxy for authentication e.g. an OAuth proxy. The URL may contain + a path prefix, like http://mydomain.com/path/to/spark/, allowing you to serve the + UI for multiple Spark clusters and other web applications through the same virtual host and + port. + Normally, this should be an absolute URL including scheme (http/https), host and port. + It is possible to specify a relative URL starting with "/" here. In this case, all URLs + generated by the Spark UI and Spark REST APIs will be server-relative links -- this will still + work, as the entire Spark UI is served through the same host and port. +
    The setting affects link generation in the Spark UI, but the front-end reverse proxy + is responsible for +
      • stripping a path prefix before forwarding the request,
+      • rewriting redirects which point directly to the Spark master,
+      • redirecting access from http://mydomain.com/path/to/spark to
+      http://mydomain.com/path/to/spark/ (trailing slash after path prefix); otherwise
+      relative links on the master page do not work correctly.
    + This setting affects all the workers and application UIs running in the cluster and must be set + identically on all the workers, drivers and masters. In is only effective when + spark.ui.reverseProxy is turned on. This setting is not needed when the Spark + master web UI is directly reachable. 2.1.0 From d71b2febaf536113ffe4ad0626d1d3b4098b98a5 Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 2 Nov 2020 08:54:08 +0900 Subject: [PATCH 0371/1009] [SPARK-30663][SPARK-33313][TESTS][R] Drop testthat 1.x support and add testthat 3.x support ### What changes were proposed in this pull request? This PR modifies `R/pkg/tests/run-all.R` by: - Removing `testthat` 1.x support, as Jenkins has been upgraded to 2.x with SPARK-30637 and this code is no longer relevant. - Add `testthat` 3.x support to avoid AppVeyor failures. ### Why are the changes needed? Currently used internal API has been removed in the latest `testthat` release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests executed against `testthat == 2.3.2` and `testthat == 3.0.0` Closes #30219 from zero323/SPARK-33313. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/tests/run-all.R | 44 +++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 3043df0f12075..f9e266eb4e014 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -60,25 +60,37 @@ if (identical(Sys.getenv("NOT_CRAN"), "true")) { # set random seed for predictable results. mostly for base's sample() in tree and classification set.seed(42) - # TODO (SPARK-30663) To be removed once testthat 1.x is removed from all builds - if (packageVersion("testthat")$major <= 1) { - # testthat 1.x - test_runner <- testthat:::run_tests - reporter <- "summary" + if (packageVersion("testthat")$major <= 1) stop("testhat 1.x is not supported") + + test_runner <- if (packageVersion("testthat")$major == 2) { + # testthat >= 2.0.0, < 3.0.0 + function(path, package, reporter, filter) { + testthat:::test_package_dir( + test_path = path, + package = package, + filter = filter, + reporter = reporter + ) + } } else { - # testthat >= 2.0.0 - test_runner <- testthat:::test_package_dir - dir.create("target/test-reports", showWarnings = FALSE) - reporter <- MultiReporter$new(list( - SummaryReporter$new(), - JunitReporter$new(file = "target/test-reports/test-results.xml") - )) + # testthat >= 3.0.0 + testthat::test_dir } - test_runner("SparkR", - file.path(sparkRDir, "pkg", "tests", "fulltests"), - NULL, - reporter) + dir.create("target/test-reports", showWarnings = FALSE) + reporter <- MultiReporter$new(list( + SummaryReporter$new(), + JunitReporter$new( + file = file.path(getwd(), "target/test-reports/test-results.xml") + ) + )) + + test_runner( + path = file.path(sparkRDir, "pkg", "tests", "fulltests"), + package = "SparkR", + reporter = reporter, + filter = NULL + ) } SparkR:::uninstallDownloadedSpark() From 6226ccc092c0e24487ee80dc169eb15b32825bce Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Mon, 2 Nov 2020 05:03:41 +0000 Subject: [PATCH 0372/1009] [SPARK-33095] Follow up, support alter table column rename ### What changes were proposed in this pull request? Support rename column for mysql dialect. ### Why are the changes needed? At the moment, it does not work for mysql version 5.x. So, we should throw proper exception for that case. ### Does this PR introduce _any_ user-facing change? 
Yes, `column rename` with mysql dialect should work correctly. ### How was this patch tested? Added tests for rename column. Ran the tests to pass with both versions of mysql. * `export MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31` * `export MYSQL_DOCKER_IMAGE_NAME=mysql:8.0` Closes #30142 from ScrapCodes/mysql-dialect-rename. Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 28 +++++++++++++++--- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 29 ++++++++++++++++++- .../datasources/jdbc/JdbcUtils.scala | 9 +++--- .../apache/spark/sql/jdbc/JdbcDialects.scala | 13 +++++++-- .../apache/spark/sql/jdbc/MySQLDialect.scala | 19 ++++++++++++ 5 files changed, 86 insertions(+), 12 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index ec958cd55c943..6cf0f56ee7eeb 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -50,7 +50,8 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override val jdbcPort: Int = 3306 override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:mysql://$ip:$port/mysql?user=root&password=rootpass" + s"jdbc:mysql://$ip:$port/" + + s"mysql?user=root&password=rootpass&allowPublicKeyRetrieval=true&useSSL=false" } override def sparkConf: SparkConf = super.sparkConf @@ -59,7 +60,11 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override val connectionTimeout = timeout(7.minutes) - override def dataPreparation(conn: Connection): Unit = {} + private var mySQLVersion = -1 + + override def dataPreparation(conn: Connection): Unit = { + mySQLVersion = conn.getMetaData.getDatabaseMajorVersion + } override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") @@ -77,11 +82,26 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) } + override def testRenameColumn(tbl: String): Unit = { + assert(mySQLVersion > 0) + if (mySQLVersion < 8) { + // Rename is unsupported for mysql versions < 8.0. + val exception = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl RENAME COLUMN ID TO RENAMED") + } + assert(exception.getCause != null, s"Wrong exception thrown: $exception") + val msg = exception.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + assert(msg.contains("Rename column is only supported for MySQL version 8.0 and above.")) + } else { + super.testRenameColumn(tbl) + } + } + override def testUpdateColumnNullability(tbl: String): Unit = { - sql("CREATE TABLE mysql.alt_table (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") // Update nullability is unsupported for mysql db. 
val msg = intercept[AnalysisException] { - sql("ALTER TABLE mysql.alt_table ALTER COLUMN ID DROP NOT NULL") + sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage assert(msg.contains("UpdateColumnNullability is not supported")) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 8419db7784e88..92af29d9c9467 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @DockerTest -trait V2JDBCTest extends SharedSparkSession { +private[v2] trait V2JDBCTest extends SharedSparkSession { val catalogName: String // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit @@ -46,6 +46,14 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Cannot update missing field bad_column")) } + def testRenameColumn(tbl: String): Unit = { + sql(s"ALTER TABLE $tbl RENAME COLUMN ID TO RENAMED") + val t = spark.table(s"$tbl") + val expectedSchema = new StructType().add("RENAMED", StringType, nullable = true) + .add("ID1", StringType, nullable = true).add("ID2", StringType, nullable = true) + assert(t.schema === expectedSchema) + } + test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -110,6 +118,24 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } + test("SPARK-33034: ALTER TABLE ... rename column") { + withTable(s"$catalogName.alt_table") { + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL) USING _") + testRenameColumn(s"$catalogName.alt_table") + // Rename to already existing column + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.alt_table RENAME COLUMN ID1 TO ID2") + }.getMessage + assert(msg.contains("Cannot rename column, because ID2 already exists")) + } + // Rename a column in a not existing table + val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalogName.not_existing_table RENAME COLUMN ID TO C") + }.getMessage + assert(msg.contains("Table not found")) + } + test("SPARK-33034: ALTER TABLE ... 
update column nullability") { withTable(s"$catalogName.alt_table") { testUpdateColumnNullability(s"$catalogName.alt_table") @@ -121,3 +147,4 @@ trait V2JDBCTest extends SharedSparkSession { assert(msg.contains("Table not found")) } } + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 24e380e3be3e1..9aaa55980436e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -895,11 +895,12 @@ object JdbcUtils extends Logging { changes: Seq[TableChange], options: JDBCOptions): Unit = { val dialect = JdbcDialects.get(options.url) + val metaData = conn.getMetaData if (changes.length == 1) { - executeStatement(conn, options, dialect.alterTable(tableName, changes)(0)) + executeStatement(conn, options, dialect.alterTable(tableName, changes, + metaData.getDatabaseMajorVersion)(0)) } else { - val metadata = conn.getMetaData - if (!metadata.supportsTransactions) { + if (!metaData.supportsTransactions) { throw new SQLFeatureNotSupportedException("The target JDBC server does not support " + "transaction and can only support ALTER TABLE with a single action.") } else { @@ -907,7 +908,7 @@ object JdbcUtils extends Logging { val statement = conn.createStatement try { statement.setQueryTimeout(options.queryTimeout) - for (sql <- dialect.alterTable(tableName, changes)) { + for (sql <- dialect.alterTable(tableName, changes, metaData.getDatabaseMajorVersion)) { statement.executeUpdate(sql) } conn.commit() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index e0703195051dc..0a857b99966fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -205,7 +205,10 @@ abstract class JdbcDialect extends Serializable { * @param changes Changes to apply to the table. * @return The SQL statements to use for altering the table. 
*/ - def alterTable(tableName: String, changes: Seq[TableChange]): Array[String] = { + def alterTable( + tableName: String, + changes: Seq[TableChange], + dbMajorVersion: Int): Array[String] = { val updateClause = ArrayBuilder.make[String] for (change <- changes) { change match { @@ -215,7 +218,7 @@ abstract class JdbcDialect extends Serializable { updateClause += getAddColumnQuery(tableName, name(0), dataType) case rename: RenameColumn if rename.fieldNames.length == 1 => val name = rename.fieldNames - updateClause += getRenameColumnQuery(tableName, name(0), rename.newName) + updateClause += getRenameColumnQuery(tableName, name(0), rename.newName, dbMajorVersion) case delete: DeleteColumn if delete.fieldNames.length == 1 => val name = delete.fieldNames updateClause += getDeleteColumnQuery(tableName, name(0)) @@ -237,7 +240,11 @@ abstract class JdbcDialect extends Serializable { def getAddColumnQuery(tableName: String, columnName: String, dataType: String): String = s"ALTER TABLE $tableName ADD COLUMN ${quoteIdentifier(columnName)} $dataType" - def getRenameColumnQuery(tableName: String, columnName: String, newName: String): String = + def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + s" ${quoteIdentifier(newName)}" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index a516e9e76ef31..942cdc9619b56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -57,6 +57,25 @@ private case object MySQLDialect extends JdbcDialect { s"ALTER TABLE $tableName MODIFY COLUMN ${quoteIdentifier(columnName)} $newDataType" } + // See Old Syntax: https://dev.mysql.com/doc/refman/5.6/en/alter-table.html + // According to https://dev.mysql.com/worklog/task/?id=10761 old syntax works for + // both versions of MySQL i.e. 5.x and 8.0 + // The old syntax requires us to have type definition. Since we do not have type + // information, we throw the exception for old version. + override def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = { + if (dbMajorVersion >= 8) { + s"ALTER TABLE $tableName RENAME COLUMN ${quoteIdentifier(columnName)} TO" + + s" ${quoteIdentifier(newName)}" + } else { + throw new SQLFeatureNotSupportedException( + s"Rename column is only supported for MySQL version 8.0 and above.") + } + } + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html // require to have column data type to change the column nullability // ALTER TABLE tbl_name MODIFY [COLUMN] col_name column_definition From e52b858ef71fd2f05e3653e15e91252c04fcefd4 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Mon, 2 Nov 2020 06:44:07 +0000 Subject: [PATCH 0373/1009] [SPARK-33027][SQL] Add DisableUnnecessaryBucketedScan rule to AQE ### What changes were proposed in this pull request? As a followup comment from https://github.com/apache/spark/pull/29804#issuecomment-700650620 , here we add add the physical plan rule DisableUnnecessaryBucketedScan into AQE AdaptiveSparkPlanExec.queryStagePreparationRules, to make auto bucketed scan work with AQE. 
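As a rough usage sketch of what this combination enables (assuming `spark` is an existing SparkSession; the auto bucketed scan flag name comes from the earlier SPARK-32859 work and is assumed here rather than taken from this diff):

```
// Enable adaptive query execution together with auto bucketed scan selection.
spark.conf.set("spark.sql.adaptive.enabled", "true")
spark.conf.set("spark.sql.sources.bucketing.autoBucketedScan.enabled", "true")

// With DisableUnnecessaryBucketedScan registered as an AQE stage preparation rule,
// a query like this keeps its logical plan link, and the bucketed read can be
// turned into a regular scan when the bucketing brings no benefit.
spark.sql("SELECT * FROM bucketed_table").explain()
```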
The change is mostly in: * `AdaptiveSparkPlanExec.scala`: add physical plan rule `DisableUnnecessaryBucketedScan` * `DisableUnnecessaryBucketedScan.scala`: propagate logical plan link for the file source scan exec operator, otherwise we lose the logical plan link information when AQE is enabled, and will get exception [here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala#L176). (for example, for query `SELECT * FROM bucketed_table` with AQE is enabled) * `DisableUnnecessaryBucketedScanSuite.scala`: add new test suite for AQE enabled - `DisableUnnecessaryBucketedScanWithoutHiveSupportSuiteAE`, and changed some of tests to use `AdaptiveSparkPlanHelper.find/collect`, to make the plan verification work when AQE enabled. ### Why are the changes needed? It's reasonable to add the support to allow disabling unnecessary bucketed scan with AQE is enabled, this helps optimize the query when AQE is enabled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit test in `DisableUnnecessaryBucketedScanSuite`. Closes #30200 from c21/auto-bucket-aqe. Authored-by: Cheng Su Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 12 ++++---- .../DisableUnnecessaryBucketedScan.scala | 4 ++- .../DisableUnnecessaryBucketedScanSuite.scala | 28 +++++++++++++++---- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index a4a58dfe1de53..4ae33311d5a24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ +import org.apache.spark.sql.execution.bucketing.DisableUnnecessaryBucketedScan import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ @@ -82,17 +83,14 @@ case class AdaptiveSparkPlanExec( // The logical plan optimizer for re-optimizing the current logical plan. @transient private val optimizer = new AQEOptimizer(conf) - @transient private val removeRedundantProjects = RemoveRedundantProjects - @transient private val removeRedundantSorts = RemoveRedundantSorts - @transient private val ensureRequirements = EnsureRequirements - // A list of physical plan rules to be applied before creation of query stages. The physical // plan should reach a final status of query stages (i.e., no more addition or removal of // Exchange nodes) after running these rules. private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( - removeRedundantProjects, - removeRedundantSorts, - ensureRequirements + RemoveRedundantProjects, + RemoveRedundantSorts, + EnsureRequirements, + DisableUnnecessaryBucketedScan ) ++ context.session.sessionState.queryStagePrepRules // A list of physical optimizer rules to be applied to a new stage before its execution. 
These diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index 2bbd5f5d969dc..bb59f44abc761 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -101,7 +101,9 @@ object DisableUnnecessaryBucketedScan extends Rule[SparkPlan] { case scan: FileSourceScanExec => if (isBucketedScanWithoutFilter(scan)) { if (!withInterestingPartition || (withExchange && withAllowedNode)) { - scan.copy(disableBucketedScan = true) + val nonBucketedScan = scan.copy(disableBucketedScan = true) + scan.logicalLink.foreach(nonBucketedScan.setLogicalLink) + nonBucketedScan } else { scan } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 70b74aed40eca..1fdd3be88f782 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -21,6 +21,8 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION @@ -28,7 +30,8 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite extends DisableUnnecessaryBucketedScanSuite - with SharedSparkSession { + with SharedSparkSession + with DisableAdaptiveExecutionSuite { protected override def beforeAll(): Unit = { super.beforeAll() @@ -36,7 +39,22 @@ class DisableUnnecessaryBucketedScanWithoutHiveSupportSuite } } -abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTestUtils { +class DisableUnnecessaryBucketedScanWithoutHiveSupportSuiteAE + extends DisableUnnecessaryBucketedScanSuite + with SharedSparkSession + with EnableAdaptiveExecutionSuite { + + protected override def beforeAll(): Unit = { + super.beforeAll() + assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") + } +} + +abstract class DisableUnnecessaryBucketedScanSuite + extends QueryTest + with SQLTestUtils + with AdaptiveSparkPlanHelper { + import testImplicits._ private lazy val df1 = @@ -51,7 +69,7 @@ abstract class DisableUnnecessaryBucketedScanSuite extends QueryTest with SQLTes def checkNumBucketedScan(query: String, expectedNumBucketedScan: Int): Unit = { val plan = sql(query).queryExecution.executedPlan - val bucketedScan = plan.collect { case s: FileSourceScanExec if s.bucketedScan => s } + val bucketedScan = collect(plan) { case s: FileSourceScanExec if s.bucketedScan => s } assert(bucketedScan.length == expectedNumBucketedScan) } @@ -230,14 +248,14 @@ abstract class DisableUnnecessaryBucketedScanSuite 
extends QueryTest with SQLTes assertCached(spark.table("t1")) // Verify cached bucketed table scan not disabled - val partitioning = spark.table("t1").queryExecution.executedPlan + val partitioning = stripAQEPlan(spark.table("t1").queryExecution.executedPlan) .outputPartitioning assert(partitioning match { case HashPartitioning(Seq(column: AttributeReference), 8) if column.name == "i" => true case _ => false }) val aggregateQueryPlan = sql("SELECT SUM(i) FROM t1 GROUP BY i").queryExecution.executedPlan - assert(aggregateQueryPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert(find(aggregateQueryPlan)(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } } From 789d19cab5caa20d35dcdd700ed7fe53ca1893fe Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 2 Nov 2020 08:34:50 -0800 Subject: [PATCH 0374/1009] [SPARK-33319][SQL][TEST] Add all built-in SerDes to HiveSerDeReadWriteSuite ### What changes were proposed in this pull request? This pr add all built-in SerDes to `HiveSerDeReadWriteSuite`. https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-RowFormats&SerDe ### Why are the changes needed? We will upgrade Parquet, ORC and Avro, need to ensure compatibility. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30228 from wangyum/SPARK-33319. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../hive/execution/HiveSerDeReadWriteSuite.scala | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala index ac9ae8c9229db..aae49f70ca93f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeReadWriteSuite.scala @@ -135,11 +135,12 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS } // MAP withTable("hive_serde") { - hiveClient.runSqlHive(s"CREATE TABLE hive_serde (c1 MAP ) STORED AS $fileFormat") - hiveClient.runSqlHive("INSERT INTO TABLE hive_serde SELECT MAP(1, 'a') FROM (SELECT 1) t") - checkAnswer(spark.table("hive_serde"), Row(Map(1 -> "a"))) - spark.sql("INSERT INTO TABLE hive_serde SELECT MAP(2, 'b')") - checkAnswer(spark.table("hive_serde"), Seq(Row(Map(1 -> "a")), Row(Map(2 -> "b")))) + hiveClient.runSqlHive( + s"CREATE TABLE hive_serde (c1 MAP ) STORED AS $fileFormat") + hiveClient.runSqlHive("INSERT INTO TABLE hive_serde SELECT MAP('1', 'a') FROM (SELECT 1) t") + checkAnswer(spark.table("hive_serde"), Row(Map("1" -> "a"))) + spark.sql("INSERT INTO TABLE hive_serde SELECT MAP('2', 'b')") + checkAnswer(spark.table("hive_serde"), Seq(Row(Map("1" -> "a")), Row(Map("2" -> "b")))) } // STRUCT @@ -154,7 +155,7 @@ class HiveSerDeReadWriteSuite extends QueryTest with SQLTestUtils with TestHiveS } } - Seq("PARQUET", "ORC", "TEXTFILE").foreach { fileFormat => + Seq("SEQUENCEFILE", "TEXTFILE", "RCFILE", "ORC", "PARQUET", "AVRO").foreach { fileFormat => test(s"Read/Write Hive $fileFormat serde table") { // Numeric Types checkNumericTypes(fileFormat, "TINYINT", 2) From eecebd03023bdde5084b7f518d709e304eff7228 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 2 Nov 2020 10:07:18 -0800 Subject: [PATCH 0375/1009] [SPARK-33306][SQL][FOLLOWUP] Group DateType and TimestampType together in `needsTimeZone()` ### What changes were proposed in this pull 
request? In the PR, I propose to group `DateType` and `TimestampType` together in checking time zone needs in the `Cast.needsTimeZone()` method. ### Why are the changes needed? To improve code maintainability. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By the existing test `"SPARK-33306: Timezone is needed when cast Date to String"`. Closes #30223 from MaxGekk/WangGuangxin-SPARK-33306-followup. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 610297cfd50b6..48a9e19c9d953 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -108,9 +108,8 @@ object Cast { */ def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match { case (StringType, TimestampType | DateType) => true - case (DateType, StringType) => true + case (TimestampType | DateType, StringType) => true case (DateType, TimestampType) => true - case (TimestampType, StringType) => true case (TimestampType, DateType) => true case (ArrayType(fromType, _), ArrayType(toType, _)) => needsTimeZone(fromType, toType) case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) => From bdabf60fb4a61b0eef95144f2c54477a10ea849f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 2 Nov 2020 10:10:24 -0800 Subject: [PATCH 0376/1009] [SPARK-33299][SQL][DOCS] Don't mention schemas in JSON format in docs for `from_json` ### What changes were proposed in this pull request? Remove the JSON formatted schema from comments for `from_json()` in Scala/Python APIs. Closes #30201 ### Why are the changes needed? Schemas in JSON format is internal (not documented). It shouldn't be recommenced for usage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By linters. Closes #30226 from MaxGekk/from_json-common-schema-parsing-2. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/functions.py | 3 +-- .../src/main/scala/org/apache/spark/sql/functions.scala | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 69fdf220f19fe..c349ae5cf46c4 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2847,8 +2847,7 @@ def from_json(col, schema, options={}): :param schema: a StructType or ArrayType of StructType to use when parsing the json column. :param options: options to control parsing. accepts the same options as the json datasource - .. note:: Since Spark 2.3, the DDL-formatted string or a JSON format string is also - supported for ``schema``. + .. note:: Since Spark 2.3, the DDL-formatted string is also supported for ``schema``. >>> from pyspark.sql.types import * >>> data = [(1, '''{"a": 1}''')] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ffa97c20c397c..6bb9f7871edf2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -4077,9 +4077,7 @@ object functions { * Returns `null`, in the case of an unparseable string. 
* * @param e a string column containing JSON data. - * @param schema the schema to use when parsing the json string as a json string. In Spark 2.1, - * the user-provided schema has to be in JSON format. Since Spark 2.2, the DDL - * format is also supported for the schema. + * @param schema the schema as a DDL-formatted string. * * @group collection_funcs * @since 2.1.0 @@ -4094,8 +4092,7 @@ object functions { * Returns `null`, in the case of an unparseable string. * * @param e a string column containing JSON data. - * @param schema the schema to use when parsing the json string as a json string, it could be a - * JSON format string or a DDL-formatted string. + * @param schema the schema as a DDL-formatted string. * * @group collection_funcs * @since 2.3.0 From 3959f0d9879fa7fa9e8f2e8ed8c8b12003d21788 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 3 Nov 2020 10:00:49 +0900 Subject: [PATCH 0377/1009] [SPARK-33250][PYTHON][DOCS] Migration to NumPy documentation style in SQL (pyspark.sql.*) ### What changes were proposed in this pull request? This PR proposes to migrate to [NumPy documentation style](https://numpydoc.readthedocs.io/en/latest/format.html), see also SPARK-33243. While I am migrating, I also fixed some Python type hints accordingly. ### Why are the changes needed? For better documentation as text itself, and generated HTMLs ### Does this PR introduce _any_ user-facing change? Yes, they will see a better format of HTMLs, and better text format. See SPARK-33243. ### How was this patch tested? Manually tested via running `./dev/lint-python`. Closes #30181 from HyukjinKwon/SPARK-33250. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/pyspark/sql/avro/functions.py | 45 +- python/pyspark/sql/catalog.py | 43 +- python/pyspark/sql/column.py | 233 +++- python/pyspark/sql/context.py | 272 ++-- python/pyspark/sql/dataframe.py | 842 ++++++++--- python/pyspark/sql/dataframe.pyi | 14 +- python/pyspark/sql/functions.py | 1631 ++++++++++++++++------ python/pyspark/sql/functions.pyi | 2 +- python/pyspark/sql/group.py | 80 +- python/pyspark/sql/pandas/conversion.py | 35 +- python/pyspark/sql/pandas/functions.py | 73 +- python/pyspark/sql/pandas/group_ops.py | 107 +- python/pyspark/sql/pandas/map_ops.py | 22 +- python/pyspark/sql/pandas/serializers.py | 22 +- python/pyspark/sql/pandas/types.py | 68 +- python/pyspark/sql/readwriter.py | 1056 ++++++++------ python/pyspark/sql/readwriter.pyi | 34 +- python/pyspark/sql/session.py | 265 +++- python/pyspark/sql/streaming.py | 845 ++++++----- python/pyspark/sql/streaming.pyi | 17 +- python/pyspark/sql/types.py | 179 ++- python/pyspark/sql/udf.py | 70 +- python/pyspark/sql/utils.py | 12 +- python/pyspark/sql/window.py | 116 +- python/pyspark/sql/window.pyi | 9 +- 25 files changed, 4240 insertions(+), 1852 deletions(-) diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index 75fe4eaa078a1..ce322814e34f8 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -20,12 +20,11 @@ """ -from pyspark import since, SparkContext +from pyspark import SparkContext from pyspark.sql.column import Column, _to_java_column from pyspark.util import _print_missing_jar -@since(3.0) def from_avro(data, jsonFormatSchema, options={}): """ Converts a binary column of Avro format into its corresponding catalyst value. 
@@ -34,13 +33,24 @@ def from_avro(data, jsonFormatSchema, options={}): To deserialize the data with a compatible and evolved schema, the expected Avro schema can be set via the option avroSchema. - Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the - application as per the deployment section of "Apache Avro Data Source Guide". + .. versionadded:: 3.0.0 + + Parameters + ---------- + data : :class:`Column` or str + the binary column. + jsonFormatSchema : str + the avro schema in JSON string format. + options : dict, optional + options to control how the Avro record is parsed. - :param data: the binary column. - :param jsonFormatSchema: the avro schema in JSON string format. - :param options: options to control how the Avro record is parsed. + Notes + ----- + Avro is built-in but external data source module since Spark 2.4. Please deploy the + application as per the deployment section of "Apache Avro Data Source Guide". + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.avro.functions import from_avro, to_avro >>> data = [(1, Row(age=2, name='Alice'))] @@ -48,6 +58,7 @@ def from_avro(data, jsonFormatSchema, options={}): >>> avroDf = df.select(to_avro(df.value).alias("avro")) >>> avroDf.collect() [Row(avro=bytearray(b'\\x00\\x00\\x04\\x00\\nAlice'))] + >>> jsonFormatSchema = '''{"type":"record","name":"topLevelRecord","fields": ... [{"name":"avro","type":[{"type":"record","name":"value","namespace":"topLevelRecord", ... "fields":[{"name":"age","type":["long","null"]}, @@ -67,23 +78,33 @@ def from_avro(data, jsonFormatSchema, options={}): return Column(jc) -@since(3.0) def to_avro(data, jsonFormatSchema=""): """ Converts a column into binary of avro format. - Note: Avro is built-in but external data source module since Spark 2.4. Please deploy the - application as per the deployment section of "Apache Avro Data Source Guide". + .. versionadded:: 3.0.0 + + Parameters + ---------- + data : :class:`Column` or str + the data column. + jsonFormatSchema : str, optional + user-specified output avro schema in JSON string format. - :param data: the data column. - :param jsonFormatSchema: user-specified output avro schema in JSON string format. + Notes + ----- + Avro is built-in but external data source module since Spark 2.4. Please deploy the + application as per the deployment section of "Apache Avro Data Source Guide". + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.avro.functions import to_avro >>> data = ['SPADES'] >>> df = spark.createDataFrame(data, "string") >>> df.select(to_avro(df.value).alias("suite")).collect() [Row(suite=bytearray(b'\\x00\\x0cSPADES'))] + >>> jsonFormatSchema = '''["null", {"type": "enum", "name": "value", ... "symbols": ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]}]''' >>> df.select(to_avro(df.value, jsonFormatSchema).alias("suite")).collect() diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 44e321c557e3d..70d68a04a473c 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -106,13 +106,16 @@ def listFunctions(self, dbName=None): isTemporary=jfunction.isTemporary())) return functions - @since(2.0) def listColumns(self, tableName, dbName=None): """Returns a list of columns for the given table/view in the specified database. If no database is specified, the current database is used. - Note: the order of arguments here is different from that of its JVM counterpart + .. 
versionadded:: 2.0.0 + + Notes + ----- + the order of arguments here is different from that of its JVM counterpart because Python does not support method overloading. """ if dbName is None: @@ -130,7 +133,6 @@ def listColumns(self, tableName, dbName=None): isBucket=jcolumn.isBucket())) return columns - @since(2.0) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates a table based on the dataset in a data source. @@ -143,14 +145,17 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, ** Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created external table. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` """ warnings.warn( "createExternalTable is deprecated since Spark 2.2, please use createTable instead.", DeprecationWarning) return self.createTable(tableName, path, source, schema, **options) - @since(2.2) def createTable( self, tableName, path=None, source=None, schema=None, description=None, **options): """Creates a table based on the dataset in a data source. @@ -165,10 +170,14 @@ def createTable( Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created table. + .. versionadded:: 2.2.0 + + Returns + ------- + :class:`DataFrame` + .. versionchanged:: 3.1 Added the ``description`` parameter. - - :return: :class:`DataFrame` """ if path is not None: options["path"] = path @@ -186,15 +195,20 @@ def createTable( tableName, source, scala_datatype, description, options) return DataFrame(df, self._sparkSession._wrapped) - @since(2.0) def dropTempView(self, viewName): """Drops the local temporary view with the given view name in the catalog. If the view has been cached before, then it will also be uncached. Returns true if this view is dropped successfully, false otherwise. - Note that, the return type of this method was None in Spark 2.0, but changed to Boolean + .. versionadded:: 2.0.0 + + Notes + ----- + The return type of this method was None in Spark 2.0, but changed to Boolean in Spark 2.1. + Examples + -------- >>> spark.createDataFrame([(1, 1)]).createTempView("my_table") >>> spark.table("my_table").collect() [Row(_1=1, _2=1)] @@ -206,12 +220,15 @@ def dropTempView(self, viewName): """ self._jcatalog.dropTempView(viewName) - @since(2.1) def dropGlobalTempView(self, viewName): """Drops the global temporary view with the given view name in the catalog. If the view has been cached before, then it will also be uncached. Returns true if this view is dropped successfully, false otherwise. + .. versionadded:: 2.1.0 + + Examples + -------- >>> spark.createDataFrame([(1, 1)]).createGlobalTempView("my_table") >>> spark.table("global_temp.my_table").collect() [Row(_1=1, _2=1)] @@ -223,12 +240,14 @@ def dropGlobalTempView(self, viewName): """ self._jcatalog.dropGlobalTempView(viewName) - @since(2.0) def registerFunction(self, name, f, returnType=None): """An alias for :func:`spark.udf.register`. See :meth:`pyspark.sql.UDFRegistration.register`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead. + .. versionadded:: 2.0.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.register` instead. """ warnings.warn( "Deprecated in 2.3.0. 
Use spark.udf.register instead.", diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3cf7a033641d8..3dd08d88e92c4 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -19,7 +19,7 @@ import json import warnings -from pyspark import copy_func, since +from pyspark import copy_func from pyspark.context import SparkContext from pyspark.sql.types import DataType, StructField, StructType, IntegerType, StringType @@ -141,7 +141,7 @@ class Column(object): df.colName + 1 1 / df.colName - .. versionadded:: 1.3 + .. versionadded:: 1.3.0 """ def __init__(self, jc): @@ -175,8 +175,15 @@ def __init__(self, jc): _eqNullSafe_doc = """ Equality test that is safe for null values. - :param other: a value or :class:`Column` + .. versionadded:: 2.3.0 + + Parameters + ---------- + other + a value or :class:`Column` + Examples + -------- >>> from pyspark.sql import Row >>> df1 = spark.createDataFrame([ ... Row(id=1, value='foo'), @@ -219,11 +226,11 @@ def __init__(self, jc): | true| false| false| +----------------+---------------+----------------+ - .. note:: Unlike Pandas, PySpark doesn't consider NaN values to be NULL. - See the `NaN Semantics`_ for details. - .. _NaN Semantics: - https://spark.apache.org/docs/latest/sql-programming-guide.html#nan-semantics - .. versionadded:: 2.3.0 + Notes + ----- + Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the + `NaN Semantics `_ + for details. """ eqNullSafe = _bin_op("eqNullSafe", _eqNullSafe_doc) @@ -244,9 +251,14 @@ def __contains__(self, item): _bitwiseOR_doc = """ Compute bitwise OR of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise or(|) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise or(|) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseOR(df.b)).collect() @@ -255,9 +267,14 @@ def __contains__(self, item): _bitwiseAND_doc = """ Compute bitwise AND of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise and(&) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise and(&) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseAND(df.b)).collect() @@ -266,9 +283,14 @@ def __contains__(self, item): _bitwiseXOR_doc = """ Compute bitwise XOR of this expression with another expression. - :param other: a value or :class:`Column` to calculate bitwise xor(^) against - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise xor(^) with + this :class:`Column`. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(a=170, b=75)]) >>> df.select(df.a.bitwiseXOR(df.b)).collect() @@ -279,12 +301,15 @@ def __contains__(self, item): bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc) bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc) - @since(1.3) def getItem(self, key): """ An expression that gets an item at position ``ordinal`` out of a list, or gets an item by key out of a dict. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> df = spark.createDataFrame([([1, 2], {"key": "value"})], ["l", "d"]) >>> df.select(df.l.getItem(0), df.d.getItem("key")).show() +----+------+ @@ -301,11 +326,14 @@ def getItem(self, key): DeprecationWarning) return self[key] - @since(1.3) def getField(self, name): """ An expression that gets a field by name in a StructField. + .. versionadded:: 1.3.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(r=Row(a=1, b="b"))]) >>> df.select(df.r.getField("b")).show() @@ -329,11 +357,14 @@ def getField(self, name): DeprecationWarning) return self[name] - @since(3.1) def withField(self, fieldName, col): """ An expression that adds/replaces a field in :class:`StructType` by name. + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.functions import lit >>> df = spark.createDataFrame([Row(a=Row(b=1, c=2))]) @@ -358,11 +389,14 @@ def withField(self, fieldName, col): return Column(self._jc.withField(fieldName, col._jc)) - @since(3.1) def dropFields(self, *fieldNames): """ An expression that drops fields in :class:`StructType` by name. + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.functions import col, lit >>> df = spark.createDataFrame([ @@ -429,8 +463,13 @@ def __iter__(self): _contains_doc = """ Contains the other element. Returns a boolean :class:`Column` based on a string match. - :param other: string in line + Parameters + ---------- + other + string in line. A value as a literal or a :class:`Column`. + Examples + -------- >>> df.filter(df.name.contains('o')).collect() [Row(age=5, name='Bob')] """ @@ -438,26 +477,43 @@ def __iter__(self): SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex match. - :param other: an extended regex expression + Parameters + ---------- + other : str + an extended regex expression + Examples + -------- >>> df.filter(df.name.rlike('ice$')).collect() [Row(age=2, name='Alice')] """ _like_doc = """ SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match. - :param other: a SQL LIKE pattern + Parameters + ---------- + other : str + a SQL LIKE pattern - See :func:`rlike` for a regex version + See Also + -------- + pyspark.sql.Column.rlike + Examples + -------- >>> df.filter(df.name.like('Al%')).collect() [Row(age=2, name='Alice')] """ _startswith_doc = """ String starts with. Returns a boolean :class:`Column` based on a string match. - :param other: string at start of line (do not use a regex `^`) + Parameters + ---------- + other : :class:`Column` or str + string at start of line (do not use a regex `^`) + Examples + -------- >>> df.filter(df.name.startswith('Al')).collect() [Row(age=2, name='Alice')] >>> df.filter(df.name.startswith('^Al')).collect() @@ -466,8 +522,13 @@ def __iter__(self): _endswith_doc = """ String ends with. Returns a boolean :class:`Column` based on a string match. 
- :param other: string at end of line (do not use a regex `$`) + Parameters + ---------- + other : :class:`Column` or str + string at end of line (do not use a regex `$`) + Examples + -------- >>> df.filter(df.name.endswith('ice')).collect() [Row(age=2, name='Alice')] >>> df.filter(df.name.endswith('ice$')).collect() @@ -480,14 +541,21 @@ def __iter__(self): startswith = _bin_op("startsWith", _startswith_doc) endswith = _bin_op("endsWith", _endswith_doc) - @since(1.3) def substr(self, startPos, length): """ Return a :class:`Column` which is a substring of the column. - :param startPos: start position (int or Column) - :param length: length of the substring (int or Column) + .. versionadded:: 1.3.0 + + Parameters + ---------- + startPos : :class:`Column` or int + start position + length : :class:`Column` or int + length of the substring + Examples + -------- >>> df.select(df.name.substr(1, 3).alias("col")).collect() [Row(col='Ali'), Row(col='Bob')] """ @@ -507,12 +575,15 @@ def substr(self, startPos, length): raise TypeError("Unexpected type: %s" % type(startPos)) return Column(jc) - @since(1.5) def isin(self, *cols): """ A boolean expression that is evaluated to true if the value of this expression is contained by the evaluated values of the arguments. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df[df.name.isin("Bob", "Mike")].collect() [Row(age=5, name='Bob')] >>> df[df.age.isin([1, 2, 3])].collect() @@ -529,6 +600,8 @@ def isin(self, *cols): _asc_doc = """ Returns a sort expression based on ascending order of the column. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc()).collect() @@ -538,27 +611,37 @@ def isin(self, *cols): Returns a sort expression based on ascending order of the column, and null values return before non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect() [Row(name=None), Row(name='Alice'), Row(name='Tom')] - .. versionadded:: 2.4 """ _asc_nulls_last_doc = """ Returns a sort expression based on ascending order of the column, and null values appear after non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect() [Row(name='Alice'), Row(name='Tom'), Row(name=None)] - .. versionadded:: 2.4 """ _desc_doc = """ Returns a sort expression based on the descending order of the column. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc()).collect() @@ -568,23 +651,28 @@ def isin(self, *cols): Returns a sort expression based on the descending order of the column, and null values appear before non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect() [Row(name=None), Row(name='Tom'), Row(name='Alice')] - .. 
versionadded:: 2.4 """ _desc_nulls_last_doc = """ Returns a sort expression based on the descending order of the column, and null values appear after non-null values. + .. versionadded:: 2.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect() [Row(name='Tom'), Row(name='Alice'), Row(name=None)] - - .. versionadded:: 2.4 """ asc = _unary_op("asc", _asc_doc) @@ -597,6 +685,8 @@ def isin(self, *cols): _isNull_doc = """ True if the current expression is null. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) >>> df.filter(df.height.isNull()).collect() @@ -605,6 +695,8 @@ def isin(self, *cols): _isNotNull_doc = """ True if the current expression is NOT null. + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) >>> df.filter(df.height.isNotNull()).collect() @@ -614,20 +706,30 @@ def isin(self, *cols): isNull = _unary_op("isNull", _isNull_doc) isNotNull = _unary_op("isNotNull", _isNotNull_doc) - @since(1.3) def alias(self, *alias, **kwargs): """ Returns this column aliased with a new name or names (in the case of expressions that return more than one column, such as explode). - :param alias: strings of desired column names (collects all positional arguments passed) - :param metadata: a dict of information to be stored in ``metadata`` attribute of the + .. versionadded:: 1.3.0 + + Parameters + ---------- + alias : str + desired column names (collects all positional arguments passed) + + Other Parameters + ---------------- + metadata: dict + a dict of information to be stored in ``metadata`` attribute of the corresponding :class:`StructField ` (optional, keyword only argument) - .. versionchanged:: 2.2 - Added optional ``metadata`` argument. + .. versionchanged:: 2.2.0 + Added optional ``metadata`` argument. + Examples + -------- >>> df.select(df.age.alias("age2")).collect() [Row(age2=2), Row(age2=5)] >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max'] @@ -652,10 +754,13 @@ def alias(self, *alias, **kwargs): name = copy_func(alias, sinceversion=2.0, doc=":func:`name` is an alias for :func:`alias`.") - @since(1.3) def cast(self, dataType): """ Convert the column into type ``dataType``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.age.cast("string").alias('ages')).collect() [Row(ages='2'), Row(ages='5')] >>> df.select(df.age.cast(StringType()).alias('ages')).collect() @@ -674,12 +779,15 @@ def cast(self, dataType): astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.") - @since(1.3) def between(self, lowerBound, upperBound): """ A boolean expression that is evaluated to true if the value of this expression is between the given columns. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.name, df.age.between(2, 4)).show() +-----+---------------------------+ | name|((age >= 2) AND (age <= 4))| @@ -690,17 +798,22 @@ def between(self, lowerBound, upperBound): """ return (self >= lowerBound) & (self <= upperBound) - @since(1.4) def when(self, condition, value): """ Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. 
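A quick, hedged illustration of the null-ordering, `cast` and `between` helpers documented above (the toy rows mirror the doctest data and are not part of the patch):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Tom", 80), (None, 60), ("Alice", None)], ["name", "height"])

# Nulls sort last when ascending, first when descending.
df.orderBy(df.name.asc_nulls_last()).show()
df.orderBy(df.name.desc_nulls_first()).show()

# cast() and between() compose with select().
df.select(df.name, df.height.cast("double").alias("height_d"),
          df.height.between(60, 80).alias("in_range")).show()
```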
- See :func:`pyspark.sql.functions.when` for example usage. + .. versionadded:: 1.4.0 - :param condition: a boolean :class:`Column` expression. - :param value: a literal value, or a :class:`Column` expression. + Parameters + ---------- + condition : :class:`Column` + a boolean :class:`Column` expression. + value + a literal value, or a :class:`Column` expression. + Examples + -------- >>> from pyspark.sql import functions as F >>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show() +-----+------------------------------------------------------------+ @@ -709,6 +822,10 @@ def when(self, condition, value): |Alice| -1| | Bob| 1| +-----+------------------------------------------------------------+ + + See Also + -------- + pyspark.sql.functions.when """ if not isinstance(condition, Column): raise TypeError("condition should be a Column") @@ -716,16 +833,20 @@ def when(self, condition, value): jc = self._jc.when(condition._jc, v) return Column(jc) - @since(1.4) def otherwise(self, value): """ Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - See :func:`pyspark.sql.functions.when` for example usage. + .. versionadded:: 1.4.0 - :param value: a literal value, or a :class:`Column` expression. + Parameters + ---------- + value + a literal value, or a :class:`Column` expression. + Examples + -------- >>> from pyspark.sql import functions as F >>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show() +-----+-------------------------------------+ @@ -734,19 +855,31 @@ def otherwise(self, value): |Alice| 0| | Bob| 1| +-----+-------------------------------------+ + + See Also + -------- + pyspark.sql.functions.when """ v = value._jc if isinstance(value, Column) else value jc = self._jc.otherwise(v) return Column(jc) - @since(1.4) def over(self, window): """ Define a windowing column. - :param window: a :class:`WindowSpec` - :return: a Column + .. versionadded:: 1.4.0 + + Parameters + ---------- + window : :class:`WindowSpec` + + Returns + ------- + :class:`Column` + Examples + -------- >>> from pyspark.sql import Window >>> window = Window.partitionBy("name").orderBy("age") \ .rowsBetween(Window.unboundedPreceding, Window.currentRow) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 937d44ac5ecbc..5071240a511a6 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -38,34 +38,41 @@ class SQLContext(object): A SQLContext can be used create :class:`DataFrame`, register :class:`DataFrame` as tables, execute SQL over tables, cache tables, and read parquet files. - :param sparkContext: The :class:`SparkContext` backing this SQLContext. - :param sparkSession: The :class:`SparkSession` around which this SQLContext wraps. - :param jsqlContext: An optional JVM Scala SQLContext. If set, we do not instantiate a new + .. deprecated:: 3.0.0 + Use :func:`SparkSession.builder.getOrCreate()` instead. + + Parameters + ---------- + sparkContext : :class:`SparkContext` + The :class:`SparkContext` backing this SQLContext. + sparkSession : :class:`SparkSession` + The :class:`SparkSession` around which this SQLContext wraps. + jsqlContext : optional + An optional JVM Scala SQLContext. If set, we do not instantiate a new SQLContext in the JVM, instead we make all calls to this object. + This is only for internal. 
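To complement the `when` / `otherwise` / `over` docstrings above, a small self-contained sketch (the window spec and sample data are assumptions for illustration):

```
from pyspark.sql import SparkSession, Window
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

w = (Window.partitionBy("name").orderBy("age")
     .rowsBetween(Window.unboundedPreceding, Window.currentRow))

df.select(
    df.name,
    F.when(df.age > 3, 1).otherwise(0).alias("is_older"),
    F.min("age").over(w).alias("min_age_so_far"),
).show()
```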
+ + Examples + -------- + >>> from datetime import datetime + >>> from pyspark.sql import Row + >>> sqlContext = SQLContext(sc) + >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, + ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), + ... time=datetime(2014, 8, 1, 14, 1, 5))]) + >>> df = allTypes.toDF() + >>> df.createOrReplaceTempView("allTypes") + >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' + ... 'from allTypes where b and i > 0').collect() + [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ + dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] + >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() + [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ _instantiatedContext = None def __init__(self, sparkContext, sparkSession=None, jsqlContext=None): - """Creates a new SQLContext. - - .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead. - - >>> from datetime import datetime - >>> from pyspark.sql import Row - >>> sqlContext = SQLContext(sc) - >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, - ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), - ... time=datetime(2014, 8, 1, 14, 1, 5))]) - >>> df = allTypes.toDF() - >>> df.createOrReplaceTempView("allTypes") - >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' - ... 'from allTypes where b and i > 0').collect() - [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ - dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] - >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() - [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] - """ if sparkSession is None: warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", @@ -101,14 +108,18 @@ def _conf(self): return self.sparkSession._jsparkSession.sessionState().conf() @classmethod - @since(1.6) def getOrCreate(cls, sc): """ Get the existing SQLContext or create a new one with given SparkContext. - :param sc: SparkContext + .. versionadded:: 1.6.0 - .. note:: Deprecated in 3.0.0. Use :func:`SparkSession.builder.getOrCreate()` instead. + .. deprecated:: 3.0.0 + Use :func:`SparkSession.builder.getOrCreate()` instead. + + Parameters + ---------- + sc : :class:`SparkContext` """ warnings.warn( "Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.", @@ -122,22 +133,23 @@ def getOrCreate(cls, sc): cls(sc, sparkSession, jsqlContext) return cls._instantiatedContext - @since(1.6) def newSession(self): """ Returns a new SQLContext as new session, that has separate SQLConf, registered temporary views and UDFs, but shared SparkContext and table cache. + + .. versionadded:: 1.6.0 """ return self.__class__(self._sc, self.sparkSession.newSession()) - @since(1.3) def setConf(self, key, value): """Sets the given Spark SQL configuration property. + + .. versionadded:: 1.3.0 """ self.sparkSession.conf.set(key, value) - @since(1.3) def getConf(self, key, defaultValue=_NoValue): """Returns the value of Spark SQL configuration property for the given key. @@ -145,6 +157,10 @@ def getConf(self, key, defaultValue=_NoValue): defaultValue. If the key is not set and defaultValue is not set, return the system default value. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> sqlContext.getConf("spark.sql.shuffle.partitions") '200' >>> sqlContext.getConf("spark.sql.shuffle.partitions", "10") @@ -156,27 +172,42 @@ def getConf(self, key, defaultValue=_NoValue): return self.sparkSession.conf.get(key, defaultValue) @property - @since("1.3.1") def udf(self): """Returns a :class:`UDFRegistration` for UDF registration. - :return: :class:`UDFRegistration` + .. versionadded:: 1.3.1 + + Returns + ------- + :class:`UDFRegistration` """ return self.sparkSession.udf - @since(1.4) def range(self, start, end=None, step=1, numPartitions=None): """ Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with step value ``step``. - :param start: the start value - :param end: the end value (exclusive) - :param step: the incremental step (default: 1) - :param numPartitions: the number of partitions of the DataFrame - :return: :class:`DataFrame` - + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + the start value + end : int, optional + the end value (exclusive) + step : int, optional + the incremental step (default: 1) + numPartitions : int, optional + the number of partitions of the DataFrame + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> sqlContext.range(1, 7, 2).collect() [Row(id=1), Row(id=3), Row(id=5)] @@ -187,24 +218,28 @@ def range(self, start, end=None, step=1, numPartitions=None): """ return self.sparkSession.range(start, end, step, numPartitions) - @since(1.2) def registerFunction(self, name, f, returnType=None): """An alias for :func:`spark.udf.register`. See :meth:`pyspark.sql.UDFRegistration.register`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead. + .. versionadded:: 1.2.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.register` instead. """ warnings.warn( "Deprecated in 2.3.0. Use spark.udf.register instead.", DeprecationWarning) return self.sparkSession.udf.register(name, f, returnType) - @since(2.1) def registerJavaFunction(self, name, javaClassName, returnType=None): """An alias for :func:`spark.udf.registerJavaFunction`. See :meth:`pyspark.sql.UDFRegistration.registerJavaFunction`. - .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead. + .. versionadded:: 2.1.0 + + .. deprecated:: 2.3.0 + Use :func:`spark.udf.registerJavaFunction` instead. """ warnings.warn( "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.", @@ -216,13 +251,19 @@ def _inferSchema(self, rdd, samplingRatio=None): """ Infer schema from an RDD of Row or tuple. - :param rdd: an RDD of Row or tuple - :param samplingRatio: sampling ratio, or no sampling (default) - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + rdd : :class:`RDD` + an RDD of Row or tuple + samplingRatio : float, optional + sampling ratio, or no sampling (default) + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ return self.sparkSession._inferSchema(rdd, samplingRatio) - @since(1.3) def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True): """ Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`. @@ -243,28 +284,41 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr If schema inference is needed, ``samplingRatio`` is used to determined the ratio of rows used for schema inference. 
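For the `range` and `createDataFrame` docstrings migrated in this file, a minimal sketch using `SparkSession` directly, since `SQLContext` is deprecated as of 3.0.0 (sample data and the datatype-string schema are illustrative):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Schema given as a datatype string instead of a StructType.
spark.createDataFrame([("Alice", 1)], "name: string, age: int").show()

# Single LongType column named `id`, analogous to sqlContext.range(1, 7, 2).
spark.range(1, 7, 2).show()
```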
The first row will be used if ``samplingRatio`` is ``None``. - :param data: an RDD of any kind of SQL data representation(e.g. :class:`Row`, + .. versionadded:: 1.3.0 + + .. versionchanged:: 2.0.0 + The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a + datatype string after 2.0. + If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a + :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple. + + .. versionchanged:: 2.1.0 + Added verifySchema. + + Parameters + ---------- + data : :class:`RDD` or iterable + an RDD of any kind of SQL data representation(e.g. :class:`Row`, :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is None. The data type string format equals to :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. - :param samplingRatio: the sample ratio of rows used for inferring - :param verifySchema: verify data types of every row against schema. - :return: :class:`DataFrame` + samplingRatio : float, optional + the sample ratio of rows used for inferring + verifySchema : bool, optional + verify data types of every row against schema. Enabled by default. - .. versionchanged:: 2.0 - The ``schema`` parameter can be a :class:`pyspark.sql.types.DataType` or a - datatype string after 2.0. - If it's not a :class:`pyspark.sql.types.StructType`, it will be wrapped into a - :class:`pyspark.sql.types.StructType` and each record will also be wrapped into a tuple. - - .. versionchanged:: 2.1 - Added verifySchema. + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> l = [('Alice', 1)] >>> sqlContext.createDataFrame(l).collect() [Row(_1='Alice', _2=1)] @@ -314,26 +368,31 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr """ return self.sparkSession.createDataFrame(data, schema, samplingRatio, verifySchema) - @since(1.3) def registerDataFrameAsTable(self, df, tableName): """Registers the given :class:`DataFrame` as a temporary table in the catalog. Temporary tables exist only during the lifetime of this instance of :class:`SQLContext`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") """ df.createOrReplaceTempView(tableName) - @since(1.6) def dropTempTable(self, tableName): """ Remove the temporary table from catalog. + .. versionadded:: 1.6.0 + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> sqlContext.dropTempTable("table1") """ self.sparkSession.catalog.dropTempView(tableName) - @since(1.3) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): """Creates an external table based on the dataset in a data source. @@ -346,17 +405,26 @@ def createExternalTable(self, tableName, path=None, source=None, schema=None, ** Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and created external table. - :return: :class:`DataFrame` + .. 
versionadded:: 1.3.0 + + Returns + ------- + :class:`DataFrame` """ return self.sparkSession.catalog.createExternalTable( tableName, path, source, schema, **options) - @since(1.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. - :return: :class:`DataFrame` + .. versionadded:: 1.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.sql("SELECT field1 AS f1, field2 as f2 from table1") >>> df2.collect() @@ -364,12 +432,17 @@ def sql(self, sqlQuery): """ return self.sparkSession.sql(sqlQuery) - @since(1.0) def table(self, tableName): """Returns the specified table or view as a :class:`DataFrame`. - :return: :class:`DataFrame` + .. versionadded:: 1.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.table("table1") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -377,7 +450,6 @@ def table(self, tableName): """ return self.sparkSession.table(tableName) - @since(1.3) def tables(self, dbName=None): """Returns a :class:`DataFrame` containing names of tables in the given database. @@ -386,9 +458,19 @@ def tables(self, dbName=None): The returned DataFrame has two columns: ``tableName`` and ``isTemporary`` (a column with :class:`BooleanType` indicating if a table is a temporary one or not). - :param dbName: string, name of the database to use. - :return: :class:`DataFrame` + .. versionadded:: 1.3.0 + Parameters + ---------- + dbName: str, optional + name of the database to use. + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> df2 = sqlContext.tables() >>> df2.filter("tableName = 'table1'").first() @@ -399,12 +481,20 @@ def tables(self, dbName=None): else: return DataFrame(self._ssql_ctx.tables(dbName), self) - @since(1.3) def tableNames(self, dbName=None): """Returns a list of names of tables in the database ``dbName``. - :param dbName: string, name of the database to use. Default to the current database. - :return: list of table names, in string + .. versionadded:: 1.3.0 + + Parameters + ---------- + dbName: str + name of the database to use. Default to the current database. + + Returns + ------- + list + list of table names, in string >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> "table1" in sqlContext.tableNames() @@ -433,26 +523,34 @@ def clearCache(self): self._ssql_ctx.clearCache() @property - @since(1.4) def read(self): """ Returns a :class:`DataFrameReader` that can be used to read data in as a :class:`DataFrame`. - :return: :class:`DataFrameReader` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`DataFrameReader` """ return DataFrameReader(self) @property - @since(2.0) def readStream(self): """ Returns a :class:`DataStreamReader` that can be used to read data streams as a streaming :class:`DataFrame`. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :return: :class:`DataStreamReader` + Notes + ----- + This API is evolving. + + Returns + ------- + :class:`DataStreamReader` >>> text_sdf = sqlContext.readStream.text(tempfile.mkdtemp()) >>> text_sdf.isStreaming @@ -461,12 +559,15 @@ def readStream(self): return DataStreamReader(self) @property - @since(2.0) def streams(self): """Returns a :class:`StreamingQueryManager` that allows managing all the :class:`StreamingQuery` StreamingQueries active on `this` context. - .. note:: Evolving. + .. 
versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ from pyspark.sql.streaming import StreamingQueryManager return StreamingQueryManager(self._ssql_ctx.streams()) @@ -478,11 +579,18 @@ class HiveContext(SQLContext): Configuration for Hive is read from ``hive-site.xml`` on the classpath. It supports running both SQL and HiveQL commands. - :param sparkContext: The SparkContext to wrap. - :param jhiveContext: An optional JVM Scala HiveContext. If set, we do not instantiate a new + .. deprecated:: 2.0.0 + Use SparkSession.builder.enableHiveSupport().getOrCreate(). + + Parameters + ---------- + sparkContext : :class:`SparkContext` + The SparkContext to wrap. + jhiveContext : optional + An optional JVM Scala HiveContext. If set, we do not instantiate a new :class:`HiveContext` in the JVM, instead we make all calls to this object. + This is only for internal use. - .. note:: Deprecated in 2.0.0. Use SparkSession.builder.enableHiveSupport().getOrCreate(). """ def __init__(self, sparkContext, jhiveContext=None): diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 487135cd2329a..9fae27a2d9c6c 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -62,7 +62,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): people.filter(people.age > 30).join(department, people.deptId == department.id) \\ .groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"}) - .. versionadded:: 1.3 + .. versionadded:: 1.3.0 """ def __init__(self, jdf, sql_ctx): @@ -100,38 +100,44 @@ def stat(self): """ return DataFrameStatFunctions(self) - @since(1.3) def toJSON(self, use_unicode=True): """Converts a :class:`DataFrame` into a :class:`RDD` of string. Each row is turned into a JSON document as one element in the returned RDD. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.toJSON().first() '{"age":2,"name":"Alice"}' """ rdd = self._jdf.toJSON() return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) - @since(1.3) def registerTempTable(self, name): """Registers this DataFrame as a temporary table using the given name. The lifetime of this temporary table is tied to the :class:`SparkSession` that was used to create this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + .. deprecated:: 2.0.0 + Use :meth:`DataFrame.createOrReplaceTempView` instead. + + Examples + -------- >>> df.registerTempTable("people") >>> df2 = spark.sql("select * from people") >>> sorted(df.collect()) == sorted(df2.collect()) True >>> spark.catalog.dropTempView("people") - - .. note:: Deprecated in 2.0, use createOrReplaceTempView instead. """ warnings.warn( "Deprecated in 2.0, use createOrReplaceTempView instead.", DeprecationWarning) self._jdf.createOrReplaceTempView(name) - @since(2.0) def createTempView(self, name): """Creates a local temporary view with this :class:`DataFrame`. @@ -140,6 +146,10 @@ def createTempView(self, name): throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the catalog. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.createTempView("people") >>> df2 = spark.sql("select * from people") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -153,13 +163,16 @@ def createTempView(self, name): """ self._jdf.createTempView(name) - @since(2.0) def createOrReplaceTempView(self, name): """Creates or replaces a local temporary view with this :class:`DataFrame`. 
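A short sketch of the temporary-view workflow covered by the docstrings above (view name and rows are illustrative; assumes an active `SparkSession` named `spark`):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

df.createOrReplaceTempView("people")          # scoped to this SparkSession
spark.sql("SELECT name FROM people WHERE age >= 5").show()
spark.catalog.dropTempView("people")
```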
The lifetime of this temporary table is tied to the :class:`SparkSession` that was used to create this :class:`DataFrame`. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.createOrReplaceTempView("people") >>> df2 = df.filter(df.age > 3) >>> df2.createOrReplaceTempView("people") @@ -171,7 +184,6 @@ def createOrReplaceTempView(self, name): """ self._jdf.createOrReplaceTempView(name) - @since(2.1) def createGlobalTempView(self, name): """Creates a global temporary view with this :class:`DataFrame`. @@ -179,6 +191,10 @@ def createGlobalTempView(self, name): throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the catalog. + .. versionadded:: 2.1.0 + + Examples + -------- >>> df.createGlobalTempView("people") >>> df2 = spark.sql("select * from global_temp.people") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -192,12 +208,15 @@ def createGlobalTempView(self, name): """ self._jdf.createGlobalTempView(name) - @since(2.2) def createOrReplaceGlobalTempView(self, name): """Creates or replaces a global temporary view using the given name. The lifetime of this temporary view is tied to this Spark application. + .. versionadded:: 2.2.0 + + Examples + -------- >>> df.createOrReplaceGlobalTempView("people") >>> df2 = df.filter(df.age > 3) >>> df2.createOrReplaceGlobalTempView("people") @@ -210,34 +229,45 @@ def createOrReplaceGlobalTempView(self, name): self._jdf.createOrReplaceGlobalTempView(name) @property - @since(1.4) def write(self): """ Interface for saving the content of the non-streaming :class:`DataFrame` out into external storage. - :return: :class:`DataFrameWriter` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`DataFrameWriter` """ return DataFrameWriter(self) @property - @since(2.0) def writeStream(self): """ Interface for saving the content of the streaming :class:`DataFrame` out into external storage. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :return: :class:`DataStreamWriter` + Notes + ----- + This API is evolving. + + Returns + ------- + :class:`DataStreamWriter` """ return DataStreamWriter(self) @property - @since(1.3) def schema(self): """Returns the schema of this :class:`DataFrame` as a :class:`pyspark.sql.types.StructType`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.schema StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true))) """ @@ -249,10 +279,13 @@ def schema(self): "Unable to parse datatype from schema. %s" % e) return self._schema - @since(1.3) def printSchema(self): """Prints out the schema in the tree format. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.printSchema() root |-- age: integer (nullable = true) @@ -261,14 +294,19 @@ def printSchema(self): """ print(self._jdf.schema().treeString()) - @since(1.3) def explain(self, extended=None, mode=None): """Prints the (logical and physical) plans to the console for debugging purpose. - :param extended: boolean, default ``False``. If ``False``, prints only the physical plan. + .. versionadded:: 1.3.0 + + parameters + ---------- + extended : bool, optional + default ``False``. If ``False``, prints only the physical plan. When this is a string without specifying the ``mode``, it works as the mode is specified. - :param mode: specifies the expected output format of plans. + mode : str, optional + specifies the expected output format of plans. * ``simple``: Print only a physical plan. * ``extended``: Print both logical and physical plans. 
@@ -277,6 +315,11 @@ def explain(self, extended=None, mode=None): * ``formatted``: Split explain output into two sections: a physical plan outline \ and node details. + .. versionchanged:: 3.0.0 + Added optional argument `mode` to specify the expected output format of plans. + + Examples + -------- >>> df.explain() == Physical Plan == *(1) Scan ExistingRDD[age#0,name#1] @@ -302,9 +345,6 @@ def explain(self, extended=None, mode=None): == Optimized Logical Plan == ...Statistics... ... - - .. versionchanged:: 3.0.0 - Added optional argument `mode` to specify the expected output format of plans. """ if extended is not None and mode is not None: @@ -345,13 +385,17 @@ def explain(self, extended=None, mode=None): print(self._sc._jvm.PythonSQLUtils.explainString(self._jdf.queryExecution(), explain_mode)) - @since(2.4) def exceptAll(self, other): """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but not in another :class:`DataFrame` while preserving duplicates. This is equivalent to `EXCEPT ALL` in SQL. + As standard in SQL, this function resolves columns by position (not by name). + + .. versionadded:: 2.4.0 + Examples + -------- >>> df1 = spark.createDataFrame( ... [("a", 1), ("a", 1), ("a", 1), ("a", 2), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("b", 3)], ["C1", "C2"]) @@ -366,7 +410,6 @@ def exceptAll(self, other): | c| 4| +---+---+ - Also as standard in SQL, this function resolves columns by position (not by name). """ return DataFrame(self._jdf.exceptAll(other._jdf), self.sql_ctx) @@ -378,7 +421,6 @@ def isLocal(self): return self._jdf.isLocal() @property - @since(2.0) def isStreaming(self): """Returns ``True`` if this :class:`Dataset` contains one or more sources that continuously return data as it arrives. A :class:`Dataset` that reads data from a streaming source @@ -387,21 +429,33 @@ def isStreaming(self): :func:`collect`) will throw an :class:`AnalysisException` when there is a streaming source present. - .. note:: Evolving + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ return self._jdf.isStreaming() - @since(1.3) def show(self, n=20, truncate=True, vertical=False): """Prints the first ``n`` rows to the console. - :param n: Number of rows to show. - :param truncate: If set to ``True``, truncate strings longer than 20 chars by default. + .. versionadded:: 1.3.0 + + Parameters + ---------- + n : int, optional + Number of rows to show. + truncate : bool, optional + If set to ``True``, truncate strings longer than 20 chars by default. If set to a number greater than one, truncates long strings to length ``truncate`` and align cells right. - :param vertical: If set to ``True``, print output rows vertically (one line + vertical : bool, optional + If set to ``True``, print output rows vertically (one line per column value). + Examples + -------- >>> df DataFrame[age: int, name: string] >>> df.show() @@ -472,35 +526,46 @@ def _repr_html_(self): else: return None - @since(2.1) def checkpoint(self, eager=True): """Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms where the plan may grow exponentially. It will be saved to files inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir`. - :param eager: Whether to checkpoint this :class:`DataFrame` immediately + .. versionadded:: 2.1.0 - .. 
note:: Experimental + Parameters + ---------- + eager : bool, optional + Whether to checkpoint this :class:`DataFrame` immediately + + Notes + ----- + This API is experimental. """ jdf = self._jdf.checkpoint(eager) return DataFrame(jdf, self.sql_ctx) - @since(2.3) def localCheckpoint(self, eager=True): """Returns a locally checkpointed version of this Dataset. Checkpointing can be used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in iterative algorithms where the plan may grow exponentially. Local checkpoints are stored in the executors using the caching subsystem and therefore they are not reliable. - :param eager: Whether to checkpoint this :class:`DataFrame` immediately + .. versionadded:: 2.3.0 + + Parameters + ---------- + eager : bool, optional + Whether to checkpoint this :class:`DataFrame` immediately - .. note:: Experimental + Notes + ----- + This API is experimental. """ jdf = self._jdf.localCheckpoint(eager) return DataFrame(jdf, self.sql_ctx) - @since(2.1) def withWatermark(self, eventTime, delayThreshold): """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point in time before which we assume no more late data is going to arrive. @@ -517,12 +582,20 @@ def withWatermark(self, eventTime, delayThreshold): to be at least `delayThreshold` behind the actual event time. In some cases we may still process records that arrive more than `delayThreshold` late. - :param eventTime: the name of the column that contains the event time of the row. - :param delayThreshold: the minimum delay to wait to data to arrive late, relative to the + .. versionadded:: 2.1.0 + + Parameters + ---------- + eventTime : str or :class:`Column` + the name of the column that contains the event time of the row. + delayThreshold : str + the minimum delay to wait to data to arrive late, relative to the latest record that has been processed in the form of an interval (e.g. "1 minute" or "5 hours"). - .. note:: Evolving + Notes + ----- + This API is evolving. >>> from pyspark.sql.functions import timestamp_seconds >>> sdf.select( @@ -537,14 +610,24 @@ def withWatermark(self, eventTime, delayThreshold): jdf = self._jdf.withWatermark(eventTime, delayThreshold) return DataFrame(jdf, self.sql_ctx) - @since(2.2) def hint(self, name, *parameters): """Specifies some hint on the current :class:`DataFrame`. - :param name: A name of the hint. - :param parameters: Optional parameters. - :return: :class:`DataFrame` + .. versionadded:: 2.2.0 + + Parameters + ---------- + name : str + A name of the hint. + parameters : str, list, float or int + Optional parameters. + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.join(df2.hint("broadcast"), "name").show() +----+---+------+ |name|age|height| @@ -568,19 +651,25 @@ def hint(self, name, *parameters): jdf = self._jdf.hint(name, self._jseq(parameters)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def count(self): """Returns the number of rows in this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.count() 2 """ return int(self._jdf.count()) - @since(1.3) def collect(self): """Returns all the records as a list of :class:`Row`. + .. 
versionadded:: 1.3.0 + + Examples + -------- >>> df.collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ @@ -588,7 +677,6 @@ def collect(self): sock_info = self._jdf.collectToPython() return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer()))) - @since(2.0) def toLocalIterator(self, prefetchPartitions=False): """ Returns an iterator that contains all of the rows in this :class:`DataFrame`. @@ -596,9 +684,15 @@ def toLocalIterator(self, prefetchPartitions=False): :class:`DataFrame`. With prefetch it may consume up to the memory of the 2 largest partitions. - :param prefetchPartitions: If Spark should pre-fetch the next partition - before it is needed. + .. versionadded:: 2.0.0 + Parameters + ---------- + prefetchPartitions : bool, optional + If Spark should pre-fetch the next partition before it is needed. + + Examples + -------- >>> list(df.toLocalIterator()) [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ @@ -606,10 +700,13 @@ def toLocalIterator(self, prefetchPartitions=False): sock_info = self._jdf.toPythonIterator(prefetchPartitions) return _local_iterator_from_socket(sock_info, BatchedSerializer(PickleSerializer())) - @since(1.3) def limit(self, num): """Limits the result count to the number specified. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.limit(1).collect() [Row(age=2, name='Alice')] >>> df.limit(0).collect() @@ -618,16 +715,18 @@ def limit(self, num): jdf = self._jdf.limit(num) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def take(self, num): """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.take(2) [Row(age=2, name='Alice'), Row(age=5, name='Bob')] """ return self.limit(num).collect() - @since(3.0) def tail(self, num): """ Returns the last ``num`` rows as a :class:`list` of :class:`Row`. @@ -635,6 +734,10 @@ def tail(self, num): Running tail requires moving data into the application's driver process, and doing so with a very large ``num`` can crash the driver process with OutOfMemoryError. + .. versionadded:: 3.0.0 + + Examples + -------- >>> df.tail(1) [Row(age=5, name='Bob')] """ @@ -642,24 +745,30 @@ def tail(self, num): sock_info = self._jdf.tailToPython(num) return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer()))) - @since(1.3) def foreach(self, f): """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. This is a shorthand for ``df.rdd.foreach()``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> def f(person): ... print(person.name) >>> df.foreach(f) """ self.rdd.foreach(f) - @since(1.3) def foreachPartition(self, f): """Applies the ``f`` function to each partition of this :class:`DataFrame`. This a shorthand for ``df.rdd.foreachPartition()``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> def f(people): ... for person in people: ... print(person.name) @@ -667,25 +776,30 @@ def foreachPartition(self, f): """ self.rdd.foreachPartition(f) - @since(1.3) def cache(self): """Persists the :class:`DataFrame` with the default storage level (`MEMORY_AND_DISK`). - .. note:: The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. + .. versionadded:: 1.3.0 + + Notes + ----- + The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. 
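As a hedged illustration of the row-fetching APIs documented above (`take`, `tail`, `limit`, `toLocalIterator`), all of which move data to the driver to varying degrees:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.range(10)

df.take(3)                             # first three rows as a list of Row
df.tail(2)                             # last two rows, collected to the driver
df.limit(5).count()                    # limit() is lazy; count() materialises it
sum(1 for _ in df.toLocalIterator())   # streams rows to the driver per partition
```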
""" self.is_cached = True self._jdf.cache() return self - @since(1.3) def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): """Sets the storage level to persist the contents of the :class:`DataFrame` across operations after the first time it is computed. This can only be used to assign a new storage level if the :class:`DataFrame` does not have a storage level set yet. If no storage level is specified defaults to (`MEMORY_AND_DISK_DESER`) - .. note:: The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala - in 3.0. + .. versionadded:: 1.3.0 + + Notes + ----- + The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala in 3.0. """ self.is_cached = True javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) @@ -693,10 +807,13 @@ def persist(self, storageLevel=StorageLevel.MEMORY_AND_DISK_DESER): return self @property - @since(2.1) def storageLevel(self): """Get the :class:`DataFrame`'s current storage level. + .. versionadded:: 2.1.0 + + Examples + -------- >>> df.storageLevel StorageLevel(False, False, False, False, 1) >>> df.cache().storageLevel @@ -712,24 +829,24 @@ def storageLevel(self): java_storage_level.replication()) return storage_level - @since(1.3) def unpersist(self, blocking=False): """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from memory and disk. - .. note:: `blocking` default has changed to ``False`` to match Scala in 2.0. + .. versionadded:: 1.3.0 + + Notes + ----- + `blocking` default has changed to ``False`` to match Scala in 2.0. """ self.is_cached = False self._jdf.unpersist(blocking) return self - @since(1.4) def coalesce(self, numPartitions): """ Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. - :param numPartitions: int, to specify the target number of partitions - Similar to coalesce defined on an :class:`RDD`, this operation results in a narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of the 100 new partitions will @@ -743,26 +860,42 @@ def coalesce(self, numPartitions): current upstream partitions will be executed in parallel (per whatever the current partitioning is). + .. versionadded:: 1.4.0 + + Parameters + ---------- + numPartitions : int + specify the target number of partitions + + Examples + -------- >>> df.coalesce(1).rdd.getNumPartitions() 1 """ return DataFrame(self._jdf.coalesce(numPartitions), self.sql_ctx) - @since(1.3) def repartition(self, numPartitions, *cols): """ Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The resulting :class:`DataFrame` is hash partitioned. - :param numPartitions: + .. versionadded:: 1.3.0 + + Parameters + ---------- + numPartitions : int can be an int to specify the target number of partitions or a Column. If it is a Column, it will be used as the first partitioning column. If not specified, the default number of partitions is used. + cols : str or :class:`Column` + partitioning columns. - .. versionchanged:: 1.6 - Added optional arguments to specify the partitioning columns. Also made numPartitions - optional if partitioning columns are specified. + .. versionchanged:: 1.6 + Added optional arguments to specify the partitioning columns. Also made numPartitions + optional if partitioning columns are specified. 
+ Examples + -------- >>> df.repartition(10).rdd.getNumPartitions() 10 >>> data = df.union(df).repartition("age") @@ -810,25 +943,34 @@ def repartition(self, numPartitions, *cols): else: raise TypeError("numPartitions should be an int or Column") - @since("2.4.0") def repartitionByRange(self, numPartitions, *cols): """ Returns a new :class:`DataFrame` partitioned by the given partitioning expressions. The resulting :class:`DataFrame` is range partitioned. - :param numPartitions: + At least one partition-by expression must be specified. + When no explicit sort order is specified, "ascending nulls first" is assumed. + + .. versionadded:: 2.4.0 + + Parameters + ---------- + numPartitions : int can be an int to specify the target number of partitions or a Column. If it is a Column, it will be used as the first partitioning column. If not specified, the default number of partitions is used. + cols : str or :class:`Column` + partitioning columns. - At least one partition-by expression must be specified. - When no explicit sort order is specified, "ascending nulls first" is assumed. - - Note that due to performance reasons this method uses sampling to estimate the ranges. + Notes + ----- + Due to performance reasons this method uses sampling to estimate the ranges. Hence, the output may not be consistent, since sampling can return different values. The sample size can be controlled by the config `spark.sql.execution.rangeExchange.sampleSizePerPartition`. + Examples + -------- >>> df.repartitionByRange(2, "age").rdd.getNumPartitions() 2 >>> df.show() @@ -861,28 +1003,41 @@ def repartitionByRange(self, numPartitions, *cols): else: raise TypeError("numPartitions should be an int, string or Column") - @since(1.3) def distinct(self): """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.distinct().count() 2 """ return DataFrame(self._jdf.distinct(), self.sql_ctx) - @since(1.3) def sample(self, withReplacement=None, fraction=None, seed=None): """Returns a sampled subset of this :class:`DataFrame`. - :param withReplacement: Sample with replacement or not (default ``False``). - :param fraction: Fraction of rows to generate, range [0.0, 1.0]. - :param seed: Seed for sampling (default a random seed). + .. versionadded:: 1.3.0 - .. note:: This is not guaranteed to provide exactly the fraction specified of the total - count of the given :class:`DataFrame`. + Parameters + ---------- + withReplacement : bool, optional + Sample with replacement or not (default ``False``). + fraction : float, optional + Fraction of rows to generate, range [0.0, 1.0]. + seed : int, optional + Seed for sampling (default a random seed). - .. note:: `fraction` is required and, `withReplacement` and `seed` are optional. + Notes + ----- + This is not guaranteed to provide exactly the fraction specified of the total + count of the given :class:`DataFrame`. + `fraction` is required and, `withReplacement` and `seed` are optional. + + Examples + -------- >>> df = spark.range(10) >>> df.sample(0.5, 3).count() 7 @@ -935,19 +1090,32 @@ def sample(self, withReplacement=None, fraction=None, seed=None): jdf = self._jdf.sample(*args) return DataFrame(jdf, self.sql_ctx) - @since(1.5) def sampleBy(self, col, fractions, seed=None): """ Returns a stratified sample without replacement based on the fraction given on each stratum. - :param col: column that defines strata - :param fractions: + .. 
versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + column that defines strata + + .. versionchanged:: 3.0 + Added sampling by a column of :class:`Column` + fractions : dict sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as zero. - :param seed: random seed - :return: a new :class:`DataFrame` that represents the stratified sample + seed : int, optional + random seed + + Returns + ------- + a new :class:`DataFrame` that represents the stratified sample + Examples + -------- >>> from pyspark.sql.functions import col >>> dataset = sqlContext.range(0, 100).select((col("id") % 3).alias("key")) >>> sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0) @@ -960,9 +1128,6 @@ def sampleBy(self, col, fractions, seed=None): +---+-----+ >>> dataset.sampleBy(col("key"), fractions={2: 1.0}, seed=0).count() 33 - - .. versionchanged:: 3.0 - Added sampling by a column of :class:`Column` """ if isinstance(col, str): col = Column(col) @@ -978,14 +1143,21 @@ def sampleBy(self, col, fractions, seed=None): seed = seed if seed is not None else random.randint(0, sys.maxsize) return DataFrame(self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sql_ctx) - @since(1.4) def randomSplit(self, weights, seed=None): """Randomly splits this :class:`DataFrame` with the provided weights. - :param weights: list of doubles as weights with which to split the :class:`DataFrame`. + .. versionadded:: 1.4.0 + + Parameters + ---------- + weights : list + list of doubles as weights with which to split the :class:`DataFrame`. Weights will be normalized if they don't sum up to 1.0. - :param seed: The seed for sampling. + seed : int, optional + The seed for sampling. + Examples + -------- >>> splits = df4.randomSplit([1.0, 2.0], 24) >>> splits[0].count() 2 @@ -1001,33 +1173,45 @@ def randomSplit(self, weights, seed=None): return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array] @property - @since(1.3) def dtypes(self): """Returns all column names and their data types as a list. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.dtypes [('age', 'int'), ('name', 'string')] """ return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields] @property - @since(1.3) def columns(self): """Returns all column names as a list. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.columns ['age', 'name'] """ return [f.name for f in self.schema.fields] - @since(2.3) def colRegex(self, colName): """ Selects column based on the column name specified as a regex and returns it as :class:`Column`. - :param colName: string, column name specified as a regex. + .. versionadded:: 2.3.0 + + Parameters + ---------- + colName : str + string, column name specified as a regex. + Examples + -------- >>> df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["Col1", "Col2"]) >>> df.select(df.colRegex("`(Col1)?+.+`")).show() +----+ @@ -1043,12 +1227,18 @@ def colRegex(self, colName): jc = self._jdf.colRegex(colName) return Column(jc) - @since(1.3) def alias(self, alias): """Returns a new :class:`DataFrame` with an alias set. - :param alias: string, an alias name to be set for the :class:`DataFrame`. + .. versionadded:: 1.3.0 + + Parameters + ---------- + alias : str + an alias name to be set for the :class:`DataFrame`. 
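A small sketch of the stratified sampling and random splitting documented above (fractions, seeds and counts are illustrative, not guaranteed outputs):

```
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").getOrCreate()

dataset = spark.range(0, 100).select((F.col("id") % 3).alias("key"))
sampled = dataset.sampleBy("key", fractions={0: 0.1, 1: 0.2}, seed=0)
sampled.groupBy("key").count().orderBy("key").show()

train, test = spark.range(0, 100).randomSplit([0.8, 0.2], seed=42)
print(train.count(), test.count())
```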
+ Examples + -------- >>> from pyspark.sql.functions import * >>> df_as1 = df.alias("df_as1") >>> df_as2 = df.alias("df_as2") @@ -1060,12 +1250,18 @@ def alias(self, alias): assert isinstance(alias, str), "alias should be a string" return DataFrame(getattr(self._jdf, "as")(alias), self.sql_ctx) - @since(2.1) def crossJoin(self, other): """Returns the cartesian product with another :class:`DataFrame`. - :param other: Right side of the cartesian product. + .. versionadded:: 2.1.0 + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the cartesian product. + Examples + -------- >>> df.select("age", "name").collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] >>> df2.select("name", "height").collect() @@ -1078,20 +1274,28 @@ def crossJoin(self, other): jdf = self._jdf.crossJoin(other._jdf) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def join(self, other, on=None, how=None): """Joins with another :class:`DataFrame`, using the given join expression. - :param other: Right side of the join - :param on: a string for the join column name, a list of column names, + .. versionadded:: 1.3.0 + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the join + on : str, list or :class:`Column`, optional + a string for the join column name, a list of column names, a join expression (Column), or a list of Columns. If `on` is a string or a list of strings indicating the name of the join column(s), the column(s) must exist on both sides, and this performs an equi-join. - :param how: str, default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``, + how : str, optional + default ``inner``. Must be one of: ``inner``, ``cross``, ``outer``, ``full``, ``fullouter``, ``full_outer``, ``left``, ``leftouter``, ``left_outer``, ``right``, ``rightouter``, ``right_outer``, ``semi``, ``leftsemi``, ``left_semi``, ``anti``, ``leftanti`` and ``left_anti``. + Examples + -------- The following performs a full outer join between ``df1`` and ``df2``. >>> from pyspark.sql.functions import desc >>> df.join(df2, df.name == df2.name, 'outer').select(df.name, df2.height) \ @@ -1134,15 +1338,25 @@ def join(self, other, on=None, how=None): jdf = self._jdf.join(other._jdf, on, how) return DataFrame(jdf, self.sql_ctx) - @since(1.6) def sortWithinPartitions(self, *cols, **kwargs): """Returns a new :class:`DataFrame` with each partition sorted by the specified column(s). - :param cols: list of :class:`Column` or column names to sort by. - :param ascending: boolean or list of boolean (default ``True``). + .. versionadded:: 1.6.0 + + Parameters + ---------- + cols : str, list or :class:`Column`, optional + list of :class:`Column` or column names to sort by. + + Other Parameters + ---------------- + ascending : bool or list, optional + boolean or list of boolean (default ``True``). Sort ascending vs. descending. Specify list for multiple sort orders. If a list is specified, length of the list must equal length of the `cols`. + Examples + -------- >>> df.sortWithinPartitions("age", ascending=False).show() +---+-----+ |age| name| @@ -1154,15 +1368,25 @@ def sortWithinPartitions(self, *cols, **kwargs): jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def sort(self, *cols, **kwargs): """Returns a new :class:`DataFrame` sorted by the specified column(s). - :param cols: list of :class:`Column` or column names to sort by. - :param ascending: boolean or list of boolean (default ``True``). + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + cols : str, list, or :class:`Column`, optional + list of :class:`Column` or column names to sort by. + + Other Parameters + ---------------- + ascending : bool or list, optional + boolean or list of boolean (default ``True``). Sort ascending vs. descending. Specify list for multiple sort orders. If a list is specified, length of the list must equal length of the `cols`. + Examples + -------- >>> df.sort(df.age.desc()).collect() [Row(age=5, name='Bob'), Row(age=2, name='Alice')] >>> df.sort("age", ascending=False).collect() @@ -1218,17 +1442,24 @@ def _sort_cols(self, cols, kwargs): raise TypeError("ascending can only be boolean or list, but got %s" % type(ascending)) return self._jseq(jcols) - @since("1.3.1") def describe(self, *cols): """Computes basic statistics for numeric and string columns. + .. versionadded:: 1.3.1 + This include count, mean, stddev, min, and max. If no columns are given, this function computes statistics for all numerical or string columns. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. + + Use summary for expanded statistics and control over which statistics to compute. + Examples + -------- >>> df.describe(['age']).show() +-------+------------------+ |summary| age| @@ -1250,14 +1481,15 @@ def describe(self, *cols): | max| 5| Bob| +-------+------------------+-----+ - Use summary for expanded statistics and control over which statistics to compute. + See Also + -------- + DataFrame.summary """ if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] jdf = self._jdf.describe(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) - @since("2.3.0") def summary(self, *statistics): """Computes specified statistics for numeric and string columns. Available statistics are: - count @@ -1270,10 +1502,16 @@ def summary(self, *statistics): If no statistics are given, this function computes count, mean, stddev, min, approximate quartiles (percentiles at 25%, 50%, and 75%), and max. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + .. versionadded:: 2.3.0 + + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. + Examples + -------- >>> df.summary().show() +-------+------------------+-----+ |summary| age| name| @@ -1308,24 +1546,37 @@ def summary(self, *statistics): | count| 2| 2| +-------+---+----+ - See also describe for basic statistics. + See Also + -------- + DataFrame.display """ if len(statistics) == 1 and isinstance(statistics[0], list): statistics = statistics[0] jdf = self._jdf.summary(self._jseq(statistics)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def head(self, n=None): """Returns the first ``n`` rows. - .. note:: This method should only be used if the resulting array is expected - to be small, as all the data is loaded into the driver's memory. + .. versionadded:: 1.3.0 + + Notes + ----- + This method should only be used if the resulting array is expected + to be small, as all the data is loaded into the driver's memory. 
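To illustrate the relationship between `describe` and `summary` spelled out above, a minimal sketch (column choice and requested statistics are illustrative):

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])

df.describe(["age"]).show()                              # count/mean/stddev/min/max
df.summary("count", "min", "25%", "75%", "max").show()   # chosen statistics only
```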
- :param n: int, default 1. Number of rows to return. - :return: If n is greater than 1, return a list of :class:`Row`. - If n is 1, return a single Row. + Parameters + ---------- + n : int, optional + default 1. Number of rows to return. + Returns + ------- + If n is greater than 1, return a list of :class:`Row`. + If n is 1, return a single Row. + + Examples + -------- >>> df.head() Row(age=2, name='Alice') >>> df.head(1) @@ -1336,19 +1587,25 @@ def head(self, n=None): return rs[0] if rs else None return self.take(n) - @since(1.3) def first(self): """Returns the first row as a :class:`Row`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.first() Row(age=2, name='Alice') """ return self.head() - @since(1.3) def __getitem__(self, item): """Returns the column as a :class:`Column`. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df['age']).collect() [Row(age=2), Row(age=5)] >>> df[ ["name", "age"]].collect() @@ -1371,10 +1628,13 @@ def __getitem__(self, item): else: raise TypeError("unexpected item type: %s" % type(item)) - @since(1.3) def __getattr__(self, name): """Returns the :class:`Column` denoted by ``name``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(df.age).collect() [Row(age=2), Row(age=5)] """ @@ -1384,14 +1644,20 @@ def __getattr__(self, name): jc = self._jdf.apply(name) return Column(jc) - @since(1.3) def select(self, *cols): """Projects a set of expressions and returns a new :class:`DataFrame`. - :param cols: list of column names (string) or expressions (:class:`Column`). + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str, :class:`Column`, or list + column names (string) or expressions (:class:`Column`). If one of the column names is '*', that column is expanded to include all columns in the current :class:`DataFrame`. + Examples + -------- >>> df.select('*').collect() [Row(age=2, name='Alice'), Row(age=5, name='Bob')] >>> df.select('name', 'age').collect() @@ -1402,12 +1668,15 @@ def select(self, *cols): jdf = self._jdf.select(self._jcols(*cols)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def selectExpr(self, *expr): """Projects a set of SQL expressions and returns a new :class:`DataFrame`. This is a variant of :func:`select` that accepts SQL expressions. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.selectExpr("age * 2", "abs(age)").collect() [Row((age * 2)=4, abs(age)=2), Row((age * 2)=10, abs(age)=5)] """ @@ -1416,15 +1685,21 @@ def selectExpr(self, *expr): jdf = self._jdf.selectExpr(self._jseq(expr)) return DataFrame(jdf, self.sql_ctx) - @since(1.3) def filter(self, condition): """Filters rows using the given condition. :func:`where` is an alias for :func:`filter`. - :param condition: a :class:`Column` of :class:`types.BooleanType` + .. versionadded:: 1.3.0 + + Parameters + ---------- + condition : :class:`Column` or str + a :class:`Column` of :class:`types.BooleanType` or a string of SQL expression. + Examples + -------- >>> df.filter(df.age > 3).collect() [Row(age=5, name='Bob')] >>> df.where(df.age == 2).collect() @@ -1443,7 +1718,6 @@ def filter(self, condition): raise TypeError("condition should be string or Column") return DataFrame(jdf, self.sql_ctx) - @since(1.3) def groupBy(self, *cols): """Groups the :class:`DataFrame` using the specified columns, so we can run aggregation on them. See :class:`GroupedData` @@ -1451,9 +1725,16 @@ def groupBy(self, *cols): :func:`groupby` is an alias for :func:`groupBy`. - :param cols: list of columns to group by. + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + cols : list, str or :class:`Column` + columns to group by. Each element should be a column name (string) or an expression (:class:`Column`). + Examples + -------- >>> df.groupBy().avg().collect() [Row(avg(age)=3.5)] >>> sorted(df.groupBy('name').agg({'age': 'mean'}).collect()) @@ -1467,12 +1748,15 @@ def groupBy(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.4) def rollup(self, *cols): """ Create a multi-dimensional rollup for the current :class:`DataFrame` using the specified columns, so we can run aggregation on them. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.rollup("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -1488,12 +1772,15 @@ def rollup(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.4) def cube(self, *cols): """ Create a multi-dimensional cube for the current :class:`DataFrame` using the specified columns, so we can run aggregations on them. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.cube("name", df.age).count().orderBy("name", "age").show() +-----+----+-----+ | name| age|count| @@ -1511,11 +1798,14 @@ def cube(self, *cols): from pyspark.sql.group import GroupedData return GroupedData(jgd, self) - @since(1.3) def agg(self, *exprs): """ Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy().agg()``). + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.agg({"age": "max"}).collect() [Row(max(age)=5)] >>> from pyspark.sql import functions as F @@ -1548,7 +1838,6 @@ def unionAll(self, other): """ return self.union(other) - @since(2.3) def unionByName(self, other, allowMissingColumns=False): """ Returns a new :class:`DataFrame` containing union of rows in this and another :class:`DataFrame`. @@ -1556,6 +1845,10 @@ def unionByName(self, other, allowMissingColumns=False): This is different from both `UNION ALL` and `UNION DISTINCT` in SQL. To do a SQL-style set union (that does deduplication of elements), use this function followed by :func:`distinct`. + .. versionadded:: 2.3.0 + + Examples + -------- The difference between this function and :func:`union` is that this function resolves columns by name (not by position): @@ -1599,12 +1892,17 @@ def intersect(self, other): """ return DataFrame(self._jdf.intersect(other._jdf), self.sql_ctx) - @since(2.4) def intersectAll(self, other): """ Return a new :class:`DataFrame` containing rows in both this :class:`DataFrame` and another :class:`DataFrame` while preserving duplicates. - This is equivalent to `INTERSECT ALL` in SQL. + This is equivalent to `INTERSECT ALL` in SQL. As standard in SQL, this function + resolves columns by position (not by name). + + .. versionadded:: 2.4.0 + + Examples + -------- >>> df1 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3), ("c", 4)], ["C1", "C2"]) >>> df2 = spark.createDataFrame([("a", 1), ("a", 1), ("b", 3)], ["C1", "C2"]) @@ -1617,7 +1915,6 @@ def intersectAll(self, other): | b| 3| +---+---+ - Also as standard in SQL, this function resolves columns by position (not by name). """ return DataFrame(self._jdf.intersectAll(other._jdf), self.sql_ctx) @@ -1631,7 +1928,6 @@ def subtract(self, other): """ return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sql_ctx) - @since(1.4) def dropDuplicates(self, subset=None): """Return a new :class:`DataFrame` with duplicate rows removed, optionally only considering certain columns. 
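The rollup and cube conversions above read almost identically, so a short sketch of the behavioural difference may help (assuming the same `spark` session and the `name`/`age` DataFrame used in the doctests):

    # rollup("name", "age") aggregates over (name, age), (name) and the grand total,
    # while cube("name", "age") additionally aggregates over (age) on its own.
    df.rollup("name", "age").count().orderBy("name", "age").show()
    df.cube("name", "age").count().orderBy("name", "age").show()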
@@ -1644,6 +1940,10 @@ def dropDuplicates(self, subset=None): :func:`drop_duplicates` is an alias for :func:`dropDuplicates`. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> df = sc.parallelize([ \\ ... Row(name='Alice', age=5, height=80), \\ @@ -1670,19 +1970,27 @@ def dropDuplicates(self, subset=None): jdf = self._jdf.dropDuplicates(self._jseq(subset)) return DataFrame(jdf, self.sql_ctx) - @since("1.3.1") def dropna(self, how='any', thresh=None, subset=None): """Returns a new :class:`DataFrame` omitting rows with null values. :func:`DataFrame.dropna` and :func:`DataFrameNaFunctions.drop` are aliases of each other. - :param how: 'any' or 'all'. + .. versionadded:: 1.3.1 + + Parameters + ---------- + how : str, optional + 'any' or 'all'. If 'any', drop a row if it contains any nulls. If 'all', drop a row only if all its values are null. - :param thresh: int, default None + thresh: int, optional + default None If specified, drop rows that have less than `thresh` non-null values. This overwrites the `how` parameter. - :param subset: optional list of column names to consider. + subset : str, tuple or list, optional + optional list of column names to consider. + Examples + -------- >>> df4.na.drop().show() +---+------+-----+ |age|height| name| @@ -1705,21 +2013,27 @@ def dropna(self, how='any', thresh=None, subset=None): return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sql_ctx) - @since("1.3.1") def fillna(self, value, subset=None): """Replace null values, alias for ``na.fill()``. :func:`DataFrame.fillna` and :func:`DataFrameNaFunctions.fill` are aliases of each other. - :param value: int, float, string, bool or dict. + .. versionadded:: 1.3.1 + + Parameters + ---------- + value : int, float, string, bool or dict Value to replace null values with. If the value is a dict, then `subset` is ignored and `value` must be a mapping from column name (string) to replacement value. The replacement value must be an int, float, boolean, or string. - :param subset: optional list of column names to consider. + subset : str, tuple or list, optional + optional list of column names to consider. Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, then the non-string column is simply ignored. + Examples + -------- >>> df4.na.fill(50).show() +---+------+-----+ |age|height| name| @@ -1770,7 +2084,6 @@ def fillna(self, value, subset=None): return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sql_ctx) - @since(1.4) def replace(self, to_replace, value=_NoValue, subset=None): """Returns a new :class:`DataFrame` replacing a value with another value. :func:`DataFrame.replace` and :func:`DataFrameNaFunctions.replace` are @@ -1782,20 +2095,27 @@ def replace(self, to_replace, value=_NoValue, subset=None): floating point representation. In case of conflicts (for example with `{42: -1, 42.0: 1}`) and arbitrary replacement will be used. - :param to_replace: bool, int, float, string, list or dict. + .. versionadded:: 1.4.0 + + Parameters + ---------- + to_replace : bool, int, float, string, list or dict Value to be replaced. If the value is a dict, then `value` is ignored or can be omitted, and `to_replace` must be a mapping between a value and a replacement. - :param value: bool, int, float, string, list or None. + value : bool, int, float, string or None, optional The replacement value must be a bool, int, float, string or None. 
If `value` is a list, `value` should be of the same length and type as `to_replace`. If `value` is a scalar and `to_replace` is a sequence, then `value` is used as a replacement for each item in `to_replace`. - :param subset: optional list of column names to consider. + subset : list, optional + optional list of column names to consider. Columns specified in subset that do not have matching data type are ignored. For example, if `value` is a string, and subset contains a non-string column, then the non-string column is simply ignored. + Examples + -------- >>> df4.na.replace(10, 20).show() +----+------+-----+ | age|height| name| @@ -1910,7 +2230,6 @@ def all_of_(xs): return DataFrame( self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), self.sql_ctx) - @since(2.0) def approxQuantile(self, col, probabilities, relativeError): """ Calculates the approximate quantiles of numerical columns of a @@ -1933,23 +2252,33 @@ def approxQuantile(self, col, probabilities, relativeError): Note that null values will be ignored in numerical columns before calculation. For columns only containing null values, an empty list is returned. - :param col: str, list. - Can be a single column name, or a list of names for multiple columns. - :param probabilities: a list of quantile probabilities - Each number must belong to [0, 1]. - For example 0 is the minimum, 0.5 is the median, 1 is the maximum. - :param relativeError: The relative target precision to achieve - (>= 0). If set to zero, the exact quantiles are computed, which - could be very expensive. Note that values greater than 1 are - accepted but give the same result as 1. - :return: the approximate quantiles at the given probabilities. If - the input `col` is a string, the output is a list of floats. If the - input `col` is a list or tuple of strings, the output is also a - list, but each element in it is a list of floats, i.e., the output - is a list of list of floats. - - .. versionchanged:: 2.2 - Added support for multiple columns. + .. versionadded:: 2.0.0 + + Parameters + ---------- + col: str, tuple or list + Can be a single column name, or a list of names for multiple columns. + + .. versionchanged:: 2.2 + Added support for multiple columns. + probabilities : list or tuple + a list of quantile probabilities + Each number must belong to [0, 1]. + For example 0 is the minimum, 0.5 is the median, 1 is the maximum. + relativeError : float + The relative target precision to achieve + (>= 0). If set to zero, the exact quantiles are computed, which + could be very expensive. Note that values greater than 1 are + accepted but give the same result as 1. + + Returns + ------- + list + the approximate quantiles at the given probabilities. If + the input `col` is a string, the output is a list of floats. If the + input `col` is a list or tuple of strings, the output is also a + list, but each element in it is a list of floats, i.e., the output + is a list of list of floats. """ if not isinstance(col, (str, list, tuple)): @@ -1984,16 +2313,22 @@ def approxQuantile(self, col, probabilities, relativeError): jaq_list = [list(j) for j in jaq] return jaq_list[0] if isStr else jaq_list - @since(1.4) def corr(self, col1, col2, method=None): """ Calculates the correlation of two columns of a :class:`DataFrame` as a double value. Currently only supports the Pearson Correlation Coefficient. :func:`DataFrame.corr` and :func:`DataFrameStatFunctions.corr` are aliases of each other. 
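Because the single-column and multi-column forms of approxQuantile documented above return differently shaped results, a small sketch may help (illustrative data only, assuming a SparkSession named `spark`):

    df = spark.createDataFrame([(1, 10.0), (2, 20.0), (3, 30.0), (4, 40.0)], ["a", "b"])

    # A single column name yields a flat list of floats; relativeError=0.0 requests exact quantiles.
    median_a = df.approxQuantile("a", [0.5], 0.0)

    # A list of column names yields one list of floats per column, in the same order as `col`.
    quartiles = df.approxQuantile(["a", "b"], [0.25, 0.5, 0.75], 0.1)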
- :param col1: The name of the first column - :param col2: The name of the second column - :param method: The correlation method. Currently only supports "pearson" + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column + col2 : str + The name of the second column + method : str, optional + The correlation method. Currently only supports "pearson" """ if not isinstance(col1, str): raise ValueError("col1 should be a string.") @@ -2006,14 +2341,19 @@ def corr(self, col1, col2, method=None): "coefficient is supported.") return self._jdf.stat().corr(col1, col2, method) - @since(1.4) def cov(self, col1, col2): """ Calculate the sample covariance for the given columns, specified by their names, as a double value. :func:`DataFrame.cov` and :func:`DataFrameStatFunctions.cov` are aliases. - :param col1: The name of the first column - :param col2: The name of the second column + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column + col2 : str + The name of the second column """ if not isinstance(col1, str): raise ValueError("col1 should be a string.") @@ -2021,7 +2361,6 @@ def cov(self, col1, col2): raise ValueError("col2 should be a string.") return self._jdf.stat().cov(col1, col2) - @since(1.4) def crosstab(self, col1, col2): """ Computes a pair-wise frequency table of the given columns. Also known as a contingency @@ -2032,9 +2371,15 @@ def crosstab(self, col1, col2): Pairs that have no occurrences will have zero as their counts. :func:`DataFrame.crosstab` and :func:`DataFrameStatFunctions.crosstab` are aliases. - :param col1: The name of the first column. Distinct items will make the first item of + .. versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str + The name of the first column. Distinct items will make the first item of each row. - :param col2: The name of the second column. Distinct items will make the column names + col2 : str + The name of the second column. Distinct items will make the column names of the :class:`DataFrame`. """ if not isinstance(col1, str): @@ -2043,7 +2388,6 @@ def crosstab(self, col1, col2): raise ValueError("col2 should be a string.") return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sql_ctx) - @since(1.4) def freqItems(self, cols, support=None): """ Finding frequent items for columns, possibly with false positives. Using the @@ -2051,14 +2395,22 @@ def freqItems(self, cols, support=None): "https://doi.org/10.1145/762471.762473, proposed by Karp, Schenker, and Papadimitriou". :func:`DataFrame.freqItems` and :func:`DataFrameStatFunctions.freqItems` are aliases. - .. note:: This function is meant for exploratory data analysis, as we make no - guarantee about the backward compatibility of the schema of the resulting - :class:`DataFrame`. + .. versionadded:: 1.4.0 - :param cols: Names of the columns to calculate frequent items for as a list or tuple of + Parameters + ---------- + cols : list or tuple + Names of the columns to calculate frequent items for as a list or tuple of strings. - :param support: The frequency with which to consider an item 'frequent'. Default is 1%. + support : float, optional + The frequency with which to consider an item 'frequent'. Default is 1%. The support must be greater than 1e-4. + + Notes + ----- + This function is meant for exploratory data analysis, as we make no + guarantee about the backward compatibility of the schema of the resulting + :class:`DataFrame`. 
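To make the freqItems contract above concrete, a minimal sketch (illustrative data only, assuming a SparkSession named `spark`):

    df = spark.createDataFrame([(1, 11), (1, 12), (2, 13), (1, 11)], ["a", "b"])

    # The result may contain false positives; each output column is suffixed with `_freqItems`.
    df.freqItems(["a", "b"], support=0.5).show()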
""" if isinstance(cols, tuple): cols = list(cols) @@ -2068,7 +2420,6 @@ def freqItems(self, cols, support=None): support = 0.01 return DataFrame(self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sql_ctx) - @since(1.3) def withColumn(self, colName, col): """ Returns a new :class:`DataFrame` by adding a column or replacing the @@ -2077,14 +2428,24 @@ def withColumn(self, colName, col): The column expression must be an expression over this :class:`DataFrame`; attempting to add a column from some other :class:`DataFrame` will raise an error. - :param colName: string, name of the new column. - :param col: a :class:`Column` expression for the new column. + .. versionadded:: 1.3.0 + + Parameters + ---------- + colName : str + string, name of the new column. + col : :class:`Column` + a :class:`Column` expression for the new column. - .. note:: This method introduces a projection internally. Therefore, calling it multiple - times, for instance, via loops in order to add multiple columns can generate big - plans which can cause performance issues and even `StackOverflowException`. - To avoid this, use :func:`select` with the multiple columns at once. + Notes + ----- + This method introduces a projection internally. Therefore, calling it multiple + times, for instance, via loops in order to add multiple columns can generate big + plans which can cause performance issues and even `StackOverflowException`. + To avoid this, use :func:`select` with the multiple columns at once. + Examples + -------- >>> df.withColumn('age2', df.age + 2).collect() [Row(age=2, name='Alice', age2=4), Row(age=5, name='Bob', age2=7)] @@ -2092,27 +2453,39 @@ def withColumn(self, colName, col): assert isinstance(col, Column), "col should be Column" return DataFrame(self._jdf.withColumn(colName, col._jc), self.sql_ctx) - @since(1.3) def withColumnRenamed(self, existing, new): """Returns a new :class:`DataFrame` by renaming an existing column. This is a no-op if schema doesn't contain the given column name. - :param existing: string, name of the existing column to rename. - :param new: string, new name of the column. + .. versionadded:: 1.3.0 + + Parameters + ---------- + existing : str + string, name of the existing column to rename. + new : str + string, new name of the column. + Examples + -------- >>> df.withColumnRenamed('age', 'age2').collect() [Row(age2=2, name='Alice'), Row(age2=5, name='Bob')] """ return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sql_ctx) - @since(1.4) def drop(self, *cols): """Returns a new :class:`DataFrame` that drops the specified column. This is a no-op if schema doesn't contain the given column name(s). - :param cols: a string name of the column to drop, or a - :class:`Column` to drop, or a list of string name of the columns to drop. + .. versionadded:: 1.4.0 + Parameters + ---------- + cols: str or :class:`Column` + a name of the column, or the :class:`Column` to drop + + Examples + -------- >>> df.drop('age').collect() [Row(name='Alice'), Row(name='Bob')] @@ -2147,20 +2520,31 @@ def drop(self, *cols): def toDF(self, *cols): """Returns a new :class:`DataFrame` that with new specified column names - :param cols: list of new column names (string) + Parameters + ---------- + cols : str + new column names + Examples + -------- >>> df.toDF('f1', 'f2').collect() [Row(f1=2, f2='Alice'), Row(f1=5, f2='Bob')] """ jdf = self._jdf.toDF(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) - @since(3.0) def transform(self, func): """Returns a new :class:`DataFrame`. 
Concise syntax for chaining custom transformations. - :param func: a function that takes and returns a :class:`DataFrame`. + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a function that takes and returns a :class:`DataFrame`. + Examples + -------- >>> from pyspark.sql.functions import col >>> df = spark.createDataFrame([(1, 1.0), (2, 2.0)], ["int", "float"]) >>> def cast_all_to_int(input_df): @@ -2180,21 +2564,26 @@ def transform(self, func): "should have been DataFrame." % type(result) return result - @since(3.1) def sameSemantics(self, other): """ Returns `True` when the logical query plans inside both :class:`DataFrame`\\s are equal and therefore return same results. - .. note:: The equality comparison here is simplified by tolerating the cosmetic differences - such as attribute names. + .. versionadded:: 3.1.0 - .. note:: This API can compare both :class:`DataFrame`\\s very fast but can still return - `False` on the :class:`DataFrame` that return the same results, for instance, from - different plans. Such false negative semantic can be useful when caching as an example. + Notes + ----- + The equality comparison here is simplified by tolerating the cosmetic differences + such as attribute names. - .. note:: DeveloperApi + This API can compare both :class:`DataFrame`\\s very fast but can still return + `False` on the :class:`DataFrame` that return the same results, for instance, from + different plans. Such false negative semantic can be useful when caching as an example. + This API is a developer API. + + Examples + -------- >>> df1 = spark.range(10) >>> df2 = spark.range(10) >>> df1.withColumn("col1", df1.id * 2).sameSemantics(df2.withColumn("col1", df2.id * 2)) @@ -2209,16 +2598,21 @@ def sameSemantics(self, other): % type(other)) return self._jdf.sameSemantics(other._jdf) - @since(3.1) def semanticHash(self): """ Returns a hash code of the logical query plan against this :class:`DataFrame`. - .. note:: Unlike the standard hash code, the hash is calculated against the query plan - simplified by tolerating the cosmetic differences such as attribute names. + .. versionadded:: 3.1.0 + + Notes + ----- + Unlike the standard hash code, the hash is calculated against the query plan + simplified by tolerating the cosmetic differences such as attribute names. - .. note:: DeveloperApi + This API is a developer API. + Examples + -------- >>> spark.range(10).selectExpr("id as col0").semanticHash() # doctest: +SKIP 1855039936 >>> spark.range(10).selectExpr("id as col1").semanticHash() # doctest: +SKIP @@ -2226,7 +2620,6 @@ def semanticHash(self): """ return self._jdf.semanticHash() - @since(3.1) def inputFiles(self): """ Returns a best-effort snapshot of the files that compose this :class:`DataFrame`. @@ -2234,6 +2627,10 @@ def inputFiles(self): takes the union of all results. Depending on the source relations, this may not find all input files. Duplicates are removed. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df = spark.read.load("examples/src/main/resources/people.json", format="json") >>> len(df.inputFiles()) 1 @@ -2260,7 +2657,6 @@ def inputFiles(self): sinceversion=1.4, doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.") - @since(3.1) def writeTo(self, table): """ Create a write configuration builder for v2 sources. @@ -2269,6 +2665,10 @@ def writeTo(self, table): For example, to append or create or replace existing tables. + .. 
versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").append() # doctest: +SKIP >>> df.writeTo( # doctest: +SKIP ... "catalog.db.table" diff --git a/python/pyspark/sql/dataframe.pyi b/python/pyspark/sql/dataframe.pyi index c498d529d820f..1351c59470c9d 100644 --- a/python/pyspark/sql/dataframe.pyi +++ b/python/pyspark/sql/dataframe.pyi @@ -31,6 +31,7 @@ from typing import ( from py4j.java_gateway import JavaObject # type: ignore[import] from pyspark.sql._typing import ColumnOrName, LiteralType, OptionalPrimitiveType +from pyspark._typing import PrimitiveType from pyspark.sql.types import ( # noqa: F401 StructType, StructField, @@ -86,7 +87,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): def withWatermark( self, eventTime: ColumnOrName, delayThreshold: str ) -> DataFrame: ... - def hint(self, name: str, *parameters: Any) -> DataFrame: ... + def hint(self, name: str, *parameters: Union[PrimitiveType, List[PrimitiveType]]) -> DataFrame: ... def count(self) -> int: ... def collect(self) -> List[Row]: ... def toLocalIterator(self, prefetchPartitions: bool = ...) -> Iterator[Row]: ... @@ -122,7 +123,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): seed: Optional[int] = ..., ) -> DataFrame: ... def sampleBy( - self, col: str, fractions: Dict[Any, float], seed: Optional[int] = ... + self, col: ColumnOrName, fractions: Dict[Any, float], seed: Optional[int] = ... ) -> DataFrame: ... def randomSplit( self, weights: List[float], seed: Optional[int] = ... @@ -199,7 +200,7 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): self, how: str = ..., thresh: Optional[int] = ..., - subset: Optional[List[str]] = ..., + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = ..., ) -> DataFrame: ... @overload def fillna( @@ -237,13 +238,16 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): subset: Optional[List[str]] = ..., ) -> DataFrame: ... def approxQuantile( - self, col: str, probabilities: List[float], relativeError: float + self, + col: Union[str, Tuple[str, ...], List[str]], + probabilities: Union[List[float], Tuple[float, ...]], + relativeError: float ) -> List[float]: ... def corr(self, col1: str, col2: str, method: Optional[str] = ...) -> float: ... def cov(self, col1: str, col2: str) -> float: ... def crosstab(self, col1: str, col2: str) -> DataFrame: ... def freqItems( - self, cols: List[str], support: Optional[float] = ... + self, cols: Union[List[str], Tuple[str]], support: Optional[float] = ... ) -> DataFrame: ... def withColumn(self, colName: str, col: Column) -> DataFrame: ... def withColumnRenamed(self, existing: str, new: str) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index c349ae5cf46c4..87b999dca76ec 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -84,11 +84,14 @@ def _options_to_str(options): return {key: to_str(value) for (key, value) in options.items()} -@since(1.3) def lit(col): """ Creates a :class:`Column` of literal value. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.select(lit(5).alias('height')).withColumn('spark_user', lit(True)).take(1) [Row(height=5, spark_user=True)] """ @@ -199,26 +202,39 @@ def sumDistinct(col): return _invoke_function_over_column("sumDistinct", col) -@since(1.4) def acos(col): """ - :return: inverse cosine of `col`, as if computed by `java.lang.Math.acos()` + .. 
versionadded:: 1.4.0 + + Returns + ------- + :class:`Column` + inverse cosine of `col`, as if computed by `java.lang.Math.acos()` """ return _invoke_function_over_column("acos", col) -@since(1.4) def asin(col): """ - :return: inverse sine of `col`, as if computed by `java.lang.Math.asin()` + .. versionadded:: 1.3.0 + + + Returns + ------- + :class:`Column` + inverse sine of `col`, as if computed by `java.lang.Math.asin()` """ return _invoke_function_over_column("asin", col) -@since(1.4) def atan(col): """ - :return: inverse tangent of `col`, as if computed by `java.lang.Math.atan()` + .. versionadded:: 1.4.0 + + Returns + ------- + :class:`Column` + inverse tangent of `col`, as if computed by `java.lang.Math.atan()` """ return _invoke_function_over_column("atan", col) @@ -239,20 +255,36 @@ def ceil(col): return _invoke_function_over_column("ceil", col) -@since(1.4) def cos(col): """ - :param col: angle in radians - :return: cosine of the angle, as if computed by `java.lang.Math.cos()`. + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + cosine of the angle, as if computed by `java.lang.Math.cos()`. """ return _invoke_function_over_column("cos", col) -@since(1.4) def cosh(col): """ - :param col: hyperbolic angle - :return: hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic cosine of the angle, as if computed by `java.lang.Math.cosh()` """ return _invoke_function_over_column("cosh", col) @@ -322,40 +354,71 @@ def signum(col): return _invoke_function_over_column("signum", col) -@since(1.4) def sin(col): """ - :param col: angle in radians - :return: sine of the angle, as if computed by `java.lang.Math.sin()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + + Returns + ------- + :class:`Column` + sine of the angle, as if computed by `java.lang.Math.sin()` """ return _invoke_function_over_column("sin", col) -@since(1.4) def sinh(col): """ - :param col: hyperbolic angle - :return: hyperbolic sine of the given value, - as if computed by `java.lang.Math.sinh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic sine of the given value, + as if computed by `java.lang.Math.sinh()` """ return _invoke_function_over_column("sinh", col) -@since(1.4) def tan(col): """ - :param col: angle in radians - :return: tangent of the given value, as if computed by `java.lang.Math.tan()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + tangent of the given value, as if computed by `java.lang.Math.tan()` """ return _invoke_function_over_column("tan", col) -@since(1.4) def tanh(col): """ - :param col: hyperbolic angle - :return: hyperbolic tangent of the given value - as if computed by `java.lang.Math.tanh()` + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + hyperbolic angle + + Returns + ------- + :class:`Column` + hyperbolic tangent of the given value + as if computed by `java.lang.Math.tanh()` """ return _invoke_function_over_column("tanh", col) @@ -363,7 +426,8 @@ def tanh(col): @since(1.4) def toDegrees(col): """ - .. note:: Deprecated in 2.1, use :func:`degrees` instead. 
+ .. deprecated:: 2.1.0 + Use :func:`degrees` instead. """ warnings.warn("Deprecated in 2.1, use degrees instead.", DeprecationWarning) return degrees(col) @@ -372,7 +436,8 @@ def toDegrees(col): @since(1.4) def toRadians(col): """ - .. note:: Deprecated in 2.1, use :func:`radians` instead. + .. deprecated:: 2.1.0 + Use :func:`radians` instead. """ warnings.warn("Deprecated in 2.1, use radians instead.", DeprecationWarning) return radians(col) @@ -489,14 +554,19 @@ def kurtosis(col): return _invoke_function_over_column("kurtosis", col) -@since(1.6) def collect_list(col): """ Aggregate function: returns a list of objects with duplicates. - .. note:: The function is non-deterministic because the order of collected results depends - on the order of the rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.6.0 + Notes + ----- + The function is non-deterministic because the order of collected results depends + on the order of the rows which may be non-deterministic after a shuffle. + + Examples + -------- >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',)) >>> df2.agg(collect_list('age')).collect() [Row(collect_list(age)=[2, 5, 5])] @@ -504,14 +574,19 @@ def collect_list(col): return _invoke_function_over_column("collect_list", col) -@since(1.6) def collect_set(col): """ Aggregate function: returns a set of objects with duplicate elements eliminated. - .. note:: The function is non-deterministic because the order of collected results depends - on the order of the rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.6.0 + + Notes + ----- + The function is non-deterministic because the order of collected results depends + on the order of the rows which may be non-deterministic after a shuffle. + Examples + -------- >>> df2 = spark.createDataFrame([(2,), (5,), (5,)], ('age',)) >>> df2.agg(collect_set('age')).collect() [Row(collect_set(age)=[5, 2])] @@ -519,40 +594,65 @@ def collect_set(col): return _invoke_function_over_column("collect_set", col) -@since(2.1) def degrees(col): """ Converts an angle measured in radians to an approximately equivalent angle measured in degrees. - :param col: angle in radians - :return: angle in degrees, as if computed by `java.lang.Math.toDegrees()` + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in radians + + Returns + ------- + :class:`Column` + angle in degrees, as if computed by `java.lang.Math.toDegrees()` """ return _invoke_function_over_column("degrees", col) -@since(2.1) def radians(col): """ Converts an angle measured in degrees to an approximately equivalent angle measured in radians. - :param col: angle in degrees - :return: angle in radians, as if computed by `java.lang.Math.toRadians()` + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + angle in degrees + + Returns + ------- + :class:`Column` + angle in radians, as if computed by `java.lang.Math.toRadians()` """ return _invoke_function_over_column("radians", col) -@since(1.4) def atan2(col1, col2): """ - :param col1: coordinate on y-axis - :param col2: coordinate on x-axis - :return: the `theta` component of the point - (`r`, `theta`) - in polar coordinates that corresponds to the point - (`x`, `y`) in Cartesian coordinates, - as if computed by `java.lang.Math.atan2()` + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + col1 : str, :class:`Column` or float + coordinate on y-axis + col2 : str, :class:`Column` or float + coordinate on x-axis + + Returns + ------- + :class:`Column` + the `theta` component of the point + (`r`, `theta`) + in polar coordinates that corresponds to the point + (`x`, `y`) in Cartesian coordinates, + as if computed by `java.lang.Math.atan2()` """ return _invoke_binary_math_function("atan2", col1, col2) @@ -633,20 +733,28 @@ def percent_rank(): @since(1.3) def approxCountDistinct(col, rsd=None): """ - .. note:: Deprecated in 2.1, use :func:`approx_count_distinct` instead. + .. deprecated:: 2.1.0 + Use :func:`approx_count_distinct` instead. """ warnings.warn("Deprecated in 2.1, use approx_count_distinct instead.", DeprecationWarning) return approx_count_distinct(col, rsd) -@since(2.1) def approx_count_distinct(col, rsd=None): """Aggregate function: returns a new :class:`Column` for approximate distinct count of column `col`. - :param rsd: maximum relative standard deviation allowed (default = 0.05). + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + rsd : float, optional + maximum relative standard deviation allowed (default = 0.05). For rsd < 0.01, it is more efficient to use :func:`countDistinct` + Examples + -------- >>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect() [Row(distinct_ages=2)] """ @@ -666,10 +774,13 @@ def broadcast(df): return DataFrame(sc._jvm.functions.broadcast(df._jdf), df.sql_ctx) -@since(1.4) def coalesce(*cols): """Returns the first column that is not null. + .. versionadded:: 1.4.0 + + Examples + -------- >>> cDf = spark.createDataFrame([(None, None), (1, None), (None, 2)], ("a", "b")) >>> cDf.show() +----+----+ @@ -703,11 +814,14 @@ def coalesce(*cols): return Column(jc) -@since(1.6) def corr(col1, col2): """Returns a new :class:`Column` for the Pearson Correlation Coefficient for ``col1`` and ``col2``. + .. versionadded:: 1.6.0 + + Examples + -------- >>> a = range(20) >>> b = [2 * x for x in range(20)] >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -718,10 +832,13 @@ def corr(col1, col2): return Column(sc._jvm.functions.corr(_to_java_column(col1), _to_java_column(col2))) -@since(2.0) def covar_pop(col1, col2): """Returns a new :class:`Column` for the population covariance of ``col1`` and ``col2``. + .. versionadded:: 2.0.0 + + Examples + -------- >>> a = [1] * 10 >>> b = [1] * 10 >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -732,10 +849,13 @@ def covar_pop(col1, col2): return Column(sc._jvm.functions.covar_pop(_to_java_column(col1), _to_java_column(col2))) -@since(2.0) def covar_samp(col1, col2): """Returns a new :class:`Column` for the sample covariance of ``col1`` and ``col2``. + .. versionadded:: 2.0.0 + + Examples + -------- >>> a = [1] * 10 >>> b = [1] * 10 >>> df = spark.createDataFrame(zip(a, b), ["a", "b"]) @@ -746,10 +866,13 @@ def covar_samp(col1, col2): return Column(sc._jvm.functions.covar_samp(_to_java_column(col1), _to_java_column(col2))) -@since(1.3) def countDistinct(col, *cols): """Returns a new :class:`Column` for distinct count of ``col`` or ``cols``. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.agg(countDistinct(df.age, df.name).alias('c')).collect() [Row(c=2)] @@ -761,27 +884,33 @@ def countDistinct(col, *cols): return Column(jc) -@since(1.3) def first(col, ignorenulls=False): """Aggregate function: returns the first value in a group. The function by default returns the first values it sees. 
It will return the first non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned. - .. note:: The function is non-deterministic because its results depends on the order of the - rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.3.0 + + Notes + ----- + The function is non-deterministic because its results depends on the order of the + rows which may be non-deterministic after a shuffle. """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.first(_to_java_column(col), ignorenulls) return Column(jc) -@since(2.0) def grouping(col): """ Aggregate function: indicates whether a specified column in a GROUP BY list is aggregated or not, returns 1 for aggregated or 0 for not aggregated in the result set. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df.cube("name").agg(grouping("name"), sum("age")).orderBy("name").show() +-----+--------------+--------+ | name|grouping(name)|sum(age)| @@ -796,16 +925,21 @@ def grouping(col): return Column(jc) -@since(2.0) def grouping_id(*cols): """ Aggregate function: returns the level of grouping, equals to (grouping(c1) << (n-1)) + (grouping(c2) << (n-2)) + ... + grouping(cn) - .. note:: The list of columns should match with grouping columns exactly, or empty (means all - the grouping columns). + .. versionadded:: 2.0.0 + Notes + ----- + The list of columns should match with grouping columns exactly, or empty (means all + the grouping columns). + + Examples + -------- >>> df.cube("name").agg(grouping_id(), sum("age")).orderBy("name").show() +-----+-------------+--------+ | name|grouping_id()|sum(age)| @@ -828,10 +962,13 @@ def input_file_name(): return Column(sc._jvm.functions.input_file_name()) -@since(1.6) def isnan(col): """An expression that returns true iff the column is NaN. + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b")) >>> df.select(isnan("a").alias("r1"), isnan(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] @@ -840,10 +977,13 @@ def isnan(col): return Column(sc._jvm.functions.isnan(_to_java_column(col))) -@since(1.6) def isnull(col): """An expression that returns true iff the column is null. + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, None), (None, 2)], ("a", "b")) >>> df.select(isnull("a").alias("r1"), isnull(df.a).alias("r2")).collect() [Row(r1=False, r2=False), Row(r1=True, r2=True)] @@ -852,22 +992,24 @@ def isnull(col): return Column(sc._jvm.functions.isnull(_to_java_column(col))) -@since(1.3) def last(col, ignorenulls=False): """Aggregate function: returns the last value in a group. The function by default returns the last values it sees. It will return the last non-null value it sees when ignoreNulls is set to true. If all values are null, then null is returned. - .. note:: The function is non-deterministic because its results depends on the order of the - rows which may be non-deterministic after a shuffle. + .. versionadded:: 1.3.0 + + Notes + ----- + The function is non-deterministic because its results depends on the order of the + rows which may be non-deterministic after a shuffle. """ sc = SparkContext._active_spark_context jc = sc._jvm.functions.last(_to_java_column(col), ignorenulls) return Column(jc) -@since(1.6) def monotonically_increasing_id(): """A column that generates monotonically increasing 64-bit integers. 
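The ignorenulls flag on first() and last() above is easy to misread, so a short sketch (hypothetical data, assuming a SparkSession named `spark`):

    from pyspark.sql.functions import first, last

    df = spark.createDataFrame([("a", 1), ("a", None)], ["k", "v"])

    # With ignorenulls=True the aggregates skip nulls; with the default False they may return
    # null, and either way the picked row depends on ordering after a shuffle (non-deterministic).
    df.groupBy("k").agg(first("v", ignorenulls=True), last("v", ignorenulls=True)).show()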
@@ -876,7 +1018,11 @@ def monotonically_increasing_id(): within each partition in the lower 33 bits. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records. - .. note:: The function is non-deterministic because its result depends on partition IDs. + .. versionadded:: 1.6.0 + + Notes + ----- + The function is non-deterministic because its result depends on partition IDs. As an example, consider a :class:`DataFrame` with two partitions, each with 3 records. This expression would return the following IDs: @@ -890,12 +1036,15 @@ def monotonically_increasing_id(): return Column(sc._jvm.functions.monotonically_increasing_id()) -@since(1.6) def nanvl(col1, col2): """Returns col1 if it is not NaN, or col2 if col1 is NaN. Both inputs should be floating point columns (:class:`DoubleType` or :class:`FloatType`). + .. versionadded:: 1.6.0 + + Examples + -------- >>> df = spark.createDataFrame([(1.0, float('nan')), (float('nan'), 2.0)], ("a", "b")) >>> df.select(nanvl("a", "b").alias("r1"), nanvl(df.a, df.b).alias("r2")).collect() [Row(r1=1.0, r2=1.0), Row(r1=2.0, r2=2.0)] @@ -904,7 +1053,6 @@ def nanvl(col1, col2): return Column(sc._jvm.functions.nanvl(_to_java_column(col1), _to_java_column(col2))) -@since(3.1) def percentile_approx(col, percentage, accuracy=10000): """Returns the approximate `percentile` of the numeric column `col` which is the smallest value in the ordered `col` values (sorted from least to greatest) such that no more than `percentage` @@ -920,6 +1068,10 @@ def percentile_approx(col, percentage, accuracy=10000): In this case, returns the approximate percentile array of column col at the given percentage array. + .. versionadded:: 3.1.0 + + Examples + -------- >>> key = (col("id") % 3).alias("key") >>> value = (randn(42) + key * 10).alias("value") >>> df = spark.range(0, 1000, 1, 1).select(key, value) @@ -959,13 +1111,18 @@ def percentile_approx(col, percentage, accuracy=10000): return Column(sc._jvm.functions.percentile_approx(_to_java_column(col), percentage, accuracy)) -@since(1.4) def rand(seed=None): """Generates a random column with independent and identically distributed (i.i.d.) samples uniformly distributed in [0.0, 1.0). - .. note:: The function is non-deterministic in general case. + .. versionadded:: 1.4.0 + Notes + ----- + The function is non-deterministic in general case. + + Examples + -------- >>> df.withColumn('rand', rand(seed=42) * 3).collect() [Row(age=2, name='Alice', rand=2.4052597283576684), Row(age=5, name='Bob', rand=2.3913904055683974)] @@ -978,13 +1135,18 @@ def rand(seed=None): return Column(jc) -@since(1.4) def randn(seed=None): """Generates a column with independent and identically distributed (i.i.d.) samples from the standard normal distribution. - .. note:: The function is non-deterministic in general case. + .. versionadded:: 1.4.0 + Notes + ----- + The function is non-deterministic in general case. + + Examples + -------- >>> df.withColumn('randn', randn(seed=42)).collect() [Row(age=2, name='Alice', randn=1.1027054481455365), Row(age=5, name='Bob', randn=0.7400395449950132)] @@ -997,12 +1159,15 @@ def randn(seed=None): return Column(jc) -@since(1.5) def round(col, scale=0): """ Round the given value to `scale` decimal places using HALF_UP rounding mode if `scale` >= 0 or at integral part when `scale` < 0. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(2.5,)], ['a']).select(round('a', 0).alias('r')).collect() [Row(r=3.0)] """ @@ -1010,12 +1175,15 @@ def round(col, scale=0): return Column(sc._jvm.functions.round(_to_java_column(col), scale)) -@since(2.0) def bround(col, scale=0): """ Round the given value to `scale` decimal places using HALF_EVEN rounding mode if `scale` >= 0 or at integral part when `scale` < 0. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.createDataFrame([(2.5,)], ['a']).select(bround('a', 0).alias('r')).collect() [Row(r=2.0)] """ @@ -1023,10 +1191,13 @@ def bround(col, scale=0): return Column(sc._jvm.functions.bround(_to_java_column(col), scale)) -@since(1.5) def shiftLeft(col, numBits): """Shift the given value numBits left. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(21,)], ['a']).select(shiftLeft('a', 1).alias('r')).collect() [Row(r=42)] """ @@ -1034,10 +1205,13 @@ def shiftLeft(col, numBits): return Column(sc._jvm.functions.shiftLeft(_to_java_column(col), numBits)) -@since(1.5) def shiftRight(col, numBits): """(Signed) shift the given value numBits right. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(42,)], ['a']).select(shiftRight('a', 1).alias('r')).collect() [Row(r=21)] """ @@ -1046,10 +1220,13 @@ def shiftRight(col, numBits): return Column(jc) -@since(1.5) def shiftRightUnsigned(col, numBits): """Unsigned shift the given value numBits right. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(-42,)], ['a']) >>> df.select(shiftRightUnsigned('a', 1).alias('r')).collect() [Row(r=9223372036854775787)] @@ -1059,12 +1236,17 @@ def shiftRightUnsigned(col, numBits): return Column(jc) -@since(1.6) def spark_partition_id(): """A column for partition ID. - .. note:: This is indeterministic because it depends on data partitioning and task scheduling. + .. versionadded:: 1.6.0 + Notes + ----- + This is indeterministic because it depends on data partitioning and task scheduling. + + Examples + -------- >>> df.repartition(1).select(spark_partition_id().alias("pid")).collect() [Row(pid=0), Row(pid=0)] """ @@ -1072,10 +1254,13 @@ def spark_partition_id(): return Column(sc._jvm.functions.spark_partition_id()) -@since(1.5) def expr(str): """Parses the expression string into the column that it represents + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(expr("length(name)")).collect() [Row(length(name)=5), Row(length(name)=3)] """ @@ -1083,12 +1268,18 @@ def expr(str): return Column(sc._jvm.functions.expr(str)) -@since(1.4) def struct(*cols): """Creates a new struct column. - :param cols: list of column names (string) or list of :class:`Column` expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : list, set, str or :class:`Column` + column names or :class:`Column`\\s to contain in the output struct. + Examples + -------- >>> df.select(struct('age', 'name').alias("struct")).collect() [Row(struct=Row(age=2, name='Alice')), Row(struct=Row(age=5, name='Bob'))] >>> df.select(struct([df.age, df.name]).alias("struct")).collect() @@ -1101,12 +1292,15 @@ def struct(*cols): return Column(jc) -@since(1.5) def greatest(*cols): """ Returns the greatest value of the list of column names, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c']) >>> df.select(greatest(df.a, df.b, df.c).alias("greatest")).collect() [Row(greatest=4)] @@ -1117,12 +1311,15 @@ def greatest(*cols): return Column(sc._jvm.functions.greatest(_to_seq(sc, cols, _to_java_column))) -@since(1.5) def least(*cols): """ Returns the least value of the list of column names, skipping null values. This function takes at least 2 parameters. It will return null iff all parameters are null. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(1, 4, 3)], ['a', 'b', 'c']) >>> df.select(least(df.a, df.b, df.c).alias("least")).collect() [Row(least=1)] @@ -1133,13 +1330,18 @@ def least(*cols): return Column(sc._jvm.functions.least(_to_seq(sc, cols, _to_java_column))) -@since(1.4) def when(condition, value): """Evaluates a list of conditions and returns one of multiple possible result expressions. If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions. - :param condition: a boolean :class:`Column` expression. - :param value: a literal value, or a :class:`Column` expression. + .. versionadded:: 1.4.0 + + Parameters + ---------- + condition : :class:`Column` + a boolean :class:`Column` expression. + value : + a literal value, or a :class:`Column` expression. >>> df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect() [Row(age=3), Row(age=4)] @@ -1155,12 +1357,15 @@ def when(condition, value): return Column(jc) -@since(1.5) def log(arg1, arg2=None): """Returns the first argument-based logarithm of the second argument. If there is only one argument, then this takes the natural logarithm of the argument. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(log(10.0, df.age).alias('ten')).rdd.map(lambda l: str(l.ten)[:7]).collect() ['0.30102', '0.69897'] @@ -1175,10 +1380,13 @@ def log(arg1, arg2=None): return Column(jc) -@since(1.5) def log2(col): """Returns the base-2 logarithm of the argument. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([(4,)], ['a']).select(log2('a').alias('log2')).collect() [Row(log2=2.0)] """ @@ -1186,11 +1394,14 @@ def log2(col): return Column(sc._jvm.functions.log2(_to_java_column(col))) -@since(1.5) def conv(col, fromBase, toBase): """ Convert a number in a string column from one base to another. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([("010101",)], ['n']) >>> df.select(conv(df.n, 2, 16).alias('hex')).collect() [Row(hex='15')] @@ -1199,11 +1410,14 @@ def conv(col, fromBase, toBase): return Column(sc._jvm.functions.conv(_to_java_column(col), fromBase, toBase)) -@since(1.5) def factorial(col): """ Computes the factorial of the given value. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([(5,)], ['n']) >>> df.select(factorial(df.n).alias('f')).collect() [Row(f=120)] @@ -1214,7 +1428,6 @@ def factorial(col): # --------------- Window functions ------------------------ -@since(1.4) def lag(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows before the current row, and @@ -1223,15 +1436,21 @@ def lag(col, offset=1, default=None): This is equivalent to the LAG function in SQL. - :param col: name of column or expression - :param offset: number of row to extend - :param default: default value + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to extend + default : optional + default value """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.lag(_to_java_column(col), offset, default)) -@since(1.4) def lead(col, offset=1, default=None): """ Window function: returns the value that is `offset` rows after the current row, and @@ -1240,15 +1459,21 @@ def lead(col, offset=1, default=None): This is equivalent to the LEAD function in SQL. - :param col: name of column or expression - :param offset: number of row to extend - :param default: default value + .. versionadded:: 1.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to extend + default : optional + default value """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.lead(_to_java_column(col), offset, default)) -@since(3.1) def nth_value(col, offset, ignoreNulls=False): """ Window function: returns the value that is the `offset`\\th row of the window frame @@ -1259,16 +1484,22 @@ def nth_value(col, offset, ignoreNulls=False): This is equivalent to the nth_value function in SQL. - :param col: name of column or expression - :param offset: number of row to use as the value - :param ignoreNulls: indicates the Nth value should skip null in the + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + offset : int, optional + number of row to use as the value + ignoreNulls : bool, optional + indicates the Nth value should skip null in the determination of which row to use """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.nth_value(_to_java_column(col), offset, ignoreNulls)) -@since(1.4) def ntile(n): """ Window function: returns the ntile group id (from 1 to `n` inclusive) @@ -1278,7 +1509,12 @@ def ntile(n): This is equivalent to the NTILE function in SQL. - :param n: an integer + .. versionadded:: 1.4.0 + + Parameters + ---------- + n : int + an integer """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.ntile(int(n))) @@ -1305,7 +1541,6 @@ def current_timestamp(): return Column(sc._jvm.functions.current_timestamp()) -@since(1.5) def date_format(date, format): """ Converts a date/timestamp/string to a value of string in the format specified by the date @@ -1315,9 +1550,15 @@ def date_format(date, format): pattern letters of `datetime pattern`_. can be used. .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - .. note:: Use when ever possible specialized functions like `year`. These benefit from a - specialized implementation. + .. versionadded:: 1.5.0 + + Notes + ----- + Whenever possible, use specialized functions like `year`. + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_format('dt', 'MM/dd/yyy').alias('date')).collect() [Row(date='04/08/2015')] @@ -1326,11 +1567,14 @@ def date_format(date, format): return Column(sc._jvm.functions.date_format(_to_java_column(date), format)) -@since(1.5) def year(col): """ Extract the year of a given date as integer. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(year('dt').alias('year')).collect() [Row(year=2015)] @@ -1339,11 +1583,14 @@ def year(col): return Column(sc._jvm.functions.year(_to_java_column(col))) -@since(1.5) def quarter(col): """ Extract the quarter of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(quarter('dt').alias('quarter')).collect() [Row(quarter=2)] @@ -1352,11 +1599,14 @@ def quarter(col): return Column(sc._jvm.functions.quarter(_to_java_column(col))) -@since(1.5) def month(col): """ Extract the month of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(month('dt').alias('month')).collect() [Row(month=4)] @@ -1365,11 +1615,14 @@ def month(col): return Column(sc._jvm.functions.month(_to_java_column(col))) -@since(2.3) def dayofweek(col): """ Extract the day of the week of a given date as integer. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofweek('dt').alias('day')).collect() [Row(day=4)] @@ -1378,11 +1631,14 @@ def dayofweek(col): return Column(sc._jvm.functions.dayofweek(_to_java_column(col))) -@since(1.5) def dayofmonth(col): """ Extract the day of the month of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofmonth('dt').alias('day')).collect() [Row(day=8)] @@ -1391,11 +1647,14 @@ def dayofmonth(col): return Column(sc._jvm.functions.dayofmonth(_to_java_column(col))) -@since(1.5) def dayofyear(col): """ Extract the day of the year of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(dayofyear('dt').alias('day')).collect() [Row(day=98)] @@ -1404,11 +1663,14 @@ def dayofyear(col): return Column(sc._jvm.functions.dayofyear(_to_java_column(col))) -@since(1.5) def hour(col): """ Extract the hours of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(hour('ts').alias('hour')).collect() [Row(hour=13)] @@ -1417,11 +1679,14 @@ def hour(col): return Column(sc._jvm.functions.hour(_to_java_column(col))) -@since(1.5) def minute(col): """ Extract the minutes of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(minute('ts').alias('minute')).collect() [Row(minute=8)] @@ -1430,11 +1695,14 @@ def minute(col): return Column(sc._jvm.functions.minute(_to_java_column(col))) -@since(1.5) def second(col): """ Extract the seconds of a given date as integer. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08 13:08:15',)], ['ts']) >>> df.select(second('ts').alias('second')).collect() [Row(second=15)] @@ -1443,11 +1711,14 @@ def second(col): return Column(sc._jvm.functions.second(_to_java_column(col))) -@since(1.5) def weekofyear(col): """ Extract the week number of a given date as integer. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(weekofyear(df.dt).alias('week')).collect() [Row(week=15)] @@ -1456,11 +1727,14 @@ def weekofyear(col): return Column(sc._jvm.functions.weekofyear(_to_java_column(col))) -@since(1.5) def date_add(start, days): """ Returns the date that is `days` days after `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_add(df.dt, 1).alias('next_date')).collect() [Row(next_date=datetime.date(2015, 4, 9))] @@ -1469,11 +1743,14 @@ def date_add(start, days): return Column(sc._jvm.functions.date_add(_to_java_column(start), days)) -@since(1.5) def date_sub(start, days): """ Returns the date that is `days` days before `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(date_sub(df.dt, 1).alias('prev_date')).collect() [Row(prev_date=datetime.date(2015, 4, 7))] @@ -1482,11 +1759,14 @@ def date_sub(start, days): return Column(sc._jvm.functions.date_sub(_to_java_column(start), days)) -@since(1.5) def datediff(end, start): """ Returns the number of days from `start` to `end`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08','2015-05-10')], ['d1', 'd2']) >>> df.select(datediff(df.d2, df.d1).alias('diff')).collect() [Row(diff=32)] @@ -1495,11 +1775,14 @@ def datediff(end, start): return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start))) -@since(1.5) def add_months(start, months): """ Returns the date that is `months` months after `start` + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> df.select(add_months(df.dt, 1).alias('next_month')).collect() [Row(next_month=datetime.date(2015, 5, 8))] @@ -1508,7 +1791,6 @@ def add_months(start, months): return Column(sc._jvm.functions.add_months(_to_java_column(start), months)) -@since(1.5) def months_between(date1, date2, roundOff=True): """ Returns number of months between dates date1 and date2. @@ -1517,6 +1799,10 @@ def months_between(date1, date2, roundOff=True): returns an integer (time of day will be ignored). The result is rounded off to 8 digits unless `roundOff` is set to `False`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', '1996-10-30')], ['date1', 'date2']) >>> df.select(months_between(df.date1, df.date2).alias('months')).collect() [Row(months=3.94959677)] @@ -1528,7 +1814,6 @@ def months_between(date1, date2, roundOff=True): _to_java_column(date1), _to_java_column(date2), roundOff)) -@since(2.2) def to_date(col, format=None): """Converts a :class:`Column` into :class:`pyspark.sql.types.DateType` using the optionally specified format. Specify formats according to `datetime pattern`_. @@ -1537,6 +1822,10 @@ def to_date(col, format=None): .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + .. versionadded:: 2.2.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(to_date(df.t).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] @@ -1553,7 +1842,6 @@ def to_date(col, format=None): return Column(jc) -@since(2.2) def to_timestamp(col, format=None): """Converts a :class:`Column` into :class:`pyspark.sql.types.TimestampType` using the optionally specified format. 
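The signature above also accepts an explicit format; as a sketch of that variant (the format string 'yyyy-MM-dd HH:mm:ss' is an illustrative choice, and the expected row mirrors the default-format doctest later in this hunk):

>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]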
Specify formats according to `datetime pattern`_. @@ -1562,6 +1850,10 @@ def to_timestamp(col, format=None): .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + .. versionadded:: 2.2.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t']) >>> df.select(to_timestamp(df.t).alias('dt')).collect() [Row(dt=datetime.datetime(1997, 2, 28, 10, 30))] @@ -1578,13 +1870,20 @@ def to_timestamp(col, format=None): return Column(jc) -@since(1.5) def trunc(date, format): """ Returns date truncated to the unit specified by the format. - :param format: 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' + .. versionadded:: 1.5.0 + + Parameters + ---------- + date : :class:`Column` or str + format : str + 'year', 'yyyy', 'yy' or 'month', 'mon', 'mm' + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28',)], ['d']) >>> df.select(trunc(df.d, 'year').alias('year')).collect() [Row(year=datetime.date(1997, 1, 1))] @@ -1595,14 +1894,21 @@ def trunc(date, format): return Column(sc._jvm.functions.trunc(_to_java_column(date), format)) -@since(2.3) def date_trunc(format, timestamp): """ Returns timestamp truncated to the unit specified by the format. - :param format: 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', + .. versionadded:: 2.3.0 + + Parameters + ---------- + format : str + 'year', 'yyyy', 'yy', 'month', 'mon', 'mm', 'day', 'dd', 'hour', 'minute', 'second', 'week', 'quarter' + timestamp : :class:`Column` or str + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 05:02:11',)], ['t']) >>> df.select(date_trunc('year', df.t).alias('year')).collect() [Row(year=datetime.datetime(1997, 1, 1, 0, 0))] @@ -1613,7 +1919,6 @@ def date_trunc(format, timestamp): return Column(sc._jvm.functions.date_trunc(format, _to_java_column(timestamp))) -@since(1.5) def next_day(date, dayOfWeek): """ Returns the first date which is later than the value of the date column. @@ -1621,6 +1926,10 @@ def next_day(date, dayOfWeek): Day of the week parameter is case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun". + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('2015-07-27',)], ['d']) >>> df.select(next_day(df.d, 'Sun').alias('date')).collect() [Row(date=datetime.date(2015, 8, 2))] @@ -1629,11 +1938,14 @@ def next_day(date, dayOfWeek): return Column(sc._jvm.functions.next_day(_to_java_column(date), dayOfWeek)) -@since(1.5) def last_day(date): """ Returns the last day of the month which the given date belongs to. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-10',)], ['d']) >>> df.select(last_day(df.d).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] @@ -1642,13 +1954,16 @@ def last_day(date): return Column(sc._jvm.functions.last_day(_to_java_column(date))) -@since(1.5) def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"): """ Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string representing the timestamp of that moment in the current system time zone in the given format. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([(1428476400,)], ['unix_time']) >>> time_df.select(from_unixtime('unix_time').alias('ts')).collect() @@ -1659,7 +1974,6 @@ def from_unixtime(timestamp, format="yyyy-MM-dd HH:mm:ss"): return Column(sc._jvm.functions.from_unixtime(_to_java_column(timestamp), format)) -@since(1.5) def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): """ Convert time string with given pattern ('yyyy-MM-dd HH:mm:ss', by default) @@ -1668,6 +1982,10 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): if `timestamp` is None, then it returns current timestamp. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt']) >>> time_df.select(unix_timestamp('dt', 'yyyy-MM-dd').alias('unix_time')).collect() @@ -1680,7 +1998,6 @@ def unix_timestamp(timestamp=None, format='yyyy-MM-dd HH:mm:ss'): return Column(sc._jvm.functions.unix_timestamp(_to_java_column(timestamp), format)) -@since(1.5) def from_utc_timestamp(timestamp, tz): """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. This function @@ -1696,17 +2013,25 @@ def from_utc_timestamp(timestamp, tz): according to the timezone in the string, and finally display the result by converting the timestamp to string according to the session local timezone. - :param timestamp: the column that contains timestamps - :param tz: A string detailing the time zone ID that the input should be adjusted to. It should - be in the format of either region-based zone IDs or zone offsets. Region IDs must - have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in - the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are - supported as aliases of '+00:00'. Other short names are not recommended to use - because they can be ambiguous. - - .. versionchanged:: 2.4 - `tz` can take a :class:`Column` containing timezone ID strings. - + .. versionadded:: 1.5.0 + + Parameters + ---------- + timestamp : :class:`Column` or str + the column that contains timestamps + tz : :class:`Column` or str + A string detailing the time zone ID that the input should be adjusted to. It should + be in the format of either region-based zone IDs or zone offsets. Region IDs must + have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in + the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are + supported as aliases of '+00:00'. Other short names are not recommended to use + because they can be ambiguous. + + .. versionchanged:: 2.4 + `tz` can take a :class:`Column` containing timezone ID strings. + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz']) >>> df.select(from_utc_timestamp(df.ts, "PST").alias('local_time')).collect() [Row(local_time=datetime.datetime(1997, 2, 28, 2, 30))] @@ -1719,7 +2044,6 @@ def from_utc_timestamp(timestamp, tz): return Column(sc._jvm.functions.from_utc_timestamp(_to_java_column(timestamp), tz)) -@since(1.5) def to_utc_timestamp(timestamp, tz): """ This is a common function for databases supporting TIMESTAMP WITHOUT TIMEZONE. 
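The versionchanged note later in this hunk says `tz` may also be a :class:`Column` of timezone IDs; a minimal sketch of that form, reusing the ('1997-02-28 10:30:00', 'JST') row from the doctest below and assuming JST is interpreted as UTC+9:

>>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz'])
>>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect()
[Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))]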
This function @@ -1735,17 +2059,25 @@ def to_utc_timestamp(timestamp, tz): according to the timezone in the string, and finally display the result by converting the timestamp to string according to the session local timezone. - :param timestamp: the column that contains timestamps - :param tz: A string detailing the time zone ID that the input should be adjusted to. It should - be in the format of either region-based zone IDs or zone offsets. Region IDs must - have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in - the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are - supported as aliases of '+00:00'. Other short names are not recommended to use - because they can be ambiguous. - - .. versionchanged:: 2.4 - `tz` can take a :class:`Column` containing timezone ID strings. - + .. versionadded:: 1.5.0 + + Parameters + ---------- + timestamp : :class:`Column` or str + the column that contains timestamps + tz : :class:`Column` or str + A string detailing the time zone ID that the input should be adjusted to. It should + be in the format of either region-based zone IDs or zone offsets. Region IDs must + have the form 'area/city', such as 'America/Los_Angeles'. Zone offsets must be in + the format '(+|-)HH:mm', for example '-08:00' or '+01:00'. Also 'UTC' and 'Z' are + upported as aliases of '+00:00'. Other short names are not recommended to use + because they can be ambiguous. + + .. versionchanged:: 2.4.0 + `tz` can take a :class:`Column` containing timezone ID strings. + + Examples + -------- >>> df = spark.createDataFrame([('1997-02-28 10:30:00', 'JST')], ['ts', 'tz']) >>> df.select(to_utc_timestamp(df.ts, "PST").alias('utc_time')).collect() [Row(utc_time=datetime.datetime(1997, 2, 28, 18, 30))] @@ -1758,9 +2090,12 @@ def to_utc_timestamp(timestamp, tz): return Column(sc._jvm.functions.to_utc_timestamp(_to_java_column(timestamp), tz)) -@since(3.1) def timestamp_seconds(col): """ + .. versionadded:: 3.1.0 + + Examples + -------- >>> from pyspark.sql.functions import timestamp_seconds >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") >>> time_df = spark.createDataFrame([(1230219000,)], ['unix_time']) @@ -1777,7 +2112,6 @@ def timestamp_seconds(col): return Column(sc._jvm.functions.timestamp_seconds(_to_java_column(col))) -@since(2.0) def window(timeColumn, windowDuration, slideDuration=None, startTime=None): """Bucketize rows into one or more time windows given a timestamp specifying column. Window starts are inclusive but the window ends are exclusive, e.g. 12:05 will be in the window @@ -1797,6 +2131,10 @@ def window(timeColumn, windowDuration, slideDuration=None, startTime=None): The output column will be a struct called 'window' by default with the nested columns 'start' and 'end', where 'start' and 'end' will be of :class:`pyspark.sql.types.TimestampType`. + .. versionadded:: 2.0.0 + + Examples + -------- >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum")) >>> w.select(w.window.start.cast("string").alias("start"), @@ -1827,12 +2165,15 @@ def check_string_field(field, fieldName): # ---------------------------- misc functions ---------------------------------- -@since(1.5) def crc32(col): """ Calculates the cyclic redundancy check value (CRC32) of a binary column and returns the value as a bigint. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(crc32('a').alias('crc32')).collect() [Row(crc32=2743272264)] """ @@ -1840,10 +2181,13 @@ def crc32(col): return Column(sc._jvm.functions.crc32(_to_java_column(col))) -@since(1.5) def md5(col): """Calculates the MD5 digest and returns the value as a 32 character hex string. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(md5('a').alias('hash')).collect() [Row(hash='902fbdd2b1df0c4f70b4a5d23525e932')] """ @@ -1852,10 +2196,13 @@ def md5(col): return Column(jc) -@since(1.5) def sha1(col): """Returns the hex string result of SHA-1. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(sha1('a').alias('hash')).collect() [Row(hash='3c01bdbb26f358bab27f267924aa2c9a03fcfdb8')] """ @@ -1864,12 +2211,15 @@ def sha1(col): return Column(jc) -@since(1.5) def sha2(col, numBits): """Returns the hex string result of SHA-2 family of hash functions (SHA-224, SHA-256, SHA-384, and SHA-512). The numBits indicates the desired bit length of the result, which must have a value of 224, 256, 384, 512, or 0 (which is equivalent to 256). + .. versionadded:: 1.5.0 + + Examples + -------- >>> digests = df.select(sha2(df.name, 256).alias('s')).collect() >>> digests[0] Row(s='3bc51062973c458d5a6f2d8d64a023246354ad7e064b1e4e009ec8a0699a3043') @@ -1881,10 +2231,13 @@ def sha2(col, numBits): return Column(jc) -@since(2.0) def hash(*cols): """Calculates the hash code of given columns, and returns the result as an int column. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect() [Row(hash=-757602832)] """ @@ -1893,11 +2246,14 @@ def hash(*cols): return Column(jc) -@since(3.0) def xxhash64(*cols): """Calculates the hash code of given columns using the 64-bit variant of the xxHash algorithm, and returns the result as a long column. + .. versionadded:: 3.0.0 + + Examples + -------- >>> spark.createDataFrame([('ABC',)], ['a']).select(xxhash64('a').alias('hash')).collect() [Row(hash=4105715581806190027)] """ @@ -1906,12 +2262,15 @@ def xxhash64(*cols): return Column(jc) -@since(3.1) def assert_true(col, errMsg=None): """ Returns null if the input column is true; throws an exception with the provided error message otherwise. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df = spark.createDataFrame([(0,1)], ['a', 'b']) >>> df.select(assert_true(df.a < df.b).alias('r')).collect() [Row(r=None)] @@ -2023,12 +2382,15 @@ def trim(col): return _invoke_function_over_column("trim", col) -@since(1.5) def concat_ws(sep, *cols): """ Concatenates multiple input string columns together into a single string column, using the given separator. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect() [Row(s='abcd-123')] @@ -2057,14 +2419,19 @@ def encode(col, charset): return Column(sc._jvm.functions.encode(_to_java_column(col), charset)) -@since(1.5) def format_number(col, d): """ Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places with HALF_EVEN round mode, and returns the result as a string. - :param col: the column name of the numeric value to be formatted - :param d: the N decimal places + .. 
versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + the column name of the numeric value to be formatted + d : int + the N decimal places >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v='5.0000')] @@ -2073,15 +2440,21 @@ def format_number(col, d): return Column(sc._jvm.functions.format_number(_to_java_column(col), d)) -@since(1.5) def format_string(format, *cols): """ Formats the arguments in printf-style and returns the result as a string column. - :param format: string that can contain embedded format tags and used as result column's value - :param cols: list of column names (string) or list of :class:`Column` expressions to - be used in formatting + .. versionadded:: 1.5.0 + + Parameters + ---------- + format : str + string that can contain embedded format tags and used as result column's value + cols : :class:`Column` or str + column names or :class:`Column`\\s to be used in formatting + Examples + -------- >>> df = spark.createDataFrame([(5, "hello")], ['a', 'b']) >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect() [Row(v='5 hello')] @@ -2090,14 +2463,17 @@ def format_string(format, *cols): return Column(sc._jvm.functions.format_string(format, _to_seq(sc, cols, _to_java_column))) -@since(1.5) def instr(str, substr): """ Locate the position of the first occurrence of substr column in the given string. Returns null if either of the arguments are null. - .. note:: The position is not zero based, but 1 based index. Returns 0 if substr - could not be found in str. + .. versionadded:: 1.5.0 + + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(instr(df.s, 'b').alias('s')).collect() @@ -2107,12 +2483,15 @@ def instr(str, substr): return Column(sc._jvm.functions.instr(_to_java_column(str), substr)) -@since(3.0) def overlay(src, replace, pos, len=-1): """ Overlay the specified portion of `src` with `replace`, starting from byte position `pos` of `src` and proceeding for `len` bytes. + .. versionadded:: 3.0.0 + + Examples + -------- >>> df = spark.createDataFrame([("SPARK_SQL", "CORE")], ("x", "y")) >>> df.select(overlay("x", "y", 7).alias("overlayed")).show() +----------+ @@ -2141,15 +2520,20 @@ def overlay(src, replace, pos, len=-1): )) -@since(1.5) def substring(str, pos, len): """ Substring starts at `pos` and is of length `len` when str is String type or returns the slice of byte array that starts at `pos` in byte and is of length `len` when str is Binary type. - .. note:: The position is not zero based, but 1 based index. + .. versionadded:: 1.5.0 + Notes + ----- + The position is not zero based, but 1 based index. + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(substring(df.s, 1, 2).alias('s')).collect() [Row(s='ab')] @@ -2158,7 +2542,6 @@ def substring(str, pos, len): return Column(sc._jvm.functions.substring(_to_java_column(str), pos, len)) -@since(1.5) def substring_index(str, delim, count): """ Returns the substring from string str before count occurrences of the delimiter delim. @@ -2166,6 +2549,10 @@ def substring_index(str, delim, count): returned. If count is negative, every to the right of the final delimiter (counting from the right) is returned. substring_index performs a case-sensitive match when searching for delim. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('a.b.c.d',)], ['s']) >>> df.select(substring_index(df.s, '.', 2).alias('s')).collect() [Row(s='a.b')] @@ -2176,10 +2563,13 @@ def substring_index(str, delim, count): return Column(sc._jvm.functions.substring_index(_to_java_column(str), delim, count)) -@since(1.5) def levenshtein(left, right): """Computes the Levenshtein distance of the two given strings. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df0 = spark.createDataFrame([('kitten', 'sitting',)], ['l', 'r']) >>> df0.select(levenshtein('l', 'r').alias('d')).collect() [Row(d=3)] @@ -2189,18 +2579,28 @@ def levenshtein(left, right): return Column(jc) -@since(1.5) def locate(substr, str, pos=1): """ Locate the position of the first occurrence of substr in a string column, after position pos. - .. note:: The position is not zero based, but 1 based index. Returns 0 if substr - could not be found in str. + .. versionadded:: 1.5.0 + + Parameters + ---------- + substr : str + a string + str : :class:`Column` or str + a Column of :class:`pyspark.sql.types.StringType` + pos : int, optional + start position (zero based) - :param substr: a string - :param str: a Column of :class:`pyspark.sql.types.StringType` - :param pos: start position (zero based) + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if substr + could not be found in str. + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(locate('b', df.s, 1).alias('s')).collect() [Row(s=2)] @@ -2209,11 +2609,14 @@ def locate(substr, str, pos=1): return Column(sc._jvm.functions.locate(substr, _to_java_column(str), pos)) -@since(1.5) def lpad(col, len, pad): """ Left-pad the string column to width `len` with `pad`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(lpad(df.s, 6, '#').alias('s')).collect() [Row(s='##abcd')] @@ -2222,11 +2625,14 @@ def lpad(col, len, pad): return Column(sc._jvm.functions.lpad(_to_java_column(col), len, pad)) -@since(1.5) def rpad(col, len, pad): """ Right-pad the string column to width `len` with `pad`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(rpad(df.s, 6, '#').alias('s')).collect() [Row(s='abcd##')] @@ -2235,11 +2641,14 @@ def rpad(col, len, pad): return Column(sc._jvm.functions.rpad(_to_java_column(col), len, pad)) -@since(1.5) def repeat(col, n): """ Repeats a string column n times, and returns it as a new string column. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('ab',)], ['s',]) >>> df.select(repeat(df.s, 3).alias('s')).collect() [Row(s='ababab')] @@ -2248,15 +2657,21 @@ def repeat(col, n): return Column(sc._jvm.functions.repeat(_to_java_column(col), n)) -@since(1.5) def split(str, pattern, limit=-1): """ Splits str around matches of the given pattern. - :param str: a string expression to split - :param pattern: a string representing a regular expression. The regex string should be + .. versionadded:: 1.5.0 + + Parameters + ---------- + str : :class:`Column` or str + a string expression to split + pattern : str + a string representing a regular expression. The regex string should be a Java regular expression. - :param limit: an integer which controls the number of times `pattern` is applied. + limit : int, optional + an integer which controls the number of times `pattern` is applied. 
* ``limit > 0``: The resulting array's length will not be more than `limit`, and the resulting array's last entry will contain all input beyond the last @@ -2264,9 +2679,11 @@ def split(str, pattern, limit=-1): * ``limit <= 0``: `pattern` will be applied as many times as possible, and the resulting array can be of any size. - .. versionchanged:: 3.0 - `split` now takes an optional `limit` field. If not provided, default limit value is -1. + .. versionchanged:: 3.0 + `split` now takes an optional `limit` field. If not provided, default limit value is -1. + Examples + -------- >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) >>> df.select(split(df.s, '[ABC]', 2).alias('s')).collect() [Row(s=['one', 'twoBthreeC'])] @@ -2277,11 +2694,14 @@ def split(str, pattern, limit=-1): return Column(sc._jvm.functions.split(_to_java_column(str), pattern, limit)) -@since(1.5) def regexp_extract(str, pattern, idx): r"""Extract a specific group matched by a Java regex, from the specified string column. If the regex did not match, or the specified group did not match, an empty string is returned. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('100-200',)], ['str']) >>> df.select(regexp_extract('str', r'(\d+)-(\d+)', 1).alias('d')).collect() [Row(d='100')] @@ -2297,10 +2717,13 @@ def regexp_extract(str, pattern, idx): return Column(jc) -@since(1.5) def regexp_replace(str, pattern, replacement): r"""Replace all substrings of the specified string value that match regexp with rep. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('100-200',)], ['str']) >>> df.select(regexp_replace('str', r'(\d+)', '--').alias('d')).collect() [Row(d='-----')] @@ -2310,10 +2733,13 @@ def regexp_replace(str, pattern, replacement): return Column(jc) -@since(1.5) def initcap(col): """Translate the first letter of each word to upper case in the sentence. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ab cd',)], ['a']).select(initcap("a").alias('v')).collect() [Row(v='Ab Cd')] """ @@ -2321,11 +2747,14 @@ def initcap(col): return Column(sc._jvm.functions.initcap(_to_java_column(col))) -@since(1.5) def soundex(col): """ Returns the SoundEx encoding for a string + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([("Peters",),("Uhrbach",)], ['name']) >>> df.select(soundex(df.name).alias("soundex")).collect() [Row(soundex='P362'), Row(soundex='U612')] @@ -2334,10 +2763,13 @@ def soundex(col): return Column(sc._jvm.functions.soundex(_to_java_column(col))) -@since(1.5) def bin(col): """Returns the string representation of the binary value of the given column. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df.select(bin(df.age).alias('c')).collect() [Row(c='10'), Row(c='101')] """ @@ -2346,12 +2778,15 @@ def bin(col): return Column(jc) -@since(1.5) def hex(col): """Computes hex value of the given column, which could be :class:`pyspark.sql.types.StringType`, :class:`pyspark.sql.types.BinaryType`, :class:`pyspark.sql.types.IntegerType` or :class:`pyspark.sql.types.LongType`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() [Row(hex(a)='414243', hex(b)='3')] """ @@ -2360,11 +2795,14 @@ def hex(col): return Column(jc) -@since(1.5) def unhex(col): """Inverse of hex. Interprets each pair of characters as a hexadecimal number and converts to the byte representation of number. + .. 
versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() [Row(unhex(a)=bytearray(b'ABC'))] """ @@ -2372,12 +2810,15 @@ def unhex(col): return Column(sc._jvm.functions.unhex(_to_java_column(col))) -@since(1.5) def length(col): """Computes the character length of string data or number of bytes of binary data. The length of character data includes the trailing spaces. The length of binary data includes binary zeros. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('ABC ',)], ['a']).select(length('a').alias('length')).collect() [Row(length=4)] """ @@ -2385,13 +2826,16 @@ def length(col): return Column(sc._jvm.functions.length(_to_java_column(col))) -@since(1.5) def translate(srcCol, matching, replace): """A function translate any character in the `srcCol` by a character in `matching`. The characters in `replace` is corresponding to the characters in `matching`. The translate will happen when any character in the string matching with the character in the `matching`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> spark.createDataFrame([('translate',)], ['a']).select(translate('a', "rnlt", "123") \\ ... .alias('r')).collect() [Row(r='1a2s3ae')] @@ -2402,13 +2846,19 @@ def translate(srcCol, matching, replace): # ---------------------- Collection functions ------------------------------ -@since(2.0) def create_map(*cols): """Creates a new map column. - :param cols: list of column names (string) or list of :class:`Column` expressions that are + .. versionadded:: 2.0.0 + + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s that are grouped as key-value pairs, e.g. (key1, value1, key2, value2, ...). + Examples + -------- >>> df.select(create_map('name', 'age').alias("map")).collect() [Row(map={'Alice': 2}), Row(map={'Bob': 5})] >>> df.select(create_map([df.name, df.age]).alias("map")).collect() @@ -2421,13 +2871,20 @@ def create_map(*cols): return Column(jc) -@since(2.4) def map_from_arrays(col1, col2): """Creates a new map from two arrays. - :param col1: name of column containing a set of keys. All elements should not be null - :param col2: name of column containing a set of values + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing a set of keys. All elements should not be null + col2 : :class:`Column` or str + name of column containing a set of values + Examples + -------- >>> df = spark.createDataFrame([([2, 5], ['a', 'b'])], ['k', 'v']) >>> df.select(map_from_arrays(df.k, df.v).alias("map")).show() +----------------+ @@ -2440,13 +2897,19 @@ def map_from_arrays(col1, col2): return Column(sc._jvm.functions.map_from_arrays(_to_java_column(col1), _to_java_column(col2))) -@since(1.4) def array(*cols): """Creates a new array column. - :param cols: list of column names (string) or list of :class:`Column` expressions that have + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s that have the same data type. + Examples + -------- >>> df.select(array('age', 'age').alias("arr")).collect() [Row(arr=[2, 2]), Row(arr=[5, 5])] >>> df.select(array([df.age, df.age]).alias("arr")).collect() @@ -2459,15 +2922,22 @@ def array(*cols): return Column(jc) -@since(1.5) def array_contains(col, value): """ Collection function: returns null if the array is null, true if the array contains the given value, and false otherwise. 
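The null-array case described above is not exercised by the doctest in this hunk; a hedged sketch of it (the DDL schema string and the None result are assumptions based on the description, not taken from the diff):

>>> null_df = spark.createDataFrame([(None,)], 'data array<string>')
>>> null_df.select(array_contains(null_df.data, "a")).collect()  # doctest: +SKIP
[Row(array_contains(data, a)=None)]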
- :param col: name of column containing array - :param value: value or column to check for in array + .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array + value : + value or column to check for in array + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data']) >>> df.select(array_contains(df.data, "a")).collect() [Row(array_contains(data, a)=True), Row(array_contains(data, a)=False)] @@ -2479,13 +2949,16 @@ def array_contains(col, value): return Column(sc._jvm.functions.array_contains(_to_java_column(col), value)) -@since(2.4) def arrays_overlap(a1, a2): """ Collection function: returns true if the arrays contain any common non-null element; if not, returns null if both the arrays are non-empty and any of them contains a null element; returns false otherwise. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([(["a", "b"], ["b", "c"]), (["a"], ["b", "c"])], ['x', 'y']) >>> df.select(arrays_overlap(df.x, df.y).alias("overlap")).collect() [Row(overlap=True), Row(overlap=False)] @@ -2494,16 +2967,24 @@ def arrays_overlap(a1, a2): return Column(sc._jvm.functions.arrays_overlap(_to_java_column(a1), _to_java_column(a2))) -@since(2.4) def slice(x, start, length): """ Collection function: returns an array containing all the elements in `x` from index `start` (array indices start at 1, or from the end if `start` is negative) with the specified `length`. - :param x: the array to be sliced - :param start: the starting index - :param length: the length of the slice + .. versionadded:: 2.4.0 + + Parameters + ---------- + x : :class:`Column` or str + the array to be sliced + start : :class:`Column` or int + the starting index + length : :class:`Column` or int + the length of the slice + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3],), ([4, 5],)], ['x']) >>> df.select(slice(df.x, 2, 2).alias("sliced")).collect() [Row(sliced=[2, 3]), Row(sliced=[5])] @@ -2516,12 +2997,15 @@ def slice(x, start, length): )) -@since(2.4) def array_join(col, delimiter, null_replacement=None): """ Concatenates the elements of `column` using the `delimiter`. Null values are replaced with `null_replacement` if set, otherwise they are ignored. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), (["a", None],)], ['data']) >>> df.select(array_join(df.data, ",").alias("joined")).collect() [Row(joined='a,b,c'), Row(joined='a')] @@ -2536,12 +3020,15 @@ def array_join(col, delimiter, null_replacement=None): _to_java_column(col), delimiter, null_replacement)) -@since(1.5) def concat(*cols): """ Concatenates multiple input columns together into a single column. The function works with strings, binary and compatible array columns. + .. versionadded:: 1.5.0 + + Examples + -------- >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd']) >>> df.select(concat(df.s, df.d).alias('s')).collect() [Row(s='abcd123')] @@ -2554,15 +3041,20 @@ def concat(*cols): return Column(sc._jvm.functions.concat(_to_seq(sc, cols, _to_java_column))) -@since(2.4) def array_position(col, value): """ Collection function: Locates the position of the first occurrence of the given value in the given array. Returns null if either of the arguments are null. - .. note:: The position is not zero based, but 1 based index. Returns 0 if the given - value could not be found in the array. + .. 
versionadded:: 2.4.0 + Notes + ----- + The position is not zero based, but 1 based index. Returns 0 if the given + value could not be found in the array. + + Examples + -------- >>> df = spark.createDataFrame([(["c", "b", "a"],), ([],)], ['data']) >>> df.select(array_position(df.data, "a")).collect() [Row(array_position(data, a)=3), Row(array_position(data, a)=0)] @@ -2571,17 +3063,26 @@ def array_position(col, value): return Column(sc._jvm.functions.array_position(_to_java_column(col), value)) -@since(2.4) def element_at(col, extraction): """ Collection function: Returns element of array at given index in extraction if col is array. Returns value for the given key in extraction if col is map. - :param col: name of column containing array or map - :param extraction: index to check for in array or key to check for in map + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array or map + extraction : + index to check for in array or key to check for in map - .. note:: The position is not zero based, but 1 based index. + Notes + ----- + The position is not zero based, but 1 based index. + Examples + -------- >>> df = spark.createDataFrame([(["a", "b", "c"],), ([],)], ['data']) >>> df.select(element_at(df.data, 1)).collect() [Row(element_at(data, 1)='a'), Row(element_at(data, 1)=None)] @@ -2595,14 +3096,21 @@ def element_at(col, extraction): _to_java_column(col), lit(extraction)._jc)) -@since(2.4) def array_remove(col, element): """ Collection function: Remove all elements that equal to element from the given array. - :param col: name of column containing array - :param element: element to be removed from the array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing array + element : + element to be removed from the array + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3, 1, 1],), ([],)], ['data']) >>> df.select(array_remove(df.data, 1)).collect() [Row(array_remove(data, 1)=[2, 3]), Row(array_remove(data, 1)=[])] @@ -2611,13 +3119,19 @@ def array_remove(col, element): return Column(sc._jvm.functions.array_remove(_to_java_column(col), element)) -@since(2.4) def array_distinct(col): """ Collection function: removes duplicate values from the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3, 2],), ([4, 5, 5, 4],)], ['data']) >>> df.select(array_distinct(df.data)).collect() [Row(array_distinct(data)=[1, 2, 3]), Row(array_distinct(data)=[4, 5])] @@ -2626,15 +3140,22 @@ def array_distinct(col): return Column(sc._jvm.functions.array_distinct(_to_java_column(col))) -@since(2.4) def array_intersect(col1, col2): """ Collection function: returns an array of the elements in the intersection of col1 and col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. 
versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_intersect(df.c1, df.c2)).collect() @@ -2644,15 +3165,22 @@ def array_intersect(col1, col2): return Column(sc._jvm.functions.array_intersect(_to_java_column(col1), _to_java_column(col2))) -@since(2.4) def array_union(col1, col2): """ Collection function: returns an array of the elements in the union of col1 and col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_union(df.c1, df.c2)).collect() @@ -2662,15 +3190,22 @@ def array_union(col1, col2): return Column(sc._jvm.functions.array_union(_to_java_column(col1), _to_java_column(col2))) -@since(2.4) def array_except(col1, col2): """ Collection function: returns an array of the elements in col1 but not in col2, without duplicates. - :param col1: name of column containing array - :param col2: name of column containing array + .. versionadded:: 2.4.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of column containing array + col2 : :class:`Column` or str + name of column containing array + Examples + -------- >>> from pyspark.sql import Row >>> df = spark.createDataFrame([Row(c1=["b", "a", "c"], c2=["c", "d", "a", "f"])]) >>> df.select(array_except(df.c1, df.c2)).collect() @@ -2680,13 +3215,16 @@ def array_except(col1, col2): return Column(sc._jvm.functions.array_except(_to_java_column(col1), _to_java_column(col2))) -@since(1.4) def explode(col): """ Returns a new row for each element in the given array or map. Uses the default column name `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) >>> eDF.select(explode(eDF.intlist).alias("anInt")).collect() @@ -2704,13 +3242,16 @@ def explode(col): return Column(jc) -@since(2.1) def posexplode(col): """ Returns a new row for each element with position in the given array or map. Uses the default column name `pos` for position, and `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.1.0 + + Examples + -------- >>> from pyspark.sql import Row >>> eDF = spark.createDataFrame([Row(a=1, intlist=[1,2,3], mapfield={"a": "b"})]) >>> eDF.select(posexplode(eDF.intlist)).collect() @@ -2728,7 +3269,6 @@ def posexplode(col): return Column(jc) -@since(2.3) def explode_outer(col): """ Returns a new row for each element in the given array or map. @@ -2736,6 +3276,10 @@ def explode_outer(col): Uses the default column name `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], ... 
("id", "an_array", "a_map") @@ -2764,7 +3308,6 @@ def explode_outer(col): return Column(jc) -@since(2.3) def posexplode_outer(col): """ Returns a new row for each element with position in the given array or map. @@ -2772,6 +3315,10 @@ def posexplode_outer(col): Uses the default column name `pos` for position, and `col` for elements in the array and `key` and `value` for elements in the map unless specified otherwise. + .. versionadded:: 2.3.0 + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["foo", "bar"], {"x": 1.0}), (2, [], {}), (3, None, None)], ... ("id", "an_array", "a_map") @@ -2799,15 +3346,22 @@ def posexplode_outer(col): return Column(jc) -@since(1.6) def get_json_object(col, path): """ Extracts json object from a json string based on json path specified, and returns json string of the extracted json object. It will return null if the input json string is invalid. - :param col: string column in json format - :param path: path to the json object to extract + .. versionadded:: 1.6.0 + + Parameters + ---------- + col : :class:`Column` or str + string column in json format + path : str + path to the json object to extract + Examples + -------- >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')] >>> df = spark.createDataFrame(data, ("key", "jstring")) >>> df.select(df.key, get_json_object(df.jstring, '$.f1').alias("c0"), \\ @@ -2819,13 +3373,20 @@ def get_json_object(col, path): return Column(jc) -@since(1.6) def json_tuple(col, *fields): """Creates a new row for a json column according to the given field names. - :param col: string column in json format - :param fields: list of fields to extract + .. versionadded:: 1.6.0 + + Parameters + ---------- + col : :class:`Column` or str + string column in json format + fields : str + fields to extract + Examples + -------- >>> data = [("1", '''{"f1": "value1", "f2": "value2"}'''), ("2", '''{"f1": "value12"}''')] >>> df = spark.createDataFrame(data, ("key", "jstring")) >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect() @@ -2836,19 +3397,28 @@ def json_tuple(col, *fields): return Column(jc) -@since(2.1) def from_json(col, schema, options={}): """ Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with the specified schema. Returns `null`, in the case of an unparseable string. - :param col: string column in json format - :param schema: a StructType or ArrayType of StructType to use when parsing the json column. - :param options: options to control parsing. accepts the same options as the json datasource + .. versionadded:: 2.1.0 - .. note:: Since Spark 2.3, the DDL-formatted string is also supported for ``schema``. + Parameters + ---------- + col : :class:`Column` or str + string column in json format + schema : :class:`DataType` or str + a StructType or ArrayType of StructType to use when parsing the json column. + .. versionchanged:: 2.3 + the DDL-formatted string is also supported for ``schema``. + options : dict, optional + options to control parsing. 
accepts the same options as the json datasource + + Examples + -------- >>> from pyspark.sql.types import * >>> data = [(1, '''{"a": 1}''')] >>> schema = StructType([StructField("a", IntegerType())]) @@ -2883,17 +3453,24 @@ def from_json(col, schema, options={}): return Column(jc) -@since(2.1) def to_json(col, options={}): """ Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` into a JSON string. Throws an exception, in the case of an unsupported type. - :param col: name of column containing a struct, an array or a map. - :param options: options to control converting. accepts the same options as the JSON datasource. - Additionally the function supports the `pretty` option which enables - pretty JSON generation. + .. versionadded:: 2.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column containing a struct, an array or a map. + options : dict, optional + options to control converting. accepts the same options as the JSON datasource. + Additionally the function supports the `pretty` option which enables + pretty JSON generation. + Examples + -------- >>> from pyspark.sql import Row >>> from pyspark.sql.types import * >>> data = [(1, Row(age=2, name='Alice'))] @@ -2923,17 +3500,24 @@ def to_json(col, options={}): return Column(jc) -@since(2.4) def schema_of_json(json, options={}): """ Parses a JSON string and infers its schema in DDL format. - :param json: a JSON string or a string literal containing a JSON string. - :param options: options to control parsing. accepts the same options as the JSON datasource + .. versionadded:: 2.4.0 - .. versionchanged:: 3.0 - It accepts `options` parameter to control schema inferring. + Parameters + ---------- + json : :class:`Column` or str + a JSON string or a string literal containing a JSON string. + options : dict, optional + options to control parsing. accepts the same options as the JSON datasource + .. versionchanged:: 3.0 + It accepts `options` parameter to control schema inferring. + + Examples + -------- >>> df = spark.range(1) >>> df.select(schema_of_json(lit('{"a": 0}')).alias("json")).collect() [Row(json='STRUCT<`a`: BIGINT>')] @@ -2953,14 +3537,21 @@ def schema_of_json(json, options={}): return Column(jc) -@since(3.0) def schema_of_csv(csv, options={}): """ Parses a CSV string and infers its schema in DDL format. - :param col: a CSV string or a string literal containing a CSV string. - :param options: options to control parsing. accepts the same options as the CSV datasource + .. versionadded:: 3.0.0 + + Parameters + ---------- + csv : :class:`Column` or str + a CSV string or a string literal containing a CSV string. + options : dict, optional + options to control parsing. accepts the same options as the CSV datasource + Examples + -------- >>> df = spark.range(1) >>> df.select(schema_of_csv(lit('1|a'), {'sep':'|'}).alias("csv")).collect() [Row(csv='STRUCT<`_c0`: INT, `_c1`: STRING>')] @@ -2979,15 +3570,22 @@ def schema_of_csv(csv, options={}): return Column(jc) -@since(3.0) def to_csv(col, options={}): """ Converts a column containing a :class:`StructType` into a CSV string. Throws an exception, in the case of an unsupported type. - :param col: name of column containing a struct. - :param options: options to control converting. accepts the same options as the CSV datasource. + .. versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + name of column containing a struct. + options: dict, optional + options to control converting. 
accepts the same options as the CSV datasource. + + Examples + -------- >>> from pyspark.sql import Row >>> data = [(1, Row(age=2, name='Alice'))] >>> df = spark.createDataFrame(data, ("key", "value")) @@ -3000,13 +3598,19 @@ def to_csv(col, options={}): return Column(jc) -@since(1.5) def size(col): """ Collection function: returns the length of the array or map stored in the column. - :param col: name of column or expression + .. versionadded:: 1.5.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([1, 2, 3],),([1],),([],)], ['data']) >>> df.select(size(df.data)).collect() [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)] @@ -3015,13 +3619,19 @@ def size(col): return Column(sc._jvm.functions.size(_to_java_column(col))) -@since(2.4) def array_min(col): """ Collection function: returns the minimum value of the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) >>> df.select(array_min(df.data).alias('min')).collect() [Row(min=1), Row(min=-1)] @@ -3030,13 +3640,19 @@ def array_min(col): return Column(sc._jvm.functions.array_min(_to_java_column(col))) -@since(2.4) def array_max(col): """ Collection function: returns the maximum value of the array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, 3],), ([None, 10, -1],)], ['data']) >>> df.select(array_max(df.data).alias('max')).collect() [Row(max=3), Row(max=10)] @@ -3045,7 +3661,6 @@ def array_max(col): return Column(sc._jvm.functions.array_max(_to_java_column(col))) -@since(1.5) def sort_array(col, asc=True): """ Collection function: sorts the input array in ascending or descending order according @@ -3053,8 +3668,16 @@ def sort_array(col, asc=True): of the returned array in ascending order or at the end of the returned array in descending order. - :param col: name of column or expression + .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + asc : bool, optional + + Examples + -------- >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data']) >>> df.select(sort_array(df.data).alias('r')).collect() [Row(r=[None, 1, 2, 3]), Row(r=[1]), Row(r=[])] @@ -3065,14 +3688,20 @@ def sort_array(col, asc=True): return Column(sc._jvm.functions.sort_array(_to_java_column(col), asc)) -@since(2.4) def array_sort(col): """ Collection function: sorts the input array in ascending order. The elements of the input array must be orderable. Null elements will be placed at the end of the returned array. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> df = spark.createDataFrame([([2, 1, None, 3],),([1],),([],)], ['data']) >>> df.select(array_sort(df.data).alias('r')).collect() [Row(r=[1, 2, 3, None]), Row(r=[1]), Row(r=[])] @@ -3081,15 +3710,23 @@ def array_sort(col): return Column(sc._jvm.functions.array_sort(_to_java_column(col))) -@since(2.4) def shuffle(col): """ Collection function: Generates a random permutation of the given array. - .. 
note:: The function is non-deterministic. + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression - :param col: name of column or expression + Notes + ----- + The function is non-deterministic. + Examples + -------- >>> df = spark.createDataFrame([([1, 20, 3, 5],), ([1, 20, None, 3],)], ['data']) >>> df.select(shuffle(df.data).alias('s')).collect() # doctest: +SKIP [Row(s=[3, 1, 5, 20]), Row(s=[20, None, 3, 1])] @@ -3098,13 +3735,19 @@ def shuffle(col): return Column(sc._jvm.functions.shuffle(_to_java_column(col))) -@since(1.5) def reverse(col): """ Collection function: returns a reversed string or an array with reverse order of elements. - :param col: name of column or expression + .. versionadded:: 1.5.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([('Spark SQL',)], ['data']) >>> df.select(reverse(df.data).alias('s')).collect() [Row(s='LQS krapS')] @@ -3116,15 +3759,21 @@ def reverse(col): return Column(sc._jvm.functions.reverse(_to_java_column(col))) -@since(2.4) def flatten(col): """ Collection function: creates a single array from an array of arrays. If a structure of nested arrays is deeper than two levels, only one level of nesting is removed. - :param col: name of column or expression + .. versionadded:: 2.4.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> df = spark.createDataFrame([([[1, 2, 3], [4, 5], [6]],), ([None, [4, 5]],)], ['data']) >>> df.select(flatten(df.data).alias('r')).collect() [Row(r=[1, 2, 3, 4, 5, 6]), Row(r=None)] @@ -3133,13 +3782,19 @@ def flatten(col): return Column(sc._jvm.functions.flatten(_to_java_column(col))) -@since(2.3) def map_keys(col): """ Collection function: Returns an unordered array containing the keys of the map. - :param col: name of column or expression + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_keys >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_keys("data").alias("keys")).show() @@ -3153,13 +3808,19 @@ def map_keys(col): return Column(sc._jvm.functions.map_keys(_to_java_column(col))) -@since(2.3) def map_values(col): """ Collection function: Returns an unordered array containing the values of the map. - :param col: name of column or expression + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_values >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_values("data").alias("values")).show() @@ -3173,13 +3834,19 @@ def map_values(col): return Column(sc._jvm.functions.map_values(_to_java_column(col))) -@since(3.0) def map_entries(col): """ Collection function: Returns an unordered array of all entries in the given map. - :param col: name of column or expression + .. 
versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + + Examples + -------- >>> from pyspark.sql.functions import map_entries >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data") >>> df.select(map_entries("data").alias("entries")).show() @@ -3193,13 +3860,19 @@ def map_entries(col): return Column(sc._jvm.functions.map_entries(_to_java_column(col))) -@since(2.4) def map_from_entries(col): """ Collection function: Returns a map created from the given array of entries. - :param col: name of column or expression + .. versionadded:: 2.4.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + Examples + -------- >>> from pyspark.sql.functions import map_from_entries >>> df = spark.sql("SELECT array(struct(1, 'a'), struct(2, 'b')) as data") >>> df.select(map_from_entries("data").alias("map")).show() @@ -3213,11 +3886,14 @@ def map_from_entries(col): return Column(sc._jvm.functions.map_from_entries(_to_java_column(col))) -@since(2.4) def array_repeat(col, count): """ Collection function: creates an array containing a column repeated count times. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df = spark.createDataFrame([('ab',)], ['data']) >>> df.select(array_repeat(df.data, 3).alias('r')).collect() [Row(r=['ab', 'ab', 'ab'])] @@ -3229,14 +3905,20 @@ def array_repeat(col, count): )) -@since(2.4) def arrays_zip(*cols): """ Collection function: Returns a merged array of structs in which the N-th struct contains all N-th values of input arrays. - :param cols: columns of arrays to be merged. + .. versionadded:: 2.4.0 + Parameters + ---------- + cols : :class:`Column` or str + columns of arrays to be merged. + + Examples + -------- >>> from pyspark.sql.functions import arrays_zip >>> df = spark.createDataFrame([(([1, 2, 3], [2, 3, 4]))], ['vals1', 'vals2']) >>> df.select(arrays_zip(df.vals1, df.vals2).alias('zipped')).collect() @@ -3246,12 +3928,18 @@ def arrays_zip(*cols): return Column(sc._jvm.functions.arrays_zip(_to_seq(sc, cols, _to_java_column))) -@since(2.4) def map_concat(*cols): """Returns the union of all the given maps. - :param cols: list of column names (string) or list of :class:`Column` expressions + .. versionadded:: 2.4.0 + Parameters + ---------- + cols : :class:`Column` or str + column names or :class:`Column`\\s + + Examples + -------- >>> from pyspark.sql.functions import map_concat >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as map1, map(3, 'c') as map2") >>> df.select(map_concat("map1", "map2").alias("map3")).show(truncate=False) @@ -3268,13 +3956,16 @@ def map_concat(*cols): return Column(jc) -@since(2.4) def sequence(start, stop, step=None): """ Generate a sequence of integers from `start` to `stop`, incrementing by `step`. If `step` is not set, incrementing by 1 if `start` is less than or equal to `stop`, otherwise -1. + .. versionadded:: 2.4.0 + + Examples + -------- >>> df1 = spark.createDataFrame([(-2, 2)], ('C1', 'C2')) >>> df1.select(sequence('C1', 'C2').alias('r')).collect() [Row(r=[-2, -1, 0, 1, 2])] @@ -3290,16 +3981,24 @@ def sequence(start, stop, step=None): _to_java_column(start), _to_java_column(stop), _to_java_column(step))) -@since(3.0) def from_csv(col, schema, options={}): """ Parses a column containing a CSV string to a row with the specified schema. Returns `null`, in the case of an unparseable string. - :param col: string column in CSV format - :param schema: a string with schema in DDL format to use when parsing the CSV column. 
- :param options: options to control parsing. accepts the same options as the CSV datasource + .. versionadded:: 3.0.0 + Parameters + ---------- + col : :class:`Column` or str + string column in CSV format + schema :class:`Column` or str + a string with schema in DDL format to use when parsing the CSV column. + options : dict, optional + options to control parsing. accepts the same options as the CSV datasource + + Examples + -------- >>> data = [("1,2,3",)] >>> df = spark.createDataFrame(data, ("value",)) >>> df.select(from_csv(df.value, "a INT, b INT, c INT").alias("csv")).collect() @@ -3331,7 +4030,9 @@ def _unresolved_named_lambda_variable(*name_parts): Create `o.a.s.sql.expressions.UnresolvedNamedLambdaVariable`, convert it to o.s.sql.Column and wrap in Python `Column` - :param name_parts: str + Parameters + ---------- + name_parts : str """ sc = SparkContext._active_spark_context name_parts_seq = _to_seq(sc, name_parts) @@ -3428,13 +4129,18 @@ def _invoke_higher_order_function(name, cols, funs): return Column(sc._jvm.Column(expr(*jcols + jfuns))) -@since(3.1) def transform(col, f): """ Returns an array of elements after applying a transformation to each element in the input array. - :param col: name of column or expression - :param f: a function that is applied to each element of the input array. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a function that is applied to each element of the input array. Can take one of the following forms: - Unary ``(x: Column) -> Column: ...`` @@ -3446,8 +4152,12 @@ def transform(col, f): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 2, 3, 4])], ("key", "values")) >>> df.select(transform("values", lambda x: x * 2).alias("doubled")).show() +------------+ @@ -3468,19 +4178,26 @@ def transform(col, f): return _invoke_higher_order_function("ArrayTransform", [col], [f]) -@since(3.1) def exists(col, f): """ Returns whether a predicate holds for one or more elements in the array. - :param col: name of column or expression - :param f: an function ``(x: Column) -> Column: ...`` returning the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + ``(x: Column) -> Column: ...`` returning the Boolean expression. Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). :return: a :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 2, 3, 4]), (2, [3, -1, 0])],("key", "values")) >>> df.select(exists("values", lambda x: x < 0).alias("any_negative")).show() +------------+ @@ -3493,19 +4210,29 @@ def exists(col, f): return _invoke_higher_order_function("ArrayExists", [col], [f]) -@since(3.1) def forall(col, f): """ Returns whether a predicate holds for every element in the array. - :param col: name of column or expression - :param f: an function ``(x: Column) -> Column: ...`` returning the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + ``(x: Column) -> Column: ...`` returning the Boolean expression. 
Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["bar"]), (2, ["foo", "bar"]), (3, ["foobar", "foo"])], ... ("key", "values") @@ -3522,13 +4249,18 @@ def forall(col, f): return _invoke_higher_order_function("ArrayForAll", [col], [f]) -@since(3.1) def filter(col, f): """ Returns an array of elements for which a predicate holds in a given array. - :param col: name of column or expression - :param f: A function that returns the Boolean expression. + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + A function that returns the Boolean expression. Can take one of the following forms: - Unary ``(x: Column) -> Column: ...`` @@ -3540,8 +4272,12 @@ def filter(col, f): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + Examples + -------- >>> df = spark.createDataFrame( ... [(1, ["2018-09-20", "2019-02-03", "2019-07-01", "2020-06-01"])], ... ("key", "values") @@ -3560,7 +4296,6 @@ def filter(col, f): return _invoke_higher_order_function("ArrayFilter", [col], [f]) -@since(3.1) def aggregate(col, zero, merge, finish=None): """ Applies a binary operator to an initial state and all elements in the array, @@ -3572,14 +4307,27 @@ def aggregate(col, zero, merge, finish=None): Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :param col: name of column or expression - :param zero: initial value. Name of column or expression - :param merge: a binary function ``(acc: Column, x: Column) -> Column...`` returning expression + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + zero : :class:`Column` or str + initial value. Name of column or expression + merge : function + a binary function ``(acc: Column, x: Column) -> Column...`` returning expression of the same type as ``zero`` - :param finish: an optional unary function ``(x: Column) -> Column: ...`` + finish : function + an optional unary function ``(x: Column) -> Column: ...`` used to convert accumulated value. - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, [20.0, 4.0, 2.0, 6.0, 10.0])], ("id", "values")) >>> df.select(aggregate("values", lit(0.0), lambda acc, x: acc + x).alias("sum")).show() +----+ @@ -3621,22 +4369,33 @@ def aggregate(col, zero, merge, finish=None): ) -@since(3.1) def zip_with(col1, col2, f): """ Merge two given arrays, element-wise, into a single array using a function. If one array is shorter, nulls are appended at the end to match the length of the longer array, before applying the function. - :param col1: name of the first column or expression - :param col2: name of the second column or expression - :param f: a binary function ``(x1: Column, x2: Column) -> Column...`` + .. 
versionadded:: 3.1.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of the first column or expression + col2 : :class:`Column` or str + name of the second column or expression + f : function + a binary function ``(x1: Column, x2: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, [1, 3, 5, 8], [0, 2, 4, 6])], ("id", "xs", "ys")) >>> df.select(zip_with("xs", "ys", lambda x, y: x ** y).alias("powers")).show(truncate=False) +---------------------------+ @@ -3656,20 +4415,30 @@ def zip_with(col1, col2, f): return _invoke_higher_order_function("ZipWith", [col1, col2], [f]) -@since(3.1) def transform_keys(col, f): """ Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new keys for the pairs. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"foo": -2.0, "bar": 2.0})], ("id", "data")) >>> df.select(transform_keys( ... "data", lambda k, _: upper(k)).alias("data_upper") @@ -3683,20 +4452,30 @@ def transform_keys(col, f): return _invoke_higher_order_function("TransformKeys", [col], [f]) -@since(3.1) def transform_values(col, f): """ Applies a function to every key-value pair in a map and returns a map with the results of those applications as the new values for the pairs. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"IT": 10.0, "SALES": 2.0, "OPS": 24.0})], ("id", "data")) >>> df.select(transform_values( ... "data", lambda k, v: when(k.isin("IT", "OPS"), v + 10.0).otherwise(v) @@ -3710,19 +4489,29 @@ def transform_values(col, f): return _invoke_higher_order_function("TransformValues", [col], [f]) -@since(3.1) def map_filter(col, f): """ Returns a map whose key-value pairs satisfy a predicate. - :param col: name of column or expression - :param f: a binary function ``(k: Column, v: Column) -> Column...`` + .. 
versionadded:: 3.1.0 + + Parameters + ---------- + col : :class:`Column` or str + name of column or expression + f : function + a binary function ``(k: Column, v: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([(1, {"foo": 42.0, "bar": 1.0, "baz": 32.0})], ("id", "data")) >>> df.select(map_filter( ... "data", lambda _, v: v > 30.0).alias("data_filtered") @@ -3736,20 +4525,31 @@ def map_filter(col, f): return _invoke_higher_order_function("MapFilter", [col], [f]) -@since(3.1) def map_zip_with(col1, col2, f): """ Merge two given maps, key-wise into a single map using a function. - :param col1: name of the first column or expression - :param col2: name of the second column or expression - :param f: a ternary function ``(k: Column, v1: Column, v2: Column) -> Column...`` + .. versionadded:: 3.1.0 + + Parameters + ---------- + col1 : :class:`Column` or str + name of the first column or expression + col2 : :class:`Column` or str + name of the second column or expression + f : function + a ternary function ``(k: Column, v1: Column, v2: Column) -> Column...`` Can use methods of :class:`pyspark.sql.Column`, functions defined in :py:mod:`pyspark.sql.functions` and Scala ``UserDefinedFunctions``. Python ``UserDefinedFunctions`` are not supported (`SPARK-27052 `__). - :return: a :class:`pyspark.sql.Column` + Returns + ------- + :class:`pyspark.sql.Column` + + Examples + -------- >>> df = spark.createDataFrame([ ... (1, {"IT": 24.0, "SALES": 12.00}, {"IT": 2.0, "SALES": 1.4})], ... ("id", "base", "ratio") @@ -3768,100 +4568,120 @@ def map_zip_with(col1, col2, f): # ---------------------- Partition transform functions -------------------------------- -@since(3.1) def years(col): """ Partition transform function: A transform for timestamps and dates to partition data into years. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... years("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.years(_to_java_column(col))) -@since(3.1) def months(col): """ Partition transform function: A transform for timestamps and dates to partition data into months. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( ... months("ts") ... ).createOrReplace() # doctest: +SKIP - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. 
""" sc = SparkContext._active_spark_context return Column(sc._jvm.functions.months(_to_java_column(col))) -@since(3.1) def days(col): """ Partition transform function: A transform for timestamps and dates to partition data into days. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... days("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.days(_to_java_column(col))) -@since(3.1) def hours(col): """ Partition transform function: A transform for timestamps to partition data into hours. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... hours("ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combinatiion with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ sc = SparkContext._active_spark_context return Column(sc._jvm.functions.hours(_to_java_column(col))) -@since(3.1) def bucket(numBuckets, col): """ Partition transform function: A transform for any type that partitions by a hash of the input column. + .. versionadded:: 3.1.0 + + Examples + -------- >>> df.writeTo("catalog.db.table").partitionedBy( # doctest: +SKIP ... bucket(42, "ts") ... ).createOrReplace() - .. warning:: - This function can be used only in combination with - :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` - method of the `DataFrameWriterV2`. + Notes + ----- + This function can be used only in combination with + :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` + method of the `DataFrameWriterV2`. """ if not isinstance(numBuckets, (int, Column)): @@ -3880,29 +4700,21 @@ def bucket(numBuckets, col): # ---------------------------- User Defined Function ---------------------------------- -@since(1.3) def udf(f=None, returnType=StringType()): """Creates a user defined function (UDF). - .. note:: The user-defined functions are considered deterministic by default. Due to - optimization, duplicate invocations may be eliminated or the function may even be invoked - more times than it is present in the query. If your function is not deterministic, call - `asNondeterministic` on the user defined function. E.g.: + .. versionadded:: 1.3.0 - >>> from pyspark.sql.types import IntegerType - >>> import random - >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic() - - .. note:: The user-defined functions do not support conditional expressions or short circuiting - in boolean expressions and it ends up with being executed all internally. If the functions - can fail on special rows, the workaround is to incorporate the condition into the functions. - - .. note:: The user-defined functions do not take keyword arguments on the calling side. - - :param f: python function if used as a standalone function - :param returnType: the return type of the user-defined function. 
The value can be either a + Parameters + ---------- + f : function + python function if used as a standalone function + returnType : :class:`pyspark.sql.types.DataType` or str + the return type of the user-defined function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.types import IntegerType >>> slen = udf(lambda s: len(s), IntegerType()) >>> @udf @@ -3922,6 +4734,23 @@ def udf(f=None, returnType=StringType()): +----------+--------------+------------+ | 8| JOHN DOE| 22| +----------+--------------+------------+ + + Notes + ----- + The user-defined functions are considered deterministic by default. Due to + optimization, duplicate invocations may be eliminated or the function may even be invoked + more times than it is present in the query. If your function is not deterministic, call + `asNondeterministic` on the user defined function. E.g.: + + >>> from pyspark.sql.types import IntegerType + >>> import random + >>> random_udf = udf(lambda: int(random.random() * 100), IntegerType()).asNondeterministic() + + The user-defined functions do not support conditional expressions or short circuiting + in boolean expressions and it ends up with being executed all internally. If the functions + can fail on special rows, the workaround is to incorporate the condition into the functions. + + The user-defined functions do not take keyword arguments on the calling side. """ # The following table shows most of Python data and SQL type conversions in normal UDFs that diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 7ba3f07e17c19..e395f5797bebd 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -174,7 +174,7 @@ def create_map(*cols: ColumnOrName) -> Column: ... def array(*cols: ColumnOrName) -> Column: ... def array_contains(col: ColumnOrName, value: Any) -> Column: ... def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... -def slice(x: ColumnOrName, start: int, length: int) -> Column: ... +def slice(x: ColumnOrName, start: Union[Column, int], length: Union[Column, int]) -> Column: ... def array_join( col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... ) -> Column: ... diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 688f8d4992b7d..d3cbf9268c9c7 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -17,7 +17,6 @@ import sys -from pyspark import since from pyspark.sql.column import Column, _to_seq from pyspark.sql.dataframe import DataFrame from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin @@ -59,7 +58,6 @@ def __init__(self, jgd, df): self._df = df self.sql_ctx = df.sql_ctx - @since(1.3) def agg(self, *exprs): """Compute aggregates and returns the result as a :class:`DataFrame`. @@ -81,12 +79,21 @@ def agg(self, *exprs): Alternatively, ``exprs`` can also be a list of aggregate :class:`Column` expressions. - .. note:: Built-in aggregation functions and group aggregate pandas UDFs cannot be mixed - in a single call to this function. + .. versionadded:: 1.3.0 - :param exprs: a dict mapping from column name (string) to aggregate functions (string), + Parameters + ---------- + exprs : dict + a dict mapping from column name (string) to aggregate functions (string), or a list of :class:`Column`. + Notes + ----- + Built-in aggregation functions and group aggregate pandas UDFs cannot be mixed + in a single call to this function. 
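As an illustration of the two `exprs` forms the reworked `GroupedData.agg` docstring describes (a dict mapping column names to aggregate function names, or `Column` expressions), here is a minimal sketch; the local session and toy DataFrame are assumptions for the example, not part of this patch:

```
# Minimal sketch of the two agg() call styles documented above.
# The local SparkSession and toy data are illustrative assumptions only.
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession.builder.master("local[1]").appName("agg-forms").getOrCreate()
df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ["name", "age"])
gdf = df.groupBy("name")

gdf.agg({"age": "max"}).show()                  # dict: column name -> aggregate function name
gdf.agg(F.max(df.age).alias("max_age")).show()  # Column expressions
```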
+ + Examples + -------- >>> gdf = df.groupBy(df.name) >>> sorted(gdf.agg({"*": "count"}).collect()) [Row(name='Alice', count(1)=1), Row(name='Bob', count(1)=1)] @@ -113,23 +120,32 @@ def agg(self, *exprs): return DataFrame(jdf, self.sql_ctx) @dfapi - @since(1.3) def count(self): """Counts the number of records for each group. + .. versionadded:: 1.3.0 + + Examples + -------- >>> sorted(df.groupBy(df.age).count().collect()) [Row(age=2, count=1), Row(age=5, count=1)] """ @df_varargs_api - @since(1.3) def mean(self, *cols): """Computes average values for each numeric columns for each group. :func:`mean` is an alias for :func:`avg`. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + + Examples + -------- >>> df.groupBy().mean('age').collect() [Row(avg(age)=3.5)] >>> df3.groupBy().mean('age', 'height').collect() @@ -137,14 +153,20 @@ def mean(self, *cols): """ @df_varargs_api - @since(1.3) def avg(self, *cols): """Computes average values for each numeric columns for each group. :func:`mean` is an alias for :func:`avg`. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + Examples + -------- >>> df.groupBy().avg('age').collect() [Row(avg(age)=3.5)] >>> df3.groupBy().avg('age', 'height').collect() @@ -152,10 +174,13 @@ def avg(self, *cols): """ @df_varargs_api - @since(1.3) def max(self, *cols): """Computes the max value for each numeric columns for each group. + .. versionadded:: 1.3.0 + + Examples + -------- >>> df.groupBy().max('age').collect() [Row(max(age)=5)] >>> df3.groupBy().max('age', 'height').collect() @@ -163,12 +188,18 @@ def max(self, *cols): """ @df_varargs_api - @since(1.3) def min(self, *cols): """Computes the min value for each numeric column for each group. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + Examples + -------- >>> df.groupBy().min('age').collect() [Row(min(age)=2)] >>> df3.groupBy().min('age', 'height').collect() @@ -176,19 +207,24 @@ def min(self, *cols): """ @df_varargs_api - @since(1.3) def sum(self, *cols): """Compute the sum for each numeric columns for each group. - :param cols: list of column names (string). Non-numeric columns are ignored. + .. versionadded:: 1.3.0 + Parameters + ---------- + cols : str + column names. Non-numeric columns are ignored. + + Examples + -------- >>> df.groupBy().sum('age').collect() [Row(sum(age)=7)] >>> df3.groupBy().sum('age', 'height').collect() [Row(sum(age)=7, sum(height)=165)] """ - @since(1.6) def pivot(self, pivot_col, values=None): """ Pivots a column of the current :class:`DataFrame` and perform the specified aggregation. @@ -196,9 +232,17 @@ def pivot(self, pivot_col, values=None): of distinct values to pivot on, and one that does not. The latter is more concise but less efficient, because Spark needs to first compute the list of distinct values internally. - :param pivot_col: Name of the column to pivot. - :param values: List of values that will be translated to columns in the output DataFrame. + .. versionadded:: 1.6.0 + + Parameters + ---------- + pivot_col : str + Name of the column to pivot. + values : + List of values that will be translated to columns in the output DataFrame. 
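The `pivot` docstring above distinguishes the call form with an explicit value list from the one without; a short hedged sketch of the difference (the session and `courses` DataFrame are assumptions for illustration, mirroring the `df4` doctest data):

```
# Sketch of pivot() with and without an explicit value list, as described above.
# Supplying the values up front avoids the extra pass Spark needs to compute them.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
courses = spark.createDataFrame(
    [(2012, "dotNET", 10000), (2012, "Java", 20000),
     (2013, "dotNET", 48000), (2013, "Java", 30000)],
    ["year", "course", "earnings"])

explicit = courses.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings")
inferred = courses.groupBy("year").pivot("course").sum("earnings")  # distinct values computed first
explicit.show()
```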
+ Examples + -------- # Compute the sum of earnings for each year by course with each course as a separate column >>> df4.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").collect() diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index d39a4413a0f2e..3456c12e59c09 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -18,7 +18,6 @@ import warnings from collections import Counter -from pyspark import since from pyspark.rdd import _load_from_socket from pyspark.sql.pandas.serializers import ArrowCollectSerializer from pyspark.sql.types import IntegralType @@ -33,18 +32,23 @@ class PandasConversionMixin(object): can use this class. """ - @since(1.3) def toPandas(self): """ Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. This is only available if Pandas is installed and available. - .. note:: This method should only be used if the resulting Pandas's :class:`DataFrame` is - expected to be small, as all the data is loaded into the driver's memory. + .. versionadded:: 1.3.0 - .. note:: Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + Notes + ----- + This method should only be used if the resulting Pandas's :class:`DataFrame` is + expected to be small, as all the data is loaded into the driver's memory. + Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + + Examples + -------- >>> df.toPandas() # doctest: +SKIP age name 0 2 Alice @@ -221,8 +225,7 @@ def _collect_as_arrow(self): """ Returns all records as a list of ArrowRecordBatches, pyarrow must be installed and available on driver and worker Python environments. - - .. note:: Experimental. + This is an experimental feature. """ from pyspark.sql.dataframe import DataFrame @@ -295,7 +298,11 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr def _convert_from_pandas(self, pdf, schema, timezone): """ Convert a pandas.DataFrame to list of records that can be used to make a DataFrame - :return list of records + + Returns + ------- + list + list of records """ from pyspark.sql import SparkSession @@ -343,8 +350,16 @@ def _get_numpy_record_dtype(self, rec): """ Used when converting a pandas.DataFrame to Spark using to_records(), this will correct the dtypes of fields in a record so they can be properly loaded into Spark. - :param rec: a numpy record to check field dtypes - :return corrected dtype for a numpy.record or None if no correction needed + + Parameters + ---------- + rec : numpy.record + a numpy record to check field dtypes + + Returns + ------- + numpy.dtype + corrected dtype for a numpy.record or None if no correction needed """ import numpy as np cur_dtypes = rec.dtype diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 82203fc03a9a1..16462e8702a0b 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -19,7 +19,6 @@ import warnings from inspect import getfullargspec -from pyspark import since from pyspark.rdd import PythonEvalType from pyspark.sql.pandas.typehints import infer_eval_type from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version @@ -39,7 +38,6 @@ class PandasUDFType(object): GROUPED_AGG = PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF -@since(2.3) def pandas_udf(f=None, returnType=None, functionType=None): """ Creates a pandas user defined function (a.k.a. 
vectorized user defined function). @@ -50,14 +48,22 @@ def pandas_udf(f=None, returnType=None, functionType=None): additional configuration is required. A Pandas UDF behaves as a regular PySpark function API in general. - :param f: user-defined function. A python function if used as a standalone function - :param returnType: the return type of the user-defined function. The value can be either a - :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - :param functionType: an enum value in :class:`pyspark.sql.functions.PandasUDFType`. - Default: SCALAR. + .. versionadded:: 2.3.0 - .. note:: This parameter exists for compatibility. Using Python type hints is encouraged. + Parameters + ---------- + f : function, optional + user-defined function. A python function if used as a standalone function + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the user-defined function. The value can be either a + :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + functionType : int, optional + an enum value in :class:`pyspark.sql.functions.PandasUDFType`. + Default: SCALAR. This parameter exists for compatibility. + Using Python type hints is encouraged. + Examples + -------- In order to use this API, customarily the below are imported: >>> import pandas as pd @@ -263,30 +269,33 @@ def calculate(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: Therefore, mutating the input series is not allowed and will cause incorrect results. For the same reason, users should also not rely on the index of the input series. - .. seealso:: :meth:`pyspark.sql.GroupedData.agg` and :class:`pyspark.sql.Window` - - .. note:: The user-defined functions do not support conditional expressions or short circuiting - in boolean expressions and it ends up with being executed all internally. If the functions - can fail on special rows, the workaround is to incorporate the condition into the functions. - - .. note:: The user-defined functions do not take keyword arguments on the calling side. - - .. note:: The data type of returned `pandas.Series` from the user-defined functions should be - matched with defined `returnType` (see :meth:`types.to_arrow_type` and - :meth:`types.from_arrow_type`). When there is mismatch between them, Spark might do - conversion on returned data. The conversion is not guaranteed to be correct and results - should be checked for accuracy by users. - - .. note:: Currently, - :class:`pyspark.sql.types.MapType`, - :class:`pyspark.sql.types.ArrayType` of :class:`pyspark.sql.types.TimestampType` and - nested :class:`pyspark.sql.types.StructType` - are currently not supported as output types. - - .. seealso:: :meth:`pyspark.sql.DataFrame.mapInPandas` - .. seealso:: :meth:`pyspark.sql.GroupedData.applyInPandas` - .. seealso:: :meth:`pyspark.sql.PandasCogroupedOps.applyInPandas` - .. seealso:: :meth:`pyspark.sql.UDFRegistration.register` + Notes + ----- + The user-defined functions do not support conditional expressions or short circuiting + in boolean expressions and it ends up with being executed all internally. If the functions + can fail on special rows, the workaround is to incorporate the condition into the functions. + + The user-defined functions do not take keyword arguments on the calling side. + + The data type of returned `pandas.Series` from the user-defined functions should be + matched with defined `returnType` (see :meth:`types.to_arrow_type` and + :meth:`types.from_arrow_type`). 
When there is mismatch between them, Spark might do + conversion on returned data. The conversion is not guaranteed to be correct and results + should be checked for accuracy by users. + + Currently, + :class:`pyspark.sql.types.MapType`, + :class:`pyspark.sql.types.ArrayType` of :class:`pyspark.sql.types.TimestampType` and + nested :class:`pyspark.sql.types.StructType` + are currently not supported as output types. + + See Also + -------- + pyspark.sql.GroupedData.agg + pyspark.sql.DataFrame.mapInPandas + pyspark.sql.GroupedData.applyInPandas + pyspark.sql.PandasCogroupedOps.applyInPandas + pyspark.sql.UDFRegistration.register """ # The following table shows most of Pandas data and SQL type conversions in Pandas UDFs that diff --git a/python/pyspark/sql/pandas/group_ops.py b/python/pyspark/sql/pandas/group_ops.py index ce021fac147fb..8d4f67e2c7502 100644 --- a/python/pyspark/sql/pandas/group_ops.py +++ b/python/pyspark/sql/pandas/group_ops.py @@ -17,7 +17,6 @@ import sys import warnings -from pyspark import since from pyspark.rdd import PythonEvalType from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame @@ -29,19 +28,27 @@ class PandasGroupedOpsMixin(object): can use this class. """ - @since(2.3) def apply(self, udf): """ It is an alias of :meth:`pyspark.sql.GroupedData.applyInPandas`; however, it takes a :meth:`pyspark.sql.functions.pandas_udf` whereas :meth:`pyspark.sql.GroupedData.applyInPandas` takes a Python native function. - .. note:: It is preferred to use :meth:`pyspark.sql.GroupedData.applyInPandas` over this - API. This API will be deprecated in the future releases. + .. versionadded:: 2.3.0 - :param udf: a grouped map user-defined function returned by + Parameters + ---------- + udf : :func:`pyspark.sql.functions.pandas_udf` + a grouped map user-defined function returned by :func:`pyspark.sql.functions.pandas_udf`. + Notes + ----- + It is preferred to use :meth:`pyspark.sql.GroupedData.applyInPandas` over this + API. This API will be deprecated in the future releases. + + Examples + -------- >>> from pyspark.sql.functions import pandas_udf, PandasUDFType >>> df = spark.createDataFrame( ... [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], @@ -61,8 +68,9 @@ def apply(self, udf): | 2| 1.1094003924504583| +---+-------------------+ - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` - + See Also + -------- + pyspark.sql.functions.pandas_udf """ # Columns are special because hasattr always return True if isinstance(udf, Column) or not hasattr(udf, 'func') \ @@ -77,7 +85,6 @@ def apply(self, udf): return self.applyInPandas(udf.func, schema=udf.returnType) - @since(3.0) def applyInPandas(self, func, schema): """ Maps each group of the current :class:`DataFrame` using a pandas udf and returns the result @@ -94,11 +101,19 @@ def applyInPandas(self, func, schema): field data types by position if not strings, e.g. integer indices. The length of the returned `pandas.DataFrame` can be arbitrary. - :param func: a Python native function that takes a `pandas.DataFrame`, and outputs a + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes a `pandas.DataFrame`, and outputs a `pandas.DataFrame`. - :param schema: the return type of the `func` in PySpark. The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. 
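Both the `pandas_udf` notes above and the `applyInPandas` parameters here turn on the declared return type matching what the Python function actually produces; a minimal sketch of a Series-to-Series pandas UDF whose dtype matches its declared type (pandas/pyarrow availability, the session, and the toy data are assumptions):

```
# Hedged sketch: a Series-to-Series pandas UDF whose declared return type ("long")
# matches the int64 Series it returns, per the notes above. Requires pandas and pyarrow.
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1,), (2,), (3,)], ["v"])

@pandas_udf("long")
def plus_one(s: pd.Series) -> pd.Series:
    return s + 1  # int64, matching the declared "long" type

df.select(plus_one("v").alias("v_plus_one")).show()
```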
+ Examples + -------- >>> import pandas as pd # doctest: +SKIP >>> from pyspark.sql.functions import pandas_udf, ceil >>> df = spark.createDataFrame( @@ -141,6 +156,7 @@ def applyInPandas(self, func, schema): | 1|1.5| | 2|6.0| +---+---+ + >>> def sum_func(key, pdf): ... # key is a tuple of two numpy.int64s, which is the values ... # of 'id' and 'ceil(df.v / 2)' for the current group @@ -156,19 +172,23 @@ def applyInPandas(self, func, schema): | 2| 2| 3.0| +---+-----------+----+ - .. note:: This function requires a full shuffle. All the data of a group will be loaded - into memory, so the user should be aware of the potential OOM risk if data is skewed - and certain groups are too large to fit in memory. + Notes + ----- + This function requires a full shuffle. All the data of a group will be loaded + into memory, so the user should be aware of the potential OOM risk if data is skewed + and certain groups are too large to fit in memory. - .. note:: If returning a new `pandas.DataFrame` constructed with a dictionary, it is - recommended to explicitly index the columns by name to ensure the positions are correct, - or alternatively use an `OrderedDict`. - For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or - `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. + If returning a new `pandas.DataFrame` constructed with a dictionary, it is + recommended to explicitly index the columns by name to ensure the positions are correct, + or alternatively use an `OrderedDict`. + For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or + `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. - .. note:: Experimental + This API is experimental. - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql import GroupedData from pyspark.sql.functions import pandas_udf, PandasUDFType @@ -182,11 +202,12 @@ def applyInPandas(self, func, schema): jdf = self._jgd.flatMapGroupsInPandas(udf_column._jc.expr()) return DataFrame(jdf, self.sql_ctx) - @since(3.0) def cogroup(self, other): """ Cogroups this group with another group so that we can run cogrouped operations. + .. versionadded:: 3.0.0 + See :class:`PandasCogroupedOps` for the operations that can be run. """ from pyspark.sql import GroupedData @@ -201,9 +222,11 @@ class PandasCogroupedOps(object): A logical grouping of two :class:`GroupedData`, created by :func:`GroupedData.cogroup`. - .. note:: Experimental + .. versionadded:: 3.0.0 - .. versionadded:: 3.0 + Notes + ----- + This API is experimental. """ def __init__(self, gd1, gd2): @@ -211,7 +234,6 @@ def __init__(self, gd1, gd2): self._gd2 = gd2 self.sql_ctx = gd1.sql_ctx - @since(3.0) def applyInPandas(self, func, schema): """ Applies a function to each cogroup using pandas and returns the result @@ -228,12 +250,20 @@ def applyInPandas(self, func, schema): field data types by position if not strings, e.g. integer indices. The length of the returned `pandas.DataFrame` can be arbitrary. - :param func: a Python native function that takes two `pandas.DataFrame`\\s, and + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes two `pandas.DataFrame`\\s, and outputs a `pandas.DataFrame`, or that takes one tuple (grouping keys) and two pandas ``DataFrame``\\s, and outputs a pandas ``DataFrame``. - :param schema: the return type of the `func` in PySpark. 
The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.functions import pandas_udf >>> df1 = spark.createDataFrame( ... [(20000101, 1, 1.0), (20000101, 2, 2.0), (20000102, 1, 3.0), (20000102, 2, 4.0)], @@ -275,20 +305,23 @@ def applyInPandas(self, func, schema): |20000102| 1|3.0| x| +--------+---+---+---+ - .. note:: This function requires a full shuffle. All the data of a cogroup will be loaded - into memory, so the user should be aware of the potential OOM risk if data is skewed - and certain groups are too large to fit in memory. - - .. note:: If returning a new `pandas.DataFrame` constructed with a dictionary, it is - recommended to explicitly index the columns by name to ensure the positions are correct, - or alternatively use an `OrderedDict`. - For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or - `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. + Notes + ----- + This function requires a full shuffle. All the data of a cogroup will be loaded + into memory, so the user should be aware of the potential OOM risk if data is skewed + and certain groups are too large to fit in memory. - .. note:: Experimental + If returning a new `pandas.DataFrame` constructed with a dictionary, it is + recommended to explicitly index the columns by name to ensure the positions are correct, + or alternatively use an `OrderedDict`. + For example, `pd.DataFrame({'id': ids, 'a': data}, columns=['id', 'a'])` or + `pd.DataFrame(OrderedDict([('id', ids), ('a', data)]))`. - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + This API is experimental. + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql.pandas.functions import pandas_udf diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index 9835e88c6ac21..63fe37113e7cc 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -16,7 +16,6 @@ # import sys -from pyspark import since from pyspark.rdd import PythonEvalType @@ -26,7 +25,6 @@ class PandasMapOpsMixin(object): can use this class. """ - @since(3.0) def mapInPandas(self, func, schema): """ Maps an iterator of batches in the current :class:`DataFrame` using a Python native @@ -40,11 +38,19 @@ def mapInPandas(self, func, schema): Each `pandas.DataFrame` size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. - :param func: a Python native function that takes an iterator of `pandas.DataFrame`\\s, and + .. versionadded:: 3.0.0 + + Parameters + ---------- + func : function + a Python native function that takes an iterator of `pandas.DataFrame`\\s, and outputs an iterator of `pandas.DataFrame`\\s. - :param schema: the return type of the `func` in PySpark. The value can be either a + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.functions import pandas_udf >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) >>> def filter_func(iterator): @@ -57,9 +63,13 @@ def mapInPandas(self, func, schema): | 1| 21| +---+---+ - .. seealso:: :meth:`pyspark.sql.functions.pandas_udf` + Notes + ----- + This API is experimental - .. 
note:: Experimental + See Also + -------- + pyspark.sql.functions.pandas_udf """ from pyspark.sql import DataFrame from pyspark.sql.pandas.functions import pandas_udf diff --git a/python/pyspark/sql/pandas/serializers.py b/python/pyspark/sql/pandas/serializers.py index 09c7cf1b312bc..73d36ee555fb5 100644 --- a/python/pyspark/sql/pandas/serializers.py +++ b/python/pyspark/sql/pandas/serializers.py @@ -100,9 +100,14 @@ class ArrowStreamPandasSerializer(ArrowStreamSerializer): """ Serializes Pandas.Series as Arrow data with Arrow streaming format. - :param timezone: A timezone to respect when handling timestamp values - :param safecheck: If True, conversion from Arrow to Pandas checks for overflow/truncation - :param assign_cols_by_name: If True, then Pandas DataFrames will get columns by name + Parameters + ---------- + timezone : str + A timezone to respect when handling timestamp values + safecheck : bool + If True, conversion from Arrow to Pandas checks for overflow/truncation + assign_cols_by_name : bool + If True, then Pandas DataFrames will get columns by name """ def __init__(self, timezone, safecheck, assign_cols_by_name): @@ -130,8 +135,15 @@ def _create_batch(self, series): Create an Arrow record batch from the given pandas.Series or list of Series, with optional type. - :param series: A single pandas.Series, list of Series, or list of (series, arrow_type) - :return: Arrow RecordBatch + Parameters + ---------- + series : pandas.Series or list + A single series, list of series, or list of (series, arrow_type) + + Returns + ------- + pyarrow.RecordBatch + Arrow RecordBatch """ import pandas as pd import pyarrow as pa diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py index 78f9daa130d59..67557120715ac 100644 --- a/python/pyspark/sql/pandas/types.py +++ b/python/pyspark/sql/pandas/types.py @@ -153,9 +153,16 @@ def _check_series_localize_timestamps(s, timezone): If the input series is not a timestamp series, then the same series is returned. If the input series is a timestamp series, then a converted series is returned. - :param s: pandas.Series - :param timezone: the timezone to convert. if None then use local timezone - :return pandas.Series that have been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` that have been converted to tz-naive """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -174,9 +181,16 @@ def _check_series_convert_timestamps_internal(s, timezone): Convert a tz-naive timestamp in the specified timezone or local timezone to UTC normalized for Spark internal storage - :param s: a pandas.Series - :param timezone: the timezone to convert. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been UTC normalized without a time zone + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert. 
if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been UTC normalized without a time zone """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -226,10 +240,18 @@ def _check_series_convert_timestamps_localize(s, from_timezone, to_timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param from_timezone: the timezone to convert from. if None then use local timezone - :param to_timezone: the timezone to convert to. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + from_timezone : str + the timezone to convert from. if None then use local timezone + to_timezone : str + the timezone to convert to. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() @@ -254,9 +276,16 @@ def _check_series_convert_timestamps_local_tz(s, timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param timezone: the timezone to convert to. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert to. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ return _check_series_convert_timestamps_localize(s, None, timezone) @@ -265,8 +294,15 @@ def _check_series_convert_timestamps_tz_local(s, timezone): """ Convert timestamp to timezone-naive in the specified timezone or local timezone - :param s: a pandas.Series - :param timezone: the timezone to convert from. if None then use local timezone - :return pandas.Series where if it is a timestamp, has been converted to tz-naive + Parameters + ---------- + s : pandas.Series + timezone : str + the timezone to convert from. if None then use local timezone + + Returns + ------- + pandas.Series + `pandas.Series` where if it is a timestamp, has been converted to tz-naive """ return _check_series_convert_timestamps_localize(s, timezone, None) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index ae715eea70b6d..2ed991c87f506 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -57,12 +57,18 @@ def _df(self, jdf): from pyspark.sql.dataframe import DataFrame return DataFrame(jdf, self._spark) - @since(1.4) def format(self, source): """Specifies the input data source format. - :param source: string, name of the data source, e.g. 'json', 'parquet'. + .. versionadded:: 1.4.0 + + Parameters + ---------- + source : str + string, name of the data source, e.g. 'json', 'parquet'. + Examples + -------- >>> df = spark.read.format('json').load('python/test_support/sql/people.json') >>> df.dtypes [('age', 'bigint'), ('name', 'string')] @@ -71,7 +77,6 @@ def format(self, source): self._jreader = self._jreader.format(source) return self - @since(1.4) def schema(self, schema): """Specifies the input schema. 
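As a hedged aside on the `DataFrameReader` methods being re-documented in this file, a minimal sketch chaining `format`, `schema` and `load`; supplying the schema up front lets the source skip inference, as the `schema` docstring below notes (the path reuses the test-support file from the doctests and is otherwise an assumption):

```
# Hedged sketch of the reader chain documented here: an explicit DDL schema
# lets the JSON source skip schema inference. Path and session are illustrative.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

df = (spark.read
      .format("json")
      .schema("age INT, name STRING")
      .load("python/test_support/sql/people.json"))
df.printSchema()
```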
@@ -79,8 +84,13 @@ def schema(self, schema): By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. - :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string - (For example ``col0 INT, col1 DOUBLE``). + .. versionadded:: 1.4.0 + + Parameters + ---------- + schema : :class:`pyspark.sql.types.StructType` or str + a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string + (For example ``col0 INT, col1 DOUBLE``). >>> s = spark.read.schema("col0 INT, col1 DOUBLE") """ @@ -144,16 +154,25 @@ def options(self, **options): self._jreader = self._jreader.option(k, to_str(options[k])) return self - @since(1.4) def load(self, path=None, format=None, schema=None, **options): """Loads data from a data source and returns it as a :class:`DataFrame`. - :param path: optional string or a list of string for file-system backed data sources. - :param format: optional string for format of the data source. Default to 'parquet'. - :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param options: all other string options - + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str or list, optional + optional string or a list of string for file-system backed data sources. + format : str, optional + optional string for format of the data source. Default to 'parquet'. + schema : :class:`pyspark.sql.types.StructType` or str, optional + optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + **options : dict + all other string options + + Examples + -------- >>> df = spark.read.format("parquet").load('python/test_support/sql/parquet_partitioned', ... opt1=True, opt2=1, opt3='str') >>> df.dtypes @@ -178,7 +197,6 @@ def load(self, path=None, format=None, schema=None, **options): else: return self._df(self._jreader.load()) - @since(1.4) def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, @@ -195,89 +213,118 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. - :param path: string represents path to the JSON dataset, or a list of paths, - or RDD of Strings storing JSON objects. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or - a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param primitivesAsString: infers all primitive values as a string type. If None is set, - it uses the default value, ``false``. - :param prefersDecimal: infers all floating-point values as a decimal type. If the values - do not fit in decimal, then it infers them as doubles. If None is - set, it uses the default value, ``false``. - :param allowComments: ignores Java/C++ style comment in JSON records. If None is set, - it uses the default value, ``false``. - :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set, - it uses the default value, ``false``. - :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is - set, it uses the default value, ``true``. 
- :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is - set, it uses the default value, ``false``. - :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character - using backslash quoting mechanism. If None is - set, it uses the default value, ``false``. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str, list or :class:`RDD` + string represents path to the JSON dataset, or a list of paths, + or RDD of Strings storing JSON objects. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema or + a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + primitivesAsString : str or bool, optional + infers all primitive values as a string type. If None is set, + it uses the default value, ``false``. + prefersDecimal : str or bool, optional + infers all floating-point values as a decimal type. If the values + do not fit in decimal, then it infers them as doubles. If None is + set, it uses the default value, ``false``. + allowComments : str or bool, optional + ignores Java/C++ style comment in JSON records. If None is set, + it uses the default value, ``false``. + allowUnquotedFieldNames : str or bool, optional + allows unquoted JSON field names. If None is set, + it uses the default value, ``false``. + allowSingleQuotes : str or bool, optional + allows single quotes in addition to double quotes. If None is + set, it uses the default value, ``true``. + allowNumericLeadingZero : str or bool, optional + allows leading zeros in numbers (e.g. 00012). If None is + set, it uses the default value, ``false``. + allowBackslashEscapingAnyCharacter : str or bool, optional + allows accepting quoting of all character + using backslash quoting mechanism. If None is + set, it uses the default value, ``false``. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is set, it uses the default value, ``PERMISSIVE``. - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ - field in an output schema. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. 
- :param multiLine: parse one record, which may span multiple lines, per file. If None is - set, it uses the default value, ``false``. - :param allowUnquotedControlChars: allows JSON Strings to contain unquoted control - characters (ASCII characters with value less than 32, - including tab and line feed characters) or not. - :param encoding: allows to forcibly set one of standard basic or extended encoding for - the JSON files. For example UTF-16BE, UTF-32LE. If None is set, - the encoding of input JSON will be detected automatically - when the multiLine option is set to ``true``. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param samplingRatio: defines fraction of input JSON objects used for schema inferring. - If None is set, it uses the default value, ``1.0``. - :param dropFieldIfAllNull: whether to ignore column of all null values or empty - array/struct during schema inference. If None is set, it - uses the default value, ``false``. - :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - :param allowNonNumericNumbers: allows JSON parser to recognize set of "Not-a-Number" (NaN) - tokens as legal floating number values. If None is set, - it uses the default value, ``true``. + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ + field in an output schema. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord: str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + multiLine : str or bool, optional + parse one record, which may span multiple lines, per file. If None is + set, it uses the default value, ``false``. 
+ allowUnquotedControlChars : str or bool, optional + allows JSON Strings to contain unquoted control + characters (ASCII characters with value less than 32, + including tab and line feed characters) or not. + encoding : str or bool, optional + allows to forcibly set one of standard basic or extended encoding for + the JSON files. For example UTF-16BE, UTF-32LE. If None is set, + the encoding of input JSON will be detected automatically + when the multiLine option is set to ``true``. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + samplingRatio : str or float, optional + defines fraction of input JSON objects used for schema inferring. + If None is set, it uses the default value, ``1.0``. + dropFieldIfAllNull : str or bool, optional + whether to ignore column of all null values or empty + array/struct during schema inference. If None is set, it + uses the default value, ``false``. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + allowNonNumericNumbers : str or bool + allows JSON parser to recognize set of "Not-a-Number" (NaN) + tokens as legal floating number values. If None is set, + it uses the default value, ``true``. * ``+INF``: for positive infinity, as well as alias of ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + Examples + -------- >>> df1 = spark.read.json('python/test_support/sql/people.json') >>> df1.dtypes [('age', 'bigint'), ('name', 'string')] @@ -317,12 +364,18 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - @since(1.4) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. - :param tableName: string, name of the table. + .. versionadded:: 1.4.0 + Parameters + ---------- + tableName : str + string, name of the table. + + Examples + -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') >>> df.createOrReplaceTempView('tmpTable') >>> spark.read.table('tmpTable').dtypes @@ -330,24 +383,35 @@ def table(self, tableName): """ return self._df(self._jreader.table(tableName)) - @since(1.4) def parquet(self, *paths, **options): """ Loads Parquet files, returning the result as a :class:`DataFrame`. - :param mergeSchema: sets whether we should merge schemas collected from all - Parquet part-files. This will override - ``spark.sql.parquet.mergeSchema``. The default value is specified in - ``spark.sql.parquet.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. 
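As a quick illustration of the reader options documented above for `DataFrameReader.json`, here is a minimal hedged sketch; the input path, the corrupt-record column name, and the date format are hypothetical, and a running `SparkSession` is assumed:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical input location. multiLine handles records that span several
# lines, and PERMISSIVE mode routes malformed records into the named column
# instead of failing the whole read.
df = spark.read.json(
    "/tmp/events.json",
    multiLine=True,
    mode="PERMISSIVE",
    columnNameOfCorruptRecord="_corrupt_record",
    dateFormat="yyyy-MM-dd",
)
df.printSchema()
```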
- It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.4.0 + + Parameters + ---------- + paths : str + + Other Parameters + ---------------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + Parquet part-files. This will override + ``spark.sql.parquet.mergeSchema``. The default value is specified in + ``spark.sql.parquet.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Examples + -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') >>> df.dtypes [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')] @@ -359,7 +423,6 @@ def parquet(self, *paths, **options): recursiveFileLookup=recursiveFileLookup) return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths))) - @since(1.6) def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, recursiveFileLookup=None): """ @@ -370,19 +433,28 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, By default, each line in the text file is a new row in the resulting DataFrame. - :param paths: string, or list of strings, for input path(s). - :param wholetext: if true, read each file from input path(s) as a single row. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.6.0 + + Parameters + ---------- + paths : str or list + string, or list of strings, for input path(s). + wholetext : str or bool, optional + if true, read each file from input path(s) as a single row. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. 
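To illustrate the `DataFrameReader.parquet` options covered above, a small sketch under assumed paths; the warehouse directory and glob pattern are hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical directory. mergeSchema reconciles schemas across part-files,
# while pathGlobFilter restricts which files are read without changing
# partition discovery.
df = spark.read.parquet(
    "/tmp/warehouse/events",
    mergeSchema=True,
    pathGlobFilter="*.parquet",
    recursiveFileLookup=False,
)
print(df.schema.simpleString())
```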
# noqa + + Examples + -------- >>> df = spark.read.text('python/test_support/sql/text-test.txt') >>> df.collect() [Row(value='hello'), Row(value='this')] @@ -397,7 +469,6 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, paths = [paths] return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) - @since(2.0) def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None, comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, @@ -412,119 +483,156 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``inferSchema`` is enabled. To avoid going through the entire data once, disable ``inferSchema`` option or specify the schema explicitly using ``schema``. - :param path: string, or list of strings, for input path(s), - or RDD of Strings storing CSV rows. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param encoding: decodes the CSV files by the given encoding type. If None is set, - it uses the default value, ``UTF-8``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If you would like to turn off quotations, you need to set an - empty string. - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets a single character used for skipping lines beginning with this - character. By default (None), it is disabled. - :param header: uses the first line as names of columns. If None is set, it uses the - default value, ``false``. - .. note:: if the given path is a RDD of Strings, this header - option will remove all lines same with the header if exists. - - :param inferSchema: infers the input schema automatically from data. It requires one extra - pass over the data. If None is set, it uses the default value, ``false``. - :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be - forcibly applied to datasource files, and headers in CSV files will be - ignored. If the option is set to ``false``, the schema will be - validated against all headers in CSV files or the first header in RDD - if the ``header`` option is set to ``true``. Field names in the schema - and column names in CSV headers are checked by their positions - taking into account ``spark.sql.caseSensitive``. If None is set, - ``true`` is used by default. Though the default value is ``true``, - it is recommended to disable the ``enforceSchema`` option - to avoid incorrect results. - :param ignoreLeadingWhiteSpace: A flag indicating whether or not leading whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param ignoreTrailingWhiteSpace: A flag indicating whether or not trailing whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. 
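For the `DataFrameReader.text` options documented above, a brief hedged sketch; the log directory is hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical log directory. By default each line becomes a row; with
# wholetext=True each file becomes a single row instead.
per_line = spark.read.text("/tmp/logs", lineSep="\n")
per_file = spark.read.text("/tmp/logs", wholetext=True)
print(per_line.count(), per_file.count())
```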
Since 2.0.1, this ``nullValue`` param - applies to all supported types including the string type. - :param nanValue: sets the string representation of a non-number value. If None is set, it - uses the default value, ``NaN``. - :param positiveInf: sets the string representation of a positive infinity value. If None - is set, it uses the default value, ``Inf``. - :param negativeInf: sets the string representation of a negative infinity value. If None - is set, it uses the default value, ``Inf``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param maxColumns: defines a hard limit of how many columns a record can have. If None is - set, it uses the default value, ``20480``. - :param maxCharsPerColumn: defines the maximum number of characters allowed for any given - value being read. If None is set, it uses the default value, - ``-1`` meaning unlimited length. - :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0. - If specified, it is ignored. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. Note that Spark tries to - parse only required columns in CSV under column pruning. Therefore, corrupt - records can be different based on required set of fields. This behavior can - be controlled by ``spark.sql.csv.parser.columnPruning.enabled`` - (enabled by default). - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - A record with less/more tokens than schema is not a corrupted record to CSV. \ - When it meets a record having fewer tokens than the length of the schema, \ - sets ``null`` to extra fields. When the record has more tokens than the \ - length of the schema, it drops extra tokens. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param multiLine: parse records, which may span multiple lines. If None is - set, it uses the default value, ``false``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise. - :param samplingRatio: defines fraction of rows used for schema inferring. - If None is set, it uses the default value, ``1.0``. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, empty string. 
- :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - Maximum length is 1 character. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str or list + string, or list of strings, for input path(s), + or RDD of Strings storing CSV rows. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + encoding : str, optional + decodes the CSV files by the given encoding type. If None is set, + it uses the default value, ``UTF-8``. + quote : str, optional + sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\``. + comment : str, optional + sets a single character used for skipping lines beginning with this + character. By default (None), it is disabled. + header : str or bool, optional + uses the first line as names of columns. If None is set, it uses the + default value, ``false``. + + .. note:: if the given path is a RDD of Strings, this header + option will remove all lines same with the header if exists. + + inferSchema : str or bool, optional + infers the input schema automatically from data. It requires one extra + pass over the data. If None is set, it uses the default value, ``false``. + enforceSchema : str or bool, optional + If it is set to ``true``, the specified or inferred schema will be + forcibly applied to datasource files, and headers in CSV files will be + ignored. If the option is set to ``false``, the schema will be + validated against all headers in CSV files or the first header in RDD + if the ``header`` option is set to ``true``. Field names in the schema + and column names in CSV headers are checked by their positions + taking into account ``spark.sql.caseSensitive``. If None is set, + ``true`` is used by default. Though the default value is ``true``, + it is recommended to disable the ``enforceSchema`` option + to avoid incorrect results. + ignoreLeadingWhiteSpace : str or bool, optional + A flag indicating whether or not leading whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. 
+ ignoreTrailingWhiteSpace : str or bool, optional + A flag indicating whether or not trailing whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. Since 2.0.1, this ``nullValue`` param + applies to all supported types including the string type. + nanValue : str, optional + sets the string representation of a non-number value. If None is set, it + uses the default value, ``NaN``. + positiveInf : str, optional + sets the string representation of a positive infinity value. If None + is set, it uses the default value, ``Inf``. + negativeInf : str, optional + sets the string representation of a negative infinity value. If None + is set, it uses the default value, ``Inf``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + maxColumns : str or int, optional + defines a hard limit of how many columns a record can have. If None is + set, it uses the default value, ``20480``. + maxCharsPerColumn : str or int, optional + defines the maximum number of characters allowed for any given + value being read. If None is set, it uses the default value, + ``-1`` meaning unlimited length. + maxMalformedLogPerPartition : str or int, optional + this parameter is no longer used since Spark 2.2.0. + If specified, it is ignored. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. Note that Spark tries to + parse only required columns in CSV under column pruning. Therefore, corrupt + records can be different based on required set of fields. This behavior can + be controlled by ``spark.sql.csv.parser.columnPruning.enabled`` + (enabled by default). + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + A record with less/more tokens than schema is not a corrupted record to CSV. \ + When it meets a record having fewer tokens than the length of the schema, \ + sets ``null`` to extra fields. When the record has more tokens than the \ + length of the schema, it drops extra tokens. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + multiLine : str or bool, optional + parse records, which may span multiple lines. If None is + set, it uses the default value, ``false``. 
+ charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise. + samplingRatio : str or float, optional + defines fraction of rows used for schema inferring. + If None is set, it uses the default value, ``1.0``. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, empty string. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + Maximum length is 1 character. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. # noqa + + Examples + -------- >>> df = spark.read.csv('python/test_support/sql/ages.csv') >>> df.dtypes [('_c0', 'string'), ('_c1', 'string')] @@ -571,22 +679,30 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - @since(1.5) def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """Loads ORC files, returning the result as a :class:`DataFrame`. - :param mergeSchema: sets whether we should merge schemas collected from all - ORC part-files. This will override ``spark.sql.orc.mergeSchema``. - The default value is specified in ``spark.sql.orc.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 1.5.0 + + Parameters + ---------- + path : str or list + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + ORC part-files. This will override ``spark.sql.orc.mergeSchema``. + The default value is specified in ``spark.sql.orc.mergeSchema``. + pathGlobFilter : str or bool + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool + recursively scan a directory for files. Using this option + disables + `partition discovery `_. 
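Tying together the `DataFrameReader.csv` options documented above, a minimal sketch; the file path, column names, and separator are hypothetical. Supplying an explicit DDL schema avoids the extra pass that `inferSchema` would need:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical semicolon-separated file with a header row.
# DROPMALFORMED silently discards rows that do not match the schema.
df = spark.read.csv(
    "/tmp/people.csv",
    schema="name STRING, age INT, signup DATE",
    sep=";",
    header=True,
    dateFormat="yyyy-MM-dd",
    mode="DROPMALFORMED",
)
df.show()
```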
# noqa + + Examples + -------- >>> df = spark.read.orc('python/test_support/sql/orc_partitioned') >>> df.dtypes [('a', 'bigint'), ('b', 'int'), ('c', 'int')] @@ -597,7 +713,6 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N path = [path] return self._df(self._jreader.orc(_to_seq(self._spark._sc, path))) - @since(1.4) def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPartitions=None, predicates=None, properties=None): """ @@ -605,31 +720,48 @@ def jdbc(self, url, table, column=None, lowerBound=None, upperBound=None, numPar accessible via JDBC URL ``url`` and connection ``properties``. Partitions of the table will be retrieved in parallel if either ``column`` or - ``predicates`` is specified. ``lowerBound`, ``upperBound`` and ``numPartitions`` + ``predicates`` is specified. ``lowerBound``, ``upperBound`` and ``numPartitions`` is needed when ``column`` is specified. If both ``column`` and ``predicates`` are specified, ``column`` will be used. - .. note:: Don't create too many partitions in parallel on a large cluster; - otherwise Spark might crash your external database systems. - - :param url: a JDBC URL of the form ``jdbc:subprotocol:subname`` - :param table: the name of the table - :param column: the name of a column of numeric, date, or timestamp type - that will be used for partitioning; - if this parameter is specified, then ``numPartitions``, ``lowerBound`` - (inclusive), and ``upperBound`` (exclusive) will form partition strides - for generated WHERE clause expressions used to split the column - ``column`` evenly - :param lowerBound: the minimum value of ``column`` used to decide partition stride - :param upperBound: the maximum value of ``column`` used to decide partition stride - :param numPartitions: the number of partitions - :param predicates: a list of expressions suitable for inclusion in WHERE clauses; - each one defines one partition of the :class:`DataFrame` - :param properties: a dictionary of JDBC database connection arguments. Normally at - least properties "user" and "password" with their corresponding values. - For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } - :return: a DataFrame + .. versionadded:: 1.4.0 + + Parameters + ---------- + url : str + a JDBC URL of the form ``jdbc:subprotocol:subname`` + table : str + the name of the table + column : str, optional + the name of a column of numeric, date, or timestamp type + that will be used for partitioning; + if this parameter is specified, then ``numPartitions``, ``lowerBound`` + (inclusive), and ``upperBound`` (exclusive) will form partition strides + for generated WHERE clause expressions used to split the column + ``column`` evenly + lowerBound : str or int, optional + the minimum value of ``column`` used to decide partition stride + upperBound : str or int, optional + the maximum value of ``column`` used to decide partition stride + numPartitions : int, optional + the number of partitions + predicates : list, optional + a list of expressions suitable for inclusion in WHERE clauses; + each one defines one partition of the :class:`DataFrame` + properties : dict, optional + a dictionary of JDBC database connection arguments. Normally at + least properties "user" and "password" with their corresponding values. + For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + + Notes + ----- + Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. 
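The partitioned-read behavior described in the `DataFrameReader.jdbc` docstring above can be sketched as follows; the PostgreSQL URL, table, partition column, and credentials are hypothetical, and the matching JDBC driver jar is assumed to be on the classpath:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# column/lowerBound/upperBound/numPartitions make Spark issue four
# range-partitioned queries instead of a single full scan. Keep
# numPartitions modest so the database is not overloaded.
df = spark.read.jdbc(
    url="jdbc:postgresql://db.example.com:5432/shop",
    table="orders",
    column="order_id",
    lowerBound=1,
    upperBound=1_000_000,
    numPartitions=4,
    properties={"user": "reporting", "password": "secret"},
)
print(df.rdd.getNumPartitions())
```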
+ + Returns + ------- + :class:`DataFrame` """ if properties is None: properties = dict() @@ -667,7 +799,6 @@ def _sq(self, jsq): from pyspark.sql.streaming import StreamingQuery return StreamingQuery(jsq) - @since(1.4) def mode(self, saveMode): """Specifies the behavior when data or table already exists. @@ -678,6 +809,10 @@ def mode(self, saveMode): * `error` or `errorifexists`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. + .. versionadded:: 1.4.0 + + Examples + -------- >>> df.write.mode('append').parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ # At the JVM side, the default value of mode is already set to "error". @@ -686,12 +821,18 @@ def mode(self, saveMode): self._jwrite = self._jwrite.mode(saveMode) return self - @since(1.4) def format(self, source): """Specifies the underlying output data source. - :param source: string, name of the data source, e.g. 'json', 'parquet'. + .. versionadded:: 1.4.0 + + Parameters + ---------- + source : str + string, name of the data source, e.g. 'json', 'parquet'. + Examples + -------- >>> df.write.format('json').save(os.path.join(tempfile.mkdtemp(), 'data')) """ self._jwrite = self._jwrite.format(source) @@ -740,15 +881,21 @@ def options(self, **options): self._jwrite = self._jwrite.option(k, to_str(options[k])) return self - @since(1.4) def partitionBy(self, *cols): """Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. - :param cols: name of columns + .. versionadded:: 1.4.0 + Parameters + ---------- + cols : str or list + name of columns + + Examples + -------- >>> df.write.partitionBy('year', 'month').parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ if len(cols) == 1 and isinstance(cols[0], (list, tuple)): @@ -756,18 +903,28 @@ def partitionBy(self, *cols): self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols)) return self - @since(2.3) def bucketBy(self, numBuckets, col, *cols): """Buckets the output by the given columns.If specified, the output is laid out on the file system similar to Hive's bucketing scheme. - :param numBuckets: the number of buckets to save - :param col: a name of a column, or a list of names. - :param cols: additional names (optional). If `col` is a list it should be empty. + .. versionadded:: 2.3.0 + + Parameters + ---------- + numBuckets : int + the number of buckets to save + col : str, list or tuple + a name of a column, or a list of names. + cols : str + additional names (optional). If `col` is a list it should be empty. - .. note:: Applicable for file-based data sources in combination with - :py:meth:`DataFrameWriter.saveAsTable`. + Notes + ----- + Applicable for file-based data sources in combination with + :py:meth:`DataFrameWriter.saveAsTable`. + Examples + -------- >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .mode("overwrite") @@ -788,13 +945,20 @@ def bucketBy(self, numBuckets, col, *cols): self._jwrite = self._jwrite.bucketBy(numBuckets, col, _to_seq(self._spark._sc, cols)) return self - @since(2.3) def sortBy(self, col, *cols): """Sorts the output in each bucket by the given columns on the file system. - :param col: a name of a column, or a list of names. - :param cols: additional names (optional). If `col` is a list it should be empty. + .. versionadded:: 2.3.0 + + Parameters + ---------- + col : str, tuple or list + a name of a column, or a list of names. 
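Since `mode()`, `format()`, `option()` and `partitionBy()` documented above only configure the writer, a short hedged sketch of how they are usually chained before a terminal action; the output path and column names are illustrative:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Small throwaway frame; nothing is written until save() is called.
df = spark.createDataFrame(
    [(2020, 1, "a"), (2020, 2, "b")], ["year", "month", "value"]
)
(df.write
   .format("parquet")
   .mode("overwrite")
   .partitionBy("year", "month")
   .save("/tmp/partitioned_output"))
```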
+ cols : str + additional names (optional). If `col` is a list it should be empty. + Examples + -------- >>> (df.write.format('parquet') # doctest: +SKIP ... .bucketBy(100, 'year', 'month') ... .sortBy('day') @@ -813,7 +977,6 @@ def sortBy(self, col, *cols): self._jwrite = self._jwrite.sortBy(col, _to_seq(self._spark._sc, cols)) return self - @since(1.4) def save(self, path=None, format=None, mode=None, partitionBy=None, **options): """Saves the contents of the :class:`DataFrame` to a data source. @@ -821,18 +984,29 @@ def save(self, path=None, format=None, mode=None, partitionBy=None, **options): If ``format`` is not specified, the default data source configured by ``spark.sql.sources.default`` will be used. - :param path: the path in a Hadoop supported file system - :param format: the format used to save - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str, optional + the path in a Hadoop supported file system + format : str, optional + the format used to save + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param options: all other string options + partitionBy : list, optional + names of partitioning columns + **options : dict + all other string options + Examples + -------- >>> df.write.mode("append").save(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode).options(**options) @@ -858,7 +1032,6 @@ def insertInto(self, tableName, overwrite=None): self.mode("overwrite" if overwrite else "append") self._jwrite.insertInto(tableName) - @since(1.4) def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options): """Saves the content of the :class:`DataFrame` as the specified table. @@ -872,12 +1045,21 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options) * `error` or `errorifexists`: Throw an exception if data already exists. * `ignore`: Silently ignore this operation if data already exists. - :param name: the table name - :param format: the format used to save - :param mode: one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \ - (default: error) - :param partitionBy: names of partitioning columns - :param options: all other string options + .. versionadded:: 1.4.0 + + Parameters + ---------- + name : str + the table name + format : str, optional + the format used to save + mode : str, optional + one of `append`, `overwrite`, `error`, `errorifexists`, `ignore` \ + (default: error) + partitionBy : str or list + names of partitioning columns + **options : dict + all other string options """ self.mode(mode).options(**options) if partitionBy is not None: @@ -886,41 +1068,53 @@ def saveAsTable(self, name, format=None, mode=None, partitionBy=None, **options) self.format(format) self._jwrite.saveAsTable(name) - @since(1.4) def json(self, path, mode=None, compression=None, dateFormat=None, timestampFormat=None, lineSep=None, encoding=None, ignoreNullFields=None): """Saves the content of the :class:`DataFrame` in JSON format (`JSON Lines text format or newline-delimited JSON `_) at the specified path. 
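Building on the `bucketBy()`/`sortBy()`/`saveAsTable()` docstrings above, a minimal sketch; the table name and data are hypothetical, and the default session catalog (writing under `spark-warehouse`) is assumed:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# bucketBy()/sortBy() only take effect with saveAsTable(); sortBy()
# orders rows within each bucket.
df = spark.createDataFrame(
    [(1, "2020-01-01"), (2, "2020-01-02")], ["id", "day"]
)
(df.write
   .format("parquet")
   .bucketBy(4, "id")
   .sortBy("day")
   .mode("overwrite")
   .saveAsTable("bucketed_events"))
```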
-        :param path: the path in any Hadoop supported file system
-        :param mode: specifies the behavior of the save operation when data already exists.
+        .. versionadded:: 1.4.0
+
+        Parameters
+        ----------
+        path : str
+            the path in any Hadoop supported file system
+        mode : str, optional
+            specifies the behavior of the save operation when data already exists.
 
             * ``append``: Append contents of this :class:`DataFrame` to existing data.
             * ``overwrite``: Overwrite existing data.
             * ``ignore``: Silently ignore this operation if data already exists.
             * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \
                 exists.
-        :param compression: compression codec to use when saving to file. This can be one of the
-                            known case-insensitive shorten names (none, bzip2, gzip, lz4,
-                            snappy and deflate).
-        :param dateFormat: sets the string that indicates a date format. Custom date formats
-                           follow the formats at `datetime pattern`_.
-                           This applies to date type. If None is set, it uses the
-                           default value, ``yyyy-MM-dd``.
-        :param timestampFormat: sets the string that indicates a timestamp format.
-                                Custom date formats follow the formats at `datetime pattern`_.
-                                This applies to timestamp type. If None is set, it uses the
-                                default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``.
-        :param encoding: specifies encoding (charset) of saved json files. If None is set,
-                         the default UTF-8 charset will be used.
-        :param lineSep: defines the line separator that should be used for writing. If None is
-                        set, it uses the default value, ``\\n``.
-        :param ignoreNullFields: Whether to ignore null fields when generating JSON objects.
-                                 If None is set, it uses the default value, ``true``.
-
-        .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
-
+        compression : str, optional
+            compression codec to use when saving to file. This can be one of the
+            known case-insensitive shorten names (none, bzip2, gzip, lz4,
+            snappy and deflate).
+        dateFormat : str, optional
+            sets the string that indicates a date format. Custom date formats
+            follow the formats at
+            `datetime pattern <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`_.  # noqa
+            This applies to date type. If None is set, it uses the
+            default value, ``yyyy-MM-dd``.
+        timestampFormat : str, optional
+            sets the string that indicates a timestamp format.
+            Custom date formats follow the formats at
+            `datetime pattern <https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html>`_.  # noqa
+            This applies to timestamp type. If None is set, it uses the
+            default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``.
+        encoding : str, optional
+            specifies encoding (charset) of saved json files. If None is set,
+            the default UTF-8 charset will be used.
+        lineSep : str, optional
+            defines the line separator that should be used for writing. If None is
+            set, it uses the default value, ``\\n``.
+        ignoreNullFields : str or bool, optional
+            Whether to ignore null fields when generating JSON objects.
+            If None is set, it uses the default value, ``true``.
+
+        Examples
+        --------
         >>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
         """
         self.mode(mode)
@@ -929,25 +1123,34 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm
                        lineSep=lineSep, encoding=encoding, ignoreNullFields=ignoreNullFields)
         self._jwrite.json(path)
-    @since(1.4)
     def parquet(self, path, mode=None, partitionBy=None, compression=None):
         """Saves the content of the :class:`DataFrame` in Parquet format at the specified path.
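A minimal sketch of the `DataFrameWriter.json` options documented above; the output directory and data are hypothetical, and `gzip` is one of the documented codecs:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# ignoreNullFields=False keeps null-valued fields in the emitted JSON
# instead of dropping them.
df = spark.createDataFrame([("Alice", None), ("Bob", 42)], "name string, age int")
df.write.json(
    "/tmp/people_json",
    mode="overwrite",
    compression="gzip",
    ignoreNullFields=False,
)
```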
- :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.4.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, uncompressed, snappy, gzip, - lzo, brotli, lz4, and zstd). This will override - ``spark.sql.parquet.compression.codec``. If None is set, it uses the - value specified in ``spark.sql.parquet.compression.codec``. - + partitionBy : str or list, optional + names of partitioning columns + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, uncompressed, snappy, gzip, + lzo, brotli, lz4, and zstd). This will override + ``spark.sql.parquet.compression.codec``. If None is set, it uses the + value specified in ``spark.sql.parquet.compression.codec``. + + Examples + -------- >>> df.write.parquet(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode) @@ -956,17 +1159,23 @@ def parquet(self, path, mode=None, partitionBy=None, compression=None): self._set_opts(compression=compression) self._jwrite.parquet(path) - @since(1.6) def text(self, path, compression=None, lineSep=None): """Saves the content of the DataFrame in a text file at the specified path. The text files will be encoded as UTF-8. - :param path: the path in any Hadoop supported file system - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, bzip2, gzip, lz4, - snappy and deflate). - :param lineSep: defines the line separator that should be used for writing. If None is - set, it uses the default value, ``\\n``. + .. versionadded:: 1.6.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, bzip2, gzip, lz4, + snappy and deflate). + lineSep : str, optional + defines the line separator that should be used for writing. If None is + set, it uses the default value, ``\\n``. The DataFrame must have only one column that is of string type. Each row becomes a new line in the output file. @@ -974,15 +1183,20 @@ def text(self, path, compression=None, lineSep=None): self._set_opts(compression=compression, lineSep=lineSep) self._jwrite.text(path) - @since(2.0) def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=None, header=None, nullValue=None, escapeQuotes=None, quoteAll=None, dateFormat=None, timestampFormat=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, charToEscapeQuoteEscaping=None, encoding=None, emptyValue=None, lineSep=None): r"""Saves the content of the :class:`DataFrame` in CSV format at the specified path. 
- :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. @@ -990,53 +1204,71 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, bzip2, gzip, lz4, - snappy and deflate). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If an empty string is set, it uses ``u0000`` (null character). - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\`` - :param escapeQuotes: a flag indicating whether values containing quotes should always - be enclosed in quotes. If None is set, it uses the default value - ``true``, escaping all values containing a quote character. - :param quoteAll: a flag indicating whether all values should always be enclosed in - quotes. If None is set, it uses the default value ``false``, - only escaping values containing a quote character. - :param header: writes the names of columns as the first line. If None is set, it uses - the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. - :param dateFormat: sets the string that indicates a date format. Custom date formats follow - the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from - values being written should be skipped. If None is set, it - uses the default value, ``true``. - :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from - values being written should be skipped. If None is set, it - uses the default value, ``true``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise.. - :param encoding: sets the encoding (charset) of saved csv files. If None is set, - the default UTF-8 charset will be used. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, ``""``. - :param lineSep: defines the line separator that should be used for writing. If None is - set, it uses the default value, ``\\n``. Maximum length is 1 character. - - .. 
_datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, bzip2, gzip, lz4, + snappy and deflate). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + quote : str, optional + sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If an empty string is set, it uses ``u0000`` (null character). + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\`` + escapeQuotes : str or bool, optional + a flag indicating whether values containing quotes should always + be enclosed in quotes. If None is set, it uses the default value + ``true``, escaping all values containing a quote character. + quoteAll : str or bool, optional + a flag indicating whether all values should always be enclosed in + quotes. If None is set, it uses the default value ``false``, + only escaping values containing a quote character. + header : str or bool, optional + writes the names of columns as the first line. If None is set, it uses + the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats follow + the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + ignoreLeadingWhiteSpace : str or bool, optional + a flag indicating whether or not leading whitespaces from + values being written should be skipped. If None is set, it + uses the default value, ``true``. + ignoreTrailingWhiteSpace : str or bool, optional + a flag indicating whether or not trailing whitespaces from + values being written should be skipped. If None is set, it + uses the default value, ``true``. + charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise.. + encoding : str, optional + sets the encoding (charset) of saved csv files. If None is set, + the default UTF-8 charset will be used. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, ``""``. + lineSep : str, optional + defines the line separator that should be used for writing. If None is + set, it uses the default value, ``\\n``. Maximum length is 1 character. 
+ + Examples + -------- >>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data')) """ self.mode(mode) @@ -1049,25 +1281,34 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No encoding=encoding, emptyValue=emptyValue, lineSep=lineSep) self._jwrite.csv(path) - @since(1.5) def orc(self, path, mode=None, partitionBy=None, compression=None): """Saves the content of the :class:`DataFrame` in ORC format at the specified path. - :param path: the path in any Hadoop supported file system - :param mode: specifies the behavior of the save operation when data already exists. + .. versionadded:: 1.5.0 + + Parameters + ---------- + path : str + the path in any Hadoop supported file system + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param partitionBy: names of partitioning columns - :param compression: compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, snappy, zlib, and lzo). - This will override ``orc.compress`` and - ``spark.sql.orc.compression.codec``. If None is set, it uses the value - specified in ``spark.sql.orc.compression.codec``. - + partitionBy : str or list, optional + names of partitioning columns + compression : str, optional + compression codec to use when saving to file. This can be one of the + known case-insensitive shorten names (none, snappy, zlib, and lzo). + This will override ``orc.compress`` and + ``spark.sql.orc.compression.codec``. If None is set, it uses the value + specified in ``spark.sql.orc.compression.codec``. + + Examples + -------- >>> orc_df = spark.read.orc('python/test_support/sql/orc_partitioned') >>> orc_df.write.orc(os.path.join(tempfile.mkdtemp(), 'data')) """ @@ -1077,25 +1318,34 @@ def orc(self, path, mode=None, partitionBy=None, compression=None): self._set_opts(compression=compression) self._jwrite.orc(path) - @since(1.4) def jdbc(self, url, table, mode=None, properties=None): """Saves the content of the :class:`DataFrame` to an external database table via JDBC. - .. note:: Don't create too many partitions in parallel on a large cluster; - otherwise Spark might crash your external database systems. + .. versionadded:: 1.4.0 - :param url: a JDBC URL of the form ``jdbc:subprotocol:subname`` - :param table: Name of the table in the external database. - :param mode: specifies the behavior of the save operation when data already exists. + Parameters + ---------- + url : str + a JDBC URL of the form ``jdbc:subprotocol:subname`` + table : str + Name of the table in the external database. + mode : str, optional + specifies the behavior of the save operation when data already exists. * ``append``: Append contents of this :class:`DataFrame` to existing data. * ``overwrite``: Overwrite existing data. * ``ignore``: Silently ignore this operation if data already exists. * ``error`` or ``errorifexists`` (default case): Throw an exception if data already \ exists. - :param properties: a dictionary of JDBC database connection arguments. Normally at - least properties "user" and "password" with their corresponding values. 
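To complement the `DataFrameWriter.csv` options documented above, a minimal sketch; the output path and sample data are hypothetical:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# header writes the column names as the first line; the quote/escape
# characters handle values that contain the separator.
df = spark.createDataFrame([("Alice", ""), ("Bob", "x,y")], ["name", "tags"])
df.write.csv(
    "/tmp/people_csv",
    mode="overwrite",
    sep=",",
    header=True,
    quote='"',
    escape="\\",
    emptyValue="",
    compression="gzip",
)
```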
- For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + properties : dict + a dictionary of JDBC database connection arguments. Normally at + least properties "user" and "password" with their corresponding values. + For example { 'user' : 'SYSTEM', 'password' : 'mypassword' } + + Notes + ----- + Don't create too many partitions in parallel on a large cluster; + otherwise Spark might crash your external database systems. """ if properties is None: properties = dict() diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi index a111cbe416c2f..64c5697203a44 100644 --- a/python/pyspark/sql/readwriter.pyi +++ b/python/pyspark/sql/readwriter.pyi @@ -41,7 +41,7 @@ class DataFrameReader(OptionUtils): self, path: Optional[PathOrPaths] = ..., format: Optional[str] = ..., - schema: Optional[StructType] = ..., + schema: Optional[Union[StructType, str]] = ..., **options: OptionalPrimitiveType ) -> DataFrame: ... def json( @@ -66,7 +66,9 @@ class DataFrameReader(OptionUtils): dropFieldIfAllNull: Optional[Union[bool, str]] = ..., encoding: Optional[str] = ..., locale: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., + allowNonNumericNumbers: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def table(self, tableName: str) -> DataFrame: ... def parquet(self, *paths: str, **options: OptionalPrimitiveType) -> DataFrame: ... @@ -75,7 +77,8 @@ class DataFrameReader(OptionUtils): paths: PathOrPaths, wholetext: bool = ..., lineSep: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def csv( self, @@ -96,9 +99,9 @@ class DataFrameReader(OptionUtils): negativeInf: Optional[str] = ..., dateFormat: Optional[str] = ..., timestampFormat: Optional[str] = ..., - maxColumns: Optional[int] = ..., - maxCharsPerColumn: Optional[int] = ..., - maxMalformedLogPerPartition: Optional[int] = ..., + maxColumns: Optional[Union[int, str]] = ..., + maxCharsPerColumn: Optional[Union[int, str]] = ..., + maxMalformedLogPerPartition: Optional[Union[int, str]] = ..., mode: Optional[str] = ..., columnNameOfCorruptRecord: Optional[str] = ..., multiLine: Optional[Union[bool, str]] = ..., @@ -108,12 +111,15 @@ class DataFrameReader(OptionUtils): emptyValue: Optional[str] = ..., locale: Optional[str] = ..., lineSep: Optional[str] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def orc( self, path: PathOrPaths, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... @overload def jdbc( @@ -125,8 +131,8 @@ class DataFrameReader(OptionUtils): url: str, table: str, column: str, - lowerBound: int, - upperBound: int, + lowerBound: Union[int, str], + upperBound: Union[int, str], numPartitions: int, *, properties: Optional[Dict[str, str]] = ... @@ -166,7 +172,7 @@ class DataFrameWriter(OptionUtils): path: Optional[str] = ..., format: Optional[str] = ..., mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., **options: OptionalPrimitiveType ) -> None: ... def insertInto(self, tableName: str, overwrite: Optional[bool] = ...) -> None: ... 
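Rounding out the `DataFrameWriter.jdbc` docstring converted above, a hedged sketch; the MySQL URL, table, credentials, and the `driver` property are hypothetical, and the matching JDBC driver jar is assumed to be on the classpath:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Append rows to an existing external table via JDBC.
df = spark.createDataFrame([(1, "Alice"), (2, "Bob")], ["id", "name"])
df.write.jdbc(
    url="jdbc:mysql://db.example.com:3306/shop",
    table="customers",
    mode="append",
    properties={"user": "writer", "password": "secret",
                "driver": "com.mysql.cj.jdbc.Driver"},
)
```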
@@ -175,7 +181,7 @@ class DataFrameWriter(OptionUtils): name: str, format: Optional[str] = ..., mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., **options: OptionalPrimitiveType ) -> None: ... def json( @@ -187,13 +193,13 @@ class DataFrameWriter(OptionUtils): timestampFormat: Optional[str] = ..., lineSep: Optional[str] = ..., encoding: Optional[str] = ..., - ignoreNullFields: Optional[bool] = ..., + ignoreNullFields: Optional[Union[bool, str]] = ..., ) -> None: ... def parquet( self, path: str, mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., compression: Optional[str] = ..., ) -> None: ... def text( @@ -224,7 +230,7 @@ class DataFrameWriter(OptionUtils): self, path: str, mode: Optional[str] = ..., - partitionBy: Optional[List[str]] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., compression: Optional[str] = ..., ) -> None: ... def jdbc( diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index d724b76e3bfc3..2857e2e5865ae 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -42,10 +42,24 @@ def toDF(self, schema=None, sampleRatio=None): This is a shorthand for ``spark.createDataFrame(rdd, schema, sampleRatio)`` - :param schema: a :class:`pyspark.sql.types.StructType` or list of names of columns - :param sampleRatio: the sample ratio of rows used for inferring - :return: a DataFrame + Parameters + ---------- + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + column names, default is None. The data type string format equals to + :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can + omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use + ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. + We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. + sampleRatio : float, optional + the sample ratio of rows used for inferring + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> rdd.toDF().collect() [Row(name='Alice', age=1)] """ @@ -61,14 +75,31 @@ class SparkSession(SparkConversionMixin): tables, execute SQL over tables, cache tables, and read parquet files. To create a SparkSession, use the following builder pattern: + .. autoattribute:: builder + :annotation: + + Examples + -------- >>> spark = SparkSession.builder \\ ... .master("local") \\ ... .appName("Word Count") \\ ... .config("spark.some.config.option", "some-value") \\ ... .getOrCreate() - .. autoattribute:: builder - :annotation: + >>> from datetime import datetime + >>> from pyspark.sql import Row + >>> spark = SparkSession(sc) + >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, + ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), + ... time=datetime(2014, 8, 1, 14, 1, 5))]) + >>> df = allTypes.toDF() + >>> df.createOrReplaceTempView("allTypes") + >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' + ... 
'from allTypes where b and i > 0').collect() + [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ + dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] + >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() + [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] """ class Builder(object): @@ -79,11 +110,23 @@ class Builder(object): _options = {} _sc = None - @since(2.0) def config(self, key=None, value=None, conf=None): """Sets a config option. Options set using this method are automatically propagated to both :class:`SparkConf` and :class:`SparkSession`'s own configuration. + .. versionadded:: 2.0.0 + + Parameters + ---------- + key : str, optional + a key name string for configuration property + value : str, optional + a value for configuration property + conf : :class:`SparkConf`, optional + an instance of :class:`SparkConf` + + Examples + -------- For an existing SparkConf, use `conf` parameter. >>> from pyspark.conf import SparkConf @@ -95,9 +138,6 @@ def config(self, key=None, value=None, conf=None): >>> SparkSession.builder.config("spark.some.config.option", "some-value") >> from datetime import datetime - >>> from pyspark.sql import Row - >>> spark = SparkSession(sc) - >>> allTypes = sc.parallelize([Row(i=1, s="string", d=1.0, l=1, - ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), - ... time=datetime(2014, 8, 1, 14, 1, 5))]) - >>> df = allTypes.toDF() - >>> df.createOrReplaceTempView("allTypes") - >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' - ... 'from allTypes where b and i > 0').collect() - [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ - dict[s]=0, time=datetime.datetime(2014, 8, 1, 14, 1, 5), a=1)] - >>> df.rdd.map(lambda x: (x.i, x.s, x.d, x.l, x.b, x.time, x.row.a, x.list)).collect() - [(1, 'string', 1.0, 1, True, datetime.datetime(2014, 8, 1, 14, 1, 5), 1, [1, 2, 3])] - """ from pyspark.sql.context import SQLContext self._sc = sparkContext self._jsc = self._sc._jsc @@ -254,13 +288,19 @@ def newSession(self): return self.__class__(self._sc, self._jsparkSession.newSession()) @classmethod - @since(3.0) def getActiveSession(cls): """ Returns the active SparkSession for the current thread, returned by the builder - :return: :class:`SparkSession` if an active session exists for the current thread + .. versionadded:: 3.0.0 + + Returns + ------- + :class:`SparkSession` + Spark session if an active session exists for the current thread + Examples + -------- >>> s = SparkSession.getActiveSession() >>> l = [('Alice', 1)] >>> rdd = s.sparkContext.parallelize(l) @@ -305,12 +345,15 @@ def conf(self): return self._conf @property - @since(2.0) def catalog(self): """Interface through which the user may create, drop, alter or query underlying databases, tables, functions, etc. - :return: :class:`Catalog` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`Catalog` """ from pyspark.sql.catalog import Catalog if not hasattr(self, "_catalog"): @@ -318,28 +361,43 @@ def catalog(self): return self._catalog @property - @since(2.0) def udf(self): """Returns a :class:`UDFRegistration` for UDF registration. - :return: :class:`UDFRegistration` + .. 
versionadded:: 2.0.0 + + Returns + ------- + :class:`UDFRegistration` """ from pyspark.sql.udf import UDFRegistration return UDFRegistration(self) - @since(2.0) def range(self, start, end=None, step=1, numPartitions=None): """ Create a :class:`DataFrame` with single :class:`pyspark.sql.types.LongType` column named ``id``, containing elements in a range from ``start`` to ``end`` (exclusive) with step value ``step``. - :param start: the start value - :param end: the end value (exclusive) - :param step: the incremental step (default: 1) - :param numPartitions: the number of partitions of the DataFrame - :return: :class:`DataFrame` - + .. versionadded:: 2.0.0 + + Parameters + ---------- + start : int + the start value + end : int, optional + the end value (exclusive) + step : int, optional + the incremental step (default: 1) + numPartitions : int, optional + the number of partitions of the DataFrame + + Returns + ------- + :class:`DataFrame` + + Examples + -------- >>> spark.range(1, 7, 2).collect() [Row(id=1), Row(id=3), Row(id=5)] @@ -362,9 +420,16 @@ def _inferSchemaFromList(self, data, names=None): """ Infer schema from list of Row, dict, or tuple. - :param data: list of Row, dict, or tuple - :param names: list of column names - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + data : iterable + list of Row, dict, or tuple + names : list, optional + list of column names + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ if not data: raise ValueError("can not infer schema from empty dataset") @@ -377,9 +442,17 @@ def _inferSchema(self, rdd, samplingRatio=None, names=None): """ Infer schema from an RDD of Row, dict, or tuple. - :param rdd: an RDD of Row, dict, or tuple - :param samplingRatio: sampling ratio, or no sampling (default) - :return: :class:`pyspark.sql.types.StructType` + Parameters + ---------- + rdd : :class:`RDD` + an RDD of Row, dict, or tuple + samplingRatio : float, optional + sampling ratio, or no sampling (default) + names : list, optional + + Returns + ------- + :class:`pyspark.sql.types.StructType` """ first = rdd.first() if not first: @@ -476,7 +549,6 @@ def _create_shell_session(): return SparkSession.builder.getOrCreate() - @since(2.0) def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=True): """ Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`. @@ -497,23 +569,39 @@ def createDataFrame(self, data, schema=None, samplingRatio=None, verifySchema=Tr If schema inference is needed, ``samplingRatio`` is used to determined the ratio of rows used for schema inference. The first row will be used if ``samplingRatio`` is ``None``. - :param data: an RDD of any kind of SQL data representation (e.g. row, tuple, int, boolean, - etc.), :class:`list`, or :class:`pandas.DataFrame`. - :param schema: a :class:`pyspark.sql.types.DataType` or a datatype string or a list of - column names, default is ``None``. The data type string format equals to - :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can - omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use - ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. We can also use - ``int`` as a short name for ``IntegerType``. - :param samplingRatio: the sample ratio of rows used for inferring - :param verifySchema: verify data types of every row against schema. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 - .. versionchanged:: 2.1 + .. 
versionchanged:: 2.1.0 Added verifySchema. - .. note:: Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. - + Parameters + ---------- + data : :class:`RDD` or iterable + an RDD of any kind of SQL data representation(e.g. :class:`Row`, + :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or + :class:`pandas.DataFrame`. + schema : :class:`pyspark.sql.types.DataType`, str or list, optional + a :class:`pyspark.sql.types.DataType` or a datatype string or a list of + column names, default is None. The data type string format equals to + :class:`pyspark.sql.types.DataType.simpleString`, except that top level struct type can + omit the ``struct<>`` and atomic types use ``typeName()`` as their format, e.g. use + ``byte`` instead of ``tinyint`` for :class:`pyspark.sql.types.ByteType`. + We can also use ``int`` as a short name for :class:`pyspark.sql.types.IntegerType`. + samplingRatio : float, optional + the sample ratio of rows used for inferring + verifySchema : bool, optional + verify data types of every row against schema. Enabled by default. + + Returns + ------- + :class:`DataFrame` + + Notes + ----- + Usage with spark.sql.execution.arrow.pyspark.enabled=True is experimental. + + Examples + -------- >>> l = [('Alice', 1)] >>> spark.createDataFrame(l).collect() [Row(_1='Alice', _2=1)] @@ -614,12 +702,17 @@ def prepare(obj): df._schema = schema return df - @since(2.0) def sql(self, sqlQuery): """Returns a :class:`DataFrame` representing the result of the given query. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.createOrReplaceTempView("table1") >>> df2 = spark.sql("SELECT field1 AS f1, field2 as f2 from table1") >>> df2.collect() @@ -627,12 +720,17 @@ def sql(self, sqlQuery): """ return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped) - @since(2.0) def table(self, tableName): """Returns the specified table as a :class:`DataFrame`. - :return: :class:`DataFrame` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrame` + Examples + -------- >>> df.createOrReplaceTempView("table1") >>> df2 = spark.table("table1") >>> sorted(df.collect()) == sorted(df2.collect()) @@ -641,38 +739,51 @@ def table(self, tableName): return DataFrame(self._jsparkSession.table(tableName), self._wrapped) @property - @since(2.0) def read(self): """ Returns a :class:`DataFrameReader` that can be used to read data in as a :class:`DataFrame`. - :return: :class:`DataFrameReader` + .. versionadded:: 2.0.0 + + Returns + ------- + :class:`DataFrameReader` """ return DataFrameReader(self._wrapped) @property - @since(2.0) def readStream(self): """ Returns a :class:`DataStreamReader` that can be used to read data streams as a streaming :class:`DataFrame`. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. - :return: :class:`DataStreamReader` + Returns + ------- + :class:`DataStreamReader` """ return DataStreamReader(self._wrapped) @property - @since(2.0) def streams(self): """Returns a :class:`StreamingQueryManager` that allows managing all the :class:`StreamingQuery` instances active on `this` context. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. 
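To make the schema forms described in the `createDataFrame` docstring above concrete, a minimal sketch showing a DDL-formatted string, a short atomic type name, and a plain list of column names; the sample rows are placeholders:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
rows = [("Alice", 1), ("Bob", 2)]

# DDL-formatted string; "int" is the short name for IntegerType
df1 = spark.createDataFrame(rows, schema="name string, age int")

# atomic type name only, for single-column data
df2 = spark.createDataFrame([1, 2, 3], schema="int")

# list of column names; types are inferred, rows verified against the schema
df3 = spark.createDataFrame(rows, schema=["name", "age"], verifySchema=True)
```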
- :return: :class:`StreamingQueryManager` + Returns + ------- + :class:`StreamingQueryManager` """ from pyspark.sql.streaming import StreamingQueryManager return StreamingQueryManager(self._jsparkSession.streams()) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index eb3155e5512eb..e7b2fa16d620a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -34,9 +34,11 @@ class StreamingQuery(object): A handle to a query that is executing continuously in the background as new data arrives. All these methods are thread-safe. - .. note:: Evolving + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, jsq): @@ -116,13 +118,16 @@ def recentProgress(self): return [json.loads(p.json()) for p in self._jsq.recentProgress()] @property - @since(2.1) def lastProgress(self): """ Returns the most recent :class:`StreamingQueryProgress` update of this streaming query or None if there were no progress updates - :return: a map + .. versionadded:: 2.1.0 + + Returns + ------- + dict """ lastProgress = self._jsq.lastProgress() if lastProgress: @@ -130,15 +135,18 @@ def lastProgress(self): else: return None - @since(2.0) def processAllAvailable(self): """Blocks until all available data in the source has been processed and committed to the sink. This method is intended for testing. - .. note:: In the case of continually arriving data, this method may block forever. - Additionally, this method is only guaranteed to block until data that has been - synchronously appended data to a stream source prior to invocation. - (i.e. `getOffset` must immediately reflect the addition). + .. versionadded:: 2.0.0 + + Notes + ----- + In the case of continually arriving data, this method may block forever. + Additionally, this method is only guaranteed to block until data that has been + synchronously appended data to a stream source prior to invocation. + (i.e. `getOffset` must immediately reflect the addition). """ return self._jsq.processAllAvailable() @@ -148,12 +156,18 @@ def stop(self): """ self._jsq.stop() - @since(2.1) def explain(self, extended=False): """Prints the (logical and physical) plans to the console for debugging purpose. - :param extended: boolean, default ``False``. If ``False``, prints only the physical plan. + .. versionadded:: 2.1.0 + + Parameters + ---------- + extended : bool, optional + default ``False``. If ``False``, prints only the physical plan. + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('query_explain').start() >>> sq.processAllAvailable() # Wait a bit to generate the runtime plans. >>> sq.explain() @@ -174,10 +188,14 @@ def explain(self, extended=False): # We should print it in the Python process. print(self._jsq.explainInternal(extended)) - @since(2.1) def exception(self): """ - :return: the StreamingQueryException if the query was terminated by an exception, or None. + .. versionadded:: 2.1.0 + + Returns + ------- + :class:`StreamingQueryException` + the StreamingQueryException if the query was terminated by an exception, or None. """ if self._jsq.exception().isDefined(): je = self._jsq.exception().get() @@ -191,19 +209,24 @@ def exception(self): class StreamingQueryManager(object): """A class to manage all the :class:`StreamingQuery` StreamingQueries active. - .. note:: Evolving + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. 
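A small end-to-end sketch of the `DataStreamReader`, `StreamingQuery`, and `StreamingQueryManager` APIs whose docstrings are rewritten above; it assumes the built-in `rate` source and `memory` sink, which are intended for testing, and the query name is a placeholder:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

stream_df = spark.readStream.format("rate").option("rowsPerSecond", 1).load()
query = (stream_df.writeStream
         .format("memory")
         .queryName("rate_query")
         .start())

query.processAllAvailable()                    # block until available data is processed
print([q.name for q in spark.streams.active])  # active queries on this session
print(query.lastProgress)                      # latest progress as a dict, or None
query.explain()                                # physical plan only (extended=False)
query.stop()
```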
""" def __init__(self, jsqm): self._jsqm = jsqm @property - @since(2.0) def active(self): """Returns a list of active queries associated with this SQLContext + .. versionadded:: 2.0.0 + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sqm = spark.streams >>> # get the list of active streaming queries @@ -213,11 +236,14 @@ def active(self): """ return [StreamingQuery(jsq) for jsq in self._jsqm.active()] - @since(2.0) def get(self, id): """Returns an active query from this SQLContext or throws exception if an active query with this name doesn't exist. + .. versionadded:: 2.0.0 + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sq.name 'this_query' @@ -259,11 +285,14 @@ def awaitAnyTermination(self, timeout=None): else: return self._jsqm.awaitAnyTermination() - @since(2.0) def resetTerminated(self): """Forget about past terminated queries so that :func:`awaitAnyTermination()` can be used again to wait for new terminations. + .. versionadded:: 2.0.0 + + Examples + -------- >>> spark.streams.resetTerminated() """ self._jsqm.resetTerminated() @@ -275,9 +304,11 @@ class DataStreamReader(OptionUtils): storage systems (e.g. file systems, key-value stores, etc). Use :attr:`SparkSession.readStream ` to access this. - .. note:: Evolving. + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, spark): @@ -288,20 +319,27 @@ def _df(self, jdf): from pyspark.sql.dataframe import DataFrame return DataFrame(jdf, self._spark) - @since(2.0) def format(self, source): """Specifies the input data source format. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param source: string, name of the data source, e.g. 'json', 'parquet'. + Parameters + ---------- + source : str + name of the data source, e.g. 'json', 'parquet'. + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.format("text") """ self._jreader = self._jreader.format(source) return self - @since(2.0) def schema(self, schema): """Specifies the input schema. @@ -309,11 +347,20 @@ def schema(self, schema): By specifying the schema here, the underlying data source can skip the schema inference step, and thus speed up data loading. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string - (For example ``col0 INT, col1 DOUBLE``). + Parameters + ---------- + schema : :class:`pyspark.sql.types.StructType` or str + a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string + (For example ``col0 INT, col1 DOUBLE``). + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.schema(sdf_schema) >>> s = spark.readStream.schema("col0 INT, col1 DOUBLE") """ @@ -328,7 +375,6 @@ def schema(self, schema): raise TypeError("schema should be StructType or string") return self - @since(2.0) def option(self, key, value): """Adds an input option for the underlying data source. @@ -346,14 +392,19 @@ def option(self, key, value): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + Notes + ----- + This API is evolving. + + Examples + -------- >>> s = spark.readStream.option("x", 1) """ self._jreader = self._jreader.option(key, to_str(value)) return self - @since(2.0) def options(self, **options): """Adds input options for the underlying data source. 
@@ -371,27 +422,44 @@ def options(self, **options): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. + Examples + -------- >>> s = spark.readStream.options(x="1", y=2) """ for k in options: self._jreader = self._jreader.option(k, to_str(options[k])) return self - @since(2.0) def load(self, path=None, format=None, schema=None, **options): """Loads a data stream from a data source and returns it as a :class:`DataFrame `. - .. note:: Evolving. - - :param path: optional string for file-system backed data sources. - :param format: optional string for format of the data source. Default to 'parquet'. - :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param options: all other string options - + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str, optional + optional string for file-system backed data sources. + format : str, optional + optional string for format of the data source. Default to 'parquet'. + schema : :class:`pyspark.sql.types.StructType` or str, optional + optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + **options : dict + all other string options + + Notes + ----- + This API is evolving. + + Examples + -------- >>> json_sdf = spark.readStream.format("json") \\ ... .schema(sdf_schema) \\ ... .load(tempfile.mkdtemp()) @@ -413,7 +481,6 @@ def load(self, path=None, format=None, schema=None, **options): else: return self._df(self._jreader.load()) - @since(2.0) def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowComments=None, allowUnquotedFieldNames=None, allowSingleQuotes=None, allowNumericLeadingZero=None, allowBackslashEscapingAnyCharacter=None, @@ -430,89 +497,119 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, If the ``schema`` parameter is not specified, this function goes through the input once to determine the input schema. - .. note:: Evolving. - - :param path: string represents path to the JSON dataset, - or RDD of Strings storing JSON objects. - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param primitivesAsString: infers all primitive values as a string type. If None is set, - it uses the default value, ``false``. - :param prefersDecimal: infers all floating-point values as a decimal type. If the values - do not fit in decimal, then it infers them as doubles. If None is - set, it uses the default value, ``false``. - :param allowComments: ignores Java/C++ style comment in JSON records. If None is set, - it uses the default value, ``false``. - :param allowUnquotedFieldNames: allows unquoted JSON field names. If None is set, - it uses the default value, ``false``. - :param allowSingleQuotes: allows single quotes in addition to double quotes. If None is - set, it uses the default value, ``true``. - :param allowNumericLeadingZero: allows leading zeros in numbers (e.g. 00012). If None is - set, it uses the default value, ``false``. - :param allowBackslashEscapingAnyCharacter: allows accepting quoting of all character - using backslash quoting mechanism. If None is - set, it uses the default value, ``false``. 
- :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ - field in an output schema. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param multiLine: parse one record, which may span multiple lines, per file. If None is - set, it uses the default value, ``false``. - :param allowUnquotedControlChars: allows JSON Strings to contain unquoted control - characters (ASCII characters with value less than 32, - including tab and line feed characters) or not. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param locale: sets a locale as language tag in IETF BCP 47 format. If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param dropFieldIfAllNull: whether to ignore column of all null values or empty - array/struct during schema inference. If None is set, it - uses the default value, ``false``. - :param encoding: allows to forcibly set one of standard basic or extended encoding for - the JSON files. For example UTF-16BE, UTF-32LE. If None is set, - the encoding of input JSON will be detected automatically - when the multiLine option is set to ``true``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - :param allowNonNumericNumbers: allows JSON parser to recognize set of "Not-a-Number" (NaN) - tokens as legal floating number values. If None is set, - it uses the default value, ``true``. + .. versionadded:: 2.0.0 + + Parameters + ---------- + path : str + string represents path to the JSON dataset, + or RDD of Strings storing JSON objects. + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). 
+ primitivesAsString : str or bool, optional + infers all primitive values as a string type. If None is set, + it uses the default value, ``false``. + prefersDecimal : str or bool, optional + infers all floating-point values as a decimal type. If the values + do not fit in decimal, then it infers them as doubles. If None is + set, it uses the default value, ``false``. + allowComments : str or bool, optional + ignores Java/C++ style comment in JSON records. If None is set, + it uses the default value, ``false``. + allowUnquotedFieldNames : str or bool, optional + allows unquoted JSON field names. If None is set, + it uses the default value, ``false``. + allowSingleQuotes : str or bool, optional + allows single quotes in addition to double quotes. If None is + set, it uses the default value, ``true``. + allowNumericLeadingZero : str or bool, optional + allows leading zeros in numbers (e.g. 00012). If None is + set, it uses the default value, ``false``. + allowBackslashEscapingAnyCharacter : str or bool, optional + allows accepting quoting of all character + using backslash quoting mechanism. If None is + set, it uses the default value, ``false``. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \ + field in an output schema. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + multiLine : str or bool, optional + parse one record, which may span multiple lines, per file. If None is + set, it uses the default value, ``false``. + allowUnquotedControlChars : str or bool, optional + allows JSON Strings to contain unquoted control + characters (ASCII characters with value less than 32, + including tab and line feed characters) or not. + lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. 
+ dropFieldIfAllNull : str or bool, optional + whether to ignore column of all null values or empty + array/struct during schema inference. If None is set, it + uses the default value, ``false``. + encoding : str or bool, optional + allows to forcibly set one of standard basic or extended encoding for + the JSON files. For example UTF-16BE, UTF-32LE. If None is set, + the encoding of input JSON will be detected automatically + when the multiLine option is set to ``true``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + allowNonNumericNumbers : str or bool, optional + allows JSON parser to recognize set of "Not-a-Number" (NaN) + tokens as legal floating number values. If None is set, + it uses the default value, ``true``. * ``+INF``: for positive infinity, as well as alias of ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + Notes + ----- + This API is evolving. + Examples + -------- >>> json_sdf = spark.readStream.json(tempfile.mkdtemp(), schema = sdf_schema) >>> json_sdf.isStreaming True @@ -535,24 +632,28 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, else: raise TypeError("path can be only a single string") - @since(2.3) def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """Loads a ORC file stream, returning the result as a :class:`DataFrame`. - .. note:: Evolving. - - :param mergeSchema: sets whether we should merge schemas collected from all - ORC part-files. This will override ``spark.sql.orc.mergeSchema``. - The default value is specified in ``spark.sql.orc.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.3.0 + + Parameters + ---------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + ORC part-files. This will override ``spark.sql.orc.mergeSchema``. + The default value is specified in ``spark.sql.orc.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. 
# noqa + + Examples + -------- >>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp()) >>> orc_sdf.isStreaming True @@ -566,26 +667,30 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N else: raise TypeError("path can be only a single string") - @since(2.0) def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): """ Loads a Parquet file stream, returning the result as a :class:`DataFrame`. - .. note:: Evolving. - - :param mergeSchema: sets whether we should merge schemas collected from all - Parquet part-files. This will override - ``spark.sql.parquet.mergeSchema``. The default value is specified in - ``spark.sql.parquet.mergeSchema``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.0.0 + + Parameters + ---------- + mergeSchema : str or bool, optional + sets whether we should merge schemas collected from all + Parquet part-files. This will override + ``spark.sql.parquet.mergeSchema``. The default value is specified in + ``spark.sql.parquet.mergeSchema``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Examples + -------- >>> parquet_sdf = spark.readStream.schema(sdf_schema).parquet(tempfile.mkdtemp()) >>> parquet_sdf.isStreaming True @@ -599,7 +704,6 @@ def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLook else: raise TypeError("path can be only a single string") - @since(2.0) def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, recursiveFileLookup=None): """ @@ -610,21 +714,32 @@ def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, By default, each line in the text file is a new row in the resulting DataFrame. - .. note:: Evolving. - - :param paths: string, or list of strings, for input path(s). - :param wholetext: if true, read each file from input path(s) as a single row. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - + .. versionadded:: 2.0.0 + + Parameters + ---------- + paths : str or list + string, or list of strings, for input path(s). + wholetext : str or bool, optional + if true, read each file from input path(s) as a single row. 
+ lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of `partition discovery`_. + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option + disables + `partition discovery `_. # noqa + + Notes + ----- + This API is evolving. + + Examples + -------- >>> text_sdf = spark.readStream.text(tempfile.mkdtemp()) >>> text_sdf.isStreaming True @@ -639,7 +754,6 @@ def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None, else: raise TypeError("path can be only a single string") - @since(2.0) def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=None, comment=None, header=None, inferSchema=None, ignoreLeadingWhiteSpace=None, ignoreTrailingWhiteSpace=None, nullValue=None, nanValue=None, positiveInf=None, @@ -654,111 +768,147 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non ``inferSchema`` is enabled. To avoid going through the entire data once, disable ``inferSchema`` option or specify the schema explicitly using ``schema``. - .. note:: Evolving. - - :param path: string, or list of strings, for input path(s). - :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema - or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). - :param sep: sets a separator (one or more characters) for each field and value. If None is - set, it uses the default value, ``,``. - :param encoding: decodes the CSV files by the given encoding type. If None is set, - it uses the default value, ``UTF-8``. - :param quote: sets a single character used for escaping quoted values where the - separator can be part of the value. If None is set, it uses the default - value, ``"``. If you would like to turn off quotations, you need to set an - empty string. - :param escape: sets a single character used for escaping quotes inside an already - quoted value. If None is set, it uses the default value, ``\``. - :param comment: sets a single character used for skipping lines beginning with this - character. By default (None), it is disabled. - :param header: uses the first line as names of columns. If None is set, it uses the - default value, ``false``. - :param inferSchema: infers the input schema automatically from data. It requires one extra - pass over the data. If None is set, it uses the default value, ``false``. - :param enforceSchema: If it is set to ``true``, the specified or inferred schema will be - forcibly applied to datasource files, and headers in CSV files will be - ignored. If the option is set to ``false``, the schema will be - validated against all headers in CSV files or the first header in RDD - if the ``header`` option is set to ``true``. Field names in the schema - and column names in CSV headers are checked by their positions - taking into account ``spark.sql.caseSensitive``. If None is set, - ``true`` is used by default. Though the default value is ``true``, - it is recommended to disable the ``enforceSchema`` option - to avoid incorrect results. - :param ignoreLeadingWhiteSpace: a flag indicating whether or not leading whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. 
- :param ignoreTrailingWhiteSpace: a flag indicating whether or not trailing whitespaces from - values being read should be skipped. If None is set, it - uses the default value, ``false``. - :param nullValue: sets the string representation of a null value. If None is set, it uses - the default value, empty string. Since 2.0.1, this ``nullValue`` param - applies to all supported types including the string type. - :param nanValue: sets the string representation of a non-number value. If None is set, it - uses the default value, ``NaN``. - :param positiveInf: sets the string representation of a positive infinity value. If None - is set, it uses the default value, ``Inf``. - :param negativeInf: sets the string representation of a negative infinity value. If None - is set, it uses the default value, ``Inf``. - :param dateFormat: sets the string that indicates a date format. Custom date formats - follow the formats at `datetime pattern`_. - This applies to date type. If None is set, it uses the - default value, ``yyyy-MM-dd``. - :param timestampFormat: sets the string that indicates a timestamp format. - Custom date formats follow the formats at `datetime pattern`_. - This applies to timestamp type. If None is set, it uses the - default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. - :param maxColumns: defines a hard limit of how many columns a record can have. If None is - set, it uses the default value, ``20480``. - :param maxCharsPerColumn: defines the maximum number of characters allowed for any given - value being read. If None is set, it uses the default value, - ``-1`` meaning unlimited length. - :param maxMalformedLogPerPartition: this parameter is no longer used since Spark 2.2.0. - If specified, it is ignored. - :param mode: allows a mode for dealing with corrupt records during parsing. If None is - set, it uses the default value, ``PERMISSIVE``. - - * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ - into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ - fields to ``null``. To keep corrupt records, an user can set a string type \ - field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ - schema does not have the field, it drops corrupt records during parsing. \ - A record with less/more tokens than schema is not a corrupted record to CSV. \ - When it meets a record having fewer tokens than the length of the schema, \ - sets ``null`` to extra fields. When the record has more tokens than the \ - length of the schema, it drops extra tokens. - * ``DROPMALFORMED``: ignores the whole corrupted records. - * ``FAILFAST``: throws an exception when it meets corrupted records. - - :param columnNameOfCorruptRecord: allows renaming the new field having malformed string - created by ``PERMISSIVE`` mode. This overrides - ``spark.sql.columnNameOfCorruptRecord``. If None is set, - it uses the value specified in - ``spark.sql.columnNameOfCorruptRecord``. - :param multiLine: parse one record, which may span multiple lines. If None is - set, it uses the default value, ``false``. - :param charToEscapeQuoteEscaping: sets a single character used for escaping the escape for - the quote character. If None is set, the default value is - escape character when escape and quote characters are - different, ``\0`` otherwise.. - :param emptyValue: sets the string representation of an empty value. If None is set, it uses - the default value, empty string. - :param locale: sets a locale as language tag in IETF BCP 47 format. 
If None is set, - it uses the default value, ``en-US``. For instance, ``locale`` is used while - parsing dates and timestamps. - :param lineSep: defines the line separator that should be used for parsing. If None is - set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. - Maximum length is 1 character. - :param pathGlobFilter: an optional glob pattern to only include files with paths matching - the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. - It does not change the behavior of `partition discovery`_. - :param recursiveFileLookup: recursively scan a directory for files. Using this option - disables `partition discovery`_. - - .. _partition discovery: - https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery - .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html - + Parameters + ---------- + path : str or list + string, or list of strings, for input path(s). + schema : :class:`pyspark.sql.types.StructType` or str, optional + an optional :class:`pyspark.sql.types.StructType` for the input schema + or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). + sep : str, optional + sets a separator (one or more characters) for each field and value. If None is + set, it uses the default value, ``,``. + encoding : str, optional + decodes the CSV files by the given encoding type. If None is set, + it uses the default value, ``UTF-8``. + quote : str, optional sets a single character used for escaping quoted values where the + separator can be part of the value. If None is set, it uses the default + value, ``"``. If you would like to turn off quotations, you need to set an + empty string. + escape : str, optional + sets a single character used for escaping quotes inside an already + quoted value. If None is set, it uses the default value, ``\``. + comment : str, optional + sets a single character used for skipping lines beginning with this + character. By default (None), it is disabled. + header : str or bool, optional + uses the first line as names of columns. If None is set, it uses the + default value, ``false``. + inferSchema : str or bool, optional + infers the input schema automatically from data. It requires one extra + pass over the data. If None is set, it uses the default value, ``false``. + enforceSchema : str or bool, optional + If it is set to ``true``, the specified or inferred schema will be + forcibly applied to datasource files, and headers in CSV files will be + ignored. If the option is set to ``false``, the schema will be + validated against all headers in CSV files or the first header in RDD + if the ``header`` option is set to ``true``. Field names in the schema + and column names in CSV headers are checked by their positions + taking into account ``spark.sql.caseSensitive``. If None is set, + ``true`` is used by default. Though the default value is ``true``, + it is recommended to disable the ``enforceSchema`` option + to avoid incorrect results. + ignoreLeadingWhiteSpace : str or bool, optional + a flag indicating whether or not leading whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + ignoreTrailingWhiteSpace : str or bool, optional + a flag indicating whether or not trailing whitespaces from + values being read should be skipped. If None is set, it + uses the default value, ``false``. + nullValue : str, optional + sets the string representation of a null value. If None is set, it uses + the default value, empty string. 
Since 2.0.1, this ``nullValue`` param + applies to all supported types including the string type. + nanValue : str, optional + sets the string representation of a non-number value. If None is set, it + uses the default value, ``NaN``. + positiveInf : str, optional + sets the string representation of a positive infinity value. If None + is set, it uses the default value, ``Inf``. + negativeInf : str, optional + sets the string representation of a negative infinity value. If None + is set, it uses the default value, ``Inf``. + dateFormat : str, optional + sets the string that indicates a date format. Custom date formats + follow the formats at + `datetime pattern `_. # noqa + This applies to date type. If None is set, it uses the + default value, ``yyyy-MM-dd``. + timestampFormat : str, optional + sets the string that indicates a timestamp format. + Custom date formats follow the formats at + `datetime pattern `_. # noqa + This applies to timestamp type. If None is set, it uses the + default value, ``yyyy-MM-dd'T'HH:mm:ss[.SSS][XXX]``. + maxColumns : str or int, optional + defines a hard limit of how many columns a record can have. If None is + set, it uses the default value, ``20480``. + maxCharsPerColumn : str or int, optional + defines the maximum number of characters allowed for any given + value being read. If None is set, it uses the default value, + ``-1`` meaning unlimited length. + maxMalformedLogPerPartition : str or int, optional + this parameter is no longer used since Spark 2.2.0. + If specified, it is ignored. + mode : str, optional + allows a mode for dealing with corrupt records during parsing. If None is + set, it uses the default value, ``PERMISSIVE``. + + * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \ + into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \ + fields to ``null``. To keep corrupt records, an user can set a string type \ + field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \ + schema does not have the field, it drops corrupt records during parsing. \ + A record with less/more tokens than schema is not a corrupted record to CSV. \ + When it meets a record having fewer tokens than the length of the schema, \ + sets ``null`` to extra fields. When the record has more tokens than the \ + length of the schema, it drops extra tokens. + * ``DROPMALFORMED``: ignores the whole corrupted records. + * ``FAILFAST``: throws an exception when it meets corrupted records. + + columnNameOfCorruptRecord : str, optional + allows renaming the new field having malformed string + created by ``PERMISSIVE`` mode. This overrides + ``spark.sql.columnNameOfCorruptRecord``. If None is set, + it uses the value specified in + ``spark.sql.columnNameOfCorruptRecord``. + multiLine : str or bool, optional + parse one record, which may span multiple lines. If None is + set, it uses the default value, ``false``. + charToEscapeQuoteEscaping : str, optional + sets a single character used for escaping the escape for + the quote character. If None is set, the default value is + escape character when escape and quote characters are + different, ``\0`` otherwise. + emptyValue : str, optional + sets the string representation of an empty value. If None is set, it uses + the default value, empty string. + locale : str, optional + sets a locale as language tag in IETF BCP 47 format. If None is set, + it uses the default value, ``en-US``. For instance, ``locale`` is used while + parsing dates and timestamps. 
+ lineSep : str, optional + defines the line separator that should be used for parsing. If None is + set, it covers all ``\\r``, ``\\r\\n`` and ``\\n``. + Maximum length is 1 character. + pathGlobFilter : str or bool, optional + an optional glob pattern to only include files with paths matching + the pattern. The syntax follows `org.apache.hadoop.fs.GlobFilter`. + It does not change the behavior of + `partition discovery `_. # noqa + recursiveFileLookup : str or bool, optional + recursively scan a directory for files. Using this option disables + `partition discovery `_. # noqa + + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. + + Examples + -------- >>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema = sdf_schema) >>> csv_sdf.isStreaming True @@ -790,9 +940,11 @@ class DataStreamWriter(object): Use :attr:`DataFrame.writeStream ` to access this. - .. note:: Evolving. + .. versionadded:: 2.0.0 - .. versionadded:: 2.0 + Notes + ----- + This API is evolving. """ def __init__(self, df): @@ -804,10 +956,11 @@ def _sq(self, jsq): from pyspark.sql.streaming import StreamingQuery return StreamingQuery(jsq) - @since(2.0) def outputMode(self, outputMode): """Specifies how data of a streaming DataFrame/Dataset is written to a streaming sink. + .. versionadded:: 2.0.0 + Options include: * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to @@ -818,8 +971,12 @@ def outputMode(self, outputMode): written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. - .. note:: Evolving. + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.outputMode('append') """ if not outputMode or type(outputMode) != str or len(outputMode.strip()) == 0: @@ -827,20 +984,27 @@ def outputMode(self, outputMode): self._jwrite = self._jwrite.outputMode(outputMode) return self - @since(2.0) def format(self, source): """Specifies the underlying output data source. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Parameters + ---------- + source : str + string, name of the data source, which for now can be 'parquet'. - :param source: string, name of the data source, which for now can be 'parquet'. + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.format('json') """ self._jwrite = self._jwrite.format(source) return self - @since(2.0) def option(self, key, value): """Adds an output option for the underlying data source. @@ -858,12 +1022,15 @@ def option(self, key, value): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ self._jwrite = self._jwrite.option(key, to_str(value)) return self - @since(2.0) def options(self, **options): """Adds output options for the underlying data source. @@ -881,39 +1048,56 @@ def options(self, **options): ambiguous. If it isn't set, the current value of the SQL config ``spark.sql.session.timeZone`` is used by default. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Notes + ----- + This API is evolving. """ for k in options: self._jwrite = self._jwrite.option(k, to_str(options[k])) return self - @since(2.0) def partitionBy(self, *cols): """Partitions the output by the given columns on the file system. If specified, the output is laid out on the file system similar to Hive's partitioning scheme. - .. note:: Evolving. + .. 
versionadded:: 2.0.0 - :param cols: name of columns + Parameters + ---------- + cols : str or list + name of columns + Notes + ----- + This API is evolving. """ if len(cols) == 1 and isinstance(cols[0], (list, tuple)): cols = cols[0] self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols)) return self - @since(2.0) def queryName(self, queryName): """Specifies the name of the :class:`StreamingQuery` that can be started with :func:`start`. This name must be unique among all the currently active queries in the associated SparkSession. - .. note:: Evolving. + .. versionadded:: 2.0.0 + + Parameters + ---------- + queryName : str + unique name for the query - :param queryName: unique name for the query + Notes + ----- + This API is evolving. + Examples + -------- >>> writer = sdf.writeStream.queryName('streaming_query') """ if not queryName or type(queryName) != str or len(queryName.strip()) == 0: @@ -922,22 +1106,32 @@ def queryName(self, queryName): return self @keyword_only - @since(2.0) def trigger(self, *, processingTime=None, once=None, continuous=None): """Set the trigger for the stream query. If this is not set it will run the query as fast as possible, which is equivalent to setting the trigger to ``processingTime='0 seconds'``. - .. note:: Evolving. - - :param processingTime: a processing time interval as a string, e.g. '5 seconds', '1 minute'. - Set a trigger that runs a microbatch query periodically based on the - processing time. Only one trigger can be set. - :param once: if set to True, set a trigger that processes only one batch of data in a - streaming query then terminates the query. Only one trigger can be set. - :param continuous: a time interval as a string, e.g. '5 seconds', '1 minute'. - Set a trigger that runs a continuous query with a given checkpoint - interval. Only one trigger can be set. - + .. versionadded:: 2.0.0 + + Parameters + ---------- + processingTime : str, optional + a processing time interval as a string, e.g. '5 seconds', '1 minute'. + Set a trigger that runs a microbatch query periodically based on the + processing time. Only one trigger can be set. + once : bool, optional + if set to True, set a trigger that processes only one batch of data in a + streaming query then terminates the query. Only one trigger can be set. + continuous : str, optional + a time interval as a string, e.g. '5 seconds', '1 minute'. + Set a trigger that runs a continuous query with a given checkpoint + interval. Only one trigger can be set. + + Notes + ----- + This API is evolving. + + Examples + -------- >>> # trigger the query for execution every 5 seconds >>> writer = sdf.writeStream.trigger(processingTime='5 seconds') >>> # trigger the query for just once batch of data @@ -977,7 +1171,6 @@ def trigger(self, *, processingTime=None, once=None, continuous=None): self._jwrite = self._jwrite.trigger(jTrigger) return self - @since(2.4) def foreach(self, f): """ Sets the output of the streaming query to be processed using the provided writer ``f``. @@ -1045,8 +1238,14 @@ def foreach(self, f): returns successfully (irrespective of the return value), except if the Python crashes in the middle. - .. note:: Evolving. + .. versionadded:: 2.4.0 + + Notes + ----- + This API is evolving. + Examples + -------- >>> # Print every row using a function >>> def print_row(row): ... 
print(row) @@ -1139,7 +1338,6 @@ def func_with_open_process_close(partition_id, iterator): self._jwrite.foreach(jForeachWriter) return self - @since(2.4) def foreachBatch(self, func): """ Sets the output of the streaming query to be processed using the provided @@ -1151,8 +1349,14 @@ def foreachBatch(self, func): to exactly same for the same batchId (assuming all operations are deterministic in the query). - .. note:: Evolving. + .. versionadded:: 2.4.0 + Notes + ----- + This API is evolving. + + Examples + -------- >>> def func(batch_df, batch_id): ... batch_df.collect() ... @@ -1168,7 +1372,6 @@ def foreachBatch(self, func): ensure_callback_server_started(gw) return self - @since(2.0) def start(self, path=None, format=None, outputMode=None, partitionBy=None, queryName=None, **options): """Streams the contents of the :class:`DataFrame` to a data source. @@ -1177,12 +1380,17 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query If ``format`` is not specified, the default data source configured by ``spark.sql.sources.default`` will be used. - .. note:: Evolving. + .. versionadded:: 2.0.0 - :param path: the path in a Hadoop supported file system - :param format: the format used to save - :param outputMode: specifies how data of a streaming DataFrame/Dataset is written to a - streaming sink. + Parameters + ---------- + path : str, optional + the path in a Hadoop supported file system + format : str, optional + the format used to save + outputMode : str, optional + specifies how data of a streaming DataFrame/Dataset is written to a + streaming sink. * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink @@ -1191,11 +1399,20 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. - :param partitionBy: names of partitioning columns - :param queryName: unique name for the query - :param options: All other string options. You may want to provide a `checkpointLocation` - for most streams, however it is not required for a `memory` stream. - + partitionBy : str or list, optional + names of partitioning columns + queryName : str, optional + unique name for the query + **options : dict + All other string options. You may want to provide a `checkpointLocation` + for most streams, however it is not required for a `memory` stream. + + Notes + ----- + This API is evolving. + + Examples + -------- >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() >>> sq.isActive True diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 22055b2efc06b..56ce140b826d5 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -68,7 +68,7 @@ class DataStreamReader(OptionUtils): self, path: Optional[str] = ..., format: Optional[str] = ..., - schema: Optional[StructType] = ..., + schema: Optional[Union[StructType, str]] = ..., **options: OptionalPrimitiveType ) -> DataFrame: ... 
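For the `trigger`, `foreachBatch`, and `start` docstrings above, a minimal sketch of the micro-batch pattern they describe; the `rate` source, the batch function, and the checkpoint path are illustrative placeholders:

```
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
stream_df = spark.readStream.format("rate").load()

def write_batch(batch_df, batch_id):
    # called once per micro-batch; batch_df is an ordinary batch DataFrame
    print(batch_id, batch_df.count())

query = (stream_df.writeStream
         .trigger(processingTime="5 seconds")
         .foreachBatch(write_batch)
         .option("checkpointLocation", "/tmp/rate_checkpoint")
         .start())
query.processAllAvailable()
query.stop()
```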
def json( @@ -92,26 +92,31 @@ class DataStreamReader(OptionUtils): locale: Optional[str] = ..., dropFieldIfAllNull: Optional[Union[bool, str]] = ..., encoding: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., + allowNonNumericNumbers: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def orc( self, path: str, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def parquet( self, path: str, mergeSchema: Optional[bool] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def text( self, path: str, wholetext: bool = ..., lineSep: Optional[str] = ..., - recursiveFileLookup: Optional[bool] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... def csv( self, @@ -142,6 +147,8 @@ class DataStreamReader(OptionUtils): emptyValue: Optional[str] = ..., locale: Optional[str] = ..., lineSep: Optional[str] = ..., + pathGlobFilter: Optional[Union[bool, str]] = ..., + recursiveFileLookup: Optional[Union[bool, str]] = ..., ) -> DataFrame: ... class DataStreamWriter: diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 5a89d5ab9a7e5..c0948b6e6e379 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -198,8 +198,12 @@ class DecimalType(FractionalType): When creating a DecimalType, the default precision and scale is (10, 0). When inferring schema from decimal.Decimal objects, it will be DecimalType(38, 18). - :param precision: the maximum (i.e. total) number of digits (default: 10) - :param scale: the number of digits on right side of dot. (default: 0) + Parameters + ---------- + precision : int, optional + the maximum (i.e. total) number of digits (default: 10) + scale : int, optional + the number of digits on right side of dot. (default: 0) """ def __init__(self, precision=10, scale=0): @@ -263,17 +267,22 @@ def simpleString(self): class ArrayType(DataType): """Array data type. - :param elementType: :class:`DataType` of each element in the array. - :param containsNull: boolean, whether the array can contain null (None) values. + Parameters + ---------- + elementType : :class:`DataType` + :class:`DataType` of each element in the array. + containsNull : bool, optional + whether the array can contain null (None) values. + + Examples + -------- + >>> ArrayType(StringType()) == ArrayType(StringType(), True) + True + >>> ArrayType(StringType(), False) == ArrayType(StringType()) + False """ def __init__(self, elementType, containsNull=True): - """ - >>> ArrayType(StringType()) == ArrayType(StringType(), True) - True - >>> ArrayType(StringType(), False) == ArrayType(StringType()) - False - """ assert isinstance(elementType, DataType),\ "elementType %s should be an instance of %s" % (elementType, DataType) self.elementType = elementType @@ -313,22 +322,30 @@ def fromInternal(self, obj): class MapType(DataType): """Map data type. - :param keyType: :class:`DataType` of the keys in the map. - :param valueType: :class:`DataType` of the values in the map. - :param valueContainsNull: indicates whether values can contain null (None) values. 
- + Parameters + ---------- + keyType : :class:`DataType` + :class:`DataType` of the keys in the map. + valueType : :class:`DataType` + :class:`DataType` of the values in the map. + valueContainsNull : bool, optional + indicates whether values can contain null (None) values. + + Notes + ----- Keys in a map data type are not allowed to be null (None). + + Examples + -------- + >>> (MapType(StringType(), IntegerType()) + ... == MapType(StringType(), IntegerType(), True)) + True + >>> (MapType(StringType(), IntegerType(), False) + ... == MapType(StringType(), FloatType())) + False """ def __init__(self, keyType, valueType, valueContainsNull=True): - """ - >>> (MapType(StringType(), IntegerType()) - ... == MapType(StringType(), IntegerType(), True)) - True - >>> (MapType(StringType(), IntegerType(), False) - ... == MapType(StringType(), FloatType())) - False - """ assert isinstance(keyType, DataType),\ "keyType %s should be an instance of %s" % (keyType, DataType) assert isinstance(valueType, DataType),\ @@ -375,21 +392,28 @@ def fromInternal(self, obj): class StructField(DataType): """A field in :class:`StructType`. - :param name: string, name of the field. - :param dataType: :class:`DataType` of the field. - :param nullable: boolean, whether the field can be null (None) or not. - :param metadata: a dict from string to simple type that can be toInternald to JSON automatically + Parameters + ---------- + name : str + name of the field. + dataType : :class:`DataType` + :class:`DataType` of the field. + nullable : bool + whether the field can be null (None) or not. + metadata : dict + a dict from string to simple type that can be toInternald to JSON automatically + + Examples + -------- + >>> (StructField("f1", StringType(), True) + ... == StructField("f1", StringType(), True)) + True + >>> (StructField("f1", StringType(), True) + ... == StructField("f2", StringType(), True)) + False """ def __init__(self, name, dataType, nullable=True, metadata=None): - """ - >>> (StructField("f1", StringType(), True) - ... == StructField("f1", StringType(), True)) - True - >>> (StructField("f1", StringType(), True) - ... == StructField("f2", StringType(), True)) - False - """ assert isinstance(dataType, DataType),\ "dataType %s should be an instance of %s" % (dataType, DataType) assert isinstance(name, str), "field name %s should be a string" % (name) @@ -441,24 +465,25 @@ class StructType(DataType): Iterating a :class:`StructType` will iterate over its :class:`StructField`\\s. A contained :class:`StructField` can be accessed by its name or position. + Examples + -------- >>> struct1 = StructType([StructField("f1", StringType(), True)]) >>> struct1["f1"] StructField(f1,StringType,true) >>> struct1[0] StructField(f1,StringType,true) + + >>> struct1 = StructType([StructField("f1", StringType(), True)]) + >>> struct2 = StructType([StructField("f1", StringType(), True)]) + >>> struct1 == struct2 + True + >>> struct1 = StructType([StructField("f1", StringType(), True)]) + >>> struct2 = StructType([StructField("f1", StringType(), True), + ... StructField("f2", IntegerType(), False)]) + >>> struct1 == struct2 + False """ def __init__(self, fields=None): - """ - >>> struct1 = StructType([StructField("f1", StringType(), True)]) - >>> struct2 = StructType([StructField("f1", StringType(), True)]) - >>> struct1 == struct2 - True - >>> struct1 = StructType([StructField("f1", StringType(), True)]) - >>> struct2 = StructType([StructField("f1", StringType(), True), - ... 
StructField("f2", IntegerType(), False)]) - >>> struct1 == struct2 - False - """ if not fields: self.fields = [] self.names = [] @@ -481,6 +506,23 @@ def add(self, field, data_type=None, nullable=True, metadata=None): metadata(optional). The data_type parameter may be either a String or a DataType object. + Parameters + ---------- + field : str or :class:`StructField` + Either the name of the field or a StructField object + data_type : :class:`DataType`, optional + If present, the DataType of the StructField to create + nullable : bool, optional + Whether the field to add should be nullable (default True) + metadata : dict, optional + Any additional metadata (default None) + + Returns + ------- + :class:`StructType` + + Examples + -------- >>> struct1 = StructType().add("f1", StringType(), True).add("f2", StringType(), True, None) >>> struct2 = StructType([StructField("f1", StringType(), True), \\ ... StructField("f2", StringType(), True, None)]) @@ -494,12 +536,6 @@ def add(self, field, data_type=None, nullable=True, metadata=None): >>> struct2 = StructType([StructField("f1", StringType(), True)]) >>> struct1 == struct2 True - - :param field: Either the name of the field or a StructField object - :param data_type: If present, the DataType of the StructField to create - :param nullable: Whether the field to add should be nullable (default True) - :param metadata: Any additional metadata (default None) - :return: a new updated StructType """ if isinstance(field, StructField): self.fields.append(field) @@ -563,6 +599,8 @@ def fieldNames(self): """ Returns all field names in a list. + Examples + -------- >>> struct = StructType([StructField("f1", StringType(), True)]) >>> struct.fieldNames() ['f1'] @@ -745,6 +783,8 @@ def _parse_datatype_string(s): for :class:`IntegerType`. Since Spark 2.3, this also supports a schema in a DDL-formatted string and case-insensitive strings. + Examples + -------- >>> _parse_datatype_string("int ") IntegerType >>> _parse_datatype_string("INT ") @@ -803,6 +843,9 @@ def from_ddl_datatype(type_str): def _parse_datatype_json_string(json_string): """Parses the given data type JSON string. + + Examples + -------- >>> import pickle >>> def check_datatype(datatype): ... pickled = pickle.loads(pickle.dumps(datatype)) @@ -1173,6 +1216,8 @@ def _make_type_verifier(dataType, nullable=True, name=None): within the allowed range, e.g. using 128 as ByteType will overflow. Note that, Python float is not checked, so it will become infinity when cast to Java float, if it overflows. + Examples + -------- >>> _make_type_verifier(StructType([]))(None) >>> _make_type_verifier(StringType())("") >>> _make_type_verifier(LongType())(0) @@ -1392,10 +1437,13 @@ class Row(tuple): It is not allowed to omit a named argument to represent that the value is None or missing. This should be explicitly set to None in this case. - NOTE: As of Spark 3.0.0, Rows created from named arguments no longer have - field names sorted alphabetically and will be ordered in the position as - entered. + .. versionchanged:: 3.0.0 + Rows created from named arguments no longer have + field names sorted alphabetically and will be ordered in the position as + entered. + Examples + -------- >>> row = Row(name="Alice", age=11) >>> row Row(name='Alice', age=11) @@ -1447,14 +1495,21 @@ def asDict(self, recursive=False): """ Return as a dict - :param recursive: turns the nested Rows to dict (default: False). - - .. 
note:: If a row contains duplicate field names, e.g., the rows of a join - between two :class:`DataFrame` that both have the fields of same names, - one of the duplicate fields will be selected by ``asDict``. ``__getitem__`` - will also return one of the duplicate fields, however returned value might - be different to ``asDict``. - + Parameters + ---------- + recursive : bool, optional + turns the nested Rows to dict (default: False). + + Notes + ----- + If a row contains duplicate field names, e.g., the rows of a join + between two :class:`DataFrame` that both have the fields of same names, + one of the duplicate fields will be selected by ``asDict``. ``__getitem__`` + will also return one of the duplicate fields, however returned value might + be different to ``asDict``. + + Examples + -------- >>> Row(name="Alice", age=11).asDict() == {'name': 'Alice', 'age': 11} True >>> row = Row(key=1, value=Row(name='a', age=2)) diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 100481cf12899..c2e02a1c8c3d8 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -20,7 +20,7 @@ import functools import sys -from pyspark import SparkContext, since +from pyspark import SparkContext from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType from pyspark.sql.column import Column, _to_java_column, _to_seq from pyspark.sql.types import StringType, DataType, StructType, _parse_datatype_string @@ -49,9 +49,11 @@ class UserDefinedFunction(object): .. versionadded:: 1.3 - .. note:: The constructor of this class is not supposed to be directly called. - Use :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` - to create this instance. + Notes + ----- + The constructor of this class is not supposed to be directly called. + Use :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` + to create this instance. """ def __init__(self, func, returnType=StringType(), @@ -232,26 +234,39 @@ class UDFRegistration(object): def __init__(self, sparkSession): self.sparkSession = sparkSession - @since("1.3.1") def register(self, name, f, returnType=None): """Register a Python function (including lambda function) or a user-defined function as a SQL function. - :param name: name of the user-defined function in SQL statements. - :param f: a Python function, or a user-defined function. The user-defined function can + .. versionadded:: 1.3.1 + + Parameters + ---------- + name : str, + name of the user-defined function in SQL statements. + f : function, :meth:`pyspark.sql.functions.udf` or :meth:`pyspark.sql.functions.pandas_udf` + a Python function, or a user-defined function. The user-defined function can be either row-at-a-time or vectorized. See :meth:`pyspark.sql.functions.udf` and :meth:`pyspark.sql.functions.pandas_udf`. - :param returnType: the return type of the registered user-defined function. The value can + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the registered user-defined function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - :return: a user-defined function. + `returnType` can be optionally specified when `f` is a Python function but not + when `f` is a user-defined function. Please see the examples below. 
+ + Returns + ------- + function + a user-defined function + Notes + ----- To register a nondeterministic Python function, users need to first build a nondeterministic user-defined function for the Python function and then register it as a SQL function. - `returnType` can be optionally specified when `f` is a Python function but not - when `f` is a user-defined function. Please see below. - + Examples + -------- 1. When `f` is a Python function: `returnType` defaults to string type and can be optionally specified. The produced @@ -275,7 +290,7 @@ def register(self, name, f, returnType=None): >>> spark.sql("SELECT stringLengthInt('test')").collect() [Row(stringLengthInt(test)=4)] - 2. When `f` is a user-defined function: + 2. When `f` is a user-defined function (from Spark 2.3.0): Spark uses the return type of the given user-defined function as the return type of the registered user-defined function. `returnType` should not be specified. @@ -315,8 +330,6 @@ def register(self, name, f, returnType=None): >>> spark.sql(q).collect() # doctest: +SKIP [Row(sum_udf(v1)=1), Row(sum_udf(v1)=5)] - .. note:: Registration for a user-defined function (case 2.) was added from - Spark 2.3.0. """ # This is to check whether the input function is from a user-defined function or @@ -348,18 +361,26 @@ def register(self, name, f, returnType=None): self.sparkSession._jsparkSession.udf().registerPython(name, register_udf._judf) return return_udf - @since(2.3) def registerJavaFunction(self, name, javaClassName, returnType=None): """Register a Java user-defined function as a SQL function. In addition to a name and the function itself, the return type can be optionally specified. When the return type is not specified we would infer it via reflection. - :param name: name of the user-defined function - :param javaClassName: fully qualified name of java class - :param returnType: the return type of the registered Java function. The value can be either + .. versionadded:: 2.3.0 + + Parameters + ---------- + name : str + name of the user-defined function + javaClassName : str + fully qualified name of java class + returnType : :class:`pyspark.sql.types.DataType` or str, optional + the return type of the registered Java function. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + Examples + -------- >>> from pyspark.sql.types import IntegerType >>> spark.udf.registerJavaFunction( ... "javaStringLength", "test.org.apache.spark.sql.JavaStringLength", IntegerType()) @@ -387,13 +408,18 @@ def registerJavaFunction(self, name, javaClassName, returnType=None): jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json()) self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt) - @since(2.3) def registerJavaUDAF(self, name, javaClassName): """Register a Java user-defined aggregate function as a SQL function. - :param name: name of the user-defined aggregate function - :param javaClassName: fully qualified name of java class + .. versionadded:: 2.3.0 + + name : str + name of the user-defined aggregate function + javaClassName : str + fully qualified name of java class + Examples + -------- >>> spark.udf.registerJavaUDAF("javaUDAF", "test.org.apache.spark.sql.MyDoubleAvg") ... 
# doctest: +SKIP >>> df = spark.createDataFrame([(1, "a"),(2, "b"), (3, "a")],["id", "name"]) diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index bd76d880055cd..18f8ba29f95a2 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -141,9 +141,15 @@ def install_exception_handler(): def toJArray(gateway, jtype, arr): """ Convert python list to java type array - :param gateway: Py4j Gateway - :param jtype: java type of element in array - :param arr: python type list + + Parameters + ---------- + gateway : + Py4j Gateway + jtype : + java type of element in array + arr : + python type list """ jarr = gateway.new_array(jtype, len(arr)) for i in range(0, len(arr)): diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 82f74346ba928..753ac6e10b3b3 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -34,19 +34,21 @@ class Window(object): """ Utility functions for defining window in DataFrames. - For example: + .. versionadded:: 1.4 + + Notes + ----- + When ordering is not defined, an unbounded window frame (rowFrame, + unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined, + a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default. + Examples + -------- >>> # ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW >>> window = Window.orderBy("date").rowsBetween(Window.unboundedPreceding, Window.currentRow) >>> # PARTITION BY country ORDER BY date RANGE BETWEEN 3 PRECEDING AND 3 FOLLOWING >>> window = Window.orderBy("date").partitionBy("country").rangeBetween(-3, 3) - - .. note:: When ordering is not defined, an unbounded window frame (rowFrame, - unboundedPreceding, unboundedFollowing) is used by default. When ordering is defined, - a growing window frame (rangeFrame, unboundedPreceding, currentRow) is used by default. - - .. versionadded:: 1.4 """ _JAVA_MIN_LONG = -(1 << 63) # -9223372036854775808 @@ -81,7 +83,6 @@ def orderBy(*cols): return WindowSpec(jspec) @staticmethod - @since(2.1) def rowsBetween(start, end): """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -101,6 +102,21 @@ def rowsBetween(start, end): offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from index 4 to index 7. + .. versionadded:: 2.1.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to -9223372036854775808. + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to 9223372036854775807. + + Examples + -------- >>> from pyspark.sql import Window >>> from pyspark.sql import functions as func >>> from pyspark.sql import SQLContext @@ -121,12 +137,6 @@ def rowsBetween(start, end): | 3| b| 3| +---+--------+---+ - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to -9223372036854775808. - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to 9223372036854775807. 
""" if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -137,7 +147,6 @@ def rowsBetween(start, end): return WindowSpec(jspec) @staticmethod - @since(2.1) def rangeBetween(start, end): """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -160,6 +169,21 @@ def rangeBetween(start, end): unbounded, because no value modification is needed, in this case multiple and non-numeric ORDER BY expression are allowed. + .. versionadded:: 2.1.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). + + Examples + -------- >>> from pyspark.sql import Window >>> from pyspark.sql import functions as func >>> from pyspark.sql import SQLContext @@ -180,12 +204,6 @@ def rangeBetween(start, end): | 3| b| 3| +---+--------+---+ - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -203,31 +221,38 @@ class WindowSpec(object): Use the static methods in :class:`Window` to create a :class:`WindowSpec`. - .. versionadded:: 1.4 + .. versionadded:: 1.4.0 """ def __init__(self, jspec): self._jspec = jspec - @since(1.4) def partitionBy(self, *cols): """ Defines the partitioning columns in a :class:`WindowSpec`. - :param cols: names of columns or expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : str, :class:`Column` or list + names of columns or expressions """ return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols))) - @since(1.4) def orderBy(self, *cols): """ Defines the ordering columns in a :class:`WindowSpec`. - :param cols: names of columns or expressions + .. versionadded:: 1.4.0 + + Parameters + ---------- + cols : str, :class:`Column` or list + names of columns or expressions """ return WindowSpec(self._jspec.orderBy(_to_java_cols(cols))) - @since(1.4) def rowsBetween(self, start, end): """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -240,12 +265,18 @@ def rowsBetween(self, start, end): and ``Window.currentRow`` to specify special boundary values, rather than using integral values directly. - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. 
+ The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding @@ -253,7 +284,6 @@ def rowsBetween(self, start, end): end = Window.unboundedFollowing return WindowSpec(self._jspec.rowsBetween(start, end)) - @since(1.4) def rangeBetween(self, start, end): """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -266,12 +296,18 @@ def rangeBetween(self, start, end): and ``Window.currentRow`` to specify special boundary values, rather than using integral values directly. - :param start: boundary start, inclusive. - The frame is unbounded if this is ``Window.unboundedPreceding``, or - any value less than or equal to max(-sys.maxsize, -9223372036854775808). - :param end: boundary end, inclusive. - The frame is unbounded if this is ``Window.unboundedFollowing``, or - any value greater than or equal to min(sys.maxsize, 9223372036854775807). + .. versionadded:: 1.4.0 + + Parameters + ---------- + start : int + boundary start, inclusive. + The frame is unbounded if this is ``Window.unboundedPreceding``, or + any value less than or equal to max(-sys.maxsize, -9223372036854775808). + end : int + boundary end, inclusive. + The frame is unbounded if this is ``Window.unboundedFollowing``, or + any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding diff --git a/python/pyspark/sql/window.pyi b/python/pyspark/sql/window.pyi index 4e31d57bec4d0..4fdc468df112a 100644 --- a/python/pyspark/sql/window.pyi +++ b/python/pyspark/sql/window.pyi @@ -15,6 +15,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import List, Union from pyspark.sql._typing import ColumnOrName from py4j.java_gateway import JavaObject # type: ignore[import] @@ -24,9 +25,9 @@ class Window: unboundedFollowing: int currentRow: int @staticmethod - def partitionBy(*cols: ColumnOrName) -> WindowSpec: ... + def partitionBy(*cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... @staticmethod - def orderBy(*cols: ColumnOrName) -> WindowSpec: ... + def orderBy(*cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... @staticmethod def rowsBetween(start: int, end: int) -> WindowSpec: ... @staticmethod @@ -34,7 +35,7 @@ class Window: class WindowSpec: def __init__(self, jspec: JavaObject) -> None: ... - def partitionBy(self, *cols: ColumnOrName) -> WindowSpec: ... - def orderBy(self, *cols: ColumnOrName) -> WindowSpec: ... + def partitionBy(self, *cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... + def orderBy(self, *cols: Union[ColumnOrName, List[ColumnOrName]]) -> WindowSpec: ... def rowsBetween(self, start: int, end: int) -> WindowSpec: ... def rangeBetween(self, start: int, end: int) -> WindowSpec: ... From 27d81369342c19bae558329ddd0e2542554433f9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 2 Nov 2020 22:23:26 -0800 Subject: [PATCH 0378/1009] [SPARK-33324][K8S][BUILD] Upgrade kubernetes-client to 4.11.1 ### What changes were proposed in this pull request? This PR aims to upgrade `Kubernetes-client` from 4.10.3 to 4.11.1. ### Why are the changes needed? This upgrades the dependency for Apache Spark 3.1.0. Since 4.12.0 is still new and has a breaking API changes, this PR chooses the latest compatible one. 
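For reviewers double-checking the bump locally, a minimal sketch along these lines can confirm which fabric8 client jar actually gets resolved on the driver classpath (illustrative only, not part of this patch; it assumes the standard 4.x entry point `io.fabric8.kubernetes.client.DefaultKubernetesClient`, and the manifest version attribute may be absent in some builds):

```scala
// Illustrative check only: print the jar that provides the fabric8 client,
// so the resolved version is visible after the upgrade.
import io.fabric8.kubernetes.client.DefaultKubernetesClient

object KubernetesClientVersionCheck {
  def main(args: Array[String]): Unit = {
    val clazz = classOf[DefaultKubernetesClient]
    // Expected to point at kubernetes-client-4.11.1.jar once this change is in.
    println(s"kubernetes-client jar: ${clazz.getProtectionDomain.getCodeSource.getLocation}")
    // May print null if the jar manifest does not set Implementation-Version.
    println(s"Implementation-Version: ${clazz.getPackage.getImplementationVersion}")
  }
}
```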
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the all CIs including K8s IT. Closes #30233 from dongjoon-hyun/SPARK-33324. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 44 +++++++++---------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 44 +++++++++---------- resource-managers/kubernetes/core/pom.xml | 2 +- .../kubernetes/integration-tests/pom.xml | 2 +- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index b0b215a316df2..1cd4ee94997f8 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -108,7 +108,7 @@ jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar +jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar @@ -155,26 +155,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar -kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar +kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar +kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar +kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar +kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar +kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar +kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar +kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar +kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar 
+kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar +kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar +kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar +kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar +kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar +kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar +kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar +kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar +kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar +kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar +kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar +kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -195,7 +195,7 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar +openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b64c7989a4e02..198e939820fcd 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -85,7 +85,7 @@ jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar -jackson-datatype-jsr310/2.10.3//jackson-datatype-jsr310-2.10.3.jar +jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -125,26 +125,26 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/4.10.3//kubernetes-client-4.10.3.jar -kubernetes-model-admissionregistration/4.10.3//kubernetes-model-admissionregistration-4.10.3.jar -kubernetes-model-apiextensions/4.10.3//kubernetes-model-apiextensions-4.10.3.jar -kubernetes-model-apps/4.10.3//kubernetes-model-apps-4.10.3.jar -kubernetes-model-autoscaling/4.10.3//kubernetes-model-autoscaling-4.10.3.jar -kubernetes-model-batch/4.10.3//kubernetes-model-batch-4.10.3.jar -kubernetes-model-certificates/4.10.3//kubernetes-model-certificates-4.10.3.jar -kubernetes-model-common/4.10.3//kubernetes-model-common-4.10.3.jar -kubernetes-model-coordination/4.10.3//kubernetes-model-coordination-4.10.3.jar -kubernetes-model-core/4.10.3//kubernetes-model-core-4.10.3.jar -kubernetes-model-discovery/4.10.3//kubernetes-model-discovery-4.10.3.jar -kubernetes-model-events/4.10.3//kubernetes-model-events-4.10.3.jar -kubernetes-model-extensions/4.10.3//kubernetes-model-extensions-4.10.3.jar -kubernetes-model-metrics/4.10.3//kubernetes-model-metrics-4.10.3.jar -kubernetes-model-networking/4.10.3//kubernetes-model-networking-4.10.3.jar -kubernetes-model-policy/4.10.3//kubernetes-model-policy-4.10.3.jar 
-kubernetes-model-rbac/4.10.3//kubernetes-model-rbac-4.10.3.jar -kubernetes-model-scheduling/4.10.3//kubernetes-model-scheduling-4.10.3.jar -kubernetes-model-settings/4.10.3//kubernetes-model-settings-4.10.3.jar -kubernetes-model-storageclass/4.10.3//kubernetes-model-storageclass-4.10.3.jar +kubernetes-client/4.11.1//kubernetes-client-4.11.1.jar +kubernetes-model-admissionregistration/4.11.1//kubernetes-model-admissionregistration-4.11.1.jar +kubernetes-model-apiextensions/4.11.1//kubernetes-model-apiextensions-4.11.1.jar +kubernetes-model-apps/4.11.1//kubernetes-model-apps-4.11.1.jar +kubernetes-model-autoscaling/4.11.1//kubernetes-model-autoscaling-4.11.1.jar +kubernetes-model-batch/4.11.1//kubernetes-model-batch-4.11.1.jar +kubernetes-model-certificates/4.11.1//kubernetes-model-certificates-4.11.1.jar +kubernetes-model-common/4.11.1//kubernetes-model-common-4.11.1.jar +kubernetes-model-coordination/4.11.1//kubernetes-model-coordination-4.11.1.jar +kubernetes-model-core/4.11.1//kubernetes-model-core-4.11.1.jar +kubernetes-model-discovery/4.11.1//kubernetes-model-discovery-4.11.1.jar +kubernetes-model-events/4.11.1//kubernetes-model-events-4.11.1.jar +kubernetes-model-extensions/4.11.1//kubernetes-model-extensions-4.11.1.jar +kubernetes-model-metrics/4.11.1//kubernetes-model-metrics-4.11.1.jar +kubernetes-model-networking/4.11.1//kubernetes-model-networking-4.11.1.jar +kubernetes-model-policy/4.11.1//kubernetes-model-policy-4.11.1.jar +kubernetes-model-rbac/4.11.1//kubernetes-model-rbac-4.11.1.jar +kubernetes-model-scheduling/4.11.1//kubernetes-model-scheduling-4.11.1.jar +kubernetes-model-settings/4.11.1//kubernetes-model-settings-4.11.1.jar +kubernetes-model-storageclass/4.11.1//kubernetes-model-storageclass-4.11.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar @@ -165,7 +165,7 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -openshift-model/4.10.3//openshift-model-4.10.3.jar +openshift-model/4.11.1//openshift-model-4.11.1.jar orc-core/1.5.12//orc-core-1.5.12.jar orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar orc-shims/1.5.12//orc-shims-1.5.12.jar diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index a4c80f551cdfc..9ae48f4da8b05 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -30,7 +30,7 @@ kubernetes - 4.10.3 + 4.11.1 diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 952081030f5f3..5274c0579eb05 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -28,7 +28,7 @@ 1.3.0 - 4.10.3 + 4.11.1 kubernetes-integration-tests From 4c8ee8856cb9714d433456fb0ce44dfebb00d83f Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 3 Nov 2020 22:50:59 +0900 Subject: [PATCH 0379/1009] [SPARK-33257][PYTHON][SQL] Support Column inputs in PySpark ordering functions (asc*, desc*) ### What changes were proposed in this pull request? This PR adds support for passing `Column`s as input to PySpark sorting functions. ### Why are the changes needed? According to SPARK-26979, PySpark functions should support both Column and str arguments, when possible. ### Does this PR introduce _any_ user-facing change? 
PySpark users can now provide both `Column` and `str` as an argument for `asc*` and `desc*` functions. ### How was this patch tested? New unit tests. Closes #30227 from zero323/SPARK-33257. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 30 +++++++++++++++---- python/pyspark/sql/functions.pyi | 12 ++++---- python/pyspark/sql/tests/test_functions.py | 35 ++++++++++++++++++++++ 3 files changed, 65 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 87b999dca76ec..86a88a5bf341e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -119,7 +119,10 @@ def asc(col): """ Returns a sort expression based on the ascending order of the given column name. """ - return _invoke_function("asc", col) + return ( + col.asc() if isinstance(col, Column) + else _invoke_function("asc", col) + ) @since(1.3) @@ -127,7 +130,10 @@ def desc(col): """ Returns a sort expression based on the descending order of the given column name. """ - return _invoke_function("desc", col) + return ( + col.desc() if isinstance(col, Column) + else _invoke_function("desc", col) + ) @since(1.3) @@ -457,7 +463,10 @@ def asc_nulls_first(col): Returns a sort expression based on the ascending order of the given column name, and null values return before non-null values. """ - return _invoke_function("asc_nulls_first", col) + return ( + col.asc_nulls_first() if isinstance(col, Column) + else _invoke_function("asc_nulls_first", col) + ) @since(2.4) @@ -466,7 +475,10 @@ def asc_nulls_last(col): Returns a sort expression based on the ascending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("asc_nulls_last", col) + return ( + col.asc_nulls_last() if isinstance(col, Column) + else _invoke_function("asc_nulls_last", col) + ) @since(2.4) @@ -475,7 +487,10 @@ def desc_nulls_first(col): Returns a sort expression based on the descending order of the given column name, and null values appear before non-null values. """ - return _invoke_function("desc_nulls_first", col) + return ( + col.desc_nulls_first() if isinstance(col, Column) + else _invoke_function("desc_nulls_first", col) + ) @since(2.4) @@ -484,7 +499,10 @@ def desc_nulls_last(col): Returns a sort expression based on the descending order of the given column name, and null values appear after non-null values. """ - return _invoke_function("desc_nulls_last", col) + return ( + col.desc_nulls_last() if isinstance(col, Column) + else _invoke_function("desc_nulls_last", col) + ) @since(1.6) diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index e395f5797bebd..281c1d75436c6 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -258,9 +258,9 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... -def asc(col: str) -> Column: ... -def asc_nulls_first(col: str) -> Column: ... -def asc_nulls_last(col: str) -> Column: ... +def asc(col: ColumnOrName) -> Column: ... +def asc_nulls_first(col: ColumnOrName) -> Column: ... +def asc_nulls_last(col: ColumnOrName) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... @@ -285,9 +285,9 @@ def count(col: ColumnOrName) -> Column: ... def cume_dist() -> Column: ... def degrees(col: ColumnOrName) -> Column: ... def dense_rank() -> Column: ... 
-def desc(col: str) -> Column: ... -def desc_nulls_first(col: str) -> Column: ... -def desc_nulls_last(col: str) -> Column: ... +def desc(col: ColumnOrName) -> Column: ... +def desc_nulls_first(col: ColumnOrName) -> Column: ... +def desc_nulls_last(col: ColumnOrName) -> Column: ... def exp(col: ColumnOrName) -> Column: ... def expm1(col: ColumnOrName) -> Column: ... def floor(col: ColumnOrName) -> Column: ... diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index cc77b8d5dfe3e..32549343d938f 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -265,6 +265,41 @@ def test_approxQuantile(self): self.assertRaises(ValueError, lambda: df.stat.approxQuantile(("a", 123), [0.1, 0.9], 0.1)) self.assertRaises(ValueError, lambda: df.stat.approxQuantile(["a", 123], [0.1, 0.9], 0.1)) + def test_sorting_functions_with_column(self): + from pyspark.sql import functions + from pyspark.sql.column import Column + + funs = [ + functions.asc_nulls_first, functions.asc_nulls_last, + functions.desc_nulls_first, functions.desc_nulls_last + ] + exprs = [col("x"), "x"] + + for fun in funs: + for expr in exprs: + res = fun(expr) + self.assertIsInstance(res, Column) + self.assertIn( + f"""'x {fun.__name__.replace("_", " ").upper()}'""", + str(res) + ) + + for expr in exprs: + res = functions.asc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x ASC NULLS FIRST'""", + str(res) + ) + + for expr in exprs: + res = functions.desc(expr) + self.assertIsInstance(res, Column) + self.assertIn( + """'x DESC NULLS LAST'""", + str(res) + ) + def test_sort_with_nulls_order(self): from pyspark.sql import functions From 56c623e98c54fdb4d47c9264ae1b282ecb2b7291 Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 3 Nov 2020 08:49:52 -0600 Subject: [PATCH 0380/1009] [SPARK-33284][WEB-UI] In the Storage UI page, clicking any field to sort the table will cause the header content to be lost ### What changes were proposed in this pull request? In the old version of spark in the storage UI page, the sorting function is normal, but sorting in the new version will cause the header content to be lost, So I try to fix the bug. ### Why are the changes needed? The header field of the table on the page is similar to the following, **note that each th contains the span attribute**: ```html .... Storage Level ..... ``` Since [PR#26136](https://github.com/apache/spark/pull/26136), if the `th` in the table itself contains the `span` attribute, the `span` will be deleted directly after clicking the sort, and the original header content will be lost. There are three problems in `sorttable.js`: 1. `sortrevind.class = "sorttable_sortrevind"` in [sorttab.js#107](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L107) and `sortfwdind.class = "sorttable_sortfwdind"` in [sorttab.js#125](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L125) sorttable_xx attribute should be assigned to`className` instead of `class`, as javascript uses `rowlists[j].className.search` rather than `rowlists[j].class.search` to determine whether the component has a sorting flag or not. 2. 
`rowlists[j].className.search(/\sorttable_sortrevind\b/)` in [sorttab.js#120](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L120) was wrong. The original intention is to search whether `className` contains the word `sorttable_sortrevind` , but the expression is wrong, it should be `\bsorttable_sortrevind\b` instead of `\sorttable_sortrevind\b` 3. The if check statement in the following code snippet ([sorttab.js#141](https://github.com/apache/spark/blob/9d5e48ea95d1c3017a51ff69584f32a18901b2b5/core/src/main/resources/org/apache/spark/ui/static/sorttable.js#L141)) was wrong. **If the `search` function does not find the target, it will return -1, but Boolean(-1) is actually equals true**. This statement will cause span to be deleted even if it does not contain `sorttable_sortfwdind` or `sorttable_sortrevind`. ```javascript rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) || rowlists[j].className.search(/\sorttable_sortrevind\b/) ) { rowlists[j].parentNode.removeChild(rowlists[j]); } } ``` ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? The manual test result of the ui page is as below: ![fix sorted](https://user-images.githubusercontent.com/52202080/97543194-daeaa680-1a02-11eb-8b11-8109c3e4e9a3.gif) Closes #30182 from akiyamaneko/ui_storage_sort_error. Authored-by: neko Signed-off-by: Sean Owen --- .../org/apache/spark/ui/static/sorttable.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index ecd580e5c64aa..3f98a0379dc3c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -99,12 +99,12 @@ sorttable = { 'sorttable_sorted_reverse'); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/)) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } sortrevind = document.createElement('span'); - sortrevind.class = "sorttable_sortrevind"; + sortrevind.className = "sorttable_sortrevind"; sortrevind.innerHTML = stIsIE ? ' 5' : ' ▾'; this.appendChild(sortrevind); return; @@ -117,12 +117,12 @@ sorttable = { 'sorttable_sorted'); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\sorttable_sortrevind\b/)) { + if (rowlists[j].className.search(/\bsorttable_sortrevind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } sortfwdind = document.createElement('span'); - sortfwdind.class = "sorttable_sortfwdind"; + sortfwdind.className = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? 
' 6' : ' ▴'; this.appendChild(sortfwdind); return; @@ -138,15 +138,15 @@ sorttable = { }); rowlists = this.parentNode.getElementsByTagName("span"); for (var j=0; j < rowlists.length; j++) { - if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) - || rowlists[j].className.search(/\sorttable_sortrevind\b/) ) { + if (rowlists[j].className.search(/\bsorttable_sortfwdind\b/) != -1 + || rowlists[j].className.search(/\bsorttable_sortrevind\b/) != -1) { rowlists[j].parentNode.removeChild(rowlists[j]); } } this.className += ' sorttable_sorted'; sortfwdind = document.createElement('span'); - sortfwdind.class = "sorttable_sortfwdind"; + sortfwdind.className = "sorttable_sortfwdind"; sortfwdind.innerHTML = stIsIE ? ' 6' : ' ▴'; this.appendChild(sortfwdind); From d900c6ff49ed898163f562d1211743decb75c601 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 3 Nov 2020 14:53:01 -0800 Subject: [PATCH 0381/1009] [SPARK-33293][SQL][FOLLOW-UP] Rename TableWriteExec to TableWriteExecHelper ### What changes were proposed in this pull request? Rename `TableWriteExec` in `WriteToDataSourceV2Exec.scala` to `TableWriteExecHelper`. ### Why are the changes needed? See [discussion](https://github.com/apache/spark/pull/30193#discussion_r516412653). The former is too general. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30235 from sunchao/SPARK-33293-2. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/WriteToDataSourceV2Exec.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index efa2c31e07602..1421a9315c3a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -66,7 +66,7 @@ case class CreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends TableWriteExec { + ifNotExists: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -100,7 +100,7 @@ case class AtomicCreateTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - ifNotExists: Boolean) extends TableWriteExec { + ifNotExists: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { @@ -134,7 +134,7 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExec { + orCreate: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -176,7 +176,7 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExec { + orCreate: Boolean) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable @@ -432,7 +432,7 @@ object DataWritingSparkTask extends Logging { } } -private[v2] trait TableWriteExec 
extends V2TableWriteExec with SupportsV1Write { +private[v2] trait TableWriteExecHelper extends V2TableWriteExec with SupportsV1Write { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper protected def writeToTable( From 034070a23aa8bcecc351bb2fec413e1662dcbb75 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 4 Nov 2020 12:30:38 +0800 Subject: [PATCH 0382/1009] Revert "[SPARK-33248][SQL] Add a configuration to control the legacy behavior of whether need to pad null value when value size less then schema size" This reverts commit 0c943cd2fbc6f2d25588991613abf469ace0153e. --- docs/sql-migration-guide.md | 2 -- .../org/apache/spark/sql/internal/SQLConf.scala | 15 --------------- .../execution/BaseScriptTransformationExec.scala | 10 ++-------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 319e72172d597..fdc764a93424b 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,8 +51,6 @@ license: | - In Spark 3.1, loading and saving of timestamps from/to parquet files fails if the timestamps are before 1900-01-01 00:00:00Z, and loaded (saved) as the INT96 type. In Spark 3.0, the actions don't fail but might lead to shifting of the input timestamps due to rebasing from/to Julian to/from Proleptic Gregorian calendar. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.parquet.int96RebaseModeInRead` or/and `spark.sql.legacy.parquet.int96RebaseModeInWrite` to `LEGACY`. - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. - - - In Spark 3.1, when `spark.sql.legacy.transformationPadNullWhenValueLessThenSchema` is true, Spark will pad NULL value when script transformation's output value size less then schema size in default-serde mode(script transformation with row format of `ROW FORMAT DELIMITED`). If false, Spark will keep original behavior to throw `ArrayIndexOutOfBoundsException`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8825f4f96378d..21357a492e39e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2765,18 +2765,6 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) - val LEGACY_SCRIPT_TRANSFORM_PAD_NULL = - buildConf("spark.sql.legacy.transformationPadNullWhenValueLessThenSchema") - .internal() - .doc("Whether pad null value when transformation output's value size less then " + - "schema size in default-serde mode(script transformation with row format of " + - "`ROW FORMAT DELIMITED`)." + - "When true, Spark will pad NULL value to keep same behavior with hive." 
+ - "When false, Spark keep original behavior to throw `ArrayIndexOutOfBoundsException`") - .version("3.1.0") - .booleanConf - .createWithDefault(true) - val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = buildConf("spark.sql.legacy.allowCastNumericToTimestamp") .internal() @@ -3505,9 +3493,6 @@ class SQLConf extends Serializable with Logging { def legacyAllowModifyActiveSession: Boolean = getConf(StaticSQLConf.LEGACY_ALLOW_MODIFY_ACTIVE_SESSION) - def legacyPadNullWhenValueLessThenSchema: Boolean = - getConf(SQLConf.LEGACY_SCRIPT_TRANSFORM_PAD_NULL) - def legacyAllowCastNumericToTimestamp: Boolean = getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index f2cddc7ba7290..74e5aa716ad67 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -104,16 +104,10 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val reader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)) val outputRowFormat = ioschema.outputRowFormatMap("TOK_TABLEROWFORMATFIELD") - - val padNull = if (conf.legacyPadNullWhenValueLessThenSchema) { - (arr: Array[String], size: Int) => arr.padTo(size, null) - } else { - (arr: Array[String], size: Int) => arr - } val processRowWithoutSerde = if (!ioschema.schemaLess) { prevLine: String => new GenericInternalRow( - padNull(prevLine.split(outputRowFormat), outputFieldWriters.size) + prevLine.split(outputRowFormat).padTo(outputFieldWriters.size, null) .zip(outputFieldWriters) .map { case (data, writer) => writer(data) }) } else { @@ -124,7 +118,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => new GenericInternalRow( - padNull(prevLine.split(outputRowFormat).slice(0, 2), 2) + prevLine.split(outputRowFormat).slice(0, 2).padTo(2, null) .map(kvWriter)) } From 1740b29b3f006abd08bc01b0ca807c3721d4bb0e Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 4 Nov 2020 05:01:39 +0000 Subject: [PATCH 0383/1009] [SPARK-33323][SQL] Add query resolved check before convert hive relation ### What changes were proposed in this pull request? Add query.resolved before convert hive relation. ### Why are the changes needed? For better error msg. ``` CREATE TABLE t STORED AS PARQUET AS SELECT * FROM ( SELECT c3 FROM ( SELECT c1, c2 from values(1,2) t(c1, c2) ) ) ``` Before this PR, we get such error msg ``` org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to toAttribute on unresolved object, tree: * at org.apache.spark.sql.catalyst.analysis.Star.toAttribute(unresolved.scala:244) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at org.apache.spark.sql.catalyst.plans.logical.Project$$anonfun$output$1.apply(basicLogicalOperators.scala:52) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) at scala.collection.immutable.List.foreach(List.scala:392) ``` ### Does this PR introduce _any_ user-facing change? Yes, error msg changed. ### How was this patch tested? Add test. Closes #30230 from ulysses-you/SPARK-33323. 
Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveStrategies.scala | 5 +++-- .../spark/sql/hive/HiveParquetSuite.scala | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index f91f78616abf5..e9f0461e6d1a8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -221,8 +221,9 @@ case class RelationConversions( // CTAS case CreateTable(tableDesc, mode, Some(query)) - if DDLUtils.isHiveTable(tableDesc) && tableDesc.partitionColumnNames.isEmpty && - isConvertible(tableDesc) && SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => + if query.resolved && DDLUtils.isHiveTable(tableDesc) && + tableDesc.partitionColumnNames.isEmpty && isConvertible(tableDesc) && + SQLConf.get.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => // validation is required to be done here before relation conversion. DDLUtils.checkDataColNames(tableDesc.copy(schema = query.schema)) // This is for CREATE TABLE .. STORED AS PARQUET/ORC AS SELECT null diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala index 470c6a342b4dd..df96b0675cc2d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -106,4 +106,21 @@ class HiveParquetSuite extends QueryTest with ParquetTest with TestHiveSingleton } } } + + test("SPARK-33323: Add query resolved check before convert hive relation") { + withTable("t") { + val msg = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE t STORED AS PARQUET AS + |SELECT * FROM ( + | SELECT c3 FROM ( + | SELECT c1, c2 from values(1,2) t(c1, c2) + | ) + |) + """.stripMargin) + }.getMessage + assert(msg.contains("cannot resolve '`c3`' given input columns")) + } + } } From 0ad35ba5f8bd6413669b568de659334bb9a3fb44 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 4 Nov 2020 06:50:37 +0000 Subject: [PATCH 0384/1009] [SPARK-33321][SQL] Migrate ANALYZE TABLE commands to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ANALYZE TABLE` and `ANALYZE TABLE ... FOR COLUMNS` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ANALYZE TABLE` is not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table/view identifier. 
For example, the following is the current behavior: ```scala sql("create temporary view t as select 1") sql("create database db") sql("create table db.t using csv as select 1") sql("use db") sql("ANALYZE TABLE t compute statistics") // Succeeds ``` With this change, ANALYZE TABLE above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table or permanent view.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$40(Analyzer.scala:872) at scala.Option.map(Option.scala:230) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.applyOrElse(Analyzer.scala:870) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.applyOrElse(Analyzer.scala:856) ``` , which is expected since temporary view is resolved first and ANALYZE TABLE doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `ANALYZE TABLE t` is resolved to a temp view `t` instead of table `db.t`. ### How was this patch tested? Updated existing tests. Closes #30229 from imback82/parse_v1table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++-- .../catalyst/analysis/v2ResolutionPlans.scala | 4 +- .../sql/catalyst/parser/AstBuilder.scala | 15 +++-- .../catalyst/plans/logical/statements.scala | 19 ------ .../catalyst/plans/logical/v2Commands.scala | 22 +++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 58 +++++++++++++------ .../analysis/ResolveSessionCatalog.scala | 36 ++++++------ .../datasources/v2/DataSourceV2Strategy.scala | 5 +- .../sql-tests/results/describe.sql.out | 2 +- .../spark/sql/StatisticsCollectionSuite.scala | 4 +- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++- .../spark/sql/execution/SQLViewSuite.scala | 5 +- 12 files changed, 120 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 10fe5314b0ef9..69cf30c34d494 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -865,9 +865,14 @@ class Analyzer( u.failAnalysis(s"${ident.quoted} is a temp view not table.") } u - case u @ UnresolvedTableOrView(ident) => + case u @ UnresolvedTableOrView(ident, allowTempView) => lookupTempView(ident) - .map(_ => ResolvedView(ident.asIdentifier, isTemp = true)) + .map { _ => + if (!allowTempView) { + u.failAnalysis(s"${ident.quoted} is a temp view not table or permanent view.") + } + ResolvedView(ident.asIdentifier, isTemp = true) + } .getOrElse(u) } @@ -926,7 +931,7 @@ class Analyzer( .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident)) => + case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1026,7 +1031,7 @@ class Analyzer( case table => table }.getOrElse(u) - case u @ UnresolvedTableOrView(identifier) => + case u @ UnresolvedTableOrView(identifier, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 1344d78838e1c..fcf4a438eb19c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -46,7 +46,9 @@ case class UnresolvedTable(multipartIdentifier: Seq[String]) extends LeafNode { * Holds the name of a table or view that has yet to be looked up in a catalog. It will * be resolved to [[ResolvedTable]] or [[ResolvedView]] during analysis. */ -case class UnresolvedTableOrView(multipartIdentifier: Seq[String]) extends LeafNode { +case class UnresolvedTableOrView( + multipartIdentifier: Seq[String], + allowTempView: Boolean = true) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index f28375c8d7a4a..c5e8429d49427 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3216,7 +3216,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AnalyzeTableStatement]], or an [[AnalyzeColumnStatement]]. + * Create an [[AnalyzeTable]], or an [[AnalyzeColumn]]. * Example SQL for analyzing a table or a set of partitions : * {{{ * ANALYZE TABLE multi_part_name [PARTITION (partcol1[=val1], partcol2[=val2], ...)] @@ -3249,18 +3249,23 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val tableName = visitMultipartIdentifier(ctx.multipartIdentifier()) if (ctx.ALL() != null) { checkPartitionSpec() - AnalyzeColumnStatement(tableName, None, allColumns = true) + AnalyzeColumn(UnresolvedTableOrView(tableName), None, allColumns = true) } else if (ctx.identifierSeq() == null) { val partitionSpec = if (ctx.partitionSpec != null) { visitPartitionSpec(ctx.partitionSpec) } else { Map.empty[String, Option[String]] } - AnalyzeTableStatement(tableName, partitionSpec, noScan = ctx.identifier != null) + AnalyzeTable( + UnresolvedTableOrView(tableName, allowTempView = false), + partitionSpec, + noScan = ctx.identifier != null) } else { checkPartitionSpec() - AnalyzeColumnStatement( - tableName, Option(visitIdentifierSeq(ctx.identifierSeq())), allColumns = false) + AnalyzeColumn( + UnresolvedTableOrView(tableName), + Option(visitIdentifierSeq(ctx.identifierSeq())), + allColumns = false) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 3a534b2eb8ceb..e711a6ad434d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -342,25 +342,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * An ANALYZE TABLE statement, as parsed from SQL. 
- */ -case class AnalyzeTableStatement( - tableName: Seq[String], - partitionSpec: Map[String, Option[String]], - noScan: Boolean) extends ParsedStatement - -/** - * An ANALYZE TABLE FOR COLUMNS statement, as parsed from SQL. - */ -case class AnalyzeColumnStatement( - tableName: Seq[String], - columnNames: Option[Seq[String]], - allColumns: Boolean) extends ParsedStatement { - require(columnNames.isDefined ^ allColumns, "Parameter `columnNames` or `allColumns` are " + - "mutually exclusive. Only one of them should be specified.") -} - /** * A REPAIR TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 96cb096ff97c9..a1e26ae1ba2c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -567,3 +567,25 @@ case class ShowFunctions( pattern: Option[String]) extends Command { override def children: Seq[LogicalPlan] = child.toSeq } + +/** + * The logical plan of the ANALYZE TABLE command that works for v2 catalogs. + */ +case class AnalyzeTable( + child: LogicalPlan, + partitionSpec: Map[String, Option[String]], + noScan: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ANALYZE TABLE FOR COLUMNS command that works for v2 catalogs. + */ +case class AnalyzeColumn( + child: LogicalPlan, + columnNames: Option[Seq[String]], + allColumns: Boolean) extends Command { + require(columnNames.isDefined ^ allColumns, "Parameter `columnNames` or `allColumns` are " + + "mutually exclusive. 
Only one of them should be specified.") + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index a81f9e16083d6..aca7602bdbcb0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1502,42 +1502,59 @@ class DDLParserSuite extends AnalysisTest { test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map.empty, noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map.empty, noScan = false)) comparePlans(parsePlan("analyze table a.b.c compute statistics noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map.empty, noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map.empty, noScan = true)) comparePlans(parsePlan("analyze table a.b.c partition (a) compute statistics nOscAn"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("a" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("a" -> None), noScan = true)) // Partitions specified comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09') COMPUTE STATISTICS noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr=11) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement( - Seq("a", "b", "c"), Map("ds" -> None, "hr" -> Some("11")), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> 
Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> None, "hr" -> None), noScan = false)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS noscan"), - AnalyzeTableStatement(Seq("a", "b", "c"), Map("ds" -> None, "hr" -> None), noScan = true)) + AnalyzeTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + Map("ds" -> None, "hr" -> None), noScan = true)) intercept("analyze table a.b.c compute statistics xxxx", "Expected `NOSCAN` instead of `xxxx`") @@ -1550,7 +1567,8 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR COLUMNS key, value"), - AnalyzeColumnStatement(Seq("a", "b", "c"), Option(Seq("key", "value")), allColumns = false)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) // Partition specified - should be ignored comparePlans( @@ -1559,7 +1577,8 @@ class DDLParserSuite extends AnalysisTest { |ANALYZE TABLE a.b.c PARTITION(ds='2017-06-10') |COMPUTE STATISTICS FOR COLUMNS key, value """.stripMargin), - AnalyzeColumnStatement(Seq("a", "b", "c"), Option(Seq("key", "value")), allColumns = false)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) // Partition specified should be ignored in case of COMPUTE STATISTICS FOR ALL COLUMNS comparePlans( @@ -1568,7 +1587,8 @@ class DDLParserSuite extends AnalysisTest { |ANALYZE TABLE a.b.c PARTITION(ds='2017-06-10') |COMPUTE STATISTICS FOR ALL COLUMNS """.stripMargin), - AnalyzeColumnStatement(Seq("a", "b", "c"), None, allColumns = true)) + AnalyzeColumn( + UnresolvedTableOrView(Seq("a", "b", "c")), None, allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", "mismatched input 'key' expecting {, ';'}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f35eb41fe2ce1..610632ac9256e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,12 +22,11 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} /** @@ -255,19 +254,11 @@ class ResolveSessionCatalog( case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) - case DescribeRelation(r @ ResolvedTable(_, ident, _: V1Table), partitionSpec, isExtended) - if isSessionCatalog(r.catalog) => - DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. - case DescribeRelation(ResolvedView(ident, _), partitionSpec, isExtended) => + case DescribeRelation(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumn(r @ ResolvedTable(_, _, _: V1Table), colNameParts, isExtended) - if isSessionCatalog(r.catalog) => - DescribeColumnCommand(r.identifier.asTableIdentifier, colNameParts, isExtended) - - case DescribeColumn(ResolvedView(ident, _), colNameParts, isExtended) => + case DescribeColumn(ResolvedV1TableOrViewIdentifier(ident), colNameParts, isExtended) => DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the @@ -419,17 +410,16 @@ class ResolveSessionCatalog( } ShowTablesCommand(db, Some(pattern), true, partitionsSpec) - case AnalyzeTableStatement(tbl, partitionSpec, noScan) => - val v1TableName = parseV1Table(tbl, "ANALYZE TABLE") + // ANALYZE TABLE works on permanent views if the views are cached. + case AnalyzeTable(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, noScan) => if (partitionSpec.isEmpty) { - AnalyzeTableCommand(v1TableName.asTableIdentifier, noScan) + AnalyzeTableCommand(ident.asTableIdentifier, noScan) } else { - AnalyzePartitionCommand(v1TableName.asTableIdentifier, partitionSpec, noScan) + AnalyzePartitionCommand(ident.asTableIdentifier, partitionSpec, noScan) } - case AnalyzeColumnStatement(tbl, columnNames, allColumns) => - val v1TableName = parseTempViewOrV1Table(tbl, "ANALYZE TABLE") - AnalyzeColumnCommand(v1TableName.asTableIdentifier, columnNames, allColumns) + case AnalyzeColumn(ResolvedV1TableOrViewIdentifier(ident), columnNames, allColumns) => + AnalyzeColumnCommand(ident.asTableIdentifier, columnNames, allColumns) case RepairTableStatement(tbl) => val v1TableName = parseV1Table(tbl, "MSCK REPAIR TABLE") @@ -706,6 +696,14 @@ class ResolveSessionCatalog( } } + object ResolvedV1TableOrViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case ResolvedView(ident, _) => Some(ident) + case _ => None + } + } + private def assertTopLevelColumn(colName: Seq[String], command: String): Unit = { if (colName.length > 1) { throw new AnalysisException(s"$command does not support nested column: ${colName.quoted}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 81a36dee58389..4bb58142b3d19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -231,7 +231,7 
@@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DropTable(r: ResolvedTable, ifExists, _) => DropTableExec(r.catalog, r.identifier, ifExists) :: Nil - case NoopDropTable(multipartIdentifier) => + case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => @@ -280,6 +280,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r @ ShowTableProperties(rt: ResolvedTable, propertyKey) => ShowTablePropertiesExec(r.output, rt.table, propertyKey) :: Nil + case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => + throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index a7de033e3a1ac..07aed98d120f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -540,7 +540,7 @@ struct -- !query output == Parsed Logical Plan == 'DescribeRelation false -+- 'UnresolvedTableOrView [t] ++- 'UnresolvedTableOrView [t], true == Analyzed Logical Plan == col_name: string, data_type: string, comment: string diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 18356a4de9ef4..b016cc3f57e0d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -540,10 +540,10 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared test("analyzes column statistics in cached global temporary view") { withGlobalTempView("gTempView") { val globalTempDB = spark.sharedState.globalTempViewManager.database - val errMsg1 = intercept[NoSuchTableException] { + val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains(s"Table or view 'gTempView' not found in database '$globalTempDB'")) + assert(errMsg1.contains(s"Table or view not found: $globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") val errMsg2 = intercept[AnalysisException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 298c07059ff44..893ee5f130cda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2010,8 +2010,8 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS") - testV1CommandSupportingTempView("ANALYZE TABLE", s"$t COMPUTE STATISTICS FOR ALL COLUMNS") + testNotSupportedV2Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS") + testNotSupportedV2Command("ANALYZE TABLE", s"$t COMPUTE STATISTICS FOR ALL COLUMNS") } } @@ -2606,6 +2606,13 @@ class DataSourceV2SQLSuite } } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { + val e = intercept[AnalysisException] { + 
sql(s"$sqlCommand $sqlParams") + } + assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) + } + private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index f3cae24527d60..7a6b0b8d6dd9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -175,7 +175,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { }.getMessage assert(e2.contains("SHOW CREATE TABLE is not supported on a temporary view")) assertNoSuchTable(s"SHOW PARTITIONS $viewName") - assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + val e3 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + }.getMessage + assert(e3.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } From ff724d23b696b2c4232be5daf31eed569779d720 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Wed, 4 Nov 2020 06:51:54 +0000 Subject: [PATCH 0385/1009] [SPARK-33214][TEST][HIVE] Stop HiveExternalCatalogVersionsSuite from using a hard-coded location to store localized Spark binaries ### What changes were proposed in this pull request? This PR changes `HiveExternalCatalogVersionsSuite` to, by default, use a standard temporary directory to store the Spark binaries that it localizes. It additionally adds a new System property, `spark.test.cache-dir`, which can be used to define a static location into which the Spark binary will be localized to allow for sharing between test executions. If the System property is used, the downloaded binaries won't be deleted after the test runs. ### Why are the changes needed? In SPARK-22356 (PR #19579), the `sparkTestingDir` used by `HiveExternalCatalogVersionsSuite` became hard-coded to enable re-use of the downloaded Spark tarball between test executions: ``` // For local test, you can set `sparkTestingDir` to a static value like `/tmp/test-spark`, to // avoid downloading Spark of different versions in each run. private val sparkTestingDir = new File("/tmp/test-spark") ``` However this doesn't work, since it gets deleted every time: ``` override def afterAll(): Unit = { try { Utils.deleteRecursively(wareHousePath) Utils.deleteRecursively(tmpDataDir) Utils.deleteRecursively(sparkTestingDir) } finally { super.afterAll() } } ``` It's bad that we're hard-coding to a `/tmp` directory, as in some cases this is not the proper place to store temporary files. We're not currently making any good use of it. ### Does this PR introduce _any_ user-facing change? Developer-facing changes only, as this is in a test. ### How was this patch tested? The test continues to execute as expected. Closes #30122 from xkrogen/xkrogen-SPARK-33214-hiveexternalversioncatalogsuite-fix. 
Authored-by: Erik Krogen Signed-off-by: Wenchen Fan --- .../HiveExternalCatalogVersionsSuite.scala | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index b81b7e8ec0c0f..38a8c492d77a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -42,26 +42,33 @@ import org.apache.spark.util.Utils * Test HiveExternalCatalog backward compatibility. * * Note that, this test suite will automatically download spark binary packages of different - * versions to a local directory `/tmp/spark-test`. If there is already a spark folder with - * expected version under this local directory, e.g. `/tmp/spark-test/spark-2.0.3`, we will skip the - * downloading for this spark version. + * versions to a local directory. If the `spark.test.cache-dir` system property is defined, this + * directory will be used. If there is already a spark folder with expected version under this + * local directory, e.g. `/{cache-dir}/spark-2.0.3`, downloading for this spark version will be + * skipped. If the system property is not present, a temporary directory will be used and cleaned + * up after the test. */ @SlowHiveTest @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { + import HiveExternalCatalogVersionsSuite._ private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") - // For local test, you can set `sparkTestingDir` to a static value like `/tmp/test-spark`, to + // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to // avoid downloading Spark of different versions in each run. - private val sparkTestingDir = new File("/tmp/test-spark") + private val sparkTestingDir = Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)) + .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) override def afterAll(): Unit = { try { Utils.deleteRecursively(wareHousePath) Utils.deleteRecursively(tmpDataDir) - Utils.deleteRecursively(sparkTestingDir) + // Only delete sparkTestingDir if it wasn't defined to a static location by the system prop + if (Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)).isEmpty) { + Utils.deleteRecursively(sparkTestingDir) + } } finally { super.afterAll() } @@ -307,3 +314,8 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { } } } + +object HiveExternalCatalogVersionsSuite { + private val SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY = "spark.test.cache-dir" +} + From 0b557b329046c66ee67a8c94c5bb95ffbe50e135 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 4 Nov 2020 17:39:06 +0900 Subject: [PATCH 0386/1009] [SPARK-33265][TEST] Rename classOf[Seq] to classOf[scala.collection.Seq] in PostgresIntegrationSuite for Scala 2.13 ### What changes were proposed in this pull request? This PR renames some part of `Seq` in `PostgresIntegrationSuite` to `scala.collection.Seq`. When I run `docker-integration-test`, I noticed that `PostgresIntegrationSuite` failed due to `ClassCastException`. 
The reason is the same as what is resolved in SPARK-29292. ### Why are the changes needed? To pass `docker-integration-test` for Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran `PostgresIntegrationSuite` fixed and confirmed it successfully finished. Closes #30166 from sarutak/fix-toseq-postgresql. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .../spark/sql/jdbc/PostgresIntegrationSuite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index de9c0660c51c1..fa13100b5fdc8 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -116,14 +116,14 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(classOf[java.lang.Boolean].isAssignableFrom(types(7))) assert(classOf[String].isAssignableFrom(types(8))) assert(classOf[String].isAssignableFrom(types(9))) - assert(classOf[Seq[Int]].isAssignableFrom(types(10))) - assert(classOf[Seq[String]].isAssignableFrom(types(11))) - assert(classOf[Seq[Double]].isAssignableFrom(types(12))) - assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(13))) + assert(classOf[scala.collection.Seq[Int]].isAssignableFrom(types(10))) + assert(classOf[scala.collection.Seq[String]].isAssignableFrom(types(11))) + assert(classOf[scala.collection.Seq[Double]].isAssignableFrom(types(12))) + assert(classOf[scala.collection.Seq[BigDecimal]].isAssignableFrom(types(13))) assert(classOf[String].isAssignableFrom(types(14))) assert(classOf[java.lang.Float].isAssignableFrom(types(15))) assert(classOf[java.lang.Short].isAssignableFrom(types(16))) - assert(classOf[Seq[BigDecimal]].isAssignableFrom(types(17))) + assert(classOf[scala.collection.Seq[BigDecimal]].isAssignableFrom(types(17))) assert(rows(0).getString(0).equals("hello")) assert(rows(0).getInt(1) == 42) assert(rows(0).getDouble(2) == 1.25) From 42c0b175ce6ee4bf1104b6a8cef6bb6477693781 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 4 Nov 2020 08:35:10 -0800 Subject: [PATCH 0387/1009] [SPARK-33338][SQL] GROUP BY using literal map should not fail ### What changes were proposed in this pull request? This PR aims to fix `semanticEquals` works correctly on `GetMapValue` expressions having literal maps with `ArrayBasedMapData` and `GenericArrayData`. ### Why are the changes needed? This is a regression from Apache Spark 1.6.x. ```scala scala> sc.version res1: String = 1.6.3 scala> sqlContext.sql("SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]").show +---+ |_c0| +---+ | v1| +---+ ``` Apache Spark 2.x ~ 3.0.1 raise`RuntimeException` for the following queries. 
```sql CREATE TABLE t USING ORC AS SELECT map('k1', 'v1') m, 'k1' k SELECT map('k1', 'v1')[k] FROM t GROUP BY 1 SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k] SELECT map('k1', 'v1')[k] a FROM t GROUP BY a ``` **BEFORE** ```scala Caused by: java.lang.RuntimeException: Couldn't find k#3 in [keys: [k1], values: [v1][k#3]#6] at scala.sys.package$.error(package.scala:27) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85) at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ``` **AFTER** ```sql spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY 1; v1 Time taken: 1.278 seconds, Fetched 1 row(s) spark-sql> SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]; v1 Time taken: 0.313 seconds, Fetched 1 row(s) spark-sql> SELECT map('k1', 'v1')[k] a FROM t GROUP BY a; v1 Time taken: 0.265 seconds, Fetched 1 row(s) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #30246 from dongjoon-hyun/SPARK-33338. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/literals.scala | 2 ++ .../catalyst/expressions/ComplexTypeSuite.scala | 15 +++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++ 3 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 9e96ab8a9b6ca..413d0af61a05c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -316,6 +316,8 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { (value, o.value) match { case (null, null) => true case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) + case (a: ArrayBasedMapData, b: ArrayBasedMapData) => + a.keyArray == b.keyArray && a.valueArray == b.valueArray case (a, b) => a != null && a.equals(b) } case _ => false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index cdb83d3580f0a..38e32ff2518f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext +import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -471,4 +472,18 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { CreateNamedStruct(Seq("a", "x", "b", 2.0)).genCode(ctx) assert(ctx.inlinedMutableStates.isEmpty) } + + test("SPARK-33338: semanticEquals should handle static GetMapValue correctly") { + val keys = new Array[UTF8String](1) + val 
values = new Array[UTF8String](1) + keys(0) = UTF8String.fromString("key") + values(0) = UTF8String.fromString("value") + + val d1 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) + val d2 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) + val m1 = GetMapValue(Literal.create(d1, MapType(StringType, StringType)), Literal("a")) + val m2 = GetMapValue(Literal.create(d2, MapType(StringType, StringType)), Literal("a")) + + assert(m1.semanticEquals(m2)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0dd2a286772a5..cebbf9282f710 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3706,6 +3706,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33338: GROUP BY using literal map should not fail") { + withTempDir { dir => + sql(s"CREATE TABLE t USING ORC LOCATION '${dir.toURI}' AS SELECT map('k1', 'v1') m, 'k1' k") + Seq( + "SELECT map('k1', 'v1')[k] FROM t GROUP BY 1", + "SELECT map('k1', 'v1')[k] FROM t GROUP BY map('k1', 'v1')[k]", + "SELECT map('k1', 'v1')[k] a FROM t GROUP BY a").foreach { statement => + checkAnswer(sql(statement), Row("v1")) + } + } + } } case class Foo(bar: Option[String]) From b7fff0397319efd2987d4cceff4f738f1c06409d Mon Sep 17 00:00:00 2001 From: Luca Canali Date: Wed, 4 Nov 2020 16:48:55 -0600 Subject: [PATCH 0388/1009] [SPARK-31711][CORE] Register the executor source with the metrics system when running in local mode ### What changes were proposed in this pull request? This PR proposes to register the executor source with the Spark metrics system when running in local mode. ### Why are the changes needed? The Apache Spark metrics system provides many useful insights on the Spark workload. In particular, the [executor source metrics](https://github.com/apache/spark/blob/master/docs/monitoring.md#component-instance--executor) provide detailed info, including the number of active tasks, I/O metrics, and several task metrics details. The executor source metrics, contrary to other sources (for example ExecutorMetrics source), is not available when running in local mode. Having executor metrics in local mode can be useful when testing and troubleshooting Spark workloads in a development environment. The metrics can be fed to a dashboard to see the evolution of resource usage and can be used to troubleshoot performance, as [in this example](https://github.com/cerndb/spark-dashboard). Currently users will have to deploy on a cluster to be able to collect executor source metrics, while the possibility of having them in local mode is handy for testing. ### Does this PR introduce _any_ user-facing change? - This PR exposes executor source metrics data when running in local mode. ### How was this patch tested? - Manually tested by running in local mode and inspecting the metrics listed in http://localhost:4040/metrics/json/ - Also added a test in `SourceConfigSuite` Closes #28528 from LucaCanali/metricsWithLocalMode. 
Authored-by: Luca Canali Signed-off-by: Thomas Graves --- .../main/scala/org/apache/spark/SparkContext.scala | 5 ++++- .../scala/org/apache/spark/executor/Executor.scala | 8 ++++++++ .../spark/metrics/source/SourceConfigSuite.scala | 12 ++++++++++++ docs/monitoring.md | 8 ++++++-- 4 files changed, 30 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b35768222437c..d68015454de9d 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -42,7 +42,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFor import org.apache.spark.annotation.DeveloperApi import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} -import org.apache.spark.executor.{ExecutorMetrics, ExecutorMetricsSource} +import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource} import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -625,6 +625,9 @@ class SparkContext(config: SparkConf) extends Logging { // Post init _taskScheduler.postStartHook() + if (isLocal) { + _env.metricsSystem.registerSource(Executor.executorSourceLocalModeOnly) + } _env.metricsSystem.registerSource(_dagScheduler.metricsSource) _env.metricsSystem.registerSource(new BlockManagerSource(_env.blockManager)) _env.metricsSystem.registerSource(new JVMCPUSource()) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 6653650615192..1a0ad566633da 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -135,6 +135,11 @@ private[spark] class Executor( env.metricsSystem.registerSource(new JVMCPUSource()) executorMetricsSource.foreach(_.register(env.metricsSystem)) env.metricsSystem.registerSource(env.blockManager.shuffleMetricsSource) + } else { + // This enable the registration of the executor source in local mode. + // The actual registration happens in SparkContext, + // it cannot be done here as the appId is not available yet + Executor.executorSourceLocalModeOnly = executorSource } // Whether to load classes in user jars before those in Spark jars @@ -987,4 +992,7 @@ private[spark] object Executor { // task is fully deserialized. When possible, the TaskContext.getLocalProperty call should be // used instead. 
val taskDeserializationProps: ThreadLocal[Properties] = new ThreadLocal[Properties] + + // Used to store executorSource, for local mode only + var executorSourceLocalModeOnly: ExecutorSource = null } diff --git a/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala index 8f5ab7419d4f7..7da1403ecd4b5 100644 --- a/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/source/SourceConfigSuite.scala @@ -80,4 +80,16 @@ class SourceConfigSuite extends SparkFunSuite with LocalSparkContext { } } + test("SPARK-31711: Test executor source registration in local mode") { + val conf = new SparkConf() + val sc = new SparkContext("local", "test", conf) + try { + val metricsSystem = sc.env.metricsSystem + + // Executor source should be registered + assert (metricsSystem.getSourcesByName("executor").nonEmpty) + } finally { + sc.stop() + } + } } diff --git a/docs/monitoring.md b/docs/monitoring.md index 3513fed7b3d78..a07a113445981 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1155,6 +1155,11 @@ This is the component with the largest amount of instrumented metrics - namespace=JVMCPU - jvmCpuTime +- namespace=executor + - **note:** These metrics are available in the driver in local mode only. + - A full list of available metrics in this + namespace can be found in the corresponding entry for the Executor component instance. + - namespace=ExecutorMetrics - **note:** these metrics are conditional to a configuration parameter: `spark.metrics.executorMetricsSource.enabled` (default is true) @@ -1167,8 +1172,7 @@ This is the component with the largest amount of instrumented metrics custom plugins into Spark. ### Component instance = Executor -These metrics are exposed by Spark executors. Note, currently they are not available -when running in local mode. +These metrics are exposed by Spark executors. - namespace=executor (metrics are of type counter or gauge) - bytesRead.count From d24dbe89557c6cdbe5c7a2b190ccd4e847757428 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 4 Nov 2020 15:05:35 -0800 Subject: [PATCH 0389/1009] [SPARK-33343][BUILD] Fix the build with sbt to copy hadoop-client-runtime.jar ### What changes were proposed in this pull request? This PR fix the issue that spark-shell doesn't work if it's built with `sbt package` (without any profiles specified). It's due to hadoop-client-runtime.jar isn't copied to assembly/target/scala-2.12/jars. 
``` $ bin/spark-shell Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/shaded/com/ctc/wstx/io/InputBootstrapper at org.apache.spark.deploy.SparkHadoopUtil$.newConfiguration(SparkHadoopUtil.scala:426) at org.apache.spark.deploy.SparkSubmit.$anonfun$prepareSubmitEnvironment$2(SparkSubmit.scala:342) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:342) at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:877) at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180) at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.shaded.com.ctc.wstx.io.InputBootstrapper at java.net.URLClassLoader.findClass(URLClassLoader.java:382) at java.lang.ClassLoader.loadClass(ClassLoader.java:418) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352) at java.lang.ClassLoader.loadClass(ClassLoader.java:351) ``` ### Why are the changes needed? This is a bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran spark-shell and confirmed it works. Closes #30250 from sarutak/copy-runtime-sbt. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 2c6f458ee25fd..dd0c5f04e5875 100644 --- a/pom.xml +++ b/pom.xml @@ -1084,7 +1084,7 @@ org.apache.hadoop hadoop-client-runtime ${hadoop.version} - runtime + ${hadoop.deps.scope} org.apache.hadoop From 7e8eb0447bfca2e38040c974dce711659e613e3c Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 5 Nov 2020 11:50:11 +0900 Subject: [PATCH 0390/1009] [SPARK-33314][SQL] Avoid dropping rows in Avro reader ### What changes were proposed in this pull request? This PR adds a check to RowReader#hasNextRow such that multiple calls to RowReader#hasNextRow with no intervening call to RowReader#nextRow will avoid consuming more than 1 record. This PR also modifies RowReader#nextRow such that consecutive calls will return new rows (previously consecutive calls would return the same row). ### Why are the changes needed? SPARK-32346 slightly refactored the AvroFileFormat and AvroPartitionReaderFactory to use a new iterator-like trait called AvroUtils#RowReader. RowReader#hasNextRow consumes a raw input record and stores the deserialized row for the next call to RowReader#nextRow. Unfortunately, sometimes hasNextRow is called twice before nextRow is called, resulting in a lost row. For example (which assumes V1 Avro reader): ```scala val df = spark.range(0, 25).toDF("index") df.write.mode("overwrite").format("avro").save("index_avro") val loaded = spark.read.format("avro").load("index_avro") // The following will give the expected size loaded.collect.size // The following will give the wrong size loaded.orderBy("index").collect.size ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added tests, which fail without the fix. Closes #30221 from bersprockets/avro_iterator_play. 
Authored-by: Bruce Robbins Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/avro/AvroUtils.scala | 14 +++- .../org/apache/spark/sql/avro/AvroSuite.scala | 84 ++++++++++++++++++- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 3583b38a01333..51997acc6dffe 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -174,7 +174,7 @@ private[sql] object AvroUtils extends Logging { private[this] var currentRow: Option[InternalRow] = None def hasNextRow: Boolean = { - do { + while (!completed && currentRow.isEmpty) { val r = fileReader.hasNext && !fileReader.pastSync(stopPosition) if (!r) { fileReader.close() @@ -182,15 +182,21 @@ private[sql] object AvroUtils extends Logging { currentRow = None } else { val record = fileReader.next() + // the row must be deserialized in hasNextRow, because AvroDeserializer#deserialize + // potentially filters rows currentRow = deserializer.deserialize(record).asInstanceOf[Option[InternalRow]] } - } while (!completed && currentRow.isEmpty) - + } currentRow.isDefined } def nextRow: InternalRow = { - currentRow.getOrElse { + if (currentRow.isEmpty) { + hasNextRow + } + val returnRow = currentRow + currentRow = None // free up hasNextRow to consume more Avro records, if not exhausted + returnRow.getOrElse { throw new NoSuchElementException("next on empty iterator") } } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 52cab880ab897..4f4af97f1299f 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.avro import java.io._ -import java.net.URL +import java.net.{URI, URL} import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.{Date, Timestamp} import java.util.{Locale, UUID} @@ -31,16 +31,20 @@ import org.apache.avro.Schema.Type._ import org.apache.avro.file.{DataFileReader, DataFileWriter} import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed} +import org.apache.avro.mapred.FsInput import org.apache.commons.io.FileUtils +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf, SparkException, SparkUpgradeException} import org.apache.spark.sql._ import org.apache.spark.sql.TestingUDT.IntervalData +import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition, PartitionedFile} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf @@ -1836,6 +1840,24 @@ 
abstract class AvroSuite } } } + + test("SPARK-33314: RowReader doesn't over-consume when hasNextRow called twice") { + withTempPath { dir => + Seq((1), (2), (3)) + .toDF("index") + .write + .format("avro") + .save(dir.getCanonicalPath) + val df = spark + .read + .format("avro") + .load(dir.getCanonicalPath) + .orderBy("index") + + checkAnswer(df, + Seq(Row(1), Row(2), Row(3))) + } + } } class AvroV1Suite extends AvroSuite { @@ -2005,3 +2027,61 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { } } } + +class AvroRowReaderSuite + extends QueryTest + with SharedSparkSession { + + import testImplicits._ + + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") // need this for BatchScanExec + + test("SPARK-33314: hasNextRow and nextRow properly handle consecutive calls") { + withTempPath { dir => + Seq((1), (2), (3)) + .toDF("value") + .coalesce(1) + .write + .format("avro") + .save(dir.getCanonicalPath) + + val df = spark.read.format("avro").load(dir.getCanonicalPath) + val fileScan = df.queryExecution.executedPlan collectFirst { + case BatchScanExec(_, f: AvroScan) => f + } + val filePath = fileScan.get.fileIndex.inputFiles(0) + val fileSize = new File(new URI(filePath)).length + val in = new FsInput(new Path(new URI(filePath)), new Configuration()) + val reader = DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]()) + + val it = new Iterator[InternalRow] with AvroUtils.RowReader { + override val fileReader = reader + override val deserializer = new AvroDeserializer( + reader.getSchema, + StructType(new StructField("value", IntegerType, true) :: Nil), + CORRECTED, + new NoopFilters) + override val stopPosition = fileSize + + override def hasNext: Boolean = hasNextRow + + override def next: InternalRow = nextRow + } + assert(it.hasNext == true) + assert(it.next.getInt(0) == 1) + // test no intervening next + assert(it.hasNext == true) + assert(it.hasNext == true) + // test no intervening hasNext + assert(it.next.getInt(0) == 2) + assert(it.next.getInt(0) == 3) + assert(it.hasNext == false) + assertThrows[NoSuchElementException] { + it.next + } + } + } +} From 551b504cfe38d1ab583e617c37e49659edd65c2e Mon Sep 17 00:00:00 2001 From: Bo Zhang Date: Thu, 5 Nov 2020 12:27:20 +0800 Subject: [PATCH 0391/1009] [SPARK-33316][SQL] Support user provided nullable Avro schema for non-nullable catalyst schema in Avro writing ### What changes were proposed in this pull request? This change is to support user provided nullable Avro schema for data with non-nullable catalyst schema in Avro writing. Without this change, when users try to use a nullable Avro schema to write data with a non-nullable catalyst schema, it will throw an `IncompatibleSchemaException` with a message like `Cannot convert Catalyst type StringType to Avro type ["null","string"]`. With this change it will assume that the data is non-nullable, log a warning message for the nullability difference and serialize the data to Avro format with the nullable Avro schema provided. ### Why are the changes needed? This change is needed because sometimes our users do not have full control over the nullability of the Avro schemas they use, and this change provides them with the flexibility. ### Does this PR introduce _any_ user-facing change? Yes. Users are allowed to use nullable Avro schemas for data with non-nullable catalyst schemas in Avro writing after the change. ### How was this patch tested? Added unit tests. Closes #30224 from bozhang2820/avro-nullable. 
Authored-by: Bo Zhang Signed-off-by: Gengliang Wang --- .../spark/sql/avro/AvroSerializer.scala | 54 ++++++++++++++---- .../spark/sql/avro/SchemaConverters.scala | 2 + .../spark/sql/avro/AvroFunctionsSuite.scala | 37 ++++++++++++ .../org/apache/spark/sql/avro/AvroSuite.scala | 57 +++++++++++++++++++ 4 files changed, 140 insertions(+), 10 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala index 08b1b4184fb0b..0ea95d1c0db5d 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -255,20 +255,54 @@ private[sql] class AvroSerializer( result } + /** + * Resolve a possibly nullable Avro Type. + * + * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another + * non-null type. This method will check the nullability of the input Avro type and return the + * non-null type within when it is nullable. Otherwise it will return the input Avro type + * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an + * unsupported nullable type. + * + * It will also log a warning message if the nullability for Avro and catalyst types are + * different. + */ private def resolveNullableType(avroType: Schema, nullable: Boolean): Schema = { - if (avroType.getType == Type.UNION && nullable) { - // avro uses union to represent nullable type. + val (avroNullable, resolvedAvroType) = resolveAvroType(avroType) + warnNullabilityDifference(avroNullable, nullable) + resolvedAvroType + } + + /** + * Check the nullability of the input Avro type and resolve it when it is nullable. The first + * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second + * return value is the possibly resolved type. + */ + private def resolveAvroType(avroType: Schema): (Boolean, Schema) = { + if (avroType.getType == Type.UNION) { val fields = avroType.getTypes.asScala - assert(fields.length == 2) val actualType = fields.filter(_.getType != Type.NULL) - assert(actualType.length == 1) - actualType.head - } else { - if (nullable) { - logWarning("Writing avro files with non-nullable avro schema with nullable catalyst " + - "schema will throw runtime exception if there is a record with null value.") + if (fields.length != 2 || actualType.length != 1) { + throw new UnsupportedAvroTypeException( + s"Unsupported Avro UNION type $avroType: Only UNION of a null type and a non-null " + + "type is supported") } - avroType + (true, actualType.head) + } else { + (false, avroType) + } + } + + /** + * log a warning message if the nullability for Avro and catalyst types are different. 
+ */ + private def warnNullabilityDifference(avroNullable: Boolean, catalystNullable: Boolean): Unit = { + if (avroNullable && !catalystNullable) { + logWarning("Writing Avro files with nullable Avro schema and non-nullable catalyst schema.") + } + if (!avroNullable && catalystNullable) { + logWarning("Writing Avro files with non-nullable Avro schema and nullable catalyst " + + "schema will throw runtime exception if there is a record with null value.") } } } diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 905f90fa79373..c685c89f0dfc8 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -208,3 +208,5 @@ object SchemaConverters { private[avro] class IncompatibleSchemaException( msg: String, ex: Throwable = null) extends Exception(msg, ex) + +private[avro] class UnsupportedAvroTypeException(msg: String) extends Exception(msg) diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index 7f14efe15ad55..c9e0d4344691a 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -201,4 +201,41 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { Map("avroSchema" -> evolvedAvroSchema).asJava)), expected) } + + test("roundtrip in to_avro and from_avro - struct with nullable Avro schema") { + val df = spark.range(10).select(struct('id, 'id.cast("string").as("str")).as("struct")) + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": "long"}, + | {"name": "str", "type": ["null", "string"]} + | ] + |} + """.stripMargin + val avroStructDF = df.select(functions.to_avro('struct, avroTypeStruct).as("avro")) + checkAnswer(avroStructDF.select( + functions.from_avro('avro, avroTypeStruct)), df) + } + + test("to_avro with unsupported nullable Avro schema") { + val df = spark.range(10).select(struct('id, 'id.cast("string").as("str")).as("struct")) + for (unsupportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) { + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": $unsupportedAvroType}, + | {"name": "str", "type": ["null", "string"]} + | ] + |} + """.stripMargin + val message = intercept[SparkException] { + df.select(functions.to_avro('struct, avroTypeStruct).as("avro")).show() + }.getCause.getMessage + assert(message.contains("Only UNION of a null type and a non-null type is supported")) + } + } } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 4f4af97f1299f..c9c6bcecac14e 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1019,6 +1019,63 @@ abstract class AvroSuite } } + test("support user provided nullable avro schema " + + "for non-nullable catalyst schema without any null record") { + val catalystSchema = + StructType(Seq( + StructField("Age", IntegerType, nullable = false), + StructField("Name", StringType, nullable = 
false))) + + val avroSchema = """ + |{ + | "type" : "record", + | "name" : "test_schema", + | "fields" : [ + | {"name": "Age", "type": ["null", "int"]}, + | {"name": "Name", "type": ["null", "string"]} + | ] + |} + """.stripMargin + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(2, "Aurora"))), catalystSchema) + + withTempPath { tempDir => + df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath) + checkAvroSchemaEquals(avroSchema, getAvroSchemaStringFromFiles(tempDir.getPath)) + } + } + + test("unsupported nullable avro type") { + val catalystSchema = + StructType(Seq( + StructField("Age", IntegerType, nullable = false), + StructField("Name", StringType, nullable = false))) + + for (unsupportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) { + val avroSchema = s""" + |{ + | "type" : "record", + | "name" : "test_schema", + | "fields" : [ + | {"name": "Age", "type": $unsupportedAvroType}, + | {"name": "Name", "type": ["null", "string"]} + | ] + |} + """.stripMargin + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(Seq(Row(2, "Aurora"))), catalystSchema) + + withTempPath { tempDir => + val message = intercept[SparkException] { + df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath) + }.getCause.getMessage + assert(message.contains("Only UNION of a null type and a non-null type is supported")) + } + } + } + test("error handling for unsupported Interval data types") { withTempDir { dir => val tempDir = new File(dir, "files").getCanonicalPath From 0535b34ad47249df4806ed70471d5539b998a3b3 Mon Sep 17 00:00:00 2001 From: Kyle Bendickson Date: Thu, 5 Nov 2020 16:10:52 +0900 Subject: [PATCH 0392/1009] [SPARK-33282] Migrate from deprecated probot autolabeler to GitHub labeler action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR removes the old Probot Autolabeler labeling configuration, as the probot autolabeler has been deprecated. I've updated the configs in Iceberg and in Avro, and we also need to update here. This PR adds in an additional workflow for labeling PRs and migrates the old probot config to the new format. Unfortunately, because certain features have not been released upstream, we will not get the _exact_ behavior as before. I have documented where that is and what changes are neeeded, and in the associated ticket I've also discussed other options and why I think this is the best way to go. Definitely a follow up ticket is needed to get the original behavior back in these few cases, but PRs have not been labeled for almost a month and so it's probably best to get it right 95% of the time and occasionally have some UI related PRs labeled as `CORE` while the issue is resolved upstream and/or further investigated. ### Why are the changes needed? The probot autolabeler is dead and will not be maintained going forward. This has been confirmed with github user [at]mithro in an issue in their repository. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? To test this PR, I first merged the config into my local fork. I then edited it several times and ran tests on that. Unfortunately, I've overwritten my fork with the apache repo in order to create a proper PR. However, I've also added the config for the same thing in the Iceberg repo as well as the Avro repo. 
I have now merged this PR into my local repo and will be running some tests on edge cases there and for validating in general: - [Check that the SQL label is applied for changes directly below repo root's sql directory](https://github.com/kbendick/spark/pull/16) ✅ - [Check that the structured streaming label is applied](https://github.com/kbendick/spark/pull/20) ✅ - [Check that a wildcard at the end of a pattern will match nested files](https://github.com/kbendick/spark/pull/19) ✅ - [Check that the rule **/*pom.xml will match the root pom.xml file](https://github.com/kbendick/spark/pull/25) ✅ I've also discovered that we're likely not killing github actions that run (like large tests etc) when users push to their PR. In most cases, I see that a user has to mark something as "OK to test", but it still seems like we might want to discuss whether or not we should add a cancellation step In order to save time / capacity on the runners. If so desired, we would add an action in each workflow that cancels old runs when a `push` action occurs on a PR. This will likely make waiting for test runners much faster iff tests are automatically rerun on push by anybody (such as PMCs, PRs that have been marked OK to test, etc). We could free a large number of resources potentially if a cancellation step was added to all of the workflows in the Apache account (as github action API limits are set at the account level). Admittedly, the fact that the "old" workflow runs weren't cancelled could admittedly be because of the fact that I was working in a fork, but given that there are explicit actions to be added to the start of workflows to cancel old PR workflows and given that we don't have them configured indicates to me that likely this is the case in this repo (and in most `apache` repos as well), at least under certain circumstances (e.g. repos that don't have "Ok to test"-like webhooks as one example). This is a separate issue though, which I can bring up on the mailing list once I'm done with this PR. Unfortunately I've been very busy the past two weeks, but if somebody else wanted to work on that I would be happy to support with any knowledge I have. The last Apache repo to still have the probot autolabeler in it is Beam, at which point we can have Gavin from ASF Infra remove the permissions for the probot autolabeler entirely. See the associated JIRA ticket for the links to other tickets, like the one for ASF Infra to remove the dead probot autolabeler's read and write permissions to our PRs in the Apache organization. Closes #30244 from kbendick/begin-migration-to-github-labeler-action. Authored-by: Kyle Bendickson Signed-off-by: HyukjinKwon --- .github/autolabeler.yml | 133 ----------------------------- .github/labeler.yml | 152 ++++++++++++++++++++++++++++++++++ .github/workflows/labeler.yml | 43 ++++++++++ 3 files changed, 195 insertions(+), 133 deletions(-) delete mode 100644 .github/autolabeler.yml create mode 100644 .github/labeler.yml create mode 100644 .github/workflows/labeler.yml diff --git a/.github/autolabeler.yml b/.github/autolabeler.yml deleted file mode 100644 index 3bca01f89950a..0000000000000 --- a/.github/autolabeler.yml +++ /dev/null @@ -1,133 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Bot page: https://github.com/apps/probot-autolabeler -# The matching patterns follow the .gitignore spec. -# See: https://git-scm.com/docs/gitignore#_pattern_format -# Also, note that the plugin uses 'ignore' package. See also -# https://github.com/kaelzhang/node-ignore -INFRA: - - ".github/" - - "appveyor.yml" - - "/tools/" - - "/dev/create-release/" - - ".asf.yaml" - - ".gitattributes" - - ".gitignore" - - "/dev/github_jira_sync.py" - - "/dev/merge_spark_pr.py" - - "/dev/run-tests-jenkins*" -BUILD: - - "/dev/" - - "!/dev/github_jira_sync.py" - - "!/dev/merge_spark_pr.py" - - "!/dev/run-tests-jenkins*" - - "!/dev/.rat-excludes" - - "/build/" - - "/project/" - - "/assembly/" - - "*pom.xml" - - "/bin/docker-image-tool.sh" - - "/bin/find-spark-home*" - - "scalastyle-config.xml" -DOCS: - - "docs/" - - "/README.md" - - "/CONTRIBUTING.md" -EXAMPLES: - - "examples/" - - "/bin/run-example*" -CORE: - - "/core/" - - "!UI.scala" - - "!ui/" - - "/common/kvstore/" - - "/common/network-common/" - - "/common/network-shuffle/" - - "/python/pyspark/*.py" - - "/python/pyspark/tests/*.py" -SPARK SUBMIT: - - "/bin/spark-submit*" -SPARK SHELL: - - "/repl/" - - "/bin/spark-shell*" -SQL: - - "sql/" - - "/common/unsafe/" - - "!/python/pyspark/sql/avro/" - - "!/python/pyspark/sql/streaming.py" - - "!/python/pyspark/sql/tests/test_streaming.py" - - "/bin/spark-sql*" - - "/bin/beeline*" - - "/sbin/*thriftserver*.sh" - - "*SQL*.R" - - "DataFrame.R" - - "WindowSpec.R" - - "catalog.R" - - "column.R" - - "functions.R" - - "group.R" - - "schema.R" - - "types.R" -AVRO: - - "/external/avro/" - - "/python/pyspark/sql/avro/" -DSTREAM: - - "/streaming/" - - "/data/streaming/" - - "/external/flume*" - - "/external/kinesis*" - - "/external/kafka*" - - "/python/pyspark/streaming/" -GRAPHX: - - "/graphx/" - - "/data/graphx/" -ML: - - "ml/" - - "*mllib_*.R" -MLLIB: - - "spark/mllib/" - - "/mllib-local/" - - "/python/pyspark/mllib/" -STRUCTURED STREAMING: - - "sql/**/streaming/" - - "/external/kafka-0-10-sql/" - - "/python/pyspark/sql/streaming.py" - - "/python/pyspark/sql/tests/test_streaming.py" - - "*streaming.R" -PYTHON: - - "/bin/pyspark*" - - "python/" -R: - - "r/" - - "R/" - - "/bin/sparkR*" -YARN: - - "/resource-managers/yarn/" -MESOS: - - "/resource-managers/mesos/" - - "/sbin/*mesos*.sh" -KUBERNETES: - - "/resource-managers/kubernetes/" -WINDOWS: - - "*.cmd" - - "/R/pkg/tests/fulltests/test_Windows.R" -WEB UI: - - "ui/" - - "UI.scala" -DEPLOY: - - "/sbin/" diff --git a/.github/labeler.yml b/.github/labeler.yml new file mode 100644 index 0000000000000..bd61902925e33 --- /dev/null +++ b/.github/labeler.yml @@ -0,0 +1,152 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# +# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler +# +# Note that we currently cannot use the negatioon operator (i.e. `!`) for miniglob matches as they +# would match any file that doesn't touch them. What's needed is the concept of `any `, which takes a +# list of constraints / globs and then matches all of the constraints for either `any` of the files or +# `all` of the files in the change set. +# +# However, `any`/`all` are not supported in a released version and testing off of the `main` branch +# resulted in some other errors when testing. +# +# An issue has been opened upstream requesting that a release be cut that has support for all/any: +# - https://github.com/actions/labeler/issues/111 +# +# While we wait for this issue to be handled upstream, we can remove +# the negated / `!` matches for now and at least have labels again. +# +INFRA: + - ".github/**/*" + - "appveyor.yml" + - "tools/**/*" + - "dev/create-release/**/*" + - ".asf.yaml" + - ".gitattributes" + - ".gitignore" + - "dev/github_jira_sync.py" + - "dev/merge_spark_pr.py" + - "dev/run-tests-jenkins*" +BUILD: + # Can be supported when a stable release with correct all/any is released + #- any: ['dev/**/*', '!dev/github_jira_sync.py', '!dev/merge_spark_pr.py', '!dev/.rat-excludes'] + - "dev/**/*" + - "build/**/*" + - "project/**/*" + - "assembly/**/*" + - "**/*pom.xml" + - "bin/docker-image-tool.sh" + - "bin/find-spark-home*" + - "scalastyle-config.xml" + # These can be added in the above `any` clause (and the /dev/**/* glob removed) when + # `any`/`all` support is released + # - "!dev/github_jira_sync.py" + # - "!dev/merge_spark_pr.py" + # - "!dev/run-tests-jenkins*" + # - "!dev/.rat-excludes" +DOCS: + - "docs/**/*" + - "**/README.md" + - "**/CONTRIBUTING.md" +EXAMPLES: + - "examples/**/*" + - "bin/run-example*" +# CORE needs to be updated when all/any are released upstream. +CORE: + # - any: ["core/**/*", "!**/*UI.scala", "!**/ui/**/*"] # If any file matches all of the globs defined in the list started by `any`, label is applied. 
+ - "core/**/*" + - "common/kvstore/**/*" + - "common/network-common/**/*" + - "common/network-shuffle/**/*" + - "python/pyspark/**/*.py" + - "python/pyspark/tests/**/*.py" +SPARK SUBMIT: + - "bin/spark-submit*" +SPARK SHELL: + - "repl/**/*" + - "bin/spark-shell*" +SQL: +#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming.py", "!python/pyspark/sql/tests/test_streaming.py"] + - "**/sql/**/*" + - "common/unsafe/**/*" + #- "!python/pyspark/sql/avro/**/*" + #- "!python/pyspark/sql/streaming.py" + #- "!python/pyspark/sql/tests/test_streaming.py" + - "bin/spark-sql*" + - "bin/beeline*" + - "sbin/*thriftserver*.sh" + - "**/*SQL*.R" + - "**/DataFrame.R" + - "**/*WindowSpec.R" + - "**/*catalog.R" + - "**/*column.R" + - "**/*functions.R" + - "**/*group.R" + - "**/*schema.R" + - "**/*types.R" +AVRO: + - "external/avro/**/*" + - "python/pyspark/sql/avro/**/*" +DSTREAM: + - "streaming/**/*" + - "data/streaming/**/*" + - "external/kinesis*" + - "external/kafka*" + - "python/pyspark/streaming/**/*" +GRAPHX: + - "graphx/**/*" + - "data/graphx/**/*" +ML: + - "**/ml/**/*" + - "**/*mllib_*.R" +MLLIB: + - "**/spark/mllib/**/*" + - "mllib-local/**/*" + - "python/pyspark/mllib/**/*" +STRUCTURED STREAMING: + - "**/sql/**/streaming/**/*" + - "external/kafka-0-10-sql/**/*" + - "python/pyspark/sql/streaming.py" + - "python/pyspark/sql/tests/test_streaming.py" + - "**/*streaming.R" +PYTHON: + - "bin/pyspark*" + - "**/python/**/*" +R: + - "**/r/**/*" + - "**/R/**/*" + - "bin/sparkR*" +YARN: + - "resource-managers/yarn/**/*" +MESOS: + - "resource-managers/mesos/**/*" + - "sbin/*mesos*.sh" +KUBERNETES: + - "resource-managers/kubernetes/**/*" +WINDOWS: + - "**/*.cmd" + - "R/pkg/tests/fulltests/test_Windows.R" +WEB UI: + - "**/ui/**/*" + - "**/*UI.scala" +DEPLOY: + - "sbin/**/*" + diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml new file mode 100644 index 0000000000000..a1a5ab5b70f5b --- /dev/null +++ b/.github/workflows/labeler.yml @@ -0,0 +1,43 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: "Pull Request Labeler" +on: pull_request_target + +jobs: + label: + runs-on: ubuntu-latest + steps: + # In order to get back the negated matches like in the old config, + # we need the actinons/labeler concept of `all` and `any` which matches + # all of the given constraints / glob patterns for either `all` + # files or `any` file in the change set. 
+ # + # Github issue which requests a timeline for a release with any/all support: + # - https://github.com/actions/labeler/issues/111 + # This issue also references the issue that mentioned that any/all are only + # supported on main branch (previously called master): + # - https://github.com/actions/labeler/issues/73#issuecomment-639034278 + # + # However, these are not in a published release and the current `main` branch + # has some issues upon testing. + - uses: actions/labeler@2.2.0 + with: + repo-token: "${{ secrets.GITHUB_TOKEN }}" + sync-labels: true From d530ed0ea8bdba09fba6dcd51f8e4f7745781c2e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 5 Nov 2020 16:15:17 +0900 Subject: [PATCH 0393/1009] Revert "[SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends" This reverts commit b8a440f09880c596325dd9e6caae6b470be76a8f. --- python/pyspark/sql/tests/test_pandas_map.py | 22 ------------------- .../sql/tests/test_pandas_udf_scalar.py | 19 ---------------- python/pyspark/sql/tests/test_udf.py | 20 ----------------- .../sql/execution/python/EvalPythonExec.scala | 18 +-------------- .../execution/python/MapInPandasExec.scala | 7 +++--- 5 files changed, 4 insertions(+), 82 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 2cad30c7294d4..3ca437f75fc23 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -15,12 +15,9 @@ # limitations under the License. # import os -import shutil -import tempfile import time import unittest -from pyspark.sql import Row from pyspark.testing.sqlutils import ReusedSQLTestCase, have_pandas, have_pyarrow, \ pandas_requirement_message, pyarrow_requirement_message @@ -115,25 +112,6 @@ def func(iterator): expected = df.collect() self.assertEquals(actual, expected) - # SPARK-33277 - def test_map_in_pandas_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 200000, 1, 1).write.parquet(path) - - def func(iterator): - for pdf in iterator: - yield pd.DataFrame({'id': [0] * len(pdf)}) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).mapInPandas(func, 'id long').head(), Row(0)) - finally: - shutil.rmtree(path) - if __name__ == "__main__": from pyspark.sql.tests.test_pandas_map import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index c2c8f6f697c4b..6d325c9085ce1 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -1137,25 +1137,6 @@ def test_datasource_with_udf(self): finally: shutil.rmtree(path) - # SPARK-33277 - def test_pandas_udf_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 200000, 1, 1).write.parquet(path) - - @pandas_udf(LongType()) - def udf(x): - return pd.Series([0] * len(x)) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).select(udf('id')).head(), Row(0)) - finally: - shutil.rmtree(path) - if __name__ == "__main__": from pyspark.sql.tests.test_pandas_udf_scalar import * # noqa: F401 diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 
c2e95fd41c5b4..a7dcbfd32ac1c 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -674,26 +674,6 @@ def test_udf_cache(self): self.assertEqual(df.select(udf(func)("id"))._jdf.queryExecution() .withCachedData().getClass().getSimpleName(), 'InMemoryRelation') - # SPARK-33277 - def test_udf_with_column_vector(self): - path = tempfile.mkdtemp() - shutil.rmtree(path) - - try: - self.spark.range(0, 100000, 1, 1).write.parquet(path) - - def f(x): - return 0 - - fUdf = udf(f, LongType()) - - for offheap in ["true", "false"]: - with self.sql_conf({"spark.sql.columnVector.offheap.enabled": offheap}): - self.assertEquals( - self.spark.read.parquet(path).select(fUdf('id')).head(), Row(0)) - finally: - shutil.rmtree(path) - class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 89c7716f7c1b2..298d63478b63e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -89,7 +89,6 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() - val contextAwareIterator = new ContextAwareIterator(iter, context) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. @@ -121,7 +120,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. - val projectedRowIter = contextAwareIterator.map { inputRow => + val projectedRowIter = iter.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } @@ -138,18 +137,3 @@ trait EvalPythonExec extends UnaryExecNode { } } } - -/** - * A TaskContext aware iterator. - * - * As the Python evaluation consumes the parent iterator in a separate thread, - * it could consume more data from the parent even after the task ends and the parent is closed. - * Thus, we should use ContextAwareIterator to stop consuming after the task ends. - */ -class ContextAwareIterator[IN](iter: Iterator[IN], context: TaskContext) extends Iterator[IN] { - - override def hasNext: Boolean = - !context.isCompleted() && !context.isInterrupted() && iter.hasNext - - override def next(): IN = iter.next() -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 7fc18f885a2d3..2bb808119c0ae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -61,17 +61,16 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema - val context = TaskContext.get() - val contextAwareIterator = new ContextAwareIterator(inputIter, context) - // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = contextAwareIterator.map(InternalRow(_)) + val wrappedIter = inputIter.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. 
val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) + val context = TaskContext.get() + val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From e66201b30bc1f3da7284af14b32e5e6200768dbd Mon Sep 17 00:00:00 2001 From: Sarvesh Dave Date: Thu, 5 Nov 2020 16:22:31 +0900 Subject: [PATCH 0394/1009] [MINOR][SS][DOCS] Update join type in stream static joins code examples ### What changes were proposed in this pull request? Update join type in stream static joins code examples in structured streaming programming guide. 1) Scala, Java and Python examples have a common issue. The join keyword is "right_join", it should be "left_outer". _Reasons:_ a) This code snippet is an example of "left outer join" as the streaming df is on left and static df is on right. Also, right outer join between stream df(left) and static df(right) is not supported. b) The keyword "right_join/left_join" is unsupported and it should be "right_outer/left_outer". So, all of these code snippets have been updated to "left_outer". 2) R exmaple is correct, but the example is of "right_outer" with static df (left) and streaming df(right). It is changed to "left_outer" to make it consistent with other three examples of scala, java and python. ### Why are the changes needed? To fix the mistake in example code of documentation. ### Does this PR introduce _any_ user-facing change? Yes, it is a user-facing change (but documentation update only). **Screenshots 1: Scala/Java/python example (similar issue)** _Before:_ Screenshot 2020-11-05 at 12 16 09 AM _After:_ Screenshot 2020-11-05 at 12 17 12 AM **Screenshots 2: R example (Make it consistent with above change)** _Before:_ Screenshot 2020-11-05 at 12 19 57 AM _After:_ Screenshot 2020-11-05 at 12 20 51 AM ### How was this patch tested? The change was tested locally. 1) cd docs/ SKIP_API=1 jekyll build 2) Verify docs/_site/structured-streaming-programming-guide.html file in browser. Closes #30252 from sarveshdave1/doc-update-stream-static-joins. Authored-by: Sarvesh Dave Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-programming-guide.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index ccd6f41f5c664..c671d6b590626 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1117,7 +1117,7 @@ val staticDf = spark.read. ... val streamingDf = spark.readStream. ... streamingDf.join(staticDf, "type") // inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join") // right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer") // left outer join with a static DF {% endhighlight %} @@ -1128,7 +1128,7 @@ streamingDf.join(staticDf, "type", "right_join") // right outer join with a sta Dataset staticDf = spark.read(). ...; Dataset streamingDf = spark.readStream(). ...; streamingDf.join(staticDf, "type"); // inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join"); // right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer"); // left outer join with a static DF {% endhighlight %} @@ -1139,7 +1139,7 @@ streamingDf.join(staticDf, "type", "right_join"); // right outer join with a st staticDf = spark.read. ... streamingDf = spark.readStream. ... 
streamingDf.join(staticDf, "type") # inner equi-join with a static DF -streamingDf.join(staticDf, "type", "right_join") # right outer join with a static DF +streamingDf.join(staticDf, "type", "left_outer") # left outer join with a static DF {% endhighlight %} @@ -1151,10 +1151,10 @@ staticDf <- read.df(...) streamingDf <- read.stream(...) joined <- merge(streamingDf, staticDf, sort = FALSE) # inner equi-join with a static DF joined <- join( + streamingDf, staticDf, - streamingDf, streamingDf$value == staticDf$value, - "right_outer") # right outer join with a static DF + "left_outer") # left outer join with a static DF {% endhighlight %} From 21413b7dd4e19f725b21b92cddfbe73d1b381a05 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Thu, 5 Nov 2020 18:21:17 +0900 Subject: [PATCH 0395/1009] [SPARK-30294][SS] Explicitly defines read-only StateStore and optimize for HDFSBackedStateStore ### What changes were proposed in this pull request? There's a concept of 'read-only' and 'read+write' state store in Spark which is defined "implicitly". Spark doesn't prevent write for 'read-only' state store; Spark just assumes read-only stateful operator will not modify the state store. Given it's not defined explicitly, the instance of state store has to be implemented as 'read+write' even it's being used as 'read-only', which sometimes brings confusion. For example, abort() in HDFSBackedStateStore - https://github.com/apache/spark/blob/d38f8167483d4d79e8360f24a8c0bffd51460659/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala#L143-L155 The comment sounds as if statement works differently between 'read-only' and 'read+write', but that's not true as both state store has state initialized as UPDATING (no difference). So 'read-only' state also creates the temporary file, initializes output streams to write to temporary file, closes output streams, and finally deletes the temporary file. This unnecessary operations are being done per batch/partition. This patch explicitly defines 'read-only' StateStore, and enables state store provider to create 'read-only' StateStore instance if requested. Relevant code paths are modified, as well as 'read-only' StateStore implementation for HDFSBackedStateStore is introduced. The new implementation gets rid of unnecessary operations explained above. In point of backward-compatibility view, the only thing being changed in public API side is `StateStoreProvider`. The trait `StateStoreProvider` has to be changed to allow requesting 'read-only' StateStore; this patch adds default implementation which leverages 'read+write' StateStore but wrapping with 'write-protected' StateStore instance, so that custom providers don't need to change their code to reflect the change. But if the providers can optimize for read-only workload, they'll be happy to make a change. Please note that this patch makes ReadOnlyStateStore extend StateStore and being referred as StateStore, as StateStore is being used in so many places and it's not easy to support both traits if we differentiate them. So unfortunately these write methods are still exposed for read-only state; it just throws UnsupportedOperationException. ### Why are the changes needed? The new API opens the chance to optimize read-only state store instance compared with read+write state store instance. HDFSBackedStateStoreProvider is modified to provide read-only version of state store which doesn't deal with temporary file as well as state machine. 
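As a sketch of what the explicit split buys at the call site (illustrative only; `countKeys` is a hypothetical helper and not part of this patch), read-only consumers can now be typed against `ReadStateStore`, which statically rules out `put`/`remove`/`commit`:

```
import org.apache.spark.sql.execution.streaming.state.{ReadStateStore, StateStore}

object ReadOnlyStateSketch {
  // Declared against ReadStateStore, so it cannot modify state by construction.
  def countKeys(store: ReadStateStore): Long = {
    try {
      store.iterator().size.toLong
    } finally {
      store.abort() // for a read-only store this is plain resource cleanup
    }
  }

  // A read+write store still works here, because StateStore extends ReadStateStore.
  def countKeysRW(store: StateStore): Long = countKeys(store)
}
```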
### Does this PR introduce any user-facing change? Clearly "no" for most end users, and also "no" for custom state store providers as it doesn't touch trait `StateStore` as well as provides default implementation for added method in trait `StateStoreProvider`. ### How was this patch tested? Modified UT. Existing UTs ensure the change doesn't break anything. Closes #26935 from HeartSaVioR/SPARK-30294. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../state/HDFSBackedStateStoreProvider.scala | 46 ++++++-- .../streaming/state/StateStore.scala | 111 +++++++++++++++--- .../streaming/state/StateStoreRDD.scala | 104 ++++++++++++---- .../StreamingAggregationStateManager.scala | 22 ++-- .../execution/streaming/state/package.scala | 35 ++++++ .../streaming/statefulOperators.scala | 2 +- .../streaming/state/StateStoreSuite.scala | 4 +- 7 files changed, 261 insertions(+), 63 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 0a25d51666321..5c55034e88df5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -58,7 +58,6 @@ import org.apache.spark.util.{SizeEstimator, Utils} * - store.remove(...) * - store.commit() // commits all the updates to made; the new version will be returned * - store.iterator() // key-value data after last commit as an iterator - * - store.updates() // updates made in the last commit as an iterator * * Fault-tolerance model: * - Every set of updates is written to a delta file before committing. @@ -79,6 +78,27 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit // java.util.ConcurrentModificationException type MapType = java.util.concurrent.ConcurrentHashMap[UnsafeRow, UnsafeRow] + class HDFSBackedReadStateStore(val version: Long, map: MapType) + extends ReadStateStore { + + override def id: StateStoreId = HDFSBackedStateStoreProvider.this.stateStoreId + + override def get(key: UnsafeRow): UnsafeRow = map.get(key) + + override def iterator(): Iterator[UnsafeRowPair] = { + val unsafeRowPair = new UnsafeRowPair() + map.entrySet.asScala.iterator.map { entry => + unsafeRowPair.withRows(entry.getKey, entry.getValue) + } + } + + override def abort(): Unit = {} + + override def toString(): String = { + s"HDFSReadStateStore[id=(op=${id.operatorId},part=${id.partitionId}),dir=$baseDir]" + } + } + /** Implementation of [[StateStore]] API which is backed by an HDFS-compatible file system */ class HDFSBackedStateStore(val version: Long, mapToUpdate: MapType) extends StateStore { @@ -142,9 +162,8 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit /** Abort all the updates made on this store. This store will not be usable any more. */ override def abort(): Unit = { - // This if statement is to ensure that files are deleted only if there are changes to the - // StateStore. We have two StateStores for each task, one which is used only for reading, and - // the other used for read+write. We don't want the read-only to delete state files. + // This if statement is to ensure that files are deleted only once: if either commit or abort + // is called before, it will be no-op. 
if (state == UPDATING) { state = ABORTED cancelDeltaFile(compressedStream, deltaFileStream) @@ -197,15 +216,26 @@ private[state] class HDFSBackedStateStoreProvider extends StateStoreProvider wit } /** Get the state store for making updates to create a new `version` of the store. */ - override def getStore(version: Long): StateStore = synchronized { + override def getStore(version: Long): StateStore = { + val newMap = getLoadedMapForStore(version) + logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for update") + new HDFSBackedStateStore(version, newMap) + } + + /** Get the state store for reading to specific `version` of the store. */ + override def getReadStore(version: Long): ReadStateStore = { + val newMap = getLoadedMapForStore(version) + logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for readonly") + new HDFSBackedReadStateStore(version, newMap) + } + + private def getLoadedMapForStore(version: Long): MapType = synchronized { require(version >= 0, "Version cannot be less than 0") val newMap = new MapType() if (version > 0) { newMap.putAll(loadMap(version)) } - val store = new HDFSBackedStateStore(version, newMap) - logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for update") - store + newMap } override def init( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 092ca968f59c4..d52505fbdab35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -36,10 +36,14 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.util.{ThreadUtils, Utils} /** - * Base trait for a versioned key-value store. Each instance of a `StateStore` represents a specific - * version of state data, and such instances are created through a [[StateStoreProvider]]. + * Base trait for a versioned key-value store which provides read operations. Each instance of a + * `ReadStateStore` represents a specific version of state data, and such instances are created + * through a [[StateStoreProvider]]. + * + * `abort` method will be called when the task is completed - please clean up the resources in + * the method. */ -trait StateStore { +trait ReadStateStore { /** Unique identifier of the store */ def id: StateStoreId @@ -53,17 +57,6 @@ trait StateStore { */ def get(key: UnsafeRow): UnsafeRow - /** - * Put a new value for a non-null key. Implementations must be aware that the UnsafeRows in - * the params can be reused, and must make copies of the data as needed for persistence. - */ - def put(key: UnsafeRow, value: UnsafeRow): Unit - - /** - * Remove a single non-null key. - */ - def remove(key: UnsafeRow): Unit - /** * Get key value pairs with optional approximate `start` and `end` extents. * If the State Store implementation maintains indices for the data based on the optional @@ -81,6 +74,40 @@ trait StateStore { iterator() } + /** Return an iterator containing all the key-value pairs in the StateStore. */ + def iterator(): Iterator[UnsafeRowPair] + + /** + * Clean up the resource. + * + * The method name is to respect backward compatibility on [[StateStore]]. + */ + def abort(): Unit +} + +/** + * Base trait for a versioned key-value store which provides both read and write operations. 
Each + * instance of a `StateStore` represents a specific version of state data, and such instances are + * created through a [[StateStoreProvider]]. + * + * Unlike [[ReadStateStore]], `abort` method may not be called if the `commit` method succeeds + * to commit the change. (`hasCommitted` returns `true`.) Otherwise, `abort` method will be called. + * Implementation should deal with resource cleanup in both methods, and also need to guard with + * double resource cleanup. + */ +trait StateStore extends ReadStateStore { + + /** + * Put a new value for a non-null key. Implementations must be aware that the UnsafeRows in + * the params can be reused, and must make copies of the data as needed for persistence. + */ + def put(key: UnsafeRow, value: UnsafeRow): Unit + + /** + * Remove a single non-null key. + */ + def remove(key: UnsafeRow): Unit + /** * Commit all the updates that have been made to the store, and return the new version. * Implementations should ensure that no more updates (puts, removes) can be after a commit in @@ -92,13 +119,13 @@ trait StateStore { * Abort all the updates that have been made to the store. Implementations should ensure that * no more updates (puts, removes) can be after an abort in order to avoid incorrect usage. */ - def abort(): Unit + override def abort(): Unit /** * Return an iterator containing all the key-value pairs in the StateStore. Implementations must * ensure that updates (puts, removes) can be made while iterating over this iterator. */ - def iterator(): Iterator[UnsafeRowPair] + override def iterator(): Iterator[UnsafeRowPair] /** Current metrics of the state store */ def metrics: StateStoreMetrics @@ -109,6 +136,19 @@ trait StateStore { def hasCommitted: Boolean } +/** Wraps the instance of StateStore to make the instance read-only. */ +class WrappedReadStateStore(store: StateStore) extends ReadStateStore { + override def id: StateStoreId = store.id + + override def version: Long = store.version + + override def get(key: UnsafeRow): UnsafeRow = store.get(key) + + override def iterator(): Iterator[UnsafeRowPair] = store.iterator() + + override def abort(): Unit = store.abort() +} + /** * Metrics reported by a state store * @param numKeys Number of keys in the state store @@ -206,6 +246,15 @@ trait StateStoreProvider { /** Return an instance of [[StateStore]] representing state data of the given version */ def getStore(version: Long): StateStore + /** + * Return an instance of [[ReadStateStore]] representing state data of the given version. + * By default it will return the same instance as getStore(version) but wrapped to prevent + * modification. Providers can override and return optimized version of [[ReadStateStore]] + * based on the fact the instance will be only used for reading. + */ + def getReadStore(version: Long): ReadStateStore = + new WrappedReadStateStore(getStore(version)) + /** Optional method for providers to allow for background maintenance (e.g. compactions) */ def doMaintenance(): Unit = { } @@ -379,6 +428,21 @@ object StateStore extends Logging { @GuardedBy("loadedProviders") private var _coordRef: StateStoreCoordinatorRef = null + /** Get or create a read-only store associated with the id. 
*/ + def getReadOnly( + storeProviderId: StateStoreProviderId, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + version: Long, + storeConf: StateStoreConf, + hadoopConf: Configuration): ReadStateStore = { + require(version >= 0) + val storeProvider = getStateStoreProvider(storeProviderId, keySchema, valueSchema, + indexOrdinal, storeConf, hadoopConf) + storeProvider.getReadStore(version) + } + /** Get or create a store associated with the id. */ def get( storeProviderId: StateStoreProviderId, @@ -389,7 +453,19 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration): StateStore = { require(version >= 0) - val storeProvider = loadedProviders.synchronized { + val storeProvider = getStateStoreProvider(storeProviderId, keySchema, valueSchema, + indexOrdinal, storeConf, hadoopConf) + storeProvider.getStore(version) + } + + private def getStateStoreProvider( + storeProviderId: StateStoreProviderId, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + storeConf: StateStoreConf, + hadoopConf: Configuration): StateStoreProvider = { + loadedProviders.synchronized { startMaintenanceIfNeeded() val provider = loadedProviders.getOrElseUpdate( storeProviderId, @@ -399,7 +475,6 @@ object StateStore extends Logging { reportActiveStoreInstance(storeProviderId) provider } - storeProvider.getStore(version) } /** Unload a state store provider */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index 90a53727aa317..eda191f28bf18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -29,14 +29,51 @@ import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration +// This doesn't directly override RDD methods as MiMa complains it. +abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( + dataRDD: RDD[T], + checkpointLocation: String, + queryRunId: UUID, + operatorId: Long, + sessionState: SessionState, + @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty) extends RDD[U](dataRDD) { + + protected val storeConf = new StateStoreConf(sessionState.conf, extraOptions) + + // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it + protected val hadoopConfBroadcast = dataRDD.context.broadcast( + new SerializableConfiguration(sessionState.newHadoopConf())) + + /** Implementations can simply call this method in getPreferredLocations. */ + protected def _getPartitions: Array[Partition] = dataRDD.partitions + + /** + * Set the preferred location of each partition using the executor that has the related + * [[StateStoreProvider]] already loaded. + * + * Implementations can simply call this method in getPreferredLocations. 
+ */ + protected def _getPreferredLocations(partition: Partition): Seq[String] = { + val stateStoreProviderId = getStateProviderId(partition) + storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq + } + + protected def getStateProviderId(partition: Partition): StateStoreProviderId = { + StateStoreProviderId( + StateStoreId(checkpointLocation, operatorId, partition.index), + queryRunId) + } +} + /** - * An RDD that allows computations to be executed against [[StateStore]]s. It + * An RDD that allows computations to be executed against [[ReadStateStore]]s. It * uses the [[StateStoreCoordinator]] to get the locations of loaded state stores * and use that as the preferred locations. */ -class StateStoreRDD[T: ClassTag, U: ClassTag]( +class ReadStateStoreRDD[T: ClassTag, U: ClassTag]( dataRDD: RDD[T], - storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U], + storeReadFunction: (ReadStateStore, Iterator[T]) => Iterator[U], checkpointLocation: String, queryRunId: UUID, operatorId: Long, @@ -47,34 +84,55 @@ class StateStoreRDD[T: ClassTag, U: ClassTag]( sessionState: SessionState, @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], extraOptions: Map[String, String] = Map.empty) - extends RDD[U](dataRDD) { + extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, + sessionState, storeCoordinator, extraOptions) { - private val storeConf = new StateStoreConf(sessionState.conf, extraOptions) + override protected def getPartitions: Array[Partition] = _getPartitions - // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it - private val hadoopConfBroadcast = dataRDD.context.broadcast( - new SerializableConfiguration(sessionState.newHadoopConf())) + override def getPreferredLocations(partition: Partition): Seq[String] = + _getPreferredLocations(partition) - override protected def getPartitions: Array[Partition] = dataRDD.partitions + override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { + val storeProviderId = getStateProviderId(partition) - /** - * Set the preferred location of each partition using the executor that has the related - * [[StateStoreProvider]] already loaded. - */ - override def getPreferredLocations(partition: Partition): Seq[String] = { - val stateStoreProviderId = StateStoreProviderId( - StateStoreId(checkpointLocation, operatorId, partition.index), - queryRunId) - storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq + val store = StateStore.getReadOnly( + storeProviderId, keySchema, valueSchema, indexOrdinal, storeVersion, + storeConf, hadoopConfBroadcast.value.value) + val inputIter = dataRDD.iterator(partition, ctxt) + storeReadFunction(store, inputIter) } +} + +/** + * An RDD that allows computations to be executed against [[StateStore]]s. It + * uses the [[StateStoreCoordinator]] to get the locations of loaded state stores + * and use that as the preferred locations. 
+ */ +class StateStoreRDD[T: ClassTag, U: ClassTag]( + dataRDD: RDD[T], + storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U], + checkpointLocation: String, + queryRunId: UUID, + operatorId: Long, + storeVersion: Long, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + sessionState: SessionState, + @transient private val storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty) + extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, + sessionState, storeCoordinator, extraOptions) { + + override protected def getPartitions: Array[Partition] = _getPartitions + + override def getPreferredLocations(partition: Partition): Seq[String] = + _getPreferredLocations(partition) override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { - var store: StateStore = null - val storeProviderId = StateStoreProviderId( - StateStoreId(checkpointLocation, operatorId, partition.index), - queryRunId) + val storeProviderId = getStateProviderId(partition) - store = StateStore.get( + val store = StateStore.get( storeProviderId, keySchema, valueSchema, indexOrdinal, storeVersion, storeConf, hadoopConfBroadcast.value.value) val inputIter = dataRDD.iterator(partition, ctxt) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala index 9bfb9561b42a1..0496e4768b681 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingAggregationStateManager.scala @@ -34,7 +34,7 @@ sealed trait StreamingAggregationStateManager extends Serializable { def getStateValueSchema: StructType /** Get the current value of a non-null key from the target state store. */ - def get(store: StateStore, key: UnsafeRow): UnsafeRow + def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow /** * Put a new value for a non-null key to the target state store. Note that key will be @@ -52,13 +52,13 @@ sealed trait StreamingAggregationStateManager extends Serializable { def remove(store: StateStore, key: UnsafeRow): Unit /** Return an iterator containing all the key-value pairs in target state store. */ - def iterator(store: StateStore): Iterator[UnsafeRowPair] + def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] /** Return an iterator containing all the keys in target state store. */ - def keys(store: StateStore): Iterator[UnsafeRow] + def keys(store: ReadStateStore): Iterator[UnsafeRow] /** Return an iterator containing all the values in target state store. 
*/ - def values(store: StateStore): Iterator[UnsafeRow] + def values(store: ReadStateStore): Iterator[UnsafeRow] } object StreamingAggregationStateManager extends Logging { @@ -90,7 +90,7 @@ abstract class StreamingAggregationStateManagerBaseImpl( override def remove(store: StateStore, key: UnsafeRow): Unit = store.remove(key) - override def keys(store: StateStore): Iterator[UnsafeRow] = { + override def keys(store: ReadStateStore): Iterator[UnsafeRow] = { // discard and don't convert values to avoid computation store.getRange(None, None).map(_.key) } @@ -113,7 +113,7 @@ class StreamingAggregationStateManagerImplV1( override def getStateValueSchema: StructType = inputRowAttributes.toStructType - override def get(store: StateStore, key: UnsafeRow): UnsafeRow = { + override def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow = { store.get(key) } @@ -121,11 +121,11 @@ class StreamingAggregationStateManagerImplV1( store.put(getKey(row), row) } - override def iterator(store: StateStore): Iterator[UnsafeRowPair] = { + override def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] = { store.iterator() } - override def values(store: StateStore): Iterator[UnsafeRow] = { + override def values(store: ReadStateStore): Iterator[UnsafeRow] = { store.iterator().map(_.value) } } @@ -167,7 +167,7 @@ class StreamingAggregationStateManagerImplV2( override def getStateValueSchema: StructType = valueExpressions.toStructType - override def get(store: StateStore, key: UnsafeRow): UnsafeRow = { + override def get(store: ReadStateStore, key: UnsafeRow): UnsafeRow = { val savedState = store.get(key) if (savedState == null) { return savedState @@ -182,11 +182,11 @@ class StreamingAggregationStateManagerImplV2( store.put(key, value) } - override def iterator(store: StateStore): Iterator[UnsafeRowPair] = { + override def iterator(store: ReadStateStore): Iterator[UnsafeRowPair] = { store.iterator().map(rowPair => new UnsafeRowPair(rowPair.key, restoreOriginalRow(rowPair))) } - override def values(store: StateStore): Iterator[UnsafeRow] = { + override def values(store: ReadStateStore): Iterator[UnsafeRow] = { store.iterator().map(rowPair => restoreOriginalRow(rowPair)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala index c7a332b6d778e..fa89c506587b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/package.scala @@ -82,5 +82,40 @@ package object state { storeCoordinator, extraOptions) } + + /** Map each partition of an RDD along with data in a [[ReadStateStore]]. */ + private[streaming] def mapPartitionsWithReadStateStore[U: ClassTag]( + stateInfo: StatefulOperatorStateInfo, + keySchema: StructType, + valueSchema: StructType, + indexOrdinal: Option[Int], + sessionState: SessionState, + storeCoordinator: Option[StateStoreCoordinatorRef], + extraOptions: Map[String, String] = Map.empty)( + storeReadFn: (ReadStateStore, Iterator[T]) => Iterator[U]) + : ReadStateStoreRDD[T, U] = { + + val cleanedF = dataRDD.sparkContext.clean(storeReadFn) + val wrappedF = (store: ReadStateStore, iter: Iterator[T]) => { + // Clean up the state store. 
+ TaskContext.get().addTaskCompletionListener[Unit](_ => { + store.abort() + }) + cleanedF(store, iter) + } + new ReadStateStoreRDD( + dataRDD, + wrappedF, + stateInfo.checkpointLocation, + stateInfo.queryRunId, + stateInfo.operatorId, + stateInfo.storeVersion, + keySchema, + valueSchema, + indexOrdinal, + sessionState, + storeCoordinator, + extraOptions) + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 497b13793a67b..f5fbe0fc32254 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -247,7 +247,7 @@ case class StateStoreRestoreExec( override protected def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") - child.execute().mapPartitionsWithStateStore( + child.execute().mapPartitionsWithReadStateStore( getStateInfo, keyExpressions.toStructType, stateManager.getStateValueSchema, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 9dc6c0a760d7e..c461bbb7e38eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -958,7 +958,7 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] // two state stores val provider1 = newStoreProvider(storeId) - val restoreStore = provider1.getStore(1) + val restoreStore = provider1.getReadStore(1) val saveStore = provider1.getStore(1) put(saveStore, key, get(restoreStore, key).get + 1) @@ -1034,7 +1034,7 @@ object StateStoreTestsHelper { store.put(stringToRow(key), intToRow(value)) } - def get(store: StateStore, key: String): Option[Int] = { + def get(store: ReadStateStore, key: String): Option[Int] = { Option(store.get(stringToRow(key))).map(rowToInt) } From 26ea417b1448d679fdc777705ee2f99f4e741ef3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 5 Nov 2020 09:23:41 -0800 Subject: [PATCH 0396/1009] [SPARK-33362][SQL] skipSchemaResolution should still require query to be resolved ### What changes were proposed in this pull request? Fix a small bug in `V2WriteCommand.resolved`. It should always require the `table` and `query` to be resolved. ### Why are the changes needed? To prevent potential bugs that we skip resolve the input query. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? a new test Closes #30265 from cloud-fan/ds-minor-2. 
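As a concrete illustration of the gap being closed, the new test added in this patch builds a write command whose target table accepts any schema but whose input query is still unresolved; the plan must now report `resolved = false`. The snippet below mirrors that test and relies on the suite's `byName`/`assertNotResolved` helpers:

```scala
// Target table that skips schema resolution, paired with an unresolved input query.
val table = TestRelationAcceptAnySchema(StructType(Seq(
  StructField("a", FloatType),
  StructField("b", DoubleType))).toAttributes)
val query = UnresolvedRelation(Seq("t"))

// Before this fix, skipSchemaResolution could short-circuit the check and the command
// could be treated as resolved; after the fix the plan stays unresolved.
val parsedPlan = byName(table, query)
assertNotResolved(parsedPlan)
```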
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 3 ++- .../catalyst/plans/logical/v2Commands.scala | 26 ++++++++++--------- .../analysis/DataSourceV2AnalysisSuite.scala | 9 +++++++ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 69cf30c34d494..f32190bc30df0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1507,7 +1507,8 @@ class Analyzer( g.copy(resolvedSelectedExprs, resolvedGroupingExprs, g.child, resolvedAggExprs) - case o: OverwriteByExpression if !o.outputResolved => + case o: OverwriteByExpression + if !(o.table.resolved && o.query.resolved && o.outputResolved) => // do not resolve expression attributes until the query attributes are resolved against the // table by ResolveOutputRelation. that rule will alias the attributes to the table's names. o diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index a1e26ae1ba2c8..f18aecd19b8d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -35,20 +35,20 @@ trait V2WriteCommand extends Command { override def children: Seq[LogicalPlan] = Seq(query) - override lazy val resolved: Boolean = outputResolved + override lazy val resolved: Boolean = table.resolved && query.resolved && outputResolved def outputResolved: Boolean = { + assert(table.resolved && query.resolved, + "`outputResolved` can only be called when `table` and `query` are both resolved.") // If the table doesn't require schema match, we don't need to resolve the output columns. 
- table.skipSchemaResolution || { - table.resolved && query.resolved && query.output.size == table.output.size && - query.output.zip(table.output).forall { - case (inAttr, outAttr) => - // names and types must match, nullability must be compatible - inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && - (outAttr.nullable || !inAttr.nullable) - } - } + table.skipSchemaResolution || (query.output.size == table.output.size && + query.output.zip(table.output).forall { + case (inAttr, outAttr) => + // names and types must match, nullability must be compatible + inAttr.name == outAttr.name && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && + (outAttr.nullable || !inAttr.nullable) + }) } } @@ -86,7 +86,9 @@ case class OverwriteByExpression( query: LogicalPlan, writeOptions: Map[String, String], isByName: Boolean) extends V2WriteCommand { - override lazy val resolved: Boolean = outputResolved && deleteExpr.resolved + override lazy val resolved: Boolean = { + table.resolved && query.resolved && outputResolved && deleteExpr.resolved + } } object OverwriteByExpression { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index 7a2320f931da3..52dcf63426a7e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -248,6 +248,15 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { def byPosition(table: NamedRelation, query: LogicalPlan): LogicalPlan + test("skipSchemaResolution should still require query to be resolved") { + val table = TestRelationAcceptAnySchema(StructType(Seq( + StructField("a", FloatType), + StructField("b", DoubleType))).toAttributes) + val query = UnresolvedRelation(Seq("t")) + val parsedPlan = byName(table, query) + assertNotResolved(parsedPlan) + } + test("byName: basic behavior") { val query = TestRelation(table.schema.toAttributes) From 208b94e4c1e5c500e76c54e8f7a2be6a07ef3f7a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 5 Nov 2020 09:29:53 -0800 Subject: [PATCH 0397/1009] [SPARK-33353][BUILD] Cache dependencies for Coursier with new sbt in GitHub Actions ### What changes were proposed in this pull request? This PR change the behavior of GitHub Actions job that caches dependencies. SPARK-33226 upgraded sbt to 1.4.1. As of 1.3.0, sbt uses Coursier as the dependency resolver / fetcher. So let's change the dependency cache configuration for the GitHub Actions job. ### Why are the changes needed? To make build faster with Coursier for the GitHub Actions job. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions itself. Closes #30259 from sarutak/coursier-cache. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 32 ++++++++++++++-------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 55c578e15724a..e4762523f7018 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -111,13 +111,13 @@ jobs: key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | ${{ matrix.java }}-${{ matrix.hadoop }}-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - ${{ matrix.java }}-${{ matrix.hadoop }}-ivy- + ${{ matrix.java }}-${{ matrix.hadoop }}-coursier- - name: Install JDK ${{ matrix.java }} uses: actions/setup-java@v1 with: @@ -206,13 +206,13 @@ jobs: key: pyspark-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | pyspark-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: pyspark-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - pyspark-ivy- + pyspark-coursier- - name: Install Python 3.6 uses: actions/setup-python@v2 with: @@ -282,13 +282,13 @@ jobs: key: sparkr-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | sparkr-maven- - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: sparkr-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - sparkr-ivy- + sparkr-coursier- - name: Run tests run: | mkdir -p ~/.m2 @@ -404,13 +404,13 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v2 - - name: Cache Ivy local repository + - name: Cache Coursier local repository uses: actions/cache@v2 with: - path: ~/.ivy2/cache - key: scala-213-ivy-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + path: ~/.cache/coursier + key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | - scala-213-ivy- + scala-213-coursier- - name: Install Java 11 uses: actions/setup-java@v1 with: From 1a704793f4846610307d18a8bf5e23a3f97525d3 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 5 Nov 2020 10:09:28 -0800 Subject: [PATCH 0398/1009] [SPARK-33290][SQL][DOCS][FOLLOW-UP] Update SQL migration guide ### What changes were proposed in this pull request? Update SQL migration guide for SPARK-33290 ### Why are the changes needed? Make the change better documented. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30256 from sunchao/SPARK-33290-2. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index fdc764a93424b..55618308c300a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -52,6 +52,8 @@ license: | - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. + - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. + ## Upgrading from Spark SQL 3.0 to 3.0.1 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. From 324275ae8350ec15844ce384f40f1ecc4acdc072 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Thu, 5 Nov 2020 12:38:42 -0600 Subject: [PATCH 0399/1009] [SPARK-33185][YARN] Set up yarn.Client to print direct links to driver stdout/stderr ### What changes were proposed in this pull request? Currently when run in `cluster` mode on YARN, the Spark `yarn.Client` will print out the application report into the logs, to be easily viewed by users. For example: ``` INFO yarn.Client: client token: Token { kind: YARN_CLIENT_TOKEN, service: } diagnostics: N/A ApplicationMaster host: X.X.X.X ApplicationMaster RPC port: 0 queue: default start time: 1602782566027 final status: UNDEFINED tracking URL: http://hostname:8888/proxy/application_/ user: xkrogen ``` I propose adding, alongside the application report, some additional lines like: ``` Driver Logs (stdout): http://hostname:8042/node/containerlogs/container_/xkrogen/stdout?start=-4096 Driver Logs (stderr): http://hostname:8042/node/containerlogs/container_/xkrogen/stderr?start=-4096 ``` This information isn't contained in the `ApplicationReport`, so it's necessary to query the ResourceManager REST API. For now I have added this as an always-on feature, but if there is any concern about adding this REST dependency, I think hiding this feature behind an off-by-default flag is reasonable. ### Why are the changes needed? Typically, the tracking URL can be used to find the logs of the ApplicationMaster/driver while the application is running. Later, the Spark History Server can be used to track this information down, using the stdout/stderr links on the Executors page. However, in the situation when the driver crashed _before_ writing out a history file, the SHS may not be aware of this application, and thus does not contain links to the driver logs. When this situation arises, it can be difficult for users to debug further, since they can't easily find their driver logs. It is possible to reach the logs by using the `yarn logs` commands, but the average Spark user isn't aware of this and shouldn't have to be. With this information readily available in the logs, users can quickly jump to their driver logs, even if it crashed before the SHS became aware of the application. 
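Concretely, the new links are derived from the `logsLink` of the application's most recent attempt, fetched from the RM REST API. A trimmed, illustrative round-trip is shown below; the host name and container ID are placeholders modeled on the new `ClientSuite` test, and `parseAppAttemptsJsonResponse` is the helper added by this patch:

```scala
// Trimmed example of the JSON returned by
//   GET <RM web URL>/ws/v1/cluster/apps/<appId>/appattempts
// and the driver log links derived from its logsLink field.
val json =
  """{"appAttempts": {"appAttempt": [{
    |  "containerId": "container_e1_1500000000000_1234567_01_000001",
    |  "nodeHttpAddress": "node.example.com:8042",
    |  "logsLink": "http://node.example.com:8042/node/containerlogs/container_e1_1500000000000_1234567_01_000001/username"
    |}]}}""".stripMargin

Client.parseAppAttemptsJsonResponse(json)
// Map(stdout -> <logsLink>/stdout?start=-4096, stderr -> <logsLink>/stderr?start=-4096)
```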
This has the additional benefit of providing a quick way to access driver logs, which often contain useful information, in a single click (instead of navigating through the Spark UI). ### Does this PR introduce _any_ user-facing change? Yes, some additional print statements will be created in the application report when using YARN in cluster mode. ### How was this patch tested? Added unit tests for the parsing logic in `yarn.ClientSuite`. Also tested against a live cluster. When the driver is running: ``` INFO Client: Application report for application_XXXXXXXXX_YYYYYY (state: RUNNING) INFO Client: client token: Token { kind: YARN_CLIENT_TOKEN, service: } diagnostics: N/A ApplicationMaster host: host.example.com ApplicationMaster RPC port: ###### queue: queue_name start time: 1604529046091 final status: UNDEFINED tracking URL: http://host.example.com:8080/proxy/application_XXXXXXXXX_YYYYYY/ user: xkrogen Driver Logs (stdout): http://host.example.com:8042/node/containerlogs/container_e07_XXXXXXXXX_YYYYYY_01_000001/xkrogen/stdout?start=-4096 Driver Logs (stderr): http://host.example.com:8042/node/containerlogs/container_e07_XXXXXXXXX_YYYYYY_01_000001/xkrogen/stderr?start=-4096 INFO Client: Application report for application_XXXXXXXXX_YYYYYY (state: RUNNING) ``` I confirmed that when the driver has not yet launched, the report does not include the two Driver Logs items. Will omit the output here for brevity since it looks the same. Closes #30096 from xkrogen/xkrogen-SPARK-33185-yarn-client-print. Authored-by: Erik Krogen Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/deploy/yarn/Client.scala | 73 +++++++++++++++++-- .../org/apache/spark/deploy/yarn/config.scala | 9 +++ .../spark/util/YarnContainerInfoHelper.scala | 14 +++- .../spark/deploy/yarn/ClientSuite.scala | 47 ++++++++++++ 4 files changed, 134 insertions(+), 9 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 517a4af2e4b02..30ca4a6615fe8 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -25,11 +25,16 @@ import java.util.{Locale, Properties, UUID} import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ +import scala.collection.immutable.{Map => IMap} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.util.control.NonFatal +import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.base.Objects import com.google.common.io.Files +import javax.ws.rs.client.ClientBuilder +import javax.ws.rs.core.MediaType +import javax.ws.rs.core.Response.Status.Family import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -46,6 +51,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.security.AMRMTokenIdentifier import org.apache.hadoop.yarn.util.Records +import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.api.python.PythonUtils @@ -58,7 +64,7 @@ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Python._ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, 
YarnCommandBuilderUtils} import org.apache.spark.rpc.RpcEnv -import org.apache.spark.util.{CallerContext, Utils} +import org.apache.spark.util.{CallerContext, Utils, YarnContainerInfoHelper} private[spark] class Client( val args: ClientArguments, @@ -1080,9 +1086,9 @@ private[spark] class Client( // If DEBUG is enabled, log report details every iteration // Otherwise, log them every time the application changes state if (log.isDebugEnabled) { - logDebug(formatReportDetails(report)) + logDebug(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) } else if (lastState != state) { - logInfo(formatReportDetails(report)) + logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) } } @@ -1152,7 +1158,17 @@ private[spark] class Client( appMaster } - private def formatReportDetails(report: ApplicationReport): String = { + /** + * Format an application report and optionally, links to driver logs, in a human-friendly manner. + * + * @param report The application report from YARN. + * @param driverLogsLinks A map of driver log files and their links. Keys are the file names + * (e.g. `stdout`), and values are the links. If empty, nothing will be + * printed. + * @return Human-readable version of the input data. + */ + private def formatReportDetails(report: ApplicationReport, + driverLogsLinks: IMap[String, String]): String = { val details = Seq[(String, String)]( ("client token", getClientToken(report)), ("diagnostics", report.getDiagnostics), @@ -1163,7 +1179,7 @@ private[spark] class Client( ("final status", report.getFinalApplicationStatus.toString), ("tracking URL", report.getTrackingUrl), ("user", report.getUser) - ) + ) ++ driverLogsLinks.map { case (fname, link) => (s"Driver Logs ($fname)", link) } // Use more loggable format if value is null or empty details.map { case (k, v) => @@ -1172,6 +1188,37 @@ private[spark] class Client( }.mkString("") } + /** + * Fetch links to the logs of the driver for the given application ID. This requires hitting the + * RM REST API. Returns an empty map if the links could not be fetched. If this feature is + * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately. + */ + private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = { + if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) { + return IMap() + } + try { + val baseRmUrl = WebAppUtils.getRMWebAppURLWithScheme(hadoopConf) + val response = ClientBuilder.newClient() + .target(baseRmUrl) + .path("ws").path("v1").path("cluster").path("apps") + .path(appId.toString).path("appattempts") + .request(MediaType.APPLICATION_JSON) + .get() + response.getStatusInfo.getFamily match { + case Family.SUCCESSFUL => parseAppAttemptsJsonResponse(response.readEntity(classOf[String])) + case _ => + logWarning(s"Unable to fetch app attempts info from $baseRmUrl, got " + + s"status code ${response.getStatus}: ${response.getStatusInfo.getReasonPhrase}") + IMap() + } + } catch { + case e: Exception => + logWarning(s"Unable to get driver log links for $appId", e) + IMap() + } + } + /** * Submit an application to the ResourceManager. 
* If set spark.yarn.submit.waitAppCompletion to true, it will stay alive @@ -1186,7 +1233,7 @@ private[spark] class Client( val report = getApplicationReport(appId) val state = report.getYarnApplicationState logInfo(s"Application report for $appId (state: $state)") - logInfo(formatReportDetails(report)) + logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { throw new SparkException(s"Application $appId finished with status: $state") } @@ -1577,6 +1624,20 @@ private object Client extends Logging { writer.flush() out.closeEntry() } + + private[yarn] def parseAppAttemptsJsonResponse(jsonString: String): IMap[String, String] = { + val objectMapper = new ObjectMapper() + // If JSON response is malformed somewhere along the way, MissingNode will be returned, + // which allows for safe continuation of chaining. The `elements()` call will be empty, + // and None will get returned. + objectMapper.readTree(jsonString) + .path("appAttempts").path("appAttempt") + .elements().asScala.toList.takeRight(1).headOption + .map(_.path("logsLink").asText("")) + .filterNot(_ == "") + .map(baseUrl => YarnContainerInfoHelper.getLogUrlsFromBaseUrl(baseUrl)) + .getOrElse(IMap()) + } } private[spark] class YarnClusterApplication extends SparkApplication { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index f2e838f6270c9..89a4af2d2a741 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -189,6 +189,15 @@ package object config extends Logging { .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") + private[spark] val CLIENT_INCLUDE_DRIVER_LOGS_LINK = + ConfigBuilder("spark.yarn.includeDriverLogsLink") + .doc("In cluster mode, whether the client application report includes links to the driver " + + "container's logs. This requires polling the ResourceManager's REST API, so it " + + "places some additional load on the RM.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + /* Shared Client-mode AM / Driver configuration. 
*/ private[spark] val AM_MAX_WAIT_TIME = ConfigBuilder("spark.yarn.am.waitTime") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala index 5e39422e868b7..854fe18c22430 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/util/YarnContainerInfoHelper.scala @@ -28,6 +28,16 @@ import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.internal.Logging private[spark] object YarnContainerInfoHelper extends Logging { + + private[this] val DRIVER_LOG_FILE_NAMES = Seq("stdout", "stderr") + private[this] val DRIVER_LOG_START_OFFSET = -4096 + + def getLogUrlsFromBaseUrl(baseUrl: String): Map[String, String] = { + DRIVER_LOG_FILE_NAMES.map { fname => + fname -> s"$baseUrl/$fname?start=$DRIVER_LOG_START_OFFSET" + }.toMap + } + def getLogUrls( conf: Configuration, container: Option[Container]): Option[Map[String, String]] = { @@ -42,9 +52,7 @@ private[spark] object YarnContainerInfoHelper extends Logging { val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user" logDebug(s"Base URL for logs: $baseUrl") - Some(Map( - "stdout" -> s"$baseUrl/stdout?start=-4096", - "stderr" -> s"$baseUrl/stderr?start=-4096")) + Some(getLogUrlsFromBaseUrl(baseUrl)) } catch { case e: Exception => logInfo("Error while building executor logs - executor logs will not be available", e) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index ea3acec3bb78b..fccb2406d66f8 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -583,6 +583,53 @@ class ClientSuite extends SparkFunSuite with Matchers { } } + test("SPARK-33185 Parse YARN AppAttempts valid JSON response") { + val appIdSuffix = "1500000000000_1234567" + val containerId = s"container_e1_${appIdSuffix}_01_000001" + val nodeHost = "node.example.com" + val jsonString = + s""" + |{"appAttempts": { + | "appAttempt": [ { + | "id":1, + | "startTime":1600000000000, + | "finishedTime":1600000100000, + | "containerId":"$containerId", + | "nodeHttpAddress":"$nodeHost:8042", + | "nodeId":"node.example.com:8041", + | "logsLink":"http://$nodeHost:8042/node/containerlogs/$containerId/username", + | "blacklistedNodes":"", + | "nodesBlacklistedBySystem":"", + | "appAttemptId":"appattempt_${appIdSuffix}_000001" + | }] + |}} + |""".stripMargin + val logLinkMap = Client.parseAppAttemptsJsonResponse(jsonString) + assert(logLinkMap.keySet === Set("stdout", "stderr")) + assert(logLinkMap("stdout") === + s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stdout?start=-4096") + assert(logLinkMap("stderr") === + s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stderr?start=-4096") + } + + test("SPARK-33185 Parse YARN AppAttempts invalid JSON response") { + // No "appAttempt" present + assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { } }""") === Map()) + + // "appAttempt" is empty + assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { "appAttempt": [ ] } }""") + === Map()) + + // logsLink is missing + 
assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"id":1}]}}""") + === Map()) + + // logsLink is present but empty + assert( + Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"logsLink":""}]}}""") + === Map()) + } + private val matching = Seq( ("files URI match test1", "file:///file1", "file:///file2"), ("files URI match test2", "file:///c:file1", "file://c:file2"), From cd4e3d3b0c7b1ec645ec9c3b2a1847ce29a65765 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 5 Nov 2020 15:44:04 -0800 Subject: [PATCH 0400/1009] [SPARK-33360][SQL] Simplify DS v2 write resolution ### What changes were proposed in this pull request? Removing duplicated code in `ResolveOutputRelation`, by adding `V2WriteCommand.withNewQuery` ### Why are the changes needed? code cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30264 from cloud-fan/ds-minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 39 ++------- .../catalyst/plans/logical/v2Commands.scala | 16 +++- .../analysis/DataSourceV2AnalysisSuite.scala | 62 +++++++++++++ .../spark/sql/DataFrameWriterV2Suite.scala | 86 +------------------ 4 files changed, 86 insertions(+), 117 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f32190bc30df0..c4e4ffb98fb25 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3046,40 +3046,15 @@ class Analyzer( */ object ResolveOutputRelation extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { - case append @ AppendData(table, query, _, isByName) - if table.resolved && query.resolved && !append.outputResolved => + case v2Write: V2WriteCommand + if v2Write.table.resolved && v2Write.query.resolved && !v2Write.outputResolved => validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - append.copy(query = projection) - } else { - append - } - - case overwrite @ OverwriteByExpression(table, _, query, _, isByName) - if table.resolved && query.resolved && !overwrite.outputResolved => - validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - overwrite.copy(query = projection) - } else { - overwrite - } - - case overwrite @ OverwritePartitionsDynamic(table, query, _, isByName) - if table.resolved && query.resolved && !overwrite.outputResolved => - validateStoreAssignmentPolicy() - val projection = - TableOutputResolver.resolveOutputColumns(table.name, table.output, query, isByName, conf) - - if (projection != query) { - overwrite.copy(query = projection) + val projection = TableOutputResolver.resolveOutputColumns( + v2Write.table.name, v2Write.table.output, v2Write.query, v2Write.isByName, conf) + if (projection != v2Write.query) { + v2Write.withNewQuery(projection) } else { - overwrite + v2Write } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 
f18aecd19b8d8..fb8a9be80385b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, Struct trait V2WriteCommand extends Command { def table: NamedRelation def query: LogicalPlan + def isByName: Boolean override def children: Seq[LogicalPlan] = Seq(query) @@ -50,6 +51,8 @@ trait V2WriteCommand extends Command { (outAttr.nullable || !inAttr.nullable) }) } + + def withNewQuery(newQuery: LogicalPlan): V2WriteCommand } /** @@ -59,7 +62,9 @@ case class AppendData( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand + isByName: Boolean) extends V2WriteCommand { + override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) +} object AppendData { def byName( @@ -89,6 +94,9 @@ case class OverwriteByExpression( override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } + override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { + copy(query = newQuery) + } } object OverwriteByExpression { @@ -116,7 +124,11 @@ case class OverwritePartitionsDynamic( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand + isByName: Boolean) extends V2WriteCommand { + override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { + copy(query = newQuery) + } +} object OverwritePartitionsDynamic { def byName( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index 52dcf63426a7e..ba926f842551f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -248,6 +248,68 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { def byPosition(table: NamedRelation, query: LogicalPlan): LogicalPlan + test("SPARK-33136: output resolved on complex types for V2 write commands") { + def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { + val table = TestRelation(StructType(Seq(StructField("a", toType))).toAttributes) + val query = TestRelation(StructType(Seq(StructField("a", fromType))).toAttributes) + val parsedPlan = byName(table, query) + assertResolved(parsedPlan) + checkAnalysis(parsedPlan, parsedPlan) + } + + // The major difference between `from` and `to` is that `from` is a complex type + // with non-nullable, whereas `to` is same data type with flipping nullable. 
+ + // nested struct type + val fromStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType, nullable = false), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType, nullable = false)))))) + + val toStructType = StructType(Array( + StructField("s", StringType), + StructField("i_nonnull", IntegerType), + StructField("st", StructType(Array( + StructField("l", LongType), + StructField("s_nonnull", StringType)))))) + + assertTypeCompatibility("struct", fromStructType, toStructType) + + // array type + assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), + ArrayType(LongType, containsNull = true)) + + // array type with struct type + val fromArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType, nullable = false))), + containsNull = false) + + val toArrayWithStructType = ArrayType( + StructType(Array(StructField("s", StringType))), + containsNull = true) + + assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) + + // map type + assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), + MapType(IntegerType, StringType, valueContainsNull = true)) + + // map type with struct type + val fromMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType, nullable = false))), + valueContainsNull = false) + + val toMapWithStructType = MapType( + IntegerType, + StructType(Array(StructField("s", StringType))), + valueContainsNull = true) + + assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) + } + test("skipSchemaResolution should still require query to be resolved") { val table = TestRelationAcceptAnySchema(StructType(Seq( StructField("a", FloatType), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index ff5c6242987de..8720c1f620564 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,15 +23,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamedRelation, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, V2WriteCommand} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{ArrayType, DataType, IntegerType, LongType, MapType, StringType, StructField, StructType, TimestampType} +import 
org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -100,86 +100,6 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo assert(v2.catalog.exists(_ == catalogPlugin)) } - case class FakeV2WriteCommand(table: NamedRelation, query: LogicalPlan) extends V2WriteCommand - - test("SPARK-33136 output resolved on complex types for V2 write commands") { - val tableCatalog = catalog("testcat") - - def assertTypeCompatibility(name: String, fromType: DataType, toType: DataType): Unit = { - val fromTableName = s"from_table_$name" - tableCatalog.createTable( - Identifier.of(Array(), fromTableName), - StructType(Array(StructField("col", fromType))), - Array.empty, - new java.util.HashMap[String, String]()) - - val toTable = tableCatalog.createTable( - Identifier.of(Array(), s"to_table_$name"), - StructType(Array(StructField("col", toType))), - Array.empty, - new java.util.HashMap[String, String]()) - - val df = spark.table(s"testcat.$fromTableName") - - val relation = DataSourceV2Relation.create(toTable, Some(tableCatalog), None) - val writeCommand = FakeV2WriteCommand(relation, df.queryExecution.analyzed) - - assert(writeCommand.outputResolved, s"Unable to write from type $fromType to type $toType.") - } - - // The major difference between `from` and `to` is that `from` is a complex type - // with non-nullable, whereas `to` is same data type with flipping nullable. - - // nested struct type - val fromStructType = StructType(Array( - StructField("s", StringType), - StructField("i_nonnull", IntegerType, nullable = false), - StructField("st", StructType(Array( - StructField("l", LongType), - StructField("s_nonnull", StringType, nullable = false)))))) - - val toStructType = StructType(Array( - StructField("s", StringType), - StructField("i_nonnull", IntegerType), - StructField("st", StructType(Array( - StructField("l", LongType), - StructField("s_nonnull", StringType)))))) - - assertTypeCompatibility("struct", fromStructType, toStructType) - - // array type - assertTypeCompatibility("array", ArrayType(LongType, containsNull = false), - ArrayType(LongType, containsNull = true)) - - // array type with struct type - val fromArrayWithStructType = ArrayType( - StructType(Array(StructField("s", StringType, nullable = false))), - containsNull = false) - - val toArrayWithStructType = ArrayType( - StructType(Array(StructField("s", StringType))), - containsNull = true) - - assertTypeCompatibility("array_struct", fromArrayWithStructType, toArrayWithStructType) - - // map type - assertTypeCompatibility("map", MapType(IntegerType, StringType, valueContainsNull = false), - MapType(IntegerType, StringType, valueContainsNull = true)) - - // map type with struct type - val fromMapWithStructType = MapType( - IntegerType, - StructType(Array(StructField("s", StringType, nullable = false))), - valueContainsNull = false) - - val toMapWithStructType = MapType( - IntegerType, - StructType(Array(StructField("s", StringType))), - valueContainsNull = true) - - assertTypeCompatibility("map_struct", fromMapWithStructType, toMapWithStructType) - } - test("Append: basic append") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") From 4941b7ae18d4081233953cc11328645d0b4cf208 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Thu, 5 Nov 2020 17:37:44 -0800 Subject: [PATCH 0401/1009] [SPARK-33365][BUILD] 
Update SBT to 1.4.2 ### What changes were proposed in this pull request? This PR aims to update SBT from 1.4.1 to 1.4.2. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/sbt/sbt/releases/tag/v1.4.2 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30268 from williamhyun/sbt. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index d70d98448e4ca..5ec1d700fd2a8 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.1 +sbt.version=1.4.2 From 90f35c663e4118b7a716e614f37b8d888d0d6bd6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 6 Nov 2020 12:46:26 +0900 Subject: [PATCH 0402/1009] [MINOR][SQL] Fix incorrect JIRA ID comments in Analyzer ### What changes were proposed in this pull request? This PR fixes incorrect JIRA ids in `Analyzer.scala` introduced by SPARK-31670 (https://github.com/apache/spark/pull/28490) ```scala - // SPARK-31607: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations + // SPARK-31670: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations ``` ### Why are the changes needed? Fix the wrong information. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This is a comment change. Manually review. Closes #30269 from dongjoon-hyun/SPARK-31670-MINOR. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index c4e4ffb98fb25..f0143fdb23473 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1463,7 +1463,7 @@ class Analyzer( // rule: ResolveDeserializer. case plan if containsDeserializer(plan.expressions) => plan - // SPARK-31607: Resolve Struct field in groupByExpressions and aggregateExpressions + // SPARK-31670: Resolve Struct field in groupByExpressions and aggregateExpressions // with CUBE/ROLLUP will be wrapped with alias like Alias(GetStructField, name) with // different ExprId. This cause aggregateExpressions can't be replaced by expanded // groupByExpressions in `ResolveGroupingAnalytics.constructAggregateExprs()`, we trim @@ -1487,7 +1487,7 @@ class Analyzer( a.copy(resolvedGroupingExprs, resolvedAggExprs, a.child) - // SPARK-31607: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations + // SPARK-31670: Resolve Struct field in selectedGroupByExprs/groupByExprs and aggregations // will be wrapped with alias like Alias(GetStructField, name) with different ExprId. 
// This cause aggregateExpressions can't be replaced by expanded groupByExpressions in // `ResolveGroupingAnalytics.constructAggregateExprs()`, we trim unnecessary alias From d16311051d4c67b65116ed182c87f96656b63333 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 6 Nov 2020 05:20:25 +0000 Subject: [PATCH 0403/1009] [SPARK-32934][SQL][FOLLOW-UP] Refine class naming and code comments ### What changes were proposed in this pull request? 1. Rename `OffsetWindowSpec` to `OffsetWindowFunction`, as it's the base class for all offset based window functions. 2. Refine and add more comments. 3. Remove `isRelative` as it's useless. ### Why are the changes needed? code refinement ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30261 from cloud-fan/window. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 ++-- .../expressions/windowExpressions.scala | 24 +++++++------------ .../sql/execution/window/WindowExec.scala | 10 ++++---- .../sql/execution/window/WindowExecBase.scala | 17 ++++++------- .../window/WindowFunctionFrame.scala | 10 ++++---- 5 files changed, 30 insertions(+), 35 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d261f26072bcc..ac91fa0b5811e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -166,10 +166,10 @@ trait CheckAnalysis extends PredicateHelper { case w @ WindowExpression(AggregateExpression(_, _, true, _, _), _) => failAnalysis(s"Distinct window functions are not supported: $w") - case w @ WindowExpression(_: FrameLessOffsetWindowFunction, + case w @ WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, order, frame: SpecifiedWindowFrame)) if order.isEmpty || !frame.isOffset => - failAnalysis("An offset window function can only be evaluated in an ordered " + + failAnalysis(s"${wf.prettyName} function can only be evaluated in an ordered " + s"row-based window frame with a single offset: $w") case w @ WindowExpression(e, s) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 1a57afa8d9aae..b6dd817794723 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -327,7 +327,7 @@ object WindowFunctionType { } } -trait OffsetWindowSpec extends Expression { +trait OffsetWindowFunction extends WindowFunction { /** * Input expression to evaluate against a row which a number of rows below or above (depending on * the value and sign of the offset) the starting row (current row if isRelative=true, or the @@ -356,23 +356,21 @@ trait OffsetWindowSpec extends Expression { val ignoreNulls: Boolean /** - * Whether the offset is starts with the current row. If `isRelative` is true, `offset` means - * the offset is start with the current row. otherwise, the offset is starts with the first - * row of the entire window frame. + * A fake window frame which is used to hold the offset information. 
It's used as a key to group + * by offset window functions in `WindowExecBase.windowFrameExpressionFactoryPairs`, as offset + * window functions with the same offset and same window frame can be evaluated together. */ - val isRelative: Boolean - lazy val fakeFrame = SpecifiedWindowFrame(RowFrame, offset, offset) } /** * A frameless offset window function is a window function that cannot specify window frame and - * returns the value of the input column offset by a number of rows within the partition. - * For instance: a FrameLessOffsetWindowFunction for value x with offset -2, will get the value of - * x 2 rows back in the partition. + * returns the value of the input column offset by a number of rows according to the current row + * within the partition. For instance: a FrameLessOffsetWindowFunction for value x with offset -2, + * will get the value of x 2 rows back from the current row in the partition. */ abstract class FrameLessOffsetWindowFunction - extends WindowFunction with OffsetWindowSpec with Unevaluable with ImplicitCastInputTypes { + extends OffsetWindowFunction with Unevaluable with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(input, offset, default) @@ -391,8 +389,6 @@ abstract class FrameLessOffsetWindowFunction override val ignoreNulls = false - override val isRelative = true - override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { @@ -630,14 +626,12 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab case class NthValue(input: Expression, offset: Expression, ignoreNulls: Boolean) - extends AggregateWindowFunction with OffsetWindowSpec with ImplicitCastInputTypes { + extends AggregateWindowFunction with OffsetWindowFunction with ImplicitCastInputTypes { def this(child: Expression, offset: Expression) = this(child, offset, false) override lazy val default = Literal.create(null, input.dataType) - override val isRelative = false - override def children: Seq[Expression] = input :: offset :: Nil override val frame: WindowFrame = UnspecifiedFrame diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 439c31a47fd3b..b693cae824bf9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -57,12 +57,10 @@ import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, * 3. CURRENT ROW AND 1 FOLLOWING * 4. 1 PRECEDING AND 1 FOLLOWING * 5. 1 FOLLOWING AND 2 FOLLOWING - * - Offset frame: The frame consist of one row, which is an offset number of rows. There are three - * implement of offset frame. - * 1. [[FrameLessOffsetWindowFunction]] returns the value of the input column offset by a number - * of rows according to the current row. - * 2. [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]] - * returns the value of the input column offset by a number of rows within the frame. + * - Offset frame: The frame consist of one row, which is an offset number of rows away from the + * current row. Only [[OffsetWindowFunction]]s can be processed in an offset frame. 
There are + * three implements of offset frame: [[FrameLessOffsetWindowFunctionFrame]], + * [[UnboundedOffsetWindowFunctionFrame]] and [[UnboundedPrecedingOffsetWindowFunctionFrame]]. * * Different frame boundaries can be used in Growing, Shrinking and Moving frames. A frame * boundary can be either Row or Range based: diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index f0b99c1522aa1..a6a3f3d7384bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -136,8 +136,9 @@ trait WindowExecBase extends UnaryExecNode { val frame = spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame] function match { case AggregateExpression(f, _, _, _, _) => collect("AGGREGATE", frame, e, f) - case f: FrameLessOffsetWindowFunction => collect("FRAME_LESS_OFFSET", frame, e, f) - case f: OffsetWindowSpec if !f.ignoreNulls && + case f: FrameLessOffsetWindowFunction => + collect("FRAME_LESS_OFFSET", f.fakeFrame, e, f) + case f: OffsetWindowFunction if !f.ignoreNulls && frame.frameType == RowFrame && frame.lower == UnboundedPreceding => frame.upper match { case UnboundedFollowing => collect("UNBOUNDED_OFFSET", f.fakeFrame, e, f) @@ -184,8 +185,8 @@ trait WindowExecBase extends UnaryExecNode { new FrameLessOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. + functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), @@ -195,8 +196,8 @@ trait WindowExecBase extends UnaryExecNode { new UnboundedOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. + functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), @@ -207,8 +208,8 @@ trait WindowExecBase extends UnaryExecNode { new UnboundedPrecedingOffsetWindowFunctionFrame( target, ordinal, - // OFFSET frame functions are guaranteed be OffsetWindowSpec. - functions.map(_.asInstanceOf[OffsetWindowSpec]), + // OFFSET frame functions are guaranteed be OffsetWindowFunction. 
+ functions.map(_.asInstanceOf[OffsetWindowFunction]), child.output, (expressions, schema) => MutableProjection.create(expressions, schema), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index e8a83f9772d35..2a4b957c35426 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -80,7 +80,7 @@ object WindowFunctionFrame { abstract class OffsetWindowFunctionFrameBase( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) @@ -144,7 +144,7 @@ abstract class OffsetWindowFunctionFrameBase( class FrameLessOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) @@ -187,12 +187,13 @@ class FrameLessOffsetWindowFunctionFrame( class UnboundedOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + assert(offset > 0) override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows @@ -230,12 +231,13 @@ class UnboundedOffsetWindowFunctionFrame( class UnboundedPrecedingOffsetWindowFunctionFrame( target: InternalRow, ordinal: Int, - expressions: Array[OffsetWindowSpec], + expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, offset: Int) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + assert(offset > 0) var selectedRow: UnsafeRow = null From f6c00079709b6dcda72b08d3e9865ca6b49f8b74 Mon Sep 17 00:00:00 2001 From: neko Date: Fri, 6 Nov 2020 13:45:02 +0800 Subject: [PATCH 0404/1009] [SPARK-33342][WEBUI] fix the wrong url and display name of blocking thread in threadDump page ### What changes were proposed in this pull request? fix the wrong url and display name of blocking thread in threadDump page. The blockingThreadId variable passed to the page should be of string type instead of Option type. ### Why are the changes needed? blocking threadId in the ui page is not displayed well, and the corresponding url cannot be redirected normally ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? The pr only involves minor changes to the page and does not affect other functions, The manual test results are as follows. The thread name displayed on the page is correct, and you can click on the URL to jump to the corresponding url ![shows_ok](https://user-images.githubusercontent.com/52202080/98108177-89488d00-1ed6-11eb-9488-8446c3f38bad.gif) Closes #30249 from akiyamaneko/thread-dump-improve. 
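As a quick illustration of the symptom (not the exact page code): interpolating the raw `Option[Long]` into the rendered HTML leaks the `Some(...)` wrapper into both the link target and the label, which is why the ID has to be unwrapped to a plain string first:

```scala
scala> val blockedByThreadId: Option[Long] = Some(123L)
blockedByThreadId: Option[Long] = Some(123)

scala> s"Blocked by Thread $blockedByThreadId"  // what the page effectively rendered
res0: String = Blocked by Thread Some(123)
```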
Authored-by: neko Signed-off-by: Gengliang Wang --- .../org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala index 2c7aeeabb3601..c3246dc90976c 100644 --- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorThreadDumpPage.scala @@ -41,10 +41,10 @@ private[ui] class ExecutorThreadDumpPage( val dumpRows = threadDump.map { thread => val threadId = thread.threadId val blockedBy = thread.blockedByThreadId match { - case Some(_) => + case Some(blockingThreadId) => case None => Text("") } From 733a468726849ba17ab27bd20895f253590fedcb Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 6 Nov 2020 05:46:38 +0000 Subject: [PATCH 0405/1009] [SPARK-33130][SQL] Support ALTER TABLE in JDBC v2 Table Catalog: add, update type and nullability of columns (MsSqlServer dialect) ### What changes were proposed in this pull request? Override the default SQL strings for: ALTER TABLE RENAME COLUMN ALTER TABLE UPDATE COLUMN NULLABILITY in the following MsSQLServer JDBC dialect according to official documentation. Write MsSqlServer integration tests for JDBC. ### Why are the changes needed? To add the support for alter table when interacting with MSSql Server. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? added tests Closes #30038 from ScrapCodes/mssql-dialect. Authored-by: Prashant Sharma Signed-off-by: Wenchen Fan --- .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 90 +++++++++++++++++++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 38 ++++++++ 2 files changed, 128 insertions(+) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala new file mode 100644 index 0000000000000..905e32aaa918e --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.{Connection, SQLFeatureNotSupportedException} + +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.types._ +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): + * {{{ + * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2*MsSqlServerIntegrationSuite" + * }}} + */ +@DockerTest +class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { + + override val catalogName: String = "mssql" + + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2019-GA-ubuntu-16.04") + override val env = Map( + "SA_PASSWORD" -> "Sapass123", + "ACCEPT_EULA" -> "Y" + ) + override val usesIpc = false + override val jdbcPort: Int = 1433 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" + } + + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.mssql", classOf[JDBCTableCatalog].getName) + .set("spark.sql.catalog.mssql.url", db.getJdbcUrl(dockerIp, externalPort)) + + override val connectionTimeout = timeout(7.minutes) + + override def dataPreparation(conn: Connection): Unit = {} + + override def testUpdateColumnType(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE STRING") + t = spark.table(tbl) + expectedSchema = new StructType().add("ID", StringType) + assert(t.schema === expectedSchema) + // Update column type from STRING to INTEGER + val msg1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER") + }.getMessage + assert(msg1.contains("Cannot update alt_table field ID: string cannot be cast to int")) + } + + override def testUpdateColumnNullability(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + // Update nullability is unsupported for mssql db. 
+ val msg = intercept[AnalysisException] { + sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") + }.getCause.asInstanceOf[SQLFeatureNotSupportedException].getMessage + + assert(msg.contains("UpdateColumnNullability is not supported")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 1c6e8c359aa15..dc39a10987c91 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.jdbc +import java.sql.SQLFeatureNotSupportedException import java.util.Locale import org.apache.spark.sql.internal.SQLConf @@ -64,4 +65,41 @@ private object MsSqlServerDialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"EXEC sp_rename $oldTable, $newTable" } + + // scalastyle:off line.size.limit + // see https://docs.microsoft.com/en-us/sql/relational-databases/tables/add-columns-to-a-table-database-engine?view=sql-server-ver15 + // scalastyle:on line.size.limit + override def getAddColumnQuery( + tableName: String, + columnName: String, + dataType: String): String = { + s"ALTER TABLE $tableName ADD ${quoteIdentifier(columnName)} $dataType" + } + + // scalastyle:off line.size.limit + // See https://docs.microsoft.com/en-us/sql/relational-databases/system-stored-procedures/sp-rename-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit + override def getRenameColumnQuery( + tableName: String, + columnName: String, + newName: String, + dbMajorVersion: Int): String = { + s"EXEC sp_rename '$tableName.${quoteIdentifier(columnName)}'," + + s" ${quoteIdentifier(newName)}, 'COLUMN'" + } + + // scalastyle:off line.size.limit + // see https://docs.microsoft.com/en-us/sql/t-sql/statements/alter-table-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit + // require to have column data type to change the column nullability + // ALTER TABLE tbl_name ALTER COLUMN col_name datatype [NULL | NOT NULL] + // column_definition: + // data_type [NOT NULL | NULL] + // We don't have column data type here, so we throw Exception for now + override def getUpdateColumnNullabilityQuery( + tableName: String, + columnName: String, + isNullable: Boolean): String = { + throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") + } } From 68c032c246bb091b25d80e436b9288cca9245265 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 5 Nov 2020 22:00:45 -0800 Subject: [PATCH 0406/1009] [SPARK-33364][SQL] Introduce the "purge" option in TableCatalog.dropTable for v2 catalog ### What changes were proposed in this pull request? This PR proposes to introduce the `purge` option in `TableCatalog.dropTable` so that v2 catalogs can use the option if needed. Related discussion: https://github.com/apache/spark/pull/30079#discussion_r510594110 ### Why are the changes needed? Spark DDL supports passing the purge option to `DROP TABLE` command. However, the option is not used (ignored) for v2 catalogs. ### Does this PR introduce _any_ user-facing change? This PR introduces a new API in `TableCatalog`. ### How was this patch tested? Added a test. Closes #30267 from imback82/purge_table. 
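For context, a hedged sketch of the user-facing path this wires up (catalog and table names follow the test below; the behaviour against the default implementation is exactly what the new DataSourceV2SQLSuite test asserts):

```scala
// DROP TABLE ... PURGE was already parsed; this change forwards the purge flag to the v2 catalog.
// A catalog that does not override dropTable(Identifier, Boolean) rejects the purge request.
spark.sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
spark.sql("DROP TABLE testcat.ns.t PURGE") // UnsupportedOperationException: "Purge option is not supported."
```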
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/TableCatalog.java | 23 +++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../datasources/v2/DropTableExec.scala | 9 +++++--- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++++++++ 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index b818515adf9c0..92079d127b1e3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -162,6 +162,29 @@ Table alterTable( */ boolean dropTable(Identifier ident); + /** + * Drop a table in the catalog with an option to purge. + *

    + * If the catalog supports views and contains a view for the identifier and not a table, this + * must not drop the view and must return false. + *

+ * If the catalog supports the option to purge a table, this method must be overridden. + * The default implementation falls back to {@link #dropTable(Identifier) dropTable} if the + * purge option is set to false. Otherwise, it throws {@link UnsupportedOperationException}. + * + * @param ident a table identifier + * @param purge whether a table should be purged + * @return true if a table was deleted, false if no table exists for the identifier + * + * @since 3.1.0 + */ + default boolean dropTable(Identifier ident, boolean purge) { + if (purge) { + throw new UnsupportedOperationException("Purge option is not supported."); + } + return dropTable(ident); + } + /** * Renames a table in the catalog. *

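A hedged Scala sketch of how a catalog that does support purging might override the new two-argument method; only the `dropTable` signatures come from the interface above, while `purgeFromMetastore` and the class name are hypothetical:

```scala
import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog}

// Sketch under assumptions: purgeFromMetastore stands in for whatever the backing store
// offers for dropping a table and deleting its data immediately (skipping any trash/soft delete).
abstract class PurgeAwareCatalog extends TableCatalog {
  protected def purgeFromMetastore(ident: Identifier): Boolean

  override def dropTable(ident: Identifier, purge: Boolean): Boolean =
    if (purge) purgeFromMetastore(ident) // purge requested: remove the data right away
    else dropTable(ident)                // otherwise behave like the one-argument drop
}
```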
    diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 4bb58142b3d19..648929eaa33ce 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -228,8 +228,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DescribeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("Describing columns is not supported for v2 tables.") - case DropTable(r: ResolvedTable, ifExists, _) => - DropTableExec(r.catalog, r.identifier, ifExists) :: Nil + case DropTable(r: ResolvedTable, ifExists, purge) => + DropTableExec(r.catalog, r.identifier, ifExists, purge) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 967613f77577c..1fd0cd177478b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -25,12 +25,15 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** * Physical plan node for dropping a table. */ -case class DropTableExec(catalog: TableCatalog, ident: Identifier, ifExists: Boolean) - extends V2CommandExec { +case class DropTableExec( + catalog: TableCatalog, + ident: Identifier, + ifExists: Boolean, + purge: Boolean) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { - catalog.dropTable(ident) + catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 893ee5f130cda..444daf8233c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -751,6 +751,17 @@ class DataSourceV2SQLSuite sql("DROP TABLE IF EXISTS testcat.db.notbl") } + test("DropTable: purge option") { + withTable("testcat.ns.t") { + sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") + val ex = intercept[UnsupportedOperationException] { + sql ("DROP TABLE testcat.ns.t PURGE") + } + // The default TableCatalog.dropTable implementation doesn't support the purge option. + assert(ex.getMessage.contains("Purge option is not supported")) + } + } + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { withTable("testcat.ns.t") { withTempView("t") { From 93ad26be01a47fb075310a26188e238d55110098 Mon Sep 17 00:00:00 2001 From: Warren Zhu Date: Fri, 6 Nov 2020 16:53:10 +0900 Subject: [PATCH 0407/1009] [SPARK-23432][UI] Add executor peak jvm memory metrics in executors page ### What changes were proposed in this pull request? Add executor peak jvm memory metrics in executors page ![image](https://user-images.githubusercontent.com/1633312/97767765-9121bf00-1adb-11eb-93c7-7912d9fe7826.png) ### Why are the changes needed? 
Users can see executor peak JVM memory metrics on the Executors page. ### Does this PR introduce _any_ user-facing change? Users can see executor peak JVM memory metrics on the Executors page. ### How was this patch tested? Manually tested Closes #30186 from warrenzhu25/23432. Authored-by: Warren Zhu Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/static/executorspage-template.html | 16 ++++++ .../apache/spark/ui/static/executorspage.js | 52 +++++++++++++++++-- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html index 5e835c053eb6c..ec3cb5bb8ae5e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage-template.html @@ -86,6 +86,22 @@

    Executors

    Off Heap Storage Memory + + + Peak JVM Memory OnHeap / OffHeap + + + Peak Execution Memory OnHeap / OffHeap + + + Peak Storage Memory OnHeap / OffHeap + + + Peak Pool Memory Direct / Mapped Disk Used Cores Resources diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index d4eaea9103771..4f179a93c9d5f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -119,7 +119,7 @@ function totalDurationColor(totalGCTime, totalDuration) { } var sumOptionalColumns = [3, 4]; -var execOptionalColumns = [5, 6, 9, 10]; +var execOptionalColumns = [5, 6, 7, 8, 9, 10, 13, 14]; var execDataTable; var sumDataTable; @@ -412,6 +412,42 @@ $(document).ready(function () { formatBytes(row.memoryMetrics.totalOffHeapStorageMemory, type)); } }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(row.peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.JVMOffHeapMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(row.peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.OffHeapExecutionMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(row.peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.OffHeapStorageMemory, type)); + } + }, + { + data: function (row, type) { + if (type !== 'display') + return row.peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(row.peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(row.peakMemoryMetrics.MappedPoolMemory, type)); + } + }, {data: 'diskUsed', render: formatBytes}, {data: 'totalCores'}, {name: 'resourcesCol', data: 'resources', render: formatResourceCells, orderable: false}, @@ -462,8 +498,12 @@ $(document).ready(function () { "columnDefs": [ {"visible": false, "targets": 5}, {"visible": false, "targets": 6}, + {"visible": false, "targets": 7}, + {"visible": false, "targets": 8}, {"visible": false, "targets": 9}, - {"visible": false, "targets": 10} + {"visible": false, "targets": 10}, + {"visible": false, "targets": 13}, + {"visible": false, "targets": 14} ], "deferRender": true }; @@ -571,8 +611,12 @@ $(document).ready(function () { "
Select All" +
      "On Heap Memory" +
      "Off Heap Memory" +
-     "Resources" +
-     "Resource Profile Id" +
+     "Peak JVM Memory OnHeap / OffHeap" +
+     "Peak Execution Memory OnHeap / OffHeap" +
+     "Peak Storage Memory OnHeap / OffHeap" +
+     "Peak Pool Memory Direct / Mapped" +
+     "Resources" +
+     "Resource Profile Id
    " + ""); reselectCheckboxesBasedOnTaskTableState(); From 09fa7ecae146c0865fc535b4b17175ca5714cfa4 Mon Sep 17 00:00:00 2001 From: Stuart White Date: Fri, 6 Nov 2020 13:12:35 -0800 Subject: [PATCH 0408/1009] [SPARK-33291][SQL] Improve DataFrame.show for nulls in arrays and structs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The changes in [SPARK-32501 Inconsistent NULL conversions to strings](https://issues.apache.org/jira/browse/SPARK-32501) introduced some behavior that I'd like to clean up a bit. Here's sample code to illustrate the behavior I'd like to clean up: ```scala val rows = Seq[String](null) .toDF("value") .withColumn("struct1", struct('value as "value1")) .withColumn("struct2", struct('value as "value1", 'value as "value2")) .withColumn("array1", array('value)) .withColumn("array2", array('value, 'value)) // Show the DataFrame using the "first" codepath. rows.show(truncate=false) +-----+-------+-------------+------+--------+ |value|struct1|struct2 |array1|array2 | +-----+-------+-------------+------+--------+ |null |{ null}|{ null, null}|[] |[, null]| +-----+-------+-------------+------+--------+ // Write the DataFrame to disk, then read it back and show it to trigger the "codegen" code path: rows.write.parquet("rows") spark.read.parquet("rows").show(truncate=false) +-----+-------+-------------+-------+-------------+ |value|struct1|struct2 |array1 |array2 | +-----+-------+-------------+-------+-------------+ |null |{ null}|{ null, null}|[ null]|[ null, null]| +-----+-------+-------------+-------+-------------+ ``` Notice: 1. If the first element of a struct is null, it is printed with a leading space (e.g. "\{ null\}"). I think it's preferable to print it without the leading space (e.g. "\{null\}"). This is consistent with how non-null values are printed inside a struct. 2. If the first element of an array is null, it is not printed at all in the first code path, and the "codegen" code path prints it with a leading space. I think both code paths should be consistent and print it without a leading space (e.g. "[null]"). The desired result of this PR is to product the following output via both code paths: ``` +-----+-------+------------+------+------------+ |value|struct1|struct2 |array1|array2 | +-----+-------+------------+------+------------+ |null |{null} |{null, null}|[null]|[null, null]| +-----+-------+------------+------+------------+ ``` This contribution is my original work and I license the work to the project under the project’s open source license. ### Why are the changes needed? To correct errors and inconsistencies in how DataFrame.show() displays nulls inside arrays and structs. ### Does this PR introduce _any_ user-facing change? Yes. This PR changes what is printed out by DataFrame.show(). ### How was this patch tested? I added new test cases in CastSuite.scala to cover the cases addressed by this PR. Closes #30189 from stwhit/show_nulls. 
Authored-by: Stuart White Signed-off-by: Liang-Chi Hsieh --- .../spark/sql/catalyst/expressions/Cast.scala | 20 +++++++++------- .../sql/catalyst/expressions/CastSuite.scala | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 48a9e19c9d953..4af12d61e86d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -315,7 +315,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit builder.append("[") if (array.numElements > 0) { val toUTF8String = castToString(et) - if (!array.isNullAt(0)) { + if (array.isNullAt(0)) { + if (!legacyCastToStr) builder.append("null") + } else { builder.append(toUTF8String(array.get(0, et)).asInstanceOf[UTF8String]) } var i = 1 @@ -376,7 +378,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val st = fields.map(_.dataType) val toUTF8StringFuncs = st.map(castToString) if (row.isNullAt(0)) { - if (!legacyCastToStr) builder.append(" null") + if (!legacyCastToStr) builder.append("null") } else { builder.append(toUTF8StringFuncs(0)(row.get(0, st(0))).asInstanceOf[UTF8String]) } @@ -898,8 +900,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ } - private def outNullElem(buffer: ExprValue): Block = { - if (legacyCastToStr) code"" else code"""$buffer.append(" null");""" + private def appendIfNotLegacyCastToStr(buffer: ExprValue, s: String): Block = { + if (!legacyCastToStr) code"""$buffer.append("$s");""" else EmptyBlock } private def writeArrayToStringBuilder( @@ -925,14 +927,14 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit |$buffer.append("["); |if ($array.numElements() > 0) { | if ($array.isNullAt(0)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, "null")} | } else { | $buffer.append($elementToStringFunc(${CodeGenerator.getValue(array, et, "0")})); | } | for (int $loopIndex = 1; $loopIndex < $array.numElements(); $loopIndex++) { | $buffer.append(","); | if ($array.isNullAt($loopIndex)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($elementToStringFunc(${CodeGenerator.getValue(array, et, loopIndex)})); @@ -982,7 +984,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | $buffer.append($keyToStringFunc($getMapFirstKey)); | $buffer.append(" ->"); | if ($map.valueArray().isNullAt(0)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($valueToStringFunc($getMapFirstValue)); @@ -992,7 +994,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit | $buffer.append($keyToStringFunc($getMapKeyArray)); | $buffer.append(" ->"); | if ($map.valueArray().isNullAt($loopIndex)) { - | ${outNullElem(buffer)} + | ${appendIfNotLegacyCastToStr(buffer, " null")} | } else { | $buffer.append(" "); | $buffer.append($valueToStringFunc($getMapValueArray)); @@ -1016,7 +1018,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit code""" |${if (i != 0) code"""$buffer.append(",");""" else EmptyBlock} |if ($row.isNullAt($i)) { - | ${outNullElem(buffer)} + | 
${appendIfNotLegacyCastToStr(buffer, if (i == 0) "null" else " null")} |} else { | ${if (i != 0) code"""$buffer.append(" ");""" else EmptyBlock} | diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 7caa4a55c06af..61133e2db5cbd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -717,6 +717,17 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ret8, "[[[a], [b, c]], [[d]]]") } + test("SPARK-33291: Cast array with null elements to string") { + Seq(false, true).foreach { omitNull => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> omitNull.toString) { + val ret1 = cast(Literal.create(Array(null, null)), StringType) + checkEvaluation( + ret1, + s"[${if (omitNull) "" else "null"},${if (omitNull) "" else " null"}]") + } + } + } + test("SPARK-22973 Cast map to string") { Seq( false -> ("{", "}"), @@ -773,6 +784,19 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-33291: Cast struct with null elements to string") { + Seq( + false -> ("{", "}"), + true -> ("[", "]")).foreach { case (legacyCast, (lb, rb)) => + withSQLConf(SQLConf.LEGACY_COMPLEX_TYPES_TO_STRING.key -> legacyCast.toString) { + val ret1 = cast(Literal.create(Tuple2[String, String](null, null)), StringType) + checkEvaluation( + ret1, + s"$lb${if (legacyCast) "" else "null"},${if (legacyCast) "" else " null"}$rb") + } + } + } + test("up-cast") { def isCastSafe(from: NumericType, to: NumericType): Boolean = (from, to) match { case (_, dt: DecimalType) => dt.isWiderThan(from) From fb9c873e7d5c81f312b26e46df32b1aadc6670b7 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 7 Nov 2020 06:43:27 +0900 Subject: [PATCH 0409/1009] [SPARK-33347][CORE] Cleanup useless variables of MutableApplicationInfo ### What changes were proposed in this pull request? There are 4 fields in `MutableApplicationInfo ` seems useless: - `coresGranted` - `maxCores` - `coresPerExecutor` - `memoryPerExecutorMB` They are always `None` and not reassigned. So the main change of this pr is cleanup these useless fields in `MutableApplicationInfo`. ### Why are the changes needed? Cleanup useless variables. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30251 from LuciferYang/SPARK-33347. 
Authored-by: yangjie01 Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../apache/spark/deploy/history/FsHistoryProvider.scala | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 400c82c1f9e63..e1b0fc5e45d6e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -1546,14 +1546,9 @@ private[history] class AppListingListener( private class MutableApplicationInfo { var id: String = null var name: String = null - var coresGranted: Option[Int] = None - var maxCores: Option[Int] = None - var coresPerExecutor: Option[Int] = None - var memoryPerExecutorMB: Option[Int] = None def toView(): ApplicationInfoWrapper = { - val apiInfo = ApplicationInfo(id, name, coresGranted, maxCores, coresPerExecutor, - memoryPerExecutorMB, Nil) + val apiInfo = ApplicationInfo(id, name, None, None, None, None, Nil) new ApplicationInfoWrapper(apiInfo, List(attempt.toView())) } From e11a24c1ba5b0f3116b46a213443902165919da5 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 6 Nov 2020 15:05:37 -0800 Subject: [PATCH 0410/1009] [SPARK-33371][PYTHON] Update setup.py and tests for Python 3.9 ### What changes were proposed in this pull request? This PR proposes to fix PySpark to officially support Python 3.9. The main codes already work. We should just note that we support Python 3.9. Also, this PR fixes some minor fixes into the test codes. - `Thread.isAlive` is removed in Python 3.9, and `Thread.is_alive` exists in Python 3.6+, see https://docs.python.org/3/whatsnew/3.9.html#removed - Fixed `TaskContextTestsWithWorkerReuse.test_barrier_with_python_worker_reuse` and `TaskContextTests.test_barrier` to be less flaky. This becomes more flaky in Python 3.9 for some reasons. NOTE that PyArrow does not support Python 3.9 yet. ### Why are the changes needed? To officially support Python 3.9. ### Does this PR introduce _any_ user-facing change? Yes, it officially supports Python 3.9. ### How was this patch tested? Manually ran the tests: ``` $ ./run-tests --python-executable=python Running PySpark tests. 
Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python'] Will test the following Python modules: ['pyspark-core', 'pyspark-ml', 'pyspark-mllib', 'pyspark-resource', 'pyspark-sql', 'pyspark-streaming'] python python_implementation is CPython python version is: Python 3.9.0 Starting test(python): pyspark.ml.tests.test_base Starting test(python): pyspark.ml.tests.test_evaluation Starting test(python): pyspark.ml.tests.test_algorithms Starting test(python): pyspark.ml.tests.test_feature Finished test(python): pyspark.ml.tests.test_base (12s) Starting test(python): pyspark.ml.tests.test_image Finished test(python): pyspark.ml.tests.test_evaluation (15s) Starting test(python): pyspark.ml.tests.test_linalg Finished test(python): pyspark.ml.tests.test_feature (25s) Starting test(python): pyspark.ml.tests.test_param Finished test(python): pyspark.ml.tests.test_image (17s) Starting test(python): pyspark.ml.tests.test_persistence Finished test(python): pyspark.ml.tests.test_param (17s) Starting test(python): pyspark.ml.tests.test_pipeline Finished test(python): pyspark.ml.tests.test_linalg (30s) Starting test(python): pyspark.ml.tests.test_stat Finished test(python): pyspark.ml.tests.test_pipeline (6s) Starting test(python): pyspark.ml.tests.test_training_summary Finished test(python): pyspark.ml.tests.test_stat (12s) Starting test(python): pyspark.ml.tests.test_tuning Finished test(python): pyspark.ml.tests.test_algorithms (68s) Starting test(python): pyspark.ml.tests.test_wrapper Finished test(python): pyspark.ml.tests.test_persistence (51s) Starting test(python): pyspark.mllib.tests.test_algorithms Finished test(python): pyspark.ml.tests.test_training_summary (33s) Starting test(python): pyspark.mllib.tests.test_feature Finished test(python): pyspark.ml.tests.test_wrapper (19s) Starting test(python): pyspark.mllib.tests.test_linalg Finished test(python): pyspark.mllib.tests.test_feature (26s) Starting test(python): pyspark.mllib.tests.test_stat Finished test(python): pyspark.mllib.tests.test_stat (22s) Starting test(python): pyspark.mllib.tests.test_streaming_algorithms Finished test(python): pyspark.mllib.tests.test_algorithms (53s) Starting test(python): pyspark.mllib.tests.test_util Finished test(python): pyspark.mllib.tests.test_linalg (54s) Starting test(python): pyspark.sql.tests.test_arrow Finished test(python): pyspark.sql.tests.test_arrow (0s) ... 61 tests were skipped Starting test(python): pyspark.sql.tests.test_catalog Finished test(python): pyspark.mllib.tests.test_util (11s) Starting test(python): pyspark.sql.tests.test_column Finished test(python): pyspark.sql.tests.test_catalog (16s) Starting test(python): pyspark.sql.tests.test_conf Finished test(python): pyspark.sql.tests.test_column (17s) Starting test(python): pyspark.sql.tests.test_context Finished test(python): pyspark.sql.tests.test_context (6s) ... 3 tests were skipped Starting test(python): pyspark.sql.tests.test_dataframe Finished test(python): pyspark.sql.tests.test_conf (11s) Starting test(python): pyspark.sql.tests.test_datasources Finished test(python): pyspark.sql.tests.test_datasources (19s) Starting test(python): pyspark.sql.tests.test_functions Finished test(python): pyspark.sql.tests.test_dataframe (35s) ... 
3 tests were skipped Starting test(python): pyspark.sql.tests.test_group Finished test(python): pyspark.sql.tests.test_functions (32s) Starting test(python): pyspark.sql.tests.test_pandas_cogrouped_map Finished test(python): pyspark.sql.tests.test_pandas_cogrouped_map (1s) ... 15 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_grouped_map Finished test(python): pyspark.sql.tests.test_group (19s) Starting test(python): pyspark.sql.tests.test_pandas_map Finished test(python): pyspark.sql.tests.test_pandas_grouped_map (0s) ... 21 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf Finished test(python): pyspark.sql.tests.test_pandas_map (0s) ... 6 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg Finished test(python): pyspark.sql.tests.test_pandas_udf (0s) ... 6 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_scalar Finished test(python): pyspark.sql.tests.test_pandas_udf_grouped_agg (0s) ... 13 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_typehints Finished test(python): pyspark.sql.tests.test_pandas_udf_scalar (0s) ... 50 tests were skipped Starting test(python): pyspark.sql.tests.test_pandas_udf_window Finished test(python): pyspark.sql.tests.test_pandas_udf_typehints (0s) ... 10 tests were skipped Starting test(python): pyspark.sql.tests.test_readwriter Finished test(python): pyspark.sql.tests.test_pandas_udf_window (0s) ... 14 tests were skipped Starting test(python): pyspark.sql.tests.test_serde Finished test(python): pyspark.sql.tests.test_serde (19s) Starting test(python): pyspark.sql.tests.test_session Finished test(python): pyspark.mllib.tests.test_streaming_algorithms (120s) Starting test(python): pyspark.sql.tests.test_streaming Finished test(python): pyspark.sql.tests.test_readwriter (25s) Starting test(python): pyspark.sql.tests.test_types Finished test(python): pyspark.ml.tests.test_tuning (208s) Starting test(python): pyspark.sql.tests.test_udf Finished test(python): pyspark.sql.tests.test_session (31s) Starting test(python): pyspark.sql.tests.test_utils Finished test(python): pyspark.sql.tests.test_streaming (35s) Starting test(python): pyspark.streaming.tests.test_context Finished test(python): pyspark.sql.tests.test_types (34s) Starting test(python): pyspark.streaming.tests.test_dstream Finished test(python): pyspark.sql.tests.test_utils (14s) Starting test(python): pyspark.streaming.tests.test_kinesis Finished test(python): pyspark.streaming.tests.test_kinesis (0s) ... 
2 tests were skipped Starting test(python): pyspark.streaming.tests.test_listener Finished test(python): pyspark.streaming.tests.test_listener (11s) Starting test(python): pyspark.tests.test_appsubmit Finished test(python): pyspark.sql.tests.test_udf (39s) Starting test(python): pyspark.tests.test_broadcast Finished test(python): pyspark.streaming.tests.test_context (23s) Starting test(python): pyspark.tests.test_conf Finished test(python): pyspark.tests.test_conf (15s) Starting test(python): pyspark.tests.test_context Finished test(python): pyspark.tests.test_broadcast (33s) Starting test(python): pyspark.tests.test_daemon Finished test(python): pyspark.tests.test_daemon (5s) Starting test(python): pyspark.tests.test_install_spark Finished test(python): pyspark.tests.test_context (44s) Starting test(python): pyspark.tests.test_join Finished test(python): pyspark.tests.test_appsubmit (68s) Starting test(python): pyspark.tests.test_profiler Finished test(python): pyspark.tests.test_join (7s) Starting test(python): pyspark.tests.test_rdd Finished test(python): pyspark.tests.test_profiler (9s) Starting test(python): pyspark.tests.test_rddbarrier Finished test(python): pyspark.tests.test_rddbarrier (7s) Starting test(python): pyspark.tests.test_readwrite Finished test(python): pyspark.streaming.tests.test_dstream (107s) Starting test(python): pyspark.tests.test_serializers Finished test(python): pyspark.tests.test_serializers (8s) Starting test(python): pyspark.tests.test_shuffle Finished test(python): pyspark.tests.test_readwrite (14s) Starting test(python): pyspark.tests.test_taskcontext Finished test(python): pyspark.tests.test_install_spark (65s) Starting test(python): pyspark.tests.test_util Finished test(python): pyspark.tests.test_shuffle (8s) Starting test(python): pyspark.tests.test_worker Finished test(python): pyspark.tests.test_util (5s) Starting test(python): pyspark.accumulators Finished test(python): pyspark.accumulators (5s) Starting test(python): pyspark.broadcast Finished test(python): pyspark.broadcast (6s) Starting test(python): pyspark.conf Finished test(python): pyspark.tests.test_worker (14s) Starting test(python): pyspark.context Finished test(python): pyspark.conf (4s) Starting test(python): pyspark.ml.classification Finished test(python): pyspark.tests.test_rdd (60s) Starting test(python): pyspark.ml.clustering Finished test(python): pyspark.context (21s) Starting test(python): pyspark.ml.evaluation Finished test(python): pyspark.tests.test_taskcontext (69s) Starting test(python): pyspark.ml.feature Finished test(python): pyspark.ml.evaluation (26s) Starting test(python): pyspark.ml.fpm Finished test(python): pyspark.ml.clustering (45s) Starting test(python): pyspark.ml.functions Finished test(python): pyspark.ml.fpm (24s) Starting test(python): pyspark.ml.image Finished test(python): pyspark.ml.functions (17s) Starting test(python): pyspark.ml.linalg.__init__ Finished test(python): pyspark.ml.linalg.__init__ (0s) Starting test(python): pyspark.ml.recommendation Finished test(python): pyspark.ml.classification (74s) Starting test(python): pyspark.ml.regression Finished test(python): pyspark.ml.image (8s) Starting test(python): pyspark.ml.stat Finished test(python): pyspark.ml.stat (29s) Starting test(python): pyspark.ml.tuning Finished test(python): pyspark.ml.regression (53s) Starting test(python): pyspark.mllib.classification Finished test(python): pyspark.ml.tuning (35s) Starting test(python): pyspark.mllib.clustering Finished test(python): pyspark.ml.feature 
(103s) Starting test(python): pyspark.mllib.evaluation Finished test(python): pyspark.mllib.classification (33s) Starting test(python): pyspark.mllib.feature Finished test(python): pyspark.mllib.evaluation (21s) Starting test(python): pyspark.mllib.fpm Finished test(python): pyspark.ml.recommendation (103s) Starting test(python): pyspark.mllib.linalg.__init__ Finished test(python): pyspark.mllib.linalg.__init__ (1s) Starting test(python): pyspark.mllib.linalg.distributed Finished test(python): pyspark.mllib.feature (26s) Starting test(python): pyspark.mllib.random Finished test(python): pyspark.mllib.fpm (23s) Starting test(python): pyspark.mllib.recommendation Finished test(python): pyspark.mllib.clustering (50s) Starting test(python): pyspark.mllib.regression Finished test(python): pyspark.mllib.random (13s) Starting test(python): pyspark.mllib.stat.KernelDensity Finished test(python): pyspark.mllib.stat.KernelDensity (1s) Starting test(python): pyspark.mllib.stat._statistics Finished test(python): pyspark.mllib.linalg.distributed (42s) Starting test(python): pyspark.mllib.tree Finished test(python): pyspark.mllib.stat._statistics (19s) Starting test(python): pyspark.mllib.util Finished test(python): pyspark.mllib.regression (33s) Starting test(python): pyspark.profiler Finished test(python): pyspark.mllib.recommendation (36s) Starting test(python): pyspark.rdd Finished test(python): pyspark.profiler (9s) Starting test(python): pyspark.resource.tests.test_resources Finished test(python): pyspark.mllib.tree (19s) Starting test(python): pyspark.serializers Finished test(python): pyspark.mllib.util (21s) Starting test(python): pyspark.shuffle Finished test(python): pyspark.resource.tests.test_resources (9s) Starting test(python): pyspark.sql.avro.functions Finished test(python): pyspark.shuffle (1s) Starting test(python): pyspark.sql.catalog Finished test(python): pyspark.rdd (22s) Starting test(python): pyspark.sql.column Finished test(python): pyspark.serializers (12s) Starting test(python): pyspark.sql.conf Finished test(python): pyspark.sql.conf (6s) Starting test(python): pyspark.sql.context Finished test(python): pyspark.sql.catalog (14s) Starting test(python): pyspark.sql.dataframe Finished test(python): pyspark.sql.avro.functions (15s) Starting test(python): pyspark.sql.functions Finished test(python): pyspark.sql.column (24s) Starting test(python): pyspark.sql.group Finished test(python): pyspark.sql.context (20s) Starting test(python): pyspark.sql.pandas.conversion Finished test(python): pyspark.sql.pandas.conversion (13s) Starting test(python): pyspark.sql.pandas.group_ops Finished test(python): pyspark.sql.group (36s) Starting test(python): pyspark.sql.pandas.map_ops Finished test(python): pyspark.sql.pandas.group_ops (21s) Starting test(python): pyspark.sql.pandas.serializers Finished test(python): pyspark.sql.pandas.serializers (0s) Starting test(python): pyspark.sql.pandas.typehints Finished test(python): pyspark.sql.pandas.typehints (0s) Starting test(python): pyspark.sql.pandas.types Finished test(python): pyspark.sql.pandas.types (0s) Starting test(python): pyspark.sql.pandas.utils Finished test(python): pyspark.sql.pandas.utils (0s) Starting test(python): pyspark.sql.readwriter Finished test(python): pyspark.sql.dataframe (56s) Starting test(python): pyspark.sql.session Finished test(python): pyspark.sql.functions (57s) Starting test(python): pyspark.sql.streaming Finished test(python): pyspark.sql.pandas.map_ops (12s) Starting test(python): pyspark.sql.types Finished 
test(python): pyspark.sql.types (10s) Starting test(python): pyspark.sql.udf Finished test(python): pyspark.sql.streaming (16s) Starting test(python): pyspark.sql.window Finished test(python): pyspark.sql.session (19s) Starting test(python): pyspark.streaming.util Finished test(python): pyspark.streaming.util (0s) Starting test(python): pyspark.util Finished test(python): pyspark.util (0s) Finished test(python): pyspark.sql.readwriter (24s) Finished test(python): pyspark.sql.udf (13s) Finished test(python): pyspark.sql.window (14s) Tests passed in 780 seconds ``` Closes #30277 from HyukjinKwon/SPARK-33371. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/tests/test_taskcontext.py | 8 ++++---- python/pyspark/tests/test_worker.py | 2 +- python/setup.py | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/pyspark/tests/test_taskcontext.py b/python/pyspark/tests/test_taskcontext.py index f0e6672957c13..f6e275abfb1e7 100644 --- a/python/pyspark/tests/test_taskcontext.py +++ b/python/pyspark/tests/test_taskcontext.py @@ -124,12 +124,12 @@ def f(iterator): def context_barrier(x): tc = BarrierTaskContext.get() - time.sleep(random.randint(1, 10)) + time.sleep(random.randint(1, 5) * 2) tc.barrier() return time.time() times = rdd.barrier().mapPartitions(f).map(context_barrier).collect() - self.assertTrue(max(times) - min(times) < 1) + self.assertTrue(max(times) - min(times) < 2) def test_all_gather(self): """ @@ -232,7 +232,7 @@ def f(iterator): def context_barrier(x): tc = BarrierTaskContext.get() - time.sleep(random.randint(1, 10)) + time.sleep(random.randint(1, 5) * 2) tc.barrier() return (time.time(), os.getpid()) @@ -240,7 +240,7 @@ def context_barrier(x): times = list(map(lambda x: x[0], result)) pids = list(map(lambda x: x[1], result)) # check both barrier and worker reuse effect - self.assertTrue(max(times) - min(times) < 1) + self.assertTrue(max(times) - min(times) < 2) for pid in pids: self.assertTrue(pid in worker_pids) diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index bfaf3a3186cad..8039c0661dd0b 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -134,7 +134,7 @@ def count(): t.daemon = True t.start() t.join(5) - self.assertTrue(not t.isAlive()) + self.assertTrue(not t.is_alive()) self.assertEqual(100000, rdd.count()) def test_with_different_versions_of_python(self): diff --git a/python/setup.py b/python/setup.py index 8d9cf2ee5459a..f5836ecf5fbfc 100755 --- a/python/setup.py +++ b/python/setup.py @@ -266,6 +266,7 @@ def run(self): 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Typing :: Typed'], From 1090b1b00a4aa6168fd5b69f227f28309c42b6fd Mon Sep 17 00:00:00 2001 From: Hannah Amundson Date: Sun, 8 Nov 2020 20:29:24 +0900 Subject: [PATCH 0411/1009] [SPARK-32860][DOCS][SQL] Updating documentation about map support in Encoders ### What changes were proposed in this pull request? Javadocs updated for the encoder to include maps as a collection type ### Why are the changes needed? The javadocs were not updated with fix SPARK-16706 ### Does this PR introduce _any_ user-facing change? Yes, the javadocs are updated ### How was this patch tested? 
sbt was run to ensure it meets scalastyle Closes #30274 from hannahkamundson/SPARK-32860. Lead-authored-by: Hannah Amundson Co-authored-by: Hannah <48397717+hannahkamundson@users.noreply.github.com> Signed-off-by: HyukjinKwon --- sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 5d31b5bbf12af..24045b5a43a64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -146,7 +146,7 @@ object Encoders { * - String * - java.math.BigDecimal, java.math.BigInteger * - time related: java.sql.Date, java.sql.Timestamp, java.time.LocalDate, java.time.Instant - * - collection types: only array and java.util.List currently, map support is in progress + * - collection types: array, java.util.List, and map * - nested java bean. * * @since 1.6.0 From 02fd52cfbc8989a41f69bafd7d432ec3a365c138 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 8 Nov 2020 12:51:48 -0600 Subject: [PATCH 0412/1009] [SPARK-33352][CORE][SQL][SS][MLLIB][AVRO][K8S] Fix procedure-like declaration compilation warnings in Scala 2.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? There are two similar compilation warnings about procedure-like declaration in Scala 2.13: ``` [WARNING] [Warn] /spark/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala:70: procedure syntax is deprecated for constructors: add `=`, as in method definition ``` and ``` [WARNING] [Warn] /spark/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala:211: procedure syntax is deprecated: instead, add `: Unit =` to explicitly declare `run`'s return type ``` this pr is the first part to resolve SPARK-33352: - For constructors method definition add `=` to convert to function syntax - For without `return type` methods definition add `: Unit =` to convert to function syntax ### Why are the changes needed? Eliminate compilation warnings in Scala 2.13 and this change should be compatible with Scala 2.12 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30255 from LuciferYang/SPARK-29392-FOLLOWUP.1. 
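As a compact illustration of the two syntactic rewrites described above (the class is made up; the deprecated forms still compile on 2.12/2.13, only the warnings go away):

```scala
class Counter(start: Int) {
  private var n = start

  // before: def this() { this(0) }        <- procedure-like auxiliary constructor
  def this() = this(0)                     // after: add `=`

  // before: def reset() { n = 0 }         <- procedure syntax for a method
  def reset(): Unit = { n = 0 }            // after: add `: Unit =`
}
```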
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../scala/org/apache/spark/HeartbeatReceiver.scala | 2 +- .../main/scala/org/apache/spark/TaskEndReason.scala | 4 ++-- .../org/apache/spark/executor/ExecutorMetrics.scala | 6 +++--- .../org/apache/spark/rdd/InputFileBlockHolder.scala | 2 +- .../org/apache/spark/rdd/LocalCheckpointRDD.scala | 2 +- .../scala/org/apache/spark/scheduler/MapStatus.scala | 2 +- .../org/apache/spark/scheduler/ShuffleMapTask.scala | 2 +- .../cluster/StandaloneSchedulerBackend.scala | 3 ++- .../apache/spark/shuffle/FetchFailedException.scala | 2 +- .../spark/storage/BlockManagerDecommissioner.scala | 2 +- .../org/apache/spark/storage/StorageLevel.scala | 2 +- .../org/apache/spark/storage/StorageUtils.scala | 2 +- .../apache/spark/util/UninterruptibleThread.scala | 2 +- .../util/collection/ExternalAppendOnlyMap.scala | 2 +- .../executor/CoarseGrainedExecutorBackendSuite.scala | 2 +- .../spark/resource/ResourceProfileManagerSuite.scala | 4 ++-- .../apache/spark/resource/ResourceProfileSuite.scala | 4 ++-- .../util/SparkUncaughtExceptionHandlerSuite.scala | 2 +- .../org/apache/spark/sql/avro/AvroDeserializer.scala | 2 +- .../org/apache/spark/sql/avro/AvroSerializer.scala | 2 +- .../spark/sql/jdbc/DockerJDBCIntegrationSuite.scala | 4 ++-- .../org/apache/spark/metrics/sink/GangliaSink.scala | 6 +++--- .../scala/org/apache/spark/ml/stat/Summarizer.scala | 2 +- .../apache/spark/mllib/feature/ChiSqSelector.scala | 2 +- .../apache/spark/mllib/feature/StandardScaler.scala | 2 +- .../spark/mllib/tree/configuration/Strategy.scala | 4 ++-- .../spark/deploy/k8s/integrationtest/Utils.scala | 6 +++--- .../catalyst/analysis/AlreadyExistException.scala | 2 +- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 4 ++-- .../catalyst/expressions/datetimeExpressions.scala | 2 +- .../scala/org/apache/spark/sql/SparkSession.scala | 2 +- .../execution/ExternalAppendOnlyUnsafeRowArray.scala | 2 +- .../apache/spark/sql/execution/command/views.scala | 2 +- .../datasources/parquet/ParquetReadSupport.scala | 2 +- .../org/apache/spark/sql/streaming/progress.scala | 2 +- .../org/apache/spark/sql/test/TestSQLContext.scala | 4 ++-- .../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2 +- .../sql/hive/thriftserver/HiveSessionImplSuite.scala | 2 +- .../org/apache/spark/sql/hive/test/TestHive.scala | 12 ++++++------ 39 files changed, 58 insertions(+), 57 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index c99698f99d904..233ad884a721a 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -67,7 +67,7 @@ private[spark] case class HeartbeatResponse(reregisterBlockManager: Boolean) private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) extends SparkListener with ThreadSafeRpcEndpoint with Logging { - def this(sc: SparkContext) { + def this(sc: SparkContext) = { this(sc, new SystemClock) } diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index b304eb97fbdf6..5dc70e9834b0b 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -143,12 +143,12 @@ case class ExceptionFailure( private[spark] def this( e: Throwable, accumUpdates: Seq[AccumulableInfo], - preserveCause: Boolean) { + preserveCause: Boolean) = { this(e.getClass.getName, e.getMessage, 
e.getStackTrace, Utils.exceptionString(e), if (preserveCause) Some(new ThrowableSerializationWrapper(e)) else None, accumUpdates) } - private[spark] def this(e: Throwable, accumUpdates: Seq[AccumulableInfo]) { + private[spark] def this(e: Throwable, accumUpdates: Seq[AccumulableInfo]) = { this(e, accumUpdates, preserveCause = true) } diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala index d9aa3ef60fc9e..486e59652218b 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetrics.scala @@ -44,12 +44,12 @@ class ExecutorMetrics private[spark] extends Serializable { /** Returns true if the values for the metrics have been set, false otherwise. */ def isSet(): Boolean = metrics(0) > -1 - private[spark] def this(metrics: Array[Long]) { + private[spark] def this(metrics: Array[Long]) = { this() Array.copy(metrics, 0, this.metrics, 0, Math.min(metrics.size, this.metrics.size)) } - private[spark] def this(metrics: AtomicLongArray) { + private[spark] def this(metrics: AtomicLongArray) = { this() ExecutorMetricType.metricToOffset.foreach { case (_, i) => this.metrics(i) = metrics.get(i) @@ -61,7 +61,7 @@ class ExecutorMetrics private[spark] extends Serializable { * * @param executorMetrics map of executor metric name to value */ - private[spark] def this(executorMetrics: Map[String, Long]) { + private[spark] def this(executorMetrics: Map[String, Long]) = { this() ExecutorMetricType.metricToOffset.foreach { case (name, idx) => metrics(idx) = executorMetrics.getOrElse(name, 0L) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index 1beb085db27d9..8230144025feb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -34,7 +34,7 @@ private[spark] object InputFileBlockHolder { * @param length size of the block, in bytes, or -1 if not available. 
*/ private class FileBlock(val filePath: UTF8String, val startOffset: Long, val length: Long) { - def this() { + def this() = { this(UTF8String.fromString(""), -1, -1) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala index 503aa0dffc9f3..113ed2db7f546 100644 --- a/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/LocalCheckpointRDD.scala @@ -40,7 +40,7 @@ private[spark] class LocalCheckpointRDD[T: ClassTag]( numPartitions: Int) extends CheckpointRDD[T](sc) { - def this(rdd: RDD[T]) { + def this(rdd: RDD[T]) = { this(rdd.context, rdd.id, rdd.partitions.length) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index cfc2e141290c4..1239c32cee3ab 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -123,7 +123,7 @@ private[spark] class CompressedMapStatus( // For deserialization only protected def this() = this(null, null.asInstanceOf[Array[Byte]], -1) - def this(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long) { + def this(loc: BlockManagerId, uncompressedSizes: Array[Long], mapTaskId: Long) = { this(loc, uncompressedSizes.map(MapStatus.compressSize), mapTaskId) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index a0ba9208ea647..89db3a86f4ce8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -66,7 +66,7 @@ private[spark] class ShuffleMapTask( with Logging { /** A constructor used only in test suites. This does not require passing in an RDD. 
*/ - def this(partitionId: Int) { + def this(partitionId: Int) = { this(0, 0, null, new Partition { override def index: Int = 0 }, null, new Properties, null) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index b9ac8d2ba2784..c14b2d4e5df31 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -177,7 +177,8 @@ private[spark] class StandaloneSchedulerBackend( removeExecutor(fullId.split("/")(1), reason) } - override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo) { + override def executorDecommissioned(fullId: String, + decommissionInfo: ExecutorDecommissionInfo): Unit = { logInfo(s"Asked to decommission executor $fullId") val execId = fullId.split("/")(1) decommissionExecutors( diff --git a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala index 6509a04dc4893..208c676a1c352 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/FetchFailedException.scala @@ -48,7 +48,7 @@ private[spark] class FetchFailedException( mapTaskId: Long, mapIndex: Int, reduceId: Int, - cause: Throwable) { + cause: Throwable) = { this(bmAddress, shuffleId, mapTaskId, mapIndex, reduceId, cause.getMessage, cause) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index d1e89418a4897..9129e8012dc59 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -208,7 +208,7 @@ private[storage] class BlockManagerDecommissioner( private val shuffleBlockMigrationRefreshRunnable = new Runnable { val sleepInterval = conf.get(config.STORAGE_DECOMMISSION_REPLICATION_REATTEMPT_INTERVAL) - override def run() { + override def run(): Unit = { assert(conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED)) while (!stopped && !stoppedShuffle && !Thread.interrupted()) { try { diff --git a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index f6db73ba805b1..ce89c2ae90b49 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -45,7 +45,7 @@ class StorageLevel private( extends Externalizable { // TODO: Also add fields for caching priority, dataset ID, and flushing. 
- private def this(flags: Int, replication: Int) { + private def this(flags: Int, replication: Int) = { this((flags & 8) != 0, (flags & 4) != 0, (flags & 2) != 0, (flags & 1) != 0, replication) } diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index fc426eee608c0..147731a0fb547 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -61,7 +61,7 @@ private[spark] class StorageStatus( maxMemory: Long, maxOnHeapMem: Option[Long], maxOffHeapMem: Option[Long], - initialBlocks: Map[BlockId, BlockStatus]) { + initialBlocks: Map[BlockId, BlockStatus]) = { this(bmid, maxMemory, maxOnHeapMem, maxOffHeapMem) initialBlocks.foreach { case (bid, bstatus) => addBlock(bid, bstatus) } } diff --git a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala index 6a58ec142dd7f..24788d69121b2 100644 --- a/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala +++ b/core/src/main/scala/org/apache/spark/util/UninterruptibleThread.scala @@ -31,7 +31,7 @@ private[spark] class UninterruptibleThread( target: Runnable, name: String) extends Thread(target, name) { - def this(name: String) { + def this(name: String) = { this(null, name) } diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 7f40b469a95e9..731131b688ca7 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -76,7 +76,7 @@ class ExternalAppendOnlyMap[K, V, C]( mergeValue: (C, V) => C, mergeCombiners: (C, C) => C, serializer: Serializer, - blockManager: BlockManager) { + blockManager: BlockManager) = { this(createCombiner, mergeValue, mergeCombiners, serializer, blockManager, TaskContext.get()) } diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index e0b586074b89e..319dcfeecee24 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -106,7 +106,7 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite testParsingMultipleResources(conf, ResourceProfile.getOrCreateDefaultProfile(conf)) } - def testParsingMultipleResources(conf: SparkConf, resourceProfile: ResourceProfile) { + def testParsingMultipleResources(conf: SparkConf, resourceProfile: ResourceProfile): Unit = { val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) // we don't really use this, just need it to get at the parser function diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala index f4521738c4870..ddfe80ee81e6f 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.scheduler.LiveListenerBus class ResourceProfileManagerSuite extends SparkFunSuite { - override 
def beforeAll() { + override def beforeAll(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { @@ -32,7 +32,7 @@ class ResourceProfileManagerSuite extends SparkFunSuite { } } - override def afterEach() { + override def afterEach(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala index d0479ca7db40c..f8c4a3a68f367 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.resource.TestResourceIDs._ class ResourceProfileSuite extends SparkFunSuite { - override def beforeAll() { + override def beforeAll(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { @@ -32,7 +32,7 @@ class ResourceProfileSuite extends SparkFunSuite { } } - override def afterEach() { + override def afterEach(): Unit = { try { ResourceProfile.clearDefaultProfile() } finally { diff --git a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala index 90741a6bde7f0..9e23b25493dfe 100644 --- a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala @@ -80,7 +80,7 @@ object ThrowableThrower { // a thread that uses SparkUncaughtExceptionHandler and throws a Throwable by name class ThrowerThread(name: String, exitOnUncaughtException: Boolean) extends Thread { - override def run() { + override def run(): Unit = { Thread.setDefaultUncaughtExceptionHandler( new SparkUncaughtExceptionHandler(exitOnUncaughtException)) throw ThrowableTypes.getThrowableByName(name) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index aabf9d92ce7d8..85416b80cfbb7 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -48,7 +48,7 @@ private[sql] class AvroDeserializer( datetimeRebaseMode: LegacyBehaviorPolicy.Value, filters: StructFilters) { - def this(rootAvroType: Schema, rootCatalystType: DataType) { + def this(rootAvroType: Schema, rootCatalystType: DataType) = { this( rootAvroType, rootCatalystType, diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala index 0ea95d1c0db5d..33c6022ff7b6d 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala @@ -49,7 +49,7 @@ private[sql] class AvroSerializer( nullable: Boolean, datetimeRebaseMode: LegacyBehaviorPolicy.Value) extends Logging { - def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) { + def this(rootCatalystType: DataType, rootAvroType: Schema, nullable: Boolean) = { this(rootCatalystType, rootAvroType, nullable, LegacyBehaviorPolicy.withName(SQLConf.get.getConf( SQLConf.LEGACY_AVRO_REBASE_MODE_IN_WRITE))) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala 
b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 24927da16d50c..ad6a829fffd0d 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -108,7 +108,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu private var containerId: String = _ protected var jdbcUrl: String = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() try { docker = DefaultDockerClient.fromEnv.build() @@ -174,7 +174,7 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu } } - override def afterAll() { + override def afterAll(): Unit = { try { if (docker != null) { try { diff --git a/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala b/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala index 4fb9f2f849085..7266187597589 100644 --- a/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala +++ b/external/spark-ganglia-lgpl/src/main/scala/org/apache/spark/metrics/sink/GangliaSink.scala @@ -81,15 +81,15 @@ class GangliaSink(val property: Properties, val registry: MetricRegistry, .withDMax(dmax) .build(ganglia) - override def start() { + override def start(): Unit = { reporter.start(pollPeriod, pollUnit) } - override def stop() { + override def stop(): Unit = { reporter.stop() } - override def report() { + override def report(): Unit = { reporter.report() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala index 4db518bd4f9ba..397dbb28f8e3e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala @@ -435,7 +435,7 @@ private[spark] class SummarizerBuffer( private var currMax: Array[Double] = null private var currMin: Array[Double] = null - def this() { + def this() = { this( Seq( SummaryBuilderImpl.Mean, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index d970c3c3d6131..70125d2c4c6af 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -195,7 +195,7 @@ class ChiSqSelector @Since("2.1.0") () extends Serializable { * The is the same to call this() and setNumTopFeatures(numTopFeatures) */ @Since("1.3.0") - def this(numTopFeatures: Int) { + def this(numTopFeatures: Int) = { this() this.numTopFeatures = numTopFeatures } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 78c974e22f2cf..8f9d6d07a4c36 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -83,7 +83,7 @@ class StandardScalerModel @Since("1.3.0") ( /** */ @Since("1.3.0") - def this(std: Vector, mean: Vector) { + def this(std: Vector, mean: Vector) = { this(std, mean, withStd = std != null, withMean = mean != null) require(this.withStd || this.withMean, "at least one of std or mean vectors must be 
provided") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 09e3e22030546..0f6c7033687fa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -116,7 +116,7 @@ class Strategy @Since("1.3.0") ( maxMemoryInMB: Int, subsamplingRate: Double, useNodeIdCache: Boolean, - checkpointInterval: Int) { + checkpointInterval: Int) = { this(algo, impurity, maxDepth, numClasses, maxBins, quantileCalculationStrategy, categoricalFeaturesInfo, minInstancesPerNode, minInfoGain, maxMemoryInMB, subsamplingRate, useNodeIdCache, checkpointInterval, 0.0) @@ -133,7 +133,7 @@ class Strategy @Since("1.3.0") ( maxDepth: Int, numClasses: Int, maxBins: Int, - categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]) { + categoricalFeaturesInfo: java.util.Map[java.lang.Integer, java.lang.Integer]) = { this(algo, impurity, maxDepth, numClasses, maxBins, Sort, categoricalFeaturesInfo.asInstanceOf[java.util.Map[Int, Int]].asScala.toMap, minWeightFractionPerNode = 0.0) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index 0000a94725763..9bcd6e9503532 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -60,15 +60,15 @@ object Utils extends Logging { val openLatch: CountDownLatch = new CountDownLatch(1) val closeLatch: CountDownLatch = new CountDownLatch(1) - override def onOpen(response: Response) { + override def onOpen(response: Response): Unit = { openLatch.countDown() } - override def onClose(a: Int, b: String) { + override def onClose(a: Int, b: String): Unit = { closeLatch.countDown() } - override def onFailure(e: Throwable, r: Response) { + override def onFailure(e: Throwable, r: Response): Unit = { } def waitForInputStreamToConnect(): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala index c50ba623c27b2..70f821d5f8af0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AlreadyExistException.scala @@ -64,7 +64,7 @@ class PartitionAlreadyExistsException(message: String) extends AnalysisException } class PartitionsAlreadyExistException(message: String) extends AnalysisException(message) { - def this(db: String, table: String, specs: Seq[TablePartitionSpec]) { + def this(db: String, table: String, specs: Seq[TablePartitionSpec]) = { this(s"The following partitions already exists in table '$table' database '$db':\n" + specs.mkString("\n===\n")) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 4865629329831..fa5634935ff29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -72,7 +72,7 @@ class SessionCatalog( def this( externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry, - conf: SQLConf) { + conf: SQLConf) = { this( () => externalCatalog, () => new GlobalTempViewManager(conf.getConf(GLOBAL_TEMP_DATABASE)), @@ -84,7 +84,7 @@ class SessionCatalog( } // For testing only. - def this(externalCatalog: ExternalCatalog) { + def this(externalCatalog: ExternalCatalog) = { this( externalCatalog, new SimpleFunctionRegistry, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 223d0e661ed3e..97aacb3f7530c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1421,7 +1421,7 @@ case class MonthsBetween( case class ParseToDate(left: Expression, format: Option[Expression], child: Expression) extends RuntimeReplaceable { - def this(left: Expression, format: Expression) { + def this(left: Expression, format: Expression) = { this(left, Option(format), Cast(GetTimestamp(left, format), DateType)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index c4aadfb1d66bd..592f209475baf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -94,7 +94,7 @@ class SparkSession private( * since that would cause every new session to reinvoke Spark Session Extensions on the currently * running extensions. */ - private[sql] def this(sc: SparkContext) { + private[sql] def this(sc: SparkContext) = { this(sc, None, None, SparkSession.applyExtensions( sc.getConf.get(StaticSQLConf.SPARK_SESSION_EXTENSIONS).getOrElse(Seq.empty), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index ac282ea2e94f5..993627847c08c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -52,7 +52,7 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) extends Logging { - def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) { + def this(numRowsInMemoryBufferThreshold: Int, numRowsSpillThreshold: Int) = { this( TaskContext.get().taskMemoryManager(), SparkEnv.get.blockManager, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index bcc0e1fd82d7a..43bc50522f2a8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -173,7 +173,7 @@ case class CreateViewCommand( // added/generated from a temporary view. // 2) The temp functions are represented by multiple classes. Most are inaccessible from this // package (e.g., HiveGenericUDF). 
- def verify(child: LogicalPlan) { + def verify(child: LogicalPlan): Unit = { child.collect { // Disallow creating permanent views based on temporary views. case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index e74872da0829d..4a1f9154488af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -59,7 +59,7 @@ class ParquetReadSupport( extends ReadSupport[InternalRow] with Logging { private var catalystRequestedSchema: StructType = _ - def this() { + def this() = { // We need a zero-arg constructor for SpecificParquetRecordReaderBase. But that is only // used in the vectorized reader, where we get the convertTz/rebaseDateTime value directly, // and the values here are ignored. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 482f2b4bf4ed7..59dc5bc1f37df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -231,7 +231,7 @@ class SinkProgress protected[sql]( val numOutputRows: Long) extends Serializable { /** SinkProgress without custom metrics. */ - protected[sql] def this(description: String) { + protected[sql] def this(description: String) = { this(description, DEFAULT_NUM_OUTPUT_ROWS) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index a477eed4478e8..36488bec7bb53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -25,12 +25,12 @@ import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf * A special `SparkSession` prepared for testing. 
*/ private[spark] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) { self => - def this(sparkConf: SparkConf) { + def this(sparkConf: SparkConf) = { this(new SparkContext("local[2]", "test-sql-context", sparkConf.set("spark.sql.testkey", "true"))) } - def this() { + def this() = { this(new SparkConf) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 965f28ebe0840..8550597da936e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -465,7 +465,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { oldSignal = Signal.handle(interruptSignal, new SignalHandler() { private var interruptRequested: Boolean = false - override def handle(signal: Signal) { + override def handle(signal: Signal): Unit = { val initialRequest = !interruptRequested interruptRequested = true diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala index 13dc74b92d4b3..7c42348f74453 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveSessionImplSuite.scala @@ -33,7 +33,7 @@ class HiveSessionImplSuite extends SparkFunSuite { private var session: HiveSessionImpl = _ private var operationManager: OperationManagerMock = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val sessionManager = new SessionManager(null) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 0c601ef798dcc..082aa8d765e9c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -130,11 +130,11 @@ class TestHiveContext( * If loadTestTables is false, no test tables are loaded. Note that this flag can only be true * when running in the JVM, i.e. it needs to be false when calling from Python. 
*/ - def this(sc: SparkContext, loadTestTables: Boolean = true) { + def this(sc: SparkContext, loadTestTables: Boolean = true) = { this(new TestHiveSparkSession(HiveUtils.withHiveExternalCatalog(sc), loadTestTables)) } - def this(sc: SparkContext, hiveClient: HiveClient) { + def this(sc: SparkContext, hiveClient: HiveClient) = { this(new TestHiveSparkSession(HiveUtils.withHiveExternalCatalog(sc), hiveClient, loadTestTables = false)) @@ -178,7 +178,7 @@ private[hive] class TestHiveSparkSession( private val loadTestTables: Boolean) extends SparkSession(sc) with Logging { self => - def this(sc: SparkContext, loadTestTables: Boolean) { + def this(sc: SparkContext, loadTestTables: Boolean) = { this( sc, existingSharedState = None, @@ -186,7 +186,7 @@ private[hive] class TestHiveSparkSession( loadTestTables) } - def this(sc: SparkContext, hiveClient: HiveClient, loadTestTables: Boolean) { + def this(sc: SparkContext, hiveClient: HiveClient, loadTestTables: Boolean) = { this( sc, existingSharedState = Some(new TestHiveSharedState(sc, Some(hiveClient))), @@ -584,11 +584,11 @@ private[hive] class TestHiveQueryExecution( logicalPlan: LogicalPlan) extends QueryExecution(sparkSession, logicalPlan) with Logging { - def this(sparkSession: TestHiveSparkSession, sql: String) { + def this(sparkSession: TestHiveSparkSession, sql: String) = { this(sparkSession, sparkSession.sessionState.sqlParser.parsePlan(sql)) } - def this(sql: String) { + def this(sql: String) = { this(TestHive.sparkSession, sql) } From c269b53f073b1ae448e24cf346917397f5e10285 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 8 Nov 2020 18:44:26 -0800 Subject: [PATCH 0413/1009] [SPARK-33384][SS] Delete temporary file when cancelling a write to the final path even if the underlying stream throws an error ### What changes were proposed in this pull request? In `RenameBasedFSDataOutputStream.cancel`, we do two things in a single try/catch block: close the underlying stream and delete the temporary file. Closing the `OutputStream` can throw an `IOException`, so we may end up skipping the deletion of the temporary file. This patch proposes to delete the temporary file even if the underlying stream throws an error. ### Why are the changes needed? To avoid leaving temporary files behind when cancelling a write in `RenameBasedFSDataOutputStream`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30290 from viirya/SPARK-33384.
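To show the pattern in isolation, here is a minimal, self-contained sketch; `closeStream`, `deleteTempFile`, and `warn` are placeholder names invented for illustration rather than the actual Spark APIs, and the real change to `RenameBasedFSDataOutputStream.cancel` is the one in the diff below.
```
import scala.util.control.NonFatal

object CancelSketch {
  // Placeholders standing in for closing the underlying stream and deleting the temp file.
  def closeStream(): Unit = throw new java.io.IOException("close failed")
  def deleteTempFile(): Unit = println("temp file deleted")
  def warn(msg: String, e: Throwable): Unit = println(s"WARN: $msg ($e)")

  def cancel(): Unit = {
    try {
      // Close in an inner try/catch so a failure here cannot skip the cleanup below.
      try {
        closeStream()
      } catch {
        case NonFatal(e) => warn("error closing stream, continuing to delete temp file", e)
      }
      deleteTempFile()
    } catch {
      case NonFatal(e) => warn("error deleting temp file", e)
    }
  }
}
```
In this sketch, `CancelSketch.cancel()` still reaches the delete step even though `closeStream()` throws, which is the behavior the patch wants for the temporary path.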
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../execution/streaming/CheckpointFileManager.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala index 26f42b6e3f472..41b705514fb92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala @@ -160,11 +160,17 @@ object CheckpointFileManager extends Logging { override def cancel(): Unit = synchronized { try { if (terminated) return - underlyingStream.close() + try { + underlyingStream.close() + } catch { + case NonFatal(e) => + logWarning(s"Error cancelling write to $finalPath, " + + s"continuing to delete temp path $tempPath", e) + } fm.delete(tempPath) } catch { case NonFatal(e) => - logWarning(s"Error cancelling write to $finalPath", e) + logWarning(s"Error deleting temp file $tempPath", e) } finally { terminated = true } From aa0849b46a43f0942e884816cbc771435571b564 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 8 Nov 2020 22:43:27 -0800 Subject: [PATCH 0414/1009] [SPARK-33387][CORE] Support ordered shuffle block migration ### What changes were proposed in this pull request? This PR aims to support sorted shuffle block migration. ### Why are the changes needed? Since the current shuffle block migration works in a random order, a failure during worker decommissioning affects all shuffles. We should finish the shuffles one by one to minimize the number of affected shuffles. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the newly added test case. Closes #30293 from dongjoon-hyun/SPARK-33387.
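As a tiny standalone sketch of the ordering (assuming a simplified `BlockInfo` case class as a stand-in for Spark's `ShuffleBlockInfo`), sorting by `(shuffleId, mapId)` makes the migration finish one shuffle completely before starting the next:
```
// Simplified stand-in for Spark's ShuffleBlockInfo, used only for this sketch.
case class BlockInfo(shuffleId: Int, mapId: Long)

val pending = Seq(BlockInfo(1, 0L), BlockInfo(0, 1L), BlockInfo(0, 0L), BlockInfo(1, 1L))

// Process shuffle 0 completely before shuffle 1, instead of relying on set-iteration order.
val ordered = pending.sortBy(b => (b.shuffleId, b.mapId))
// ordered == Seq(BlockInfo(0, 0L), BlockInfo(0, 1L), BlockInfo(1, 0L), BlockInfo(1, 1L))
```
With this ordering, an interruption partway through decommissioning leaves only the shuffles at the tail of the list unmigrated, rather than a random subset of all of them.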
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../storage/BlockManagerDecommissioner.scala | 1 + .../spark/storage/BlockManagerSuite.scala | 23 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 9129e8012dc59..9699515c626bf 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -248,6 +248,7 @@ private[storage] class BlockManagerDecommissioner( logInfo("Offloading shuffle blocks") val localShuffles = bm.migratableResolver.getStoredShuffles().toSet val newShufflesToMigrate = (localShuffles.diff(migratingShuffles)).toSeq + .sortBy(b => (b.shuffleId, b.mapId)) shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 5450a4b67c00b..55280fc578310 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -57,7 +57,7 @@ import org.apache.spark.scheduler.{LiveListenerBus, MapStatus, SparkListenerBloc import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} import org.apache.spark.security.{CryptoStreamUtils, EncryptionFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, SerializerManager} -import org.apache.spark.shuffle.{ShuffleBlockResolver, ShuffleManager} +import org.apache.spark.shuffle.{MigratableResolver, ShuffleBlockInfo, ShuffleBlockResolver, ShuffleManager} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util._ @@ -1974,6 +1974,27 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } } + test("SPARK-33387 Support ordered shuffle block migration") { + val blocks: Seq[ShuffleBlockInfo] = Seq( + ShuffleBlockInfo(1, 0L), + ShuffleBlockInfo(0, 1L), + ShuffleBlockInfo(0, 0L), + ShuffleBlockInfo(1, 1L)) + val sortedBlocks = blocks.sortBy(b => (b.shuffleId, b.mapId)) + + val resolver = mock(classOf[MigratableResolver]) + when(resolver.getStoredShuffles).thenReturn(blocks) + + val bm = mock(classOf[BlockManager]) + when(bm.migratableResolver).thenReturn(resolver) + when(bm.getPeers(mc.any())).thenReturn(Seq.empty) + + val decomManager = new BlockManagerDecommissioner(conf, bm) + decomManager.refreshOffloadingShuffleBlocks() + + assert(sortedBlocks.sameElements(decomManager.shufflesToMigrate.asScala.map(_._1))) + } + class MockBlockTransferService(val maxFailures: Int) extends BlockTransferService { var numCalls = 0 var tempFileManager: DownloadFileManager = null From bfb257f078854ad587a9e2bfe548cdb7bf8786d4 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Mon, 9 Nov 2020 07:02:14 +0000 Subject: [PATCH 0415/1009] [SPARK-32405][SQL] Apply table options while creating tables in JDBC Table Catalog ### What changes were proposed in this pull request? Currently in JDBCTableCatalog, we ignore the table options when creating table. 
``` // TODO (SPARK-32405): Apply table options while creating tables in JDBC Table Catalog if (!properties.isEmpty) { logWarning("Cannot create JDBC table with properties, these properties will be " + "ignored: " + properties.asScala.map { case (k, v) => s"$k=$v" }.mkString("[", ", ", "]")) } ``` ### Why are the changes needed? need to apply the table options when we create table ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? add new test Closes #30154 from huaxingao/table_options. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/DB2IntegrationSuite.scala | 8 +++++ .../jdbc/v2/MsSqlServerIntegrationSuite.scala | 2 ++ .../sql/jdbc/v2/MySQLIntegrationSuite.scala | 8 +++++ .../jdbc/v2/PostgresIntegrationSuite.scala | 8 +++++ .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 30 ++++++++++++++++ .../datasources/jdbc/JDBCOptions.scala | 3 ++ .../datasources/jdbc/JdbcUtils.scala | 10 ++++++ .../v2/jdbc/JDBCTableCatalog.scala | 35 ++++++++++++++++--- .../apache/spark/sql/jdbc/DerbyDialect.scala | 8 ++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 7 +++- .../spark/sql/jdbc/MsSqlServerDialect.scala | 8 +++++ .../apache/spark/sql/jdbc/MySQLDialect.scala | 5 +++ .../v2/jdbc/JDBCTableCatalogSuite.scala | 26 ++++++++++++++ 13 files changed, 151 insertions(+), 7 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 5c1442283aaed..4b6461815d306 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -73,4 +73,12 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { }.getMessage assert(msg1.contains("Cannot update alt_table field ID: double cannot be cast to varchar")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('CCSID'='UNICODE')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 905e32aaa918e..fd101607ad3ee 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -62,6 +62,8 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC override def dataPreparation(conn: Connection): Unit = {} + override def notSupportsTableComment: Boolean = true + override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") var t = spark.table(tbl) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index 6cf0f56ee7eeb..a81399fc2a4f7 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -106,4 +106,12 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { assert(msg.contains("UpdateColumnNullability is not supported")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('ENGINE'='InnoDB', 'DEFAULT CHARACTER SET'='utf8')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 45994a5093748..df2c865e4d13b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -66,4 +66,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes }.getMessage assert(msg.contains("Cannot update alt_table field ID: string cannot be cast to int")) } + + override def testCreateTableWithProperty(tbl: String): Unit = { + sql(s"CREATE TABLE $tbl (ID INT) USING _" + + s" TBLPROPERTIES('TABLESPACE'='pg_default')") + var t = spark.table(tbl) + var expectedSchema = new StructType().add("ID", IntegerType) + assert(t.schema === expectedSchema) + } } diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 92af29d9c9467..2e726b9e650b6 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.jdbc.v2 +import org.apache.log4j.Level + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -28,6 +30,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { // dialect specific update column type test def testUpdateColumnType(tbl: String): Unit + def notSupportsTableComment: Boolean = false + def testUpdateColumnNullability(tbl: String): Unit = { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") var t = spark.table(s"$catalogName.alt_table") @@ -54,6 +58,8 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { assert(t.schema === expectedSchema) } + def testCreateTableWithProperty(tbl: String): Unit = {} + test("SPARK-33034: ALTER TABLE ... 
add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") @@ -146,5 +152,29 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { }.getMessage assert(msg.contains("Table not found")) } + + test("CREATE TABLE with table comment") { + withTable(s"$catalogName.new_table") { + val logAppender = new LogAppender("table comment") + withLogAppender(logAppender) { + sql(s"CREATE TABLE $catalogName.new_table(i INT) USING _ COMMENT 'this is a comment'") + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("Cannot create JDBC table comment")) + assert(createCommentWarning === notSupportsTableComment) + } + } + + test("CREATE TABLE with table property") { + withTable(s"$catalogName.new_table") { + val m = intercept[AnalysisException] { + sql(s"CREATE TABLE $catalogName.new_table (i INT) USING _ TBLPROPERTIES('a'='1')") + }.message + assert(m.contains("Failed table creation")) + testCreateTableWithProperty(s"$catalogName.new_table") + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index e6fff8dbdbd7c..6e8b7ea678264 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -206,6 +206,8 @@ class JDBCOptions( } // The principal name of user's keytab file val principal = parameters.getOrElse(JDBC_PRINCIPAL, null) + + val tableComment = parameters.getOrElse(JDBC_TABLE_COMMENT, "").toString } class JdbcOptionsInWrite( @@ -260,4 +262,5 @@ object JDBCOptions { val JDBC_PUSHDOWN_PREDICATE = newOption("pushDownPredicate") val JDBC_KEYTAB = newOption("keytab") val JDBC_PRINCIPAL = newOption("principal") + val JDBC_TABLE_COMMENT = newOption("tableComment") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 9aaa55980436e..78f31fb80ecf6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -863,6 +863,7 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, options: JdbcOptionsInWrite): Unit = { + val dialect = JdbcDialects.get(options.url) val strSchema = schemaString( schema, caseSensitive, options.url, options.createTableColumnTypes) val createTableOptions = options.createTableOptions @@ -872,6 +873,15 @@ object JdbcUtils extends Logging { // E.g., "CREATE TABLE t (name string) ENGINE=InnoDB DEFAULT CHARSET=utf8" val sql = s"CREATE TABLE $tableName ($strSchema) $createTableOptions" executeStatement(conn, options, sql) + if (options.tableComment.nonEmpty) { + try { + executeStatement( + conn, options, dialect.getTableCommentQuery(tableName, options.tableComment)) + } catch { + case e: Exception => + logWarning("Cannot create JDBC table comment. 
The table comment will be ignored.") + } + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 8edc2fe5585e0..e96b37e05c762 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -21,6 +21,7 @@ import java.sql.{Connection, SQLException} import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException} import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog, TableChange} import org.apache.spark.sql.connector.expressions.Transform @@ -117,14 +118,38 @@ class JDBCTableCatalog extends TableCatalog with Logging { if (partitions.nonEmpty) { throw new UnsupportedOperationException("Cannot create JDBC table with partition") } - // TODO (SPARK-32405): Apply table options while creating tables in JDBC Table Catalog + + var tableOptions = options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident)) + var tableComment: String = "" + var tableProperties: String = "" if (!properties.isEmpty) { - logWarning("Cannot create JDBC table with properties, these properties will be " + - "ignored: " + properties.asScala.map { case (k, v) => s"$k=$v" }.mkString("[", ", ", "]")) + properties.asScala.map { + case (k, v) => k match { + case "comment" => tableComment = v + // ToDo: have a follow up to fail provider once unify create table syntax PR is merged + case "provider" => + case "owner" => // owner is ignored. It is default to current user name. + case "location" => + throw new AnalysisException("CREATE TABLE ... LOCATION ... is not supported in" + + " JDBC catalog.") + case _ => tableProperties = tableProperties + " " + s"$k $v" + } + } } - val writeOptions = new JdbcOptionsInWrite( - options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) + if (tableComment != "") { + tableOptions = tableOptions + (JDBCOptions.JDBC_TABLE_COMMENT -> tableComment) + } + if (tableProperties != "") { + // table property is set in JDBC_CREATE_TABLE_OPTIONS, which will be appended + // to CREATE TABLE statement. + // E.g., "CREATE TABLE t (name string) ENGINE InnoDB DEFAULT CHARACTER SET utf8" + // Spark doesn't check if these table properties are supported by databases. If + // table property is invalid, database will fail the table creation. 
+ tableOptions = tableOptions + (JDBCOptions.JDBC_CREATE_TABLE_OPTIONS -> tableProperties) + } + + val writeOptions = new JdbcOptionsInWrite(tableOptions) val caseSensitive = SQLConf.get.caseSensitiveAnalysis withConnection { conn => classifyException(s"Failed table creation: $ident") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index 9ca8879be31e0..3a2c9a5428be0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.Types +import java.sql.{SQLFeatureNotSupportedException, Types} import java.util.Locale import org.apache.spark.sql.types._ @@ -50,4 +50,10 @@ private object DerbyDialect extends JdbcDialect { override def renameTable(oldTable: String, newTable: String): String = { s"RENAME TABLE $oldTable TO $newTable" } + + // Derby currently doesn't support comment on table. Here is the ticket to add the support + // https://issues.apache.org/jira/browse/DERBY-7008 + override def getTableCommentQuery(table: String, comment: String): String = { + throw new SQLFeatureNotSupportedException(s"comment on table is not supported") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 0a857b99966fc..b12882b72fb66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuilder import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ @@ -61,7 +62,7 @@ case class JdbcType(databaseTypeDefinition : String, jdbcNullType : Int) * for the given Catalyst type. */ @DeveloperApi -abstract class JdbcDialect extends Serializable { +abstract class JdbcDialect extends Serializable with Logging{ /** * Check if this dialect instance can handle a certain jdbc url. * @param url the jdbc url. @@ -265,6 +266,10 @@ abstract class JdbcDialect extends Serializable { s"ALTER TABLE $tableName ALTER COLUMN ${quoteIdentifier(columnName)} SET $nullable" } + def getTableCommentQuery(table: String, comment: String): String = { + s"COMMENT ON TABLE $table IS '$comment'" + } + /** * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. * @param message The error message to be placed to the returned exception. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index dc39a10987c91..bc8589881adc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -102,4 +102,12 @@ private object MsSqlServerDialect extends JdbcDialect { isNullable: Boolean): String = { throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") } + + // scalastyle:off line.size.limit + // https://docs.microsoft.com/en-us/sql/relational-databases/system-stored-procedures/sp-addextendedproperty-transact-sql?redirectedfrom=MSDN&view=sql-server-ver15 + // scalastyle:on line.size.limit + // need to use the stored procedure called sp_addextendedproperty to add comments to tables + override def getTableCommentQuery(table: String, comment: String): String = { + throw new SQLFeatureNotSupportedException(s"comment on table is not supported") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index 942cdc9619b56..71bba6f1105ba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -89,4 +89,9 @@ private case object MySQLDialect extends JdbcDialect { isNullable: Boolean): String = { throw new SQLFeatureNotSupportedException(s"UpdateColumnNullability is not supported") } + + // See https://dev.mysql.com/doc/refman/8.0/en/alter-table.html + override def getTableCommentQuery(table: String, comment: String): String = { + s"ALTER TABLE $table COMMENT = '$comment'" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 51316b464ab34..c7ad96c8f7619 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2.jdbc import java.sql.{Connection, DriverManager} import java.util.Properties +import org.apache.log4j.Level + import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} @@ -391,4 +393,28 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } } } + + test("CREATE TABLE with table comment") { + withTable("h2.test.new_table") { + val logAppender = new LogAppender("table comment") + withLogAppender(logAppender) { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _ COMMENT 'this is a comment'") + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("Cannot create JDBC table comment")) + assert(createCommentWarning === false) + } + } + + test("CREATE TABLE with table property") { + withTable("h2.test.new_table") { + val m = intercept[AnalysisException] { + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _" + + " TBLPROPERTIES('ENGINE'='tableEngineName')") + }.cause.get.getMessage + 
assert(m.contains("\"TABLEENGINENAME\" not found")) + } + } } From 98730b7ee24bfc35b4dcf431246dbb3ae19f8322 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 9 Nov 2020 08:08:00 +0000 Subject: [PATCH 0416/1009] [SPARK-33087][SQL] DataFrameWriterV2 should delegate table resolution to the analyzer ### What changes were proposed in this pull request? This PR makes `DataFrameWriterV2` to create query plans with `UnresolvedRelation` and leave the table resolution work to the analyzer. ### Why are the changes needed? Table resolution work should be done by the analyzer. After this PR, the behavior is more consistent between different APIs (DataFrameWriter, DataFrameWriterV2 and SQL). See the next section for behavior changes. ### Does this PR introduce _any_ user-facing change? Yes. 1. writes to a temp view of v2 relation: previously it fails with table not found exception, now it works if the v2 relation is writable. This is consistent with `DataFrameWriter` and SQL INSERT. 2. writes to other temp views: previously it fails with table not found exception, now it fails with a more explicit error message, saying that writing to a temp view of non-v2-relation is not allowed. 3. writes to a view: previously it fails with table not writable error, now it fails with a more explicit error message, saying that writing to a view is not allowed. 4. writes to a v1 table: previously it fails with table not writable error, now it fails with a more explicit error message, saying that writing to a v1 table is not allowed. (We can allow it later, by falling back to v1 command) ### How was this patch tested? new tests Closes #29970 from cloud-fan/refactor. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 41 ++++++ .../sql/catalyst/analysis/CheckAnalysis.scala | 5 + .../catalyst/plans/logical/v2Commands.scala | 8 ++ .../apache/spark/sql/DataFrameWriterV2.scala | 44 +----- .../spark/sql/DataFrameWriterV2Suite.scala | 129 +++++++++++++++++- 5 files changed, 183 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index f0143fdb23473..5834f9bad4a18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -860,6 +860,17 @@ class Analyzer( lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) + // TODO (SPARK-27484): handle streaming write commands when we have them. + case write: V2WriteCommand => + write.table match { + case UnresolvedRelation(ident, _, false) => + lookupTempView(ident).map(EliminateSubqueryAliases(_)).map { + case r: DataSourceV2Relation => write.withNewTable(r) + case _ => throw new AnalysisException("Cannot write into temp view " + + s"${ident.quoted} as it's not a data source v2 relation.") + }.getOrElse(write) + case _ => write + } case u @ UnresolvedTable(ident) => lookupTempView(ident).foreach { _ => u.failAnalysis(s"${ident.quoted} is a temp view not table.") @@ -942,6 +953,18 @@ class Analyzer( .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) + // TODO (SPARK-27484): handle streaming write commands when we have them. 
+ case write: V2WriteCommand => + write.table match { + case u: UnresolvedRelation if !u.isStreaming => + lookupV2Relation(u.multipartIdentifier, u.options, false).map { + case r: DataSourceV2Relation => write.withNewTable(r) + case other => throw new IllegalStateException( + "[BUG] unexpected plan returned by `lookupV2Relation`: " + other) + }.getOrElse(write) + case _ => write + } + case alter @ AlterTable(_, _, u: UnresolvedV2Relation, _) => CatalogV2Util.loadRelation(u.catalog, u.tableName) .map(rel => alter.copy(table = rel)) @@ -1019,6 +1042,24 @@ class Analyzer( case other => i.copy(table = other) } + // TODO (SPARK-27484): handle streaming write commands when we have them. + case write: V2WriteCommand => + write.table match { + case u: UnresolvedRelation if !u.isStreaming => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(EliminateSubqueryAliases(_)) + .map { + case v: View => write.failAnalysis( + s"Writing into a view is not allowed. View: ${v.desc.identifier}.") + case u: UnresolvedCatalogRelation => write.failAnalysis( + "Cannot write into v1 table: " + u.tableMeta.identifier) + case r: DataSourceV2Relation => write.withNewTable(r) + case other => throw new IllegalStateException( + "[BUG] unexpected plan returned by `lookupRelation`: " + other) + }.getOrElse(write) + case _ => write + } + case u: UnresolvedRelation => lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) .map(resolveViews).getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ac91fa0b5811e..33a5224ed293e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -108,6 +108,11 @@ trait CheckAnalysis extends PredicateHelper { case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + // TODO (SPARK-27484): handle streaming write commands when we have them. 
+ case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => + val tblName = write.table.asInstanceOf[UnresolvedRelation].multipartIdentifier + write.table.failAnalysis(s"Table or view not found: ${tblName.quoted}") + case u: UnresolvedV2Relation if isView(u.originalNameParts) => u.failAnalysis( s"Invalid command: '${u.originalNameParts.quoted}' is a view not a table.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fb8a9be80385b..94d4e7ecfac21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -53,6 +53,7 @@ trait V2WriteCommand extends Command { } def withNewQuery(newQuery: LogicalPlan): V2WriteCommand + def withNewTable(newTable: NamedRelation): V2WriteCommand } /** @@ -64,6 +65,7 @@ case class AppendData( writeOptions: Map[String, String], isByName: Boolean) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) + override def withNewTable(newTable: NamedRelation): AppendData = copy(table = newTable) } object AppendData { @@ -97,6 +99,9 @@ case class OverwriteByExpression( override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { copy(query = newQuery) } + override def withNewTable(newTable: NamedRelation): OverwriteByExpression = { + copy(table = newTable) + } } object OverwriteByExpression { @@ -128,6 +133,9 @@ case class OverwritePartitionsDynamic( override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { copy(query = newQuery) } + override def withNewTable(newTable: NamedRelation): OverwritePartitionsDynamic = { + copy(table = newTable) + } } object OverwritePartitionsDynamic { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala index 87f35410172d6..d55b5c3103537 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -21,12 +21,11 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.annotation.Experimental -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions.{Attribute, Bucket, Days, Hours, Literal, Months, Years} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CreateTableAsSelectStatement, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelectStatement} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference, Transform} import org.apache.spark.sql.execution.SQLExecution -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.IntegerType /** @@ -38,21 +37,12 @@ import org.apache.spark.sql.types.IntegerType final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) extends CreateTableWriter[T] { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import 
org.apache.spark.sql.connector.catalog.CatalogV2Util._ - import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier - private val df: DataFrame = ds.toDF() private val sparkSession = ds.sparkSession private val tableName = sparkSession.sessionState.sqlParser.parseMultipartIdentifier(table) - private val (catalog, identifier) = { - val CatalogAndIdentifier(catalog, identifier) = tableName - (catalog.asTableCatalog, identifier) - } - private val logicalPlan = df.queryExecution.logical private var provider: Option[String] = None @@ -153,15 +143,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def append(): Unit = { - val append = loadTable(catalog, identifier) match { - case Some(t) => - AppendData.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val append = AppendData.byName(UnresolvedRelation(tableName), logicalPlan, options.toMap) runCommand("append")(append) } @@ -177,15 +159,8 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def overwrite(condition: Column): Unit = { - val overwrite = loadTable(catalog, identifier) match { - case Some(t) => - OverwriteByExpression.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, condition.expr, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val overwrite = OverwriteByExpression.byName( + UnresolvedRelation(tableName), logicalPlan, condition.expr, options.toMap) runCommand("overwrite")(overwrite) } @@ -204,15 +179,8 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) */ @throws(classOf[NoSuchTableException]) def overwritePartitions(): Unit = { - val dynamicOverwrite = loadTable(catalog, identifier) match { - case Some(t) => - OverwritePartitionsDynamic.byName( - DataSourceV2Relation.create(t, Some(catalog), Some(identifier)), - logicalPlan, options.toMap) - case _ => - throw new NoSuchTableException(identifier) - } - + val dynamicOverwrite = OverwritePartitionsDynamic.byName( + UnresolvedRelation(tableName), logicalPlan, options.toMap) runCommand("overwritePartitions")(dynamicOverwrite) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 8720c1f620564..de791383326f1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.sources.FakeSourceOne import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.sql.util.QueryExecutionListener @@ -57,6 +58,7 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } after { + spark.sessionState.catalog.reset() spark.sessionState.catalogManager.reset() 
spark.sessionState.conf.clear() } @@ -118,6 +120,18 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("Append: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + spark.table("source").writeTo("temp_view").append() + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(1L, "a"), Row(2L, "b"), Row(3L, "c"))) + } + test("Append: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -136,11 +150,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Append: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").append() } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + test("Append: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").append() + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("Append: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").append() + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("Append: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").append() + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("Overwrite: overwrite by expression: true") { @@ -181,6 +220,20 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(1L, "a"), Row(2L, "b"), Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("Overwrite: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("source").writeTo("testcat.table_name").append() + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + + spark.table("source2").writeTo("testcat.table_name").overwrite(lit(true)) + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + test("Overwrite: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -200,11 +253,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("Overwrite: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").overwrite(lit(true)) } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + 
test("Overwrite: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("Overwrite: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("Overwrite: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwrite(lit(true)) + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("OverwritePartitions: overwrite conflicting partitions") { @@ -245,6 +323,20 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) } + test("OverwritePartitions: write to a temp view of v2 relation") { + spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") + spark.table("source").writeTo("testcat.table_name").append() + spark.table("testcat.table_name").createOrReplaceTempView("temp_view") + + spark.table("source2").writeTo("testcat.table_name").overwritePartitions() + checkAnswer( + spark.table("testcat.table_name"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + checkAnswer( + spark.table("temp_view"), + Seq(Row(4L, "d"), Row(5L, "e"), Row(6L, "f"))) + } + test("OverwritePartitions: by name not position") { spark.sql("CREATE TABLE testcat.table_name (id bigint, data string) USING foo") @@ -264,11 +356,36 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo } test("OverwritePartitions: fail if table does not exist") { - val exc = intercept[NoSuchTableException] { + val exc = intercept[AnalysisException] { spark.table("source").writeTo("testcat.table_name").overwritePartitions() } - assert(exc.getMessage.contains("table_name")) + assert(exc.getMessage.contains("Table or view not found: testcat.table_name")) + } + + test("OverwritePartitions: fail if it writes to a temp view that is not v2 relation") { + spark.range(10).createOrReplaceTempView("temp_view") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("temp_view").overwritePartitions() + } + assert(exc.getMessage.contains("Cannot write into temp view temp_view as it's not a " + + "data source v2 relation")) + } + + test("OverwritePartitions: fail if it writes to a view") { + spark.sql("CREATE VIEW v AS SELECT 1") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("v").overwritePartitions() + } + assert(exc.getMessage.contains("Writing into a view is not allowed")) + } + + test("OverwritePartitions: fail if it writes to a v1 table") { + sql(s"CREATE TABLE table_name USING ${classOf[FakeSourceOne].getName}") + val exc = intercept[AnalysisException] { + spark.table("source").writeTo("table_name").overwritePartitions() + } + assert(exc.getMessage.contains("Cannot write into v1 table: `default`.`table_name`")) } test("Create: basic behavior") { From 69799c514ff9874c57bf94d4de21ea4cd0cbbf8d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 9 Nov 2020 08:32:51 +0000 Subject: [PATCH 
0417/1009] [SPARK-33372][SQL] Fix InSet bucket pruning ### What changes were proposed in this pull request? This PR fixes `InSet` bucket pruning, because its values are not `Literal`s: https://github.com/apache/spark/blob/cbd3fdea62dab73fc4a96702de8fd1f07722da66/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala#L253-L255 ### Why are the changes needed? Fix a bug: the old `InSet` case expected `Literal` values, so bucket pruning was never applied for `InSet` filters. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test and manual test: ```scala spark.sql("select id as a, id as b from range(10000)").write.bucketBy(100, "a").saveAsTable("t") spark.sql("select * from t where a in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)").show ``` Before this PR | After this PR -- | -- ![image](https://user-images.githubusercontent.com/5399861/98380788-fb120980-2083-11eb-8fae-4e21ad873e9b.png) | ![image](https://user-images.githubusercontent.com/5399861/98381095-5ba14680-2084-11eb-82ca-2d780c85305c.png) Closes #30279 from wangyum/SPARK-33372. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../spark/sql/execution/datasources/FileSourceStrategy.scala | 5 ++--- .../org/apache/spark/sql/sources/BucketedReadSuite.scala | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 1191f99cc98a2..5e07f778ac135 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -89,9 +89,8 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { case expressions.In(a: Attribute, list) if list.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName => getBucketSetFromIterable(a, list.map(e => e.eval(EmptyRow))) - case expressions.InSet(a: Attribute, hset) - if hset.forall(_.isInstanceOf[Literal]) && a.name == bucketColumnName => - getBucketSetFromIterable(a, hset.map(e => expressions.Literal(e).eval(EmptyRow))) + case expressions.InSet(a: Attribute, hset) if a.name == bucketColumnName => + getBucketSetFromIterable(a, hset) case expressions.IsNull(a: Attribute) if a.name == bucketColumnName => getBucketSetFromValue(a, null) case expressions.And(left, right) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index a188e4d9d6d90..6a31ce07dabb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -190,7 +190,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { // Case 4: InSet val inSetExpr = expressions.InSet($"j".expr, - Set(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3).map(lit(_).expr)) + Set(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3)) checkPrunedAnswers( bucketSpec, bucketValues = Seq(bucketValue, bucketValue + 1, bucketValue + 2, bucketValue + 3), From 7a5647a93aaea9d1d78d9262e24fc8c010db04d0 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 9 Nov 2020 09:20:31 +0000 Subject: [PATCH 0418/1009] [SPARK-33385][SQL] Support bucket pruning for IsNaN ### What changes were proposed in this pull request? This PR adds support for bucket pruning on the `IsNaN` predicate.
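As an illustration only (this snippet is not part of the patch, and the table and column names are invented), a filter on a double bucket column can now prune buckets:

```scala
// Hypothetical example: bucket a double column `j` and filter with isnan(j).
// With this change, only the bucket that NaN maps to is scanned instead of all 8 buckets.
spark.range(10000)
  .selectExpr("id AS i", "cast(if(id % 100 = 0, 'NaN', id) AS double) AS j")
  .write
  .bucketBy(8, "j")
  .saveAsTable("bucketed_double_table")

spark.sql("SELECT * FROM bucketed_double_table WHERE isnan(j)").show()
```

The added strategy cases map the NaN value to its bucket via `getBucketSetFromValue`, mirroring the existing `IsNull` handling.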
### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30291 from wangyum/SPARK-33385. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../datasources/FileSourceStrategy.scala | 7 +++++++ .../spark/sql/sources/BucketedReadSuite.scala | 21 ++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 5e07f778ac135..1bfde7515dc92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} +import org.apache.spark.sql.types.{DoubleType, FloatType} import org.apache.spark.util.collection.BitSet /** @@ -93,6 +94,12 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { getBucketSetFromIterable(a, hset) case expressions.IsNull(a: Attribute) if a.name == bucketColumnName => getBucketSetFromValue(a, null) + case expressions.IsNaN(a: Attribute) + if a.name == bucketColumnName && a.dataType == FloatType => + getBucketSetFromValue(a, Float.NaN) + case expressions.IsNaN(a: Attribute) + if a.name == bucketColumnName && a.dataType == DoubleType => + getBucketSetFromValue(a, Double.NaN) case expressions.And(left, right) => getExpressionBuckets(left, bucketColumnName, numBuckets) & getExpressionBuckets(right, bucketColumnName, numBuckets) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 6a31ce07dabb4..4832386e553db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -113,7 +113,7 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { // 2) Verify the final result is the same as the expected one private def checkPrunedAnswers( bucketSpec: BucketSpec, - bucketValues: Seq[Integer], + bucketValues: Seq[Any], filterCondition: Column, originalDataFrame: DataFrame): Unit = { // This test verifies parts of the plan. Disable whole stage codegen. 
@@ -245,6 +245,25 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { } } + test("bucket pruning support IsNaN") { + withTable("bucketed_table") { + val numBuckets = NumBucketsForPruningNullDf + val bucketSpec = BucketSpec(numBuckets, Seq("j"), Nil) + val naNDF = nullDF.selectExpr("i", "cast(if(isnull(j), 'NaN', j) as double) as j", "k") + // json does not support predicate push-down, and thus json is used here + naNDF.write + .format("json") + .bucketBy(numBuckets, "j") + .saveAsTable("bucketed_table") + + checkPrunedAnswers( + bucketSpec, + bucketValues = Double.NaN :: Nil, + filterCondition = $"j".isNaN, + naNDF) + } + } + test("read partitioning bucketed tables having composite filters") { withTable("bucketed_table") { val numBuckets = NumBucketsForPruningDF From 4e1c89400dc57b5c53741f14f5179add7cb617eb Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 9 Nov 2020 09:44:58 +0000 Subject: [PATCH 0419/1009] [SPARK-33140][SQL][FOLLOW-UP] Use sparkSession in AQE context when applying rules ### What changes were proposed in this pull request? After #30097, all rules are using `SparkSession.active` to get `SQLConf` and `SparkSession`. But in AQE, when applying the rules for the initial plan, we should use the spark session in AQE context. ### Why are the changes needed? Fix potential problem caused by using the wrong spark session ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing ut Closes #30294 from linhongliu-db/SPARK-33140-followup. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../sql/execution/adaptive/AdaptiveSparkPlanExec.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 4ae33311d5a24..75cc073c4a62c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -123,8 +123,10 @@ case class AdaptiveSparkPlanExec( @transient private val costEvaluator = SimpleCostEvaluator - @transient private val initialPlan = applyPhysicalRules( - inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) + @transient private val initialPlan = context.session.withActive { + applyPhysicalRules( + inputPlan, queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) + } @volatile private var currentPhysicalPlan = initialPlan From 84dc37461187210ecdb25fa36ccb61c7cc1a6486 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 9 Nov 2020 19:27:36 +0900 Subject: [PATCH 0420/1009] [SPARK-33303][SQL] Deduplicate deterministic PythonUDF calls ### What changes were proposed in this pull request? This PR modifies the `ExtractPythonUDFs` rule to deduplicate deterministic PythonUDF calls. 
Before this PR the dataframe: `df.withColumn("c", batchedPythonUDF(col("a"))).withColumn("d", col("c"))` has the plan: ``` *(1) Project [value#1 AS a#4, pythonUDF1#15 AS c#7, pythonUDF1#15 AS d#10] +- BatchEvalPython [dummyUDF(value#1), dummyUDF(value#1)], [pythonUDF0#14, pythonUDF1#15] +- LocalTableScan [value#1] ``` After this PR the deterministic PythonUDF calls are deduplicated: ``` *(1) Project [value#1 AS a#4, pythonUDF0#14 AS c#7, pythonUDF0#14 AS d#10] +- BatchEvalPython [dummyUDF(value#1)], [pythonUDF0#14] +- LocalTableScan [value#1] ``` ### Why are the changes needed? To fix a performance issue. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New and existing UTs. Closes #30203 from peter-toth/SPARK-33303-deduplicate-deterministic-udf-calls. Authored-by: Peter Toth Signed-off-by: HyukjinKwon --- .../execution/python/ExtractPythonUDFs.scala | 20 +++++++++----- .../python/BatchEvalPythonExecSuite.scala | 7 +++++ .../python/ExtractPythonUDFsSuite.scala | 27 +++++++++++++++++++ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index 1c88056cb50c9..dab2723d25726 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -218,13 +218,22 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } } + private def canonicalizeDeterministic(u: PythonUDF) = { + if (u.deterministic) { + u.canonicalized.asInstanceOf[PythonUDF] + } else { + u + } + } + /** * Extract all the PythonUDFs from the current operator and evaluate them before the operator. */ private def extract(plan: LogicalPlan): LogicalPlan = { - val udfs = collectEvaluableUDFsFromExpressions(plan.expressions) + val udfs = ExpressionSet(collectEvaluableUDFsFromExpressions(plan.expressions)) // ignore the PythonUDF that come from second/third aggregate, which is not used .filter(udf => udf.references.subsetOf(plan.inputSet)) + .toSeq.asInstanceOf[Seq[PythonUDF]] if (udfs.isEmpty) { // If there aren't any, we are done. plan @@ -262,7 +271,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { throw new AnalysisException("Unexcepted UDF evalType") } - attributeMap ++= validUdfs.zip(resultAttrs) + attributeMap ++= validUdfs.map(canonicalizeDeterministic).zip(resultAttrs) evaluation } else { child @@ -270,13 +279,12 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } // Other cases are disallowed as they are ambiguous or would require a cartesian // product. 
- udfs.filterNot(attributeMap.contains).foreach { udf => - sys.error(s"Invalid PythonUDF $udf, requires attributes from more than one child.") + udfs.map(canonicalizeDeterministic).filterNot(attributeMap.contains).foreach { + udf => sys.error(s"Invalid PythonUDF $udf, requires attributes from more than one child.") } val rewritten = plan.withNewChildren(newChildren).transformExpressions { - case p: PythonUDF if attributeMap.contains(p) => - attributeMap(p) + case p: PythonUDF => attributeMap.getOrElse(canonicalizeDeterministic(p), p) } // extract remaining python UDFs recursively diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala index 5fe3d6a71167e..cb5e23e0534d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/BatchEvalPythonExecSuite.scala @@ -137,6 +137,13 @@ class MyDummyPythonUDF extends UserDefinedPythonFunction( pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true) +class MyDummyNondeterministicPythonUDF extends UserDefinedPythonFunction( + name = "dummyNondeterministicUDF", + func = new DummyUDF, + dataType = BooleanType, + pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, + udfDeterministic = false) + class MyDummyGroupedAggPandasUDF extends UserDefinedPythonFunction( name = "dummyGroupedAggPandasUDF", func = new DummyUDF, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala index 87d541d2d22b0..325f4923bd6c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFsSuite.scala @@ -28,6 +28,7 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { import testImplicits._ val batchedPythonUDF = new MyDummyPythonUDF + val batchedNondeterministicPythonUDF = new MyDummyNondeterministicPythonUDF val scalarPandasUDF = new MyDummyScalarPandasUDF private def collectBatchExec(plan: SparkPlan): Seq[BatchEvalPythonExec] = plan.collect { @@ -166,5 +167,31 @@ class ExtractPythonUDFsSuite extends SparkPlanTest with SharedSparkSession { } } + test("SPARK-33303: Deterministic UDF calls are deduplicated") { + val df = Seq("Hello").toDF("a") + + val df2 = df.withColumn("c", batchedPythonUDF(col("a"))).withColumn("d", col("c")) + val pythonEvalNodes2 = collectBatchExec(df2.queryExecution.executedPlan) + assert(pythonEvalNodes2.size == 1) + assert(pythonEvalNodes2.head.udfs.size == 1) + + val df3 = df.withColumns(Seq("c", "d"), + Seq(batchedPythonUDF(col("a")), batchedPythonUDF(col("a")))) + val pythonEvalNodes3 = collectBatchExec(df3.queryExecution.executedPlan) + assert(pythonEvalNodes3.size == 1) + assert(pythonEvalNodes3.head.udfs.size == 1) + + val df4 = df.withColumn("c", batchedNondeterministicPythonUDF(col("a"))) + .withColumn("d", col("c")) + val pythonEvalNodes4 = collectBatchExec(df4.queryExecution.executedPlan) + assert(pythonEvalNodes4.size == 1) + assert(pythonEvalNodes4.head.udfs.size == 1) + + val df5 = df.withColumns(Seq("c", "d"), + Seq(batchedNondeterministicPythonUDF(col("a")), batchedNondeterministicPythonUDF(col("a")))) + val pythonEvalNodes5 = collectBatchExec(df5.queryExecution.executedPlan) + 
assert(pythonEvalNodes5.size == 1) + assert(pythonEvalNodes5.head.udfs.size == 2) + } } From 8113c88542ee282b510c7e046d64df1761a85d14 Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Mon, 9 Nov 2020 11:00:52 -0600 Subject: [PATCH 0421/1009] [SPARK-32916][SHUFFLE] Implementation of shuffle service that leverages push-based shuffle in YARN deployment mode ### What changes were proposed in this pull request? This is one of the patches for SPIP [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602) which is needed for push-based shuffle. Summary of changes: - Adds an implementation of `MergedShuffleFileManager` which was introduced with [Spark 32915](https://issues.apache.org/jira/browse/SPARK-32915). - Integrated the push-based shuffle service with `YarnShuffleService`. ### Why are the changes needed? Refer to the SPIP in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests. The reference PR with the consolidated changes covering the complete implementation is also provided in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen mshenlinkedin.com Co-authored-by: Chandni Singh chsinghlinkedin.com Co-authored-by: Ye Zhou yezhoulinkedin.com Closes #30062 from otterc/SPARK-32916. Lead-authored-by: Chandni Singh Co-authored-by: Chandni Singh Co-authored-by: Ye Zhou Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../spark/network/protocol/Encoders.java | 26 +- .../spark/network/util/TransportConf.java | 35 + .../spark/network/protocol/EncodersSuite.java | 68 ++ common/network-shuffle/pom.xml | 10 +- .../spark/network/shuffle/ErrorHandler.java | 8 +- .../network/shuffle/ExternalBlockHandler.java | 25 +- .../network/shuffle/MergedBlockMeta.java | 2 + .../shuffle/MergedShuffleFileManager.java | 28 +- .../network/shuffle/OneForOneBlockPusher.java | 11 +- .../shuffle/RemoteBlockPushResolver.java | 934 ++++++++++++++++++ .../protocol/FinalizeShuffleMerge.java | 2 + .../shuffle/protocol/MergeStatuses.java | 2 + .../shuffle/protocol/PushBlockStream.java | 37 +- .../shuffle/ExternalBlockHandlerSuite.java | 2 +- .../shuffle/OneForOneBlockPusherSuite.java | 66 +- .../shuffle/RemoteBlockPushResolverSuite.java | 496 ++++++++++ .../network/yarn/YarnShuffleService.java | 23 +- .../network/yarn/YarnShuffleServiceSuite.java | 61 ++ 18 files changed, 1748 insertions(+), 88 deletions(-) create mode 100644 common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java create mode 100644 common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java create mode 100644 common/network-yarn/src/test/java/org/apache/spark/network/yarn/YarnShuffleServiceSuite.java diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 4fa191b3917e3..8bab808ad6864 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -18,6 +18,7 @@ package org.apache.spark.network.protocol; import java.io.IOException; 
+import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import io.netty.buffer.ByteBuf; @@ -46,7 +47,11 @@ public static String decode(ByteBuf buf) { } } - /** Bitmaps are encoded with their serialization length followed by the serialization bytes. */ + /** + * Bitmaps are encoded with their serialization length followed by the serialization bytes. + * + * @since 3.1.0 + */ public static class Bitmaps { public static int encodedLength(RoaringBitmap b) { // Compress the bitmap before serializing it. Note that since BlockTransferMessage @@ -57,13 +62,20 @@ public static int encodedLength(RoaringBitmap b) { return b.serializedSizeInBytes(); } + /** + * The input ByteBuf for this encoder should have enough write capacity to fit the serialized + * bitmap. Other encoders which use {@link io.netty.buffer.AbstractByteBuf#writeBytes(byte[])} + * to write can expand the buf as writeBytes calls {@link ByteBuf#ensureWritable} internally. + * However, this encoder doesn't rely on netty's writeBytes and will fail if the input buf + * doesn't have enough write capacity. + */ public static void encode(ByteBuf buf, RoaringBitmap b) { - int encodedLength = b.serializedSizeInBytes(); // RoaringBitmap requires nio ByteBuffer for serde. We expose the netty ByteBuf as a nio // ByteBuffer. Here, we need to explicitly manage the index so we can write into the // ByteBuffer, and the write is reflected in the underneath ByteBuf. - b.serialize(buf.nioBuffer(buf.writerIndex(), encodedLength)); - buf.writerIndex(buf.writerIndex() + encodedLength); + ByteBuffer byteBuffer = buf.nioBuffer(buf.writerIndex(), buf.writableBytes()); + b.serialize(byteBuffer); + buf.writerIndex(buf.writerIndex() + byteBuffer.position()); } public static RoaringBitmap decode(ByteBuf buf) { @@ -172,7 +184,11 @@ public static long[] decode(ByteBuf buf) { } } - /** Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. */ + /** + * Bitmap arrays are encoded with the number of bitmaps followed by per-Bitmap encoding. + * + * @since 3.1.0 + */ public static class BitmapArrays { public static int encodedLength(RoaringBitmap[] bitmaps) { int totalLength = 4; diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 646e4278811f4..fd287b022618b 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -363,4 +363,39 @@ public boolean useOldFetchProtocol() { return conf.getBoolean("spark.shuffle.useOldFetchProtocol", false); } + /** + * Class name of the implementation of MergedShuffleFileManager that merges the blocks + * pushed to it when push-based shuffle is enabled. By default, push-based shuffle is disabled at + * a cluster level because this configuration is set to + * 'org.apache.spark.network.shuffle.ExternalBlockHandler$NoOpMergedShuffleFileManager'. + * To turn on push-based shuffle at a cluster level, set the configuration to + * 'org.apache.spark.network.shuffle.RemoteBlockPushResolver'. + */ + public String mergedShuffleFileManagerImpl() { + return conf.get("spark.shuffle.server.mergedShuffleFileManagerImpl", + "org.apache.spark.network.shuffle.ExternalBlockHandler$NoOpMergedShuffleFileManager"); + } + + /** + * The minimum size of a chunk when dividing a merged shuffle file into multiple chunks during + * push-based shuffle. 
+ * A merged shuffle file consists of multiple small shuffle blocks. Fetching the + * complete merged shuffle file in a single response increases the memory requirements for the + * clients. Instead of serving the entire merged file, the shuffle service serves the + * merged file in `chunks`. A `chunk` constitutes few shuffle blocks in entirety and this + * configuration controls how big a chunk can get. A corresponding index file for each merged + * shuffle file will be generated indicating chunk boundaries. + */ + public int minChunkSizeInMergedShuffleFile() { + return Ints.checkedCast(JavaUtils.byteStringAsBytes( + conf.get("spark.shuffle.server.minChunkSizeInMergedShuffleFile", "2m"))); + } + + /** + * The size of cache in memory which is used in push-based shuffle for storing merged index files. + */ + public long mergedIndexCacheSize() { + return JavaUtils.byteStringAsBytes( + conf.get("spark.shuffle.server.mergedIndexCacheSize", "100m")); + } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java new file mode 100644 index 0000000000000..6e89702c04396 --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncodersSuite.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.protocol; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import org.junit.Test; +import org.roaringbitmap.RoaringBitmap; + +import static org.junit.Assert.*; + +/** + * Tests for {@link Encoders}. 
+ */ +public class EncodersSuite { + + @Test + public void testRoaringBitmapEncodeDecode() { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1, 2, 3); + ByteBuf buf = Unpooled.buffer(Encoders.Bitmaps.encodedLength(bitmap)); + Encoders.Bitmaps.encode(buf, bitmap); + RoaringBitmap decodedBitmap = Encoders.Bitmaps.decode(buf); + assertEquals(bitmap, decodedBitmap); + } + + @Test (expected = java.nio.BufferOverflowException.class) + public void testRoaringBitmapEncodeShouldFailWhenBufferIsSmall() { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1, 2, 3); + ByteBuf buf = Unpooled.buffer(4); + Encoders.Bitmaps.encode(buf, bitmap); + } + + @Test + public void testBitmapArraysEncodeDecode() { + RoaringBitmap[] bitmaps = new RoaringBitmap[] { + new RoaringBitmap(), + new RoaringBitmap(), + new RoaringBitmap(), // empty + new RoaringBitmap(), + new RoaringBitmap() + }; + bitmaps[0].add(1, 2, 3); + bitmaps[1].add(1, 2, 4); + bitmaps[3].add(7L, 9L); + bitmaps[4].add(1L, 100L); + ByteBuf buf = Unpooled.buffer(Encoders.BitmapArrays.encodedLength(bitmaps)); + Encoders.BitmapArrays.encode(buf, bitmaps); + RoaringBitmap[] decodedBitmaps = Encoders.BitmapArrays.decode(buf); + assertArrayEquals(bitmaps, decodedBitmaps); + } +} diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a4a1ff92ef9a0..562a1d495cc8a 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -47,6 +47,11 @@ metrics-core
    + + org.apache.spark + spark-tags_${scala.binary.version} + + org.slf4j @@ -70,11 +75,6 @@ test-jar test - - org.apache.spark - spark-tags_${scala.binary.version} - test - 2.8 1.8 - 1.0.0 + 1.1.0 2.6 @@ -549,6 +550,11 @@ commons-codec ${commons-codec.version} + + org.apache.commons + commons-compress + ${commons-compress.version} + org.apache.commons commons-math3 From 4360c6f12ae8f192fb65ae1c6ad6ee05e0217c7d Mon Sep 17 00:00:00 2001 From: neko Date: Tue, 10 Nov 2020 11:12:19 +0900 Subject: [PATCH 0426/1009] [SPARK-33363] Add prompt information related to the current task when pyspark/sparkR starts ### What changes were proposed in this pull request? add prompt information about current applicationId, current URL and master info when pyspark / sparkR starts. ### Why are the changes needed? The information printed when pyspark/sparkR starts does not prompt the basic information of current application, and it is not convenient when used pyspark/sparkR in dos. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? manual test result shows below: ![pyspark new print](https://user-images.githubusercontent.com/52202080/98274268-2a663f00-1fce-11eb-88ce-964ce90b439e.png) ![sparkR](https://user-images.githubusercontent.com/52202080/98541235-1a01dd00-22ca-11eb-9304-09bcde87b05e.png) Closes #30266 from akiyamaneko/pyspark-hint-info. Authored-by: neko Signed-off-by: HyukjinKwon --- R/pkg/inst/profile/shell.R | 4 +++- python/pyspark/shell.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/R/pkg/inst/profile/shell.R b/R/pkg/inst/profile/shell.R index f6c20e1a5ebc3..ffedb3038fd53 100644 --- a/R/pkg/inst/profile/shell.R +++ b/R/pkg/inst/profile/shell.R @@ -43,5 +43,7 @@ cat(" /_/", "\n") cat("\n") - cat("\nSparkSession available as 'spark'.\n") + cat("\nSparkSession Web UI available at", SparkR::sparkR.uiWebUrl()) + cat("\nSparkSession available as 'spark'(master = ", unlist(SparkR::sparkR.conf("spark.master")), + ", app id = ", unlist(SparkR::sparkR.conf("spark.app.id")), ").", "\n", sep = "") } diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py index 0c6cc1302ff62..25aadb16840c8 100644 --- a/python/pyspark/shell.py +++ b/python/pyspark/shell.py @@ -62,6 +62,8 @@ platform.python_version(), platform.python_build()[0], platform.python_build()[1])) +print("Spark context Web UI available at %s" % (sc.uiWebUrl)) +print("Spark context available as 'sc' (master = %s, app id = %s)." % (sc.master, sc.applicationId)) print("SparkSession available as 'spark'.") # The ./bin/pyspark script stores the old PYTHONSTARTUP value in OLD_PYTHONSTARTUP, From 4ac8133866e7b97e04ab75cad0e0bf54565b0ba5 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 10 Nov 2020 11:22:35 +0900 Subject: [PATCH 0427/1009] [SPARK-33223][SS][UI] Structured Streaming Web UI state information ### What changes were proposed in this pull request? Structured Streaming UI is not containing state information. In this PR I've added it. ### Why are the changes needed? Missing state information. ### Does this PR introduce _any_ user-facing change? Additional UI elements appear. ### How was this patch tested? Existing unit tests + manual test. Screenshot 2020-10-30 at 15 14 21 Closes #30151 from gaborgsomogyi/SPARK-33223. 
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/StreamingQueryStatisticsPage.scala | 119 +++++++++++++++++- .../sql/streaming/ui/UISeleniumSuite.scala | 15 ++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 227e5e5af3983..77078046dda7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -22,7 +22,7 @@ import java.lang.{Long => JLong} import java.util.UUID import javax.servlet.http.HttpServletRequest -import scala.xml.{Node, Unparsed} +import scala.xml.{Node, NodeBuffer, Unparsed} import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.ui.UIUtils._ @@ -126,6 +126,122 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
    } + def generateAggregatedStateOperators( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): NodeBuffer = { + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress != null) + if (query.lastProgress.stateOperators.nonEmpty) { + val numRowsTotalData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsTotal).sum.toDouble)) + val maxNumRowsTotal = numRowsTotalData.maxBy(_._2)._2 + + val numRowsUpdatedData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsUpdated).sum.toDouble)) + val maxNumRowsUpdated = numRowsUpdatedData.maxBy(_._2)._2 + + val memoryUsedBytesData = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.memoryUsedBytes).sum.toDouble)) + val maxMemoryUsedBytes = memoryUsedBytesData.maxBy(_._2)._2 + + val numRowsDroppedByWatermarkData = query.recentProgress + .map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.numRowsDroppedByWatermark).sum.toDouble)) + val maxNumRowsDroppedByWatermark = numRowsDroppedByWatermarkData.maxBy(_._2)._2 + + val graphUIDataForNumberTotalRows = + new GraphUIData( + "aggregated-num-total-state-rows-timeline", + "aggregated-num-total-state-rows-histogram", + numRowsTotalData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsTotal, + "records") + graphUIDataForNumberTotalRows.generateDataJs(jsCollector) + + val graphUIDataForNumberUpdatedRows = + new GraphUIData( + "aggregated-num-updated-state-rows-timeline", + "aggregated-num-updated-state-rows-histogram", + numRowsUpdatedData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsUpdated, + "records") + graphUIDataForNumberUpdatedRows.generateDataJs(jsCollector) + + val graphUIDataForMemoryUsedBytes = + new GraphUIData( + "aggregated-state-memory-used-bytes-timeline", + "aggregated-state-memory-used-bytes-histogram", + memoryUsedBytesData, + minBatchTime, + maxBatchTime, + 0, + maxMemoryUsedBytes, + "bytes") + graphUIDataForMemoryUsedBytes.generateDataJs(jsCollector) + + val graphUIDataForNumRowsDroppedByWatermark = + new GraphUIData( + "aggregated-num-state-rows-dropped-by-watermark-timeline", + "aggregated-num-state-rows-dropped-by-watermark-histogram", + numRowsDroppedByWatermarkData, + minBatchTime, + maxBatchTime, + 0, + maxNumRowsDroppedByWatermark, + "records") + graphUIDataForNumRowsDroppedByWatermark.generateDataJs(jsCollector) + + // scalastyle:off + + +
    +
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    +
    + + {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } else { + new NodeBuffer() + } + } + def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { val batchToTimestamps = withNoProgress(query, query.recentProgress.map(p => (p.batchId, parseProgressTimestamp(p.timestamp))), @@ -284,6 +400,7 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)} + {generateAggregatedStateOperators(query, minBatchTime, maxBatchTime, jsCollector)} } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 82aa1453f9ba2..1a8b28001b8d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -75,10 +75,12 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val h3Text = findAll(cssSelector("h3")).map(_.text).toSeq h3Text should not contain ("Streaming Query") + val input1 = spark.readStream.format("rate").load() + val input2 = spark.readStream.format("rate").load() val activeQuery = - spark.readStream.format("rate").load().writeStream.format("noop").start() + input1.join(input2, "value").writeStream.format("noop").start() val completedQuery = - spark.readStream.format("rate").load().writeStream.format("noop").start() + input1.join(input2, "value").writeStream.format("noop").start() completedQuery.stop() val failedQuery = spark.readStream.format("rate").load().select("value").as[Long] .map(_ / 0).writeStream.format("noop").start() @@ -129,6 +131,15 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B findAll(cssSelector("""#stat-table th""")).map(_.text).toSeq should be { List("", "Timelines", "Histograms") } + summaryText should contain ("Input Rate (?)") + summaryText should contain ("Process Rate (?)") + summaryText should contain ("Input Rows (?)") + summaryText should contain ("Batch Duration (?)") + summaryText should contain ("Operation Duration (?)") + summaryText should contain ("Aggregated Number Of Total State Rows (?)") + summaryText should contain ("Aggregated Number Of Updated State Rows (?)") + summaryText should contain ("Aggregated State Memory Used In Bytes (?)") + summaryText should contain ("Aggregated Number Of State Rows Dropped By Watermark (?)") } } finally { spark.streams.active.foreach(_.stop()) From c2caf2522b2e65a93a797580f08ac36461000969 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 9 Nov 2020 19:07:16 -0800 Subject: [PATCH 0428/1009] [SPARK-33213][BUILD] Upgrade Apache Arrow to 2.0.0 ### What changes were proposed in this pull request? This upgrade Apache Arrow version from 1.0.1 to 2.0.0 ### Why are the changes needed? Apache Arrow 2.0.0 was released with some improvements from Java side, so it's better to upgrade Spark to the new version. Note that the format version in Arrow 2.0.0 is still 1.0.0 so API should still be compatible between 1.0.1 and 2.0.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UTs. Closes #30306 from sunchao/SPARK-33213. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 8 ++++---- pom.xml | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index dc98de4d8015d..8c1ab9e3c1cfe 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -15,10 +15,10 @@ apacheds-kerberos-codec/2.0.0-M15//apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api/1.0.0-M20//api-asn1-api-1.0.0-M20.jar api-util/1.0.0-M20//api-util-1.0.0-M20.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar +arrow-format/2.0.0//arrow-format-2.0.0.jar +arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar +arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar +arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.8.2//avro-ipc-1.8.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0d7aeb9a82059..fcb993033221e 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -10,10 +10,10 @@ antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.7.1//antlr4-runtime-4.7.1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/1.0.1//arrow-format-1.0.1.jar -arrow-memory-core/1.0.1//arrow-memory-core-1.0.1.jar -arrow-memory-netty/1.0.1//arrow-memory-netty-1.0.1.jar -arrow-vector/1.0.1//arrow-vector-1.0.1.jar +arrow-format/2.0.0//arrow-format-2.0.0.jar +arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar +arrow-memory-netty/2.0.0//arrow-memory-netty-2.0.0.jar +arrow-vector/2.0.0//arrow-vector-2.0.0.jar audience-annotations/0.5.0//audience-annotations-0.5.0.jar automaton/1.11-8//automaton-1.11-8.jar avro-ipc/1.8.2//avro-ipc-1.8.2.jar diff --git a/pom.xml b/pom.xml index d0eb0a354627d..25c6da7100056 100644 --- a/pom.xml +++ b/pom.xml @@ -203,7 +203,7 @@ If you are changing Arrow version specification, please check ./python/pyspark/sql/pandas/utils.py, and ./python/setup.py too. --> - 1.0.1 + 2.0.0 org.fusesource.leveldbjni From a1f84d8714cd1bd6cc6e2da6eb97fb9f58f3ee8f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 10 Nov 2020 04:43:32 +0000 Subject: [PATCH 0429/1009] [SPARK-33369][SQL] DSV2: Skip schema inference in write if table provider supports external metadata ### What changes were proposed in this pull request? When TableProvider.supportsExternalMetadata() is true, Spark will use the input Dataframe's schema in `DataframeWriter.save()`/`DataStreamWriter.start()` and skip schema/partitioning inference. ### Why are the changes needed? For all the v2 data sources which are not FileDataSourceV2, Spark always infers the table schema/partitioning on `DataframeWriter.save()`/`DataStreamWriter.start()`. The inference of table schema/partitioning can be expensive. However, there is no such trait or flag for indicating a V2 source can use the input DataFrame's schema on `DataframeWriter.save()`/`DataStreamWriter.start()`. We can resolve the problem by adding a new expected behavior for the method `TableProvider.supportsExternalMetadata()`. 
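As a rough illustration (a sketch only, not code from this patch; the class and table names are invented), a custom connector can opt in by overriding `supportsExternalMetadata()`, after which `getTable` receives the write query's schema instead of the result of `inferSchema`:

```scala
import java.util

import org.apache.spark.sql.connector.catalog.{Table, TableCapability, TableProvider}
import org.apache.spark.sql.connector.expressions.Transform
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap

// Hypothetical provider: because supportsExternalMetadata() returns true,
// DataFrameWriter.save() passes the input query's schema to getTable and
// skips inferSchema() on the write path.
class MyExternalMetadataSource extends TableProvider {

  override def supportsExternalMetadata(): Boolean = true

  // Still used on the read path when no schema is supplied externally.
  override def inferSchema(options: CaseInsensitiveStringMap): StructType =
    throw new UnsupportedOperationException("schema must be supplied externally")

  override def getTable(
      tableSchema: StructType,
      partitioning: Array[Transform],
      properties: util.Map[String, String]): Table = {
    new Table {
      override def name(): String = "my_external_metadata_table"
      // Expose the externally supplied schema directly.
      override def schema(): StructType = tableSchema
      // A real source would declare BATCH_WRITE etc. and implement SupportsWrite;
      // omitted here for brevity.
      override def capabilities(): util.Set[TableCapability] =
        util.Collections.emptySet[TableCapability]()
    }
  }
}
```

The test sources added below (`FakeWriteSupportingExternalMetadata` and `SupportsExternalMetadataWritableDataSource`) follow the same shape.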
### Does this PR introduce _any_ user-facing change? Yes, a new behavior for the data source v2 API `TableProvider.supportsExternalMetadata()` when it returns true. ### How was this patch tested? Unit test Closes #30273 from gengliangwang/supportsExternalMetadata. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../sql/connector/catalog/TableProvider.java | 7 ++-- .../apache/spark/sql/DataFrameWriter.scala | 11 +++--- .../sql/streaming/DataStreamWriter.scala | 10 +++++- ...pache.spark.sql.sources.DataSourceRegister | 1 + .../sql/connector/DataSourceV2Suite.scala | 23 ++++++++++++ .../sources/StreamingDataSourceV2Suite.scala | 36 +++++++++++++++++++ 6 files changed, 80 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java index 82731e2c8e1e8..4881fde06c659 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableProvider.java @@ -77,8 +77,11 @@ default Transform[] inferPartitioning(CaseInsensitiveStringMap options) { /** * Returns true if the source has the ability of accepting external table metadata when getting - * tables. The external table metadata includes user-specified schema from - * `DataFrameReader`/`DataStreamReader` and schema/partitioning stored in Spark catalog. + * tables. The external table metadata includes: + * 1. For table reader: user-specified schema from `DataFrameReader`/`DataStreamReader` and + * schema/partitioning stored in Spark catalog. + * 2. For table writer: the schema of the input `Dataframe` of + * `DataframeWriter`/`DataStreamWriter`. *

    * By default this method returns false, which means the schema and partitioning passed to * `getTable` are from the infer methods. Please override it if this source has expensive diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index da031b1827dd5..991f02d43bc47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -325,11 +325,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) def getTable: Table = { - // For file source, it's expensive to infer schema/partition at each write. Here we pass - // the schema of input query and the user-specified partitioning to `getTable`. If the - // query schema is not compatible with the existing data, the write can still success but - // following reads would fail. - if (provider.isInstanceOf[FileDataSourceV2]) { + // If the source accepts external table metadata, here we pass the schema of input query + // and the user-specified partitioning to `getTable`. This is for avoiding + // schema/partitioning inference, which can be very expensive. + // If the query schema is not compatible with the existing data, the behavior is undefined. + // For example, writing file source will success but the following reads will fail. + if (provider.supportsExternalMetadata()) { provider.getTable( df.schema.asNullable, partitioningAsV2.toArray, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 2867bf581df81..d67e175c24dd9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -386,8 +386,16 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { val finalOptions = sessionOptions.filterKeys(!optionsWithPath.contains(_)).toMap ++ optionsWithPath.originalMap val dsOptions = new CaseInsensitiveStringMap(finalOptions.asJava) + // If the source accepts external table metadata, here we pass the schema of input query + // to `getTable`. This is for avoiding schema inference, which can be very expensive. + // If the query schema is not compatible with the existing data, the behavior is undefined. 
+ val outputSchema = if (provider.supportsExternalMetadata()) { + Some(df.schema) + } else { + None + } val table = DataSourceV2Utils.getTableFromProvider( - provider, dsOptions, userSpecifiedSchema = None) + provider, dsOptions, userSpecifiedSchema = outputSchema) import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ table match { case table: SupportsWrite if table.supports(STREAMING_WRITE) => diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 914af589384df..dd22970203b3c 100644 --- a/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/test/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -11,4 +11,5 @@ org.apache.spark.sql.streaming.sources.FakeReadBothModes org.apache.spark.sql.streaming.sources.FakeReadNeitherMode org.apache.spark.sql.streaming.sources.FakeWriteOnly org.apache.spark.sql.streaming.sources.FakeNoWrite +org.apache.spark.sql.streaming.sources.FakeWriteSupportingExternalMetadata org.apache.spark.sql.streaming.sources.FakeWriteSupportProviderV1Fallback diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index ce28e615702db..28cb448c400c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -157,6 +157,19 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS } } + test("SPARK-33369: Skip schema inference in DataframeWriter.save() if table provider " + + "supports external metadata") { + withTempDir { dir => + val cls = classOf[SupportsExternalMetadataWritableDataSource].getName + spark.range(10).select('id as 'i, -'id as 'j).write.format(cls) + .option("path", dir.getCanonicalPath).mode("append").save() + val schema = new StructType().add("i", "long").add("j", "long") + checkAnswer( + spark.read.format(cls).option("path", dir.getCanonicalPath).schema(schema).load(), + spark.range(10).select('id, -'id)) + } + } + test("partitioning reporting") { import org.apache.spark.sql.functions.{count, sum} Seq(classOf[PartitionAwareDataSource], classOf[JavaPartitionAwareDataSource]).foreach { cls => @@ -771,6 +784,16 @@ class SimpleWriteOnlyDataSource extends SimpleWritableDataSource { } } +class SupportsExternalMetadataWritableDataSource extends SimpleWritableDataSource { + override def supportsExternalMetadata(): Boolean = true + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + throw new IllegalArgumentException( + "Dataframe writer should not require inferring table schema the data source supports" + + " external metadata.") + } +} + class ReportStatisticsDataSource extends SimpleWritableDataSource { class MyScanBuilder extends SimpleScanBuilder diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala index 05cf324f8d490..66544a8dc4693 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/sources/StreamingDataSourceV2Suite.scala @@ -25,6 +25,7 @@ import 
scala.collection.JavaConverters._ import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.connector.catalog.{SessionConfigSupport, SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ +import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, ScanBuilder} import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReaderFactory, ContinuousStream, MicroBatchStream, Offset, PartitionOffset} import org.apache.spark.sql.connector.write.{LogicalWriteInfo, PhysicalWriteInfo, WriteBuilder, WriterCommitMessage} @@ -195,6 +196,30 @@ class FakeNoWrite extends DataSourceRegister with SimpleTableProvider { } } +class FakeWriteSupportingExternalMetadata + extends DataSourceRegister + with TableProvider { + override def shortName(): String = "fake-write-supporting-external-metadata" + + override def supportsExternalMetadata(): Boolean = true + + override def inferSchema(options: CaseInsensitiveStringMap): StructType = { + throw new IllegalArgumentException( + "Data stream writer should not require inferring table schema the data source supports" + + " external Metadata.") + } + + override def getTable( + tableSchema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]): Table = { + new Table with FakeStreamingWriteTable { + override def name(): String = "fake" + override def schema(): StructType = tableSchema + } + } +} + case class FakeWriteV1FallbackException() extends Exception class FakeSink extends Sink { @@ -314,6 +339,17 @@ class StreamingDataSourceV2Suite extends StreamTest { } } + test("SPARK-33369: Skip schema inference in DataStreamWriter.start() if table provider " + + "supports external metadata") { + testPositiveCaseWithQuery( + "fake-read-microbatch-continuous", "fake-write-supporting-external-metadata", + Trigger.Once()) { v2Query => + val sink = v2Query.asInstanceOf[StreamingQueryWrapper].streamingQuery.sink + assert(sink.isInstanceOf[Table]) + assert(sink.asInstanceOf[Table].schema() == StructType(Nil)) + } + } + test("disabled v2 write") { // Ensure the V2 path works normally and generates a V2 sink.. testPositiveCaseWithQuery( From 90f6f39e429e0db00e234bdcf679a70dfce3272e Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 10 Nov 2020 05:28:06 +0000 Subject: [PATCH 0430/1009] [SPARK-33366][SQL] Migrate LOAD DATA command to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `LOAD DATA` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `LOAD DATA` is not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. 
For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t (key INT, value STRING) USING hive") sql("USE db") sql("LOAD DATA LOCAL INPATH 'examples/src/main/resources/kv1.txt' INTO TABLE t") // Succeeds ``` With this change, `LOAD DATA` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$39(Analyzer.scala:865) at scala.Option.foreach(Option.scala:407) ``` , which is expected since temporary view is resolved first and `LOAD DATA` doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `LOAD DATA ... t` is resolved to a temp view `t` instead of table `db.t` in the above scenario. ### How was this patch tested? Updated existing tests. Closes #30270 from imback82/load_data_cmd. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 6 +- .../catalyst/plans/logical/statements.scala | 10 --- .../catalyst/plans/logical/v2Commands.scala | 65 +++++++++++-------- .../sql/catalyst/parser/DDLParserSuite.scala | 10 +-- .../analysis/ResolveSessionCatalog.scala | 28 ++++---- .../datasources/v2/DataSourceV2Strategy.scala | 3 + .../sql/connector/DataSourceV2SQLSuite.scala | 8 +-- .../spark/sql/execution/SQLViewSuite.scala | 15 +++-- .../apache/spark/sql/hive/test/TestHive.scala | 5 +- 9 files changed, 82 insertions(+), 68 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c5e8429d49427..07086d1a45aa0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3282,7 +3282,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create a [[LoadDataStatement]]. + * Create a [[LoadData]]. 
* * For example: * {{{ @@ -3291,8 +3291,8 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * }}} */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { - LoadDataStatement( - tableName = visitMultipartIdentifier(ctx.multipartIdentifier), + LoadData( + child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index e711a6ad434d4..246e7f3bcb959 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -347,16 +347,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A LOAD DATA INTO TABLE statement, as parsed from SQL - */ -case class LoadDataStatement( - tableName: Seq[String], - path: String, - isLocal: Boolean, - isOverwrite: Boolean, - partition: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CREATE TABLE statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 94d4e7ecfac21..b5386f5044452 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -261,7 +261,7 @@ case class ReplaceTableAsSelect( } /** - * The logical plan of the CREATE NAMESPACE command that works for v2 catalogs. + * The logical plan of the CREATE NAMESPACE command. */ case class CreateNamespace( catalog: SupportsNamespaces, @@ -270,7 +270,7 @@ case class CreateNamespace( properties: Map[String, String]) extends Command /** - * The logical plan of the DROP NAMESPACE command that works for v2 catalogs. + * The logical plan of the DROP NAMESPACE command. */ case class DropNamespace( namespace: LogicalPlan, @@ -280,7 +280,7 @@ case class DropNamespace( } /** - * The logical plan of the DESCRIBE NAMESPACE command that works for v2 catalogs. + * The logical plan of the DESCRIBE NAMESPACE command. */ case class DescribeNamespace( namespace: LogicalPlan, @@ -296,7 +296,7 @@ case class DescribeNamespace( /** * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET (DBPROPERTIES|PROPERTIES) - * command that works for v2 catalogs. + * command. */ case class AlterNamespaceSetProperties( namespace: LogicalPlan, @@ -305,8 +305,7 @@ case class AlterNamespaceSetProperties( } /** - * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION - * command that works for v2 catalogs. + * The logical plan of the ALTER (DATABASE|SCHEMA|NAMESPACE) ... SET LOCATION command. */ case class AlterNamespaceSetLocation( namespace: LogicalPlan, @@ -315,7 +314,7 @@ case class AlterNamespaceSetLocation( } /** - * The logical plan of the SHOW NAMESPACES command that works for v2 catalogs. + * The logical plan of the SHOW NAMESPACES command. 
*/ case class ShowNamespaces( namespace: LogicalPlan, @@ -327,7 +326,7 @@ case class ShowNamespaces( } /** - * The logical plan of the DESCRIBE relation_name command that works for v2 tables. + * The logical plan of the DESCRIBE relation_name command. */ case class DescribeRelation( relation: LogicalPlan, @@ -338,7 +337,7 @@ case class DescribeRelation( } /** - * The logical plan of the DESCRIBE relation_name col_name command that works for v2 tables. + * The logical plan of the DESCRIBE relation_name col_name command. */ case class DescribeColumn( relation: LogicalPlan, @@ -349,7 +348,7 @@ case class DescribeColumn( } /** - * The logical plan of the DELETE FROM command that works for v2 tables. + * The logical plan of the DELETE FROM command. */ case class DeleteFromTable( table: LogicalPlan, @@ -358,7 +357,7 @@ case class DeleteFromTable( } /** - * The logical plan of the UPDATE TABLE command that works for v2 tables. + * The logical plan of the UPDATE TABLE command. */ case class UpdateTable( table: LogicalPlan, @@ -368,7 +367,7 @@ case class UpdateTable( } /** - * The logical plan of the MERGE INTO command that works for v2 tables. + * The logical plan of the MERGE INTO command. */ case class MergeIntoTable( targetTable: LogicalPlan, @@ -407,7 +406,7 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit } /** - * The logical plan of the DROP TABLE command that works for v2 tables. + * The logical plan of the DROP TABLE command. */ case class DropTable( child: LogicalPlan, @@ -422,7 +421,7 @@ case class DropTable( case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command /** - * The logical plan of the ALTER TABLE command that works for v2 tables. + * The logical plan of the ALTER TABLE command. */ case class AlterTable( catalog: TableCatalog, @@ -454,7 +453,7 @@ case class AlterTable( } /** - * The logical plan of the ALTER TABLE RENAME command that works for v2 tables. + * The logical plan of the ALTER TABLE RENAME command. */ case class RenameTable( catalog: TableCatalog, @@ -462,7 +461,7 @@ case class RenameTable( newIdent: Identifier) extends Command /** - * The logical plan of the SHOW TABLE command that works for v2 catalogs. + * The logical plan of the SHOW TABLE command. */ case class ShowTables( namespace: LogicalPlan, @@ -475,7 +474,7 @@ case class ShowTables( } /** - * The logical plan of the SHOW VIEWS command that works for v1 and v2 catalogs. + * The logical plan of the SHOW VIEWS command. * * Notes: v2 catalogs do not support views API yet, the command will fallback to * v1 ShowViewsCommand during ResolveSessionCatalog. @@ -491,7 +490,7 @@ case class ShowViews( } /** - * The logical plan of the USE/USE NAMESPACE command that works for v2 catalogs. + * The logical plan of the USE/USE NAMESPACE command. */ case class SetCatalogAndNamespace( catalogManager: CatalogManager, @@ -499,14 +498,14 @@ case class SetCatalogAndNamespace( namespace: Option[Seq[String]]) extends Command /** - * The logical plan of the REFRESH TABLE command that works for v2 catalogs. + * The logical plan of the REFRESH TABLE command. */ case class RefreshTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the SHOW CURRENT NAMESPACE command that works for v2 catalogs. + * The logical plan of the SHOW CURRENT NAMESPACE command. 
*/ case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command { override val output: Seq[Attribute] = Seq( @@ -515,7 +514,7 @@ case class ShowCurrentNamespace(catalogManager: CatalogManager) extends Command } /** - * The logical plan of the SHOW TBLPROPERTIES command that works for v2 catalogs. + * The logical plan of the SHOW TBLPROPERTIES command. */ case class ShowTableProperties( table: LogicalPlan, @@ -556,21 +555,21 @@ case class CommentOnTable(child: LogicalPlan, comment: String) extends Command { } /** - * The logical plan of the REFRESH FUNCTION command that works for v2 catalogs. + * The logical plan of the REFRESH FUNCTION command. */ case class RefreshFunction(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the DESCRIBE FUNCTION command that works for v2 catalogs. + * The logical plan of the DESCRIBE FUNCTION command. */ case class DescribeFunction(child: LogicalPlan, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } /** - * The logical plan of the DROP FUNCTION command that works for v2 catalogs. + * The logical plan of the DROP FUNCTION command. */ case class DropFunction( child: LogicalPlan, @@ -580,7 +579,7 @@ case class DropFunction( } /** - * The logical plan of the SHOW FUNCTIONS command that works for v2 catalogs. + * The logical plan of the SHOW FUNCTIONS command. */ case class ShowFunctions( child: Option[LogicalPlan], @@ -591,7 +590,7 @@ case class ShowFunctions( } /** - * The logical plan of the ANALYZE TABLE command that works for v2 catalogs. + * The logical plan of the ANALYZE TABLE command. */ case class AnalyzeTable( child: LogicalPlan, @@ -601,7 +600,7 @@ case class AnalyzeTable( } /** - * The logical plan of the ANALYZE TABLE FOR COLUMNS command that works for v2 catalogs. + * The logical plan of the ANALYZE TABLE FOR COLUMNS command. */ case class AnalyzeColumn( child: LogicalPlan, @@ -611,3 +610,15 @@ case class AnalyzeColumn( "mutually exclusive. Only one of them should be specified.") override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the LOAD DATA INTO TABLE command. 
+ */ +case class LoadData( + child: LogicalPlan, + path: String, + isLocal: Boolean, + isOverwrite: Boolean, + partition: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index aca7602bdbcb0..085aaf148c8cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1605,15 +1605,15 @@ class DDLParserSuite extends AnalysisTest { test("LOAD DATA INTO table") { comparePlans( parsePlan("LOAD DATA INPATH 'filepath' INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", false, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", false, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", true, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c"), - LoadDataStatement(Seq("a", "b", "c"), "filepath", true, true, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, None)) comparePlans( parsePlan( @@ -1621,8 +1621,8 @@ class DDLParserSuite extends AnalysisTest { |LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c |PARTITION(ds='2017-06-10') """.stripMargin), - LoadDataStatement( - Seq("a", "b", "c"), + LoadData( + UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 610632ac9256e..59652229a2b2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -312,8 +312,8 @@ class ResolveSessionCatalog( ignoreIfExists = c.ifNotExists) } - case RefreshTable(r @ ResolvedTable(_, _, _: V1Table)) if isSessionCatalog(r.catalog) => - RefreshTableCommand(r.identifier.asTableIdentifier) + case RefreshTable(ResolvedV1TableIdentifier(ident)) => + RefreshTableCommand(ident.asTableIdentifier) case RefreshTable(r: ResolvedView) => RefreshTableCommand(r.identifier.asTableIdentifier) @@ -358,9 +358,8 @@ class ResolveSessionCatalog( orCreate = c.orCreate) } - case DropTable( - r @ ResolvedTable(_, _, _: V1Table), ifExists, purge) if isSessionCatalog(r.catalog) => - DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) + case DropTable(ResolvedV1TableIdentifier(ident), ifExists, purge) => + DropTableCommand(ident.asTableIdentifier, ifExists, isView = false, purge = purge) // v1 DROP TABLE supports temp view. 
case DropTable(r: ResolvedView, ifExists, purge) => @@ -427,10 +426,9 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, "MSCK REPAIR TABLE") - case LoadDataStatement(tbl, path, isLocal, isOverwrite, partition) => - val v1TableName = parseV1Table(tbl, "LOAD DATA") + case LoadData(ResolvedV1TableIdentifier(ident), path, isLocal, isOverwrite, partition) => LoadDataCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, path, isLocal, isOverwrite, @@ -573,9 +571,8 @@ class ResolveSessionCatalog( "SHOW VIEWS, only SessionCatalog supports this command.") } - case ShowTableProperties( - r @ ResolvedTable(_, _, _: V1Table), propertyKey) if isSessionCatalog(r.catalog) => - ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) + case ShowTableProperties(ResolvedV1TableIdentifier(ident), propertyKey) => + ShowTablePropertiesCommand(ident.asTableIdentifier, propertyKey) case ShowTableProperties(r: ResolvedView, propertyKey) => ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) @@ -696,9 +693,16 @@ class ResolveSessionCatalog( } } - object ResolvedV1TableOrViewIdentifier { + object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case _ => None + } + } + + object ResolvedV1TableOrViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedV1TableIdentifier(ident) => Some(ident) case ResolvedView(ident, _) => Some(ident) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 648929eaa33ce..817b3cecf03e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -283,6 +283,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => + throw new AnalysisException("LOAD DATA is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 444daf8233c67..ee3f7bed7ca9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2074,10 +2074,10 @@ class DataSourceV2SQLSuite |PARTITIONED BY (id) """.stripMargin) - testV1Command("LOAD DATA", s"INPATH 'filepath' INTO TABLE $t") - testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' INTO TABLE $t") - testV1Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t") - testV1Command("LOAD DATA", + testNotSupportedV2Command("LOAD DATA", s"INPATH 'filepath' INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t") + testNotSupportedV2Command("LOAD DATA", s"LOCAL INPATH 'filepath' OVERWRITE INTO TABLE $t 
PARTITION(id=1)") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 7a6b0b8d6dd9f..8889ea177720e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -168,17 +168,20 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val dataFilePath = Thread.currentThread().getContextClassLoader.getResource("data/files/employee.dat") - assertNoSuchTable(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") - assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e2 = intercept[AnalysisException] { + sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") + }.getMessage + assert(e2.contains(s"$viewName is a temp view not table")) + assertNoSuchTable(s"TRUNCATE TABLE $viewName") + val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e2.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(e3.contains("SHOW CREATE TABLE is not supported on a temporary view")) assertNoSuchTable(s"SHOW PARTITIONS $viewName") - val e3 = intercept[AnalysisException] { + val e4 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage - assert(e3.contains(s"$viewName is a temp view not table or permanent view")) + assert(e4.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } @@ -208,7 +211,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e.contains(s"Target table in LOAD DATA cannot be a view: `default`.`testview`")) + assert(e.contains("default.testView is a view not table")) e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 082aa8d765e9c..10cb200550499 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -496,7 +496,10 @@ private[hive] class TestHiveSparkSession( def getLoadedTables: collection.mutable.HashSet[String] = sharedState.loadedTables def loadTestTable(name: String): Unit = { - if (!sharedState.loadedTables.contains(name)) { + // LOAD DATA does not work on temporary views. Since temporary views are resolved first, + // skip loading if there exists a temporary view with the given name. + if (sessionState.catalog.getTempView(name).isEmpty && + !sharedState.loadedTables.contains(name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. sharedState.loadedTables += name logDebug(s"Loading test table $name") From ad02ceda29c60f9c6e0430caff0d174558c0c661 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Tue, 10 Nov 2020 05:46:45 +0000 Subject: [PATCH 0431/1009] [SPARK-33244][SQL] Unify the code paths for spark.table and spark.read.table ### What changes were proposed in this pull request? - Call `spark.read.table` in `spark.table`. - Add comments for `spark.table` to emphasize it also support streaming temp view reading. ### Why are the changes needed? 
The code paths of `spark.table` and `spark.read.table` should be the same. This behavior was broken in SPARK-32592 since we need to respect options in the `spark.read.table` API. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UT. Closes #30148 from xuanyuanking/SPARK-33244. Authored-by: Yuanjian Li Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/DataFrameReader.scala | 12 ++++++++++-- .../scala/org/apache/spark/sql/SparkSession.scala | 11 +++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index bd986d0138256..276d5d29bfa2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -825,8 +825,16 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { def orc(paths: String*): DataFrame = format("orc").load(paths: _*) /** - * Returns the specified table as a `DataFrame`. - * + * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch + * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view, + * the returned DataFrame is simply the query plan of the view, which can either be a batch or + * streaming query plan. + * + * @param tableName is either a qualified or unqualified name that designates a table or view. + * If a database is specified, it identifies the table/view from the database. + * Otherwise, it first attempts to find a temporary view with the given name + * and then match the table/view from the current database. + * Note that, the global temporary view database is also valid here. * @since 1.4.0 */ def table(tableName: String): DataFrame = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 592f209475baf..d738d617f2315 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -573,7 +573,10 @@ class SparkSession private( @transient lazy val catalog: Catalog = new CatalogImpl(self) /** - * Returns the specified table/view as a `DataFrame`. + * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch + * reading and the returned DataFrame is the batch scan query plan of this table. If it's a view, + * the returned DataFrame is simply the query plan of the view, which can either be a batch or + * streaming query plan. * * @param tableName is either a qualified or unqualified name that designates a table or view. * If a database is specified, it identifies the table/view from the database.
@@ -583,11 +586,7 @@ class SparkSession private( * @since 2.0.0 */ def table(tableName: String): DataFrame = { - table(sessionState.sqlParser.parseMultipartIdentifier(tableName)) - } - - private[sql] def table(multipartIdentifier: Seq[String]): DataFrame = { - Dataset.ofRows(self, UnresolvedRelation(multipartIdentifier)) + read.table(tableName) } private[sql] def table(tableIdent: TableIdentifier): DataFrame = { From e3a768dd79558b04f6ae71380876bcde2354008c Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Tue, 10 Nov 2020 07:23:47 +0000 Subject: [PATCH 0432/1009] [SPARK-33391][SQL] element_at with CreateArray not respect one based index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? element_at with CreateArray does not respect the one-based index. Repro steps: ``` var df = spark.sql("select element_at(array(3, 2, 1), 0)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 1)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 2)") df.printSchema() df = spark.sql("select element_at(array(3, 2, 1), 3)") df.printSchema() root |-- element_at(array(3, 2, 1), 0): integer (nullable = false) root |-- element_at(array(3, 2, 1), 1): integer (nullable = false) root |-- element_at(array(3, 2, 1), 2): integer (nullable = false) root |-- element_at(array(3, 2, 1), 3): integer (nullable = true) The correct nullability should be: 0 -> true (out of bounds, so it falls back to the default, true), 1 -> false, 2 -> false, 3 -> false. ``` For expression evaluation, it respects the one-based index, but when checking nullability it calculates with a zero-based index via `computeNullabilityFromArray`. ### Why are the changes needed? Correctness issue. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and existing UT. Closes #30296 from leanken/leanken-SPARK-33391.
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../expressions/collectionOperations.scala | 30 +++++++++++++++ .../CollectionExpressionsSuite.scala | 38 +++++++++++++++---- 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 8719b2e065663..cb081b80ba096 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1965,6 +1965,36 @@ case class ElementAt(left: Expression, right: Expression) } } + private def nullability(elements: Seq[Expression], ordinal: Int): Boolean = { + if (ordinal == 0) { + false + } else if (elements.length < math.abs(ordinal)) { + true + } else { + if (ordinal < 0) { + elements(elements.length + ordinal).nullable + } else { + elements(ordinal - 1).nullable + } + } + } + + override def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { + if (ordinal.foldable && !ordinal.nullable) { + val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() + child match { + case CreateArray(ar, _) => + nullability(ar, intOrdinal) + case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => + nullability(elements, intOrdinal) || field.nullable + case _ => + true + } + } else { + true + } + } + override def nullable: Boolean = left.dataType match { case _: ArrayType => computeNullabilityFromArray(left, right) case _: MapType => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 856c1fad9b204..d59d13d49cef4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1122,11 +1122,18 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val a = AttributeReference("a", IntegerType, nullable = false)() val b = AttributeReference("b", IntegerType, nullable = true)() val array = CreateArray(a :: b :: Nil) - assert(!ElementAt(array, Literal(0)).nullable) - assert(ElementAt(array, Literal(1)).nullable) - assert(!ElementAt(array, Subtract(Literal(2), Literal(2))).nullable) + assert(!ElementAt(array, Literal(1)).nullable) + assert(!ElementAt(array, Literal(-2)).nullable) + assert(ElementAt(array, Literal(2)).nullable) + assert(ElementAt(array, Literal(-1)).nullable) + assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) + // CreateArray case invalid indices + assert(!ElementAt(array, Literal(0)).nullable) + assert(ElementAt(array, Literal(4)).nullable) + assert(ElementAt(array, Literal(-4)).nullable) + // GetArrayStructFields case val f1 = StructField("a", IntegerType, nullable = false) val f2 = StructField("b", IntegerType, nullable = true) @@ -1135,19 +1142,34 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper val inputArray1 = CreateArray(c :: Nil) val inputArray1ContainsNull = c.nullable val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) 
- assert(!ElementAt(stArray1, Literal(0)).nullable) + assert(!ElementAt(stArray1, Literal(1)).nullable) + assert(!ElementAt(stArray1, Literal(-1)).nullable) val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) - assert(ElementAt(stArray2, Literal(0)).nullable) + assert(ElementAt(stArray2, Literal(1)).nullable) + assert(ElementAt(stArray2, Literal(-1)).nullable) val d = AttributeReference("d", structType, nullable = true)() val inputArray2 = CreateArray(c :: d :: Nil) val inputArray2ContainsNull = c.nullable || d.nullable val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) - assert(!ElementAt(stArray3, Literal(0)).nullable) - assert(ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(-2)).nullable) + assert(ElementAt(stArray3, Literal(2)).nullable) + assert(ElementAt(stArray3, Literal(-1)).nullable) val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) - assert(ElementAt(stArray4, Literal(0)).nullable) assert(ElementAt(stArray4, Literal(1)).nullable) + assert(ElementAt(stArray4, Literal(-2)).nullable) + assert(ElementAt(stArray4, Literal(2)).nullable) + assert(ElementAt(stArray4, Literal(-1)).nullable) + + // GetArrayStructFields case invalid indices + assert(!ElementAt(stArray3, Literal(0)).nullable) + assert(ElementAt(stArray3, Literal(4)).nullable) + assert(ElementAt(stArray3, Literal(-4)).nullable) + + assert(ElementAt(stArray4, Literal(0)).nullable) + assert(ElementAt(stArray4, Literal(4)).nullable) + assert(ElementAt(stArray4, Literal(-4)).nullable) } test("Concat") { From 27bb40b6297361985e3590687f0332a72b71bc85 Mon Sep 17 00:00:00 2001 From: lrz Date: Tue, 10 Nov 2020 19:39:18 +0900 Subject: [PATCH 0433/1009] [SPARK-33339][PYTHON] Pyspark application will hang due to non Exception error ### What changes were proposed in this pull request? When a system.exit exception occurs during the process, the python worker exits abnormally, and then the executor task is still waiting for the worker for reading from socket, causing it to hang. The system.exit exception may be caused by the user's error code, but spark should at least throw an error to remind the user, not get stuck we can run a simple test to reproduce this case: ``` from pyspark.sql import SparkSession def err(line): raise SystemExit spark = SparkSession.builder.appName("test").getOrCreate() spark.sparkContext.parallelize(range(1,2), 2).map(err).collect() spark.stop() ``` ### Why are the changes needed? to make sure pyspark application won't hang if there's non-Exception error in python worker ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added a new test and also manually tested the case above Closes #30248 from li36909/pyspark. 
Lead-authored-by: lrz Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- python/pyspark/tests/test_worker.py | 9 +++++++++ python/pyspark/worker.py | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index 8039c0661dd0b..d7a4b84e8dc41 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -95,6 +95,15 @@ def raise_exception(_): self.assertRaises(Exception, lambda: rdd.foreach(raise_exception)) self.assertEqual(100, rdd.map(str).count()) + def test_after_non_exception_error(self): + # SPARK-33339: Pyspark application will hang due to non Exception + def raise_system_exit(_): + raise SystemExit() + rdd = self.sc.parallelize(range(100), 1) + with QuietTest(self.sc): + self.assertRaises(Exception, lambda: rdd.foreach(raise_system_exit)) + self.assertEqual(100, rdd.map(str).count()) + def test_after_jvm_exception(self): tempFile = tempfile.NamedTemporaryFile(delete=False) tempFile.write(b"Hello World!") diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 84e5cca5d3c00..6362839d96242 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -604,7 +604,7 @@ def process(): # reuse. TaskContext._setTaskContext(None) BarrierTaskContext._setTaskContext(None) - except Exception: + except BaseException: try: exc_info = traceback.format_exc() if isinstance(exc_info, bytes): @@ -618,7 +618,7 @@ def process(): except IOError: # JVM close the socket pass - except Exception: + except BaseException: # Write the error to stderr if it happened while serializing print("PySpark worker failed with exception:", file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) From 4934da56bcc13fc61afc8e8cc44fb5290b5e7b32 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 10 Nov 2020 14:37:42 +0000 Subject: [PATCH 0434/1009] [SPARK-33305][SQL] DSv2: DROP TABLE command should also invalidate cache ### What changes were proposed in this pull request? This changes `DropTableExec` to also invalidate caches referencing the table to be dropped, in a cascading manner. ### Why are the changes needed? In DSv1, `DROP TABLE` command also invalidate caches as described in [SPARK-19765](https://issues.apache.org/jira/browse/SPARK-19765). However in DSv2 the same command only drops the table but doesn't handle the caches. This could lead to correctness issue. ### Does this PR introduce _any_ user-facing change? Yes. Now DSv2 `DROP TABLE` command also invalidates cache. ### How was this patch tested? Added a new UT Closes #30211 from sunchao/SPARK-33305. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../execution/datasources/v2/DropTableExec.scala | 7 ++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 817b3cecf03e2..5695d232fae54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -229,7 +229,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("Describing columns is not supported for v2 tables.") case DropTable(r: ResolvedTable, ifExists, purge) => - DropTableExec(r.catalog, r.identifier, ifExists, purge) :: Nil + DropTableExec(session, r.catalog, r.table, r.identifier, ifExists, purge) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 1fd0cd177478b..068475fc56f47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -17,22 +17,27 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} /** * Physical plan node for dropping a table. 
*/ case class DropTableExec( + session: SparkSession, catalog: TableCatalog, + table: Table, ident: Identifier, ifExists: Boolean, purge: Boolean) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ee3f7bed7ca9f..dfa32b9ac802e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -784,6 +784,22 @@ class DataSourceV2SQLSuite } } + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { From 34f5e7ce77647d3b5eb11700566e0bbce73960e2 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 10 Nov 2020 14:40:24 +0000 Subject: [PATCH 0435/1009] [SPARK-33302][SQL] Push down filters through Expand ### What changes were proposed in this pull request? Push down filter through expand. For case below: ``` create table t1(pid int, uid int, sid int, dt date, suid int) using parquet; create table t2(pid int, vs int, uid int, csid int) using parquet; SELECT years, appversion, SUM(uusers) AS users FROM (SELECT Date_trunc('year', dt) AS years, CASE WHEN h.pid = 3 THEN 'iOS' WHEN h.pid = 4 THEN 'Android' ELSE 'Other' END AS viewport, h.vs AS appversion, Count(DISTINCT u.uid) AS uusers ,Count(DISTINCT u.suid) AS srcusers FROM t1 u join t2 h ON h.uid = u.uid GROUP BY 1, 2, 3) AS a WHERE viewport = 'iOS' GROUP BY 1, 2 ``` Plan. 
before this pr: ``` == Physical Plan == *(5) HashAggregate(keys=[years#30, appversion#32], functions=[sum(uusers#33L)]) +- Exchange hashpartitioning(years#30, appversion#32, 200), true, [id=#251] +- *(4) HashAggregate(keys=[years#30, appversion#32], functions=[partial_sum(uusers#33L)]) +- *(4) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12], functions=[count(if ((gid#44 = 1)) u.`uid`#47 else null)]) +- Exchange hashpartitioning(date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, 200), true, [id=#246] +- *(3) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12], functions=[partial_count(if ((gid#44 = 1)) u.`uid`#47 else null)]) +- *(3) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44], functions=[]) +- Exchange hashpartitioning(date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44, 200), true, [id=#241] +- *(2) HashAggregate(keys=[date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44], functions=[]) +- *(2) Filter (CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46 = iOS) +- *(2) Expand [ArrayBuffer(date_trunc(year, cast(dt#9 as timestamp), Some(Etc/GMT+7)), CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END, vs#12, uid#7, null, 1), ArrayBuffer(date_trunc(year, cast(dt#9 as timestamp), Some(Etc/GMT+7)), CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END, vs#12, null, suid#10, 2)], [date_trunc('year', CAST(u.`dt` AS TIMESTAMP))#45, CASE WHEN (h.`pid` = 3) THEN 'iOS' WHEN (h.`pid` = 4) THEN 'Android' ELSE 'Other' END#46, vs#12, u.`uid`#47, u.`suid`#48, gid#44] +- *(2) Project [uid#7, dt#9, suid#10, pid#11, vs#12] +- *(2) BroadcastHashJoin [uid#7], [uid#13], Inner, BuildRight :- *(2) Project [uid#7, dt#9, suid#10] : +- *(2) Filter isnotnull(uid#7) : +- *(2) ColumnarToRow : +- FileScan parquet default.t1[uid#7,dt#9,suid#10] Batched: true, DataFilters: [isnotnull(uid#7)], Format: Parquet, Location: InMemoryFileIndex[file:/root/spark-3.0.0-bin-hadoop3.2/spark-warehouse/t1], PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint))), [id=#233] +- *(1) Project [pid#11, vs#12, uid#13] +- *(1) Filter isnotnull(uid#13) +- *(1) ColumnarToRow +- FileScan parquet default.t2[pid#11,vs#12,uid#13] Batched: true, DataFilters: [isnotnull(uid#13)], Format: Parquet, Location: InMemoryFileIndex[file:/root/spark-3.0.0-bin-hadoop3.2/spark-warehouse/t2], PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct ``` Plan. after. this pr. 
: ``` == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[years#0, appversion#2], functions=[sum(uusers#3L)], output=[years#0, appversion#2, users#5L]) +- Exchange hashpartitioning(years#0, appversion#2, 5), true, [id=#71] +- HashAggregate(keys=[years#0, appversion#2], functions=[partial_sum(uusers#3L)], output=[years#0, appversion#2, sum#22L]) +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12], functions=[count(distinct uid#7)], output=[years#0, appversion#2, uusers#3L]) +- Exchange hashpartitioning(date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, 5), true, [id=#67] +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12], functions=[partial_count(distinct uid#7)], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, count#27L]) +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7], functions=[], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7]) +- Exchange hashpartitioning(date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7, 5), true, [id=#63] +- HashAggregate(keys=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles)) AS date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END AS CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7], functions=[], output=[date_trunc(year, cast(dt#9 as timestamp), Some(America/Los_Angeles))#23, CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END#24, vs#12, uid#7]) +- Project [uid#7, dt#9, pid#11, vs#12] +- BroadcastHashJoin [uid#7], [uid#13], Inner, BuildRight, false :- Filter isnotnull(uid#7) : +- FileScan parquet default.t1[uid#7,dt#9] Batched: true, DataFilters: [isnotnull(uid#7)], Format: Parquet, Location: InMemoryFileIndex[file:/private/var/folders/4l/7_c5c97s1_gb0d9_d6shygx00000gn/T/warehouse-c069d87..., PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[2, int, false] as bigint)),false), [id=#58] +- Filter ((CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END = iOS) AND isnotnull(uid#13)) +- FileScan parquet default.t2[pid#11,vs#12,uid#13] Batched: true, DataFilters: [(CASE WHEN (pid#11 = 3) THEN iOS WHEN (pid#11 = 4) THEN Android ELSE Other END = iOS), isnotnull..., Format: Parquet, Location: InMemoryFileIndex[file:/private/var/folders/4l/7_c5c97s1_gb0d9_d6shygx00000gn/T/warehouse-c069d87..., PartitionFilters: [], PushedFilters: [IsNotNull(uid)], ReadSchema: struct ``` ### Why are the changes needed? Improve performance, filter more data. 
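A rough way to see the effect from a Spark shell (a sketch only, not part of the patch; it assumes the `t1`/`t2` tables created by the DDL above):

```scala
// Sketch: with the pushdown extended to Expand, the `viewport = 'iOS'` predicate
// should show up in the scan of t2 rather than only in a Filter above the Expand
// introduced by the multiple distinct aggregations.
val df = spark.sql("""
  SELECT years, appversion, SUM(uusers) AS users
  FROM (SELECT date_trunc('year', dt) AS years,
               CASE WHEN h.pid = 3 THEN 'iOS'
                    WHEN h.pid = 4 THEN 'Android'
                    ELSE 'Other' END AS viewport,
               h.vs AS appversion,
               COUNT(DISTINCT u.uid) AS uusers,
               COUNT(DISTINCT u.suid) AS srcusers
        FROM t1 u JOIN t2 h ON h.uid = u.uid
        GROUP BY 1, 2, 3) AS a
  WHERE viewport = 'iOS'
  GROUP BY 1, 2""")
// Inspect where the Filter ends up in the optimized and physical plans.
df.explain(true)
```

Comparing the `explain` output before and after this change should show the predicate moving below the Expand, as the two plans above illustrate.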
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30278 from AngersZhuuuu/SPARK-33302. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../optimizer/FilterPushdownSuite.scala | 24 ++++++++++++++++++- .../LeftSemiAntiJoinPushDownSuite.scala | 15 ++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9519a56c2817a..51f7799b1e427 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1269,6 +1269,7 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe case _: Sort => true case _: BatchEvalPython => true case _: ArrowEvalPython => true + case _: Expand => true case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 156313300eef9..11ec037c94f73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, IntegerType, TimestampType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, TimestampType} import org.apache.spark.unsafe.types.CalendarInterval class FilterPushdownSuite extends PlanTest { @@ -1208,6 +1208,28 @@ class FilterPushdownSuite extends PlanTest { checkAnalysis = false) } + test("push down predicate through expand") { + val query = + Filter('a > 1, + Expand( + Seq( + Seq('a, 'b, 'c, Literal.create(null, StringType), 1), + Seq('a, 'b, 'c, 'a, 2)), + Seq('a, 'b, 'c), + testRelation)).analyze + val optimized = Optimize.execute(query) + + val expected = + Expand( + Seq( + Seq('a, 'b, 'c, Literal.create(null, StringType), 1), + Seq('a, 'b, 'c, 'a, 2)), + Seq('a, 'b, 'c), + Filter('a > 1, testRelation)).analyze + + comparePlans(optimized, expected) + } + test("SPARK-28345: PythonUDF predicate should be able to pushdown to join") { val pythonUDFJoinCond = { val pythonUDF = PythonUDF("pythonUDF", null, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala index a3da9f73ebd40..729a1e9f06ca5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala @@ -315,6 +315,21 @@ class LeftSemiPushdownSuite extends PlanTest { comparePlans(optimized, originalQuery.analyze) } + test("Unary: LeftSemi join push down through Expand") { + val expand = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)), + Seq('a, 'b, 'c), testRelation) + val originalQuery = expand + .join(testRelation1, joinType = LeftSemi, condition = Some('b === 
'd && 'b === 1)) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = Expand(Seq(Seq('a, 'b, "null"), Seq('a, "null", 'c)), + Seq('a, 'b, 'c), testRelation + .join(testRelation1, joinType = LeftSemi, condition = Some('b === 'd && 'b === 1))) + .analyze + + comparePlans(optimized, correctAnswer) + } + Seq(Some('d === 'e), None).foreach { case innerJoinCond => Seq(LeftSemi, LeftAnti).foreach { case outerJT => Seq(Inner, LeftOuter, Cross, RightOuter).foreach { case innerJT => From 3165ca742a7508dca35a1e40b303c337939df86f Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Tue, 10 Nov 2020 15:41:04 +0000 Subject: [PATCH 0436/1009] [SPARK-33376][SQL] Remove the option of "sharesHadoopClasses" in Hive IsolatedClientLoader ### What changes were proposed in this pull request? This removes the `sharesHadoopClasses` flag from `IsolatedClientLoader` in Hive module. ### Why are the changes needed? Currently, when initializing `IsolatedClientLoader`, users can set the `sharesHadoopClasses` flag to decide whether the `HiveClient` created should share Hadoop classes with Spark itself or not. In the latter case, the client will only load Hadoop classes from the Hive dependencies. There are two reasons to remove this: 1. this feature is currently used in two cases: 1) unit tests, 2) when the Hadoop version defined in Maven can not be found when `spark.sql.hive.metastore.jars` is equal to "maven", which could be very rare. 2. when `sharesHadoopClasses` is false, Spark doesn't really only use Hadoop classes from Hive jars: we also download `hadoop-client` jar and put all the sub-module jars (e.g., `hadoop-common`, `hadoop-hdfs`) together with the Hive jars, and the Hadoop version used by `hadoop-client` is the same version used by Spark itself. As result, we're mixing two versions of Hadoop jars in the classpath, which could potentially cause issues, especially considering that the default Hadoop version is already 3.2.0 while most Hive versions supported by the `IsolatedClientLoader` is still using Hadoop 2.x or even lower. ### Does this PR introduce _any_ user-facing change? This affects Spark users in one scenario: when `spark.sql.hive.metastore.jars` is set to `maven` AND the Hadoop version specified in pom file cannot be downloaded, currently the behavior is to switch to _not_ share Hadoop classes, but with the PR it will share Hadoop classes with Spark. ### How was this patch tested? Existing UTs. Closes #30284 from sunchao/SPARK-33376. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../sql/hive/client/IsolatedClientLoader.scala | 16 ++++------------ .../sql/hive/client/HadoopVersionInfoSuite.scala | 3 +-- .../sql/hive/client/HiveClientBuilder.scala | 6 ++---- .../client/HivePartitionFilteringSuite.scala | 4 ---- .../spark/sql/hive/client/HiveVersionSuite.scala | 7 ++----- 5 files changed, 9 insertions(+), 27 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index f9946fe8e0616..9663e03ee6a74 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -52,12 +52,9 @@ private[hive] object IsolatedClientLoader extends Logging { config: Map[String, String] = Map.empty, ivyPath: Option[String] = None, sharedPrefixes: Seq[String] = Seq.empty, - barrierPrefixes: Seq[String] = Seq.empty, - sharesHadoopClasses: Boolean = true): IsolatedClientLoader = synchronized { + barrierPrefixes: Seq[String] = Seq.empty): IsolatedClientLoader = synchronized { val resolvedVersion = hiveVersion(hiveMetastoreVersion) - // We will first try to share Hadoop classes. If we cannot resolve the Hadoop artifact - // with the given version, we will use Hadoop 2.7 and then will not share Hadoop classes. - var _sharesHadoopClasses = sharesHadoopClasses + // We will use Hadoop 2.7 if we cannot resolve the Hadoop artifact. val files = if (resolvedVersions.contains((resolvedVersion, hadoopVersion))) { resolvedVersions((resolvedVersion, hadoopVersion)) } else { @@ -72,10 +69,8 @@ private[hive] object IsolatedClientLoader extends Logging { val fallbackVersion = "2.7.4" logWarning(s"Failed to resolve Hadoop artifacts for the version $hadoopVersion. We " + s"will change the hadoop version from $hadoopVersion to $fallbackVersion and try " + - "again. Hadoop classes will not be shared between Spark and Hive metastore client. " + - "It is recommended to set jars used by Hive metastore client through " + + "again. It is recommended to set jars used by Hive metastore client through " + "spark.sql.hive.metastore.jars in the production environment.") - _sharesHadoopClasses = false (downloadVersion( resolvedVersion, fallbackVersion, ivyPath, remoteRepos), fallbackVersion) } @@ -89,7 +84,6 @@ private[hive] object IsolatedClientLoader extends Logging { execJars = files, hadoopConf = hadoopConf, config = config, - sharesHadoopClasses = _sharesHadoopClasses, sharedPrefixes = sharedPrefixes, barrierPrefixes = barrierPrefixes) } @@ -177,7 +171,6 @@ private[hive] object IsolatedClientLoader extends Logging { * @param config A set of options that will be added to the HiveConf of the constructed client. * @param isolationOn When true, custom versions of barrier classes will be constructed. Must be * true unless loading the version of hive that is on Spark's classloader. - * @param sharesHadoopClasses When true, we will share Hadoop classes between Spark and * @param baseClassLoader The spark classloader that is used to load shared classes. 
*/ private[hive] class IsolatedClientLoader( @@ -187,7 +180,6 @@ private[hive] class IsolatedClientLoader( val execJars: Seq[URL] = Seq.empty, val config: Map[String, String] = Map.empty, val isolationOn: Boolean = true, - val sharesHadoopClasses: Boolean = true, val baseClassLoader: ClassLoader = Thread.currentThread().getContextClassLoader, val sharedPrefixes: Seq[String] = Seq.empty, val barrierPrefixes: Seq[String] = Seq.empty) @@ -204,7 +196,7 @@ private[hive] class IsolatedClientLoader( name.startsWith("org.apache.log4j") || // log4j1.x name.startsWith("org.apache.logging.log4j") || // log4j2 name.startsWith("org.apache.spark.") || - (sharesHadoopClasses && isHadoopClass) || + isHadoopClass || name.startsWith("scala.") || (name.startsWith("com.google") && !name.startsWith("com.google.cloud")) || name.startsWith("java.") || diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala index 65492abf38cc0..8d55356da28e6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HadoopVersionInfoSuite.scala @@ -49,8 +49,7 @@ class HadoopVersionInfoSuite extends SparkFunSuite { sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = HiveClientBuilder.buildConf(Map.empty), - ivyPath = Some(ivyPath.getCanonicalPath), - sharesHadoopClasses = true) + ivyPath = Some(ivyPath.getCanonicalPath)) val jars = client.classLoader.getParent.asInstanceOf[URLClassLoader].getURLs .map(u => new File(u.toURI)) // Drop all Hadoop jars to use the existing Hadoop jars on the classpath diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala index 2ad3afcb214b3..f40b4f00d9fd0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientBuilder.scala @@ -46,15 +46,13 @@ private[client] object HiveClientBuilder { def buildClient( version: String, hadoopConf: Configuration, - extraConf: Map[String, String] = Map.empty, - sharesHadoopClasses: Boolean = true): HiveClient = { + extraConf: Map[String, String] = Map.empty): HiveClient = { IsolatedClientLoader.forVersion( hiveMetastoreVersion = version, hadoopVersion = VersionInfo.getVersion, sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = buildConf(extraConf), - ivyPath = ivyPath, - sharesHadoopClasses = sharesHadoopClasses).createClient() + ivyPath = ivyPath).createClient() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index 2d615f6fdc261..7e10d498d0413 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -272,10 +272,6 @@ class HivePartitionFilteringSuite(version: String) day1 :: day2 :: Nil) } - test("create client with sharesHadoopClasses = false") { - buildClient(new Configuration(), sharesHadoopClasses = false) - } - private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala index dd58c302e0197..02e9b7fb151fd 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveVersionSuite.scala @@ -28,9 +28,7 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu override protected val enableAutoThreadAudit = false protected var client: HiveClient = null - protected def buildClient( - hadoopConf: Configuration, - sharesHadoopClasses: Boolean = true): HiveClient = { + protected def buildClient(hadoopConf: Configuration): HiveClient = { // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and // hive.metastore.schema.verification from false to true since 2.0 // For details, see the JIRA HIVE-6113 and HIVE-12463 @@ -46,8 +44,7 @@ private[client] abstract class HiveVersionSuite(version: String) extends SparkFu HiveClientBuilder.buildClient( version, hadoopConf, - HiveUtils.formatTimeVarsForHiveClient(hadoopConf), - sharesHadoopClasses = sharesHadoopClasses) + HiveUtils.formatTimeVarsForHiveClient(hadoopConf)) } override def suiteName: String = s"${super.suiteName}($version)" From 122c8999cbf2a1f9484ae973864a843cfa32b6c6 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 10 Nov 2020 09:17:00 -0800 Subject: [PATCH 0437/1009] [SPARK-33251][FOLLOWUP][PYTHON][DOCS][MINOR] Adjusts returns PrefixSpan.findFrequentSequentialPatterns ### What changes were proposed in this pull request? Changes pyspark.sql.dataframe.DataFrame to :py:class:`pyspark.sql.DataFrame` ### Why are the changes needed? Consistency (see https://github.com/apache/spark/pull/30285#pullrequestreview-526764104). ### Does this PR introduce _any_ user-facing change? User will see shorter reference with a link. ### How was this patch tested? `dev/lint-python` and manual check of the rendered docs. Closes #30313 from zero323/SPARK-33251-FOLLOW-UP. Authored-by: zero323 Signed-off-by: Huaxin Gao --- python/pyspark/ml/fpm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 77e610f49410d..d36b9efb8cce0 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -443,7 +443,7 @@ def findFrequentSequentialPatterns(self, dataset): Returns ------- - pyspark.sql.dataframe.DataFrame + :py:class:`pyspark.sql.DataFrame` A `DataFrame` that contains columns of sequence and corresponding frequency. The schema of it will be: From 6fa80ed1dd43c2ecd092c10933330b501641c51b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 10 Nov 2020 16:17:00 -0800 Subject: [PATCH 0438/1009] [SPARK-33337][SQL] Support subexpression elimination in branches of conditional expressions ### What changes were proposed in this pull request? Currently we skip subexpression elimination in branches of conditional expressions including `If`, `CaseWhen`, and `Coalesce`. Actually we can do subexpression elimination for such branches if the subexpression is common across all branches. This patch proposes to support subexpression elimination in branches of conditional expressions. ### Why are the changes needed? We may miss subexpression elimination chances in branches of conditional expressions. This kind of subexpression is frequently seen. It may be written manually by users or come from query optimizer. 
For example, project collapsing could embed expressions between two `Project`s and produces conditional expression like: ``` CASE WHEN jsonToStruct(json).a = '1' THEN 1.0 WHEN jsonToStruct(json).a = '2' THEN 2.0 ... ELSE 1.2 END ``` If `jsonToStruct(json)` is time-expensive expression, we don't eliminate the duplication and waste time on running it repeatedly now. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30245 from viirya/SPARK-33337. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../expressions/EquivalentExpressions.scala | 96 +++++++++++---- .../expressions/codegen/CodeGenerator.scala | 2 +- .../SubexpressionEliminationSuite.scala | 111 ++++++++++++++++-- 3 files changed, 177 insertions(+), 32 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala index 458c48df6d0c8..1dfff412d9a8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala @@ -65,11 +65,82 @@ class EquivalentExpressions { } } + private def addExprToSet(expr: Expression, set: mutable.Set[Expr]): Boolean = { + if (expr.deterministic) { + val e = Expr(expr) + if (set.contains(e)) { + true + } else { + set.add(e) + false + } + } else { + false + } + } + + /** + * Adds only expressions which are common in each of given expressions, in a recursive way. + * For example, given two expressions `(a + (b + (c + 1)))` and `(d + (e + (c + 1)))`, + * the common expression `(c + 1)` will be added into `equivalenceMap`. + */ + private def addCommonExprs( + exprs: Seq[Expression], + addFunc: Expression => Boolean = addExpr): Unit = { + val exprSetForAll = mutable.Set[Expr]() + addExprTree(exprs.head, addExprToSet(_, exprSetForAll)) + + val commonExprSet = exprs.tail.foldLeft(exprSetForAll) { (exprSet, expr) => + val otherExprSet = mutable.Set[Expr]() + addExprTree(expr, addExprToSet(_, otherExprSet)) + exprSet.intersect(otherExprSet) + } + + commonExprSet.foreach(expr => addFunc(expr.e)) + } + + // There are some special expressions that we should not recurse into all of its children. + // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) + // 2. If: common subexpressions will always be evaluated at the beginning, but the true and + // false expressions in `If` may not get accessed, according to the predicate + // expression. We should only recurse into the predicate expression. + // 3. CaseWhen: like `If`, the children of `CaseWhen` only get accessed in a certain + // condition. We should only recurse into the first condition expression as it + // will always get accessed. + // 4. Coalesce: it's also a conditional expression, we should only recurse into the first + // children, because others may not get accessed. + private def childrenToRecurse(expr: Expression): Seq[Expression] = expr match { + case _: CodegenFallback => Nil + case i: If => i.predicate :: Nil + case c: CaseWhen => c.children.head :: Nil + case c: Coalesce => c.children.head :: Nil + case other => other.children + } + + // For some special expressions we cannot just recurse into all of its children, but we can + // recursively add the common expressions shared between all of its children. 
+ private def commonChildrenToRecurse(expr: Expression): Seq[Seq[Expression]] = expr match { + case i: If => Seq(Seq(i.trueValue, i.falseValue)) + case c: CaseWhen => + // We look at subexpressions in conditions and values of `CaseWhen` separately. It is + // because a subexpression in conditions will be run no matter which condition is matched + // if it is shared among conditions, but it doesn't need to be shared in values. Similarly, + // a subexpression among values doesn't need to be in conditions because no matter which + // condition is true, it will be evaluated. + val conditions = c.branches.tail.map(_._1) + val values = c.branches.map(_._2) ++ c.elseValue + Seq(conditions, values) + case c: Coalesce => Seq(c.children.tail) + case _ => Nil + } + /** * Adds the expression to this data structure recursively. Stops if a matching expression * is found. That is, if `expr` has already been added, its children are not added. */ - def addExprTree(expr: Expression): Unit = { + def addExprTree( + expr: Expression, + addFunc: Expression => Boolean = addExpr): Unit = { val skip = expr.isInstanceOf[LeafExpression] || // `LambdaVariable` is usually used as a loop variable, which can't be evaluated ahead of the // loop. So we can't evaluate sub-expressions containing `LambdaVariable` at the beginning. @@ -78,26 +149,9 @@ class EquivalentExpressions { // can cause error like NPE. (expr.isInstanceOf[PlanExpression[_]] && TaskContext.get != null) - // There are some special expressions that we should not recurse into all of its children. - // 1. CodegenFallback: it's children will not be used to generate code (call eval() instead) - // 2. If: common subexpressions will always be evaluated at the beginning, but the true and - // false expressions in `If` may not get accessed, according to the predicate - // expression. We should only recurse into the predicate expression. - // 3. CaseWhen: like `If`, the children of `CaseWhen` only get accessed in a certain - // condition. We should only recurse into the first condition expression as it - // will always get accessed. - // 4. Coalesce: it's also a conditional expression, we should only recurse into the first - // children, because others may not get accessed. - def childrenToRecurse: Seq[Expression] = expr match { - case _: CodegenFallback => Nil - case i: If => i.predicate :: Nil - case c: CaseWhen => c.children.head :: Nil - case c: Coalesce => c.children.head :: Nil - case other => other.children - } - - if (!skip && !addExpr(expr)) { - childrenToRecurse.foreach(addExprTree) + if (!skip && !addFunc(expr)) { + childrenToRecurse(expr).foreach(addExprTree(_, addFunc)) + commonChildrenToRecurse(expr).filter(_.nonEmpty).foreach(addCommonExprs(_, addFunc)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9a26c388f59af..9aa827a58d87a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1044,7 +1044,7 @@ class CodegenContext extends Logging { val localSubExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState] // Add each expression tree and compute the common subexpressions. 
- expressions.foreach(equivalentExpressions.addExprTree) + expressions.foreach(equivalentExpressions.addExprTree(_)) // Get all the expressions that appear at least twice and set up the state for subexpression // elimination. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala index 1fa185cc77ebb..4725a40781c6b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala @@ -146,20 +146,111 @@ class SubexpressionEliminationSuite extends SparkFunSuite { equivalence.addExprTree(add) // the `two` inside `fallback` should not be added assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 0) - assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 3) // add, two, explode + assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 3) // add, two, explode } - test("Children of conditional expressions") { - val condition = And(Literal(true), Literal(false)) + test("Children of conditional expressions: If") { val add = Add(Literal(1), Literal(2)) - val ifExpr = If(condition, add, add) + val condition = GreaterThan(add, Literal(3)) - val equivalence = new EquivalentExpressions - equivalence.addExprTree(ifExpr) - // the `add` inside `If` should not be added - assert(equivalence.getAllEquivalentExprs.count(_.size > 1) == 0) - // only ifExpr and its predicate expression - assert(equivalence.getAllEquivalentExprs.count(_.size == 1) == 2) + val ifExpr1 = If(condition, add, add) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(ifExpr1) + + // `add` is in both two branches of `If` and predicate. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add, add)) + // one-time expressions: only ifExpr and its predicate expression + assert(equivalence1.getAllEquivalentExprs.count(_.size == 1) == 2) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 1).head == Seq(ifExpr1)) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 1).last == Seq(condition)) + + // Repeated `add` is only in one branch, so we don't count it. 
+ val ifExpr2 = If(condition, Add(Literal(1), Literal(3)), Add(add, add)) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(ifExpr2) + + assert(equivalence2.getAllEquivalentExprs.count(_.size > 1) == 0) + assert(equivalence2.getAllEquivalentExprs.count(_.size == 1) == 3) + + val ifExpr3 = If(condition, ifExpr1, ifExpr1) + val equivalence3 = new EquivalentExpressions + equivalence3.addExprTree(ifExpr3) + + // `add`: 2, `condition`: 2 + assert(equivalence3.getAllEquivalentExprs.count(_.size == 2) == 2) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add, add)) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 2).last == Seq(condition, condition)) + + // `ifExpr1`, `ifExpr3` + assert(equivalence3.getAllEquivalentExprs.count(_.size == 1) == 2) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 1).head == Seq(ifExpr1)) + assert(equivalence3.getAllEquivalentExprs.filter(_.size == 1).last == Seq(ifExpr3)) + } + + test("Children of conditional expressions: CaseWhen") { + val add1 = Add(Literal(1), Literal(2)) + val add2 = Add(Literal(2), Literal(3)) + val conditions1 = (GreaterThan(add2, Literal(3)), add1) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr1 = CaseWhen(conditions1, None) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(caseWhenExpr1) + + // `add2` is repeatedly in all conditions. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add2, add2)) + + val conditions2 = (GreaterThan(add1, Literal(3)), add1) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr2 = CaseWhen(conditions2, None) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(caseWhenExpr2) + + // `add1` is repeatedly in all branch values, and first predicate. + assert(equivalence2.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence2.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add1, add1)) + + // Negative case. `add1` or `add2` is not commonly used in all predicates/branch values. + val conditions3 = (GreaterThan(add1, Literal(3)), add2) :: + (GreaterThan(add2, Literal(4)), add1) :: + (GreaterThan(add2, Literal(5)), add1) :: Nil + + val caseWhenExpr3 = CaseWhen(conditions3, None) + val equivalence3 = new EquivalentExpressions + equivalence3.addExprTree(caseWhenExpr3) + assert(equivalence3.getAllEquivalentExprs.count(_.size == 2) == 0) + } + + test("Children of conditional expressions: Coalesce") { + val add1 = Add(Literal(1), Literal(2)) + val add2 = Add(Literal(2), Literal(3)) + val conditions1 = GreaterThan(add2, Literal(3)) :: + GreaterThan(add2, Literal(4)) :: + GreaterThan(add2, Literal(5)) :: Nil + + val coalesceExpr1 = Coalesce(conditions1) + val equivalence1 = new EquivalentExpressions + equivalence1.addExprTree(coalesceExpr1) + + // `add2` is repeatedly in all conditions. + assert(equivalence1.getAllEquivalentExprs.count(_.size == 2) == 1) + assert(equivalence1.getAllEquivalentExprs.filter(_.size == 2).head == Seq(add2, add2)) + + // Negative case. `add1` and `add2` both are not used in all branches. 
+ val conditions2 = GreaterThan(add1, Literal(3)) :: + GreaterThan(add2, Literal(4)) :: + GreaterThan(add2, Literal(5)) :: Nil + + val coalesceExpr2 = Coalesce(conditions2) + val equivalence2 = new EquivalentExpressions + equivalence2.addExprTree(coalesceExpr2) + + assert(equivalence2.getAllEquivalentExprs.count(_.size == 2) == 0) } } From 46346943bb6c312dc87ac3fcdfd1dbeac68c53b5 Mon Sep 17 00:00:00 2001 From: Utkarsh Date: Wed, 11 Nov 2020 09:28:59 +0900 Subject: [PATCH 0439/1009] [SPARK-33404][SQL] Fix incorrect results in `date_trunc` expression ### What changes were proposed in this pull request? The following query produces incorrect results: ``` SELECT date_trunc('minute', '1769-10-17 17:10:02') ``` Spark currently incorrectly returns ``` 1769-10-17 17:10:02 ``` against the expected return value of ``` 1769-10-17 17:10:00 ``` **Steps to repro** Run the following commands in spark-shell: ``` spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") spark.sql("SELECT date_trunc('minute', '1769-10-17 17:10:02')").show() ``` This happens as `truncTimestamp` in package `org.apache.spark.sql.catalyst.util.DateTimeUtils` incorrectly assumes that time zone offsets can never have the granularity of a second and thus does not account for time zone adjustment when truncating the given timestamp to `minute`. This assumption is currently used when truncating the timestamps to `microsecond, millisecond, second, or minute`. This PR fixes this issue and always uses time zone knowledge when truncating timestamps regardless of the truncation unit. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new tests to `DateTimeUtilsSuite` which previously failed and pass now. Closes #30303 from utkarsh39/trunc-timestamp-fix. Authored-by: Utkarsh Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/util/DateTimeUtils.scala | 6 ++-- .../catalyst/util/DateTimeUtilsSuite.scala | 34 +++++++++++++------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ff6b106d93d1d..3b974759bd6c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -736,14 +736,16 @@ object DateTimeUtils { * Trunc level should be generated using `parseTruncLevel()`, should be between 0 and 9. */ def truncTimestamp(micros: Long, level: Int, zoneId: ZoneId): Long = { + // Time zone offsets have a maximum precision of seconds (see `java.time.ZoneOffset`). Hence + // truncation to microsecond, millisecond, and second can be done + // without using time zone information. This results in a performance improvement. 
level match { case TRUNC_TO_MICROSECOND => micros case TRUNC_TO_MILLISECOND => micros - Math.floorMod(micros, MICROS_PER_MILLIS) case TRUNC_TO_SECOND => micros - Math.floorMod(micros, MICROS_PER_SECOND) - case TRUNC_TO_MINUTE => - micros - Math.floorMod(micros, MICROS_PER_MINUTE) + case TRUNC_TO_MINUTE => truncToUnit(micros, zoneId, ChronoUnit.MINUTES) case TRUNC_TO_HOUR => truncToUnit(micros, zoneId, ChronoUnit.HOURS) case TRUNC_TO_DAY => truncToUnit(micros, zoneId, ChronoUnit.DAYS) case _ => // Try to truncate date levels diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 7bbdf44d78c3c..3d841f32379ff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -518,18 +518,32 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(time == None) } - test("truncTimestamp") { - def testTrunc( - level: Int, - expected: String, - inputTS: Long, - zoneId: ZoneId = defaultZoneId): Unit = { - val truncated = - DateTimeUtils.truncTimestamp(inputTS, level, zoneId) - val expectedTS = toTimestamp(expected, defaultZoneId) - assert(truncated === expectedTS.get) + def testTrunc( + level: Int, + expected: String, + inputTS: Long, + zoneId: ZoneId = defaultZoneId): Unit = { + val truncated = DateTimeUtils.truncTimestamp(inputTS, level, zoneId) + val expectedTS = toTimestamp(expected, defaultZoneId) + assert(truncated === expectedTS.get) + } + + test("SPARK-33404: test truncTimestamp when time zone offset from UTC has a " + + "granularity of seconds") { + for (zid <- ALL_TIMEZONES) { + withDefaultTimeZone(zid) { + val inputTS = DateTimeUtils.stringToTimestamp( + UTF8String.fromString("1769-10-17T17:10:02.123456"), defaultZoneId) + testTrunc(DateTimeUtils.TRUNC_TO_MINUTE, "1769-10-17T17:10:00", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_SECOND, "1769-10-17T17:10:02", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_MILLISECOND, "1769-10-17T17:10:02.123", inputTS.get, zid) + testTrunc(DateTimeUtils.TRUNC_TO_MICROSECOND, "1769-10-17T17:10:02.123456", + inputTS.get, zid) + } } + } + test("truncTimestamp") { val defaultInputTS = DateTimeUtils.stringToTimestamp( UTF8String.fromString("2015-03-05T09:32:05.359123"), defaultZoneId) val defaultInputTS1 = DateTimeUtils.stringToTimestamp( From 5197c5d2e7648d75def3e159e0d2aa3e20117105 Mon Sep 17 00:00:00 2001 From: ulysses Date: Wed, 11 Nov 2020 11:39:11 +0900 Subject: [PATCH 0440/1009] [SPARK-33390][SQL] Make Literal support char array ### What changes were proposed in this pull request? Make Literal support char array. ### Why are the changes needed? We always use `Literal()` to create foldable value, and `char[]` is a usual data type. We can make it easy that support create String Literal with `char[]`. ### Does this PR introduce _any_ user-facing change? Yes, user can call `Literal()` with `char[]`. ### How was this patch tested? Add test. Closes #30295 from ulysses-you/SPARK-33390. 
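A minimal sketch of the usage this enables, mirroring the new tests in the diff below (assumes a Spark build that contains this change):

```scala
import org.apache.spark.sql.catalyst.expressions.Literal

// A char array is now converted to a string literal of StringType.
Literal("hello".toCharArray)   // equivalent to Literal("hello")
Literal(Array('a', 'c'))       // string literal "ac"

// The DataFrame API benefits as well: comparing a string column against a
// char array behaves like comparing against the equivalent string, e.g.
// df.where($"zoo" === Array('a', 'a'))   // matches rows where zoo = "aa"
```
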
Authored-by: ulysses Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/CatalystTypeConverters.scala | 1 + .../apache/spark/sql/catalyst/expressions/literals.scala | 4 ++++ .../spark/sql/catalyst/CatalystTypeConvertersSuite.scala | 7 +++++++ .../catalyst/expressions/LiteralExpressionSuite.scala | 9 +++++++++ .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 8 ++++++++ 5 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index aab944c680149..971d61518c026 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -291,6 +291,7 @@ object CatalystTypeConverters { case str: String => UTF8String.fromString(str) case utf8: UTF8String => utf8 case chr: Char => UTF8String.fromString(chr.toString) + case ac: Array[Char] => UTF8String.fromString(String.valueOf(ac)) case other => throw new IllegalArgumentException( s"The value (${other.toString}) of the type (${other.getClass.getCanonicalName}) " + s"cannot be converted to the string type") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 413d0af61a05c..1e69814673082 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.lang.{Boolean => JavaBoolean} import java.lang.{Byte => JavaByte} +import java.lang.{Character => JavaChar} import java.lang.{Double => JavaDouble} import java.lang.{Float => JavaFloat} import java.lang.{Integer => JavaInteger} @@ -62,6 +63,7 @@ object Literal { case s: Short => Literal(s, ShortType) case s: String => Literal(UTF8String.fromString(s), StringType) case c: Char => Literal(UTF8String.fromString(c.toString), StringType) + case ac: Array[Char] => Literal(UTF8String.fromString(String.valueOf(ac)), StringType) case b: Boolean => Literal(b, BooleanType) case d: BigDecimal => val decimal = Decimal(d) @@ -102,6 +104,7 @@ object Literal { case JavaByte.TYPE => ByteType case JavaFloat.TYPE => FloatType case JavaBoolean.TYPE => BooleanType + case JavaChar.TYPE => StringType // java classes case _ if clz == classOf[LocalDate] => DateType @@ -110,6 +113,7 @@ object Literal { case _ if clz == classOf[Timestamp] => TimestampType case _ if clz == classOf[JavaBigDecimal] => DecimalType.SYSTEM_DEFAULT case _ if clz == classOf[Array[Byte]] => BinaryType + case _ if clz == classOf[Array[Char]] => StringType case _ if clz == classOf[JavaShort] => ShortType case _ if clz == classOf[JavaInteger] => IntegerType case _ if clz == classOf[JavaLong] => LongType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala index b9e7cf3049896..f4b08330e4c79 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala @@ -152,6 +152,13 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { 
assert(converter(chr) === expected) } + test("SPARK-33390: Make Literal support char array") { + val ac = Array('a', 'c') + val converter = CatalystTypeConverters.createToCatalystConverter(StringType) + val expected = UTF8String.fromString(String.valueOf(ac)) + assert(converter(ac) === expected) + } + test("converting java.time.Instant to TimestampType") { Seq( "0101-02-16T10:11:32Z", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index bb86135021b91..7a482641def3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -239,6 +239,15 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create('\n'), "\n") } + test("SPARK-33390: Make Literal support char array") { + checkEvaluation(Literal(Array('h', 'e', 'l', 'l', 'o')), "hello") + checkEvaluation(Literal(Array("hello".toCharArray)), Array("hello")) + // scalastyle:off + checkEvaluation(Literal(Array('测','试')), "测试") + checkEvaluation(Literal(Array('a', '测', 'b', '试', 'c')), "a测b试c") + // scalastyle:on + } + test("construct literals from java.time.LocalDate") { Seq( LocalDate.of(1, 1, 1), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 3c914ae043677..6a1378837ea9b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1939,6 +1939,14 @@ class DatasetSuite extends QueryTest Seq(FooClassWithEnum(1, null), FooClassWithEnum(2, FooEnum.E2)): _* ) } + + test("SPARK-33390: Make Literal support char array") { + val df = Seq("aa", "bb", "cc", "abc").toDF("zoo") + checkAnswer(df.where($"zoo" === Array('a', 'a')), Seq(Row("aa"))) + checkAnswer( + df.where($"zoo".contains(Array('a', 'b'))), + Seq(Row("abc"))) + } } object AssertExecutionId { From 1e2eeda20e062a77dfd8f944abeaeeb609817ae3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 11 Nov 2020 05:26:46 +0000 Subject: [PATCH 0441/1009] [SPARK-33382][SQL][TESTS] Unify datasource v1 and v2 SHOW TABLES tests ### What changes were proposed in this pull request? In the PR, I propose to gather common `SHOW TABLES` tests into one trait `org.apache.spark.sql.execution.command.ShowTablesSuite`, and put datasource specific tests to the `v1.ShowTablesSuite` and `v2.ShowTablesSuite`. Also tests for parsing `SHOW TABLES` are extracted to `ShowTablesParserSuite`. ### Why are the changes needed? - The unification will allow to run common `SHOW TABLES` tests for both DSv1 and DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: - `org.apache.spark.sql.execution.command.v1.ShowTablesSuite` - `org.apache.spark.sql.execution.command.v2.ShowTablesSuite` - `ShowTablesParserSuite` Closes #30287 from MaxGekk/unify-dsv1_v2-tests. 
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 49 ------ .../sql/connector/DataSourceV2SQLSuite.scala | 150 +----------------- .../command/ShowTablesParserSuite.scala | 76 +++++++++ .../execution/command/ShowTablesSuite.scala | 122 ++++++++++++++ .../command/v1/ShowTablesSuite.scala | 95 +++++++++++ .../command/v2/ShowTablesSuite.scala | 115 ++++++++++++++ 6 files changed, 409 insertions(+), 198 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 085aaf148c8cd..7dac8ffd8475d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1233,55 +1233,6 @@ class DDLParserSuite extends AnalysisTest { assert(exc.getMessage.contains("There must be at least one WHEN clause in a MERGE statement")) } - test("show tables") { - comparePlans( - parsePlan("SHOW TABLES"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW TABLES '*test*'"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES LIKE '*test*'"), - ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES FROM testcat.ns1.ns2.tbl"), - ShowTables(UnresolvedNamespace(Seq("testcat", "ns1", "ns2", "tbl")), None)) - comparePlans( - parsePlan("SHOW TABLES IN testcat.ns1.ns2.tbl"), - ShowTables(UnresolvedNamespace(Seq("testcat", "ns1", "ns2", "tbl")), None)) - comparePlans( - parsePlan("SHOW TABLES IN ns1 '*test*'"), - ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) - comparePlans( - parsePlan("SHOW TABLES IN ns1 LIKE '*test*'"), - ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) - } - - test("show table extended") { - comparePlans( - parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), - ShowTableStatement(None, "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED FROM testcat.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED IN testcat.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", None)) - comparePlans( - parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), - ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) - comparePlans( - parsePlan("SHOW TABLE EXTENDED FROM testcat.ns1.ns2 LIKE '*test*' " + - "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) - comparePlans( - parsePlan("SHOW TABLE EXTENDED IN testcat.ns1.ns2 LIKE '*test*' " + - "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq("testcat", "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) - } 
- test("show views") { comparePlans( parsePlan("SHOW VIEWS"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index dfa32b9ac802e..6f888e527eeab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -925,71 +925,9 @@ class DataSourceV2SQLSuite } } - test("ShowTables: using v2 catalog") { - spark.sql("CREATE TABLE testcat.db.table_name (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.n1.n2.db.table_name (id bigint, data string) USING foo") - - runShowTablesSql("SHOW TABLES FROM testcat.db", Seq(Row("db", "table_name"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.n1.n2.db", - Seq(Row("n1.n2.db", "table_name"))) - } - - test("ShowTables: using v2 catalog with a pattern") { - spark.sql("CREATE TABLE testcat.db.table (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_1 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db.table_name_2 (id bigint, data string) USING foo") - spark.sql("CREATE TABLE testcat.db2.table_name_2 (id bigint, data string) USING foo") - - runShowTablesSql( - "SHOW TABLES FROM testcat.db", - Seq( - Row("db", "table"), - Row("db", "table_name_1"), - Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*name*'", - Seq(Row("db", "table_name_1"), Row("db", "table_name_2"))) - - runShowTablesSql( - "SHOW TABLES FROM testcat.db LIKE '*2'", - Seq(Row("db", "table_name_2"))) - } - - test("ShowTables: using v2 catalog, namespace doesn't exist") { - runShowTablesSql("SHOW TABLES FROM testcat.unknown", Seq()) - } - - test("ShowTables: using v1 catalog") { - runShowTablesSql( - "SHOW TABLES FROM default", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - } - - test("ShowTables: using v1 catalog, db doesn't exist ") { - // 'db' below resolves to a database name for v1 catalog because there is no catalog named - // 'db' and there is no default catalog set. 
- val exception = intercept[NoSuchDatabaseException] { - runShowTablesSql("SHOW TABLES FROM db", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("Database 'db' not found")) - } - - test("ShowTables: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { - val exception = intercept[AnalysisException] { - runShowTablesSql("SHOW TABLES FROM a.b", Seq(), expectV2Catalog = false) - } - - assert(exception.getMessage.contains("The database name is not valid: a.b")) - } - test("ShowViews: using v1 catalog, db name with multipartIdentifier ('a.b') is not allowed.") { val exception = intercept[AnalysisException] { - sql("SHOW TABLES FROM a.b") + sql("SHOW VIEWS FROM a.b") } assert(exception.getMessage.contains("The database name is not valid: a.b")) @@ -1004,48 +942,6 @@ class DataSourceV2SQLSuite " only SessionCatalog supports this command.")) } - test("ShowTables: using v2 catalog with empty namespace") { - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - runShowTablesSql("SHOW TABLES FROM testcat", Seq(Row("", "table"))) - } - - test("ShowTables: namespace is not specified and default v2 catalog is set") { - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") - spark.sql("CREATE TABLE testcat.table (id bigint, data string) USING foo") - - // v2 catalog is used where default namespace is empty for TestInMemoryTableCatalog. - runShowTablesSql("SHOW TABLES", Seq(Row("", "table"))) - } - - test("ShowTables: namespace not specified and default v2 catalog not set - fallback to v1") { - runShowTablesSql( - "SHOW TABLES", - Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - - runShowTablesSql( - "SHOW TABLES LIKE '*2'", - Seq(Row("", "source2", true)), - expectV2Catalog = false) - } - - test("ShowTables: change current catalog and namespace with USE statements") { - sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") - - // Initially, the v2 session catalog (current catalog) is used. - runShowTablesSql( - "SHOW TABLES", Seq(Row("", "source", true), Row("", "source2", true)), - expectV2Catalog = false) - - // Update the current catalog, and no table is matched since the current namespace is Array(). - sql("USE testcat") - runShowTablesSql("SHOW TABLES", Seq()) - - // Update the current namespace to match ns1.ns2.table. 
- sql("USE testcat.ns1.ns2") - runShowTablesSql("SHOW TABLES", Seq(Row("ns1.ns2", "table"))) - } - private def runShowTablesSql( sqlText: String, expected: Seq[Row], @@ -1066,50 +962,6 @@ class DataSourceV2SQLSuite assert(expected === df.collect()) } - test("SHOW TABLE EXTENDED not valid v1 database") { - def testV1CommandNamespace(sqlCommand: String, namespace: String): Unit = { - val e = intercept[AnalysisException] { - sql(sqlCommand) - } - assert(e.message.contains(s"The database name is not valid: ${namespace}")) - } - - val namespace = "testcat.ns1.ns2" - val table = "tbl" - withTable(s"$namespace.$table") { - sql(s"CREATE TABLE $namespace.$table (id bigint, data string) " + - s"USING foo PARTITIONED BY (id)") - - testV1CommandNamespace(s"SHOW TABLE EXTENDED FROM $namespace LIKE 'tb*'", - namespace) - testV1CommandNamespace(s"SHOW TABLE EXTENDED IN $namespace LIKE 'tb*'", - namespace) - testV1CommandNamespace("SHOW TABLE EXTENDED " + - s"FROM $namespace LIKE 'tb*' PARTITION(id=1)", - namespace) - testV1CommandNamespace("SHOW TABLE EXTENDED " + - s"IN $namespace LIKE 'tb*' PARTITION(id=1)", - namespace) - } - } - - test("SHOW TABLE EXTENDED valid v1") { - val expected = Seq(Row("", "source", true), Row("", "source2", true)) - val schema = new StructType() - .add("database", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - .add("isTemporary", BooleanType, nullable = false) - .add("information", StringType, nullable = false) - - val df = sql("SHOW TABLE EXTENDED FROM default LIKE '*source*'") - val result = df.collect() - val resultWithoutInfo = result.map{ case Row(db, table, temp, _) => Row(db, table, temp)} - - assert(df.schema === schema) - assert(resultWithoutInfo === expected) - result.foreach{ case Row(_, _, _, info: String) => assert(info.nonEmpty)} - } - test("CreateNameSpace: basic tests") { // Session catalog is used. withNamespace("ns") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala new file mode 100644 index 0000000000000..16f3dea8d75ef --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.{ShowTables, ShowTableStatement} +import org.apache.spark.sql.test.SharedSparkSession + +class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { + private val catalog = "test_catalog" + + test("show tables") { + comparePlans( + parsePlan("SHOW TABLES"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), None)) + comparePlans( + parsePlan("SHOW TABLES '*test*'"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) + comparePlans( + parsePlan("SHOW TABLES LIKE '*test*'"), + ShowTables(UnresolvedNamespace(Seq.empty[String]), Some("*test*"))) + comparePlans( + parsePlan(s"SHOW TABLES FROM $catalog.ns1.ns2.tbl"), + ShowTables(UnresolvedNamespace(Seq(catalog, "ns1", "ns2", "tbl")), None)) + comparePlans( + parsePlan(s"SHOW TABLES IN $catalog.ns1.ns2.tbl"), + ShowTables(UnresolvedNamespace(Seq(catalog, "ns1", "ns2", "tbl")), None)) + comparePlans( + parsePlan("SHOW TABLES IN ns1 '*test*'"), + ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) + comparePlans( + parsePlan("SHOW TABLES IN ns1 LIKE '*test*'"), + ShowTables(UnresolvedNamespace(Seq("ns1")), Some("*test*"))) + } + + test("show table extended") { + comparePlans( + parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), + ShowTableStatement(None, "*test*", None)) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*'"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*'"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + comparePlans( + parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), + ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*' " + + "PARTITION(ds='2008-04-09')"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", + Some(Map("ds" -> "2008-04-09")))) + comparePlans( + parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*' " + + "PARTITION(ds='2008-04-09')"), + ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", + Some(Map("ds" -> "2008-04-09")))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala new file mode 100644 index 0000000000000..01720b5723243 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.Row +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType + +trait ShowTablesSuite extends SharedSparkSession { + protected def version: String + protected def catalog: String + protected def defaultNamespace: Seq[String] + protected def defaultUsing: String + case class ShowRow(namespace: String, table: String, isTemporary: Boolean) + protected def getRows(showRows: Seq[ShowRow]): Seq[Row] + // Gets the schema of `SHOW TABLES` + protected def showSchema: StructType + + protected def runShowTablesSql(sqlText: String, expected: Seq[ShowRow]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === showSchema) + assert(df.collect() === getRows(expected)) + } + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"SHOW TABLES $version: " + testName, testTags: _*)(testFun) + } + + test("show an existing table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTable(s"$catalog.ns.table") { + sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) + } + } + } + + test("show tables with a pattern") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE NAMESPACE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + withTable( + s"$catalog.ns1.table", + s"$catalog.ns1.table_name_1", + s"$catalog.ns1.table_name_2", + s"$catalog.ns2.table_name_2") { + sql(s"CREATE TABLE $catalog.ns1.table (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_1 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_2 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns2.table_name_2 (id bigint, data string) $defaultUsing") + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1", + Seq( + ShowRow("ns1", "table", false), + ShowRow("ns1", "table_name_1", false), + ShowRow("ns1", "table_name_2", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*name*'", + Seq( + ShowRow("ns1", "table_name_1", false), + ShowRow("ns1", "table_name_2", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*2'", + Seq(ShowRow("ns1", "table_name_2", false))) + } + } + } + + test("show tables with current catalog and namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + val tblName = (catalog +: defaultNamespace :+ "table").quoted + withTable(tblName) { + sql(s"CREATE TABLE $tblName (name STRING, id INT) $defaultUsing") + val ns = defaultNamespace.mkString(".") + runShowTablesSql("SHOW TABLES", Seq(ShowRow(ns, "table", false))) + } + } + } + + test("change current catalog and namespace with USE statements") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE 
NAMESPACE $catalog.ns") + withTable(s"$catalog.ns.table") { + sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + + sql(s"USE $catalog") + // No table is matched since the current namespace is not ["ns"] + assert(defaultNamespace != Seq("ns")) + runShowTablesSql("SHOW TABLES", Seq()) + + // Update the current namespace to match "ns.tbl". + sql(s"USE $catalog.ns") + runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala new file mode 100644 index 0000000000000..feb3bc623f3fa --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.types.{BooleanType, StringType, StructType} + +class ShowTablesSuite extends CommonShowTablesSuite { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultNamespace: Seq[String] = Seq("default") + override def defaultUsing: String = "USING parquet" + override def showSchema: StructType = { + new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + } + override def getRows(showRows: Seq[ShowRow]): Seq[Row] = { + showRows.map { + case ShowRow(namespace, table, isTemporary) => Row(namespace, table, isTemporary) + } + } + + private def withSourceViews(f: => Unit): Unit = { + withTable("source", "source2") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + f + } + } + + // `SHOW TABLES` returns empty result in V2 catalog instead of throwing the exception. + test("show table in a not existing namespace") { + val msg = intercept[NoSuchDatabaseException] { + runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) + }.getMessage + assert(msg.contains("Database 'unknown' not found")) + } + + // `SHOW TABLES` from v2 catalog returns empty result. 
+ test("v1 SHOW TABLES list the temp views") { + withSourceViews { + runShowTablesSql( + "SHOW TABLES FROM default", + Seq(ShowRow("", "source", true), ShowRow("", "source2", true))) + } + } + + test("v1 SHOW TABLES only support single-level namespace") { + val exception = intercept[AnalysisException] { + runShowTablesSql("SHOW TABLES FROM a.b", Seq()) + } + assert(exception.getMessage.contains("The database name is not valid: a.b")) + } + + test("SHOW TABLE EXTENDED from default") { + withSourceViews { + val expected = Seq(Row("", "source", true), Row("", "source2", true)) + val schema = new StructType() + .add("database", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + .add("isTemporary", BooleanType, nullable = false) + .add("information", StringType, nullable = false) + + val df = sql("SHOW TABLE EXTENDED FROM default LIKE '*source*'") + val result = df.collect() + val resultWithoutInfo = result.map { case Row(db, table, temp, _) => Row(db, table, temp) } + + assert(df.schema === schema) + assert(resultWithoutInfo === expected) + result.foreach { case Row(_, _, _, info: String) => assert(info.nonEmpty) } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala new file mode 100644 index 0000000000000..668120ae1cada --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException +import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.types.{StringType, StructType} + +class ShowTablesSuite extends CommonShowTablesSuite { + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultNamespace: Seq[String] = Nil + override def defaultUsing: String = "USING _" + override def showSchema: StructType = { + new StructType() + .add("namespace", StringType, nullable = false) + .add("tableName", StringType, nullable = false) + } + override def getRows(showRows: Seq[ShowRow]): Seq[Row] = { + showRows.map { + case ShowRow(namespace, table, _) => Row(namespace, table) + } + } + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) + + // The test fails with the exception `NoSuchDatabaseException` in V1 catalog. 
+ // TODO(SPARK-33394): Throw `NoSuchDatabaseException` for not existing namespace + test("show table in a not existing namespace") { + runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.n1.n2.db + test("show tables in nested namespaces") { + withTable(s"$catalog.n1.n2.db") { + spark.sql(s"CREATE TABLE $catalog.n1.n2.db.table_name (id bigint, data string) $defaultUsing") + runShowTablesSql( + s"SHOW TABLES FROM $catalog.n1.n2.db", + Seq(ShowRow("n1.n2.db", "table_name", false))) + } + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.table + test("using v2 catalog with empty namespace") { + withTable(s"$catalog.table") { + spark.sql(s"CREATE TABLE $catalog.table (id bigint, data string) $defaultUsing") + runShowTablesSql(s"SHOW TABLES FROM $catalog", Seq(ShowRow("", "table", false))) + } + } + + // The test fails for V1 catalog with the error: + // org.apache.spark.sql.AnalysisException: + // The namespace in session catalog must have exactly one name part: spark_catalog.ns1.ns2.tbl + test("SHOW TABLE EXTENDED not valid v1 database") { + def testV1CommandNamespace(sqlCommand: String, namespace: String): Unit = { + val e = intercept[AnalysisException] { + sql(sqlCommand) + } + assert(e.message.contains(s"The database name is not valid: ${namespace}")) + } + + val namespace = s"$catalog.ns1.ns2" + val table = "tbl" + withTable(s"$namespace.$table") { + sql(s"CREATE TABLE $namespace.$table (id bigint, data string) " + + s"$defaultUsing PARTITIONED BY (id)") + + testV1CommandNamespace(s"SHOW TABLE EXTENDED FROM $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace(s"SHOW TABLE EXTENDED IN $namespace LIKE 'tb*'", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"FROM $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + testV1CommandNamespace("SHOW TABLE EXTENDED " + + s"IN $namespace LIKE 'tb*' PARTITION(id=1)", + namespace) + } + } + + // TODO(SPARK-33393): Support SHOW TABLE EXTENDED in DSv2 + test("SHOW TABLE EXTENDED: an existing table") { + val table = "people" + withTable(s"$catalog.$table") { + sql(s"CREATE TABLE $catalog.$table (name STRING, id INT) $defaultUsing") + val errMsg = intercept[NoSuchDatabaseException] { + sql(s"SHOW TABLE EXTENDED FROM $catalog LIKE '*$table*'").collect() + }.getMessage + assert(errMsg.contains(s"Database '$catalog' not found")) + } + } +} From 6d5d03095798a2ca2014ada340424512d60810ce Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 11 Nov 2020 05:54:27 +0000 Subject: [PATCH 0442/1009] [SPARK-33414][SQL] Migrate SHOW CREATE TABLE command to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `SHOW CREATE TABLE` to use `UnresolvedTableOrView` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). 
Note that `SHOW CREATE TABLE` works only with a v1 table and a permanent view, and not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t (key INT, value STRING) USING hive") sql("USE db") sql("SHOW CREATE TABLE t AS SERDE") // Succeeds ``` With this change, `SHOW CREATE TABLE ... AS SERDE` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table or permanent view.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$43(Analyzer.scala:883) at scala.Option.map(Option.scala:230) ``` , which is expected since temporary view is resolved first and `SHOW CREATE TABLE ... AS SERDE` doesn't support a temporary view. Note that there is no behavior change for `SHOW CREATE TABLE` without `AS SERDE` since it was already resolving to a temporary view first. See below for more detail. ### Does this PR introduce _any_ user-facing change? After this PR, `SHOW CREATE TABLE t AS SERDE` is resolved to a temp view `t` instead of table `db.t` in the above scenario. Note that there is no behavior change for `SHOW CREATE TABLE` without `AS SERDE`, but the exception message changes from `SHOW CREATE TABLE is not supported on a temporary view` to `t is a temp view not table or permanent view`. ### How was this patch tested? Updated existing tests. Closes #30321 from imback82/show_create_table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 8 ++++++-- .../sql/catalyst/plans/logical/statements.scala | 7 ------- .../sql/catalyst/plans/logical/v2Commands.scala | 7 +++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 8 +++++++- .../catalyst/analysis/ResolveSessionCatalog.scala | 13 ++++++------- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../org/apache/spark/sql/ShowCreateTableSuite.scala | 7 ++++--- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 3 ++- .../apache/spark/sql/execution/SQLViewSuite.scala | 2 +- 9 files changed, 36 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 07086d1a45aa0..893afc8984e9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3301,10 +3301,14 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Creates a [[ShowCreateTableStatement]] + * Creates a [[ShowCreateTable]] */ override def visitShowCreateTable(ctx: ShowCreateTableContext): LogicalPlan = withOrigin(ctx) { - ShowCreateTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier()), ctx.SERDE != null) + ShowCreateTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + allowTempView = false), + ctx.SERDE != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 
246e7f3bcb959..2fc56891cd15e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -347,13 +347,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A SHOW CREATE TABLE statement, as parsed from SQL. - */ -case class ShowCreateTableStatement( - tableName: Seq[String], - asSerde: Boolean = false) extends ParsedStatement - /** * A CACHE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b5386f5044452..c1fc0b69354cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -622,3 +622,10 @@ case class LoadData( partition: Option[TablePartitionSpec]) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the SHOW CREATE TABLE command. + */ +case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 7dac8ffd8475d..be1ac56c4a4a3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1583,7 +1583,13 @@ class DDLParserSuite extends AnalysisTest { test("SHOW CREATE table") { comparePlans( parsePlan("SHOW CREATE TABLE a.b.c"), - ShowCreateTableStatement(Seq("a", "b", "c"))) + ShowCreateTable(UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false))) + + comparePlans( + parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + asSerde = true)) } test("CACHE TABLE") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 59652229a2b2e..ff25272aebb5b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -434,13 +434,12 @@ class ResolveSessionCatalog( isOverwrite, partition) - case ShowCreateTableStatement(tbl, asSerde) if !asSerde => - val name = parseTempViewOrV1Table(tbl, "SHOW CREATE TABLE") - ShowCreateTableCommand(name.asTableIdentifier) - - case ShowCreateTableStatement(tbl, asSerde) if asSerde => - val v1TableName = parseV1Table(tbl, "SHOW CREATE TABLE AS SERDE") - ShowCreateTableAsSerdeCommand(v1TableName.asTableIdentifier) + case ShowCreateTable(ResolvedV1TableOrViewIdentifier(ident), asSerde) => + if (asSerde) { + ShowCreateTableAsSerdeCommand(ident.asTableIdentifier) + } else { + ShowCreateTableCommand(ident.asTableIdentifier) + } case CacheTableStatement(tbl, plan, isLazy, options) => val name = if (plan.isDefined) { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5695d232fae54..48fa88ed550b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -286,6 +286,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") + case ShowCreateTable(_: ResolvedTable, _) => + throw new AnalysisException("SHOW CREATE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 1106a787cc9a7..7b4c8d1cc71d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -155,16 +155,17 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val ex = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") } - assert(ex.getMessage.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(ex.getMessage.contains(s"$viewName is a temp view not table or permanent view")) } withGlobalTempView(viewName) { sql(s"CREATE GLOBAL TEMPORARY VIEW $viewName AS SELECT 1 AS a") + val globalTempViewDb = spark.sessionState.catalog.globalTempViewManager.database val ex = intercept[AnalysisException] { - val globalTempViewDb = spark.sessionState.catalog.globalTempViewManager.database sql(s"SHOW CREATE TABLE $globalTempViewDb.$viewName") } - assert(ex.getMessage.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(ex.getMessage.contains( + s"$globalTempViewDb.$viewName is a temp view not table or permanent view")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6f888e527eeab..68de55f03ba83 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1954,7 +1954,8 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1CommandSupportingTempView("SHOW CREATE TABLE", t) + testNotSupportedV2Command("SHOW CREATE TABLE", t) + testNotSupportedV2Command("SHOW CREATE TABLE", s"$t AS SERDE") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 8889ea177720e..f5d6ea929a9aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -176,7 +176,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e3.contains("SHOW CREATE TABLE is not supported on a temporary view")) + assert(e3.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"SHOW 
PARTITIONS $viewName") val e4 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") From 4b367976a877adb981f65d546e1522fdf30d0731 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 11 Nov 2020 15:24:05 +0900 Subject: [PATCH 0443/1009] [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark ### What changes were proposed in this pull request? This PR intends to fix the behaviour of query filters in `TPCDSQueryBenchmark`. We can use an option `--query-filter` for selecting TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. But, the current master has a weird behaviour about the option. For example, if we pass `--query-filter q6` so as to run the TPCDS q6 only, `TPCDSQueryBenchmark` runs `q6` and `q6-v2.7` because the `filterQueries` method does not respect the name suffix. So, there is no way now to run the TPCDS q6 only. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #30324 from maropu/FilterBugInTPCDSQueryBenchmark. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../benchmark/TPCDSQueryBenchmark.scala | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 7bbf0795eb052..43bc7c12937ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -98,11 +98,16 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { } } - def filterQueries( + private def filterQueries( origQueries: Seq[String], - args: TPCDSQueryBenchmarkArguments): Seq[String] = { - if (args.queryFilter.nonEmpty) { - origQueries.filter(args.queryFilter.contains) + queryFilter: Set[String], + nameSuffix: String = ""): Seq[String] = { + if (queryFilter.nonEmpty) { + if (nameSuffix.nonEmpty) { + origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") } + } else { + origQueries.filter(queryFilter.contains) + } } else { origQueries } @@ -125,6 +130,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") // This list only includes TPC-DS v2.7 queries that are different from v1.4 ones + val nameSuffixForQueriesV2_7 = "-v2.7" val tpcdsQueriesV2_7 = Seq( "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", @@ -132,8 +138,9 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q80a", "q86a", "q98") // If `--query-filter` defined, filters the queries that this option selects - val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs) - val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs) + val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter) + val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter, + nameSuffix = nameSuffixForQueriesV2_7) if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) { throw new RuntimeException( @@ -143,6 +150,6 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { val tableSizes = setupTables(benchmarkArgs.dataLocation) runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes) 
runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes, - nameSuffix = "-v2.7") + nameSuffix = nameSuffixForQueriesV2_7) } } From 8760032f4f7e1ef36fee6afc45923d3826ef14fc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 11 Nov 2020 16:13:21 +0900 Subject: [PATCH 0444/1009] [SPARK-33412][SQL] OverwriteByExpression should resolve its delete condition based on the table relation not the input query ### What changes were proposed in this pull request? Add a special case to `ResolveReferences` that resolves `OverwriteByExpression`'s condition expression based on the table relation instead of the input query. ### Why are the changes needed? The condition expression is passed to the table implementation at the end, so we should resolve it using the table schema. Previously this worked because we had a hack in `ResolveReferences` to delay the resolution if `outputResolved == false`. However, this hack doesn't work for tables that accept any schema, like https://github.com/delta-io/delta/pull/521 . We may wrongly resolve the delete condition using the input query's output columns, which don't match the table column names. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests and an updated test in v2 write. Closes #30318 from cloud-fan/v2-write. Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/analysis/Analyzer.scala | 9 ++++----- .../sql/catalyst/plans/logical/v2Commands.scala | 3 ++- .../analysis/DataSourceV2AnalysisSuite.scala | 17 ++++++++++++----- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5834f9bad4a18..b27b8d8a606da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1548,11 +1548,10 @@ class Analyzer( g.copy(resolvedSelectedExprs, resolvedGroupingExprs, g.child, resolvedAggExprs) - case o: OverwriteByExpression - if !(o.table.resolved && o.query.resolved && o.outputResolved) => - // do not resolve expression attributes until the query attributes are resolved against the - // table by ResolveOutputRelation. that rule will alias the attributes to the table's names. - o + case o: OverwriteByExpression if o.table.resolved => + // The delete condition of `OverwriteByExpression` will be passed to the table + // implementation and should be resolved based on the table schema.
+ o.copy(deleteExpr = resolveExpressionBottomUp(o.deleteExpr, o.table)) case m @ MergeIntoTable(targetTable, sourceTable, _, _, _) if !m.resolved && targetTable.resolved && sourceTable.resolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index c1fc0b69354cd..e65555ea27672 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, Unevaluable} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} @@ -96,6 +96,7 @@ case class OverwriteByExpression( override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } + override def inputSet: AttributeSet = AttributeSet(table.output) override def withNewQuery(newQuery: LogicalPlan): OverwriteByExpression = { copy(query = newQuery) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala index ba926f842551f..349237c2aa893 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DataSourceV2AnalysisSuite.scala @@ -668,9 +668,7 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { Alias(Cast(a, DoubleType, Some(conf.sessionLocalTimeZone)), "x")(), Alias(Cast(b, DoubleType, Some(conf.sessionLocalTimeZone)), "y")()), query), - LessThanOrEqual( - AttributeReference("x", DoubleType, nullable = false)(x.exprId), - Literal(15.0d))) + LessThanOrEqual(x, Literal(15.0d))) assertNotResolved(parsedPlan) checkAnalysis(parsedPlan, expectedPlan) @@ -678,7 +676,7 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { } protected def testNotResolvedOverwriteByExpression(): Unit = { - val xRequiredTable = TestRelation(StructType(Seq( + val table = TestRelation(StructType(Seq( StructField("x", DoubleType, nullable = false), StructField("y", DoubleType))).toAttributes) @@ -687,10 +685,19 @@ abstract class DataSourceV2AnalysisBaseSuite extends AnalysisTest { StructField("b", DoubleType))).toAttributes) // the write is resolved (checked above). this test plan is not because of the expression. 
- val parsedPlan = OverwriteByExpression.byPosition(xRequiredTable, query, + val parsedPlan = OverwriteByExpression.byPosition(table, query, LessThanOrEqual(UnresolvedAttribute(Seq("a")), Literal(15.0d))) assertNotResolved(parsedPlan) assertAnalysisError(parsedPlan, Seq("cannot resolve", "`a`", "given input columns", "x, y")) + + val tableAcceptAnySchema = TestRelationAcceptAnySchema(StructType(Seq( + StructField("x", DoubleType, nullable = false), + StructField("y", DoubleType))).toAttributes) + + val parsedPlan2 = OverwriteByExpression.byPosition(tableAcceptAnySchema, query, + LessThanOrEqual(UnresolvedAttribute(Seq("a")), Literal(15.0d))) + assertNotResolved(parsedPlan2) + assertAnalysisError(parsedPlan2, Seq("cannot resolve", "`a`", "given input columns", "x, y")) } } From 1eb236b9360a000afc30424341698fe26ee96d0f Mon Sep 17 00:00:00 2001 From: stczwd Date: Wed, 11 Nov 2020 09:30:42 +0000 Subject: [PATCH 0445/1009] [SPARK-32512][SQL] add alter table add/drop partition command for datasourcev2 ### What changes were proposed in this pull request? This patch is trying to add `AlterTableAddPartitionExec` and `AlterTableDropPartitionExec` with the new table partition API, defined in #28617. ### Does this PR introduce _any_ user-facing change? Yes. User can use `alter table add partition` or `alter table drop partition` to create/drop partition in V2Table. ### How was this patch tested? Run suites and fix old tests. Closes #29339 from stczwd/SPARK-32512-new. Lead-authored-by: stczwd Co-authored-by: Jacky Lee Co-authored-by: Jackey Lee Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/CheckAnalysis.scala | 27 +++ .../analysis/ResolvePartitionSpec.scala | 89 ++++++++++ .../catalyst/analysis/v2ResolutionPlans.scala | 12 ++ .../sql/catalyst/parser/AstBuilder.scala | 18 +- .../catalyst/plans/logical/statements.scala | 18 -- .../catalyst/plans/logical/v2Commands.scala | 42 ++++- .../v2/DataSourceV2Implicits.scala | 31 +++- .../sql/catalyst/parser/DDLParserSuite.scala | 30 ++-- .../InMemoryPartitionTableCatalog.scala | 47 +++++ .../analysis/ResolveSessionCatalog.scala | 18 +- .../v2/AlterTableAddPartitionExec.scala | 65 +++++++ .../v2/AlterTableDropPartitionExec.scala | 57 ++++++ .../datasources/v2/DataSourceV2Strategy.scala | 12 +- .../AlterTablePartitionV2SQLSuite.scala | 162 ++++++++++++++++++ .../sql/connector/DataSourceV2SQLSuite.scala | 59 +++---- .../sql/connector/DatasourceV2SQLBase.scala | 54 ++++++ .../spark/sql/execution/SQLViewSuite.scala | 8 +- .../sql/hive/execution/HiveDDLSuite.scala | 17 +- 19 files changed, 670 insertions(+), 97 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b27b8d8a606da..690d66bec890d 100644 
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -225,6 +225,7 @@ class Analyzer( ResolveInsertInto :: ResolveRelations :: ResolveTables :: + ResolvePartitionSpec :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 33a5224ed293e..452ba80b23441 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -564,6 +565,12 @@ trait CheckAnalysis extends PredicateHelper { // no validation needed for set and remove property } + case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => + checkAlterTablePartition(table, parts) + + case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => + checkAlterTablePartition(table, parts) + case _ => // Fallbacks to the following checks } @@ -976,4 +983,24 @@ trait CheckAnalysis extends PredicateHelper { failOnOuterReferenceInSubTree(p) }} } + + // Make sure that table is able to alter partition. + private def checkAlterTablePartition( + table: Table, parts: Seq[PartitionSpec]): Unit = { + (table, parts) match { + case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => + failAnalysis("PartitionSpecs are not resolved") + + case (table, _) if !table.isInstanceOf[SupportsPartitionManagement] => + failAnalysis(s"Table ${table.name()} can not alter partitions.") + + // Skip atomic partition tables + case (_: SupportsAtomicPartitionManagement, _) => + case (_: SupportsPartitionManagement, parts) if parts.size > 1 => + failAnalysis( + s"Nonatomic partition table ${table.name()} can not alter multiple partitions.") + + case _ => + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala new file mode 100644 index 0000000000000..5e19a32968992 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement +import org.apache.spark.sql.types._ + +/** + * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. + */ +object ResolvePartitionSpec extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case r @ AlterTableAddPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => + r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + + case r @ AlterTableDropPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => + r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + } + + private def resolvePartitionSpecs( + partSpecs: Seq[PartitionSpec], partSchema: StructType): Seq[ResolvedPartitionSpec] = + partSpecs.map { + case unresolvedPartSpec: UnresolvedPartitionSpec => + ResolvedPartitionSpec( + convertToPartIdent(unresolvedPartSpec.spec, partSchema), unresolvedPartSpec.location) + case resolvedPartitionSpec: ResolvedPartitionSpec => + resolvedPartitionSpec + } + + private def convertToPartIdent( + partSpec: TablePartitionSpec, partSchema: StructType): InternalRow = { + val conflictKeys = partSpec.keys.toSeq.diff(partSchema.map(_.name)) + if (conflictKeys.nonEmpty) { + throw new AnalysisException(s"Partition key ${conflictKeys.mkString(",")} not exists") + } + + val partValues = partSchema.map { part => + val partValue = partSpec.get(part.name).orNull + if (partValue == null) { + null + } else { + // TODO: Support other datatypes, such as DateType + part.dataType match { + case _: ByteType => + partValue.toByte + case _: ShortType => + partValue.toShort + case _: IntegerType => + partValue.toInt + case _: LongType => + partValue.toLong + case _: FloatType => + partValue.toFloat + case _: DoubleType => + partValue.toDouble + case _: StringType => + partValue + case _ => + throw new AnalysisException( + s"Type ${part.dataType.typeName} is not supported for partition.") + } + } + } + InternalRow.fromSeq(partValues) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index fcf4a438eb19c..83acfb8d4a71c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.catalyst.analysis +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogFunction +import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, SupportsNamespaces, Table, TableCatalog} @@ -53,6 +55,12 @@ case class UnresolvedTableOrView( override def output: Seq[Attribute] = Nil } +sealed trait PartitionSpec + +case class UnresolvedPartitionSpec( + spec: TablePartitionSpec, + location: Option[String] = None) extends PartitionSpec + /** * Holds the name of a function that has yet to be looked up in a catalog. It will be resolved to * [[ResolvedFunc]] during analysis. @@ -78,6 +86,10 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T override def output: Seq[Attribute] = Nil } +case class ResolvedPartitionSpec( + spec: InternalRow, + location: Option[String] = None) extends PartitionSpec + /** * A plan containing resolved (temp) views. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 893afc8984e9c..be8bbb5ad3eba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3415,7 +3415,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AlterTableAddPartitionStatement]]. + * Create an [[AlterTableAddPartition]]. * * For example: * {{{ @@ -3435,10 +3435,10 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging val specsAndLocs = ctx.partitionSpecLocation.asScala.map { splCtx => val spec = visitNonOptionalPartitionSpec(splCtx.partitionSpec) val location = Option(splCtx.locationSpec).map(visitLocationSpec) - spec -> location + UnresolvedPartitionSpec(spec, location) } - AlterTableAddPartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterTableAddPartition( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), specsAndLocs.toSeq, ctx.EXISTS != null) } @@ -3460,7 +3460,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging } /** - * Create an [[AlterTableDropPartitionStatement]] + * Create an [[AlterTableDropPartition]] * * For example: * {{{ @@ -3477,9 +3477,11 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging if (ctx.VIEW != null) { operationNotAllowed("ALTER VIEW ... 
DROP PARTITION", ctx) } - AlterTableDropPartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), - ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec).toSeq, + val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) + .map(spec => UnresolvedPartitionSpec(spec)) + AlterTableDropPartition( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + partSpecs.toSeq, ifExists = ctx.EXISTS != null, purge = ctx.PURGE != null, retainData = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 2fc56891cd15e..39bc5a5604b20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -217,14 +217,6 @@ case class AlterTableSetLocationStatement( case class AlterTableRecoverPartitionsStatement( tableName: Seq[String]) extends ParsedStatement -/** - * ALTER TABLE ... ADD PARTITION command, as parsed from SQL - */ -case class AlterTableAddPartitionStatement( - tableName: Seq[String], - partitionSpecsAndLocs: Seq[(TablePartitionSpec, Option[String])], - ifNotExists: Boolean) extends ParsedStatement - /** * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. */ @@ -233,16 +225,6 @@ case class AlterTableRenamePartitionStatement( from: TablePartitionSpec, to: TablePartitionSpec) extends ParsedStatement -/** - * ALTER TABLE ... DROP PARTITION command, as parsed from SQL - */ -case class AlterTableDropPartitionStatement( - tableName: Seq[String], - specs: Seq[TablePartitionSpec], - ifExists: Boolean, - purge: Boolean, - retainData: Boolean) extends ParsedStatement - /** * ALTER TABLE ... SERDEPROPERTIES command, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index e65555ea27672..5bda2b5b8db01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.{NamedRelation, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{NamedRelation, PartitionSpec, ResolvedPartitionSpec, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema @@ -612,6 +612,46 @@ case class AnalyzeColumn( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ADD PARTITION command. 
+ * + * The syntax of this command is: + * {{{ + * ALTER TABLE table ADD [IF NOT EXISTS] + * PARTITION spec1 [LOCATION 'loc1'][, PARTITION spec2 [LOCATION 'loc2'], ...]; + * }}} + */ +case class AlterTableAddPartition( + child: LogicalPlan, + parts: Seq[PartitionSpec], + ifNotExists: Boolean) extends Command { + override lazy val resolved: Boolean = + childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ALTER TABLE DROP PARTITION command. + * This may remove the data and metadata for this partition. + * + * The syntax of this command is: + * {{{ + * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...]; + * }}} + */ +case class AlterTableDropPartition( + child: LogicalPlan, + parts: Seq[PartitionSpec], + ifExists: Boolean, + purge: Boolean, + retainData: Boolean) extends Command { + override lazy val resolved: Boolean = + childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the LOAD DATA INTO TABLE command. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala index 86ef867eca547..dfacf6e83ef57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.catalog.{SupportsDelete, SupportsRead, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.catalyst.analysis.{PartitionSpec, ResolvedPartitionSpec, UnresolvedPartitionSpec} +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsDelete, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.util.CaseInsensitiveStringMap object DataSourceV2Implicits { @@ -52,6 +53,26 @@ object DataSourceV2Implicits { } } + def asPartitionable: SupportsPartitionManagement = { + table match { + case support: SupportsPartitionManagement => + support + case _ => + throw new AnalysisException( + s"Table does not support partition management: ${table.name}") + } + } + + def asAtomicPartitionable: SupportsAtomicPartitionManagement = { + table match { + case support: SupportsAtomicPartitionManagement => + support + case _ => + throw new AnalysisException( + s"Table does not support atomic partition management: ${table.name}") + } + } + def supports(capability: TableCapability): Boolean = table.capabilities.contains(capability) def supportsAny(capabilities: TableCapability*): Boolean = capabilities.exists(supports) @@ -62,4 +83,12 @@ object DataSourceV2Implicits { new CaseInsensitiveStringMap(options.asJava) } } + + implicit class PartitionSpecsHelper(partSpecs: Seq[PartitionSpec]) { + def asUnresolvedPartitionSpecs: Seq[UnresolvedPartitionSpec] = + partSpecs.map(_.asInstanceOf[UnresolvedPartitionSpec]) + + def asResolvedPartitionSpecs: Seq[ResolvedPartitionSpec] = + partSpecs.map(_.asInstanceOf[ResolvedPartitionSpec]) + } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index be1ac56c4a4a3..cddc392cfa2d7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, FunctionResourceType, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -1700,15 +1700,15 @@ class DDLParserSuite extends AnalysisTest { val parsed1 = parsePlan(sql1) val parsed2 = parsePlan(sql2) - val expected1 = AlterTableAddPartitionStatement( - Seq("a", "b", "c"), + val expected1 = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c")), Seq( - (Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), - (Map("dt" -> "2009-09-09", "country" -> "uk"), None)), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), ifNotExists = true) - val expected2 = AlterTableAddPartitionStatement( - Seq("a", "b", "c"), - Seq((Map("dt" -> "2008-08-08"), Some("loc"))), + val expected2 = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c")), + Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), ifNotExists = false) comparePlans(parsed1, expected1) @@ -1773,11 +1773,11 @@ class DDLParserSuite extends AnalysisTest { assertUnsupported(sql1_view) assertUnsupported(sql2_view) - val expected1_table = AlterTableDropPartitionStatement( - Seq("table_name"), + val expected1_table = AlterTableDropPartition( + UnresolvedTable(Seq("table_name")), Seq( - Map("dt" -> "2008-08-08", "country" -> "us"), - Map("dt" -> "2009-09-09", "country" -> "uk")), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), ifExists = true, purge = false, retainData = false) @@ -1789,9 +1789,9 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsed1_purge, expected1_purge) val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" - val expected3_table = AlterTableDropPartitionStatement( - Seq("a", "b", "c"), - Seq(Map("ds" -> "2017-06-10")), + val expected3_table = AlterTableDropPartition( + UnresolvedTable(Seq("a", "b", "c")), + Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, purge = false, retainData = false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala new file 
mode 100644 index 0000000000000..aebfe5af41825 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTableCatalog.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util + +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier, Table} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.types.StructType + +class InMemoryPartitionTableCatalog extends InMemoryTableCatalog { + import CatalogV2Implicits._ + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + if (tables.containsKey(ident)) { + throw new TableAlreadyExistsException(ident) + } + + InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties) + + val table = new InMemoryAtomicPartitionTable( + s"$name.${ident.quoted}", schema, partitions, properties) + tables.put(ident, table) + namespaces.putIfAbsent(ident.namespace.toList, Map()) + table + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index ff25272aebb5b..bd9120a1fbe78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} @@ -42,6 +42,7 @@ class ResolveSessionCatalog( extends Rule[LogicalPlan] with LookupCatalog { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.CatalogV2Util._ + import 
org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case AlterTableAddColumnsStatement( @@ -497,11 +498,10 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, "ALTER TABLE RECOVER PARTITIONS") - case AlterTableAddPartitionStatement(tbl, partitionSpecsAndLocs, ifNotExists) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE ADD PARTITION") + case AlterTableAddPartition(ResolvedV1TableIdentifier(ident), partSpecsAndLocs, ifNotExists) => AlterTableAddPartitionCommand( - v1TableName.asTableIdentifier, - partitionSpecsAndLocs, + ident.asTableIdentifier, + partSpecsAndLocs.asUnresolvedPartitionSpecs.map(spec => (spec.spec, spec.location)), ifNotExists) case AlterTableRenamePartitionStatement(tbl, from, to) => @@ -511,11 +511,11 @@ class ResolveSessionCatalog( from, to) - case AlterTableDropPartitionStatement(tbl, specs, ifExists, purge, retainData) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE DROP PARTITION") + case AlterTableDropPartition( + ResolvedV1TableIdentifier(ident), specs, ifExists, purge, retainData) => AlterTableDropPartitionCommand( - v1TableName.asTableIdentifier, - specs, + ident.asTableIdentifier, + specs.asUnresolvedPartitionSpecs.map(_.spec), ifExists, purge, retainData) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala new file mode 100644 index 0000000000000..0171cdd9ca41a --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvedPartitionSpec} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement} + +/** + * Physical plan node for adding partitions of table. 
+ */ +case class AlterTableAddPartitionExec( + table: SupportsPartitionManagement, + partSpecs: Seq[ResolvedPartitionSpec], + ignoreIfExists: Boolean) extends V2CommandExec { + import DataSourceV2Implicits._ + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + val (existsParts, notExistsParts) = + partSpecs.partition(p => table.partitionExists(p.spec)) + + if (existsParts.nonEmpty && !ignoreIfExists) { + throw new PartitionsAlreadyExistException( + table.name(), existsParts.map(_.spec), table.partitionSchema()) + } + + notExistsParts match { + case Seq() => // Nothing will be done + case Seq(partitionSpec) => + val partProp = partitionSpec.location.map(loc => "location" -> loc).toMap + table.createPartition(partitionSpec.spec, partProp.asJava) + case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => + val partIdents = notExistsParts.map(_.spec) + val partProps = notExistsParts.map(_.location.map(loc => "location" -> loc).toMap) + table.asAtomicPartitionable + .createPartitions( + partIdents.toArray, + partProps.map(_.asJava).toArray) + case _ => + throw new UnsupportedOperationException( + s"Nonatomic partition table ${table.name()} can not add multiple partitions.") + } + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala new file mode 100644 index 0000000000000..09a65804a05eb --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, ResolvedPartitionSpec} +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement} + +/** + * Physical plan node for dropping partitions of table. 
+ */ +case class AlterTableDropPartitionExec( + table: SupportsPartitionManagement, + partSpecs: Seq[ResolvedPartitionSpec], + ignoreIfNotExists: Boolean) extends V2CommandExec { + import DataSourceV2Implicits._ + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + val (existsPartIdents, notExistsPartIdents) = + partSpecs.map(_.spec).partition(table.partitionExists) + + if (notExistsPartIdents.nonEmpty && !ignoreIfNotExists) { + throw new NoSuchPartitionsException( + table.name(), notExistsPartIdents, table.partitionSchema()) + } + + existsPartIdents match { + case Seq() => // Nothing will be done + case Seq(partIdent) => + table.dropPartition(partIdent) + case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => + table.asAtomicPartitionable.dropPartitions(existsPartIdents.toArray) + case _ => + throw new UnsupportedOperationException( + s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") + } + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 48fa88ed550b6..a82f86ea952d9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy @@ -283,6 +283,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AnalyzeTable(_: ResolvedTable, _, _) | AnalyzeColumn(_: ResolvedTable, _, _) => throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") + case AlterTableAddPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfExists) => + AlterTableAddPartitionExec( + table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil + + case AlterTableDropPartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _, _) => + AlterTableDropPartitionExec( + table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala new file mode 100644 index 0000000000000..107d0ea47249d --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -0,0 +1,162 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits + +class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { + + import CatalogV2Implicits._ + import DataSourceV2Implicits._ + + + test("ALTER TABLE RECOVER PARTITIONS") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t RECOVER PARTITIONS") + } + assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) + } + } + + test("ALTER TABLE ADD PARTITION") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") + } + } + + test("ALTER TABLE ADD PARTITIONS") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql( + s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'") + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") + + val partMetadata1 = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(2))) + assert(partMetadata1.containsKey("location")) + assert(partMetadata1.get("location") == "loc1") + } + } + + test("ALTER TABLE ADD PARTITIONS: partition already exists") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data 
string) USING foo PARTITIONED BY (id)") + spark.sql( + s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + assertThrows[PartitionsAlreadyExistException]( + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'")) + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) + } + } + + test("ALTER TABLE RENAME PARTITION") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + val e = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + } + assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) + } + } + + test("ALTER TABLE DROP PARTITION") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + } + } + + test("ALTER TABLE DROP PARTITIONS") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) + assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + } + } + + test("ALTER TABLE DROP PARTITIONS: partition not exists") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + assertThrows[NoSuchPartitionsException]( + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)")) + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) + assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 68de55f03ba83..c480df323ddc2 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,7 +24,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME @@ -39,19 +40,16 @@ import org.apache.spark.util.Utils class DataSourceV2SQLSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = true) - with AlterTableTests { + with AlterTableTests with DatasourceV2SQLBase { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ private val v2Source = classOf[FakeV2Provider].getName override protected val v2Format = v2Source override protected val catalogAndNamespace = "testcat.ns1.ns2." private val defaultUser: String = Utils.getCurrentUserName() - private def catalog(name: String): CatalogPlugin = { - spark.sessionState.catalogManager.catalog(name) - } - protected def doInsert(tableName: String, insert: DataFrame, mode: SaveMode): Unit = { val tmpView = "tmp_view" withTempView(tmpView) { @@ -72,26 +70,6 @@ class DataSourceV2SQLSuite v2Catalog.loadTable(Identifier.of(namespace, nameParts.last)) } - before { - spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) - spark.conf.set( - "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) - spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) - spark.conf.set( - V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) - - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") - df2.createOrReplaceTempView("source2") - } - - after { - spark.sessionState.catalog.reset() - spark.sessionState.catalogManager.reset() - spark.sessionState.conf.clear() - } - test("CreateTable: use v2 plan because catalog is set") { spark.sql("CREATE TABLE testcat.table_name (id bigint NOT NULL, data string) USING foo") @@ -2011,13 +1989,18 @@ class DataSourceV2SQLSuite } test("ALTER TABLE ADD PARTITION") { - val t = "testcat.ns1.ns2.tbl" + val t = "testpart.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - } - assert(e.message.contains("ALTER TABLE ADD PARTITION is only supported with v1 tables")) + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val partTable = catalog("testpart").asTableCatalog + 
.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") == "loc") } } @@ -2032,14 +2015,16 @@ class DataSourceV2SQLSuite } } - test("ALTER TABLE DROP PARTITIONS") { - val t = "testcat.ns1.ns2.tbl" + test("ALTER TABLE DROP PARTITION") { + val t = "testpart.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - } - assert(e.message.contains("ALTER TABLE DROP PARTITION is only supported with v1 tables")) + spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + + val partTable = + catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala new file mode 100644 index 0000000000000..8922eea8e0ae6 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DatasourceV2SQLBase.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.connector.catalog.CatalogPlugin +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION +import org.apache.spark.sql.test.SharedSparkSession + +trait DatasourceV2SQLBase + extends QueryTest with SharedSparkSession with BeforeAndAfter { + + protected def catalog(name: String): CatalogPlugin = { + spark.sessionState.catalogManager.catalog(name) + } + + before { + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.testpart", classOf[InMemoryPartitionTableCatalog].getName) + spark.conf.set( + "spark.sql.catalog.testcat_atomic", classOf[StagingInMemoryTableCatalog].getName) + spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) + spark.conf.set( + V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[InMemoryTableSessionCatalog].getName) + + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val df2 = spark.createDataFrame(Seq((4L, "d"), (5L, "e"), (6L, "f"))).toDF("id", "data") + df2.createOrReplaceTempView("source2") + } + + after { + spark.sessionState.catalog.reset() + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.clear() + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index f5d6ea929a9aa..87a5cb9f73355 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -138,8 +138,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertNoSuchTable(s"ALTER TABLE $viewName SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") - assertNoSuchTable(s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assertNoSuchTable(s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") assertNoSuchTable(s"ALTER TABLE $viewName RECOVER PARTITIONS") @@ -147,6 +145,12 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertAnalysisError( s"ALTER TABLE $viewName SET LOCATION '/path/to/your/lovely/heart'", s"'$viewName' is a view not a table") + assertAnalysisError( + s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", + s"$viewName is a temp view not table") + assertAnalysisError( + s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')", + s"$viewName is a temp view not table") // For the following v2 ALERT TABLE statements, unsupported operations are checked first // before resolving the relations. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 44c551cf4a4c1..1f15bd685b239 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -815,6 +815,11 @@ class HiveDDLSuite } } + private def assertAnalysisError(sqlText: String, message: String): Unit = { + val e = intercept[AnalysisException](sql(sqlText)) + assert(e.message.contains(message)) + } + private def assertErrorForAlterTableOnView(sqlText: String): Unit = { val message = intercept[AnalysisException](sql(sqlText)).getMessage assert(message.contains("Cannot alter a view with ALTER TABLE. Please use ALTER VIEW instead")) @@ -892,16 +897,18 @@ class HiveDDLSuite assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')") - - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RECOVER PARTITIONS") assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") + assertAnalysisError( + s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", + s"$oldViewName is a view not table") + assertAnalysisError( + s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')", + s"$oldViewName is a view not table") + assert(catalog.tableExists(TableIdentifier(tabName))) assert(catalog.tableExists(TableIdentifier(oldViewName))) assert(!catalog.tableExists(TableIdentifier(newViewName))) From 4b76a74f1c0b5d9bd11794eefd739352764c88c4 Mon Sep 17 00:00:00 2001 From: zero323 Date: Thu, 12 Nov 2020 00:13:17 +0900 Subject: [PATCH 0446/1009] [SPARK-33415][PYTHON][SQL] Don't encode JVM response in Column.__repr__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Removes encoding of the JVM response in `pyspark.sql.column.Column.__repr__`. ### Why are the changes needed? API consistency and improved readability of the expressions. ### Does this PR introduce _any_ user-facing change? Before this change, `col("abc")` and `col("wąż")` result in `Column<b'abc'>` and `Column<b'w\xc4\x85\xc5\xbc'>`, respectively. After this change we'll get `Column<'abc'>` and `Column<'wąż'>`. ### How was this patch tested? Existing tests and manual inspection. Closes #30322 from zero323/SPARK-33415.
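For illustration only (not part of the patch), a minimal PySpark snippet that shows the repr change described above; the local session setup is an assumption of this sketch, and the "old" output noted in the comments assumes Python 3, where `.encode('utf8')` embeds a `bytes` literal in the repr.
```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# A running session is needed because Column.__repr__ calls into the JVM.
spark = SparkSession.builder.master("local[1]").getOrCreate()

# Old repr (with .encode('utf8')): Column<b'abc'>, Column<b'w\xc4\x85\xc5\xbc'>
# New repr (no encoding):          Column<'abc'>,  Column<'wąż'>
print(repr(col("abc")))
print(repr(col("wąż")))
```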
Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/pyspark/sql/column.py | 2 +- python/pyspark/sql/tests/test_column.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3dd08d88e92c4..345e81bd2d73e 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -906,7 +906,7 @@ def __nonzero__(self): __bool__ = __nonzero__ def __repr__(self): - return 'Column<%s>' % self._jc.toString().encode('utf8') + return "Column<'%s'>" % self._jc.toString() def _test(): diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 4b4ac3bf9cd6c..4a9c7106a12b0 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -116,6 +116,7 @@ def test_column_name_with_non_ascii(self): self.assertEqual([("数量", 'bigint')], df.dtypes) self.assertEqual(1, df.select("数量").first()[0]) self.assertEqual(1, df.select(df["数量"]).first()[0]) + self.assertTrue(columnName in repr(df[columnName])) def test_field_accessor(self): df = self.sc.parallelize([Row(l=[1], r=Row(a=1, b="b"), d={"k": "v"})]).toDF() From 7e867298fed60db670e40013524ed41b1ab46215 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 11 Nov 2020 08:50:43 -0800 Subject: [PATCH 0447/1009] [SPARK-33404][SQL][FOLLOWUP] Update benchmark results for `date_trunc` ### What changes were proposed in this pull request? Updated results of `DateTimeBenchmark` in the environment: | Item | Description | | ---- | ----| | Region | us-west-2 (Oregon) | | Instance | r3.xlarge (spot instance) | | AMI | ami-06f2f779464715dc5 (ubuntu/images/hvm-ssd/ubuntu-bionic-18.04-amd64-server-20190722.1) | | Java | OpenJDK8/11 installed by`sudo add-apt-repository ppa:openjdk-r/ppa` & `sudo apt install openjdk-11-jdk`| ### Why are the changes needed? The fix https://github.com/apache/spark/pull/30303 slowed down `date_trunc`. This PR updates benchmark results to have actual info about performance of `date_trunc`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By regenerating benchmark results: ``` $ SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.DateTimeBenchmark" ``` Closes #30338 from MaxGekk/fix-trunc_date-benchmark. 
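As a rough, hedged illustration of what the updated numbers below measure, this sketch times a `date_trunc('MINUTE', ...)` projection over generated timestamps; it is not the `DateTimeBenchmark` harness itself, and the row count, local master, and `noop` sink are arbitrary choices made for this example.
```python
import time

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()

# 10M rows of synthetic timestamps (bigint ids reinterpreted as epoch seconds).
df = spark.range(10 * 1000 * 1000).selectExpr("CAST(id AS timestamp) AS ts")

start = time.time()
# The 'noop' sink forces full evaluation of the projection without collecting rows.
df.selectExpr("date_trunc('MINUTE', ts)") \
    .write.format("noop").mode("overwrite").save()
print(f"date_trunc('MINUTE') over 10M rows: {time.time() - start:.2f} s")
```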
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../DateTimeBenchmark-jdk11-results.txt | 372 +++++++++--------- .../benchmarks/DateTimeBenchmark-results.txt | 372 +++++++++--------- 2 files changed, 372 insertions(+), 372 deletions(-) diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt index d84dccbf6c266..b787eff7029e6 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-jdk11-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1550 1609 83 6.5 155.0 1.0X -date + interval(m, d) 1572 1575 5 6.4 157.2 1.0X -date + interval(m, d, ms) 6512 6512 0 1.5 651.2 0.2X -date - interval(m) 1469 1489 28 6.8 146.9 1.1X -date - interval(m, d) 1558 1572 19 6.4 155.8 1.0X -date - interval(m, d, ms) 6602 6605 4 1.5 660.2 0.2X -timestamp + interval(m) 2945 2961 23 3.4 294.5 0.5X -timestamp + interval(m, d) 3075 3083 12 3.3 307.5 0.5X -timestamp + interval(m, d, ms) 3421 3430 13 2.9 342.1 0.5X -timestamp - interval(m) 3050 3061 17 3.3 305.0 0.5X -timestamp - interval(m, d) 3195 3201 8 3.1 319.5 0.5X -timestamp - interval(m, d, ms) 3442 3450 11 2.9 344.2 0.5X +date + interval(m) 1556 1667 157 6.4 155.6 1.0X +date + interval(m, d) 1582 1593 16 6.3 158.2 1.0X +date + interval(m, d, ms) 6619 6625 9 1.5 661.9 0.2X +date - interval(m) 1463 1475 16 6.8 146.3 1.1X +date - interval(m, d) 1569 1589 29 6.4 156.9 1.0X +date - interval(m, d, ms) 6638 6641 5 1.5 663.8 0.2X +timestamp + interval(m) 3153 3159 7 3.2 315.3 0.5X +timestamp + interval(m, d) 3230 3234 7 3.1 323.0 0.5X +timestamp + interval(m, d, ms) 3309 3313 5 3.0 330.9 0.5X +timestamp - interval(m) 2897 2900 4 3.5 289.7 0.5X +timestamp - interval(m, d) 3018 3019 1 3.3 301.8 0.5X +timestamp - interval(m, d, ms) 3313 3317 5 3.0 331.3 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 320 326 8 31.2 32.0 1.0X -cast to timestamp wholestage on 289 297 5 34.6 28.9 1.1X +cast to timestamp wholestage off 314 319 7 31.8 31.4 1.0X +cast to timestamp wholestage on 289 305 12 34.6 28.9 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 1266 1266 1 7.9 126.6 1.0X -year of timestamp wholestage on 1233 1253 15 8.1 123.3 1.0X +year of timestamp wholestage off 1237 1247 14 8.1 123.7 1.0X +year of timestamp wholestage on 1242 1251 11 8.0 124.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 1594 1600 8 6.3 159.4 1.0X -quarter of timestamp wholestage on 1529 1532 3 6.5 152.9 1.0X +quarter of timestamp wholestage off 1589 1590 2 6.3 158.9 1.0X +quarter of timestamp wholestage on 1541 1556 11 6.5 154.1 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 1239 1257 25 8.1 123.9 1.0X -month of timestamp wholestage on 1235 1243 5 8.1 123.5 1.0X +month of timestamp wholestage off 1236 1252 23 8.1 123.6 1.0X +month of timestamp wholestage on 1226 1232 5 8.2 122.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 2209 2216 9 4.5 220.9 1.0X -weekofyear of timestamp wholestage on 1831 1838 9 5.5 183.1 1.2X +weekofyear of timestamp wholestage off 1877 1879 3 5.3 187.7 1.0X +weekofyear of timestamp wholestage on 1852 1872 28 5.4 185.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 1238 1238 0 8.1 123.8 1.0X -day of timestamp wholestage on 1223 1235 12 8.2 122.3 1.0X +day of timestamp wholestage off 1260 1262 3 7.9 126.0 1.0X +day of timestamp wholestage on 1230 1238 9 8.1 123.0 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 1302 1304 3 7.7 130.2 
1.0X -dayofyear of timestamp wholestage on 1269 1276 6 7.9 126.9 1.0X +dayofyear of timestamp wholestage off 1281 1285 7 7.8 128.1 1.0X +dayofyear of timestamp wholestage on 1268 1272 6 7.9 126.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 1251 1253 3 8.0 125.1 1.0X -dayofmonth of timestamp wholestage on 1225 1232 9 8.2 122.5 1.0X +dayofmonth of timestamp wholestage off 1280 1287 9 7.8 128.0 1.0X +dayofmonth of timestamp wholestage on 1232 1237 5 8.1 123.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 1424 1424 1 7.0 142.4 1.0X -dayofweek of timestamp wholestage on 1385 1389 4 7.2 138.5 1.0X +dayofweek of timestamp wholestage off 1417 1419 4 7.1 141.7 1.0X +dayofweek of timestamp wholestage on 1419 1435 19 7.0 141.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 1366 1366 0 7.3 136.6 1.0X -weekday of timestamp wholestage on 1320 1325 5 7.6 132.0 1.0X +weekday of timestamp wholestage off 1353 1359 8 7.4 135.3 1.0X +weekday of timestamp wholestage on 1338 1345 7 7.5 133.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 985 986 1 10.2 98.5 1.0X -hour of timestamp wholestage on 974 981 10 10.3 97.4 1.0X +hour of timestamp wholestage off 985 998 17 10.1 98.5 1.0X +hour of timestamp wholestage on 935 938 3 10.7 93.5 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 1044 1047 5 9.6 104.4 1.0X -minute of timestamp wholestage on 984 994 17 10.2 98.4 1.1X +minute of timestamp wholestage off 1053 1053 0 9.5 105.3 1.0X +minute of timestamp 
wholestage on 934 940 9 10.7 93.4 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 999 1003 6 10.0 99.9 1.0X -second of timestamp wholestage on 961 974 8 10.4 96.1 1.0X +second of timestamp wholestage off 978 983 7 10.2 97.8 1.0X +second of timestamp wholestage on 935 944 9 10.7 93.5 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 297 302 7 33.6 29.7 1.0X -current_date wholestage on 270 283 22 37.1 27.0 1.1X +current_date wholestage off 297 299 2 33.6 29.7 1.0X +current_date wholestage on 273 283 11 36.6 27.3 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 302 310 11 33.1 30.2 1.0X -current_timestamp wholestage on 264 351 98 37.9 26.4 1.1X +current_timestamp wholestage off 300 365 92 33.4 30.0 1.0X +current_timestamp wholestage on 276 381 91 36.3 27.6 1.1X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 1083 1083 1 9.2 108.3 1.0X -cast to date wholestage on 1040 1044 5 9.6 104.0 1.0X +cast to date wholestage off 1073 1087 20 9.3 107.3 1.0X +cast to date wholestage on 1009 1016 7 9.9 100.9 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 1258 1258 0 7.9 125.8 1.0X -last_day wholestage on 1244 
1254 8 8.0 124.4 1.0X +last_day wholestage off 1253 1254 2 8.0 125.3 1.0X +last_day wholestage on 1247 1257 10 8.0 124.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 1133 1135 3 8.8 113.3 1.0X -next_day wholestage on 1093 1100 7 9.1 109.3 1.0X +next_day wholestage off 1150 1150 1 8.7 115.0 1.0X +next_day wholestage on 1061 1066 5 9.4 106.1 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 1065 1074 14 9.4 106.5 1.0X -date_add wholestage on 1044 1053 6 9.6 104.4 1.0X +date_add wholestage off 1062 1068 9 9.4 106.2 1.0X +date_add wholestage on 1049 1056 8 9.5 104.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 1069 1076 9 9.4 106.9 1.0X -date_sub wholestage on 1047 1052 8 9.6 104.7 1.0X +date_sub wholestage off 1063 1067 6 9.4 106.3 1.0X +date_sub wholestage on 1043 1061 26 9.6 104.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1417 1430 18 7.1 141.7 1.0X -add_months wholestage on 1439 1445 5 6.9 143.9 1.0X +add_months wholestage off 1427 1434 10 7.0 142.7 1.0X +add_months wholestage on 1436 1449 11 7.0 143.6 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 5228 5232 6 1.9 522.8 1.0X -format date wholestage on 5172 5193 17 1.9 517.2 1.0X +format date wholestage off 5200 5214 19 1.9 520.0 1.0X +format date wholestage on 5404 5424 14 1.9 540.4 1.0X 
================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 6941 6952 16 1.4 694.1 1.0X -from_unixtime wholestage on 6898 6926 32 1.4 689.8 1.0X +from_unixtime wholestage off 7493 7494 2 1.3 749.3 1.0X +from_unixtime wholestage on 7506 7514 7 1.3 750.6 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 1339 1342 5 7.5 133.9 1.0X -from_utc_timestamp wholestage on 1285 1292 5 7.8 128.5 1.0X +from_utc_timestamp wholestage off 1314 1317 4 7.6 131.4 1.0X +from_utc_timestamp wholestage on 1273 1279 6 7.9 127.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1697 1717 29 5.9 169.7 1.0X -to_utc_timestamp wholestage on 1656 1665 13 6.0 165.6 1.0X +to_utc_timestamp wholestage off 1751 1752 1 5.7 175.1 1.0X +to_utc_timestamp wholestage on 1711 1716 6 5.8 171.1 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 333 344 16 30.1 33.3 1.0X -cast interval wholestage on 288 290 2 34.7 28.8 1.2X +cast interval wholestage off 332 337 7 30.1 33.2 1.0X +cast interval wholestage on 288 289 1 34.7 28.8 1.2X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1857 1860 4 5.4 185.7 1.0X -datediff wholestage on 1795 1808 10 5.6 179.5 1.0X +datediff wholestage off 1850 1852 3 5.4 185.0 1.0X +datediff wholestage on 1783 1791 5 5.6 178.3 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 5826 5834 11 1.7 582.6 1.0X -months_between wholestage on 5737 5763 18 1.7 573.7 1.0X +months_between wholestage off 5540 5545 8 1.8 554.0 1.0X +months_between wholestage on 5474 5482 8 1.8 547.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 2220 2246 36 0.5 2220.4 1.0X -window wholestage on 46696 46794 89 0.0 46696.1 0.0X +window wholestage off 2200 2309 154 0.5 2200.0 1.0X +window wholestage on 47429 47483 35 0.0 47428.8 0.0X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 2658 2659 1 3.8 265.8 1.0X -date_trunc YEAR wholestage on 2691 2700 8 3.7 269.1 1.0X +date_trunc YEAR wholestage off 2587 2591 5 3.9 258.7 1.0X +date_trunc YEAR wholestage on 2531 2548 11 4.0 253.1 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 2671 2679 11 3.7 267.1 1.0X -date_trunc YYYY wholestage on 2700 2706 6 3.7 270.0 1.0X +date_trunc YYYY wholestage off 2595 2596 1 3.9 259.5 1.0X +date_trunc YYYY wholestage on 2532 2537 9 3.9 253.2 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY 
wholestage off 2674 2689 20 3.7 267.4 1.0X -date_trunc YY wholestage on 2697 2716 17 3.7 269.7 1.0X +date_trunc YY wholestage off 2604 2604 1 3.8 260.4 1.0X +date_trunc YY wholestage on 2529 2539 7 4.0 252.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 2695 2700 7 3.7 269.5 1.0X -date_trunc MON wholestage on 2711 2722 11 3.7 271.1 1.0X +date_trunc MON wholestage off 2601 2606 7 3.8 260.1 1.0X +date_trunc MON wholestage on 2544 2551 5 3.9 254.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 2682 2685 4 3.7 268.2 1.0X -date_trunc MONTH wholestage on 2709 2727 15 3.7 270.9 1.0X +date_trunc MONTH wholestage off 2596 2597 1 3.9 259.6 1.0X +date_trunc MONTH wholestage on 2547 2552 8 3.9 254.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 2683 2693 14 3.7 268.3 1.0X -date_trunc MM wholestage on 2706 2722 16 3.7 270.6 1.0X +date_trunc MM wholestage off 2598 2598 1 3.8 259.8 1.0X +date_trunc MM wholestage on 2545 2550 5 3.9 254.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 2292 2299 10 4.4 229.2 1.0X -date_trunc DAY wholestage on 2290 2311 14 4.4 229.0 1.0X +date_trunc DAY wholestage off 2248 2249 2 4.4 224.8 1.0X +date_trunc DAY wholestage on 2215 2222 6 4.5 221.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 2302 2309 9 4.3 230.2 1.0X -date_trunc DD wholestage on 2282 2292 6 4.4 228.2 1.0X +date_trunc DD wholestage off 2244 2251 9 4.5 224.4 1.0X +date_trunc DD wholestage on 2214 2220 6 4.5 221.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 
64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 2288 2288 0 4.4 228.8 1.0X -date_trunc HOUR wholestage on 2277 2290 14 4.4 227.7 1.0X +date_trunc HOUR wholestage off 2208 2211 3 4.5 220.8 1.0X +date_trunc HOUR wholestage on 2228 2233 3 4.5 222.8 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 400 419 26 25.0 40.0 1.0X -date_trunc MINUTE wholestage on 401 405 4 24.9 40.1 1.0X +date_trunc MINUTE wholestage off 2230 2238 11 4.5 223.0 1.0X +date_trunc MINUTE wholestage on 2217 2225 11 4.5 221.7 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 408 414 9 24.5 40.8 1.0X -date_trunc SECOND wholestage on 408 413 8 24.5 40.8 1.0X +date_trunc SECOND wholestage off 353 362 12 28.3 35.3 1.0X +date_trunc SECOND wholestage on 333 336 3 30.0 33.3 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 2623 2631 12 3.8 262.3 1.0X -date_trunc WEEK wholestage on 2613 2621 8 3.8 261.3 1.0X +date_trunc WEEK wholestage off 2473 2478 7 4.0 247.3 1.0X +date_trunc WEEK wholestage on 2439 2462 33 4.1 243.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 3518 3520 3 2.8 351.8 1.0X -date_trunc QUARTER wholestage on 3501 3510 11 2.9 350.1 1.0X +date_trunc QUARTER wholestage off 3163 3165 3 3.2 316.3 1.0X +date_trunc QUARTER wholestage on 3129 3142 13 3.2 312.9 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 315 333 26 31.8 31.5 1.0X -trunc year wholestage on 352 360 7 28.4 35.2 0.9X +trunc year wholestage off 309 311 3 32.4 30.9 1.0X +trunc year wholestage on 325 332 4 30.8 32.5 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 321 321 1 31.2 32.1 1.0X -trunc yyyy wholestage on 354 358 5 28.3 35.4 0.9X +trunc yyyy wholestage off 319 320 2 31.4 31.9 1.0X +trunc yyyy wholestage on 324 328 4 30.9 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 312 313 1 32.0 31.2 1.0X -trunc yy wholestage on 355 360 5 28.2 35.5 0.9X +trunc yy wholestage off 311 313 3 32.2 31.1 1.0X +trunc yy wholestage on 324 330 4 30.8 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 324 327 4 30.9 32.4 1.0X -trunc mon wholestage on 355 357 2 28.2 35.5 0.9X +trunc mon wholestage off 310 313 4 32.2 31.0 1.0X +trunc mon wholestage on 326 329 4 30.7 32.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 313 318 8 32.0 31.3 1.0X -trunc month wholestage on 354 358 5 28.3 35.4 0.9X +trunc month wholestage off 308 318 13 32.4 30.8 1.0X +trunc month wholestage on 324 326 3 30.9 32.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 314 325 15 31.8 31.4 1.0X -trunc mm wholestage on 353 366 17 28.4 35.3 0.9X +trunc mm wholestage off 309 314 7 32.4 30.9 1.0X +trunc mm wholestage on 323 329 5 31.0 32.3 1.0X ================================================================================================ Parsing 
================================================================================================ -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 168 169 0 5.9 168.4 1.0X -to timestamp str wholestage on 168 173 7 6.0 167.6 1.0X +to timestamp str wholestage off 172 174 2 5.8 172.4 1.0X +to timestamp str wholestage on 171 174 4 5.9 170.6 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 1390 1390 0 0.7 1389.8 1.0X -to_timestamp wholestage on 1204 1215 11 0.8 1204.2 1.2X +to_timestamp wholestage off 1410 1411 2 0.7 1410.4 1.0X +to_timestamp wholestage on 1364 1375 10 0.7 1364.4 1.0X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 1277 1281 4 0.8 1277.5 1.0X -to_unix_timestamp wholestage on 1203 1213 11 0.8 1202.6 1.1X +to_unix_timestamp wholestage off 1449 1453 6 0.7 1449.2 1.0X +to_unix_timestamp wholestage on 1379 1389 9 0.7 1379.5 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 218 219 1 4.6 218.2 1.0X -to date str wholestage on 211 214 5 4.7 210.8 1.0X +to date str wholestage off 228 231 4 4.4 228.1 1.0X +to date str wholestage on 211 213 1 4.7 210.6 1.1X -OpenJDK 64-Bit Server VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 3016 3041 35 0.3 3016.1 1.0X -to_date wholestage on 3015 3023 9 0.3 3014.6 1.0X +to_date wholestage off 3147 3173 37 0.3 3147.0 1.0X +to_date wholestage on 3123 3137 13 0.3 3123.0 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server 
VM 11.0.7+10-post-Ubuntu-2ubuntu218.04 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 11.0.9+11-Ubuntu-0ubuntu1.18.04.1 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 430 442 18 11.6 86.0 1.0X -From java.time.LocalDate 351 354 3 14.3 70.2 1.2X -Collect java.sql.Date 2095 2853 733 2.4 418.9 0.2X -Collect java.time.LocalDate 1691 1910 209 3.0 338.3 0.3X -From java.sql.Timestamp 276 280 4 18.1 55.2 1.6X -From java.time.Instant 324 328 4 15.4 64.8 1.3X -Collect longs 1348 1450 126 3.7 269.5 0.3X -Collect java.sql.Timestamp 1441 1478 62 3.5 288.3 0.3X -Collect java.time.Instant 1471 1579 100 3.4 294.3 0.3X -java.sql.Date to Hive string 12049 12909 862 0.4 2409.8 0.0X -java.time.LocalDate to Hive string 12045 12130 74 0.4 2408.9 0.0X -java.sql.Timestamp to Hive string 12854 13376 510 0.4 2570.9 0.0X -java.time.Instant to Hive string 15057 15184 115 0.3 3011.4 0.0X +From java.sql.Date 403 414 13 12.4 80.6 1.0X +From java.time.LocalDate 342 346 4 14.6 68.4 1.2X +Collect java.sql.Date 2122 2549 639 2.4 424.4 0.2X +Collect java.time.LocalDate 1833 2034 175 2.7 366.5 0.2X +From java.sql.Timestamp 244 250 6 20.5 48.8 1.7X +From java.time.Instant 315 316 1 15.9 63.0 1.3X +Collect longs 1436 1452 19 3.5 287.2 0.3X +Collect java.sql.Timestamp 1685 1698 14 3.0 337.0 0.2X +Collect java.time.Instant 1722 2022 278 2.9 344.4 0.2X +java.sql.Date to Hive string 14996 16316 1670 0.3 2999.2 0.0X +java.time.LocalDate to Hive string 13774 13942 160 0.4 2754.8 0.0X +java.sql.Timestamp to Hive string 15346 15775 435 0.3 3069.3 0.0X +java.time.Instant to Hive string 17731 18153 444 0.3 3546.1 0.0X diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt index ebfcb45f30ce0..8e22dbbd8b8b3 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1636 1653 24 6.1 163.6 1.0X -date + interval(m, d) 1802 1818 23 5.5 180.2 0.9X -date + interval(m, d, ms) 6330 6348 26 1.6 633.0 0.3X -date - interval(m) 1462 1484 32 6.8 146.2 1.1X -date - interval(m, d) 1732 1732 1 5.8 173.2 0.9X -date - interval(m, d, ms) 6494 6505 16 1.5 649.4 0.3X -timestamp + interval(m) 2446 2446 0 4.1 244.6 0.7X -timestamp + interval(m, d) 2670 2703 46 3.7 267.0 0.6X -timestamp + interval(m, d, ms) 2992 3012 29 3.3 299.2 0.5X -timestamp - interval(m) 2447 2449 3 4.1 244.7 0.7X -timestamp - interval(m, d) 2739 2739 0 3.7 273.9 0.6X -timestamp - interval(m, d, ms) 2977 2983 8 3.4 297.7 0.5X +date + interval(m) 1651 1690 56 6.1 165.1 1.0X +date + interval(m, d) 1826 1833 10 5.5 182.6 0.9X +date + interval(m, d, ms) 6522 6534 17 1.5 652.2 0.3X +date - interval(m) 1465 1473 12 6.8 146.5 1.1X +date - interval(m, d) 
1728 1734 9 5.8 172.8 1.0X +date - interval(m, d, ms) 6757 6765 12 1.5 675.7 0.2X +timestamp + interval(m) 2686 2696 14 3.7 268.6 0.6X +timestamp + interval(m, d) 2979 2982 4 3.4 297.9 0.6X +timestamp + interval(m, d, ms) 3483 3507 33 2.9 348.3 0.5X +timestamp - interval(m) 2856 2858 3 3.5 285.6 0.6X +timestamp - interval(m, d) 3167 3169 3 3.2 316.7 0.5X +timestamp - interval(m, d, ms) 3475 3477 2 2.9 347.5 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 312 321 13 32.1 31.2 1.0X -cast to timestamp wholestage on 290 311 14 34.5 29.0 1.1X +cast to timestamp wholestage off 309 312 5 32.4 30.9 1.0X +cast to timestamp wholestage on 292 302 8 34.2 29.2 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 1226 1228 3 8.2 122.6 1.0X -year of timestamp wholestage on 1214 1222 10 8.2 121.4 1.0X +year of timestamp wholestage off 1228 1228 0 8.1 122.8 1.0X +year of timestamp wholestage on 1213 1227 18 8.2 121.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 1437 1447 14 7.0 143.7 1.0X -quarter of timestamp wholestage on 1354 1359 4 7.4 135.4 1.1X +quarter of timestamp wholestage off 1433 1440 9 7.0 143.3 1.0X +quarter of timestamp wholestage on 1344 1349 4 7.4 134.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 1219 1219 1 8.2 121.9 1.0X -month of timestamp wholestage on 1205 1211 7 8.3 120.5 1.0X +month of timestamp wholestage off 1229 1232 5 8.1 122.9 1.0X +month of timestamp wholestage on 1201 1207 6 8.3 120.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekofyear of timestamp: Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 1849 1854 7 5.4 184.9 1.0X -weekofyear of timestamp wholestage on 1829 1835 5 5.5 182.9 1.0X +weekofyear of timestamp wholestage off 1921 1931 14 5.2 192.1 1.0X +weekofyear of timestamp wholestage on 1864 1881 16 5.4 186.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 1224 1230 8 8.2 122.4 1.0X -day of timestamp wholestage on 1204 1215 10 8.3 120.4 1.0X +day of timestamp wholestage off 1223 1225 2 8.2 122.3 1.0X +day of timestamp wholestage on 1204 1215 7 8.3 120.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 1272 1275 5 7.9 127.2 1.0X -dayofyear of timestamp wholestage on 1246 1256 7 8.0 124.6 1.0X +dayofyear of timestamp wholestage off 1261 1266 8 7.9 126.1 1.0X +dayofyear of timestamp wholestage on 1236 1260 15 8.1 123.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 1226 1233 11 8.2 122.6 1.0X -dayofmonth of timestamp wholestage on 1205 1211 5 8.3 120.5 1.0X +dayofmonth of timestamp wholestage off 1243 1250 10 8.0 124.3 1.0X +dayofmonth of timestamp wholestage on 1203 1214 11 8.3 120.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 1420 1427 9 7.0 142.0 1.0X -dayofweek of timestamp wholestage on 1375 1385 11 7.3 137.5 1.0X +dayofweek of timestamp wholestage off 1400 1409 13 7.1 140.0 1.0X +dayofweek of timestamp wholestage on 1374 1385 10 7.3 137.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 1345 1347 3 7.4 134.5 1.0X -weekday of timestamp wholestage on 1316 1322 5 7.6 131.6 1.0X +weekday of timestamp wholestage off 1355 1358 4 7.4 135.5 1.0X +weekday of timestamp wholestage on 1319 1328 8 7.6 131.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 983 984 1 10.2 98.3 1.0X -hour of timestamp wholestage on 942 953 8 10.6 94.2 1.0X +hour of timestamp wholestage off 970 973 4 10.3 97.0 1.0X +hour of timestamp wholestage on 950 957 9 10.5 95.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 1008 1010 3 9.9 100.8 1.0X -minute of timestamp wholestage on 942 945 3 10.6 94.2 1.1X +minute of timestamp wholestage off 1017 1019 3 9.8 101.7 1.0X +minute of timestamp wholestage on 948 951 2 10.5 94.8 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 975 976 1 10.3 97.5 1.0X -second of timestamp wholestage on 938 944 4 10.7 93.8 1.0X +second of timestamp wholestage off 965 966 2 10.4 96.5 1.0X +second of timestamp wholestage on 943 946 2 10.6 94.3 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 295 296 2 33.9 29.5 1.0X -current_date wholestage on 267 274 6 37.5 26.7 1.1X +current_date wholestage off 296 296 0 33.8 29.6 1.0X +current_date wholestage on 271 277 7 36.9 27.1 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 298 303 7 33.5 29.8 1.0X -current_timestamp wholestage on 261 275 12 38.2 26.1 1.1X +current_timestamp wholestage off 307 329 32 32.6 30.7 1.0X +current_timestamp wholestage on 259 314 96 38.7 25.9 1.2X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 1071 1073 3 9.3 107.1 1.0X -cast to date wholestage on 998 1014 31 10.0 99.8 1.1X +cast to date wholestage off 1075 1077 3 9.3 107.5 1.0X +cast to date wholestage on 997 1002 5 10.0 99.7 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 1260 1261 1 7.9 126.0 1.0X -last_day wholestage on 1245 1261 17 8.0 124.5 1.0X +last_day wholestage off 1259 1261 3 7.9 125.9 1.0X +last_day wholestage on 1231 1242 11 8.1 123.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 1118 1120 2 8.9 111.8 1.0X -next_day wholestage on 1043 1047 3 9.6 104.3 1.1X +next_day wholestage off 1121 1123 3 8.9 112.1 1.0X +next_day wholestage on 1043 1049 6 9.6 104.3 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 1046 1048 3 9.6 104.6 1.0X -date_add wholestage on 1040 1048 11 9.6 104.0 1.0X +date_add wholestage off 1043 1044 2 9.6 104.3 1.0X +date_add wholestage on 1026 1030 5 9.7 102.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 1081 1081 0 9.3 108.1 1.0X -date_sub wholestage on 
1030 1035 6 9.7 103.0 1.0X +date_sub wholestage off 1058 1062 6 9.5 105.8 1.0X +date_sub wholestage on 1024 1027 3 9.8 102.4 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1393 1400 10 7.2 139.3 1.0X -add_months wholestage on 1391 1396 5 7.2 139.1 1.0X +add_months wholestage off 1403 1404 2 7.1 140.3 1.0X +add_months wholestage on 1394 1399 5 7.2 139.4 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 5424 5426 2 1.8 542.4 1.0X -format date wholestage on 5408 5448 37 1.8 540.8 1.0X +format date wholestage off 5730 5736 8 1.7 573.0 1.0X +format date wholestage on 6159 6184 26 1.6 615.9 0.9X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 8839 8841 3 1.1 883.9 1.0X -from_unixtime wholestage on 8788 8826 24 1.1 878.8 1.0X +from_unixtime wholestage off 8718 8725 10 1.1 871.8 1.0X +from_unixtime wholestage on 8648 8668 17 1.2 864.8 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 1105 1111 8 9.0 110.5 1.0X -from_utc_timestamp wholestage on 1073 1081 8 9.3 107.3 1.0X +from_utc_timestamp wholestage off 1174 1180 8 8.5 117.4 1.0X +from_utc_timestamp wholestage on 1084 1093 6 9.2 108.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 
v2 @ 2.50GHz to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1462 1465 4 6.8 146.2 1.0X -to_utc_timestamp wholestage on 1394 1408 13 7.2 139.4 1.0X +to_utc_timestamp wholestage off 1567 1567 0 6.4 156.7 1.0X +to_utc_timestamp wholestage on 1509 1528 13 6.6 150.9 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 325 328 4 30.8 32.5 1.0X -cast interval wholestage on 286 290 3 35.0 28.6 1.1X +cast interval wholestage off 328 332 5 30.4 32.8 1.0X +cast interval wholestage on 286 290 5 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1822 1824 3 5.5 182.2 1.0X -datediff wholestage on 1757 1761 5 5.7 175.7 1.0X +datediff wholestage off 1832 1833 2 5.5 183.2 1.0X +datediff wholestage on 1757 1761 3 5.7 175.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 4886 4893 10 2.0 488.6 1.0X -months_between wholestage on 4785 4799 12 2.1 478.5 1.0X +months_between wholestage off 5040 5049 13 2.0 504.0 1.0X +months_between wholestage on 4943 4950 5 2.0 494.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 2024 2052 40 0.5 2023.7 1.0X -window wholestage on 46599 46660 45 0.0 46599.0 0.0X +window wholestage off 1779 1855 107 0.6 1778.6 1.0X +window wholestage on 46705 46754 43 0.0 46705.1 0.0X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on 
Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 2361 2366 7 4.2 236.1 1.0X -date_trunc YEAR wholestage on 2325 2328 3 4.3 232.5 1.0X +date_trunc YEAR wholestage off 2485 2497 17 4.0 248.5 1.0X +date_trunc YEAR wholestage on 2403 2420 20 4.2 240.3 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 2366 2374 12 4.2 236.6 1.0X -date_trunc YYYY wholestage on 2316 2328 13 4.3 231.6 1.0X +date_trunc YYYY wholestage off 2498 2502 5 4.0 249.8 1.0X +date_trunc YYYY wholestage on 2399 2401 2 4.2 239.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 2359 2359 0 4.2 235.9 1.0X -date_trunc YY wholestage on 2315 2325 7 4.3 231.5 1.0X +date_trunc YY wholestage off 2492 2493 3 4.0 249.2 1.0X +date_trunc YY wholestage on 2399 2404 6 4.2 239.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 2360 2369 12 4.2 236.0 1.0X -date_trunc MON wholestage on 2306 2314 9 4.3 230.6 1.0X +date_trunc MON wholestage off 2454 2455 1 4.1 245.4 1.0X +date_trunc MON wholestage on 2412 2417 5 4.1 241.2 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 2359 2360 2 4.2 235.9 1.0X -date_trunc MONTH wholestage on 2304 2308 4 4.3 230.4 1.0X +date_trunc MONTH wholestage off 2449 2450 1 4.1 244.9 1.0X +date_trunc MONTH wholestage on 2409 2414 7 4.2 240.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM 
wholestage off 2356 2358 2 4.2 235.6 1.0X -date_trunc MM wholestage on 2302 2309 6 4.3 230.2 1.0X +date_trunc MM wholestage off 2445 2450 7 4.1 244.5 1.0X +date_trunc MM wholestage on 2409 2412 4 4.2 240.9 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1962 1964 3 5.1 196.2 1.0X -date_trunc DAY wholestage on 1916 1921 6 5.2 191.6 1.0X +date_trunc DAY wholestage off 2158 2165 10 4.6 215.8 1.0X +date_trunc DAY wholestage on 2039 2045 6 4.9 203.9 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1956 1957 2 5.1 195.6 1.0X -date_trunc DD wholestage on 1916 1922 6 5.2 191.6 1.0X +date_trunc DD wholestage off 2156 2162 8 4.6 215.6 1.0X +date_trunc DD wholestage on 2038 2043 3 4.9 203.8 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1968 1970 3 5.1 196.8 1.0X -date_trunc HOUR wholestage on 1949 1961 9 5.1 194.9 1.0X +date_trunc HOUR wholestage off 2080 2081 2 4.8 208.0 1.0X +date_trunc HOUR wholestage on 2042 2048 6 4.9 204.2 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 368 373 7 27.2 36.8 1.0X -date_trunc MINUTE wholestage on 338 343 6 29.6 33.8 1.1X +date_trunc MINUTE wholestage off 2116 2122 9 4.7 211.6 1.0X +date_trunc MINUTE wholestage on 2041 2048 11 4.9 204.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 379 379 1 26.4 37.9 1.0X -date_trunc SECOND wholestage on 327 340 13 30.6 32.7 1.2X +date_trunc SECOND wholestage off 349 352 4 28.6 34.9 1.0X +date_trunc SECOND wholestage on 309 314 6 32.3 30.9 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 
4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 2227 2242 21 4.5 222.7 1.0X -date_trunc WEEK wholestage on 2231 2241 9 4.5 223.1 1.0X +date_trunc WEEK wholestage off 2324 2330 8 4.3 232.4 1.0X +date_trunc WEEK wholestage on 2297 2305 13 4.4 229.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 3158 3160 3 3.2 315.8 1.0X -date_trunc QUARTER wholestage on 3150 3163 12 3.2 315.0 1.0X +date_trunc QUARTER wholestage off 3652 3654 3 2.7 365.2 1.0X +date_trunc QUARTER wholestage on 3211 3218 9 3.1 321.1 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 321 323 3 31.2 32.1 1.0X -trunc year wholestage on 302 330 18 33.1 30.2 1.1X +trunc year wholestage off 308 311 4 32.5 30.8 1.0X +trunc year wholestage on 286 291 4 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 320 324 6 31.2 32.0 1.0X -trunc yyyy wholestage on 294 329 20 34.0 29.4 1.1X +trunc yyyy wholestage off 304 305 1 32.9 30.4 1.0X +trunc yyyy wholestage on 286 290 5 35.0 28.6 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 322 322 0 31.1 32.2 1.0X -trunc yy wholestage on 293 320 37 34.1 29.3 1.1X +trunc yy wholestage off 319 322 5 31.4 31.9 1.0X +trunc yy wholestage on 285 288 3 35.0 28.5 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 
320 322 2 31.2 32.0 1.0X -trunc mon wholestage on 291 312 26 34.4 29.1 1.1X +trunc mon wholestage off 304 309 7 32.9 30.4 1.0X +trunc mon wholestage on 284 289 4 35.2 28.4 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 318 331 18 31.4 31.8 1.0X -trunc month wholestage on 297 329 28 33.7 29.7 1.1X +trunc month wholestage off 302 305 4 33.1 30.2 1.0X +trunc month wholestage on 285 294 10 35.1 28.5 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 318 319 1 31.4 31.8 1.0X -trunc mm wholestage on 312 335 15 32.1 31.2 1.0X +trunc mm wholestage off 301 317 23 33.2 30.1 1.0X +trunc mm wholestage on 284 290 4 35.2 28.4 1.1X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 217 221 5 4.6 217.5 1.0X -to timestamp str wholestage on 210 214 5 4.8 210.0 1.0X +to timestamp str wholestage off 217 219 2 4.6 217.5 1.0X +to timestamp str wholestage on 216 219 4 4.6 215.7 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 1714 1718 5 0.6 1714.4 1.0X -to_timestamp wholestage on 1418 1433 14 0.7 1418.5 1.2X +to_timestamp wholestage off 1853 1855 3 0.5 1852.9 1.0X +to_timestamp wholestage on 2138 2159 26 0.5 2137.6 0.9X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 1436 1441 6 0.7 1436.2 1.0X -to_unix_timestamp wholestage on 1421 1426 7 0.7 1420.6 1.0X +to_unix_timestamp wholestage off 2115 2116 1 0.5 2115.2 1.0X +to_unix_timestamp wholestage 
on 2131 2144 16 0.5 2130.8 1.0X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 267 267 0 3.8 266.6 1.0X -to date str wholestage on 260 262 2 3.8 260.1 1.0X +to date str wholestage off 280 281 1 3.6 279.7 1.0X +to date str wholestage on 265 271 9 3.8 265.2 1.1X -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 3419 3436 25 0.3 3419.0 1.0X -to_date wholestage on 3344 3352 7 0.3 3343.5 1.0X +to_date wholestage off 3434 3458 34 0.3 3433.7 1.0X +to_date wholestage on 3517 3539 18 0.3 3517.4 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_252-8u252-b09-1~18.04-b09 on Linux 4.15.0-1063-aws +OpenJDK 64-Bit Server VM 1.8.0_272-8u272-b10-0ubuntu1~18.04-b10 on Linux 5.4.0-1029-aws Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 436 445 8 11.5 87.2 1.0X -From java.time.LocalDate 348 357 11 14.4 69.7 1.3X -Collect java.sql.Date 1723 1917 168 2.9 344.5 0.3X -Collect java.time.LocalDate 1591 1602 18 3.1 318.3 0.3X -From java.sql.Timestamp 248 252 4 20.2 49.6 1.8X -From java.time.Instant 232 238 5 21.5 46.5 1.9X -Collect longs 1398 1455 99 3.6 279.5 0.3X -Collect java.sql.Timestamp 1469 1483 13 3.4 293.9 0.3X -Collect java.time.Instant 1561 1597 40 3.2 312.2 0.3X -java.sql.Date to Hive string 13820 14798 857 0.4 2763.9 0.0X -java.time.LocalDate to Hive string 14374 14779 357 0.3 2874.8 0.0X -java.sql.Timestamp to Hive string 14872 15461 653 0.3 2974.5 0.0X -java.time.Instant to Hive string 17062 17789 759 0.3 3412.4 0.0X +From java.sql.Date 399 405 6 12.5 79.7 1.0X +From java.time.LocalDate 341 347 6 14.6 68.3 1.2X +Collect java.sql.Date 1732 1943 183 2.9 346.3 0.2X +Collect java.time.LocalDate 1686 1719 29 3.0 337.2 0.2X +From java.sql.Timestamp 249 261 19 20.1 49.8 1.6X +From java.time.Instant 240 242 3 20.9 47.9 1.7X +Collect longs 1546 1582 60 3.2 309.3 0.3X +Collect java.sql.Timestamp 1714 1720 6 2.9 342.9 0.2X +Collect java.time.Instant 2063 2119 65 2.4 412.6 0.2X +java.sql.Date to Hive string 13888 14401 490 0.4 2777.6 0.0X +java.time.LocalDate to Hive string 13804 14231 661 0.4 2760.8 0.0X +java.sql.Timestamp to Hive string 14231 14550 393 0.4 2846.1 0.0X +java.time.Instant to Hive string 16732 17801 953 0.3 3346.3 0.0X From 318a173fcee11902820593fe4ac992a90e6bb00e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 11 Nov 2020 14:27:48 -0800 Subject: [PATCH 0448/1009] 
[SPARK-33402][CORE] Jobs launched in same second have duplicate MapReduce JobIDs ### What changes were proposed in this pull request? 1. Applies the SQL changes in SPARK-33230 to SparkHadoopWriter, so that `rdd.saveAsNewAPIHadoopDataset` passes in a unique job UUID in `spark.sql.sources.writeJobUUID` 1. `SparkHadoopWriterUtils.createJobTrackerID` generates a JobID by appending a random long number to the supplied timestamp to ensure the probability of a collision is near-zero. 1. Adds tests of uniqueness, round trips, and negative jobID rejection. ### Why are the changes needed? Without this, if more than one job is started in the same second *and the committer expects application attempt IDs to be unique*, those jobs are at risk of clashing with each other. With the fix, * those committers which use the ID set in `spark.sql.sources.writeJobUUID` as a priority ID will pick that up instead and so be unique. * committers which use the Hadoop JobID for unique paths and filenames will get the randomly generated jobID. Assuming all clocks in a cluster are in sync, the probability of a collision between two jobs launched in the same second has dropped from 1 to 1/(2^63). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit tests. There's a new test suite, SparkHadoopWriterUtilsSuite, which creates job IDs, verifies they are unique even for the same timestamp, and checks that they can be marshalled to string and parsed back in the Hadoop code, which contains some (brittle) assumptions about the format of job IDs. Functional Integration Tests 1. Hadoop-trunk built with [HADOOP-17318], publishing to local maven repository 1. Spark built with hadoop.version=3.4.0-SNAPSHOT to pick up these JARs. 1. Spark + Object store integration tests at [https://github.com/hortonworks-spark/cloud-integration](https://github.com/hortonworks-spark/cloud-integration) were built against that local spark version 1. And executed against AWS London. The tests were run with `fs.s3a.committer.require.uuid=true`, so the s3a committers fail fast if they don't get a job ID down. This showed that `rdd.saveAsNewAPIHadoopDataset` wasn't setting the UUID option. It again uses the current Date value for an app attempt, which is not guaranteed to be unique. With the change applied to Spark, the relevant tests work, so the committers are getting unique job IDs. Closes #30319 from steveloughran/BUG/SPARK-33402-jobuuid. Authored-by: Steve Loughran Signed-off-by: Dongjoon Hyun --- .../spark/internal/io/SparkHadoopWriter.scala | 7 +- .../internal/io/SparkHadoopWriterUtils.scala | 25 ++++- .../io/SparkHadoopWriterUtilsSuite.scala | 102 ++++++++++++++++++ 3 files changed, 131 insertions(+), 3 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala index 6d174b5e0f81b..37b470802067a 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala @@ -18,7 +18,7 @@ package org.apache.spark.internal.io import java.text.NumberFormat -import java.util.{Date, Locale} +import java.util.{Date, Locale, UUID} import scala.reflect.ClassTag @@ -70,6 +70,11 @@ object SparkHadoopWriter extends Logging { // Assert the output format/key/value class is set in JobConf.
config.assertConf(jobContext, rdd.conf) + // propagate the description UUID into the jobs, so that committers + // get an ID guaranteed to be unique. + jobContext.getConfiguration.set("spark.sql.sources.writeJobUUID", + UUID.randomUUID.toString) + val committer = config.createCommitter(commitJobId) committer.setupJob(jobContext) diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala index de828a6d6156e..657842c620f30 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriterUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.internal.io import java.text.SimpleDateFormat import java.util.{Date, Locale} -import scala.util.DynamicVariable +import scala.util.{DynamicVariable, Random} import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.{JobConf, JobID} @@ -37,14 +37,35 @@ private[spark] object SparkHadoopWriterUtils { private val RECORDS_BETWEEN_BYTES_WRITTEN_METRIC_UPDATES = 256 + private val RAND = new Random() + /** + * Create a job ID. + * + * @param time (current) time + * @param id job number + * @return a job ID + */ def createJobID(time: Date, id: Int): JobID = { + if (id < 0) { + throw new IllegalArgumentException("Job number is negative") + } val jobtrackerID = createJobTrackerID(time) new JobID(jobtrackerID, id) } + /** + * Generate an ID for a job tracker. + * @param time (current) time + * @return a string for a job ID + */ def createJobTrackerID(time: Date): String = { - new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(time) + val base = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US).format(time) + var l1 = RAND.nextLong() + if (l1 < 0) { + l1 = -l1 + } + base + l1 } def createPathFromString(path: String, conf: JobConf): Path = { diff --git a/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala b/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala new file mode 100644 index 0000000000000..33b58ec9e6665 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/internal/io/SparkHadoopWriterUtilsSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal.io + +import java.util.Date + +import org.apache.hadoop.mapreduce.JobID + +import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.io.SparkHadoopWriterUtils.createJobID + +/** + * Unit tests for functions in SparkHadoopWriterUtils. + */ +class SparkHadoopWriterUtilsSuite extends SparkFunSuite { + + /** + * Core test of JobID generation: + * They are created. + * The job number is converted to the job ID. 
+ * They round trip to string and back + * (which implies that the full string matches the regexp + * in the JobID class). + */ + test("JobID Generation") { + val jobNumber = 1010 + val j1 = createJobID(new Date(), jobNumber) + assert(jobNumber == j1.getId, + s"Job number mismatch in $j1") + + val jobStr = j1.toString + // the string value begins with job_ + assert(jobStr.startsWith("job_"), + s"wrong prefix of $jobStr") + // and the hadoop code can parse it + val j2 = roundTrip(j1) + assert(j1.getId == j2.getId, "Job ID mismatch") + assert(j1.getJtIdentifier == j2.getJtIdentifier, "Job identifier mismatch") + } + + /** + * This is the problem surfacing in situations where committers expect + * Job IDs to be unique: if the timestamp is (exclusively) used + * then there will conflict in directories created. + */ + test("JobIDs generated at same time are different") { + val now = new Date() + val j1 = createJobID(now, 1) + val j2 = createJobID(now, 1) + assert(j1.toString != j2.toString) + } + + /** + * There's nothing explicitly in the Hadoop classes to stop + * job numbers being negative. + * There's some big assumptions in the FileOutputCommitter about attempt IDs + * being positive during any recovery operations; for safety the ID + * job number is validated. + */ + test("JobIDs with negative job number") { + intercept[IllegalArgumentException] { + createJobID(new Date(), -1) + } + } + + /** + * If someone ever does reinstate use of timestamps, + * make sure that the case of timestamp == 0 is handled. + */ + test("JobIDs on Epoch are different") { + val j1 = createJobID(new Date(0), 0) + val j2 = createJobID(new Date(0), 0) + assert (j1.toString != j2.toString) + } + + /** + * Do a round trip as a string and back again. + * This uses the JobID parser. + * @param jobID job ID + * @return the returned jobID + */ + private def roundTrip(jobID: JobID): JobID = { + val parsedJobId = JobID.forName(jobID.toString) + assert(jobID == parsedJobId, "Round trip was inconsistent") + parsedJobId + } +} From 9d58a2f0f0f308a03830bf183959a4743a77b78a Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Thu, 12 Nov 2020 08:29:22 +0900 Subject: [PATCH 0449/1009] [MINOR][GRAPHX] Correct typos in the sub-modules: graphx, external, and examples ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: graphx, external, and examples. Split per holdenk https://github.com/apache/spark/pull/30323#issuecomment-725159710 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No testing was performed Closes #30326 from jsoref/spelling-graphx. 
Authored-by: Josh Soref Signed-off-by: Takeshi Yamamuro --- .../spark/examples/streaming/JavaCustomReceiver.java | 2 +- .../examples/streaming/JavaNetworkWordCount.java | 2 +- .../streaming/JavaRecoverableNetworkWordCount.java | 2 +- .../examples/streaming/JavaSqlNetworkWordCount.java | 2 +- .../src/main/python/ml/train_validation_split.py | 2 +- examples/src/main/python/sql/arrow.py | 4 ++-- .../streaming/recoverable_network_wordcount.py | 2 +- .../main/python/streaming/sql_network_wordcount.py | 2 +- .../spark/examples/streaming/CustomReceiver.scala | 2 +- .../spark/examples/streaming/NetworkWordCount.scala | 2 +- .../streaming/RecoverableNetworkWordCount.scala | 2 +- .../examples/streaming/SqlNetworkWordCount.scala | 2 +- .../streaming/StatefulNetworkWordCount.scala | 2 +- .../spark/sql/jdbc/DockerJDBCIntegrationSuite.scala | 2 +- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 2 +- .../sql/kafka010/KafkaContinuousSourceSuite.scala | 4 ++-- .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 12 ++++++------ .../spark/sql/kafka010/KafkaRelationSuite.scala | 4 ++-- .../apache/spark/sql/kafka010/KafkaTestUtils.scala | 4 ++-- .../spark/streaming/kafka010/KafkaRDDSuite.scala | 2 +- .../examples/streaming/JavaKinesisWordCountASL.java | 2 +- .../examples/streaming/kinesis_wordcount_asl.py | 2 +- .../examples/streaming/KinesisWordCountASL.scala | 6 +++--- .../streaming/kinesis/KinesisUtilsPythonHelper.scala | 2 +- .../org/apache/spark/graphx/lib/PageRankSuite.scala | 6 +++--- 25 files changed, 38 insertions(+), 38 deletions(-) diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java index 47692ec982890..f84a1978de1ad 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java @@ -67,7 +67,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000)); // Create an input stream with the custom receiver on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') JavaReceiverInputDStream lines = ssc.receiverStream( new JavaCustomReceiver(args[0], Integer.parseInt(args[1]))); JavaDStream words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator()); diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java index b217672def88e..d56134bd99e36 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaNetworkWordCount.java @@ -57,7 +57,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. 
JavaReceiverInputDStream lines = ssc.socketTextStream( diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java index c01a62b078f7a..0c11c40cfe7ed 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaRecoverableNetworkWordCount.java @@ -126,7 +126,7 @@ private static JavaStreamingContext createContext(String ip, ssc.checkpoint(checkpointDirectory); // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') JavaReceiverInputDStream lines = ssc.socketTextStream(ip, port); JavaDStream words = lines.flatMap(x -> Arrays.asList(SPACE.split(x)).iterator()); JavaPairDStream wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1)) diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java index 948d1a2111780..5d30698c93372 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java @@ -59,7 +59,7 @@ public static void main(String[] args) throws Exception { JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); // Create a JavaReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. JavaReceiverInputDStream lines = ssc.socketTextStream( diff --git a/examples/src/main/python/ml/train_validation_split.py b/examples/src/main/python/ml/train_validation_split.py index d4f9184bf576e..5e3dc7b3ec2fa 100644 --- a/examples/src/main/python/ml/train_validation_split.py +++ b/examples/src/main/python/ml/train_validation_split.py @@ -17,7 +17,7 @@ """ This example demonstrates applying TrainValidationSplit to split data -and preform model selection. +and perform model selection. Run with: bin/spark-submit examples/src/main/python/ml/train_validation_split.py diff --git a/examples/src/main/python/sql/arrow.py b/examples/src/main/python/sql/arrow.py index 9978e8601449a..a0eba0fbede73 100644 --- a/examples/src/main/python/sql/arrow.py +++ b/examples/src/main/python/sql/arrow.py @@ -60,7 +60,7 @@ def func(s1: pd.Series, s2: pd.Series, s3: pd.DataFrame) -> pd.DataFrame: s3['col2'] = s1 + s2.str.len() return s3 - # Create a Spark DataFrame that has three columns including a sturct column. + # Create a Spark DataFrame that has three columns including a struct column. 
df = spark.createDataFrame( [[1, "a string", ("a nested string",)]], "long_col long, string_col string, struct_col struct") @@ -285,7 +285,7 @@ def asof_join(l, r): ser_to_frame_pandas_udf_example(spark) print("Running pandas_udf example: Series to Series") ser_to_ser_pandas_udf_example(spark) - print("Running pandas_udf example: Iterator of Series to Iterator of Seires") + print("Running pandas_udf example: Iterator of Series to Iterator of Series") iter_ser_to_iter_ser_pandas_udf_example(spark) print("Running pandas_udf example: Iterator of Multiple Series to Iterator of Series") iter_sers_to_iter_ser_pandas_udf_example(spark) diff --git a/examples/src/main/python/streaming/recoverable_network_wordcount.py b/examples/src/main/python/streaming/recoverable_network_wordcount.py index 6ebe91a2f47fe..567f9c819e3ad 100644 --- a/examples/src/main/python/streaming/recoverable_network_wordcount.py +++ b/examples/src/main/python/streaming/recoverable_network_wordcount.py @@ -66,7 +66,7 @@ def createContext(host, port, outputPath): ssc = StreamingContext(sc, 1) # Create a socket stream on target ip:port and count the - # words in input stream of \n delimited text (eg. generated by 'nc') + # words in input stream of \n delimited text (e.g. generated by 'nc') lines = ssc.socketTextStream(host, port) words = lines.flatMap(lambda line: line.split(" ")) wordCounts = words.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) diff --git a/examples/src/main/python/streaming/sql_network_wordcount.py b/examples/src/main/python/streaming/sql_network_wordcount.py index 59a8a11a45b19..2965ea8fb1872 100644 --- a/examples/src/main/python/streaming/sql_network_wordcount.py +++ b/examples/src/main/python/streaming/sql_network_wordcount.py @@ -52,7 +52,7 @@ def getSparkSessionInstance(sparkConf): ssc = StreamingContext(sc, 1) # Create a socket stream on target ip:port and count the - # words in input stream of \n delimited text (eg. generated by 'nc') + # words in input stream of \n delimited text (e.g. generated by 'nc') lines = ssc.socketTextStream(host, int(port)) words = lines.flatMap(lambda line: line.split(" ")) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala index 0f47deaf1021b..626f4b4d3ccdf 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala @@ -50,7 +50,7 @@ object CustomReceiver { val ssc = new StreamingContext(sparkConf, Seconds(1)) // Create an input stream with the custom receiver on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. 
generated by 'nc') val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt)) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map(x => (x, 1)).reduceByKey(_ + _) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala index 26bb51dde3a1d..7d981dfb949ea 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala @@ -47,7 +47,7 @@ object NetworkWordCount { val ssc = new StreamingContext(sparkConf, Seconds(1)) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala index ee3bbe40fbeed..98539d6494231 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala @@ -112,7 +112,7 @@ object RecoverableNetworkWordCount { ssc.checkpoint(checkpointDirectory) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') val lines = ssc.socketTextStream(ip, port) val words = lines.flatMap(_.split(" ")) val wordCounts = words.map((_, 1)).reduceByKey(_ + _) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala index 778be7baaeeac..7daa0014e0f1c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala @@ -51,7 +51,7 @@ object SqlNetworkWordCount { val ssc = new StreamingContext(sparkConf, Seconds(2)) // Create a socket stream on target ip:port and count the - // words in input stream of \n delimited text (eg. generated by 'nc') + // words in input stream of \n delimited text (e.g. generated by 'nc') // Note that no duplication in storage level only for running locally. // Replication necessary in distributed scenario for fault tolerance. 
val lines = ssc.socketTextStream(args(0), args(1).toInt, StorageLevel.MEMORY_AND_DISK_SER) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala index 46f01edf7deec..8a5fcda9cd990 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala @@ -52,7 +52,7 @@ object StatefulNetworkWordCount { val initialRDD = ssc.sparkContext.parallelize(List(("hello", 1), ("world", 1))) // Create a ReceiverInputDStream on target ip:port and count the - // words in input stream of \n delimited test (eg. generated by 'nc') + // words in input stream of \n delimited test (e.g. generated by 'nc') val lines = ssc.socketTextStream(args(0), args(1).toInt) val words = lines.flatMap(_.split(" ")) val wordDstream = words.map(x => (x, 1)) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index ad6a829fffd0d..00b7b413a964d 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -45,7 +45,7 @@ abstract class DatabaseOnDocker { val env: Map[String, String] /** - * Wheather or not to use ipc mode for shared memory when starting docker image + * Whether or not to use ipc mode for shared memory when starting docker image */ val usesIpc: Boolean diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index 2e726b9e650b6..e36555e514c9f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -35,7 +35,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { def testUpdateColumnNullability(tbl: String): Unit = { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") var t = spark.table(s"$catalogName.alt_table") - // nullable is true in the expecteSchema because Spark always sets nullable to true + // nullable is true in the expectedSchema because Spark always sets nullable to true // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 var expectedSchema = new StructType().add("ID", StringType, nullable = true) assert(t.schema === expectedSchema) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala index 14dcbeef0d9a3..6801d14d036dd 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaContinuousSourceSuite.scala @@ -33,7 +33,7 @@ class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuo withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + 
testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") @@ -99,7 +99,7 @@ class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuo withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 853d201ba7ea5..510c0c5bd28a5 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -569,7 +569,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val rows = spark.table("kafkaWatermark").collect() assert(rows.length === 1, s"Unexpected results: ${rows.toList}") val row = rows(0) - // We cannot check the exact window start time as it depands on the time that messages were + // We cannot check the exact window start time as it depends on the time that messages were // inserted by the producer. So here we just use a low bound to make sure the internal // conversion works. assert( @@ -836,7 +836,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val topicPartition = new TopicPartition(topic, 0) // The message values are the same as their offsets to make the test easy to follow - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => testStream(mapped)( StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, @@ -959,7 +959,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { val topicPartition = new TopicPartition(topic, 0) // The message values are the same as their offsets to make the test easy to follow - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => testStream(mapped)( StartStream(Trigger.ProcessingTime(100), clock), waitUntilBatchProcessed, @@ -1050,7 +1050,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { .load() .select($"value".as[String]) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => producer.beginTransaction() (0 to 3).foreach { i => producer.send(new ProducerRecord[String, String](topic, i.toString)).get() @@ -1066,7 +1066,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // this case, if we forget to reset `FetchedData._nextOffsetInFetchedData` or // `FetchedData._offsetAfterPoll` (See SPARK-25495), the next batch will see incorrect // values and return wrong results hence fail the test. 
- testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => producer.beginTransaction() (4 to 7).foreach { i => producer.send(new ProducerRecord[String, String](topic, i.toString)).get() @@ -1779,7 +1779,7 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { withTable(table) { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .readStream .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index e5f3a229622e1..6f5dc0bb081ba 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -503,7 +503,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("read Kafka transactional messages: read_committed") { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .read .format("kafka") @@ -552,7 +552,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("read Kafka transactional messages: read_uncommitted") { val topic = newTopic() testUtils.createTopic(topic) - testUtils.withTranscationalProducer { producer => + testUtils.withTransactionalProducer { producer => val df = spark .read .format("kafka") diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 3a86352e42d2b..c5f3086b38c99 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -539,7 +539,7 @@ class KafkaTestUtils( } /** Call `f` with a `KafkaProducer` that has initialized transactions. 
*/ - def withTranscationalProducer(f: KafkaProducer[String, String] => Unit): Unit = { + def withTransactionalProducer(f: KafkaProducer[String, String] => Unit): Unit = { val props = producerConfiguration props.put("transactional.id", UUID.randomUUID().toString) val producer = new KafkaProducer[String, String](props) @@ -577,7 +577,7 @@ class KafkaTestUtils( // ensure that logs from all replicas are deleted if delete topic is marked successful assert(servers.forall(server => topicAndPartitions.forall(tp => server.getLogManager().getLog(tp).isEmpty)), - s"topic $topic still exists in log mananger") + s"topic $topic still exists in log manager") // ensure that topic is removed from all cleaner offsets assert(servers.forall(server => topicAndPartitions.forall { tp => val checkpoints = server.getLogManager().liveLogDirs.map { logDir => diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala index d6123e16dd238..2053d3655d860 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala @@ -42,7 +42,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { private val sparkConf = new SparkConf().setMaster("local[4]") .setAppName(this.getClass.getSimpleName) // Set a timeout of 10 seconds that's going to be used to fetch topics/partitions from kafka. - // Othewise the poll timeout defaults to 2 minutes and causes test cases to run longer. + // Otherwise the poll timeout defaults to 2 minutes and causes test cases to run longer. .set("spark.streaming.kafka.consumer.poll.ms", "10000") private var sc: SparkContext = _ diff --git a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index d704aeb507518..244873af70de9 100644 --- a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -49,7 +49,7 @@ * * Usage: JavaKinesisWordCountASL [app-name] [stream-name] [endpoint-url] [region-name] * [app-name] is the name of the consumer app, used to track the read data in DynamoDB - * [stream-name] name of the Kinesis stream (ie. mySparkStream) + * [stream-name] name of the Kinesis stream (i.e. mySparkStream) * [endpoint-url] endpoint of the Kinesis service * (e.g. https://kinesis.us-east-1.amazonaws.com) * diff --git a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py index df8c64e531cfa..06ada13b52399 100644 --- a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py +++ b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py @@ -23,7 +23,7 @@ Usage: kinesis_wordcount_asl.py is the name of the consumer app, used to track the read data in DynamoDB - name of the Kinesis stream (ie. mySparkStream) + name of the Kinesis stream (i.e. mySparkStream) endpoint of the Kinesis service (e.g. https://kinesis.us-east-1.amazonaws.com) region name of the Kinesis endpoint (e.g. 
us-east-1) diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index bbb6008c2dddf..d6a9160eed98e 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -43,7 +43,7 @@ import org.apache.spark.streaming.kinesis.KinesisInputDStream * * Usage: KinesisWordCountASL * is the name of the consumer app, used to track the read data in DynamoDB - * name of the Kinesis stream (ie. mySparkStream) + * name of the Kinesis stream (i.e. mySparkStream) * endpoint of the Kinesis service * (e.g. https://kinesis.us-east-1.amazonaws.com) * @@ -167,9 +167,9 @@ object KinesisWordCountASL extends Logging { * Usage: KinesisWordProducerASL \ * * - * is the name of the Kinesis stream (ie. mySparkStream) + * is the name of the Kinesis stream (i.e. mySparkStream) * is the endpoint of the Kinesis service - * (ie. https://kinesis.us-east-1.amazonaws.com) + * (i.e. https://kinesis.us-east-1.amazonaws.com) * is the rate of records per second to put onto the stream * is the number of words per record * diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala index c89dedd3366d1..0056438c4eefb 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala @@ -46,7 +46,7 @@ private class KinesisUtilsPythonHelper { // scalastyle:on if (!(stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) && !(stsAssumeRoleArn == null && stsSessionName == null && stsExternalId == null)) { - throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExtenalId " + + throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExternalId " + "must all be defined or all be null") } if (awsAccessKeyId == null && awsSecretKey != null) { diff --git a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala index a5e2fc5c9a74f..8008a89c6cd5f 100644 --- a/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala +++ b/graphx/src/test/scala/org/apache/spark/graphx/lib/PageRankSuite.scala @@ -274,8 +274,8 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext { withSpark { sc => // Check that implementation can handle large vertexIds, SPARK-25149 val vertexIdOffset = Int.MaxValue.toLong + 1 - val sourceOffest = 4 - val source = vertexIdOffset + sourceOffest + val sourceOffset = 4 + val source = vertexIdOffset + sourceOffset val numIter = 10 val vertices = vertexIdOffset until vertexIdOffset + numIter val chain1 = vertices.zip(vertices.tail) @@ -285,7 +285,7 @@ class PageRankSuite extends SparkFunSuite with LocalSparkContext { val tol = 0.0001 val errorTol = 1.0e-1 - val a = resetProb / (1 - Math.pow(1 - resetProb, numIter - sourceOffest)) + val a = resetProb / (1 - Math.pow(1 - resetProb, numIter - sourceOffset)) // We expect the rank to decay as (1 - resetProb) ^ distance val expectedRanks = sc.parallelize(vertices).map { vid => val rank = if (vid < 
source) { From 61ee5d8a4e3080e01abfdbd8277fa75868c257cd Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Thu, 12 Nov 2020 10:20:33 +0800 Subject: [PATCH 0450/1009] [WIP] Test (#30327) * resend * address comments * directly gen new Iter * directly gen new Iter * update blockify strategy * address comments * try to fix 2.13 * try to fix scala 2.13 * use 1.0 as the default value for gemv * update Co-authored-by: zhengruifeng --- .../spark/ml/classification/LinearSVC.scala | 93 ++++++------------- .../apache/spark/ml/feature/Instance.scala | 72 ++++++++++++++ .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ++++ .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 +++++++++++ python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ++++ python/pyspark/ml/param/shared.pyi | 5 + 11 files changed, 225 insertions(+), 87 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 77272c65eb231..a2e7b0fadd4cb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSize { + with HasAggregationDepth with HasThreshold with HasBlockSizeInMB { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSizeInMB -> 0.0) } /** @@ -153,22 +153,13 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[blockSizeInMB]]. + * Default is 0.0. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setBlockSizeInMB(value: Double): this.type = set(blockSizeInMB, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -177,19 +168,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, + blockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } val instances = extractInstances(dataset) .setName("training instances") - if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) + .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -199,14 +190,12 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(blockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -245,12 +234,8 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, regularization, optimizer) - } else { - trainOnBlocks(instances, featuresStd, regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (rawCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -284,35 +269,9 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainOnRows( - instances: RDD[Instance], - featuresStd: Array[Double], - regularization: Option[L2Regularization], - optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { - val numFeatures = featuresStd.length - val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures - - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) - val costFun = new RDDLossFunction(instances, getAggregatorFunc, - regularization, $(aggregationDepth)) - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) - - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state != null) state.x.toArray else null, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -326,9 +285,11 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index db5f88d5dddc8..0b47c48e9a922 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml.feature +import scala.collection.mutable + import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -100,6 +102,32 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { + /** + * Suggested value for BlockSizeInMB in Level-2 routine cases. + * According to performance tests of BLAS routine (see SPARK-31714) and + * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for + * linear models using Level-2 routine (GEMV) to perform prediction and + * gradient computation. 
+ */ + val DefaultBlockSizeInMB = 1.0 + + private def getBlockMemUsage( + numCols: Long, + numRows: Long, + nnz: Long, + allUnitWeight: Boolean): Long = { + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val denseSize = Matrices.getDenseSize(numCols, numRows) + val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) + val matrixSize = math.min(denseSize, sparseSize) + if (allUnitWeight) { + matrixSize + doubleBytes * numRows + arrayHeader * 2 + } else { + matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 + } + } + def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -114,6 +142,50 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } + + def blokifyWithMaxMemUsage( + instanceIterator: Iterator[Instance], + maxMemUsage: Long): Iterator[InstanceBlock] = { + require(maxMemUsage > 0) + + new Iterator[InstanceBlock]() { + private var numCols = -1L + + override def hasNext: Boolean = instanceIterator.hasNext + + override def next(): InstanceBlock = { + val buff = mutable.ArrayBuilder.make[Instance] + var buffCnt = 0L + var buffNnz = 0L + var buffUnitWeight = true + var blockMemUsage = 0L + + while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { + val instance: Instance = instanceIterator.next() + if (numCols < 0L) numCols = instance.features.size + require(numCols == instance.features.size) + val nnz = instance.features.numNonzeros + + buff += instance + buffCnt += 1L + buffNnz += nnz + buffUnitWeight &&= (instance.weight == 1) + blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) + } + + // the block mem usage may slightly exceed threshold, not a big issue. + // and this ensure even if one row exceed block limit, each block has one row + InstanceBlock.fromInstances(buff.result()) + } + } + } + + def blokifyWithMaxMemUsage( + instances: RDD[Instance], + maxMemUsage: Long): RDD[InstanceBlock] = { + require(maxMemUsage > 0) + instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 7fd5f5938b565..64261bdfac7d5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,7 +108,12 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true), + ParamDesc[Double]("blockSizeInMB", "Maximum memory in MB for stacking input data " + + "in blocks. Data is stacked within partitions. If more than remaining data size in a " + + "partition then it is adjusted to the data size. If 0, try to infer an appropriate value " + + "based on the statistics of dataset. 
Must be >= 0.", + Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 60203eba61ea5..1c741545dade0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,4 +562,22 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } + +/** + * Trait for shared param blockSizeInMB (default: 0.0). This trait may be changed or + * removed between minor versions. + */ +trait HasBlockSizeInMB extends Params { + + /** + * Param for Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.. + * @group expertParam + */ + final val blockSizeInMB: DoubleParam = new DoubleParam(this, "blockSizeInMB", "Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", ParamValidators.gtEq(0.0)) + + setDefault(blockSizeInMB, 0.0) + + /** @group expertGetParam */ + final def getBlockSizeInMB: Double = $(blockSizeInMB) +} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index a66397324c1a6..55558f06ee362 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = lsvc.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = lsvc.setBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index d780bdf5f5dc8..f1e071357bab7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,4 +74,58 @@ class InstanceSuite extends SparkFunSuite{ } } + test("InstanceBlock: blokify with max memory usage") { + val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) + val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) + val instances = Seq(instance1, instance2) + + val blocks = InstanceBlock + .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray + require(blocks.length == 1) + val block = blocks.head + assert(block.size === 2) + assert(block.numFeatures === 2) + block.instanceIterator.zipWithIndex.foreach { + case (instance, i) => + assert(instance.label === instances(i).label) + assert(instance.weight === instances(i).weight) + 
assert(instance.features.toArray === instances(i).features.toArray) + } + Seq(0, 1).foreach { i => + val nzIter = block.getNonZeroIter(i) + val vec = Vectors.sparse(2, nzIter.toSeq) + assert(vec.toArray === instances(i).features.toArray) + } + + // instances larger than maxMemUsage + val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) + InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size + InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size + + // different numFeatures + intercept[IllegalArgumentException] { + InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size + } + + // nnz = 10 + val sparseInstance = Instance(-2.0, 3.0, + Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) + + // normally, memory usage of a block does not exceed maxMemUsage too much + val maxMemUsage = 1 << 18 + val mixedIter = Iterator.fill(100)(denseInstance) ++ + Iterator.fill(1000)(sparseInstance) ++ + Iterator.fill(10)(denseInstance) ++ + Iterator.fill(10)(sparseInstance) ++ + Iterator.fill(100)(denseInstance) ++ + Iterator.fill(100)(sparseInstance) + InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) + .foreach { block => + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val blockMemUsage = block.matrix.getSizeInBytes + + (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 + require(blockMemUsage < maxMemUsage * 1.05) + } + } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d6c861361a248..f96bbd4d33577 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ - HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, HasBlockSizeInMB, Param, Params, \ + TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize): + HasBlockSizeInMB): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) + blockSizeInMB=0.0) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSize() - 1 + >>> model.getBlockSizeInMB() + 0.0 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, blockSizeInMB=0.0): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`blockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(blockSizeInMB=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 55afc20a54cb9..241f5baf8dfd4 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,6 +26,7 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, + HasBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -172,7 +173,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize, + HasBlockSizeInMB, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -198,7 +199,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + blockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -215,7 +216,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + blockSizeInMB: float = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -225,7 +226,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... def setWeightCol(self, value: str) -> LinearSVC: ... 
def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSize(self, value: int) -> LinearSVC: ... + def setBlockSizeInMB(self, value: float) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index bc1ea87ad629c..b6fc170abe788 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,7 +165,11 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt")] + "adjusted to the size of this data.", None, "TypeConverters.toInt"), + ("blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is " + + "stacked within partitions. If more than remaining data size in a partition then it " + + "is adjusted to the data size. If 0, try to infer an appropriate value based on the " + + "statistics of dataset. Must be >= 0.", "0.0", "TypeConverters.toFloat")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 24fb0d3e2554d..a829a2e76b380 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,3 +597,21 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) + + +class HasBlockSizeInMB(Params): + """ + Mixin for param blockSizeInMB: maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0. + """ + + blockSizeInMB = Param(Params._dummy(), "blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", typeConverter=TypeConverters.toFloat) + + def __init__(self): + super(HasBlockSizeInMB, self).__init__() + self._setDefault(blockSizeInMB=0.0) + + def getBlockSizeInMB(self): + """ + Gets the value of blockSizeInMB or its default value. + """ + return self.getOrDefault(self.blockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index 5999c0eaa4661..bbb4890455de7 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,3 +185,8 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... + +class HasBlockSizeInMB(Params): + blockSizeInMB: Param[float] + def __init__(self) -> None: ... + def getBlockSizeInMB(self) -> float: ... From 6244407ce60c33ec9a549011723195fe8e15f287 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 12 Nov 2020 11:32:12 +0900 Subject: [PATCH 0451/1009] Revert "[WIP] Test (#30327)" This reverts commit 61ee5d8a4e3080e01abfdbd8277fa75868c257cd. ### What changes were proposed in this pull request? I need to merge https://github.com/apache/spark/pull/30327 to https://github.com/apache/spark/pull/30009, but I merged it to master by mistake. 
### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #30345 from zhengruifeng/revert-30327-adaptively_blockify_linear_svc_II. Authored-by: Ruifeng Zheng Signed-off-by: HyukjinKwon --- .../spark/ml/classification/LinearSVC.scala | 93 +++++++++++++------ .../apache/spark/ml/feature/Instance.scala | 72 -------------- .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ---- .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 ----------- python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ---- python/pyspark/ml/param/shared.pyi | 5 - 11 files changed, 87 insertions(+), 225 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index a2e7b0fadd4cb..77272c65eb231 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSizeInMB { + with HasAggregationDepth with HasThreshold with HasBlockSize { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSizeInMB -> 0.0) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) } /** @@ -153,13 +153,22 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Sets the value of param [[blockSizeInMB]]. - * Default is 0.0. + * Set block size for stacking input data in matrices. + * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; + * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines + * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). + * Recommended size is between 10 and 1000. An appropriate choice of the block size depends + * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, + * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). + * Note that existing BLAS implementations are mainly optimized for dense matrices, if the + * input dataset is sparse, stacking may bring no performance gain, the worse is possible + * performance regression. + * Default is 1. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSizeInMB(value: Double): this.type = set(blockSizeInMB, value) + def setBlockSize(value: Int): this.type = set(blockSize, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -168,19 +177,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, - blockSizeInMB) - - if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") - } + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) val instances = extractInstances(dataset) .setName("training instances") + if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { + instances.persist(StorageLevel.MEMORY_AND_DISK) + } + + var requestedMetrics = Seq("mean", "std", "count") + if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) + .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -190,12 +199,14 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - - var actualBlockSizeInMB = $(blockSizeInMB) - if (actualBlockSizeInMB == 0) { - actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB - require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") - instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) + if ($(blockSize) > 1) { + val scale = 1.0 / summarizer.count / numFeatures + val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum + instr.logNamedValue("sparsity", sparsity.toString) + if (sparsity > 0.5) { + instr.logWarning(s"sparsity of input dataset is $sparsity, " + + s"which may hurt performance in high-level BLAS.") + } } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -234,8 +245,12 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = - trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) + val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { + trainOnRows(instances, featuresStd, regularization, optimizer) + } else { + trainOnBlocks(instances, featuresStd, regularization, optimizer) + } + if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -269,9 +284,35 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainImpl( + private def trainOnRows( + instances: RDD[Instance], + featuresStd: Array[Double], + regularization: Option[L2Regularization], + optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { + val numFeatures = featuresStd.length + val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures + + val bcFeaturesStd = instances.context.broadcast(featuresStd) + val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) + val costFun = new RDDLossFunction(instances, getAggregatorFunc, + regularization, $(aggregationDepth)) + + val states = optimizer.iterations(new CachedDiffFunction(costFun), + Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) + + val arrayBuilder = mutable.ArrayBuilder.make[Double] + var state: optimizer.State = null + while (states.hasNext) { + state = states.next() + arrayBuilder += state.adjustedValue + } + bcFeaturesStd.destroy() + + (if (state != null) state.x.toArray else null, arrayBuilder.result) + } + + private def trainOnBlocks( instances: RDD[Instance], - actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -285,11 +326,9 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - - val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong - val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) + val blocks = InstanceBlock.blokify(standardized, $(blockSize)) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") + .setName(s"training blocks (blockSize=${$(blockSize)})") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index 0b47c48e9a922..db5f88d5dddc8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml.feature -import scala.collection.mutable - import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -102,32 +100,6 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { - /** - * Suggested value for BlockSizeInMB in Level-2 routine cases. - * According to performance tests of BLAS routine (see SPARK-31714) and - * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for - * linear models using Level-2 routine (GEMV) to perform prediction and - * gradient computation. 
- */ - val DefaultBlockSizeInMB = 1.0 - - private def getBlockMemUsage( - numCols: Long, - numRows: Long, - nnz: Long, - allUnitWeight: Boolean): Long = { - val doubleBytes = java.lang.Double.BYTES - val arrayHeader = 12L - val denseSize = Matrices.getDenseSize(numCols, numRows) - val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) - val matrixSize = math.min(denseSize, sparseSize) - if (allUnitWeight) { - matrixSize + doubleBytes * numRows + arrayHeader * 2 - } else { - matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 - } - } - def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -142,50 +114,6 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } - - def blokifyWithMaxMemUsage( - instanceIterator: Iterator[Instance], - maxMemUsage: Long): Iterator[InstanceBlock] = { - require(maxMemUsage > 0) - - new Iterator[InstanceBlock]() { - private var numCols = -1L - - override def hasNext: Boolean = instanceIterator.hasNext - - override def next(): InstanceBlock = { - val buff = mutable.ArrayBuilder.make[Instance] - var buffCnt = 0L - var buffNnz = 0L - var buffUnitWeight = true - var blockMemUsage = 0L - - while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { - val instance: Instance = instanceIterator.next() - if (numCols < 0L) numCols = instance.features.size - require(numCols == instance.features.size) - val nnz = instance.features.numNonzeros - - buff += instance - buffCnt += 1L - buffNnz += nnz - buffUnitWeight &&= (instance.weight == 1) - blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) - } - - // the block mem usage may slightly exceed threshold, not a big issue. - // and this ensure even if one row exceed block limit, each block has one row - InstanceBlock.fromInstances(buff.result()) - } - } - } - - def blokifyWithMaxMemUsage( - instances: RDD[Instance], - maxMemUsage: Long): RDD[InstanceBlock] = { - require(maxMemUsage > 0) - instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) - } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 64261bdfac7d5..7fd5f5938b565 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,12 +108,7 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true), - ParamDesc[Double]("blockSizeInMB", "Maximum memory in MB for stacking input data " + - "in blocks. Data is stacked within partitions. If more than remaining data size in a " + - "partition then it is adjusted to the data size. If 0, try to infer an appropriate value " + - "based on the statistics of dataset. 
Must be >= 0.", - Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 1c741545dade0..60203eba61ea5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,22 +562,4 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } - -/** - * Trait for shared param blockSizeInMB (default: 0.0). This trait may be changed or - * removed between minor versions. - */ -trait HasBlockSizeInMB extends Params { - - /** - * Param for Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.. - * @group expertParam - */ - final val blockSizeInMB: DoubleParam = new DoubleParam(this, "blockSizeInMB", "Maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", ParamValidators.gtEq(0.0)) - - setDefault(blockSizeInMB, 0.0) - - /** @group expertGetParam */ - final def getBlockSizeInMB: Double = $(blockSizeInMB) -} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index 55558f06ee362..a66397324c1a6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => - val model2 = lsvc.setBlockSizeInMB(s).fit(dataset) + Seq(4, 16, 64).foreach { blockSize => + val model2 = lsvc.setBlockSize(blockSize).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index f1e071357bab7..d780bdf5f5dc8 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,58 +74,4 @@ class InstanceSuite extends SparkFunSuite{ } } - test("InstanceBlock: blokify with max memory usage") { - val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) - val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) - val instances = Seq(instance1, instance2) - - val blocks = InstanceBlock - .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray - require(blocks.length == 1) - val block = blocks.head - assert(block.size === 2) - assert(block.numFeatures === 2) - block.instanceIterator.zipWithIndex.foreach { - case (instance, i) => - assert(instance.label === instances(i).label) - 
assert(instance.weight === instances(i).weight) - assert(instance.features.toArray === instances(i).features.toArray) - } - Seq(0, 1).foreach { i => - val nzIter = block.getNonZeroIter(i) - val vec = Vectors.sparse(2, nzIter.toSeq) - assert(vec.toArray === instances(i).features.toArray) - } - - // instances larger than maxMemUsage - val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) - InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size - InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size - - // different numFeatures - intercept[IllegalArgumentException] { - InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size - } - - // nnz = 10 - val sparseInstance = Instance(-2.0, 3.0, - Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) - - // normally, memory usage of a block does not exceed maxMemUsage too much - val maxMemUsage = 1 << 18 - val mixedIter = Iterator.fill(100)(denseInstance) ++ - Iterator.fill(1000)(sparseInstance) ++ - Iterator.fill(10)(denseInstance) ++ - Iterator.fill(10)(sparseInstance) ++ - Iterator.fill(100)(denseInstance) ++ - Iterator.fill(100)(sparseInstance) - InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) - .foreach { block => - val doubleBytes = java.lang.Double.BYTES - val arrayHeader = 12L - val blockMemUsage = block.matrix.getSizeInBytes + - (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 - require(blockMemUsage < maxMemUsage * 1.05) - } - } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index f96bbd4d33577..d6c861361a248 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, HasBlockSizeInMB, Param, Params, \ - TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ + HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSizeInMB): + HasBlockSize): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSizeInMB=0.0) + blockSize=1) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSizeInMB() - 0.0 + >>> model.getBlockSize() + 1 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSizeInMB=0.0): + aggregationDepth=2, blockSize=1): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSizeInMB(self, value): + def setBlockSize(self, value): """ - Sets the value of :py:attr:`blockSizeInMB`. + Sets the value of :py:attr:`blockSize`. """ - return self._set(blockSizeInMB=value) + return self._set(blockSize=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 241f5baf8dfd4..55afc20a54cb9 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,7 +26,6 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, - HasBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -173,7 +172,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSizeInMB, + HasBlockSize, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -199,7 +198,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSizeInMB: float = ... + blockSize: int = ... ) -> None: ... def setParams( self, @@ -216,7 +215,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSizeInMB: float = ... + blockSize: int = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -226,7 +225,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... def setWeightCol(self, value: str) -> LinearSVC: ... 
def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSizeInMB(self, value: float) -> LinearSVC: ... + def setBlockSize(self, value: int) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index b6fc170abe788..bc1ea87ad629c 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,11 +165,7 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt"), - ("blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is " + - "stacked within partitions. If more than remaining data size in a partition then it " + - "is adjusted to the data size. If 0, try to infer an appropriate value based on the " + - "statistics of dataset. Must be >= 0.", "0.0", "TypeConverters.toFloat")] + "adjusted to the size of this data.", None, "TypeConverters.toInt")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index a829a2e76b380..24fb0d3e2554d 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,21 +597,3 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) - - -class HasBlockSizeInMB(Params): - """ - Mixin for param blockSizeInMB: maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0. - """ - - blockSizeInMB = Param(Params._dummy(), "blockSizeInMB", "maximum memory in MB for stacking input data in blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value based on the statistics of dataset. Must be >= 0.", typeConverter=TypeConverters.toFloat) - - def __init__(self): - super(HasBlockSizeInMB, self).__init__() - self._setDefault(blockSizeInMB=0.0) - - def getBlockSizeInMB(self): - """ - Gets the value of blockSizeInMB or its default value. - """ - return self.getOrDefault(self.blockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index bbb4890455de7..5999c0eaa4661 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,8 +185,3 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... - -class HasBlockSizeInMB(Params): - blockSizeInMB: Param[float] - def __init__(self) -> None: ... - def getBlockSizeInMB(self) -> float: ... From 9f983a68f1fdefcd033ea65999ab916b61cba8b3 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Thu, 12 Nov 2020 12:22:25 +0900 Subject: [PATCH 0452/1009] [SPARK-30294][SS][FOLLOW-UP] Directly override RDD methods ### Why are the changes needed? Follow the comment: https://github.com/apache/spark/pull/26935#discussion_r514697997 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test and Mima test. 
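For reference, a minimal sketch of the pattern this follow-up applies (hypothetical names and simplified signatures, not the actual `StateStoreRDD` code): the abstract parent overrides the `RDD` hooks directly instead of exposing protected `_getPartitions`/`_getPreferredLocations` helpers that every subclass has to forward to.

```
import scala.reflect.ClassTag

import org.apache.spark.Partition
import org.apache.spark.rdd.RDD

// Simplified sketch: shared behavior lives in direct overrides on the abstract base class.
abstract class BaseStoreRDD[T: ClassTag, U: ClassTag](dataRDD: RDD[T])
  extends RDD[U](dataRDD) {

  // Inherited as-is by concrete subclasses; the real implementation resolves the executor
  // that already has the relevant StateStoreProvider loaded.
  override def getPreferredLocations(partition: Partition): Seq[String] = Seq.empty
}

// Concrete subclasses now only override getPartitions and compute.
```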
Closes #30344 from xuanyuanking/SPARK-30294-follow. Authored-by: Yuanjian Li Signed-off-by: HyukjinKwon --- .../streaming/state/StateStoreRDD.scala | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index eda191f28bf18..b894e771a6fe2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration -// This doesn't directly override RDD methods as MiMa complains it. abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( dataRDD: RDD[T], checkpointLocation: String, @@ -45,16 +44,13 @@ abstract class BaseStateStoreRDD[T: ClassTag, U: ClassTag]( protected val hadoopConfBroadcast = dataRDD.context.broadcast( new SerializableConfiguration(sessionState.newHadoopConf())) - /** Implementations can simply call this method in getPreferredLocations. */ - protected def _getPartitions: Array[Partition] = dataRDD.partitions - /** * Set the preferred location of each partition using the executor that has the related * [[StateStoreProvider]] already loaded. * * Implementations can simply call this method in getPreferredLocations. */ - protected def _getPreferredLocations(partition: Partition): Seq[String] = { + override def getPreferredLocations(partition: Partition): Seq[String] = { val stateStoreProviderId = getStateProviderId(partition) storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq } @@ -87,10 +83,7 @@ class ReadStateStoreRDD[T: ClassTag, U: ClassTag]( extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, sessionState, storeCoordinator, extraOptions) { - override protected def getPartitions: Array[Partition] = _getPartitions - - override def getPreferredLocations(partition: Partition): Seq[String] = - _getPreferredLocations(partition) + override protected def getPartitions: Array[Partition] = dataRDD.partitions override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { val storeProviderId = getStateProviderId(partition) @@ -124,10 +117,7 @@ class StateStoreRDD[T: ClassTag, U: ClassTag]( extends BaseStateStoreRDD[T, U](dataRDD, checkpointLocation, queryRunId, operatorId, sessionState, storeCoordinator, extraOptions) { - override protected def getPartitions: Array[Partition] = _getPartitions - - override def getPreferredLocations(partition: Partition): Seq[String] = - _getPreferredLocations(partition) + override protected def getPartitions: Array[Partition] = dataRDD.partitions override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = { val storeProviderId = getStateProviderId(partition) From 22baf05a9ec6fffe53bd34d35c122de776464dd0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 12 Nov 2020 15:36:31 +0900 Subject: [PATCH 0453/1009] [SPARK-33408][SPARK-32354][K8S][R] Use R 3.6.3 in K8s R image and re-enable RTestsSuite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR aims to use R 3.6.3 in K8s R image and re-enable `RTestsSuite`. ### Why are the changes needed? 
Jenkins Server is using `R 3.6.3`. ``` + SPARK_HOME=/home/jenkins/workspace/SparkPullRequestBuilder-K8s + /usr/bin/R CMD check --as-cran --no-tests SparkR_3.1.0.tar.gz * using log directory ‘/home/jenkins/workspace/SparkPullRequestBuilder-K8s/R/SparkR.Rcheck’ * using R version 3.6.3 (2020-02-29) ``` OpenJDK docker image is using `R 3.5.2 (2018-12-20)` which is old and currently `spark-3.0.1` fails to run SparkR. ``` $ cd spark-3.0.1-bin-hadoop3.2 $ bin/docker-image-tool.sh -R kubernetes/dockerfiles/spark/bindings/R/Dockerfile -n build ... exit code: 1 termination reason: Error ... $ bin/spark-submit --master k8s://https://192.168.64.49:8443 --deploy-mode cluster --conf spark.kubernetes.container.image=spark-r:latest local:///opt/spark/examples/src/main/r/dataframe.R $ k logs dataframe-r-b1c14b75b0c09eeb-driver ... + exec /usr/bin/tini -s -- /opt/spark/bin/spark-submit --conf spark.driver.bindAddress=172.17.0.4 --deploy-mode client --properties-file /opt/spark/conf/spark.properties --class org.apache.spark.deploy.RRunner local:///opt/spark/examples/src/main/r/dataframe.R 20/11/10 06:03:58 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable log4j:WARN No appenders could be found for logger (io.netty.util.internal.logging.InternalLoggerFactory). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Error: package or namespace load failed for ‘SparkR’ in rbind(info, getNamespaceInfo(env, "S3methods")): number of columns of matrices must match (see arg 2) In addition: Warning message: package ‘SparkR’ was built under R version 4.0.2 Execution halted ``` In addition, this PR aims to recover the test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass K8S IT Jenkins job. Closes #30130 from dongjoon-hyun/SPARK-32354. 
Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../src/main/dockerfiles/spark/bindings/R/Dockerfile | 9 ++++++++- .../deploy/k8s/integrationtest/KubernetesSuite.scala | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index 59f375b707ca7..bd645e40677d0 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -25,7 +25,14 @@ USER 0 RUN mkdir ${SPARK_HOME}/R -RUN apt-get update && apt install -y r-base r-base-dev && rm -rf /var/cache/apt/* +# Install R 3.6.3 (http://cloud.r-project.org/bin/linux/debian/) +RUN \ + echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran35/" >> /etc/apt/sources.list && \ + apt install -y gnupg && \ + apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' && \ + apt-get update && \ + apt install -y -t buster-cran35 r-base r-base-dev && \ + rm -rf /var/cache/apt/* COPY R ${SPARK_HOME}/R ENV R_HOME /usr/lib/R diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 28ab37152cf4c..f1d8217e31b71 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -43,8 +43,7 @@ import org.apache.spark.internal.config._ class KubernetesSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SecretsTestsSuite with PythonTestsSuite with ClientModeTestsSuite with PodTemplateSuite with PVTestsSuite - // TODO(SPARK-32354): Fix and re-enable the R tests. - with DepsTestsSuite with DecommissionSuite /* with RTestsSuite */ with Logging with Eventually + with DepsTestsSuite with DecommissionSuite with RTestsSuite with Logging with Eventually with Matchers { From 6d31daeb6a2c5607ffe3b23ffb381626ad57f576 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Thu, 12 Nov 2020 08:50:32 +0000 Subject: [PATCH 0454/1009] [SPARK-33386][SQL] Accessing array elements in ElementAt/Elt/GetArrayItem should failed if index is out of bound MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Instead of returning NULL, throws runtime ArrayIndexOutOfBoundsException when ansiMode is enable for `element_at`,`elt`, `GetArrayItem` functions. ### Why are the changes needed? For ansiMode. ### Does this PR introduce any user-facing change? When `spark.sql.ansi.enabled` = true, Spark will throw `ArrayIndexOutOfBoundsException` if out-of-range index when accessing array elements ### How was this patch tested? Added UT and existing UT. Closes #30297 from leanken/leanken-SPARK-33386. 
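For illustration, the behavior change looks like this from a spark-shell session (a sketch only; the expected outputs mirror the new golden files added below):

```
// Default (non-ANSI) mode: invalid indices still return NULL.
spark.sql("select element_at(array(1, 2, 3), 5)").show()   // NULL
spark.sql("select elt(4, '123', '456')").show()            // NULL
spark.sql("select array(1, 2, 3)[5]").show()               // NULL

// ANSI mode: the same queries fail at runtime.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("select element_at(array(1, 2, 3), 5)").show()
// java.lang.ArrayIndexOutOfBoundsException: Invalid index: 5, numElements: 3

// Index 0 throws in both modes:
spark.sql("select element_at(array(1, 2, 3), 0)").show()
// java.lang.ArrayIndexOutOfBoundsException: SQL array indices start at 1
```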
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 9 +- .../sql/catalyst/analysis/TypeCoercion.scala | 4 +- .../expressions/ProjectionOverSchema.scala | 6 +- .../catalyst/expressions/SelectedField.scala | 2 +- .../expressions/collectionOperations.scala | 53 ++-- .../expressions/complexTypeExtractors.scala | 67 +++-- .../expressions/stringExpressions.scala | 33 ++- .../sql/catalyst/optimizer/ComplexTypes.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 7 +- .../CollectionExpressionsSuite.scala | 136 ++++++---- .../expressions/ComplexTypeSuite.scala | 23 ++ .../expressions/StringExpressionsSuite.scala | 32 ++- .../resources/sql-tests/inputs/ansi/array.sql | 1 + .../test/resources/sql-tests/inputs/array.sql | 12 + .../sql-tests/results/ansi/array.sql.out | 234 ++++++++++++++++++ .../resources/sql-tests/results/array.sql.out | 67 ++++- 16 files changed, 584 insertions(+), 104 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index d6e99312bb66e..c2b36033e318e 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -110,7 +110,14 @@ SELECT * FROM t; ### SQL Functions The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - - `size`: This function returns null for null input under ANSI mode. + - `size`: This function returns null for null input. + - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + +### SQL Operators + +The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). + - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index becdef8b9c603..e8dab28b5e907 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -840,8 +840,8 @@ object TypeCoercion { plan resolveOperators { case p => p transformExpressionsUp { // Skip nodes if unresolved or not enough children - case c @ Elt(children) if !c.childrenResolved || children.size < 2 => c - case c @ Elt(children) => + case c @ Elt(children, _) if !c.childrenResolved || children.size < 2 => c + case c @ Elt(children, _) => val index = children.head val newIndex = ImplicitTypeCasts.implicitCast(index, IntegerType).getOrElse(index) val newInputs = if (conf.eltOutputAsString || diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala index 13c6f8db7c129..6f1d9d065ab1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ProjectionOverSchema.scala @@ -34,8 +34,10 @@ case class ProjectionOverSchema(schema: StructType) { expr match { case a: AttributeReference if fieldNames.contains(a.name) => Some(a.copy(dataType = schema(a.name).dataType)(a.exprId, a.qualifier)) - case GetArrayItem(child, arrayItemOrdinal) => - getProjection(child).map { projection => GetArrayItem(projection, arrayItemOrdinal) } + case GetArrayItem(child, arrayItemOrdinal, failOnError) => + getProjection(child).map { + projection => GetArrayItem(projection, arrayItemOrdinal, failOnError) + } case a: GetArrayStructFields => getProjection(a.child).map(p => (p, p.dataType)).map { case (projection, ArrayType(projSchema @ StructType(_), _)) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala index 7ba3d302d553b..adcc4be10687e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SelectedField.scala @@ -119,7 +119,7 @@ object SelectedField { throw new AnalysisException(s"DataType '$x' is not supported by MapKeys.") } selectField(child, opt) - case GetArrayItem(child, _) => + case GetArrayItem(child, _, _) => // GetArrayItem does not select a field from a struct (i.e. prune the struct) so it can't be // the top-level extractor. However it can be part of an extractor chain. 
val ArrayType(_, containsNull) = child.dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index cb081b80ba096..ee98ebf5a8a50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1906,8 +1906,10 @@ case class ArrayPosition(left: Expression, right: Expression) @ExpressionDescription( usage = """ _FUNC_(array, index) - Returns element of array at given (1-based) index. If index < 0, - accesses elements from the last to the first. Returns NULL if the index exceeds the length - of the array. + accesses elements from the last to the first. The function returns NULL + if the index exceeds the length of the array and `spark.sql.ansi.enabled` is set to false. + If `spark.sql.ansi.enabled` is set to true, it throws ArrayIndexOutOfBoundsException + for invalid indices. _FUNC_(map, key) - Returns value for given key, or NULL if the key is not contained in the map """, @@ -1919,9 +1921,14 @@ case class ArrayPosition(left: Expression, right: Expression) b """, since = "2.4.0") -case class ElementAt(left: Expression, right: Expression) +case class ElementAt( + left: Expression, + right: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends GetMapValueUtil with GetArrayItemUtil with NullIntolerant { + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) + @transient private lazy val mapKeyType = left.dataType.asInstanceOf[MapType].keyType @transient private lazy val arrayContainsNull = left.dataType.asInstanceOf[ArrayType].containsNull @@ -1969,7 +1976,7 @@ case class ElementAt(left: Expression, right: Expression) if (ordinal == 0) { false } else if (elements.length < math.abs(ordinal)) { - true + !failOnError } else { if (ordinal < 0) { elements(elements.length + ordinal).nullable @@ -1979,24 +1986,9 @@ case class ElementAt(left: Expression, right: Expression) } } - override def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { - if (ordinal.foldable && !ordinal.nullable) { - val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() - child match { - case CreateArray(ar, _) => - nullability(ar, intOrdinal) - case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => - nullability(elements, intOrdinal) || field.nullable - case _ => - true - } - } else { - true - } - } - override def nullable: Boolean = left.dataType match { - case _: ArrayType => computeNullabilityFromArray(left, right) + case _: ArrayType => + computeNullabilityFromArray(left, right, failOnError, nullability) case _: MapType => true } @@ -2008,7 +2000,12 @@ case class ElementAt(left: Expression, right: Expression) val array = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Int] if (array.numElements() < math.abs(index)) { - null + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${array.numElements()}") + } else { + null + } } else { val idx = if (index == 0) { throw new ArrayIndexOutOfBoundsException("SQL array indices start at 1") @@ -2042,10 +2039,20 @@ case class ElementAt(left: Expression, right: Expression) } else { "" } + + val indexOutOfBoundBranch = if (failOnError) { + s"""throw new ArrayIndexOutOfBoundsException( + | "Invalid index: 
" + $index + ", numElements: " + $eval1.numElements() + |); + """.stripMargin + } else { + s"${ev.isNull} = true;" + } + s""" |int $index = (int) $eval2; |if ($eval1.numElements() < Math.abs($index)) { - | ${ev.isNull} = true; + | $indexOutOfBoundBranch |} else { | if ($index == 0) { | throw new ArrayIndexOutOfBoundsException("SQL array indices start at 1"); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 60afe140960cc..363d388692c9f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.util.{quoteIdentifier, ArrayData, GenericArrayData, MapData, TypeUtils} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -222,10 +223,15 @@ case class GetArrayStructFields( * * We need to do type checking here as `ordinal` expression maybe unresolved. */ -case class GetArrayItem(child: Expression, ordinal: Expression) +case class GetArrayItem( + child: Expression, + ordinal: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryExpression with GetArrayItemUtil with ExpectsInputTypes with ExtractValue with NullIntolerant { + def this(child: Expression, ordinal: Expression) = this(child, ordinal, SQLConf.get.ansiEnabled) + // We have done type checking for child in `ExtractValue`, so only need to check the `ordinal`. 
override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType, IntegralType) @@ -234,13 +240,29 @@ case class GetArrayItem(child: Expression, ordinal: Expression) override def left: Expression = child override def right: Expression = ordinal - override def nullable: Boolean = computeNullabilityFromArray(left, right) + override def nullable: Boolean = + computeNullabilityFromArray(left, right, failOnError, nullability) override def dataType: DataType = child.dataType.asInstanceOf[ArrayType].elementType + private def nullability(elements: Seq[Expression], ordinal: Int): Boolean = { + if (ordinal >= 0 && ordinal < elements.length) { + elements(ordinal).nullable + } else { + !failOnError + } + } + protected override def nullSafeEval(value: Any, ordinal: Any): Any = { val baseValue = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Number].intValue() - if (index >= baseValue.numElements() || index < 0 || baseValue.isNullAt(index)) { + if (index >= baseValue.numElements() || index < 0) { + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${baseValue.numElements()}") + } else { + null + } + } else if (baseValue.isNullAt(index)) { null } else { baseValue.get(index, dataType) @@ -251,15 +273,28 @@ case class GetArrayItem(child: Expression, ordinal: Expression) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val index = ctx.freshName("index") val nullCheck = if (child.dataType.asInstanceOf[ArrayType].containsNull) { - s" || $eval1.isNullAt($index)" + s"""else if ($eval1.isNullAt($index)) { + ${ev.isNull} = true; + } + """ } else { "" } + + val indexOutOfBoundBranch = if (failOnError) { + s"""throw new ArrayIndexOutOfBoundsException( + | "Invalid index: " + $index + ", numElements: " + $eval1.numElements() + |); + """.stripMargin + } else { + s"${ev.isNull} = true;" + } + s""" final int $index = (int) $eval2; - if ($index >= $eval1.numElements() || $index < 0$nullCheck) { - ${ev.isNull} = true; - } else { + if ($index >= $eval1.numElements() || $index < 0) { + $indexOutOfBoundBranch + } $nullCheck else { ${ev.value} = ${CodeGenerator.getValue(eval1, dataType, index)}; } """ @@ -273,20 +308,24 @@ case class GetArrayItem(child: Expression, ordinal: Expression) trait GetArrayItemUtil { /** `Null` is returned for invalid ordinals. 
*/ - protected def computeNullabilityFromArray(child: Expression, ordinal: Expression): Boolean = { + protected def computeNullabilityFromArray( + child: Expression, + ordinal: Expression, + failOnError: Boolean, + nullability: (Seq[Expression], Int) => Boolean): Boolean = { + val arrayContainsNull = child.dataType.asInstanceOf[ArrayType].containsNull if (ordinal.foldable && !ordinal.nullable) { val intOrdinal = ordinal.eval().asInstanceOf[Number].intValue() child match { - case CreateArray(ar, _) if intOrdinal < ar.length => - ar(intOrdinal).nullable - case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) - if intOrdinal < elements.length => - elements(intOrdinal).nullable || field.nullable + case CreateArray(ar, _) => + nullability(ar, intOrdinal) + case GetArrayStructFields(CreateArray(elements, _), field, _, _, _) => + nullability(elements, intOrdinal) || field.nullable case _ => true } } else { - true + if (failOnError) arrayContainsNull else true } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 1fe990207160c..16e22940495f1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{ByteArray, UTF8String} @@ -231,7 +232,12 @@ case class ConcatWs(children: Seq[Expression]) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(n, input1, input2, ...) - Returns the `n`-th input, e.g., returns `input2` when `n` is 2.", + usage = """ + _FUNC_(n, input1, input2, ...) - Returns the `n`-th input, e.g., returns `input2` when `n` is 2. + The function returns NULL if the index exceeds the length of the array + and `spark.sql.ansi.enabled` is set to false. If `spark.sql.ansi.enabled` is set to true, + it throws ArrayIndexOutOfBoundsException for invalid indices. 
+ """, examples = """ Examples: > SELECT _FUNC_(1, 'scala', 'java'); @@ -239,7 +245,11 @@ case class ConcatWs(children: Seq[Expression]) """, since = "2.0.0") // scalastyle:on line.size.limit -case class Elt(children: Seq[Expression]) extends Expression { +case class Elt( + children: Seq[Expression], + failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression { + + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) private lazy val indexExpr = children.head private lazy val inputExprs = children.tail.toArray @@ -275,7 +285,12 @@ case class Elt(children: Seq[Expression]) extends Expression { } else { val index = indexObj.asInstanceOf[Int] if (index <= 0 || index > inputExprs.length) { - null + if (failOnError) { + throw new ArrayIndexOutOfBoundsException( + s"Invalid index: $index, numElements: ${inputExprs.length}") + } else { + null + } } else { inputExprs(index - 1).eval(input) } @@ -323,6 +338,17 @@ case class Elt(children: Seq[Expression]) extends Expression { """.stripMargin }.mkString) + val indexOutOfBoundBranch = if (failOnError) { + s""" + |if (!$indexMatched) { + | throw new ArrayIndexOutOfBoundsException( + | "Invalid index: " + ${index.value} + ", numElements: " + ${inputExprs.length}); + |} + """.stripMargin + } else { + "" + } + ev.copy( code""" |${index.code} @@ -332,6 +358,7 @@ case class Elt(children: Seq[Expression]) extends Expression { |do { | $codes |} while (false); + |$indexOutOfBoundBranch |final ${CodeGenerator.javaType(dataType)} ${ev.value} = $inputVal; |final boolean ${ev.isNull} = ${ev.value} == null; """.stripMargin) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 2ac8f62b67b3d..7a21ce254a235 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -61,7 +61,7 @@ object SimplifyExtractValueOps extends Rule[LogicalPlan] { CreateArray(elems.map(GetStructField(_, ordinal, Some(field.name))), useStringTypeWhenEmpty) // Remove redundant map lookup. - case ga @ GetArrayItem(CreateArray(elems, _), IntegerLiteral(idx)) => + case ga @ GetArrayItem(CreateArray(elems, _), IntegerLiteral(idx), _) => // Instead of creating the array and then selecting one row, remove array creation // altogether. if (idx >= 0 && idx < elems.size) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 21357a492e39e..ef988052affcd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2144,9 +2144,10 @@ object SQLConf { val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") .doc("When true, Spark tries to conform to the ANSI SQL specification: 1. Spark will " + - "throw a runtime exception if an overflow occurs in any operation on integral/decimal " + - "field. 2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + - "the SQL parser.") + "throw an exception at runtime if the inputs to a SQL operator/function are invalid, " + + "e.g. overflow in arithmetic operations, out-of-range index when accessing array elements. " + + "2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + + "the SQL parser. 3. 
Spark will return NULL for null input for function `size`.") .version("3.0.0") .booleanConf .createWithDefault(false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index d59d13d49cef4..6ee88c9eaef86 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1118,58 +1118,62 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } test("correctly handles ElementAt nullability for arrays") { - // CreateArray case - val a = AttributeReference("a", IntegerType, nullable = false)() - val b = AttributeReference("b", IntegerType, nullable = true)() - val array = CreateArray(a :: b :: Nil) - assert(!ElementAt(array, Literal(1)).nullable) - assert(!ElementAt(array, Literal(-2)).nullable) - assert(ElementAt(array, Literal(2)).nullable) - assert(ElementAt(array, Literal(-1)).nullable) - assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) - assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) - - // CreateArray case invalid indices - assert(!ElementAt(array, Literal(0)).nullable) - assert(ElementAt(array, Literal(4)).nullable) - assert(ElementAt(array, Literal(-4)).nullable) - - // GetArrayStructFields case - val f1 = StructField("a", IntegerType, nullable = false) - val f2 = StructField("b", IntegerType, nullable = true) - val structType = StructType(f1 :: f2 :: Nil) - val c = AttributeReference("c", structType, nullable = false)() - val inputArray1 = CreateArray(c :: Nil) - val inputArray1ContainsNull = c.nullable - val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) - assert(!ElementAt(stArray1, Literal(1)).nullable) - assert(!ElementAt(stArray1, Literal(-1)).nullable) - val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) - assert(ElementAt(stArray2, Literal(1)).nullable) - assert(ElementAt(stArray2, Literal(-1)).nullable) - - val d = AttributeReference("d", structType, nullable = true)() - val inputArray2 = CreateArray(c :: d :: Nil) - val inputArray2ContainsNull = c.nullable || d.nullable - val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) - assert(!ElementAt(stArray3, Literal(1)).nullable) - assert(!ElementAt(stArray3, Literal(-2)).nullable) - assert(ElementAt(stArray3, Literal(2)).nullable) - assert(ElementAt(stArray3, Literal(-1)).nullable) - val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) - assert(ElementAt(stArray4, Literal(1)).nullable) - assert(ElementAt(stArray4, Literal(-2)).nullable) - assert(ElementAt(stArray4, Literal(2)).nullable) - assert(ElementAt(stArray4, Literal(-1)).nullable) - - // GetArrayStructFields case invalid indices - assert(!ElementAt(stArray3, Literal(0)).nullable) - assert(ElementAt(stArray3, Literal(4)).nullable) - assert(ElementAt(stArray3, Literal(-4)).nullable) - - assert(ElementAt(stArray4, Literal(0)).nullable) - assert(ElementAt(stArray4, Literal(4)).nullable) - assert(ElementAt(stArray4, Literal(-4)).nullable) + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + // CreateArray case + val a = AttributeReference("a", IntegerType, nullable = false)() + val b = 
AttributeReference("b", IntegerType, nullable = true)() + val array = CreateArray(a :: b :: Nil) + assert(!ElementAt(array, Literal(1)).nullable) + assert(!ElementAt(array, Literal(-2)).nullable) + assert(ElementAt(array, Literal(2)).nullable) + assert(ElementAt(array, Literal(-1)).nullable) + assert(!ElementAt(array, Subtract(Literal(2), Literal(1))).nullable) + assert(ElementAt(array, AttributeReference("ordinal", IntegerType)()).nullable) + + // CreateArray case invalid indices + assert(!ElementAt(array, Literal(0)).nullable) + assert(ElementAt(array, Literal(4)).nullable == !ansiEnabled) + assert(ElementAt(array, Literal(-4)).nullable == !ansiEnabled) + + // GetArrayStructFields case + val f1 = StructField("a", IntegerType, nullable = false) + val f2 = StructField("b", IntegerType, nullable = true) + val structType = StructType(f1 :: f2 :: Nil) + val c = AttributeReference("c", structType, nullable = false)() + val inputArray1 = CreateArray(c :: Nil) + val inputArray1ContainsNull = c.nullable + val stArray1 = GetArrayStructFields(inputArray1, f1, 0, 2, inputArray1ContainsNull) + assert(!ElementAt(stArray1, Literal(1)).nullable) + assert(!ElementAt(stArray1, Literal(-1)).nullable) + val stArray2 = GetArrayStructFields(inputArray1, f2, 1, 2, inputArray1ContainsNull) + assert(ElementAt(stArray2, Literal(1)).nullable) + assert(ElementAt(stArray2, Literal(-1)).nullable) + + val d = AttributeReference("d", structType, nullable = true)() + val inputArray2 = CreateArray(c :: d :: Nil) + val inputArray2ContainsNull = c.nullable || d.nullable + val stArray3 = GetArrayStructFields(inputArray2, f1, 0, 2, inputArray2ContainsNull) + assert(!ElementAt(stArray3, Literal(1)).nullable) + assert(!ElementAt(stArray3, Literal(-2)).nullable) + assert(ElementAt(stArray3, Literal(2)).nullable) + assert(ElementAt(stArray3, Literal(-1)).nullable) + val stArray4 = GetArrayStructFields(inputArray2, f2, 1, 2, inputArray2ContainsNull) + assert(ElementAt(stArray4, Literal(1)).nullable) + assert(ElementAt(stArray4, Literal(-2)).nullable) + assert(ElementAt(stArray4, Literal(2)).nullable) + assert(ElementAt(stArray4, Literal(-1)).nullable) + + // GetArrayStructFields case invalid indices + assert(!ElementAt(stArray3, Literal(0)).nullable) + assert(ElementAt(stArray3, Literal(4)).nullable == !ansiEnabled) + assert(ElementAt(stArray3, Literal(-4)).nullable == !ansiEnabled) + + assert(ElementAt(stArray4, Literal(0)).nullable) + assert(ElementAt(stArray4, Literal(4)).nullable) + assert(ElementAt(stArray4, Literal(-4)).nullable) + } + } } test("Concat") { @@ -1883,4 +1887,32 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal(stringToInterval("interval 1 year"))), Seq(Date.valueOf("2018-01-01"))) } + + test("SPARK-33386: element_at ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val array = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType)) + var expr: Expression = ElementAt(array, Literal(5)) + if (ansiEnabled) { + val errMsg = "Invalid index: 5, numElements: 3" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = ElementAt(array, Literal(-5)) + if (ansiEnabled) { + val errMsg = "Invalid index: -5, numElements: 3" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + // SQL array indices start at 1 exception throws for both mode. 
+ expr = ElementAt(array, Literal(0)) + val errMsg = "SQL array indices start at 1" + checkExceptionInExpression[Exception](expr, errMsg) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 38e32ff2518f7..67ab2071de037 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -62,6 +62,29 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(GetArrayItem(nestedArray, Literal(0)), Seq(1)) } + test("SPARK-33386: GetArrayItem ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + val array = Literal.create(Seq("a", "b"), ArrayType(StringType)) + + if (ansiEnabled) { + checkExceptionInExpression[Exception]( + GetArrayItem(array, Literal(5)), + "Invalid index: 5, numElements: 2" + ) + + checkExceptionInExpression[Exception]( + GetArrayItem(array, Literal(-1)), + "Invalid index: -1, numElements: 2" + ) + } else { + checkEvaluation(GetArrayItem(array, Literal(5)), null) + checkEvaluation(GetArrayItem(array, Literal(-1)), null) + } + } + } + } + test("SPARK-26637 handles GetArrayItem nullability correctly when input array size is constant") { // CreateArray case val a = AttributeReference("a", IntegerType, nullable = false)() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 967ccc42c632d..a1b6cec24f23f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -968,4 +968,34 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate( Sentences(Literal("\"quote"), Literal("\"quote"), Literal("\"quote")) :: Nil) } + + test("SPARK-33386: elt ArrayIndexOutOfBoundsException") { + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + var expr: Expression = Elt(Seq(Literal(4), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: 4, numElements: 2" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = Elt(Seq(Literal(0), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: 0, numElements: 2" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = Elt(Seq(Literal(-1), Literal("123"), Literal("456"))) + if (ansiEnabled) { + val errMsg = "Invalid index: -1, numElements: 2" + checkExceptionInExpression[Exception](expr, 
errMsg) + } else { + checkEvaluation(expr, null) + } + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql new file mode 100644 index 0000000000000..662756cbfb0b0 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/array.sql @@ -0,0 +1 @@ +--IMPORT array.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 984321ab795fc..f73b653659eb4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -90,3 +90,15 @@ select size(date_array), size(timestamp_array) from primitive_arrays; + +-- index out of range for array elements +select element_at(array(1, 2, 3), 5); +select element_at(array(1, 2, 3), -5); +select element_at(array(1, 2, 3), 0); + +select elt(4, '123', '456'); +select elt(0, '123', '456'); +select elt(-1, '123', '456'); + +select array(1, 2, 3)[5]; +select array(1, 2, 3)[-1]; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out new file mode 100644 index 0000000000000..12a77e36273fa --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -0,0 +1,234 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 20 + + +-- !query +create temporary view data as select * from values + ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), + ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) + as data(a, b, c) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from data +-- !query schema +struct,c:array>> +-- !query output +one [11,12,13] [[111,112,113],[121,122,123]] +two [21,22,23] [[211,212,213],[221,222,223]] + + +-- !query +select a, b[0], b[0] + b[1] from data +-- !query schema +struct +-- !query output +one 11 23 +two 21 43 + + +-- !query +select a, c[0][0] + c[0][0 + 1] from data +-- !query schema +struct +-- !query output +one 223 +two 423 + + +-- !query +create temporary view primitive_arrays as select * from values ( + array(true), + array(2Y, 1Y), + array(2S, 1S), + array(2, 1), + array(2L, 1L), + array(9223372036854775809, 9223372036854775808), + array(2.0D, 1.0D), + array(float(2.0), float(1.0)), + array(date '2016-03-14', date '2016-03-13'), + array(timestamp '2016-11-15 20:54:00.000', timestamp '2016-11-12 20:54:00.000') +) as primitive_arrays( + boolean_array, + tinyint_array, + smallint_array, + int_array, + bigint_array, + decimal_array, + double_array, + float_array, + date_array, + timestamp_array +) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from primitive_arrays +-- !query schema +struct,tinyint_array:array,smallint_array:array,int_array:array,bigint_array:array,decimal_array:array,double_array:array,float_array:array,date_array:array,timestamp_array:array> +-- !query output +[true] [2,1] [2,1] [2,1] [2,1] [9223372036854775809,9223372036854775808] [2.0,1.0] [2.0,1.0] [2016-03-14,2016-03-13] [2016-11-15 20:54:00,2016-11-12 20:54:00] + + +-- !query +select + array_contains(boolean_array, true), array_contains(boolean_array, false), + array_contains(tinyint_array, 2Y), array_contains(tinyint_array, 0Y), + array_contains(smallint_array, 2S), array_contains(smallint_array, 0S), + array_contains(int_array, 2), array_contains(int_array, 0), + array_contains(bigint_array, 
2L), array_contains(bigint_array, 0L), + array_contains(decimal_array, 9223372036854775809), array_contains(decimal_array, 1), + array_contains(double_array, 2.0D), array_contains(double_array, 0.0D), + array_contains(float_array, float(2.0)), array_contains(float_array, float(0.0)), + array_contains(date_array, date '2016-03-14'), array_contains(date_array, date '2016-01-01'), + array_contains(timestamp_array, timestamp '2016-11-15 20:54:00.000'), array_contains(timestamp_array, timestamp '2016-01-01 20:54:00.000') +from primitive_arrays +-- !query schema +struct +-- !query output +true false true false true false true false true false true false true false true false true false true false + + +-- !query +select array_contains(b, 11), array_contains(c, array(111, 112, 113)) from data +-- !query schema +struct +-- !query output +false false +true true + + +-- !query +select + sort_array(boolean_array), + sort_array(tinyint_array), + sort_array(smallint_array), + sort_array(int_array), + sort_array(bigint_array), + sort_array(decimal_array), + sort_array(double_array), + sort_array(float_array), + sort_array(date_array), + sort_array(timestamp_array) +from primitive_arrays +-- !query schema +struct,sort_array(tinyint_array, true):array,sort_array(smallint_array, true):array,sort_array(int_array, true):array,sort_array(bigint_array, true):array,sort_array(decimal_array, true):array,sort_array(double_array, true):array,sort_array(float_array, true):array,sort_array(date_array, true):array,sort_array(timestamp_array, true):array> +-- !query output +[true] [1,2] [1,2] [1,2] [1,2] [9223372036854775808,9223372036854775809] [1.0,2.0] [1.0,2.0] [2016-03-13,2016-03-14] [2016-11-12 20:54:00,2016-11-15 20:54:00] + + +-- !query +select sort_array(array('b', 'd'), '1') +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), '1')' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + + +-- !query +select sort_array(array('b', 'd'), cast(NULL as boolean)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'sort_array(array('b', 'd'), CAST(NULL AS BOOLEAN))' due to data type mismatch: Sort order in second argument requires a boolean literal.; line 1 pos 7 + + +-- !query +select + size(boolean_array), + size(tinyint_array), + size(smallint_array), + size(int_array), + size(bigint_array), + size(decimal_array), + size(double_array), + size(float_array), + size(date_array), + size(timestamp_array) +from primitive_arrays +-- !query schema +struct +-- !query output +1 2 2 2 2 2 2 2 2 2 + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 5, numElements: 3 + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -5, numElements: 3 + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +select elt(4, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 4, numElements: 2 + + +-- !query +select elt(0, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 0, numElements: 2 + + +-- !query 
+select elt(-1, '123', '456') +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -1, numElements: 2 + + +-- !query +select array(1, 2, 3)[5] +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: 5, numElements: 3 + + +-- !query +select array(1, 2, 3)[-1] +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +Invalid index: -1, numElements: 3 diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 2c2b1a7856304..9bf0d89ed71fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 20 -- !query @@ -160,3 +160,68 @@ from primitive_arrays struct -- !query output 1 2 2 2 2 2 2 2 2 2 + + +-- !query +select element_at(array(1, 2, 3), 5) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select element_at(array(1, 2, 3), -5) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select element_at(array(1, 2, 3), 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +select elt(4, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(0, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select elt(-1, '123', '456') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array(1, 2, 3)[5] +-- !query schema +struct +-- !query output +NULL + + +-- !query +select array(1, 2, 3)[-1] +-- !query schema +struct +-- !query output +NULL From 4335af075a8ad27c4906f03ae5f8cd8f9a754e5a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 12 Nov 2020 18:53:06 +0900 Subject: [PATCH 0455/1009] [MINOR][DOC] spark.executor.memoryOverhead is not cluster-mode only ### What changes were proposed in this pull request? Remove "in cluster mode" from the description of `spark.executor.memoryOverhead` ### Why are the changes needed? fix correctness issue in documentaion ### Does this PR introduce _any_ user-facing change? yes, users may not get confused about the description `spark.executor.memoryOverhead` ### How was this patch tested? pass GA doc generation Closes #30311 from yaooqinn/minordoc. 
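For context, a minimal Scala sketch of where this setting is supplied (illustrative only, not part of the patch; the app name and sizes below are arbitrary):

```
import org.apache.spark.sql.SparkSession

// The setting is not specific to cluster deploy mode; per the updated docs it is
// currently supported on YARN and Kubernetes.
val spark = SparkSession.builder()
  .appName("memory-overhead-example")                // arbitrary example name
  .config("spark.executor.memory", "4g")             // on-heap executor memory
  .config("spark.executor.memoryOverhead", "512m")   // additional non-heap memory per executor
  .getOrCreate()
```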
Authored-by: Kent Yao Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/internal/config/package.scala | 4 ++-- docs/configuration.md | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 6239ef0491a6f..2bb1290963f87 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -302,8 +302,8 @@ package object config { .createWithDefaultString("1g") private[spark] val EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.executor.memoryOverhead") - .doc("The amount of non-heap memory to be allocated per executor in cluster mode, " + - "in MiB unless otherwise specified.") + .doc("The amount of non-heap memory to be allocated per executor, in MiB unless otherwise" + + " specified.") .version("2.3.0") .bytesConf(ByteUnit.MiB) .createOptional diff --git a/docs/configuration.md b/docs/configuration.md index aab18f23a083f..d4738f1c363f0 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -274,10 +274,9 @@ of the most common options to set are: spark.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 - Amount of additional memory to be allocated per executor process in cluster mode, in MiB unless - otherwise specified. This is memory that accounts for things like VM overheads, interned strings, - other native overheads, etc. This tends to grow with the executor size (typically 6-10%). - This option is currently supported on YARN and Kubernetes. + Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. + This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. + This tends to grow with the executor size (typically 6-10%). This option is currently supported on YARN and Kubernetes.
    Note: Additional memory includes PySpark executor memory (when spark.executor.pyspark.memory is not configured) and memory used by other From a2887164bcca152e2402169bf6991c7dfb3ac11c Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Thu, 12 Nov 2020 19:14:07 +0800 Subject: [PATCH 0456/1009] [SPARK-32907][ML][PYTHON] adaptively blockify instances - LinearSVC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? 1, use `maxBlockSizeInMB` instead of `blockSize`(#rows) to control the stacking of vectors; 2, infer an appropriate `maxBlockSizeInMB` if set 0; ### Why are the changes needed? the performance gain is mainly related to the nnz of block. f2jBLAS |   |   |   |   |   |   |   |   |   |   |   |   |   -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- Duration(millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128 epsilon(100%) | 326481 | 26143 | 25710 | 24726 | 25395 | 25840 | 26846 | 25927 | 27431 | 26190 | 26056 | 26347 | 27204 epsilon3000(67%) | 455247 | 35893 | 34366 | 34985 | 38387 | 38901 | 40426 | 40044 | 39161 | 38767 | 39965 | 39523 | 39108 epsilon4000(50%) | 306390 | 42256 | 41164 | 43748 | 48638 | 50892 | 50986 | 51091 | 51072 | 51289 | 51652 | 53312 | 52146 epsilon5000(40%) | 307619 | 43639 | 42992 | 44743 | 50800 | 51939 | 51871 | 52190 | 53850 | 52607 | 51062 | 52509 | 51570 epsilon10000(20%) | 310070 | 58371 | 55921 | 56317 | 56618 | 53694 | 52131 | 51768 | 51728 | 52233 | 51881 | 51653 | 52440 epsilon20000(10%) | 316565 | 109193 | 95121 | 82764 | 69653 | 60764 | 56066 | 53371 | 52822 | 52872 | 52769 | 52527 | 53508 epsilon200000(1%) | 336181 | 1569721 | 1069355 | 673718 | 375043 | 218230 | 145393 | 110926 | 94327 | 87039 | 83926 | 81890 | 81787   |   |   |   |   |   |   |   |   |   |   |   |   |     |   |   |   |   |   |   |   |   |   |   |   |   |     | Speedup |   |   |   |   |   |   |   |   |   |   |   |   epsilon(100%) | 1 | 12.48827602 | 12.69859977 | **13.20395535** | 12.85611341 | 12.63471362 | 12.16125307 | 12.59231689 | 11.90189931 | 12.46586483 | 12.5299739 | 12.39158158 | 12.00121306 epsilon3000(67%) | 1 | 12.68344803 | **13.2470174** | 13.01263399 | 11.85940553 | 11.70270687 | 11.26124276 | 11.36866946 | 11.62500958 | 11.74315784 | 11.39114225 | 11.51853351 | 11.64076404 epsilon4000(50%) | 1 | 7.250804619 | **7.443154212** | 7.003520161 | 6.299395534 | 6.020396133 | 6.00929667 | 5.996946625 | 5.999177632 | 5.973795551 | 5.931812902 | 5.747111345 | 5.875618456 epsilon5000(40%) | 1 | 7.049176196 | **7.155261444** | 6.875243055 | 6.055492126 | 5.92269778 | 5.930462108 | 5.894213451 | 5.712516249 | 5.847491779 | 6.024421292 | 5.858405226 | 5.965076595 epsilon10000(20%) | 1 | 5.312055644 | 5.544786395 | 5.505797539 | 5.4765269 | 5.774760681 | 5.947900481 | 5.98960748 | 5.994239097 | 5.93628549 | 5.976561747 | **6.002942714** | 5.912852784 epsilon20000(10%) | 1 | 2.899132728 | 3.328024306 | 3.824911797 | 4.544886796 | 5.209745902 | 5.64629187 | 5.931404695 | 5.993052137 | 5.987384627 | 5.999071425 | **6.026710073** | 5.916218136 epsilon200000(1%) | 1 | 0.214166084 | 0.314377358 | 0.498993644 | 0.896379882 | 1.540489392 | 2.312222734 | 3.03067811 | 3.563995463 | 3.862417997 | 4.005683578 | 4.105275369 | **4.110445425** OpenBLAS |   |   |   |   |   |   | 
  |   |   |   |   |   |   -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- | -- Duration(millisecond) | branch 3.0 Impl | blockSizeInMB=0.0625 | blockSizeInMB=0.125 | blockSizeInMB=0.25 | blockSizeInMB=0.5 | blockSizeInMB=1 | blockSizeInMB=2 | blockSizeInMB=4 | blockSizeInMB=8 | blockSizeInMB=16 | blockSizeInMB=32 | blockSizeInMB=64 | blockSizeInMB=128 epsilon(100%) | 299119 | 26047 | 25049 | 25239 | 28001 | 35138 | 36438 | 36279 | 36114 | 35111 | 35428 | 36295 | 35197 epsilon3000(67%) | 439798 | 33321 | 34423 | 34336 | 38906 | 51756 | 54138 | 54085 | 53412 | 54766 | 54425 | 54221 | 54842 epsilon4000(50%) | 302963 | 42960 | 40678 | 43483 | 48254 | 50888 | 54990 | 52647 | 51947 | 51843 | 52891 | 53410 | 52020 epsilon5000(40%) | 303569 | 44225 | 44961 | 45065 | 51768 | 52776 | 51930 | 53587 | 53104 | 51833 | 52138 | 52574 | 53756 epsilon10000(20%) | 307403 | 58447 | 55993 | 56757 | 56694 | 54038 | 52734 | 52073 | 52051 | 52150 | 51986 | 52407 | 52390 epsilon20000(10%) | 313344 | 107580 | 94679 | 83329 | 70226 | 60996 | 57130 | 55461 | 54641 | 52712 | 52541 | 53101 | 53312 epsilon200000(1%) | 334679 | 1642726 | 1073148 | 654481 | 364974 | 213881 | 140248 | 107579 | 91757 | 85090 | 81940 | 80492 | 80250   |   |   |   |   |   |   |   |   |   |   |   |   |     |   |   |   |   |   |   |   |   |   |   |   |   |     | Speedup |   |   |   |   |   |   |   |   |   |   |   |   epsilon(100%) | 1 | 11.48381771 | **11.94135494** | 11.85146004 | 10.68243991 | 8.512692811 | 8.208985125 | 8.244962651 | 8.282632774 | 8.519238985 | 8.443011178 | 8.241328007 | 8.498423161 epsilon3000(67%) | 1 | 13.19882356 | 12.7762833 | **12.80865564** | 11.30411762 | 8.497526857 | 8.123646976 | 8.131607655 | 8.234067251 | 8.030493372 | 8.080808452 | 8.111211523 | 8.01936472 epsilon4000(50%) | 1 | 7.052211359 | **7.44783421** | 6.967389555 | 6.278505409 | 5.953525389 | 5.509419895 | 5.754610899 | 5.832155851 | 5.843855487 | 5.728063376 | 5.672402172 | 5.823971549 epsilon5000(40%) | 1 | **6.86419446** | 6.751829363 | 6.736247642 | 5.864027971 | 5.752027437 | 5.845734643 | 5.664974714 | 5.716499699 | 5.856674319 | 5.822413595 | 5.774127896 | 5.647164968 epsilon10000(20%) | 1 | 5.259517169 | 5.490025539 | 5.416124883 | 5.422143437 | 5.688645028 | 5.829313157 | 5.903308816 | 5.905803923 | 5.894592522 | **5.913188166** | 5.865685882 | 5.867589235 epsilon20000(10%) | 1 | 2.912660346 | 3.309540658 | 3.760323537 | 4.461937174 | 5.137123746 | 5.48475407 | 5.649807973 | 5.734594901 | 5.944452876 | **5.963799699** | 5.900905821 | 5.87755102 epsilon200000(1%) | 1 | 0.203733915 | 0.311866583 | 0.511365494 | 0.916994087 | 1.564790701 | 2.38633706 | 3.111006795 | 3.647449241 | 3.933235398 | 4.084439834 | 4.157916315 | **4.170454829** ### Does this PR introduce _any_ user-facing change? yes, param `blockSize` -> `blockSizeInMB` in master ### How was this patch tested? added testsuites and performance test (result attached in [ticket](https://issues.apache.org/jira/browse/SPARK-32907)) Closes #30009 from zhengruifeng/adaptively_blockify_linear_svc_II. 
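For reviewers, a small usage sketch of the renamed parameter (illustrative only; `training` is a placeholder DataFrame with "label" and "features" columns):

```
import org.apache.spark.ml.classification.LinearSVC

val lsvc = new LinearSVC()
  .setMaxIter(10)
  .setRegParam(0.1)
  // 0.0 (the default) lets the implementation infer a block size;
  // per this patch the inferred value is InstanceBlock.DefaultBlockSizeInMB = 1.0 MB.
  .setMaxBlockSizeInMB(0.0)

val model = lsvc.fit(training)
```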
Lead-authored-by: zhengruifeng Co-authored-by: Weichen Xu Signed-off-by: Weichen Xu --- .../spark/ml/classification/LinearSVC.scala | 93 ++++++------------- .../apache/spark/ml/feature/Instance.scala | 71 ++++++++++++++ .../ml/param/shared/SharedParamsCodeGen.scala | 7 +- .../spark/ml/param/shared/sharedParams.scala | 18 ++++ .../ml/classification/LinearSVCSuite.scala | 4 +- .../spark/ml/feature/InstanceSuite.scala | 54 +++++++++++ python/pyspark/ml/classification.py | 26 +++--- python/pyspark/ml/classification.pyi | 9 +- .../ml/param/_shared_params_code_gen.py | 6 +- python/pyspark/ml/param/shared.py | 18 ++++ python/pyspark/ml/param/shared.pyi | 5 + 11 files changed, 224 insertions(+), 87 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 77272c65eb231..95f37671e1399 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -42,7 +42,7 @@ import org.apache.spark.storage.StorageLevel /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam with HasMaxIter with HasFitIntercept with HasTol with HasStandardization with HasWeightCol - with HasAggregationDepth with HasThreshold with HasBlockSize { + with HasAggregationDepth with HasThreshold with HasMaxBlockSizeInMB { /** * Param for threshold in binary classification prediction. @@ -57,7 +57,7 @@ private[classification] trait LinearSVCParams extends ClassifierParams with HasR "threshold in binary classification prediction applied to rawPrediction") setDefault(regParam -> 0.0, maxIter -> 100, fitIntercept -> true, tol -> 1E-6, - standardization -> true, threshold -> 0.0, aggregationDepth -> 2, blockSize -> 1) + standardization -> true, threshold -> 0.0, aggregationDepth -> 2, maxBlockSizeInMB -> 0.0) } /** @@ -153,22 +153,13 @@ class LinearSVC @Since("2.2.0") ( def setAggregationDepth(value: Int): this.type = set(aggregationDepth, value) /** - * Set block size for stacking input data in matrices. - * If blockSize == 1, then stacking will be skipped, and each vector is treated individually; - * If blockSize > 1, then vectors will be stacked to blocks, and high-level BLAS routines - * will be used if possible (for example, GEMV instead of DOT, GEMM instead of GEMV). - * Recommended size is between 10 and 1000. An appropriate choice of the block size depends - * on the sparsity and dim of input datasets, the underlying BLAS implementation (for example, - * f2jBLAS, OpenBLAS, intel MKL) and its configuration (for example, number of threads). - * Note that existing BLAS implementations are mainly optimized for dense matrices, if the - * input dataset is sparse, stacking may bring no performance gain, the worse is possible - * performance regression. - * Default is 1. + * Sets the value of param [[maxBlockSizeInMB]]. + * Default is 0.0. 
* * @group expertSetParam */ @Since("3.1.0") - def setBlockSize(value: Int): this.type = set(blockSize, value) + def setMaxBlockSizeInMB(value: Double): this.type = set(maxBlockSizeInMB, value) @Since("2.2.0") override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) @@ -177,19 +168,19 @@ class LinearSVC @Since("2.2.0") ( instr.logPipelineStage(this) instr.logDataset(dataset) instr.logParams(this, labelCol, weightCol, featuresCol, predictionCol, rawPredictionCol, - regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, blockSize) + regParam, maxIter, fitIntercept, tol, standardization, threshold, aggregationDepth, + maxBlockSizeInMB) + + if (dataset.storageLevel != StorageLevel.NONE) { + instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + + s"then cached during training. Be careful of double caching!") + } val instances = extractInstances(dataset) .setName("training instances") - if (dataset.storageLevel == StorageLevel.NONE && $(blockSize) == 1) { - instances.persist(StorageLevel.MEMORY_AND_DISK) - } - - var requestedMetrics = Seq("mean", "std", "count") - if ($(blockSize) != 1) requestedMetrics +:= "numNonZeros" val (summarizer, labelSummarizer) = Summarizer - .getClassificationSummarizers(instances, $(aggregationDepth), requestedMetrics) + .getClassificationSummarizers(instances, $(aggregationDepth), Seq("mean", "std", "count")) val histogram = labelSummarizer.histogram val numInvalid = labelSummarizer.countInvalid @@ -199,14 +190,12 @@ class LinearSVC @Since("2.2.0") ( instr.logNamedValue("lowestLabelWeight", labelSummarizer.histogram.min.toString) instr.logNamedValue("highestLabelWeight", labelSummarizer.histogram.max.toString) instr.logSumOfWeights(summarizer.weightSum) - if ($(blockSize) > 1) { - val scale = 1.0 / summarizer.count / numFeatures - val sparsity = 1 - summarizer.numNonzeros.toArray.map(_ * scale).sum - instr.logNamedValue("sparsity", sparsity.toString) - if (sparsity > 0.5) { - instr.logWarning(s"sparsity of input dataset is $sparsity, " + - s"which may hurt performance in high-level BLAS.") - } + + var actualBlockSizeInMB = $(maxBlockSizeInMB) + if (actualBlockSizeInMB == 0) { + actualBlockSizeInMB = InstanceBlock.DefaultBlockSizeInMB + require(actualBlockSizeInMB > 0, "inferred actual BlockSizeInMB must > 0") + instr.logNamedValue("actualBlockSizeInMB", actualBlockSizeInMB.toString) } val numClasses = MetadataUtils.getNumClasses(dataset.schema($(labelCol))) match { @@ -245,12 +234,8 @@ class LinearSVC @Since("2.2.0") ( Note that the intercept in scaled space and original space is the same; as a result, no scaling is needed. */ - val (rawCoefficients, objectiveHistory) = if ($(blockSize) == 1) { - trainOnRows(instances, featuresStd, regularization, optimizer) - } else { - trainOnBlocks(instances, featuresStd, regularization, optimizer) - } - if (instances.getStorageLevel != StorageLevel.NONE) instances.unpersist() + val (rawCoefficients, objectiveHistory) = + trainImpl(instances, actualBlockSizeInMB, featuresStd, regularization, optimizer) if (rawCoefficients == null) { val msg = s"${optimizer.getClass.getName} failed." 
@@ -284,35 +269,9 @@ class LinearSVC @Since("2.2.0") ( model.setSummary(Some(summary)) } - private def trainOnRows( - instances: RDD[Instance], - featuresStd: Array[Double], - regularization: Option[L2Regularization], - optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { - val numFeatures = featuresStd.length - val numFeaturesPlusIntercept = if ($(fitIntercept)) numFeatures + 1 else numFeatures - - val bcFeaturesStd = instances.context.broadcast(featuresStd) - val getAggregatorFunc = new HingeAggregator(bcFeaturesStd, $(fitIntercept))(_) - val costFun = new RDDLossFunction(instances, getAggregatorFunc, - regularization, $(aggregationDepth)) - - val states = optimizer.iterations(new CachedDiffFunction(costFun), - Vectors.zeros(numFeaturesPlusIntercept).asBreeze.toDenseVector) - - val arrayBuilder = mutable.ArrayBuilder.make[Double] - var state: optimizer.State = null - while (states.hasNext) { - state = states.next() - arrayBuilder += state.adjustedValue - } - bcFeaturesStd.destroy() - - (if (state != null) state.x.toArray else null, arrayBuilder.result) - } - - private def trainOnBlocks( + private def trainImpl( instances: RDD[Instance], + actualBlockSizeInMB: Double, featuresStd: Array[Double], regularization: Option[L2Regularization], optimizer: BreezeOWLQN[Int, BDV[Double]]): (Array[Double], Array[Double]) = { @@ -326,9 +285,11 @@ class LinearSVC @Since("2.2.0") ( val func = StandardScalerModel.getTransformFunc(Array.empty, inverseStd, false, true) iter.map { case Instance(label, weight, vec) => Instance(label, weight, func(vec)) } } - val blocks = InstanceBlock.blokify(standardized, $(blockSize)) + + val maxMemUsage = (actualBlockSizeInMB * 1024L * 1024L).ceil.toLong + val blocks = InstanceBlock.blokifyWithMaxMemUsage(standardized, maxMemUsage) .persist(StorageLevel.MEMORY_AND_DISK) - .setName(s"training blocks (blockSize=${$(blockSize)})") + .setName(s"training blocks (blockSizeInMB=$actualBlockSizeInMB)") val getAggregatorFunc = new BlockHingeAggregator($(fitIntercept))(_) val costFun = new RDDLossFunction(blocks, getAggregatorFunc, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala index db5f88d5dddc8..c237366ec5c3d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Instance.scala @@ -17,6 +17,8 @@ package org.apache.spark.ml.feature +import scala.collection.mutable + import org.apache.spark.ml.linalg._ import org.apache.spark.rdd.RDD @@ -100,6 +102,32 @@ private[spark] case class InstanceBlock( private[spark] object InstanceBlock { + /** + * Suggested value for BlockSizeInMB in Level-2 routine cases. + * According to performance tests of BLAS routine (see SPARK-31714) and + * LinearSVC (see SPARK-32907), 1.0 MB should be an acceptable value for + * linear models using Level-2 routine (GEMV) to perform prediction and + * gradient computation. 
+ */ + val DefaultBlockSizeInMB = 1.0 + + private def getBlockMemUsage( + numCols: Long, + numRows: Long, + nnz: Long, + allUnitWeight: Boolean): Long = { + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val denseSize = Matrices.getDenseSize(numCols, numRows) + val sparseSize = Matrices.getSparseSize(nnz, numRows + 1) + val matrixSize = math.min(denseSize, sparseSize) + if (allUnitWeight) { + matrixSize + doubleBytes * numRows + arrayHeader * 2 + } else { + matrixSize + doubleBytes * numRows * 2 + arrayHeader * 2 + } + } + def fromInstances(instances: Seq[Instance]): InstanceBlock = { val labels = instances.map(_.label).toArray val weights = if (instances.exists(_.weight != 1)) { @@ -114,6 +142,49 @@ private[spark] object InstanceBlock { def blokify(instances: RDD[Instance], blockSize: Int): RDD[InstanceBlock] = { instances.mapPartitions(_.grouped(blockSize).map(InstanceBlock.fromInstances)) } + + def blokifyWithMaxMemUsage( + instanceIterator: Iterator[Instance], + maxMemUsage: Long): Iterator[InstanceBlock] = { + require(maxMemUsage > 0) + + new Iterator[InstanceBlock]() { + private var numCols = -1L + + override def hasNext: Boolean = instanceIterator.hasNext + + override def next(): InstanceBlock = { + val buff = mutable.ArrayBuilder.make[Instance] + var buffCnt = 0L + var buffNnz = 0L + var buffUnitWeight = true + var blockMemUsage = 0L + + while (instanceIterator.hasNext && blockMemUsage < maxMemUsage) { + val instance = instanceIterator.next() + if (numCols < 0L) numCols = instance.features.size + require(numCols == instance.features.size) + + buff += instance + buffCnt += 1L + buffNnz += instance.features.numNonzeros + buffUnitWeight &&= (instance.weight == 1) + blockMemUsage = getBlockMemUsage(numCols, buffCnt, buffNnz, buffUnitWeight) + } + + // the block memory usage may slightly exceed threshold, not a big issue. + // and this ensure even if one row exceed block limit, each block has one row. + InstanceBlock.fromInstances(buff.result()) + } + } + } + + def blokifyWithMaxMemUsage( + instances: RDD[Instance], + maxMemUsage: Long): RDD[InstanceBlock] = { + require(maxMemUsage > 0) + instances.mapPartitions(iter => blokifyWithMaxMemUsage(iter, maxMemUsage)) + } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 7fd5f5938b565..0640fe355fdd6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -108,7 +108,12 @@ private[shared] object SharedParamsCodeGen { ParamDesc[Int]("blockSize", "block size for stacking input data in matrices. Data is " + "stacked within partitions. If block size is more than remaining data in a partition " + "then it is adjusted to the size of this data.", - isValid = "ParamValidators.gt(0)", isExpertParam = true) + isValid = "ParamValidators.gt(0)", isExpertParam = true), + ParamDesc[Double]("maxBlockSizeInMB", "Maximum memory in MB for stacking input data " + + "into blocks. Data is stacked within partitions. If more than remaining data size in a " + + "partition then it is adjusted to the data size. If 0, try to infer an appropriate " + + "value. 
Must be >= 0.", + Some("0.0"), isValid = "ParamValidators.gtEq(0.0)", isExpertParam = true) ) val code = genSharedParams(params) diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index 60203eba61ea5..2fbda45a9e97a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala @@ -562,4 +562,22 @@ trait HasBlockSize extends Params { /** @group expertGetParam */ final def getBlockSize: Int = $(blockSize) } + +/** + * Trait for shared param maxBlockSizeInMB (default: 0.0). This trait may be changed or + * removed between minor versions. + */ +trait HasMaxBlockSizeInMB extends Params { + + /** + * Param for Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.. + * @group expertParam + */ + final val maxBlockSizeInMB: DoubleParam = new DoubleParam(this, "maxBlockSizeInMB", "Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", ParamValidators.gtEq(0.0)) + + setDefault(maxBlockSizeInMB, 0.0) + + /** @group expertGetParam */ + final def getMaxBlockSizeInMB: Double = $(maxBlockSizeInMB) +} // scalastyle:on diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala index a66397324c1a6..d8b9c6a606ec2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LinearSVCSuite.scala @@ -214,8 +214,8 @@ class LinearSVCSuite extends MLTest with DefaultReadWriteTest { .setFitIntercept(fitIntercept) .setMaxIter(5) val model = lsvc.fit(dataset) - Seq(4, 16, 64).foreach { blockSize => - val model2 = lsvc.setBlockSize(blockSize).fit(dataset) + Seq(0, 0.01, 0.1, 1, 2, 4).foreach { s => + val model2 = lsvc.setMaxBlockSizeInMB(s).fit(dataset) assert(model.intercept ~== model2.intercept relTol 1e-9) assert(model.coefficients ~== model2.coefficients relTol 1e-9) } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala index d780bdf5f5dc8..f1e071357bab7 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuite.scala @@ -74,4 +74,58 @@ class InstanceSuite extends SparkFunSuite{ } } + test("InstanceBlock: blokify with max memory usage") { + val instance1 = Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)) + val instance2 = Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse) + val instances = Seq(instance1, instance2) + + val blocks = InstanceBlock + .blokifyWithMaxMemUsage(Iterator.apply(instance1, instance2), 128).toArray + require(blocks.length == 1) + val block = blocks.head + assert(block.size === 2) + assert(block.numFeatures === 2) + block.instanceIterator.zipWithIndex.foreach { + case (instance, i) => + assert(instance.label === instances(i).label) + assert(instance.weight === instances(i).weight) + assert(instance.features.toArray === 
instances(i).features.toArray) + } + Seq(0, 1).foreach { i => + val nzIter = block.getNonZeroIter(i) + val vec = Vectors.sparse(2, nzIter.toSeq) + assert(vec.toArray === instances(i).features.toArray) + } + + // instances larger than maxMemUsage + val denseInstance = Instance(-1.0, 2.0, Vectors.dense(Array.fill(1000)(1.0))) + InstanceBlock.blokifyWithMaxMemUsage(Iterator.single(denseInstance), 64).size + InstanceBlock.blokifyWithMaxMemUsage(Iterator.fill(10)(denseInstance), 64).size + + // different numFeatures + intercept[IllegalArgumentException] { + InstanceBlock.blokifyWithMaxMemUsage(Iterator.apply(instance1, denseInstance), 64).size + } + + // nnz = 10 + val sparseInstance = Instance(-2.0, 3.0, + Vectors.sparse(1000, Array.range(0, 1000, 100), Array.fill(10)(0.1))) + + // normally, memory usage of a block does not exceed maxMemUsage too much + val maxMemUsage = 1 << 18 + val mixedIter = Iterator.fill(100)(denseInstance) ++ + Iterator.fill(1000)(sparseInstance) ++ + Iterator.fill(10)(denseInstance) ++ + Iterator.fill(10)(sparseInstance) ++ + Iterator.fill(100)(denseInstance) ++ + Iterator.fill(100)(sparseInstance) + InstanceBlock.blokifyWithMaxMemUsage(mixedIter, maxMemUsage) + .foreach { block => + val doubleBytes = java.lang.Double.BYTES + val arrayHeader = 12L + val blockMemUsage = block.matrix.getSizeInBytes + + (block.labels.length + block.weights.length) * doubleBytes + arrayHeader * 2 + require(blockMemUsage < maxMemUsage * 1.05) + } + } } diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index d6c861361a248..8f13f3275cb5b 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -26,8 +26,8 @@ from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import HasRawPredictionCol, HasProbabilityCol, HasThresholds, \ HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, \ - HasAggregationDepth, HasThreshold, HasBlockSize, Param, Params, TypeConverters, \ - HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism + HasAggregationDepth, HasThreshold, HasBlockSize, HasMaxBlockSizeInMB, Param, Params, \ + TypeConverters, HasElasticNetParam, HasSeed, HasStepSize, HasSolver, HasParallelism from pyspark.ml.tree import _DecisionTreeModel, _DecisionTreeParams, \ _TreeEnsembleModel, _RandomForestParams, _GBTParams, \ _HasVarianceImpurity, _TreeClassifierParams @@ -504,7 +504,7 @@ def recallByThreshold(self): class _LinearSVCParams(_ClassifierParams, HasRegParam, HasMaxIter, HasFitIntercept, HasTol, HasStandardization, HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize): + HasMaxBlockSizeInMB): """ Params for :py:class:`LinearSVC` and :py:class:`LinearSVCModel`. @@ -521,7 +521,7 @@ def __init__(self, *args): super(_LinearSVCParams, self).__init__(*args) self._setDefault(maxIter=100, regParam=0.0, tol=1e-6, fitIntercept=True, standardization=True, threshold=0.0, aggregationDepth=2, - blockSize=1) + maxBlockSizeInMB=0.0) @inherit_doc @@ -565,8 +565,8 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl LinearSVCModel... 
>>> model.getThreshold() 0.5 - >>> model.getBlockSize() - 1 + >>> model.getMaxBlockSizeInMB() + 0.0 >>> model.coefficients DenseVector([0.0, -0.2792, -0.1833]) >>> model.intercept @@ -605,12 +605,12 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ super(LinearSVC, self).__init__() self._java_obj = self._new_java_obj( @@ -623,12 +623,12 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ fitIntercept=True, standardization=True, threshold=0.0, weightCol=None, \ - aggregationDepth=2, blockSize=1): + aggregationDepth=2, maxBlockSizeInMB=0.0): Sets params for Linear SVM Classifier. """ kwargs = self._input_kwargs @@ -694,11 +694,11 @@ def setAggregationDepth(self, value): return self._set(aggregationDepth=value) @since("3.1.0") - def setBlockSize(self, value): + def setMaxBlockSizeInMB(self, value): """ - Sets the value of :py:attr:`blockSize`. + Sets the value of :py:attr:`maxBlockSizeInMB`. """ - return self._set(blockSize=value) + return self._set(maxBlockSizeInMB=value) class LinearSVCModel(_JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 55afc20a54cb9..9f72d24f63117 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -26,6 +26,7 @@ from pyspark.ml.base import _PredictorParams from pyspark.ml.param.shared import ( HasAggregationDepth, HasBlockSize, + HasMaxBlockSizeInMB, HasElasticNetParam, HasFitIntercept, HasMaxIter, @@ -172,7 +173,7 @@ class _LinearSVCParams( HasWeightCol, HasAggregationDepth, HasThreshold, - HasBlockSize, + HasMaxBlockSizeInMB, ): threshold: Param[float] def __init__(self, *args: Any) -> None: ... @@ -198,7 +199,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> None: ... def setParams( self, @@ -215,7 +216,7 @@ class LinearSVC( threshold: float = ..., weightCol: Optional[str] = ..., aggregationDepth: int = ..., - blockSize: int = ... + maxBlockSizeInMB: float = ... ) -> LinearSVC: ... def setMaxIter(self, value: int) -> LinearSVC: ... def setRegParam(self, value: float) -> LinearSVC: ... @@ -225,7 +226,7 @@ class LinearSVC( def setThreshold(self, value: float) -> LinearSVC: ... 
def setWeightCol(self, value: str) -> LinearSVC: ... def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setBlockSize(self, value: int) -> LinearSVC: ... + def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ... class LinearSVCModel( _JavaClassificationModel[Vector], diff --git a/python/pyspark/ml/param/_shared_params_code_gen.py b/python/pyspark/ml/param/_shared_params_code_gen.py index bc1ea87ad629c..53d26972c4b4a 100644 --- a/python/pyspark/ml/param/_shared_params_code_gen.py +++ b/python/pyspark/ml/param/_shared_params_code_gen.py @@ -165,7 +165,11 @@ def get$Name(self): None, "TypeConverters.toString"), ("blockSize", "block size for stacking input data in matrices. Data is stacked within " "partitions. If block size is more than remaining data in a partition then it is " - "adjusted to the size of this data.", None, "TypeConverters.toInt")] + "adjusted to the size of this data.", None, "TypeConverters.toInt"), + ("maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is " + + "stacked within partitions. If more than remaining data size in a partition then it " + + "is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", + "0.0", "TypeConverters.toFloat")] code = [] for name, doc, defaultValueStr, typeConverter in shared: diff --git a/python/pyspark/ml/param/shared.py b/python/pyspark/ml/param/shared.py index 24fb0d3e2554d..cbef7386e2214 100644 --- a/python/pyspark/ml/param/shared.py +++ b/python/pyspark/ml/param/shared.py @@ -597,3 +597,21 @@ def getBlockSize(self): Gets the value of blockSize or its default value. """ return self.getOrDefault(self.blockSize) + + +class HasMaxBlockSizeInMB(Params): + """ + Mixin for param maxBlockSizeInMB: maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0. + """ + + maxBlockSizeInMB = Param(Params._dummy(), "maxBlockSizeInMB", "maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. If 0, try to infer an appropriate value. Must be >= 0.", typeConverter=TypeConverters.toFloat) + + def __init__(self): + super(HasMaxBlockSizeInMB, self).__init__() + self._setDefault(maxBlockSizeInMB=0.0) + + def getMaxBlockSizeInMB(self): + """ + Gets the value of maxBlockSizeInMB or its default value. + """ + return self.getOrDefault(self.maxBlockSizeInMB) diff --git a/python/pyspark/ml/param/shared.pyi b/python/pyspark/ml/param/shared.pyi index 5999c0eaa4661..0ff4d544205bc 100644 --- a/python/pyspark/ml/param/shared.pyi +++ b/python/pyspark/ml/param/shared.pyi @@ -185,3 +185,8 @@ class HasBlockSize(Params): blockSize: Param[int] def __init__(self) -> None: ... def getBlockSize(self) -> int: ... + +class HasMaxBlockSizeInMB(Params): + maxBlockSizeInMB: Param[float] + def __init__(self) -> None: ... + def getMaxBlockSizeInMB(self) -> float: ... From a3d2954662831ca9fa6a2b886ca5bd8d81785974 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 12 Nov 2020 20:26:33 +0900 Subject: [PATCH 0457/1009] [SPARK-33421][SQL] Support Greatest and Least in Expression Canonicalize ### What changes were proposed in this pull request? Add `Greatest` and `Least` check in `Canonicalize`. ### Why are the changes needed? The children of both `Greatest` and `Least` are order Irrelevant. 
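For example, the intended effect can be checked directly against the Catalyst expression API (a sketch mirroring the new test, assuming this rule is applied during canonicalization):

```
import org.apache.spark.sql.catalyst.expressions.{Greatest, Least, Literal}

// Commutative children are reordered consistently, so argument order no longer matters.
val g1 = Greatest(Seq(Literal(1), Literal(2)))
val g2 = Greatest(Seq(Literal(2), Literal(1)))
assert(g1.canonicalized == g2.canonicalized)

val l1 = Least(Seq(Literal(1), Literal(2)))
val l2 = Least(Seq(Literal(2), Literal(1)))
assert(l1.canonicalized == l2.canonicalized)
```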
Let's say we have `greatest(1, 2)` and `greatest(2, 1)`. We can get the same canonicalized expression in this case. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30330 from ulysses-you/SPARK-33421. Authored-by: ulysses Signed-off-by: HyukjinKwon --- .../catalyst/expressions/Canonicalize.scala | 7 +++++ .../expressions/CanonicalizeSuite.scala | 28 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index ae201359a762c..2765ec7d8a0eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -109,6 +109,13 @@ object Canonicalize { // order the list in the In operator case In(value, list) if list.length > 1 => In(value, list.sortBy(_.hashCode())) + case g: Greatest => + val newChildren = orderCommutative(g, { case Greatest(children) => children }) + Greatest(newChildren) + case l: Least => + val newChildren = orderCommutative(l, { case Least(children) => children }) + Least(newChildren) + case _ => e } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index bcbccd93e509f..ac31a68b2b618 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -142,4 +142,32 @@ class CanonicalizeSuite extends SparkFunSuite { } } } + + test("SPARK-33421: Support Greatest and Least in Expression Canonicalize") { + Seq(Least(_), Greatest(_)).foreach { f => + // test deterministic expr + val expr1 = f(Seq(Literal(1), Literal(2), Literal(3))) + val expr2 = f(Seq(Literal(3), Literal(1), Literal(2))) + val expr3 = f(Seq(Literal(1), Literal(1), Literal(1))) + assert(expr1.canonicalized == expr2.canonicalized) + assert(expr1.canonicalized != expr3.canonicalized) + assert(expr2.canonicalized != expr3.canonicalized) + + // test non-deterministic expr + val randExpr1 = f(Seq(Literal(1), rand(1))) + val randExpr2 = f(Seq(rand(1), Literal(1))) + val randExpr3 = f(Seq(Literal(1), rand(2))) + assert(randExpr1.canonicalized == randExpr2.canonicalized) + assert(randExpr1.canonicalized != randExpr3.canonicalized) + assert(randExpr2.canonicalized != randExpr3.canonicalized) + + // test nested expr + val nestedExpr1 = f(Seq(Literal(1), f(Seq(Literal(2), Literal(3))))) + val nestedExpr2 = f(Seq(f(Seq(Literal(2), Literal(3))), Literal(1))) + val nestedExpr3 = f(Seq(f(Seq(Literal(1), Literal(1))), Literal(1))) + assert(nestedExpr1.canonicalized == nestedExpr2.canonicalized) + assert(nestedExpr1.canonicalized != nestedExpr3.canonicalized) + assert(nestedExpr2.canonicalized != nestedExpr3.canonicalized) + } + } } From 2f07c568107b2e466a6d6e199eaff7068100bb3c Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 12 Nov 2020 14:59:22 +0000 Subject: [PATCH 0458/1009] [SPARK-33278][SQL] Improve the performance for FIRST_VALUE ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/29800 provides a performance improvement for `NTH_VALUE`. 
`FIRST_VALUE` also could use the `UnboundedOffsetWindowFunctionFrame` and `UnboundedPrecedingOffsetWindowFunctionFrame`. ### Why are the changes needed? Improve the performance for `FIRST_VALUE`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30178 from beliefer/SPARK-33278. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 13 + .../OptimizeWindowFunctionsSuite.scala | 76 ++++ .../resources/sql-tests/inputs/window.sql | 66 +-- .../sql-tests/results/window.sql.out | 426 +++++++++--------- 4 files changed, 339 insertions(+), 242 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 51f7799b1e427..e492d01650097 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -82,6 +82,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // Operator combine CollapseRepartition, CollapseProject, + OptimizeWindowFunctions, CollapseWindow, CombineFilters, CombineLimits, @@ -806,6 +807,18 @@ object CollapseRepartition extends Rule[LogicalPlan] { } } +/** + * Replaces first(col) to nth_value(col, 1) for better performance. + */ +object OptimizeWindowFunctions extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { + case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), spec) + if spec.orderSpec.nonEmpty && + spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame].frameType == RowFrame => + we.copy(windowFunction = NthValue(first.child, Literal(1), first.ignoreNulls)) + } +} + /** * Collapse Adjacent Window Expression. * - If the partition specs and order specs are the same and the window expression are diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala new file mode 100644 index 0000000000000..389aaeafe655f --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.First +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class OptimizeWindowFunctionsSuite extends PlanTest { + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("OptimizeWindowFunctions", FixedPoint(10), + OptimizeWindowFunctions) :: Nil + } + + val testRelation = LocalRelation('a.double, 'b.double, 'c.string) + val a = testRelation.output(0) + val b = testRelation.output(1) + val c = testRelation.output(2) + + test("replace first(col) by nth_value(col, 1)") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + val correctAnswer = testRelation.select( + WindowExpression( + NthValue(a, Literal(1), false), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == correctAnswer) + } + + test("can't replace first(col) by nth_value(col, 1) if the window frame type is range") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RangeFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } + + test("can't replace first(col) by nth_value(col, 1) if the window frame isn't ordered") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index c1be5fb27e6fa..f5223af9125f6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -146,104 +146,108 @@ SELECT val, cate, count(val) FILTER (WHERE val > 1) OVER(PARTITION BY cate) FROM testData ORDER BY cate, val; --- nth_value() over () +-- nth_value()/first_value() over () SELECT employee_name, salary, - nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) 
second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary - RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) ORDER BY salary; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY salary DESC; SELECT employee_name, department, salary, - NTH_VALUE(employee_name, 2) OVER ( - PARTITION BY department - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) second_highest_salary + FIRST_VALUE(employee_name) OVER w highest_salary, + NTH_VALUE(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +) ORDER BY department; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index f6506a77e239c..1304dcf21d0b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -421,286 +421,288 @@ window aggregate function with filter predicate is 
not supported yet.; SELECT employee_name, salary, - nth_value(employee_name, 2) OVER (ORDER BY salary DESC) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT 
employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 NULL -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott NULL +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary - RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary RANGE BETWEEN 2000 PRECEDING AND 1000 FOLLOWING) ORDER BY salary -- !query schema -struct +struct -- !query output -Leslie Thompson 5186 NULL -Anthony Bow 6627 Anthony Bow -Foon Yue Tseng 6660 Anthony Bow -Gerard Hernandez 6949 Anthony Bow -Leslie Jennings 8113 Foon Yue Tseng -Diane Murphy 8435 Foon Yue Tseng -William Patterson 8870 Leslie Jennings -Jeff Firrelli 8992 Diane Murphy -Julie Firrelli 9181 Diane Murphy -Steve Patterson 9441 Diane Murphy -Mary Patterson 9998 Diane Murphy -Loui Bondur 10449 Jeff Firrelli -George Vanauf 10563 Jeff Firrelli -Barry Jones 10586 Jeff Firrelli -Pamela Castillo 11303 Mary Patterson -Gerard Bondur 11472 Loui Bondur -Larry Bott 11798 Loui Bondur +Leslie Thompson 5186 Leslie Thompson NULL +Anthony Bow 6627 Leslie Thompson Anthony Bow +Foon Yue Tseng 6660 Leslie Thompson Anthony Bow +Gerard Hernandez 6949 Leslie Thompson Anthony Bow +Leslie Jennings 8113 Anthony Bow Foon Yue Tseng +Diane Murphy 8435 Anthony Bow Foon Yue Tseng +William Patterson 8870 Gerard Hernandez Leslie Jennings +Jeff Firrelli 8992 Leslie Jennings Diane Murphy +Julie Firrelli 9181 Leslie Jennings Diane Murphy +Steve Patterson 9441 Leslie Jennings Diane Murphy +Mary Patterson 9998 Leslie Jennings Diane Murphy +Loui Bondur 10449 William Patterson Jeff Firrelli +George Vanauf 10563 William Patterson Jeff Firrelli +Barry Jones 10586 William Patterson Jeff Firrelli +Pamela Castillo 11303 Steve Patterson Mary 
Patterson +Gerard Bondur 11472 Mary Patterson Loui Bondur +Larry Bott 11798 Mary Patterson Loui Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Pamela Castillo -George Vanauf 10563 Barry Jones -Loui Bondur 10449 George Vanauf -Mary Patterson 9998 Loui Bondur -Steve Patterson 9441 Mary Patterson -Julie Firrelli 9181 Steve Patterson -Jeff Firrelli 8992 Julie Firrelli -William Patterson 8870 Jeff Firrelli -Diane Murphy 8435 William Patterson -Leslie Jennings 8113 Diane Murphy -Gerard Hernandez 6949 Leslie Jennings -Foon Yue Tseng 6660 Gerard Hernandez -Anthony Bow 6627 Foon Yue Tseng -Leslie Thompson 5186 Anthony Bow +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Gerard Bondur Pamela Castillo +George Vanauf 10563 Pamela Castillo Barry Jones +Loui Bondur 10449 Barry Jones George Vanauf +Mary Patterson 9998 George Vanauf Loui Bondur +Steve Patterson 9441 Loui Bondur Mary Patterson +Julie Firrelli 9181 Mary Patterson Steve Patterson +Jeff Firrelli 8992 Steve Patterson Julie Firrelli +William Patterson 8870 Julie Firrelli Jeff Firrelli +Diane Murphy 8435 Jeff Firrelli William Patterson +Leslie Jennings 8113 William Patterson Diane Murphy +Gerard Hernandez 6949 Diane Murphy Leslie Jennings +Foon Yue Tseng 6660 Leslie Jennings Gerard Hernandez +Anthony Bow 6627 Gerard Hernandez Foon Yue Tseng +Leslie Thompson 5186 Foon Yue Tseng Anthony Bow -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Pamela Castillo -Pamela Castillo 11303 Barry Jones -Barry Jones 10586 George Vanauf -George Vanauf 10563 Loui Bondur -Loui Bondur 10449 Mary Patterson -Mary Patterson 9998 Steve Patterson -Steve Patterson 9441 Julie Firrelli -Julie Firrelli 9181 Jeff Firrelli -Jeff Firrelli 8992 William Patterson -William Patterson 8870 Diane Murphy -Diane Murphy 8435 Leslie Jennings -Leslie Jennings 8113 Gerard Hernandez -Gerard Hernandez 6949 Foon Yue Tseng -Foon Yue Tseng 6660 Anthony Bow -Anthony Bow 6627 Leslie Thompson -Leslie Thompson 5186 NULL +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Gerard Bondur Pamela Castillo +Pamela Castillo 11303 Pamela Castillo Barry Jones +Barry Jones 10586 Barry Jones George Vanauf +George Vanauf 10563 George Vanauf Loui Bondur +Loui Bondur 10449 Loui Bondur Mary Patterson +Mary Patterson 9998 Mary Patterson Steve Patterson +Steve Patterson 9441 Steve Patterson Julie Firrelli +Julie Firrelli 9181 Julie Firrelli Jeff Firrelli +Jeff Firrelli 8992 Jeff Firrelli William Patterson +William Patterson 8870 
William Patterson Diane Murphy +Diane Murphy 8435 Diane Murphy Leslie Jennings +Leslie Jennings 8113 Leslie Jennings Gerard Hernandez +Gerard Hernandez 6949 Gerard Hernandez Foon Yue Tseng +Foon Yue Tseng 6660 Foon Yue Tseng Anthony Bow +Anthony Bow 6627 Anthony Bow Leslie Thompson +Leslie Thompson 5186 Leslie Thompson NULL -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur 
+Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query SELECT employee_name, salary, - nth_value(employee_name, 2) OVER ( - ORDER BY salary DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) second_highest_salary + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) ORDER BY salary DESC -- !query schema -struct +struct -- !query output -Larry Bott 11798 Gerard Bondur -Gerard Bondur 11472 Gerard Bondur -Pamela Castillo 11303 Gerard Bondur -Barry Jones 10586 Gerard Bondur -George Vanauf 10563 Gerard Bondur -Loui Bondur 10449 Gerard Bondur -Mary Patterson 9998 Gerard Bondur -Steve Patterson 9441 Gerard Bondur -Julie Firrelli 9181 Gerard Bondur -Jeff Firrelli 8992 Gerard Bondur -William Patterson 8870 Gerard Bondur -Diane Murphy 8435 Gerard Bondur -Leslie Jennings 8113 Gerard Bondur -Gerard Hernandez 6949 Gerard Bondur -Foon Yue Tseng 6660 Gerard Bondur -Anthony Bow 6627 Gerard Bondur -Leslie Thompson 5186 Gerard Bondur +Larry Bott 11798 Larry Bott Gerard Bondur +Gerard Bondur 11472 Larry Bott Gerard Bondur +Pamela Castillo 11303 Larry Bott Gerard Bondur +Barry Jones 10586 Larry Bott Gerard Bondur +George Vanauf 10563 Larry Bott Gerard Bondur +Loui Bondur 10449 Larry Bott Gerard Bondur +Mary Patterson 9998 Larry Bott Gerard Bondur +Steve Patterson 9441 Larry Bott Gerard Bondur +Julie Firrelli 9181 Larry Bott Gerard Bondur +Jeff Firrelli 8992 Larry Bott Gerard Bondur +William Patterson 8870 Larry Bott Gerard Bondur +Diane Murphy 8435 Larry Bott Gerard Bondur +Leslie Jennings 8113 Larry Bott Gerard Bondur +Gerard Hernandez 6949 Larry Bott Gerard Bondur +Foon Yue Tseng 6660 Larry Bott Gerard Bondur +Anthony Bow 6627 Larry Bott Gerard Bondur +Leslie Thompson 5186 Larry Bott Gerard Bondur -- !query @@ -708,31 +710,33 @@ SELECT employee_name, department, salary, - NTH_VALUE(employee_name, 2) OVER ( - PARTITION BY department - ORDER BY salary DESC - RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) second_highest_salary + FIRST_VALUE(employee_name) OVER w highest_salary, + NTH_VALUE(employee_name, 2) OVER w second_highest_salary FROM basic_pays +WINDOW w AS ( + PARTITION BY department + ORDER BY salary DESC + RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +) ORDER BY department -- !query schema -struct --- !query output -Gerard Bondur Accounting 11472 Mary Patterson -Mary Patterson Accounting 9998 Mary Patterson -Jeff Firrelli Accounting 8992 Mary Patterson -William Patterson Accounting 8870 Mary Patterson -Diane Murphy Accounting 8435 Mary Patterson -Anthony Bow Accounting 6627 Mary Patterson -Leslie Jennings IT 8113 Leslie Thompson -Leslie Thompson IT 5186 Leslie Thompson -Larry Bott SCM 11798 Pamela Castillo -Pamela Castillo SCM 11303 Pamela Castillo -Barry Jones SCM 10586 Pamela Castillo -Loui Bondur SCM 10449 Pamela Castillo -Gerard Hernandez SCM 6949 Pamela Castillo -George Vanauf Sales 10563 Steve Patterson -Steve Patterson Sales 9441 Steve Patterson -Julie Firrelli Sales 9181 Steve Patterson -Foon 
Yue Tseng Sales 6660 Steve Patterson \ No newline at end of file +struct +-- !query output +Gerard Bondur Accounting 11472 Gerard Bondur Mary Patterson +Mary Patterson Accounting 9998 Gerard Bondur Mary Patterson +Jeff Firrelli Accounting 8992 Gerard Bondur Mary Patterson +William Patterson Accounting 8870 Gerard Bondur Mary Patterson +Diane Murphy Accounting 8435 Gerard Bondur Mary Patterson +Anthony Bow Accounting 6627 Gerard Bondur Mary Patterson +Leslie Jennings IT 8113 Leslie Jennings Leslie Thompson +Leslie Thompson IT 5186 Leslie Jennings Leslie Thompson +Larry Bott SCM 11798 Larry Bott Pamela Castillo +Pamela Castillo SCM 11303 Larry Bott Pamela Castillo +Barry Jones SCM 10586 Larry Bott Pamela Castillo +Loui Bondur SCM 10449 Larry Bott Pamela Castillo +Gerard Hernandez SCM 6949 Larry Bott Pamela Castillo +George Vanauf Sales 10563 George Vanauf Steve Patterson +Steve Patterson Sales 9441 George Vanauf Steve Patterson +Julie Firrelli Sales 9181 George Vanauf Steve Patterson +Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson \ No newline at end of file From 1baf0d5c9b481622d5a811fd600f680b0cc3229f Mon Sep 17 00:00:00 2001 From: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Date: Fri, 13 Nov 2020 01:10:28 +0900 Subject: [PATCH 0459/1009] [SPARK-33140][SQL][FOLLOW-UP] change val to def in object rule ### What changes were proposed in this pull request? In #30097, many rules changed from case class to object, but if the rule is stateful, there will be a problem. For example, if an object rule uses a `val` to refer to a config, it will be unchanged after initialization even if other spark session uses a different config value. ### Why are the changes needed? Avoid potential bug ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #30354 from linhongliu-db/SPARK-33140-followup-2. 
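For illustration, here is a minimal, self-contained Scala sketch of the `val` vs `def` difference this follow-up addresses. `Conf` and `activeConf` are stand-ins for `SQLConf.get`, not Spark classes; only the capture-once versus read-on-every-call behavior is the point.

```scala
object ConfDemo {
  case class Conf(caseSensitive: Boolean)
  // Plays the role of SQLConf.get: the active value can differ between sessions.
  var activeConf: Conf = Conf(caseSensitive = false)

  object ValRule {
    // Captured once, when this object is first initialized.
    val caseSensitive: Boolean = activeConf.caseSensitive
  }

  object DefRule {
    // Looked up on every access, so it always reflects the current conf.
    def caseSensitive: Boolean = activeConf.caseSensitive
  }

  def main(args: Array[String]): Unit = {
    println(ValRule.caseSensitive) // false: first access initializes the object
    println(DefRule.caseSensitive) // false
    activeConf = Conf(caseSensitive = true)
    println(ValRule.caseSensitive) // still false: the val is stale
    println(DefRule.caseSensitive) // true: the def re-reads activeConf
  }
}
```

This is why the rules in the diff below switch `private val hintErrorHandler` and `private val canonicalizer` to `def`.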
Lead-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Co-authored-by: Linhong Liu Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/analysis/ResolveHints.scala | 4 ++-- .../spark/sql/catalyst/analysis/higherOrderFunctions.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index f1706c11e92ec..b44ca20e74bb0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -53,7 +53,7 @@ object ResolveHints { object ResolveJoinStrategyHints extends Rule[LogicalPlan] { private val STRATEGY_HINT_NAMES = JoinStrategyHint.strategies.flatMap(_.hintAliases) - private val hintErrorHandler = conf.hintErrorHandler + private def hintErrorHandler = conf.hintErrorHandler def resolver: Resolver = conf.resolver @@ -268,7 +268,7 @@ object ResolveHints { */ class RemoveAllHints extends Rule[LogicalPlan] { - private val hintErrorHandler = conf.hintErrorHandler + private def hintErrorHandler = conf.hintErrorHandler def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { case h: UnresolvedHint => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index e10af3d5cc68d..51eb3d033ddc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -74,7 +74,7 @@ object ResolveLambdaVariables extends Rule[LogicalPlan] { type LambdaVariableMap = Map[String, NamedExpression] - private val canonicalizer = { + private def canonicalizer = { if (!conf.caseSensitiveAnalysis) { // scalastyle:off caselocale s: String => s.toLowerCase From cf3b6551ce010a5503d6c624e313690cd2058855 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 12 Nov 2020 15:22:56 -0800 Subject: [PATCH 0460/1009] [SPARK-33435][SQL] DSv2: REFRESH TABLE should invalidate caches referencing the table ### What changes were proposed in this pull request? This changes `RefreshTableExec` in DSv2 to also invalidate caches with references to the target table to be refreshed. The change itself is similar to what's done in #30211. Note that though, since we currently don't support caching a DSv2 table directly, this doesn't add recache logic as in the DSv1 impl. I marked it as a TODO for now. ### Why are the changes needed? Currently the behavior in DSv1 and DSv2 is inconsistent w.r.t refreshing table: in DSv1 we invalidate both metadata cache as well as all table caches that are related to the table, but in DSv2 we only do the former. This addresses the issue and make the behavior consistent. ### Does this PR introduce _any_ user-facing change? Yes, now refreshing a v2 table also invalidate all the related caches. ### How was this patch tested? Added a new UT. Closes #30359 from sunchao/SPARK-33435. 
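As a rough usage-level illustration of the new behavior (mirroring the added unit test; it assumes a DSv2 catalog registered as `testcat` and a `foo` table provider, so the names only make sense inside that test setup):

```scala
spark.sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
spark.sql("CACHE TABLE t AS SELECT id FROM testcat.ns.t")

// Before this change, REFRESH TABLE only invalidated the metadata cache, so
// the cached view `t` could keep serving stale data.
spark.sql("REFRESH TABLE testcat.ns.t")

// Now the refresh also uncaches every cached plan that references
// testcat.ns.t, so `t` is recomputed on its next use.
spark.table("t").show()
```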
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../datasources/v2/RefreshTableExec.scala | 11 ++++++++++- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 14 ++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index a82f86ea952d9..21abfc2816ee4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -128,7 +128,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(r.catalog, r.identifier) :: Nil + RefreshTableExec(session, r.catalog, r.table, r.identifier) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 2a19ff304a9e0..52836de5a926b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -17,15 +17,24 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} case class RefreshTableExec( + session: SparkSession, catalog: TableCatalog, + table: Table, ident: Identifier) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { catalog.invalidateTable(ident) + + // invalidate all caches referencing the given table + // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + Seq.empty } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index c480df323ddc2..db3f11dbda51a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1650,6 +1650,20 @@ class DataSourceV2SQLSuite } } + test("SPARK-33435: REFRESH TABLE should invalidate all caches referencing the table") { + val tblName = "testcat.ns.t" + withTable(tblName) { + withTempView("t") { + sql(s"CREATE TABLE $tblName (id bigint) USING foo") + sql(s"CACHE TABLE t AS SELECT id FROM $tblName") + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("t")).isDefined) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("t")).isEmpty) + } + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a 
int) USING ${classOf[SimpleScanSource].getName}") From 2c64b731ae6a976b0d75a95901db849b4a0e2393 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 12 Nov 2020 15:31:57 -0800 Subject: [PATCH 0461/1009] [SPARK-33259][SS] Disable streaming query with possible correctness issue by default ### What changes were proposed in this pull request? This patch proposes to disable the streaming query with possible correctness issue in chained stateful operators. The behavior can be controlled by a SQL config, so if users understand the risk and still want to run the query, they can disable the check. ### Why are the changes needed? The possible correctness in chained stateful operators in streaming query is not straightforward for users. From users perspective, it will be considered as a Spark bug. It is also possible the worse case, users are not aware of the correctness issue and use wrong results. A better approach should be to disable such queries and let users choose to run the query if they understand there is such risk, instead of implicitly running the query and let users to find out correctness issue by themselves and report this known to Spark community. ### Does this PR introduce _any_ user-facing change? Yes. Streaming query with possible correctness issue will be blocked to run, except for users explicitly disable the SQL config. ### How was this patch tested? Unit test. Closes #30210 from viirya/SPARK-33259. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- docs/ss-migration-guide.md | 6 +++- .../UnsupportedOperationChecker.scala | 19 ++++++++--- .../apache/spark/sql/internal/SQLConf.scala | 18 ++++++++++ .../analysis/UnsupportedOperationsSuite.scala | 34 +++++++++++++------ .../FlatMapGroupsWithStateSuite.scala | 4 ++- 5 files changed, 64 insertions(+), 17 deletions(-) diff --git a/docs/ss-migration-guide.md b/docs/ss-migration-guide.md index 002058b69bf30..d52b2e095fc76 100644 --- a/docs/ss-migration-guide.md +++ b/docs/ss-migration-guide.md @@ -26,10 +26,14 @@ Note that this migration guide describes the items specific to Structured Stream Many items of SQL migration can be applied when migrating Structured Streaming to higher versions. Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide.html). +## Upgrading from Structured Streaming 3.0 to 3.1 + +- In Spark 3.0 and before, for the queries that have stateful operation which can emit rows older than the current watermark plus allowed late record delay, which are "late rows" in downstream stateful operations and these rows can be discarded, Spark only prints a warning message. Since Spark 3.1, Spark will check for such queries with possible correctness issue and throw AnalysisException for it by default. For the users who understand the possible risk of correctness issue and still decide to run the query, please disable this check by setting the config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false. + ## Upgrading from Structured Streaming 2.4 to 3.0 - In Spark 3.0, Structured Streaming forces the source schema into nullable when file-based datasources such as text, json, csv, parquet and orc are used via `spark.readStream(...)`. Previously, it respected the nullability in source schema; however, it caused issues tricky to debug with NPE. To restore the previous behavior, set `spark.sql.streaming.fileSource.schema.forceNullable` to `false`. - Spark 3.0 fixes the correctness issue on Stream-stream outer join, which changes the schema of state. 
(See [SPARK-26154](https://issues.apache.org/jira/browse/SPARK-26154) for more details). If you start your query from checkpoint constructed from Spark 2.x which uses stream-stream outer join, Spark 3.0 fails the query. To recalculate outputs, discard the checkpoint and replay previous inputs. -- In Spark 3.0, the deprecated class `org.apache.spark.sql.streaming.ProcessingTime` has been removed. Use `org.apache.spark.sql.streaming.Trigger.ProcessingTime` instead. Likewise, `org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger` has been removed in favor of `Trigger.Continuous`, and `org.apache.spark.sql.execution.streaming.OneTimeTrigger` has been hidden in favor of `Trigger.Once`. \ No newline at end of file +- In Spark 3.0, the deprecated class `org.apache.spark.sql.streaming.ProcessingTime` has been removed. Use `org.apache.spark.sql.streaming.Trigger.ProcessingTime` instead. Likewise, `org.apache.spark.sql.execution.streaming.continuous.ContinuousTrigger` has been removed in favor of `Trigger.Continuous`, and `org.apache.spark.sql.execution.streaming.OneTimeTrigger` has been hidden in favor of `Trigger.Once`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 809323455652e..814ea8c9768ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode /** @@ -40,10 +41,15 @@ object UnsupportedOperationChecker extends Logging { } } + /** + * Checks for possible correctness issue in chained stateful operators. The behavior is + * controlled by SQL config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled`. + * Once it is enabled, an analysis exception will be thrown. Otherwise, Spark will just + * print a warning message. + */ def checkStreamingQueryGlobalWatermarkLimit( plan: LogicalPlan, - outputMode: OutputMode, - failWhenDetected: Boolean): Unit = { + outputMode: OutputMode): Unit = { def isStatefulOperationPossiblyEmitLateRows(p: LogicalPlan): Boolean = p match { case s: Aggregate if s.isStreaming && outputMode == InternalOutputModes.Append => true @@ -62,6 +68,8 @@ object UnsupportedOperationChecker extends Logging { case _ => false } + val failWhenDetected = SQLConf.get.statefulOperatorCorrectnessCheckEnabled + try { plan.foreach { subPlan => if (isStatefulOperation(subPlan)) { @@ -73,7 +81,10 @@ object UnsupportedOperationChecker extends Logging { "The query contains stateful operation which can emit rows older than " + "the current watermark plus allowed late record delay, which are \"late rows\"" + " in downstream stateful operations and these rows can be discarded. " + - "Please refer the programming guide doc for more details." + "Please refer the programming guide doc for more details. 
If you understand " + + "the possible risk of correctness issue and still need to run the query, " + + "you can disable this check by setting the config " + + "`spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false." throwError(errorMsg)(plan) } } @@ -388,7 +399,7 @@ object UnsupportedOperationChecker extends Logging { checkUnsupportedExpressions(subPlan) } - checkStreamingQueryGlobalWatermarkLimit(plan, outputMode, failWhenDetected = false) + checkStreamingQueryGlobalWatermarkLimit(plan, outputMode) } def checkForContinuous(plan: LogicalPlan, outputMode: OutputMode): Unit = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ef988052affcd..546b199950a21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1382,6 +1382,21 @@ object SQLConf { .booleanConf .createWithDefault(true) + val STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED = + buildConf("spark.sql.streaming.statefulOperator.checkCorrectness.enabled") + .internal() + .doc("When true, the stateful operators for streaming query will be checked for possible " + + "correctness issue due to global watermark. The correctness issue comes from queries " + + "containing stateful operation which can emit rows older than the current watermark " + + "plus allowed late record delay, which are \"late rows\" in downstream stateful " + + "operations and these rows can be discarded. Please refer the programming guide doc for " + + "more details. Once the issue is detected, Spark will throw analysis exception. " + + "When this config is disabled, Spark will just print warning message for users. 
" + + "Prior to Spark 3.1.0, the behavior is disabling this config.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + @@ -3017,6 +3032,9 @@ class SQLConf extends Serializable with Logging { def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) + def statefulOperatorCorrectnessCheckEnabled: Boolean = + getConf(STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED) + def streamingFileCommitProtocolClass: String = getConf(STREAMING_FILE_COMMIT_PROTOCOL_CLASS) def fileSinkLogDeletion: Boolean = getConf(FILE_SINK_LOG_DELETION) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index b9943a9744985..21dde3ca8ca51 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical.{FlatMapGroupsWithState, _} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder} import org.apache.spark.unsafe.types.CalendarInterval @@ -36,7 +37,7 @@ import org.apache.spark.unsafe.types.CalendarInterval /** A dummy command for testing unsupported operations. */ case class DummyCommand() extends Command -class UnsupportedOperationsSuite extends SparkFunSuite { +class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { val attribute = AttributeReference("a", IntegerType, nullable = true)() val watermarkMetadata = new MetadataBuilder() @@ -218,6 +219,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { expectedMsgs = Seq("flatMapGroupsWithState in append mode", "update")) // FlatMapGroupsWithState(Append) in streaming with aggregation + // Only supported when `spark.sql.streaming.statefulOperator.correctnessCheck` is disabled. for (outputMode <- Seq(Append, Update, Complete)) { assertSupportedInStreamingPlan( "flatMapGroupsWithState - flatMapGroupsWithState(Append) " + @@ -228,7 +230,8 @@ class UnsupportedOperationsSuite extends SparkFunSuite { FlatMapGroupsWithState( null, att, att, Seq(att), Seq(att), att, null, Append, isMapGroupsWithState = false, null, streamRelation)), - outputMode = outputMode) + outputMode = outputMode, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") } for (outputMode <- Seq(Append, Update)) { @@ -268,6 +271,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite { } // multiple FlatMapGroupsWithStates + // Only supported when `spark.sql.streaming.statefulOperator.correctnessCheck` is disabled. 
assertSupportedInStreamingPlan( "flatMapGroupsWithState - multiple flatMapGroupsWithStates on streaming relation and all are " + "in append mode", @@ -275,7 +279,8 @@ class UnsupportedOperationsSuite extends SparkFunSuite { isMapGroupsWithState = false, null, FlatMapGroupsWithState(null, att, att, Seq(att), Seq(att), att, null, Append, isMapGroupsWithState = false, null, streamRelation)), - outputMode = Append) + outputMode = Append, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") assertNotSupportedInStreamingPlan( "flatMapGroupsWithState - multiple flatMapGroupsWithStates on s streaming relation but some" + @@ -995,9 +1000,12 @@ class UnsupportedOperationsSuite extends SparkFunSuite { def assertSupportedInStreamingPlan( name: String, plan: LogicalPlan, - outputMode: OutputMode): Unit = { + outputMode: OutputMode, + configs: (String, String)*): Unit = { test(s"streaming plan - $name: supported") { - UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode) + withSQLConf(configs: _*) { + UnsupportedOperationChecker.checkForStreaming(wrapInStreaming(plan), outputMode) + } } } @@ -1070,14 +1078,18 @@ class UnsupportedOperationsSuite extends SparkFunSuite { expectFailure: Boolean): Unit = { test(s"Global watermark limit - $testNamePostfix") { if (expectFailure) { - val e = intercept[AnalysisException] { - UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( - wrapInStreaming(plan), outputMode, failWhenDetected = true) + withSQLConf(SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "true") { + val e = intercept[AnalysisException] { + UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( + wrapInStreaming(plan), outputMode) + } + assert(e.message.contains("Detected pattern of possible 'correctness' issue")) } - assert(e.message.contains("Detected pattern of possible 'correctness' issue")) } else { - UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( - wrapInStreaming(plan), outputMode, failWhenDetected = true) + withSQLConf(SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") { + UnsupportedOperationChecker.checkStreamingQueryGlobalWatermarkLimit( + wrapInStreaming(plan), outputMode) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index 2efd715b7731c..f97c9386f9488 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -1324,7 +1324,9 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { def testWithAllStateVersions(name: String)(func: => Unit): Unit = { for (version <- FlatMapGroupsWithStateExecHelper.supportedVersions) { test(s"$name - state format version $version") { - withSQLConf(SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString) { + withSQLConf( + SQLConf.FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION.key -> version.toString, + SQLConf.STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED.key -> "false") { func } } From 539c2deb896d0adb9bbd63fc1ef48a31050a6538 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 13 Nov 2020 05:15:13 +0000 Subject: [PATCH 0462/1009] [SPARK-33426][SQL][TESTS] Unify Hive SHOW TABLES tests ### What changes were proposed in this pull request? 1. 
Create the separate test suite `org.apache.spark.sql.hive.execution.command.ShowTablesSuite`. 2. Re-use V1 SHOW TABLES tests added by https://github.com/apache/spark/pull/30287 in the Hive test suites. 3. Add new test case for the pattern `'table_name_1*|table_name_2*'` in the common test suite. ### Why are the changes needed? To test V1 + common SHOW TABLES tests in Hive. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running v1/v2 and Hive v1 `ShowTablesSuite`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30340 from MaxGekk/show-tables-hive-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- ...sSuite.scala => ShowTablesSuiteBase.scala} | 38 +++++++++++-------- .../command/v1/ShowTablesSuite.scala | 7 +++- .../command/v2/ShowTablesSuite.scala | 5 ++- .../sql/hive/execution/HiveCommandSuite.scala | 22 ----------- .../execution/command/ShowTablesSuite.scala | 26 +++++++++++++ 5 files changed, 56 insertions(+), 42 deletions(-) rename sql/core/src/test/scala/org/apache/spark/sql/execution/command/{ShowTablesSuite.scala => ShowTablesSuiteBase.scala} (76%) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala similarity index 76% rename from sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala rename to sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index 01720b5723243..49428fab79027 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -20,13 +20,13 @@ package org.apache.spark.sql.execution.command import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.Row +import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType -trait ShowTablesSuite extends SharedSparkSession { +trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String protected def defaultNamespace: Seq[String] @@ -39,7 +39,7 @@ trait ShowTablesSuite extends SharedSparkSession { protected def runShowTablesSql(sqlText: String, expected: Seq[ShowRow]): Unit = { val df = spark.sql(sqlText) assert(df.schema === showSchema) - assert(df.collect() === getRows(expected)) + checkAnswer(df, getRows(expected)) } override def test(testName: String, testTags: Tag*)(testFun: => Any) @@ -63,30 +63,36 @@ trait ShowTablesSuite extends SharedSparkSession { sql(s"CREATE NAMESPACE $catalog.ns2") withTable( s"$catalog.ns1.table", - s"$catalog.ns1.table_name_1", - s"$catalog.ns1.table_name_2", - s"$catalog.ns2.table_name_2") { + s"$catalog.ns1.table_name_1a", + s"$catalog.ns1.table_name_2b", + s"$catalog.ns2.table_name_2b") { sql(s"CREATE TABLE $catalog.ns1.table (id bigint, data string) $defaultUsing") - sql(s"CREATE TABLE $catalog.ns1.table_name_1 (id bigint, data string) $defaultUsing") - sql(s"CREATE TABLE $catalog.ns1.table_name_2 (id bigint, data string) 
$defaultUsing") - sql(s"CREATE TABLE $catalog.ns2.table_name_2 (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_1a (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns1.table_name_2b (id bigint, data string) $defaultUsing") + sql(s"CREATE TABLE $catalog.ns2.table_name_2b (id bigint, data string) $defaultUsing") runShowTablesSql( s"SHOW TABLES FROM $catalog.ns1", Seq( ShowRow("ns1", "table", false), - ShowRow("ns1", "table_name_1", false), - ShowRow("ns1", "table_name_2", false))) + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) runShowTablesSql( s"SHOW TABLES FROM $catalog.ns1 LIKE '*name*'", Seq( - ShowRow("ns1", "table_name_1", false), - ShowRow("ns1", "table_name_2", false))) + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) runShowTablesSql( - s"SHOW TABLES FROM $catalog.ns1 LIKE '*2'", - Seq(ShowRow("ns1", "table_name_2", false))) + s"SHOW TABLES FROM $catalog.ns1 LIKE 'table_name_1*|table_name_2*'", + Seq( + ShowRow("ns1", "table_name_1a", false), + ShowRow("ns1", "table_name_2b", false))) + + runShowTablesSql( + s"SHOW TABLES FROM $catalog.ns1 LIKE '*2b'", + Seq(ShowRow("ns1", "table_name_2b", false))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index feb3bc623f3fa..d2332818d9546 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -20,10 +20,11 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.catalog.CatalogManager -import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} -class ShowTablesSuite extends CommonShowTablesSuite { +trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { override def version: String = "V1" override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultNamespace: Seq[String] = Seq("default") @@ -93,3 +94,5 @@ class ShowTablesSuite extends CommonShowTablesSuite { } } } + +class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index 668120ae1cada..c7f68863a1791 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -21,10 +21,11 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.InMemoryTableCatalog -import org.apache.spark.sql.execution.command.{ShowTablesSuite => CommonShowTablesSuite} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} -class 
ShowTablesSuite extends CommonShowTablesSuite { +class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSession { override def version: String = "V2" override def catalog: String = "test_catalog" override def defaultNamespace: Seq[String] = Nil diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index dcec8bf5c0cc6..a78fd506b752e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -95,28 +95,6 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } - test("show tables") { - withTable("show1a", "show2b") { - sql("CREATE TABLE show1a(c1 int)") - sql("CREATE TABLE show2b(c2 int)") - checkAnswer( - sql("SHOW TABLES IN default 'show1*'"), - Row("default", "show1a", false) :: Nil) - checkAnswer( - sql("SHOW TABLES IN default 'show1*|show2*'"), - Row("default", "show1a", false) :: - Row("default", "show2b", false) :: Nil) - checkAnswer( - sql("SHOW TABLES 'show1*|show2*'"), - Row("default", "show1a", false) :: - Row("default", "show2b", false) :: Nil) - assert( - sql("SHOW TABLES").count() >= 2) - assert( - sql("SHOW TABLES IN default").count() >= 2) - } - } - test("show views") { withView("show1a", "show2b", "global_temp.temp1", "temp2") { sql("CREATE VIEW show1a AS SELECT 1 AS id") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala new file mode 100644 index 0000000000000..836f080d28e75 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class ShowTablesSuite extends v1.ShowTablesSuiteBase with TestHiveSingleton { + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" +} From a70a2b02ce7d18947778d37c8fffb3f1b1b5b154 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 12 Nov 2020 21:19:51 -0800 Subject: [PATCH 0463/1009] [SPARK-33439][INFRA] Use SERIAL_SBT_TESTS=1 for SQL modules ### What changes were proposed in this pull request? This PR aims to decrease the parallelism of `SQL` module like `Hive` module. ### Why are the changes needed? GitHub Action `sql - slow tests` become flaky. 
- https://github.com/apache/spark/runs/1393670291 - https://github.com/apache/spark/runs/1393088031 ### Does this PR introduce _any_ user-facing change? No. This is dev-only feature. Although this will increase the running time, but it's better than flakiness. ### How was this patch tested? Pass the GitHub Action stably. Closes #30365 from dongjoon-hyun/SPARK-33439. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e4762523f7018..0918ee111b536 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -139,8 +139,8 @@ jobs: # Run the tests. - name: Run tests run: | - # Hive tests become flaky when running in parallel as it's too intensive. - if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi + # Hive and SQL tests become flaky when running in parallel as it's too intensive. + if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi mkdir -p ~/.m2 ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS" rm -rf ~/.m2/repository/org/apache/spark From 82a21d2a3e3d4eafa43802b3034907a1f2725396 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 13 Nov 2020 15:57:07 +0900 Subject: [PATCH 0464/1009] [SPARK-33433][SQL] Change Aggregate max rows to 1 if grouping is empty ### What changes were proposed in this pull request? Change `Aggregate` max rows to 1 if grouping is empty. ### Why are the changes needed? If `Aggregate` grouping is empty, the result is always one row. Then we don't need push down limit in `LimitPushDown` with such case ``` select count(*) from t1 union select count(*) from t2 limit 1 ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30356 from ulysses-you/SPARK-33433. 
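A quick way to see the effect in a spark-shell session (the temp view names here are only for illustration): a global aggregate always produces exactly one row, so reporting `maxRows = 1` lets `LimitPushDown` skip inserting a `LocalLimit` that could never reduce the row count.

```scala
spark.range(1000).createOrReplaceTempView("t1")
spark.range(1000).createOrReplaceTempView("t2")

val q = spark.sql("SELECT count(*) FROM t1 UNION SELECT count(*) FROM t2 LIMIT 1")
// Each side of the UNION is an ungrouped aggregate, i.e. a single-row plan.
// With this change the optimized plan keeps those aggregates as-is instead of
// wrapping each of them in LocalLimit(1).
q.explain(true)
```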
Authored-by: ulysses Signed-off-by: Takeshi Yamamuro --- .../plans/logical/basicLogicalOperators.scala | 8 +++++++- .../optimizer/LimitPushdownSuite.scala | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 223ef652d2f80..17bf704c6d67a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -586,7 +586,13 @@ case class Aggregate( } override def output: Seq[Attribute] = aggregateExpressions.map(_.toAttribute) - override def maxRows: Option[Long] = child.maxRows + override def maxRows: Option[Long] = { + if (groupingExpressions.isEmpty) { + Some(1L) + } else { + child.maxRows + } + } override lazy val validConstraints: ExpressionSet = { val nonAgg = aggregateExpressions.filter(_.find(_.isInstanceOf[AggregateExpression]).isEmpty) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index 17fb9fc5d11e3..d993aee3d7518 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -171,4 +171,22 @@ class LimitPushdownSuite extends PlanTest { // No pushdown for FULL OUTER JOINS. comparePlans(optimized, originalQuery) } + + test("SPARK-33433: Change Aggregate max rows to 1 if grouping is empty") { + val analyzed1 = Limit(1, Union( + x.groupBy()(count(1)), + y.groupBy()(count(1)))).analyze + val optimized1 = Optimize.execute(analyzed1) + comparePlans(analyzed1, optimized1) + + // test push down + val analyzed2 = Limit(1, Union( + x.groupBy(Symbol("a"))(count(1)), + y.groupBy(Symbol("b"))(count(1)))).analyze + val optimized2 = Optimize.execute(analyzed2) + val expected2 = Limit(1, Union( + LocalLimit(1, x.groupBy(Symbol("a"))(count(1))), + LocalLimit(1, y.groupBy(Symbol("b"))(count(1))))).analyze + comparePlans(expected2, optimized2) + } } From cdd8e51742a59ab11ffd45b8f4e893128c43f8d7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 13 Nov 2020 06:58:16 +0000 Subject: [PATCH 0465/1009] [SPARK-33419][SQL] Unexpected behavior when using SET commands before a query in SparkSession.sql ### What changes were proposed in this pull request? SparkSession.sql converts a string value to a DataFrame, and the string value should be one single SQL statement ending up w/ or w/o one or more semicolons. e.g. ```sql scala> spark.sql(" select 2").show +---+ | 2| +---+ | 2| +---+ scala> spark.sql(" select 2;").show +---+ | 2| +---+ | 2| +---+ scala> spark.sql(" select 2;;;;").show +---+ | 2| +---+ | 2| +---+ ``` If we put 2 or more statements in, it fails in the parser as expected, e.g. 
```sql scala> spark.sql(" select 2; select 1;").show org.apache.spark.sql.catalyst.parser.ParseException: extraneous input 'select' expecting {, ';'}(line 1, pos 11) == SQL == select 2; select 1; -----------^^^ at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided ``` As a very generic user scenario, users may want to change some settings before they execute the queries. They may pass a string value like `set spark.sql.abc=2; select 1;` into this API, which creates a confusing gap between the actual effect and the user's expectations. The user may want the query to be executed with spark.sql.abc=2, but Spark actually treats the whole part of `2; select 1;` as the value of the property 'spark.sql.abc', e.g. ``` scala> spark.sql("set spark.sql.abc=2; select 1;").show +-------------+------------+ | key| value| +-------------+------------+ |spark.sql.abc|2; select 1;| +-------------+------------+ ``` What's more, the SET symbol could digest everything behind it, which makes it unstable from version to version, e.g. #### 3.1 ```sql scala> spark.sql("set;").show org.apache.spark.sql.catalyst.parser.ParseException: Expected format is 'SET', 'SET key', or 'SET key=value'. 
If you want to include special characters in key, please use quotes, e.g., SET `ke y`=value.(line 1, pos 0) == SQL == set; ^^^ at org.apache.spark.sql.execution.SparkSqlAstBuilder.$anonfun$visitSetConfiguration$1(SparkSqlParser.scala:83) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:72) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:58) at org.apache.spark.sql.catalyst.parser.SqlBaseParser$SetConfigurationContext.accept(SqlBaseParser.java:2161) at org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18) at org.apache.spark.sql.catalyst.parser.AstBuilder.$anonfun$visitSingleStatement$1(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.catalyst.parser.AstBuilder.visitSingleStatement(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.$anonfun$parsePlan$1(ParseDriver.scala:82) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:113) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided scala> spark.sql("set a;").show org.apache.spark.sql.catalyst.parser.ParseException: Expected format is 'SET', 'SET key', or 'SET key=value'. 
If you want to include special characters in key, please use quotes, e.g., SET `ke y`=value.(line 1, pos 0) == SQL == set a; ^^^ at org.apache.spark.sql.execution.SparkSqlAstBuilder.$anonfun$visitSetConfiguration$1(SparkSqlParser.scala:83) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:72) at org.apache.spark.sql.execution.SparkSqlAstBuilder.visitSetConfiguration(SparkSqlParser.scala:58) at org.apache.spark.sql.catalyst.parser.SqlBaseParser$SetConfigurationContext.accept(SqlBaseParser.java:2161) at org.antlr.v4.runtime.tree.AbstractParseTreeVisitor.visit(AbstractParseTreeVisitor.java:18) at org.apache.spark.sql.catalyst.parser.AstBuilder.$anonfun$visitSingleStatement$1(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.ParserUtils$.withOrigin(ParserUtils.scala:113) at org.apache.spark.sql.catalyst.parser.AstBuilder.visitSingleStatement(AstBuilder.scala:77) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.$anonfun$parsePlan$1(ParseDriver.scala:82) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:113) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:51) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:81) at org.apache.spark.sql.SparkSession.$anonfun$sql$2(SparkSession.scala:610) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:610) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:769) at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:607) ... 47 elided ``` #### 2.4 ```sql scala> spark.sql("set;").show +---+-----------+ |key| value| +---+-----------+ | ;|| +---+-----------+ scala> spark.sql("set a;").show +---+-----------+ |key| value| +---+-----------+ | a;|| +---+-----------+ ``` In this PR, 1. make `set spark.sql.abc=2; select 1;` in `SparkSession.sql` fail directly, user should call `.sql` for each statement separately. 2. make the semicolon as the separator of statements, and if users want to use it as part of the property value, shall use quotes too. ### Why are the changes needed? 1. disambiguation for `SparkSession.sql` 2. make semicolon work same both w/ `SET` and other statements ### Does this PR introduce _any_ user-facing change? yes, the semicolon works as a separator of statements now, it will be trimmed if it is at the end of the statement and fail the statement if it is in the middle. you need to use quotes if you want it to be part of the property value ### How was this patch tested? new tests Closes #30332 from yaooqinn/SPARK-33419. 
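A short spark-shell sketch of the intended behavior after this change (the property key is arbitrary and the exact error text may differ):

```scala
// Trailing semicolons are now treated as statement terminators and trimmed.
spark.sql("SET spark.sql.abc=2;;;")

// A semicolon that should be part of the value must be back-quoted.
spark.sql("SET `spark.sql.abc`=`2;3`")

// Two statements in one string still fail: each call to SparkSession.sql must
// carry a single statement, so this now raises a ParseException instead of
// silently storing "2; select 1;" as the property value.
spark.sql("SET spark.sql.abc=2; select 1;")
```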
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +++ .../sql/catalyst/parser/ParserUtils.scala | 11 ++++ .../spark/sql/execution/SparkSqlParser.scala | 35 +++++++++---- .../sql/execution/SparkSqlParserSuite.scala | 52 +++++++++++++++++-- 4 files changed, 90 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ad0de528708a4..6b6b751cc3c15 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -246,7 +246,9 @@ statement | SET TIME ZONE interval #setTimeZone | SET TIME ZONE timezone=(STRING | LOCAL) #setTimeZone | SET TIME ZONE .*? #setTimeZone + | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setQuotedConfiguration + | SET .*? EQ configValue #setQuotedConfiguration | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration @@ -257,6 +259,10 @@ configKey : quotedIdentifier ; +configValue + : quotedIdentifier + ; + unsupportedHiveNativeCommands : kw1=CREATE kw2=ROLE | kw1=DROP kw2=ROLE diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index f2dab941cb8b2..1f32620e54902 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -71,6 +71,17 @@ object ParserUtils { stream.getText(interval) } + /** + * Get all the text which between the given start and end tokens. + * When we need to extract everything between two tokens including all spaces we should use + * this method instead of defined a named Antlr4 rule for .*?, + * which somehow parse "a b" -> "ab" in some cases + */ + def interval(start: Token, end: Token): String = { + val interval = Interval.of(start.getStopIndex + 1, end.getStartIndex - 1) + start.getInputStream.getText(interval) + } + /** Convert a string token into a string. */ def string(token: Token): String = unescapeSQLString(token.getText) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index f46526d419158..b28effbcb5514 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -58,8 +58,9 @@ class SparkSqlParser(conf: SQLConf) extends AbstractSqlParser(conf) { class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { import org.apache.spark.sql.catalyst.parser.ParserUtils._ - private val configKeyValueDef = """([a-zA-Z_\d\\.:]+)\s*=(.*)""".r + private val configKeyValueDef = """([a-zA-Z_\d\\.:]+)\s*=([^;]*);*""".r private val configKeyDef = """([a-zA-Z_\d\\.:]+)$""".r + private val configValueDef = """([^;]*);*""".r /** * Create a [[SetCommand]] logical plan. @@ -79,18 +80,34 @@ class SparkSqlAstBuilder(conf: SQLConf) extends AstBuilder(conf) { case s if s.isEmpty => SetCommand(None) case _ => throw new ParseException("Expected format is 'SET', 'SET key', or " + - "'SET key=value'. 
If you want to include special characters in key, " + - "please use quotes, e.g., SET `ke y`=value.", ctx) + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`.", ctx) } } - override def visitSetQuotedConfiguration(ctx: SetQuotedConfigurationContext) - : LogicalPlan = withOrigin(ctx) { - val keyStr = ctx.configKey().getText - if (ctx.EQ() != null) { - SetCommand(Some(keyStr -> Option(remainder(ctx.EQ().getSymbol).trim))) + override def visitSetQuotedConfiguration( + ctx: SetQuotedConfigurationContext): LogicalPlan = withOrigin(ctx) { + if (ctx.configValue() != null && ctx.configKey() != null) { + SetCommand(Some(ctx.configKey().getText -> Option(ctx.configValue().getText))) + } else if (ctx.configValue() != null) { + val valueStr = ctx.configValue().getText + val keyCandidate = interval(ctx.SET().getSymbol, ctx.EQ().getSymbol).trim + keyCandidate match { + case configKeyDef(key) => SetCommand(Some(key -> Option(valueStr))) + case _ => throw new ParseException(s"'$keyCandidate' is an invalid property key, please " + + s"use quotes, e.g. SET `$keyCandidate`=`$valueStr`", ctx) + } } else { - SetCommand(Some(keyStr -> None)) + val keyStr = ctx.configKey().getText + if (ctx.EQ() != null) { + remainder(ctx.EQ().getSymbol).trim match { + case configValueDef(valueStr) => SetCommand(Some(keyStr -> Option(valueStr))) + case other => throw new ParseException(s"'$other' is an invalid property value, please " + + s"use quotes, e.g. SET `$keyStr`=`$other`", ctx) + } + } else { + SetCommand(Some(keyStr -> None)) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 5e6808eeba0f6..5b4cd47742c00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -70,9 +70,21 @@ class SparkSqlParserSuite extends AnalysisTest { StaticSQLConf ConfigEntry.knownConfigs.values.asScala.foreach { config => assertEqual(s"SET ${config.key}", SetCommand(Some(config.key -> None))) - if (config.defaultValue.isDefined && config.defaultValueString != null) { - assertEqual(s"SET ${config.key}=${config.defaultValueString}", - SetCommand(Some(config.key -> Some(config.defaultValueString)))) + assertEqual(s"SET `${config.key}`", SetCommand(Some(config.key -> None))) + + val defaultValueStr = config.defaultValueString + if (config.defaultValue.isDefined && defaultValueStr != null) { + assertEqual(s"SET ${config.key}=`$defaultValueStr`", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + assertEqual(s"SET `${config.key}`=`$defaultValueStr`", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + + if (!defaultValueStr.contains(";")) { + assertEqual(s"SET ${config.key}=$defaultValueStr", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + assertEqual(s"SET `${config.key}`=$defaultValueStr", + SetCommand(Some(config.key -> Some(defaultValueStr)))) + } } assertEqual(s"RESET ${config.key}", ResetCommand(Some(config.key))) } @@ -101,10 +113,11 @@ class SparkSqlParserSuite extends AnalysisTest { SetCommand(Some("spark.sql. key" -> Some("v a lu e")))) assertEqual("SET `spark.sql. key`= -1", SetCommand(Some("spark.sql. 
key" -> Some("-1")))) + assertEqual("SET key=", SetCommand(Some("key" -> Some("")))) val expectedErrMsg = "Expected format is 'SET', 'SET key', or " + - "'SET key=value'. If you want to include special characters in key, " + - "please use quotes, e.g., SET `ke y`=value." + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`." intercept("SET spark.sql.key value", expectedErrMsg) intercept("SET spark.sql.key 'value'", expectedErrMsg) intercept("SET spark.sql.key \"value\" ", expectedErrMsg) @@ -115,6 +128,8 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("SET spark.sql. key=value", expectedErrMsg) intercept("SET spark.sql :key=value", expectedErrMsg) intercept("SET spark.sql . key=value", expectedErrMsg) + intercept("SET =", expectedErrMsg) + intercept("SET =value", expectedErrMsg) } test("Report Error for invalid usage of RESET command") { @@ -141,6 +156,33 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("RESET spark.sql : key", expectedErrMsg) } + test("SPARK-33419: Semicolon handling in SET command") { + assertEqual("SET a=1;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET a=1;;", SetCommand(Some("a" -> Some("1")))) + + assertEqual("SET a=`1`;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET a=`1;`", SetCommand(Some("a" -> Some("1;")))) + assertEqual("SET a=`1;`;", SetCommand(Some("a" -> Some("1;")))) + + assertEqual("SET `a`=1;;", SetCommand(Some("a" -> Some("1")))) + assertEqual("SET `a`=`1;`", SetCommand(Some("a" -> Some("1;")))) + assertEqual("SET `a`=`1;`;", SetCommand(Some("a" -> Some("1;")))) + + val expectedErrMsg = "Expected format is 'SET', 'SET key', or " + + "'SET key=value'. If you want to include special characters in key, or include semicolon " + + "in value, please use quotes, e.g., SET `ke y`=`v;alue`." + + intercept("SET a=1; SELECT 1", expectedErrMsg) + intercept("SET a=1;2;;", expectedErrMsg) + + intercept("SET a b=`1;;`", + "'a b' is an invalid property key, please use quotes, e.g. SET `a b`=`1;;`") + + intercept("SET `a`=1;2;;", + "'1;2;;' is an invalid property value, please use quotes, e.g." + + " SET `a`=`1;2;;`") + } + test("refresh resource") { assertEqual("REFRESH prefix_path", RefreshResource("prefix_path")) assertEqual("REFRESH /", RefreshResource("/")) From f80fe213bd4c5e065d5723816c42302a532be75c Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 13 Nov 2020 16:51:06 +0800 Subject: [PATCH 0466/1009] [SPARK-33166][DOC] Provide Search Function in Spark docs site ### What changes were proposed in this pull request? In the last few releases, our Spark documentation https://spark.apache.org/docs/latest/ becomes richer. It would nice to provide a search function to make our users find contents faster. [DocSearch](https://docsearch.algolia.com/) is entirely free and automated. This PR will use it to provides search function. The screenshots show below: ![overview](https://user-images.githubusercontent.com/8486025/98756802-30d82a80-23c3-11eb-9ca2-73bb20fb54c4.png) ### Why are the changes needed? Let the users of Spark documentation could find the needed information effectively. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? build on my machine and look on brower. Closes #30292 from beliefer/SPARK-33166. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Gengliang Wang --- docs/_layouts/global.html | 23 +++++++++++++++++++++++ docs/css/docsearch.css | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 docs/css/docsearch.css diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 5f6cd7c6b7f20..65af17ed2e4a1 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -30,6 +30,8 @@ + + {% production %} @@ -125,6 +127,10 @@ Third Party Projects + +


    + + From 97d2cee4af4ad8882334e2b680ab75dc73e29336 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 18 Nov 2020 18:35:11 -0800 Subject: [PATCH 0512/1009] [SPARK-33427][SQL][FOLLOWUP] Prevent test flakyness in SubExprEvaluationRuntimeSuite ### What changes were proposed in this pull request? This followup is to prevent possible test flakyness of `SubExprEvaluationRuntimeSuite`. ### Why are the changes needed? Because HashMap doesn't guarantee the order, in `proxyExpressions` the proxy expression id is not deterministic. So in `SubExprEvaluationRuntimeSuite` we should not test against it. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Unit test. Closes #30414 from viirya/SPARK-33427-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../catalyst/expressions/SubExprEvaluationRuntimeSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index badcd4fc3fdad..f56ec49724adb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -84,8 +84,7 @@ class SubExprEvaluationRuntimeSuite extends SparkFunSuite { }) // ( (one * two) * (one * two) ) assert(proxys.size == 2) - val expected = ExpressionProxy(mul2, 0, runtime) - assert(proxys.forall(_ == expected)) + assert(proxys.forall(_.child == mul2)) } test("ExpressionProxy won't be on non deterministic") { From e518008ca9dc8a4950e2655ed9b35ce95ffe5acb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 18 Nov 2020 18:58:06 -0800 Subject: [PATCH 0513/1009] [SPARK-33473][SQL] Extend interpreted subexpression elimination to other interpreted projections ### What changes were proposed in this pull request? Similar to `InterpretedUnsafeProjection`, this patch proposes to extend interpreted subexpression elimination to `InterpretedMutableProjection` and `InterpretedSafeProjection`. ### Why are the changes needed? Enabling subexpression elimination can improve the performance of interpreted projections, as shown in `InterpretedUnsafeProjection`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30406 from viirya/SPARK-33473. 
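As a quick illustration of the interpreted path this change targets, here is a hedged sketch adapted from the `MutableProjectionSuite` test added below; it assumes the test's settings are in effect (`spark.sql.codegen.factoryMode=NO_CODEGEN` and subexpression elimination enabled), so that `MutableProjection.create` resolves to `InterpretedMutableProjection`.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.DoubleType

// (one * two) * (one * two) appears twice inside `sum`; with this patch the interpreted
// projection routes it through SubExprEvaluationRuntime so it is evaluated once per row.
val one = BoundReference(0, DoubleType, true)
val two = BoundReference(1, DoubleType, true)
val mul2 = Multiply(Multiply(one, two), Multiply(one, two))
val sum = Add(mul2, Sqrt(mul2))

val proj = MutableProjection.create(Seq(sum))          // interpreted under NO_CODEGEN
proj(InternalRow.fromSeq(Seq(1.0, 2.0))).getDouble(0)  // ((1*2)*(1*2)) + sqrt(4.0) = 6.0
```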
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../InterpretedMutableProjection.scala | 18 ++++++- .../InterpretedSafeProjection.scala | 16 +++++- .../expressions/MutableProjectionSuite.scala | 46 +++++++++++++++++ .../codegen/GeneratedProjectionSuite.scala | 49 ++++++++++++++++++- 4 files changed, 125 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala index a2daec0b1ade1..91c9457af7de3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedMutableProjection.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp +import org.apache.spark.sql.internal.SQLConf /** @@ -33,6 +34,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable def this(expressions: Seq[Expression], inputSchema: Seq[Attribute]) = this(bindReferences(expressions, inputSchema)) + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val buffer = new Array[Any](expressions.size) override def initialize(partitionIndex: Int): Unit = { @@ -76,11 +86,15 @@ class InterpretedMutableProjection(expressions: Seq[Expression]) extends Mutable }.toArray override def apply(input: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(input) + } + var i = 0 while (i < validExprs.length) { - val (expr, ordinal) = validExprs(i) + val (_, ordinal) = validExprs(i) // Store the result into buffer first, to make the projection atomic (needed by aggregation) - buffer(ordinal) = expr.eval(input) + buffer(ordinal) = exprs(ordinal).eval(input) i += 1 } i = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala index 70789dac1d87a..0e71892db666b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedSafeProjection.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.aggregate.NoOp import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, GenericArrayData, MapData} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -30,6 +31,15 @@ import org.apache.spark.sql.types._ */ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection { + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new 
SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val exprs = if (subExprEliminationEnabled) { + runtime.proxyExpressions(expressions) + } else { + expressions + } + private[this] val mutableRow = new SpecificInternalRow(expressions.map(_.dataType)) private[this] val exprsWithWriters = expressions.zipWithIndex.filter { @@ -49,7 +59,7 @@ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } } } - (e, f) + (exprs(i), f) } private def generateSafeValueConverter(dt: DataType): Any => Any = dt match { @@ -97,6 +107,10 @@ class InterpretedSafeProjection(expressions: Seq[Expression]) extends Projection } override def apply(row: InternalRow): InternalRow = { + if (subExprEliminationEnabled) { + runtime.setInput(row) + } + var i = 0 while (i < exprsWithWriters.length) { val (expr, writer) = exprsWithWriters(i) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala index c31310bc54023..8f030b45e5d3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MutableProjectionSuite.scala @@ -80,4 +80,50 @@ class MutableProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { assert(errMsg.contains("MutableProjection cannot use UnsafeRow for output data types:")) } } + + test("SPARK-33473: subexpression elimination for interpreted MutableProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = MutableProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 4c9bcfe8f93a6..180665e653727 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -23,13 +23,14 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData +import 
org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** * A test suite for generated projections */ -class GeneratedProjectionSuite extends SparkFunSuite { +class GeneratedProjectionSuite extends SparkFunSuite with ExpressionEvalHelper { test("generated projections on wider table") { val N = 1000 @@ -246,4 +247,50 @@ class GeneratedProjectionSuite extends SparkFunSuite { val row2 = mutableProj(result) assert(result === row2) } + + test("SPARK-33473: subexpression elimination for interpreted SafeProjection") { + Seq("true", "false").foreach { enabled => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> enabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) { + val one = BoundReference(0, DoubleType, true) + val two = BoundReference(1, DoubleType, true) + + val mul = Multiply(one, two) + val mul2 = Multiply(mul, mul) + val sqrt = Sqrt(mul2) + val sum = Add(mul2, sqrt) + + val proj = SafeProjection.create(Seq(sum)) + val result = (d1: Double, d2: Double) => + ((d1 * d2) * (d1 * d2)) + Math.sqrt((d1 * d2) * (d1 * d2)) + + val inputRows = Seq( + InternalRow.fromSeq(Seq(1.0, 2.0)), + InternalRow.fromSeq(Seq(2.0, 3.0)), + InternalRow.fromSeq(Seq(1.0, null)), + InternalRow.fromSeq(Seq(null, 2.0)), + InternalRow.fromSeq(Seq(3.0, 4.0)), + InternalRow.fromSeq(Seq(null, null)) + ) + val expectedResults = Seq( + result(1.0, 2.0), + result(2.0, 3.0), + null, + null, + result(3.0, 4.0), + null + ) + + inputRows.zip(expectedResults).foreach { case (inputRow, expected) => + val projRow = proj.apply(inputRow) + if (expected != null) { + assert(projRow.getDouble(0) == expected) + } else { + assert(projRow.isNullAt(0)) + } + } + } + } + } } From 66a76378cf9aa049c9281fc099721904942fa5ee Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 18 Nov 2020 19:18:28 -0800 Subject: [PATCH 0514/1009] [SPARK-31255][SQL][FOLLOWUP] Add missing license headers ### What changes were proposed in this pull request? Add missing license headers for new files added in #28027. ### Why are the changes needed? To fix licenses. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This is a purely non-functional change. Closes #30415 from rdblue/license-headers. Authored-by: Ryan Blue Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/MetadataColumn.java | 19 +++++++++++++++++++ .../catalog/SupportsMetadataColumns.java | 19 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java index 8aefa28323b33..cdfa082ced317 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.spark.sql.connector.catalog; import org.apache.spark.annotation.Evolving; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index fc313491f2970..208abfc302582 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.spark.sql.connector.catalog; import org.apache.spark.annotation.Evolving; From e3058ba17cb4512537953eb4ded884e24ee93ba2 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 19 Nov 2020 14:20:39 +0900 Subject: [PATCH 0515/1009] [SPARK-33441][BUILD] Add unused-imports compilation check and remove all unused-imports ### What changes were proposed in this pull request? This PR adds a new Scala compile arg to `pom.xml` to defend against new unused imports: - `-Ywarn-unused-import` for Scala 2.12 - `-Wconf:cat=unused-imports:e` for Scala 2.13 The other file changes remove all unused imports in the Spark code. ### Why are the changes needed? Clean up the code and add a guarantee to defend against new unused imports. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30351 from LuciferYang/remove-imports-core-module.
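The actual flag wiring lands in `pom.xml` and is not reproduced in this hunk list; as a rough illustration only, an equivalent sbt-style setup for the two flags quoted above might look like the sketch below. The extra `-Wunused:imports` entry is an added assumption: on Scala 2.13 the unused-imports lint has to be switched on before `-Wconf` can escalate that category to an error.

```scala
// build.sbt sketch (illustrative, not the Maven change made by this commit)
scalacOptions ++= {
  if (scalaVersion.value.startsWith("2.13")) {
    // enable the lint, then promote the unused-imports category to a compile error
    Seq("-Wunused:imports", "-Wconf:cat=unused-imports:e")
  } else {
    // Scala 2.12 flag named in the commit message; warns on unused imports
    Seq("-Ywarn-unused-import")
  }
}
```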
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../org/apache/spark/BarrierTaskContext.scala | 1 - .../org/apache/spark/MapOutputTracker.scala | 2 +- .../apache/spark/api/python/PythonRunner.scala | 5 ----- .../apache/spark/api/python/SerDeUtil.scala | 2 -- .../scala/org/apache/spark/api/r/RRunner.scala | 1 - .../org/apache/spark/deploy/JsonProtocol.scala | 4 ++-- .../history/BasicEventFilterBuilder.scala | 1 - .../deploy/history/FsHistoryProvider.scala | 2 +- .../spark/deploy/history/HybridStore.scala | 1 - .../apache/spark/deploy/master/Master.scala | 2 -- .../spark/deploy/master/ui/MasterWebUI.scala | 1 - .../CoarseGrainedExecutorBackend.scala | 1 - .../spark/network/BlockDataManager.scala | 2 +- .../spark/network/BlockTransferService.scala | 1 - .../netty/NettyBlockTransferService.scala | 6 ++---- .../spark/rdd/ParallelCollectionRDD.scala | 1 - .../spark/rdd/ReliableCheckpointRDD.scala | 1 - .../spark/resource/ResourceAllocator.scala | 3 +-- .../apache/spark/resource/ResourceUtils.scala | 4 ++-- .../spark/scheduler/TaskSchedulerImpl.scala | 3 --- .../cluster/CoarseGrainedClusterMessage.scala | 1 - .../shuffle/sort/SortShuffleManager.scala | 6 ++---- .../apache/spark/status/AppStatusStore.scala | 3 +-- .../org/apache/spark/status/KVUtils.scala | 1 - .../apache/spark/storage/BlockManager.scala | 1 - .../storage/BlockManagerDecommissioner.scala | 3 +-- .../storage/BlockManagerMasterEndpoint.scala | 2 +- .../org/apache/spark/util/ThreadUtils.scala | 1 - .../scala/org/apache/spark/util/Utils.scala | 2 +- .../org/apache/spark/StatusTrackerSuite.scala | 1 - .../StandaloneDynamicAllocationSuite.scala | 2 +- .../history/EventLogFileCompactorSuite.scala | 3 +-- .../deploy/master/ui/MasterWebUISuite.scala | 2 -- .../apache/spark/executor/ExecutorSuite.scala | 5 ++--- .../input/WholeTextFileRecordReaderSuite.scala | 1 - .../netty/NettyBlockTransferServiceSuite.scala | 2 +- .../spark/resource/ResourceUtilsSuite.scala | 2 -- .../scheduler/BarrierTaskContextSuite.scala | 1 - .../CoarseGrainedSchedulerBackendSuite.scala | 1 - .../spark/scheduler/DAGSchedulerSuite.scala | 2 +- .../scheduler/TaskSchedulerImplSuite.scala | 2 +- .../spark/scheduler/TaskSetManagerSuite.scala | 1 - .../scheduler/WorkerDecommissionSuite.scala | 6 ++---- .../spark/storage/BlockInfoManagerSuite.scala | 2 +- .../org/apache/spark/ui/StagePageSuite.scala | 1 - .../org/apache/spark/util/UtilsSuite.scala | 3 +-- .../examples/ml/DeveloperApiExample.scala | 1 - .../examples/mllib/RankingMetricsExample.scala | 1 - .../spark/examples/sql/SparkSQLExample.scala | 2 -- .../spark/sql/avro/SchemaConverters.scala | 2 +- .../org/apache/spark/sql/avro/AvroSuite.scala | 2 +- .../apache/spark/sql/kafka010/KafkaBatch.scala | 2 -- .../sql/kafka010/KafkaMicroBatchStream.scala | 3 --- .../spark/sql/kafka010/KafkaOffsetReader.scala | 11 +++++++---- .../spark/sql/kafka010/KafkaRelation.scala | 3 --- .../spark/sql/kafka010/KafkaSource.scala | 5 ----- .../sql/kafka010/KafkaSourceProvider.scala | 2 +- .../kafka010/KafkaMicroBatchSourceSuite.scala | 1 - .../spark/sql/kafka010/KafkaTestUtils.scala | 4 +--- .../apache/spark/kafka010/KafkaTokenUtil.scala | 2 +- .../kafka010/mocks/MockScheduler.scala | 2 -- .../kinesis/KinesisCheckpointer.scala | 1 - .../kinesis/KinesisInputDStream.scala | 2 -- .../apache/spark/ml/attribute/package.scala | 2 -- .../ml/feature/VarianceThresholdSelector.scala | 5 +---- .../org/apache/spark/ml/feature/package.scala | 2 -- .../apache/spark/ml/recommendation/ALS.scala | 2 +- .../ml/recommendation/TopByKeyAggregator.scala | 
1 - .../classification/LogisticRegression.scala | 1 - .../apache/spark/ml/fpm/PrefixSpanSuite.scala | 1 - .../GeneralizedLinearRegressionSuite.scala | 4 ---- .../apache/spark/ml/stat/SummarizerSuite.scala | 1 - .../spark/ml/tree/impl/RandomForestSuite.scala | 1 - .../spark/ml/util/DefaultReadWriteTest.scala | 1 - .../spark/ml/util/PMMLReadWriteTest.scala | 3 --- .../spark/mllib/clustering/LDASuite.scala | 1 - .../linalg/distributed/BlockMatrixSuite.scala | 2 +- pom.xml | 3 +++ .../org/apache/spark/repl/Repl2Suite.scala | 5 ----- .../spark/repl/ExecutorClassLoaderSuite.scala | 1 - .../org/apache/spark/repl/ReplSuite.scala | 2 +- .../k8s/features/EnvSecretsFeatureStep.scala | 2 +- .../k8s/features/MountSecretsFeatureStep.scala | 2 +- .../cluster/k8s/ExecutorPodsAllocator.scala | 1 - .../cluster/k8s/KubernetesClusterManager.scala | 1 - .../spark/deploy/k8s/KubernetesTestConf.scala | 1 - .../deploy/k8s/KubernetesUtilsSuite.scala | 2 +- .../spark/deploy/k8s/PodBuilderSuite.scala | 1 - .../DriverCommandFeatureStepSuite.scala | 3 --- .../DriverServiceFeatureStepSuite.scala | 1 - .../HadoopConfDriverFeatureStepSuite.scala | 1 - .../KerberosConfDriverFeatureStepSuite.scala | 3 +-- .../MountVolumesFeatureStepSuite.scala | 2 +- .../k8s/ExecutorPodsAllocatorSuite.scala | 2 +- .../KubernetesTestComponents.scala | 1 - .../deploy/k8s/integrationtest/Utils.scala | 2 -- .../backend/cloud/KubeConfigBackend.scala | 2 -- .../spark/deploy/mesos/ui/MesosClusterUI.scala | 1 - .../MesosCoarseGrainedSchedulerBackend.scala | 2 +- .../mesos/MesosSchedulerBackendUtil.scala | 2 +- .../cluster/mesos/MesosSchedulerUtils.scala | 4 ++-- .../spark/deploy/yarn/ExecutorRunnable.scala | 4 +--- ...tyPreferredContainerPlacementStrategy.scala | 2 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 1 - .../launcher/YarnCommandBuilderUtils.scala | 2 -- .../deploy/yarn/YarnSparkHadoopUtilSuite.scala | 1 - .../spark/sql/catalyst/ScalaReflection.scala | 5 ----- .../catalyst/analysis/DecimalPrecision.scala | 1 - .../catalyst/analysis/ResolveCatalogs.scala | 2 +- .../analysis/higherOrderFunctions.scala | 1 - .../catalyst/analysis/v2ResolutionPlans.scala | 5 ++--- .../sql/catalyst/catalog/ExternalCatalog.scala | 2 +- .../catalyst/encoders/ExpressionEncoder.scala | 4 +--- .../spark/sql/catalyst/expressions/Cast.scala | 3 +-- .../sql/catalyst/expressions/Expression.scala | 5 ++--- .../sql/catalyst/expressions/ScalaUDF.scala | 2 +- .../expressions/codegen/CodeGenerator.scala | 7 +++---- .../codegen/GeneratePredicate.scala | 1 - .../codegen/GenerateUnsafeRowJoiner.scala | 4 ---- .../expressions/higherOrderFunctions.scala | 2 +- .../sql/catalyst/expressions/predicates.scala | 3 --- .../spark/sql/catalyst/json/JsonFilters.scala | 1 - .../sql/catalyst/optimizer/ComplexTypes.scala | 1 - .../optimizer/NormalizeFloatingNumbers.scala | 4 ++-- .../ReplaceNullWithFalseInPredicate.scala | 1 - .../catalyst/optimizer/finishAnalysis.scala | 2 -- .../sql/catalyst/optimizer/subquery.scala | 1 - .../plans/logical/AnalysisHelper.scala | 12 ++++++------ .../catalyst/plans/logical/Statistics.scala | 8 -------- .../plans/logical/basicLogicalOperators.scala | 7 ++----- .../sql/catalyst/plans/logical/hints.scala | 1 - .../statsEstimation/ProjectEstimation.scala | 2 +- .../spark/sql/catalyst/trees/TreeNode.scala | 1 - .../spark/sql/catalyst/util/ArrayData.scala | 1 - .../sql/catalyst/util/RebaseDateTime.scala | 2 +- .../datasources/v2/DataSourceV2Relation.scala | 8 ++++---- .../org/apache/spark/sql/types/DataType.scala | 2 +- 
.../org/apache/spark/sql/types/Decimal.scala | 1 - .../org/apache/spark/sql/RowJsonSuite.scala | 3 +-- .../sql/catalyst/ScalaReflectionSuite.scala | 1 - ...reateTablePartitioningValidationSuite.scala | 2 +- .../analysis/ResolveNaturalJoinSuite.scala | 1 - .../analysis/StreamingJoinHelperSuite.scala | 2 +- .../analysis/UnsupportedOperationsSuite.scala | 1 - .../expressions/ObjectExpressionsSuite.scala | 5 ++--- .../SubExprEvaluationRuntimeSuite.scala | 1 - .../aggregate/ApproximatePercentileSuite.scala | 2 +- .../expressions/codegen/CodeBlockSuite.scala | 2 +- .../optimizer/EliminateDistinctSuite.scala | 2 +- .../optimizer/FilterPushdownSuite.scala | 3 +-- .../PullupCorrelatedPredicatesSuite.scala | 2 +- .../optimizer/SimplifyCastsSuite.scala | 2 -- .../catalyst/optimizer/complexTypesSuite.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 3 +-- .../FilterEstimationSuite.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 1 - .../org/apache/spark/sql/DataFrameWriter.scala | 1 - .../scala/org/apache/spark/sql/Dataset.scala | 3 +-- .../spark/sql/RelationalGroupedDataset.scala | 1 - .../org/apache/spark/sql/RuntimeConfig.scala | 2 -- .../org/apache/spark/sql/UDFRegistration.scala | 4 ++-- .../org/apache/spark/sql/catalog/Catalog.scala | 2 +- .../analysis/ResolveSessionCatalog.scala | 2 +- .../spark/sql/execution/CacheManager.scala | 2 +- .../sql/execution/CollectMetricsExec.scala | 2 -- .../sql/execution/DataSourceScanExec.scala | 1 - .../spark/sql/execution/HiveResult.scala | 2 +- .../execution/RemoveRedundantProjects.scala | 1 - .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/SparkStrategies.scala | 7 ++++--- .../sql/execution/WholeStageCodegenExec.scala | 1 - .../adaptive/DemoteBroadcastHashJoin.scala | 1 - .../execution/adaptive/LogicalQueryStage.scala | 1 - .../adaptive/ReuseAdaptiveSubquery.scala | 1 - .../sql/execution/adaptive/simpleCosting.scala | 2 +- .../aggregate/ObjectAggregationIterator.scala | 1 - .../aggregate/ObjectAggregationMap.scala | 1 - .../SortBasedAggregationIterator.scala | 7 ++++--- .../spark/sql/execution/aggregate/udaf.scala | 6 +----- .../sql/execution/basicPhysicalOperators.scala | 4 ++-- .../bucketing/CoalesceBucketsInJoin.scala | 1 - .../DisableUnnecessaryBucketedScan.scala | 1 - .../sql/execution/columnar/ColumnStats.scala | 2 +- .../sql/execution/command/CommandUtils.scala | 2 +- .../execution/command/DataWritingCommand.scala | 3 +-- .../sql/execution/command/SetCommand.scala | 2 +- .../spark/sql/execution/command/cache.scala | 1 - .../command/createDataSourceTables.scala | 1 - .../sql/execution/command/functions.scala | 2 +- .../sql/execution/datasources/DataSource.scala | 1 - .../datasources/FallBackFileSourceV2.scala | 5 +++-- .../datasources/HadoopFsRelation.scala | 4 ---- .../execution/datasources/OutputWriter.scala | 3 +-- .../PartitioningAwareFileIndex.scala | 2 +- .../datasources/PartitioningUtils.scala | 2 +- .../datasources/RecordReaderIterator.scala | 2 -- .../execution/datasources/SchemaPruning.scala | 2 +- .../binaryfile/BinaryFileFormat.scala | 4 ++-- .../datasources/csv/CSVDataSource.scala | 2 -- .../execution/datasources/jdbc/JDBCRDD.scala | 6 +++--- .../datasources/json/JsonFileFormat.scala | 3 --- .../datasources/orc/OrcDeserializer.scala | 1 - .../parquet/ParquetFileFormat.scala | 4 ++-- .../parquet/ParquetOutputWriter.scala | 1 - .../parquet/ParquetRowConverter.scala | 18 +++++++++++------- .../sql/execution/datasources/rules.scala | 2 -- .../datasources/v2/DescribeTableExec.scala | 2 +- 
.../datasources/v2/DropNamespaceExec.scala | 2 +- .../datasources/v2/FileDataSourceV2.scala | 1 - .../v2/ShowTablePropertiesExec.scala | 2 +- .../datasources/v2/TableCapabilityCheck.scala | 2 +- .../datasources/v2/TextBasedFileScan.scala | 2 -- .../datasources/v2/orc/OrcScanBuilder.scala | 3 --- .../PlanDynamicPruningFilters.scala | 2 +- .../exchange/EnsureRequirements.scala | 1 - .../sql/execution/exchange/Exchange.scala | 1 - .../sql/execution/python/EvalPythonExec.scala | 2 +- .../spark/sql/execution/r/ArrowRRunner.scala | 2 +- .../streaming/FlatMapGroupsWithStateExec.scala | 2 +- .../execution/streaming/HDFSMetadataLog.scala | 4 +--- .../execution/streaming/StreamExecution.scala | 1 - .../execution/streaming/StreamMetadata.scala | 10 +++++----- .../StreamingSymmetricHashJoinHelper.scala | 8 ++++---- .../streaming/sources/ForeachBatchSink.scala | 1 - .../sources/PackedRowWriterFactory.scala | 7 ++++--- .../execution/streaming/sources/memory.scala | 5 ++--- .../execution/streaming/state/StateStore.scala | 2 +- .../streaming/state/StateStoreRDD.scala | 2 -- .../state/SymmetricHashJoinStateManager.scala | 6 +++--- .../streaming/statefulOperators.scala | 1 - .../execution/streaming/streamingLimits.scala | 1 - .../sql/execution/window/WindowExec.scala | 8 +------- .../sql/execution/window/WindowExecBase.scala | 2 +- .../sql/expressions/UserDefinedFunction.scala | 7 ++----- .../sql/expressions/scalalang/typed.scala | 2 -- .../scala/org/apache/spark/sql/functions.scala | 3 +-- .../spark/sql/internal/SessionState.scala | 5 +++-- .../spark/sql/internal/SharedState.scala | 2 -- .../sql/streaming/StreamingQueryManager.scala | 1 - .../spark/sql/streaming/ui/UIUtils.scala | 1 - .../org/apache/spark/sql/DataFrameSuite.scala | 1 - .../sql/DataFrameTimeWindowingSuite.scala | 2 -- .../spark/sql/DataFrameWindowFramesSuite.scala | 2 -- .../spark/sql/DataFrameWriterV2Suite.scala | 2 +- .../spark/sql/DatasetPrimitiveSuite.scala | 1 - .../spark/sql/IntegratedUDFTestUtils.scala | 1 - .../apache/spark/sql/PlanStabilitySuite.scala | 1 - .../spark/sql/StatisticsCollectionSuite.scala | 1 - ...aSourceV2DataFrameSessionCatalogSuite.scala | 1 - .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- .../SupportsCatalogOptionsSuite.scala | 2 +- .../connector/TableCapabilityCheckSuite.scala | 2 +- .../sql/connector/V1ReadFallbackSuite.scala | 2 +- .../BaseScriptTransformationSuite.scala | 1 - .../spark/sql/execution/PlannerSuite.scala | 2 +- .../spark/sql/execution/SameResultSuite.scala | 2 +- .../spark/sql/execution/SparkPlanTest.scala | 1 - .../execution/adaptive/AdaptiveTestUtils.scala | 2 -- .../benchmark/FilterPushdownBenchmark.scala | 2 +- ...rquetNestedPredicatePushDownBenchmark.scala | 3 +-- .../benchmark/TPCDSQueryBenchmark.scala | 1 - .../execution/columnar/ColumnStatsSuite.scala | 1 - .../spark/sql/execution/command/DDLSuite.scala | 1 - .../command/PlanResolutionSuite.scala | 2 +- .../execution/datasources/ReadSchemaTest.scala | 2 +- .../RowDataSourceStrategySuite.scala | 5 ----- .../SaveIntoDataSourceCommandSuite.scala | 1 - .../binaryfile/BinaryFileFormatSuite.scala | 2 +- .../json/JsonParsingOptionsSuite.scala | 5 ++--- .../orc/OrcV2SchemaPruningSuite.scala | 2 +- .../parquet/ParquetCommitterSuite.scala | 4 +--- .../datasources/parquet/ParquetIOSuite.scala | 1 - .../parquet/ParquetInteroperabilitySuite.scala | 2 +- .../ParquetPartitionDiscoverySuite.scala | 2 -- .../parquet/ParquetSchemaSuite.scala | 2 +- .../streaming/FileStreamSinkLogSuite.scala | 1 - .../execution/streaming/MemorySinkSuite.scala | 2 +- 
...FlatMapGroupsWithStateExecHelperSuite.scala | 1 - .../spark/sql/internal/CatalogSuite.scala | 2 +- .../spark/sql/internal/SQLConfSuite.scala | 2 -- .../spark/sql/sources/BucketedReadSuite.scala | 2 +- .../DisableUnnecessaryBucketedScanSuite.scala | 1 - .../spark/sql/sources/PathOptionSuite.scala | 2 -- .../FlatMapGroupsWithStateSuite.scala | 3 +-- .../spark/sql/streaming/StreamTest.scala | 1 - .../streaming/StreamingAggregationSuite.scala | 2 -- .../StreamingDeduplicationSuite.scala | 8 ++------ .../sql/streaming/StreamingJoinSuite.scala | 10 +--------- .../streaming/continuous/ContinuousSuite.scala | 1 - .../test/DataStreamReaderWriterSuite.scala | 2 -- .../spark/sql/test/GenericFunSpecSuite.scala | 2 -- .../SparkGetSchemasOperation.scala | 3 --- .../thriftserver/SparkGetTablesOperation.scala | 1 - .../hive/thriftserver/SparkSQLCLIService.scala | 2 -- .../thriftserver/SparkSQLSessionManager.scala | 4 ---- .../thriftserver/ui/ThriftServerPage.scala | 1 - .../spark/sql/hive/thriftserver/CliSuite.scala | 2 +- .../ThriftServerQueryTestSuite.scala | 1 - .../execution/HiveCompatibilitySuite.scala | 1 - .../spark/sql/hive/HiveExternalCatalog.scala | 1 - .../org/apache/spark/sql/hive/HiveUtils.scala | 1 - .../spark/sql/hive/client/HiveClientImpl.scala | 1 - .../spark/sql/hive/client/HiveShim.scala | 3 +-- .../sql/hive/execution/SaveAsHiveFile.scala | 2 +- .../InsertIntoHiveTableBenchmark.scala | 1 - .../sql/hive/HiveExternalCatalogSuite.scala | 2 -- .../sql/hive/HiveParquetSourceSuite.scala | 1 - .../apache/spark/sql/hive/HiveShimSuite.scala | 3 --- .../sql/hive/HiveShowCreateTableSuite.scala | 2 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 2 -- .../sql/hive/HiveUserDefinedTypeSuite.scala | 1 - .../apache/spark/sql/hive/HiveUtilsSuite.scala | 3 +-- .../spark/sql/hive/QueryPartitionSuite.scala | 4 ---- .../spark/sql/hive/StatisticsSuite.scala | 2 +- .../hive/client/HiveClientUserNameSuite.scala | 1 - .../sql/hive/execution/HiveQuerySuite.scala | 1 - .../sql/hive/execution/HiveSQLViewSuite.scala | 4 ++-- .../HiveScriptTransformationSuite.scala | 3 --- .../hive/execution/HiveTableScanSuite.scala | 1 - .../sql/hive/execution/HiveUDFSuite.scala | 1 - .../execution/PrunePartitionSuiteBase.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 1 - .../sql/hive/execution/UDAQuerySuite.scala | 12 ++---------- .../apache/spark/sql/hive/test/TestHive.scala | 1 - .../streaming/ApiStreamingRootResource.scala | 2 -- .../org/apache/spark/streaming/State.scala | 2 -- .../scheduler/ReceivedBlockTracker.scala | 1 - .../streaming/ReceiverInputDStreamSuite.scala | 1 - .../apache/spark/streaming/TestSuiteBase.scala | 3 +-- .../receiver/BlockGeneratorSuite.scala | 1 - .../ExecutorAllocationManagerSuite.scala | 2 +- 331 files changed, 225 insertions(+), 573 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index 4d765481eb836..09fa91655fba5 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -21,7 +21,6 @@ import java.util.{Properties, Timer, TimerTask} import scala.collection.JavaConverters._ import scala.concurrent.duration._ -import scala.language.postfixOps import scala.util.{Failure, Success => ScalaSuccess, Try} import org.apache.spark.annotation.{Experimental, Since} diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 
c3152d9225107..cdec1982b4487 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} -import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, MapStatus} +import org.apache.spark.scheduler.MapStatus import org.apache.spark.shuffle.MetadataFetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId} import org.apache.spark.util._ diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index cb4eabefec32f..136da80d48dee 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -24,13 +24,8 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.json4s.JsonAST._ -import org.json4s.JsonDSL._ -import org.json4s.jackson.JsonMethods.{compact, render} - import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES} diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 5a6fa507963f0..dc2587a62ae40 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -17,8 +17,6 @@ package org.apache.spark.api.python -import java.nio.ByteOrder -import java.nio.charset.StandardCharsets import java.util.{ArrayList => JArrayList} import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala index 20ab6fc2f348d..41c66024272b9 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RRunner.scala @@ -19,7 +19,6 @@ package org.apache.spark.api.r import java.io._ -import org.apache.spark._ import org.apache.spark.broadcast.Broadcast /** diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 17733d99cd5bc..d76fb7f9a20b3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -22,7 +22,6 @@ import org.json4s.JsonDSL._ import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} import org.apache.spark.deploy.master._ -import org.apache.spark.deploy.master.RecoveryState.MasterState import org.apache.spark.deploy.worker.ExecutorRunner import org.apache.spark.resource.{ResourceInformation, ResourceRequirement} @@ -208,7 +207,8 @@ private[deploy] object JsonProtocol { * master * `completeddrivers` a list of Json objects of [[DriverInfo]] of the completed drivers * of the master - * `status` status of the master, see [[MasterState]] + * `status` status of the master, + * see [[org.apache.spark.deploy.master.RecoveryState.MasterState]] */ def writeMasterState(obj: MasterStateResponse): JObject 
= { val aliveWorkers = obj.workers.filter(_.isAlive()) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala index c659d32d16314..57b05ff245258 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/BasicEventFilterBuilder.scala @@ -19,7 +19,6 @@ package org.apache.spark.deploy.history import scala.collection.mutable -import org.apache.spark.SparkContext import org.apache.spark.deploy.history.EventFilter.FilterStatistics import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e1b0fc5e45d6e..e5341aff8ce66 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -21,7 +21,7 @@ import java.io.{File, FileNotFoundException, IOException} import java.lang.{Long => JLong} import java.nio.file.Files import java.util.{Date, NoSuchElementException, ServiceLoader} -import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future, TimeUnit} +import java.util.concurrent.{ConcurrentHashMap, ExecutorService, TimeUnit} import java.util.zip.ZipOutputStream import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala index 58714f16e8417..1b8c7ff26e9f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala @@ -17,7 +17,6 @@ package org.apache.spark.deploy.history -import java.io.IOException import java.util.Collection import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index a582a5d045855..cccd3da323774 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -22,9 +22,7 @@ import java.util.{Date, Locale} import java.util.concurrent.{ScheduledFuture, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} -import scala.collection.mutable import scala.util.Random -import scala.util.control.NonFatal import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.{ApplicationDescription, DriverDescription, ExecutorState, SparkHadoopUtil} diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 035f9d379471c..af94bd6d9e0f2 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.master.ui import java.net.{InetAddress, NetworkInterface, SocketException} -import java.util.Locale import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, MasterStateResponse, 
RequestMasterState} diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index b2bc6b3b68007..6a1fd57873c3a 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -17,7 +17,6 @@ package org.apache.spark.executor -import java.io.File import java.net.URL import java.nio.ByteBuffer import java.util.Locale diff --git a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala index 62fbc166167d3..cafb39ea82ad9 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala @@ -22,7 +22,7 @@ import scala.reflect.ClassTag import org.apache.spark.TaskContext import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.client.StreamCallbackWithID -import org.apache.spark.storage.{BlockId, ShuffleBlockId, StorageLevel} +import org.apache.spark.storage.{BlockId, StorageLevel} private[spark] trait BlockDataManager { diff --git a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala index c7f5a97e35612..635efc3e22628 100644 --- a/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala @@ -23,7 +23,6 @@ import scala.concurrent.{Future, Promise} import scala.concurrent.duration.Duration import scala.reflect.ClassTag -import org.apache.spark.internal.Logging import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.{BlockFetchingListener, BlockStoreClient, DownloadFileManager} import org.apache.spark.storage.{BlockId, EncryptedManagedBuffer, StorageLevel} diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 806fbf52795bc..828849812bbd1 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -19,9 +19,7 @@ package org.apache.spark.network.netty import java.io.IOException import java.nio.ByteBuffer -import java.util import java.util.{HashMap => JHashMap, Map => JMap} -import java.util.concurrent.CompletableFuture import scala.collection.JavaConverters._ import scala.concurrent.{Future, Promise} @@ -35,11 +33,11 @@ import org.apache.spark.ExecutorDeadException import org.apache.spark.internal.config import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} -import org.apache.spark.network.client.{RpcResponseCallback, TransportClient, TransportClientBootstrap, TransportClientFactory} +import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap} import org.apache.spark.network.crypto.{AuthClientBootstrap, AuthServerBootstrap} import org.apache.spark.network.server._ import org.apache.spark.network.shuffle.{BlockFetchingListener, DownloadFileManager, OneForOneBlockFetcher, RetryingBlockFetcher} -import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, 
GetLocalDirsForExecutors, LocalDirsForExecutors, UploadBlock, UploadBlockStream} +import org.apache.spark.network.shuffle.protocol.{UploadBlock, UploadBlockStream} import org.apache.spark.network.util.JavaUtils import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.serializer.JavaSerializer diff --git a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 324cba5b4de42..f0239cdd9136d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -19,7 +19,6 @@ package org.apache.spark.rdd import java.io._ -import scala.Serializable import scala.collection.Map import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index 576a83f6ab4d9..5093a12777ad3 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -20,7 +20,6 @@ package org.apache.spark.rdd import java.io.{FileNotFoundException, IOException} import java.util.concurrent.TimeUnit -import scala.collection.mutable import scala.reflect.ClassTag import scala.util.control.NonFatal diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala index 482d9e94c6dd9..22d10a975ad0f 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceAllocator.scala @@ -20,7 +20,6 @@ package org.apache.spark.resource import scala.collection.mutable import org.apache.spark.SparkException -import org.apache.spark.util.collection.OpenHashMap /** * Trait used to help executor/worker allocate resources. @@ -40,7 +39,7 @@ trait ResourceAllocator { * can be a multiple, such that each address can be allocated up to [[slotsPerAddress]] * times. * - * TODO Use [[OpenHashMap]] instead to gain better performance. + * TODO Use [[org.apache.spark.util.collection.OpenHashMap]] instead to gain better performance. 
*/ private lazy val addressAvailabilityMap = { mutable.HashMap(resourceAddresses.map(_ -> slotsPerAddress): _*) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 5a9435653920f..837b2d80aace6 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -29,8 +29,8 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.resource.ResourceDiscoveryPlugin import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{CPUS_PER_TASK, EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} -import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING, SKIP_VALIDATE_CORES_TESTING} +import org.apache.spark.internal.config.{EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} +import org.apache.spark.internal.config.Tests.{RESOURCES_WARNING_TESTING} import org.apache.spark.util.Utils /** diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 57e219999b0d0..b939e40f3b60c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -26,9 +26,6 @@ import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, Buffer, HashMap, HashSet} import scala.util.Random -import com.google.common.base.Ticker -import com.google.common.cache.CacheBuilder - import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.executor.ExecutorMetrics diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index eda1cb52d4abc..e084453be0789 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -22,7 +22,6 @@ import java.nio.ByteBuffer import org.apache.spark.TaskState.TaskState import org.apache.spark.resource.{ResourceInformation, ResourceProfile} import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.scheduler.ExecutorDecommissionInfo import org.apache.spark.scheduler.ExecutorLossReason import org.apache.spark.util.SerializableBuffer diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 72460180f5908..d9b8eddcf8cd0 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -22,11 +22,9 @@ import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConverters._ import org.apache.spark._ -import org.apache.spark.internal.{config, Logging} -import org.apache.spark.scheduler.MapStatus +import org.apache.spark.internal.Logging import org.apache.spark.shuffle._ -import org.apache.spark.shuffle.api.{ShuffleDataIO, ShuffleExecutorComponents} -import org.apache.spark.util.Utils +import org.apache.spark.shuffle.api.ShuffleExecutorComponents import org.apache.spark.util.collection.OpenHashSet /** diff --git 
a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 5c6543fe28a18..affa85b76cf19 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -22,8 +22,7 @@ import java.util.{List => JList} import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import org.apache.spark.{JobExecutionStatus, SparkConf, SparkException} -import org.apache.spark.resource.ResourceProfileManager +import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 import org.apache.spark.ui.scope._ import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala index 45348be5c98b9..c79f2dcd86533 100644 --- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala +++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala @@ -21,7 +21,6 @@ import java.io.File import scala.annotation.meta.getter import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import com.fasterxml.jackson.annotation.JsonInclude diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 3909c02c5bb1f..924601f92c5b8 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -55,7 +55,6 @@ import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.{MigratableResolver, ShuffleManager, ShuffleWriteMetricsReporter} -import org.apache.spark.shuffle.{ShuffleManager, ShuffleWriteMetricsReporter} import org.apache.spark.storage.BlockManagerMessages.{DecommissionBlockManager, ReplicateBlock} import org.apache.spark.storage.memory._ import org.apache.spark.unsafe.Platform diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 9699515c626bf..7a55039db1b60 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -18,7 +18,6 @@ package org.apache.spark.storage import java.io.IOException -import java.util.concurrent.ExecutorService import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ @@ -28,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config -import org.apache.spark.shuffle.{MigratableResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.ShuffleBlockInfo import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.util.ThreadUtils diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index b8c5cbd121861..a7532a9870fae 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -33,7 +33,7 @@ 
import org.apache.spark.{MapOutputTrackerMaster, SparkConf} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.{config, Logging} import org.apache.spark.network.shuffle.ExternalBlockStoreClient -import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointAddress, RpcEndpointRef, RpcEnv} +import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} import org.apache.spark.storage.BlockManagerMessages._ diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 78206c51c1028..d45dc937910d9 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -23,7 +23,6 @@ import java.util.concurrent.locks.ReentrantLock import scala.concurrent.{Awaitable, ExecutionContext, ExecutionContextExecutor, Future} import scala.concurrent.duration.{Duration, FiniteDuration} -import scala.language.higherKinds import scala.util.control.NonFatal import com.google.common.util.concurrent.ThreadFactoryBuilder diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 7f1f3a71acab8..b743ab6507117 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -28,7 +28,7 @@ import java.nio.channels.{Channels, FileChannel, WritableByteChannel} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.security.SecureRandom -import java.util.{Arrays, Locale, Properties, Random, UUID} +import java.util.{Locale, Properties, Random, UUID} import java.util.concurrent._ import java.util.concurrent.TimeUnit.NANOSECONDS import java.util.zip.GZIPInputStream diff --git a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala index fae6c4af1240c..e6d3377120e56 100644 --- a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark import scala.concurrent.duration._ -import scala.language.implicitConversions import org.scalatest.concurrent.Eventually._ import org.scalatest.matchers.must.Matchers diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index e1d4eff0a62cb..e47181719a9db 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import scala.concurrent.duration._ import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala index ac39f022d5ca6..7d07af4d7246b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileCompactorSuite.scala @@ -23,10 +23,9 @@ import scala.io.{Codec, Source} import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.json4s.jackson.JsonMethods.parse -import org.apache.spark.{SparkConf, SparkFunSuite, Success} +import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.EventLogTestHelper.writeEventsToRollingWriter -import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.status.ListenerEventsTestHelper._ diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala index 35de457ec48ce..be83ec12f92f5 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala @@ -21,7 +21,6 @@ import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date -import javax.servlet.http.HttpServletResponse import scala.collection.mutable.HashMap @@ -32,7 +31,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ -import org.apache.spark.internal.config.UI import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 8e58beff74290..31049d104e63d 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.executor -import java.io.{Externalizable, File, ObjectInput, ObjectOutput} +import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.lang.Thread.UncaughtExceptionHandler import java.nio.ByteBuffer import java.util.Properties @@ -41,7 +41,6 @@ import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.broadcast.Broadcast -import org.apache.spark.deploy.{SimpleApplicationTest, SparkSubmitSuite} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.memory.TestMemoryManager @@ -53,7 +52,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, UninterruptibleThread, Utils} +import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index fab7aea6c47aa..f1d7053c34594 100644 --- 
a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils /** * Tests the correctness of diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index fa1a75d076051..182c3c09e0524 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -24,7 +24,7 @@ import scala.reflect.ClassTag import scala.util.Random import org.mockito.ArgumentMatchers.any -import org.mockito.Mockito.{mock, times, verify, when} +import org.mockito.Mockito.{mock, when} import org.scalatest.BeforeAndAfterEach import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index 278a72a7192d8..e8e8682e20ed4 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -26,10 +26,8 @@ import org.json4s.{DefaultFormats, Extraction} import org.apache.spark.{LocalSparkContext, SparkConf, SparkException, SparkFunSuite} import org.apache.spark.TestUtils._ import org.apache.spark.internal.config._ -import org.apache.spark.internal.config.Tests._ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.util.Utils class ResourceUtilsSuite extends SparkFunSuite diff --git a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala index e4ec62f8efc5b..b7ac9ecac2387 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/BarrierTaskContextSuite.scala @@ -25,7 +25,6 @@ import org.scalatest.concurrent.Eventually import org.scalatest.time.SpanSugar._ import org.apache.spark._ -import org.apache.spark.internal.config import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY class BarrierTaskContextSuite extends SparkFunSuite with LocalSparkContext with Eventually { diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 47e37fc55cefe..65d51e57ee308 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -188,7 +188,6 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo } test("extra resources from executor") { - import TestUtils._ val execCores = 3 val conf = new SparkConf() diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 99be1faab8b85..58aa246b7358f 
100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -42,7 +42,7 @@ import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.shuffle.{FetchFailedException, MetadataFetchFailedException} import org.apache.spark.storage.{BlockId, BlockManagerId, BlockManagerMaster} -import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, ThreadUtils, Utils} +import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, LongAccumulator, Utils} class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) extends DAGSchedulerEventProcessLoop(dagScheduler) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 0c60c42c054cf..b6a59c8bbd944 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.config import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.util.{Clock, ManualClock, SystemClock} +import org.apache.spark.util.{Clock, ManualClock} class FakeSchedulerBackend extends SchedulerBackend { def start(): Unit = {} diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index e01e278f60205..a760dda3897df 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -1768,7 +1768,6 @@ class TaskSetManagerSuite } test("TaskSetManager passes task resource along") { - import TestUtils._ sc = new SparkContext("local", "test") sc.conf.set(TASK_GPU_ID.amountConf, "2") diff --git a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala index 4a92cbcb85847..1c2326db6dc99 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/WorkerDecommissionSuite.scala @@ -19,14 +19,12 @@ package org.apache.spark.scheduler import java.util.concurrent.Semaphore -import scala.concurrent.TimeoutException import scala.concurrent.duration._ -import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkFunSuite, - TestUtils} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} import org.apache.spark.internal.config import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend -import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils} +import org.apache.spark.util.ThreadUtils class WorkerDecommissionSuite extends SparkFunSuite with LocalSparkContext { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala index 9c0699bc981f8..d2bf385e10796 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.util.Properties -import scala.concurrent.{Await, ExecutionContext, Future} +import scala.concurrent.{ExecutionContext, Future} import scala.language.implicitConversions import scala.reflect.ClassTag diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala index 48e0d218c0e5c..d02d7f862df80 100644 --- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ui -import java.util.Locale import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 857749e84764d..20624c743bc22 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -18,8 +18,7 @@ package org.apache.spark.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataOutput, DataOutputStream, File, - FileOutputStream, InputStream, PrintStream, SequenceInputStream} -import java.lang.{Double => JDouble, Float => JFloat} + FileOutputStream, PrintStream, SequenceInputStream} import java.lang.reflect.Field import java.net.{BindException, ServerSocket, URI} import java.nio.{ByteBuffer, ByteOrder} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 86d00cac9485f..487cb27b93fe8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -41,7 +41,6 @@ object DeveloperApiExample { .builder .appName("DeveloperApiExample") .getOrCreate() - import spark.implicits._ // Prepare training data. 
val training = spark.createDataFrame(Seq( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index 2845028dd0814..7a7501ee84526 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -30,7 +30,6 @@ object RankingMetricsExample { .builder .appName("RankingMetricsExample") .getOrCreate() - import spark.implicits._ // $example on$ // Read in the ratings data val ratings = spark.read.textFile("data/mllib/sample_movielens_data.txt").rdd.map { line => diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala index fde281087c267..b17b86c08314b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala @@ -42,8 +42,6 @@ object SparkSQLExample { .config("spark.some.config.option", "some-value") .getOrCreate() - // For implicit conversions like converting RDDs to DataFrames - import spark.implicits._ // $example off:init_session$ runBasicDataFrameExample(spark) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index c685c89f0dfc8..09c849960c1b5 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -27,7 +27,7 @@ import org.apache.avro.Schema.Type._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator import org.apache.spark.sql.types._ -import org.apache.spark.sql.types.Decimal.{maxPrecisionForBytes, minBytesForPrecision} +import org.apache.spark.sql.types.Decimal.minBytesForPrecision /** * This object contains method that are used to convert sparkSQL schemas to avro schemas and vice diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index c9c6bcecac14e..d3bfb716f515c 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, LA, UTC} import org.apache.spark.sql.execution.{FormattedMode, SparkPlan} -import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition, PartitionedFile} +import org.apache.spark.sql.execution.datasources.{CommonFileDataSourceSuite, DataSource, FilePartition} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index 9ad083f1cfde5..a1b0f7d22216b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 6599e7e0fe707..c25b8b4e510a0 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -19,12 +19,9 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import org.apache.kafka.clients.consumer.ConsumerConfig - import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT -import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset, ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index 6d30bd2a6d2cd..adcc20c25cb5f 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, KafkaConsumer, OffsetAndTimestamp} +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkEnv @@ -33,10 +33,12 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} /** - * This class uses Kafka's own [[KafkaConsumer]] API to read data offsets from Kafka. + * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to + * read data offsets from Kafka. * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read * by this source. These strategies directly correspond to the different consumption options - * in. This class is designed to return a configured [[KafkaConsumer]] that is used by the + * in. This class is designed to return a configured + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the * [[KafkaSource]] to query for the offsets. See the docs on * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] * for more details. @@ -50,7 +52,8 @@ private[kafka010] class KafkaOffsetReader( driverGroupIdPrefix: String) extends Logging { /** - * [[UninterruptibleThreadRunner]] ensures that all [[KafkaConsumer]] communication called in an + * [[UninterruptibleThreadRunner]] ensures that all + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an * [[UninterruptibleThread]]. 
In the case of streaming queries, we are already running in an * [[UninterruptibleThread]], however for batch mode this is not the case. */ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 413a0c4de8bea..69a66e2209773 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.kafka010 -import org.apache.kafka.common.TopicPartition - import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.StructType diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 57879c7ca31cf..71ccb5f952f0a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -18,11 +18,7 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import java.io._ -import java.nio.charset.StandardCharsets -import org.apache.commons.io.IOUtils -import org.apache.kafka.clients.consumer.ConsumerConfig import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkContext @@ -35,7 +31,6 @@ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming import org.apache.spark.sql.connector.read.streaming.{ReadAllAvailable, ReadLimit, ReadMaxRows, SupportsAdmissionControl} import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.kafka010.KafkaSource._ import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.types._ diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 748d623a0a32a..3ace0874674b6 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -30,7 +30,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.kafka010.KafkaConfigUpdater import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.read.{Batch, Scan, ScanBuilder} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, SupportsTruncate, WriteBuilder} diff --git 
a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index fe783ffe53a3b..08f673455d729 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -1178,7 +1178,6 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { } testWithUninterruptibleThread("minPartitions is supported") { - import testImplicits._ val topic = newTopic() val tp = new TopicPartition(topic, 0) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index c5f3086b38c99..43ed4a8378a8c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -26,7 +26,6 @@ import javax.security.auth.login.Configuration import scala.collection.JavaConverters._ import scala.io.Source -import scala.util.Random import scala.util.control.NonFatal import com.google.common.io.Files @@ -38,13 +37,12 @@ import org.apache.hadoop.minikdc.MiniKdc import org.apache.hadoop.security.UserGroupInformation import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.admin._ -import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer._ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.config.SaslConfigs import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} -import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} +import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.SystemTime import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.zookeeper.server.auth.SASLAuthenticationProvider diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index 307a69f9b84c5..bc790418decd3 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -36,7 +36,7 @@ import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, S import org.apache.kafka.common.security.scram.ScramLoginModule import org.apache.kafka.common.security.token.delegation.DelegationToken -import org.apache.spark.{SparkConf, SparkEnv} +import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.Logging diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala index ac81f92f86109..c0724909bc350 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala +++ 
b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockScheduler.scala @@ -19,8 +19,6 @@ package org.apache.spark.streaming.kafka010.mocks import java.util.concurrent.{ScheduledFuture, TimeUnit} -import scala.collection.mutable.PriorityQueue - import kafka.utils.Scheduler import org.apache.kafka.common.utils.Time import org.jmock.lib.concurrent.DeterministicScheduler diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala index 11e949536f2b6..770eb2d89d522 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala @@ -21,7 +21,6 @@ import java.util.concurrent._ import scala.util.control.NonFatal import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import org.apache.spark.internal.Logging import org.apache.spark.streaming.Duration diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index 8c3931a1c87fd..e778d083b3f70 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -50,8 +50,6 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( val metricsEnabledDimensions: Set[String] ) extends ReceiverInputDStream[T](_ssc) { - import KinesisReadConfigurations._ - private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala index d26acf924c0a3..7bc86c4871cfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} - /** * ==ML attributes== * diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala index cd245dd723348..2c7186015d400 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala @@ -17,13 +17,10 @@ package org.apache.spark.ml.feature -import scala.collection.mutable.ArrayBuilder - import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.ml._ -import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -31,7 +28,7 @@ import org.apache.spark.ml.stat.Summarizer import org.apache.spark.ml.util._ import org.apache.spark.sql._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types.StructType /** diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala index 6ff970cc72dfd..ac63024768d77 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala @@ -17,8 +17,6 @@ package org.apache.spark.ml -import org.apache.spark.ml.feature.{HashingTF, IDF, IDFModel, VectorAssembler} - /** * == Feature transformers == * diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index a0e5924a7ee3a..088f6a682be82 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ -import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext, SparkException} +import org.apache.spark.{Partitioner, SparkException} import org.apache.spark.annotation.Since import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala index 517179c0eb9ae..ed41169070c59 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/TopByKeyAggregator.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.recommendation -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import org.apache.spark.sql.{Encoder, Encoders} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index f88f3fce61b33..75262ac4fe06b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -30,7 +30,6 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{DataValidators, Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession -import org.apache.spark.storage.StorageLevel /** * Classification model trained using Multinomial/Binary Logistic Regression. 
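Many of the hunks above follow a single pattern: an import that is referenced only from a Scaladoc `[[...]]` link is dropped and the link is rewritten with the fully qualified name (see the `OpenHashMap` TODO in ResourceAllocator and the `KafkaConsumer` links in KafkaOffsetReader). That keeps the docs resolvable while satisfying the unused-import options wired into pom.xml later in this patch (`-Ywarn-unused-import`, and `-Wconf:cat=unused-imports:e` under the Scala 2.13 profile, which turns the warning into an error). The snippet below is not part of the patch; it is only a minimal before/after sketch of that pattern:

```scala
// Not part of this patch: a minimal sketch of the Scaladoc-link pattern
// applied in the ResourceAllocator and KafkaOffsetReader hunks above.

// Before: the import exists only so that the short [[OpenHashMap]] link
// resolves, so the new unused-import flags report it.
//
//   import org.apache.spark.util.collection.OpenHashMap
//
//   /** TODO Use [[OpenHashMap]] instead to gain better performance. */

// After: no import; the Scaladoc link is written out in full instead.
object ScaladocLinkPattern {
  /** TODO Use [[org.apache.spark.util.collection.OpenHashMap]] instead to gain better performance. */
  def example(): Unit = ()
}
```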
diff --git a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala index 2252151af306b..cc8982f338702 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/fpm/PrefixSpanSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.ml.fpm import org.apache.spark.ml.util.MLTest -import org.apache.spark.sql.DataFrame class PrefixSpanSuite extends MLTest { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index a30c47293c543..a0e17a4b40fd2 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -507,8 +507,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val residualDeviancesR = Array(3.809296, 3.70055) - import GeneralizedLinearRegression._ - var idx = 0 val link = "log" val dataset = datasetPoissonLogWithZero @@ -790,8 +788,6 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest val expected = Seq(0.5108256, 0.1201443, 1.600000, 1.886792, 0.625, 0.530, -0.4700036, -0.6348783, 1.325782, 1.463641) - import GeneralizedLinearRegression._ - var idx = 0 for (family <- GeneralizedLinearRegression.supportedFamilyNames.sortWith(_ < _)) { for (useWeight <- Seq(false, true)) { diff --git a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala index 68ba57c0d5fc8..e438a4135908e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/stat/SummarizerSuite.scala @@ -29,7 +29,6 @@ class SummarizerSuite extends SparkFunSuite with MLlibTestSparkContext { import testImplicits._ import Summarizer._ - import SummaryBuilderImpl._ private case class ExpectedMetrics( mean: Vector, diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index 2a83d0aaf9699..3ca6816ce7c0d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.tree.impl import scala.annotation.tailrec import scala.collection.mutable -import scala.language.implicitConversions import org.apache.spark.SparkFunSuite import org.apache.spark.ml.classification.DecisionTreeClassificationModel diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala index dd0139b94f098..c5bf202a2d337 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/DefaultReadWriteTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.util import java.io.{File, IOException} -import org.json4s.JNothing import org.scalatest.Suite import org.apache.spark.{SparkException, SparkFunSuite} diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala index d2c4832b12bac..19e9fe4bdb30e 100644 --- 
a/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/PMMLReadWriteTest.scala @@ -23,10 +23,7 @@ import org.dmg.pmml.PMML import org.scalatest.Suite import org.apache.spark.SparkContext -import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ -import org.apache.spark.mllib.util.MLlibTestSparkContext -import org.apache.spark.sql.Dataset trait PMMLReadWriteTest extends TempDirectory { self: Suite => /** diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala index 56d41403f74cc..8f311bbf9f840 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.mllib.clustering import java.util.{ArrayList => JArrayList} import breeze.linalg.{argmax, argtopk, max, DenseMatrix => BDM} -import org.scalatest.Assertions import org.apache.spark.SparkFunSuite import org.apache.spark.graphx.Edge diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index 9d7177e0a149e..0e789821aa5f3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -22,7 +22,7 @@ import java.{util => ju} import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV} import org.apache.spark.{SparkException, SparkFunSuite} -import org.apache.spark.mllib.linalg.{DenseMatrix, DenseVector, Matrices, Matrix, SparseMatrix, SparseVector, Vectors} +import org.apache.spark.mllib.linalg.{DenseMatrix, Matrices, Matrix, SparseMatrix} import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.mllib.util.TestingUtils._ diff --git a/pom.xml b/pom.xml index ee88e11046803..3ae2e7420e154 100644 --- a/pom.xml +++ b/pom.xml @@ -164,6 +164,7 @@ 3.2.2 2.12.10 2.12 + -Ywarn-unused-import 2.0.0 --test @@ -2537,6 +2538,7 @@ -deprecation -feature -explaintypes + ${scalac.arg.unused-imports} -target:jvm-1.8 @@ -3266,6 +3268,7 @@ 2.13.3 2.13 + -Wconf:cat=unused-imports:e diff --git a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala index 4ffa8beaf4740..90af9ec299efc 100644 --- a/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala +++ b/repl/src/test/scala-2.12/org/apache/spark/repl/Repl2Suite.scala @@ -18,17 +18,12 @@ package org.apache.spark.repl import java.io._ -import java.nio.file.Files import scala.tools.nsc.interpreter.SimpleReader -import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkContext, SparkFunSuite} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION class Repl2Suite extends SparkFunSuite with BeforeAndAfterAll { test("propagation of local properties") { diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 5428fa4ee9df7..f696e93e9cef2 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ 
b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -28,7 +28,6 @@ import java.util.Collections import javax.tools.{JavaFileObject, SimpleJavaFileObject, ToolProvider} import scala.io.Source -import scala.language.implicitConversions import com.google.common.io.Files import org.mockito.ArgumentMatchers.{any, anyString} diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 95d908cec5de0..6566d29d16e91 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -23,7 +23,7 @@ import java.nio.file.Files import org.apache.log4j.{Level, LogManager, PropertyConfigurator} import org.scalatest.BeforeAndAfterAll -import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala index d78f04dcc40e6..222e19c5e20f1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/EnvSecretsFeatureStep.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala index f4e1a3a326729..9de7686c8a9c0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountSecretsFeatureStep.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.features -import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, VolumeBuilder, VolumeMountBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, VolumeBuilder, VolumeMountBuilder} import org.apache.spark.deploy.k8s.{KubernetesConf, SparkPod} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index c029b248f7ea4..863cb28bc827c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -35,7 +35,6 @@ import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference import org.apache.spark.internal.Logging import org.apache.spark.internal.config.DYN_ALLOCATION_EXECUTOR_IDLE_TIMEOUT import 
org.apache.spark.resource.ResourceProfile -import org.apache.spark.scheduler.cluster.SchedulerBackendUtils import org.apache.spark.util.{Clock, Utils} private[spark] class ExecutorPodsAllocator( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index cc5c2f4b6325d..151e98ba17e3b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,7 +25,6 @@ import io.fabric8.kubernetes.client.Config import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.util.{SystemClock, ThreadUtils} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala index 83d9481e6f2b0..0567f32c23134 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala @@ -21,7 +21,6 @@ import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala index 7c231586af935..ef57a4b861508 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesUtilsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.k8s import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} import org.apache.spark.SparkFunSuite diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 26bd317de8ec6..4d4c4baeb12c0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,7 +26,6 @@ import org.mockito.Mockito.{mock, never, verify, when} import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.k8s._ import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { diff 
--git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index 6a7366e9c6b7a..a44d465e35087 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -20,11 +20,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ -import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ -import org.apache.spark.internal.config._ -import org.apache.spark.util.Utils class DriverCommandFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 18afd10395566..413371d056b26 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s.{KubernetesTestConf, SparkPod} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.ManualClock diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala index e1c01dbdc7358..c078e69b8a14b 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/HadoopConfDriverFeatureStepSuite.scala @@ -27,7 +27,6 @@ import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.util.{SparkConfWithEnv, Utils} class HadoopConfDriverFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala index 41ca3a94ce7a7..094fcb39782f4 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/KerberosConfDriverFeatureStepSuite.scala @@ -26,14 +26,13 @@ import com.google.common.io.Files import 
io.fabric8.kubernetes.api.model.{ConfigMap, Secret} import org.apache.commons.codec.binary.Base64 import org.apache.hadoop.io.Text -import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ -import org.apache.spark.deploy.k8s.submit.JavaMainAppResource import org.apache.spark.internal.config._ import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index bbb89fd0a1c24..95ee37e3daa41 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.k8s.features import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.k8s._ class MountVolumesFeatureStepSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala index 528b755c41605..8401f7102ad8e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocatorSuite.scala @@ -29,7 +29,7 @@ import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesExecutorConf, KubernetesExecutorSpec} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.Fabric8Aliases._ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index af980f0494369..0bf01e6b66427 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -21,7 +21,6 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer import io.fabric8.kubernetes.client.DefaultKubernetesClient import org.scalatest.concurrent.Eventually diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index e50115d6f493f..ee44cb5f85835 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -22,7 +22,6 @@ import java.util.concurrent.CountDownLatch import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ -import scala.util.Try import io.fabric8.kubernetes.client.dsl.ExecListener import okhttp3.Response @@ -32,7 +31,6 @@ import org.apache.hadoop.util.VersionInfo import org.apache.spark.{SPARK_VERSION, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.util.{Utils => SparkUtils} object Utils extends Logging { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala index be1834c0b5dea..0fbed4a220e68 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/cloud/KubeConfigBackend.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.cloud -import java.nio.file.Paths - import io.fabric8.kubernetes.client.{Config, DefaultKubernetesClient} import io.fabric8.kubernetes.client.utils.Utils import org.apache.commons.lang3.StringUtils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala index c0cdcda14291f..e260fb8e25f4c 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/ui/MesosClusterUI.scala @@ -20,7 +20,6 @@ package org.apache.spark.deploy.mesos.ui import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.scheduler.cluster.mesos.MesosClusterScheduler import org.apache.spark.ui.{SparkUI, WebUI} -import org.apache.spark.ui.JettyUtils._ /** * UI that displays driver results from the [[org.apache.spark.deploy.mesos.MesosClusterDispatcher]] diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index bbe1ff495d8a6..efcef09132f5b 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -39,7 +39,7 @@ import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalBlockStoreClient import org.apache.spark.resource.ResourceProfile -import org.apache.spark.rpc.{RpcEndpointAddress, RpcEndpointRef} +import 
org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.{ExecutorProcessLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala index 981b8e9df1747..a5a2611be3765 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackendUtil.scala @@ -17,7 +17,7 @@ package org.apache.spark.scheduler.cluster.mesos -import org.apache.mesos.Protos.{ContainerInfo, Environment, Image, NetworkInfo, Parameter, Secret, +import org.apache.mesos.Protos.{ContainerInfo, Image, NetworkInfo, Parameter, Secret, TaskState => MesosTaskState, Volume} import org.apache.mesos.Protos.ContainerInfo.{DockerInfo, MesosInfo} import org.apache.mesos.Protos.Environment.Variable diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 2be8835f77e36..b5a360167679e 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -29,10 +29,10 @@ import scala.util.control.NonFatal import com.google.common.base.Splitter import com.google.common.io.Files import org.apache.mesos.{MesosSchedulerDriver, Protos, Scheduler, SchedulerDriver} -import org.apache.mesos.Protos.{SlaveID => AgentID, TaskState => MesosTaskState, _} +import org.apache.mesos.Protos.{TaskState => MesosTaskState, _} import org.apache.mesos.Protos.FrameworkInfo.Capability import org.apache.mesos.Protos.Resource.ReservationInfo -import org.apache.mesos.protobuf.{ByteString, GeneratedMessageV3} +import org.apache.mesos.protobuf.GeneratedMessageV3 import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.TaskState diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index d9262bbac6586..ede39063cf1bd 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -19,12 +19,11 @@ package org.apache.spark.deploy.yarn import java.io.File import java.nio.ByteBuffer -import java.util.{Collections, Locale} +import java.util.Collections import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, ListBuffer} -import org.apache.hadoop.HadoopIllegalArgumentException import org.apache.hadoop.fs.Path import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation @@ -40,7 +39,6 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils -import org.apache.spark.resource.ResourceProfile import org.apache.spark.util.Utils private[yarn] class 
ExecutorRunnable( diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala index 5640f7ede33df..7ac5beac76e20 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/LocalityPreferredContainerPlacementStrategy.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, Set} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.yarn.api.records.{ContainerId, Resource} +import org.apache.hadoop.yarn.api.records.ContainerId import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.spark.SparkConf diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 0273de10993eb..09766bf97d8f3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -26,7 +26,6 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, P import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.internal.config._ import org.apache.spark.launcher.YarnCommandBuilderUtils import org.apache.spark.resource.ExecutorResourceRequest import org.apache.spark.util.Utils diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala index 0c3d080cca254..d000287cb7a96 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/launcher/YarnCommandBuilderUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.launcher -import scala.collection.JavaConverters._ -import scala.collection.mutable.ListBuffer import scala.util.Properties /** diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala index 7f8dd590545c6..5b762f606112c 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtilSuite.scala @@ -29,7 +29,6 @@ import org.scalatest.matchers.should.Matchers._ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.{ResetSystemProperties, Utils} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index c65e181181e83..53c7f17ee6b2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} -import org.apache.spark.util.Utils /** @@ -894,10 +893,6 @@ trait ScalaReflection extends Logging { import universe._ - // The Predef.Map is scala.collection.immutable.Map. - // Since the map values can be mutable, we explicitly import scala.collection.Map at here. - import scala.collection.Map - /** * Any codes calling `scala.reflect.api.Types.TypeApi.<:<` should be wrapped by this method to * clean up the Scala reflection garbage automatically. Otherwise, it will leak some objects to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 6eed152e6dd77..47a45b0e529c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index d3bb72badeb13..deeb8215d22c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, TableCatalog, TableChange} /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 51eb3d033ddc4..2fa6bf0acea67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -21,7 +21,6 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.DataType /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 83acfb8d4a71c..98bd84fb94bd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -18,11 +18,10 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, SupportsNamespaces, Table, TableCatalog} +import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog} /** * Holds the name of a namespace that has yet to be looked up in a catalog. It will be resolved to diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala index db930cf7890e6..5643bf8b3a9b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.catalog -import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, NoSuchTableException} +import org.apache.spark.sql.catalyst.analysis.{FunctionAlreadyExistsException, NoSuchDatabaseException, NoSuchFunctionException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 3d5c1855f6975..9ab38044e6a88 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.encoders -import java.io.ObjectInputStream - import scala.reflect.ClassTag import scala.reflect.runtime.universe.{typeTag, TypeTag} @@ -33,7 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{AssertNotNull, Initial import org.apache.spark.sql.catalyst.optimizer.{ReassignLambdaVariableID, SimplifyCasts} import org.apache.spark.sql.catalyst.plans.logical.{CatalystSerde, DeserializeToObject, LeafNode, LocalRelation} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{DataType, ObjectType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ObjectType, StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1257cf6e787ce..5afc308e52ead 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import java.math.{BigDecimal => JavaBigDecimal} import java.time.ZoneId import java.util.Locale import java.util.concurrent.TimeUnit._ @@ -25,7 +24,7 @@ import java.util.concurrent.TimeUnit._ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} -import org.apache.spark.sql.catalyst.expressions.Cast.{canCast, forceNullable, resolvableNullability} +import org.apache.spark.sql.catalyst.expressions.Cast.{forceNullable, resolvableNullability} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 35b192cc5544a..1d23953484046 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -24,9 +24,7 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -63,7 +61,8 @@ import org.apache.spark.sql.types._ * functions. * - [[NamedExpression]]: An [[Expression]] that is named. * - [[TimeZoneAwareExpression]]: A common base trait for time zone aware expressions. - * - [[SubqueryExpression]]: A base interface for expressions that contain a [[LogicalPlan]]. + * - [[SubqueryExpression]]: A base interface for expressions that contain a + * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. * * - [[LeafExpression]]: an expression that has no child. * - [[UnaryExpression]]: an expression that has one child. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 6e2bd96784b94..0a69d5aa6b9ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType, UserDefinedType} +import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType} import org.apache.spark.util.Utils /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9aa827a58d87a..1ff4a93cf0acd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -38,9 +38,8 @@ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, MapData, SQLOrderingUtil} +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData, SQLOrderingUtil} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS -import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.Platform @@ -1555,8 +1554,8 @@ object CodeGenerator extends Logging { } /** - * Generates code creating a [[UnsafeArrayData]] or [[GenericArrayData]] based on - * given parameters. + * Generates code creating a [[UnsafeArrayData]] or + * [[org.apache.spark.sql.catalyst.util.GenericArrayData]] based on given parameters. 
* * @param arrayName name of the array to create * @param elementType data type of the elements in source array diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala index 7404030b661c8..c246d07f189b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index 070570d8f20b2..27b1f89f70870 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.catalyst.expressions.codegen -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeRow} import org.apache.spark.sql.types.StructType -import org.apache.spark.unsafe.Platform abstract class UnsafeRowJoiner { def join(row1: UnsafeRow, row2: UnsafeRow): UnsafeRow diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 9fef8e9415e72..4454afb6c099b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicReference import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedException} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index f440534745ba1..53d6394d0d1f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -18,14 +18,11 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.immutable.TreeSet -import scala.collection.mutable import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.CatalystTypeConverters.convertToScala import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference -import 
org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LeafNode, LogicalPlan, Project} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala index d6adbe83584e3..0d5974af19ac3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonFilters.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.json import org.apache.spark.sql.catalyst.{InternalRow, StructFilters} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala index 3dd79d153c236..0ff11ca49f3d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ComplexTypes.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.types.StructType /** * Simplify redundant [[CreateNamedStruct]], [[CreateArray]] and [[CreateMap]] expressions. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala index bfc36ec477a73..4434c29cbb3c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, CreateStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, ArrayTransform, CaseWhen, Coalesce, CreateArray, CreateMap, CreateNamedStruct, EqualTo, ExpectsInputTypes, Expression, GetStructField, If, IsNull, KnownFloatingPointNormalized, LambdaFunction, Literal, NamedLambdaVariable, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery, Window} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Window} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 33b398e11cde9..ef3de4738c75c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFi import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 76b9bd03f216c..9aa7e3201ab1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import java.time.LocalDate - import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index cb076f6e35184..11532d22204a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import 
scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.CleanupAliases import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index d8d18b46bcc74..2c6a716a2ed48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.CheckAnalysis import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode} +import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.util.Utils @@ -33,7 +32,7 @@ import org.apache.spark.util.Utils * analyzed flag set to true. * * The analyzer rules should use the various resolve methods, in lieu of the various transform - * methods defined in [[TreeNode]] and [[QueryPlan]]. + * methods defined in [[org.apache.spark.sql.catalyst.trees.TreeNode]] and [[QueryPlan]]. * * To prevent accidental use of the transform methods, this trait also overrides the transform * methods to throw exceptions in test mode, if they are used in the analyzer. @@ -44,7 +43,8 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Recursively marks all nodes in this plan tree as analyzed. - * This should only be called by [[CheckAnalysis]]. + * This should only be called by + * [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]. */ private[catalyst] def setAnalyzed(): Unit = { if (!_analyzed) { @@ -155,7 +155,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => * In analyzer, use [[resolveOperatorsDown()]] instead. If this is used in the analyzer, * an exception will be thrown in test mode. It is however OK to call this function within * the scope of a [[resolveOperatorsDown()]] call. - * @see [[TreeNode.transformDown()]]. + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformDown()]]. */ override def transformDown(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() @@ -164,7 +164,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => /** * Use [[resolveOperators()]] in the analyzer. 
- * @see [[TreeNode.transformUp()]] + * @see [[org.apache.spark.sql.catalyst.trees.TreeNode.transformUp()]] */ override def transformUp(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan = { assertNotAnalysisRule() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala index 49f89bed154bb..1346f80247a1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Statistics.scala @@ -20,18 +20,10 @@ package org.apache.spark.sql.catalyst.plans.logical import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.math.{MathContext, RoundingMode} -import scala.util.control.NonFatal - import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream} -import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ -import org.apache.spark.sql.catalyst.util.{ArrayData, DateTimeUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 4e7923b45822b..f96e07863fa69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.catalyst.plans.logical -import scala.collection.mutable - import org.apache.spark.sql.catalyst.AliasIdentifier -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation} +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.random.RandomSampler diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala index a325b61fcc5a9..4b5e278fccdfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/hints.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.util.Utils /** * A 
general hint for the child that is not yet resolved. This node is generated by the parser and diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala index 6925423f003ba..8e58c4f314df0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap} +import org.apache.spark.sql.catalyst.expressions.AttributeMap import org.apache.spark.sql.catalyst.plans.logical.{Project, Statistics} object ProjectEstimation { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 1ab7bbdcff697..ff2b366a9bc75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, import org.apache.spark.sql.catalyst.errors._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.JoinType -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{BroadcastMode, Partitioning} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala index ebbf241088f80..44203316edd94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala @@ -22,7 +22,6 @@ import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, UnsafeArrayData} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.array.ByteArrayMethods object ArrayData { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index 1a78422e57a4c..46860ae1771de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import java.time.{LocalDate, LocalDateTime, LocalTime, ZoneId} +import java.time.{LocalDate, LocalDateTime, LocalTime} import java.time.temporal.ChronoField import java.util.{Calendar, TimeZone} import java.util.Calendar.{DAY_OF_MONTH, DST_OFFSET, ERA, HOUR_OF_DAY, MINUTE, MONTH, SECOND, YEAR, ZONE_OFFSET} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala 
index b09ccff39f842..f541411daeff4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -22,9 +22,8 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} -import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, Statistics => V2Statistics, SupportsReportStatistics} +import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics} import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} -import org.apache.spark.sql.connector.write.WriteBuilder import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -36,8 +35,9 @@ import org.apache.spark.util.Utils * @param output the output attributes of this relation. * @param catalog catalogPlugin for the table. None if no catalog is specified. * @param identifier the identifier for the table. None if no identifier is defined. - * @param options The options for this table operation. It's used to create fresh [[ScanBuilder]] - * and [[WriteBuilder]]. + * @param options The options for this table operation. It's used to create fresh + * [[org.apache.spark.sql.connector.read.ScanBuilder]] and + * [[org.apache.spark.sql.connector.write.WriteBuilder]]. */ case class DataSourceV2Relation( table: Table, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 043c88f88843c..7556a19f0d316 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -31,7 +31,7 @@ import org.apache.spark.annotation.Stable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.DataTypeJsonUtils.{DataTypeJsonDeserializer, DataTypeJsonSerializer} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 6be6d81ec3bb7..960e174f9c368 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.types -import java.lang.{Long => JLong} import java.math.{BigDecimal => JavaBigDecimal, BigInteger, MathContext, RoundingMode} import scala.util.Try diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala index ac18b0f79b5f3..1962fca66c059 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowJsonSuite.scala @@ -17,14 +17,13 @@ package org.apache.spark.sql import java.sql.{Date, Timestamp} -import java.time.{Instant, LocalDate} +import java.time.LocalDate import org.json4s.JsonAST.{JArray, JBool, JDecimal, JDouble, JLong, JNull, JObject, JString, JValue} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.encoders.{ExamplePoint, ExamplePointUDT} import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index e8c7aed6d72ce..164bbd7f34d04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -134,7 +134,6 @@ object ScroogeLikeExample { } trait ScroogeLikeExample extends Product1[Int] with Serializable { - import ScroogeLikeExample._ def x: Int diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala index f433229595e9e..1c849fa21e4ea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/CreateTablePartitioningValidationSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{CreateTableAsSelect, LeafNode} import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} -import org.apache.spark.sql.connector.expressions.{Expressions, LogicalExpressions} +import org.apache.spark.sql.connector.expressions.Expressions import org.apache.spark.sql.types.{DoubleType, LongType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala index e449b9669cc72..ea2284e5420bd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveNaturalJoinSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala index 8cf41a02320d2..7566545f98355 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelperSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import 
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet} import org.apache.spark.sql.catalyst.optimizer.SimpleTestOptimizer import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode, LocalRelation} +import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter, LeafNode} import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, TimestampType} class StreamingJoinHelperSuite extends AnalysisTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 918db903a783f..3be417de472c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, LongType, MetadataBuilder} -import org.apache.spark.unsafe.types.CalendarInterval /** A dummy command for testing unsupported operations. */ case class DummyCommand() extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index ff33324c3bb18..bc2b93e5390da 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -28,7 +28,7 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.sql.{RandomDataGenerator, Row} -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, JavaTypeInference, ScalaReflection} +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.ScroogeLikeExample import org.apache.spark.sql.catalyst.analysis.{ResolveTimeZone, SimpleAnalyzer, UnresolvedDeserializer} import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -37,9 +37,8 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData, IntervalUtils} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class InvokeTargetClass extends Serializable { def filterInt(e: Any): Any = e.asInstanceOf[Int] > 0 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index f56ec49724adb..64b619ca7766b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.types.IntegerType class SubExprEvaluationRuntimeSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala index 303fa137d8925..53e8ee9fbe715 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentileSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.util.{ArrayData, QuantileSummaries} import org.apache.spark.sql.catalyst.util.QuantileSummaries.Stats -import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType, LongType} +import org.apache.spark.sql.types.{ArrayType, Decimal, DecimalType, DoubleType, FloatType, IntegerType, IntegralType} import org.apache.spark.util.SizeEstimator class ApproximatePercentileSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala index 67e3bc69543e8..d660afb7f8a05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{BooleanType, IntegerType} +import org.apache.spark.sql.types.IntegerType class CodeBlockSuite extends SparkFunSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala index f40691bd1a038..51c751923e414 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateDistinctSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Expand, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class EliminateDistinctSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 11ec037c94f73..c518fdded2112 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -25,8 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, TimestampType} +import org.apache.spark.sql.types.{IntegerType, StringType} import org.apache.spark.unsafe.types.CalendarInterval class FilterPushdownSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 8785bc7cd36cb..17dfc7f3f18f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala index 0ccf8aea660b2..c981cee55d0fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyCastsSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.dsl._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala index 9878969959bfd..dcd2fbbf00529 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/complexTypesSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Range} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import 
org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 4ac5c8d0561d9..f93c0dcf59f4c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -21,12 +21,11 @@ import java.util.Locale import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} -import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, FunctionResourceType, JarResource} +import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition.{after, first} import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} import org.apache.spark.unsafe.types.UTF8String diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala index 1cf888519077a..878fae4c547b3 100755 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.LeftOuter import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.{ColumnStatsMap, FilterEstimation} +import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.ColumnStatsMap import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 30792c9bacd53..c164835c753e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ -import scala.language.implicitConversions import org.apache.spark.annotation.Stable import org.apache.spark.internal.Logging diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 991f02d43bc47..31b4c158aa67b 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -470,7 +470,6 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { def insertInto(tableName: String): Unit = { import df.sparkSession.sessionState.analyzer.{AsTableIdentifier, NonSessionCatalogAndIdentifier, SessionCatalogAndIdentifier} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import org.apache.spark.sql.connector.catalog.CatalogV2Util._ assertNotBucketed("insertInto") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 3d431d6ff13a9..2c38a65ac2106 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -21,7 +21,6 @@ import java.io.{ByteArrayOutputStream, CharArrayWriter, DataOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.language.implicitConversions import scala.reflect.runtime.universe.TypeTag import scala.util.control.NonFatal @@ -63,7 +62,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils private[sql] object Dataset { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala index 7e430b682faf4..c40ce0f4777c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import java.util.Locale import scala.collection.JavaConverters._ -import scala.language.implicitConversions import org.apache.spark.annotation.Stable import org.apache.spark.api.python.PythonEvalType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala index e9bc25d489718..2f46fa8073bbc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala @@ -18,10 +18,8 @@ package org.apache.spark.sql import org.apache.spark.annotation.Stable -import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{ConfigEntry, OptionalConfigEntry} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.{DeprecatedConfig, RemovedConfig} /** * Runtime configuration interface for Spark. To access this, use `SparkSession.conf`. 
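The `RuntimeConfig` touched in the hunk above is the object returned by `SparkSession.conf`, as its Scaladoc says. A minimal usage sketch, illustrative only and not part of the patch; the key names are just examples:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: RuntimeConfig is reached through SparkSession.conf.
val spark = SparkSession.builder().master("local[*]").appName("runtime-conf-demo").getOrCreate()

// Set and read back a runtime SQL configuration value.
spark.conf.set("spark.sql.shuffle.partitions", "8")
val numShufflePartitions = spark.conf.get("spark.sql.shuffle.partitions")

// getOption returns None instead of throwing for an unset key.
val maybeUnset = spark.conf.getOption("spark.sql.some.unset.key")

spark.stop()
```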
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 0f6ae9c5d44e1..cceb38558946e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -30,9 +30,9 @@ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} -import org.apache.spark.sql.execution.aggregate.{ScalaAggregator, ScalaUDAF} +import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.execution.python.UserDefinedPythonFunction -import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} +import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.types.DataType import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala index c6a644f9f2e29..1436574c0d90a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalog import scala.collection.JavaConverters._ -import org.apache.spark.annotation.{Evolving, Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset} import org.apache.spark.sql.types.StructType import org.apache.spark.storage.StorageLevel diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index bd9120a1fbe78..303ae47f06b84 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCatalog, TableChange, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 5f72d6005a8dd..f163d85914bc9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.execution.columnar.{DefaultCachedBatchSerializer, InMemoryRelation} +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.execution.command.CommandUtils import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala index e1b9c8f430c56..b0bbb52bc4990 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CollectMetricsExec.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.execution -import scala.collection.mutable - import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.Row diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 45d28ddb42fc3..44636beeec7fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -25,7 +25,6 @@ import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index dcec0b019da28..08950c827f5aa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -22,7 +22,7 @@ import java.sql.{Date, Timestamp} import java.time.{Instant, LocalDate, ZoneOffset} import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, LegacyDateFormats, TimestampFormatter} +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.execution.command.{DescribeCommandBase, ExecutedCommandExec, ShowTablesCommand, ShowViewsCommand} import org.apache.spark.sql.execution.datasources.v2.{DescribeTableExec, ShowTablesExec} import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index 8746cc6f650d7..bbe3f50492d9f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -22,7 +22,6 @@ import 
org.apache.spark.sql.catalyst.expressions.aggregate.{Final, PartialMerge} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExecBase -import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 6c42c051fbba6..85476bcd21e19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.SaveMode -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index ba3d83714c302..e9b1aa81895f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemoryPlan import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery} +import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType /** @@ -312,8 +312,9 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { /** * Used to plan streaming aggregation queries that are computed incrementally as part of a - * [[StreamingQuery]]. Currently this rule is injected into the planner - * on-demand, only when planning in a [[org.apache.spark.sql.execution.streaming.StreamExecution]] + * [[org.apache.spark.sql.streaming.StreamingQuery]]. 
Currently this rule is injected into the + * planner on-demand, only when planning in a + * [[org.apache.spark.sql.execution.streaming.StreamExecution]] */ object StatefulAggregationStrategy extends Strategy { override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index a8905ca530005..b2963457e22db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.HashAggregateExec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala index 011acbf1b22a4..3760782515e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/DemoteBroadcastHashJoin.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.plans.logical.{HintInfo, Join, LogicalPlan, NO_BROADCAST_HASH} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.internal.SQLConf /** * This optimization rule detects a join child that has a high ratio of empty partitions and diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala index 9914eddd53a3d..bff142315f8ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.SparkPlan /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala index c3c7358641fcb..71540dbd39f95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ReuseAdaptiveSubquery.scala @@ -21,7 +21,6 @@ import scala.collection.concurrent.TrieMap import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{BaseSubqueryExec, ExecSubqueryExpression, ReusedSubqueryExec, SparkPlan} -import org.apache.spark.sql.internal.SQLConf case class ReuseAdaptiveSubquery( reuseMap: TrieMap[SparkPlan, BaseSubqueryExec]) extends Rule[SparkPlan] { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala index cdc57dbc7dcc2..aae3d922b28a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/simpleCosting.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike /** * A simple implementation of [[Cost]], which takes a number of [[Long]] as the cost value. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala index 75651500954cf..1c140d7b6955f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.unsafe.KVIterator -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter class ObjectAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala index b5372bcca89dd..9f2cf84a6d7e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationMap.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection, U import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateFunction, TypedImperativeAggregate} import org.apache.spark.sql.execution.UnsafeKVExternalSorter import org.apache.spark.sql.types.StructType -import org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter /** * An aggregation map that supports using safe `SpecificInternalRow`s aggregation buffers, so that diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala index 492b0f2da77cb..deb9e76c51760 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregationIterator.scala @@ -19,12 +19,13 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, AggregateFunction} +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.execution.metric.SQLMetric /** - * An iterator used to evaluate [[AggregateFunction]]. It assumes the input rows have been - * sorted by values of [[groupingExpressions]]. 
+ * An iterator used to evaluate + * [[org.apache.spark.sql.catalyst.expressions.aggregate.AggregateFunction]]. + * It assumes the input rows have been sorted by values of [[groupingExpressions]]. */ class SortBasedAggregationIterator( partIndex: Int, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala index 44bc9c2e3a9d0..41e247a02759b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/udaf.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.execution.aggregate -import scala.reflect.runtime.universe.TypeTag - import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Column, Row} +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, _} -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateMutableProjection, GenerateSafeProjection} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.expressions.{Aggregator, MutableAggregationBuffer, UserDefinedAggregateFunction} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 7334ea1e27284..006fa0fba4138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -21,7 +21,7 @@ import java.util.concurrent.{Future => JFuture} import java.util.concurrent.TimeUnit._ import scala.collection.mutable -import scala.concurrent.{ExecutionContext} +import scala.concurrent.ExecutionContext import scala.concurrent.duration.Duration import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.metric.SQLMetrics import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{LongType, StructType} -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.ThreadUtils import org.apache.spark.util.random.{BernoulliCellSampler, PoissonSampler} /** Physical plan for Project. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala index 40a2a7a2359e0..a4e5be01b45a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/CoalesceBucketsInJoin.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.joins.{BaseJoinExec, ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * This rule coalesces one side of the `SortMergeJoin` and `ShuffledHashJoin` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala index bb59f44abc761..6b195b3b49f09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/bucketing/DisableUnnecessaryBucketedScan.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{FileSourceScanExec, FilterExec, ProjectExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.exchange.Exchange -import org.apache.spark.sql.internal.SQLConf /** * Disable unnecessary bucketed table scan based on actual physical query plan. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala index 45557bfbada6c..d2f65b745f35a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.unsafe.types.UTF8String class ColumnStatisticsSchema(a: Attribute) extends Serializable { val upperBound = AttributeReference(a.name + ".upperBound", a.dataType, nullable = true)() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index 6495463be02c0..f86f62bbf853b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.{FileSystem, Path, PathFilter} import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, CatalogTable} +import org.apache.spark.sql.catalyst.catalog.{CatalogStatistics, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala index a1bb5af1ab723..a56007f5d5d95 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/DataWritingCommand.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker -import org.apache.spark.sql.execution.datasources.FileFormatWriter import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.util.SerializableConfiguration @@ -35,7 +34,7 @@ trait DataWritingCommand extends Command { /** * The input query plan that produces the data to be written. * IMPORTANT: the input query plan MUST be analyzed, so that we can carry its output columns - * to [[FileFormatWriter]]. + * to [[org.apache.spark.sql.execution.datasources.FileFormatWriter]]. 
*/ def query: LogicalPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 61ee6d7f4a299..00accedf21556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index ef6b0bba1628e..f99dc8d9f1a8e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -21,7 +21,6 @@ import java.util.Locale import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 68c47d6a6dfaa..6ed40aacd1125 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -21,7 +21,6 @@ import java.net.URI import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index d76b4b8894783..330a503e5f8e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException} import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, FunctionResource} -import org.apache.spark.sql.catalyst.expressions.{Attribute, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.types.{StringType, StructField, StructType} diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index b55bed9cd7fc0..34ded5d456d09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.execution.datasources import java.util.{Locale, ServiceConfigurationError, ServiceLoader} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.util.{Failure, Success, Try} import org.apache.hadoop.conf.Configuration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 28a63c26604ec..1149767bdade2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -22,11 +22,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileDataSourceV2, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, FileTable} /** * Replace the File source V2 table in [[InsertIntoStatement]] to V1 [[FileFormat]]. - * E.g, with temporary view `t` using [[FileDataSourceV2]], inserting into view `t` fails + * E.g, with temporary view `t` using + * [[org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2]], inserting into view `t` fails * since there is no corresponding physical plan. * This is a temporary hack for making current data source V2 work. It should be * removed when Catalog support of file data source v2 is finished. 
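The comment in the `FallBackFileSourceV2` hunk above describes inserting into a temporary view that is backed by a file source. A rough sketch of that scenario with the public API; the path and view name are invented, and whether the v1 or v2 file source path is taken depends on the session's source settings:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: a temporary view over a file-based source, then an INSERT
// into it. The fallback rule above is what lets such an insert be planned against
// the v1 FileFormat when the view initially resolves to a file-based v2 table.
val spark = SparkSession.builder().master("local[*]").appName("fallback-demo").getOrCreate()
import spark.implicits._

// Seed the (made-up) path so the view can be created over an existing location.
Seq(0).toDF("id").write.mode("overwrite").parquet("/tmp/fallback_demo")

spark.sql("CREATE TEMPORARY VIEW t USING parquet OPTIONS (path '/tmp/fallback_demo')")
spark.sql("INSERT INTO t VALUES (1), (2)")
spark.sql("SELECT * FROM t").show()

spark.stop()
```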
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala index d278802e6c9f2..a0b191e60f376 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFsRelation.scala @@ -17,10 +17,6 @@ package org.apache.spark.sql.execution.datasources -import java.util.Locale - -import scala.collection.mutable - import org.apache.spark.sql.{SparkSession, SQLContext} import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.execution.FileRelation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala index 868e5371426c0..1d7abe5b938c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/OutputWriter.scala @@ -19,8 +19,7 @@ package org.apache.spark.sql.execution.datasources import org.apache.hadoop.mapreduce.TaskAttemptContext -import org.apache.spark.sql.Row -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 5341e22f5e670..fed9614347f6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} -import org.apache.spark.sql.types.{StringType, StructType} +import org.apache.spark.sql.types.StructType /** * An abstract class that represents [[FileIndex]]s that are aware of partitioned tables. 
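For context on the `PartitioningAwareFileIndex` hunk above: the partition awareness it refers to is the usual directory-based layout that file sources discover. A hedged sketch of producing and reading such a layout with the public API; the path and column names are made up:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: writing a partitioned layout that a partition-aware
// file index can later discover from the directory names.
val spark = SparkSession.builder().master("local[*]").appName("partition-demo").getOrCreate()
import spark.implicits._

val events = Seq((1, "2020-09-01"), (2, "2020-09-02")).toDF("id", "event_date")

// Produces directories like /tmp/events/event_date=2020-09-01/part-*.parquet
events.write.partitionBy("event_date").mode("overwrite").parquet("/tmp/events")

// Reading the root path back lets partition discovery reconstruct event_date
// as a column from the directory names.
val reloaded = spark.read.parquet("/tmp/events")
reloaded.filter($"event_date" === "2020-09-02").show()

spark.stop()
```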
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 4087efc486a4f..796c23c7337d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, Literal} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala index c3dd6939ec5bd..0959d8799f5a1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/RecordReaderIterator.scala @@ -21,8 +21,6 @@ import java.io.Closeable import org.apache.hadoop.mapreduce.RecordReader -import org.apache.spark.sql.catalyst.InternalRow - /** * An adaptor from a Hadoop [[RecordReader]] to an [[Iterator]] over the values returned. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala index 61e0154a0ffe8..76a6a48ca0b0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaPruning.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} /** * Prunes unnecessary physical columns given a [[PhysicalOperation]] over a data source relation. 
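To make the `SchemaPruning` comment above concrete at the user level: when only a nested field is selected, the scan's read schema can be narrowed to just that field for Parquet and ORC. A hedged sketch; the path and column names are invented, and the config key shown is the one that, to my understanding, gates this behavior and is on by default in recent releases:

```
import org.apache.spark.sql.SparkSession

// Illustrative sketch: selecting only contact.first below should let the scan
// read just that nested field from the Parquet files, not the whole struct.
val spark = SparkSession.builder().master("local[*]").appName("pruning-demo").getOrCreate()
import spark.implicits._

// Assumed to be enabled by default in recent versions; set here only for clarity.
spark.conf.set("spark.sql.optimizer.nestedSchemaPruning.enabled", "true")

val df = Seq(("a@example.com", "Alice", "Smith"))
  .toDF("email", "first", "last")
  .selectExpr("email", "named_struct('first', first, 'last', last) AS contact")

df.write.mode("overwrite").parquet("/tmp/people")

// After pruning, the scan's ReadSchema should mention only contact.first.
val pruned = spark.read.parquet("/tmp/people").select($"contact.first")
pruned.explain()

spark.stop()
```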
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala index 637ce68ec05a2..b241243363746 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala @@ -22,14 +22,14 @@ import java.sql.Timestamp import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, GlobFilter, Path} +import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile} import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources.{And, DataSourceRegister, EqualTo, Filter, GreaterThan, GreaterThanOrEqual, LessThan, LessThanOrEqual, Not, Or} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index 10146be44e8bf..d8fa768a604f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -23,8 +23,6 @@ import java.nio.charset.{Charset, StandardCharsets} import com.univocity.parsers.csv.CsvParser import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.hadoop.io.{LongWritable, Text} -import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.FileInputFormat diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index e25ce53941ff6..87ca78db59b29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.jdbc -import java.sql.{Connection, PreparedStatement, ResultSet, SQLException} +import java.sql.{Connection, PreparedStatement, ResultSet} import scala.util.control.NonFatal @@ -46,8 +46,8 @@ object JDBCRDD extends Logging { * @param options - JDBC options that contains url, table and other information. * * @return A StructType giving the table's Catalyst schema. - * @throws SQLException if the table specification is garbage. - * @throws SQLException if the table contains an unsupported type. + * @throws java.sql.SQLException if the table specification is garbage. + * @throws java.sql.SQLException if the table contains an unsupported type. 
*/ def resolveTable(options: JDBCOptions): StructType = { val url = options.url diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index e0fa4584185e9..f2f6f60cb1dde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -17,13 +17,10 @@ package org.apache.spark.sql.execution.datasources.json -import java.nio.charset.{Charset, StandardCharsets} - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} -import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.ExprUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index 4ab009c6bd014..32ce7185f7381 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -23,7 +23,6 @@ import org.apache.orc.mapred.{OrcList, OrcMap, OrcStruct, OrcTimestamp} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData} import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.RebaseDateTime.rebaseJulianToGregorianDays import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 95f19f9dcee64..1901f5575470e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -35,7 +35,6 @@ import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.ParquetOutputFormat.JobSummaryLevel import org.apache.parquet.hadoop.codec.CodecConfig import org.apache.parquet.hadoop.util.ContextUtil -import org.apache.parquet.schema.MessageType import org.apache.spark.{SparkException, TaskContext} import org.apache.spark.internal.Logging @@ -504,7 +503,8 @@ object ParquetFileFormat extends Logging { /** * Reads Spark SQL schema from a Parquet footer. If a valid serialized Spark SQL schema string * can be found in the file metadata, returns the deserialized [[StructType]], otherwise, returns - * a [[StructType]] converted from the [[MessageType]] stored in this footer. + * a [[StructType]] converted from the [[org.apache.parquet.schema.MessageType]] stored in this + * footer. 
*/ def readSchemaFromFooter( footer: Footer, converter: ParquetToSparkSchemaConverter): StructType = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala index e7753cec681cf..70f6726c581a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOutputWriter.scala @@ -21,7 +21,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ import org.apache.parquet.hadoop.ParquetOutputFormat -import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.datasources.OutputWriter diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 6ef56af927129..f65aef95b6c38 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -26,9 +26,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.parquet.column.Dictionary import org.apache.parquet.io.api.{Binary, Converter, GroupConverter, PrimitiveConverter} -import org.apache.parquet.schema.{GroupType, MessageType, OriginalType, Type} -import org.apache.parquet.schema.OriginalType.{INT_32, LIST, UTF8} -import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, DOUBLE, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} +import org.apache.parquet.schema.{GroupType, OriginalType, Type} +import org.apache.parquet.schema.OriginalType.LIST +import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.{BINARY, FIXED_LEN_BYTE_ARRAY, INT32, INT64, INT96} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow @@ -107,11 +107,15 @@ private[parquet] class ParquetPrimitiveConverter(val updater: ParentContainerUpd * }}} * 5 converters will be created: * - * - a root [[ParquetRowConverter]] for [[MessageType]] `root`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[INT_32]] field `f1`, and + * - a root [[ParquetRowConverter]] for [[org.apache.parquet.schema.MessageType]] `root`, + * which contains: + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.OriginalType.INT_32]] field `f1`, and * - a nested [[ParquetRowConverter]] for optional [[GroupType]] `f2`, which contains: - * - a [[ParquetPrimitiveConverter]] for required [[DOUBLE]] field `f21`, and - * - a [[ParquetStringConverter]] for optional [[UTF8]] string field `f22` + * - a [[ParquetPrimitiveConverter]] for required + * [[org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE]] field `f21`, and + * - a [[ParquetStringConverter]] for optional [[org.apache.parquet.schema.OriginalType.UTF8]] + * string field `f22` * * When used as a root converter, [[NoopUpdater]] should be used since root converters don't have * any "parent" container. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index e45514385e292..3a2a642b870f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -29,8 +29,6 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSc import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{AtomicType, StructType} import org.apache.spark.sql.util.SchemaUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 0cbcad1f48026..0ca442baeea2f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} import org.apache.spark.sql.types.StructType case class DescribeTableExec( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala index f7b4317ad65e2..777ee9d385f12 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropNamespaceExec.scala @@ -21,7 +21,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.CatalogPlugin /** * Physical plan node for dropping a namespace. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala index e4de70d4ee88f..8cf59f3a59323 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileDataSourceV2.scala @@ -25,7 +25,6 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.catalog.{Table, TableProvider} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 95715fd1af56e..7ceee1edee180 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** * Physical plan node for showing table properties. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 5dfd2e52706d0..cb4a2994de1f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.types.BooleanType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala index 1ca3fd42c0597..f24fb95acb922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TextBasedFileScan.scala @@ -22,8 +22,6 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex -import 
org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala index 2f9387532c25c..0dbc74395afb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScanBuilder.scala @@ -19,10 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2.orc import scala.collection.JavaConverters._ -import org.apache.orc.mapreduce.OrcInputFormat - import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.quoteIfNeeded import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownFilters} import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.orc.OrcFilters diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala index 6973f55e8dca0..93d7db44f2285 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PlanDynamicPruningFilters.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeSeq, BindReferences, DynamicPruningExpression, DynamicPruningSubquery, Expression, ListQuery, Literal, PredicateHelper} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.plans.physical.BroadcastMode import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{InSubqueryExec, QueryExecution, SparkPlan, SubqueryBroadcastExec} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index cf38fee055ca5..ebbc8a4df5643 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SQLConf /** * Ensures that the [[org.apache.spark.sql.catalyst.plans.physical.Partitioning Partitioning]] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index aeaf59b7f0f4a..e58733b35990a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -27,7 +27,6 @@ import 
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expre import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 298d63478b63e..7c476ab03c002 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -26,7 +26,7 @@ import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index 59f5a7078a151..ae7b7ef23512c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -26,7 +26,7 @@ import org.apache.arrow.vector.VectorSchemaRoot import org.apache.arrow.vector.ipc.{ArrowStreamReader, ArrowStreamWriter} import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel -import org.apache.spark.{SparkException, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.api.r._ import org.apache.spark.api.r.SpecialLengths import org.apache.spark.broadcast.Broadcast diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index eb8b8af7950b2..747094b7791c1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, AttributeReference, Expression, Literal, SortOrder, UnsafeRow} +import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder, UnsafeRow} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution} import org.apache.spark.sql.execution._ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index c2278e8659147..893639a86c88c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -19,14 +19,12 @@ package 
org.apache.spark.sql.execution.streaming import java.io._ import java.nio.charset.StandardCharsets -import java.util.{ConcurrentModificationException, EnumSet, UUID} +import java.util.ConcurrentModificationException import scala.reflect.ClassTag import org.apache.commons.io.IOUtils -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.hadoop.fs.permission.FsPermission import org.json4s.NoTypeHints import org.json4s.jackson.Serialization diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index aba0463f56cd7..d6be33c76e937 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -40,7 +40,6 @@ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream} import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate} import org.apache.spark.sql.connector.write.streaming.StreamingWrite -import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala index 516afbea5d9de..fc0cfc30ff2fd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala @@ -31,14 +31,14 @@ import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream -import org.apache.spark.sql.streaming.StreamingQuery /** - * Contains metadata associated with a [[StreamingQuery]]. This information is written - * in the checkpoint location the first time a query is started and recovered every time the query - * is restarted. + * Contains metadata associated with a [[org.apache.spark.sql.streaming.StreamingQuery]]. + * This information is written in the checkpoint location the first time a query is started + * and recovered every time the query is restarted. 
* - * @param id unique id of the [[StreamingQuery]] that needs to be persisted across restarts + * @param id unique id of the [[org.apache.spark.sql.streaming.StreamingQuery]] + * that needs to be persisted across restarts */ case class StreamMetadata(id: String) { def json: String = Serialization.write(this)(StreamMetadata.format) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala index 71792facf698a..2f62dbd7ec578 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinHelper.scala @@ -21,13 +21,13 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging -import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition, ZippedPartitionsRDD2} +import org.apache.spark.rdd.{RDD, ZippedPartitionsBaseRDD, ZippedPartitionsPartition} import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, BoundReference, Expression, NamedExpression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.streaming.WatermarkSupport.watermarkExpression -import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProvider, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state.{StateStoreCoordinatorRef, StateStoreProviderId} /** @@ -200,8 +200,8 @@ object StreamingSymmetricHashJoinHelper extends Logging { /** * A custom RDD that allows partitions to be "zipped" together, while ensuring the tasks' * preferred location is based on which executors have the required join state stores already - * loaded. This class is a variant of [[ZippedPartitionsRDD2]] which only changes signature - * of `f`. + * loaded. This class is a variant of [[org.apache.spark.rdd.ZippedPartitionsRDD2]] which only + * changes signature of `f`. 
*/ class StateStoreAwareZipPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala index 6d5e7fd5c5cf3..60c66d863a3c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.streaming.sources -import org.apache.spark.api.python.PythonException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.execution.streaming.Sink diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala index 507f860e0452a..fa51dd61a939b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/PackedRowWriterFactory.scala @@ -21,12 +21,13 @@ import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.StreamingDataWriterFactory /** - * A simple [[DataWriterFactory]] whose tasks just pack rows into the commit message for delivery - * to a [[BatchWrite]] on the driver. + * A simple [[org.apache.spark.sql.connector.write.DataWriterFactory]] whose tasks just pack rows + * into the commit message for delivery to a + * [[org.apache.spark.sql.connector.write.BatchWrite]] on the driver. * * Note that, because it sends all rows to the driver, this factory will generally be unsuitable * for production-quality sinks. It's intended for use in tests. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala index a6ac6f2da8e41..778cfeda68af0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/memory.scala @@ -35,13 +35,12 @@ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUti import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.write.{DataWriter, DataWriterFactory, LogicalWriteInfo, PhysicalWriteInfo, SupportsTruncate, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} -import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.types.StructType /** - * A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit - * tests and does not provide durability. + * A sink that stores the results in memory. 
This [[org.apache.spark.sql.execution.streaming.Sink]] + * is primarily intended for use in unit tests and does not provide durability. */ class MemorySink extends Table with SupportsWrite with Logging { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index d52505fbdab35..05bcee7b05c6f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.{SparkContext, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.UnsafeRowUtils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala index b894e771a6fe2..f21e2ffb80a7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreRDD.scala @@ -23,8 +23,6 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.streaming.continuous.EpochTracker import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 3fae3979757fe..dae771c613131 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -25,14 +25,14 @@ import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, JoinedRow, Literal, SpecificInternalRow, UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.execution.streaming.{StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec} +import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper._ -import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair import org.apache.spark.sql.types.{BooleanType, LongType, StructField, StructType} import org.apache.spark.util.NextIterator /** - * Helper class to manage state required by a single side of [[StreamingSymmetricHashJoinExec]]. + * Helper class to manage state required by a single side of + * [[org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinExec]]. 
* The interface of this class is basically that of a multi-map: * - Get: Returns an iterator of multiple values for given key * - Append: Append a new value to the given key diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index f5fbe0fc32254..1449d937982e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.execution.streaming.state._ -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.{OutputMode, StateOperatorProgress} import org.apache.spark.sql.types._ import org.apache.spark.util.{CompletionIterator, NextIterator, Utils} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala index b19540253d7eb..e53e0644eb268 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala @@ -22,7 +22,6 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, SortOrder, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning} -import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.{LimitExec, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.streaming.state.StateStoreOps import org.apache.spark.sql.streaming.OutputMode diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index b693cae824bf9..6e0e36cbe5901 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -17,17 +17,11 @@ package org.apache.spark.sql.execution.window -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.physical._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} +import org.apache.spark.sql.execution.{ExternalAppendOnlyUnsafeRowArray, SparkPlan} /** * This class calculates and outputs (windowed) aggregates over the rows in a single (sorted) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index a6a3f3d7384bf..c6b98d48d7dde 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} +import org.apache.spark.sql.execution.UnaryExecNode import org.apache.spark.sql.types.{CalendarIntervalType, DateType, IntegerType, TimestampType} trait WindowExecBase extends UnaryExecNode { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index 0cef33509a175..80dd3cf8bc840 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -17,16 +17,13 @@ package org.apache.spark.sql.expressions -import scala.reflect.runtime.universe.TypeTag - -import org.apache.spark.annotation.{Experimental, Stable} +import org.apache.spark.annotation.Stable import org.apache.spark.sql.{Column, Encoder} -import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete} import org.apache.spark.sql.execution.aggregate.ScalaAggregator -import org.apache.spark.sql.types.{AnyDataType, DataType} +import org.apache.spark.sql.types.DataType /** * A user-defined function. To create one, use the `udf` functions in `functions`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala index f7591e4d265e0..4e3c5586209e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/scalalang/typed.scala @@ -44,8 +44,6 @@ object typed { override protected def _sqlContext: SQLContext = null } - import implicits._ - /** * Average aggregate function. 
  *
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 5dc1c6b5b49fc..9861d21d3a430 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -18,8 +18,7 @@ package org.apache.spark.sql
 import scala.collection.JavaConverters._
-import scala.language.implicitConversions
-import scala.reflect.runtime.universe.{typeTag, TypeTag}
+import scala.reflect.runtime.universe.TypeTag
 import scala.util.Try
 import org.apache.spark.annotation.Stable
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 0f9a89741c192..48d8c3d325347 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.streaming.StreamingQueryManager
-import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListener}
+import org.apache.spark.sql.util.ExecutionListenerManager
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
@@ -52,7 +52,8 @@ import org.apache.spark.sql.util.{ExecutionListenerManager, QueryExecutionListen
  * @param planner Planner that converts optimized logical plans to physical plans.
  * @param streamingQueryManagerBuilder A function to create a streaming query manager to
  *        start and stop streaming queries.
- * @param listenerManager Interface to register custom [[QueryExecutionListener]]s.
+ * @param listenerManager Interface to register custom
+ *        [[org.apache.spark.sql.util.QueryExecutionListener]]s.
  * @param resourceLoaderBuilder a function to create a session shared resource loader to load JARs,
  *        files, etc.
  * @param createQueryExecution Function used to create QueryExecution objects.
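For context (illustration only, not part of the patch): the `listenerManager` parameter documented above is the session's ExecutionListenerManager, reachable from user code as `SparkSession.listenerManager`. A minimal sketch of registering a custom listener follows; the listener body and app name are invented for illustration, while the APIs used are the public Spark ones.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

val spark = SparkSession.builder().master("local[*]").appName("listener-sketch").getOrCreate()

// SessionState's ExecutionListenerManager is exposed as spark.listenerManager.
spark.listenerManager.register(new QueryExecutionListener {
  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit =
    println(s"$funcName succeeded in ${durationNs / 1e6} ms")
  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit =
    println(s"$funcName failed: ${exception.getMessage}")
})

// An action executed through the Dataset API is reported to registered listeners.
spark.range(10).count()
```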
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 1acdc4bd5f0e3..89aceacac6007 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -31,13 +31,11 @@ import org.apache.hadoop.fs.FsUrlStreamHandlerFactory import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.internal.Logging -import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} import org.apache.spark.sql.internal.StaticSQLConf._ -import org.apache.spark.sql.streaming.StreamingQueryListener import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore import org.apache.spark.util.Utils diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 0fe2d0be966d0..ffdbe9d4e4915 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -29,7 +29,6 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Evolving import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{AnalysisException, DataFrame, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala index cdad5ed9942b5..1f7e65dede170 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.streaming.ui import java.text.SimpleDateFormat -import java.util.Locale import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 321f4966178d7..d34dcb4fe0c01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -26,7 +26,6 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.runtime.universe.TypeTag import scala.util.Random -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.apache.spark.SparkException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala index 8b0f46b9d1ddb..4fdaeb57ad50e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import 
org.scalatest.BeforeAndAfterEach - import org.apache.spark.sql.catalyst.plans.logical.Expand import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index 8c998290b5044..fd408c37ef6cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql -import java.sql.Date - import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index de791383326f1..35e732e0840e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} import org.apache.spark.sql.connector.{InMemoryTable, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index 2be86b9ad6208..ac51634febc99 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql import scala.collection.immutable.{HashSet => HSet} import scala.collection.immutable.Queue import scala.collection.mutable.{LinkedHashMap => LHMap} -import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index 80346b350c142..861a001b190aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -27,7 +27,6 @@ import org.scalatest.Assertions._ import org.apache.spark.TestUtils import org.apache.spark.api.python.{PythonBroadcast, PythonEvalType, PythonFunction, PythonUtils} import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.config.Tests import org.apache.spark.sql.catalyst.expressions.{Cast, Expression} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.python.UserDefinedPythonFunction diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index c2aee0ad4c9a1..76204c504c0ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.tags.ExtendedSQLTest // scalastyle:off line.size.limit /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index b016cc3f57e0d..65377594f083c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -25,7 +25,6 @@ import java.util.concurrent.TimeUnit import scala.collection.mutable import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.CatalogColumnStat import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 6b25d7c61663c..46112d40f08ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -30,7 +30,6 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap class DataSourceV2DataFrameSessionCatalogSuite extends InsertIntoTests(supportsDynamicOverwrite = true, includeSQLOnlyTests = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 4eaf5822e1628..ddafa1bb5070a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index eacdb9e2fcd7b..3aad644655aa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression} import org.apache.spark.sql.connector.catalog.{Identifier, SupportsCatalogOptions, TableCatalog} import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME -import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala index 2d75a35215866..bad21aac41712 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TableCapabilityCheckSuite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnalysisSuite, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Table, TableCapability} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, TableCapabilityCheck} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala index 74f2ca14234d2..9beef690cba32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1ReadFallbackSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, QueryTest, Row, SparkSession, SQLContext} -import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, Table, TableCapability} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns, V1Scan} import org.apache.spark.sql.execution.RowDataSourceScanExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index e6029400997a2..81f292809df4a 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -28,7 +28,6 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.Partitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 7db94a702488a..b631f08405a39 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.{execution, DataFrame, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Sort, Union} +import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range, Repartition, Union} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecution} import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala index ddaa2687eaf1a..18d36670306b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} -import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala index 7ddf9d87a6aca..f1fcf3bc5125e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanTest.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.spark.SparkFunSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala index 48f85ae76cd8c..ad3ec85e984c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveTestUtils.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.adaptive -import java.io.{PrintWriter, StringWriter} - import org.scalactic.source.Position import org.scalatest.Tag diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala index 9ade8b14f59b0..a98ca7f5d8f88 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/FilterPushdownBenchmark.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.functions.{monotonically_increasing_id, timestamp_seconds} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.ParquetOutputTimestampType -import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType, TimestampType} +import org.apache.spark.sql.types.{ByteType, Decimal, DecimalType} /** * Benchmark to measure read performance with Filter pushdown. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala index d2bd962b50654..f89fe2e64c778 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/ParquetNestedPredicatePushDownBenchmark.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.execution.benchmark -import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} +import org.apache.spark.sql.{DataFrame, SaveMode} import org.apache.spark.sql.internal.SQLConf /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 43bc7c12937ec..f931914b19c6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark -import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala index 847e0ec4f3195..0abb3cb6a2ed0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.CalendarInterval class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[BooleanColumnStats], BOOLEAN, Array(true, false, 0)) 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 348cf94dfc629..9d0147048dbb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2026,7 +2026,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } test("SPARK-30312: truncate table - keep acl/permission") { - import testImplicits._ val ignorePermissionAcl = Seq(true, false) ignorePermissionAcl.foreach { ignore => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index f5809ebbb836e..fd1978c5137a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -26,7 +26,7 @@ import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala index fd70b6529ff51..22db55afc27c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaTest.scala @@ -21,7 +21,7 @@ import java.io.File import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession /** * The reader schema is said to be evolved (or projected) when it changed after the data is diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala index 6420081a9757b..3e8a4fe290502 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/RowDataSourceStrategySuite.scala @@ -22,15 +22,10 @@ import java.util.Properties import org.scalatest.BeforeAndAfter 
-import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils class RowDataSourceStrategySuite extends SharedSparkSession with BeforeAndAfter { - import testImplicits._ val url = "jdbc:h2:mem:testdb0" val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 233978289f068..e843d1d328425 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.SparkConf import org.apache.spark.sql.SaveMode import org.apache.spark.sql.test.SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 8462916daaab8..86ff026d7b1e9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf.SOURCES_BINARY_FILE_MAX_LENGTH import org.apache.spark.sql.sources._ -import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.util.Utils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala index d27b5c4737a11..7cc3a1cf9f3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonParsingOptionsSuite.scala @@ -18,12 +18,11 @@ package org.apache.spark.sql.execution.datasources.json import org.apache.spark.sql.{QueryTest, Row} -import org.apache.spark.sql.catalyst.json.JSONOptions import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{DoubleType, StringType, StructType} +import org.apache.spark.sql.types.{StringType, StructType} /** - * Test cases for various [[JSONOptions]]. + * Test cases for various [[org.apache.spark.sql.catalyst.json.JSONOptions]]. 
*/ class JsonParsingOptionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 6c9bd32913178..378b52f9c6c8c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.orc import org.apache.spark.SparkConf -import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.SchemaPruningSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala index 4b2437803d645..7f408dbba5099 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCommitterSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.io.FileNotFoundException - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} @@ -149,7 +147,7 @@ private object MarkingFileOutput { * @param outputPath destination directory * @param conf configuration to create the FS with * @return the status of the marker - * @throws FileNotFoundException if the marker is absent + * @throws java.io.FileNotFoundException if the marker is absent */ def checkMarker(outputPath: Path, conf: Configuration): FileStatus = { outputPath.getFileSystem(conf).getFileStatus(new Path(outputPath, "marker")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 34bdef7bdb402..d13b3e58a30ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.datasources.parquet import java.nio.file.{Files, Paths, StandardCopyOption} import java.sql.{Date, Timestamp} -import java.time._ import java.util.Locale import scala.collection.JavaConverters._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 8c4eedfde76cd..8c5f7bed7c50d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -21,7 +21,7 @@ import java.io.File import java.time.ZoneOffset import org.apache.commons.io.FileUtils -import org.apache.hadoop.fs.{FileSystem, Path, 
PathFilter} +import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index accd04592bec5..5c41614c45b6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -23,8 +23,6 @@ import java.sql.{Date, Timestamp} import java.time.{ZoneId, ZoneOffset} import java.util.{Calendar, Locale} -import scala.collection.mutable.ArrayBuffer - import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetOutputFormat diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 7990b1c27437a..e97c6cd29709c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -23,7 +23,7 @@ import scala.reflect.runtime.universe.TypeTag import org.apache.parquet.io.ParquetDecodingException import org.apache.parquet.schema.{MessageType, MessageTypeParser} -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index c53617b40e09d..622d69e188821 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -22,7 +22,6 @@ import java.lang.{Long => JLong} import java.net.URI import java.nio.charset.StandardCharsets.UTF_8 import java.util.concurrent.ConcurrentHashMap -import java.util.concurrent.atomic.AtomicLong import scala.util.Random diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala index 3ead91fcf712a..014840d758c0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MemorySinkSuite.scala @@ -24,7 +24,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.streaming.sources._ -import org.apache.spark.sql.streaming.{OutputMode, StreamTest} +import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types.{IntegerType, StructField, StructType} import org.apache.spark.util.Utils diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala index dec30fd01f7e2..ea6fd8ab312c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelperSuite.scala @@ -23,7 +23,6 @@ import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.execution.streaming.GroupStateImpl._ -import org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite._ import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala index 298820349b683..6eb070138c3b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalog.{Column, Database, Function, Table} import org.apache.spark.sql.catalyst.{FunctionIdentifier, ScalaReflection, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ -import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.StructType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 77a5d12cd8c95..580e7df6ef63e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import scala.language.reflectiveCalls - import org.apache.hadoop.fs.Path import org.apache.log4j.Level diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 4832386e553db..167e87dd3d5cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning -import org.apache.spark.sql.execution.{DataSourceScanExec, FileSourceScanExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.{FileSourceScanExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala index 1fdd3be88f782..179cdeb976391 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/DisableUnnecessaryBucketedScanSuite.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} -import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala index 9b26a5659df49..48d717daf00d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PathOptionSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.sources import java.net.URI -import org.apache.hadoop.fs.Path - import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogUtils diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index f97c9386f9488..788be539fe073 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -21,7 +21,6 @@ import java.io.File import java.sql.Date import org.apache.commons.io.FileUtils -import org.scalatest.BeforeAndAfterAll import org.scalatest.exceptions.TestFailedException import org.apache.spark.SparkException @@ -34,7 +33,7 @@ import org.apache.spark.sql.catalyst.plans.physical.UnknownPartitioning import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.RDDScanExec import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore, StateStoreId, StateStoreMetrics, UnsafeRowPair} +import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, MemoryStateStore, StateStore} import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index 7a2e29f1258ae..624b630401f47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.streaming import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import scala.language.experimental.macros import scala.reflect.ClassTag import 
scala.util.Random import scala.util.control.NonFatal diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 4a57cc27b1d59..0524e29662014 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} -import scala.collection.mutable - import org.apache.commons.io.FileUtils import org.scalatest.Assertions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index e1505acf3ecda..ac9cd1a12d06f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.streaming -import org.scalatest.BeforeAndAfterAll - -import org.apache.spark.sql.{DataFrame, Row} -import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, HashPartitioning, SinglePartition} +import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ -import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamingDeduplicateExec} -import org.apache.spark.sql.execution.streaming.state.StateStore +import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index b2bb00b704a69..a25616af360b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -26,19 +26,11 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter -import org.apache.spark.SparkContext import org.apache.spark.scheduler.ExecutorCacheTaskLocation -import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} -import org.apache.spark.sql.catalyst.analysis.StreamingJoinHelper -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, Filter} -import org.apache.spark.sql.catalyst.trees.TreeNode -import org.apache.spark.sql.execution.{FileSourceScanExec, LogicalRDD} -import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types._ import org.apache.spark.util.Utils abstract class StreamingJoinSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 
0d17f2e0bc7fb..02f91399fce1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -22,7 +22,6 @@ import java.sql.Timestamp import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ -import org.apache.spark.sql.execution.datasources.v2.ContinuousScanExec import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.continuous._ import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala index 8d39704c61d4e..bdc714d49fcc9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamReaderWriterSuite.scala @@ -89,8 +89,6 @@ class DefaultSource extends StreamSourceProvider with StreamSinkProvider { override def getOffset: Option[Offset] = Some(new LongOffset(0)) override def getBatch(start: Option[Offset], end: Offset): DataFrame = { - import spark.implicits._ - spark.internalCreateDataFrame(spark.sparkContext.emptyRDD, schema, isStreaming = true) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala index 1b6724054a3ad..d15e5c42732d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/GenericFunSpecSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.test import org.scalatest.funspec.AnyFunSpec -import org.apache.spark.sql.Dataset - /** * The purpose of this suite is to make sure that generic FunSpec-based scala * tests work with a shared spark session diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index e58357a415545..45cfa86ba9343 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.UUID import java.util.regex.Pattern -import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetSchemasOperation @@ -29,7 +27,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext -import org.apache.spark.util.{Utils => SparkUtils} /** * Spark's own GetSchemasOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index bccad865be27a..bddf5eb82012f 100644 --- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -30,7 +30,6 @@ import org.apache.hive.service.cli.session.HiveSession import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ -import org.apache.spark.sql.hive.HiveUtils /** * Spark's own GetTablesOperation diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index c39d2ecdd7923..df0fa514ccff3 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -24,7 +24,6 @@ import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import scala.util.control.NonFatal -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.shims.Utils @@ -37,7 +36,6 @@ import org.apache.hive.service.server.HiveServer2 import org.slf4j.Logger import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index e4559e69e7585..856edede0b85f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -17,11 +17,7 @@ package org.apache.spark.sql.hive.thriftserver -import java.util.concurrent.Executors - -import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hive.service.cli.SessionHandle import org.apache.hive.service.cli.session.SessionManager import org.apache.hive.service.rpc.thrift.TProtocolVersion diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 8efbdb30c605c..54a40e3990f09 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.hive.thriftserver.ui import java.net.URLEncoder import java.nio.charset.StandardCharsets.UTF_8 -import java.util.Calendar import javax.servlet.http.HttpServletRequest import scala.xml.Node diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index f5ce21f2af335..d39b94503fe40 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -27,7 +27,7 @@ import scala.concurrent.Promise import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} +import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index be42497113469..4a87be5f61195 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -23,7 +23,6 @@ import java.util.{Locale, MissingFormatArgumentException} import scala.util.control.NonFatal -import org.apache.commons.io.FileUtils import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.SparkException diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index a685549290f0e..d9b6bb43c2b47 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.scalatest.BeforeAndAfter import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index f01a03996821a..907bb86ad0c1c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -41,7 +41,6 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.TableCatalog import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 0082fa87e00f1..46a8e9660a207 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -24,7 +24,6 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ import scala.collection.mutable.HashMap -import scala.language.implicitConversions import org.apache.commons.lang3.{JavaVersion, SystemUtils} import 
org.apache.hadoop.conf.Configuration diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index a78e1cebc588c..9bc99b08c2cc8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -57,7 +57,6 @@ import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.hive.HiveExternalCatalog import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index a5417b2c5ff6d..44a3a4c590934 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -41,13 +41,12 @@ import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, CatalogTablePartition, CatalogUtils, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils -import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AtomicType, IntegralType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala index 4be3cd45454c6..c712a4a2b7c23 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/SaveAsHiveFile.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive.execution -import java.io.{File, IOException} +import java.io.IOException import java.net.URI import java.text.SimpleDateFormat import java.util.{Date, Locale, Random} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala index da34c54cb36a2..e71b11e7a3f41 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/execution/benchmark/InsertIntoHiveTableBenchmark.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHive /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index 270595b0011e9..e413e0ee73cb9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive -import java.net.URI - import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index 86fc32cd8ca63..b3ea54a7bc931 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -25,7 +25,6 @@ import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * A suite of tests for the Parquet support through the data sources API. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala index 54c64a4eeb190..89131a79e59de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShimSuite.scala @@ -16,9 +16,6 @@ */ package org.apache.spark.sql.hive -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.serde2.ColumnProjectionUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 446923ad23201..3e7c3e6799724 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} +import org.apache.spark.sql.internal.HiveSerDe class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 501a877e8b7fb..77d54ed45a5de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -770,8 +770,6 @@ object SPARK_14244 extends QueryTest { val hiveContext = new TestHiveContext(sparkContext) spark = hiveContext.sparkSession - import hiveContext.implicits._ - try { val window = Window.orderBy("id") val df = spark.range(2).select(cume_dist().over(window).as("cdist")).orderBy("cdist") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala index ca1af73b038a7..d0af8dc7ae49f 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUserDefinedTypeSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.hive import scala.collection.JavaConverters._ -import scala.util.Random import org.apache.hadoop.hive.ql.udf.generic.GenericUDF import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, StandardListObjectInspector} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala index 4ad97eaa2b1c8..d8e1e01292820 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala @@ -23,9 +23,8 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SQLTestUtils} +import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.ChildFirstURLClassLoader class HiveUtilsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 1e396553c9c52..483622b16762a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.hive -import java.io.File import java.sql.Timestamp -import com.google.common.io.Files -import org.apache.hadoop.fs.FileSystem - import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 7d5a200606356..43d1ba04c561d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException import org.apache.spark.sql.catalyst.catalog.{CatalogColumnStat, CatalogStatistics, HiveTableRelation} -import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, HistogramBin, HistogramSerializer} +import org.apache.spark.sql.catalyst.plans.logical.HistogramBin import org.apache.spark.sql.catalyst.util.{DateTimeUtils, StringUtils} import org.apache.spark.sql.execution.command.{AnalyzeColumnCommand, CommandUtils, DDLUtils} import org.apache.spark.sql.execution.datasources.LogicalRelation diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala index 77956f4fe69da..b94d517e89e30 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuite.scala @@ -21,7 +21,6 @@ import java.security.PrivilegedExceptionAction import 
org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.UserGroupInformation -import org.scalatest.{BeforeAndAfterAll, PrivateMethodTester} import org.apache.spark.util.Utils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index cea7c5686054a..1cabf6033e8d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec -import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.test.{HiveTestJars, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala index da7dfd05f33d6..8aae7a1545b1a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSQLViewSuite.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.hive.execution -import org.apache.spark.sql.{AnalysisException, Row, SaveMode, SparkSession} +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.execution.SQLViewSuite -import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} +import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types.{NullType, StructType} /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index a8b10fc94d880..1018ae5b68895 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.hive.execution -import java.io.File import java.sql.Timestamp -import org.apache.commons.io.FileUtils import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.scalatest.exceptions.TestFailedException @@ -28,7 +26,6 @@ import org.apache.spark.{SparkException, TestUtils} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.execution._ import org.apache.spark.sql.functions._ -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index ba6dbb01d5901..4a50621d89d4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -21,7 +21,6 @@ import java.io.{File, IOException} import org.apache.spark.sql.Row import org.apache.spark.sql.functions.col -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.hive.test.TestHive.implicits._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index dd797b39e0939..9e8046b9ef544 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -34,7 +34,6 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.command.FunctionsCommand import org.apache.spark.sql.functions.max -import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index 993a730524f6f..8e35cd034311d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, EqualTo, Expression, IsNotNull, Literal} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, Expression, IsNotNull, Literal} import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index a69a949e3a3a2..712f81d98753e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -45,7 +45,6 @@ import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ import org.apache.spark.tags.SlowHiveTest -import org.apache.spark.util.Utils case class Nested1(f1: Nested2) case class Nested2(f2: Nested3) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala index 1f1a5568b0201..50f13efccc915 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/UDAQuerySuite.scala @@ -17,23 +17,15 @@ package org.apache.spark.sql.hive.execution -import java.lang.{Double => jlDouble, Integer => jlInt, Long => jlLong} - -import scala.collection.JavaConverters._ -import scala.util.Random - -import test.org.apache.spark.sql.MyDoubleAvg 
-import test.org.apache.spark.sql.MyDoubleSum +import java.lang.{Double => jlDouble, Long => jlLong} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.GenericInternalRow -import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.expressions.{Aggregator} +import org.apache.spark.sql.expressions.Aggregator import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 5669cb757a678..f7c13ea047da7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -23,7 +23,6 @@ import java.util.{Set => JavaSet} import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path diff --git a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala index a2571b910f615..99d59e4a1447a 100644 --- a/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala +++ b/streaming/src/main/scala/org/apache/spark/status/api/v1/streaming/ApiStreamingRootResource.scala @@ -23,9 +23,7 @@ import javax.ws.rs.core.MediaType import org.apache.spark.status.api.v1.NotFoundException import org.apache.spark.streaming.Time -import org.apache.spark.streaming.ui.StreamingJobProgressListener import org.apache.spark.streaming.ui.StreamingJobProgressListener._ -import org.apache.spark.ui.SparkUI @Produces(Array(MediaType.APPLICATION_JSON)) private[v1] class ApiStreamingRootResource extends BaseStreamingAppResource { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index 734c6ef42696e..c4cd1a9dc336b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -17,8 +17,6 @@ package org.apache.spark.streaming -import scala.language.implicitConversions - import org.apache.spark.annotation.Experimental /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index d038021e93e73..4ac1c62822e7a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -21,7 +21,6 @@ import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.language.implicitConversions import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala index 6b332206e8f6d..9d4b67bccecaf 
100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala @@ -22,7 +22,6 @@ import scala.util.Random import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.rdd.BlockRDD import org.apache.spark.storage.{StorageLevel, StreamBlockId} -import org.apache.spark.streaming.StreamingConf.RECEIVER_WAL_ENABLE_CONF_KEY import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD import org.apache.spark.streaming.receiver.{BlockManagerBasedStoreResult, Receiver, WriteAheadLogBasedStoreResult} diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala index 55c2950261a07..7ce4343acbdac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala @@ -17,11 +17,10 @@ package org.apache.spark.streaming -import java.io.{File, IOException, ObjectInputStream} +import java.io.{IOException, ObjectInputStream} import java.util.concurrent.{ConcurrentLinkedQueue, TimeUnit} import scala.collection.JavaConverters._ -import scala.language.implicitConversions import scala.reflect.ClassTag import org.scalatest.BeforeAndAfterEach diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala index cd867aa8132bc..31456b0b95b18 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala @@ -25,7 +25,6 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} import org.scalatest.concurrent.Eventually._ -import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ import org.scalatest.time.SpanSugar._ diff --git a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala index 293498ae5c37b..c2b039244d01f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.streaming.scheduler import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{never, reset, times, verify, when} -import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} +import org.scalatest.PrivateMethodTester import org.scalatest.concurrent.Eventually.{eventually, timeout} import org.scalatest.time.SpanSugar._ import org.scalatestplus.mockito.MockitoSugar From 014e1fbb3aba81a803c963fc0b7f4a8d1d70e253 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 19 Nov 2020 14:01:42 +0800 Subject: [PATCH 0516/1009] [SPARK-27421][SQL] Fix filter for int column and value class java.lang.String when pruning partition column ### What changes were proposed in this pull request? This pr fix filter for int column and value class java.lang.String when pruning partition column. 
How to reproduce this issue: ```scala spark.sql("CREATE table test (name STRING) partitioned by (id int) STORED AS PARQUET") spark.sql("CREATE VIEW test_view as select cast(id as string) as id, name from test") spark.sql("SELECT * FROM test_view WHERE id = '0'").explain ``` ``` 20/11/15 06:19:01 INFO audit: ugi=root ip=unknown-ip-addr cmd=get_partitions_by_filter : db=default tbl=test 20/11/15 06:19:01 INFO MetaStoreDirectSql: Unable to push down SQL filter: Cannot push down filter for int column and value class java.lang.String 20/11/15 06:19:01 ERROR SparkSQLDriver: Failed in [SELECT * FROM test_view WHERE id = '0'] java.lang.RuntimeException: Caught Hive MetaException attempting to get partition metadata by filter from Hive. You can set the Spark configuration setting spark.sql.hive.manageFilesourcePartitions to false to work around this problem, however this will result in degraded performance. Please report a bug: https://issues.apache.org/jira/browse/SPARK at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:828) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$getPartitionsByFilter$1(HiveClientImpl.scala:745) at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294) at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227) at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276) at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:743) ``` ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30380 from wangyum/SPARK-27421. 
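For illustration only (not part of the patch): a minimal, self-contained Scala sketch of the behaviour the HiveShim change below enforces. The `DataType`, `Expr`, and `Attr` stand-ins are hypothetical simplifications of Catalyst classes; the point is just that a cast is only unwrapped when it is integral-to-integral, so `cast(id as string) = '0'` no longer pairs an int partition column with a `java.lang.String` value during metastore filter pushdown.

```scala
// Simplified sketch with made-up stand-in types (not Spark's Catalyst classes).
object ExtractorSketch {
  sealed trait DataType
  case object IntType extends DataType
  case object LongType extends DataType
  case object StringType extends DataType

  sealed trait Expr { def dataType: DataType }
  case class Attr(name: String, dataType: DataType) extends Expr
  case class Cast(child: Expr, dataType: DataType) extends Expr

  private def isIntegral(dt: DataType): Boolean = dt == IntType || dt == LongType

  // Mirrors the patched rule in spirit: only strip integral-to-integral casts.
  // The real shim additionally checks Cast.canUpCast to rule out lossy casts.
  object ExtractAttribute {
    def unapply(expr: Expr): Option[Attr] = expr match {
      case a: Attr => Some(a)
      case Cast(child, dt) if isIntegral(child.dataType) && isIntegral(dt) =>
        unapply(child)
      case _ => None
    }
  }

  def main(args: Array[String]): Unit = {
    val id = Attr("id", IntType)
    // cast(id as string): the cast is kept, so no unsafe string-vs-int pushdown
    println(ExtractAttribute.unapply(Cast(id, StringType))) // None
    // cast(id as bigint): integral up-cast, safe to strip
    println(ExtractAttribute.unapply(Cast(id, LongType)))   // Some(Attr(id,IntType))
  }
}
```

In the actual change below, `Cast.canUpCast` still guards the match; the sketch collapses that check into the integral-to-integral condition for brevity.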
Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../org/apache/spark/sql/hive/client/HiveShim.scala | 2 +- .../sql/hive/client/HivePartitionFilteringSuite.scala | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 44a3a4c590934..d989f0154ea95 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -730,7 +730,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { def unapply(expr: Expression): Option[Attribute] = { expr match { case attr: Attribute => Some(attr) - case Cast(child @ AtomicType(), dt: AtomicType, _) + case Cast(child @ IntegralType(), dt: IntegralType, _) if Cast.canUpCast(child.dataType.asInstanceOf[AtomicType], dt) => unapply(child) case _ => None } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index daa785bf110c5..81186909bb167 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StructType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType} import org.apache.spark.util.Utils class HivePartitionFilteringSuite(version: String) @@ -290,6 +290,13 @@ class HivePartitionFilteringSuite(version: String) (20170101 to 20170103, 0 to 4, Seq("ab", "bb")) :: Nil) } + test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") { + val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba")) + testMetastorePartitionFiltering( + attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"), + day :: Nil) + } + private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], From 0b0fb70b09c7424805478a261e264d9df044fb96 Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Thu, 19 Nov 2020 06:25:37 +0000 Subject: [PATCH 0517/1009] [SPARK-33400][SQL] Normalize sameOrderExpressions in SortOrder to avoid unnecessary sort operations ### What changes were proposed in this pull request? This pull request tries to normalize the SortOrder properly to prevent unnecessary sort operators. Currently the sameOrderExpressions are not normalized as part of AliasAwareOutputOrdering. Example: consider this join of three tables: """ |SELECT t2id, t3.id as t3id |FROM ( | SELECT t1.id as t1id, t2.id as t2id | FROM t1, t2 | WHERE t1.id = t2.id |) t12, t3 |WHERE t1id = t3.id """. 
The plan for this looks like: *(8) Project [t2id#1059L, id#1004L AS t3id#1060L] +- *(8) SortMergeJoin [t2id#1059L], [id#1004L], Inner :- *(5) Sort [t2id#1059L ASC NULLS FIRST ], false, 0 <----------------------------- : +- *(5) Project [id#1000L AS t2id#1059L] : +- *(5) SortMergeJoin [id#996L], [id#1000L], Inner : :- *(2) Sort [id#996L ASC NULLS FIRST ], false, 0 : : +- Exchange hashpartitioning(id#996L, 5), true, [id=#1426] : : +- *(1) Range (0, 10, step=1, splits=2) : +- *(4) Sort [id#1000L ASC NULLS FIRST ], false, 0 : +- Exchange hashpartitioning(id#1000L, 5), true, [id=#1432] : +- *(3) Range (0, 20, step=1, splits=2) +- *(7) Sort [id#1004L ASC NULLS FIRST ], false, 0 +- Exchange hashpartitioning(id#1004L, 5), true, [id=#1443] +- *(6) Range (0, 30, step=1, splits=2) In this plan, the marked sort node could have been avoided as the data is already sorted on "t2.id" by the lower SortMergeJoin. ### Why are the changes needed? To remove unneeded Sort operators. ### Does this PR introduce any user-facing change? No ### How was this patch tested? New UT added. Closes #30302 from prakharjain09/SPARK-33400-sortorder. Authored-by: Prakhar Jain Signed-off-by: Wenchen Fan --- .../AliasAwareOutputExpression.scala | 6 +++- .../spark/sql/execution/PlannerSuite.scala | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3cbe1654ea2cd..3ba8745be995f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -65,7 +65,11 @@ trait AliasAwareOutputOrdering extends AliasAwareOutputExpression { final override def outputOrdering: Seq[SortOrder] = { if (hasAlias) { - orderingExpressions.map(normalizeExpression(_).asInstanceOf[SortOrder]) + orderingExpressions.map { sortOrder => + val newSortOrder = normalizeExpression(sortOrder).asInstanceOf[SortOrder] + val newSameOrderExpressions = newSortOrder.sameOrderExpressions.map(normalizeExpression) + newSortOrder.copy(sameOrderExpressions = newSameOrderExpressions) + } } else { orderingExpressions } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index b631f08405a39..6de81cc414d7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -1059,6 +1059,37 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } + test("SPARK-33400: Normalization of sortOrder should take care of sameOrderExprs") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2", "t3") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + spark.range(30).repartition($"id").createTempView("t3") + val planned = sql( + """ + |SELECT t2id, t3.id as t3id + |FROM ( + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + |) t12, t3 + |WHERE t2id = t3.id + """.stripMargin).queryExecution.executedPlan + + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + + val projects = planned.collect { case p: ProjectExec => p } + 
assert(projects.exists(_.outputOrdering match { + case Seq(SortOrder(_, Ascending, NullsFirst, sameOrderExprs)) => + sameOrderExprs.size == 1 && sameOrderExprs.head.isInstanceOf[AttributeReference] && + sameOrderExprs.head.asInstanceOf[AttributeReference].name == "t2id" + case _ => false + })) + } + } + } + test("aliases to expressions should not be replaced") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTempView("df1", "df2") { From d5e7bd0cc497a5ea2baab0046501a804e7d42aa4 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 18 Nov 2020 23:59:11 -0800 Subject: [PATCH 0518/1009] [SPARK-33483][INFRA][TESTS] Fix rat exclusion patterns and add a LICENSE ### What changes were proposed in this pull request? This PR fixes the RAT exclusion rule which was originated from SPARK-1144 (Apache Spark 1.0) ### Why are the changes needed? This prevents the situation like https://github.com/apache/spark/pull/30415. Currently, it missed `catalog` directory due to `.log` rule. ``` $ dev/check-license Could not find Apache license headers in the following files: !????? /Users/dongjoon/APACHE/spark-merge/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java !????? /Users/dongjoon/APACHE/spark-merge/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI with the new rule. Closes #30418 from dongjoon-hyun/SPARK-RAT. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/.rat-excludes | 28 ++++++++++++------- .../resources/data/scripts/test_transform.py | 18 ++++++++++++ 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 0e892a927906a..7da330dfe1fbf 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -42,11 +42,11 @@ jquery.dataTables.1.10.20.min.js jquery.mustache.js jsonFormatter.min.css jsonFormatter.min.js -.*avsc -.*txt -.*json -.*data -.*log +.*\.avsc +.*\.txt +.*\.json +.*\.data +.*\.log pyspark-coverage-site/* cloudpickle/* join.py @@ -98,17 +98,17 @@ local-1430917381535_2 DESCRIPTION NAMESPACE test_support/* -.*Rd +.*\.Rd help/* html/* INDEX .lintr gen-java.* -.*avpr -.*parquet +.*\.avpr +.*\.parquet spark-deps-.* -.*csv -.*tsv +.*\.csv +.*\.tsv .*\.sql .Rbuildignore META-INF/* @@ -125,3 +125,11 @@ application_1578436911597_0052 config.properties app-20200706201101-0003 py.typed +_metadata +_SUCCESS +part-00000 +.*\.res +flights_tiny.txt.1 +over1k +over10k +exported_table/* diff --git a/sql/hive/src/test/resources/data/scripts/test_transform.py b/sql/hive/src/test/resources/data/scripts/test_transform.py index ac6d11d8b919c..dedb370f6c90e 100755 --- a/sql/hive/src/test/resources/data/scripts/test_transform.py +++ b/sql/hive/src/test/resources/data/scripts/test_transform.py @@ -1,3 +1,21 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# import sys delim = sys.argv[1] From ef2638c3e3aa1d2ce137f1c50c9697a7877d1719 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 19 Nov 2020 00:12:22 -0800 Subject: [PATCH 0519/1009] [SPARK-33183][SQL][FOLLOW-UP] Update rule RemoveRedundantSorts config version ### What changes were proposed in this pull request? This PR is a follow up for #30093 to updates the config `spark.sql.execution.removeRedundantSorts` version to 2.4.8. ### Why are the changes needed? To update the rule version it has been backported to 2.4. #30194 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30420 from allisonwang-db/spark-33183-follow-up. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5c17f0434bc79..43014feecfd8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1277,7 +1277,7 @@ object SQLConf { val REMOVE_REDUNDANT_SORTS_ENABLED = buildConf("spark.sql.execution.removeRedundantSorts") .internal() .doc("Whether to remove redundant physical sort node") - .version("3.1.0") + .version("2.4.8") .booleanConf .createWithDefault(true) From a03c540cf7fe92160caf41ef6d2e2993f667dc59 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 19 Nov 2020 13:29:01 +0000 Subject: [PATCH 0520/1009] [SPARK-33472][SQL] Adjust RemoveRedundantSorts rule order ### What changes were proposed in this pull request? This PR switched the order for the rule `RemoveRedundantSorts` and `EnsureRequirements` so that `EnsureRequirements` will be invoked before `RemoveRedundantSorts` to avoid IllegalArgumentException when instantiating PartitioningCollection. ### Why are the changes needed? `RemoveRedundantSorts` rule uses SparkPlan's `outputPartitioning` to check whether a sort node is redundant. Currently, it is added before `EnsureRequirements`. Since `PartitioningCollection` requires left and right partitioning to have the same number of partitions, which is not necessarily true before applying `EnsureRequirements`, the rule can fail with the following exception: ``` IllegalArgumentException: requirement failed: PartitioningCollection requires all of its partitionings have the same numPartitions. ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #30373 from allisonwang-db/sort-follow-up. 
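As a rough illustration of the failure mode described above (a sketch against Spark's catalyst internals, not code from this patch), building a `PartitioningCollection` from two children with mismatched partition counts trips the same `require` that `EnsureRequirements` would otherwise have satisfied by inserting shuffles first:
```scala
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, PartitioningCollection}
import org.apache.spark.sql.types.LongType

// Two join sides hashed on the same key but with different partition counts,
// as can happen before EnsureRequirements has run.
val key = AttributeReference("key", LongType)()
val left = HashPartitioning(Seq(key), numPartitions = 2)
val right = HashPartitioning(Seq(key), numPartitions = 200)

// Throws: requirement failed: PartitioningCollection requires all of its
// partitionings have the same numPartitions.
PartitioningCollection(Seq(left, right))
```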
Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../spark/sql/execution/QueryExecution.scala | 4 ++- .../spark/sql/execution/SparkPlan.scala | 7 ++++- .../adaptive/AdaptiveSparkPlanExec.scala | 2 +- .../execution/RemoveRedundantSortsSuite.scala | 28 +++++++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 77f7a4e553f06..040d1f36ed8a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -343,8 +343,10 @@ object QueryExecution { PlanDynamicPruningFilters(sparkSession), PlanSubqueries(sparkSession), RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + // `RemoveRedundantSorts` needs to be added before `EnsureRequirements` to guarantee the same + // number of partitions when instantiating PartitioningCollection. + RemoveRedundantSorts, DisableUnnecessaryBucketedScan, ApplyColumnarRulesAndInsertTransitions(sparkSession.sessionState.columnarRules), CollapseCodegenStages(), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ead8c00031112..062aa69b3adb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -135,7 +135,12 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ def longMetric(name: String): SQLMetric = metrics(name) // TODO: Move to `DistributedPlan` - /** Specifies how data is partitioned across different nodes in the cluster. */ + /** + * Specifies how data is partitioned across different nodes in the cluster. + * Note this method may fail if it is invoked before `EnsureRequirements` is applied + * since `PartitioningCollection` requires all its partitionings to have + * the same number of partitions. + */ def outputPartitioning: Partitioning = UnknownPartitioning(0) // TODO: WRONG WIDTH! /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 0865e42b440db..570edbf5f78a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -88,8 +88,8 @@ case class AdaptiveSparkPlanExec( // Exchange nodes) after running these rules. 
private def queryStagePreparationRules: Seq[Rule[SparkPlan]] = Seq( RemoveRedundantProjects, - RemoveRedundantSorts, EnsureRequirements, + RemoveRedundantSorts, DisableUnnecessaryBucketedScan ) ++ context.session.sessionState.queryStagePrepRules diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala index 54c5a33441900..751078d08fda9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantSortsSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.plans.physical.{RangePartitioning, UnknownPartitioning} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecutionSuite, EnableAdaptiveExecutionSuite} +import org.apache.spark.sql.execution.joins.ShuffledJoin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -135,6 +137,32 @@ abstract class RemoveRedundantSortsSuiteBase } } } + + test("SPARK-33472: shuffled join with different left and right side partition numbers") { + withTempView("t1", "t2") { + spark.range(0, 100, 1, 2).select('id as "key").createOrReplaceTempView("t1") + (0 to 100).toDF("key").createOrReplaceTempView("t2") + + val queryTemplate = """ + |SELECT /*+ %s(t1) */ t1.key + |FROM t1 JOIN t2 ON t1.key = t2.key + |WHERE t1.key > 10 AND t2.key < 50 + |ORDER BY t1.key ASC + """.stripMargin + + Seq(("MERGE", 3), ("SHUFFLE_HASH", 1)).foreach { case (hint, count) => + val query = queryTemplate.format(hint) + val df = sql(query) + val sparkPlan = df.queryExecution.sparkPlan + val join = sparkPlan.collect { case j: ShuffledJoin => j }.head + val leftPartitioning = join.left.outputPartitioning + assert(leftPartitioning.isInstanceOf[RangePartitioning]) + assert(leftPartitioning.numPartitions == 2) + assert(join.right.outputPartitioning == UnknownPartitioning(0)) + checkSorts(query, count, count) + } + } + } } class RemoveRedundantSortsSuite extends RemoveRedundantSortsSuiteBase From 21b13506cd822ed7db343bff4ca25d9555178f10 Mon Sep 17 00:00:00 2001 From: ulysses Date: Thu, 19 Nov 2020 13:31:10 +0000 Subject: [PATCH 0521/1009] [SPARK-33442][SQL] Change Combine Limit to Eliminate limit using max row ### What changes were proposed in this pull request? Change `CombineLimits` name to `EliminateLimits` and add check if `Limit` child max row <= limit. ### Why are the changes needed? In Add-hoc scene, we always add limit for the query if user have no special limit value, but not all limit is nesessary. A general negative example is ``` select count(*) from t limit 100000; ``` It will be great if we can eliminate limit at Spark side. 
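As a quick illustration of the new rule (a sketch, not one of the tests added in this patch; it assumes a running `SparkSession` named `spark`), a LIMIT that is provably no smaller than its child's maximum row count simply disappears from the optimized plan:
```scala
// A global aggregate produces at most one row, so LIMIT 100000 is redundant
// and EliminateLimits removes the GlobalLimit/LocalLimit pair entirely.
val optimized = spark.sql("SELECT count(*) FROM range(1000) LIMIT 100000")
  .queryExecution.optimizedPlan
assert(!optimized.toString.contains("GlobalLimit"))
```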
Also, we make a benchmark for this case ``` runBenchmark("Sort and Limit") { val N = 100000 val benchmark = new Benchmark("benchmark sort and limit", N) benchmark.addCase("TakeOrderedAndProject", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } benchmark.addCase("Sort And Limit", 3) { _ => withSQLConf("spark.sql.execution.topKSortFallbackThreshold" -> "-1") { spark.range(N).toDF("c").repartition(200).sort("c").take(200000) } } benchmark.addCase("Sort", 3) { _ => spark.range(N).toDF("c").repartition(200).sort("c").collect() } benchmark.run() } ``` and the result is ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.15.6 Intel(R) Core(TM) i5-5257U CPU 2.70GHz benchmark sort and limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ TakeOrderedAndProject 1833 2259 382 0.1 18327.1 1.0X Sort And Limit 1417 1658 285 0.1 14167.5 1.3X Sort 1324 1484 225 0.1 13238.3 1.4X ``` It shows that it makes sense to replace `TakeOrderedAndProjectExec` with `Sort + Project`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30368 from ulysses-you/SPARK-33442. Authored-by: ulysses Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 19 +- .../optimizer/CombiningLimitsSuite.scala | 31 +- .../optimizer/LimitPushdownSuite.scala | 4 +- .../approved-plans-v1_4/q16.sf100/explain.txt | 6 +- .../q16.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q16/explain.txt | 6 +- .../approved-plans-v1_4/q16/simplified.txt | 4 +- .../q23a.sf100/explain.txt | 303 +++++++------- .../q23a.sf100/simplified.txt | 381 +++++++++--------- .../approved-plans-v1_4/q23a/explain.txt | 239 ++++++----- .../approved-plans-v1_4/q23a/simplified.txt | 273 +++++++------ .../approved-plans-v1_4/q38.sf100/explain.txt | 139 +++---- .../q38.sf100/simplified.txt | 189 +++++---- .../approved-plans-v1_4/q38/explain.txt | 113 +++--- .../approved-plans-v1_4/q38/simplified.txt | 125 +++--- .../approved-plans-v1_4/q92.sf100/explain.txt | 6 +- .../q92.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q92/explain.txt | 6 +- .../approved-plans-v1_4/q92/simplified.txt | 4 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 6 +- .../q94.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q94/explain.txt | 6 +- .../approved-plans-v1_4/q94/simplified.txt | 4 +- .../approved-plans-v1_4/q95.sf100/explain.txt | 6 +- .../q95.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q95/explain.txt | 6 +- .../approved-plans-v1_4/q95/simplified.txt | 4 +- .../approved-plans-v1_4/q96.sf100/explain.txt | 6 +- .../q96.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q96/explain.txt | 6 +- .../approved-plans-v1_4/q96/simplified.txt | 4 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 63 ++- .../q97.sf100/simplified.txt | 91 +++-- .../approved-plans-v1_4/q97/explain.txt | 63 ++- .../approved-plans-v1_4/q97/simplified.txt | 91 +++-- .../spark/sql/streaming/StreamSuite.scala | 2 +- 36 files changed, 1113 insertions(+), 1113 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 86c46e072c887..c4b9936fa4c4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -85,7 +85,7 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeWindowFunctions, CollapseWindow, CombineFilters, - CombineLimits, + EliminateLimits, CombineUnions, // Constant folding and strength reduction TransposeWindow, @@ -1451,11 +1451,20 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { } /** - * Combines two adjacent [[Limit]] operators into one, merging the - * expressions into one single expression. + * This rule optimizes Limit operators by: + * 1. Eliminate [[Limit]] operators if it's child max row <= limit. + * 2. Combines two adjacent [[Limit]] operators into one, merging the + * expressions into one single expression. */ -object CombineLimits extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { +object EliminateLimits extends Rule[LogicalPlan] { + private def canEliminate(limitExpr: Expression, child: LogicalPlan): Boolean = { + limitExpr.foldable && child.maxRows.exists { _ <= limitExpr.eval().asInstanceOf[Int] } + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case Limit(l, child) if canEliminate(l, child) => + child + case GlobalLimit(le, GlobalLimit(ne, grandChild)) => GlobalLimit(Least(Seq(ne, le)), grandChild) case LocalLimit(le, LocalLimit(ne, grandChild)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index b190dd5a7c220..70f130f834c68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -30,8 +30,8 @@ class CombiningLimitsSuite extends PlanTest { Batch("Column Pruning", FixedPoint(100), ColumnPruning, RemoveNoopOperators) :: - Batch("Combine Limit", FixedPoint(10), - CombineLimits) :: + Batch("Eliminate Limit", FixedPoint(10), + EliminateLimits) :: Batch("Constant Folding", FixedPoint(10), NullPropagation, ConstantFolding, @@ -90,4 +90,31 @@ class CombiningLimitsSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33442: Change Combine Limit to Eliminate limit using max row") { + // test child max row <= limit. + val query1 = testRelation.select().groupBy()(count(1)).limit(1).analyze + val optimized1 = Optimize.execute(query1) + val expected1 = testRelation.select().groupBy()(count(1)).analyze + comparePlans(optimized1, expected1) + + // test child max row > limit. 
+ val query2 = testRelation.select().groupBy()(count(1)).limit(0).analyze + val optimized2 = Optimize.execute(query2) + comparePlans(optimized2, query2) + + // test child max row is none + val query3 = testRelation.select(Symbol("a")).limit(1).analyze + val optimized3 = Optimize.execute(query3) + comparePlans(optimized3, query3) + + // test sort after limit + val query4 = testRelation.select().groupBy()(count(1)) + .orderBy(count(1).asc).limit(1).analyze + val optimized4 = Optimize.execute(query4) + // the top project has been removed, so we need optimize expected too + val expected4 = Optimize.execute( + testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) + comparePlans(optimized4, expected4) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index d993aee3d7518..e365e3300096e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -33,7 +33,7 @@ class LimitPushdownSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Limit pushdown", FixedPoint(100), LimitPushDown, - CombineLimits, + EliminateLimits, ConstantFolding, BooleanSimplification) :: Nil } @@ -74,7 +74,7 @@ class LimitPushdownSuite extends PlanTest { Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).limit(2) val unionOptimized = Optimize.execute(unionQuery.analyze) val unionCorrectAnswer = - Limit(2, Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1))).analyze + Union(testRelation.limit(1), testRelation2.select('d, 'e, 'f).limit(1)).analyze comparePlans(unionOptimized, unionCorrectAnswer) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index 509fb0133095b..a446163e3d29d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (44) +* Sort (44) +- * HashAggregate (43) +- Exchange (42) +- * HashAggregate (41) @@ -244,7 +244,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#23, sum(UnscaledValue(cs_net_profit#7))#24, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#23,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#24,17,2) AS total net profit #32] -(44) TakeOrderedAndProject +(44) Sort [codegen id : 12] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt index ea9a0b27ff700..73a9b58010f58 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (12) +WholeStageCodegen (12) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 2ae939cfe41f3..ea7e298393e4c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#22, sum(UnscaledValue(cs_net_profit#7))#23, count(cs_order_number#5)#27] Results [3]: [count(cs_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt index a044b05365f8e..169f07c2d85e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(cs_ext_ship_cost)),sum(UnscaledValue(cs_net_profit)),count(cs_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index bda9824b71b5a..85f71b6cd9388 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -1,104 +1,103 @@ == Physical Plan == -CollectLimit (100) -+- * HashAggregate (99) - +- Exchange (98) - +- * HashAggregate (97) - +- Union (96) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (52) - : : +- SortMergeJoin LeftSemi (51) - : : :- * Sort (33) - : : : +- Exchange (32) - : : : +- * Project (31) - : : : +- SortMergeJoin LeftSemi (30) - : : : :- * Sort (5) - : : : : +- Exchange (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan 
parquet default.catalog_sales (1) - : : : +- * Sort (29) - : : : +- * Project (28) - : : : +- * Filter (27) - : : : +- * HashAggregate (26) - : : : +- * HashAggregate (25) - : : : +- * Project (24) - : : : +- * SortMergeJoin Inner (23) - : : : :- * Sort (17) - : : : : +- Exchange (16) - : : : : +- * Project (15) - : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : :- * Filter (8) - : : : : : +- * ColumnarToRow (7) - : : : : : +- Scan parquet default.store_sales (6) - : : : : +- BroadcastExchange (13) - : : : : +- * Project (12) - : : : : +- * Filter (11) - : : : : +- * ColumnarToRow (10) - : : : : +- Scan parquet default.date_dim (9) - : : : +- * Sort (22) - : : : +- Exchange (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.item (18) - : : +- * Sort (50) - : : +- * Project (49) - : : +- * Filter (48) - : : +- * HashAggregate (47) - : : +- * HashAggregate (46) - : : +- * Project (45) - : : +- * SortMergeJoin Inner (44) - : : :- * Sort (38) - : : : +- Exchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store_sales (34) - : : +- * Sort (43) - : : +- Exchange (42) - : : +- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.customer (39) - : +- BroadcastExchange (57) - : +- * Project (56) - : +- * Filter (55) - : +- * ColumnarToRow (54) - : +- Scan parquet default.date_dim (53) - +- * Project (95) - +- * BroadcastHashJoin Inner BuildRight (94) - :- * Project (92) - : +- SortMergeJoin LeftSemi (91) - : :- * Sort (79) - : : +- Exchange (78) - : : +- * Project (77) - : : +- SortMergeJoin LeftSemi (76) - : : :- * Sort (64) - : : : +- Exchange (63) - : : : +- * Filter (62) - : : : +- * ColumnarToRow (61) - : : : +- Scan parquet default.web_sales (60) - : : +- * Sort (75) - : : +- * Project (74) - : : +- * Filter (73) - : : +- * HashAggregate (72) - : : +- * HashAggregate (71) - : : +- * Project (70) - : : +- * SortMergeJoin Inner (69) - : : :- * Sort (66) - : : : +- ReusedExchange (65) - : : +- * Sort (68) - : : +- ReusedExchange (67) - : +- * Sort (90) - : +- * Project (89) - : +- * Filter (88) - : +- * HashAggregate (87) - : +- * HashAggregate (86) - : +- * Project (85) - : +- * SortMergeJoin Inner (84) - : :- * Sort (81) - : : +- ReusedExchange (80) - : +- * Sort (83) - : +- ReusedExchange (82) - +- ReusedExchange (93) +* HashAggregate (99) ++- Exchange (98) + +- * HashAggregate (97) + +- Union (96) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (52) + : : +- SortMergeJoin LeftSemi (51) + : : :- * Sort (33) + : : : +- Exchange (32) + : : : +- * Project (31) + : : : +- SortMergeJoin LeftSemi (30) + : : : :- * Sort (5) + : : : : +- Exchange (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- * Sort (29) + : : : +- * Project (28) + : : : +- * Filter (27) + : : : +- * HashAggregate (26) + : : : +- * HashAggregate (25) + : : : +- * Project (24) + : : : +- * SortMergeJoin Inner (23) + : : : :- * Sort (17) + : : : : +- Exchange (16) + : : : : +- * Project (15) + : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.store_sales (6) + : : : : +- BroadcastExchange (13) + : : : : +- * Project (12) + : : : : +- * Filter (11) + : : : : +- * ColumnarToRow (10) + : : : : +- Scan parquet default.date_dim (9) + : : : +- * Sort (22) + : : : +- Exchange (21) + : : : +- * Filter 
(20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.item (18) + : : +- * Sort (50) + : : +- * Project (49) + : : +- * Filter (48) + : : +- * HashAggregate (47) + : : +- * HashAggregate (46) + : : +- * Project (45) + : : +- * SortMergeJoin Inner (44) + : : :- * Sort (38) + : : : +- Exchange (37) + : : : +- * Filter (36) + : : : +- * ColumnarToRow (35) + : : : +- Scan parquet default.store_sales (34) + : : +- * Sort (43) + : : +- Exchange (42) + : : +- * Filter (41) + : : +- * ColumnarToRow (40) + : : +- Scan parquet default.customer (39) + : +- BroadcastExchange (57) + : +- * Project (56) + : +- * Filter (55) + : +- * ColumnarToRow (54) + : +- Scan parquet default.date_dim (53) + +- * Project (95) + +- * BroadcastHashJoin Inner BuildRight (94) + :- * Project (92) + : +- SortMergeJoin LeftSemi (91) + : :- * Sort (79) + : : +- Exchange (78) + : : +- * Project (77) + : : +- SortMergeJoin LeftSemi (76) + : : :- * Sort (64) + : : : +- Exchange (63) + : : : +- * Filter (62) + : : : +- * ColumnarToRow (61) + : : : +- Scan parquet default.web_sales (60) + : : +- * Sort (75) + : : +- * Project (74) + : : +- * Filter (73) + : : +- * HashAggregate (72) + : : +- * HashAggregate (71) + : : +- * Project (70) + : : +- * SortMergeJoin Inner (69) + : : :- * Sort (66) + : : : +- ReusedExchange (65) + : : +- * Sort (68) + : : +- ReusedExchange (67) + : +- * Sort (90) + : +- * Project (89) + : +- * Filter (88) + : +- * HashAggregate (87) + : +- * HashAggregate (86) + : +- * Project (85) + : +- * SortMergeJoin Inner (84) + : :- * Sort (81) + : : +- ReusedExchange (80) + : +- * Sort (83) + : +- ReusedExchange (82) + +- ReusedExchange (93) (1) Scan parquet default.catalog_sales @@ -547,149 +546,145 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#65] Results [1]: [sum(sales#40)#65 AS sum(sales)#66] -(100) CollectLimit -Input [1]: [sum(sales)#66] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#36, [id=#37] -* HashAggregate (124) -+- Exchange (123) - +- * HashAggregate (122) - +- * HashAggregate (121) - +- * HashAggregate (120) - +- * Project (119) - +- * SortMergeJoin Inner (118) - :- * Sort (112) - : +- Exchange (111) - : +- * Project (110) - : +- * BroadcastHashJoin Inner BuildRight (109) - : :- * Filter (103) - : : +- * ColumnarToRow (102) - : : +- Scan parquet default.store_sales (101) - : +- BroadcastExchange (108) - : +- * Project (107) - : +- * Filter (106) - : +- * ColumnarToRow (105) - : +- Scan parquet default.date_dim (104) - +- * Sort (117) - +- Exchange (116) - +- * Filter (115) - +- * ColumnarToRow (114) - +- Scan parquet default.customer (113) - - -(101) Scan parquet default.store_sales +* HashAggregate (123) ++- Exchange (122) + +- * HashAggregate (121) + +- * HashAggregate (120) + +- * HashAggregate (119) + +- * Project (118) + +- * SortMergeJoin Inner (117) + :- * Sort (111) + : +- Exchange (110) + : +- * Project (109) + : +- * BroadcastHashJoin Inner BuildRight (108) + : :- * Filter (102) + : : +- * ColumnarToRow (101) + : : +- Scan parquet default.store_sales (100) + : +- BroadcastExchange (107) + : +- * Project (106) + : +- * Filter (105) + : +- * ColumnarToRow (104) + : +- Scan parquet default.date_dim (103) + +- * Sort (116) + +- Exchange (115) + +- * Filter (114) + +- * ColumnarToRow (113) + +- Scan parquet default.customer (112) + + +(100) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Batched: true 
Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(102) ColumnarToRow [codegen id : 2] +(101) ColumnarToRow [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -(103) Filter [codegen id : 2] +(102) Filter [codegen id : 2] Input [4]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Condition : (isnotnull(ss_customer_sk#24) AND isnotnull(ss_sold_date_sk#7)) -(104) Scan parquet default.date_dim +(103) Scan parquet default.date_dim Output [2]: [d_date_sk#9, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(105) ColumnarToRow [codegen id : 1] +(104) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] -(106) Filter [codegen id : 1] +(105) Filter [codegen id : 1] Input [2]: [d_date_sk#9, d_year#11] Condition : (d_year#11 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#9)) -(107) Project [codegen id : 1] +(106) Project [codegen id : 1] Output [1]: [d_date_sk#9] Input [2]: [d_date_sk#9, d_year#11] -(108) BroadcastExchange +(107) BroadcastExchange Input [1]: [d_date_sk#9] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] -(109) BroadcastHashJoin [codegen id : 2] +(108) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#7] Right keys [1]: [d_date_sk#9] Join condition: None -(110) Project [codegen id : 2] +(109) Project [codegen id : 2] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Input [5]: [ss_sold_date_sk#7, ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, d_date_sk#9] -(111) Exchange +(110) Exchange Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: hashpartitioning(ss_customer_sk#24, 5), true, [id=#68] -(112) Sort [codegen id : 3] +(111) Sort [codegen id : 3] Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 -(113) Scan parquet default.customer +(112) Scan parquet default.customer Output [1]: [c_customer_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(114) ColumnarToRow [codegen id : 4] +(113) ColumnarToRow [codegen id : 4] Input [1]: [c_customer_sk#28] -(115) Filter [codegen id : 4] +(114) Filter [codegen id : 4] Input [1]: [c_customer_sk#28] Condition : isnotnull(c_customer_sk#28) -(116) Exchange +(115) Exchange Input [1]: [c_customer_sk#28] Arguments: hashpartitioning(c_customer_sk#28, 5), true, [id=#69] -(117) Sort [codegen id : 5] +(116) Sort [codegen id : 5] Input [1]: [c_customer_sk#28] Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 -(118) SortMergeJoin [codegen id : 6] +(117) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#24] Right keys [1]: [c_customer_sk#28] Join condition: None -(119) Project [codegen id : 6] +(118) Project [codegen id : 6] Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -(120) HashAggregate [codegen id : 6] +(119) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] Keys [1]: [c_customer_sk#28] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#70, isEmpty#71] Results [3]: [c_customer_sk#28, sum#72, isEmpty#73] -(121) HashAggregate [codegen id : 6] +(120) HashAggregate [codegen id : 6] Input [3]: [c_customer_sk#28, sum#72, isEmpty#73] Keys [1]: [c_customer_sk#28] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#25 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2), true))#74 AS csales#75] -(122) HashAggregate [codegen id : 6] +(121) HashAggregate [codegen id : 6] Input [1]: [csales#75] Keys: [] Functions [1]: [partial_max(csales#75)] Aggregate Attributes [1]: [max#76] Results [1]: [max#77] -(123) Exchange +(122) Exchange Input [1]: [max#77] Arguments: SinglePartition, true, [id=#78] -(124) HashAggregate [codegen id : 7] +(123) HashAggregate [codegen id : 7] Input [1]: [max#77] Keys: [] Functions [1]: [max(csales#75)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt index 695e6ccd71821..5bb8bc5b99d0c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/simplified.txt @@ -1,209 +1,208 @@ -CollectLimit - WholeStageCodegen (36) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (35) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (17) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #2 - WholeStageCodegen (9) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - InputAdapter - SortMergeJoin [cs_item_sk,item_sk] - WholeStageCodegen (2) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (1) - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (8) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (4) - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - 
Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #6 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - WholeStageCodegen (15) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (7) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (6) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] +WholeStageCodegen (36) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (35) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (17) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] + InputAdapter + Exchange [cs_bill_customer_sk] #2 + WholeStageCodegen (9) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + InputAdapter + SortMergeJoin [cs_item_sk,item_sk] + WholeStageCodegen (2) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #3 + WholeStageCodegen (1) + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (8) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] + WholeStageCodegen (5) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #10 - WholeStageCodegen (2) - Project [ss_customer_sk,ss_quantity,ss_sales_price] + Exchange [ss_item_sk] #4 + WholeStageCodegen (4) + Project [ss_item_sk,d_date] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_sold_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Project [d_date_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk,d_date] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter - Scan 
parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.date_dim [d_date_sk,d_date,d_year] InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] + WholeStageCodegen (7) + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #12 - WholeStageCodegen (4) - Filter [c_customer_sk] + Exchange [i_item_sk] #6 + WholeStageCodegen (6) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (12) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #7 - WholeStageCodegen (11) - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - WholeStageCodegen (14) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #8 - WholeStageCodegen (13) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (16) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (34) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] - WholeStageCodegen (27) - Sort [ws_bill_customer_sk] - InputAdapter - Exchange [ws_bill_customer_sk] #14 - WholeStageCodegen (26) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + Scan parquet default.item [i_item_sk,i_item_desc] + WholeStageCodegen (15) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (7) + HashAggregate [max] [max(csales),tpcds_cmax,max] InputAdapter - SortMergeJoin [ws_item_sk,item_sk] - WholeStageCodegen (19) - Sort [ws_item_sk] + Exchange #9 + WholeStageCodegen (6) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #10 + WholeStageCodegen (2) + Project [ss_customer_sk,ss_quantity,ss_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet 
default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #12 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (12) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #7 + WholeStageCodegen (11) + Filter [ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + WholeStageCodegen (14) + Sort [c_customer_sk] InputAdapter - Exchange [ws_item_sk] #15 - WholeStageCodegen (18) - Filter [ws_sold_date_sk] + Exchange [c_customer_sk] #8 + WholeStageCodegen (13) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - WholeStageCodegen (25) - Sort [item_sk] - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (22) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk,d_date] #4 - InputAdapter - WholeStageCodegen (24) - Sort [i_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_item_desc] #6 - WholeStageCodegen (32) - Sort [c_customer_sk] - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (29) - Sort [ss_customer_sk] - InputAdapter - ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 - InputAdapter - WholeStageCodegen (31) - Sort [c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk] #8 + Scan parquet default.customer 
[c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (16) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (34) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] InputAdapter - ReusedExchange [d_date_sk] #13 + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + WholeStageCodegen (27) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #14 + WholeStageCodegen (26) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + SortMergeJoin [ws_item_sk,item_sk] + WholeStageCodegen (19) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #15 + WholeStageCodegen (18) + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + WholeStageCodegen (25) + Sort [item_sk] + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (22) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk,d_date] #4 + InputAdapter + WholeStageCodegen (24) + Sort [i_item_sk] + InputAdapter + ReusedExchange [i_item_sk,i_item_desc] #6 + WholeStageCodegen (32) + Sort [c_customer_sk] + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + SortMergeJoin [ss_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (29) + Sort [ss_customer_sk] + InputAdapter + ReusedExchange [ss_customer_sk,ss_quantity,ss_sales_price] #7 + InputAdapter + WholeStageCodegen (31) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk] #8 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 6d2b5b0013d8f..15ae5bfe24303 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -1,76 +1,75 @@ == Physical Plan == -CollectLimit (72) -+- * HashAggregate (71) - +- Exchange (70) - +- * HashAggregate (69) - +- Union (68) - :- * Project (51) - : +- * BroadcastHashJoin Inner BuildRight (50) - : :- * Project (44) - : : +- * BroadcastHashJoin LeftSemi BuildRight (43) - : : :- * Project (27) - : : 
: +- * BroadcastHashJoin LeftSemi BuildRight (26) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.catalog_sales (1) - : : : +- BroadcastExchange (25) - : : : +- * Project (24) - : : : +- * Filter (23) - : : : +- * HashAggregate (22) - : : : +- Exchange (21) - : : : +- * HashAggregate (20) - : : : +- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Project (13) - : : : : +- * BroadcastHashJoin Inner BuildRight (12) - : : : : :- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.store_sales (4) - : : : : +- BroadcastExchange (11) - : : : : +- * Project (10) - : : : : +- * Filter (9) - : : : : +- * ColumnarToRow (8) - : : : : +- Scan parquet default.date_dim (7) - : : : +- BroadcastExchange (17) - : : : +- * Filter (16) - : : : +- * ColumnarToRow (15) - : : : +- Scan parquet default.item (14) - : : +- BroadcastExchange (42) - : : +- * Project (41) - : : +- * Filter (40) - : : +- * HashAggregate (39) - : : +- Exchange (38) - : : +- * HashAggregate (37) - : : +- * Project (36) - : : +- * BroadcastHashJoin Inner BuildRight (35) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.store_sales (28) - : : +- BroadcastExchange (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer (31) - : +- BroadcastExchange (49) - : +- * Project (48) - : +- * Filter (47) - : +- * ColumnarToRow (46) - : +- Scan parquet default.date_dim (45) - +- * Project (67) - +- * BroadcastHashJoin Inner BuildRight (66) - :- * Project (64) - : +- * BroadcastHashJoin LeftSemi BuildRight (63) - : :- * Project (57) - : : +- * BroadcastHashJoin LeftSemi BuildRight (56) - : : :- * Filter (54) - : : : +- * ColumnarToRow (53) - : : : +- Scan parquet default.web_sales (52) - : : +- ReusedExchange (55) - : +- BroadcastExchange (62) - : +- * Project (61) - : +- * Filter (60) - : +- * HashAggregate (59) - : +- ReusedExchange (58) - +- ReusedExchange (65) +* HashAggregate (71) ++- Exchange (70) + +- * HashAggregate (69) + +- Union (68) + :- * Project (51) + : +- * BroadcastHashJoin Inner BuildRight (50) + : :- * Project (44) + : : +- * BroadcastHashJoin LeftSemi BuildRight (43) + : : :- * Project (27) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_sales (1) + : : : +- BroadcastExchange (25) + : : : +- * Project (24) + : : : +- * Filter (23) + : : : +- * HashAggregate (22) + : : : +- Exchange (21) + : : : +- * HashAggregate (20) + : : : +- * Project (19) + : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : :- * Project (13) + : : : : +- * BroadcastHashJoin Inner BuildRight (12) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.store_sales (4) + : : : : +- BroadcastExchange (11) + : : : : +- * Project (10) + : : : : +- * Filter (9) + : : : : +- * ColumnarToRow (8) + : : : : +- Scan parquet default.date_dim (7) + : : : +- BroadcastExchange (17) + : : : +- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (42) + : : +- * Project (41) + : : +- * Filter (40) + : : +- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.store_sales (28) + : : +- BroadcastExchange 
(34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer (31) + : +- BroadcastExchange (49) + : +- * Project (48) + : +- * Filter (47) + : +- * ColumnarToRow (46) + : +- Scan parquet default.date_dim (45) + +- * Project (67) + +- * BroadcastHashJoin Inner BuildRight (66) + :- * Project (64) + : +- * BroadcastHashJoin LeftSemi BuildRight (63) + : :- * Project (57) + : : +- * BroadcastHashJoin LeftSemi BuildRight (56) + : : :- * Filter (54) + : : : +- * ColumnarToRow (53) + : : : +- Scan parquet default.web_sales (52) + : : +- ReusedExchange (55) + : +- BroadcastExchange (62) + : +- * Project (61) + : +- * Filter (60) + : +- * HashAggregate (59) + : +- ReusedExchange (58) + +- ReusedExchange (65) (1) Scan parquet default.catalog_sales @@ -398,139 +397,135 @@ Functions [1]: [sum(sales#40)] Aggregate Attributes [1]: [sum(sales#40)#57] Results [1]: [sum(sales#40)#57 AS sum(sales)#58] -(72) CollectLimit -Input [1]: [sum(sales)#58] -Arguments: 100 - ===== Subqueries ===== Subquery:1 Hosting operator id = 40 Hosting Expression = Subquery scalar-subquery#35, [id=#36] -* HashAggregate (94) -+- Exchange (93) - +- * HashAggregate (92) - +- * HashAggregate (91) - +- Exchange (90) - +- * HashAggregate (89) - +- * Project (88) - +- * BroadcastHashJoin Inner BuildRight (87) - :- * Project (81) - : +- * BroadcastHashJoin Inner BuildRight (80) - : :- * Filter (75) - : : +- * ColumnarToRow (74) - : : +- Scan parquet default.store_sales (73) - : +- BroadcastExchange (79) - : +- * Filter (78) - : +- * ColumnarToRow (77) - : +- Scan parquet default.customer (76) - +- BroadcastExchange (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.date_dim (82) - - -(73) Scan parquet default.store_sales +* HashAggregate (93) ++- Exchange (92) + +- * HashAggregate (91) + +- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- * Project (87) + +- * BroadcastHashJoin Inner BuildRight (86) + :- * Project (80) + : +- * BroadcastHashJoin Inner BuildRight (79) + : :- * Filter (74) + : : +- * ColumnarToRow (73) + : : +- Scan parquet default.store_sales (72) + : +- BroadcastExchange (78) + : +- * Filter (77) + : +- * ColumnarToRow (76) + : +- Scan parquet default.customer (75) + +- BroadcastExchange (85) + +- * Project (84) + +- * Filter (83) + +- * ColumnarToRow (82) + +- Scan parquet default.date_dim (81) + + +(72) Scan parquet default.store_sales Output [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(74) ColumnarToRow [codegen id : 3] +(73) ColumnarToRow [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] -(75) Filter [codegen id : 3] +(74) Filter [codegen id : 3] Input [4]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25] Condition : (isnotnull(ss_customer_sk#23) AND isnotnull(ss_sold_date_sk#6)) -(76) Scan parquet default.customer +(75) Scan parquet default.customer Output [1]: [c_customer_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(77) ColumnarToRow [codegen id : 1] +(76) ColumnarToRow [codegen id : 1] Input [1]: [c_customer_sk#26] -(78) Filter [codegen id : 1] +(77) Filter [codegen id : 1] Input [1]: [c_customer_sk#26] Condition : 
isnotnull(c_customer_sk#26) -(79) BroadcastExchange +(78) BroadcastExchange Input [1]: [c_customer_sk#26] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] -(80) BroadcastHashJoin [codegen id : 3] +(79) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#23] Right keys [1]: [c_customer_sk#26] Join condition: None -(81) Project [codegen id : 3] +(80) Project [codegen id : 3] Output [4]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_customer_sk#23, ss_quantity#24, ss_sales_price#25, c_customer_sk#26] -(82) Scan parquet default.date_dim +(81) Scan parquet default.date_dim Output [2]: [d_date_sk#8, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct -(83) ColumnarToRow [codegen id : 2] +(82) ColumnarToRow [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] -(84) Filter [codegen id : 2] +(83) Filter [codegen id : 2] Input [2]: [d_date_sk#8, d_year#10] Condition : (d_year#10 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#8)) -(85) Project [codegen id : 2] +(84) Project [codegen id : 2] Output [1]: [d_date_sk#8] Input [2]: [d_date_sk#8, d_year#10] -(86) BroadcastExchange +(85) BroadcastExchange Input [1]: [d_date_sk#8] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] -(87) BroadcastHashJoin [codegen id : 3] +(86) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#6] Right keys [1]: [d_date_sk#8] Join condition: None -(88) Project [codegen id : 3] +(87) Project [codegen id : 3] Output [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Input [5]: [ss_sold_date_sk#6, ss_quantity#24, ss_sales_price#25, c_customer_sk#26, d_date_sk#8] -(89) HashAggregate [codegen id : 3] +(88) HashAggregate [codegen id : 3] Input [3]: [ss_quantity#24, ss_sales_price#25, c_customer_sk#26] Keys [1]: [c_customer_sk#26] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [2]: [sum#61, isEmpty#62] Results [3]: [c_customer_sk#26, sum#63, isEmpty#64] -(90) Exchange +(89) Exchange Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Arguments: hashpartitioning(c_customer_sk#26, 5), true, [id=#65] -(91) HashAggregate [codegen id : 4] +(90) HashAggregate [codegen id : 4] Input [3]: [c_customer_sk#26, sum#63, isEmpty#64] Keys [1]: [c_customer_sk#26] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66] Results [1]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#24 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price#25 as decimal(12,2)))), DecimalType(18,2), true))#66 AS csales#67] -(92) HashAggregate [codegen id : 4] +(91) HashAggregate [codegen id : 4] Input [1]: [csales#67] Keys: [] Functions [1]: [partial_max(csales#67)] Aggregate Attributes [1]: [max#68] Results [1]: [max#69] -(93) Exchange +(92) Exchange Input [1]: [max#69] 
Arguments: SinglePartition, true, [id=#70] -(94) HashAggregate [codegen id : 5] +(93) HashAggregate [codegen id : 5] Input [1]: [max#69] Keys: [] Functions [1]: [max(csales#67)] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt index d860e18574f2a..aebe2bd3e1a6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/simplified.txt @@ -1,143 +1,142 @@ -CollectLimit - WholeStageCodegen (20) - HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] - InputAdapter - Exchange #1 - WholeStageCodegen (19) - HashAggregate [sales] [sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (9) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_item_sk,item_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (4) - Project [item_sk] - Filter [count(1)] - HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] - InputAdapter - Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 - WholeStageCodegen (3) - HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] - Project [d_date,i_item_sk,i_item_desc] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk] +WholeStageCodegen (20) + HashAggregate [sum,isEmpty] [sum(sales),sum(sales),sum,isEmpty] + InputAdapter + Exchange #1 + WholeStageCodegen (19) + HashAggregate [sales] [sum,isEmpty,sum,isEmpty] + InputAdapter + Union + WholeStageCodegen (9) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_item_sk,item_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [item_sk] + Filter [count(1)] + HashAggregate [substr(i_item_desc, 1, 30),i_item_sk,d_date,count] [count(1),item_sk,count(1),count] + InputAdapter + Exchange [substr(i_item_desc, 1, 30),i_item_sk,d_date] #3 + WholeStageCodegen (3) + HashAggregate [i_item_desc,i_item_sk,d_date] [count,substr(i_item_desc, 1, 30),count] + Project [d_date,i_item_sk,i_item_desc] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_date_sk,d_date,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_year,d_date_sk] + Scan parquet default.item [i_item_sk,i_item_desc] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (5) + HashAggregate [max] [max(csales),tpcds_cmax,max] + InputAdapter + Exchange #9 + WholeStageCodegen (4) + HashAggregate [csales] [max,max] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (3) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_year] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (7) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (5) - HashAggregate [max] [max(csales),tpcds_cmax,max] - InputAdapter - Exchange #9 - WholeStageCodegen (4) - HashAggregate [csales] [max,max] - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),csales,sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #10 - WholeStageCodegen (3) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (1) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (1) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - HashAggregate 
[c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (6) - HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] - Project [ss_quantity,ss_sales_price,c_customer_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Filter [ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (5) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (8) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (18) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_item_sk,item_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [item_sk] #2 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (16) - Project [c_customer_sk] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [tpcds_cmax] #1 - HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] - InputAdapter - ReusedExchange [c_customer_sk,sum,isEmpty] #7 - InputAdapter - ReusedExchange [d_date_sk] #13 + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (6) + HashAggregate [c_customer_sk,ss_quantity,ss_sales_price] [sum,isEmpty,sum,isEmpty] + Project [ss_quantity,ss_sales_price,c_customer_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Filter [ss_customer_sk] + ColumnarToRow + 
InputAdapter + Scan parquet default.store_sales [ss_customer_sk,ss_quantity,ss_sales_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (8) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (18) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_item_sk,item_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [item_sk] #2 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (16) + Project [c_customer_sk] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [tpcds_cmax] #1 + HashAggregate [c_customer_sk,sum,isEmpty] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_sales_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty] + InputAdapter + ReusedExchange [c_customer_sk,sum,isEmpty] #7 + InputAdapter + ReusedExchange [d_date_sk] #13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 92b9c26825e51..7465ddae84e8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -1,72 +1,71 @@ == Physical Plan == -CollectLimit (68) -+- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * HashAggregate (64) - +- * HashAggregate (63) - +- * HashAggregate (62) - +- * HashAggregate (61) - +- * HashAggregate (60) - +- Exchange (59) - +- * HashAggregate (58) - +- SortMergeJoin LeftSemi (57) - :- SortMergeJoin LeftSemi (39) - : :- * Sort (21) - : : +- Exchange (20) - : : +- * Project (19) - : : +- * SortMergeJoin Inner (18) - : : :- * Sort (12) - : : : +- Exchange (11) - : : : +- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- * Sort (17) - : : +- Exchange (16) - : : +- * Filter (15) - : : +- * ColumnarToRow (14) - : : +- Scan parquet default.customer (13) - : +- * Sort (38) - : +- Exchange (37) - : +- * HashAggregate (36) - : +- Exchange (35) - : +- * HashAggregate (34) - : +- * Project (33) - : +- * SortMergeJoin Inner (32) - : :- * Sort (29) - : : +- Exchange (28) - : : +- * Project (27) - : : +- * 
BroadcastHashJoin Inner BuildRight (26) - : : :- * Filter (24) - : : : +- * ColumnarToRow (23) - : : : +- Scan parquet default.catalog_sales (22) - : : +- ReusedExchange (25) - : +- * Sort (31) - : +- ReusedExchange (30) - +- * Sort (56) - +- Exchange (55) - +- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * Project (51) - +- * SortMergeJoin Inner (50) - :- * Sort (47) - : +- Exchange (46) - : +- * Project (45) - : +- * BroadcastHashJoin Inner BuildRight (44) - : :- * Filter (42) - : : +- * ColumnarToRow (41) - : : +- Scan parquet default.web_sales (40) - : +- ReusedExchange (43) - +- * Sort (49) - +- ReusedExchange (48) +* HashAggregate (67) ++- Exchange (66) + +- * HashAggregate (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * HashAggregate (61) + +- * HashAggregate (60) + +- Exchange (59) + +- * HashAggregate (58) + +- SortMergeJoin LeftSemi (57) + :- SortMergeJoin LeftSemi (39) + : :- * Sort (21) + : : +- Exchange (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer (13) + : +- * Sort (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- Exchange (35) + : +- * HashAggregate (34) + : +- * Project (33) + : +- * SortMergeJoin Inner (32) + : :- * Sort (29) + : : +- Exchange (28) + : : +- * Project (27) + : : +- * BroadcastHashJoin Inner BuildRight (26) + : : :- * Filter (24) + : : : +- * ColumnarToRow (23) + : : : +- Scan parquet default.catalog_sales (22) + : : +- ReusedExchange (25) + : +- * Sort (31) + : +- ReusedExchange (30) + +- * Sort (56) + +- Exchange (55) + +- * HashAggregate (54) + +- Exchange (53) + +- * HashAggregate (52) + +- * Project (51) + +- * SortMergeJoin Inner (50) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (42) + : : +- * ColumnarToRow (41) + : : +- Scan parquet default.web_sales (40) + : +- ReusedExchange (43) + +- * Sort (49) + +- ReusedExchange (48) (1) Scan parquet default.store_sales @@ -387,7 +386,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#37] Results [1]: [count(1)#37 AS count(1)#38] -(68) CollectLimit -Input [1]: [count(1)#38] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt index 5bcd7dbb93022..8dd59340cf069 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/simplified.txt @@ -1,118 +1,117 @@ -CollectLimit - WholeStageCodegen (26) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (25) - HashAggregate [count,count] +WholeStageCodegen (26) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (25) + HashAggregate [count,count] + HashAggregate 
[c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (24) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (24) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - SortMergeJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - WholeStageCodegen (7) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #3 - WholeStageCodegen (6) - Project [d_date,c_first_name,c_last_name] - SortMergeJoin [ss_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #4 - WholeStageCodegen (2) - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - WholeStageCodegen (5) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (4) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - WholeStageCodegen (15) - Sort [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #7 - WholeStageCodegen (14) - HashAggregate [c_last_name,c_first_name,d_date] + WholeStageCodegen (7) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #3 + WholeStageCodegen (6) + Project [d_date,c_first_name,c_last_name] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (13) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - SortMergeJoin [cs_bill_customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (10) - Sort [cs_bill_customer_sk] - InputAdapter - Exchange [cs_bill_customer_sk] #9 - WholeStageCodegen (9) - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] + WholeStageCodegen (3) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #4 + WholeStageCodegen (2) + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_date] #5 - InputAdapter - WholeStageCodegen (12) - Sort 
[c_customer_sk] - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 - WholeStageCodegen (23) + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + WholeStageCodegen (5) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #6 + WholeStageCodegen (4) + Filter [c_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] + WholeStageCodegen (15) Sort [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #10 - WholeStageCodegen (22) + Exchange [c_last_name,c_first_name,d_date] #7 + WholeStageCodegen (14) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #11 - WholeStageCodegen (21) + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (13) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + SortMergeJoin [cs_bill_customer_sk,c_customer_sk] InputAdapter - WholeStageCodegen (18) - Sort [ws_bill_customer_sk] + WholeStageCodegen (10) + Sort [cs_bill_customer_sk] InputAdapter - Exchange [ws_bill_customer_sk] #12 - WholeStageCodegen (17) - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + Exchange [cs_bill_customer_sk] #9 + WholeStageCodegen (9) + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #5 InputAdapter - WholeStageCodegen (20) + WholeStageCodegen (12) Sort [c_customer_sk] InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 + WholeStageCodegen (23) + Sort [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #10 + WholeStageCodegen (22) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #11 + WholeStageCodegen (21) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + SortMergeJoin [ws_bill_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (18) + Sort [ws_bill_customer_sk] + InputAdapter + Exchange [ws_bill_customer_sk] #12 + WholeStageCodegen (17) + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #5 + InputAdapter + WholeStageCodegen (20) + Sort [c_customer_sk] + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 09ab60c7cf651..74454cf32afd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -1,59 +1,58 @@ == Physical Plan == -CollectLimit (55) -+- * HashAggregate (54) - +- Exchange (53) - +- * HashAggregate (52) - +- * HashAggregate (51) - +- * 
HashAggregate (50) - +- * HashAggregate (49) - +- * HashAggregate (48) - +- * HashAggregate (47) - +- Exchange (46) - +- * HashAggregate (45) - +- * BroadcastHashJoin LeftSemi BuildRight (44) - :- * BroadcastHashJoin LeftSemi BuildRight (30) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.customer (11) - : +- BroadcastExchange (29) - : +- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_sales (17) - : : +- ReusedExchange (20) - : +- ReusedExchange (23) - +- BroadcastExchange (43) - +- * HashAggregate (42) - +- Exchange (41) - +- * HashAggregate (40) - +- * Project (39) - +- * BroadcastHashJoin Inner BuildRight (38) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.web_sales (31) - : +- ReusedExchange (34) - +- ReusedExchange (37) +* HashAggregate (54) ++- Exchange (53) + +- * HashAggregate (52) + +- * HashAggregate (51) + +- * HashAggregate (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- * HashAggregate (47) + +- Exchange (46) + +- * HashAggregate (45) + +- * BroadcastHashJoin LeftSemi BuildRight (44) + :- * BroadcastHashJoin LeftSemi BuildRight (30) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer (11) + : +- BroadcastExchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.catalog_sales (17) + : : +- ReusedExchange (20) + : +- ReusedExchange (23) + +- BroadcastExchange (43) + +- * HashAggregate (42) + +- Exchange (41) + +- * HashAggregate (40) + +- * Project (39) + +- * BroadcastHashJoin Inner BuildRight (38) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.web_sales (31) + : +- ReusedExchange (34) + +- ReusedExchange (37) (1) Scan parquet default.store_sales @@ -322,7 +321,3 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#33] Results [1]: [count(1)#33 AS count(1)#34] -(55) CollectLimit -Input [1]: [count(1)#34] -Arguments: 100 - diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt index 10a2166ce761d..a5b57a4ac9450 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/simplified.txt @@ -1,81 +1,80 @@ -CollectLimit - WholeStageCodegen (13) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (12) - HashAggregate [count,count] +WholeStageCodegen (13) + HashAggregate [count] [count(1),count(1),count] + InputAdapter + Exchange #1 + WholeStageCodegen (12) + HashAggregate [count,count] + HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] HashAggregate [c_last_name,c_first_name,d_date] - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #2 - WholeStageCodegen (11) - HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #2 + WholeStageCodegen (11) + HashAggregate [c_last_name,c_first_name,d_date] + BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - BroadcastHashJoin [c_last_name,c_first_name,d_date,c_last_name,c_first_name,d_date] - Project [d_date,c_first_name,c_last_name] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,d_date] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_customer_sk] + Project [d_date,c_first_name,c_last_name] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,d_date] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk,d_date] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [c_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk,d_date] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_month_seq] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) - Filter [c_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (6) - HashAggregate [c_last_name,c_first_name,d_date] - InputAdapter - Exchange [c_last_name,c_first_name,d_date] #6 - WholeStageCodegen (5) - HashAggregate [c_last_name,c_first_name,d_date] - Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] - Project [cs_bill_customer_sk,d_date] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] - InputAdapter - ReusedExchange 
[d_date_sk,d_date] #3 - InputAdapter - ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) + BroadcastExchange #5 + WholeStageCodegen (6) HashAggregate [c_last_name,c_first_name,d_date] InputAdapter - Exchange [c_last_name,c_first_name,d_date] #8 - WholeStageCodegen (9) + Exchange [c_last_name,c_first_name,d_date] #6 + WholeStageCodegen (5) HashAggregate [c_last_name,c_first_name,d_date] Project [c_last_name,c_first_name,d_date] - BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] - Project [ws_bill_customer_sk,d_date] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_bill_customer_sk] + BroadcastHashJoin [cs_bill_customer_sk,c_customer_sk] + Project [cs_bill_customer_sk,d_date] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk,cs_bill_customer_sk] ColumnarToRow InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk] InputAdapter ReusedExchange [d_date_sk,d_date] #3 InputAdapter ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + HashAggregate [c_last_name,c_first_name,d_date] + InputAdapter + Exchange [c_last_name,c_first_name,d_date] #8 + WholeStageCodegen (9) + HashAggregate [c_last_name,c_first_name,d_date] + Project [c_last_name,c_first_name,d_date] + BroadcastHashJoin [ws_bill_customer_sk,c_customer_sk] + Project [ws_bill_customer_sk,d_date] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_bill_customer_sk] + InputAdapter + ReusedExchange [d_date_sk,d_date] #3 + InputAdapter + ReusedExchange [c_customer_sk,c_first_name,c_last_name] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index dc4665185b014..99459bfe9a049 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#6))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#6))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt index 7fd1cd3637a09..0721155286d17 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount 
Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index b17a48db8baac..8a441392f4165 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (34) +* Sort (34) +- * HashAggregate (33) +- Exchange (32) +- * HashAggregate (31) @@ -190,7 +190,7 @@ Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))] Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#3))#22] Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#3))#22,17,2) AS Excess Discount Amount #23] -(34) TakeOrderedAndProject +(34) Sort [codegen id : 7] Input [1]: [Excess Discount Amount #23] -Arguments: 100, [Excess Discount Amount #23 ASC NULLS FIRST], [Excess Discount Amount #23] +Arguments: [Excess Discount Amount #23 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt index 652b2e36cf781..1f24a7c964f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [Excess Discount Amount ] - WholeStageCodegen (7) +WholeStageCodegen (7) + Sort [Excess Discount Amount ] HashAggregate [sum] [sum(UnscaledValue(ws_ext_discount_amt)),Excess Discount Amount ,sum] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index 7720d9dee4170..43390c5048a6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (47) +* Sort (47) +- * HashAggregate (46) +- Exchange (45) +- * HashAggregate (44) @@ -259,7 +259,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#24, sum(UnscaledValue(ws_net_profit#7))#25, count(ws_order_number#5)#29] Results [3]: [count(ws_order_number#5)#29 AS order count #32, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#24,17,2) AS total shipping cost #33, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#25,17,2) AS total net profit #34] -(47) TakeOrderedAndProject +(47) Sort [codegen id : 14] Input [3]: [order count #32, total shipping cost #33, total net profit #34] -Arguments: 100, [order count #32 ASC NULLS FIRST], [order count #32, total shipping cost #33, total net profit #34] +Arguments: [order count #32 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt index 128a8179ac10b..7b3d461b9e80f 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (14) +WholeStageCodegen (14) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index a94e74f66b201..2abbe4f9b8390 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (41) +* Sort (41) +- * HashAggregate (40) +- Exchange (39) +- * HashAggregate (38) @@ -229,7 +229,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#22, sum(UnscaledValue(ws_net_profit#7))#23, count(ws_order_number#5)#27] Results [3]: [count(ws_order_number#5)#27 AS order count #30, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#22,17,2) AS total shipping cost #31, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#23,17,2) AS total net profit #32] -(41) TakeOrderedAndProject +(41) Sort [codegen id : 8] Input [3]: [order count #30, total shipping cost #31, total net profit #32] -Arguments: 100, [order count #30 ASC NULLS FIRST], [order count #30, total shipping cost #31, total net profit #32] +Arguments: [order count #30 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt index 9d30b998fe174..5e7d7db5c0a9e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (8) +WholeStageCodegen (8) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index 7fec07e259559..547792f3d7ae4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (61) +* Sort (61) +- * HashAggregate (60) +- Exchange (59) +- * HashAggregate (58) @@ -331,7 +331,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#28, sum(UnscaledValue(ws_net_profit#6))#29, count(ws_order_number#4)#33] Results [3]: 
[count(ws_order_number#4)#33 AS order count #36, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#28,17,2) AS total shipping cost #37, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#29,17,2) AS total net profit #38] -(61) TakeOrderedAndProject +(61) Sort [codegen id : 23] Input [3]: [order count #36, total shipping cost #37, total net profit #38] -Arguments: 100, [order count #36 ASC NULLS FIRST], [order count #36, total shipping cost #37, total net profit #38] +Arguments: [order count #36 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt index da48d34c72a04..7213a9f58d3f8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (23) +WholeStageCodegen (23) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 3a24e83aff256..1cc99e296383f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (56) +* Sort (56) +- * HashAggregate (55) +- Exchange (54) +- * HashAggregate (53) @@ -312,7 +312,7 @@ Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#32] Results [3]: [count(ws_order_number#4)#32 AS order count #35, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #36, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #37] -(56) TakeOrderedAndProject +(56) Sort [codegen id : 11] Input [3]: [order count #35, total shipping cost #36, total net profit #37] -Arguments: 100, [order count #35 ASC NULLS FIRST], [order count #35, total shipping cost #36, total net profit #37] +Arguments: [order count #35 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt index 6d35311c810f5..191ff22c1961f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [order count ,total shipping cost ,total net profit ] - WholeStageCodegen (11) +WholeStageCodegen (11) + Sort [order count ] HashAggregate [sum,sum,count] [sum(UnscaledValue(ws_ext_ship_cost)),sum(UnscaledValue(ws_net_profit)),count(ws_order_number),order count ,total shipping cost ,total net profit ,sum,sum,count] InputAdapter Exchange #1 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index d00029f985471..5ae0e1632f15b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt index 1355caffbbfe8..d9ee3e09481ed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 3561eff8f57ef..6729910d9cb4a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -1,5 +1,5 @@ == Physical Plan == -TakeOrderedAndProject (28) +* Sort (28) +- * HashAggregate (27) +- Exchange (26) +- * HashAggregate (25) @@ -154,7 +154,7 @@ Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#17] Results [1]: [count(1)#17 AS count(1)#18] -(28) TakeOrderedAndProject +(28) Sort [codegen id : 5] Input [1]: [count(1)#18] -Arguments: 100, [count(1)#18 ASC NULLS FIRST], [count(1)#18] +Arguments: [count(1)#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt index b13f28bf69cfd..45400b6c512f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/simplified.txt @@ -1,5 +1,5 @@ -TakeOrderedAndProject [count(1)] - WholeStageCodegen (5) +WholeStageCodegen (5) + Sort [count(1)] HashAggregate [count] [count(1),count(1),count] InputAdapter Exchange #1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * 
HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project 
[customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index 0a2e88b5bc160..e904ad94dd8fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -1,34 +1,33 @@ == Physical Plan == -CollectLimit (30) -+- * HashAggregate (29) - +- Exchange (28) - +- * HashAggregate (27) - +- * Project (26) - +- SortMergeJoin FullOuter (25) - :- * Sort (14) - : +- * HashAggregate (13) - : +- Exchange (12) - : +- * HashAggregate (11) - : +- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.date_dim (4) - +- * Sort (24) - +- * HashAggregate (23) - +- Exchange (22) - +- * HashAggregate (21) - +- * Project (20) - +- * BroadcastHashJoin Inner BuildRight (19) - :- * Filter (17) - : +- * ColumnarToRow (16) - : +- Scan parquet default.catalog_sales (15) - +- ReusedExchange (18) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- SortMergeJoin FullOuter (25) + :- * Sort (14) + : +- * HashAggregate (13) + : +- Exchange (12) + : +- * HashAggregate (11) + : +- * Project (10) + : +- * BroadcastHashJoin Inner BuildRight (9) + : :- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.store_sales (1) + : +- BroadcastExchange (8) + : +- * Project (7) + : +- * Filter (6) + : +- * ColumnarToRow (5) + : +- Scan parquet default.date_dim (4) + +- * Sort (24) + +- * HashAggregate (23) + +- Exchange (22) + +- * HashAggregate (21) + +- * Project (20) + +- * BroadcastHashJoin Inner BuildRight (19) + :- * Filter (17) + : +- * ColumnarToRow (16) + : +- Scan parquet default.catalog_sales (15) + +- ReusedExchange (18) (1) Scan parquet default.store_sales @@ -173,7 +172,3 @@ Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] -(30) CollectLimit -Input [3]: [store_only#26, catalog_only#27, store_and_catalog#28] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index bae48ec244faa..c5921a11cd889 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,46 +1,45 @@ -CollectLimit - WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as 
bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] - Project [customer_sk,customer_sk] - InputAdapter - SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] - WholeStageCodegen (3) - Sort [customer_sk,item_sk] - HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [ss_customer_sk,ss_item_sk] #2 - WholeStageCodegen (2) - HashAggregate [ss_customer_sk,ss_item_sk] - Project [ss_item_sk,ss_customer_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] - WholeStageCodegen (6) - Sort [customer_sk,item_sk] - HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #4 - WholeStageCodegen (5) - HashAggregate [cs_bill_customer_sk,cs_item_sk] - Project [cs_bill_customer_sk,cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #3 +WholeStageCodegen (8) + HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (7) + HashAggregate [customer_sk,customer_sk] [sum,sum,sum,sum,sum,sum] + Project [customer_sk,customer_sk] + InputAdapter + SortMergeJoin [customer_sk,item_sk,customer_sk,item_sk] + WholeStageCodegen (3) + Sort [customer_sk,item_sk] + HashAggregate [ss_customer_sk,ss_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [ss_customer_sk,ss_item_sk] #2 + WholeStageCodegen (2) + HashAggregate [ss_customer_sk,ss_item_sk] + Project [ss_item_sk,ss_customer_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] + WholeStageCodegen (6) + Sort [customer_sk,item_sk] + HashAggregate [cs_bill_customer_sk,cs_item_sk] [customer_sk,item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #4 + WholeStageCodegen (5) + HashAggregate [cs_bill_customer_sk,cs_item_sk] + Project [cs_bill_customer_sk,cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 8797e5ad64149..e64d5f6f3587e 
100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1134,7 +1134,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( - inputDF.groupBy().count().limit(1), + inputDF.groupBy("value").count().limit(1), expectStreamingLimit = false, outputMode = OutputMode.Complete()) } From 3695e997d5d436be086235505bbb030c87ae8eef Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 19 Nov 2020 16:56:21 +0000 Subject: [PATCH 0522/1009] [SPARK-33045][SQL] Support built-in function like_all and fix StackOverflowError issue ### What changes were proposed in this pull request? Spark already supports the `LIKE ALL` syntax, but it will throw `StackOverflowError` if there are many elements (more than 14378 elements). We should implement a built-in function for LIKE ALL to fix this issue. Why can the stack overflow happen in the current approach? The current approach uses reduceLeft to connect each `Like(e, p)`, which makes the call depth too large and causes `StackOverflowError` problems. Why does the fix in this PR avoid the error? This PR adds a built-in function for `LIKE ALL` and avoids this issue. ### Why are the changes needed? 1. Fix the `StackOverflowError` issue. 2. Support the built-in function `like_all`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #29999 from beliefer/SPARK-33045-like_all. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 5 ++ .../expressions/regexpExpressions.scala | 84 +++++++++++++++++++ .../sql/catalyst/parser/AstBuilder.scala | 15 +++- .../apache/spark/sql/internal/SQLConf.scala | 14 ++++ .../expressions/RegexpExpressionsSuite.scala | 24 ++++++ .../resources/sql-tests/inputs/like-all.sql | 4 + 6 files changed, 145 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index b61c4b8d065f2..4cd649b07a5c0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.{Inner, JoinType} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * A collection of implicit conversions that create a DSL for constructing catalyst data structures. 
@@ -102,6 +103,10 @@ package object dsl { def like(other: Expression, escapeChar: Char = '\\'): Expression = Like(expr, other, escapeChar) def rlike(other: Expression): Expression = RLike(expr, other) + def likeAll(others: Expression*): Expression = + LikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def notLikeAll(others: Expression*): Expression = + NotLikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def contains(other: Expression): Expression = Contains(expr, other) def startsWith(other: Expression): Expression = StartsWith(expr, other) def endsWith(other: Expression): Expression = EndsWith(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index c9dd7c7acddde..b4d9921488d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -20,10 +20,12 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale import java.util.regex.{Matcher, MatchResult, Pattern} +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.commons.text.StringEscapeUtils +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -178,6 +180,88 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } +/** + * Optimized version of LIKE ALL, when all pattern values are literal. 
+ */ +abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { + + protected def patterns: Seq[UTF8String] + + protected def isNotLikeAll: Boolean + + override def inputTypes: Seq[DataType] = StringType :: Nil + + override def dataType: DataType = BooleanType + + override def nullable: Boolean = true + + private lazy val hasNull: Boolean = patterns.contains(null) + + private lazy val cache = patterns.filterNot(_ == null) + .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) + + private lazy val matchFunc = if (isNotLikeAll) { + (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() + } else { + (p: Pattern, inputValue: String) => p.matcher(inputValue).matches() + } + + override def eval(input: InternalRow): Any = { + val exprValue = child.eval(input) + if (exprValue == null) { + null + } else { + if (cache.forall(matchFunc(_, exprValue.toString))) { + if (hasNull) null else true + } else { + false + } + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val eval = child.genCode(ctx) + val patternClass = classOf[Pattern].getName + val javaDataType = CodeGenerator.javaType(child.dataType) + val pattern = ctx.freshName("pattern") + val valueArg = ctx.freshName("valueArg") + val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) + + val checkNotMatchCode = if (isNotLikeAll) { + s"$pattern.matcher($valueArg.toString()).matches()" + } else { + s"!$pattern.matcher($valueArg.toString()).matches()" + } + + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = false; + |boolean ${ev.value} = true; + |if (${eval.isNull}) { + | ${ev.isNull} = true; + |} else { + | $javaDataType $valueArg = ${eval.value}; + | for ($patternClass $pattern: $patternCache) { + | if ($checkNotMatchCode) { + | ${ev.value} = false; + | break; + | } + | } + | if (${ev.value} && $hasNull) ${ev.isNull} = true; + |} + """.stripMargin) + } +} + +case class LikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = false +} + +case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { + override def isNotLikeAll: Boolean = true +} + // scalastyle:off line.contains.tab @ExpressionDescription( usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c3855fe088db6..79857a63a69b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1406,7 +1406,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) => getLikeQuantifierExprs(ctx.expression).reduceLeft(Or) case Some(SqlBaseParser.ALL) => - getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = ctx.expression.asScala.map(expression) + if (expressions.size > SQLConf.get.optimizerLikeAllConversionThreshold && + expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAll or NotLikeAll instead. 
+ val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAll(e, patterns) + case _ => NotLikeAll(e, patterns) + } + } else { + getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + } case _ => val escapeChar = Option(ctx.escapeChar).map(string).map { str => if (str.length != 1) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 43014feecfd8e..fcf222c8fdab0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -216,6 +216,18 @@ object SQLConf { "for using switch statements in InSet must be non-negative and less than or equal to 600") .createWithDefault(400) + val OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD = + buildConf("spark.sql.optimizer.likeAllConversionThreshold") + .internal() + .doc("Configure the maximum size of the pattern sequence in like all. Spark will convert " + + "the logical combination of like to avoid StackOverflowError. 200 is an empirical value " + + "that will not cause StackOverflowError.") + .version("3.1.0") + .intConf + .checkValue(threshold => threshold >= 0, "The maximum size of pattern sequence " + + "in like all must be non-negative") + .createWithDefault(200) + val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level") .internal() .doc("Configures the log level for logging the change from the original plan to the new " + @@ -3037,6 +3049,8 @@ class SQLConf extends Serializable with Logging { def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD) + def optimizerLikeAllConversionThreshold: Int = getConf(OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD) + def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL) def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 77a32a735f76d..cc5ab5dc7b4e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -48,6 +48,30 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(mkExpr(regex), expected, create_row(input)) // check row input } + test("LIKE ALL") { + checkEvaluation(Literal.create(null, StringType).likeAll("%foo%", "%oo"), null) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", StringType).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%foo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%foo%"), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAll("%feo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAll(Literal.create(null, StringType), "%feo%"), false) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("tee", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAll("%oo%", "%yoo%"), false) + checkEvaluation(Literal.create("foo", StringType) + 
.notLikeAll("%foo%", Literal.create(null, StringType)), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%foo%"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll("%yoo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAll(Literal.create(null, StringType), "%yoo%"), null) + } + test("LIKE Pattern") { // null handling diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql index a084dbef61a0c..f83277376e680 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql @@ -1,3 +1,7 @@ +-- test cases for like all +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=0 +--CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=200 + CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), From 6da8ade5f46cac69820ef0f6987806ffa78873f1 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 19 Nov 2020 12:42:33 -0800 Subject: [PATCH 0523/1009] [SPARK-33045][SQL][FOLLOWUP] Fix build failure with Scala 2.13 ### What changes were proposed in this pull request? Explicitly convert `scala.collection.mutable.Buffer` to `Seq`. In Scala 2.13 `Seq` is an alias of `scala.collection.immutable.Seq` instead of `scala.collection.Seq`. ### Why are the changes needed? Without the change build with Scala 2.13 fails with the following: ``` [error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:1417:41: type mismatch; [error] found : scala.collection.mutable.Buffer[org.apache.spark.unsafe.types.UTF8String] [error] required: Seq[org.apache.spark.unsafe.types.UTF8String] [error] case null => LikeAll(e, patterns) [error] ^ [error] /home/runner/work/spark/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:1418:41: type mismatch; [error] found : scala.collection.mutable.Buffer[org.apache.spark.unsafe.types.UTF8String] [error] required: Seq[org.apache.spark.unsafe.types.UTF8String] [error] case _ => NotLikeAll(e, patterns) [error] ^ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30431 from sunchao/SPARK-33045-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 79857a63a69b5..23de8ab09dd0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1414,8 +1414,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg // So we use LikeAll or NotLikeAll instead. 
val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) ctx.NOT match { - case null => LikeAll(e, patterns) - case _ => NotLikeAll(e, patterns) + case null => LikeAll(e, patterns.toSeq) + case _ => NotLikeAll(e, patterns.toSeq) } } else { getLikeQuantifierExprs(ctx.expression).reduceLeft(And) From 883a213a8f721d19855f7a5696084533da2002f7 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Thu, 19 Nov 2020 13:36:45 -0800 Subject: [PATCH 0524/1009] [MINOR] Structured Streaming statistics page indent fix ### What changes were proposed in this pull request? Structured Streaming statistics page code contains an indentation issue. This PR fixes it. ### Why are the changes needed? Indent fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30434 from gaborgsomogyi/STAT-INDENT-FIX. Authored-by: Gabor Somogyi Signed-off-by: Dongjoon Hyun --- .../ui/StreamingQueryStatisticsPage.scala | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 77078046dda7c..7d38acfceee81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -209,33 +209,33 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} - - -
    -
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    -
    - - {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    -
    - - {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} - {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    -
    - - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} - + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + // scalastyle:on } else { new NodeBuffer() From 02d410a18c966944c7a46e5bc3006dadf3d579b6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 20 Nov 2020 13:14:20 +0900 Subject: [PATCH 0525/1009] [MINOR][DOCS] Document 'without' value for HADOOP_VERSION in pip installation ### What changes were proposed in this pull request? I believe it's self-descriptive. ### Why are the changes needed? To document supported features. ### Does this PR introduce _any_ user-facing change? Yes, the docs are updated. It's master only. ### How was this patch tested? Manually built the docs via `cd python/docs` and `make clean html`: ![Screen Shot 2020-11-20 at 10 59 07 AM](https://user-images.githubusercontent.com/6477701/99748225-7ad9b280-2b1f-11eb-86fd-165012b1bb7c.png) Closes #30436 from HyukjinKwon/minor-doc-fix. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 4039698d39958..9c9ff7fa7844b 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,7 +48,7 @@ If you want to install extra dependencies for a specific componenet, you can ins pip install pyspark[sql] -For PySpark with a different Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: +For PySpark with/without a specific Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: .. code-block:: bash @@ -68,8 +68,13 @@ It is recommended to use ``-v`` option in ``pip`` to track the installation and HADOOP_VERSION=2.7 pip install pyspark -v -Supported versions of Hadoop are ``HADOOP_VERSION=2.7`` and ``HADOOP_VERSION=3.2`` (default). -Note that this installation of PySpark with a different version of Hadoop is experimental. It can change or be removed between minor releases. +Supported values in ``HADOOP_VERSION`` are: + +- ``without``: Spark pre-built with user-provided Apache Hadoop +- ``2.7``: Spark pre-built for Apache Hadoop 2.7 +- ``3.2``: Spark pre-built for Apache Hadoop 3.2 and later (default) + +Note that this installation way of PySpark with/without a specific Hadoop version is experimental. It can change or be removed between minor releases. Using Conda From 8218b488035049434271dc9e3bd5af45ffadf0fd Mon Sep 17 00:00:00 2001 From: Venkata krishnan Sowrirajan Date: Fri, 20 Nov 2020 06:00:30 -0600 Subject: [PATCH 0526/1009] [SPARK-32919][SHUFFLE][TEST-MAVEN][TEST-HADOOP2.7] Driver side changes for coordinating push based shuffle by selecting external shuffle services for merging partitions ### What changes were proposed in this pull request? Driver side changes for coordinating push based shuffle by selecting external shuffle services for merging partitions. This PR includes changes related to `ShuffleMapStage` preparation which is selection of merger locations and initializing them as part of `ShuffleDependency`. 
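To give a rough sense of the sizing rule introduced by the new configs in this patch (spark.shuffle.push.mergersMinStaticThreshold and spark.shuffle.push.mergersMinThresholdRatio), below is a minimal, illustrative Scala sketch of how the required number of merger locations can be derived; the object and method names are made up for the example and are not part of this PR:

```scala
// Minimal sketch only, not the code added by this PR: per the config docs,
// a stage needs at least max(staticThreshold, ratio * numChildStagePartitions)
// merger locations before push based shuffle is enabled for it.
object MergerThresholdSketch {
  def minMergersNeeded(
      numChildStagePartitions: Int,
      mergersMinStaticThreshold: Double,
      mergersMinThresholdRatio: Double): Int = {
    math.max(mergersMinStaticThreshold,
      mergersMinThresholdRatio * numChildStagePartitions).ceil.toInt
  }

  def main(args: Array[String]): Unit = {
    // 1000 reduce partitions, static threshold 5, ratio 0.05 => at least 50 mergers.
    println(minMergersNeeded(1000, 5.0, 0.05))
  }
}
```

When no suitable merger locations are returned, the DAGScheduler change in the diff below logs that push-based shuffle stays disabled for that stage.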
Currently the code added by this PR is not used, as some of the changes will come subsequently as part of https://issues.apache.org/jira/browse/SPARK-32917 (shuffle blocks push as part of `ShuffleMapTask`), https://issues.apache.org/jira/browse/SPARK-32918 (support for finalize API) and https://issues.apache.org/jira/browse/SPARK-32920 (finalization of push/merge phase). This is why the tests here are also partial; once the above-mentioned changes are raised as PRs, we will have enough tests for the DAGScheduler piece of code as well. ### Why are the changes needed? Added a new API in `SchedulerBackend` to get merger locations for push based shuffle. This is currently implemented for Yarn; other cluster managers can have separate implementations, which is why a new API is introduced. ### Does this PR introduce _any_ user-facing change? Yes, a user-facing config to enable push based shuffle is introduced. ### How was this patch tested? Added partial unit tests; since some of the changes in DAGScheduler depend on future changes, DAGScheduler tests will be added along with those changes. Lead-authored-by: Venkata krishnan Sowrirajan vsowrirajanlinkedin.com Co-authored-by: Min Shen mshenlinkedin.com Closes #30164 from venkata91/upstream-SPARK-32919. Lead-authored-by: Venkata krishnan Sowrirajan Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../scala/org/apache/spark/Dependency.scala | 15 +++++ .../spark/internal/config/package.scala | 47 ++++++++++++++ .../apache/spark/scheduler/DAGScheduler.scala | 40 ++++++++++++ .../spark/scheduler/SchedulerBackend.scala | 13 ++++ .../apache/spark/storage/BlockManagerId.scala | 2 + .../spark/storage/BlockManagerMaster.scala | 20 ++++++ .../storage/BlockManagerMasterEndpoint.scala | 65 +++++++++++++++++++ .../spark/storage/BlockManagerMessages.scala | 6 ++ .../scala/org/apache/spark/util/Utils.scala | 8 +++ .../spark/storage/BlockManagerSuite.scala | 49 +++++++++++++- .../org/apache/spark/util/UtilsSuite.scala | 12 ++++ .../cluster/YarnSchedulerBackend.scala | 50 ++++++++++++-- 12 files changed, 320 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index ba8e4d69ba755..d21b9d9833e9e 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -23,6 +23,7 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleHandle, ShuffleWriteProcessor} +import org.apache.spark.storage.BlockManagerId /** * :: DeveloperApi :: @@ -95,6 +96,20 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( val shuffleHandle: ShuffleHandle = _rdd.context.env.shuffleManager.registerShuffle( shuffleId, this) + /** + * Stores the location of the list of chosen external shuffle services for handling the + * shuffle merge requests from mappers in this shuffle map stage. 
+ */ + private[spark] var mergerLocs: Seq[BlockManagerId] = Nil + + def setMergerLocs(mergerLocs: Seq[BlockManagerId]): Unit = { + if (mergerLocs != null) { + this.mergerLocs = mergerLocs + } + } + + def getMergerLocs: Seq[BlockManagerId] = mergerLocs + _rdd.sparkContext.cleaner.foreach(_.registerShuffleForCleanup(this)) _rdd.sparkContext.shuffleDriverComponents.registerShuffle(shuffleId) } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 4bc49514fc5ad..b38d0e5c617b9 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1945,4 +1945,51 @@ package object config { .version("3.0.1") .booleanConf .createWithDefault(false) + + private[spark] val PUSH_BASED_SHUFFLE_ENABLED = + ConfigBuilder("spark.shuffle.push.enabled") + .doc("Set to 'true' to enable push-based shuffle on the client side and this works in " + + "conjunction with the server side flag spark.shuffle.server.mergedShuffleFileManagerImpl " + + "which needs to be set with the appropriate " + + "org.apache.spark.network.shuffle.MergedShuffleFileManager implementation for push-based " + + "shuffle to be enabled") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + + private[spark] val SHUFFLE_MERGER_MAX_RETAINED_LOCATIONS = + ConfigBuilder("spark.shuffle.push.maxRetainedMergerLocations") + .doc("Maximum number of shuffle push merger locations cached for push based shuffle. " + + "Currently, shuffle push merger locations are nothing but external shuffle services " + + "which are responsible for handling pushed blocks and merging them and serving " + + "merged blocks for later shuffle fetch.") + .version("3.1.0") + .intConf + .createWithDefault(500) + + private[spark] val SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO = + ConfigBuilder("spark.shuffle.push.mergersMinThresholdRatio") + .doc("The minimum number of shuffle merger locations required to enable push based " + + "shuffle for a stage. This is specified as a ratio of the number of partitions in " + + "the child stage. For example, a reduce stage which has 100 partitions and uses the " + + "default value 0.05 requires at least 5 unique merger locations to enable push based " + + "shuffle. Merger locations are currently defined as external shuffle services.") + .version("3.1.0") + .doubleConf + .createWithDefault(0.05) + + private[spark] val SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD = + ConfigBuilder("spark.shuffle.push.mergersMinStaticThreshold") + .doc(s"The static threshold for number of shuffle push merger locations should be " + + "available in order to enable push based shuffle for a stage. Note this config " + + s"works in conjunction with ${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key}. " + + "Maximum of spark.shuffle.push.mergersMinStaticThreshold and " + + s"${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key} ratio number of mergers needed to " + + "enable push based shuffle for a stage. 
For eg: with 1000 partitions for the child " + + "stage with spark.shuffle.push.mergersMinStaticThreshold as 5 and " + + s"${SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO.key} set to 0.05, we would need " + + "at least 50 mergers to enable push based shuffle for that stage.") + .version("3.1.0") + .doubleConf + .createWithDefault(5) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 13b766e654832..6fb0fb93f253b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -249,6 +249,8 @@ private[spark] class DAGScheduler( private[spark] val eventProcessLoop = new DAGSchedulerEventProcessLoop(this) taskScheduler.setDAGScheduler(this) + private val pushBasedShuffleEnabled = Utils.isPushBasedShuffleEnabled(sc.getConf) + /** * Called by the TaskSetManager to report task's starting. */ @@ -1252,6 +1254,33 @@ private[spark] class DAGScheduler( execCores.map(cores => properties.setProperty(EXECUTOR_CORES_LOCAL_PROPERTY, cores)) } + /** + * If push based shuffle is enabled, set the shuffle services to be used for the given + * shuffle map stage for block push/merge. + * + * Even with dynamic resource allocation kicking in and significantly reducing the number + * of available active executors, we would still be able to get sufficient shuffle service + * locations for block push/merge by getting the historical locations of past executors. + */ + private def prepareShuffleServicesForShuffleMapStage(stage: ShuffleMapStage): Unit = { + // TODO(SPARK-32920) Handle stage reuse/retry cases separately as without finalize + // TODO changes we cannot disable shuffle merge for the retry/reuse cases + val mergerLocs = sc.schedulerBackend.getShufflePushMergerLocations( + stage.shuffleDep.partitioner.numPartitions, stage.resourceProfileId) + + if (mergerLocs.nonEmpty) { + stage.shuffleDep.setMergerLocs(mergerLocs) + logInfo(s"Push-based shuffle enabled for $stage (${stage.name}) with" + + s" ${stage.shuffleDep.getMergerLocs.size} merger locations") + + logDebug("List of shuffle push merger locations " + + s"${stage.shuffleDep.getMergerLocs.map(_.host).mkString(", ")}") + } else { + logInfo("No available merger locations." + + s" Push-based shuffle disabled for $stage (${stage.name})") + } + } + /** Called when stage's parents are available and we can now do its task. */ private def submitMissingTasks(stage: Stage, jobId: Int): Unit = { logDebug("submitMissingTasks(" + stage + ")") @@ -1281,6 +1310,12 @@ private[spark] class DAGScheduler( stage match { case s: ShuffleMapStage => outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1) + // Only generate merger location for a given shuffle dependency once. This way, even if + // this stage gets retried, it would still be merging blocks using the same set of + // shuffle services. 
+ if (pushBasedShuffleEnabled) { + prepareShuffleServicesForShuffleMapStage(s) + } case s: ResultStage => outputCommitCoordinator.stageStart( stage = s.id, maxPartitionId = s.rdd.partitions.length - 1) @@ -2027,6 +2062,11 @@ private[spark] class DAGScheduler( if (!executorFailureEpoch.contains(execId) || executorFailureEpoch(execId) < currentEpoch) { executorFailureEpoch(execId) = currentEpoch logInfo(s"Executor lost: $execId (epoch $currentEpoch)") + if (pushBasedShuffleEnabled) { + // Remove fetchFailed host in the shuffle push merger list for push based shuffle + hostToUnregisterOutputs.foreach( + host => blockManagerMaster.removeShufflePushMergerLocation(host)) + } blockManagerMaster.removeExecutor(execId) clearCacheLocs() } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala index a566d0a04387c..b2acdb3e12a6d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulerBackend.scala @@ -18,6 +18,7 @@ package org.apache.spark.scheduler import org.apache.spark.resource.ResourceProfile +import org.apache.spark.storage.BlockManagerId /** * A backend interface for scheduling systems that allows plugging in different ones under @@ -92,4 +93,16 @@ private[spark] trait SchedulerBackend { */ def maxNumConcurrentTasks(rp: ResourceProfile): Int + /** + * Get the list of host locations for push based shuffle + * + * Currently push based shuffle is disabled for both stage retry and stage reuse cases + * (for eg: in the case where few partitions are lost due to failure). Hence this method + * should be invoked only once for a ShuffleDependency. + * @return List of external shuffle services locations + */ + def getShufflePushMergerLocations( + numPartitions: Int, + resourceProfileId: Int): Seq[BlockManagerId] = Nil + } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index 49e32d04d450a..c6a4457d8f910 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -145,4 +145,6 @@ private[spark] object BlockManagerId { def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.get(id) } + + private[spark] val SHUFFLE_MERGER_IDENTIFIER = "shuffle-push-merger" } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index f544d47b8e13c..fe1a5aef9499c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -125,6 +125,26 @@ class BlockManagerMaster( driverEndpoint.askSync[Seq[BlockManagerId]](GetPeers(blockManagerId)) } + /** + * Get a list of unique shuffle service locations where an executor is successfully + * registered in the past for block push/merge with push based shuffle. + */ + def getShufflePushMergerLocations( + numMergersNeeded: Int, + hostsToFilter: Set[String]): Seq[BlockManagerId] = { + driverEndpoint.askSync[Seq[BlockManagerId]]( + GetShufflePushMergerLocations(numMergersNeeded, hostsToFilter)) + } + + /** + * Remove the host from the candidate list of shuffle push mergers. 
This can be + * triggered if there is a FetchFailedException on the host + * @param host + */ + def removeShufflePushMergerLocation(host: String): Unit = { + driverEndpoint.askSync[Seq[BlockManagerId]](RemoveShufflePushMergerLocation(host)) + } + def getExecutorEndpointRef(executorId: String): Option[RpcEndpointRef] = { driverEndpoint.askSync[Option[RpcEndpointRef]](GetExecutorEndpointRef(executorId)) } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index a7532a9870fae..4d565511704d4 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -74,6 +74,14 @@ class BlockManagerMasterEndpoint( // Mapping from block id to the set of block managers that have the block. private val blockLocations = new JHashMap[BlockId, mutable.HashSet[BlockManagerId]] + // Mapping from host name to shuffle (mergers) services where the current app + // registered an executor in the past. Older hosts are removed when the + // maxRetainedMergerLocations size is reached in favor of newer locations. + private val shuffleMergerLocations = new mutable.LinkedHashMap[String, BlockManagerId]() + + // Maximum number of merger locations to cache + private val maxRetainedMergerLocations = conf.get(config.SHUFFLE_MERGER_MAX_RETAINED_LOCATIONS) + private val askThreadPool = ThreadUtils.newDaemonCachedThreadPool("block-manager-ask-thread-pool", 100) private implicit val askExecutionContext = ExecutionContext.fromExecutorService(askThreadPool) @@ -92,6 +100,8 @@ class BlockManagerMasterEndpoint( val defaultRpcTimeout = RpcUtils.askRpcTimeout(conf) + private val pushBasedShuffleEnabled = Utils.isPushBasedShuffleEnabled(conf) + logInfo("BlockManagerMasterEndpoint up") // same as `conf.get(config.SHUFFLE_SERVICE_ENABLED) // && conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)` @@ -139,6 +149,12 @@ class BlockManagerMasterEndpoint( case GetBlockStatus(blockId, askStorageEndpoints) => context.reply(blockStatus(blockId, askStorageEndpoints)) + case GetShufflePushMergerLocations(numMergersNeeded, hostsToFilter) => + context.reply(getShufflePushMergerLocations(numMergersNeeded, hostsToFilter)) + + case RemoveShufflePushMergerLocation(host) => + context.reply(removeShufflePushMergerLocation(host)) + case IsExecutorAlive(executorId) => context.reply(blockManagerIdByExecutor.contains(executorId)) @@ -360,6 +376,17 @@ class BlockManagerMasterEndpoint( } + private def addMergerLocation(blockManagerId: BlockManagerId): Unit = { + if (!blockManagerId.isDriver && !shuffleMergerLocations.contains(blockManagerId.host)) { + val shuffleServerId = BlockManagerId(BlockManagerId.SHUFFLE_MERGER_IDENTIFIER, + blockManagerId.host, externalShuffleServicePort) + if (shuffleMergerLocations.size >= maxRetainedMergerLocations) { + shuffleMergerLocations -= shuffleMergerLocations.head._1 + } + shuffleMergerLocations(shuffleServerId.host) = shuffleServerId + } + } + private def removeExecutor(execId: String): Unit = { logInfo("Trying to remove executor " + execId + " from BlockManagerMaster.") blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) @@ -526,6 +553,10 @@ class BlockManagerMasterEndpoint( blockManagerInfo(id) = new BlockManagerInfo(id, System.currentTimeMillis(), maxOnHeapMemSize, maxOffHeapMemSize, storageEndpoint, externalShuffleServiceBlockStatus) + + if (pushBasedShuffleEnabled) { + 
addMergerLocation(id) + } } listenerBus.post(SparkListenerBlockManagerAdded(time, id, maxOnHeapMemSize + maxOffHeapMemSize, Some(maxOnHeapMemSize), Some(maxOffHeapMemSize))) @@ -657,6 +688,40 @@ class BlockManagerMasterEndpoint( } } + private def getShufflePushMergerLocations( + numMergersNeeded: Int, + hostsToFilter: Set[String]): Seq[BlockManagerId] = { + val blockManagerHosts = blockManagerIdByExecutor.values.map(_.host).toSet + val filteredBlockManagerHosts = blockManagerHosts.filterNot(hostsToFilter.contains(_)) + val filteredMergersWithExecutors = filteredBlockManagerHosts.map( + BlockManagerId(BlockManagerId.SHUFFLE_MERGER_IDENTIFIER, _, externalShuffleServicePort)) + // Enough mergers are available as part of active executors list + if (filteredMergersWithExecutors.size >= numMergersNeeded) { + filteredMergersWithExecutors.toSeq + } else { + // Delta mergers added from inactive mergers list to the active mergers list + val filteredMergersWithExecutorsHosts = filteredMergersWithExecutors.map(_.host) + val filteredMergersWithoutExecutors = shuffleMergerLocations.values + .filterNot(x => hostsToFilter.contains(x.host)) + .filterNot(x => filteredMergersWithExecutorsHosts.contains(x.host)) + val randomFilteredMergersLocations = + if (filteredMergersWithoutExecutors.size > + numMergersNeeded - filteredMergersWithExecutors.size) { + Utils.randomize(filteredMergersWithoutExecutors) + .take(numMergersNeeded - filteredMergersWithExecutors.size) + } else { + filteredMergersWithoutExecutors + } + filteredMergersWithExecutors.toSeq ++ randomFilteredMergersLocations + } + } + + private def removeShufflePushMergerLocation(host: String): Unit = { + if (shuffleMergerLocations.contains(host)) { + shuffleMergerLocations.remove(host) + } + } + /** * Returns an [[RpcEndpointRef]] of the [[BlockManagerReplicaEndpoint]] for sending RPC messages. */ diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala index bbc076cea9ba8..afe416a55ed0d 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala @@ -141,4 +141,10 @@ private[spark] object BlockManagerMessages { case class BlockManagerHeartbeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster case class IsExecutorAlive(executorId: String) extends ToBlockManagerMaster + + case class GetShufflePushMergerLocations(numMergersNeeded: Int, hostsToFilter: Set[String]) + extends ToBlockManagerMaster + + case class RemoveShufflePushMergerLocation(host: String) extends ToBlockManagerMaster + } diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index b743ab6507117..6ccf65b737c1a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2541,6 +2541,14 @@ private[spark] object Utils extends Logging { master == "local" || master.startsWith("local[") } + /** + * Push based shuffle can only be enabled when external shuffle service is enabled. + */ + def isPushBasedShuffleEnabled(conf: SparkConf): Boolean = { + conf.get(PUSH_BASED_SHUFFLE_ENABLED) && + (conf.get(IS_TESTING).getOrElse(false) || conf.get(SHUFFLE_SERVICE_ENABLED)) + } + /** * Return whether dynamic allocation is enabled in the given conf. 
*/ diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 55280fc578310..144489c5f7922 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -100,6 +100,7 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE .set(Kryo.KRYO_SERIALIZER_BUFFER_SIZE.key, "1m") .set(STORAGE_UNROLL_MEMORY_THRESHOLD, 512L) .set(Network.RPC_ASK_TIMEOUT, "5s") + .set(PUSH_BASED_SHUFFLE_ENABLED, true) } private def makeSortShuffleManager(): SortShuffleManager = { @@ -1974,6 +1975,48 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE } } + test("SPARK-32919: Shuffle push merger locations should be bounded with in" + + " spark.shuffle.push.retainedMergerLocations") { + assert(master.getShufflePushMergerLocations(10, Set.empty).isEmpty) + makeBlockManager(100, "execA", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + makeBlockManager(100, "execB", + transferService = Some(new MockBlockTransferService(10, "hostB"))) + makeBlockManager(100, "execC", + transferService = Some(new MockBlockTransferService(10, "hostC"))) + makeBlockManager(100, "execD", + transferService = Some(new MockBlockTransferService(10, "hostD"))) + makeBlockManager(100, "execE", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + assert(master.getShufflePushMergerLocations(10, Set.empty).size == 4) + assert(master.getShufflePushMergerLocations(10, Set.empty).map(_.host).sorted === + Seq("hostC", "hostD", "hostA", "hostB").sorted) + assert(master.getShufflePushMergerLocations(10, Set("hostB")).size == 3) + } + + test("SPARK-32919: Prefer active executor locations for shuffle push mergers") { + makeBlockManager(100, "execA", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + makeBlockManager(100, "execB", + transferService = Some(new MockBlockTransferService(10, "hostB"))) + makeBlockManager(100, "execC", + transferService = Some(new MockBlockTransferService(10, "hostC"))) + makeBlockManager(100, "execD", + transferService = Some(new MockBlockTransferService(10, "hostD"))) + makeBlockManager(100, "execE", + transferService = Some(new MockBlockTransferService(10, "hostA"))) + assert(master.getShufflePushMergerLocations(5, Set.empty).size == 4) + + master.removeExecutor("execA") + master.removeExecutor("execE") + + assert(master.getShufflePushMergerLocations(3, Set.empty).size == 3) + assert(master.getShufflePushMergerLocations(3, Set.empty).map(_.host).sorted === + Seq("hostC", "hostB", "hostD").sorted) + assert(master.getShufflePushMergerLocations(4, Set.empty).map(_.host).sorted === + Seq("hostB", "hostA", "hostC", "hostD").sorted) + } + test("SPARK-33387 Support ordered shuffle block migration") { val blocks: Seq[ShuffleBlockInfo] = Seq( ShuffleBlockInfo(1, 0L), @@ -1995,7 +2038,9 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(sortedBlocks.sameElements(decomManager.shufflesToMigrate.asScala.map(_._1))) } - class MockBlockTransferService(val maxFailures: Int) extends BlockTransferService { + class MockBlockTransferService( + val maxFailures: Int, + override val hostName: String = "MockBlockTransferServiceHost") extends BlockTransferService { var numCalls = 0 var tempFileManager: DownloadFileManager = null @@ -2013,8 +2058,6 @@ class BlockManagerSuite extends SparkFunSuite with 
Matchers with BeforeAndAfterE override def close(): Unit = {} - override def hostName: String = { "MockBlockTransferServiceHost" } - override def port: Int = { 63332 } override def uploadBlock( diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala index 20624c743bc22..8fb408041ca9d 100644 --- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala @@ -41,6 +41,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.{SparkConf, SparkException, SparkFunSuite, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.SparkListener import org.apache.spark.util.io.ChunkedByteBufferInputStream @@ -1432,6 +1433,17 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { }.getMessage assert(message.contains(expected)) } + + test("isPushBasedShuffleEnabled when both PUSH_BASED_SHUFFLE_ENABLED" + + " and SHUFFLE_SERVICE_ENABLED are true") { + val conf = new SparkConf() + assert(Utils.isPushBasedShuffleEnabled(conf) === false) + conf.set(PUSH_BASED_SHUFFLE_ENABLED, true) + conf.set(IS_TESTING, false) + assert(Utils.isPushBasedShuffleEnabled(conf) === false) + conf.set(SHUFFLE_SERVICE_ENABLED, true) + assert(Utils.isPushBasedShuffleEnabled(conf) === true) + } } private class SimpleExtension diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index b42bdb9816600..22002bb32004d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler.cluster import java.util.EnumSet -import java.util.concurrent.atomic.{AtomicBoolean} +import java.util.concurrent.atomic.AtomicBoolean import javax.servlet.DispatcherType import scala.concurrent.{ExecutionContext, Future} @@ -29,14 +29,14 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAttemptId, ApplicationId} import org.apache.spark.SparkContext import org.apache.spark.deploy.security.HadoopDelegationTokenManager -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config +import org.apache.spark.internal.{config, Logging} import org.apache.spark.internal.config.UI._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ -import org.apache.spark.util.{RpcUtils, ThreadUtils} +import org.apache.spark.storage.{BlockManagerId, BlockManagerMaster} +import org.apache.spark.util.{RpcUtils, ThreadUtils, Utils} /** * Abstract Yarn scheduler backend that contains common logic @@ -80,6 +80,18 @@ private[spark] abstract class YarnSchedulerBackend( /** Attempt ID. 
This is unset for client-mode schedulers */ private var attemptId: Option[ApplicationAttemptId] = None + private val blockManagerMaster: BlockManagerMaster = sc.env.blockManager.master + + private val minMergersThresholdRatio = + conf.get(config.SHUFFLE_MERGER_LOCATIONS_MIN_THRESHOLD_RATIO) + + private val minMergersStaticThreshold = + conf.get(config.SHUFFLE_MERGER_LOCATIONS_MIN_STATIC_THRESHOLD) + + private val maxNumExecutors = conf.get(config.DYN_ALLOCATION_MAX_EXECUTORS) + + private val numExecutors = conf.get(config.EXECUTOR_INSTANCES).getOrElse(0) + /** * Bind to YARN. This *must* be done before calling [[start()]]. * @@ -161,6 +173,36 @@ private[spark] abstract class YarnSchedulerBackend( totalRegisteredExecutors.get() >= totalExpectedExecutors * minRegisteredRatio } + override def getShufflePushMergerLocations( + numPartitions: Int, + resourceProfileId: Int): Seq[BlockManagerId] = { + // TODO (SPARK-33481) This is a naive way of calculating numMergersDesired for a stage, + // TODO we can use better heuristics to calculate numMergersDesired for a stage. + val maxExecutors = if (Utils.isDynamicAllocationEnabled(sc.getConf)) { + maxNumExecutors + } else { + numExecutors + } + val tasksPerExecutor = sc.resourceProfileManager + .resourceProfileFromId(resourceProfileId).maxTasksPerExecutor(sc.conf) + val numMergersDesired = math.min( + math.max(1, math.ceil(numPartitions / tasksPerExecutor).toInt), maxExecutors) + val minMergersNeeded = math.max(minMergersStaticThreshold, + math.floor(numMergersDesired * minMergersThresholdRatio).toInt) + + // Request for numMergersDesired shuffle mergers to BlockManagerMasterEndpoint + // and if it's less than minMergersNeeded, we disable push based shuffle. + val mergerLocations = blockManagerMaster + .getShufflePushMergerLocations(numMergersDesired, scheduler.excludedNodes()) + if (mergerLocations.size < numMergersDesired && mergerLocations.size < minMergersNeeded) { + Seq.empty[BlockManagerId] + } else { + logDebug(s"The number of shuffle mergers desired ${numMergersDesired}" + + s" and available locations are ${mergerLocations.length}") + mergerLocations + } + } + /** * Add filters to the SparkUI. */ From 2289389821a23e5b5badabfb4e62c427de2554a5 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 20 Nov 2020 21:27:41 +0900 Subject: [PATCH 0527/1009] [SPARK-33441][BUILD][FOLLOWUP] Make unused-imports check for SBT specific ### What changes were proposed in this pull request? Move "unused-imports" check config to `SparkBuild.scala` and make it SBT specific. ### Why are the changes needed? Make unused-imports check for SBT specific. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30441 from LuciferYang/SPARK-33441-FOLLOWUP. 
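As a rough illustration of this follow-up (a minimal sketch, not the actual Spark build definition; the hypothetical sbt fragment below only shows the general pattern of scoping a scalac flag to one Scala binary version so that the Maven build stays unaffected):
```
// Hypothetical build.sbt fragment: emit the unused-import warning only for
// Scala 2.12, where the legacy -Ywarn-unused-import flag still exists.
Compile / scalacOptions ++= {
  CrossVersion.partialVersion(scalaVersion.value) match {
    case Some((2, 12)) => Seq("-Ywarn-unused-import")
    case _ => Seq.empty // Scala 2.13 would use "-Wunused:imports" instead
  }
}
```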
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- pom.xml | 5 +---- project/SparkBuild.scala | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 3ae2e7420e154..85cf5a00b0b24 100644 --- a/pom.xml +++ b/pom.xml @@ -164,7 +164,6 @@ 3.2.2 2.12.10 2.12 - -Ywarn-unused-import 2.0.0 --test @@ -2538,7 +2537,6 @@ -deprecation -feature -explaintypes - ${scalac.arg.unused-imports} -target:jvm-1.8 @@ -3262,13 +3260,12 @@ - + scala-2.13 2.13.3 2.13 - -Wconf:cat=unused-imports:e diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 55c87fcb3aaa2..05413b7091ad9 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -221,6 +221,7 @@ object SparkBuild extends PomBuild { Seq( "-Xfatal-warnings", "-deprecation", + "-Ywarn-unused-import", "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them ) } else { @@ -230,6 +231,8 @@ object SparkBuild extends PomBuild { // see `scalac -Wconf:help` for details "-Wconf:cat=deprecation:wv,any:e", // 2.13-specific warning hits to be muted (as narrowly as possible) and addressed separately + // TODO(SPARK-33499): Enable this option when Scala 2.12 is no longer supported. + // "-Wunused:imports", "-Wconf:cat=lint-multiarg-infix:wv", "-Wconf:cat=other-nullary-override:wv", "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", From 870d4095336f29f5bef77b9232d6cb9d025987dd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 20 Nov 2020 12:53:45 +0000 Subject: [PATCH 0528/1009] [SPARK-32512][SQL][TESTS][FOLLOWUP] Remove duplicate tests for ALTER TABLE .. PARTITIONS from DataSourceV2SQLSuite ### What changes were proposed in this pull request? Remove tests from `DataSourceV2SQLSuite` that were copied to `AlterTablePartitionV2SQLSuite` by https://github.com/apache/spark/pull/29339. ### Why are the changes needed? - To reduce tests execution time - To improve test maintenance ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the modified tests: ``` $ build/sbt "test:testOnly *DataSourceV2SQLSuite" $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" ``` Closes #30444 from MaxGekk/dedup-tests-AlterTablePartitionV2SQLSuite. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/connector/DataSourceV2SQLSuite.scala | 53 ------------------- 1 file changed, 53 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ddafa1bb5070a..0057415ff6e1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,7 +24,6 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ @@ -43,7 +42,6 @@ class DataSourceV2SQLSuite with AlterTableTests with DatasourceV2SQLBase { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ private val v2Source = classOf[FakeV2Provider].getName override protected val v2Format = v2Source @@ -1980,57 +1978,6 @@ class DataSourceV2SQLSuite } } - test("ALTER TABLE RECOVER PARTITIONS") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t RECOVER PARTITIONS") - } - assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) - } - } - - test("ALTER TABLE ADD PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - } - } - - test("ALTER TABLE RENAME PARTITION") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) - } - } - - test("ALTER TABLE DROP PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - test("ALTER TABLE SerDe properties") { val t = "testcat.ns1.ns2.tbl" withTable(t) { From cbc8be24c896ed25be63ef9a111ff015af4fabec Mon Sep 17 00:00:00 2001 From: liucht Date: Fri, 20 Nov 2020 22:19:35 +0900 Subject: [PATCH 0529/1009] 
[SPARK-33422][DOC] Fix the display of the left menu items ### What changes were proposed in this pull request? Limit the height of the left menu area so that a vertical scroll bar is displayed ### Why are the changes needed? The bottom menu items cannot be displayed when the left menu tree is long ### Does this PR introduce any user-facing change? Yes. When there are more menu items than fit on the screen, they can now be reached with the vertical scroll bar. before: ![image](https://user-images.githubusercontent.com/28332082/98805115-16995d80-2452-11eb-933a-3b72c14bea78.png) after: ![image](https://user-images.githubusercontent.com/28332082/98805418-7e4fa880-2452-11eb-9a9b-8d265078297c.png) ### How was this patch tested? NA Closes #30335 from liucht-inspur/master. Authored-by: liucht Signed-off-by: HyukjinKwon --- docs/css/main.css | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/css/main.css b/docs/css/main.css index 8168a46f9a437..8b279a157c2b6 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -162,6 +162,7 @@ body .container-wrapper { margin-right: auto; border-radius: 15px; position: relative; + min-height: 100vh; } .title { @@ -264,6 +265,7 @@ a:hover code { max-width: 914px; line-height: 1.6; /* Inspired by Github's wiki style */ padding-left: 30px; + min-height: 100vh; } .dropdown-menu { @@ -325,6 +327,7 @@ a.anchorjs-link:hover { text-decoration: none; } border-bottom-width: 0px; margin-top: 0px; width: 210px; + height: 80%; float: left; position: fixed; overflow-y: scroll; From 3384bda453d0e728be311ce458e00d70d2484973 Mon Sep 17 00:00:00 2001 From: ulysses Date: Fri, 20 Nov 2020 13:23:08 +0000 Subject: [PATCH 0530/1009] [SPARK-33468][SQL] ParseUrl in ANSI mode should fail if input string is not a valid url ### What changes were proposed in this pull request? With `ParseUrl`, instead of returning null, we now throw an exception if the input string is not a valid url. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, users will get an exception if they `set spark.sql.ansi.enabled=true`. ### How was this patch tested? Add test. Closes #30399 from ulysses-you/SPARK-33468. Lead-authored-by: ulysses Co-authored-by: ulysses-you Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 1 + .../catalyst/expressions/stringExpressions.scala | 7 +++++-- .../expressions/StringExpressionsSuite.scala | 14 ++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index fd7208615a09f..870ed0aa0daaa 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -135,6 +135,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url.
### SQL Operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 16e22940495f1..9f92181b34df1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1357,8 +1357,9 @@ object ParseUrl { 1 """, since = "2.0.0") -case class ParseUrl(children: Seq[Expression]) +case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { + def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) override def nullable: Boolean = true override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType) @@ -1404,7 +1405,9 @@ case class ParseUrl(children: Seq[Expression]) try { new URI(url.toString) } catch { - case e: URISyntaxException => null + case e: URISyntaxException if failOnError => + throw new IllegalArgumentException(s"Find an invalid url string ${url.toString}", e) + case _: URISyntaxException => null } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index a1b6cec24f23f..730574a4b9846 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -943,6 +943,20 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { GenerateUnsafeProjection.generate(ParseUrl(Seq(Literal("\"quote"), Literal("\"quote"))) :: Nil) } + test("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + val msg = intercept[IllegalArgumentException] { + evaluateWithoutCodegen( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST"))) + }.getMessage + assert(msg.contains("Find an invalid url string")) + } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation( + ParseUrl(Seq("https://a.b.c/index.php?params1=a|b&params2=x", "HOST")), null) + } + } + test("Sentences") { val nullString = Literal.create(null, StringType) checkEvaluation(Sentences(nullString, nullString, nullString), null) From 47326ac1c6a296a84af76d832061741740ae9f12 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 20 Nov 2020 08:40:14 -0800 Subject: [PATCH 0531/1009] [SPARK-28704][SQL][TEST] Add back skipped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+ ### What changes were proposed in this pull request? We skip the test HiveExternalCatalogVersionsSuite when testing with JAVA_9 or later because our previous versions do not support JAVA_9 or later. We now add it back since we have a version that supports JAVA_9 or later. ### Why are the changes needed? To recover test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Check CI logs. Closes #30428 from AngersZhuuuu/SPARK-28704.
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../HiveExternalCatalogVersionsSuite.scala | 22 +++++++------------ 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 38a8c492d77a7..4cafd3e8ca626 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,7 +52,6 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ - private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -149,7 +148,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - private def prepare(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() + val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -199,7 +200,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -211,23 +212,14 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } - override def beforeAll(): Unit = { - super.beforeAll() - if (!isTestAtLeastJava9) { - prepare() - } - } - test("backward compatibility") { - // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ - assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -252,7 +244,9 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. 
case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + versions + .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) } protected var spark: SparkSession = _ From 116b7b72a1980a0768413329f28591f772822827 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 20 Nov 2020 11:35:34 -0600 Subject: [PATCH 0532/1009] [SPARK-33466][ML][PYTHON] Imputer support mode(most_frequent) strategy ### What changes were proposed in this pull request? impl a new strategy `mode`: replace missing using the most frequent value along each column. ### Why are the changes needed? it is highly scalable, and had been a function in [sklearn.impute.SimpleImputer](https://scikit-learn.org/stable/modules/generated/sklearn.impute.SimpleImputer.html#sklearn.impute.SimpleImputer) for a long time. ### Does this PR introduce _any_ user-facing change? Yes, a new strategy is added ### How was this patch tested? updated testsuites Closes #30397 from zhengruifeng/imputer_max_freq. Lead-authored-by: Ruifeng Zheng Co-authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/feature/Imputer.scala | 49 ++-- .../spark/ml/feature/ImputerSuite.scala | 211 ++++++++++-------- python/pyspark/ml/feature.py | 5 +- 3 files changed, 144 insertions(+), 121 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index ad1010da5c104..03ebe0299f63f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -39,14 +39,16 @@ private[feature] trait ImputerParams extends Params with HasInputCol with HasInp * The imputation strategy. Currently only "mean" and "median" are supported. * If "mean", then replace missing values using the mean value of the feature. * If "median", then replace missing values using the approximate median value of the feature. + * If "mode", then replace missing using the most frequent value of the feature. * Default: mean * * @group param */ final val strategy: Param[String] = new Param(this, "strategy", s"strategy for imputation. " + s"If ${Imputer.mean}, then replace missing values using the mean value of the feature. " + - s"If ${Imputer.median}, then replace missing values using the median value of the feature.", - ParamValidators.inArray[String](Array(Imputer.mean, Imputer.median))) + s"If ${Imputer.median}, then replace missing values using the median value of the feature. " + + s"If ${Imputer.mode}, then replace missing values using the most frequent value of " + + s"the feature.", ParamValidators.inArray[String](Imputer.supportedStrategies)) /** @group getParam */ def getStrategy: String = $(strategy) @@ -104,7 +106,7 @@ private[feature] trait ImputerParams extends Params with HasInputCol with HasInp * For example, if the input column is IntegerType (1, 2, 4, null), * the output will be IntegerType (1, 2, 4, 2) after mean imputation. * - * Note that the mean/median value is computed after filtering out missing values. + * Note that the mean/median/mode value is computed after filtering out missing values. * All Null values in the input columns are treated as missing, and so are also imputed. 
For * computing median, DataFrameStatFunctions.approxQuantile is used with a relative error of 0.001. */ @@ -132,7 +134,7 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) def setOutputCols(value: Array[String]): this.type = set(outputCols, value) /** - * Imputation strategy. Available options are ["mean", "median"]. + * Imputation strategy. Available options are ["mean", "median", "mode"]. * @group setParam */ @Since("2.2.0") @@ -151,39 +153,42 @@ class Imputer @Since("2.2.0") (@Since("2.2.0") override val uid: String) val spark = dataset.sparkSession val (inputColumns, _) = getInOutCols() - val cols = inputColumns.map { inputCol => when(col(inputCol).equalTo($(missingValue)), null) .when(col(inputCol).isNaN, null) .otherwise(col(inputCol)) - .cast("double") + .cast(DoubleType) .as(inputCol) } + val numCols = cols.length val results = $(strategy) match { case Imputer.mean => // Function avg will ignore null automatically. // For a column only containing null, avg will return null. val row = dataset.select(cols.map(avg): _*).head() - Array.range(0, inputColumns.length).map { i => - if (row.isNullAt(i)) { - Double.NaN - } else { - row.getDouble(i) - } - } + Array.tabulate(numCols)(i => if (row.isNullAt(i)) Double.NaN else row.getDouble(i)) case Imputer.median => // Function approxQuantile will ignore null automatically. // For a column only containing null, approxQuantile will return an empty array. dataset.select(cols: _*).stat.approxQuantile(inputColumns, Array(0.5), $(relativeError)) - .map { array => - if (array.isEmpty) { - Double.NaN - } else { - array.head - } - } + .map(_.headOption.getOrElse(Double.NaN)) + + case Imputer.mode => + import spark.implicits._ + // If there is more than one mode, choose the smallest one to keep in line + // with sklearn.impute.SimpleImputer (using scipy.stats.mode). + val modes = dataset.select(cols: _*).flatMap { row => + // Ignore null. + Iterator.range(0, numCols) + .flatMap(i => if (row.isNullAt(i)) None else Some((i, row.getDouble(i)))) + }.toDF("index", "value") + .groupBy("index", "value").agg(negate(count(lit(0))).as("negative_count")) + .groupBy("index").agg(min(struct("negative_count", "value")).as("mode")) + .select("index", "mode.value") + .as[(Int, Double)].collect().toMap + Array.tabulate(numCols)(i => modes.getOrElse(i, Double.NaN)) } val emptyCols = inputColumns.zip(results).filter(_._2.isNaN).map(_._1) @@ -212,6 +217,10 @@ object Imputer extends DefaultParamsReadable[Imputer] { /** strategy names that Imputer currently supports. 
*/ private[feature] val mean = "mean" private[feature] val median = "median" + private[feature] val mode = "mode" + + /* Set of strategies that Imputer supports */ + private[feature] val supportedStrategies = Array(mean, median, mode) @Since("2.2.0") override def load(path: String): Imputer = super.load(path) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala index dfee2b4029c8b..30887f55638f9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala @@ -28,13 +28,14 @@ import org.apache.spark.sql.types._ class ImputerSuite extends MLTest with DefaultReadWriteTest { test("Imputer for Double with default missing Value NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 4.0, 1.0, 1.0, 4.0, 4.0), - (1, 11.0, 12.0, 11.0, 11.0, 12.0, 12.0), - (2, 3.0, Double.NaN, 3.0, 3.0, 10.0, 12.0), - (3, Double.NaN, 14.0, 5.0, 3.0, 14.0, 14.0) - )).toDF("id", "value1", "value2", "expected_mean_value1", "expected_median_value1", - "expected_mean_value2", "expected_median_value2") + val df = spark.createDataFrame(Seq( + (0, 1.0, 4.0, 1.0, 1.0, 1.0, 4.0, 4.0, 4.0), + (1, 11.0, 12.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0), + (2, 3.0, Double.NaN, 3.0, 3.0, 3.0, 10.0, 12.0, 4.0), + (3, Double.NaN, 14.0, 5.0, 3.0, 1.0, 14.0, 14.0, 14.0) + )).toDF("id", "value1", "value2", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1", + "expected_mean_value2", "expected_median_value2", "expected_mode_value2") val imputer = new Imputer() .setInputCols(Array("value1", "value2")) .setOutputCols(Array("out1", "out2")) @@ -42,23 +43,25 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer for Double with default missing Value NaN") { - val df1 = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 11.0, 11.0, 11.0), - (2, 3.0, 3.0, 3.0), - (3, Double.NaN, 5.0, 3.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df1 = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 11.0, 11.0, 11.0, 11.0), + (2, 3.0, 3.0, 3.0, 3.0), + (3, Double.NaN, 5.0, 3.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer1 = new Imputer() .setInputCol("value") .setOutputCol("out") ImputerSuite.iterateStrategyTest(false, imputer1, df1) - val df2 = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 12.0, 12.0, 12.0), - (2, Double.NaN, 10.0, 12.0), - (3, 14.0, 14.0, 14.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df2 = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 12.0, 12.0, 12.0, 12.0), + (2, Double.NaN, 10.0, 12.0, 4.0), + (3, 14.0, 14.0, 14.0, 14.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer2 = new Imputer() .setInputCol("value") .setOutputCol("out") @@ -66,12 +69,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer should handle NaNs when computing surrogate value, if missingValue is not NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 3.0, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN), - (3, -1.0, 2.0, 1.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 3.0, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, 
Double.NaN, Double.NaN), + (3, -1.0, 2.0, 1.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) .setMissingValue(-1.0) ImputerSuite.iterateStrategyTest(true, imputer, df) @@ -79,64 +83,69 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { test("Single Column: Imputer should handle NaNs when computing surrogate value," + " if missingValue is not NaN") { - val df = spark.createDataFrame( Seq( - (0, 1.0, 1.0, 1.0), - (1, 3.0, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN), - (3, -1.0, 2.0, 1.0) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0, 1.0, 1.0, 1.0), + (1, 3.0, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, Double.NaN, Double.NaN), + (3, -1.0, 2.0, 1.0, 1.0) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setMissingValue(-1.0) ImputerSuite.iterateStrategyTest(false, imputer, df) } test("Imputer for Float with missing Value -1.0") { - val df = spark.createDataFrame( Seq( - (0, 1.0F, 1.0F, 1.0F), - (1, 3.0F, 3.0F, 3.0F), - (2, 10.0F, 10.0F, 10.0F), - (3, 10.0F, 10.0F, 10.0F), - (4, -1.0F, 6.0F, 3.0F) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0F, 1.0F, 1.0F, 1.0F), + (1, 3.0F, 3.0F, 3.0F, 3.0F), + (2, 10.0F, 10.0F, 10.0F, 10.0F), + (3, 10.0F, 10.0F, 10.0F, 10.0F), + (4, -1.0F, 6.0F, 3.0F, 10.0F) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) .setMissingValue(-1) ImputerSuite.iterateStrategyTest(true, imputer, df) } test("Single Column: Imputer for Float with missing Value -1.0") { - val df = spark.createDataFrame( Seq( - (0, 1.0F, 1.0F, 1.0F), - (1, 3.0F, 3.0F, 3.0F), - (2, 10.0F, 10.0F, 10.0F), - (3, 10.0F, 10.0F, 10.0F), - (4, -1.0F, 6.0F, 3.0F) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq( + (0, 1.0F, 1.0F, 1.0F, 1.0F), + (1, 3.0F, 3.0F, 3.0F, 3.0F), + (2, 10.0F, 10.0F, 10.0F, 10.0F), + (3, 10.0F, 10.0F, 10.0F, 10.0F), + (4, -1.0F, 6.0F, 3.0F, 10.0F) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setMissingValue(-1) ImputerSuite.iterateStrategyTest(false, imputer, df) } test("Imputer should impute null as well as 'missingValue'") { - val rawDf = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 10.0, 10.0, 10.0), - (2, 10.0, 10.0, 10.0), - (3, Double.NaN, 8.0, 10.0), - (4, -1.0, 8.0, 10.0) - )).toDF("id", "rawValue", "expected_mean_value", "expected_median_value") + val rawDf = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 10.0, 10.0, 10.0, 10.0), + (2, 10.0, 10.0, 10.0, 10.0), + (3, Double.NaN, 8.0, 10.0, 10.0), + (4, -1.0, 8.0, 10.0, 10.0) + )).toDF("id", "rawValue", + "expected_mean_value", "expected_median_value", "expected_mode_value") val df = rawDf.selectExpr("*", "IF(rawValue=-1.0, null, rawValue) as value") val imputer = new Imputer().setInputCols(Array("value")).setOutputCols(Array("out")) ImputerSuite.iterateStrategyTest(true, imputer, df) } test("Single Column: Imputer should impute null as well as 'missingValue'") { 
- val rawDf = spark.createDataFrame( Seq( - (0, 4.0, 4.0, 4.0), - (1, 10.0, 10.0, 10.0), - (2, 10.0, 10.0, 10.0), - (3, Double.NaN, 8.0, 10.0), - (4, -1.0, 8.0, 10.0) - )).toDF("id", "rawValue", "expected_mean_value", "expected_median_value") + val rawDf = spark.createDataFrame(Seq( + (0, 4.0, 4.0, 4.0, 4.0), + (1, 10.0, 10.0, 10.0, 10.0), + (2, 10.0, 10.0, 10.0, 10.0), + (3, Double.NaN, 8.0, 10.0, 10.0), + (4, -1.0, 8.0, 10.0, 10.0) + )).toDF("id", "rawValue", + "expected_mean_value", "expected_median_value", "expected_mode_value") val df = rawDf.selectExpr("*", "IF(rawValue=-1.0, null, rawValue) as value") val imputer = new Imputer().setInputCol("value").setOutputCol("out") ImputerSuite.iterateStrategyTest(false, imputer, df) @@ -187,7 +196,7 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer throws exception when surrogate cannot be computed") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, Double.NaN, 1.0, 1.0), (1, Double.NaN, 3.0, 3.0), (2, Double.NaN, Double.NaN, Double.NaN) @@ -205,12 +214,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer throws exception when surrogate cannot be computed") { - val df = spark.createDataFrame( Seq( - (0, Double.NaN, 1.0, 1.0), - (1, Double.NaN, 3.0, 3.0), - (2, Double.NaN, Double.NaN, Double.NaN) - )).toDF("id", "value", "expected_mean_value", "expected_median_value") - Seq("mean", "median").foreach { strategy => + val df = spark.createDataFrame(Seq( + (0, Double.NaN, 1.0, 1.0, 1.0), + (1, Double.NaN, 3.0, 3.0, 3.0), + (2, Double.NaN, Double.NaN, Double.NaN, Double.NaN) + )).toDF("id", "value", + "expected_mean_value", "expected_median_value", "expected_mode_value") + Seq("mean", "median", "mode").foreach { strategy => val imputer = new Imputer().setInputCol("value").setOutputCol("out") .setStrategy(strategy) withClue("Imputer should fail all the values are invalid") { @@ -223,12 +233,12 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer input & output column validation") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, 1.0, 1.0, 1.0), (1, Double.NaN, 3.0, 3.0), (2, Double.NaN, Double.NaN, Double.NaN) )).toDF("id", "value1", "value2", "value3") - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => withClue("Imputer should fail if inputCols and outputCols are different length") { val e: IllegalArgumentException = intercept[IllegalArgumentException] { val imputer = new Imputer().setStrategy(strategy) @@ -306,13 +316,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer for IntegerType with default missing value null") { - - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (null, 5, 3) - )).toDF("value1", "expected_mean_value1", "expected_median_value1") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (null, 5, 3, 1) + )).toDF("value1", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1") val imputer = new Imputer() .setInputCols(Array("value1")) @@ -327,12 +337,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column Imputer for IntegerType with default missing value null") { - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (null, 5, 3) - 
)).toDF("value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (null, 5, 3, 1) + )).toDF("value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer() .setInputCol("value") @@ -347,13 +358,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Imputer for IntegerType with missing value -1") { - - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (-1, 5, 3) - )).toDF("value1", "expected_mean_value1", "expected_median_value1") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (-1, 5, 3, 1) + )).toDF("value1", + "expected_mean_value1", "expected_median_value1", "expected_mode_value1") val imputer = new Imputer() .setInputCols(Array("value1")) @@ -369,12 +380,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Single Column: Imputer for IntegerType with missing value -1") { - val df = spark.createDataFrame(Seq[(Integer, Integer, Integer)]( - (1, 1, 1), - (11, 11, 11), - (3, 3, 3), - (-1, 5, 3) - )).toDF("value", "expected_mean_value", "expected_median_value") + val df = spark.createDataFrame(Seq[(Integer, Integer, Integer, Integer)]( + (1, 1, 1, 1), + (11, 11, 11, 11), + (3, 3, 3, 3), + (-1, 5, 3, 1) + )).toDF("value", + "expected_mean_value", "expected_median_value", "expected_mode_value") val imputer = new Imputer() .setInputCol("value") @@ -402,13 +414,13 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } test("Compare single/multiple column(s) Imputer in pipeline") { - val df = spark.createDataFrame( Seq( + val df = spark.createDataFrame(Seq( (0, 1.0, 4.0), (1, 11.0, 12.0), (2, 3.0, Double.NaN), (3, Double.NaN, 14.0) )).toDF("id", "value1", "value2") - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => val multiColsImputer = new Imputer() .setInputCols(Array("value1", "value2")) .setOutputCols(Array("result1", "result2")) @@ -450,11 +462,12 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { object ImputerSuite { /** - * Imputation strategy. Available options are ["mean", "median"]. - * @param df DataFrame with columns "id", "value", "expected_mean", "expected_median" + * Imputation strategy. Available options are ["mean", "median", "mode"]. + * @param df DataFrame with columns "id", "value", "expected_mean", "expected_median", + * "expected_mode". */ def iterateStrategyTest(isMultiCol: Boolean, imputer: Imputer, df: DataFrame): Unit = { - Seq("mean", "median").foreach { strategy => + Seq("mean", "median", "mode").foreach { strategy => imputer.setStrategy(strategy) val model = imputer.fit(df) val resultDF = model.transform(df) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 4d898bd5fffa8..82b9a6db1eb92 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1507,7 +1507,8 @@ class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, Has strategy = Param(Params._dummy(), "strategy", "strategy for imputation. If mean, then replace missing values using the mean " "value of the feature. If median, then replace missing values using the " - "median value of the feature.", + "median value of the feature. 
If mode, then replace missing using the most " + "frequent value of the feature.", typeConverter=TypeConverters.toString) missingValue = Param(Params._dummy(), "missingValue", @@ -1541,7 +1542,7 @@ class Imputer(JavaEstimator, _ImputerParams, JavaMLReadable, JavaMLWritable): numeric type. Currently Imputer does not support categorical features and possibly creates incorrect values for a categorical feature. - Note that the mean/median value is computed after filtering out missing values. + Note that the mean/median/mode value is computed after filtering out missing values. All Null values in the input columns are treated as missing, and so are also imputed. For computing median, :py:meth:`pyspark.sql.DataFrame.approxQuantile` is used with a relative error of `0.001`. From a1a3d5cb02e380156eab320bf6cf512c01b11284 Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 20 Nov 2020 10:14:37 -0800 Subject: [PATCH 0533/1009] [MINOR][TESTS][DOCS] Use fully-qualified class name in docker integration test ### What changes were proposed in this pull request? change ``` ./build/sbt -Pdocker-integration-tests "testOnly *xxxIntegrationSuite" ``` to ``` ./build/sbt -Pdocker-integration-tests "testOnly org.apache.spark.sql.jdbc.xxxIntegrationSuite" ``` ### Why are the changes needed? We only want to start v1 ```xxxIntegrationSuite```, not the newly added```v2.xxxIntegrationSuite```. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually checked Closes #30448 from huaxingao/dockertest. Authored-by: Huaxin Gao Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 3 ++- .../apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala | 3 ++- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 3 ++- .../org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index 4b9acd0d39f3f..d086c8cdcc589 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -29,7 +29,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., ibmcom/db2:11.5.4.0): * {{{ * DB2_DOCKER_IMAGE_NAME=ibmcom/db2:11.5.4.0 - * ./build/sbt -Pdocker-integration-tests "testOnly *DB2IntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index f1ffc8f0f3dc7..939a07238934b 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., 2019-GA-ubuntu-16.04): * {{{ * MSSQLSERVER_DOCKER_IMAGE_NAME=2019-GA-ubuntu-16.04 - * ./build/sbt -Pdocker-integration-tests "testOnly *MsSqlServerIntegrationSuite" 
+ * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MsSqlServerIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 6f96ab33d0fee..68f0dbc057c1f 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -28,7 +28,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., mysql:5.7.31): * {{{ * MYSQL_DOCKER_IMAGE_NAME=mysql:5.7.31 - * ./build/sbt -Pdocker-integration-tests "testOnly *MySQLIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MySQLIntegrationSuite" * }}} */ @DockerTest diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index fa13100b5fdc8..0347c98bba2c4 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -30,7 +30,8 @@ import org.apache.spark.tags.DockerTest * To run this test suite for a specific version (e.g., postgres:13.0): * {{{ * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 - * ./build/sbt -Pdocker-integration-tests "testOnly *PostgresIntegrationSuite" + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite" * }}} */ @DockerTest From 247977893473f810ffbcda31ee2710e445120e42 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 20 Nov 2020 14:59:56 -0800 Subject: [PATCH 0534/1009] [SPARK-33492][SQL] DSv2: Append/Overwrite/ReplaceTable should invalidate cache ### What changes were proposed in this pull request? This adds changes in the following places: - logic to also refresh caches referencing the target table in v2 `AppendDataExec`, `OverwriteByExpressionExec`, `OverwritePartitionsDynamicExec`, as well as their v1 fallbacks `AppendDataExecV1` and `OverwriteByExpressionExecV1`. - logic to invalidate caches referencing the target table in v2 `ReplaceTableAsSelectExec` and its atomic version `AtomicReplaceTableAsSelectExec`. These are only supported in v2 at the moment though. In addition to the above, in order to test the v1 write fallback behavior, I extended `InMemoryTableWithV1Fallback` to also support batch reads. ### Why are the changes needed? Currently in DataSource v2 we don't refresh or invalidate caches referencing the target table when the table content is changed by operations such as append, overwrite, or replace table. This is different from DataSource v1, and could potentially cause data correctness issue if the staled caches are queried later. ### Does this PR introduce _any_ user-facing change? Yes. Now When a data source v2 is cached (either directly or indirectly), all the relevant caches will be refreshed or invalidated if the table is replaced. ### How was this patch tested? Added unit tests for the new code path. Closes #30429 from sunchao/SPARK-33492. 
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 13 +-- .../datasources/v2/V1FallbackWriters.scala | 21 +++-- .../v2/WriteToDataSourceV2Exec.scala | 38 ++++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 78 ++++++++++++++++++ .../sql/connector/V1WriteFallbackSuite.scala | 79 ++++++++++++++++++- 5 files changed, 212 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 21abfc2816ee4..e5c29312b80e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -147,6 +147,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match { case staging: StagingTableCatalog => AtomicReplaceTableAsSelectExec( + session, staging, ident, parts, @@ -157,6 +158,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat orCreate = orCreate) :: Nil case _ => ReplaceTableAsSelectExec( + session, catalog, ident, parts, @@ -170,9 +172,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query) :: Nil + AppendDataExecV1(v1, writeOptions.asOptions, query, r) :: Nil case v2 => - AppendDataExec(v2, writeOptions.asOptions, planLater(query)) :: Nil + AppendDataExec(session, v2, r, writeOptions.asOptions, planLater(query)) :: Nil } case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => @@ -184,14 +186,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat }.toArray r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query) :: Nil + OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query, r) :: Nil case v2 => - OverwriteByExpressionExec(v2, filters, writeOptions.asOptions, planLater(query)) :: Nil + OverwriteByExpressionExec(session, v2, r, filters, + writeOptions.asOptions, planLater(query)) :: Nil } case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => OverwritePartitionsDynamicExec( - r.table.asWritable, writeOptions.asOptions, planLater(query)) :: Nil + session, r.table.asWritable, r, writeOptions.asOptions, planLater(query)) :: Nil case DeleteFromTable(relation, condition) => relation match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 560da39314b36..af7721588edeb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -37,10 +37,11 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, - plan: LogicalPlan) extends V1FallbackWriters { + plan: LogicalPlan, + v2Relation: DataSourceV2Relation) extends 
V1FallbackWriters { override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write()) + writeWithV1(newWriteBuilder().buildForV1Write(), Some(v2Relation)) } } @@ -59,7 +60,8 @@ case class OverwriteByExpressionExecV1( table: SupportsWrite, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, - plan: LogicalPlan) extends V1FallbackWriters { + plan: LogicalPlan, + v2Relation: DataSourceV2Relation) extends V1FallbackWriters { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -68,10 +70,10 @@ case class OverwriteByExpressionExecV1( override protected def run(): Seq[InternalRow] = { newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write()) + writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), Some(v2Relation)) case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write()) + writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), Some(v2Relation)) case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") @@ -112,9 +114,14 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { trait SupportsV1Write extends SparkPlan { def plan: LogicalPlan - protected def writeWithV1(relation: InsertableRelation): Seq[InternalRow] = { + protected def writeWithV1( + relation: InsertableRelation, + v2Relation: Option[DataSourceV2Relation] = None): Seq[InternalRow] = { + val session = sqlContext.sparkSession // The `plan` is already optimized, we should not analyze and optimize it again. - relation.insert(AlreadyOptimized.dataFrame(sqlContext.sparkSession, plan), overwrite = false) + relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) + v2Relation.foreach(r => session.sharedState.cacheManager.recacheByPlan(session, r)) + Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 1421a9315c3a8..1648134d0a1b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -26,6 +26,7 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute @@ -127,6 +128,7 @@ case class AtomicCreateTableAsSelectExec( * ReplaceTableAsSelectStagingExec. */ case class ReplaceTableAsSelectExec( + session: SparkSession, catalog: TableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -146,6 +148,8 @@ case class ReplaceTableAsSelectExec( // 2. Writing to the new table fails, // 3. The table returned by catalog.createTable doesn't support writing. 
if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + uncacheTable(session, catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -169,6 +173,7 @@ case class ReplaceTableAsSelectExec( * is left untouched. */ case class AtomicReplaceTableAsSelectExec( + session: SparkSession, catalog: StagingTableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -180,6 +185,10 @@ case class AtomicReplaceTableAsSelectExec( override protected def run(): Seq[InternalRow] = { val schema = query.schema.asNullable + if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + uncacheTable(session, catalog, table, ident) + } val staged = if (orCreate) { catalog.stageCreateOrReplace( ident, schema, partitioning.toArray, properties.asJava) @@ -204,12 +213,16 @@ case class AtomicReplaceTableAsSelectExec( * Rows in the output data set are appended. */ case class AppendDataExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { - writeWithV2(newWriteBuilder().buildForBatch()) + val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } @@ -224,7 +237,9 @@ case class AppendDataExec( * AlwaysTrue to delete all rows. */ case class OverwriteByExpressionExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { @@ -234,7 +249,7 @@ case class OverwriteByExpressionExec( } override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { + val writtenRows = newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => writeWithV2(builder.truncate().buildForBatch()) @@ -244,9 +259,12 @@ case class OverwriteByExpressionExec( case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") } + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } + /** * Physical plan node for dynamic partition overwrite into a v2 table. * @@ -257,18 +275,22 @@ case class OverwriteByExpressionExec( * are not modified. 
*/ case class OverwritePartitionsDynamicExec( + session: SparkSession, table: SupportsWrite, + relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { + val writtenRows = newWriteBuilder() match { case builder: SupportsDynamicOverwrite => writeWithV2(builder.overwriteDynamicPartitions().buildForBatch()) case _ => throw new SparkException(s"Table does not support dynamic partition overwrite: $table") } + session.sharedState.cacheManager.recacheByPlan(session, relation) + writtenRows } } @@ -370,6 +392,15 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { Nil } + + protected def uncacheTable( + session: SparkSession, + catalog: TableCatalog, + table: Table, + ident: Identifier): Unit = { + val plan = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, plan, cascade = true) + } } object DataWritingSparkTask extends Logging { @@ -484,3 +515,4 @@ private[v2] case class DataWritingSparkTaskResult( * Sink progress information collected after commit. */ private[sql] case class StreamWriterCommitProgress(numOutputRows: Long) + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0057415ff6e1d..0e7aec8d80e01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -780,6 +780,84 @@ class DataSourceV2SQLSuite } } + test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { + Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"REPLACE TABLE $t USING foo AS SELECT id FROM source") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + } + + test("SPARK-33492: AppendData should refresh cache") { + import testImplicits._ + + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + Seq((1, "a")).toDF("i", "j").write.saveAsTable(t) + sql(s"CACHE TABLE $view AS SELECT i FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + + Seq((2, "b")).toDF("i", "j").write.mode(SaveMode.Append).saveAsTable(t) + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Row(2, "b") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Row(2) :: Nil) + } + } + } + + test("SPARK-33492: OverwriteByExpression should refresh cache") { + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"INSERT OVERWRITE TABLE $t VALUES (1, 'a')") + + 
assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a") :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + } + } + } + + test("SPARK-33492: OverwritePartitionsDynamic should refresh cache") { + import testImplicits._ + + val t = "testcat.ns.t" + val view = "view" + withTable(t) { + withTempView(view) { + Seq((1, "a", 1)).toDF("i", "j", "k").write.partitionBy("k") saveAsTable(t) + sql(s"CACHE TABLE $view AS SELECT i FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), Row(1, "a", 1) :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(1) :: Nil) + + Seq((2, "b", 1)).toDF("i", "j", "k").writeTo(t).overwritePartitions() + + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isDefined) + checkAnswer(sql(s"SELECT * FROM $t"), Row(2, "b", 1) :: Nil) + checkAnswer(sql(s"SELECT * FROM $view"), Row(2) :: Nil) + } + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index 4b52a4cbf4116..cba7dd35fb3bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -24,14 +24,17 @@ import scala.collection.mutable import org.scalatest.BeforeAndAfter +import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, V1Scan} import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} import org.apache.spark.sql.execution.datasources.DataSourceUtils +import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources._ @@ -145,6 +148,52 @@ class V1WriteFallbackSuite extends QueryTest with SharedSparkSession with Before SparkSession.setDefaultSession(spark) } } + + test("SPARK-33492: append fallback should refresh cache") { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + try { + val session = SparkSession.builder() + .master("local[1]") + .config(V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[V1FallbackTableCatalog].getName) + .getOrCreate() + val df = session.createDataFrame(Seq((1, "x"))) + df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") + session.catalog.cacheTable("test") + checkAnswer(session.read.table("test"), Row(1, "x") :: Nil) + + val df2 = session.createDataFrame(Seq((2, "y"))) + df2.writeTo("test").append() + checkAnswer(session.read.table("test"), Row(1, "x") :: Row(2, "y") :: Nil) + + } finally { + SparkSession.setActiveSession(spark) + 
SparkSession.setDefaultSession(spark) + } + } + + test("SPARK-33492: overwrite fallback should refresh cache") { + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + try { + val session = SparkSession.builder() + .master("local[1]") + .config(V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[V1FallbackTableCatalog].getName) + .getOrCreate() + val df = session.createDataFrame(Seq((1, "x"))) + df.write.mode("append").option("name", "t1").format(v2Format).saveAsTable("test") + session.catalog.cacheTable("test") + checkAnswer(session.read.table("test"), Row(1, "x") :: Nil) + + val df2 = session.createDataFrame(Seq((2, "y"))) + df2.writeTo("test").overwrite(lit(true)) + checkAnswer(session.read.table("test"), Row(2, "y") :: Nil) + + } finally { + SparkSession.setActiveSession(spark) + SparkSession.setDefaultSession(spark) + } + } } class V1WriteFallbackSessionCatalogSuite @@ -177,6 +226,7 @@ class V1FallbackTableCatalog extends TestV2SessionCatalogBase[InMemoryTableWithV properties: util.Map[String, String]): InMemoryTableWithV1Fallback = { val t = new InMemoryTableWithV1Fallback(name, schema, partitions, properties) InMemoryV1Provider.tables.put(name, t) + tables.put(Identifier.of(Array("default"), name), t) t } } @@ -272,7 +322,7 @@ class InMemoryTableWithV1Fallback( override val partitioning: Array[Transform], override val properties: util.Map[String, String]) extends Table - with SupportsWrite { + with SupportsWrite with SupportsRead { partitioning.foreach { t => if (!t.isInstanceOf[IdentityTransform]) { @@ -281,6 +331,7 @@ class InMemoryTableWithV1Fallback( } override def capabilities: util.Set[TableCapability] = Set( + TableCapability.BATCH_READ, TableCapability.V1_BATCH_WRITE, TableCapability.OVERWRITE_BY_FILTER, TableCapability.TRUNCATE).asJava @@ -338,6 +389,30 @@ class InMemoryTableWithV1Fallback( } } } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = + new V1ReadFallbackScanBuilder(schema) + + private class V1ReadFallbackScanBuilder(schema: StructType) extends ScanBuilder { + override def build(): Scan = new V1ReadFallbackScan(schema) + } + + private class V1ReadFallbackScan(schema: StructType) extends V1Scan { + override def readSchema(): StructType = schema + override def toV1TableScan[T <: BaseRelation with TableScan](context: SQLContext): T = + new V1TableScan(context, schema).asInstanceOf[T] + } + + private class V1TableScan( + context: SQLContext, + requiredSchema: StructType) extends BaseRelation with TableScan { + override def sqlContext: SQLContext = context + override def schema: StructType = requiredSchema + override def buildScan(): RDD[Row] = { + val data = InMemoryV1Provider.getTableData(context.sparkSession, name).collect() + context.sparkContext.makeRDD(data) + } + } } /** A rule that fails if a query plan is analyzed twice. */ From de0f50abf407ec972c6a80ae80853a66b24468f4 Mon Sep 17 00:00:00 2001 From: anchovYu Date: Sat, 21 Nov 2020 08:33:39 +0900 Subject: [PATCH 0535/1009] [SPARK-32670][SQL] Group exception messages in Catalyst Analyzer in one file ### What changes were proposed in this pull request? Group all messages of `AnalysisExcpetions` created and thrown directly in org.apache.spark.sql.catalyst.analysis.Analyzer in one file. * Create a new object: `org.apache.spark.sql.CatalystErrors` with many exception-creating functions. * When the `Analyzer` wants to create and throw a new `AnalysisException`, call functions of `CatalystErrors` ### Why are the changes needed? 
This is the sample PR that groups exception messages together in several files. It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. ### Naming of exception functions All function names ended with `Error`. * For specific errors like `groupingIDMismatch` and `groupingColInvalid`, directly use them as name, just like `groupingIDMismatchError` and `groupingColInvalidError`. * For generic errors like `dataTypeMismatch`, * if confident with the context, prefix and condition can be added, like `pivotValDataTypeMismatchError` * if not sure about the context, add a `For` suffix of the specific component that this exception is related to, like `dataTypeMismatchForDeserializerError` Closes #29497 from anchovYu/32670. Lead-authored-by: anchovYu Co-authored-by: anchovYu Signed-off-by: HyukjinKwon --- .../spark/sql/QueryCompilationErrors.scala | 164 ++++++++++++++++++ .../sql/catalyst/analysis/Analyzer.scala | 84 +++------ 2 files changed, 192 insertions(+), 56 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala new file mode 100644 index 0000000000000..c680502cb328f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.{Expression, GroupingID} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.TableChange +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} + +/** + * Object for grouping all error messages of the query compilation. + * Currently it includes all AnalysisExcpetions created and thrown directly in + * org.apache.spark.sql.catalyst.analysis.Analyzer. 
+ */ +object QueryCompilationErrors { + def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { + new AnalysisException( + s"Columns of grouping_id (${groupingID.groupByExprs.mkString(",")}) " + + s"does not match grouping columns (${groupByExprs.mkString(",")})") + } + + def groupingColInvalidError(groupingCol: Expression, groupByExprs: Seq[Expression]): Throwable = { + new AnalysisException( + s"Column of grouping ($groupingCol) can't be found " + + s"in grouping columns ${groupByExprs.mkString(",")}") + } + + def groupingSizeTooLargeError(sizeLimit: Int): Throwable = { + new AnalysisException( + s"Grouping sets size cannot be greater than $sizeLimit") + } + + def unorderablePivotColError(pivotCol: Expression): Throwable = { + new AnalysisException( + s"Invalid pivot column '$pivotCol'. Pivot columns must be comparable." + ) + } + + def nonLiteralPivotValError(pivotVal: Expression): Throwable = { + new AnalysisException( + s"Literal expressions required for pivot values, found '$pivotVal'") + } + + def pivotValDataTypeMismatchError(pivotVal: Expression, pivotCol: Expression): Throwable = { + new AnalysisException( + s"Invalid pivot value '$pivotVal': " + + s"value data type ${pivotVal.dataType.simpleString} does not match " + + s"pivot column data type ${pivotCol.dataType.catalogString}") + } + + def unsupportedIfNotExistsError(tableName: String): Throwable = { + new AnalysisException( + s"Cannot write, IF NOT EXISTS is not supported for table: $tableName") + } + + def nonPartitionColError(partitionName: String): Throwable = { + new AnalysisException( + s"PARTITION clause cannot contain a non-partition column name: $partitionName") + } + + def addStaticValToUnknownColError(staticName: String): Throwable = { + new AnalysisException( + s"Cannot add static value for unknown column: $staticName") + } + + def unknownStaticPartitionColError(name: String): Throwable = { + new AnalysisException(s"Unknown static partition column: $name") + } + + def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = { + new AnalysisException( + "Generators are not supported when it's nested in " + + "expressions, but got: " + toPrettySQL(trimmedNestedGenerator)) + } + + def moreThanOneGeneratorError(generators: Seq[Expression], clause: String): Throwable = { + new AnalysisException( + s"Only one generator allowed per $clause clause but found " + + generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + } + + def generatorOutsideSelectError(plan: LogicalPlan): Throwable = { + new AnalysisException( + "Generators are not supported outside the SELECT clause, but " + + "got: " + plan.simpleString(SQLConf.get.maxToStringFields)) + } + + def legacyStoreAssignmentPolicyError(): Throwable = { + val configKey = SQLConf.STORE_ASSIGNMENT_POLICY.key + new AnalysisException( + "LEGACY store assignment policy is disallowed in Spark data source V2. " + + s"Please set the configuration $configKey to other values.") + } + + def unresolvedUsingColForJoinError( + colName: String, plan: LogicalPlan, side: String): Throwable = { + new AnalysisException( + s"USING column `$colName` cannot be resolved on the $side " + + s"side of the join. 
The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]") + } + + def dataTypeMismatchForDeserializerError( + dataType: DataType, desiredType: String): Throwable = { + val quantifier = if (desiredType.equals("array")) "an" else "a" + new AnalysisException( + s"need $quantifier $desiredType field but got " + dataType.catalogString) + } + + def fieldNumberMismatchForDeserializerError( + schema: StructType, maxOrdinal: Int): Throwable = { + new AnalysisException( + s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + + "but failed as the number of fields does not line up.") + } + + def upCastFailureError( + fromStr: String, from: Expression, to: DataType, walkedTypePath: Seq[String]): Throwable = { + new AnalysisException( + s"Cannot up cast $fromStr from " + + s"${from.dataType.catalogString} to ${to.catalogString}.\n" + + s"The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + + "You can either add an explicit cast to the input data or choose a higher precision " + + "type of the field in the target object") + } + + def unsupportedAbstractDataTypeForUpCastError(gotType: AbstractDataType): Throwable = { + new AnalysisException( + s"UpCast only support DecimalType as AbstractDataType yet, but got: $gotType") + } + + def outerScopeFailureForNewInstanceError(className: String): Throwable = { + new AnalysisException( + s"Unable to generate an encoder for inner class `$className` without " + + "access to the scope that this class was defined in.\n" + + "Try moving this class out of its parent class.") + } + + def referenceColNotFoundForAlterTableChangesError( + after: TableChange.After, parentName: String): Throwable = { + new AnalysisException( + s"Couldn't find the reference column for $after at $parentName") + } + +} + + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8d95d8cf49d45..53c0ff687c6d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -44,6 +44,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} @@ -448,9 +449,7 @@ class Analyzer(override val catalogManager: CatalogManager) e.groupByExprs.map(_.canonicalized) == groupByExprs.map(_.canonicalized)) { Alias(gid, toPrettySQL(e))() } else { - throw new AnalysisException( - s"Columns of grouping_id (${e.groupByExprs.mkString(",")}) does not match " + - s"grouping columns (${groupByExprs.mkString(",")})") + throw QueryCompilationErrors.groupingIDMismatchError(e, groupByExprs) } case e @ Grouping(col: Expression) => val idx = groupByExprs.indexWhere(_.semanticEquals(col)) @@ -458,8 +457,7 @@ class Analyzer(override val catalogManager: CatalogManager) 
Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), Literal(1L)), ByteType), toPrettySQL(e))() } else { - throw new AnalysisException(s"Column of grouping ($col) can't be found " + - s"in grouping columns ${groupByExprs.mkString(",")}") + throw QueryCompilationErrors.groupingColInvalidError(col, groupByExprs) } } } @@ -575,8 +573,7 @@ class Analyzer(override val catalogManager: CatalogManager) val finalGroupByExpressions = getFinalGroupByExpressions(selectedGroupByExprs, groupByExprs) if (finalGroupByExpressions.size > GroupingID.dataType.defaultSize * 8) { - throw new AnalysisException( - s"Grouping sets size cannot be greater than ${GroupingID.dataType.defaultSize * 8}") + throw QueryCompilationErrors.groupingSizeTooLargeError(GroupingID.dataType.defaultSize * 8) } // Expand works by setting grouping expressions to null as determined by the @@ -712,8 +709,7 @@ class Analyzer(override val catalogManager: CatalogManager) || !p.pivotColumn.resolved || !p.pivotValues.forall(_.resolved) => p case Pivot(groupByExprsOpt, pivotColumn, pivotValues, aggregates, child) => if (!RowOrdering.isOrderable(pivotColumn.dataType)) { - throw new AnalysisException( - s"Invalid pivot column '${pivotColumn}'. Pivot columns must be comparable.") + throw QueryCompilationErrors.unorderablePivotColError(pivotColumn) } // Check all aggregate expressions. aggregates.foreach(checkValidAggregateExpression) @@ -724,13 +720,10 @@ class Analyzer(override val catalogManager: CatalogManager) case _ => value.foldable } if (!foldable) { - throw new AnalysisException( - s"Literal expressions required for pivot values, found '$value'") + throw QueryCompilationErrors.nonLiteralPivotValError(value) } if (!Cast.canCast(value.dataType, pivotColumn.dataType)) { - throw new AnalysisException(s"Invalid pivot value '$value': " + - s"value data type ${value.dataType.simpleString} does not match " + - s"pivot column data type ${pivotColumn.dataType.catalogString}") + throw QueryCompilationErrors.pivotValDataTypeMismatchError(value, pivotColumn) } Cast(value, pivotColumn.dataType, Some(conf.sessionLocalTimeZone)).eval(EmptyRow) } @@ -1167,8 +1160,7 @@ class Analyzer(override val catalogManager: CatalogManager) case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) if i.query.resolved => // ifPartitionNotExists is append with validation, but validation is not supported if (i.ifPartitionNotExists) { - throw new AnalysisException( - s"Cannot write, IF NOT EXISTS is not supported for table: ${r.table.name}") + throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name) } val partCols = partitionColumnNames(r.table) @@ -1205,8 +1197,7 @@ class Analyzer(override val catalogManager: CatalogManager) partitionColumnNames.find(name => conf.resolver(name, partitionName)) match { case Some(_) => case None => - throw new AnalysisException( - s"PARTITION clause cannot contain a non-partition column name: $partitionName") + throw QueryCompilationErrors.nonPartitionColError(partitionName) } } } @@ -1228,8 +1219,7 @@ class Analyzer(override val catalogManager: CatalogManager) case Some(attr) => attr.name -> staticName case _ => - throw new AnalysisException( - s"Cannot add static value for unknown column: $staticName") + throw QueryCompilationErrors.addStaticValToUnknownColError(staticName) }).toMap val queryColumns = query.output.iterator @@ -1271,7 +1261,7 @@ class Analyzer(override val catalogManager: CatalogManager) // an UnresolvedAttribute. 
EqualTo(UnresolvedAttribute(attr.name), Cast(Literal(value), attr.dataType)) case None => - throw new AnalysisException(s"Unknown static partition column: $name") + throw QueryCompilationErrors.unknownStaticPartitionColError(name) } }.reduce(And) } @@ -2483,23 +2473,19 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case Project(projectList, _) if projectList.exists(hasNestedGenerator) => val nestedGenerator = projectList.find(hasNestedGenerator).get - throw new AnalysisException("Generators are not supported when it's nested in " + - "expressions, but got: " + toPrettySQL(trimAlias(nestedGenerator))) + throw QueryCompilationErrors.nestedGeneratorError(trimAlias(nestedGenerator)) case Project(projectList, _) if projectList.count(hasGenerator) > 1 => val generators = projectList.filter(hasGenerator).map(trimAlias) - throw new AnalysisException("Only one generator allowed per select clause but found " + - generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + throw QueryCompilationErrors.moreThanOneGeneratorError(generators, "select") case Aggregate(_, aggList, _) if aggList.exists(hasNestedGenerator) => val nestedGenerator = aggList.find(hasNestedGenerator).get - throw new AnalysisException("Generators are not supported when it's nested in " + - "expressions, but got: " + toPrettySQL(trimAlias(nestedGenerator))) + throw QueryCompilationErrors.nestedGeneratorError(trimAlias(nestedGenerator)) case Aggregate(_, aggList, _) if aggList.count(hasGenerator) > 1 => val generators = aggList.filter(hasGenerator).map(trimAlias) - throw new AnalysisException("Only one generator allowed per aggregate clause but found " + - generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + throw QueryCompilationErrors.moreThanOneGeneratorError(generators, "aggregate") case agg @ Aggregate(groupList, aggList, child) if aggList.forall { case AliasedGenerator(_, _, _) => true @@ -2582,8 +2568,7 @@ class Analyzer(override val catalogManager: CatalogManager) case g: Generate => g case p if p.expressions.exists(hasGenerator) => - throw new AnalysisException("Generators are not supported outside the SELECT clause, but " + - "got: " + p.simpleString(SQLConf.get.maxToStringFields)) + throw QueryCompilationErrors.generatorOutsideSelectError(p) } } @@ -3122,10 +3107,7 @@ class Analyzer(override val catalogManager: CatalogManager) private def validateStoreAssignmentPolicy(): Unit = { // SPARK-28730: LEGACY store assignment policy is disallowed in data source v2. if (conf.storeAssignmentPolicy == StoreAssignmentPolicy.LEGACY) { - val configKey = SQLConf.STORE_ASSIGNMENT_POLICY.key - throw new AnalysisException(s""" - |"LEGACY" store assignment policy is disallowed in Spark data source V2. - |Please set the configuration $configKey to other values.""".stripMargin) + throw QueryCompilationErrors.legacyStoreAssignmentPolicyError() } } @@ -3138,14 +3120,12 @@ class Analyzer(override val catalogManager: CatalogManager) hint: JoinHint) = { val leftKeys = joinNames.map { keyName => left.output.find(attr => resolver(attr.name, keyName)).getOrElse { - throw new AnalysisException(s"USING column `$keyName` cannot be resolved on the left " + - s"side of the join. 
The left-side columns: [${left.output.map(_.name).mkString(", ")}]") + throw QueryCompilationErrors.unresolvedUsingColForJoinError(keyName, left, "left") } } val rightKeys = joinNames.map { keyName => right.output.find(attr => resolver(attr.name, keyName)).getOrElse { - throw new AnalysisException(s"USING column `$keyName` cannot be resolved on the right " + - s"side of the join. The right-side columns: [${right.output.map(_.name).mkString(", ")}]") + throw QueryCompilationErrors.unresolvedUsingColForJoinError(keyName, right, "right") } } val joinPairs = leftKeys.zip(rightKeys) @@ -3208,7 +3188,8 @@ class Analyzer(override val catalogManager: CatalogManager) ExtractValue(child, fieldName, resolver) } case other => - throw new AnalysisException("need an array field but got " + other.catalogString) + throw QueryCompilationErrors.dataTypeMismatchForDeserializerError(other, + "array") } case u: UnresolvedCatalystToExternalMap if u.child.resolved => u.child.dataType match { @@ -3218,7 +3199,7 @@ class Analyzer(override val catalogManager: CatalogManager) ExtractValue(child, fieldName, resolver) } case other => - throw new AnalysisException("need a map field but got " + other.catalogString) + throw QueryCompilationErrors.dataTypeMismatchForDeserializerError(other, "map") } } validateNestedTupleFields(result) @@ -3227,8 +3208,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def fail(schema: StructType, maxOrdinal: Int): Unit = { - throw new AnalysisException(s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}" + - ", but failed as the number of fields does not line up.") + throw QueryCompilationErrors.fieldNumberMismatchForDeserializerError(schema, maxOrdinal) } /** @@ -3287,10 +3267,7 @@ class Analyzer(override val catalogManager: CatalogManager) case n: NewInstance if n.childrenResolved && !n.resolved => val outer = OuterScopes.getOuterScope(n.cls) if (outer == null) { - throw new AnalysisException( - s"Unable to generate an encoder for inner class `${n.cls.getName}` without " + - "access to the scope that this class was defined in.\n" + - "Try moving this class out of its parent class.") + throw QueryCompilationErrors.outerScopeFailureForNewInstanceError(n.cls.getName) } n.copy(outerPointer = Some(outer)) } @@ -3306,11 +3283,7 @@ class Analyzer(override val catalogManager: CatalogManager) case l: LambdaVariable => "array element" case e => e.sql } - throw new AnalysisException(s"Cannot up cast $fromStr from " + - s"${from.dataType.catalogString} to ${to.catalogString}.\n" + - "The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + - "You can either add an explicit cast to the input data or choose a higher precision " + - "type of the field in the target object") + throw QueryCompilationErrors.upCastFailureError(fromStr, from, to, walkedTypePath) } def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { @@ -3321,8 +3294,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UpCast(child, _, _) if !child.resolved => u case UpCast(_, target, _) if target != DecimalType && !target.isInstanceOf[DataType] => - throw new AnalysisException( - s"UpCast only support DecimalType as AbstractDataType yet, but got: $target") + throw QueryCompilationErrors.unsupportedAbstractDataTypeForUpCastError(target) case UpCast(child, target, walkedTypePath) if target == DecimalType && child.dataType.isInstanceOf[DecimalType] => @@ -3501,8 +3473,8 @@ class Analyzer(override val catalogManager: CatalogManager) case 
Some(colName) => ColumnPosition.after(colName) case None => - throw new AnalysisException("Couldn't find the reference column for " + - s"$after at $parentName") + throw QueryCompilationErrors.referenceColNotFoundForAlterTableChangesError(after, + parentName) } case other => other } From 67c6ed90682455dbc866e43709fd9081dfc15ad9 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Sat, 21 Nov 2020 10:27:00 +0900 Subject: [PATCH 0536/1009] [SPARK-33223][SS][FOLLOWUP] Clarify the meaning of "number of rows dropped by watermark" in SS UI page ### What changes were proposed in this pull request? This PR fixes the representation to clarify the meaning of "number of rows dropped by watermark" in SS UI page. ### Why are the changes needed? `Aggregated Number Of State Rows Dropped By Watermark` says that the dropped rows are from the state, whereas they're not. We say "evicted from the state" for the case, which is "normal" to emit outputs and reduce memory usage of the state. The metric actually represents the number of "input" rows dropped by watermark, and the meaning of "input" is relative to the "stateful operator". That's a bit confusing as we normally think "input" as "input from source" whereas it's not. ### Does this PR introduce _any_ user-facing change? Yes, UI element & tooltip change. ### How was this patch tested? Only text change in UI, so we know how thing will be changed intuitively. Closes #30439 from HeartSaVioR/SPARK-33223-FOLLOWUP. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../streaming/ui/StreamingQueryStatisticsPage.scala | 10 +++++----- .../spark/sql/streaming/ui/UISeleniumSuite.scala | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 7d38acfceee81..f48672afb41f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -189,8 +189,8 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) val graphUIDataForNumRowsDroppedByWatermark = new GraphUIData( - "aggregated-num-state-rows-dropped-by-watermark-timeline", - "aggregated-num-state-rows-dropped-by-watermark-histogram", + "aggregated-num-rows-dropped-by-watermark-timeline", + "aggregated-num-rows-dropped-by-watermark-histogram", numRowsDroppedByWatermarkData, minBatchTime, maxBatchTime, @@ -230,11 +230,11 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
-            Aggregated Number Of State Rows Dropped By Watermark {SparkUIUtils.tooltip("Aggregated number of state rows dropped by watermark.", "right")}
+            Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} // scalastyle:on } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 1a8b28001b8d1..307479db33949 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -139,7 +139,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Aggregated Number Of Total State Rows (?)") summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State Memory Used In Bytes (?)") - summaryText should contain ("Aggregated Number Of State Rows Dropped By Watermark (?)") + summaryText should contain ("Aggregated Number Of Rows Dropped By Watermark (?)") } } finally { spark.streams.active.foreach(_.stop()) From 530c0a8e28973c57a5d0deec6b15fc29500b6c00 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 20 Nov 2020 18:41:25 -0800 Subject: [PATCH 0537/1009] [SPARK-33505][SQL][TESTS] Fix adding new partitions by INSERT INTO `InMemoryPartitionTable` ### What changes were proposed in this pull request? 1. Add a hook method to `addPartitionKey()` of `InMemoryTable` which is called per every row. 2. Override `addPartitionKey()` in `InMemoryPartitionTable`, and add partition key every time when new row is inserted to the table. ### Why are the changes needed? To be able to write unified tests for datasources V1 and V2. Currently, INSERT INTO a V1 table creates partitions but the same doesn't work for the custom catalog `InMemoryPartitionTableCatalog` used in DSv2 tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suite `DataSourceV2SQLSuite`. Closes #30449 from MaxGekk/insert-into-InMemoryPartitionTable. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../connector/InMemoryPartitionTable.scala | 4 ++++ .../spark/sql/connector/InMemoryTable.scala | 3 +++ .../sql/connector/DataSourceV2SQLSuite.scala | 21 +++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 1c96bdf3afa20..23987e909aa70 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -92,4 +92,8 @@ class InMemoryPartitionTable( override def partitionExists(ident: InternalRow): Boolean = memoryTablePartitions.containsKey(ident) + + override protected def addPartitionKey(key: Seq[Any]): Unit = { + memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 3b47271a114e2..c93053abc550a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -160,12 +160,15 @@ class InMemoryTable( } } + protected def addPartitionKey(key: Seq[Any]): Unit = {} + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) dataMap += dataMap.get(key) .map(key -> _.withRow(row)) .getOrElse(key -> new BufferedRows(key.toArray.mkString("/")).withRow(row)) + addPartitionKey(key) }) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0e7aec8d80e01..90df4ee08bfc0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkException import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NamespaceAlreadyExistsException, NoSuchDatabaseException, NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ @@ -35,6 +36,7 @@ import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils class DataSourceV2SQLSuite @@ -2538,6 +2540,25 @@ class DataSourceV2SQLSuite } } + test("SPARK-33505: insert into partitioned table") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t (id bigint, city string, data string) + |USING foo + |PARTITIONED BY (id, city)""".stripMargin) + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] + val expectedPartitionIdent = 
InternalRow.fromSeq(Seq(1, UTF8String.fromString("NY"))) + assert(!partTable.partitionExists(expectedPartitionIdent)) + sql(s"INSERT INTO $t PARTITION(id = 1, city = 'NY') SELECT 'abc'") + assert(partTable.partitionExists(expectedPartitionIdent)) + // Insert into the existing partition must not fail + sql(s"INSERT INTO $t PARTITION(id = 1, city = 'NY') SELECT 'def'") + assert(partTable.partitionExists(expectedPartitionIdent)) + } + } + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") From b623c03456be12169de7d3823f191ae6774e33ce Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 20 Nov 2020 18:45:17 -0800 Subject: [PATCH 0538/1009] [SPARK-32381][CORE][FOLLOWUP][TEST-HADOOP2.7] Don't remove SerializableFileStatus and SerializableBlockLocation for Hadoop 2.7 ### What changes were proposed in this pull request? Revert the change in #29959 and don't remove `SerializableFileStatus` and `SerializableBlockLocation`. ### Why are the changes needed? In Hadoop 2.7 `FileStatus` and `BlockLocation` are not serializable, so we still need the two wrapper classes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30447 from sunchao/SPARK-32381-followup. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/util/HadoopFSUtils.scala | 61 ++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index a3a528cddee37..4af48d5b9125c 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -136,12 +136,53 @@ private[spark] object HadoopFSUtils extends Logging { parallelismMax = 0) (path, leafFiles) }.iterator + }.map { case (path, statuses) => + val serializableStatuses = statuses.map { status => + // Turn FileStatus into SerializableFileStatus so we can send it back to the driver + val blockLocations = status match { + case f: LocatedFileStatus => + f.getBlockLocations.map { loc => + SerializableBlockLocation( + loc.getNames, + loc.getHosts, + loc.getOffset, + loc.getLength) + } + + case _ => + Array.empty[SerializableBlockLocation] + } + + SerializableFileStatus( + status.getPath.toString, + status.getLen, + status.isDirectory, + status.getReplication, + status.getBlockSize, + status.getModificationTime, + status.getAccessTime, + blockLocations) + } + (path.toString, serializableStatuses) }.collect() } finally { sc.setJobDescription(previousJobDescription) } - statusMap.toSeq + // turn SerializableFileStatus back to Status + statusMap.map { case (path, serializableStatuses) => + val statuses = serializableStatuses.map { f => + val blockLocations = f.blockLocations.map { loc => + new BlockLocation(loc.names, loc.hosts, loc.offset, loc.length) + } + new LocatedFileStatus( + new FileStatus( + f.length, f.isDir, f.blockReplication, f.blockSize, f.modificationTime, + new Path(f.path)), + blockLocations) + } + (new Path(path), statuses) + } } // scalastyle:off argcount @@ -291,4 +332,22 @@ private[spark] object HadoopFSUtils extends Logging { resolvedLeafStatuses } // scalastyle:on argcount + + /** A serializable variant of HDFS's BlockLocation. This is required by Hadoop 2.7. 
*/ + private case class SerializableBlockLocation( + names: Array[String], + hosts: Array[String], + offset: Long, + length: Long) + + /** A serializable variant of HDFS's FileStatus. This is required by Hadoop 2.7. */ + private case class SerializableFileStatus( + path: String, + length: Long, + isDir: Boolean, + blockReplication: Short, + blockSize: Long, + modificationTime: Long, + accessTime: Long, + blockLocations: Array[SerializableBlockLocation]) } From cf7490112ab81cce4a483c2a94368ce3d9d986df Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 20 Nov 2020 19:01:58 -0800 Subject: [PATCH 0539/1009] Revert "[SPARK-28704][SQL][TEST] Add back Skiped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+" This reverts commit 47326ac1c6a296a84af76d832061741740ae9f12. --- .../HiveExternalCatalogVersionsSuite.scala | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 4cafd3e8ca626..38a8c492d77a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,6 +52,7 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ + private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -148,9 +149,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - override def beforeAll(): Unit = { - super.beforeAll() - + private def prepare(): Unit = { val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -200,7 +199,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -212,14 +211,23 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } + override def beforeAll(): Unit = { + super.beforeAll() + if (!isTestAtLeastJava9) { + prepare() + } + } + test("backward compatibility") { + // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ + assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.7", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", 
"--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -244,9 +252,7 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions - .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) - .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) + versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) } protected var spark: SparkSession = _ From 517b810dfa5076c3d0155d1e134dc93317ec3ec0 Mon Sep 17 00:00:00 2001 From: Gustavo Martin Morcuende Date: Sat, 21 Nov 2020 08:39:16 -0800 Subject: [PATCH 0540/1009] [SPARK-33463][SQL] Keep Job Id during incremental collect in Spark Thrift Server ### What changes were proposed in this pull request? When enabling **spark.sql.thriftServer.incrementalCollect** Job Ids get lost and tracing queries in Spark Thrift Server ends up being too complicated. ### Why are the changes needed? Because it will make easier tracing Spark Thrift Server queries. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The current tests are enough. No need of more tests. Closes #30390 from gumartinm/master. Authored-by: Gustavo Martin Morcuende Signed-off-by: Dongjoon Hyun --- .../SparkExecuteStatementOperation.scala | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 2e9975bcabc3f..f7a4be9591818 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -63,6 +63,10 @@ private[hive] class SparkExecuteStatementOperation( } } + private val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { + new VariableSubstitution().substitute(statement) + } + private var result: DataFrame = _ // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. 
@@ -126,6 +130,17 @@ private[hive] class SparkExecuteStatementOperation( } def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = withLocalProperties { + try { + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + getNextRowSetInternal(order, maxRowsL) + } finally { + sqlContext.sparkContext.clearJobGroup() + } + } + + private def getNextRowSetInternal( + order: FetchOrientation, + maxRowsL: Long): RowSet = withLocalProperties { log.info(s"Received getNextRowSet request order=${order} and maxRowsL=${maxRowsL} " + s"with ${statementId}") validateDefaultFetchOrientation(order) @@ -306,9 +321,6 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getSessionState.getConf.setClassLoader(executionHiveClassLoader) } - val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { - new VariableSubstitution().substitute(statement) - } sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) result = sqlContext.sql(statement) logDebug(result.queryExecution.toString()) From d7f4b2ad50aa7acdb0392bb400fc0c87491c6e45 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Sun, 22 Nov 2020 10:29:15 -0800 Subject: [PATCH 0541/1009] [SPARK-28704][SQL][TEST] Add back Skiped HiveExternalCatalogVersionsSuite in HiveSparkSubmitSuite at JDK9+ ### What changes were proposed in this pull request? We skip test HiveExternalCatalogVersionsSuite when testing with JAVA_9 or later because our previous version does not support JAVA_9 or later. We now add it back since we have a version supports JAVA_9 or later. ### Why are the changes needed? To recover test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Check CI logs. Closes #30451 from AngersZhuuuu/SPARK-28704. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../HiveExternalCatalogVersionsSuite.scala | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 38a8c492d77a7..cf070f4611f3b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -52,7 +52,6 @@ import org.apache.spark.util.Utils @ExtendedHiveTest class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { import HiveExternalCatalogVersionsSuite._ - private val isTestAtLeastJava9 = SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9) private val wareHousePath = Utils.createTempDir(namePrefix = "warehouse") private val tmpDataDir = Utils.createTempDir(namePrefix = "test-data") // For local test, you can set `spark.test.cache-dir` to a static value like `/tmp/test-spark`, to @@ -60,6 +59,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { private val sparkTestingDir = Option(System.getProperty(SPARK_TEST_CACHE_DIR_SYSTEM_PROPERTY)) .map(new File(_)).getOrElse(Utils.createTempDir(namePrefix = "test-spark")) private val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val hiveVersion = if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) { + "2.3.7" + } else { + "1.2.1" + } override def afterAll(): Unit = { try { @@ -149,7 +153,9 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { new String(Files.readAllBytes(contentPath), StandardCharsets.UTF_8) } - private def 
prepare(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() + val tempPyFile = File.createTempFile("test", ".py") // scalastyle:off line.size.limit Files.write(tempPyFile.toPath, @@ -199,7 +205,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--conf", s"spark.sql.test.version.index=$index", @@ -211,23 +217,14 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tempPyFile.delete() } - override def beforeAll(): Unit = { - super.beforeAll() - if (!isTestAtLeastJava9) { - prepare() - } - } - test("backward compatibility") { - // TODO SPARK-28704 Test backward compatibility on JDK9+ once we have a version supports JDK9+ - assume(!isTestAtLeastJava9) val args = Seq( "--class", PROCESS_TABLES.getClass.getName.stripSuffix("$"), "--name", "HiveExternalCatalog backward compatibility test", "--master", "local[2]", "--conf", s"${UI_ENABLED.key}=false", "--conf", s"${MASTER_REST_SERVER_ENABLED.key}=false", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=$hiveVersion", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"${WAREHOUSE_PATH.key}=${wareHousePath.getCanonicalPath}", "--driver-java-options", s"-Dderby.system.home=${wareHousePath.getCanonicalPath}", @@ -252,7 +249,9 @@ object PROCESS_TABLES extends QueryTest with SQLTestUtils { // do not throw exception during object initialization. case NonFatal(_) => Seq("3.0.1", "2.4.7") // A temporary fallback to use a specific version } - versions.filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + versions + .filter(v => v.startsWith("3") || !TestUtils.isPythonVersionAtLeast38()) + .filter(v => v.startsWith("3") || !SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_9)) } protected var spark: SparkSession = _ From d338af3101a4c986b5e979e8fdc63b8551e12d29 Mon Sep 17 00:00:00 2001 From: CC Highman Date: Mon, 23 Nov 2020 08:30:41 +0900 Subject: [PATCH 0542/1009] [SPARK-31962][SQL] Provide modifiedAfter and modifiedBefore options when filtering from a batch-based file data source ### What changes were proposed in this pull request? Two new options, _modifiiedBefore_ and _modifiedAfter_, is provided expecting a value in 'YYYY-MM-DDTHH:mm:ss' format. _PartioningAwareFileIndex_ considers these options during the process of checking for files, just before considering applied _PathFilters_ such as `pathGlobFilter.` In order to filter file results, a new PathFilter class was derived for this purpose. General house-keeping around classes extending PathFilter was performed for neatness. It became apparent support was needed to handle multiple potential path filters. Logic was introduced for this purpose and the associated tests written. ### Why are the changes needed? When loading files from a data source, there can often times be thousands of file within a respective file path. In many cases I've seen, we want to start loading from a folder path and ideally be able to begin loading files having modification dates past a certain point. 
This would mean out of thousands of potential files, only the ones with modification dates greater than the specified timestamp would be considered. This saves a ton of time automatically and reduces significant complexity managing this in code. ### Does this PR introduce _any_ user-facing change? This PR introduces an option that can be used with batch-based Spark file data sources. A documentation update was made to reflect an example and usage of the new data source option. **Example Usages** _Load all CSV files modified after date:_ `spark.read.format("csv").option("modifiedAfter","2020-06-15T05:00:00").load()` _Load all CSV files modified before date:_ `spark.read.format("csv").option("modifiedBefore","2020-06-15T05:00:00").load()` _Load all CSV files modified between two dates:_ `spark.read.format("csv").option("modifiedAfter","2019-01-15T05:00:00").option("modifiedBefore","2020-06-15T05:00:00").load() ` ### How was this patch tested? A handful of unit tests were added to support the positive, negative, and edge case code paths. It's also live in a handful of our Databricks dev environments. (quoted from cchighman) Closes #30411 from HeartSaVioR/SPARK-31962. Lead-authored-by: CC Highman Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/sql-data-sources-generic-options.md | 37 +++ .../sql/JavaSQLDataSourceExample.java | 16 + examples/src/main/python/sql/datasource.py | 20 ++ examples/src/main/r/RSparkSQLExample.R | 8 + .../examples/sql/SQLDataSourceExample.scala | 21 ++ python/pyspark/sql/readwriter.py | 81 ++++- .../apache/spark/sql/DataFrameReader.scala | 30 ++ .../PartitioningAwareFileIndex.scala | 13 +- .../execution/datasources/pathFilters.scala | 161 +++++++++ .../streaming/FileStreamOptions.scala | 11 + .../spark/sql/FileBasedDataSourceSuite.scala | 32 -- .../datasources/PathFilterStrategySuite.scala | 54 +++ .../datasources/PathFilterSuite.scala | 307 ++++++++++++++++++ .../sql/streaming/FileStreamSourceSuite.scala | 44 ++- 14 files changed, 787 insertions(+), 48 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala diff --git a/docs/sql-data-sources-generic-options.md b/docs/sql-data-sources-generic-options.md index 6bcf48235bced..2e4fc879a435f 100644 --- a/docs/sql-data-sources-generic-options.md +++ b/docs/sql-data-sources-generic-options.md @@ -119,3 +119,40 @@ To load all files recursively, you can use: {% include_example recursive_file_lookup r/RSparkSQLExample.R %} + +### Modification Time Path Filters + +`modifiedBefore` and `modifiedAfter` are options that can be +applied together or separately in order to achieve greater +granularity over which files may load during a Spark batch query. +(Note that Structured Streaming file sources don't support these options.) + +* `modifiedBefore`: an optional timestamp to only include files with +modification times occurring before the specified time. The provided timestamp +must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) +* `modifiedAfter`: an optional timestamp to only include files with +modification times occurring after the specified time. The provided timestamp +must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 
2020-06-01T13:00:00) + +When a timezone option is not provided, the timestamps will be interpreted according +to the Spark session timezone (`spark.sql.session.timeZone`). + +To load files with paths matching a given modified time range, you can use: + +
+<div class="codetabs">
+
+<div data-lang="scala" markdown="1">
+{% include_example load_with_modified_time_filter scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala %}
+</div>
+
+<div data-lang="java" markdown="1">
+{% include_example load_with_modified_time_filter java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java %}
+</div>
+
+<div data-lang="python" markdown="1">
+{% include_example load_with_modified_time_filter python/sql/datasource.py %}
+</div>
+
+<div data-lang="r" markdown="1">
+{% include_example load_with_modified_time_filter r/RSparkSQLExample.R %}
+</div>
+
+</div>
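For illustration, a short sketch (not part of this patch) of how these options compose with `pathGlobFilter` and `timeZone` in a single batch read; the path below reuses the example resources directory:

```
// Illustrative sketch only; assumes a running SparkSession named `spark`.
val df = spark.read.format("parquet")
  // keep files modified after this instant ...
  .option("modifiedAfter", "2020-06-01T05:30:00")
  // ... and before this one
  .option("modifiedBefore", "2020-07-01T05:30:00")
  // both timestamps are interpreted in this timezone (session timezone if omitted)
  .option("timeZone", "UTC")
  // glob filtering is applied together with the modification-time filters
  .option("pathGlobFilter", "*.parquet")
  .load("examples/src/main/resources/dir1")
```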
    \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index 2295225387a33..46e740d78bffb 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -147,6 +147,22 @@ private static void runGenericFileSourceOptionsExample(SparkSession spark) { // |file1.parquet| // +-------------+ // $example off:load_with_path_glob_filter$ + // $example on:load_with_modified_time_filter$ + Dataset beforeFilterDF = spark.read().format("parquet") + // Only load files modified before 7/1/2020 at 05:30 + .option("modifiedBefore", "2020-07-01T05:30:00") + // Only load files modified after 6/1/2020 at 05:30 + .option("modifiedAfter", "2020-06-01T05:30:00") + // Interpret both times above relative to CST timezone + .option("timeZone", "CST") + .load("examples/src/main/resources/dir1"); + beforeFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // |file1.parquet| + // +-------------+ + // $example off:load_with_modified_time_filter$ } private static void runBasicDataSourceExample(SparkSession spark) { diff --git a/examples/src/main/python/sql/datasource.py b/examples/src/main/python/sql/datasource.py index eecd8c2d84788..8c146ba0c9455 100644 --- a/examples/src/main/python/sql/datasource.py +++ b/examples/src/main/python/sql/datasource.py @@ -67,6 +67,26 @@ def generic_file_source_options_example(spark): # +-------------+ # $example off:load_with_path_glob_filter$ + # $example on:load_with_modified_time_filter$ + # Only load files modified before 07/1/2050 @ 08:30:00 + df = spark.read.load("examples/src/main/resources/dir1", + format="parquet", modifiedBefore="2050-07-01T08:30:00") + df.show() + # +-------------+ + # | file| + # +-------------+ + # |file1.parquet| + # +-------------+ + # Only load files modified after 06/01/2050 @ 08:30:00 + df = spark.read.load("examples/src/main/resources/dir1", + format="parquet", modifiedAfter="2050-06-01T08:30:00") + df.show() + # +-------------+ + # | file| + # +-------------+ + # +-------------+ + # $example off:load_with_modified_time_filter$ + def basic_datasource_example(spark): # $example on:generic_load_save_functions$ diff --git a/examples/src/main/r/RSparkSQLExample.R b/examples/src/main/r/RSparkSQLExample.R index 8685cfb5c05f2..86ad5334248bc 100644 --- a/examples/src/main/r/RSparkSQLExample.R +++ b/examples/src/main/r/RSparkSQLExample.R @@ -144,6 +144,14 @@ df <- read.df("examples/src/main/resources/dir1", "parquet", pathGlobFilter = "* # 1 file1.parquet # $example off:load_with_path_glob_filter$ +# $example on:load_with_modified_time_filter$ +beforeDF <- read.df("examples/src/main/resources/dir1", "parquet", modifiedBefore= "2020-07-01T05:30:00") +# file +# 1 file1.parquet +afterDF <- read.df("examples/src/main/resources/dir1", "parquet", modifiedAfter = "2020-06-01T05:30:00") +# file +# $example off:load_with_modified_time_filter$ + # $example on:manual_save_options_orc$ df <- read.df("examples/src/main/resources/users.orc", "orc") write.orc(df, "users_with_options.orc", orc.bloom.filter.columns = "favorite_color", orc.dictionary.key.threshold = 1.0, orc.column.encoding.direct = "name") diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala 
index 2c7abfcd335d1..90c0eeb5ba888 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala @@ -81,6 +81,27 @@ object SQLDataSourceExample { // |file1.parquet| // +-------------+ // $example off:load_with_path_glob_filter$ + // $example on:load_with_modified_time_filter$ + val beforeFilterDF = spark.read.format("parquet") + // Files modified before 07/01/2020 at 05:30 are allowed + .option("modifiedBefore", "2020-07-01T05:30:00") + .load("examples/src/main/resources/dir1"); + beforeFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // |file1.parquet| + // +-------------+ + val afterFilterDF = spark.read.format("parquet") + // Files modified after 06/01/2020 at 05:30 are allowed + .option("modifiedAfter", "2020-06-01T05:30:00") + .load("examples/src/main/resources/dir1"); + afterFilterDF.show(); + // +-------------+ + // | file| + // +-------------+ + // +-------------+ + // $example off:load_with_modified_time_filter$ } private def runBasicDataSourceExample(spark: SparkSession): Unit = { diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 2ed991c87f506..bb31e6a3e09f8 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -125,6 +125,12 @@ def option(self, key, value): * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. It does not change the behavior of partition discovery. + * ``modifiedBefore``: an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + * ``modifiedAfter``: an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) """ self._jreader = self._jreader.option(key, to_str(value)) return self @@ -149,6 +155,12 @@ def options(self, **options): * ``pathGlobFilter``: an optional glob pattern to only include files with paths matching the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. It does not change the behavior of partition discovery. + * ``modifiedBefore``: an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + * ``modifiedAfter``: an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) """ for k in options: self._jreader = self._jreader.option(k, to_str(options[k])) @@ -203,7 +215,8 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, mode=None, columnNameOfCorruptRecord=None, dateFormat=None, timestampFormat=None, multiLine=None, allowUnquotedControlChars=None, lineSep=None, samplingRatio=None, dropFieldIfAllNull=None, encoding=None, locale=None, pathGlobFilter=None, - recursiveFileLookup=None, allowNonNumericNumbers=None): + recursiveFileLookup=None, allowNonNumericNumbers=None, + modifiedBefore=None, modifiedAfter=None): """ Loads JSON files and returns the results as a :class:`DataFrame`. 
@@ -322,6 +335,13 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, ``+Infinity`` and ``Infinity``. * ``-INF``: for negative infinity, alias ``-Infinity``. * ``NaN``: for other not-a-numbers, like result of division by zero. + modifiedBefore : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- @@ -344,6 +364,7 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None, allowUnquotedControlChars=allowUnquotedControlChars, lineSep=lineSep, samplingRatio=samplingRatio, dropFieldIfAllNull=dropFieldIfAllNull, encoding=encoding, locale=locale, pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, allowNonNumericNumbers=allowNonNumericNumbers) if isinstance(path, str): path = [path] @@ -410,6 +431,15 @@ def parquet(self, *paths, **options): disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned') @@ -418,13 +448,18 @@ def parquet(self, *paths, **options): """ mergeSchema = options.get('mergeSchema', None) pathGlobFilter = options.get('pathGlobFilter', None) + modifiedBefore = options.get('modifiedBefore', None) + modifiedAfter = options.get('modifiedAfter', None) recursiveFileLookup = options.get('recursiveFileLookup', None) self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter, - recursiveFileLookup=recursiveFileLookup) + recursiveFileLookup=recursiveFileLookup, modifiedBefore=modifiedBefore, + modifiedAfter=modifiedAfter) + return self._df(self._jreader.parquet(_to_seq(self._spark._sc, paths))) def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, - recursiveFileLookup=None): + recursiveFileLookup=None, modifiedBefore=None, + modifiedAfter=None): """ Loads text files and returns a :class:`DataFrame` whose schema starts with a string column named "value", and followed by partitioned columns if there @@ -453,6 +488,15 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. 
The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.text('python/test_support/sql/text-test.txt') @@ -464,7 +508,9 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None, """ self._set_opts( wholetext=wholetext, lineSep=lineSep, pathGlobFilter=pathGlobFilter, - recursiveFileLookup=recursiveFileLookup) + recursiveFileLookup=recursiveFileLookup, modifiedBefore=modifiedBefore, + modifiedAfter=modifiedAfter) + if isinstance(paths, str): paths = [paths] return self._df(self._jreader.text(self._spark._sc._jvm.PythonUtils.toSeq(paths))) @@ -476,7 +522,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, samplingRatio=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None): + pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None): r"""Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -631,6 +677,15 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore (batch only) : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter (batch only) : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.csv('python/test_support/sql/ages.csv') @@ -652,7 +707,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, - pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup) + pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter) if isinstance(path, str): path = [path] if type(path) == list: @@ -679,7 +735,8 @@ def func(iterator): else: raise TypeError("path can be only string, list or RDD") - def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None): + def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=None, + modifiedBefore=None, modifiedAfter=None): """Loads ORC files, returning the result as a :class:`DataFrame`. .. 
versionadded:: 1.5.0 @@ -701,6 +758,15 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N disables `partition discovery `_. # noqa + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedBefore : an optional timestamp to only include files with + modification times occurring before the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + modifiedAfter : an optional timestamp to only include files with + modification times occurring after the specified time. The provided timestamp + must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + Examples -------- >>> df = spark.read.orc('python/test_support/sql/orc_partitioned') @@ -708,6 +774,7 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N [('a', 'bigint'), ('b', 'int'), ('c', 'int')] """ self._set_opts(mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter, + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, recursiveFileLookup=recursiveFileLookup) if isinstance(path, str): path = [path] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 276d5d29bfa2c..b26bc6441b6cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -493,6 +493,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  6. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  7. + *
  8. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  9. + *
  10. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  11. *
  12. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  13. *
  14. `allowNonNumericNumbers` (default `true`): allows JSON parser to recognize set of @@ -750,6 +756,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  15. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  16. + *
  17. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  18. + *
  19. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  20. *
  21. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  22. * @@ -781,6 +793,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  23. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  24. + *
  25. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  26. + *
  27. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  28. *
  29. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  30. * @@ -814,6 +832,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  31. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  32. + *
  33. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  34. + *
  35. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  36. *
  37. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  38. * @@ -880,6 +904,12 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { *
  39. `pathGlobFilter`: an optional glob pattern to only include files with paths matching * the pattern. The syntax follows org.apache.hadoop.fs.GlobFilter. * It does not change the behavior of partition discovery.
  40. + *
  41. `modifiedBefore` (batch only): an optional timestamp to only include files with + * modification times occurring before the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  42. + *
  43. `modifiedAfter` (batch only): an optional timestamp to only include files with + * modification times occurring after the specified Time. The provided timestamp + * must be in the following form: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00)
  44. *
  45. `recursiveFileLookup`: recursively scan a directory for files. Using this option * disables partition discovery
  46. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index fed9614347f6a..5b0d0606da093 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -57,13 +57,10 @@ abstract class PartitioningAwareFileIndex( protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] private val caseInsensitiveMap = CaseInsensitiveMap(parameters) + private val pathFilters = PathFilterFactory.create(caseInsensitiveMap) - protected lazy val pathGlobFilter: Option[GlobFilter] = - caseInsensitiveMap.get("pathGlobFilter").map(new GlobFilter(_)) - - protected def matchGlobPattern(file: FileStatus): Boolean = { - pathGlobFilter.forall(_.accept(file.getPath)) - } + protected def matchPathPattern(file: FileStatus): Boolean = + pathFilters.forall(_.accept(file)) protected lazy val recursiveFileLookup: Boolean = { caseInsensitiveMap.getOrElse("recursiveFileLookup", "false").toBoolean @@ -86,7 +83,7 @@ abstract class PartitioningAwareFileIndex( val files: Seq[FileStatus] = leafDirToChildrenFiles.get(path) match { case Some(existingDir) => // Directory has children files in it, return them - existingDir.filter(f => matchGlobPattern(f) && isNonEmptyFile(f)) + existingDir.filter(f => matchPathPattern(f) && isNonEmptyFile(f)) case None => // Directory does not exist, or has no children files @@ -135,7 +132,7 @@ abstract class PartitioningAwareFileIndex( } else { leafFiles.values.toSeq } - files.filter(matchGlobPattern) + files.filter(matchPathPattern) } protected def inferPartitioning(): PartitionSpec = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala new file mode 100644 index 0000000000000..c8f23988f93c6 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/pathFilters.scala @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import java.util.{Locale, TimeZone} + +import org.apache.hadoop.fs.{FileStatus, GlobFilter} + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String + +trait PathFilterStrategy extends Serializable { + def accept(fileStatus: FileStatus): Boolean +} + +trait StrategyBuilder { + def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] +} + +class PathGlobFilter(filePatten: String) extends PathFilterStrategy { + + private val globFilter = new GlobFilter(filePatten) + + override def accept(fileStatus: FileStatus): Boolean = + globFilter.accept(fileStatus.getPath) +} + +object PathGlobFilter extends StrategyBuilder { + val PARAM_NAME = "pathglobfilter" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map(new PathGlobFilter(_)) + } +} + +/** + * Provide modifiedAfter and modifiedBefore options when + * filtering from a batch-based file data source. + * + * Example Usages + * Load all CSV files modified after date: + * {{{ + * spark.read.format("csv").option("modifiedAfter","2020-06-15T05:00:00").load() + * }}} + * + * Load all CSV files modified before date: + * {{{ + * spark.read.format("csv").option("modifiedBefore","2020-06-15T05:00:00").load() + * }}} + * + * Load all CSV files modified between two dates: + * {{{ + * spark.read.format("csv").option("modifiedAfter","2019-01-15T05:00:00") + * .option("modifiedBefore","2020-06-15T05:00:00").load() + * }}} + */ +abstract class ModifiedDateFilter extends PathFilterStrategy { + + def timeZoneId: String + + protected def localTime(micros: Long): Long = + DateTimeUtils.fromUTCTime(micros, timeZoneId) +} + +object ModifiedDateFilter { + + def getTimeZoneId(options: CaseInsensitiveMap[String]): String = { + options.getOrElse( + DateTimeUtils.TIMEZONE_OPTION.toLowerCase(Locale.ROOT), + SQLConf.get.sessionLocalTimeZone) + } + + def toThreshold(timeString: String, timeZoneId: String, strategy: String): Long = { + val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneId) + val ts = UTF8String.fromString(timeString) + DateTimeUtils.stringToTimestamp(ts, timeZone.toZoneId).getOrElse { + throw new AnalysisException( + s"The timestamp provided for the '$strategy' option is invalid. The expected format " + + s"is 'YYYY-MM-DDTHH:mm:ss', but the provided timestamp: $timeString") + } + } +} + +/** + * Filter used to determine whether file was modified before the provided timestamp. 
+ */ +class ModifiedBeforeFilter(thresholdTime: Long, val timeZoneId: String) + extends ModifiedDateFilter { + + override def accept(fileStatus: FileStatus): Boolean = + // We standardize on microseconds wherever possible + // getModificationTime returns in milliseconds + thresholdTime - localTime(DateTimeUtils.millisToMicros(fileStatus.getModificationTime)) > 0 +} + +object ModifiedBeforeFilter extends StrategyBuilder { + import ModifiedDateFilter._ + + val PARAM_NAME = "modifiedbefore" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map { value => + val timeZoneId = getTimeZoneId(parameters) + val thresholdTime = toThreshold(value, timeZoneId, PARAM_NAME) + new ModifiedBeforeFilter(thresholdTime, timeZoneId) + } + } +} + +/** + * Filter used to determine whether file was modified after the provided timestamp. + */ +class ModifiedAfterFilter(thresholdTime: Long, val timeZoneId: String) + extends ModifiedDateFilter { + + override def accept(fileStatus: FileStatus): Boolean = + // getModificationTime returns in milliseconds + // We standardize on microseconds wherever possible + localTime(DateTimeUtils.millisToMicros(fileStatus.getModificationTime)) - thresholdTime > 0 +} + +object ModifiedAfterFilter extends StrategyBuilder { + import ModifiedDateFilter._ + + val PARAM_NAME = "modifiedafter" + + override def create(parameters: CaseInsensitiveMap[String]): Option[PathFilterStrategy] = { + parameters.get(PARAM_NAME).map { value => + val timeZoneId = getTimeZoneId(parameters) + val thresholdTime = toThreshold(value, timeZoneId, PARAM_NAME) + new ModifiedAfterFilter(thresholdTime, timeZoneId) + } + } +} + +object PathFilterFactory { + + private val strategies = + Seq(PathGlobFilter, ModifiedBeforeFilter, ModifiedAfterFilter) + + def create(parameters: CaseInsensitiveMap[String]): Seq[PathFilterStrategy] = { + strategies.flatMap { _.create(parameters) } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala index 712ed1585bc8a..6f43542fd6595 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala @@ -23,6 +23,7 @@ import scala.util.Try import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.execution.datasources.{ModifiedAfterFilter, ModifiedBeforeFilter} import org.apache.spark.util.Utils /** @@ -32,6 +33,16 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters)) + checkDisallowedOptions(parameters) + + private def checkDisallowedOptions(options: Map[String, String]): Unit = { + Seq(ModifiedBeforeFilter.PARAM_NAME, ModifiedAfterFilter.PARAM_NAME).foreach { param => + if (parameters.contains(param)) { + throw new IllegalArgumentException(s"option '$param' is not allowed in file stream sources") + } + } + } + val maxFilesPerTrigger: Option[Int] = parameters.get("maxFilesPerTrigger").map { str => Try(str.toInt).toOption.filter(_ > 0).getOrElse { throw new IllegalArgumentException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 
b27c1145181bd..876f62803dc7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -577,38 +577,6 @@ class FileBasedDataSourceSuite extends QueryTest } } - test("Option pathGlobFilter: filter files correctly") { - withTempPath { path => - val dataDir = path.getCanonicalPath - Seq("foo").toDS().write.text(dataDir) - Seq("bar").toDS().write.mode("append").orc(dataDir) - val df = spark.read.option("pathGlobFilter", "*.txt").text(dataDir) - checkAnswer(df, Row("foo")) - - // Both glob pattern in option and path should be effective to filter files. - val df2 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*.orc") - checkAnswer(df2, Seq.empty) - - val df3 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*xt") - checkAnswer(df3, Row("foo")) - } - } - - test("Option pathGlobFilter: simple extension filtering should contains partition info") { - withTempPath { path => - val input = Seq(("foo", 1), ("oof", 2)).toDF("a", "b") - input.write.partitionBy("b").text(path.getCanonicalPath) - Seq("bar").toDS().write.mode("append").orc(path.getCanonicalPath + "/b=1") - - // If we use glob pattern in the path, the partition column won't be shown in the result. - val df = spark.read.text(path.getCanonicalPath + "/*/*.txt") - checkAnswer(df, input.select("a")) - - val df2 = spark.read.option("pathGlobFilter", "*.txt").text(path.getCanonicalPath) - checkAnswer(df2, input) - } - } - test("Option recursiveFileLookup: recursive loading correctly") { val expectedFileList = mutable.ListBuffer[String]() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala new file mode 100644 index 0000000000000..b965a78c9eec0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterStrategySuite.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.test.SharedSparkSession + +class PathFilterStrategySuite extends QueryTest with SharedSparkSession { + + test("SPARK-31962: PathFilterStrategies - modifiedAfter option") { + val options = + CaseInsensitiveMap[String](Map("modifiedAfter" -> "2010-10-01T01:01:00")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[ModifiedAfterFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - modifiedBefore option") { + val options = + CaseInsensitiveMap[String](Map("modifiedBefore" -> "2020-10-01T01:01:00")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[ModifiedBeforeFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - pathGlobFilter option") { + val options = CaseInsensitiveMap[String](Map("pathGlobFilter" -> "*.txt")) + val strategy = PathFilterFactory.create(options) + assert(strategy.head.isInstanceOf[PathGlobFilter]) + assert(strategy.size == 1) + } + + test("SPARK-31962: PathFilterStrategies - no options") { + val options = CaseInsensitiveMap[String](Map.empty) + val strategy = PathFilterFactory.create(options) + assert(strategy.isEmpty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala new file mode 100644 index 0000000000000..1af2adfd8640c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PathFilterSuite.scala @@ -0,0 +1,307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources + +import java.io.File +import java.time.{LocalDateTime, ZoneId, ZoneOffset} +import java.time.format.DateTimeFormatter + +import scala.util.Random + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.util.{stringToFile, DateTimeUtils} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{StringType, StructField, StructType} + +class PathFilterSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("SPARK-31962: modifiedBefore specified" + + " and sharing same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formatTime(curTime))) + } + } + + test("SPARK-31962: modifiedAfter specified" + + " and sharing same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + executeTest(dir, Seq(curTime), 0, modifiedAfter = Some(formatTime(curTime))) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with earlier file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val fileTime = curTime.minusDays(3) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(fileTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore and modifiedAfter option" + + " share same timestamp with later file last modified time.") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime), + modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: when modifiedAfter specified with a past date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime), 1, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: when modifiedBefore specified with a future date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val futureTime = curTime.plusYears(1) + val formattedTime = formatTime(futureTime) + executeTest(dir, Seq(curTime), 1, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: with modifiedBefore option provided using a past date") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime), 0, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, one valid") { + withTempDir { dir => + val fileTime1 = LocalDateTime.now(ZoneOffset.UTC) + val fileTime2 = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val pastTime = fileTime1.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(fileTime1, 
fileTime2), 1, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, both valid") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val pastTime = curTime.minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(curTime, curTime), 2, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedAfter specified with a past date, multiple files, none valid") { + withTempDir { dir => + val fileTime = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val pastTime = LocalDateTime.now(ZoneOffset.UTC).minusYears(1) + val formattedTime = formatTime(pastTime) + executeTest(dir, Seq(fileTime, fileTime), 0, modifiedAfter = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, both valid") { + withTempDir { dir => + val fileTime = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val futureTime = LocalDateTime.now(ZoneOffset.UTC).plusYears(1) + val formattedTime = formatTime(futureTime) + executeTest(dir, Seq(fileTime, fileTime), 2, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, one valid") { + withTempDir { dir => + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val fileTime1 = LocalDateTime.ofEpochSecond(0, 0, ZoneOffset.UTC) + val fileTime2 = curTime.plusDays(3) + val formattedTime = formatTime(curTime) + executeTest(dir, Seq(fileTime1, fileTime2), 1, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore specified with a future date, multiple files, none valid") { + withTempDir { dir => + val fileTime = LocalDateTime.now(ZoneOffset.UTC).minusDays(1) + val formattedTime = formatTime(fileTime) + executeTest(dir, Seq(fileTime, fileTime), 0, modifiedBefore = Some(formattedTime)) + } + } + + test("SPARK-31962: modifiedBefore/modifiedAfter is specified with an invalid date") { + executeTestWithBadOption( + Map("modifiedBefore" -> "2024-05+1 01:00:00"), + Seq("The timestamp provided", "modifiedbefore", "2024-05+1 01:00:00")) + + executeTestWithBadOption( + Map("modifiedAfter" -> "2024-05+1 01:00:00"), + Seq("The timestamp provided", "modifiedafter", "2024-05+1 01:00:00")) + } + + test("SPARK-31962: modifiedBefore/modifiedAfter - empty option") { + executeTestWithBadOption( + Map("modifiedBefore" -> ""), + Seq("The timestamp provided", "modifiedbefore")) + + executeTestWithBadOption( + Map("modifiedAfter" -> ""), + Seq("The timestamp provided", "modifiedafter")) + } + + test("SPARK-31962: modifiedBefore/modifiedAfter filter takes into account local timezone " + + "when specified as an option.") { + Seq("modifiedbefore", "modifiedafter").foreach { filterName => + // CET = UTC + 1 hour, HST = UTC - 10 hours + Seq("CET", "HST").foreach { tzId => + testModifiedDateFilterWithTimezone(tzId, filterName) + } + } + } + + test("Option pathGlobFilter: filter files correctly") { + withTempPath { path => + val dataDir = path.getCanonicalPath + Seq("foo").toDS().write.text(dataDir) + Seq("bar").toDS().write.mode("append").orc(dataDir) + val df = spark.read.option("pathGlobFilter", "*.txt").text(dataDir) + checkAnswer(df, Row("foo")) + + // Both glob pattern in option and path should be effective to filter files. 
+ val df2 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*.orc") + checkAnswer(df2, Seq.empty) + + val df3 = spark.read.option("pathGlobFilter", "*.txt").text(dataDir + "/*xt") + checkAnswer(df3, Row("foo")) + } + } + + test("Option pathGlobFilter: simple extension filtering should contains partition info") { + withTempPath { path => + val input = Seq(("foo", 1), ("oof", 2)).toDF("a", "b") + input.write.partitionBy("b").text(path.getCanonicalPath) + Seq("bar").toDS().write.mode("append").orc(path.getCanonicalPath + "/b=1") + + // If we use glob pattern in the path, the partition column won't be shown in the result. + val df = spark.read.text(path.getCanonicalPath + "/*/*.txt") + checkAnswer(df, input.select("a")) + + val df2 = spark.read.option("pathGlobFilter", "*.txt").text(path.getCanonicalPath) + checkAnswer(df2, input) + } + } + + private def executeTest( + dir: File, + fileDates: Seq[LocalDateTime], + expectedCount: Long, + modifiedBefore: Option[String] = None, + modifiedAfter: Option[String] = None): Unit = { + fileDates.foreach { fileDate => + val file = createSingleFile(dir) + setFileTime(fileDate, file) + } + + val schema = StructType(Seq(StructField("a", StringType))) + + var dfReader = spark.read.format("csv").option("timeZone", "UTC").schema(schema) + modifiedBefore.foreach { opt => dfReader = dfReader.option("modifiedBefore", opt) } + modifiedAfter.foreach { opt => dfReader = dfReader.option("modifiedAfter", opt) } + + if (expectedCount > 0) { + // without pathGlobFilter + val df1 = dfReader.load(dir.getCanonicalPath) + assert(df1.count() === expectedCount) + + // pathGlobFilter matched + val df2 = dfReader.option("pathGlobFilter", "*.csv").load(dir.getCanonicalPath) + assert(df2.count() === expectedCount) + + // pathGlobFilter mismatched + val df3 = dfReader.option("pathGlobFilter", "*.txt").load(dir.getCanonicalPath) + assert(df3.count() === 0) + } else { + val df = dfReader.load(dir.getCanonicalPath) + assert(df.count() === 0) + } + } + + private def executeTestWithBadOption( + options: Map[String, String], + expectedMsgParts: Seq[String]): Unit = { + withTempDir { dir => + createSingleFile(dir) + val exc = intercept[AnalysisException] { + var dfReader = spark.read.format("csv") + options.foreach { case (key, value) => + dfReader = dfReader.option(key, value) + } + dfReader.load(dir.getCanonicalPath) + } + expectedMsgParts.foreach { msg => assert(exc.getMessage.contains(msg)) } + } + } + + private def testModifiedDateFilterWithTimezone( + timezoneId: String, + filterParamName: String): Unit = { + val curTime = LocalDateTime.now(ZoneOffset.UTC) + val zoneId: ZoneId = DateTimeUtils.getTimeZone(timezoneId).toZoneId + val strategyTimeInMicros = + ModifiedDateFilter.toThreshold( + curTime.toString, + timezoneId, + filterParamName) + val strategyTimeInSeconds = strategyTimeInMicros / 1000 / 1000 + + val curTimeAsSeconds = curTime.atZone(zoneId).toEpochSecond + withClue(s"timezone: $timezoneId / param: $filterParamName,") { + assert(strategyTimeInSeconds === curTimeAsSeconds) + } + } + + private def createSingleFile(dir: File): File = { + val file = new File(dir, "temp" + Random.nextInt(1000000) + ".csv") + stringToFile(file, "text") + } + + private def setFileTime(time: LocalDateTime, file: File): Boolean = { + val sameTime = time.toEpochSecond(ZoneOffset.UTC) + file.setLastModified(sameTime * 1000) + } + + private def formatTime(time: LocalDateTime): String = { + time.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")) + } +} diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index cf9664a9764be..718095003b096 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.streaming import java.io.File import java.net.URI +import java.time.{LocalDateTime, ZoneOffset} +import java.time.format.DateTimeFormatter import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable @@ -40,7 +42,6 @@ import org.apache.spark.sql.execution.streaming.sources.MemorySink import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types._ import org.apache.spark.sql.types.{StructType, _} import org.apache.spark.util.Utils @@ -2054,6 +2055,47 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("SPARK-31962: file stream source shouldn't allow modifiedBefore/modifiedAfter") { + def formatTime(time: LocalDateTime): String = { + time.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")) + } + + def assertOptionIsNotSupported(options: Map[String, String], path: String): Unit = { + val schema = StructType(Seq(StructField("a", StringType))) + var dsReader = spark.readStream + .format("csv") + .option("timeZone", "UTC") + .schema(schema) + + options.foreach { case (k, v) => dsReader = dsReader.option(k, v) } + + val df = dsReader.load(path) + + testStream(df)( + ExpectFailure[IllegalArgumentException]( + t => assert(t.getMessage.contains("is not allowed in file stream source")), + isFatalError = false) + ) + } + + withTempDir { dir => + // "modifiedBefore" + val futureTime = LocalDateTime.now(ZoneOffset.UTC).plusYears(1) + val formattedFutureTime = formatTime(futureTime) + assertOptionIsNotSupported(Map("modifiedBefore" -> formattedFutureTime), dir.getCanonicalPath) + + // "modifiedAfter" + val prevTime = LocalDateTime.now(ZoneOffset.UTC).minusYears(1) + val formattedPrevTime = formatTime(prevTime) + assertOptionIsNotSupported(Map("modifiedAfter" -> formattedPrevTime), dir.getCanonicalPath) + + // both + assertOptionIsNotSupported( + Map("modifiedBefore" -> formattedFutureTime, "modifiedAfter" -> formattedPrevTime), + dir.getCanonicalPath) + } + } + private def createFile(content: String, src: File, tmp: File): File = { val tempFile = Utils.tempFileWith(new File(tmp, "text")) val finalFile = new File(src, tempFile.getName) From 6d625ccd5b5a76a149e2070df31984610629a295 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 22 Nov 2020 15:36:44 -0800 Subject: [PATCH 0543/1009] [SPARK-33469][SQL] Add current_timezone function ### What changes were proposed in this pull request? Add a `CurrentTimeZone` function and replace the value at `Optimizer` side. ### Why are the changes needed? Let user get current timezone easily. Then user can call ``` SELECT current_timezone() ``` Presto: https://prestodb.io/docs/current/functions/datetime.html SQL Server: https://docs.microsoft.com/en-us/sql/t-sql/functions/current-timezone-transact-sql?view=sql-server-ver15 ### Does this PR introduce _any_ user-facing change? Yes, a new function. ### How was this patch tested? Add test. Closes #30400 from ulysses-you/SPARK-33469. 
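For illustration, a minimal sketch (not part of this patch, mirroring the new `DatasetSuite` test) showing that the result tracks the session-local timezone; the exact output column header may differ:

```
// Assumes a running SparkSession named `spark`.
spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai")
spark.sql("SELECT current_timezone()").show()
// +------------------+
// |current_timezone()|
// +------------------+
// |     Asia/Shanghai|
// +------------------+
```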
Lead-authored-by: ulysses Co-authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/datetimeExpressions.scala | 15 +++++++++++++++ .../sql/catalyst/optimizer/finishAnalysis.scala | 3 +++ .../optimizer/ComputeCurrentTimeSuite.scala | 16 +++++++++++++++- .../sql-functions/sql-expression-schema.md | 3 ++- .../org/apache/spark/sql/DatasetSuite.scala | 8 ++++++++ .../sql/expressions/ExpressionInfoSuite.scala | 1 + 7 files changed, 45 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 508239077a70e..6fb9bed9625d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -391,6 +391,7 @@ object FunctionRegistry { expression[AddMonths]("add_months"), expression[CurrentDate]("current_date"), expression[CurrentTimestamp]("current_timestamp"), + expression[CurrentTimeZone]("current_timezone"), expression[DateDiff]("datediff"), expression[DateAdd]("date_add"), expression[DateFormatClass]("date_format"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 97aacb3f7530c..9953b780ceace 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -73,6 +73,21 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { } } +@ExpressionDescription( + usage = "_FUNC_() - Returns the current session local timezone.", + examples = """ + Examples: + > SELECT _FUNC_(); + Asia/Shanghai + """, + group = "datetime_funcs", + since = "3.1.0") +case class CurrentTimeZone() extends LeafExpression with Unevaluable { + override def nullable: Boolean = false + override def dataType: DataType = StringType + override def prettyName: String = "current_timezone" +} + /** * Returns the current date at the start of query evaluation. * There is no code generation since this expression should get constant folded by the optimizer. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 9aa7e3201ab1b..1f2389176d1e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -75,6 +76,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { val timeExpr = CurrentTimestamp() val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] val currentTime = Literal.create(timestamp, timeExpr.dataType) + val timezone = Literal.create(SQLConf.get.sessionLocalTimeZone, StringType) plan transformAllExpressions { case currentDate @ CurrentDate(Some(timeZoneId)) => @@ -84,6 +86,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { DateType) }) case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala index db0399d2a73ee..82d6757407b51 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.catalyst.optimizer import java.time.ZoneId import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, Literal} +import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, CurrentTimeZone, Literal} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String class ComputeCurrentTimeSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { @@ -67,4 +69,16 @@ class ComputeCurrentTimeSuite extends PlanTest { assert(lits(1) >= min && lits(1) <= max) assert(lits(0) == lits(1)) } + + test("SPARK-33469: Add current_timezone function") { + val in = Project(Seq(Alias(CurrentTimeZone(), "c")()), LocalRelation()) + val plan = Optimize.execute(in.analyze).asInstanceOf[Project] + val lits = new scala.collection.mutable.ArrayBuffer[String] + plan.transformAllExpressions { case e: Literal => + lits += e.value.asInstanceOf[UTF8String].toString + e + } + assert(lits.size == 1) + assert(lits.head == SQLConf.get.sessionLocalTimeZone) + } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index da83df4994d8d..0a54dff3a1cea 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 341 + 
- Number of queries: 342 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -86,6 +86,7 @@ | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 6a1378837ea9b..953a58760cd5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1947,6 +1947,14 @@ class DatasetSuite extends QueryTest df.where($"zoo".contains(Array('a', 'b'))), Seq(Row("abc"))) } + + test("SPARK-33469: Add current_timezone function") { + val df = Seq(1).toDF("c") + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "Asia/Shanghai") { + val timezone = df.selectExpr("current_timezone()").collect().head.getString(0) + assert(timezone == "Asia/Shanghai") + } + } } object AssertExecutionId { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 9f62ff8301ebc..6085c1f2cccb0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -149,6 +149,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", "org.apache.spark.sql.catalyst.expressions.CurrentDate", "org.apache.spark.sql.catalyst.expressions.CurrentTimestamp", + "org.apache.spark.sql.catalyst.expressions.CurrentTimeZone", "org.apache.spark.sql.catalyst.expressions.Now", // Random output without a seed "org.apache.spark.sql.catalyst.expressions.Rand", From df4a1c2256b71c9a1bd2006819135f56c99a2f21 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 22 Nov 2020 16:40:54 -0800 Subject: [PATCH 0544/1009] [SPARK-33512][BUILD] Upgrade test libraries ### What changes were proposed in this pull request? This PR aims to update the test libraries. - ScalaTest: 3.2.0 -> 3.2.3 - JUnit: 4.12 -> 4.13.1 - Mockito: 3.1.0 -> 3.4.6 - JMock: 2.8.4 -> 2.12.0 - maven-surefire-plugin: 3.0.0-M3 -> 3.0.0-M5 - scala-maven-plugin: 4.3.0 -> 4.4.0 ### Why are the changes needed? This will make the test frameworks up-to-date for Apache Spark 3.1.0. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30456 from dongjoon-hyun/SPARK-33512. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pom.xml b/pom.xml index 85cf5a00b0b24..0ab5a8c5b3efa 100644 --- a/pom.xml +++ b/pom.xml @@ -931,7 +931,7 @@ org.scalatest scalatest_${scala.binary.version} - 3.2.0 + 3.2.3 test @@ -955,14 +955,14 @@ org.mockito mockito-core - 3.1.0 + 3.4.6 test org.jmock jmock-junit4 test - 2.8.4 + 2.12.0 org.scalacheck @@ -973,7 +973,7 @@ junit junit - 4.12 + 4.13.1 test @@ -2498,7 +2498,7 @@ net.alchim31.maven scala-maven-plugin - 4.3.0 + 4.4.0 eclipse-add-source @@ -2573,7 +2573,7 @@ org.apache.maven.plugins maven-surefire-plugin - 3.0.0-M3 + 3.0.0-M5 From a45923852342ce3f9454743a71740b09e6efe859 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 23 Nov 2020 10:38:40 +0900 Subject: [PATCH 0545/1009] [MINOR][INFRA] Suppress warning in check-license ### What changes were proposed in this pull request? This PR aims to suppress the warning `File exists` in check-license ### Why are the changes needed? **BEFORE** ``` % dev/check-license Attempting to fetch rat RAT checks passed. % dev/check-license mkdir: target: File exists RAT checks passed. ``` **AFTER** ``` % dev/check-license Attempting to fetch rat RAT checks passed. % dev/check-license RAT checks passed. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually do dev/check-license twice. Closes #30460 from williamhyun/checklicense. Authored-by: William Hyun Signed-off-by: HyukjinKwon --- dev/check-license | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/check-license b/dev/check-license index 0cc17ffe55c67..bd255954d6db4 100755 --- a/dev/check-license +++ b/dev/check-license @@ -67,7 +67,7 @@ mkdir -p "$FWDIR"/lib exit 1 } -mkdir target +mkdir -p target $java_cmd -jar "$rat_jar" -E "$FWDIR"/dev/.rat-excludes -d "$FWDIR" > target/rat-results.txt if [ $? -ne 0 ]; then From aa78c05edc9cb910cca9fb14f7670559fe00c62d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 23 Nov 2020 10:42:28 +0900 Subject: [PATCH 0546/1009] [SPARK-33427][SQL][FOLLOWUP] Put key and value into IdentityHashMap sequantially ### What changes were proposed in this pull request? This follow-up fixes an issue when inserting key/value pairs into `IdentityHashMap` in `SubExprEvaluationRuntime`. ### Why are the changes needed? The last commits to #30341 follows review comment to use `IdentityHashMap`. Because we leverage `IdentityHashMap` to compare keys in reference, we should not convert expression pairs to Scala map before inserting. Scala map compares keys by equality so we will loss keys with different references. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Run benchmark to verify. Closes #30459 from viirya/SPARK-33427-map. 
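For illustration only (not part of this patch): a minimal, self-contained Scala sketch of the behavior the fix relies on. The `Expr` case class and the object name are hypothetical stand-ins for two semantically equal common subexpressions held as distinct object references; routing the pairs through a Scala `Map` first collapses them (value equality), while inserting each pair directly into the `IdentityHashMap` keeps both (reference equality).

```
import java.util.IdentityHashMap
import scala.collection.JavaConverters._

object IdentityInsertSketch {
  // Hypothetical stand-in for two common subexpressions that are semantically
  // equal but held as two distinct object references.
  case class Expr(sql: String)

  def main(args: Array[String]): Unit = {
    val e1 = Expr("1 * 2")
    val e2 = Expr("1 * 2") // equal by value, different reference

    // Going through a Scala Map first compares keys by equality, so one key is lost.
    val viaScalaMap = new IdentityHashMap[Expr, String]()
    viaScalaMap.putAll(Seq(e1, e2).map(_ -> "proxy").toMap.asJava)
    println(viaScalaMap.size()) // 1

    // Inserting each pair directly keeps both keys, since IdentityHashMap
    // compares keys only by reference.
    val direct = new IdentityHashMap[Expr, String]()
    Seq(e1, e2).foreach(direct.put(_, "proxy"))
    println(direct.size()) // 2
  }
}
```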
Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../SubExprEvaluationRuntime.scala | 9 +++++--- .../SubExprEvaluationRuntimeSuite.scala | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala index 3189d81289903..ff9c4cf3147d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntime.scala @@ -18,8 +18,6 @@ package org.apache.spark.sql.catalyst.expressions import java.util.IdentityHashMap -import scala.collection.JavaConverters._ - import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache} import com.google.common.util.concurrent.{ExecutionError, UncheckedExecutionException} @@ -98,7 +96,12 @@ class SubExprEvaluationRuntime(cacheMaxEntries: Int) { val proxy = ExpressionProxy(expr, proxyExpressionCurrentId, this) proxyExpressionCurrentId += 1 - proxyMap.putAll(e.map(_ -> proxy).toMap.asJava) + // We leverage `IdentityHashMap` so we compare expression keys by reference here. + // So for example if there are one group of common exprs like Seq(common expr 1, + // common expr2, ..., common expr n), we will insert into `proxyMap` some key/value + // pairs like Map(common expr 1 -> proxy(common expr 1), ..., + // common expr n -> proxy(common expr 1)). + e.map(proxyMap.put(_, proxy)) } // Only adding proxy if we find subexpressions. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala index 64b619ca7766b..f8dca266a62d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubExprEvaluationRuntimeSuite.scala @@ -95,4 +95,26 @@ class SubExprEvaluationRuntimeSuite extends SparkFunSuite { }) assert(proxys.isEmpty) } + + test("SubExprEvaluationRuntime should wrap semantically equal exprs") { + val runtime = new SubExprEvaluationRuntime(1) + + val one = Literal(1) + val two = Literal(2) + def mul: (Literal, Literal) => Expression = + (left: Literal, right: Literal) => Multiply(left, right) + + val mul2_1 = Multiply(mul(one, two), mul(one, two)) + val mul2_2 = Multiply(mul(one, two), mul(one, two)) + + val sqrt = Sqrt(mul2_1) + val sum = Add(mul2_2, sqrt) + val proxyExpressions = runtime.proxyExpressions(Seq(sum)) + val proxys = proxyExpressions.flatMap(_.collect { + case p: ExpressionProxy => p + }) + // ( (one * two) * (one * two) ) + assert(proxys.size == 2) + assert(proxys.forall(_.child.semanticEquals(mul2_1))) + } } From 0bb911d979955ac59adc39818667b616eb539103 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Mon, 23 Nov 2020 15:19:34 +0900 Subject: [PATCH 0547/1009] [SPARK-33143][PYTHON] Add configurable timeout to python server and client ### What changes were proposed in this pull request? Spark creates local server to serialize several type of data for python. 
The python code tries to connect to the server, immediately after it's created but there are several system calls in between (this may change in each Spark version): * getaddrinfo * socket * settimeout * connect Under some circumstances in heavy user environments these calls can be super slow (more than 15 seconds). These issues must be analyzed one-by-one but since these are system calls the underlying OS and/or DNS servers must be debugged and fixed. This is not trivial task and at the same time data processing must work somehow. In this PR I'm only intended to add a configuration possibility to increase the mentioned timeouts in order to be able to provide temporary workaround. The rootcause analysis is ongoing but I think this can vary in each case. Because the server part doesn't contain huge amount of log entries to with one can measure time, I've added some. ### Why are the changes needed? Provide workaround when localhost python server connection timeout appears. ### Does this PR introduce _any_ user-facing change? Yes, new configuration added. ### How was this patch tested? Existing unit tests + manual test. ``` #Compile Spark echo "spark.io.encryption.enabled true" >> conf/spark-defaults.conf echo "spark.python.authenticate.socketTimeout 10" >> conf/spark-defaults.conf $ ./bin/pyspark Python 3.8.5 (default, Jul 21 2020, 10:48:26) [Clang 11.0.3 (clang-1103.0.32.62)] on darwin Type "help", "copyright", "credits" or "license" for more information. 20/11/20 10:17:03 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/11/20 10:17:03 WARN SparkEnv: I/O encryption enabled without RPC encryption: keys will be visible on the wire. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Python version 3.8.5 (default, Jul 21 2020 10:48:26) Spark context Web UI available at http://192.168.0.189:4040 Spark context available as 'sc' (master = local[*], app id = local-1605863824276). SparkSession available as 'spark'. >>> sc.setLogLevel("TRACE") >>> sc.parallelize([0, 2, 3, 4, 6], 5).glom().collect() 20/11/20 10:17:09 TRACE PythonParallelizeServer: Creating listening socket 20/11/20 10:17:09 TRACE PythonParallelizeServer: Setting timeout to 10 sec 20/11/20 10:17:09 TRACE PythonParallelizeServer: Waiting for connection on port 59726 20/11/20 10:17:09 TRACE PythonParallelizeServer: Connection accepted from address /127.0.0.1:59727 20/11/20 10:17:09 TRACE PythonParallelizeServer: Client authenticated 20/11/20 10:17:09 TRACE PythonParallelizeServer: Closing server ... 20/11/20 10:17:10 TRACE SocketFuncServer: Creating listening socket 20/11/20 10:17:10 TRACE SocketFuncServer: Setting timeout to 10 sec 20/11/20 10:17:10 TRACE SocketFuncServer: Waiting for connection on port 59735 20/11/20 10:17:10 TRACE SocketFuncServer: Connection accepted from address /127.0.0.1:59736 20/11/20 10:17:10 TRACE SocketFuncServer: Client authenticated 20/11/20 10:17:10 TRACE SocketFuncServer: Closing server [[0], [2], [3], [4], [6]] >>> ``` Closes #30389 from gaborgsomogyi/SPARK-33143. 
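As an illustrative usage note (not part of this patch): because the new key is an ordinary Spark conf read on the JVM side and exported to the Python side through `SPARK_AUTH_SOCKET_TIMEOUT`, it can also be raised programmatically instead of via spark-defaults.conf. The object name, master URL and the 30s value below are arbitrary assumptions; the key and its 15s default come from this patch.

```
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

object AuthSocketTimeoutSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("auth-socket-timeout-sketch")
      // Default is 15s; raise it when the local auth handshake is slow.
      .set("spark.python.authenticate.socketTimeout", "30s")
    val spark = SparkSession.builder().config(conf).getOrCreate()
    // Python-serving sockets created by this session now wait up to 30 seconds
    // for a connection instead of the previously hard-coded 15.
    spark.stop()
  }
}
```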
Lead-authored-by: Gabor Somogyi Co-authored-by: Hyukjin Kwon Co-authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../apache/spark/api/python/PythonRunner.scala | 2 ++ .../apache/spark/api/python/PythonUtils.scala | 4 ++++ .../apache/spark/internal/config/Python.scala | 6 ++++++ .../spark/security/SocketAuthHelper.scala | 2 +- .../spark/security/SocketAuthServer.scala | 17 +++++++++++++---- python/pyspark/context.py | 2 ++ python/pyspark/java_gateway.py | 2 +- 7 files changed, 29 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index 136da80d48dee..f49cb3c2b8836 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -80,6 +80,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( private val conf = SparkEnv.get.conf protected val bufferSize: Int = conf.get(BUFFER_SIZE) + protected val authSocketTimeout = conf.get(PYTHON_AUTH_SOCKET_TIMEOUT) private val reuseWorker = conf.get(PYTHON_WORKER_REUSE) protected val simplifiedTraceback: Boolean = false @@ -139,6 +140,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( if (workerMemoryMb.isDefined) { envVars.put("PYSPARK_EXECUTOR_MEMORY_MB", workerMemoryMb.get.toString) } + envVars.put("SPARK_AUTH_SOCKET_TIMEOUT", authSocketTimeout.toString) envVars.put("SPARK_BUFFER_SIZE", bufferSize.toString) val worker: Socket = env.createPythonWorker(pythonExec, envVars.asScala.toMap) // Whether is the worker released into idle pool or closed. When any codes try to release or diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 527d0d6d3a48d..33849f6fcb65f 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -85,4 +85,8 @@ private[spark] object PythonUtils { def getBroadcastThreshold(sc: JavaSparkContext): Long = { sc.conf.get(org.apache.spark.internal.config.BROADCAST_FOR_UDF_COMPRESSION_THRESHOLD) } + + def getPythonAuthSocketTimeout(sc: JavaSparkContext): Long = { + sc.conf.get(org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT) + } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/Python.scala b/core/src/main/scala/org/apache/spark/internal/config/Python.scala index 188d884319644..348a33e129d65 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Python.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Python.scala @@ -50,4 +50,10 @@ private[spark] object Python { .version("2.4.0") .bytesConf(ByteUnit.MiB) .createOptional + + val PYTHON_AUTH_SOCKET_TIMEOUT = ConfigBuilder("spark.python.authenticate.socketTimeout") + .internal() + .version("3.1.0") + .timeConf(TimeUnit.SECONDS) + .createWithDefaultString("15s") } diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala index dbcb376905338..f800553c5388b 100644 --- a/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala +++ b/core/src/main/scala/org/apache/spark/security/SocketAuthHelper.scala @@ -34,7 +34,7 @@ import org.apache.spark.util.Utils * * There's no secrecy, so this relies on the sockets being either local or somehow encrypted. 
*/ -private[spark] class SocketAuthHelper(conf: SparkConf) { +private[spark] class SocketAuthHelper(val conf: SparkConf) { val secret = Utils.createSecret(conf) diff --git a/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala b/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala index 548fd1b07ddc5..35990b5a59281 100644 --- a/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala +++ b/core/src/main/scala/org/apache/spark/security/SocketAuthServer.scala @@ -25,6 +25,8 @@ import scala.concurrent.duration.Duration import scala.util.Try import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.{ThreadUtils, Utils} @@ -34,11 +36,11 @@ import org.apache.spark.util.{ThreadUtils, Utils} * handling one batch of data, with authentication and error handling. * * The socket server can only accept one connection, or close if no connection - * in 15 seconds. + * in configurable amount of seconds (default 15). */ private[spark] abstract class SocketAuthServer[T]( authHelper: SocketAuthHelper, - threadName: String) { + threadName: String) extends Logging { def this(env: SparkEnv, threadName: String) = this(new SocketAuthHelper(env.conf), threadName) def this(threadName: String) = this(SparkEnv.get, threadName) @@ -46,19 +48,26 @@ private[spark] abstract class SocketAuthServer[T]( private val promise = Promise[T]() private def startServer(): (Int, String) = { + logTrace("Creating listening socket") val serverSocket = new ServerSocket(0, 1, InetAddress.getByAddress(Array(127, 0, 0, 1))) - // Close the socket if no connection in 15 seconds - serverSocket.setSoTimeout(15000) + // Close the socket if no connection in the configured seconds + val timeout = authHelper.conf.get(PYTHON_AUTH_SOCKET_TIMEOUT).toInt + logTrace(s"Setting timeout to $timeout sec") + serverSocket.setSoTimeout(timeout * 1000) new Thread(threadName) { setDaemon(true) override def run(): Unit = { var sock: Socket = null try { + logTrace(s"Waiting for connection on port ${serverSocket.getLocalPort}") sock = serverSocket.accept() + logTrace(s"Connection accepted from address ${sock.getRemoteSocketAddress}") authHelper.authClient(sock) + logTrace("Client authenticated") promise.complete(Try(handleConnection(sock))) } finally { + logTrace("Closing server") JavaUtils.closeQuietly(serverSocket) JavaUtils.closeQuietly(sock) } diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 9c9e3f4b3c881..1bd5961e0525a 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -222,6 +222,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, # data via a socket. # scala's mangled names w/ $ in them require special treatment. 
self._encryption_enabled = self._jvm.PythonUtils.isEncryptionEnabled(self._jsc) + os.environ["SPARK_AUTH_SOCKET_TIMEOUT"] = \ + str(self._jvm.PythonUtils.getPythonAuthSocketTimeout(self._jsc)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') self.pythonVer = "%d.%d" % sys.version_info[:2] diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index eafa5d90f9ff8..fe2e326dff8be 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -201,7 +201,7 @@ def local_connect_and_auth(port, auth_secret): af, socktype, proto, _, sa = res try: sock = socket.socket(af, socktype, proto) - sock.settimeout(15) + sock.settimeout(int(os.environ.get("SPARK_AUTH_SOCKET_TIMEOUT", 15))) sock.connect(sa) sockfile = sock.makefile("rwb", int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) _do_server_auth(sockfile, auth_secret) From 84e70362dbf2bbebc7f1a1b734b99952d7e95e4d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 22 Nov 2020 22:56:59 -0800 Subject: [PATCH 0548/1009] [SPARK-33510][BUILD] Update SBT to 1.4.4 ### What changes were proposed in this pull request? This PR aims to update SBT from 1.4.2 to 1.4.4. ### Why are the changes needed? This will bring the latest bug fixes. - https://github.com/sbt/sbt/releases/tag/v1.4.3 - https://github.com/sbt/sbt/releases/tag/v1.4.4 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30453 from williamhyun/sbt143. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- dev/mima | 4 ++-- project/build.properties | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/mima b/dev/mima index f324c5c00a45c..d214bb96e09a3 100755 --- a/dev/mima +++ b/dev/mima @@ -25,8 +25,8 @@ FWDIR="$(cd "`dirname "$0"`"/..; pwd)" cd "$FWDIR" SPARK_PROFILES=${1:-"-Pmesos -Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive"} -TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | tail -n1)" -OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | tail -n1)" +TOOLS_CLASSPATH="$(build/sbt -DcopyDependencies=false "export tools/fullClasspath" | grep jar | tail -n1)" +OLD_DEPS_CLASSPATH="$(build/sbt -DcopyDependencies=false $SPARK_PROFILES "export oldDeps/fullClasspath" | grep jar | tail -n1)" rm -f .generated-mima* diff --git a/project/build.properties b/project/build.properties index 5ec1d700fd2a8..c92de941c10be 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.2 +sbt.version=1.4.4 From c891e025b8ed34392fbc81e988b75bdbdb268c11 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Mon, 23 Nov 2020 17:43:58 +0900 Subject: [PATCH 0549/1009] Revert "[SPARK-32481][CORE][SQL] Support truncate table to move data to trash" ### What changes were proposed in this pull request? This reverts commit 065f17386d1851d732b4c1badf1ce2e14d0de338, which is not part of any released version. That is, this is an unreleased feature ### Why are the changes needed? I like the concept of Trash, but I think this PR might just resolve a very specific issue by introducing a mechanism without a proper design doc. This could make the usage more complex. I think we need to consider the big picture. Trash directory is an important concept. 
If we decide to introduce it, we should consider all the code paths of Spark SQL that could delete the data, instead of Truncate only. We also need to consider what is the current behavior if the underlying file system does not provide the API `Trash.moveToAppropriateTrash`. Is the exception good? How about the performance when users are using the object store instead of HDFS? Will it impact the GDPR compliance? In sum, I think we should not merge the PR https://github.com/apache/spark/pull/29552 without the design doc and implementation plan. That is why I reverted it before the code freeze of Spark 3.1 ### Does this PR introduce _any_ user-facing change? Reverted the original commit ### How was this patch tested? The existing tests. Closes #30463 from gatorsmile/revertSpark-32481. Authored-by: Xiao Li Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/util/Utils.scala | 25 +----- .../apache/spark/sql/internal/SQLConf.scala | 14 ---- .../spark/sql/execution/command/tables.scala | 4 +- .../sql/execution/command/DDLSuite.scala | 78 ------------------- 4 files changed, 2 insertions(+), 119 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 6ccf65b737c1a..71a310a4279ad 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -50,7 +50,7 @@ import com.google.common.net.InetAddresses import org.apache.commons.codec.binary.Hex import org.apache.commons.lang3.SystemUtils import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileSystem, FileUtil, Path, Trash} +import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec} import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.conf.YarnConfiguration @@ -269,29 +269,6 @@ private[spark] object Utils extends Logging { file.setExecutable(true, true) } - /** - * Move data to trash if 'spark.sql.truncate.trash.enabled' is true, else - * delete the data permanently. If move data to trash failed fallback to hard deletion. - */ - def moveToTrashOrDelete( - fs: FileSystem, - partitionPath: Path, - isTrashEnabled: Boolean, - hadoopConf: Configuration): Boolean = { - if (isTrashEnabled) { - logDebug(s"Try to move data ${partitionPath.toString} to trash") - val isSuccess = Trash.moveToAppropriateTrash(fs, partitionPath, hadoopConf) - if (!isSuccess) { - logWarning(s"Failed to move data ${partitionPath.toString} to trash. " + - "Fallback to hard deletion") - return fs.delete(partitionPath, true) - } - isSuccess - } else { - fs.delete(partitionPath, true) - } - } - /** * Create a directory given the abstract pathname * @return true, if the directory is successfully created; otherwise, return false. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fcf222c8fdab0..ef974dc176e51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2913,18 +2913,6 @@ object SQLConf { .booleanConf .createWithDefault(false) - val TRUNCATE_TRASH_ENABLED = - buildConf("spark.sql.truncate.trash.enabled") - .doc("This configuration decides when truncating table, whether data files will be moved " + - "to trash directory or deleted permanently. 
The trash retention time is controlled by " + - "'fs.trash.interval', and in default, the server side configuration value takes " + - "precedence over the client-side one. Note that if 'fs.trash.interval' is non-positive, " + - "this will be a no-op and log a warning message. If the data fails to be moved to " + - "trash, Spark will turn to delete it permanently.") - .version("3.1.0") - .booleanConf - .createWithDefault(false) - val DISABLED_JDBC_CONN_PROVIDER_LIST = buildConf("spark.sql.sources.disabledJdbcConnProviderList") .internal() @@ -3577,8 +3565,6 @@ class SQLConf extends Serializable with Logging { def legacyPathOptionBehavior: Boolean = getConf(SQLConf.LEGACY_PATH_OPTION_BEHAVIOR) - def truncateTrashEnabled: Boolean = getConf(SQLConf.TRUNCATE_TRASH_ENABLED) - def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 206f952fed0ca..847052cd4fcde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -48,7 +48,6 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils -import org.apache.spark.util.Utils /** * A command to create a table with the same definition of the given existing table. @@ -490,7 +489,6 @@ case class TruncateTableCommand( } val hadoopConf = spark.sessionState.newHadoopConf() val ignorePermissionAcl = SQLConf.get.truncateTableIgnorePermissionAcl - val isTrashEnabled = SQLConf.get.truncateTrashEnabled locations.foreach { location => if (location.isDefined) { val path = new Path(location.get) @@ -515,7 +513,7 @@ case class TruncateTableCommand( } } - Utils.moveToTrashOrDelete(fs, path, isTrashEnabled, hadoopConf) + fs.delete(path, true) // We should keep original permission/acl of the path. // For owner/group, only super-user can set it, for example on HDFS. 
Because diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9d0147048dbb8..43a33860d262e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -3104,84 +3104,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(spark.sessionState.catalog.isRegisteredFunction(rand)) } } - - test("SPARK-32481 Move data to trash on truncate table if enabled") { - val trashIntervalKey = "fs.trash.interval" - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - // scalastyle:off hadoopconfiguration - val hadoopConf = spark.sparkContext.hadoopConfiguration - // scalastyle:on hadoopconfiguration - val originalValue = hadoopConf.get(trashIntervalKey, "0") - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - assume( - fs.mkdirs(trashPath) && fs.delete(trashPath, false), - "Trash directory could not be created, skipping.") - assert(!fs.exists(trashPath)) - try { - hadoopConf.set(trashIntervalKey, "5") - sql("TRUNCATE TABLE tab1") - } finally { - hadoopConf.set(trashIntervalKey, originalValue) - } - assert(fs.exists(trashPath)) - fs.delete(trashPath, true) - } - } - } - - test("SPARK-32481 delete data permanently on truncate table if trash interval is non-positive") { - val trashIntervalKey = "fs.trash.interval" - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "true") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - // scalastyle:off hadoopconfiguration - val hadoopConf = spark.sparkContext.hadoopConfiguration - // scalastyle:on hadoopconfiguration - val originalValue = hadoopConf.get(trashIntervalKey, "0") - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - assert(!fs.exists(trashPath)) - try { - hadoopConf.set(trashIntervalKey, "0") - sql("TRUNCATE TABLE tab1") - } finally { - hadoopConf.set(trashIntervalKey, originalValue) - } - assert(!fs.exists(trashPath)) - } - } - } - - test("SPARK-32481 Do not move data to trash on truncate table if disabled") { - withTable("tab1") { - withSQLConf(SQLConf.TRUNCATE_TRASH_ENABLED.key -> "false") { - sql("CREATE TABLE tab1 (col INT) USING parquet") - sql("INSERT INTO tab1 SELECT 1") - val hadoopConf = spark.sessionState.newHadoopConf() - val tablePath = new Path(spark.sessionState.catalog - .getTableMetadata(TableIdentifier("tab1")).storage.locationUri.get) - - val fs = tablePath.getFileSystem(hadoopConf) - val trashCurrent = new Path(fs.getHomeDirectory, ".Trash/Current") - val trashPath = Path.mergePaths(trashCurrent, tablePath) - sql("TRUNCATE TABLE tab1") - assert(!fs.exists(trashPath)) - } - } - } } object FakeLocalFsFileSystem { From 60f3a730e4e67c3b67d6e45fb18f589ad66b07e6 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 23 Nov 2020 
08:54:00 +0000 Subject: [PATCH 0550/1009] [SPARK-33515][SQL] Improve exception messages while handling UnresolvedTable ### What changes were proposed in this pull request? This PR proposes to improve the exception messages while `UnresolvedTable` is handled based on this suggestion: https://github.com/apache/spark/pull/30321#discussion_r521127001. Currently, when an identifier is resolved to a view when a table is expected, the following exception message is displayed (e.g., for `COMMENT ON TABLE`): ``` v is a temp view not table. ``` After this PR, the message will be: ``` v is a temp view. 'COMMENT ON TABLE' expects a table. ``` Also, if an identifier is not resolved, the following exception message is currently used: ``` Table not found: t ``` After this PR, the message will be: ``` Table not found for 'COMMENT ON TABLE': t ``` ### Why are the changes needed? To improve the exception message. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed as described above. ### How was this patch tested? Updated existing tests. Closes #30461 from imback82/unresolved_table_message. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 10 +++++----- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../catalyst/analysis/v2ResolutionPlans.scala | 4 +++- .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++---- .../sql/catalyst/parser/DDLParserSuite.scala | 18 +++++++++--------- .../sql/connector/DataSourceV2SQLSuite.scala | 3 ++- .../spark/sql/execution/SQLViewSuite.scala | 8 ++++---- .../sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 8 files changed, 34 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 53c0ff687c6d2..837686420375a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -861,9 +861,9 @@ class Analyzer(override val catalogManager: CatalogManager) }.getOrElse(write) case _ => write } - case u @ UnresolvedTable(ident) => + case u @ UnresolvedTable(ident, cmd) => lookupTempView(ident).foreach { _ => - u.failAnalysis(s"${ident.quoted} is a temp view not table.") + u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u case u @ UnresolvedTableOrView(ident, allowTempView) => @@ -950,7 +950,7 @@ class Analyzer(override val catalogManager: CatalogManager) SubqueryAlias(catalog.get.name +: ident.namespace :+ ident.name, relation) }.getOrElse(u) - case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident)) => + case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1077,11 +1077,11 @@ class Analyzer(override val catalogManager: CatalogManager) lookupRelation(u.multipartIdentifier, u.options, u.isStreaming) .map(resolveViews).getOrElse(u) - case u @ UnresolvedTable(identifier) => + case u @ UnresolvedTable(identifier, cmd) => lookupTableOrView(identifier).map { case v: ResolvedView => val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr not table.") + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.'") case table => table }.getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 452ba80b23441..9998035d65c3f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -98,7 +98,7 @@ trait CheckAnalysis extends PredicateHelper { u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTable => - u.failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 98bd84fb94bd6..0e883a88f2691 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -37,7 +37,9 @@ case class UnresolvedNamespace(multipartIdentifier: Seq[String]) extends LeafNod * Holds the name of a table that has yet to be looked up in a catalog. It will be resolved to * [[ResolvedTable]] during analysis. */ -case class UnresolvedTable(multipartIdentifier: Seq[String]) extends LeafNode { +case class UnresolvedTable( + multipartIdentifier: Seq[String], + commandName: String) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 23de8ab09dd0a..ea4baafbacede 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3303,7 +3303,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { LoadData( - child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "LOAD DATA"), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, @@ -3449,7 +3449,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(spec, location) } AlterTableAddPartition( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... ADD PARTITION ..."), specsAndLocs.toSeq, ctx.EXISTS != null) } @@ -3491,7 +3493,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) .map(spec => UnresolvedPartitionSpec(spec)) AlterTableDropPartition( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier)), + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, purge = ctx.PURGE != null, @@ -3720,6 +3724,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case _ => string(ctx.STRING) } val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) - CommentOnTable(UnresolvedTable(nameParts), comment) + CommentOnTable(UnresolvedTable(nameParts, "COMMENT ON TABLE"), comment) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f93c0dcf59f4c..bd28484b23f46 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1555,15 +1555,15 @@ class DDLParserSuite extends AnalysisTest { test("LOAD DATA INTO table") { comparePlans( parsePlan("LOAD DATA INPATH 'filepath' INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", false, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", false, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, false, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, false, None)) comparePlans( parsePlan("LOAD DATA LOCAL INPATH 'filepath' OVERWRITE INTO TABLE a.b.c"), - LoadData(UnresolvedTable(Seq("a", "b", "c")), "filepath", true, true, None)) + LoadData(UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, true, None)) comparePlans( parsePlan( @@ -1572,7 +1572,7 @@ class DDLParserSuite extends AnalysisTest { |PARTITION(ds='2017-06-10') """.stripMargin), LoadData( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "LOAD DATA"), "filepath", true, true, @@ -1674,13 +1674,13 @@ class DDLParserSuite extends AnalysisTest { val parsed2 = parsePlan(sql2) val expected1 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), Seq( UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), ifNotExists = true) val expected2 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), ifNotExists = false) @@ -1747,7 +1747,7 @@ class DDLParserSuite extends AnalysisTest { assertUnsupported(sql2_view) val expected1_table = AlterTableDropPartition( - UnresolvedTable(Seq("table_name")), + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), Seq( UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), @@ -1763,7 +1763,7 @@ class DDLParserSuite extends AnalysisTest { val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" val expected3_table = AlterTableDropPartition( - UnresolvedTable(Seq("a", "b", "c")), + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
DROP PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, purge = false, @@ -2174,7 +2174,7 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("COMMENT ON TABLE a.b.c IS 'xYz'"), - CommentOnTable(UnresolvedTable(Seq("a", "b", "c")), "xYz")) + CommentOnTable(UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON TABLE"), "xYz")) } // TODO: ignored by SPARK-31707, restore the test after create table syntax unification diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 90df4ee08bfc0..da53936239de8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2414,7 +2414,8 @@ class DataSourceV2SQLSuite withTempView("v") { sql("create global temp view v as select 1") val e = intercept[AnalysisException](sql("COMMENT ON TABLE global_temp.v IS NULL")) - assert(e.getMessage.contains("global_temp.v is a temp view not table.")) + assert(e.getMessage.contains( + "global_temp.v is a temp view. 'COMMENT ON TABLE' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 792f920ee0217..504cc57dc12d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -147,10 +147,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { s"'$viewName' is a view not a table") assertAnalysisError( s"ALTER TABLE $viewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", - s"$viewName is a temp view not table") + s"$viewName is a temp view. 'ALTER TABLE ... ADD PARTITION ...' expects a table") assertAnalysisError( s"ALTER TABLE $viewName DROP PARTITION (a='4', b='8')", - s"$viewName is a temp view not table") + s"$viewName is a temp view. 'ALTER TABLE ... DROP PARTITION ...' expects a table") // For the following v2 ALERT TABLE statements, unsupported operations are checked first // before resolving the relations. @@ -175,7 +175,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e2 = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e2.contains(s"$viewName is a temp view not table")) + assert(e2.contains(s"$viewName is a temp view. 'LOAD DATA' expects a table")) assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e3 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") @@ -214,7 +214,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage - assert(e.contains("default.testView is a view not table")) + assert(e.contains("default.testView is a view. 
'LOAD DATA' expects a table")) e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 1f15bd685b239..56b871644453b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -904,10 +904,10 @@ class HiveDDLSuite assertAnalysisError( s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", - s"$oldViewName is a view not table") + s"$oldViewName is a view. 'ALTER TABLE ... ADD PARTITION ...' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName DROP IF EXISTS PARTITION (a='2')", - s"$oldViewName is a view not table") + s"$oldViewName is a view. 'ALTER TABLE ... DROP PARTITION ...' expects a table.") assert(catalog.tableExists(TableIdentifier(tabName))) assert(catalog.tableExists(TableIdentifier(oldViewName))) From 23e9920b3910e4f05269853429c7f18888cdc7b5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 23 Nov 2020 09:00:41 +0000 Subject: [PATCH 0551/1009] [SPARK-33511][SQL] Respect case sensitivity while resolving V2 partition specs ### What changes were proposed in this pull request? 1. Pre-process partition specs in `ResolvePartitionSpec`, and convert partition names according to the partition schema and the SQL config `spark.sql.caseSensitive`. In the PR, I propose to invoke `normalizePartitionSpec` for that. The function is used in DSv1 commands, so, the behavior will be similar to DSv1. 2. Move `normalizePartitionSpec()` from `sql/core/.../datasources/PartitioningUtils` to `sql/catalyst/.../util/PartitioningUtils` to use it in Catalyst's rule `ResolvePartitionSpec` ### Why are the changes needed? DSv1 commands like `ALTER TABLE .. ADD PARTITION` and `ALTER TABLE .. DROP PARTITION` respect the SQL config `spark.sql.caseSensitive` while resolving partition specs. For example: ```sql spark-sql> CREATE TABLE tbl1 (id bigint, data string) USING parquet PARTITIONED BY (id); spark-sql> ALTER TABLE tbl1 ADD PARTITION (ID=1); spark-sql> SHOW PARTITIONS tbl1; id=1 ``` The same command fails on V2 Table catalog with error: ``` AnalysisException: Partition key ID not exists ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes, partition spec resolution works as for DSv1 (without the exception showed above). ### How was this patch tested? By running `AlterTablePartitionV2SQLSuite`. Closes #30454 from MaxGekk/partition-spec-case-sensitivity. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 27 +++++++---- .../spark/sql/util/PartitioningUtils.scala | 47 +++++++++++++++++++ .../command/AnalyzePartitionCommand.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 3 +- .../spark/sql/execution/command/tables.scala | 3 +- .../datasources/PartitioningUtils.scala | 26 +--------- .../sql/execution/datasources/rules.scala | 3 +- .../AlterTablePartitionV2SQLSuite.scala | 26 ++++++++++ 8 files changed, 98 insertions(+), 39 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 5e19a32968992..531d40f431dee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, Alte import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec /** * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. @@ -33,32 +34,38 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => - r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => - r.copy(parts = resolvePartitionSpecs(partSpecs, table.partitionSchema())) + r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) } private def resolvePartitionSpecs( - partSpecs: Seq[PartitionSpec], partSchema: StructType): Seq[ResolvedPartitionSpec] = + tableName: String, + partSpecs: Seq[PartitionSpec], + partSchema: StructType): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => ResolvedPartitionSpec( - convertToPartIdent(unresolvedPartSpec.spec, partSchema), unresolvedPartSpec.location) + convertToPartIdent(tableName, unresolvedPartSpec.spec, partSchema), + unresolvedPartSpec.location) case resolvedPartitionSpec: ResolvedPartitionSpec => resolvedPartitionSpec } private def convertToPartIdent( - partSpec: TablePartitionSpec, partSchema: StructType): InternalRow = { - val conflictKeys = partSpec.keys.toSeq.diff(partSchema.map(_.name)) - if (conflictKeys.nonEmpty) { - throw new AnalysisException(s"Partition key ${conflictKeys.mkString(",")} not exists") - } + tableName: String, + partitionSpec: TablePartitionSpec, + partSchema: StructType): InternalRow = { + val normalizedSpec = normalizePartitionSpec( + partitionSpec, + partSchema.map(_.name), + tableName, + conf.resolver) val partValues = partSchema.map { part => - val partValue = partSpec.get(part.name).orNull + val partValue = normalizedSpec.get(part.name).orNull if (partValue == null) { null } else { diff --git 
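To make the new code path concrete, a small illustrative snippet (not part of the patch) that exercises the helper after its move to the catalyst module; it assumes the `caseInsensitiveResolution`/`caseSensitiveResolution` resolvers from catalyst's analysis package object, and the object name is hypothetical.

```
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution}
import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec

object NormalizeSpecSketch {
  def main(args: Array[String]): Unit = {
    val spec = Map("ID" -> "1")

    // Case-insensitive resolver: the user-supplied key is normalized to the
    // partition column name, mirroring spark.sql.caseSensitive=false.
    println(normalizePartitionSpec(spec, Seq("id"), "tbl", caseInsensitiveResolution))
    // Map(id -> 1)

    // Case-sensitive resolver: the same spec is rejected, mirroring the error
    // shown above for spark.sql.caseSensitive=true.
    try {
      normalizePartitionSpec(spec, Seq("id"), "tbl", caseSensitiveResolution)
    } catch {
      case e: AnalysisException => println(e.getMessage)
      // ID is not a valid partition column in table tbl.
    }
  }
}
```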
a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala new file mode 100644 index 0000000000000..586aa6c59164f --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.util + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.Resolver + +object PartitioningUtils { + /** + * Normalize the column names in partition specification, w.r.t. the real partition column names + * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a + * partition column named `month`, and it's case insensitive, we will normalize `monTh` to + * `month`. + */ + def normalizePartitionSpec[T]( + partitionSpec: Map[String, T], + partColNames: Seq[String], + tblName: String, + resolver: Resolver): Map[String, T] = { + val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => + val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { + throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") + } + normalizedKey -> value + } + + SchemaUtils.checkColumnNameDuplication( + normalizedPartSpec.map(_._1), "in the partition schema", resolver) + + normalizedPartSpec.toMap + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala index fc62dce5002b1..0b265bfb63e3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzePartitionCommand.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, Unresol import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, ExternalCatalogUtils} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, EqualTo, Literal} -import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.util.PartitioningUtils /** * Analyzes a given set of partitions to generate per-partition statistics, which will be used in diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index d550fe270c753..27ad62026c9b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -39,11 +39,12 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{CatalogV2Util, TableCatalog} import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ -import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PartitioningUtils} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils import org.apache.spark.util.{SerializableConfiguration, ThreadUtils} // Note: The definition of these commands are based on the ones described in diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 847052cd4fcde..bd238948aab02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} -import org.apache.spark.sql.execution.datasources.{DataSource, PartitioningUtils} +import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat @@ -47,6 +47,7 @@ import org.apache.spark.sql.execution.datasources.v2.orc.OrcDataSourceV2 import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetDataSourceV2 import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.PartitioningUtils import org.apache.spark.sql.util.SchemaUtils /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 796c23c7337d8..ea437d200eaab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{Resolver, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.TypeCoercion import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateFormatter, DateTimeUtils, TimestampFormatter} @@ -357,30 +357,6 @@ object PartitioningUtils { getPathFragment(spec, StructType.fromAttributes(partitionColumns)) } - /** - * Normalize 
the column names in partition specification, w.r.t. the real partition column names - * and case sensitivity. e.g., if the partition spec has a column named `monTh`, and there is a - * partition column named `month`, and it's case insensitive, we will normalize `monTh` to - * `month`. - */ - def normalizePartitionSpec[T]( - partitionSpec: Map[String, T], - partColNames: Seq[String], - tblName: String, - resolver: Resolver): Map[String, T] = { - val normalizedPartSpec = partitionSpec.toSeq.map { case (key, value) => - val normalizedKey = partColNames.find(resolver(_, key)).getOrElse { - throw new AnalysisException(s"$key is not a valid partition column in table $tblName.") - } - normalizedKey -> value - } - - SchemaUtils.checkColumnNameDuplication( - normalizedPartSpec.map(_._1), "in the partition schema", resolver) - - normalizedPartSpec.toMap - } - /** * Resolves possible type conflicts between partitions by up-casting "lower" types using * [[findWiderTypeForPartitionColumn]]. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 3a2a642b870f8..9e65b0ce13693 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.{AtomicType, StructType} +import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec import org.apache.spark.sql.util.SchemaUtils /** @@ -386,7 +387,7 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { partColNames: Seq[String], catalogTable: Option[CatalogTable]): InsertIntoStatement = { - val normalizedPartSpec = PartitioningUtils.normalizePartitionSpec( + val normalizedPartSpec = normalizePartitionSpec( insert.partitionSpec, partColNames, tblName, conf.resolver) val staticPartCols = normalizedPartSpec.filter(_._2.isDefined).keySet diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 107d0ea47249d..e05c2c09ace2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits +import org.apache.spark.sql.internal.SQLConf class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -159,4 +160,29 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) } } + + test("case sensitivity in resolving partition specs") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t 
ADD PARTITION (ID=1) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains(s"ID is not a valid partition column in table $t")) + } + + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + .asPartitionable + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + spark.sql(s"ALTER TABLE $t DROP PARTITION (Id=1)") + assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) + } + } + } } From f83fcb12543049672a54ef5b582d58817e2ee5d3 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 23 Nov 2020 14:54:44 +0000 Subject: [PATCH 0552/1009] [SPARK-33278][SQL][FOLLOWUP] Improve OptimizeWindowFunctions to avoid transfer first to nth_value ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/30178 provided `OptimizeWindowFunctions` used to transfer `first` to `nth_value`. If the window frame is `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`, `nth_value` has better performance than `first`. But the `OptimizeWindowFunctions` need to exclude other window frame. ### Why are the changes needed? Improve `OptimizeWindowFunctions` to avoid transfer `first` to `nth_value` if the specified window frame isn't `UNBOUNDED PRECEDING AND CURRENT ROW` or `UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30419 from beliefer/SPARK-33278_followup. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 9 +++-- .../OptimizeWindowFunctionsSuite.scala | 33 +++++++++++++++++-- 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c4b9936fa4c4f..9eee7c2b914a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -811,9 +811,12 @@ object CollapseRepartition extends Rule[LogicalPlan] { */ object OptimizeWindowFunctions extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { - case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), spec) - if spec.orderSpec.nonEmpty && - spec.frameSpecification.asInstanceOf[SpecifiedWindowFrame].frameType == RowFrame => + case we @ WindowExpression(AggregateExpression(first: First, _, _, _, _), + WindowSpecDefinition(_, orderSpec, frameSpecification: SpecifiedWindowFrame)) + if orderSpec.nonEmpty && frameSpecification.frameType == RowFrame && + frameSpecification.lower == UnboundedPreceding && + (frameSpecification.upper == UnboundedFollowing || + frameSpecification.upper == CurrentRow) => we.copy(windowFunction = NthValue(first.child, Literal(1), first.ignoreNulls)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala index 389aaeafe655f..cf850bbe21ce6 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeWindowFunctionsSuite.scala @@ -36,7 +36,7 @@ class OptimizeWindowFunctionsSuite extends PlanTest { val b = testRelation.output(1) val c = testRelation.output(2) - test("replace first(col) by nth_value(col, 1)") { + test("replace first by nth_value if frame is UNBOUNDED PRECEDING AND CURRENT ROW") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), @@ -52,7 +52,34 @@ class OptimizeWindowFunctionsSuite extends PlanTest { assert(optimized == correctAnswer) } - test("can't replace first(col) by nth_value(col, 1) if the window frame type is range") { + test("replace first by nth_value if frame is UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing)))) + val correctAnswer = testRelation.select( + WindowExpression( + NthValue(a, Literal(1), false), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == correctAnswer) + } + + test("can't replace first by nth_value if frame is not suitable") { + val inputPlan = testRelation.select( + WindowExpression( + First(a, false).toAggregateExpression(), + WindowSpecDefinition(b :: Nil, c.asc :: Nil, + SpecifiedWindowFrame(RowFrame, Literal(1), CurrentRow)))) + + val optimized = Optimize.execute(inputPlan) + assert(optimized == inputPlan) + } + + test("can't replace first by nth_value if the window frame type is range") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), @@ -63,7 +90,7 @@ class OptimizeWindowFunctionsSuite extends PlanTest { assert(optimized == inputPlan) } - test("can't replace first(col) by nth_value(col, 1) if the window frame isn't ordered") { + test("can't replace first by nth_value if the window frame isn't ordered") { val inputPlan = testRelation.select( WindowExpression( First(a, false).toAggregateExpression(), From 1bd897cbc4fe30eb8b7740c7232aae87081e8e33 Mon Sep 17 00:00:00 2001 From: Ye Zhou Date: Mon, 23 Nov 2020 15:16:20 -0600 Subject: [PATCH 0553/1009] [SPARK-32918][SHUFFLE] RPC implementation to support control plane coordination for push-based shuffle ### What changes were proposed in this pull request? This is one of the patches for SPIP SPARK-30602 which is needed for push-based shuffle. Summary of changes: This PR introduces a new RPC to be called within Driver. When the expected shuffle push wait time reaches, Driver will call this RPC to facilitate coordination of shuffle map/reduce stages and notify external shuffle services to finalize shuffle block merge for a given shuffle. Shuffle services also respond back the metadata about a merged shuffle partition back to the caller. ### Why are the changes needed? Refer to the SPIP in SPARK-30602. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This code snippets won't be called by any existing code and will be tested after the coordinated driver changes gets merged in SPARK-32920. Lead-authored-by: Min Shen mshenlinkedin.com Closes #30163 from zhouyejoe/SPARK-32918. 
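For illustration only, a minimal Scala sketch of how a driver-side caller might use the new RPC; the actual coordination logic is not part of this patch and arrives with SPARK-32920, so the helper name and the println bodies are placeholders. It shows the `finalizeShuffleMerge` entry point added to `BlockStoreClient` together with a `MergeFinalizerListener` that receives either the decoded `MergeStatuses` or the failure.

```scala
import org.apache.spark.network.shuffle.{BlockStoreClient, MergeFinalizerListener}
import org.apache.spark.network.shuffle.protocol.MergeStatuses

// Hypothetical helper; the real driver-side caller is added in SPARK-32920.
def requestMergeFinalization(
    client: BlockStoreClient,
    host: String,
    port: Int,
    shuffleId: Int): Unit = {
  client.finalizeShuffleMerge(host, port, shuffleId, new MergeFinalizerListener {
    override def onShuffleMergeSuccess(statuses: MergeStatuses): Unit = {
      // The driver would record the merged-partition metadata here before
      // scheduling the reducer stage.
      println(s"Shuffle $shuffleId merge finalized on $host:$port")
    }
    override def onShuffleMergeFailure(e: Throwable): Unit = {
      // Push-based shuffle is best effort, so reducers can still fall back
      // to fetching the original, unmerged blocks.
      println(s"Shuffle $shuffleId merge finalization failed: ${e.getMessage}")
    }
  })
}
```

As the `ExternalBlockStoreClient` change below shows, the request is sent as a `FinalizeShuffleMerge` message over `sendRpc`, and the response is decoded into `MergeStatuses` before reaching this listener.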
Lead-authored-by: Ye Zhou Co-authored-by: Min Shen Signed-off-by: Mridul Muralidharan gmail.com> --- .../network/shuffle/BlockStoreClient.java | 22 ++++++++++ .../shuffle/ExternalBlockStoreClient.java | 29 +++++++++++++ .../shuffle/MergeFinalizerListener.java | 43 +++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index 37befcd4b67fa..a6bdc13e93234 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -147,6 +147,8 @@ public void onFailure(Throwable t) { * @param blockIds block ids to be pushed * @param buffers buffers to be pushed * @param listener the listener to receive block push status. + * + * @since 3.1.0 */ public void pushBlocks( String host, @@ -156,4 +158,24 @@ public void pushBlocks( BlockFetchingListener listener) { throw new UnsupportedOperationException(); } + + /** + * Invoked by Spark driver to notify external shuffle services to finalize the shuffle merge + * for a given shuffle. This allows the driver to start the shuffle reducer stage after properly + * finishing the shuffle merge process associated with the shuffle mapper stage. + * + * @param host host of shuffle server + * @param port port of shuffle server. + * @param shuffleId shuffle ID of the shuffle to be finalized + * @param listener the listener to receive MergeStatuses + * + * @since 3.1.0 + */ + public void finalizeShuffleMerge( + String host, + int port, + int shuffleId, + MergeFinalizerListener listener) { + throw new UnsupportedOperationException(); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index eca35ed290467..56c06e640acda 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -158,6 +158,35 @@ public void pushBlocks( } } + @Override + public void finalizeShuffleMerge( + String host, + int port, + int shuffleId, + MergeFinalizerListener listener) { + checkInit(); + try { + TransportClient client = clientFactory.createClient(host, port); + ByteBuffer finalizeShuffleMerge = new FinalizeShuffleMerge(appId, shuffleId).toByteBuffer(); + client.sendRpc(finalizeShuffleMerge, new RpcResponseCallback() { + @Override + public void onSuccess(ByteBuffer response) { + listener.onShuffleMergeSuccess( + (MergeStatuses) BlockTransferMessage.Decoder.fromByteBuffer(response)); + } + + @Override + public void onFailure(Throwable e) { + listener.onShuffleMergeFailure(e); + } + }); + } catch (Exception e) { + logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", + host, port, e); + listener.onShuffleMergeFailure(e); + } + } + @Override public MetricSet shuffleMetrics() { checkInit(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java new file 
mode 100644 index 0000000000000..08e13eea9f40d --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergeFinalizerListener.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle; + +import java.util.EventListener; + +import org.apache.spark.network.shuffle.protocol.MergeStatuses; + +/** + * :: DeveloperApi :: + * + * Listener providing a callback function to invoke when driver receives the response for the + * finalize shuffle merge request sent to remote shuffle service. + * + * @since 3.1.0 + */ +public interface MergeFinalizerListener extends EventListener { + /** + * Called once upon successful response on finalize shuffle merge on a remote shuffle service. + * The returned {@link MergeStatuses} is passed to the listener for further processing + */ + void onShuffleMergeSuccess(MergeStatuses statuses); + + /** + * Called once upon failure response on finalize shuffle merge on a remote shuffle service. + */ + void onShuffleMergeFailure(Throwable e); +} From 05921814e2349e1acecb14a365e6d47ffb0d68e8 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 24 Nov 2020 09:27:44 +0900 Subject: [PATCH 0554/1009] [SPARK-33479][DOC][FOLLOWUP] DocSearch: Support filtering search results by version ### What changes were proposed in this pull request? In the discussion https://github.com/apache/spark/pull/30292#issuecomment-725613417, we planned to apply a new API key for each Spark release. However, it turns that DocSearch supports crawling multiple URLs from one website and filtering by fact key: https://docsearch.algolia.com/docs/config-file/#using-regular-expressions Thanks to the help from shortcuts, our Spark doc supports multiple version now: https://github.com/algolia/docsearch-configs/pull/2868 This PR is to add the fact key in the search script and update the instruction in the comment. ### Why are the changes needed? To support filtering Spark documentation search results by the current document version. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test Closes #30469 from gengliangwang/apiKeyFollowUp. Authored-by: Gengliang Wang Signed-off-by: Takeshi Yamamuro --- docs/_config.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/docs/_config.yml b/docs/_config.yml index cd341063a1f92..026b3dd804690 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -26,15 +26,20 @@ SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark -# Before a new release, we should apply a new `apiKey` for the new Spark documentation -# on https://docsearch.algolia.com/. 
Otherwise, after release, the search results are always based -# on the latest documentation(https://spark.apache.org/docs/latest/) even when visiting the -# documentation of previous releases. +# Before a new release, we should: +# 1. update the `version` array for the new Spark documentation +# on https://github.com/algolia/docsearch-configs/blob/master/configs/apache_spark.json. +# 2. update the value of `facetFilters.version` in `algoliaOptions` on the new release branch. +# Otherwise, after release, the search results are always based on the latest documentation +# (https://spark.apache.org/docs/latest/) even when visiting the documentation of previous releases. DOCSEARCH_SCRIPT: | docsearch({ apiKey: 'b18ca3732c502995563043aa17bc6ecb', indexName: 'apache_spark', inputSelector: '#docsearch-input', enhancedSearchInput: true, + algoliaOptions: { + 'facetFilters': ["version:latest"] + }, debug: false // Set debug to true if you want to inspect the dropdown }); From 3ce4ab545bfc28db7df2c559726b887b0c8c33b7 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 16:28:43 -0800 Subject: [PATCH 0555/1009] [SPARK-33513][BUILD] Upgrade to Scala 2.13.4 to improve exhaustivity ### What changes were proposed in this pull request? This PR aims the followings. 1. Upgrade from Scala 2.13.3 to 2.13.4 for Apache Spark 3.1 2. Fix exhaustivity issues in both Scala 2.12/2.13 (Scala 2.13.4 requires this for compilation.) 3. Enforce the improved exhaustive check by using the existing Scala 2.13 GitHub Action compilation job. ### Why are the changes needed? Scala 2.13.4 is a maintenance release for 2.13 line and improves JDK 15 support. - https://github.com/scala/scala/releases/tag/v2.13.4 Also, it improves exhaustivity check. - https://github.com/scala/scala/pull/9140 (Check exhaustivity of pattern matches with "if" guards and custom extractors) - https://github.com/scala/scala/pull/9147 (Check all bindings exhaustively, e.g. tuples components) ### Does this PR introduce _any_ user-facing change? Yep. Although it's a maintenance version change, it's a Scala version change. ### How was this patch tested? Pass the CIs and do the manual testing. - Scala 2.12 CI jobs(GitHub Action/Jenkins UT/Jenkins K8s IT) to check the validity of code change. - Scala 2.13 Compilation job to check the compilation Closes #30455 from dongjoon-hyun/SCALA_3.13. 
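As a stand-alone illustration of the stricter check (not code from the patch): Scala 2.13.3 did not verify exhaustivity for matches containing `if` guards, while 2.13.4 does, which is exactly why this commit adds explicit wildcard branches throughout the code base.

```scala
def sign(opt: Option[Int]): String = opt match {
  case Some(n) if n > 0 => "positive"
  case None             => "empty"
  // Without the fallback below, 2.13.3 compiles this silently, but 2.13.4
  // warns that the match can fail on Some(_): e.g. Some(-1) would throw a
  // MatchError at runtime.
  case _                => "non-positive"
}
```

The same reasoning applies to the custom-extractor patterns in the diff (for example the `IntegerLiteral` and `DenseVector`/`SparseVector` matches), which gain `case _` or `case v =>` branches that raise a descriptive error instead of relying on an implicit `MatchError`.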
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/storage/StorageUtils.scala | 2 +- .../main/scala/org/apache/spark/util/JsonProtocol.scala | 8 ++++---- .../src/main/scala/org/apache/spark/ml/linalg/BLAS.scala | 2 ++ .../org/apache/spark/ml/feature/RFormulaParser.scala | 6 +++++- .../org/apache/spark/ml/feature/StandardScaler.scala | 2 ++ .../org/apache/spark/ml/linalg/JsonMatrixConverter.scala | 2 ++ .../org/apache/spark/ml/linalg/JsonVectorConverter.scala | 2 ++ .../main/scala/org/apache/spark/ml/linalg/VectorUDT.scala | 2 ++ .../spark/ml/optim/aggregator/HingeAggregator.scala | 3 +++ .../spark/ml/optim/aggregator/LogisticAggregator.scala | 3 +++ .../scala/org/apache/spark/ml/util/Instrumentation.scala | 2 ++ .../org/apache/spark/mllib/feature/StandardScaler.scala | 2 ++ .../main/scala/org/apache/spark/mllib/linalg/BLAS.scala | 2 ++ .../scala/org/apache/spark/mllib/linalg/Vectors.scala | 2 ++ .../spark/mllib/linalg/distributed/IndexedRowMatrix.scala | 4 ++++ .../apache/spark/mllib/linalg/distributed/RowMatrix.scala | 2 ++ pom.xml | 2 +- .../scheduler/cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../mesos/MesosFineGrainedSchedulerBackendSuite.scala | 2 +- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 2 +- .../apache/spark/sql/catalyst/expressions/literals.scala | 4 +++- .../spark/sql/catalyst/expressions/objects/objects.scala | 2 +- .../apache/spark/sql/catalyst/json/JsonInferSchema.scala | 3 +++ .../sql/catalyst/optimizer/StarSchemaDetection.scala | 6 +++--- .../apache/spark/sql/catalyst/optimizer/expressions.scala | 1 + .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 ++ .../catalyst/plans/logical/basicLogicalOperators.scala | 2 +- .../apache/spark/sql/catalyst/util/GenericArrayData.scala | 2 +- .../spark/sql/catalyst/planning/ScanOperationSuite.scala | 5 +++++ .../sql/catalyst/util/ArrayDataIndexedSeqSuite.scala | 2 +- .../org/apache/spark/sql/execution/SparkSqlParser.scala | 6 +++--- .../spark/sql/execution/aggregate/BaseAggregateExec.scala | 2 +- .../spark/sql/execution/window/WindowExecBase.scala | 6 ++++++ .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 1 + .../spark/streaming/util/FileBasedWriteAheadLog.scala | 2 +- 35 files changed, 77 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index 147731a0fb547..c607fb28b2f56 100644 --- a/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -169,7 +169,7 @@ private[spark] class StorageStatus( .getOrElse((0L, 0L)) case _ if !level.useOffHeap => (_nonRddStorageInfo.onHeapUsage, _nonRddStorageInfo.diskUsage) - case _ if level.useOffHeap => + case _ => (_nonRddStorageInfo.offHeapUsage, _nonRddStorageInfo.diskUsage) } val newMem = math.max(oldMem + changeInMem, 0L) diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index 13f7cb453346f..103965e4860a3 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -757,7 +757,7 @@ private[spark] object JsonProtocol { def taskResourceRequestMapFromJson(json: JValue): Map[String, TaskResourceRequest] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val req = 
taskResourceRequestFromJson(v) (k, req) }.toMap @@ -765,7 +765,7 @@ private[spark] object JsonProtocol { def executorResourceRequestMapFromJson(json: JValue): Map[String, ExecutorResourceRequest] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val req = executorResourceRequestFromJson(v) (k, req) }.toMap @@ -1229,7 +1229,7 @@ private[spark] object JsonProtocol { def resourcesMapFromJson(json: JValue): Map[String, ResourceInformation] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, v) => + jsonFields.collect { case JField(k, v) => val resourceInfo = ResourceInformation.parseJson(v) (k, resourceInfo) }.toMap @@ -1241,7 +1241,7 @@ private[spark] object JsonProtocol { def mapFromJson(json: JValue): Map[String, String] = { val jsonFields = json.asInstanceOf[JObject].obj - jsonFields.map { case JField(k, JString(v)) => (k, v) }.toMap + jsonFields.collect { case JField(k, JString(v)) => (k, v) }.toMap } def propertiesFromJson(json: JValue): Properties = { diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala index 368f177cda828..b6c1b011f004c 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala @@ -302,6 +302,8 @@ private[spark] object BLAS extends Serializable { j += 1 prevCol = col } + case _ => + throw new IllegalArgumentException(s"spr doesn't support vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala index dbbfd8f329431..c5b28c95eb7c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormulaParser.scala @@ -286,6 +286,7 @@ private[ml] object RFormulaParser extends RegexParsers { private val pow: Parser[Term] = term ~ "^" ~ "^[1-9]\\d*".r ^^ { case base ~ "^" ~ degree => power(base, degree.toInt) + case t => throw new IllegalArgumentException(s"Invalid term: $t") } | term private val interaction: Parser[Term] = pow * (":" ^^^ { interact _ }) @@ -298,7 +299,10 @@ private[ml] object RFormulaParser extends RegexParsers { private val expr = (sum | term) private val formula: Parser[ParsedRFormula] = - (label ~ "~" ~ expr) ^^ { case r ~ "~" ~ t => ParsedRFormula(r, t.asTerms.terms) } + (label ~ "~" ~ expr) ^^ { + case r ~ "~" ~ t => ParsedRFormula(r, t.asTerms.terms) + case t => throw new IllegalArgumentException(s"Invalid term: $t") + } def parse(value: String): ParsedRFormula = parseAll(formula, value) match { case Success(result, _) => result diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 7434b1adb2ff2..92dee46ad0055 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -314,6 +314,8 @@ object StandardScalerModel extends MLReadable[StandardScalerModel] { case SparseVector(size, indices, values) => val newValues = transformSparseWithScale(scale, indices, values.clone()) Vectors.sparse(size, indices, newValues) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } case (false, false) => diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala index 0bee643412b3f..8f03a29eb991a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonMatrixConverter.scala @@ -74,6 +74,8 @@ private[ml] object JsonMatrixConverter { ("values" -> values.toSeq) ~ ("isTransposed" -> isTransposed) compact(render(jValue)) + case _ => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala index 781e69f8d63db..1b949d75eeaa0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/JsonVectorConverter.scala @@ -57,6 +57,8 @@ private[ml] object JsonVectorConverter { case DenseVector(values) => val jValue = ("type" -> 1) ~ ("values" -> values.toSeq) compact(render(jValue)) + case _ => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala index 37f173bc20469..35bbaf5aa1ded 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/linalg/VectorUDT.scala @@ -45,6 +45,8 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] { row.setNullAt(2) row.update(3, UnsafeArrayData.fromPrimitiveArray(values)) row + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala index 3d72512563154..0fe1ed231aa83 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/HingeAggregator.scala @@ -200,6 +200,9 @@ private[ml] class BlockHingeAggregator( case sm: SparseMatrix if !fitIntercept => val gradSumVec = new DenseVector(gradientSumArray) BLAS.gemv(1.0, sm.transpose, vec, 1.0, gradSumVec) + + case m => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } if (fitIntercept) gradientSumArray(numFeatures) += vec.values.sum diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala index 2496c789f8da6..5a516940b9788 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/aggregator/LogisticAggregator.scala @@ -504,6 +504,9 @@ private[ml] class BlockLogisticAggregator( case sm: SparseMatrix if !fitIntercept => val gradSumVec = new DenseVector(gradientSumArray) BLAS.gemv(1.0, sm.transpose, vec, 1.0, gradSumVec) + + case m => + throw new IllegalArgumentException(s"Unknown matrix type ${m.getClass}.") } if (fitIntercept) gradientSumArray(numFeatures) += vec.values.sum diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala index d4b39e11fd1d7..2215c2b071584 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala @@ -192,6 +192,8 @@ private[spark] object Instrumentation { case Failure(NonFatal(e)) => instr.logFailure(e) throw e + case Failure(e) => + throw e case Success(result) => instr.logSuccess() result diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 8f9d6d07a4c36..12a5a0f2b2189 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -167,6 +167,8 @@ class StandardScalerModel @Since("1.3.0") ( val newValues = NewStandardScalerModel .transformSparseWithScale(localScale, indices, values.clone()) Vectors.sparse(size, indices, newValues) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } case _ => vector diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index da486010cfa9e..bd60364326e28 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -285,6 +285,8 @@ private[spark] object BLAS extends Serializable with Logging { j += 1 prevCol = col } + case _ => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 2fe415f14032f..9ed9dd0c88c9b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -289,6 +289,8 @@ class VectorUDT extends UserDefinedType[Vector] { row.setNullAt(2) row.update(3, UnsafeArrayData.fromPrimitiveArray(values)) row + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index ad79230c7513c..da5d1650694d6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -145,6 +145,8 @@ class IndexedRowMatrix @Since("1.0.0") ( .map { case (values, blockColumn) => ((blockRow.toInt, blockColumn), (rowInBlock.toInt, values.zipWithIndex)) } + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } }.groupByKey(GridPartitioner(numRowBlocks, numColBlocks, rows.getNumPartitions)).map { case ((blockRow, blockColumn), itr) => @@ -187,6 +189,8 @@ class IndexedRowMatrix @Since("1.0.0") ( Iterator.tabulate(indices.length)(i => MatrixEntry(rowIndex, indices(i), values(i))) case DenseVector(values) => Iterator.tabulate(values.length)(i => MatrixEntry(rowIndex, i, values(i))) + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } new CoordinateMatrix(entries, numRows(), numCols()) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 07b9d91c1f59b..c618b71ddc5a8 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -748,6 +748,8 @@ class RowMatrix @Since("1.0.0") ( } buf }.flatten + case v => + throw new IllegalArgumentException(s"Unknown vector type ${v.getClass}.") } } }.reduceByKey(_ + _).map { case ((i, j), sim) => diff --git a/pom.xml b/pom.xml index 0ab5a8c5b3efa..e5b1f30edd3be 100644 --- a/pom.xml +++ b/pom.xml @@ -3264,7 +3264,7 @@ scala-2.13 - 2.13.3 + 2.13.4 2.13 diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index b5a360167679e..4620bdb005094 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -313,7 +313,6 @@ trait MesosSchedulerUtils extends Logging { // offer has the required attribute and subsumes the required values for that attribute case (name, requiredValues) => offerAttributes.get(name) match { - case None => false case Some(_) if requiredValues.isEmpty => true // empty value matches presence case Some(scalarValue: Value.Scalar) => // check if provided values is less than equal to the offered values @@ -332,6 +331,7 @@ trait MesosSchedulerUtils extends Logging { // check if the specified value is equal, if multiple values are specified // we succeed if any of them match. requiredValues.contains(textValue.getValue) + case _ => false } } } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala index 67ecf3242f52d..6a6514569cf90 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala @@ -178,7 +178,7 @@ class MesosFineGrainedSchedulerBackendSuite val (execInfo, _) = backend.createExecutorInfo( Arrays.asList(backend.createResource("cpus", 4)), "mockExecutor") assert(execInfo.getContainer.getDocker.getImage.equals("spark/mock")) - assert(execInfo.getContainer.getDocker.getForcePullImage.equals(true)) + assert(execInfo.getContainer.getDocker.getForcePullImage) val portmaps = execInfo.getContainer.getDocker.getPortMappingsList assert(portmaps.get(0).getHostPort.equals(80)) assert(portmaps.get(0).getContainerPort.equals(8080)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 39d9eb5a36964..a363615d3afe0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -94,7 +94,7 @@ private[this] object JsonPathParser extends RegexParsers { case Success(result, _) => Some(result) - case NoSuccess(msg, next) => + case _ => None } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 1e69814673082..810cecff379d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -322,7 +322,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { case (a: Array[Byte], b: Array[Byte]) => util.Arrays.equals(a, b) case (a: ArrayBasedMapData, b: ArrayBasedMapData) => a.keyArray == b.keyArray && a.valueArray == b.valueArray - case (a, b) => a != null && a.equals(b) + case (a: Double, b: Double) if a.isNaN && b.isNaN => true + case (a: Float, b: Float) if a.isNaN && b.isNaN => true + case (a, b) => a != null && a == b } case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 9701420e65870..9303df75af503 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -981,7 +981,7 @@ case class MapObjects private( (genValue: String) => s"$builder.add($genValue);", s"$builder;" ) - case None => + case _ => // array ( s""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index de396a4c63458..a39f06628b9ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -190,6 +190,9 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { } case VALUE_TRUE | VALUE_FALSE => BooleanType + + case _ => + throw new SparkException("Malformed JSON") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala index b65fc7f7e2bde..bf3fced0ae0fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/StarSchemaDetection.scala @@ -197,9 +197,9 @@ object StarSchemaDetection extends PredicateHelper with SQLConfHelper { } else { false } - case None => false + case _ => false } - case None => false + case _ => false } case _ => false } @@ -239,7 +239,7 @@ object StarSchemaDetection extends PredicateHelper with SQLConfHelper { case Some(col) if t.outputSet.contains(col) => val stats = t.stats stats.attributeStats.nonEmpty && stats.attributeStats.contains(col) - case None => false + case _ => false } case _ => false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 55a45f4410b34..d1eb3b07d3d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -685,6 +685,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { case LeftOuter => newJoin.right.output case RightOuter => newJoin.left.output case FullOuter => 
newJoin.left.output ++ newJoin.right.output + case _ => Nil }) val newFoldableMap = AttributeMap(foldableMap.baseMap.values.filterNot { case (attr, _) => missDerivedAttrsSet.contains(attr) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ea4baafbacede..50580b8e335ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -967,6 +967,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg (UsingJoin(baseJoinType, visitIdentifierList(c.identifierList)), None) case Some(c) if c.booleanExpression != null => (baseJoinType, Option(expression(c.booleanExpression))) + case Some(c) => + throw new ParseException(s"Unimplemented joinCriteria: $c", ctx) case None if join.NATURAL != null => if (baseJoinType == Cross) { throw new ParseException("NATURAL CROSS JOIN is not supported", ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index f96e07863fa69..c7108ea8ac74b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -362,7 +362,7 @@ case class Join( left.constraints case RightOuter => right.constraints - case FullOuter => + case _ => ExpressionSet() } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala index 81f412c14304d..e46d730afb4a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala @@ -120,7 +120,7 @@ class GenericArrayData(val array: Array[Any]) extends ArrayData { if (!o2.isInstanceOf[Double] || ! 
java.lang.Double.isNaN(o2.asInstanceOf[Double])) { return false } - case _ => if (!o1.equals(o2)) { + case _ => if (o1.getClass != o2.getClass || o1 != o2) { return false } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala index 7790f467a890b..1290f770349e7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/planning/ScanOperationSuite.scala @@ -39,6 +39,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colB) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } @@ -50,6 +51,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === colB) assert(filters.size === 1) + case _ => assert(false) } } @@ -65,6 +67,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects.size === 2) assert(projects(0) === colA) assert(projects(1) === aliasId) + case _ => assert(false) } } @@ -81,6 +84,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } @@ -93,6 +97,7 @@ class ScanOperationSuite extends SparkFunSuite { assert(projects(0) === colA) assert(projects(1) === aliasR) assert(filters.size === 1) + case _ => assert(false) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala index 1e430351b5137..9c3aaea0f7772 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala @@ -45,7 +45,7 @@ class ArrayDataIndexedSeqSuite extends SparkFunSuite { if (e != null) { elementDt match { // For Nan, etc. 
- case FloatType | DoubleType => assert(seq(i).equals(e)) + case FloatType | DoubleType => assert(seq(i) == e) case _ => assert(seq(i) === e) } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 85476bcd21e19..01522257c072d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -868,12 +868,12 @@ class SparkSqlAstBuilder extends AstBuilder { // assert if directory is local when LOCAL keyword is mentioned val scheme = Option(storage.locationUri.get.getScheme) scheme match { - case None => + case Some(pathScheme) if (!pathScheme.equals("file")) => + throw new ParseException("LOCAL is supported only with file: scheme", ctx) + case _ => // force scheme to be file rather than fs.default.name val loc = Some(UriBuilder.fromUri(CatalogUtils.stringToURI(path)).scheme("file").build()) storage = storage.copy(locationUri = loc) - case Some(pathScheme) if (!pathScheme.equals("file")) => - throw new ParseException("LOCAL is supported only with file: scheme", ctx) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala index efba51706cf98..c676609bc37e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/BaseAggregateExec.scala @@ -91,7 +91,7 @@ trait BaseAggregateExec extends UnaryExecNode with AliasAwareOutputPartitioning override def requiredChildDistribution: List[Distribution] = { requiredChildDistributionExpressions match { case Some(exprs) if exprs.isEmpty => AllTuples :: Nil - case Some(exprs) if exprs.nonEmpty => ClusteredDistribution(exprs) :: Nil + case Some(exprs) => ClusteredDistribution(exprs) :: Nil case None => UnspecifiedDistribution :: Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index c6b98d48d7dde..9832e5cd74ae7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -71,6 +71,9 @@ trait WindowExecBase extends UnaryExecNode { case (RowFrame, IntegerLiteral(offset)) => RowBoundOrdering(offset) + case (RowFrame, _) => + sys.error(s"Unhandled bound in windows expressions: $bound") + case (RangeFrame, CurrentRow) => val ordering = RowOrdering.create(orderSpec, child.output) RangeBoundOrdering(ordering, IdentityProjection, IdentityProjection) @@ -249,6 +252,9 @@ trait WindowExecBase extends UnaryExecNode { createBoundOrdering(frameType, lower, timeZone), createBoundOrdering(frameType, upper, timeZone)) } + + case _ => + sys.error(s"Unsupported factory: $key") } // Keep track of the number of expressions. This is a side-effect in a map... 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 8ab6e28366753..9213173bbc9ba 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -1039,6 +1039,7 @@ private[hive] trait HiveInspectors { private def decimalTypeInfo(decimalType: DecimalType): TypeInfo = decimalType match { case DecimalType.Fixed(precision, scale) => new DecimalTypeInfo(precision, scale) + case dt => throw new AnalysisException(s"${dt.catalogString} is not supported.") } def toTypeInfo: TypeInfo = dt match { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 2e5000159bcb7..d1f9dfb791355 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -293,7 +293,7 @@ private[streaming] object FileBasedWriteAheadLog { val startTime = startTimeStr.toLong val stopTime = stopTimeStr.toLong Some(LogInfo(startTime, stopTime, file.toString)) - case None => + case None | Some(_) => None } }.sortBy { _.startTime } From 8380e00419281cd1b1fc5706d23d5231356a3379 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 23 Nov 2020 19:35:58 -0800 Subject: [PATCH 0556/1009] [SPARK-33524][SQL][TESTS] Change `InMemoryTable` not to use Tuple.hashCode for `BucketTransform` ### What changes were proposed in this pull request? This PR aims to change `InMemoryTable` not to use `Tuple.hashCode` for `BucketTransform`. ### Why are the changes needed? SPARK-32168 made `InMemoryTable` to handle `BucketTransform` as a hash of `Tuple` which is dependents on Scala versions. - https://github.com/apache/spark/blob/master/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala#L159 **Scala 2.12.10** ```scala $ bin/scala Welcome to Scala 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_272). Type in expressions for evaluation. Or try :help. scala> (1, 1).hashCode res0: Int = -2074071657 ``` **Scala 2.13.3** ```scala Welcome to Scala 2.13.3 (OpenJDK 64-Bit Server VM, Java 1.8.0_272). Type in expressions for evaluation. Or try :help. scala> (1, 1).hashCode val res0: Int = -1669302457 ``` ### Does this PR introduce _any_ user-facing change? Yes. This is a correctness issue. ### How was this patch tested? Pass the UT with both Scala 2.12/2.13. Closes #30477 from dongjoon-hyun/SPARK-33524. 
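A side-by-side sketch of the two bucket computations, with simplified signatures (`value` and `dataType` stand for what `InMemoryTable`'s extractor returns for the bucketed column, `numBuckets` for the `BucketTransform` argument): the old version hashes the `(value, dataType)` tuple, whose `hashCode` differs between Scala 2.12 and 2.13 as the REPL output above shows, while the new version combines the element hashes explicitly and therefore assigns the same bucket on both versions.

```scala
def bucketOld(value: Any, dataType: Any, numBuckets: Int): Int =
  ((value, dataType).hashCode() & Integer.MAX_VALUE) % numBuckets // Scala-version dependent

def bucketNew(value: Any, dataType: Any, numBuckets: Int): Int = {
  val valueHashCode = if (value == null) 0 else value.hashCode
  ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets
}
```

This is also why the expected `_partition` values in `DataSourceV2SQLSuite` change in the diff below.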
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/connector/InMemoryTable.scala | 4 +++- .../org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index c93053abc550a..ffff00b54f1b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -156,7 +156,9 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => - (extractor(ref.fieldNames, schema, row).hashCode() & Integer.MAX_VALUE) % numBuckets + val (value, dataType) = extractor(ref.fieldNames, schema, row) + val valueHashCode = if (value == null) 0 else value.hashCode + ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index da53936239de8..dc4abf3eb19cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2511,7 +2511,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT id, data, _partition FROM $t1"), - Seq(Row(1, "a", "3/1"), Row(2, "b", "2/2"), Row(3, "c", "2/3"))) + Seq(Row(1, "a", "3/1"), Row(2, "b", "0/2"), Row(3, "c", "1/3"))) } } @@ -2524,7 +2524,7 @@ class DataSourceV2SQLSuite checkAnswer( spark.sql(s"SELECT index, data, _partition FROM $t1"), - Seq(Row(3, "c", "2/3"), Row(2, "b", "2/2"), Row(1, "a", "3/1"))) + Seq(Row(3, "c", "1/3"), Row(2, "b", "0/2"), Row(1, "a", "3/1"))) } } From f35e28fea5605de4b28630eb643a821ecd7c8523 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 24 Nov 2020 13:30:06 +0900 Subject: [PATCH 0557/1009] [SPARK-33523][SQL][TEST] Add predicate related benchmark to SubExprEliminationBenchmark ### What changes were proposed in this pull request? This patch adds predicate related benchmark to `SubExprEliminationBenchmark`. ### Why are the changes needed? We should have a benchmark for subexpression elimination of predicate. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Run benchmark locally. Closes #30476 from viirya/SPARK-33523. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- ...ExprEliminationBenchmark-jdk11-results.txt | 22 +++- .../SubExprEliminationBenchmark-results.txt | 22 +++- .../SubExprEliminationBenchmark.scala | 106 ++++++++++-------- 3 files changed, 90 insertions(+), 60 deletions(-) diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index 3d2b2e5c8edba..1eb7b534d2194 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -5,11 +5,21 @@ Benchmark for performance of subexpression elimination Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz -from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 25932 26908 916 0.0 259320042.3 1.0X -subexpressionElimination off, codegen off 26085 26159 65 0.0 260848905.0 1.0X -subexpressionElimination on, codegen on 2860 2939 72 0.0 28603312.9 9.1X -subexpressionElimination on, codegen off 2517 2617 93 0.0 25165157.7 10.3X +from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X +subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X +subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X +subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X + +Preparing data for benchmarking ... +OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 +Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz +from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subexpressionElimination off, codegen on 34631 35449 833 0.0 346309884.0 1.0X +subexpressionElimination off, codegen on 34480 34851 353 0.0 344798490.4 1.0X +subexpressionElimination off, codegen on 16618 16811 291 0.0 166176642.6 2.1X +subexpressionElimination off, codegen on 34316 34667 310 0.0 343157094.7 1.0X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index ca2a9c6497500..801f519ca76a1 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -5,11 +5,21 @@ Benchmark for performance of subexpression elimination Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz -from_json as subExpr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -subexpressionElimination off, codegen on 26503 27622 1937 0.0 265033362.4 1.0X -subexpressionElimination off, codegen off 24920 25376 430 0.0 249196978.2 1.1X -subexpressionElimination on, codegen on 2421 2466 39 0.0 24213606.1 10.9X -subexpressionElimination on, codegen off 2360 2435 87 0.0 23604320.7 11.2X +from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X +subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X +subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X +subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X + +Preparing data for benchmarking ... 
+OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 +Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz +from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +subexpressionElimination off, codegen on 37691 38846 1004 0.0 376913767.9 1.0X +subexpressionElimination off, codegen on 37852 39124 1103 0.0 378517745.5 1.0X +subexpressionElimination off, codegen on 22900 23085 202 0.0 229000242.5 1.6X +subexpressionElimination off, codegen on 38298 38598 374 0.0 382978731.3 1.0X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index 34b4a70d05a25..e26acbcb3cd21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, Or} import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf @@ -39,7 +41,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { import spark.implicits._ def withFromJson(rowsNum: Int, numIters: Int): Unit = { - val benchmark = new Benchmark("from_json as subExpr", rowsNum, output = output) + val benchmark = new Benchmark("from_json as subExpr in Project", rowsNum, output = output) withTempPath { path => prepareDataInfo(benchmark) @@ -50,57 +52,65 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { from_json('value, schema).getField(s"col$idx") } - // We only benchmark subexpression performance under codegen/non-codegen, so disabling - // json optimization. - benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true", - SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() + Seq( + ("false", "true", "CODEGEN_ONLY"), + ("false", "false", "NO_CODEGEN"), + ("true", "true", "CODEGEN_ONLY"), + ("true", "false", "NO_CODEGEN") + ).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) => + // We only benchmark subexpression performance under codegen/non-codegen, so disabling + // json optimization. 
+ val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled" + benchmark.addCase(caseName, numIters) { _ => + withSQLConf( + SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled, + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled, + SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory, + SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { + val df = spark.read + .text(path.getAbsolutePath) + .select(cols: _*) + df.write.mode("overwrite").format("noop").save() + } } } - benchmark.addCase("subexpressionElimination off, codegen off", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "false", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", - SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() - } - } + benchmark.run() + } + } - benchmark.addCase("subexpressionElimination on, codegen on", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true", - SQLConf.CODEGEN_FACTORY_MODE.key -> "CODEGEN_ONLY", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() - } - } + def withFilter(rowsNum: Int, numIters: Int): Unit = { + val benchmark = new Benchmark("from_json as subExpr in Filter", rowsNum, output = output) - benchmark.addCase("subexpressionElimination on, codegen off", numIters) { _ => - withSQLConf( - SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> "true", - SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", - SQLConf.CODEGEN_FACTORY_MODE.key -> "NO_CODEGEN", - SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") { - val df = spark.read - .text(path.getAbsolutePath) - .select(cols: _*) - df.collect() + withTempPath { path => + prepareDataInfo(benchmark) + val numCols = 1000 + val schema = writeWideRow(path.getAbsolutePath, rowsNum, numCols) + + val predicate = (0 until numCols).map { idx => + (from_json('value, schema).getField(s"col$idx") >= Literal(100000)).expr + }.asInstanceOf[Seq[Expression]].reduce(Or) + + Seq( + ("false", "true", "CODEGEN_ONLY"), + ("false", "false", "NO_CODEGEN"), + ("true", "true", "CODEGEN_ONLY"), + ("true", "false", "NO_CODEGEN") + ).foreach { case (subExprEliminationEnabled, codegenEnabled, codegenFactory) => + // We only benchmark subexpression performance under codegen/non-codegen, so disabling + // json optimization. 
+        val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled"
+        benchmark.addCase(caseName, numIters) { _ =>
+          withSQLConf(
+            SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled,
+            SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled,
+            SQLConf.CODEGEN_FACTORY_MODE.key -> codegenFactory,
+            SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> "false") {
+            val df = spark.read
+              .text(path.getAbsolutePath)
+              .where(Column(predicate))
+            df.write.mode("overwrite").format("noop").save()
+          }
        }
      }
@@ -108,11 +118,11 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark {
    }
  }
-
  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
    val numIters = 3
    runBenchmark("Benchmark for performance of subexpression elimination") {
      withFromJson(100, numIters)
+      withFilter(100, numIters)
    }
  }
}

From a6555ee59626bbc4ef860c4ff9fcefae0d45b45e Mon Sep 17 00:00:00 2001
From: Max Gekk
Date: Tue, 24 Nov 2020 08:04:21 +0000
Subject: [PATCH 0558/1009] [SPARK-33521][SQL] Universal type conversion in resolving V2 partition specs

### What changes were proposed in this pull request?
In the PR, I propose to change the resolver of partition specs used in V2 `ALTER TABLE .. ADD/DROP PARTITION` (at the moment), and re-use `CAST` to convert partition values to the desired types according to the partition schema.

### Why are the changes needed?
Currently, the resolver of V2 partition specs supports just a few types: https://github.com/apache/spark/blob/23e9920b3910e4f05269853429c7f18888cdc7b5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala#L72, and fails on other types like date/timestamp.

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
By running `AlterTablePartitionV2SQLSuite`

Closes #30474 from MaxGekk/dsv2-partition-value-types.
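The change described above boils down to evaluating a Catalyst `Cast` from the raw partition-spec string to the partition column's data type. A rough sketch of that idea, not the patch itself: the column names mirror the date/timestamp columns in the new test below, while the spec map and the hard-coded time zone are purely illustrative.

```scala
import org.apache.spark.sql.catalyst.expressions.{Cast, Literal}
import org.apache.spark.sql.types._

// Partition values always arrive as strings from ALTER TABLE ... ADD/DROP PARTITION (...).
val partSchema = StructType(Seq(
  StructField("part8", DateType),
  StructField("part9", TimestampType)))
val normalizedSpec = Map(
  "part8" -> "2020-11-23",
  "part9" -> "2020-11-23T22:13:10.123456")

val partValues = partSchema.map { field =>
  val raw = normalizedSpec.get(field.name).orNull
  // A string cast exists for every partitionable type (dates, timestamps, decimals, ...),
  // which is what lets the hand-written match over a few primitive types go away.
  Cast(Literal.create(raw, StringType), field.dataType, Some("America/Los_Angeles")).eval()
}
```

Reusing `Cast` also keeps the conversion semantics identical to what a query would get from an explicit cast of the same string.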
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 29 +--------- .../AlterTablePartitionV2SQLSuite.scala | 58 +++++++++++++++++++ 2 files changed, 61 insertions(+), 26 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 531d40f431dee..6d061fce06919 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -65,31 +65,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { conf.resolver) val partValues = partSchema.map { part => - val partValue = normalizedSpec.get(part.name).orNull - if (partValue == null) { - null - } else { - // TODO: Support other datatypes, such as DateType - part.dataType match { - case _: ByteType => - partValue.toByte - case _: ShortType => - partValue.toShort - case _: IntegerType => - partValue.toInt - case _: LongType => - partValue.toLong - case _: FloatType => - partValue.toFloat - case _: DoubleType => - partValue.toDouble - case _: StringType => - partValue - case _ => - throw new AnalysisException( - s"Type ${part.dataType.typeName} is not supported for partition.") - } - } + val raw = normalizedSpec.get(part.name).orNull + Cast(Literal.create(raw, StringType), part.dataType, Some(conf.sessionLocalTimeZone)).eval() } InternalRow.fromSeq(partValues) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index e05c2c09ace2a..4cacd5ec2b49e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -17,12 +17,16 @@ package org.apache.spark.sql.connector +import java.time.{LocalDate, LocalDateTime} + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.unsafe.types.UTF8String class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -185,4 +189,58 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } } + + test("SPARK-33521: universal type conversions of partition values") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t ( + | part0 tinyint, + | part1 smallint, + | part2 int, + | 
part3 bigint, + | part4 float, + | part5 double, + | part6 string, + | part7 boolean, + | part8 date, + | part9 timestamp + |) USING foo + |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) + |""".stripMargin) + val partTable = catalog("testpart").asTableCatalog + .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) + .asPartitionable + val expectedPartition = InternalRow.fromSeq(Seq[Any]( + -1, // tinyint + 0, // smallint + 1, // int + 2, // bigint + 3.14F, // float + 3.14D, // double + UTF8String.fromString("abc"), // string + true, // boolean + LocalDate.parse("2020-11-23").toEpochDay, + DateTimeUtils.instantToMicros( + LocalDateTime.parse("2020-11-23T22:13:10.123456").atZone(DateTimeTestUtils.LA).toInstant) + )) + assert(!partTable.partitionExists(expectedPartition)) + val partSpec = """ + | part0 = -1, + | part1 = 0, + | part2 = 1, + | part3 = 2, + | part4 = 3.14, + | part5 = 3.14, + | part6 = 'abc', + | part7 = true, + | part8 = '2020-11-23', + | part9 = '2020-11-23T22:13:10.123456' + |""".stripMargin + sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") + assert(partTable.partitionExists(expectedPartition)) + sql(s" ALTER TABLE $t DROP PARTITION ($partSpec)") + assert(!partTable.partitionExists(expectedPartition)) + } + } } From fdd6c73b3cfac5af30c789c7f70b92367a79f7e7 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 24 Nov 2020 11:06:39 +0000 Subject: [PATCH 0559/1009] [SPARK-33514][SQL] Migrate TRUNCATE TABLE command to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `TRUNCATE TABLE` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `TRUNCATE TABLE` works only with v1 tables, and not supported for v2 tables. ### Why are the changes needed? The changes allow consistent resolution behavior when resolving the table identifier. For example, the following is the current behavior: ```scala sql("CREATE TEMPORARY VIEW t AS SELECT 1") sql("CREATE DATABASE db") sql("CREATE TABLE t using csv AS SELECT 1") sql("USE db") sql("TRUNCATE TABLE t") // Succeeds ``` With this PR, `TRUNCATE TABLE` above fails with the following: ``` org.apache.spark.sql.AnalysisException: t is a temp view not table.; line 1 pos 0 at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveTempViews$$anonfun$apply$7.$anonfun$applyOrElse$42(Analyzer.scala:866) ``` , which is expected since temporary view is resolved first and `TRUNCATE TABLE` doesn't support a temporary view. ### Does this PR introduce _any_ user-facing change? After this PR, `TRUNCATE TABLE` is resolved to a temp view `t` instead of table `db.t` in the above scenario. ### How was this patch tested? Updated existing tests. Closes #30457 from imback82/truncate_table. 
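For code that relied on the old resolution order, the straightforward adjustment is to qualify the table name so that local temporary views are never considered. A small sketch reusing the names from the example above (illustrative only, not part of the patch):

```scala
sql("CREATE TEMPORARY VIEW t AS SELECT 1")
sql("CREATE DATABASE db")
sql("CREATE TABLE db.t USING csv AS SELECT 1")

// Fails after this change: the unqualified name resolves to the temp view first,
// and TRUNCATE TABLE expects a table.
// sql("TRUNCATE TABLE t")

// Still succeeds: the qualified name bypasses local temporary views
// and resolves to the session catalog table.
sql("TRUNCATE TABLE db.t")
```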
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 6 +++--- .../sql/catalyst/plans/logical/v2Commands.scala | 9 +++++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 6 ++++-- .../catalyst/analysis/ResolveSessionCatalog.scala | 5 ++--- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- .../apache/spark/sql/execution/SQLViewSuite.scala | 13 ++++++++----- .../spark/sql/execution/command/DDLSuite.scala | 10 +++++++--- 8 files changed, 38 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 50580b8e335ff..a4298abd211b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3356,7 +3356,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[TruncateTableStatement]] command. + * Create a [[TruncateTable]] command. * * For example: * {{{ @@ -3364,8 +3364,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitTruncateTable(ctx: TruncateTableContext): LogicalPlan = withOrigin(ctx) { - TruncateTableStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + TruncateTable( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "TRUNCATE TABLE"), Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 5bda2b5b8db01..a65b9fc59bd55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -670,3 +670,12 @@ case class LoadData( case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the TRUNCATE TABLE command. 
+ */ +case class TruncateTable( + child: LogicalPlan, + partitionSpec: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index bd28484b23f46..997c642276bfb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1621,11 +1621,13 @@ class DDLParserSuite extends AnalysisTest { test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), - TruncateTableStatement(Seq("a", "b", "c"), None)) + TruncateTable(UnresolvedTable(Seq("a", "b", "c"), "TRUNCATE TABLE"), None)) comparePlans( parsePlan("TRUNCATE TABLE a.b.c PARTITION(ds='2017-06-10')"), - TruncateTableStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10")))) + TruncateTable( + UnresolvedTable(Seq("a", "b", "c"), "TRUNCATE TABLE"), + Some(Map("ds" -> "2017-06-10")))) } test("REFRESH TABLE") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 303ae47f06b84..726099991a897 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -456,10 +456,9 @@ class ResolveSessionCatalog( val name = parseTempViewOrV1Table(tbl, "UNCACHE TABLE") UncacheTableCommand(name.asTableIdentifier, ifExists) - case TruncateTableStatement(tbl, partitionSpec) => - val v1TableName = parseV1Table(tbl, "TRUNCATE TABLE") + case TruncateTable(ResolvedV1TableIdentifier(ident), partitionSpec) => TruncateTableCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, partitionSpec) case ShowPartitionsStatement(tbl, partitionSpec) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index e5c29312b80e7..30d976524bfa8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -302,6 +302,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowCreateTable(_: ResolvedTable, _) => throw new AnalysisException("SHOW CREATE TABLE is not supported for v2 tables.") + case TruncateTable(_: ResolvedTable, _) => + throw new AnalysisException("TRUNCATE TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index dc4abf3eb19cf..9a3fa0c5bd3f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1986,8 +1986,8 @@ class DataSourceV2SQLSuite |PARTITIONED BY (id) """.stripMargin) - testV1Command("TRUNCATE TABLE", t) - testV1Command("TRUNCATE TABLE", s"$t PARTITION(id='1')") + testNotSupportedV2Command("TRUNCATE TABLE", t) + 
testNotSupportedV2Command("TRUNCATE TABLE", s"$t PARTITION(id='1')") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 504cc57dc12d3..edeebde7db726 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -176,15 +176,18 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"""LOAD DATA LOCAL INPATH "$dataFilePath" INTO TABLE $viewName""") }.getMessage assert(e2.contains(s"$viewName is a temp view. 'LOAD DATA' expects a table")) - assertNoSuchTable(s"TRUNCATE TABLE $viewName") val e3 = intercept[AnalysisException] { - sql(s"SHOW CREATE TABLE $viewName") + sql(s"TRUNCATE TABLE $viewName") }.getMessage - assert(e3.contains(s"$viewName is a temp view not table or permanent view")) + assert(e3.contains(s"$viewName is a temp view. 'TRUNCATE TABLE' expects a table")) val e4 = intercept[AnalysisException] { - sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + sql(s"SHOW CREATE TABLE $viewName") }.getMessage assert(e4.contains(s"$viewName is a temp view not table or permanent view")) + val e5 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") + }.getMessage + assert(e5.contains(s"$viewName is a temp view not table or permanent view")) assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") } } @@ -219,7 +222,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql(s"TRUNCATE TABLE $viewName") }.getMessage - assert(e.contains(s"Operation not allowed: TRUNCATE TABLE on views: `default`.`testview`")) + assert(e.contains("default.testView is a view. 'TRUNCATE TABLE' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 43a33860d262e..07201f9f85b5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2169,11 +2169,15 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { (1 to 10).map { i => (i, i) }.toDF("a", "b").createTempView("my_temp_tab") sql(s"CREATE TABLE my_ext_tab using parquet LOCATION '${tempDir.toURI}'") sql(s"CREATE VIEW my_view AS SELECT 1") - intercept[NoSuchTableException] { + val e1 = intercept[AnalysisException] { sql("TRUNCATE TABLE my_temp_tab") - } + }.getMessage + assert(e1.contains("my_temp_tab is a temp view. 'TRUNCATE TABLE' expects a table")) assertUnsupported("TRUNCATE TABLE my_ext_tab") - assertUnsupported("TRUNCATE TABLE my_view") + val e2 = intercept[AnalysisException] { + sql("TRUNCATE TABLE my_view") + }.getMessage + assert(e2.contains("default.my_view is a view. 'TRUNCATE TABLE' expects a table")) } } } From 048a9821c788b6796d52d1e2a0cd174377ebd0f0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 24 Nov 2020 09:50:10 -0800 Subject: [PATCH 0560/1009] [SPARK-33535][INFRA][TESTS] Export LANG to en_US.UTF-8 in run-tests-jenkins script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? It seems that Jenkins tests tasks in many pr have test failed. 
The failed cases include: - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V1 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V2 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V3 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V4 get binary type` - `org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.HIVE_CLI_SERVICE_PROTOCOL_V5 get binary type` The error message as follows: ``` Error Messageorg.scalatest.exceptions.TestFailedException: "[?](" did not equal "[�]("Stacktracesbt.ForkMain$ForkError: org.scalatest.exceptions.TestFailedException: "[?](" did not equal "[�](" at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) at org.apache.spark.sql.hive.thriftserver.SparkThriftServerProtocolVersionsSuite.$anonfun$new$26(SparkThriftServerProtocolVersionsSuite.scala:302) ``` But they can pass the GitHub Action, maybe it's related to the `LANG` of the Jenkins build machine, this pr add `export LANG="en_US.UTF-8"` in `run-test-jenkins` script. ### Why are the changes needed? Ensure LANG in Jenkins test process is `en_US.UTF-8` to pass `HIVE_CLI_SERVICE_PROTOCOL_VX` related tests ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Jenkins tests pass Closes #30487 from LuciferYang/SPARK-33535. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- dev/run-tests-jenkins | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index c3adc696a5122..c155d4ea3f076 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -26,6 +26,7 @@ FWDIR="$( cd "$( dirname "$0" )/.." && pwd )" cd "$FWDIR" export PATH=/home/anaconda/envs/py36/bin:$PATH +export LANG="en_US.UTF-8" PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 6, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then From 95b6dabc33515f1975eb889480ccca12bf5ac3c8 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Wed, 25 Nov 2020 07:38:45 +0900 Subject: [PATCH 0561/1009] [SPARK-33287][SS][UI] Expose state custom metrics information on SS UI ### What changes were proposed in this pull request? Structured Streaming UI is not containing state custom metrics information. In this PR I've added it. ### Why are the changes needed? Missing state custom metrics information. ### Does this PR introduce _any_ user-facing change? Additional UI elements appear. ### How was this patch tested? Existing unit tests + manual test. ``` #Compile Spark echo "spark.sql.streaming.ui.enabledCustomMetricList stateOnCurrentVersionSizeBytes" >> conf/spark-defaults.conf sbin/start-master.sh sbin/start-worker.sh spark://gsomogyi-MBP16:7077 ./bin/spark-submit --master spark://gsomogyi-MBP16:7077 --deploy-mode client --class com.spark.Main ../spark-test/target/spark-test-1.0-SNAPSHOT-jar-with-dependencies.jar ``` Screenshot 2020-11-18 at 12 45 36 Closes #30336 from gaborgsomogyi/SPARK-33287. 
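Because the new `spark.sql.streaming.ui.enabledCustomMetricList` entry is registered in `StaticSQLConf` (see the diff below), it has to be set before the session starts: either in `spark-defaults.conf` as in the manual test above, or on the session builder. A minimal sketch, assuming the same metric name used elsewhere in this patch; the app name is illustrative:

```scala
import org.apache.spark.sql.SparkSession

// Static SQL conf: it must be supplied at session creation time and
// cannot be changed later with spark.conf.set().
val spark = SparkSession.builder()
  .appName("ss-ui-custom-metrics")
  .config("spark.sql.streaming.ui.enabledCustomMetricList", "stateOnCurrentVersionSizeBytes")
  .getOrCreate()
```

Several metric names can be given as a comma-separated list, and the page matches them case-insensitively against the custom metrics supported by the configured state store provider.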
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../spark/sql/internal/StaticSQLConf.scala | 12 ++ .../ui/StreamingQueryStatisticsPage.scala | 143 +++++++++++++----- .../ui/StreamingQueryPageSuite.scala | 5 + .../sql/streaming/ui/UISeleniumSuite.scala | 6 + 4 files changed, 127 insertions(+), 39 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala index ca1074fcf6fc0..02cb6f29622f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/StaticSQLConf.scala @@ -249,4 +249,16 @@ object StaticSQLConf { .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(-1) + + val ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST = + buildStaticConf("spark.sql.streaming.ui.enabledCustomMetricList") + .internal() + .doc("Configures a list of custom metrics on Structured Streaming UI, which are enabled. " + + "The list contains the name of the custom metrics separated by comma. In aggregation" + + "only sum used. The list of supported custom metrics is state store provider specific " + + "and it can be found out for example from query progress log entry.") + .version("3.1.0") + .stringConf + .toSequence + .createWithDefault(Nil) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index f48672afb41f3..77b1e61d587a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -19,18 +19,32 @@ package org.apache.spark.sql.streaming.ui import java.{util => ju} import java.lang.{Long => JLong} -import java.util.UUID +import java.util.{Locale, UUID} import javax.servlet.http.HttpServletRequest +import scala.collection.JavaConverters._ import scala.xml.{Node, NodeBuffer, Unparsed} import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.streaming.state.StateStoreProvider +import org.apache.spark.sql.internal.SQLConf.STATE_STORE_PROVIDER_CLASS +import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST import org.apache.spark.sql.streaming.ui.UIUtils._ import org.apache.spark.ui.{GraphUIData, JsCollector, UIUtils => SparkUIUtils, WebUIPage} private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) extends WebUIPage("statistics") with Logging { + // State store provider implementation mustn't do any heavyweight initialiation in constructor + // but in its init method. + private val supportedCustomMetrics = StateStoreProvider.create( + parent.parent.conf.get(STATE_STORE_PROVIDER_CLASS)).supportedCustomMetrics + logDebug(s"Supported custom metrics: $supportedCustomMetrics") + + private val enabledCustomMetrics = + parent.parent.conf.get(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST).map(_.toLowerCase(Locale.ROOT)) + logDebug(s"Enabled custom metrics: $enabledCustomMetrics") + def generateLoadResources(request: HttpServletRequest): Seq[Node] = { // scalastyle:off @@ -199,49 +213,100 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) "records") graphUIDataForNumRowsDroppedByWatermark.generateDataJs(jsCollector) - // scalastyle:off - - -
    -
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    -
    - - {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    -
    - - {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} - {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    -
    - - {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} - {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} - - - -
    -
    Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    -
    - - {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} - {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} - - // scalastyle:on + val result = + // scalastyle:off + + +
    +
    Aggregated Number Of Total State Rows {SparkUIUtils.tooltip("Aggregated number of total state rows.", "right")}
    +
    + + {graphUIDataForNumberTotalRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberTotalRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Updated State Rows {SparkUIUtils.tooltip("Aggregated number of updated state rows.", "right")}
    +
    + + {graphUIDataForNumberUpdatedRows.generateTimelineHtml(jsCollector)} + {graphUIDataForNumberUpdatedRows.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated State Memory Used In Bytes {SparkUIUtils.tooltip("Aggregated state memory used in bytes.", "right")}
    +
    + + {graphUIDataForMemoryUsedBytes.generateTimelineHtml(jsCollector)} + {graphUIDataForMemoryUsedBytes.generateHistogramHtml(jsCollector)} + + + +
    +
    Aggregated Number Of Rows Dropped By Watermark {SparkUIUtils.tooltip("Accumulates all input rows being dropped in stateful operators by watermark. 'Inputs' are relative to operators.", "right")}
    +
    + + {graphUIDataForNumRowsDroppedByWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForNumRowsDroppedByWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + + if (enabledCustomMetrics.nonEmpty) { + result ++= generateAggregatedCustomMetrics(query, minBatchTime, maxBatchTime, jsCollector) + } + result } else { new NodeBuffer() } } + def generateAggregatedCustomMetrics( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): NodeBuffer = { + val result: NodeBuffer = new NodeBuffer + + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress.stateOperators.nonEmpty) + query.lastProgress.stateOperators.head.customMetrics.keySet().asScala + .filter(m => enabledCustomMetrics.contains(m.toLowerCase(Locale.ROOT))).map { metricName => + val data = query.recentProgress.map(p => (parseProgressTimestamp(p.timestamp), + p.stateOperators.map(_.customMetrics.get(metricName).toDouble).sum)) + val max = data.maxBy(_._2)._2 + val metric = supportedCustomMetrics.find(_.name.equalsIgnoreCase(metricName)).get + + val graphUIData = + new GraphUIData( + s"aggregated-$metricName-timeline", + s"aggregated-$metricName-histogram", + data, + minBatchTime, + maxBatchTime, + 0, + max, + "") + graphUIData.generateDataJs(jsCollector) + + result ++= + // scalastyle:off + + +
    +
    Aggregated Custom Metric {s"$metricName"} {SparkUIUtils.tooltip(metric.desc, "right")}
    +
    + + {graphUIData.generateTimelineHtml(jsCollector)} + {graphUIData.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } + + result + } + def generateStatTable(query: StreamingQueryUIData): Seq[Node] = { val batchToTimestamps = withNoProgress(query, query.recentProgress.map(p => (p.batchId, parseProgressTimestamp(p.timestamp))), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index 640c21c52a146..c2b6688faf0e7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -24,8 +24,10 @@ import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.scalatest.BeforeAndAfter import scala.xml.Node +import org.apache.spark.SparkConf import org.apache.spark.sql.streaming.StreamingQueryProgress import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.ui.SparkUI class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { @@ -65,10 +67,13 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val ui = mock(classOf[SparkUI]) when(request.getParameter("id")).thenReturn(id.toString) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) when(tab.statusListener).thenReturn(statusListener) + when(ui.conf).thenReturn(new SparkConf()) + when(tab.parent).thenReturn(ui) val streamQuery = createStreamQueryUIData(id) when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 307479db33949..94844c4e87a84 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.internal.config.UI.{UI_ENABLED, UI_PORT} import org.apache.spark.sql.LocalSparkSession.withSparkSession import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST import org.apache.spark.sql.streaming.StreamingQueryException import org.apache.spark.ui.SparkUICssErrorHandler @@ -53,6 +54,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B .setAppName("ui-test") .set(UI_ENABLED, true) .set(UI_PORT, 0) + .set(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST, Seq("stateOnCurrentVersionSizeBytes")) additionalConfs.foreach { case (k, v) => conf.set(k, v) } val spark = SparkSession.builder().master(master).config(conf).getOrCreate() assert(spark.sparkContext.ui.isDefined) @@ -140,6 +142,10 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State Memory Used In Bytes (?)") summaryText should contain ("Aggregated Number Of Rows Dropped By Watermark (?)") + summaryText should contain ("Aggregated Custom Metric stateOnCurrentVersionSizeBytes" + + 
" (?)") + summaryText should not contain ("Aggregated Custom Metric loadedMapCacheHitCount (?)") + summaryText should not contain ("Aggregated Custom Metric loadedMapCacheMissCount (?)") } } finally { spark.streams.active.foreach(_.stop()) From 665817bd4fc07b18cee0f8c6ff759288472514c2 Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 25 Nov 2020 09:27:04 +0900 Subject: [PATCH 0562/1009] [SPARK-33457][PYTHON] Adjust mypy configuration ### What changes were proposed in this pull request? This pull request: - Adds following flags to the main mypy configuration: - [`strict_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-strict_optional) - [`no_implicit_optional`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-no_implicit_optional) - [`disallow_untyped_defs`](https://mypy.readthedocs.io/en/stable/config_file.html#confval-disallow_untyped_calls) These flags are enabled only for public API and disabled for tests and internal modules. Additionally, these PR fixes missing annotations. ### Why are the changes needed? Primary reason to propose this changes is to use standard configuration as used by typeshed project. This will allow us to be more strict, especially when interacting with JVM code. See for example https://github.com/apache/spark/pull/29122#pullrequestreview-513112882 Additionally, it will allow us to detect cases where annotations have unintentionally omitted. ### Does this PR introduce _any_ user-facing change? Annotations only. ### How was this patch tested? `dev/lint-python`. Closes #30382 from zero323/SPARK-33457. Authored-by: zero323 Signed-off-by: HyukjinKwon --- python/mypy.ini | 87 +++++++++++++++++++++++ python/pyspark/broadcast.pyi | 10 +-- python/pyspark/context.pyi | 25 +++++-- python/pyspark/ml/classification.pyi | 6 +- python/pyspark/ml/common.pyi | 10 ++- python/pyspark/ml/evaluation.pyi | 24 ++++--- python/pyspark/ml/feature.pyi | 20 ++++-- python/pyspark/ml/linalg/__init__.pyi | 36 +++++----- python/pyspark/ml/pipeline.pyi | 4 +- python/pyspark/ml/regression.pyi | 10 +-- python/pyspark/mllib/classification.pyi | 2 +- python/pyspark/mllib/clustering.pyi | 6 +- python/pyspark/mllib/common.pyi | 20 ++++-- python/pyspark/mllib/linalg/__init__.pyi | 45 +++++++----- python/pyspark/mllib/random.pyi | 2 +- python/pyspark/mllib/recommendation.pyi | 4 +- python/pyspark/mllib/stat/_statistics.pyi | 2 +- python/pyspark/rdd.pyi | 8 ++- python/pyspark/resource/profile.pyi | 2 +- python/pyspark/sql/column.pyi | 8 ++- python/pyspark/sql/context.pyi | 6 +- python/pyspark/sql/functions.pyi | 8 ++- python/pyspark/sql/session.pyi | 10 ++- python/pyspark/sql/types.pyi | 15 ++-- python/pyspark/sql/udf.pyi | 7 +- python/pyspark/streaming/context.pyi | 2 +- python/pyspark/streaming/dstream.pyi | 10 ++- python/pyspark/streaming/kinesis.pyi | 2 +- 28 files changed, 277 insertions(+), 114 deletions(-) diff --git a/python/mypy.ini b/python/mypy.ini index 4a5368a519097..5103452a053be 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -16,10 +16,97 @@ ; [mypy] +strict_optional = True +no_implicit_optional = True +disallow_untyped_defs = True + +; Allow untyped def in internal modules and tests + +[mypy-pyspark.daemon] +disallow_untyped_defs = False + +[mypy-pyspark.find_spark_home] +disallow_untyped_defs = False + +[mypy-pyspark._globals] +disallow_untyped_defs = False + +[mypy-pyspark.install] +disallow_untyped_defs = False + +[mypy-pyspark.java_gateway] +disallow_untyped_defs = False + +[mypy-pyspark.join] +disallow_untyped_defs = False + 
+[mypy-pyspark.ml.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.mllib.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.rddsampler] +disallow_untyped_defs = False + +[mypy-pyspark.resource.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.serializers] +disallow_untyped_defs = False + +[mypy-pyspark.shuffle] +disallow_untyped_defs = False + +[mypy-pyspark.streaming.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.streaming.util] +disallow_untyped_defs = False + +[mypy-pyspark.sql.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.serializers] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.types] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.typehints] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas.utils] +disallow_untyped_defs = False + +[mypy-pyspark.sql.pandas._typing.protocols.*] +disallow_untyped_defs = False + +[mypy-pyspark.sql.utils] +disallow_untyped_defs = False + +[mypy-pyspark.tests.*] +disallow_untyped_defs = False + +[mypy-pyspark.testing.*] +disallow_untyped_defs = False + +[mypy-pyspark.traceback_utils] +disallow_untyped_defs = False + +[mypy-pyspark.util] +disallow_untyped_defs = False + +[mypy-pyspark.worker] +disallow_untyped_defs = False + +; Ignore errors in embedded third party code [mypy-pyspark.cloudpickle.*] ignore_errors = True +; Ignore missing imports for external untyped packages + [mypy-py4j.*] ignore_missing_imports = True diff --git a/python/pyspark/broadcast.pyi b/python/pyspark/broadcast.pyi index 4b019a509a003..944cb06d4178c 100644 --- a/python/pyspark/broadcast.pyi +++ b/python/pyspark/broadcast.pyi @@ -17,7 +17,7 @@ # under the License. import threading -from typing import Any, Dict, Generic, Optional, TypeVar +from typing import Any, Callable, Dict, Generic, Optional, Tuple, TypeVar T = TypeVar("T") @@ -32,14 +32,14 @@ class Broadcast(Generic[T]): path: Optional[Any] = ..., sock_file: Optional[Any] = ..., ) -> None: ... - def dump(self, value: Any, f: Any) -> None: ... - def load_from_path(self, path: Any): ... - def load(self, file: Any): ... + def dump(self, value: T, f: Any) -> None: ... + def load_from_path(self, path: Any) -> T: ... + def load(self, file: Any) -> T: ... @property def value(self) -> T: ... def unpersist(self, blocking: bool = ...) -> None: ... def destroy(self, blocking: bool = ...) -> None: ... - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Callable[[int], T], Tuple[int]]: ... class BroadcastPickleRegistry(threading.local): def __init__(self) -> None: ... diff --git a/python/pyspark/context.pyi b/python/pyspark/context.pyi index 2789a38b3be9f..640a69cad08ab 100644 --- a/python/pyspark/context.pyi +++ b/python/pyspark/context.pyi @@ -16,7 +16,19 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, TypeVar +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + NoReturn, + Optional, + Tuple, + Type, + TypeVar, +) +from types import TracebackType from py4j.java_gateway import JavaGateway, JavaObject # type: ignore[import] @@ -51,9 +63,14 @@ class SparkContext: jsc: Optional[JavaObject] = ..., profiler_cls: type = ..., ) -> None: ... - def __getnewargs__(self): ... - def __enter__(self): ... - def __exit__(self, type, value, trace): ... + def __getnewargs__(self) -> NoReturn: ... + def __enter__(self) -> SparkContext: ... 
+ def __exit__( + self, + type: Optional[Type[BaseException]], + value: Optional[BaseException], + trace: Optional[TracebackType], + ) -> None: ... @classmethod def getOrCreate(cls, conf: Optional[SparkConf] = ...) -> SparkContext: ... def setLogLevel(self, logLevel: str) -> None: ... diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index 4bde851bb1e0d..c44176a13a69b 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -107,7 +107,7 @@ class _JavaProbabilisticClassifier( class _JavaProbabilisticClassificationModel( ProbabilisticClassificationModel, _JavaClassificationModel[T] ): - def predictProbability(self, value: Any): ... + def predictProbability(self, value: Vector) -> Vector: ... class _ClassificationSummary(JavaWrapper): @property @@ -543,7 +543,7 @@ class RandomForestClassificationModel( @property def trees(self) -> List[DecisionTreeClassificationModel]: ... def summary(self) -> RandomForestClassificationTrainingSummary: ... - def evaluate(self, dataset) -> RandomForestClassificationSummary: ... + def evaluate(self, dataset: DataFrame) -> RandomForestClassificationSummary: ... class RandomForestClassificationSummary(_ClassificationSummary): ... class RandomForestClassificationTrainingSummary( @@ -891,7 +891,7 @@ class FMClassifier( solver: str = ..., thresholds: Optional[Any] = ..., seed: Optional[Any] = ..., - ): ... + ) -> FMClassifier: ... def setFactorSize(self, value: int) -> FMClassifier: ... def setFitLinear(self, value: bool) -> FMClassifier: ... def setMiniBatchFraction(self, value: float) -> FMClassifier: ... diff --git a/python/pyspark/ml/common.pyi b/python/pyspark/ml/common.pyi index 7bf0ed6183d8a..a38fc5734f466 100644 --- a/python/pyspark/ml/common.pyi +++ b/python/pyspark/ml/common.pyi @@ -16,5 +16,11 @@ # specific language governing permissions and limitations # under the License. -def callJavaFunc(sc, func, *args): ... -def inherit_doc(cls): ... +from typing import Any, TypeVar + +import pyspark.context + +C = TypeVar("C", bound=type) + +def callJavaFunc(sc: pyspark.context.SparkContext, func: Any, *args: Any) -> Any: ... +def inherit_doc(cls: C) -> C: ... diff --git a/python/pyspark/ml/evaluation.pyi b/python/pyspark/ml/evaluation.pyi index ea0a9f045cd6a..55a3ae2774115 100644 --- a/python/pyspark/ml/evaluation.pyi +++ b/python/pyspark/ml/evaluation.pyi @@ -39,9 +39,12 @@ from pyspark.ml.param.shared import ( HasWeightCol, ) from pyspark.ml.util import JavaMLReadable, JavaMLWritable +from pyspark.sql.dataframe import DataFrame class Evaluator(Params, metaclass=abc.ABCMeta): - def evaluate(self, dataset, params: Optional[ParamMap] = ...) -> float: ... + def evaluate( + self, dataset: DataFrame, params: Optional[ParamMap] = ... + ) -> float: ... def isLargerBetter(self) -> bool: ... class JavaEvaluator(JavaParams, Evaluator, metaclass=abc.ABCMeta): @@ -75,16 +78,15 @@ class BinaryClassificationEvaluator( def setLabelCol(self, value: str) -> BinaryClassificationEvaluator: ... def setRawPredictionCol(self, value: str) -> BinaryClassificationEvaluator: ... def setWeightCol(self, value: str) -> BinaryClassificationEvaluator: ... - -def setParams( - self, - *, - rawPredictionCol: str = ..., - labelCol: str = ..., - metricName: BinaryClassificationEvaluatorMetricType = ..., - weightCol: Optional[str] = ..., - numBins: int = ... -) -> BinaryClassificationEvaluator: ... 
+ def setParams( + self, + *, + rawPredictionCol: str = ..., + labelCol: str = ..., + metricName: BinaryClassificationEvaluatorMetricType = ..., + weightCol: Optional[str] = ..., + numBins: int = ... + ) -> BinaryClassificationEvaluator: ... class RegressionEvaluator( JavaEvaluator, diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi index f5b12a5b2ffc6..4999defdf8a70 100644 --- a/python/pyspark/ml/feature.pyi +++ b/python/pyspark/ml/feature.pyi @@ -100,9 +100,9 @@ class _LSHParams(HasInputCol, HasOutputCol): def getNumHashTables(self) -> int: ... class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): - def setNumHashTables(self: P, value) -> P: ... - def setInputCol(self: P, value) -> P: ... - def setOutputCol(self: P, value) -> P: ... + def setNumHashTables(self: P, value: int) -> P: ... + def setInputCol(self: P, value: str) -> P: ... + def setOutputCol(self: P, value: str) -> P: ... class _LSHModel(JavaModel, _LSHParams): def setInputCol(self: P, value: str) -> P: ... @@ -1518,7 +1518,7 @@ class ChiSqSelector( fpr: float = ..., fdr: float = ..., fwe: float = ... - ): ... + ) -> ChiSqSelector: ... def setSelectorType(self, value: str) -> ChiSqSelector: ... def setNumTopFeatures(self, value: int) -> ChiSqSelector: ... def setPercentile(self, value: float) -> ChiSqSelector: ... @@ -1602,7 +1602,10 @@ class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): def getVarianceThreshold(self) -> float: ... class VarianceThresholdSelector( - JavaEstimator, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaEstimator[VarianceThresholdSelectorModel], + _VarianceThresholdSelectorParams, + JavaMLReadable[VarianceThresholdSelector], + JavaMLWritable, ): def __init__( self, @@ -1615,13 +1618,16 @@ class VarianceThresholdSelector( featuresCol: str = ..., outputCol: Optional[str] = ..., varianceThreshold: float = ..., - ): ... + ) -> VarianceThresholdSelector: ... def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... class VarianceThresholdSelectorModel( - JavaModel, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaModel, + _VarianceThresholdSelectorParams, + JavaMLReadable[VarianceThresholdSelectorModel], + JavaMLWritable, ): def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ... def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ... diff --git a/python/pyspark/ml/linalg/__init__.pyi b/python/pyspark/ml/linalg/__init__.pyi index a576b30aec308..b4fba8823b678 100644 --- a/python/pyspark/ml/linalg/__init__.pyi +++ b/python/pyspark/ml/linalg/__init__.pyi @@ -17,7 +17,7 @@ # under the License. from typing import overload -from typing import Any, Dict, Iterable, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, List, NoReturn, Optional, Tuple, Type, Union from pyspark.ml import linalg as newlinalg # noqa: F401 from pyspark.sql.types import StructType, UserDefinedType @@ -45,7 +45,7 @@ class MatrixUDT(UserDefinedType): @classmethod def scalaUDT(cls) -> str: ... def serialize( - self, obj + self, obj: Matrix ) -> Tuple[ int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool ]: ... @@ -64,9 +64,7 @@ class DenseVector(Vector): def __init__(self, __arr: bytes) -> None: ... @overload def __init__(self, __arr: Iterable[float]) -> None: ... 
- @staticmethod - def parse(s) -> DenseVector: ... - def __reduce__(self) -> Tuple[type, bytes]: ... + def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -112,16 +110,14 @@ class SparseVector(Vector): def __init__(self, size: int, __map: Dict[int, float]) -> None: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self): ... - @staticmethod - def parse(s: str) -> SparseVector: ... + def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... def dot(self, other: Iterable[float]) -> float64: ... def squared_distance(self, other: Iterable[float]) -> float64: ... def toArray(self) -> ndarray: ... def __len__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other) -> bool: ... + def __ne__(self, other: Any) -> bool: ... def __hash__(self) -> int: ... class Vectors: @@ -144,13 +140,13 @@ class Vectors: def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... @overload @staticmethod - def dense(self, *elements: float) -> DenseVector: ... + def dense(*elements: float) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: bytes) -> DenseVector: ... + def dense(__arr: bytes) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: Iterable[float]) -> DenseVector: ... + def dense(__arr: Iterable[float]) -> DenseVector: ... @staticmethod def stringify(vector: Vector) -> str: ... @staticmethod @@ -158,8 +154,6 @@ class Vectors: @staticmethod def norm(vector: Vector, p: Union[float, str]) -> float64: ... @staticmethod - def parse(s: str) -> Vector: ... - @staticmethod def zeros(size: int) -> DenseVector: ... class Matrix: @@ -170,7 +164,7 @@ class Matrix: def __init__( self, numRows: int, numCols: int, isTransposed: bool = ... ) -> None: ... - def toArray(self): ... + def toArray(self) -> NoReturn: ... class DenseMatrix(Matrix): values: Any @@ -186,11 +180,11 @@ class DenseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... def toArray(self) -> ndarray: ... def toSparse(self) -> SparseMatrix: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class SparseMatrix(Matrix): colPtrs: ndarray @@ -216,11 +210,13 @@ class SparseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __reduce__( + self, + ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... def toArray(self) -> ndarray: ... def toDense(self) -> DenseMatrix: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class Matrices: @overload diff --git a/python/pyspark/ml/pipeline.pyi b/python/pyspark/ml/pipeline.pyi index 44680586d70d1..f47e9e012ae14 100644 --- a/python/pyspark/ml/pipeline.pyi +++ b/python/pyspark/ml/pipeline.pyi @@ -51,7 +51,7 @@ class PipelineWriter(MLWriter): def __init__(self, instance: Pipeline) -> None: ... 
def saveImpl(self, path: str) -> None: ... -class PipelineReader(MLReader): +class PipelineReader(MLReader[Pipeline]): cls: Type[Pipeline] def __init__(self, cls: Type[Pipeline]) -> None: ... def load(self, path: str) -> Pipeline: ... @@ -61,7 +61,7 @@ class PipelineModelWriter(MLWriter): def __init__(self, instance: PipelineModel) -> None: ... def saveImpl(self, path: str) -> None: ... -class PipelineModelReader(MLReader): +class PipelineModelReader(MLReader[PipelineModel]): cls: Type[PipelineModel] def __init__(self, cls: Type[PipelineModel]) -> None: ... def load(self, path: str) -> PipelineModel: ... diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index 5cb0e7a5092f7..b8f1e61859c72 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -414,7 +414,7 @@ class RandomForestRegressionModel( _TreeEnsembleModel, _RandomForestRegressorParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable[RandomForestRegressionModel], ): @property def trees(self) -> List[DecisionTreeRegressionModel]: ... @@ -749,10 +749,10 @@ class _FactorizationMachinesParams( initStd: Param[float] solver: Param[str] def __init__(self, *args: Any): ... - def getFactorSize(self): ... - def getFitLinear(self): ... - def getMiniBatchFraction(self): ... - def getInitStd(self): ... + def getFactorSize(self) -> int: ... + def getFitLinear(self) -> bool: ... + def getMiniBatchFraction(self) -> float: ... + def getInitStd(self) -> float: ... class FMRegressor( _JavaRegressor[FMRegressionModel], diff --git a/python/pyspark/mllib/classification.pyi b/python/pyspark/mllib/classification.pyi index c51882c87bfc2..967b0a9f289dd 100644 --- a/python/pyspark/mllib/classification.pyi +++ b/python/pyspark/mllib/classification.pyi @@ -118,7 +118,7 @@ class NaiveBayesModel(Saveable, Loader[NaiveBayesModel]): labels: ndarray pi: ndarray theta: ndarray - def __init__(self, labels, pi, theta) -> None: ... + def __init__(self, labels: ndarray, pi: ndarray, theta: ndarray) -> None: ... @overload def predict(self, x: VectorLike) -> float64: ... @overload diff --git a/python/pyspark/mllib/clustering.pyi b/python/pyspark/mllib/clustering.pyi index 1c3eba17e201c..b4f349612f0fe 100644 --- a/python/pyspark/mllib/clustering.pyi +++ b/python/pyspark/mllib/clustering.pyi @@ -63,7 +63,7 @@ class BisectingKMeans: class KMeansModel(Saveable, Loader[KMeansModel]): centers: List[ndarray] - def __init__(self, centers: List[ndarray]) -> None: ... + def __init__(self, centers: List[VectorLike]) -> None: ... @property def clusterCenters(self) -> List[ndarray]: ... @property @@ -144,7 +144,9 @@ class PowerIterationClustering: class Assignment(NamedTuple("Assignment", [("id", int), ("cluster", int)])): ... class StreamingKMeansModel(KMeansModel): - def __init__(self, clusterCenters, clusterWeights) -> None: ... + def __init__( + self, clusterCenters: List[VectorLike], clusterWeights: VectorLike + ) -> None: ... @property def clusterWeights(self) -> List[float64]: ... centers: ndarray diff --git a/python/pyspark/mllib/common.pyi b/python/pyspark/mllib/common.pyi index 1df308b91b5a1..daba212d93633 100644 --- a/python/pyspark/mllib/common.pyi +++ b/python/pyspark/mllib/common.pyi @@ -16,12 +16,20 @@ # specific language governing permissions and limitations # under the License. -def callJavaFunc(sc, func, *args): ... -def callMLlibFunc(name, *args): ... 
+from typing import Any, TypeVar + +import pyspark.context + +from py4j.java_gateway import JavaObject + +C = TypeVar("C", bound=type) + +def callJavaFunc(sc: pyspark.context.SparkContext, func: Any, *args: Any) -> Any: ... +def callMLlibFunc(name: str, *args: Any) -> Any: ... class JavaModelWrapper: - def __init__(self, java_model) -> None: ... - def __del__(self): ... - def call(self, name, *a): ... + def __init__(self, java_model: JavaObject) -> None: ... + def __del__(self) -> None: ... + def call(self, name: str, *a: Any) -> Any: ... -def inherit_doc(cls): ... +def inherit_doc(cls: C) -> C: ... diff --git a/python/pyspark/mllib/linalg/__init__.pyi b/python/pyspark/mllib/linalg/__init__.pyi index c0719c535c8f4..60d16b26f3590 100644 --- a/python/pyspark/mllib/linalg/__init__.pyi +++ b/python/pyspark/mllib/linalg/__init__.pyi @@ -17,7 +17,18 @@ # under the License. from typing import overload -from typing import Any, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import ( + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, +) from pyspark.ml import linalg as newlinalg from pyspark.sql.types import StructType, UserDefinedType from numpy import float64, ndarray # type: ignore[import] @@ -46,7 +57,7 @@ class MatrixUDT(UserDefinedType): @classmethod def scalaUDT(cls) -> str: ... def serialize( - self, obj + self, obj: Matrix ) -> Tuple[ int, int, int, Optional[List[int]], Optional[List[int]], List[float], bool ]: ... @@ -67,8 +78,8 @@ class DenseVector(Vector): @overload def __init__(self, __arr: Iterable[float]) -> None: ... @staticmethod - def parse(s) -> DenseVector: ... - def __reduce__(self) -> Tuple[type, bytes]: ... + def parse(s: str) -> DenseVector: ... + def __reduce__(self) -> Tuple[Type[DenseVector], bytes]: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -115,7 +126,7 @@ class SparseVector(Vector): def __init__(self, size: int, __map: Dict[int, float]) -> None: ... def numNonzeros(self) -> int: ... def norm(self, p: Union[float, str]) -> float64: ... - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Type[SparseVector], Tuple[int, bytes, bytes]]: ... @staticmethod def parse(s: str) -> SparseVector: ... def dot(self, other: Iterable[float]) -> float64: ... @@ -123,9 +134,9 @@ class SparseVector(Vector): def toArray(self) -> ndarray: ... def asML(self) -> newlinalg.SparseVector: ... def __len__(self) -> int: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... def __getitem__(self, index: int) -> float64: ... - def __ne__(self, other) -> bool: ... + def __ne__(self, other: Any) -> bool: ... def __hash__(self) -> int: ... class Vectors: @@ -148,13 +159,13 @@ class Vectors: def sparse(size: int, __map: Dict[int, float]) -> SparseVector: ... @overload @staticmethod - def dense(self, *elements: float) -> DenseVector: ... + def dense(*elements: float) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: bytes) -> DenseVector: ... + def dense(__arr: bytes) -> DenseVector: ... @overload @staticmethod - def dense(self, __arr: Iterable[float]) -> DenseVector: ... + def dense(__arr: Iterable[float]) -> DenseVector: ... @staticmethod def fromML(vec: newlinalg.DenseVector) -> DenseVector: ... @staticmethod @@ -176,8 +187,8 @@ class Matrix: def __init__( self, numRows: int, numCols: int, isTransposed: bool = ... ) -> None: ... - def toArray(self): ... 
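
The `inherit_doc(cls: C) -> C` annotation above, with `C` bound to `type`, says the decorator hands back the same class it was given, so the decorated class keeps its own type for checkers. A rough sketch of such a decorator; the body is an approximation for illustration, not the exact PySpark implementation:

```
from typing import TypeVar

C = TypeVar("C", bound=type)

def inherit_doc(cls: C) -> C:
    # Copy missing docstrings from the first base class that defines one.
    for name, attr in vars(cls).items():
        if callable(attr) and not getattr(attr, "__doc__", None):
            for parent in cls.__bases__:
                parent_attr = getattr(parent, name, None)
                if parent_attr is not None and getattr(parent_attr, "__doc__", None):
                    attr.__doc__ = parent_attr.__doc__
                    break
    return cls

class Base:
    def transform(self):
        """Apply the transformation."""

@inherit_doc
class Child(Base):
    def transform(self):
        ...

assert Child.transform.__doc__ == "Apply the transformation."
```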
- def asML(self): ... + def toArray(self) -> ndarray: ... + def asML(self) -> newlinalg.Matrix: ... class DenseMatrix(Matrix): values: Any @@ -193,12 +204,12 @@ class DenseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, int]]: ... + def __reduce__(self) -> Tuple[Type[DenseMatrix], Tuple[int, int, bytes, int]]: ... def toArray(self) -> ndarray: ... def toSparse(self) -> SparseMatrix: ... def asML(self) -> newlinalg.DenseMatrix: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class SparseMatrix(Matrix): colPtrs: ndarray @@ -224,12 +235,14 @@ class SparseMatrix(Matrix): values: Iterable[float], isTransposed: bool = ..., ) -> None: ... - def __reduce__(self) -> Tuple[type, Tuple[int, int, bytes, bytes, bytes, int]]: ... + def __reduce__( + self, + ) -> Tuple[Type[SparseMatrix], Tuple[int, int, bytes, bytes, bytes, int]]: ... def __getitem__(self, indices: Tuple[int, int]) -> float64: ... def toArray(self) -> ndarray: ... def toDense(self) -> DenseMatrix: ... def asML(self) -> newlinalg.SparseMatrix: ... - def __eq__(self, other) -> bool: ... + def __eq__(self, other: Any) -> bool: ... class Matrices: @overload diff --git a/python/pyspark/mllib/random.pyi b/python/pyspark/mllib/random.pyi index dc5f4701614da..ec83170625c74 100644 --- a/python/pyspark/mllib/random.pyi +++ b/python/pyspark/mllib/random.pyi @@ -90,7 +90,7 @@ class RandomRDDs: def logNormalVectorRDD( sc: SparkContext, mean: float, - std, + std: float, numRows: int, numCols: int, numPartitions: Optional[int] = ..., diff --git a/python/pyspark/mllib/recommendation.pyi b/python/pyspark/mllib/recommendation.pyi index e2f15494209e9..4fea0acf3c1f9 100644 --- a/python/pyspark/mllib/recommendation.pyi +++ b/python/pyspark/mllib/recommendation.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Type, Union import array from collections import namedtuple @@ -27,7 +27,7 @@ from pyspark.mllib.common import JavaModelWrapper from pyspark.mllib.util import JavaLoader, JavaSaveable class Rating(namedtuple("Rating", ["user", "product", "rating"])): - def __reduce__(self): ... + def __reduce__(self) -> Tuple[Type[Rating], Tuple[int, int, float]]: ... class MatrixFactorizationModel( JavaModelWrapper, JavaSaveable, JavaLoader[MatrixFactorizationModel] diff --git a/python/pyspark/mllib/stat/_statistics.pyi b/python/pyspark/mllib/stat/_statistics.pyi index 4d2701d486881..3834d51639eb2 100644 --- a/python/pyspark/mllib/stat/_statistics.pyi +++ b/python/pyspark/mllib/stat/_statistics.pyi @@ -65,5 +65,5 @@ class Statistics: def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: ... @staticmethod def kolmogorovSmirnovTest( - data, distName: Literal["norm"] = ..., *params: float + data: RDD[float], distName: Literal["norm"] = ..., *params: float ) -> KolmogorovSmirnovTestResult: ... diff --git a/python/pyspark/rdd.pyi b/python/pyspark/rdd.pyi index 35c49e952b0cd..a277cd9f7edae 100644 --- a/python/pyspark/rdd.pyi +++ b/python/pyspark/rdd.pyi @@ -85,12 +85,16 @@ class PythonEvalType: SQL_COGROUPED_MAP_PANDAS_UDF: PandasCogroupedMapUDFType class BoundedFloat(float): - def __new__(cls, mean: float, confidence: float, low: float, high: float): ... 
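
A quick check of the `Rating.__reduce__` annotation above: the namedtuple reduces to the class plus a `(user, product, rating)` tuple, which is exactly what `Tuple[Type[Rating], Tuple[int, int, float]]` encodes. No SparkContext is needed for this:

```
from pyspark.mllib.recommendation import Rating

r = Rating(1, 2, 5.0)
cls, args = r.__reduce__()
assert cls is Rating
assert args == (1, 2, 5.0)
```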
+ def __new__( + cls, mean: float, confidence: float, low: float, high: float + ) -> BoundedFloat: ... class Partitioner: numPartitions: int partitionFunc: Callable[[Any], int] - def __init__(self, numPartitions, partitionFunc) -> None: ... + def __init__( + self, numPartitions: int, partitionFunc: Callable[[Any], int] + ) -> None: ... def __eq__(self, other: Any) -> bool: ... def __call__(self, k: Any) -> int: ... diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 6763baf6590a3..04838692436df 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -49,7 +49,7 @@ class ResourceProfileBuilder: def __init__(self) -> None: ... def require( self, resourceRequest: Union[ExecutorResourceRequest, TaskResourceRequests] - ): ... + ) -> ResourceProfileBuilder: ... def clearExecutorResourceRequests(self) -> None: ... def clearTaskResourceRequests(self) -> None: ... @property diff --git a/python/pyspark/sql/column.pyi b/python/pyspark/sql/column.pyi index 0fbb10053fdbf..1f63e65b3de81 100644 --- a/python/pyspark/sql/column.pyi +++ b/python/pyspark/sql/column.pyi @@ -32,7 +32,7 @@ from pyspark.sql.window import WindowSpec from py4j.java_gateway import JavaObject # type: ignore[import] class Column: - def __init__(self, JavaObject) -> None: ... + def __init__(self, jc: JavaObject) -> None: ... def __neg__(self) -> Column: ... def __add__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... def __sub__(self, other: Union[Column, LiteralType, DecimalLiteral]) -> Column: ... @@ -105,7 +105,11 @@ class Column: def name(self, *alias: str) -> Column: ... def cast(self, dataType: Union[DataType, str]) -> Column: ... def astype(self, dataType: Union[DataType, str]) -> Column: ... - def between(self, lowerBound, upperBound) -> Column: ... + def between( + self, + lowerBound: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral], + upperBound: Union[Column, LiteralType, DateTimeLiteral, DecimalLiteral], + ) -> Column: ... def when(self, condition: Column, value: Any) -> Column: ... def otherwise(self, value: Any) -> Column: ... def over(self, window: WindowSpec) -> Column: ... diff --git a/python/pyspark/sql/context.pyi b/python/pyspark/sql/context.pyi index 64927b37ac2a9..915a0fe1f6709 100644 --- a/python/pyspark/sql/context.pyi +++ b/python/pyspark/sql/context.pyi @@ -43,14 +43,14 @@ class SQLContext: sparkSession: SparkSession def __init__( self, - sparkContext, + sparkContext: SparkContext, sparkSession: Optional[SparkSession] = ..., jsqlContext: Optional[JavaObject] = ..., ) -> None: ... @classmethod def getOrCreate(cls: type, sc: SparkContext) -> SQLContext: ... def newSession(self) -> SQLContext: ... - def setConf(self, key: str, value) -> None: ... + def setConf(self, key: str, value: Union[bool, int, str]) -> None: ... def getConf(self, key: str, defaultValue: Optional[str] = ...) -> str: ... @property def udf(self) -> UDFRegistration: ... @@ -116,7 +116,7 @@ class SQLContext: path: Optional[str] = ..., source: Optional[str] = ..., schema: Optional[StructType] = ..., - **options + **options: str ) -> DataFrame: ... def sql(self, sqlQuery: str) -> DataFrame: ... def table(self, tableName: str) -> DataFrame: ... diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 281c1d75436c6..252f883b5fb09 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -65,13 +65,13 @@ def round(col: ColumnOrName, scale: int = ...) -> Column: ... 
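
The `require(...) -> ResourceProfileBuilder` return annotation above documents the builder's fluent style: each call hands the builder back so requests can be chained. A short sketch of that usage, assuming Spark 3.1+ where the resource-profile API is available:

```
from pyspark.resource import (
    ExecutorResourceRequests,
    ResourceProfileBuilder,
    TaskResourceRequests,
)

# require() returns the builder itself, so executor and task requests chain.
executor_reqs = ExecutorResourceRequests().cores(2).memory("2g")
task_reqs = TaskResourceRequests().cpus(1)

profile = ResourceProfileBuilder().require(executor_reqs).require(task_reqs).build
```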
def bround(col: ColumnOrName, scale: int = ...) -> Column: ... def shiftLeft(col: ColumnOrName, numBits: int) -> Column: ... def shiftRight(col: ColumnOrName, numBits: int) -> Column: ... -def shiftRightUnsigned(col, numBits) -> Column: ... +def shiftRightUnsigned(col: ColumnOrName, numBits: int) -> Column: ... def spark_partition_id() -> Column: ... def expr(str: str) -> Column: ... def struct(*cols: ColumnOrName) -> Column: ... def greatest(*cols: ColumnOrName) -> Column: ... def least(*cols: Column) -> Column: ... -def when(condition: Column, value) -> Column: ... +def when(condition: Column, value: Any) -> Column: ... @overload def log(arg1: ColumnOrName) -> Column: ... @overload @@ -174,7 +174,9 @@ def create_map(*cols: ColumnOrName) -> Column: ... def array(*cols: ColumnOrName) -> Column: ... def array_contains(col: ColumnOrName, value: Any) -> Column: ... def arrays_overlap(a1: ColumnOrName, a2: ColumnOrName) -> Column: ... -def slice(x: ColumnOrName, start: Union[Column, int], length: Union[Column, int]) -> Column: ... +def slice( + x: ColumnOrName, start: Union[Column, int], length: Union[Column, int] +) -> Column: ... def array_join( col: ColumnOrName, delimiter: str, null_replacement: Optional[str] = ... ) -> Column: ... diff --git a/python/pyspark/sql/session.pyi b/python/pyspark/sql/session.pyi index 17ba8894c1731..6cd2d3bed2b2f 100644 --- a/python/pyspark/sql/session.pyi +++ b/python/pyspark/sql/session.pyi @@ -17,7 +17,8 @@ # under the License. from typing import overload -from typing import Any, Iterable, List, Optional, Tuple, TypeVar, Union +from typing import Any, Iterable, List, Optional, Tuple, Type, TypeVar, Union +from types import TracebackType from py4j.java_gateway import JavaObject # type: ignore[import] @@ -122,4 +123,9 @@ class SparkSession(SparkConversionMixin): def streams(self) -> StreamingQueryManager: ... def stop(self) -> None: ... def __enter__(self) -> SparkSession: ... - def __exit__(self, exc_type, exc_val, exc_tb) -> None: ... + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> None: ... diff --git a/python/pyspark/sql/types.pyi b/python/pyspark/sql/types.pyi index 31765e94884d7..3adf823d99a82 100644 --- a/python/pyspark/sql/types.pyi +++ b/python/pyspark/sql/types.pyi @@ -17,7 +17,8 @@ # under the License. from typing import overload -from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, TypeVar +from typing import Any, Callable, Dict, Iterator, List, Optional, Union, Tuple, Type, TypeVar +from py4j.java_gateway import JavaGateway, JavaObject import datetime T = TypeVar("T") @@ -37,7 +38,7 @@ class DataType: def fromInternal(self, obj: Any) -> Any: ... class DataTypeSingleton(type): - def __call__(cls): ... + def __call__(cls: Type[T]) -> T: ... # type: ignore class NullType(DataType, metaclass=DataTypeSingleton): ... class AtomicType(DataType): ... @@ -85,8 +86,8 @@ class ShortType(IntegralType): class ArrayType(DataType): elementType: DataType containsNull: bool - def __init__(self, elementType=DataType, containsNull: bool = ...) -> None: ... - def simpleString(self): ... + def __init__(self, elementType: DataType, containsNull: bool = ...) -> None: ... + def simpleString(self) -> str: ... def jsonValue(self) -> Dict[str, Any]: ... @classmethod def fromJson(cls, json: Dict[str, Any]) -> ArrayType: ... @@ -197,8 +198,8 @@ class Row(tuple): class DateConverter: def can_convert(self, obj: Any) -> bool: ... 
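
The corrected `ArrayType` stub above makes `elementType` a required annotated parameter (the old stub accidentally used the `DataType` class as a default value) and gives `simpleString` its `str` return. Typical construction, runnable without a SparkSession:

```
from pyspark.sql.types import ArrayType, IntegerType, StructField, StructType

arr = ArrayType(IntegerType(), containsNull=False)
print(arr.simpleString())      # array<int>

schema = StructType([StructField("scores", arr)])
print(schema.simpleString())   # struct<scores:array<int>>
```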
- def convert(self, obj, gateway_client) -> Any: ... + def convert(self, obj: datetime.date, gateway_client: JavaGateway) -> JavaObject: ... class DatetimeConverter: - def can_convert(self, obj) -> bool: ... - def convert(self, obj, gateway_client) -> Any: ... + def can_convert(self, obj: Any) -> bool: ... + def convert(self, obj: datetime.datetime, gateway_client: JavaGateway) -> JavaObject: ... diff --git a/python/pyspark/sql/udf.pyi b/python/pyspark/sql/udf.pyi index 87c3672780037..ea61397a67ba1 100644 --- a/python/pyspark/sql/udf.pyi +++ b/python/pyspark/sql/udf.pyi @@ -18,8 +18,9 @@ from typing import Any, Callable, Optional -from pyspark.sql._typing import ColumnOrName, DataTypeOrString +from pyspark.sql._typing import ColumnOrName, DataTypeOrString, UserDefinedFunctionLike from pyspark.sql.column import Column +from pyspark.sql.types import DataType import pyspark.sql.session class UserDefinedFunction: @@ -35,7 +36,7 @@ class UserDefinedFunction: deterministic: bool = ..., ) -> None: ... @property - def returnType(self): ... + def returnType(self) -> DataType: ... def __call__(self, *cols: ColumnOrName) -> Column: ... def asNondeterministic(self) -> UserDefinedFunction: ... @@ -47,7 +48,7 @@ class UDFRegistration: name: str, f: Callable[..., Any], returnType: Optional[DataTypeOrString] = ..., - ): ... + ) -> UserDefinedFunctionLike: ... def registerJavaFunction( self, name: str, diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi index 026163fc9a1db..117a6742e6b6b 100644 --- a/python/pyspark/streaming/context.pyi +++ b/python/pyspark/streaming/context.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, Callable, List, Optional, TypeVar, Union +from typing import Any, Callable, List, Optional, TypeVar from py4j.java_gateway import JavaObject # type: ignore[import] diff --git a/python/pyspark/streaming/dstream.pyi b/python/pyspark/streaming/dstream.pyi index 7b76ce4c65233..1521d838fc2b5 100644 --- a/python/pyspark/streaming/dstream.pyi +++ b/python/pyspark/streaming/dstream.pyi @@ -30,9 +30,12 @@ from typing import ( ) import datetime from pyspark.rdd import RDD +import pyspark.serializers from pyspark.storagelevel import StorageLevel import pyspark.streaming.context +from py4j.java_gateway import JavaObject + S = TypeVar("S") T = TypeVar("T") U = TypeVar("U") @@ -42,7 +45,12 @@ V = TypeVar("V") class DStream(Generic[T]): is_cached: bool is_checkpointed: bool - def __init__(self, jdstream, ssc, jrdd_deserializer) -> None: ... + def __init__( + self, + jdstream: JavaObject, + ssc: pyspark.streaming.context.StreamingContext, + jrdd_deserializer: pyspark.serializers.Serializer, + ) -> None: ... def context(self) -> pyspark.streaming.context.StreamingContext: ... def count(self) -> DStream[int]: ... def filter(self, f: Callable[[T], bool]) -> DStream[T]: ... diff --git a/python/pyspark/streaming/kinesis.pyi b/python/pyspark/streaming/kinesis.pyi index af7cd6f6ec13c..399c37f869620 100644 --- a/python/pyspark/streaming/kinesis.pyi +++ b/python/pyspark/streaming/kinesis.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
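
The `UDFRegistration.register` return annotation above reflects that the registered UDF comes back as a callable usable in both the DataFrame API and SQL. A sketch, assuming an active `SparkSession` named `spark`:

```
from pyspark.sql.types import LongType

# register() returns the UDF itself, usable directly on columns...
plus_one = spark.udf.register("plus_one", lambda x: x + 1, LongType())
spark.range(3).select(plus_one("id")).show()

# ...and by name from SQL.
spark.range(3).createOrReplaceTempView("ids")
spark.sql("SELECT plus_one(id) FROM ids").show()
```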
-from typing import Any, Callable, Optional, TypeVar +from typing import Callable, Optional, TypeVar from pyspark.storagelevel import StorageLevel from pyspark.streaming.context import StreamingContext from pyspark.streaming.dstream import DStream From 01321bc0fec54a1610d0873c17fa7354137d3a6b Mon Sep 17 00:00:00 2001 From: zero323 Date: Wed, 25 Nov 2020 10:24:41 +0900 Subject: [PATCH 0563/1009] [SPARK-33252][PYTHON][DOCS] Migration to NumPy documentation style in MLlib (pyspark.mllib.*) ### What changes were proposed in this pull request? This PR proposes migration of `pyspark.mllib` to NumPy documentation style. ### Why are the changes needed? To improve documentation style. Before: ![old](https://user-images.githubusercontent.com/1554276/100097941-90234980-2e5d-11eb-8b4d-c25d98d85191.png) After: ![new](https://user-images.githubusercontent.com/1554276/100097966-987b8480-2e5d-11eb-9e02-07b18c327624.png) ### Does this PR introduce _any_ user-facing change? Yes, this changes both rendered HTML docs and console representation (SPARK-33243). ### How was this patch tested? `dev/lint-python` and manual inspection. Closes #30413 from zero323/SPARK-33252. Authored-by: zero323 Signed-off-by: HyukjinKwon --- .../docs/source/reference/pyspark.mllib.rst | 3 +- python/pyspark/mllib/classification.py | 353 ++++++----- python/pyspark/mllib/clustering.py | 576 +++++++++++------- python/pyspark/mllib/evaluation.py | 60 +- python/pyspark/mllib/feature.py | 288 ++++++--- python/pyspark/mllib/feature.pyi | 4 +- python/pyspark/mllib/fpm.py | 86 +-- python/pyspark/mllib/fpm.pyi | 4 +- python/pyspark/mllib/linalg/__init__.py | 132 +++- python/pyspark/mllib/linalg/distributed.py | 495 ++++++++++----- python/pyspark/mllib/linalg/distributed.pyi | 6 +- python/pyspark/mllib/random.py | 378 ++++++++---- python/pyspark/mllib/recommendation.py | 116 ++-- python/pyspark/mllib/regression.py | 392 +++++++----- python/pyspark/mllib/stat/KernelDensity.py | 2 + python/pyspark/mllib/stat/__init__.py | 5 +- python/pyspark/mllib/stat/_statistics.py | 115 ++-- python/pyspark/mllib/stat/distribution.py | 2 + python/pyspark/mllib/tree.py | 469 +++++++------- python/pyspark/mllib/util.py | 256 +++++--- 20 files changed, 2375 insertions(+), 1367 deletions(-) diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index acc834c065ac3..df5ea017d0fbf 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -216,6 +216,8 @@ Statistics ChiSqTestResult MultivariateGaussian KernelDensity + ChiSqTestResult + KolmogorovSmirnovTestResult Tree @@ -250,4 +252,3 @@ Utilities Loader MLUtils Saveable - diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index bbca216cce493..bd43e91afd280 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -88,20 +88,26 @@ class LogisticRegressionModel(LinearClassificationModel): Classification model trained using Multinomial/Binary Logistic Regression. - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. (Only used in Binary Logistic - Regression. In Multinomial Logistic Regression, the intercepts will - not bea single value, so the intercepts will be part of the - weights.) - :param numFeatures: - The dimension of the features. - :param numClasses: - The number of possible outcomes for k classes classification problem - in Multinomial Logistic Regression. 
By default, it is binary - logistic regression so numClasses will be set to 2. + .. versionadded:: 0.9.0 + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. (Only used in Binary Logistic + Regression. In Multinomial Logistic Regression, the intercepts will + not be a single value, so the intercepts will be part of the + weights.) + numFeatures : int + The dimension of the features. + numClasses : int + The number of possible outcomes for k classes classification problem + in Multinomial Logistic Regression. By default, it is binary + logistic regression so numClasses will be set to 2. + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0, 1.0]), @@ -159,8 +165,6 @@ class LogisticRegressionModel(LinearClassificationModel): 1 >>> mcm.predict([0.0, 0.0, 0.3]) 2 - - .. versionadded:: 0.9.0 """ def __init__(self, weights, intercept, numFeatures, numClasses): super(LogisticRegressionModel, self).__init__(weights, intercept) @@ -263,54 +267,60 @@ def __repr__(self): class LogisticRegressionWithSGD(object): """ + Train a classification model for Binary Logistic Regression using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.classification.LogisticRegression or - LogisticRegressionWithLBFGS. + .. deprecated:: 2.0.0 + Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS. """ @classmethod - @since('0.9.0') def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, initialWeights=None, regParam=0.01, regType="l2", intercept=False, validateData=True, convergenceTol=0.001): """ Train a logistic regression model on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. 
- (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.classification.LogisticRegression or " @@ -326,55 +336,65 @@ def train(rdd, i): class LogisticRegressionWithLBFGS(object): """ + Train a classification model for Multinomial/Binary Logistic Regression + using Limited-memory BFGS. + + Standard feature scaling and L2 regularization are used by default. .. versionadded:: 1.2.0 """ @classmethod - @since('1.2.0') def train(cls, data, iterations=100, initialWeights=None, regParam=0.0, regType="l2", intercept=False, corrections=10, tolerance=1e-6, validateData=True, numClasses=2): """ Train a logistic regression model on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.0) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param corrections: - The number of corrections used in the LBFGS update. - If a known updater is used for binary classification, - it calls the ml implementation and this parameter will - have no effect. (default: 10) - :param tolerance: - The convergence tolerance of iterations for L-BFGS. - (default: 1e-6) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param numClasses: - The number of classes (i.e., outcomes) a label can take in - Multinomial Logistic Regression. - (default: 2) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + corrections : int, optional + The number of corrections used in the LBFGS update. + If a known updater is used for binary classification, + it calls the ml implementation and this parameter will + have no effect. (default: 10) + tolerance : float, optional + The convergence tolerance of iterations for L-BFGS. 
+ (default: 1e-6) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + numClasses : int, optional + The number of classes (i.e., outcomes) a label can take in + Multinomial Logistic Regression. + (default: 2) + + Examples + -------- >>> data = [ ... LabeledPoint(0.0, [0.0, 1.0]), ... LabeledPoint(1.0, [1.0, 0.0]), @@ -406,11 +426,17 @@ class SVMModel(LinearClassificationModel): """ Model for Support Vector Machines (SVMs). - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. + .. versionadded:: 0.9.0 + + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0]), @@ -451,8 +477,6 @@ class SVMModel(LinearClassificationModel): ... rmtree(path) ... except: ... pass - - .. versionadded:: 0.9.0 """ def __init__(self, weights, intercept): super(SVMModel, self).__init__(weights, intercept) @@ -501,53 +525,59 @@ def load(cls, sc, path): class SVMWithSGD(object): """ + Train a Support Vector Machine (SVM) using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, regType="l2", intercept=False, validateData=True, convergenceTol=0.001): """ Train a support vector machine on the given data. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regType: - The type of regularizer used for training our model. - Allowed values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regType : str, optional + The type of regularizer used for training our model. + Allowed values: - "l1" for using L1 regularization - "l2" for using L2 regularization (default) - None for no regularization - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. 
whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ def train(rdd, i): return callMLlibFunc("trainSVMModelWithSGD", rdd, int(iterations), float(step), @@ -563,14 +593,20 @@ class NaiveBayesModel(Saveable, Loader): """ Model for Naive Bayes classifiers. - :param labels: - List of labels. - :param pi: - Log of class priors, whose dimension is C, number of labels. - :param theta: - Log of class conditional probabilities, whose dimension is C-by-D, - where D is number of features. + .. versionadded:: 0.9.0 + Parameters + ---------- + labels : :py:class:`numpy.ndarray` + List of labels. + pi : :py:class:`numpy.ndarray` + Log of class priors, whose dimension is C, number of labels. + theta : :py:class:`numpy.ndarray` + Log of class conditional probabilities, whose dimension is C-by-D, + where D is number of features. + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> data = [ ... LabeledPoint(0.0, [0.0, 0.0]), @@ -605,8 +641,6 @@ class NaiveBayesModel(Saveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 0.9.0 """ def __init__(self, labels, pi, theta): self.labels = labels @@ -652,11 +686,12 @@ def load(cls, sc, path): class NaiveBayes(object): """ + Train a Multinomial Naive Bayes model. + .. versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, data, lambda_=1.0): """ Train a Naive Bayes model given an RDD of (label, features) @@ -669,11 +704,15 @@ def train(cls, data, lambda_=1.0): it can also be used as `Bernoulli NB `_. The input feature values must be nonnegative. - :param data: - RDD of LabeledPoint. - :param lambda_: - The smoothing parameter. - (default: 1.0) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of :py:class:`pyspark.mllib.regression.LabeledPoint`. + lambda\\_ : float, optional + The smoothing parameter. + (default: 1.0) """ first = data.first() if not isinstance(first, LabeledPoint): @@ -694,23 +733,25 @@ class StreamingLogisticRegressionWithSGD(StreamingLinearAlgorithm): of features must be constant. An initial weight vector must be provided. - :param stepSize: - Step size for each iteration of gradient descent. - (default: 0.1) - :param numIterations: - Number of iterations run for each batch of data. - (default: 50) - :param miniBatchFraction: - Fraction of each batch of data to use for updates. - (default: 1.0) - :param regParam: - L2 Regularization parameter. - (default: 0.0) - :param convergenceTol: - Value used to determine when to terminate iterations. - (default: 0.001) - .. versionadded:: 1.5.0 + + Parameters + ---------- + stepSize : float, optional + Step size for each iteration of gradient descent. + (default: 0.1) + numIterations : int, optional + Number of iterations run for each batch of data. + (default: 50) + miniBatchFraction : float, optional + Fraction of each batch of data to use for updates. + (default: 1.0) + regParam : float, optional + L2 Regularization parameter. + (default: 0.0) + convergenceTol : float, optional + Value used to determine when to terminate iterations. 
+ (default: 0.001) """ def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, regParam=0.0, convergenceTol=0.001): diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index b99a4150c396d..e1a009643c5f2 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -41,6 +41,10 @@ class BisectingKMeansModel(JavaModelWrapper): """ A clustering model derived from the bisecting k-means method. + .. versionadded:: 2.0.0 + + Examples + -------- >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> bskm = BisectingKMeans() >>> model = bskm.train(sc.parallelize(data, 2), k=4) @@ -51,8 +55,6 @@ class BisectingKMeansModel(JavaModelWrapper): 4 >>> model.computeCost(p) 0.0 - - .. versionadded:: 2.0.0 """ def __init__(self, java_model): @@ -72,17 +74,25 @@ def k(self): """Get the number of clusters""" return self.call("k") - @since('2.0.0') def predict(self, x): """ Find the cluster that each of the points belongs to in this model. - :param x: - A data point (or RDD of points) to determine cluster index. - :return: - Predicted cluster index or an RDD of predicted cluster indices - if the input is an RDD. + .. versionadded:: 2.0.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to determine cluster index. + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). + + Returns + ------- + int or :py:class:`pyspark.RDD` of int + Predicted cluster index or an RDD of predicted cluster indices + if the input is an RDD. """ if isinstance(x, RDD): vecs = x.map(_convert_to_vector) @@ -91,15 +101,20 @@ def predict(self, x): x = _convert_to_vector(x) return self.call("predict", x) - @since('2.0.0') def computeCost(self, x): """ Return the Bisecting K-means cost (sum of squared distances of points to their nearest center) for this model on the given data. If provided with an RDD of points returns the sum. - :param point: - A data point (or RDD of points) to compute the cost(s). + .. versionadded:: 2.0.0 + + Parameters + ---------- + point : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to compute the cost(s). + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). """ if isinstance(x, RDD): vecs = x.map(_convert_to_vector) @@ -122,37 +137,43 @@ class BisectingKMeans(object): clusters on the bottom level would result more than `k` leaf clusters, larger clusters get higher priority. - Based on - `Steinbach, Karypis, and Kumar, A comparison of document clustering - techniques, KDD Workshop on Text Mining, 2000 - `_. - .. versionadded:: 2.0.0 + + Notes + ----- + See the original paper [1]_ + + .. [1] Steinbach, M. et al. “A Comparison of Document Clustering Techniques.” (2000). + KDD Workshop on Text Mining, 2000 + http://glaros.dtc.umn.edu/gkhome/fetch/papers/docclusterKDDTMW00.pdf """ @classmethod - @since('2.0.0') def train(self, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1888008604): """ Runs the bisecting k-means algorithm return the model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - The desired number of leaf clusters. The actual number could - be smaller if there are no divisible leaf clusters. 
- (default: 4) - :param maxIterations: - Maximum number of iterations allowed to split clusters. - (default: 20) - :param minDivisibleClusterSize: - Minimum number of points (if >= 1.0) or the minimum proportion - of points (if < 1.0) of a divisible cluster. - (default: 1) - :param seed: - Random seed value for cluster initialization. - (default: -1888008604 from classOf[BisectingKMeans].getName.##) + .. versionadded:: 2.0.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + Training points as an `RDD` of `Vector` or convertible + sequence types. + k : int, optional + The desired number of leaf clusters. The actual number could + be smaller if there are no divisible leaf clusters. + (default: 4) + maxIterations : int, optional + Maximum number of iterations allowed to split clusters. + (default: 20) + minDivisibleClusterSize : float, optional + Minimum number of points (if >= 1.0) or the minimum proportion + of points (if < 1.0) of a divisible cluster. + (default: 1) + seed : int, optional + Random seed value for cluster initialization. + (default: -1888008604 from classOf[BisectingKMeans].getName.##) """ java_model = callMLlibFunc( "trainBisectingKMeans", rdd.map(_convert_to_vector), @@ -165,6 +186,10 @@ class KMeansModel(Saveable, Loader): """A clustering model derived from the k-means method. + .. versionadded:: 0.9.0 + + Examples + -------- >>> data = array([0.0,0.0, 1.0,1.0, 9.0,8.0, 8.0,9.0]).reshape(4, 2) >>> model = KMeans.train( ... sc.parallelize(data), 2, maxIterations=10, initializationMode="random", @@ -213,8 +238,6 @@ class KMeansModel(Saveable, Loader): ... initialModel = KMeansModel([(-1000.0,-1000.0),(5.0,5.0),(1000.0,1000.0)])) >>> model.clusterCenters [array([-1000., -1000.]), array([ 5., 5.]), array([ 1000., 1000.])] - - .. versionadded:: 0.9.0 """ def __init__(self, centers): @@ -232,17 +255,25 @@ def k(self): """Total number of clusters.""" return len(self.centers) - @since('0.9.0') def predict(self, x): """ Find the cluster that each of the points belongs to in this model. - :param x: - A data point (or RDD of points) to determine cluster index. - :return: - Predicted cluster index or an RDD of predicted cluster indices - if the input is an RDD. + .. versionadded:: 0.9.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A data point (or RDD of points) to determine cluster index. + :py:class:`pyspark.mllib.linalg.Vector` can be replaced with equivalent + objects (list, tuple, numpy.ndarray). + + Returns + ------- + int or :py:class:`pyspark.RDD` of int + Predicted cluster index or an RDD of predicted cluster indices + if the input is an RDD. """ best = 0 best_distance = float("inf") @@ -257,15 +288,18 @@ def predict(self, x): best_distance = distance return best - @since('1.4.0') def computeCost(self, rdd): """ Return the K-means cost (sum of squared distances of points to their nearest center) for this model on the given data. - :param rdd: - The RDD of points to compute the cost on. + .. versionadded:: 1.4.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + The RDD of points to compute the cost on. """ cost = callMLlibFunc("computeCostKmeansModel", rdd.map(_convert_to_vector), [_convert_to_vector(c) for c in self.centers]) @@ -292,46 +326,51 @@ def load(cls, sc, path): class KMeans(object): """ + K-means clustering. + .. 
versionadded:: 0.9.0 """ @classmethod - @since('0.9.0') def train(cls, rdd, k, maxIterations=100, initializationMode="k-means||", seed=None, initializationSteps=2, epsilon=1e-4, initialModel=None): """ Train a k-means clustering model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - Number of clusters to create. - :param maxIterations: - Maximum number of iterations allowed. - (default: 100) - :param initializationMode: - The initialization algorithm. This can be either "random" or - "k-means||". - (default: "k-means||") - :param seed: - Random seed value for cluster initialization. Set as None to - generate seed based on system time. - (default: None) - :param initializationSteps: - Number of steps for the k-means|| initialization mode. - This is an advanced setting -- the default of 2 is almost - always enough. - (default: 2) - :param epsilon: - Distance threshold within which a center will be considered to - have converged. If all centers move less than this Euclidean - distance, iterations are stopped. - (default: 1e-4) - :param initialModel: - Initial cluster centers can be provided as a KMeansModel object - rather than using the random or k-means|| initializationModel. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + Training points as an `RDD` of :py:class:`pyspark.mllib.linalg.Vector` + or convertible sequence types. + k : int + Number of clusters to create. + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 100) + initializationMode : str, optional + The initialization algorithm. This can be either "random" or + "k-means||". + (default: "k-means||") + seed : int, optional + Random seed value for cluster initialization. Set as None to + generate seed based on system time. + (default: None) + initializationSteps : + Number of steps for the k-means|| initialization mode. + This is an advanced setting -- the default of 2 is almost + always enough. + (default: 2) + epsilon : float, optional + Distance threshold within which a center will be considered to + have converged. If all centers move less than this Euclidean + distance, iterations are stopped. + (default: 1e-4) + initialModel : :py:class:`KMeansModel`, optional + Initial cluster centers can be provided as a KMeansModel object + rather than using the random or k-means|| initializationModel. + (default: None) """ clusterInitialModel = [] if initialModel is not None: @@ -352,6 +391,10 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ A clustering model derived from the Gaussian Mixture Model method. + .. versionadded:: 1.3.0 + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors, DenseMatrix >>> from numpy.testing import assert_equal >>> from shutil import rmtree @@ -410,8 +453,6 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): True >>> labels[2]==labels[3]==labels[4] True - - .. versionadded:: 1.3.0 """ @property @@ -440,17 +481,23 @@ def k(self): """Number of gaussians in mixture.""" return len(self.weights) - @since('1.3.0') def predict(self, x): """ Find the cluster to which the point 'x' or each point in RDD 'x' has maximum membership in this model. - :param x: - A feature vector or an RDD of vectors representing data points. - :return: - Predicted cluster label or an RDD of predicted cluster labels - if the input is an RDD. + .. 
versionadded:: 1.3.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A feature vector or an RDD of vectors representing data points. + + Returns + ------- + numpy.float64 or :py:class:`pyspark.RDD` of int + Predicted cluster label or an RDD of predicted cluster labels + if the input is an RDD. """ if isinstance(x, RDD): cluster_labels = self.predictSoft(x).map(lambda z: z.index(max(z))) @@ -459,16 +506,22 @@ def predict(self, x): z = self.predictSoft(x) return z.argmax() - @since('1.3.0') def predictSoft(self, x): """ Find the membership of point 'x' or each point in RDD 'x' to all mixture components. - :param x: - A feature vector or an RDD of vectors representing data points. - :return: - The membership value to all mixture components for vector 'x' - or each vector in RDD 'x'. + .. versionadded:: 1.3.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + A feature vector or an RDD of vectors representing data points. + + Returns + ------- + numpy.ndarray or :py:class:`pyspark.RDD` + The membership value to all mixture components for vector 'x' + or each vector in RDD 'x'. """ if isinstance(x, RDD): means, sigmas = zip(*[(g.mu, g.sigma) for g in self.gaussians]) @@ -479,14 +532,16 @@ def predictSoft(self, x): return self.call("predictSoft", _convert_to_vector(x)).toArray() @classmethod - @since('1.5.0') def load(cls, sc, path): """Load the GaussianMixtureModel from disk. - :param sc: - SparkContext. - :param path: - Path to where the model is stored. + .. versionadded:: 1.5.0 + + Parameters + ---------- + sc : :py:class:`SparkContext` + path : str + Path to where the model is stored. """ model = cls._load_java(sc, path) wrapper = sc._jvm.org.apache.spark.mllib.api.python.GaussianMixtureModelWrapper(model) @@ -499,32 +554,36 @@ class GaussianMixture(object): .. versionadded:: 1.3.0 """ + @classmethod - @since('1.3.0') def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): """ Train a Gaussian Mixture clustering model. - :param rdd: - Training points as an `RDD` of `Vector` or convertible - sequence types. - :param k: - Number of independent Gaussians in the mixture model. - :param convergenceTol: - Maximum change in log-likelihood at which convergence is - considered to have occurred. - (default: 1e-3) - :param maxIterations: - Maximum number of iterations allowed. - (default: 100) - :param seed: - Random seed for initial Gaussian distribution. Set as None to - generate seed based on system time. - (default: None) - :param initialModel: - Initial GMM starting point, bypassing the random - initialization. - (default: None) + .. versionadded:: 1.3.0 + + Parameters + ---------- + rdd : ::py:class:`pyspark.RDD` + Training points as an `RDD` of :py:class:`pyspark.mllib.linalg.Vector` + or convertible sequence types. + k : int + Number of independent Gaussians in the mixture model. + convergenceTol : float, optional + Maximum change in log-likelihood at which convergence is + considered to have occurred. + (default: 1e-3) + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 100) + seed : int, optional + Random seed for initial Gaussian distribution. Set as None to + generate seed based on system time. + (default: None) + initialModel : GaussianMixtureModel, optional + Initial GMM starting point, bypassing the random + initialization. 
+ (default: None) """ initialModelWeights = None initialModelMu = None @@ -545,8 +604,12 @@ def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initia class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ - Model produced by [[PowerIterationClustering]]. + Model produced by :py:class:`PowerIterationClustering`. + .. versionadded:: 1.5.0 + + Examples + -------- >>> import math >>> def genCircle(r, n): ... points = [] @@ -589,8 +652,6 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.5.0 """ @property @@ -623,37 +684,48 @@ def load(cls, sc, path): class PowerIterationClustering(object): """ - Power Iteration Clustering (PIC), a scalable graph clustering algorithm - developed by [[http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf Lin and Cohen]]. - From the abstract: PIC finds a very low-dimensional embedding of a - dataset using truncated power iteration on a normalized pair-wise - similarity matrix of the data. + Power Iteration Clustering (PIC), a scalable graph clustering algorithm. + + + Developed by Lin and Cohen [1]_. From the abstract: + + "PIC finds a very low-dimensional embedding of a + dataset using truncated power iteration on a normalized pair-wise + similarity matrix of the data." .. versionadded:: 1.5.0 + + .. [1] Lin, Frank & Cohen, William. (2010). Power Iteration Clustering. + http://www.cs.cmu.edu/~frank/papers/icml2010-pic-final.pdf """ @classmethod - @since('1.5.0') def train(cls, rdd, k, maxIterations=100, initMode="random"): r""" - :param rdd: - An RDD of (i, j, s\ :sub:`ij`\) tuples representing the - affinity matrix, which is the matrix A in the PIC paper. The - similarity s\ :sub:`ij`\ must be nonnegative. This is a symmetric - matrix and hence s\ :sub:`ij`\ = s\ :sub:`ji`\ For any (i, j) with - nonzero similarity, there should be either (i, j, s\ :sub:`ij`\) or - (j, i, s\ :sub:`ji`\) in the input. Tuples with i = j are ignored, - because it is assumed s\ :sub:`ij`\ = 0.0. - :param k: - Number of clusters. - :param maxIterations: - Maximum number of iterations of the PIC algorithm. - (default: 100) - :param initMode: - Initialization mode. This can be either "random" to use - a random vector as vertex properties, or "degree" to use - normalized sum similarities. - (default: "random") + Train PowerIterationClusteringModel + + .. versionadded:: 1.5.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + An RDD of (i, j, s\ :sub:`ij`\) tuples representing the + affinity matrix, which is the matrix A in the PIC paper. The + similarity s\ :sub:`ij`\ must be nonnegative. This is a symmetric + matrix and hence s\ :sub:`ij`\ = s\ :sub:`ji`\ For any (i, j) with + nonzero similarity, there should be either (i, j, s\ :sub:`ij`\) or + (j, i, s\ :sub:`ji`\) in the input. Tuples with i = j are ignored, + because it is assumed s\ :sub:`ij`\ = 0.0. + k : int + Number of clusters. + maxIterations : int, optional + Maximum number of iterations of the PIC algorithm. + (default: 100) + initMode : str, optional + Initialization mode. This can be either "random" to use + a random vector as vertex properties, or "degree" to use + normalized sum similarities. 
+ (default: "random") """ model = callMLlibFunc("trainPowerIterationClusteringModel", rdd.map(_convert_to_vector), int(k), int(maxIterations), initMode) @@ -673,29 +745,37 @@ class StreamingKMeansModel(KMeansModel): The update formula for each centroid is given by - * c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t + m_t) - * n_t+1 = n_t * a + m_t + - c_t+1 = ((c_t * n_t * a) + (x_t * m_t)) / (n_t + m_t) + - n_t+1 = n_t * a + m_t where - * c_t: Centroid at the n_th iteration. - * n_t: Number of samples (or) weights associated with the centroid - at the n_th iteration. - * x_t: Centroid of the new data closest to c_t. - * m_t: Number of samples (or) weights of the new data closest to c_t - * c_t+1: New centroid. - * n_t+1: New number of weights. - * a: Decay Factor, which gives the forgetfulness. + - c_t: Centroid at the n_th iteration. + - n_t: Number of samples (or) weights associated with the centroid + at the n_th iteration. + - x_t: Centroid of the new data closest to c_t. + - m_t: Number of samples (or) weights of the new data closest to c_t + - c_t+1: New centroid. + - n_t+1: New number of weights. + - a: Decay Factor, which gives the forgetfulness. - .. note:: If a is set to 1, it is the weighted mean of the previous - and new data. If it set to zero, the old centroids are completely - forgotten. - - :param clusterCenters: - Initial cluster centers. - :param clusterWeights: - List of weights assigned to each cluster. + .. versionadded:: 1.5.0 + Parameters + ---------- + clusterCenters : list of :py:class:`pyspark.mllib.linalg.Vector` or covertible + Initial cluster centers. + clusterWeights : :py:class:`pyspark.mllib.linalg.Vector` or covertible + List of weights assigned to each cluster. + + Notes + ----- + If a is set to 1, it is the weighted mean of the previous + and new data. If it set to zero, the old centroids are completely + forgotten. + + Examples + -------- >>> initCenters = [[0.0, 0.0], [1.0, 1.0]] >>> initWeights = [1.0, 1.0] >>> stkm = StreamingKMeansModel(initCenters, initWeights) @@ -723,8 +803,6 @@ class StreamingKMeansModel(KMeansModel): 0 >>> stkm.predict([1.5, 1.5]) 1 - - .. versionadded:: 1.5.0 """ def __init__(self, clusterCenters, clusterWeights): super(StreamingKMeansModel, self).__init__(centers=clusterCenters) @@ -740,14 +818,18 @@ def clusterWeights(self): def update(self, data, decayFactor, timeUnit): """Update the centroids, according to data - :param data: - RDD with new data for the model update. - :param decayFactor: - Forgetfulness of the previous centroids. - :param timeUnit: - Can be "batches" or "points". If points, then the decay factor - is raised to the power of number of new points and if batches, - then decay factor will be used as is. + .. versionadded:: 1.5.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD with new data for the model update. + decayFactor : float + Forgetfulness of the previous centroids. + timeUnit : str + Can be "batches" or "points". If points, then the decay factor + is raised to the power of number of new points and if batches, + then decay factor will be used as is. """ if not isinstance(data, RDD): raise TypeError("Data should be of an RDD, got %s." % type(data)) @@ -772,19 +854,21 @@ class StreamingKMeans(object): More details on how the centroids are updated are provided under the docs of StreamingKMeansModel. - :param k: - Number of clusters. - (default: 2) - :param decayFactor: - Forgetfulness of the previous centroids. - (default: 1.0) - :param timeUnit: - Can be "batches" or "points". 
If points, then the decay factor is - raised to the power of number of new points and if batches, then - decay factor will be used as is. - (default: "batches") - .. versionadded:: 1.5.0 + + Parameters + ---------- + k : int, optional + Number of clusters. + (default: 2) + decayFactor : float, optional + Forgetfulness of the previous centroids. + (default: 1.0) + timeUnit : str, optional + Can be "batches" or "points". If points, then the decay factor is + raised to the power of number of new points and if batches, then + decay factor will be used as is. + (default: "batches") """ def __init__(self, k=2, decayFactor=1.0, timeUnit="batches"): self._k = k @@ -887,13 +971,23 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): Latent Dirichlet Allocation (LDA), a topic model designed for text documents. Terminology + - "word" = "term": an element of the vocabulary - "token": instance of a term appearing in a document - "topic": multinomial distribution over words representing some concept - References: - - Original LDA paper (journal version): - Blei, Ng, and Jordan. "Latent Dirichlet Allocation." JMLR, 2003. + .. versionadded:: 1.5.0 + + Notes + ----- + See the original LDA paper (journal version) [1]_ + + .. [1] Blei, D. et al. "Latent Dirichlet Allocation." + J. Mach. Learn. Res. 3 (2003): 993-1022. + https://www.jmlr.org/papers/v3/blei03a + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> from numpy.testing import assert_almost_equal, assert_equal >>> data = [ @@ -925,8 +1019,6 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.5.0 """ @since('1.5.0') @@ -939,19 +1031,24 @@ def vocabSize(self): """Vocabulary size (number of terms or terms in the vocabulary)""" return self.call("vocabSize") - @since('1.6.0') def describeTopics(self, maxTermsPerTopic=None): """Return the topics described by weighted terms. - WARNING: If vocabSize and k are large, this can return a large object! - - :param maxTermsPerTopic: - Maximum number of terms to collect for each topic. - (default: vocabulary size) - :return: - Array over topics. Each topic is represented as a pair of - matching arrays: (term indices, term weights in topic). - Each topic's terms are sorted in order of decreasing weight. + .. versionadded:: 1.6.0 + .. warning:: If vocabSize and k are large, this can return a large object! + + Parameters + ---------- + maxTermsPerTopic : int, optional + Maximum number of terms to collect for each topic. + (default: vocabulary size) + + Returns + ------- + list + Array over topics. Each topic is represented as a pair of + matching arrays: (term indices, term weights in topic). + Each topic's terms are sorted in order of decreasing weight. """ if maxTermsPerTopic is None: topics = self.call("describeTopics") @@ -960,14 +1057,16 @@ def describeTopics(self, maxTermsPerTopic=None): return topics @classmethod - @since('1.5.0') def load(cls, sc, path): """Load the LDAModel from disk. - :param sc: - SparkContext. - :param path: - Path to where the model is stored. + .. versionadded:: 1.5.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + path : str + Path to where the model is stored. """ if not isinstance(sc, SparkContext): raise TypeError("sc should be a SparkContext, got type %s" % type(sc)) @@ -979,47 +1078,52 @@ def load(cls, sc, path): class LDA(object): """ + Train Latent Dirichlet Allocation (LDA) model. + .. 
versionadded:: 1.5.0 """ @classmethod - @since('1.5.0') def train(cls, rdd, k=10, maxIterations=20, docConcentration=-1.0, topicConcentration=-1.0, seed=None, checkpointInterval=10, optimizer="em"): """Train a LDA model. - :param rdd: - RDD of documents, which are tuples of document IDs and term - (word) count vectors. The term count vectors are "bags of - words" with a fixed-size vocabulary (where the vocabulary size - is the length of the vector). Document IDs must be unique - and >= 0. - :param k: - Number of topics to infer, i.e., the number of soft cluster - centers. - (default: 10) - :param maxIterations: - Maximum number of iterations allowed. - (default: 20) - :param docConcentration: - Concentration parameter (commonly named "alpha") for the prior - placed on documents' distributions over topics ("theta"). - (default: -1.0) - :param topicConcentration: - Concentration parameter (commonly named "beta" or "eta") for - the prior placed on topics' distributions over terms. - (default: -1.0) - :param seed: - Random seed for cluster initialization. Set as None to generate - seed based on system time. - (default: None) - :param checkpointInterval: - Period (in iterations) between checkpoints. - (default: 10) - :param optimizer: - LDAOptimizer used to perform the actual calculation. Currently - "em", "online" are supported. - (default: "em") + .. versionadded:: 1.5.0 + + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + RDD of documents, which are tuples of document IDs and term + (word) count vectors. The term count vectors are "bags of + words" with a fixed-size vocabulary (where the vocabulary size + is the length of the vector). Document IDs must be unique + and >= 0. + k : int, optional + Number of topics to infer, i.e., the number of soft cluster + centers. + (default: 10) + maxIterations : int, optional + Maximum number of iterations allowed. + (default: 20) + docConcentration : float, optional + Concentration parameter (commonly named "alpha") for the prior + placed on documents' distributions over topics ("theta"). + (default: -1.0) + topicConcentration : float, optional + Concentration parameter (commonly named "beta" or "eta") for + the prior placed on topics' distributions over terms. + (default: -1.0) + seed : int, optional + Random seed for cluster initialization. Set as None to generate + seed based on system time. + (default: None) + checkpointInterval : int, optional + Period (in iterations) between checkpoints. + (default: 10) + optimizer : str, optional + LDAOptimizer used to perform the actual calculation. Currently + "em", "online" are supported. + (default: "em") """ model = callMLlibFunc("trainLDAModel", rdd, k, maxIterations, docConcentration, topicConcentration, seed, diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index f3be827fb6e4f..198a9791774a9 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -30,8 +30,15 @@ class BinaryClassificationMetrics(JavaModelWrapper): """ Evaluator for binary classification. - :param scoreAndLabels: an RDD of score, label and optional weight. + .. versionadded:: 1.4.0 + + Parameters + ---------- + scoreAndLabels : :py:class:`pyspark.RDD` + an RDD of score, label and optional weight. + Examples + -------- >>> scoreAndLabels = sc.parallelize([ ... 
(0.1, 0.0), (0.1, 1.0), (0.4, 0.0), (0.6, 0.0), (0.6, 1.0), (0.6, 1.0), (0.8, 1.0)], 2) >>> metrics = BinaryClassificationMetrics(scoreAndLabels) @@ -48,8 +55,6 @@ class BinaryClassificationMetrics(JavaModelWrapper): 0.79... >>> metrics.areaUnderPR 0.88... - - .. versionadded:: 1.4.0 """ def __init__(self, scoreAndLabels): @@ -95,8 +100,15 @@ class RegressionMetrics(JavaModelWrapper): """ Evaluator for regression. - :param predictionAndObservations: an RDD of prediction, observation and optional weight. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndObservations : :py:class:`pyspark.RDD` + an RDD of prediction, observation and optional weight. + Examples + -------- >>> predictionAndObservations = sc.parallelize([ ... (2.5, 3.0), (0.0, -0.5), (2.0, 2.0), (8.0, 7.0)]) >>> metrics = RegressionMetrics(predictionAndObservations) @@ -115,8 +127,6 @@ class RegressionMetrics(JavaModelWrapper): >>> metrics = RegressionMetrics(predictionAndObservationsWithOptWeight) >>> metrics.rootMeanSquaredError 0.68... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndObservations): @@ -182,9 +192,15 @@ class MulticlassMetrics(JavaModelWrapper): """ Evaluator for multiclass classification. - :param predictionAndLabels: an RDD of prediction, label, optional weight - and optional probability. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of prediction, label, optional weight and optional probability. + Examples + -------- >>> predictionAndLabels = sc.parallelize([(0.0, 0.0), (0.0, 1.0), (0.0, 0.0), ... (1.0, 0.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (2.0, 2.0), (2.0, 0.0)]) >>> metrics = MulticlassMetrics(predictionAndLabels) @@ -246,8 +262,6 @@ class MulticlassMetrics(JavaModelWrapper): >>> metrics = MulticlassMetrics(predictionAndLabelsWithProbabilities) >>> metrics.logLoss() 0.9682... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): @@ -377,9 +391,15 @@ class RankingMetrics(JavaModelWrapper): """ Evaluator for ranking algorithms. - :param predictionAndLabels: an RDD of (predicted ranking, - ground truth set) pairs. + .. versionadded:: 1.4.0 + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of (predicted ranking, ground truth set) pairs. + + Examples + -------- >>> predictionAndLabels = sc.parallelize([ ... ([1, 6, 2, 7, 8, 3, 9, 10, 4, 5], [1, 2, 3, 4, 5]), ... ([4, 1, 5, 6, 2, 7, 3, 8, 9, 10], [1, 2, 3]), @@ -407,8 +427,6 @@ class RankingMetrics(JavaModelWrapper): 0.35... >>> metrics.recallAt(15) 0.66... - - .. versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): @@ -484,10 +502,16 @@ class MultilabelMetrics(JavaModelWrapper): """ Evaluator for multilabel classification. - :param predictionAndLabels: an RDD of (predictions, labels) pairs, - both are non-null Arrays, each with - unique elements. + .. versionadded:: 1.4.0 + + Parameters + ---------- + predictionAndLabels : :py:class:`pyspark.RDD` + an RDD of (predictions, labels) pairs, + both are non-null Arrays, each with unique elements. + Examples + -------- >>> predictionAndLabels = sc.parallelize([([0.0, 1.0], [0.0, 2.0]), ([0.0, 2.0], [0.0, 1.0]), ... ([], [0.0]), ([2.0], [2.0]), ([2.0, 0.0], [2.0, 0.0]), ... ([0.0, 1.0, 2.0], [0.0, 1.0]), ([1.0], [1.0, 2.0])]) @@ -516,8 +540,6 @@ class MultilabelMetrics(JavaModelWrapper): 0.28... >>> metrics.accuracy 0.54... - - .. 
versionadded:: 1.4.0 """ def __init__(self, predictionAndLabels): diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index d95f9197eaedf..1d37ab815655b 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -41,7 +41,10 @@ def transform(self, vector): """ Applies transformation on a vector. - :param vector: vector to be transformed. + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + vector or convertible or RDD to be transformed. """ raise NotImplementedError @@ -56,8 +59,15 @@ class Normalizer(VectorTransformer): For `p` = float('inf'), max(abs(vector)) will be used as norm for normalization. - :param p: Normalization in L^p^ space, p = 2 by default. + .. versionadded:: 1.2.0 + + Parameters + ---------- + p : float, optional + Normalization in L^p^ space, p = 2 by default. + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> v = Vectors.dense(range(3)) >>> nor = Normalizer(1) @@ -71,21 +81,27 @@ class Normalizer(VectorTransformer): >>> nor2 = Normalizer(float("inf")) >>> nor2.transform(v) DenseVector([0.0, 0.5, 1.0]) - - .. versionadded:: 1.2.0 """ def __init__(self, p=2.0): assert p >= 1.0, "p should be greater than 1.0" self.p = float(p) - @since('1.2.0') def transform(self, vector): """ Applies unit length normalization on a vector. - :param vector: vector or RDD of vector to be normalized. - :return: normalized vector. If the norm of the input is zero, it - will return the input vector. + .. versionadded:: 1.2.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + vector or RDD of vector to be normalized. + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + normalized vector(s). If the norm of the input is zero, it + will return the input vector. """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) @@ -103,11 +119,16 @@ def transform(self, vector): """ Applies transformation on a vector or an RDD[Vector]. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be transformed. - :param vector: Vector or RDD of Vector to be transformed. + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ if isinstance(vector, RDD): vector = vector.map(_convert_to_vector) @@ -123,19 +144,29 @@ class StandardScalerModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - @since('1.2.0') def transform(self, vector): """ Applies standardization transformation on a vector. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. versionadded:: 1.2.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be standardized. - :param vector: Vector or RDD of Vector to be standardized. - :return: Standardized vector. If the variance of a column is - zero, it will return default `0.0` for the column with - zero variance. + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Standardized vector(s). 
If the variance of a column is + zero, it will return default `0.0` for the column with + zero variance. + + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ return JavaVectorTransformer.transform(self, vector) @@ -196,12 +227,20 @@ class StandardScaler(object): variance using column summary statistics on the samples in the training set. - :param withMean: False by default. Centers the data with mean - before scaling. It will build a dense output, so take - care when applying to sparse input. - :param withStd: True by default. Scales the data to unit - standard deviation. + .. versionadded:: 1.2.0 + Parameters + ---------- + withMean : bool, optional + False by default. Centers the data with mean + before scaling. It will build a dense output, so take + care when applying to sparse input. + withStd : bool, optional + True by default. Scales the data to unit + standard deviation. + + Examples + -------- >>> vs = [Vectors.dense([-2.0, 2.3, 0]), Vectors.dense([3.8, 0.0, 1.9])] >>> dataset = sc.parallelize(vs) >>> standardizer = StandardScaler(True, True) @@ -218,8 +257,6 @@ def __init__(self, withMean=False, withStd=True): True >>> model.withMean True - - .. versionadded:: 1.2.0 """ def __init__(self, withMean=False, withStd=True): if not (withMean or withStd): @@ -227,15 +264,22 @@ def __init__(self, withMean=False, withStd=True): self.withMean = withMean self.withStd = withStd - @since('1.2.0') def fit(self, dataset): """ Computes the mean and variance and stores as a model to be used for later scaling. - :param dataset: The data used to compute the mean and variance - to build the transformation model. - :return: a StandardScalarModel + .. versionadded:: 1.2.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.RDD` + The data used to compute the mean and variance + to build the transformation model. + + Returns + ------- + :py:class:`StandardScalerModel` """ dataset = dataset.map(_convert_to_vector) jmodel = callMLlibFunc("fitStandardScaler", self.withMean, self.withStd, dataset) @@ -249,13 +293,21 @@ class ChiSqSelectorModel(JavaVectorTransformer): .. versionadded:: 1.4.0 """ - @since('1.4.0') def transform(self, vector): """ Applies transformation on a vector. - :param vector: Vector or RDD of Vector to be transformed. - :return: transformed vector. + .. versionadded:: 1.4.0 + + Parameters + ---------- + vector : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Input vector(s) to be transformed. + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + transformed vector(s). """ return JavaVectorTransformer.transform(self, vector) @@ -284,6 +336,10 @@ class ChiSqSelector(object): By default, the selection method is `numTopFeatures`, with the default number of top features set to 50. + .. versionadded:: 1.4.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector, DenseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = sc.parallelize([ @@ -306,8 +362,6 @@ class ChiSqSelector(object): >>> model = ChiSqSelector(selectorType="percentile", percentile=0.34).fit(data) >>> model.transform(DenseVector([7.0, 9.0, 5.0])) DenseVector([7.0]) - - .. 
versionadded:: 1.4.0 """ def __init__(self, numTopFeatures=50, selectorType="numTopFeatures", percentile=0.1, fpr=0.05, fdr=0.05, fwe=0.05): @@ -372,15 +426,18 @@ def setSelectorType(self, selectorType): self.selectorType = str(selectorType) return self - @since('1.4.0') def fit(self, data): """ Returns a ChiSquared feature selector. - :param data: an `RDD[LabeledPoint]` containing the labeled dataset - with categorical features. Real-valued features will be - treated as categorical for each distinct value. - Apply feature discretizer before using this function. + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` of :py:class:`pyspark.mllib.regression.LabeledPoint` + containing the labeled dataset with categorical features. + Real-valued features will be treated as categorical for each + distinct value. Apply feature discretizer before using this function. """ jmodel = callMLlibFunc("fitChiSqSelector", self.selectorType, self.numTopFeatures, self.percentile, self.fpr, self.fdr, self.fwe, data) @@ -399,6 +456,10 @@ class PCA(object): """ A feature transformer that projects vectors to a low-dimensional space using PCA. + .. versionadded:: 1.5.0 + + Examples + -------- >>> data = [Vectors.sparse(5, [(1, 1.0), (3, 7.0)]), ... Vectors.dense([2.0, 0.0, 3.0, 4.0, 5.0]), ... Vectors.dense([4.0, 0.0, 0.0, 6.0, 7.0])] @@ -408,20 +469,26 @@ class PCA(object): 1.648... >>> pcArray[1] -4.013... - - .. versionadded:: 1.5.0 """ def __init__(self, k): """ - :param k: number of principal components. + Parameters + ---------- + k : int + number of principal components. """ self.k = int(k) - @since('1.5.0') def fit(self, data): """ Computes a [[PCAModel]] that contains the principal components of the input vectors. - :param data: source vectors + + .. versionadded:: 1.5.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + source vectors """ jmodel = callMLlibFunc("fitPCA", self.k, data) return PCAModel(jmodel) @@ -432,16 +499,23 @@ class HashingTF(object): Maps a sequence of terms to their term frequencies using the hashing trick. - .. note:: The terms must be hashable (can not be dict/set/list...). + .. versionadded:: 1.2.0 + + Parameters + ---------- + numFeatures : int, optional + number of features (default: 2^20) - :param numFeatures: number of features (default: 2^20) + Notes + ----- + The terms must be hashable (can not be dict/set/list...). + Examples + -------- >>> htf = HashingTF(100) >>> doc = "a a b b c d".split(" ") >>> htf.transform(doc) SparseVector(100, {...}) - - .. versionadded:: 1.2.0 """ def __init__(self, numFeatures=1 << 20): self.numFeatures = numFeatures @@ -485,7 +559,7 @@ class IDFModel(JavaVectorTransformer): .. versionadded:: 1.2.0 """ - @since('1.2.0') + def transform(self, x): """ Transforms term frequency (TF) vectors to TF-IDF vectors. @@ -494,13 +568,24 @@ def transform(self, x): the terms which occur in fewer than `minDocFreq` documents will have an entry of 0. - .. note:: In Python, transform cannot currently be used within - an RDD transformation or action. - Call transform directly on the RDD instead. + .. 
versionadded:: 1.2.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + an RDD of term frequency vectors or a term frequency + vector - :param x: an RDD of term frequency vectors or a term frequency - vector - :return: an RDD of TF-IDF vectors or a TF-IDF vector + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + an RDD of TF-IDF vectors or a TF-IDF vector + + Notes + ----- + In Python, transform cannot currently be used within + an RDD transformation or action. + Call transform directly on the RDD instead. """ return JavaVectorTransformer.transform(self, x) @@ -539,9 +624,15 @@ class IDF(object): `minDocFreq`). For terms that are not in at least `minDocFreq` documents, the IDF is found as 0, resulting in TF-IDFs of 0. - :param minDocFreq: minimum of documents in which a term - should appear for filtering + .. versionadded:: 1.2.0 + + Parameters + ---------- + minDocFreq : int + minimum of documents in which a term should appear for filtering + Examples + -------- >>> n = 4 >>> freqs = [Vectors.sparse(n, (1, 3), (1.0, 2.0)), ... Vectors.dense([0.0, 1.0, 2.0, 3.0]), @@ -560,18 +651,20 @@ class IDF(object): DenseVector([0.0, 0.0, 1.3863, 0.863]) >>> model.transform(Vectors.sparse(n, (1, 3), (1.0, 2.0))) SparseVector(4, {1: 0.0, 3: 0.5754}) - - .. versionadded:: 1.2.0 """ def __init__(self, minDocFreq=0): self.minDocFreq = minDocFreq - @since('1.2.0') def fit(self, dataset): """ Computes the inverse document frequency. - :param dataset: an RDD of term frequency vectors + .. versionadded:: 1.2.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.RDD` + an RDD of term frequency vectors """ if not isinstance(dataset, RDD): raise TypeError("dataset should be an RDD of term frequency vectors") @@ -582,34 +675,55 @@ def fit(self, dataset): class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader): """ class for Word2Vec model - - .. versionadded:: 1.2.0 """ - @since('1.2.0') + def transform(self, word): """ Transforms a word to its vector representation - .. note:: Local use only + .. versionadded:: 1.2.0 + + Parameters + ---------- + word : str + a word - :param word: a word - :return: vector representation of word(s) + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` + vector representation of word(s) + + Notes + ----- + Local use only """ try: return self.call("transform", word) except Py4JJavaError: raise ValueError("%s not found" % word) - @since('1.2.0') def findSynonyms(self, word, num): """ Find synonyms of a word - :param word: a word or a vector representation of word - :param num: number of synonyms to find - :return: array of (word, cosineSimilarity) + .. versionadded:: 1.2.0 + + Parameters + ---------- + + word : str or :py:class:`pyspark.mllib.linalg.Vector` + a word or a vector representation of word + num : int + number of synonyms to find + + Returns + ------- + :py:class:`collections.abc.Iterable` + array of (word, cosineSimilarity) - .. note:: Local use only + Notes + ----- + Local use only """ if not isinstance(word, str): word = _convert_to_vector(word) @@ -653,6 +767,10 @@ class Word2Vec(object): and Distributed Representations of Words and Phrases and their Compositionality. + .. versionadded:: 1.2.0 + + Examples + -------- >>> sentence = "a b " * 100 + "a c " * 10 >>> localDoc = [sentence, sentence] >>> doc = sc.parallelize(localDoc).map(lambda line: line.split(" ")) @@ -686,9 +804,6 @@ class Word2Vec(object): ... rmtree(path) ... except OSError: ... 
pass - - .. versionadded:: 1.2.0 - """ def __init__(self): """ @@ -761,13 +876,20 @@ def setWindowSize(self, windowSize): self.windowSize = windowSize return self - @since('1.2.0') def fit(self, data): """ Computes the vector representation of each word in vocabulary. - :param data: training data. RDD of list of string - :return: Word2VecModel instance + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + training data. RDD of list of string + + Returns + ------- + :py:class:`Word2VecModel` """ if not isinstance(data, RDD): raise TypeError("data should be an RDD of list of string") @@ -783,6 +905,10 @@ class ElementwiseProduct(VectorTransformer): Scales each column of the vector, with the supplied weight vector. i.e the elementwise product. + .. versionadded:: 1.5.0 + + Examples + -------- >>> weight = Vectors.dense([1.0, 2.0, 3.0]) >>> eprod = ElementwiseProduct(weight) >>> a = Vectors.dense([2.0, 1.0, 3.0]) @@ -792,8 +918,6 @@ class ElementwiseProduct(VectorTransformer): >>> rdd = sc.parallelize([a, b]) >>> eprod.transform(rdd).collect() [DenseVector([2.0, 2.0, 9.0]), DenseVector([9.0, 6.0, 12.0])] - - .. versionadded:: 1.5.0 """ def __init__(self, scalingVector): self.scalingVector = _convert_to_vector(scalingVector) diff --git a/python/pyspark/mllib/feature.pyi b/python/pyspark/mllib/feature.pyi index 9ccec36abd6ff..24a46f6bee798 100644 --- a/python/pyspark/mllib/feature.pyi +++ b/python/pyspark/mllib/feature.pyi @@ -17,7 +17,7 @@ # under the License. from typing import overload -from typing import Iterable, Hashable, List, Tuple +from typing import Iterable, Hashable, List, Tuple, Union from pyspark.mllib._typing import VectorLike from pyspark.context import SparkContext @@ -135,7 +135,7 @@ class IDF: class Word2VecModel(JavaVectorTransformer, JavaSaveable, JavaLoader[Word2VecModel]): def transform(self, word: str) -> Vector: ... # type: ignore - def findSynonyms(self, word: str, num: int) -> Iterable[Tuple[str, float]]: ... + def findSynonyms(self, word: Union[str, VectorLike], num: int) -> Iterable[Tuple[str, float]]: ... def getVectors(self) -> JavaMap: ... @classmethod def load(cls, sc: SparkContext, path: str) -> Word2VecModel: ... diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index cbbd7b351b20d..1f87a15cb11c9 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -32,6 +32,10 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm. + .. versionadded:: 1.4.0 + + Examples + -------- >>> data = [["a", "b", "c"], ["a", "b", "d", "e"], ["a", "c", "e"], ["a", "c", "f"]] >>> rdd = sc.parallelize(data, 2) >>> model = FPGrowth.train(rdd, 0.6, 2) @@ -42,8 +46,6 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): >>> sameModel = FPGrowthModel.load(sc, model_path) >>> sorted(model.freqItemsets().collect()) == sorted(sameModel.freqItemsets().collect()) True - - .. versionadded:: 1.4.0 """ @since("1.4.0") @@ -72,20 +74,23 @@ class FPGrowth(object): """ @classmethod - @since("1.4.0") def train(cls, data, minSupport=0.3, numPartitions=-1): """ Computes an FP-Growth model that contains frequent itemsets. - :param data: - The input data set, each element contains a transaction. - :param minSupport: - The minimal support level. - (default: 0.3) - :param numPartitions: - The number of partitions used by parallel FP-growth. A value - of -1 will use the same number as input data. - (default: -1) + .. 
versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The input data set, each element contains a transaction. + minSupport : float, optional + The minimal support level. + (default: 0.3) + numPartitions : int, optional + The number of partitions used by parallel FP-growth. A value + of -1 will use the same number as input data. + (default: -1) """ model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model) @@ -103,6 +108,10 @@ class PrefixSpanModel(JavaModelWrapper): """ Model fitted by PrefixSpan + .. versionadded:: 1.6.0 + + Examples + -------- >>> data = [ ... [["a", "b"], ["c"]], ... [["a"], ["c", "b"], ["a", "b"]], @@ -112,8 +121,6 @@ class PrefixSpanModel(JavaModelWrapper): >>> model = PrefixSpan.train(rdd) >>> sorted(model.freqSequences().collect()) [FreqSequence(sequence=[['a']], freq=3), FreqSequence(sequence=[['a'], ['a']], freq=1), ... - - .. versionadded:: 1.6.0 """ @since("1.6.0") @@ -125,38 +132,45 @@ def freqSequences(self): class PrefixSpan(object): """ A parallel PrefixSpan algorithm to mine frequent sequential patterns. - The PrefixSpan algorithm is described in J. Pei, et al., PrefixSpan: - Mining Sequential Patterns Efficiently by Prefix-Projected Pattern Growth - ([[https://doi.org/10.1109/ICDE.2001.914830]]). + The PrefixSpan algorithm is described in Jian Pei et al (2001) [1]_ .. versionadded:: 1.6.0 + + .. [1] Jian Pei et al., + "PrefixSpan,: mining sequential patterns efficiently by prefix-projected pattern growth," + Proceedings 17th International Conference on Data Engineering, Heidelberg, + Germany, 2001, pp. 215-224, + doi: https://doi.org/10.1109/ICDE.2001.914830 """ @classmethod - @since("1.6.0") def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000): """ Finds the complete set of frequent sequential patterns in the input sequences of itemsets. - :param data: - The input data set, each element contains a sequence of - itemsets. - :param minSupport: - The minimal support level of the sequential pattern, any - pattern that appears more than (minSupport * - size-of-the-dataset) times will be output. - (default: 0.1) - :param maxPatternLength: - The maximal length of the sequential pattern, any pattern - that appears less than maxPatternLength will be output. - (default: 10) - :param maxLocalProjDBSize: - The maximum number of items (including delimiters used in the - internal storage format) allowed in a projected database before - local processing. If a projected database exceeds this size, - another iteration of distributed prefix growth is run. - (default: 32000000) + .. versionadded:: 1.6.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The input data set, each element contains a sequence of + itemsets. + minSupport : float, optional + The minimal support level of the sequential pattern, any + pattern that appears more than (minSupport * + size-of-the-dataset) times will be output. + (default: 0.1) + maxPatternLength : int, optional + The maximal length of the sequential pattern, any pattern + that appears less than maxPatternLength will be output. + (default: 10) + maxLocalProjDBSize : int, optional + The maximum number of items (including delimiters used in the + internal storage format) allowed in a projected database before + local processing. If a projected database exceeds this size, + another iteration of distributed prefix growth is run. 
+ (default: 32000000) """ model = callMLlibFunc("trainPrefixSpanModel", data, minSupport, maxPatternLength, maxLocalProjDBSize) diff --git a/python/pyspark/mllib/fpm.pyi b/python/pyspark/mllib/fpm.pyi index 880baae1a91a5..c5a6b5f6806c0 100644 --- a/python/pyspark/mllib/fpm.pyi +++ b/python/pyspark/mllib/fpm.pyi @@ -37,8 +37,8 @@ class FPGrowth: cls, data: RDD[List[T]], minSupport: float = ..., numPartitions: int = ... ) -> FPGrowthModel[T]: ... class FreqItemset(Generic[T]): - items = ... # List[T] - freq = ... # int + items: List[T] + freq: int class PrefixSpanModel(JavaModelWrapper, Generic[T]): def freqSequences(self) -> RDD[PrefixSpan.FreqSequence[T]]: ... diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index c1402fb98a50d..f20004ab70ab3 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -71,6 +71,8 @@ def _vector_size(v): """ Returns the size of the vector. + Examples + -------- >>> _vector_size([1., 2., 3.]) 3 >>> _vector_size((1., 2., 3.)) @@ -231,7 +233,9 @@ def toArray(self): """ Convert the vector into an numpy.ndarray - :return: numpy.ndarray + Returns + ------- + :py:class:`numpy.ndarray` """ raise NotImplementedError @@ -240,7 +244,9 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.Vector` + Returns + ------- + :py:class:`pyspark.ml.linalg.Vector` """ raise NotImplementedError @@ -251,6 +257,8 @@ class DenseVector(Vector): storage and arithmetics will be delegated to the underlying numpy array. + Examples + -------- >>> v = Vectors.dense([1.0, 2.0]) >>> u = Vectors.dense([3.0, 4.0]) >>> v + u @@ -282,6 +290,8 @@ def parse(s): """ Parse string representation back into the DenseVector. + Examples + -------- >>> DenseVector.parse(' [ 0.0,1.0,2.0, 3.0]') DenseVector([0.0, 1.0, 2.0, 3.0]) """ @@ -312,6 +322,8 @@ def norm(self, p): """ Calculates the norm of a DenseVector. + Examples + -------- >>> a = DenseVector([0, -1, 2, -3]) >>> a.norm(2) 3.7... @@ -327,6 +339,8 @@ def dot(self, other): and a target NumPy array that is either 1- or 2-dimensional. Equivalent to calling numpy.dot of the two vectors. + Examples + -------- >>> dense = DenseVector(array.array('d', [1., 2.])) >>> dense.dot(dense) 5.0 @@ -367,6 +381,8 @@ def squared_distance(self, other): """ Squared distance of two Vectors. + Examples + -------- >>> dense1 = DenseVector(array.array('d', [1., 2.])) >>> dense1.squared_distance(dense1) 0.0 @@ -412,9 +428,11 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.DenseVector` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.DenseVector` """ return newlinalg.DenseVector(self.array) @@ -501,12 +519,18 @@ def __init__(self, size, *args): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - :param size: Size of the vector. - :param args: Active entries, as a dictionary {index: value, ...}, - a list of tuples [(index, value), ...], or a list of strictly - increasing indices and a list of corresponding values [index, ...], - [value, ...]. Inactive entries are treated as zeros. - + Parameters + ---------- + size : int + Size of the vector. 
+ args + Active entries, as a dictionary {index: value, ...}, + a list of tuples [(index, value), ...], or a list of strictly + increasing indices and a list of corresponding values [index, ...], + [value, ...]. Inactive entries are treated as zeros. + + Examples + -------- >>> SparseVector(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) >>> SparseVector(4, [(1, 1.0), (3, 5.5)]) @@ -556,6 +580,8 @@ def norm(self, p): """ Calculates the norm of a SparseVector. + Examples + -------- >>> a = SparseVector(4, [0, 1], [3., -4.]) >>> a.norm(1) 7.0 @@ -574,6 +600,8 @@ def parse(s): """ Parse string representation back into the SparseVector. + Examples + -------- >>> SparseVector.parse(' (4, [0,1 ],[ 4.0,5.0] )') SparseVector(4, {0: 4.0, 1: 5.0}) """ @@ -622,6 +650,8 @@ def dot(self, other): """ Dot product with a SparseVector or 1- or 2-dimensional Numpy array. + Examples + -------- >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) >>> a.dot(a) 25.0 @@ -678,6 +708,8 @@ def squared_distance(self, other): """ Squared distance from a SparseVector or 1-dimensional NumPy array. + Examples + -------- >>> a = SparseVector(4, [1, 3], [3.0, 4.0]) >>> a.squared_distance(a) 0.0 @@ -754,9 +786,11 @@ def asML(self): Convert this vector to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.SparseVector` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.SparseVector` """ return newlinalg.SparseVector(self.size, self.indices, self.values) @@ -828,10 +862,12 @@ class Vectors(object): """ Factory methods for working with vectors. - .. note:: Dense vectors are simply represented as NumPy array objects, - so there is no need to covert them for use in MLlib. For sparse vectors, - the factory methods in this class create an MLlib-compatible type, or users - can pass in SciPy's `scipy.sparse` column vectors. + Notes + ----- + Dense vectors are simply represented as NumPy array objects, + so there is no need to convert them for use in MLlib. For sparse vectors, + the factory methods in this class create an MLlib-compatible type, or users + can pass in SciPy's `scipy.sparse` column vectors. """ @staticmethod @@ -841,10 +877,16 @@ def sparse(size, *args): (index, value) pairs, or two separate arrays of indices and values (sorted by index). - :param size: Size of the vector. - :param args: Non-zero entries, as a dictionary, list of tuples, - or two sorted lists containing indices and values. + Parameters + ---------- + size : int + Size of the vector. + args + Non-zero entries, as a dictionary, list of tuples, + or two sorted lists containing indices and values. + Examples + -------- >>> Vectors.sparse(4, {1: 1.0, 3: 5.5}) SparseVector(4, {1: 1.0, 3: 5.5}) >>> Vectors.sparse(4, [(1, 1.0), (3, 5.5)]) @@ -859,6 +901,8 @@ def dense(*elements): """ Create a dense vector of 64-bit floats from a Python list or numbers. + Examples + -------- >>> Vectors.dense([1, 2, 3]) DenseVector([1.0, 2.0, 3.0]) >>> Vectors.dense(1.0, 2.0) @@ -875,10 +919,15 @@ def fromML(vec): """ Convert a vector from the new mllib-local representation. This does NOT copy the data; it copies references. - :param vec: a :py:class:`pyspark.ml.linalg.Vector` - :return: a :py:class:`pyspark.mllib.linalg.Vector` - .. 
versionadded:: 2.0.0 + + Parameters + ---------- + vec : :py:class:`pyspark.ml.linalg.Vector` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Vector` """ if isinstance(vec, newlinalg.DenseVector): return DenseVector(vec.array) @@ -893,6 +942,8 @@ def stringify(vector): Converts a vector into a string, which can be recognized by Vectors.parse(). + Examples + -------- >>> Vectors.stringify(Vectors.sparse(2, [1], [1.0])) '(2,[1],[1.0])' >>> Vectors.stringify(Vectors.dense([0.0, 1.0])) @@ -907,6 +958,8 @@ def squared_distance(v1, v2): a and b can be of type SparseVector, DenseVector, np.ndarray or array.array. + Examples + -------- >>> a = Vectors.sparse(4, [(0, 1), (3, 4)]) >>> b = Vectors.dense([2, 5, 4, 1]) >>> a.squared_distance(b) @@ -926,6 +979,8 @@ def norm(vector, p): def parse(s): """Parse a string representation back into the Vector. + Examples + -------- >>> Vectors.parse('[2,1,2 ]') DenseVector([2.0, 1.0, 2.0]) >>> Vectors.parse(' ( 100, [0], [2])') @@ -1023,6 +1078,8 @@ def __str__(self): """ Pretty printing of a DenseMatrix + Examples + -------- >>> dm = DenseMatrix(2, 2, range(4)) >>> print(dm) DenseMatrix([[ 0., 2.], @@ -1044,6 +1101,8 @@ def __repr__(self): """ Representation of a DenseMatrix + Examples + -------- >>> dm = DenseMatrix(2, 2, range(4)) >>> dm DenseMatrix(2, 2, [0.0, 1.0, 2.0, 3.0], False) @@ -1067,6 +1126,8 @@ def toArray(self): """ Return an numpy.ndarray + Examples + -------- >>> m = DenseMatrix(2, 2, range(4)) >>> m.toArray() array([[ 0., 2.], @@ -1098,9 +1159,11 @@ def asML(self): Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.DenseMatrix` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.DenseMatrix` """ return newlinalg.DenseMatrix(self.numRows, self.numCols, self.values, self.isTransposed) @@ -1154,6 +1217,8 @@ def __str__(self): """ Pretty printing of a SparseMatrix + Examples + -------- >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) >>> print(sm1) 2 X 2 CSCMatrix @@ -1200,6 +1265,8 @@ def __repr__(self): """ Representation of a SparseMatrix + Examples + -------- >>> sm1 = SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2, 3, 4]) >>> sm1 SparseMatrix(2, 2, [0, 2, 3], [0, 1, 1], [2.0, 3.0, 4.0], False) @@ -1281,9 +1348,11 @@ def asML(self): Convert this matrix to the new mllib-local representation. This does NOT copy the data; it copies references. - :return: :py:class:`pyspark.ml.linalg.SparseMatrix` - .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`pyspark.ml.linalg.SparseMatrix` """ return newlinalg.SparseMatrix(self.numRows, self.numCols, self.colPtrs, self.rowIndices, self.values, self.isTransposed) @@ -1314,10 +1383,15 @@ def fromML(mat): Convert a matrix from the new mllib-local representation. This does NOT copy the data; it copies references. - :param mat: a :py:class:`pyspark.ml.linalg.Matrix` - :return: a :py:class:`pyspark.mllib.linalg.Matrix` - .. 
versionadded:: 2.0.0 + + Parameters + ---------- + mat : :py:class:`pyspark.ml.linalg.Matrix` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Matrix` """ if isinstance(mat, newlinalg.DenseMatrix): return DenseMatrix(mat.numRows, mat.numCols, mat.values, mat.isTransposed) diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 603d31d3d7b26..f0e889b15bf51 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -55,16 +55,22 @@ class RowMatrix(DistributedMatrix): Represents a row-oriented distributed Matrix with no meaningful row indices. - :param rows: An RDD or DataFrame of vectors. If a DataFrame is provided, it must have a single - vector typed column. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the number of - records in the `rows` RDD. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the size of - the first row. + + Parameters + ---------- + rows : :py:class:`pyspark.RDD` or :py:class:`pyspark.sql.DataFrame` + An RDD or DataFrame of vectors. If a DataFrame is provided, it must have a single + vector typed column. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the number of + records in the `rows` RDD. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the size of + the first row. """ def __init__(self, rows, numRows=0, numCols=0): """ @@ -77,6 +83,8 @@ def __init__(self, rows, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -108,6 +116,8 @@ def rows(self): """ Rows of the RowMatrix stored as an RDD of vectors. + Examples + -------- >>> mat = RowMatrix(sc.parallelize([[1, 2, 3], [4, 5, 6]])) >>> rows = mat.rows >>> rows.first() @@ -119,6 +129,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6], ... [7, 8, 9], [10, 11, 12]]) @@ -136,6 +148,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6], ... [7, 8, 9], [10, 11, 12]]) @@ -149,14 +163,19 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - @since('2.0.0') def computeColumnSummaryStatistics(self): """ Computes column-wise summary statistics. - :return: :class:`MultivariateStatisticalSummary` object - containing column-wise summary statistics. + .. versionadded:: 2.0.0 + + Returns + ------- + :py:class:`MultivariateStatisticalSummary` + object containing column-wise summary statistics. + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -167,14 +186,19 @@ def computeColumnSummaryStatistics(self): java_col_stats = self._java_matrix_wrapper.call("computeColumnSummaryStatistics") return MultivariateStatisticalSummary(java_col_stats) - @since('2.0.0') def computeCovariance(self): """ Computes the covariance matrix, treating each row as an observation. - .. note:: This cannot be computed on matrices with more than 65535 columns. 
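As a quick cross-check of what `computeCovariance()` returns, a minimal sketch comparing it against NumPy's sample covariance on a tiny matrix; it assumes an active SparkContext `sc` (as in the doctests) and that `numpy.cov`'s default `ddof=1` matches the (n - 1) normalization seen in the doctest output.

```
import numpy as np
from pyspark.mllib.linalg.distributed import RowMatrix

data = [[1.0, 2.0], [2.0, 1.0], [3.0, 4.0]]
mat = RowMatrix(sc.parallelize(data))

# computeCovariance() returns a local DenseMatrix; toArray() gives an ndarray.
spark_cov = mat.computeCovariance().toArray()
numpy_cov = np.cov(np.array(data), rowvar=False)  # sample covariance, ddof=1
assert np.allclose(spark_cov, numpy_cov)
```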
+ .. versionadded:: 2.0.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + Examples + -------- >>> rows = sc.parallelize([[1, 2], [2, 1]]) >>> mat = RowMatrix(rows) @@ -183,13 +207,18 @@ def computeCovariance(self): """ return self._java_matrix_wrapper.call("computeCovariance") - @since('2.0.0') def computeGramianMatrix(self): """ Computes the Gramian matrix `A^T A`. - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.0.0 + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [4, 5, 6]]) >>> mat = RowMatrix(rows) @@ -220,11 +249,12 @@ def columnSimilarities(self, threshold=0.0): similarity threshold. To describe the guarantee, we set some notation: - * Let A be the smallest in magnitude non-zero element of - this matrix. - * Let B be the largest in magnitude non-zero element of - this matrix. - * Let L be the maximum number of non-zeros per row. + + - Let A be the smallest in magnitude non-zero element of + this matrix. + - Let B be the largest in magnitude non-zero element of + this matrix. + - Let L be the maximum number of non-zeros per row. For example, for {0,1} matrices: A=B=1. Another example, for the Netflix matrix: A=1, B=5 @@ -236,20 +266,31 @@ def columnSimilarities(self, threshold=0.0): The shuffle size is bounded by the *smaller* of the following two expressions: - * O(n log(n) L / (threshold * A)) - * O(m L^2^) + - O(n log(n) L / (threshold * A)) + - O(m L^2^) The latter is the cost of the brute-force approach, so for non-zero thresholds, the cost is always cheaper than the brute-force approach. - :param: threshold: Set to 0 for deterministic guaranteed - correctness. Similarities above this - threshold are estimated with the cost vs - estimate quality trade-off described above. - :return: An n x n sparse upper-triangular CoordinateMatrix of - cosine similarities between columns of this matrix. + .. versionadded:: 2.0.0 + + Parameters + ---------- + threshold : float, optional + Set to 0 for deterministic guaranteed + correctness. Similarities above this + threshold are estimated with the cost vs + estimate quality trade-off described above. + Returns + ------- + :py:class:`CoordinateMatrix` + An n x n sparse upper-triangular CoordinateMatrix of + cosine similarities between columns of this matrix. + + Examples + -------- >>> rows = sc.parallelize([[1, 2], [1, 5]]) >>> mat = RowMatrix(rows) @@ -260,23 +301,32 @@ def columnSimilarities(self, threshold=0.0): java_sims_mat = self._java_matrix_wrapper.call("columnSimilarities", float(threshold)) return CoordinateMatrix(java_sims_mat) - @since('2.0.0') def tallSkinnyQR(self, computeQ=False): """ Compute the QR decomposition of this RowMatrix. The implementation is designed to optimize the QR decomposition - (factorization) for the RowMatrix of a tall and skinny shape. + (factorization) for the RowMatrix of a tall and skinny shape [1]_. - Reference: - Paul G. Constantine, David F. Gleich. "Tall and skinny QR - factorizations in MapReduce architectures" - ([[https://doi.org/10.1145/1996092.1996103]]) + .. [1] Paul G. Constantine, David F. Gleich. "Tall and skinny QR + factorizations in MapReduce architectures" + https://doi.org/10.1145/1996092.1996103 - :param: computeQ: whether to computeQ - :return: QRDecomposition(Q: RowMatrix, R: Matrix), where - Q = None if computeQ = false. + .. 
versionadded:: 2.0.0 + Parameters + ---------- + computeQ : bool, optional + whether to computeQ + + Returns + ------- + :py:class:`pyspark.mllib.linalg.QRDecomposition` + QRDecomposition(Q: RowMatrix, R: Matrix), where + Q = None if computeQ = false. + + Examples + -------- >>> rows = sc.parallelize([[3, -6], [4, -8], [0, 1]]) >>> mat = RowMatrix(rows) >>> decomp = mat.tallSkinnyQR(True) @@ -301,7 +351,6 @@ def tallSkinnyQR(self, computeQ=False): R = decomp.call("R") return QRDecomposition(Q, R) - @since('2.2.0') def computeSVD(self, k, computeU=False, rCond=1e-9): """ Computes the singular value decomposition of the RowMatrix. @@ -309,27 +358,39 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): The given row matrix A of dimension (m X n) is decomposed into U * s * V'T where - * U: (m X k) (left singular vectors) is a RowMatrix whose - columns are the eigenvectors of (A X A') - * s: DenseVector consisting of square root of the eigenvalues - (singular values) in descending order. - * v: (n X k) (right singular vectors) is a Matrix whose columns - are the eigenvectors of (A' X A) + - U: (m X k) (left singular vectors) is a RowMatrix whose + columns are the eigenvectors of (A X A') + - s: DenseVector consisting of square root of the eigenvalues + (singular values) in descending order. + - v: (n X k) (right singular vectors) is a Matrix whose columns + are the eigenvectors of (A' X A) For more specific details on implementation, please refer the Scala documentation. - :param k: Number of leading singular values to keep (`0 < k <= n`). - It might return less than k if there are numerically zero singular values - or there are not enough Ritz values converged before the maximum number of - Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). - :param computeU: Whether or not to compute U. If set to be - True, then U is computed by A * V * s^-1 - :param rCond: Reciprocal condition number. All singular values - smaller than rCond * s[0] are treated as zero - where s[0] is the largest singular value. - :returns: :py:class:`SingularValueDecomposition` - + .. versionadded:: 2.2.0 + + Parameters + ---------- + k : int + Number of leading singular values to keep (`0 < k <= n`). + It might return less than k if there are numerically zero singular values + or there are not enough Ritz values converged before the maximum number of + Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). + computeU : bool, optional + Whether or not to compute U. If set to be + True, then U is computed by A * V * s^-1 + rCond : float, optional + Reciprocal condition number. All singular values + smaller than rCond * s[0] are treated as zero + where s[0] is the largest singular value. + + Returns + ------- + :py:class:`SingularValueDecomposition` + + Examples + -------- >>> rows = sc.parallelize([[3, 1, 1], [-1, 3, 1]]) >>> rm = RowMatrix(rows) @@ -345,16 +406,27 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): "computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - @since('2.2.0') def computePrincipalComponents(self, k): """ Computes the k principal components of the given row matrix - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.2.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. - :param k: Number of principal components to keep. 
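Because the principal components come back as a local `DenseMatrix`, they can be fed straight into `multiply` to project the rows; a minimal sketch, assuming an active SparkContext `sc` and leaving mean-centering aside.

```
from pyspark.mllib.linalg.distributed import RowMatrix

rm = RowMatrix(sc.parallelize([[1.0, 2.0, 3.0], [2.0, 4.0, 5.0], [3.0, 6.0, 1.0]]))
pc = rm.computePrincipalComponents(2)   # local n x k DenseMatrix
projected = rm.multiply(pc)             # RowMatrix with rows expressed in PC space
print(projected.rows.collect())
```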
- :returns: :py:class:`pyspark.mllib.linalg.DenseMatrix` + Parameters + ---------- + k : int + Number of principal components to keep. + Returns + ------- + :py:class:`pyspark.mllib.linalg.DenseMatrix` + + Examples + -------- >>> rows = sc.parallelize([[1, 2, 3], [2, 4, 5], [3, 6, 1]]) >>> rm = RowMatrix(rows) @@ -370,15 +442,24 @@ def computePrincipalComponents(self, k): """ return self._java_matrix_wrapper.call("computePrincipalComponents", k) - @since('2.2.0') def multiply(self, matrix): """ Multiply this matrix by a local dense matrix on the right. - :param matrix: a local dense matrix whose number of rows must match the number of columns - of this matrix - :returns: :py:class:`RowMatrix` + .. versionadded:: 2.2.0 + + Parameters + ---------- + matrix : :py:class:`pyspark.mllib.linalg.Matrix` + a local dense matrix whose number of rows must match the number of columns + of this matrix + Returns + ------- + :py:class:`RowMatrix` + + Examples + -------- >>> rm = RowMatrix(sc.parallelize([[0, 1], [2, 3]])) >>> rm.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect() [DenseVector([2.0, 3.0]), DenseVector([6.0, 11.0])] @@ -438,8 +519,12 @@ class IndexedRow(object): Just a wrapper over a (int, vector) tuple. - :param index: The index for the given row. - :param vector: The row in the matrix at the given index. + Parameters + ---------- + index : int + The index for the given row. + vector : :py:class:`pyspark.mllib.linalg.Vector` or convertible + The row in the matrix at the given index. """ def __init__(self, index, vector): self.index = int(index) @@ -462,16 +547,21 @@ class IndexedRowMatrix(DistributedMatrix): """ Represents a row-oriented distributed Matrix with indexed rows. - :param rows: An RDD of IndexedRows or (int, vector) tuples or a DataFrame consisting of a - int typed column of indices and a vector typed column. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the max row - index plus one. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the size of - the first row. + Parameters + ---------- + rows : :py:class:`pyspark.RDD` + An RDD of IndexedRows or (int, vector) tuples or a DataFrame consisting of a + int typed column of indices and a vector typed column. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the max row + index plus one. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the size of + the first row. """ def __init__(self, rows, numRows=0, numCols=0): """ @@ -484,6 +574,8 @@ def __init__(self, rows, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -524,6 +616,8 @@ def rows(self): """ Rows of the IndexedRowMatrix stored as an RDD of IndexedRows. + Examples + -------- >>> mat = IndexedRowMatrix(sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])])) >>> rows = mat.rows @@ -542,6 +636,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... 
IndexedRow(1, [4, 5, 6]), ... IndexedRow(2, [7, 8, 9]), @@ -561,6 +657,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6]), ... IndexedRow(2, [7, 8, 9]), @@ -580,6 +678,8 @@ def columnSimilarities(self): """ Compute all cosine similarities between columns. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -590,13 +690,18 @@ def columnSimilarities(self): java_coordinate_matrix = self._java_matrix_wrapper.call("columnSimilarities") return CoordinateMatrix(java_coordinate_matrix) - @since('2.0.0') def computeGramianMatrix(self): """ Computes the Gramian matrix `A^T A`. - .. note:: This cannot be computed on matrices with more than 65535 columns. + .. versionadded:: 2.0.0 + + Notes + ----- + This cannot be computed on matrices with more than 65535 columns. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(1, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows) @@ -610,6 +715,8 @@ def toRowMatrix(self): """ Convert this matrix to a RowMatrix. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows).toRowMatrix() @@ -623,6 +730,8 @@ def toCoordinateMatrix(self): """ Convert this matrix to a CoordinateMatrix. + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 0]), ... IndexedRow(6, [0, 5])]) >>> mat = IndexedRowMatrix(rows).toCoordinateMatrix() @@ -636,13 +745,19 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): """ Convert this matrix to a BlockMatrix. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. - + Parameters + ---------- + rowsPerBlock : int, optional + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int, optional + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + + Examples + -------- >>> rows = sc.parallelize([IndexedRow(0, [1, 2, 3]), ... IndexedRow(6, [4, 5, 6])]) >>> mat = IndexedRowMatrix(rows).toBlockMatrix() @@ -661,7 +776,6 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): colsPerBlock) return BlockMatrix(java_block_matrix, rowsPerBlock, colsPerBlock) - @since('2.2.0') def computeSVD(self, k, computeU=False, rCond=1e-9): """ Computes the singular value decomposition of the IndexedRowMatrix. @@ -679,17 +793,29 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): For more specific details on implementation, please refer the scala documentation. - :param k: Number of leading singular values to keep (`0 < k <= n`). - It might return less than k if there are numerically zero singular values - or there are not enough Ritz values converged before the maximum number of - Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). - :param computeU: Whether or not to compute U. If set to be - True, then U is computed by A * V * s^-1 - :param rCond: Reciprocal condition number. 
All singular values - smaller than rCond * s[0] are treated as zero - where s[0] is the largest singular value. - :returns: SingularValueDecomposition object - + .. versionadded:: 2.2.0 + + Parameters + ---------- + k : int + Number of leading singular values to keep (`0 < k <= n`). + It might return less than k if there are numerically zero singular values + or there are not enough Ritz values converged before the maximum number of + Arnoldi update iterations is reached (in case that matrix A is ill-conditioned). + computeU : bool, optional + Whether or not to compute U. If set to be + True, then U is computed by A * V * s^-1 + rCond : float, optional + Reciprocal condition number. All singular values + smaller than rCond * s[0] are treated as zero + where s[0] is the largest singular value. + + Returns + ------- + :py:class:`SingularValueDecomposition` + + Examples + -------- >>> rows = [(0, (3, 1, 1)), (1, (-1, 3, 1))] >>> irm = IndexedRowMatrix(sc.parallelize(rows)) >>> svd_model = irm.computeSVD(2, True) @@ -705,15 +831,24 @@ def computeSVD(self, k, computeU=False, rCond=1e-9): "computeSVD", int(k), bool(computeU), float(rCond)) return SingularValueDecomposition(j_model) - @since('2.2.0') def multiply(self, matrix): """ Multiply this matrix by a local dense matrix on the right. - :param matrix: a local dense matrix whose number of rows must match the number of columns - of this matrix - :returns: :py:class:`IndexedRowMatrix` + .. versionadded:: 2.2.0 + + Parameters + ---------- + matrix : :py:class:`pyspark.mllib.linalg.Matrix` + a local dense matrix whose number of rows must match the number of columns + of this matrix + Returns + ------- + :py:class:`IndexedRowMatrix` + + Examples + -------- >>> mat = IndexedRowMatrix(sc.parallelize([(0, (0, 1)), (1, (2, 3))])) >>> mat.multiply(DenseMatrix(2, 2, [0, 2, 1, 3])).rows.collect() [IndexedRow(0, [2.0,3.0]), IndexedRow(1, [6.0,11.0])] @@ -730,9 +865,14 @@ class MatrixEntry(object): Just a wrapper over a (int, int, float) tuple. - :param i: The row index of the matrix. - :param j: The column index of the matrix. - :param value: The (i, j)th entry of the matrix, as a float. + Parameters + ---------- + i : int + The row index of the matrix. + j : int + The column index of the matrix. + value : float + The (i, j)th entry of the matrix, as a float. """ def __init__(self, i, j, value): self.i = int(i) @@ -756,16 +896,21 @@ class CoordinateMatrix(DistributedMatrix): """ Represents a matrix in coordinate format. - :param entries: An RDD of MatrixEntry inputs or - (int, int, float) tuples. - :param numRows: Number of rows in the matrix. A non-positive - value means unknown, at which point the number - of rows will be determined by the max row - index plus one. - :param numCols: Number of columns in the matrix. A non-positive - value means unknown, at which point the number - of columns will be determined by the max row - index plus one. + Parameters + ---------- + entries : :py:class:`pyspark.RDD` + An RDD of MatrixEntry inputs or + (int, int, float) tuples. + numRows : int, optional + Number of rows in the matrix. A non-positive + value means unknown, at which point the number + of rows will be determined by the max row + index plus one. + numCols : int, optional + Number of columns in the matrix. A non-positive + value means unknown, at which point the number + of columns will be determined by the max row + index plus one. 
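Putting the conversions documented here together, a minimal sketch of forming the Gramian A^T A for a small CoordinateMatrix by going through BlockMatrix; it assumes an active SparkContext `sc` and that `BlockMatrix.multiply` and `toLocalMatrix` behave as in current releases.

```
from pyspark.mllib.linalg.distributed import CoordinateMatrix, MatrixEntry

entries = sc.parallelize([MatrixEntry(0, 0, 1.0), MatrixEntry(1, 1, 2.0),
                          MatrixEntry(2, 0, 3.0)])
A = CoordinateMatrix(entries)                                    # 3 x 2
gram = A.transpose().toBlockMatrix().multiply(A.toBlockMatrix())
print(gram.toLocalMatrix())                                      # 2 x 2 Gramian A^T A
```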
""" def __init__(self, entries, numRows=0, numCols=0): """ @@ -778,6 +923,8 @@ def __init__(self, entries, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries) @@ -817,6 +964,8 @@ def entries(self): Entries of the CoordinateMatrix stored as an RDD of MatrixEntries. + Examples + -------- >>> mat = CoordinateMatrix(sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)])) >>> entries = mat.entries @@ -835,6 +984,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -853,6 +1004,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -867,11 +1020,14 @@ def numCols(self): """ return self._java_matrix_wrapper.call("numCols") - @since('2.0.0') def transpose(self): """ Transpose this CoordinateMatrix. + .. versionadded:: 2.0.0 + + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(1, 0, 2), ... MatrixEntry(2, 1, 3.7)]) @@ -891,6 +1047,8 @@ def toRowMatrix(self): """ Convert this matrix to a RowMatrix. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toRowMatrix() @@ -915,6 +1073,8 @@ def toIndexedRowMatrix(self): """ Convert this matrix to an IndexedRowMatrix. + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toIndexedRowMatrix() @@ -938,13 +1098,19 @@ def toBlockMatrix(self, rowsPerBlock=1024, colsPerBlock=1024): """ Convert this matrix to a BlockMatrix. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. - + Parameters + ---------- + rowsPerBlock : int, optional + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int, optional + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + + Examples + -------- >>> entries = sc.parallelize([MatrixEntry(0, 0, 1.2), ... MatrixEntry(6, 4, 2.1)]) >>> mat = CoordinateMatrix(entries).toBlockMatrix() @@ -983,26 +1149,33 @@ class BlockMatrix(DistributedMatrix): """ Represents a distributed matrix in blocks of local matrices. - :param blocks: An RDD of sub-matrix blocks - ((blockRowIndex, blockColIndex), sub-matrix) that - form this distributed matrix. If multiple blocks - with the same index exist, the results for - operations like add and multiply will be - unpredictable. - :param rowsPerBlock: Number of rows that make up each block. - The blocks forming the final rows are not - required to have the given number of rows. - :param colsPerBlock: Number of columns that make up each block. - The blocks forming the final columns are not - required to have the given number of columns. 
- :param numRows: Number of rows of this matrix. If the supplied - value is less than or equal to zero, the number - of rows will be calculated when `numRows` is - invoked. - :param numCols: Number of columns of this matrix. If the supplied - value is less than or equal to zero, the number - of columns will be calculated when `numCols` is - invoked. + Parameters + ---------- + blocks : :py:class:`pyspark.RDD` + An RDD of sub-matrix blocks + ((blockRowIndex, blockColIndex), sub-matrix) that + form this distributed matrix. If multiple blocks + with the same index exist, the results for + operations like add and multiply will be + unpredictable. + rowsPerBlock : int + Number of rows that make up each block. + The blocks forming the final rows are not + required to have the given number of rows. + colsPerBlock : int + Number of columns that make up each block. + The blocks forming the final columns are not + required to have the given number of columns. + numRows : int, optional + Number of rows of this matrix. If the supplied + value is less than or equal to zero, the number + of rows will be calculated when `numRows` is + invoked. + numCols : int, optional + Number of columns of this matrix. If the supplied + value is less than or equal to zero, the number + of columns will be calculated when `numCols` is + invoked. """ def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): """ @@ -1015,6 +1188,8 @@ def __init__(self, blocks, rowsPerBlock, colsPerBlock, numRows=0, numCols=0): object, in which case we can wrap it directly. This assists in clean matrix conversions. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1058,6 +1233,8 @@ def blocks(self): ((blockRowIndex, blockColIndex), sub-matrix) that form this distributed matrix. + Examples + -------- >>> mat = BlockMatrix( ... sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]), 3, 2) @@ -1079,6 +1256,8 @@ def rowsPerBlock(self): """ Number of rows that make up each block. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1092,6 +1271,8 @@ def colsPerBlock(self): """ Number of columns that make up each block. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1105,6 +1286,8 @@ def numRowBlocks(self): """ Number of rows of blocks in the BlockMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1118,6 +1301,8 @@ def numColBlocks(self): """ Number of columns of blocks in the BlockMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1130,6 +1315,8 @@ def numRows(self): """ Get or compute the number of rows. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... 
((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) @@ -1147,6 +1334,8 @@ def numCols(self): """ Get or compute the number of cols. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) @@ -1197,6 +1386,8 @@ def add(self, other): two dense sub matrix blocks are added, the output block will also be a DenseMatrix. + Examples + -------- >>> dm1 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6]) >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]) >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [7, 11, 12]) @@ -1220,7 +1411,6 @@ def add(self, other): java_block_matrix = self._java_matrix_wrapper.call("add", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - @since('2.0.0') def subtract(self, other): """ Subtracts the given block matrix `other` from this block matrix: @@ -1232,6 +1422,10 @@ def subtract(self, other): If two dense sub matrix blocks are subtracted, the output block will also be a DenseMatrix. + .. versionadded:: 2.0.0 + + Examples + -------- >>> dm1 = Matrices.dense(3, 2, [3, 1, 5, 4, 6, 2]) >>> dm2 = Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]) >>> sm = Matrices.sparse(3, 2, [0, 1, 3], [0, 1, 2], [1, 2, 3]) @@ -1265,6 +1459,8 @@ def multiply(self, other): This may cause some performance issues until support for multiplying two sparse matrices is added. + Examples + -------- >>> dm1 = Matrices.dense(2, 3, [1, 2, 3, 4, 5, 6]) >>> dm2 = Matrices.dense(2, 3, [7, 8, 9, 10, 11, 12]) >>> dm3 = Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6]) @@ -1290,12 +1486,15 @@ def multiply(self, other): java_block_matrix = self._java_matrix_wrapper.call("multiply", other_java_block_matrix) return BlockMatrix(java_block_matrix, self.rowsPerBlock, self.colsPerBlock) - @since('2.0.0') def transpose(self): """ Transpose this BlockMatrix. Returns a new BlockMatrix instance sharing the same underlying data. Is a lazy operation. + .. versionadded:: 2.0.0 + + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2) @@ -1311,6 +1510,8 @@ def toLocalMatrix(self): """ Collect the distributed matrix on the driver as a DenseMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2).toLocalMatrix() @@ -1333,6 +1534,8 @@ def toIndexedRowMatrix(self): """ Convert this matrix to an IndexedRowMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(3, 2, [1, 2, 3, 4, 5, 6])), ... ((1, 0), Matrices.dense(3, 2, [7, 8, 9, 10, 11, 12]))]) >>> mat = BlockMatrix(blocks, 3, 2).toIndexedRowMatrix() @@ -1356,6 +1559,8 @@ def toCoordinateMatrix(self): """ Convert this matrix to a CoordinateMatrix. + Examples + -------- >>> blocks = sc.parallelize([((0, 0), Matrices.dense(1, 2, [1, 2])), ... 
((1, 0), Matrices.dense(1, 2, [7, 8]))]) >>> mat = BlockMatrix(blocks, 1, 2).toCoordinateMatrix() diff --git a/python/pyspark/mllib/linalg/distributed.pyi b/python/pyspark/mllib/linalg/distributed.pyi index 238c4ea32e4e8..7ec2d60c5a947 100644 --- a/python/pyspark/mllib/linalg/distributed.pyi +++ b/python/pyspark/mllib/linalg/distributed.pyi @@ -22,6 +22,7 @@ from pyspark.storagelevel import StorageLevel from pyspark.mllib.common import JavaModelWrapper from pyspark.mllib.linalg import Vector, Matrix, QRDecomposition from pyspark.mllib.stat import MultivariateStatisticalSummary +import pyspark.sql.dataframe from numpy import ndarray # noqa: F401 VectorLike = Union[Vector, Sequence[Union[float, int]]] @@ -35,7 +36,10 @@ class DistributedMatrix: class RowMatrix(DistributedMatrix): def __init__( - self, rows: RDD[Vector], numRows: int = ..., numCols: int = ... + self, + rows: Union[RDD[Vector], pyspark.sql.dataframe.DataFrame], + numRows: int = ..., + numCols: int = ..., ) -> None: ... @property def rows(self) -> RDD[Vector]: ... diff --git a/python/pyspark/mllib/random.py b/python/pyspark/mllib/random.py index 6106c58584882..a33dfe26fbad9 100644 --- a/python/pyspark/mllib/random.py +++ b/python/pyspark/mllib/random.py @@ -22,7 +22,6 @@ import sys from functools import wraps -from pyspark import since from pyspark.mllib.common import callMLlibFunc @@ -46,7 +45,6 @@ class RandomRDDs(object): """ @staticmethod - @since("1.1.0") def uniformRDD(sc, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the @@ -56,12 +54,26 @@ def uniformRDD(sc, size, numPartitions=None, seed=None): to U(a, b), use ``RandomRDDs.uniformRDD(sc, n, p, seed).map(lambda v: a + (b - a) * v)`` - :param sc: SparkContext used to create the RDD. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ `U(0.0, 1.0)`. + + Examples + -------- >>> x = RandomRDDs.uniformRDD(sc, 100).collect() >>> len(x) 100 @@ -76,7 +88,6 @@ def uniformRDD(sc, size, numPartitions=None, seed=None): return callMLlibFunc("uniformRDD", sc._jsc, size, numPartitions, seed) @staticmethod - @since("1.1.0") def normalRDD(sc, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the standard normal @@ -86,12 +97,26 @@ def normalRDD(sc, size, numPartitions=None, seed=None): to some other normal N(mean, sigma^2), use ``RandomRDDs.normal(sc, n, p, seed).map(lambda v: mean + sigma * v)`` - :param sc: SparkContext used to create the RDD. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + size : int + Size of the RDD. 
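The `U(a, b)` recipe quoted in the `uniformRDD` docstring above can be exercised directly; a minimal sketch, assuming a live SparkContext `sc` (the bounds and seed are arbitrary).

```
from pyspark.mllib.random import RandomRDDs

# Rescale U(0.0, 1.0) samples to U(-5.0, 5.0), following the docstring recipe.
a, b = -5.0, 5.0
u = RandomRDDs.uniformRDD(sc, 1000, 4, seed=42).map(lambda v: a + (b - a) * v)

print(u.count())                             # 1000
print(a <= u.min() <= u.max() <= b)          # True
print(abs(u.mean() - (a + b) / 2.0) < 0.5)   # roughly centred between a and b
```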
+ numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ N(0.0, 1.0). + + Examples + -------- >>> x = RandomRDDs.normalRDD(sc, 1000, seed=1) >>> stats = x.stats() >>> stats.count() @@ -104,20 +129,34 @@ def normalRDD(sc, size, numPartitions=None, seed=None): return callMLlibFunc("normalRDD", sc._jsc, size, numPartitions, seed) @staticmethod - @since("1.3.0") def logNormalRDD(sc, mean, std, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the log normal distribution with the input mean and standard distribution. - :param sc: SparkContext used to create the RDD. - :param mean: mean for the log Normal distribution - :param std: std for the log Normal distribution - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ log N(mean, std). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + used to create the RDD. + mean : float + mean for the log Normal distribution + std : float + std for the log Normal distribution + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + RDD of float comprised of i.i.d. samples ~ log N(mean, std). + + Examples + -------- >>> from math import sqrt, exp >>> mean = 0.0 >>> std = 1.0 @@ -137,19 +176,33 @@ def logNormalRDD(sc, mean, std, size, numPartitions=None, seed=None): size, numPartitions, seed) @staticmethod - @since("1.1.0") def poissonRDD(sc, mean, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Poisson distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or lambda, for the Poisson distribution. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Pois(mean). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or lambda, for the Poisson distribution. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Pois(mean). + + Examples + -------- >>> mean = 100.0 >>> x = RandomRDDs.poissonRDD(sc, mean, 1000, seed=2) >>> stats = x.stats() @@ -164,19 +217,33 @@ def poissonRDD(sc, mean, size, numPartitions=None, seed=None): return callMLlibFunc("poissonRDD", sc._jsc, float(mean), size, numPartitions, seed) @staticmethod - @since("1.3.0") def exponentialRDD(sc, mean, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Exponential distribution with the input mean. - :param sc: SparkContext used to create the RDD. 
- :param mean: Mean, or 1 / lambda, for the Exponential distribution. - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Exp(mean). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or 1 / lambda, for the Exponential distribution. + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Exp(mean). + + Examples + -------- >>> mean = 2.0 >>> x = RandomRDDs.exponentialRDD(sc, mean, 1000, seed=2) >>> stats = x.stats() @@ -191,20 +258,35 @@ def exponentialRDD(sc, mean, size, numPartitions=None, seed=None): return callMLlibFunc("exponentialRDD", sc._jsc, float(mean), size, numPartitions, seed) @staticmethod - @since("1.3.0") def gammaRDD(sc, shape, scale, size, numPartitions=None, seed=None): """ Generates an RDD comprised of i.i.d. samples from the Gamma distribution with the input shape and scale. - :param sc: SparkContext used to create the RDD. - :param shape: shape (> 0) parameter for the Gamma distribution - :param scale: scale (> 0) parameter for the Gamma distribution - :param size: Size of the RDD. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of float comprised of i.i.d. samples ~ Gamma(shape, scale). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + shape : float + shape (> 0) parameter for the Gamma distribution + scale : float + scale (> 0) parameter for the Gamma distribution + size : int + Size of the RDD. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of float comprised of i.i.d. samples ~ Gamma(shape, scale). + + Examples + -------- >>> from math import sqrt >>> shape = 1.0 >>> scale = 2.0 @@ -224,19 +306,33 @@ def gammaRDD(sc, shape, scale, size, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.1.0") def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the uniform distribution U(0.0, 1.0). - :param sc: SparkContext used to create the RDD. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD. - :param seed: Seed for the RNG that generates the seed for the generator in each partition. - :return: RDD of Vector with vectors containing i.i.d samples ~ `U(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD. 
+ seed : int, optional + Seed for the RNG that generates the seed for the generator in each partition. + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d samples ~ `U(0.0, 1.0)`. + + Examples + -------- >>> import numpy as np >>> mat = np.matrix(RandomRDDs.uniformVectorRDD(sc, 10, 10).collect()) >>> mat.shape @@ -250,19 +346,33 @@ def uniformVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.1.0") def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the standard normal distribution. - :param sc: SparkContext used to create the RDD. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`. - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ `N(0.0, 1.0)`. + + Examples + -------- >>> import numpy as np >>> mat = np.matrix(RandomRDDs.normalVectorRDD(sc, 100, 100, seed=1).collect()) >>> mat.shape @@ -276,21 +386,37 @@ def normalVectorRDD(sc, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.3.0") def logNormalVectorRDD(sc, mean, std, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the log normal distribution. - :param sc: SparkContext used to create the RDD. - :param mean: Mean of the log normal distribution - :param std: Standard Deviation of the log normal distribution - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ log `N(mean, std)`. - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean of the log normal distribution + std : float + Standard Deviation of the log normal distribution + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ log `N(mean, std)`. 
+ + Examples + -------- >>> import numpy as np >>> from math import sqrt, exp >>> mean = 0.0 @@ -311,20 +437,35 @@ def logNormalVectorRDD(sc, mean, std, numRows, numCols, numPartitions=None, seed @staticmethod @toArray - @since("1.1.0") def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Poisson distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or lambda, for the Poisson distribution. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`) - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean). - + .. versionadded:: 1.1.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or lambda, for the Poisson distribution. + numRows : float + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`) + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Pois(mean). + + Examples + -------- >>> import numpy as np >>> mean = 100.0 >>> rdd = RandomRDDs.poissonVectorRDD(sc, mean, 100, 100, seed=1) @@ -342,20 +483,35 @@ def poissonVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): @staticmethod @toArray - @since("1.3.0") def exponentialVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Exponential distribution with the input mean. - :param sc: SparkContext used to create the RDD. - :param mean: Mean, or 1 / lambda, for the Exponential distribution. - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`) - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Exp(mean). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + mean : float + Mean, or 1 / lambda, for the Exponential distribution. + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`) + seed : int, optional + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Exp(mean). + + Examples + -------- >>> import numpy as np >>> mean = 0.5 >>> rdd = RandomRDDs.exponentialVectorRDD(sc, mean, 100, 100, seed=1) @@ -373,21 +529,37 @@ def exponentialVectorRDD(sc, mean, numRows, numCols, numPartitions=None, seed=No @staticmethod @toArray - @since("1.3.0") def gammaVectorRDD(sc, shape, scale, numRows, numCols, numPartitions=None, seed=None): """ Generates an RDD comprised of vectors containing i.i.d. samples drawn from the Gamma distribution. - :param sc: SparkContext used to create the RDD. 
- :param shape: Shape (> 0) of the Gamma distribution - :param scale: Scale (> 0) of the Gamma distribution - :param numRows: Number of Vectors in the RDD. - :param numCols: Number of elements in each Vector. - :param numPartitions: Number of partitions in the RDD (default: `sc.defaultParallelism`). - :param seed: Random seed (default: a random long integer). - :return: RDD of Vector with vectors containing i.i.d. samples ~ Gamma(shape, scale). - + .. versionadded:: 1.3.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + SparkContext used to create the RDD. + shape : float + Shape (> 0) of the Gamma distribution + scale : float + Scale (> 0) of the Gamma distribution + numRows : int + Number of Vectors in the RDD. + numCols : int + Number of elements in each Vector. + numPartitions : int, optional + Number of partitions in the RDD (default: `sc.defaultParallelism`). + seed : int, optional, + Random seed (default: a random long integer). + + Returns + ------- + :py:class:`pyspark.RDD` + RDD of Vector with vectors containing i.i.d. samples ~ Gamma(shape, scale). + + Examples + -------- >>> import numpy as np >>> from math import sqrt >>> shape = 1.0 diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 3dd7cb200c280..7a5fb6e6eea9e 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -32,13 +32,15 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])): """ Represents a (user, product, rating) tuple. + .. versionadded:: 1.2.0 + + Examples + -------- >>> r = Rating(1, 2, 5.0) >>> (r.user, r.product, r.rating) (1, 2, 5.0) >>> (r[0], r[1], r[2]) (1, 2, 5.0) - - .. versionadded:: 1.2.0 """ def __reduce__(self): @@ -51,6 +53,10 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): """A matrix factorisation model trained by regularized alternating least-squares. + .. versionadded:: 0.9.0 + + Examples + -------- >>> r1 = (1, 1, 1.0) >>> r2 = (1, 2, 2.0) >>> r3 = (2, 1, 2.0) @@ -126,8 +132,6 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 0.9.0 """ @since("0.9.0") def predict(self, user, product): @@ -237,7 +241,6 @@ def _prepare(cls, ratings): return ratings @classmethod - @since("0.9.0") def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False, seed=None): """ @@ -247,35 +250,38 @@ def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative features). To solve for these features, ALS is run iteratively with a configurable level of parallelism. - :param ratings: - RDD of `Rating` or (userID, productID, rating) tuple. - :param rank: - Number of features to use (also referred to as the number of latent factors). - :param iterations: - Number of iterations of ALS. - (default: 5) - :param lambda_: - Regularization parameter. - (default: 0.01) - :param blocks: - Number of blocks used to parallelize the computation. A value - of -1 will use an auto-configured number of blocks. - (default: -1) - :param nonnegative: - A value of True will solve least-squares with nonnegativity - constraints. - (default: False) - :param seed: - Random seed for initial matrix factorization model. A value - of None will use system time as the seed. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + ratings : :py:class:`pyspark.RDD` + RDD of `Rating` or (userID, productID, rating) tuple. 
+ rank : int + Number of features to use (also referred to as the number of latent factors). + iterations : int, optional + Number of iterations of ALS. + (default: 5) + lambda\\_ : float, optional + Regularization parameter. + (default: 0.01) + blocks : int, optional + Number of blocks used to parallelize the computation. A value + of -1 will use an auto-configured number of blocks. + (default: -1) + nonnegative : bool, optional + A value of True will solve least-squares with nonnegativity + constraints. + (default: False) + seed : bool, optional + Random seed for initial matrix factorization model. A value + of None will use system time as the seed. + (default: None) """ model = callMLlibFunc("trainALSModel", cls._prepare(ratings), rank, iterations, lambda_, blocks, nonnegative, seed) return MatrixFactorizationModel(model) @classmethod - @since("0.9.0") def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01, nonnegative=False, seed=None): """ @@ -285,31 +291,35 @@ def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alp given rank (number of features). To solve for these features, ALS is run iteratively with a configurable level of parallelism. - :param ratings: - RDD of `Rating` or (userID, productID, rating) tuple. - :param rank: - Number of features to use (also referred to as the number of latent factors). - :param iterations: - Number of iterations of ALS. - (default: 5) - :param lambda_: - Regularization parameter. - (default: 0.01) - :param blocks: - Number of blocks used to parallelize the computation. A value - of -1 will use an auto-configured number of blocks. - (default: -1) - :param alpha: - A constant used in computing confidence. - (default: 0.01) - :param nonnegative: - A value of True will solve least-squares with nonnegativity - constraints. - (default: False) - :param seed: - Random seed for initial matrix factorization model. A value - of None will use system time as the seed. - (default: None) + .. versionadded:: 0.9.0 + + Parameters + ---------- + ratings : :py:class:`pyspark.RDD` + RDD of `Rating` or (userID, productID, rating) tuple. + rank : int + Number of features to use (also referred to as the number of latent factors). + iterations : int, optional + Number of iterations of ALS. + (default: 5) + lambda\\_ : float, optional + Regularization parameter. + (default: 0.01) + blocks : int, optional + Number of blocks used to parallelize the computation. A value + of -1 will use an auto-configured number of blocks. + (default: -1) + alpha : float, optional + A constant used in computing confidence. + (default: 0.01) + nonnegative : bool, optional + A value of True will solve least-squares with nonnegativity + constraints. + (default: False) + seed : int, optional + Random seed for initial matrix factorization model. A value + of None will use system time as the seed. + (default: None) """ model = callMLlibFunc("trainImplicitALSModel", cls._prepare(ratings), rank, iterations, lambda_, blocks, alpha, nonnegative, seed) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 77bca86ac1b27..e549b0ac43721 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -39,15 +39,19 @@ class LabeledPoint(object): """ Class that represents the features and labels of a data point. - :param label: - Label for this data point. 
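Because `train` and `trainImplicit` above now document only their parameters, a minimal end-to-end sketch may help; it assumes a live SparkContext `sc`, and the rank, iteration count and seed are illustrative, not recommended settings.

```
from pyspark.mllib.recommendation import ALS, Rating

ratings = sc.parallelize([Rating(1, 1, 1.0), Rating(1, 2, 2.0),
                          Rating(2, 1, 2.0), Rating(2, 2, 5.0)])

# Explicit feedback: rank-4 factorization with a fixed seed for repeatability.
model = ALS.train(ratings, rank=4, iterations=10, lambda_=0.01, seed=10)
print(model.predict(2, 2))                 # close to 5.0
print(model.recommendProducts(1, 2))       # top-2 products for user 1

# Implicit feedback treats the values as confidence-weighted preferences.
implicit = ALS.trainImplicit(ratings, rank=4, iterations=10, alpha=0.01, seed=10)
print(implicit.predict(2, 2))              # a preference score, not a rating
```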
- :param features: - Vector of features for this point (NumPy array, list, - pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). - - .. note:: 'label' and 'features' are accessible as class attributes. - .. versionadded:: 1.0.0 + + Parameters + ---------- + label : int + Label for this data point. + features : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Vector of features for this point (NumPy array, list, + pyspark.mllib.linalg.SparseVector, or scipy.sparse column matrix). + + Notes + ----- + 'label' and 'features' are accessible as class attributes. """ def __init__(self, label, features): @@ -69,12 +73,14 @@ class LinearModel(object): """ A linear model that has a vector of coefficients and an intercept. - :param weights: - Weights computed for every feature. - :param intercept: - Intercept computed for this model. - .. versionadded:: 0.9.0 + + Parameters + ---------- + weights : :py:class:`pyspark.mllib.linalg.Vector` + Weights computed for every feature. + intercept : float + Intercept computed for this model. """ def __init__(self, weights, intercept): @@ -102,14 +108,16 @@ class LinearRegressionModelBase(LinearModel): """A linear regression model. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> lrmb = LinearRegressionModelBase(np.array([1.0, 2.0]), 0.1) >>> abs(lrmb.predict(np.array([-1.03, 7.777])) - 14.624) < 1e-6 True >>> abs(lrmb.predict(SparseVector(2, {0: -1.03, 1: 7.777})) - 14.624) < 1e-6 True - - .. versionadded:: 0.9.0 """ @since("0.9.0") @@ -129,6 +137,10 @@ class LinearRegressionModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -181,8 +193,6 @@ class LinearRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -224,11 +234,13 @@ def _regression_train_wrapper(train_func, modelClass, data, initial_weights): class LinearRegressionWithSGD(object): """ + Train a linear regression model with no regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression`. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, initialWeights=None, regParam=0.0, regType=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -244,42 +256,47 @@ def train(cls, data, iterations=100, step=1.0, miniBatchFraction=1.0, corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. - (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param regParam: - The regularizer parameter. - (default: 0.0) - :param regType: - The type of regularizer used for training our model. - Supported values: + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. 
+ iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + regParam : float, optional + The regularizer parameter. + (default: 0.0) + regType : str, optional + The type of regularizer used for training our model. + Supported values: - "l1" for using L1 regularization - "l2" for using L2 regularization - None for no regularization (default) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e., whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e., whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression.", DeprecationWarning) @@ -299,6 +316,10 @@ class LassoModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit with an l_1 penalty term. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -351,8 +372,6 @@ class LassoModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -375,12 +394,14 @@ def load(cls, sc, path): class LassoWithSGD(object): """ + Train a regression model with L1-regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. - Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 1.0. + Note the default regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -395,35 +416,39 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. 
- (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 1.0. " @@ -444,6 +469,10 @@ class RidgeRegressionModel(LinearRegressionModelBase): """A linear regression model derived from a least-squares fit with an l_2 penalty term. + .. versionadded:: 0.9.0 + + Examples + -------- >>> from pyspark.mllib.linalg import SparseVector >>> from pyspark.mllib.regression import LabeledPoint >>> data = [ @@ -496,8 +525,6 @@ class RidgeRegressionModel(LinearRegressionModelBase): True >>> abs(lrm.predict(SparseVector(1, {0: 1.0})) - 1) < 0.5 True - - .. versionadded:: 0.9.0 """ @since("1.4.0") def save(self, sc, path): @@ -520,13 +547,15 @@ def load(cls, sc, path): class RidgeRegressionWithSGD(object): """ + Train a regression model with L2-regularization using Stochastic Gradient Descent. + .. versionadded:: 0.9.0 - .. note:: Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. - Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for - LinearRegression. + .. deprecated:: 2.0.0 + Use :py:class:`pyspark.ml.regression.LinearRegression` with elasticNetParam = 0.0. + Note the default regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for + LinearRegression. """ @classmethod - @since("0.9.0") def train(cls, data, iterations=100, step=1.0, regParam=0.01, miniBatchFraction=1.0, initialWeights=None, intercept=False, validateData=True, convergenceTol=0.001): @@ -541,35 +570,39 @@ def train(cls, data, iterations=100, step=1.0, regParam=0.01, of rows of A, each with its corresponding right hand side label y. See also the documentation for the precise formulation. - :param data: - The training data, an RDD of LabeledPoint. - :param iterations: - The number of iterations. - (default: 100) - :param step: - The step parameter used in SGD. - (default: 1.0) - :param regParam: - The regularizer parameter. - (default: 0.01) - :param miniBatchFraction: - Fraction of data to be used for each SGD iteration. 
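The `*WithSGD` trainers being converted here all share the same call shape; a short sketch of the deprecated-but-still-working API follows, assuming a live SparkContext `sc` (each `train` call emits a DeprecationWarning pointing at `pyspark.ml`).

```
import numpy as np
from pyspark.mllib.regression import (LabeledPoint, LinearRegressionWithSGD,
                                      LassoWithSGD)

data = sc.parallelize([LabeledPoint(0.0, [0.0]),
                       LabeledPoint(1.0, [1.0]),
                       LabeledPoint(3.0, [2.0]),
                       LabeledPoint(2.0, [3.0])])

# Plain least squares; emits a DeprecationWarning pointing at pyspark.ml.
lrm = LinearRegressionWithSGD.train(data, iterations=100, step=0.1,
                                    intercept=True,
                                    initialWeights=np.array([1.0]))
print(lrm.weights, lrm.intercept)
print(lrm.predict(np.array([2.0])))     # roughly 2 for this toy data

# LassoWithSGD / RidgeRegressionWithSGD take the same arguments plus regParam
# (which defaults to 0.01 for them, unlike LinearRegressionWithSGD's 0.0).
lasso = LassoWithSGD.train(data, iterations=100, step=0.1, regParam=0.01,
                           intercept=True, initialWeights=np.array([1.0]))
print(lasso.weights, lasso.intercept)
```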
- (default: 1.0) - :param initialWeights: - The initial weights. - (default: None) - :param intercept: - Boolean parameter which indicates the use or not of the - augmented representation for training data (i.e. whether bias - features are activated or not). - (default: False) - :param validateData: - Boolean parameter which indicates if the algorithm should - validate data before training. - (default: True) - :param convergenceTol: - A condition which decides iteration termination. - (default: 0.001) + .. versionadded:: 0.9.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + The training data, an RDD of LabeledPoint. + iterations : int, optional + The number of iterations. + (default: 100) + step : float, optional + The step parameter used in SGD. + (default: 1.0) + regParam : float, optional + The regularizer parameter. + (default: 0.01) + miniBatchFraction : float, optional + Fraction of data to be used for each SGD iteration. + (default: 1.0) + initialWeights : :py:class:`pyspark.mllib.linalg.Vector` or convertible, optional + The initial weights. + (default: None) + intercept : bool, optional + Boolean parameter which indicates the use or not of the + augmented representation for training data (i.e. whether bias + features are activated or not). + (default: False) + validateData : bool, optional + Boolean parameter which indicates if the algorithm should + validate data before training. + (default: True) + convergenceTol : float, optional + A condition which decides iteration termination. + (default: 0.001) """ warnings.warn( "Deprecated in 2.0.0. Use ml.regression.LinearRegression with elasticNetParam = 0.0. " @@ -589,15 +622,21 @@ class IsotonicRegressionModel(Saveable, Loader): """ Regression model for isotonic regression. - :param boundaries: - Array of boundaries for which predictions are known. Boundaries - must be sorted in increasing order. - :param predictions: - Array of predictions associated to the boundaries at the same - index. Results of isotonic regression and therefore monotone. - :param isotonic: - Indicates whether this is isotonic or antitonic. + .. versionadded:: 1.4.0 + Parameters + ---------- + boundaries : ndarray + Array of boundaries for which predictions are known. Boundaries + must be sorted in increasing order. + predictions : ndarray + Array of predictions associated to the boundaries at the same + index. Results of isotonic regression and therefore monotone. + isotonic : true + Indicates whether this is isotonic or antitonic. + + Examples + -------- >>> data = [(1, 0, 1), (2, 1, 1), (3, 2, 1), (1, 3, 1), (6, 4, 1), (17, 5, 1), (16, 6, 1)] >>> irm = IsotonicRegression.train(sc.parallelize(data)) >>> irm.predict(3) @@ -619,8 +658,6 @@ class IsotonicRegressionModel(Saveable, Loader): ... rmtree(path) ... except OSError: ... pass - - .. versionadded:: 1.4.0 """ def __init__(self, boundaries, predictions, isotonic): @@ -628,7 +665,6 @@ def __init__(self, boundaries, predictions, isotonic): self.predictions = predictions self.isotonic = isotonic - @since("1.4.0") def predict(self, x): """ Predict labels for provided features. @@ -647,8 +683,13 @@ def predict(self, x): values with the same boundary then the same rules as in 2) are used. - :param x: - Feature or RDD of Features to be labeled. + + .. versionadded:: 1.4.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Feature or RDD of Features to be labeled. 
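For completeness, a rough sketch of the DataFrame-based replacement named in the deprecation notes above; it assumes a `SparkSession` called `spark` (the doctests in this module only set up `sc`), and uses `elasticNetParam` to select the penalty: 1.0 gives the L1 (Lasso) penalty, 0.0 the L2 (ridge) penalty.

```
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression

df = spark.createDataFrame(
    [(0.0, Vectors.dense(0.0)),
     (1.0, Vectors.dense(1.0)),
     (3.0, Vectors.dense(2.0)),
     (2.0, Vectors.dense(3.0))],
    ["label", "features"])

# Note the different defaults called out above: regParam is 0.0 here,
# versus 0.01 for LassoWithSGD / RidgeRegressionWithSGD.
lr = LinearRegression(maxIter=100, regParam=0.01, elasticNetParam=1.0)
model = lr.fit(df)
print(model.coefficients, model.intercept)
```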
""" if isinstance(x, RDD): return x.map(lambda v: self.predict(v)) @@ -680,35 +721,42 @@ class IsotonicRegression(object): Currently implemented using parallelized pool adjacent violators algorithm. Only univariate (single feature) algorithm supported. - Sequential PAV implementation based on: + .. versionadded:: 1.4.0 + + Notes + ----- + Sequential PAV implementation based on + Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani (2011) [1]_ - Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani. - "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61. - Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf + Sequential PAV parallelization based on + Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset (1996) [2]_ - Sequential PAV parallelization based on: + See also + `Isotonic regression (Wikipedia) `_. - Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset. + .. [1] Tibshirani, Ryan J., Holger Hoefling, and Robert Tibshirani. + "Nearly-isotonic regression." Technometrics 53.1 (2011): 54-61. + Available from http://www.stat.cmu.edu/~ryantibs/papers/neariso.pdf + .. [2] Kearsley, Anthony J., Richard A. Tapia, and Michael W. Trosset "An approach to parallelizing isotonic regression." Applied Mathematics and Parallel Computing. Physica-Verlag HD, 1996. 141-147. Available from http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf - - See `Isotonic regression (Wikipedia) `_. - - .. versionadded:: 1.4.0 """ @classmethod - @since("1.4.0") def train(cls, data, isotonic=True): """ Train an isotonic regression model on the given data. - :param data: - RDD of (label, feature, weight) tuples. - :param isotonic: - Whether this is isotonic (which is default) or antitonic. - (default: True) + .. versionadded:: 1.4.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD of (label, feature, weight) tuples. + isotonic : bool, optional + Whether this is isotonic (which is default) or antitonic. + (default: True) """ boundaries, predictions = callMLlibFunc("trainIsotonicRegressionModel", data.map(_convert_to_vector), bool(isotonic)) @@ -741,26 +789,32 @@ def _validate(self, dstream): raise ValueError( "Model must be intialized using setInitialWeights") - @since("1.5.0") def predictOn(self, dstream): """ Use the model to make predictions on batches of data from a DStream. - :return: - DStream containing predictions. + .. versionadded:: 1.5.0 + + Returns + ------- + :py:class:`pyspark.streaming.DStream` + DStream containing predictions. """ self._validate(dstream) return dstream.map(lambda x: self._model.predict(x)) - @since("1.5.0") def predictOnValues(self, dstream): """ Use the model to make predictions on the values of a DStream and carry over its keys. - :return: - DStream containing the input keys and the predictions as values. + .. versionadded:: 1.5.0 + + Returns + ------- + :py:class:`pyspark.streaming.DStream` + DStream containing predictions. """ self._validate(dstream) return dstream.mapValues(lambda x: self._model.predict(x)) @@ -779,20 +833,22 @@ class StreamingLinearRegressionWithSGD(StreamingLinearAlgorithm): of features must be constant. An initial weight vector must be provided. - :param stepSize: - Step size for each iteration of gradient descent. - (default: 0.1) - :param numIterations: - Number of iterations run for each batch of data. - (default: 50) - :param miniBatchFraction: - Fraction of each batch of data to use for updates. 
- (default: 1.0) - :param convergenceTol: - Value used to determine when to terminate iterations. - (default: 0.001) - .. versionadded:: 1.5.0 + + Parameters + ---------- + stepSize : float, optional + Step size for each iteration of gradient descent. + (default: 0.1) + numIterations : int, optional + Number of iterations run for each batch of data. + (default: 50) + miniBatchFraction : float, optional + Fraction of each batch of data to use for updates. + (default: 1.0) + convergenceTol : float, optional + Value used to determine when to terminate iterations. + (default: 0.001) """ def __init__(self, stepSize=0.1, numIterations=50, miniBatchFraction=1.0, convergenceTol=0.001): self.stepSize = stepSize diff --git a/python/pyspark/mllib/stat/KernelDensity.py b/python/pyspark/mllib/stat/KernelDensity.py index 56444c152f0ba..1d4d43e53519c 100644 --- a/python/pyspark/mllib/stat/KernelDensity.py +++ b/python/pyspark/mllib/stat/KernelDensity.py @@ -26,6 +26,8 @@ class KernelDensity(object): Estimate probability density at required points given an RDD of samples from the population. + Examples + -------- >>> kd = KernelDensity() >>> sample = sc.parallelize([0.0, 1.0]) >>> kd.setSample(sample) diff --git a/python/pyspark/mllib/stat/__init__.py b/python/pyspark/mllib/stat/__init__.py index 0fb33061838af..d3b4ddf7e4c68 100644 --- a/python/pyspark/mllib/stat/__init__.py +++ b/python/pyspark/mllib/stat/__init__.py @@ -21,8 +21,9 @@ from pyspark.mllib.stat._statistics import Statistics, MultivariateStatisticalSummary from pyspark.mllib.stat.distribution import MultivariateGaussian -from pyspark.mllib.stat.test import ChiSqTestResult +from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult from pyspark.mllib.stat.KernelDensity import KernelDensity -__all__ = ["Statistics", "MultivariateStatisticalSummary", "ChiSqTestResult", +__all__ = ["Statistics", "MultivariateStatisticalSummary", + "ChiSqTestResult", "KolmogorovSmirnovTestResult", "MultivariateGaussian", "KernelDensity"] diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 43454ba5187dd..a4b45cf55febe 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -65,11 +65,19 @@ def colStats(rdd): """ Computes column-wise summary statistics for the input RDD[Vector]. - :param rdd: an RDD[Vector] for which column-wise summary statistics - are to be computed. - :return: :class:`MultivariateStatisticalSummary` object containing - column-wise summary statistics. - + Parameters + ---------- + rdd : :py:class:`pyspark.RDD` + an RDD[Vector] for which column-wise summary statistics + are to be computed. + + Returns + ------- + :class:`MultivariateStatisticalSummary` + object containing column-wise summary statistics. + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors >>> rdd = sc.parallelize([Vectors.dense([2, 0, 0, -2]), ... Vectors.dense([4, 5, 0, 3]), @@ -103,13 +111,24 @@ def corr(x, y=None, method=None): to specify the method to be used for single RDD inout. If two RDDs of floats are passed in, a single float is returned. - :param x: an RDD of vector for which the correlation matrix is to be computed, - or an RDD of float of the same cardinality as y when y is specified. - :param y: an RDD of float of the same cardinality as x. - :param method: String specifying the method to use for computing correlation. - Supported: `pearson` (default), `spearman` - :return: Correlation matrix comparing columns in x. 
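The streaming estimator documented above has no doctest nearby, so here is a rough, timing-dependent sketch; it assumes a live SparkContext `sc`, uses `queueStream` purely for illustration (a real job would read from a socket or Kafka), and the sleep/stop choreography is only a convenience for a local run.

```
import time
from pyspark.streaming import StreamingContext
from pyspark.mllib.regression import LabeledPoint, StreamingLinearRegressionWithSGD

ssc = StreamingContext(sc, 1)    # 1-second batches

# Two toy training batches fed through a queue stream.
batches = [sc.parallelize([LabeledPoint(0.0, [0.0]), LabeledPoint(1.0, [1.0])]),
           sc.parallelize([LabeledPoint(2.0, [2.0]), LabeledPoint(3.0, [3.0])])]
train_stream = ssc.queueStream(batches)

model = StreamingLinearRegressionWithSGD(stepSize=0.1, numIterations=50)
model.setInitialWeights([0.0])   # an initial weight vector must be provided
model.trainOn(train_stream)

ssc.start()
time.sleep(5)                    # let the batches flow through
ssc.stop(stopSparkContext=False, stopGraceFully=True)
print(model.latestModel().weights)
```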
- + Parameters + ---------- + x : :py:class:`pyspark.RDD` + an RDD of vector for which the correlation matrix is to be computed, + or an RDD of float of the same cardinality as y when y is specified. + y : :py:class:`pyspark.RDD`, optional + an RDD of float of the same cardinality as x. + method : str, optional + String specifying the method to use for computing correlation. + Supported: `pearson` (default), `spearman` + + Returns + ------- + :py:class:`pyspark.mllib.linalg.Matrix` + Correlation matrix comparing columns in x. + + Examples + -------- >>> x = sc.parallelize([1.0, 0.0, -2.0], 2) >>> y = sc.parallelize([4.0, 5.0, 3.0], 2) >>> zeros = sc.parallelize([0.0, 0.0, 0.0], 2) @@ -172,20 +191,33 @@ def chiSqTest(observed, expected=None): contingency matrix for which the chi-squared statistic is computed. All label and feature values must be categorical. - .. note:: `observed` cannot contain negative values - - :param observed: it could be a vector containing the observed categorical - counts/relative frequencies, or the contingency matrix - (containing either counts or relative frequencies), - or an RDD of LabeledPoint containing the labeled dataset - with categorical features. Real-valued features will be - treated as categorical for each distinct value. - :param expected: Vector containing the expected categorical counts/relative - frequencies. `expected` is rescaled if the `expected` sum - differs from the `observed` sum. - :return: ChiSquaredTest object containing the test statistic, degrees - of freedom, p-value, the method used, and the null hypothesis. - + Parameters + ---------- + observed : :py:class:`pyspark.mllib.linalg.Vector` or \ + :py:class:`pyspark.mllib.linalg.Matrix` + it could be a vector containing the observed categorical + counts/relative frequencies, or the contingency matrix + (containing either counts or relative frequencies), + or an RDD of LabeledPoint containing the labeled dataset + with categorical features. Real-valued features will be + treated as categorical for each distinct value. + expected : :py:class:`pyspark.mllib.linalg.Vector` + Vector containing the expected categorical counts/relative + frequencies. `expected` is rescaled if the `expected` sum + differs from the `observed` sum. + + Returns + ------- + :py:class:`pyspark.mllib.stat.ChiSqTestResult` + object containing the test statistic, degrees + of freedom, p-value, the method used, and the null hypothesis. + + Notes + ----- + `observed` cannot contain negative values + + Examples + -------- >>> from pyspark.mllib.linalg import Vectors, Matrices >>> observed = Vectors.dense([4, 6, 5]) >>> pearson = Statistics.chiSqTest(observed) @@ -259,17 +291,28 @@ def kolmogorovSmirnovTest(data, distName="norm", *params): For specific details of the implementation, please have a look at the Scala documentation. - :param data: RDD, samples from the data - :param distName: string, currently only "norm" is supported. - (Normal distribution) to calculate the - theoretical distribution of the data. - :param params: additional values which need to be provided for - a certain distribution. - If not provided, the default values are used. - :return: KolmogorovSmirnovTestResult object containing the test - statistic, degrees of freedom, p-value, - the method used, and the null hypothesis. + Parameters + ---------- + data : :py:class:`pyspark.RDD` + RDD, samples from the data + distName : str, optional + string, currently only "norm" is supported. 
+ (Normal distribution) to calculate the + theoretical distribution of the data. + params + additional values which need to be provided for + a certain distribution. + If not provided, the default values are used. + + Returns + ------- + :py:class:`pyspark.mllib.stat.KolmogorovSmirnovTestResult` + object containing the test statistic, degrees of freedom, p-value, + the method used, and the null hypothesis. + + Examples + -------- >>> kstest = Statistics.kolmogorovSmirnovTest >>> data = sc.parallelize([-1.0, 0.0, 1.0]) >>> ksmodel = kstest(data, "norm") diff --git a/python/pyspark/mllib/stat/distribution.py b/python/pyspark/mllib/stat/distribution.py index 46f7a1d2f277a..aa35ac6dfdae1 100644 --- a/python/pyspark/mllib/stat/distribution.py +++ b/python/pyspark/mllib/stat/distribution.py @@ -24,6 +24,8 @@ class MultivariateGaussian(namedtuple('MultivariateGaussian', ['mu', 'sigma'])): """Represents a (mu, sigma) tuple + Examples + -------- >>> m = MultivariateGaussian(Vectors.dense([11,12]),DenseMatrix(2, 2, (1.0, 3.0, 5.0, 2.0))) >>> (m.mu, m.sigma.toArray()) (DenseVector([11.0, 12.0]), array([[ 1., 5.],[ 3., 2.]])) diff --git a/python/pyspark/mllib/tree.py b/python/pyspark/mllib/tree.py index e05dfdb953ceb..493dcf8db6fd2 100644 --- a/python/pyspark/mllib/tree.py +++ b/python/pyspark/mllib/tree.py @@ -33,15 +33,18 @@ class TreeEnsembleModel(JavaModelWrapper, JavaSaveable): .. versionadded:: 1.3.0 """ - @since("1.3.0") def predict(self, x): """ Predict values for a single data point or an RDD of points using the model trained. - .. note:: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. versionadded:: 1.3.0 + + Notes + ----- + In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -79,18 +82,23 @@ class DecisionTreeModel(JavaModelWrapper, JavaSaveable, JavaLoader): .. versionadded:: 1.1.0 """ - @since("1.1.0") def predict(self, x): """ Predict the label of one or more examples. - .. note:: In Python, predict cannot currently be used within an RDD - transformation or action. - Call predict directly on the RDD instead. + .. versionadded:: 1.1.0 + + Parameters + ---------- + x : :py:class:`pyspark.mllib.linalg.Vector` or :py:class:`pyspark.RDD` + Data point (feature vector), or an RDD of data points (feature + vectors). - :param x: - Data point (feature vector), or an RDD of data points (feature - vectors). + Notes + ----- + In Python, predict cannot currently be used within an RDD + transformation or action. + Call predict directly on the RDD instead. """ if isinstance(x, RDD): return self.call("predict", x.map(_convert_to_vector)) @@ -143,45 +151,50 @@ def _train(cls, data, type, numClasses, features, impurity="gini", maxDepth=5, m return DecisionTreeModel(model) @classmethod - @since("1.1.0") def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, impurity="gini", maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0): """ Train a decision tree model for classification. - :param data: - Training data: RDD of LabeledPoint. Labels should take values - {0, 1, ..., numClasses-1}. - :param numClasses: - Number of classes for classification. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. 
- :param impurity: - Criterion used for information gain calculation. - Supported values: "gini" or "entropy". - (default: "gini") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 5) - :param maxBins: - Number of bins used for finding splits at each node. - (default: 32) - :param minInstancesPerNode: - Minimum number of instances required at child nodes to create - the parent split. - (default: 1) - :param minInfoGain: - Minimum info gain required to create a split. - (default: 0.0) - :return: - DecisionTreeModel. - - Example usage: - + .. versionadded:: 1.1.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training data: RDD of LabeledPoint. Labels should take values + {0, 1, ..., numClasses-1}. + numClasses : int + Number of classes for classification. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + impurity : str, optional + Criterion used for information gain calculation. + Supported values: "gini" or "entropy". + (default: "gini") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 5) + maxBins : int, optional + Number of bins used for finding splits at each node. + (default: 32) + minInstancesPerNode : int, optional + Minimum number of instances required at child nodes to create + the parent split. + (default: 1) + minInfoGain : float, optional + Minimum info gain required to create a split. + (default: 0.0) + + Returns + ------- + :py:class:`DecisionTreeModel` + + Examples + -------- >>> from numpy import array >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import DecisionTree @@ -222,35 +235,39 @@ def trainRegressor(cls, data, categoricalFeaturesInfo, """ Train a decision tree model for regression. - :param data: - Training data: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param impurity: - Criterion used for information gain calculation. - The only supported value for regression is "variance". - (default: "variance") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 5) - :param maxBins: - Number of bins used for finding splits at each node. - (default: 32) - :param minInstancesPerNode: - Minimum number of instances required at child nodes to create - the parent split. - (default: 1) - :param minInfoGain: - Minimum info gain required to create a split. - (default: 0.0) - :return: - DecisionTreeModel. - - Example usage: - + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training data: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + impurity : str, optional + Criterion used for information gain calculation. + The only supported value for regression is "variance". + (default: "variance") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). 
+ (default: 5) + maxBins : int, optional + Number of bins used for finding splits at each node. + (default: 32) + minInstancesPerNode : int, optional + Minimum number of instances required at child nodes to create + the parent split. + (default: 1) + minInfoGain : float, optional + Minimum info gain required to create a split. + (default: 0.0) + + Returns + ------- + :py:class:`DecisionTreeModel` + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import DecisionTree >>> from pyspark.mllib.linalg import SparseVector @@ -313,7 +330,6 @@ def _train(cls, data, algo, numClasses, categoricalFeaturesInfo, numTrees, return RandomForestModel(model) @classmethod - @since("1.2.0") def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto", impurity="gini", maxDepth=4, maxBins=32, seed=None): @@ -321,44 +337,51 @@ def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, Train a random forest model for binary or multiclass classification. - :param data: - Training dataset: RDD of LabeledPoint. Labels should take values - {0, 1, ..., numClasses-1}. - :param numClasses: - Number of classes for classification. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param numTrees: - Number of trees in the random forest. - :param featureSubsetStrategy: - Number of features to consider for splits at each node. - Supported values: "auto", "all", "sqrt", "log2", "onethird". - If "auto" is set, this parameter is set based on numTrees: - if numTrees == 1, set to "all"; - if numTrees > 1 (forest) set to "sqrt". - (default: "auto") - :param impurity: - Criterion used for information gain calculation. - Supported values: "gini" or "entropy". - (default: "gini") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 4) - :param maxBins: - Maximum number of bins used for splitting features. - (default: 32) - :param seed: - Random seed for bootstrapping and choosing feature subsets. - Set as None to generate seed based on system time. - (default: None) - :return: - RandomForestModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels should take values + {0, 1, ..., numClasses-1}. + numClasses : int + Number of classes for classification. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + numTrees : int + Number of trees in the random forest. + featureSubsetStrategy : str, optional + Number of features to consider for splits at each node. + Supported values: "auto", "all", "sqrt", "log2", "onethird". + If "auto" is set, this parameter is set based on numTrees: + if numTrees == 1, set to "all"; + if numTrees > 1 (forest) set to "sqrt". + (default: "auto") + impurity : str, optional + Criterion used for information gain calculation. + Supported values: "gini" or "entropy". + (default: "gini") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). 
+ (default: 4) + maxBins : int, optional + Maximum number of bins used for splitting features. + (default: 32) + seed : int, Optional + Random seed for bootstrapping and choosing feature subsets. + Set as None to generate seed based on system time. + (default: None) + + Returns + ------- + :py:class:`RandomForestModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import RandomForest >>> @@ -405,47 +428,55 @@ def trainClassifier(cls, data, numClasses, categoricalFeaturesInfo, numTrees, maxDepth, maxBins, seed) @classmethod - @since("1.2.0") def trainRegressor(cls, data, categoricalFeaturesInfo, numTrees, featureSubsetStrategy="auto", impurity="variance", maxDepth=4, maxBins=32, seed=None): """ Train a random forest model for regression. - :param data: - Training dataset: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param numTrees: - Number of trees in the random forest. - :param featureSubsetStrategy: - Number of features to consider for splits at each node. - Supported values: "auto", "all", "sqrt", "log2", "onethird". - If "auto" is set, this parameter is set based on numTrees: - if numTrees == 1, set to "all"; - if numTrees > 1 (forest) set to "onethird" for regression. - (default: "auto") - :param impurity: - Criterion used for information gain calculation. - The only supported value for regression is "variance". - (default: "variance") - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 4) - :param maxBins: - Maximum number of bins used for splitting features. - (default: 32) - :param seed: - Random seed for bootstrapping and choosing feature subsets. - Set as None to generate seed based on system time. - (default: None) - :return: - RandomForestModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.2.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + numTrees : int + Number of trees in the random forest. + featureSubsetStrategy : str, optional + Number of features to consider for splits at each node. + Supported values: "auto", "all", "sqrt", "log2", "onethird". + If "auto" is set, this parameter is set based on numTrees: + + - if numTrees == 1, set to "all"; + - if numTrees > 1 (forest) set to "onethird" for regression. + + (default: "auto") + impurity : str, optional + Criterion used for information gain calculation. + The only supported value for regression is "variance". + (default: "variance") + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 4) + maxBins : int, optional + Maximum number of bins used for splitting features. + (default: 32) + seed : int, optional + Random seed for bootstrapping and choosing feature subsets. + Set as None to generate seed based on system time. + (default: None) + + Returns + ------- + :py:class:`RandomForestModel` + that can be used for prediction. 
+ + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import RandomForest >>> from pyspark.mllib.linalg import SparseVector @@ -505,45 +536,51 @@ def _train(cls, data, algo, categoricalFeaturesInfo, return GradientBoostedTreesModel(model) @classmethod - @since("1.3.0") def trainClassifier(cls, data, categoricalFeaturesInfo, loss="logLoss", numIterations=100, learningRate=0.1, maxDepth=3, maxBins=32): """ Train a gradient-boosted trees model for classification. - :param data: - Training dataset: RDD of LabeledPoint. Labels should take values - {0, 1}. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param loss: - Loss function used for minimization during gradient boosting. - Supported values: "logLoss", "leastSquaresError", - "leastAbsoluteError". - (default: "logLoss") - :param numIterations: - Number of iterations of boosting. - (default: 100) - :param learningRate: - Learning rate for shrinking the contribution of each estimator. - The learning rate should be between in the interval (0, 1]. - (default: 0.1) - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 3) - :param maxBins: - Maximum number of bins used for splitting features. DecisionTree - requires maxBins >= max categories. - (default: 32) - :return: - GradientBoostedTreesModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.3.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + Training dataset: RDD of LabeledPoint. Labels should take values + {0, 1}. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + loss : str, optional + Loss function used for minimization during gradient boosting. + Supported values: "logLoss", "leastSquaresError", + "leastAbsoluteError". + (default: "logLoss") + numIterations : int, optional + Number of iterations of boosting. + (default: 100) + learningRate : float, optional + Learning rate for shrinking the contribution of each estimator. + The learning rate should be between in the interval (0, 1]. + (default: 0.1) + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 3) + maxBins : int, optional + Maximum number of bins used for splitting features. DecisionTree + requires maxBins >= max categories. + (default: 32) + + Returns + ------- + :py:class:`GradientBoostedTreesModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import GradientBoostedTrees >>> @@ -574,44 +611,50 @@ def trainClassifier(cls, data, categoricalFeaturesInfo, loss, numIterations, learningRate, maxDepth, maxBins) @classmethod - @since("1.3.0") def trainRegressor(cls, data, categoricalFeaturesInfo, loss="leastSquaresError", numIterations=100, learningRate=0.1, maxDepth=3, maxBins=32): """ Train a gradient-boosted trees model for regression. - :param data: - Training dataset: RDD of LabeledPoint. Labels are real numbers. - :param categoricalFeaturesInfo: - Map storing arity of categorical features. 
An entry (n -> k) - indicates that feature n is categorical with k categories - indexed from 0: {0, 1, ..., k-1}. - :param loss: - Loss function used for minimization during gradient boosting. - Supported values: "logLoss", "leastSquaresError", - "leastAbsoluteError". - (default: "leastSquaresError") - :param numIterations: - Number of iterations of boosting. - (default: 100) - :param learningRate: - Learning rate for shrinking the contribution of each estimator. - The learning rate should be between in the interval (0, 1]. - (default: 0.1) - :param maxDepth: - Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 - means 1 internal node + 2 leaf nodes). - (default: 3) - :param maxBins: - Maximum number of bins used for splitting features. DecisionTree - requires maxBins >= max categories. - (default: 32) - :return: - GradientBoostedTreesModel that can be used for prediction. - - Example usage: - + .. versionadded:: 1.3.0 + + Parameters + ---------- + data : + Training dataset: RDD of LabeledPoint. Labels are real numbers. + categoricalFeaturesInfo : dict + Map storing arity of categorical features. An entry (n -> k) + indicates that feature n is categorical with k categories + indexed from 0: {0, 1, ..., k-1}. + loss : str, optional + Loss function used for minimization during gradient boosting. + Supported values: "logLoss", "leastSquaresError", + "leastAbsoluteError". + (default: "leastSquaresError") + numIterations : int, optional + Number of iterations of boosting. + (default: 100) + learningRate : float, optional + Learning rate for shrinking the contribution of each estimator. + The learning rate should be between in the interval (0, 1]. + (default: 0.1) + maxDepth : int, optional + Maximum depth of tree (e.g. depth 0 means 1 leaf node, depth 1 + means 1 internal node + 2 leaf nodes). + (default: 3) + maxBins : int, optional + Maximum number of bins used for splitting features. DecisionTree + requires maxBins >= max categories. + (default: 32) + + Returns + ------- + :py:class:`GradientBoostedTreesModel` + that can be used for prediction. + + Examples + -------- >>> from pyspark.mllib.regression import LabeledPoint >>> from pyspark.mllib.tree import GradientBoostedTrees >>> from pyspark.mllib.linalg import SparseVector diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py index a0be29a82e3dc..68feb9563852c 100644 --- a/python/pyspark/mllib/util.py +++ b/python/pyspark/mllib/util.py @@ -65,7 +65,6 @@ def _convert_labeled_point_to_libsvm(p): return " ".join(items) @staticmethod - @since("1.0.0") def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): """ Loads labeled data in the LIBSVM format into an RDD of @@ -79,20 +78,33 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): method parses each line into a LabeledPoint, where the feature indices are converted to zero-based. - :param sc: Spark context - :param path: file or directory path in any Hadoop-supported file - system URI - :param numFeatures: number of features, which will be determined - from the input data if a nonpositive value - is given. This is useful when the dataset is - already split into multiple files and you - want to load them separately, because some - features may not present in certain files, - which leads to inconsistent feature - dimensions. - :param minPartitions: min number of partitions - :return: labeled data stored as an RDD of LabeledPoint - + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context + path : str + file or directory path in any Hadoop-supported file system URI + numFeatures : int, optional + number of features, which will be determined + from the input data if a nonpositive value + is given. This is useful when the dataset is + already split into multiple files and you + want to load them separately, because some + features may not present in certain files, + which leads to inconsistent feature + dimensions. + minPartitions : int, optional + min number of partitions + + Returns + ------- + :py:class:`pyspark.RDD` + labeled data stored as an RDD of LabeledPoint + + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils >>> from pyspark.mllib.regression import LabeledPoint @@ -118,14 +130,21 @@ def loadLibSVMFile(sc, path, numFeatures=-1, minPartitions=None): return parsed.map(lambda x: LabeledPoint(x[0], Vectors.sparse(numFeatures, x[1], x[2]))) @staticmethod - @since("1.0.0") def saveAsLibSVMFile(data, dir): """ Save labeled data in LIBSVM format. - :param data: an RDD of LabeledPoint to be saved - :param dir: directory to save the data + .. versionadded:: 1.0.0 + + Parameters + ---------- + data : :py:class:`pyspark.RDD` + an RDD of LabeledPoint to be saved + dir : str + directory to save the data + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from fileinput import input >>> from pyspark.mllib.regression import LabeledPoint @@ -143,17 +162,28 @@ def saveAsLibSVMFile(data, dir): lines.saveAsTextFile(dir) @staticmethod - @since("1.1.0") def loadLabeledPoints(sc, path, minPartitions=None): """ Load labeled points saved using RDD.saveAsTextFile. - :param sc: Spark context - :param path: file or directory path in any Hadoop-supported file - system URI - :param minPartitions: min number of partitions - :return: labeled data stored as an RDD of LabeledPoint + .. versionadded:: 1.0.0 + + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context + path : str + file or directory path in any Hadoop-supported file system URI + minPartitions : int, optional + min number of partitions + Returns + ------- + :py:class:`pyspark.RDD` + labeled data stored as an RDD of LabeledPoint + + Examples + -------- >>> from tempfile import NamedTemporaryFile >>> from pyspark.mllib.util import MLUtils >>> from pyspark.mllib.regression import LabeledPoint @@ -193,7 +223,6 @@ def loadVectors(sc, path): return callMLlibFunc("loadVectors", sc, path) @staticmethod - @since("2.0.0") def convertVectorColumnsToML(dataset, *cols): """ Converts vector columns in an input DataFrame from the @@ -201,16 +230,26 @@ def convertVectorColumnsToML(dataset, *cols): :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of vector columns to be converted. - New vector columns will be ignored. If unspecified, all old - vector columns will be converted excepted nested ones. - :return: - the input dataset with old vector columns converted to the - new vector type + .. versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Vector columns to be converted. + New vector columns will be ignored. If unspecified, all old + vector columns will be converted excepted nested ones. 
+ + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with old vector columns converted to the + new vector type + + Examples + -------- >>> import pyspark >>> from pyspark.mllib.linalg import Vectors >>> from pyspark.mllib.util import MLUtils @@ -233,7 +272,6 @@ def convertVectorColumnsToML(dataset, *cols): return callMLlibFunc("convertVectorColumnsToML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertVectorColumnsFromML(dataset, *cols): """ Converts vector columns in an input DataFrame to the @@ -241,16 +279,26 @@ def convertVectorColumnsFromML(dataset, *cols): :py:class:`pyspark.ml.linalg.Vector` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of vector columns to be converted. - Old vector columns will be ignored. If unspecified, all new - vector columns will be converted except nested ones. - :return: - the input dataset with new vector columns converted to the - old vector type + .. versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Vector columns to be converted. + + Old vector columns will be ignored. If unspecified, all new + vector columns will be converted except nested ones. + + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with new vector columns converted to the + old vector type + Examples + -------- >>> import pyspark >>> from pyspark.ml.linalg import Vectors >>> from pyspark.mllib.util import MLUtils @@ -273,7 +321,6 @@ def convertVectorColumnsFromML(dataset, *cols): return callMLlibFunc("convertVectorColumnsFromML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertMatrixColumnsToML(dataset, *cols): """ Converts matrix columns in an input DataFrame from the @@ -281,16 +328,26 @@ def convertMatrixColumnsToML(dataset, *cols): :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of matrix columns to be converted. - New matrix columns will be ignored. If unspecified, all old - matrix columns will be converted excepted nested ones. - :return: - the input dataset with old matrix columns converted to the - new matrix type + .. versionadded:: 2.0.0 + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Matrix columns to be converted. + + New matrix columns will be ignored. If unspecified, all old + matrix columns will be converted excepted nested ones. + + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with old matrix columns converted to the + new matrix type + + Examples + -------- >>> import pyspark >>> from pyspark.mllib.linalg import Matrices >>> from pyspark.mllib.util import MLUtils @@ -313,7 +370,6 @@ def convertMatrixColumnsToML(dataset, *cols): return callMLlibFunc("convertMatrixColumnsToML", dataset, list(cols)) @staticmethod - @since("2.0.0") def convertMatrixColumnsFromML(dataset, *cols): """ Converts matrix columns in an input DataFrame to the @@ -321,16 +377,26 @@ def convertMatrixColumnsFromML(dataset, *cols): :py:class:`pyspark.ml.linalg.Matrix` type under the `spark.ml` package. - :param dataset: - input dataset - :param cols: - a list of matrix columns to be converted. - Old matrix columns will be ignored. If unspecified, all new - matrix columns will be converted except nested ones. - :return: - the input dataset with new matrix columns converted to the - old matrix type + .. 
versionadded:: 2.0.0 + + Parameters + ---------- + dataset : :py:class:`pyspark.sql.DataFrame` + input dataset + \\*cols : str + Matrix columns to be converted. + + Old matrix columns will be ignored. If unspecified, all new + matrix columns will be converted except nested ones. + Returns + ------- + :py:class:`pyspark.sql.DataFrame` + the input dataset with new matrix columns converted to the + old matrix type + + Examples + -------- >>> import pyspark >>> from pyspark.ml.linalg import Matrices >>> from pyspark.mllib.util import MLUtils @@ -370,10 +436,14 @@ def save(self, sc, path): The model may be loaded using :py:meth:`Loader.load`. - :param sc: Spark context used to save model data. - :param path: Path specifying the directory in which to save - this model. If the directory already exists, - this method throws an exception. + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context used to save model data. + path : str + Path specifying the directory in which to save + this model. If the directory already exists, + this method throws an exception. """ raise NotImplementedError @@ -410,10 +480,17 @@ def load(cls, sc, path): Load a model from the given path. The model should have been saved using :py:meth:`Saveable.save`. - :param sc: Spark context used for loading model files. - :param path: Path specifying the directory to which the model - was saved. - :return: model instance + Parameters + ---------- + sc : :py:class:`pyspark.SparkContext` + Spark context used for loading model files. + path : str + Path specifying the directory to which the model was saved. + + Returns + ------- + object + model instance """ raise NotImplementedError @@ -463,20 +540,33 @@ class LinearDataGenerator(object): """ @staticmethod - @since("1.5.0") def generateLinearInput(intercept, weights, xMean, xVariance, nPoints, seed, eps): """ - :param: intercept bias factor, the term c in X'w + c - :param: weights feature vector, the term w in X'w + c - :param: xMean Point around which the data X is centered. - :param: xVariance Variance of the given data - :param: nPoints Number of points to be generated - :param: seed Random Seed - :param: eps Used to scale the noise. If eps is set high, - the amount of gaussian noise added is more. - - Returns a list of LabeledPoints of length nPoints + .. versionadded:: 1.5.0 + + Parameters + ---------- + intercept : float + bias factor, the term c in X'w + c + weights : :py:class:`pyspark.mllib.linalg.Vector` or convertible + feature vector, the term w in X'w + c + xMean : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Point around which the data X is centered. + xVariance : :py:class:`pyspark.mllib.linalg.Vector` or convertible + Variance of the given data + nPoints : int + Number of points to be generated + seed : int + Random Seed + eps : float + Used to scale the noise. If eps is set high, + the amount of gaussian noise added is more. + + Returns + ------- + list + of :py:class:`pyspark.mllib.regression.LabeledPoints` of length nPoints """ weights = [float(weight) for weight in weights] xMean = [float(mean) for mean in xMean] From d1b4f06179f3f7838ae1ce7a6244b2ba75134e41 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 25 Nov 2020 02:02:32 +0000 Subject: [PATCH 0564/1009] [SPARK-33494][SQL][AQE] Do not use local shuffle reader for repartition ### What changes were proposed in this pull request? This PR updates `ShuffleExchangeExec` to carry more information about how much we can change the partitioning. 
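A rough illustration of the case this PR targets (a hedged spark-shell sketch, assuming adaptive execution is enabled and a `testData` table exists; this snippet is not code from the PR itself):
```
spark.conf.set("spark.sql.adaptive.enabled", "true")
val df = spark.table("testData").repartition('key)
df.collect()
// The shuffle added by repartition('key) hash-partitions the data by `key`.
// Rewriting its reader into an AQE local shuffle reader would drop that
// partitioning, while merely coalescing its partitions would not.
```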
For `repartition(col)`, we should preserve the user-specified partitioning and don't apply the AQE local shuffle reader. ### Why are the changes needed? Similar to `repartition(number, col)`, we should respect the user-specified partitioning. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? a new test Closes #30432 from cloud-fan/aqe. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/sql/execution/SparkStrategies.scala | 14 ++++---- .../adaptive/CoalesceShufflePartitions.scala | 9 +++++- .../adaptive/OptimizeLocalShuffleReader.scala | 11 +++++-- .../exchange/ShuffleExchangeExec.scala | 28 +++++++++++----- .../sql-tests/results/explain-aqe.sql.out | 24 +++++++------- .../sql-tests/results/explain.sql.out | 32 +++++++++---------- .../sql/SparkSessionExtensionSuite.scala | 6 ++-- .../adaptive/AdaptiveQueryExecSuite.scala | 10 ++++++ 8 files changed, 86 insertions(+), 48 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index e9b1aa81895f5..f5f77b03c2b1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRe import org.apache.spark.sql.execution.aggregate.AggUtils import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{REPARTITION, REPARTITION_WITH_NUM, ShuffleExchangeExec} import org.apache.spark.sql.execution.python._ import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemoryPlan @@ -670,7 +670,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case logical.Repartition(numPartitions, shuffle, child) => if (shuffle) { ShuffleExchangeExec(RoundRobinPartitioning(numPartitions), - planLater(child), noUserSpecifiedNumPartition = false) :: Nil + planLater(child), REPARTITION_WITH_NUM) :: Nil } else { execution.CoalesceExec(numPartitions, planLater(child)) :: Nil } @@ -703,10 +703,12 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case r: logical.Range => execution.RangeExec(r) :: Nil case r: logical.RepartitionByExpression => - exchange.ShuffleExchangeExec( - r.partitioning, - planLater(r.child), - noUserSpecifiedNumPartition = r.optNumPartitions.isEmpty) :: Nil + val shuffleOrigin = if (r.optNumPartitions.isEmpty) { + REPARTITION + } else { + REPARTITION_WITH_NUM + } + exchange.ShuffleExchangeExec(r.partitioning, planLater(r.child), shuffleOrigin) :: Nil case ExternalRDD(outputObjAttr, rdd) => ExternalRDDScanExec(outputObjAttr, rdd) :: Nil case r: LogicalRDD => RDDScanExec(r.output, r.rdd, "ExistingRDD", r.outputPartitioning, r.outputOrdering) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 89ff528d7a188..0cf3ab0cca49a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -18,8 +18,10 @@ 
package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike} import org.apache.spark.sql.internal.SQLConf /** @@ -47,7 +49,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl val shuffleStages = collectShuffleStages(plan) // ShuffleExchanges introduced by repartition do not support changing the number of partitions. // We change the number of partitions in the stage only if all the ShuffleExchanges support it. - if (!shuffleStages.forall(_.shuffle.canChangeNumPartitions)) { + if (!shuffleStages.forall(s => supportCoalesce(s.shuffle))) { plan } else { // `ShuffleQueryStageExec#mapStats` returns None when the input RDD has 0 partitions, @@ -82,4 +84,9 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl } } } + + private def supportCoalesce(s: ShuffleExchangeLike): Boolean = { + s.outputPartitioning != SinglePartition && + (s.shuffleOrigin == ENSURE_REQUIREMENTS || s.shuffleOrigin == REPARTITION) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 8db2827beaf43..8f57947cb6396 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -18,9 +18,10 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} +import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.internal.SQLConf @@ -136,9 +137,13 @@ object OptimizeLocalShuffleReader extends Rule[SparkPlan] { def canUseLocalShuffleReader(plan: SparkPlan): Boolean = plan match { case s: ShuffleQueryStageExec => - s.shuffle.canChangeNumPartitions && s.mapStats.isDefined + s.mapStats.isDefined && supportLocalReader(s.shuffle) case CustomShuffleReaderExec(s: ShuffleQueryStageExec, partitionSpecs) => - s.shuffle.canChangeNumPartitions && s.mapStats.isDefined && partitionSpecs.nonEmpty + s.mapStats.isDefined && partitionSpecs.nonEmpty && supportLocalReader(s.shuffle) case _ => false } + + private def supportLocalReader(s: ShuffleExchangeLike): Boolean = { + s.outputPartitioning != SinglePartition && s.shuffleOrigin == ENSURE_REQUIREMENTS + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 6af4b098bee2f..affa92de693af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -57,9 +57,9 @@ trait 
ShuffleExchangeLike extends Exchange { def numPartitions: Int /** - * Returns whether the shuffle partition number can be changed. + * The origin of this shuffle operator. */ - def canChangeNumPartitions: Boolean + def shuffleOrigin: ShuffleOrigin /** * The asynchronous job that materializes the shuffle. @@ -77,18 +77,30 @@ trait ShuffleExchangeLike extends Exchange { def runtimeStatistics: Statistics } +// Describes where the shuffle operator comes from. +sealed trait ShuffleOrigin + +// Indicates that the shuffle operator was added by the internal `EnsureRequirements` rule. It +// means that the shuffle operator is used to ensure internal data partitioning requirements and +// Spark is free to optimize it as long as the requirements are still ensured. +case object ENSURE_REQUIREMENTS extends ShuffleOrigin + +// Indicates that the shuffle operator was added by the user-specified repartition operator. Spark +// can still optimize it via changing shuffle partition number, as data partitioning won't change. +case object REPARTITION extends ShuffleOrigin + +// Indicates that the shuffle operator was added by the user-specified repartition operator with +// a certain partition number. Spark can't optimize it. +case object REPARTITION_WITH_NUM extends ShuffleOrigin + /** * Performs a shuffle that will result in the desired partitioning. */ case class ShuffleExchangeExec( override val outputPartitioning: Partitioning, child: SparkPlan, - noUserSpecifiedNumPartition: Boolean = true) extends ShuffleExchangeLike { - - // If users specify the num partitions via APIs like `repartition`, we shouldn't change it. - // For `SinglePartition`, it requires exactly one partition and we can't change it either. - override def canChangeNumPartitions: Boolean = - noUserSpecifiedNumPartition && outputPartitioning != SinglePartition + shuffleOrigin: ShuffleOrigin = ENSURE_REQUIREMENTS) + extends ShuffleExchangeLike { private lazy val writeMetrics = SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 567e0eabe1805..578b0a807fc52 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 24 -- !query @@ -67,10 +67,10 @@ Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) - +- Exchange SinglePartition, true, [id=#x] + +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +- HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) - +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), ENSURE_REQUIREMENTS, [id=#x] +- HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], 
ReadSchema: struct @@ -116,7 +116,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -127,7 +127,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (6) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), true, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] (7) Sort Input [2]: [key#x, max(val)#x] @@ -179,7 +179,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -254,7 +254,7 @@ Results [2]: [key#x, val#x] (7) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] (8) HashAggregate Input [2]: [key#x, val#x] @@ -576,7 +576,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -605,7 +605,7 @@ Results [2]: [key#x, max#x] (9) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate Input [2]: [key#x, max#x] @@ -687,7 +687,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (3) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (4) HashAggregate Input [3]: [count#xL, sum#xL, count#xL] @@ -732,7 +732,7 @@ Results [2]: [key#x, buf#x] (3) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (4) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ -783,7 +783,7 @@ Results [2]: [key#x, min#x] (4) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) Sort Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index fcd69549f2c6e..886b98e538d28 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 23 +-- Number of queries: 24 -- !query @@ -66,10 +66,10 @@ Aggregate [sum(distinct cast(val#x as bigint)) AS sum(DISTINCT val)#xL] == Physical Plan == *HashAggregate(keys=[], functions=[sum(distinct cast(val#x as bigint)#xL)], output=[sum(DISTINCT val)#xL]) -+- Exchange SinglePartition, true, [id=#x] ++- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +- *HashAggregate(keys=[], functions=[partial_sum(distinct cast(val#x as bigint)#xL)], output=[sum#xL]) +- *HashAggregate(keys=[cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) - +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), true, [id=#x] + +- Exchange hashpartitioning(cast(val#x as bigint)#xL, 4), ENSURE_REQUIREMENTS, [id=#x] +- 
*HashAggregate(keys=[cast(val#x as bigint) AS cast(val#x as bigint)#xL], functions=[], output=[cast(val#x as bigint)#xL]) +- *ColumnarToRow +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct @@ -119,7 +119,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -130,7 +130,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (7) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), true, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] (8) Sort [codegen id : 3] Input [2]: [key#x, max(val)#x] @@ -181,7 +181,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -259,7 +259,7 @@ Results [2]: [key#x, val#x] (9) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 4] Input [2]: [key#x, val#x] @@ -452,7 +452,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -498,7 +498,7 @@ Results [1]: [max#x] (16) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (17) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -580,7 +580,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -626,7 +626,7 @@ Results [2]: [sum#x, count#xL] (16) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (17) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -690,7 +690,7 @@ Results [2]: [sum#x, count#xL] (7) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (8) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -810,7 +810,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) HashAggregate [codegen id : 4] Input [2]: [key#x, max#x] @@ -901,7 +901,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (4) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, true, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] (5) HashAggregate [codegen id : 2] Input [3]: [count#xL, sum#xL, count#xL] @@ -945,7 +945,7 @@ Results [2]: [key#x, buf#x] (4) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (5) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ 
-995,7 +995,7 @@ Results [2]: [key#x, min#x] (5) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), true, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] (6) Sort [codegen id : 2] Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 951b72a863483..12abd31b99e93 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec} -import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.COLUMN_BATCH_SIZE @@ -766,7 +766,9 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { case class MyShuffleExchangeExec(delegate: ShuffleExchangeExec) extends ShuffleExchangeLike { override def numMappers: Int = delegate.numMappers override def numPartitions: Int = delegate.numPartitions - override def canChangeNumPartitions: Boolean = delegate.canChangeNumPartitions + override def shuffleOrigin: ShuffleOrigin = { + delegate.shuffleOrigin + } override def mapOutputStatisticsFuture: Future[MapOutputStatistics] = delegate.mapOutputStatisticsFuture override def getShuffleRDD(partitionSpecs: Array[ShufflePartitionSpec]): RDD[_] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 38a323b1c057e..758965954b374 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1307,4 +1307,14 @@ class AdaptiveQueryExecSuite spark.listenerManager.unregister(listener) } } + + test("SPARK-33494: Do not use local shuffle reader for repartition") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val df = spark.table("testData").repartition('key) + df.collect() + // local shuffle reader breaks partitioning and shouldn't be used for repartition operation + // which is specified by users. + checkNumLocalShuffleReaders(df.queryExecution.executedPlan, numShufflesWithoutLocalReader = 1) + } + } } From b7f034d8dc17b9ae5eced387d20f37b9e3e58901 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 25 Nov 2020 03:04:04 +0000 Subject: [PATCH 0565/1009] [SPARK-33543][SQL] Migrate SHOW COLUMNS command to use UnresolvedTableOrView to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `SHOW COLUMNS` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. 
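For example (a hedged spark-shell sketch; the table and database names below are placeholders, not taken from this PR):
```
spark.sql("SHOW COLUMNS IN t1")              // t1 is resolved as a temp view first, then as a table
spark.sql("SHOW COLUMNS IN t1 FROM db1")     // db1 is used only because t1 has no database part
spark.sql("SHOW COLUMNS FROM db1.t1 IN db1") // db1 must match the database in db1.t1, otherwise
                                             // analysis fails with "SHOW COLUMNS with conflicting databases"
```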
More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `SHOW COLUMNS` is not yet supported for v2 tables. ### Why are the changes needed? To use `UnresolvedTableOrView` for table/view resolution. Note that `ShowColumnsCommand` internally resolves to a temp view first, so there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30490 from imback82/show_columns. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 13 ++++++++++--- .../catalyst/plans/logical/statements.scala | 7 ------- .../catalyst/plans/logical/v2Commands.scala | 10 ++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 8 ++++---- .../analysis/ResolveSessionCatalog.scala | 18 +++--------------- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../sql-tests/results/show_columns.sql.out | 16 ++++++++-------- .../sql/connector/DataSourceV2SQLSuite.scala | 11 +++-------- .../spark/sql/execution/command/DDLSuite.scala | 11 +++++++++++ 9 files changed, 52 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a4298abd211b3..5f8394c525949 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3400,7 +3400,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * A command for users to list the column names for a table. - * This function creates a [[ShowColumnsStatement]] logical plan. + * This function creates a [[ShowColumns]] logical plan. * * The syntax of using this command in SQL is: * {{{ @@ -3409,9 +3409,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitShowColumns(ctx: ShowColumnsContext): LogicalPlan = withOrigin(ctx) { - val table = visitMultipartIdentifier(ctx.table) + val nameParts = visitMultipartIdentifier(ctx.table) val namespace = Option(ctx.ns).map(visitMultipartIdentifier) - ShowColumnsStatement(table, namespace) + // Use namespace only if table name doesn't specify it. If namespace is already specified + // in the table name, it's checked against the given namespace after table/view is resolved. 
+ val tableName = if (namespace.isDefined && nameParts.length == 1) { + namespace.get ++ nameParts + } else { + nameParts + } + ShowColumns(UnresolvedTableOrView(tableName), namespace) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 39bc5a5604b20..3660e8a95a7f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -359,13 +359,6 @@ case class ShowPartitionsStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A SHOW COLUMNS statement, as parsed from SQL - */ -case class ShowColumnsStatement( - table: Seq[String], - namespace: Option[Seq[String]]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index a65b9fc59bd55..ebf41f6a6e304 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -671,6 +671,15 @@ case class ShowCreateTable(child: LogicalPlan, asSerde: Boolean = false) extends override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the SHOW COLUMN command. + */ +case class ShowColumns( + child: LogicalPlan, + namespace: Option[Seq[String]]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the TRUNCATE TABLE command. 
*/ @@ -679,3 +688,4 @@ case class TruncateTable( partitionSpec: Option[TablePartitionSpec]) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 997c642276bfb..cc3c824befb3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1643,13 +1643,13 @@ class DDLParserSuite extends AnalysisTest { val sql4 = "SHOW COLUMNS FROM db1.t1 IN db1" val parsed1 = parsePlan(sql1) - val expected1 = ShowColumnsStatement(Seq("t1"), None) + val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1")), None) val parsed2 = parsePlan(sql2) - val expected2 = ShowColumnsStatement(Seq("db1", "t1"), None) + val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), None) val parsed3 = parsePlan(sql3) - val expected3 = ShowColumnsStatement(Seq("t1"), Some(Seq("db1"))) + val expected3 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) val parsed4 = parsePlan(sql4) - val expected4 = ShowColumnsStatement(Seq("db1", "t1"), Some(Seq("db1"))) + val expected4 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 726099991a897..395f5efd5a52d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -467,25 +467,13 @@ class ResolveSessionCatalog( v1TableName.asTableIdentifier, partitionSpec) - case ShowColumnsStatement(tbl, ns) => - if (ns.isDefined && ns.get.length > 1) { - throw new AnalysisException( - s"Namespace name should have only one part if specified: ${ns.get.quoted}") - } - // Use namespace only if table name doesn't specify it. If namespace is already specified - // in the table name, it's checked against the given namespace below. 
- val nameParts = if (ns.isDefined && tbl.length == 1) { - ns.get ++ tbl - } else { - tbl - } - val sql = "SHOW COLUMNS" - val v1TableName = parseTempViewOrV1Table(nameParts, sql).asTableIdentifier + case ShowColumns(ResolvedV1TableOrViewIdentifier(ident), ns) => + val v1TableName = ident.asTableIdentifier val resolver = conf.resolver val db = ns match { case Some(db) if v1TableName.database.exists(!resolver(_, db.head)) => throw new AnalysisException( - s"SHOW COLUMNS with conflicting databases: " + + "SHOW COLUMNS with conflicting databases: " + s"'${db.head}' != '${v1TableName.database.get}'") case _ => ns.map(_.head) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 30d976524bfa8..eb0d7010041b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -305,6 +305,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case TruncateTable(_: ResolvedTable, _) => throw new AnalysisException("TRUNCATE TABLE is not supported for v2 tables.") + case ShowColumns(_: ResolvedTable, _) => + throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 4f5db7f6c6b2f..6ddffb89987d8 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -93,8 +93,8 @@ SHOW COLUMNS IN badtable FROM showdb -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'badtable' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.badtable; line 1 pos 0 -- !query @@ -129,8 +129,8 @@ SHOW COLUMNS IN showdb.showcolumn3 -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn3' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -138,8 +138,8 @@ SHOW COLUMNS IN showcolumn3 FROM showdb -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn3' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -147,8 +147,8 @@ SHOW COLUMNS IN showcolumn4 -- !query schema struct<> -- !query output -org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'showcolumn4' not found in database 'showdb'; +org.apache.spark.sql.AnalysisException +Table or view not found: showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9a3fa0c5bd3f4..222fa8ace4dca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2047,14 +2047,9 @@ class DataSourceV2SQLSuite withTable(t) { 
spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1CommandSupportingTempView("SHOW COLUMNS", s"FROM $t") - testV1CommandSupportingTempView("SHOW COLUMNS", s"IN $t") - - val e3 = intercept[AnalysisException] { - sql(s"SHOW COLUMNS FROM tbl IN testcat.ns1.ns2") - } - assert(e3.message.contains("Namespace name should have " + - "only one part if specified: testcat.ns1.ns2")) + testNotSupportedV2Command("SHOW COLUMNS", s"FROM $t") + testNotSupportedV2Command("SHOW COLUMNS", s"IN $t") + testNotSupportedV2Command("SHOW COLUMNS", "FROM tbl IN testcat.ns1.ns2") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 07201f9f85b5d..4f79e71419a10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -2266,6 +2266,17 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } + test("show columns - invalid db name") { + withTable("tbl") { + sql("CREATE TABLE tbl(col1 int, col2 string) USING parquet ") + val message = intercept[AnalysisException] { + sql("SHOW COLUMNS IN tbl FROM a.b.c") + }.getMessage + assert(message.contains( + "The namespace in session catalog must have exactly one name part: a.b.c.tbl")) + } + } + test("SPARK-18009 calling toLocalIterator on commands") { import scala.collection.JavaConverters._ val df = sql("show databases") From edab094dda3d5acbc100d01bd98e0ab15d7b4178 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Wed, 25 Nov 2020 13:12:20 +0900 Subject: [PATCH 0566/1009] [SPARK-33224][SS][WEBUI] Add watermark gap information into SS UI page ### What changes were proposed in this pull request? This PR proposes to add the watermark gap information in SS UI page. Please refer below screenshots to see what we'd like to show in UI. ![Screen Shot 2020-11-19 at 6 56 38 PM](https://user-images.githubusercontent.com/1317309/99669306-3532d080-2ab2-11eb-9a93-03d2c6a54948.png) Please note that this PR doesn't plot the watermark value - knowing the gap between actual wall clock and watermark looks more useful than the absolute value. ### Why are the changes needed? Watermark is the one of major metrics the end users need to track for stateful queries. Watermark defines "when" the output will be emitted for append mode, hence knowing how much gap between wall clock and watermark (input data) is very helpful to make expectation of the output. ### Does this PR introduce _any_ user-facing change? Yes, SS UI query page will contain the watermark gap information. ### How was this patch tested? Basic UT added. Manually tested with two queries: > simple case You'll see consistent watermark gap with (15 seconds + a) = 10 seconds are from delay in watermark definition, 5 seconds are trigger interval. 
``` import org.apache.spark.sql.streaming.Trigger spark.conf.set("spark.sql.shuffle.partitions", "10") val query = spark .readStream .format("rate") .option("rowsPerSecond", 1000) .option("rampUpTime", "10s") .load() .selectExpr("timestamp", "mod(value, 100) as mod", "value") .withWatermark("timestamp", "10 seconds") .groupBy(window($"timestamp", "1 minute", "10 seconds"), $"mod") .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value")) .writeStream .format("console") .trigger(Trigger.ProcessingTime("5 seconds")) .outputMode("append") .start() query.awaitTermination() ``` ![Screen Shot 2020-11-19 at 7 00 21 PM](https://user-images.githubusercontent.com/1317309/99669049-dbcaa180-2ab1-11eb-8789-10b35857dda0.png) > complicated case This randomizes the timestamp, hence producing random watermark gap. This won't be smaller than 15 seconds as I described earlier. ``` import org.apache.spark.sql.streaming.Trigger spark.conf.set("spark.sql.shuffle.partitions", "10") val query = spark .readStream .format("rate") .option("rowsPerSecond", 1000) .option("rampUpTime", "10s") .load() .selectExpr("*", "CAST(CAST(timestamp AS BIGINT) - CAST((RAND() * 100000) AS BIGINT) AS TIMESTAMP) AS tsMod") .selectExpr("tsMod", "mod(value, 100) as mod", "value") .withWatermark("tsMod", "10 seconds") .groupBy(window($"tsMod", "1 minute", "10 seconds"), $"mod") .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value")) .writeStream .format("console") .trigger(Trigger.ProcessingTime("5 seconds")) .outputMode("append") .start() query.awaitTermination() ``` ![Screen Shot 2020-11-19 at 6 56 47 PM](https://user-images.githubusercontent.com/1317309/99669029-d5d4c080-2ab1-11eb-9c63-d05b3e1ab391.png) Closes #30427 from HeartSaVioR/SPARK-33224. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../ui/StreamingQueryStatisticsPage.scala | 53 +++++++++++++++++++ .../sql/streaming/ui/UISeleniumSuite.scala | 15 ++++-- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 77b1e61d587a7..24709ba470cde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -140,6 +140,58 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab)
    } + def generateWatermark( + query: StreamingQueryUIData, + minBatchTime: Long, + maxBatchTime: Long, + jsCollector: JsCollector): Seq[Node] = { + // This is made sure on caller side but put it here to be defensive + require(query.lastProgress != null) + if (query.lastProgress.eventTime.containsKey("watermark")) { + val watermarkData = query.recentProgress.flatMap { p => + val batchTimestamp = parseProgressTimestamp(p.timestamp) + val watermarkValue = parseProgressTimestamp(p.eventTime.get("watermark")) + if (watermarkValue > 0L) { + // seconds + Some((batchTimestamp, ((batchTimestamp - watermarkValue) / 1000.0))) + } else { + None + } + } + + if (watermarkData.nonEmpty) { + val maxWatermark = watermarkData.maxBy(_._2)._2 + val graphUIDataForWatermark = + new GraphUIData( + "watermark-gap-timeline", + "watermark-gap-histogram", + watermarkData, + minBatchTime, + maxBatchTime, + 0, + maxWatermark, + "seconds") + graphUIDataForWatermark.generateDataJs(jsCollector) + + // scalastyle:off + + +
    +
    Global Watermark Gap {SparkUIUtils.tooltip("The gap between batch timestamp and global watermark for the batch.", "right")}
    +
    + + {graphUIDataForWatermark.generateTimelineHtml(jsCollector)} + {graphUIDataForWatermark.generateHistogramHtml(jsCollector)} + + // scalastyle:on + } else { + Seq.empty[Node] + } + } else { + Seq.empty[Node] + } + } + def generateAggregatedStateOperators( query: StreamingQueryUIData, minBatchTime: Long, @@ -465,6 +517,7 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) {graphUIDataForDuration.generateAreaStackHtmlWithData(jsCollector, operationDurationData)} + {generateWatermark(query, minBatchTime, maxBatchTime, jsCollector)} {generateAggregatedStateOperators(query, minBatchTime, maxBatchTime, jsCollector)} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala index 94844c4e87a84..db3d6529c9906 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/UISeleniumSuite.scala @@ -31,8 +31,10 @@ import org.apache.spark.internal.config.UI.{UI_ENABLED, UI_PORT} import org.apache.spark.sql.LocalSparkSession.withSparkSession import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.functions.{window => windowFn, _} +import org.apache.spark.sql.internal.SQLConf.SHUFFLE_PARTITIONS import org.apache.spark.sql.internal.StaticSQLConf.ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST -import org.apache.spark.sql.streaming.StreamingQueryException +import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with BeforeAndAfterAll { @@ -52,6 +54,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val conf = new SparkConf() .setMaster(master) .setAppName("ui-test") + .set(SHUFFLE_PARTITIONS, 5) .set(UI_ENABLED, true) .set(UI_PORT, 0) .set(ENABLED_STREAMING_UI_CUSTOM_METRIC_LIST, Seq("stateOnCurrentVersionSizeBytes")) @@ -79,10 +82,15 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B val input1 = spark.readStream.format("rate").load() val input2 = spark.readStream.format("rate").load() + val input3 = spark.readStream.format("rate").load() val activeQuery = - input1.join(input2, "value").writeStream.format("noop").start() + input1.selectExpr("timestamp", "mod(value, 100) as mod", "value") + .withWatermark("timestamp", "0 second") + .groupBy(windowFn($"timestamp", "10 seconds", "2 seconds"), $"mod") + .agg(avg("value").as("avg_value")) + .writeStream.format("noop").trigger(Trigger.ProcessingTime("5 seconds")).start() val completedQuery = - input1.join(input2, "value").writeStream.format("noop").start() + input2.join(input3, "value").writeStream.format("noop").start() completedQuery.stop() val failedQuery = spark.readStream.format("rate").load().select("value").as[Long] .map(_ / 0).writeStream.format("noop").start() @@ -138,6 +146,7 @@ class UISeleniumSuite extends SparkFunSuite with WebBrowser with Matchers with B summaryText should contain ("Input Rows (?)") summaryText should contain ("Batch Duration (?)") summaryText should contain ("Operation Duration (?)") + summaryText should contain ("Global Watermark Gap (?)") summaryText should contain ("Aggregated Number Of Total State Rows (?)") summaryText should contain ("Aggregated Number Of Updated State Rows (?)") summaryText should contain ("Aggregated State 
Memory Used In Bytes (?)") From c3ce9701b458511255072c72b9b245036fa98653 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 24 Nov 2020 20:18:45 -0800 Subject: [PATCH 0567/1009] [SPARK-33533][SQL] Fix the regression bug that ConnectionProviders don't consider case-sensitivity for properties ### What changes were proposed in this pull request? This PR fixes an issue that `BasicConnectionProvider` doesn't consider case-sensitivity for properties. For example, the property `oracle.jdbc.mapDateToTimestamp` should be considered case-sensitivity but it is not considered. ### Why are the changes needed? This is a bug introduced by #29024 . Caused by this issue, `OracleIntegrationSuite` doesn't pass. ``` [info] - SPARK-16625: General data types to be mapped to Oracle *** FAILED *** (32 seconds, 129 milliseconds) [info] types.apply(9).equals(org.apache.spark.sql.types.DateType) was false (OracleIntegrationSuite.scala:238) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) [info] at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) [info] at org.apache.spark.sql.jdbc.OracleIntegrationSuite.$anonfun$new$4(OracleIntegrationSuite.scala:238) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:176) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:61) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:61) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:392) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) [info] at org.scalatest.Suite.run$(Suite.scala:1094) [info] at 
org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1563) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:237) [info] at org.scalatest.SuperEngine.runImpl(Engine.scala:535) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:237) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:236) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:61) [info] at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:61) [info] at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) [info] at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) [info] at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:413) [info] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:748) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? With this change, I confirmed that `OracleIntegrationSuite` passes with the following command. ``` $ git clone https://github.com/oracle/docker-images.git $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles $ ./buildDockerImage.sh -v 18.4.0 -x $ ORACLE_DOCKER_IMAGE_NAME=oracle/database:18.4.0-xe build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" ``` Closes #30485 from sarutak/fix-oracle-integration-suite. 
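For illustration, a minimal usage sketch (the connection URL, user and table name below are placeholders, not part of this patch): the mixed-case driver property mentioned in the description, `oracle.jdbc.mapDateToTimestamp`, is set through the DataFrame reader and, with this fix, reaches the Oracle driver with its original casing preserved.

```
// Hypothetical connection details; only the mixed-case property key matters here.
val df = spark.read
  .format("jdbc")
  .option("url", "jdbc:oracle:thin:@//dbhost:1521/XEPDB1")
  .option("user", "scott")
  .option("dbtable", "SOME_SCHEMA.SOME_TABLE")
  // must be handed to the driver as-is, not lower-cased
  .option("oracle.jdbc.mapDateToTimestamp", "false")
  .load()
```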
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../datasources/jdbc/connection/BasicConnectionProvider.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala index 1c0513f982a1e..890205f2f6826 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/BasicConnectionProvider.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} import java.util.Properties +import scala.collection.JavaConverters._ + import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.jdbc.JdbcConnectionProvider @@ -40,7 +42,7 @@ private[jdbc] class BasicConnectionProvider extends JdbcConnectionProvider with override def getConnection(driver: Driver, options: Map[String, String]): Connection = { val jdbcOptions = new JDBCOptions(options) val properties = getAdditionalProperties(jdbcOptions) - options.foreach { case(k, v) => + jdbcOptions.asProperties.asScala.foreach { case(k, v) => properties.put(k, v) } logDebug(s"JDBC connection initiated with URL: ${jdbcOptions.url} and properties: $properties") From 781e19c4d1f376b52e5305078356bf0a58522bcd Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 25 Nov 2020 16:38:55 +0900 Subject: [PATCH 0568/1009] [SPARK-33477][SQL] Hive Metastore support filter by date type ### What changes were proposed in this pull request? Hive Metastore supports strings and integral types in filters. It could also support dates. Please see [HIVE-5679](https://github.com/apache/hive/commit/5106bf1c8671740099fca8e1a7d4b37afe97137f) for more details. This pr add support it. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30408 from wangyum/SPARK-33477. 
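For illustration, a hedged sketch (table and column names are made up for this example, and a Hive-enabled SparkSession is assumed): with date support in `convertFilters`, the partition predicate below can be converted to a metastore filter string such as `dt >= 2019-01-01 and dt <= 2019-01-07` instead of fetching all partitions and pruning them on the Spark side.

```
// Illustrative date-partitioned Hive table; the date predicate is what can now be
// pushed to the Hive metastore as a partition filter.
spark.sql("CREATE TABLE sales (amount INT) PARTITIONED BY (dt DATE) STORED AS PARQUET")
spark.sql(
  "SELECT sum(amount) FROM sales " +
    "WHERE dt BETWEEN DATE '2019-01-01' AND DATE '2019-01-07'").show()
```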
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/hive/HiveExternalCatalog.scala | 6 +- .../spark/sql/hive/client/HiveClient.scala | 3 +- .../sql/hive/client/HiveClientImpl.scala | 6 +- .../spark/sql/hive/client/HiveShim.scala | 46 ++++++++-- .../spark/sql/hive/client/FiltersSuite.scala | 35 ++++++- .../client/HivePartitionFilteringSuite.scala | 92 +++++++++++++++---- .../spark/sql/hive/client/VersionsSuite.scala | 3 +- 7 files changed, 155 insertions(+), 36 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 907bb86ad0c1c..54c237f78cb9c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions} import org.apache.spark.sql.hive.client.HiveClient @@ -1264,11 +1264,13 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat defaultTimeZoneId: String): Seq[CatalogTablePartition] = withClient { val rawTable = getRawTable(db, table) val catalogTable = restoreTableMetadata(rawTable) + val timeZoneId = CaseInsensitiveMap(catalogTable.storage.properties).getOrElse( + DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId) val partColNameMap = buildLowerCasePartColNameMap(catalogTable) val clientPrunedPartitions = - client.getPartitionsByFilter(rawTable, predicates).map { part => + client.getPartitionsByFilter(rawTable, predicates, timeZoneId).map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) } prunePartitionsByFilter(catalogTable, clientPrunedPartitions, predicates, defaultTimeZoneId) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala index 3ea80eaf6f714..48f3837740933 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala @@ -233,7 +233,8 @@ private[hive] trait HiveClient { /** Returns partitions filtered by predicates for the given table. */ def getPartitionsByFilter( catalogTable: CatalogTable, - predicates: Seq[Expression]): Seq[CatalogTablePartition] + predicates: Seq[Expression], + timeZoneId: String): Seq[CatalogTablePartition] /** Loads a static partition into an existing table. 
*/ def loadPartition( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 9bc99b08c2cc8..b2f0867114bae 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -733,9 +733,11 @@ private[hive] class HiveClientImpl( override def getPartitionsByFilter( table: CatalogTable, - predicates: Seq[Expression]): Seq[CatalogTablePartition] = withHiveState { + predicates: Seq[Expression], + timeZoneId: String): Seq[CatalogTablePartition] = withHiveState { val hiveTable = toHiveTable(table, Some(userName)) - val parts = shim.getPartitionsByFilter(client, hiveTable, predicates).map(fromHivePartition) + val parts = shim.getPartitionsByFilter(client, hiveTable, predicates, timeZoneId) + .map(fromHivePartition) HiveCatalogMetrics.incrementFetchedPartitions(parts.length) parts } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index d989f0154ea95..17a64a67df283 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -45,9 +45,9 @@ import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogFunction, CatalogTablePartition, CatalogUtils, FunctionResource, FunctionResourceType} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TypeUtils} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{AtomicType, IntegralType, StringType} +import org.apache.spark.sql.types.{AtomicType, DateType, IntegralType, StringType} import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -79,7 +79,11 @@ private[client] sealed abstract class Shim { def getAllPartitions(hive: Hive, table: Table): Seq[Partition] - def getPartitionsByFilter(hive: Hive, table: Table, predicates: Seq[Expression]): Seq[Partition] + def getPartitionsByFilter( + hive: Hive, + table: Table, + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] def getCommandProcessor(token: String, conf: HiveConf): CommandProcessor @@ -349,7 +353,8 @@ private[client] class Shim_v0_12 extends Shim with Logging { override def getPartitionsByFilter( hive: Hive, table: Table, - predicates: Seq[Expression]): Seq[Partition] = { + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] = { // getPartitionsByFilter() doesn't support binary comparison ops in Hive 0.12. // See HIVE-4888. logDebug("Hive 0.12 doesn't support predicate pushdown to metastore. " + @@ -632,7 +637,9 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { * * Unsupported predicates are skipped. */ - def convertFilters(table: Table, filters: Seq[Expression]): String = { + def convertFilters(table: Table, filters: Seq[Expression], timeZoneId: String): String = { + lazy val dateFormatter = DateFormatter(DateTimeUtils.getZoneId(timeZoneId)) + /** * An extractor that matches all binary comparison operators except null-safe equality. 
* @@ -650,6 +657,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { case Literal(null, _) => None // `null`s can be cast as other types; we want to avoid NPEs. case Literal(value, _: IntegralType) => Some(value.toString) case Literal(value, _: StringType) => Some(quoteStringLiteral(value.toString)) + case Literal(value, _: DateType) => + Some(dateFormatter.format(value.asInstanceOf[Int])) case _ => None } } @@ -700,6 +709,21 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { } } + object ExtractableDateValues { + private lazy val valueToLiteralString: PartialFunction[Any, String] = { + case value: Int => dateFormatter.format(value) + } + + def unapply(values: Set[Any]): Option[Seq[String]] = { + val extractables = values.toSeq.map(valueToLiteralString.lift) + if (extractables.nonEmpty && extractables.forall(_.isDefined)) { + Some(extractables.map(_.get)) + } else { + None + } + } + } + object SupportedAttribute { // hive varchar is treated as catalyst string, but hive varchar can't be pushed down. private val varcharKeys = table.getPartitionKeys.asScala @@ -711,7 +735,8 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { val resolver = SQLConf.get.resolver if (varcharKeys.exists(c => resolver(c, attr.name))) { None - } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType) { + } else if (attr.dataType.isInstanceOf[IntegralType] || attr.dataType == StringType || + attr.dataType == DateType) { Some(attr.name) } else { None @@ -748,6 +773,10 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { convert(And(GreaterThanOrEqual(child, Literal(sortedValues.head, dataType)), LessThanOrEqual(child, Literal(sortedValues.last, dataType)))) + case InSet(child @ ExtractAttribute(SupportedAttribute(name)), ExtractableDateValues(values)) + if useAdvanced && child.dataType == DateType => + Some(convertInToOr(name, values)) + case InSet(ExtractAttribute(SupportedAttribute(name)), ExtractableValues(values)) if useAdvanced => Some(convertInToOr(name, values)) @@ -803,11 +832,12 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { override def getPartitionsByFilter( hive: Hive, table: Table, - predicates: Seq[Expression]): Seq[Partition] = { + predicates: Seq[Expression], + timeZoneId: String): Seq[Partition] = { // Hive getPartitionsByFilter() takes a string that represents partition // predicates like "str_key=\"value\" and int_key=1 ..." 
- val filter = convertFilters(table, predicates) + val filter = convertFilters(table, predicates, timeZoneId) val partitions = if (filter.isEmpty) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 12b409e487061..6c0531182e6d6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.client +import java.sql.Date import java.util.Collections import org.apache.hadoop.hive.metastore.api.FieldSchema @@ -29,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /** * A set of tests for the filter conversion logic used when pushing partition pruning into the @@ -63,6 +65,28 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil, "1 = intcol and \"a\" = strcol") + filterTest("date filter", + (a("datecol", DateType) === Literal(Date.valueOf("2019-01-01"))) :: Nil, + "datecol = 2019-01-01") + + filterTest("date filter with IN predicate", + (a("datecol", DateType) in + (Literal(Date.valueOf("2019-01-01")), Literal(Date.valueOf("2019-01-07")))) :: Nil, + "(datecol = 2019-01-01 or datecol = 2019-01-07)") + + filterTest("date and string filter", + (Literal(Date.valueOf("2019-01-01")) === a("datecol", DateType)) :: + (Literal("a") === a("strcol", IntegerType)) :: Nil, + "2019-01-01 = datecol and \"a\" = strcol") + + filterTest("date filter with null", + (a("datecol", DateType) === Literal(null)) :: Nil, + "") + + filterTest("string filter with InSet predicate", + InSet(a("strcol", StringType), Set("1", "2").map(s => UTF8String.fromString(s))) :: Nil, + "(strcol = \"1\" or strcol = \"2\")") + filterTest("skip varchar", (Literal("") === a("varchar", StringType)) :: Nil, "") @@ -89,7 +113,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { private def filterTest(name: String, filters: Seq[Expression], result: String) = { test(name) { withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") { - val converted = shim.convertFilters(testTable, filters) + val converted = shim.convertFilters(testTable, filters, conf.sessionLocalTimeZone) if (converted != result) { fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'") } @@ -104,7 +128,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { val filters = (Literal(1) === a("intcol", IntegerType) || Literal(2) === a("intcol", IntegerType)) :: Nil - val converted = shim.convertFilters(testTable, filters) + val converted = shim.convertFilters(testTable, filters, conf.sessionLocalTimeZone) if (enabled) { assert(converted == "(1 = intcol or 2 = intcol)") } else { @@ -116,7 +140,7 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { test("SPARK-33416: Avoid Hive metastore stack overflow when InSet predicate have many values") { def checkConverted(inSet: InSet, result: String): Unit = { - assert(shim.convertFilters(testTable, inSet :: Nil) == result) + assert(shim.convertFilters(testTable, inSet :: Nil, conf.sessionLocalTimeZone) == result) } 
withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING_INSET_THRESHOLD.key -> "15") { @@ -139,6 +163,11 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { InSet(a("doublecol", DoubleType), Range(1, 20).map(s => Literal(s.toDouble).eval(EmptyRow)).toSet), "") + + checkConverted( + InSet(a("datecol", DateType), + Range(1, 20).map(d => Literal(d, DateType).eval(EmptyRow)).toSet), + "(datecol >= 1970-01-02 and datecol <= 1970-01-20)") } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index 81186909bb167..ab83f751f1425 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.client +import java.sql.Date + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -28,7 +30,8 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{BooleanType, IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{BooleanType, DateType, IntegerType, LongType, StringType, StructType} import org.apache.spark.util.Utils class HivePartitionFilteringSuite(version: String) @@ -38,15 +41,16 @@ class HivePartitionFilteringSuite(version: String) private val testPartitionCount = 3 * 5 * 4 - private def init(tryDirectSql: Boolean): HiveClient = { - val storageFormat = CatalogStorageFormat( - locationUri = None, - inputFormat = None, - outputFormat = None, - serde = None, - compressed = false, - properties = Map.empty) + private val storageFormat = CatalogStorageFormat( + locationUri = None, + inputFormat = Some(classOf[TextInputFormat].getName), + outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), + serde = Some(classOf[LazySimpleSerDe].getName()), + compressed = false, + properties = Map.empty + ) + private def init(tryDirectSql: Boolean): HiveClient = { val hadoopConf = new Configuration() hadoopConf.setBoolean(tryDirectSqlKey, tryDirectSql) hadoopConf.set("hive.metastore.warehouse.dir", Utils.createTempDir().toURI().toString()) @@ -58,14 +62,7 @@ class HivePartitionFilteringSuite(version: String) tableType = CatalogTableType.MANAGED, schema = tableSchema, partitionColumnNames = Seq("ds", "h", "chunk"), - storage = CatalogStorageFormat( - locationUri = None, - inputFormat = Some(classOf[TextInputFormat].getName), - outputFormat = Some(classOf[HiveIgnoreKeyTextOutputFormat[_, _]].getName), - serde = Some(classOf[LazySimpleSerDe].getName()), - compressed = false, - properties = Map.empty - )) + storage = storageFormat) client.createTable(table, ignoreIfExists = false) val partitions = @@ -102,7 +99,7 @@ class HivePartitionFilteringSuite(version: String) test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") { val client = init(false) val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), - Seq(attr("ds") === 20170101)) + Seq(attr("ds") === 20170101), SQLConf.get.sessionLocalTimeZone) assert(filteredPartitions.size == 
testPartitionCount) } @@ -297,6 +294,63 @@ class HivePartitionFilteringSuite(version: String) day :: Nil) } + test("getPartitionsByFilter: date type pruning by metastore") { + val table = CatalogTable( + identifier = TableIdentifier("test_date", Some("default")), + tableType = CatalogTableType.MANAGED, + schema = new StructType().add("value", "int").add("part", "date"), + partitionColumnNames = Seq("part"), + storage = storageFormat) + client.createTable(table, ignoreIfExists = false) + + val partitions = + for { + date <- Seq("2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04") + } yield CatalogTablePartition(Map( + "part" -> date + ), storageFormat) + assert(partitions.size == 4) + + client.createPartitions("default", "test_date", partitions, ignoreIfExists = false) + + def testDataTypeFiltering( + filterExprs: Seq[Expression], + expectedPartitionCubes: Seq[Seq[Date]]): Unit = { + val filteredPartitions = client.getPartitionsByFilter( + client.getTable("default", "test_date"), + filterExprs, + SQLConf.get.sessionLocalTimeZone) + + val expectedPartitions = expectedPartitionCubes.map { + expectedDt => + for { + dt <- expectedDt + } yield Set( + "part" -> dt.toString + ) + }.reduce(_ ++ _) + + assert(filteredPartitions.map(_.spec.toSet).toSet == expectedPartitions.toSet) + } + + val dateAttr: Attribute = AttributeReference("part", DateType)() + + testDataTypeFiltering( + Seq(dateAttr === Date.valueOf("2019-01-01")), + Seq("2019-01-01").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(dateAttr > Date.valueOf("2019-01-02")), + Seq("2019-01-03", "2019-01-04").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(In(dateAttr, + Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d))))), + Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + testDataTypeFiltering( + Seq(InSet(dateAttr, + Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow)))), + Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + } + private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], @@ -333,7 +387,7 @@ class HivePartitionFilteringSuite(version: String) val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), Seq( transform(filterExpr) - )) + ), SQLConf.get.sessionLocalTimeZone) val expectedPartitionCount = expectedPartitionCubes.map { case (expectedDs, expectedH, expectedChunks) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index c5c92ddad9014..d9ba6dd80e4ef 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -488,7 +488,8 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: getPartitionsByFilter") { // Only one partition [1, 1] for key2 == 1 val result = client.getPartitionsByFilter(client.getTable("default", "src_part"), - Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1)))) + Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1))), + versionSpark.conf.sessionLocalTimeZone) // Hive 0.12 doesn't support getPartitionsByFilter, it ignores the filter condition. 
if (version != "0.12") { From 19f3b89d62932fef96e72095164920deb64ea647 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 25 Nov 2020 08:59:31 +0000 Subject: [PATCH 0569/1009] [SPARK-33549][SQL] Remove configuration spark.sql.legacy.allowCastNumericToTimestamp ### What changes were proposed in this pull request? Remove SQL configuration spark.sql.legacy.allowCastNumericToTimestamp ### Why are the changes needed? In the current master branch, there is a new configuration `spark.sql.legacy.allowCastNumericToTimestamp` which controls whether to cast Numeric types to Timestamp or not. The default value is true. After https://github.com/apache/spark/pull/30260, the type conversion between Timestamp type and Numeric type is disallowed in ANSI mode. So, we don't need to a separate configuration `spark.sql.legacy.allowCastNumericToTimestamp` for disallowing the conversion. Users just need to set `spark.sql.ansi.enabled` for the behavior. As the configuration is not in any released yet, we should remove the configuration to make things simpler. ### Does this PR introduce _any_ user-facing change? No, since the configuration is not released yet. ### How was this patch tested? Existing test cases Closes #30493 from gengliangwang/LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 13 ++----------- .../org/apache/spark/sql/internal/SQLConf.scala | 12 ------------ .../spark/sql/catalyst/expressions/CastSuite.scala | 14 -------------- .../hive/execution/HiveCompatibilitySuite.scala | 6 ------ 4 files changed, 2 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 5afc308e52ead..e5f11b5e74916 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -59,8 +59,7 @@ object Cast { case (StringType, TimestampType) => true case (BooleanType, TimestampType) => true case (DateType, TimestampType) => true - case (_: NumericType, TimestampType) => - SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) + case (_: NumericType, TimestampType) => true case (StringType, DateType) => true case (TimestampType, DateType) => true @@ -273,15 +272,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure( - if (child.dataType.isInstanceOf[NumericType] && dataType.isInstanceOf[TimestampType]) { - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}," + - "you can enable the casting by setting " + - s"${SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key} to true," + - "but we strongly recommend using function " + - "TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead." 
- } else { - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}" - }) + s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ef974dc176e51..0738478888aeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2824,15 +2824,6 @@ object SQLConf { .checkValue(_ > 0, "The timeout value must be positive") .createWithDefault(10L) - val LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP = - buildConf("spark.sql.legacy.allowCastNumericToTimestamp") - .internal() - .doc("When true, allow casting numeric to timestamp," + - "when false, forbid the cast, more details in SPARK-31710") - .version("3.1.0") - .booleanConf - .createWithDefault(true) - val COALESCE_BUCKETS_IN_JOIN_ENABLED = buildConf("spark.sql.bucketing.coalesceBucketsInJoin.enabled") .doc("When true, if two bucketed tables with the different number of buckets are joined, " + @@ -3550,9 +3541,6 @@ class SQLConf extends Serializable with Logging { def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID) - def legacyAllowCastNumericToTimestamp: Boolean = - getConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP) - def metadataCacheTTL: Long = getConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS) def coalesceBucketsInJoinEnabled: Boolean = getConf(SQLConf.COALESCE_BUCKETS_IN_JOIN_ENABLED) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index afb76d8a5a68c..2bc27ad35efff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -1311,20 +1311,6 @@ class CastSuite extends CastSuiteBase { } } - test("SPARK-31710: fail casting from numeric to timestamp if it is forbidden") { - Seq(true, false).foreach { enable => - withSQLConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP.key -> enable.toString) { - assert(cast(2.toByte, TimestampType).resolved == enable) - assert(cast(10.toShort, TimestampType).resolved == enable) - assert(cast(3, TimestampType).resolved == enable) - assert(cast(10L, TimestampType).resolved == enable) - assert(cast(Decimal(1.2), TimestampType).resolved == enable) - assert(cast(1.7f, TimestampType).resolved == enable) - assert(cast(2.3d, TimestampType).resolved == enable) - } - } - } - test("SPARK-32828: cast from a derived user-defined type to a base type") { val v = Literal.create(Row(1), new ExampleSubTypeUDT()) checkEvaluation(cast(v, new ExampleBaseTypeUDT), Row(1)) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index d9b6bb43c2b47..462206d8c546f 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -40,8 +40,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning 
private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone - private val originalLegacyAllowCastNumericToTimestamp = - TestHive.conf.legacyAllowCastNumericToTimestamp def testCases: Seq[(String, File)] = { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -61,8 +59,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, "America/Los_Angeles") - // Ensures that cast numeric to timestamp enabled so that we can test them - TestHive.setConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP, true) RuleExecutor.resetMetrics() } @@ -73,8 +69,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) - TestHive.setConf(SQLConf.LEGACY_ALLOW_CAST_NUMERIC_TO_TIMESTAMP, - originalLegacyAllowCastNumericToTimestamp) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) From 2c5cc36e3f59011009c3c6083e0d0c1c81857cbd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 25 Nov 2020 12:41:53 +0000 Subject: [PATCH 0570/1009] [SPARK-33509][SQL] List partition by names from a V2 table which supports partition management ### What changes were proposed in this pull request? 1. Add new method `listPartitionByNames` to the `SupportsPartitionManagement` interface. It allows to list partitions by partition names and their values. 2. Implement new method in `InMemoryPartitionTable` which is used in DSv2 tests. ### Why are the changes needed? Currently, the `SupportsPartitionManagement` interface exposes only `listPartitionIdentifiers` which allows to list partitions by partition values. And it requires to specify all values for partition schema fields in the prefix. This restriction does not allow to list partitions by some of partition names (not all of them). For example, the table `tableA` is partitioned by two column `year` and `month` ``` CREATE TABLE tableA (price int, year int, month int) USING _ partitioned by (year, month) ``` and has the following partitions: ``` PARTITION(year = 2015, month = 1) PARTITION(year = 2015, month = 2) PARTITION(year = 2016, month = 2) PARTITION(year = 2016, month = 3) ``` If we want to list all partitions with `month = 2`, we have to specify `year` for **listPartitionIdentifiers()** which not always possible as we don't know all `year` values in advance. New method **listPartitionByNames()** allows to specify partition values only for `month`, and get two partitions: ``` PARTITION(year = 2015, month = 2) PARTITION(year = 2016, month = 2) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suite `SupportsPartitionManagementSuite`. Closes #30452 from MaxGekk/column-names-listPartitionIdentifiers. 
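For illustration, a hedged sketch of the call shape using the year/month example above (`partTable` is assumed to be a table implementing `SupportsPartitionManagement` that is partitioned by `(year, month)`, e.g. an `InMemoryPartitionTable` set up as in the updated test suite):

```
import org.apache.spark.sql.catalyst.InternalRow

// List every partition whose month value is 2, regardless of year.
val monthTwo: Array[InternalRow] =
  partTable.listPartitionByNames(Array("month"), InternalRow(2))
// Expected identifiers: InternalRow(2015, 2) and InternalRow(2016, 2)
```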
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 11 ++++- .../connector/InMemoryPartitionTable.scala | 22 ++++++++++ .../SupportsPartitionManagementSuite.scala | 43 ++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 446ea1463309f..380717d2e0e9b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -106,10 +106,19 @@ Map loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException; /** - * List the identifiers of all partitions that contains the ident in a table. + * List the identifiers of all partitions that have the ident prefix in a table. * * @param ident a prefix of partition identifier * @return an array of Identifiers for the partitions */ InternalRow[] listPartitionIdentifiers(InternalRow ident); + + /** + * List the identifiers of all partitions that match to the ident by names. + * + * @param names the names of partition values in the identifier. + * @param ident a partition identifier values. + * @return an array of Identifiers for the partitions + */ + InternalRow[] listPartitionByNames(String[] names, InternalRow ident); } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 23987e909aa70..ba762a58b1e52 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType @@ -96,4 +97,25 @@ class InMemoryPartitionTable( override protected def addPartitionKey(key: Seq[Any]): Unit = { memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } + + override def listPartitionByNames( + names: Array[String], + ident: InternalRow): Array[InternalRow] = { + assert(names.length == ident.numFields, + s"Number of partition names (${names.length}) must be equal to " + + s"the number of partition values (${ident.numFields}).") + val schema = partitionSchema + assert(names.forall(fieldName => schema.fieldNames.contains(fieldName)), + s"Some partition names ${names.mkString("[", ", ", "]")} don't belong to " + + s"the partition schema '${schema.sql}'.") + val indexes = names.map(schema.fieldIndex) + val dataTypes = names.map(schema(_).dataType) + val currentRow = new GenericInternalRow(new Array[Any](names.length)) + memoryTablePartitions.keySet().asScala.filter { key => + for (i <- 0 until names.length) { + currentRow.values(i) = key.get(indexes(i), dataTypes(i)) + } + currentRow == ident + }.toArray + } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index e8e28e3422f27..caf7e91612563 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -23,7 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryTableCatalog} +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -140,4 +140,45 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { partTable.dropPartition(partIdent1) assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) } + + test("listPartitionByNames") { + val partCatalog = new InMemoryPartitionTableCatalog + partCatalog.initialize("test", CaseInsensitiveStringMap.empty()) + val table = partCatalog.createTable( + ident, + new StructType() + .add("col0", IntegerType) + .add("part0", IntegerType) + .add("part1", StringType), + Array(LogicalExpressions.identity(ref("part0")), LogicalExpressions.identity(ref("part1"))), + util.Collections.emptyMap[String, String]) + val partTable = table.asInstanceOf[InMemoryPartitionTable] + + Seq( + InternalRow(0, "abc"), + InternalRow(0, "def"), + InternalRow(1, "abc")).foreach { partIdent => + partTable.createPartition(partIdent, new util.HashMap[String, String]()) + } + + Seq( + (Array("part0", "part1"), InternalRow(0, "abc")) -> Set(InternalRow(0, "abc")), + (Array("part0"), InternalRow(0)) -> Set(InternalRow(0, "abc"), InternalRow(0, "def")), + (Array("part1"), InternalRow("abc")) -> Set(InternalRow(0, "abc"), InternalRow(1, "abc")), + (Array.empty[String], InternalRow.empty) -> + Set(InternalRow(0, "abc"), InternalRow(0, "def"), InternalRow(1, "abc")), + (Array("part0", "part1"), InternalRow(3, "xyz")) -> Set(), + (Array("part1"), InternalRow(3.14f)) -> Set() + ).foreach { case ((names, idents), expected) => + assert(partTable.listPartitionByNames(names, idents).toSet === expected) + } + // Check invalid parameters + Seq( + (Array("part0", "part1"), InternalRow(0)), + (Array("col0", "part1"), InternalRow(0, 1)), + (Array("wrong"), InternalRow("invalid")) + ).foreach { case (names, idents) => + intercept[AssertionError](partTable.listPartitionByNames(names, idents)) + } + } } From 7c59aeeef4c571838bd291079f9b804d6f546487 Mon Sep 17 00:00:00 2001 From: duripeng Date: Wed, 25 Nov 2020 12:50:21 +0000 Subject: [PATCH 0571/1009] [SPARK-27194][SPARK-29302][SQL] Fix commit collision in dynamic partition overwrite mode ### What changes were proposed in this pull request? When using dynamic partition overwrite, each task has its working dir under staging dir like `stagingDir/.spark-staging-{jobId}`, each task commits to `outputPath/.spark-staging-{jobId}/{partitionId}/part-{taskId}-{jobId}{ext}`. When speculation enable, multiple task attempts would be setup for one task, **they have same task id and they would commit to same file concurrently**. 
Due to a host going down or node preemption, the partly-committed files aren't cleaned up, so a `FileAlreadyExistsException` is raised in this situation, resulting in job failure. I don't try to change the task commit process for dynamic partition overwrite (e.g. adding the attempt id to each attempt's task working dir and committing to the final output dir via a new outputCommitCoordinator), for these reasons: 1. `FileOutputCommitter` already has a commit coordinator for task attempts, so we can leverage it rather than build a new one. 2. Even if we implemented a coordinator that resolves task attempt commit conflicts, consider a severe case such as an application master failover: tasks with the same attempt id and the same task id would still commit to the same files, so the `FileAlreadyExistsException` risk remains. In this PR, I leverage `FileOutputCommitter` to solve the problem: 1. when initializing a write job description, set `outputPath/.spark-staging-{jobId}` as the output dir 2. each task attempt writes output to `outputPath/.spark-staging-{jobId}/_temporary/${appAttemptId}/_temporary/${taskAttemptId}/{partitionId}/part-{taskId}-{jobId}{ext}` 3. leveraging the `FileOutputCommitter` coordinator, the write job first commits output to `outputPath/.spark-staging-{jobId}/{partitionId}` 4. for dynamic partition overwrite, the write job finally moves `outputPath/.spark-staging-{jobId}/{partitionId}` to `outputPath/{partitionId}` ### Why are the changes needed? Without this PR, dynamic partition overwrite can fail with a `FileAlreadyExistsException` when multiple attempts of the same task commit concurrently. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #29000 from WinkerDu/master-fix-dynamic-partition-multi-commit. Authored-by: duripeng Signed-off-by: Wenchen Fan --- .../internal/io/FileCommitProtocol.scala | 4 ++ .../io/HadoopMapReduceCommitProtocol.scala | 41 +++++++++++----- .../InsertIntoHadoopFsRelationCommand.scala | 14 +++++- .../SQLHadoopMapReduceCommitProtocol.scala | 3 +- .../sql/sources/PartitionedWriteSuite.scala | 47 ++++++++++++++++++- 5 files changed, 92 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala index 0746e43babf9a..d9d7b06cdb8ce 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/FileCommitProtocol.scala @@ -169,4 +169,8 @@ object FileCommitProtocol extends Logging { ctor.newInstance(jobId, outputPath) } } + + def getStagingDir(path: String, jobId: String): Path = { + new Path(path, ".spark-staging-" + jobId) + } } diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 11ce608f52ee2..30f9a650a69c9 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -41,13 +41,28 @@ import org.apache.spark.mapred.SparkHadoopMapRedUtil * @param jobId the job's or stage's id * @param path the job's output path, or null if committer acts as a noop * @param dynamicPartitionOverwrite If true, Spark will overwrite partition directories at runtime - * dynamically, i.e., we first write files under a staging - * directory with partition path, e.g. - * /path/to/staging/a=1/b=1/xxx.parquet.
When committing the job, - * we first clean up the corresponding partition directories at - * destination path, e.g. /path/to/destination/a=1/b=1, and move - * files from staging directory to the corresponding partition - * directories under destination path. + * dynamically. Suppose final path is /path/to/outputPath, output + * path of [[FileOutputCommitter]] is an intermediate path, e.g. + * /path/to/outputPath/.spark-staging-{jobId}, which is a staging + * directory. Task attempts firstly write files under the + * intermediate path, e.g. + * /path/to/outputPath/.spark-staging-{jobId}/_temporary/ + * {appAttemptId}/_temporary/{taskAttemptId}/a=1/b=1/xxx.parquet. + * + * 1. When [[FileOutputCommitter]] algorithm version set to 1, + * we firstly move task attempt output files to + * /path/to/outputPath/.spark-staging-{jobId}/_temporary/ + * {appAttemptId}/{taskId}/a=1/b=1, + * then move them to + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1. + * 2. When [[FileOutputCommitter]] algorithm version set to 2, + * committing tasks directly move task attempt output files to + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1. + * + * At the end of committing job, we move output files from + * intermediate path to final path, e.g., move files from + * /path/to/outputPath/.spark-staging-{jobId}/a=1/b=1 + * to /path/to/outputPath/a=1/b=1 */ class HadoopMapReduceCommitProtocol( jobId: String, @@ -89,7 +104,7 @@ class HadoopMapReduceCommitProtocol( * The staging directory of this write job. Spark uses it to deal with files with absolute output * path, or writing data into partitioned directory with dynamicPartitionOverwrite=true. */ - private def stagingDir = new Path(path, ".spark-staging-" + jobId) + protected def stagingDir = getStagingDir(path, jobId) protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = { val format = context.getOutputFormatClass.getConstructor().newInstance() @@ -106,13 +121,13 @@ class HadoopMapReduceCommitProtocol( val filename = getFilename(taskContext, ext) val stagingDir: Path = committer match { - case _ if dynamicPartitionOverwrite => - assert(dir.isDefined, - "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.") - partitionPaths += dir.get - this.stagingDir // For FileOutputCommitter it has its own staging path called "work path". 
case f: FileOutputCommitter => + if (dynamicPartitionOverwrite) { + assert(dir.isDefined, + "The dataset to be written must be partitioned when dynamicPartitionOverwrite is true.") + partitionPaths += dir.get + } new Path(Option(f.getWorkPath).map(_.toString).getOrElse(path)) case _ => new Path(path) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index fe733f4238e1a..db7264d0c6ec8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -106,9 +106,10 @@ case class InsertIntoHadoopFsRelationCommand( fs, catalogTable.get, qualifiedOutputPath, matchingPartitions) } + val jobId = java.util.UUID.randomUUID().toString val committer = FileCommitProtocol.instantiate( sparkSession.sessionState.conf.fileCommitProtocolClass, - jobId = java.util.UUID.randomUUID().toString, + jobId = jobId, outputPath = outputPath.toString, dynamicPartitionOverwrite = dynamicPartitionOverwrite) @@ -163,6 +164,15 @@ case class InsertIntoHadoopFsRelationCommand( } } + // For dynamic partition overwrite, FileOutputCommitter's output path is staging path, files + // will be renamed from staging path to final output path during commit job + val committerOutputPath = if (dynamicPartitionOverwrite) { + FileCommitProtocol.getStagingDir(outputPath.toString, jobId) + .makeQualified(fs.getUri, fs.getWorkingDirectory) + } else { + qualifiedOutputPath + } + val updatedPartitionPaths = FileFormatWriter.write( sparkSession = sparkSession, @@ -170,7 +180,7 @@ case class InsertIntoHadoopFsRelationCommand( fileFormat = fileFormat, committer = committer, outputSpec = FileFormatWriter.OutputSpec( - qualifiedOutputPath.toString, customPartitionLocations, outputColumns), + committerOutputPath.toString, customPartitionLocations, outputColumns), hadoopConf = hadoopConf, partitionColumns = partitionColumns, bucketSpec = bucketSpec, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala index 39c594a9bc618..144be2316f091 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala @@ -55,7 +55,8 @@ class SQLHadoopMapReduceCommitProtocol( // The specified output committer is a FileOutputCommitter. // So, we will use the FileOutputCommitter-specified constructor. val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext]) - committer = ctor.newInstance(new Path(path), context) + val committerOutputPath = if (dynamicPartitionOverwrite) stagingDir else new Path(path) + committer = ctor.newInstance(committerOutputPath, context) } else { // The specified output committer is just an OutputCommitter. // So, we will use the no-argument constructor. 
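Taken together, `FileCommitProtocol.getStagingDir`, the work-path handling in `HadoopMapReduceCommitProtocol.newTaskTempFile`, and the `committerOutputPath` switch in `SQLHadoopMapReduceCommitProtocol` produce the layout described in steps 1-4 of the description. The sketch below is only an illustration of that layout with made-up values (plain strings, no Hadoop types); it is not the Spark implementation:
```
// Illustration only: how the staging layout from this patch fits together when
// dynamicPartitionOverwrite = true. Output path, job id and partition are made up.
object StagingLayoutSketch {
  // Mirrors the new FileCommitProtocol.getStagingDir: <outputPath>/.spark-staging-<jobId>
  def stagingDir(outputPath: String, jobId: String): String =
    s"$outputPath/.spark-staging-$jobId"

  def main(args: Array[String]): Unit = {
    val outputPath = "/warehouse/t"   // illustrative table location
    val jobId = "8f14e45f-ceea"       // illustrative job id
    val partition = "a=1"             // illustrative dynamic partition

    val staging = stagingDir(outputPath, jobId)
    // The FileOutputCommitter's output path is now the staging dir, so task attempts write
    // into its usual _temporary work path under the staging dir:
    println(s"$staging/_temporary/<appAttemptId>/_temporary/<taskAttemptId>/$partition/part-...")
    // Committing the task/job promotes the files to the staging partition dir:
    println(s"$staging/$partition/part-...")
    // Finally, commitJob moves each staging partition dir to its final location:
    println(s"$outputPath/$partition/part-...")
  }
}
```
Each task attempt gets its own `_temporary/<taskAttemptId>` directory here, which is why the existing `FileOutputCommitter` commit coordination is enough to avoid the collision.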
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 6df1c5db14c26..52825a155e46a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.sources import java.io.File import java.sql.Timestamp -import org.apache.hadoop.mapreduce.TaskAttemptContext +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} import org.apache.spark.TestUtils import org.apache.spark.internal.Logging @@ -164,4 +165,48 @@ class PartitionedWriteSuite extends QueryTest with SharedSparkSession { assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`;")) } } + + test("SPARK-27194 SPARK-29302: Fix commit collision in dynamic partition overwrite mode") { + withSQLConf(SQLConf.PARTITION_OVERWRITE_MODE.key -> + SQLConf.PartitionOverwriteMode.DYNAMIC.toString, + SQLConf.FILE_COMMIT_PROTOCOL_CLASS.key -> + classOf[PartitionFileExistCommitProtocol].getName) { + withTempDir { d => + withTable("t") { + sql( + s""" + | create table t(c1 int, p1 int) using parquet partitioned by (p1) + | location '${d.getAbsolutePath}' + """.stripMargin) + + val df = Seq((1, 2)).toDF("c1", "p1") + df.write + .partitionBy("p1") + .mode("overwrite") + .saveAsTable("t") + checkAnswer(sql("select * from t"), df) + } + } + } + } +} + +/** + * A file commit protocol with pre-created partition file. when try to overwrite partition dir + * in dynamic partition mode, FileAlreadyExist exception would raise without SPARK-27194 + */ +private class PartitionFileExistCommitProtocol( + jobId: String, + path: String, + dynamicPartitionOverwrite: Boolean) + extends SQLHadoopMapReduceCommitProtocol(jobId, path, dynamicPartitionOverwrite) { + override def setupJob(jobContext: JobContext): Unit = { + super.setupJob(jobContext) + val stagingDir = new File(new Path(path).toUri.getPath, s".spark-staging-$jobId") + stagingDir.mkdirs() + val stagingPartDir = new File(stagingDir, "p1=2") + stagingPartDir.mkdirs() + val conflictTaskFile = new File(stagingPartDir, s"part-00000-$jobId.c000.snappy.parquet") + conflictTaskFile.createNewFile() + } } From 6f68ccf532ec3fdd7224ba05c52bce58372572e9 Mon Sep 17 00:00:00 2001 From: Ryan Blue Date: Wed, 25 Nov 2020 15:09:02 +0000 Subject: [PATCH 0572/1009] [SPARK-31257][SPARK-33561][SQL] Unify create table syntax ### What changes were proposed in this pull request? * Unify the create table syntax in the parser by merging Hive and DataSource clauses * Add `SerdeInfo` and `external` boolean to statement plans and update AstBuilder to produce them * Add conversion from create statement plan to v1 create plans in ResolveSessionCatalog * Support new statement clauses in ResolveCatalogs conversion to v2 create plans * Remove SparkSqlParser rules for Hive syntax * Add "option." namespace to distinguish SERDEPROPERTIES and OPTIONS in table properties ### Why are the changes needed? * Current behavior is confusing. * A way to pass the Hive create options to DSv2 is needed for a Hive source. ### Does this PR introduce any user-facing change? Not by default, but v2 sources will be able to handle STORED AS and other Hive clauses. ### How was this patch tested? Existing tests validate there are no behavior changes. 
Update unit tests for using a statement plan for Hive create syntax: * Move create tests from spark-sql DDLParserSuite into PlanResolutionSuite * Add parser tests to spark-catalyst DDLParserSuite Closes #28026 from rdblue/unify-create-table. Lead-authored-by: Ryan Blue Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 29 +- .../sql/connector/catalog/TableCatalog.java | 10 + .../catalyst/analysis/ResolveCatalogs.scala | 16 +- .../sql/catalyst/parser/AstBuilder.scala | 331 +++++++-- .../catalyst/plans/logical/statements.scala | 81 +++ .../sql/connector/catalog/CatalogV2Util.scala | 55 +- .../sql/catalyst/parser/DDLParserSuite.scala | 348 ++++++++- .../apache/spark/sql/DataFrameWriter.scala | 5 +- .../apache/spark/sql/DataFrameWriterV2.scala | 5 +- .../analysis/ResolveSessionCatalog.scala | 111 ++- .../spark/sql/execution/SparkSqlParser.scala | 394 ++--------- .../datasources/v2/V2SessionCatalog.scala | 8 +- .../sql/connector/DataSourceV2SQLSuite.scala | 4 +- .../sql/execution/SparkSqlParserSuite.scala | 129 +--- .../execution/command/DDLParserSuite.scala | 524 +------------- .../command/PlanResolutionSuite.scala | 660 +++++++++++++++++- .../sources/CreateTableAsSelectSuite.scala | 4 +- .../sql/hive/execution/HiveDDLSuite.scala | 24 +- .../sql/hive/execution/HiveSerDeSuite.scala | 7 +- .../sql/hive/execution/SQLQuerySuite.scala | 3 +- 20 files changed, 1626 insertions(+), 1122 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 6b6b751cc3c15..5d17028c32ae2 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -119,20 +119,9 @@ statement (RESTRICT | CASCADE)? #dropNamespace | SHOW (DATABASES | NAMESPACES) ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showNamespaces - | createTableHeader ('(' colTypeList ')')? tableProvider + | createTableHeader ('(' colTypeList ')')? tableProvider? createTableClauses (AS? query)? #createTable - | createTableHeader ('(' columns=colTypeList ')')? - (commentSpec | - (PARTITIONED BY '(' partitionColumns=colTypeList ')' | - PARTITIONED BY partitionColumnNames=identifierList) | - bucketSpec | - skewSpec | - rowFormat | - createFileFormat | - locationSpec | - (TBLPROPERTIES tableProps=tablePropertyList))* - (AS? query)? #createHiveTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | @@ -140,7 +129,7 @@ statement createFileFormat | locationSpec | (TBLPROPERTIES tableProps=tablePropertyList))* #createTableLike - | replaceTableHeader ('(' colTypeList ')')? tableProvider + | replaceTableHeader ('(' colTypeList ')')? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE multipartIdentifier partitionSpec? 
COMPUTE STATISTICS @@ -393,8 +382,11 @@ tableProvider createTableClauses :((OPTIONS options=tablePropertyList) | - (PARTITIONED BY partitioning=transformList) | + (PARTITIONED BY partitioning=partitionFieldList) | + skewSpec | bucketSpec | + rowFormat | + createFileFormat | locationSpec | commentSpec | (TBLPROPERTIES tableProps=tablePropertyList))* @@ -741,8 +733,13 @@ namedExpressionSeq : namedExpression (',' namedExpression)* ; -transformList - : '(' transforms+=transform (',' transforms+=transform)* ')' +partitionFieldList + : '(' fields+=partitionField (',' fields+=partitionField)* ')' + ; + +partitionField + : transform #partitionTransform + | colType #partitionColumn ; transform diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 92079d127b1e3..52a74ab9dd9f5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -46,6 +46,11 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_LOCATION = "location"; + /** + * A reserved property to specify a table was created with EXTERNAL. + */ + String PROP_EXTERNAL = "external"; + /** * A reserved property to specify the description of the table. */ @@ -61,6 +66,11 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_OWNER = "owner"; + /** + * A prefix used to pass OPTIONS in table properties + */ + String OPTION_PREFIX = "option."; + /** * List the tables in a namespace from the catalog. *

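The two reserved keys added to `TableCatalog` above (`PROP_EXTERNAL` and `OPTION_PREFIX`) are how the unified syntax hands Hive-style clauses and OPTIONS to a v2 catalog as plain table properties (see `CatalogV2Util.convertTableProperties` further down in this patch). A rough sketch of the resulting property map, with made-up option and format values and assuming an external table stored as parquet:
```
// Illustration only, not Spark code: roughly what a v2 catalog sees in its table
// properties after this patch. The option key/value and file format are invented.
object UnifiedCreateTablePropsSketch {
  val OptionPrefix = "option."   // TableCatalog.OPTION_PREFIX
  val PropExternal = "external"  // TableCatalog.PROP_EXTERNAL

  def main(args: Array[String]): Unit = {
    val options  = Map("compression" -> "zstd")  // illustrative OPTIONS (...)
    val storedAs = "parquet"                     // STORED AS parquet
    val external = true                          // CREATE EXTERNAL TABLE

    val props =
      options ++                                               // raw OPTIONS keys, as before
      options.map { case (k, v) => OptionPrefix + k -> v } ++  // the same keys under "option."
      Map("hive.stored-as" -> storedAs) ++                     // STORED AS surfaced as a property
      (if (external) Map(PropExternal -> "true") else Map.empty[String, String])

    props.foreach { case (k, v) => println(s"$k = $v") }
  }
}
```
Keeping both the raw keys and the `option.`-prefixed copies matches the comment in `convertTableProperties` about easing the transition to the `option.` prefix.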
    diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index deeb8215d22c6..7354d2478b7c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -143,7 +143,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) case c @ CreateTableStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( @@ -152,11 +152,11 @@ class ResolveCatalogs(val catalogManager: CatalogManager) c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), ignoreIfExists = c.ifNotExists) case c @ CreateTableAsSelectStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -166,12 +166,12 @@ class ResolveCatalogs(val catalogManager: CatalogManager) // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) case c @ ReplaceTableStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( @@ -180,11 +180,11 @@ class ResolveCatalogs(val catalogManager: CatalogManager) c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), orCreate = c.orCreate) case c @ ReplaceTableAsSelectStatement( - NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -194,7 +194,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, c.provider), + convertTableProperties(c), writeOptions = c.writeOptions, orCreate = c.orCreate) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5f8394c525949..25423e510157a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2459,10 +2459,22 @@ class AstBuilder extends 
SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Type to keep track of table clauses: - * (partitioning, bucketSpec, properties, options, location, comment). + * - partition transforms + * - partition columns + * - bucketSpec + * - properties + * - options + * - location + * - comment + * - serde + * + * Note: Partition transforms are based on existing table schema definition. It can be simple + * column names, or functions like `year(date_col)`. Partition columns are column names with data + * types like `i INT`, which should be appended to the existing table schema. */ - type TableClauses = (Seq[Transform], Option[BucketSpec], Map[String, String], - Map[String, String], Option[String], Option[String]) + type TableClauses = ( + Seq[Transform], Seq[StructField], Option[BucketSpec], Map[String, String], + Map[String, String], Option[String], Option[String], Option[SerdeInfo]) /** * Validate a create table statement and return the [[TableIdentifier]]. @@ -2495,9 +2507,22 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Parse a list of transforms. + * Parse a list of transforms or columns. */ - override def visitTransformList(ctx: TransformListContext): Seq[Transform] = withOrigin(ctx) { + override def visitPartitionFieldList( + ctx: PartitionFieldListContext): (Seq[Transform], Seq[StructField]) = withOrigin(ctx) { + val (transforms, columns) = ctx.fields.asScala.map { + case transform: PartitionTransformContext => + (Some(visitPartitionTransform(transform)), None) + case field: PartitionColumnContext => + (None, Some(visitColType(field.colType))) + }.unzip + + (transforms.flatten.toSeq, columns.flatten.toSeq) + } + + override def visitPartitionTransform( + ctx: PartitionTransformContext): Transform = withOrigin(ctx) { def getFieldReference( ctx: ApplyTransformContext, arg: V2Expression): FieldReference = { @@ -2524,7 +2549,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } - ctx.transforms.asScala.map { + ctx.transform match { case identityCtx: IdentityTransformContext => IdentityTransform(FieldReference(typedVisit[Seq[String]](identityCtx.qualifiedName))) @@ -2563,7 +2588,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case name => ApplyTransform(name, arguments) } - }.toSeq + } } /** @@ -2763,16 +2788,157 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg (filtered, path) } + /** + * Create a [[SerdeInfo]] for creating tables. + * + * Format: STORED AS (name | INPUTFORMAT input_format OUTPUTFORMAT output_format) + */ + override def visitCreateFileFormat(ctx: CreateFileFormatContext): SerdeInfo = withOrigin(ctx) { + (ctx.fileFormat, ctx.storageHandler) match { + // Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format + case (c: TableFileFormatContext, null) => + SerdeInfo(formatClasses = Some(FormatClasses(string(c.inFmt), string(c.outFmt)))) + // Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO + case (c: GenericFileFormatContext, null) => + SerdeInfo(storedAs = Some(c.identifier.getText)) + case (null, storageHandler) => + operationNotAllowed("STORED BY", ctx) + case _ => + throw new ParseException("Expected either STORED AS or STORED BY, not both", ctx) + } + } + + /** + * Create a [[SerdeInfo]] used for creating tables. 
+ * + * Example format: + * {{{ + * SERDE serde_name [WITH SERDEPROPERTIES (k1=v1, k2=v2, ...)] + * }}} + * + * OR + * + * {{{ + * DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] + * [COLLECTION ITEMS TERMINATED BY char] + * [MAP KEYS TERMINATED BY char] + * [LINES TERMINATED BY char] + * [NULL DEFINED AS char] + * }}} + */ + def visitRowFormat(ctx: RowFormatContext): SerdeInfo = withOrigin(ctx) { + ctx match { + case serde: RowFormatSerdeContext => visitRowFormatSerde(serde) + case delimited: RowFormatDelimitedContext => visitRowFormatDelimited(delimited) + } + } + + /** + * Create SERDE row format name and properties pair. + */ + override def visitRowFormatSerde(ctx: RowFormatSerdeContext): SerdeInfo = withOrigin(ctx) { + import ctx._ + SerdeInfo( + serde = Some(string(name)), + serdeProperties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) + } + + /** + * Create a delimited row format properties object. + */ + override def visitRowFormatDelimited( + ctx: RowFormatDelimitedContext): SerdeInfo = withOrigin(ctx) { + // Collect the entries if any. + def entry(key: String, value: Token): Seq[(String, String)] = { + Option(value).toSeq.map(x => key -> string(x)) + } + // TODO we need proper support for the NULL format. + val entries = + entry("field.delim", ctx.fieldsTerminatedBy) ++ + entry("serialization.format", ctx.fieldsTerminatedBy) ++ + entry("escape.delim", ctx.escapedBy) ++ + // The following typo is inherited from Hive... + entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++ + entry("mapkey.delim", ctx.keysTerminatedBy) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "line.delim" -> value + } + SerdeInfo(serdeProperties = entries.toMap) + } + + /** + * Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT + * and STORED AS. + * + * The following are allowed. Anything else is not: + * ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] + * ROW FORMAT DELIMITED ... STORED AS TEXTFILE + * ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... 
+ */ + protected def validateRowFormatFileFormat( + rowFormatCtx: RowFormatContext, + createFileFormatCtx: CreateFileFormatContext, + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx == null || createFileFormatCtx == null) { + return + } + (rowFormatCtx, createFileFormatCtx.fileFormat) match { + case (_, ffTable: TableFileFormatContext) => // OK + case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case ("sequencefile" | "textfile" | "rcfile") => // OK + case fmt => + operationNotAllowed( + s"ROW FORMAT SERDE is incompatible with format '$fmt', which also specifies a serde", + parentCtx) + } + case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => + ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { + case "textfile" => // OK + case fmt => operationNotAllowed( + s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx) + } + case _ => + // should never happen + def str(ctx: ParserRuleContext): String = { + (0 until ctx.getChildCount).map { i => ctx.getChild(i).getText }.mkString(" ") + } + operationNotAllowed( + s"Unexpected combination of ${str(rowFormatCtx)} and ${str(createFileFormatCtx)}", + parentCtx) + } + } + + protected def validateRowFormatFileFormat( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + parentCtx: ParserRuleContext): Unit = { + if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { + validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) + } + } + override def visitCreateTableClauses(ctx: CreateTableClausesContext): TableClauses = { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) checkDuplicateClauses(ctx.OPTIONS, "OPTIONS", ctx) checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) + checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) + checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - val partitioning: Seq[Transform] = - Option(ctx.partitioning).map(visitTransformList).getOrElse(Nil) + if (ctx.skewSpec.size > 0) { + operationNotAllowed("CREATE TABLE ... 
SKEWED BY", ctx) + } + + val (partTransforms, partCols) = + Option(ctx.partitioning).map(visitPartitionFieldList).getOrElse((Nil, Nil)) val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) val cleanedProperties = cleanTableProperties(ctx, properties) @@ -2780,7 +2946,45 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val location = visitLocationSpecList(ctx.locationSpec()) val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) val comment = visitCommentSpecList(ctx.commentSpec()) - (partitioning, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment) + val serdeInfo = getSerdeInfo(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx) + (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, + serdeInfo) + } + + protected def getSerdeInfo( + rowFormatCtx: Seq[RowFormatContext], + createFileFormatCtx: Seq[CreateFileFormatContext], + ctx: ParserRuleContext, + skipCheck: Boolean = false): Option[SerdeInfo] = { + if (!skipCheck) validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) + val rowFormatSerdeInfo = rowFormatCtx.map(visitRowFormat) + val fileFormatSerdeInfo = createFileFormatCtx.map(visitCreateFileFormat) + (fileFormatSerdeInfo ++ rowFormatSerdeInfo).reduceLeftOption((l, r) => l.merge(r)) + } + + private def partitionExpressions( + partTransforms: Seq[Transform], + partCols: Seq[StructField], + ctx: ParserRuleContext): Seq[Transform] = { + if (partTransforms.nonEmpty) { + if (partCols.nonEmpty) { + val references = partTransforms.map(_.describe()).mkString(", ") + val columns = partCols + .map(field => s"${field.name} ${field.dataType.simpleString}") + .mkString(", ") + operationNotAllowed( + s"""PARTITION BY: Cannot mix partition expressions and partition columns: + |Expressions: $references + |Columns: $columns""".stripMargin, ctx) + + } + partTransforms + } else { + // columns were added to create the schema. convert to column references + partCols.map { column => + IdentityTransform(FieldReference(Seq(column.name))) + } + } } /** @@ -2789,13 +2993,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Expected format: * {{{ * CREATE [TEMPORARY] TABLE [IF NOT EXISTS] [db_name.]table_name - * USING table_provider + * [USING table_provider] * create_table_clauses * [[AS] select_statement]; * * create_table_clauses (order insensitive): + * [PARTITIONED BY (partition_fields)] * [OPTIONS table_property_list] - * [PARTITIONED BY (col_name, transform(col_name), transform(constant, col_name), ...)] + * [ROW FORMAT row_format] + * [STORED AS file_format] * [CLUSTERED BY (col_name, col_name, ...) * [SORTED BY (col_name [ASC|DESC], ...)] * INTO num_buckets BUCKETS @@ -2803,40 +3009,55 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * [LOCATION path] * [COMMENT table_comment] * [TBLPROPERTIES (property_name=property_value, ...)] + * + * partition_fields: + * col_name, transform(col_name), transform(constant, col_name), ... | + * col_name data_type [NOT NULL] [COMMENT col_comment], ... 
* }}} */ override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - if (external) { - operationNotAllowed("CREATE EXTERNAL TABLE ...", ctx) - } - val schema = Option(ctx.colTypeList()).map(createSchema) + + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) - val (partitioning, bucketSpec, properties, options, location, comment) = + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - Option(ctx.query).map(plan) match { - case Some(_) if temp => - operationNotAllowed("CREATE TEMPORARY TABLE ... USING ... AS query", ctx) + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) + } + + if (temp) { + val asSelect = if (ctx.query == null) "" else " AS ..." + operationNotAllowed( + s"CREATE TEMPORARY TABLE ...$asSelect, use CREATE TEMPORARY VIEW instead", ctx) + } - case Some(_) if schema.isDefined => + val partitioning = partitionExpressions(partTransforms, partCols, ctx) + + Option(ctx.query).map(plan) match { + case Some(_) if columns.nonEmpty => operationNotAllowed( "Schema may not be specified in a Create Table As Select (CTAS) statement", ctx) + case Some(_) if partCols.nonEmpty => + // non-reference partition columns are not allowed because schema can't be specified + operationNotAllowed( + "Partition column types may not be specified in Create Table As Select (CTAS)", + ctx) + case Some(query) => CreateTableAsSelectStatement( table, query, partitioning, bucketSpec, properties, provider, options, location, comment, - writeOptions = Map.empty, ifNotExists = ifNotExists) - - case None if temp => - // CREATE TEMPORARY TABLE ... USING ... is not supported by the catalyst parser. - // Use CREATE TEMPORARY VIEW ... USING ... instead. - operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) + writeOptions = Map.empty, serdeInfo, external = external, ifNotExists = ifNotExists) case _ => - CreateTableStatement(table, schema.getOrElse(new StructType), partitioning, bucketSpec, - properties, provider, options, location, comment, ifNotExists = ifNotExists) + // Note: table schema includes both the table columns list and the partition columns + // with data type. + val schema = StructType(columns ++ partCols) + CreateTableStatement(table, schema, partitioning, bucketSpec, properties, provider, + options, location, comment, serdeInfo, external = external, ifNotExists = ifNotExists) } } @@ -2846,13 +3067,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Expected format: * {{{ * [CREATE OR] REPLACE TABLE [db_name.]table_name - * USING table_provider + * [USING table_provider] * replace_table_clauses * [[AS] select_statement]; * * replace_table_clauses (order insensitive): * [OPTIONS table_property_list] - * [PARTITIONED BY (col_name, transform(col_name), transform(constant, col_name), ...)] + * [PARTITIONED BY (partition_fields)] * [CLUSTERED BY (col_name, col_name, ...) 
* [SORTED BY (col_name [ASC|DESC], ...)] * INTO num_buckets BUCKETS @@ -2860,33 +3081,63 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * [LOCATION path] * [COMMENT table_comment] * [TBLPROPERTIES (property_name=property_value, ...)] + * + * partition_fields: + * col_name, transform(col_name), transform(constant, col_name), ... | + * col_name data_type [NOT NULL] [COMMENT col_comment], ... * }}} */ override def visitReplaceTable(ctx: ReplaceTableContext): LogicalPlan = withOrigin(ctx) { - val (table, _, ifNotExists, external) = visitReplaceTableHeader(ctx.replaceTableHeader) + val (table, temp, ifNotExists, external) = visitReplaceTableHeader(ctx.replaceTableHeader) + val orCreate = ctx.replaceTableHeader().CREATE() != null + + if (temp) { + val action = if (orCreate) "CREATE OR REPLACE" else "REPLACE" + operationNotAllowed(s"$action TEMPORARY TABLE ..., use $action TEMPORARY VIEW instead.", ctx) + } + if (external) { - operationNotAllowed("REPLACE EXTERNAL TABLE ... USING", ctx) + operationNotAllowed("REPLACE EXTERNAL TABLE ...", ctx) + } + + if (ifNotExists) { + operationNotAllowed("REPLACE ... IF NOT EXISTS, use CREATE IF NOT EXISTS instead", ctx) } - val (partitioning, bucketSpec, properties, options, location, comment) = + val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - val schema = Option(ctx.colTypeList()).map(createSchema) + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) - val orCreate = ctx.replaceTableHeader().CREATE() != null + + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE ... USING ... ${serdeInfo.get.describe}", ctx) + } + + val partitioning = partitionExpressions(partTransforms, partCols, ctx) Option(ctx.query).map(plan) match { - case Some(_) if schema.isDefined => + case Some(_) if columns.nonEmpty => operationNotAllowed( "Schema may not be specified in a Replace Table As Select (RTAS) statement", ctx) + case Some(_) if partCols.nonEmpty => + // non-reference partition columns are not allowed because schema can't be specified + operationNotAllowed( + "Partition column types may not be specified in Replace Table As Select (RTAS)", + ctx) + case Some(query) => ReplaceTableAsSelectStatement(table, query, partitioning, bucketSpec, properties, - provider, options, location, comment, writeOptions = Map.empty, orCreate = orCreate) + provider, options, location, comment, writeOptions = Map.empty, serdeInfo, + orCreate = orCreate) case _ => - ReplaceTableStatement(table, schema.getOrElse(new StructType), partitioning, - bucketSpec, properties, provider, options, location, comment, orCreate = orCreate) + // Note: table schema includes both the table columns list and the partition columns + // with data type. 
+ val schema = StructType(columns ++ partCols) + ReplaceTableStatement(table, schema, partitioning, bucketSpec, properties, provider, + options, location, comment, serdeInfo, orCreate = orCreate) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 3660e8a95a7f6..281d57b3648f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -53,6 +53,81 @@ abstract class ParsedStatement extends LogicalPlan { final override lazy val resolved = false } +/** + * Type to keep track of Hive serde info + */ +case class SerdeInfo( + storedAs: Option[String] = None, + formatClasses: Option[FormatClasses] = None, + serde: Option[String] = None, + serdeProperties: Map[String, String] = Map.empty) { + // this uses assertions because validation is done in validateRowFormatFileFormat etc. + assert(storedAs.isEmpty || formatClasses.isEmpty, + "Cannot specify both STORED AS and INPUTFORMAT/OUTPUTFORMAT") + + def describe: String = { + val serdeString = if (serde.isDefined || serdeProperties.nonEmpty) { + "ROW FORMAT " + serde.map(sd => s"SERDE $sd").getOrElse("DELIMITED") + } else { + "" + } + + this match { + case SerdeInfo(Some(storedAs), _, _, _) => + s"STORED AS $storedAs $serdeString" + case SerdeInfo(_, Some(formatClasses), _, _) => + s"STORED AS $formatClasses $serdeString" + case _ => + serdeString + } + } + + def merge(other: SerdeInfo): SerdeInfo = { + def getOnly[T](desc: String, left: Option[T], right: Option[T]): Option[T] = { + (left, right) match { + case (Some(l), Some(r)) => + assert(l == r, s"Conflicting $desc values: $l != $r") + left + case (Some(_), _) => + left + case (_, Some(_)) => + right + case _ => + None + } + } + + SerdeInfo.checkSerdePropMerging(serdeProperties, other.serdeProperties) + SerdeInfo( + getOnly("STORED AS", storedAs, other.storedAs), + getOnly("INPUTFORMAT/OUTPUTFORMAT", formatClasses, other.formatClasses), + getOnly("SERDE", serde, other.serde), + serdeProperties ++ other.serdeProperties) + } +} + +case class FormatClasses(input: String, output: String) { + override def toString: String = s"INPUTFORMAT $input OUTPUTFORMAT $output" +} + +object SerdeInfo { + val empty: SerdeInfo = SerdeInfo(None, None, None, Map.empty) + + def checkSerdePropMerging( + props1: Map[String, String], props2: Map[String, String]): Unit = { + val conflictKeys = props1.keySet.intersect(props2.keySet) + if (conflictKeys.nonEmpty) { + throw new UnsupportedOperationException( + s""" + |Cannot safely merge SERDEPROPERTIES: + |${props1.map { case (k, v) => s"$k=$v" }.mkString("{", ",", "}")} + |${props2.map { case (k, v) => s"$k=$v" }.mkString("{", ",", "}")} + |The conflict keys: ${conflictKeys.mkString(", ")} + |""".stripMargin) + } + } +} + /** * A CREATE TABLE command, as parsed from SQL. 
* @@ -68,6 +143,8 @@ case class CreateTableStatement( options: Map[String, String], location: Option[String], comment: Option[String], + serde: Option[SerdeInfo], + external: Boolean, ifNotExists: Boolean) extends ParsedStatement /** @@ -84,6 +161,8 @@ case class CreateTableAsSelectStatement( location: Option[String], comment: Option[String], writeOptions: Map[String, String], + serde: Option[SerdeInfo], + external: Boolean, ifNotExists: Boolean) extends ParsedStatement { override def children: Seq[LogicalPlan] = Seq(asSelect) @@ -119,6 +198,7 @@ case class ReplaceTableStatement( options: Map[String, String], location: Option[String], comment: Option[String], + serde: Option[SerdeInfo], orCreate: Boolean) extends ParsedStatement /** @@ -135,6 +215,7 @@ case class ReplaceTableAsSelectStatement( location: Option[String], comment: Option[String], writeOptions: Map[String, String], + serde: Option[SerdeInfo], orCreate: Boolean) extends ParsedStatement { override def children: Seq[LogicalPlan] = Seq(asSelect) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 1a3a7207c6ca9..b6dc4f61c8588 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamedRelation, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.AlterTable +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelectStatement, CreateTableStatement, ReplaceTableAsSelectStatement, ReplaceTableStatement, SerdeInfo} import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.{ArrayType, DataType, HIVE_TYPE_STRING, HiveStringType, MapType, NullType, StructField, StructType} @@ -295,18 +295,65 @@ private[sql] object CatalogV2Util { catalog.name().equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME) } - def convertTableProperties( + def convertTableProperties(c: CreateTableStatement): Map[String, String] = { + convertTableProperties( + c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external) + } + + def convertTableProperties(c: CreateTableAsSelectStatement): Map[String, String] = { + convertTableProperties( + c.properties, c.options, c.serde, c.location, c.comment, c.provider, c.external) + } + + def convertTableProperties(r: ReplaceTableStatement): Map[String, String] = { + convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider) + } + + def convertTableProperties(r: ReplaceTableAsSelectStatement): Map[String, String] = { + convertTableProperties(r.properties, r.options, r.serde, r.location, r.comment, r.provider) + } + + private def convertTableProperties( properties: Map[String, String], options: Map[String, String], + serdeInfo: Option[SerdeInfo], location: Option[String], comment: Option[String], - provider: Option[String]): Map[String, String] = { - properties ++ options ++ + provider: Option[String], + external: Boolean = false): Map[String, String] = { + 
properties ++ + options ++ // to make the transition to the "option." prefix easier, add both + options.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value } ++ + convertToProperties(serdeInfo) ++ + (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ provider.map(TableCatalog.PROP_PROVIDER -> _) ++ comment.map(TableCatalog.PROP_COMMENT -> _) ++ location.map(TableCatalog.PROP_LOCATION -> _) } + /** + * Converts Hive Serde info to table properties. The mapped property keys are: + * - INPUTFORMAT/OUTPUTFORMAT: hive.input/output-format + * - STORED AS: hive.stored-as + * - ROW FORMAT SERDE: hive.serde + * - SERDEPROPERTIES: add "option." prefix + */ + private def convertToProperties(serdeInfo: Option[SerdeInfo]): Map[String, String] = { + serdeInfo match { + case Some(s) => + s.formatClasses.map { f => + Map("hive.input-format" -> f.input, "hive.output-format" -> f.output) + }.getOrElse(Map.empty) ++ + s.storedAs.map("hive.stored-as" -> _) ++ + s.serde.map("hive.serde" -> _) ++ + s.serdeProperties.map { + case (key, value) => TableCatalog.OPTION_PREFIX + key -> value + } + case None => + Map.empty + } + } + def withDefaultOwnership(properties: Map[String, String]): Map[String, String] = { properties ++ Map(TableCatalog.PROP_OWNER -> Utils.getCurrentUserName()) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index cc3c824befb3e..f650922e75f6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -63,6 +63,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => @@ -70,7 +71,7 @@ class DDLParserSuite extends AnalysisTest { } intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING) USING parquet", - "no viable alternative at input") + "extraneous input ':'") } test("create/replace table - with IF NOT EXISTS") { @@ -86,6 +87,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None), expectedIfNotExists = true) } @@ -106,6 +108,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -160,6 +163,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -182,6 +186,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -200,7 +205,8 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, - Some("abc")) + Some("abc"), + None) Seq(createSql, replaceSql).foreach{ sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) } @@ -220,6 +226,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, 
expectedIfNotExists = false) @@ -238,6 +245,7 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], Some("/tmp/file"), + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) @@ -256,19 +264,309 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], None, + None, None) Seq(createSql, replaceSql).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) } } + test("create/replace table - partition column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint) PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab (id bigint) PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - empty columns list") { + val createSql = "CREATE TABLE my_tab PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - using with partition column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint) USING parquet PARTITIONED BY (part string)" + val replaceSql = "REPLACE TABLE my_tab (id bigint) USING parquet PARTITIONED BY (part string)" + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + Some("parquet"), + Map.empty[String, String], + None, + None, + None) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - mixed partition references and column definitions") { + val createSql = "CREATE TABLE my_tab (id bigint, p1 string) PARTITIONED BY (p1, p2 string)" + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq( + "PARTITION BY: Cannot mix partition expressions and partition columns", + "Expressions: p1", + "Columns: p2 string")) + } + + val createSqlWithExpr = + "CREATE TABLE my_tab (id bigint, p1 string) PARTITIONED BY (p2 string, truncate(p1, 16))" + val replaceSqlWithExpr = createSqlWithExpr.replaceFirst("CREATE", "REPLACE") + Seq(createSqlWithExpr, replaceSqlWithExpr).foreach { sql => + assertUnsupported(sql, Seq( + "PARTITION BY: Cannot mix partition expressions and partition columns", + "Expressions: truncate(p1, 16)", + "Columns: p2 string")) + } + } + + test("create/replace table - stored as") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS parquet + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") 
+ val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some("parquet")))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - stored as format with serde") { + Seq("sequencefile", "textfile", "rcfile").foreach { format => + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS $format + |ROW FORMAT SERDE 'customSerde' + |WITH SERDEPROPERTIES ('prop'='value') + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some(format), serde = Some("customSerde"), serdeProperties = Map( + "prop" -> "value" + )))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS otherFormat + |ROW FORMAT SERDE 'customSerde' + |WITH SERDEPROPERTIES ('prop'='value') + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("ROW FORMAT SERDE is incompatible with format 'otherFormat'")) + } + } + + test("create/replace table - stored as format with delimited clauses") { + val createSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS textfile + |ROW FORMAT DELIMITED + |FIELDS TERMINATED BY ',' ESCAPED BY '\\\\' -- double escape for Scala and for SQL + |COLLECTION ITEMS TERMINATED BY '#' + |MAP KEYS TERMINATED BY '=' + |LINES TERMINATED BY '\\n' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(storedAs = Some("textfile"), serdeProperties = Map( + "field.delim" -> ",", "serialization.format" -> ",", "escape.delim" -> "\\", + "colelction.delim" -> "#", "mapkey.delim" -> "=", "line.delim" -> "\n" + )))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + + val createFailSql = + s"""CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS otherFormat + |ROW FORMAT DELIMITED + |FIELDS TERMINATED BY ',' + """.stripMargin + val replaceFailSql = createFailSql.replaceFirst("CREATE", "REPLACE") + Seq(createFailSql, replaceFailSql).foreach { sql => + assertUnsupported(sql, Seq( + "ROW FORMAT DELIMITED is only compatible with 'textfile', not 'otherFormat'")) + } + } + + test("create/replace table - stored as inputformat/outputformat") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + """.stripMargin + val replaceSql = 
createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo(formatClasses = Some(FormatClasses("inFormat", "outFormat"))))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - stored as inputformat/outputformat with serde") { + val createSql = + """CREATE TABLE my_tab (id bigint) + |PARTITIONED BY (part string) + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'customSerde' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + val expectedTableSpec = TableSpec( + Seq("my_tab"), + Some(new StructType().add("id", LongType).add("part", StringType)), + Seq(IdentityTransform(FieldReference("part"))), + None, + Map.empty[String, String], + None, + Map.empty[String, String], + None, + None, + Some(SerdeInfo( + formatClasses = Some(FormatClasses("inFormat", "outFormat")), + serde = Some("customSerde")))) + Seq(createSql, replaceSql).foreach { sql => + testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) + } + } + + test("create/replace table - using with stored as") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |STORED AS parquet + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... STORED AS")) + } + } + + test("create/replace table - using with row format serde") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |ROW FORMAT SERDE 'customSerde' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... ROW FORMAT SERDE")) + } + } + + test("create/replace table - using with row format delimited") { + val createSql = + """CREATE TABLE my_tab (id bigint, part string) + |USING parquet + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("CREATE TABLE ... USING ... ROW FORMAT DELIMITED")) + } + } + + test("create/replace table - stored by") { + val createSql = + """CREATE TABLE my_tab (id bigint, p1 string) + |STORED BY 'handler' + """.stripMargin + val replaceSql = createSql.replaceFirst("CREATE", "REPLACE") + Seq(createSql, replaceSql).foreach { sql => + assertUnsupported(sql, Seq("stored by")) + } + } + + test("Unsupported skew clause - create/replace table") { + intercept("CREATE TABLE my_tab (id bigint) SKEWED BY (id) ON (1,2,3)", + "CREATE TABLE ... SKEWED BY") + intercept("REPLACE TABLE my_tab (id bigint) SKEWED BY (id) ON (1,2,3)", + "CREATE TABLE ... 
SKEWED BY") + } + test("Duplicate clauses - create/replace table") { def createTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) USING parquet $duplicateClause $duplicateClause" + s"CREATE TABLE my_tab(a INT, b STRING) $duplicateClause $duplicateClause" } def replaceTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) USING parquet $duplicateClause $duplicateClause" + s"CREATE TABLE my_tab(a INT, b STRING) $duplicateClause $duplicateClause" } intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), @@ -281,6 +579,14 @@ class DDLParserSuite extends AnalysisTest { "Found duplicate clauses: CLUSTERED BY") intercept(createTableHeader("PARTITIONED BY (b)"), "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("PARTITIONED BY (c int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS") + intercept(createTableHeader("STORED AS INPUTFORMAT 'in' OUTPUTFORMAT 'out'"), + "Found duplicate clauses: STORED AS") + intercept(createTableHeader("ROW FORMAT SERDE 'serde'"), + "Found duplicate clauses: ROW FORMAT") intercept(replaceTableHeader("TBLPROPERTIES('test' = 'test2')"), "Found duplicate clauses: TBLPROPERTIES") @@ -292,6 +598,14 @@ class DDLParserSuite extends AnalysisTest { "Found duplicate clauses: CLUSTERED BY") intercept(replaceTableHeader("PARTITIONED BY (b)"), "Found duplicate clauses: PARTITIONED BY") + intercept(replaceTableHeader("PARTITIONED BY (c int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(replaceTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS") + intercept(replaceTableHeader("STORED AS INPUTFORMAT 'in' OUTPUTFORMAT 'out'"), + "Found duplicate clauses: STORED AS") + intercept(replaceTableHeader("ROW FORMAT SERDE 'serde'"), + "Found duplicate clauses: ROW FORMAT") } test("support for other types in OPTIONS") { @@ -317,6 +631,7 @@ class DDLParserSuite extends AnalysisTest { Some("json"), Map("a" -> "1", "b" -> "0.1", "c" -> "true"), None, + None, None), expectedIfNotExists = false) } @@ -372,7 +687,8 @@ class DDLParserSuite extends AnalysisTest { Some("parquet"), Map.empty[String, String], Some("/user/external/page_view"), - Some("This is the staging page view table")) + Some("This is the staging page view table"), + None) Seq(s1, s2, s3, s4).foreach { sql => testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = true) } @@ -2105,7 +2421,9 @@ class DDLParserSuite extends AnalysisTest { provider: Option[String], options: Map[String, String], location: Option[String], - comment: Option[String]) + comment: Option[String], + serdeInfo: Option[SerdeInfo], + external: Boolean = false) private object TableSpec { def apply(plan: LogicalPlan): TableSpec = { @@ -2120,7 +2438,9 @@ class DDLParserSuite extends AnalysisTest { create.provider, create.options, create.location, - create.comment) + create.comment, + create.serde, + create.external) case replace: ReplaceTableStatement => TableSpec( replace.tableName, @@ -2131,7 +2451,8 @@ class DDLParserSuite extends AnalysisTest { replace.provider, replace.options, replace.location, - replace.comment) + replace.comment, + replace.serde) case ctas: CreateTableAsSelectStatement => TableSpec( ctas.tableName, @@ -2142,7 +2463,9 @@ class DDLParserSuite extends AnalysisTest { ctas.provider, ctas.options, ctas.location, - ctas.comment) + ctas.comment, + ctas.serde, + ctas.external) case rtas: 
ReplaceTableAsSelectStatement => TableSpec( rtas.tableName, @@ -2153,7 +2476,8 @@ class DDLParserSuite extends AnalysisTest { rtas.provider, rtas.options, rtas.location, - rtas.comment) + rtas.comment, + rtas.serde) case other => fail(s"Expected to parse Create, CTAS, Replace, or RTAS plan" + s" from query, got ${other.getClass.getName}.") @@ -2179,8 +2503,7 @@ class DDLParserSuite extends AnalysisTest { CommentOnTable(UnresolvedTable(Seq("a", "b", "c"), "COMMENT ON TABLE"), "xYz")) } - // TODO: ignored by SPARK-31707, restore the test after create table syntax unification - ignore("create table - without using") { + test("create table - without using") { val sql = "CREATE TABLE 1m.2g(a INT)" val expectedTableSpec = TableSpec( Seq("1m", "2g"), @@ -2191,6 +2514,7 @@ class DDLParserSuite extends AnalysisTest { None, Map.empty[String, String], None, + None, None) testCreateOrReplaceDdl(sql, expectedTableSpec, expectedIfNotExists = false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index 31b4c158aa67b..a8688bdf15495 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -658,6 +658,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("path"), extraOptions.get(TableCatalog.PROP_COMMENT), extraOptions.toMap, + None, orCreate = true) // Create the table if it doesn't exist case (other, _) => @@ -675,7 +676,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { extraOptions.get("path"), extraOptions.get(TableCatalog.PROP_COMMENT), extraOptions.toMap, - ifNotExists = other == SaveMode.Ignore) + None, + ifNotExists = other == SaveMode.Ignore, + external = false) } runCommand(df.sparkSession, "saveAsTable") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala index d55b5c3103537..9a49fc3d74780 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala @@ -119,7 +119,9 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) None, None, options.toMap, - ifNotExists = false) + None, + ifNotExists = false, + external = false) } } @@ -207,6 +209,7 @@ final class DataFrameWriterV2[T] private[sql](table: String, ds: Dataset[T]) None, None, options.toMap, + None, orCreate = orCreate) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 395f5efd5a52d..f49caf7f04a20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 +import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} /** @@ -265,16 +266,17 @@ class ResolveSessionCatalog( // For CREATE TABLE [AS SELECT], we should use the v1 
command if the catalog is resolved to the // session catalog and the table provider is not v2. case c @ CreateTableStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - val provider = c.provider.getOrElse(conf.defaultDataSourceName) + val (storageFormat, provider) = getStorageFormatAndProvider( + c.provider, c.options, c.location, c.serde, ctas = false) if (!isV2Provider(provider)) { if (!DDLUtils.isHiveTable(Some(provider))) { assertNoCharTypeInSchema(c.tableSchema) } val tableDesc = buildCatalogTable(tbl.asTableIdentifier, c.tableSchema, - c.partitioning, c.bucketSpec, c.properties, provider, c.options, c.location, - c.comment, c.ifNotExists) + c.partitioning, c.bucketSpec, c.properties, provider, c.location, + c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, None) } else { @@ -285,30 +287,32 @@ class ResolveSessionCatalog( c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), ignoreIfExists = c.ifNotExists) } case c @ CreateTableAsSelectStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } - val provider = c.provider.getOrElse(conf.defaultDataSourceName) + val (storageFormat, provider) = getStorageFormatAndProvider( + c.provider, c.options, c.location, c.serde, ctas = true) if (!isV2Provider(provider)) { val tableDesc = buildCatalogTable(tbl.asTableIdentifier, new StructType, - c.partitioning, c.bucketSpec, c.properties, provider, c.options, c.location, - c.comment, c.ifNotExists) + c.partitioning, c.bucketSpec, c.properties, provider, c.location, + c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, Some(c.asSelect)) } else { + assertNoCharTypeInSchema(c.schema) CreateTableAsSelect( catalog.asTableCatalog, tbl.asIdentifier, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), writeOptions = c.writeOptions, ignoreIfExists = c.ifNotExists) } @@ -322,7 +326,7 @@ class ResolveSessionCatalog( // For REPLACE TABLE [AS SELECT], we should fail if the catalog is resolved to the // session catalog and the table provider is not v2. 
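Condensing the new resolution logic: the storage format and provider are now derived together from the USING clause and any Hive SerDe clauses. A rough decision sketch (the full `getStorageFormatAndProvider` helper appears later in this hunk, and it also builds the CatalogStorageFormat values and checks property merging):

```
// Rough decision sketch only; see getStorageFormatAndProvider below for the real logic.
def chooseProvider(
    provider: Option[String],
    hasSerdeClauses: Boolean,
    ctas: Boolean,
    convertCTAS: Boolean,
    defaultDataSource: String): String = (provider, hasSerdeClauses) match {
  case (Some(_), true)  => sys.error("Cannot create table with both USING and Hive serde clauses")
  case (Some(p), false) => p                               // plain USING table
  case (None, true)     => "hive"                          // STORED AS / ROW FORMAT implies Hive
  case (None, false)    =>
    if (ctas && convertCTAS) defaultDataSource else "hive" // legacy CREATE TABLE default
}
```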
case c @ ReplaceTableStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { @@ -335,12 +339,12 @@ class ResolveSessionCatalog( c.tableSchema, // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), orCreate = c.orCreate) } case c @ ReplaceTableAsSelectStatement( - SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => + SessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => if (c.asSelect.resolved) { assertNoNullTypeInSchema(c.asSelect.schema) } @@ -354,7 +358,7 @@ class ResolveSessionCatalog( // convert the bucket spec and add it as a transform c.partitioning ++ c.bucketSpec.map(_.asTransform), c.asSelect, - convertTableProperties(c.properties, c.options, c.location, c.comment, Some(provider)), + convertTableProperties(c), writeOptions = c.writeOptions, orCreate = c.orCreate) } @@ -621,6 +625,64 @@ class ResolveSessionCatalog( case _ => throw new AnalysisException(s"$sql is only supported with temp views or v1 tables.") } + private def getStorageFormatAndProvider( + provider: Option[String], + options: Map[String, String], + location: Option[String], + maybeSerdeInfo: Option[SerdeInfo], + ctas: Boolean): (CatalogStorageFormat, String) = { + val nonHiveStorageFormat = CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + properties = options) + val defaultHiveStorage = HiveSerDe.getDefaultStorage(conf).copy( + locationUri = location.map(CatalogUtils.stringToURI), + properties = options) + + if (provider.isDefined) { + // The parser guarantees that USING and STORED AS/ROW FORMAT won't co-exist. + if (maybeSerdeInfo.isDefined) { + throw new AnalysisException( + s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + } + (nonHiveStorageFormat, provider.get) + } else if (maybeSerdeInfo.isDefined) { + val serdeInfo = maybeSerdeInfo.get + SerdeInfo.checkSerdePropMerging(serdeInfo.serdeProperties, defaultHiveStorage.properties) + val storageFormat = if (serdeInfo.storedAs.isDefined) { + // If `STORED AS fileFormat` is used, infer inputFormat, outputFormat and serde from it. + HiveSerDe.sourceToSerDe(serdeInfo.storedAs.get) match { + case Some(hiveSerde) => + defaultHiveStorage.copy( + inputFormat = hiveSerde.inputFormat.orElse(defaultHiveStorage.inputFormat), + outputFormat = hiveSerde.outputFormat.orElse(defaultHiveStorage.outputFormat), + // User specified serde takes precedence over the one inferred from file format. 
+ serde = serdeInfo.serde.orElse(hiveSerde.serde).orElse(defaultHiveStorage.serde), + properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) + case _ => throw new AnalysisException( + s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + } + } else { + defaultHiveStorage.copy( + inputFormat = + serdeInfo.formatClasses.map(_.input).orElse(defaultHiveStorage.inputFormat), + outputFormat = + serdeInfo.formatClasses.map(_.output).orElse(defaultHiveStorage.outputFormat), + serde = serdeInfo.serde.orElse(defaultHiveStorage.serde), + properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) + } + (storageFormat, DDLUtils.HIVE_PROVIDER) + } else { + // If neither USING nor STORED AS/ROW FORMAT is specified, we create native data source + // tables if it's a CTAS and `conf.convertCTAS` is true. + // TODO: create native data source table by default for non-CTAS. + if (ctas && conf.convertCTAS) { + (nonHiveStorageFormat, conf.defaultDataSourceName) + } else { + (defaultHiveStorage, DDLUtils.HIVE_PROVIDER) + } + } + } + private def buildCatalogTable( table: TableIdentifier, schema: StructType, @@ -628,13 +690,19 @@ class ResolveSessionCatalog( bucketSpec: Option[BucketSpec], properties: Map[String, String], provider: String, - options: Map[String, String], location: Option[String], comment: Option[String], - ifNotExists: Boolean): CatalogTable = { - val storage = CatalogStorageFormat.empty.copy( - locationUri = location.map(CatalogUtils.stringToURI), - properties = options) + storageFormat: CatalogStorageFormat, + external: Boolean): CatalogTable = { + if (external) { + if (DDLUtils.isHiveTable(Some(provider))) { + if (location.isEmpty) { + throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + } else { + throw new AnalysisException(s"Operation not allowed: CREATE EXTERNAL TABLE ... USING") + } + } val tableType = if (location.isDefined) { CatalogTableType.EXTERNAL @@ -645,7 +713,7 @@ class ResolveSessionCatalog( CatalogTable( identifier = table, tableType = tableType, - storage = storage, + storage = storageFormat, schema = schema, provider = Some(provider), partitionColumnNames = partitioning.asPartitionColumns, @@ -717,6 +785,9 @@ class ResolveSessionCatalog( } private def isV2Provider(provider: String): Boolean = { + // Return earlier since `lookupDataSourceV2` may fail to resolve provider "hive" to + // `HiveFileFormat`, when running tests in sql/core. + if (DDLUtils.isHiveTable(Some(provider))) return false DataSource.lookupDataSourceV2(provider, conf) match { // TODO(SPARK-28396): Currently file source v2 can't work with tables. 
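The EXTERNAL validation that moved into `buildCatalogTable` above can be summarized without the catalog types; a minimal sketch, with plain strings standing in for CatalogTableType and error messages mirroring the ones thrown by the resolver:

```
// Minimal sketch of the EXTERNAL handling above.
def resolveTableType(external: Boolean, isHiveProvider: Boolean, location: Option[String]): String = {
  if (external && !isHiveProvider) {
    sys.error("Operation not allowed: CREATE EXTERNAL TABLE ... USING")
  }
  if (external && location.isEmpty) {
    sys.error("CREATE EXTERNAL TABLE must be accompanied by LOCATION")
  }
  if (location.isDefined) "EXTERNAL" else "MANAGED"
}

// resolveTableType(external = false, isHiveProvider = true, Some("/user/external/page_view")) == "EXTERNAL"
// resolveTableType(external = true, isHiveProvider = true, None) // throws: LOCATION required
```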
case Some(_: FileDataSourceV2) => false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 01522257c072d..a92f0775f1c05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -26,7 +26,6 @@ import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.tree.TerminalNode -import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.Expression @@ -37,7 +36,6 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} -import org.apache.spark.sql.types.StructType /** * Concrete parser for Spark SQL statements. @@ -279,7 +277,7 @@ class SparkSqlAstBuilder extends AstBuilder { operationNotAllowed("CREATE TEMPORARY TABLE IF NOT EXISTS", ctx) } - val (_, _, _, options, location, _) = visitCreateTableClauses(ctx.createTableClauses()) + val (_, _, _, _, options, location, _, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw new ParseException("CREATE TEMPORARY TABLE without a provider is not allowed.", ctx)) val schema = Option(ctx.colTypeList()).map(createSchema) @@ -382,153 +380,34 @@ class SparkSqlAstBuilder extends AstBuilder { } } - /** - * Create a Hive serde table, returning a [[CreateTable]] logical plan. - * - * This is a legacy syntax for Hive compatibility, we recommend users to use the Spark SQL - * CREATE TABLE syntax to create Hive serde table, e.g. "CREATE TABLE ... USING hive ..." - * - * Note: several features are currently not supported - temporary tables, bucketing, - * skewed columns and storage handlers (STORED BY). - * - * Expected format: - * {{{ - * CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name - * [(col1[:] data_type [COMMENT col_comment], ...)] - * create_table_clauses - * [AS select_statement]; - * - * create_table_clauses (order insensitive): - * [COMMENT table_comment] - * [PARTITIONED BY (col2[:] data_type [COMMENT col_comment], ...)] - * [ROW FORMAT row_format] - * [STORED AS file_format] - * [LOCATION path] - * [TBLPROPERTIES (property_name=property_value, ...)] - * }}} - */ - override def visitCreateHiveTable(ctx: CreateHiveTableContext): LogicalPlan = withOrigin(ctx) { - val (ident, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - // TODO: implement temporary tables - if (temp) { - throw new ParseException( - "CREATE TEMPORARY TABLE is not supported yet. " + - "Please use CREATE TEMPORARY VIEW as an alternative.", ctx) - } - if (ctx.skewSpec.size > 0) { - operationNotAllowed("CREATE TABLE ... 
SKEWED BY", ctx) - } - - checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) - checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED BY", ctx) - checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) - checkDuplicateClauses(ctx.bucketSpec(), "CLUSTERED BY", ctx) - checkDuplicateClauses(ctx.createFileFormat, "STORED AS/BY", ctx) - checkDuplicateClauses(ctx.rowFormat, "ROW FORMAT", ctx) - checkDuplicateClauses(ctx.locationSpec, "LOCATION", ctx) - - val dataCols = Option(ctx.columns).map(visitColTypeList).getOrElse(Nil) - val partitionCols = Option(ctx.partitionColumns).map(visitColTypeList).getOrElse(Nil) - val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) - val selectQuery = Option(ctx.query).map(plan) - val bucketSpec = ctx.bucketSpec().asScala.headOption.map(visitBucketSpec) - - // Note: Hive requires partition columns to be distinct from the schema, so we need - // to include the partition columns here explicitly - val schema = StructType(dataCols ++ partitionCols) - - // Storage format - val defaultStorage = HiveSerDe.getDefaultStorage(conf) - validateRowFormatFileFormat( - ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) - val fileStorage = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.empty) - val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val location = visitLocationSpecList(ctx.locationSpec()) - // If we are creating an EXTERNAL table, then the LOCATION field is required - if (external && location.isEmpty) { - operationNotAllowed("CREATE EXTERNAL TABLE must be accompanied by LOCATION", ctx) - } - - val locUri = location.map(CatalogUtils.stringToURI(_)) - val storage = CatalogStorageFormat( - locationUri = locUri, - inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat), - outputFormat = fileStorage.outputFormat.orElse(defaultStorage.outputFormat), - serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde), - compressed = false, - properties = rowStorage.properties ++ fileStorage.properties) - // If location is defined, we'll assume this is an external table. - // Otherwise, we may accidentally delete existing data. - val tableType = if (external || location.isDefined) { - CatalogTableType.EXTERNAL + private def toStorageFormat( + location: Option[String], + maybeSerdeInfo: Option[SerdeInfo], + ctx: ParserRuleContext): CatalogStorageFormat = { + if (maybeSerdeInfo.isEmpty) { + CatalogStorageFormat.empty.copy(locationUri = location.map(CatalogUtils.stringToURI)) } else { - CatalogTableType.MANAGED - } - - val name = tableIdentifier(ident, "CREATE TABLE ... STORED AS ...", ctx) - - // TODO support the sql text - have a proper location for this! - val tableDesc = CatalogTable( - identifier = name, - tableType = tableType, - storage = storage, - schema = schema, - bucketSpec = bucketSpec, - provider = Some(DDLUtils.HIVE_PROVIDER), - partitionColumnNames = partitionCols.map(_.name), - properties = properties, - comment = visitCommentSpecList(ctx.commentSpec())) - - val mode = if (ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists - - selectQuery match { - case Some(q) => - // Don't allow explicit specification of schema for CTAS. 
- if (dataCols.nonEmpty) { - operationNotAllowed( - "Schema may not be specified in a Create Table As Select (CTAS) statement", - ctx) - } - - // When creating partitioned table with CTAS statement, we can't specify data type for the - // partition columns. - if (partitionCols.nonEmpty) { - val errorMessage = "Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table." - operationNotAllowed(errorMessage, ctx) - } - - // Hive CTAS supports dynamic partition by specifying partition column names. - val partitionColumnNames = - Option(ctx.partitionColumnNames) - .map(visitIdentifierList(_).toArray) - .getOrElse(Array.empty[String]) - - val tableDescWithPartitionColNames = - tableDesc.copy(partitionColumnNames = partitionColumnNames) - - val hasStorageProperties = (ctx.createFileFormat.size != 0) || (ctx.rowFormat.size != 0) - if (conf.convertCTAS && !hasStorageProperties) { - // At here, both rowStorage.serdeProperties and fileStorage.serdeProperties - // are empty Maps. - val newTableDesc = tableDescWithPartitionColNames.copy( - storage = CatalogStorageFormat.empty.copy(locationUri = locUri), - provider = Some(conf.defaultDataSourceName)) - CreateTable(newTableDesc, mode, Some(q)) - } else { - CreateTable(tableDescWithPartitionColNames, mode, Some(q)) - } - case None => - // When creating partitioned table, we must specify data type for the partition columns. - if (Option(ctx.partitionColumnNames).isDefined) { - val errorMessage = "Must specify a data type for each partition column while creating " + - "Hive partitioned table." - operationNotAllowed(errorMessage, ctx) + val serdeInfo = maybeSerdeInfo.get + if (serdeInfo.storedAs.isEmpty) { + CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + inputFormat = serdeInfo.formatClasses.map(_.input), + outputFormat = serdeInfo.formatClasses.map(_.output), + serde = serdeInfo.serde, + properties = serdeInfo.serdeProperties) + } else { + HiveSerDe.sourceToSerDe(serdeInfo.storedAs.get) match { + case Some(hiveSerde) => + CatalogStorageFormat.empty.copy( + locationUri = location.map(CatalogUtils.stringToURI), + inputFormat = hiveSerde.inputFormat, + outputFormat = hiveSerde.outputFormat, + serde = serdeInfo.serde.orElse(hiveSerde.serde), + properties = serdeInfo.serdeProperties) + case _ => + operationNotAllowed(s"STORED AS with file format '${serdeInfo.storedAs.get}'", ctx) } - - CreateTable(tableDesc, mode, None) + } } } @@ -559,189 +438,27 @@ class SparkSqlAstBuilder extends AstBuilder { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) val provider = ctx.tableProvider.asScala.headOption.map(_.multipartIdentifier.getText) val location = visitLocationSpecList(ctx.locationSpec()) - // rowStorage used to determine CatalogStorageFormat.serde and - // CatalogStorageFormat.properties in STORED AS clause. 
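The `HiveSerDe.sourceToSerDe` lookup used by `toStorageFormat` above resolves a STORED AS keyword to concrete input/output format and serde classes. A small stand-in covering only formats exercised elsewhere in this patch (class names taken from the existing tests; the real table in `org.apache.spark.sql.internal.HiveSerDe` covers more formats):

```
// Stand-in for HiveSerDe.sourceToSerDe, restricted to formats appearing in the tests.
case class SimpleHiveSerDe(inputFormat: String, outputFormat: String, serde: Option[String])

val knownFormats: Map[String, SimpleHiveSerDe] = Map(
  "textfile" -> SimpleHiveSerDe(
    "org.apache.hadoop.mapred.TextInputFormat",
    "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
    Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")),
  "rcfile" -> SimpleHiveSerDe(
    "org.apache.hadoop.hive.ql.io.RCFileInputFormat",
    "org.apache.hadoop.hive.ql.io.RCFileOutputFormat",
    Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")),
  "orc" -> SimpleHiveSerDe(
    "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
    "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
    Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")))

def lookupStoredAs(format: String): Option[SimpleHiveSerDe] =
  knownFormats.get(format.toLowerCase)

// lookupStoredAs("otherFormat") == None, which is what surfaces as the
// "STORED AS with file format 'otherFormat'" errors in the code above.
```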
- val rowStorage = ctx.rowFormat.asScala.headOption.map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val fileFormat = ctx.createFileFormat.asScala.headOption.map(visitCreateFileFormat) match { - case Some(f) => - if (provider.isDefined) { - throw new ParseException("'STORED AS hiveFormats' and 'USING provider' " + - "should not be specified both", ctx) - } - f.copy( - locationUri = location.map(CatalogUtils.stringToURI), - serde = rowStorage.serde.orElse(f.serde), - properties = rowStorage.properties ++ f.properties) - case None => - if (rowStorage.serde.isDefined) { - throw new ParseException("'ROW FORMAT' must be used with 'STORED AS'", ctx) - } - CatalogStorageFormat.empty.copy(locationUri = location.map(CatalogUtils.stringToURI)) + // TODO: Do not skip serde check for CREATE TABLE LIKE. + val serdeInfo = getSerdeInfo( + ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx, skipCheck = true) + if (provider.isDefined && serdeInfo.isDefined) { + operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } - val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) - CreateTableLikeCommand( - targetTable, sourceTable, fileFormat, provider, properties, ctx.EXISTS != null) - } - /** - * Create a [[CatalogStorageFormat]] for creating tables. - * - * Format: STORED AS ... - */ - override def visitCreateFileFormat( - ctx: CreateFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - (ctx.fileFormat, ctx.storageHandler) match { - // Expected format: INPUTFORMAT input_format OUTPUTFORMAT output_format - case (c: TableFileFormatContext, null) => - visitTableFileFormat(c) - // Expected format: SEQUENCEFILE | TEXTFILE | RCFILE | ORC | PARQUET | AVRO - case (c: GenericFileFormatContext, null) => - visitGenericFileFormat(c) - case (null, storageHandler) => - operationNotAllowed("STORED BY", ctx) - case _ => - throw new ParseException("Expected either STORED AS or STORED BY, not both", ctx) - } - } - - /** - * Create a [[CatalogStorageFormat]]. - */ - override def visitTableFileFormat( - ctx: TableFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - CatalogStorageFormat.empty.copy( - inputFormat = Option(string(ctx.inFmt)), - outputFormat = Option(string(ctx.outFmt))) - } - - /** - * Resolve a [[HiveSerDe]] based on the name given and return it as a [[CatalogStorageFormat]]. - */ - override def visitGenericFileFormat( - ctx: GenericFileFormatContext): CatalogStorageFormat = withOrigin(ctx) { - val source = ctx.identifier.getText - HiveSerDe.sourceToSerDe(source) match { - case Some(s) => - CatalogStorageFormat.empty.copy( - inputFormat = s.inputFormat, - outputFormat = s.outputFormat, - serde = s.serde) - case None => - operationNotAllowed(s"STORED AS with file format '$source'", ctx) - } - } - - /** - * Create a [[CatalogStorageFormat]] used for creating tables. - * - * Example format: - * {{{ - * SERDE serde_name [WITH SERDEPROPERTIES (k1=v1, k2=v2, ...)] - * }}} - * - * OR - * - * {{{ - * DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] - * [COLLECTION ITEMS TERMINATED BY char] - * [MAP KEYS TERMINATED BY char] - * [LINES TERMINATED BY char] - * [NULL DEFINED AS char] - * }}} - */ - private def visitRowFormat(ctx: RowFormatContext): CatalogStorageFormat = withOrigin(ctx) { - ctx match { - case serde: RowFormatSerdeContext => visitRowFormatSerde(serde) - case delimited: RowFormatDelimitedContext => visitRowFormatDelimited(delimited) - } - } - - /** - * Create SERDE row format name and properties pair. 
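In usage terms, the CREATE TABLE LIKE handling above now accepts either a provider or Hive format clauses, but not both, and a bare ROW FORMAT still has to come with STORED AS. A hedged sketch of statements a test might feed the parser (hypothetical table names; error wording paraphrased from the checks above):

```
// Hypothetical statements only; the exact error strings come from the checks above.
val likeWithProvider = "CREATE TABLE target LIKE source USING parquet"               // accepted
val likeWithStoredAs = "CREATE TABLE target LIKE source STORED AS ORC"               // accepted
val likeWithBoth     = "CREATE TABLE target LIKE source USING parquet STORED AS ORC" // rejected
val likeBareSerde    = "CREATE TABLE target LIKE source ROW FORMAT SERDE 'mySerde'"  // rejected:
// a bare ROW FORMAT still requires STORED AS, per the restriction kept above.
```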
- */ - override def visitRowFormatSerde( - ctx: RowFormatSerdeContext): CatalogStorageFormat = withOrigin(ctx) { - import ctx._ - CatalogStorageFormat.empty.copy( - serde = Option(string(name)), - properties = Option(tablePropertyList).map(visitPropertyKeyValues).getOrElse(Map.empty)) - } - - /** - * Create a delimited row format properties object. - */ - override def visitRowFormatDelimited( - ctx: RowFormatDelimitedContext): CatalogStorageFormat = withOrigin(ctx) { - // TODO we need proper support for the NULL format. - val entries = - entry("field.delim", ctx.fieldsTerminatedBy) ++ - entry("serialization.format", ctx.fieldsTerminatedBy) ++ - entry("escape.delim", ctx.escapedBy) ++ - // The following typo is inherited from Hive... - entry("colelction.delim", ctx.collectionItemsTerminatedBy) ++ - entry("mapkey.delim", ctx.keysTerminatedBy) ++ - Option(ctx.linesSeparatedBy).toSeq.map { token => - val value = string(token) - validate( - value == "\n", - s"LINES TERMINATED BY only supports newline '\\n' right now: $value", - ctx) - "line.delim" -> value - } - CatalogStorageFormat.empty.copy(properties = entries.toMap) - } - - /** - * Throw a [[ParseException]] if the user specified incompatible SerDes through ROW FORMAT - * and STORED AS. - * - * The following are allowed. Anything else is not: - * ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] - * ROW FORMAT DELIMITED ... STORED AS TEXTFILE - * ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... - */ - private def validateRowFormatFileFormat( - rowFormatCtx: RowFormatContext, - createFileFormatCtx: CreateFileFormatContext, - parentCtx: ParserRuleContext): Unit = { - if (rowFormatCtx == null || createFileFormatCtx == null) { - return - } - (rowFormatCtx, createFileFormatCtx.fileFormat) match { - case (_, ffTable: TableFileFormatContext) => // OK - case (rfSerde: RowFormatSerdeContext, ffGeneric: GenericFileFormatContext) => - ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { - case ("sequencefile" | "textfile" | "rcfile") => // OK - case fmt => - operationNotAllowed( - s"ROW FORMAT SERDE is incompatible with format '$fmt', which also specifies a serde", - parentCtx) - } - case (rfDelimited: RowFormatDelimitedContext, ffGeneric: GenericFileFormatContext) => - ffGeneric.identifier.getText.toLowerCase(Locale.ROOT) match { - case "textfile" => // OK - case fmt => operationNotAllowed( - s"ROW FORMAT DELIMITED is only compatible with 'textfile', not '$fmt'", parentCtx) + // TODO: remove this restriction as it seems unnecessary. + serdeInfo match { + case Some(SerdeInfo(storedAs, formatClasses, serde, _)) => + if (storedAs.isEmpty && formatClasses.isEmpty && serde.isDefined) { + throw new ParseException("'ROW FORMAT' must be used with 'STORED AS'", ctx) } case _ => - // should never happen - def str(ctx: ParserRuleContext): String = { - (0 until ctx.getChildCount).map { i => ctx.getChild(i).getText }.mkString(" ") - } - operationNotAllowed( - s"Unexpected combination of ${str(rowFormatCtx)} and ${str(createFileFormatCtx)}", - parentCtx) } - } - private def validateRowFormatFileFormat( - rowFormatCtx: Seq[RowFormatContext], - createFileFormatCtx: Seq[CreateFileFormatContext], - parentCtx: ParserRuleContext): Unit = { - if (rowFormatCtx.size == 1 && createFileFormatCtx.size == 1) { - validateRowFormatFileFormat(rowFormatCtx.head, createFileFormatCtx.head, parentCtx) - } + // TODO: also look at `HiveSerDe.getDefaultStorage`. 
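For reference, the delimited-clause mapping that the removed visitor performed (and that the SerdeInfo path preserves, per the delimited-clauses test earlier in this file) can be written out as a plain function; note the intentionally misspelled key inherited from Hive:

```
// Plain-function sketch of ROW FORMAT DELIMITED -> serde properties; the field
// terminator is stored under both "field.delim" and "serialization.format", and
// "colelction.delim" keeps Hive's historical typo on purpose.
def delimitedToProperties(
    fieldsTerminatedBy: Option[String],
    escapedBy: Option[String],
    collectionItemsTerminatedBy: Option[String],
    mapKeysTerminatedBy: Option[String],
    linesTerminatedBy: Option[String]): Map[String, String] = {
  (fieldsTerminatedBy.map("field.delim" -> _) ++
    fieldsTerminatedBy.map("serialization.format" -> _) ++
    escapedBy.map("escape.delim" -> _) ++
    collectionItemsTerminatedBy.map("colelction.delim" -> _) ++
    mapKeysTerminatedBy.map("mapkey.delim" -> _) ++
    linesTerminatedBy.map("line.delim" -> _)).toMap
}

// delimitedToProperties(Some(","), Some("\\"), Some("#"), Some("="), Some("\n")) ==
//   Map("field.delim" -> ",", "serialization.format" -> ",", "escape.delim" -> "\\",
//       "colelction.delim" -> "#", "mapkey.delim" -> "=", "line.delim" -> "\n")
```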
+ val storage = toStorageFormat(location, serdeInfo, ctx) + val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) + CreateTableLikeCommand( + targetTable, sourceTable, storage, provider, properties, ctx.EXISTS != null) } /** @@ -788,7 +505,7 @@ class SparkSqlAstBuilder extends AstBuilder { case c: RowFormatSerdeContext => // Use a serde format. - val CatalogStorageFormat(None, None, None, Some(name), _, props) = visitRowFormatSerde(c) + val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) // SPARK-10310: Special cases LazySimpleSerDe val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { @@ -896,28 +613,21 @@ class SparkSqlAstBuilder extends AstBuilder { */ override def visitInsertOverwriteHiveDir( ctx: InsertOverwriteHiveDirContext): InsertDirParams = withOrigin(ctx) { - validateRowFormatFileFormat(ctx.rowFormat, ctx.createFileFormat, ctx) - val rowStorage = Option(ctx.rowFormat).map(visitRowFormat) - .getOrElse(CatalogStorageFormat.empty) - val fileStorage = Option(ctx.createFileFormat).map(visitCreateFileFormat) - .getOrElse(CatalogStorageFormat.empty) - + val serdeInfo = getSerdeInfo( + Option(ctx.rowFormat).toSeq, Option(ctx.createFileFormat).toSeq, ctx) val path = string(ctx.path) // The path field is required if (path.isEmpty) { operationNotAllowed("INSERT OVERWRITE DIRECTORY must be accompanied by path", ctx) } - val defaultStorage = HiveSerDe.getDefaultStorage(conf) - - val storage = CatalogStorageFormat( - locationUri = Some(CatalogUtils.stringToURI(path)), - inputFormat = fileStorage.inputFormat.orElse(defaultStorage.inputFormat), - outputFormat = fileStorage.outputFormat.orElse(defaultStorage.outputFormat), - serde = rowStorage.serde.orElse(fileStorage.serde).orElse(defaultStorage.serde), - compressed = false, - properties = rowStorage.properties ++ fileStorage.properties) + val default = HiveSerDe.getDefaultStorage(conf) + val storage = toStorageFormat(Some(path), serdeInfo, ctx) + val finalStorage = storage.copy( + inputFormat = storage.inputFormat.orElse(default.inputFormat), + outputFormat = storage.outputFormat.orElse(default.outputFormat), + serde = storage.serde.orElse(default.serde)) - (ctx.LOCAL != null, storage, Some(DDLUtils.HIVE_PROVIDER)) + (ctx.LOCAL != null, finalStorage, Some(DDLUtils.HIVE_PROVIDER)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index 9ee145580ce6d..f330d6a8c99e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -85,7 +85,7 @@ class V2SessionCatalog(catalog: SessionCatalog) val provider = properties.getOrDefault(TableCatalog.PROP_PROVIDER, conf.defaultDataSourceName) val tableProperties = properties.asScala val location = Option(properties.get(TableCatalog.PROP_LOCATION)) - val storage = DataSource.buildStorageFormatFromOptions(tableProperties.toMap) + val storage = DataSource.buildStorageFormatFromOptions(toOptions(tableProperties.toMap)) .copy(locationUri = location.map(CatalogUtils.stringToURI)) val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED @@ -111,6 +111,12 @@ class V2SessionCatalog(catalog: SessionCatalog) loadTable(ident) } + private def toOptions(properties: Map[String, String]): Map[String, 
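The INSERT OVERWRITE DIRECTORY rewrite above derives storage from the ROW FORMAT / STORED AS clauses and then back-fills it with the session's default Hive storage. A compact sketch of that merge, with a plain case class standing in for CatalogStorageFormat:

```
// Compact sketch of the defaults merge above; Storage stands in for CatalogStorageFormat.
case class Storage(
    inputFormat: Option[String] = None,
    outputFormat: Option[String] = None,
    serde: Option[String] = None)

def withHiveDefaults(parsed: Storage, default: Storage): Storage = Storage(
  inputFormat = parsed.inputFormat.orElse(default.inputFormat),
  outputFormat = parsed.outputFormat.orElse(default.outputFormat),
  serde = parsed.serde.orElse(default.serde))

// Only the serde was given via ROW FORMAT SERDE; the formats come from the defaults,
// where `defaults` would be derived from HiveSerDe.getDefaultStorage(conf).
// withHiveDefaults(Storage(serde = Some("customSerde")), defaults)
```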
String] = { + properties.filterKeys(_.startsWith(TableCatalog.OPTION_PREFIX)).map { + case (key, value) => key.drop(TableCatalog.OPTION_PREFIX.length) -> value + } + } + override def alterTable( ident: Identifier, changes: TableChange*): Table = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 222fa8ace4dca..f2b57f9442d09 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -432,7 +432,7 @@ class DataSourceV2SQLSuite intercept[Exception] { spark.sql("REPLACE TABLE testcat.table_name" + - s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + + s" USING foo TBLPROPERTIES (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}`=true)" + s" AS SELECT id FROM source") } @@ -465,7 +465,7 @@ class DataSourceV2SQLSuite intercept[Exception] { spark.sql("REPLACE TABLE testcat_atomic.table_name" + - s" USING foo OPTIONS (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + + s" USING foo TBLPROPERTIES (`${InMemoryTable.SIMULATE_FAILED_WRITE_OPTION}=true)" + s" AS SELECT id FROM source") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index f55fbc9809f71..61c16baedb7cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -20,16 +20,14 @@ package org.apache.spark.sql.execution import scala.collection.JavaConverters._ import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAlias, UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} -import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.expressions.{Ascending, AttributeReference, Concat, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.command._ -import org.apache.spark.sql.execution.datasources.{CreateTable, CreateTempViewUsing, RefreshResource} -import org.apache.spark.sql.internal.{HiveSerDe, StaticSQLConf} -import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType} +import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, RefreshResource} +import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.types.StringType /** * Parser test cases for rules defined in [[SparkSqlParser]]. @@ -42,23 +40,8 @@ class SparkSqlParserSuite extends AnalysisTest { private lazy val parser = new SparkSqlParser() - /** - * Normalizes plans: - * - CreateTable the createTime in tableDesc will replaced by -1L. 
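The new `toOptions` helper above strips the option prefix off table properties before handing them to `buildStorageFormatFromOptions`. An illustrative run, assuming the conventional "option." value for `TableCatalog.OPTION_PREFIX` (check the constant before reusing this sketch):

```
// Illustrative only: "option." is assumed here for TableCatalog.OPTION_PREFIX.
val prefix = "option."
val tableProperties = Map(
  "option.compression" -> "snappy",
  "provider" -> "parquet",
  "location" -> "/tmp/t")

val options = tableProperties.collect {
  case (key, value) if key.startsWith(prefix) => key.drop(prefix.length) -> value
}
// options == Map("compression" -> "snappy"): only prefixed keys survive, with the
// prefix removed, mirroring the filterKeys/map pair above.
```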
- */ - override def normalizePlan(plan: LogicalPlan): LogicalPlan = { - plan match { - case CreateTable(tableDesc, mode, query) => - val newTableDesc = tableDesc.copy(createTime = -1L) - CreateTable(newTableDesc, mode, query) - case _ => plan // Don't transform - } - } - private def assertEqual(sqlCommand: String, plan: LogicalPlan): Unit = { - val normalized1 = normalizePlan(parser.parsePlan(sqlCommand)) - val normalized2 = normalizePlan(plan) - comparePlans(normalized1, normalized2) + comparePlans(parser.parsePlan(sqlCommand), plan) } private def intercept(sqlCommand: String, messages: String*): Unit = @@ -210,110 +193,6 @@ class SparkSqlParserSuite extends AnalysisTest { Map("path" -> "/data/tmp/testspark1"))) } - private def createTableUsing( - table: String, - database: Option[String] = None, - tableType: CatalogTableType = CatalogTableType.MANAGED, - storage: CatalogStorageFormat = CatalogStorageFormat.empty, - schema: StructType = new StructType, - provider: Option[String] = Some("parquet"), - partitionColumnNames: Seq[String] = Seq.empty, - bucketSpec: Option[BucketSpec] = None, - mode: SaveMode = SaveMode.ErrorIfExists, - query: Option[LogicalPlan] = None): CreateTable = { - CreateTable( - CatalogTable( - identifier = TableIdentifier(table, database), - tableType = tableType, - storage = storage, - schema = schema, - provider = provider, - partitionColumnNames = partitionColumnNames, - bucketSpec = bucketSpec - ), mode, query - ) - } - - private def createTable( - table: String, - database: Option[String] = None, - tableType: CatalogTableType = CatalogTableType.MANAGED, - storage: CatalogStorageFormat = CatalogStorageFormat.empty.copy( - inputFormat = HiveSerDe.sourceToSerDe("textfile").get.inputFormat, - outputFormat = HiveSerDe.sourceToSerDe("textfile").get.outputFormat, - serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")), - schema: StructType = new StructType, - provider: Option[String] = Some("hive"), - partitionColumnNames: Seq[String] = Seq.empty, - comment: Option[String] = None, - mode: SaveMode = SaveMode.ErrorIfExists, - query: Option[LogicalPlan] = None): CreateTable = { - CreateTable( - CatalogTable( - identifier = TableIdentifier(table, database), - tableType = tableType, - storage = storage, - schema = schema, - provider = provider, - partitionColumnNames = partitionColumnNames, - comment = comment - ), mode, query - ) - } - - test("create table - schema") { - assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - ) - ) - assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + - "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - .add("c", IntegerType) - .add("d", StringType, nullable = true, "test2"), - partitionColumnNames = Seq("c", "d") - ) - ) - assertEqual("CREATE TABLE my_tab(id BIGINT, nested STRUCT) " + - "STORED AS textfile", - createTable( - table = "my_tab", - schema = (new StructType) - .add("id", LongType) - .add("nested", (new StructType) - .add("col1", StringType) - .add("col2", IntegerType) - ) - ) - ) - // Partitioned by a StructType should be accepted by `SparkSqlParser` but will fail an analyze - // rule in `AnalyzeCreateTable`. 
- assertEqual("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + - "PARTITIONED BY (nested STRUCT)", - createTable( - table = "my_tab", - schema = (new StructType) - .add("a", IntegerType, nullable = true, "test") - .add("b", StringType) - .add("nested", (new StructType) - .add("col1", StringType) - .add("col2", IntegerType) - ), - partitionColumnNames = Seq("nested") - ) - ) - intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", - "no viable alternative at input") - } - test("describe query") { val query = "SELECT * FROM t" assertEqual("DESCRIBE QUERY " + query, DescribeQueryCommand(query, parser.parsePlan(query))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 8ce4bcbadc223..96f9421e1d988 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -17,14 +17,10 @@ package org.apache.spark.sql.execution.command -import java.net.URI import java.util.Locale -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.sql.{AnalysisException, SaveMode} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute} -import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan @@ -32,10 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.JsonTuple import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.SparkSqlParser -import org.apache.spark.sql.execution.datasources.CreateTable -import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.StructType class DDLParserSuite extends AnalysisTest with SharedSparkSession { private lazy val parser = new SparkSqlParser() @@ -50,159 +43,17 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { } } - private def intercept(sqlCommand: String, messages: String*): Unit = - interceptParseException(parser.parsePlan)(sqlCommand, messages: _*) - - private def parseAs[T: ClassTag](query: String): T = { - parser.parsePlan(query) match { - case t: T => t - case other => - fail(s"Expected to parse ${classTag[T].runtimeClass} from query," + - s"got ${other.getClass.getName}: $query") - } - } - private def compareTransformQuery(sql: String, expected: LogicalPlan): Unit = { val plan = parser.parsePlan(sql).asInstanceOf[ScriptTransformation].copy(ioschema = null) comparePlans(plan, expected, checkAnalysis = false) } - private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { - parser.parsePlan(sql).collect { - case CreateTable(tableDesc, mode, _) => (tableDesc, mode == SaveMode.Ignore) - }.head - } - test("alter database - property values must be set") { assertUnsupported( sql = "ALTER DATABASE my_db SET DBPROPERTIES('key_without_value', 'key_with_value'='x')", containsThesePhrases = Seq("key_without_value")) } - test("create hive table - table file format") { - val allSources = Seq("parquet", "parquetfile", "orc", "orcfile", "avro", "avrofile", - "sequencefile", "rcfile", "textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab STORED AS $s" 
- val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == - hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } - } - - test("create hive table - row format and table file format") { - val createTableStart = "CREATE TABLE my_tab ROW FORMAT" - val fileFormat = s"STORED AS INPUTFORMAT 'inputfmt' OUTPUTFORMAT 'outputfmt'" - val query1 = s"$createTableStart SERDE 'anything' $fileFormat" - val query2 = s"$createTableStart DELIMITED FIELDS TERMINATED BY ' ' $fileFormat" - - // No conflicting serdes here, OK - val parsed1 = parseAs[CreateTable](query1) - assert(parsed1.tableDesc.storage.serde == Some("anything")) - assert(parsed1.tableDesc.storage.inputFormat == Some("inputfmt")) - assert(parsed1.tableDesc.storage.outputFormat == Some("outputfmt")) - - val parsed2 = parseAs[CreateTable](query2) - assert(parsed2.tableDesc.storage.serde == - Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(parsed2.tableDesc.storage.inputFormat == Some("inputfmt")) - assert(parsed2.tableDesc.storage.outputFormat == Some("outputfmt")) - } - - test("create hive table - row format serde and generic file format") { - val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") - val supportedSources = Set("sequencefile", "rcfile", "textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab ROW FORMAT SERDE 'anything' STORED AS $s" - if (supportedSources.contains(s)) { - val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == Some("anything")) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } else { - assertUnsupported(query, Seq("row format serde", "incompatible", s)) - } - } - } - - test("create hive table - row format delimited and generic file format") { - val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") - val supportedSources = Set("textfile") - - allSources.foreach { s => - val query = s"CREATE TABLE my_tab ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS $s" - if (supportedSources.contains(s)) { - val ct = parseAs[CreateTable](query) - val hiveSerde = HiveSerDe.sourceToSerDe(s) - assert(hiveSerde.isDefined) - assert(ct.tableDesc.storage.serde == - hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) - assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) - assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) - } else { - assertUnsupported(query, Seq("row format delimited", "only compatible with 'textfile'", s)) - } - } - } - - test("create hive external table - location must be specified") { - assertUnsupported( - sql = "CREATE EXTERNAL TABLE my_tab STORED AS parquet", - containsThesePhrases = Seq("create external table", "location")) - val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - val ct = parseAs[CreateTable](query) - assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) - assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) - } - - test("create hive table - property values must be set") { - 
assertUnsupported( - sql = "CREATE TABLE my_tab STORED AS parquet " + - "TBLPROPERTIES('key_without_value', 'key_with_value'='x')", - containsThesePhrases = Seq("key_without_value")) - assertUnsupported( - sql = "CREATE TABLE my_tab ROW FORMAT SERDE 'serde' " + - "WITH SERDEPROPERTIES('key_without_value', 'key_with_value'='x')", - containsThesePhrases = Seq("key_without_value")) - } - - test("create hive table - location implies external") { - val query = "CREATE TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - val ct = parseAs[CreateTable](query) - assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) - assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) - } - - test("Duplicate clauses - create hive table") { - def createTableHeader(duplicateClause: String): String = { - s"CREATE TABLE my_tab(a INT, b STRING) STORED AS parquet $duplicateClause $duplicateClause" - } - - intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), - "Found duplicate clauses: TBLPROPERTIES") - intercept(createTableHeader("LOCATION '/tmp/file'"), - "Found duplicate clauses: LOCATION") - intercept(createTableHeader("COMMENT 'a table'"), - "Found duplicate clauses: COMMENT") - intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS"), - "Found duplicate clauses: CLUSTERED BY") - intercept(createTableHeader("PARTITIONED BY (k int)"), - "Found duplicate clauses: PARTITIONED BY") - intercept(createTableHeader("STORED AS parquet"), - "Found duplicate clauses: STORED AS/BY") - intercept( - createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'"), - "Found duplicate clauses: ROW FORMAT") - } - test("insert overwrite directory") { val v1 = "INSERT OVERWRITE DIRECTORY '/tmp/file' USING parquet SELECT 1 as a" parser.parsePlan(v1) match { @@ -359,180 +210,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(e.contains("Found duplicate keys 'a'")) } - test("Test CTAS #1") { - val s1 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |COMMENT 'This is the staging page view table' - |STORED AS RCFILE - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src - """.stripMargin - - val s2 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |STORED AS RCFILE - |COMMENT 'This is the staging page view table' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |LOCATION '/user/external/page_view' - |AS SELECT * FROM src - """.stripMargin - - val s3 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |LOCATION '/user/external/page_view' - |STORED AS RCFILE - |COMMENT 'This is the staging page view table' - |AS SELECT * FROM src - """.stripMargin - - checkParsing(s1) - checkParsing(s2) - checkParsing(s3) - - def checkParsing(sql: String): Unit = { - val (desc, exists) = extractTableDesc(sql) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - assert(desc.comment == Some("This is the staging page view table")) - // TODO will be SQLText - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.inputFormat 
== Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) - assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) - assert(desc.storage.serde == - Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) - } - } - - test("Test CTAS #2") { - val s1 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |COMMENT 'This is the staging page view table' - |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' - | STORED AS - | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' - | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |AS SELECT * FROM src - """.stripMargin - - val s2 = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view - |LOCATION '/user/external/page_view' - |TBLPROPERTIES ('p1'='v1', 'p2'='v2') - |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' - | STORED AS - | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' - | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' - |COMMENT 'This is the staging page view table' - |AS SELECT * FROM src - """.stripMargin - - checkParsing(s1) - checkParsing(s2) - - def checkParsing(sql: String): Unit = { - val (desc, exists) = extractTableDesc(sql) - assert(exists) - assert(desc.identifier.database == Some("mydb")) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) - assert(desc.schema.isEmpty) // will be populated later when the table is actually created - // TODO will be SQLText - assert(desc.comment == Some("This is the staging page view table")) - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) - assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) - assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) - assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) - } - } - - test("Test CTAS #3") { - val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" - val (desc, exists) = extractTableDesc(s3) - assert(exists == false) - assert(desc.identifier.database == None) - assert(desc.identifier.table == "page_view") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.storage.locationUri == None) - assert(desc.schema.isEmpty) - assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.properties == Map()) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.properties == Map()) - } - - test("Test CTAS #4") { - val s4 = - """CREATE TABLE page_view - |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin - intercept[AnalysisException] { - extractTableDesc(s4) - } - } - - test("Test CTAS #5") { - val s5 = """CREATE TABLE ctas2 - | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" - | WITH 
SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") - | STORED AS RCFile - | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") - | AS - | SELECT key, value - | FROM src - | ORDER BY key, value""".stripMargin - val (desc, exists) = extractTableDesc(s5) - assert(exists == false) - assert(desc.identifier.database == None) - assert(desc.identifier.table == "ctas2") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.storage.locationUri == None) - assert(desc.schema.isEmpty) - assert(desc.viewText == None) // TODO will be SQLText - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) - assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) - assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) - assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) - } - - test("CTAS statement with a PARTITIONED BY clause is not allowed") { - assertUnsupported(s"CREATE TABLE ctas1 PARTITIONED BY (k int)" + - " AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp") - } - - test("CTAS statement with schema") { - assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src") - assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'") - } - test("unsupported operations") { intercept[ParseException] { parser.parsePlan( @@ -642,205 +319,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { """.stripMargin) } - test("create table - basic") { - val query = "CREATE TABLE my_table (id int, name string)" - val (desc, allowExisting) = extractTableDesc(query) - assert(!allowExisting) - assert(desc.identifier.database.isEmpty) - assert(desc.identifier.table == "my_table") - assert(desc.tableType == CatalogTableType.MANAGED) - assert(desc.schema == new StructType().add("id", "int").add("name", "string")) - assert(desc.partitionColumnNames.isEmpty) - assert(desc.bucketSpec.isEmpty) - assert(desc.viewText.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.locationUri.isEmpty) - assert(desc.storage.inputFormat == - Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc.storage.properties.isEmpty) - assert(desc.properties.isEmpty) - assert(desc.comment.isEmpty) - } - - test("create table - with database name") { - val query = "CREATE TABLE dbx.my_table (id int, name string)" - val (desc, _) = extractTableDesc(query) - assert(desc.identifier.database == Some("dbx")) - assert(desc.identifier.table == "my_table") - } - - test("create table - temporary") { - val query = "CREATE TEMPORARY TABLE tab1 (id int, name string)" - val e = intercept[ParseException] { parser.parsePlan(query) } - assert(e.message.contains("CREATE TEMPORARY TABLE is not supported yet")) - } - - test("create table - external") { - val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" - val (desc, _) = extractTableDesc(query) - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.storage.locationUri == Some(new URI("/path/to/nowhere"))) - } - - test("create table - if not exists") { - val query = 
"CREATE TABLE IF NOT EXISTS tab1 (id int, name string)" - val (_, allowExisting) = extractTableDesc(query) - assert(allowExisting) - } - - test("create table - comment") { - val query = "CREATE TABLE my_table (id int, name string) COMMENT 'its hot as hell below'" - val (desc, _) = extractTableDesc(query) - assert(desc.comment == Some("its hot as hell below")) - } - - test("create table - partitioned columns") { - val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (month int)" - val (desc, _) = extractTableDesc(query) - assert(desc.schema == new StructType() - .add("id", "int") - .add("name", "string") - .add("month", "int")) - assert(desc.partitionColumnNames == Seq("month")) - } - - test("create table - clustered by") { - val numBuckets = 10 - val bucketedColumn = "id" - val sortColumn = "id" - val baseQuery = - s""" - CREATE TABLE my_table ( - $bucketedColumn int, - name string) - CLUSTERED BY($bucketedColumn) - """ - - val query1 = s"$baseQuery INTO $numBuckets BUCKETS" - val (desc1, _) = extractTableDesc(query1) - assert(desc1.bucketSpec.isDefined) - val bucketSpec1 = desc1.bucketSpec.get - assert(bucketSpec1.numBuckets == numBuckets) - assert(bucketSpec1.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec1.sortColumnNames.isEmpty) - - val query2 = s"$baseQuery SORTED BY($sortColumn) INTO $numBuckets BUCKETS" - val (desc2, _) = extractTableDesc(query2) - assert(desc2.bucketSpec.isDefined) - val bucketSpec2 = desc2.bucketSpec.get - assert(bucketSpec2.numBuckets == numBuckets) - assert(bucketSpec2.bucketColumnNames.head.equals(bucketedColumn)) - assert(bucketSpec2.sortColumnNames.head.equals(sortColumn)) - } - - test("create table(hive) - skewed by") { - val baseQuery = "CREATE TABLE my_table (id int, name string) SKEWED BY" - val query1 = s"$baseQuery(id) ON (1, 10, 100)" - val query2 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z'))" - val query3 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z')) STORED AS DIRECTORIES" - val e1 = intercept[ParseException] { parser.parsePlan(query1) } - val e2 = intercept[ParseException] { parser.parsePlan(query2) } - val e3 = intercept[ParseException] { parser.parsePlan(query3) } - assert(e1.getMessage.contains("Operation not allowed")) - assert(e2.getMessage.contains("Operation not allowed")) - assert(e3.getMessage.contains("Operation not allowed")) - } - - test("create table(hive) - row format") { - val baseQuery = "CREATE TABLE my_table (id int, name string) ROW FORMAT" - val query1 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff'" - val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" - val query3 = - s""" - |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' - |COLLECTION ITEMS TERMINATED BY 'a' - |MAP KEYS TERMINATED BY 'b' - |LINES TERMINATED BY '\n' - |NULL DEFINED AS 'c' - """.stripMargin - val (desc1, _) = extractTableDesc(query1) - val (desc2, _) = extractTableDesc(query2) - val (desc3, _) = extractTableDesc(query3) - assert(desc1.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc1.storage.properties.isEmpty) - assert(desc2.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc2.storage.properties == Map("k1" -> "v1")) - assert(desc3.storage.properties == Map( - "field.delim" -> "x", - "escape.delim" -> "y", - "serialization.format" -> "x", - "line.delim" -> "\n", - "colelction.delim" -> "a", // yes, it's a typo from Hive :) - "mapkey.delim" -> "b")) - } - - test("create table(hive) - file format") { - val 
baseQuery = "CREATE TABLE my_table (id int, name string) STORED AS" - val query1 = s"$baseQuery INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput'" - val query2 = s"$baseQuery ORC" - val (desc1, _) = extractTableDesc(query1) - val (desc2, _) = extractTableDesc(query2) - assert(desc1.storage.inputFormat == Some("winput")) - assert(desc1.storage.outputFormat == Some("wowput")) - assert(desc1.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(desc2.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) - assert(desc2.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) - assert(desc2.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) - } - - test("create table(hive) - storage handler") { - val baseQuery = "CREATE TABLE my_table (id int, name string) STORED BY" - val query1 = s"$baseQuery 'org.papachi.StorageHandler'" - val query2 = s"$baseQuery 'org.mamachi.StorageHandler' WITH SERDEPROPERTIES ('k1'='v1')" - val e1 = intercept[ParseException] { parser.parsePlan(query1) } - val e2 = intercept[ParseException] { parser.parsePlan(query2) } - assert(e1.getMessage.contains("Operation not allowed")) - assert(e2.getMessage.contains("Operation not allowed")) - } - - test("create table - properties") { - val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" - val (desc, _) = extractTableDesc(query) - assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) - } - - test("create table(hive) - everything!") { - val query = - """ - |CREATE EXTERNAL TABLE IF NOT EXISTS dbx.my_table (id int, name string) - |COMMENT 'no comment' - |PARTITIONED BY (month int) - |ROW FORMAT SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1') - |STORED AS INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput' - |LOCATION '/path/to/mercury' - |TBLPROPERTIES ('k1'='v1', 'k2'='v2') - """.stripMargin - val (desc, allowExisting) = extractTableDesc(query) - assert(allowExisting) - assert(desc.identifier.database == Some("dbx")) - assert(desc.identifier.table == "my_table") - assert(desc.tableType == CatalogTableType.EXTERNAL) - assert(desc.schema == new StructType() - .add("id", "int") - .add("name", "string") - .add("month", "int")) - assert(desc.partitionColumnNames == Seq("month")) - assert(desc.bucketSpec.isEmpty) - assert(desc.viewText.isEmpty) - assert(desc.viewCatalogAndNamespace.isEmpty) - assert(desc.viewQueryColumnNames.isEmpty) - assert(desc.storage.locationUri == Some(new URI("/path/to/mercury"))) - assert(desc.storage.inputFormat == Some("winput")) - assert(desc.storage.outputFormat == Some("wowput")) - assert(desc.storage.serde == Some("org.apache.poof.serde.Baff")) - assert(desc.storage.properties == Map("k1" -> "v1")) - assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) - assert(desc.comment == Some("no comment")) - } - test("create table like") { val v1 = "CREATE TABLE table1 LIKE table2" val (target, source, fileFormat, provider, properties, exists) = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index fd1978c5137a5..92c114e116d0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -29,14 +29,14 @@ import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} import 
org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.TableChange.{UpdateColumnComment, UpdateColumnType} import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{CharType, DoubleType, HIVE_TYPE_STRING, IntegerType, LongType, MetadataBuilder, StringType, StructField, StructType} @@ -178,6 +178,16 @@ class PlanResolutionSuite extends AnalysisTest { }.head } + private def assertUnsupported(sql: String, containsThesePhrases: Seq[String] = Seq()): Unit = { + val e = intercept[ParseException] { + parsePlan(sql) + } + assert(e.getMessage.toLowerCase(Locale.ROOT).contains("operation not allowed")) + containsThesePhrases.foreach { p => + assert(e.getMessage.toLowerCase(Locale.ROOT).contains(p.toLowerCase(Locale.ROOT))) + } + } + test("create table - with partitioned by") { val query = "CREATE TABLE my_tab(a INT comment 'test', b STRING) " + "USING parquet PARTITIONED BY (a)" @@ -428,10 +438,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql) match { case create: CreateV2Table => @@ -467,10 +478,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql, withDefault = true) match { case 
create: CreateV2Table => @@ -542,10 +554,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql) match { case ctas: CreateTableAsSelect => @@ -576,10 +589,11 @@ class PlanResolutionSuite extends AnalysisTest { val expectedProperties = Map( "p1" -> "v1", "p2" -> "v2", - "other" -> "20", + "option.other" -> "20", "provider" -> "parquet", "location" -> "s3://bucket/path/to/data", - "comment" -> "table comment") + "comment" -> "table comment", + "other" -> "20") parseAndResolve(sql, withDefault = true) match { case ctas: CreateTableAsSelect => @@ -1557,6 +1571,630 @@ class PlanResolutionSuite extends AnalysisTest { checkFailure("testcat.tab", "foo") } + private def compareNormalized(plan1: LogicalPlan, plan2: LogicalPlan): Unit = { + /** + * Normalizes plans: + * - CreateTable the createTime in tableDesc will replaced by -1L. + */ + def normalizePlan(plan: LogicalPlan): LogicalPlan = { + plan match { + case CreateTable(tableDesc, mode, query) => + val newTableDesc = tableDesc.copy(createTime = -1L) + CreateTable(newTableDesc, mode, query) + case _ => plan // Don't transform + } + } + comparePlans(normalizePlan(plan1), normalizePlan(plan2)) + } + + test("create table - schema") { + def createTable( + table: String, + database: Option[String] = None, + tableType: CatalogTableType = CatalogTableType.MANAGED, + storage: CatalogStorageFormat = CatalogStorageFormat.empty.copy( + inputFormat = HiveSerDe.sourceToSerDe("textfile").get.inputFormat, + outputFormat = HiveSerDe.sourceToSerDe("textfile").get.outputFormat, + serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")), + schema: StructType = new StructType, + provider: Option[String] = Some("hive"), + partitionColumnNames: Seq[String] = Seq.empty, + comment: Option[String] = None, + mode: SaveMode = SaveMode.ErrorIfExists, + query: Option[LogicalPlan] = None): CreateTable = { + CreateTable( + CatalogTable( + identifier = TableIdentifier(table, database), + tableType = tableType, + storage = storage, + schema = schema, + provider = provider, + partitionColumnNames = partitionColumnNames, + comment = comment + ), mode, query + ) + } + + def compare(sql: String, plan: LogicalPlan): Unit = { + compareNormalized(parseAndResolve(sql), plan) + } + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + ) + ) + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + .add("c", IntegerType) + .add("d", StringType, nullable = true, "test2"), + partitionColumnNames = Seq("c", "d") + ) + ) + compare("CREATE TABLE my_tab(id BIGINT, nested STRUCT) " + + "STORED AS textfile", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("id", LongType) + .add("nested", (new StructType) + .add("col1", StringType) + .add("col2", IntegerType) + ) + ) + ) + // Partitioned by a StructType should be accepted by `SparkSqlParser` 
but will fail an analyze + // rule in `AnalyzeCreateTable`. + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + "PARTITIONED BY (nested STRUCT)", + createTable( + table = "my_tab", + database = Some("default"), + schema = (new StructType) + .add("a", IntegerType, nullable = true, "test") + .add("b", StringType) + .add("nested", (new StructType) + .add("col1", StringType) + .add("col2", IntegerType) + ), + partitionColumnNames = Seq("nested") + ) + ) + + interceptParseException(parsePlan)( + "CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", + "extraneous input ':'") + } + + test("create hive table - table file format") { + val allSources = Seq("parquet", "parquetfile", "orc", "orcfile", "avro", "avrofile", + "sequencefile", "rcfile", "textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab STORED AS $s" + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == + hiveSerde.get.serde.orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } + } + + test("create hive table - row format and table file format") { + val createTableStart = "CREATE TABLE my_tab ROW FORMAT" + val fileFormat = s"STORED AS INPUTFORMAT 'inputfmt' OUTPUTFORMAT 'outputfmt'" + val query1 = s"$createTableStart SERDE 'anything' $fileFormat" + val query2 = s"$createTableStart DELIMITED FIELDS TERMINATED BY ' ' $fileFormat" + + // No conflicting serdes here, OK + parseAndResolve(query1) match { + case parsed1: CreateTable => + assert(parsed1.tableDesc.storage.serde == Some("anything")) + assert(parsed1.tableDesc.storage.inputFormat == Some("inputfmt")) + assert(parsed1.tableDesc.storage.outputFormat == Some("outputfmt")) + } + + parseAndResolve(query2) match { + case parsed2: CreateTable => + assert(parsed2.tableDesc.storage.serde == + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(parsed2.tableDesc.storage.inputFormat == Some("inputfmt")) + assert(parsed2.tableDesc.storage.outputFormat == Some("outputfmt")) + } + } + + test("create hive table - row format serde and generic file format") { + val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") + val supportedSources = Set("sequencefile", "rcfile", "textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab ROW FORMAT SERDE 'anything' STORED AS $s" + if (supportedSources.contains(s)) { + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == Some("anything")) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } else { + assertUnsupported(query, Seq("row format serde", "incompatible", s)) + } + } + } + + test("create hive table - row format delimited and generic file format") { + val allSources = Seq("parquet", "orc", "avro", "sequencefile", "rcfile", "textfile") + val supportedSources = Set("textfile") + + allSources.foreach { s => + val query = s"CREATE TABLE my_tab ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS $s" + if (supportedSources.contains(s)) { + parseAndResolve(query) match { + case ct: CreateTable => + val hiveSerde = HiveSerDe.sourceToSerDe(s) + 
assert(hiveSerde.isDefined) + assert(ct.tableDesc.storage.serde == hiveSerde.get.serde + .orElse(Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))) + assert(ct.tableDesc.storage.inputFormat == hiveSerde.get.inputFormat) + assert(ct.tableDesc.storage.outputFormat == hiveSerde.get.outputFormat) + } + } else { + assertUnsupported(query, Seq("row format delimited", "only compatible with 'textfile'", s)) + } + } + } + + test("create hive external table - location must be specified") { + val exc = intercept[AnalysisException] { + parseAndResolve("CREATE EXTERNAL TABLE my_tab STORED AS parquet") + } + assert(exc.getMessage.contains("CREATE EXTERNAL TABLE must be accompanied by LOCATION")) + + val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + parseAndResolve(query) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) + } + } + + test("create hive table - property values must be set") { + assertUnsupported( + sql = "CREATE TABLE my_tab STORED AS parquet " + + "TBLPROPERTIES('key_without_value', 'key_with_value'='x')", + containsThesePhrases = Seq("key_without_value")) + assertUnsupported( + sql = "CREATE TABLE my_tab ROW FORMAT SERDE 'serde' " + + "WITH SERDEPROPERTIES('key_without_value', 'key_with_value'='x')", + containsThesePhrases = Seq("key_without_value")) + } + + test("create hive table - location implies external") { + val query = "CREATE TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + parseAndResolve(query) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) + } + } + + test("Duplicate clauses - create hive table") { + def intercept(sqlCommand: String, messages: String*): Unit = + interceptParseException(parsePlan)(sqlCommand, messages: _*) + + def createTableHeader(duplicateClause: String): String = { + s"CREATE TABLE my_tab(a INT, b STRING) STORED AS parquet $duplicateClause $duplicateClause" + } + + intercept(createTableHeader("TBLPROPERTIES('test' = 'test2')"), + "Found duplicate clauses: TBLPROPERTIES") + intercept(createTableHeader("LOCATION '/tmp/file'"), + "Found duplicate clauses: LOCATION") + intercept(createTableHeader("COMMENT 'a table'"), + "Found duplicate clauses: COMMENT") + intercept(createTableHeader("CLUSTERED BY(b) INTO 256 BUCKETS"), + "Found duplicate clauses: CLUSTERED BY") + intercept(createTableHeader("PARTITIONED BY (k int)"), + "Found duplicate clauses: PARTITIONED BY") + intercept(createTableHeader("STORED AS parquet"), + "Found duplicate clauses: STORED AS/BY") + intercept( + createTableHeader("ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe'"), + "Found duplicate clauses: ROW FORMAT") + } + + test("Test CTAS #1") { + val s1 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |COMMENT 'This is the staging page view table' + |STORED AS RCFILE + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |LOCATION '/user/external/page_view' + |AS SELECT * FROM src + """.stripMargin + + val s3 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |TBLPROPERTIES ('p1'='v1', 
'p2'='v2') + |LOCATION '/user/external/page_view' + |STORED AS RCFILE + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + checkParsing(s3) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + assert(desc.comment == Some("This is the staging page view table")) + // TODO will be SQLText + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.storage.serde == + Some("org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } + } + + test("Test CTAS #2") { + val s1 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |COMMENT 'This is the staging page view table' + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |AS SELECT * FROM src + """.stripMargin + + val s2 = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS mydb.page_view + |LOCATION '/user/external/page_view' + |TBLPROPERTIES ('p1'='v1', 'p2'='v2') + |ROW FORMAT SERDE 'parquet.hive.serde.ParquetHiveSerDe' + | STORED AS + | INPUTFORMAT 'parquet.hive.DeprecatedParquetInputFormat' + | OUTPUTFORMAT 'parquet.hive.DeprecatedParquetOutputFormat' + |COMMENT 'This is the staging page view table' + |AS SELECT * FROM src + """.stripMargin + + checkParsing(s1) + checkParsing(s2) + + def checkParsing(sql: String): Unit = { + val (desc, exists) = extractTableDesc(sql) + assert(exists) + assert(desc.identifier.database == Some("mydb")) + assert(desc.identifier.table == "page_view") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/user/external/page_view"))) + assert(desc.schema.isEmpty) // will be populated later when the table is actually created + // TODO will be SQLText + assert(desc.comment == Some("This is the staging page view table")) + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.storage.properties == Map()) + assert(desc.storage.inputFormat == Some("parquet.hive.DeprecatedParquetInputFormat")) + assert(desc.storage.outputFormat == Some("parquet.hive.DeprecatedParquetOutputFormat")) + assert(desc.storage.serde == Some("parquet.hive.serde.ParquetHiveSerDe")) + assert(desc.properties == Map("p1" -> "v1", "p2" -> "v2")) + } + } + + test("Test CTAS #3") { + val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val (desc, exists) = extractTableDesc(s3) + assert(exists == false) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "page_view") + 
assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.storage.locationUri == None) + assert(desc.schema.isEmpty) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.properties == Map()) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.storage.outputFormat == + Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc.properties == Map()) + } + + test("Test CTAS #4") { + val s4 = + """CREATE TABLE page_view + |STORED BY 'storage.handler.class.name' AS SELECT * FROM src""".stripMargin + intercept[AnalysisException] { + extractTableDesc(s4) + } + } + + test("Test CTAS #5") { + val s5 = """CREATE TABLE ctas2 + | ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe" + | WITH SERDEPROPERTIES("serde_p1"="p1","serde_p2"="p2") + | STORED AS RCFile + | TBLPROPERTIES("tbl_p1"="p11", "tbl_p2"="p22") + | AS + | SELECT key, value + | FROM src + | ORDER BY key, value""".stripMargin + val (desc, exists) = extractTableDesc(s5) + assert(exists == false) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "ctas2") + assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.storage.locationUri == None) + assert(desc.schema.isEmpty) + assert(desc.viewText == None) // TODO will be SQLText + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.properties == Map(("serde_p1" -> "p1"), ("serde_p2" -> "p2"))) + assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileInputFormat")) + assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.RCFileOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe")) + assert(desc.properties == Map(("tbl_p1" -> "p11"), ("tbl_p2" -> "p22"))) + } + + test("CTAS statement with a PARTITIONED BY clause is not allowed") { + assertUnsupported(s"CREATE TABLE ctas1 PARTITIONED BY (k int)" + + " AS SELECT key, value FROM (SELECT 1 as key, 2 as value) tmp") + } + + test("CTAS statement with schema") { + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT * FROM src") + assertUnsupported(s"CREATE TABLE ctas1 (age INT, name STRING) AS SELECT 1, 'hello'") + } + + test("create table - basic") { + val query = "CREATE TABLE my_table (id int, name string)" + val (desc, allowExisting) = extractTableDesc(query) + assert(!allowExisting) + assert(desc.identifier.database == Some("default")) + assert(desc.identifier.table == "my_table") + assert(desc.tableType == CatalogTableType.MANAGED) + assert(desc.schema == new StructType().add("id", "int").add("name", "string")) + assert(desc.partitionColumnNames.isEmpty) + assert(desc.bucketSpec.isEmpty) + assert(desc.viewText.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.locationUri.isEmpty) + assert(desc.storage.inputFormat == + Some("org.apache.hadoop.mapred.TextInputFormat")) + assert(desc.storage.outputFormat == + Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) + assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc.storage.properties.isEmpty) + assert(desc.properties.isEmpty) + assert(desc.comment.isEmpty) + } + + test("create table - with database name") { + val query = "CREATE TABLE dbx.my_table 
(id int, name string)" + val (desc, _) = extractTableDesc(query) + assert(desc.identifier.database == Some("dbx")) + assert(desc.identifier.table == "my_table") + } + + test("create table - temporary") { + val query = "CREATE TEMPORARY TABLE tab1 (id int, name string)" + val e = intercept[ParseException] { parsePlan(query) } + assert(e.message.contains("Operation not allowed: CREATE TEMPORARY TABLE")) + } + + test("create table - external") { + val query = "CREATE EXTERNAL TABLE tab1 (id int, name string) LOCATION '/path/to/nowhere'" + val (desc, _) = extractTableDesc(query) + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.storage.locationUri == Some(new URI("/path/to/nowhere"))) + } + + test("create table - if not exists") { + val query = "CREATE TABLE IF NOT EXISTS tab1 (id int, name string)" + val (_, allowExisting) = extractTableDesc(query) + assert(allowExisting) + } + + test("create table - comment") { + val query = "CREATE TABLE my_table (id int, name string) COMMENT 'its hot as hell below'" + val (desc, _) = extractTableDesc(query) + assert(desc.comment == Some("its hot as hell below")) + } + + test("create table - partitioned columns") { + val query = "CREATE TABLE my_table (id int, name string) PARTITIONED BY (month int)" + val (desc, _) = extractTableDesc(query) + assert(desc.schema == new StructType() + .add("id", "int") + .add("name", "string") + .add("month", "int")) + assert(desc.partitionColumnNames == Seq("month")) + } + + test("create table - clustered by") { + val numBuckets = 10 + val bucketedColumn = "id" + val sortColumn = "id" + val baseQuery = + s""" + CREATE TABLE my_table ( + $bucketedColumn int, + name string) + CLUSTERED BY($bucketedColumn) + """ + + val query1 = s"$baseQuery INTO $numBuckets BUCKETS" + val (desc1, _) = extractTableDesc(query1) + assert(desc1.bucketSpec.isDefined) + val bucketSpec1 = desc1.bucketSpec.get + assert(bucketSpec1.numBuckets == numBuckets) + assert(bucketSpec1.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec1.sortColumnNames.isEmpty) + + val query2 = s"$baseQuery SORTED BY($sortColumn) INTO $numBuckets BUCKETS" + val (desc2, _) = extractTableDesc(query2) + assert(desc2.bucketSpec.isDefined) + val bucketSpec2 = desc2.bucketSpec.get + assert(bucketSpec2.numBuckets == numBuckets) + assert(bucketSpec2.bucketColumnNames.head.equals(bucketedColumn)) + assert(bucketSpec2.sortColumnNames.head.equals(sortColumn)) + } + + test("create table(hive) - skewed by") { + val baseQuery = "CREATE TABLE my_table (id int, name string) SKEWED BY" + val query1 = s"$baseQuery(id) ON (1, 10, 100)" + val query2 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z'))" + val query3 = s"$baseQuery(id, name) ON ((1, 'x'), (2, 'y'), (3, 'z')) STORED AS DIRECTORIES" + val e1 = intercept[ParseException] { parsePlan(query1) } + val e2 = intercept[ParseException] { parsePlan(query2) } + val e3 = intercept[ParseException] { parsePlan(query3) } + assert(e1.getMessage.contains("Operation not allowed")) + assert(e2.getMessage.contains("Operation not allowed")) + assert(e3.getMessage.contains("Operation not allowed")) + } + + test("create table(hive) - row format") { + val baseQuery = "CREATE TABLE my_table (id int, name string) ROW FORMAT" + val query1 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff'" + val query2 = s"$baseQuery SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1')" + val query3 = + s""" + |$baseQuery DELIMITED FIELDS TERMINATED BY 'x' ESCAPED BY 'y' + |COLLECTION ITEMS TERMINATED BY 'a' + |MAP KEYS 
TERMINATED BY 'b' + |LINES TERMINATED BY '\n' + |NULL DEFINED AS 'c' + """.stripMargin + val (desc1, _) = extractTableDesc(query1) + val (desc2, _) = extractTableDesc(query2) + val (desc3, _) = extractTableDesc(query3) + assert(desc1.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc1.storage.properties.isEmpty) + assert(desc2.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc2.storage.properties == Map("k1" -> "v1")) + assert(desc3.storage.properties == Map( + "field.delim" -> "x", + "escape.delim" -> "y", + "serialization.format" -> "x", + "line.delim" -> "\n", + "colelction.delim" -> "a", // yes, it's a typo from Hive :) + "mapkey.delim" -> "b")) + } + + test("create table(hive) - file format") { + val baseQuery = "CREATE TABLE my_table (id int, name string) STORED AS" + val query1 = s"$baseQuery INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput'" + val query2 = s"$baseQuery ORC" + val (desc1, _) = extractTableDesc(query1) + val (desc2, _) = extractTableDesc(query2) + assert(desc1.storage.inputFormat == Some("winput")) + assert(desc1.storage.outputFormat == Some("wowput")) + assert(desc1.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc2.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) + assert(desc2.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) + assert(desc2.storage.serde == Some("org.apache.hadoop.hive.ql.io.orc.OrcSerde")) + } + + test("create table(hive) - storage handler") { + val baseQuery = "CREATE TABLE my_table (id int, name string) STORED BY" + val query1 = s"$baseQuery 'org.papachi.StorageHandler'" + val query2 = s"$baseQuery 'org.mamachi.StorageHandler' WITH SERDEPROPERTIES ('k1'='v1')" + val e1 = intercept[ParseException] { parsePlan(query1) } + val e2 = intercept[ParseException] { parsePlan(query2) } + assert(e1.getMessage.contains("Operation not allowed")) + assert(e2.getMessage.contains("Operation not allowed")) + } + + test("create table - properties") { + val query = "CREATE TABLE my_table (id int, name string) TBLPROPERTIES ('k1'='v1', 'k2'='v2')" + parsePlan(query) match { + case state: CreateTableStatement => + assert(state.properties == Map("k1" -> "v1", "k2" -> "v2")) + } + } + + test("create table(hive) - everything!") { + val query = + """ + |CREATE EXTERNAL TABLE IF NOT EXISTS dbx.my_table (id int, name string) + |COMMENT 'no comment' + |PARTITIONED BY (month int) + |ROW FORMAT SERDE 'org.apache.poof.serde.Baff' WITH SERDEPROPERTIES ('k1'='v1') + |STORED AS INPUTFORMAT 'winput' OUTPUTFORMAT 'wowput' + |LOCATION '/path/to/mercury' + |TBLPROPERTIES ('k1'='v1', 'k2'='v2') + """.stripMargin + val (desc, allowExisting) = extractTableDesc(query) + assert(allowExisting) + assert(desc.identifier.database == Some("dbx")) + assert(desc.identifier.table == "my_table") + assert(desc.tableType == CatalogTableType.EXTERNAL) + assert(desc.schema == new StructType() + .add("id", "int") + .add("name", "string") + .add("month", "int")) + assert(desc.partitionColumnNames == Seq("month")) + assert(desc.bucketSpec.isEmpty) + assert(desc.viewText.isEmpty) + assert(desc.viewCatalogAndNamespace.isEmpty) + assert(desc.viewQueryColumnNames.isEmpty) + assert(desc.storage.locationUri == Some(new URI("/path/to/mercury"))) + assert(desc.storage.inputFormat == Some("winput")) + assert(desc.storage.outputFormat == Some("wowput")) + assert(desc.storage.serde == Some("org.apache.poof.serde.Baff")) + assert(desc.storage.properties == Map("k1" -> "v1")) + 
assert(desc.properties == Map("k1" -> "v1", "k2" -> "v2")) + assert(desc.comment == Some("no comment")) + } + // TODO: add tests for more commands. } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 983209051c8ae..00c599065ce31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -166,13 +166,13 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { ) }.getMessage assert(error.contains("Operation not allowed") && - error.contains("CREATE TEMPORARY TABLE ... USING ... AS query")) + error.contains("CREATE TEMPORARY TABLE")) } } test("disallows CREATE EXTERNAL TABLE ... USING ... AS query") { withTable("t") { - val error = intercept[ParseException] { + val error = intercept[AnalysisException] { sql( s""" |CREATE EXTERNAL TABLE t USING PARQUET diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 56b871644453b..b8b1da4cb9db7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -598,8 +598,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { sql("CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet") } - assert(e.message.contains("Must specify a data type for each partition column while creating " + - "Hive partitioned table.")) + assert(e.message.contains("partition column b is not defined in table")) } test("add/drop partition with location - managed table") { @@ -2701,8 +2700,7 @@ class HiveDDLSuite |AS SELECT 1 as a, "a" as b """.stripMargin) }.getMessage - assert(err1.contains("Schema may not be specified in a Create Table As Select " + - "(CTAS) statement")) + assert(err1.contains("Schema may not be specified in a Create Table As Select")) val err2 = intercept[ParseException] { spark.sql( @@ -2713,8 +2711,7 @@ class HiveDDLSuite |AS SELECT 1 as a, "a" as b """.stripMargin) }.getMessage - assert(err2.contains("Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table")) + assert(err2.contains("Partition column types may not be specified in Create Table As Select")) } test("Hive CTAS with dynamic partition") { @@ -2783,7 +2780,7 @@ class HiveDDLSuite |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' """.stripMargin) }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) // row format doesn't work with provider hive e = intercept[AnalysisException] { @@ -2794,7 +2791,7 @@ class HiveDDLSuite |WITH SERDEPROPERTIES ('test' = 'test') """.stripMargin) }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... 
ROW FORMAT SERDE")) // row format doesn't work without 'STORED AS' e = intercept[AnalysisException] { @@ -2807,6 +2804,17 @@ class HiveDDLSuite }.getMessage assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + // 'INPUTFORMAT' and 'OUTPUTFORMAT' conflict with 'USING' + e = intercept[AnalysisException] { + spark.sql( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable USING format + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin) + }.getMessage + assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... STORED AS")) + // row format works with STORED AS hive format (from hive table) spark.sql( """ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index 24b1e3405379c..f723c9f80c2ab 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -21,11 +21,10 @@ import java.net.URI import org.scalatest.BeforeAndAfterAll -import org.apache.spark.sql.{AnalysisException, SaveMode, SparkSession} +import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils} -import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -71,8 +70,8 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte } private def extractTableDesc(sql: String): (CatalogTable, Boolean) = { - TestHive.sessionState.sqlParser.parsePlan(sql).collect { - case CreateTable(tableDesc, mode, _) => (tableDesc, mode == SaveMode.Ignore) + TestHive.sessionState.analyzer.execute(TestHive.sessionState.sqlParser.parsePlan(sql)).collect { + case CreateTableCommand(tableDesc, ifNotExists) => (tableDesc, ifNotExists) }.head } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 712f81d98753e..79b3c3efe531c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -712,8 +712,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi |AS SELECT key, value FROM mytable1 """.stripMargin) }.getMessage - assert(e.contains("Create Partitioned Table As Select cannot specify data type for " + - "the partition columns of the target table")) + assert(e.contains("Partition column types may not be specified in Create Table As Select")) } } } From d691d85701adc3db3b7545b87065f2a5113c2b99 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 25 Nov 2020 23:15:52 +0800 Subject: [PATCH 0573/1009] [SPARK-33496][SQL] Improve error message of ANSI explicit cast ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/30260, there are some type conversions disallowed under ANSI mode. We should tell users what they can do if they have to use the disallowed casting. 
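To make the improvement concrete, here is a minimal spark-shell sketch (illustrative only, not part of this patch) of a cast the new message targets; the exact wording is produced by `AnsiCast.typeCheckFailureMessage` in the diff below, and `spark.sql.ansi.enabled` is the fallback config referenced in the hint.

```
// Illustration only: with ANSI mode on, casting a complex type to STRING is rejected at
// analysis time, and the message now suggests a workaround (ARRAY_JOIN, or turning
// spark.sql.ansi.enabled off) instead of a bare "cannot cast array<int> to string".
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST(array(1, 2) AS STRING)")  // throws AnalysisException carrying the hint
```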
### Why are the changes needed? Make it more user-friendly. ### Does this PR introduce _any_ user-facing change? Yes, the error message is improved on casting failure when ANSI mode is enabled ### How was this patch tested? Unit tests. Closes #30440 from gengliangwang/improveAnsiCastErrorMSG. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/expressions/Cast.scala | 51 ++++++++++++++++++- .../sql/catalyst/expressions/CastSuite.scala | 38 ++++++++++++-- 2 files changed, 82 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e5f11b5e74916..e6f585cacc6c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -262,6 +262,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit */ def canCast(from: DataType, to: DataType): Boolean + /** + * Returns the error message if casting from one type to another one is invalid. + */ + def typeCheckFailureMessage: String + override def toString: String = { val ansi = if (ansiEnabled) "ansi_" else "" s"${ansi}cast($child as ${dataType.simpleString})" @@ -271,8 +276,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (canCast(child.dataType, dataType)) { TypeCheckResult.TypeCheckSuccess } else { - TypeCheckResult.TypeCheckFailure( - s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}") + TypeCheckResult.TypeCheckFailure(typeCheckFailureMessage) } } @@ -1755,6 +1759,12 @@ case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String } else { Cast.canCast(from, to) } + + override def typeCheckFailureMessage: String = if (ansiEnabled) { + AnsiCast.typeCheckFailureMessage(child.dataType, dataType, SQLConf.ANSI_ENABLED.key, "false") + } else { + s"cannot cast ${child.dataType.catalogString} to ${dataType.catalogString}" + } } /** @@ -1774,6 +1784,14 @@ case class AnsiCast(child: Expression, dataType: DataType, timeZoneId: Option[St override protected val ansiEnabled: Boolean = true override def canCast(from: DataType, to: DataType): Boolean = AnsiCast.canCast(from, to) + + // For now, this expression is only used in table insertion. + // If there are more scenarios for this expression, we should update the error message on type + // check failure. + override def typeCheckFailureMessage: String = + AnsiCast.typeCheckFailureMessage(child.dataType, dataType, + SQLConf.STORE_ASSIGNMENT_POLICY.key, SQLConf.StoreAssignmentPolicy.LEGACY.toString) + } object AnsiCast { @@ -1876,6 +1894,35 @@ object AnsiCast { case _ => false } + + def typeCheckFailureMessage( + from: DataType, + to: DataType, + fallbackConfKey: String, + fallbackConfValue: String): String = + (from, to) match { + case (_: NumericType, TimestampType) => + // scalastyle:off line.size.limit + s""" + | cannot cast ${from.catalogString} to ${to.catalogString}. + | To convert values from ${from.catalogString} to ${to.catalogString}, you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead. + |""".stripMargin + + case (_: ArrayType, StringType) => + s""" + | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. 
+ | If you have to cast ${from.catalogString} to ${to.catalogString}, you can use the function ARRAY_JOIN or set $fallbackConfKey as $fallbackConfValue. + |""".stripMargin + + case _ if Cast.canCast(from, to) => + s""" + | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. + | If you have to cast ${from.catalogString} to ${to.catalogString}, you can set $fallbackConfKey as $fallbackConfValue. + |""".stripMargin + + case _ => s"cannot cast ${from.catalogString} to ${to.catalogString}" + // scalastyle:on line.size.limit + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 2bc27ad35efff..f1fc921e401ba 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -25,6 +25,7 @@ import scala.collection.parallel.immutable.ParVector import org.apache.spark.SparkFunSuite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.TypeCheckFailure import org.apache.spark.sql.catalyst.analysis.TypeCoercion.numericPrecedence import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} @@ -841,12 +842,28 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { cast(Literal(134.12), DecimalType(3, 2)), "cannot be represented") } + protected def setConfigurationHint: String + + private def verifyCastFailure(c: CastBase, optionalExpectedMsg: Option[String] = None): Unit = { + val typeCheckResult = c.checkInputDataTypes() + assert(typeCheckResult.isFailure) + assert(typeCheckResult.isInstanceOf[TypeCheckFailure]) + val message = typeCheckResult.asInstanceOf[TypeCheckFailure].message + + if (optionalExpectedMsg.isDefined) { + assert(message.contains(optionalExpectedMsg.get)) + } else { + assert(message.contains("with ANSI mode on")) + assert(message.contains(setConfigurationHint)) + } + } + test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(TimestampType) val timestampLiteral = Literal(1L, TimestampType) numericTypes.foreach { numericType => - assert(cast(timestampLiteral, numericType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(timestampLiteral, numericType)) } } @@ -855,7 +872,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkInvalidCastFromNumericType(DateType) val dateLiteral = Literal(1, DateType) numericTypes.foreach { numericType => - assert(cast(dateLiteral, numericType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(dateLiteral, numericType)) } } @@ -880,9 +897,9 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } test("ANSI mode: disallow casting complex types as String type") { - assert(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType).checkInputDataTypes().isFailure) - assert(cast(Literal.create(Map(1 -> "a")), StringType).checkInputDataTypes().isFailure) - assert(cast(Literal.create((1, "a", 0.1)), StringType).checkInputDataTypes().isFailure) + verifyCastFailure(cast(Literal.create(Array(1, 2, 3, 4, 5)), StringType)) + verifyCastFailure(cast(Literal.create(Map(1 -> "a")), StringType)) + verifyCastFailure(cast(Literal.create((1, "a", 0.1)), StringType)) } 
test("cast from invalid string to numeric should throw NumberFormatException") { @@ -1489,6 +1506,9 @@ class CastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { case _ => Cast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.ANSI_ENABLED.key} as false" } /** @@ -1511,6 +1531,10 @@ class AnsiCastSuiteWithAnsiModeOn extends AnsiCastSuiteBase { case _ => AnsiCast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + + s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" } /** @@ -1533,4 +1557,8 @@ class AnsiCastSuiteWithAnsiModeOff extends AnsiCastSuiteBase { case _ => AnsiCast(Literal(v), targetType, timeZoneId) } } + + override def setConfigurationHint: String = + s"set ${SQLConf.STORE_ASSIGNMENT_POLICY.key} as" + + s" ${SQLConf.StoreAssignmentPolicy.LEGACY.toString}" } From 9643eab53e4bbaee08f7f8c766b0d1e0d9348d55 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 25 Nov 2020 08:55:39 -0800 Subject: [PATCH 0574/1009] [SPARK-33540][SQL] Subexpression elimination for interpreted predicate ### What changes were proposed in this pull request? This patch proposes to support subexpression elimination for interpreted predicate. ### Why are the changes needed? Similar to interpreted projection, there are use cases when codegen predicate is not able to work, e.g. too complex schema, non-codegen expression, etc. When there are frequently occurring expressions (subexpressions) among predicate expression, the performance is quite bad as we need to re-compute same expressions. We should be able to support subexpression elimination for interpreted predicate like interpreted projection. ### Does this PR introduce _any_ user-facing change? No, this doesn't change user behavior. ### How was this patch tested? Unit test and benchmark. Closes #30497 from viirya/SPARK-33540. 
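To make the targeted use case concrete, here is a rough sketch (illustrative only, not code from this patch) of a predicate in which the same non-leaf expression tree occurs twice; with `spark.sql.subexpressionElimination.enabled` on, the `SubExprEvaluationRuntime` that this patch wires into `InterpretedPredicate` lets that shared subtree be evaluated once per input row and reused.

```
// Illustrative driver code; the expression classes are Spark's, the wiring below is hypothetical.
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types.IntegerType

val a = BoundReference(0, IntegerType, nullable = false)
val b = BoundReference(1, IntegerType, nullable = false)
val shared = Add(a, b)  // this subtree appears twice in the predicate below
val pred = InterpretedPredicate(
  And(GreaterThan(shared, Literal(0)), LessThan(shared, Literal(10))))
pred.initialize(0)
pred.eval(InternalRow(3, 4))  // true; the shared Add(a, b) can be computed once and reused
```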
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/predicates.scala | 19 +++++++++++++++++-- ...ExprEliminationBenchmark-jdk11-results.txt | 16 ++++++++-------- .../SubExprEliminationBenchmark-results.txt | 16 ++++++++-------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 53d6394d0d1f1..53ac3560bc3b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -46,11 +46,26 @@ abstract class BasePredicate { } case class InterpretedPredicate(expression: Expression) extends BasePredicate { - override def eval(r: InternalRow): Boolean = expression.eval(r).asInstanceOf[Boolean] + private[this] val subExprEliminationEnabled = SQLConf.get.subexpressionEliminationEnabled + private[this] lazy val runtime = + new SubExprEvaluationRuntime(SQLConf.get.subexpressionEliminationCacheMaxEntries) + private[this] val expr = if (subExprEliminationEnabled) { + runtime.proxyExpressions(Seq(expression)).head + } else { + expression + } + + override def eval(r: InternalRow): Boolean = { + if (subExprEliminationEnabled) { + runtime.setInput(r) + } + + expr.eval(r).asInstanceOf[Boolean] + } override def initialize(partitionIndex: Int): Unit = { super.initialize(partitionIndex) - expression.foreach { + expr.foreach { case n: Nondeterministic => n.initialize(partitionIndex) case _ => } diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index 1eb7b534d2194..a7f0acc3cdc86 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 26447 27127 605 0.0 264467933.4 1.0X -subExprElimination false, codegen: false 25673 26035 546 0.0 256732419.1 1.0X -subExprElimination true, codegen: true 1384 1448 102 0.0 13842910.3 19.1X -subExprElimination true, codegen: false 1244 1347 123 0.0 12442389.3 21.3X +subExprElimination false, codegen: true 24827 25398 562 0.0 248271027.2 1.0X +subExprElimination false, codegen: false 25052 25704 625 0.0 250518603.6 1.0X +subExprElimination true, codegen: true 1540 1606 92 0.0 15403083.7 16.1X +subExprElimination true, codegen: false 1487 1535 53 0.0 14865051.6 16.7X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 34631 35449 833 0.0 346309884.0 1.0X -subexpressionElimination off, codegen on 34480 34851 353 0.0 344798490.4 1.0X -subexpressionElimination off, codegen on 16618 16811 291 0.0 166176642.6 2.1X -subexpressionElimination off, codegen on 34316 34667 310 0.0 343157094.7 1.0X +subexpressionElimination off, codegen on 37327 38261 809 0.0 373266387.0 1.0X +subexpressionElimination off, codegen on 36126 37445 1575 0.0 361263987.0 1.0X +subexpressionElimination off, codegen on 20152 21596 1263 0.0 201522903.8 1.9X +subexpressionElimination off, codegen on 20799 20940 233 0.0 207993923.0 1.8X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index 801f519ca76a1..e5f1bc14243e0 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 22767 23240 424 0.0 227665316.7 1.0X -subExprElimination false, codegen: false 22869 23351 465 0.0 228693464.1 1.0X -subExprElimination true, codegen: true 1328 1340 10 0.0 13280056.2 17.1X -subExprElimination true, codegen: false 1248 1276 31 0.0 12476135.1 18.2X +subExprElimination false, codegen: true 23094 23763 585 0.0 230939301.2 1.0X +subExprElimination false, codegen: false 23161 24087 844 0.0 231611379.8 1.0X +subExprElimination true, codegen: true 1492 1517 30 0.0 14921022.9 15.5X +subExprElimination true, codegen: false 1300 1361 93 0.0 12996167.7 17.8X Preparing data for benchmarking ... OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37691 38846 1004 0.0 376913767.9 1.0X -subexpressionElimination off, codegen on 37852 39124 1103 0.0 378517745.5 1.0X -subexpressionElimination off, codegen on 22900 23085 202 0.0 229000242.5 1.6X -subexpressionElimination off, codegen on 38298 38598 374 0.0 382978731.3 1.0X +subexpressionElimination off, codegen on 37069 37767 985 0.0 370694301.5 1.0X +subexpressionElimination off, codegen on 37095 37970 1008 0.0 370945081.6 1.0X +subexpressionElimination off, codegen on 20618 21443 715 0.0 206175173.8 1.8X +subexpressionElimination off, codegen on 21563 21887 307 0.0 215626274.7 1.7X From 7cf6a6f996e25754de13aa66badbe6d1d53efb36 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 25 Nov 2020 09:57:46 -0800 Subject: [PATCH 0575/1009] [SPARK-31257][SPARK-33561][SQL][FOLLOWUP] Fix Scala 2.13 compilation ### What changes were proposed in this pull request? This PR is a follow-up to fix Scala 2.13 compilation. 
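For context, a minimal sketch (my illustration with hypothetical helper names, not code from the patch) of the two Scala 2.13 patterns this follow-up addresses: under 2.13 `scala.Seq` aliases `immutable.Seq`, so the mutable `Buffer` returned by `asScala` needs an explicit `.toSeq`, and `Map#filterKeys` returns a lazy `MapView` that needs `.toMap`.

```
// Hypothetical helpers for illustration only.
import scala.collection.JavaConverters._  // the converters Spark used at the time

// asScala on a java.util.List yields a mutable.Buffer; in 2.13 that no longer conforms
// to Seq (now immutable.Seq), hence the explicit .toSeq added in AstBuilder/SparkSqlParser.
def toScalaSeq(xs: java.util.List[String]): Seq[String] = xs.asScala.toSeq

// filterKeys returns a MapView in 2.13; .toMap materializes it again, as in V2SessionCatalog.
def stripOptionPrefix(props: Map[String, String], prefix: String): Map[String, String] =
  props.filterKeys(_.startsWith(prefix))
    .map { case (k, v) => k.stripPrefix(prefix) -> v }
    .toMap
```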
### Why are the changes needed? To support Scala 2.13 in Apache Spark 3.1. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action Scala 2.13 compilation job. Closes #30502 from dongjoon-hyun/SPARK-31257. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 ++- .../scala/org/apache/spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/datasources/v2/V2SessionCatalog.scala | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 25423e510157a..606d923061441 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2946,7 +2946,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val location = visitLocationSpecList(ctx.locationSpec()) val (cleanedOptions, newLocation) = cleanTableOptions(ctx, options, location) val comment = visitCommentSpecList(ctx.commentSpec()) - val serdeInfo = getSerdeInfo(ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx) + val serdeInfo = + getSerdeInfo(ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) (partTransforms, partCols, bucketSpec, cleanedProperties, cleanedOptions, newLocation, comment, serdeInfo) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index a92f0775f1c05..568c7112954f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -440,7 +440,7 @@ class SparkSqlAstBuilder extends AstBuilder { val location = visitLocationSpecList(ctx.locationSpec()) // TODO: Do not skip serde check for CREATE TABLE LIKE. val serdeInfo = getSerdeInfo( - ctx.rowFormat.asScala, ctx.createFileFormat.asScala, ctx, skipCheck = true) + ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx, skipCheck = true) if (provider.isDefined && serdeInfo.isDefined) { operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index f330d6a8c99e2..a0bc65d3f9057 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -114,7 +114,7 @@ class V2SessionCatalog(catalog: SessionCatalog) private def toOptions(properties: Map[String, String]): Map[String, String] = { properties.filterKeys(_.startsWith(TableCatalog.OPTION_PREFIX)).map { case (key, value) => key.drop(TableCatalog.OPTION_PREFIX.length) -> value - } + }.toMap } override def alterTable( From 1de3fc42829187c54334df1fb2149dc4aeb78ed9 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 25 Nov 2020 12:37:59 -0800 Subject: [PATCH 0576/1009] [SPARK-33525][SQL] Update hive-service-rpc to 3.1.2 ### What changes were proposed in this pull request? 
We support Hive metastore versions 0.12.0 through 3.1.2, but we only support hive-jdbc 0.12.0 through 2.3.7. It will throw a `TProtocolException` if we use hive-jdbc 3.x:

```
[root@spark-3267648 apache-hive-3.1.2-bin]# bin/beeline -u jdbc:hive2://localhost:10000/default
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Spark SQL (version 3.1.0-SNAPSHOT)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://localhost:10000/default> create table t1(id int) using parquet;
Unexpected end of file when reading from HS2 server. The root cause might be too many concurrent connections. Please ask the administrator to check the number of active connections, and adjust hive.server2.thrift.max.worker.threads if applicable.
Error: org.apache.thrift.transport.TTransportException (state=08S01,code=0)
```

```
org.apache.thrift.protocol.TProtocolException: Missing version in readMessageBegin, old client?
	at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:234)
	at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:27)
	at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:53)
	at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:310)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
	at java.base/java.lang.Thread.run(Thread.java:832)
```

This PR upgrades hive-service-rpc to 3.1.2 to fix this issue.

### Why are the changes needed?

To support hive-jdbc 3.x.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manual test:
```
[root@spark-3267648 apache-hive-3.1.2-bin]# bin/beeline -u jdbc:hive2://localhost:10000/default
Connecting to jdbc:hive2://localhost:10000/default
Connected to: Spark SQL (version 3.1.0-SNAPSHOT)
Driver: Hive JDBC (version 3.1.2)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 3.1.2 by Apache Hive
0: jdbc:hive2://localhost:10000/default> create table t1(id int) using parquet;
+---------+
| Result  |
+---------+
+---------+
No rows selected (1.051 seconds)
0: jdbc:hive2://localhost:10000/default> insert into t1 values(1);
+---------+
| Result  |
+---------+
+---------+
No rows selected (2.08 seconds)
0: jdbc:hive2://localhost:10000/default> select * from t1;
+-----+
| id  |
+-----+
| 1   |
+-----+
1 row selected (0.605 seconds)
```

Closes #30478 from wangyum/SPARK-33525.
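The same connectivity can also be checked programmatically (an illustrative sketch, not part of the patch; the host, port, and table name are assumptions matching the manual test above):

```scala
import java.sql.DriverManager

// Assumes the Spark Thrift server is listening on localhost:10000 and the
// hive-jdbc 3.x driver jar is on the classpath.
Class.forName("org.apache.hive.jdbc.HiveDriver")
val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default")
try {
  val stmt = conn.createStatement()
  val rs = stmt.executeQuery("SELECT * FROM t1")
  while (rs.next()) {
    println(rs.getInt(1))
  }
} finally {
  conn.close()
}
```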
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- .../apache/hive/service/cli/CLIService.java | 10 ++++++ .../apache/hive/service/cli/GetInfoType.java | 3 +- .../apache/hive/service/cli/ICLIService.java | 3 ++ .../cli/thrift/ThriftBinaryCLIService.java | 13 ++++++++ .../service/cli/thrift/ThriftCLIService.java | 31 +++++++++++++++++++ .../cli/thrift/ThriftCLIServiceClient.java | 9 ++++++ .../thriftserver/SparkSQLCLIService.scala | 1 + 10 files changed, 72 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index bcf05506855c5..8802220726f78 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -88,7 +88,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar -hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar +hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index cd274bef7045b..d45eeea0ee92b 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -65,7 +65,7 @@ hive-jdbc/2.3.7//hive-jdbc-2.3.7.jar hive-llap-common/2.3.7//hive-llap-common-2.3.7.jar hive-metastore/2.3.7//hive-metastore-2.3.7.jar hive-serde/2.3.7//hive-serde-2.3.7.jar -hive-service-rpc/2.3.7//hive-service-rpc-2.3.7.jar +hive-service-rpc/3.1.2//hive-service-rpc-3.1.2.jar hive-shims-0.23/2.3.7//hive-shims-0.23-2.3.7.jar hive-shims-common/2.3.7//hive-shims-common-2.3.7.jar hive-shims-scheduler/2.3.7//hive-shims-scheduler-2.3.7.jar diff --git a/pom.xml b/pom.xml index e5b1f30edd3be..cd7e1767d6b18 100644 --- a/pom.xml +++ b/pom.xml @@ -2088,7 +2088,7 @@ ${hive.group} hive-service-rpc - ${hive.version} + 3.1.2 * diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java index bdc1e6251e560..68f044c6a0f28 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java @@ -45,6 +45,7 @@ import org.apache.hive.service.cli.operation.Operation; import org.apache.hive.service.cli.session.HiveSession; import org.apache.hive.service.cli.session.SessionManager; +import org.apache.hive.service.rpc.thrift.TOperationHandle; import org.apache.hive.service.rpc.thrift.TProtocolVersion; import org.apache.hive.service.server.HiveServer2; import org.slf4j.Logger; @@ -567,6 +568,15 @@ public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory au LOG.info(sessionHandle + ": renewDelegationToken()"); } + @Override + public String getQueryId(TOperationHandle opHandle) throws HiveSQLException { + Operation operation = sessionManager.getOperationManager().getOperation( + new OperationHandle(opHandle)); + final String queryId = operation.getParentSession().getHiveConf().getVar(ConfVars.HIVEQUERYID); + LOG.debug(opHandle + ": getQueryId() " + queryId); + return queryId; + } + public SessionManager getSessionManager() { return sessionManager; } diff --git 
a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java index a64d262a8f301..575dff8f8f47b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoType.java @@ -72,7 +72,8 @@ public enum GetInfoType { CLI_DESCRIBE_PARAMETER(TGetInfoType.CLI_DESCRIBE_PARAMETER), CLI_CATALOG_NAME(TGetInfoType.CLI_CATALOG_NAME), CLI_COLLATION_SEQ(TGetInfoType.CLI_COLLATION_SEQ), - CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN); + CLI_MAX_IDENTIFIER_LEN(TGetInfoType.CLI_MAX_IDENTIFIER_LEN), + CLI_ODBC_KEYWORDS(TGetInfoType.CLI_ODBC_KEYWORDS); private final TGetInfoType tInfoType; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java index 3200909477821..a87c6691ebac7 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ICLIService.java @@ -24,6 +24,7 @@ import org.apache.hive.service.auth.HiveAuthFactory; +import org.apache.hive.service.rpc.thrift.TOperationHandle; public interface ICLIService { @@ -98,6 +99,8 @@ RowSet fetchResults(OperationHandle opHandle, FetchOrientation orientation, String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String owner, String renewer) throws HiveSQLException; + String getQueryId(TOperationHandle operationHandle) throws HiveSQLException; + void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index ce79e3c8228a6..ffca1070d0047 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -32,7 +32,11 @@ import org.apache.hive.service.ServiceException; import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.CLIService; +import org.apache.hive.service.cli.HiveSQLException; +import org.apache.hive.service.rpc.thrift.TGetQueryIdReq; +import org.apache.hive.service.rpc.thrift.TGetQueryIdResp; import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; +import org.apache.thrift.TException; import org.apache.thrift.TProcessorFactory; import org.apache.thrift.protocol.TBinaryProtocol; import org.apache.thrift.server.TThreadPoolServer; @@ -107,6 +111,15 @@ protected void initializeServer() { } } + @Override + public TGetQueryIdResp GetQueryId(TGetQueryIdReq req) throws TException { + try { + return new TGetQueryIdResp(cliService.getQueryId(req.getOperationHandle())); + } catch (HiveSQLException e) { + throw new TException(e); + } + } + @Override public void run() { try { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index ea9ed57410045..150f1d60fc466 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -262,6 +262,28 @@ public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { return resp; } + @Override + public TSetClientInfoResp SetClientInfo(TSetClientInfoReq req) throws TException { + // TODO: We don't do anything for now, just log this for debugging. + // We may be able to make use of this later, e.g. for workload management. + if (req.isSetConfiguration()) { + StringBuilder sb = null; + for (Map.Entry e : req.getConfiguration().entrySet()) { + if (sb == null) { + SessionHandle sh = new SessionHandle(req.getSessionHandle()); + sb = new StringBuilder("Client information for ").append(sh).append(": "); + } else { + sb.append(", "); + } + sb.append(e.getKey()).append(" = ").append(e.getValue()); + } + if (sb != null) { + LOG.info("{}", sb); + } + } + return new TSetClientInfoResp(OK_STATUS); + } + private String getIpAddress() { String clientIpAddress; // Http transport mode. @@ -674,6 +696,15 @@ public TGetCrossReferenceResp GetCrossReference(TGetCrossReferenceReq req) protected abstract void initializeServer(); + @Override + public TGetQueryIdResp GetQueryId(TGetQueryIdReq req) throws TException { + try { + return new TGetQueryIdResp(cliService.getQueryId(req.getOperationHandle())); + } catch (HiveSQLException e) { + throw new TException(e); + } + } + @Override public abstract void run(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java index b13ddf72f77e7..0e81e4446caac 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIServiceClient.java @@ -490,4 +490,13 @@ public OperationHandle getCrossReference(SessionHandle sessionHandle, throw new HiveSQLException(e); } } + + @Override + public String getQueryId(TOperationHandle operationHandle) throws HiveSQLException { + try { + return cliService.GetQueryId(new TGetQueryIdReq(operationHandle)).getQueryId(); + } catch (TException e) { + throw new HiveSQLException(e); + } + } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index df0fa514ccff3..e9420ad21bebd 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -104,6 +104,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) + case GetInfoType.CLI_ODBC_KEYWORDS => new GetInfoValue("Unimplemented") case _ => super.getInfo(sessionHandle, getInfoType) } } From c529426d872c6f09b05679ba76478e3b932e3696 Mon Sep 17 00:00:00 2001 From: shane knapp Date: Wed, 25 Nov 2020 15:15:50 -0800 Subject: [PATCH 0577/1009] [SPARK-33565][BUILD][PYTHON] remove python3.8 and fix breakage ### What changes were proposed in this pull request? 
remove python 3.8 from python/run-tests.py and stop build breaks ### Why are the changes needed? the python tests are running against the bare-bones system install of python3, rather than an anaconda environment. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? via jenkins Closes #30506 from shaneknapp/remove-py38. Authored-by: shane knapp Signed-off-by: shane knapp --- python/run-tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index 712f38fb81b83..34800b0e9fa54 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -160,7 +160,7 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): def get_default_python_executables(): - python_execs = [x for x in ["python3.6", "python3.8", "pypy3"] if which(x)] + python_execs = [x for x in ["python3.6", "pypy3"] if which(x)] if "python3.6" not in python_execs: p = which("python3") From fb7b87021437c52d72ad276f92c8d6f5443ebd78 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 25 Nov 2020 15:22:47 -0800 Subject: [PATCH 0578/1009] [SPARK-33523][SQL][TEST][FOLLOWUP] Fix benchmark case name in SubExprEliminationBenchmark ### What changes were proposed in this pull request? Fix the wrong benchmark case name. ### Why are the changes needed? The last commit to refactor the benchmark code missed a change of case name. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Unit test. Closes #30505 from viirya/SPARK-33523-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- ...SubExprEliminationBenchmark-jdk11-results.txt | 16 ++++++++-------- .../SubExprEliminationBenchmark-results.txt | 16 ++++++++-------- .../execution/SubExprEliminationBenchmark.scala | 2 +- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt index a7f0acc3cdc86..5eeb485a921b8 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk11-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 24827 25398 562 0.0 248271027.2 1.0X -subExprElimination false, codegen: false 25052 25704 625 0.0 250518603.6 1.0X -subExprElimination true, codegen: true 1540 1606 92 0.0 15403083.7 16.1X -subExprElimination true, codegen: false 1487 1535 53 0.0 14865051.6 16.7X +subExprElimination false, codegen: true 22482 23194 652 0.0 224817884.1 1.0X +subExprElimination false, codegen: false 22544 22658 155 0.0 225436869.9 1.0X +subExprElimination true, codegen: true 1371 1403 34 0.0 13710714.3 16.4X +subExprElimination true, codegen: false 1295 1317 20 0.0 12949824.3 17.4X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 11.0.9+11 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37327 38261 809 0.0 373266387.0 1.0X -subexpressionElimination off, codegen on 36126 37445 1575 0.0 361263987.0 1.0X -subexpressionElimination off, codegen on 20152 21596 1263 0.0 201522903.8 1.9X -subexpressionElimination off, codegen on 20799 20940 233 0.0 207993923.0 1.8X +subExprElimination false, codegen: true 34976 35331 326 0.0 349759975.5 1.0X +subExprElimination false, codegen: false 34101 34802 607 0.0 341014685.7 1.0X +subExprElimination true, codegen: true 19440 19622 272 0.0 194402251.0 1.8X +subExprElimination true, codegen: false 19247 20064 719 0.0 192466667.6 1.8X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index e5f1bc14243e0..49a107f542857 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -7,19 +7,19 @@ OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 23094 23763 585 0.0 230939301.2 1.0X -subExprElimination false, codegen: false 23161 24087 844 0.0 231611379.8 1.0X -subExprElimination true, codegen: true 1492 1517 30 0.0 14921022.9 15.5X -subExprElimination true, codegen: false 1300 1361 93 0.0 12996167.7 17.8X +subExprElimination false, codegen: true 25399 25869 466 0.0 253992369.6 1.0X +subExprElimination false, codegen: false 24086 25094 888 0.0 240858699.5 1.1X +subExprElimination true, codegen: true 1527 1600 64 0.0 15274388.8 16.6X +subExprElimination true, codegen: false 1560 1600 52 0.0 15597825.4 16.3X Preparing data for benchmarking ... 
OpenJDK 64-Bit Server VM 1.8.0_265-b01 on Mac OS X 10.15.6 Intel(R) Core(TM) i7-9750H CPU @ 2.60GHz from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subexpressionElimination off, codegen on 37069 37767 985 0.0 370694301.5 1.0X -subexpressionElimination off, codegen on 37095 37970 1008 0.0 370945081.6 1.0X -subexpressionElimination off, codegen on 20618 21443 715 0.0 206175173.8 1.8X -subexpressionElimination off, codegen on 21563 21887 307 0.0 215626274.7 1.7X +subExprElimination false, codegen: true 39661 40585 844 0.0 396612867.5 1.0X +subExprElimination false, codegen: false 40633 48813 1858 0.0 406328241.3 1.0X +subExprElimination true, codegen: true 25819 27096 1174 0.0 258194064.4 1.5X +subExprElimination true, codegen: false 23467 25137 1447 0.0 234668398.2 1.7X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala index e26acbcb3cd21..0ed0126add7a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SubExprEliminationBenchmark.scala @@ -100,7 +100,7 @@ object SubExprEliminationBenchmark extends SqlBasedBenchmark { // We only benchmark subexpression performance under codegen/non-codegen, so disabling // json optimization. val caseName = s"subExprElimination $subExprEliminationEnabled, codegen: $codegenEnabled" - benchmark.addCase("subexpressionElimination off, codegen on", numIters) { _ => + benchmark.addCase(caseName, numIters) { _ => withSQLConf( SQLConf.SUBEXPRESSION_ELIMINATION_ENABLED.key -> subExprEliminationEnabled, SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled, From 919ea45e89b17d2f9b336dc4bfe6e15e8a083ed3 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 26 Nov 2020 10:19:38 +0900 Subject: [PATCH 0579/1009] [SPARK-33562][UI] Improve the style of the checkbox in executor page ### What changes were proposed in this pull request? 1. Remove the fixed width style of class `container-fluid-div`. So that the UI looks clean when the text is long. 2. Add one space between a checkbox and the text on the right side, which is consistent with the stage page. ### Why are the changes needed? The width of class `container-fluid-div` is set as 200px after https://github.com/apache/spark/pull/21688 . This makes the checkbox in the executor page messy. ![image](https://user-images.githubusercontent.com/1097932/100242069-3bc5ab80-2ee9-11eb-8c7d-96c221398fee.png) We should remove the width limit. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test. After the changes: ![image](https://user-images.githubusercontent.com/1097932/100257802-2f4a4e80-2efb-11eb-9eb0-92d6988ad14b.png) Closes #30500 from gengliangwang/reviseStyle. 
Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- .../apache/spark/ui/static/executorspage.js | 18 +++++++++--------- .../org/apache/spark/ui/static/webui.css | 4 ---- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 4f179a93c9d5f..1d3f628f5fab6 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -608,15 +608,15 @@ $(document).ready(function () { "Show Additional Metrics" + "" + "

    " + - "
    Select All
    " + - "
    On Heap Memory
    " + - "
    Off Heap Memory
    " + - "
    Peak JVM Memory OnHeap / OffHeap
    " + - "
    Peak Execution Memory OnHeap / OffHeap
    " + - "
    Peak Storage Memory OnHeap / OffHeap
    " + - "
    Peak Pool Memory Direct / Mapped
    " + - "
    Resources
    " + - "
    Resource Profile Id
    " + + "
    Select All
    " + + "
    On Heap Memory
    " + + "
    Off Heap Memory
    " + + "
    Peak JVM Memory OnHeap / OffHeap
    " + + "
    Peak Execution Memory OnHeap / OffHeap
    " + + "
    Peak Storage Memory OnHeap / OffHeap
    " + + "
    Peak Pool Memory Direct / Mapped
    " + + "
    Resources
    " + + "
    Resource Profile Id
    " + "
    "); reselectCheckboxesBasedOnTaskTableState(); diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index d4394ebcfd258..262cee7b58aff 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -321,10 +321,6 @@ a.expandbutton { width: 100%; } -.container-fluid-div { - width: 200px; -} - .select-all-div-checkbox-div { width: 90px; } From ed9e6fc18236ef6994c7f24a4017cf43f77b7ca1 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 26 Nov 2020 11:42:12 +0900 Subject: [PATCH 0580/1009] [SPARK-33565][INFRA][FOLLOW-UP] Keep the test coverage with Python 3.8 in GitHub Actions ### What changes were proposed in this pull request? This PR proposes to keep the test coverage with Python 3.8 in GitHub Actions. It is not tested for now in Jenkins due to an env issue. **Before this change in GitHub Actions:** ``` ======================================================================== Running PySpark tests ======================================================================== Running PySpark tests. Output is in /__w/spark/spark/python/unit-tests.log Will test against the following Python executables: ['python3.6', 'pypy3'] ... ``` **After this change in GitHub Actions:** ``` ======================================================================== Running PySpark tests ======================================================================== Running PySpark tests. Output is in /__w/spark/spark/python/unit-tests.log Will test against the following Python executables: ['python3.6', 'python3.8', 'pypy3'] ``` ### Why are the changes needed? To keep the test coverage with Python 3.8 in GitHub Actions. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? GitHub Actions in this build will test. Closes #30510 from HyukjinKwon/SPARK-33565. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/dev/run-tests.py b/dev/run-tests.py index 5bdbc0ffb850c..6bc73ca3669f3 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -483,6 +483,12 @@ def run_python_tests(test_modules, parallelism, with_coverage=False): if test_modules != [modules.root]: command.append("--modules=%s" % ','.join(m.name for m in test_modules)) command.append("--parallelism=%i" % parallelism) + if "GITHUB_ACTIONS" in os.environ: + # See SPARK-33565. Python 3.8 was temporarily removed as its default Python executables + # to test because of Jenkins environment issue. Once Jenkins has Python 3.8 to test, + # we should remove this change back and add python3.8 into python/run-tests.py script. + command.append("--python-executable=%s" % ','.join( + x for x in ["python3.6", "python3.8", "pypy3"] if which(x))) run_cmd(command) if with_coverage: From dfa3978d9191e02eabf65d1829c970644d25d57e Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Wed, 25 Nov 2020 19:32:22 -0800 Subject: [PATCH 0581/1009] [SPARK-33551][SQL] Do not use custom shuffle reader for repartition ### What changes were proposed in this pull request? This PR fixes an AQE issue where local shuffle reader, partition coalescing, or skew join optimization can be mistakenly applied to a shuffle introduced by repartition or a regular shuffle that logically replaces a repartition shuffle. 
The proposed solution checks for the presence of any repartition shuffle and filters out not applicable optimization rules for the final stage in an AQE plan. ### Why are the changes needed? Without the change, the output of a repartition query may not be correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #30494 from maryannxue/csr-repartition. Authored-by: Maryann Xue Signed-off-by: Xiao Li --- .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../adaptive/AdaptiveSparkPlanExec.scala | 31 +++-- .../adaptive/CoalesceShufflePartitions.scala | 11 +- .../adaptive/CustomShuffleReaderRule.scala | 33 +++++ .../adaptive/OptimizeLocalShuffleReader.scala | 9 +- .../adaptive/OptimizeSkewedJoin.scala | 14 ++- .../adaptive/AdaptiveQueryExecSuite.scala | 116 +++++++++++++++++- 7 files changed, 187 insertions(+), 29 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 0738478888aeb..add9a1d0f3aa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -509,7 +509,7 @@ object SQLConf { "'spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes'") .version("3.0.0") .intConf - .checkValue(_ > 0, "The skew factor must be positive.") + .checkValue(_ >= 0, "The skew factor cannot be negative.") .createWithDefault(5) val SKEW_JOIN_SKEWED_PARTITION_THRESHOLD = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 570edbf5f78a3..89d3b53510469 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -37,8 +37,6 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec._ import org.apache.spark.sql.execution.bucketing.DisableUnnecessaryBucketedScan -import org.apache.spark.sql.execution.command.DataWritingCommandExec -import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange._ import org.apache.spark.sql.execution.ui.{SparkListenerSQLAdaptiveExecutionUpdate, SparkListenerSQLAdaptiveSQLMetricUpdates, SQLPlanMetric} import org.apache.spark.sql.internal.SQLConf @@ -104,16 +102,6 @@ case class AdaptiveSparkPlanExec( OptimizeLocalShuffleReader ) - private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = - context.qe.sparkPlan match { - case _: DataWritingCommandExec | _: V2TableWriteExec => - // SPARK-32932: Local shuffle reader could break partitioning that works best - // for the following writing command - queryStageOptimizerRules.filterNot(_ == OptimizeLocalShuffleReader) - case _ => - queryStageOptimizerRules - } - // A list of physical optimizer rules to be applied right after a new stage is created. The input // plan to these rules has exchange as its root node. 
@transient private val postStageCreationRules = Seq( @@ -121,6 +109,23 @@ case class AdaptiveSparkPlanExec( CollapseCodegenStages() ) + // The partitioning of the query output depends on the shuffle(s) in the final stage. If the + // original plan contains a repartition operator, we need to preserve the specified partitioning, + // whether or not the repartition-introduced shuffle is optimized out because of an underlying + // shuffle of the same partitioning. Thus, we need to exclude some `CustomShuffleReaderRule`s + // from the final stage, depending on the presence and properties of repartition operators. + private def finalStageOptimizerRules: Seq[Rule[SparkPlan]] = { + val origins = inputPlan.collect { + case s: ShuffleExchangeLike => s.shuffleOrigin + } + val allRules = queryStageOptimizerRules ++ postStageCreationRules + allRules.filter { + case c: CustomShuffleReaderRule => + origins.forall(c.supportedShuffleOrigins.contains) + case _ => true + } + } + @transient private val costEvaluator = SimpleCostEvaluator @transient private val initialPlan = context.session.withActive { @@ -249,7 +254,7 @@ case class AdaptiveSparkPlanExec( // Run the final plan when there's no more unfinished stages. currentPhysicalPlan = applyPhysicalRules( result.newPlan, - finalStageOptimizerRules ++ postStageCreationRules, + finalStageOptimizerRules, Some((planChangeLogger, "AQE Final Query Stage Optimization"))) isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 0cf3ab0cca49a..0f482142227d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -19,16 +19,18 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REPARTITION, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.internal.SQLConf /** * A rule to coalesce the shuffle partitions based on the map output statistics, which can * avoid many small reduce tasks that hurt performance. 
*/ -case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPlan] { +case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS, REPARTITION) + override def apply(plan: SparkPlan): SparkPlan = { if (!conf.coalesceShufflePartitionsEnabled) { return plan @@ -86,7 +88,6 @@ case class CoalesceShufflePartitions(session: SparkSession) extends Rule[SparkPl } private def supportCoalesce(s: ShuffleExchangeLike): Boolean = { - s.outputPartitioning != SinglePartition && - (s.shuffleOrigin == ENSURE_REQUIREMENTS || s.shuffleOrigin == REPARTITION) + s.outputPartitioning != SinglePartition && supportedShuffleOrigins.contains(s.shuffleOrigin) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala new file mode 100644 index 0000000000000..c5b8f73ea59d3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CustomShuffleReaderRule.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.exchange.ShuffleOrigin + +/** + * Adaptive Query Execution rule that may create [[CustomShuffleReaderExec]] on top of query stages. + */ +trait CustomShuffleReaderRule extends Rule[SparkPlan] { + + /** + * Returns the list of [[ShuffleOrigin]]s supported by this rule. 
+ */ + def supportedShuffleOrigins: Seq[ShuffleOrigin] +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala index 8f57947cb6396..4dc982d666d18 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeLocalShuffleReader.scala @@ -19,9 +19,8 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans.physical.SinglePartition -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.internal.SQLConf @@ -34,7 +33,9 @@ import org.apache.spark.sql.internal.SQLConf * then run `EnsureRequirements` to check whether additional shuffle introduced. * If introduced, we will revert all the local readers. */ -object OptimizeLocalShuffleReader extends Rule[SparkPlan] { +object OptimizeLocalShuffleReader extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS) private val ensureRequirements = EnsureRequirements @@ -144,6 +145,6 @@ object OptimizeLocalShuffleReader extends Rule[SparkPlan] { } private def supportLocalReader(s: ShuffleExchangeLike): Boolean = { - s.outputPartitioning != SinglePartition && s.shuffleOrigin == ENSURE_REQUIREMENTS + s.outputPartitioning != SinglePartition && supportedShuffleOrigins.contains(s.shuffleOrigin) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala index 582d586c59358..085934d906b3c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/OptimizeSkewedJoin.scala @@ -23,9 +23,8 @@ import org.apache.commons.io.FileUtils import org.apache.spark.{MapOutputStatistics, MapOutputTrackerMaster, SparkEnv} import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ -import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, EnsureRequirements, ShuffleExchangeExec, ShuffleOrigin} import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.internal.SQLConf @@ -53,7 +52,9 @@ import org.apache.spark.sql.internal.SQLConf * Note that, when this rule is enabled, it also coalesces non-skewed partitions like * `CoalesceShufflePartitions` does. 
*/ -object OptimizeSkewedJoin extends Rule[SparkPlan] { +object OptimizeSkewedJoin extends CustomShuffleReaderRule { + + override val supportedShuffleOrigins: Seq[ShuffleOrigin] = Seq(ENSURE_REQUIREMENTS) private val ensureRequirements = EnsureRequirements @@ -290,7 +291,9 @@ object OptimizeSkewedJoin extends Rule[SparkPlan] { private object ShuffleStage { def unapply(plan: SparkPlan): Option[ShuffleStageInfo] = plan match { - case s: ShuffleQueryStageExec if s.mapStats.isDefined => + case s: ShuffleQueryStageExec + if s.mapStats.isDefined && + OptimizeSkewedJoin.supportedShuffleOrigins.contains(s.shuffle.shuffleOrigin) => val mapStats = s.mapStats.get val sizes = mapStats.bytesByPartitionId val partitions = sizes.zipWithIndex.map { @@ -299,7 +302,8 @@ private object ShuffleStage { Some(ShuffleStageInfo(s, mapStats, partitions)) case CustomShuffleReaderExec(s: ShuffleQueryStageExec, partitionSpecs) - if s.mapStats.isDefined && partitionSpecs.nonEmpty => + if s.mapStats.isDefined && partitionSpecs.nonEmpty && + OptimizeSkewedJoin.supportedShuffleOrigins.contains(s.shuffle.shuffleOrigin) => val mapStats = s.mapStats.get val sizes = mapStats.bytesByPartitionId val partitions = partitionSpecs.map { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 758965954b374..45ba2202d83d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.execution.{PartialReducerPartitionSpec, QueryExecuti import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec -import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, ReusedExchangeExec, ShuffleExchangeExec} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, Exchange, REPARTITION, REPARTITION_WITH_NUM, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ @@ -1317,4 +1317,118 @@ class AdaptiveQueryExecSuite checkNumLocalShuffleReaders(df.queryExecution.executedPlan, numShufflesWithoutLocalReader = 1) } } + + test("SPARK-33551: Do not use custom shuffle reader for repartition") { + def hasRepartitionShuffle(plan: SparkPlan): Boolean = { + find(plan) { + case s: ShuffleExchangeLike => + s.shuffleOrigin == REPARTITION || s.shuffleOrigin == REPARTITION_WITH_NUM + case _ => false + }.isDefined + } + + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.SHUFFLE_PARTITIONS.key -> "5") { + val df = sql( + """ + |SELECT * FROM ( + | SELECT * FROM testData WHERE key = 1 + |) + |RIGHT OUTER JOIN testData2 + |ON value = b + """.stripMargin) + + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { + // Repartition with no partition num specified. + val dfRepartition = df.repartition('b) + dfRepartition.collect() + val plan = dfRepartition.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. 
+ assert(!hasRepartitionShuffle(plan)) + val bhj = findTopLevelBroadcastHashJoin(plan) + assert(bhj.length == 1) + checkNumLocalShuffleReaders(plan, 1) + // Probe side is coalesced. + val customReader = bhj.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]) + assert(customReader.isDefined) + assert(customReader.get.asInstanceOf[CustomShuffleReaderExec].hasCoalescedPartition) + + // Repartition with partition default num specified. + val dfRepartitionWithNum = df.repartition(5, 'b) + dfRepartitionWithNum.collect() + val planWithNum = dfRepartitionWithNum.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(planWithNum)) + val bhjWithNum = findTopLevelBroadcastHashJoin(planWithNum) + assert(bhjWithNum.length == 1) + checkNumLocalShuffleReaders(planWithNum, 1) + // Probe side is not coalesced. + assert(bhjWithNum.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]).isEmpty) + + // Repartition with partition non-default num specified. + val dfRepartitionWithNum2 = df.repartition(3, 'b) + dfRepartitionWithNum2.collect() + val planWithNum2 = dfRepartitionWithNum2.queryExecution.executedPlan + // The top shuffle from repartition is not optimized out, and this is the only shuffle that + // does not have local shuffle reader. + assert(hasRepartitionShuffle(planWithNum2)) + val bhjWithNum2 = findTopLevelBroadcastHashJoin(planWithNum2) + assert(bhjWithNum2.length == 1) + checkNumLocalShuffleReaders(planWithNum2, 1) + val customReader2 = bhjWithNum2.head.right.find(_.isInstanceOf[CustomShuffleReaderExec]) + assert(customReader2.isDefined) + assert(customReader2.get.asInstanceOf[CustomShuffleReaderExec].isLocalReader) + } + + // Force skew join + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.SKEW_JOIN_ENABLED.key -> "true", + SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key -> "1", + SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR.key -> "0", + SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "10") { + // Repartition with no partition num specified. + val dfRepartition = df.repartition('b) + dfRepartition.collect() + val plan = dfRepartition.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(plan)) + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.length == 1) + // No skew join due to the repartition. + assert(!smj.head.isSkewJoin) + // Both sides are coalesced. + val customReaders = collect(smj.head) { + case c: CustomShuffleReaderExec if c.hasCoalescedPartition => c + } + assert(customReaders.length == 2) + + // Repartition with default partition num specified. + val dfRepartitionWithNum = df.repartition(5, 'b) + dfRepartitionWithNum.collect() + val planWithNum = dfRepartitionWithNum.queryExecution.executedPlan + // The top shuffle from repartition is optimized out. + assert(!hasRepartitionShuffle(planWithNum)) + val smjWithNum = findTopLevelSortMergeJoin(planWithNum) + assert(smjWithNum.length == 1) + // No skew join due to the repartition. + assert(!smjWithNum.head.isSkewJoin) + // No coalesce due to the num in repartition. + val customReadersWithNum = collect(smjWithNum.head) { + case c: CustomShuffleReaderExec if c.hasCoalescedPartition => c + } + assert(customReadersWithNum.isEmpty) + + // Repartition with default non-partition num specified. 
+ val dfRepartitionWithNum2 = df.repartition(3, 'b) + dfRepartitionWithNum2.collect() + val planWithNum2 = dfRepartitionWithNum2.queryExecution.executedPlan + // The top shuffle from repartition is not optimized out. + assert(hasRepartitionShuffle(planWithNum2)) + val smjWithNum2 = findTopLevelSortMergeJoin(planWithNum2) + assert(smjWithNum2.length == 1) + // Skew join can apply as the repartition is not optimized out. + assert(smjWithNum2.head.isSkewJoin) + } + } + } } From d082ad0abfe0bc26760626ae0ecb415a8d508a1f Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 27 Nov 2020 11:00:09 +0900 Subject: [PATCH 0582/1009] [SPARK-33563][PYTHON][R][SQL] Expose inverse hyperbolic trig functions in PySpark and SparkR ### What changes were proposed in this pull request? This PR adds the following functions (introduced in Scala API with SPARK-33061): - `acosh` - `asinh` - `atanh` to Python and R. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? New functions. ### How was this patch tested? New unit tests. Closes #30501 from zero323/SPARK-33563. Authored-by: zero323 Signed-off-by: HyukjinKwon --- R/pkg/NAMESPACE | 3 ++ R/pkg/R/functions.R | 39 ++++++++++++++++++++ R/pkg/tests/fulltests/test_sparkSQL.R | 1 + python/docs/source/reference/pyspark.sql.rst | 4 +- python/pyspark/sql/functions.py | 39 ++++++++++++++++++++ python/pyspark/sql/functions.pyi | 3 ++ python/pyspark/sql/tests/test_functions.py | 16 ++++++++ 7 files changed, 104 insertions(+), 1 deletion(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index b927a6b96b810..91f6e6dc8a0e6 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -202,6 +202,7 @@ exportMethods("%<=>%", "%in%", "abs", "acos", + "acosh", "add_months", "alias", "approx_count_distinct", @@ -232,8 +233,10 @@ exportMethods("%<=>%", "asc_nulls_last", "ascii", "asin", + "asinh", "assert_true", "atan", + "atanh", "atan2", "avg", "base64", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 039d28a3a37b6..b12f7b472ec83 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -455,6 +455,19 @@ setMethod("acos", column(jc) }) +#' @details +#' \code{acosh}: Computes inverse hyperbolic cosine of the input column. +#' +#' @rdname column_math_functions +#' @aliases acosh acosh,Column-method +#' @note acosh since 3.1.0 +setMethod("acosh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "acosh", x@jc) + column(jc) + }) + #' @details #' \code{approx_count_distinct}: Returns the approximate number of distinct items in a group. #' @@ -522,6 +535,19 @@ setMethod("asin", column(jc) }) +#' @details +#' \code{asinh}: Computes inverse hyperbolic sine of the input column. +#' +#' @rdname column_math_functions +#' @aliases asinh asinh,Column-method +#' @note asinh since 3.1.0 +setMethod("asinh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "asinh", x@jc) + column(jc) + }) + #' @details #' \code{atan}: Returns the inverse tangent of the given value, #' as if computed by \code{java.lang.Math.atan()} @@ -536,6 +562,19 @@ setMethod("atan", column(jc) }) +#' @details +#' \code{atanh}: Computes inverse hyperbolic tangent of the input column. 
+#' +#' @rdname column_math_functions +#' @aliases atanh atanh,Column-method +#' @note atanh since 3.1.0 +setMethod("atanh", + signature(x = "Column"), + function(x) { + jc <- callJStatic("org.apache.spark.sql.functions", "atanh", x@jc) + column(jc) + }) + #' avg #' #' Aggregate function: returns the average of the values in a group. diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 45de1ef1bd3d1..81d4e14df791d 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1430,6 +1430,7 @@ test_that("column functions", { nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + desc_nulls_first(c1) + desc_nulls_last(c1) + c29 <- acosh(c1) + asinh(c1) + atanh(c1) # Test if base::is.nan() is exposed expect_equal(is.nan(c("a", "b")), c(FALSE, FALSE)) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 3f903fe8c7acd..0dc2f6e55bb96 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -307,6 +307,7 @@ Functions abs acos + acosh add_months aggregate approxCountDistinct @@ -331,8 +332,10 @@ Functions asc_nulls_last ascii asin + asinh assert_true atan + atanh atan2 avg base64 @@ -583,4 +586,3 @@ Grouping GroupedData.pivot GroupedData.sum PandasCogroupedOps.applyInPandas - diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4af5d1f484ee4..ea91e8593e21f 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -220,6 +220,19 @@ def acos(col): return _invoke_function_over_column("acos", col) +def acosh(col): + """ + Computes inverse hyperbolic cosine of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("acosh", col) + + def asin(col): """ .. versionadded:: 1.3.0 @@ -233,6 +246,19 @@ def asin(col): return _invoke_function_over_column("asin", col) +def asinh(col): + """ + Computes inverse hyperbolic sine of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("asinh", col) + + def atan(col): """ .. versionadded:: 1.4.0 @@ -245,6 +271,19 @@ def atan(col): return _invoke_function_over_column("atan", col) +def atanh(col): + """ + Computes inverse hyperbolic tangent of the input column. + + .. versionadded:: 3.1.0 + + Returns + ------- + :class:`Column` + """ + return _invoke_function_over_column("atanh", col) + + @since(1.4) def cbrt(col): """ diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 252f883b5fb09..50e178df9996f 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -260,12 +260,15 @@ def map_zip_with( ) -> Column: ... def abs(col: ColumnOrName) -> Column: ... def acos(col: ColumnOrName) -> Column: ... +def acosh(col: ColumnOrName) -> Column: ... def asc(col: ColumnOrName) -> Column: ... def asc_nulls_first(col: ColumnOrName) -> Column: ... def asc_nulls_last(col: ColumnOrName) -> Column: ... def ascii(col: ColumnOrName) -> Column: ... def asin(col: ColumnOrName) -> Column: ... +def asinh(col: ColumnOrName) -> Column: ... def atan(col: ColumnOrName) -> Column: ... +def atanh(col: ColumnOrName) -> Column: ... @overload def atan2(col1: ColumnOrName, col2: ColumnOrName) -> Column: ... 
@overload diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 32549343d938f..2858bdeca0d5a 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -116,6 +116,7 @@ def assert_close(a, b): c = get_values(b) diff = [abs(v - c[k]) < 1e-6 for k, v in enumerate(a)] return sum(diff) == len(a) + assert_close([math.cos(i) for i in range(10)], df.select(functions.cos(df.a)).collect()) assert_close([math.cos(i) for i in range(10)], @@ -139,6 +140,21 @@ def assert_close(a, b): assert_close([math.hypot(i, 2) for i in range(10)], df.select(functions.hypot(df.a, 2)).collect()) + def test_inverse_trig_functions(self): + from pyspark.sql import functions + + funs = [ + (functions.acosh, "ACOSH"), + (functions.asinh, "ASINH"), + (functions.atanh, "ATANH"), + ] + + cols = ["a", functions.col("a")] + + for f, alias in funs: + for c in cols: + self.assertIn(f"{alias}(a)", repr(f(c))) + def test_rand_functions(self): df = self.df from pyspark.sql import functions From 433ae9064f55b8adb27b561e1ff17c32f0bf3465 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 27 Nov 2020 15:47:39 +0900 Subject: [PATCH 0583/1009] [SPARK-33566][CORE][SQL][SS][PYTHON] Make unescapedQuoteHandling option configurable when read CSV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? There are some differences between Spark CSV, opencsv and commons-csv, the typical case are described in SPARK-33566, When there are both unescaped quotes and unescaped qualifier in value, the results of parsing are different. The reason for the difference is Spark use `STOP_AT_DELIMITER` as default `UnescapedQuoteHandling` to build `CsvParser` and it not configurable. On the other hand, opencsv and commons-csv use the parsing mechanism similar to `STOP_AT_CLOSING_QUOTE ` by default. So this pr make `unescapedQuoteHandling` option configurable to get the same parsing result as opencsv and commons-csv. ### Why are the changes needed? Make unescapedQuoteHandling option configurable when read CSV to make parsing more flexible。 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Add a new case similar to that described in SPARK-33566 Closes #30518 from LuciferYang/SPARK-33566. 
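For example, the new option can be set through the DataFrame reader (shown here in Scala as a sketch; the file path is a placeholder) to make Spark stop at the closing quote, as opencsv and commons-csv do:

```scala
// Read a CSV whose values contain unescaped quotes, treating them like
// opencsv/commons-csv by stopping at the closing quote.
val df = spark.read
  .option("header", "true")
  .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE")
  .csv("/tmp/unescaped_quotes.csv")

df.show(truncate = false)
```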
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- python/pyspark/sql/readwriter.py | 26 +++++++++++++++++-- python/pyspark/sql/readwriter.pyi | 1 + python/pyspark/sql/streaming.py | 25 ++++++++++++++++-- python/pyspark/sql/streaming.pyi | 1 + .../spark/sql/catalyst/csv/CSVOptions.scala | 8 +++++- .../apache/spark/sql/DataFrameReader.scala | 21 +++++++++++++++ .../sql/streaming/DataStreamReader.scala | 21 +++++++++++++++ .../execution/datasources/csv/CSVSuite.scala | 24 +++++++++++++++++ 8 files changed, 122 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index bb31e6a3e09f8..d120daa5a9434 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -522,7 +522,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, samplingRatio=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None): + pathGlobFilter=None, recursiveFileLookup=None, modifiedBefore=None, modifiedAfter=None, + unescapedQuoteHandling=None): r"""Loads a CSV file and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -685,6 +686,26 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non modifiedAfter (batch only) : an optional timestamp to only include files with modification times occurring after the specified time. The provided timestamp must be in the following format: YYYY-MM-DDTHH:mm:ss (e.g. 2020-06-01T13:00:00) + unescapedQuoteHandling : str, optional + defines how the CsvParser will handle values with unescaped quotes. If None is + set, it uses the default value, ``STOP_AT_DELIMITER``. + + * ``STOP_AT_CLOSING_QUOTE``: If unescaped quotes are found in the input, accumulate + the quote character and proceed parsing the value as a quoted value, until a closing + quote is found. + * ``BACK_TO_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters of the current + parsed value until the delimiter is found. If no delimiter is found in the value, the + parser will continue accumulating characters from the input until a delimiter or line + ending is found. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters until the + delimiter or a line ending is found in the input. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + for the given value will be skipped and the value set in nullValue will be produced + instead. + * ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException + will be thrown. 
Examples -------- @@ -708,7 +729,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, samplingRatio=samplingRatio, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, - modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter) + modifiedBefore=modifiedBefore, modifiedAfter=modifiedAfter, + unescapedQuoteHandling=unescapedQuoteHandling) if isinstance(path, str): path = [path] if type(path) == list: diff --git a/python/pyspark/sql/readwriter.pyi b/python/pyspark/sql/readwriter.pyi index 64c5697203a44..c3b9a428f22b3 100644 --- a/python/pyspark/sql/readwriter.pyi +++ b/python/pyspark/sql/readwriter.pyi @@ -113,6 +113,7 @@ class DataFrameReader(OptionUtils): lineSep: Optional[str] = ..., pathGlobFilter: Optional[Union[bool, str]] = ..., recursiveFileLookup: Optional[Union[bool, str]] = ..., + unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... def orc( self, diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index e7b2fa16d620a..365b5f38694a7 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -761,7 +761,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non maxCharsPerColumn=None, maxMalformedLogPerPartition=None, mode=None, columnNameOfCorruptRecord=None, multiLine=None, charToEscapeQuoteEscaping=None, enforceSchema=None, emptyValue=None, locale=None, lineSep=None, - pathGlobFilter=None, recursiveFileLookup=None): + pathGlobFilter=None, recursiveFileLookup=None, unescapedQuoteHandling=None): r"""Loads a CSV file stream and returns the result as a :class:`DataFrame`. This function will go through the input once to determine the input schema if @@ -900,6 +900,26 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non recursiveFileLookup : str or bool, optional recursively scan a directory for files. Using this option disables `partition discovery `_. # noqa + unescapedQuoteHandling : str, optional + defines how the CsvParser will handle values with unescaped quotes. If None is + set, it uses the default value, ``STOP_AT_DELIMITER``. + + * ``STOP_AT_CLOSING_QUOTE``: If unescaped quotes are found in the input, accumulate + the quote character and proceed parsing the value as a quoted value, until a closing + quote is found. + * ``BACK_TO_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters of the current + parsed value until the delimiter is found. If no delimiter is found in the value, the + parser will continue accumulating characters from the input until a delimiter or line + ending is found. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, consider the value + as an unquoted value. This will make the parser accumulate all characters until the + delimiter or a line ending is found in the input. + * ``STOP_AT_DELIMITER``: If unescaped quotes are found in the input, the content parsed + for the given value will be skipped and the value set in nullValue will be produced + instead. + * ``RAISE_ERROR``: If unescaped quotes are found in the input, a TextParsingException + will be thrown. .. 
versionadded:: 2.0.0 @@ -926,7 +946,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non columnNameOfCorruptRecord=columnNameOfCorruptRecord, multiLine=multiLine, charToEscapeQuoteEscaping=charToEscapeQuoteEscaping, enforceSchema=enforceSchema, emptyValue=emptyValue, locale=locale, lineSep=lineSep, - pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup) + pathGlobFilter=pathGlobFilter, recursiveFileLookup=recursiveFileLookup, + unescapedQuoteHandling=unescapedQuoteHandling) if isinstance(path, str): return self._df(self._jreader.csv(path)) else: diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 56ce140b826d5..829610ad3b94b 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -149,6 +149,7 @@ class DataStreamReader(OptionUtils): lineSep: Optional[str] = ..., pathGlobFilter: Optional[Union[bool, str]] = ..., recursiveFileLookup: Optional[Union[bool, str]] = ..., + unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... class DataStreamWriter: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index f2191fcf35f1a..ec405994eadef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -213,6 +213,12 @@ class CSVOptions( } val lineSeparatorInWrite: Option[String] = lineSeparator + /** + * The handling method to be used when unescaped quotes are found in the input. + */ + val unescapedQuoteHandling: UnescapedQuoteHandling = UnescapedQuoteHandling.valueOf(parameters + .getOrElse("unescapedQuoteHandling", "STOP_AT_DELIMITER").toUpperCase(Locale.ROOT)) + def asWriterSettings: CsvWriterSettings = { val writerSettings = new CsvWriterSettings() val format = writerSettings.getFormat @@ -258,7 +264,7 @@ class CSVOptions( settings.setNullValue(nullValue) settings.setEmptyValue(emptyValueInRead) settings.setMaxCharsPerColumn(maxCharsPerColumn) - settings.setUnescapedQuoteHandling(UnescapedQuoteHandling.STOP_AT_DELIMITER) + settings.setUnescapedQuoteHandling(unescapedQuoteHandling) settings.setLineSeparatorDetectionEnabled(lineSeparatorInRead.isEmpty && multiLine) lineSeparatorInRead.foreach { _ => settings.setNormalizeLineEndingsWithinQuotes(!multiLine) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index b26bc6441b6cf..8f96f0b882424 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -727,6 +727,27 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * a record can have. *
<li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
 * for any given value being read. By default, it is -1 meaning unlimited length</li>
+ * <li>`unescapedQuoteHandling` (default `STOP_AT_DELIMITER`): defines how the CsvParser
+ * will handle values with unescaped quotes.
+ *   <ul>
+ *     <li>`STOP_AT_CLOSING_QUOTE`: If unescaped quotes are found in the input, accumulate
+ * the quote character and proceed parsing the value as a quoted value, until a closing
+ * quote is found.</li>
+ *     <li>`BACK_TO_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters of the current
+ * parsed value until the delimiter is found. If no
+ * delimiter is found in the value, the parser will continue accumulating characters from
+ * the input until a delimiter or line ending is found.</li>
+ *     <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters until the
+ * delimiter or a line ending is found in the input.</li>
+ *     <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
+ * for the given value will be skipped and the value set in nullValue will be produced
+ * instead.</li>
+ *     <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException
+ * will be thrown.</li>
+ *   </ul>
+ * </li>
 * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
 * during parsing. It supports the following case-insensitive modes. Note that Spark tries
 * to parse only required columns in CSV under column pruning. Therefore, corrupt records
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 9bc4acd49a980..7f4ef8be562fb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -396,6 +396,27 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
 * a record can have.</li>
 * <li>`maxCharsPerColumn` (default `-1`): defines the maximum number of characters allowed
 * for any given value being read. By default, it is -1 meaning unlimited length</li>
+ * <li>`unescapedQuoteHandling` (default `STOP_AT_DELIMITER`): defines how the CsvParser
+ * will handle values with unescaped quotes.
+ *   <ul>
+ *     <li>`STOP_AT_CLOSING_QUOTE`: If unescaped quotes are found in the input, accumulate
+ * the quote character and proceed parsing the value as a quoted value, until a closing
+ * quote is found.</li>
+ *     <li>`BACK_TO_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters of the current
+ * parsed value until the delimiter is found. If no delimiter is found in the value, the
+ * parser will continue accumulating characters from the input until a delimiter or line
+ * ending is found.</li>
+ *     <li>`STOP_AT_DELIMITER`: If unescaped quotes are found in the input, consider the value
+ * as an unquoted value. This will make the parser accumulate all characters until the
+ * delimiter or a line ending is found in the input.</li>
+ *     <li>`SKIP_VALUE`: If unescaped quotes are found in the input, the content parsed
+ * for the given value will be skipped and the value set in nullValue will be produced
+ * instead.</li>
+ *     <li>`RAISE_ERROR`: If unescaped quotes are found in the input, a TextParsingException
+ * will be thrown.</li>
+ *   </ul>
+ * </li>
 * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
 * during parsing. It supports the following case-insensitive modes.
 *
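Since the same option is documented above for `DataStreamReader`, a streaming read can opt in as well; a hedged PySpark sketch (the schema and input directory are made up for illustration):

```python
# Minimal streaming sketch, assuming Spark with this patch applied.
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType

spark = SparkSession.builder.master("local[*]").appName("streaming-unescaped-quotes").getOrCreate()

schema = StructType([StructField("c1", StringType()), StructField("c2", StringType())])

stream = (spark.readStream
          .schema(schema)  # streaming file sources require an explicit schema
          .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE")
          .csv("/tmp/csv_input_dir"))  # hypothetical input directory

query = stream.writeStream.format("console").start()
query.awaitTermination(10)
query.stop()
spark.stop()
```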
      diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index a236814fdcdcd..30f0e45d04eab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -2428,6 +2428,30 @@ abstract class CSVSuite assert(readback.collect sameElements Array(Row("0"), Row("1"), Row("2"))) } } + + test("SPARK-33566: configure UnescapedQuoteHandling to parse " + + "unescaped quotes and unescaped delimiter data correctly") { + withTempPath { path => + val dataPath = path.getCanonicalPath + val row1 = Row("""a,""b,c""", "xyz") + val row2 = Row("""a,b,c""", """x""yz""") + // Generate the test data, use `,` as delimiter and `"` as quotes, but they didn't escape. + Seq( + """c1,c2""", + s""""${row1.getString(0)}","${row1.getString(1)}"""", + s""""${row2.getString(0)}","${row2.getString(1)}"""") + .toDF().repartition(1).write.text(dataPath) + // Without configure UnescapedQuoteHandling to STOP_AT_CLOSING_QUOTE, + // the result will be Row(""""a,""b""", """c""""), Row("""a,b,c""", """"x""yz"""") + val result = spark.read + .option("inferSchema", "true") + .option("header", "true") + .option("unescapedQuoteHandling", "STOP_AT_CLOSING_QUOTE") + .csv(dataPath).collect() + val exceptResults = Array(row1, row2) + assert(result.sameElements(exceptResults)) + } + } } class CSVv1Suite extends CSVSuite { From 8792280a735598589dc6cbced03262be2b6f8f76 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 27 Nov 2020 07:08:24 +0000 Subject: [PATCH 0584/1009] [SPARK-33575][SQL] Fix misleading exception for "ANALYZE TABLE ... FOR COLUMNS" on temporary views ### What changes were proposed in this pull request? This PR proposes to fix the exception message for `ANALYZE TABLE ... FOR COLUMNS` on temporary views. The current behavior throws `NoSuchTableException` even if the temporary view exists: ``` sql("CREATE TEMP VIEW t AS SELECT 1 AS id") sql("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS id") org.apache.spark.sql.catalyst.analysis.NoSuchTableException: Table or view 't' not found in database 'db'; at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.analyzeColumnInTempView(AnalyzeColumnCommand.scala:76) at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.run(AnalyzeColumnCommand.scala:54) ``` After this PR, more reasonable exception is thrown: ``` org.apache.spark.sql.AnalysisException: Temporary view `testView` is not cached for analyzing columns.; [info] at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.analyzeColumnInTempView(AnalyzeColumnCommand.scala:74) [info] at org.apache.spark.sql.execution.command.AnalyzeColumnCommand.run(AnalyzeColumnCommand.scala:54) ``` ### Why are the changes needed? To fix a misleading exception. ### Does this PR introduce _any_ user-facing change? Yes, the exception thrown is changed as shown above. ### How was this patch tested? Updated existing test. Closes #30519 from imback82/analyze_table_message. 
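To make the new message concrete, here is a small PySpark sketch (assuming a plain `SparkSession`; the view and column names are illustrative) of the workflow the improved error steers users toward, namely caching the temporary view before collecting column statistics:

```python
# Sketch of analyzing columns of a cached temporary view.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").appName("analyze-temp-view").getOrCreate()

spark.sql("CREATE OR REPLACE TEMP VIEW t AS SELECT id FROM range(10)")

# Without the CACHE TABLE step, the command below now fails with
# "Temporary view `t` is not cached for analyzing columns." instead of NoSuchTableException.
spark.sql("CACHE TABLE t")
spark.sql("ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS id")

spark.stop()
```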
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/AnalyzeColumnCommand.scala | 5 ++--- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/SQLViewSuite.scala | 5 ++++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 5017893077922..3b90f807b3138 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -71,9 +71,8 @@ case class AnalyzeColumnCommand( private def analyzeColumnInTempView(plan: LogicalPlan, sparkSession: SparkSession): Unit = { if (!analyzeColumnInCachedData(plan, sparkSession)) { - val catalog = sparkSession.sessionState.catalog - val db = tableIdent.database.getOrElse(catalog.getCurrentDatabase) - throw new NoSuchTableException(db = db, table = tableIdent.identifier) + throw new AnalysisException( + s"Temporary view $tableIdent is not cached for analyzing columns.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 65377594f083c..cd03fadf34b98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -526,7 +526,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg = intercept[AnalysisException] { sql("ANALYZE TABLE tempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg.contains(s"Table or view 'tempView' not found in database 'default'")) + assert(errMsg.contains("Temporary view `tempView` is not cached for analyzing columns")) // Cache the view then analyze it sql("CACHE TABLE tempView") @@ -548,7 +548,8 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg2 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg2.contains(s"Table or view 'gTempView' not found in database '$globalTempDB'")) + assert(errMsg2.contains( + s"Temporary view `$globalTempDB`.`gTempView` is not cached for analyzing columns")) // Cache the view then analyze it sql(s"CACHE TABLE $globalTempDB.gTempView") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index edeebde7db726..5d29503848772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -188,7 +188,10 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage assert(e5.contains(s"$viewName is a temp view not table or permanent view")) - assertNoSuchTable(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") + val e6 = intercept[AnalysisException] { + sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") + }.getMessage + assert(e6.contains(s"Temporary view `$viewName` is not cached for analyzing columns.")) } } From 2c41d9d8fa363b62519128819841f39e68429205 Mon Sep 17 
00:00:00 2001 From: Terry Kim Date: Fri, 27 Nov 2020 10:16:56 +0000 Subject: [PATCH 0585/1009] [SPARK-33522][SQL] Improve exception messages while handling UnresolvedTableOrView ### What changes were proposed in this pull request? This PR proposes to improve the exception messages while `UnresolvedTableOrView` is handled based on this suggestion: https://github.com/apache/spark/pull/30321#discussion_r521127001. Currently, when an identifier is resolved to a temp view when a table/permanent view is expected, the following exception message is displayed (e.g., for `SHOW CREATE TABLE`): ``` t is a temp view not table or permanent view. ``` After this PR, the message will be: ``` t is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view. ``` Also, if an identifier is not resolved, the following exception message is currently used: ``` Table or view not found: t ``` After this PR, the message will be: ``` Table or permanent view not found for 'SHOW CREATE TABLE': t ``` or ``` Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': t ``` ### Why are the changes needed? To improve the exception message. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed as described above. ### How was this patch tested? Updated existing tests. Closes #30475 from imback82/unresolved_table_or_view. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 9 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../catalyst/analysis/v2ResolutionPlans.scala | 1 + .../sql/catalyst/parser/AstBuilder.scala | 31 +++--- .../sql/catalyst/parser/DDLParserSuite.scala | 96 +++++++++++-------- .../sql-tests/results/describe.sql.out | 2 +- .../sql-tests/results/show_columns.sql.out | 8 +- .../spark/sql/ShowCreateTableSuite.scala | 6 +- .../spark/sql/StatisticsCollectionSuite.scala | 3 +- .../sql/connector/DataSourceV2SQLSuite.scala | 2 +- .../spark/sql/execution/SQLViewSuite.scala | 6 +- .../v2/jdbc/JDBCTableCatalogSuite.scala | 6 +- .../sql/hive/execution/HiveCommandSuite.scala | 2 +- 13 files changed, 104 insertions(+), 72 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 837686420375a..77c1dd9ebb7fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -866,11 +866,12 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u - case u @ UnresolvedTableOrView(ident, allowTempView) => + case u @ UnresolvedTableOrView(ident, cmd, allowTempView) => lookupTempView(ident) .map { _ => if (!allowTempView) { - u.failAnalysis(s"${ident.quoted} is a temp view not table or permanent view.") + u.failAnalysis( + s"${ident.quoted} is a temp view. 
'$cmd' expects a table or permanent view.") } ResolvedView(ident.asIdentifier, isTemp = true) } @@ -955,7 +956,7 @@ class Analyzer(override val catalogManager: CatalogManager) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _) => + case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _, _) => CatalogV2Util.loadTable(catalog, ident) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) @@ -1085,7 +1086,7 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) - case u @ UnresolvedTableOrView(identifier, _) => + case u @ UnresolvedTableOrView(identifier, _, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9998035d65c3f..9a3ab4a5f8d11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -101,7 +101,9 @@ trait CheckAnalysis extends PredicateHelper { u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => - u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") + val viewStr = if (u.allowTempView) "view" else "permanent view" + u.failAnalysis( + s"Table or $viewStr not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 0e883a88f2691..95fc4f47dec7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -51,6 +51,7 @@ case class UnresolvedTable( */ case class UnresolvedTableOrView( multipartIdentifier: Seq[String], + commandName: String, allowTempView: Boolean = true) extends LeafNode { override lazy val resolved: Boolean = false override def output: Seq[Attribute] = Nil diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 606d923061441..4cd9b2bea32a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3148,7 +3148,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitDropTable(ctx: DropTableContext): LogicalPlan = withOrigin(ctx) { // DROP TABLE works with either a table or a temporary view. 
DropTable( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), + UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()), "DROP TABLE"), ctx.EXISTS != null, ctx.PURGE != null) } @@ -3453,12 +3453,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitDescribeRelation(ctx: DescribeRelationContext): LogicalPlan = withOrigin(ctx) { val isExtended = ctx.EXTENDED != null || ctx.FORMATTED != null + val relation = UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "DESCRIBE TABLE") if (ctx.describeColName != null) { if (ctx.partitionSpec != null) { throw new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) } else { DescribeColumn( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), + relation, ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, isExtended) } @@ -3473,10 +3476,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { Map.empty[String, String] } - DescribeRelation( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier())), - partitionSpec, - isExtended) + DescribeRelation(relation, partitionSpec, isExtended) } } @@ -3514,7 +3514,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val tableName = visitMultipartIdentifier(ctx.multipartIdentifier()) if (ctx.ALL() != null) { checkPartitionSpec() - AnalyzeColumn(UnresolvedTableOrView(tableName), None, allColumns = true) + AnalyzeColumn( + UnresolvedTableOrView(tableName, "ANALYZE TABLE ... FOR ALL COLUMNS"), + None, + allColumns = true) } else if (ctx.identifierSeq() == null) { val partitionSpec = if (ctx.partitionSpec != null) { visitPartitionSpec(ctx.partitionSpec) @@ -3522,13 +3525,13 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg Map.empty[String, Option[String]] } AnalyzeTable( - UnresolvedTableOrView(tableName, allowTempView = false), + UnresolvedTableOrView(tableName, "ANALYZE TABLE", allowTempView = false), partitionSpec, noScan = ctx.identifier != null) } else { checkPartitionSpec() AnalyzeColumn( - UnresolvedTableOrView(tableName), + UnresolvedTableOrView(tableName, "ANALYZE TABLE ... 
FOR COLUMNS ..."), Option(visitIdentifierSeq(ctx.identifierSeq())), allColumns = false) } @@ -3572,6 +3575,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ShowCreateTable( UnresolvedTableOrView( visitMultipartIdentifier(ctx.multipartIdentifier()), + "SHOW CREATE TABLE", allowTempView = false), ctx.SERDE != null) } @@ -3647,7 +3651,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRefreshTable(ctx: RefreshTableContext): LogicalPlan = withOrigin(ctx) { - RefreshTable(UnresolvedTableOrView(visitMultipartIdentifier(ctx.multipartIdentifier()))) + RefreshTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "REFRESH TABLE")) } /** @@ -3670,7 +3677,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { nameParts } - ShowColumns(UnresolvedTableOrView(tableName), namespace) + ShowColumns(UnresolvedTableOrView(tableName, "SHOW COLUMNS"), namespace) } /** @@ -3881,7 +3888,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitShowTblProperties( ctx: ShowTblPropertiesContext): LogicalPlan = withOrigin(ctx) { ShowTableProperties( - UnresolvedTableOrView(visitMultipartIdentifier(ctx.table)), + UnresolvedTableOrView(visitMultipartIdentifier(ctx.table), "SHOW TBLPROPERTIES"), Option(ctx.key).map(visitTablePropertyKey)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f650922e75f6e..c58ff81f17131 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -697,27 +697,27 @@ class DDLParserSuite extends AnalysisTest { test("drop table") { parseCompare("DROP TABLE testcat.ns1.ns2.tbl", DropTable( - UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl")), + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE db.tab", DropTable( - UnresolvedTableOrView(Seq("db", "tab")), ifExists = false, purge = false)) + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS db.tab", DropTable( - UnresolvedTableOrView(Seq("db", "tab")), ifExists = true, purge = false)) + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = false)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) parseCompare(s"DROP TABLE IF EXISTS tab", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = false)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) parseCompare(s"DROP TABLE tab PURGE", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = false, purge = true)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) parseCompare(s"DROP TABLE IF EXISTS tab PURGE", DropTable( - UnresolvedTableOrView(Seq("tab")), ifExists = true, purge = true)) + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) } test("drop view") { @@ -1112,26 +1112,26 @@ class DDLParserSuite extends AnalysisTest { test("describe table 
column") { comparePlans(parsePlan("DESCRIBE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("abc.xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc.xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("abc", "xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc", "xyz"), isExtended = false)) comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("a.b", "x.y"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("a.b", "x.y"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t")), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) @@ -1150,13 +1150,17 @@ class DDLParserSuite extends AnalysisTest { test("SPARK-17328 Fix NPE with EXPLAIN DESCRIBE TABLE") { comparePlans(parsePlan("describe t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = false)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = false)) comparePlans(parsePlan("describe table t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = false)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = false)) comparePlans(parsePlan("describe table extended t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = true)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = true)) comparePlans(parsePlan("describe table formatted t"), - DescribeRelation(UnresolvedTableOrView(Seq("t")), Map.empty, isExtended = true)) + DescribeRelation( + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Map.empty, isExtended = true)) } test("insert table: basic append") { @@ -1769,57 +1773,57 @@ class DDLParserSuite extends AnalysisTest { test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map.empty, noScan = false)) comparePlans(parsePlan("analyze table a.b.c compute statistics noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map.empty, noScan = true)) comparePlans(parsePlan("analyze table a.b.c partition (a) compute statistics nOscAn"), AnalyzeTable( - 
UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("a" -> None), noScan = true)) // Partitions specified comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr=11) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09') COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds='2008-04-09', hr) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> Some("2008-04-09"), "hr" -> None), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr=11) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> Some("11")), noScan = true)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> None), noScan = false)) comparePlans( parsePlan("ANALYZE TABLE a.b.c PARTITION(ds, hr) COMPUTE STATISTICS noscan"), AnalyzeTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE", allowTempView = false), Map("ds" -> None, "hr" -> None), noScan = true)) intercept("analyze table a.b.c compute statistics xxxx", @@ -1834,7 +1838,9 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR COLUMNS key, value"), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... 
FOR COLUMNS ..."), + Option(Seq("key", "value")), + allColumns = false)) // Partition specified - should be ignored comparePlans( @@ -1844,7 +1850,9 @@ class DDLParserSuite extends AnalysisTest { |COMPUTE STATISTICS FOR COLUMNS key, value """.stripMargin), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), Option(Seq("key", "value")), allColumns = false)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... FOR COLUMNS ..."), + Option(Seq("key", "value")), + allColumns = false)) // Partition specified should be ignored in case of COMPUTE STATISTICS FOR ALL COLUMNS comparePlans( @@ -1854,7 +1862,9 @@ class DDLParserSuite extends AnalysisTest { |COMPUTE STATISTICS FOR ALL COLUMNS """.stripMargin), AnalyzeColumn( - UnresolvedTableOrView(Seq("a", "b", "c")), None, allColumns = true)) + UnresolvedTableOrView(Seq("a", "b", "c"), "ANALYZE TABLE ... FOR ALL COLUMNS"), + None, + allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", "mismatched input 'key' expecting {, ';'}") @@ -1898,12 +1908,13 @@ class DDLParserSuite extends AnalysisTest { test("SHOW CREATE table") { comparePlans( parsePlan("SHOW CREATE TABLE a.b.c"), - ShowCreateTable(UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false))) + ShowCreateTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false))) comparePlans( parsePlan("SHOW CREATE TABLE a.b.c AS SERDE"), ShowCreateTable( - UnresolvedTableOrView(Seq("a", "b", "c"), allowTempView = false), + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW CREATE TABLE", allowTempView = false), asSerde = true)) } @@ -1949,7 +1960,7 @@ class DDLParserSuite extends AnalysisTest { test("REFRESH TABLE") { comparePlans( parsePlan("REFRESH TABLE a.b.c"), - RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c")))) + RefreshTable(UnresolvedTableOrView(Seq("a", "b", "c"), "REFRESH TABLE"))) } test("show columns") { @@ -1959,13 +1970,15 @@ class DDLParserSuite extends AnalysisTest { val sql4 = "SHOW COLUMNS FROM db1.t1 IN db1" val parsed1 = parsePlan(sql1) - val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1")), None) + val expected1 = ShowColumns(UnresolvedTableOrView(Seq("t1"), "SHOW COLUMNS"), None) val parsed2 = parsePlan(sql2) - val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), None) + val expected2 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), None) val parsed3 = parsePlan(sql3) - val expected3 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) + val expected3 = + ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), Some(Seq("db1"))) val parsed4 = parsePlan(sql4) - val expected4 = ShowColumns(UnresolvedTableOrView(Seq("db1", "t1")), Some(Seq("db1"))) + val expected4 = + ShowColumns(UnresolvedTableOrView(Seq("db1", "t1"), "SHOW COLUMNS"), Some(Seq("db1"))) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) @@ -2300,11 +2313,12 @@ class DDLParserSuite extends AnalysisTest { test("SHOW TBLPROPERTIES table") { comparePlans( parsePlan("SHOW TBLPROPERTIES a.b.c"), - ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c")), None)) + ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW TBLPROPERTIES"), None)) comparePlans( parsePlan("SHOW TBLPROPERTIES a.b.c('propKey1')"), - ShowTableProperties(UnresolvedTableOrView(Seq("a", "b", "c")), Some("propKey1"))) + ShowTableProperties( + UnresolvedTableOrView(Seq("a", "b", "c"), "SHOW TBLPROPERTIES"), Some("propKey1"))) } test("DESCRIBE 
FUNCTION") { diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 07aed98d120f9..145c987ee5f61 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -540,7 +540,7 @@ struct -- !query output == Parsed Logical Plan == 'DescribeRelation false -+- 'UnresolvedTableOrView [t], true ++- 'UnresolvedTableOrView [t], DESCRIBE TABLE, true == Analyzed Logical Plan == col_name: string, data_type: string, comment: string diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 6ddffb89987d8..03df876133aa4 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -94,7 +94,7 @@ SHOW COLUMNS IN badtable FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.badtable; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.badtable; line 1 pos 0 -- !query @@ -130,7 +130,7 @@ SHOW COLUMNS IN showdb.showcolumn3 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.showcolumn3; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 -- !query @@ -139,7 +139,7 @@ SHOW COLUMNS IN showcolumn3 FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showdb.showcolumn3; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 -- !query @@ -148,7 +148,7 @@ SHOW COLUMNS IN showcolumn4 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found: showcolumn4; line 1 pos 0 +Table or view not found for 'SHOW COLUMNS': showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala index 7b4c8d1cc71d8..92d306c0e3c11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala @@ -155,7 +155,8 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { val ex = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") } - assert(ex.getMessage.contains(s"$viewName is a temp view not table or permanent view")) + assert(ex.getMessage.contains( + s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) } withGlobalTempView(viewName) { @@ -165,7 +166,8 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils { sql(s"SHOW CREATE TABLE $globalTempViewDb.$viewName") } assert(ex.getMessage.contains( - s"$globalTempViewDb.$viewName is a temp view not table or permanent view")) + s"$globalTempViewDb.$viewName is a temp view. 
" + + "'SHOW CREATE TABLE' expects a table or permanent view.")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index cd03fadf34b98..3fc679f6b9fc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -542,7 +542,8 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains(s"Table or view not found: $globalTempDB.gTempView")) + assert(errMsg1.contains("Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': " + + s"$globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") val errMsg2 = intercept[AnalysisException] { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index f2b57f9442d09..98580568a8df6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -729,7 +729,7 @@ class DataSourceV2SQLSuite val ex = intercept[AnalysisException] { sql("DROP TABLE testcat.db.notbl") } - assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) + assert(ex.getMessage.contains("Table or view not found for 'DROP TABLE': testcat.db.notbl")) sql("DROP TABLE IF EXISTS testcat.db.notbl") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 5d29503848772..d776198bc7470 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -183,11 +183,13 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val e4 = intercept[AnalysisException] { sql(s"SHOW CREATE TABLE $viewName") }.getMessage - assert(e4.contains(s"$viewName is a temp view not table or permanent view")) + assert(e4.contains( + s"$viewName is a temp view. 'SHOW CREATE TABLE' expects a table or permanent view.")) val e5 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS") }.getMessage - assert(e5.contains(s"$viewName is a temp view not table or permanent view")) + assert(e5.contains( + s"$viewName is a temp view. 
'ANALYZE TABLE' expects a table or permanent view.")) val e6 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $viewName COMPUTE STATISTICS FOR COLUMNS id") }.getMessage diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index c7ad96c8f7619..97dd92acc7805 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -80,8 +80,10 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("DROP TABLE h2.test.to_drop") checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( - "h2.test.not_existing_table" -> "Table or view not found: h2.test.not_existing_table", - "h2.bad_test.not_existing_table" -> "Table or view not found: h2.bad_test.not_existing_table" + "h2.test.not_existing_table" -> + "Table or view not found for 'DROP TABLE': h2.test.not_existing_table", + "h2.bad_test.not_existing_table" -> + "Table or view not found for 'DROP TABLE': h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index d3398842afb21..4feb970ea6f1a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -137,7 +137,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto val message = intercept[AnalysisException] { sql("SHOW TBLPROPERTIES badtable") }.getMessage - assert(message.contains("Table or view not found: badtable")) + assert(message.contains("Table or view not found for 'SHOW TBLPROPERTIES': badtable")) // When key is not found, a row containing the error is returned. checkAnswer( From e43255051c0a82713d653fe590fe7728e43556ce Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 27 Nov 2020 10:27:08 +0000 Subject: [PATCH 0586/1009] [SPARK-28645][SQL] ParseException is thrown when the window is redefined ### What changes were proposed in this pull request? Currently in Spark one could redefine a window. For instance: `select count(*) OVER w FROM tenk1 WINDOW w AS (ORDER BY unique1), w AS (ORDER BY unique1);` The window `w` is defined two times. In PgSQL, on the other hand, a thrown will happen: `ERROR: window "w" is already defined` ### Why are the changes needed? The current implement gives the following window definitions a higher priority. But it wasn't Spark's intention and users can't know from any document of Spark. This PR fixes the bug. ### Does this PR introduce _any_ user-facing change? Yes. There is an example query output with/without this fix. 
``` SELECT employee_name, salary, first_value(employee_name) OVER w highest_salary, nth_value(employee_name, 2) OVER w second_highest_salary FROM basic_pays WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC ``` The output before this fix: ``` Larry Bott 11798 Larry Bott Gerard Bondur Gerard Bondur 11472 Larry Bott Gerard Bondur Pamela Castillo 11303 Larry Bott Gerard Bondur Barry Jones 10586 Larry Bott Gerard Bondur George Vanauf 10563 Larry Bott Gerard Bondur Loui Bondur 10449 Larry Bott Gerard Bondur Mary Patterson 9998 Larry Bott Gerard Bondur Steve Patterson 9441 Larry Bott Gerard Bondur Julie Firrelli 9181 Larry Bott Gerard Bondur Jeff Firrelli 8992 Larry Bott Gerard Bondur William Patterson 8870 Larry Bott Gerard Bondur Diane Murphy 8435 Larry Bott Gerard Bondur Leslie Jennings 8113 Larry Bott Gerard Bondur Gerard Hernandez 6949 Larry Bott Gerard Bondur Foon Yue Tseng 6660 Larry Bott Gerard Bondur Anthony Bow 6627 Larry Bott Gerard Bondur Leslie Thompson 5186 Larry Bott Gerard Bondur ``` The output after this fix: ``` struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException The definition of window 'w' is repetitive(line 8, pos 0) ``` ### How was this patch tested? Jenkins test. Closes #30512 from beliefer/SPARK-28645. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 10 ++++- .../resources/sql-tests/inputs/window.sql | 14 ++++++- .../sql-tests/results/window.sql.out | 38 ++++++++++++++++++- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 4cd9b2bea32a4..afef88f7e97e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -815,10 +815,16 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx: WindowClauseContext, query: LogicalPlan): LogicalPlan = withOrigin(ctx) { // Collect all window specifications defined in the WINDOW clause. 
- val baseWindowMap = ctx.namedWindow.asScala.map { + val baseWindowTuples = ctx.namedWindow.asScala.map { wCtx => (wCtx.name.getText, typedVisit[WindowSpec](wCtx.windowSpec)) - }.toMap + } + baseWindowTuples.groupBy(_._1).foreach { kv => + if (kv._2.size > 1) { + throw new ParseException(s"The definition of window '${kv._1}' is repetitive", ctx) + } + } + val baseWindowMap = baseWindowTuples.toMap // Handle cases like // window w1 as (partition by p_mfgr order by p_name diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index f5223af9125f6..f0336d764bdea 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -250,4 +250,16 @@ WINDOW w AS ( ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) -ORDER BY department; \ No newline at end of file +ORDER BY department; + +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index 1304dcf21d0b3..df2ad96649186 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 35 +-- Number of queries: 36 -- !query @@ -739,4 +739,38 @@ Gerard Hernandez SCM 6949 Larry Bott Pamela Castillo George Vanauf Sales 10563 George Vanauf Steve Patterson Steve Patterson Sales 9441 George Vanauf Steve Patterson Julie Firrelli Sales 9181 George Vanauf Steve Patterson -Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson \ No newline at end of file +Foon Yue Tseng Sales 6660 George Vanauf Steve Patterson + + +-- !query +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +The definition of window 'w' is repetitive(line 8, pos 0) + +== SQL == +SELECT + employee_name, + salary, + first_value(employee_name) OVER w highest_salary, + nth_value(employee_name, 2) OVER w second_highest_salary +FROM + basic_pays +WINDOW +^^^ + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) +ORDER BY salary DESC \ No newline at end of file From b9f2f78de59758d1932c1573338539e485a01112 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Fri, 27 Nov 2020 13:24:11 +0000 Subject: [PATCH 0587/1009] [SPARK-33498][SQL] Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid ### What changes were proposed in this pull request? Datetime parsing should fail if the input string can't be parsed, or the pattern string is invalid, when ANSI mode is enable. 
This patch should update GetTimeStamp, UnixTimeStamp, ToUnixTimeStamp and Cast. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30442 from leanken/leanken-SPARK-33498. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 5 + .../spark/sql/catalyst/expressions/Cast.scala | 33 +++-- .../expressions/datetimeExpressions.scala | 51 +++++--- .../sql/catalyst/util/DateTimeUtils.scala | 9 ++ .../sql/catalyst/expressions/CastSuite.scala | 41 ++++-- .../expressions/DateExpressionsSuite.scala | 59 ++++++++- .../resources/sql-tests/inputs/datetime.sql | 11 ++ .../sql-tests/results/ansi/datetime.sql.out | 123 +++++++++++++++--- .../sql-tests/results/datetime-legacy.sql.out | 74 ++++++++++- .../sql-tests/results/datetime.sql.out | 74 ++++++++++- .../results/postgreSQL/window_part3.sql.out | 3 +- 11 files changed, 424 insertions(+), 59 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 870ed0aa0daaa..4e19799ca75b9 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -136,12 +136,17 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql. - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. + - `to_date` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. ### SQL Operators The behavior of some SQL operators can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. + - `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e6f585cacc6c7..95f09d64c484b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -448,7 +448,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // TimestampConverter private[this] def castToTimestamp(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, utfs => DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull) + buildCast[UTF8String](_, utfs => { + if (ansiEnabled) { + DateTimeUtils.stringToTimestampAnsi(utfs, zoneId) + } else { + DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull + } + }) case BooleanType => buildCast[Boolean](_, b => if (b) 1L else 0) case LongType => @@ -1250,15 +1256,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit zoneIdClass) val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => - code""" - scala.Option $longOpt = - org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); - if ($longOpt.isDefined()) { - $evPrim = ((Long) $longOpt.get()).longValue(); - } else { - $evNull = true; - } - """ + if (ansiEnabled) { + code""" + $evPrim = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestampAnsi($c, $zid); + """ + } else { + code""" + scala.Option $longOpt = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestamp($c, $zid); + if ($longOpt.isDefined()) { + $evPrim = ((Long) $longOpt.get()).longValue(); + } else { + $evNull = true; + } + """ + } case BooleanType => (c, evPrim, evNull) => code"$evPrim = $c ? 
1L : 0L;" case _: IntegralType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9953b780ceace..1ff5833fb4dd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -720,10 +720,12 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti case class ToUnixTimestamp( timeExp: Expression, format: Expression, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends UnixTime { - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + def this(timeExp: Expression, format: Expression) = + this(timeExp, format, None, SQLConf.get.ansiEnabled) override def left: Expression = timeExp override def right: Expression = format @@ -767,10 +769,15 @@ case class ToUnixTimestamp( group = "datetime_funcs", since = "1.5.0") // scalastyle:on line.size.limit -case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Option[String] = None) +case class UnixTimestamp( + timeExp: Expression, + format: Expression, + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends UnixTime { - def this(timeExp: Expression, format: Expression) = this(timeExp, format, None) + def this(timeExp: Expression, format: Expression) = + this(timeExp, format, None, SQLConf.get.ansiEnabled) override def left: Expression = timeExp override def right: Expression = format @@ -792,6 +799,8 @@ case class UnixTimestamp(timeExp: Expression, format: Expression, timeZoneId: Op abstract class ToTimestamp extends BinaryExpression with TimestampFormatterHelper with ExpectsInputTypes { + def failOnError: Boolean + // The result of the conversion to timestamp is microseconds divided by this factor. // For example if the factor is 1000000, the result of the expression is in seconds. 
protected def downScaleFactor: Long @@ -803,7 +812,14 @@ abstract class ToTimestamp Seq(TypeCollection(StringType, DateType, TimestampType), StringType) override def dataType: DataType = LongType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true + + private def isParseError(e: Throwable): Boolean = e match { + case _: DateTimeParseException | + _: DateTimeException | + _: ParseException => true + case _ => false + } override def eval(input: InternalRow): Any = { val t = left.eval(input) @@ -824,9 +840,12 @@ abstract class ToTimestamp try { formatter.parse(t.asInstanceOf[UTF8String].toString) / downScaleFactor } catch { - case _: DateTimeParseException | - _: DateTimeException | - _: ParseException => null + case e if isParseError(e) => + if (failOnError) { + throw e + } else { + null + } } } } @@ -835,6 +854,7 @@ abstract class ToTimestamp override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val javaType = CodeGenerator.javaType(dataType) + val parseErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" left.dataType match { case StringType => formatterOption.map { fmt => val df = classOf[TimestampFormatter].getName @@ -844,11 +864,11 @@ abstract class ToTimestamp |try { | ${ev.value} = $formatterName.parse($datetimeStr.toString()) / $downScaleFactor; |} catch (java.time.DateTimeException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.time.format.DateTimeParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.text.ParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} |""".stripMargin) }.getOrElse { @@ -866,11 +886,11 @@ abstract class ToTimestamp |try { | ${ev.value} = $timestampFormatter.parse($string.toString()) / $downScaleFactor; |} catch (java.time.format.DateTimeParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.time.DateTimeException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} catch (java.text.ParseException e) { - | ${ev.isNull} = true; + | $parseErrorBranch |} |""".stripMargin) } @@ -1737,7 +1757,8 @@ case class DateDiff(endDate: Expression, startDate: Expression) private case class GetTimestamp( left: Expression, right: Expression, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends ToTimestamp { override val downScaleFactor = 1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 3b974759bd6c0..87cf3c93ba26e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -364,6 +364,15 @@ object DateTimeUtils { } } + def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = { + val timestamp = stringToTimestamp(s, timeZoneId) + if (timestamp.isEmpty) { + throw new DateTimeException(s"Cannot cast $s to TimestampType.") + } else { + timestamp.get + } + } + /** * Gets the number of microseconds since the epoch of 1970-01-01 00:00:00Z from the given * instance of `java.time.Instant`. 
The epoch microsecond count is a simple incrementing count of diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index f1fc921e401ba..0900a303b4cbe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} +import java.time.DateTimeException import java.util.{Calendar, TimeZone} import scala.collection.parallel.immutable.ParVector @@ -106,8 +107,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(cast(Literal(str), TimestampType, Option(zid.getId)), expected) } - checkCastStringToTimestamp("123", null) - val tz = TimeZone.getTimeZone(zid) var c = Calendar.getInstance(tz) c.set(2015, 0, 1, 0, 0, 0) @@ -184,15 +183,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { c.set(2015, 2, 18, 12, 3, 17) c.set(Calendar.MILLISECOND, 123) checkCastStringToTimestamp("2015-03-18T12:03:17.123+7:3", new Timestamp(c.getTimeInMillis)) - - checkCastStringToTimestamp("2015-03-18 123142", null) - checkCastStringToTimestamp("2015-03-18T123123", null) - checkCastStringToTimestamp("2015-03-18X", null) - checkCastStringToTimestamp("2015/03/18", null) - checkCastStringToTimestamp("2015.03.18", null) - checkCastStringToTimestamp("20150318", null) - checkCastStringToTimestamp("2015-031-8", null) - checkCastStringToTimestamp("2015-03-18T12:03:17-0:70", null) } } @@ -302,7 +292,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } checkEvaluation(cast("abdef", StringType), "abdef") - checkEvaluation(cast("abdef", TimestampType, UTC_OPT), null) checkEvaluation(cast("12.65", DecimalType.SYSTEM_DEFAULT), Decimal(12.65)) checkEvaluation(cast(cast(sd, DateType), StringType), sd) @@ -962,6 +951,34 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { cast("abcd", DecimalType(38, 1)), "invalid input syntax for type numeric") } + + test("ANSI mode: cast string to timestamp with parse error") { + val activeConf = conf + new ParVector(ALL_TIMEZONES.toVector).foreach { zid => + def checkCastWithParseError(str: String): Unit = { + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), TimestampType, Option(zid.getId)), + s"Cannot cast $str to TimestampType.") + } + + SQLConf.withExistingConf(activeConf) { + checkCastWithParseError("123") + checkCastWithParseError("2015-03-18 123142") + checkCastWithParseError("2015-03-18T123123") + checkCastWithParseError("2015-03-18X") + checkCastWithParseError("2015/03/18") + checkCastWithParseError("2015.03.18") + checkCastWithParseError("20150318") + checkCastWithParseError("2015-031-8") + checkCastWithParseError("2015-03-18T12:03:17-0:70") + + val input = "abdef" + checkExceptionInExpression[DateTimeException]( + cast(input, TimestampType, Option(zid.getId)), + s"Cannot cast $input to TimestampType.") + } + } + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 85492084d51ac..a3ffc1129fd5e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -18,8 +18,9 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} -import java.text.SimpleDateFormat +import java.text.{ParseException, SimpleDateFormat} import java.time.{Instant, LocalDate, ZoneId} +import java.time.format.DateTimeParseException import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ @@ -1286,4 +1287,58 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testIntegralFunc(Long.MaxValue) testIntegralFunc(Long.MinValue) } -} + + test("SPARK-33498: GetTimestamp,UnixTimestamp,ToUnixTimestamp with parseError") { + Seq(true, false).foreach { ansiEnabled => + Seq("LEGACY", "CORRECTED", "EXCEPTION").foreach { policy => + withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> policy, + SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + + val exprSeq = Seq[Expression]( + GetTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + GetTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + UnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + UnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + ToUnixTimestamp(Literal("2020-01-27T20:06:11.847"), Literal("yyyy-MM-dd HH:mm:ss.SSS")), + ToUnixTimestamp(Literal("Unparseable"), Literal("yyyy-MM-dd HH:mm:ss.SSS")) + ) + + if (!ansiEnabled) { + exprSeq.foreach(checkEvaluation(_, null)) + } else if (policy == "LEGACY") { + exprSeq.foreach(checkExceptionInExpression[ParseException](_, "Unparseable")) + } else { + exprSeq.foreach( + checkExceptionInExpression[DateTimeParseException](_, "could not be parsed")) + } + + // LEGACY works, CORRECTED failed, EXCEPTION with SparkUpgradeException + val exprSeq2 = Seq[(Expression, Long)]( + (GetTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371847000L), + (UnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L), + (ToUnixTimestamp(Literal("2020-01-27T20:06:11.847!!!"), + Literal("yyyy-MM-dd'T'HH:mm:ss.SSS")), 1580184371L) + ) + + if (policy == "LEGACY") { + exprSeq2.foreach(pair => checkEvaluation(pair._1, pair._2)) + } else if (policy == "EXCEPTION") { + exprSeq2.foreach(pair => + checkExceptionInExpression[SparkUpgradeException]( + pair._1, + "You may get a different result due to the upgrading of Spark 3.0")) + } else { + if (ansiEnabled) { + exprSeq2.foreach(pair => + checkExceptionInExpression[DateTimeParseException](pair._1, "could not be parsed")) + } else { + exprSeq2.foreach(pair => checkEvaluation(pair._1, null)) + } + } + } + } + } + } + } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 19b4c53702662..534e222b7c13e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -153,3 +153,14 @@ select from_json('{"t":"26/October/2015"}', 't Timestamp', map('timestampFormat' select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); + +-- Timestamp type parse error +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); 
+select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); +select cast("Unparseable" as timestamp) diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 5b357fd064e41..10669f14aa87b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -301,9 +301,10 @@ struct -- !query select '1' - interval '2' second -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Cannot cast 1 to TimestampType. -- !query @@ -600,9 +601,10 @@ struct -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema -struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '2019-10-06 10:11:12.' could not be parsed at index 20 -- !query @@ -664,9 +666,10 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '2019-10-06 10:11:12.1234567PST' could not be parsed, unparsed text found at index 26 -- !query @@ -680,9 +683,10 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '223456 2019-10-06 10:11:12.123456PST' could not be parsed at index 27 -- !query @@ -744,17 +748,19 @@ struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '12.1232019-10-06S10:11' could not be parsed at index 7 -- !query select to_timestamp("12.1232019-10-06S10:11", "ss.SSSSyy-MM-dd'S'HH:mm") -- !query schema -struct +struct<> -- !query output -NULL +java.time.format.DateTimeParseException +Text '12.1232019-10-06S10:11' could not be parsed at index 9 -- !query @@ -824,9 +830,10 @@ struct -- !query select to_timestamp("02-29", "MM-dd") -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'February 29' as '1970' is not a leap year -- !query @@ -840,9 +847,10 @@ struct -- !query select to_date("02-29", "MM-dd") -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'February 29' as '1970' is not a leap year -- !query @@ -931,3 +939,84 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text '2020-01-27T20:06:11.847' could not be parsed at index 10 + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct<> +-- !query output +java.time.format.DateTimeParseException +Text 'Unparseable' could not be parsed at index 0 + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to TimestampType. 
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 8727b74d771ee..7c2c62a2db496 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -901,3 +901,75 @@ select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')) struct> -- !query output {"d":2015-10-26} + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 850cc86d943d3..810ab6ef0cbfc 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 108 +-- Number of queries: 117 -- !query @@ -909,3 +909,75 @@ struct<> -- !query output org.apache.spark.SparkUpgradeException You may get a different result due to the upgrading of Spark 3.0: Fail to recognize 'dd/MMMMM/yyyy' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + + +-- !query +select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast("Unparseable" as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 553432e503d5c..0e177f7ea82bd 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -71,7 +71,8 @@ insert into datetimes values -- !query schema struct<> -- !query output - +org.apache.spark.sql.AnalysisException +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType.; line 1 pos 22 -- !query From 35ded12fc67a3d8e51f8be3186246745a72a05bc Mon Sep 17 00:00:00 2001 From: luluorta Date: Fri, 27 Nov 2020 13:32:25 +0000 Subject: [PATCH 0588/1009] [SPARK-33141][SQL] Capture SQL configs when creating permanent views ### What changes were proposed in this pull request? This PR makes CreateViewCommand/AlterViewAsCommand capturing runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. Users can set `spark.sql.legacy.useCurrentConfigsForView` to `true` to restore the behavior before. ### Why are the changes needed? This PR is a sub-task of [SPARK-33138](https://issues.apache.org/jira/browse/SPARK-33138) that proposes to unify temp view and permanent view behaviors. This PR makes permanent views mimicking the temp view behavior that "fixes" view semantic by directly storing resolved LogicalPlan. For example, if a user uses spark 2.4 to create a view that contains null values from division-by-zero expressions, she may not want that other users' queries which reference her view throw exceptions when running on spark 3.x with ansi mode on. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? added UT + existing UTs (improved) Closes #30289 from luluorta/SPARK-33141. 
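A sketch of the captured-config behavior described above (spark-shell; the view and column names here are illustrative, not taken from the patch):
```
spark.conf.set("spark.sql.ansi.enabled", true)
// ansi.enabled=true is captured and stored as a view property at creation time.
spark.sql("CREATE OR REPLACE VIEW v AS SELECT 1/0 AS c")
spark.conf.set("spark.sql.ansi.enabled", false)
// Still fails with a divide-by-zero error, because the captured config is applied
// when the view is resolved, not the current session config.
spark.sql("SELECT * FROM v").collect()
```
Setting `spark.sql.legacy.useCurrentConfigsForView` to `true` would make the query above follow the current session configs instead, restoring the previous behavior.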
Authored-by: luluorta Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 + .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 9 ++- .../sql/catalyst/catalog/interface.scala | 18 +++++ .../plans/logical/basicLogicalOperators.scala | 16 ++++ .../apache/spark/sql/internal/SQLConf.scala | 11 +++ .../spark/sql/execution/command/views.scala | 49 +++++++++++- .../results/postgreSQL/create_view.sql.out | 28 +++---- .../spark/sql/execution/SQLViewSuite.scala | 75 ++++++++++++++++++- 9 files changed, 190 insertions(+), 22 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 6942ef7201703..7997090e710a9 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -51,6 +51,8 @@ license: | - In Spark 3.1, the `schema_of_json` and `schema_of_csv` functions return the schema in the SQL format in which field names are quoted. In Spark 3.0, the function returns a catalog string without field quoting and in lower case. - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. + + - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. ## Upgrading from Spark SQL 3.0 to 3.0.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 77c1dd9ebb7fa..dae496244c858 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1034,7 +1034,9 @@ class Analyzer(override val catalogManager: CatalogManager) s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + "around this.") } - executeSameContext(child) + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs)) { + executeSameContext(child) + } } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 17ab6664df75c..5122ca7521d9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -795,14 +795,19 @@ class SessionCatalog( if (metadata.tableType == CatalogTableType.VIEW) { val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - logDebug(s"'$viewText' will be used for the view($table).") + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) { + parser.parsePlan(viewText) + } + + logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.") // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
val child = View( desc = metadata, output = metadata.schema.toAttributes, - child = parser.parsePlan(viewText)) + child = viewPlan) SubqueryAlias(multiParts, child) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index ee7216e93ebb5..621ad84f1f5ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -305,6 +305,22 @@ case class CatalogTable( } } + /** + * Return the SQL configs of when the view was created, the configs are applied when parsing and + * analyzing the view, should be empty if the CatalogTable is not a View or created by older + * versions of Spark(before 3.1.0). + */ + def viewSQLConfigs: Map[String, String] = { + try { + for ((key, value) <- properties if key.startsWith(CatalogTable.VIEW_SQL_CONFIG_PREFIX)) + yield (key.substring(CatalogTable.VIEW_SQL_CONFIG_PREFIX.length), value) + } catch { + case e: Exception => + throw new AnalysisException( + "Corrupted view SQL configs in catalog", cause = Some(e)) + } + } + /** * Return the output column names of the query that creates a view, the column names are used to * resolve a view, should be empty if the CatalogTable is not a View or created by older versions @@ -411,6 +427,8 @@ object CatalogTable { props.toMap } + val VIEW_SQL_CONFIG_PREFIX = VIEW_PREFIX + "sqlConfig." + val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index c7108ea8ac74b..a524ed4ff73e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -453,6 +453,22 @@ case class View( } } +object View { + def effectiveSQLConf(configs: Map[String, String]): SQLConf = { + val activeConf = SQLConf.get + if (activeConf.useCurrentSQLConfigsForView) return activeConf + + val sqlConf = new SQLConf() + for ((k, v) <- configs) { + sqlConf.settings.put(k, v) + } + // We should respect the current maxNestedViewDepth cause the view resolving are executed + // from top to down. + sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) + sqlConf + } +} + /** * A container for holding named common table expressions (CTEs) and a query plan. * This operator will be removed during analysis and the relations will be substituted into child. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index add9a1d0f3aa6..b2c28ffa984a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1481,6 +1481,15 @@ object SQLConf { "must be positive.") .createWithDefault(100) + val USE_CURRENT_SQL_CONFIGS_FOR_VIEW = + buildConf("spark.sql.legacy.useCurrentConfigsForView") + .internal() + .doc("When true, SQL Configs of the current active SparkSession instead of the captured " + + "ones will be applied during the parsing and analysis phases of the view resolution.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val STREAMING_FILE_COMMIT_PROTOCOL_CLASS = buildConf("spark.sql.streaming.commitProtocolClass") .version("2.1.0") @@ -3415,6 +3424,8 @@ class SQLConf extends Serializable with Logging { def maxNestedViewDepth: Int = getConf(SQLConf.MAX_NESTED_VIEW_DEPTH) + def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW) + def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION) def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 43bc50522f2a8..a02f863a360f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper -import org.apache.spark.sql.internal.StaticSQLConf +import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType} import org.apache.spark.sql.util.SchemaUtils @@ -334,6 +334,18 @@ case class ShowViewsCommand( object ViewHelper { + private val configPrefixDenyList = Seq( + SQLConf.MAX_NESTED_VIEW_DEPTH.key, + "spark.sql.optimizer.", + "spark.sql.codegen.", + "spark.sql.execution.", + "spark.sql.shuffle.", + "spark.sql.adaptive.") + + private def shouldCaptureConfig(key: String): Boolean = { + !configPrefixDenyList.exists(prefix => key.startsWith(prefix)) + } + import CatalogTable._ /** @@ -361,11 +373,37 @@ object ViewHelper { } } + /** + * Convert the view SQL configs to `properties`. + */ + private def sqlConfigsToProps(conf: SQLConf): Map[String, String] = { + val modifiedConfs = conf.getAllConfs.filter { case (k, _) => + conf.isModifiable(k) && shouldCaptureConfig(k) + } + val props = new mutable.HashMap[String, String] + for ((key, value) <- modifiedConfs) { + props.put(s"$VIEW_SQL_CONFIG_PREFIX$key", value) + } + props.toMap + } + + /** + * Remove the view SQL configs in `properties`. + */ + private def removeSQLConfigs(properties: Map[String, String]): Map[String, String] = { + // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable, + // while `CatalogTable` should be serializable. + properties.filterNot { case (key, _) => + key.startsWith(VIEW_SQL_CONFIG_PREFIX) + } + } + /** * Generate the view properties in CatalogTable, including: * 1. 
view default database that is used to provide the default database name on view resolution. * 2. the output column names of the query that creates a view, this is used to map the output of * the view child to the view output during view resolution. + * 3. the SQL configs when creating the view. * * @param properties the `properties` in CatalogTable. * @param session the spark session. @@ -380,15 +418,18 @@ object ViewHelper { // for createViewCommand queryOutput may be different from fieldNames val queryOutput = analyzedPlan.schema.fieldNames + val conf = session.sessionState.conf + // Generate the query column names, throw an AnalysisException if there exists duplicate column // names. SchemaUtils.checkColumnNameDuplication( - fieldNames, "in the view definition", session.sessionState.conf.resolver) + fieldNames, "in the view definition", conf.resolver) - // Generate the view default catalog and namespace. + // Generate the view default catalog and namespace, as well as captured SQL configs. val manager = session.sessionState.catalogManager - removeQueryColumnNames(properties) ++ + removeSQLConfigs(removeQueryColumnNames(properties)) ++ catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++ + sqlConfigsToProps(conf) ++ generateQueryColumnNames(queryOutput) } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index ae1cb2f171704..2fab32fa4b4eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, 
view.query.out.col.0=t1_a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, 
id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.query.out.col.1=name, 
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index d776198bc7470..0b19f706836be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.internal.SQLConf.MAX_NESTED_VIEW_DEPTH +import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} class SimpleSQLViewSuite extends SQLViewSuite with SharedSparkSession @@ -762,4 +762,77 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { + withTable("t") { + withView("v1", "v2", "v3", "v4", "v5") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + sql("CREATE VIEW v1 (c1) AS SELECT C1 FROM t") + sql("CREATE VIEW v2 (c1) AS SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC") + sql("CREATE VIEW v3 (c1, count) AS SELECT c1, count(c1) FROM t GROUP BY 1") + sql("CREATE VIEW v4 (a, count) AS SELECT c1 as a, count(c1) FROM t GROUP BY a") + sql("CREATE VIEW v5 (c1) AS SELECT 1/0") + + withSQLConf(CASE_SENSITIVE.key -> "true") { + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2), 
Row(3), Row(1))) + } + withSQLConf(ORDER_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1), Row(2), Row(3))) + } + withSQLConf(GROUP_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v3"), + Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + withSQLConf(GROUP_BY_ALIASES.key -> "false") { + checkAnswer(sql("SELECT * FROM v4"), + Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + withSQLConf(ANSI_ENABLED.key -> "true") { + checkAnswer(sql("SELECT * FROM v5"), Seq(Row(null))) + } + + withSQLConf(USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true") { + withSQLConf(CASE_SENSITIVE.key -> "true") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v1") + }.getMessage + assert(e.contains("cannot resolve '`C1`' given input columns: " + + "[spark_catalog.default.t.c1]")) + } + withSQLConf(ORDER_BY_ORDINAL.key -> "false") { + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(3), Row(2), Row(1))) + } + withSQLConf(GROUP_BY_ORDINAL.key -> "false") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v3") + }.getMessage + assert(e.contains("expression 'spark_catalog.default.t.`c1`' is neither present " + + "in the group by, nor is it an aggregate function. Add to group by or wrap in " + + "first() (or first_value) if you don't care which value you get.")) + } + withSQLConf(GROUP_BY_ALIASES.key -> "false") { + val e = intercept[AnalysisException] { + sql("SELECT * FROM v4") + }.getMessage + assert(e.contains("cannot resolve '`a`' given input columns: " + + "[spark_catalog.default.t.c1]")) + } + withSQLConf(ANSI_ENABLED.key -> "true") { + val e = intercept[ArithmeticException] { + sql("SELECT * FROM v5").collect() + }.getMessage + assert(e.contains("divide by zero")) + } + } + + withSQLConf(ANSI_ENABLED.key -> "true") { + sql("ALTER VIEW v1 AS SELECT 1/0") + } + val e = intercept[ArithmeticException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("divide by zero")) + } + } + } } From 13fd272cd353c8aa40a6030c4c847c2e2f632f68 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Fri, 27 Nov 2020 10:22:45 -0600 Subject: [PATCH 0589/1009] Spelling r common dev mlib external project streaming resource managers python ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `R` * `common` * `dev` * `mlib` * `external` * `project` * `streaming` * `resource-managers` * `python` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30402 from jsoref/spelling-R_common_dev_mlib_external_project_streaming_resource-managers_python. 
Authored-by: Josh Soref Signed-off-by: Sean Owen --- R/CRAN_RELEASE.md | 2 +- R/install-dev.bat | 2 +- R/pkg/R/DataFrame.R | 6 ++--- R/pkg/R/RDD.R | 4 ++-- R/pkg/R/SQLContext.R | 2 +- R/pkg/R/WindowSpec.R | 4 ++-- R/pkg/R/column.R | 16 +++++++------- R/pkg/R/context.R | 4 ++-- R/pkg/R/deserialize.R | 2 +- R/pkg/R/functions.R | 4 ++-- R/pkg/R/install.R | 2 +- R/pkg/R/mllib_fpm.R | 2 +- R/pkg/R/mllib_tree.R | 4 ++-- R/pkg/R/mllib_utils.R | 2 +- R/pkg/R/pairRDD.R | 4 ++-- R/pkg/R/streaming.R | 2 +- R/pkg/R/types.R | 2 +- R/pkg/R/utils.R | 2 +- R/pkg/inst/worker/daemon.R | 4 ++-- R/pkg/inst/worker/worker.R | 8 +++---- R/pkg/tests/fulltests/test_Serde.R | 2 +- R/pkg/tests/fulltests/test_jvm_api.R | 6 ++--- R/pkg/tests/fulltests/test_sparkSQL.R | 6 ++--- R/pkg/tests/fulltests/test_utils.R | 2 +- R/pkg/vignettes/sparkr-vignettes.Rmd | 2 +- .../spark/util/kvstore/LevelDBTypeInfo.java | 2 +- .../spark/network/client/TransportClient.java | 2 +- .../spark/network/crypto/AuthEngine.java | 2 +- .../spark/network/crypto/AuthEngineSuite.java | 10 ++++----- .../protocol/MessageWithHeaderSuite.java | 4 ++-- .../spark/network/sasl/SparkSaslSuite.java | 16 +++++++------- .../server/OneForOneStreamManagerSuite.java | 2 +- .../util/TransportFrameDecoderSuite.java | 2 +- .../network/shuffle/SimpleDownloadFile.java | 2 +- .../apache/spark/unsafe/types/UTF8String.java | 10 ++++----- .../types/UTF8StringPropertyCheckSuite.scala | 6 ++--- dev/appveyor-guide.md | 12 +++++----- dev/create-release/known_translations | 2 +- dev/create-release/release-build.sh | 2 +- dev/create-release/releaseutils.py | 6 ++--- dev/create-release/translate-contributors.py | 22 +++++++++---------- dev/github_jira_sync.py | 10 ++++----- dev/run-tests-jenkins.py | 18 +++++++-------- dev/run-tests.py | 6 ++--- dev/tests/pr_merge_ability.sh | 2 +- dev/tests/pr_public_classes.sh | 2 +- project/MimaExcludes.scala | 2 +- project/SparkBuild.scala | 6 ++--- python/docs/source/_static/css/pyspark.css | 2 +- .../source/_templates/autosummary/class.rst | 2 +- python/docs/source/development/debugging.rst | 2 +- python/docs/source/development/testing.rst | 2 +- .../docs/source/getting_started/install.rst | 6 ++--- .../source/getting_started/quickstart.ipynb | 4 ++-- python/docs/source/index.rst | 2 +- python/pyspark/__init__.pyi | 2 +- python/pyspark/cloudpickle/cloudpickle.py | 10 ++++----- .../pyspark/cloudpickle/cloudpickle_fast.py | 10 ++++----- python/pyspark/context.py | 4 ++-- python/pyspark/java_gateway.py | 2 +- python/pyspark/ml/feature.py | 2 +- python/pyspark/ml/regression.py | 2 +- python/pyspark/ml/regression.pyi | 2 +- python/pyspark/ml/tests/test_algorithms.py | 2 +- python/pyspark/ml/tests/test_image.py | 2 +- python/pyspark/mllib/clustering.py | 2 +- python/pyspark/mllib/evaluation.py | 4 ++-- python/pyspark/mllib/regression.py | 2 +- python/pyspark/mllib/stat/_statistics.py | 2 +- .../mllib/tests/test_streaming_algorithms.py | 2 +- python/pyspark/rdd.py | 4 ++-- python/pyspark/resource/requests.py | 4 ++-- python/pyspark/shuffle.py | 2 +- python/pyspark/sql/column.py | 2 +- python/pyspark/sql/dataframe.py | 2 +- python/pyspark/sql/functions.py | 14 ++++++------ .../sql/pandas/_typing/protocols/frame.pyi | 2 +- .../sql/pandas/_typing/protocols/series.pyi | 2 +- python/pyspark/sql/pandas/functions.py | 4 ++-- .../sql/tests/test_pandas_grouped_map.py | 2 +- python/pyspark/sql/tests/test_udf.py | 4 ++-- python/pyspark/sql/utils.py | 6 ++--- python/pyspark/streaming/context.py | 2 +- python/pyspark/tests/test_context.py | 4 ++-- 
python/pyspark/worker.py | 2 +- python/test_support/userlibrary.py | 2 +- .../org/apache/spark/deploy/k8s/Config.scala | 2 +- .../k8s/ExecutorPodsSnapshotsStoreImpl.scala | 4 ++-- .../k8s/KubernetesVolumeUtilsSuite.scala | 4 ++-- .../MountVolumesFeatureStepSuite.scala | 2 +- .../apache/spark/deploy/mesos/config.scala | 2 +- .../cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../spark/deploy/yarn/YarnAllocator.scala | 2 +- .../apache/hadoop/net/ServerSocketUtil.java | 2 +- .../yarn/YarnShuffleServiceSuite.scala | 2 +- .../streaming/api/python/PythonDStream.scala | 2 +- .../spark/streaming/dstream/DStream.scala | 4 ++-- .../spark/streaming/util/HdfsUtils.scala | 2 +- .../apache/spark/streaming/JavaAPISuite.java | 2 +- .../spark/streaming/MapWithStateSuite.scala | 6 ++--- .../streaming/rdd/MapWithStateRDDSuite.scala | 6 ++--- 101 files changed, 208 insertions(+), 208 deletions(-) diff --git a/R/CRAN_RELEASE.md b/R/CRAN_RELEASE.md index 4d9b6416c01cb..2f410cf8bfd94 100644 --- a/R/CRAN_RELEASE.md +++ b/R/CRAN_RELEASE.md @@ -25,7 +25,7 @@ To release SparkR as a package to CRAN, we would use the `devtools` package. Ple First, check that the `Version:` field in the `pkg/DESCRIPTION` file is updated. Also, check for stale files not under source control. -Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, eg. `yum -q -y install qpdf`). +Note that while `run-tests.sh` runs `check-cran.sh` (which runs `R CMD check`), it is doing so with `--no-manual --no-vignettes`, which skips a few vignettes or PDF checks - therefore it will be preferred to run `R CMD check` on the source package built manually before uploading a release. Also note that for CRAN checks for pdf vignettes to success, `qpdf` tool must be there (to install it, e.g. `yum -q -y install qpdf`). To upload a release, we would need to update the `cran-comments.md`. This should generally contain the results from running the `check-cran.sh` script along with comments on status of all `WARNING` (should not be any) or `NOTE`. As a part of `check-cran.sh` and the release process, the vignettes is build - make sure `SPARK_HOME` is set and Spark jars are accessible. diff --git a/R/install-dev.bat b/R/install-dev.bat index c570d93049a14..ae5aa589a19d1 100644 --- a/R/install-dev.bat +++ b/R/install-dev.bat @@ -26,7 +26,7 @@ MKDIR %SPARK_HOME%\R\lib rem When you pass the package path directly as an argument to R CMD INSTALL, rem it takes the path as 'C:\projects\spark\R\..\R\pkg"' as an example at -rem R 4.0. To work around this, directly go to the directoy and install it. +rem R 4.0. To work around this, directly go to the directory and install it. rem See also SPARK-32074 pushd %SPARK_HOME%\R\pkg\ R.exe CMD INSTALL --library="%SPARK_HOME%\R\lib" . diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 2ce53782d9af0..31a651ea1279b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2772,7 +2772,7 @@ setMethod("merge", #' Creates a list of columns by replacing the intersected ones with aliases #' #' Creates a list of columns by replacing the intersected ones with aliases. -#' The name of the alias column is formed by concatanating the original column name and a suffix. 
+#' The name of the alias column is formed by concatenating the original column name and a suffix. #' #' @param x a SparkDataFrame #' @param intersectedColNames a list of intersected column names of the SparkDataFrame @@ -3231,7 +3231,7 @@ setMethod("describe", #' \item stddev #' \item min #' \item max -#' \item arbitrary approximate percentiles specified as a percentage (eg, "75\%") +#' \item arbitrary approximate percentiles specified as a percentage (e.g., "75\%") #' } #' If no statistics are given, this function computes count, mean, stddev, min, #' approximate quartiles (percentiles at 25\%, 50\%, and 75\%), and max. @@ -3743,7 +3743,7 @@ setMethod("histogram", #' #' @param x a SparkDataFrame. #' @param url JDBC database url of the form \code{jdbc:subprotocol:subname}. -#' @param tableName yhe name of the table in the external database. +#' @param tableName the name of the table in the external database. #' @param mode one of 'append', 'overwrite', 'error', 'errorifexists', 'ignore' #' save mode (it is 'error' by default) #' @param ... additional JDBC database connection properties. diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 7a1d157bb8a36..408a3ff25b2b2 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -970,7 +970,7 @@ setMethod("takeSample", signature(x = "RDD", withReplacement = "logical", MAXINT))))) # If the first sample didn't turn out large enough, keep trying to # take samples; this shouldn't happen often because we use a big - # multiplier for thei initial size + # multiplier for the initial size while (length(samples) < total) samples <- collectRDD(sampleRDD(x, withReplacement, fraction, as.integer(ceiling(stats::runif(1, @@ -1512,7 +1512,7 @@ setMethod("glom", #' #' @param x An RDD. #' @param y An RDD. -#' @return a new RDD created by performing the simple union (witout removing +#' @return a new RDD created by performing the simple union (without removing #' duplicates) of two input RDDs. #' @examples #'\dontrun{ diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index c0ac68332ec41..5ed0481f33d8f 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -203,7 +203,7 @@ getSchema <- function(schema, firstRow = NULL, rdd = NULL) { }) } - # SPAKR-SQL does not support '.' in column name, so replace it with '_' + # SPARK-SQL does not support '.' in column name, so replace it with '_' # TODO(davies): remove this once SPARK-2775 is fixed names <- lapply(names, function(n) { nn <- gsub(".", "_", n, fixed = TRUE) diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R index 037809cd0923e..be47d0117ed7f 100644 --- a/R/pkg/R/WindowSpec.R +++ b/R/pkg/R/WindowSpec.R @@ -54,7 +54,7 @@ setMethod("show", "WindowSpec", #' Defines the partitioning columns in a WindowSpec. #' #' @param x a WindowSpec. -#' @param col a column to partition on (desribed by the name or Column). +#' @param col a column to partition on (described by the name or Column). #' @param ... additional column(s) to partition on. #' @return A WindowSpec. 
#' @rdname partitionBy @@ -231,7 +231,7 @@ setMethod("rangeBetween", #' @rdname over #' @name over #' @aliases over,Column,WindowSpec-method -#' @family colum_func +#' @family column_func #' @examples #' \dontrun{ #' df <- createDataFrame(mtcars) diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index 835178990b485..9fa117ccb6281 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -135,7 +135,7 @@ createMethods() #' @rdname alias #' @name alias #' @aliases alias,Column-method -#' @family colum_func +#' @family column_func #' @examples #' \dontrun{ #' df <- createDataFrame(iris) @@ -161,7 +161,7 @@ setMethod("alias", #' #' @rdname substr #' @name substr -#' @family colum_func +#' @family column_func #' @aliases substr,Column-method #' #' @param x a Column. @@ -187,7 +187,7 @@ setMethod("substr", signature(x = "Column"), #' #' @rdname startsWith #' @name startsWith -#' @family colum_func +#' @family column_func #' @aliases startsWith,Column-method #' #' @param x vector of character string whose "starts" are considered @@ -206,7 +206,7 @@ setMethod("startsWith", signature(x = "Column"), #' #' @rdname endsWith #' @name endsWith -#' @family colum_func +#' @family column_func #' @aliases endsWith,Column-method #' #' @param x vector of character string whose "ends" are considered @@ -224,7 +224,7 @@ setMethod("endsWith", signature(x = "Column"), #' #' @rdname between #' @name between -#' @family colum_func +#' @family column_func #' @aliases between,Column-method #' #' @param x a Column @@ -251,7 +251,7 @@ setMethod("between", signature(x = "Column"), # nolint end #' @rdname cast #' @name cast -#' @family colum_func +#' @family column_func #' @aliases cast,Column-method #' #' @examples @@ -300,7 +300,7 @@ setMethod("%in%", #' Can be a single value or a Column. #' @rdname otherwise #' @name otherwise -#' @family colum_func +#' @family column_func #' @aliases otherwise,Column-method #' @note otherwise since 1.5.0 setMethod("otherwise", @@ -440,7 +440,7 @@ setMethod("withField", #' ) #' #' # However, if you are going to add/replace multiple nested fields, -#' # it is preffered to extract out the nested struct before +#' # it is preferred to extract out the nested struct before #' # adding/replacing multiple fields e.g. #' head( #' withColumn( diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index e3c9d9f8793d6..cca6c2c817de9 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -86,7 +86,7 @@ makeSplits <- function(numSerializedSlices, length) { # For instance, for numSerializedSlices of 22, length of 50 # [1] 0 0 2 2 4 4 6 6 6 9 9 11 11 13 13 15 15 15 18 18 20 20 22 22 22 # [26] 25 25 27 27 29 29 31 31 31 34 34 36 36 38 38 40 40 40 43 43 45 45 47 47 47 - # Notice the slice group with 3 slices (ie. 6, 15, 22) are roughly evenly spaced. + # Notice the slice group with 3 slices (i.e. 6, 15, 22) are roughly evenly spaced. # We are trying to reimplement the calculation in the positions method in ParallelCollectionRDD if (numSerializedSlices > 0) { unlist(lapply(0: (numSerializedSlices - 1), function(x) { @@ -116,7 +116,7 @@ makeSplits <- function(numSerializedSlices, length) { #' This change affects both createDataFrame and spark.lapply. #' In the specific one case that it is used to convert R native object into SparkDataFrame, it has #' always been kept at the default of 1. In the case the object is large, we are explicitly setting -#' the parallism to numSlices (which is still 1). +#' the parallelism to numSlices (which is still 1). 
#' #' Specifically, we are changing to split positions to match the calculation in positions() of #' ParallelCollectionRDD in Spark. diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index 5d22340fb62a0..89a8fbecd36b0 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -250,7 +250,7 @@ readDeserializeWithKeysInArrow <- function(inputCon) { keys <- readMultipleObjects(inputCon) - # Read keys to map with each groupped batch later. + # Read keys to map with each grouped batch later. list(keys = keys, data = data) } diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index b12f7b472ec83..99406443165d5 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -144,7 +144,7 @@ NULL #' @param y Column to compute on. #' @param pos In \itemize{ #' \item \code{locate}: a start position of search. -#' \item \code{overlay}: a start postiton for replacement. +#' \item \code{overlay}: a start position for replacement. #' } #' @param len In \itemize{ #' \item \code{lpad} the maximum length of each output result. @@ -2918,7 +2918,7 @@ setMethod("shiftRight", signature(y = "Column", x = "numeric"), }) #' @details -#' \code{shiftRightUnsigned}: (Unigned) shifts the given value numBits right. If the given value is +#' \code{shiftRightUnsigned}: (Unsigned) shifts the given value numBits right. If the given value is #' a long value, it will return a long value else it will return an integer value. #' #' @rdname column_math_functions diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index ea2c0b4c0f42f..5bc5ae07c5f03 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -289,7 +289,7 @@ sparkCachePath <- function() { } # Length of the Spark cache specific relative path segments for each platform -# eg. "Apache\Spark\Cache" is 3 in Windows, or "spark" is 1 in unix +# e.g. "Apache\Spark\Cache" is 3 in Windows, or "spark" is 1 in unix # Must match sparkCachePath() exactly. sparkCacheRelPathLength <- function() { if (is_windows()) { diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R index 30bc51b932041..65a43514930f0 100644 --- a/R/pkg/R/mllib_fpm.R +++ b/R/pkg/R/mllib_fpm.R @@ -125,7 +125,7 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"), #' The \code{SparkDataFrame} contains five columns: #' \code{antecedent} (an array of the same type as the input column), #' \code{consequent} (an array of the same type as the input column), -#' \code{condfidence} (confidence for the rule) +#' \code{confidence} (confidence for the rule) #' \code{lift} (lift for the rule) #' and \code{support} (support for the rule) #' @rdname spark.fpGrowth diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index f6aa48f5fa04a..b5a014b0a3cfd 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -53,7 +53,7 @@ setClass("DecisionTreeRegressionModel", representation(jobj = "jobj")) #' @note DecisionTreeClassificationModel since 2.3.0 setClass("DecisionTreeClassificationModel", representation(jobj = "jobj")) -# Create the summary of a tree ensemble model (eg. Random Forest, GBT) +# Create the summary of a tree ensemble model (e.g. Random Forest, GBT) summary.treeEnsemble <- function(model) { jobj <- model@jobj formula <- callJMethod(jobj, "formula") @@ -73,7 +73,7 @@ summary.treeEnsemble <- function(model) { jobj = jobj) } -# Prints the summary of tree ensemble models (eg. Random Forest, GBT) +# Prints the summary of tree ensemble models (e.g. 
Random Forest, GBT) print.summary.treeEnsemble <- function(x) { jobj <- x$jobj cat("Formula: ", x$formula) diff --git a/R/pkg/R/mllib_utils.R b/R/pkg/R/mllib_utils.R index f38f1ac3a6b4c..d943d8d0ab4c0 100644 --- a/R/pkg/R/mllib_utils.R +++ b/R/pkg/R/mllib_utils.R @@ -18,7 +18,7 @@ # mllib_utils.R: Utilities for MLlib integration # Integration with R's standard functions. -# Most of MLlib's argorithms are provided in two flavours: +# Most of MLlib's algorithms are provided in two flavours: # - a specialization of the default R methods (glm). These methods try to respect # the inputs and the outputs of R's method to the largest extent, but some small differences # may exist. diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index b29381bb900fb..41676be03e951 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -239,7 +239,7 @@ setMethod("partitionByRDD", javaPairRDD <- callJMethod(javaPairRDD, "partitionBy", rPartitioner) # Call .values() on the result to get back the final result, the - # shuffled acutal content key-val pairs. + # shuffled actual content key-val pairs. r <- callJMethod(javaPairRDD, "values") RDD(r, serializedMode = "byte") @@ -411,7 +411,7 @@ setMethod("reduceByKeyLocally", #' \itemize{ #' \item createCombiner, which turns a V into a C (e.g., creates a one-element list) #' \item mergeValue, to merge a V into a C (e.g., adds it to the end of a list) - -#' \item mergeCombiners, to combine two C's into a single one (e.g., concatentates +#' \item mergeCombiners, to combine two C's into a single one (e.g., concatenates #' two lists). #' } #' diff --git a/R/pkg/R/streaming.R b/R/pkg/R/streaming.R index 5eccbdc9d3818..2bcfb363f9d24 100644 --- a/R/pkg/R/streaming.R +++ b/R/pkg/R/streaming.R @@ -93,7 +93,7 @@ setMethod("explain", #' lastProgress #' -#' Prints the most recent progess update of this streaming query in JSON format. +#' Prints the most recent progress update of this streaming query in JSON format. #' #' @param x a StreamingQuery. #' @rdname lastProgress diff --git a/R/pkg/R/types.R b/R/pkg/R/types.R index 5d48a9eee2799..dfa83c35665ce 100644 --- a/R/pkg/R/types.R +++ b/R/pkg/R/types.R @@ -68,7 +68,7 @@ rToSQLTypes <- as.environment(list( "character" = "string", "logical" = "boolean")) -# Helper function of coverting decimal type. When backend returns column type in the +# Helper function of converting decimal type. When backend returns column type in the # format of decimal(,) (e.g., decimal(10, 0)), this function coverts the column type # as double type. This function converts backend returned types that are not the key # of PRIMITIVE_TYPES, but should be treated as PRIMITIVE_TYPES. diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index d6f9f927d5cdc..264cbfc9ba929 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -930,7 +930,7 @@ getOne <- function(x, envir, inherits = TRUE, ifnotfound = NULL) { } # Returns a vector of parent directories, traversing up count times, starting with a full path -# eg. traverseParentDirs("/Users/user/Library/Caches/spark/spark2.2", 1) should return +# e.g. 
traverseParentDirs("/Users/user/Library/Caches/spark/spark2.2", 1) should return # this "/Users/user/Library/Caches/spark/spark2.2" # and "/Users/user/Library/Caches/spark" traverseParentDirs <- function(x, count) { diff --git a/R/pkg/inst/worker/daemon.R b/R/pkg/inst/worker/daemon.R index fb9db63b07cd0..4589bb9c6ad1b 100644 --- a/R/pkg/inst/worker/daemon.R +++ b/R/pkg/inst/worker/daemon.R @@ -32,7 +32,7 @@ inputCon <- socketConnection( SparkR:::doServerAuth(inputCon, Sys.getenv("SPARKR_WORKER_SECRET")) -# Waits indefinitely for a socket connecion by default. +# Waits indefinitely for a socket connection by default. selectTimeout <- NULL while (TRUE) { @@ -72,7 +72,7 @@ while (TRUE) { } }) } else if (is.null(children)) { - # If it is NULL, there are no children. Waits indefinitely for a socket connecion. + # If it is NULL, there are no children. Waits indefinitely for a socket connection. selectTimeout <- NULL } diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 1ef05ea621e83..dd271f91d0084 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -85,7 +85,7 @@ outputResult <- function(serializer, output, outputCon) { } # Constants -specialLengths <- list(END_OF_STERAM = 0L, TIMING_DATA = -1L) +specialLengths <- list(END_OF_STREAM = 0L, TIMING_DATA = -1L) # Timing R process boot bootTime <- currentTimeSecs() @@ -180,7 +180,7 @@ if (isEmpty != 0) { } else if (deserializer == "arrow" && mode == 1) { data <- SparkR:::readDeserializeInArrow(inputCon) # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html - # rbind.fill might be an anternative to make it faster if plyr is installed. + # rbind.fill might be an alternative to make it faster if plyr is installed. # Also, note that, 'dapply' applies a function to each partition. data <- do.call("rbind", data) } @@ -212,7 +212,7 @@ if (isEmpty != 0) { if (serializer == "arrow") { # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html - # rbind.fill might be an anternative to make it faster if plyr is installed. + # rbind.fill might be an alternative to make it faster if plyr is installed. combined <- do.call("rbind", outputs) SparkR:::writeSerializeInArrow(outputCon, combined) } @@ -285,7 +285,7 @@ SparkR:::writeDouble(outputCon, computeInputElapsDiff) # compute SparkR:::writeDouble(outputCon, outputComputeElapsDiff) # output # End of output -SparkR:::writeInt(outputCon, specialLengths$END_OF_STERAM) +SparkR:::writeInt(outputCon, specialLengths$END_OF_STREAM) close(outputCon) close(inputCon) diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R index e01f6ee005218..a52289e43ca5e 100644 --- a/R/pkg/tests/fulltests/test_Serde.R +++ b/R/pkg/tests/fulltests/test_Serde.R @@ -125,7 +125,7 @@ test_that("SerDe of list of lists", { sparkR.session.stop() -# Note that this test should be at the end of tests since the configruations used here are not +# Note that this test should be at the end of tests since the configurations used here are not # specific to sessions, and the Spark context is restarted. 
test_that("createDataFrame large objects", { for (encryptionEnabled in list("true", "false")) { diff --git a/R/pkg/tests/fulltests/test_jvm_api.R b/R/pkg/tests/fulltests/test_jvm_api.R index 8b3b4f73de170..3bf6ae556c079 100644 --- a/R/pkg/tests/fulltests/test_jvm_api.R +++ b/R/pkg/tests/fulltests/test_jvm_api.R @@ -20,11 +20,11 @@ context("JVM API") sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) test_that("Create and call methods on object", { - jarr <- sparkR.newJObject("java.util.ArrayList") + jarray <- sparkR.newJObject("java.util.ArrayList") # Add an element to the array - sparkR.callJMethod(jarr, "add", 1L) + sparkR.callJMethod(jarray, "add", 1L) # Check if get returns the same element - expect_equal(sparkR.callJMethod(jarr, "get", 0L), 1L) + expect_equal(sparkR.callJMethod(jarray, "get", 0L), 1L) }) test_that("Call static methods", { diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 81d4e14df791d..833f77786c80b 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2093,7 +2093,7 @@ test_that("higher order functions", { createDataFrame(data.frame(id = 1)), expr("CAST(array(1.0, 2.0, -3.0, -4.0) AS array) xs"), expr("CAST(array(0.0, 3.0, 48.0) AS array) ys"), - expr("array('FAILED', 'SUCCEDED') as vs"), + expr("array('FAILED', 'SUCCEEDED') as vs"), expr("map('foo', 1, 'bar', 2) as mx"), expr("map('foo', 42, 'bar', -1, 'baz', 0) as my") ) @@ -3667,7 +3667,7 @@ test_that("gapply() and gapplyCollect() on a DataFrame", { } # Computes the arithmetic mean of the second column by grouping - # on the first and third columns. Output the groupping value and the average. + # on the first and third columns. Output the grouping value and the average. schema <- structType(structField("a", "integer"), structField("c", "string"), structField("avg", "double")) df3 <- gapply( @@ -3965,7 +3965,7 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", { paste("Error in listFunctions : analysis error - Database", "'zxwtyswklpf_db' does not exist")) - # recoverPartitions does not work with tempory view + # recoverPartitions does not work with temporary view expect_error(recoverPartitions("cars"), "no such table - Table or view 'cars' not found in database 'default'") expect_error(refreshTable("cars"), NA) diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R index c3fb9046fcda4..6c83a137cfb7b 100644 --- a/R/pkg/tests/fulltests/test_utils.R +++ b/R/pkg/tests/fulltests/test_utils.R @@ -116,7 +116,7 @@ test_that("cleanClosure on R functions", { actual <- get("y", envir = env, inherits = FALSE) expect_equal(actual, y) - # Test for combination for nested and sequenctial functions in a closure + # Test for combination for nested and sequential functions in a closure f1 <- function(x) x + 1 f2 <- function(x) f1(x) + 2 userFunc <- function(x) { f1(x); f2(x) } diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 3713e6c784855..a0608748696a3 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -146,7 +146,7 @@ sparkR.session.stop() Different from many other R packages, to use SparkR, you need an additional installation of Apache Spark. The Spark installation will be used to run a backend process that will compile and execute SparkR programs. 
-After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (eg. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](https://spark.apache.org/downloads.html). +After installing the SparkR package, you can call `sparkR.session` as explained in the previous section to start and it will check for the Spark installation. If you are working with SparkR from an interactive shell (e.g. R, RStudio) then Spark is downloaded and cached automatically if it is not found. Alternatively, we provide an easy-to-use function `install.spark` for running this manually. If you don't have Spark installed on the computer, you may download it from [Apache Spark Website](https://spark.apache.org/downloads.html). ```{r, eval=FALSE} install.spark() diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java index d7423537ddfcf..4d7f76f673865 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java @@ -133,7 +133,7 @@ class LevelDBTypeInfo { // First create the parent indices, then the child indices. ti.indices().forEach(idx -> { - // In LevelDB, there is no parent index for the NUTURAL INDEX. + // In LevelDB, there is no parent index for the NATURAL INDEX. if (idx.parent().isEmpty() || idx.value().equals(KVIndex.NATURAL_INDEX_NAME)) { indices.put(idx.value(), new Index(idx, ti.getAccessor(idx.value()), null)); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index 6dcc703e92669..eb2882074d7c7 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -303,7 +303,7 @@ public void close() { @Override public String toString() { return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("remoteAdress", channel.remoteAddress()) + .append("remoteAddress", channel.remoteAddress()) .append("clientId", clientId) .append("isActive", isActive()) .toString(); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java index 64fdb32a67ada..c2b2edc7f07d5 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java @@ -287,7 +287,7 @@ private byte[] doCipherOp(int mode, byte[] in, boolean isFinal) } } } catch (InternalError ie) { - // SPARK-25535. The commons-cryto library will throw InternalError if something goes wrong, + // SPARK-25535. The commons-crypto library will throw InternalError if something goes wrong, // and leave bad state behind in the Java wrappers, so it's not safe to use them afterwards. 
if (mode == Cipher.ENCRYPT_MODE) { this.encryptor = null; diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java index 0790f0079c2bd..1c2061699a128 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java @@ -150,8 +150,8 @@ public void testEncryptedMessage() throws Exception { ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(buf); - while (emsg.transfered() < emsg.count()) { - emsg.transferTo(channel, emsg.transfered()); + while (emsg.transferred() < emsg.count()) { + emsg.transferTo(channel, emsg.transferred()); } assertEquals(data.length, channel.length()); } finally { @@ -196,9 +196,9 @@ public Long answer(InvocationOnMock invocationOnMock) throws Throwable { TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(region); ByteArrayWritableChannel channel = new ByteArrayWritableChannel(testDataLength); // "transferTo" should act correctly when the underlying FileRegion transfers 0 bytes. - assertEquals(0L, emsg.transferTo(channel, emsg.transfered())); - assertEquals(testDataLength, emsg.transferTo(channel, emsg.transfered())); - assertEquals(emsg.transfered(), emsg.count()); + assertEquals(0L, emsg.transferTo(channel, emsg.transferred())); + assertEquals(testDataLength, emsg.transferTo(channel, emsg.transferred())); + assertEquals(emsg.transferred(), emsg.count()); assertEquals(4, channel.length()); } finally { client.close(); diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java index 3bff34e210e3c..af1c2878672c0 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/MessageWithHeaderSuite.java @@ -129,8 +129,8 @@ private void testFileRegionBody(int totalWrites, int writesPerCall) throws Excep private ByteBuf doWrite(MessageWithHeader msg, int minExpectedWrites) throws Exception { int writes = 0; ByteArrayWritableChannel channel = new ByteArrayWritableChannel((int) msg.count()); - while (msg.transfered() < msg.count()) { - msg.transferTo(channel, msg.transfered()); + while (msg.transferred() < msg.count()) { + msg.transferTo(channel, msg.transferred()); writes++; } assertTrue("Not enough writes!", minExpectedWrites <= writes); diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java index ecaeec98da182..32c9acd327213 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java @@ -191,28 +191,28 @@ public void testEncryptedMessage() throws Exception { SaslEncryption.EncryptedMessage emsg = new SaslEncryption.EncryptedMessage(backend, msg, 1024); - long count = emsg.transferTo(channel, emsg.transfered()); + long count = emsg.transferTo(channel, emsg.transferred()); assertTrue(count < data.length); assertTrue(count > 0); // Here, the output buffer is full so 
nothing should be transferred. - assertEquals(0, emsg.transferTo(channel, emsg.transfered())); + assertEquals(0, emsg.transferTo(channel, emsg.transferred())); // Now there's room in the buffer, but not enough to transfer all the remaining data, // so the dummy count should be returned. channel.reset(); - assertEquals(1, emsg.transferTo(channel, emsg.transfered())); + assertEquals(1, emsg.transferTo(channel, emsg.transferred())); // Eventually, the whole message should be transferred. for (int i = 0; i < data.length / 32 - 2; i++) { channel.reset(); - assertEquals(1, emsg.transferTo(channel, emsg.transfered())); + assertEquals(1, emsg.transferTo(channel, emsg.transferred())); } channel.reset(); - count = emsg.transferTo(channel, emsg.transfered()); + count = emsg.transferTo(channel, emsg.transferred()); assertTrue("Unexpected count: " + count, count > 1 && count < data.length); - assertEquals(data.length, emsg.transfered()); + assertEquals(data.length, emsg.transferred()); } finally { msg.release(); } @@ -237,9 +237,9 @@ public void testEncryptedMessageChunking() throws Exception { new SaslEncryption.EncryptedMessage(backend, msg.convertToNetty(), data.length / 8); ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); - while (emsg.transfered() < emsg.count()) { + while (emsg.transferred() < emsg.count()) { channel.reset(); - emsg.transferTo(channel, emsg.transfered()); + emsg.transferTo(channel, emsg.transferred()); } verify(backend, times(8)).wrap(any(byte[].class), anyInt(), anyInt()); diff --git a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java index 45e1836da641f..634b40ed450ee 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/server/OneForOneStreamManagerSuite.java @@ -72,7 +72,7 @@ public void testMissingChunk() { Assert.assertNotNull(getChunk(manager, streamId, 2)); manager.connectionTerminated(dummyChannel); - // loaded buffers are not released yet as in production a MangedBuffer returned by getChunk() + // loaded buffers are not released yet as in production a ManagedBuffer returned by getChunk() // would only be released by Netty after it is written to the network Mockito.verify(buffer1, Mockito.never()).release(); Mockito.verify(buffer2, Mockito.never()).release(); diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java index 4b67aa80351d2..163c52b023822 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java @@ -98,7 +98,7 @@ public void testConsolidationPerf() throws Exception { writtenBytes += pieceBytes; } logger.info("Writing 300MiB frame buf with consolidation of threshold " + threshold - + " took " + totalTime + " milis"); + + " took " + totalTime + " millis"); } finally { for (ByteBuf buf : retained) { release(buf); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java index 670612fd6f66a..97ecaa627b66c 100644 
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/SimpleDownloadFile.java @@ -32,7 +32,7 @@ * A DownloadFile that does not take any encryption settings into account for reading and * writing data. * - * This does *not* mean the data in the file is un-encrypted -- it could be that the data is + * This does *not* mean the data in the file is unencrypted -- it could be that the data is * already encrypted when its written, and subsequent layer is responsible for decrypting. */ public class SimpleDownloadFile implements DownloadFile { diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index b8dda22240042..c6aa5f0b58285 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -635,13 +635,13 @@ public UTF8String trimLeft() { public UTF8String trimLeft(UTF8String trimString) { if (trimString == null) return null; // the searching byte position in the source string - int srchIdx = 0; + int searchIdx = 0; // the first beginning byte position of a non-matching character int trimIdx = 0; - while (srchIdx < numBytes) { + while (searchIdx < numBytes) { UTF8String searchChar = copyUTF8String( - srchIdx, srchIdx + numBytesForFirstByte(this.getByte(srchIdx)) - 1); + searchIdx, searchIdx + numBytesForFirstByte(this.getByte(searchIdx)) - 1); int searchCharBytes = searchChar.numBytes; // try to find the matching for the searchChar in the trimString set if (trimString.find(searchChar, 0) >= 0) { @@ -650,9 +650,9 @@ public UTF8String trimLeft(UTF8String trimString) { // no matching, exit the search break; } - srchIdx += searchCharBytes; + searchIdx += searchCharBytes; } - if (srchIdx == 0) { + if (searchIdx == 0) { // Nothing trimmed return this; } diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index 69a082053aa65..ab488e18ba3f4 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -192,7 +192,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp } } - val nullalbeSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString)) + val nullableSeq = Gen.listOf(Gen.oneOf[String](null: String, randomString)) test("concat") { def concat(origin: Seq[String]): String = @@ -201,7 +201,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp forAll { (inputs: Seq[String]) => assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(inputs.mkString)) } - forAll (nullalbeSeq) { (inputs: Seq[String]) => + forAll (nullableSeq) { (inputs: Seq[String]) => assert(UTF8String.concat(inputs.map(toUTF8): _*) === toUTF8(concat(inputs))) } } @@ -216,7 +216,7 @@ class UTF8StringPropertyCheckSuite extends AnyFunSuite with ScalaCheckDrivenProp assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === toUTF8(inputs.mkString(sep))) } - forAll(randomString, nullalbeSeq) {(sep: String, inputs: Seq[String]) => + forAll(randomString, nullableSeq) {(sep: String, inputs: Seq[String]) => 
assert(UTF8String.concatWs(toUTF8(sep), inputs.map(toUTF8): _*) === toUTF8(concatWs(sep, inputs))) } diff --git a/dev/appveyor-guide.md b/dev/appveyor-guide.md index a8c0c1ef23ac3..c68b5de9e61d0 100644 --- a/dev/appveyor-guide.md +++ b/dev/appveyor-guide.md @@ -33,22 +33,22 @@ Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor 2016-09-04 11 07 58 -- Click "Github". +- Click "GitHub". 2016-09-04 11 08 10 -#### After signing up, go to profile to link Github and AppVeyor. +#### After signing up, go to profile to link GitHub and AppVeyor. - Click your account and then click "Profile". 2016-09-04 11 09 43 -- Enable the link with GitHub via clicking "Link Github account". +- Enable the link with GitHub via clicking "Link GitHub account". 2016-09-04 11 09 52 -- Click "Authorize application" in Github site. +- Click "Authorize application" in GitHub site. 2016-09-04 11 10 05 @@ -63,11 +63,11 @@ Currently, SparkR on Windows is being tested with [AppVeyor](https://ci.appveyor 2016-08-30 12 16 35 -- Since we will use Github here, click the "GITHUB" button and then click "Authorize Github" so that AppVeyor can access the Github logs (e.g. commits). +- Since we will use GitHub here, click the "GITHUB" button and then click "Authorize GitHub" so that AppVeyor can access the GitHub logs (e.g. commits). 2016-09-04 11 10 22 -- Click "Authorize application" from Github (the above step will pop up this page). +- Click "Authorize application" from GitHub (the above step will pop up this page). 2016-09-04 11 10 27 diff --git a/dev/create-release/known_translations b/dev/create-release/known_translations index ff41cccde0140..64bd9ada1bf61 100644 --- a/dev/create-release/known_translations +++ b/dev/create-release/known_translations @@ -1,5 +1,5 @@ # This is a mapping of names to be translated through translate-contributors.py -# The format expected on each line should be: - +# The format expected on each line should be: - 012huang - Weiyi Huang 07ARB - Ankit Raj Boudh 10110346 - Xian Liu diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 240f4c8dfd371..d2953a86afafd 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -452,7 +452,7 @@ if [[ "$1" == "publish-release" ]]; then if ! is_dry_run; then nexus_upload=$NEXUS_ROOT/deployByRepositoryId/$staged_repo_id - echo "Uplading files to $nexus_upload" + echo "Uploading files to $nexus_upload" for file in $(find . -type f) do # strip leading ./ diff --git a/dev/create-release/releaseutils.py b/dev/create-release/releaseutils.py index cc7ad931198a2..a0e9695d58361 100755 --- a/dev/create-release/releaseutils.py +++ b/dev/create-release/releaseutils.py @@ -110,7 +110,7 @@ def __str__(self): # Under the hood, this runs a `git log` on that tag and parses the fields # from the command output to construct a list of Commit objects. Note that # because certain fields reside in the commit description and cannot be parsed -# through the Github API itself, we need to do some intelligent regex parsing +# through the GitHub API itself, we need to do some intelligent regex parsing # to extract those fields. # # This is written using Git 1.8.5. 
@@ -140,7 +140,7 @@ def get_commits(tag): sys.exit("Unexpected format in commit: %s" % commit_digest) [_hash, author, title] = commit_digest.split(field_end_marker) # The PR number and github username is in the commit message - # itself and cannot be accessed through any Github API + # itself and cannot be accessed through any GitHub API pr_number = None match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body) if match: @@ -252,7 +252,7 @@ def nice_join(str_list): return ", ".join(str_list[:-1]) + ", and " + str_list[-1] -# Return the full name of the specified user on Github +# Return the full name of the specified user on GitHub # If the user doesn't exist, return None def get_github_name(author, github_client): if github_client: diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py index 8340266527fc6..be5611ce65a7d 100755 --- a/dev/create-release/translate-contributors.py +++ b/dev/create-release/translate-contributors.py @@ -17,7 +17,7 @@ # This script translates invalid authors in the contributors list generated # by generate-contributors.py. When the script encounters an author name that -# is considered invalid, it searches Github and JIRA in an attempt to search +# is considered invalid, it searches GitHub and JIRA in an attempt to search # for replacements. This tool runs in two modes: # # (1) Interactive mode: For each invalid author name, this script presents @@ -68,7 +68,7 @@ if INTERACTIVE_MODE: print("Running in interactive mode. To disable this, provide the --non-interactive flag.") -# Setup Github and JIRA clients +# Setup GitHub and JIRA clients jira_options = {"server": JIRA_API_BASE} jira_client = JIRA(options=jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) github_client = Github(GITHUB_API_TOKEN) @@ -89,11 +89,11 @@ # Generate candidates for the given author. This should only be called if the given author # name does not represent a full name as this operation is somewhat expensive. Under the -# hood, it makes several calls to the Github and JIRA API servers to find the candidates. +# hood, it makes several calls to the GitHub and JIRA API servers to find the candidates. # # This returns a list of (candidate name, source) 2-tuples. E.g. 
# [ -# (NOT_FOUND, "No full name found for Github user andrewor14"), +# (NOT_FOUND, "No full name found for GitHub user andrewor14"), # ("Andrew Or", "Full name of JIRA user andrewor14"), # ("Andrew Orso", "Full name of SPARK-1444 assignee andrewor14"), # ("Andrew Ordall", "Full name of SPARK-1663 assignee andrewor14"), @@ -104,12 +104,12 @@ def generate_candidates(author, issues): candidates = [] - # First check for full name of Github user + # First check for full name of GitHub user github_name = get_github_name(author, github_client) if github_name: - candidates.append((github_name, "Full name of Github user %s" % author)) + candidates.append((github_name, "Full name of GitHub user %s" % author)) else: - candidates.append((NOT_FOUND, "No full name found for Github user %s" % author)) + candidates.append((NOT_FOUND, "No full name found for GitHub user %s" % author)) # Then do the same for JIRA user jira_name = get_jira_name(author, jira_client) if jira_name: @@ -151,7 +151,7 @@ def generate_candidates(author, issues): candidates[i] = (candidate, source) return candidates -# Translate each invalid author by searching for possible candidates from Github and JIRA +# Translate each invalid author by searching for possible candidates from GitHub and JIRA # In interactive mode, this script presents the user with a list of choices and have the user # select from this list. Additionally, the user may also choose to enter a custom name. # In non-interactive mode, this script picks the first valid author name from the candidates @@ -180,12 +180,12 @@ def generate_candidates(author, issues): issues = temp_author.split("/")[1:] candidates = generate_candidates(author, issues) # Print out potential replacement candidates along with the sources, e.g. - # [X] No full name found for Github user andrewor14 + # [X] No full name found for GitHub user andrewor14 # [X] No assignee found for SPARK-1763 # [0] Andrew Or - Full name of JIRA user andrewor14 # [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14 # [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14 - # [3] andrewor14 - Raw Github username + # [3] andrewor14 - Raw GitHub username # [4] Custom candidate_names = [] bad_prompts = [] # Prompts that can't actually be selected; print these first. @@ -207,7 +207,7 @@ def generate_candidates(author, issues): print(p) # In interactive mode, additionally provide "custom" option and await user response if INTERACTIVE_MODE: - print(" [%d] %s - Raw Github username" % (raw_index, author)) + print(" [%d] %s - Raw GitHub username" % (raw_index, author)) print(" [%d] Custom" % custom_index) response = raw_input(" Your choice: ") last_index = custom_index diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py index 9bcebaa22ab86..27451bba905dd 100755 --- a/dev/github_jira_sync.py +++ b/dev/github_jira_sync.py @@ -16,7 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -# Utility for updating JIRA's with information about Github pull requests +# Utility for updating JIRA's with information about GitHub pull requests import json import os @@ -142,9 +142,9 @@ def reset_pr_labels(pr_num, jira_components): jira_prs = get_jira_prs() previous_max = get_max_pr() -print("Retrieved %s JIRA PR's from Github" % len(jira_prs)) +print("Retrieved %s JIRA PR's from GitHub" % len(jira_prs)) jira_prs = [(k, v) for k, v in jira_prs if int(v['number']) > previous_max] -print("%s PR's remain after excluding visted ones" % len(jira_prs)) +print("%s PR's remain after excluding visited ones" % len(jira_prs)) num_updates = 0 considered = [] @@ -157,7 +157,7 @@ def reset_pr_labels(pr_num, jira_components): considered = considered + [pr_num] url = pr['html_url'] - title = "[Github] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) + title = "[GitHub] Pull Request #%s (%s)" % (pr['number'], pr['user']['login']) try: page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue + "/remotelink")) existing_links = map(lambda l: l['object']['url'], page) @@ -174,7 +174,7 @@ def reset_pr_labels(pr_num, jira_components): destination = {"title": title, "url": url, "icon": icon} # For all possible fields see: # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links - # application = {"name": "Github pull requests", "type": "org.apache.spark.jira.github"} + # application = {"name": "GitHub pull requests", "type": "org.apache.spark.jira.github"} jira_client.add_remote_link(issue, destination) comment = "User '%s' has created a pull request for this issue:" % pr['user']['login'] diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 610fb1fd27027..4309a74773e89 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -38,7 +38,7 @@ def print_err(msg): def post_message_to_github(msg, ghprb_pull_id): - print("Attempting to post to Github...") + print("Attempting to post to GitHub...") api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") url = api_url + "/issues/" + ghprb_pull_id + "/comments" @@ -57,12 +57,12 @@ def post_message_to_github(msg, ghprb_pull_id): if response.getcode() == 201: print(" > Post successful.") except HTTPError as http_e: - print_err("Failed to post message to Github.") + print_err("Failed to post message to GitHub.") print_err(" > http_code: %s" % http_e.code) print_err(" > api_response: %s" % http_e.read()) print_err(" > data: %s" % posted_message) except URLError as url_e: - print_err("Failed to post message to Github.") + print_err("Failed to post message to GitHub.") print_err(" > urllib_status: %s" % url_e.reason[1]) print_err(" > data: %s" % posted_message) @@ -89,7 +89,7 @@ def run_pr_checks(pr_tests, ghprb_actual_commit, sha1): """ Executes a set of pull request checks to ease development and report issues with various components such as style, linting, dependencies, compatibilities, etc. - @return a list of messages to post back to Github + @return a list of messages to post back to GitHub """ # Ensure we save off the current HEAD to revert to current_pr_head = run_cmd(['git', 'rev-parse', 'HEAD'], return_output=True).strip() @@ -109,7 +109,7 @@ def run_tests(tests_timeout): """ Runs the `dev/run-tests` script and responds with the correct error message under the various failure scenarios. 
- @return a tuple containing the test result code and the result note to post to Github + @return a tuple containing the test result code and the result note to post to GitHub """ test_result_code = subprocess.Popen(['timeout', @@ -198,16 +198,16 @@ def main(): # To write a PR test: # * the file must reside within the dev/tests directory # * be an executable bash script - # * accept three arguments on the command line, the first being the Github PR long commit - # hash, the second the Github SHA1 hash, and the final the current PR hash + # * accept three arguments on the command line, the first being the GitHub PR long commit + # hash, the second the GitHub SHA1 hash, and the final the current PR hash # * and, lastly, return string output to be included in the pr message output that will - # be posted to Github + # be posted to GitHub pr_tests = [ "pr_merge_ability", "pr_public_classes" ] - # `bind_message_base` returns a function to generate messages for Github posting + # `bind_message_base` returns a function to generate messages for GitHub posting github_message = functools.partial(pr_message, build_display_name, build_url, diff --git a/dev/run-tests.py b/dev/run-tests.py index 6bc73ca3669f3..37a15a758d898 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -642,7 +642,7 @@ def main(): # /home/jenkins/anaconda2/envs/py36/bin os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH") else: - # else we're running locally or Github Actions. + # else we're running locally or GitHub Actions. build_tool = "sbt" hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop3.2") hive_version = os.environ.get("HIVE_PROFILE", "hive2.3") @@ -660,12 +660,12 @@ def main(): included_tags = [] excluded_tags = [] if should_only_test_modules: - # If we're running the tests in Github Actions, attempt to detect and test + # If we're running the tests in GitHub Actions, attempt to detect and test # only the affected modules. if test_env == "github_actions": if os.environ["GITHUB_INPUT_BRANCH"] != "": # Dispatched request - # Note that it assumes Github Actions has already merged + # Note that it assumes GitHub Actions has already merged # the given `GITHUB_INPUT_BRANCH` branch. changed_files = identify_changed_files_from_git_commits( "HEAD", target_branch=os.environ["GITHUB_SHA"]) diff --git a/dev/tests/pr_merge_ability.sh b/dev/tests/pr_merge_ability.sh index 25fdbccac4dd8..a32667730f76c 100755 --- a/dev/tests/pr_merge_ability.sh +++ b/dev/tests/pr_merge_ability.sh @@ -22,7 +22,7 @@ # another branch and returning results to be published. More details can be # found at dev/run-tests-jenkins. # -# Arg1: The Github Pull Request Actual Commit +# Arg1: The GitHub Pull Request Actual Commit # known as `ghprbActualCommit` in `run-tests-jenkins` # Arg2: The SHA1 hash # known as `sha1` in `run-tests-jenkins` diff --git a/dev/tests/pr_public_classes.sh b/dev/tests/pr_public_classes.sh index 479d1851fe0b8..ad1ad5e736594 100755 --- a/dev/tests/pr_public_classes.sh +++ b/dev/tests/pr_public_classes.sh @@ -22,7 +22,7 @@ # another branch and returning results to be published. More details can be # found at dev/run-tests-jenkins. 
# -# Arg1: The Github Pull Request Actual Commit +# Arg1: The GitHub Pull Request Actual Commit # known as `ghprbActualCommit` in `run-tests-jenkins` ghprbActualCommit="$1" diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 98769d951b6ac..5a66bfca27a27 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -1729,7 +1729,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.numTrees"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.ml.regression.RandomForestRegressionModel.setFeatureSubsetStrategy") ) ++ Seq( - // [SPARK-21680][ML][MLLIB]optimzie Vector coompress + // [SPARK-21680][ML][MLLIB]optimize Vector compress ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.mllib.linalg.Vector.toSparseWithSize"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.ml.linalg.Vector.toSparseWithSize") ) ++ Seq( diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 05413b7091ad9..a5951e0452943 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -198,7 +198,7 @@ object SparkBuild extends PomBuild { ) // Silencer: Scala compiler plugin for warning suppression - // Aim: enable fatal warnings, but supress ones related to using of deprecated APIs + // Aim: enable fatal warnings, but suppress ones related to using of deprecated APIs // depends on scala version: // <2.13 - silencer 1.6.0 and compiler settings to enable fatal warnings // 2.13.0,2.13.1 - silencer 1.7.1 and compiler settings to enable fatal warnings @@ -222,7 +222,7 @@ object SparkBuild extends PomBuild { "-Xfatal-warnings", "-deprecation", "-Ywarn-unused-import", - "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and supress them + "-P:silencer:globalFilters=.*deprecated.*" //regex to catch deprecation warnings and suppress them ) } else { Seq( @@ -327,7 +327,7 @@ object SparkBuild extends PomBuild { // to be enabled in specific ones that have previous artifacts MimaKeys.mimaFailOnNoPrevious := false, - // To prevent intermittent compliation failures, see also SPARK-33297 + // To prevent intermittent compilation failures, see also SPARK-33297 // Apparently we can remove this when we use JDK 11. Test / classLoaderLayeringStrategy := ClassLoaderLayeringStrategy.Flat ) diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css index 2fd8720e2fa0d..1e493c4c868e6 100644 --- a/python/docs/source/_static/css/pyspark.css +++ b/python/docs/source/_static/css/pyspark.css @@ -51,7 +51,7 @@ h3 { max-width: 80%; } -/* Left pannel size */ +/* Left panel size */ @media (min-width: 768px) { .col-md-3 { flex: 0 0 20%; diff --git a/python/docs/source/_templates/autosummary/class.rst b/python/docs/source/_templates/autosummary/class.rst index d794f797ee2ad..b5f62677ee0ed 100644 --- a/python/docs/source/_templates/autosummary/class.rst +++ b/python/docs/source/_templates/autosummary/class.rst @@ -15,7 +15,7 @@ specific language governing permissions and limitations under the License. -.. Workaround to avoud documenting __init__. +.. Workaround to avoid documenting __init__. 
{% extends "!autosummary/class.rst" %} diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index bc141a6f44a6f..829919858f67a 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -54,7 +54,7 @@ Enter the name of this new configuration, for example, ``MyRemoteDebugger`` and .. image:: ../../../../docs/img/pyspark-remote-debug1.png :alt: PyCharm remote debugger setting -| After that, you should install the corresponding version of the ``pydevd-pycahrm`` package in all the machines which will connect to your PyCharm debugger. In the previous dialog, it shows the command to install. +| After that, you should install the corresponding version of the ``pydevd-pycharm`` package in all the machines which will connect to your PyCharm debugger. In the previous dialog, it shows the command to install. .. code-block:: text diff --git a/python/docs/source/development/testing.rst b/python/docs/source/development/testing.rst index 08fd730a19f4b..3eab8d04511d6 100644 --- a/python/docs/source/development/testing.rst +++ b/python/docs/source/development/testing.rst @@ -53,5 +53,5 @@ Running tests using GitHub Actions ---------------------------------- You can run the full PySpark tests by using GitHub Actions in your own forked GitHub -repositry with a few clicks. Please refer to +repository with a few clicks. Please refer to `Running tests in your forked repository using GitHub Actions `_ for more details. diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 9c9ff7fa7844b..a90f5fe159553 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -42,7 +42,7 @@ PySpark installation using `PyPI `_ is as fol pip install pyspark -If you want to install extra dependencies for a specific componenet, you can install it as below: +If you want to install extra dependencies for a specific component, you can install it as below: .. code-block:: bash @@ -105,7 +105,7 @@ Now activate the newly created environment with the following command: conda activate pyspark_env You can install pyspark by `Using PyPI <#using-pypi>`_ to install PySpark in the newly created -environment, for example as below. It will install PySpark under the new virtual environemnt +environment, for example as below. It will install PySpark under the new virtual environment ``pyspark_env`` created above. .. code-block:: bash @@ -126,7 +126,7 @@ Manually Downloading -------------------- PySpark is included in the distributions available at the `Apache Spark website `_. -You can download a distribution you want from the site. After that, uncompress the tar file into the directoy where you want +You can download a distribution you want from the site. After that, uncompress the tar file into the directory where you want to install Spark, for example, as below: .. 
code-block:: bash diff --git a/python/docs/source/getting_started/quickstart.ipynb b/python/docs/source/getting_started/quickstart.ipynb index ab3645591955f..550b532fefc14 100644 --- a/python/docs/source/getting_started/quickstart.ipynb +++ b/python/docs/source/getting_started/quickstart.ipynb @@ -11,7 +11,7 @@ "\n", "There is also other useful information in Apache Spark documentation site, see the latest version of [Spark SQL and DataFrames](https://spark.apache.org/docs/latest/sql-programming-guide.html), [RDD Programming Guide](https://spark.apache.org/docs/latest/rdd-programming-guide.html), [Structured Streaming Programming Guide](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html), [Spark Streaming Programming Guide](https://spark.apache.org/docs/latest/streaming-programming-guide.html) and [Machine Learning Library (MLlib) Guide](https://spark.apache.org/docs/latest/ml-guide.html).\n", "\n", - "PySaprk applications start with initializing `SparkSession` which is the entry point of PySpark as below. In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session in the variable spark for users." + "PySpark applications start with initializing `SparkSession` which is the entry point of PySpark as below. In case of running it in PySpark shell via pyspark executable, the shell automatically creates the session in the variable spark for users." ] }, { @@ -392,7 +392,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "`DataFrame.collect()` collects the distributed data to the driver side as the local data in Python. Note that this can throw an out-of-memory error when the dataset is too larget to fit in the driver side because it collects all the data from executors to the driver side." + "`DataFrame.collect()` collects the distributed data to the driver side as the local data in Python. Note that this can throw an out-of-memory error when the dataset is too large to fit in the driver side because it collects all the data from executors to the driver side." ] }, { diff --git a/python/docs/source/index.rst b/python/docs/source/index.rst index 4286f616374c5..6a631052a642d 100644 --- a/python/docs/source/index.rst +++ b/python/docs/source/index.rst @@ -30,7 +30,7 @@ of Spark's features such as Spark SQL, DataFrame, Streaming, MLlib (Machine Learning) and Spark Core. .. 
image:: ../../../docs/img/pyspark-components.png - :alt: PySpark Compoenents + :alt: PySpark Components **Spark SQL and DataFrame** diff --git a/python/pyspark/__init__.pyi b/python/pyspark/__init__.pyi index 98bd40684c01b..ef07c32b1db7b 100644 --- a/python/pyspark/__init__.pyi +++ b/python/pyspark/__init__.pyi @@ -53,7 +53,7 @@ from pyspark.taskcontext import ( # noqa: F401 ) from pyspark.util import InheritableThread as InheritableThread # noqa: F401 -# Compatiblity imports +# Compatibility imports from pyspark.sql import ( # noqa: F401 SQLContext as SQLContext, HiveContext as HiveContext, diff --git a/python/pyspark/cloudpickle/cloudpickle.py b/python/pyspark/cloudpickle/cloudpickle.py index 8e683e7a6988b..58c274bd79720 100644 --- a/python/pyspark/cloudpickle/cloudpickle.py +++ b/python/pyspark/cloudpickle/cloudpickle.py @@ -88,7 +88,7 @@ def g(): DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Track the provenance of reconstructed dynamic classes to make it possible to -# recontruct instances from the matching singleton class definition when +# reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -236,7 +236,7 @@ def _extract_code_globals(co): out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresonding to the one + # syntax generates a constant code object corresponding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -457,7 +457,7 @@ def _is_parametrized_type_hint(obj): is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal - is_litteral = getattr(obj, '__values__', None) is not None + is_literal = getattr(obj, '__values__', None) is not None # typing_extensions.Final is_final = getattr(obj, '__type__', None) is not None @@ -469,7 +469,7 @@ def _is_parametrized_type_hint(obj): getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) - return any((is_typing, is_litteral, is_final, is_union, is_tuple, + return any((is_typing, is_literal, is_final, is_union, is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): @@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None): """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( - "A pickle file created using an old (<=1.4.1) version of cloudpicke " + "A pickle file created using an old (<=1.4.1) version of cloudpickle " "is currently being loaded. This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) diff --git a/python/pyspark/cloudpickle/cloudpickle_fast.py b/python/pyspark/cloudpickle/cloudpickle_fast.py index e8e46b88fdc91..3c48ff7b0a885 100644 --- a/python/pyspark/cloudpickle/cloudpickle_fast.py +++ b/python/pyspark/cloudpickle/cloudpickle_fast.py @@ -6,7 +6,7 @@ is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. -Note that the C Pickler sublassing API is CPython-specific. Therefore, some +Note that the C Pickler subclassing API is CPython-specific. 
Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ @@ -179,7 +179,7 @@ def _class_getstate(obj): clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, dont pickle the + # If obj is an instance of an ABCMeta subclass, don't pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. clsdict.pop('_abc_cache', None) @@ -407,7 +407,7 @@ def _class_reduce(obj): def _function_setstate(obj, state): - """Update the state of a dynaamic function. + """Update the state of a dynamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls @@ -556,7 +556,7 @@ def dump(self, obj): # `dispatch` attribute. Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `Cloudpickler.dispatch` when + # great choice given the meaning of `CloudPickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table @@ -630,7 +630,7 @@ def reducer_override(self, obj): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's - # distpatch_table + # dispatch_table return NotImplemented else: diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1bd5961e0525a..1c542fa897ece 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -260,7 +260,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, sys.path.insert(1, filepath) except Exception: warnings.warn( - "Failed to add file [%s] speficied in 'spark.submit.pyFiles' to " + "Failed to add file [%s] specified in 'spark.submit.pyFiles' to " "Python path:\n %s" % (path, "\n ".join(sys.path)), RuntimeWarning) @@ -603,7 +603,7 @@ def _serialize_to_jvm(self, data, serializer, reader_func, createRDDServer): tempFile.close() return reader_func(tempFile.name) finally: - # we eagerily reads the file so we can delete right after. + # we eagerly reads the file so we can delete right after. os.unlink(tempFile.name) def pickleFile(self, name, minPartitions=None): diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index fe2e326dff8be..cc0c3a8888a66 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -208,7 +208,7 @@ def local_connect_and_auth(port, auth_secret): return (sockfile, sock) except socket.error as e: emsg = str(e) - errors.append("tried to connect to %s, but an error occured: %s" % (sa, emsg)) + errors.append("tried to connect to %s, but an error occurred: %s" % (sa, emsg)) sock.close() sock = None raise Exception("could not open socket: %s" % errors) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 82b9a6db1eb92..8138f34d7a19e 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -5798,7 +5798,7 @@ def setHandleInvalid(self, value): class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): """ Params for :py:class:`VarianceThresholdSelector` and - :py:class:`VarianceThresholdSelectorrModel`. + :py:class:`VarianceThresholdSelectorModel`. .. 
versionadded:: 3.1.0 """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 5ce484d964a5a..d37654a7388f5 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1491,7 +1491,7 @@ def setParams(self, *, featuresCol="features", labelCol="label", predictionCol=" maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, maxMemoryInMB=256, cacheNodeIds=False, subsamplingRate=1.0, checkpointInterval=10, lossType="squared", maxIter=20, stepSize=0.1, seed=None, - impuriy="variance", featureSubsetStrategy="all", validationTol=0.01, + impurity="variance", featureSubsetStrategy="all", validationTol=0.01, validationIndicatorCol=None, leafCol="", minWeightFractionPerNode=0.0, weightCol=None): """ diff --git a/python/pyspark/ml/regression.pyi b/python/pyspark/ml/regression.pyi index b8f1e61859c72..61172305a3726 100644 --- a/python/pyspark/ml/regression.pyi +++ b/python/pyspark/ml/regression.pyi @@ -477,7 +477,7 @@ class GBTRegressor( maxIter: int = ..., stepSize: float = ..., seed: Optional[int] = ..., - impuriy: str = ..., + impurity: str = ..., featureSubsetStrategy: str = ..., validationTol: float = ..., validationIndicatorCol: Optional[str] = ..., diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index f8b61b7c57919..50475210607c8 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -116,7 +116,7 @@ def test_output_columns(self): output = model.transform(df) self.assertEqual(output.columns, ["label", "features", "rawPrediction", "prediction"]) - def test_parallelism_doesnt_change_output(self): + def test_parallelism_does_not_change_output(self): df = self.spark.createDataFrame([(0.0, Vectors.dense(1.0, 0.8)), (1.0, Vectors.sparse(2, [], [])), (2.0, Vectors.dense(0.5, 0.5))], diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index ceecdae971c99..1001598779d48 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -33,7 +33,7 @@ def test_read_images(self): self.assertEqual(df.count(), 4) first_row = df.take(1)[0][0] # compare `schema.simpleString()` instead of directly compare schema, - # because the df loaded from datasouce may change schema column nullability. + # because the df loaded from datasource may change schema column nullability. self.assertEqual(df.schema.simpleString(), ImageSchema.imageSchema.simpleString()) self.assertEqual(df.schema["image"].dataType.simpleString(), ImageSchema.columnSchema.simpleString()) diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index e1a009643c5f2..cfc18c057f0a8 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -927,7 +927,7 @@ def setInitialCenters(self, centers, weights): @since('1.5.0') def setRandomCenters(self, dim, weight, seed): """ - Set the initial centres to be random samples from + Set the initial centers to be random samples from a gaussian population with constant weights. """ rng = random.RandomState(seed) diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index 198a9791774a9..2f25c7672a93a 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -457,7 +457,7 @@ def meanAveragePrecision(self): """ Returns the mean average precision (MAP) of all the queries. 
If a query has an empty ground truth set, the average precision will be zero and - a log warining is generated. + a log warning is generated. """ return self.call("meanAveragePrecision") @@ -466,7 +466,7 @@ def meanAveragePrecisionAt(self, k): """ Returns the mean average precision (MAP) at first k ranking of all the queries. If a query has an empty ground truth set, the average precision will be zero and - a log warining is generated. + a log warning is generated. """ return self.call("meanAveragePrecisionAt", int(k)) diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index e549b0ac43721..c224e38473cf6 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -787,7 +787,7 @@ def _validate(self, dstream): "dstream should be a DStream object, got %s" % type(dstream)) if not self._model: raise ValueError( - "Model must be intialized using setInitialWeights") + "Model must be initialized using setInitialWeights") def predictOn(self, dstream): """ diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index a4b45cf55febe..d8f3cb840e45c 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -178,7 +178,7 @@ def chiSqTest(observed, expected=None): """ If `observed` is Vector, conduct Pearson's chi-squared goodness of fit test of the observed data against the expected distribution, - or againt the uniform distribution (by default), with each category + or against the uniform distribution (by default), with each category having an expected frequency of `1 / len(observed)`. If `observed` is matrix, conduct Pearson's independence test on the diff --git a/python/pyspark/mllib/tests/test_streaming_algorithms.py b/python/pyspark/mllib/tests/test_streaming_algorithms.py index b94fb2778d88d..f6c6779e83f13 100644 --- a/python/pyspark/mllib/tests/test_streaming_algorithms.py +++ b/python/pyspark/mllib/tests/test_streaming_algorithms.py @@ -189,7 +189,7 @@ def generateLogisticInput(offset, scale, nPoints, seed): Generate 1 / (1 + exp(-x * scale + offset)) where, - x is randomnly distributed and the threshold + x is randomly distributed and the threshold and labels for each sample in x is obtained from a random uniform distribution. """ diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 1964070040cdf..34faaacff5eb3 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -1253,7 +1253,7 @@ def histogram(self, buckets): and 50 we would have a histogram of 1,0,1. If your histogram is evenly spaced (e.g. [0, 10, 20, 30]), - this can be switched from an O(log n) inseration to O(1) per + this can be switched from an O(log n) insertion to O(1) per element (where n is the number of buckets). Buckets must be sorted, not contain any duplicates, and have @@ -2292,7 +2292,7 @@ def groupWith(self, other, *others): """ return python_cogroup((self, other) + others, numPartitions=None) - # TODO: add variant with custom parittioner + # TODO: add variant with custom partitioner def cogroup(self, other, numPartitions=None): """ For each key k in `self` or `other`, return a resulting RDD that diff --git a/python/pyspark/resource/requests.py b/python/pyspark/resource/requests.py index 74d26d04312c4..4deb22b5948f0 100644 --- a/python/pyspark/resource/requests.py +++ b/python/pyspark/resource/requests.py @@ -189,7 +189,7 @@ def requests(self): class TaskResourceRequest(object): """ - A task resource request. 
This is used in conjuntion with the + A task resource request. This is used in conjunction with the :class:`pyspark.resource.ResourceProfile` to programmatically specify the resources needed for an RDD that will be applied at the stage level. The amount is specified as a Double to allow for saying you want more than 1 task per resource. Valid values @@ -226,7 +226,7 @@ def amount(self): class TaskResourceRequests(object): """ - A set of task resource requests. This is used in conjuntion with the + A set of task resource requests. This is used in conjunction with the :class:`pyspark.resource.ResourceProfileBuilder` to programmatically specify the resources needed for an RDD that will be applied at the stage level. diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 89be6295f9888..4ba846227188c 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -418,7 +418,7 @@ def _cleanup(self): class ExternalSorter(object): """ - ExtenalSorter will divide the elements into chunks, sort them in + ExternalSorter will divide the elements into chunks, sort them in memory and dump them into disks, finally merge them back. The spilling will only happen when the used memory goes above diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 345e81bd2d73e..760805400aca9 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -425,7 +425,7 @@ def dropFields(self, *fieldNames): +--------------+ However, if you are going to add/replace multiple nested fields, - it is preffered to extract out the nested struct before + it is preferred to extract out the nested struct before adding/replacing multiple fields e.g. >>> df.select(col("a").withField( diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 9fae27a2d9c6c..fe7d26d1bcfd2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1497,7 +1497,7 @@ def summary(self, *statistics): - stddev - min - max - - arbitrary approximate percentiles specified as a percentage (eg, 75%) + - arbitrary approximate percentiles specified as a percentage (e.g., 75%) If no statistics are given, this function computes count, mean, stddev, min, approximate quartiles (percentiles at 25%, 50%, and 75%), and max. diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index ea91e8593e21f..4dc3129fd6bc2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1300,7 +1300,7 @@ def spark_partition_id(): Notes ----- - This is indeterministic because it depends on data partitioning and task scheduling. + This is non deterministic because it depends on data partitioning and task scheduling. 
Examples -------- @@ -4110,7 +4110,7 @@ def _get_lambda_parameters(f): # We should exclude functions that use # variable args and keyword argnames # as well as keyword only args - supported_parmeter_types = { + supported_parameter_types = { inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.POSITIONAL_ONLY, } @@ -4125,7 +4125,7 @@ def _get_lambda_parameters(f): ) # and all arguments can be used as positional - if not all(p.kind in supported_parmeter_types for p in parameters): + if not all(p.kind in supported_parameter_types for p in parameters): raise ValueError( "f should use only POSITIONAL or POSITIONAL OR KEYWORD arguments" ) @@ -4640,7 +4640,7 @@ def years(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4664,7 +4664,7 @@ def months(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4688,7 +4688,7 @@ def days(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. @@ -4712,7 +4712,7 @@ def hours(col): Notes ----- - This function can be used only in combinatiion with + This function can be used only in combination with :py:meth:`~pyspark.sql.readwriter.DataFrameWriterV2.partitionedBy` method of the `DataFrameWriterV2`. diff --git a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi index de679ee2cd017..9148e7a2dca8e 100644 --- a/python/pyspark/sql/pandas/_typing/protocols/frame.pyi +++ b/python/pyspark/sql/pandas/_typing/protocols/frame.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -# This Protocol resuses core Pandas annotation. +# This Protocol reuses core Pandas annotation. # Overall pipeline looks as follows # - Stubgen pandas.core.frame # - Add Protocol as a base class diff --git a/python/pyspark/sql/pandas/_typing/protocols/series.pyi b/python/pyspark/sql/pandas/_typing/protocols/series.pyi index 14babb067da0d..f2de2e8b129fd 100644 --- a/python/pyspark/sql/pandas/_typing/protocols/series.pyi +++ b/python/pyspark/sql/pandas/_typing/protocols/series.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -# This Protocol resuses core Pandas annotation. +# This Protocol reuses core Pandas annotation. # Overall pipeline looks as follows # - Stubgen pandas.core.series # - Add Protocol as a base class diff --git a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 750aa4b0e6c56..4cd0b196d3366 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -99,7 +99,7 @@ def pandas_udf(f=None, returnType=None, functionType=None): ... s3['col2'] = s1 + s2.str.len() ... return s3 ... - >>> # Create a Spark DataFrame that has three columns including a sturct column. + >>> # Create a Spark DataFrame that has three columns including a struct column. ... df = spark.createDataFrame( ... [[1, "a string", ("a nested string",)]], ... 
"long_col long, string_col string, struct_col struct") @@ -114,7 +114,7 @@ def pandas_udf(f=None, returnType=None, functionType=None): | |-- col1: string (nullable = true) | |-- col2: long (nullable = true) - In the following sections, it describes the cominations of the supported type hints. For + In the following sections, it describes the combinations of the supported type hints. For simplicity, `pandas.DataFrame` variant is omitted. * Series to Series diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index ee68b95fc478d..a639a8d51f55c 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -484,7 +484,7 @@ def dummy_pandas_udf(df): col('temp0.key') == col('temp1.key')) self.assertEquals(res.count(), 5) - def test_mixed_scalar_udfs_followed_by_grouby_apply(self): + def test_mixed_scalar_udfs_followed_by_groupby_apply(self): df = self.spark.range(0, 10).toDF('v1') df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1'])) \ .withColumn('v3', pandas_udf(lambda x: x + 2, 'int')(df['v1'])) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index a7dcbfd32ac1c..9a1c0edcce4ed 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -459,7 +459,7 @@ def test_udf_with_string_return_type(self): self.assertTupleEqual(expected, actual) - def test_udf_shouldnt_accept_noncallable_object(self): + def test_udf_should_not_accept_noncallable_object(self): non_callable = None self.assertRaises(TypeError, UserDefinedFunction, non_callable, StringType()) @@ -683,7 +683,7 @@ def tearDown(self): if SparkContext._active_spark_context is not None: SparkContext._active_spark_context.stop() - def test_udf_init_shouldnt_initialize_context(self): + def test_udf_init_should_not_initialize_context(self): UserDefinedFunction(lambda x: x, StringType()) self.assertIsNone( diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index 18f8ba29f95a2..f5db783d2b5bc 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -151,10 +151,10 @@ def toJArray(gateway, jtype, arr): arr : python type list """ - jarr = gateway.new_array(jtype, len(arr)) + jarray = gateway.new_array(jtype, len(arr)) for i in range(0, len(arr)): - jarr[i] = arr[i] - return jarr + jarray[i] = arr[i] + return jarray def require_test_compiled(): diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index c4dc0d3af3332..2e6d7ede88551 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -281,7 +281,7 @@ def socketTextStream(self, hostname, port, storageLevel=StorageLevel.MEMORY_AND_ def textFileStream(self, directory): """ Create an input stream that monitors a Hadoop-compatible file system - for new files and reads them as text files. Files must be wrriten to the + for new files and reads them as text files. Files must be written to the monitored directory by "moving" them from another location within the same file system. File names starting with . are ignored. The text files must be encoded as UTF-8. 
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py index d86f6c3c1571c..8397ef1c4b62d 100644 --- a/python/pyspark/tests/test_context.py +++ b/python/pyspark/tests/test_context.py @@ -175,8 +175,8 @@ def test_parallelize_eager_cleanup(self): with SparkContext() as sc: temp_files = os.listdir(sc._temp_dir) rdd = sc.parallelize([0, 1, 2]) - post_parallalize_temp_files = os.listdir(sc._temp_dir) - self.assertEqual(temp_files, post_parallalize_temp_files) + post_parallelize_temp_files = os.listdir(sc._temp_dir) + self.assertEqual(temp_files, post_parallelize_temp_files) def test_set_conf(self): # This is for an internal use case. When there is an existing SparkContext, diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 1b09d327a5dfe..8ca4bb37e5fa4 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -59,7 +59,7 @@ def report_times(outfile, boot, init, finish): def add_path(path): - # worker can be used, so donot add path multiple times + # worker can be used, so do not add path multiple times if path not in sys.path: # overwrite system packages sys.path.insert(1, path) diff --git a/python/test_support/userlibrary.py b/python/test_support/userlibrary.py index 73fd26e71f10d..90cd30723ddfe 100755 --- a/python/test_support/userlibrary.py +++ b/python/test_support/userlibrary.py @@ -16,7 +16,7 @@ # """ -Used to test shipping of code depenencies with SparkContext.addPyFile(). +Used to test shipping of code dependencies with SparkContext.addPyFile(). """ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index e3af1ccc24f1c..41194f3a2676f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -420,7 +420,7 @@ private[spark] object Config extends Logging { val KUBERNETES_FILE_UPLOAD_PATH = ConfigBuilder("spark.kubernetes.file.upload.path") .doc("Hadoop compatible file system path where files from the local file system " + - "will be uploded to in cluster mode.") + "will be uploaded to in cluster mode.") .version("3.0.0") .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala index 3f2cb485bbb31..22764d9d2eb0e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotsStoreImpl.scala @@ -52,7 +52,7 @@ import org.apache.spark.util.ThreadUtils * time-windowed chunks. Each subscriber can choose to receive their snapshot chunks at different * time intervals. *
      - * The subcriber notification callback is guaranteed to be called from a single thread at a time. + * The subscriber notification callback is guaranteed to be called from a single thread at a time. */ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: ScheduledExecutorService) extends ExecutorPodsSnapshotsStore with Logging { @@ -142,7 +142,7 @@ private[spark] class ExecutorPodsSnapshotsStoreImpl(subscribersExecutor: Schedul } if (notificationCount.decrementAndGet() > 0) { - // There was another concurrent request for this subcriber. Schedule a task to + // There was another concurrent request for this subscriber. Schedule a task to // immediately process snapshots again, so that the subscriber can pick up any // changes that may have happened between the time it started looking at snapshots // above, and the time the concurrent request arrived. diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala index 349cbd04f6027..156740d7c8aee 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtilsSuite.scala @@ -49,14 +49,14 @@ class KubernetesVolumeUtilsSuite extends SparkFunSuite { val sparkConf = new SparkConf(false) sparkConf.set("test.persistentVolumeClaim.volumeName.mount.path", "/path") sparkConf.set("test.persistentVolumeClaim.volumeName.mount.readOnly", "true") - sparkConf.set("test.persistentVolumeClaim.volumeName.options.claimName", "claimeName") + sparkConf.set("test.persistentVolumeClaim.volumeName.options.claimName", "claimName") val volumeSpec = KubernetesVolumeUtils.parseVolumesWithPrefix(sparkConf, "test.").head assert(volumeSpec.volumeName === "volumeName") assert(volumeSpec.mountPath === "/path") assert(volumeSpec.mountReadOnly) assert(volumeSpec.volumeConf.asInstanceOf[KubernetesPVCVolumeConf] === - KubernetesPVCVolumeConf("claimeName")) + KubernetesPVCVolumeConf("claimName")) } test("Parses emptyDir volumes correctly") { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala index 95ee37e3daa41..38f8fac1858f1 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStepSuite.scala @@ -42,7 +42,7 @@ class MountVolumesFeatureStepSuite extends SparkFunSuite { assert(configuredPod.container.getVolumeMounts.get(0).getReadOnly === false) } - test("Mounts pesistentVolumeClaims") { + test("Mounts persistentVolumeClaims") { val volumeConf = KubernetesVolumeSpec( "testVolume", "/tmp", diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index bd42f6f05655f..5927af176062d 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -230,7 +230,7 @@ package object config { 
ConfigBuilder("spark.mesos.appJar.local.resolution.mode") .doc("Provides support for the `local:///` scheme to reference the app jar resource in " + "cluster mode. If user uses a local resource (`local:///path/to/jar`) and the config " + - "option is not used it defaults to `host` eg. the mesos fetcher tries to get the " + + "option is not used it defaults to `host` e.g. the mesos fetcher tries to get the " + "resource from the host's file system. If the value is unknown it prints a warning msg " + "in the dispatcher logs and defaults to `host`. If the value is `container` then spark " + "submit in the container will use the jar in the container's path: `/path/to/jar`.") diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 4620bdb005094..8dbb70b616df1 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -356,7 +356,7 @@ trait MesosSchedulerUtils extends Logging { * https://github.com/apache/mesos/blob/master/src/common/values.cpp * https://github.com/apache/mesos/blob/master/src/common/attributes.cpp * - * @param constraintsVal constains string consisting of ';' separated key-value pairs (separated + * @param constraintsVal contains string consisting of ';' separated key-value pairs (separated * by ':') * @return Map of constraints to match resources offers. */ diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 57af76b46fe64..ac50c1c77a24e 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -781,7 +781,7 @@ private[yarn] class YarnAllocator( val (exitCausedByApp, containerExitReason) = exitStatus match { case ContainerExitStatus.SUCCESS => (false, s"Executor for container $containerId exited because of a YARN event (e.g., " + - "pre-emption) and not because of an error in the running job.") + "preemption) and not because of an error in the running job.") case ContainerExitStatus.PREEMPTED => // Preemption is not the fault of the running tasks, since YARN preempts containers // merely to do resource sharing, and tasks that fail due to preempted executors could diff --git a/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java b/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java index df0ebcc9871ac..89e012ecd42e1 100644 --- a/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java +++ b/resource-managers/yarn/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java @@ -112,7 +112,7 @@ public static int waitForPort(int port, int retries) * The ports are all closed afterwards, * so other network services started may grab those same ports. 
* - * @param numPorts number of required port nubmers + * @param numPorts number of required port numbers * @return array of available port numbers * @throws IOException */ diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala index c2bdd971a0fe9..188a48509212d 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnShuffleServiceSuite.scala @@ -250,7 +250,7 @@ class YarnShuffleServiceSuite extends SparkFunSuite with Matchers with BeforeAnd ShuffleTestAccessor.getExecutorInfo(app2Id, "exec-2", resolver2) should be (Some(shuffleInfo2)) s2.stop() - // another stop & restart should be fine though (eg., we recover from previous corruption) + // another stop & restart should be fine though (e.g., we recover from previous corruption) s3 = new YarnShuffleService s3.setRecoveryPath(new Path(recoveryLocalDir.toURI)) s3.init(yarnConfig) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 570663c6f6ad3..7a8e3f1d2ccf4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -163,7 +163,7 @@ private[python] object PythonTransformFunctionSerializer { private[streaming] object PythonDStream { /** - * can not access PythonTransformFunctionSerializer.register() via Py4j + * cannot access PythonTransformFunctionSerializer.register() via Py4j * Py4JError: PythonTransformFunctionSerializerregister does not exist in the JVM */ def registerSerializer(ser: PythonTransformFunctionSerializer): Unit = { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index e037f26088347..ca4f3670d5ad7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -960,7 +960,7 @@ object DStream { /** Get the creation site of a DStream from the stack trace of when the DStream is created. */ private[streaming] def getCreationSite(): CallSite = { /** Filtering function that excludes non-user classes for a streaming application */ - def streamingExclustionFunction(className: String): Boolean = { + def streamingExclusionFunction(className: String): Boolean = { def doesMatch(r: Regex): Boolean = r.findFirstIn(className).isDefined val isSparkClass = doesMatch(SPARK_CLASS_REGEX) val isSparkExampleClass = doesMatch(SPARK_EXAMPLES_CLASS_REGEX) @@ -972,6 +972,6 @@ object DStream { // non-Spark and non-Scala class, as the rest would streaming application classes. 
(isSparkClass || isScalaClass) && !isSparkExampleClass && !isSparkStreamingTestClass } - org.apache.spark.util.Utils.getCallSite(streamingExclustionFunction) + org.apache.spark.util.Utils.getCallSite(streamingExclusionFunction) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala index 006bcad5d68c2..ef040681adf37 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/HdfsUtils.scala @@ -39,7 +39,7 @@ private[streaming] object HdfsUtils { throw new IllegalStateException("File exists and there is no append support!") } } else { - // we dont' want to use hdfs erasure coding, as that lacks support for append and hflush + // we don't want to use hdfs erasure coding, as that lacks support for append and hflush SparkHadoopUtil.createFile(dfs, dfsPath, false) } } diff --git a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java index c7cde5674f547..8a57b0c58b228 100644 --- a/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java +++ b/streaming/src/test/java/test/org/apache/spark/streaming/JavaAPISuite.java @@ -1595,7 +1595,7 @@ public void testContextGetOrCreate() throws InterruptedException { /* TEST DISABLED: Pending a discussion about checkpoint() semantics with TD @SuppressWarnings("unchecked") @Test - public void testCheckpointofIndividualStream() throws InterruptedException { + public void testCheckpointOfIndividualStream() throws InterruptedException { List> inputData = Arrays.asList( Arrays.asList("this", "is"), Arrays.asList("a", "test"), diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala index b2b8d2f41fc80..3ffaa62bd75ac 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala @@ -541,12 +541,12 @@ class MapWithStateSuite extends SparkFunSuite with LocalStreamingContext // Setup the stream computation val ssc = new StreamingContext(sc, Seconds(1)) val inputStream = new TestInputStream(ssc, input, numPartitions = 2) - val trackeStateStream = inputStream.map(x => (x, 1)).mapWithState(mapWithStateSpec) + val trackedStateStream = inputStream.map(x => (x, 1)).mapWithState(mapWithStateSpec) val collectedOutputs = new ConcurrentLinkedQueue[Seq[T]] - val outputStream = new TestOutputStream(trackeStateStream, collectedOutputs) + val outputStream = new TestOutputStream(trackedStateStream, collectedOutputs) val collectedStateSnapshots = new ConcurrentLinkedQueue[Seq[(K, S)]] val stateSnapshotStream = new TestOutputStream( - trackeStateStream.stateSnapshots(), collectedStateSnapshots) + trackedStateStream.stateSnapshots(), collectedStateSnapshots) outputStream.register() stateSnapshotStream.register() diff --git a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala index 58ce3a93251a9..f06b1feb8c0cd 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/rdd/MapWithStateRDDSuite.scala @@ -320,7 +320,7 @@ class MapWithStateRDDSuite extends 
SparkFunSuite with RDDCheckpointTester with B makeStateRDDWithLongLineageDataRDD, reliableCheckpoint = true, rddCollectFunc _) /** Generate MapWithStateRDD with parent state RDD having a long lineage */ - def makeStateRDDWithLongLineageParenttateRDD( + def makeStateRDDWithLongLineageParentStateRDD( longLineageRDD: RDD[Int]): MapWithStateRDD[Int, Int, Int, Int] = { // Create a MapWithStateRDD that has a long lineage using the data RDD with a long lineage @@ -337,9 +337,9 @@ class MapWithStateRDDSuite extends SparkFunSuite with RDDCheckpointTester with B } testRDD( - makeStateRDDWithLongLineageParenttateRDD, reliableCheckpoint = true, rddCollectFunc _) + makeStateRDDWithLongLineageParentStateRDD, reliableCheckpoint = true, rddCollectFunc _) testRDDPartitions( - makeStateRDDWithLongLineageParenttateRDD, reliableCheckpoint = true, rddCollectFunc _) + makeStateRDDWithLongLineageParentStateRDD, reliableCheckpoint = true, rddCollectFunc _) } test("checkpointing empty state RDD") { From cf98a761de677c733f3c33230e1c63ddb785d5c5 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 28 Nov 2020 23:38:11 +0900 Subject: [PATCH 0590/1009] [SPARK-33570][SQL][TESTS] Set the proper version of gssapi plugin automatically for MariaDBKrbIntegrationSuite ### What changes were proposed in this pull request? This PR changes mariadb_docker_entrypoint.sh to set the proper version automatically for mariadb-plugin-gssapi-server. The proper version is based on the one of mariadb-server. Also, this PR enables to use arbitrary docker image by setting the environment variable `MARIADB_CONTAINER_IMAGE_NAME`. ### Why are the changes needed? For `MariaDBKrbIntegrationSuite`, the version of `mariadb-plugin-gssapi-server` is currently set to `10.5.5` in `mariadb_docker_entrypoint.sh` but it's no longer available in the official apt repository and `MariaDBKrbIntegrationSuite` doesn't pass for now. It seems that only the most recent three versions are available for each major version and they are `10.5.6`, `10.5.7` and `10.5.8` for now. Further, the release cycle of MariaDB seems to be very rapid (1 ~ 2 months) so I don't think it's a good idea to set to an specific version for `mariadb-plugin-gssapi-server`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Confirmed that `MariaDBKrbIntegrationSuite` passes with the following commands. ``` $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" ``` In this case, we can see what version of `mariadb-plugin-gssapi-server` is going to be installed in the following container log message. ``` Installing mariadb-plugin-gssapi-server=1:10.5.8+maria~focal ``` Or, we can set MARIADB_CONTAINER_IMAGE_NAME for a specific version of MariaDB. ``` $ MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.6 build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" ``` ``` Installing mariadb-plugin-gssapi-server=1:10.5.6+maria~focal ``` Closes #30515 from sarutak/fix-MariaDBKrbIntegrationSuite. 
Authored-by: Kousuke Saruta Signed-off-by: Takeshi Yamamuro --- .../src/test/resources/mariadb_docker_entrypoint.sh | 4 +++- .../spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala | 12 +++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh index 97c00a9d81b76..ab7d967a927d0 100755 --- a/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh +++ b/external/docker-integration-tests/src/test/resources/mariadb_docker_entrypoint.sh @@ -18,7 +18,9 @@ dpkg-divert --add /bin/systemctl && ln -sT /bin/true /bin/systemctl apt update -apt install -y mariadb-plugin-gssapi-server=1:10.5.5+maria~focal +GSSAPI_PLUGIN=mariadb-plugin-gssapi-server=$(dpkg -s mariadb-server | sed -n "s/^Version: \(.*\)/\1/p") +echo "Installing $GSSAPI_PLUGIN" +apt install -y "$GSSAPI_PLUGIN" echo "gssapi_keytab_path=/docker-entrypoint-initdb.d/mariadb.keytab" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf echo "gssapi_principal_name=mariadb/__IP_ADDRESS_REPLACE_ME__@EXAMPLE.COM" >> /etc/mysql/mariadb.conf.d/auth_gssapi.cnf docker-entrypoint.sh mysqld diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index adee2bebe41ce..59a6f530afd7e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -24,15 +24,21 @@ import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnectionProvider import org.apache.spark.tags.DockerTest +/** + * To run this test suite for a specific version (e.g., mariadb:10.5.8): + * {{{ + * MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.8 + * ./build/sbt -Pdocker-integration-tests + * "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" + * }}} + */ @DockerTest class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val userName = s"mariadb/$dockerIp" override protected val keytabFileName = "mariadb.keytab" override val db = new DatabaseOnDocker { - // If you change `imageName`, you need to update the version of `mariadb-plugin-gssapi-server` - // in `resources/mariadb_docker_entrypoint.sh` accordingly. - override val imageName = "mariadb:10.5" + override val imageName = sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.5") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) From 3650a6bd97b9cecf382f96a55a97ff56b75471cd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 28 Nov 2020 12:47:47 -0800 Subject: [PATCH 0591/1009] [SPARK-33580][CORE] resolveDependencyPaths should use classifier attribute of artifact ### What changes were proposed in this pull request? This patch proposes to use classifier attribute to construct artifact path instead of type. ### Why are the changes needed? `resolveDependencyPaths` now takes artifact type to decide to add "-tests" postfix. However, the path pattern of ivy in `resolveMavenCoordinates` is `[organization]_[artifact][revision](-[classifier]).[ext]`. We should use classifier instead of type to construct file path. 
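For illustration only, a minimal standalone sketch (hypothetical coordinates; the real change is in `SparkSubmit.scala` below) of the intended naming: the optional Ivy classifier, not the artifact type, decides the suffix of the cached jar file name.
```
object ResolvedJarNameSketch {
  // Mirrors the intent of the change: a classifier such as "tests" contributes a
  // "-tests" suffix to the cached jar name; the artifact type is not consulted.
  def cachedJarName(org: String, name: String, rev: String, classifier: Option[String]): String = {
    val suffix = classifier.map("-" + _).getOrElse("")
    s"${org}_${name}-${rev}${suffix}.jar"
  }

  def main(args: Array[String]): Unit = {
    println(cachedJarName("org.example", "mylib", "1.0.0", None))           // org.example_mylib-1.0.0.jar
    println(cachedJarName("org.example", "mylib", "1.0.0", Some("tests")))  // org.example_mylib-1.0.0-tests.jar
  }
}
```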
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Manual test. Closes #30524 from viirya/SPARK-33580. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/SparkSubmit.scala | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 4b17661496808..7332c6d54c981 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1186,12 +1186,16 @@ private[spark] object SparkSubmitUtils { def resolveDependencyPaths( artifacts: Array[AnyRef], cacheDirectory: File): String = { - artifacts.map { ai => - val artifactInfo = ai.asInstanceOf[Artifact] - val artifact = artifactInfo.getModuleRevisionId - val testSuffix = if (artifactInfo.getType == "test-jar") "-tests" else "" + artifacts.map { artifactInfo => + val artifact = artifactInfo.asInstanceOf[Artifact].getModuleRevisionId + val extraAttrs = artifactInfo.asInstanceOf[Artifact].getExtraAttributes + val classifier = if (extraAttrs.containsKey("classifier")) { + "-" + extraAttrs.get("classifier") + } else { + "" + } cacheDirectory.getAbsolutePath + File.separator + - s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}${testSuffix}.jar" + s"${artifact.getOrganisation}_${artifact.getName}-${artifact.getRevision}$classifier.jar" }.mkString(",") } From bfe9380ba2bc9762ccfaa36d3ed938867c143876 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 28 Nov 2020 16:58:40 -0800 Subject: [PATCH 0592/1009] [MINOR][SQL] Remove `getTables()` from `r.SQLUtils` ### What changes were proposed in this pull request? Remove the unused method `getTables()` from `r.SQLUtils`. The method was used before the changes https://github.com/apache/spark/pull/17483 but R's `tables.default` was rewritten using `listTables()`: https://github.com/apache/spark/pull/17483/files#diff-2c01472a7bcb1d318244afcd621d726e00d36cd15dffe7e44fa96c54fce4cd9aR220-R223 ### Why are the changes needed? To improve code maintenance, and remove the dead code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By R tests. Closes #30527 from MaxGekk/remove-getTables-in-r-SQLUtils. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 693be99d47495..1d1358487abcb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -33,7 +33,6 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.arrow.ArrowConverters -import org.apache.spark.sql.execution.command.ShowTablesCommand import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.types._ @@ -216,15 +215,6 @@ private[sql] object SQLUtils extends Logging { } } - def getTables(sparkSession: SparkSession, databaseName: String): DataFrame = { - databaseName match { - case n: String if n != null && n.trim.nonEmpty => - Dataset.ofRows(sparkSession, ShowTablesCommand(Some(n), None)) - case _ => - Dataset.ofRows(sparkSession, ShowTablesCommand(None, None)) - } - } - def getTableNames(sparkSession: SparkSession, databaseName: String): Array[String] = { val db = databaseName match { case _ if databaseName != null && databaseName.trim.nonEmpty => From ba178f852f8e4b11a243d907ac204b30a60369b5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 29 Nov 2020 09:36:55 +0800 Subject: [PATCH 0593/1009] [SPARK-33581][SQL][TEST] Refactor HivePartitionFilteringSuite ### What changes were proposed in this pull request? This pr refactor HivePartitionFilteringSuite. ### Why are the changes needed? To make it easy to maintain. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30525 from wangyum/SPARK-33581. 
Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../client/HivePartitionFilteringSuite.scala | 291 +++++++++++------- 1 file changed, 177 insertions(+), 114 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index ab83f751f1425..e07fbc29ee8aa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -39,7 +39,13 @@ class HivePartitionFilteringSuite(version: String) private val tryDirectSqlKey = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname - private val testPartitionCount = 3 * 5 * 4 + private val dsValue = 20170101 to 20170103 + private val hValue = 0 to 4 + private val chunkValue = Seq("aa", "ab", "ba", "bb") + private val dateValue = Seq("2019-01-01", "2019-01-02", "2019-01-03") + private val dateStrValue = Seq("2020-01-01", "2020-01-02", "2020-01-03") + private val testPartitionCount = + dsValue.size * hValue.size * chunkValue.size * dateValue.size * dateStrValue.size private val storageFormat = CatalogStorageFormat( locationUri = None, @@ -57,23 +63,28 @@ class HivePartitionFilteringSuite(version: String) val client = buildClient(hadoopConf) val tableSchema = new StructType().add("value", "int").add("ds", "int").add("h", "int").add("chunk", "string") + .add("d", "date").add("datestr", "string") val table = CatalogTable( identifier = TableIdentifier("test", Some("default")), tableType = CatalogTableType.MANAGED, schema = tableSchema, - partitionColumnNames = Seq("ds", "h", "chunk"), + partitionColumnNames = Seq("ds", "h", "chunk", "d", "datestr"), storage = storageFormat) client.createTable(table, ignoreIfExists = false) val partitions = for { - ds <- 20170101 to 20170103 - h <- 0 to 4 - chunk <- Seq("aa", "ab", "ba", "bb") + ds <- dsValue + h <- hValue + chunk <- chunkValue + date <- dateValue + dateStr <- dateStrValue } yield CatalogTablePartition(Map( "ds" -> ds.toString, "h" -> h.toString, - "chunk" -> chunk + "chunk" -> chunk, + "d" -> date, + "datestr" -> dateStr ), storageFormat) assert(partitions.size == testPartitionCount) @@ -108,17 +119,21 @@ class HivePartitionFilteringSuite(version: String) // Should return all partitions where <=> is not supported testMetastorePartitionFiltering( attr("ds") <=> 20170101, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101") { testMetastorePartitionFiltering( attr("ds") === 20170101, 20170101 to 20170101, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=(20170101 + 1) and h=0") { @@ -126,41 +141,51 @@ class HivePartitionFilteringSuite(version: String) // comparisons to non-literal values testMetastorePartitionFiltering( attr("ds") === (Literal(20170101) + 1) && attr("h") === 0, - 20170101 to 20170103, + dsValue, 0 to 0, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk='aa'") { testMetastorePartitionFiltering( attr("chunk") === "aa", - 20170101 to 20170103, - 0 to 4, - "aa" :: Nil) + dsValue, + hValue, + "aa" :: Nil, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(chunk as int)=1 (not a valid partition predicate)") { 
testMetastorePartitionFiltering( attr("chunk").cast(IntegerType) === 1, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(chunk as boolean)=true (not a valid partition predicate)") { testMetastorePartitionFiltering( attr("chunk").cast(BooleanType) === true, - 20170101 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: 20170101=ds") { testMetastorePartitionFiltering( Literal(20170101) === attr("ds"), 20170101 to 20170101, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101 and h=2") { @@ -168,7 +193,9 @@ class HivePartitionFilteringSuite(version: String) attr("ds") === 20170101 && attr("h") === 2, 20170101 to 20170101, 2 to 2, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(ds as long)=20170101L and h=2") { @@ -176,39 +203,49 @@ class HivePartitionFilteringSuite(version: String) attr("ds").cast(LongType) === 20170101L && attr("h") === 2, 20170101 to 20170101, 2 to 2, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds=20170101 or ds=20170102") { testMetastorePartitionFiltering( attr("ds") === 20170101 || attr("ds") === 20170102, 20170101 to 20170102, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds in (20170102, 20170103) (using IN expression)") { testMetastorePartitionFiltering( attr("ds").in(20170102, 20170103), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: cast(ds as long) in (20170102L, 20170103L) (using IN expression)") { testMetastorePartitionFiltering( attr("ds").cast(LongType).in(20170102L, 20170103L), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil) + hValue, + chunkValue, + dateValue, + dateStrValue) } test("getPartitionsByFilter: ds in (20170102, 20170103) (using INSET expression)") { testMetastorePartitionFiltering( attr("ds").in(20170102, 20170103), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil, { + hValue, + chunkValue, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) @@ -219,8 +256,10 @@ class HivePartitionFilteringSuite(version: String) testMetastorePartitionFiltering( attr("ds").cast(LongType).in(20170102L, 20170103L), 20170102 to 20170103, - 0 to 4, - "aa" :: "ab" :: "ba" :: "bb" :: Nil, { + hValue, + chunkValue, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) @@ -229,41 +268,45 @@ class HivePartitionFilteringSuite(version: String) test("getPartitionsByFilter: chunk in ('ab', 'ba') (using IN expression)") { testMetastorePartitionFiltering( attr("chunk").in("ab", "ba"), - 20170101 to 20170103, - 0 to 4, - "ab" :: "ba" :: Nil) + dsValue, + hValue, + "ab" :: "ba" :: Nil, + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk in ('ab', 'ba') (using INSET expression)") { testMetastorePartitionFiltering( attr("chunk").in("ab", "ba"), - 20170101 to 20170103, - 0 to 4, - "ab" :: "ba" :: Nil, { + dsValue, + hValue, + "ab" :: 
"ba" :: Nil, + dateValue, + dateStrValue, { case expr @ In(v, list) if expr.inSetConvertible => InSet(v, list.map(_.eval(EmptyRow)).toSet) }) } test("getPartitionsByFilter: (ds=20170101 and h>=2) or (ds=20170102 and h<2)") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("aa", "ab", "ba", "bb")) - val day2 = (20170102 to 20170102, 0 to 1, Seq("aa", "ab", "ba", "bb")) + val day1 = (20170101 to 20170101, 2 to 4, chunkValue, dateValue, dateStrValue) + val day2 = (20170102 to 20170102, 0 to 1, chunkValue, dateValue, dateStrValue) testMetastorePartitionFiltering((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < 2), day1 :: day2 :: Nil) } test("getPartitionsByFilter: (ds=20170101 and h>=2) or (ds=20170102 and h<(1+1))") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("aa", "ab", "ba", "bb")) + val day1 = (20170101 to 20170101, 2 to 4, chunkValue, dateValue, dateStrValue) // Day 2 should include all hours because we can't build a filter for h<(7+1) - val day2 = (20170102 to 20170102, 0 to 4, Seq("aa", "ab", "ba", "bb")) + val day2 = (20170102 to 20170102, 0 to 4, chunkValue, dateValue, dateStrValue) testMetastorePartitionFiltering((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < (Literal(1) + 1)), day1 :: day2 :: Nil) } test("getPartitionsByFilter: " + "chunk in ('ab', 'ba') and ((ds=20170101 and h>=2) or (ds=20170102 and h<2))") { - val day1 = (20170101 to 20170101, 2 to 4, Seq("ab", "ba")) - val day2 = (20170102 to 20170102, 0 to 1, Seq("ab", "ba")) + val day1 = (20170101 to 20170101, 2 to 4, Seq("ab", "ba"), dateValue, dateStrValue) + val day2 = (20170102 to 20170102, 0 to 1, Seq("ab", "ba"), dateValue, dateStrValue) testMetastorePartitionFiltering(attr("chunk").in("ab", "ba") && ((attr("ds") === 20170101 && attr("h") >= 2) || (attr("ds") === 20170102 && attr("h") < 2)), day1 :: day2 :: Nil) @@ -272,93 +315,105 @@ class HivePartitionFilteringSuite(version: String) test("getPartitionsByFilter: chunk contains bb") { testMetastorePartitionFiltering( attr("chunk").contains("bb"), - (20170101 to 20170103, 0 to 4, Seq("bb")) :: Nil) + dsValue, + hValue, + Seq("bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk startsWith b") { testMetastorePartitionFiltering( attr("chunk").startsWith("b"), - (20170101 to 20170103, 0 to 4, Seq("ba", "bb")) :: Nil) + dsValue, + hValue, + Seq("ba", "bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk endsWith b") { testMetastorePartitionFiltering( attr("chunk").endsWith("b"), - (20170101 to 20170103, 0 to 4, Seq("ab", "bb")) :: Nil) + dsValue, + hValue, + Seq("ab", "bb"), + dateValue, + dateStrValue) } test("getPartitionsByFilter: chunk in ('ab', 'ba') and ((cast(ds as string)>'20170102')") { - val day = (20170101 to 20170103, 0 to 4, Seq("ab", "ba")) testMetastorePartitionFiltering( attr("chunk").in("ab", "ba") && (attr("ds").cast(StringType) > "20170102"), - day :: Nil) + dsValue, + hValue, + Seq("ab", "ba"), + dateValue, + dateStrValue) } - test("getPartitionsByFilter: date type pruning by metastore") { - val table = CatalogTable( - identifier = TableIdentifier("test_date", Some("default")), - tableType = CatalogTableType.MANAGED, - schema = new StructType().add("value", "int").add("part", "date"), - partitionColumnNames = Seq("part"), - storage = storageFormat) - client.createTable(table, ignoreIfExists = false) + test("getPartitionsByFilter: d=2019-01-01") { + testMetastorePartitionFiltering( + attr("d") === Date.valueOf("2019-01-01"), + dsValue, 
+ hValue, + chunkValue, + Seq("2019-01-01"), + dateStrValue) + } - val partitions = - for { - date <- Seq("2019-01-01", "2019-01-02", "2019-01-03", "2019-01-04") - } yield CatalogTablePartition(Map( - "part" -> date - ), storageFormat) - assert(partitions.size == 4) - - client.createPartitions("default", "test_date", partitions, ignoreIfExists = false) - - def testDataTypeFiltering( - filterExprs: Seq[Expression], - expectedPartitionCubes: Seq[Seq[Date]]): Unit = { - val filteredPartitions = client.getPartitionsByFilter( - client.getTable("default", "test_date"), - filterExprs, - SQLConf.get.sessionLocalTimeZone) - - val expectedPartitions = expectedPartitionCubes.map { - expectedDt => - for { - dt <- expectedDt - } yield Set( - "part" -> dt.toString - ) - }.reduce(_ ++ _) - - assert(filteredPartitions.map(_.spec.toSet).toSet == expectedPartitions.toSet) - } + test("getPartitionsByFilter: d>2019-01-02") { + testMetastorePartitionFiltering( + attr("d") > Date.valueOf("2019-01-02"), + dsValue, + hValue, + chunkValue, + Seq("2019-01-03"), + dateStrValue) + } + + test("getPartitionsByFilter: In(d, 2019-01-01, 2019-01-02)") { + testMetastorePartitionFiltering( + In(attr("d"), + Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)))), + dsValue, + hValue, + chunkValue, + Seq("2019-01-01", "2019-01-02"), + dateStrValue) + } - val dateAttr: Attribute = AttributeReference("part", DateType)() + test("getPartitionsByFilter: InSet(d, 2019-01-01, 2019-01-02)") { + testMetastorePartitionFiltering( + InSet(attr("d"), + Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow))), + dsValue, + hValue, + chunkValue, + Seq("2019-01-01", "2019-01-02"), + dateStrValue) + } - testDataTypeFiltering( - Seq(dateAttr === Date.valueOf("2019-01-01")), - Seq("2019-01-01").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(dateAttr > Date.valueOf("2019-01-02")), - Seq("2019-01-03", "2019-01-04").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(In(dateAttr, - Seq("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d))))), - Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) - testDataTypeFiltering( - Seq(InSet(dateAttr, - Set("2019-01-01", "2019-01-02").map(d => Literal(Date.valueOf(d)).eval(EmptyRow)))), - Seq("2019-01-01", "2019-01-02").map(Date.valueOf) :: Nil) + test("getPartitionsByFilter: cast(datestr as date)= 2020-01-01") { + testMetastorePartitionFiltering( + attr("datestr").cast(DateType) === Date.valueOf("2020-01-01"), + dsValue, + hValue, + chunkValue, + dateValue, + dateStrValue) } private def testMetastorePartitionFiltering( filterExpr: Expression, expectedDs: Seq[Int], expectedH: Seq[Int], - expectedChunks: Seq[String]): Unit = { + expectedChunks: Seq[String], + expectedD: Seq[String], + expectedDatestr: Seq[String]): Unit = { testMetastorePartitionFiltering( filterExpr, - (expectedDs, expectedH, expectedChunks) :: Nil, + (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) :: Nil, identity) } @@ -367,22 +422,25 @@ class HivePartitionFilteringSuite(version: String) expectedDs: Seq[Int], expectedH: Seq[Int], expectedChunks: Seq[String], + expectedD: Seq[String], + expectedDatestr: Seq[String], transform: Expression => Expression): Unit = { testMetastorePartitionFiltering( filterExpr, - (expectedDs, expectedH, expectedChunks) :: Nil, + (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) :: Nil, transform) } private def testMetastorePartitionFiltering( filterExpr: Expression, - expectedPartitionCubes: Seq[(Seq[Int], 
Seq[Int], Seq[String])]): Unit = { + expectedPartitionCubes: + Seq[(Seq[Int], Seq[Int], Seq[String], Seq[String], Seq[String])]): Unit = { testMetastorePartitionFiltering(filterExpr, expectedPartitionCubes, identity) } private def testMetastorePartitionFiltering( filterExpr: Expression, - expectedPartitionCubes: Seq[(Seq[Int], Seq[Int], Seq[String])], + expectedPartitionCubes: Seq[(Seq[Int], Seq[Int], Seq[String], Seq[String], Seq[String])], transform: Expression => Expression): Unit = { val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), Seq( @@ -390,20 +448,25 @@ class HivePartitionFilteringSuite(version: String) ), SQLConf.get.sessionLocalTimeZone) val expectedPartitionCount = expectedPartitionCubes.map { - case (expectedDs, expectedH, expectedChunks) => - expectedDs.size * expectedH.size * expectedChunks.size + case (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) => + expectedDs.size * expectedH.size * expectedChunks.size * + expectedD.size * expectedDatestr.size }.sum val expectedPartitions = expectedPartitionCubes.map { - case (expectedDs, expectedH, expectedChunks) => + case (expectedDs, expectedH, expectedChunks, expectedD, expectedDatestr) => for { ds <- expectedDs h <- expectedH chunk <- expectedChunks + d <- expectedD + datestr <- expectedDatestr } yield Set( "ds" -> ds.toString, "h" -> h.toString, - "chunk" -> chunk + "chunk" -> chunk, + "d" -> d, + "datestr" -> datestr ) }.reduce(_ ++ _) From b94ff1e870152ac692c6f1ebf3d110caa274ebb2 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Sun, 29 Nov 2020 11:24:58 -0800 Subject: [PATCH 0594/1009] [SPARK-33590][DOCS][SQL] Add missing sub-bullets in Spark SQL Guide ### What changes were proposed in this pull request? Add the missing sub-bullets in the left side of `Spark SQL Guide` ### Why are the changes needed? The three sub-bullets in the left side is not consistent with the contents (five bullets) in the right side. ![image](https://user-images.githubusercontent.com/1315079/100546388-7a21e880-32a4-11eb-922d-62a52f4f9f9b.png) ### Does this PR introduce _any_ user-facing change? Yes, you can see more lines in the left menu. ### How was this patch tested? Manually build the doc as follows. This can be verified as attached: ``` cd docs SKIP_API=1 jekyll build firefox _site/sql-pyspark-pandas-with-arrow.html ``` ![image](https://user-images.githubusercontent.com/1315079/100546399-8ad25e80-32a4-11eb-80ac-44af0aebc717.png) Closes #30537 from kiszk/SPARK-33590. 
Authored-by: Kazuaki Ishizaki Signed-off-by: Dongjoon Hyun --- docs/_data/menu-sql.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 2207bd6a17656..ec0b404fe672f 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -51,6 +51,10 @@ url: sql-performance-tuning.html#other-configuration-options - text: Join Strategy Hints for SQL Queries url: sql-performance-tuning.html#join-strategy-hints-for-sql-queries + - text: Coalesce Hints for SQL Queries + url: sql-performance-tuning.html#coalesce-hints-for-sql-queries + - text: Adaptive Query Execution + url: sql-performance-tuning.html#adaptive-query-execution - text: Distributed SQL Engine url: sql-distributed-sql-engine.html subitems: From c8286ec41616909f1f6e452ce63f0e7605d5bc63 Mon Sep 17 00:00:00 2001 From: Shixiong Zhu Date: Sun, 29 Nov 2020 11:56:48 -0800 Subject: [PATCH 0595/1009] [SPARK-33587][CORE] Kill the executor on nested fatal errors ### What changes were proposed in this pull request? Currently we kill the executor when hitting a fatal error. However, if the fatal error is wrapped by another exception, such as - java.util.concurrent.ExecutionException, com.google.common.util.concurrent.UncheckedExecutionException, com.google.common.util.concurrent.ExecutionError when using a Guava cache or a Java thread pool. - SparkException thrown from https://github.com/apache/spark/blob/cf98a761de677c733f3c33230e1c63ddb785d5c5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala#L231 or https://github.com/apache/spark/blob/cf98a761de677c733f3c33230e1c63ddb785d5c5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala#L296 we still keep the executor running. Fatal errors are usually unrecoverable (such as OutOfMemoryError), some components may be in a broken state when hitting a fatal error, and it's hard to predict the behavior of a broken component. Hence, it's better to detect a nested fatal error as well and kill the executor. Then we can rely on Spark's fault tolerance to recover. ### Why are the changes needed? Fatal errors are usually unrecoverable (such as OutOfMemoryError), some components may be in a broken state when hitting a fatal error, and it's hard to predict the behavior of a broken component. Hence, it's better to detect a nested fatal error as well and kill the executor. Then we can rely on Spark's fault tolerance to recover. ### Does this PR introduce _any_ user-facing change? Yes. There is a slight internal behavior change on when to kill an executor. We will kill the executor when detecting a nested fatal error in the exception chain. `spark.executor.killOnFatalError.depth` is added to allow users to turn off this change if the slight behavior change impacts them. ### How was this patch tested? The new method `Executor.isFatalError` is tested by the new `ExecutorSuite` test `SPARK-33587: isFatalError`. Closes #30528 from zsxwing/SPARK-33587.
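To make the wrapping scenario concrete, the following is a minimal, self-contained sketch (an illustration only, not the Spark implementation; the `containsFatalError` helper and its fatal-error classification are simplified assumptions) of how a fatal error thrown inside a thread pool surfaces as a nested cause, and how a depth-limited walk of the cause chain can still detect it:

```scala
import java.util.concurrent.{Callable, ExecutionException, Executors}

object NestedFatalErrorSketch {

  // Walk at most `depthToCheck` levels of the cause chain looking for a fatal
  // error. Depth 0 disables the check, which also bounds the walk if the
  // cause chain ever contains a cycle.
  def containsFatalError(t: Throwable, depthToCheck: Int): Boolean = {
    if (depthToCheck <= 0) {
      false
    } else {
      t match {
        case _: VirtualMachineError | _: LinkageError => true
        case e if e.getCause != null => containsFatalError(e.getCause, depthToCheck - 1)
        case _ => false
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val pool = Executors.newSingleThreadExecutor()
    try {
      val future = pool.submit(new Callable[String] {
        // The task itself dies with a fatal error...
        override def call(): String = throw new OutOfMemoryError("simulated")
      })
      future.get() // ...but the caller only ever sees an ExecutionException.
    } catch {
      case e: ExecutionException =>
        println(containsFatalError(e, depthToCheck = 1)) // false: only the wrapper is inspected
        println(containsFatalError(e, depthToCheck = 2)) // true: the walk reaches the OutOfMemoryError
    } finally {
      pool.shutdown()
    }
  }
}
```

Note that the real `Executor.isFatalError` added below additionally treats `SparkOutOfMemoryError` as non-fatal, matching the existing behavior of not killing the executor for it.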
Authored-by: Shixiong Zhu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/executor/Executor.scala | 28 ++++++- .../spark/internal/config/package.scala | 11 +++ .../apache/spark/executor/ExecutorSuite.scala | 73 ++++++++++++++++++- 3 files changed, 108 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index f7246448959e9..efb0b2c26d9a9 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -150,6 +150,8 @@ private[spark] class Executor( // Whether to monitor killed / interrupted tasks private val taskReaperEnabled = conf.get(TASK_REAPER_ENABLED) + private val killOnFatalErrorDepth = conf.get(EXECUTOR_KILL_ON_FATAL_ERROR_DEPTH) + // Create our ClassLoader // do this after SparkEnv creation so can access the SecurityManager private val urlClassLoader = createClassLoader() @@ -648,7 +650,7 @@ private[spark] class Executor( plugins.foreach(_.onTaskFailed(reason)) execBackend.statusUpdate(taskId, TaskState.KILLED, ser.serialize(reason)) - case t: Throwable if hasFetchFailure && !Utils.isFatalError(t) => + case t: Throwable if hasFetchFailure && !Executor.isFatalError(t, killOnFatalErrorDepth) => val reason = task.context.fetchFailed.get.toTaskFailedReason if (!t.isInstanceOf[FetchFailedException]) { // there was a fetch failure in the task, but some user code wrapped that exception @@ -711,7 +713,7 @@ private[spark] class Executor( // Don't forcibly exit unless the exception was inherently fatal, to avoid // stopping other tasks unnecessarily. - if (!t.isInstanceOf[SparkOutOfMemoryError] && Utils.isFatalError(t)) { + if (Executor.isFatalError(t, killOnFatalErrorDepth)) { uncaughtExceptionHandler.uncaughtException(Thread.currentThread(), t) } } finally { @@ -997,4 +999,26 @@ private[spark] object Executor { // Used to store executorSource, for local mode only var executorSourceLocalModeOnly: ExecutorSource = null + + /** + * Whether a `Throwable` thrown from a task is a fatal error. We will use this to decide whether + * to kill the executor. + * + * @param depthToCheck The max depth of the exception chain we should search for a fatal error. 0 + * means not checking any fatal error (in other words, return false), 1 means + * checking only the exception but not the cause, and so on. This is to avoid + * `StackOverflowError` when hitting a cycle in the exception chain. + */ + def isFatalError(t: Throwable, depthToCheck: Int): Boolean = { + if (depthToCheck <= 0) { + false + } else { + t match { + case _: SparkOutOfMemoryError => false + case e if Utils.isFatalError(e) => true + case e if e.getCause != null => isFatalError(e.getCause, depthToCheck - 1) + case _ => false + } + } + } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b38d0e5c617b9..b8bcb374ef961 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1946,6 +1946,17 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val EXECUTOR_KILL_ON_FATAL_ERROR_DEPTH = + ConfigBuilder("spark.executor.killOnFatalError.depth") + .doc("The max depth of the exception chain in a failed task Spark will search for a fatal " + + "error to check whether it should kill an executor. 
0 means not checking any fatal " + + "error, 1 means checking only the exception but not the cause, and so on.") + .internal() + .version("3.1.0") + .intConf + .checkValue(_ >= 0, "needs to be a non-negative value") + .createWithDefault(5) + private[spark] val PUSH_BASED_SHUFFLE_ENABLED = ConfigBuilder("spark.shuffle.push.enabled") .doc("Set to 'true' to enable push-based shuffle on the client side and this works in " + diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 31049d104e63d..1326ae3c11a06 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -28,6 +28,7 @@ import scala.collection.immutable import scala.collection.mutable.{ArrayBuffer, Map} import scala.concurrent.duration._ +import com.google.common.cache.{CacheBuilder, CacheLoader} import org.mockito.ArgumentCaptor import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.{inOrder, verify, when} @@ -43,7 +44,7 @@ import org.apache.spark.TaskState.TaskState import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ -import org.apache.spark.memory.TestMemoryManager +import org.apache.spark.memory.{SparkOutOfMemoryError, TestMemoryManager} import org.apache.spark.metrics.MetricsSystem import org.apache.spark.rdd.RDD import org.apache.spark.resource.ResourceInformation @@ -52,7 +53,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, UninterruptibleThread} +import org.apache.spark.util.{LongAccumulator, ThreadUtils, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { @@ -402,6 +403,74 @@ class ExecutorSuite extends SparkFunSuite assert(taskMetrics.getMetricValue("JVMHeapMemory") > 0) } + test("SPARK-33587: isFatalError") { + def errorInThreadPool(e: => Throwable): Throwable = { + intercept[Throwable] { + val taskPool = ThreadUtils.newDaemonFixedThreadPool(1, "test") + try { + val f = taskPool.submit(new java.util.concurrent.Callable[String] { + override def call(): String = throw e + }) + f.get() + } finally { + taskPool.shutdown() + } + } + } + + def errorInGuavaCache(e: => Throwable): Throwable = { + val cache = CacheBuilder.newBuilder() + .build(new CacheLoader[String, String] { + override def load(key: String): String = throw e + }) + intercept[Throwable] { + cache.get("test") + } + } + + def testThrowable( + e: => Throwable, + depthToCheck: Int, + isFatal: Boolean): Unit = { + import Executor.isFatalError + // `e`'s depth is 1 so `depthToCheck` needs to be at least 3 to detect fatal errors. + assert(isFatalError(e, depthToCheck) == (depthToCheck >= 1 && isFatal)) + // `e`'s depth is 2 so `depthToCheck` needs to be at least 3 to detect fatal errors. 
+ assert(isFatalError(errorInThreadPool(e), depthToCheck) == (depthToCheck >= 2 && isFatal)) + assert(isFatalError(errorInGuavaCache(e), depthToCheck) == (depthToCheck >= 2 && isFatal)) + assert(isFatalError( + new SparkException("foo", e), + depthToCheck) == (depthToCheck >= 2 && isFatal)) + // `e`'s depth is 3 so `depthToCheck` needs to be at least 3 to detect fatal errors. + assert(isFatalError( + errorInThreadPool(errorInGuavaCache(e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + assert(isFatalError( + errorInGuavaCache(errorInThreadPool(e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + assert(isFatalError( + new SparkException("foo", new SparkException("foo", e)), + depthToCheck) == (depthToCheck >= 3 && isFatal)) + } + + for (depthToCheck <- 0 to 5) { + testThrowable(new OutOfMemoryError(), depthToCheck, isFatal = true) + testThrowable(new InterruptedException(), depthToCheck, isFatal = false) + testThrowable(new RuntimeException("test"), depthToCheck, isFatal = false) + testThrowable(new SparkOutOfMemoryError("test"), depthToCheck, isFatal = false) + } + + // Verify we can handle the cycle in the exception chain + val e1 = new Exception("test1") + val e2 = new Exception("test2") + e1.initCause(e2) + e2.initCause(e1) + for (depthToCheck <- 0 to 5) { + testThrowable(e1, depthToCheck, isFatal = false) + testThrowable(e2, depthToCheck, isFatal = false) + } + } + private def createMockEnv(conf: SparkConf, serializer: JavaSerializer): SparkEnv = { val mockEnv = mock[SparkEnv] val mockRpcEnv = mock[RpcEnv] From 0054fc937f804660c6501d9d3f6319f3047a68f8 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 29 Nov 2020 12:10:16 -0800 Subject: [PATCH 0596/1009] [SPARK-33588][SQL] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW TABLE EXTENDED` ### What changes were proposed in this pull request? Perform partition spec normalization in `ShowTablesCommand` according to the table schema before getting partitions from the catalog. The normalization via `PartitioningUtils.normalizePartitionSpec()` adjusts the column names in partition specification, w.r.t. the real partition column names and case sensitivity. ### Why are the changes needed? Even when `spark.sql.caseSensitive` is `false` which is the default value, v1 `SHOW TABLE EXTENDED` is case sensitive: ```sql spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int) > USING parquet > partitioned by (year, month); spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1; spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); Error in query: Partition spec is invalid. The spec (YEAR, Month) must match the partition spec (year, month) defined in table '`default`.`tbl1`'; ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the `SHOW TABLE EXTENDED` command respects the SQL config. 
And for example above, it returns correct result: ```sql spark-sql> SHOW TABLE EXTENDED LIKE 'tbl1' PARTITION(YEAR = 2015, Month = 1); default tbl1 false Partition Values: [year=2015, month=1] Location: file:/Users/maximgekk/spark-warehouse/tbl1/year=2015/month=1 Serde Library: org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe InputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Storage Properties: [serialization.format=1, path=file:/Users/maximgekk/spark-warehouse/tbl1] Partition Parameters: {transient_lastDdlTime=1606595118, totalSize=623, numFiles=1} Created Time: Sat Nov 28 23:25:18 MSK 2020 Last Access: UNKNOWN Partition Statistics: 623 bytes ``` ### How was this patch tested? By running the modified test suite `v1/ShowTablesSuite` Closes #30529 from MaxGekk/show-table-case-sensitive-spec. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/tables.scala | 17 ++++++++----- .../sql-tests/results/show-tables.sql.out | 2 +- .../command/v1/ShowTablesSuite.scala | 25 +++++++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index bd238948aab02..9e3ca3c321a54 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -878,12 +878,17 @@ case class ShowTablesCommand( // // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]] // should have been thrown by the sql parser. - val tableIdent = TableIdentifier(tableIdentifierPattern.get, Some(db)) - val table = catalog.getTableMetadata(tableIdent).identifier - val partition = catalog.getPartition(tableIdent, partitionSpec.get) - val database = table.database.getOrElse("") - val tableName = table.table - val isTemp = catalog.isTemporaryTable(table) + val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db))) + val tableIdent = table.identifier + val normalizedSpec = PartitioningUtils.normalizePartitionSpec( + partitionSpec.get, + table.partitionColumnNames, + tableIdent.quotedString, + sparkSession.sessionState.conf.resolver) + val partition = catalog.getPartition(tableIdent, normalizedSpec) + val database = tableIdent.database.getOrElse("") + val tableName = tableIdent.table + val isTemp = catalog.isTemporaryTable(tableIdent) val information = partition.simpleString Seq(Row(database, tableName, isTemp, s"$information\n")) } diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index a95b02c7f7743..60c5e6d5642b7 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. 
The spec (a, d) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; +a is not a valid partition column in table `showdb`.`show_t1`.; -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 5bbc6c6285193..8f29f9f276138 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} @@ -84,6 +85,30 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { result.foreach { case Row(_, _, _, info: String) => assert(info.nonEmpty) } } } + + test("case sensitivity of partition spec") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val t = s"$catalog.ns.part_table" + withTable(t) { + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") + val information = df.select("information").first().getString(0) + assert(information.contains("Partition Values: [year=2015, month=1]")) + } + } + } + } + } } class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession From a088a801ed8c17171545c196a3f26ce415de0cd1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 29 Nov 2020 12:18:07 -0800 Subject: [PATCH 0597/1009] [SPARK-33585][SQL][DOCS] Fix the comment for `SQLContext.tables()` and mention the `database` column ### What changes were proposed in this pull request? Change the comments for `SQLContext.tables()` to "The returned DataFrame has three columns, database, tableName and isTemporary". ### Why are the changes needed? Currently, the comment mentions only 2 columns but `tables()` returns 3 columns actually: ```scala scala> spark.range(10).createOrReplaceTempView("view1") scala> val tables = spark.sqlContext.tables() tables: org.apache.spark.sql.DataFrame = [database: string, tableName: string ... 1 more field] scala> tables.printSchema root |-- database: string (nullable = false) |-- tableName: string (nullable = false) |-- isTemporary: boolean (nullable = false) scala> tables.show +--------+---------+-----------+ |database|tableName|isTemporary| +--------+---------+-----------+ | default| t1| false| | default| t2| false| | default| ymd| false| | | view1| true| +--------+---------+-----------+ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle` Closes #30526 from MaxGekk/sqlcontext-tables-doc. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 7cf0b6bb70364..dd237962110ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -661,7 +661,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * Returns a `DataFrame` containing names of existing tables in the current database. - * The returned DataFrame has two columns, tableName and isTemporary (a Boolean + * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean * indicating if a table is a temporary one or not). * * @group ddl_ops @@ -673,7 +673,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * Returns a `DataFrame` containing names of existing tables in the given database. - * The returned DataFrame has two columns, tableName and isTemporary (a Boolean + * The returned DataFrame has three columns, database, tableName and isTemporary (a Boolean * indicating if a table is a temporary one or not). * * @group ddl_ops From 3d54774fb9cbf674580851aa2323991c7e462a1e Mon Sep 17 00:00:00 2001 From: liucht Date: Mon, 30 Nov 2020 10:03:18 +0900 Subject: [PATCH 0598/1009] [SPARK-33517][SQL][DOCS] Fix the correct menu items and page links in PySpark Usage Guide for Pandas with Apache Arrow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Change "Apache Arrow in Spark" to "Apache Arrow in PySpark" and the link to “/sql-pyspark-pandas-with-arrow.html#apache-arrow-in-pyspark” ### Why are the changes needed? When I click on the menu item it doesn't point to the correct page, and from the parent menu I can infer that the correct menu item name and link should be "Apache Arrow in PySpark". like this: image ![image](https://user-images.githubusercontent.com/28332082/99954725-2b64e200-2dbe-11eb-9576-cf6a3d758980.png) ### Does this PR introduce any user-facing change? Yes, clicking on the menu item will take you to the correct guide page ### How was this patch tested? Manually build the doc. This can be verified as below: cd docs SKIP_API=1 jekyll build open _site/sql-pyspark-pandas-with-arrow.html Closes #30466 from liucht-inspur/master. Authored-by: liucht Signed-off-by: HyukjinKwon --- docs/_data/menu-sql.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index ec0b404fe672f..cda2a1a5139a1 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -64,17 +64,6 @@ url: sql-distributed-sql-engine.html#running-the-spark-sql-cli - text: PySpark Usage Guide for Pandas with Apache Arrow url: sql-pyspark-pandas-with-arrow.html - subitems: - - text: Apache Arrow in Spark - url: sql-pyspark-pandas-with-arrow.html#apache-arrow-in-spark - - text: "Enabling for Conversion to/from Pandas" - url: sql-pyspark-pandas-with-arrow.html#enabling-for-conversion-tofrom-pandas - - text: "Pandas UDFs (a.k.a. 
Vectorized UDFs)" - url: sql-pyspark-pandas-with-arrow.html#pandas-udfs-aka-vectorized-udfs - - text: "Pandas Function APIs" - url: sql-pyspark-pandas-with-arrow.html#pandas-function-apis - - text: Usage Notes - url: sql-pyspark-pandas-with-arrow.html#usage-notes - text: Migration Guide url: sql-migration-old.html - text: SQL Reference From f93d4395b25ea546cebb1ff16879dea696a217b5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 30 Nov 2020 11:21:02 +0900 Subject: [PATCH 0599/1009] [SPARK-33589][SQL] Close opened session if the initialization fails ### What changes were proposed in this pull request? This pr add try catch when opening session. ### Why are the changes needed? Close opened session if the initialization fails. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Before this pr: ``` [rootspark-3267648 spark]# bin/beeline -u jdbc:hive2://localhost:10000/db_not_exist NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of assembly. Connecting to jdbc:hive2://localhost:10000/db_not_exist log4j:WARN No appenders could be found for logger (org.apache.hive.jdbc.Utils). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/db_not_exist: Database 'db_not_exist' not found; (state=08S01,code=0) Beeline version 2.3.7 by Apache Hive beeline> ``` ![image](https://user-images.githubusercontent.com/5399861/100560975-73ba5d80-32f2-11eb-8f92-b2509e7a121f.png) After this pr: ``` [rootspark-3267648 spark]# bin/beeline -u jdbc:hive2://localhost:10000/db_not_exist NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark classes ahead of assembly. log4j:WARN No appenders could be found for logger (org.apache.hadoop.util.Shell). log4j:WARN Please initialize the log4j system properly. log4j:WARN See http://logging.apache.org/log4j/1.2/faq.html#noconfig for more info. Connecting to jdbc:hive2://localhost:10000/db_not_exist Error: Could not open client transport with JDBC Uri: jdbc:hive2://localhost:10000/db_not_exist: Failed to open new session: org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'db_not_exist' not found; (state=08S01,code=0) Beeline version 2.3.7 by Apache Hive beeline> ``` ![image](https://user-images.githubusercontent.com/5399861/100560917-479edc80-32f2-11eb-986f-7a997f1163fc.png) Closes #30536 from wangyum/SPARK-33589. 
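The fix follows the standard open-then-initialize pattern: if any initialization step after the session handle is created throws, close the handle before propagating a wrapped error, so no half-initialized session stays registered. A hedged sketch of that shape (the `SessionStore` and `Handle` types below are placeholders for illustration, not the HiveServer2 or Spark Thrift server API):

```scala
import scala.collection.mutable

// Placeholder types for illustration only; not the real HiveServer2 classes.
final case class Handle(id: Long)

final class SessionStore {
  private val open = mutable.Set.empty[Long]
  private var nextId = 0L

  def openRaw(): Handle = { nextId += 1; open += nextId; Handle(nextId) }
  def close(handle: Handle): Unit = open -= handle.id
  def openCount: Int = open.size

  // Open a session and run per-session initialization (e.g. switching the
  // current database). On failure, close the just-opened session and rethrow
  // a wrapped error, mirroring the shape of the SPARK-33589 fix.
  def openSession(init: Handle => Unit): Handle = {
    val handle = openRaw()
    try {
      init(handle)
      handle
    } catch {
      case e: Exception =>
        try {
          close(handle)
        } catch {
          case t: Throwable => println(s"Error closing session: $t")
        }
        throw new RuntimeException("Failed to open new session: " + e, e)
    }
  }
}

object CloseOnFailureSketch {
  def main(args: Array[String]): Unit = {
    val store = new SessionStore
    try {
      store.openSession(_ => throw new IllegalArgumentException("Database 'db_not_exist' not found"))
    } catch {
      case e: RuntimeException => println(e.getMessage)
    }
    println(s"open sessions after failed open: ${store.openCount}") // 0
  }
}
```

The diff below applies exactly this shape inside `SparkSQLSessionManager.openSession`, wrapping the failure in a `HiveSQLException`.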
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../thriftserver/SparkSQLSessionManager.scala | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 856edede0b85f..0c092abb37f3e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -18,11 +18,12 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.hadoop.hive.conf.HiveConf -import org.apache.hive.service.cli.SessionHandle +import org.apache.hive.service.cli.{HiveSQLException, SessionHandle} import org.apache.hive.service.cli.session.SessionManager import org.apache.hive.service.rpc.thrift.TProtocolVersion import org.apache.hive.service.server.HiveServer2 +import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ @@ -32,7 +33,7 @@ import org.apache.spark.sql.internal.SQLConf private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext) extends SessionManager(hiveServer) - with ReflectedCompositeService { + with ReflectedCompositeService with Logging { private lazy val sparkSqlOperationManager = new SparkSQLOperationManager() @@ -52,24 +53,35 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: val sessionHandle = super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation, delegationToken) - val session = super.getSession(sessionHandle) - HiveThriftServer2.eventManager.onSessionCreated( - session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername) - val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) { - sqlContext - } else { - sqlContext.newSession() + try { + val session = super.getSession(sessionHandle) + HiveThriftServer2.eventManager.onSessionCreated( + session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername) + val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) { + sqlContext + } else { + sqlContext.newSession() + } + ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) + ctx.setConf(SQLConf.DATETIME_JAVA8API_ENABLED, true) + val hiveSessionState = session.getSessionState + setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) + setConfMap(ctx, hiveSessionState.getHiveVariables) + if (sessionConf != null && sessionConf.containsKey("use:database")) { + ctx.sql(s"use ${sessionConf.get("use:database")}") + } + sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) + sessionHandle + } catch { + case e: Exception => + try { + closeSession(sessionHandle) + } catch { + case t: Throwable => + logWarning("Error closing session", t) + } + throw new HiveSQLException("Failed to open new session: " + e, e) } - ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) - ctx.setConf(SQLConf.DATETIME_JAVA8API_ENABLED, true) - val hiveSessionState = session.getSessionState - setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) - setConfMap(ctx, hiveSessionState.getHiveVariables) - if (sessionConf != null && sessionConf.containsKey("use:database")) { - ctx.sql(s"use 
${sessionConf.get("use:database")}") - } - sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) - sessionHandle } override def closeSession(sessionHandle: SessionHandle): Unit = { From a5e13acd19871831a93a5bdcbc99a9eb9f1aba07 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 30 Nov 2020 11:24:15 +0900 Subject: [PATCH 0600/1009] [SPARK-33582][SQL] Hive Metastore support filter by not-equals ### What changes were proposed in this pull request? This pr make partition predicate pushdown into Hive metastore support not-equals operator. Hive related changes: https://github.com/apache/hive/blob/b8bd4594bef718b1eeac9fceb437d7df7b480ed1/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java#L2194-L2207 https://issues.apache.org/jira/browse/HIVE-2702 ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30534 from wangyum/SPARK-33582. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../spark/sql/hive/client/HiveShim.scala | 8 ++++++++ .../spark/sql/hive/client/FiltersSuite.scala | 8 ++++++++ .../client/HivePartitionFilteringSuite.scala | 20 +++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 17a64a67df283..ed088648bc20a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -812,6 +812,14 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { right <- convert(expr2) } yield s"($left or $right)" + case Not(EqualTo( + ExtractAttribute(SupportedAttribute(name)), ExtractableLiteral(value))) if useAdvanced => + Some(s"$name != $value") + + case Not(EqualTo( + ExtractableLiteral(value), ExtractAttribute(SupportedAttribute(name)))) if useAdvanced => + Some(s"$value != $name") + case _ => None } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala index 6c0531182e6d6..12ed0e5305299 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/FiltersSuite.scala @@ -100,6 +100,14 @@ class FiltersSuite extends SparkFunSuite with Logging with PlanTest { (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil, "(intcol = 1)") + filterTest("NOT: int and string filters", + (a("intcol", IntegerType) =!= Literal(1)) :: (Literal("a") =!= a("strcol", IntegerType)) :: Nil, + """intcol != 1 and "a" != strcol""") + + filterTest("NOT: date filter", + (a("datecol", DateType) =!= Literal(Date.valueOf("2019-01-01"))) :: Nil, + "datecol != 2019-01-01") + // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization // will be applied by Catalyst, this filter converter does not need to account for this. 
filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index e07fbc29ee8aa..dc56e6bc4da81 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -352,6 +352,26 @@ class HivePartitionFilteringSuite(version: String) dateStrValue) } + test("getPartitionsByFilter: ds<>20170101") { + testMetastorePartitionFiltering( + attr("ds") =!= 20170101, + 20170102 to 20170103, + hValue, + chunkValue, + dateValue, + dateStrValue) + } + + test("getPartitionsByFilter: h<>0 and chunk<>ab and d<>2019-01-01") { + testMetastorePartitionFiltering( + attr("h") =!= 0 && attr("chunk") =!= "ab" && attr("d") =!= Date.valueOf("2019-01-01"), + dsValue, + 1 to 4, + Seq("aa", "ba", "bb"), + Seq("2019-01-02", "2019-01-03"), + dateStrValue) + } + test("getPartitionsByFilter: d=2019-01-01") { testMetastorePartitionFiltering( attr("d") === Date.valueOf("2019-01-01"), From feda7299e3d8ebe665b8fae0328f22a4927c66da Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 30 Nov 2020 04:50:50 +0000 Subject: [PATCH 0601/1009] [SPARK-33567][SQL] DSv2: Use callback instead of passing Spark session and v2 relation for refreshing cache ### What changes were proposed in this pull request? This replaces Spark session and `DataSourceV2Relation` in V2 write plans by replacing them with a callback `afterWrite`. ### Why are the changes needed? Per discussion in #30429, it's better to not pass Spark session and `DataSourceV2Relation` through Spark plans. Instead we can use a callback which makes the interface cleaner. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30491 from sunchao/SPARK-33492-followup. 
Authored-by: Chao Sun Signed-off-by: Wenchen Fan --- .../datasources/v2/DataSourceV2Strategy.scala | 26 +++++++++++++------ .../datasources/v2/DropTableExec.scala | 11 +++----- .../datasources/v2/RefreshTableExec.scala | 11 +++----- .../datasources/v2/V1FallbackWriters.scala | 15 ++++++----- .../v2/WriteToDataSourceV2Exec.scala | 21 +++++++-------- 5 files changed, 43 insertions(+), 41 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index eb0d7010041b9..1fae8d937e90c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -52,6 +52,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } } + private def refreshCache(r: DataSourceV2Relation)(): Unit = { + session.sharedState.cacheManager.recacheByPlan(session, r) + } + + private def invalidateCache(r: ResolvedTable)(): Unit = { + val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + } + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation @ DataSourceV2ScanRelation(_, V1ScanWrapper(scan, translated, pushed), output)) => @@ -128,7 +137,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(session, r.catalog, r.table, r.identifier) :: Nil + RefreshTableExec(r.catalog, r.identifier, invalidateCache(r)) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) @@ -172,9 +181,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query, r) :: Nil + AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r)) :: Nil case v2 => - AppendDataExec(session, v2, r, writeOptions.asOptions, planLater(query)) :: Nil + AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil } case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => @@ -186,15 +195,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat }.toArray r.table.asWritable match { case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, query, r) :: Nil + OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, + query, refreshCache(r)) :: Nil case v2 => - OverwriteByExpressionExec(session, v2, r, filters, - writeOptions.asOptions, planLater(query)) :: Nil + OverwriteByExpressionExec(v2, filters, + writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil } case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => OverwritePartitionsDynamicExec( - session, r.table.asWritable, r, writeOptions.asOptions, planLater(query)) :: Nil + r.table.asWritable, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil case DeleteFromTable(relation, 
condition) => relation match { @@ -232,7 +242,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("Describing columns is not supported for v2 tables.") case DropTable(r: ResolvedTable, ifExists, purge) => - DropTableExec(session, r.catalog, r.table, r.identifier, ifExists, purge) :: Nil + DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil case _: NoopDropTable => LocalTableScanExec(Nil, Nil) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index 068475fc56f47..f89b89096772a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -17,27 +17,24 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} /** * Physical plan node for dropping a table. */ case class DropTableExec( - session: SparkSession, catalog: TableCatalog, - table: Table, ident: Identifier, ifExists: Boolean, - purge: Boolean) extends V2CommandExec { + purge: Boolean, + invalidateCache: () => Unit) extends V2CommandExec { override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { - val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + invalidateCache() catalog.dropTable(ident, purge) } else if (!ifExists) { throw new NoSuchTableException(ident) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 52836de5a926b..994583c1e338f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -17,23 +17,20 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} case class RefreshTableExec( - session: SparkSession, catalog: TableCatalog, - table: Table, - ident: Identifier) extends V2CommandExec { + ident: Identifier, + invalidateCache: () => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { catalog.invalidateTable(ident) // invalidate all caches referencing the given table // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table - val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + invalidateCache() Seq.empty } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index af7721588edeb..9d2cea9fbaff3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -38,10 +38,10 @@ case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - v2Relation: DataSourceV2Relation) extends V1FallbackWriters { + refreshCache: () => Unit) extends V1FallbackWriters { override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write(), Some(v2Relation)) + writeWithV1(newWriteBuilder().buildForV1Write(), refreshCache = refreshCache) } } @@ -61,7 +61,7 @@ case class OverwriteByExpressionExecV1( deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - v2Relation: DataSourceV2Relation) extends V1FallbackWriters { + refreshCache: () => Unit) extends V1FallbackWriters { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -70,10 +70,11 @@ case class OverwriteByExpressionExecV1( override protected def run(): Seq[InternalRow] = { newWriteBuilder() match { case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), Some(v2Relation)) + writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), refreshCache = refreshCache) case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), Some(v2Relation)) + writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), + refreshCache = refreshCache) case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") @@ -116,11 +117,11 @@ trait SupportsV1Write extends SparkPlan { protected def writeWithV1( relation: InsertableRelation, - v2Relation: Option[DataSourceV2Relation] = None): Seq[InternalRow] = { + refreshCache: () => Unit = () => ()): Seq[InternalRow] = { val session = sqlContext.sparkSession // The `plan` is already optimized, we should not analyze and optimize it again. relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) - v2Relation.foreach(r => session.sharedState.cacheManager.recacheByPlan(session, r)) + refreshCache() Nil } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 1648134d0a1b2..47aad2bcb2c56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -213,15 +213,14 @@ case class AtomicReplaceTableAsSelectExec( * Rows in the output data set are appended. 
*/ case class AppendDataExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } @@ -237,12 +236,11 @@ case class AppendDataExec( * AlwaysTrue to delete all rows. */ case class OverwriteByExpressionExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { private def isTruncate(filters: Array[Filter]): Boolean = { filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] @@ -259,7 +257,7 @@ case class OverwriteByExpressionExec( case _ => throw new SparkException(s"Table does not support overwrite by expression: $table") } - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } @@ -275,11 +273,10 @@ case class OverwriteByExpressionExec( * are not modified. */ case class OverwritePartitionsDynamicExec( - session: SparkSession, table: SupportsWrite, - relation: DataSourceV2Relation, writeOptions: CaseInsensitiveStringMap, - query: SparkPlan) extends V2TableWriteExec with BatchWriteHelper { + query: SparkPlan, + refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { override protected def run(): Seq[InternalRow] = { val writtenRows = newWriteBuilder() match { @@ -289,7 +286,7 @@ case class OverwritePartitionsDynamicExec( case _ => throw new SparkException(s"Table does not support dynamic partition overwrite: $table") } - session.sharedState.cacheManager.recacheByPlan(session, relation) + refreshCache() writtenRows } } From 485145326a9c97ede260b0e267ee116f182cfd56 Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Mon, 30 Nov 2020 13:59:51 +0900 Subject: [PATCH 0602/1009] [MINOR] Spelling bin core docs external mllib repl ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `bin` * `core` * `docs` * `external` * `mllib` * `repl` * `pom.xml` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30530 from jsoref/spelling-bin-core-docs-external-mllib-repl. 
Authored-by: Josh Soref Signed-off-by: Takeshi Yamamuro --- bin/docker-image-tool.sh | 2 +- .../apache/spark/ui/static/spark-dag-viz.js | 2 +- .../org/apache/spark/ui/static/utils.js | 2 +- .../spark/ExecutorAllocationManager.scala | 4 +- .../apache/spark/api/java/JavaPairRDD.scala | 4 +- .../apache/spark/api/java/JavaRDDLike.scala | 2 +- .../apache/spark/api/python/PythonRDD.scala | 6 +- .../apache/spark/deploy/JsonProtocol.scala | 2 +- .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../deploy/history/FsHistoryProvider.scala | 2 +- .../spark/deploy/history/HybridStore.scala | 2 +- .../org/apache/spark/executor/Executor.scala | 4 +- .../apache/spark/metrics/MetricsConfig.scala | 2 +- .../metrics/sink/PrometheusServlet.scala | 6 +- .../apache/spark/rdd/DoubleRDDFunctions.scala | 2 +- .../spark/rdd/OrderedRDDFunctions.scala | 4 +- .../main/scala/org/apache/spark/rdd/RDD.scala | 2 +- .../spark/resource/TaskResourceRequest.scala | 2 +- .../apache/spark/rpc/netty/NettyRpcEnv.scala | 4 +- .../BarrierJobAllocationFailed.scala | 4 +- .../apache/spark/scheduler/DAGScheduler.scala | 8 +- .../spark/scheduler/HealthTracker.scala | 4 +- .../spark/scheduler/TaskSetManager.scala | 2 +- .../spark/security/CryptoStreamUtils.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 4 +- .../storage/BlockManagerMasterEndpoint.scala | 2 +- .../apache/spark/ui/jobs/AllJobsPage.scala | 2 +- .../org/apache/spark/ui/jobs/JobPage.scala | 2 +- .../apache/spark/util/ClosureCleaner.scala | 2 +- .../scala/org/apache/spark/util/Utils.scala | 22 ++-- .../spark/util/io/ChunkedByteBuffer.scala | 2 +- .../sort/UnsafeShuffleWriterSuite.java | 10 +- .../test/org/apache/spark/JavaAPISuite.java | 2 +- .../org/apache/spark/CheckpointSuite.scala | 12 +- .../apache/spark/ContextCleanerSuite.scala | 10 +- .../ExecutorAllocationManagerSuite.scala | 2 +- .../scala/org/apache/spark/FileSuite.scala | 2 +- .../spark/benchmark/BenchmarkBase.scala | 2 +- .../history/FsHistoryProviderSuite.scala | 4 +- .../spark/deploy/master/MasterSuite.scala | 2 +- .../spark/deploy/worker/WorkerSuite.scala | 2 +- .../apache/spark/executor/ExecutorSuite.scala | 2 +- ...FileCommitProtocolInstantiationSuite.scala | 4 +- .../metrics/InputOutputMetricsSuite.scala | 2 +- .../NettyBlockTransferServiceSuite.scala | 2 +- .../spark/rdd/PairRDDFunctionsSuite.scala | 34 +++--- .../scala/org/apache/spark/rdd/RDDSuite.scala | 2 +- .../spark/resource/ResourceUtilsSuite.scala | 2 +- .../spark/rpc/netty/NettyRpcEnvSuite.scala | 2 +- .../spark/scheduler/DAGSchedulerSuite.scala | 6 +- .../spark/scheduler/ReplayListenerSuite.scala | 2 +- .../scheduler/SchedulerIntegrationSuite.scala | 8 +- .../spark/scheduler/SparkListenerSuite.scala | 6 +- .../spark/scheduler/TaskSetManagerSuite.scala | 6 +- .../spark/status/AppStatusListenerSuite.scala | 2 +- .../spark/storage/BlockManagerSuite.scala | 4 +- .../apache/spark/util/JsonProtocolSuite.scala | 8 +- .../spark/util/SizeEstimatorSuite.scala | 2 +- docs/_plugins/include_example.rb | 4 +- docs/building-spark.md | 2 +- docs/configuration.md | 2 +- docs/css/main.css | 4 +- docs/graphx-programming-guide.md | 4 +- docs/ml-migration-guide.md | 2 +- docs/mllib-clustering.md | 2 +- docs/mllib-data-types.md | 2 +- docs/monitoring.md | 6 +- docs/running-on-kubernetes.md | 4 +- docs/running-on-mesos.md | 2 +- docs/running-on-yarn.md | 2 +- docs/sparkr.md | 2 +- docs/sql-data-sources-jdbc.md | 2 +- docs/sql-migration-guide.md | 6 +- ...l-ref-syntax-aux-conf-mgmt-set-timezone.md | 2 +- ...-ref-syntax-ddl-create-table-hiveformat.md | 8 
+- docs/sql-ref-syntax-dml-insert-into.md | 114 +++++++++--------- ...l-ref-syntax-dml-insert-overwrite-table.md | 52 ++++---- docs/sql-ref-syntax-qry-select-groupby.md | 4 +- .../sql-ref-syntax-qry-select-lateral-view.md | 6 +- docs/sql-ref-syntax-qry-select-orderby.md | 2 +- .../ml/evaluation/ClusteringMetrics.scala | 4 +- .../apache/spark/ml/feature/Binarizer.scala | 6 +- .../apache/spark/ml/feature/Selector.scala | 2 +- .../spark/ml/feature/StopWordsRemover.scala | 6 +- .../apache/spark/ml/image/ImageSchema.scala | 2 +- .../ml/r/AFTSurvivalRegressionWrapper.scala | 4 +- .../spark/ml/regression/FMRegressor.scala | 2 +- .../spark/mllib/classification/SVM.scala | 2 +- .../mllib/clustering/DistanceMeasure.scala | 6 +- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- .../mllib/clustering/StreamingKMeans.scala | 2 +- .../org/apache/spark/mllib/feature/PCA.scala | 4 +- .../apache/spark/mllib/feature/Word2Vec.scala | 2 +- .../spark/mllib/fpm/AssociationRules.scala | 4 +- .../mllib/linalg/distributed/RowMatrix.scala | 4 +- .../stat/test/KolmogorovSmirnovTest.scala | 2 +- .../ml/feature/JavaStopWordsRemoverSuite.java | 2 +- .../ml/clustering/GaussianMixtureSuite.scala | 2 +- .../evaluation/RegressionEvaluatorSuite.scala | 2 +- .../spark/ml/feature/ANOVASelectorSuite.scala | 10 +- .../apache/spark/ml/feature/DCTSuite.scala | 2 +- .../org/apache/spark/ml/feature/LSHTest.scala | 2 +- .../VarianceThresholdSelectorSuite.scala | 2 +- .../GeneralizedLinearRegressionSuite.scala | 4 +- pom.xml | 4 +- .../spark/repl/ExecutorClassLoaderSuite.scala | 5 +- 106 files changed, 288 insertions(+), 289 deletions(-) diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh index 6d74f8328aea2..2ec1ab8861798 100755 --- a/bin/docker-image-tool.sh +++ b/bin/docker-image-tool.sh @@ -274,7 +274,7 @@ Examples: - Build and push JDK11-based image for multiple archs to docker.io/myrepo $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build # Note: buildx, which does cross building, needs to do the push during build - # So there is no seperate push step with -X + # So there is no separate push step with -X EOF } diff --git a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js index 474c453643365..1fc1fb4b4513b 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js +++ b/core/src/main/resources/org/apache/spark/ui/static/spark-dag-viz.js @@ -334,7 +334,7 @@ function preprocessGraphLayout(g, forJob) { } /* - * Helper function to size the SVG appropriately such that all elements are displyed. + * Helper function to size the SVG appropriately such that all elements are displayed. * This assumes that all outermost elements are clusters (rectangles). */ function resizeSvg(svg) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 4cd83332cde5f..7e6dd678e2641 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -74,7 +74,7 @@ function getTimeZone() { return Intl.DateTimeFormat().resolvedOptions().timeZone; } catch(ex) { // Get time zone from a string representing the date, - // eg. "Thu Nov 16 2017 01:13:32 GMT+0800 (CST)" -> "CST" + // e.g. 
"Thu Nov 16 2017 01:13:32 GMT+0800 (CST)" -> "CST" return new Date().toString().match(/\((.*)\)/)[1]; } } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index e445f188e1eed..61ab63584269b 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -248,7 +248,7 @@ private[spark] class ExecutorAllocationManager( executor.scheduleWithFixedDelay(scheduleTask, 0, intervalMillis, TimeUnit.MILLISECONDS) } - // copy the maps inside synchonize to ensure not being modified + // copy the maps inside synchronize to ensure not being modified val (numExecutorsTarget, numLocalityAware) = synchronized { val numTarget = numExecutorsTargetPerResourceProfileId.toMap val numLocality = numLocalityAwareTasksPerResourceProfileId.toMap @@ -379,7 +379,7 @@ private[spark] class ExecutorAllocationManager( // We lower the target number of executors but don't actively kill any yet. Killing is // controlled separately by an idle timeout. It's still helpful to reduce - // the target number in case an executor just happens to get lost (eg., bad hardware, + // the target number in case an executor just happens to get lost (e.g., bad hardware, // or the cluster manager preempts it) -- in that case, there is no point in trying // to immediately get a new executor, since we wouldn't even use it yet. decrementExecutorsFromTarget(maxNeeded, rpId, updatesNeeded) diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index 1bcd203f2e435..6dd36309378cc 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -941,7 +941,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Return a RDD containing only the elements in the inclusive range `lower` to `upper`. * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be - * performed efficiently by only scanning the partitions that might containt matching elements. + * performed efficiently by only scanning the partitions that might contain matching elements. * Otherwise, a standard `filter` is applied to all partitions. * * @since 3.1.0 @@ -955,7 +955,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)]) /** * Return a RDD containing only the elements in the inclusive range `lower` to `upper`. * If the RDD has been partitioned using a `RangePartitioner`, then this operation can be - * performed efficiently by only scanning the partitions that might containt matching elements. + * performed efficiently by only scanning the partitions that might contain matching elements. * Otherwise, a standard `filter` is applied to all partitions. * * @since 3.1.0 diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 89b33945dfb08..306af24ada584 100644 --- a/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -78,7 +78,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** * Internal method to this RDD; will read from cache if applicable, or otherwise compute it. 
- * This should ''not'' be called by users directly, but is available for implementors of custom + * This should ''not'' be called by users directly, but is available for implementers of custom * subclasses of RDD. */ def iterator(split: Partition, taskContext: TaskContext): JIterator[T] = diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 86a1ac31c0845..6d4dc3d3dfe92 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -48,14 +48,14 @@ import org.apache.spark.util._ private[spark] class PythonRDD( parent: RDD[_], func: PythonFunction, - preservePartitoning: Boolean, + preservePartitioning: Boolean, isFromBarrier: Boolean = false) extends RDD[Array[Byte]](parent) { override def getPartitions: Array[Partition] = firstParent.partitions override val partitioner: Option[Partitioner] = { - if (preservePartitoning) firstParent.partitioner else None + if (preservePartitioning) firstParent.partitioner else None } val asJavaRDD: JavaRDD[Array[Byte]] = JavaRDD.fromRDD(this) @@ -837,7 +837,7 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial * We might be serializing a really large object from python -- we don't want * python to buffer the whole thing in memory, nor can it write to a file, * so we don't know the length in advance. So python writes it in chunks, each chunk - * preceeded by a length, till we get a "length" of -1 which serves as EOF. + * preceded by a length, till we get a "length" of -1 which serves as EOF. * * Tested from python tests. */ diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index d76fb7f9a20b3..f697892aacc83 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -80,7 +80,7 @@ private[deploy] object JsonProtocol { } /** - * Export the [[ApplicationInfo]] to a Json objec. An [[ApplicationInfo]] consists of the + * Export the [[ApplicationInfo]] to a Json object. An [[ApplicationInfo]] consists of the * information of an application. * * @return a Json object containing the following fields: diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 7332c6d54c981..4aa393c514af6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -311,7 +311,7 @@ private[spark] class SparkSubmit extends Logging { // In K8s client mode, when in the driver, add resolved jars early as we might need // them at the submit time for artifact downloading. // For example we might use the dependencies for downloading - // files from a Hadoop Compatible fs eg. S3. In this case the user might pass: + // files from a Hadoop Compatible fs e.g. S3. 
In this case the user might pass: // --packages com.amazonaws:aws-java-sdk:1.7.4:org.apache.hadoop:hadoop-aws:2.7.6 if (isKubernetesClusterModeDriver) { val loader = getSubmitClassLoader(sparkConf) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e5341aff8ce66..e6df260bdeaa3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -722,7 +722,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) /** * Replay the given log file, saving the application in the listing db. - * Visable for testing + * Visible for testing */ private[history] def doMergeApplicationListing( reader: EventLogFileReader, diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala index 1b8c7ff26e9f5..4eb5c15d4ed18 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HybridStore.scala @@ -52,7 +52,7 @@ private[history] class HybridStore extends KVStore { // A background thread that dumps data from inMemoryStore to levelDB private var backgroundThread: Thread = null - // A hash map that stores all classes that had been writen to inMemoryStore + // A hash map that stores all classes that had been written to inMemoryStore // Visible for testing private[history] val klassMap = new ConcurrentHashMap[Class[_], Boolean] diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index efb0b2c26d9a9..c81ac778a32d1 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -685,7 +685,7 @@ private[spark] class Executor( // SPARK-20904: Do not report failure to driver if if happened during shut down. Because // libraries may set up shutdown hooks that race with running tasks during shutdown, // spurious failures may occur and can result in improper accounting in the driver (e.g. - // the task failure would not be ignored if the shutdown happened because of premption, + // the task failure would not be ignored if the shutdown happened because of preemption, // instead of an app issue). if (!ShutdownHookManager.inShutdown()) { val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) @@ -744,7 +744,7 @@ private[spark] class Executor( * sending a Thread.interrupt(), and monitoring the task until it finishes. * * Spark's current task cancellation / task killing mechanism is "best effort" because some tasks - * may not be interruptable or may not respond to their "killed" flags being set. If a significant + * may not be interruptible or may not respond to their "killed" flags being set. If a significant * fraction of a cluster's task slots are occupied by tasks that have been marked as killed but * remain running then this can lead to a situation where new jobs and tasks are starved of * resources that are being used by these zombie tasks. 
diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index d98d5e3b81aa0..bddd18adc683e 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -102,7 +102,7 @@ private[spark] class MetricsConfig(conf: SparkConf) extends Logging { * * @param prop the flat list of properties to "unflatten" based on prefixes * @param regex the regex that the prefix has to comply with - * @return an unflatted map, mapping prefix with sub-properties under that prefix + * @return an unflattened map, mapping prefix with sub-properties under that prefix */ def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala index 59b863b89f75a..e9c2974622300 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/PrometheusServlet.scala @@ -56,7 +56,7 @@ private[spark] class PrometheusServlet( def getMetricsSnapshot(request: HttpServletRequest): String = { import scala.collection.JavaConverters._ - val guagesLabel = """{type="gauges"}""" + val gaugesLabel = """{type="gauges"}""" val countersLabel = """{type="counters"}""" val metersLabel = countersLabel val histogramslabels = """{type="histograms"}""" @@ -65,8 +65,8 @@ private[spark] class PrometheusServlet( val sb = new StringBuilder() registry.getGauges.asScala.foreach { case (k, v) => if (!v.getValue.isInstanceOf[String]) { - sb.append(s"${normalizeKey(k)}Number$guagesLabel ${v.getValue}\n") - sb.append(s"${normalizeKey(k)}Value$guagesLabel ${v.getValue}\n") + sb.append(s"${normalizeKey(k)}Number$gaugesLabel ${v.getValue}\n") + sb.append(s"${normalizeKey(k)}Value$gaugesLabel ${v.getValue}\n") } } registry.getCounters.asScala.foreach { case (k, v) => diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index 943abae17a911..39f69567981ea 100644 --- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -173,7 +173,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { if (buckets.length < 2) { throw new IllegalArgumentException("buckets array must have at least two elements") } - // The histogramPartition function computes the partail histogram for a given + // The histogramPartition function computes the partial histogram for a given // partition. The provided bucketFunction determines which bucket in the array // to increment or returns None if there is no bucket. 
This is done so we can // specialize for uniformly distributed buckets and save the O(log n) binary diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index 5b1c024257529..3cefcb16d6eb1 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -88,10 +88,10 @@ class OrderedRDDFunctions[K : Ordering : ClassTag, val rddToFilter: RDD[P] = self.partitioner match { case Some(rp: RangePartitioner[K, V]) => - val partitionIndicies = (rp.getPartition(lower), rp.getPartition(upper)) match { + val partitionIndices = (rp.getPartition(lower), rp.getPartition(upper)) match { case (l, u) => Math.min(l, u) to Math.max(l, u) } - PartitionPruningRDD.create(self, partitionIndicies.contains) + PartitionPruningRDD.create(self, partitionIndices.contains) case _ => self } diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 15b00a4496da6..65b39c4b65603 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -327,7 +327,7 @@ abstract class RDD[T: ClassTag]( /** * Internal method to this RDD; will read from cache if applicable, or otherwise compute it. - * This should ''not'' be called by users directly, but is available for implementors of custom + * This should ''not'' be called by users directly, but is available for implementers of custom * subclasses of RDD. */ final def iterator(split: Partition, context: TaskContext): Iterator[T] = { diff --git a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala index d3f979fa8672f..12ef34241f9cb 100644 --- a/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala +++ b/core/src/main/scala/org/apache/spark/resource/TaskResourceRequest.scala @@ -20,7 +20,7 @@ package org.apache.spark.resource import org.apache.spark.annotation.{Evolving, Since} /** - * A task resource request. This is used in conjuntion with the ResourceProfile to + * A task resource request. This is used in conjunction with the ResourceProfile to * programmatically specify the resources needed for an RDD that will be applied at the * stage level. 
* diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala index fcb9fe422c0d4..5864e9e2ceac0 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala @@ -254,14 +254,14 @@ private[netty] class NettyRpcEnv( val timeoutCancelable = timeoutScheduler.schedule(new Runnable { override def run(): Unit = { - val remoteReceAddr = if (remoteAddr == null) { + val remoteRecAddr = if (remoteAddr == null) { Try { message.receiver.client.getChannel.remoteAddress() }.toOption.orNull } else { remoteAddr } - onFailure(new TimeoutException(s"Cannot receive any reply from ${remoteReceAddr} " + + onFailure(new TimeoutException(s"Cannot receive any reply from ${remoteRecAddr} " + s"in ${timeout.duration}")) } }, timeout.duration.toNanos, TimeUnit.NANOSECONDS) diff --git a/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala b/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala index 043c6b90384b4..8f0764ed1a61e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/BarrierJobAllocationFailed.scala @@ -45,10 +45,10 @@ private[spark] object BarrierJobAllocationFailed { val ERROR_MESSAGE_RUN_BARRIER_WITH_UNSUPPORTED_RDD_CHAIN_PATTERN = "[SPARK-24820][SPARK-24821]: Barrier execution mode does not allow the following pattern of " + "RDD chain within a barrier stage:\n1. Ancestor RDDs that have different number of " + - "partitions from the resulting RDD (eg. union()/coalesce()/first()/take()/" + + "partitions from the resulting RDD (e.g. union()/coalesce()/first()/take()/" + "PartitionPruningRDD). A workaround for first()/take() can be barrierRdd.collect().head " + "(scala) or barrierRdd.collect()[0] (python).\n" + - "2. An RDD that depends on multiple barrier RDDs (eg. barrierRdd1.zip(barrierRdd2))." + "2. An RDD that depends on multiple barrier RDDs (e.g. barrierRdd1.zip(barrierRdd2))." // Error message when running a barrier stage with dynamic resource allocation enabled. val ERROR_MESSAGE_RUN_BARRIER_WITH_DYN_ALLOCATION = diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 6fb0fb93f253b..02f5bb8cccd52 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -409,9 +409,9 @@ private[spark] class DAGScheduler( /** * Check to make sure we don't launch a barrier stage with unsupported RDD chain pattern. The * following patterns are not supported: - * 1. Ancestor RDDs that have different number of partitions from the resulting RDD (eg. + * 1. Ancestor RDDs that have different number of partitions from the resulting RDD (e.g. * union()/coalesce()/first()/take()/PartitionPruningRDD); - * 2. An RDD that depends on multiple barrier RDDs (eg. barrierRdd1.zip(barrierRdd2)). + * 2. An RDD that depends on multiple barrier RDDs (e.g. barrierRdd1.zip(barrierRdd2)). */ private def checkBarrierStageWithRDDChainPattern(rdd: RDD[_], numTasksInStage: Int): Unit = { if (rdd.isBarrier() && @@ -459,7 +459,7 @@ private[spark] class DAGScheduler( /** * We don't support run a barrier stage with dynamic resource allocation enabled, it shall lead - * to some confusing behaviors (eg. 
with dynamic resource allocation enabled, it may happen that + * to some confusing behaviors (e.g. with dynamic resource allocation enabled, it may happen that * we acquire some executors (but not enough to launch all the tasks in a barrier stage) and * later release them due to executor idle time expire, and then acquire again). * @@ -1555,7 +1555,7 @@ private[spark] class DAGScheduler( event.reason) if (!stageIdToStage.contains(task.stageId)) { - // The stage may have already finished when we get this event -- eg. maybe it was a + // The stage may have already finished when we get this event -- e.g. maybe it was a // speculative task. It is important that we send the TaskEnd event in any case, so listeners // are properly notified and can chose to handle it. For instance, some listeners are // doing their own accounting and if they don't get the task end event they think diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala index 9bbacea94bf68..c6b8dca3597ba 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.{Clock, SystemClock, Utils} * additional logic for exclusion of executors and nodes for individual tasks and stages which * works in concert with the logic here. * - * The tracker needs to deal with a variety of workloads, eg.: + * The tracker needs to deal with a variety of workloads, e.g.: * * * bad user code -- this may lead to many task failures, but that should not count against * individual executors @@ -362,7 +362,7 @@ private[scheduler] class HealthTracker ( * Apply the timeout to individual tasks. This is to prevent one-off failures that are very * spread out in time (and likely have nothing to do with problems on the executor) from * triggering exlusion. However, note that we do *not* remove executors and nodes from - * being excluded as we expire individual task failures -- each have their own timeout. Eg., + * being excluded as we expire individual task failures -- each have their own timeout. E.g., * suppose: * * timeout = 10, maxFailuresPerExec = 2 * * Task 1 fails on exec 1 at time 0 diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 0cfa76583bfbb..914fccc1a67cd 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -216,7 +216,7 @@ private[spark] class TaskSetManager( /** * Track the set of locality levels which are valid given the tasks locality preferences and * the set of currently available executors. This is updated as executors are added and removed. - * This allows a performance optimization, of skipping levels that aren't relevant (eg., skip + * This allows a performance optimization, of skipping levels that aren't relevant (e.g., skip * PROCESS_LOCAL if no tasks could be run PROCESS_LOCAL for the current set of executors). 
*/ private[scheduler] var myLocalityLevels = computeValidLocalityLevels() diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala index a4df0d543ecbe..4ebb7b0defd7f 100644 --- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala +++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala @@ -167,7 +167,7 @@ private[spark] object CryptoStreamUtils extends Logging { } /** - * SPARK-25535. The commons-cryto library will throw InternalError if something goes + * SPARK-25535. The commons-crypto library will throw InternalError if something goes * wrong, and leave bad state behind in the Java wrappers, so it's not safe to use them * afterwards. This wrapper detects that situation and avoids further calls into the * commons-crypto code, while still allowing the underlying streams to be closed. diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 924601f92c5b8..072702b343328 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1103,7 +1103,7 @@ private[spark] class BlockManager( blockSize: Long): Option[ManagedBuffer] = { val file = ExecutorDiskUtils.getFile(localDirs, subDirsPerLocalDir, blockId.name) if (file.exists()) { - val mangedBuffer = securityManager.getIOEncryptionKey() match { + val managedBuffer = securityManager.getIOEncryptionKey() match { case Some(key) => // Encrypted blocks cannot be memory mapped; return a special object that does decryption // and provides InputStream / FileRegion implementations for reading the data. @@ -1114,7 +1114,7 @@ private[spark] class BlockManager( val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle") new FileSegmentManagedBuffer(transportConf, file, 0, file.length) } - Some(mangedBuffer) + Some(managedBuffer) } else { None } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 4d565511704d4..eada4b3ee2e38 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -357,7 +357,7 @@ class BlockManagerMasterEndpoint( blockLocations.remove(blockId) logWarning(s"No more replicas available for $blockId !") } else if (proactivelyReplicate && (blockId.isRDD || blockId.isInstanceOf[TestBlockId])) { - // As a heursitic, assume single executor failure to find out the number of replicas that + // As a heuristic, assume single executor failure to find out the number of replicas that // existed before failure val maxReplicas = locations.size + 1 val i = (new Random(blockId.hashCode)).nextInt(locations.size) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index 5f5a08fe0e574..cfe15eb832273 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -85,7 +85,7 @@ private[ui] class AllJobsPage(parent: JobsTab, store: AppStatusStore) extends We } // The timeline library treats contents as HTML, so we have to escape them. 
We need to add - // extra layers of escaping in order to embed this in a Javascript string literal. + // extra layers of escaping in order to embed this in a JavaScript string literal. val escapedDesc = Utility.escape(jobDescription) val jsEscapedDescForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedDesc)) val jsEscapedDescForLabel = StringEscapeUtils.escapeEcmaScript(escapedDesc) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index 19eccc5209b8e..c40e1bc248a49 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -68,7 +68,7 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP .getOrElse(System.currentTimeMillis()) // The timeline library treats contents as HTML, so we have to escape them. We need to add - // extra layers of escaping in order to embed this in a Javascript string literal. + // extra layers of escaping in order to embed this in a JavaScript string literal. val escapedName = Utility.escape(name) val jsEscapedNameForTooltip = StringEscapeUtils.escapeEcmaScript(Utility.escape(escapedName)) val jsEscapedNameForLabel = StringEscapeUtils.escapeEcmaScript(escapedName) diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 6ffd6605f75b8..7e2b9c72ad91b 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -285,7 +285,7 @@ private[spark] object ClosureCleaner extends Logging { logDebug(s" + outermost object is a closure, so we clone it: ${outermostClass}") } else if (outermostClass.getName.startsWith("$line")) { // SPARK-14558: if the outermost object is a REPL line object, we should clone - // and clean it as it may carray a lot of unnecessary information, + // and clean it as it may carry a lot of unnecessary information, // e.g. hadoop conf, spark conf, etc. logDebug(s" + outermost object is a REPL line object, so we clone it:" + s" ${outermostClass}") diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 71a310a4279ad..accf3d7c0d333 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -376,7 +376,7 @@ private[spark] object Utils extends Logging { * This returns a new InputStream which contains the same data as the original input stream. * It may be entirely on in-memory buffer, or it may be a combination of in-memory data, and then * continue to read from the original stream. The only real use of this is if the original input - * stream will potentially detect corruption while the data is being read (eg. from compression). + * stream will potentially detect corruption while the data is being read (e.g. from compression). * This allows for an eager check of corruption in the first maxSize bytes of data. 
* * @return An InputStream which includes all data from the original stream (combining buffered @@ -1067,20 +1067,20 @@ private[spark] object Utils extends Logging { } // checks if the hostport contains IPV6 ip and parses the host, port if (hostPort != null && hostPort.split(":").length > 2) { - val indx: Int = hostPort.lastIndexOf("]:") - if (-1 == indx) { + val index: Int = hostPort.lastIndexOf("]:") + if (-1 == index) { return setDefaultPortValue } - val port = hostPort.substring(indx + 2).trim() - val retval = (hostPort.substring(0, indx + 1).trim(), if (port.isEmpty) 0 else port.toInt) + val port = hostPort.substring(index + 2).trim() + val retval = (hostPort.substring(0, index + 1).trim(), if (port.isEmpty) 0 else port.toInt) hostPortParseResults.putIfAbsent(hostPort, retval) } else { - val indx: Int = hostPort.lastIndexOf(':') - if (-1 == indx) { + val index: Int = hostPort.lastIndexOf(':') + if (-1 == index) { return setDefaultPortValue } - val port = hostPort.substring(indx + 1).trim() - val retval = (hostPort.substring(0, indx).trim(), if (port.isEmpty) 0 else port.toInt) + val port = hostPort.substring(index + 1).trim() + val retval = (hostPort.substring(0, index).trim(), if (port.isEmpty) 0 else port.toInt) hostPortParseResults.putIfAbsent(hostPort, retval) } @@ -2854,11 +2854,11 @@ private[spark] object Utils extends Logging { if (lastDollarIndex < s.length - 1) { // The last char is not a dollar sign if (lastDollarIndex == -1 || !s.contains("$iw")) { - // The name does not have dollar sign or is not an intepreter + // The name does not have dollar sign or is not an interpreter // generated class, so we should return the full string s } else { - // The class name is intepreter generated, + // The class name is interpreter generated, // return the part after the last dollar sign // This is the same behavior as getClass.getSimpleName s.substring(lastDollarIndex + 1) diff --git a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala index 2c3730de08b5b..8635f1a3d702e 100644 --- a/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/io/ChunkedByteBuffer.scala @@ -193,7 +193,7 @@ private[spark] object ChunkedByteBuffer { length: Long): ChunkedByteBuffer = { // We do *not* memory map the file, because we may end up putting this into the memory store, // and spark currently is not expecting memory-mapped buffers in the memory store, it conflicts - // with other parts that manage the lifecyle of buffers and dispose them. See SPARK-25422. + // with other parts that manage the lifecycle of buffers and dispose them. See SPARK-25422. 
val is = new FileInputStream(file) ByteStreams.skipFully(is, offset) val in = new LimitedInputStream(is, length) diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index ee8e38c24b47f..df1d306e628a9 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -68,10 +68,10 @@ public class UnsafeShuffleWriterSuite { static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; - static final int NUM_PARTITITONS = 4; + static final int NUM_PARTITIONS = 4; TestMemoryManager memoryManager; TaskMemoryManager taskMemoryManager; - final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITITONS); + final HashPartitioner hashPartitioner = new HashPartitioner(NUM_PARTITIONS); File mergedOutputFile; File tempDir; long[] partitionSizesInMergedFile; @@ -194,7 +194,7 @@ private void assertSpillFilesWereCleanedUp() { private List> readRecordsFromFile() throws IOException { final ArrayList> recordsList = new ArrayList<>(); long startOffset = 0; - for (int i = 0; i < NUM_PARTITITONS; i++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { final long partitionSize = partitionSizesInMergedFile[i]; if (partitionSize > 0) { FileInputStream fin = new FileInputStream(mergedOutputFile); @@ -253,7 +253,7 @@ public void writeEmptyIterator() throws Exception { assertTrue(mapStatus.isDefined()); assertTrue(mergedOutputFile.exists()); assertEquals(0, spillFilesCreated.size()); - assertArrayEquals(new long[NUM_PARTITITONS], partitionSizesInMergedFile); + assertArrayEquals(new long[NUM_PARTITIONS], partitionSizesInMergedFile); assertEquals(0, taskMetrics.shuffleWriteMetrics().recordsWritten()); assertEquals(0, taskMetrics.shuffleWriteMetrics().bytesWritten()); assertEquals(0, taskMetrics.diskBytesSpilled()); @@ -264,7 +264,7 @@ public void writeEmptyIterator() throws Exception { public void writeWithoutSpilling() throws Exception { // In this example, each partition should have exactly one record: final ArrayList> dataToWrite = new ArrayList<>(); - for (int i = 0; i < NUM_PARTITITONS; i++) { + for (int i = 0; i < NUM_PARTITIONS; i++) { dataToWrite.add(new Tuple2<>(i, i)); } final UnsafeShuffleWriter writer = createWriter(true); diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java index dbaca71c5fdc3..e73ac0e9fb7a6 100644 --- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java @@ -1518,7 +1518,7 @@ public void testAsyncActionErrorWrapping() throws Exception { JavaFutureAction future = rdd.map(new BuggyMapFunction<>()).countAsync(); try { future.get(2, TimeUnit.SECONDS); - fail("Expected future.get() for failed job to throw ExcecutionException"); + fail("Expected future.get() for failed job to throw ExecutionException"); } catch (ExecutionException ee) { assertTrue(Throwables.getStackTraceAsString(ee).contains("Custom exception!")); } diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 21090e98ea285..e42df0821589b 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -635,12 +635,12 @@ class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { 
// Verify that RDD is checkpointed assert(rdd.firstParent.isInstanceOf[ReliableCheckpointRDD[_]]) val checkpointedRDD = rdd.firstParent.asInstanceOf[ReliableCheckpointRDD[_]] - val partiton = checkpointedRDD.partitions(0) - assert(!checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) + val partition = checkpointedRDD.partitions(0) + assert(!checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partition)) - val preferredLoc = checkpointedRDD.preferredLocations(partiton) - assert(checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partiton)) - assert(preferredLoc == checkpointedRDD.cachedPreferredLocations.get(partiton)) + val preferredLoc = checkpointedRDD.preferredLocations(partition) + assert(checkpointedRDD.cachedPreferredLocations.asMap.containsKey(partition)) + assert(preferredLoc == checkpointedRDD.cachedPreferredLocations.get(partition)) } } @@ -653,7 +653,7 @@ class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(1 to 200, numSlices = 4).repartition(1).mapPartitions { iter => iter.map { i => if (i > 100 && TaskContext.get().stageAttemptNumber() == 0) { - // throw new SparkException("Make first attemp failed.") + // throw new SparkException("Make first attempt failed.") // Throw FetchFailedException to explicitly trigger stage resubmission. // A normal exception will only trigger task resubmission in the same stage. throw new FetchFailedException(null, 0, 0L, 0, 0, "Fake") diff --git a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala index 81530a8fda84d..5434e82c95b1b 100644 --- a/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ContextCleanerSuite.scala @@ -368,7 +368,7 @@ class CleanerTester( val toBeCleanedRDDIds = new HashSet[Int] ++= rddIds val toBeCleanedShuffleIds = new HashSet[Int] ++= shuffleIds - val toBeCleanedBroadcstIds = new HashSet[Long] ++= broadcastIds + val toBeCleanedBroadcastIds = new HashSet[Long] ++= broadcastIds val toBeCheckpointIds = new HashSet[Long] ++= checkpointIds val isDistributed = !sc.isLocal @@ -384,7 +384,7 @@ class CleanerTester( } def broadcastCleaned(broadcastId: Long): Unit = { - toBeCleanedBroadcstIds.synchronized { toBeCleanedBroadcstIds -= broadcastId } + toBeCleanedBroadcastIds.synchronized { toBeCleanedBroadcastIds -= broadcastId } logInfo("Broadcast " + broadcastId + " cleaned") } @@ -508,8 +508,8 @@ class CleanerTester( val s2 = toBeCleanedShuffleIds.synchronized { toBeCleanedShuffleIds.toSeq.sorted.mkString("[", ", ", "]") } - val s3 = toBeCleanedBroadcstIds.synchronized { - toBeCleanedBroadcstIds.toSeq.sorted.mkString("[", ", ", "]") + val s3 = toBeCleanedBroadcastIds.synchronized { + toBeCleanedBroadcastIds.toSeq.sorted.mkString("[", ", ", "]") } s""" |\tRDDs = $s1 @@ -521,7 +521,7 @@ class CleanerTester( private def isAllCleanedUp = toBeCleanedRDDIds.synchronized { toBeCleanedRDDIds.isEmpty } && toBeCleanedShuffleIds.synchronized { toBeCleanedShuffleIds.isEmpty } && - toBeCleanedBroadcstIds.synchronized { toBeCleanedBroadcstIds.isEmpty } && + toBeCleanedBroadcastIds.synchronized { toBeCleanedBroadcastIds.isEmpty } && toBeCheckpointIds.synchronized { toBeCheckpointIds.isEmpty } private def getRDDBlocks(rddId: Int): Seq[BlockId] = { diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index d1edb80e40b21..c1269a9c91049 
100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -268,7 +268,7 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { test("add executors multiple profiles initial num same as needed") { // test when the initial number of executors equals the number needed for the first - // stage using a non default profile to make sure we request the intitial number + // stage using a non default profile to make sure we request the initial number // properly. Here initial is 2, each executor in ResourceProfile 1 can have 2 tasks // per executor, and start a stage with 4 tasks, which would need 2 executors. val clock = new ManualClock(8888L) diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index e9ee6b5dfb665..f953bf4043f33 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -170,7 +170,7 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { val nums = sc.makeRDD(1 to 3).map(x => (x, "a" * x)) // (1,a), (2,aa), (3,aaa) nums.saveAsSequenceFile(outputDir) // Similar to the tests above, we read a SequenceFile, but this time we pass type params - // that are convertable to Writable instead of calling sequenceFile[IntWritable, Text] + // that are convertible to Writable instead of calling sequenceFile[IntWritable, Text] val output1 = sc.sequenceFile[Int, String](outputDir) assert(output1.collect().toList === List((1, "a"), (2, "aa"), (3, "aaa"))) // Also try having one type be a subclass of Writable and one not diff --git a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala index e97b9d5d6bea6..eff4fd20d7fca 100644 --- a/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala +++ b/core/src/test/scala/org/apache/spark/benchmark/BenchmarkBase.scala @@ -21,7 +21,7 @@ import java.io.{File, FileOutputStream, OutputStream} /** * A base class for generate benchmark results to a file. - * For JDK9+, JDK major version number is added to the file names to distingush the results. + * For JDK9+, JDK major version number is added to the file names to distinguish the results. 
*/ abstract class BenchmarkBase { var output: Option[OutputStream] = None diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala index 0b0754be2f56f..3b8677742ca16 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala @@ -926,8 +926,8 @@ class FsHistoryProviderSuite extends SparkFunSuite with Matchers with Logging { oldProvider.listing.setMetadata(meta) oldProvider.stop() - val mistatchedVersionProvider = new FsHistoryProvider(conf) - assert(mistatchedVersionProvider.listing.count(classOf[ApplicationInfoWrapper]) === 0) + val mismatchedVersionProvider = new FsHistoryProvider(conf) + assert(mismatchedVersionProvider.listing.count(classOf[ApplicationInfoWrapper]) === 0) } test("invalidate cached UI") { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index a46799df069d6..b1b97a61ed1f0 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -784,7 +784,7 @@ class MasterSuite extends SparkFunSuite var worker: MockExecutorLaunchFailWorker = null try { val conf = new SparkConf() - // SPARK-32250: When running test on Github Action machine, the available processors in JVM + // SPARK-32250: When running test on GitHub Action machine, the available processors in JVM // is only 2, while on Jenkins it's 32. For this specific test, 2 available processors, which // also decides number of threads in Dispatcher, is not enough to consume the messages. In // the worst situation, MockExecutorLaunchFailWorker would occupy these 2 threads for diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 5bbd60f99f77e..8ed861ad34ea7 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -342,7 +342,7 @@ class WorkerSuite extends SparkFunSuite with Matchers with BeforeAndAfter { testWorkDirCleanupAndRemoveMetadataWithConfig(true) } - test("WorkdDirCleanup cleans only app dirs when" + + test("WorkDirCleanup cleans only app dirs when" + "spark.shuffle.service.db.enabled=false") { testWorkDirCleanupAndRemoveMetadataWithConfig(false) } diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 1326ae3c11a06..5b868604ecf94 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -552,7 +552,7 @@ class ExecutorSuite extends SparkFunSuite if (poll) { executor.metricsPoller.poll() } - executor.killAllTasks(true, "Killed task, eg. because of speculative execution") + executor.killAllTasks(true, "Killed task, e.g. 
because of speculative execution") } else { timedOut.set(true) } diff --git a/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala b/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala index 2bd32fc927e21..778f748f83950 100644 --- a/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/io/FileCommitProtocolInstantiationSuite.scala @@ -75,7 +75,7 @@ class FileCommitProtocolInstantiationSuite extends SparkFunSuite { /** * Create a classic two-arg protocol instance. - * @param dynamic dyanmic partitioning mode + * @param dynamic dynamic partitioning mode * @return the instance */ private def instantiateClassic(dynamic: Boolean): ClassicConstructorCommitProtocol = { @@ -88,7 +88,7 @@ class FileCommitProtocolInstantiationSuite extends SparkFunSuite { /** * Create a three-arg protocol instance. - * @param dynamic dyanmic partitioning mode + * @param dynamic dynamic partitioning mode * @return the instance */ private def instantiateNew( diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index 330347299ab56..905bb8110736d 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -213,7 +213,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext } // Computing the amount of bytes read for a cartesian operation is a little involved. - // Cartesian interleaves reads between two partitions eg. p1 and p2. + // Cartesian interleaves reads between two partitions e.g. p1 and p2. 
// Here are the steps: // 1) First it creates an iterator for p1 // 2) Creates an iterator for p2 diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala index 182c3c09e0524..c8a8f37212a82 100644 --- a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockTransferServiceSuite.scala @@ -88,7 +88,7 @@ class NettyBlockTransferServiceSuite } test("SPARK-27637: test fetch block with executor dead") { - implicit val exectionContext = ExecutionContext.global + implicit val executionContext = ExecutionContext.global val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index 2de4b109e40e9..a669993352fe7 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.mapred._ import org.apache.hadoop.mapreduce.{Job => NewJob, JobContext => NewJobContext, OutputCommitter => NewOutputCommitter, OutputFormat => NewOutputFormat, - RecordWriter => NewRecordWriter, TaskAttemptContext => NewTaskAttempContext} + RecordWriter => NewRecordWriter, TaskAttemptContext => NewTaskAttemptContext} import org.apache.hadoop.util.Progressable import org.scalatest.Assertions @@ -892,7 +892,7 @@ class FakeOutputFormat() extends OutputFormat[Integer, Integer]() { */ class NewFakeWriter extends NewRecordWriter[Integer, Integer] { - def close(p1: NewTaskAttempContext): Unit = () + def close(p1: NewTaskAttemptContext): Unit = () def write(p1: Integer, p2: Integer): Unit = () @@ -901,24 +901,24 @@ class NewFakeWriter extends NewRecordWriter[Integer, Integer] { class NewFakeCommitter extends NewOutputCommitter { def setupJob(p1: NewJobContext): Unit = () - def needsTaskCommit(p1: NewTaskAttempContext): Boolean = false + def needsTaskCommit(p1: NewTaskAttemptContext): Boolean = false - def setupTask(p1: NewTaskAttempContext): Unit = () + def setupTask(p1: NewTaskAttemptContext): Unit = () - def commitTask(p1: NewTaskAttempContext): Unit = () + def commitTask(p1: NewTaskAttemptContext): Unit = () - def abortTask(p1: NewTaskAttempContext): Unit = () + def abortTask(p1: NewTaskAttemptContext): Unit = () } class NewFakeFormat() extends NewOutputFormat[Integer, Integer]() { def checkOutputSpecs(p1: NewJobContext): Unit = () - def getRecordWriter(p1: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriter() } - def getOutputCommitter(p1: NewTaskAttempContext): NewOutputCommitter = { + def getOutputCommitter(p1: NewTaskAttemptContext): NewOutputCommitter = { new NewFakeCommitter() } } @@ -958,7 +958,7 @@ class FakeFormatWithCallback() extends FakeOutputFormat { } class NewFakeWriterWithCallback extends NewFakeWriter { - override def close(p1: NewTaskAttempContext): Unit = { + override def close(p1: NewTaskAttemptContext): Unit = { FakeWriterWithCallback.calledBy += "close" } @@ -972,7 +972,7 @@ class NewFakeWriterWithCallback extends NewFakeWriter { } class NewFakeFormatWithCallback() extends NewFakeFormat { - override def getRecordWriter(p1: 
NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + override def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriterWithCallback() } } @@ -982,27 +982,27 @@ class YetAnotherFakeCommitter extends NewOutputCommitter with Assertions { JobID.jobid = j.getJobID().getId } - def needsTaskCommit(t: NewTaskAttempContext): Boolean = false + def needsTaskCommit(t: NewTaskAttemptContext): Boolean = false - def setupTask(t: NewTaskAttempContext): Unit = { + def setupTask(t: NewTaskAttemptContext): Unit = { val jobId = t.getTaskAttemptID().getJobID().getId assert(jobId === JobID.jobid) } - def commitTask(t: NewTaskAttempContext): Unit = {} + def commitTask(t: NewTaskAttemptContext): Unit = {} - def abortTask(t: NewTaskAttempContext): Unit = {} + def abortTask(t: NewTaskAttemptContext): Unit = {} } class YetAnotherFakeFormat() extends NewOutputFormat[Integer, Integer]() { def checkOutputSpecs(j: NewJobContext): Unit = {} - def getRecordWriter(t: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + def getRecordWriter(t: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { new NewFakeWriter() } - def getOutputCommitter(t: NewTaskAttempContext): NewOutputCommitter = { + def getOutputCommitter(t: NewTaskAttemptContext): NewOutputCommitter = { new YetAnotherFakeCommitter() } } @@ -1021,7 +1021,7 @@ class ConfigTestFormat() extends NewFakeFormat() with Configurable { def getConf: Configuration = null - override def getRecordWriter(p1: NewTaskAttempContext): NewRecordWriter[Integer, Integer] = { + override def getRecordWriter(p1: NewTaskAttemptContext): NewRecordWriter[Integer, Integer] = { assert(setConfCalled, "setConf was never called") super.getRecordWriter(p1) } diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index 8962fd6740bf6..df8ac2ef744cd 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -1102,7 +1102,7 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { } } - test("RDD.partitions() fails fast when partitions indicies are incorrect (SPARK-13021)") { + test("RDD.partitions() fails fast when partitions indices are incorrect (SPARK-13021)") { class BadRDD[T: ClassTag](prev: RDD[T]) extends RDD[T](prev) { override def compute(part: Partition, context: TaskContext): Iterator[T] = { diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala index e8e8682e20ed4..eac45e6ac5801 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceUtilsSuite.scala @@ -221,7 +221,7 @@ class ResourceUtilsSuite extends SparkFunSuite val conf = new SparkConf assume(!(Utils.isWindows)) withTempDir { dir => - val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDisocveryScript", + val gpuDiscovery = createTempScriptWithExpectedOutput(dir, "gpuDiscoveryScript", """{"name": "gpu", "addresses": ["0", "1"]}""") conf.set(DRIVER_GPU_ID.amountConf, "2") conf.set(DRIVER_GPU_ID.discoveryScriptConf, gpuDiscovery) diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala index c2730f90ed982..fe6d0db837bda 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala 
+++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala @@ -73,7 +73,7 @@ class NettyRpcEnvSuite extends RpcEnvSuite with MockitoSugar with TimeLimits { val nettyEnv = env.asInstanceOf[NettyRpcEnv] val client = mock[TransportClient] - val senderAddress = RpcAddress("locahost", 12345) + val senderAddress = RpcAddress("localhost", 12345) val receiverAddress = RpcEndpointAddress("localhost", 54321, "test") val receiver = new NettyRpcEndpointRef(nettyEnv.conf, receiverAddress, nettyEnv) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 58aa246b7358f..194e0dfe312d5 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -2569,7 +2569,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val newTaskSet = taskSets(1) // 2 tasks should have been re-submitted, for tasks 0 and 1 (which ran on hostA). assert(newTaskSet.tasks.size === 2) - // Complete task 0 from the original task set (i.e., not hte one that's currently active). + // Complete task 0 from the original task set (i.e., not the one that's currently active). // This should still be counted towards the job being complete (but there's still one // outstanding task). runEvent(makeCompletionEvent(newTaskSet.tasks(0), Success, makeMapStatus("hostB", 2))) @@ -3057,7 +3057,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assertResultStageFailToRollback(shuffleMapRdd) } - private def assertResultStageNotRollbacked(mapRdd: MyRDD): Unit = { + private def assertResultStageNotRolledBack(mapRdd: MyRDD): Unit = { val shuffleDep = new ShuffleDependency(mapRdd, new HashPartitioner(2)) val shuffleId = shuffleDep.shuffleId val finalRdd = new MyRDD(sc, 2, List(shuffleDep), tracker = mapOutputTracker) @@ -3097,7 +3097,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti val shuffleMapRdd = new MyCheckpointRDD(sc, 2, Nil, indeterminate = true) shuffleMapRdd.checkpoint() shuffleMapRdd.doCheckpoint() - assertResultStageNotRollbacked(shuffleMapRdd) + assertResultStageNotRolledBack(shuffleMapRdd) } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala index e6fbf9b09d43d..cb50c7c959754 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala @@ -255,7 +255,7 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter with LocalSp /* * This is a dummy input stream that wraps another input stream but ends prematurely when - * reading at the specified position, throwing an EOFExeption. + * reading at the specified position, throwing an EOFException. 
*/ private class EarlyEOFInputStream(in: InputStream, failAtPos: Int) extends InputStream { private val countDown = new AtomicInteger(failAtPos) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala index 0874163b0e946..88d2868b957f9 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SchedulerIntegrationSuite.scala @@ -44,7 +44,7 @@ import org.apache.spark.util.{CallSite, ThreadUtils, Utils} * TaskSetManagers. * * Test cases are configured by providing a set of jobs to submit, and then simulating interaction - * with spark's executors via a mocked backend (eg., task completion, task failure, executors + * with spark's executors via a mocked backend (e.g., task completion, task failure, executors * disconnecting, etc.). */ abstract class SchedulerIntegrationSuite[T <: MockBackend: ClassTag] extends SparkFunSuite @@ -372,7 +372,7 @@ private[spark] abstract class MockBackend( /** * Accessed by both scheduling and backend thread, so should be protected by this. - * Most likely the only thing that needs to be protected are the inidividual ExecutorTaskStatus, + * Most likely the only thing that needs to be protected are the individual ExecutorTaskStatus, * but for simplicity in this mock just lock the whole backend. */ def executorIdToExecutor: Map[String, ExecutorTaskStatus] @@ -535,8 +535,8 @@ class BasicSchedulerIntegrationSuite extends SchedulerIntegrationSuite[SingleCor */ testScheduler("super simple job") { def runBackend(): Unit = { - val (taskDescripition, _) = backend.beginTask() - backend.taskSuccess(taskDescripition, 42) + val (taskDescription, _) = backend.beginTask() + backend.taskSuccess(taskDescription, 42) } withBackend(runBackend _) { val jobFuture = submit(new MockRDD(sc, 10, Nil), (0 until 10).toArray) diff --git a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala index a4a84b0e89809..d72744c5cc348 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala @@ -571,9 +571,9 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match } } - test("event queue size can be configued through spark conf") { + test("event queue size can be configured through spark conf") { // configure the shared queue size to be 1, event log queue size to be 2, - // and listner bus event queue size to be 5 + // and listener bus event queue size to be 5 val conf = new SparkConf(false) .set(LISTENER_BUS_EVENT_QUEUE_CAPACITY, 5) .set(s"spark.scheduler.listenerbus.eventqueue.${SHARED_QUEUE}.capacity", "1") @@ -593,7 +593,7 @@ class SparkListenerSuite extends SparkFunSuite with LocalSparkContext with Match // check the size of shared queue is 1 as configured assert(bus.getQueueCapacity(SHARED_QUEUE) == Some(1)) // no specific size of status queue is configured, - // it shoud use the LISTENER_BUS_EVENT_QUEUE_CAPACITY + // it should use the LISTENER_BUS_EVENT_QUEUE_CAPACITY assert(bus.getQueueCapacity(APP_STATUS_QUEUE) == Some(5)) // check the size of event log queue is 5 as configured assert(bus.getQueueCapacity(EVENT_LOG_QUEUE) == Some(2)) diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala 
b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala index a760dda3897df..3bf6cc226c0aa 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala @@ -377,8 +377,8 @@ class TaskSetManagerSuite // offers not accepted due to task set zombies are not delay schedule rejects manager.isZombie = true - val (taskDesciption, delayReject) = manager.resourceOffer("exec2", "host2", ANY) - assert(taskDesciption.isEmpty) + val (taskDescription, delayReject) = manager.resourceOffer("exec2", "host2", ANY) + assert(taskDescription.isEmpty) assert(delayReject === false) manager.isZombie = false @@ -1322,7 +1322,7 @@ class TaskSetManagerSuite test("SPARK-19868: DagScheduler only notified of taskEnd when state is ready") { // dagScheduler.taskEnded() is async, so it may *seem* ok to call it before we've set all - // appropriate state, eg. isZombie. However, this sets up a race that could go the wrong way. + // appropriate state, e.g. isZombie. However, this sets up a race that could go the wrong way. // This is a super-focused regression test which checks the zombie state as soon as // dagScheduler.taskEnded() is called, to ensure we haven't introduced a race. sc = new SparkContext("local", "test") diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 6ca1109791c35..a251c164a79ca 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -234,7 +234,7 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter { // Send two executor metrics update. Only update one metric to avoid a lot of boilerplate code. // The tasks are distributed among the two executors, so the executor-level metrics should - // hold half of the cummulative value of the metric being updated. + // hold half of the cumulative value of the metric being updated. 
Seq(1L, 2L).foreach { value => s1Tasks.foreach { task => val accum = new AccumulableInfo(1L, Some(InternalAccumulator.MEMORY_BYTES_SPILLED), diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 144489c5f7922..44b6f1b82e75a 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -1712,12 +1712,12 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE val externalShuffleServicePort = StorageUtils.externalShuffleServicePort(conf) val port = store.blockTransferService.port val rack = Some("rack") - val blockManagerWithTopolgyInfo = BlockManagerId( + val blockManagerWithTopologyInfo = BlockManagerId( store.blockManagerId.executorId, store.blockManagerId.host, store.blockManagerId.port, rack) - store.blockManagerId = blockManagerWithTopolgyInfo + store.blockManagerId = blockManagerWithTopologyInfo val locations = Seq( BlockManagerId("executor4", otherHost, externalShuffleServicePort, rack), BlockManagerId("executor3", otherHost, port, rack), diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 4cd1fc19f1484..7640c17166222 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -506,9 +506,9 @@ class JsonProtocolSuite extends SparkFunSuite { val oldExecutorMetricsJson = JsonProtocol.executorMetricsToJson(executorMetrics) .removeField( _._1 == "MappedPoolMemory") - val exepectedExecutorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, + val expectedExecutorMetrics = new ExecutorMetrics(Array(12L, 23L, 45L, 67L, 78L, 89L, 90L, 123L, 456L, 0L, 40L, 20L, 20L, 10L, 20L, 10L)) - assertEquals(exepectedExecutorMetrics, + assertEquals(expectedExecutorMetrics, JsonProtocol.executorMetricsFromJson(oldExecutorMetricsJson)) } @@ -978,8 +978,8 @@ private[spark] object JsonProtocolSuite extends Assertions { private val stackTrace = { Array[StackTraceElement]( new StackTraceElement("Apollo", "Venus", "Mercury", 42), - new StackTraceElement("Afollo", "Vemus", "Mercurry", 420), - new StackTraceElement("Ayollo", "Vesus", "Blackberry", 4200) + new StackTraceElement("Afollo", "Vemus", "Mercurry", 420), /* odd spellings intentional */ + new StackTraceElement("Ayollo", "Vesus", "Blackberry", 4200) /* odd spellings intentional */ ) } diff --git a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala index 6183ba9faa6b4..d669f2c655abb 100644 --- a/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala @@ -94,7 +94,7 @@ class SizeEstimatorSuite override def beforeEach(): Unit = { super.beforeEach() // Set the arch to 64-bit and compressedOops to true so that SizeEstimator - // provides identical results accross all systems in these tests. + // provides identical results across all systems in these tests. 
reinitializeSizeEstimator("amd64", "true") } diff --git a/docs/_plugins/include_example.rb b/docs/_plugins/include_example.rb index 6b4b1c652a81b..7d0e78738095e 100644 --- a/docs/_plugins/include_example.rb +++ b/docs/_plugins/include_example.rb @@ -66,10 +66,10 @@ def render(context) rendered_code + hint end - # Trim the code block so as to have the same indention, regardless of their positions in the + # Trim the code block so as to have the same indentation, regardless of their positions in the # code file. def trim_codeblock(lines) - # Select the minimum indention of the current code block. + # Select the minimum indentation of the current code block. min_start_spaces = lines .select { |l| l.strip.size !=0 } .map { |l| l[/\A */].size } diff --git a/docs/building-spark.md b/docs/building-spark.md index 73c527b7a5ed6..5106f2abd4187 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -273,7 +273,7 @@ Enable the profile (e.g. 2.13): # For sbt ./build/sbt -Pscala-2.13 compile -## Running Jenkins tests with Github Enterprise +## Running Jenkins tests with GitHub Enterprise To run tests with Jenkins: diff --git a/docs/configuration.md b/docs/configuration.md index 14ff38dac9b13..76494b04c9279 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2170,7 +2170,7 @@ Apart from these, the following properties are also available, and may be useful 120s The timeout in seconds to wait to acquire a new executor and schedule a task before aborting a - TaskSet which is unschedulable because all executors are exluded due to task failures. + TaskSet which is unschedulable because all executors are excluded due to task failures. 2.4.1 diff --git a/docs/css/main.css b/docs/css/main.css index 8b279a157c2b6..271113c904d26 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -254,7 +254,7 @@ a:hover code { position: relative; background-color: #FFF; max-width: 914px; - line-height: 1.6; /* Inspired by Github's wiki style */ + line-height: 1.6; /* Inspired by GitHub's wiki style */ padding-left: 15px; } @@ -263,7 +263,7 @@ a:hover code { position: relative; background-color: #FFF; max-width: 914px; - line-height: 1.6; /* Inspired by Github's wiki style */ + line-height: 1.6; /* Inspired by GitHub's wiki style */ padding-left: 30px; min-height: 100vh; } diff --git a/docs/graphx-programming-guide.md b/docs/graphx-programming-guide.md index 50c9366a0999f..a1026669dc4fd 100644 --- a/docs/graphx-programming-guide.md +++ b/docs/graphx-programming-guide.md @@ -571,7 +571,7 @@ messages to the source and destination attributes. Think of `sendMsg` as the reduce function in map-reduce. -The [`aggregateMessages`][Graph.aggregateMessages] operator returns an `VertexRDD[Msg]` +The [`aggregateMessages`][Graph.aggregateMessages] operator returns a `VertexRDD[Msg]` containing the aggregate message (of type `Msg`) destined to each vertex. Vertices that did not receive a message are not included in the returned `VertexRDD`[VertexRDD]. @@ -874,7 +874,7 @@ change the `VertexId` thereby enabling the same `HashMap` data structures to be `HashMap` and implement the join by linear scan rather than costly point lookups. The `aggregateUsingIndex` operator is useful for efficient construction of a new `VertexRDD`[VertexRDD] from an -`RDD[(VertexId, A)]`. 
Conceptually, if I have constructed an `VertexRDD[B]` over a set of vertices, *which is a super-set* of the vertices in some `RDD[(VertexId, A)]` then I can reuse the index to both aggregate and then subsequently index the `RDD[(VertexId, A)]`. For example: diff --git a/docs/ml-migration-guide.md b/docs/ml-migration-guide.md index 4e6d68f5a8cf4..43b8de83a9d8c 100644 --- a/docs/ml-migration-guide.md +++ b/docs/ml-migration-guide.md @@ -281,7 +281,7 @@ Several deprecated methods were removed in the `spark.mllib` and `spark.ml` pack * `weights` in `LinearRegression` and `LogisticRegression` in `spark.ml` * `setMaxNumIterations` in `mllib.optimization.LBFGS` (marked as `DeveloperApi`) * `treeReduce` and `treeAggregate` in `mllib.rdd.RDDFunctions` (these functions are available on `RDD`s directly, and were marked as `DeveloperApi`) -* `defaultStategy` in `mllib.tree.configuration.Strategy` +* `defaultStrategy` in `mllib.tree.configuration.Strategy` * `build` in `mllib.tree.Node` * libsvm loaders for multiclass and load/save labeledData methods in `mllib.util.MLUtils` diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 4cb2e259ccfbc..cc0c0e39e66f8 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -189,7 +189,7 @@ Refer to the [`PowerIterationClustering` Scala docs](api/scala/org/apache/spark/ [`PowerIterationClustering`](api/java/org/apache/spark/mllib/clustering/PowerIterationClustering.html) implements the PIC algorithm. -It takes an `JavaRDD` of `(srcId: Long, dstId: Long, similarity: Double)` tuples representing the +It takes a `JavaRDD` of `(srcId: Long, dstId: Long, similarity: Double)` tuples representing the affinity matrix. Calling `PowerIterationClustering.run` returns a [`PowerIterationClusteringModel`](api/java/org/apache/spark/mllib/clustering/PowerIterationClusteringModel.html) diff --git a/docs/mllib-data-types.md b/docs/mllib-data-types.md index 6d3b1a599d48b..ce4e6b8e05814 100644 --- a/docs/mllib-data-types.md +++ b/docs/mllib-data-types.md @@ -643,7 +643,7 @@ entries = sc.parallelize([MatrixEntry(0, 0, 1.2), MatrixEntry(1, 0, 2.1), Matrix # - or using (long, long, float) tuples: entries = sc.parallelize([(0, 0, 1.2), (1, 0, 2.1), (2, 1, 3.7)]) -# Create an CoordinateMatrix from an RDD of MatrixEntries. +# Create a CoordinateMatrix from an RDD of MatrixEntries. mat = CoordinateMatrix(entries) # Get its size. diff --git a/docs/monitoring.md b/docs/monitoring.md index 15a6cbd910210..c6105188f07ec 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -421,7 +421,7 @@ to handle the Spark Context setup and tear down. In addition to viewing the metrics in the UI, they are also available as JSON. This gives developers an easy way to create new visualizations and monitoring tools for Spark. The JSON is available for -both running applications, and in the history server. The endpoints are mounted at `/api/v1`. Eg., +both running applications, and in the history server. The endpoints are mounted at `/api/v1`. For example, for the history server, they would typically be accessible at `http://:18080/api/v1`, and for a running application, at `http://localhost:4040/api/v1`. @@ -951,11 +951,11 @@ These endpoints have been strongly versioned to make it easier to develop applic * Individual fields will never be removed for any given endpoint * New endpoints may be added * New fields may be added to existing endpoints -* New versions of the api may be added in the future as a separate endpoint (eg., `api/v2`). 
New versions are *not* required to be backwards compatible. +* New versions of the api may be added in the future as a separate endpoint (e.g., `api/v2`). New versions are *not* required to be backwards compatible. * Api versions may be dropped, but only after at least one minor release of co-existing with a new api version. Note that even when examining the UI of running applications, the `applications/[app-id]` portion is -still required, though there is only one application available. Eg. to see the list of jobs for the +still required, though there is only one application available. E.g. to see the list of jobs for the running app, you would go to `http://localhost:4040/api/v1/applications/[app-id]/jobs`. This is to keep the paths consistent in both modes. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5ec7a2c6f0bf4..71b7df8176d1b 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1079,7 +1079,7 @@ See the [configuration page](configuration.html) for information on Spark config 0.1 This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, and various systems processes. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. - This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This prempts this error with a higher default. + This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. 2.4.0 @@ -1402,4 +1402,4 @@ Kubernetes does not tell Spark the addresses of the resources allocated to each ### Stage Level Scheduling Overview Stage level scheduling is supported on Kubernetes when dynamic allocation is enabled. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. -Note, there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. If you create custom ResourceProfiles be sure to include all necessary resources there since the resources from the template file will not be propogated to custom ResourceProfiles. +Note, there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. 
If you create custom ResourceProfiles be sure to include all necessary resources there since the resources from the template file will not be propagated to custom ResourceProfiles. diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 80591bd08650a..8c0bac1815bbd 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -857,7 +857,7 @@ See the [configuration page](configuration.html) for information on Spark config host Provides support for the `local:///` scheme to reference the app jar resource in cluster mode. - If user uses a local resource (`local:///path/to/jar`) and the config option is not used it defaults to `host` eg. + If user uses a local resource (`local:///path/to/jar`) and the config option is not used it defaults to `host` e.g. the mesos fetcher tries to get the resource from the host's file system. If the value is unknown it prints a warning msg in the dispatcher logs and defaults to `host`. If the value is `container` then spark submit in the container will use the jar in the container's path: diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 73c4930dadbd5..797d18a0d4139 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -644,7 +644,7 @@ YARN does not tell Spark the addresses of the resources allocated to each contai # Stage Level Scheduling Overview Stage level scheduling is supported on YARN when dynamic allocation is enabled. One thing to note that is YARN specific is that each ResourceProfile requires a different container priority on YARN. The mapping is simply the ResourceProfile id becomes the priority, on YARN lower numbers are higher priority. This means that profiles created earlier will have a higher priority in YARN. Normally this won't matter as Spark finishes one stage before starting another one, the only case this might have an affect is in a job server type scenario, so its something to keep in mind. -Note there is a difference in the way custom resources are handled between the base default profile and custom ResourceProfiles. To allow for the user to request YARN containers with extra resources without Spark scheduling on them, the user can specify resources via the spark.yarn.executor.resource. config. Those configs are only used in the base default profile though and do not get propogated into any other custom ResourceProfiles. This is because there would be no way to remove them if you wanted a stage to not have them. This results in your default profile getting custom resources defined in spark.yarn.executor.resource. plus spark defined resources of GPU or FPGA. Spark converts GPU and FPGA resources into the YARN built in types yarn.io/gpu) and yarn.io/fpga, but does not know the mapping of any other resources. Any other Spark custom resources are not propogated to YARN for the default profile. So if you want Spark to schedule based off a custom resource and have it requested from YARN, you must specify it in both YARN (spark.yarn.{driver/executor}.resource.) and Spark (spark.{driver/executor}.resource.) configs. Leave the Spark config off if you only want YARN containers with the extra resources but Spark not to schedule using them. Now for custom ResourceProfiles, it doesn't currently have a way to only specify YARN resources without Spark scheduling off of them. This means for custom ResourceProfiles we propogate all the resources defined in the ResourceProfile to YARN. We still convert GPU and FPGA to the YARN build in types as well. 
This requires that the name of any custom resources you specify match what they are defined as in YARN. +Note there is a difference in the way custom resources are handled between the base default profile and custom ResourceProfiles. To allow for the user to request YARN containers with extra resources without Spark scheduling on them, the user can specify resources via the spark.yarn.executor.resource. config. Those configs are only used in the base default profile though and do not get propagated into any other custom ResourceProfiles. This is because there would be no way to remove them if you wanted a stage to not have them. This results in your default profile getting custom resources defined in spark.yarn.executor.resource. plus spark defined resources of GPU or FPGA. Spark converts GPU and FPGA resources into the YARN built in types yarn.io/gpu) and yarn.io/fpga, but does not know the mapping of any other resources. Any other Spark custom resources are not propagated to YARN for the default profile. So if you want Spark to schedule based off a custom resource and have it requested from YARN, you must specify it in both YARN (spark.yarn.{driver/executor}.resource.) and Spark (spark.{driver/executor}.resource.) configs. Leave the Spark config off if you only want YARN containers with the extra resources but Spark not to schedule using them. Now for custom ResourceProfiles, it doesn't currently have a way to only specify YARN resources without Spark scheduling off of them. This means for custom ResourceProfiles we propagate all the resources defined in the ResourceProfile to YARN. We still convert GPU and FPGA to the YARN build in types as well. This requires that the name of any custom resources you specify match what they are defined as in YARN. # Important notes diff --git a/docs/sparkr.md b/docs/sparkr.md index 05310f89f278d..002da5a56fa9e 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -671,7 +671,7 @@ Arrow R library is available on CRAN and it can be installed as below. ```bash Rscript -e 'install.packages("arrow", repos="https://cloud.r-project.org/")' ``` -Please refer [the official documentation of Apache Arrow](https://arrow.apache.org/docs/r/) for more detials. +Please refer [the official documentation of Apache Arrow](https://arrow.apache.org/docs/r/) for more details. Note that you must ensure that Arrow R package is installed and available on all cluster nodes. The current supported minimum version is 1.0.0; however, this might change between the minor releases since Arrow optimization in SparkR is experimental. diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index b95be0974585e..7d60915e2a65e 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -131,7 +131,7 @@ the following case-insensitive options: fetchsize - The JDBC fetch size, which determines how many rows to fetch per round trip. This can help performance on JDBC drivers which default to low fetch size (eg. Oracle with 10 rows). This option applies only to reading. + The JDBC fetch size, which determines how many rows to fetch per round trip. This can help performance on JDBC drivers which default to low fetch size (e.g. Oracle with 10 rows). This option applies only to reading. 
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 7997090e710a9..2c86e7a932637 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -333,7 +333,7 @@ license: | - - Since Spark 2.4, when there is a struct field in front of the IN operator before a subquery, the inner query must contain a struct field as well. In previous versions, instead, the fields of the struct were compared to the output of the inner query. Eg. if `a` is a `struct(a string, b int)`, in Spark 2.4 `a in (select (1 as a, 'a' as b) from range(1))` is a valid query, while `a in (select 1, 'a' from range(1))` is not. In previous version it was the opposite. + - Since Spark 2.4, when there is a struct field in front of the IN operator before a subquery, the inner query must contain a struct field as well. In previous versions, instead, the fields of the struct were compared to the output of the inner query. For example, if `a` is a `struct(a string, b int)`, in Spark 2.4 `a in (select (1 as a, 'a' as b) from range(1))` is a valid query, while `a in (select 1, 'a' from range(1))` is not. In previous version it was the opposite. - In versions 2.2.1+ and 2.3, if `spark.sql.caseSensitive` is set to true, then the `CURRENT_DATE` and `CURRENT_TIMESTAMP` functions incorrectly became case-sensitive and would resolve to columns (unless typed in lower case). In Spark 2.4 this has been fixed and the functions are no longer case-sensitive. @@ -532,11 +532,11 @@ license: | - Since Spark 2.3, by default arithmetic operations between decimals return a rounded value if an exact representation is not possible (instead of returning NULL). This is compliant with SQL ANSI 2011 specification and Hive's new behavior introduced in Hive 2.2 (HIVE-15331). This involves the following changes - - The rules to determine the result type of an arithmetic operation have been updated. In particular, if the precision / scale needed are out of the range of available values, the scale is reduced up to 6, in order to prevent the truncation of the integer part of the decimals. All the arithmetic operations are affected by the change, ie. addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`), remainder (`%`) and positive module (`pmod`). + - The rules to determine the result type of an arithmetic operation have been updated. In particular, if the precision / scale needed are out of the range of available values, the scale is reduced up to 6, in order to prevent the truncation of the integer part of the decimals. All the arithmetic operations are affected by the change, i.e. addition (`+`), subtraction (`-`), multiplication (`*`), division (`/`), remainder (`%`) and positive modulus (`pmod`). - Literal values used in SQL operations are converted to DECIMAL with the exact precision and scale needed by them. - - The configuration `spark.sql.decimalOperations.allowPrecisionLoss` has been introduced. It defaults to `true`, which means the new behavior described here; if set to `false`, Spark uses previous rules, ie. it doesn't adjust the needed scale to represent the values and it returns NULL if an exact representation of the value is not possible. + - The configuration `spark.sql.decimalOperations.allowPrecisionLoss` has been introduced. It defaults to `true`, which means the new behavior described here; if set to `false`, Spark uses previous rules, i.e. it doesn't adjust the needed scale to represent the values and it returns NULL if an exact representation of the value is not possible. 
- Un-aliased subquery's semantic has not been well defined with confusing behaviors. Since Spark 2.3, we invalidate such confusing cases, for example: `SELECT v.i from (SELECT i FROM v)`, Spark will throw an analysis exception in this case because users should not be able to use the qualifier inside a subquery. See [SPARK-20690](https://issues.apache.org/jira/browse/SPARK-20690) and [SPARK-21335](https://issues.apache.org/jira/browse/SPARK-21335) for more details. diff --git a/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md b/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md index 47dd2be77ae90..ada86d8dd3913 100644 --- a/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md +++ b/docs/sql-ref-syntax-aux-conf-mgmt-set-timezone.md @@ -43,7 +43,7 @@ SET TIME ZONE INTERVAL interval_literal * **interval_literal** - The [interval literal](sql-ref-literals.html#interval-literal) represents the difference between the session time zone to the 'UTC'. It must be in the range of [-18, 18] hours and max to second precision, e.g. `INTERVAL 2 HOURS 30 MINITUES` or `INTERVAL '15:40:32' HOUR TO SECOND`. + The [interval literal](sql-ref-literals.html#interval-literal) represents the difference between the session time zone to the 'UTC'. It must be in the range of [-18, 18] hours and max to second precision, e.g. `INTERVAL 2 HOURS 30 MINUTES` or `INTERVAL '15:40:32' HOUR TO SECOND`. ### Examples diff --git a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md index 3a8c8d5b1160a..11ec2f1d9ea85 100644 --- a/docs/sql-ref-syntax-ddl-create-table-hiveformat.md +++ b/docs/sql-ref-syntax-ddl-create-table-hiveformat.md @@ -42,10 +42,10 @@ CREATE [ EXTERNAL ] TABLE [ IF NOT EXISTS ] table_identifier row_format: : SERDE serde_class [ WITH SERDEPROPERTIES (k1=v1, k2=v2, ... ) ] - | DELIMITED [ FIELDS TERMINATED BY fields_termiated_char [ ESCAPED BY escaped_char ] ] - [ COLLECTION ITEMS TERMINATED BY collection_items_termiated_char ] - [ MAP KEYS TERMINATED BY map_key_termiated_char ] - [ LINES TERMINATED BY row_termiated_char ] + | DELIMITED [ FIELDS TERMINATED BY fields_terminated_char [ ESCAPED BY escaped_char ] ] + [ COLLECTION ITEMS TERMINATED BY collection_items_terminated_char ] + [ MAP KEYS TERMINATED BY map_key_terminated_char ] + [ LINES TERMINATED BY row_terminated_char ] [ NULL DEFINED AS null_char ] ``` diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index ed5da2b2d28df..39d15808d033e 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -69,11 +69,11 @@ INSERT INTO students VALUES ('Amy Smith', '123 Park Ave, San Jose', 111111); SELECT * FROM students; -+---------+---------------------+----------+ -| name| address|student_id| -+---------+---------------------+----------+ -|Amy Smith|123 Park Ave,San Jose| 111111| -+---------+---------------------+----------+ ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Amy Smith|123 Park Ave, San Jose| 111111| ++---------+----------------------+----------+ ``` #### Multi-Row Insert Using a VALUES Clause @@ -100,29 +100,29 @@ SELECT * FROM students; ```sql -- Assuming the persons table has already been created and populated. 
SELECT * FROM persons; -+-------------+-------------------------+---------+ -| name| address| ssn| -+-------------+-------------------------+---------+ -|Dora Williams|134 Forest Ave, Melo Park|123456789| -+-------------+-------------------------+---------+ -| Eddie Davis| 245 Market St, Milpitas|345678901| -+-------------+-------------------------+---------+ ++-------------+--------------------------+---------+ +| name| address| ssn| ++-------------+--------------------------+---------+ +|Dora Williams|134 Forest Ave, Menlo Park|123456789| ++-------------+--------------------------+---------+ +| Eddie Davis| 245 Market St, Milpitas|345678901| ++-------------+--------------------------+---------+ INSERT INTO students PARTITION (student_id = 444444) SELECT name, address FROM persons WHERE name = "Dora Williams"; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ ``` #### Insert Using a TABLE Statement @@ -141,21 +141,21 @@ SELECT * FROM visiting_students; INSERT INTO students TABLE visiting_students; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave,San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ -|Fleur Laurent| 345 Copper St, London| 777777| -+-------------+-------------------------+----------+ -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ +|Fleur Laurent| 345 Copper St, London| 777777| ++-------------+--------------------------+----------+ +|Gordon Martin| 779 Lake Ave, 
Oxford| 888888| ++-------------+--------------------------+----------+ ``` #### Insert Using a FROM Statement @@ -177,25 +177,25 @@ INSERT INTO students FROM applicants SELECT name, address, id applicants WHERE qualified = true; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -+-------------+-------------------------+----------+ -| Bob Brown| 456 Taylor St, Cupertino| 222222| -+-------------+-------------------------+----------+ -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 444444| -+-------------+-------------------------+----------+ -|Fleur Laurent| 345 Copper St, London| 777777| -+-------------+-------------------------+----------+ -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -+-------------+-------------------------+----------+ -| Helen Davis|469 Mission St, San Diego| 999999| -+-------------+-------------------------+----------+ -| Jason Wang| 908 Bird St, Saratoga| 121212| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| ++-------------+--------------------------+----------+ +| Bob Brown| 456 Taylor St, Cupertino| 222222| ++-------------+--------------------------+----------+ +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 444444| ++-------------+--------------------------+----------+ +|Fleur Laurent| 345 Copper St, London| 777777| ++-------------+--------------------------+----------+ +|Gordon Martin| 779 Lake Ave, Oxford| 888888| ++-------------+--------------------------+----------+ +| Helen Davis| 469 Mission St, San Diego| 999999| ++-------------+--------------------------+----------+ +| Jason Wang| 908 Bird St, Saratoga| 121212| ++-------------+--------------------------+----------+ ``` ### Related Statements diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index ecfd060dfd5ee..638dcb34bb1d2 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -64,18 +64,18 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] ```sql -- Assuming the students table has already been created and populated. 
SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Amy Smith| 123 Park Ave, San Jose| 111111| -| Bob Brown| 456 Taylor St, Cupertino| 222222| -|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| -|Dora Williams|134 Forest Ave, Melo Park| 444444| -|Fleur Laurent| 345 Copper St, London| 777777| -|Gordon Martin| 779 Lake Ave, Oxford| 888888| -| Helen Davis|469 Mission St, San Diego| 999999| -| Jason Wang| 908 Bird St, Saratoga| 121212| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Amy Smith| 123 Park Ave, San Jose| 111111| +| Bob Brown| 456 Taylor St, Cupertino| 222222| +|Cathy Johnson| 789 Race Ave, Palo Alto| 333333| +|Dora Williams|134 Forest Ave, Menlo Park| 444444| +|Fleur Laurent| 345 Copper St, London| 777777| +|Gordon Martin| 779 Lake Ave, Oxford| 888888| +| Helen Davis| 469 Mission St, San Diego| 999999| +| Jason Wang| 908 Bird St, Saratoga| 121212| ++-------------+--------------------------+----------+ INSERT OVERWRITE students VALUES ('Ashua Hill', '456 Erica Ct, Cupertino', 111111), @@ -95,25 +95,25 @@ SELECT * FROM students; ```sql -- Assuming the persons table has already been created and populated. SELECT * FROM persons; -+-------------+-------------------------+---------+ -| name| address| ssn| -+-------------+-------------------------+---------+ -|Dora Williams|134 Forest Ave, Melo Park|123456789| -+-------------+-------------------------+---------+ -| Eddie Davis| 245 Market St,Milpitas|345678901| -+-------------+-------------------------+---------+ ++-------------+--------------------------+---------+ +| name| address| ssn| ++-------------+--------------------------+---------+ +|Dora Williams|134 Forest Ave, Menlo Park|123456789| ++-------------+--------------------------+---------+ +| Eddie Davis| 245 Market St, Milpitas|345678901| ++-------------+--------------------------+---------+ INSERT OVERWRITE students PARTITION (student_id = 222222) SELECT name, address FROM persons WHERE name = "Dora Williams"; SELECT * FROM students; -+-------------+-------------------------+----------+ -| name| address|student_id| -+-------------+-------------------------+----------+ -| Ashua Hill| 456 Erica Ct, Cupertino| 111111| -+-------------+-------------------------+----------+ -|Dora Williams|134 Forest Ave, Melo Park| 222222| -+-------------+-------------------------+----------+ ++-------------+--------------------------+----------+ +| name| address|student_id| ++-------------+--------------------------+----------+ +| Ashua Hill| 456 Erica Ct, Cupertino| 111111| ++-------------+--------------------------+----------+ +|Dora Williams|134 Forest Ave, Menlo Park| 222222| ++-------------+--------------------------+----------+ ``` #### Insert Using a TABLE Statement diff --git a/docs/sql-ref-syntax-qry-select-groupby.md b/docs/sql-ref-syntax-qry-select-groupby.md index 934e5f70d4b08..ef9de1f594a31 100644 --- a/docs/sql-ref-syntax-qry-select-groupby.md +++ b/docs/sql-ref-syntax-qry-select-groupby.md @@ -269,7 +269,7 @@ INSERT INTO person VALUES (300, 'Mike', 80), (400, 'Dan', 50); ---Select the first row in cloumn age +--Select the first row in column age SELECT FIRST(age) FROM person; +--------------------+ | first(age, false) | @@ -277,7 +277,7 @@ SELECT FIRST(age) FROM person; | NULL | +--------------------+ ---Get the first row in 
cloumn `age` ignore nulls,last row in column `id` and sum of cloumn `id`. +--Get the first row in column `age` ignore nulls,last row in column `id` and sum of column `id`. SELECT FIRST(age IGNORE NULLS), LAST(id), SUM(id) FROM person; +-------------------+------------------+----------+ | first(age, true) | last(id, false) | sum(id) | diff --git a/docs/sql-ref-syntax-qry-select-lateral-view.md b/docs/sql-ref-syntax-qry-select-lateral-view.md index f742c8fa57043..c854625a1a959 100644 --- a/docs/sql-ref-syntax-qry-select-lateral-view.md +++ b/docs/sql-ref-syntax-qry-select-lateral-view.md @@ -58,7 +58,7 @@ INSERT INTO person VALUES (400, 'Dan', 50, 4, 'Street 4'); SELECT * FROM person - LATERAL VIEW EXPLODE(ARRAY(30, 60)) tabelName AS c_age + LATERAL VIEW EXPLODE(ARRAY(30, 60)) tableName AS c_age LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age; +------+-------+-------+--------+-----------+--------+--------+ | id | name | age | class | address | c_age | d_age | @@ -93,14 +93,14 @@ GROUP BY c_age; +--------+-----------+ SELECT * FROM person - LATERAL VIEW EXPLODE(ARRAY()) tabelName AS c_age; + LATERAL VIEW EXPLODE(ARRAY()) tableName AS c_age; +-----+-------+------+--------+----------+--------+ | id | name | age | class | address | c_age | +-----+-------+------+--------+----------+--------+ +-----+-------+------+--------+----------+--------+ SELECT * FROM person - LATERAL VIEW OUTER EXPLODE(ARRAY()) tabelName AS c_age; + LATERAL VIEW OUTER EXPLODE(ARRAY()) tableName AS c_age; +------+-------+-------+--------+-----------+--------+ | id | name | age | class | address | c_age | +------+-------+-------+--------+-----------+--------+ diff --git a/docs/sql-ref-syntax-qry-select-orderby.md b/docs/sql-ref-syntax-qry-select-orderby.md index 13f0ae40cb828..552ee9be66d1e 100644 --- a/docs/sql-ref-syntax-qry-select-orderby.md +++ b/docs/sql-ref-syntax-qry-select-orderby.md @@ -28,7 +28,7 @@ clause, this clause guarantees a total order in the output. ### Syntax ```sql -ORDER BY { expression [ sort_direction | nulls_sort_oder ] [ , ... ] } +ORDER BY { expression [ sort_direction | nulls_sort_order ] [ , ... ] } ``` ### Parameters diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala index a785d063f1476..3dea244c77226 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/ClusteringMetrics.scala @@ -127,7 +127,7 @@ private[evaluation] abstract class Silhouette { * `$a_{i}$` can be interpreted as how well `i` is assigned to its cluster * (the smaller the value, the better the assignment), while `$b_{i}$` is * a measure of how well `i` has not been assigned to its "neighboring cluster", - * ie. the nearest cluster to `i`. + * i.e. the nearest cluster to `i`. * * Unfortunately, the naive implementation of the algorithm requires to compute * the distance of each couple of points in the dataset. Since the computation of @@ -486,7 +486,7 @@ private[evaluation] object CosineSilhouette extends Silhouette { * for the point. * @param weightCol The name of the column which contains the instance weight. * @return A [[scala.collection.immutable.Map]] which associates each cluster id to a - * its statistics (ie. the precomputed values `N` and `$\Omega_{\Gamma}$`). + * its statistics (i.e. the precomputed values `N` and `$\Omega_{\Gamma}$`). 
*/ def computeClusterStats( df: DataFrame, diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 5ed7619fce5dc..2ec7a8632e39d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -112,7 +112,7 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) (Seq($(inputCol)), Seq($(outputCol)), Seq($(threshold))) } - val ouputCols = inputColNames.zip(tds).map { case (inputColName, td) => + val mappedOutputCols = inputColNames.zip(tds).map { case (inputColName, td) => val binarizerUDF = dataset.schema(inputColName).dataType match { case DoubleType => udf { in: Double => if (in > td) 1.0 else 0.0 } @@ -147,8 +147,8 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) binarizerUDF(col(inputColName)) } - val ouputMetadata = outputColNames.map(outputSchema(_).metadata) - dataset.withColumns(outputColNames, ouputCols, ouputMetadata) + val outputMetadata = outputColNames.map(outputSchema(_).metadata) + dataset.withColumns(outputColNames, mappedOutputCols, outputMetadata) } @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala index 46052a89fdf1a..41de26dff03ab 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Selector.scala @@ -77,7 +77,7 @@ private[feature] trait SelectorParams extends Params * @group param */ @Since("3.1.0") - final val fpr = new DoubleParam(this, "fpr", "The higest p-value for features to be kept.", + final val fpr = new DoubleParam(this, "fpr", "The highest p-value for features to be kept.", ParamValidators.inRange(0, 1)) /** @group getParam */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index b6ed4f2b000cc..8bcd7909b6078 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -166,11 +166,11 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String } val (inputColNames, outputColNames) = getInOutCols() - val ouputCols = inputColNames.map { inputColName => + val outputCols = inputColNames.map { inputColName => t(col(inputColName)) } - val ouputMetadata = outputColNames.map(outputSchema(_).metadata) - dataset.withColumns(outputColNames, ouputCols, ouputMetadata) + val outputMetadata = outputColNames.map(outputSchema(_).metadata) + dataset.withColumns(outputColNames, outputCols, outputMetadata) } @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala index 5efcf0dce68a2..37b715930a501 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/image/ImageSchema.scala @@ -133,7 +133,7 @@ object ImageSchema { val img = try { ImageIO.read(new ByteArrayInputStream(bytes)) } catch { - // Catch runtime exception because `ImageIO` may throw unexcepted `RuntimeException`. + // Catch runtime exception because `ImageIO` may throw unexpected `RuntimeException`. 
// But do not catch the declared `IOException` (regarded as FileSystem failure) case _: RuntimeException => null } diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala index 1b5f77a9ae897..594d9f315f508 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/AFTSurvivalRegressionWrapper.scala @@ -88,9 +88,9 @@ private[r] object AFTSurvivalRegressionWrapper extends MLReadable[AFTSurvivalReg aggregationDepth: Int, stringIndexerOrderType: String): AFTSurvivalRegressionWrapper = { - val (rewritedFormula, censorCol) = formulaRewrite(formula) + val (rewrittenFormula, censorCol) = formulaRewrite(formula) - val rFormula = new RFormula().setFormula(rewritedFormula) + val rFormula = new RFormula().setFormula(rewrittenFormula) .setStringIndexerOrderType(stringIndexerOrderType) RWrapperUtils.checkDataColumns(rFormula, data) val rFormulaModel = rFormula.fit(data) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala index 84c0985245a2e..f70baa4ddd393 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala @@ -555,7 +555,7 @@ object FMRegressionModel extends MLReadable[FMRegressionModel] { * \hat{y} = p\left( y_{fm} \right) * }}} * p is the prediction function, for binary classification task is sigmoid. - * The loss funcation gradient formula: + * The loss function gradient formula: * {{{ * \frac{\partial}{\partial\theta} l\left( \hat{y},y \right) = * \frac{\partial}{\partial\theta} l\left( p\left( y_{fm} \right),y \right) = diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 087c2c2639831..90cc4fb13b995 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -146,7 +146,7 @@ class SVMWithSGD private ( /** * Construct a SVM object with default parameters: {stepSize: 1.0, numIterations: 100, - * regParm: 0.01, miniBatchFraction: 1.0}. + * regParam: 0.01, miniBatchFraction: 1.0}. */ @Since("0.8.0") def this() = this(1.0, 100, 0.01, 1.0) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala index bffed61c291ea..9ac473aabecea 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/DistanceMeasure.scala @@ -41,7 +41,7 @@ private[spark] abstract class DistanceMeasure extends Serializable { * 1, if i != j: a bound r = matrix(i,j) to help avoiding unnecessary distance * computation. Given point x, let i be current closest center, and d be current best * distance, if d < f(r), then we no longer need to compute the distance to center j; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If distance + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If distance * between point x and center i is less than f(r), then center i is the closest center * to point x. 
*/ @@ -268,7 +268,7 @@ private[spark] class EuclideanDistanceMeasure extends DistanceMeasure { * squared distance, if d < r, then we no longer need to compute the distance to center * j. matrix(i,j) equals to squared of half of Euclidean distance between centers i * and j; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If squared + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If squared * distance between point x and center i is less than r, then center i is the closest * center to point x. */ @@ -405,7 +405,7 @@ private[spark] class CosineDistanceMeasure extends DistanceMeasure { * is used instead of Cosine distance to compute matrix(i,j): for centers i and j, * compute the radian/angle between them, halving it, and converting it back to Cosine * distance at the end; - * 2, if i == j: a bound r = matrix(i,i) = min_k{maxtrix(i,k)|k!=i}. If Cosine + * 2, if i == j: a bound r = matrix(i,i) = min_k{matrix(i,k)|k!=i}. If Cosine * distance between point x and center i is less than r, then center i is the closest * center to point x. */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index b2742ee6ecb5b..c9f6d789d6740 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -466,7 +466,7 @@ final class OnlineLDAOptimizer extends LDAOptimizer with Logging { val seed = randomGenerator.nextLong() // If and only if optimizeDocConcentration is set true, // we calculate logphat in the same pass as other statistics. - // No calculation of loghat happens otherwise. + // No calculation of logphat happens otherwise. val logphatPartOptionBase = () => if (optimizeDocConcentration) { Some(BDV.zeros[Double](k)) } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index 3c9b806d616fc..111030dada491 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.random.XORShiftRandom * doing a single iteration of the standard k-means algorithm. * * The update algorithm uses the "mini-batch" KMeans rule, - * generalized to incorporate forgetfullness (i.e. decay). + * generalized to incorporate forgetfulness (i.e. decay). * The update rule (for each cluster) is: * *
      diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala index c165d4810c934..f7c6d09f5e437 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/PCA.scala @@ -48,11 +48,11 @@ class PCA @Since("1.4.0") (@Since("1.4.0") val k: Int) { val mat = if (numFeatures > 65535) { val summary = Statistics.colStats(sources.map((_, 1.0)), Seq("mean")) val mean = Vectors.fromML(summary.mean) - val meanCentredRdd = sources.map { row => + val meanCenteredRdd = sources.map { row => BLAS.axpy(-1, mean, row) row } - new RowMatrix(meanCentredRdd) + new RowMatrix(meanCenteredRdd) } else { require(PCAUtil.memoryCost(k, numFeatures) < Int.MaxValue, "The param k and numFeatures is too large for SVD computation. " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 13899fa8296f6..eeb583f84ca8b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -560,7 +560,7 @@ class Word2VecModel private[spark] ( /** * Find synonyms of the vector representation of a word, possibly - * including any words in the model vocabulary whose vector respresentation + * including any words in the model vocabulary whose vector representation * is the supplied vector. * @param vector vector representation of a word * @param num number of synonyms to find diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 601c7da30ffed..606e2f2f212ca 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -88,8 +88,8 @@ class AssociationRules private[fpm] ( // Join to get (X, ((Y, freq(X union Y)), freq(X))), generate rules, and filter by confidence candidates.join(freqItemsets.map(x => (x.items.toSeq, x.freq))) - .map { case (antecendent, ((consequent, freqUnion), freqAntecedent)) => - new Rule(antecendent.toArray, + .map { case (antecedent, ((consequent, freqUnion), freqAntecedent)) => + new Rule(antecedent.toArray, consequent.toArray, freqUnion, freqAntecedent, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index c618b71ddc5a8..d546f0c1a8e19 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -693,11 +693,11 @@ class RowMatrix @Since("1.0.0") ( val pBV = sc.broadcast(colMagsCorrected.map(c => sg / c)) val qBV = sc.broadcast(colMagsCorrected.map(c => math.min(sg, c))) - val sims = rows.mapPartitionsWithIndex { (indx, iter) => + val sims = rows.mapPartitionsWithIndex { (index, iter) => val p = pBV.value val q = qBV.value - val rand = new XORShiftRandom(indx) + val rand = new XORShiftRandom(index) val scaled = new Array[Double](p.size) iter.flatMap { row => row match { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala index d17f7047c5b2b..778de30e756c0 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/KolmogorovSmirnovTest.scala @@ -91,7 +91,7 @@ private[stat] object KolmogorovSmirnovTest extends Logging { * @param partData `Iterator[Double]` 1 partition of a sorted RDD * @param n `Double` the total size of the RDD * @param cdf `Double => Double` a function the calculates the theoretical CDF of a value - * @return `Iterator[(Double, Double)] `Unadjusted (ie. off by a constant) potential extrema + * @return `Iterator[(Double, Double)] `Unadjusted (i.e. off by a constant) potential extrema * in a partition. The first element corresponds to the (empirical CDF - 1/N) - CDF, * the second element corresponds to empirical CDF - CDF. We can then search the resulting * iterator for the minimum of the first and the maximum of the second element, and provide diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java index 6480b57e1f796..af32e03854b53 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java @@ -41,7 +41,7 @@ public void javaCompatibilityTest() { .setOutputCol("filtered"); List data = Arrays.asList( - RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), + RowFactory.create(Arrays.asList("I", "saw", "the", "red", "balloon")), RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) ); StructType schema = new StructType(new StructField[]{ diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala index b35f964c959bf..0eae23df8358d 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/GaussianMixtureSuite.scala @@ -181,7 +181,7 @@ class GaussianMixtureSuite extends MLTest with DefaultReadWriteTest { } } - test("multivariate data and check againt R mvnormalmixEM") { + test("multivariate data and check against R mvnormalmixEM") { /* Using the following R code to generate data and train the model using mixtools package. library(mvtnorm) diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala index 5ee161ce8dd33..deaad2bd54d0e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -36,7 +36,7 @@ class RegressionEvaluatorSuite test("Regression Evaluator: default params") { /** * Here is the instruction describing how to export the test data into CSV format - * so we can validate the metrics compared with R's mmetric package. + * so we can validate the metrics compared with R's mmetric function. 
* * import org.apache.spark.mllib.util.LinearDataGenerator * val data = sc.parallelize(LinearDataGenerator.generateLinearInput(6.3, diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala index 1e1ab206cc1c2..0d664e421da4c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ANOVASelectorSuite.scala @@ -133,35 +133,35 @@ class ANOVASelectorSuite extends MLTest with DefaultReadWriteTest { ParamsSuite.checkParams(new ANOVASelector()) } - test("Test ANOVAFValue calssification selector: numTopFeatures") { + test("Test ANOVAFValue classification selector: numTopFeatures") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("numTopFeatures").setNumTopFeatures(1) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: percentile") { + test("Test ANOVAFValue classification selector: percentile") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("percentile").setPercentile(0.17) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fpr") { + test("Test ANOVAFValue classification selector: fpr") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fpr").setFpr(1.0E-12) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fdr") { + test("Test ANOVAFValue classification selector: fdr") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fdr").setFdr(6.0E-12) val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) } - test("Test ANOVAFValue calssification selector: fwe") { + test("Test ANOVAFValue classification selector: fwe") { val selector = new ANOVASelector() .setOutputCol("filtered").setSelectorType("fwe").setFwe(6.0E-12) val model = testSelector(selector, dataset) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala index 19645b517d79c..8f8365a59082b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala @@ -81,7 +81,7 @@ class DCTSuite extends MLTest with DefaultReadWriteTest { .map { case Row(vec: Vector) => vec.size } .head() - // Can not infer size of ouput vector, since no metadata is provided + // Can not infer size of output vector, since no metadata is provided intercept[TestFailedException] { val transformed = transformer.transform(dataset) checkVectorSizeOnDF(transformed, "resultVec", vectorSize) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala index 93564681994d7..55dade28920ed 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/LSHTest.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.DataTypes private[ml] object LSHTest { /** - * For any locality sensitive function h in a metric space, we meed to verify whether + * For any locality sensitive function h in a metric space, we need to verify whether * the following property is 
satisfied. * * There exist dist1, dist2, p1, p2, so that for any two elements e1 and e2, diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala index cc451c0b60379..142abf2ccdfb9 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VarianceThresholdSelectorSuite.scala @@ -53,7 +53,7 @@ class VarianceThresholdSelectorSuite extends MLTest with DefaultReadWriteTest { ParamsSuite.checkParams(new VarianceThresholdSelector) } - test("Test VarianceThresholdSelector: varainceThreshold not set") { + test("Test VarianceThresholdSelector: varianceThreshold not set") { val selector = new VarianceThresholdSelector().setOutputCol("filtered") val model = testSelector(selector, dataset) MLTestingUtils.checkCopyAndUids(selector, model) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala index a0e17a4b40fd2..bfa9f4b59511c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala @@ -494,7 +494,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest [1] -0.0457441 -0.6833928 [1] 1.8121235 -0.1747493 -0.5815417 - R code for deivance calculation: + R code for deviance calculation: data = cbind(y=c(0,1,0,0,0,1), x1=c(18, 12, 15, 13, 15, 16), x2=c(1,0,0,2,1,1)) summary(glm(y~x1+x2, family=poisson, data=data.frame(data)))$deviance [1] 3.70055 @@ -1661,7 +1661,7 @@ class GeneralizedLinearRegressionSuite extends MLTest with DefaultReadWriteTest } test("evaluate with labels that are not doubles") { - // Evaulate with a dataset that contains Labels not as doubles to verify correct casting + // Evaluate with a dataset that contains Labels not as doubles to verify correct casting val dataset = Seq( Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), Instance(19.0, 1.0, Vectors.dense(1.0, 7.0)), diff --git a/pom.xml b/pom.xml index cd7e1767d6b18..f0ad9b0167c32 100644 --- a/pom.xml +++ b/pom.xml @@ -229,7 +229,7 @@ declared in the projects that build assemblies. For other projects the scope should remain as "compile", otherwise they are not available - during compilation if the dependency is transivite (e.g. "graphx/" depending on "core/" and + during compilation if the dependency is transitive (e.g. "graphx/" depending on "core/" and needing Hadoop classes in the classpath to compile). 
--> compile @@ -1758,7 +1758,7 @@ ${hive.deps.scope} - + ${hive.group} hive-metastore diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index f696e93e9cef2..386de19e919e6 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -113,10 +113,9 @@ class ExecutorClassLoaderSuite val classLoader = new ExecutorClassLoader( new SparkConf(), null, url1, parentLoader, true) - // load 'scala.Option', using ClassforName to do the exact same behavior as - // what JavaDeserializationStream does - // scalastyle:off classforname + // load 'scala.Option', using Class.forName to do the exact same behavior as + // what JavaDeserializationStream does val optionClass = Class.forName("scala.Option", false, classLoader) // scalastyle:on classforname From 2da72593c1cf63fc6f815416b8d553f0a53f3e65 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 30 Nov 2020 05:23:23 +0000 Subject: [PATCH 0603/1009] [SPARK-32976][SQL] Support column list in INSERT statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? #### JIRA expectations ``` INSERT currently does not support named column lists. INSERT INTO (col1, col2,…) VALUES( 'val1', 'val2', … ) Note, we assume the column list contains all the column names. Issue an exception if the list is not complete. The column order could be different from the column order defined in the table definition. ``` #### Implementation In this PR, we add a column list as an optional part to the `INSERT OVERWRITE/INTO` statements: ``` /** * {{{ * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? [identifierList] ... * INSERT INTO [TABLE] tableIdentifier [partitionSpec] [identifierList] ... * }}} */ ``` The column list represents all expected columns, in an explicit order, that you want to insert into the target table. **Particularly**, the current implementation assumes the column list contains all the column names; it will fail when the list is incomplete. In **Analyzer**, we add a code path to resolve the column list in the `ResolveOutputRelation` rule before it is transformed to a v1 or v2 command. It will fail here if the list has any field that does not belong to the target table. Then, for v2 commands, e.g. `AppendData`, we use the resolved column list and the output of the target table to resolve the output of the source query in the `ResolveOutputRelation` rule. If the list has duplicated columns, we fail. If the list is not empty but the list size does not match the target table, we fail. If no other exceptions occur, we use the column list to map the output of the source query to the output of the target table. The column list will be set to Nil and will not hit the rule again after it is resolved. For v1 commands, all of this happens in the `PreprocessTableInsertion` rule. ### Why are the changes needed? New feature support. ### Does this PR introduce _any_ user-facing change? Yes, INSERT INTO/OVERWRITE TABLE now supports specifying a column list. ### How was this patch tested? New tests. Closes #29893 from yaooqinn/SPARK-32976.
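To illustrate the new syntax end to end, a minimal sketch (the table and column names are made up for the example, and `spark` is assumed to be an active `SparkSession`):

```
// The column list may be in any order; the analyzer maps each listed column to the
// corresponding expression in the query and reorders the result to the table schema.
spark.sql("CREATE TABLE t (c1 INT, c2 INT) USING parquet")
spark.sql("INSERT INTO t (c2, c1) VALUES (2, 1)")
spark.sql("SELECT * FROM t").show()  // expected row: c1 = 1, c2 = 2
// Incomplete, duplicated, or unknown column lists are rejected during analysis.
```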
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 4 +- .../sql/catalyst/analysis/Analyzer.scala | 52 ++++- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/dsl/package.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 20 +- .../catalyst/plans/logical/statements.scala | 2 + .../sql/catalyst/parser/DDLParserSuite.scala | 66 ++++++ .../sql/catalyst/parser/PlanParserSuite.scala | 4 +- .../apache/spark/sql/DataFrameWriter.scala | 1 + .../datasources/DataSourceStrategy.scala | 10 +- .../datasources/FallBackFileSourceV2.scala | 4 +- .../sql/execution/datasources/rules.scala | 6 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 221 ++++++++++++++++++ .../command/PlanResolutionSuite.scala | 2 +- .../spark/sql/hive/HiveStrategies.scala | 9 +- .../sql/hive/HiveSQLInsertTestSuite.scala | 25 ++ 16 files changed, 396 insertions(+), 34 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 5d17028c32ae2..a23994f456f75 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -332,8 +332,8 @@ query ; insertInto - : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? #insertOverwriteTable - | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? #insertIntoTable + : INSERT OVERWRITE TABLE? multipartIdentifier (partitionSpec (IF NOT EXISTS)?)? identifierList? #insertOverwriteTable + | INSERT INTO TABLE? multipartIdentifier partitionSpec? (IF NOT EXISTS)? identifierList? #insertIntoTable | INSERT OVERWRITE LOCAL? DIRECTORY path=STRING rowFormat? createFileFormat? #insertOverwriteHiveDir | INSERT OVERWRITE LOCAL? DIRECTORY (path=STRING)? tableProvider (OPTIONS options=tablePropertyList)? 
#insertOverwriteDir ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dae496244c858..9b599b4c8f8d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -49,7 +49,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.util.Utils /** @@ -218,6 +218,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveTableValuedFunctions :: ResolveNamespace(catalogManager) :: new ResolveCatalogs(catalogManager) :: + ResolveUserSpecifiedColumns :: ResolveInsertInto :: ResolveRelations :: ResolveTables :: @@ -846,7 +847,7 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case u @ UnresolvedRelation(ident, _, isStreaming) => lookupTempView(ident, isStreaming).getOrElse(u) - case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedRelation(ident, _, false), _, _, _, _, _) => lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) @@ -961,7 +962,7 @@ class Analyzer(override val catalogManager: CatalogManager) .map(ResolvedTable(catalog.asTableCatalog, ident, _)) .getOrElse(u) - case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _) + case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _, _) if i.query.resolved => lookupV2Relation(u.multipartIdentifier, u.options, false) .map(v2Relation => i.copy(table = v2Relation)) @@ -1045,7 +1046,7 @@ class Analyzer(override val catalogManager: CatalogManager) } def apply(plan: LogicalPlan): LogicalPlan = ResolveTempViews(plan).resolveOperatorsUp { - case i @ InsertIntoStatement(table, _, _, _, _) if i.query.resolved => + case i @ InsertIntoStatement(table, _, _, _, _, _) if i.query.resolved => val relation = table match { case u @ UnresolvedRelation(_, _, false) => lookupRelation(u.multipartIdentifier, u.options, false).getOrElse(u) @@ -1160,7 +1161,8 @@ class Analyzer(override val catalogManager: CatalogManager) object ResolveInsertInto extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) if i.query.resolved => + case i @ InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) + if i.query.resolved && i.userSpecifiedCols.isEmpty => // ifPartitionNotExists is append with validation, but validation is not supported if (i.ifPartitionNotExists) { throw QueryCompilationErrors.unsupportedIfNotExistsError(r.table.name) @@ -3107,6 +3109,46 @@ class Analyzer(override val catalogManager: CatalogManager) } } + object ResolveUserSpecifiedColumns extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators { + case i: InsertIntoStatement if i.table.resolved && i.query.resolved && + i.userSpecifiedCols.nonEmpty => + val resolved = resolveUserSpecifiedColumns(i) + val projection = 
addColumnListOnQuery(i.table.output, resolved, i.query) + i.copy(userSpecifiedCols = Nil, query = projection) + } + + private def resolveUserSpecifiedColumns(i: InsertIntoStatement): Seq[NamedExpression] = { + SchemaUtils.checkColumnNameDuplication( + i.userSpecifiedCols, "in the column list", resolver) + + i.userSpecifiedCols.map { col => + i.table.resolve(Seq(col), resolver) + .getOrElse(i.table.failAnalysis(s"Cannot resolve column name $col")) + } + } + + private def addColumnListOnQuery( + tableOutput: Seq[Attribute], + cols: Seq[NamedExpression], + query: LogicalPlan): LogicalPlan = { + if (cols.size != query.output.size) { + query.failAnalysis( + s"Cannot write to table due to mismatched user specified column size(${cols.size}) and" + + s" data column size(${query.output.size})") + } + val nameToQueryExpr = cols.zip(query.output).toMap + // Static partition columns in the table output should not appear in the column list + // they will be handled in another rule ResolveInsertInto + val reordered = tableOutput.flatMap { nameToQueryExpr.get(_).orElse(None) } + if (reordered == query.output) { + query + } else { + Project(reordered, query) + } + } + } + private def validateStoreAssignmentPolicy(): Unit = { // SPARK-28730: LEGACY store assignment policy is disallowed in data source v2. if (conf.storeAssignmentPolicy == StoreAssignmentPolicy.LEGACY) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9a3ab4a5f8d11..7f89c130749f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -108,7 +108,7 @@ trait CheckAnalysis extends PredicateHelper { case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") - case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _) => + case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 4cd649b07a5c0..89cf97e76d798 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -431,7 +431,7 @@ package object dsl { partition: Map[String, Option[String]] = Map.empty, overwrite: Boolean = false, ifPartitionNotExists: Boolean = false): LogicalPlan = - InsertIntoStatement(table, partition, logicalPlan, overwrite, ifPartitionNotExists) + InsertIntoStatement(table, partition, Nil, logicalPlan, overwrite, ifPartitionNotExists) def as(alias: String): LogicalPlan = SubqueryAlias(alias, logicalPlan) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index afef88f7e97e8..e85a3eba85377 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -243,9 +243,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Parameters used for writing query to a table: - * (multipartIdentifier, partitionKeys, ifPartitionNotExists). + * (multipartIdentifier, tableColumnList, partitionKeys, ifPartitionNotExists). */ - type InsertTableParams = (Seq[String], Map[String, Option[String]], Boolean) + type InsertTableParams = (Seq[String], Seq[String], Map[String, Option[String]], Boolean) /** * Parameters used for writing query to a directory: (isLocal, CatalogStorageFormat, provider). @@ -255,8 +255,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /** * Add an * {{{ - * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? - * INSERT INTO [TABLE] tableIdentifier [partitionSpec] + * INSERT OVERWRITE TABLE tableIdentifier [partitionSpec [IF NOT EXISTS]]? 
[identifierList] + * INSERT INTO [TABLE] tableIdentifier [partitionSpec] [identifierList] * INSERT OVERWRITE [LOCAL] DIRECTORY STRING [rowFormat] [createFileFormat] * INSERT OVERWRITE [LOCAL] DIRECTORY [STRING] tableProvider [OPTIONS tablePropertyList] * }}} @@ -267,18 +267,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg query: LogicalPlan): LogicalPlan = withOrigin(ctx) { ctx match { case table: InsertIntoTableContext => - val (tableIdent, partition, ifPartitionNotExists) = visitInsertIntoTable(table) + val (tableIdent, cols, partition, ifPartitionNotExists) = visitInsertIntoTable(table) InsertIntoStatement( UnresolvedRelation(tableIdent), partition, + cols, query, overwrite = false, ifPartitionNotExists) case table: InsertOverwriteTableContext => - val (tableIdent, partition, ifPartitionNotExists) = visitInsertOverwriteTable(table) + val (tableIdent, cols, partition, ifPartitionNotExists) = visitInsertOverwriteTable(table) InsertIntoStatement( UnresolvedRelation(tableIdent), partition, + cols, query, overwrite = true, ifPartitionNotExists) @@ -299,13 +301,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitInsertIntoTable( ctx: InsertIntoTableContext): InsertTableParams = withOrigin(ctx) { val tableIdent = visitMultipartIdentifier(ctx.multipartIdentifier) + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) if (ctx.EXISTS != null) { operationNotAllowed("INSERT INTO ... IF NOT EXISTS", ctx) } - (tableIdent, partitionKeys, false) + (tableIdent, cols, partitionKeys, false) } /** @@ -315,6 +318,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx: InsertOverwriteTableContext): InsertTableParams = withOrigin(ctx) { assert(ctx.OVERWRITE() != null) val tableIdent = visitMultipartIdentifier(ctx.multipartIdentifier) + val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) val dynamicPartitionKeys: Map[String, Option[String]] = partitionKeys.filter(_._2.isEmpty) @@ -323,7 +327,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg dynamicPartitionKeys.keys.mkString(", "), ctx) } - (tableIdent, partitionKeys, ctx.EXISTS() != null) + (tableIdent, cols, partitionKeys, ctx.EXISTS() != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 281d57b3648f4..d5f739466a802 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -357,6 +357,7 @@ case class DropViewStatement( * An INSERT INTO statement, as parsed from SQL. * * @param table the logical plan representing the table. + * @param userSpecifiedCols the user specified list of columns that belong to the table. * @param query the logical plan representing data to write to. * @param overwrite overwrite existing table or partitions. * @param partitionSpec a map from the partition key to the partition value (optional). 
@@ -371,6 +372,7 @@ case class DropViewStatement( case class InsertIntoStatement( table: LogicalPlan, partitionSpec: Map[String, Option[String]], + userSpecifiedCols: Seq[String], query: LogicalPlan, overwrite: Boolean, ifPartitionNotExists: Boolean) extends ParsedStatement { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index c58ff81f17131..91b35bcac98ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1172,6 +1172,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = false, ifPartitionNotExists = false)) + } + } + + test("insert table: basic append with a column list") { + Seq( + "INSERT INTO TABLE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source", + "INSERT INTO testcat.ns1.ns2.tbl (a, b) SELECT * FROM source" + ).foreach { sql => + parseCompare(sql, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map.empty, + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1182,6 +1198,7 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testcat2", "db", "tbl"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1196,6 +1213,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3"), "p2" -> None), + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = false, ifPartitionNotExists = false)) + } + + test("insert table: append with partition and a column list") { + parseCompare( + """ + |INSERT INTO testcat.ns1.ns2.tbl + |PARTITION (p1 = 3, p2) (a, b) + |SELECT * FROM source + """.stripMargin, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map("p1" -> Some("3"), "p2" -> None), + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = false, ifPartitionNotExists = false)) } @@ -1209,6 +1242,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map.empty, + Nil, + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), + overwrite = true, ifPartitionNotExists = false)) + } + } + + test("insert table: overwrite with column list") { + Seq( + "INSERT OVERWRITE TABLE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source", + "INSERT OVERWRITE testcat.ns1.ns2.tbl (a, b) SELECT * FROM source" + ).foreach { sql => + parseCompare(sql, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map.empty, + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = false)) } @@ -1224,6 +1273,22 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3"), "p2" -> None), + Nil, + Project(Seq(UnresolvedStar(None)), 
UnresolvedRelation(Seq("source"))), + overwrite = true, ifPartitionNotExists = false)) + } + + test("insert table: overwrite with partition and column list") { + parseCompare( + """ + |INSERT OVERWRITE TABLE testcat.ns1.ns2.tbl + |PARTITION (p1 = 3, p2) (a, b) + |SELECT * FROM source + """.stripMargin, + InsertIntoStatement( + UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), + Map("p1" -> Some("3"), "p2" -> None), + Seq("a", "b"), Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = false)) } @@ -1238,6 +1303,7 @@ class DDLParserSuite extends AnalysisTest { InsertIntoStatement( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), Map("p1" -> Some("3")), + Nil, Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("source"))), overwrite = true, ifPartitionNotExists = true)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 88afcb10d9c20..6fef18babedb6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -295,7 +295,7 @@ class PlanParserSuite extends AnalysisTest { partition: Map[String, Option[String]], overwrite: Boolean = false, ifPartitionNotExists: Boolean = false): LogicalPlan = - InsertIntoStatement(table("s"), partition, plan, overwrite, ifPartitionNotExists) + InsertIntoStatement(table("s"), partition, Nil, plan, overwrite, ifPartitionNotExists) // Single inserts assertEqual(s"insert overwrite table s $sql", @@ -713,7 +713,7 @@ class PlanParserSuite extends AnalysisTest { comparePlans( parsePlan( "INSERT INTO s SELECT /*+ REPARTITION(100), COALESCE(500), COALESCE(10) */ * FROM t"), - InsertIntoStatement(table("s"), Map.empty, + InsertIntoStatement(table("s"), Map.empty, Nil, UnresolvedHint("REPARTITION", Seq(Literal(100)), UnresolvedHint("COALESCE", Seq(Literal(500)), UnresolvedHint("COALESCE", Seq(Literal(10)), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index a8688bdf15495..c5f2a3d568e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -536,6 +536,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { InsertIntoStatement( table = UnresolvedRelation(tableIdent), partitionSpec = Map.empty[String, Option[String]], + Nil, query = df.logicalPlan, overwrite = mode == SaveMode.Overwrite, ifPartitionNotExists = false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 361d1fab03421..e4f001d61a767 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -156,7 +156,7 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { CreateDataSourceTableAsSelectCommand(tableDesc, mode, query, query.output.map(_.name)) case InsertIntoStatement(l @ LogicalRelation(_: InsertableRelation, _, _, _), - parts, query, overwrite, false) if parts.isEmpty => + parts, _, query, overwrite, false) if parts.isEmpty => 
InsertIntoDataSourceCommand(l, query, overwrite) case InsertIntoDir(_, storage, provider, query, overwrite) @@ -168,7 +168,7 @@ object DataSourceAnalysis extends Rule[LogicalPlan] with CastSupport { InsertIntoDataSourceDirCommand(storage, provider.get, query, overwrite) case i @ InsertIntoStatement( - l @ LogicalRelation(t: HadoopFsRelation, _, table, _), parts, query, overwrite, _) => + l @ LogicalRelation(t: HadoopFsRelation, _, table, _), parts, _, query, overwrite, _) => // If the InsertIntoTable command is for a partitioned HadoopFsRelation and // the user has specified static partitions, we add a Project operator on top of the query // to include those constant column values in the query result. @@ -276,11 +276,11 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _, _) - if DDLUtils.isDatasourceTable(tableMeta) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, options, false), + _, _, _, _, _) if DDLUtils.isDatasourceTable(tableMeta) => i.copy(table = readDataSourceTable(tableMeta, options)) - case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _) => + case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) case UnresolvedCatalogRelation(tableMeta, options, false) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala index 1149767bdade2..b5d06db024112 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FallBackFileSourceV2.scala @@ -34,8 +34,8 @@ import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, File */ class FallBackFileSourceV2(sparkSession: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ - InsertIntoStatement(d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _) => + case i @ InsertIntoStatement( + d @ DataSourceV2Relation(table: FileTable, _, _, _, _), _, _, _, _, _) => val v1FileFormat = table.fallbackFileFormat.newInstance() val relation = HadoopFsRelation( table.fileIndex, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 9e65b0ce13693..2cc78258378ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -434,7 +434,7 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { } def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case i @ InsertIntoStatement(table, _, query, _, _) if table.resolved && query.resolved => + case i @ InsertIntoStatement(table, _, _, query, _, _) if table.resolved && query.resolved => table match { case relation: HiveTableRelation => val metadata = relation.tableMeta @@ -512,7 +512,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { def apply(plan: LogicalPlan): Unit = { plan.foreach { - case InsertIntoStatement(l @ LogicalRelation(relation, 
_, _, _), partition, query, _, _) => + case InsertIntoStatement(l @ LogicalRelation(relation, _, _, _), partition, _, query, _, _) => // Get all input data source relations of the query. val srcRelations = query.collect { case LogicalRelation(src, _, _, _) => src @@ -534,7 +534,7 @@ object PreWriteCheck extends (LogicalPlan => Unit) { case _ => failAnalysis(s"$relation does not allow insertion.") } - case InsertIntoStatement(t, _, _, _, _) + case InsertIntoStatement(t, _, _, _, _, _) if !t.isInstanceOf[LeafNode] || t.isInstanceOf[Range] || t.isInstanceOf[OneRowRelation] || diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala new file mode 100644 index 0000000000000..e454f0e6d540f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.SparkConf +import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +/** + * The base trait for DML - insert syntax + */ +trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { + + import testImplicits._ + + def format: String + + protected def createTable( + table: String, + cols: Seq[String], + colTypes: Seq[String], + partCols: Seq[String] = Nil): Unit = { + val values = cols.zip(colTypes).map(tuple => tuple._1 + " " + tuple._2).mkString("(", ", ", ")") + val partitionSpec = if (partCols.nonEmpty) { + partCols.mkString("PARTITIONED BY (", ",", ")") + } else "" + sql(s"CREATE TABLE $table$values USING $format $partitionSpec") + } + + protected def processInsert( + tableName: String, + input: DataFrame, + cols: Seq[String] = Nil, + partitionExprs: Seq[String] = Nil, + overwrite: Boolean): Unit = { + val tmpView = "tmp_view" + val columnList = if (cols.nonEmpty) cols.mkString("(", ",", ")") else "" + val partitionList = if (partitionExprs.nonEmpty) { + partitionExprs.mkString("PARTITION (", ",", ")") + } else "" + withTempView(tmpView) { + input.createOrReplaceTempView(tmpView) + val overwriteStr = if (overwrite) "OVERWRITE" else "INTO" + sql( + s"INSERT $overwriteStr TABLE $tableName $partitionList $columnList SELECT * FROM $tmpView") + } + } + + protected def verifyTable(tableName: String, expected: DataFrame): Unit = { + checkAnswer(spark.table(tableName), expected) + } + + test("insert with column list - follow table output order") { + withTable("t1") { + val df = Seq((1, 2L, "3")).toDF() + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + Seq(false, true).foreach { m => + 
processInsert("t1", df, cols, overwrite = m) + verifyTable("t1", df) + } + } + } + + test("insert with column list - follow table output order + partitioned table") { + val cols = Seq("c1", "c2", "c3", "c4") + val df = Seq((1, 2, 3, 4)).toDF(cols: _*) + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df, cols, overwrite = m) + verifyTable("t1", df) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert( + "t1", df.selectExpr("c1", "c2"), cols.take(2), Seq("c3=3", "c4=4"), overwrite = m) + verifyTable("t1", df) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df.selectExpr("c1", "c2", "c4"), + cols.filterNot(_ == "c3"), Seq("c3=3", "c4"), overwrite = m) + verifyTable("t1", df) + } + } + } + + test("insert with column list - table output reorder") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + val df = Seq((1, 2, 3)).toDF(cols: _*) + createTable("t1", cols, Seq("int", "int", "int")) + Seq(false, true).foreach { m => + processInsert("t1", df, cols.reverse, overwrite = m) + verifyTable("t1", df.selectExpr(cols.reverse: _*)) + } + } + } + + test("insert with column list - table output reorder + partitioned table") { + val cols = Seq("c1", "c2", "c3", "c4") + val df = Seq((1, 2, 3, 4)).toDF(cols: _*) + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", df, cols.reverse, overwrite = m) + verifyTable("t1", df.selectExpr(cols.reverse: _*)) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert( + "t1", df.selectExpr("c1", "c2"), cols.take(2).reverse, Seq("c3=3", "c4=4"), overwrite = m) + verifyTable("t1", df.selectExpr("c2", "c1", "c3", "c4")) + } + } + + withTable("t1") { + createTable("t1", cols, Seq("int", "int", "int", "int"), cols.takeRight(2)) + Seq(false, true).foreach { m => + processInsert("t1", + df.selectExpr("c1", "c2", "c4"), Seq("c4", "c2", "c1"), Seq("c3=3", "c4"), overwrite = m) + verifyTable("t1", df.selectExpr("c4", "c2", "c3", "c1")) + } + } + } + + test("insert with column list - duplicated columns") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c2) values(1, 2, 3)")) + assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`;") + } + } + + test("insert with column list - invalid columns") { + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)")) + assert(e1.getMessage === "Cannot resolve column name c4;") + } + } + + test("insert with column list - mismatched column list size") { + val msg = "Cannot write to table due to mismatched user specified column size" + withTable("t1") { + val cols = Seq("c1", "c2", "c3") + createTable("t1", cols, Seq("int", "long", "string")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2) values(1, 2, 3)")) + assert(e1.getMessage.contains(msg)) + val e2 = intercept[AnalysisException](sql(s"INSERT INTO 
t1 (c1, c2, c3) values(1, 2)")) + assert(e2.getMessage.contains(msg)) + } + } + + test("insert with column list - mismatched target table out size after rewritten query") { + val v2Msg = "Cannot write to 'testcat.t1', not enough data columns:" + val cols = Seq("c1", "c2", "c3", "c4") + + withTable("t1") { + createTable("t1", cols, Seq.fill(4)("int")) + val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1) values(1)")) + assert(e1.getMessage.contains("target table has 4 column(s) but the inserted data has 1") || + e1.getMessage.contains(v2Msg)) + } + + withTable("t1") { + createTable("t1", cols, Seq.fill(4)("int"), cols.takeRight(2)) + val e1 = intercept[AnalysisException] { + sql(s"INSERT INTO t1 partition(c3=3, c4=4) (c1) values(1)") + } + assert(e1.getMessage.contains("target table has 4 column(s) but the inserted data has 3") || + e1.getMessage.contains(v2Msg)) + } + } +} + +class FileSourceSQLInsertTestSuite extends SQLInsertTestSuite with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, format) + } +} + +class DSV2SQLInsertTestSuite extends SQLInsertTestSuite with SharedSparkSession { + + override def format: String = "foo" + + protected override def sparkConf: SparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 92c114e116d0c..9710fca6bc82c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1185,7 +1185,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _) => + case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 3d8bba8b1b425..ff7dc58829fa1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -145,7 +145,7 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { // handles InsertIntoStatement specially as the table in InsertIntoStatement is not added in its // children, hence not matched directly by previous HiveTableRelation case. 
- case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _) + case i @ InsertIntoStatement(relation: HiveTableRelation, _, _, _, _, _) if DDLUtils.isHiveTable(relation.tableMeta) && relation.tableMeta.stats.isEmpty => i.copy(table = hiveTableWithStats(relation)) } @@ -159,7 +159,8 @@ class DetermineTableStats(session: SparkSession) extends Rule[LogicalPlan] { */ object HiveAnalysis extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { - case InsertIntoStatement(r: HiveTableRelation, partSpec, query, overwrite, ifPartitionNotExists) + case InsertIntoStatement( + r: HiveTableRelation, partSpec, _, query, overwrite, ifPartitionNotExists) if DDLUtils.isHiveTable(r.tableMeta) => InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists, query.output.map(_.name)) @@ -207,11 +208,11 @@ case class RelationConversions( plan resolveOperators { // Write path case InsertIntoStatement( - r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) + r: HiveTableRelation, partition, cols, query, overwrite, ifPartitionNotExists) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && (!r.isPartitioned || SQLConf.get.getConf(HiveUtils.CONVERT_INSERTING_PARTITIONED_TABLE)) && isConvertible(r) => - InsertIntoStatement(metastoreCatalog.convert(r), partition, + InsertIntoStatement(metastoreCatalog.convert(r), partition, cols, query, overwrite, ifPartitionNotExists) // Read path diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala new file mode 100644 index 0000000000000..49b005bca938e --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive + +import org.apache.spark.sql.SQLInsertTestSuite +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveSQLInsertTestSuite extends SQLInsertTestSuite with TestHiveSingleton { + override def format: String = "hive OPTIONS(fileFormat='parquet')" +} From 0fd9f57dd4cee32b4d0a16345f98e628a9d5f0fe Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 30 Nov 2020 05:37:10 +0000 Subject: [PATCH 0604/1009] [SPARK-33448][SQL] Support CACHE/UNCACHE TABLE commands for v2 tables ### What changes were proposed in this pull request? This PR proposes to support `CACHE/UNCACHE TABLE` commands for v2 tables. In addition, this PR proposes to migrate `CACHE/UNCACHE TABLE` to use `UnresolvedTableOrView` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands.
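As a usage sketch of what this enables (the `testcat` catalog, the `foo` provider, and the table name below are placeholders for any configured v2 catalog and data source):

```
// Assuming spark.sql.catalog.testcat points at a v2 catalog implementation,
// these commands now resolve through the v2 code path instead of failing.
spark.sql("CREATE TABLE testcat.ns.tbl (id BIGINT, data STRING) USING foo")
spark.sql("CACHE TABLE testcat.ns.tbl OPTIONS ('storageLevel' 'DISK_ONLY')")
spark.sql("SELECT * FROM testcat.ns.tbl").show()   // reads the cached relation
spark.sql("UNCACHE TABLE IF EXISTS testcat.ns.tbl")
```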
More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To support `CACHE/UNCACHE TABLE` commands for v2 tables. Note that `CACHE/UNCACHE TABLE` for v1 tables/views go through `SparkSession.table` to resolve identifier, which resolves temp views first, so there is no change in the behavior by moving to the new framework. ### Does this PR introduce _any_ user-facing change? Yes. Now the user can run `CACHE/UNCACHE TABLE` commands on v2 tables. ### How was this patch tested? Added/updated existing tests. Closes #30403 from imback82/cache_table. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 31 ------------- .../catalyst/plans/logical/statements.scala | 16 ------- .../sql/catalyst/parser/DDLParserSuite.scala | 27 ----------- .../analysis/ResolveSessionCatalog.scala | 19 +------- .../spark/sql/execution/SparkSqlParser.scala | 34 ++++++++++++++ .../spark/sql/execution/command/cache.scala | 43 +++++++++++------- .../apache/spark/sql/CachedTableSuite.scala | 11 +++++ .../sql/connector/DataSourceV2SQLSuite.scala | 40 ++++++++++------- .../sql/execution/SparkSqlParserSuite.scala | 45 ++++++++++++++++++- .../execution/metric/SQLMetricsSuite.scala | 2 +- .../HiveThriftServer2Suites.scala | 4 +- .../spark/sql/hive/CachedTableSuite.scala | 14 +++--- .../apache/spark/sql/hive/test/TestHive.scala | 2 +- 13 files changed, 152 insertions(+), 136 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e85a3eba85377..a31d7ca7268a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3590,37 +3590,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.SERDE != null) } - /** - * Create a [[CacheTableStatement]]. - * - * For example: - * {{{ - * CACHE [LAZY] TABLE multi_part_name - * [OPTIONS tablePropertyList] [[AS] query] - * }}} - */ - override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - - val query = Option(ctx.query).map(plan) - val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) - if (query.isDefined && tableName.length > 1) { - val catalogAndNamespace = tableName.init - throw new ParseException("It is not allowed to add catalog/namespace " + - s"prefix ${catalogAndNamespace.quoted} to " + - "the table name in CACHE TABLE AS SELECT", ctx) - } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - CacheTableStatement(tableName, query, ctx.LAZY != null, options) - } - - /** - * Create an [[UncacheTableStatement]] logical plan. - */ - override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier), ctx.EXISTS != null) - } - /** * Create a [[TruncateTable]] command. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d5f739466a802..effb4cff75930 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -412,22 +412,6 @@ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends */ case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement -/** - * A CACHE TABLE statement, as parsed from SQL - */ -case class CacheTableStatement( - tableName: Seq[String], - plan: Option[LogicalPlan], - isLazy: Boolean, - options: Map[String, String]) extends ParsedStatement - -/** - * An UNCACHE TABLE statement, as parsed from SQL - */ -case class UncacheTableStatement( - tableName: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * A TRUNCATE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 91b35bcac98ae..0f1b4a3ea918c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1984,33 +1984,6 @@ class DDLParserSuite extends AnalysisTest { asSerde = true)) } - test("CACHE TABLE") { - comparePlans( - parsePlan("CACHE TABLE a.b.c"), - CacheTableStatement(Seq("a", "b", "c"), None, false, Map.empty)) - - comparePlans( - parsePlan("CACHE LAZY TABLE a.b.c"), - CacheTableStatement(Seq("a", "b", "c"), None, true, Map.empty)) - - comparePlans( - parsePlan("CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')"), - CacheTableStatement(Seq("a", "b", "c"), None, true, Map("storageLevel" -> "DISK_ONLY"))) - - intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", - "It is not allowed to add catalog/namespace prefix a.b") - } - - test("UNCACHE TABLE") { - comparePlans( - parsePlan("UNCACHE TABLE a.b.c"), - UncacheTableStatement(Seq("a", "b", "c"), ifExists = false)) - - comparePlans( - parsePlan("UNCACHE TABLE IF EXISTS a.b.c"), - UncacheTableStatement(Seq("a", "b", "c"), ifExists = true)) - } - test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f49caf7f04a20..582f11a2be8fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -446,20 +446,6 @@ class ResolveSessionCatalog( ShowCreateTableCommand(ident.asTableIdentifier) } - case CacheTableStatement(tbl, plan, isLazy, options) => - val name = if (plan.isDefined) { - // CACHE TABLE ... AS SELECT creates a temp view with the input query. - // Temp view doesn't belong to any catalog and we shouldn't resolve catalog in the name. 
- tbl - } else { - parseTempViewOrV1Table(tbl, "CACHE TABLE") - } - CacheTableCommand(name.asTableIdentifier, plan, isLazy, options) - - case UncacheTableStatement(tbl, ifExists) => - val name = parseTempViewOrV1Table(tbl, "UNCACHE TABLE") - UncacheTableCommand(name.asTableIdentifier, ifExists) - case TruncateTable(ResolvedV1TableIdentifier(ident), partitionSpec) => TruncateTableCommand( ident.asTableIdentifier, @@ -561,12 +547,9 @@ class ResolveSessionCatalog( "SHOW VIEWS, only SessionCatalog supports this command.") } - case ShowTableProperties(ResolvedV1TableIdentifier(ident), propertyKey) => + case ShowTableProperties(ResolvedV1TableOrViewIdentifier(ident), propertyKey) => ShowTablePropertiesCommand(ident.asTableIdentifier, propertyKey) - case ShowTableProperties(r: ResolvedView, propertyKey) => - ShowTablePropertiesCommand(r.identifier.asTableIdentifier, propertyKey) - case DescribeFunction(ResolvedFunc(identifier), extended) => DescribeFunctionCommand(identifier.asFunctionIdentifier, extended) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 568c7112954f5..c82e3818b48cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,6 +192,40 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } + /** + * Create a [[CacheTableCommand]]. + * + * For example: + * {{{ + * CACHE [LAZY] TABLE multi_part_name + * [OPTIONS tablePropertyList] [[AS] query] + * }}} + */ + override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val query = Option(ctx.query).map(plan) + val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) + if (query.isDefined && tableName.length > 1) { + val catalogAndNamespace = tableName.init + throw new ParseException("It is not allowed to add catalog/namespace " + + s"prefix ${catalogAndNamespace.quoted} to " + + "the table name in CACHE TABLE AS SELECT", ctx) + } + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + CacheTableCommand(tableName, query, ctx.LAZY != null, options) + } + + + /** + * Create an [[UncacheTableCommand]] logical plan. + */ + override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { + UncacheTableCommand( + visitMultipartIdentifier(ctx.multipartIdentifier), + ctx.EXISTS != null) + } + /** * Create a [[ClearCacheCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index f99dc8d9f1a8e..3f0945d1e817b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -19,26 +19,27 @@ package org.apache.spark.sql.execution.command import java.util.Locale -import org.apache.spark.sql.{Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper import org.apache.spark.storage.StorageLevel case class CacheTableCommand( - tableIdent: TableIdentifier, + multipartIdentifier: Seq[String], plan: Option[LogicalPlan], isLazy: Boolean, options: Map[String, String]) extends RunnableCommand { - require(plan.isEmpty || tableIdent.database.isEmpty, - "Database name is not allowed in CACHE TABLE AS SELECT") + require(plan.isEmpty || multipartIdentifier.length == 1, + "Namespace name is not allowed in CACHE TABLE AS SELECT") override def innerChildren: Seq[QueryPlan[_]] = plan.toSeq override def run(sparkSession: SparkSession): Seq[Row] = { + val tableName = multipartIdentifier.quoted plan.foreach { logicalPlan => - Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) + Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableName) } val storageLevelKey = "storagelevel" @@ -49,34 +50,46 @@ case class CacheTableCommand( logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") } + val table = sparkSession.table(tableName) if (storageLevelValue.nonEmpty) { - sparkSession.catalog.cacheTable( - tableIdent.quotedString, StorageLevel.fromString(storageLevelValue.get)) + sparkSession.sharedState.cacheManager.cacheQuery( + table, + Some(tableName), + StorageLevel.fromString(storageLevelValue.get)) } else { - sparkSession.catalog.cacheTable(tableIdent.quotedString) + sparkSession.sharedState.cacheManager.cacheQuery(table, Some(tableName)) } if (!isLazy) { // Performs eager caching - sparkSession.table(tableIdent).count() + table.count() } Seq.empty[Row] } } - case class UncacheTableCommand( - tableIdent: TableIdentifier, + multipartIdentifier: Seq[String], ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { - val tableId = tableIdent.quotedString - if (!ifExists || sparkSession.catalog.tableExists(tableId)) { - sparkSession.catalog.uncacheTable(tableId) + val tableName = multipartIdentifier.quoted + table(sparkSession, tableName).foreach { table => + val cascade = !sparkSession.sessionState.catalog.isTempView(multipartIdentifier) + sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade) } Seq.empty[Row] } + + private def table(sparkSession: SparkSession, name: String): Option[DataFrame] = { + try { + Some(sparkSession.table(name)) + } catch { + case ex: AnalysisException if ifExists && ex.getMessage.contains("Table or view not found") => + None + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 
6313370476c93..ef3f4daa6dc6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -25,6 +25,7 @@ import org.apache.spark.executor.DataReadMethod._ import org.apache.spark.executor.DataReadMethod.DataReadMethod import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, Join, JoinStrategyHint, SHUFFLE_HASH} import org.apache.spark.sql.catalyst.util.DateTimeConstants @@ -140,6 +141,16 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } + test("cache table as select - existing temp view") { + withTempView("tempView") { + sql("CREATE TEMPORARY VIEW tempView as SELECT 1") + val e = intercept[TempTableAlreadyExistsException] { + sql("CACHE TABLE tempView AS SELECT 1") + } + assert(e.getMessage.contains("Temporary view 'tempView' already exists")) + } + } + test("uncaching temp table") { withTempView("tempTable1", "tempTable2") { testData.select("key").createOrReplaceTempView("tempTable1") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 98580568a8df6..ffbc2287d81ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership +import org.apache.spark.sql.execution.columnar.InMemoryRelation import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode, V2_SESSION_CATALOG_IMPLEMENTATION} import org.apache.spark.sql.internal.connector.SimpleTableProvider @@ -2018,28 +2019,29 @@ class DataSourceV2SQLSuite } } - test("CACHE TABLE") { + test("CACHE/UNCACHE TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + def isCached(table: String): Boolean = { + spark.table(table).queryExecution.withCachedData.isInstanceOf[InMemoryRelation] + } - testV1CommandSupportingTempView("CACHE TABLE", t) + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") + sql(s"CACHE TABLE $t") + assert(isCached(t)) - val e = intercept[AnalysisException] { - sql(s"CACHE LAZY TABLE $t") - } - assert(e.message.contains("CACHE TABLE is only supported with temp views or v1 tables")) + sql(s"UNCACHE TABLE $t") + assert(!isCached(t)) } - } - test("UNCACHE TABLE") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - - testV1CommandSupportingTempView("UNCACHE TABLE", t) - testV1CommandSupportingTempView("UNCACHE TABLE", s"IF EXISTS $t") + // Test a scenario where a table does not exist. 
+ val e = intercept[AnalysisException] { + sql(s"UNCACHE TABLE $t") } + assert(e.message.contains("Table or view not found: testcat.ns1.ns2.tbl")) + + // If "IF EXISTS" is set, UNCACHE TABLE will not throw an exception. + sql(s"UNCACHE TABLE IF EXISTS $t") } test("SHOW COLUMNS") { @@ -2555,11 +2557,15 @@ class DataSourceV2SQLSuite } } - private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { + private def testNotSupportedV2Command( + sqlCommand: String, + sqlParams: String, + sqlCommandInMessage: Option[String] = None): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") } - assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) + val cmdStr = sqlCommandInMessage.getOrElse(sqlCommand) + assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) } private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 61c16baedb7cc..1a826c00c81f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -337,5 +337,48 @@ class SparkSqlParserSuite extends AnalysisTest { |FROM v """.stripMargin, "LINES TERMINATED BY only supports newline '\\n' right now") - } + } + + test("CACHE TABLE") { + assertEqual( + "CACHE TABLE a.b.c", + CacheTableCommand(Seq("a", "b", "c"), None, false, Map.empty)) + + assertEqual( + "CACHE TABLE t AS SELECT * FROM testData", + CacheTableCommand( + Seq("t"), + Some(Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData")))), + false, + Map.empty)) + + assertEqual( + "CACHE LAZY TABLE a.b.c", + CacheTableCommand(Seq("a", "b", "c"), None, true, Map.empty)) + + assertEqual( + "CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')", + CacheTableCommand( + Seq("a", "b", "c"), + None, + true, + Map("storageLevel" -> "DISK_ONLY"))) + + intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", + "It is not allowed to add catalog/namespace prefix a.b") + } + + test("UNCACHE TABLE") { + assertEqual( + "UNCACHE TABLE a.b.c", + UncacheTableCommand(Seq("a", "b", "c"), ifExists = false)) + + assertEqual( + "UNCACHE TABLE IF EXISTS a.b.c", + UncacheTableCommand(Seq("a", "b", "c"), ifExists = true)) + } + + test("CLEAR CACHE") { + assertEqual("CLEAR CACHE", ClearCacheCommand) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index 4872906dbfec3..b4f921efcac81 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -705,7 +705,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils sql("CREATE TEMPORARY VIEW inMemoryTable AS SELECT 1 AS c1") sql("CACHE TABLE inMemoryTable") testSparkPlanMetrics(spark.table("inMemoryTable"), 1, - Map(1L -> (("Scan In-memory table `inMemoryTable`", Map.empty))) + Map(1L -> (("Scan In-memory table inMemoryTable", Map.empty))) ) sql("CREATE TEMPORARY VIEW ```a``b``` AS SELECT 2 AS c1") diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 7cc60bb505089..5bf7892478082 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -305,7 +305,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val plan = statement.executeQuery("explain select * from test_table") plan.next() plan.next() - assert(plan.getString(1).contains("Scan In-memory table `test_table`")) + assert(plan.getString(1).contains("Scan In-memory table test_table")) val rs1 = statement.executeQuery("SELECT key FROM test_table ORDER BY KEY DESC") val buf1 = new collection.mutable.ArrayBuffer[Int]() @@ -391,7 +391,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { val plan = statement.executeQuery("explain select key from test_map ORDER BY key DESC") plan.next() plan.next() - assert(plan.getString(1).contains("Scan In-memory table `test_table`")) + assert(plan.getString(1).contains("Scan In-memory table test_table")) val rs = statement.executeQuery("SELECT key FROM test_map ORDER BY KEY DESC") val buf = new collection.mutable.ArrayBuffer[Int]() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index fc793534641df..81c3f271b18d4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -113,7 +113,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto e = intercept[AnalysisException] { sql("UNCACHE TABLE nonexistentTable") }.getMessage - assert(e.contains(s"$expectedErrorMsg default.nonexistentTable")) + assert(e.contains(s"$expectedErrorMsg nonexistentTable")) sql("UNCACHE TABLE IF EXISTS nonexistentTable") } @@ -364,14 +364,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Cache the table 'cachedTable' in temp db with qualified table name, // and then check whether the table is cached with expected name sql(s"CACHE TABLE $db.cachedTable OPTIONS('storageLevel' 'MEMORY_ONLY')") - assertCached(sql(s"SELECT * FROM $db.cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql(s"SELECT * FROM $db.cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached(s"$db.cachedTable"), s"Table '$db.cachedTable' should be cached.") // Refresh the table 'cachedTable' in temp db with qualified table name, and then check // whether the table is still cached with the same name and storage level. 
sql(s"REFRESH TABLE $db.cachedTable") - assertCached(sql(s"select * from $db.cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql(s"select * from $db.cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached(s"$db.cachedTable"), s"Table '$db.cachedTable' should be cached after refreshing with its qualified name.") @@ -382,7 +382,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // 'cachedTable', instead of '$db.cachedTable' activateDatabase(db) { sql("REFRESH TABLE cachedTable") - assertCached(sql("SELECT * FROM cachedTable"), s"`$db`.`cachedTable`", MEMORY_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), s"$db.cachedTable", MEMORY_ONLY) assert(spark.catalog.isCached("cachedTable"), s"Table '$db.cachedTable' should be cached after refreshing with its " + "unqualified name.") @@ -403,13 +403,13 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto // Cache the table 'cachedTable' in default db without qualified table name , and then // check whether the table is cached with expected name. sql("CACHE TABLE cachedTable OPTIONS('storageLevel' 'DISK_ONLY')") - assertCached(sql("SELECT * FROM cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("cachedTable"), "Table 'cachedTable' should be cached.") // Refresh the table 'cachedTable' in default db with unqualified table name, and then // check whether the table is still cached with the same name. sql("REFRESH TABLE cachedTable") - assertCached(sql("SELECT * FROM cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + assertCached(sql("SELECT * FROM cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("cachedTable"), "Table 'cachedTable' should be cached after refreshing with its unqualified name.") @@ -421,7 +421,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto activateDatabase(db) { sql("REFRESH TABLE default.cachedTable") assertCached( - sql("SELECT * FROM default.cachedTable"), "`default`.`cachedTable`", DISK_ONLY) + sql("SELECT * FROM default.cachedTable"), "cachedTable", DISK_ONLY) assert(spark.catalog.isCached("default.cachedTable"), "Table 'cachedTable' should be cached after refreshing with its qualified name.") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index f7c13ea047da7..a25c61c96f3d8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -596,7 +596,7 @@ private[hive] class TestHiveQueryExecution( override lazy val analyzed: LogicalPlan = sparkSession.withActive { val describedTables = logical match { - case CacheTableCommand(tbl, _, _, _) => tbl :: Nil + case CacheTableCommand(tbl, _, _, _) => tbl.asTableIdentifier :: Nil case _ => Nil } From 225c2e2815988ebf3e0926a4ca2af9a933b48467 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 30 Nov 2020 15:36:26 +0900 Subject: [PATCH 0605/1009] [SPARK-33498][SQL][FOLLOW-UP] Deduplicate the unittest by using checkCastWithParseError ### What changes were proposed in this pull request? Dup code removed in SPARK-33498 as follow-up. ### Why are the changes needed? Nit. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing UT. 
Closes #30540 from leanken/leanken-SPARK-33498. Authored-by: xuewei.linxuewei Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/expressions/CastSuite.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 0900a303b4cbe..d284c417042c1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -971,11 +971,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkCastWithParseError("20150318") checkCastWithParseError("2015-031-8") checkCastWithParseError("2015-03-18T12:03:17-0:70") - - val input = "abdef" - checkExceptionInExpression[DateTimeException]( - cast(input, TimestampType, Option(zid.getId)), - s"Cannot cast $input to TimestampType.") + checkCastWithParseError("abdef") } } } From b665d5881915f042930f502bcc3c6ee3cb00c50d Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 30 Nov 2020 17:04:38 +0900 Subject: [PATCH 0606/1009] [SPARK-28646][SQL] Fix bug of Count so as consistent with mainstream databases ### What changes were proposed in this pull request? Currently, Spark allows calls to `count` even for non parameterless aggregate function. For example, the following query actually works: `SELECT count() FROM tenk1;` On the other hand, mainstream databases will throw an error. **Oracle** `> ORA-00909: invalid number of arguments` **PgSQL** `ERROR: count(*) must be used to call a parameterless aggregate function` **MySQL** `> 1064 - You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ')` ### Why are the changes needed? Fix a bug so that consistent with mainstream databases. There is an example query output with/without this fix. `SELECT count() FROM testData;` The output before this fix: `0` The output after this fix: ``` org.apache.spark.sql.AnalysisException cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 ``` ### Does this PR introduce _any_ user-facing change? Yes. If not specify parameter for `count`, will throw an error. ### How was this patch tested? Jenkins test. Closes #30541 from beliefer/SPARK-28646. 
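To make the behavior change concrete, here is a hedged spark-shell sketch (illustrative only; `testData` stands for any registered table or view, and the error text is the one from the new golden file):

```scala
// Illustrative only: `testData` is a placeholder table name.
spark.sql("SELECT count() FROM testData").show()
// org.apache.spark.sql.AnalysisException: cannot resolve 'count()' due to data type
// mismatch: count requires at least one argument.; line 1 pos 7

// These forms keep working; count(*) is parsed as count(1), so it is not affected.
spark.sql("SELECT count(*), count(1), count(a) FROM testData").show()
```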
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/aggregate/Count.scala | 10 ++++++++++ .../src/test/resources/sql-tests/inputs/count.sql | 3 +++ .../test/resources/sql-tests/results/count.sql.out | 13 +++++++++++-- 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala index e043c81975066..e4488b26f197e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Count.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ @@ -43,11 +44,20 @@ import org.apache.spark.sql.types._ since = "1.0.0") // scalastyle:on line.size.limit case class Count(children: Seq[Expression]) extends DeclarativeAggregate { + override def nullable: Boolean = false // Return data type. override def dataType: DataType = LongType + override def checkInputDataTypes(): TypeCheckResult = { + if (children.isEmpty) { + TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least one argument.") + } else { + TypeCheckResult.TypeCheckSuccess + } + } + protected lazy val count = AttributeReference("count", LongType, nullable = false)() override lazy val aggBufferAttributes = count :: Nil diff --git a/sql/core/src/test/resources/sql-tests/inputs/count.sql b/sql/core/src/test/resources/sql-tests/inputs/count.sql index 203f04c589373..fc0d66258ea29 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/count.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/count.sql @@ -35,3 +35,6 @@ SELECT count(DISTINCT a), count(DISTINCT 3,2) FROM testData; SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 2,3) FROM testData; SELECT count(DISTINCT a), count(DISTINCT 2), count(DISTINCT 3,2) FROM testData; SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData; + +-- count without expressions +SELECT count() FROM testData; diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index c0cdd0d697538..64614b5b67784 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 14 -- !query @@ -116,4 +116,13 @@ SELECT count(distinct 0.8), percentile_approx(distinct a, 0.8) FROM testData -- !query schema struct -- !query output -1 2 \ No newline at end of file +1 2 + + +-- !query +SELECT count() FROM testData +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 \ No newline at end of file From 5cfbdddefe0753c3aff03f326b31c0ba8882b3a9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 30 Nov 2020 09:23:05 +0000 Subject: [PATCH 0607/1009] [SPARK-33480][SQL] Support char/varchar type ### What changes were proposed in this pull request? 
This PR adds the char/varchar type which is kind of a variant of string type: 1. Char type is fixed-length string. When comparing char type values, we need to pad the shorter one to the longer length. 2. Varchar type is string with a length limitation. To implement the char/varchar semantic, this PR: 1. Do string length check when writing to char/varchar type columns. 2. Do string padding when reading char type columns. We don't do it at the writing side to save storage space. 3. Do string padding when comparing char type column with string literal or another char type column. (string literal is fixed length so should be treated as char type as well) To simplify the implementation, this PR doesn't propagate char/varchar type info through functions/operators(e.g. `substring`). That said, a column can only be char/varchar type if it's a table column, not a derived column like `SELECT substring(col)`. To be safe, this PR doesn't add char/varchar type to the query engine(expression input check, internal row framework, codegen framework, etc.). We will replace char/varchar type by string type with metadata (`Attribute.metadata` or `StructField.metadata`) that includes the original type string before it goes into the query engine. That said, the existing code will not see char/varchar type but only string type. char/varchar type may come from several places: 1. v1 table from hive catalog. 2. v2 table from v2 catalog. 3. user-specified schema in `spark.read.schema` and `spark.readStream.schema` 4. `Column.cast` 5. schema string in places like `from_json`, pandas UDF, etc. These places use SQL parser which replaces char/varchar with string already, even before this PR. This PR covers all the above cases, implements the length check and padding feature by looking at string type with special metadata. ### Why are the changes needed? char and varchar are standard SQL types. varchar is widely used in other databases instead of string type. ### Does this PR introduce _any_ user-facing change? For hive tables: now the table insertion fails if the value exceeds char/varchar length. Previously we truncate the value silently. For other tables: 1. now char type is allowed. 2. now we have length check when inserting to varchar columns. Previously we write the value as it is. ### How was this patch tested? new tests Closes #30412 from cloud-fan/char. 
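To make the semantics above concrete, a hedged spark-shell sketch (the table, column names, and file source are made up for illustration; the error text follows the write-side length check added in this PR):

```scala
// Hypothetical table, only to illustrate the semantics described in this PR.
spark.sql("CREATE TABLE t (c CHAR(5), v VARCHAR(5)) USING parquet")

// Write-side length check: oversized values now fail instead of being written silently.
spark.sql("INSERT INTO t VALUES ('123456', 'abc')")
// error: input string '123456' exceeds char type length limitation: 5

// Read-side padding: CHAR(5) values come back right-padded to length 5,
// while VARCHAR(5) values come back as stored.
spark.sql("INSERT INTO t VALUES ('abc', 'abc')")
spark.sql("SELECT concat('[', c, ']'), concat('[', v, ']') FROM t").show()
// [abc  ]   [abc]

// Comparison padding: a string literal is fixed length, so the shorter side is
// padded and the predicate still matches the padded char value.
spark.sql("SELECT * FROM t WHERE c = 'abc'").show()
```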
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- docs/sql-ref-datatypes.md | 2 + .../sql/catalyst/analysis/Analyzer.scala | 9 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +- .../catalyst/analysis/ResolveCatalogs.scala | 5 - .../analysis/ResolvePartitionSpec.scala | 4 +- .../analysis/TableOutputResolver.scala | 19 +- .../sql/catalyst/catalog/SessionCatalog.scala | 7 +- .../sql/catalyst/parser/AstBuilder.scala | 17 +- .../catalyst/plans/logical/v2Commands.scala | 4 +- .../sql/catalyst/util/CharVarcharUtils.scala | 276 ++++++++++ .../sql/connector/catalog/CatalogV2Util.scala | 18 +- .../datasources/v2/DataSourceV2Relation.scala | 8 +- .../org/apache/spark/sql/types/CharType.scala | 38 ++ .../org/apache/spark/sql/types/DataType.scala | 10 +- .../spark/sql/types/HiveStringType.scala | 81 --- .../apache/spark/sql/types/VarcharType.scala | 37 ++ .../org/apache/spark/sql/types/package.scala | 10 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 18 +- .../parser/TableSchemaParserSuite.scala | 15 +- .../spark/sql/connector/InMemoryTable.scala | 15 +- .../catalog/CatalogV2UtilSuite.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 6 +- .../apache/spark/sql/DataFrameReader.scala | 4 +- .../analysis/ResolveSessionCatalog.scala | 37 +- .../datasources/ApplyCharTypePadding.scala | 135 +++++ .../datasources/LogicalRelation.scala | 18 +- .../datasources/jdbc/JdbcUtils.scala | 19 +- .../datasources/v2/PushDownUtils.scala | 4 +- .../internal/BaseSessionStateBuilder.scala | 1 + .../sql/streaming/DataStreamReader.scala | 4 +- .../spark/sql/CharVarcharTestSuite.scala | 505 ++++++++++++++++++ .../command/PlanResolutionSuite.scala | 44 +- .../spark/sql/sources/TableScanSuite.scala | 14 +- .../sql/hive/HiveSessionStateBuilder.scala | 1 + .../sql/hive/client/HiveClientImpl.scala | 19 +- .../spark/sql/HiveCharVarcharTestSuite.scala | 43 ++ .../sql/hive/HiveMetastoreCatalogSuite.scala | 15 +- .../sql/hive/execution/HiveDDLSuite.scala | 4 +- 38 files changed, 1172 insertions(+), 302 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index f27f1a0ca967f..0087867a8c7f7 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -37,6 +37,8 @@ Spark SQL and DataFrames support the following data types: - `DecimalType`: Represents arbitrary-precision signed decimal numbers. Backed internally by `java.math.BigDecimal`. A `BigDecimal` consists of an arbitrary precision integer unscaled value and a 32-bit integer scale. * String type - `StringType`: Represents character string values. + - `VarcharType(length)`: A variant of `StringType` which has a length limitation. Data writing will fail if the input string exceeds the length limitation. Note: this type can only be used in table schema, not functions/operators. 
+ - `CharType(length)`: A variant of `VarcharType(length)` which is fixed length. Reading column of type `CharType(n)` always returns string values of length `n`. Char type column comparison will pad the short one to the longer length. * Binary type - `BinaryType`: Represents byte sequence values. * Boolean type diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9b599b4c8f8d4..23a1b7bdde93c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.TreeNodeRef -import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} @@ -3102,7 +3102,12 @@ class Analyzer(override val catalogManager: CatalogManager) val projection = TableOutputResolver.resolveOutputColumns( v2Write.table.name, v2Write.table.output, v2Write.query, v2Write.isByName, conf) if (projection != v2Write.query) { - v2Write.withNewQuery(projection) + val cleanedTable = v2Write.table match { + case r: DataSourceV2Relation => + r.copy(output = r.output.map(CharVarcharUtils.cleanAttrMetadata)) + case other => other + } + v2Write.withNewQuery(projection).withNewTable(cleanedTable) } else { v2Write } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 7f89c130749f4..2818ba58075cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf @@ -94,6 +94,10 @@ trait CheckAnalysis extends PredicateHelper { case p if p.analyzed => // Skip already analyzed sub-plans + case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => + throw new IllegalStateException( + "[BUG] logical plan should not have output of char/varchar type: " + leaf) + case u: UnresolvedNamespace => u.failAnalysis(s"Namespace not found: 
${u.multipartIdentifier.quoted}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 7354d2478b7c8..a90de697bc084 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -35,7 +35,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableAddColumnsStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), cols) => cols.foreach(c => failNullType(c.dataType)) - cols.foreach(c => failCharType(c.dataType)) val changes = cols.map { col => TableChange.addColumn( col.name.toArray, @@ -49,7 +48,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableReplaceColumnsStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), cols) => cols.foreach(c => failNullType(c.dataType)) - cols.foreach(c => failCharType(c.dataType)) val changes: Seq[TableChange] = loadTable(catalog, tbl.asIdentifier) match { case Some(table) => // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. @@ -72,7 +70,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case a @ AlterTableAlterColumnStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _) => a.dataType.foreach(failNullType) - a.dataType.foreach(failCharType) val colName = a.column.toArray val typeChange = a.dataType.map { newDataType => TableChange.updateColumnType(colName, newDataType) @@ -145,7 +142,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( catalog.asTableCatalog, tbl.asIdentifier, @@ -173,7 +169,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case c @ ReplaceTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) - assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( catalog.asTableCatalog, tbl.asIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 6d061fce06919..98c6872a47cc6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec @@ -66,7 +67,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { val partValues = partSchema.map { part => val raw = normalizedSpec.get(part.name).orNull - Cast(Literal.create(raw, StringType), part.dataType, Some(conf.sessionLocalTimeZone)).eval() + val 
dt = CharVarcharUtils.replaceCharVarcharWithString(part.dataType) + Cast(Literal.create(raw, StringType), dt, Some(conf.sessionLocalTimeZone)).eval() } InternalRow.fromSeq(partValues) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index 4f33ca99c02db..d5c407b47c5be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -22,6 +22,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Alias, AnsiCast, Attribute, Cast, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.types.DataType @@ -93,19 +94,17 @@ object TableOutputResolver { tableAttr.metadata == queryExpr.metadata) { Some(queryExpr) } else { - // Renaming is needed for handling the following cases like - // 1) Column names/types do not match, e.g., INSERT INTO TABLE tab1 SELECT 1, 2 - // 2) Target tables have column metadata - storeAssignmentPolicy match { + val casted = storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI => - Some(Alias( - AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)), - tableAttr.name)(explicitMetadata = Option(tableAttr.metadata))) + AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) case _ => - Some(Alias( - Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)), - tableAttr.name)(explicitMetadata = Option(tableAttr.metadata))) + Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) } + val exprWithStrLenCheck = CharVarcharUtils.stringLengthCheck(casted, tableAttr) + // Renaming is needed for handling the following cases like + // 1) Column names/types do not match, e.g., INSERT INTO TABLE tab1 SELECT 1, 2 + // 2) Target tables have column metadata + Some(Alias(exprWithStrLenCheck, tableAttr.name)(explicitMetadata = Some(tableAttr.metadata))) } storeAssignmentPolicy match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 5122ca7521d9a..01bce079610ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, ImplicitCastInputTypes} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View} -import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE @@ -473,7 +473,10 @@ class SessionCatalog( val table = 
formatTableName(name.table) requireDbExists(db) requireTableExists(TableIdentifier(table, Some(db))) - externalCatalog.getTable(db, table) + val t = externalCatalog.getTable(db, table) + // We replace char/varchar with "annotated" string type in the table schema, as the query + // engine doesn't support char/varchar yet. + t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a31d7ca7268a6..ce95ea4b41def 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -36,8 +36,8 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp} -import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.catalyst.util.IntervalUtils.IntervalUnit import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition @@ -99,7 +99,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = { - withOrigin(ctx)(StructType(visitColTypeList(ctx.colTypeList))) + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + StructType(visitColTypeList(ctx.colTypeList))) + withOrigin(ctx)(schema) } def parseRawDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { @@ -2226,7 +2228,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Create a Spark DataType. */ private def visitSparkDataType(ctx: DataTypeContext): DataType = { - HiveStringType.replaceCharType(typedVisit(ctx)) + CharVarcharUtils.replaceCharVarcharWithString(typedVisit(ctx)) } /** @@ -2301,16 +2303,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg builder.putString("comment", _) } - // Add Hive type string to metadata. 
- val rawDataType = typedVisit[DataType](ctx.dataType) - val cleanedDataType = HiveStringType.replaceCharType(rawDataType) - if (rawDataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, rawDataType.catalogString) - } - StructField( name = colName.getText, - dataType = cleanedDataType, + dataType = typedVisit[DataType](ctx.dataType), nullable = NULL == null, metadata = builder.build()) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index ebf41f6a6e304..4931f0eb2c007 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.{NamedRelation, PartitionSpec, Res import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform @@ -45,9 +46,10 @@ trait V2WriteCommand extends Command { table.skipSchemaResolution || (query.output.size == table.output.size && query.output.zip(table.output).forall { case (inAttr, outAttr) => + val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType) // names and types must match, nullability must be compatible inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outAttr.dataType) && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) && (outAttr.nullable || !inAttr.nullable) }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala new file mode 100644 index 0000000000000..0cbe5abdbbd7a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.types._ + +object CharVarcharUtils { + + private val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" + + /** + * Replaces CharType/VarcharType with StringType recursively in the given struct type. If a + * top-level StructField's data type is CharType/VarcharType or has nested CharType/VarcharType, + * this method will add the original type string to the StructField's metadata, so that we can + * re-construct the original data type with CharType/VarcharType later when needed. + */ + def replaceCharVarcharWithStringInSchema(st: StructType): StructType = { + StructType(st.map { field => + if (hasCharVarchar(field.dataType)) { + val metadata = new MetadataBuilder().withMetadata(field.metadata) + .putString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY, field.dataType.sql).build() + field.copy(dataType = replaceCharVarcharWithString(field.dataType), metadata = metadata) + } else { + field + } + }) + } + + /** + * Returns true if the given data type is CharType/VarcharType or has nested CharType/VarcharType. + */ + def hasCharVarchar(dt: DataType): Boolean = { + dt.existsRecursively(f => f.isInstanceOf[CharType] || f.isInstanceOf[VarcharType]) + } + + /** + * Replaces CharType/VarcharType with StringType recursively in the given data type. + */ + def replaceCharVarcharWithString(dt: DataType): DataType = dt match { + case ArrayType(et, nullable) => + ArrayType(replaceCharVarcharWithString(et), nullable) + case MapType(kt, vt, nullable) => + MapType(replaceCharVarcharWithString(kt), replaceCharVarcharWithString(vt), nullable) + case StructType(fields) => + StructType(fields.map { field => + field.copy(dataType = replaceCharVarcharWithString(field.dataType)) + }) + case _: CharType => StringType + case _: VarcharType => StringType + case _ => dt + } + + /** + * Removes the metadata entry that contains the original type string of CharType/VarcharType from + * the given attribute's metadata. + */ + def cleanAttrMetadata(attr: AttributeReference): AttributeReference = { + val cleaned = new MetadataBuilder().withMetadata(attr.metadata) + .remove(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY).build() + attr.withMetadata(cleaned) + } + + /** + * Re-construct the original data type from the type string in the given metadata. + * This is needed when dealing with char/varchar columns/fields. + */ + def getRawType(metadata: Metadata): Option[DataType] = { + if (metadata.contains(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) { + Some(CatalystSqlParser.parseRawDataType( + metadata.getString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY))) + } else { + None + } + } + + /** + * Returns expressions to apply read-side char type padding for the given attributes. String + * values should be right-padded to N characters if it's from a CHAR(N) column/field. 
+ */ + def charTypePadding(output: Seq[AttributeReference]): Seq[NamedExpression] = { + output.map { attr => + getRawType(attr.metadata).filter { rawType => + rawType.existsRecursively(_.isInstanceOf[CharType]) + }.map { rawType => + Alias(charTypePadding(attr, rawType), attr.name)(explicitMetadata = Some(attr.metadata)) + }.getOrElse(attr) + } + } + + private def charTypePadding(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => StringRPad(expr, Literal(length)) + + case StructType(fields) => + val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => + Seq(Literal(f.name), charTypePadding(GetStructField(expr, i, Some(f.name)), f.dataType)) + }) + if (expr.nullable) { + If(IsNull(expr), Literal(null, struct.dataType), struct) + } else { + struct + } + + case ArrayType(et, containsNull) => charTypePaddingInArray(expr, et, containsNull) + + case MapType(kt, vt, valueContainsNull) => + val newKeys = charTypePaddingInArray(MapKeys(expr), kt, containsNull = false) + val newValues = charTypePaddingInArray(MapValues(expr), vt, valueContainsNull) + MapFromArrays(newKeys, newValues) + + case _ => expr + } + + private def charTypePaddingInArray( + arr: Expression, et: DataType, containsNull: Boolean): Expression = { + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + val func = LambdaFunction(charTypePadding(param, et), Seq(param)) + ArrayTransform(arr, func) + } + + /** + * Returns an expression to apply write-side string length check for the given expression. A + * string value can not exceed N characters if it's written into a CHAR(N)/VARCHAR(N) + * column/field. + */ + def stringLengthCheck(expr: Expression, targetAttr: Attribute): Expression = { + getRawType(targetAttr.metadata).map { rawType => + stringLengthCheck(expr, rawType) + }.getOrElse(expr) + } + + private def raiseError(expr: Expression, typeName: String, length: Int): Expression = { + val errorMsg = Concat(Seq( + Literal("input string '"), + expr, + Literal(s"' exceeds $typeName type length limitation: $length"))) + Cast(RaiseError(errorMsg), StringType) + } + + private def stringLengthCheck(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => + val trimmed = StringTrimRight(expr) + // Trailing spaces do not count in the length check. We don't need to retain the trailing + // spaces, as we will pad char type columns/fields at read time. + If( + GreaterThan(Length(trimmed), Literal(length)), + raiseError(expr, "char", length), + trimmed) + + case VarcharType(length) => + val trimmed = StringTrimRight(expr) + // Trailing spaces do not count in the length check. We need to retain the trailing spaces + // (truncate to length N), as there is no read-time padding for varchar type. + // TODO: create a special TrimRight function that can trim to a certain length. 
+ If( + LessThanOrEqual(Length(expr), Literal(length)), + expr, + If( + GreaterThan(Length(trimmed), Literal(length)), + raiseError(expr, "varchar", length), + StringRPad(trimmed, Literal(length)))) + + case StructType(fields) => + val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => + Seq(Literal(f.name), stringLengthCheck(GetStructField(expr, i, Some(f.name)), f.dataType)) + }) + if (expr.nullable) { + If(IsNull(expr), Literal(null, struct.dataType), struct) + } else { + struct + } + + case ArrayType(et, containsNull) => stringLengthCheckInArray(expr, et, containsNull) + + case MapType(kt, vt, valueContainsNull) => + val newKeys = stringLengthCheckInArray(MapKeys(expr), kt, containsNull = false) + val newValues = stringLengthCheckInArray(MapValues(expr), vt, valueContainsNull) + MapFromArrays(newKeys, newValues) + + case _ => expr + } + + private def stringLengthCheckInArray( + arr: Expression, et: DataType, containsNull: Boolean): Expression = { + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + val func = LambdaFunction(stringLengthCheck(param, et), Seq(param)) + ArrayTransform(arr, func) + } + + /** + * Return expressions to apply char type padding for the string comparison between the given + * attributes. When comparing two char type columns/fields, we need to pad the shorter one to + * the longer length. + */ + def addPaddingInStringComparison(attrs: Seq[Attribute]): Seq[Expression] = { + val rawTypes = attrs.map(attr => getRawType(attr.metadata)) + if (rawTypes.exists(_.isEmpty)) { + attrs + } else { + val typeWithTargetCharLength = rawTypes.map(_.get).reduce(typeWithWiderCharLength) + attrs.zip(rawTypes.map(_.get)).map { case (attr, rawType) => + padCharToTargetLength(attr, rawType, typeWithTargetCharLength).getOrElse(attr) + } + } + } + + private def typeWithWiderCharLength(type1: DataType, type2: DataType): DataType = { + (type1, type2) match { + case (CharType(len1), CharType(len2)) => + CharType(math.max(len1, len2)) + case (StructType(fields1), StructType(fields2)) => + assert(fields1.length == fields2.length) + StructType(fields1.zip(fields2).map { case (left, right) => + StructField("", typeWithWiderCharLength(left.dataType, right.dataType)) + }) + case (ArrayType(et1, _), ArrayType(et2, _)) => + ArrayType(typeWithWiderCharLength(et1, et2)) + case _ => NullType + } + } + + private def padCharToTargetLength( + expr: Expression, + rawType: DataType, + typeWithTargetCharLength: DataType): Option[Expression] = { + (rawType, typeWithTargetCharLength) match { + case (CharType(len), CharType(target)) if target > len => + Some(StringRPad(expr, Literal(target))) + + case (StructType(fields), StructType(targets)) => + assert(fields.length == targets.length) + var i = 0 + var needPadding = false + val createStructExprs = mutable.ArrayBuffer.empty[Expression] + while (i < fields.length) { + val field = fields(i) + val fieldExpr = GetStructField(expr, i, Some(field.name)) + val padded = padCharToTargetLength(fieldExpr, field.dataType, targets(i).dataType) + needPadding = padded.isDefined + createStructExprs += Literal(field.name) + createStructExprs += padded.getOrElse(fieldExpr) + i += 1 + } + if (needPadding) Some(CreateNamedStruct(createStructExprs.toSeq)) else None + + case (ArrayType(et, containsNull), ArrayType(target, _)) => + val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) + padCharToTargetLength(param, et, target).map { padded => + val func = LambdaFunction(padded, Seq(param)) + 
ArrayTransform(expr, func) + } + + // We don't handle MapType here as it's not comparable. + + case _ => None + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index b6dc4f61c8588..02db2293ec64a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -24,11 +24,10 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NamedRelation, NoSuchDatabaseException, NoSuchNamespaceException, NoSuchTableException, UnresolvedV2Relation} -import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, CreateTableAsSelectStatement, CreateTableStatement, ReplaceTableAsSelectStatement, ReplaceTableStatement, SerdeInfo} import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation -import org.apache.spark.sql.types.{ArrayType, DataType, HIVE_TYPE_STRING, HiveStringType, MapType, NullType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, NullType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -379,21 +378,6 @@ private[sql] object CatalogV2Util { .asTableCatalog } - def failCharType(dt: DataType): Unit = { - if (HiveStringType.containsCharType(dt)) { - throw new AnalysisException( - "Cannot use CHAR type in non-Hive-Serde tables, please use STRING type instead.") - } - } - - def assertNoCharTypeInSchema(schema: StructType): Unit = { - schema.foreach { f => - if (f.metadata.contains(HIVE_TYPE_STRING)) { - failCharType(CatalystSqlParser.parseRawDataType(f.metadata.getString(HIVE_TYPE_STRING))) - } - } - } - def failNullType(dt: DataType): Unit = { def containsNullType(dt: DataType): Boolean = dt match { case ArrayType(et, _) => containsNullType(et) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index f541411daeff4..4debdd380e6b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} import org.apache.spark.sql.connector.read.{Scan, Statistics => V2Statistics, SupportsReportStatistics} import org.apache.spark.sql.connector.read.streaming.{Offset, SparkDataStream} @@ -171,8 +171,10 @@ object DataSourceV2Relation { catalog: Option[CatalogPlugin], identifier: Option[Identifier], 
options: CaseInsensitiveStringMap): DataSourceV2Relation = { - val output = table.schema().toAttributes - DataSourceV2Relation(table, output, catalog, identifier, options) + // The v2 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(table.schema) + DataSourceV2Relation(table, schema.toAttributes, catalog, identifier, options) } def create( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala new file mode 100644 index 0000000000000..67ab1cc2f3321 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/CharType.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.types + +import scala.math.Ordering +import scala.reflect.runtime.universe.typeTag + +import org.apache.spark.annotation.Experimental +import org.apache.spark.unsafe.types.UTF8String + +@Experimental +case class CharType(length: Int) extends AtomicType { + require(length >= 0, "The length of char type cannot be negative.") + + private[sql] type InternalType = UTF8String + @transient private[sql] lazy val tag = typeTag[InternalType] + private[sql] val ordering = implicitly[Ordering[InternalType]] + + override def defaultSize: Int = length + override def typeName: String = s"char($length)" + override def toString: String = s"CharType($length)" + private[spark] override def asNullable: CharType = this +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 7556a19f0d316..e4ee6eb377a4d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -124,13 +124,15 @@ abstract class DataType extends AbstractDataType { object DataType { private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r + private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r + private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r def fromDDL(ddl: String): DataType = { parseTypeWithFallback( ddl, CatalystSqlParser.parseDataType, "Cannot parse the data type: ", - fallbackParser = CatalystSqlParser.parseTableSchema) + fallbackParser = str => CatalystSqlParser.parseTableSchema(str)) } /** @@ -166,7 +168,7 @@ object DataType { def fromJson(json: String): DataType = parseDataType(parse(json)) - private val nonDecimalNameToType = { + private val otherTypes = { Seq(NullType, DateType, TimestampType, BinaryType, IntegerType, BooleanType, LongType, 
DoubleType, FloatType, ShortType, ByteType, StringType, CalendarIntervalType) .map(t => t.typeName -> t).toMap @@ -177,7 +179,9 @@ object DataType { name match { case "decimal" => DecimalType.USER_DEFAULT case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt) - case other => nonDecimalNameToType.getOrElse( + case CHAR_TYPE(length) => CharType(length.toInt) + case VARCHAR_TYPE(length) => VarcharType(length.toInt) + case other => otherTypes.getOrElse( other, throw new IllegalArgumentException( s"Failed to convert the JSON string '$name' to a data type.")) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala deleted file mode 100644 index a29f49ad14a77..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/HiveStringType.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.types - -import scala.math.Ordering -import scala.reflect.runtime.universe.typeTag - -import org.apache.spark.unsafe.types.UTF8String - -/** - * A hive string type for compatibility. These datatypes should only used for parsing, - * and should NOT be used anywhere else. Any instance of these data types should be - * replaced by a [[StringType]] before analysis. - */ -sealed abstract class HiveStringType extends AtomicType { - private[sql] type InternalType = UTF8String - - private[sql] val ordering = implicitly[Ordering[InternalType]] - - @transient private[sql] lazy val tag = typeTag[InternalType] - - override def defaultSize: Int = length - - private[spark] override def asNullable: HiveStringType = this - - def length: Int -} - -object HiveStringType { - def replaceCharType(dt: DataType): DataType = dt match { - case ArrayType(et, nullable) => - ArrayType(replaceCharType(et), nullable) - case MapType(kt, vt, nullable) => - MapType(replaceCharType(kt), replaceCharType(vt), nullable) - case StructType(fields) => - StructType(fields.map { field => - field.copy(dataType = replaceCharType(field.dataType)) - }) - case _: HiveStringType => StringType - case _ => dt - } - - def containsCharType(dt: DataType): Boolean = dt match { - case ArrayType(et, _) => containsCharType(et) - case MapType(kt, vt, _) => containsCharType(kt) || containsCharType(vt) - case StructType(fields) => fields.exists(f => containsCharType(f.dataType)) - case _ => dt.isInstanceOf[CharType] - } -} - -/** - * Hive char type. Similar to other HiveStringType's, these datatypes should only used for - * parsing, and should NOT be used anywhere else. Any instance of these data types should be - * replaced by a [[StringType]] before analysis. 
- */
-case class CharType(length: Int) extends HiveStringType {
-  override def simpleString: String = s"char($length)"
-}
-
-/**
- * Hive varchar type. Similar to other HiveStringType's, these datatypes should only used for
- * parsing, and should NOT be used anywhere else. Any instance of these data types should be
- * replaced by a [[StringType]] before analysis.
- */
-case class VarcharType(length: Int) extends HiveStringType {
-  override def simpleString: String = s"varchar($length)"
-}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala
new file mode 100644
index 0000000000000..8d78640c1e125
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.types
+
+import scala.math.Ordering
+import scala.reflect.runtime.universe.typeTag
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.unsafe.types.UTF8String
+
+@Experimental
+case class VarcharType(length: Int) extends AtomicType {
+  require(length >= 0, "The length of varchar type cannot be negative.")
+
+  private[sql] type InternalType = UTF8String
+  @transient private[sql] lazy val tag = typeTag[InternalType]
+  private[sql] val ordering = implicitly[Ordering[InternalType]]
+
+  override def defaultSize: Int = length
+  override def typeName: String = s"varchar($length)"
+  override def toString: String = s"VarcharType($length)"
+  private[spark] override def asNullable: VarcharType = this
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
index f29cbc2069e39..346a51ea10c82 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/package.scala
@@ -21,12 +21,4 @@ package org.apache.spark.sql
  * Contains a type system for attributes produced by relations, including complex types like
  * structs, arrays and maps.
  */
-package object types {
-  /**
-   * Metadata key used to store the raw hive type string in the metadata of StructField. This
-   * is relevant for datatypes that do not have a direct Spark SQL counterpart, such as CHAR and
-   * VARCHAR. We need to preserve the original type in order to invoke the correct object
-   * inspector in Hive.
- */ - val HIVE_TYPE_STRING = "HIVE_TYPE_STRING" -} +package object types diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index f0a24d4a56048..0afa811e5d590 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import java.util.TimeZone +import scala.collection.JavaConverters._ import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag @@ -41,9 +42,11 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.connector.InMemoryTable +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ - +import org.apache.spark.sql.util.CaseInsensitiveStringMap class AnalysisSuite extends AnalysisTest with Matchers { import org.apache.spark.sql.catalyst.analysis.TestRelations._ @@ -55,6 +58,19 @@ class AnalysisSuite extends AnalysisTest with Matchers { } } + test("fail if a leaf node has char/varchar type output") { + val schema1 = new StructType().add("c", CharType(5)) + val schema2 = new StructType().add("c", VarcharType(5)) + val schema3 = new StructType().add("c", ArrayType(CharType(5))) + Seq(schema1, schema2, schema3).foreach { schema => + val table = new InMemoryTable("t", schema, Array.empty, Map.empty[String, String].asJava) + intercept[IllegalStateException] { + DataSourceV2Relation( + table, schema.toAttributes, None, None, CaseInsensitiveStringMap.empty()).analyze + } + } + } + test("union project *") { val plan = (1 to 120) .map(_ => testRelation) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala index 6803fc307f919..95851d44b4747 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.types._ class TableSchemaParserSuite extends SparkFunSuite { @@ -57,11 +58,6 @@ class TableSchemaParserSuite extends SparkFunSuite { |anotherArray:Array> """.stripMargin.replace("\n", "") - val builder = new MetadataBuilder - builder.putString(HIVE_TYPE_STRING, - "struct," + - "MAP:map,arrAy:array,anotherArray:array>") - val expectedDataType = StructType( StructField("complexStructCol", StructType( @@ -69,13 +65,12 @@ class TableSchemaParserSuite extends SparkFunSuite { StructType( StructField("deciMal", DecimalType.USER_DEFAULT) :: StructField("anotherDecimal", DecimalType(5, 2)) :: Nil)) :: - StructField("MAP", MapType(TimestampType, StringType)) :: + StructField("MAP", MapType(TimestampType, VarcharType(10))) :: StructField("arrAy", ArrayType(DoubleType)) :: - StructField("anotherArray", ArrayType(StringType)) :: Nil), - 
nullable = true, - builder.build()) :: Nil) + StructField("anotherArray", ArrayType(CharType(9))) :: Nil)) :: Nil) - assert(parse(tableSchemaString) === expectedDataType) + assert(parse(tableSchemaString) === + CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedDataType)) } // Negative cases diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index ffff00b54f1b8..cfb044b428e41 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -28,7 +28,7 @@ import org.scalatest.Assertions._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, JoinedRow} -import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, HoursTransform, IdentityTransform, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.connector.read._ @@ -116,11 +116,12 @@ class InMemoryTable( } } + val cleanedSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema) partitioning.map { case IdentityTransform(ref) => - extractor(ref.fieldNames, schema, row)._1 + extractor(ref.fieldNames, cleanedSchema, row)._1 case YearsTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days: Int, DateType) => ChronoUnit.YEARS.between(EPOCH_LOCAL_DATE, DateTimeUtils.daysToLocalDate(days)) case (micros: Long, TimestampType) => @@ -130,7 +131,7 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case MonthsTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days: Int, DateType) => ChronoUnit.MONTHS.between(EPOCH_LOCAL_DATE, DateTimeUtils.daysToLocalDate(days)) case (micros: Long, TimestampType) => @@ -140,7 +141,7 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case DaysTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (days, DateType) => days case (micros: Long, TimestampType) => @@ -149,14 +150,14 @@ class InMemoryTable( throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case HoursTransform(ref) => - extractor(ref.fieldNames, schema, row) match { + extractor(ref.fieldNames, cleanedSchema, row) match { case (micros: Long, TimestampType) => ChronoUnit.HOURS.between(Instant.EPOCH, DateTimeUtils.microsToInstant(micros)) case (v, t) => throw new IllegalArgumentException(s"Match: unsupported argument(s) type - ($v, $t)") } case BucketTransform(numBuckets, ref) => - val (value, dataType) = extractor(ref.fieldNames, schema, row) + val (value, dataType) = extractor(ref.fieldNames, cleanedSchema, row) val valueHashCode = if (value == null) 0 else value.hashCode ((valueHashCode + 31 * dataType.hashCode()) & Integer.MAX_VALUE) % numBuckets } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala index 7a9a7f52ff8fd..da5cfab8be3c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/CatalogV2UtilSuite.scala @@ -28,7 +28,7 @@ class CatalogV2UtilSuite extends SparkFunSuite { val testCatalog = mock(classOf[TableCatalog]) val ident = mock(classOf[Identifier]) val table = mock(classOf[Table]) - when(table.schema()).thenReturn(mock(classOf[StructType])) + when(table.schema()).thenReturn(new StructType().add("i", "int")) when(testCatalog.loadTable(ident)).thenReturn(table) val r = CatalogV2Util.loadRelation(testCatalog, ident) assert(r.isDefined) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index c164835c753e8..b3e403ffa7382 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils} import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.lit @@ -1181,7 +1181,9 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 1.3.0 */ - def cast(to: DataType): Column = withExpr { Cast(expr, to) } + def cast(to: DataType): Column = withExpr { + Cast(expr, CharVarcharUtils.replaceCharVarcharWithString(to)) + } /** * Casts the column to a different data type, using the canonical string representation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 8f96f0b882424..007df183ee353 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVOptions, UnivocityParser} import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions} -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, FailureSafeParser} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, FailureSafeParser} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsCatalogOptions, SupportsRead} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils @@ -73,7 +73,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 1.4.0 */ def schema(schema: StructType): DataFrameReader = { - this.userSpecifiedSchema = Option(schema) + this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) this } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 582f11a2be8fa..53edd4fca7794 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.HiveSerDe -import org.apache.spark.sql.types.{HIVE_TYPE_STRING, HiveStringType, MetadataBuilder, StructField, StructType} +import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements @@ -51,9 +51,6 @@ class ResolveSessionCatalog( cols.foreach(c => failNullType(c.dataType)) loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - if (!DDLUtils.isHiveTable(v1Table.v1Table)) { - cols.foreach(c => failCharType(c.dataType)) - } cols.foreach { c => assertTopLevelColumn(c.name, "AlterTableAddColumnsCommand") if (!c.nullable) { @@ -63,7 +60,6 @@ class ResolveSessionCatalog( } AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField)) }.getOrElse { - cols.foreach(c => failCharType(c.dataType)) val changes = cols.map { col => TableChange.addColumn( col.name.toArray, @@ -82,7 +78,6 @@ class ResolveSessionCatalog( case Some(_: V1Table) => throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") case Some(table) => - cols.foreach(c => failCharType(c.dataType)) // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. val deleteChanges = table.schema.fieldNames.map { name => TableChange.deleteColumn(Array(name)) @@ -105,10 +100,6 @@ class ResolveSessionCatalog( a.dataType.foreach(failNullType) loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - if (!DDLUtils.isHiveTable(v1Table.v1Table)) { - a.dataType.foreach(failCharType) - } - if (a.column.length > 1) { throw new AnalysisException( "ALTER COLUMN with qualified column is only supported with v2 tables.") @@ -134,19 +125,13 @@ class ResolveSessionCatalog( s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") } } - // Add Hive type string to metadata. 
- val cleanedDataType = HiveStringType.replaceCharType(dataType) - if (dataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, dataType.catalogString) - } val newColumn = StructField( colName, - cleanedDataType, + dataType, nullable = true, builder.build()) AlterTableChangeColumnCommand(tbl.asTableIdentifier, colName, newColumn) }.getOrElse { - a.dataType.foreach(failCharType) val colName = a.column.toArray val typeChange = a.dataType.map { newDataType => TableChange.updateColumnType(colName, newDataType) @@ -271,16 +256,12 @@ class ResolveSessionCatalog( val (storageFormat, provider) = getStorageFormatAndProvider( c.provider, c.options, c.location, c.serde, ctas = false) if (!isV2Provider(provider)) { - if (!DDLUtils.isHiveTable(Some(provider))) { - assertNoCharTypeInSchema(c.tableSchema) - } val tableDesc = buildCatalogTable(tbl.asTableIdentifier, c.tableSchema, c.partitioning, c.bucketSpec, c.properties, provider, c.location, c.comment, storageFormat, c.external) val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, None) } else { - assertNoCharTypeInSchema(c.tableSchema) CreateV2Table( catalog.asTableCatalog, tbl.asIdentifier, @@ -305,7 +286,6 @@ class ResolveSessionCatalog( val mode = if (c.ifNotExists) SaveMode.Ignore else SaveMode.ErrorIfExists CreateTable(tableDesc, mode, Some(c.asSelect)) } else { - assertNoCharTypeInSchema(c.schema) CreateTableAsSelect( catalog.asTableCatalog, tbl.asIdentifier, @@ -332,7 +312,6 @@ class ResolveSessionCatalog( if (!isV2Provider(provider)) { throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.") } else { - assertNoCharTypeInSchema(c.tableSchema) ReplaceTable( catalog.asTableCatalog, tbl.asIdentifier, @@ -754,17 +733,7 @@ class ResolveSessionCatalog( private def convertToStructField(col: QualifiedColType): StructField = { val builder = new MetadataBuilder col.comment.foreach(builder.putString("comment", _)) - - val cleanedDataType = HiveStringType.replaceCharType(col.dataType) - if (col.dataType != cleanedDataType) { - builder.putString(HIVE_TYPE_STRING, col.dataType.catalogString) - } - - StructField( - col.name.head, - cleanedDataType, - nullable = true, - builder.build()) + StructField(col.name.head, col.dataType, nullable = true, builder.build()) } private def isV2Provider(provider: String): Boolean = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala new file mode 100644 index 0000000000000..35bb86f178eb1 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources + +import org.apache.spark.sql.catalyst.catalog.HiveTableRelation +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryComparison, Expression, In, Literal, StringRPad} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.types.{CharType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +/** + * This rule applies char type padding in two places: + * 1. When reading values from column/field of type CHAR(N), right-pad the values to length N. + * 2. When comparing char type column/field with string literal or char type column/field, + * right-pad the shorter one to the longer length. + */ +object ApplyCharTypePadding extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = { + val padded = plan.resolveOperatorsUpWithNewOutput { + case r: LogicalRelation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedOutput = r.output.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, r.copy(output = cleanedOutput)) + padded -> r.output.zip(padded.output) + } + + case r: DataSourceV2Relation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedOutput = r.output.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, r.copy(output = cleanedOutput)) + padded -> r.output.zip(padded.output) + } + + case r: HiveTableRelation => + val projectList = CharVarcharUtils.charTypePadding(r.output) + if (projectList == r.output) { + r -> Nil + } else { + val cleanedDataCols = r.dataCols.map(CharVarcharUtils.cleanAttrMetadata) + val cleanedPartCols = r.partitionCols.map(CharVarcharUtils.cleanAttrMetadata) + val padded = Project(projectList, + r.copy(dataCols = cleanedDataCols, partitionCols = cleanedPartCols)) + padded -> r.output.zip(padded.output) + } + } + + padded.resolveOperatorsUp { + case operator if operator.resolved => operator.transformExpressionsUp { + // String literal is treated as char type when it's compared to a char type column. + // We should pad the shorter one to the longer length. + case b @ BinaryComparison(attr: Attribute, lit) if lit.foldable => + padAttrLitCmp(attr, lit).map { newChildren => + b.withNewChildren(newChildren) + }.getOrElse(b) + + case b @ BinaryComparison(lit, attr: Attribute) if lit.foldable => + padAttrLitCmp(attr, lit).map { newChildren => + b.withNewChildren(newChildren.reverse) + }.getOrElse(b) + + case i @ In(attr: Attribute, list) + if attr.dataType == StringType && list.forall(_.foldable) => + CharVarcharUtils.getRawType(attr.metadata).flatMap { + case CharType(length) => + val literalCharLengths = list.map(_.eval().asInstanceOf[UTF8String].numChars()) + val targetLen = (length +: literalCharLengths).max + Some(i.copy( + value = addPadding(attr, length, targetLen), + list = list.zip(literalCharLengths).map { + case (lit, charLength) => addPadding(lit, charLength, targetLen) + })) + case _ => None + }.getOrElse(i) + + // For char type column or inner field comparison, pad the shorter one to the longer length. 
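+        // For example, with columns c1 CHAR(2) and c2 CHAR(5) both storing 'ab', the values
+        // read back are 'ab' and 'ab   ', so a plain equality check would be false. Padding
+        // the CHAR(2) side to length 5 makes c1 = c2 hold, matching SQL CHAR comparison
+        // semantics.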
+ case b @ BinaryComparison(left: Attribute, right: Attribute) => + b.withNewChildren(CharVarcharUtils.addPaddingInStringComparison(Seq(left, right))) + + case i @ In(attr: Attribute, list) if list.forall(_.isInstanceOf[Attribute]) => + val newChildren = CharVarcharUtils.addPaddingInStringComparison( + attr +: list.map(_.asInstanceOf[Attribute])) + i.copy(value = newChildren.head, list = newChildren.tail) + } + } + } + + private def padAttrLitCmp(attr: Attribute, lit: Expression): Option[Seq[Expression]] = { + if (attr.dataType == StringType) { + CharVarcharUtils.getRawType(attr.metadata).flatMap { + case CharType(length) => + val str = lit.eval().asInstanceOf[UTF8String] + val stringLitLen = str.numChars() + if (length < stringLitLen) { + Some(Seq(StringRPad(attr, Literal(stringLitLen)), lit)) + } else if (length > stringLitLen) { + Some(Seq(attr, StringRPad(lit, Literal(length)))) + } else { + None + } + case _ => None + } + } else { + None + } + } + + private def addPadding(expr: Expression, charLength: Int, targetLength: Int): Expression = { + if (targetLength > charLength) StringRPad(expr, Literal(targetLength)) else expr + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala index 33a3486bf6f67..8c61c8cd4f52e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/LogicalRelation.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils} import org.apache.spark.sql.sources.BaseRelation /** @@ -69,9 +69,17 @@ case class LogicalRelation( } object LogicalRelation { - def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = - LogicalRelation(relation, relation.schema.toAttributes, None, isStreaming) + def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = { + // The v1 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(relation.schema) + LogicalRelation(relation, schema.toAttributes, None, isStreaming) + } - def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = - LogicalRelation(relation, relation.schema.toAttributes, Some(table), false) + def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = { + // The v1 source may return schema containing char/varchar type. We replace char/varchar + // with "annotated" string type here as the query engine doesn't support char/varchar yet. 
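+    // For example, a field declared as CHAR(5) becomes a StringType attribute whose metadata
+    // records the original char(5) type, so rules like ApplyCharTypePadding can recover it
+    // later via CharVarcharUtils.getRawType.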
+ val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(relation.schema) + LogicalRelation(relation, schema.toAttributes, Some(table), false) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 78f31fb80ecf6..5dd0d2bd74838 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType} @@ -761,17 +761,10 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, createTableColumnTypes: String): Map[String, String] = { - def typeName(f: StructField): String = { - // char/varchar gets translated to string type. Real data type specified by the user - // is available in the field metadata as HIVE_TYPE_STRING - if (f.metadata.contains(HIVE_TYPE_STRING)) { - f.metadata.getString(HIVE_TYPE_STRING) - } else { - f.dataType.catalogString - } - } - - val userSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) + val parsedSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) + val userSchema = StructType(parsedSchema.map { field => + field.copy(dataType = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)) + }) val nameEquality = if (caseSensitive) { org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution } else { @@ -791,7 +784,7 @@ object JdbcUtils extends Logging { } } - val userSchemaMap = userSchema.fields.map(f => f.name -> typeName(f)).toMap + val userSchemaMap = userSchema.fields.map(f => f.name -> f.dataType.catalogString).toMap if (caseSensitive) userSchemaMap else CaseInsensitiveMap(userSchemaMap) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index ce8edce6f08d6..2208e930f6b08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, SchemaPruning} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.internal.SQLConf @@ -110,7 +111,8 @@ object PushDownUtils extends PredicateHelper { schema: StructType, relation: 
DataSourceV2Relation): Seq[AttributeReference] = { val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap - schema.toAttributes.map { + val cleaned = CharVarcharUtils.replaceCharVarcharWithString(schema).asInstanceOf[StructType] + cleaned.toAttributes.map { // we have to keep the attribute id during transformation a => a.withExprId(nameToAttr(a.name).exprId) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 538a5408723bb..a89a5de3b7e72 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -189,6 +189,7 @@ abstract class BaseSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: + ApplyCharTypePadding +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 7f4ef8be562fb..eb7bb5c87a990 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -26,7 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{SupportsRead, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils @@ -64,7 +64,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.0.0 */ def schema(schema: StructType): DataStreamReader = { - this.userSpecifiedSchema = Option(schema) + this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala new file mode 100644 index 0000000000000..abb13270d20e7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -0,0 +1,505 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, SchemaRequiredDataSource} +import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.sources.SimpleInsertSource +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{ArrayType, CharType, DataType, MapType, StringType, StructField, StructType} + +// The base trait for char/varchar tests that need to be run with different table implementations. +trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { + + def format: String + + def checkColType(f: StructField, dt: DataType): Unit = { + assert(f.dataType == CharVarcharUtils.replaceCharVarcharWithString(dt)) + assert(CharVarcharUtils.getRawType(f.metadata) == Some(dt)) + } + + test("char type values should be padded: top-level columns") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('1', 'a')") + checkAnswer(spark.table("t"), Row("1", "a" + " " * 4)) + checkColType(spark.table("t").schema(1), CharType(5)) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: partitioned columns") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(5)) USING $format PARTITIONED BY (c)") + sql("INSERT INTO t VALUES ('1', 'a')") + checkAnswer(spark.table("t"), Row("1", "a" + " " * 4)) + checkColType(spark.table("t").schema(1), CharType(5)) + + sql("ALTER TABLE t DROP PARTITION(c='a')") + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in struct") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRUCT) USING $format") + sql("INSERT INTO t VALUES ('1', struct('a'))") + checkAnswer(spark.table("t"), Row("1", Row("a" + " " * 4))) + checkColType(spark.table("t").schema(1), new StructType().add("c", CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', struct(null))") + checkAnswer(spark.table("t"), Row("1", Row(null))) + } + } + + test("char type values should be padded: nested in array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY) USING $format") + sql("INSERT INTO t VALUES ('1', array('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Seq("a" + " " * 4, "ab" + " " * 3))) + checkColType(spark.table("t").schema(1), ArrayType(CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + } + } + + test("char type values should be padded: nested in map key") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a" + " " * 4, "ab")))) + checkColType(spark.table("t").schema(1), MapType(CharType(5), StringType)) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in map 
value") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a", "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), MapType(StringType, CharType(5))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', map('a', null))") + checkAnswer(spark.table("t"), Row("1", Map("a" -> null))) + } + } + + test("char type values should be padded: nested in both map key and value") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c MAP) USING $format") + sql("INSERT INTO t VALUES ('1', map('a', 'ab'))") + checkAnswer(spark.table("t"), Row("1", Map(("a" + " " * 4, "ab" + " " * 8)))) + checkColType(spark.table("t").schema(1), MapType(CharType(5), CharType(10))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + } + } + + test("char type values should be padded: nested in struct of array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRUCT>) USING $format") + sql("INSERT INTO t VALUES ('1', struct(array('a', 'ab')))") + checkAnswer(spark.table("t"), Row("1", Row(Seq("a" + " " * 4, "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), + new StructType().add("c", ArrayType(CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', struct(null))") + checkAnswer(spark.table("t"), Row("1", Row(null))) + sql("INSERT OVERWRITE t VALUES ('1', struct(array(null)))") + checkAnswer(spark.table("t"), Row("1", Row(Seq(null)))) + } + } + + test("char type values should be padded: nested in array of struct") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY>) USING $format") + sql("INSERT INTO t VALUES ('1', array(struct('a'), struct('ab')))") + checkAnswer(spark.table("t"), Row("1", Seq(Row("a" + " " * 4), Row("ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), + ArrayType(new StructType().add("c", CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + sql("INSERT OVERWRITE t VALUES ('1', array(struct(null)))") + checkAnswer(spark.table("t"), Row("1", Seq(Row(null)))) + } + } + + test("char type values should be padded: nested in array of array") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c ARRAY>) USING $format") + sql("INSERT INTO t VALUES ('1', array(array('a', 'ab')))") + checkAnswer(spark.table("t"), Row("1", Seq(Seq("a" + " " * 4, "ab" + " " * 3)))) + checkColType(spark.table("t").schema(1), ArrayType(ArrayType(CharType(5)))) + + sql("INSERT OVERWRITE t VALUES ('1', null)") + checkAnswer(spark.table("t"), Row("1", null)) + sql("INSERT OVERWRITE t VALUES ('1', array(null))") + checkAnswer(spark.table("t"), Row("1", Seq(null))) + sql("INSERT OVERWRITE t VALUES ('1', array(array(null)))") + checkAnswer(spark.table("t"), Row("1", Seq(Seq(null)))) + } + } + + private def testTableWrite(f: String => Unit): Unit = { + withTable("t") { f("char") } + withTable("t") { f("varchar") } + } + + test("length check for input string values: top-level columns") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c $typeName(5)) USING $format") + sql("INSERT INTO t VALUES (null)") + checkAnswer(spark.table("t"), Row(null)) + val e = 
intercept[SparkException](sql("INSERT INTO t VALUES ('123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: partitioned columns") { + // DS V2 doesn't support partitioned table. + if (!conf.contains(SQLConf.DEFAULT_CATALOG.key)) { + testTableWrite { typeName => + sql(s"CREATE TABLE t(i INT, c $typeName(5)) USING $format PARTITIONED BY (c)") + sql("INSERT INTO t VALUES (1, null)") + checkAnswer(spark.table("t"), Row(1, null)) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (1, '123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + } + + test("length check for input string values: nested in struct") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c STRUCT) USING $format") + sql("INSERT INTO t SELECT struct(null)") + checkAnswer(spark.table("t"), Row(Row(null))) + val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY<$typeName(5)>) USING $format") + sql("INSERT INTO t VALUES (array(null))") + checkAnswer(spark.table("t"), Row(Seq(null))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array('a', '123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in map key") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format") + val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in map value") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP) USING $format") + sql("INSERT INTO t VALUES (map('a', null))") + checkAnswer(spark.table("t"), Row(Map("a" -> null))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in both map key and value") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format") + val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) + assert(e1.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) + assert(e2.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in struct of array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c STRUCT>) USING $format") + sql("INSERT INTO t SELECT struct(array(null))") + checkAnswer(spark.table("t"), Row(Row(Seq(null)))) + val e = intercept[SparkException](sql("INSERT INTO t SELECT struct(array('123456'))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length 
limitation: 5")) + } + } + + test("length check for input string values: nested in array of struct") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY>) USING $format") + sql("INSERT INTO t VALUES (array(struct(null)))") + checkAnswer(spark.table("t"), Row(Seq(Row(null)))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(struct('123456')))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: nested in array of array") { + testTableWrite { typeName => + sql(s"CREATE TABLE t(c ARRAY>) USING $format") + sql("INSERT INTO t VALUES (array(array(null)))") + checkAnswer(spark.table("t"), Row(Seq(Seq(null)))) + val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(array('123456')))")) + assert(e.getCause.getMessage.contains( + s"input string '123456' exceeds $typeName type length limitation: 5")) + } + } + + test("length check for input string values: with trailing spaces") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('12 ', '12 ')") + sql("INSERT INTO t VALUES ('1234 ', '1234 ')") + checkAnswer(spark.table("t"), Seq( + Row("12" + " " * 3, "12 "), + Row("1234 ", "1234 "))) + } + } + + test("length check for input string values: with implicit cast") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(5), c2 VARCHAR(5)) USING $format") + sql("INSERT INTO t VALUES (1234, 1234)") + checkAnswer(spark.table("t"), Row("1234 ", "1234")) + val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (123456, 1)")) + assert(e1.getCause.getMessage.contains( + "input string '123456' exceeds char type length limitation: 5")) + val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (1, 123456)")) + assert(e2.getCause.getMessage.contains( + "input string '123456' exceeds varchar type length limitation: 5")) + } + } + + private def testConditions(df: DataFrame, conditions: Seq[(String, Boolean)]): Unit = { + checkAnswer(df.selectExpr(conditions.map(_._1): _*), Row.fromSeq(conditions.map(_._2))) + } + + test("char type comparison: top-level columns") { + withTable("t") { + sql(s"CREATE TABLE t(c1 CHAR(2), c2 CHAR(5)) USING $format") + sql("INSERT INTO t VALUES ('a', 'a')") + testConditions(spark.table("t"), Seq( + ("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: partitioned columns") { + withTable("t") { + sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 CHAR(5)) USING $format PARTITIONED BY (c1, c2)") + sql("INSERT INTO t VALUES (1, 'a', 'a')") + testConditions(spark.table("t"), Seq( + ("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: join") { + withTable("t1", "t2") { + sql(s"CREATE TABLE t1(c CHAR(2)) USING $format") + sql(s"CREATE TABLE t2(c CHAR(5)) USING $format") + sql("INSERT INTO t1 VALUES ('a')") + sql("INSERT INTO t2 VALUES ('a')") + checkAnswer(sql("SELECT t1.c FROM t1 JOIN t2 ON t1.c = t2.c"), Row("a ")) + } + } + + test("char type comparison: nested in struct") { + withTable("t") { + sql(s"CREATE TABLE t(c1 STRUCT, c2 STRUCT) USING $format") + sql("INSERT INTO t VALUES (struct('a'), struct('a'))") + 
testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array") { + withTable("t") { + sql(s"CREATE TABLE t(c1 ARRAY, c2 ARRAY) USING $format") + sql("INSERT INTO t VALUES (array('a', 'b'), array('a', 'b'))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in struct of array") { + withTable("t") { + sql("CREATE TABLE t(c1 STRUCT>, c2 STRUCT>) " + + s"USING $format") + sql("INSERT INTO t VALUES (struct(array('a', 'b')), struct(array('a', 'b')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array of struct") { + withTable("t") { + sql("CREATE TABLE t(c1 ARRAY>, c2 ARRAY>) " + + s"USING $format") + sql("INSERT INTO t VALUES (array(struct('a')), array(struct('a')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } + + test("char type comparison: nested in array of array") { + withTable("t") { + sql("CREATE TABLE t(c1 ARRAY>, c2 ARRAY>) " + + s"USING $format") + sql("INSERT INTO t VALUES (array(array('a')), array(array('a')))") + testConditions(spark.table("t"), Seq( + ("c1 = c2", true), + ("c1 < c2", false), + ("c1 IN (c2)", true))) + } + } +} + +// Some basic char/varchar tests which doesn't rely on table implementation. +class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + test("user-specified schema in cast") { + def assertNoCharType(df: DataFrame): Unit = { + checkAnswer(df, Row("0")) + assert(df.schema.map(_.dataType) == Seq(StringType)) + } + + assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) + assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) + assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) + assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + } + + test("user-specified schema in functions") { + val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") + checkAnswer(df, Row(Row("str"))) + val schema = df.schema.head.dataType.asInstanceOf[StructType] + assert(schema.map(_.dataType) == Seq(StringType)) + } + + test("user-specified schema in DataFrameReader: file source from Dataset") { + val ds = spark.range(10).map(_.toString) + val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.read.schema("id char(5)").csv(ds) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + } + + test("user-specified schema in DataFrameReader: DSV1") { + def checkSchema(df: DataFrame): Unit = { + val relations = df.queryExecution.analyzed.collect { + case l: LogicalRelation => l.relation + } + assert(relations.length == 1) + assert(relations.head.schema.map(_.dataType) == Seq(StringType)) + } + + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SimpleInsertSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SimpleInsertSource].getName).load()) + } + + test("user-specified schema in DataFrameReader: DSV2") { + def checkSchema(df: DataFrame): Unit = { + val tables = df.queryExecution.analyzed.collect { + case d: DataSourceV2Relation => d.table + } + assert(tables.length == 1) + assert(tables.head.schema.map(_.dataType) == 
Seq(StringType)) + } + + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SchemaRequiredDataSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SchemaRequiredDataSource].getName).load()) + } +} + +class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") + } +} + +class DSV2CharVarcharTestSuite extends CharVarcharTestSuite + with SharedSparkSession { + override def format: String = "foo" + protected override def sparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 9710fca6bc82c..20cad721d3d0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.sources.SimpleScanSource -import org.apache.spark.sql.types.{CharType, DoubleType, HIVE_TYPE_STRING, IntegerType, LongType, MetadataBuilder, StringType, StructField, StructType} +import org.apache.spark.sql.types.{CharType, DoubleType, IntegerType, LongType, StringType, StructField, StructType} class PlanResolutionSuite extends AnalysisTest { import CatalystSqlParser._ @@ -1090,9 +1090,7 @@ class PlanResolutionSuite extends AnalysisTest { } val sql = s"ALTER TABLE v1HiveTable ALTER COLUMN i TYPE char(1)" - val builder = new MetadataBuilder - builder.putString(HIVE_TYPE_STRING, CharType(1).catalogString) - val newColumnWithCleanedType = StructField("i", StringType, true, builder.build()) + val newColumnWithCleanedType = StructField("i", CharType(1), true) val expected = AlterTableChangeColumnCommand( TableIdentifier("v1HiveTable", Some("default")), "i", newColumnWithCleanedType) val parsed = parseAndResolve(sql) @@ -1533,44 +1531,6 @@ class PlanResolutionSuite extends AnalysisTest { } } - test("SPARK-31147: forbid CHAR type in non-Hive tables") { - def checkFailure(t: String, provider: String): Unit = { - val types = Seq( - "CHAR(2)", - "ARRAY", - "MAP", - "MAP", - "STRUCT") - types.foreach { tpe => - intercept[AnalysisException] { - parseAndResolve(s"CREATE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"REPLACE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"CREATE OR REPLACE TABLE $t(col $tpe) USING $provider") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ADD COLUMN col $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ADD COLUMN col $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t ALTER COLUMN col TYPE $tpe") - } - intercept[AnalysisException] { - parseAndResolve(s"ALTER TABLE $t REPLACE COLUMNS (col $tpe)") - } - } - } - - checkFailure("v1Table", v1Format) - checkFailure("v2Table", v2Format) - checkFailure("testcat.tab", 
"foo") - } - private def compareNormalized(plan1: LogicalPlan, plan2: LogicalPlan): Unit = { /** * Normalizes plans: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index 9a95bf770772e..ca3e714665818 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -22,6 +22,7 @@ import java.sql.{Date, Timestamp} import org.apache.spark.rdd.RDD import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -127,7 +128,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { Date.valueOf("1970-01-01"), new Timestamp(20000 + i), s"varchar_$i", - s"char_$i", + s"char_$i".padTo(18, ' '), Seq(i, i + 1), Seq(Map(s"str_$i" -> Row(i.toLong))), Map(i -> i.toString), @@ -206,10 +207,6 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { (2 to 10).map(i => Row(i, i - 1)).toSeq) test("Schema and all fields") { - def hiveMetadata(dt: String): Metadata = { - new MetadataBuilder().putString(HIVE_TYPE_STRING, dt).build() - } - val expectedSchema = StructType( StructField("string$%Field", StringType, true) :: StructField("binaryField", BinaryType, true) :: @@ -224,8 +221,8 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { StructField("decimalField2", DecimalType(9, 2), true) :: StructField("dateField", DateType, true) :: StructField("timestampField", TimestampType, true) :: - StructField("varcharField", StringType, true, hiveMetadata("varchar(12)")) :: - StructField("charField", StringType, true, hiveMetadata("char(18)")) :: + StructField("varcharField", VarcharType(12), true) :: + StructField("charField", CharType(18), true) :: StructField("arrayFieldSimple", ArrayType(IntegerType), true) :: StructField("arrayFieldComplex", ArrayType( @@ -248,7 +245,8 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { Nil ) - assert(expectedSchema == spark.table("tableWithSchema").schema) + assert(CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedSchema) == + spark.table("tableWithSchema").schema) withSQLConf(SQLConf.SUPPORT_QUOTED_REGEX_COLUMN_NAME.key -> "false") { checkAnswer( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index b30492802495f..da37b61688951 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -90,6 +90,7 @@ class HiveSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: + ApplyCharTypePadding +: HiveAnalysis +: customPostHocResolutionRules diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b2f0867114bae..bada131c8ba6d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -978,19 +978,14 @@ private[hive] class HiveClientImpl( private[hive] object HiveClientImpl extends Logging { /** 
Converts the native StructField to Hive's FieldSchema. */ def toHiveColumn(c: StructField): FieldSchema = { - val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) { - c.metadata.getString(HIVE_TYPE_STRING) - } else { - // replace NullType to HiveVoidType since Hive parse void not null. - HiveVoidType.replaceVoidType(c.dataType).catalogString - } + val typeString = HiveVoidType.replaceVoidType(c.dataType).catalogString new FieldSchema(c.name, typeString, c.getComment().orNull) } /** Get the Spark SQL native DataType from Hive's FieldSchema. */ private def getSparkSQLDataType(hc: FieldSchema): DataType = { try { - CatalystSqlParser.parseDataType(hc.getType) + CatalystSqlParser.parseRawDataType(hc.getType) } catch { case e: ParseException => throw new SparkException( @@ -1001,18 +996,10 @@ private[hive] object HiveClientImpl extends Logging { /** Builds the native StructField from Hive's FieldSchema. */ def fromHiveColumn(hc: FieldSchema): StructField = { val columnType = getSparkSQLDataType(hc) - val replacedVoidType = HiveVoidType.replaceVoidType(columnType) - val metadata = if (hc.getType != replacedVoidType.catalogString) { - new MetadataBuilder().putString(HIVE_TYPE_STRING, hc.getType).build() - } else { - Metadata.empty - } - val field = StructField( name = hc.getName, dataType = columnType, - nullable = true, - metadata = metadata) + nullable = true) Option(hc.getComment).map(field.withComment).getOrElse(field) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala new file mode 100644 index 0000000000000..55d305fda4f96 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSingleton { + + // The default Hive serde doesn't support nested null values. 
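+  // The shared char/varchar tests above insert nested nulls such as struct(null) and
+  // array(null), which is why these suites use Hive tables backed by the Parquet file
+  // format rather than the default serde.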
+ override def format: String = "hive OPTIONS(fileFormat='parquet')" + + private var originalPartitionMode = "" + + override protected def beforeAll(): Unit = { + super.beforeAll() + originalPartitionMode = spark.conf.get("hive.exec.dynamic.partition.mode", "") + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + if (originalPartitionMode == "") { + spark.conf.unset("hive.exec.dynamic.partition.mode") + } else { + spark.conf.set("hive.exec.dynamic.partition.mode", originalPartitionMode) + } + super.afterAll() + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala index 8f71ba3337aa2..1a6f6843d3911 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreCatalogSuite.scala @@ -113,24 +113,19 @@ class HiveMetastoreCatalogSuite extends TestHiveSingleton with SQLTestUtils { .add("c9", "date") .add("c10", "timestamp") .add("c11", "string") - .add("c12", "string", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "char(10)").build()) - .add("c13", "string", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "varchar(10)").build()) + .add("c12", CharType(10), true) + .add("c13", VarcharType(10), true) .add("c14", "binary") .add("c15", "decimal") .add("c16", "decimal(10)") .add("c17", "decimal(10,2)") .add("c18", "array") .add("c19", "array") - .add("c20", "array", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "array").build()) + .add("c20", ArrayType(CharType(10)), true) .add("c21", "map") - .add("c22", "map", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "map").build()) + .add("c22", MapType(IntegerType, CharType(10)), true) .add("c23", "struct") - .add("c24", "struct", true, - new MetadataBuilder().putString(HIVE_TYPE_STRING, "struct").build()) + .add("c24", new StructType().add("c", VarcharType(10)).add("d", "int"), true) assert(schema == expectedSchema) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b8b1da4cb9db7..2dfb8bb552594 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2251,8 +2251,8 @@ class HiveDDLSuite ) sql("ALTER TABLE tab ADD COLUMNS (c5 char(10))") - assert(spark.table("tab").schema.find(_.name == "c5") - .get.metadata.getString("HIVE_TYPE_STRING") == "char(10)") + assert(spark.sharedState.externalCatalog.getTable("default", "tab") + .schema.find(_.name == "c5").get.dataType == CharType(10)) } } } From 6e5446e61f278e9afac342e8f33905f5630aa7d5 Mon Sep 17 00:00:00 2001 From: Pascal Gillet Date: Mon, 30 Nov 2020 19:31:42 +0900 Subject: [PATCH 0608/1009] [SPARK-33579][UI] Fix executor blank page behind proxy ### What changes were proposed in this pull request? Fix some "hardcoded" API urls in Web UI. More specifically, we avoid the use of `location.origin` when constructing URLs for internal API calls within the JavaScript. Instead, we use `apiRoot` global variable. ### Why are the changes needed? On one hand, it allows us to build relative URLs. On the other hand, `apiRoot` reflects the Spark property `spark.ui.proxyBase` which can be set to change the root path of the Web UI. 
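As a rough sketch of the pattern applied throughout the UI JavaScript (illustrative only, not the exact patched functions; `uiRoot` and the endpoint shown here are taken from the diff further down):
```
// Before: the origin is hardcoded, so the path prefix configured via
// spark.ui.proxyBase never makes it into the request URL.
var url = location.origin + "/api/v1/applications/" + appId + "/allexecutors";

// After: uiRoot already reflects the proxy base, so the REST call goes
// through the same prefix the proxied Web UI is served from.
var url = uiRoot + "/api/v1/applications/" + appId + "/allexecutors";
```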
If `spark.ui.proxyBase` is actually set, original URLs become incorrect, and we end up with an executors blank page. I encounter this bug when accessing the Web UI behind a proxy (in my case a Kubernetes Ingress). See the following link for more context: https://github.com/jupyterhub/jupyter-server-proxy/issues/57#issuecomment-699163115 ### Does this PR introduce _any_ user-facing change? Yes, as all the changes introduced are in the JavaScript for the Web UI. ### How the changes have been tested ? I modified/debugged the JavaScript as in the commit with the help of the developer tools in Google Chrome, while accessing the Web UI of my Spark app behind my k8s ingress. Closes #30523 from pgillet/fix-executors-blank-page-behind-proxy. Authored-by: Pascal Gillet Signed-off-by: Kousuke Saruta --- .../main/resources/org/apache/spark/ui/static/stagepage.js | 2 +- core/src/main/resources/org/apache/spark/ui/static/utils.js | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index ee1115868f69b..2877aa819ab9e 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -70,7 +70,7 @@ function stageEndPoint(appId) { return newBaseURI + "/api/v1/applications/" + appId + "/" + appAttemptId + "/stages/" + stageId; } } - return location.origin + "/api/v1/applications/" + appId + "/stages/" + stageId; + return uiRoot + "/api/v1/applications/" + appId + "/stages/" + stageId; } function getColumnNameForTaskMetricSummary(columnKey) { diff --git a/core/src/main/resources/org/apache/spark/ui/static/utils.js b/core/src/main/resources/org/apache/spark/ui/static/utils.js index 7e6dd678e2641..f4914f000e705 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/utils.js +++ b/core/src/main/resources/org/apache/spark/ui/static/utils.js @@ -105,7 +105,7 @@ function getStandAloneAppId(cb) { } // Looks like Web UI is running in standalone mode // Let's get application-id using REST End Point - $.getJSON(location.origin + "/api/v1/applications", function(response, status, jqXHR) { + $.getJSON(uiRoot + "/api/v1/applications", function(response, status, jqXHR) { if (response && response.length > 0) { var appId = response[0].id; cb(appId); @@ -152,7 +152,7 @@ function createTemplateURI(appId, templateName) { var baseURI = words.slice(0, ind).join('/') + '/static/' + templateName + '-template.html'; return baseURI; } - return location.origin + "/static/" + templateName + "-template.html"; + return uiRoot + "/static/" + templateName + "-template.html"; } function setDataTableDefaults() { @@ -193,5 +193,5 @@ function createRESTEndPointForExecutorsPage(appId) { return newBaseURI + "/api/v1/applications/" + appId + "/" + attemptId + "/allexecutors"; } } - return location.origin + "/api/v1/applications/" + appId + "/allexecutors"; + return uiRoot + "/api/v1/applications/" + appId + "/allexecutors"; } From 0a612b6a40696ed8ce00997ebb4e76d05adbbd82 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 13:45:53 +0000 Subject: [PATCH 0609/1009] [SPARK-33452][SQL] Support v2 SHOW PARTITIONS ### What changes were proposed in this pull request? 1. Remove V2 logical node `ShowPartitionsStatement `, and replace it by V2 `ShowPartitions`. 2. Implement V2 execution node `ShowPartitionsExec` similar to V1 `ShowPartitionsCommand`. ### Why are the changes needed? 
To have feature parity with Datasource V1. ### Does this PR introduce _any_ user-facing change? Yes. Before the change, `SHOW PARTITIONS` fails in V2 table catalogs with the exception: ``` org.apache.spark.sql.AnalysisException: SHOW PARTITIONS is only supported with v1 tables. at org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog.org$apache$spark$sql$catalyst$analysis$ResolveSessionCatalog$$parseV1Table(ResolveSessionCatalog.scala:628) at org.apache.spark.sql.catalyst.analysis.ResolveSessionCatalog$$anonfun$apply$1.applyOrElse(ResolveSessionCatalog.scala:466) ``` ### How was this patch tested? By running the following test suites: 1. Modified `ShowPartitionsParserSuite` where `ShowPartitionsStatement` is replaced by V2 `ShowPartitions`. 2. `v2.ShowPartitionsSuite` Closes #30398 from MaxGekk/show-partitions-exec-v2. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++ .../analysis/ResolvePartitionSpec.scala | 31 +++-- .../catalyst/analysis/v2ResolutionPlans.scala | 3 +- .../sql/catalyst/parser/AstBuilder.scala | 9 +- .../catalyst/plans/logical/statements.scala | 7 - .../catalyst/plans/logical/v2Commands.scala | 15 +++ .../analysis/ResolveSessionCatalog.scala | 9 +- .../v2/AlterTableAddPartitionExec.scala | 8 +- .../v2/AlterTableDropPartitionExec.scala | 2 +- .../datasources/v2/DataSourceV2Strategy.scala | 11 +- .../datasources/v2/ShowPartitionsExec.scala | 65 ++++++++++ .../sql/connector/DataSourceV2SQLSuite.scala | 1 - .../command/ShowPartitionsParserSuite.scala | 23 ++-- .../command/ShowPartitionsSuiteBase.scala | 120 +++++++++++++++++- .../command/v1/ShowPartitionsSuite.scala | 110 +++------------- .../command/v2/ShowPartitionsSuite.scala | 38 +++--- .../hive/PartitionedTablePerfStatsSuite.scala | 4 +- 18 files changed, 309 insertions(+), 163 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 23a1b7bdde93c..abd38f2f9d940 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1085,7 +1085,7 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTableOrView(identifier).map { case v: ResolvedView => val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. '$cmd' expects a table.'") + u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.") case table => table }.getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 2818ba58075cd..61ac6346ff944 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -577,6 +577,8 @@ trait CheckAnalysis extends PredicateHelper { case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => checkAlterTablePartition(table, parts) + case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) + case _ => // Fallbacks to the following checks } @@ -1009,4 +1011,16 @@ trait CheckAnalysis extends PredicateHelper { case _ => } } + + // Make sure that the `SHOW PARTITIONS` command is allowed for the table + private def checkShowPartitions(showPartitions: ShowPartitions): Unit = showPartitions match { + case ShowPartitions(rt: ResolvedTable, _) + if !rt.table.isInstanceOf[SupportsPartitionManagement] => + failAnalysis(s"SHOW PARTITIONS cannot run for a table which does not support partitioning") + case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement), _) + if partTable.partitionSchema().isEmpty => + failAnalysis( + s"SHOW PARTITIONS is not allowed on a table that is not partitioned: ${partTable.name()}") + case _ => + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 98c6872a47cc6..38991a9e24fa8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan, ShowPartitions} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -40,6 +40,12 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + + case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => + r.copy(pattern = resolvePartitionSpecs( + table.name, + partSpecs.toSeq, + table.partitionSchema()).headOption) } private def resolvePartitionSpecs( @@ -48,25 +54,26 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partSchema: StructType): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => + val normalizedSpec = normalizePartitionSpec( + unresolvedPartSpec.spec, + partSchema.map(_.name), + tableName, + conf.resolver) + val partitionNames = normalizedSpec.keySet + val requestedFields = 
partSchema.filter(field => partitionNames.contains(field.name)) ResolvedPartitionSpec( - convertToPartIdent(tableName, unresolvedPartSpec.spec, partSchema), + requestedFields.map(_.name), + convertToPartIdent(normalizedSpec, requestedFields), unresolvedPartSpec.location) case resolvedPartitionSpec: ResolvedPartitionSpec => resolvedPartitionSpec } private def convertToPartIdent( - tableName: String, partitionSpec: TablePartitionSpec, - partSchema: StructType): InternalRow = { - val normalizedSpec = normalizePartitionSpec( - partitionSpec, - partSchema.map(_.name), - tableName, - conf.resolver) - - val partValues = partSchema.map { part => - val raw = normalizedSpec.get(part.name).orNull + schema: Seq[StructField]): InternalRow = { + val partValues = schema.map { part => + val raw = partitionSpec.get(part.name).orNull val dt = CharVarcharUtils.replaceCharVarcharWithString(part.dataType) Cast(Literal.create(raw, StringType), dt, Some(conf.sessionLocalTimeZone)).eval() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 95fc4f47dec7f..1518f064d78db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -89,7 +89,8 @@ case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: T } case class ResolvedPartitionSpec( - spec: InternalRow, + names: Seq[String], + ident: InternalRow, location: Option[String] = None) extends PartitionSpec /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ce95ea4b41def..ff8b56f0b724b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3611,9 +3611,12 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitShowPartitions(ctx: ShowPartitionsContext): LogicalPlan = withOrigin(ctx) { - val table = visitMultipartIdentifier(ctx.multipartIdentifier) - val partitionKeys = Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec) - ShowPartitionsStatement(table, partitionKeys) + val partitionKeys = Option(ctx.partitionSpec).map { specCtx => + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) + } + ShowPartitions( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "SHOW PARTITIONS"), + partitionKeys) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index effb4cff75930..1763547792e35 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -419,13 +419,6 @@ case class TruncateTableStatement( tableName: Seq[String], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * A SHOW PARTITIONS statement, as parsed from SQL - */ -case class ShowPartitionsStatement( - tableName: Seq[String], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE 
statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 4931f0eb2c007..67056470418fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -691,3 +691,18 @@ case class TruncateTable( override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the SHOW PARTITIONS command. + */ +case class ShowPartitions( + child: LogicalPlan, + pattern: Option[PartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil + + override lazy val resolved: Boolean = + childrenResolved && pattern.forall(_.isInstanceOf[ResolvedPartitionSpec]) + + override val output: Seq[Attribute] = Seq( + AttributeReference("partition", StringType, nullable = false)()) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 53edd4fca7794..f6005f4b413a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -430,11 +430,12 @@ class ResolveSessionCatalog( ident.asTableIdentifier, partitionSpec) - case ShowPartitionsStatement(tbl, partitionSpec) => - val v1TableName = parseV1Table(tbl, "SHOW PARTITIONS") + case ShowPartitions( + ResolvedV1TableOrViewIdentifier(ident), + pattern @ (None | Some(UnresolvedPartitionSpec(_, _)))) => ShowPartitionsCommand( - v1TableName.asTableIdentifier, - partitionSpec) + ident.asTableIdentifier, + pattern.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) case ShowColumns(ResolvedV1TableOrViewIdentifier(ident), ns) => val v1TableName = ident.asTableIdentifier diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala index 0171cdd9ca41a..d7fe25cff2064 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableAddPartitionExec.scala @@ -37,20 +37,20 @@ case class AlterTableAddPartitionExec( override protected def run(): Seq[InternalRow] = { val (existsParts, notExistsParts) = - partSpecs.partition(p => table.partitionExists(p.spec)) + partSpecs.partition(p => table.partitionExists(p.ident)) if (existsParts.nonEmpty && !ignoreIfExists) { throw new PartitionsAlreadyExistException( - table.name(), existsParts.map(_.spec), table.partitionSchema()) + table.name(), existsParts.map(_.ident), table.partitionSchema()) } notExistsParts match { case Seq() => // Nothing will be done case Seq(partitionSpec) => val partProp = partitionSpec.location.map(loc => "location" -> loc).toMap - table.createPartition(partitionSpec.spec, partProp.asJava) + table.createPartition(partitionSpec.ident, partProp.asJava) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => - val partIdents = notExistsParts.map(_.spec) + val partIdents = notExistsParts.map(_.ident) val partProps = notExistsParts.map(_.location.map(loc => "location" -> loc).toMap) 
table.asAtomicPartitionable .createPartitions( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index 09a65804a05eb..c7a68ecb2bbee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -35,7 +35,7 @@ case class AlterTableDropPartitionExec( override protected def run(): Seq[InternalRow] = { val (existsPartIdents, notExistsPartIdents) = - partSpecs.map(_.spec).partition(table.partitionExists) + partSpecs.map(_.ident).partition(table.partitionExists) if (notExistsPartIdents.nonEmpty && !ignoreIfNotExists) { throw new NoSuchPartitionsException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1fae8d937e90c..0c7bc19ad054e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} -import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedTable} +import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ @@ -318,6 +318,15 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case ShowColumns(_: ResolvedTable, _) => throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") + case r @ ShowPartitions( + ResolvedTable(catalog, _, table: SupportsPartitionManagement), + pattern @ (None | Some(_: ResolvedPartitionSpec))) => + ShowPartitionsExec( + r.output, + catalog, + table, + pattern.map(_.asInstanceOf[ResolvedPartitionSpec])) :: Nil + case _ => Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala new file mode 100644 index 0000000000000..44d6f4495f552 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.ResolvedPartitionSpec +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.escapePathName +import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal} +import org.apache.spark.sql.connector.catalog.{SupportsPartitionManagement, TableCatalog} +import org.apache.spark.sql.execution.LeafExecNode +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StringType +import org.apache.spark.unsafe.types.UTF8String + +/** + * Physical plan node for showing partitions. + */ +case class ShowPartitionsExec( + output: Seq[Attribute], + catalog: TableCatalog, + table: SupportsPartitionManagement, + partitionSpec: Option[ResolvedPartitionSpec]) extends V2CommandExec with LeafExecNode { + override protected def run(): Seq[InternalRow] = { + val (names, ident) = partitionSpec + .map(spec => (spec.names, spec.ident)) + // listPartitionByNames() should return all partitions if the partition spec + // does not specify any partition names. + .getOrElse((Seq.empty[String], InternalRow.empty)) + val partitionIdentifiers = table.listPartitionByNames(names.toArray, ident) + // Converting partition identifiers as `InternalRow` of partition values, + // for instance InternalRow(value0, value1, ..., valueN), to `InternalRow`s + // with a string in the format: "col0=value0/col1=value1/.../colN=valueN". 
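+    // For example, for a table partitioned by (year INT, month INT), the partition
+    // identifier InternalRow(2015, 1) is rendered as the string "year=2015/month=1".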
+ val schema = table.partitionSchema() + val len = schema.length + val partitions = new Array[String](len) + val timeZoneId = SQLConf.get.sessionLocalTimeZone + partitionIdentifiers.map { row => + var i = 0 + while (i < len) { + val dataType = schema(i).dataType + val partValue = row.get(i, dataType) + val partValueStr = Cast(Literal(partValue, dataType), StringType, Some(timeZoneId)) + .eval().toString + partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) + i += 1 + } + InternalRow(UTF8String.fromString(partitions.mkString("/"))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index ffbc2287d81ad..583bc694dc3be 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2284,7 +2284,6 @@ class DataSourceV2SQLSuite verify(s"CACHE TABLE $t") verify(s"UNCACHE TABLE $t") verify(s"TRUNCATE TABLE $t") - verify(s"SHOW PARTITIONS $t") verify(s"SHOW COLUMNS FROM $t") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala index bc75528b9644c..7b5cf8af4eead 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsParserSuite.scala @@ -17,25 +17,30 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.catalyst.analysis.AnalysisTest +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan import org.apache.spark.sql.catalyst.parser.ParseException -import org.apache.spark.sql.catalyst.plans.logical.ShowPartitionsStatement +import org.apache.spark.sql.catalyst.plans.logical.ShowPartitions import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.test.SharedSparkSession class ShowPartitionsParserSuite extends AnalysisTest with SharedSparkSession { test("SHOW PARTITIONS") { + val commandName = "SHOW PARTITIONS" Seq( - "SHOW PARTITIONS t1" -> ShowPartitionsStatement(Seq("t1"), None), - "SHOW PARTITIONS db1.t1" -> ShowPartitionsStatement(Seq("db1", "t1"), None), + "SHOW PARTITIONS t1" -> ShowPartitions(UnresolvedTable(Seq("t1"), commandName), None), + "SHOW PARTITIONS db1.t1" -> ShowPartitions( + UnresolvedTable(Seq("db1", "t1"), commandName), None), "SHOW PARTITIONS t1 PARTITION(partcol1='partvalue', partcol2='partvalue')" -> - ShowPartitionsStatement( - Seq("t1"), - Some(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue"))), - "SHOW PARTITIONS a.b.c" -> ShowPartitionsStatement(Seq("a", "b", "c"), None), + ShowPartitions( + UnresolvedTable(Seq("t1"), commandName), + Some(UnresolvedPartitionSpec(Map("partcol1" -> "partvalue", "partcol2" -> "partvalue")))), + "SHOW PARTITIONS a.b.c" -> ShowPartitions( + UnresolvedTable(Seq("a", "b", "c"), commandName), None), "SHOW PARTITIONS a.b.c PARTITION(ds='2017-06-10')" -> - ShowPartitionsStatement(Seq("a", "b", "c"), Some(Map("ds" -> "2017-06-10"))) + ShowPartitions( + UnresolvedTable(Seq("a", "b", "c"), commandName), + Some(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")))) ).foreach { case (sql, expected) => val 
parsed = parsePlan(sql) comparePlans(parsed, expected) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 413e170326eea..82457f96a3003 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -20,17 +20,133 @@ package org.apache.spark.sql.execution.command import org.scalactic.source.Position import org.scalatest.Tag -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types.{StringType, StructType} trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String - protected def defaultNamespace: Seq[String] protected def defaultUsing: String + protected def wrongPartitionColumnsError(columns: String*): String + // Gets the schema of `SHOW PARTITIONS` + private val showSchema: StructType = new StructType().add("partition", StringType, false) + protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === showSchema) + checkAnswer(df, expected) + } override def test(testName: String, testTags: Tag*)(testFun: => Any) (implicit pos: Position): Unit = { super.test(s"SHOW PARTITIONS $version: " + testName, testTags: _*)(testFun) } + + protected def createDateTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 1) SELECT 1, 1") + sql(s"INSERT INTO $table PARTITION(year = 2015, month = 2) SELECT 2, 2") + sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 2)") + sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") + } + + protected def createWideTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table ( + | price int, qty int, + | year int, month int, hour int, minute int, sec int, extra int) + |$defaultUsing + |PARTITIONED BY (year, month, hour, minute, sec, extra) + |""".stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + |""".stripMargin) + sql(s""" + |ALTER TABLE $table + |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) + |""".stripMargin) + } + + test("show partitions of non-partitioned table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.not_partitioned_table" + withTable(table) { + sql(s"CREATE TABLE $table (col1 int) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table") + }.getMessage + assert(errMsg.contains("not allowed on a table that is not partitioned")) + } + } + } + + test("non-partitioning columns") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") + }.getMessage + assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz"))) + } + } + } + + test("show everything") { + 
withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + runShowPartitionsSql( + s"show partitions $table", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) + } + } + } + + test("filter by partitions") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + createDateTable(table) + runShowPartitionsSql( + s"show partitions $table PARTITION(year=2015)", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: Nil) + runShowPartitionsSql( + s"show partitions $table PARTITION(year=2015, month=1)", + Row("year=2015/month=1") :: Nil) + runShowPartitionsSql( + s"show partitions $table PARTITION(month=2)", + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: Nil) + } + } + } + + test("show everything more than 5 part keys") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.wideTable" + withTable(table) { + createWideTable(table) + runShowPartitionsSql( + s"show partitions $table", + Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: + Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index bcc71e9b7241c..2b2bc9e63dc82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -26,104 +25,27 @@ import org.apache.spark.sql.test.SharedSparkSession trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { override def version: String = "V1" override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultNamespace: Seq[String] = Seq("default") override def defaultUsing: String = "USING parquet" - private def createDateTable(table: String): Unit = { - sql(s""" - |CREATE TABLE $table (price int, qty int, year int, month int) - |$defaultUsing - |partitioned by (year, month)""".stripMargin) - sql(s"INSERT INTO $table PARTITION(year = 2015, month = 1) SELECT 1, 1") - sql(s"INSERT INTO $table PARTITION(year = 2015, month = 2) SELECT 2, 2") - sql(s"INSERT INTO $table PARTITION(year = 2016, month = 2) SELECT 3, 3") - sql(s"INSERT INTO $table PARTITION(year = 2016, month = 3) SELECT 3, 3") + override protected def wrongPartitionColumnsError(columns: String*): String = { + s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are specified" } - test("show everything") { + test("show everything in the default database") { val table = "dateTable" withTable(table) { createDateTable(table) - checkAnswer( - sql(s"show partitions $table"), + runShowPartitionsSql( + s"show partitions default.$table", Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - - 
checkAnswer( - sql(s"show partitions default.$table"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - } - } - - test("filter by partitions") { - val table = "dateTable" - withTable(table) { - createDateTable(table) - checkAnswer( - sql(s"show partitions default.$table PARTITION(year=2015)"), - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: Nil) - checkAnswer( - sql(s"show partitions default.$table PARTITION(year=2015, month=1)"), - Row("year=2015/month=1") :: Nil) - checkAnswer( - sql(s"show partitions default.$table PARTITION(month=2)"), Row("year=2015/month=2") :: - Row("year=2016/month=2") :: Nil) - } - } - - test("show everything more than 5 part keys") { - val table = "wideTable" - withTable(table) { - sql(s""" - |CREATE TABLE $table ( - | price int, qty int, - | year int, month int, hour int, minute int, sec int, extra int) - |$defaultUsing - |PARTITIONED BY (year, month, hour, minute, sec, extra)""".stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - """.stripMargin) - checkAnswer( - sql(s"show partitions $table"), - Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: - Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) - } - } - - test("non-partitioning columns") { - val table = "dateTable" - withTable(table) { - createDateTable(table) - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(errMsg.contains("Non-partitioning column(s) [abcd, xyz] are specified")) - } - } - - test("show partitions of non-partitioned table") { - val table = "not_partitioned_table" - withTable(table) { - sql(s"CREATE TABLE $table (col1 int) $defaultUsing") - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table") - }.getMessage - assert(errMsg.contains("not allowed on a table that is not partitioned")) + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) } } + // The test fails for V2 Table Catalogs with the exception: + // org.apache.spark.sql.AnalysisException: CREATE VIEW is only supported with v1 tables. 
test("show partitions of a view") { val table = "dateTable" withTable(table) { @@ -134,7 +56,7 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $view") }.getMessage - assert(errMsg.contains("is not allowed on a view")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } } @@ -143,10 +65,10 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { val viewName = "test_view" withTempView(viewName) { spark.range(10).createTempView(viewName) - val errMsg = intercept[NoSuchTableException] { + val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $viewName") }.getMessage - assert(errMsg.contains(s"Table or view '$viewName' not found")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } } @@ -159,12 +81,12 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSessio val viewName = "test_view" withTempView(viewName) { sql(s""" - |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING) - |$defaultUsing""".stripMargin) - val errMsg = intercept[NoSuchTableException] { + |CREATE TEMPORARY VIEW $viewName (c1 INT, c2 STRING) + |$defaultUsing""".stripMargin) + val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $viewName") }.getMessage - assert(errMsg.contains(s"Table or view '$viewName' not found")) + assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 8a63cd49e89e9..ca47a713ad604 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -19,38 +19,34 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.InMemoryTableCatalog +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession { override def version: String = "V2" override def catalog: String = "test_catalog" - override def defaultNamespace: Seq[String] = Nil override def defaultUsing: String = "USING _" override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - // TODO(SPARK-33452): Create a V2 SHOW PARTITIONS execution node - test("not supported SHOW PARTITIONS") { - def testV1Command(sqlCommand: String, sqlParams: String): Unit = { - val e = intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) - } - val t = s"$catalog.ns1.ns2.tbl" - withTable(t) { - sql( - s""" - |CREATE TABLE $t (id bigint, data string) - |$defaultUsing - |PARTITIONED BY (id) - """.stripMargin) + override protected def wrongPartitionColumnsError(columns: String*): String = { + s"${columns.head} is not a valid partition column" + } - testV1Command("SHOW PARTITIONS", t) - 
testV1Command("SHOW PARTITIONS", s"$t PARTITION(id='1')") + test("a table does not support partitioning") { + val table = s"non_part_$catalog.tab1" + withTable(table) { + sql(s""" + |CREATE TABLE $table (price int, qty int, year int, month int) + |$defaultUsing""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $table") + }.getMessage + assert(errMsg.contains( + "SHOW PARTITIONS cannot run for a table which does not support partitioning")) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala index 3af163af0968c..49e26614e13c4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionedTablePerfStatsSuite.scala @@ -300,7 +300,7 @@ class PartitionedTablePerfStatsSuite HiveCatalogMetrics.reset() assert(spark.sql("show partitions test").count() == 100) - assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() <= 10) } } } @@ -323,7 +323,7 @@ class PartitionedTablePerfStatsSuite HiveCatalogMetrics.reset() assert(spark.sql("show partitions test").count() == 100) - assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() < 10) + assert(HiveCatalogMetrics.METRIC_HIVE_CLIENT_CALLS.getCount() <= 10) } } } From 6fd148fea890391941f876e0a14446d875fe72e1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 14:05:49 +0000 Subject: [PATCH 0610/1009] [SPARK-33569][SQL] Remove getting partitions by an identifier prefix ### What changes were proposed in this pull request? 1. Remove the method `listPartitionIdentifiers()` from the `SupportsPartitionManagement` interface. The method lists partitions by ident prefix. 2. Rename `listPartitionByNames()` to `listPartitionIdentifiers()`. 3. Re-implement the default method `partitionExists()` using new method. ### Why are the changes needed? Getting partitions by ident prefix only is not used, and it can be removed to improve code maintenance. Also this makes the `SupportsPartitionManagement` interface cleaner. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly org.apache.spark.sql.connector.catalog.*" ``` Closes #30514 from MaxGekk/remove-listPartitionIdentifiers. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 15 ++---- .../connector/InMemoryPartitionTable.scala | 10 +--- ...pportsAtomicPartitionManagementSuite.scala | 28 ++++++----- .../SupportsPartitionManagementSuite.scala | 48 ++++++++++--------- .../AlterTablePartitionV2SQLSuite.scala | 6 ++- 5 files changed, 52 insertions(+), 55 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 380717d2e0e9b..9d898f2f477e1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -17,6 +17,7 @@ package org.apache.spark.sql.connector.catalog; +import java.util.Arrays; import java.util.Map; import org.apache.spark.annotation.Experimental; @@ -79,7 +80,9 @@ void createPartition( * @return true if the partition exists, false otherwise */ default boolean partitionExists(InternalRow ident) { - return listPartitionIdentifiers(ident).length > 0; + String[] partitionNames = partitionSchema().names(); + String[] requiredNames = Arrays.copyOfRange(partitionNames, 0, ident.numFields()); + return listPartitionIdentifiers(requiredNames, ident).length > 0; } /** @@ -105,14 +108,6 @@ void replacePartitionMetadata( Map loadPartitionMetadata(InternalRow ident) throws UnsupportedOperationException; - /** - * List the identifiers of all partitions that have the ident prefix in a table. - * - * @param ident a prefix of partition identifier - * @return an array of Identifiers for the partitions - */ - InternalRow[] listPartitionIdentifiers(InternalRow ident); - /** * List the identifiers of all partitions that match to the ident by names. * @@ -120,5 +115,5 @@ Map loadPartitionMetadata(InternalRow ident) * @param ident a partition identifier values. 
* @return an array of Identifiers for the partitions */ - InternalRow[] listPartitionByNames(String[] names, InternalRow ident); + InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident); } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index ba762a58b1e52..6a8432e635310 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -83,14 +83,6 @@ class InMemoryPartitionTable( } } - def listPartitionIdentifiers(ident: InternalRow): Array[InternalRow] = { - val prefixPartCols = - new StructType(partitionSchema.dropRight(partitionSchema.length - ident.numFields).toArray) - val prefixPart = ident.toSeq(prefixPartCols) - memoryTablePartitions.keySet().asScala - .filter(_.toSeq(partitionSchema).startsWith(prefixPart)).toArray - } - override def partitionExists(ident: InternalRow): Boolean = memoryTablePartitions.containsKey(ident) @@ -98,7 +90,7 @@ class InMemoryPartitionTable( memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } - override def listPartitionByNames( + override def listPartitionIdentifiers( names: Array[String], ident: InternalRow): Array[InternalRow] = { assert(names.length == ident.numFields, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala index 6f7c30653110b..ad2631650b7ef 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala @@ -47,34 +47,38 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { newCatalog } + private def hasPartitions(table: SupportsPartitionManagement): Boolean = { + !table.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty + } + test("createPartitions") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) partTable.createPartitions( partIdents, Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(InternalRow.apply("3"))) assert(partTable.partitionExists(InternalRow.apply("4"))) partTable.dropPartition(InternalRow.apply("3")) partTable.dropPartition(InternalRow.apply("4")) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("createPartitions failed if partition already exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("4") partTable.createPartition(partIdent, new 
util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) @@ -85,42 +89,42 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { assert(!partTable.partitionExists(InternalRow.apply("3"))) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartitions") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) partTable.createPartitions( partIdents, Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(InternalRow.apply("3"))) assert(partTable.partitionExists(InternalRow.apply("4"))) partTable.dropPartitions(partIdents) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartitions failed if partition not exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("4") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) assert(!partTable.dropPartitions(partIdents)) assert(partTable.partitionExists(partIdent)) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index caf7e91612563..9de0fe6108c99 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -48,97 +48,101 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { newCatalog } + private def hasPartitions(table: SupportsPartitionManagement): Boolean = { + !table.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty + } + test("createPartition") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + 
assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("dropPartition") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") val partIdent1 = InternalRow.apply("4") partTable.createPartition(partIdent, new util.HashMap[String, String]()) partTable.createPartition(partIdent1, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 2) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 2) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) partTable.dropPartition(partIdent1) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("replacePartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(partTable.loadPartitionMetadata(partIdent).isEmpty) partTable.replacePartitionMetadata(partIdent, Map("paramKey" -> "paramValue").asJava) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(!partTable.loadPartitionMetadata(partIdent).isEmpty) assert(partTable.loadPartitionMetadata(partIdent).get("paramKey") == "paramValue") partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("loadPartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = InternalRow.apply("3") partTable.createPartition(partIdent, Map("paramKey" -> "paramValue").asJava) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).nonEmpty) + assert(hasPartitions(partTable)) assert(partTable.partitionExists(partIdent)) assert(!partTable.loadPartitionMetadata(partIdent).isEmpty) assert(partTable.loadPartitionMetadata(partIdent).get("paramKey") == "paramValue") partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("listPartitionIdentifiers") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( table.name(), table.schema(), table.partitioning(), table.properties()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) val partIdent = 
InternalRow.apply("3") partTable.createPartition(partIdent, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) val partIdent1 = InternalRow.apply("4") partTable.createPartition(partIdent1, new util.HashMap[String, String]()) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 2) - assert(partTable.listPartitionIdentifiers(partIdent1).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 2) + assert(partTable.listPartitionIdentifiers(Array("dt"), partIdent1).length == 1) partTable.dropPartition(partIdent) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).length == 1) + assert(partTable.listPartitionIdentifiers(Array.empty, InternalRow.empty).length == 1) partTable.dropPartition(partIdent1) - assert(partTable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert(!hasPartitions(partTable)) } test("listPartitionByNames") { @@ -170,7 +174,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { (Array("part0", "part1"), InternalRow(3, "xyz")) -> Set(), (Array("part1"), InternalRow(3.14f)) -> Set() ).foreach { case ((names, idents), expected) => - assert(partTable.listPartitionByNames(names, idents).toSet === expected) + assert(partTable.listPartitionIdentifiers(names, idents).toSet === expected) } // Check invalid parameters Seq( @@ -178,7 +182,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { (Array("col0", "part1"), InternalRow(0, 1)), (Array("wrong"), InternalRow("invalid")) ).foreach { case (names, idents) => - intercept[AssertionError](partTable.listPartitionByNames(names, idents)) + intercept[AssertionError](partTable.listPartitionIdentifiers(names, idents)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 4cacd5ec2b49e..3583eceec7559 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -141,7 +141,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert( + partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) } } @@ -161,7 +162,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert(partTable.asPartitionable.listPartitionIdentifiers(InternalRow.empty).isEmpty) + assert( + partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) } } From 030b3139dadc342e82d71f3fb241c320a7577131 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 30 Nov 2020 16:40:36 +0000 Subject: [PATCH 0611/1009] [SPARK-33569][SPARK-33452][SQL][FOLLOWUP] Fix a build error 
in `ShowPartitionsExec` ### What changes were proposed in this pull request? Use `listPartitionIdentifiers ` instead of `listPartitionByNames` in `ShowPartitionsExec`. The `listPartitionByNames` was renamed by https://github.com/apache/spark/pull/30514. ### Why are the changes needed? To fix build error. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests for the `SHOW PARTITIONS` command: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowPartitionsSuite" ``` Closes #30553 from MaxGekk/fix-build-show-partitions-exec. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/execution/datasources/v2/ShowPartitionsExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index 44d6f4495f552..c4b6aa805d58f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -41,7 +41,7 @@ case class ShowPartitionsExec( // listPartitionByNames() should return all partitions if the partition spec // does not specify any partition names. .getOrElse((Seq.empty[String], InternalRow.empty)) - val partitionIdentifiers = table.listPartitionByNames(names.toArray, ident) + val partitionIdentifiers = table.listPartitionIdentifiers(names.toArray, ident) // Converting partition identifiers as `InternalRow` of partition values, // for instance InternalRow(value0, value1, ..., valueN), to `InternalRow`s // with a string in the format: "col0=value0/col1=value1/.../colN=valueN". From f3c2583cc3ad6a2a24bfb09e2ee7af4e63e5bf66 Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Mon, 30 Nov 2020 14:40:51 -0600 Subject: [PATCH 0612/1009] [SPARK-33185][YARN][FOLLOW-ON] Leverage RM's RPC API instead of REST to fetch driver log links in yarn.Client ### What changes were proposed in this pull request? This is a follow-on to PR #30096 which initially added support for printing direct links to the driver stdout/stderr logs from the application report output in `yarn.Client` using the `spark.yarn.includeDriverLogsLink` configuration. That PR made use of the ResourceManager's REST APIs to fetch the necessary information to construct the links. This PR proposes removing the dependency on the REST API, since the new logic is the only place in `yarn.Client` which makes use of this API, and instead leverages the RPC API via `YarnClient`, which brings the code in line with the rest of `yarn.Client`. ### Why are the changes needed? While the old logic worked okay when running a Spark application in a "standard" environment with full access to Kerberos credentials, it can fail when run in an environment with restricted Kerberos credentials. In our case, this environment is represented by [Azkaban](https://azkaban.github.io/), but it likely affects other job scheduling systems as well. 
In such an environment, the application has delegation tokens which enabled it to communicate with services such as YARN, but the RM REST API is not typically covered by such delegation tokens (note that although YARN does actually support accessing the RM REST API via a delegation token as documented [here](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html#Cluster_Delegation_Tokens_API), it is a new feature in alpha phase, and most deployments are likely not retrieving this token today). Besides this enhancement, leveraging the `YarnClient` APIs greatly simplifies the processing logic, such as removing all JSON parsing. ### Does this PR introduce _any_ user-facing change? Very minimal user-facing changes on top of PR #30096. Basically expands the scope of environments in which that feature will operate correctly. ### How was this patch tested? In addition to redoing the `spark-submit` testing as mentioned in PR #30096, I also tested this logic in a restricted-credentials environment (Azkaban). It succeeds where the previous logic would fail with a 401 error. Closes #30450 from xkrogen/xkrogen-SPARK-33185-driverlogs-followon. Authored-by: Erik Krogen Signed-off-by: Mridul Muralidharan gmail.com> --- .../org/apache/spark/deploy/yarn/Client.scala | 67 +++++++------------ .../spark/deploy/yarn/ClientSuite.scala | 47 ------------- .../spark/deploy/yarn/YarnClusterSuite.scala | 31 +++++++++ 3 files changed, 54 insertions(+), 91 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 552167c935b30..d252e8368a0c4 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -29,12 +29,8 @@ import scala.collection.immutable.{Map => IMap} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.util.control.NonFatal -import com.fasterxml.jackson.databind.ObjectMapper import com.google.common.base.Objects import com.google.common.io.Files -import javax.ws.rs.client.ClientBuilder -import javax.ws.rs.core.MediaType -import javax.ws.rs.core.Response.Status.Family import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -51,7 +47,6 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.security.AMRMTokenIdentifier import org.apache.hadoop.yarn.util.Records -import org.apache.hadoop.yarn.webapp.util.WebAppUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.api.python.PythonUtils @@ -1089,9 +1084,9 @@ private[spark] class Client( // If DEBUG is enabled, log report details every iteration // Otherwise, log them every time the application changes state if (log.isDebugEnabled) { - logDebug(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logDebug(formatReportDetails(report, getDriverLogsLink(report))) } else if (lastState != state) { - logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logInfo(formatReportDetails(report, getDriverLogsLink(report))) } } @@ -1192,33 +1187,31 @@ private[spark] class Client( } /** - * Fetch links to the logs of the driver for the given application ID. 
This requires hitting the - * RM REST API. Returns an empty map if the links could not be fetched. If this feature is - * disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], an empty map is returned immediately. + * Fetch links to the logs of the driver for the given application report. This requires + * query the ResourceManager via RPC. Returns an empty map if the links could not be fetched. + * If this feature is disabled via [[CLIENT_INCLUDE_DRIVER_LOGS_LINK]], or if the application + * report indicates that the driver container isn't currently running, an empty map is + * returned immediately. */ - private def getDriverLogsLink(appId: ApplicationId): IMap[String, String] = { - if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK)) { - return IMap() + private def getDriverLogsLink(appReport: ApplicationReport): IMap[String, String] = { + if (!sparkConf.get(CLIENT_INCLUDE_DRIVER_LOGS_LINK) + || appReport.getYarnApplicationState != YarnApplicationState.RUNNING) { + return IMap.empty } try { - val baseRmUrl = WebAppUtils.getRMWebAppURLWithScheme(hadoopConf) - val response = ClientBuilder.newClient() - .target(baseRmUrl) - .path("ws").path("v1").path("cluster").path("apps") - .path(appId.toString).path("appattempts") - .request(MediaType.APPLICATION_JSON) - .get() - response.getStatusInfo.getFamily match { - case Family.SUCCESSFUL => parseAppAttemptsJsonResponse(response.readEntity(classOf[String])) - case _ => - logWarning(s"Unable to fetch app attempts info from $baseRmUrl, got " - + s"status code ${response.getStatus}: ${response.getStatusInfo.getReasonPhrase}") - IMap() - } + Option(appReport.getCurrentApplicationAttemptId) + .flatMap(attemptId => Option(yarnClient.getApplicationAttemptReport(attemptId))) + .flatMap(attemptReport => Option(attemptReport.getAMContainerId)) + .flatMap(amContainerId => Option(yarnClient.getContainerReport(amContainerId))) + .flatMap(containerReport => Option(containerReport.getLogUrl)) + .map(YarnContainerInfoHelper.getLogUrlsFromBaseUrl) + .getOrElse(IMap.empty) } catch { case e: Exception => - logWarning(s"Unable to get driver log links for $appId", e) - IMap() + logWarning(s"Unable to get driver log links for $appId: $e") + // Include the full stack trace only at DEBUG level to reduce verbosity + logDebug(s"Unable to get driver log links for $appId", e) + IMap.empty } } @@ -1236,7 +1229,7 @@ private[spark] class Client( val report = getApplicationReport(appId) val state = report.getYarnApplicationState logInfo(s"Application report for $appId (state: $state)") - logInfo(formatReportDetails(report, getDriverLogsLink(report.getApplicationId))) + logInfo(formatReportDetails(report, getDriverLogsLink(report))) if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { throw new SparkException(s"Application $appId finished with status: $state") } @@ -1627,20 +1620,6 @@ private object Client extends Logging { writer.flush() out.closeEntry() } - - private[yarn] def parseAppAttemptsJsonResponse(jsonString: String): IMap[String, String] = { - val objectMapper = new ObjectMapper() - // If JSON response is malformed somewhere along the way, MissingNode will be returned, - // which allows for safe continuation of chaining. The `elements()` call will be empty, - // and None will get returned. 
- objectMapper.readTree(jsonString) - .path("appAttempts").path("appAttempt") - .elements().asScala.toList.takeRight(1).headOption - .map(_.path("logsLink").asText("")) - .filterNot(_ == "") - .map(baseUrl => YarnContainerInfoHelper.getLogUrlsFromBaseUrl(baseUrl)) - .getOrElse(IMap()) - } } private[spark] class YarnClusterApplication extends SparkApplication { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index fccb2406d66f8..ea3acec3bb78b 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -583,53 +583,6 @@ class ClientSuite extends SparkFunSuite with Matchers { } } - test("SPARK-33185 Parse YARN AppAttempts valid JSON response") { - val appIdSuffix = "1500000000000_1234567" - val containerId = s"container_e1_${appIdSuffix}_01_000001" - val nodeHost = "node.example.com" - val jsonString = - s""" - |{"appAttempts": { - | "appAttempt": [ { - | "id":1, - | "startTime":1600000000000, - | "finishedTime":1600000100000, - | "containerId":"$containerId", - | "nodeHttpAddress":"$nodeHost:8042", - | "nodeId":"node.example.com:8041", - | "logsLink":"http://$nodeHost:8042/node/containerlogs/$containerId/username", - | "blacklistedNodes":"", - | "nodesBlacklistedBySystem":"", - | "appAttemptId":"appattempt_${appIdSuffix}_000001" - | }] - |}} - |""".stripMargin - val logLinkMap = Client.parseAppAttemptsJsonResponse(jsonString) - assert(logLinkMap.keySet === Set("stdout", "stderr")) - assert(logLinkMap("stdout") === - s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stdout?start=-4096") - assert(logLinkMap("stderr") === - s"http://$nodeHost:8042/node/containerlogs/$containerId/username/stderr?start=-4096") - } - - test("SPARK-33185 Parse YARN AppAttempts invalid JSON response") { - // No "appAttempt" present - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { } }""") === Map()) - - // "appAttempt" is empty - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts": { "appAttempt": [ ] } }""") - === Map()) - - // logsLink is missing - assert(Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"id":1}]}}""") - === Map()) - - // logsLink is present but empty - assert( - Client.parseAppAttemptsJsonResponse("""{"appAttempts":{"appAttempt":[{"logsLink":""}]}}""") - === Map()) - } - private val matching = Seq( ("files URI match test1", "file:///file1", "file:///file2"), ("files URI match test2", "file:///c:file1", "file://c:file2"), diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index cf754cca315f0..222b24ca12dce 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -230,6 +230,37 @@ class YarnClusterSuite extends BaseYarnClusterSuite { } } + test("running Spark in yarn-cluster mode displays driver log links") { + val log4jConf = new File(tempDir, "log4j.properties") + val logOutFile = new File(tempDir, "logs") + Files.write( + s"""log4j.rootCategory=DEBUG,file + |log4j.appender.file=org.apache.log4j.FileAppender + |log4j.appender.file.file=$logOutFile + 
|log4j.appender.file.layout=org.apache.log4j.PatternLayout + |""".stripMargin, + log4jConf, StandardCharsets.UTF_8) + // Since this test is trying to extract log output from the SparkSubmit process itself, + // standard options to the Spark process don't take effect. Leverage the java-opts file which + // will get picked up for the SparkSubmit process. + val confDir = new File(tempDir, "conf") + confDir.mkdir() + val javaOptsFile = new File(confDir, "java-opts") + Files.write(s"-Dlog4j.configuration=file://$log4jConf\n", javaOptsFile, StandardCharsets.UTF_8) + + val result = File.createTempFile("result", null, tempDir) + val finalState = runSpark(clientMode = false, + mainClassName(YarnClusterDriver.getClass), + appArgs = Seq(result.getAbsolutePath), + extraEnv = Map("SPARK_CONF_DIR" -> confDir.getAbsolutePath), + extraConf = Map(CLIENT_INCLUDE_DRIVER_LOGS_LINK.key -> true.toString)) + checkResult(finalState, result) + val logOutput = Files.toString(logOutFile, StandardCharsets.UTF_8) + val logFilePattern = raw"""(?s).+\sDriver Logs \(\): https?://.+/(\?\S+)?\s.+""" + logOutput should fullyMatch regex logFilePattern.replace("", "stdout") + logOutput should fullyMatch regex logFilePattern.replace("", "stderr") + } + test("timeout to get SparkContext in cluster mode triggers failure") { val timeout = 2000 val finalState = runSpark(false, mainClassName(SparkContextTimeoutApp.getClass), From c6994354f70061b2a15445dbd298a2db926b548c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 30 Nov 2020 13:29:50 -0800 Subject: [PATCH 0613/1009] [SPARK-33545][CORE] Support Fallback Storage during Worker decommission ### What changes were proposed in this pull request? This PR aims to support storage migration to the fallback storage like cloud storage (`S3`) during worker decommission for the corner cases where the exceptions occur or there is no live peer left. Although this PR focuses on cloud storage like `S3` which has a TTL feature in order to simplify Spark's logic, we can use alternative fallback storages like HDFS/NFS(EFS) if the user provides a clean-up mechanism. ### Why are the changes needed? Currently, storage migration is not possible when there is no available executor. For example, when there is one executor, the executor cannot perform storage migration because it has no peer. ### Does this PR introduce _any_ user-facing change? Yes. This is a new feature. ### How was this patch tested? Pass the CIs with newly added test cases. Closes #30492 from dongjoon-hyun/SPARK-33545. 
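As a rough illustration of how the feature is wired up, the sketch below builds a `SparkConf` similar to the one used in the new `FallbackStorageSuite`. Only `spark.storage.decommission.fallbackStorage.path` is introduced by this patch; the other string keys and the bucket name are assumptions for illustration, and the path must end with a separator per the config's check:

```scala
import org.apache.spark.SparkConf

// A minimal sketch, not an authoritative setup: enable decommissioning with
// shuffle block migration and point the fallback storage at a TTL-managed bucket.
// The bucket name is hypothetical; keys other than fallbackStorage.path are assumed.
val conf = new SparkConf()
  .set("spark.decommission.enabled", "true")                       // assumed key
  .set("spark.storage.decommission.enabled", "true")               // assumed key
  .set("spark.storage.decommission.shuffleBlocks.enabled", "true") // assumed key
  .set("spark.storage.decommission.fallbackStorage.path", "s3a://spark-storage/")
```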
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/pom.xml | 41 +++ .../scala/org/apache/spark/SparkContext.scala | 1 + .../spark/internal/config/package.scala | 10 + .../shuffle/IndexShuffleBlockResolver.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 18 +- .../storage/BlockManagerDecommissioner.scala | 3 + .../spark/storage/FallbackStorage.scala | 174 +++++++++++ .../storage/ShuffleBlockFetcherIterator.scala | 3 +- .../spark/storage/FallbackStorageSuite.scala | 269 ++++++++++++++++++ 9 files changed, 517 insertions(+), 4 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala create mode 100644 core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala diff --git a/core/pom.xml b/core/pom.xml index 7a56c4ca3c638..9d2bf7dbe57a9 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -461,6 +461,47 @@ test + + org.apache.hadoop + hadoop-aws + ${hadoop.version} + test + + + org.apache.hadoop + hadoop-common + + + commons-logging + commons-logging + + + org.codehaus.jackson + jackson-mapper-asl + + + org.codehaus.jackson + jackson-core-asl + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + + com.amazonaws + aws-java-sdk + + + org.apache.commons commons-crypto diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 0440a9de6ab31..b953592fa04dc 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -576,6 +576,7 @@ class SparkContext(config: SparkConf) extends Logging { } _ui.foreach(_.setAppId(_applicationId)) _env.blockManager.initialize(_applicationId) + FallbackStorage.registerBlockManagerIfNeeded(_env.blockManager.master, _conf) // The metrics system for Driver need to be set spark.app.id to app ID. // So it should start after we get app ID from the task scheduler and set spark.app.id. diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b8bcb374ef961..093a0ecf58d32 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -471,6 +471,16 @@ package object config { "cache block replication should be positive.") .createWithDefaultString("30s") + private[spark] val STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH = + ConfigBuilder("spark.storage.decommission.fallbackStorage.path") + .doc("The location for fallback storage during block manager decommissioning. " + + "For example, `s3a://spark-storage/`. In case of empty, fallback storage is disabled. 
" + + "The storage should be managed by TTL because Spark will not clean it up.") + .version("3.1.0") + .stringConf + .checkValue(_.endsWith(java.io.File.separator), "Path should end with separator.") + .createOptional + private[spark] val STORAGE_REPLICATION_TOPOLOGY_FILE = ConfigBuilder("spark.storage.replication.topologyFile") .version("2.1.0") diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index e5df27c0d3c7a..5f0bb42108c56 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -91,7 +91,7 @@ private[spark] class IndexShuffleBlockResolver( * When the dirs parameter is None then use the disk manager's local directories. Otherwise, * read from the specified directories. */ - private def getIndexFile( + def getIndexFile( shuffleId: Int, mapId: Long, dirs: Option[Array[String]] = None): File = { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 072702b343328..a5b8d5d0c8cda 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -627,7 +627,16 @@ private[spark] class BlockManager( override def getLocalBlockData(blockId: BlockId): ManagedBuffer = { if (blockId.isShuffle) { logDebug(s"Getting local shuffle block ${blockId}") - shuffleManager.shuffleBlockResolver.getBlockData(blockId) + try { + shuffleManager.shuffleBlockResolver.getBlockData(blockId) + } catch { + case e: IOException => + if (conf.get(config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + FallbackStorage.read(conf, blockId) + } else { + throw e + } + } } else { getLocalBytes(blockId) match { case Some(blockData) => @@ -1580,7 +1589,12 @@ private[spark] class BlockManager( lastPeerFetchTimeNs = System.nanoTime() logDebug("Fetched peers from master: " + cachedPeers.mkString("[", ",", "]")) } - cachedPeers + if (cachedPeers.isEmpty && + conf.get(config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + Seq(FallbackStorage.FALLBACK_BLOCK_MANAGER_ID) + } else { + cachedPeers + } } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 7a55039db1b60..e73e359a70f1e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -39,6 +39,7 @@ private[storage] class BlockManagerDecommissioner( conf: SparkConf, bm: BlockManager) extends Logging { + private val fallbackStorage = FallbackStorage.getFallbackStorage(conf) private val maxReplicationFailuresForDecommission = conf.get(config.STORAGE_DECOMMISSION_MAX_REPLICATION_FAILURE_PER_BLOCK) @@ -114,6 +115,8 @@ private[storage] class BlockManagerDecommissioner( // driver a no longer referenced RDD with shuffle files. 
if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).isEmpty) { logWarning(s"Skipping block ${shuffleBlockInfo}, block deleted.") + } else if (fallbackStorage.isDefined) { + fallbackStorage.foreach(_.copy(shuffleBlockInfo, bm)) } else { throw e } diff --git a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala new file mode 100644 index 0000000000000..9221731f77a59 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage + +import java.io.DataInputStream +import java.nio.ByteBuffer + +import scala.concurrent.Future +import scala.reflect.ClassTag + +import org.apache.hadoop.fs.{FileSystem, Path} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH +import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} +import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcTimeout} +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID +import org.apache.spark.util.Utils + +/** + * A fallback storage used by storage decommissioners. 
+ */ +private[storage] class FallbackStorage(conf: SparkConf) extends Logging { + require(conf.contains("spark.app.id")) + require(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) + + private val fallbackPath = new Path(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).get) + private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + private val fallbackFileSystem = FileSystem.get(fallbackPath.toUri, hadoopConf) + private val appId = conf.getAppId + + // Visible for testing + def copy( + shuffleBlockInfo: ShuffleBlockInfo, + bm: BlockManager): Unit = { + val shuffleId = shuffleBlockInfo.shuffleId + val mapId = shuffleBlockInfo.mapId + + bm.migratableResolver match { + case r: IndexShuffleBlockResolver => + val indexFile = r.getIndexFile(shuffleId, mapId) + + if (indexFile.exists()) { + fallbackFileSystem.copyFromLocalFile( + new Path(indexFile.getAbsolutePath), + new Path(fallbackPath, s"$appId/$shuffleId/${indexFile.getName}")) + + val dataFile = r.getDataFile(shuffleId, mapId) + if (dataFile.exists()) { + fallbackFileSystem.copyFromLocalFile( + new Path(dataFile.getAbsolutePath), + new Path(fallbackPath, s"$appId/$shuffleId/${dataFile.getName}")) + } + + // Report block statuses + val reduceId = NOOP_REDUCE_ID + val indexBlockId = ShuffleIndexBlockId(shuffleId, mapId, reduceId) + FallbackStorage.reportBlockStatus(bm, indexBlockId, indexFile.length) + if (dataFile.exists) { + val dataBlockId = ShuffleDataBlockId(shuffleId, mapId, reduceId) + FallbackStorage.reportBlockStatus(bm, dataBlockId, dataFile.length) + } + } + case r => + logWarning(s"Unsupported Resolver: ${r.getClass.getName}") + } + } + + def exists(shuffleId: Int, filename: String): Boolean = { + fallbackFileSystem.exists(new Path(fallbackPath, s"$appId/$shuffleId/$filename")) + } +} + +class NoopRpcEndpointRef(conf: SparkConf) extends RpcEndpointRef(conf) { + import scala.concurrent.ExecutionContext.Implicits.global + override def address: RpcAddress = null + override def name: String = "fallback" + override def send(message: Any): Unit = {} + override def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T] = { + Future{true.asInstanceOf[T]} + } +} + +object FallbackStorage extends Logging { + /** We use one block manager id as a place holder. */ + val FALLBACK_BLOCK_MANAGER_ID: BlockManagerId = BlockManagerId("fallback", "remote", 7337) + + def getFallbackStorage(conf: SparkConf): Option[FallbackStorage] = { + if (conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + Some(new FallbackStorage(conf)) + } else { + None + } + } + + /** Register the fallback block manager and its RPC endpoint. */ + def registerBlockManagerIfNeeded(master: BlockManagerMaster, conf: SparkConf): Unit = { + if (conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).isDefined) { + master.registerBlockManager( + FALLBACK_BLOCK_MANAGER_ID, Array.empty[String], 0, 0, new NoopRpcEndpointRef(conf)) + } + } + + /** Report block status to block manager master and map output tracker master. */ + private def reportBlockStatus(blockManager: BlockManager, blockId: BlockId, dataLength: Long) = { + assert(blockManager.master != null) + blockManager.master.updateBlockInfo( + FALLBACK_BLOCK_MANAGER_ID, blockId, StorageLevel.DISK_ONLY, memSize = 0, dataLength) + } + + /** + * Read a ManagedBuffer. 
+ */ + def read(conf: SparkConf, blockId: BlockId): ManagedBuffer = { + logInfo(s"Read $blockId") + val fallbackPath = new Path(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).get) + val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) + val fallbackFileSystem = FileSystem.get(fallbackPath.toUri, hadoopConf) + val appId = conf.getAppId + + val (shuffleId, mapId, startReduceId, endReduceId) = blockId match { + case id: ShuffleBlockId => + (id.shuffleId, id.mapId, id.reduceId, id.reduceId + 1) + case batchId: ShuffleBlockBatchId => + (batchId.shuffleId, batchId.mapId, batchId.startReduceId, batchId.endReduceId) + case _ => + throw new IllegalArgumentException("unexpected shuffle block id format: " + blockId) + } + + val name = ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID).name + val indexFile = new Path(fallbackPath, s"$appId/$shuffleId/$name") + val start = startReduceId * 8L + val end = endReduceId * 8L + Utils.tryWithResource(fallbackFileSystem.open(indexFile)) { inputStream => + Utils.tryWithResource(new DataInputStream(inputStream)) { index => + index.skip(start) + val offset = index.readLong() + index.skip(end - (start + 8L)) + val nextOffset = index.readLong() + val name = ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID).name + val dataFile = new Path(fallbackPath, s"$appId/$shuffleId/$name") + val f = fallbackFileSystem.open(dataFile) + val size = nextOffset - 1 - offset + logDebug(s"To byte array $size") + val array = new Array[Byte](size.toInt) + val startTimeNs = System.nanoTime() + f.seek(offset) + f.read(array) + logDebug(s"Took ${(System.nanoTime() - startTimeNs) / (1000 * 1000)}ms") + f.close() + new NioManagedBuffer(ByteBuffer.wrap(array)) + } + } + } +} + diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index e3b3fc5cc4565..fa4e46590aa5e 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -295,8 +295,9 @@ final class ShuffleBlockFetcherIterator( var hostLocalBlockBytes = 0L var remoteBlockBytes = 0L + val fallback = FallbackStorage.FALLBACK_BLOCK_MANAGER_ID.executorId for ((address, blockInfos) <- blocksByAddress) { - if (address.executorId == blockManager.blockManagerId.executorId) { + if (Seq(blockManager.blockManagerId.executorId, fallback).contains(address.executorId)) { checkBlockSizes(blockInfos) val mergedBlockInfos = mergeContinuousShuffleBlockIdsIfNeeded( blockInfos.map(info => FetchBlockInfo(info._1, info._2, info._3)), doBatchFetch) diff --git a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala new file mode 100644 index 0000000000000..2eeae2ecad5eb --- /dev/null +++ b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.storage + +import java.io.{DataOutputStream, FileOutputStream, IOException} +import java.nio.file.Files + +import scala.concurrent.duration._ + +import org.mockito.{ArgumentMatchers => mc} +import org.mockito.Mockito.{mock, times, verify, when} +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TestUtils} +import org.apache.spark.LocalSparkContext.withSpark +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher.{EXECUTOR_MEMORY, SPARK_MASTER} +import org.apache.spark.network.BlockTransferService +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.scheduler.ExecutorDecommissionInfo +import org.apache.spark.scheduler.cluster.StandaloneSchedulerBackend +import org.apache.spark.shuffle.{IndexShuffleBlockResolver, ShuffleBlockInfo} +import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID +import org.apache.spark.util.Utils.tryWithResource + +class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { + + def getSparkConf(initialExecutor: Int = 1, minExecutor: Int = 1): SparkConf = { + new SparkConf(false) + .setAppName(getClass.getName) + .set(SPARK_MASTER, s"local-cluster[$initialExecutor,1,1024]") + .set(EXECUTOR_MEMORY, "1g") + .set(UI.UI_ENABLED, false) + .set(DYN_ALLOCATION_ENABLED, true) + .set(DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED, true) + .set(DYN_ALLOCATION_INITIAL_EXECUTORS, initialExecutor) + .set(DYN_ALLOCATION_MIN_EXECUTORS, minExecutor) + .set(DECOMMISSION_ENABLED, true) + .set(STORAGE_DECOMMISSION_ENABLED, true) + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + } + + test("fallback storage APIs - copy/exists") { + val conf = new SparkConf(false) + .set("spark.app.id", "testId") + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + val fallbackStorage = new FallbackStorage(conf) + val bmm = new BlockManagerMaster(new NoopRpcEndpointRef(conf), null, conf, false) + + val bm = mock(classOf[BlockManager]) + val dbm = new DiskBlockManager(conf, false) + when(bm.diskBlockManager).thenReturn(dbm) + when(bm.master).thenReturn(bmm) + val resolver = new IndexShuffleBlockResolver(conf, bm) + when(bm.migratableResolver).thenReturn(resolver) + + resolver.getIndexFile(1, 1L).createNewFile() + resolver.getDataFile(1, 1L).createNewFile() + + val indexFile = resolver.getIndexFile(1, 2L) + tryWithResource(new FileOutputStream(indexFile)) { fos => + tryWithResource(new DataOutputStream(fos)) { dos => + dos.writeLong(0) + dos.writeLong(4) + } + } + + val dataFile = resolver.getDataFile(1, 2L) + tryWithResource(new FileOutputStream(dataFile)) { fos => + tryWithResource(new DataOutputStream(fos)) { dos => + dos.writeLong(0) + } + } + + fallbackStorage.copy(ShuffleBlockInfo(1, 1L), bm) + 
fallbackStorage.copy(ShuffleBlockInfo(1, 2L), bm) + + assert(fallbackStorage.exists(1, ShuffleIndexBlockId(1, 1L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleDataBlockId(1, 1L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleIndexBlockId(1, 2L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleDataBlockId(1, 2L, NOOP_REDUCE_ID).name)) + + // The files for shuffle 1 and map 1 are empty intentionally. + intercept[java.io.EOFException] { + FallbackStorage.read(conf, ShuffleBlockId(1, 1L, 0)) + } + FallbackStorage.read(conf, ShuffleBlockId(1, 2L, 0)) + } + + test("migrate shuffle data to fallback storage") { + val conf = new SparkConf(false) + .set("spark.app.id", "testId") + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + + val ids = Set((1, 1L, 1)) + val bm = mock(classOf[BlockManager]) + val dbm = new DiskBlockManager(conf, false) + when(bm.diskBlockManager).thenReturn(dbm) + val indexShuffleBlockResolver = new IndexShuffleBlockResolver(conf, bm) + val indexFile = indexShuffleBlockResolver.getIndexFile(1, 1L) + val dataFile = indexShuffleBlockResolver.getDataFile(1, 1L) + indexFile.createNewFile() + dataFile.createNewFile() + + val resolver = mock(classOf[IndexShuffleBlockResolver]) + when(resolver.getStoredShuffles()) + .thenReturn(ids.map(triple => ShuffleBlockInfo(triple._1, triple._2)).toSeq) + ids.foreach { case (shuffleId: Int, mapId: Long, reduceId: Int) => + when(resolver.getMigrationBlocks(mc.any())) + .thenReturn(List( + (ShuffleIndexBlockId(shuffleId, mapId, reduceId), mock(classOf[ManagedBuffer])), + (ShuffleDataBlockId(shuffleId, mapId, reduceId), mock(classOf[ManagedBuffer])))) + when(resolver.getIndexFile(shuffleId, mapId)).thenReturn(indexFile) + when(resolver.getDataFile(shuffleId, mapId)).thenReturn(dataFile) + } + + when(bm.getPeers(mc.any())) + .thenReturn(Seq(FallbackStorage.FALLBACK_BLOCK_MANAGER_ID)) + val bmm = new BlockManagerMaster(new NoopRpcEndpointRef(conf), null, conf, false) + when(bm.master).thenReturn(bmm) + val blockTransferService = mock(classOf[BlockTransferService]) + when(blockTransferService.uploadBlockSync(mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), + mc.any(), mc.any())).thenThrow(new IOException) + when(bm.blockTransferService).thenReturn(blockTransferService) + when(bm.migratableResolver).thenReturn(resolver) + when(bm.getMigratableRDDBlocks()).thenReturn(Seq()) + + val decommissioner = new BlockManagerDecommissioner(conf, bm) + + try { + decommissioner.start() + val fallbackStorage = new FallbackStorage(conf) + eventually(timeout(10.second), interval(1.seconds)) { + // uploadBlockSync is not used + verify(blockTransferService, times(1)) + .uploadBlockSync(mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any(), mc.any()) + + Seq("shuffle_1_1_0.index", "shuffle_1_1_0.data").foreach { filename => + assert(fallbackStorage.exists(shuffleId = 1, filename)) + } + } + } finally { + decommissioner.stop() + } + } + + test("Upload from all decommissioned executors") { + sc = new SparkContext(getSparkConf(2, 2)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + val rdd1 = sc.parallelize(1 to 10, 10) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + assert(rdd3.count() === 2) + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + 
sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + val files = Seq("shuffle_0_0_0.index", "shuffle_0_0_0.data") + val fallbackStorage = new FallbackStorage(sc.getConf) + // Uploading is not started yet. + files.foreach { file => assert(!fallbackStorage.exists(0, file)) } + + // Uploading is completed on decommissioned executors + eventually(timeout(20.seconds), interval(1.seconds)) { + files.foreach { file => assert(fallbackStorage.exists(0, file)) } + } + + // All executors are still alive. + assert(sc.getExecutorIds().size == 2) + } + } + + test("Upload multi stages") { + sc = new SparkContext(getSparkConf()) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 1, 60000) + val rdd1 = sc.parallelize(1 to 10, 2) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + val rdd4 = rdd3.sortByKey() + assert(rdd4.count() === 2) + + val shuffle0_files = Seq( + "shuffle_0_0_0.index", "shuffle_0_0_0.data", + "shuffle_0_1_0.index", "shuffle_0_1_0.data") + val shuffle1_files = Seq( + "shuffle_1_4_0.index", "shuffle_1_4_0.data", + "shuffle_1_5_0.index", "shuffle_1_5_0.data") + val fallbackStorage = new FallbackStorage(sc.getConf) + shuffle0_files.foreach { file => assert(!fallbackStorage.exists(0, file)) } + shuffle1_files.foreach { file => assert(!fallbackStorage.exists(1, file)) } + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + eventually(timeout(10.seconds), interval(1.seconds)) { + shuffle0_files.foreach { file => assert(fallbackStorage.exists(0, file)) } + shuffle1_files.foreach { file => assert(fallbackStorage.exists(1, file)) } + } + } + } + + test("Newly added executors should access old data from remote storage") { + sc = new SparkContext(getSparkConf(2, 0)) + withSpark(sc) { sc => + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + val rdd1 = sc.parallelize(1 to 10, 2) + val rdd2 = rdd1.map(x => (x % 2, 1)) + val rdd3 = rdd2.reduceByKey(_ + _) + assert(rdd3.collect() === Array((0, 5), (1, 5))) + + // Decommission all + val sched = sc.schedulerBackend.asInstanceOf[StandaloneSchedulerBackend] + sc.getExecutorIds().foreach { + sched.decommissionExecutor(_, ExecutorDecommissionInfo(""), false) + } + + // Make it sure that fallback storage are ready + val fallbackStorage = new FallbackStorage(sc.getConf) + eventually(timeout(10.seconds), interval(1.seconds)) { + Seq( + "shuffle_0_0_0.index", "shuffle_0_0_0.data", + "shuffle_0_1_0.index", "shuffle_0_1_0.data").foreach { file => + assert(fallbackStorage.exists(0, file)) + } + } + + // Since the data is safe, force to shrink down to zero executor + sc.getExecutorIds().foreach { id => + sched.killExecutor(id) + } + eventually(timeout(20.seconds), interval(1.seconds)) { + assert(sc.getExecutorIds().isEmpty) + } + + // Dynamic allocation will start new executors + assert(rdd3.collect() === Array((0, 5), (1, 5))) + assert(rdd3.sortByKey().count() == 2) + assert(sc.getExecutorIds().nonEmpty) + } + } +} From f5d2165c95fe83f24be9841807613950c1d5d6d0 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 06:44:15 +0900 Subject: [PATCH 0614/1009] [SPARK-33440][CORE] Use current timestamp with warning log in HadoopFSDelegationTokenProvider when the issue date for token is not set up properly ### What changes were proposed in this pull request? 
This PR proposes to use the current timestamp, with a warning log, when the issue date of a token is not set up properly. The next section explains the rationale in detail.

### Why are the changes needed?

Unfortunately, not every implementation respects the `issue date` in `AbstractDelegationTokenIdentifier`, which Spark relies on in its calculations. The default issue date is 0L, which is far from the actual issue date and breaks the calculation of the next renewal date under some circumstances, leading to a 0 interval (immediate) when rescheduling token renewal.

In HadoopFSDelegationTokenProvider, Spark calculates the token renewal interval as below:

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala#L123-L134

The interval is calculated as `token.renew() - identifier.getIssueDate`, which gives a correct interval as long as both `token.renew()` and `identifier.getIssueDate` produce correct values, but it goes wrong when `identifier.getIssueDate` returns 0L (the default value), like below:

```
20/10/13 06:34:19 INFO security.HadoopFSDelegationTokenProvider: Renewal interval is 1603175657000 for token S3ADelegationToken/IDBroker
20/10/13 06:34:19 INFO security.HadoopFSDelegationTokenProvider: Renewal interval is 86400048 for token HDFS_DELEGATION_TOKEN
```

We pick the minimum value as a safety guard (so in this case `86400048` is picked up), but here the safety guard has an unintended bad impact.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala#L58-L71

Spark takes the interval calculated above (the "minimum" of the intervals), blindly adds it to each token's issue date to compute that token's next renewal date, and picks the "minimum" value again. In the problematic case, the value would be `86400048` (86400048 + 0), which is far smaller than the current timestamp.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala#L228-L234

The next renewal date is then reduced by the current timestamp to get the interval, which is multiplied by the configured ratio to produce the final schedule interval. In the problematic case, this value goes negative.

https://github.com/apache/spark/blob/2c64b731ae6a976b0d75a95901db849b4a0e2393/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala#L180-L188

There is a safety guard that disallows a negative value, but it simply yields 0, which means "schedule immediately". That triggers the next calculation of the next renewal date and schedule interval, which leads to the same behavior, so the delegation token is updated immediately and continuously.

Since we fetch the token just before the calculation happens, the actual issue date is likely only slightly earlier, so it is not that dangerous to use the current timestamp as the issue date for a token whose issue date has not been set up properly. Still, it is better not to leave the token implementation as it is, so we log a warning message asking end users to consult the token implementer.

### Does this PR introduce _any_ user-facing change?

Yes. End users won't encounter the tight loop of token renewal scheduling after this PR. From the end users' perspective, there is nothing they need to change.
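To make the degenerate case concrete, here is a small illustration (not part of the patch) of the arithmetic described above. The `0.75` ratio is an assumed default for `spark.security.credentials.renewalRatio`; the other numbers follow the log excerpt.

```python
import time

# Broken token: issue date left at its 0L default.
issue_date_ms = 0
# Minimum renewal interval picked by HadoopFSDelegationTokenProvider (from the log above).
interval_ms = 86400048
# Next renewal date = issue date + interval, i.e. about one day after the epoch,
# which is decades in the past.
next_renewal_ms = issue_date_ms + interval_ms

now_ms = int(time.time() * 1000)
ratio = 0.75  # assumed default renewal ratio
delay_ms = int(ratio * (next_renewal_ms - now_ms))

# delay_ms is hugely negative; the manager clamps it to 0, so renewal is
# scheduled immediately, over and over again.
print(delay_ms)
```

### How was this patch tested?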
Manually tested with problematic environment. Closes #30366 from HeartSaVioR/SPARK-33440. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../HadoopDelegationTokenManager.scala | 4 ++- .../HadoopFSDelegationTokenProvider.scala | 27 ++++++++++++++++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 3168c763df4df..6ce195b6c7a34 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -178,7 +178,7 @@ private[spark] class HadoopDelegationTokenManager( private def scheduleRenewal(delay: Long): Unit = { val _delay = math.max(0, delay) - logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(delay)}.") + logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(_delay)}.") val renewalTask = new Runnable() { override def run(): Unit = { @@ -230,6 +230,8 @@ private[spark] class HadoopDelegationTokenManager( val now = System.currentTimeMillis val ratio = sparkConf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO) val delay = (ratio * (nextRenewal - now)).toLong + logInfo(s"Calculated delay on renewal is $delay, based on next renewal $nextRenewal " + + s"and the ratio $ratio, and current time $now") scheduleRenewal(delay) creds } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index a46864e2d3c9c..0dc6aa1d7ef30 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -63,7 +63,8 @@ private[deploy] class HadoopFSDelegationTokenProvider val identifier = token .decodeIdentifier() .asInstanceOf[AbstractDelegationTokenIdentifier] - identifier.getIssueDate + interval + val tokenKind = token.getKind.toString + getIssueDate(tokenKind, identifier) + interval } if (nextRenewalDates.isEmpty) None else Some(nextRenewalDates.min) } @@ -126,13 +127,33 @@ private[deploy] class HadoopFSDelegationTokenProvider Try { val newExpiration = token.renew(hadoopConf) val identifier = token.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier] - val interval = newExpiration - identifier.getIssueDate - logInfo(s"Renewal interval is $interval for token ${token.getKind.toString}") + val tokenKind = token.getKind.toString + val interval = newExpiration - getIssueDate(tokenKind, identifier) + logInfo(s"Renewal interval is $interval for token $tokenKind") interval }.toOption } if (renewIntervals.isEmpty) None else Some(renewIntervals.min) } + + private def getIssueDate(kind: String, identifier: AbstractDelegationTokenIdentifier): Long = { + val now = System.currentTimeMillis() + val issueDate = identifier.getIssueDate + if (issueDate > now) { + logWarning(s"Token $kind has set up issue date later than current time. (provided: " + + s"$issueDate / current timestamp: $now) Please make sure clocks are in sync between " + + "machines. 
If the issue is not a clock mismatch, consult token implementor to check " + + "whether issue date is valid.") + issueDate + } else if (issueDate > 0L) { + issueDate + } else { + logWarning(s"Token $kind has not set up issue date properly. (provided: $issueDate) " + + s"Using current timestamp ($now) as issue date instead. Consult token implementor to fix " + + "the behavior.") + now + } + } } private[deploy] object HadoopFSDelegationTokenProvider { From 596fbc1d292259c8850f026e2d7267056abee3bc Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Tue, 1 Dec 2020 09:52:19 +0900 Subject: [PATCH 0615/1009] [SPARK-33556][ML] Add array_to_vector function for dataframe column ### What changes were proposed in this pull request? Add array_to_vector function for dataframe column ### Why are the changes needed? Utility function for array to vector conversion. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? scala unit test & doctest. Closes #30498 from WeichenXu123/array_to_vec. Lead-authored-by: Weichen Xu Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/ml/functions.scala | 16 ++++++++- .../org/apache/spark/ml/FunctionsSuite.scala | 18 ++++++++-- python/docs/source/reference/pyspark.ml.rst | 1 + python/pyspark/ml/functions.py | 34 +++++++++++++++++++ python/pyspark/ml/functions.pyi | 2 ++ 5 files changed, 68 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/functions.scala b/mllib/src/main/scala/org/apache/spark/ml/functions.scala index a0b6d11a46be9..43622a4f3edfb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/functions.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/functions.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml import org.apache.spark.annotation.Since -import org.apache.spark.ml.linalg.{SparseVector, Vector} +import org.apache.spark.ml.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.mllib.linalg.{Vector => OldVector} import org.apache.spark.sql.Column import org.apache.spark.sql.functions.udf @@ -72,6 +72,20 @@ object functions { } } + private val arrayToVectorUdf = udf { array: Seq[Double] => + Vectors.dense(array.toArray) + } + + /** + * Converts a column of array of numeric type into a column of dense vectors in MLlib. + * @param v: the column of array<NumericType> type + * @return a column of type `org.apache.spark.ml.linalg.Vector` + * @since 3.1.0 + */ + def array_to_vector(v: Column): Column = { + arrayToVectorUdf(v) + } + private[ml] def checkNonNegativeWeight = udf { value: Double => require(value >= 0, s"illegal weight value: $value. weight must be >= 0.0.") diff --git a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala index 3dd9a7d8ec85d..21b823383d233 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/FunctionsSuite.scala @@ -18,8 +18,8 @@ package org.apache.spark.ml import org.apache.spark.SparkException -import org.apache.spark.ml.functions.vector_to_array -import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.functions.{array_to_vector, vector_to_array} +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.util.MLTest import org.apache.spark.mllib.linalg.{Vectors => OldVectors} import org.apache.spark.sql.functions.col @@ -87,4 +87,18 @@ class FunctionsSuite extends MLTest { assert(thrown2.getMessage.contains( s"Unsupported dtype: float16. 
Valid values: float64, float32.")) } + + test("test array_to_vector") { + val df1 = Seq(Tuple1(Array(0.5, 1.5))).toDF("c1") + val resultVec = df1.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec === Vectors.dense(Array(0.5, 1.5))) + + val df2 = Seq(Tuple1(Array(1.5f, 2.5f))).toDF("c1") + val resultVec2 = df2.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec2 === Vectors.dense(Array(1.5, 2.5))) + + val df3 = Seq(Tuple1(Array(1, 2))).toDF("c1") + val resultVec3 = df3.select(array_to_vector(col("c1"))).collect()(0)(0).asInstanceOf[Vector] + assert(resultVec3 === Vectors.dense(Array(1.0, 2.0))) + } } diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 5fafe5899f20b..2de0ff65a3ae8 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -196,6 +196,7 @@ ML Functions .. autosummary:: :toctree: api/ + array_to_vector vector_to_array diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index cf4a014d897fb..fb245a3d05827 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -69,6 +69,40 @@ def vector_to_array(col, dtype="float64"): sc._jvm.org.apache.spark.ml.functions.vector_to_array(_to_java_column(col), dtype)) +def array_to_vector(col): + """ + Converts a column of array of numeric type into a column of dense vectors in MLlib + + .. versionadded:: 3.1.0 + + Parameters + ---------- + col : :py:class:`pyspark.sql.Column` or str + Input column + + Returns + ------- + :py:class:`pyspark.sql.Column` + The converted column of MLlib dense vectors. + + Examples + -------- + >>> from pyspark.ml.functions import array_to_vector + >>> df1 = spark.createDataFrame([([1.5, 2.5],),], schema='v1 array') + >>> df1.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.5, 2.5]))] + >>> df2 = spark.createDataFrame([([1.5, 3.5],),], schema='v1 array') + >>> df2.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.5, 3.5]))] + >>> df3 = spark.createDataFrame([([1, 3],),], schema='v1 array') + >>> df3.select(array_to_vector('v1').alias('vec1')).collect() + [Row(vec1=DenseVector([1.0, 3.0]))] + """ + sc = SparkContext._active_spark_context + return Column( + sc._jvm.org.apache.spark.ml.functions.array_to_vector(_to_java_column(col))) + + def _test(): import doctest from pyspark.sql import SparkSession diff --git a/python/pyspark/ml/functions.pyi b/python/pyspark/ml/functions.pyi index 42650e742e781..12b44fc63b5b7 100644 --- a/python/pyspark/ml/functions.pyi +++ b/python/pyspark/ml/functions.pyi @@ -20,3 +20,5 @@ from pyspark import SparkContext as SparkContext, since as since # noqa: F401 from pyspark.sql.column import Column as Column def vector_to_array(col: Column) -> Column: ... + +def array_to_vector(col: Column) -> Column: ... From aeb3649fb9103a7541ef54f451c60fcd5a091934 Mon Sep 17 00:00:00 2001 From: Bryan Cutler Date: Tue, 1 Dec 2020 10:34:40 +0900 Subject: [PATCH 0616/1009] [SPARK-33613][PYTHON][TESTS] Replace deprecated APIs in pyspark tests ### What changes were proposed in this pull request? This replaces deprecated API usage in PySpark tests with the preferred APIs. These have been deprecated for some time and usage is not consistent within tests. - https://docs.python.org/3/library/unittest.html#deprecated-aliases ### Why are the changes needed? For consistency and eventual removal of deprecated APIs. 
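As a concrete illustration, here is a minimal sketch (not code from this patch; `DeprecatedAliasExample` is a hypothetical test case) of the renames applied throughout the tests:

```python
import unittest

import pandas as pd
# Deprecated import path replaced in the tests:
#   from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal


class DeprecatedAliasExample(unittest.TestCase):
    def test_preferred_apis(self):
        # assertEquals / assertRaisesRegexp are deprecated unittest aliases;
        # the preferred spellings are assertEqual / assertRaisesRegex.
        self.assertEqual(1 + 1, 2)
        with self.assertRaisesRegex(ValueError, "invalid literal"):
            int("not a number")
        assert_frame_equal(pd.DataFrame({"a": [1]}), pd.DataFrame({"a": [1]}))


if __name__ == "__main__":
    unittest.main()
```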
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30557 from BryanCutler/replace-deprecated-apis-in-tests. Authored-by: Bryan Cutler Signed-off-by: HyukjinKwon --- python/pyspark/ml/tests/test_feature.py | 2 +- python/pyspark/ml/tests/test_image.py | 6 +- python/pyspark/ml/tests/test_param.py | 2 +- python/pyspark/ml/tests/test_persistence.py | 2 +- python/pyspark/ml/tests/test_tuning.py | 4 +- python/pyspark/ml/tests/test_wrapper.py | 6 +- python/pyspark/sql/tests/test_arrow.py | 28 ++--- python/pyspark/sql/tests/test_catalog.py | 56 ++++----- python/pyspark/sql/tests/test_column.py | 10 +- python/pyspark/sql/tests/test_conf.py | 2 +- python/pyspark/sql/tests/test_dataframe.py | 78 ++++++------- python/pyspark/sql/tests/test_datasources.py | 10 +- python/pyspark/sql/tests/test_functions.py | 22 ++-- .../sql/tests/test_pandas_cogrouped_map.py | 14 +-- .../sql/tests/test_pandas_grouped_map.py | 32 +++--- python/pyspark/sql/tests/test_pandas_map.py | 8 +- python/pyspark/sql/tests/test_pandas_udf.py | 32 +++--- .../sql/tests/test_pandas_udf_grouped_agg.py | 16 +-- .../sql/tests/test_pandas_udf_scalar.py | 108 +++++++++--------- .../sql/tests/test_pandas_udf_typehints.py | 2 +- .../sql/tests/test_pandas_udf_window.py | 6 +- python/pyspark/sql/tests/test_types.py | 24 ++-- python/pyspark/sql/tests/test_udf.py | 28 ++--- python/pyspark/sql/tests/test_utils.py | 15 ++- python/pyspark/tests/test_profiler.py | 4 +- python/pyspark/tests/test_rdd.py | 30 ++--- python/pyspark/tests/test_worker.py | 2 +- 27 files changed, 274 insertions(+), 275 deletions(-) diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 244110a986138..98b8ce6dfb95c 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -169,7 +169,7 @@ def test_count_vectorizer_from_vocab(self): # Test an empty vocabulary with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "vocabSize.*invalid.*0"): + with self.assertRaisesRegex(Exception, "vocabSize.*invalid.*0"): CountVectorizerModel.from_vocabulary([], inputCol="words") # Test model with default settings can transform diff --git a/python/pyspark/ml/tests/test_image.py b/python/pyspark/ml/tests/test_image.py index 1001598779d48..00e4c95a84355 100644 --- a/python/pyspark/ml/tests/test_image.py +++ b/python/pyspark/ml/tests/test_image.py @@ -47,19 +47,19 @@ def test_read_images(self): self.assertEqual(ImageSchema.undefinedImageType, "Undefined") with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "image argument should be pyspark.sql.types.Row; however", lambda: ImageSchema.toNDArray("a")) with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "image argument should have attributes specified in", lambda: ImageSchema.toNDArray(Row(a=1))) with QuietTest(self.sc): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "array argument should be numpy.ndarray; however, it got", lambda: ImageSchema.toImage("a")) diff --git a/python/pyspark/ml/tests/test_param.py b/python/pyspark/ml/tests/test_param.py index 4cddf50f36bdf..09fe21e9fdeca 100644 --- a/python/pyspark/ml/tests/test_param.py +++ b/python/pyspark/ml/tests/test_param.py @@ -308,7 +308,7 @@ def test_logistic_regression_check_thresholds(self): LogisticRegression ) - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "Logistic Regression getThreshold found inconsistent.*$", 
LogisticRegression, threshold=0.42, thresholds=[0.5, 0.5] diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 826e6cd351d32..0bbcfcdf50e95 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -442,7 +442,7 @@ def test_default_read_write_default_params(self): del metadata['defaultParamMap'] metadataStr = json.dumps(metadata, separators=[',', ':']) loadedMetadata = reader._parseMetaData(metadataStr, ) - with self.assertRaisesRegexp(AssertionError, "`defaultParamMap` section not found"): + with self.assertRaisesRegex(AssertionError, "`defaultParamMap` section not found"): reader.getAndSetParams(lr, loadedMetadata) # Prior to 2.4.0, metadata doesn't have `defaultParamMap`. diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index 729e46419ae2c..ced32c07f245f 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -499,7 +499,7 @@ def test_invalid_user_specified_folds(self): evaluator=evaluator, numFolds=2, foldCol="fold") - with self.assertRaisesRegexp(Exception, "Fold number must be in range"): + with self.assertRaisesRegex(Exception, "Fold number must be in range"): cv.fit(dataset_with_folds) cv = CrossValidator(estimator=lr, @@ -507,7 +507,7 @@ def test_invalid_user_specified_folds(self): evaluator=evaluator, numFolds=4, foldCol="fold") - with self.assertRaisesRegexp(Exception, "The validation data at fold 3 is empty"): + with self.assertRaisesRegex(Exception, "The validation data at fold 3 is empty"): cv.fit(dataset_with_folds) diff --git a/python/pyspark/ml/tests/test_wrapper.py b/python/pyspark/ml/tests/test_wrapper.py index 31475299c7b98..8ed6a6bad95ed 100644 --- a/python/pyspark/ml/tests/test_wrapper.py +++ b/python/pyspark/ml/tests/test_wrapper.py @@ -54,7 +54,7 @@ def test_java_object_gets_detached(self): model.__del__() def condition(): - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): model._java_obj.toString() self.assertIn("LinearRegressionTrainingSummary", summary._java_obj.toString()) return True @@ -67,9 +67,9 @@ def condition(): pass def condition(): - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): model._java_obj.toString() - with self.assertRaisesRegexp(py4j.protocol.Py4JError, error_no_object): + with self.assertRaisesRegex(py4j.protocol.Py4JError, error_no_object): summary._java_obj.toString() return True diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index e764c42d88a31..bf80c62ea0542 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -34,7 +34,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -137,7 +137,7 @@ def test_toPandas_fallback_disabled(self): df = self.spark.createDataFrame([(None,)], schema=schema) with QuietTest(self.sc): with self.warnings_lock: - with self.assertRaisesRegexp(Exception, 'Unsupported type'): + with self.assertRaisesRegex(Exception, 'Unsupported type'): df.toPandas() def test_null_conversion(self): @@ -214,7 +214,7 @@ def raise_exception(): exception_udf = udf(raise_exception, IntegerType()) df = df.withColumn("error", 
exception_udf()) with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'My error'): + with self.assertRaisesRegex(Exception, 'My error'): df.toPandas() def _createDataFrame_toggle(self, pdf, schema=None): @@ -228,7 +228,7 @@ def _createDataFrame_toggle(self, pdf, schema=None): def test_createDataFrame_toggle(self): pdf = self.create_pandas_data_frame() df_no_arrow, df_arrow = self._createDataFrame_toggle(pdf, schema=self.schema) - self.assertEquals(df_no_arrow.collect(), df_arrow.collect()) + self.assertEqual(df_no_arrow.collect(), df_arrow.collect()) def test_createDataFrame_respect_session_timezone(self): from datetime import timedelta @@ -258,7 +258,7 @@ def test_createDataFrame_respect_session_timezone(self): def test_createDataFrame_with_schema(self): pdf = self.create_pandas_data_frame() df = self.spark.createDataFrame(pdf, schema=self.schema) - self.assertEquals(self.schema, df.schema) + self.assertEqual(self.schema, df.schema) pdf_arrow = df.toPandas() assert_frame_equal(pdf_arrow, pdf) @@ -269,7 +269,7 @@ def test_createDataFrame_with_incorrect_schema(self): wrong_schema = StructType(fields) with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + with self.assertRaisesRegex(Exception, "[D|d]ecimal.*got.*date"): self.spark.createDataFrame(pdf, schema=wrong_schema) def test_createDataFrame_with_names(self): @@ -277,23 +277,23 @@ def test_createDataFrame_with_names(self): new_names = list(map(str, range(len(self.schema.fieldNames())))) # Test that schema as a list of column names gets applied df = self.spark.createDataFrame(pdf, schema=list(new_names)) - self.assertEquals(df.schema.fieldNames(), new_names) + self.assertEqual(df.schema.fieldNames(), new_names) # Test that schema as tuple of column names gets applied df = self.spark.createDataFrame(pdf, schema=tuple(new_names)) - self.assertEquals(df.schema.fieldNames(), new_names) + self.assertEqual(df.schema.fieldNames(), new_names) def test_createDataFrame_column_name_encoding(self): pdf = pd.DataFrame({u'a': [1]}) columns = self.spark.createDataFrame(pdf).columns self.assertTrue(isinstance(columns[0], str)) - self.assertEquals(columns[0], 'a') + self.assertEqual(columns[0], 'a') columns = self.spark.createDataFrame(pdf, [u'b']).columns self.assertTrue(isinstance(columns[0], str)) - self.assertEquals(columns[0], 'b') + self.assertEqual(columns[0], 'b') def test_createDataFrame_with_single_data_type(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, ".*IntegerType.*not supported.*"): + with self.assertRaisesRegex(ValueError, ".*IntegerType.*not supported.*"): self.spark.createDataFrame(pd.DataFrame({"a": [1]}), schema="int") def test_createDataFrame_does_not_modify_input(self): @@ -311,7 +311,7 @@ def test_schema_conversion_roundtrip(self): from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema arrow_schema = to_arrow_schema(self.schema) schema_rt = from_arrow_schema(arrow_schema) - self.assertEquals(self.schema, schema_rt) + self.assertEqual(self.schema, schema_rt) def test_createDataFrame_with_array_type(self): pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"], [u"y", u"z"]]}) @@ -420,7 +420,7 @@ def test_createDataFrame_fallback_enabled(self): def test_createDataFrame_fallback_disabled(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(TypeError, 'Unsupported type'): + with self.assertRaisesRegex(TypeError, 'Unsupported type'): 
self.spark.createDataFrame( pd.DataFrame({"a": [[datetime.datetime(2015, 11, 1, 0, 30)]]}), "a: array") @@ -545,7 +545,7 @@ def tearDownClass(cls): cls.spark.stop() def test_exception_by_max_results(self): - with self.assertRaisesRegexp(Exception, "is bigger than"): + with self.assertRaisesRegex(Exception, "is bigger than"): self.spark.range(0, 10000, 1, 100).toPandas() diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index ca4e427a7db28..56e7c97020662 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -25,11 +25,11 @@ class CatalogTests(ReusedSQLTestCase): def test_current_database(self): spark = self.spark with self.database("some_db"): - self.assertEquals(spark.catalog.currentDatabase(), "default") + self.assertEqual(spark.catalog.currentDatabase(), "default") spark.sql("CREATE DATABASE some_db") spark.catalog.setCurrentDatabase("some_db") - self.assertEquals(spark.catalog.currentDatabase(), "some_db") - self.assertRaisesRegexp( + self.assertEqual(spark.catalog.currentDatabase(), "some_db") + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.setCurrentDatabase("does_not_exist")) @@ -38,10 +38,10 @@ def test_list_databases(self): spark = self.spark with self.database("some_db"): databases = [db.name for db in spark.catalog.listDatabases()] - self.assertEquals(databases, ["default"]) + self.assertEqual(databases, ["default"]) spark.sql("CREATE DATABASE some_db") databases = [db.name for db in spark.catalog.listDatabases()] - self.assertEquals(sorted(databases), ["default", "some_db"]) + self.assertEqual(sorted(databases), ["default", "some_db"]) def test_list_tables(self): from pyspark.sql.catalog import Table @@ -50,8 +50,8 @@ def test_list_tables(self): spark.sql("CREATE DATABASE some_db") with self.table("tab1", "some_db.tab2", "tab3_via_catalog"): with self.tempView("temp_tab"): - self.assertEquals(spark.catalog.listTables(), []) - self.assertEquals(spark.catalog.listTables("some_db"), []) + self.assertEqual(spark.catalog.listTables(), []) + self.assertEqual(spark.catalog.listTables("some_db"), []) spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab") spark.sql("CREATE TABLE tab1 (name STRING, age INT) USING parquet") spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT) USING parquet") @@ -66,40 +66,40 @@ def test_list_tables(self): sorted(spark.catalog.listTables("default"), key=lambda t: t.name) tablesSomeDb = \ sorted(spark.catalog.listTables("some_db"), key=lambda t: t.name) - self.assertEquals(tables, tablesDefault) - self.assertEquals(len(tables), 3) - self.assertEquals(len(tablesSomeDb), 2) - self.assertEquals(tables[0], Table( + self.assertEqual(tables, tablesDefault) + self.assertEqual(len(tables), 3) + self.assertEqual(len(tablesSomeDb), 2) + self.assertEqual(tables[0], Table( name="tab1", database="default", description=None, tableType="MANAGED", isTemporary=False)) - self.assertEquals(tables[1], Table( + self.assertEqual(tables[1], Table( name="tab3_via_catalog", database="default", description=description, tableType="MANAGED", isTemporary=False)) - self.assertEquals(tables[2], Table( + self.assertEqual(tables[2], Table( name="temp_tab", database=None, description=None, tableType="TEMPORARY", isTemporary=True)) - self.assertEquals(tablesSomeDb[0], Table( + self.assertEqual(tablesSomeDb[0], Table( name="tab2", database="some_db", description=None, tableType="MANAGED", isTemporary=False)) - 
self.assertEquals(tablesSomeDb[1], Table( + self.assertEqual(tablesSomeDb[1], Table( name="temp_tab", database=None, description=None, tableType="TEMPORARY", isTemporary=True)) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listTables("does_not_exist")) @@ -119,12 +119,12 @@ def test_list_functions(self): self.assertTrue("to_timestamp" in functions) self.assertTrue("to_unix_timestamp" in functions) self.assertTrue("current_database" in functions) - self.assertEquals(functions["+"], Function( + self.assertEqual(functions["+"], Function( name="+", description=None, className="org.apache.spark.sql.catalyst.expressions.Add", isTemporary=True)) - self.assertEquals(functions, functionsDefault) + self.assertEqual(functions, functionsDefault) with self.function("func1", "some_db.func2"): spark.catalog.registerFunction("temp_func", lambda x: str(x)) @@ -141,7 +141,7 @@ def test_list_functions(self): self.assertTrue("temp_func" in newFunctionsSomeDb) self.assertTrue("func1" not in newFunctionsSomeDb) self.assertTrue("func2" in newFunctionsSomeDb) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listFunctions("does_not_exist")) @@ -158,16 +158,16 @@ def test_list_columns(self): columns = sorted(spark.catalog.listColumns("tab1"), key=lambda c: c.name) columnsDefault = \ sorted(spark.catalog.listColumns("tab1", "default"), key=lambda c: c.name) - self.assertEquals(columns, columnsDefault) - self.assertEquals(len(columns), 2) - self.assertEquals(columns[0], Column( + self.assertEqual(columns, columnsDefault) + self.assertEqual(len(columns), 2) + self.assertEqual(columns[0], Column( name="age", description=None, dataType="int", nullable=True, isPartition=False, isBucket=False)) - self.assertEquals(columns[1], Column( + self.assertEqual(columns[1], Column( name="name", description=None, dataType="string", @@ -176,26 +176,26 @@ def test_list_columns(self): isBucket=False)) columns2 = \ sorted(spark.catalog.listColumns("tab2", "some_db"), key=lambda c: c.name) - self.assertEquals(len(columns2), 2) - self.assertEquals(columns2[0], Column( + self.assertEqual(len(columns2), 2) + self.assertEqual(columns2[0], Column( name="nickname", description=None, dataType="string", nullable=True, isPartition=False, isBucket=False)) - self.assertEquals(columns2[1], Column( + self.assertEqual(columns2[1], Column( name="tolerance", description=None, dataType="float", nullable=True, isPartition=False, isBucket=False)) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "tab2", lambda: spark.catalog.listColumns("tab2")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.listColumns("does_not_exist")) diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index 4a9c7106a12b0..2ae0a9bedd67d 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -47,7 +47,7 @@ def test_validate_column_types(self): self.assertTrue("Column" in _to_java_column(u"a").getClass().toString()) self.assertTrue("Column" in _to_java_column(self.spark.range(1).id).getClass().toString()) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "Invalid argument, not a string or column", lambda: _to_java_column(1)) @@ -58,7 +58,7 @@ class A(): self.assertRaises(TypeError, lambda: _to_java_column(A())) self.assertRaises(TypeError, lambda: _to_java_column([])) - 
self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "Invalid argument, not a string or column", lambda: udf(lambda x: x)(None)) @@ -79,9 +79,9 @@ def test_column_operators(self): cs.startswith('a'), cs.endswith('a'), ci.eqNullSafe(cs) self.assertTrue(all(isinstance(c, Column) for c in css)) self.assertTrue(isinstance(ci.cast(LongType()), Column)) - self.assertRaisesRegexp(ValueError, - "Cannot apply 'in' operator against a column", - lambda: 1 in cs) + self.assertRaisesRegex(ValueError, + "Cannot apply 'in' operator against a column", + lambda: 1 in cs) def test_column_accessor(self): from pyspark.sql.functions import col diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py index 1cc0c1b7562c5..9222e2b8272d6 100644 --- a/python/pyspark/sql/tests/test_conf.py +++ b/python/pyspark/sql/tests/test_conf.py @@ -28,7 +28,7 @@ def test_conf(self): self.assertEqual(spark.conf.get("bogo"), "ta") self.assertEqual(spark.conf.get("bogo", "not.read"), "ta") self.assertEqual(spark.conf.get("not.set", "ta"), "ta") - self.assertRaisesRegexp(Exception, "not.set", lambda: spark.conf.get("not.set")) + self.assertRaisesRegex(Exception, "not.set", lambda: spark.conf.get("not.set")) spark.conf.unset("bogo") self.assertEqual(spark.conf.get("bogo", "colombia"), "colombia") diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index d941707b8969f..e3977e8185180 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -343,7 +343,7 @@ def test_replace(self): self.spark.createDataFrame( [(u'Alice', 10, 80.1)], schema).replace({u"Alice": u"Bob", 10: 20}).first() - with self.assertRaisesRegexp( + with self.assertRaisesRegex( TypeError, 'value argument is required when to_replace is not a dictionary.'): self.spark.createDataFrame( @@ -390,7 +390,7 @@ def test_extended_hint_types(self): self.assertEqual(3, logical_plan.toString().count("itworks")) def test_sample(self): - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "should be a bool, float and number", lambda: self.spark.range(1).sample()) @@ -426,12 +426,12 @@ def test_toDF_with_schema_string(self): self.assertEqual(df.collect(), data) # number of fields must match. - self.assertRaisesRegexp(Exception, "Length of object", - lambda: rdd.toDF("key: int").collect()) + self.assertRaisesRegex(Exception, "Length of object", + lambda: rdd.toDF("key: int").collect()) # field types mismatch will cause exception at runtime. - self.assertRaisesRegexp(Exception, "FloatType can not accept", - lambda: rdd.toDF("key: float, value: string").collect()) + self.assertRaisesRegex(Exception, "FloatType can not accept", + lambda: rdd.toDF("key: float, value: string").collect()) # flat schema values will be wrapped into row. 
df = rdd.map(lambda row: row.key).toDF("int") @@ -491,15 +491,15 @@ def test_cache(self): spark.catalog.clearCache() self.assertFalse(spark.catalog.isCached("tab1")) self.assertFalse(spark.catalog.isCached("tab2")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.isCached("does_not_exist")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.cacheTable("does_not_exist")) - self.assertRaisesRegexp( + self.assertRaisesRegex( AnalysisException, "does_not_exist", lambda: spark.catalog.uncacheTable("does_not_exist")) @@ -523,12 +523,12 @@ def test_to_pandas(self): import numpy as np pdf = self._to_pandas() types = pdf.dtypes - self.assertEquals(types[0], np.int32) - self.assertEquals(types[1], np.object) - self.assertEquals(types[2], np.bool) - self.assertEquals(types[3], np.float32) - self.assertEquals(types[4], np.object) # datetime.date - self.assertEquals(types[5], 'datetime64[ns]') + self.assertEqual(types[0], np.int32) + self.assertEqual(types[1], np.object) + self.assertEqual(types[2], np.bool) + self.assertEqual(types[3], np.float32) + self.assertEqual(types[4], np.object) # datetime.date + self.assertEqual(types[5], 'datetime64[ns]') @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_with_duplicated_column_names(self): @@ -540,8 +540,8 @@ def test_to_pandas_with_duplicated_column_names(self): df = self.spark.sql(sql) pdf = df.toPandas() types = pdf.dtypes - self.assertEquals(types.iloc[0], np.int32) - self.assertEquals(types.iloc[1], np.int32) + self.assertEqual(types.iloc[0], np.int32) + self.assertEqual(types.iloc[1], np.int32) @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_on_cross_join(self): @@ -560,13 +560,13 @@ def test_to_pandas_on_cross_join(self): df = self.spark.sql(sql) pdf = df.toPandas() types = pdf.dtypes - self.assertEquals(types.iloc[0], np.int32) - self.assertEquals(types.iloc[1], np.int32) + self.assertEqual(types.iloc[0], np.int32) + self.assertEqual(types.iloc[1], np.int32) @unittest.skipIf(have_pandas, "Required Pandas was found.") def test_to_pandas_required_pandas_not_found(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ImportError, 'Pandas >= .* must be installed'): + with self.assertRaisesRegex(ImportError, 'Pandas >= .* must be installed'): self._to_pandas() @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore @@ -577,9 +577,9 @@ def test_to_pandas_avoid_astype(self): data = [(1, "foo", 16777220), (None, "bar", None)] df = self.spark.createDataFrame(data, schema) types = df.toPandas().dtypes - self.assertEquals(types[0], np.float64) # doesn't convert to np.int32 due to NaN value. - self.assertEquals(types[1], np.object) - self.assertEquals(types[2], np.float64) + self.assertEqual(types[0], np.float64) # doesn't convert to np.int32 due to NaN value. 
+ self.assertEqual(types[1], np.object) + self.assertEqual(types[2], np.float64) @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_to_pandas_from_empty_dataframe(self): @@ -675,7 +675,7 @@ def test_create_dataframe_from_pandas_with_timestamp(self): @unittest.skipIf(have_pandas, "Required Pandas was found.") def test_create_dataframe_required_pandas_not_found(self): with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( ImportError, "(Pandas >= .* must be installed|No module named '?pandas'?)"): import pandas as pd @@ -688,7 +688,7 @@ def test_create_dataframe_required_pandas_not_found(self): @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore def test_create_dataframe_from_pandas_with_dst(self): import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal from datetime import datetime pdf = pd.DataFrame({'time': [datetime(2015, 10, 31, 22, 30)]}) @@ -724,7 +724,7 @@ def test_repr_behaviors(self): ||22222|22222| |+-----+-----+ |""" - self.assertEquals(re.sub(pattern, '', expected1), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected1), df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): expected2 = """+---+-----+ ||key|value| @@ -733,7 +733,7 @@ def test_repr_behaviors(self): ||222| 222| |+---+-----+ |""" - self.assertEquals(re.sub(pattern, '', expected2), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected2), df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): expected3 = """+---+-----+ ||key|value| @@ -742,7 +742,7 @@ def test_repr_behaviors(self): |+---+-----+ |only showing top 1 row |""" - self.assertEquals(re.sub(pattern, '', expected3), df.__repr__()) + self.assertEqual(re.sub(pattern, '', expected3), df.__repr__()) # test when eager evaluation is enabled and _repr_html_ will be called with self.sql_conf({"spark.sql.repl.eagerEval.enabled": True}): @@ -752,7 +752,7 @@ def test_repr_behaviors(self): | |
                |<tr><td>22222</td><td>22222</td></tr>
      |""" - self.assertEquals(re.sub(pattern, '', expected1), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected1), df._repr_html_()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): expected2 = """ | @@ -760,7 +760,7 @@ def test_repr_behaviors(self): | |
                |<tr><th>key</th><th>value</th></tr>
                |<tr><td>222</td><td>222</td></tr>
      |""" - self.assertEquals(re.sub(pattern, '', expected2), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected2), df._repr_html_()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): expected3 = """ | @@ -768,19 +768,19 @@ def test_repr_behaviors(self): |
                |<tr><th>key</th><th>value</th></tr>
      |only showing top 1 row |""" - self.assertEquals(re.sub(pattern, '', expected3), df._repr_html_()) + self.assertEqual(re.sub(pattern, '', expected3), df._repr_html_()) # test when eager evaluation is disabled and _repr_html_ will be called with self.sql_conf({"spark.sql.repl.eagerEval.enabled": False}): expected = "DataFrame[key: bigint, value: string]" - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.truncate": 3}): - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) with self.sql_conf({"spark.sql.repl.eagerEval.maxNumRows": 1}): - self.assertEquals(None, df._repr_html_()) - self.assertEquals(expected, df.__repr__()) + self.assertEqual(None, df._repr_html_()) + self.assertEqual(expected, df.__repr__()) def test_to_local_iterator(self): df = self.spark.range(8, numPartitions=4) @@ -818,7 +818,7 @@ def test_to_local_iterator_not_fully_consumed(self): def test_same_semantics_error(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, "should be of DataFrame.*int"): + with self.assertRaisesRegex(ValueError, "should be of DataFrame.*int"): self.spark.range(10).sameSemantics(1) def test_input_files(self): @@ -830,7 +830,7 @@ def test_input_files(self): input_files_list = self.spark.read.parquet(tpath).inputFiles() # input files list should contain 10 entries - self.assertEquals(len(input_files_list), 10) + self.assertEqual(len(input_files_list), 10) # all file paths in list must contain tpath for file_path in input_files_list: self.assertTrue(tpath in file_path) diff --git a/python/pyspark/sql/tests/test_datasources.py b/python/pyspark/sql/tests/test_datasources.py index 9425494fb0d90..26a6c58dbad6b 100644 --- a/python/pyspark/sql/tests/test_datasources.py +++ b/python/pyspark/sql/tests/test_datasources.py @@ -107,7 +107,7 @@ def test_read_text_file_list(self): df = self.spark.read.text(['python/test_support/sql/text-test.txt', 'python/test_support/sql/text-test.txt']) count = df.count() - self.assertEquals(count, 4) + self.assertEqual(count, 4) def test_json_sampling_ratio(self): rdd = self.spark.sparkContext.range(0, 100, 1, 1) \ @@ -115,14 +115,14 @@ def test_json_sampling_ratio(self): schema = self.spark.read.option('inferSchema', True) \ .option('samplingRatio', 0.5) \ .json(rdd).schema - self.assertEquals(schema, StructType([StructField("a", LongType(), True)])) + self.assertEqual(schema, StructType([StructField("a", LongType(), True)])) def test_csv_sampling_ratio(self): rdd = self.spark.sparkContext.range(0, 100, 1, 1) \ .map(lambda x: '0.1' if x == 1 else str(x)) schema = self.spark.read.option('inferSchema', True)\ .csv(rdd, samplingRatio=0.5).schema - self.assertEquals(schema, StructType([StructField("_c0", IntegerType(), True)])) + self.assertEqual(schema, StructType([StructField("_c0", IntegerType(), True)])) def test_checking_csv_header(self): path = tempfile.mkdtemp() @@ -135,7 +135,7 @@ def test_checking_csv_header(self): StructField('f1', IntegerType(), nullable=True)]) df = self.spark.read.option('header', 'true').schema(schema)\ .csv(path, enforceSchema=False) - self.assertRaisesRegexp( + self.assertRaisesRegex( Exception, "CSV header does not conform to the schema", lambda: df.collect()) @@ -154,7 +154,7 @@ def test_ignore_column_of_all_nulls(self): 
StructField('b', LongType(), nullable=True), StructField('c', StringType(), nullable=True)]) readback = self.spark.read.json(path, dropFieldIfAllNull=True) - self.assertEquals(readback.schema, schema) + self.assertEqual(readback.schema, schema) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 2858bdeca0d5a..58599a9fa42f5 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -185,7 +185,7 @@ def test_string_functions(self): ] df = self.spark.createDataFrame([['nick']], schema=['name']) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "must be the same type", lambda: df.select(col('name').substr(0, lit(1)))) @@ -321,16 +321,16 @@ def test_sort_with_nulls_order(self): df = self.spark.createDataFrame( [('Tom', 80), (None, 60), ('Alice', 50)], ["name", "height"]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.asc_nulls_first('name')).collect(), [Row(name=None), Row(name=u'Alice'), Row(name=u'Tom')]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.asc_nulls_last('name')).collect(), [Row(name=u'Alice'), Row(name=u'Tom'), Row(name=None)]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.desc_nulls_first('name')).collect(), [Row(name=None), Row(name=u'Tom'), Row(name=u'Alice')]) - self.assertEquals( + self.assertEqual( df.select(df.name).orderBy(functions.desc_nulls_last('name')).collect(), [Row(name=u'Tom'), Row(name=u'Alice'), Row(name=None)]) @@ -354,7 +354,7 @@ def test_slice(self): df = self.spark.createDataFrame([([1, 2, 3],), ([4, 5],)], ['x']) - self.assertEquals( + self.assertEqual( df.select(slice(df.x, 2, 2).alias("sliced")).collect(), df.select(slice(df.x, lit(2), lit(2)).alias("sliced")).collect(), ) @@ -364,7 +364,7 @@ def test_array_repeat(self): df = self.spark.range(1) - self.assertEquals( + self.assertEqual( df.select(array_repeat("id", 3)).toDF("val").collect(), df.select(array_repeat("id", lit(3))).toDF("val").collect(), ) @@ -580,14 +580,14 @@ def test_datetime_functions(self): from datetime import date df = self.spark.range(1).selectExpr("'2017-01-22' as dateCol") parse_result = df.select(functions.to_date(functions.col("dateCol"))).first() - self.assertEquals(date(2017, 1, 22), parse_result['to_date(dateCol)']) + self.assertEqual(date(2017, 1, 22), parse_result['to_date(dateCol)']) def test_assert_true(self): from pyspark.sql.functions import assert_true df = self.spark.range(3) - self.assertEquals( + self.assertEqual( df.select(assert_true(df.id < 3)).toDF("val").collect(), [Row(val=None), Row(val=None), Row(val=None)], ) @@ -604,7 +604,7 @@ def test_assert_true(self): with self.assertRaises(TypeError) as cm: df.select(assert_true(df.id < 2, 5)) - self.assertEquals( + self.assertEqual( "errMsg should be a Column or a str, got ", str(cm.exception) ) @@ -626,7 +626,7 @@ def test_raise_error(self): with self.assertRaises(TypeError) as cm: df.select(raise_error(None)) - self.assertEquals( + self.assertEqual( "errMsg should be a Column or a str, got ", str(cm.exception) ) diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 4afc1dfcc1c6e..3c016e04adf2e 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -25,7 +25,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import 
assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -135,8 +135,8 @@ def test_mixed_scalar_udfs_followed_by_cogrouby_apply(self): .applyInPandas(lambda x, y: pd.DataFrame([(x.sum().sum(), y.sum().sum())]), 'sum1 int, sum2 int').collect() - self.assertEquals(result[0]['sum1'], 165) - self.assertEquals(result[0]['sum2'], 165) + self.assertEqual(result[0]['sum1'], 165) + self.assertEqual(result[0]['sum2'], 165) def test_with_key_left(self): self._test_with_key(self.data1, self.data1, isLeft=True) @@ -174,7 +174,7 @@ def test_wrong_return_type(self): left = self.data1 right = self.data2 with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*ArrayType.*TimestampType'): left.groupby('id').cogroup(right.groupby('id')).applyInPandas( @@ -183,7 +183,7 @@ def test_wrong_return_type(self): def test_wrong_args(self): left = self.data1 right = self.data2 - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): left.groupby('id').cogroup(right.groupby('id')) \ .applyInPandas(lambda: 1, StructType([StructField("d", DoubleType())])) @@ -194,14 +194,14 @@ def test_case_insensitive_grouping_column(self): row = df1.groupby("ColUmn").cogroup( df1.groupby("COLUMN") ).applyInPandas(lambda r, l: r + l, "column long, value long").first() - self.assertEquals(row.asDict(), Row(column=2, value=2).asDict()) + self.assertEqual(row.asDict(), Row(column=2, value=2).asDict()) df2 = self.spark.createDataFrame([(1, 1)], ("column", "value")) row = df1.groupby("ColUmn").cogroup( df2.groupby("COLUMN") ).applyInPandas(lambda r, l: r + l, "column long, value long").first() - self.assertEquals(row.asDict(), Row(column=2, value=2).asDict()) + self.assertEqual(row.asDict(), Row(column=2, value=2).asDict()) @staticmethod def _test_with_key(left, right, isLeft): diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index a639a8d51f55c..64803a6574675 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -33,7 +33,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal if have_pyarrow: import pyarrow as pa # noqa: F401 @@ -160,7 +160,7 @@ def test_array_type_correct(self): def test_register_grouped_map_udf(self): foo_udf = pandas_udf(lambda x: x, "id long", PandasUDFType.GROUPED_MAP) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( ValueError, 'f.*SQL_BATCHED_UDF.*SQL_SCALAR_PANDAS_UDF.*SQL_GROUPED_AGG_PANDAS_UDF.*'): self.spark.catalog.registerFunction("foo_udf", foo_udf) @@ -244,7 +244,7 @@ def test_datatype_string(self): def test_wrong_return_type(self): with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*grouped map Pandas UDF.*ArrayType.*TimestampType'): pandas_udf( @@ -256,20 +256,20 @@ def test_wrong_args(self): df = self.data with QuietTest(self.sc): - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(lambda x: x) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(udf(lambda x: x, DoubleType())) - with 
self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(sum(df.v)) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(df.v + 1) - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): df.groupby('id').apply( pandas_udf(lambda: 1, StructType([StructField("d", DoubleType())]))) - with self.assertRaisesRegexp(ValueError, 'Invalid udf'): + with self.assertRaisesRegex(ValueError, 'Invalid udf'): df.groupby('id').apply(pandas_udf(lambda x, y: x, DoubleType())) - with self.assertRaisesRegexp(ValueError, 'Invalid udf.*GROUPED_MAP'): + with self.assertRaisesRegex(ValueError, 'Invalid udf.*GROUPED_MAP'): df.groupby('id').apply( pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR)) @@ -284,7 +284,7 @@ def test_unsupported_types(self): for unsupported_type in unsupported_types: schema = StructType([StructField('id', LongType(), True), unsupported_type]) with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, common_err_msg): + with self.assertRaisesRegex(NotImplementedError, common_err_msg): pandas_udf(lambda x: x, schema, PandasUDFType.GROUPED_MAP) # Regression test for SPARK-23314 @@ -451,9 +451,9 @@ def invalid_positional_types(pdf): with self.sql_conf({"spark.sql.execution.pandas.convertToArrowArraySafely": False}): with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "KeyError: 'id'"): + with self.assertRaisesRegex(Exception, "KeyError: 'id'"): grouped_df.apply(column_name_typo).collect() - with self.assertRaisesRegexp(Exception, "[D|d]ecimal.*got.*date"): + with self.assertRaisesRegex(Exception, "[D|d]ecimal.*got.*date"): grouped_df.apply(invalid_positional_types).collect() def test_positional_assignment_conf(self): @@ -482,7 +482,7 @@ def dummy_pandas_udf(df): # this was throwing an AnalysisException before SPARK-24208 res = df_with_pandas.alias('temp0').join(df_with_pandas.alias('temp1'), col('temp0.key') == col('temp1.key')) - self.assertEquals(res.count(), 5) + self.assertEqual(res.count(), 5) def test_mixed_scalar_udfs_followed_by_groupby_apply(self): df = self.spark.range(0, 10).toDF('v1') @@ -494,7 +494,7 @@ def test_mixed_scalar_udfs_followed_by_groupby_apply(self): 'sum int', PandasUDFType.GROUPED_MAP)) - self.assertEquals(result.collect()[0]['sum'], 165) + self.assertEqual(result.collect()[0]['sum'], 165) def test_grouped_with_empty_partition(self): data = [Row(id=1, x=2), Row(id=1, x=3), Row(id=2, x=4)] @@ -604,7 +604,7 @@ def my_pandas_udf(pdf): df = self.spark.createDataFrame([[1, 1]], ["column", "score"]) row = df.groupby('COLUMN').applyInPandas( my_pandas_udf, schema="column integer, score float").first() - self.assertEquals(row.asDict(), Row(column=1, score=0.5).asDict()) + self.assertEqual(row.asDict(), Row(column=1, score=0.5).asDict()) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 3ca437f75fc23..d53face702201 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -61,7 +61,7 @@ def func(iterator): df = self.spark.range(10) actual = df.mapInPandas(func, 'id long').collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) def test_multiple_columns(self): data = [(1, "foo"), (2, None), (3, "bar"), (4, "bar")] 
@@ -75,7 +75,7 @@ def func(iterator): actual = df.mapInPandas(func, df.schema).collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) def test_different_output_length(self): def func(iterator): @@ -84,7 +84,7 @@ def func(iterator): df = self.spark.range(10) actual = df.repartition(1).mapInPandas(func, 'a long').collect() - self.assertEquals(set((r.a for r in actual)), set(range(100))) + self.assertEqual(set((r.a for r in actual)), set(range(100))) def test_empty_iterator(self): def empty_iter(_): @@ -110,7 +110,7 @@ def func(iterator): df = self.spark.range(10) actual = df.mapInPandas(func, 'id long').mapInPandas(func, 'id long').collect() expected = df.collect() - self.assertEquals(actual, expected) + self.assertEqual(actual, expected) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_pandas_udf.py b/python/pyspark/sql/tests/test_pandas_udf.py index cc742fc4267cb..975eb4680dd04 100644 --- a/python/pyspark/sql/tests/test_pandas_udf.py +++ b/python/pyspark/sql/tests/test_pandas_udf.py @@ -114,31 +114,31 @@ def test_udf_wrong_arg(self): @pandas_udf('blah') def foo(x): return x - with self.assertRaisesRegexp(ValueError, 'Invalid return type.*None'): + with self.assertRaisesRegex(ValueError, 'Invalid return type.*None'): @pandas_udf(functionType=PandasUDFType.SCALAR) def foo(x): return x - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): @pandas_udf('double', 100) def foo(x): return x - with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'): + with self.assertRaisesRegex(ValueError, '0-arg pandas_udfs.*not.*supported'): pandas_udf(lambda: 1, LongType(), PandasUDFType.SCALAR) - with self.assertRaisesRegexp(ValueError, '0-arg pandas_udfs.*not.*supported'): + with self.assertRaisesRegex(ValueError, '0-arg pandas_udfs.*not.*supported'): @pandas_udf(LongType(), PandasUDFType.SCALAR) def zero_with_type(): return 1 - with self.assertRaisesRegexp(TypeError, 'Invalid return type'): + with self.assertRaisesRegex(TypeError, 'Invalid return type'): @pandas_udf(returnType=PandasUDFType.GROUPED_MAP) def foo(df): return df - with self.assertRaisesRegexp(TypeError, 'Invalid return type'): + with self.assertRaisesRegex(TypeError, 'Invalid return type'): @pandas_udf(returnType='double', functionType=PandasUDFType.GROUPED_MAP) def foo(df): return df - with self.assertRaisesRegexp(ValueError, 'Invalid function'): + with self.assertRaisesRegex(ValueError, 'Invalid function'): @pandas_udf(returnType='k int, v double', functionType=PandasUDFType.GROUPED_MAP) def foo(k, v, w): return k @@ -154,14 +154,14 @@ def foofoo(x, y): df = self.spark.range(0, 100) # plain udf (test for SPARK-23754) - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.withColumn('v', udf(foo)('id')).collect ) # pandas scalar udf - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.withColumn( @@ -170,7 +170,7 @@ def foofoo(x, y): ) # pandas grouped map - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').apply( @@ -178,7 +178,7 @@ def foofoo(x, y): ).collect ) - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').apply( @@ -187,7 +187,7 @@ def foofoo(x, y): ) # pandas grouped agg - self.assertRaisesRegexp( + self.assertRaisesRegex( PythonException, exc_message, df.groupBy('id').agg( @@ -210,8 +210,8 @@ def udf(column): 
# Since 0.11.0, PyArrow supports the feature to raise an error for unsafe cast. with self.sql_conf({ "spark.sql.execution.pandas.convertToArrowArraySafely": True}): - with self.assertRaisesRegexp(Exception, - "Exception thrown when converting pandas.Series"): + with self.assertRaisesRegex(Exception, + "Exception thrown when converting pandas.Series"): df.select(['A']).withColumn('udf', udf('A')).collect() # Disabling Arrow safe type check. @@ -231,8 +231,8 @@ def udf(column): # When enabling safe type check, Arrow 0.11.0+ disallows overflow cast. with self.sql_conf({ "spark.sql.execution.pandas.convertToArrowArraySafely": True}): - with self.assertRaisesRegexp(Exception, - "Exception thrown when converting pandas.Series"): + with self.assertRaisesRegex(Exception, + "Exception thrown when converting pandas.Series"): df.withColumn('udf', udf('id')).collect() # Disabling safe type check, let Arrow do the cast anyway. diff --git a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py index 2cbcf31f6e7b3..b49092ed70d04 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/test_pandas_udf_grouped_agg.py @@ -30,7 +30,7 @@ if have_pandas: import pandas as pd - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( @@ -145,20 +145,20 @@ def test_basic(self): def test_unsupported_types(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): pandas_udf( lambda x: x, ArrayType(ArrayType(TimestampType())), PandasUDFType.GROUPED_AGG) with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): @pandas_udf('mean double, std double', PandasUDFType.GROUPED_AGG) def mean_and_std_udf(v): return v.mean(), v.std() with QuietTest(self.sc): - with self.assertRaisesRegexp(NotImplementedError, 'not supported'): + with self.assertRaisesRegex(NotImplementedError, 'not supported'): @pandas_udf(ArrayType(TimestampType()), PandasUDFType.GROUPED_AGG) def mean_and_std_udf(v): return {v.mean(): v.std()} @@ -428,7 +428,7 @@ def test_array_type(self): array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array', PandasUDFType.GROUPED_AGG) result1 = df.groupby('id').agg(array_udf(df['v']).alias('v2')) - self.assertEquals(result1.first()['v2'], [1.0, 2.0]) + self.assertEqual(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): df = self.data @@ -436,19 +436,19 @@ def test_invalid_args(self): mean_udf = self.pandas_agg_mean_udf with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'nor.*aggregate function'): df.groupby(df.id).agg(plus_one(df.v)).collect() with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'aggregate function.*argument.*aggregate function'): df.groupby(df.id).agg(mean_udf(mean_udf(df.v))).collect() with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'mixture.*aggregate function.*group aggregate pandas UDF'): df.groupby(df.id).agg(mean_udf(df.v), mean(df.v)).collect() diff --git a/python/pyspark/sql/tests/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/test_pandas_udf_scalar.py index 5da5d043ceca4..2eb2dec00106e 100644 --- 
a/python/pyspark/sql/tests/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/test_pandas_udf_scalar.py @@ -133,7 +133,7 @@ def test_vectorized_udf_basic(self): long_f(col('long')), float_f(col('float')), double_f(col('double')), decimal_f('decimal'), bool_f(col('bool')), array_long_f('array_long')) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_register_nondeterministic_vectorized_udf_basic(self): random_pandas_udf = pandas_udf( @@ -169,7 +169,7 @@ def test_vectorized_udf_null_boolean(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: bool_f = pandas_udf(lambda x: x, BooleanType(), udf_type) res = df.select(bool_f(col('bool'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_byte(self): data = [(None,), (2,), (3,), (4,)] @@ -178,7 +178,7 @@ def test_vectorized_udf_null_byte(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: byte_f = pandas_udf(lambda x: x, ByteType(), udf_type) res = df.select(byte_f(col('byte'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_short(self): data = [(None,), (2,), (3,), (4,)] @@ -187,7 +187,7 @@ def test_vectorized_udf_null_short(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: short_f = pandas_udf(lambda x: x, ShortType(), udf_type) res = df.select(short_f(col('short'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_int(self): data = [(None,), (2,), (3,), (4,)] @@ -196,7 +196,7 @@ def test_vectorized_udf_null_int(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: int_f = pandas_udf(lambda x: x, IntegerType(), udf_type) res = df.select(int_f(col('int'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_long(self): data = [(None,), (2,), (3,), (4,)] @@ -205,7 +205,7 @@ def test_vectorized_udf_null_long(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: long_f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(long_f(col('long'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_float(self): data = [(3.0,), (5.0,), (-1.0,), (None,)] @@ -214,7 +214,7 @@ def test_vectorized_udf_null_float(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: float_f = pandas_udf(lambda x: x, FloatType(), udf_type) res = df.select(float_f(col('float'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_double(self): data = [(3.0,), (5.0,), (-1.0,), (None,)] @@ -223,7 +223,7 @@ def test_vectorized_udf_null_double(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: double_f = pandas_udf(lambda x: x, DoubleType(), udf_type) res = df.select(double_f(col('double'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_decimal(self): data = [(Decimal(3.0),), (Decimal(5.0),), (Decimal(-1.0),), (None,)] @@ -232,7 +232,7 @@ def test_vectorized_udf_null_decimal(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: decimal_f = pandas_udf(lambda x: x, DecimalType(38, 18), udf_type) res = 
df.select(decimal_f(col('decimal'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_string(self): data = [("foo",), (None,), ("bar",), ("bar",)] @@ -241,7 +241,7 @@ def test_vectorized_udf_null_string(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(lambda x: x, StringType(), udf_type) res = df.select(str_f(col('str'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_string_in_udf(self): df = self.spark.range(10) @@ -255,7 +255,7 @@ def iter_f(it): str_f = pandas_udf(f, StringType(), udf_type) actual = df.select(str_f(col('id'))) expected = df.select(col('id').cast('string')) - self.assertEquals(expected.collect(), actual.collect()) + self.assertEqual(expected.collect(), actual.collect()) def test_vectorized_udf_datatype_string(self): df = self.spark.range(10).select( @@ -279,7 +279,7 @@ def test_vectorized_udf_datatype_string(self): long_f(col('long')), float_f(col('float')), double_f(col('double')), decimal_f('decimal'), bool_f(col('bool'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_null_binary(self): data = [(bytearray(b"a"),), (None,), (bytearray(b"bb"),), (bytearray(b"ccc"),)] @@ -288,7 +288,7 @@ def test_vectorized_udf_null_binary(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: str_f = pandas_udf(lambda x: x, BinaryType(), udf_type) res = df.select(str_f(col('binary'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_array_type(self): data = [([1, 2],), ([3, 4],)] @@ -297,7 +297,7 @@ def test_vectorized_udf_array_type(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()), udf_type) result = df.select(array_f(col('array'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_null_array(self): data = [([1, 2],), (None,), (None,), ([3, 4],), (None,)] @@ -306,7 +306,7 @@ def test_vectorized_udf_null_array(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: array_f = pandas_udf(lambda x: x, ArrayType(IntegerType()), udf_type) result = df.select(array_f(col('array'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_struct_type(self): df = self.spark.range(10) @@ -375,7 +375,7 @@ def test_vectorized_udf_nested_struct(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, 'Invalid return type with scalar Pandas UDFs'): pandas_udf(lambda x: x, returnType=nested_type, functionType=udf_type) @@ -392,7 +392,7 @@ def test_vectorized_udf_map_type(self): else: map_f = pandas_udf(lambda x: x, MapType(StringType(), LongType()), udf_type) result = df.select(map_f(col('map'))) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_vectorized_udf_complex(self): df = self.spark.range(10).select( @@ -422,7 +422,7 @@ def iter_mul(it): (iter_add, iter_power2, iter_mul)]: res = df.select(add(col('a'), col('b')), power2(col('a')), mul(col('b'), col('c'))) expected = df.select(expr('a + b'), expr('power(2, a)'), expr('b * 
c')) - self.assertEquals(expected.collect(), res.collect()) + self.assertEqual(expected.collect(), res.collect()) def test_vectorized_udf_exception(self): df = self.spark.range(10) @@ -435,14 +435,14 @@ def iter_raise_exception(it): for raise_exception in [scalar_raise_exception, iter_raise_exception]: with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'division( or modulo)? by zero'): + with self.assertRaisesRegex(Exception, 'division( or modulo)? by zero'): df.select(raise_exception(col('id'))).collect() def test_vectorized_udf_invalid_length(self): df = self.spark.range(10) raise_exception = pandas_udf(lambda _: pd.Series(1), LongType()) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, 'Result vector from pandas_udf was not the required length'): df.select(raise_exception(col('id'))).collect() @@ -453,7 +453,7 @@ def iter_udf_wong_output_size(it): yield pd.Series(1) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, "The length of output in Scalar iterator.*" "the length of output was 1"): @@ -469,7 +469,7 @@ def iter_udf_not_reading_all_input(it): with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}): df1 = self.spark.range(10).repartition(1) with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( Exception, "pandas iterator UDF should exhaust"): df1.select(iter_udf_not_reading_all_input(col('id'))).collect() @@ -486,7 +486,7 @@ def test_vectorized_udf_chained(self): for f, g in [(scalar_f, scalar_g), (iter_f, iter_g)]: res = df.select(g(f(col('id')))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_chained_struct_type(self): df = self.spark.range(10) @@ -517,7 +517,7 @@ def iter_f(it): def test_vectorized_udf_wrong_return_type(self): with QuietTest(self.sc): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) @@ -529,7 +529,7 @@ def test_vectorized_udf_return_scalar(self): PandasUDFType.SCALAR_ITER) for f in [scalar_f, iter_f]: with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, 'Return.*type.*Series'): + with self.assertRaisesRegex(Exception, 'Return.*type.*Series'): df.select(f(col('id'))).collect() def test_vectorized_udf_decorator(self): @@ -545,14 +545,14 @@ def iter_identity(x): for identity in [scalar_identity, iter_identity]: res = df.select(identity(col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_empty_partition(self): df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(f(col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_struct_with_empty_partition(self): df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2))\ @@ -585,16 +585,16 @@ def iter_f(it): for f in [scalar_f, iter_f]: res = df.select(f(col('id'), col('id'))) - self.assertEquals(df.collect(), res.collect()) + self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_unsupported_types(self): with 
QuietTest(self.sc): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.*TimestampType'): pandas_udf(lambda x: x, ArrayType(TimestampType()), udf_type) - with self.assertRaisesRegexp( + with self.assertRaisesRegex( NotImplementedError, 'Invalid return type.*scalar Pandas UDF.*ArrayType.StructType'): pandas_udf(lambda x: x, @@ -637,10 +637,10 @@ def iter_check_data(it): result = df.withColumn("check_data", check_data(col("idx"), col("date"), col("date_copy"))).collect() - self.assertEquals(len(data), len(result)) + self.assertEqual(len(data), len(result)) for i in range(len(result)): - self.assertEquals(data[i][1], result[i][1]) # "date" col - self.assertEquals(data[i][1], result[i][2]) # "date_copy" col + self.assertEqual(data[i][1], result[i][1]) # "date" col + self.assertEqual(data[i][1], result[i][2]) # "date_copy" col self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_timestamps(self): @@ -686,10 +686,10 @@ def iter_check_data(it): result = df.withColumn("check_data", check_data(col("idx"), col("timestamp"), col("timestamp_copy"))).collect() # Check that collection values are correct - self.assertEquals(len(data), len(result)) + self.assertEqual(len(data), len(result)) for i in range(len(result)): - self.assertEquals(data[i][1], result[i][1]) # "timestamp" col - self.assertEquals(data[i][1], result[i][2]) # "timestamp_copy" col + self.assertEqual(data[i][1], result[i][1]) # "timestamp" col + self.assertEqual(data[i][1], result[i][2]) # "timestamp_copy" col self.assertIsNone(result[i][3]) # "check_data" col def test_vectorized_udf_return_timestamp_tz(self): @@ -713,7 +713,7 @@ def iter_gen_timestamps(it): i, ts = r ts_tz = pd.Timestamp(i, unit='D', tz='America/Los_Angeles').to_pydatetime() expected = spark_ts_t.fromInternal(spark_ts_t.toInternal(ts_tz)) - self.assertEquals(expected, ts) + self.assertEqual(expected, ts) def test_vectorized_udf_check_config(self): with self.sql_conf({"spark.sql.execution.arrow.maxRecordsPerBatch": 3}): @@ -799,9 +799,9 @@ def test_nondeterministic_vectorized_udf_in_aggregate(self): for random_udf in [self.nondeterministic_vectorized_udf, self.nondeterministic_vectorized_iter_udf]: with QuietTest(self.sc): - with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'): + with self.assertRaisesRegex(AnalysisException, 'nondeterministic'): df.groupby(df.id).agg(sum(random_udf(df.id))).collect() - with self.assertRaisesRegexp(AnalysisException, 'nondeterministic'): + with self.assertRaisesRegex(AnalysisException, 'nondeterministic'): df.agg(sum(random_udf(df.id))).collect() def test_register_vectorized_udf_basic(self): @@ -825,8 +825,8 @@ def iter_original_add(it): res2 = self.spark.sql( "SELECT add1(t.a, t.b) FROM (SELECT id as a, id as b FROM range(10)) t") expected = df.select(expr('a + b')) - self.assertEquals(expected.collect(), res1.collect()) - self.assertEquals(expected.collect(), res2.collect()) + self.assertEqual(expected.collect(), res1.collect()) + self.assertEqual(expected.collect(), res2.collect()) def test_scalar_iter_udf_init(self): import numpy as np @@ -854,7 +854,7 @@ def test_close(batch_iter): finally: raise RuntimeError("reached finally block") with QuietTest(self.sc): - with self.assertRaisesRegexp(Exception, "reached finally block"): + with self.assertRaisesRegex(Exception, "reached finally block"): self.spark.range(1).select(test_close(col("id"))).collect() 
def test_scalar_iter_udf_close_early(self): @@ -905,7 +905,7 @@ def test_timestamp_dst(self): for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: foo_udf = pandas_udf(lambda x: x, 'timestamp', udf_type) result = df.withColumn('time', foo_udf(df.time)) - self.assertEquals(df.collect(), result.collect()) + self.assertEqual(df.collect(), result.collect()) def test_udf_category_type(self): @@ -1003,11 +1003,11 @@ def f4_iter(it): df_chained_4 = df.withColumn('f4_f2_f1', f4(f2(f1(df['v'])))) df_chained_5 = df.withColumn('f4_f3_f1', f4(f3(f1(df['v'])))) - self.assertEquals(expected_chained_1, df_chained_1.collect()) - self.assertEquals(expected_chained_2, df_chained_2.collect()) - self.assertEquals(expected_chained_3, df_chained_3.collect()) - self.assertEquals(expected_chained_4, df_chained_4.collect()) - self.assertEquals(expected_chained_5, df_chained_5.collect()) + self.assertEqual(expected_chained_1, df_chained_1.collect()) + self.assertEqual(expected_chained_2, df_chained_2.collect()) + self.assertEqual(expected_chained_3, df_chained_3.collect()) + self.assertEqual(expected_chained_4, df_chained_4.collect()) + self.assertEqual(expected_chained_5, df_chained_5.collect()) # Test multiple mixed UDF expressions in a single projection df_multi_1 = df \ @@ -1045,8 +1045,8 @@ def f4_iter(it): .withColumn('f4_f3_f2', f4(f3(f2(col('v'))))) \ .withColumn('f4_f3_f2_f1', f4(f3(f2(f1(col('v')))))) - self.assertEquals(expected_multi, df_multi_1.collect()) - self.assertEquals(expected_multi, df_multi_2.collect()) + self.assertEqual(expected_multi, df_multi_1.collect()) + self.assertEqual(expected_multi, df_multi_2.collect()) def test_mixed_udf_and_sql(self): df = self.spark.range(0, 1).toDF('v') @@ -1107,7 +1107,7 @@ def f3i(it): .withColumn('f3_f1_f2', f3(f1(f2(df['v'])))) \ .withColumn('f3_f2_f1', f3(f2(f1(df['v'])))) - self.assertEquals(expected, df1.collect()) + self.assertEqual(expected, df1.collect()) # SPARK-24721 @unittest.skipIf(not test_compiled, test_not_compiled_message) # type: ignore @@ -1138,17 +1138,17 @@ def test_datasource_with_udf(self): for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c1) expected = df.withColumn('c', lit(2)) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c2) expected = df.withColumn('c', col('i') + 1) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: for f in [f1, f2]: result = df.filter(f) - self.assertEquals(0, result.count()) + self.assertEqual(0, result.count()) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/test_pandas_udf_typehints.py index d9717da4d2fbd..e30f43181ae96 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/test_pandas_udf_typehints.py @@ -29,7 +29,7 @@ if have_pandas: import pandas as pd import numpy as np - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( diff --git a/python/pyspark/sql/tests/test_pandas_udf_window.py b/python/pyspark/sql/tests/test_pandas_udf_window.py index 5ad2ecd8f85d4..d861bcce9e8b8 100644 --- a/python/pyspark/sql/tests/test_pandas_udf_window.py +++ b/python/pyspark/sql/tests/test_pandas_udf_window.py @@ -26,7 
+26,7 @@ from pyspark.testing.utils import QuietTest if have_pandas: - from pandas.util.testing import assert_frame_equal + from pandas.testing import assert_frame_equal @unittest.skipIf( @@ -241,14 +241,14 @@ def test_array_type(self): array_udf = pandas_udf(lambda x: [1.0, 2.0], 'array', PandasUDFType.GROUPED_AGG) result1 = df.withColumn('v2', array_udf(df['v']).over(w)) - self.assertEquals(result1.first()['v2'], [1.0, 2.0]) + self.assertEqual(result1.first()['v2'], [1.0, 2.0]) def test_invalid_args(self): df = self.data w = self.unbounded_window with QuietTest(self.sc): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, '.*not supported within a window function'): foo_udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUPED_MAP) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 6b5c1ad6c4e46..eb4caf05d1af0 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -180,7 +180,7 @@ def test_infer_schema_not_enough_names(self): self.assertEqual(df.columns, ['col1', '_2']) def test_infer_schema_fails(self): - with self.assertRaisesRegexp(TypeError, 'field a'): + with self.assertRaisesRegex(TypeError, 'field a'): self.spark.createDataFrame(self.spark.sparkContext.parallelize([[1, 1], ["x", 1]]), schema=["a", "b"], samplingRatio=0.99) @@ -578,18 +578,18 @@ def test_merge_type(self): ArrayType(LongType()), ArrayType(LongType()) ), ArrayType(LongType())) - with self.assertRaisesRegexp(TypeError, 'element in array'): + with self.assertRaisesRegex(TypeError, 'element in array'): _merge_type(ArrayType(LongType()), ArrayType(DoubleType())) self.assertEqual(_merge_type( MapType(StringType(), LongType()), MapType(StringType(), LongType()) ), MapType(StringType(), LongType())) - with self.assertRaisesRegexp(TypeError, 'key of map'): + with self.assertRaisesRegex(TypeError, 'key of map'): _merge_type( MapType(StringType(), LongType()), MapType(DoubleType(), LongType())) - with self.assertRaisesRegexp(TypeError, 'value of map'): + with self.assertRaisesRegex(TypeError, 'value of map'): _merge_type( MapType(StringType(), LongType()), MapType(StringType(), DoubleType())) @@ -598,7 +598,7 @@ def test_merge_type(self): StructType([StructField("f1", LongType()), StructField("f2", StringType())]), StructType([StructField("f1", LongType()), StructField("f2", StringType())]) ), StructType([StructField("f1", LongType()), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'field f1'): + with self.assertRaisesRegex(TypeError, 'field f1'): _merge_type( StructType([StructField("f1", LongType()), StructField("f2", StringType())]), StructType([StructField("f1", DoubleType()), StructField("f2", StringType())])) @@ -607,7 +607,7 @@ def test_merge_type(self): StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), StructType([StructField("f1", StructType([StructField("f2", LongType())]))]) ), StructType([StructField("f1", StructType([StructField("f2", LongType())]))])) - with self.assertRaisesRegexp(TypeError, 'field f2 in field f1'): + with self.assertRaisesRegex(TypeError, 'field f2 in field f1'): _merge_type( StructType([StructField("f1", StructType([StructField("f2", LongType())]))]), StructType([StructField("f1", StructType([StructField("f2", StringType())]))])) @@ -616,7 +616,7 @@ def test_merge_type(self): StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())]), StructType([StructField("f1", 
ArrayType(LongType())), StructField("f2", StringType())]) ), StructType([StructField("f1", ArrayType(LongType())), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'element in array field f1'): + with self.assertRaisesRegex(TypeError, 'element in array field f1'): _merge_type( StructType([ StructField("f1", ArrayType(LongType())), @@ -635,7 +635,7 @@ def test_merge_type(self): ), StructType([ StructField("f1", MapType(StringType(), LongType())), StructField("f2", StringType())])) - with self.assertRaisesRegexp(TypeError, 'value of map field f1'): + with self.assertRaisesRegex(TypeError, 'value of map field f1'): _merge_type( StructType([ StructField("f1", MapType(StringType(), LongType())), @@ -648,7 +648,7 @@ def test_merge_type(self): StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]), StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]) ), StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))])) - with self.assertRaisesRegexp(TypeError, 'key of map element in array field f1'): + with self.assertRaisesRegex(TypeError, 'key of map element in array field f1'): _merge_type( StructType([StructField("f1", ArrayType(MapType(StringType(), LongType())))]), StructType([StructField("f1", ArrayType(MapType(DoubleType(), LongType())))]) @@ -734,7 +734,7 @@ def assertCollectSuccess(typecode, value): unsupported_types = all_types - set(supported_types) # test unsupported types for t in unsupported_types: - with self.assertRaisesRegexp(TypeError, "infer the type of the field myarray"): + with self.assertRaisesRegex(TypeError, "infer the type of the field myarray"): a = array.array(t) self.spark.createDataFrame([Row(myarray=a)]).collect() @@ -789,13 +789,13 @@ def test_invalid_create_row(self): class DataTypeVerificationTests(unittest.TestCase): def test_verify_type_exception_msg(self): - self.assertRaisesRegexp( + self.assertRaisesRegex( ValueError, "test_name", lambda: _make_type_verifier(StringType(), nullable=False, name="test_name")(None)) schema = StructType([StructField('a', StructType([StructField('b', IntegerType())]))]) - self.assertRaisesRegexp( + self.assertRaisesRegex( TypeError, "field b in field a", lambda: _make_type_verifier(schema)([["data"]])) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 9a1c0edcce4ed..bfc55dff94540 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -98,7 +98,7 @@ def test_udf_registration_return_type_none(self): def test_udf_registration_return_type_not_none(self): with QuietTest(self.sc): - with self.assertRaisesRegexp(TypeError, "Invalid return type"): + with self.assertRaisesRegex(TypeError, "Invalid return type"): self.spark.catalog.registerFunction( "f", UserDefinedFunction(lambda x, y: len(x) + y, StringType()), StringType()) @@ -149,9 +149,9 @@ def test_nondeterministic_udf_in_aggregate(self): df = self.spark.range(10) with QuietTest(self.sc): - with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + with self.assertRaisesRegex(AnalysisException, "nondeterministic"): df.groupby('id').agg(sum(udf_random_col())).collect() - with self.assertRaisesRegexp(AnalysisException, "nondeterministic"): + with self.assertRaisesRegex(AnalysisException, "nondeterministic"): df.agg(sum(udf_random_col())).collect() def test_chained_udf(self): @@ -203,7 +203,7 @@ def test_udf_in_join_condition(self): # Cross join. 
df = left.join(right, f("a", "b")) with self.sql_conf({"spark.sql.crossJoin.enabled": False}): - with self.assertRaisesRegexp(AnalysisException, 'Detected implicit cartesian product'): + with self.assertRaisesRegex(AnalysisException, 'Detected implicit cartesian product'): df.collect() with self.sql_conf({"spark.sql.crossJoin.enabled": True}): self.assertEqual(df.collect(), [Row(a=1, b=1)]) @@ -238,7 +238,7 @@ def test_udf_not_supported_in_join_condition(self): f = udf(lambda a, b: a == b, BooleanType()) def runWithJoinType(join_type, type_string): - with self.assertRaisesRegexp( + with self.assertRaisesRegex( AnalysisException, 'Using PythonUDF.*%s is not supported.' % type_string): left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect() @@ -385,18 +385,18 @@ def test_register_java_udaf(self): def test_non_existed_udf(self): spark = self.spark - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", - lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udf", + lambda: spark.udf.registerJavaFunction("udf1", "non_existed_udf")) # This is to check if a deprecated 'SQLContext.registerJavaFunction' can call its alias. sqlContext = spark._wrapped - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udf", - lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udf", + lambda: sqlContext.registerJavaFunction("udf1", "non_existed_udf")) def test_non_existed_udaf(self): spark = self.spark - self.assertRaisesRegexp(AnalysisException, "Can not load class non_existed_udaf", - lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) + self.assertRaisesRegex(AnalysisException, "Can not load class non_existed_udaf", + lambda: spark.udf.registerJavaUDAF("udaf1", "non_existed_udaf")) def test_udf_with_input_file_name(self): from pyspark.sql.functions import input_file_name @@ -587,17 +587,17 @@ def test_datasource_with_udf(self): for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c1) expected = df.withColumn('c', lit(2)) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: result = df.withColumn('c', c2) expected = df.withColumn('c', col('i') + 1) - self.assertEquals(expected.collect(), result.collect()) + self.assertEqual(expected.collect(), result.collect()) for df in [filesource_df, datasource_df, datasource_v2_df]: for f in [f1, f2]: result = df.filter(f) - self.assertEquals(0, result.count()) + self.assertEqual(0, result.count()) finally: shutil.rmtree(path) diff --git a/python/pyspark/sql/tests/test_utils.py b/python/pyspark/sql/tests/test_utils.py index b08e17208d8af..005f0e892b60f 100644 --- a/python/pyspark/sql/tests/test_utils.py +++ b/python/pyspark/sql/tests/test_utils.py @@ -31,23 +31,22 @@ def test_capture_user_friendly_exception(self): try: self.spark.sql("select `中文字段`") except AnalysisException as e: - self.assertRegexpMatches(str(e), "cannot resolve '`中文字段`'") + self.assertRegex(str(e), "cannot resolve '`中文字段`'") def test_capture_parse_exception(self): self.assertRaises(ParseException, lambda: self.spark.sql("abc")) def test_capture_illegalargument_exception(self): - self.assertRaisesRegexp(IllegalArgumentException, "Setting negative mapred.reduce.tasks", - lambda: 
self.spark.sql("SET mapred.reduce.tasks=-1")) + self.assertRaisesRegex(IllegalArgumentException, "Setting negative mapred.reduce.tasks", + lambda: self.spark.sql("SET mapred.reduce.tasks=-1")) df = self.spark.createDataFrame([(1, 2)], ["a", "b"]) - self.assertRaisesRegexp(IllegalArgumentException, "1024 is not in the permitted values", - lambda: df.select(sha2(df.a, 1024)).collect()) + self.assertRaisesRegex(IllegalArgumentException, "1024 is not in the permitted values", + lambda: df.select(sha2(df.a, 1024)).collect()) try: df.select(sha2(df.a, 1024)).collect() except IllegalArgumentException as e: - self.assertRegexpMatches(e.desc, "1024 is not in the permitted values") - self.assertRegexpMatches(e.stackTrace, - "org.apache.spark.sql.functions") + self.assertRegex(e.desc, "1024 is not in the permitted values") + self.assertRegex(e.stackTrace, "org.apache.spark.sql.functions") if __name__ == "__main__": diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py index de72a547b0844..e621321283dab 100644 --- a/python/pyspark/tests/test_profiler.py +++ b/python/pyspark/tests/test_profiler.py @@ -85,11 +85,11 @@ class ProfilerTests2(unittest.TestCase): def test_profiler_disabled(self): sc = SparkContext(conf=SparkConf().set("spark.python.profile", "false")) try: - self.assertRaisesRegexp( + self.assertRaisesRegex( RuntimeError, "'spark.python.profile' configuration must be set", lambda: sc.show_profiles()) - self.assertRaisesRegexp( + self.assertRaisesRegex( RuntimeError, "'spark.python.profile' configuration must be set", lambda: sc.dump_profiles("/tmp/abc")) diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index 47b8f10a5b05e..b17c039889a71 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -733,25 +733,25 @@ def stopit(*x): keyed_rdd = self.sc.parallelize((x % 2, x) for x in range(10)) msg = "Caught StopIteration thrown from user's code; failing the task" - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.map(stopit).collect) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.filter(stopit).collect) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.reduce, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.fold, 0, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, seq_rdd.foreach, stopit) - self.assertRaisesRegexp(Py4JJavaError, msg, - seq_rdd.cartesian(seq_rdd).flatMap(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.map(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.filter(stopit).collect) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.foreach, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.reduce, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.fold, 0, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, seq_rdd.foreach, stopit) + self.assertRaisesRegex(Py4JJavaError, msg, + seq_rdd.cartesian(seq_rdd).flatMap(stopit).collect) # these methods call the user function both in the driver and in the executor # the exception raised is different according to where the StopIteration happens # RuntimeError is raised if in the driver # Py4JJavaError is raised if in the executor (wraps the RuntimeError raised in the worker) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - keyed_rdd.reduceByKeyLocally, stopit) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - seq_rdd.aggregate, 0, stopit, lambda *x: 
1) - self.assertRaisesRegexp((Py4JJavaError, RuntimeError), msg, - seq_rdd.aggregate, 0, lambda *x: 1, stopit) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + keyed_rdd.reduceByKeyLocally, stopit) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + seq_rdd.aggregate, 0, stopit, lambda *x: 1) + self.assertRaisesRegex((Py4JJavaError, RuntimeError), msg, + seq_rdd.aggregate, 0, lambda *x: 1, stopit) def test_overwritten_global_func(self): # Regression test for SPARK-27000 @@ -768,7 +768,7 @@ def fail(_): rdd = self.sc.range(10).map(fail) - with self.assertRaisesRegexp(Exception, "local iterator error"): + with self.assertRaisesRegex(Exception, "local iterator error"): for _ in rdd.toLocalIterator(): pass diff --git a/python/pyspark/tests/test_worker.py b/python/pyspark/tests/test_worker.py index d7a4b84e8dc41..51ebee4de7cec 100644 --- a/python/pyspark/tests/test_worker.py +++ b/python/pyspark/tests/test_worker.py @@ -165,7 +165,7 @@ def f(): self.sc.parallelize([1]).map(lambda x: f()).count() except Py4JJavaError as e: - self.assertRegexpMatches(str(e), "exception with 中") + self.assertRegex(str(e), "exception with 中") class WorkerReuseTest(PySparkTestCase): From 80161238fe9393aabd5fcd56752ff1e43f6989b1 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Tue, 1 Dec 2020 09:36:42 +0800 Subject: [PATCH 0617/1009] [SPARK-33592] Fix: Pyspark ML Validator params in estimatorParamMaps may be lost after saving and reloading ### What changes were proposed in this pull request? Fix: Pyspark ML Validator params in estimatorParamMaps may be lost after saving and reloading When saving validator estimatorParamMaps, will check all nested stages in tuned estimator to get correct param parent. Two typical cases to manually test: ~~~python tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") lr = LogisticRegression() pipeline = Pipeline(stages=[tokenizer, hashingTF, lr]) paramGrid = ParamGridBuilder() \ .addGrid(hashingTF.numFeatures, [10, 100]) \ .addGrid(lr.maxIter, [100, 200]) \ .build() tvs = TrainValidationSplit(estimator=pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) # check `loadedTvs.getEstimatorParamMaps()` restored correctly. ~~~ ~~~python lr = LogisticRegression() ova = OneVsRest(classifier=lr) grid = ParamGridBuilder().addGrid(lr.maxIter, [100, 200]).build() evaluator = MulticlassClassificationEvaluator() tvs = TrainValidationSplit(estimator=ova, estimatorParamMaps=grid, evaluator=evaluator) tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) # check `loadedTvs.getEstimatorParamMaps()` restored correctly. ~~~ ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30539 from WeichenXu123/fix_tuning_param_maps_io. 
Authored-by: Weichen Xu Signed-off-by: Ruifeng Zheng --- dev/sparktestsupport/modules.py | 1 + python/pyspark/ml/classification.py | 46 +------------ python/pyspark/ml/param/__init__.py | 6 ++ python/pyspark/ml/pipeline.py | 53 +-------------- python/pyspark/ml/tests/test_tuning.py | 47 +++++++++++-- python/pyspark/ml/tests/test_util.py | 84 +++++++++++++++++++++++ python/pyspark/ml/tuning.py | 94 ++++++++++++++++++++++++-- python/pyspark/ml/util.py | 38 +++++++++++ python/pyspark/ml/util.pyi | 6 ++ 9 files changed, 268 insertions(+), 107 deletions(-) create mode 100644 python/pyspark/ml/tests/test_util.py diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 868e4a5d23ed7..5d8b714711774 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -564,6 +564,7 @@ def __hash__(self): "pyspark.ml.tests.test_stat", "pyspark.ml.tests.test_training_summary", "pyspark.ml.tests.test_tuning", + "pyspark.ml.tests.test_util", "pyspark.ml.tests.test_wrapper", ], excluded_python_implementations=[ diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 50882fc895d6c..763038ede876a 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -36,7 +36,7 @@ from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary from pyspark.ml.wrapper import JavaParams, \ JavaPredictor, JavaPredictionModel, JavaWrapper -from pyspark.ml.common import inherit_doc, _java2py, _py2java +from pyspark.ml.common import inherit_doc from pyspark.ml.linalg import Vectors from pyspark.sql import DataFrame from pyspark.sql.functions import udf, when @@ -2991,50 +2991,6 @@ def _to_java(self): _java_obj.setRawPredictionCol(self.getRawPredictionCol()) return _java_obj - def _make_java_param_pair(self, param, value): - """ - Makes a Java param pair. - """ - sc = SparkContext._active_spark_context - param = self._resolveParam(param) - _java_obj = JavaParams._new_java_obj("org.apache.spark.ml.classification.OneVsRest", - self.uid) - java_param = _java_obj.getParam(param.name) - if isinstance(value, JavaParams): - # used in the case of an estimator having another estimator as a parameter - # the reason why this is not in _py2java in common.py is that importing - # Estimator and Model in common.py results in a circular import with inherit_doc - java_value = value._to_java() - else: - java_value = _py2java(sc, value) - return java_param.w(java_value) - - def _transfer_param_map_to_java(self, pyParamMap): - """ - Transforms a Python ParamMap into a Java ParamMap. - """ - paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") - for param in self.params: - if param in pyParamMap: - pair = self._make_java_param_pair(param, pyParamMap[param]) - paramMap.put([pair]) - return paramMap - - def _transfer_param_map_from_java(self, javaParamMap): - """ - Transforms a Java ParamMap into a Python ParamMap. 
- """ - sc = SparkContext._active_spark_context - paramMap = dict() - for pair in javaParamMap.toList(): - param = pair.param() - if self.hasParam(str(param.name())): - if param.name() == "classifier": - paramMap[self.getParam(param.name())] = JavaParams._from_java(pair.value()) - else: - paramMap[self.getParam(param.name())] = _java2py(sc, pair.value()) - return paramMap - class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable, JavaMLWritable): """ diff --git a/python/pyspark/ml/param/__init__.py b/python/pyspark/ml/param/__init__.py index f2381a4c42698..3eab6607aa7ee 100644 --- a/python/pyspark/ml/param/__init__.py +++ b/python/pyspark/ml/param/__init__.py @@ -437,6 +437,12 @@ def _resolveParam(self, param): else: raise ValueError("Cannot resolve %r as a param." % param) + def _testOwnParam(self, param_parent, param_name): + """ + Test the ownership. Return True or False + """ + return self.uid == param_parent and self.hasParam(param_name) + @staticmethod def _dummy(): """ diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py index a6471a8dd1fe5..b0aa735709e8d 100644 --- a/python/pyspark/ml/pipeline.py +++ b/python/pyspark/ml/pipeline.py @@ -21,8 +21,8 @@ from pyspark.ml.param import Param, Params from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ DefaultParamsReader, DefaultParamsWriter, MLWriter, MLReader, JavaMLWritable -from pyspark.ml.wrapper import JavaParams, JavaWrapper -from pyspark.ml.common import inherit_doc, _java2py, _py2java +from pyspark.ml.wrapper import JavaParams +from pyspark.ml.common import inherit_doc @inherit_doc @@ -190,55 +190,6 @@ def _to_java(self): return _java_obj - def _make_java_param_pair(self, param, value): - """ - Makes a Java param pair. - """ - sc = SparkContext._active_spark_context - param = self._resolveParam(param) - java_param = sc._jvm.org.apache.spark.ml.param.Param(param.parent, param.name, param.doc) - if isinstance(value, Params) and hasattr(value, "_to_java"): - # Convert JavaEstimator/JavaTransformer object or Estimator/Transformer object which - # implements `_to_java` method (such as OneVsRest, Pipeline object) to java object. - # used in the case of an estimator having another estimator as a parameter - # the reason why this is not in _py2java in common.py is that importing - # Estimator and Model in common.py results in a circular import with inherit_doc - java_value = value._to_java() - else: - java_value = _py2java(sc, value) - return java_param.w(java_value) - - def _transfer_param_map_to_java(self, pyParamMap): - """ - Transforms a Python ParamMap into a Java ParamMap. - """ - paramMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") - for param in self.params: - if param in pyParamMap: - pair = self._make_java_param_pair(param, pyParamMap[param]) - paramMap.put([pair]) - return paramMap - - def _transfer_param_map_from_java(self, javaParamMap): - """ - Transforms a Java ParamMap into a Python ParamMap. - """ - sc = SparkContext._active_spark_context - paramMap = dict() - for pair in javaParamMap.toList(): - param = pair.param() - if self.hasParam(str(param.name())): - java_obj = pair.value() - if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(java_obj): - # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class - # and Estimator/Transformer class which implements `_from_java` static method - # (such as OneVsRest, Pipeline class). 
- py_obj = JavaParams._from_java(java_obj) - else: - py_obj = _java2py(sc, java_obj) - paramMap[self.getParam(param.name())] = py_obj - return paramMap - @inherit_doc class PipelineWriter(MLWriter): diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index ced32c07f245f..ebd7457e4d30a 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -73,7 +73,21 @@ def test_addGrid(self): .build()) -class CrossValidatorTests(SparkSessionTestCase): +class ValidatorTestUtilsMixin: + def assert_param_maps_equal(self, paramMaps1, paramMaps2): + self.assertEqual(len(paramMaps1), len(paramMaps2)) + for paramMap1, paramMap2 in zip(paramMaps1, paramMaps2): + self.assertEqual(set(paramMap1.keys()), set(paramMap2.keys())) + for param in paramMap1.keys(): + v1 = paramMap1[param] + v2 = paramMap2[param] + if isinstance(v1, Params): + self.assertEqual(v1.uid, v2.uid) + else: + self.assertEqual(v1, v2) + + +class CrossValidatorTests(SparkSessionTestCase, ValidatorTestUtilsMixin): def test_copy(self): dataset = self.spark.createDataFrame([ @@ -256,7 +270,7 @@ def test_save_load_simple_estimator(self): loadedCV = CrossValidator.load(cvPath) self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid) self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid) - self.assertEqual(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps()) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), cv.getEstimatorParamMaps()) # test save/load of CrossValidatorModel cvModelPath = temp_path + "/cvModel" @@ -351,6 +365,7 @@ def test_save_load_nested_estimator(self): cvPath = temp_path + "/cv" cv.save(cvPath) loadedCV = CrossValidator.load(cvPath) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), grid) self.assertEqual(loadedCV.getEstimator().uid, cv.getEstimator().uid) self.assertEqual(loadedCV.getEvaluator().uid, cv.getEvaluator().uid) @@ -367,6 +382,7 @@ def test_save_load_nested_estimator(self): cvModelPath = temp_path + "/cvModel" cvModel.save(cvModelPath) loadedModel = CrossValidatorModel.load(cvModelPath) + self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) def test_save_load_pipeline_estimator(self): @@ -401,6 +417,11 @@ def test_save_load_pipeline_estimator(self): estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator(), numFolds=2) # use 3+ folds in practice + cvPath = temp_path + "/cv" + crossval.save(cvPath) + loadedCV = CrossValidator.load(cvPath) + self.assert_param_maps_equal(loadedCV.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedCV.getEstimator().uid, crossval.getEstimator().uid) # Run cross-validation, and choose the best set of parameters. cvModel = crossval.fit(training) @@ -421,6 +442,11 @@ def test_save_load_pipeline_estimator(self): estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator(), numFolds=2) # use 3+ folds in practice + cv2Path = temp_path + "/cv2" + crossval2.save(cv2Path) + loadedCV2 = CrossValidator.load(cv2Path) + self.assert_param_maps_equal(loadedCV2.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedCV2.getEstimator().uid, crossval2.getEstimator().uid) # Run cross-validation, and choose the best set of parameters. 
cvModel2 = crossval2.fit(training) @@ -511,7 +537,7 @@ def test_invalid_user_specified_folds(self): cv.fit(dataset_with_folds) -class TrainValidationSplitTests(SparkSessionTestCase): +class TrainValidationSplitTests(SparkSessionTestCase, ValidatorTestUtilsMixin): def test_fit_minimize_metric(self): dataset = self.spark.createDataFrame([ @@ -632,7 +658,8 @@ def test_save_load_simple_estimator(self): loadedTvs = TrainValidationSplit.load(tvsPath) self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid) - self.assertEqual(loadedTvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps()) + self.assert_param_maps_equal( + loadedTvs.getEstimatorParamMaps(), tvs.getEstimatorParamMaps()) tvsModelPath = temp_path + "/tvsModel" tvsModel.save(tvsModelPath) @@ -713,6 +740,7 @@ def test_save_load_nested_estimator(self): tvsPath = temp_path + "/tvs" tvs.save(tvsPath) loadedTvs = TrainValidationSplit.load(tvsPath) + self.assert_param_maps_equal(loadedTvs.getEstimatorParamMaps(), grid) self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) self.assertEqual(loadedTvs.getEvaluator().uid, tvs.getEvaluator().uid) @@ -728,6 +756,7 @@ def test_save_load_nested_estimator(self): tvsModelPath = temp_path + "/tvsModel" tvsModel.save(tvsModelPath) loadedModel = TrainValidationSplitModel.load(tvsModelPath) + self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) def test_save_load_pipeline_estimator(self): @@ -761,6 +790,11 @@ def test_save_load_pipeline_estimator(self): tvs = TrainValidationSplit(estimator=pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) + tvsPath = temp_path + "/tvs" + tvs.save(tvsPath) + loadedTvs = TrainValidationSplit.load(tvsPath) + self.assert_param_maps_equal(loadedTvs.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedTvs.getEstimator().uid, tvs.getEstimator().uid) # Run train validation split, and choose the best set of parameters. tvsModel = tvs.fit(training) @@ -780,6 +814,11 @@ def test_save_load_pipeline_estimator(self): tvs2 = TrainValidationSplit(estimator=nested_pipeline, estimatorParamMaps=paramGrid, evaluator=MulticlassClassificationEvaluator()) + tvs2Path = temp_path + "/tvs2" + tvs2.save(tvs2Path) + loadedTvs2 = TrainValidationSplit.load(tvs2Path) + self.assert_param_maps_equal(loadedTvs2.getEstimatorParamMaps(), paramGrid) + self.assertEqual(loadedTvs2.getEstimator().uid, tvs2.getEstimator().uid) # Run train validation split, and choose the best set of parameters. tvsModel2 = tvs2.fit(training) diff --git a/python/pyspark/ml/tests/test_util.py b/python/pyspark/ml/tests/test_util.py new file mode 100644 index 0000000000000..498a649e480a8 --- /dev/null +++ b/python/pyspark/ml/tests/test_util.py @@ -0,0 +1,84 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest + +from pyspark.ml import Pipeline +from pyspark.ml.classification import LogisticRegression, OneVsRest +from pyspark.ml.feature import VectorAssembler +from pyspark.ml.linalg import Vectors +from pyspark.ml.util import MetaAlgorithmReadWrite +from pyspark.testing.mlutils import SparkSessionTestCase + + +class MetaAlgorithmReadWriteTests(SparkSessionTestCase): + + def test_getAllNestedStages(self): + def _check_uid_set_equal(stages, expected_stages): + uids = set(map(lambda x: x.uid, stages)) + expected_uids = set(map(lambda x: x.uid, expected_stages)) + self.assertEqual(uids, expected_uids) + + df1 = self.spark.createDataFrame([ + (Vectors.dense([1., 2.]), 1.0), + (Vectors.dense([-1., -2.]), 0.0), + ], ['features', 'label']) + df2 = self.spark.createDataFrame([ + (1., 2., 1.0), + (1., 2., 0.0), + ], ['a', 'b', 'label']) + vs = VectorAssembler(inputCols=['a', 'b'], outputCol='features') + lr = LogisticRegression() + pipeline = Pipeline(stages=[vs, lr]) + pipelineModel = pipeline.fit(df2) + ova = OneVsRest(classifier=lr) + ovaModel = ova.fit(df1) + + ova_pipeline = Pipeline(stages=[vs, ova]) + nested_pipeline = Pipeline(stages=[ova_pipeline]) + + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(pipeline), + [pipeline, vs, lr] + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(pipelineModel), + [pipelineModel] + pipelineModel.stages + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(ova), + [ova, lr] + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(ovaModel), + [ovaModel, lr] + ovaModel.models + ) + _check_uid_set_equal( + MetaAlgorithmReadWrite.getAllNestedStages(nested_pipeline), + [nested_pipeline, ova_pipeline, vs, ova, lr] + ) + + +if __name__ == "__main__": + from pyspark.ml.tests.test_util import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + testRunner = xmlrunner.XMLTestRunner(output='target/test-reports', verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 6f4ad99484546..2b5a9857b0f18 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -26,8 +26,9 @@ from pyspark.ml.common import _py2java, _java2py from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed -from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader -from pyspark.ml.wrapper import JavaParams +from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ + MetaAlgorithmReadWrite +from pyspark.ml.wrapper import JavaParams, JavaEstimator, JavaWrapper from pyspark.sql.functions import col, lit, rand, UserDefinedFunction from pyspark.sql.types import BooleanType @@ -64,6 +65,10 @@ def _parallelFitTasks(est, train, eva, validation, epm, collectSubModel): def singleTask(): index, model = next(modelIter) + # TODO: duplicate evaluator to take extra params from input + # Note: Supporting tuning params in evaluator need 
update method + # `MetaAlgorithmReadWrite.getAllNestedStages`, make it return + # all nested stages and evaluators metric = eva.evaluate(model.transform(validation, epm[index])) return index, metric, model if collectSubModel else None @@ -186,8 +191,16 @@ def _from_java_impl(cls, java_stage): # Load information from java_stage to the instance. estimator = JavaParams._from_java(java_stage.getEstimator()) evaluator = JavaParams._from_java(java_stage.getEvaluator()) - epms = [estimator._transfer_param_map_from_java(epm) - for epm in java_stage.getEstimatorParamMaps()] + if isinstance(estimator, JavaEstimator): + epms = [estimator._transfer_param_map_from_java(epm) + for epm in java_stage.getEstimatorParamMaps()] + elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): + # Meta estimator such as Pipeline, OneVsRest + epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_from_java( + estimator, java_stage.getEstimatorParamMaps()) + else: + raise ValueError('Unsupported estimator used in tuning: ' + str(estimator)) + return estimator, epms, evaluator def _to_java_impl(self): @@ -198,15 +211,82 @@ def _to_java_impl(self): gateway = SparkContext._gateway cls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap - java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) - for idx, epm in enumerate(self.getEstimatorParamMaps()): - java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) + estimator = self.getEstimator() + if isinstance(estimator, JavaEstimator): + java_epms = gateway.new_array(cls, len(self.getEstimatorParamMaps())) + for idx, epm in enumerate(self.getEstimatorParamMaps()): + java_epms[idx] = self.getEstimator()._transfer_param_map_to_java(epm) + elif MetaAlgorithmReadWrite.isMetaEstimator(estimator): + # Meta estimator such as Pipeline, OneVsRest + java_epms = _ValidatorSharedReadWrite.meta_estimator_transfer_param_maps_to_java( + estimator, self.getEstimatorParamMaps()) + else: + raise ValueError('Unsupported estimator used in tuning: ' + str(estimator)) java_estimator = self.getEstimator()._to_java() java_evaluator = self.getEvaluator()._to_java() return java_estimator, java_epms, java_evaluator +class _ValidatorSharedReadWrite: + @staticmethod + def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): + pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) + stagePairs = list(map(lambda stage: (stage, stage._to_java()), pyStages)) + sc = SparkContext._active_spark_context + + paramMapCls = SparkContext._jvm.org.apache.spark.ml.param.ParamMap + javaParamMaps = SparkContext._gateway.new_array(paramMapCls, len(pyParamMaps)) + + for idx, pyParamMap in enumerate(pyParamMaps): + javaParamMap = JavaWrapper._new_java_obj("org.apache.spark.ml.param.ParamMap") + for pyParam, pyValue in pyParamMap.items(): + javaParam = None + for pyStage, javaStage in stagePairs: + if pyStage._testOwnParam(pyParam.parent, pyParam.name): + javaParam = javaStage.getParam(pyParam.name) + break + if javaParam is None: + raise ValueError('Resolve param in estimatorParamMaps failed: ' + str(pyParam)) + if isinstance(pyValue, Params) and hasattr(pyValue, "_to_java"): + javaValue = pyValue._to_java() + else: + javaValue = _py2java(sc, pyValue) + pair = javaParam.w(javaValue) + javaParamMap.put([pair]) + javaParamMaps[idx] = javaParamMap + return javaParamMaps + + @staticmethod + def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): + pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) + stagePairs = 
list(map(lambda stage: (stage, stage._to_java()), pyStages)) + sc = SparkContext._active_spark_context + pyParamMaps = [] + for javaParamMap in javaParamMaps: + pyParamMap = dict() + for javaPair in javaParamMap.toList(): + javaParam = javaPair.param() + pyParam = None + for pyStage, javaStage in stagePairs: + if pyStage._testOwnParam(javaParam.parent(), javaParam.name()): + pyParam = pyStage.getParam(javaParam.name()) + if pyParam is None: + raise ValueError('Resolve param in estimatorParamMaps failed: ' + + javaParam.parent() + '.' + javaParam.name()) + javaValue = javaPair.value() + if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(javaValue): + # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class + # and Estimator/Transformer class which implements `_from_java` static method + # (such as OneVsRest, Pipeline class). + pyValue = JavaParams._from_java(javaValue) + else: + pyValue = _java2py(sc, javaValue) + pyParamMap[pyParam] = pyValue + pyParamMaps.append(pyParamMap) + return pyParamMaps + + class _CrossValidatorParams(_ValidatorParams): """ Params for :py:class:`CrossValidator` and :py:class:`CrossValidatorModel`. diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index a7b5a79d75f5f..a34bfb53482a0 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -592,3 +592,41 @@ def summary(self): no summary exists. """ return (self._call_java("summary")) + + +class MetaAlgorithmReadWrite: + + @staticmethod + def isMetaEstimator(pyInstance): + from pyspark.ml import Estimator, Pipeline + from pyspark.ml.tuning import _ValidatorParams + from pyspark.ml.classification import OneVsRest + return isinstance(pyInstance, Pipeline) or isinstance(pyInstance, OneVsRest) or \ + (isinstance(pyInstance, Estimator) and isinstance(pyInstance, _ValidatorParams)) + + @staticmethod + def getAllNestedStages(pyInstance): + from pyspark.ml import Pipeline, PipelineModel + from pyspark.ml.tuning import _ValidatorParams + from pyspark.ml.classification import OneVsRest, OneVsRestModel + + # TODO: We need to handle `RFormulaModel.pipelineModel` here after Pyspark RFormulaModel + # support pipelineModel property. + if isinstance(pyInstance, Pipeline): + pySubStages = pyInstance.getStages() + elif isinstance(pyInstance, PipelineModel): + pySubStages = pyInstance.stages + elif isinstance(pyInstance, _ValidatorParams): + raise ValueError('PySpark does not support nested validator.') + elif isinstance(pyInstance, OneVsRest): + pySubStages = [pyInstance.getClassifier()] + elif isinstance(pyInstance, OneVsRestModel): + pySubStages = [pyInstance.getClassifier()] + pyInstance.models + else: + pySubStages = [] + + nestedStages = [] + for pySubStage in pySubStages: + nestedStages.extend(MetaAlgorithmReadWrite.getAllNestedStages(pySubStage)) + + return [pyInstance] + nestedStages diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi index d0781b2e26ed5..e2496e181f14f 100644 --- a/python/pyspark/ml/util.pyi +++ b/python/pyspark/ml/util.pyi @@ -126,3 +126,9 @@ class HasTrainingSummary(Generic[S]): def hasSummary(self) -> bool: ... @property def summary(self) -> S: ... + +class MetaAlgorithmReadWrite: + @staticmethod + def isMetaEstimator(pyInstance: Any) -> bool: ... + @staticmethod + def getAllNestedStages(pyInstance: Any) -> list: ... 
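
The patch above routes estimator param maps through the new `_ValidatorSharedReadWrite` helpers (and `MetaAlgorithmReadWrite.getAllNestedStages`) whenever the tuned estimator is a meta estimator such as `Pipeline` or `OneVsRest`, so that params belonging to nested stages can be resolved when converting to and from the JVM. The following is a minimal sketch, not part of the patch, of the kind of workflow this targets: a `CrossValidator` whose estimator is a `OneVsRest` and whose param grid is built against the nested `LogisticRegression`, then persisted so the param maps must go through `_to_java_impl`. The local master, toy data, and save path are assumptions for illustration only.

```python
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression, OneVsRest
from pyspark.ml.evaluation import MulticlassClassificationEvaluator
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder

spark = SparkSession.builder.master("local[2]").appName("cv-ova-sketch").getOrCreate()

# Toy training data; any labeled DataFrame with a 'features' vector column works.
train_df = spark.createDataFrame([
    (Vectors.dense([1.0, 2.0]), 0.0),
    (Vectors.dense([2.0, 1.0]), 0.0),
    (Vectors.dense([1.5, 1.5]), 0.0),
    (Vectors.dense([-1.0, -2.0]), 1.0),
    (Vectors.dense([-2.0, -1.0]), 1.0),
    (Vectors.dense([-1.5, -1.5]), 1.0),
], ["features", "label"])

lr = LogisticRegression(maxIter=5)
ova = OneVsRest(classifier=lr)

# The grid refers to params of the *nested* classifier, which is what
# meta_estimator_transfer_param_maps_to_java/_from_java have to resolve.
grid = ParamGridBuilder().addGrid(lr.regParam, [0.01, 0.1]).build()

cv = CrossValidator(estimator=ova,
                    estimatorParamMaps=grid,
                    evaluator=MulticlassClassificationEvaluator(),
                    numFolds=2)

cv_model = cv.fit(train_df)
# Persisting the fitted validator exercises the _to_java_impl path shown above.
cv_model.write().overwrite().save("/tmp/cv_ova_model")  # hypothetical path
```
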
From c50fcac00ea9b86aa6f6edb738e53ba476261027 Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Tue, 1 Dec 2020 11:45:32 +0900
Subject: [PATCH 0618/1009] [SPARK-33607][SS][WEBUI] Input Rate timeline/histogram aren't rendered if built with Scala 2.13

### What changes were proposed in this pull request?

This PR fixes an issue where the histogram and timeline aren't rendered in the `Streaming Query Statistics` page when Spark is built with Scala 2.13.

![before-fix-the-issue](https://user-images.githubusercontent.com/4736016/100612855-f543d700-3356-11eb-90d9-ede57b8b3f4f.png)
![NaN_Error](https://user-images.githubusercontent.com/4736016/100612879-00970280-3357-11eb-97cf-43978bbe2d3a.png)

The reason is that [`maxRecordRate` can be `NaN`](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala#L371) with Scala 2.13. The `NaN` is the result of [`query.recentProgress.map(_.inputRowsPerSecond).max`](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala#L372) when the first element of `query.recentProgress.map(_.inputRowsPerSecond)` is `NaN`.

The comparison logic for the `Double` type was changed in Scala 2.13:
https://github.com/scala/bug/issues/12107
https://github.com/scala/scala/pull/6410

So this issue happens as of Scala 2.13.

The root cause of the `NaN` is [here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala#L164). This `NaN` appears to be an initial value of `inputTimeSec`, so I think `Double.PositiveInfinity` is more suitable than `NaN`, and this change resolves the issue.

### Why are the changes needed?

To make sure we can use the histogram/timeline with Scala 2.13.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

First, I built with the following commands.
```
$ dev/change-scala-version.sh 2.13
$ build/sbt -Phive -Phive-thriftserver -Pscala-2.13 package
```

Then, I ran the following query (brought from #30427).
```
import org.apache.spark.sql.streaming.Trigger

val query = spark
  .readStream
  .format("rate")
  .option("rowsPerSecond", 1000)
  .option("rampUpTime", "10s")
  .load()
  .selectExpr("*", "CAST(CAST(timestamp AS BIGINT) - CAST((RAND() * 100000) AS BIGINT) AS TIMESTAMP) AS tsMod")
  .selectExpr("tsMod", "mod(value, 100) as mod", "value")
  .withWatermark("tsMod", "10 seconds")
  .groupBy(window($"tsMod", "1 minute", "10 seconds"), $"mod")
  .agg(max("value").as("max_value"), min("value").as("min_value"), avg("value").as("avg_value"))
  .writeStream
  .format("console")
  .trigger(Trigger.ProcessingTime("5 seconds"))
  .outputMode("append")
  .start()
```

Finally, I confirmed that the timeline and histogram are rendered.

![after-fix-the-issue](https://user-images.githubusercontent.com/4736016/100612736-c9285600-3356-11eb-856d-7e53cc656c36.png)

Closes #30546 from sarutak/ss-nan.
Authored-by: Kousuke Saruta Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../apache/spark/sql/execution/streaming/ProgressReporter.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index fe3f0e95b383c..57cb551bba17d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -161,7 +161,7 @@ trait ProgressReporter extends Logging { val inputTimeSec = if (lastTriggerStartTimestamp >= 0) { (currentTriggerStartTimestamp - lastTriggerStartTimestamp).toDouble / MILLIS_PER_SECOND } else { - Double.NaN + Double.PositiveInfinity } logDebug(s"Execution stats: $executionStats") From 2af2da5a4b1f5dbf0b55afd0b2514a52f03ffa94 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 13:11:14 +0900 Subject: [PATCH 0619/1009] [SPARK-30900][SS] FileStreamSource: Avoid reading compact metadata log twice if the query restarts from compact batch ### What changes were proposed in this pull request? This patch addresses the case where compact metadata file is read twice in FileStreamSource during restarting query. When restarting the query, there is a case which the query starts from compaction batch, and the batch has source metadata file to read. One case is that the previous query succeeded to read from inputs, but not finalized the batch for various reasons. The patch finds the latest compaction batch when restoring from metadata log, and put entries for the batch into the file entry cache which would avoid reading compact batch file twice. FileStreamSourceLog doesn't know about offset / commit metadata in checkpoint so doesn't know which exactly batch to start from, but in practice, only couple of latest batches are candidates to be started from when restarting query. This patch leverages the fact to skip calculation if possible. ### Why are the changes needed? Spark incurs unnecessary cost on reading the compact metadata file twice on some case, which may not be ignorable when the query has been processed huge number of files so far. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? New UT. Closes #27649 from HeartSaVioR/SPARK-30900. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../streaming/FileStreamSource.scala | 2 +- .../streaming/FileStreamSourceLog.scala | 27 ++++++++ .../sql/streaming/FileStreamSourceSuite.scala | 64 +++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index 42401fe069551..e53c5a9c4024e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -104,7 +104,7 @@ class FileStreamSource( // Visible for testing and debugging in production. 
val seenFiles = new SeenFilesMap(maxFileAgeMs, fileNameOnly) - metadataLog.allFiles().foreach { entry => + metadataLog.restore().foreach { entry => seenFiles.add(entry.path, entry.timestamp) } seenFiles.purge() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala index 88a2326c9a02c..5fe9a39c91e0b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSourceLog.scala @@ -36,6 +36,7 @@ class FileStreamSourceLog( extends CompactibleFileStreamLog[FileEntry](metadataLogVersion, sparkSession, path) { import CompactibleFileStreamLog._ + import FileStreamSourceLog._ // Configurations about metadata compaction protected override val defaultCompactInterval: Int = @@ -118,8 +119,34 @@ class FileStreamSourceLog( } batches } + + def restore(): Array[FileEntry] = { + val files = allFiles() + + // When restarting the query, there is a case which the query starts from compaction batch, + // and the batch has source metadata file to read. One case is that the previous query + // succeeded to read from inputs, but not finalized the batch for various reasons. + // The below code finds the latest compaction batch, and put entries for the batch into the + // file entry cache which would avoid reading compact batch file twice. + // It doesn't know about offset / commit metadata in checkpoint so doesn't know which exactly + // batch to start from, but in practice, only couple of latest batches are candidates to + // be started. We leverage the fact to skip calculation if possible. + files.lastOption.foreach { lastEntry => + val latestBatchId = lastEntry.batchId + val latestCompactedBatchId = getAllValidBatches(latestBatchId, compactInterval)(0) + if ((latestBatchId - latestCompactedBatchId) < PREV_NUM_BATCHES_TO_READ_IN_RESTORE) { + val logsForLatestCompactedBatch = files.filter { entry => + entry.batchId == latestCompactedBatchId + } + fileEntryCache.put(latestCompactedBatchId, logsForLatestCompactedBatch) + } + } + + files + } } object FileStreamSourceLog { val VERSION = 1 + val PREV_NUM_BATCHES_TO_READ_IN_RESTORE = 2 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 718095003b096..3c74e316f260e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1376,6 +1376,70 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("restore from file stream source log") { + def createEntries(batchId: Long, count: Int): Array[FileEntry] = { + (1 to count).map { idx => + FileEntry(s"path_${batchId}_$idx", 10000 * batchId + count, batchId) + }.toArray + } + + withSQLConf(SQLConf.FILE_SOURCE_LOG_COMPACT_INTERVAL.key -> "5") { + def verifyBatchAvailabilityInCache( + fileEntryCache: java.util.LinkedHashMap[Long, Array[FileEntry]], + expectNotAvailable: Seq[Int], + expectAvailable: Seq[Int]): Unit = { + expectNotAvailable.foreach { batchId => + assert(!fileEntryCache.containsKey(batchId.toLong)) + } + expectAvailable.foreach { batchId => + assert(fileEntryCache.containsKey(batchId.toLong)) + } + } + withTempDir { chk => + val _fileEntryCache = 
PrivateMethod[java.util.LinkedHashMap[Long, Array[FileEntry]]]( + Symbol("fileEntryCache")) + + val metadata = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache = metadata invokePrivate _fileEntryCache() + + (0 to 4).foreach { batchId => + metadata.add(batchId, createEntries(batchId, 100)) + } + val allFiles = metadata.allFiles() + + // batch 4 is a compact batch which logs would be cached in fileEntryCache + verifyBatchAvailabilityInCache(fileEntryCache, Seq(0, 1, 2, 3), Seq(4)) + + val metadata2 = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache2 = metadata2 invokePrivate _fileEntryCache() + + // allFiles() doesn't restore the logs for the latest compact batch into file entry cache + assert(metadata2.allFiles() === allFiles) + verifyBatchAvailabilityInCache(fileEntryCache2, Seq(0, 1, 2, 3, 4), Seq.empty) + + // restore() will restore the logs for the latest compact batch into file entry cache + assert(metadata2.restore() === allFiles) + verifyBatchAvailabilityInCache(fileEntryCache2, Seq(0, 1, 2, 3), Seq(4)) + + (5 to 5 + FileStreamSourceLog.PREV_NUM_BATCHES_TO_READ_IN_RESTORE).foreach { batchId => + metadata2.add(batchId, createEntries(batchId, 100)) + } + + val metadata3 = new FileStreamSourceLog(FileStreamSourceLog.VERSION, spark, + chk.getCanonicalPath) + val fileEntryCache3 = metadata3 invokePrivate _fileEntryCache() + + // restore() will not restore the logs for the latest compact batch into file entry cache + // if the latest batch is too far from latest compact batch, because it's unlikely Spark + // will request the batch for the start point. + assert(metadata3.restore() === metadata2.allFiles()) + verifyBatchAvailabilityInCache(fileEntryCache3, Seq(0, 1, 2, 3, 4), Seq.empty) + } + } + } + test("get arbitrary batch from FileStreamSource") { withTempDirs { case (src, tmp) => withSQLConf( From 1a042cc414c0c720535798b9a1197fe8885d6f6e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 1 Dec 2020 13:43:02 +0900 Subject: [PATCH 0620/1009] [SPARK-33530][CORE] Support --archives and spark.archives option natively ### What changes were proposed in this pull request? TL;DR: - This PR completes the support of archives in Spark itself instead of Yarn-only - It makes `--archives` option work in other cluster modes too and adds `spark.archives` configuration. - After this PR, PySpark users can leverage Conda to ship Python packages together as below: ```python conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 conda activate pyspark_env conda pack -f -o pyspark_env.tar.gz PYSPARK_DRIVER_PYTHON=python PYSPARK_PYTHON=./environment/bin/python pyspark --archives pyspark_env.tar.gz#environment ``` - Issue a warning that undocumented and hidden behavior of partial archive handling in `spark.files` / `SparkContext.addFile` will be deprecated, and users can use `spark.archives` and `SparkContext.addArchive`. This PR proposes to add Spark's native `--archives` in Spark submit, and `spark.archives` configuration. Currently, both are supported only in Yarn mode: ```bash ./bin/spark-submit --help ``` ``` Options: ... Spark on YARN only: --queue QUEUE_NAME The YARN queue to submit to (Default: "default"). --archives ARCHIVES Comma separated list of archives to be extracted into the working directory of each executor. ``` This `archives` feature is useful often when you have to ship a directory and unpack into executors. 
One example is native libraries to use e.g. JNI. Another example is to ship Python packages together by Conda environment. Especially for Conda, PySpark currently does not have a nice way to ship a package that works in general, please see also https://hyukjin-spark.readthedocs.io/en/stable/user_guide/python_packaging.html#using-zipped-virtual-environment (PySpark new documentation demo for 3.1.0). The neatest way is arguably to use Conda environment by shipping zipped Conda environment but this is currently dependent on this archive feature. NOTE that we are able to use `spark.files` by relying on its undocumented behaviour that untars `tar.gz` but I don't think we should document such ways and promote people to more rely on it. Also, note that this PR does not target to add the feature parity of `spark.files.overwrite`, `spark.files.useFetchCache`, etc. yet. I documented that this is an experimental feature as well. ### Why are the changes needed? To complete the feature parity, and to provide a better support of shipping Python libraries together with Conda env. ### Does this PR introduce _any_ user-facing change? Yes, this makes `--archives` works in Spark instead of Yarn-only, and adds a new configuration `spark.archives`. ### How was this patch tested? I added unittests. Also, manually tested in standalone cluster, local-cluster, and local modes. Closes #30486 from HyukjinKwon/native-archive. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/SparkContext.scala | 89 ++++++++++++++++--- .../scala/org/apache/spark/SparkEnv.scala | 5 +- .../org/apache/spark/deploy/SparkSubmit.scala | 3 + .../spark/deploy/SparkSubmitArguments.scala | 5 +- .../org/apache/spark/executor/Executor.scala | 50 ++++++++--- .../spark/internal/config/package.scala | 10 +++ .../spark/scheduler/TaskDescription.scala | 9 +- .../spark/scheduler/TaskSetManager.scala | 2 + .../scala/org/apache/spark/util/Utils.scala | 52 +++++++++-- .../org/apache/spark/SparkContextSuite.scala | 79 ++++++++++++++++ .../spark/deploy/SparkSubmitSuite.scala | 37 ++++++++ .../deploy/rest/SubmitRestProtocolSuite.scala | 3 + .../CoarseGrainedExecutorBackendSuite.scala | 2 +- .../apache/spark/executor/ExecutorSuite.scala | 1 + .../CoarseGrainedSchedulerBackendSuite.scala | 3 +- .../scheduler/EventLoggingListenerSuite.scala | 3 +- .../scheduler/TaskDescriptionSuite.scala | 6 ++ docs/configuration.md | 11 +++ project/MimaExcludes.scala | 1 + .../source/user_guide/python_packaging.rst | 27 +++--- ...esosFineGrainedSchedulerBackendSuite.scala | 2 + 21 files changed, 347 insertions(+), 53 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index b953592fa04dc..86f1d745d91d4 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -22,6 +22,7 @@ import java.net.URI import java.util.{Arrays, Locale, Properties, ServiceLoader, UUID} import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap} import java.util.concurrent.atomic.{AtomicBoolean, AtomicInteger, AtomicReference} +import javax.ws.rs.core.UriBuilder import scala.collection.JavaConverters._ import scala.collection.Map @@ -39,7 +40,7 @@ import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf, Sequence import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} -import 
org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.{LocalSparkCluster, SparkHadoopUtil} import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource} @@ -221,6 +222,7 @@ class SparkContext(config: SparkConf) extends Logging { private var _listenerBusStarted: Boolean = false private var _jars: Seq[String] = _ private var _files: Seq[String] = _ + private var _archives: Seq[String] = _ private var _shutdownHookRef: AnyRef = _ private var _statusStore: AppStatusStore = _ private var _heartbeater: Heartbeater = _ @@ -246,6 +248,7 @@ class SparkContext(config: SparkConf) extends Logging { def jars: Seq[String] = _jars def files: Seq[String] = _files + def archives: Seq[String] = _archives def master: String = _conf.get("spark.master") def deployMode: String = _conf.get(SUBMIT_DEPLOY_MODE) def appName: String = _conf.get("spark.app.name") @@ -278,6 +281,7 @@ class SparkContext(config: SparkConf) extends Logging { // Used to store a URL for each static file/jar together with the file's local timestamp private[spark] val addedFiles = new ConcurrentHashMap[String, Long]().asScala + private[spark] val addedArchives = new ConcurrentHashMap[String, Long]().asScala private[spark] val addedJars = new ConcurrentHashMap[String, Long]().asScala // Keeps track of all persisted RDDs @@ -422,6 +426,7 @@ class SparkContext(config: SparkConf) extends Logging { _jars = Utils.getUserJars(_conf) _files = _conf.getOption(FILES.key).map(_.split(",")).map(_.filter(_.nonEmpty)) .toSeq.flatten + _archives = _conf.getOption(ARCHIVES.key).map(Utils.stringToSeq).toSeq.flatten _eventLogDir = if (isEventLogEnabled) { @@ -506,6 +511,13 @@ class SparkContext(config: SparkConf) extends Logging { } } + if (archives != null) { + archives.foreach(file => addFile(file, false, true, isArchive = true)) + if (addedArchives.nonEmpty) { + _conf.set("spark.app.initial.archive.urls", addedArchives.keys.toSeq.mkString(",")) + } + } + _executorMemory = _conf.getOption(EXECUTOR_MEMORY.key) .orElse(Option(System.getenv("SPARK_EXECUTOR_MEMORY"))) .orElse(Option(System.getenv("SPARK_MEM")) @@ -1521,6 +1533,36 @@ class SparkContext(config: SparkConf) extends Logging { */ def listFiles(): Seq[String] = addedFiles.keySet.toSeq + /** + * :: Experimental :: + * Add an archive to be downloaded and unpacked with this Spark job on every node. + * + * If an archive is added during execution, it will not be available until the next TaskSet + * starts. + * + * @param path can be either a local file, a file in HDFS (or other Hadoop-supported + * filesystems), or an HTTP, HTTPS or FTP URI. To access the file in Spark jobs, + * use `SparkFiles.get(paths-to-files)` to find its download/unpacked location. + * The given path should be one of .zip, .tar, .tar.gz, .tgz and .jar. + * + * @note A path can be added only once. Subsequent additions of the same path are ignored. + * + * @since 3.1.0 + */ + @Experimental + def addArchive(path: String): Unit = { + addFile(path, false, false, isArchive = true) + } + + /** + * :: Experimental :: + * Returns a list of archive paths that are added to resources. + * + * @since 3.1.0 + */ + @Experimental + def listArchives(): Seq[String] = addedArchives.keySet.toSeq + /** * Add a file to be downloaded with this Spark job on every node. 
* @@ -1538,8 +1580,14 @@ class SparkContext(config: SparkConf) extends Logging { addFile(path, recursive, false) } - private def addFile(path: String, recursive: Boolean, addedOnSubmit: Boolean): Unit = { - val uri = new Path(path).toUri + private def addFile( + path: String, recursive: Boolean, addedOnSubmit: Boolean, isArchive: Boolean = false + ): Unit = { + val uri = if (!isArchive) { + new Path(path).toUri + } else { + Utils.resolveURI(path) + } val schemeCorrectedURI = uri.getScheme match { case null => new File(path).getCanonicalFile.toURI case "local" => @@ -1551,7 +1599,7 @@ class SparkContext(config: SparkConf) extends Logging { val hadoopPath = new Path(schemeCorrectedURI) val scheme = schemeCorrectedURI.getScheme - if (!Array("http", "https", "ftp").contains(scheme)) { + if (!Array("http", "https", "ftp").contains(scheme) && !isArchive) { val fs = hadoopPath.getFileSystem(hadoopConfiguration) val isDir = fs.getFileStatus(hadoopPath).isDirectory if (!isLocal && scheme == "file" && isDir) { @@ -1569,21 +1617,39 @@ class SparkContext(config: SparkConf) extends Logging { val key = if (!isLocal && scheme == "file") { env.rpcEnv.fileServer.addFile(new File(uri.getPath)) + } else if (uri.getScheme == null) { + schemeCorrectedURI.toString + } else if (isArchive) { + uri.toString } else { - if (uri.getScheme == null) { - schemeCorrectedURI.toString - } else { - path - } + path } + val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis - if (addedFiles.putIfAbsent(key, timestamp).isEmpty) { + if (!isArchive && addedFiles.putIfAbsent(key, timestamp).isEmpty) { logInfo(s"Added file $path at $key with timestamp $timestamp") // Fetch the file locally so that closures which are run on the driver can still use the // SparkFiles API to access files. Utils.fetchFile(uri.toString, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConfiguration, timestamp, useCache = false) postEnvironmentUpdate() + } else if ( + isArchive && + addedArchives.putIfAbsent( + UriBuilder.fromUri(new URI(key)).fragment(uri.getFragment).build().toString, + timestamp).isEmpty) { + logInfo(s"Added archive $path at $key with timestamp $timestamp") + val uriToDownload = UriBuilder.fromUri(new URI(key)).fragment(null).build() + val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, + env.securityManager, hadoopConfiguration, timestamp, useCache = false, shouldUntar = false) + val dest = new File( + SparkFiles.getRootDirectory(), + if (uri.getFragment != null) uri.getFragment else source.getName) + logInfo( + s"Unpacking an archive $path from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + postEnvironmentUpdate() } else { logWarning(s"The path $path has been added already. 
Overwriting of added paths " + "is not supported in the current version.") @@ -2495,8 +2561,9 @@ class SparkContext(config: SparkConf) extends Logging { val schedulingMode = getSchedulingMode.toString val addedJarPaths = addedJars.keys.toSeq val addedFilePaths = addedFiles.keys.toSeq + val addedArchivePaths = addedArchives.keys.toSeq val environmentDetails = SparkEnv.environmentDetails(conf, hadoopConfiguration, - schedulingMode, addedJarPaths, addedFilePaths) + schedulingMode, addedJarPaths, addedFilePaths, addedArchivePaths) val environmentUpdate = SparkListenerEnvironmentUpdate(environmentDetails) listenerBus.post(environmentUpdate) } diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index d543359f4dedf..9fc60ac3990fc 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -454,7 +454,8 @@ object SparkEnv extends Logging { hadoopConf: Configuration, schedulingMode: String, addedJars: Seq[String], - addedFiles: Seq[String]): Map[String, Seq[(String, String)]] = { + addedFiles: Seq[String], + addedArchives: Seq[String]): Map[String, Seq[(String, String)]] = { import Properties._ val jvmInformation = Seq( @@ -484,7 +485,7 @@ object SparkEnv extends Logging { .split(File.pathSeparator) .filterNot(_.isEmpty) .map((_, "System Classpath")) - val addedJarsAndFiles = (addedJars ++ addedFiles).map((_, "Added By User")) + val addedJarsAndFiles = (addedJars ++ addedFiles ++ addedArchives).map((_, "Added By User")) val classPaths = (addedJarsAndFiles ++ classPathEntries).sorted // Add Hadoop properties, it will not ignore configs including in Spark. Some spark diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 4aa393c514af6..a344bce7a0f3c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -607,6 +607,8 @@ private[spark] class SparkSubmit extends Logging { confKey = CORES_MAX.key), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = FILES.key), + OptionAssigner(args.archives, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, + confKey = ARCHIVES.key), OptionAssigner(args.jars, LOCAL, CLIENT, confKey = JARS.key), OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = JARS.key), @@ -796,6 +798,7 @@ private[spark] class SparkSubmit extends Logging { val pathConfigs = Seq( JARS.key, FILES.key, + ARCHIVES.key, "spark.yarn.dist.files", "spark.yarn.dist.archives", "spark.yarn.dist.jars") diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 3090a3b10a97c..9da1a73bba692 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -183,6 +183,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S name = Option(name).orElse(sparkProperties.get("spark.app.name")).orNull jars = Option(jars).orElse(sparkProperties.get(config.JARS.key)).orNull files = Option(files).orElse(sparkProperties.get(config.FILES.key)).orNull + archives = Option(archives).orElse(sparkProperties.get(config.ARCHIVES.key)).orNull pyFiles = 
Option(pyFiles).orElse(sparkProperties.get(config.SUBMIT_PYTHON_FILES.key)).orNull ivyRepoPath = sparkProperties.get("spark.jars.ivy").orNull ivySettingsPath = sparkProperties.get("spark.jars.ivySettings") @@ -512,6 +513,8 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | --files FILES Comma-separated list of files to be placed in the working | directory of each executor. File paths of these files | in executors can be accessed via SparkFiles.get(fileName). + | --archives ARCHIVES Comma-separated list of archives to be extracted into the + | working directory of each executor. | | --conf, -c PROP=VALUE Arbitrary Spark configuration property. | --properties-file FILE Path to a file from which to load extra properties. If not @@ -562,8 +565,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | | Spark on YARN only: | --queue QUEUE_NAME The YARN queue to submit to (Default: "default"). - | --archives ARCHIVES Comma separated list of archives to be extracted into the - | working directory of each executor. """.stripMargin ) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c81ac778a32d1..e7f1b8f3cf17a 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -26,6 +26,7 @@ import java.util.{Locale, Properties} import java.util.concurrent._ import java.util.concurrent.atomic.AtomicBoolean import javax.annotation.concurrent.GuardedBy +import javax.ws.rs.core.UriBuilder import scala.collection.JavaConverters._ import scala.collection.immutable @@ -78,6 +79,7 @@ private[spark] class Executor( // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() private val currentJars: HashMap[String, Long] = new HashMap[String, Long]() + private val currentArchives: HashMap[String, Long] = new HashMap[String, Long]() private val EMPTY_BYTE_BUFFER = ByteBuffer.wrap(new Array[Byte](0)) @@ -232,16 +234,17 @@ private[spark] class Executor( private val appStartTime = conf.getLong("spark.app.startTime", 0) // To allow users to distribute plugins and their required files - // specified by --jars and --files on application submission, those jars/files should be - // downloaded and added to the class loader via updateDependencies. - // This should be done before plugin initialization below + // specified by --jars, --files and --archives on application submission, those + // jars/files/archives should be downloaded and added to the class loader via + // updateDependencies. This should be done before plugin initialization below // because executors search plugins from the class loader and initialize them. 
- private val Seq(initialUserJars, initialUserFiles) = Seq("jar", "file").map { key => - conf.getOption(s"spark.app.initial.$key.urls").map { urls => - Map(urls.split(",").map(url => (url, appStartTime)): _*) - }.getOrElse(Map.empty) - } - updateDependencies(initialUserFiles, initialUserJars) + private val Seq(initialUserJars, initialUserFiles, initialUserArchives) = + Seq("jar", "file", "archive").map { key => + conf.getOption(s"spark.app.initial.$key.urls").map { urls => + Map(urls.split(",").map(url => (url, appStartTime)): _*) + }.getOrElse(Map.empty) + } + updateDependencies(initialUserFiles, initialUserJars, initialUserArchives) // Plugins need to load using a class loader that includes the executor's user classpath. // Plugins also needs to be initialized after the heartbeater started @@ -449,7 +452,8 @@ private[spark] class Executor( // requires access to properties contained within (e.g. for access control). Executor.taskDeserializationProps.set(taskDescription.properties) - updateDependencies(taskDescription.addedFiles, taskDescription.addedJars) + updateDependencies( + taskDescription.addedFiles, taskDescription.addedJars, taskDescription.addedArchives) task = ser.deserialize[Task[Any]]( taskDescription.serializedTask, Thread.currentThread.getContextClassLoader) task.localProperties = taskDescription.properties @@ -909,24 +913,42 @@ private[spark] class Executor( * Download any missing dependencies if we receive a new set of files and JARs from the * SparkContext. Also adds any new JARs we fetched to the class loader. */ - private def updateDependencies(newFiles: Map[String, Long], newJars: Map[String, Long]): Unit = { + private def updateDependencies( + newFiles: Map[String, Long], + newJars: Map[String, Long], + newArchives: Map[String, Long]): Unit = { lazy val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) synchronized { // Fetch missing dependencies for ((name, timestamp) <- newFiles if currentFiles.getOrElse(name, -1L) < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) + logInfo(s"Fetching $name with timestamp $timestamp") // Fetch file with useCache mode, close cache for local mode. Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) currentFiles(name) = timestamp } + for ((name, timestamp) <- newArchives if currentArchives.getOrElse(name, -1L) < timestamp) { + logInfo(s"Fetching $name with timestamp $timestamp") + val sourceURI = new URI(name) + val uriToDownload = UriBuilder.fromUri(sourceURI).fragment(null).build() + val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, + env.securityManager, hadoopConf, timestamp, useCache = !isLocal, shouldUntar = false) + val dest = new File( + SparkFiles.getRootDirectory(), + if (sourceURI.getFragment != null) sourceURI.getFragment else source.getName) + logInfo( + s"Unpacking an archive $name from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + currentArchives(name) = timestamp + } for ((name, timestamp) <- newJars) { val localName = new URI(name).getPath.split("/").last val currentTimeStamp = currentJars.get(name) .orElse(currentJars.get(localName)) .getOrElse(-1L) if (currentTimeStamp < timestamp) { - logInfo("Fetching " + name + " with timestamp " + timestamp) + logInfo(s"Fetching $name with timestamp $timestamp") // Fetch file with useCache mode, close cache for local mode. 
Utils.fetchFile(name, new File(SparkFiles.getRootDirectory()), conf, env.securityManager, hadoopConf, timestamp, useCache = !isLocal) @@ -934,7 +956,7 @@ private[spark] class Executor( // Add it to our class loader val url = new File(SparkFiles.getRootDirectory(), localName).toURI.toURL if (!urlClassLoader.getURLs().contains(url)) { - logInfo("Adding " + url + " to class loader") + logInfo(s"Adding $url to class loader") urlClassLoader.addURL(url) } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 093a0ecf58d32..6639f20a068d4 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1813,6 +1813,16 @@ package object config { .toSequence .createWithDefault(Nil) + private[spark] val ARCHIVES = ConfigBuilder("spark.archives") + .version("3.1.0") + .doc("Comma-separated list of archives to be extracted into the working directory of each " + + "executor. .jar, .tar.gz, .tgz and .zip are supported. You can specify the directory " + + "name to unpack via adding '#' after the file name to unpack, for example, " + + "'file.zip#directory'. This configuration is experimental.") + .stringConf + .toSequence + .createWithDefault(Nil) + private[spark] val SUBMIT_DEPLOY_MODE = ConfigBuilder("spark.submit.deployMode") .version("1.5.0") .stringConf diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala index 863bf27088355..12b911d06153b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskDescription.scala @@ -55,6 +55,7 @@ private[spark] class TaskDescription( val partitionId: Int, val addedFiles: Map[String, Long], val addedJars: Map[String, Long], + val addedArchives: Map[String, Long], val properties: Properties, val resources: immutable.Map[String, ResourceInformation], val serializedTask: ByteBuffer) { @@ -99,6 +100,9 @@ private[spark] object TaskDescription { // Write jars. serializeStringLongMap(taskDescription.addedJars, dataOut) + // Write archives. + serializeStringLongMap(taskDescription.addedArchives, dataOut) + // Write properties. dataOut.writeInt(taskDescription.properties.size()) taskDescription.properties.asScala.foreach { case (key, value) => @@ -167,6 +171,9 @@ private[spark] object TaskDescription { // Read jars. val taskJars = deserializeStringLongMap(dataIn) + // Read archives. + val taskArchives = deserializeStringLongMap(dataIn) + // Read properties. 
val properties = new Properties() val numProperties = dataIn.readInt() @@ -185,6 +192,6 @@ private[spark] object TaskDescription { val serializedTask = byteBuffer.slice() new TaskDescription(taskId, attemptNumber, executorId, name, index, partitionId, taskFiles, - taskJars, properties, resources, serializedTask) + taskJars, taskArchives, properties, resources, serializedTask) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 914fccc1a67cd..ad0791fa42931 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -63,6 +63,7 @@ private[spark] class TaskSetManager( // SPARK-21563 make a copy of the jars/files so they are consistent across the TaskSet private val addedJars = HashMap[String, Long](sched.sc.addedJars.toSeq: _*) private val addedFiles = HashMap[String, Long](sched.sc.addedFiles.toSeq: _*) + private val addedArchives = HashMap[String, Long](sched.sc.addedArchives.toSeq: _*) val maxResultSize = conf.get(config.MAX_RESULT_SIZE) @@ -493,6 +494,7 @@ private[spark] class TaskSetManager( task.partitionId, addedFiles, addedJars, + addedArchives, task.localProperties, taskResourceAssignments, serializedTask) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index accf3d7c0d333..ae4df146b0a4c 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -53,6 +53,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FileUtil, Path} import org.apache.hadoop.io.compress.{CompressionCodecFactory, SplittableCompressionCodec} import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.util.{RunJar, StringUtils} import org.apache.hadoop.yarn.conf.YarnConfiguration import org.eclipse.jetty.util.MultiException import org.slf4j.Logger @@ -486,6 +487,10 @@ private[spark] object Utils extends Logging { * * Throws SparkException if the target file already exists and has different contents than * the requested file. + * + * If `shouldUntar` is true, it untars the given url if it is a tar.gz or tgz into `targetDir`. 
+ * This is a legacy behavior, and users should better use `spark.archives` configuration or + * `SparkContext.addArchive` */ def fetchFile( url: String, @@ -494,7 +499,8 @@ private[spark] object Utils extends Logging { securityMgr: SecurityManager, hadoopConf: Configuration, timestamp: Long, - useCache: Boolean): File = { + useCache: Boolean, + shouldUntar: Boolean = true): File = { val fileName = decodeFileNameInURI(new URI(url)) val targetFile = new File(targetDir, fileName) val fetchCacheEnabled = conf.getBoolean("spark.files.useFetchCache", defaultValue = true) @@ -535,13 +541,23 @@ private[spark] object Utils extends Logging { doFetchFile(url, targetDir, fileName, conf, securityMgr, hadoopConf) } - // Decompress the file if it's a .tar or .tar.gz - if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { - logInfo("Untarring " + fileName) - executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir) - } else if (fileName.endsWith(".tar")) { - logInfo("Untarring " + fileName) - executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir) + if (shouldUntar) { + // Decompress the file if it's a .tar or .tar.gz + if (fileName.endsWith(".tar.gz") || fileName.endsWith(".tgz")) { + logWarning( + "Untarring behavior will be deprecated at spark.files and " + + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + + "instead.") + logInfo("Untarring " + fileName) + executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir) + } else if (fileName.endsWith(".tar")) { + logWarning( + "Untarring behavior will be deprecated at spark.files and " + + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + + "instead.") + logInfo("Untarring " + fileName) + executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir) + } } // Make the file executable - That's necessary for scripts FileUtil.chmod(targetFile.getAbsolutePath, "a+x") @@ -555,6 +571,26 @@ private[spark] object Utils extends Logging { targetFile } + /** + * Unpacks an archive file into the specified directory. It expects .jar, .zip, .tar.gz, .tgz + * and .tar files. This behaves same as Hadoop's archive in distributed cache. This method is + * basically copied from `org.apache.hadoop.yarn.util.FSDownload.unpack`. + */ + def unpack(source: File, dest: File): Unit = { + val lowerSrc = StringUtils.toLowerCase(source.getName) + if (lowerSrc.endsWith(".jar")) { + RunJar.unJar(source, dest, RunJar.MATCH_ANY) + } else if (lowerSrc.endsWith(".zip")) { + FileUtil.unZip(source, dest) + } else if ( + lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || lowerSrc.endsWith(".tar")) { + FileUtil.unTar(source, dest) + } else { + logWarning(s"Cannot unpack $source, just copying it to $dest.") + copyRecursive(source, dest) + } + } + /** Records the duration of running `body`. 
*/ def timeTakenMs[T](body: => T): (T, Long) = { val startTime = System.nanoTime() diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index ebdf2f59a2770..55bfa70f21fc2 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -160,6 +160,85 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu } } + test("SPARK-33530: basic case for addArchive and listArchives") { + withTempDir { dir => + val file1 = File.createTempFile("someprefix1", "somesuffix1", dir) + val file2 = File.createTempFile("someprefix2", "somesuffix2", dir) + val file3 = File.createTempFile("someprefix3", "somesuffix3", dir) + val file4 = File.createTempFile("someprefix4", "somesuffix4", dir) + + val jarFile = new File(dir, "test!@$jar.jar") + val zipFile = new File(dir, "test-zip.zip") + val relativePath1 = + s"${zipFile.getParent}/../${zipFile.getParentFile.getName}/${zipFile.getName}" + val relativePath2 = + s"${jarFile.getParent}/../${jarFile.getParentFile.getName}/${jarFile.getName}#zoo" + + try { + Files.write("somewords1", file1, StandardCharsets.UTF_8) + Files.write("somewords22", file2, StandardCharsets.UTF_8) + Files.write("somewords333", file3, StandardCharsets.UTF_8) + Files.write("somewords4444", file4, StandardCharsets.UTF_8) + val length1 = file1.length() + val length2 = file2.length() + val length3 = file1.length() + val length4 = file2.length() + + createJar(Seq(file1, file2), jarFile) + createJar(Seq(file3, file4), zipFile) + + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) + sc.addArchive(jarFile.getAbsolutePath) + sc.addArchive(relativePath1) + sc.addArchive(s"${jarFile.getAbsolutePath}#foo") + sc.addArchive(s"${zipFile.getAbsolutePath}#bar") + sc.addArchive(relativePath2) + + sc.parallelize(Array(1), 1).map { x => + val gotten1 = new File(SparkFiles.get(jarFile.getName)) + val gotten2 = new File(SparkFiles.get(zipFile.getName)) + val gotten3 = new File(SparkFiles.get("foo")) + val gotten4 = new File(SparkFiles.get("bar")) + val gotten5 = new File(SparkFiles.get("zoo")) + + Seq(gotten1, gotten2, gotten3, gotten4, gotten5).foreach { gotten => + if (!gotten.exists()) { + throw new SparkException(s"The archive doesn't exist: ${gotten.getAbsolutePath}") + } + if (!gotten.isDirectory) { + throw new SparkException(s"The archive was not unpacked: ${gotten.getAbsolutePath}") + } + } + + // Jars + Seq(gotten1, gotten3, gotten5).foreach { gotten => + val actualLength1 = new File(gotten, file1.getName).length() + val actualLength2 = new File(gotten, file2.getName).length() + if (actualLength1 != length1 || actualLength2 != length2) { + s"Unpacked files have different lengths $actualLength1 and $actualLength2. at " + + s"${gotten.getAbsolutePath}. They should be $length1 and $length2." + } + } + + // Zip + Seq(gotten2, gotten4).foreach { gotten => + val actualLength3 = new File(gotten, file1.getName).length() + val actualLength4 = new File(gotten, file2.getName).length() + if (actualLength3 != length3 || actualLength4 != length4) { + s"Unpacked files have different lengths $actualLength3 and $actualLength4. at " + + s"${gotten.getAbsolutePath}. They should be $length3 and $length4." 
+ } + } + x + }.count() + assert(sc.listArchives().count(_.endsWith("test!@$jar.jar")) == 1) + assert(sc.listArchives().count(_.contains("test-zip.zip")) == 2) + } finally { + sc.stop() + } + } + } + test("add and list jar files") { val jarPath = Thread.currentThread().getContextClassLoader.getResource("TestUDTF.jar") try { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index b5b3751439750..dcd35f3f6b93f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -335,6 +335,43 @@ class SparkSubmitSuite sys.props("SPARK_SUBMIT") should be ("true") } + test("SPARK-33530: handles standalone mode with archives") { + val clArgs = Seq( + "--master", "spark://localhost:1234", + "--executor-memory", "5g", + "--executor-cores", "5", + "--class", "org.SomeClass", + "--jars", "one.jar,two.jar,three.jar", + "--driver-memory", "4g", + "--files", "file1.txt,file2.txt", + "--archives", "archive1.zip,archive2.jar", + "--num-executors", "6", + "--name", "beauty", + "--conf", "spark.ui.enabled=false", + "thejar.jar", + "arg1", "arg2") + val appArgs = new SparkSubmitArguments(clArgs) + val (childArgs, classpath, conf, mainClass) = submit.prepareSubmitEnvironment(appArgs) + val childArgsStr = childArgs.mkString(" ") + childArgsStr should include ("arg1 arg2") + mainClass should be ("org.SomeClass") + + classpath(0) should endWith ("thejar.jar") + classpath(1) should endWith ("one.jar") + classpath(2) should endWith ("two.jar") + classpath(3) should endWith ("three.jar") + + conf.get("spark.executor.memory") should be ("5g") + conf.get("spark.driver.memory") should be ("4g") + conf.get("spark.executor.cores") should be ("5") + conf.get("spark.jars") should include regex (".*one.jar,.*two.jar,.*three.jar") + conf.get("spark.files") should include regex (".*file1.txt,.*file2.txt") + conf.get("spark.archives") should include regex (".*archive1.zip,.*archive2.jar") + conf.get("spark.app.name") should be ("beauty") + conf.get(UI_ENABLED) should be (false) + sys.props("SPARK_SUBMIT") should be ("true") + } + test("handles standalone cluster mode") { testStandaloneCluster(useRest = true) } diff --git a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala index d08052faa0043..9fdbf485e17d3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/rest/SubmitRestProtocolSuite.scala @@ -98,6 +98,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { // optional fields conf.set(JARS, Seq("mayonnaise.jar", "ketchup.jar")) conf.set(FILES.key, "fireball.png") + conf.set(ARCHIVES.key, "fireballs.zip") conf.set("spark.driver.memory", s"${Utils.DEFAULT_DRIVER_MEM_MB}m") conf.set(DRIVER_CORES, 180) conf.set("spark.driver.extraJavaOptions", " -Dslices=5 -Dcolor=mostly_red") @@ -246,6 +247,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { | }, | "mainClass" : "org.apache.spark.examples.SparkPie", | "sparkProperties" : { + | "spark.archives" : "fireballs.zip", | "spark.driver.extraLibraryPath" : "pickle.jar", | "spark.jars" : "mayonnaise.jar,ketchup.jar", | "spark.driver.supervise" : "false", @@ -272,6 +274,7 @@ class SubmitRestProtocolSuite extends SparkFunSuite { | }, | "mainClass" : "org.apache.spark.examples.SparkPie", | 
"sparkProperties" : { + | "spark.archives" : "fireballs.zip", | "spark.driver.extraLibraryPath" : "pickle.jar", | "spark.jars" : "mayonnaise.jar,ketchup.jar", | "spark.driver.supervise" : "false", diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index 319dcfeecee24..810dcf0e61007 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -302,7 +302,7 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite // We don't really verify the data, just pass it around. val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val taskDescription = new TaskDescription(taskId, 2, "1", "TASK 1000000", 19, - 1, mutable.Map.empty, mutable.Map.empty, new Properties, + 1, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, new Properties, Map(GPU -> new ResourceInformation(GPU, Array("0", "1"))), data) val serializedTaskDescription = TaskDescription.encode(taskDescription) backend.executor = mock[Executor] diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 5b868604ecf94..7cf7a81a76133 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -519,6 +519,7 @@ class ExecutorSuite extends SparkFunSuite partitionId = 0, addedFiles = Map[String, Long](), addedJars = Map[String, Long](), + addedArchives = Map[String, Long](), properties = new Properties, resources = immutable.Map[String, ResourceInformation](), serializedTask) diff --git a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala index 65d51e57ee308..7a74dd877a042 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/CoarseGrainedSchedulerBackendSuite.scala @@ -244,7 +244,8 @@ class CoarseGrainedSchedulerBackendSuite extends SparkFunSuite with LocalSparkCo val taskResources = Map(GPU -> new ResourceInformation(GPU, Array("0"))) var taskDescs: Seq[Seq[TaskDescription]] = Seq(Seq(new TaskDescription(1, 0, "1", - "t1", 0, 1, mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], + "t1", 0, 1, mutable.Map.empty[String, Long], + mutable.Map.empty[String, Long], mutable.Map.empty[String, Long], new Properties(), taskResources, bytebuffer))) val ts = backend.getTaskSchedulerImpl() when(ts.resourceOffers(any[IndexedSeq[WorkerOffer]], any[Boolean])).thenReturn(taskDescs) diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 915035e9eb71c..c4a8bcbb26a1d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -91,7 +91,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit .set(key, secretPassword) val hadoopconf = SparkHadoopUtil.get.newConfiguration(new SparkConf()) val eventLogger = new EventLoggingListener("test", None, testDirPath.toUri(), conf) - val envDetails = 
SparkEnv.environmentDetails(conf, hadoopconf, "FIFO", Seq.empty, Seq.empty) + val envDetails = SparkEnv.environmentDetails( + conf, hadoopconf, "FIFO", Seq.empty, Seq.empty, Seq.empty) val event = SparkListenerEnvironmentUpdate(envDetails) val redactedProps = eventLogger.redactEvent(event).environmentDetails("Spark Properties").toMap assert(redactedProps(key) == "*********(redacted)") diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala index 5839532f11666..98b5bada27646 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskDescriptionSuite.scala @@ -33,6 +33,10 @@ class TaskDescriptionSuite extends SparkFunSuite { originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) + val originalArchives = new HashMap[String, Long]() + originalArchives.put("archiveUrl1", 1824) + originalArchives.put("archiveUrl2", 2) + val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) @@ -70,6 +74,7 @@ class TaskDescriptionSuite extends SparkFunSuite { partitionId = 1, originalFiles, originalJars, + originalArchives, originalProperties, originalResources, taskBuffer @@ -87,6 +92,7 @@ class TaskDescriptionSuite extends SparkFunSuite { assert(decodedTaskDescription.partitionId === originalTaskDescription.partitionId) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) + assert(decodedTaskDescription.addedArchives.equals(originalArchives)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(equalResources(decodedTaskDescription.resources, originalTaskDescription.resources)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) diff --git a/docs/configuration.md b/docs/configuration.md index 76494b04c9279..d4d8e47645921 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -784,6 +784,17 @@ Apart from these, the following properties are also available, and may be useful 2.3.0 + + spark.archives + + + Comma-separated list of archives to be extracted into the working directory of each executor. + .jar, .tar.gz, .tgz and .zip are supported. You can specify the directory name to unpack via + adding # after the file name to unpack, for example, file.zip#directory. + This configuration is experimental. 
+ + 3.1.0 + spark.pyspark.driver.python diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 5a66bfca27a27..9405927eb1cb5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -41,6 +41,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.sort.io.LocalDiskShuffleMapOutputWriter.commitAllPartitions"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.api.ShuffleMapOutputWriter.commitAllPartitions"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.SparkEnv.environmentDetails"), // mllib module ProblemFilters.exclude[NewMixinForwarderProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.totalIterations"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.ml.classification.LogisticRegressionTrainingSummary.$init$"), diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst index ef4d05a8eefea..0aff6dc1d16b4 100644 --- a/python/docs/source/user_guide/python_packaging.rst +++ b/python/docs/source/user_guide/python_packaging.rst @@ -77,8 +77,7 @@ Using Zipped Virtual Environment -------------------------------- The idea of zipped environments is to zip your whole `virtual environment `_, -ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Note that this -is currently supported *only for YARN*. +ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. Zip Virtual Environment ~~~~~~~~~~~~~~~~~~~~~~~ @@ -92,16 +91,15 @@ Example with `conda-pack`: .. code-block:: bash - conda create -y -n conda_env -c conda-forge \ - pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 conda-pack==0.4.0 - conda activate conda_env - conda pack -f -o conda_env.tar.gz + conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 + conda activate pyspark_env + conda pack -f -o pyspark_env.tar.gz Upload to Spark Executors ~~~~~~~~~~~~~~~~~~~~~~~~~ Unzipping will be done by Spark when using target ``--archives`` option in spark-submit -or setting ``spark.yarn.dist.archives`` configuration. +or setting ``spark.archives`` configuration. Example with ``spark-submit``: @@ -109,8 +107,7 @@ Example with ``spark-submit``: export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - spark-submit --master=yarn --deploy-mode client \ - --archives conda_env.tar.gz#environment app.py + spark-submit --master=... --archives pyspark_env.tar.gz#environment app.py Example using ``SparkSession.builder``: @@ -121,11 +118,17 @@ Example using ``SparkSession.builder``: from app import main os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" - builder = SparkSession.builder.master("yarn").config( - "spark.yarn.dist.archives", "conda_env.tar.gz#environment") - spark = builder.getOrCreate() + spark = SparkSession.builder.master("...").config("spark.archives", "pyspark_env.tar.gz#environment").getOrCreate() main(spark) +Example with ``pyspark`` shell: + +.. code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + pyspark --master=... 
--archives pyspark_env.tar.gz#environment + Using PEX --------- diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala index 6a6514569cf90..10030a20f0884 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosFineGrainedSchedulerBackendSuite.scala @@ -264,6 +264,7 @@ class MesosFineGrainedSchedulerBackendSuite partitionId = 0, addedFiles = mutable.Map.empty[String, Long], addedJars = mutable.Map.empty[String, Long], + addedArchives = mutable.Map.empty[String, Long], properties = new Properties(), resources = immutable.Map.empty[String, ResourceInformation], ByteBuffer.wrap(new Array[Byte](0))) @@ -377,6 +378,7 @@ class MesosFineGrainedSchedulerBackendSuite partitionId = 0, addedFiles = mutable.Map.empty[String, Long], addedJars = mutable.Map.empty[String, Long], + addedArchives = mutable.Map.empty[String, Long], properties = new Properties(), resources = immutable.Map.empty[String, ResourceInformation], ByteBuffer.wrap(new Array[Byte](0))) From 52e5cc46bc184bf582f9bc9ebcc5c8180222c421 Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 1 Dec 2020 14:42:48 +0900 Subject: [PATCH 0621/1009] [SPARK-27188][SS] FileStreamSink: provide a new option to have retention on output files ### What changes were proposed in this pull request? This patch proposes to provide a new option to specify time-to-live (TTL) for output file entries in FileStreamSink. TTL is defined via current timestamp - the last modified time for the file. This patch will filter out outdated output files in metadata while compacting batches (other batches don't have functionality to clean entries), which helps metadata to not grow linearly, as well as filtered out files will be "eventually" no longer seen in reader queries which leverage File(Stream)Source. ### Why are the changes needed? The metadata log greatly helps to easily achieve exactly-once but given the output path is open to arbitrary readers, there's no way to compact the metadata log, which ends up growing the metadata file as query runs for long time, especially for compacted batch. Lots of end users have been reporting the issue: see comments in [SPARK-24295](https://issues.apache.org/jira/browse/SPARK-24295) and [SPARK-29995](https://issues.apache.org/jira/browse/SPARK-29995), and [SPARK-30462](https://issues.apache.org/jira/browse/SPARK-30462). (There're some reports from end users which include their workarounds: SPARK-24295) ### Does this PR introduce any user-facing change? No, as the configuration is new and by default it is not applied. ### How was this patch tested? New UT. Closes #28363 from HeartSaVioR/SPARK-27188-v2. 
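As a rough illustration of the option this patch introduces (not part of the patch itself): the sketch below assumes only the documented `retention` key and duration-string format; the source, output path, and checkpoint location are placeholder values.

```scala
// Hypothetical usage sketch: enable the new file-sink retention option.
// Only the "retention" key and the duration format ("12h", "7d", ...) come
// from this patch; everything else here is an illustrative placeholder.
val query = spark.readStream
  .format("rate")                                      // placeholder streaming source
  .load()
  .writeStream
  .format("parquet")
  .option("path", "/tmp/file-sink-output")             // placeholder output directory
  .option("checkpointLocation", "/tmp/file-sink-ckpt") // placeholder checkpoint dir
  .option("retention", "7d")                           // metadata entries older than 7 days are dropped at compaction
  .start()
```

Note that entries are only filtered while compacting metadata batches, so the effect on reader queries is eventual rather than immediate.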
Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../structured-streaming-programming-guide.md | 6 +- .../streaming/CompactibleFileStreamLog.scala | 8 +- .../execution/streaming/FileStreamSink.scala | 7 +- .../streaming/FileStreamSinkLog.scala | 25 +++++- .../streaming/FileStreamSinkLogSuite.scala | 77 +++++++++++-------- 5 files changed, 83 insertions(+), 40 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index c671d6b590626..6995ee2475aee 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1874,7 +1874,11 @@ Here are the details of all the sinks in Spark. File Sink Append - path: path to the output directory, must be specified. + path: path to the output directory, must be specified.
      + retention: time to live (TTL) for output files. Output files whose batches were + committed earlier than the TTL will eventually be excluded from the metadata log, which means reader queries + that read the sink's output directory may not process them. The value can be provided as a duration string (like "12h", "7d", etc.). + By default it is disabled.

      For file-format-specific options, see the related methods in DataFrameWriter (Scala/Java/Python/ - filterInBatch(id)(shouldRetain).getOrElse { + filterInBatch(id)(shouldRetain(_, curTime)).getOrElse { throw new IllegalStateException( s"${batchIdToPath(id)} doesn't exist " + s"(latestId: $latestId, compactInterval: $compactInterval)") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index ecaf4f8160a06..e1c9b82ec2ac9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormat, FileFormatWriter} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.SerializableConfiguration +import org.apache.spark.util.{SerializableConfiguration, Utils} object FileStreamSink extends Logging { // The name of the subdirectory that is used to store metadata about which files are valid. @@ -136,8 +136,9 @@ class FileStreamSink( private val basePath = new Path(path) private val logPath = getMetadataLogPath(basePath.getFileSystem(hadoopConf), basePath, sparkSession.sessionState.conf) - private val fileLog = - new FileStreamSinkLog(FileStreamSinkLog.VERSION, sparkSession, logPath.toString) + private val retention = options.get("retention").map(Utils.timeStringAsMs) + private val fileLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, sparkSession, + logPath.toString, retention) private def basicWriteJobStatsTracker: BasicWriteJobStatsTracker = { val serializableHadoopConf = new SerializableConfiguration(hadoopConf) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index 5cb68e1ae956e..2d70d95c6850d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -81,7 +81,8 @@ object SinkFileStatus { class FileStreamSinkLog( metadataLogVersion: Int, sparkSession: SparkSession, - path: String) + path: String, + _retentionMs: Option[Long] = None) extends CompactibleFileStreamLog[SinkFileStatus](metadataLogVersion, sparkSession, path) { private implicit val formats = Serialization.formats(NoTypeHints) @@ -96,6 +97,28 @@ class FileStreamSinkLog( require(defaultCompactInterval > 0, s"Please set ${SQLConf.FILE_SINK_LOG_COMPACT_INTERVAL.key} (was $defaultCompactInterval) " + "to a positive value.") + + val retentionMs: Long = _retentionMs match { + case Some(retention) => + logInfo(s"Retention is set to $retention ms") + retention + + case _ => Long.MaxValue + } + + override def shouldRetain(log: SinkFileStatus, currentTime: Long): Boolean = { + if (retentionMs < Long.MaxValue) { + if (currentTime - log.modificationTime > retentionMs) { + logDebug(s"${log.path} excluded by retention - current time: $currentTime / " + + s"modification time: ${log.modificationTime} / retention: $retentionMs ms.") + false + } else { + true + } + } else { + true + } + } } object FileStreamSinkLog { diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala index 622d69e188821..d6707e7be71fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLogSuite.scala @@ -25,7 +25,7 @@ import java.util.concurrent.ConcurrentHashMap import scala.util.Random -import org.apache.hadoop.fs.{FSDataInputStream, Path, RawLocalFileSystem} +import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path, RawLocalFileSystem} import org.apache.spark.SparkFunSuite import org.apache.spark.sql.internal.SQLConf @@ -39,7 +39,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { test("shouldRetain") { withFileStreamSinkLog { sinkLog => val log = newFakeSinkFileStatus("/a/b/x", FileStreamSinkLog.ADD_ACTION) - assert(sinkLog.shouldRetain(log)) + assert(sinkLog.shouldRetain(log, System.currentTimeMillis())) } } @@ -129,6 +129,17 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } } + private def listBatchFiles(fs: FileSystem, sinkLog: FileStreamSinkLog): Set[String] = { + fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => + try { + getBatchIdFromFileName(fileName) + true + } catch { + case _: NumberFormatException => false + } + }.toSet + } + test("delete expired file") { // Set FILE_SINK_LOG_CLEANUP_DELAY to 0 so that we can detect the deleting behaviour // deterministically and one min batches to retain @@ -138,18 +149,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { SQLConf.MIN_BATCHES_TO_RETAIN.key -> "1") { withFileStreamSinkLog { sinkLog => val fs = sinkLog.metadataPath.getFileSystem(spark.sessionState.newHadoopConf()) - - def listBatchFiles(): Set[String] = { - fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => - try { - getBatchIdFromFileName(fileName) - true - } catch { - case _: NumberFormatException => false - } - }.toSet - } - + def listBatchFiles(): Set[String] = this.listBatchFiles(fs, sinkLog) sinkLog.add(0, Array(newFakeSinkFileStatus("/a/b/0", FileStreamSinkLog.ADD_ACTION))) assert(Set("0") === listBatchFiles()) sinkLog.add(1, Array(newFakeSinkFileStatus("/a/b/1", FileStreamSinkLog.ADD_ACTION))) @@ -173,18 +173,7 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { SQLConf.MIN_BATCHES_TO_RETAIN.key -> "2") { withFileStreamSinkLog { sinkLog => val fs = sinkLog.metadataPath.getFileSystem(spark.sessionState.newHadoopConf()) - - def listBatchFiles(): Set[String] = { - fs.listStatus(sinkLog.metadataPath).map(_.getPath.getName).filter { fileName => - try { - getBatchIdFromFileName(fileName) - true - } catch { - case _: NumberFormatException => false - } - }.toSet - } - + def listBatchFiles(): Set[String] = this.listBatchFiles(fs, sinkLog) sinkLog.add(0, Array(newFakeSinkFileStatus("/a/b/0", FileStreamSinkLog.ADD_ACTION))) assert(Set("0") === listBatchFiles()) sinkLog.add(1, Array(newFakeSinkFileStatus("/a/b/1", FileStreamSinkLog.ADD_ACTION))) @@ -205,6 +194,24 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } } + test("filter out outdated entries when compacting") { + val curTime = System.currentTimeMillis() + withFileStreamSinkLog(sinkLog => { + val logs = Seq( + newFakeSinkFileStatus("/a/b/x", FileStreamSinkLog.ADD_ACTION, 
curTime), + newFakeSinkFileStatus("/a/b/y", FileStreamSinkLog.ADD_ACTION, curTime), + newFakeSinkFileStatus("/a/b/z", FileStreamSinkLog.ADD_ACTION, curTime)) + logs.foreach { log => assert(sinkLog.shouldRetain(log, curTime)) } + + val logs2 = Seq( + newFakeSinkFileStatus("/a/b/m", FileStreamSinkLog.ADD_ACTION, curTime - 80000), + newFakeSinkFileStatus("/a/b/n", FileStreamSinkLog.ADD_ACTION, curTime - 120000)) + logs2.foreach { log => + assert(!sinkLog.shouldRetain(log, curTime)) + } + }, Some(60000)) + } + test("read Spark 2.1.0 log format") { assert(readFromResource("file-sink-log-version-2.1.0") === Seq( SinkFileStatus("/a/b/0", 1, false, 1, 1, 100, FileStreamSinkLog.ADD_ACTION), @@ -259,23 +266,29 @@ class FileStreamSinkLogSuite extends SparkFunSuite with SharedSparkSession { } /** - * Create a fake SinkFileStatus using path and action. Most of tests don't care about other fields - * in SinkFileStatus. + * Create a fake SinkFileStatus using path and action, and optionally modification time. + * Most of tests don't care about other fields in SinkFileStatus. */ - private def newFakeSinkFileStatus(path: String, action: String): SinkFileStatus = { + private def newFakeSinkFileStatus( + path: String, + action: String, + modificationTime: Long = Long.MaxValue): SinkFileStatus = { SinkFileStatus( path = path, size = 100L, isDir = false, - modificationTime = 100L, + modificationTime = modificationTime, blockReplication = 1, blockSize = 100L, action = action) } - private def withFileStreamSinkLog(f: FileStreamSinkLog => Unit): Unit = { + private def withFileStreamSinkLog( + f: FileStreamSinkLog => Unit, + ttl: Option[Long] = None): Unit = { withTempDir { file => - val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, file.getCanonicalPath) + val sinkLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, spark, file.getCanonicalPath, + ttl) f(sinkLog) } } From 103481551979297729123aaa56896d182d74847f Mon Sep 17 00:00:00 2001 From: "zky.zhoukeyong" Date: Tue, 1 Dec 2020 11:07:16 +0000 Subject: [PATCH 0622/1009] [SPARK-33572][SQL] Datetime building should fail if the year, month, ..., second combination is invalid ### What changes were proposed in this pull request? Datetime building should fail if the year, month, ..., second combination is invalid, when ANSI mode is enabled. This patch should update MakeDate, MakeTimestamp and MakeInterval. ### Why are the changes needed? For ANSI mode. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30516 from waitinfuture/SPARK-33498. 
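A minimal sketch of the user-visible difference (not part of the patch): it assumes the behaviour is gated by `spark.sql.ansi.enabled`, as wired through the `failOnError` flag in the expression changes below, and the exception message is the one shown in the updated `postgreSQL/date.sql.out` results below.

```scala
// With ANSI mode disabled, an invalid calendar date still evaluates to NULL.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT make_date(2013, 2, 30)").show()    // single row containing NULL

// With ANSI mode enabled, the same expression now fails at execution time.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT make_date(2013, 2, 30)").collect()
// java.time.DateTimeException: Invalid date 'FEBRUARY 30'
```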
Lead-authored-by: zky.zhoukeyong Co-authored-by: waitinfuture Signed-off-by: Wenchen Fan --- .../expressions/datetimeExpressions.scala | 27 ++-- .../expressions/intervalExpressions.scala | 23 +++- .../expressions/DateExpressionsSuite.scala | 118 ++++++++++++------ .../IntervalExpressionsSuite.scala | 60 +++++++++ .../sql-tests/results/postgreSQL/date.sql.out | 15 ++- 5 files changed, 187 insertions(+), 56 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 1ff5833fb4dd6..bbf1e4657f351 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1789,31 +1789,36 @@ private case class GetTimestamp( """, group = "datetime_funcs", since = "3.0.0") -case class MakeDate(year: Expression, month: Expression, day: Expression) +case class MakeDate(year: Expression, month: Expression, day: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { + def this(year: Expression, month: Expression, day: Expression) = + this(year, month, day, SQLConf.get.ansiEnabled) + override def children: Seq[Expression] = Seq(year, month, day) override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType) override def dataType: DataType = DateType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def nullSafeEval(year: Any, month: Any, day: Any): Any = { try { val ld = LocalDate.of(year.asInstanceOf[Int], month.asInstanceOf[Int], day.asInstanceOf[Int]) localDateToDays(ld) } catch { - case _: java.time.DateTimeException => null + case _: java.time.DateTimeException if !failOnError => null } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + val failOnErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" nullSafeCodeGen(ctx, ev, (year, month, day) => { s""" try { ${ev.value} = $dtu.localDateToDays(java.time.LocalDate.of($year, $month, $day)); } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; + $failOnErrorBranch }""" }) } @@ -1860,7 +1865,8 @@ case class MakeTimestamp( min: Expression, sec: Expression, timezone: Option[Expression] = None, - timeZoneId: Option[String] = None) + timeZoneId: Option[String] = None, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends SeptenaryExpression with TimeZoneAwareExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1871,7 +1877,7 @@ case class MakeTimestamp( hour: Expression, min: Expression, sec: Expression) = { - this(year, month, day, hour, min, sec, None, None) + this(year, month, day, hour, min, sec, None, None, SQLConf.get.ansiEnabled) } def this( @@ -1882,7 +1888,7 @@ case class MakeTimestamp( min: Expression, sec: Expression, timezone: Expression) = { - this(year, month, day, hour, min, sec, Some(timezone), None) + this(year, month, day, hour, min, sec, Some(timezone), None, SQLConf.get.ansiEnabled) } override def children: Seq[Expression] = Seq(year, month, day, hour, min, sec) ++ timezone @@ -1892,7 +1898,7 @@ case class MakeTimestamp( Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(8, 6)) ++ 
timezone.map(_ => StringType) override def dataType: DataType = TimestampType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1926,7 +1932,7 @@ case class MakeTimestamp( } instantToMicros(ldt.atZone(zoneId).toInstant) } catch { - case _: DateTimeException => null + case _: DateTimeException if !failOnError => null } } @@ -1955,6 +1961,7 @@ case class MakeTimestamp( val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) val d = Decimal.getClass.getName.stripSuffix("$") + val failOnErrorBranch = if (failOnError) "throw e;" else s"${ev.isNull} = true;" nullSafeCodeGen(ctx, ev, (year, month, day, hour, min, secAndNanos, timezone) => { val zoneId = timezone.map(tz => s"$dtu.getZoneId(${tz}.toString())").getOrElse(zid) s""" @@ -1978,7 +1985,7 @@ case class MakeTimestamp( java.time.Instant instant = ldt.atZone($zoneId).toInstant(); ${ev.value} = $dtu.instantToMicros(instant); } catch (java.time.DateTimeException e) { - ${ev.isNull} = true; + $failOnErrorBranch }""" }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 8b92c619df626..6219457bba994 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -161,9 +161,20 @@ case class MakeInterval( days: Expression, hours: Expression, mins: Expression, - secs: Expression) + secs: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends SeptenaryExpression with ImplicitCastInputTypes with NullIntolerant { + def this( + years: Expression, + months: Expression, + weeks: Expression, + days: Expression, + hours: Expression, + mins: Expression, + sec: Expression) = { + this(years, months, weeks, days, hours, mins, sec, SQLConf.get.ansiEnabled) + } def this( years: Expression, months: Expression, @@ -171,7 +182,8 @@ case class MakeInterval( days: Expression, hours: Expression, mins: Expression) = { - this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6))) + this(years, months, weeks, days, hours, mins, Literal(Decimal(0, Decimal.MAX_LONG_DIGITS, 6)), + SQLConf.get.ansiEnabled) } def this( years: Expression, @@ -195,7 +207,7 @@ case class MakeInterval( override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(Decimal.MAX_LONG_DIGITS, 6)) override def dataType: DataType = CalendarIntervalType - override def nullable: Boolean = true + override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def nullSafeEval( year: Any, @@ -215,7 +227,7 @@ case class MakeInterval( min.asInstanceOf[Int], sec.map(_.asInstanceOf[Decimal]).getOrElse(Decimal(0, Decimal.MAX_LONG_DIGITS, 6))) } catch { - case _: ArithmeticException => null + case _: ArithmeticException if !failOnError => null } } @@ -223,11 +235,12 @@ case class MakeInterval( nullSafeCodeGen(ctx, ev, (year, month, week, day, hour, min, sec) => { val iu = IntervalUtils.getClass.getName.stripSuffix("$") val secFrac = sec.getOrElse("0") + val faileOnErrorBranch = 
if (failOnError) "throw e;" else s"${ev.isNull} = true;" s""" try { ${ev.value} = $iu.makeInterval($year, $month, $week, $day, $hour, $min, $secFrac); } catch (java.lang.ArithmeticException e) { - ${ev.isNull} = true; + $faileOnErrorBranch } """ }) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index a3ffc1129fd5e..587ca0cdbed6e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.{Date, Timestamp} import java.text.{ParseException, SimpleDateFormat} -import java.time.{Instant, LocalDate, ZoneId} +import java.time.{DateTimeException, Instant, LocalDate, ZoneId} import java.time.format.DateTimeParseException import java.util.{Calendar, Locale, TimeZone} import java.util.concurrent.TimeUnit._ @@ -1014,49 +1014,97 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("creating values of DateType via make_date") { - checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15")) - checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null) - checkEvaluation(MakeDate(Literal(2019), Literal.create(null, IntegerType), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal.create(null, IntegerType)), null) - checkEvaluation(MakeDate(Literal(Int.MaxValue), Literal(13), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(13), Literal(19)), null) - checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal(32)), null) + Seq(true, false).foreach({ ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + checkEvaluation(MakeDate(Literal(2013), Literal(7), Literal(15)), Date.valueOf("2013-7-15")) + checkEvaluation(MakeDate(Literal.create(null, IntegerType), Literal(7), Literal(15)), null) + checkEvaluation(MakeDate(Literal(2019), Literal.create(null, IntegerType), Literal(19)), + null) + checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal.create(null, IntegerType)), + null) + } + }) + + // ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + checkExceptionInExpression[DateTimeException](MakeDate(Literal(Int.MaxValue), Literal(13), + Literal(19)), EmptyRow, "Invalid value for Year") + checkExceptionInExpression[DateTimeException](MakeDate(Literal(2019), + Literal(13), Literal(19)), EmptyRow, "Invalid value for Month") + checkExceptionInExpression[DateTimeException](MakeDate(Literal(2019), Literal(7), + Literal(32)), EmptyRow, "Invalid value for Day") + } + + // non-ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + checkEvaluation(MakeDate(Literal(Int.MaxValue), Literal(13), Literal(19)), null) + checkEvaluation(MakeDate(Literal(2019), Literal(13), Literal(19)), null) + checkEvaluation(MakeDate(Literal(2019), Literal(7), Literal(32)), null) + } } test("creating values of TimestampType via make_timestamp") { - var makeTimestampExpr = MakeTimestamp( - Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), - Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) val expected = Timestamp.valueOf("2013-7-15 8:15:23.5") - checkEvaluation(makeTimestampExpr, expected) - 
checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) - - checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(year = Literal(Int.MaxValue)), null) - - checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(month = Literal(13)), null) - - checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(day = Literal(32)), null) - checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(hour = Literal(25)), null) + Seq(true, false).foreach { ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + var makeTimestampExpr = MakeTimestamp( + Literal(2013), Literal(7), Literal(15), Literal(8), Literal(15), + Literal(Decimal(BigDecimal(23.5), 8, 6)), Some(Literal(ZoneId.systemDefault().getId))) + checkEvaluation(makeTimestampExpr, expected) + checkEvaluation(makeTimestampExpr.copy(year = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(month = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(day = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(hour = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) + checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) + checkEvaluation(makeTimestampExpr.copy(timezone = None), expected) + + Seq( + (makeTimestampExpr.copy(year = Literal(Int.MaxValue)), "Invalid value for Year"), + (makeTimestampExpr.copy(month = Literal(13)), "Invalid value for Month"), + (makeTimestampExpr.copy(day = Literal(32)), "Invalid value for Day"), + (makeTimestampExpr.copy(hour = Literal(25)), "Invalid value for Hour"), + (makeTimestampExpr.copy(min = Literal(65)), "Invalid value for Min"), + (makeTimestampExpr.copy(sec = Literal(Decimal( + BigDecimal(70.0), 8, 6))), "Invalid value for Second") + ).foreach { entry => + if (ansi) { + checkExceptionInExpression[DateTimeException](entry._1, EmptyRow, entry._2) + } else { + checkEvaluation(entry._1, null) + } + } - checkEvaluation(makeTimestampExpr.copy(min = Literal.create(null, IntegerType)), null) - checkEvaluation(makeTimestampExpr.copy(min = Literal(65)), null) + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), + Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) + if (ansi) { + checkExceptionInExpression[DateTimeException](makeTimestampExpr.copy(sec = Literal( + Decimal(BigDecimal(60.5), 8, 6))), EmptyRow, "The fraction of sec must be zero") + } else { + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 00:00:00")) + } - checkEvaluation(makeTimestampExpr.copy(sec = Literal.create(null, DecimalType(8, 6))), null) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(70.0), 8, 6))), null) + makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), Literal(0), + Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + } + } - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), - Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-07-01 
00:00:00")) - checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) + // non-ansi test + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + val makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(6), Literal(30), + Literal(23), Literal(59), Literal(Decimal(BigDecimal(60.0), 8, 6))) + checkEvaluation(makeTimestampExpr.copy(sec = Literal(Decimal(BigDecimal(60.5), 8, 6))), null) + } - makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), - Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) - checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + Seq(true, false).foreach { ansi => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansi.toString) { + val makeTimestampExpr = MakeTimestamp(Literal(2019), Literal(8), Literal(12), + Literal(0), Literal(0), Literal(Decimal(BigDecimal(58.000001), 8, 6))) + checkEvaluation(makeTimestampExpr, Timestamp.valueOf("2019-08-12 00:00:58.000001")) + } + } } test("ISO 8601 week-numbering year") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index 6b7be4f1609a5..5c73a91de4f79 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -214,4 +214,64 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { millis = Int.MaxValue, micros = Int.MaxValue) } + + test("ANSI mode: make interval") { + def check( + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { + val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) + val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), + Literal(days), Literal(hours), Literal(minutes), + Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))) + val totalMonths = years * MONTHS_PER_YEAR + months + val totalDays = weeks * DAYS_PER_WEEK + days + val totalMicros = secFrac + minutes * MICROS_PER_MINUTE + hours * MICROS_PER_HOUR + val expected = new CalendarInterval(totalMonths, totalDays, totalMicros) + checkEvaluation(intervalExpr, expected) + } + + def checkException( + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { + val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) + val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), + Literal(days), Literal(hours), Literal(minutes), + Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))) + checkExceptionInExpression[ArithmeticException](intervalExpr, EmptyRow, "") + } + + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + check(months = 0, days = 0, micros = 0) + check(years = -123) + check(weeks = 123) + check(millis = -123) + check(9999, 11, 0, 31, 23, 59, 59, 999, 999) + check(years = 10000, micros = -1) + check(-9999, -11, 0, -31, -23, -59, -59, -999, -999) + check(years = -10000, micros = 1) + check( + hours = Int.MaxValue, + minutes = Int.MaxValue, + seconds = Int.MaxValue, + millis = Int.MaxValue, + micros = Int.MaxValue) + + checkException(years = Int.MaxValue) + checkException(weeks = 
Int.MaxValue) + } + } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 151fa1e28d725..a959284750483 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -590,25 +590,28 @@ struct -- !query select make_date(2013, 2, 30) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid date 'FEBRUARY 30' -- !query select make_date(2013, 13, 1) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid value for MonthOfYear (valid values 1 - 12): 13 -- !query select make_date(2013, 11, -1) -- !query schema -struct +struct<> -- !query output -NULL +java.time.DateTimeException +Invalid value for DayOfMonth (valid values 1 - 28/31): -1 -- !query From e5bb2937f6682239e83605b65214dfca3bdd50e5 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Tue, 1 Dec 2020 20:34:00 +0900 Subject: [PATCH 0623/1009] [SPARK-32032][SS] Avoid infinite wait in driver because of KafkaConsumer.poll(long) API ### What changes were proposed in this pull request? Deprecated `KafkaConsumer.poll(long)` API calls may cause infinite wait in the driver. In this PR I've added a new `AdminClient` based offset fetching which is turned off by default. There is a new flag named `spark.sql.streaming.kafka.useDeprecatedOffsetFetching` (default: `true`) which can be set to `false` to reach the newly added functionality. The Structured Streaming migration guide contains more information what migration consideration must be done. Please see the following [doc](https://docs.google.com/document/d/1gAh0pKgZUgyqO2Re3sAy-fdYpe_SxpJ6DkeXE8R1P7E/edit?usp=sharing) for further details. The PR contains the following changes: * Added `AdminClient` based offset fetching * GroupId prefix feature removed from driver but only in `AdminClient` based approach (`AdminClient` doesn't need any GroupId) * GroupId override feature removed from driver but only in `AdminClient` based approach (`AdminClient` doesn't need any GroupId) * Additional unit tests * Code comment changes * Minor bugfixes here and there * Removed Kafka auto topic creation feature but only in `AdminClient` based approach (please see doc for rationale). In short, it's super hidden, not sure anybody ever used in production + error prone. * Added documentation to `ss-migration-guide` and `structured-streaming-kafka-integration` ### Why are the changes needed? Driver may hang forever. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing + additional unit tests. Cluster test with simple Kafka topic to another topic query. Documentation: ``` cd docs/ SKIP_API=1 jekyll build ``` Manual webpage check. Closes #29729 from gaborgsomogyi/SPARK-32032. 
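For orientation, a minimal sketch of opting in to the new fetching path (not part of the patch): the flag name and its default of `true` come from this patch; the broker address and topic are placeholders, and it assumes the flag can be set like any other SQL conf (it can equally be passed via `--conf` at submit time).

```scala
// Switch the driver to the AdminClient-based offset fetching added here.
spark.conf.set("spark.sql.streaming.kafka.useDeprecatedOffsetFetching", "false")

val stream = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "broker1:9092")  // placeholder brokers
  .option("subscribe", "events")                      // placeholder topic
  .load()
```

With the new path, the driver only needs topic describe permission, since the AdminClient does not join a consumer group; executor-side behaviour (group prefix and override) is unchanged.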
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/ss-migration-guide.md | 5 + .../structured-streaming-kafka-integration.md | 20 + .../spark/sql/kafka010/ConsumerStrategy.scala | 65 +- .../spark/sql/kafka010/KafkaBatch.scala | 2 +- .../sql/kafka010/KafkaOffsetReader.scala | 601 +---------------- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 573 ++++++++++++++++ .../kafka010/KafkaOffsetReaderConsumer.scala | 614 ++++++++++++++++++ .../spark/sql/kafka010/KafkaRelation.scala | 2 +- .../sql/kafka010/KafkaSourceProvider.scala | 6 +- .../sql/kafka010/ConsumerStrategySuite.scala | 147 +++++ .../kafka010/KafkaMicroBatchSourceSuite.scala | 42 +- .../sql/kafka010/KafkaOffsetReaderSuite.scala | 95 ++- .../sql/kafka010/KafkaRelationSuite.scala | 47 +- .../apache/spark/sql/internal/SQLConf.scala | 13 + 14 files changed, 1587 insertions(+), 645 deletions(-) create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala create mode 100644 external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala create mode 100644 external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala diff --git a/docs/ss-migration-guide.md b/docs/ss-migration-guide.md index d52b2e095fc76..480e5e2695a16 100644 --- a/docs/ss-migration-guide.md +++ b/docs/ss-migration-guide.md @@ -30,6 +30,11 @@ Please refer [Migration Guide: SQL, Datasets and DataFrame](sql-migration-guide. - In Spark 3.0 and before, for the queries that have stateful operation which can emit rows older than the current watermark plus allowed late record delay, which are "late rows" in downstream stateful operations and these rows can be discarded, Spark only prints a warning message. Since Spark 3.1, Spark will check for such queries with possible correctness issue and throw AnalysisException for it by default. For the users who understand the possible risk of correctness issue and still decide to run the query, please disable this check by setting the config `spark.sql.streaming.statefulOperator.checkCorrectness.enabled` to false. +- In Spark 3.0 and before Spark uses `KafkaConsumer` for offset fetching which could cause infinite wait in the driver. + In Spark 3.1 a new configuration option added `spark.sql.streaming.kafka.useDeprecatedOffsetFetching` (default: `true`) + which could be set to `false` allowing Spark to use new offset fetching mechanism using `AdminClient`. + For further details please see [Structured Streaming Kafka Integration](structured-streaming-kafka-integration.html#offset-fetching). + ## Upgrading from Structured Streaming 2.4 to 3.0 - In Spark 3.0, Structured Streaming forces the source schema into nullable when file-based datasources such as text, json, csv, parquet and orc are used via `spark.readStream(...)`. Previously, it respected the nullability in source schema; however, it caused issues tricky to debug with NPE. To restore the previous behavior, set `spark.sql.streaming.fileSource.schema.forceNullable` to `false`. diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 0e4d167b58d6b..f92dd039d53b7 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -512,6 +512,26 @@ The following configurations are optional: +### Offset fetching + +In Spark 3.0 and before Spark uses KafkaConsumer for offset fetching which could cause infinite wait in the driver. 
+In Spark 3.1 a new configuration option added spark.sql.streaming.kafka.useDeprecatedOffsetFetching (default: true) +which could be set to `false` allowing Spark to use new offset fetching mechanism using AdminClient. +When the new mechanism used the following applies. + +First of all the new approach supports Kafka brokers `0.11.0.0+`. + +In Spark 3.0 and below, secure Kafka processing needed the following ACLs from driver perspective: +* Topic resource describe operation +* Topic resource read operation +* Group resource read operation + +Since Spark 3.1, offsets can be obtained with AdminClient instead of KafkaConsumer and for that the following ACLs needed from driver perspective: +* Topic resource describe operation + +Since AdminClient in driver is not connecting to consumer group, group.id based authorization will not work anymore (executors never done group based authorization). +Worth to mention executor side is behaving the exact same way like before (group prefix and override works). + ### Consumer Caching It's time-consuming to initialize Kafka consumers, especially in streaming scenarios where processing time is a key factor. diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala index 7bb829c282eba..a0331d7889e04 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/ConsumerStrategy.scala @@ -20,12 +20,15 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} import scala.collection.JavaConverters._ +import scala.collection.mutable +import org.apache.kafka.clients.admin.Admin import org.apache.kafka.clients.consumer.{Consumer, KafkaConsumer} import org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener import org.apache.kafka.common.TopicPartition -import org.apache.spark.kafka010.KafkaConfigUpdater +import org.apache.spark.internal.Logging +import org.apache.spark.kafka010.{KafkaConfigUpdater, KafkaRedactionUtil} /** * Subscribe allows you to subscribe to a fixed collection of topics. @@ -36,10 +39,20 @@ import org.apache.spark.kafka010.KafkaConfigUpdater * All three strategies have overloaded constructors that allow you to specify * the starting offset for a particular partition. */ -private[kafka010] sealed trait ConsumerStrategy { +private[kafka010] sealed trait ConsumerStrategy extends Logging { /** Create a [[KafkaConsumer]] and subscribe to topics according to a desired strategy */ def createConsumer(kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] + /** Creates an [[org.apache.kafka.clients.admin.AdminClient]] */ + def createAdmin(kafkaParams: ju.Map[String, Object]): Admin = { + val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) + logDebug(s"Admin params: ${KafkaRedactionUtil.redactParams(updatedKafkaParams.asScala.toSeq)}") + Admin.create(updatedKafkaParams) + } + + /** Returns the assigned or subscribed [[TopicPartition]] */ + def assignedTopicPartitions(admin: Admin): Set[TopicPartition] + /** * Updates the parameters with security if needed. * Added a function to hide internals and reduce code duplications because all strategy uses it. 
@@ -48,13 +61,24 @@ private[kafka010] sealed trait ConsumerStrategy { KafkaConfigUpdater("source", kafkaParams.asScala.toMap) .setAuthenticationConfigIfNeeded() .build() + + protected def retrieveAllPartitions(admin: Admin, topics: Set[String]): Set[TopicPartition] = { + admin.describeTopics(topics.asJava).all().get().asScala.filterNot(_._2.isInternal).flatMap { + case (topic, topicDescription) => + topicDescription.partitions().asScala.map { topicPartitionInfo => + val partition = topicPartitionInfo.partition() + logDebug(s"Partition found: $topic:$partition") + new TopicPartition(topic, partition) + } + }.toSet + } } /** * Specify a fixed collection of partitions. */ private[kafka010] case class AssignStrategy(partitions: Array[TopicPartition]) - extends ConsumerStrategy { + extends ConsumerStrategy with Logging { override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) @@ -63,13 +87,20 @@ private[kafka010] case class AssignStrategy(partitions: Array[TopicPartition]) consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + val topics = partitions.map(_.topic()).toSet + logDebug(s"Topics for assignment: $topics") + retrieveAllPartitions(admin, topics).filter(partitions.contains(_)) + } + override def toString: String = s"Assign[${partitions.mkString(", ")}]" } /** * Subscribe to a fixed collection of topics. */ -private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends ConsumerStrategy { +private[kafka010] case class SubscribeStrategy(topics: Seq[String]) + extends ConsumerStrategy with Logging { override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) @@ -78,6 +109,10 @@ private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends Cons consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + retrieveAllPartitions(admin, topics.toSet) + } + override def toString: String = s"Subscribe[${topics.mkString(", ")}]" } @@ -85,16 +120,30 @@ private[kafka010] case class SubscribeStrategy(topics: Seq[String]) extends Cons * Use a regex to specify topics of interest. 
*/ private[kafka010] case class SubscribePatternStrategy(topicPattern: String) - extends ConsumerStrategy { + extends ConsumerStrategy with Logging { + private val topicRegex = topicPattern.r + override def createConsumer( kafkaParams: ju.Map[String, Object]): Consumer[Array[Byte], Array[Byte]] = { val updatedKafkaParams = setAuthenticationConfigIfNeeded(kafkaParams) val consumer = new KafkaConsumer[Array[Byte], Array[Byte]](updatedKafkaParams) - consumer.subscribe( - ju.regex.Pattern.compile(topicPattern), - new NoOpConsumerRebalanceListener()) + consumer.subscribe(ju.regex.Pattern.compile(topicPattern), new NoOpConsumerRebalanceListener()) consumer } + override def assignedTopicPartitions(admin: Admin): Set[TopicPartition] = { + logDebug(s"Topic pattern: $topicPattern") + var topics = mutable.Seq.empty[String] + // listTopics is not listing internal topics by default so no filter needed + admin.listTopics().listings().get().asScala.foreach { topicListing => + val name = topicListing.name() + if (topicRegex.findFirstIn(name).isDefined) { + logDebug(s"Topic matches pattern: $name") + topics :+= name + } + } + retrieveAllPartitions(admin, topics.toSet) + } + override def toString: String = s"SubscribePattern[$topicPattern]" } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index a1b0f7d22216b..268719d6aed2c 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -48,7 +48,7 @@ private[kafka010] class KafkaBatch( // id. Hence, we should generate a unique id for each query. val uniqueGroupId = KafkaSourceProvider.batchUniqueGroupId(sourceOptions) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy, KafkaSourceProvider.kafkaParamsForDriver(specifiedKafkaParams), sourceOptions, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index adcc20c25cb5f..b1992c1dc6a0a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -19,595 +19,62 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} -import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import scala.util.control.NonFatal - -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition -import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging -import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} +import org.apache.spark.sql.internal.SQLConf /** - * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to - * read data offsets from Kafka. - * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read - * by this source. These strategies directly correspond to the different consumption options - * in. 
This class is designed to return a configured - * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the - * [[KafkaSource]] to query for the offsets. See the docs on - * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] - * for more details. - * - * Note: This class is not ThreadSafe + * Base trait to fetch offsets from Kafka. The implementations are + * [[KafkaOffsetReaderConsumer]] and [[KafkaOffsetReaderAdmin]]. + * Please see the documentation and API description there. */ -private[kafka010] class KafkaOffsetReader( - consumerStrategy: ConsumerStrategy, - val driverKafkaParams: ju.Map[String, Object], - readerOptions: CaseInsensitiveMap[String], - driverGroupIdPrefix: String) extends Logging { - - /** - * [[UninterruptibleThreadRunner]] ensures that all - * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an - * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an - * [[UninterruptibleThread]], however for batch mode this is not the case. - */ - val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") - - /** - * Place [[groupId]] and [[nextId]] here so that they are initialized before any consumer is - * created -- see SPARK-19564. - */ - private var groupId: String = null - private var nextId = 0 +private[kafka010] trait KafkaOffsetReader { - /** - * A KafkaConsumer used in the driver to query the latest Kafka offsets. This only queries the - * offsets and never commits them. - */ - @volatile protected var _consumer: Consumer[Array[Byte], Array[Byte]] = null - - protected def consumer: Consumer[Array[Byte], Array[Byte]] = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - if (_consumer == null) { - val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams) - if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) { - newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId()) - } - _consumer = consumerStrategy.createConsumer(newKafkaParams) - } - _consumer - } + // These are needed here because of KafkaSourceProviderSuite + private[kafka010] val maxOffsetFetchAttempts: Int + private[kafka010] val offsetFetchAttemptIntervalMs: Long - private[kafka010] val maxOffsetFetchAttempts = - readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + // This is needed here because of KafkaContinuousStream + val driverKafkaParams: ju.Map[String, Object] - /** - * Number of partitions to read from Kafka. If this value is greater than the number of Kafka - * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark - * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or - * more depending on rounding errors or Kafka partitions that didn't receive any new data. - */ - private val minPartitions = - readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) - - private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) - - private[kafka010] val offsetFetchAttemptIntervalMs = - readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong - - /** - * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. 
- */ - private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { - minPartitions.map(_ > numTopicPartitions).getOrElse(false) - } - - private def nextGroupId(): String = { - groupId = driverGroupIdPrefix + "-" + nextId - nextId += 1 - groupId - } - - override def toString(): String = consumerStrategy.toString - - /** - * Closes the connection to Kafka, and cleans up state. - */ - def close(): Unit = { - if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } - uninterruptibleThreadRunner.shutdown() - } - - /** - * @return The Set of TopicPartitions for a given topic - */ - def fetchTopicPartitions(): Set[TopicPartition] = uninterruptibleThreadRunner.runUninterruptibly { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - consumer.pause(partitions) - partitions.asScala.toSet - } - - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ + def close(): Unit def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, - isStartingOffsets: Boolean): Map[TopicPartition, Long] = { - def validateTopicPartitions(partitions: Set[TopicPartition], - partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(partitions == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") - partitionOffsets - } - val partitions = fetchTopicPartitions() - // Obtain TopicPartition offsets with late binding support - offsetRangeLimit match { - case EarliestOffsetRangeLimit => partitions.map { - case tp => tp -> KafkaOffsetRangeLimit.EARLIEST - }.toMap - case LatestOffsetRangeLimit => partitions.map { - case tp => tp -> KafkaOffsetRangeLimit.LATEST - }.toMap - case SpecificOffsetRangeLimit(partitionOffsets) => - validateTopicPartitions(partitions, partitionOffsets) - case SpecificTimestampRangeLimit(partitionTimestamps) => - fetchSpecificTimestampBasedOffsets(partitionTimestamps, - failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets - } - } - - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ + isStartingOffsets: Boolean): Map[TopicPartition, Long] def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], - reportDataLoss: String => Unit): KafkaSourceOffset = { - val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => - assert(partitions.asScala == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest, if you don't care.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") - logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") - } - - val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => - partitionOffsets - } - - val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { fetched => - partitionOffsets.foreach { - case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && - off != KafkaOffsetRangeLimit.EARLIEST => - if (fetched(tp) != off) { - reportDataLoss( - s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") - } - case _ => - // no real way to check that beginning or end is reasonable - } - } - - fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, - fnAssertFetchedOffsets) - } - + reportDataLoss: String => Unit): KafkaSourceOffset def fetchSpecificTimestampBasedOffsets( partitionTimestamps: Map[TopicPartition, Long], - failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { - val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => - assert(partitions.asScala == partitionTimestamps.keySet, - "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + - s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionTimestamps") - } - - val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { - val converted = partitionTimestamps.map { case (tp, timestamp) => - tp -> java.lang.Long.valueOf(timestamp) - }.asJava - - val offsetForTime: ju.Map[TopicPartition, OffsetAndTimestamp] = - consumer.offsetsForTimes(converted) - - offsetForTime.asScala.map { case (tp, offsetAndTimestamp) => - if (failsOnNoMatchingOffset) { - assert(offsetAndTimestamp != null, "No offset matched from request of " + - s"topic-partition $tp and timestamp ${partitionTimestamps(tp)}.") - } - - if (offsetAndTimestamp == null) { - tp -> KafkaOffsetRangeLimit.LATEST - } else { - tp -> offsetAndTimestamp.offset() - } - }.toMap - } - } - - val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { _ => } - - fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, - fnAssertFetchedOffsets) - } - - private def fetchSpecificOffsets0( - fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, - fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long], - fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit): KafkaSourceOffset = { - val fetched = partitionsAssignedToConsumer { - partitions => { - fnAssertParametersWithPartitions(partitions) - - val partitionOffsets = fnRetrievePartitionOffsets(partitions) - - partitionOffsets.foreach { - case (tp, KafkaOffsetRangeLimit.LATEST) => - consumer.seekToEnd(ju.Arrays.asList(tp)) - case (tp, KafkaOffsetRangeLimit.EARLIEST) => - consumer.seekToBeginning(ju.Arrays.asList(tp)) - case (tp, off) => consumer.seek(tp, off) - } - - partitionOffsets.map { - case (tp, _) => tp -> consumer.position(tp) - } - } - } - - fnAssertFetchedOffsets(fetched) - - KafkaSourceOffset(fetched) - } - - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. 
- */ - def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( - partitions => { - logDebug("Seeking to the beginning") - - consumer.seekToBeginning(partitions) - val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got earliest offsets for partition : $partitionOffsets") - partitionOffsets - }, fetchingEarliestOffset = true) - - /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * - * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called - * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after - * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. - */ - def fetchLatestOffsets( - knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = - partitionsAssignedToConsumer { partitions => { - logDebug("Seeking to the end.") - - if (knownOffsets.isEmpty) { - consumer.seekToEnd(partitions) - partitions.asScala.map(p => p -> consumer.position(p)).toMap - } else { - var partitionOffsets: PartitionOffsetMap = Map.empty - - /** - * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect - * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). - */ - def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { - var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() - partitionOffsets.foreach { case (tp, offset) => - knownOffsets.foreach(_.get(tp).foreach { knownOffset => - if (knownOffset > offset) { - val incorrectOffset = (tp, knownOffset, offset) - incorrectOffsets += incorrectOffset - } - }) - } - incorrectOffsets.toSeq - } - - // Retry to fetch latest offsets when detecting incorrect offsets. We don't use - // `withRetriesWithoutInterrupt` to retry because: - // - // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh - // consumer has a much bigger chance to hit KAFKA-7703. - // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. - var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil - var attempt = 0 - do { - consumer.seekToEnd(partitions) - partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap - attempt += 1 - - incorrectOffsets = findIncorrectOffsets() - if (incorrectOffsets.nonEmpty) { - logWarning("Found incorrect offsets in some partitions " + - s"(partition, previous offset, fetched offset): $incorrectOffsets") - if (attempt < maxOffsetFetchAttempts) { - logWarning("Retrying to fetch latest offsets because of incorrect offsets") - Thread.sleep(offsetFetchAttemptIntervalMs) - } - } - } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) - - logDebug(s"Got latest offsets for partition : $partitionOffsets") - partitionOffsets - } - } - } - - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. 
- */ - def fetchEarliestOffsets( - newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { - if (newPartitions.isEmpty) { - Map.empty[TopicPartition, Long] - } else { - partitionsAssignedToConsumer(partitions => { - // Get the earliest offset of each partition - consumer.seekToBeginning(partitions) - val partitionOffsets = newPartitions.filter { p => - // When deleting topics happen at the same time, some partitions may not be in - // `partitions`. So we need to ignore them - partitions.contains(p) - }.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") - partitionOffsets - }, fetchingEarliestOffset = true) - } - } - - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset + def fetchEarliestOffsets(): Map[TopicPartition, Long] + def fetchLatestOffsets(knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap + def fetchEarliestOffsets(newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, - endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { - val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) - val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) - - // Obtain topicPartitions in both from and until partition offset, ignoring - // topic partitions that were added and/or deleted between the two above calls. 
- if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { - implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) - val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") - val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") - throw new IllegalStateException("different topic partitions " + - s"for starting offsets topics[${fromTopics}] and " + - s"ending offsets topics[${untilTopics}]") - } - - // Calculate offset ranges - val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => - val fromOffset = fromPartitionOffsets.get(tp).getOrElse { - // This should not happen since topicPartitions contains all partitions not in - // fromPartitionOffsets - throw new IllegalStateException(s"$tp doesn't have a from offset") - } - val untilOffset = untilPartitionOffsets(tp) - KafkaOffsetRange(tp, fromOffset, untilOffset, None) - }.toSeq - - if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { - val fromOffsetsMap = - offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap - val untilOffsetsMap = - offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap - - // No need to report data loss here - val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets - val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets - val ranges = offsetRangesBase.map(_.topicPartition).map { tp => - KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) - } - val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) - divvied.flatMap { case (tp, splitOffsetRanges) => - if (splitOffsetRanges.length == 1) { - Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) - } else { - // the list can't be empty - val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) - val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) - Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end - } - }.toArray.toSeq - } else { - offsetRangesBase - } - } - - private def getSortedExecutorList(): Array[String] = { - def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { - if (a.host == b.host) { - a.executorId > b.executorId - } else { - a.host > b.host - } - } - - val bm = SparkEnv.get.blockManager - bm.master.getPeers(bm.blockManagerId).toArray - .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) - .sortWith(compare) - .map(_.toString) - } - - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, - reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { - // Find the new partitions, and get their earliest offsets - val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) - val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) - if (newPartitionInitialOffsets.keySet != newPartitions) { - // We cannot get from offsets for some partitions. It means they got deleted. 
- val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) - reportDataLoss( - s"Cannot find earliest offsets of ${deletedPartitions}. Some data may have been missed") - } - logInfo(s"Partitions added: $newPartitionInitialOffsets") - newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => - reportDataLoss( - s"Added partition $p starts from $o instead of 0. Some data may have been missed") - } - - val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) - if (deletedPartitions.nonEmpty) { - val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { - s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" - } else { - s"$deletedPartitions are gone. Some data may have been missed." - } - reportDataLoss(message) - } - - // Use the until partitions to calculate offset ranges to ignore partitions that have - // been deleted - val topicPartitions = untilPartitionOffsets.keySet.filter { tp => - // Ignore partitions that we don't know the from offsets. - newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) - }.toSeq - logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) - - val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets - val untilOffsets = untilPartitionOffsets - val ranges = topicPartitions.map { tp => - val fromOffset = fromOffsets(tp) - val untilOffset = untilOffsets(tp) - if (untilOffset < fromOffset) { - reportDataLoss(s"Partition $tp's offset was changed from " + - s"$fromOffset to $untilOffset, some data may have been missed") - } - KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) - } - rangeCalculator.getRanges(ranges, getSortedExecutorList) - } - - private def partitionsAssignedToConsumer( - body: ju.Set[TopicPartition] => Map[TopicPartition, Long], - fetchingEarliestOffset: Boolean = false) - : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { - - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - - if (!fetchingEarliestOffset) { - // Call `position` to wait until the potential offset request triggered by `poll(0)` is - // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by - // `poll(0)` may reset offsets that should have been set by another request. - partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) - } - - consumer.pause(partitions) - logDebug(s"Partitions assigned to consumer: $partitions.") - body(partitions) - } - } - - /** - * Helper function that does multiple retries on a body of code that returns offsets. - * Retries are needed to handle transient failures. For e.g. race conditions between getting - * assignment and getting position while topics/partitions are deleted can cause NPEs. - * - * This method also makes sure `body` won't be interrupted to workaround a potential issue in - * `KafkaConsumer.poll`. 
(KAFKA-1894) - */ - private def withRetriesWithoutInterrupt( - body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - // Make sure `KafkaConsumer.poll` won't be interrupted (KAFKA-1894) - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] +} - synchronized { - var result: Option[Map[TopicPartition, Long]] = None - var attempt = 1 - var lastException: Throwable = null - while (result.isEmpty && attempt <= maxOffsetFetchAttempts - && !Thread.currentThread().isInterrupted) { - Thread.currentThread match { - case ut: UninterruptibleThread => - // "KafkaConsumer.poll" may hang forever if the thread is interrupted (E.g., the query - // is stopped)(KAFKA-1894). Hence, we just make sure we don't interrupt it. - // - // If the broker addresses are wrong, or Kafka cluster is down, "KafkaConsumer.poll" may - // hang forever as well. This cannot be resolved in KafkaSource until Kafka fixes the - // issue. - ut.runUninterruptibly { - try { - result = Some(body) - } catch { - case NonFatal(e) => - lastException = e - logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) - attempt += 1 - Thread.sleep(offsetFetchAttemptIntervalMs) - resetConsumer() - } - } - case _ => - throw new IllegalStateException( - "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") - } - } - if (Thread.interrupted()) { - throw new InterruptedException() - } - if (result.isEmpty) { - assert(attempt > maxOffsetFetchAttempts) - assert(lastException != null) - throw lastException - } - result.get +private[kafka010] object KafkaOffsetReader extends Logging { + def build( + consumerStrategy: ConsumerStrategy, + driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String): KafkaOffsetReader = { + if (SQLConf.get.useDeprecatedKafkaOffsetFetching) { + logDebug("Creating old and deprecated Consumer based offset reader") + new KafkaOffsetReaderConsumer(consumerStrategy, driverKafkaParams, readerOptions, + driverGroupIdPrefix) + } else { + logDebug("Creating new Admin based offset reader") + new KafkaOffsetReaderAdmin(consumerStrategy, driverKafkaParams, readerOptions, + driverGroupIdPrefix) } } - - private def stopConsumer(): Unit = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - if (_consumer != null) _consumer.close() - } - - private def resetConsumer(): Unit = synchronized { - stopConsumer() - _consumer = null // will automatically get reinitialized again - } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala new file mode 100644 index 0000000000000..d5905795c626b --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -0,0 +1,573 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} +import java.util.Locale + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.kafka.clients.admin.{Admin, ListOffsetsOptions, OffsetSpec} +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.common.{IsolationLevel, TopicPartition} +import org.apache.kafka.common.requests.OffsetFetchResponse + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.ExecutorCacheTaskLocation +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} + +/** + * This class uses Kafka's own [[Admin]] API to read data offsets from Kafka. + * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read + * by this source. These strategies directly correspond to the different consumption options + * in. This class is designed to return a configured [[Admin]] that is used by the + * [[KafkaSource]] to query for the offsets. See the docs on + * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] + * for more details. + * + * Note: This class is not ThreadSafe + */ +private[kafka010] class KafkaOffsetReaderAdmin( + consumerStrategy: ConsumerStrategy, + override val driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String) extends KafkaOffsetReader with Logging { + + private[kafka010] val maxOffsetFetchAttempts = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + + private[kafka010] val offsetFetchAttemptIntervalMs = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong + + /** + * [[UninterruptibleThreadRunner]] ensures that all [[Admin]] communication called in an + * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an + * [[UninterruptibleThread]], however for batch mode this is not the case. + */ + val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") + + /** + * An AdminClient used in the driver to query the latest Kafka offsets. + * This only queries the offsets because AdminClient has no functionality to commit offsets like + * KafkaConsumer. 
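
The new reader resolves offsets through Kafka's `Admin` client rather than a driver-side `KafkaConsumer`. The sketch below shows the underlying `listOffsets` call in isolation, mirroring the `listOffsets` helper defined a bit further down in this file; the bootstrap address and topic-partition are placeholder values.

```
import java.util.Properties

import scala.collection.JavaConverters._

import org.apache.kafka.clients.admin.{Admin, AdminClientConfig, ListOffsetsOptions, OffsetSpec}
import org.apache.kafka.common.{IsolationLevel, TopicPartition}

object AdminOffsetLookupSketch {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    // Placeholder address; point this at a reachable broker before running.
    props.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092")
    val admin = Admin.create(props)
    try {
      val tp = new TopicPartition("some-topic", 0)
      // OffsetSpec selects what to resolve: earliest(), latest(), or forTimestamp(ms).
      val params: java.util.Map[TopicPartition, OffsetSpec] =
        Map[TopicPartition, OffsetSpec](tp -> OffsetSpec.latest()).asJava
      val options = new ListOffsetsOptions(IsolationLevel.READ_UNCOMMITTED)
      // all().get() blocks until the broker answers; the reader wraps this in a helper.
      val resolved = admin.listOffsets(params, options).all().get().asScala
        .map { case (partition, info) => partition -> info.offset() }
      resolved.foreach { case (partition, offset) => println(s"$partition -> $offset") }
    } finally {
      admin.close()
    }
  }
}
```

Note that this path needs no generated `group.id` and no `poll(0)` call, which is why the Admin-based reader can drop the consumer-specific workarounds kept in `KafkaOffsetReaderConsumer` later in this patch.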
+ */ + @volatile protected var _admin: Admin = null + + protected def admin: Admin = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_admin == null) { + _admin = consumerStrategy.createAdmin(driverKafkaParams) + } + _admin + } + + lazy val isolationLevel: IsolationLevel = { + Option(driverKafkaParams.get(ConsumerConfig.ISOLATION_LEVEL_CONFIG)) match { + case Some(s: String) => IsolationLevel.valueOf(s.toUpperCase(Locale.ROOT)) + case None => IsolationLevel.valueOf( + ConsumerConfig.DEFAULT_ISOLATION_LEVEL.toUpperCase(Locale.ROOT)) + case _ => throw new IllegalArgumentException(s"${ConsumerConfig.ISOLATION_LEVEL_CONFIG} " + + "must be either not defined or with type String") + } + } + + private lazy val listOffsetsOptions = new ListOffsetsOptions(isolationLevel) + + private def listOffsets(admin: Admin, listOffsetsParams: ju.Map[TopicPartition, OffsetSpec]) = { + admin.listOffsets(listOffsetsParams, listOffsetsOptions).all().get().asScala + .map(result => result._1 -> result._2.offset()).toMap + } + + /** + * Number of partitions to read from Kafka. If this value is greater than the number of Kafka + * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark + * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or + * more depending on rounding errors or Kafka partitions that didn't receive any new data. + */ + private val minPartitions = + readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) + + private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) + + /** + * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. + */ + private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { + minPartitions.map(_ > numTopicPartitions).getOrElse(false) + } + + override def toString(): String = consumerStrategy.toString + + /** + * Closes the connection to Kafka, and cleans up state. + */ + override def close(): Unit = { + if (_admin != null) uninterruptibleThreadRunner.runUninterruptibly { stopAdmin() } + uninterruptibleThreadRunner.shutdown() + } + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ + override def fetchPartitionOffsets( + offsetRangeLimit: KafkaOffsetRangeLimit, + isStartingOffsets: Boolean): Map[TopicPartition, Long] = { + def validateTopicPartitions(partitions: Set[TopicPartition], + partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(partitions == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + logDebug(s"Assigned partitions: $partitions. 
Seeking to $partitionOffsets") + partitionOffsets + } + val partitions = uninterruptibleThreadRunner.runUninterruptibly { + consumerStrategy.assignedTopicPartitions(admin) + } + // Obtain TopicPartition offsets with late binding support + offsetRangeLimit match { + case EarliestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.EARLIEST + }.toMap + case LatestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.LATEST + }.toMap + case SpecificOffsetRangeLimit(partitionOffsets) => + validateTopicPartitions(partitions, partitionOffsets) + case SpecificTimestampRangeLimit(partitionTimestamps) => + fetchSpecificTimestampBasedOffsets(partitionTimestamps, + failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets + } + } + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. + * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ + override def fetchSpecificOffsets( + partitionOffsets: Map[TopicPartition, Long], + reportDataLoss: String => Unit): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest, if you don't care.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Assigned partitions: $partitions. Seeking to $partitionOffsets") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => + partitionOffsets + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets) + } + + override def fetchSpecificTimestampBasedOffsets( + partitionTimestamps: Map[TopicPartition, Long], + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionTimestamps.keySet, + "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + + s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Assigned partitions: $partitions. 
Seeking to $partitionTimestamps") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { + val listOffsetsParams = partitionTimestamps.map { case (tp, timestamp) => + tp -> OffsetSpec.forTimestamp(timestamp) + }.asJava + admin.listOffsets(listOffsetsParams, listOffsetsOptions).all().get().asScala.map { + case (tp, offsetSpec) => + if (failsOnNoMatchingOffset) { + assert(offsetSpec.offset() != OffsetFetchResponse.INVALID_OFFSET, "No offset " + + s"matched from request of topic-partition $tp and timestamp " + + s"${partitionTimestamps(tp)}.") + } + + if (offsetSpec.offset() == OffsetFetchResponse.INVALID_OFFSET) { + tp -> KafkaOffsetRangeLimit.LATEST + } else { + tp -> offsetSpec.offset() + } + }.toMap + } + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets) + } + + private def fetchSpecificOffsets0( + fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, + fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] + ): KafkaSourceOffset = { + val fetched = partitionsAssignedToConsumer { + partitions => { + fnAssertParametersWithPartitions(partitions) + + val partitionOffsets = fnRetrievePartitionOffsets(partitions) + + val listOffsetsParams = partitionOffsets.filter { case (_, off) => + off == KafkaOffsetRangeLimit.LATEST || off == KafkaOffsetRangeLimit.EARLIEST + }.map { case (tp, off) => + off match { + case KafkaOffsetRangeLimit.LATEST => + tp -> OffsetSpec.latest() + case KafkaOffsetRangeLimit.EARLIEST => + tp -> OffsetSpec.earliest() + } + } + val resolvedPartitionOffsets = listOffsets(admin, listOffsetsParams.asJava) + + partitionOffsets.map { case (tp, off) => + off match { + case KafkaOffsetRangeLimit.LATEST => + tp -> resolvedPartitionOffsets(tp) + case KafkaOffsetRangeLimit.EARLIEST => + tp -> resolvedPartitionOffsets(tp) + case _ => + tp -> off + } + } + } + } + + KafkaSourceOffset(fetched) + } + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + */ + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + partitions => { + val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got earliest offsets for partitions: $partitionOffsets") + partitionOffsets + }) + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called + * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after + * `poll` to wait until the potential offset request triggered by `poll(0)` is done. + * + * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. 
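
The paragraph above describes how `fetchLatestOffsets` audits the offsets returned by Kafka against `knownOffsets` and retries when any partition appears to have moved backwards (the KAFKA-7703 symptom). The standalone sketch below distills that loop; `fetchOnce`, the attempt limit, and the sleep interval are illustrative stand-ins for the real `listOffsets` call and the `maxOffsetFetchAttempts` / `offsetFetchAttemptIntervalMs` settings.

```
// Illustrative retry loop: re-fetch latest offsets while any of them is lower than a
// previously known offset, up to a bounded number of attempts.
object LatestOffsetAuditSketch {
  type PartitionOffsets = Map[String, Long] // keyed by "topic-partition" for brevity

  def fetchLatestWithAudit(
      fetchOnce: () => PartitionOffsets,
      knownOffsets: Option[PartitionOffsets],
      maxAttempts: Int = 3,
      retryIntervalMs: Long = 1000L): PartitionOffsets = {

    // Partitions whose freshly fetched offset is behind the known offset.
    def incorrect(offsets: PartitionOffsets): Seq[(String, Long, Long)] =
      knownOffsets.toSeq.flatMap { known =>
        offsets.collect {
          case (tp, offset) if known.get(tp).exists(_ > offset) => (tp, known(tp), offset)
        }
      }

    var attempt = 1
    var offsets = fetchOnce()
    while (incorrect(offsets).nonEmpty && attempt < maxAttempts) {
      Thread.sleep(retryIntervalMs)
      offsets = fetchOnce()
      attempt += 1
    }
    // After exhausting retries the last answer is returned as-is: a recreated topic is
    // indistinguishable from KAFKA-7703 at this point, as the comment above explains.
    offsets
  }
}
```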
+ */ + override def fetchLatestOffsets( + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = + partitionsAssignedToConsumer { partitions => { + val listOffsetsParams = partitions.asScala.map(_ -> OffsetSpec.latest()).toMap.asJava + if (knownOffsets.isEmpty) { + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got latest offsets for partitions: $partitionOffsets") + partitionOffsets + } else { + var partitionOffsets: PartitionOffsetMap = Map.empty + + /** + * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect + * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). + */ + def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { + var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() + partitionOffsets.foreach { case (tp, offset) => + knownOffsets.foreach(_.get(tp).foreach { knownOffset => + if (knownOffset > offset) { + val incorrectOffset = (tp, knownOffset, offset) + incorrectOffsets += incorrectOffset + } + }) + } + // toSeq seems redundant but it's needed for Scala 2.13 + incorrectOffsets.toSeq + } + + // Retry to fetch latest offsets when detecting incorrect offsets. We don't use + // `withRetriesWithoutInterrupt` to retry because: + // + // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh + // consumer has a much bigger chance to hit KAFKA-7703. + // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil + var attempt = 0 + do { + partitionOffsets = listOffsets(admin, listOffsetsParams) + attempt += 1 + + incorrectOffsets = findIncorrectOffsets() + if (incorrectOffsets.nonEmpty) { + logWarning("Found incorrect offsets in some partitions " + + s"(partition, previous offset, fetched offset): $incorrectOffsets") + if (attempt < maxOffsetFetchAttempts) { + logWarning("Retrying to fetch latest offsets because of incorrect offsets") + Thread.sleep(offsetFetchAttemptIntervalMs) + } + } + } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) + + logDebug(s"Got latest offsets for partitions: $partitionOffsets") + partitionOffsets + } + } + } + + /** + * Fetch the earliest offsets for specific topic partitions. + * The return result may not contain some partitions if they are deleted. + */ + override def fetchEarliestOffsets( + newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { + if (newPartitions.isEmpty) { + Map.empty[TopicPartition, Long] + } else { + partitionsAssignedToConsumer(partitions => { + // Get the earliest offset of each partition + val listOffsetsParams = newPartitions.filter { newPartition => + // When deleting topics happen at the same time, some partitions may not be in + // `partitions`. So we need to ignore them + partitions.contains(newPartition) + }.map(partition => partition -> OffsetSpec.earliest()).toMap.asJava + val partitionOffsets = listOffsets(admin, listOffsetsParams) + logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") + partitionOffsets + }) + } + } + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. 
This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ + override def getOffsetRangesFromUnresolvedOffsets( + startingOffsets: KafkaOffsetRangeLimit, + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { + val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) + val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) + + // Obtain topicPartitions in both from and until partition offset, ignoring + // topic partitions that were added and/or deleted between the two above calls. + if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { + implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) + val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") + val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") + throw new IllegalStateException("different topic partitions " + + s"for starting offsets topics[${fromTopics}] and " + + s"ending offsets topics[${untilTopics}]") + } + + // Calculate offset ranges + val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => + val fromOffset = fromPartitionOffsets.get(tp).getOrElse { + // This should not happen since topicPartitions contains all partitions not in + // fromPartitionOffsets + throw new IllegalStateException(s"$tp doesn't have a from offset") + } + val untilOffset = untilPartitionOffsets(tp) + KafkaOffsetRange(tp, fromOffset, untilOffset, None) + }.toSeq + + if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { + val fromOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap + val untilOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap + + // No need to report data loss here + val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets + val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets + val ranges = offsetRangesBase.map(_.topicPartition).map { tp => + KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) + } + val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) + divvied.flatMap { case (tp, splitOffsetRanges) => + if (splitOffsetRanges.length == 1) { + Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) + } else { + // the list can't be empty + val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) + val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) + Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end + } + }.toArray.toSeq + } else { + offsetRangesBase + } + } + + private def getSortedExecutorList: Array[String] = { + def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { + if (a.host == b.host) { + a.executorId > b.executorId + } else { + a.host > b.host + } + } + + val bm = SparkEnv.get.blockManager + bm.master.getPeers(bm.blockManagerId).toArray + .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) + .sortWith(compare) + .map(_.toString) + } + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. 
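
`getOffsetRangesFromUnresolvedOffsets` above leans on `KafkaOffsetRangeCalculator` to split large topic-partitions when `minPartitions` exceeds the number of Kafka partitions. That calculator is not part of this diff, so the sketch below only illustrates the general idea of slicing one offset range into roughly equal contiguous sub-ranges; the weighting logic of the real class may differ.

```
// Illustrative only: split a single [from, until) offset range into n contiguous
// sub-ranges of near-equal size, the basic building block behind minPartitions.
object RangeSplitSketch {
  final case class OffsetRange(topicPartition: String, fromOffset: Long, untilOffset: Long)

  def split(range: OffsetRange, n: Int): Seq[OffsetRange] = {
    require(n > 0, "need at least one slice")
    val size = range.untilOffset - range.fromOffset
    (0 until n).map { i =>
      // Integer arithmetic spreads any remainder across the last slices.
      val start = range.fromOffset + i * size / n
      val end = range.fromOffset + (i + 1) * size / n
      OffsetRange(range.topicPartition, start, end)
    }.filter(r => r.untilOffset > r.fromOffset)
  }

  def main(args: Array[String]): Unit = {
    // 10 offsets split three ways -> slice sizes 3, 3 and 4 (boundaries depend on rounding).
    split(OffsetRange("topic-0", 100L, 110L), 3).foreach(println)
  }
}
```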
+ */ + override def getOffsetRangesFromResolvedOffsets( + fromPartitionOffsets: PartitionOffsetMap, + untilPartitionOffsets: PartitionOffsetMap, + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { + // Find the new partitions, and get their earliest offsets + val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) + val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) + if (newPartitionInitialOffsets.keySet != newPartitions) { + // We cannot get from offsets for some partitions. It means they got deleted. + val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) + reportDataLoss( + s"Cannot find earliest offsets of ${deletedPartitions}. Some data may have been missed") + } + logInfo(s"Partitions added: $newPartitionInitialOffsets") + newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => + reportDataLoss( + s"Added partition $p starts from $o instead of 0. Some data may have been missed") + } + + val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) + if (deletedPartitions.nonEmpty) { + val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { + s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + } else { + s"$deletedPartitions are gone. Some data may have been missed." + } + reportDataLoss(message) + } + + // Use the until partitions to calculate offset ranges to ignore partitions that have + // been deleted + val topicPartitions = untilPartitionOffsets.keySet.filter { tp => + // Ignore partitions that we don't know the from offsets. + newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) + }.toSeq + logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) + + val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets + val untilOffsets = untilPartitionOffsets + val ranges = topicPartitions.map { tp => + val fromOffset = fromOffsets(tp) + val untilOffset = untilOffsets(tp) + if (untilOffset < fromOffset) { + reportDataLoss(s"Partition $tp's offset was changed from " + + s"$fromOffset to $untilOffset, some data may have been missed") + } + KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) + } + rangeCalculator.getRanges(ranges, getSortedExecutorList) + } + + private def partitionsAssignedToConsumer( + body: ju.Set[TopicPartition] => Map[TopicPartition, Long]) + : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + + withRetriesWithoutInterrupt { + val partitions = consumerStrategy.assignedTopicPartitions(admin).asJava + logDebug(s"Partitions assigned: $partitions.") + body(partitions) + } + } + + /** + * Helper function that does multiple retries on a body of code that returns offsets. + * Retries are needed to handle transient failures. For e.g. race conditions between getting + * assignment and getting position while topics/partitions are deleted can cause NPEs. + * + * This method also makes sure `body` won't be interrupted to workaround similar issues like in + * `KafkaConsumer.poll`. 
(KAFKA-1894) + */ + private def withRetriesWithoutInterrupt( + body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + + synchronized { + var result: Option[Map[TopicPartition, Long]] = None + var attempt = 1 + var lastException: Throwable = null + while (result.isEmpty && attempt <= maxOffsetFetchAttempts + && !Thread.currentThread().isInterrupted) { + Thread.currentThread match { + case ut: UninterruptibleThread => + ut.runUninterruptibly { + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetAdmin() + } + } + case _ => + throw new IllegalStateException( + "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + } + } + if (Thread.interrupted()) { + throw new InterruptedException() + } + if (result.isEmpty) { + assert(attempt > maxOffsetFetchAttempts) + assert(lastException != null) + throw lastException + } + result.get + } + } + + private def stopAdmin(): Unit = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_admin != null) _admin.close() + } + + private def resetAdmin(): Unit = synchronized { + stopAdmin() + _admin = null // will automatically get reinitialized again + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala new file mode 100644 index 0000000000000..eca41c510f1f2 --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala @@ -0,0 +1,614 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.kafka010 + +import java.{util => ju} + +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.util.control.NonFatal + +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, OffsetAndTimestamp} +import org.apache.kafka.common.TopicPartition + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.Logging +import org.apache.spark.scheduler.ExecutorCacheTaskLocation +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} + +/** + * This class uses Kafka's own [[org.apache.kafka.clients.consumer.KafkaConsumer]] API to + * read data offsets from Kafka. + * The [[ConsumerStrategy]] class defines which Kafka topics and partitions should be read + * by this source. 
These strategies directly correspond to the different consumption options + * in. This class is designed to return a configured + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] that is used by the + * [[KafkaSource]] to query for the offsets. See the docs on + * [[org.apache.spark.sql.kafka010.ConsumerStrategy]] + * for more details. + * + * Note: This class is not ThreadSafe + */ +private[kafka010] class KafkaOffsetReaderConsumer( + consumerStrategy: ConsumerStrategy, + override val driverKafkaParams: ju.Map[String, Object], + readerOptions: CaseInsensitiveMap[String], + driverGroupIdPrefix: String) extends KafkaOffsetReader with Logging { + + /** + * [[UninterruptibleThreadRunner]] ensures that all + * [[org.apache.kafka.clients.consumer.KafkaConsumer]] communication called in an + * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an + * [[UninterruptibleThread]], however for batch mode this is not the case. + */ + val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") + + /** + * Place [[groupId]] and [[nextId]] here so that they are initialized before any consumer is + * created -- see SPARK-19564. + */ + private var groupId: String = null + private var nextId = 0 + + /** + * A KafkaConsumer used in the driver to query the latest Kafka offsets. This only queries the + * offsets and never commits them. + */ + @volatile protected var _consumer: Consumer[Array[Byte], Array[Byte]] = null + + protected def consumer: Consumer[Array[Byte], Array[Byte]] = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_consumer == null) { + val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams) + if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) { + newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId()) + } + _consumer = consumerStrategy.createConsumer(newKafkaParams) + } + _consumer + } + + private[kafka010] val maxOffsetFetchAttempts = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_NUM_RETRY, "3").toInt + + /** + * Number of partitions to read from Kafka. If this value is greater than the number of Kafka + * topicPartitions, we will split up the read tasks of the skewed partitions to multiple Spark + * tasks. The number of Spark tasks will be *approximately* `numPartitions`. It can be less or + * more depending on rounding errors or Kafka partitions that didn't receive any new data. + */ + private val minPartitions = + readerOptions.get(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY).map(_.toInt) + + private val rangeCalculator = new KafkaOffsetRangeCalculator(minPartitions) + + private[kafka010] val offsetFetchAttemptIntervalMs = + readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong + + /** + * Whether we should divide Kafka TopicPartitions with a lot of data into smaller Spark tasks. + */ + private def shouldDivvyUpLargePartitions(numTopicPartitions: Int): Boolean = { + minPartitions.map(_ > numTopicPartitions).getOrElse(false) + } + + private def nextGroupId(): String = { + groupId = driverGroupIdPrefix + "-" + nextId + nextId += 1 + groupId + } + + override def toString(): String = consumerStrategy.toString + + /** + * Closes the connection to Kafka, and cleans up state. 
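
Unlike the Admin-based reader, the consumer-based reader above still needs a `group.id` for its driver-side `KafkaConsumer`, and it generates a fresh one per consumer from `driverGroupIdPrefix` and `nextId` unless the user supplied one (see the SPARK-19564 note in the hunk). A small sketch of that parameter preparation, using plain maps instead of the real consumer strategy, is shown below.

```
import java.{util => ju}

import org.apache.kafka.clients.consumer.ConsumerConfig

// Illustrative only: build driver-side consumer params, injecting a generated group id
// when the caller did not configure one, mirroring the logic in the hunk above.
class DriverGroupIdSketch(driverGroupIdPrefix: String) {
  private var nextId = 0

  private def nextGroupId(): String = {
    val groupId = driverGroupIdPrefix + "-" + nextId
    nextId += 1
    groupId
  }

  def prepareParams(driverKafkaParams: ju.Map[String, Object]): ju.Map[String, Object] = {
    val newKafkaParams = new ju.HashMap[String, Object](driverKafkaParams)
    if (driverKafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG) == null) {
      newKafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, nextGroupId())
    }
    newKafkaParams
  }
}
```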
+ */ + override def close(): Unit = { + if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } + uninterruptibleThreadRunner.shutdown() + } + + /** + * @return The Set of TopicPartitions for a given topic + */ + private def fetchTopicPartitions(): Set[TopicPartition] = + uninterruptibleThreadRunner.runUninterruptibly { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + // Poll to get the latest assigned partitions + consumer.poll(0) + val partitions = consumer.assignment() + consumer.pause(partitions) + partitions.asScala.toSet + } + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ + override def fetchPartitionOffsets( + offsetRangeLimit: KafkaOffsetRangeLimit, + isStartingOffsets: Boolean): Map[TopicPartition, Long] = { + def validateTopicPartitions(partitions: Set[TopicPartition], + partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + assert(partitions == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") + partitionOffsets + } + val partitions = fetchTopicPartitions() + // Obtain TopicPartition offsets with late binding support + offsetRangeLimit match { + case EarliestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.EARLIEST + }.toMap + case LatestOffsetRangeLimit => partitions.map { + case tp => tp -> KafkaOffsetRangeLimit.LATEST + }.toMap + case SpecificOffsetRangeLimit(partitionOffsets) => + validateTopicPartitions(partitions, partitionOffsets) + case SpecificTimestampRangeLimit(partitionTimestamps) => + fetchSpecificTimestampBasedOffsets(partitionTimestamps, + failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets + } + } + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. + * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ + override def fetchSpecificOffsets( + partitionOffsets: Map[TopicPartition, Long], + reportDataLoss: String => Unit): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest, if you don't care.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => + partitionOffsets + } + + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { fetched => + partitionOffsets.foreach { + case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && + off != KafkaOffsetRangeLimit.EARLIEST => + if (fetched(tp) != off) { + reportDataLoss( + s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") + } + case _ => + // no real way to check that beginning or end is reasonable + } + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + override def fetchSpecificTimestampBasedOffsets( + partitionTimestamps: Map[TopicPartition, Long], + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionTimestamps.keySet, + "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + + s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionTimestamps") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { + val converted = partitionTimestamps.map { case (tp, timestamp) => + tp -> java.lang.Long.valueOf(timestamp) + }.asJava + + val offsetForTime: ju.Map[TopicPartition, OffsetAndTimestamp] = + consumer.offsetsForTimes(converted) + + offsetForTime.asScala.map { case (tp, offsetAndTimestamp) => + if (failsOnNoMatchingOffset) { + assert(offsetAndTimestamp != null, "No offset matched from request of " + + s"topic-partition $tp and timestamp ${partitionTimestamps(tp)}.") + } + + if (offsetAndTimestamp == null) { + tp -> KafkaOffsetRangeLimit.LATEST + } else { + tp -> offsetAndTimestamp.offset() + } + }.toMap + } + } + + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { _ => } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + private def fetchSpecificOffsets0( + fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, + fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit): KafkaSourceOffset = { + val fetched = partitionsAssignedToConsumer { + partitions => { + fnAssertParametersWithPartitions(partitions) + + val partitionOffsets = fnRetrievePartitionOffsets(partitions) + + partitionOffsets.foreach { + case (tp, KafkaOffsetRangeLimit.LATEST) => + consumer.seekToEnd(ju.Arrays.asList(tp)) + case (tp, KafkaOffsetRangeLimit.EARLIEST) => + consumer.seekToBeginning(ju.Arrays.asList(tp)) + case (tp, off) => consumer.seek(tp, off) + } + + partitionOffsets.map { + case (tp, _) => tp -> consumer.position(tp) + } + } + } + + fnAssertFetchedOffsets(fetched) + + KafkaSourceOffset(fetched) + } + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. 
+ */ + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + partitions => { + logDebug("Seeking to the beginning") + + consumer.seekToBeginning(partitions) + val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap + logDebug(s"Got earliest offsets for partition : $partitionOffsets") + partitionOffsets + }, fetchingEarliestOffset = true) + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called + * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after + * `poll` to wait until the potential offset request triggered by `poll(0)` is done. + * + * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. + */ + override def fetchLatestOffsets( + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = + partitionsAssignedToConsumer { partitions => { + logDebug("Seeking to the end.") + + if (knownOffsets.isEmpty) { + consumer.seekToEnd(partitions) + partitions.asScala.map(p => p -> consumer.position(p)).toMap + } else { + var partitionOffsets: PartitionOffsetMap = Map.empty + + /** + * Compare `knownOffsets` and `partitionOffsets`. Returns all partitions that have incorrect + * latest offset (offset in `knownOffsets` is great than the one in `partitionOffsets`). + */ + def findIncorrectOffsets(): Seq[(TopicPartition, Long, Long)] = { + var incorrectOffsets = ArrayBuffer[(TopicPartition, Long, Long)]() + partitionOffsets.foreach { case (tp, offset) => + knownOffsets.foreach(_.get(tp).foreach { knownOffset => + if (knownOffset > offset) { + val incorrectOffset = (tp, knownOffset, offset) + incorrectOffsets += incorrectOffset + } + }) + } + incorrectOffsets.toSeq + } + + // Retry to fetch latest offsets when detecting incorrect offsets. We don't use + // `withRetriesWithoutInterrupt` to retry because: + // + // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh + // consumer has a much bigger chance to hit KAFKA-7703. + // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil + var attempt = 0 + do { + consumer.seekToEnd(partitions) + partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap + attempt += 1 + + incorrectOffsets = findIncorrectOffsets() + if (incorrectOffsets.nonEmpty) { + logWarning("Found incorrect offsets in some partitions " + + s"(partition, previous offset, fetched offset): $incorrectOffsets") + if (attempt < maxOffsetFetchAttempts) { + logWarning("Retrying to fetch latest offsets because of incorrect offsets") + Thread.sleep(offsetFetchAttemptIntervalMs) + } + } + } while (incorrectOffsets.nonEmpty && attempt < maxOffsetFetchAttempts) + + logDebug(s"Got latest offsets for partition : $partitionOffsets") + partitionOffsets + } + } + } + + /** + * Fetch the earliest offsets for specific topic partitions. 
+ * The return result may not contain some partitions if they are deleted. + */ + override def fetchEarliestOffsets( + newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { + if (newPartitions.isEmpty) { + Map.empty[TopicPartition, Long] + } else { + partitionsAssignedToConsumer(partitions => { + // Get the earliest offset of each partition + consumer.seekToBeginning(partitions) + val partitionOffsets = newPartitions.filter { p => + // When deleting topics happen at the same time, some partitions may not be in + // `partitions`. So we need to ignore them + partitions.contains(p) + }.map(p => p -> consumer.position(p)).toMap + logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") + partitionOffsets + }, fetchingEarliestOffset = true) + } + } + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ + override def getOffsetRangesFromUnresolvedOffsets( + startingOffsets: KafkaOffsetRangeLimit, + endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { + val fromPartitionOffsets = fetchPartitionOffsets(startingOffsets, isStartingOffsets = true) + val untilPartitionOffsets = fetchPartitionOffsets(endingOffsets, isStartingOffsets = false) + + // Obtain topicPartitions in both from and until partition offset, ignoring + // topic partitions that were added and/or deleted between the two above calls. 
+ if (fromPartitionOffsets.keySet != untilPartitionOffsets.keySet) { + implicit val topicOrdering: Ordering[TopicPartition] = Ordering.by(t => t.topic()) + val fromTopics = fromPartitionOffsets.keySet.toList.sorted.mkString(",") + val untilTopics = untilPartitionOffsets.keySet.toList.sorted.mkString(",") + throw new IllegalStateException("different topic partitions " + + s"for starting offsets topics[${fromTopics}] and " + + s"ending offsets topics[${untilTopics}]") + } + + // Calculate offset ranges + val offsetRangesBase = untilPartitionOffsets.keySet.map { tp => + val fromOffset = fromPartitionOffsets.get(tp).getOrElse { + // This should not happen since topicPartitions contains all partitions not in + // fromPartitionOffsets + throw new IllegalStateException(s"$tp doesn't have a from offset") + } + val untilOffset = untilPartitionOffsets(tp) + KafkaOffsetRange(tp, fromOffset, untilOffset, None) + }.toSeq + + if (shouldDivvyUpLargePartitions(offsetRangesBase.size)) { + val fromOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.fromOffset)).toMap + val untilOffsetsMap = + offsetRangesBase.map(range => (range.topicPartition, range.untilOffset)).toMap + + // No need to report data loss here + val resolvedFromOffsets = fetchSpecificOffsets(fromOffsetsMap, _ => ()).partitionToOffsets + val resolvedUntilOffsets = fetchSpecificOffsets(untilOffsetsMap, _ => ()).partitionToOffsets + val ranges = offsetRangesBase.map(_.topicPartition).map { tp => + KafkaOffsetRange(tp, resolvedFromOffsets(tp), resolvedUntilOffsets(tp), preferredLoc = None) + } + val divvied = rangeCalculator.getRanges(ranges).groupBy(_.topicPartition) + divvied.flatMap { case (tp, splitOffsetRanges) => + if (splitOffsetRanges.length == 1) { + Seq(KafkaOffsetRange(tp, fromOffsetsMap(tp), untilOffsetsMap(tp), None)) + } else { + // the list can't be empty + val first = splitOffsetRanges.head.copy(fromOffset = fromOffsetsMap(tp)) + val end = splitOffsetRanges.last.copy(untilOffset = untilOffsetsMap(tp)) + Seq(first) ++ splitOffsetRanges.drop(1).dropRight(1) :+ end + } + }.toArray.toSeq + } else { + offsetRangesBase + } + } + + private def getSortedExecutorList(): Array[String] = { + def compare(a: ExecutorCacheTaskLocation, b: ExecutorCacheTaskLocation): Boolean = { + if (a.host == b.host) { + a.executorId > b.executorId + } else { + a.host > b.host + } + } + + val bm = SparkEnv.get.blockManager + bm.master.getPeers(bm.blockManagerId).toArray + .map(x => ExecutorCacheTaskLocation(x.host, x.executorId)) + .sortWith(compare) + .map(_.toString) + } + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. + */ + override def getOffsetRangesFromResolvedOffsets( + fromPartitionOffsets: PartitionOffsetMap, + untilPartitionOffsets: PartitionOffsetMap, + reportDataLoss: String => Unit): Seq[KafkaOffsetRange] = { + // Find the new partitions, and get their earliest offsets + val newPartitions = untilPartitionOffsets.keySet.diff(fromPartitionOffsets.keySet) + val newPartitionInitialOffsets = fetchEarliestOffsets(newPartitions.toSeq) + if (newPartitionInitialOffsets.keySet != newPartitions) { + // We cannot get from offsets for some partitions. It means they got deleted. + val deletedPartitions = newPartitions.diff(newPartitionInitialOffsets.keySet) + reportDataLoss( + s"Cannot find earliest offsets of ${deletedPartitions}. 
Some data may have been missed") + } + logInfo(s"Partitions added: $newPartitionInitialOffsets") + newPartitionInitialOffsets.filter(_._2 != 0).foreach { case (p, o) => + reportDataLoss( + s"Added partition $p starts from $o instead of 0. Some data may have been missed") + } + + val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) + if (deletedPartitions.nonEmpty) { + val message = if (driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { + s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + } else { + s"$deletedPartitions are gone. Some data may have been missed." + } + reportDataLoss(message) + } + + // Use the until partitions to calculate offset ranges to ignore partitions that have + // been deleted + val topicPartitions = untilPartitionOffsets.keySet.filter { tp => + // Ignore partitions that we don't know the from offsets. + newPartitionInitialOffsets.contains(tp) || fromPartitionOffsets.contains(tp) + }.toSeq + logDebug("TopicPartitions: " + topicPartitions.mkString(", ")) + + val fromOffsets = fromPartitionOffsets ++ newPartitionInitialOffsets + val untilOffsets = untilPartitionOffsets + val ranges = topicPartitions.map { tp => + val fromOffset = fromOffsets(tp) + val untilOffset = untilOffsets(tp) + if (untilOffset < fromOffset) { + reportDataLoss(s"Partition $tp's offset was changed from " + + s"$fromOffset to $untilOffset, some data may have been missed") + } + KafkaOffsetRange(tp, fromOffset, untilOffset, preferredLoc = None) + } + rangeCalculator.getRanges(ranges, getSortedExecutorList) + } + + private def partitionsAssignedToConsumer( + body: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fetchingEarliestOffset: Boolean = false) + : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + + withRetriesWithoutInterrupt { + // Poll to get the latest assigned partitions + consumer.poll(0) + val partitions = consumer.assignment() + + if (!fetchingEarliestOffset) { + // Call `position` to wait until the potential offset request triggered by `poll(0)` is + // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by + // `poll(0)` may reset offsets that should have been set by another request. + partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) + } + + consumer.pause(partitions) + logDebug(s"Partitions assigned to consumer: $partitions.") + body(partitions) + } + } + + /** + * Helper function that does multiple retries on a body of code that returns offsets. + * Retries are needed to handle transient failures. For e.g. race conditions between getting + * assignment and getting position while topics/partitions are deleted can cause NPEs. + * + * This method also makes sure `body` won't be interrupted to workaround a potential issue in + * `KafkaConsumer.poll`. 
(KAFKA-1894) + */ + private def withRetriesWithoutInterrupt( + body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { + // Make sure `KafkaConsumer.poll` won't be interrupted (KAFKA-1894) + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + + synchronized { + var result: Option[Map[TopicPartition, Long]] = None + var attempt = 1 + var lastException: Throwable = null + while (result.isEmpty && attempt <= maxOffsetFetchAttempts + && !Thread.currentThread().isInterrupted) { + Thread.currentThread match { + case ut: UninterruptibleThread => + // "KafkaConsumer.poll" may hang forever if the thread is interrupted (E.g., the query + // is stopped)(KAFKA-1894). Hence, we just make sure we don't interrupt it. + // + // If the broker addresses are wrong, or Kafka cluster is down, "KafkaConsumer.poll" may + // hang forever as well. This cannot be resolved in KafkaSource until Kafka fixes the + // issue. + ut.runUninterruptibly { + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetConsumer() + } + } + case _ => + throw new IllegalStateException( + "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + } + } + if (Thread.interrupted()) { + throw new InterruptedException() + } + if (result.isEmpty) { + assert(attempt > maxOffsetFetchAttempts) + assert(lastException != null) + throw lastException + } + result.get + } + } + + private def stopConsumer(): Unit = synchronized { + assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) + if (_consumer != null) _consumer.close() + } + + private def resetConsumer(): Unit = synchronized { + stopConsumer() + _consumer = null // will automatically get reinitialized again + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 69a66e2209773..ed3407c822b96 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -56,7 +56,7 @@ private[kafka010] class KafkaRelation( // id. Hence, we should generate a unique id for each query. 
val uniqueGroupId = KafkaSourceProvider.batchUniqueGroupId(sourceOptions) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy, KafkaSourceProvider.kafkaParamsForDriver(specifiedKafkaParams), sourceOptions, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index 3ace0874674b6..7299b182ae1cc 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -93,7 +93,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveParameters, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveParameters), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveParameters, @@ -460,7 +460,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveOptions), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveOptions, @@ -489,7 +489,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) - val kafkaOffsetReader = new KafkaOffsetReader( + val kafkaOffsetReader = KafkaOffsetReader.build( strategy(caseInsensitiveOptions), kafkaParamsForDriver(specifiedKafkaParams), caseInsensitiveOptions, diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala new file mode 100644 index 0000000000000..939cf0bb36a8c --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/ConsumerStrategySuite.scala @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.kafka010 + +import java.util.UUID + +import scala.collection.JavaConverters._ + +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.admin.Admin +import org.apache.kafka.common.TopicPartition +import org.mockito.Mockito.mock + +import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} + +class ConsumerStrategySuite extends SparkFunSuite { + private var testUtils: KafkaTestUtils = _ + + private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) + + protected def newTopic(prefix: String = "topic") = s"$prefix-${UUID.randomUUID().toString}" + + private def setSparkEnv(settings: Iterable[(String, String)]): Unit = { + val conf = new SparkConf().setAll(settings) + val env = mock(classOf[SparkEnv]) + doReturn(conf).when(env).conf + SparkEnv.set(env) + } + + private def adminProps = { + Map[String, Object]( + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> testUtils.brokerAddress + ).asJava + } + + private def admin(strategy: ConsumerStrategy): Admin = { + strategy.createAdmin(adminProps) + } + + override def beforeAll(): Unit = { + super.beforeAll() + testUtils = new KafkaTestUtils(Map.empty) + testUtils.setup() + setSparkEnv(Map.empty) + } + + override def afterAll(): Unit = { + if (testUtils != null) { + testUtils.teardown() + testUtils = null + } + super.afterAll() + } + + test("createAdmin must create admin properly") { + val strategy = AssignStrategy(Array.empty) + assert(strategy.createAdmin(adminProps) != null) + } + + test("AssignStrategy.assignedTopicPartitions must give back all assigned") { + val assignedTopic = newTopic() + testUtils.createTopic(assignedTopic, partitions = 3) + val otherExistingTopic = newTopic() + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Array( + new TopicPartition(assignedTopic, 0), + new TopicPartition(assignedTopic, 2) + ) + val strategy = AssignStrategy(partitions) + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions.toSet) + + testUtils.deleteTopic(assignedTopic) + testUtils.deleteTopic(otherExistingTopic) + } + + test("AssignStrategy.assignedTopicPartitions must skip invalid partitions") { + val assignedTopic = newTopic() + testUtils.createTopic(assignedTopic, partitions = 1) + + val partitions = Array(new TopicPartition(assignedTopic, 1)) + val strategy = AssignStrategy(partitions) + assert(strategy.assignedTopicPartitions(admin(strategy)) === Set.empty) + + testUtils.deleteTopic(assignedTopic) + } + + test("SubscribeStrategy.assignedTopicPartitions must give back all assigned") { + val subscribedTopic1 = newTopic() + testUtils.createTopic(subscribedTopic1, partitions = 2) + val subscribedTopic2 = newTopic() + testUtils.createTopic(subscribedTopic2, partitions = 2) + val otherExistingTopic = newTopic() + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Set( + new TopicPartition(subscribedTopic1, 0), + new TopicPartition(subscribedTopic1, 1), + new TopicPartition(subscribedTopic2, 0), + new TopicPartition(subscribedTopic2, 1) + ) + val strategy = SubscribeStrategy(Seq(subscribedTopic1, subscribedTopic2)) + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions) + + testUtils.deleteTopic(subscribedTopic1) + testUtils.deleteTopic(subscribedTopic2) + testUtils.deleteTopic(otherExistingTopic) + } + + test("SubscribePatternStrategy.assignedTopicPartitions must give back all assigned") { + val subscribePattern = "subscribePattern" + val subscribedTopic1 = 
newTopic(subscribePattern) + testUtils.createTopic(subscribedTopic1, partitions = 2) + val subscribedTopic2 = newTopic(subscribePattern) + testUtils.createTopic(subscribedTopic2, partitions = 2) + val otherExistingTopic = newTopic("other") + testUtils.createTopic(otherExistingTopic, partitions = 2) + + val partitions = Set( + new TopicPartition(subscribedTopic1, 0), + new TopicPartition(subscribedTopic1, 1), + new TopicPartition(subscribedTopic2, 0), + new TopicPartition(subscribedTopic2, 1) + ) + val strategy = SubscribePatternStrategy(s"$subscribePattern.*") + assert(strategy.assignedTopicPartitions(admin(strategy)) === partitions) + + testUtils.deleteTopic(subscribedTopic1) + testUtils.deleteTopic(subscribedTopic2) + testUtils.deleteTopic(otherExistingTopic) + } +} diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 08f673455d729..f2be8475151e3 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -608,7 +608,9 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { // in executors. val query = kafka.map(kv => kv._2.toInt).writeStream.foreach(new ForeachWriter[Int] { override def open(partitionId: Long, version: Long): Boolean = { + // Re-create topic since Kafka auto topic creation is not supported by Spark KafkaSourceSuite.globalTestUtils.deleteTopic(topic) + KafkaSourceSuite.globalTestUtils.createTopic(topic) true } @@ -690,19 +692,25 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } test("allow group.id prefix") { - testGroupId("groupIdPrefix", (expected, actual) => { - assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), - "Valid consumer groups don't contain the expected group id - " + - s"Valid consumer groups: $actual / expected group id: $expected") - }) + // Group ID prefix is only supported by consumer based offset reader + if (spark.conf.get(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING)) { + testGroupId("groupIdPrefix", (expected, actual) => { + assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), + "Valid consumer groups don't contain the expected group id - " + + s"Valid consumer groups: $actual / expected group id: $expected") + }) + } } test("allow group.id override") { - testGroupId("kafka.group.id", (expected, actual) => { - assert(actual.exists(_ === expected), "Valid consumer groups don't " + - s"contain the expected group id - Valid consumer groups: $actual / " + - s"expected group id: $expected") - }) + // Group ID override is only supported by consumer based offset reader + if (spark.conf.get(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING)) { + testGroupId("kafka.group.id", (expected, actual) => { + assert(actual.exists(_ === expected), "Valid consumer groups don't " + + s"contain the expected group id - Valid consumer groups: $actual / " + + s"expected group id: $expected") + }) + } } private def testGroupId(groupIdKey: String, @@ -1121,6 +1129,20 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { } +class KafkaMicroBatchV1SourceWithAdminSuite extends KafkaMicroBatchV1SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + 
spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") + } +} + +class KafkaMicroBatchV2SourceWithAdminSuite extends KafkaMicroBatchV2SourceSuite { + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") + } +} + class KafkaMicroBatchV1SourceSuite extends KafkaMicroBatchSourceSuiteBase { override def beforeAll(): Unit = { super.beforeAll() diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala index ad22a56d9157f..d1e49b0e14314 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala @@ -17,13 +17,17 @@ package org.apache.spark.sql.kafka010 +import java.util.Locale import java.util.UUID import java.util.concurrent.atomic.AtomicInteger -import org.apache.kafka.common.TopicPartition +import org.apache.kafka.clients.CommonClientConfigs +import org.apache.kafka.clients.consumer.ConsumerConfig +import org.apache.kafka.common.{IsolationLevel, TopicPartition} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.kafka010.KafkaOffsetRangeLimit.{EARLIEST, LATEST} import org.apache.spark.sql.test.SharedSparkSession @@ -53,9 +57,9 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk } private def createKafkaReader(topic: String, minPartitions: Option[Int]): KafkaOffsetReader = { - new KafkaOffsetReader( + KafkaOffsetReader.build( SubscribeStrategy(Seq(topic)), - org.apache.spark.sql.kafka010.KafkaSourceProvider.kafkaParamsForDriver( + KafkaSourceProvider.kafkaParamsForDriver( Map( "bootstrap.servers" -> testUtils.brokerAddress @@ -66,7 +70,39 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk ) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - using specific offsets") { + test("isolationLevel must give back default isolation level when not set") { + testIsolationLevel(None, + IsolationLevel.valueOf(ConsumerConfig.DEFAULT_ISOLATION_LEVEL.toUpperCase(Locale.ROOT))) + } + + test("isolationLevel must give back READ_UNCOMMITTED when set") { + testIsolationLevel(Some("read_uncommitted"), IsolationLevel.READ_UNCOMMITTED) + } + + test("isolationLevel must give back READ_COMMITTED when set") { + testIsolationLevel(Some("read_committed"), IsolationLevel.READ_COMMITTED) + } + + test("isolationLevel must throw exception when invalid isolation level set") { + intercept[IllegalArgumentException] { + testIsolationLevel(Some("intentionally_invalid"), IsolationLevel.READ_COMMITTED) + } + } + + private def testIsolationLevel(kafkaParam: Option[String], isolationLevel: IsolationLevel) = { + var kafkaParams = Map(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> testUtils.brokerAddress) + kafkaParam.foreach(p => kafkaParams ++= Map(ConsumerConfig.ISOLATION_LEVEL_CONFIG -> p)) + val reader = new KafkaOffsetReaderAdmin( + SubscribeStrategy(Seq()), + KafkaSourceProvider.kafkaParamsForDriver(kafkaParams), + CaseInsensitiveMap(Map.empty), + "" + ) + assert(reader.isolationLevel === isolationLevel) + } + + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "using specific offsets") { val topic 
= newTopic() testUtils.createTopic(topic, partitions = 1) testUtils.sendMessages(topic, (0 until 10).map(_.toString).toArray, Some(0)) @@ -74,14 +110,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val reader = createKafkaReader(topic, minPartitions = Some(3)) val startingOffsets = SpecificOffsetRangeLimit(Map(tp -> 1)) val endingOffsets = SpecificOffsetRangeLimit(Map(tp -> 4)) - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp, 1, 2, None), KafkaOffsetRange(tp, 2, 3, None), - KafkaOffsetRange(tp, 3, 4, None))) + KafkaOffsetRange(tp, 3, 4, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - using special offsets") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "using special offsets") { val topic = newTopic() testUtils.createTopic(topic, partitions = 1) testUtils.sendMessages(topic, (0 until 4).map(_.toString).toArray, Some(0)) @@ -89,14 +127,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val reader = createKafkaReader(topic, minPartitions = Some(3)) val startingOffsets = EarliestOffsetRangeLimit val endingOffsets = LatestOffsetRangeLimit - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp, EARLIEST, 1, None), KafkaOffsetRange(tp, 1, 2, None), - KafkaOffsetRange(tp, 2, LATEST, None))) + KafkaOffsetRange(tp, 2, LATEST, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - multiple topic partitions") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + + "multiple topic partitions") { val topic = newTopic() testUtils.createTopic(topic, partitions = 2) testUtils.sendMessages(topic, (0 until 100).map(_.toString).toArray, Some(0)) @@ -107,15 +147,16 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk val startingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> EARLIEST, tp2 -> EARLIEST)) val endingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> LATEST, tp2 -> 3)) - val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) - assert(offsetRanges === Seq( + val offsetRanges = reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, + endingOffsets) + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp2, EARLIEST, 3, None), KafkaOffsetRange(tp1, EARLIEST, 33, None), KafkaOffsetRange(tp1, 33, 66, None), - KafkaOffsetRange(tp1, 66, LATEST, None))) + KafkaOffsetRange(tp1, 66, LATEST, None)).sortBy(_.topicPartition.toString)) } - test("SPARK-30656: getOffsetRangesFromResolvedOffsets") { + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromResolvedOffsets") { val topic = newTopic() testUtils.createTopic(topic, partitions = 2) testUtils.sendMessages(topic, (0 until 100).map(_.toString).toArray, Some(0)) @@ -130,10 +171,28 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk 
fromPartitionOffsets, untilPartitionOffsets, _ => {}) - assert(offsetRanges === Seq( + assert(offsetRanges.sortBy(_.topicPartition.toString) === Seq( KafkaOffsetRange(tp1, 0, 33, None), KafkaOffsetRange(tp1, 33, 66, None), KafkaOffsetRange(tp1, 66, 100, None), - KafkaOffsetRange(tp2, 0, 3, None))) + KafkaOffsetRange(tp2, 0, 3, None)).sortBy(_.topicPartition.toString)) + } + + private def testWithAllOffsetFetchingSQLConf(name: String)(func: => Any): Unit = { + Seq("true", "false").foreach { useDeprecatedOffsetFetching => + val testName = s"$name with useDeprecatedOffsetFetching $useDeprecatedOffsetFetching" + executeFuncWithSQLConf(testName, useDeprecatedOffsetFetching, func) + } + } + + private def executeFuncWithSQLConf( + name: String, + useDeprecatedOffsetFetching: String, + func: => Any): Unit = { + test(name) { + withSQLConf(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key -> useDeprecatedOffsetFetching) { + func + } + } } } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 6f5dc0bb081ba..16fa24a68abe2 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -22,8 +22,6 @@ import java.util.Locale import java.util.concurrent.atomic.AtomicInteger import scala.annotation.tailrec -import scala.collection.JavaConverters._ -import scala.util.Random import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition @@ -465,41 +463,6 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession testBadOptions("subscribePattern" -> "")("pattern to subscribe is empty") } - test("allow group.id prefix") { - testGroupId("groupIdPrefix", (expected, actual) => { - assert(actual.exists(_.startsWith(expected)) && !actual.exists(_ === expected), - "Valid consumer groups don't contain the expected group id - " + - s"Valid consumer groups: $actual / expected group id: $expected") - }) - } - - test("allow group.id override") { - testGroupId("kafka.group.id", (expected, actual) => { - assert(actual.exists(_ === expected), "Valid consumer groups don't " + - s"contain the expected group id - Valid consumer groups: $actual / " + - s"expected group id: $expected") - }) - } - - private def testGroupId(groupIdKey: String, - validateGroupId: (String, Iterable[String]) => Unit): Unit = { - // Tests code path KafkaSourceProvider.createRelation(.) 
- val topic = newTopic() - testUtils.createTopic(topic, partitions = 3) - testUtils.sendMessages(topic, (1 to 10).map(_.toString).toArray, Some(0)) - testUtils.sendMessages(topic, (11 to 20).map(_.toString).toArray, Some(1)) - testUtils.sendMessages(topic, (21 to 30).map(_.toString).toArray, Some(2)) - - val customGroupId = "id-" + Random.nextInt() - val df = createDF(topic, withOptions = Map(groupIdKey -> customGroupId)) - checkAnswer(df, (1 to 30).map(_.toString).toDF()) - - val consumerGroups = testUtils.listConsumerGroups() - val validGroups = consumerGroups.valid().get() - val validGroupsId = validGroups.asScala.map(_.groupId()) - validateGroupId(customGroupId, validGroupsId) - } - test("read Kafka transactional messages: read_committed") { val topic = newTopic() testUtils.createTopic(topic) @@ -622,6 +585,16 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession } } +class KafkaRelationSuiteWithAdminV1 extends KafkaRelationSuiteV1 { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") +} + +class KafkaRelationSuiteWithAdminV2 extends KafkaRelationSuiteV2 { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.USE_DEPRECATED_KAFKA_OFFSET_FETCHING.key, "false") +} + class KafkaRelationSuiteV1 extends KafkaRelationSuiteBase { override protected def sparkConf: SparkConf = super diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b2c28ffa984a9..979ddebc637f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1415,6 +1415,17 @@ object SQLConf { .booleanConf .createWithDefault(true) + val USE_DEPRECATED_KAFKA_OFFSET_FETCHING = + buildConf("spark.sql.streaming.kafka.useDeprecatedOffsetFetching") + .internal() + .doc("When true, the deprecated Consumer based offset fetching used which could cause " + + "infinite wait in Spark queries. Such cases query restart is the only workaround. " + + "For further details please see Offset Fetching chapter of Structured Streaming Kafka " + + "Integration Guide.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED = buildConf("spark.sql.streaming.statefulOperator.checkCorrectness.enabled") .internal() @@ -3065,6 +3076,8 @@ class SQLConf extends Serializable with Logging { def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) + def useDeprecatedKafkaOffsetFetching: Boolean = getConf(USE_DEPRECATED_KAFKA_OFFSET_FETCHING) + def statefulOperatorCorrectnessCheckEnabled: Boolean = getConf(STATEFUL_OPERATOR_CHECK_CORRECTNESS_ENABLED) From d38883c1d811f57e5b9f07b29730b7ac6a6731ca Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Tue, 1 Dec 2020 11:38:42 +0000 Subject: [PATCH 0624/1009] [SPARK-32405][SQL][FOLLOWUP] Throw Exception if provider is specified in JDBCTableCatalog create table ### What changes were proposed in this pull request? Throw Exception if JDBC Table Catalog has provider in create table. ### Why are the changes needed? JDBC Table Catalog doesn't support provider and we should throw Exception. Previously CREATE TABLE syntax forces people to specify a provider so we have to add a `USING_`. Now the problem was fix and we will throw Exception for provider. 
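For illustration, a minimal spark-shell style sketch of the behavior after this change (the `h2` catalog and the table names are assumptions borrowed from the test suites below; the provider name is arbitrary):
```
// CREATE TABLE without USING now works against a JDBC table catalog
spark.sql("CREATE TABLE h2.test.new_table(i INT, j STRING)")

// Specifying any provider is rejected
spark.sql("CREATE TABLE h2.test.new_table2(i INT, j STRING) USING parquet")
// org.apache.spark.sql.AnalysisException:
//   CREATE TABLE ... USING ... is not supported in JDBC catalog.
```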
### Does this PR introduce _any_ user-facing change? Yes. We throw Exception if a provider is specified in CREATE TABLE for JDBC Table catalog. ### How was this patch tested? Existing tests (remove `USING _`) Closes #30544 from huaxingao/followup. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../v2/jdbc/JDBCTableCatalog.scala | 3 ++- .../v2/jdbc/JDBCTableCatalogSuite.scala | 27 +++++++++---------- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 21 +++++---------- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index e96b37e05c762..63f802363f7c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -126,8 +126,9 @@ class JDBCTableCatalog extends TableCatalog with Logging { properties.asScala.map { case (k, v) => k match { case "comment" => tableComment = v - // ToDo: have a follow up to fail provider once unify create table syntax PR is merged case "provider" => + throw new AnalysisException("CREATE TABLE ... USING ... is not supported in" + + " JDBC catalog.") case "owner" => // owner is ignored. It is default to current user name. case "location" => throw new AnalysisException("CREATE TABLE ... LOCATION ... is not supported in" + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 97dd92acc7805..9e9df7db1e1c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -153,21 +153,20 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("create a table") { withTable("h2.test.new_table") { - // TODO (SPARK-32427): Omit USING in CREATE TABLE - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") checkAnswer( sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"), Row("test", "new_table"))) } withTable("h2.test.new_table") { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") val msg = intercept[AnalysisException] { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)") }.getMessage assert(msg.contains("Table test.new_table already exists")) } val exp = intercept[NoSuchNamespaceException] { - sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING) USING _") + sql("CREATE TABLE h2.bad_test.new_table(i INT, j STRING)") } assert(exp.getMessage.contains("Failed table creation: bad_test.new_table")) assert(exp.cause.get.getMessage.contains("Schema \"bad_test\" not found")) @@ -176,7 +175,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... 
add column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER)") sql(s"ALTER TABLE $tableName ADD COLUMNS (C1 INTEGER, C2 STRING)") var t = spark.table(tableName) var expectedSchema = new StructType() @@ -206,7 +205,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... rename column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (id INTEGER, C0 INTEGER)") sql(s"ALTER TABLE $tableName RENAME COLUMN id TO C") val t = spark.table(tableName) val expectedSchema = new StructType() @@ -231,7 +230,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... drop column") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (C1 INTEGER, C2 INTEGER, c3 INTEGER)") sql(s"ALTER TABLE $tableName DROP COLUMN C1") sql(s"ALTER TABLE $tableName DROP COLUMN c3") val t = spark.table(tableName) @@ -255,7 +254,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... update column type") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER, deptno INTEGER)") sql(s"ALTER TABLE $tableName ALTER COLUMN id TYPE DOUBLE") sql(s"ALTER TABLE $tableName ALTER COLUMN deptno TYPE DOUBLE") val t = spark.table(tableName) @@ -284,7 +283,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... update column nullability") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER NOT NULL, deptno INTEGER NOT NULL)") sql(s"ALTER TABLE $tableName ALTER COLUMN ID DROP NOT NULL") sql(s"ALTER TABLE $tableName ALTER COLUMN deptno DROP NOT NULL") val t = spark.table(tableName) @@ -309,7 +308,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE ... 
update column comment not supported") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (ID INTEGER) USING _") + sql(s"CREATE TABLE $tableName (ID INTEGER)") val exp = intercept[AnalysisException] { sql(s"ALTER TABLE $tableName ALTER COLUMN ID COMMENT 'test'") } @@ -333,7 +332,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("ALTER TABLE case sensitivity") { val tableName = "h2.test.alt_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER) USING _") + sql(s"CREATE TABLE $tableName (c1 INTEGER NOT NULL, c2 INTEGER)") var t = spark.table(tableName) var expectedSchema = new StructType().add("c1", IntegerType).add("c2", IntegerType) assert(t.schema === expectedSchema) @@ -400,7 +399,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { withTable("h2.test.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _ COMMENT 'this is a comment'") + sql("CREATE TABLE h2.test.new_table(i INT, j STRING) COMMENT 'this is a comment'") } val createCommentWarning = logAppender.loggingEvents .filter(_.getLevel == Level.WARN) @@ -413,7 +412,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("CREATE TABLE with table property") { withTable("h2.test.new_table") { val m = intercept[AnalysisException] { - sql("CREATE TABLE h2.test.new_table(i INT, j STRING) USING _" + + sql("CREATE TABLE h2.test.new_table(i INT, j STRING)" + " TBLPROPERTIES('ENGINE'='tableEngineName')") }.cause.get.getMessage assert(m.contains("\"TABLEENGINENAME\" not found")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 3bcacd03b4a0d..e8157e552d754 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -111,7 +111,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { test("read/write with partition info") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") val df1 = Seq(("evan", 3), ("cathy", 4), ("alex", 5)).toDF("NAME", "ID") val e = intercept[IllegalArgumentException] { df1.write @@ -148,11 +148,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { Seq(Row("test", "people"), Row("test", "empty_table"))) } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... 
does not support - // multi-part identifiers test("SQL API: create table as select") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") checkAnswer(sql("SELECT name, id FROM h2.test.abc"), Seq(Row("fred", 1), Row("mary", 2))) } } @@ -164,15 +162,14 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): ParseException: mismatched input 'AS' expecting {'(', 'USING'} test("SQL API: replace table as select") { withTable("h2.test.abc") { intercept[CannotReplaceMissingTableException] { - sql("REPLACE TABLE h2.test.abc USING _ AS SELECT 1 as col") + sql("REPLACE TABLE h2.test.abc AS SELECT 1 as col") } - sql("CREATE OR REPLACE TABLE h2.test.abc USING _ AS SELECT 1 as col") + sql("CREATE OR REPLACE TABLE h2.test.abc AS SELECT 1 as col") checkAnswer(sql("SELECT col FROM h2.test.abc"), Row(1)) - sql("REPLACE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("REPLACE TABLE h2.test.abc AS SELECT * FROM h2.test.people") checkAnswer(sql("SELECT name, id FROM h2.test.abc"), Seq(Row("fred", 1), Row("mary", 2))) } } @@ -189,11 +186,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... does not support - // multi-part identifiers test("SQL API: insert and overwrite") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") sql("INSERT INTO h2.test.abc SELECT 'lucy', 3") checkAnswer( @@ -205,11 +200,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession { } } - // TODO (SPARK-32603): Operation not allowed: CREATE TABLE ... STORED AS ... does not support - // multi-part identifiers test("DataFrameWriterV2: insert and overwrite") { withTable("h2.test.abc") { - sql("CREATE TABLE h2.test.abc USING _ AS SELECT * FROM h2.test.people") + sql("CREATE TABLE h2.test.abc AS SELECT * FROM h2.test.people") // `DataFrameWriterV2` is by-name. sql("SELECT 3 AS ID, 'lucy' AS NAME").writeTo("h2.test.abc").append() From 9273d4250ddd5e011487a5a942c1b4d0f0412f78 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 1 Dec 2020 11:48:30 +0000 Subject: [PATCH 0625/1009] [SPARK-33045][SQL][FOLLOWUP] Support built-in function like_any and fix StackOverflowError issue ### What changes were proposed in this pull request? Spark already support `LIKE ANY` syntax, but it will throw `StackOverflowError` if there are many elements(more than 14378 elements). We should implement built-in function for LIKE ANY to fix this issue. Why the stack overflow can happen in the current approach ? The current approach uses reduceLeft to connect each `Like(e, p)`, this will lead the the call depth of the thread is too large, causing `StackOverflowError` problems. Why the fix in this PR can avoid the error? This PR support built-in function for `LIKE ANY` and avoid this issue. ### Why are the changes needed? 1.Fix the `StackOverflowError` issue. 2.Support built-in function `like_any`. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30465 from beliefer/SPARK-33045-like_any-bak. 
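As a rough sketch of the failure mode being fixed (table name `t` and column `c` are made up for the example): before this patch the parser expanded `LIKE ANY` into a deeply nested `Or(Like(...), Like(...))` chain via `reduceLeft`, which could overflow the stack once the pattern list got large enough; with this patch the same query parses into a single `LikeAny` expression.
```
val patterns = (1 to 20000).map(i => s"'%p$i%'").mkString(", ")
// Before: deep Or/Like tree, prone to StackOverflowError at this size
// After: a single LikeAny expression over the pattern list
spark.sql(s"SELECT * FROM t WHERE c LIKE ANY ($patterns)")
```
Note that the optimized `LikeAny`/`NotLikeAny` path only applies when all pattern values are foldable string literals; otherwise the parser still falls back to the `Or` chain of `Like` expressions.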
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 4 + .../expressions/regexpExpressions.scala | 98 ++++++++++++++++--- .../sql/catalyst/parser/AstBuilder.scala | 31 +++--- .../apache/spark/sql/internal/SQLConf.scala | 14 --- .../expressions/RegexpExpressionsSuite.scala | 26 +++++ .../parser/ExpressionParserSuite.scala | 12 +-- .../resources/sql-tests/inputs/like-all.sql | 2 - .../resources/sql-tests/inputs/like-any.sql | 2 + 8 files changed, 138 insertions(+), 51 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 89cf97e76d798..2bcbdf6512389 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -107,6 +107,10 @@ package object dsl { LikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def notLikeAll(others: Expression*): Expression = NotLikeAll(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def likeAny(others: Expression*): Expression = + LikeAny(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) + def notLikeAny(others: Expression*): Expression = + NotLikeAny(expr, others.map(_.eval(EmptyRow).asInstanceOf[UTF8String])) def contains(other: Expression): Expression = Contains(expr, other) def startsWith(other: Expression): Expression = StartsWith(expr, other) def endsWith(other: Expression): Expression = EndsWith(expr, other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index b4d9921488d5f..0b94fe8b5d47e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -180,14 +180,12 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } -/** - * Optimized version of LIKE ALL, when all pattern values are literal. 
- */ -abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { +abstract class MultiLikeBase + extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { protected def patterns: Seq[UTF8String] - protected def isNotLikeAll: Boolean + protected def isNotSpecified: Boolean override def inputTypes: Seq[DataType] = StringType :: Nil @@ -195,27 +193,39 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w override def nullable: Boolean = true - private lazy val hasNull: Boolean = patterns.contains(null) + protected lazy val hasNull: Boolean = patterns.contains(null) - private lazy val cache = patterns.filterNot(_ == null) + protected lazy val cache = patterns.filterNot(_ == null) .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) - private lazy val matchFunc = if (isNotLikeAll) { + protected lazy val matchFunc = if (isNotSpecified) { (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() } else { (p: Pattern, inputValue: String) => p.matcher(inputValue).matches() } + protected def matches(exprValue: String): Any + override def eval(input: InternalRow): Any = { val exprValue = child.eval(input) if (exprValue == null) { null } else { - if (cache.forall(matchFunc(_, exprValue.toString))) { - if (hasNull) null else true - } else { - false - } + matches(exprValue.toString) + } + } +} + +/** + * Optimized version of LIKE ALL, when all pattern values are literal. + */ +abstract class LikeAllBase extends MultiLikeBase { + + override def matches(exprValue: String): Any = { + if (cache.forall(matchFunc(_, exprValue))) { + if (hasNull) null else true + } else { + false } } @@ -227,7 +237,7 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w val valueArg = ctx.freshName("valueArg") val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) - val checkNotMatchCode = if (isNotLikeAll) { + val checkNotMatchCode = if (isNotSpecified) { s"$pattern.matcher($valueArg.toString()).matches()" } else { s"!$pattern.matcher($valueArg.toString()).matches()" @@ -255,11 +265,67 @@ abstract class LikeAllBase extends UnaryExpression with ImplicitCastInputTypes w } case class LikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { - override def isNotLikeAll: Boolean = false + override def isNotSpecified: Boolean = false } case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends LikeAllBase { - override def isNotLikeAll: Boolean = true + override def isNotSpecified: Boolean = true +} + +/** + * Optimized version of LIKE ANY, when all pattern values are literal. 
+ */ +abstract class LikeAnyBase extends MultiLikeBase { + + override def matches(exprValue: String): Any = { + if (cache.exists(matchFunc(_, exprValue))) { + true + } else { + if (hasNull) null else false + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val eval = child.genCode(ctx) + val patternClass = classOf[Pattern].getName + val javaDataType = CodeGenerator.javaType(child.dataType) + val pattern = ctx.freshName("pattern") + val valueArg = ctx.freshName("valueArg") + val patternCache = ctx.addReferenceObj("patternCache", cache.asJava) + + val checkMatchCode = if (isNotSpecified) { + s"!$pattern.matcher($valueArg.toString()).matches()" + } else { + s"$pattern.matcher($valueArg.toString()).matches()" + } + + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = false; + |boolean ${ev.value} = false; + |if (${eval.isNull}) { + | ${ev.isNull} = true; + |} else { + | $javaDataType $valueArg = ${eval.value}; + | for ($patternClass $pattern: $patternCache) { + | if ($checkMatchCode) { + | ${ev.value} = true; + | break; + | } + | } + | if (!${ev.value} && $hasNull) ${ev.isNull} = true; + |} + """.stripMargin) + } +} + +case class LikeAny(child: Expression, patterns: Seq[UTF8String]) extends LikeAnyBase { + override def isNotSpecified: Boolean = false +} + +case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends LikeAnyBase { + override def isNotSpecified: Boolean = true } // scalastyle:off line.contains.tab diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ff8b56f0b724b..3788e1631c3dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1396,14 +1396,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case other => Seq(other) } - def getLikeQuantifierExprs(expressions: java.util.List[ExpressionContext]): Seq[Expression] = { - if (expressions.isEmpty) { - throw new ParseException("Expected something between '(' and ')'.", ctx) - } else { - expressions.asScala.map(expression).map(p => invertIfNotDefined(new Like(e, p))).toSeq - } - } - // Create the predicate. ctx.kind.getType match { case SqlBaseParser.BETWEEN => @@ -1418,12 +1410,24 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case SqlBaseParser.LIKE => Option(ctx.quantifier).map(_.getType) match { case Some(SqlBaseParser.ANY) | Some(SqlBaseParser.SOME) => - getLikeQuantifierExprs(ctx.expression).reduceLeft(Or) + validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + // If there are many pattern expressions, will throw StackOverflowError. + // So we use LikeAny or NotLikeAny instead. 
+ val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) + ctx.NOT match { + case null => LikeAny(e, patterns.toSeq) + case _ => NotLikeAny(e, patterns.toSeq) + } + } else { + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(Or) + } case Some(SqlBaseParser.ALL) => validate(!ctx.expression.isEmpty, "Expected something between '(' and ')'.", ctx) - val expressions = ctx.expression.asScala.map(expression) - if (expressions.size > SQLConf.get.optimizerLikeAllConversionThreshold && - expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { + val expressions = expressionList(ctx.expression) + if (expressions.forall(_.foldable) && expressions.forall(_.dataType == StringType)) { // If there are many pattern expressions, will throw StackOverflowError. // So we use LikeAll or NotLikeAll instead. val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) @@ -1432,7 +1436,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case _ => NotLikeAll(e, patterns.toSeq) } } else { - getLikeQuantifierExprs(ctx.expression).reduceLeft(And) + ctx.expression.asScala.map(expression) + .map(p => invertIfNotDefined(new Like(e, p))).toSeq.reduceLeft(And) } case _ => val escapeChar = Option(ctx.escapeChar).map(string).map { str => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 979ddebc637f0..a1d6f9f608873 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -216,18 +216,6 @@ object SQLConf { "for using switch statements in InSet must be non-negative and less than or equal to 600") .createWithDefault(400) - val OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD = - buildConf("spark.sql.optimizer.likeAllConversionThreshold") - .internal() - .doc("Configure the maximum size of the pattern sequence in like all. Spark will convert " + - "the logical combination of like to avoid StackOverflowError. 
200 is an empirical value " + - "that will not cause StackOverflowError.") - .version("3.1.0") - .intConf - .checkValue(threshold => threshold >= 0, "The maximum size of pattern sequence " + - "in like all must be non-negative") - .createWithDefault(200) - val PLAN_CHANGE_LOG_LEVEL = buildConf("spark.sql.planChangeLog.level") .internal() .doc("Configures the log level for logging the change from the original plan to the new " + @@ -3048,8 +3036,6 @@ class SQLConf extends Serializable with Logging { def optimizerInSetSwitchThreshold: Int = getConf(OPTIMIZER_INSET_SWITCH_THRESHOLD) - def optimizerLikeAllConversionThreshold: Int = getConf(OPTIMIZER_LIKE_ALL_CONVERSION_THRESHOLD) - def planChangeLogLevel: String = getConf(PLAN_CHANGE_LOG_LEVEL) def planChangeRules: Option[String] = getConf(PLAN_CHANGE_LOG_RULES) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index cc5ab5dc7b4e0..8d7501d952ecb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -72,6 +72,32 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { .notLikeAll(Literal.create(null, StringType), "%yoo%"), null) } + test("LIKE ANY") { + checkEvaluation(Literal.create(null, StringType).likeAny("%foo%", "%oo"), null) + checkEvaluation(Literal.create("foo", StringType).likeAny("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", StringType).likeAny("%foo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", StringType).likeAny("%fee%", "%bar%"), false) + checkEvaluation(Literal.create("foo", StringType) + .likeAny("%foo%", Literal.create(null, StringType)), true) + checkEvaluation(Literal.create("foo", StringType) + .likeAny(Literal.create(null, StringType), "%foo%"), true) + checkEvaluation(Literal.create("foo", StringType) + .likeAny("%feo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .likeAny(Literal.create(null, StringType), "%feo%"), null) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("tee", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("%oo%", "%yoo%"), true) + checkEvaluation(Literal.create("foo", StringType).notLikeAny("%foo%", "%oo"), false) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny("%foo%", Literal.create(null, StringType)), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny(Literal.create(null, StringType), "%foo%"), null) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny("%yoo%", Literal.create(null, StringType)), true) + checkEvaluation(Literal.create("foo", StringType) + .notLikeAny(Literal.create(null, StringType), "%yoo%"), true) + } + test("LIKE Pattern") { // null handling diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index b1d0d044eaead..9f6a76b9228c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -210,13 +210,13 @@ class ExpressionParserSuite extends AnalysisTest { test("(NOT) 
LIKE (ANY | SOME | ALL) expressions") { Seq("any", "some").foreach { quantifier => - assertEqual(s"a like $quantifier ('foo%', 'b%')", ('a like "foo%") || ('a like "b%")) - assertEqual(s"a not like $quantifier ('foo%', 'b%')", !('a like "foo%") || !('a like "b%")) - assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !(('a like "foo%") || ('a like "b%"))) + assertEqual(s"a like $quantifier ('foo%', 'b%')", 'a likeAny("foo%", "b%")) + assertEqual(s"a not like $quantifier ('foo%', 'b%')", 'a notLikeAny("foo%", "b%")) + assertEqual(s"not (a like $quantifier ('foo%', 'b%'))", !('a likeAny("foo%", "b%"))) } - assertEqual("a like all ('foo%', 'b%')", ('a like "foo%") && ('a like "b%")) - assertEqual("a not like all ('foo%', 'b%')", !('a like "foo%") && !('a like "b%")) - assertEqual("not (a like all ('foo%', 'b%'))", !(('a like "foo%") && ('a like "b%"))) + assertEqual("a like all ('foo%', 'b%')", 'a likeAll("foo%", "b%")) + assertEqual("a not like all ('foo%', 'b%')", 'a notLikeAll("foo%", "b%")) + assertEqual("not (a like all ('foo%', 'b%'))", !('a likeAll("foo%", "b%"))) Seq("any", "some", "all").foreach { quantifier => intercept(s"a like $quantifier()", "Expected something between '(' and ')'") diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql index f83277376e680..51b689607e8e3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-all.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-all.sql @@ -1,6 +1,4 @@ -- test cases for like all ---CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=0 ---CONFIG_DIM1 spark.sql.optimizer.likeAllConversionThreshold=200 CREATE OR REPLACE TEMPORARY VIEW like_all_table AS SELECT * FROM (VALUES ('google', '%oo%'), diff --git a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql index 5758a2a494944..a6e9827d58d94 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/like-any.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/like-any.sql @@ -1,3 +1,5 @@ +-- test cases for like any + CREATE OR REPLACE TEMPORARY VIEW like_any_table AS SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), From cf4ad212b100901b7065f2db8c1688c83423141d Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Tue, 1 Dec 2020 21:13:27 +0900 Subject: [PATCH 0626/1009] [SPARK-33503][SQL] Refactor SortOrder class to allow multiple childrens ### What changes were proposed in this pull request? This is a followup of #30302 . As part of this PR, sameOrderExpressions set is made part of children of SortOrder node - so that they don't need any special handling as done in #30302 . ### Why are the changes needed? sameOrderExpressions should get same treatment as child. So making them part of children helps in transforming them easily. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #30430 from prakharjain09/SPARK-33400-sortorder-refactor. 
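As an aside (not part of the patch), a minimal sketch of what the new shape of `SortOrder` means for callers: `sameOrderExpressions` is now a `Seq[Expression]` that is folded into `children`, so ordinary tree transformations reach it without the special handling removed here. Attribute names below are illustrative.

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder}

val a = 'a.int
val b = 'b.int

// sameOrderExpressions is a Seq now and becomes part of children.
val order = SortOrder(a, Ascending, sameOrderExpressions = Seq(b))
assert(order.children == Seq(a, b))
// A generic transform over children therefore rewrites `b` as well,
// which previously needed dedicated handling of the Set-typed field.
```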
Authored-by: Prakhar Jain Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/dsl/package.scala | 4 +-- .../sql/catalyst/expressions/SortOrder.scala | 10 ++++--- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 8 +++--- .../AliasAwareOutputExpression.scala | 6 +---- .../execution/joins/SortMergeJoinExec.scala | 9 ++++--- .../spark/sql/execution/PlannerSuite.scala | 26 +++++++++++++++++++ 8 files changed, 46 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index abd38f2f9d940..6b06cf13262d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1822,7 +1822,7 @@ class Analyzer(override val catalogManager: CatalogManager) val newOrders = orders map { case s @ SortOrder(UnresolvedOrdinal(index), direction, nullOrdering, _) => if (index > 0 && index <= child.output.size) { - SortOrder(child.output(index - 1), direction, nullOrdering, Set.empty) + SortOrder(child.output(index - 1), direction, nullOrdering, Seq.empty) } else { s.failAnalysis( s"ORDER BY position $index is not in select list " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 2bcbdf6512389..5a778d2785a67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -135,9 +135,9 @@ package object dsl { } def asc: SortOrder = SortOrder(expr, Ascending) - def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast, Set.empty) + def asc_nullsLast: SortOrder = SortOrder(expr, Ascending, NullsLast, Seq.empty) def desc: SortOrder = SortOrder(expr, Descending) - def desc_nullsFirst: SortOrder = SortOrder(expr, Descending, NullsFirst, Set.empty) + def desc_nullsFirst: SortOrder = SortOrder(expr, Descending, NullsFirst, Seq.empty) def as(alias: String): NamedExpression = Alias(expr, alias)() def as(alias: Symbol): NamedExpression = Alias(expr, alias.name)() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 54259e713accd..d9923b5d022e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -63,8 +63,10 @@ case class SortOrder( child: Expression, direction: SortDirection, nullOrdering: NullOrdering, - sameOrderExpressions: Set[Expression]) - extends UnaryExpression with Unevaluable { + sameOrderExpressions: Seq[Expression]) + extends Expression with Unevaluable { + + override def children: Seq[Expression] = child +: sameOrderExpressions override def checkInputDataTypes(): TypeCheckResult = { if (RowOrdering.isOrderable(dataType)) { @@ -83,7 +85,7 @@ case class SortOrder( def isAscending: Boolean = direction == Ascending def satisfies(required: SortOrder): Boolean = { - (sameOrderExpressions + child).exists(required.child.semanticEquals) && + children.exists(required.child.semanticEquals) && direction == required.direction && 
nullOrdering == required.nullOrdering } } @@ -92,7 +94,7 @@ object SortOrder { def apply( child: Expression, direction: SortDirection, - sameOrderExpressions: Set[Expression] = Set.empty): SortOrder = { + sameOrderExpressions: Seq[Expression] = Seq.empty): SortOrder = { new SortOrder(child, direction, direction.defaultNullOrdering, sameOrderExpressions) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3788e1631c3dd..12c5e0de686fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1910,7 +1910,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { direction.defaultNullOrdering } - SortOrder(expression(ctx.expression), direction, nullOrdering, Set.empty) + SortOrder(expression(ctx.expression), direction, nullOrdering, Seq.empty) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index b3e403ffa7382..95134d9111593 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1228,7 +1228,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst, Set.empty) } + def desc_nulls_first: Column = withExpr { SortOrder(expr, Descending, NullsFirst, Seq.empty) } /** * Returns a sort expression based on the descending order of the column, @@ -1244,7 +1244,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast, Set.empty) } + def desc_nulls_last: Column = withExpr { SortOrder(expr, Descending, NullsLast, Seq.empty) } /** * Returns a sort expression based on ascending order of the column. @@ -1275,7 +1275,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Set.empty) } + def asc_nulls_first: Column = withExpr { SortOrder(expr, Ascending, NullsFirst, Seq.empty) } /** * Returns a sort expression based on ascending order of the column, @@ -1291,7 +1291,7 @@ class Column(val expr: Expression) extends Logging { * @group expr_ops * @since 2.1.0 */ - def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Set.empty) } + def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Seq.empty) } /** * Prints the expression to the console for debugging purposes. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3ba8745be995f..3cbe1654ea2cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -65,11 +65,7 @@ trait AliasAwareOutputOrdering extends AliasAwareOutputExpression { final override def outputOrdering: Seq[SortOrder] = { if (hasAlias) { - orderingExpressions.map { sortOrder => - val newSortOrder = normalizeExpression(sortOrder).asInstanceOf[SortOrder] - val newSameOrderExpressions = newSortOrder.sameOrderExpressions.map(normalizeExpression) - newSortOrder.copy(sameOrderExpressions = newSameOrderExpressions) - } + orderingExpressions.map(normalizeExpression(_).asInstanceOf[SortOrder]) } else { orderingExpressions } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala index 6e59ad07d7168..eabbdc8ed3243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala @@ -68,9 +68,9 @@ case class SortMergeJoinExec( val leftKeyOrdering = getKeyOrdering(leftKeys, left.outputOrdering) val rightKeyOrdering = getKeyOrdering(rightKeys, right.outputOrdering) leftKeyOrdering.zip(rightKeyOrdering).map { case (lKey, rKey) => - // Also add the right key and its `sameOrderExpressions` - SortOrder(lKey.child, Ascending, lKey.sameOrderExpressions + rKey.child ++ rKey - .sameOrderExpressions) + // Also add expressions from right side sort order + val sameOrderExpressions = ExpressionSet(lKey.sameOrderExpressions ++ rKey.children) + SortOrder(lKey.child, Ascending, sameOrderExpressions.toSeq) } // For left and right outer joins, the output is ordered by the streamed input's join keys. 
case LeftOuter => getKeyOrdering(leftKeys, left.outputOrdering) @@ -96,7 +96,8 @@ case class SortMergeJoinExec( val requiredOrdering = requiredOrders(keys) if (SortOrder.orderingSatisfies(childOutputOrdering, requiredOrdering)) { keys.zip(childOutputOrdering).map { case (key, childOrder) => - SortOrder(key, Ascending, childOrder.sameOrderExpressions + childOrder.child - key) + val sameOrderExpressionsSet = ExpressionSet(childOrder.children) - key + SortOrder(key, Ascending, sameOrderExpressionsSet.toSeq) } } else { requiredOrdering diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 6de81cc414d7d..5e30f846307ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -1090,6 +1090,32 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } } + test("sort order doesn't have repeated expressions") { + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withTempView("t1", "t2") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + val planned = sql( + """ + | SELECT t12.id, t1.id + | FROM (SELECT t1.id FROM t1, t2 WHERE t1.id * 2 = t2.id) t12, t1 + | where 2 * t12.id = t1.id + """.stripMargin).queryExecution.executedPlan + + // t12 is already sorted on `t1.id * 2`. and we need to sort it on `2 * t12.id` + // for 2nd join. So sorting on t12 can be avoided + val sortNodes = planned.collect { case s: SortExec => s } + assert(sortNodes.size == 3) + val outputOrdering = planned.outputOrdering + assert(outputOrdering.size == 1) + // Sort order should have 3 childrens, not 4. This is because t1.id*2 and 2*t1.id are same + assert(outputOrdering.head.children.size == 3) + assert(outputOrdering.head.children.count(_.isInstanceOf[AttributeReference]) == 2) + assert(outputOrdering.head.children.count(_.isInstanceOf[Multiply]) == 1) + } + } + } + test("aliases to expressions should not be replaced") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { withTempView("df1", "df2") { From 478fb7f5280d8da2c68b858114eda358708e681b Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 1 Dec 2020 14:11:01 +0000 Subject: [PATCH 0627/1009] [SPARK-33608][SQL] Handle DELETE/UPDATE/MERGE in PullupCorrelatedPredicates ### What changes were proposed in this pull request? This PR adds logic to handle DELETE/UPDATE/MERGE plans in `PullupCorrelatedPredicates`. ### Why are the changes needed? Right now, `PullupCorrelatedPredicates` applies only to filters and unary nodes. As a result, correlated predicates in DELETE/UPDATE/MERGE are not rewritten. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The PR adds 3 new test cases. Closes #30555 from aokolnychyi/spark-33608. 
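To illustrate the kind of statement this now covers (a sketch only, not taken from the patch; `target` and `source` are hypothetical v2 tables that support DELETE):

```scala
// The correlated predicate s.region = t.region is now pulled up by
// PullupCorrelatedPredicates, just as it would be for a plain Filter.
spark.sql("""
  DELETE FROM target t
  WHERE t.id IN (SELECT s.id FROM source s WHERE s.region = t.region)
""")
```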
Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/subquery.scala | 2 + .../PullupCorrelatedPredicatesSuite.scala | 64 ++++++++++++++++++- 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 11532d22204a4..3c2ee3149d317 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -328,6 +328,8 @@ object PullupCorrelatedPredicates extends Rule[LogicalPlan] with PredicateHelper // Only a few unary nodes (Project/Filter/Aggregate) can contain subqueries. case q: UnaryNode => rewriteSubQueries(q, q.children) + case s: SupportsSubquery => + rewriteSubQueries(s, s.children) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 17dfc7f3f18f7..ae9a694b50444 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{Assignment, DeleteAction, DeleteFromTable, InsertAction, LocalRelation, LogicalPlan, MergeIntoTable, UpdateTable} import org.apache.spark.sql.catalyst.rules.RuleExecutor class PullupCorrelatedPredicatesSuite extends PlanTest { @@ -98,4 +98,66 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { val doubleOptimized = Optimize.execute(optimized) comparePlans(optimized, doubleOptimized, false) } + + test("PullupCorrelatedPredicates should handle deletes") { + val subPlan = testRelation2.where('a === 'c).select('c) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + val deletePlan = DeleteFromTable(testRelation, Some(cond)).analyze + assert(deletePlan.resolved) + + val optimized = Optimize.execute(deletePlan) + assert(optimized.resolved) + + optimized match { + case DeleteFromTable(_, Some(s: InSubquery)) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } + + test("PullupCorrelatedPredicates should handle updates") { + val subPlan = testRelation2.where('a === 'c).select('c) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + val updatePlan = UpdateTable(testRelation, Seq.empty, Some(cond)).analyze + assert(updatePlan.resolved) + + val optimized = Optimize.execute(updatePlan) + assert(optimized.resolved) + + optimized match { + case UpdateTable(_, _, Some(s: InSubquery)) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } + + test("PullupCorrelatedPredicates should handle merge") { + val testRelation3 = LocalRelation('e.int, 'f.double) + val subPlan = 
testRelation3.where('a === 'e).select('e) + val cond = InSubquery(Seq('a), ListQuery(subPlan)) + + val mergePlan = MergeIntoTable( + testRelation, + testRelation2, + cond, + Seq(DeleteAction(None)), + Seq(InsertAction(None, Seq(Assignment('a, 'c), Assignment('b, 'd))))) + val analyzedMergePlan = mergePlan.analyze + assert(analyzedMergePlan.resolved) + + val optimized = Optimize.execute(analyzedMergePlan) + assert(optimized.resolved) + + optimized match { + case MergeIntoTable(_, _, s: InSubquery, _, _) => + val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) + assert(outerRefs.isEmpty, "should be no outer refs") + case other => + fail(s"unexpected logical plan: $other") + } + } } From c24f2b2d6afb411fbfffb90fa87150f3b6912343 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 1 Dec 2020 09:27:46 -0800 Subject: [PATCH 0628/1009] [SPARK-33612][SQL] Add dataSourceRewriteRules batch to Optimizer ### What changes were proposed in this pull request? This PR adds a new batch to the optimizer for executing rules that rewrite plans for data sources. ### Why are the changes needed? Right now, we have a special place in the optimizer where we construct v2 scans. As time shows, we need more rewrite rules that would be executed after the operator optimization and before any stats-related rules for v2 tables. Not all rules will be specific to reads. One option is to rename the current batch into something more generic but it would require changing quite some places. That's why it seems better to introduce a new batch and use it for all rewrites. The name is generic so that we don't limit ourselves to v2 data sources only. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The change is trivial and SPARK-23889 will depend on it. Closes #30558 from aokolnychyi/spark-33612. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 9 +++++++++ .../spark/sql/internal/BaseSessionStateBuilder.scala | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 9eee7c2b914a4..b7c8f775b857f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -185,6 +185,9 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil ++ operatorOptimizationBatch) :+ + // This batch rewrites data source plans and should be run after the operator + // optimization batch and before any batches that depend on stats. + Batch("Data Source Rewrite Rules", Once, dataSourceRewriteRules: _*) :+ // This batch pushes filters and projections into scan nodes. Before this batch, the logical // plan may contain nodes that do not report stats. Anything that uses stats must run after // this batch. @@ -289,6 +292,12 @@ abstract class Optimizer(catalogManager: CatalogManager) */ def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil + /** + * Override to provide additional rules for rewriting data source plans. Such rules will be + * applied after operator optimization rules and before any rules that depend on stats. 
+ */ + def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + /** * Returns (defaultBatches - (excludedRules - nonExcludableRules)), the rule batches that * eventually run in the Optimizer. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index a89a5de3b7e72..8101f9e291b44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -241,6 +241,9 @@ abstract class BaseSessionStateBuilder( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = super.earlyScanPushDownRules ++ customEarlyScanPushDownRules + override def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = + super.dataSourceRewriteRules ++ customDataSourceRewriteRules + override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules } @@ -264,6 +267,14 @@ abstract class BaseSessionStateBuilder( */ protected def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil + /** + * Custom rules for rewriting data source plans to add to the Optimizer. Prefer overriding + * this instead of creating your own Optimizer. + * + * Note that this may NOT depend on the `optimizer` function. + */ + protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + /** * Planner that converts optimized logical plans to physical plans. * From 5d0045eedf4b138c031accac2b1fa1e8d6f3f7c6 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 2 Dec 2020 01:36:41 +0800 Subject: [PATCH 0629/1009] [SPARK-33611][UI] Avoid encoding twice on the query parameter of rewritten proxy URL ### What changes were proposed in this pull request? When running Spark behind a reverse proxy(e.g. Nginx, Apache HTTP server), the request URL can be encoded twice if we pass the query string directly to the constructor of `java.net.URI`: ``` > val uri = "http://localhost:8081/test" > val query = "order%5B0%5D%5Bcolumn%5D=0" // query string of URL from the reverse proxy > val rewrittenURI = URI.create(uri.toString()) > new URI(rewrittenURI.getScheme(), rewrittenURI.getAuthority(), rewrittenURI.getPath(), query, rewrittenURI.getFragment()).toString result: http://localhost:8081/test?order%255B0%255D%255Bcolumn%255D=0 ``` In Spark's stage page, the URL of "/taskTable" contains query parameter order[0][dir]. After encoding twice, the query parameter becomes `order%255B0%255D%255Bdir%255D` and it will be decoded as `order%5B0%5D%5Bdir%5D` instead of `order[0][dir]`. As a result, there will be NullPointerException from https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/api/v1/StagesResource.scala#L176 Other than that, the other parameter may not work as expected after encoded twice. This PR is to fix the bug by calling the method `URI.create(String URL)` directly. This convenience method can avoid encoding twice on the query parameter. ``` > val uri = "http://localhost:8081/test" > val query = "order%5B0%5D%5Bcolumn%5D=0" > URI.create(s"$uri?$query").toString result: http://localhost:8081/test?order%5B0%5D%5Bcolumn%5D=0 > URI.create(s"$uri?$query").getQuery result: order[0][column]=0 ``` ### Why are the changes needed? Fix a potential bug when Spark's reverse proxy is enabled. The bug itself is similar to https://github.com/apache/spark/pull/29271. 
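Concretely, after the fix the proxy rewrite preserves the original percent-encoding. The sketch below mirrors the unit test added in this patch (it runs in the `org.apache.spark.ui` test scope because `JettyUtils` is `private[spark]`):

```scala
val rewritten = JettyUtils.createProxyURI(
  "/worker-id", "http://localhost:8081", "/worker-id/json",
  "order%5B0%5D%5Bcolumn%5D=0")
// The query string is no longer encoded a second time.
assert(rewritten.toString == "http://localhost:8081/json?order%5B0%5D%5Bcolumn%5D=0")
```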
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new unit test. Also, Manual UI testing for master, worker and app UI with an nginx proxy Spark config: ``` spark.ui.port 8080 spark.ui.reverseProxy=true spark.ui.reverseProxyUrl=/path/to/spark/ ``` nginx config: ``` server { listen 9000; set $SPARK_MASTER http://127.0.0.1:8080; # split spark UI path into prefix and local path within master UI location ~ ^(/path/to/spark/) { # strip prefix when forwarding request rewrite /path/to/spark(/.*) $1 break; #rewrite /path/to/spark/ "/" ; # forward to spark master UI proxy_pass $SPARK_MASTER; proxy_intercept_errors on; error_page 301 302 307 = handle_redirects; } location handle_redirects { set $saved_redirect_location '$upstream_http_location'; proxy_pass $saved_redirect_location; } } ``` Closes #30552 from gengliangwang/decodeProxyRedirect. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../scala/org/apache/spark/ui/JettyUtils.scala | 16 ++++++---------- .../test/scala/org/apache/spark/ui/UISuite.scala | 9 +++++++++ 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 2a3597e323543..663da0d33e20b 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -401,17 +401,13 @@ private[spark] object JettyUtils extends Logging { uri.append(rest) } - val rewrittenURI = URI.create(uri.toString()) - if (query != null) { - return new URI( - rewrittenURI.getScheme(), - rewrittenURI.getAuthority(), - rewrittenURI.getPath(), - query, - rewrittenURI.getFragment() - ).normalize() + val queryString = if (query == null) { + "" + } else { + s"?$query" } - rewrittenURI.normalize() + // SPARK-33611: use method `URI.create` to avoid percent-encoding twice on the query string. + URI.create(uri.toString() + queryString).normalize() } def createProxyLocationHeader( diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 56026eaa0072b..c7e1dfe71d563 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -216,6 +216,15 @@ class UISuite extends SparkFunSuite { assert(rewrittenURI === null) } + test("SPARK-33611: Avoid encoding twice on the query parameter of proxy rewrittenURI") { + val prefix = "/worker-id" + val target = "http://localhost:8081" + val path = "/worker-id/json" + val rewrittenURI = + JettyUtils.createProxyURI(prefix, target, path, "order%5B0%5D%5Bcolumn%5D=0") + assert(rewrittenURI.toString === "http://localhost:8081/json?order%5B0%5D%5Bcolumn%5D=0") + } + test("verify rewriting location header for reverse proxy") { val clientRequest = mock(classOf[HttpServletRequest]) var headerValue = "http://localhost:4040/jobs" From 5a1c5ac8073ab46c145146485c71cc6aceb8c5b8 Mon Sep 17 00:00:00 2001 From: zero323 Date: Tue, 1 Dec 2020 10:44:14 -0800 Subject: [PATCH 0630/1009] [SPARK-33622][R][ML] Add array_to_vector to SparkR ### What changes were proposed in this pull request? This PR adds `array_to_vector` to R API. ### Why are the changes needed? Feature parity. ### Does this PR introduce _any_ user-facing change? New function exposed in the public API. ### How was this patch tested? New unit test. Manual verification of the documentation examples. Closes #30561 from zero323/SPARK-33622. 
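For reference, the SparkR wrapper delegates to `org.apache.spark.ml.functions.array_to_vector` via `callJStatic`; a rough Scala equivalent of the documented R example (the data path is illustrative and a running `spark` session is assumed) is:

```scala
import org.apache.spark.ml.functions.{array_to_vector, vector_to_array}
import org.apache.spark.sql.functions.col

val df = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
df.withColumn("array", vector_to_array(col("features")))   // vector -> array
  .withColumn("vector", array_to_vector(col("array")))     // array -> vector
  .select("array", "vector")
  .show(1)
```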
Authored-by: zero323 Signed-off-by: Dongjoon Hyun --- R/pkg/NAMESPACE | 1 + R/pkg/R/functions.R | 26 +++++++++++++++++++++++++- R/pkg/R/generics.R | 4 ++++ R/pkg/tests/fulltests/test_sparkSQL.R | 3 ++- 4 files changed, 32 insertions(+), 2 deletions(-) diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 91f6e6dc8a0e6..6ef2df5731e10 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -223,6 +223,7 @@ exportMethods("%<=>%", "array_remove", "array_repeat", "array_sort", + "array_to_vector", "array_transform", "arrays_overlap", "array_union", diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index 99406443165d5..58d07a8d8fc2f 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -357,7 +357,13 @@ NULL #' @examples #' \dontrun{ #' df <- read.df("data/mllib/sample_libsvm_data.txt", source = "libsvm") -#' head(select(df, vector_to_array(df$features))) +#' head( +#' withColumn( +#' withColumn(df, "array", vector_to_array(df$features)), +#' "vector", +#' array_to_vector(column("array")) +#' ) +#' ) #' } NULL @@ -4609,6 +4615,24 @@ setMethod("timestamp_seconds", column(jc) }) +#' @details +#' \code{array_to_vector} Converts a column of array of numeric type into +#' a column of dense vectors in MLlib +#' +#' @rdname column_ml_functions +#' @aliases array_to_vector array_to_vector,Column-method +#' @note array_to_vector since 3.1.0 +setMethod("array_to_vector", + signature(x = "Column"), + function(x) { + jc <- callJStatic( + "org.apache.spark.ml.functions", + "array_to_vector", + x@jc + ) + column(jc) + }) + #' @details #' \code{vector_to_array} Converts a column of MLlib sparse/dense vectors into #' a column of dense arrays. diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 1fe6599bf1b97..fb830aa686f72 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -838,6 +838,10 @@ setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat") #' @name NULL setGeneric("array_sort", function(x) { standardGeneric("array_sort") }) +#' @rdname column_ml_functions +#' @name NULL +setGeneric("array_to_vector", function(x) { standardGeneric("array_to_vector") }) + #' @rdname column_collection_functions #' @name NULL setGeneric("array_transform", function(x, f) { standardGeneric("array_transform") }) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index 833f77786c80b..c623f534f706c 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -1425,7 +1425,8 @@ test_that("column functions", { c25 <- overlay(c1, c2, c3, c3) + overlay(c1, c2, c3) + overlay(c1, c2, 1) + overlay(c1, c2, 3, 4) c26 <- timestamp_seconds(c1) + vector_to_array(c) + - vector_to_array(c, "float32") + vector_to_array(c, "float64") + vector_to_array(c, "float32") + vector_to_array(c, "float64") + + array_to_vector(c) c27 <- nth_value("x", 1L) + nth_value("y", 2, TRUE) + nth_value(column("v"), 3) + nth_value(column("z"), 4L, FALSE) c28 <- asc_nulls_first(c1) + asc_nulls_last(c1) + From f71f34572d5510e50953ccd0191c833962b63a32 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Wed, 2 Dec 2020 09:50:02 +0900 Subject: [PATCH 0631/1009] [SPARK-33544][SQL] Optimize size of CreateArray/CreateMap to be the size of its children ### What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-32295 added in an optimization to insert a filter for not null and size > 0 when using inner explode/inline. 
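As a reminder of what that inferred filter looks like (a sketch, not code from either patch; run in spark-shell where `spark` and its implicits are available):

```scala
import org.apache.spark.sql.functions.explode
import spark.implicits._

val df = Seq(Seq(1, 2), null, Seq.empty[Int]).toDF("arr")
// SPARK-32295 effectively rewrites this into
//   df.where("arr IS NOT NULL AND size(arr) > 0").select(explode($"arr"))
// which is visible as a Filter node in the optimized plan:
df.select(explode($"arr")).explain(true)
```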
This is fine in most cases but the extra filter is not needed if the explode is with a create array and not using Literals (it already handles LIterals). When this happens you know that the values aren't null and it has a size. It already handles the empty array. The not null check is already optimized out because Createarray and createMap are not nullable, that leaves the size > 0 check. To handle that this PR makes it so that the size > 0 check gets optimized in ConstantFolding to be the size of the children in the array or map. That makes it a literal and then makes it ultimately be optimized out. ### Why are the changes needed? remove unneeded filter ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Unit tests added and manually tested various cases Closes #30504 from tgravescs/SPARK-33544. Lead-authored-by: Thomas Graves Co-authored-by: Thomas Graves Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../expressions/complexTypeCreator.scala | 12 ++++-- .../sql/catalyst/optimizer/expressions.scala | 13 ++++++ .../optimizer/ConstantFoldingSuite.scala | 36 ++++++++++++++++ .../InferFiltersFromGenerateSuite.scala | 41 ++++++++++++++++++- 4 files changed, 98 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3958cfd0af2a3..f0f92e2d935f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -30,6 +30,12 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String +/** + * Trait to indicate the expression doesn't have any side effects. This can be used + * to indicate its ok to optimize it out under certain circumstances. + */ +trait NoSideEffect + /** * Returns an Array containing the evaluation of all children expressions. 
*/ @@ -42,7 +48,7 @@ import org.apache.spark.unsafe.types.UTF8String """, since = "1.1.0") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression { + extends Expression with NoSideEffect { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -160,7 +166,7 @@ private [sql] object GenArrayData { """, since = "2.0.0") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression { + extends Expression with NoSideEffect{ def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -379,7 +385,7 @@ object CreateStruct { """, since = "1.5.0") // scalastyle:on line.size.limit -case class CreateNamedStruct(children: Seq[Expression]) extends Expression { +case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoSideEffect { lazy val (nameExprs, valExprs) = children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index d1eb3b07d3d5f..4725f49340451 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -41,6 +41,14 @@ import org.apache.spark.sql.types._ * equivalent [[Literal]] values. */ object ConstantFolding extends Rule[LogicalPlan] { + + private def hasNoSideEffect(e: Expression): Boolean = e match { + case _: Attribute => true + case _: Literal => true + case _: NoSideEffect => e.children.forall(hasNoSideEffect) + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { // Skip redundant folding of literals. This rule is technically not necessary. Placing this @@ -48,6 +56,11 @@ object ConstantFolding extends Rule[LogicalPlan] { // object and running eval unnecessarily. case l: Literal => l + case Size(c: CreateArray, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length) + case Size(c: CreateMap, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length / 2) + // Fold expressions that are foldable. 
case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 23ab6b2df3e64..fd9b58a7a06aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -263,4 +263,40 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-33544: Constant folding test with sideaffects") { + val originalQuery = + testRelation + .select('a) + .where(Size(CreateArray(Seq(AssertTrue(false)))) > 0) + + val optimized = Optimize.execute(originalQuery.analyze) + comparePlans(optimized, originalQuery.analyze) + } + + object OptimizeForCreate extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateSubqueryAliases) :: + Batch("ConstantFolding", FixedPoint(4), + OptimizeIn, + ConstantFolding, + PruneFilters) :: Nil + } + + test("SPARK-33544: Constant folding test CreateArray") { + val originalQuery = + testRelation + .select('a) + .where(Size(CreateArray(Seq('a))) > 0) + + val optimized = OptimizeForCreate.execute(originalQuery.analyze) + + val correctAnswer = + testRelation + .select('a) + .analyze + + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala index 3f83971aa9821..c6fa1bd6e415c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ @@ -33,7 +34,7 @@ class InferFiltersFromGenerateSuite extends PlanTest { val testRelation = LocalRelation('a.array(StructType(Seq( StructField("x", IntegerType), StructField("y", IntegerType) - )))) + ))), 'c1.string, 'c2.string) Seq(Explode(_), PosExplode(_), Inline(_)).foreach { f => val generator = f('a) @@ -72,4 +73,42 @@ class InferFiltersFromGenerateSuite extends PlanTest { comparePlans(optimized, originalQuery) } } + + // setup rules to test inferFilters with ConstantFolding to make sure + // the Filter rule added in inferFilters is removed again when doing + // explode with CreateArray/CreateMap + object OptimizeInferAndConstantFold extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateSubqueryAliases) :: + Batch("Infer Filters", Once, InferFiltersFromGenerate) :: + Batch("ConstantFolding after", FixedPoint(4), + ConstantFolding, + NullPropagation, + PruneFilters) :: Nil + } + + Seq(Explode(_), PosExplode(_)).foreach { f => + val createArrayExplode = f(CreateArray(Seq('c1))) + test("Don't infer filters from CreateArray " + createArrayExplode) { + val originalQuery = testRelation.generate(createArrayExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, 
originalQuery) + } + val createMapExplode = f(CreateMap(Seq('c1, 'c2))) + test("Don't infer filters from CreateMap " + createMapExplode) { + val originalQuery = testRelation.generate(createMapExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } + + Seq(Inline(_)).foreach { f => + val createArrayStructExplode = f(CreateArray(Seq(CreateStruct(Seq('c1))))) + test("Don't infer filters from CreateArray " + createArrayStructExplode) { + val originalQuery = testRelation.generate(createArrayStructExplode).analyze + val optimized = OptimizeInferAndConstantFold.execute(originalQuery) + comparePlans(optimized, originalQuery) + } + } } From 51ebcd95a5f7e377245f302a91e90f9b3db9953e Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 2 Dec 2020 10:17:00 +0900 Subject: [PATCH 0632/1009] [SPARK-32863][SS] Full outer stream-stream join ### What changes were proposed in this pull request? This PR is to add full outer stream-stream join, and the implementation of full outer join is: * For left side input row, check if there's a match on right side state store. * if there's a match, output the joined row, o.w. output nothing. Put the row in left side state store. * For right side input row, check if there's a match on left side state store. * if there's a match, output the joined row, o.w. output nothing. Put the row in right side state store. * State store eviction: evict rows from left/right side state store below watermark, and output rows never matched before (a combination of left outer and right outer join). ### Why are the changes needed? Enable more use cases for spark stream-stream join. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added unit tests in `UnsupportedOperationChecker.scala` and `StreamingJoinSuite.scala`. Closes #30395 from c21/stream-foj. 
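For reference, the user-facing pattern this enables is the existing stream-stream join API with `full_outer` as the join type. The sketch below follows the structured streaming guide's ad-monetization example; `clicks` and `impressions` are hypothetical streaming DataFrames and the watermark delays are made up:

```scala
import org.apache.spark.sql.functions.expr

val clicksWithWatermark = clicks.withWatermark("clickTime", "1 hour")
val impressionsWithWatermark = impressions.withWatermark("impressionTime", "2 hours")

// Rows that never find a match are emitted with nulls on the other side
// once the watermark guarantees no future match can arrive.
val joined = clicksWithWatermark.join(
  impressionsWithWatermark,
  expr("""
    clickAdId = impressionAdId AND
    clickTime >= impressionTime AND
    clickTime <= impressionTime + interval 1 hour
  """),
  "full_outer")
```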
Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../UnsupportedOperationChecker.scala | 71 +++--- .../analysis/UnsupportedOperationsSuite.scala | 16 +- .../StreamingSymmetricHashJoinExec.scala | 57 +++-- .../sql/streaming/StreamingJoinSuite.scala | 209 +++++++++++++++++- 4 files changed, 297 insertions(+), 56 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index 7dcc6a81b48cd..ab7d90098bfd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -287,7 +287,7 @@ object UnsupportedOperationChecker extends Logging { throwError("dropDuplicates is not supported after aggregation on a " + "streaming DataFrame/Dataset") - case Join(left, right, joinType, condition, _) => + case j @ Join(left, right, joinType, condition, _) => if (left.isStreaming && right.isStreaming && outputMode != InternalOutputModes.Append) { throwError("Join between two streaming DataFrames/Datasets is not supported" + s" in ${outputMode} output mode, only in Append output mode") @@ -298,8 +298,14 @@ object UnsupportedOperationChecker extends Logging { // no further validations needed case FullOuter => - if (left.isStreaming || right.isStreaming) { - throwError("Full outer joins with streaming DataFrames/Datasets are not supported") + if (left.isStreaming && !right.isStreaming) { + throwError("FullOuter joins with streaming DataFrames/Datasets on the left " + + "and a static DataFrame/Dataset on the right is not supported") + } else if (!left.isStreaming && right.isStreaming) { + throwError("FullOuter joins with streaming DataFrames/Datasets on the right " + + "and a static DataFrame/Dataset on the left is not supported") + } else if (left.isStreaming && right.isStreaming) { + checkForStreamStreamJoinWatermark(j) } case LeftAnti => @@ -315,40 +321,17 @@ object UnsupportedOperationChecker extends Logging { throwError(s"$joinType join with a streaming DataFrame/Dataset " + "on the right and a static DataFrame/Dataset on the left is not supported") } else if (left.isStreaming && right.isStreaming) { - val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) - - val hasValidWatermarkRange = - StreamingJoinHelper.getStateValueWatermark( - left.outputSet, right.outputSet, condition, Some(1000000)).isDefined - - if (!watermarkInJoinKeys && !hasValidWatermarkRange) { - throwError( - s"Stream-stream $joinType join between two streaming DataFrame/Datasets " + - "is not supported without a watermark in the join keys, or a watermark on " + - "the nullable side and an appropriate range condition") - } + checkForStreamStreamJoinWatermark(j) } // We support streaming right outer joins with static on the left always, and with // stream on both sides under the appropriate conditions. 
case RightOuter => if (left.isStreaming && !right.isStreaming) { - throwError("Right outer join with a streaming DataFrame/Dataset on the left and " + + throwError("RightOuter join with a streaming DataFrame/Dataset on the left and " + "a static DataFrame/DataSet on the right not supported") } else if (left.isStreaming && right.isStreaming) { - val isWatermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(subPlan) - - // Check if the nullable side has a watermark, and there's a range condition which - // implies a state value watermark on the first side. - val hasValidWatermarkRange = - StreamingJoinHelper.getStateValueWatermark( - right.outputSet, left.outputSet, condition, Some(1000000)).isDefined - - if (!isWatermarkInJoinKeys && !hasValidWatermarkRange) { - throwError("Stream-stream outer join between two streaming DataFrame/Datasets " + - "is not supported without a watermark in the join keys, or a watermark on " + - "the nullable side and an appropriate range condition") - } + checkForStreamStreamJoinWatermark(j) } case NaturalJoin(_) | UsingJoin(_, _) => @@ -438,4 +421,34 @@ object UnsupportedOperationChecker extends Logging { throw new AnalysisException( msg, operator.origin.line, operator.origin.startPosition, Some(operator)) } + + private def checkForStreamStreamJoinWatermark(join: Join): Unit = { + val watermarkInJoinKeys = StreamingJoinHelper.isWatermarkInJoinKeys(join) + + // Check if the nullable side has a watermark, and there's a range condition which + // implies a state value watermark on the first side. + val hasValidWatermarkRange = join.joinType match { + case LeftOuter | LeftSemi => StreamingJoinHelper.getStateValueWatermark( + join.left.outputSet, join.right.outputSet, join.condition, Some(1000000)).isDefined + case RightOuter => StreamingJoinHelper.getStateValueWatermark( + join.right.outputSet, join.left.outputSet, join.condition, Some(1000000)).isDefined + case FullOuter => + Seq((join.left.outputSet, join.right.outputSet), + (join.right.outputSet, join.left.outputSet)).exists { + case (attributesToFindStateWatermarkFor, attributesWithEventWatermark) => + StreamingJoinHelper.getStateValueWatermark(attributesToFindStateWatermarkFor, + attributesWithEventWatermark, join.condition, Some(1000000)).isDefined + } + case _ => + throwError( + s"Join type ${join.joinType} is not supported with streaming DataFrame/Dataset")(join) + } + + if (!watermarkInJoinKeys && !hasValidWatermarkRange) { + throwError( + s"Stream-stream ${join.joinType} join between two streaming DataFrame/Datasets " + + "is not supported without a watermark in the join keys, or a watermark on " + + "the nullable side and an appropriate range condition")(join) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index 3be417de472c6..cdc3f4275414c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -408,13 +408,15 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { streamStreamSupported = false, expectedMsg = "is not supported in Update output mode") - // Full outer joins: only batch-batch is allowed + // Full outer joins: stream-batch/batch-stream join are not allowed, + // and stream-stream join is allowed 'conditionally' - see below 
check testBinaryOperationInStreamingPlan( - "full outer join", + "FullOuter join", _.join(_, joinType = FullOuter), streamStreamSupported = false, batchStreamSupported = false, - streamBatchSupported = false) + streamBatchSupported = false, + expectedMsg = "FullOuter join") // Left outer, left semi, left anti join: *-stream not allowed Seq((LeftOuter, "LeftOuter join"), (LeftSemi, "LeftSemi join"), (LeftAnti, "LeftAnti join")) @@ -429,14 +431,14 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { // Right outer joins: stream-* not allowed testBinaryOperationInStreamingPlan( - "right outer join", + "RightOuter join", _.join(_, joinType = RightOuter), streamBatchSupported = false, streamStreamSupported = false, - expectedMsg = "outer join") + expectedMsg = "RightOuter join") - // Left outer, right outer, left semi joins - Seq(LeftOuter, RightOuter, LeftSemi).foreach { joinType => + // Left outer, right outer, full outer, left semi joins + Seq(LeftOuter, RightOuter, FullOuter, LeftSemi).foreach { joinType => // Update mode not allowed assertNotSupportedInStreamingPlan( s"$joinType join with stream-stream relations and update mode", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 8b69205530769..73d2f826f1126 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -165,8 +165,14 @@ case class StreamingSymmetricHashJoinExec( throw new IllegalArgumentException(errorMessageForJoinType) } + private def throwBadStateFormatVersionException(): Nothing = { + throw new IllegalStateException("Unexpected state format version! " + + s"version $stateFormatVersion") + } + require( - joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == LeftSemi, + joinType == Inner || joinType == LeftOuter || joinType == RightOuter || joinType == FullOuter || + joinType == LeftSemi, errorMessageForJoinType) require(leftKeys.map(_.dataType) == rightKeys.map(_.dataType)) @@ -186,6 +192,7 @@ case class StreamingSymmetricHashJoinExec( case _: InnerLike => left.output ++ right.output case LeftOuter => left.output ++ right.output.map(_.withNullability(true)) case RightOuter => left.output.map(_.withNullability(true)) ++ right.output + case FullOuter => (left.output ++ right.output).map(_.withNullability(true)) case LeftSemi => left.output case _ => throwBadJoinTypeException() } @@ -195,6 +202,7 @@ case class StreamingSymmetricHashJoinExec( PartitioningCollection(Seq(left.outputPartitioning, right.outputPartitioning)) case LeftOuter => left.outputPartitioning case RightOuter => right.outputPartitioning + case FullOuter => UnknownPartitioning(left.outputPartitioning.numPartitions) case LeftSemi => left.outputPartitioning case _ => throwBadJoinTypeException() } @@ -250,14 +258,14 @@ case class StreamingSymmetricHashJoinExec( // Join one side input using the other side's buffered/state rows. Here is how it is done. // // - `leftSideJoiner.storeAndJoinWithOtherSide(rightSideJoiner)` - // - Inner, Left Outer, Right Outer Join: generates all rows from matching new left input - // with stored right input, and also stores all the left input. 
+ // - Inner, Left Outer, Right Outer, Full Outer Join: generates all rows from matching + // new left input with stored right input, and also stores all the left input. // - Left Semi Join: generates all new left input rows from matching new left input with // stored right input, and also stores all the non-matched left input. // // - `rightSideJoiner.storeAndJoinWithOtherSide(leftSideJoiner)` - // - Inner, Left Outer, Right Outer Join: generates all rows from matching new right input - // with stored left input, and also stores all the right input. + // - Inner, Left Outer, Right Outer, Full Outer Join: generates all rows from matching + // new right input with stored left input, and also stores all the right input. // It also generates all rows from matching new left input with new right input, since // the new left input has become stored by that point. This tiny asymmetry is necessary // to avoid duplication. @@ -314,9 +322,7 @@ case class StreamingSymmetricHashJoinExec( stateFormatVersion match { case 1 => matchesWithRightSideState(new UnsafeRowPair(kv.key, kv.value)) case 2 => kv.matched - case _ => - throw new IllegalStateException("Unexpected state format version! " + - s"version $stateFormatVersion") + case _ => throwBadStateFormatVersionException() } }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) @@ -333,13 +339,23 @@ case class StreamingSymmetricHashJoinExec( stateFormatVersion match { case 1 => matchesWithLeftSideState(new UnsafeRowPair(kv.key, kv.value)) case 2 => kv.matched - case _ => - throw new IllegalStateException("Unexpected state format version! " + - s"version $stateFormatVersion") + case _ => throwBadStateFormatVersionException() } }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) hashJoinOutputIter ++ outerOutputIter + case FullOuter => + lazy val isKeyToValuePairMatched = (kv: KeyToValuePair) => + stateFormatVersion match { + case 2 => kv.matched + case _ => throwBadStateFormatVersionException() + } + val leftSideOutputIter = leftSideJoiner.removeOldState().filterNot( + isKeyToValuePairMatched).map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + val rightSideOutputIter = rightSideJoiner.removeOldState().filterNot( + isKeyToValuePairMatched).map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + + hashJoinOutputIter ++ leftSideOutputIter ++ rightSideOutputIter case _ => throwBadJoinTypeException() } @@ -372,16 +388,21 @@ case class StreamingSymmetricHashJoinExec( // For inner and left semi joins, we have to remove unnecessary state rows from both sides // if possible. // - // For outer joins, we have already removed unnecessary state rows from the outer side - // (e.g., left side for left outer join) while generating the outer "null" outputs. Now, we - // have to remove unnecessary state rows from the other side (e.g., right side for the left - // outer join) if possible. In all cases, nothing needs to be outputted, hence the removal - // needs to be done greedily by immediately consuming the returned iterator. + // For left outer and right outer joins, we have already removed unnecessary state rows from + // the outer side (e.g., left side for left outer join) while generating the outer "null" + // outputs. Now, we have to remove unnecessary state rows from the other side (e.g., right + // side for the left outer join) if possible. In all cases, nothing needs to be outputted, + // hence the removal needs to be done greedily by immediately consuming the returned + // iterator. 
+ // + // For full outer joins, we have already removed unnecessary states from both sides, so + // nothing needs to be outputted here. val cleanupIter = joinType match { case Inner | LeftSemi => leftSideJoiner.removeOldState() ++ rightSideJoiner.removeOldState() case LeftOuter => rightSideJoiner.removeOldState() case RightOuter => leftSideJoiner.removeOldState() + case FullOuter => Iterator.empty case _ => throwBadJoinTypeException() } while (cleanupIter.hasNext) { @@ -491,9 +512,9 @@ case class StreamingSymmetricHashJoinExec( } val generateFilteredJoinedRow: InternalRow => Iterator[InternalRow] = joinSide match { - case LeftSide if joinType == LeftOuter => + case LeftSide if joinType == LeftOuter || joinType == FullOuter => (row: InternalRow) => Iterator(generateJoinedRow(row, nullRight)) - case RightSide if joinType == RightOuter => + case RightSide if joinType == RightOuter || joinType == FullOuter => (row: InternalRow) => Iterator(generateJoinedRow(row, nullLeft)) case _ => (_: InternalRow) => Iterator.empty } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index a25616af360b1..476abcbf5c241 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -99,7 +99,8 @@ abstract class StreamingJoinSuite } else if (joinType == "right_outer") { joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) } else { - joined + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, + right("key"), right("window.end").cast("long"), 'rightValue) } (leftInput, rightInput, select) @@ -128,7 +129,8 @@ abstract class StreamingJoinSuite } else if (joinType == "right_outer") { joined.select(right("key"), right("window.end").cast("long"), 'leftValue, 'rightValue) } else { - joined + joined.select(left("key"), left("window.end").cast("long"), 'leftValue, + right("key"), right("window.end").cast("long"), 'rightValue) } (leftInput, rightInput, select) @@ -1070,6 +1072,209 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { } } +class StreamingFullOuterJoinSuite extends StreamingJoinSuite { + + test("windowed full outer join") { + val (leftInput, rightInput, joined) = setupWindowedJoin("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3, 4, 5)(rightInput, 3, 4, 5, 6, 7), + CheckNewAnswer(Row(3, 10, 6, 9), Row(4, 10, 8, 12), Row(5, 10, 10, 15)), + // states + // left: 1, 2, 3, 4 ,5 + // right: 3, 4, 5, 6, 7 + assertNumStateRows(total = 10, updated = 10), + MultiAddData(leftInput, 21)(rightInput, 22), + // Watermark = 11, should remove rows having window=[0,10]. + CheckNewAnswer(Row(1, 10, 2, null), Row(2, 10, 4, null), Row(6, 10, null, 18), + Row(7, 10, null, 21)), + // states + // left: 21 + // right: 22 + // + // states evicted + // left: 1, 2, 3, 4 ,5 (below watermark) + // right: 3, 4, 5, 6, 7 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(leftInput, 22), + CheckNewAnswer(Row(22, 30, 44, 66)), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 1), + StopStream, + StartStream(), + + AddData(leftInput, 1), + // Row not add as 1 < state key watermark = 12. 
+ CheckNewAnswer(), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 0, droppedByWatermark = 1), + AddData(rightInput, 5), + // Row not add as 5 < state key watermark = 12. + CheckNewAnswer(), + // states + // left: 21, 22 + // right: 22 + assertNumStateRows(total = 3, updated = 0, droppedByWatermark = 1) + ) + } + + test("full outer early state exclusion on left") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithLeftCondition("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 1, 2, 3)(rightInput, 3, 4, 5), + // The left rows with leftValue <= 4 should generate their outer join rows now and + // not get added to the state. + CheckNewAnswer(Row(1, 10, 2, null, null, null), Row(2, 10, 4, null, null, null), + Row(3, 10, 6, 3, 10, "9")), + // states + // left: 3 + // right: 3, 4, 5 + assertNumStateRows(total = 4, updated = 4), + // Generate outer join result for all non-matched rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(Row(null, null, null, 4, 10, "12"), Row(null, null, null, 5, 10, "15")), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3 (below watermark) + // right: 3, 4, 5 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer(Row(20, 30, 40, 20, 30, "60")), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("full outer early state exclusion on right") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRightCondition("full_outer") + + testStream(joined)( + MultiAddData(leftInput, 3, 4, 5)(rightInput, 1, 2, 3), + // The right rows with rightValue <= 7 should generate their outer join rows now, + // and never be added to the state. + // The right row with rightValue = 9 > 7, hence joined and added to state. + CheckNewAnswer(Row(null, null, null, 1, 10, "3"), Row(null, null, null, 2, 10, "6"), + Row(3, 10, 6, 3, 10, "9")), + // states + // left: 3, 4, 5 + // right: 3 + assertNumStateRows(total = 4, updated = 4), + // Generate outer join result for all non-matched rows when the watermark advances. + MultiAddData(leftInput, 20)(rightInput, 21), + CheckNewAnswer(Row(4, 10, 8, null, null, null), Row(5, 10, 10, null, null, null)), + // states + // left: 20 + // right: 21 + // + // states evicted + // left: 3, 4, 5 (below watermark) + // right: 3 (below watermark) + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, 20), + CheckNewAnswer(Row(20, 30, 40, 20, 30, "60")), + // states + // left: 20 + // right: 21, 20 + assertNumStateRows(total = 3, updated = 1) + ) + } + + test("full outer join with watermark range condition") { + val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("full_outer") + + testStream(joined)( + AddData(leftInput, (1, 5), (3, 5)), + CheckNewAnswer(), + // states + // left: (1, 5), (3, 5) + // right: nothing + assertNumStateRows(total = 2, updated = 2), + AddData(rightInput, (1, 10), (2, 5)), + // Match left row in the state. + CheckNewAnswer(Row(1, 1, 5, 10)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5) + assertNumStateRows(total = 4, updated = 2), + AddData(rightInput, (1, 9)), + // Match left row in the state. + CheckNewAnswer(Row(1, 1, 5, 9)), + // states + // left: (1, 5), (3, 5) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 5, updated = 1), + // Increase event time watermark to 20s by adding data with time = 30s on both inputs. 
+ AddData(leftInput, (1, 7), (1, 30)), + CheckNewAnswer(Row(1, 1, 7, 9), Row(1, 1, 7, 10)), + // states + // left: (1, 5), (3, 5), (1, 7), (1, 30) + // right: (1, 10), (2, 5), (1, 9) + assertNumStateRows(total = 7, updated = 2), + // Watermark = 30 - 10 = 20, no matched row. + // Generate outer join result for all non-matched rows when the watermark advances. + AddData(rightInput, (0, 30)), + CheckNewAnswer(Row(3, null, 5, null), Row(null, 2, null, 5)), + // states + // left: (1, 30) + // right: (0, 30) + // + // states evicted + // left: (1, 5), (3, 5), (1, 5) (below watermark = 20) + // right: (1, 10), (2, 5), (1, 9) (below watermark = 20) + assertNumStateRows(total = 2, updated = 1) + ) + } + + test("self full outer join") { + val (inputStream, query) = setupWindowedSelfJoin("full_outer") + + testStream(query)( + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + CheckNewAnswer(Row(2, 2L, 2, 2L), Row(4, 4L, 4, 4L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 7, updated = 7), + AddData(inputStream, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + CheckNewAnswer(Row(6, 6L, 6, 6L), Row(8, 8L, 8, 8L), Row(10, 10L, 10, 10L)), + // batch 2 - global watermark = 5 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), + // (9, 9L), (10, 10L) + // right: (6, 6L), (8, 8L), (10, 10L) + // + // states evicted + // left: nothing (it waits for 5 seconds more than watermark due to join condition) + // right: (2, 2L), (4, 4L) + assertNumStateRows(total = 13, updated = 8), + AddData(inputStream, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + CheckNewAnswer(Row(12, 12L, 12, 12L), Row(14, 14L, 14, 14L), Row(1, 1L, null, null), + Row(3, 3L, null, null)), + // batch 3 - global watermark = 9 + // states + // left: (4, 4L), (5, 5L), (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L), (11, 11L), + // (12, 12L), (13, 13L), (14, 14L), (15, 15L) + // right: (10, 10L), (12, 12L), (14, 14L) + // + // states evicted + // left: (1, 1L), (2, 2L), (3, 3L) + // right: (6, 6L), (8, 8L) + assertNumStateRows(total = 15, updated = 7) + ) + } +} + class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { import testImplicits._ From a4788ee8c61e1373e6eded41bb57d84c68149968 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Wed, 2 Dec 2020 15:28:16 +0900 Subject: [PATCH 0633/1009] [MINOR][SS] Rename auxiliary protected methods in StreamingJoinSuite ### What changes were proposed in this pull request? Per request from https://github.com/apache/spark/pull/30395#issuecomment-735028698, here we remove `Windowed` from methods names `setupWindowedJoinWithRangeCondition` and `setupWindowedSelfJoin` as they don't join on time window. ### Why are the changes needed? There's no such official name for `windowed join`, so this is to help avoid confusion for future developers. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30563 from c21/stream-minor. 
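For context, the helpers being renamed here (used throughout the `StreamingFullOuterJoinSuite` added above) set up stream-stream joins roughly like the following user-level sketch. It is only an illustration: the stream names, column names, watermark delays and the range bound are assumptions, not code from this suite.

```scala
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.expr

// Hypothetical streaming inputs, e.g. built from MemoryStream and converted to DataFrames.
def fullOuterJoinWithRangeCondition(leftRaw: DataFrame, rightRaw: DataFrame): DataFrame = {
  val left = leftRaw.toDF("leftKey", "leftTime").withWatermark("leftTime", "10 seconds")
  val right = rightRaw.toDF("rightKey", "rightTime").withWatermark("rightTime", "10 seconds")

  // A stream-stream full outer join needs watermarks on both sides plus a time
  // constraint, so unmatched state can eventually be evicted and emitted with
  // nulls for the missing side (which is what the tests above assert).
  left.join(
    right,
    expr("leftKey = rightKey AND rightTime BETWEEN leftTime AND leftTime + INTERVAL 5 seconds"),
    "full_outer")
}
```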
Authored-by: Cheng Su Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../spark/sql/streaming/StreamingJoinSuite.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 476abcbf5c241..d264886c8cf46 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -136,7 +136,7 @@ abstract class StreamingJoinSuite (leftInput, rightInput, select) } - protected def setupWindowedJoinWithRangeCondition(joinType: String) + protected def setupJoinWithRangeCondition(joinType: String) : (MemoryStream[(Int, Int)], MemoryStream[(Int, Int)], DataFrame) = { val leftInput = MemoryStream[(Int, Int)] @@ -167,7 +167,7 @@ abstract class StreamingJoinSuite (leftInput, rightInput, select) } - protected def setupWindowedSelfJoin(joinType: String) + protected def setupSelfJoin(joinType: String) : (MemoryStream[(Int, Long)], DataFrame) = { val inputStream = MemoryStream[(Int, Long)] @@ -750,7 +750,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { ("right_outer", Row(null, 2, null, 5)) ).foreach { case (joinType: String, outerResult) => test(s"${joinType.replaceAllLiterally("_", " ")} with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition(joinType) + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition(joinType) testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -830,7 +830,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { } test("SPARK-26187 self left outer join should not return outer nulls for already matched rows") { - val (inputStream, query) = setupWindowedSelfJoin("left_outer") + val (inputStream, query) = setupSelfJoin("left_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -1190,7 +1190,7 @@ class StreamingFullOuterJoinSuite extends StreamingJoinSuite { } test("full outer join with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("full_outer") + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition("full_outer") testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -1236,7 +1236,7 @@ class StreamingFullOuterJoinSuite extends StreamingJoinSuite { } test("self full outer join") { - val (inputStream, query) = setupWindowedSelfJoin("full_outer") + val (inputStream, query) = setupSelfJoin("full_outer") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), @@ -1394,7 +1394,7 @@ class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { } test("left semi join with watermark range condition") { - val (leftInput, rightInput, joined) = setupWindowedJoinWithRangeCondition("left_semi") + val (leftInput, rightInput, joined) = setupJoinWithRangeCondition("left_semi") testStream(joined)( AddData(leftInput, (1, 5), (3, 5)), @@ -1439,7 +1439,7 @@ class StreamingLeftSemiJoinSuite extends StreamingJoinSuite { } test("self left semi join") { - val (inputStream, query) = setupWindowedSelfJoin("left_semi") + val (inputStream, query) = setupSelfJoin("left_semi") testStream(query)( AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), From 290aa021796139e503454d315e5cd350f836ab42 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 2 Dec 2020 
18:23:48 +0900 Subject: [PATCH 0634/1009] [SPARK-33618][CORE] Use hadoop-client instead of hadoop-client-api to make hadoop-aws work ### What changes were proposed in this pull request? This reverts commit SPARK-33212 (cb3fa6c9368e64184a5f7b19688181d11de9511c) mostly with three exceptions: 1. `SparkSubmitUtils` was updated recently by SPARK-33580 2. `resource-managers/yarn/pom.xml` was updated recently by SPARK-33104 to add `hadoop-yarn-server-resourcemanager` test dependency. 3. Adjust `com.fasterxml.jackson.module:jackson-module-jaxb-annotations` dependency in K8s module which is updated recently by SPARK-33471. ### Why are the changes needed? According to [HADOOP-16080](https://issues.apache.org/jira/browse/HADOOP-16080) since Apache Hadoop 3.1.1, `hadoop-aws` doesn't work with `hadoop-client-api`. It fails at write operation like the following. **1. Spark distribution with `-Phadoop-cloud`** ```scala $ bin/spark-shell --conf spark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID --conf spark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY 20/11/30 23:01:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). Spark context available as 'sc' (master = local[*], app id = local-1606806088715). Spark session available as 'spark'. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /___/ .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Scala version 2.12.10 (OpenJDK 64-Bit Server VM, Java 1.8.0_272) Type in expressions to have them evaluated. Type :help for more information. scala> spark.read.parquet("s3a://dongjoon/users.parquet").show 20/11/30 23:01:34 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties +------+--------------+----------------+ | name|favorite_color|favorite_numbers| +------+--------------+----------------+ |Alyssa| null| [3, 9, 15, 20]| | Ben| red| []| +------+--------------+----------------+ scala> Seq(1).toDF.write.parquet("s3a://dongjoon/out.parquet") 20/11/30 23:02:14 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2)/ 1] java.lang.NoSuchMethodError: org.apache.hadoop.util.SemaphoredDelegatingExecutor.(Lcom/google/common/util/concurrent/ListeningExecutorService;IZ)V ``` **2. Spark distribution without `-Phadoop-cloud`** ```scala $ bin/spark-shell --conf spark.hadoop.fs.s3a.access.key=$AWS_ACCESS_KEY_ID --conf spark.hadoop.fs.s3a.secret.key=$AWS_SECRET_ACCESS_KEY -c spark.eventLog.enabled=true -c spark.eventLog.dir=s3a://dongjoon/spark-events/ --packages org.apache.hadoop:hadoop-aws:3.2.0,org.apache.hadoop:hadoop-common:3.2.0 ... java.lang.NoSuchMethodError: org.apache.hadoop.util.SemaphoredDelegatingExecutor.(Lcom/google/common/util/concurrent/ListeningExecutorService;IZ)V at org.apache.hadoop.fs.s3a.S3AFileSystem.create(S3AFileSystem.java:772) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #30508 from dongjoon-hyun/SPARK-33212-REVERT. 
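As a purely diagnostic sketch (not part of this patch), the mismatch can be made visible from a Spark shell by listing the constructors that are actually on the classpath; with the shaded `hadoop-client-api`/`hadoop-client-runtime` jars there is no constructor taking the plain Guava `ListeningExecutorService` that `hadoop-aws` was compiled against, which matches the `NoSuchMethodError` above:

```scala
// Diagnostic only; assumes hadoop-aws/S3A is on the classpath of a spark-shell session.
val ctors = Class
  .forName("org.apache.hadoop.util.SemaphoredDelegatingExecutor")
  .getDeclaredConstructors

// With the unshaded hadoop-client this includes a constructor taking
// com.google.common.util.concurrent.ListeningExecutorService; with the shaded
// client jars that exact signature is absent because Guava is relocated,
// so S3A write paths fail at runtime.
ctors.map(_.toGenericString).foreach(println)
```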
Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- common/network-yarn/pom.xml | 8 +-- core/pom.xml | 16 +---- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 3 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 52 +++++++++++++- external/kafka-0-10-assembly/pom.xml | 8 +-- external/kafka-0-10-sql/pom.xml | 4 -- external/kafka-0-10-token-provider/pom.xml | 5 -- external/kinesis-asl-assembly/pom.xml | 8 +-- hadoop-cloud/pom.xml | 7 +- launcher/pom.xml | 9 +-- pom.xml | 57 +++------------- resource-managers/kubernetes/core/pom.xml | 9 +++ resource-managers/yarn/pom.xml | 67 +++++++------------ .../spark/deploy/yarn/ApplicationMaster.scala | 6 +- .../deploy/yarn/BaseYarnClusterSuite.scala | 10 --- sql/catalyst/pom.xml | 4 -- sql/hive/pom.xml | 5 -- .../hive/client/IsolatedClientLoader.scala | 19 +----- 18 files changed, 107 insertions(+), 190 deletions(-) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 9938e5d769e12..0225db81925c5 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -65,13 +65,7 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client org.slf4j diff --git a/core/pom.xml b/core/pom.xml index 9d2bf7dbe57a9..ce6f6ed9c7051 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -66,13 +66,7 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client org.apache.spark @@ -183,14 +177,6 @@ org.apache.commons commons-text - - commons-io - commons-io - - - commons-collections - commons-collections - com.google.code.findbugs jsr305 diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 8802220726f78..a19558bc2a5e3 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -127,7 +127,7 @@ javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar -jaxb-api/2.2.11//jaxb-api-2.2.11.jar +jaxb-api/2.2.2//jaxb-api-2.2.2.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar @@ -226,6 +226,7 @@ spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar +stax-api/1.0-2//stax-api-1.0-2.jar stax-api/1.0.1//stax-api-1.0.1.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d45eeea0ee92b..24283224dd37d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -3,12 +3,14 @@ JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar +accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar aircompressor/0.10//aircompressor-0.10.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar +aopalliance/1.0//aopalliance-1.0.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar 
arrow-format/2.0.0//arrow-format-2.0.0.jar arrow-memory-core/2.0.0//arrow-memory-core-2.0.0.jar @@ -25,12 +27,15 @@ breeze_2.12/1.0//breeze_2.12-1.0.jar cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar +commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar commons-codec/1.10//commons-codec-1.10.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar +commons-configuration2/2.1.1//commons-configuration2-2.1.1.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar +commons-daemon/1.0.13//commons-daemon-1.0.13.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar @@ -50,13 +55,30 @@ datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar derby/10.12.1.1//derby-10.12.1.1.jar +dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar +ehcache/3.3.1//ehcache-3.3.1.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar +geronimo-jcache_1.0_spec/1.0-alpha-1//geronimo-jcache_1.0_spec-1.0-alpha-1.jar gson/2.2.4//gson-2.2.4.jar guava/14.0.1//guava-14.0.1.jar -hadoop-client-api/3.2.0//hadoop-client-api-3.2.0.jar -hadoop-client-runtime/3.2.0//hadoop-client-runtime-3.2.0.jar +guice-servlet/4.0//guice-servlet-4.0.jar +guice/4.0//guice-4.0.jar +hadoop-annotations/3.2.0//hadoop-annotations-3.2.0.jar +hadoop-auth/3.2.0//hadoop-auth-3.2.0.jar +hadoop-client/3.2.0//hadoop-client-3.2.0.jar +hadoop-common/3.2.0//hadoop-common-3.2.0.jar +hadoop-hdfs-client/3.2.0//hadoop-hdfs-client-3.2.0.jar +hadoop-mapreduce-client-common/3.2.0//hadoop-mapreduce-client-common-3.2.0.jar +hadoop-mapreduce-client-core/3.2.0//hadoop-mapreduce-client-core-3.2.0.jar +hadoop-mapreduce-client-jobclient/3.2.0//hadoop-mapreduce-client-jobclient-3.2.0.jar +hadoop-yarn-api/3.2.0//hadoop-yarn-api-3.2.0.jar +hadoop-yarn-client/3.2.0//hadoop-yarn-client-3.2.0.jar +hadoop-yarn-common/3.2.0//hadoop-yarn-common-3.2.0.jar +hadoop-yarn-registry/3.2.0//hadoop-yarn-registry-3.2.0.jar +hadoop-yarn-server-common/3.2.0//hadoop-yarn-server-common-3.2.0.jar +hadoop-yarn-server-web-proxy/3.2.0//hadoop-yarn-server-web-proxy-3.2.0.jar hive-beeline/2.3.7//hive-beeline-2.3.7.jar hive-cli/2.3.7//hive-cli-2.3.7.jar hive-common/2.3.7//hive-common-2.3.7.jar @@ -86,6 +108,8 @@ jackson-core/2.10.0//jackson-core-2.10.0.jar jackson-databind/2.10.0//jackson-databind-2.10.0.jar jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar +jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar +jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar @@ -98,11 +122,13 @@ jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar +javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar 
javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar +jcip-annotations/1.0-1//jcip-annotations-1.0-1.jar jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.30//jersey-client-2.30.jar @@ -116,14 +142,30 @@ jline/2.14.6//jline-2.14.6.jar joda-time/2.10.5//joda-time-2.10.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar +json-smart/2.3//json-smart-2.3.jar json/1.8//json-1.8.jar json4s-ast_2.12/3.7.0-M5//json4s-ast_2.12-3.7.0-M5.jar json4s-core_2.12/3.7.0-M5//json4s-core_2.12-3.7.0-M5.jar json4s-jackson_2.12/3.7.0-M5//json4s-jackson_2.12-3.7.0-M5.jar json4s-scalap_2.12/3.7.0-M5//json4s-scalap_2.12-3.7.0-M5.jar +jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar +kerb-admin/1.0.1//kerb-admin-1.0.1.jar +kerb-client/1.0.1//kerb-client-1.0.1.jar +kerb-common/1.0.1//kerb-common-1.0.1.jar +kerb-core/1.0.1//kerb-core-1.0.1.jar +kerb-crypto/1.0.1//kerb-crypto-1.0.1.jar +kerb-identity/1.0.1//kerb-identity-1.0.1.jar +kerb-server/1.0.1//kerb-server-1.0.1.jar +kerb-simplekdc/1.0.1//kerb-simplekdc-1.0.1.jar +kerb-util/1.0.1//kerb-util-1.0.1.jar +kerby-asn1/1.0.1//kerby-asn1-1.0.1.jar +kerby-config/1.0.1//kerby-config-1.0.1.jar +kerby-pkix/1.0.1//kerby-pkix-1.0.1.jar +kerby-util/1.0.1//kerby-util-1.0.1.jar +kerby-xdr/1.0.1//kerby-xdr-1.0.1.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/4.12.0//kubernetes-client-4.12.0.jar kubernetes-model-admissionregistration/4.12.0//kubernetes-model-admissionregistration-4.12.0.jar @@ -161,7 +203,9 @@ metrics-json/4.1.1//metrics-json-4.1.1.jar metrics-jvm/4.1.1//metrics-jvm-4.1.1.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.51.Final//netty-all-4.1.51.Final.jar +nimbus-jose-jwt/4.41.1//nimbus-jose-jwt-4.41.1.jar objenesis/2.6//objenesis-2.6.jar +okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar @@ -180,6 +224,7 @@ parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar py4j/0.10.9//py4j-0.10.9.jar pyrolite/4.30//pyrolite-4.30.jar +re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar scala-library/2.12.10//scala-library-2.12.10.jar @@ -197,12 +242,15 @@ spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar stax-api/1.0.1//stax-api-1.0.1.jar +stax2-api/3.1.4//stax2-api-3.1.4.jar stream/2.9.6//stream-2.9.6.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.5.0//threeten-extra-1.5.0.jar +token-provider/1.0.1//token-provider-1.0.1.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.0//univocity-parsers-2.9.0.jar velocity/1.5//velocity-1.5.jar +woodstox-core/5.0.3//woodstox-core-5.0.3.jar xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index b1e306c499385..d9d9fb7f55c77 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -71,15 +71,9 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-client provided - - org.apache.hadoop - 
${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.avro avro-mapred diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 06a6bef005e69..95a99ac88412e 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -79,10 +79,6 @@ kafka-clients ${kafka.version} - - com.google.code.findbugs - jsr305 - org.apache.commons commons-pool2 diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 1b0d6d322917f..941946f30e96f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -58,11 +58,6 @@ mockito-core test - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.deps.scope} - org.apache.spark spark-tags_${scala.binary.version} diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 5a49358a84241..76ee5bb7b2f85 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -91,15 +91,9 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-client provided - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.avro avro-ipc diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index a5642a5a68fe4..8689e0b8a9ea8 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -58,15 +58,10 @@ org.apache.hadoop - ${hadoop-client-api.artifact} + hadoop-client ${hadoop.version} provided - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} - test - - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} + hadoop-client test diff --git a/pom.xml b/pom.xml index f0ad9b0167c32..4d6e3bbc95378 100644 --- a/pom.xml +++ b/pom.xml @@ -244,15 +244,6 @@ compile test - - hadoop-client-api - hadoop-client-runtime - hadoop-client-minicluster - - - org.apache.hadoop - hadoop-client-api - ${hadoop.version} - ${hadoop.deps.scope} - - - org.apache.hadoop - hadoop-client-runtime - ${hadoop.version} - ${hadoop.deps.scope} - - - org.apache.hadoop - hadoop-client-minicluster - ${yarn.version} - test - - org.apache.hadoop hadoop-client @@ -1688,14 +1654,6 @@ org.apache.ant ant
      - - org.apache.hadoop - hadoop-common - - - org.apache.hadoop - hadoop-auth - org.apache.zookeeper zookeeper @@ -2460,6 +2418,17 @@ + + enforce-no-duplicate-dependencies + + enforce + + + + + + + @@ -2919,7 +2888,6 @@ maven-shade-plugin false - false org.spark-project.spark:unused @@ -3181,9 +3149,6 @@ 2.7.4 2.7.1 2.4 - hadoop-client - hadoop-client - hadoop-client diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index edeb95fdba684..18e1c65e2e932 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -63,6 +63,10 @@ com.fasterxml.jackson.core * + + com.fasterxml.jackson.module + jackson-module-jaxb-annotations + com.fasterxml.jackson.dataformat jackson-dataformat-yaml @@ -81,6 +85,11 @@ jackson-dataformat-yaml ${fasterxml.jackson.version} + + com.fasterxml.jackson.module + jackson-module-jaxb-annotations + ${fasterxml.jackson.version} + diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index f6d6ddccc99c3..e9122ce202723 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -40,42 +40,6 @@ true - - hadoop-2.7 - - - org.apache.hadoop - hadoop-yarn-api - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-server-web-proxy - - - org.apache.hadoop - hadoop-yarn-client - - - org.apache.hadoop - hadoop-yarn-server-tests - tests - test - - - - org.apache.hadoop - hadoop-yarn-server-resourcemanager - test - - - @@ -105,20 +69,23 @@ org.apache.hadoop - ${hadoop-client-api.artifact} - ${hadoop.version} + hadoop-yarn-api org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.version} - ${hadoop.deps.scope} + hadoop-yarn-common org.apache.hadoop - ${hadoop-client-minicluster.artifact} - ${hadoop.version} - test + hadoop-yarn-server-web-proxy + + + org.apache.hadoop + hadoop-yarn-client + + + org.apache.hadoop + hadoop-client @@ -175,6 +142,18 @@ test + + org.apache.hadoop + hadoop-yarn-server-tests + tests + test + + + org.apache.hadoop + hadoop-yarn-server-resourcemanager + test + + org.mockito mockito-core diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index e23773229c560..be9a88ca9b1d6 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.yarn import java.io.{File, IOException} import java.lang.reflect.{InvocationTargetException, Modifier} -import java.net.{URI, URL, URLEncoder} +import java.net.{URI, URL} import java.security.PrivilegedExceptionAction import java.util.concurrent.{TimeoutException, TimeUnit} @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException +import org.apache.hadoop.yarn.server.webproxy.ProxyUriUtils import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import org.apache.spark._ @@ -307,8 +308,7 @@ private[spark] class ApplicationMaster( // The client-mode AM doesn't listen for incoming connections, so report an invalid port. 
registerAM(Utils.localHostName, -1, sparkConf, sparkConf.getOption("spark.driver.appUIAddress"), appAttemptId) - val encodedAppId = URLEncoder.encode(appAttemptId.getApplicationId.toString, "UTF-8") - addAmIpFilter(Some(driverRef), s"/proxy/$encodedAppId") + addAmIpFilter(Some(driverRef), ProxyUriUtils.getPath(appAttemptId.getApplicationId)) createAllocator(driverRef, sparkConf, clientRpcEnv, appAttemptId, cachedResourcesConf) reporterThread.join() } catch { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index a813b9913f23b..20f5339c46fef 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -80,16 +80,6 @@ abstract class BaseYarnClusterSuite yarnConf.set("yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage", "100.0") - // capacity-scheduler.xml is missing in hadoop-client-minicluster so this is a workaround - yarnConf.set("yarn.scheduler.capacity.root.queues", "default") - yarnConf.setInt("yarn.scheduler.capacity.root.default.capacity", 100) - yarnConf.setFloat("yarn.scheduler.capacity.root.default.user-limit-factor", 1) - yarnConf.setInt("yarn.scheduler.capacity.root.default.maximum-capacity", 100) - yarnConf.set("yarn.scheduler.capacity.root.default.state", "RUNNING") - yarnConf.set("yarn.scheduler.capacity.root.default.acl_submit_applications", "*") - yarnConf.set("yarn.scheduler.capacity.root.default.acl_administer_queue", "*") - yarnConf.setInt("yarn.scheduler.capacity.node-locality-delay", -1) - yarnCluster = new MiniYARNCluster(getClass().getName(), 1, 1, 1) yarnCluster.init(yarnConf) yarnCluster.start() diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index af976fa1fa983..6b79eb722fcdd 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -104,10 +104,6 @@ org.antlr antlr4-runtime - - javax.xml.bind - jaxb-api - commons-codec commons-codec diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 4fca6264c0594..0453094cf8b7b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -162,11 +162,6 @@ org.datanucleus datanucleus-core - - org.apache.hadoop - ${hadoop-client-runtime.artifact} - ${hadoop.deps.scope} - org.apache.thrift libthrift diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index 9663e03ee6a74..c0758dcdfc879 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -112,24 +112,11 @@ private[hive] object IsolatedClientLoader extends Logging { hadoopVersion: String, ivyPath: Option[String], remoteRepos: String): Seq[URL] = { - val hadoopJarNames = if (hadoopVersion.startsWith("3")) { - Seq(s"org.apache.hadoop:hadoop-client-api:$hadoopVersion", - s"org.apache.hadoop:hadoop-client-runtime:$hadoopVersion") - } else { - Seq(s"org.apache.hadoop:hadoop-client:$hadoopVersion") - } val hiveArtifacts = version.extraDeps ++ Seq("hive-metastore", "hive-exec", "hive-common", "hive-serde") .map(a => s"org.apache.hive:$a:${version.fullVersion}") ++ - Seq("com.google.guava:guava:14.0.1") ++ hadoopJarNames - - val extraExclusions = if (hadoopVersion.startsWith("3")) { - // this 
introduced from lower version of Hive could conflict with jars in Hadoop 3.2+, so - // exclude here in favor of the ones in Hadoop 3.2+ - Seq("org.apache.hadoop:hadoop-auth") - } else { - Seq.empty - } + Seq("com.google.guava:guava:14.0.1", + s"org.apache.hadoop:hadoop-client:$hadoopVersion") val classpath = quietly { SparkSubmitUtils.resolveMavenCoordinates( @@ -137,7 +124,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), - exclusions = version.exclusions ++ extraExclusions) + exclusions = version.exclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet From 084d38b64ecbcaa9fac47ffca5604cf2a72936fc Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Dec 2020 18:41:49 +0900 Subject: [PATCH 0635/1009] [SPARK-33557][CORE][MESOS][TEST] Ensure the relationship between STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT and NETWORK_TIMEOUT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? As described in SPARK-33557, `HeartbeatReceiver` and `MesosCoarseGrainedSchedulerBackend` will always use `Network.NETWORK_TIMEOUT.defaultValueString` as value of `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` when we configure `NETWORK_TIMEOUT` without configure `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT`, this is different from the relationship described in `configuration.md`. To fix this problem,the main change of this pr as follow: - Remove the explicitly default value of `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` - Use actual value of `NETWORK_TIMEOUT` as `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` when `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` not configured in `HeartbeatReceiver` and `MesosCoarseGrainedSchedulerBackend` ### Why are the changes needed? To ensure the relationship between `NETWORK_TIMEOUT` and `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` as we described in `configuration.md` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Manual test configure `NETWORK_TIMEOUT` and `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` locally Closes #30547 from LuciferYang/SPARK-33557. 
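Put differently, the documented behaviour restored here is that an unset heartbeat timeout should track the configured `spark.network.timeout` rather than that config's default. A minimal sketch with plain `SparkConf` calls (illustrative only; the string key for `STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT` is assumed to be `spark.storage.blockManagerHeartbeatTimeoutMs`):

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.network.timeout", "200s") // heartbeat timeout deliberately left unset

// Fallback as described in configuration.md: use the explicit heartbeat timeout
// if present, otherwise follow the actual network timeout value.
val heartbeatTimeoutMs =
  if (conf.contains("spark.storage.blockManagerHeartbeatTimeoutMs")) {
    conf.getTimeAsMs("spark.storage.blockManagerHeartbeatTimeoutMs")
  } else {
    conf.getTimeAsMs("spark.network.timeout", "120s")
  }

assert(heartbeatTimeoutMs == 200000L) // before this change the effective value stayed at the 120s default
```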
Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala | 4 +++- .../scala/org/apache/spark/internal/config/package.scala | 2 +- .../org/apache/spark/repl/ExecutorClassLoaderSuite.scala | 1 + .../cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala | 5 ++++- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index 233ad884a721a..13ff075660cd7 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -80,7 +80,9 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) // executor ID -> timestamp of when the last heartbeat from this executor was received private val executorLastSeen = new HashMap[String, Long] - private val executorTimeoutMs = sc.conf.get(config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT) + private val executorTimeoutMs = sc.conf.get( + config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT + ).getOrElse(Utils.timeStringAsMs(s"${sc.conf.get(Network.NETWORK_TIMEOUT)}s")) private val checkTimeoutIntervalMs = sc.conf.get(Network.NETWORK_TIMEOUT_INTERVAL) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 6639f20a068d4..f6de5e4128ca5 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -504,7 +504,7 @@ package object config { .version("0.7.0") .withAlternative("spark.storage.blockManagerSlaveTimeoutMs") .timeConf(TimeUnit.MILLISECONDS) - .createWithDefaultString(Network.NETWORK_TIMEOUT.defaultValueString) + .createOptional private[spark] val STORAGE_CLEANUP_FILES_AFTER_EXECUTOR_EXIT = ConfigBuilder("spark.storage.cleanupFilesAfterExecutorExit") diff --git a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala index 386de19e919e6..23ea3fee2505b 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ExecutorClassLoaderSuite.scala @@ -231,6 +231,7 @@ class ExecutorClassLoaderSuite .setMaster("local") .setAppName("executor-class-loader-test") .set("spark.network.timeout", "11s") + .set("spark.network.timeoutInterval", "11s") .set("spark.repl.class.outputDir", tempDir1.getAbsolutePath) val sc = new SparkContext(conf) try { diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index efcef09132f5b..6fedce61d8208 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -34,6 +34,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkContext, SparkExceptio import org.apache.spark.deploy.mesos.config._ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.config +import org.apache.spark.internal.config.Network import org.apache.spark.internal.config.Tests.IS_TESTING import 
org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} import org.apache.spark.network.netty.SparkTransportConf @@ -651,7 +652,9 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( .registerDriverWithShuffleService( agent.hostname, externalShufflePort, - sc.conf.get(config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT), + sc.conf.get( + config.STORAGE_BLOCKMANAGER_HEARTBEAT_TIMEOUT + ).getOrElse(Utils.timeStringAsMs(s"${sc.conf.get(Network.NETWORK_TIMEOUT)}s")), sc.conf.get(config.EXECUTOR_HEARTBEAT_INTERVAL)) agent.shuffleRegistered = true } From 28dad1ba770e5b7f7cf542da1ae3f05975a969c6 Mon Sep 17 00:00:00 2001 From: neko Date: Wed, 2 Dec 2020 09:24:19 -0600 Subject: [PATCH 0636/1009] [SPARK-33504][CORE] The application log in the Spark history server contains sensitive attributes should be redacted ### What changes were proposed in this pull request? To make sure the sensitive attributes to be redacted in the history server log. ### Why are the changes needed? We found the secure attributes like password in SparkListenerJobStart and SparkListenerStageSubmitted events would not been redated, resulting in sensitive attributes can be viewd directly. The screenshot can be viewed in the attachment of JIRA spark-33504 ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? muntual test works well, I have also added unit testcase. Closes #30446 from akiyamaneko/eventlog_unredact. Authored-by: neko Signed-off-by: Thomas Graves --- .../scheduler/EventLoggingListener.scala | 24 ++++++- .../scheduler/EventLoggingListenerSuite.scala | 64 ++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 1fda03f732636..d4e22d739098f 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -18,7 +18,9 @@ package org.apache.spark.scheduler import java.net.URI +import java.util.Properties +import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.hadoop.conf.Configuration @@ -103,7 +105,7 @@ private[spark] class EventLoggingListener( // Events that do not trigger a flush override def onStageSubmitted(event: SparkListenerStageSubmitted): Unit = { - logEvent(event) + logEvent(event.copy(properties = redactProperties(event.properties))) if (shouldLogStageExecutorMetrics) { // record the peak metrics for the new stage liveStageExecutorMetrics.put((event.stageInfo.stageId, event.stageInfo.attemptNumber()), @@ -156,7 +158,9 @@ private[spark] class EventLoggingListener( logEvent(event, flushLogger = true) } - override def onJobStart(event: SparkListenerJobStart): Unit = logEvent(event, flushLogger = true) + override def onJobStart(event: SparkListenerJobStart): Unit = { + logEvent(event.copy(properties = redactProperties(event.properties)), flushLogger = true) + } override def onJobEnd(event: SparkListenerJobEnd): Unit = logEvent(event, flushLogger = true) @@ -276,6 +280,22 @@ private[spark] class EventLoggingListener( logWriter.stop() } + private def redactProperties(properties: Properties): Properties = { + if (properties == null) { + return properties + } + val redactedProperties = new Properties + // properties may contain some custom local properties such as stage/job description + // only properties in sparkConf need to be redacted. 
+ val (globalProperties, localProperties) = properties.asScala.toSeq.partition { + case (key, _) => sparkConf.contains(key) + } + (Utils.redact(sparkConf, globalProperties) ++ localProperties).foreach { + case (key, value) => redactedProperties.setProperty(key, value) + } + redactedProperties + } + private[spark] def redactEvent( event: SparkListenerEnvironmentUpdate): SparkListenerEnvironmentUpdate = { // environmentDetails maps a string descriptor to a set of properties diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index c4a8bcbb26a1d..7acb8451e3b38 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.scheduler import java.io.{File, InputStream} -import java.util.Arrays +import java.util.{Arrays, Properties} import scala.collection.immutable.Map import scala.collection.mutable @@ -98,6 +98,68 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit assert(redactedProps(key) == "*********(redacted)") } + test("Spark-33504 sensitive attributes redaction in properties") { + val (secretKey, secretPassword) = ("spark.executorEnv.HADOOP_CREDSTORE_PASSWORD", + "secret_password") + val (customKey, customValue) = ("parse_token", "secret_password") + + val conf = getLoggingConf(testDirPath, None).set(secretKey, secretPassword) + + val properties = new Properties() + properties.setProperty(secretKey, secretPassword) + properties.setProperty(customKey, customValue) + + val logName = "properties-reaction-test" + val eventLogger = new EventLoggingListener(logName, None, testDirPath.toUri(), conf) + val listenerBus = new LiveListenerBus(conf) + + val stageId = 1 + val jobId = 1 + val stageInfo = new StageInfo(stageId, 0, stageId.toString, 0, + Seq.empty, Seq.empty, "details", + resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) + + val events = Array(SparkListenerStageSubmitted(stageInfo, properties), + SparkListenerJobStart(jobId, 0, Seq(stageInfo), properties)) + + eventLogger.start() + listenerBus.start(Mockito.mock(classOf[SparkContext]), Mockito.mock(classOf[MetricsSystem])) + listenerBus.addToEventLogQueue(eventLogger) + events.foreach(event => listenerBus.post(event)) + listenerBus.stop() + eventLogger.stop() + + val logData = EventLogFileReader.openEventLog(new Path(eventLogger.logWriter.logPath), + fileSystem) + try { + val lines = readLines(logData) + val logStart = SparkListenerLogStart(SPARK_VERSION) + assert(lines.size === 3) + assert(lines(0).contains("SparkListenerLogStart")) + assert(lines(1).contains("SparkListenerStageSubmitted")) + assert(lines(2).contains("SparkListenerJobStart")) + + lines.foreach{ + line => JsonProtocol.sparkEventFromJson(parse(line)) match { + case logStartEvent: SparkListenerLogStart => + assert(logStartEvent == logStart) + + case stageSubmittedEvent: SparkListenerStageSubmitted => + assert(stageSubmittedEvent.properties.getProperty(secretKey) == "*********(redacted)") + assert(stageSubmittedEvent.properties.getProperty(customKey) == customValue) + + case jobStartEvent : SparkListenerJobStart => + assert(jobStartEvent.properties.getProperty(secretKey) == "*********(redacted)") + assert(jobStartEvent.properties.getProperty(customKey) == customValue) + + case _ => assert(false) + } + } + } finally { + logData.close() + } + } + 
test("Executor metrics update") { testStageExecutorMetricsEventLogging() } From df8d3f1bf779ce1a9f3520939ab85814f09b48b7 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 2 Dec 2020 16:03:08 +0000 Subject: [PATCH 0637/1009] [SPARK-33544][SQL][FOLLOW-UP] Rename NoSideEffect to NoThrow and clarify the documentation more ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/30504. It proposes: - Rename `NoSideEffect` to `NoThrow`, and use `Expression.deterministic` together where it is used. - Clarify, in the docs in the expressions, that it means they don't throw exceptions ### Why are the changes needed? `NoSideEffect` virtually means that `Expression.eval` does not throw an exception, and the expressions are deterministic. It's best to be explicit so `NoThrow` was proposed - I looked if there's a similar name to represent this concept and borrowed the name of [nothrow](https://clang.llvm.org/docs/AttributeReference.html#nothrow). For determinism, we already have a way to note it under `Expression.deterministic`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually ran the existing unittests written. Closes #30570 from HyukjinKwon/SPARK-33544. Authored-by: HyukjinKwon Signed-off-by: Wenchen Fan --- .../expressions/complexTypeCreator.scala | 18 ++++++++++++------ .../sql/catalyst/optimizer/expressions.scala | 2 +- .../optimizer/ConstantFoldingSuite.scala | 2 +- .../InferFiltersFromGenerateSuite.scala | 6 +++--- 4 files changed, 17 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index f0f92e2d935f1..cb59fbda2b3b9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -31,10 +31,16 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** - * Trait to indicate the expression doesn't have any side effects. This can be used - * to indicate its ok to optimize it out under certain circumstances. + * Trait to indicate the expression does not throw an exception by itself when they are evaluated. + * For example, UDFs, [[AssertTrue]], etc can throw an exception when they are executed. + * In such case, it is necessary to call [[Expression.eval]], and the optimization rule should + * not ignore it. + * + * This trait can be used in an optimization rule such as + * [[org.apache.spark.sql.catalyst.optimizer.ConstantFolding]] to fold the expressions that + * do not need to execute, for example, `size(array(c0, c1, c2))`. */ -trait NoSideEffect +trait NoThrow /** * Returns an Array containing the evaluation of all children expressions. 
@@ -48,7 +54,7 @@ trait NoSideEffect """, since = "1.1.0") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression with NoSideEffect { + extends Expression with NoThrow { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -166,7 +172,7 @@ private [sql] object GenArrayData { """, since = "2.0.0") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) - extends Expression with NoSideEffect{ + extends Expression with NoThrow { def this(children: Seq[Expression]) = { this(children, SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) @@ -385,7 +391,7 @@ object CreateStruct { """, since = "1.5.0") // scalastyle:on line.size.limit -case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoSideEffect { +case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoThrow { lazy val (nameExprs, valExprs) = children.grouped(2).map { case Seq(name, value) => (name, value) }.toList.unzip diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 4725f49340451..1b1e2ad71e7c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -45,7 +45,7 @@ object ConstantFolding extends Rule[LogicalPlan] { private def hasNoSideEffect(e: Expression): Boolean = e match { case _: Attribute => true case _: Literal => true - case _: NoSideEffect => e.children.forall(hasNoSideEffect) + case _: NoThrow if e.deterministic => e.children.forall(hasNoSideEffect) case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index fd9b58a7a06aa..ae644c1110740 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -264,7 +264,7 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("SPARK-33544: Constant folding test with sideaffects") { + test("SPARK-33544: Constant folding test with side effects") { val originalQuery = testRelation .select('a) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala index c6fa1bd6e415c..93a1d414ed403 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InferFiltersFromGenerateSuite.scala @@ -90,13 +90,13 @@ class InferFiltersFromGenerateSuite extends PlanTest { Seq(Explode(_), PosExplode(_)).foreach { f => val createArrayExplode = f(CreateArray(Seq('c1))) - test("Don't infer filters from CreateArray " + createArrayExplode) { + test("SPARK-33544: Don't infer filters from CreateArray " + createArrayExplode) { val originalQuery = testRelation.generate(createArrayExplode).analyze val optimized = OptimizeInferAndConstantFold.execute(originalQuery) 
comparePlans(optimized, originalQuery) @@ -105,7 +105,7 @@ class InferFiltersFromGenerateSuite extends PlanTest { Seq(Inline(_)).foreach { f => val createArrayStructExplode = f(CreateArray(Seq(CreateStruct(Seq('c1))))) - test("Don't infer filters from CreateArray " + createArrayStructExplode) { + test("SPARK-33544: Don't infer filters from CreateArray " + createArrayStructExplode) { val originalQuery = testRelation.generate(createArrayStructExplode).analyze val optimized = OptimizeInferAndConstantFold.execute(originalQuery) comparePlans(optimized, originalQuery) From 58583f7c3fdcac1232607a7ab4b0d052320ac3ea Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Wed, 2 Dec 2020 16:10:45 +0000 Subject: [PATCH 0638/1009] [SPARK-33619][SQL] Fix GetMapValueUtil code generation error ### What changes were proposed in this pull request? Code gen bug fix for an issue introduced by SPARK-33460 ``` GetMapValueUtil s"""throw new NoSuchElementException("Key " + $eval2 + " does not exist.");""" SHOULD BE s"""throw new java.util.NoSuchElementException("Key " + $eval2 + " does not exist.");""" ``` The reason SPARK-33460 failed to detect this bug via UT is that `checkExceptionInExpression`, unlike `checkEvaluation`, did not try to evaluate the expression in BOTH `CODEGEN_ONLY` and `NO_CODEGEN` modes; this PR fixes that test helper as well. ### Why are the changes needed? Bug fix. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added a new UT and ran existing UTs. Closes #30560 from leanken/leanken-SPARK-33619.
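The failure is easy to reproduce outside Spark: generated sources are compiled by Janino with no `import` statements beyond what the codegen framework registers, so a simple name like `NoSuchElementException` cannot be resolved while the fully qualified `java.util.NoSuchElementException` can. A minimal standalone sketch (assumes only a Janino dependency; it is not code from this patch):

```scala
import org.codehaus.janino.SimpleCompiler

val source =
  """public class Gen {
    |  public static void fail(Object key) {
    |    throw new %s("Key " + key + " does not exist.");
    |  }
    |}""".stripMargin

// The fully qualified exception name compiles fine.
new SimpleCompiler().cook(source.format("java.util.NoSuchElementException"))

// The simple name fails with Janino's
// `Cannot determine simple type name "NoSuchElementException"`,
// which is the message the new ExpressionEvalHelperSuite test asserts on.
// new SimpleCompiler().cook(source.format("NoSuchElementException"))
```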
Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../expressions/complexTypeExtractors.scala | 2 +- .../expressions/datetimeExpressions.scala | 7 ++- .../expressions/intervalExpressions.scala | 14 +++--- .../expressions/ExpressionEvalHelper.scala | 49 ++++++------------- .../ExpressionEvalHelperSuite.scala | 25 +++++++++- .../IntervalExpressionsSuite.scala | 36 +++++++------- .../expressions/MathExpressionsSuite.scala | 5 +- 7 files changed, 70 insertions(+), 68 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 767650d022200..ef247efbe1a04 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -394,7 +394,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { val keyJavaType = CodeGenerator.javaType(keyType) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val keyNotFoundBranch = if (failOnError) { - s"""throw new NoSuchElementException("Key " + $eval2 + " does not exist.");""" + s"""throw new java.util.NoSuchElementException("Key " + $eval2 + " does not exist.");""" } else { s"${ev.isNull} = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index bbf1e4657f351..424887a13cb97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1789,8 +1789,11 @@ private case class GetTimestamp( """, group = "datetime_funcs", since = "3.0.0") -case class MakeDate(year: Expression, month: Expression, day: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) +case class MakeDate( + year: Expression, + month: Expression, + day: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { def this(year: Expression, month: Expression, day: Expression) = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 6219457bba994..27067e17e7f45 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -166,13 +166,13 @@ case class MakeInterval( extends SeptenaryExpression with ImplicitCastInputTypes with NullIntolerant { def this( - years: Expression, - months: Expression, - weeks: Expression, - days: Expression, - hours: Expression, - mins: Expression, - sec: Expression) = { + years: Expression, + months: Expression, + weeks: Expression, + days: Expression, + hours: Expression, + mins: Expression, + sec: Expression) = { this(years, months, weeks, days, hours, mins, sec, SQLConf.get.ansiEnabled) } def this( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 
842c8f3243f2a..70eb391ad6e05 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -36,7 +36,6 @@ import org.apache.spark.sql.catalyst.plans.logical.{OneRowRelation, Project} import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, MapData} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils /** * A few helper functions for expression evaluation testing. Mixin this trait to use them. @@ -160,9 +159,14 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB expectedErrMsg: String): Unit = { def checkException(eval: => Unit, testMode: String): Unit = { + val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN) withClue(s"($testMode)") { val errMsg = intercept[T] { - eval + for (fallbackMode <- modes) { + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { + eval + } + } }.getMessage if (errMsg == null) { if (expectedErrMsg != null) { @@ -192,22 +196,6 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB expression.eval(inputRow) } - protected def generateProject( - generator: => Projection, - expression: Expression): Projection = { - try { - generator - } catch { - case e: Throwable => - fail( - s""" - |Code generation of $expression failed: - |$e - |${Utils.exceptionString(e)} - """.stripMargin) - } - } - protected def checkEvaluationWithoutCodegen( expression: Expression, expected: Any, @@ -244,9 +232,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB protected def evaluateWithMutableProjection( expression: => Expression, inputRow: InternalRow = EmptyRow): Any = { - val plan = generateProject( - MutableProjection.create(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + val plan = MutableProjection.create(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) plan(inputRow).get(0, expression.dataType) @@ -292,11 +278,9 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB // SPARK-16489 Explicitly doing code generation twice so code gen will fail if // some expression is reusing variable names across different instances. // This behavior is tested in ExpressionEvalHelperSuite. 
- val plan = generateProject( - UnsafeProjection.create( - Alias(expression, s"Optimized($expression)1")() :: - Alias(expression, s"Optimized($expression)2")() :: Nil), - expression) + val plan = UnsafeProjection.create( + Alias(expression, s"Optimized($expression)1")() :: + Alias(expression, s"Optimized($expression)2")() :: Nil) plan.initialize(0) plan(inputRow) @@ -319,16 +303,13 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB checkEvaluationWithMutableProjection(expression, expected) checkEvaluationWithOptimization(expression, expected) - var plan = generateProject( - GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + var plan: Projection = + GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) var actual = plan(inputRow).get(0, expression.dataType) assert(checkResult(actual, expected, expression)) - plan = generateProject( - GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + plan = GenerateUnsafeProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) plan.initialize(0) val ref = new BoundReference(0, expression.dataType, nullable = true) actual = GenerateSafeProjection.generate(ref :: Nil)(plan(inputRow)).get(0, expression.dataType) @@ -456,9 +437,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB } } - val plan = generateProject( - GenerateMutableProjection.generate(Alias(expr, s"Optimized($expr)")() :: Nil), - expr) + val plan = GenerateMutableProjection.generate(Alias(expr, s"Optimized($expr)")() :: Nil) val (codegen, codegenExc) = try { (Some(plan(inputRow).get(0, expr.dataType)), None) } catch { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala index 54ef9641bee0d..3cc50da38906e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelperSuite.scala @@ -32,8 +32,8 @@ import org.apache.spark.sql.types.{DataType, IntegerType, MapType} */ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper { - test("SPARK-16489 checkEvaluation should fail if expression reuses variable names") { - val e = intercept[RuntimeException] { checkEvaluation(BadCodegenExpression(), 10) } + test("SPARK-16489: checkEvaluation should fail if expression reuses variable names") { + val e = intercept[Exception] { checkEvaluation(BadCodegenExpression(), 10) } assert(e.getMessage.contains("some_variable")) } @@ -43,6 +43,12 @@ class ExpressionEvalHelperSuite extends SparkFunSuite with ExpressionEvalHelper } assert(e.getMessage.contains("and exprNullable was")) } + + test("SPARK-33619: make sure checkExceptionInExpression work as expected") { + checkExceptionInExpression[Exception]( + BadCodegenAndEvalExpression(), + "Cannot determine simple type name \"NoSuchElementException\"") + } } /** @@ -76,3 +82,18 @@ case class MapIncorrectDataTypeExpression() extends LeafExpression with CodegenF // since values includes null, valueContainsNull must be true override def dataType: DataType = MapType(IntegerType, IntegerType, valueContainsNull = false) } + +case class BadCodegenAndEvalExpression() extends LeafExpression { + override 
def nullable: Boolean = false + override def eval(input: InternalRow): Any = + throw new Exception("Cannot determine simple type name \"NoSuchElementException\"") + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + // it should be java.util.NoSuchElementException in generated code. + ev.copy(code = + code""" + |int ${ev.value} = 10; + |throw new NoSuchElementException("compile failed!"); + """.stripMargin) + } + override def dataType: DataType = IntegerType +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index 5c73a91de4f79..950637c958426 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -217,15 +217,15 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("ANSI mode: make interval") { def check( - years: Int = 0, - months: Int = 0, - weeks: Int = 0, - days: Int = 0, - hours: Int = 0, - minutes: Int = 0, - seconds: Int = 0, - millis: Int = 0, - micros: Int = 0): Unit = { + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), Literal(days), Literal(hours), Literal(minutes), @@ -238,15 +238,15 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } def checkException( - years: Int = 0, - months: Int = 0, - weeks: Int = 0, - days: Int = 0, - hours: Int = 0, - minutes: Int = 0, - seconds: Int = 0, - millis: Int = 0, - micros: Int = 0): Unit = { + years: Int = 0, + months: Int = 0, + weeks: Int = 0, + days: Int = 0, + hours: Int = 0, + minutes: Int = 0, + seconds: Int = 0, + millis: Int = 0, + micros: Int = 0): Unit = { val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) val intervalExpr = MakeInterval(Literal(years), Literal(months), Literal(weeks), Literal(days), Literal(hours), Literal(minutes), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index b4096f21bea3a..6d09e28362e11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -138,9 +138,8 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { expression: Expression, inputRow: InternalRow = EmptyRow): Unit = { - val plan = generateProject( - GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil), - expression) + val plan = + GenerateMutableProjection.generate(Alias(expression, s"Optimized($expression)")() :: Nil) val actual = plan(inputRow).get(0, expression.dataType) if (!actual.asInstanceOf[Double].isNaN) { From 91182d6cce0a56a50801d530aff0c8e3aba59e27 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 2 Dec 2020 08:43:30 -0800 Subject: [PATCH 0639/1009] [SPARK-33626][K8S][TEST] Allow k8s integration tests to assert both driver and executor 
logs for expected log(s) ### What changes were proposed in this pull request? Allow k8s integration tests to assert both driver and executor logs for expected log(s) ### Why are the changes needed? Some of the tests will be able to provide full coverage of the use case, by asserting both driver and executor logs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? TBD Closes #30568 from ScrapCodes/expectedDriverLogChanges. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../integrationtest/DecommissionSuite.scala | 6 ++-- .../k8s/integrationtest/DepsTestsSuite.scala | 2 +- .../k8s/integrationtest/KubernetesSuite.scala | 32 ++++++++++++++++--- .../integrationtest/PythonTestsSuite.scala | 6 ++-- .../k8s/integrationtest/RTestsSuite.scala | 2 +- .../SparkConfPropagateSuite.scala | 22 ++++++------- 6 files changed, 47 insertions(+), 23 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 9d7db04bb72b0..92f6a32cd156a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -38,7 +38,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_DECOMISSIONING, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors", "Final accumulator value is: 100"), @@ -69,7 +69,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_DECOMISSIONING_CLEANUP, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors"), appArgs = Array.empty[String], @@ -104,7 +104,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_SCALE, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors"), appArgs = Array.empty[String], diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 8f6e9cd8af740..760e9ba55d335 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -177,7 +177,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = pySparkFiles, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Python runtime version check is: True", "Python environment version check is: True", "Python runtime version check for executor is: True"), diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index cc226b341916d..193a02aad0cea 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -171,6 +171,7 @@ class KubernetesSuite extends SparkFunSuite appResource, SPARK_PI_MAIN_CLASS, Seq("Pi is roughly 3"), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -192,6 +193,7 @@ class KubernetesSuite extends SparkFunSuite SPARK_DFS_READ_WRITE_TEST, Seq(s"Success! Local Word Count $wordCount and " + s"DFS Word Count $wordCount agree."), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -212,6 +214,7 @@ class KubernetesSuite extends SparkFunSuite appResource, SPARK_REMOTE_MAIN_CLASS, Seq(s"Mounting of ${appArgs.head} was true"), + Seq(), appArgs, driverPodChecker, executorPodChecker, @@ -261,7 +264,8 @@ class KubernetesSuite extends SparkFunSuite protected def runSparkApplicationAndVerifyCompletion( appResource: String, mainClass: String, - expectedLogOnCompletion: Seq[String], + expectedDriverLogOnCompletion: Seq[String], + expectedExecutorLogOnCompletion: Seq[String] = Seq(), appArgs: Array[String], driverPodChecker: Pod => Unit, executorPodChecker: Pod => Unit, @@ -374,7 +378,6 @@ class KubernetesSuite extends SparkFunSuite .list() .getItems .get(0) - driverPodChecker(driverPod) // If we're testing decommissioning we an executors, but we should have an executor @@ -383,14 +386,35 @@ class KubernetesSuite extends SparkFunSuite execPods.values.nonEmpty should be (true) } execPods.values.foreach(executorPodChecker(_)) + + val execPod: Option[Pod] = if (expectedExecutorLogOnCompletion.nonEmpty) { + Some(kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .list() + .getItems + .get(0)) + } else { + None + } + Eventually.eventually(patienceTimeout, patienceInterval) { - expectedLogOnCompletion.foreach { e => + expectedDriverLogOnCompletion.foreach { e => assert(kubernetesTestComponents.kubernetesClient .pods() .withName(driverPod.getMetadata.getName) .getLog .contains(e), - s"The application did not complete, did not find str ${e}") + s"The application did not complete, driver log did not contain str ${e}") + } + expectedExecutorLogOnCompletion.foreach { e => + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(execPod.get.getMetadata.getName) + .getLog + .contains(e), + s"The application did not complete, executor log did not contain str ${e}") } } execWatcher.close() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala index bad6f1c1021ba..457a766cae124 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PythonTestsSuite.scala @@ -27,7 +27,7 @@ private[spark] trait PythonTestsSuite { 
k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_PI, mainClass = "", - expectedLogOnCompletion = Seq("Pi is roughly 3"), + expectedDriverLogOnCompletion = Seq("Pi is roughly 3"), appArgs = Array("5"), driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, @@ -41,7 +41,7 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_FILES, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "Python runtime version check is: True", "Python environment version check is: True", "Python runtime version check for executor is: True"), @@ -61,7 +61,7 @@ private[spark] trait PythonTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = PYSPARK_MEMORY_CHECK, mainClass = "", - expectedLogOnCompletion = Seq( + expectedDriverLogOnCompletion = Seq( "PySpark Worker Memory Check is: True"), appArgs = Array(s"$additionalMemoryInBytes"), driverPodChecker = doDriverMemoryCheck, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala index b7c8886a15ae7..a22066c18064c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/RTestsSuite.scala @@ -26,7 +26,7 @@ private[spark] trait RTestsSuite { k8sSuite: KubernetesSuite => runSparkApplicationAndVerifyCompletion( appResource = SPARK_R_DATAFRAME_TEST, mainClass = "", - expectedLogOnCompletion = Seq("name: string (nullable = true)", "1 Justin"), + expectedDriverLogOnCompletion = Seq("name: string (nullable = true)", "1 Justin"), appArgs = Array.empty[String], driverPodChecker = doBasicDriverRPodCheck, executorPodChecker = doBasicExecutorRPodCheck, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala index 6d15201d19796..5d3b426598fdd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -16,14 +16,11 @@ */ package org.apache.spark.deploy.k8s.integrationtest -import java.io.{BufferedWriter, File, FileWriter} +import java.io.File import java.net.URL +import java.nio.file.Files -import scala.io.{BufferedSource, Source} - -import io.fabric8.kubernetes.api.model._ - -import org.apache.spark.internal.config +import scala.io.Source private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => import KubernetesSuite.{k8sTestTag, SPARK_PI_MAIN_CLASS} @@ -38,18 +35,21 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => val logConfFilePath = s"${sparkHomeDir.toFile}/conf/log4j.properties" try { - val writer = new BufferedWriter(new FileWriter(logConfFilePath)) - writer.write(content) - writer.close() + Files.write(new File(logConfFilePath).toPath, 
content.getBytes) sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug") + sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug") + + val log4jExpectedLog = + s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties" runSparkApplicationAndVerifyCompletion( appResource = containerLocalSparkDistroExamplesJar, mainClass = SPARK_PI_MAIN_CLASS, - expectedLogOnCompletion = (Seq("DEBUG", - s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties", + expectedDriverLogOnCompletion = (Seq("DEBUG", + log4jExpectedLog, "Pi is roughly 3")), + expectedExecutorLogOnCompletion = Seq(log4jExpectedLog), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPodCheck, executorPodChecker = doBasicExecutorPodCheck, From a082f4600b1cb814442beed1b578bc3430a257a7 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Wed, 2 Dec 2020 17:51:22 +0000 Subject: [PATCH 0640/1009] [SPARK-33071][SPARK-33536][SQL] Avoid changing dataset_id of LogicalPlan in join() to not break DetectAmbiguousSelfJoin ### What changes were proposed in this pull request? Currently, `join()` uses `withPlan(logicalPlan)` for convenient to call some Dataset functions. But it leads to the `dataset_id` inconsistent between the `logicalPlan` and the original `Dataset`(because `withPlan(logicalPlan)` will create a new Dataset with the new id and reset the `dataset_id` with the new id of the `logicalPlan`). As a result, it breaks the rule `DetectAmbiguousSelfJoin`. In this PR, we propose to drop the usage of `withPlan` but use the `logicalPlan` directly so its `dataset_id` doesn't change. Besides, this PR also removes related metadata (`DATASET_ID_KEY`, `COL_POS_KEY`) when an `Alias` tries to construct its own metadata. Because the `Alias` is no longer a reference column after converting to an `Attribute`. To achieve that, we add a new field, `deniedMetadataKeys`, to indicate the metadata that needs to be removed. ### Why are the changes needed? For the query below, it returns the wrong result while it should throws ambiguous self join exception instead: ```scala val emp1 = Seq[TestData]( TestData(1, "sales"), TestData(2, "personnel"), TestData(3, "develop"), TestData(4, "IT")).toDS() val emp2 = Seq[TestData]( TestData(1, "sales"), TestData(2, "personnel"), TestData(3, "develop")).toDS() val emp3 = emp1.join(emp2, emp1("key") === emp2("key")).select(emp1("*")) emp1.join(emp3, emp1.col("key") === emp3.col("key"), "left_outer") .select(emp1.col("*"), emp3.col("key").as("e2")).show() // wrong result +---+---------+---+ |key| value| e2| +---+---------+---+ | 1| sales| 1| | 2|personnel| 2| | 3| develop| 3| | 4| IT| 4| +---+---------+---+ ``` This PR fixes the wrong behaviour. ### Does this PR introduce _any_ user-facing change? Yes, users hit the exception instead of the wrong result after this PR. ### How was this patch tested? Added a new unit test. Closes #30488 from Ngone51/fix-self-join. 
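Building on the `emp1`/`emp3` example above, a rough sketch of what a test would assert after this fix (illustrative only, assuming a ScalaTest context; the exact error message may differ):

```scala
// With the default spark.sql.analyzer.failAmbiguousSelfJoin=true, the query is now
// rejected by DetectAmbiguousSelfJoin instead of silently returning the wrong rows.
import org.apache.spark.sql.AnalysisException

val thrown = intercept[AnalysisException] {
  emp1.join(emp3, emp1.col("key") === emp3.col("key"), "left_outer")
    .select(emp1.col("*"), emp3.col("key").as("e2"))
    .collect()
}
assert(thrown.getMessage.toLowerCase.contains("ambiguous"))
```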
Authored-by: yi.wu Signed-off-by: Wenchen Fan --- .../catalyst/expressions/AliasHelper.scala | 3 +- .../expressions/namedExpressions.scala | 15 +++++-- .../scala/org/apache/spark/sql/Column.scala | 5 ++- .../scala/org/apache/spark/sql/Dataset.scala | 39 +++++++++++-------- .../spark/sql/DataFrameSelfJoinSuite.scala | 29 ++++++++++++++ .../sql/SparkSessionExtensionSuite.scala | 7 ++-- 6 files changed, 73 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index ec47875754a6f..c61eb68db5bfa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -89,7 +89,8 @@ trait AliasHelper { a.copy(child = trimAliases(a.child))( exprId = a.exprId, qualifier = a.qualifier, - explicitMetadata = Some(a.metadata)) + explicitMetadata = Some(a.metadata), + deniedMetadataKeys = a.deniedMetadataKeys) case a: MultiAlias => a.copy(child = trimAliases(a.child)) case other => trimAliases(other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 2abd9d7bb4423..22aabd3c6b30b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -143,11 +143,14 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn * fully qualified way. Consider the examples tableName.name, subQueryAlias.name. * tableName and subQueryAlias are possible qualifiers. * @param explicitMetadata Explicit metadata associated with this alias that overwrites child's. + * @param deniedMetadataKeys Keys of metadata entries that are supposed to be removed when + * inheriting the metadata from the child. */ case class Alias(child: Expression, name: String)( val exprId: ExprId = NamedExpression.newExprId, val qualifier: Seq[String] = Seq.empty, - val explicitMetadata: Option[Metadata] = None) + val explicitMetadata: Option[Metadata] = None, + val deniedMetadataKeys: Seq[String] = Seq.empty) extends UnaryExpression with NamedExpression { // Alias(Generator, xx) need to be transformed into Generate(generator, ...) 
@@ -167,7 +170,11 @@ case class Alias(child: Expression, name: String)( override def metadata: Metadata = { explicitMetadata.getOrElse { child match { - case named: NamedExpression => named.metadata + case named: NamedExpression => + val builder = new MetadataBuilder().withMetadata(named.metadata) + deniedMetadataKeys.foreach(builder.remove) + builder.build() + case _ => Metadata.empty } } @@ -194,7 +201,7 @@ case class Alias(child: Expression, name: String)( override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" override protected final def otherCopyArgs: Seq[AnyRef] = { - exprId :: qualifier :: explicitMetadata :: Nil + exprId :: qualifier :: explicitMetadata :: deniedMetadataKeys :: Nil } override def hashCode(): Int = { @@ -205,7 +212,7 @@ case class Alias(child: Expression, name: String)( override def equals(other: Any): Boolean = other match { case a: Alias => name == a.name && exprId == a.exprId && child == a.child && qualifier == a.qualifier && - explicitMetadata == a.explicitMetadata + explicitMetadata == a.explicitMetadata && deniedMetadataKeys == a.deniedMetadataKeys case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 95134d9111593..86ba81340272b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1164,7 +1164,10 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { - Alias(normalizedExpr(), alias)() + // SPARK-33536: The Alias is no longer a column reference after converting to an attribute. + // These denied metadata keys are used to strip the column reference related metadata for + // the Alias. So it won't be caught as a column reference in DetectAmbiguousSelfJoin. + Alias(expr, alias)(deniedMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 2c38a65ac2106..0716043bcf660 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -231,7 +231,8 @@ class Dataset[T] private[sql]( case _ => queryExecution.analyzed } - if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED)) { + if (sparkSession.sessionState.conf.getConf(SQLConf.FAIL_AMBIGUOUS_SELF_JOIN_ENABLED) && + plan.getTagValue(Dataset.DATASET_ID_TAG).isEmpty) { plan.setTagValue(Dataset.DATASET_ID_TAG, id) } plan @@ -259,15 +260,16 @@ class Dataset[T] private[sql]( private[sql] def resolve(colName: String): NamedExpression = { val resolver = sparkSession.sessionState.analyzer.resolver queryExecution.analyzed.resolveQuoted(colName, resolver) - .getOrElse { - val fields = schema.fieldNames - val extraMsg = if (fields.exists(resolver(_, colName))) { - s"; did you mean to quote the `$colName` column?" 
- } else "" - val fieldsStr = fields.mkString(", ") - val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" - throw new AnalysisException(errorMsg) - } + .getOrElse(throw resolveException(colName, schema.fieldNames)) + } + + private def resolveException(colName: String, fields: Array[String]): AnalysisException = { + val extraMsg = if (fields.exists(sparkSession.sessionState.analyzer.resolver(_, colName))) { + s"; did you mean to quote the `$colName` column?" + } else "" + val fieldsStr = fields.mkString(", ") + val errorMsg = s"""Cannot resolve column name "$colName" among (${fieldsStr})${extraMsg}""" + new AnalysisException(errorMsg) } private[sql] def numericColumns: Seq[Expression] = { @@ -1083,8 +1085,8 @@ class Dataset[T] private[sql]( } // If left/right have no output set intersection, return the plan. - val lanalyzed = withPlan(this.logicalPlan).queryExecution.analyzed - val ranalyzed = withPlan(right.logicalPlan).queryExecution.analyzed + val lanalyzed = this.queryExecution.analyzed + val ranalyzed = right.queryExecution.analyzed if (lanalyzed.outputSet.intersect(ranalyzed.outputSet).isEmpty) { return withPlan(plan) } @@ -1092,17 +1094,22 @@ class Dataset[T] private[sql]( // Otherwise, find the trivially true predicates and automatically resolves them to both sides. // By the time we get here, since we have already run analysis, all attributes should've been // resolved and become AttributeReference. + val resolver = sparkSession.sessionState.analyzer.resolver val cond = plan.condition.map { _.transform { case catalyst.expressions.EqualTo(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => catalyst.expressions.EqualTo( - withPlan(plan.left).resolve(a.name), - withPlan(plan.right).resolve(b.name)) + plan.left.resolveQuoted(a.name, resolver) + .getOrElse(throw resolveException(a.name, plan.left.schema.fieldNames)), + plan.right.resolveQuoted(b.name, resolver) + .getOrElse(throw resolveException(b.name, plan.right.schema.fieldNames))) case catalyst.expressions.EqualNullSafe(a: AttributeReference, b: AttributeReference) if a.sameRef(b) => catalyst.expressions.EqualNullSafe( - withPlan(plan.left).resolve(a.name), - withPlan(plan.right).resolve(b.name)) + plan.left.resolveQuoted(a.name, resolver) + .getOrElse(throw resolveException(a.name, plan.left.schema.fieldNames)), + plan.right.resolveQuoted(b.name, resolver) + .getOrElse(throw resolveException(b.name, plan.right.schema.fieldNames))) }} withPlan { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala index 3b3b54f75da57..50846d9d12b97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSelfJoinSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.{count, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.test.SQLTestData.TestData class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -219,4 +220,32 @@ class DataFrameSelfJoinSuite extends QueryTest with SharedSparkSession { Seq((1, 2), (1, 2), (2, 4), (2, 4)).map(Row.fromTuple)) } } + + test("SPARK-33071/SPARK-33536: Avoid changing dataset_id of LogicalPlan in join() " + + "to not break DetectAmbiguousSelfJoin") { + val emp1 = Seq[TestData]( + 
TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop"), + TestData(4, "IT")).toDS() + val emp2 = Seq[TestData]( + TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop")).toDS() + val emp3 = emp1.join(emp2, emp1("key") === emp2("key")).select(emp1("*")) + assertAmbiguousSelfJoin(emp1.join(emp3, emp1.col("key") === emp3.col("key"), + "left_outer").select(emp1.col("*"), emp3.col("key").as("e2"))) + } + + test("df.show() should also not change dataset_id of LogicalPlan") { + val df = Seq[TestData]( + TestData(1, "sales"), + TestData(2, "personnel"), + TestData(3, "develop"), + TestData(4, "IT")).toDF() + val ds_id1 = df.logicalPlan.getTagValue(Dataset.DATASET_ID_TAG) + df.show(0) + val ds_id2 = df.logicalPlan.getTagValue(Dataset.DATASET_ID_TAG) + assert(ds_id1 === ds_id2) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 12abd31b99e93..f02d2041dd7f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -573,8 +573,9 @@ class ColumnarBoundReference(ordinal: Int, dataType: DataType, nullable: Boolean class ColumnarAlias(child: ColumnarExpression, name: String)( override val exprId: ExprId = NamedExpression.newExprId, override val qualifier: Seq[String] = Seq.empty, - override val explicitMetadata: Option[Metadata] = None) - extends Alias(child, name)(exprId, qualifier, explicitMetadata) + override val explicitMetadata: Option[Metadata] = None, + override val deniedMetadataKeys: Seq[String] = Seq.empty) + extends Alias(child, name)(exprId, qualifier, explicitMetadata, deniedMetadataKeys) with ColumnarExpression { override def columnarEval(batch: ColumnarBatch): Any = child.columnarEval(batch) @@ -711,7 +712,7 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { def replaceWithColumnarExpression(exp: Expression): ColumnarExpression = exp match { case a: Alias => new ColumnarAlias(replaceWithColumnarExpression(a.child), - a.name)(a.exprId, a.qualifier, a.explicitMetadata) + a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.deniedMetadataKeys) case att: AttributeReference => new ColumnarAttributeReference(att.name, att.dataType, att.nullable, att.metadata)(att.exprId, att.qualifier) From b76c6b759c8dd549290aa174b62b8d34ea34aa3f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 2 Dec 2020 12:44:39 -0800 Subject: [PATCH 0641/1009] [SPARK-33627][SQL] Add new function UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS ### What changes were proposed in this pull request? As https://github.com/apache/spark/pull/28534 adds functions from [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/timestamp_functions) for converting numbers to timestamp, this PR is to add functions UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS for converting timestamp to numbers. ### Why are the changes needed? 1. Symmetry of the conversion functions 2. Casting timestamp type to numeric types is disallowed in ANSI mode, we should provide functions for users to complete the conversion. ### Does this PR introduce _any_ user-facing change? 3 new functions UNIX_SECONDS, UNIX_MILLIS and UNIX_MICROS for converting timestamp to long type. ### How was this patch tested? Unit tests. Closes #30566 from gengliangwang/timestampLong. 
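A quick usage sketch of the three new functions, mirroring the examples added to the expression descriptions below (expected values shown as comments):

```scala
// unix_seconds and unix_millis truncate higher levels of precision (floor division);
// unix_micros is exact since timestamps have microsecond precision.
spark.sql("""
  SELECT
    unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')), -- 1
    unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')),  -- 1000
    unix_micros(TIMESTAMP('1970-01-01 00:00:01Z'))   -- 1000000
""").show()
```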
Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- .../catalyst/analysis/FunctionRegistry.scala | 3 + .../expressions/datetimeExpressions.scala | 73 +++++++++++++++++++ .../expressions/DateExpressionsSuite.scala | 45 ++++++++++++ .../sql-functions/sql-expression-schema.md | 5 +- .../resources/sql-tests/inputs/datetime.sql | 4 + .../sql-tests/results/ansi/datetime.sql.out | 26 ++++++- .../sql-tests/results/datetime-legacy.sql.out | 26 ++++++- .../sql-tests/results/datetime.sql.out | 26 ++++++- 8 files changed, 204 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6fb9bed9625d5..5c2816a0baa95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -430,6 +430,9 @@ object FunctionRegistry { expression[SecondsToTimestamp]("timestamp_seconds"), expression[MillisToTimestamp]("timestamp_millis"), expression[MicrosToTimestamp]("timestamp_micros"), + expression[UnixSeconds]("unix_seconds"), + expression[UnixMillis]("unix_millis"), + expression[UnixMicros]("unix_micros"), // collection functions expression[CreateArray]("array"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 424887a13cb97..60dc32c1571fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -524,6 +524,79 @@ case class MicrosToTimestamp(child: Expression) override def prettyName: String = "timestamp_micros" } +abstract class TimestampToLongBase extends UnaryExpression + with ExpectsInputTypes with NullIntolerant { + + protected def scaleFactor: Long + + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType) + + override def dataType: DataType = LongType + + override def nullSafeEval(input: Any): Any = { + Math.floorDiv(input.asInstanceOf[Number].longValue(), scaleFactor) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + if (scaleFactor == 1) { + defineCodeGen(ctx, ev, c => c) + } else { + defineCodeGen(ctx, ev, c => s"java.lang.Math.floorDiv($c, ${scaleFactor}L)") + } + } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of seconds since 1970-01-01 00:00:00 UTC. Truncates higher levels of precision.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixSeconds(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = MICROS_PER_SECOND + + override def prettyName: String = "unix_seconds" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of milliseconds since 1970-01-01 00:00:00 UTC. 
Truncates higher levels of precision.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1000 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixMillis(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = MICROS_PER_MILLIS + + override def prettyName: String = "unix_millis" +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(timestamp) - Returns the number of microseconds since 1970-01-01 00:00:00 UTC.", + examples = """ + Examples: + > SELECT _FUNC_(TIMESTAMP('1970-01-01 00:00:01Z')); + 1000000 + """, + group = "datetime_funcs", + since = "3.1.0") +// scalastyle:on line.size.limit +case class UnixMicros(child: Expression) extends TimestampToLongBase { + override def scaleFactor: Long = 1L + + override def prettyName: String = "unix_micros" +} + @ExpressionDescription( usage = "_FUNC_(date) - Returns the year component of the date/timestamp.", examples = """ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 587ca0cdbed6e..8a1a34276341d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1245,6 +1245,51 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkResult(Int.MinValue.toLong - 100) } + test("UNIX_SECONDS") { + checkEvaluation(UnixSeconds(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixSeconds(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixSeconds(timestamp), 1L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + // -1ms is considered to be in -1st second, as 0-999ms is in 0th second. 
+ timestamp = Literal(new Timestamp(-1L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixSeconds(timestamp), -1L) + // Truncates higher levels of precision + timestamp = Literal(new Timestamp(1999L)) + checkEvaluation(UnixSeconds(timestamp), 1L) + } + + test("UNIX_MILLIS") { + checkEvaluation(UnixMillis(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixMillis(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixMillis(timestamp), 1000L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixMillis(timestamp), -1000L) + // Truncates higher levels of precision + val timestampWithNanos = new Timestamp(1000L) + timestampWithNanos.setNanos(999999) + checkEvaluation(UnixMillis(Literal(timestampWithNanos)), 1000L) + } + + test("UNIX_MICROS") { + checkEvaluation(UnixMicros(Literal(null, TimestampType)), null) + var timestamp = Literal(new Timestamp(0L)) + checkEvaluation(UnixMicros(timestamp), 0L) + timestamp = Literal(new Timestamp(1000L)) + checkEvaluation(UnixMicros(timestamp), 1000000L) + timestamp = Literal(new Timestamp(-1000L)) + checkEvaluation(UnixMicros(timestamp), -1000000L) + val timestampWithNanos = new Timestamp(1000L) + timestampWithNanos.setNanos(1000) // 1 microsecond + checkEvaluation(UnixMicros(Literal(timestampWithNanos)), 1000001L) + } + test("TIMESTAMP_SECONDS") { def testIntegralFunc(value: Number): Unit = { checkEvaluation( diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 0a54dff3a1cea..861062a1f7705 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 342 + - Number of queries: 345 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -289,6 +289,9 @@ | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | +| org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | +| org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.Upper | ucase | SELECT ucase('SparkSql') | struct | | org.apache.spark.sql.catalyst.expressions.Upper | upper | SELECT upper('SparkSql') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index 534e222b7c13e..c2ccb3ee0db06 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -14,6 +14,10 @@ select 
TIMESTAMP_MILLIS(-92233720368547758); select TIMESTAMP_SECONDS(0.1234567); -- truncation is OK for float/double select TIMESTAMP_SECONDS(0.1234567d), TIMESTAMP_SECONDS(FLOAT(0.1234567)); +-- UNIX_SECONDS, UNIX_MILLISECONDS and UNIX_MICROSECONDS +select UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_SECONDS(null); +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null); +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null); -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 10669f14aa87b..9d99d3b870b3f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 7c2c62a2db496..73e9823d96a73 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 810ab6ef0cbfc..2c39c1291aa70 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 117 +-- Number of queries: 120 -- !query @@ -87,6 +87,30 @@ struct +-- !query output +1606833008 1606833008 NULL + + +-- !query +select 
UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null) +-- !query schema +struct +-- !query output +1606833008000 1606833008999 NULL + + +-- !query +select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null) +-- !query schema +struct +-- !query output +1606833008000000 1606833008999999 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema From 92bfbcb2e372e8fecfe65bc582c779d9df4036bb Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 2 Dec 2020 12:58:41 -0800 Subject: [PATCH 0642/1009] [SPARK-33631][DOCS][TEST] Clean up spark.core.connection.ack.wait.timeout from configuration.md ### What changes were proposed in this pull request? SPARK-9767 remove `ConnectionManager` and related files, the configuration `spark.core.connection.ack.wait.timeout` previously used by `ConnectionManager` is no longer used by other Spark code, but it still exists in the `configuration.md`. So this pr cleans up the useless configuration item spark.core.connection.ack.wait.timeout` from `configuration.md`. ### Why are the changes needed? Clean up useless configuration from `configuration.md`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30569 from LuciferYang/SPARK-33631. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/storage/BlockManagerReplicationSuite.scala | 2 -- docs/configuration.md | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index 0b673c580d71f..1e9b48102616f 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -95,8 +95,6 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite conf.set(MEMORY_STORAGE_FRACTION, 0.999) conf.set(STORAGE_UNROLL_MEMORY_THRESHOLD, 512L) - // to make a replication attempt to inactive store fail fast - conf.set("spark.core.connection.ack.wait.timeout", "1s") // to make cached peers refresh frequently conf.set(STORAGE_CACHED_PEERS_TTL, 10) diff --git a/docs/configuration.md b/docs/configuration.md index d4d8e47645921..21506e6901263 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1919,7 +1919,6 @@ Apart from these, the following properties are also available, and may be useful 120s Default timeout for all network interactions. This config will be used in place of - spark.core.connection.ack.wait.timeout, spark.storage.blockManagerHeartbeatTimeoutMs, spark.shuffle.io.connectionTimeout, spark.rpc.askTimeout or spark.rpc.lookupTimeout if they are not configured. @@ -1982,16 +1981,6 @@ Apart from these, the following properties are also available, and may be useful 1.4.0 - - spark.core.connection.ack.wait.timeout - spark.network.timeout - - How long for the connection to wait for ack to occur before timing - out and giving up. To avoid unwilling timeout caused by long pause like GC, - you can set larger value. 
- - 1.1.1 - spark.network.maxRemoteBlockSizeFetchToMem 200m From f94cb53a90558285541090d484a6ae9938fe02e8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 3 Dec 2020 09:34:42 +0900 Subject: [PATCH 0643/1009] [MINOR][INFRA] Use the latest image for GitHub Action jobs ### What changes were proposed in this pull request? Currently, GitHub Action is using two docker images. ``` $ git grep dongjoon/apache-spark-github-action-image .github/workflows/build_and_test.yml: image: dongjoon/apache-spark-github-action-image:20201015 .github/workflows/build_and_test.yml: image: dongjoon/apache-spark-github-action-image:20201025 ``` This PR aims to make it consistent by using the latest one. ``` - image: dongjoon/apache-spark-github-action-image:20201015 + image: dongjoon/apache-spark-github-action-image:20201025 ``` ### Why are the changes needed? This is for better maintainability. The image size is almost the same. ``` $ docker images | grep 202010 dongjoon/apache-spark-github-action-image 20201025 37adfa3d226a 5 weeks ago 2.18GB dongjoon/apache-spark-github-action-image 20201015 ff6fee8dc36d 6 weeks ago 2.16GB ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the GitHub Action. Closes #30578 from dongjoon-hyun/SPARK-MINOR. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b2b6a38916eeb..a3bb083387f3e 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -153,7 +153,7 @@ jobs: name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-20.04 container: - image: dongjoon/apache-spark-github-action-image:20201015 + image: dongjoon/apache-spark-github-action-image:20201025 strategy: fail-fast: false matrix: From 4f9667035886a67e6c9a4e8fad2efa390e87ca68 Mon Sep 17 00:00:00 2001 From: uncleGen Date: Wed, 2 Dec 2020 17:11:51 -0800 Subject: [PATCH 0644/1009] [SPARK-31953][SS] Add Spark Structured Streaming History Server Support ### What changes were proposed in this pull request? Add Spark Structured Streaming History Server Support. ### Why are the changes needed? Add a streaming query history server plugin. ![image](https://user-images.githubusercontent.com/7402327/84248291-d26cfe80-ab3b-11ea-86d2-98205fa2bcc4.png) ![image](https://user-images.githubusercontent.com/7402327/84248347-e44ea180-ab3b-11ea-81de-eefe207656f2.png) ![image](https://user-images.githubusercontent.com/7402327/84248396-f0d2fa00-ab3b-11ea-9b0d-e410115471b0.png) - Follow-ups - Query duration should not update in history UI. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Update UT. Closes #28781 from uncleGen/SPARK-31953. 
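For the new Structured Streaming tab to appear in the history server, the application has to write event logs that include the streaming query events; a minimal, hypothetical configuration sketch (paths and app name are placeholders):

```scala
import org.apache.spark.sql.SparkSession

// Enable event logging so StreamingQueryListener events end up in the event log
// and can be replayed by the history server plugin added in this patch.
// The directory is a placeholder; it should match spark.history.fs.logDirectory.
val spark = SparkSession.builder()
  .master("local[2]")
  .appName("streaming-history-ui-demo")
  .config("spark.eventLog.enabled", "true")
  .config("spark.eventLog.dir", "file:///tmp/spark-events")
  .getOrCreate()
```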
Lead-authored-by: uncleGen Co-authored-by: Genmao Yu Co-authored-by: Yuanjian Li Signed-off-by: Shixiong Zhu --- dev/.rat-excludes | 1 + ...apache.spark.status.AppHistoryServerPlugin | 1 + .../streaming/StreamingQueryListenerBus.scala | 26 ++- .../StreamingQueryHistoryServerPlugin.scala | 43 +++++ .../ui/StreamingQueryStatusStore.scala | 53 ++++++ .../spark/sql/internal/SharedState.scala | 8 +- .../sql/streaming/StreamingQueryManager.scala | 3 +- .../sql/streaming/ui/StreamingQueryPage.scala | 44 ++--- .../ui/StreamingQueryStatisticsPage.scala | 27 +-- .../ui/StreamingQueryStatusListener.scala | 166 +++++++++++------- .../sql/streaming/ui/StreamingQueryTab.scala | 3 +- .../spark/sql/streaming/ui/UIUtils.scala | 12 +- .../spark-events/local-1596020211915 | 160 +++++++++++++++++ .../apache/spark/deploy/history/Utils.scala | 40 +++++ .../ui/StreamingQueryHistorySuite.scala | 63 +++++++ .../ui/StreamingQueryPageSuite.scala | 42 +++-- .../StreamingQueryStatusListenerSuite.scala | 159 ++++++++++++++--- 17 files changed, 693 insertions(+), 158 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala create mode 100644 sql/core/src/test/resources/spark-events/local-1596020211915 create mode 100644 sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 7da330dfe1fbf..167cf224f92c2 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -123,6 +123,7 @@ SessionHandler.java GangliaReporter.java application_1578436911597_0052 config.properties +local-1596020211915 app-20200706201101-0003 py.typed _metadata diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin index 0bba2f88b92a5..6771eef525307 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.status.AppHistoryServerPlugin @@ -1 +1,2 @@ org.apache.spark.sql.execution.ui.SQLHistoryServerPlugin +org.apache.spark.sql.execution.ui.StreamingQueryHistoryServerPlugin diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala index 1b8d69ffb7521..4b98acd16f6fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingQueryListenerBus.scala @@ -31,16 +31,21 @@ import org.apache.spark.util.ListenerBus * Spark listener bus, so that it can receive [[StreamingQueryListener.Event]]s and dispatch them * to StreamingQueryListeners. * - * Note that each bus and its registered listeners are associated with a single SparkSession + * Note 1: Each bus and its registered listeners are associated with a single SparkSession * and StreamingQueryManager. So this bus will dispatch events to registered listeners for only * those queries that were started in the associated SparkSession. 
+ * + * Note 2: To rebuild Structured Streaming UI in SHS, this bus will be registered into + * [[org.apache.spark.scheduler.ReplayListenerBus]]. We check `sparkListenerBus` defined or not to + * determine how to process [[StreamingQueryListener.Event]]. If false, it means this bus is used to + * replay all streaming query event from eventLog. */ -class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) +class StreamingQueryListenerBus(sparkListenerBus: Option[LiveListenerBus]) extends SparkListener with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] { import StreamingQueryListener._ - sparkListenerBus.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY) + sparkListenerBus.foreach(_.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY)) /** * RunIds of active queries whose events are supposed to be forwarded by this ListenerBus @@ -67,11 +72,11 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) event match { case s: QueryStartedEvent => activeQueryRunIds.synchronized { activeQueryRunIds += s.runId } - sparkListenerBus.post(s) + sparkListenerBus.foreach(bus => bus.post(s)) // post to local listeners to trigger callbacks postToAll(s) case _ => - sparkListenerBus.post(event) + sparkListenerBus.foreach(bus => bus.post(event)) } } @@ -95,7 +100,11 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) // synchronously and the ones attached to LiveListenerBus asynchronously. Therefore, // we need to ignore QueryStartedEvent if this method is called within SparkListenerBus // thread - if (!LiveListenerBus.withinListenerThread.value || !e.isInstanceOf[QueryStartedEvent]) { + // + // When loaded by Spark History Server, we should process all event coming from replay + // listener bus. + if (sparkListenerBus.isEmpty || !LiveListenerBus.withinListenerThread.value || + !e.isInstanceOf[QueryStartedEvent]) { postToAll(e) } case _ => @@ -110,7 +119,10 @@ class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus) listener: StreamingQueryListener, event: StreamingQueryListener.Event): Unit = { def shouldReport(runId: UUID): Boolean = { - activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) } + // When loaded by Spark History Server, we should process all event coming from replay + // listener bus. + sparkListenerBus.isEmpty || + activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) } } event match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala new file mode 100644 index 0000000000000..a127fa59b7433 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryHistoryServerPlugin.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import org.apache.spark.SparkConf +import org.apache.spark.scheduler.SparkListener +import org.apache.spark.sql.execution.streaming.StreamingQueryListenerBus +import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} +import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore} +import org.apache.spark.ui.SparkUI + +class StreamingQueryHistoryServerPlugin extends AppHistoryServerPlugin { + + override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = { + val listenerBus = new StreamingQueryListenerBus(None) + listenerBus.addListener(new StreamingQueryStatusListener(conf, store)) + Seq(listenerBus) + } + + override def setupUI(ui: SparkUI): Unit = { + val streamingQueryStatusStore = new StreamingQueryStatusStore(ui.store.store) + if (streamingQueryStatusStore.allQueryUIData.nonEmpty) { + new StreamingQueryTab(streamingQueryStatusStore, ui) + } + } + + override def displayOrder: Int = 1 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala new file mode 100644 index 0000000000000..9eb14a6a63063 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/StreamingQueryStatusStore.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.ui + +import java.util.UUID + +import org.apache.spark.sql.streaming.ui.{StreamingQueryData, StreamingQueryProgressWrapper, StreamingQueryUIData} +import org.apache.spark.status.KVUtils +import org.apache.spark.util.kvstore.KVStore + +/** + * Provides a view of a KVStore with methods that make it easy to query Streaming Query state. + * There's no state kept in this class, so it's ok to have multiple instances of it in an + * application. 
+ */ +class StreamingQueryStatusStore(store: KVStore) { + + def allQueryUIData: Seq[StreamingQueryUIData] = { + val view = store.view(classOf[StreamingQueryData]).index("startTimestamp").first(0L) + KVUtils.viewToSeq(view, Int.MaxValue)(_ => true).map(makeUIData) + } + + // visible for test + private[sql] def getQueryProgressData(runId: UUID): Seq[StreamingQueryProgressWrapper] = { + val view = store.view(classOf[StreamingQueryProgressWrapper]) + .index("runId").first(runId.toString).last(runId.toString) + KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + } + + private def makeUIData(summary: StreamingQueryData): StreamingQueryUIData = { + val runId = summary.runId.toString + val view = store.view(classOf[StreamingQueryProgressWrapper]) + .index("runId").first(runId).last(runId) + val recentProgress = KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + .map(_.progress).sortBy(_.timestamp).toArray + StreamingQueryUIData(summary, recentProgress) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 89aceacac6007..ea430db9f030f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.CacheManager import org.apache.spark.sql.execution.streaming.StreamExecution -import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab} +import org.apache.spark.sql.execution.ui.{SQLAppStatusListener, SQLAppStatusStore, SQLTab, StreamingQueryStatusStore} import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.sql.streaming.ui.{StreamingQueryStatusListener, StreamingQueryTab} import org.apache.spark.status.ElementTrackingStore @@ -111,9 +111,9 @@ private[sql] class SharedState( lazy val streamingQueryStatusListener: Option[StreamingQueryStatusListener] = { sparkContext.ui.flatMap { ui => if (conf.get(STREAMING_UI_ENABLED)) { - val statusListener = new StreamingQueryStatusListener(conf) - new StreamingQueryTab(statusListener, ui) - Some(statusListener) + val kvStore = sparkContext.statusStore.store.asInstanceOf[ElementTrackingStore] + new StreamingQueryTab(new StreamingQueryStatusStore(kvStore), ui) + Some(new StreamingQueryStatusListener(conf, kvStore)) } else { None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index ffdbe9d4e4915..b66037d00919d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -49,7 +49,8 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo private[sql] val stateStoreCoordinator = StateStoreCoordinatorRef.forDriver(sparkSession.sparkContext.env) - private val listenerBus = new StreamingQueryListenerBus(sparkSession.sparkContext.listenerBus) + private val listenerBus = + new StreamingQueryListenerBus(Some(sparkSession.sparkContext.listenerBus)) @GuardedBy("activeQueriesSharedLock") private val activeQueries = new mutable.HashMap[UUID, StreamingQuery] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala index b98fdf16eef31..96e498991e1bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPage.scala @@ -40,8 +40,8 @@ private[ui] class StreamingQueryPage(parent: StreamingQueryTab) } private def generateStreamingQueryTable(request: HttpServletRequest): Seq[Node] = { - val (activeQueries, inactiveQueries) = parent.statusListener.allQueryStatus - .partition(_.isActive) + val (activeQueries, inactiveQueries) = + parent.store.allQueryUIData.partition(_.summary.isActive) val content = mutable.ListBuffer[Node]() // show active queries table only if there is at least one active query @@ -176,7 +176,7 @@ class StreamingQueryPagedTable( val streamingQuery = query.streamingUIData val statisticsLink = "%s/%s/statistics?id=%s" .format(SparkUIUtils.prependBaseUri(request, parent.basePath), parent.prefix, - streamingQuery.runId) + streamingQuery.summary.runId) def details(detail: Any): Seq[Node] = { if (isActive) { @@ -194,14 +194,14 @@ class StreamingQueryPagedTable( {UIUtils.getQueryName(streamingQuery)} {UIUtils.getQueryStatus(streamingQuery)} - {streamingQuery.id} - {streamingQuery.runId} - {SparkUIUtils.formatDate(streamingQuery.startTimestamp)} + {streamingQuery.summary.id} + {streamingQuery.summary.runId} + {SparkUIUtils.formatDate(streamingQuery.summary.startTimestamp)} {SparkUIUtils.formatDurationVerbose(query.duration)} {withNoProgress(streamingQuery, {query.avgInput.formatted("%.2f")}, "NaN")} {withNoProgress(streamingQuery, {query.avgProcess.formatted("%.2f")}, "NaN")} {withNoProgress(streamingQuery, {streamingQuery.lastProgress.batchId}, "NaN")} - {details(streamingQuery.exception.getOrElse("-"))} + {details(streamingQuery.summary.exception.getOrElse("-"))} } } @@ -222,32 +222,32 @@ class StreamingQueryDataSource(uiData: Seq[StreamingQueryUIData], sortColumn: St override def sliceData(from: Int, to: Int): Seq[StructuredStreamingRow] = data.slice(from, to) - private def streamingRow(query: StreamingQueryUIData): StructuredStreamingRow = { + private def streamingRow(uiData: StreamingQueryUIData): StructuredStreamingRow = { val duration = if (isActive) { - System.currentTimeMillis() - query.startTimestamp + System.currentTimeMillis() - uiData.summary.startTimestamp } else { - withNoProgress(query, { - val endTimeMs = query.lastProgress.timestamp - parseProgressTimestamp(endTimeMs) - query.startTimestamp + withNoProgress(uiData, { + val endTimeMs = uiData.lastProgress.timestamp + parseProgressTimestamp(endTimeMs) - uiData.summary.startTimestamp }, 0) } - val avgInput = (query.recentProgress.map(p => withNumberInvalid(p.inputRowsPerSecond)).sum / - query.recentProgress.length) + val avgInput = (uiData.recentProgress.map(p => withNumberInvalid(p.inputRowsPerSecond)).sum / + uiData.recentProgress.length) - val avgProcess = (query.recentProgress.map(p => - withNumberInvalid(p.processedRowsPerSecond)).sum / query.recentProgress.length) + val avgProcess = (uiData.recentProgress.map(p => + withNumberInvalid(p.processedRowsPerSecond)).sum / uiData.recentProgress.length) - StructuredStreamingRow(duration, avgInput, avgProcess, query) + StructuredStreamingRow(duration, avgInput, avgProcess, uiData) } private def ordering(sortColumn: String, desc: Boolean): Ordering[StructuredStreamingRow] = { val ordering: Ordering[StructuredStreamingRow] = sortColumn match { - case "Name" => Ordering.by(q => 
UIUtils.getQueryName(q.streamingUIData)) - case "Status" => Ordering.by(q => UIUtils.getQueryStatus(q.streamingUIData)) - case "ID" => Ordering.by(_.streamingUIData.id) - case "Run ID" => Ordering.by(_.streamingUIData.runId) - case "Start Time" => Ordering.by(_.streamingUIData.startTimestamp) + case "Name" => Ordering.by(row => UIUtils.getQueryName(row.streamingUIData)) + case "Status" => Ordering.by(row => UIUtils.getQueryStatus(row.streamingUIData)) + case "ID" => Ordering.by(_.streamingUIData.summary.id) + case "Run ID" => Ordering.by(_.streamingUIData.summary.runId) + case "Start Time" => Ordering.by(_.streamingUIData.summary.startTimestamp) case "Duration" => Ordering.by(_.duration) case "Avg Input /sec" => Ordering.by(_.avgInput) case "Avg Process /sec" => Ordering.by(_.avgProcess) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala index 24709ba470cde..97691d9d7e827 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatisticsPage.scala @@ -58,8 +58,8 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) val parameterId = request.getParameter("id") require(parameterId != null && parameterId.nonEmpty, "Missing id parameter") - val query = parent.statusListener.allQueryStatus.find { case q => - q.runId.equals(UUID.fromString(parameterId)) + val query = parent.store.allQueryUIData.find { uiData => + uiData.summary.runId.equals(UUID.fromString(parameterId)) }.getOrElse(throw new IllegalArgumentException(s"Failed to find streaming query $parameterId")) val resources = generateLoadResources(request) @@ -109,34 +109,35 @@ private[ui] class StreamingQueryStatisticsPage(parent: StreamingQueryTab) } - def generateBasicInfo(query: StreamingQueryUIData): Seq[Node] = { - val duration = if (query.isActive) { - SparkUIUtils.formatDurationVerbose(System.currentTimeMillis() - query.startTimestamp) + def generateBasicInfo(uiData: StreamingQueryUIData): Seq[Node] = { + val duration = if (uiData.summary.isActive) { + val durationMs = System.currentTimeMillis() - uiData.summary.startTimestamp + SparkUIUtils.formatDurationVerbose(durationMs) } else { - withNoProgress(query, { - val end = query.lastProgress.timestamp - val start = query.recentProgress.head.timestamp + withNoProgress(uiData, { + val end = uiData.lastProgress.timestamp + val start = uiData.recentProgress.head.timestamp SparkUIUtils.formatDurationVerbose( parseProgressTimestamp(end) - parseProgressTimestamp(start)) }, "-") } - val name = UIUtils.getQueryName(query) - val numBatches = withNoProgress(query, { query.lastProgress.batchId + 1L }, 0) + val name = UIUtils.getQueryName(uiData) + val numBatches = withNoProgress(uiData, { uiData.lastProgress.batchId + 1L }, 0)
      Running batches for {duration} since - {SparkUIUtils.formatDate(query.startTimestamp)} + {SparkUIUtils.formatDate(uiData.summary.startTimestamp)} ({numBatches} completed batches)

      Name: {name}
      - Id: {query.id}
      - RunId: {query.runId}
      + Id: {uiData.summary.id}
      + RunId: {uiData.summary.runId}

      } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala index e331083b30024..fdd3754344108 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListener.scala @@ -20,102 +20,144 @@ package org.apache.spark.sql.streaming.ui import java.util.UUID import java.util.concurrent.ConcurrentHashMap -import scala.collection.JavaConverters._ import scala.collection.mutable +import com.fasterxml.jackson.annotation.JsonIgnore + import org.apache.spark.SparkConf import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress} +import org.apache.spark.sql.streaming.ui.StreamingQueryProgressWrapper._ import org.apache.spark.sql.streaming.ui.UIUtils.parseProgressTimestamp +import org.apache.spark.status.{ElementTrackingStore, KVUtils} +import org.apache.spark.status.KVUtils.KVIndexParam +import org.apache.spark.util.kvstore.KVIndex /** * A customized StreamingQueryListener used in structured streaming UI, which contains all * UI data for both active and inactive query. - * TODO: Add support for history server. */ -private[sql] class StreamingQueryStatusListener(conf: SparkConf) extends StreamingQueryListener { - - /** - * We use runId as the key here instead of id in active query status map, - * because the runId is unique for every started query, even it its a restart. - */ - private[ui] val activeQueryStatus = new ConcurrentHashMap[UUID, StreamingQueryUIData]() - private[ui] val inactiveQueryStatus = new mutable.Queue[StreamingQueryUIData]() +private[sql] class StreamingQueryStatusListener( + conf: SparkConf, + store: ElementTrackingStore) extends StreamingQueryListener { private val streamingProgressRetention = conf.get(StaticSQLConf.STREAMING_UI_RETAINED_PROGRESS_UPDATES) private val inactiveQueryStatusRetention = conf.get(StaticSQLConf.STREAMING_UI_RETAINED_QUERIES) + store.addTrigger(classOf[StreamingQueryData], inactiveQueryStatusRetention) { count => + cleanupInactiveQueries(count) + } + + // Events from the same query run will never be processed concurrently, so it's safe to + // access `progressIds` without any protection. 
+ private val queryToProgress = new ConcurrentHashMap[UUID, mutable.Queue[String]]() + + private def cleanupInactiveQueries(count: Long): Unit = { + val view = store.view(classOf[StreamingQueryData]).index("active").first(false).last(false) + val inactiveQueries = KVUtils.viewToSeq(view, Int.MaxValue)(_ => true) + val numInactiveQueries = inactiveQueries.size + if (numInactiveQueries <= inactiveQueryStatusRetention) { + return + } + val toDelete = inactiveQueries.sortBy(_.endTimestamp.get) + .take(numInactiveQueries - inactiveQueryStatusRetention) + val runIds = toDelete.map { e => + store.delete(e.getClass, e.runId) + e.runId.toString + } + // Delete wrappers in one pass, as deleting them for each summary is slow + store.removeAllByIndexValues(classOf[StreamingQueryProgressWrapper], "runId", runIds) + } + override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = { val startTimestamp = parseProgressTimestamp(event.timestamp) - activeQueryStatus.putIfAbsent(event.runId, - new StreamingQueryUIData(event.name, event.id, event.runId, startTimestamp)) + store.write(new StreamingQueryData( + event.name, + event.id, + event.runId, + isActive = true, + None, + startTimestamp + ), checkTriggers = true) } override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = { - val batchTimestamp = parseProgressTimestamp(event.progress.timestamp) - val queryStatus = activeQueryStatus.getOrDefault( - event.progress.runId, - new StreamingQueryUIData(event.progress.name, event.progress.id, event.progress.runId, - batchTimestamp)) - queryStatus.updateProcess(event.progress, streamingProgressRetention) - } - - override def onQueryTerminated( - event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { - val queryStatus = activeQueryStatus.remove(event.runId) - if (queryStatus != null) { - queryStatus.queryTerminated(event) - inactiveQueryStatus += queryStatus - while (inactiveQueryStatus.length >= inactiveQueryStatusRetention) { - inactiveQueryStatus.dequeue() - } + val runId = event.progress.runId + val batchId = event.progress.batchId + val timestamp = event.progress.timestamp + if (!queryToProgress.containsKey(runId)) { + queryToProgress.put(runId, mutable.Queue.empty[String]) + } + val progressIds = queryToProgress.get(runId) + progressIds.enqueue(getUniqueId(runId, batchId, timestamp)) + store.write(new StreamingQueryProgressWrapper(event.progress)) + while (progressIds.length > streamingProgressRetention) { + val uniqueId = progressIds.dequeue + store.delete(classOf[StreamingQueryProgressWrapper], uniqueId) } } - def allQueryStatus: Seq[StreamingQueryUIData] = synchronized { - activeQueryStatus.values().asScala.toSeq ++ inactiveQueryStatus + override def onQueryTerminated( + event: StreamingQueryListener.QueryTerminatedEvent): Unit = { + val querySummary = store.read(classOf[StreamingQueryData], event.runId) + val curTime = System.currentTimeMillis() + store.write(new StreamingQueryData( + querySummary.name, + querySummary.id, + querySummary.runId, + isActive = false, + querySummary.exception, + querySummary.startTimestamp, + Some(curTime) + ), checkTriggers = true) + queryToProgress.remove(event.runId) } } +private[sql] class StreamingQueryData( + val name: String, + val id: UUID, + @KVIndexParam val runId: UUID, + @KVIndexParam("active") val isActive: Boolean, + val exception: Option[String], + @KVIndexParam("startTimestamp") val startTimestamp: Long, + val endTimestamp: Option[Long] = None) + /** * This class contains all message 
related to UI display, each instance corresponds to a single * [[org.apache.spark.sql.streaming.StreamingQuery]]. */ -private[ui] class StreamingQueryUIData( - val name: String, - val id: UUID, - val runId: UUID, - val startTimestamp: Long) { - - /** Holds the most recent query progress updates. */ - private val progressBuffer = new mutable.Queue[StreamingQueryProgress]() - - private var _isActive = true - private var _exception: Option[String] = None - - def isActive: Boolean = synchronized { _isActive } - - def exception: Option[String] = synchronized { _exception } - - def queryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = synchronized { - _isActive = false - _exception = event.exception - } - - def updateProcess( - newProgress: StreamingQueryProgress, retentionNum: Int): Unit = progressBuffer.synchronized { - progressBuffer += newProgress - while (progressBuffer.length >= retentionNum) { - progressBuffer.dequeue() +private[sql] case class StreamingQueryUIData( + summary: StreamingQueryData, + recentProgress: Array[StreamingQueryProgress]) { + + def lastProgress: StreamingQueryProgress = { + if (recentProgress.nonEmpty) { + recentProgress.last + } else { + null } } +} - def recentProgress: Array[StreamingQueryProgress] = progressBuffer.synchronized { - progressBuffer.toArray - } +private[sql] class StreamingQueryProgressWrapper(val progress: StreamingQueryProgress) { + @JsonIgnore @KVIndex + private val uniqueId: String = getUniqueId(progress.runId, progress.batchId, progress.timestamp) - def lastProgress: StreamingQueryProgress = progressBuffer.synchronized { - progressBuffer.lastOption.orNull + @JsonIgnore @KVIndex("runId") + private def runIdIndex: String = progress.runId.toString +} + +private[sql] object StreamingQueryProgressWrapper { + /** + * Adding `timestamp` into unique id to support reporting `empty` query progress + * in which no data comes but with the same batchId. 
+ */ + def getUniqueId( + runId: UUID, + batchId: Long, + timestamp: String): String = { + s"${runId}_${batchId}_$timestamp" } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala index bb097ffc06912..65cad8f06cc1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/StreamingQueryTab.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.streaming.ui import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.ui.{SparkUI, SparkUITab} private[sql] class StreamingQueryTab( - val statusListener: StreamingQueryStatusListener, + val store: StreamingQueryStatusStore, sparkUI: SparkUI) extends SparkUITab(sparkUI, "StreamingQuery") with Logging { override val name = "Structured Streaming" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala index 1f7e65dede170..88a110fa9a329 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/ui/UIUtils.scala @@ -46,19 +46,19 @@ private[ui] object UIUtils { } } - def getQueryName(query: StreamingQueryUIData): String = { - if (query.name == null || query.name.isEmpty) { + def getQueryName(uiData: StreamingQueryUIData): String = { + if (uiData.summary.name == null || uiData.summary.name.isEmpty) { "" } else { - query.name + uiData.summary.name } } - def getQueryStatus(query: StreamingQueryUIData): String = { - if (query.isActive) { + def getQueryStatus(uiData: StreamingQueryUIData): String = { + if (uiData.summary.isActive) { "RUNNING" } else { - query.exception.map(_ => "FAILED").getOrElse("FINISHED") + uiData.summary.exception.map(_ => "FAILED").getOrElse("FINISHED") } } diff --git a/sql/core/src/test/resources/spark-events/local-1596020211915 b/sql/core/src/test/resources/spark-events/local-1596020211915 new file mode 100644 index 0000000000000..ff34bbc16ef3a --- /dev/null +++ b/sql/core/src/test/resources/spark-events/local-1596020211915 @@ -0,0 +1,160 @@ +{"Event":"SparkListenerLogStart","Spark Version":"3.1.0-SNAPSHOT"} +{"Event":"SparkListenerResourceProfileAdded","Resource Profile Id":0,"Executor Resource Requests":{"cores":{"Resource Name":"cores","Amount":1,"Discovery Script":"","Vendor":""},"memory":{"Resource Name":"memory","Amount":1024,"Discovery Script":"","Vendor":""}},"Task Resource Requests":{"cpus":{"Resource Name":"cpus","Amount":1.0}}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1596020212090,"Executor ID":"driver","Executor Info":{"Host":"iZbp19vpr16ix621sdw476Z","Total Cores":4,"Log Urls":{},"Attributes":{},"Resources":{},"Resource Profile Id":0}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Port":39845},"Maximum Memory":384093388,"Timestamp":1596020212109,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre","Java Version":"1.8.0_252 (Oracle Corporation)","Scala Version":"version 2.12.10"},"Spark 
Properties":{"spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.driver.port":"46309","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","spark.app.name":"StructuredKafkaWordCount","spark.scheduler.mode":"FIFO","spark.submit.pyFiles":"","spark.executor.id":"driver","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"},"Hadoop Properties":{"yarn.resourcemanager.amlauncher.thread-count":"50","yarn.sharedcache.enabled":"false","fs.s3a.connection.maximum":"15","fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem","yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms":"1000","hadoop.security.kms.client.timeout":"60","hadoop.http.authentication.kerberos.principal":"HTTP/_HOST@LOCALHOST","mapreduce.framework.name":"local","yarn.sharedcache.uploader.server.thread-count":"50","yarn.nodemanager.linux-container-executor.nonsecure-mode.user-pattern":"^[_.A-Za-z0-9][-@_.A-Za-z0-9]{0,255}?[$]?$","tfile.fs.output.buffer.size":"262144","yarn.app.mapreduce.am.job.task.listener.thread-count":"30","hadoop.security.groups.cache.background.reload.threads":"3","yarn.resourcemanager.webapp.cross-origin.enabled":"false","fs.AbstractFileSystem.ftp.impl":"org.apache.hadoop.fs.ftp.FtpFs","fs.s3.block.size":"67108864","hadoop.registry.secure":"false","hadoop.shell.safely.delete.limit.num.files":"100","dfs.bytes-per-checksum":"512","fs.s3.buffer.dir":"${hadoop.tmp.dir}/s3","mapreduce.job.acl-view-job":" ","mapreduce.jobhistory.loadedjobs.cache.size":"5","mapreduce.input.fileinputformat.split.minsize":"0","yarn.resourcemanager.container.liveness-monitor.interval-ms":"600000","yarn.resourcemanager.client.thread-count":"50","io.seqfile.compress.blocksize":"1000000","yarn.sharedcache.checksum.algo.impl":"org.apache.hadoop.yarn.sharedcache.ChecksumSHA256Impl","yarn.nodemanager.amrmproxy.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.nodemanager.amrmproxy.DefaultRequestInterceptor","yarn.timeline-service.entity-group-fs-store.leveldb-cache-read-cache-size":"10485760","mapreduce.reduce.shuffle.fetch.retry.interval-ms":"1000","mapreduce.task.profile.maps":"0-2","yarn.scheduler.include-port-in-node-name":"false","yarn.nodemanager.admin-env":"MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX","yarn.resourcemanager.node-removal-untracked.timeout-ms":"60000","mapreduce.am.max-attempts":"2","hadoop.security.kms.client.failover.sleep.base.millis":"100","mapreduce.jobhistory.webapp.https.address":"0.0.0.0:19890","yarn.node-labels.fs-store.impl.class":"org.apache.hadoop.yarn.nodelabels.FileSystemNodeLabelsStore","fs.trash.checkpoint.interval":"0","mapreduce.job.map.output.collector.class":"org.apache.hadoop.mapred.MapTask$MapOutputBuffer","yarn.resourcemanager.node-ip-cache.expiry-interval-secs":"-1","hadoop.http.authentication.signature.secret.file":"*********(redacted)","hadoop.jetty.logs.serve.aliases":"true","yarn.timeline-service.handler-thread-count":"10","yarn.resourcemanager.max-completed-applications":"10000","yarn.resourcemanager.system-metrics-publisher.enabled":"false","yarn.sharedcache.webapp.address":"0.0.0.0:8788","yarn.resourcemanager.delegation.token.renew-interval":"*********(redacted)","yarn.sharedcache.nm.uploader.replication.factor":"10","hadoop.security.groups.negative-cache.secs":"30","yarn.app.mapreduce.task.container.log.backups":"0","mapreduce.reduce.skip.proc-count.auto-incr":"true","hadoop.se
curity.group.mapping.ldap.posix.attr.gid.name":"gidNumber","ipc.client.fallback-to-simple-auth-allowed":"false","yarn.client.failover-proxy-provider":"org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider","yarn.timeline-service.http-authentication.simple.anonymous.allowed":"true","ha.health-monitor.check-interval.ms":"1000","yarn.acl.reservation-enable":"false","yarn.resourcemanager.store.class":"org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore","yarn.app.mapreduce.am.hard-kill-timeout-ms":"10000","yarn.nodemanager.container-metrics.enable":"true","yarn.timeline-service.client.fd-clean-interval-secs":"60","yarn.nodemanager.docker-container-executor.exec-name":"/usr/bin/docker","yarn.resourcemanager.nodemanagers.heartbeat-interval-ms":"1000","mapred.child.java.opts":"-Xmx200m","hadoop.common.configuration.version":"0.23.0","yarn.nodemanager.remote-app-log-dir-suffix":"logs","yarn.nodemanager.windows-container.cpu-limit.enabled":"false","yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed":"false","file.blocksize":"67108864","hadoop.registry.zk.retry.ceiling.ms":"60000","yarn.sharedcache.store.in-memory.initial-delay-mins":"10","mapreduce.jobhistory.principal":"jhs/_HOST@REALM.TLD","mapreduce.map.skip.proc-count.auto-incr":"true","mapreduce.task.profile.reduces":"0-2","yarn.timeline-service.webapp.https.address":"${yarn.timeline-service.hostname}:8190","yarn.resourcemanager.scheduler.address":"${yarn.resourcemanager.hostname}:8030","yarn.node-labels.enabled":"false","yarn.resourcemanager.webapp.ui-actions.enabled":"true","mapreduce.task.timeout":"600000","yarn.sharedcache.client-server.thread-count":"50","hadoop.security.crypto.cipher.suite":"AES/CTR/NoPadding","yarn.resourcemanager.connect.max-wait.ms":"900000","fs.defaultFS":"file:///","yarn.minicluster.use-rpc":"false","fs.har.impl.disable.cache":"true","io.compression.codec.bzip2.library":"system-native","mapreduce.shuffle.connection-keep-alive.timeout":"5","yarn.resourcemanager.webapp.https.address":"${yarn.resourcemanager.hostname}:8090","mapreduce.jobhistory.address":"0.0.0.0:10020","yarn.resourcemanager.nm-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.is.minicluster":"false","yarn.nodemanager.address":"${yarn.nodemanager.hostname}:0","fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A","mapreduce.task.combine.progress.records":"10000","yarn.resourcemanager.am.max-attempts":"2","yarn.nodemanager.linux-container-executor.cgroups.hierarchy":"/hadoop-yarn","ipc.server.log.slow.rpc":"false","yarn.resourcemanager.node-labels.provider.fetch-interval-ms":"1800000","yarn.nodemanager.webapp.cross-origin.enabled":"false","yarn.app.mapreduce.am.job.committer.cancel-timeout":"60000","ftp.bytes-per-checksum":"512","yarn.nodemanager.resource.memory-mb":"-1","fs.s3a.fast.upload.active.blocks":"4","mapreduce.jobhistory.joblist.cache.size":"20000","fs.ftp.host":"0.0.0.0","yarn.resourcemanager.fs.state-store.num-retries":"0","yarn.resourcemanager.nodemanager-connect-retries":"10","hadoop.security.kms.client.encrypted.key.cache.low-watermark":"0.3f","yarn.timeline-service.client.max-retries":"30","dfs.ha.fencing.ssh.connect-timeout":"30000","yarn.log-aggregation-enable":"false","mapreduce.reduce.markreset.buffer.percent":"0.0","fs.AbstractFileSystem.viewfs.impl":"org.apache.hadoop.fs.viewfs.ViewFs","mapreduce.task.io.sort.factor":"10","yarn.nodemanager.amrmproxy.client.thread-count":"25","ha.failover-controller.new-active.rpc-timeout.ms":"60000","yarn.nodemanager.co
ntainer-localizer.java.opts":"-Xmx256m","mapreduce.jobhistory.datestring.cache.size":"200000","mapreduce.job.acl-modify-job":" ","yarn.nodemanager.windows-container.memory-limit.enabled":"false","yarn.timeline-service.webapp.address":"${yarn.timeline-service.hostname}:8188","yarn.app.mapreduce.am.job.committer.commit-window":"10000","yarn.nodemanager.container-manager.thread-count":"20","yarn.minicluster.fixed.ports":"false","yarn.cluster.max-application-priority":"0","yarn.timeline-service.ttl-enable":"true","mapreduce.jobhistory.recovery.store.fs.uri":"${hadoop.tmp.dir}/mapred/history/recoverystore","hadoop.caller.context.signature.max.size":"40","ha.zookeeper.session-timeout.ms":"10000","tfile.io.chunk.size":"1048576","mapreduce.job.speculative.slowtaskthreshold":"1.0","io.serializations":"org.apache.hadoop.io.serializer.WritableSerialization, org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization, org.apache.hadoop.io.serializer.avro.AvroReflectSerialization","hadoop.security.kms.client.failover.sleep.max.millis":"2000","hadoop.security.group.mapping.ldap.directory.search.timeout":"10000","fs.swift.impl":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem","yarn.nodemanager.local-cache.max-files-per-directory":"8192","hadoop.http.cross-origin.enabled":"false","mapreduce.map.sort.spill.percent":"0.80","yarn.timeline-service.entity-group-fs-store.scan-interval-seconds":"60","yarn.timeline-service.client.best-effort":"false","yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled":"*********(redacted)","hadoop.security.group.mapping.ldap.posix.attr.uid.name":"uidNumber","fs.AbstractFileSystem.swebhdfs.impl":"org.apache.hadoop.fs.SWebHdfs","mapreduce.ifile.readahead":"true","yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms":"300000","hadoop.security.kms.client.encrypted.key.cache.num.refill.threads":"2","yarn.resourcemanager.scheduler.class":"org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler","yarn.app.mapreduce.am.command-opts":"-Xmx1024m","mapreduce.cluster.local.dir":"${hadoop.tmp.dir}/mapred/local","io.mapfile.bloom.error.rate":"0.005","yarn.nodemanager.runtime.linux.allowed-runtimes":"default","yarn.sharedcache.store.class":"org.apache.hadoop.yarn.server.sharedcachemanager.store.InMemorySCMStore","ha.failover-controller.graceful-fence.rpc-timeout.ms":"5000","ftp.replication":"3","hadoop.security.uid.cache.secs":"14400","mapreduce.job.maxtaskfailures.per.tracker":"3","io.skip.checksum.errors":"false","yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts":"3","fs.s3a.connection.timeout":"200000","mapreduce.job.max.split.locations":"10","hadoop.registry.zk.session.timeout.ms":"60000","mapreduce.jvm.system-properties-to-log":"os.name,os.version,java.home,java.runtime.version,java.vendor,java.version,java.vm.name,java.class.path,java.io.tmpdir,user.dir,user.name","yarn.timeline-service.entity-group-fs-store.active-dir":"/tmp/entity-file-history/active","mapreduce.shuffle.transfer.buffer.size":"131072","yarn.timeline-service.client.retry-interval-ms":"1000","yarn.http.policy":"HTTP_ONLY","fs.s3a.socket.send.buffer":"8192","yarn.sharedcache.uploader.server.address":"0.0.0.0:8046","hadoop.http.authentication.token.validity":"*********(redacted)","mapreduce.shuffle.max.connections":"0","yarn.minicluster.yarn.nodemanager.resource.memory-mb":"4096","mapreduce.job.emit-timeline-data":"false","yarn.nodemanager.resource.system-reserved-memory-mb":"-1","hadoop.kerberos.min.seconds.before.relogin":"60","mapreduce.jobhistory.move.
thread-count":"3","yarn.resourcemanager.admin.client.thread-count":"1","yarn.dispatcher.drain-events.timeout":"300000","fs.s3a.buffer.dir":"${hadoop.tmp.dir}/s3a","hadoop.ssl.enabled.protocols":"TLSv1,SSLv2Hello,TLSv1.1,TLSv1.2","mapreduce.jobhistory.admin.address":"0.0.0.0:10033","yarn.log-aggregation-status.time-out.ms":"600000","mapreduce.shuffle.port":"13562","yarn.resourcemanager.max-log-aggregation-diagnostics-in-memory":"10","yarn.nodemanager.health-checker.interval-ms":"600000","ftp.blocksize":"67108864","yarn.nodemanager.log-container-debug-info.enabled":"false","yarn.client.max-cached-nodemanagers-proxies":"0","yarn.nodemanager.linux-container-executor.cgroups.delete-delay-ms":"20","yarn.nodemanager.delete.debug-delay-sec":"0","yarn.nodemanager.pmem-check-enabled":"true","yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage":"90.0","mapreduce.app-submission.cross-platform":"false","yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms":"10000","hadoop.security.groups.cache.secs":"300","yarn.resourcemanager.zk-retry-interval-ms":"1000","ipc.maximum.data.length":"67108864","mapreduce.shuffle.max.threads":"0","hadoop.security.authorization":"false","mapreduce.job.complete.cancel.delegation.tokens":"*********(redacted)","fs.s3a.paging.maximum":"5000","nfs.exports.allowed.hosts":"* rw","mapreduce.jobhistory.http.policy":"HTTP_ONLY","yarn.sharedcache.store.in-memory.check-period-mins":"720","s3native.replication":"3","hadoop.security.group.mapping.ldap.ssl":"false","yarn.client.application-client-protocol.poll-interval-ms":"200","ha.zookeeper.parent-znode":"/hadoop-ha","yarn.nodemanager.log-aggregation.policy.class":"org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.AllContainerLogAggregationPolicy","mapreduce.reduce.shuffle.merge.percent":"0.66","hadoop.security.group.mapping.ldap.search.filter.group":"(objectClass=group)","yarn.nodemanager.resourcemanager.minimum.version":"NONE","mapreduce.job.speculative.speculative-cap-running-tasks":"0.1","yarn.admin.acl":"*","yarn.nodemanager.recovery.supervised":"false","yarn.sharedcache.admin.thread-count":"1","yarn.resourcemanager.ha.automatic-failover.enabled":"true","mapreduce.reduce.skip.maxgroups":"0","mapreduce.reduce.shuffle.connect.timeout":"180000","yarn.resourcemanager.address":"${yarn.resourcemanager.hostname}:8032","ipc.client.ping":"true","mapreduce.task.local-fs.write-limit.bytes":"-1","fs.adl.oauth2.access.token.provider.type":"*********(redacted)","mapreduce.shuffle.ssl.file.buffer.size":"65536","yarn.resourcemanager.ha.automatic-failover.embedded":"true","hadoop.ssl.enabled":"false","fs.s3a.multipart.purge":"false","mapreduce.job.end-notification.max.attempts":"5","mapreduce.output.fileoutputformat.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","yarn.nodemanager.container-monitor.procfs-tree.smaps-based-rss.enabled":"false","ha.health-monitor.connect-retry-interval.ms":"1000","yarn.nodemanager.keytab":"/etc/krb5.keytab","mapreduce.jobhistory.keytab":"/etc/security/keytab/jhs.service.keytab","fs.s3a.threads.max":"10","mapreduce.reduce.shuffle.input.buffer.percent":"0.70","mapreduce.cluster.temp.dir":"${hadoop.tmp.dir}/mapred/temp","s3.replication":"3","yarn.nodemanager.node-labels.resync-interval-ms":"120000","hadoop.tmp.dir":"/tmp/hadoop-${user.name}","mapreduce.job.maps":"2","mapreduce.job.end-notification.max.retry.interval":"5000","yarn.log-aggregation.retain-check-interval-seconds":"-1","yarn.resourcemanager.resource-tracker.client.thread-count":"50","
yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size":"10000","yarn.resourcemanager.ha.automatic-failover.zk-base-path":"/yarn-leader-election","io.seqfile.local.dir":"${hadoop.tmp.dir}/io/local","mapreduce.client.submit.file.replication":"10","mapreduce.jobhistory.minicluster.fixed.ports":"false","fs.s3a.multipart.threshold":"2147483647","mapreduce.jobhistory.done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done","yarn.resourcemanager.zk-acl":"world:anyone:rwcda","ipc.client.idlethreshold":"4000","yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage":"false","mapreduce.reduce.input.buffer.percent":"0.0","yarn.nodemanager.amrmproxy.enable":"false","fs.ftp.host.port":"21","ipc.ping.interval":"60000","yarn.resourcemanager.history-writer.multi-threaded-dispatcher.pool-size":"10","yarn.resourcemanager.admin.address":"${yarn.resourcemanager.hostname}:8033","file.client-write-packet-size":"65536","ipc.client.kill.max":"10","mapreduce.reduce.speculative":"true","mapreduce.local.clientfactory.class.name":"org.apache.hadoop.mapred.LocalClientFactory","mapreduce.job.reducer.unconditional-preempt.delay.sec":"300","yarn.nodemanager.disk-health-checker.interval-ms":"120000","yarn.nodemanager.log.deletion-threads-count":"4","ipc.client.connection.maxidletime":"10000","mapreduce.task.io.sort.mb":"100","yarn.nodemanager.localizer.client.thread-count":"5","yarn.sharedcache.admin.address":"0.0.0.0:8047","yarn.nodemanager.localizer.cache.cleanup.interval-ms":"600000","hadoop.security.crypto.codec.classes.aes.ctr.nopadding":"org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec","fs.s3a.connection.ssl.enabled":"true","yarn.nodemanager.process-kill-wait.ms":"2000","mapreduce.job.hdfs-servers":"${fs.defaultFS}","hadoop.workaround.non.threadsafe.getpwuid":"true","fs.df.interval":"60000","fs.s3.sleepTimeSeconds":"10","fs.s3a.multiobjectdelete.enable":"true","yarn.sharedcache.cleaner.resource-sleep-ms":"0","yarn.nodemanager.disk-health-checker.min-healthy-disks":"0.25","hadoop.shell.missing.defaultFs.warning":"false","io.file.buffer.size":"65536","hadoop.security.group.mapping.ldap.search.attr.member":"member","hadoop.security.random.device.file.path":"/dev/urandom","hadoop.security.sensitive-config-keys":"*********(redacted)","hadoop.rpc.socket.factory.class.default":"org.apache.hadoop.net.StandardSocketFactory","yarn.intermediate-data-encryption.enable":"false","yarn.resourcemanager.connect.retry-interval.ms":"30000","yarn.scheduler.minimum-allocation-mb":"1024","yarn.app.mapreduce.am.staging-dir":"/tmp/hadoop-yarn/staging","mapreduce.reduce.shuffle.read.timeout":"180000","hadoop.http.cross-origin.max-age":"1800","fs.s3a.connection.establish.timeout":"5000","mapreduce.job.running.map.limit":"0","yarn.minicluster.control-resource-monitoring":"false","hadoop.ssl.require.client.cert":"false","hadoop.kerberos.kinit.command":"kinit","mapreduce.reduce.log.level":"INFO","hadoop.security.dns.log-slow-lookups.threshold.ms":"1000","mapreduce.job.ubertask.enable":"false","hadoop.caller.context.enabled":"false","yarn.nodemanager.vmem-pmem-ratio":"2.1","hadoop.rpc.protection":"authentication","ha.health-monitor.rpc-timeout.ms":"45000","s3native.stream-buffer-size":"4096","yarn.nodemanager.remote-app-log-dir":"/tmp/logs","yarn.nodemanager.resource.pcores-vcores-multiplier":"1.0","yarn.app.mapreduce.am.containerlauncher.threadpool-initial-size":"10","fs.s3n.multipart.uploads.enabled":"false","hadoop.security.crypto.buffer.size":"8192","yarn.
nodemanager.node-labels.provider.fetch-interval-ms":"600000","mapreduce.jobhistory.recovery.store.leveldb.path":"${hadoop.tmp.dir}/mapred/history/recoverystore","yarn.client.failover-retries-on-socket-timeouts":"0","hadoop.security.instrumentation.requires.admin":"false","yarn.nodemanager.delete.thread-count":"4","mapreduce.job.finish-when-all-reducers-done":"false","hadoop.registry.jaas.context":"Client","yarn.timeline-service.leveldb-timeline-store.path":"${hadoop.tmp.dir}/yarn/timeline","s3.blocksize":"67108864","io.map.index.interval":"128","mapreduce.job.counters.max":"120","yarn.timeline-service.store-class":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.jobhistory.move.interval-ms":"180000","yarn.nodemanager.localizer.fetch.thread-count":"4","yarn.resourcemanager.scheduler.client.thread-count":"50","hadoop.ssl.hostname.verifier":"DEFAULT","yarn.timeline-service.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/timeline","mapreduce.job.classloader":"false","mapreduce.task.profile.map.params":"${mapreduce.task.profile.params}","ipc.client.connect.timeout":"20000","s3.stream-buffer-size":"4096","yarn.nm.liveness-monitor.expiry-interval-ms":"600000","yarn.resourcemanager.reservation-system.planfollower.time-step":"1000","s3native.bytes-per-checksum":"512","mapreduce.jobtracker.address":"local","yarn.nodemanager.recovery.enabled":"false","mapreduce.job.end-notification.retry.interval":"1000","fs.du.interval":"600000","hadoop.security.group.mapping.ldap.read.timeout.ms":"60000","hadoop.security.groups.cache.warn.after.ms":"5000","file.bytes-per-checksum":"512","yarn.node-labels.fs-store.retry-policy-spec":"2000, 500","hadoop.security.groups.cache.background.reload":"false","net.topology.script.number.args":"100","mapreduce.task.merge.progress.records":"10000","yarn.nodemanager.localizer.address":"${yarn.nodemanager.hostname}:8040","yarn.timeline-service.keytab":"/etc/krb5.keytab","mapreduce.reduce.shuffle.fetch.retry.timeout-ms":"30000","yarn.resourcemanager.rm.container-allocation.expiry-interval-ms":"600000","mapreduce.fileoutputcommitter.algorithm.version":"1","yarn.resourcemanager.work-preserving-recovery.enabled":"true","mapreduce.map.skip.maxrecords":"0","yarn.sharedcache.root-dir":"/sharedcache","hadoop.http.authentication.type":"simple","mapreduce.task.userlog.limit.kb":"0","yarn.resourcemanager.scheduler.monitor.enable":"false","fs.s3n.block.size":"67108864","ipc.client.connect.max.retries":"10","hadoop.registry.zk.retry.times":"5","mapreduce.jobtracker.staging.root.dir":"${hadoop.tmp.dir}/mapred/staging","yarn.nodemanager.resource-monitor.interval-ms":"3000","mapreduce.shuffle.listen.queue.size":"128","mapreduce.map.cpu.vcores":"1","yarn.timeline-service.client.fd-retain-secs":"300","hadoop.user.group.static.mapping.overrides":"dr.who=;","mapreduce.jobhistory.recovery.store.class":"org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService","yarn.resourcemanager.fail-fast":"${yarn.fail-fast}","yarn.resourcemanager.proxy-user-privileges.enabled":"false","mapreduce.job.reducer.preempt.delay.sec":"0","hadoop.util.hash.type":"murmur","yarn.app.mapreduce.client.job.max-retries":"0","mapreduce.reduce.shuffle.retry-delay.max.ms":"60000","hadoop.security.group.mapping.ldap.connection.timeout.ms":"60000","mapreduce.task.profile.params":"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s","yarn.app.mapreduce.shuffle.log.backups":"0","hadoop.registry.zk.retry.interval.ms":"1000","yarn.nodemanager.linux-container-executor.cgrou
ps.delete-timeout-ms":"1000","fs.AbstractFileSystem.file.impl":"org.apache.hadoop.fs.local.LocalFs","yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds":"-1","mapreduce.jobhistory.cleaner.interval-ms":"86400000","hadoop.registry.zk.quorum":"localhost:2181","mapreduce.output.fileoutputformat.compress":"false","yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs":"*********(redacted)","hadoop.ssl.server.conf":"ssl-server.xml","yarn.sharedcache.cleaner.initial-delay-mins":"10","mapreduce.client.completion.pollinterval":"5000","hadoop.ssl.keystores.factory.class":"org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory","yarn.app.mapreduce.am.resource.cpu-vcores":"1","yarn.timeline-service.enabled":"false","yarn.nodemanager.runtime.linux.docker.capabilities":"CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID,SETUID,SETFCAP,SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE","yarn.acl.enable":"false","yarn.timeline-service.entity-group-fs-store.done-dir":"/tmp/entity-file-history/done/","mapreduce.task.profile":"false","yarn.resourcemanager.fs.state-store.uri":"${hadoop.tmp.dir}/yarn/system/rmstore","yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user":"nobody","yarn.resourcemanager.configuration.provider-class":"org.apache.hadoop.yarn.LocalConfigurationProvider","yarn.resourcemanager.configuration.file-system-based-store":"/yarn/conf","yarn.nodemanager.resource.percentage-physical-cpu-limit":"100","mapreduce.jobhistory.client.thread-count":"10","tfile.fs.input.buffer.size":"262144","mapreduce.client.progressmonitor.pollinterval":"1000","yarn.nodemanager.log-dirs":"${yarn.log.dir}/userlogs","fs.automatic.close":"true","fs.s3n.multipart.copy.block.size":"5368709120","yarn.nodemanager.hostname":"0.0.0.0","yarn.resourcemanager.zk-timeout-ms":"10000","ftp.stream-buffer-size":"4096","yarn.fail-fast":"false","hadoop.security.group.mapping.ldap.search.filter.user":"(&(objectClass=user)(sAMAccountName={0}))","yarn.timeline-service.address":"${yarn.timeline-service.hostname}:10200","mapreduce.job.ubertask.maxmaps":"9","fs.s3a.threads.keepalivetime":"60","mapreduce.task.files.preserve.failedtasks":"false","yarn.app.mapreduce.client.job.retry-interval":"2000","ha.failover-controller.graceful-fence.connection.retries":"1","yarn.resourcemanager.delegation.token.max-lifetime":"*********(redacted)","yarn.timeline-service.entity-group-fs-store.summary-store":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.reduce.cpu.vcores":"1","fs.client.resolve.remote.symlinks":"true","yarn.nodemanager.webapp.https.address":"0.0.0.0:8044","hadoop.http.cross-origin.allowed-origins":"*","yarn.timeline-service.entity-group-fs-store.retain-seconds":"604800","yarn.resourcemanager.metrics.runtime.buckets":"60,300,1440","yarn.timeline-service.generic-application-history.max-applications":"10000","yarn.nodemanager.local-dirs":"${hadoop.tmp.dir}/nm-local-dir","mapreduce.shuffle.connection-keep-alive.enable":"false","yarn.node-labels.configuration-type":"centralized","fs.s3a.path.style.access":"false","yarn.nodemanager.aux-services.mapreduce_shuffle.class":"org.apache.hadoop.mapred.ShuffleHandler","yarn.sharedcache.store.in-memory.staleness-period-mins":"10080","fs.adl.impl":"org.apache.hadoop.fs.adl.AdlFileSystem","yarn.resourcemanager.nodemanager.minimum.version":"NONE","net.topology.impl":"org.apache.hadoop.net.NetworkTopology","io.map.index.skip":"0","yarn.scheduler.maximum-allocation-vcores":"4","hadoop.http.cross-origin.allowed-headers":"X-Requested-Wit
h,Content-Type,Accept,Origin","yarn.nodemanager.log-aggregation.compression-type":"none","yarn.timeline-service.version":"1.0f","yarn.ipc.rpc.class":"org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC","mapreduce.reduce.maxattempts":"4","hadoop.security.dns.log-slow-lookups.enabled":"false","mapreduce.job.committer.setup.cleanup.needed":"true","mapreduce.job.running.reduce.limit":"0","ipc.maximum.response.length":"134217728","mapreduce.job.token.tracking.ids.enabled":"*********(redacted)","hadoop.caller.context.max.size":"128","hadoop.registry.system.acls":"sasl:yarn@, sasl:mapred@, sasl:hdfs@","yarn.nodemanager.recovery.dir":"${hadoop.tmp.dir}/yarn-nm-recovery","fs.s3a.fast.upload.buffer":"disk","mapreduce.jobhistory.intermediate-done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done_intermediate","yarn.app.mapreduce.shuffle.log.separate":"true","fs.s3a.max.total.tasks":"5","fs.s3a.readahead.range":"64K","hadoop.http.authentication.simple.anonymous.allowed":"true","fs.s3a.fast.upload":"false","fs.s3a.attempts.maximum":"20","hadoop.registry.zk.connection.timeout.ms":"15000","yarn.resourcemanager.delegation-token-renewer.thread-count":"*********(redacted)","yarn.nodemanager.health-checker.script.timeout-ms":"1200000","yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size":"10000","mapreduce.map.log.level":"INFO","mapreduce.output.fileoutputformat.compress.type":"RECORD","yarn.resourcemanager.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/system/rmstore","hadoop.registry.rm.enabled":"false","mapreduce.ifile.readahead.bytes":"4194304","yarn.resourcemanager.fs.state-store.retry-policy-spec":"2000, 500","yarn.sharedcache.app-checker.class":"org.apache.hadoop.yarn.server.sharedcachemanager.RemoteAppChecker","yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users":"true","yarn.nodemanager.resource.detect-hardware-capabilities":"false","mapreduce.cluster.acls.enabled":"false","mapreduce.job.speculative.retry-after-no-speculate":"1000","yarn.resourcemanager.fs.state-store.retry-interval-ms":"1000","file.stream-buffer-size":"4096","mapreduce.map.output.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","mapreduce.map.speculative":"true","mapreduce.job.speculative.retry-after-speculate":"15000","yarn.nodemanager.linux-container-executor.cgroups.mount":"false","yarn.app.mapreduce.am.container.log.backups":"0","yarn.app.mapreduce.am.log.level":"INFO","mapreduce.job.reduce.slowstart.completedmaps":"0.05","yarn.timeline-service.http-authentication.type":"simple","hadoop.security.group.mapping.ldap.search.attr.group.name":"cn","yarn.timeline-service.client.internal-timers-ttl-secs":"420","fs.s3a.block.size":"32M","yarn.sharedcache.client-server.address":"0.0.0.0:8045","yarn.resourcemanager.hostname":"0.0.0.0","yarn.resourcemanager.delegation.key.update-interval":"86400000","mapreduce.reduce.shuffle.fetch.retry.enabled":"${yarn.nodemanager.recovery.enabled}","mapreduce.map.memory.mb":"1024","mapreduce.task.skip.start.attempts":"2","fs.AbstractFileSystem.hdfs.impl":"org.apache.hadoop.fs.Hdfs","yarn.nodemanager.disk-health-checker.enable":"true","ipc.client.tcpnodelay":"true","ipc.client.rpc-timeout.ms":"0","fs.s3.maxRetries":"4","ipc.client.low-latency":"false","mapreduce.input.lineinputformat.linespermap":"1","ipc.client.connect.max.retries.on.timeouts":"45","yarn.timeline-service.leveldb-timeline-store.read-cache-size":"104857600","fs.AbstractFileSystem.har.impl":"org.apache.hadoop.fs.HarFs","mapreduce.job.split.metainfo.maxsize":"10000000","yarn.am.liveness-m
onitor.expiry-interval-ms":"600000","yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.timeline-service.entity-group-fs-store.app-cache-size":"10","fs.s3a.socket.recv.buffer":"8192","fs.s3n.multipart.uploads.block.size":"67108864","yarn.resourcemanager.resource-tracker.address":"${yarn.resourcemanager.hostname}:8031","yarn.nodemanager.node-labels.provider.fetch-timeout-ms":"1200000","yarn.resourcemanager.leveldb-state-store.compaction-interval-secs":"3600","mapreduce.client.output.filter":"FAILED","hadoop.http.filter.initializers":"org.apache.hadoop.http.lib.StaticUserWebFilter","mapreduce.reduce.memory.mb":"1024","s3native.client-write-packet-size":"65536","yarn.timeline-service.hostname":"0.0.0.0","file.replication":"1","yarn.nodemanager.container-metrics.unregister-delay-ms":"10000","yarn.nodemanager.container-metrics.period-ms":"-1","yarn.nodemanager.log.retain-seconds":"10800","yarn.timeline-service.entity-group-fs-store.cleaner-interval-seconds":"3600","yarn.resourcemanager.keytab":"/etc/krb5.keytab","hadoop.security.group.mapping.providers.combined":"true","mapreduce.reduce.merge.inmem.threshold":"1000","yarn.timeline-service.recovery.enabled":"false","yarn.sharedcache.nm.uploader.thread-count":"20","mapreduce.shuffle.ssl.enabled":"false","yarn.resourcemanager.state-store.max-completed-applications":"${yarn.resourcemanager.max-completed-applications}","mapreduce.job.speculative.minimum-allowed-tasks":"10","yarn.log-aggregation.retain-seconds":"-1","yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb":"0","mapreduce.jobhistory.max-age-ms":"604800000","hadoop.http.cross-origin.allowed-methods":"GET,POST,HEAD","mapreduce.jobhistory.webapp.address":"0.0.0.0:19888","mapreduce.jobtracker.system.dir":"${hadoop.tmp.dir}/mapred/system","yarn.client.nodemanager-connect.max-wait-ms":"180000","yarn.resourcemanager.webapp.address":"${yarn.resourcemanager.hostname}:8088","mapreduce.jobhistory.recovery.enable":"false","mapreduce.reduce.shuffle.parallelcopies":"5","fs.AbstractFileSystem.webhdfs.impl":"org.apache.hadoop.fs.WebHdfs","fs.trash.interval":"0","yarn.app.mapreduce.client.max-retries":"3","hadoop.security.authentication":"simple","mapreduce.task.profile.reduce.params":"${mapreduce.task.profile.params}","yarn.app.mapreduce.am.resource.mb":"1536","mapreduce.input.fileinputformat.list-status.num-threads":"1","yarn.nodemanager.container-executor.class":"org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor","io.mapfile.bloom.size":"1048576","yarn.timeline-service.ttl-ms":"604800000","yarn.nodemanager.resource.cpu-vcores":"-1","mapreduce.job.reduces":"1","fs.s3a.multipart.size":"100M","yarn.scheduler.minimum-allocation-vcores":"1","mapreduce.job.speculative.speculative-cap-total-tasks":"0.01","hadoop.ssl.client.conf":"ssl-client.xml","mapreduce.job.queuename":"default","ha.health-monitor.sleep-after-disconnect.ms":"1000","s3.bytes-per-checksum":"512","yarn.app.mapreduce.shuffle.log.limit.kb":"0","hadoop.security.group.mapping":"org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback","yarn.client.application-client-protocol.poll-timeout-ms":"-1","mapreduce.jobhistory.jhist.format":"json","yarn.resourcemanager.ha.enabled":"false","hadoop.http.staticuser.user":"dr.who","mapreduce.task.exit.timeout.check-interval-ms":"20000","mapreduce.task.exit.timeout":"60000","yarn.nodemanager.linux-container-executor.resources-handler.class":"org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler","mapreduce.r
educe.shuffle.memory.limit.percent":"0.25","yarn.resourcemanager.reservation-system.enable":"false","s3.client-write-packet-size":"65536","mapreduce.map.output.compress":"false","ha.zookeeper.acl":"world:anyone:rwcda","ipc.server.max.connections":"0","yarn.scheduler.maximum-allocation-mb":"8192","yarn.resourcemanager.scheduler.monitor.policies":"org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy","yarn.sharedcache.cleaner.period-mins":"1440","yarn.app.mapreduce.am.container.log.limit.kb":"0","s3native.blocksize":"67108864","ipc.client.connect.retry.interval":"1000","yarn.resourcemanager.zk-state-store.parent-path":"/rmstore","mapreduce.jobhistory.cleaner.enable":"true","yarn.timeline-service.client.fd-flush-interval-secs":"10","hadoop.security.kms.client.encrypted.key.cache.expiry":"43200000","yarn.client.nodemanager-client-async.thread-pool-max-size":"500","mapreduce.map.maxattempts":"4","yarn.nodemanager.sleep-delay-before-sigkill.ms":"250","mapreduce.job.end-notification.retry.attempts":"0","yarn.nodemanager.resource.count-logical-processors-as-cores":"false","yarn.resourcemanager.zk-num-retries":"1000","hadoop.registry.zk.root":"/registry","adl.feature.ownerandgroup.enableupn":"false","mapreduce.job.reduce.shuffle.consumer.plugin.class":"org.apache.hadoop.mapreduce.task.reduce.Shuffle","yarn.resourcemanager.delayed.delegation-token.removal-interval-ms":"*********(redacted)","yarn.nodemanager.localizer.cache.target-size-mb":"10240","ftp.client-write-packet-size":"65536","fs.AbstractFileSystem.adl.impl":"org.apache.hadoop.fs.adl.Adl","yarn.client.failover-retries":"0","fs.s3a.multipart.purge.age":"86400","io.native.lib.available":"true","net.topology.node.switch.mapping.impl":"org.apache.hadoop.net.ScriptBasedMapping","yarn.nodemanager.amrmproxy.address":"0.0.0.0:8048","ipc.server.listen.queue.size":"128","map.sort.class":"org.apache.hadoop.util.QuickSort","fs.viewfs.rename.strategy":"SAME_MOUNTPOINT","hadoop.security.kms.client.authentication.retry-count":"1","fs.permissions.umask-mode":"022","yarn.nodemanager.vmem-check-enabled":"true","yarn.nodemanager.recovery.compaction-interval-secs":"3600","yarn.app.mapreduce.client-am.ipc.max-retries":"3","mapreduce.job.ubertask.maxreduces":"1","hadoop.security.kms.client.encrypted.key.cache.size":"500","hadoop.security.java.secure.random.algorithm":"SHA1PRNG","ha.failover-controller.cli-check.rpc-timeout.ms":"20000","mapreduce.jobhistory.jobname.limit":"50","yarn.client.nodemanager-connect.retry-interval-ms":"10000","yarn.timeline-service.state-store-class":"org.apache.hadoop.yarn.server.timeline.recovery.LeveldbTimelineStateStore","yarn.nodemanager.env-whitelist":"JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME","yarn.sharedcache.nested-level":"3","yarn.nodemanager.webapp.address":"${yarn.nodemanager.hostname}:8042","rpc.metrics.quantile.enable":"false","mapreduce.jobhistory.admin.acl":"*","yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size":"10","hadoop.http.authentication.kerberos.keytab":"${user.home}/hadoop.keytab","yarn.resourcemanager.recovery.enabled":"false"},"System Properties":{"java.io.tmpdir":"/tmp","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Oracle 
Corporation","java.vm.specification.version":"1.8","user.home":"/root","file.encoding.pkg":"sun.io","sun.nio.ch.bugLevel":"","sun.arch.data.model":"64","sun.boot.library.path":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/amd64","user.dir":"/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8","java.library.path":"/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib","sun.cpu.isalist":"","os.arch":"amd64","java.vm.version":"25.252-b09","jetty.git.hash":"ab228fde9e55e9164c738d7fa121f8ac5acd51c9","java.endorsed.dirs":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/endorsed","java.runtime.version":"1.8.0_252-b09","java.vm.info":"mixed mode","java.ext.dirs":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/ext:/usr/java/packages/lib/ext","java.runtime.name":"OpenJDK Runtime Environment","file.separator":"/","java.class.version":"52.0","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/resources.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/rt.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/sunrsasign.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jsse.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jce.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/charsets.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/lib/jfr.jar:/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre/classes","file.encoding":"UTF-8","user.timezone":"Asia/Shanghai","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"3.10.0-1127.10.1.el7.x86_64","sun.os.patch.level":"unknown","java.vm.specification.vendor":"Oracle Corporation","user.country":"US","sun.jnu.encoding":"UTF-8","user.language":"en","java.vendor.url":"http://java.oracle.com/","java.awt.printerjob":"sun.print.PSPrinterJob","java.awt.graphicsenv":"sun.awt.X11GraphicsEnvironment","awt.toolkit":"sun.awt.X11.XToolkit","os.name":"Linux","java.vm.vendor":"Oracle Corporation","java.vendor.url.bug":"http://bugreport.sun.com/bugreport/","user.name":"root","java.vm.name":"OpenJDK 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --master local[*] --conf spark.eventLog.dir=/tmp/spark-history --conf spark.eventLog.enabled=true --conf spark.sql.shuffle.partitions=2 --class org.apache.spark.examples.sql.streaming.StructuredKafkaWordCount ./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar 192.168.130.97:9092 subscribe test5","java.home":"/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.252.b09-2.el7_8.x86_64/jre","java.version":"1.8.0_252","sun.io.unicode.encoding":"UnicodeLittle"},"Classpath Entries":{"/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-graphite-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/nimbus-jose-jwt-4.41.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-vector-code-gen-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-jaxrs-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-server-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/pyrolite-4.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/conf/":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json-smart-2.3.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/objenesis-2.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-auth-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jsp-api-2.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-unsafe_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-codec-1.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/protobuf-java-2.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-1.8.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guice-3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aopalliance-repackaged-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/transaction-api-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/gson-2.2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-rdbms-4.1.19.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-module-paranamer-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/libfb303-0.9.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-cli-1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-tags_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-library-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xbean-asm7-shaded-4.15.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-container-servlet-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-api-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.xml.bind-api-2.3.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/okhttp-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/derby-10.12.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-core-asl-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-collections-3.2.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/httpcore-4.4.12.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-beanutils-1.9.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-util_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-crypto-1.0.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-launcher_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stax-api-1.0-2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-ast_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/lz4-java-1.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-parser-combinators_2.12-1.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-format-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-column-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-logging-1.1.3.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/audience-annotations-0.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-jdbc-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-hive-thriftserver_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-cli-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javolution-5.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/JLargeArrays-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-api-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/algebra_2.12-2.0.0-M2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-dbcp-1.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.ws.rs-api-2.1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/threeten-extra-1.5.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-io-2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-json-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/libthrift-0.12.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/compress-lzf-1.0.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-jmx-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.inject-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stax-api-1.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-recipes-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/antlr4-runtime-4.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/slf4j-api-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/oro-2.0.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-memory-0.15.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jpam-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/velocity-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-core-1.5.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sql_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-databind-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-text-1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-client-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/htrace-core4-4.0.1-incubating.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-graphx_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-ipc-1.8.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/macro-compat_2.12-1.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-util-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/core-1.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-2.3.7.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/joda-time-2.10.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-encoding-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-llap-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-network-common_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-api-jdo-4.2.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/paranamer-2.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-0.23-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/activation-1.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-framework-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-compress-1.8.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/RoaringBitmap-0.7.45.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/ivy-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-core-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-httpclient-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-yarn_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-common-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/zstd-jni-1.4.5-2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-container-servlet-core-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/snappy-java-1.1.7.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/shapeless_2.12-2.3.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-pool-1.5.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-core_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/httpclient-4.5.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/api-util-1.0.0-M20.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aircompressor-0.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-repl_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/leveldbjni-all-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-hk2-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jta-1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-sslengine-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-net-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/datanucleus-core-4.1.17.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-server-web-proxy-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/breeze_2.12-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-mapreduce-1.5.10.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jetty-6.1.26.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-core_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xz-1.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.inject-1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-compiler-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-jvm-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/orc-shims-1.5.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jaxb-api-2.2.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.validation-api-2.0.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-macros_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/janino-3.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/osgi-resource-locator-1.0.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jcl-over-slf4j-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-app-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-utils-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sketch_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/JTransforms-3.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/kafka-clients-2.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guice-servlet-3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/okio-1.4.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-annotations-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-math3-3.4.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-scalap_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/bonecp-0.8.0.RELEASE.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-streaming_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/accessors-smart-1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/guava-14.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/shims-0.7.45.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/ST4-4.0.4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-module-scala_2.12-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-xml_2.12-1.2.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/chill-java-0.9.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-shuffle-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/cats-kernel_2.12-2.0.0-M4.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/stream-2.9.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-configuration-1.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jodd-core-3.5.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-collection-compat_2.12-2.1.1.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-pool2-2.6.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jul-to-slf4j-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/xmlenc-0.52.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/flatbuffers-java-1.9.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-token-provider-kafka-0-10_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/json4s-jackson_2.12-3.6.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-compiler-3.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jline-2.14.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/breeze-macros_2.12-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/kryo-shaded-4.0.2.jar":"System Classpath","spark://iZbp19vpr16ix621sdw476Z:46309/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar":"Added By User","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-hive_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-common-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/istack-commons-runtime-3.0.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/curator-client-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-xc-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/zookeeper-3.4.14.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-hadoop-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jakarta.annotation-api-1.3.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-shims-scheduler-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/univocity-parsers-2.8.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-digester-1.8.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-mllib_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arpack_combined_all-0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-sql-kafka-0-10_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-annotations-2.10.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hk2-locator-2.6.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-core-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/avro-mapred-1.8.2-hadoop2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-yarn-server-common-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/scala-reflect-2.12.10.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/super-csv-2.2.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-mapreduce-client-jobclient-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-common-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/apacheds-kerberos-codec-2.0.0-M15.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-exec-2.3.7-core.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/opencsv-2.3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/api-asn1-api-1.0.0-M20.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-storage-api-2.7.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spire-platform_2.12-0.17.0-M1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/aopalliance-1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/HikariCP-2.5.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-metastore-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/minlog-1.3.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-format-0.15.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jsr305-3.0.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-lang-2.6.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/commons-lang3-3.9.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.jdo-3.2.0-m3.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/apacheds-i18n-2.0.0-M15.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javassist-3.25.0-GA.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jackson-mapper-asl-1.9.13.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/antlr-runtime-3.5.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/log4j-1.2.17.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-beeline-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/chill_2.12-0.9.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jdo-api-3.0.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-kvstore_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/metrics-core-4.1.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jaxb-runtime-2.3.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-mllib-local_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/py4j-0.10.9.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/javax.servlet-api-3.1.0.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hive-serde-2.3.7.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/hadoop-hdfs-client-2.8.5.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-network-shuffle_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jcip-annotations-1.0-1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/jersey-media-jaxb-2.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/parquet-jackson-1.10.1.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/slf4j-log4j12-1.7.30.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/netty-all-4.1.47.Final.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/arrow-vector-0.15.1.jar":"System 
Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/spark-catalyst_2.12-3.1.0-SNAPSHOT.jar":"System Classpath","/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/jars/machinist_2.12-0.6.8.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"StructuredKafkaWordCount","App ID":"local-1596020211915","Timestamp":1596020210919,"User":"root"} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryStartedEvent","id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:56:55.947Z"} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":0,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48276}}, {\"test5\":{\"0\":48279}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, 
input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#142]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = f7faa1e9-69d9-41b4-9d77-919795af2413, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = f7faa1e9-69d9-41b4-9d77-919795af2413, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#66]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS 
value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":80,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":79,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":76,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":77,"metricType":"timing"},{"name":"peak memory","accumulatorId":75,"metricType":"size"},{"name":"number of output rows","accumulatorId":74,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":78,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":71,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":72,"metricType":"timing"},{"name":"peak memory","accumulatorId":70,"metricType":"size"},{"name":"number of output rows","accumulatorId":69,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":73,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":68,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":20,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":21,"metricType":"nsTiming"},{"name":"records read","accumulatorId":18,"metricType":"sum"},{"name":"local bytes read","accumulatorId":16,"metricType":"size"},{"name":"fetch wait time","accumulatorId":17,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":14,"metricType":"size"},{"name":"local blocks read","accumulatorId":13,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":12,"metricType":"sum"},{"name":"data size","accumulatorId":11,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":15,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":19,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":67,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":64,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":65,"metricType":"timing"},{"name":"peak memory","accumulatorId":63,"metricType":"size"},{"name":"number of output rows","accumulatorId":62,"metricType":"sum"},{"name":"avg hash probe bucket list 
iters","accumulatorId":66,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":61,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":51,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":52,"metricType":"sum"},{"name":"memory used by state","accumulatorId":57,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":59,"metricType":"sum"},{"name":"number of output rows","accumulatorId":50,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":58,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":60,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":56,"metricType":"timing"},{"name":"time to remove","accumulatorId":55,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":53,"metricType":"sum"},{"name":"time to update","accumulatorId":54,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":47,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":48,"metricType":"timing"},{"name":"peak memory","accumulatorId":46,"metricType":"size"},{"name":"number of output rows","accumulatorId":45,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":49,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":44,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020220179} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":1,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48276}}, {\"test5\":{\"0\":48279}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#218]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 64a4779b-846a-4f20-9f5c-899a8dbf68d8, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 64a4779b-846a-4f20-9f5c-899a8dbf68d8, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27fafcca","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 0, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#66]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":80,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":79,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":76,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":77,"metricType":"timing"},{"name":"peak memory","accumulatorId":75,"metricType":"size"},{"name":"number of output rows","accumulatorId":74,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":78,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":71,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":72,"metricType":"timing"},{"name":"peak memory","accumulatorId":70,"metricType":"size"},{"name":"number of output rows","accumulatorId":69,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":73,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":68,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":20,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":21,"metricType":"nsTiming"},{"name":"records read","accumulatorId":18,"metricType":"sum"},{"name":"local bytes 
read","accumulatorId":16,"metricType":"size"},{"name":"fetch wait time","accumulatorId":17,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":14,"metricType":"size"},{"name":"local blocks read","accumulatorId":13,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":12,"metricType":"sum"},{"name":"data size","accumulatorId":11,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":15,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":19,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":67,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":64,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":65,"metricType":"timing"},{"name":"peak memory","accumulatorId":63,"metricType":"size"},{"name":"number of output rows","accumulatorId":62,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":66,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":61,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":51,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":52,"metricType":"sum"},{"name":"memory used by state","accumulatorId":57,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":59,"metricType":"sum"},{"name":"number of output rows","accumulatorId":50,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":58,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":60,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":56,"metricType":"timing"},{"name":"time to remove","accumulatorId":55,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":53,"metricType":"sum"},{"name":"time to update","accumulatorId":54,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":47,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":48,"metricType":"timing"},{"name":"peak memory","accumulatorId":46,"metricType":"size"},{"name":"number of output rows","accumulatorId":45,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":49,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":44,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020220258} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1596020221633,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory 
Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage 
IDs":[0,1],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD 
ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020221656,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1596020221738,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Launch Time":1596020221738,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222649,"Failed":false,"Killed":false,"Accumulables":[{"ID":21,"Name":"shuffle write time","Update":"9599308","Value":"9599308","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":20,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":19,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":11,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":68,"Name":"duration","Update":"296","Value":"296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"time in aggregation build","Update":"200","Value":"200","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":75,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":77,"Name":"time in aggregation build","Update":"190","Value":"190","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":79,"Name":"duration","Update":"336","Value":"336","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":80,"Name":"number of output rows","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":125,"Name":"internal.metrics.input.recordsRead","Update":3,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":123,"Name":"internal.metrics.shuffle.write.writeTime","Update":9599308,"Value":9599308,"Internal":true,"Count Failed Values":true},{"ID":122,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":121,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":108,"Name":"internal.metrics.jvmGCTime","Update":17,"Value":17,"Internal":true,"Count Failed Values":true},{"ID":107,"Name":"internal.metrics.resultSize","Update":2630,"Value":2630,"Internal":true,"Count Failed Values":true},{"ID":106,"Name":"internal.metrics.executorCpuTime","Update":466139164,"Value":466139164,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.executorRunTime","Update":503,"Value":503,"Internal":true,"Count Failed Values":true},{"ID":104,"Name":"internal.metrics.executorDeserializeCpuTime","Update":301869581,"Value":301869581,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorDeserializeTime","Update":361,"Value":361,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":361,"Executor Deserialize CPU Time":301869581,"Executor Run Time":503,"Executor CPU Time":466139164,"Peak Execution Memory":524288,"Result Size":2630,"JVM GC Time":17,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks 
Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":9599308,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":3},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"16\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"9\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"14\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"DataSourceRDD","Scope":"{\"id\":\"20\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020221656,"Completion Time":1596020222661,"Accumulables":[{"ID":104,"Name":"internal.metrics.executorDeserializeCpuTime","Value":301869581,"Internal":true,"Count Failed Values":true},{"ID":122,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"time in aggregation build","Value":"190","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"duration","Value":"296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":80,"Name":"number of output rows","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":125,"Name":"internal.metrics.input.recordsRead","Value":3,"Internal":true,"Count Failed Values":true},{"ID":107,"Name":"internal.metrics.resultSize","Value":2630,"Internal":true,"Count Failed Values":true},{"ID":74,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":11,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":20,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":106,"Name":"internal.metrics.executorCpuTime","Value":466139164,"Internal":true,"Count Failed Values":true},{"ID":109,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":121,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":112,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":103,"Name":"internal.metrics.executorDeserializeTime","Value":361,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"duration","Value":"336","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":19,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":123,"Name":"internal.metrics.shuffle.write.writeTime","Value":9599308,"Internal":true,"Count Failed Values":true},{"ID":105,"Name":"internal.metrics.executorRunTime","Value":503,"Internal":true,"Count Failed Values":true},{"ID":69,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"time in aggregation build","Value":"200","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":108,"Name":"internal.metrics.jvmGCTime","Value":17,"Internal":true,"Count Failed Values":true},{"ID":21,"Name":"shuffle write 
time","Value":"9599308","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":75,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020222688,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"0","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"1","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1596020222709,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":1,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":0,"Attempt":0,"Launch Time":1596020222713,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":0,"Attempt":0,"Launch Time":1596020222713,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222954,"Failed":false,"Killed":false,"Accumulables":[{"ID":44,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":46,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":55,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Update":"50","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":58,"Name":"estimated size of state only on current version","Update":"64","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Update":"208","Value":"208","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":134,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Update":5354,"Value":5354,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Update":93367533,"Value":93367533,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.executorRunTime","Update":203,"Value":203,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Update":10308753,"Value":10308753,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Update":23,"Value":23,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":23,"Executor 
Deserialize CPU Time":10308753,"Executor Run Time":203,"Executor CPU Time":93367533,"Peak Execution Memory":524288,"Result Size":5354,"JVM GC Time":0,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":1,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Launch Time":1596020222709,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020222965,"Failed":false,"Killed":false,"Accumulables":[{"ID":44,"Name":"duration","Update":"33","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":49,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":45,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":46,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Update":"28","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":53,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":55,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Update":"31","Value":"81","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":58,"Name":"estimated size of state only on current version","Update":"424","Value":"488","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":50,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Update":"568","Value":"776","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":52,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Update":"28","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":62,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":13,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":17,"Name":"fetch wait 
time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":16,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":18,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Update":5574,"Value":10928,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Update":91355172,"Value":184722705,"Internal":true,"Count Failed Values":true},{"ID":130,"Name":"internal.metrics.executorRunTime","Update":205,"Value":408,"Internal":true,"Count Failed Values":true},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Update":21029530,"Value":31338283,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Update":34,"Value":57,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":34,"Executor Deserialize CPU Time":21029530,"Executor Run Time":205,"Executor CPU Time":91355172,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":11,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[10],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":7,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"8\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":10,"Name":"StateStoreRDD","Scope":"{\"id\":\"3\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"StateStoreRDD","Scope":"{\"id\":\"7\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[0],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020222688,"Completion Time":1596020222967,"Accumulables":[{"ID":137,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.executorDeserializeTime","Value":57,"Internal":true,"Count Failed Values":true},{"ID":131,"Name":"internal.metrics.executorCpuTime","Value":184722705,"Internal":true,"Count Failed Values":true},{"ID":50,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":140,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed 
Values":true},{"ID":53,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":62,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":17,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":134,"Name":"internal.metrics.resultSerializationTime","Value":1,"Internal":true,"Count Failed Values":true},{"ID":44,"Name":"duration","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":56,"Name":"time to commit changes","Value":"81","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":65,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":142,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":46,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":145,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":55,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":49,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":139,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":58,"Name":"estimated size of state only on current version","Value":"488","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":13,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":130,"Name":"internal.metrics.executorRunTime","Value":408,"Internal":true,"Count Failed Values":true},{"ID":16,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":52,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"duration","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":129,"Name":"internal.metrics.executorDeserializeCpuTime","Value":31338283,"Internal":true,"Count Failed Values":true},{"ID":132,"Name":"internal.metrics.resultSize","Value":10928,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":45,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":63,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":54,"Name":"time to update","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":144,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":18,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":57,"Name":"memory used by state","Value":"776","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":48,"Name":"time in aggregation build","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":143,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed 
Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1596020222973,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":2,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 0","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#46, count#47]\nArguments: [value#46, count#47]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#46, count#47]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":153,"metricType":"sum"}]},"time":1596020223028} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":2,"time":1596020223062} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":1,"time":1596020223069} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":0,"time":1596020223069} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:56:56.015Z","batchId":0,"batchDuration":7110,"durationMs":{"triggerExecution":7109,"queryPlanning":439,"getBatch":21,"latestOffset":3524,"addBatch":3011,"walCommit":35},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":776,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":488,"loadedMapCacheHitCount":0,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":null,"endOffset":"{\"test5\":{\"0\":48279}}","numInputRows":3,"inputRowsPerSecond":"NaN","processedRowsPerSecond":0.42194092827004215}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":3,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 
1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48279}}, {\"test5\":{\"0\":48642}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#373]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 1fb6b6c6-ced8-4f85-80af-1f3f4c424457, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id 
: 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 1fb6b6c6-ced8-4f85-80af-1f3f4c424457, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#297]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] 
class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":237,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":236,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":233,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":234,"metricType":"timing"},{"name":"peak memory","accumulatorId":232,"metricType":"size"},{"name":"number of output rows","accumulatorId":231,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":235,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":228,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":229,"metricType":"timing"},{"name":"peak memory","accumulatorId":227,"metricType":"size"},{"name":"number of output rows","accumulatorId":226,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":230,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":225,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":177,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":178,"metricType":"nsTiming"},{"name":"records read","accumulatorId":175,"metricType":"sum"},{"name":"local bytes read","accumulatorId":173,"metricType":"size"},{"name":"fetch wait time","accumulatorId":174,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":171,"metricType":"size"},{"name":"local blocks read","accumulatorId":170,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":169,"metricType":"sum"},{"name":"data size","accumulatorId":168,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":172,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":176,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":224,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":221,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":222,"metricType":"timing"},{"name":"peak memory","accumulatorId":220,"metricType":"size"},{"name":"number of output rows","accumulatorId":219,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":223,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":218,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":208,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":209,"metricType":"sum"},{"name":"memory used by state","accumulatorId":214,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":216,"metricType":"sum"},{"name":"number of output rows","accumulatorId":207,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":215,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":217,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":213,"metricType":"timing"},{"name":"time to 
remove","accumulatorId":212,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":210,"metricType":"sum"},{"name":"time to update","accumulatorId":211,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":204,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":205,"metricType":"timing"},{"name":"peak memory","accumulatorId":203,"metricType":"size"},{"name":"number of output rows","accumulatorId":202,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":206,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":201,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223333} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":4,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48279}}, {\"test5\":{\"0\":48642}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: 
java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#449]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 7992c0a8-0641-440d-aaf7-ad453fe25c0a, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 7992c0a8-0641-440d-aaf7-ad453fe25c0a, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@3a1eb73c","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 1, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#297]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, 
StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":237,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":236,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":233,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":234,"metricType":"timing"},{"name":"peak memory","accumulatorId":232,"metricType":"size"},{"name":"number of output rows","accumulatorId":231,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":235,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":228,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":229,"metricType":"timing"},{"name":"peak memory","accumulatorId":227,"metricType":"size"},{"name":"number of output rows","accumulatorId":226,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":230,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":225,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":177,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":178,"metricType":"nsTiming"},{"name":"records read","accumulatorId":175,"metricType":"sum"},{"name":"local bytes read","accumulatorId":173,"metricType":"size"},{"name":"fetch wait time","accumulatorId":174,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":171,"metricType":"size"},{"name":"local blocks read","accumulatorId":170,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":169,"metricType":"sum"},{"name":"data size","accumulatorId":168,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":172,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":176,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":224,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":221,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":222,"metricType":"timing"},{"name":"peak memory","accumulatorId":220,"metricType":"size"},{"name":"number of output 
rows","accumulatorId":219,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":223,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":218,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":208,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":209,"metricType":"sum"},{"name":"memory used by state","accumulatorId":214,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":216,"metricType":"sum"},{"name":"number of output rows","accumulatorId":207,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":215,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":217,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":213,"metricType":"timing"},{"name":"time to remove","accumulatorId":212,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":210,"metricType":"sum"},{"name":"time to update","accumulatorId":211,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":204,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":205,"metricType":"timing"},{"name":"peak memory","accumulatorId":203,"metricType":"size"},{"name":"number of output rows","accumulatorId":202,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":206,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":201,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223382} +{"Event":"SparkListenerJobStart","Job ID":1,"Submission Time":1596020223482,"Stage Infos":[{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[2,3],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage 
Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223485,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at 
StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":0,"Attempt":0,"Launch Time":1596020223493,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":0,"Attempt":0,"Launch Time":1596020223493,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223601,"Failed":false,"Killed":false,"Accumulables":[{"ID":178,"Name":"shuffle write time","Update":"837580","Value":"837580","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":177,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":176,"Name":"shuffle bytes written","Update":"169","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":168,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":225,"Name":"duration","Update":"84","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":226,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":227,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":229,"Name":"time in aggregation build","Update":"74","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":231,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":232,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":234,"Name":"time in aggregation build","Update":"68","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":236,"Name":"duration","Update":"84","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":237,"Name":"number of output rows","Update":"363","Value":"363","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":282,"Name":"internal.metrics.input.recordsRead","Update":363,"Value":363,"Internal":true,"Count Failed Values":true},{"ID":280,"Name":"internal.metrics.shuffle.write.writeTime","Update":837580,"Value":837580,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":169,"Value":169,"Internal":true,"Count Failed 
Values":true},{"ID":269,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":264,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":263,"Name":"internal.metrics.executorCpuTime","Update":95945587,"Value":95945587,"Internal":true,"Count Failed Values":true},{"ID":262,"Name":"internal.metrics.executorRunTime","Update":96,"Value":96,"Internal":true,"Count Failed Values":true},{"ID":261,"Name":"internal.metrics.executorDeserializeCpuTime","Update":7437557,"Value":7437557,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.executorDeserializeTime","Update":7,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":7,"Executor Deserialize CPU Time":7437557,"Executor Run Time":96,"Executor CPU Time":95945587,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":169,"Shuffle Write Time":837580,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":363},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"DataSourceRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"53\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"48\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of 
Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"49\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"47\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"42\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223485,"Completion Time":1596020223603,"Accumulables":[{"ID":227,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":236,"Name":"duration","Value":"84","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":176,"Name":"shuffle bytes written","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":262,"Name":"internal.metrics.executorRunTime","Value":96,"Internal":true,"Count Failed Values":true},{"ID":226,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":280,"Name":"internal.metrics.shuffle.write.writeTime","Value":837580,"Internal":true,"Count Failed Values":true},{"ID":229,"Name":"time in aggregation build","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":232,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":178,"Name":"shuffle write time","Value":"837580","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":225,"Name":"duration","Value":"84","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":261,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7437557,"Internal":true,"Count Failed Values":true},{"ID":279,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":234,"Name":"time in aggregation build","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":264,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":282,"Name":"internal.metrics.input.recordsRead","Value":363,"Internal":true,"Count Failed Values":true},{"ID":237,"Name":"number of output rows","Value":"363","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":177,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":168,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":231,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":263,"Name":"internal.metrics.executorCpuTime","Value":95945587,"Internal":true,"Count Failed Values":true},{"ID":260,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":269,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":278,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":169,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223613,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"1","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"4","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} 
+{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1596020223625,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":3,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":1,"Attempt":0,"Launch Time":1596020223626,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":0,"Attempt":0,"Launch Time":1596020223625,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223717,"Failed":false,"Killed":false,"Accumulables":[{"ID":201,"Name":"duration","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":212,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Update":"38","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":216,"Name":"count of cache hit on states cache in provider","Update":"2","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Update":"376","Value":"376","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":218,"Name":"duration","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Update":22954307,"Value":22954307,"Internal":true,"Count Failed Values":true},{"ID":287,"Name":"internal.metrics.executorRunTime","Update":77,"Value":77,"Internal":true,"Count Failed Values":true},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6627382,"Value":6627382,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6627382,"Executor Run Time":77,"Executor CPU Time":22954307,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":3,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":1,"Attempt":0,"Launch Time":1596020223626,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020223720,"Failed":false,"Killed":false,"Accumulables":[{"ID":201,"Name":"duration","Update":"4","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":206,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":202,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Update":"18","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":210,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":212,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Update":"30","Value":"68","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":207,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":216,"Name":"count of cache hit on states cache in provider","Update":"2","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Update":"840","Value":"1216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":209,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":218,"Name":"duration","Update":"19","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":219,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":224,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":170,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":174,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":173,"Name":"local bytes read","Update":"169","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":175,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":169,"Value":169,"Internal":true,"Count Failed Values":true},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Update":25907369,"Value":48861676,"Internal":true,"Count Failed Values":true},{"ID":287,"Name":"internal.metrics.executorRunTime","Update":82,"Value":159,"Internal":true,"Count Failed Values":true},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Update":7573630,"Value":14201012,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Update":7,"Value":13,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":7,"Executor Deserialize CPU Time":7573630,"Executor Run Time":82,"Executor CPU Time":25907369,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":169,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":23,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[22],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":22,"Name":"StateStoreRDD","Scope":"{\"id\":\"36\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[21],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"41\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"37\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"StateStoreRDD","Scope":"{\"id\":\"40\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[2],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020223613,"Completion Time":1596020223724,"Accumulables":[{"ID":218,"Name":"duration","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":209,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":173,"Name":"local bytes read","Value":"169","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"internal.metrics.executorDeserializeCpuTime","Value":14201012,"Internal":true,"Count Failed Values":true},{"ID":298,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":289,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":301,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":175,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":211,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":202,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":220,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":214,"Name":"memory used by state","Value":"1216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":205,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":300,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":169,"Internal":true,"Count Failed Values":true},{"ID":294,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":285,"Name":"internal.metrics.executorDeserializeTime","Value":13,"Internal":true,"Count Failed Values":true},{"ID":207,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":288,"Name":"internal.metrics.executorCpuTime","Value":48861676,"Internal":true,"Count Failed Values":true},{"ID":216,"Name":"count of cache hit on states cache in provider","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":174,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":210,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":219,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":201,"Name":"duration","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":222,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":213,"Name":"time to commit changes","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":302,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":212,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":203,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":170,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":215,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":287,"Name":"internal.metrics.executorRunTime","Value":159,"Internal":true,"Count Failed Values":true},{"ID":206,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":224,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Completion Time":1596020223725,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":5,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 1","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#60, count#61]\nArguments: [value#60, count#61]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#60, count#61]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":310,"metricType":"sum"}]},"time":1596020223752} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":5,"time":1596020223761} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":4,"time":1596020223762} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":3,"time":1596020223762} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:03.168Z","batchId":1,"batchDuration":622,"durationMs":{"triggerExecution":622,"queryPlanning":47,"getBatch":0,"latestOffset":7,"addBatch":478,"walCommit":59},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1216,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":4,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48279}}","endOffset":"{\"test5\":{\"0\":48642}}","numInputRows":363,"inputRowsPerSecond":50.74793792814204,"processedRowsPerSecond":583.6012861736334}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":6,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48642}}, {\"test5\":{\"0\":48705}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, 
offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#604]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 39c861a0-0e30-4ca2-b363-495aff0f3f93, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 39c861a0-0e30-4ca2-b363-495aff0f3f93, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#528]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":394,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":393,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":390,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":391,"metricType":"timing"},{"name":"peak memory","accumulatorId":389,"metricType":"size"},{"name":"number of output rows","accumulatorId":388,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":392,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":385,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":386,"metricType":"timing"},{"name":"peak memory","accumulatorId":384,"metricType":"size"},{"name":"number of output rows","accumulatorId":383,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":387,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":382,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":334,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":335,"metricType":"nsTiming"},{"name":"records read","accumulatorId":332,"metricType":"sum"},{"name":"local bytes read","accumulatorId":330,"metricType":"size"},{"name":"fetch wait time","accumulatorId":331,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":328,"metricType":"size"},{"name":"local blocks read","accumulatorId":327,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":326,"metricType":"sum"},{"name":"data 
size","accumulatorId":325,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":329,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":333,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":381,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":378,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":379,"metricType":"timing"},{"name":"peak memory","accumulatorId":377,"metricType":"size"},{"name":"number of output rows","accumulatorId":376,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":380,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":375,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":365,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":366,"metricType":"sum"},{"name":"memory used by state","accumulatorId":371,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":373,"metricType":"sum"},{"name":"number of output rows","accumulatorId":364,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":372,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":374,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":370,"metricType":"timing"},{"name":"time to remove","accumulatorId":369,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":367,"metricType":"sum"},{"name":"time to update","accumulatorId":368,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":361,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":362,"metricType":"timing"},{"name":"peak memory","accumulatorId":360,"metricType":"size"},{"name":"number of output rows","accumulatorId":359,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":363,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":358,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020223909} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":7,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48642}}, {\"test5\":{\"0\":48705}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#680]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c2fd3b95-1ba6-4d3e-8b9c-0256dfd90973, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c2fd3b95-1ba6-4d3e-8b9c-0256dfd90973, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@52d6c50a","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 2, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#528]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":394,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":393,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":390,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":391,"metricType":"timing"},{"name":"peak memory","accumulatorId":389,"metricType":"size"},{"name":"number of output rows","accumulatorId":388,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":392,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":385,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":386,"metricType":"timing"},{"name":"peak memory","accumulatorId":384,"metricType":"size"},{"name":"number of output rows","accumulatorId":383,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":387,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":382,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":334,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":335,"metricType":"nsTiming"},{"name":"records read","accumulatorId":332,"metricType":"sum"},{"name":"local bytes read","accumulatorId":330,"metricType":"size"},{"name":"fetch wait time","accumulatorId":331,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":328,"metricType":"size"},{"name":"local blocks read","accumulatorId":327,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":326,"metricType":"sum"},{"name":"data size","accumulatorId":325,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":329,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":333,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":381,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":378,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":379,"metricType":"timing"},{"name":"peak memory","accumulatorId":377,"metricType":"size"},{"name":"number of output rows","accumulatorId":376,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":380,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":375,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":365,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":366,"metricType":"sum"},{"name":"memory used by state","accumulatorId":371,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":373,"metricType":"sum"},{"name":"number of output rows","accumulatorId":364,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":372,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":374,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":370,"metricType":"timing"},{"name":"time to remove","accumulatorId":369,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":367,"metricType":"sum"},{"name":"time to 
update","accumulatorId":368,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":361,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":362,"metricType":"timing"},{"name":"peak memory","accumulatorId":360,"metricType":"size"},{"name":"number of output rows","accumulatorId":359,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":363,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":358,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224006} +{"Event":"SparkListenerJobStart","Job ID":2,"Submission Time":1596020224100,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[5,4],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage 
Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224103,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":4,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":0,"Attempt":0,"Launch Time":1596020224113,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":4,"Stage Attempt 
ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":0,"Attempt":0,"Launch Time":1596020224113,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224174,"Failed":false,"Killed":false,"Accumulables":[{"ID":335,"Name":"shuffle write time","Update":"686296","Value":"686296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":333,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":325,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":382,"Name":"duration","Update":"39","Value":"39","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":383,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":384,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":386,"Name":"time in aggregation build","Update":"32","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":388,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":389,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":391,"Name":"time in aggregation build","Update":"26","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":393,"Name":"duration","Update":"40","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":394,"Name":"number of output rows","Update":"63","Value":"63","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":439,"Name":"internal.metrics.input.recordsRead","Update":63,"Value":63,"Internal":true,"Count Failed Values":true},{"ID":437,"Name":"internal.metrics.shuffle.write.writeTime","Update":686296,"Value":686296,"Internal":true,"Count Failed Values":true},{"ID":436,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":435,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":426,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":421,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":420,"Name":"internal.metrics.executorCpuTime","Update":33390843,"Value":33390843,"Internal":true,"Count Failed Values":true},{"ID":419,"Name":"internal.metrics.executorRunTime","Update":49,"Value":49,"Internal":true,"Count Failed Values":true},{"ID":418,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4867521,"Value":4867521,"Internal":true,"Count Failed Values":true},{"ID":417,"Name":"internal.metrics.executorDeserializeTime","Update":8,"Value":8,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":8,"Executor Deserialize CPU Time":4867521,"Executor Run Time":49,"Executor CPU Time":33390843,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":686296,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":63},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":30,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[29],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":27,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"81\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[26],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":29,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"75\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[28],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":28,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"80\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[27],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":26,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"82\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[25],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":24,"Name":"DataSourceRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":25,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"86\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[24],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224103,"Completion Time":1596020224175,"Accumulables":[{"ID":436,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":391,"Name":"time in aggregation build","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":382,"Name":"duration","Value":"39","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":418,"Name":"internal.metrics.executorDeserializeCpuTime","Value":4867521,"Internal":true,"Count Failed Values":true},{"ID":421,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":394,"Name":"number of output rows","Value":"63","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":439,"Name":"internal.metrics.input.recordsRead","Value":63,"Internal":true,"Count Failed Values":true},{"ID":388,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":325,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":420,"Name":"internal.metrics.executorCpuTime","Value":33390843,"Internal":true,"Count Failed Values":true},{"ID":426,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":417,"Name":"internal.metrics.executorDeserializeTime","Value":8,"Internal":true,"Count Failed Values":true},{"ID":435,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":384,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":393,"Name":"duration","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":333,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":383,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":437,"Name":"internal.metrics.shuffle.write.writeTime","Value":686296,"Internal":true,"Count Failed Values":true},{"ID":419,"Name":"internal.metrics.executorRunTime","Value":49,"Internal":true,"Count Failed Values":true},{"ID":386,"Name":"time in aggregation build","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":335,"Name":"shuffle write time","Value":"686296","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":389,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224179,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"2","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"7","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt 
ID":0,"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":0,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224256,"Failed":false,"Killed":false,"Accumulables":[{"ID":358,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":360,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":368,"Name":"time to update","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":369,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Update":"32","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":372,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":373,"Name":"count of cache hit on states cache in provider","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":375,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":379,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Update":17230622,"Value":17230622,"Internal":true,"Count Failed 
Values":true},{"ID":444,"Name":"internal.metrics.executorRunTime","Update":56,"Value":56,"Internal":true,"Count Failed Values":true},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Update":5948051,"Value":5948051,"Internal":true,"Count Failed Values":true},{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":5948051,"Executor Run Time":56,"Executor CPU Time":17230622,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":1,"Attempt":0,"Launch Time":1596020224187,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224257,"Failed":false,"Killed":false,"Accumulables":[{"ID":358,"Name":"duration","Update":"4","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":363,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":359,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":360,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":368,"Name":"time to update","Update":"21","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":367,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":369,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Update":"18","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":372,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":364,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":373,"Name":"count of cache hit on states cache in provider","Update":"4","Value":"8","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":366,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":375,"Name":"duration","Update":"22","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":376,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":379,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":381,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":327,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":332,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Update":23808555,"Value":41039177,"Internal":true,"Count Failed Values":true},{"ID":444,"Name":"internal.metrics.executorRunTime","Update":56,"Value":112,"Internal":true,"Count Failed Values":true},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6247106,"Value":12195157,"Internal":true,"Count Failed Values":true},{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":12,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6247106,"Executor Run Time":56,"Executor CPU Time":23808555,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":35,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"66\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[34],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":32,"Name":"StateStoreRDD","Scope":"{\"id\":\"73\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[31],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":34,"Name":"StateStoreRDD","Scope":"{\"id\":\"69\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[33],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":33,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"70\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[32],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":31,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"74\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[30],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224179,"Completion Time":1596020224259,"Accumulables":[{"ID":442,"Name":"internal.metrics.executorDeserializeTime","Value":12,"Internal":true,"Count Failed Values":true},{"ID":451,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":445,"Name":"internal.metrics.executorCpuTime","Value":41039177,"Internal":true,"Count Failed Values":true},{"ID":364,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":454,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":373,"Name":"count of cache hit on states cache in provider","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":367,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":376,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":358,"Name":"duration","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":457,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":379,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":370,"Name":"time to commit changes","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":456,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":369,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":459,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":360,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":381,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":453,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":372,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":363,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":327,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":444,"Name":"internal.metrics.executorRunTime","Value":112,"Internal":true,"Count Failed Values":true},{"ID":375,"Name":"duration","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":366,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":443,"Name":"internal.metrics.executorDeserializeCpuTime","Value":12195157,"Internal":true,"Count Failed Values":true},{"ID":455,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":446,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":332,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":377,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":359,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":458,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":368,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":362,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":371,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Completion Time":1596020224259,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":8,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 2","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#74, count#75]\nArguments: [value#74, count#75]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#74, count#75]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":467,"metricType":"sum"}]},"time":1596020224278} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":8,"time":1596020224287} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":7,"time":1596020224287} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":6,"time":1596020224288} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:03.793Z","batchId":2,"batchDuration":522,"durationMs":{"triggerExecution":522,"queryPlanning":41,"getBatch":1,"latestOffset":3,"addBatch":421,"walCommit":27},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":8,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48642}}","endOffset":"{\"test5\":{\"0\":48705}}","numInputRows":63,"inputRowsPerSecond":100.8,"processedRowsPerSecond":120.6896551724138}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":9,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48705}}, {\"test5\":{\"0\":48757}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, 
timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#835]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 8bb5d8a6-42f8-4141-8f25-e1b98f81aac4, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 8bb5d8a6-42f8-4141-8f25-e1b98f81aac4, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#759]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":551,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":550,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":547,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":548,"metricType":"timing"},{"name":"peak memory","accumulatorId":546,"metricType":"size"},{"name":"number of output rows","accumulatorId":545,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":549,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":542,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":543,"metricType":"timing"},{"name":"peak memory","accumulatorId":541,"metricType":"size"},{"name":"number of output rows","accumulatorId":540,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":544,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":539,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":491,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":492,"metricType":"nsTiming"},{"name":"records read","accumulatorId":489,"metricType":"sum"},{"name":"local bytes read","accumulatorId":487,"metricType":"size"},{"name":"fetch wait time","accumulatorId":488,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":485,"metricType":"size"},{"name":"local blocks read","accumulatorId":484,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":483,"metricType":"sum"},{"name":"data 
size","accumulatorId":482,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":486,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":490,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":538,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":535,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":536,"metricType":"timing"},{"name":"peak memory","accumulatorId":534,"metricType":"size"},{"name":"number of output rows","accumulatorId":533,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":537,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":532,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":522,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":523,"metricType":"sum"},{"name":"memory used by state","accumulatorId":528,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":530,"metricType":"sum"},{"name":"number of output rows","accumulatorId":521,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":529,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":531,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":527,"metricType":"timing"},{"name":"time to remove","accumulatorId":526,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":524,"metricType":"sum"},{"name":"time to update","accumulatorId":525,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":518,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":519,"metricType":"timing"},{"name":"peak memory","accumulatorId":517,"metricType":"size"},{"name":"number of output rows","accumulatorId":516,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":520,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":515,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224419} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":10,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48705}}, {\"test5\":{\"0\":48757}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#911]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 29402d2a-a5da-4bb1-8d1a-c6d1c2d998d5, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 29402d2a-a5da-4bb1-8d1a-c6d1c2d998d5, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@59b7c509","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 3, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#759]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":551,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":550,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":547,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":548,"metricType":"timing"},{"name":"peak memory","accumulatorId":546,"metricType":"size"},{"name":"number of output rows","accumulatorId":545,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":549,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":542,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":543,"metricType":"timing"},{"name":"peak memory","accumulatorId":541,"metricType":"size"},{"name":"number of output rows","accumulatorId":540,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":544,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":539,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":491,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":492,"metricType":"nsTiming"},{"name":"records read","accumulatorId":489,"metricType":"sum"},{"name":"local bytes read","accumulatorId":487,"metricType":"size"},{"name":"fetch wait time","accumulatorId":488,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":485,"metricType":"size"},{"name":"local blocks read","accumulatorId":484,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":483,"metricType":"sum"},{"name":"data size","accumulatorId":482,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":486,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":490,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":538,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":535,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":536,"metricType":"timing"},{"name":"peak memory","accumulatorId":534,"metricType":"size"},{"name":"number of output rows","accumulatorId":533,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":537,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":532,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":522,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":523,"metricType":"sum"},{"name":"memory used by state","accumulatorId":528,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":530,"metricType":"sum"},{"name":"number of output rows","accumulatorId":521,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":529,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":531,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":527,"metricType":"timing"},{"name":"time to remove","accumulatorId":526,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":524,"metricType":"sum"},{"name":"time to 
update","accumulatorId":525,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":518,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":519,"metricType":"timing"},{"name":"peak memory","accumulatorId":517,"metricType":"size"},{"name":"number of output rows","accumulatorId":516,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":520,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":515,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224452} +{"Event":"SparkListenerJobStart","Job ID":3,"Submission Time":1596020224533,"Stage Infos":[{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[6,7],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020224535,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1596020224541,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":0,"Attempt":0,"Launch Time":1596020224541,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224581,"Failed":false,"Killed":false,"Accumulables":[{"ID":492,"Name":"shuffle write time","Update":"643278","Value":"643278","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":491,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":490,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":482,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":539,"Name":"duration","Update":"20","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":540,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":541,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":543,"Name":"time in aggregation build","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":545,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":546,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":548,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":550,"Name":"duration","Update":"20","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":551,"Name":"number of output rows","Update":"52","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":596,"Name":"internal.metrics.input.recordsRead","Update":52,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":594,"Name":"internal.metrics.shuffle.write.writeTime","Update":643278,"Value":643278,"Internal":true,"Count Failed Values":true},{"ID":593,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":592,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":583,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":578,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":577,"Name":"internal.metrics.executorCpuTime","Update":29099071,"Value":29099071,"Internal":true,"Count Failed Values":true},{"ID":576,"Name":"internal.metrics.executorRunTime","Update":29,"Value":29,"Internal":true,"Count Failed Values":true},{"ID":575,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3091128,"Value":3091128,"Internal":true,"Count Failed Values":true},{"ID":574,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3091128,"Executor Run Time":29,"Executor CPU Time":29099071,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":643278,"Shuffle Records Written":1},"Input Metrics":{"Bytes 
Read":0,"Records Read":52},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":42,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[41],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":38,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"115\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[37],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":36,"Name":"DataSourceRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":41,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"108\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[40],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":37,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"119\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[36],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":40,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"113\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[39],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":39,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"114\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[38],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224535,"Completion Time":1596020224582,"Accumulables":[{"ID":550,"Name":"duration","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":541,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":577,"Name":"internal.metrics.executorCpuTime","Value":29099071,"Internal":true,"Count Failed Values":true},{"ID":490,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":576,"Name":"internal.metrics.executorRunTime","Value":29,"Internal":true,"Count Failed Values":true},{"ID":540,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":594,"Name":"internal.metrics.shuffle.write.writeTime","Value":643278,"Internal":true,"Count Failed Values":true},{"ID":543,"Name":"time in aggregation build","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":492,"Name":"shuffle write time","Value":"643278","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":546,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":539,"Name":"duration","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":575,"Name":"internal.metrics.executorDeserializeCpuTime","Value":3091128,"Internal":true,"Count Failed Values":true},{"ID":593,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":548,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":578,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":596,"Name":"internal.metrics.input.recordsRead","Value":52,"Internal":true,"Count Failed Values":true},{"ID":551,"Name":"number of output rows","Value":"52","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":482,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":491,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":545,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":592,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":574,"Name":"internal.metrics.executorDeserializeTime","Value":3,"Internal":true,"Count Failed Values":true},{"ID":583,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224588,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"3","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"10","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":0,"Attempt":0,"Launch Time":1596020224596,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":7,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":1,"Attempt":0,"Launch Time":1596020224597,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":0,"Attempt":0,"Launch Time":1596020224596,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224670,"Failed":false,"Killed":false,"Accumulables":[{"ID":515,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Update":"5","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":526,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Update":"27","Value":"27","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Update":"5","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":534,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":536,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Update":19967906,"Value":19967906,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Update":62,"Value":62,"Internal":true,"Count Failed Values":true},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4899567,"Value":4899567,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor 
Deserialize Time":4,"Executor Deserialize CPU Time":4899567,"Executor Run Time":62,"Executor CPU Time":19967906,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":7,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":1,"Attempt":0,"Launch Time":1596020224597,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224687,"Failed":false,"Killed":false,"Accumulables":[{"ID":515,"Name":"duration","Update":"4","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":520,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":516,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Update":"17","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":524,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":526,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Update":"26","Value":"53","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":521,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Update":"6","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":523,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Update":"17","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":533,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":534,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":536,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":538,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":484,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":488,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":487,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":489,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Update":22402538,"Value":42370444,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Update":79,"Value":141,"Internal":true,"Count Failed Values":true},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4671511,"Value":9571078,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":8,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4671511,"Executor Run Time":79,"Executor CPU Time":22402538,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"start at 
StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":47,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"99\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[46],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":46,"Name":"StateStoreRDD","Scope":"{\"id\":\"102\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[45],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":45,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"103\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[44],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":44,"Name":"StateStoreRDD","Scope":"{\"id\":\"106\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[43],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":43,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"107\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[42],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[6],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224588,"Completion Time":1596020224688,"Accumulables":[{"ID":523,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":532,"Name":"duration","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":487,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":517,"Name":"peak 
memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":600,"Name":"internal.metrics.executorDeserializeCpuTime","Value":9571078,"Internal":true,"Count Failed Values":true},{"ID":603,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":612,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":516,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":615,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":534,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":525,"Name":"time to update","Value":"22","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":489,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":528,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":519,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":608,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":599,"Name":"internal.metrics.executorDeserializeTime","Value":8,"Internal":true,"Count Failed Values":true},{"ID":521,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":530,"Name":"count of cache hit on states cache in provider","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":611,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":602,"Name":"internal.metrics.executorCpuTime","Value":42370444,"Internal":true,"Count Failed Values":true},{"ID":488,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":515,"Name":"duration","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":524,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":533,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":614,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":536,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":527,"Name":"time to commit changes","Value":"53","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":613,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":616,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":526,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":520,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":610,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":601,"Name":"internal.metrics.executorRunTime","Value":141,"Internal":true,"Count Failed Values":true},{"ID":484,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":538,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":529,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Completion Time":1596020224689,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":11,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 3","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#88, count#89]\nArguments: [value#88, count#89]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#88, count#89]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":624,"metricType":"sum"}]},"time":1596020224709} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":11,"time":1596020224713} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":10,"time":1596020224714} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":9,"time":1596020224714} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:04.317Z","batchId":3,"batchDuration":415,"durationMs":{"triggerExecution":415,"queryPlanning":38,"getBatch":1,"latestOffset":3,"addBatch":332,"walCommit":21},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":12,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48705}}","endOffset":"{\"test5\":{\"0\":48757}}","numInputRows":52,"inputRowsPerSecond":99.23664122137404,"processedRowsPerSecond":125.30120481927712}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":12,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48757}}, {\"test5\":{\"0\":48799}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1066]\n\n(10) 
StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 42efe357-12ef-4061-9b83-20bf4c29a257, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 42efe357-12ef-4061-9b83-20bf4c29a257, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#990]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":708,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":707,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":704,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":705,"metricType":"timing"},{"name":"peak memory","accumulatorId":703,"metricType":"size"},{"name":"number of output rows","accumulatorId":702,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":706,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":699,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":700,"metricType":"timing"},{"name":"peak memory","accumulatorId":698,"metricType":"size"},{"name":"number of output rows","accumulatorId":697,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":701,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":696,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":648,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":649,"metricType":"nsTiming"},{"name":"records read","accumulatorId":646,"metricType":"sum"},{"name":"local bytes read","accumulatorId":644,"metricType":"size"},{"name":"fetch wait time","accumulatorId":645,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":642,"metricType":"size"},{"name":"local blocks read","accumulatorId":641,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":640,"metricType":"sum"},{"name":"data size","accumulatorId":639,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":643,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":647,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":695,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":692,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":693,"metricType":"timing"},{"name":"peak memory","accumulatorId":691,"metricType":"size"},{"name":"number of output rows","accumulatorId":690,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":694,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":689,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":679,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":680,"metricType":"sum"},{"name":"memory used by state","accumulatorId":685,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":687,"metricType":"sum"},{"name":"number of output rows","accumulatorId":678,"metricType":"sum"},{"name":"estimated size of state only on current 
version","accumulatorId":686,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":688,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":684,"metricType":"timing"},{"name":"time to remove","accumulatorId":683,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":681,"metricType":"sum"},{"name":"time to update","accumulatorId":682,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":675,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":676,"metricType":"timing"},{"name":"peak memory","accumulatorId":674,"metricType":"size"},{"name":"number of output rows","accumulatorId":673,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":677,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":672,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224817} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":13,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48757}}, {\"test5\":{\"0\":48799}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1142]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6fa28bd2-2924-4e01-8bbe-128888d2669b, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6fa28bd2-2924-4e01-8bbe-128888d2669b, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@1717338b","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 4, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#990]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":708,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":707,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":704,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":705,"metricType":"timing"},{"name":"peak memory","accumulatorId":703,"metricType":"size"},{"name":"number of output rows","accumulatorId":702,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":706,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":699,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":700,"metricType":"timing"},{"name":"peak memory","accumulatorId":698,"metricType":"size"},{"name":"number of output rows","accumulatorId":697,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":701,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":696,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":648,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":649,"metricType":"nsTiming"},{"name":"records read","accumulatorId":646,"metricType":"sum"},{"name":"local bytes read","accumulatorId":644,"metricType":"size"},{"name":"fetch wait time","accumulatorId":645,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":642,"metricType":"size"},{"name":"local blocks read","accumulatorId":641,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":640,"metricType":"sum"},{"name":"data size","accumulatorId":639,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":643,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":647,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":695,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":692,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":693,"metricType":"timing"},{"name":"peak memory","accumulatorId":691,"metricType":"size"},{"name":"number of output rows","accumulatorId":690,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":694,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":689,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":679,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":680,"metricType":"sum"},{"name":"memory used by state","accumulatorId":685,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":687,"metricType":"sum"},{"name":"number of output rows","accumulatorId":678,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":686,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":688,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":684,"metricType":"timing"},{"name":"time to remove","accumulatorId":683,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":681,"metricType":"sum"},{"name":"time to update","accumulatorId":682,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":675,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":676,"metricType":"timing"},{"name":"peak memory","accumulatorId":674,"metricType":"size"},{"name":"number of output rows","accumulatorId":673,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":677,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":672,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020224849} +{"Event":"SparkListenerJobStart","Job ID":4,"Submission Time":1596020224928,"Stage Infos":[{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at 
StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[9,8],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent 
IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224929,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at 
StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":0,"Attempt":0,"Launch Time":1596020224941,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":0,"Attempt":0,"Launch Time":1596020224941,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020224979,"Failed":false,"Killed":false,"Accumulables":[{"ID":649,"Name":"shuffle write time","Update":"572754","Value":"572754","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":648,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":647,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":639,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":696,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":697,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":698,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":700,"Name":"time in aggregation build","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":702,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":703,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":705,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":707,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":708,"Name":"number of output rows","Update":"42","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":753,"Name":"internal.metrics.input.recordsRead","Update":42,"Value":42,"Internal":true,"Count Failed Values":true},{"ID":751,"Name":"internal.metrics.shuffle.write.writeTime","Update":572754,"Value":572754,"Internal":true,"Count Failed Values":true},{"ID":750,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":749,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed 
Values":true},{"ID":740,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":735,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":734,"Name":"internal.metrics.executorCpuTime","Update":27800373,"Value":27800373,"Internal":true,"Count Failed Values":true},{"ID":733,"Name":"internal.metrics.executorRunTime","Update":28,"Value":28,"Internal":true,"Count Failed Values":true},{"ID":732,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4768103,"Value":4768103,"Internal":true,"Count Failed Values":true},{"ID":731,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4768103,"Executor Run Time":28,"Executor CPU Time":27800373,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":572754,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":42},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":54,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[53],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":53,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"141\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[52],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":51,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"147\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[50],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":49,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[48],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of 
Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":52,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"146\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[51],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":50,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"148\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[49],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":48,"Name":"DataSourceRDD","Scope":"{\"id\":\"152\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224929,"Completion Time":1596020224979,"Accumulables":[{"ID":732,"Name":"internal.metrics.executorDeserializeCpuTime","Value":4768103,"Internal":true,"Count Failed Values":true},{"ID":696,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":750,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":705,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":735,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":708,"Name":"number of output rows","Value":"42","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":753,"Name":"internal.metrics.input.recordsRead","Value":42,"Internal":true,"Count Failed Values":true},{"ID":648,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":639,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":702,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":740,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":731,"Name":"internal.metrics.executorDeserializeTime","Value":4,"Internal":true,"Count Failed Values":true},{"ID":749,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":698,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":734,"Name":"internal.metrics.executorCpuTime","Value":27800373,"Internal":true,"Count Failed Values":true},{"ID":707,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":647,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":733,"Name":"internal.metrics.executorRunTime","Value":28,"Internal":true,"Count Failed Values":true},{"ID":697,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":751,"Name":"internal.metrics.shuffle.write.writeTime","Value":572754,"Internal":true,"Count Failed Values":true},{"ID":700,"Name":"time in aggregation build","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":649,"Name":"shuffle write time","Value":"572754","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":703,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224987,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"4","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"13","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} 
+{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":9,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":1,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":1,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225056,"Failed":false,"Killed":false,"Accumulables":[{"ID":672,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":677,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":673,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":681,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":683,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":686,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":678,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":687,"Name":"count of cache hit on states cache in provider","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":680,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":690,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":695,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":641,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":645,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":644,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":646,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":759,"Name":"internal.metrics.executorCpuTime","Update":19548688,"Value":19548688,"Internal":true,"Count Failed Values":true},{"ID":758,"Name":"internal.metrics.executorRunTime","Update":52,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Update":5622533,"Value":5622533,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":5,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":5622533,"Executor Run Time":52,"Executor CPU Time":19548688,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":9,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task 
Info":{"Task ID":13,"Index":0,"Attempt":0,"Launch Time":1596020224994,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225058,"Failed":false,"Killed":false,"Accumulables":[{"ID":672,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Update":"4","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":683,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Update":"35","Value":"46","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":686,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":687,"Name":"count of cache hit on states cache in provider","Update":"8","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Update":"4","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":759,"Name":"internal.metrics.executorCpuTime","Update":16813539,"Value":36362227,"Internal":true,"Count Failed Values":true},{"ID":758,"Name":"internal.metrics.executorRunTime","Update":55,"Value":107,"Internal":true,"Count Failed Values":true},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4322992,"Value":9945525,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":9,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4322992,"Executor Run Time":55,"Executor CPU Time":16813539,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":59,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"132\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[58],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":55,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"140\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[54],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":56,"Name":"StateStoreRDD","Scope":"{\"id\":\"139\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[55],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":57,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"136\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[56],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":58,"Name":"StateStoreRDD","Scope":"{\"id\":\"135\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[57],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[8],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020224987,"Completion Time":1596020225059,"Accumulables":[{"ID":765,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":756,"Name":"internal.metrics.executorDeserializeTime","Value":9,"Internal":true,"Count Failed Values":true},{"ID":678,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":759,"Name":"internal.metrics.executorCpuTime","Value":36362227,"Internal":true,"Count Failed Values":true},{"ID":768,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":687,"Name":"count of cache hit on states cache in provider","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":681,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":771,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":690,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":672,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":645,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":684,"Name":"time to commit changes","Value":"46","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":693,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":770,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":683,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":773,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":686,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":695,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":677,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":767,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":641,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":758,"Name":"internal.metrics.executorRunTime","Value":107,"Internal":true,"Count Failed Values":true},{"ID":644,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":680,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":689,"Name":"duration","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":674,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":757,"Name":"internal.metrics.executorDeserializeCpuTime","Value":9945525,"Internal":true,"Count Failed Values":true},{"ID":769,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":760,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":772,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":646,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":682,"Name":"time to update","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":691,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":673,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":676,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":685,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":4,"Completion Time":1596020225059,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":14,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 4","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) 
LocalTableScan\nOutput [2]: [value#102, count#103]\nArguments: [value#102, count#103]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#102, count#103]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":781,"metricType":"sum"}]},"time":1596020225079} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":14,"time":1596020225087} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":13,"time":1596020225087} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":12,"time":1596020225087} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:04.734Z","batchId":4,"batchDuration":387,"durationMs":{"triggerExecution":387,"queryPlanning":30,"getBatch":1,"latestOffset":3,"addBatch":306,"walCommit":12},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":16,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48757}}","endOffset":"{\"test5\":{\"0\":48799}}","numInputRows":42,"inputRowsPerSecond":100.71942446043165,"processedRowsPerSecond":108.52713178294573}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":15,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, 
timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48799}}, {\"test5\":{\"0\":48837}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1297]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 9579cc6c-8827-43f7-9678-7747602e493e, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 9579cc6c-8827-43f7-9678-7747602e493e, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1221]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":865,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":864,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":861,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":862,"metricType":"timing"},{"name":"peak memory","accumulatorId":860,"metricType":"size"},{"name":"number of output rows","accumulatorId":859,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":863,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":856,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":857,"metricType":"timing"},{"name":"peak memory","accumulatorId":855,"metricType":"size"},{"name":"number of output rows","accumulatorId":854,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":858,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":853,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":805,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":806,"metricType":"nsTiming"},{"name":"records read","accumulatorId":803,"metricType":"sum"},{"name":"local bytes 
read","accumulatorId":801,"metricType":"size"},{"name":"fetch wait time","accumulatorId":802,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":799,"metricType":"size"},{"name":"local blocks read","accumulatorId":798,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":797,"metricType":"sum"},{"name":"data size","accumulatorId":796,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":800,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":804,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":852,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":849,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":850,"metricType":"timing"},{"name":"peak memory","accumulatorId":848,"metricType":"size"},{"name":"number of output rows","accumulatorId":847,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":851,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":846,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":836,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":837,"metricType":"sum"},{"name":"memory used by state","accumulatorId":842,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":844,"metricType":"sum"},{"name":"number of output rows","accumulatorId":835,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":843,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":845,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":841,"metricType":"timing"},{"name":"time to remove","accumulatorId":840,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":838,"metricType":"sum"},{"name":"time to update","accumulatorId":839,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":832,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":833,"metricType":"timing"},{"name":"peak memory","accumulatorId":831,"metricType":"size"},{"name":"number of output rows","accumulatorId":830,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":834,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":829,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225211} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":16,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48799}}, {\"test5\":{\"0\":48837}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1373]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = b800d96e-7584-4e8d-8df8-c9b901b7f2e2, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = b800d96e-7584-4e8d-8df8-c9b901b7f2e2, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@2c214312","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 5, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1221]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":865,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":864,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":861,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":862,"metricType":"timing"},{"name":"peak memory","accumulatorId":860,"metricType":"size"},{"name":"number of output rows","accumulatorId":859,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":863,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":856,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":857,"metricType":"timing"},{"name":"peak memory","accumulatorId":855,"metricType":"size"},{"name":"number of output rows","accumulatorId":854,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":858,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":853,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":805,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":806,"metricType":"nsTiming"},{"name":"records read","accumulatorId":803,"metricType":"sum"},{"name":"local bytes read","accumulatorId":801,"metricType":"size"},{"name":"fetch wait time","accumulatorId":802,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":799,"metricType":"size"},{"name":"local blocks read","accumulatorId":798,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":797,"metricType":"sum"},{"name":"data size","accumulatorId":796,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":800,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":804,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":852,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":849,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":850,"metricType":"timing"},{"name":"peak memory","accumulatorId":848,"metricType":"size"},{"name":"number of output rows","accumulatorId":847,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":851,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":846,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":836,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":837,"metricType":"sum"},{"name":"memory used by state","accumulatorId":842,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":844,"metricType":"sum"},{"name":"number of output rows","accumulatorId":835,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":843,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":845,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":841,"metricType":"timing"},{"name":"time to remove","accumulatorId":840,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":838,"metricType":"sum"},{"name":"time to 
update","accumulatorId":839,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":832,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":833,"metricType":"timing"},{"name":"peak memory","accumulatorId":831,"metricType":"size"},{"name":"number of output rows","accumulatorId":830,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":834,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":829,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225270} +{"Event":"SparkListenerJobStart","Job ID":5,"Submission Time":1596020225342,"Stage Infos":[{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[10,11],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020225343,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":10,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":0,"Attempt":0,"Launch Time":1596020225359,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":10,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":0,"Attempt":0,"Launch Time":1596020225359,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225400,"Failed":false,"Killed":false,"Accumulables":[{"ID":806,"Name":"shuffle write time","Update":"530930","Value":"530930","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":805,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":804,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":796,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":853,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":854,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":855,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":857,"Name":"time in aggregation build","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":859,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":860,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":862,"Name":"time in aggregation build","Update":"9","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":864,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":865,"Name":"number of output rows","Update":"38","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":910,"Name":"internal.metrics.input.recordsRead","Update":38,"Value":38,"Internal":true,"Count Failed Values":true},{"ID":908,"Name":"internal.metrics.shuffle.write.writeTime","Update":530930,"Value":530930,"Internal":true,"Count Failed Values":true},{"ID":907,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":906,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":897,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":892,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":891,"Name":"internal.metrics.executorCpuTime","Update":22440089,"Value":22440089,"Internal":true,"Count Failed Values":true},{"ID":890,"Name":"internal.metrics.executorRunTime","Update":29,"Value":29,"Internal":true,"Count Failed Values":true},{"ID":889,"Name":"internal.metrics.executorDeserializeCpuTime","Update":6808170,"Value":6808170,"Internal":true,"Count Failed Values":true},{"ID":888,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":6808170,"Executor Run Time":29,"Executor CPU Time":22440089,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":530930,"Shuffle Records Written":1},"Input 
Metrics":{"Bytes Read":0,"Records Read":38},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":66,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[65],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":62,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"181\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[61],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":64,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"179\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[63],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":61,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[60],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":65,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"174\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[64],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":60,"Name":"DataSourceRDD","Scope":"{\"id\":\"185\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":63,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"180\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[62],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225343,"Completion Time":1596020225401,"Accumulables":[{"ID":855,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":891,"Name":"internal.metrics.executorCpuTime","Value":22440089,"Internal":true,"Count Failed Values":true},{"ID":864,"Name":"duration","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":804,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":908,"Name":"internal.metrics.shuffle.write.writeTime","Value":530930,"Internal":true,"Count Failed Values":true},{"ID":890,"Name":"internal.metrics.executorRunTime","Value":29,"Internal":true,"Count Failed Values":true},{"ID":857,"Name":"time in aggregation build","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":860,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":806,"Name":"shuffle write time","Value":"530930","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":854,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":853,"Name":"duration","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":862,"Name":"time in aggregation build","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":889,"Name":"internal.metrics.executorDeserializeCpuTime","Value":6808170,"Internal":true,"Count Failed Values":true},{"ID":907,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":892,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":910,"Name":"internal.metrics.input.recordsRead","Value":38,"Internal":true,"Count Failed Values":true},{"ID":865,"Name":"number of output rows","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":805,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":796,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":859,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":888,"Name":"internal.metrics.executorDeserializeTime","Value":6,"Internal":true,"Count Failed Values":true},{"ID":897,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":906,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225410,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"5","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"16","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":1,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225498,"Failed":false,"Killed":false,"Accumulables":[{"ID":829,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":834,"Name":"avg hash probe bucket list 
iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":830,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":833,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":838,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":840,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Update":"37","Value":"37","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":835,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":844,"Name":"count of cache hit on states cache in provider","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":837,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":846,"Name":"duration","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":847,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":852,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":798,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":802,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":801,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":803,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed 
Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":916,"Name":"internal.metrics.executorCpuTime","Update":17945299,"Value":17945299,"Internal":true,"Count Failed Values":true},{"ID":915,"Name":"internal.metrics.executorRunTime","Update":68,"Value":68,"Internal":true,"Count Failed Values":true},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3451032,"Value":3451032,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3451032,"Executor Run Time":68,"Executor CPU Time":17945299,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":0,"Attempt":0,"Launch Time":1596020225417,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225509,"Failed":false,"Killed":false,"Accumulables":[{"ID":829,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":833,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Update":"4","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":840,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Update":"50","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":844,"Name":"count of cache hit on states cache in provider","Update":"10","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":846,"Name":"duration","Update":"4","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":916,"Name":"internal.metrics.executorCpuTime","Update":15599091,"Value":33544390,"Internal":true,"Count Failed Values":true},{"ID":915,"Name":"internal.metrics.executorRunTime","Update":84,"Value":152,"Internal":true,"Count Failed Values":true},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4357806,"Value":7808838,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4357806,"Executor Run Time":84,"Executor CPU Time":15599091,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records 
Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":71,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"165\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[70],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":70,"Name":"StateStoreRDD","Scope":"{\"id\":\"168\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[69],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":69,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"169\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[68],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":67,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"173\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[66],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":68,"Name":"StateStoreRDD","Scope":"{\"id\":\"172\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[67],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225410,"Completion Time":1596020225514,"Accumulables":[{"ID":846,"Name":"duration","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":837,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":801,"Name":"local 
bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":831,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":926,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":917,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":830,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":848,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":803,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":839,"Name":"time to update","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":929,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":833,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":842,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":914,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7808838,"Internal":true,"Count Failed Values":true},{"ID":922,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":913,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":925,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":844,"Name":"count of cache hit on states cache in provider","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":835,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":916,"Name":"internal.metrics.executorCpuTime","Value":33544390,"Internal":true,"Count Failed Values":true},{"ID":829,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":928,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":802,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":838,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":847,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":850,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":841,"Name":"time to commit changes","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":927,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":930,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":840,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":834,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":852,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":798,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":843,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":915,"Name":"internal.metrics.executorRunTime","Value":152,"Internal":true,"Count Failed Values":true},{"ID":924,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":5,"Completion Time":1596020225514,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":17,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 5","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#116, count#117]\nArguments: [value#116, count#117]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#116, count#117]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":938,"metricType":"sum"}]},"time":1596020225536} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":17,"time":1596020225541} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":16,"time":1596020225542} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":15,"time":1596020225542} 
+{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.123Z","batchId":5,"batchDuration":437,"durationMs":{"triggerExecution":437,"queryPlanning":35,"getBatch":1,"latestOffset":3,"addBatch":361,"walCommit":18},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":20,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48799}}","endOffset":"{\"test5\":{\"0\":48837}}","numInputRows":38,"inputRowsPerSecond":97.68637532133675,"processedRowsPerSecond":86.95652173913044}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":18,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48837}}, {\"test5\":{\"0\":48881}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) 
DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1528]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6a12c2d9-8d02-4241-93fc-f53da01bb454, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 6a12c2d9-8d02-4241-93fc-f53da01bb454, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1452]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], 
functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1022,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1021,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1018,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1019,"metricType":"timing"},{"name":"peak memory","accumulatorId":1017,"metricType":"size"},{"name":"number of output rows","accumulatorId":1016,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1020,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1013,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1014,"metricType":"timing"},{"name":"peak memory","accumulatorId":1012,"metricType":"size"},{"name":"number of output rows","accumulatorId":1011,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1015,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1010,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":962,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":963,"metricType":"nsTiming"},{"name":"records read","accumulatorId":960,"metricType":"sum"},{"name":"local bytes read","accumulatorId":958,"metricType":"size"},{"name":"fetch wait time","accumulatorId":959,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":956,"metricType":"size"},{"name":"local blocks read","accumulatorId":955,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":954,"metricType":"sum"},{"name":"data size","accumulatorId":953,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":957,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":961,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":1009,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1006,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1007,"metricType":"timing"},{"name":"peak memory","accumulatorId":1005,"metricType":"size"},{"name":"number of output rows","accumulatorId":1004,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1008,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1003,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":993,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":994,"metricType":"sum"},{"name":"memory used by state","accumulatorId":999,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1001,"metricType":"sum"},{"name":"number of output rows","accumulatorId":992,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1000,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1002,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":998,"metricType":"timing"},{"name":"time to remove","accumulatorId":997,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":995,"metricType":"sum"},{"name":"time to update","accumulatorId":996,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":989,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":990,"metricType":"timing"},{"name":"peak memory","accumulatorId":988,"metricType":"size"},{"name":"number of output rows","accumulatorId":987,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":991,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":986,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225657} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":19,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 
(14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48837}}, {\"test5\":{\"0\":48881}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1604]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 96456757-8d0b-46da-a006-9fe2cb6fc936, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = 96456757-8d0b-46da-a006-9fe2cb6fc936, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@27ec018d","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave 
[value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 6, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1452]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1022,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1021,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1018,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1019,"metricType":"timing"},{"name":"peak memory","accumulatorId":1017,"metricType":"size"},{"name":"number of output rows","accumulatorId":1016,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1020,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1013,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1014,"metricType":"timing"},{"name":"peak memory","accumulatorId":1012,"metricType":"size"},{"name":"number of output rows","accumulatorId":1011,"metricType":"sum"},{"name":"avg hash probe bucket list 
iters","accumulatorId":1015,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1010,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":962,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":963,"metricType":"nsTiming"},{"name":"records read","accumulatorId":960,"metricType":"sum"},{"name":"local bytes read","accumulatorId":958,"metricType":"size"},{"name":"fetch wait time","accumulatorId":959,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":956,"metricType":"size"},{"name":"local blocks read","accumulatorId":955,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":954,"metricType":"sum"},{"name":"data size","accumulatorId":953,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":957,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":961,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1009,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1006,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1007,"metricType":"timing"},{"name":"peak memory","accumulatorId":1005,"metricType":"size"},{"name":"number of output rows","accumulatorId":1004,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1008,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1003,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":993,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":994,"metricType":"sum"},{"name":"memory used by state","accumulatorId":999,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1001,"metricType":"sum"},{"name":"number of output rows","accumulatorId":992,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1000,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1002,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":998,"metricType":"timing"},{"name":"time to remove","accumulatorId":997,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":995,"metricType":"sum"},{"name":"time to update","accumulatorId":996,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":989,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":990,"metricType":"timing"},{"name":"peak memory","accumulatorId":988,"metricType":"size"},{"name":"number of output rows","accumulatorId":987,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":991,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":986,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225687} +{"Event":"SparkListenerJobStart","Job ID":6,"Submission Time":1596020225759,"Stage Infos":[{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[12,13],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use 
Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission 
Time":1596020225760,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":12,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Launch Time":1596020225766,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":12,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Launch Time":1596020225766,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225796,"Failed":false,"Killed":false,"Accumulables":[{"ID":963,"Name":"shuffle write time","Update":"543836","Value":"543836","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":962,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":961,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":953,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count 
Failed Values":true,"Metadata":"sql"},{"ID":1010,"Name":"duration","Update":"17","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1011,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1012,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1014,"Name":"time in aggregation build","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1016,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1017,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1019,"Name":"time in aggregation build","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1021,"Name":"duration","Update":"17","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1022,"Name":"number of output rows","Update":"44","Value":"44","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1067,"Name":"internal.metrics.input.recordsRead","Update":44,"Value":44,"Internal":true,"Count Failed Values":true},{"ID":1065,"Name":"internal.metrics.shuffle.write.writeTime","Update":543836,"Value":543836,"Internal":true,"Count Failed Values":true},{"ID":1064,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1063,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1054,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1049,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1048,"Name":"internal.metrics.executorCpuTime","Update":23733439,"Value":23733439,"Internal":true,"Count Failed Values":true},{"ID":1047,"Name":"internal.metrics.executorRunTime","Update":23,"Value":23,"Internal":true,"Count Failed Values":true},{"ID":1046,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3714406,"Value":3714406,"Internal":true,"Count Failed Values":true},{"ID":1045,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3714406,"Executor Run Time":23,"Executor CPU Time":23733439,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":543836,"Shuffle Records 
Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":44},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":12,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":78,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[77],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":75,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"213\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[74],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":74,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"214\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[73],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":77,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"207\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[76],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":72,"Name":"DataSourceRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":73,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"218\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[72],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":76,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"212\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[75],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225760,"Completion Time":1596020225797,"Accumulables":[{"ID":1064,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1010,"Name":"duration","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1046,"Name":"internal.metrics.executorDeserializeCpuTime","Value":3714406,"Internal":true,"Count Failed Values":true},{"ID":1019,"Name":"time in aggregation build","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1067,"Name":"internal.metrics.input.recordsRead","Value":44,"Internal":true,"Count Failed Values":true},{"ID":1022,"Name":"number of output rows","Value":"44","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1049,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1016,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":962,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":953,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1054,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1045,"Name":"internal.metrics.executorDeserializeTime","Value":3,"Internal":true,"Count Failed Values":true},{"ID":1063,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1048,"Name":"internal.metrics.executorCpuTime","Value":23733439,"Internal":true,"Count Failed Values":true},{"ID":1012,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1021,"Name":"duration","Value":"17","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":961,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1065,"Name":"internal.metrics.shuffle.write.writeTime","Value":543836,"Internal":true,"Count Failed Values":true},{"ID":1047,"Name":"internal.metrics.executorRunTime","Value":23,"Internal":true,"Count Failed Values":true},{"ID":1014,"Name":"time in aggregation build","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":963,"Name":"shuffle write time","Value":"543836","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1017,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1011,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} 
+{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225801,"Accumulables":[],"Resource Profile 
Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"6","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"19","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Launch Time":1596020225808,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":13,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Launch Time":1596020225809,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Launch Time":1596020225808,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225868,"Failed":false,"Killed":false,"Accumulables":[{"ID":986,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak 
memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":997,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Update":"26","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1000,"Name":"estimated size of state only on current version","Update":"88","Value":"88","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1001,"Name":"count of cache hit on states cache in provider","Update":"12","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":999,"Name":"memory used by state","Update":"400","Value":"400","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Update":"4","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1007,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1074,"Name":"internal.metrics.resultSize","Update":5311,"Value":5311,"Internal":true,"Count Failed Values":true},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Update":17503528,"Value":17503528,"Internal":true,"Count Failed Values":true},{"ID":1072,"Name":"internal.metrics.executorRunTime","Update":50,"Value":50,"Internal":true,"Count Failed Values":true},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Update":4255703,"Value":4255703,"Internal":true,"Count Failed Values":true},{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":4255703,"Executor Run Time":50,"Executor CPU Time":17503528,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":13,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Launch Time":1596020225809,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020225874,"Failed":false,"Killed":false,"Accumulables":[{"ID":986,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":991,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":987,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak memory","Update":"4456448","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Update":"15","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":995,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":997,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Update":"23","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1000,"Name":"estimated size of state only on current version","Update":"368","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":992,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1001,"Name":"count of cache hit on states cache in provider","Update":"12","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":999,"Name":"memory used by state","Update":"784","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":994,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Update":"15","Value":"19","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1004,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1007,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1009,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":955,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":959,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":958,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":960,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1074,"Name":"internal.metrics.resultSize","Update":5574,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Update":17516707,"Value":35020235,"Internal":true,"Count Failed Values":true},{"ID":1072,"Name":"internal.metrics.executorRunTime","Update":56,"Value":106,"Internal":true,"Count Failed Values":true},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3750230,"Value":8005933,"Internal":true,"Count Failed Values":true},{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":7,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3750230,"Executor Run Time":56,"Executor CPU Time":17516707,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote 
Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":13,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":83,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"198\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[82],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":81,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"202\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[80],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":79,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"206\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[78],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":80,"Name":"StateStoreRDD","Scope":"{\"id\":\"205\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[79],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":82,"Name":"StateStoreRDD","Scope":"{\"id\":\"201\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[81],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[12],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020225801,"Completion Time":1596020225874,"Accumulables":[{"ID":1070,"Name":"internal.metrics.executorDeserializeTime","Value":7,"Internal":true,"Count Failed Values":true},{"ID":1079,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":992,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1073,"Name":"internal.metrics.executorCpuTime","Value":35020235,"Internal":true,"Count Failed Values":true},{"ID":1082,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1001,"Name":"count of cache hit on states cache in provider","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":995,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1004,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":986,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":959,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1085,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1007,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":998,"Name":"time to commit changes","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1084,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":997,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1087,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":955,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1081,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":991,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1009,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1072,"Name":"internal.metrics.executorRunTime","Value":106,"Internal":true,"Count Failed Values":true},{"ID":1000,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":994,"Name":"number of total state 
rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1003,"Name":"duration","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":958,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":988,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1074,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1083,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":960,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1086,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":987,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1005,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":996,"Name":"time to update","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1071,"Name":"internal.metrics.executorDeserializeCpuTime","Value":8005933,"Internal":true,"Count Failed Values":true},{"ID":999,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":990,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":6,"Completion Time":1596020225875,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":20,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 6","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: [value#130, count#131]\nArguments: [value#130, count#131]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#130, count#131]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1095,"metricType":"sum"}]},"time":1596020225891} 
+{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":20,"time":1596020225896} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":19,"time":1596020225897} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":18,"time":1596020225897} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.562Z","batchId":6,"batchDuration":351,"durationMs":{"triggerExecution":351,"queryPlanning":28,"getBatch":1,"latestOffset":6,"addBatch":273,"walCommit":25},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":24,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48837}}","endOffset":"{\"test5\":{\"0\":48881}}","numInputRows":44,"inputRowsPerSecond":100.22779043280183,"processedRowsPerSecond":125.35612535612536}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":21,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48881}}, {\"test5\":{\"0\":48917}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, 
offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1759]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c0968891-bf48-4112-a19b-444014085d1d, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = c0968891-bf48-4112-a19b-444014085d1d, opId = 0, ver = 0, numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], 
2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1683]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1179,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1178,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1175,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1176,"metricType":"timing"},{"name":"peak memory","accumulatorId":1174,"metricType":"size"},{"name":"number of output rows","accumulatorId":1173,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1177,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1170,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1171,"metricType":"timing"},{"name":"peak memory","accumulatorId":1169,"metricType":"size"},{"name":"number of output rows","accumulatorId":1168,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1172,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1167,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":1119,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":1120,"metricType":"nsTiming"},{"name":"records read","accumulatorId":1117,"metricType":"sum"},{"name":"local bytes read","accumulatorId":1115,"metricType":"size"},{"name":"fetch wait time","accumulatorId":1116,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":1113,"metricType":"size"},{"name":"local blocks read","accumulatorId":1112,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":1111,"metricType":"sum"},{"name":"data 
size","accumulatorId":1110,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":1114,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":1118,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1166,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1163,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1164,"metricType":"timing"},{"name":"peak memory","accumulatorId":1162,"metricType":"size"},{"name":"number of output rows","accumulatorId":1161,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1165,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1160,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":1150,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":1151,"metricType":"sum"},{"name":"memory used by state","accumulatorId":1156,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1158,"metricType":"sum"},{"name":"number of output rows","accumulatorId":1149,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1157,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1159,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":1155,"metricType":"timing"},{"name":"time to remove","accumulatorId":1154,"metricType":"timing"},{"name":"number of updated state rows","accumulatorId":1152,"metricType":"sum"},{"name":"time to update","accumulatorId":1153,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1146,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1147,"metricType":"timing"},{"name":"peak memory","accumulatorId":1145,"metricType":"size"},{"name":"number of output rows","accumulatorId":1144,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1148,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1143,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020225988} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":22,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nWriteToDataSourceV2 (14)\n+- * HashAggregate (13)\n +- StateStoreSave (12)\n +- * HashAggregate (11)\n +- StateStoreRestore (10)\n +- Exchange (9)\n +- * HashAggregate (8)\n +- * HashAggregate (7)\n +- * SerializeFromObject (6)\n +- MapPartitions (5)\n +- DeserializeToObject (4)\n +- * Project (3)\n +- * Project (2)\n +- MicroBatchScan (1)\n\n\n(1) MicroBatchScan\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nArguments: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13], org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan@7e7b182c, KafkaV2[Subscribe[test5]], {\"test5\":{\"0\":48881}}, {\"test5\":{\"0\":48917}}\n\n(2) Project [codegen id : 1]\nOutput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(3) Project [codegen id : 1]\nOutput [1]: [cast(value#8 as string) AS value#21]\nInput [7]: [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]\n\n(4) DeserializeToObject\nInput [1]: [value#21]\nArguments: value#21.toString, obj#27: java.lang.String\n\n(5) MapPartitions\nInput [1]: [obj#27]\nArguments: org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String\n\n(6) SerializeFromObject [codegen id : 2]\nInput [1]: [obj#28]\nArguments: [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]\n\n(7) HashAggregate [codegen id : 2]\nInput [1]: [value#29]\nKeys [1]: [value#29]\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(8) HashAggregate [codegen id : 2]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(9) Exchange\nInput [2]: [value#29, count#38L]\nArguments: hashpartitioning(value#29, 2), true, [id=#1835]\n\n(10) StateStoreRestore\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = e165b23b-1a6f-459f-9c51-288922bb2647, opId = 0, ver = 0, numPartitions = 2], 2\n\n(11) HashAggregate [codegen id : 3]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [merge_count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count#38L]\n\n(12) StateStoreSave\nInput [2]: [value#29, count#38L]\nArguments: [value#29], state info [ checkpoint = , runId = e165b23b-1a6f-459f-9c51-288922bb2647, opId = 0, ver = 0, 
numPartitions = 2], Append, 0, 2\n\n(13) HashAggregate [codegen id : 4]\nInput [2]: [value#29, count#38L]\nKeys [1]: [value#29]\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#31L]\nResults [2]: [value#29, count(1)#31L AS count#32L]\n\n(14) WriteToDataSourceV2\nInput [2]: [value#29, count#32L]\nArguments: org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e\n\n","sparkPlanInfo":{"nodeName":"WriteToDataSourceV2","simpleString":"WriteToDataSourceV2 org.apache.spark.sql.execution.streaming.sources.MicroBatchWrite@6313b68e","children":[{"nodeName":"WholeStageCodegen (4)","simpleString":"WholeStageCodegen (4)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreSave","simpleString":"StateStoreSave [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], Complete, 0, 2","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"StateStoreRestore","simpleString":"StateStoreRestore [value#29], state info [ checkpoint = file:/tmp/temporary-025d7997-5b66-4def-abbf-bdcca57312b9/state, runId = e225d92f-2545-48f8-87a2-9c0309580f8a, opId = 0, ver = 7, numPartitions = 2], 2","children":[{"nodeName":"Exchange","simpleString":"Exchange hashpartitioning(value#29, 2), true, [id=#1683]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[merge_count(1)])","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[value#29], functions=[partial_count(1)])","children":[{"nodeName":"SerializeFromObject","simpleString":"SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true, false) AS value#29]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MapPartitions","simpleString":"MapPartitions org.apache.spark.sql.Dataset$$Lambda$1321/872917583@67b99068, obj#28: java.lang.String","children":[{"nodeName":"DeserializeToObject","simpleString":"DeserializeToObject value#21.toString, obj#27: java.lang.String","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project [cast(value#8 as string) AS value#21]","children":[{"nodeName":"Project","simpleString":"Project [key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13]","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"MicroBatchScan","simpleString":"MicroBatchScan[key#7, value#8, topic#9, partition#10, offset#11L, timestamp#12, timestampType#13] class org.apache.spark.sql.kafka010.KafkaSourceProvider$KafkaScan","children":[],"metadata":{},"metrics":[{"name":"number of output 
rows","accumulatorId":1179,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1178,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1175,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1176,"metricType":"timing"},{"name":"peak memory","accumulatorId":1174,"metricType":"size"},{"name":"number of output rows","accumulatorId":1173,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1177,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1170,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1171,"metricType":"timing"},{"name":"peak memory","accumulatorId":1169,"metricType":"size"},{"name":"number of output rows","accumulatorId":1168,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1172,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1167,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":1119,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":1120,"metricType":"nsTiming"},{"name":"records read","accumulatorId":1117,"metricType":"sum"},{"name":"local bytes read","accumulatorId":1115,"metricType":"size"},{"name":"fetch wait time","accumulatorId":1116,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":1113,"metricType":"size"},{"name":"local blocks read","accumulatorId":1112,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":1111,"metricType":"sum"},{"name":"data size","accumulatorId":1110,"metricType":"size"},{"name":"remote bytes read to disk","accumulatorId":1114,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":1118,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1166,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1163,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1164,"metricType":"timing"},{"name":"peak memory","accumulatorId":1162,"metricType":"size"},{"name":"number of output rows","accumulatorId":1161,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1165,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1160,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"number of inputs which are later than watermark ('inputs' are relative to operators)","accumulatorId":1150,"metricType":"sum"},{"name":"number of total state rows","accumulatorId":1151,"metricType":"sum"},{"name":"memory used by state","accumulatorId":1156,"metricType":"size"},{"name":"count of cache hit on states cache in provider","accumulatorId":1158,"metricType":"sum"},{"name":"number of output rows","accumulatorId":1149,"metricType":"sum"},{"name":"estimated size of state only on current version","accumulatorId":1157,"metricType":"size"},{"name":"count of cache miss on states cache in provider","accumulatorId":1159,"metricType":"sum"},{"name":"time to commit changes","accumulatorId":1155,"metricType":"timing"},{"name":"time to remove","accumulatorId":1154,"metricType":"timing"},{"name":"number of updated state 
rows","accumulatorId":1152,"metricType":"sum"},{"name":"time to update","accumulatorId":1153,"metricType":"timing"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":1146,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":1147,"metricType":"timing"},{"name":"peak memory","accumulatorId":1145,"metricType":"size"},{"name":"number of output rows","accumulatorId":1144,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":1148,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":1143,"metricType":"timing"}]}],"metadata":{},"metrics":[]},"time":1596020226019} +{"Event":"SparkListenerJobStart","Job ID":7,"Submission Time":1596020226076,"Stage Infos":[{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[15,14],"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerStageSubmitted","Stage 
Info":{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226077,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":14,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":0,"Attempt":0,"Launch Time":1596020226086,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":14,"Stage 
Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":0,"Attempt":0,"Launch Time":1596020226086,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226116,"Failed":false,"Killed":false,"Accumulables":[{"ID":1120,"Name":"shuffle write time","Update":"543034","Value":"543034","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1119,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1118,"Name":"shuffle bytes written","Update":"168","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1110,"Name":"data size","Update":"128","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1167,"Name":"duration","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1168,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1169,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1171,"Name":"time in aggregation build","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1173,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1174,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1176,"Name":"time in aggregation build","Update":"6","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1178,"Name":"duration","Update":"13","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1179,"Name":"number of output rows","Update":"36","Value":"36","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1224,"Name":"internal.metrics.input.recordsRead","Update":36,"Value":36,"Internal":true,"Count Failed Values":true},{"ID":1222,"Name":"internal.metrics.shuffle.write.writeTime","Update":543034,"Value":543034,"Internal":true,"Count Failed Values":true},{"ID":1221,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1220,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1211,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1206,"Name":"internal.metrics.resultSize","Update":2544,"Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1205,"Name":"internal.metrics.executorCpuTime","Update":19652237,"Value":19652237,"Internal":true,"Count Failed Values":true},{"ID":1204,"Name":"internal.metrics.executorRunTime","Update":19,"Value":19,"Internal":true,"Count Failed Values":true},{"ID":1203,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2829254,"Value":2829254,"Internal":true,"Count Failed Values":true},{"ID":1202,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":2829254,"Executor Run Time":19,"Executor CPU Time":19652237,"Peak Execution Memory":524288,"Result Size":2544,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":168,"Shuffle Write Time":543034,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":36},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":14,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":1,"RDD Info":[{"RDD ID":90,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[89],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":88,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"245\",\"name\":\"MapPartitions\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[87],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":84,"Name":"DataSourceRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":85,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"251\",\"name\":\"MicroBatchScan\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[84],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":89,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"240\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[88],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":86,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"247\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[85],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk 
Size":0},{"RDD ID":87,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"246\",\"name\":\"DeserializeToObject\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[86],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226077,"Completion Time":1596020226117,"Accumulables":[{"ID":1205,"Name":"internal.metrics.executorCpuTime","Value":19652237,"Internal":true,"Count Failed Values":true},{"ID":1178,"Name":"duration","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1169,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1118,"Name":"shuffle bytes written","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1204,"Name":"internal.metrics.executorRunTime","Value":19,"Internal":true,"Count Failed Values":true},{"ID":1222,"Name":"internal.metrics.shuffle.write.writeTime","Value":543034,"Internal":true,"Count Failed Values":true},{"ID":1171,"Name":"time in aggregation build","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1120,"Name":"shuffle write time","Value":"543034","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1174,"Name":"peak memory","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1168,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1203,"Name":"internal.metrics.executorDeserializeCpuTime","Value":2829254,"Internal":true,"Count Failed Values":true},{"ID":1167,"Name":"duration","Value":"13","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1221,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1176,"Name":"time in aggregation build","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1206,"Name":"internal.metrics.resultSize","Value":2544,"Internal":true,"Count Failed Values":true},{"ID":1224,"Name":"internal.metrics.input.recordsRead","Value":36,"Internal":true,"Count Failed Values":true},{"ID":1179,"Name":"number of output rows","Value":"36","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1119,"Name":"shuffle records written","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1110,"Name":"data size","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1173,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1202,"Name":"internal.metrics.executorDeserializeTime","Value":2,"Internal":true,"Count Failed Values":true},{"ID":1211,"Name":"internal.metrics.peakExecutionMemory","Value":524288,"Internal":true,"Count Failed Values":true},{"ID":1220,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":168,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native 
Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226120,"Accumulables":[],"Resource Profile Id":0},"Properties":{"sql.streaming.queryId":"8d268dc2-bc9c-4be8-97a9-b135d2943028","spark.driver.host":"iZbp19vpr16ix621sdw476Z","spark.eventLog.enabled":"true","spark.sql.adaptive.enabled":"false","spark.job.interruptOnCancel":"true","spark.driver.port":"46309","__fetch_continuous_blocks_in_batch_enabled":"true","spark.jars":"file:/root/spark-3.1.0-SNAPSHOT-bin-hadoop2.8/./examples/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar","__is_continuous_processing":"false","spark.app.name":"StructuredKafkaWordCount","callSite.long":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","callSite.short":"start at StructuredKafkaWordCount.scala:86","spark.submit.pyFiles":"","spark.job.description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","spark.executor.id":"driver","spark.sql.cbo.enabled":"false","streaming.sql.batchId":"7","spark.jobGroup.id":"e225d92f-2545-48f8-87a2-9c0309580f8a","spark.submit.deployMode":"client","spark.master":"local[*]","spark.eventLog.dir":"/tmp/spark-history","spark.sql.execution.id":"22","spark.app.id":"local-1596020211915","spark.sql.shuffle.partitions":"2"}} +{"Event":"SparkListenerTaskStart","Stage ID":15,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":0,"Attempt":0,"Launch Time":1596020226128,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":15,"Stage 
Attempt ID":0,"Task Info":{"Task ID":23,"Index":1,"Attempt":0,"Launch Time":1596020226129,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":15,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":1,"Attempt":0,"Launch Time":1596020226129,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226196,"Failed":false,"Killed":false,"Accumulables":[{"ID":1143,"Name":"duration","Update":"3","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1148,"Name":"avg hash probe bucket list iters","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1144,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Update":"4456448","Value":"4456448","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1147,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1153,"Name":"time to update","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1152,"Name":"number of updated state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Update":"19","Value":"19","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Update":"368","Value":"368","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1149,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1158,"Name":"count of cache hit on states cache in provider","Update":"14","Value":"14","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Update":"784","Value":"784","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1151,"Name":"number of total state rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Update":"21","Value":"21","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1161,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Update":"262144","Value":"262144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1166,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1112,"Name":"local blocks read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1116,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1115,"Name":"local bytes read","Update":"168","Value":"168","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1117,"Name":"records read","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":168,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Update":4718592,"Value":4718592,"Internal":true,"Count Failed Values":true},{"ID":1231,"Name":"internal.metrics.resultSize","Update":5574,"Value":5574,"Internal":true,"Count Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Update":19415818,"Value":19415818,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Update":60,"Value":60,"Internal":true,"Count Failed Values":true},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3845429,"Value":3845429,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3845429,"Executor Run Time":60,"Executor CPU Time":19415818,"Peak Execution Memory":4718592,"Result Size":5574,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":1,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":168,"Total Records Read":1},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":15,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":0,"Attempt":0,"Launch Time":1596020226128,"Executor ID":"driver","Host":"iZbp19vpr16ix621sdw476Z","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1596020226204,"Failed":false,"Killed":false,"Accumulables":[{"ID":1143,"Name":"duration","Update":"2","Value":"5","Internal":true,"Count Failed 
Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Update":"262144","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1147,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1153,"Name":"time to update","Update":"3","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Update":"48","Value":"67","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Update":"88","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1158,"Name":"count of cache hit on states cache in provider","Update":"14","Value":"28","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Update":"400","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Update":"3","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Update":"262144","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Update":"0","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":0,"Value":168,"Internal":true,"Count Failed Values":true},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":0,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Update":524288,"Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1231,"Name":"internal.metrics.resultSize","Update":5311,"Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Update":14652861,"Value":34068679,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Update":65,"Value":125,"Internal":true,"Count Failed Values":true},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3933877,"Value":7779306,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":6,"Internal":true,"Count Failed Values":true}]},"Task Executor 
Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3933877,"Executor Run Time":65,"Executor CPU Time":14652861,"Peak Execution Memory":524288,"Result Size":5311,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":15,"Stage Attempt ID":0,"Stage Name":"start at StructuredKafkaWordCount.scala:86","Number of Tasks":2,"RDD Info":[{"RDD ID":95,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"231\",\"name\":\"WholeStageCodegen (4)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[94],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":93,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"235\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[92],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":91,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"239\",\"name\":\"Exchange\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[90],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":94,"Name":"StateStoreRDD","Scope":"{\"id\":\"234\",\"name\":\"StateStoreSave\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[93],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":92,"Name":"StateStoreRDD","Scope":"{\"id\":\"238\",\"name\":\"StateStoreRestore\"}","Callsite":"start at StructuredKafkaWordCount.scala:86","Parent IDs":[91],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"Number of Partitions":2,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent 
IDs":[14],"Details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","Submission Time":1596020226120,"Completion Time":1596020226204,"Accumulables":[{"ID":1115,"Name":"local bytes read","Value":"168","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1160,"Name":"duration","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1151,"Name":"number of total state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1145,"Name":"peak memory","Value":"4718592","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1154,"Name":"time to remove","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1231,"Name":"internal.metrics.resultSize","Value":10885,"Internal":true,"Count Failed Values":true},{"ID":1240,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1153,"Name":"time to update","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1162,"Name":"peak memory","Value":"524288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1144,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1243,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1117,"Name":"records read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1228,"Name":"internal.metrics.executorDeserializeCpuTime","Value":7779306,"Internal":true,"Count Failed Values":true},{"ID":1147,"Name":"time in aggregation build","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1156,"Name":"memory used by state","Value":"1184","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1236,"Name":"internal.metrics.peakExecutionMemory","Value":5242880,"Internal":true,"Count Failed Values":true},{"ID":1227,"Name":"internal.metrics.executorDeserializeTime","Value":6,"Internal":true,"Count Failed Values":true},{"ID":1158,"Name":"count of cache hit on states cache in provider","Value":"28","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1149,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1239,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":1,"Internal":true,"Count 
Failed Values":true},{"ID":1230,"Name":"internal.metrics.executorCpuTime","Value":34068679,"Internal":true,"Count Failed Values":true},{"ID":1152,"Name":"number of updated state rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1242,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":168,"Internal":true,"Count Failed Values":true},{"ID":1116,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1161,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1143,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1155,"Name":"time to commit changes","Value":"67","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1164,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1241,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1244,"Name":"internal.metrics.shuffle.read.recordsRead","Value":1,"Internal":true,"Count Failed Values":true},{"ID":1148,"Name":"avg hash probe bucket list iters","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1157,"Name":"estimated size of state only on current version","Value":"456","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1166,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":1238,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":1229,"Name":"internal.metrics.executorRunTime","Value":125,"Internal":true,"Count Failed Values":true},{"ID":1112,"Name":"local blocks read","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":7,"Completion Time":1596020226204,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":23,"description":"\nid = 8d268dc2-bc9c-4be8-97a9-b135d2943028\nrunId = e225d92f-2545-48f8-87a2-9c0309580f8a\nbatch = 7","details":"org.apache.spark.sql.streaming.DataStreamWriter.start(DataStreamWriter.scala:366)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount$.main(StructuredKafkaWordCount.scala:86)\norg.apache.spark.examples.sql.streaming.StructuredKafkaWordCount.main(StructuredKafkaWordCount.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:180)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)","physicalPlanDescription":"== Physical Plan ==\nLocalTableScan (1)\n\n\n(1) LocalTableScan\nOutput [2]: 
[value#144, count#145]\nArguments: [value#144, count#145]\n\n","sparkPlanInfo":{"nodeName":"LocalTableScan","simpleString":"LocalTableScan [value#144, count#145]","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":1252,"metricType":"sum"}]},"time":1596020226221} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":23,"time":1596020226230} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":22,"time":1596020226231} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":21,"time":1596020226231} +{"Event":"org.apache.spark.sql.streaming.StreamingQueryListener$QueryProgressEvent","progress":{"id":"8d268dc2-bc9c-4be8-97a9-b135d2943028","runId":"e225d92f-2545-48f8-87a2-9c0309580f8a","name":null,"timestamp":"2020-07-29T10:57:05.916Z","batchId":7,"batchDuration":341,"durationMs":{"triggerExecution":341,"queryPlanning":24,"getBatch":0,"latestOffset":3,"addBatch":271,"walCommit":14},"eventTime":{},"stateOperators":[{"numRowsTotal":1,"numRowsUpdated":1,"memoryUsedBytes":1184,"numLateInputs":0,"customMetrics":{"stateOnCurrentVersionSizeBytes":456,"loadedMapCacheHitCount":28,"loadedMapCacheMissCount":0}}],"sources":[{"description":"KafkaV2[Subscribe[test5]]","startOffset":"{\"test5\":{\"0\":48881}}","endOffset":"{\"test5\":{\"0\":48917}}","numInputRows":36,"inputRowsPerSecond":101.69491525423729,"processedRowsPerSecond":105.57184750733137}],"sink":{"description":"org.apache.spark.sql.execution.streaming.ConsoleTable$@514ba885","numOutputRows":1},"observedMetrics":{}}} +{"Event":"SparkListenerApplicationEnd","Timestamp":1596020226301} diff --git a/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala b/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala new file mode 100644 index 0000000000000..f73305b1b001e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/deploy/history/Utils.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.history + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.History.HISTORY_LOG_DIR +import org.apache.spark.util.ManualClock + +object Utils { + def withFsHistoryProvider(logDir: String)(fn: FsHistoryProvider => Unit): Unit = { + var provider: FsHistoryProvider = null + try { + val clock = new ManualClock() + val conf = new SparkConf().set(HISTORY_LOG_DIR, logDir) + provider = new FsHistoryProvider(conf, clock) + provider.checkForLogs() + fn(provider) + } finally { + if (provider != null) { + provider.stop() + provider = null + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala new file mode 100644 index 0000000000000..160535ea4d048 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryHistorySuite.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.spark.sql.streaming.ui + +import java.util.Locale +import javax.servlet.http.HttpServletRequest + +import org.mockito.Mockito.{mock, when} +import org.scalatest.BeforeAndAfter + +import org.apache.spark.deploy.history.{Utils => HsUtils} +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore +import org.apache.spark.sql.test.SharedSparkSession + +class StreamingQueryHistorySuite extends SharedSparkSession with BeforeAndAfter { + + test("support streaming query events") { + val logDir = Thread.currentThread().getContextClassLoader.getResource("spark-events").toString + HsUtils.withFsHistoryProvider(logDir) { provider => + val appUi = provider.getAppUI("local-1596020211915", None).getOrElse { + assert(false, "Failed to load event log of local-1596020211915.") + null + } + assert(appUi.ui.appName == "StructuredKafkaWordCount") + assert(appUi.ui.store.store.count(classOf[StreamingQueryData]) == 1) + assert(appUi.ui.store.store.count(classOf[StreamingQueryProgressWrapper]) == 8) + + val store = new StreamingQueryStatusStore(appUi.ui.store.store) + val tab = new StreamingQueryTab(store, appUi.ui) + val request = mock(classOf[HttpServletRequest]) + var html = new StreamingQueryPage(tab).render(request) + .toString().toLowerCase(Locale.ROOT) + // 81.39: Avg Input /sec + assert(html.contains("81.39")) + // 157.05: Avg Process /sec + assert(html.contains("157.05")) + + val id = "8d268dc2-bc9c-4be8-97a9-b135d2943028" + val runId = "e225d92f-2545-48f8-87a2-9c0309580f8a" + when(request.getParameter("id")).thenReturn(runId) + html = new StreamingQueryStatisticsPage(tab).render(request) + .toString().toLowerCase(Locale.ROOT) + assert(html.contains("8 completed batches")) + assert(html.contains(id)) + assert(html.contains(runId)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala index c2b6688faf0e7..246fa1f7c9184 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryPageSuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.streaming.ui import java.util.{Locale, UUID} import javax.servlet.http.HttpServletRequest +import scala.xml.Node + import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.scalatest.BeforeAndAfter -import scala.xml.Node import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore import org.apache.spark.sql.streaming.StreamingQueryProgress import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.ui.SparkUI @@ -35,26 +37,26 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val id = UUID.randomUUID() val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) - val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val store = mock(classOf[StreamingQueryStatusStore], RETURNS_SMART_NULLS) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) - when(tab.statusListener).thenReturn(statusListener) + when(tab.store).thenReturn(store) val streamQuery = createStreamQueryUIData(id) - when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + when(store.allQueryUIData).thenReturn(Seq(streamQuery)) var html = renderStreamingQueryPage(request, tab) 
.toString().toLowerCase(Locale.ROOT) assert(html.contains("active streaming queries (1)")) - when(streamQuery.isActive).thenReturn(false) - when(streamQuery.exception).thenReturn(None) + when(streamQuery.summary.isActive).thenReturn(false) + when(streamQuery.summary.exception).thenReturn(None) html = renderStreamingQueryPage(request, tab) .toString().toLowerCase(Locale.ROOT) assert(html.contains("completed streaming queries (1)")) assert(html.contains("finished")) - when(streamQuery.isActive).thenReturn(false) - when(streamQuery.exception).thenReturn(Option("exception in query")) + when(streamQuery.summary.isActive).thenReturn(false) + when(streamQuery.summary.exception).thenReturn(Option("exception in query")) html = renderStreamingQueryPage(request, tab) .toString().toLowerCase(Locale.ROOT) assert(html.contains("completed streaming queries (1)")) @@ -66,17 +68,20 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { val id = UUID.randomUUID() val request = mock(classOf[HttpServletRequest]) val tab = mock(classOf[StreamingQueryTab], RETURNS_SMART_NULLS) - val statusListener = mock(classOf[StreamingQueryStatusListener], RETURNS_SMART_NULLS) + val store = mock(classOf[StreamingQueryStatusStore], RETURNS_SMART_NULLS) + when(request.getParameter("id")).thenReturn(id.toString) + when(tab.appName).thenReturn("testing") + when(tab.headerTabs).thenReturn(Seq.empty) + when(tab.store).thenReturn(store) val ui = mock(classOf[SparkUI]) when(request.getParameter("id")).thenReturn(id.toString) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) - when(tab.statusListener).thenReturn(statusListener) when(ui.conf).thenReturn(new SparkConf()) when(tab.parent).thenReturn(ui) val streamQuery = createStreamQueryUIData(id) - when(statusListener.allQueryStatus).thenReturn(Seq(streamQuery)) + when(store.allQueryUIData).thenReturn(Seq(streamQuery)) val html = renderStreamingQueryStatisticsPage(request, tab) .toString().toLowerCase(Locale.ROOT) @@ -94,15 +99,18 @@ class StreamingQueryPageSuite extends SharedSparkSession with BeforeAndAfter { when(progress.batchId).thenReturn(2) when(progress.prettyJson).thenReturn("""{"a":1}""") + val summary = mock(classOf[StreamingQueryData], RETURNS_SMART_NULLS) + when(summary.isActive).thenReturn(true) + when(summary.name).thenReturn("query") + when(summary.id).thenReturn(id) + when(summary.runId).thenReturn(id) + when(summary.startTimestamp).thenReturn(1L) + when(summary.exception).thenReturn(None) + val streamQuery = mock(classOf[StreamingQueryUIData], RETURNS_SMART_NULLS) - when(streamQuery.isActive).thenReturn(true) - when(streamQuery.name).thenReturn("query") - when(streamQuery.id).thenReturn(id) - when(streamQuery.runId).thenReturn(id) - when(streamQuery.startTimestamp).thenReturn(1L) + when(streamQuery.summary).thenReturn(summary) when(streamQuery.lastProgress).thenReturn(progress) when(streamQuery.recentProgress).thenReturn(Array(progress)) - when(streamQuery.exception).thenReturn(None) streamQuery } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala index 6aa440e5609c5..91c55d5598a6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/ui/StreamingQueryStatusListenerSuite.scala @@ -17,19 +17,28 @@ package 
org.apache.spark.sql.streaming.ui -import java.util.UUID +import java.text.SimpleDateFormat +import java.util.{Date, UUID} import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} +import org.scalatest.time.SpanSugar._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimeZone +import org.apache.spark.sql.execution.ui.StreamingQueryStatusStore +import org.apache.spark.sql.internal.StaticSQLConf import org.apache.spark.sql.streaming.{StreamingQueryListener, StreamingQueryProgress, StreamTest} import org.apache.spark.sql.streaming +import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.kvstore.InMemoryStore class StreamingQueryStatusListenerSuite extends StreamTest { test("onQueryStarted, onQueryProgress, onQueryTerminated") { - val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) - // hanlde query started event + // handle query started event val id = UUID.randomUUID() val runId = UUID.randomUUID() val startEvent = new StreamingQueryListener.QueryStartedEvent( @@ -37,8 +46,9 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryStarted(startEvent) // result checking - assert(listener.activeQueryStatus.size() == 1) - assert(listener.activeQueryStatus.get(runId).name == "test") + assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => + uiData.summary.runId == runId && uiData.summary.name.equals("test"))) // handle query progress event val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) @@ -53,28 +63,32 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryProgress(processEvent) // result checking - val activeQuery = listener.activeQueryStatus.get(runId) - assert(activeQuery.isActive) - assert(activeQuery.recentProgress.length == 1) - assert(activeQuery.lastProgress.id == id) - assert(activeQuery.lastProgress.runId == runId) - assert(activeQuery.lastProgress.timestamp == "2001-10-01T01:00:00.100Z") - assert(activeQuery.lastProgress.inputRowsPerSecond == 10.0) - assert(activeQuery.lastProgress.processedRowsPerSecond == 12.0) - assert(activeQuery.lastProgress.batchId == 2) - assert(activeQuery.lastProgress.prettyJson == """{"a":1}""") + val activeQuery = + queryStore.allQueryUIData.filter(_.summary.isActive).find(_.summary.runId == runId) + assert(activeQuery.isDefined) + assert(activeQuery.get.summary.isActive) + assert(activeQuery.get.recentProgress.length == 1) + assert(activeQuery.get.lastProgress.id == id) + assert(activeQuery.get.lastProgress.runId == runId) + assert(activeQuery.get.lastProgress.timestamp == "2001-10-01T01:00:00.100Z") + assert(activeQuery.get.lastProgress.inputRowsPerSecond == 10.0) + assert(activeQuery.get.lastProgress.processedRowsPerSecond == 12.0) + assert(activeQuery.get.lastProgress.batchId == 2) + assert(activeQuery.get.lastProgress.prettyJson == """{"a":1}""") // handle terminate event val terminateEvent = new StreamingQueryListener.QueryTerminatedEvent(id, runId, None) listener.onQueryTerminated(terminateEvent) - assert(!listener.inactiveQueryStatus.head.isActive) - assert(listener.inactiveQueryStatus.head.runId == runId) - assert(listener.inactiveQueryStatus.head.id == id) + 
assert(!queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.isActive) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) } test("same query start multiple times") { - val listener = new StreamingQueryStatusListener(spark.sparkContext.conf) + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val listener = new StreamingQueryStatusListener(spark.sparkContext.conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) // handle first time start val id = UUID.randomUUID() @@ -94,11 +108,106 @@ class StreamingQueryStatusListenerSuite extends StreamTest { listener.onQueryStarted(startEvent1) // result checking - assert(listener.activeQueryStatus.size() == 1) - assert(listener.inactiveQueryStatus.length == 1) - assert(listener.activeQueryStatus.containsKey(runId1)) - assert(listener.activeQueryStatus.get(runId1).id == id) - assert(listener.inactiveQueryStatus.head.runId == runId0) - assert(listener.inactiveQueryStatus.head.id == id) + assert(queryStore.allQueryUIData.count(_.summary.isActive) == 1) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).length == 1) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(_.summary.runId == runId1)) + assert(queryStore.allQueryUIData.filter(_.summary.isActive).exists(uiData => + uiData.summary.runId == runId1 && uiData.summary.id == id)) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.runId == runId0) + assert(queryStore.allQueryUIData.filterNot(_.summary.isActive).head.summary.id == id) + } + + test("test small retained queries") { + val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val conf = spark.sparkContext.conf + conf.set(StaticSQLConf.STREAMING_UI_RETAINED_QUERIES.key, "2") + val listener = new StreamingQueryStatusListener(conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) + + def addNewQuery(): (UUID, UUID) = { + val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 + format.setTimeZone(getTimeZone("UTC")) + val id = UUID.randomUUID() + val runId = UUID.randomUUID() + val startEvent = new StreamingQueryListener.QueryStartedEvent( + id, runId, "test1", format.format(new Date(System.currentTimeMillis()))) + listener.onQueryStarted(startEvent) + (id, runId) + } + + def checkInactiveQueryStatus(numInactives: Int, targetInactives: Seq[UUID]): Unit = { + eventually(timeout(10.seconds)) { + val inactiveQueries = queryStore.allQueryUIData.filter(!_.summary.isActive) + assert(inactiveQueries.size == numInactives) + assert(inactiveQueries.map(_.summary.id).toSet == targetInactives.toSet) + } + } + + val (id1, runId1) = addNewQuery() + val (id2, runId2) = addNewQuery() + val (id3, runId3) = addNewQuery() + assert(queryStore.allQueryUIData.count(!_.summary.isActive) == 0) + + val terminateEvent1 = new StreamingQueryListener.QueryTerminatedEvent(id1, runId1, None) + listener.onQueryTerminated(terminateEvent1) + checkInactiveQueryStatus(1, Seq(id1)) + val terminateEvent2 = new StreamingQueryListener.QueryTerminatedEvent(id2, runId2, None) + listener.onQueryTerminated(terminateEvent2) + checkInactiveQueryStatus(2, Seq(id1, id2)) + val terminateEvent3 = new StreamingQueryListener.QueryTerminatedEvent(id3, runId3, None) + listener.onQueryTerminated(terminateEvent3) + checkInactiveQueryStatus(2, Seq(id2, id3)) + } + + test("test small retained progress") { + 
val kvStore = new ElementTrackingStore(new InMemoryStore(), sparkConf) + val conf = spark.sparkContext.conf + conf.set(StaticSQLConf.STREAMING_UI_RETAINED_PROGRESS_UPDATES.key, "5") + val listener = new StreamingQueryStatusListener(conf, kvStore) + val queryStore = new StreamingQueryStatusStore(kvStore) + + val id = UUID.randomUUID() + val runId = UUID.randomUUID() + val startEvent = new StreamingQueryListener.QueryStartedEvent( + id, runId, "test", "2016-12-05T20:54:20.827Z") + listener.onQueryStarted(startEvent) + + var batchId: Int = 0 + + def addQueryProgress(): Unit = { + val progress = mockProgressData(id, runId) + val processEvent = new streaming.StreamingQueryListener.QueryProgressEvent(progress) + listener.onQueryProgress(processEvent) + } + + def mockProgressData(id: UUID, runId: UUID): StreamingQueryProgress = { + val format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'") // ISO8601 + format.setTimeZone(getTimeZone("UTC")) + + val progress = mock(classOf[StreamingQueryProgress], RETURNS_SMART_NULLS) + when(progress.id).thenReturn(id) + when(progress.runId).thenReturn(runId) + when(progress.timestamp).thenReturn(format.format(new Date(System.currentTimeMillis()))) + when(progress.inputRowsPerSecond).thenReturn(10.0) + when(progress.processedRowsPerSecond).thenReturn(12.0) + when(progress.batchId).thenReturn(batchId) + when(progress.prettyJson).thenReturn("""{"a":1}""") + + batchId += 1 + progress + } + + def checkQueryProcessData(targetNum: Int): Unit = { + eventually(timeout(10.seconds)) { + assert(queryStore.getQueryProgressData(runId).size == targetNum) + } + } + + Array.tabulate(4) { _ => addQueryProgress() } + checkQueryProcessData(4) + addQueryProgress() + checkQueryProcessData(5) + addQueryProgress() + checkQueryProcessData(5) } } From 90d4d7d43ffd29ad780dc7c5588b7e55a73aba97 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 3 Dec 2020 09:31:46 +0800 Subject: [PATCH 0645/1009] [SPARK-33610][ML] Imputer transform skip duplicate head() job ### What changes were proposed in this pull request? on each call of `transform`, a head() job will be triggered, which can be skipped by using a lazy var. ### Why are the changes needed? avoiding duplicate head() jobs ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #30550 from zhengruifeng/imputer_transform. 
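For illustration, the caching pattern this patch applies can be sketched as follows. `CachedSurrogateModel` is a hypothetical stand-in for `ImputerModel`, not Spark's actual class; only the `@transient lazy val` body mirrors the code added in the diff below.

```scala
import org.apache.spark.sql.{DataFrame, Dataset}

// Sketch of the caching pattern: the surrogate row is fetched with a single head() job
// the first time it is needed and then reused by every later transform() call.
// CachedSurrogateModel is a hypothetical stand-in for ImputerModel.
class CachedSurrogateModel(surrogateDF: DataFrame) extends Serializable {

  // Evaluated at most once per instance; @transient keeps the cached map out of
  // serialization, so it is recomputed lazily after deserialization.
  @transient private lazy val surrogates: Map[String, Double] = {
    val row = surrogateDF.head()  // the only Spark job triggered for the surrogates
    row.schema.fieldNames.zipWithIndex
      .map { case (name, index) => (name, row.getDouble(index)) }
      .toMap
  }

  def transform(dataset: Dataset[_]): DataFrame = {
    // Repeated calls reuse the cached map; no duplicate head() jobs are triggered.
    println(s"imputing with surrogates: $surrogates")
    dataset.toDF()
  }
}
```

Marking the field `@transient` keeps the cached map out of the serialized model, while `lazy` defers the single `head()` job until the first `transform()` call.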
Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- .../org/apache/spark/ml/feature/Imputer.scala | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index 03ebe0299f63f..d0b6ab1ef2cbc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -254,20 +254,25 @@ class ImputerModel private[ml] ( /** @group setParam */ def setOutputCols(value: Array[String]): this.type = set(outputCols, value) + @transient private lazy val surrogates = { + val row = surrogateDF.head() + row.schema.fieldNames.zipWithIndex + .map { case (name, index) => (name, row.getDouble(index)) } + .toMap + } + override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) - val (inputColumns, outputColumns) = getInOutCols - val surrogates = surrogateDF.select(inputColumns.map(col): _*).head().toSeq - - - val newCols = inputColumns.zip(outputColumns).zip(surrogates).map { - case ((inputCol, outputCol), surrogate) => - val inputType = dataset.schema(inputCol).dataType - val ic = col(inputCol).cast(DoubleType) - when(ic.isNull, surrogate) - .when(ic === $(missingValue), surrogate) - .otherwise(ic) - .cast(inputType) + val (inputColumns, outputColumns) = getInOutCols() + + val newCols = inputColumns.map { inputCol => + val surrogate = surrogates(inputCol) + val inputType = dataset.schema(inputCol).dataType + val ic = col(inputCol).cast(DoubleType) + when(ic.isNull, surrogate) + .when(ic === $(missingValue), surrogate) + .otherwise(ic) + .cast(inputType) } dataset.withColumns(outputColumns, newCols).toDF() } From 878cc0e6e95f300a0a58c742654f53a28b30b174 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Wed, 2 Dec 2020 17:36:25 -0800 Subject: [PATCH 0646/1009] [SPARK-32896][SS][FOLLOW-UP] Rename the API to `toTable` ### What changes were proposed in this pull request? As the discussion in https://github.com/apache/spark/pull/30521#discussion_r531463427, rename the API to `toTable`. ### Why are the changes needed? Rename the API for further extension and accuracy. ### Does this PR introduce _any_ user-facing change? Yes, it's an API change but the new API is not released yet. ### How was this patch tested? Existing UT. Closes #30571 from xuanyuanking/SPARK-32896-follow. 
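A minimal usage sketch of the renamed API, assuming a Spark 3.1 session; the `rate` source, checkpoint path, and table name below are illustrative choices, not part of this patch.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQuery

val spark = SparkSession.builder().appName("toTable-example").getOrCreate()

// DataStreamWriter.toTable starts the streaming query and writes to a catalog table,
// replacing the saveAsTable name that this follow-up renames.
val query: StreamingQuery = spark.readStream
  .format("rate")                                       // built-in testing source
  .load()
  .writeStream
  .option("checkpointLocation", "/tmp/rate-sink-ckpt")  // example path
  .toTable("rate_sink")                                 // was saveAsTable("rate_sink")

query.awaitTermination()
```

Only the method name changes here; starting the query and writing to the table behave the same as before the rename.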
Authored-by: Yuanjian Li Signed-off-by: Shixiong Zhu --- .../scala/org/apache/spark/sql/streaming/DataStreamWriter.scala | 2 +- .../spark/sql/streaming/test/DataStreamTableAPISuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index d67e175c24dd9..9e3599712fde5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -304,7 +304,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * @since 3.1.0 */ @throws[TimeoutException] - def saveAsTable(tableName: String): StreamingQuery = { + def toTable(tableName: String): StreamingQuery = { this.source = SOURCE_NAME_TABLE this.tableName = tableName startInternal(None) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 062b1060bc601..bf850432d5c0e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -291,7 +291,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val query = inputDF .writeStream .option("checkpointLocation", checkpointDir.getAbsolutePath) - .saveAsTable(tableIdentifier) + .toTable(tableIdentifier) inputData.addData(newInputs: _*) From 08809897554a48065c2280c709d7efba28fa441d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 3 Dec 2020 10:57:14 +0900 Subject: [PATCH 0647/1009] [SPARK-22798][PYTHON][ML][FOLLOWUP] Add labelsArray to PySpark StringIndexer ### What changes were proposed in this pull request? This is a followup to add missing `labelsArray` to PySpark `StringIndexer`. ### Why are the changes needed? `labelsArray` is for multi-column case for `StringIndexer`. We should provide this accessor at PySpark side too. ### Does this PR introduce _any_ user-facing change? Yes, `labelsArray` was missing in PySpark `StringIndexer` in Spark 3.0. ### How was this patch tested? Unit test. Closes #30579 from viirya/SPARK-22798-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/ml/feature.py | 12 ++++++++++++ python/pyspark/ml/tests/test_feature.py | 1 + 2 files changed, 13 insertions(+) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 8138f34d7a19e..7cfeabea4aa97 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3852,9 +3852,21 @@ def from_arrays_of_labels(cls, arrayOfLabels, inputCols, outputCols=None, def labels(self): """ Ordered list of labels, corresponding to indices to be assigned. + + .. deprecated:: 3.1.0 + It will be removed in future versions. Use `labelsArray` method instead. """ return self._call_java("labels") + @property + @since("3.1.0") + def labelsArray(self): + """ + Array of ordered list of labels, corresponding to indices to be assigned + for each input column. 
+ """ + return self._call_java("labelsArray") + @inherit_doc class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py index 98b8ce6dfb95c..2cceb04338806 100644 --- a/python/pyspark/ml/tests/test_feature.py +++ b/python/pyspark/ml/tests/test_feature.py @@ -232,6 +232,7 @@ def test_string_indexer_from_labels(self): model = StringIndexerModel.from_labels(["a", "b", "c"], inputCol="label", outputCol="indexed", handleInvalid="keep") self.assertEqual(model.labels, ["a", "b", "c"]) + self.assertEqual(model.labelsArray, [("a", "b", "c")]) df1 = self.spark.createDataFrame([ (0, "a"), From 3b2ff16ee6e457daade0ecb9f96955c8ed73f2a5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 3 Dec 2020 14:34:44 +0900 Subject: [PATCH 0648/1009] [SPARK-33636][PYTHON][ML][FOLLOWUP] Update since tag of labelsArray in StringIndexer ### What changes were proposed in this pull request? This is to update `labelsArray`'s since tag. ### Why are the changes needed? The original change was backported to branch-3.0 for 3.0.2 version. So it is better to update the since tag to reflect the fact. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A. Just tag change. Closes #30582 from viirya/SPARK-33636-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- python/pyspark/ml/feature.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 7cfeabea4aa97..546c46383d340 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -3859,7 +3859,7 @@ def labels(self): return self._call_java("labels") @property - @since("3.1.0") + @since("3.0.2") def labelsArray(self): """ Array of ordered list of labels, corresponding to indices to be assigned From ff13f574e67ff9e2c38167368dc6190455e8ed7f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 3 Dec 2020 14:04:08 +0000 Subject: [PATCH 0649/1009] [SPARK-20044][SQL] Add new function DATE_FROM_UNIX_DATE and UNIX_DATE ### What changes were proposed in this pull request? Add new functions DATE_FROM_UNIX_DATE and UNIX_DATE for conversion between Date type and Numeric types. ### Why are the changes needed? 1. Explicit conversion between Date type and Numeric types is disallowed in ANSI mode. We need to provide new functions for users to complete the conversion. 2. We have introduced new functions from Bigquery for conversion between Timestamp type and Numeric types: TIMESTAMP_SECONDS, TIMESTAMP_MILLIS, TIMESTAMP_MICROS , UNIX_SECONDS, UNIX_MILLIS, and UNIX_MICROS. It makes sense to add functions for conversion between Date type and Numeric types as well. ### Does this PR introduce _any_ user-facing change? Yes, two new datetime functions are added. ### How was this patch tested? Unit tests Closes #30588 from gengliangwang/dateToNumber. 
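A quick way to try the two new functions from a Scala session (assuming `spark` is an active `SparkSession`); the expected values in the comments come from the golden result files updated in the diff below.

```scala
// DATE_FROM_UNIX_DATE maps a day count since 1970-01-01 to a DATE; UNIX_DATE is its inverse.
spark.sql(
  """SELECT
    |  DATE_FROM_UNIX_DATE(0)      AS d0,    -- 1970-01-01
    |  DATE_FROM_UNIX_DATE(1000)   AS d1000, -- 1972-09-27
    |  UNIX_DATE(DATE'1970-01-01') AS n0,    -- 0
    |  UNIX_DATE(DATE'2020-12-04') AS n1     -- 18600
    |""".stripMargin).show()
```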
Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 2 + .../expressions/datetimeExpressions.scala | 46 +++++++++++++++++++ .../expressions/DateExpressionsSuite.scala | 24 ++++++++++ .../sql-functions/sql-expression-schema.md | 4 +- .../resources/sql-tests/inputs/datetime.sql | 5 +- .../sql-tests/results/ansi/datetime.sql.out | 18 +++++++- .../sql-tests/results/datetime-legacy.sql.out | 18 +++++++- .../sql-tests/results/datetime.sql.out | 18 +++++++- 8 files changed, 130 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 5c2816a0baa95..3b46de539ce3d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -427,6 +427,8 @@ object FunctionRegistry { expression[MakeInterval]("make_interval"), expression[DatePart]("date_part"), expression[Extract]("extract"), + expression[DateFromUnixDate]("date_from_unix_date"), + expression[UnixDate]("unix_date"), expression[SecondsToTimestamp]("timestamp_seconds"), expression[MillisToTimestamp]("timestamp_millis"), expression[MicrosToTimestamp]("timestamp_micros"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 60dc32c1571fe..c20dd6148be3e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -400,6 +400,52 @@ case class DayOfYear(child: Expression) extends GetDateField { override val funcName = "getDayInYear" } +@ExpressionDescription( + usage = "_FUNC_(days) - Create date from the number of days since 1970-01-01.", + examples = """ + Examples: + > SELECT _FUNC_(1); + 1970-01-02 + """, + group = "datetime_funcs", + since = "3.1.0") +case class DateFromUnixDate(child: Expression) extends UnaryExpression + with ImplicitCastInputTypes with NullIntolerant { + override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType) + + override def dataType: DataType = DateType + + override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int] + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + defineCodeGen(ctx, ev, c => c) + + override def prettyName: String = "date_from_unix_date" +} + +@ExpressionDescription( + usage = "_FUNC_(date) - Returns the number of days since 1970-01-01.", + examples = """ + Examples: + > SELECT _FUNC_(DATE("1970-01-02")); + 1 + """, + group = "datetime_funcs", + since = "3.1.0") +case class UnixDate(child: Expression) extends UnaryExpression + with ExpectsInputTypes with NullIntolerant { + override def inputTypes: Seq[AbstractDataType] = Seq(DateType) + + override def dataType: DataType = IntegerType + + override def nullSafeEval(input: Any): Any = input.asInstanceOf[Int] + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + defineCodeGen(ctx, ev, c => c) + + override def prettyName: String = "unix_date" +} + abstract class IntegralToTimestampBase extends UnaryExpression with ExpectsInputTypes with NullIntolerant { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 8a1a34276341d..79770505ec35d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -1245,6 +1245,30 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkResult(Int.MinValue.toLong - 100) } + test("DATE_FROM_UNIX_DATE") { + def testIntegralFunc(value: Number): Unit = { + checkEvaluation( + DateFromUnixDate(Literal(value.intValue())), + LocalDate.ofEpochDay(value.intValue())) + } + // test null input + checkEvaluation(DateFromUnixDate(Literal(null, IntegerType)), null) + // test integral input + testIntegralInput(testIntegralFunc) + } + + test("UNIX_DATE") { + def testIntegralFunc(value: Number): Unit = { + checkEvaluation( + UnixDate(Literal(LocalDate.ofEpochDay(value.intValue()))), + value.intValue()) + } + // test null input + checkEvaluation(UnixDate(Literal(null, DateType)), null) + // test various inputs + testIntegralInput(testIntegralFunc) + } + test("UNIX_SECONDS") { checkEvaluation(UnixSeconds(Literal(null, TimestampType)), null) var timestamp = Literal(new Timestamp(0L)) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 861062a1f7705..a6d041a588a6d 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 345 + - Number of queries: 347 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -91,6 +91,7 @@ | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | +| org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct | | org.apache.spark.sql.catalyst.expressions.DatePart | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | @@ -289,6 +290,7 @@ | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | SELECT positive(1) | struct<(+ 1):int> | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct | +| org.apache.spark.sql.catalyst.expressions.UnixDate | unix_date | SELECT unix_date(DATE("1970-01-02")) | struct | | org.apache.spark.sql.catalyst.expressions.UnixMicros | unix_micros | SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixMillis | unix_millis | SELECT 
unix_millis(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | | org.apache.spark.sql.catalyst.expressions.UnixSeconds | unix_seconds | SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z')) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index c2ccb3ee0db06..e35266a85d46b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -18,7 +18,10 @@ select TIMESTAMP_SECONDS(0.1234567d), TIMESTAMP_SECONDS(FLOAT(0.1234567)); select UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_SECONDS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_SECONDS(null); select UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MILLIS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MILLIS(null); select UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08Z')), UNIX_MICROS(TIMESTAMP('2020-12-01 14:30:08.999999Z')), UNIX_MICROS(null); - +-- DATE_FROM_UNIX_DATE +select DATE_FROM_UNIX_DATE(0), DATE_FROM_UNIX_DATE(1000), DATE_FROM_UNIX_DATE(null); +-- UNIX_DATE +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null); -- [SPARK-16836] current_date and current_timestamp literals select current_date = current_date(), current_timestamp = current_timestamp(); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 9d99d3b870b3f..18a751f573bc2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 73e9823d96a73..be75f6fb994dd 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 2c39c1291aa70..1e963ed16fd96 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 120 +-- Number of queries: 122 -- !query @@ -111,6 +111,22 @@ struct +-- !query output +1970-01-01 1972-09-27 NULL + + +-- !query +select UNIX_DATE(DATE('1970-01-01')), 
UNIX_DATE(DATE('2020-12-04')), UNIX_DATE(null) +-- !query schema +struct +-- !query output +0 18600 NULL + + -- !query select current_date = current_date(), current_timestamp = current_timestamp() -- !query schema From 512fb32b38e4694abd9f667581cdd5e99dee811f Mon Sep 17 00:00:00 2001 From: luluorta Date: Thu, 3 Dec 2020 14:58:56 +0000 Subject: [PATCH 0650/1009] [SPARK-26218][SQL][FOLLOW UP] Fix the corner case of codegen when casting float to Integer ### What changes were proposed in this pull request? This is a followup of [#27151](https://github.com/apache/spark/pull/27151). It fixes the same issue for the codegen path. ### Why are the changes needed? Result corrupt. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added Unit test. Closes #30585 from luluorta/SPARK-26218. Authored-by: luluorta Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Cast.scala | 52 +++++++------------ .../sql/catalyst/expressions/CastSuite.scala | 5 ++ 2 files changed, 24 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 95f09d64c484b..1b2e2db932970 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1393,25 +1393,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ } - private[this] def lowerAndUpperBound( - fractionType: String, - integralType: String): (String, String) = { - assert(fractionType == "float" || fractionType == "double") - val typeIndicator = fractionType.charAt(0) - val (min, max) = integralType.toLowerCase(Locale.ROOT) match { - case "long" => (Long.MinValue, Long.MaxValue) - case "int" => (Int.MinValue, Int.MaxValue) - case "short" => (Short.MinValue, Short.MaxValue) - case "byte" => (Byte.MinValue, Byte.MaxValue) + private[this] def lowerAndUpperBound(integralType: String): (String, String) = { + val (min, max, typeIndicator) = integralType.toLowerCase(Locale.ROOT) match { + case "long" => (Long.MinValue, Long.MaxValue, "L") + case "int" => (Int.MinValue, Int.MaxValue, "") + case "short" => (Short.MinValue, Short.MaxValue, "") + case "byte" => (Byte.MinValue, Byte.MaxValue, "") } (min.toString + typeIndicator, max.toString + typeIndicator) } - private[this] def castFractionToIntegralTypeCode( - fractionType: String, - integralType: String): CastFunction = { + private[this] def castFractionToIntegralTypeCode(integralType: String): CastFunction = { assert(ansiEnabled) - val (min, max) = lowerAndUpperBound(fractionType, integralType) + val (min, max) = lowerAndUpperBound(integralType) val mathClass = classOf[Math].getName // When casting floating values to integral types, Spark uses the method `Numeric.toInt` // Or `Numeric.toLong` directly. 
For positive floating values, it is equivalent to `Math.floor`; @@ -1449,12 +1443,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "byte") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "byte") - case _: ShortType | _: IntegerType | _: LongType if ansiEnabled => + case ShortType | IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("byte") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "byte") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "byte") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("byte") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (byte) $c;" } @@ -1482,12 +1474,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "short") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "short") - case _: IntegerType | _: LongType if ansiEnabled => + case IntegerType | LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("short") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "short") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "short") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("short") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (short) $c;" } @@ -1513,11 +1503,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => code"$evNull = true;" case TimestampType => castTimestampToIntegralTypeCode(ctx, "int") case DecimalType() => castDecimalToIntegralTypeCode(ctx, "int") - case _: LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("int") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "int") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "int") + case LongType if ansiEnabled => castIntegralTypeToIntegralTypeExactCode("int") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("int") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (int) $c;" } @@ -1544,10 +1532,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case TimestampType => (c, evPrim, evNull) => code"$evPrim = (long) ${timestampToLongCode(c)};" case DecimalType() => castDecimalToIntegralTypeCode(ctx, "long") - case _: FloatType if ansiEnabled => - castFractionToIntegralTypeCode("float", "long") - case _: DoubleType if ansiEnabled => - castFractionToIntegralTypeCode("double", "long") + case FloatType | DoubleType if ansiEnabled => + castFractionToIntegralTypeCode("long") case x: NumericType => (c, evPrim, evNull) => code"$evPrim = (long) $c;" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index d284c417042c1..35db25ec9342c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -975,6 +975,11 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } } } + + test("SPARK-26218: 
Fix the corner case of codegen when casting float to Integer") { + checkExceptionInExpression[ArithmeticException]( + cast(cast(Literal("2147483648"), FloatType), IntegerType), "overflow") + } } /** From 0706e64c49f66431560cdbecb28adcda244c3342 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 3 Dec 2020 15:24:44 +0000 Subject: [PATCH 0651/1009] [SPARK-30098][SQL] Add a configuration to use default datasource as provider for CREATE TABLE command ### What changes were proposed in this pull request? For CRETE TABLE [AS SELECT] command, creates native Parquet table if neither USING nor STORE AS is specified and `spark.sql.legacy.createHiveTableByDefault` is false. This is a retry after we unify the CREATE TABLE syntax. It partially reverts https://github.com/apache/spark/commit/d2bec5e265e0aa4fa527c3f43cfe738cdbdc4598 This PR allows `CREATE EXTERNAL TABLE` when `LOCATION` is present. This was not allowed for data source tables before, which is an unnecessary behavior different with hive tables. ### Why are the changes needed? Changing from Hive text table to native Parquet table has many benefits: 1. be consistent with `DataFrameWriter.saveAsTable`. 2. better performance 3. better support for nested types (Hive text table doesn't work well with nested types, e.g. `insert into t values struct(null)` actually inserts a null value not `struct(null)` if `t` is a Hive text table, which leads to wrong result) 4. better interoperability as Parquet is a more popular open file format. ### Does this PR introduce _any_ user-facing change? No by default. If the config is set, the behavior change is described below: Behavior-wise, the change is very small as the native Parquet table is also Hive-compatible. All the Spark DDL commands that works for hive tables also works for native Parquet tables, with two exceptions: `ALTER TABLE SET [SERDE | SERDEPROPERTIES]` and `LOAD DATA`. char/varchar behavior has been taken care by https://github.com/apache/spark/pull/30412, and there is no behavior difference between data source and hive tables. One potential issue is `CREATE TABLE ... LOCATION ...` while users want to directly access the files later. It's more like a corner case and the legacy config should be good enough. Another potential issue is users may use Spark to create the table and then use Hive to add partitions with different serde. This is not allowed for Spark native tables. ### How was this patch tested? Re-enable the tests Closes #30554 from cloud-fan/create-table. 
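For illustration, a minimal sketch of the intended behavior with the new config (not taken from this patch; it assumes a Hive-enabled PySpark session and a throwaway table name):

```
# With the legacy flag off, CREATE TABLE without USING / STORED AS is expected
# to produce a native data source (Parquet) table instead of a Hive serde table.
spark.conf.set("spark.sql.legacy.createHiveTableByDefault", "false")
spark.sql("CREATE TABLE tbl_no_provider (id INT)")
# The reported provider should now be the default data source, e.g. 'parquet'.
spark.sql("DESCRIBE TABLE EXTENDED tbl_no_provider").where("col_name = 'Provider'").show()
```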
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 9 +++++ .../analysis/ResolveSessionCatalog.scala | 13 +++++--- .../sql/connector/DataSourceV2SQLSuite.scala | 33 ++++++++++--------- .../command/PlanResolutionSuite.scala | 6 ++-- .../execution/HiveCompatibilitySuite.scala | 4 +++ .../sql/hive/HiveShowCreateTableSuite.scala | 18 +++++++++- .../apache/spark/sql/hive/InsertSuite.scala | 3 +- .../spark/sql/hive/QueryPartitionSuite.scala | 5 +-- .../spark/sql/hive/StatisticsSuite.scala | 27 ++++++++++----- .../spark/sql/hive/client/VersionsSuite.scala | 1 + .../sql/hive/execution/HiveDDLSuite.scala | 2 +- .../sql/hive/execution/HiveSerDeSuite.scala | 5 +-- .../hive/execution/HiveTableScanSuite.scala | 5 ++- .../sql/hive/execution/SQLQuerySuite.scala | 1 + .../apache/spark/sql/hive/test/TestHive.scala | 13 ++++---- 15 files changed, 100 insertions(+), 45 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index a1d6f9f608873..b32476a5af71a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2921,6 +2921,15 @@ object SQLConf { .stringConf .createWithDefault("") + val LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT = + buildConf("spark.sql.legacy.createHiveTableByDefault") + .internal() + .doc("When set to true, CREATE TABLE syntax without USING or STORED AS will use Hive " + + s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key} as the table provider.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f6005f4b413a2..f35fcdc07c372 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} /** @@ -636,11 +636,16 @@ class ResolveSessionCatalog( (storageFormat, DDLUtils.HIVE_PROVIDER) } else { // If neither USING nor STORED AS/ROW FORMAT is specified, we create native data source - // tables if it's a CTAS and `conf.convertCTAS` is true. - // TODO: create native data source table by default for non-CTAS. - if (ctas && conf.convertCTAS) { + // tables if: + // 1. `LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT` is false, or + // 2. It's a CTAS and `conf.convertCTAS` is true. + val createHiveTableByDefault = conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) + if (!createHiveTableByDefault || (ctas && conf.convertCTAS)) { (nonHiveStorageFormat, conf.defaultDataSourceName) } else { + logWarning("A Hive serde table will be created as there is no table provider " + + s"specified. 
You can set ${SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key} to false " + + "so that native data source table will be created instead.") (defaultHiveStorage, DDLUtils.HIVE_PROVIDER) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 583bc694dc3be..7635590ab462e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -266,22 +266,23 @@ class DataSourceV2SQLSuite checkAnswer(spark.internalCreateDataFrame(rdd, table.schema), Seq.empty) } - // TODO: ignored by SPARK-31707, restore the test after create table syntax unification - ignore("CreateTable: without USING clause") { - // unset this config to use the default v2 session catalog. - spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) - val testCatalog = catalog("testcat").asTableCatalog - - sql("CREATE TABLE testcat.t1 (id int)") - val t1 = testCatalog.loadTable(Identifier.of(Array(), "t1")) - // Spark shouldn't set the default provider for catalog plugins. - assert(!t1.properties.containsKey(TableCatalog.PROP_PROVIDER)) - - sql("CREATE TABLE t2 (id int)") - val t2 = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog - .loadTable(Identifier.of(Array("default"), "t2")).asInstanceOf[V1Table] - // Spark should set the default provider as DEFAULT_DATA_SOURCE_NAME for the session catalog. - assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) + test("CreateTable: without USING clause") { + withSQLConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key -> "false") { + // unset this config to use the default v2 session catalog. + spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) + val testCatalog = catalog("testcat").asTableCatalog + + sql("CREATE TABLE testcat.t1 (id int)") + val t1 = testCatalog.loadTable(Identifier.of(Array(), "t1")) + // Spark shouldn't set the default provider for catalog plugins. + assert(!t1.properties.containsKey(TableCatalog.PROP_PROVIDER)) + + sql("CREATE TABLE t2 (id int)") + val t2 = spark.sessionState.catalogManager.v2SessionCatalog.asTableCatalog + .loadTable(Identifier.of(Array("default"), "t2")).asInstanceOf[V1Table] + // Spark should set the default provider as DEFAULT_DATA_SOURCE_NAME for the session catalog. + assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) + } } test("CreateTable/RepalceTable: invalid schema if has interval type") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 20cad721d3d0e..33515ad41e918 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1588,7 +1588,7 @@ class PlanResolutionSuite extends AnalysisTest { .add("b", StringType) ) ) - compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile " + "PARTITIONED BY (c INT, d STRING COMMENT 'test2')", createTable( table = "my_tab", @@ -1616,7 +1616,7 @@ class PlanResolutionSuite extends AnalysisTest { ) // Partitioned by a StructType should be accepted by `SparkSqlParser` but will fail an analyze // rule in `AnalyzeCreateTable`. 
- compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) " + + compare("CREATE TABLE my_tab(a INT COMMENT 'test', b STRING) STORED AS textfile " + "PARTITIONED BY (nested STRUCT)", createTable( table = "my_tab", @@ -1890,7 +1890,7 @@ class PlanResolutionSuite extends AnalysisTest { } test("Test CTAS #3") { - val s3 = """CREATE TABLE page_view AS SELECT * FROM src""" + val s3 = """CREATE TABLE page_view STORED AS textfile AS SELECT * FROM src""" val (desc, exists) = extractTableDesc(s3) assert(exists == false) assert(desc.identifier.database == Some("default")) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 462206d8c546f..4ce1964a19bd9 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -40,6 +40,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone + private val originalCreateHiveTable = + TestHive.conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) def testCases: Seq[(String, File)] = { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -59,6 +61,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, "America/Los_Angeles") + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, true) RuleExecutor.resetMetrics() } @@ -69,6 +72,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning) TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) + TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, originalCreateHiveTable) // For debugging dump some statistics about how much time was spent in various optimizer rules logWarning(RuleExecutor.dumpTimeSpent()) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala index 3e7c3e6799724..2fb67c793dc6a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveShowCreateTableSuite.scala @@ -21,10 +21,26 @@ import org.apache.spark.sql.{AnalysisException, ShowCreateTableSuite} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.HiveSerDe +import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} class HiveShowCreateTableSuite extends ShowCreateTableSuite with TestHiveSingleton { + private var origCreateHiveTableConfig = false + + protected override def beforeAll(): Unit = { + super.beforeAll() + origCreateHiveTableConfig = + 
spark.conf.get(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) + spark.conf.set(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, true) + } + + protected override def afterAll(): Unit = { + spark.conf.set( + SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, + origCreateHiveTableConfig) + super.afterAll() + } + test("view") { Seq(true, false).foreach { serde => withView("v1") { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index ebc6cfb77d355..71750e6b3a516 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -277,7 +277,8 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter test("Test partition mode = strict") { withSQLConf(("hive.exec.dynamic.partition.mode", "strict")) { withTable("partitioned") { - sql("CREATE TABLE partitioned (id bigint, data string) PARTITIONED BY (part string)") + sql("CREATE TABLE partitioned (id bigint, data string) USING hive " + + "PARTITIONED BY (part string)") val data = (1 to 10).map(i => (i, s"data-$i", if ((i % 2) == 0) "even" else "odd")) .toDF("id", "data", "part") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala index 483622b16762a..cec6ec1ee1275 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/QueryPartitionSuite.scala @@ -38,7 +38,7 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingl testData.createOrReplaceTempView("testData") // create the table for test - sql(s"CREATE TABLE table_with_partition(key int,value string) " + + sql(s"CREATE TABLE table_with_partition(key int,value string) USING hive " + s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + "SELECT key,value FROM testData") @@ -81,7 +81,8 @@ class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingl test("SPARK-21739: Cast expression should initialize timezoneId") { withTable("table_with_timestamp_partition") { - sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") + sql("CREATE TABLE table_with_timestamp_partition(value int) USING hive " + + "PARTITIONED BY (ts TIMESTAMP)") sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 43d1ba04c561d..2ea98943011f4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -165,7 +165,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto // Partitioned table val partTable = "part_table" withTable(partTable) { - sql(s"CREATE TABLE $partTable (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $partTable (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-01') SELECT * FROM src") sql(s"INSERT INTO TABLE $partTable PARTITION (ds='2010-01-02') SELECT * FROM src") sql(s"INSERT INTO 
TABLE $partTable PARTITION (ds='2010-01-03') SELECT * FROM src") @@ -191,7 +192,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto SQLConf.PARALLEL_FILE_LISTING_IN_STATS_COMPUTATION.key -> "True") { val checkSizeTable = "checkSizeTable" withTable(checkSizeTable) { - sql(s"CREATE TABLE $checkSizeTable (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $checkSizeTable (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-01') SELECT * FROM src") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-02') SELECT * FROM src") sql(s"INSERT INTO TABLE $checkSizeTable PARTITION (ds='2010-01-03') SELECT * FROM src") @@ -274,7 +276,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto test("SPARK-22745 - read Hive's statistics for partition") { val tableName = "hive_stats_part_table" withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") sql(s"INSERT INTO TABLE $tableName PARTITION (ds='2017-01-01') SELECT * FROM src") var partition = spark.sessionState.catalog .getPartition(TableIdentifier(tableName), Map("ds" -> "2017-01-01")) @@ -296,7 +299,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val tableName = "analyzeTable_part" withTable(tableName) { withTempPath { path => - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") val partitionDates = List("2010-01-01", "2010-01-02", "2010-01-03") partitionDates.foreach { ds => @@ -321,6 +325,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $sourceTableName (key STRING, value STRING) + |USING hive |PARTITIONED BY (ds STRING) |LOCATION '${path.toURI}' """.stripMargin) @@ -338,6 +343,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $tableName (key STRING, value STRING) + |USING hive |PARTITIONED BY (ds STRING) |LOCATION '${path.toURI}' """.stripMargin) @@ -371,7 +377,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING)") createPartition("2010-01-01", "SELECT '1', 'A' from src") createPartition("2010-01-02", "SELECT '1', 'A' from src UNION ALL SELECT '1', 'A' from src") @@ -424,7 +431,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING, hr INT)") + sql(s"CREATE TABLE $tableName (key STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr INT)") createPartition("2010-01-01", 10, "SELECT '1', 'A' from src") createPartition("2010-01-01", 11, "SELECT '1', 'A' from src") @@ -472,7 +480,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } withTable(tableName) { - sql(s"CREATE TABLE $tableName (key STRING, value STRING) PARTITIONED BY (ds STRING, hr INT)") + sql(s"CREATE TABLE $tableName (key 
STRING, value STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr INT)") createPartition("2010-01-01", 10, "SELECT '1', 'A' from src") createPartition("2010-01-01", 11, "SELECT '1', 'A' from src") @@ -961,7 +970,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(false, true).foreach { autoUpdate => withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> autoUpdate.toString) { withTable(table) { - sql(s"CREATE TABLE $table (i INT, j STRING) PARTITIONED BY (ds STRING, hr STRING)") + sql(s"CREATE TABLE $table (i INT, j STRING) USING hive " + + "PARTITIONED BY (ds STRING, hr STRING)") // table has two partitions initially for (ds <- Seq("2008-04-08"); hr <- Seq("11", "12")) { sql(s"INSERT OVERWRITE TABLE $table PARTITION (ds='$ds',hr='$hr') SELECT 1, 'a'") @@ -1034,6 +1044,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |CREATE TABLE $managedTable (key INT, value STRING) + |USING hive |PARTITIONED BY (ds STRING, hr STRING) """.stripMargin) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index d9ba6dd80e4ef..684529aa330a7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -798,6 +798,7 @@ class VersionsSuite extends SparkFunSuite with Logging { versionSpark.sql( """ |CREATE TABLE tbl(c1 string) + |USING hive |PARTITIONED BY (ds STRING) """.stripMargin) versionSpark.sql("INSERT OVERWRITE TABLE tbl partition (ds='2') SELECT '1'") diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 2dfb8bb552594..ce31e39985971 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -983,7 +983,7 @@ class HiveDDLSuite } test("alter table partition - storage information") { - sql("CREATE TABLE boxes (height INT, length INT) PARTITIONED BY (width INT)") + sql("CREATE TABLE boxes (height INT, length INT) STORED AS textfile PARTITIONED BY (width INT)") sql("INSERT OVERWRITE TABLE boxes PARTITION (width=4) SELECT 4, 4") val catalog = spark.sessionState.catalog val expectedSerde = "com.sparkbricks.serde.ColumnarSerDe" diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala index f723c9f80c2ab..d7129bcb37e69 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveSerDeSuite.scala @@ -88,7 +88,7 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest with BeforeAndAfte test("Test the default fileformat for Hive-serde tables") { withSQLConf("hive.default.fileformat" -> "orc") { val (desc, exists) = extractTableDesc( - "CREATE TABLE IF NOT EXISTS fileformat_test (id int)") + "CREATE TABLE IF NOT EXISTS fileformat_test (id int) USING hive") assert(exists) assert(desc.storage.inputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat")) assert(desc.storage.outputFormat == Some("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat")) @@ -96,7 +96,8 @@ class HiveSerDeSuite extends HiveComparisonTest with PlanTest 
with BeforeAndAfte } withSQLConf("hive.default.fileformat" -> "parquet") { - val (desc, exists) = extractTableDesc("CREATE TABLE IF NOT EXISTS fileformat_test (id int)") + val (desc, exists) = extractTableDesc( + "CREATE TABLE IF NOT EXISTS fileformat_test (id int) USING hive") assert(exists) val input = desc.storage.inputFormat val output = desc.storage.outputFormat diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 4a50621d89d4e..5b43f82f253ea 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -113,6 +113,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table(id string) + |USING hive |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) """.stripMargin) sql( @@ -157,6 +158,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table(id string) + |USING hive |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 string) """.stripMargin) sql( @@ -182,6 +184,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH sql( s""" |CREATE TABLE $table (id int) + |USING hive |PARTITIONED BY (a int, b int) """.stripMargin) val scan1 = getHiveTableScanExec(s"SELECT * FROM $table WHERE a = 1 AND b = 2") @@ -252,7 +255,7 @@ class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with TestH test("SPARK-32069: Improve error message on reading unexpected directory") { withTable("t") { withTempDir { f => - sql(s"CREATE TABLE t(i LONG) LOCATION '${f.getAbsolutePath}'") + sql(s"CREATE TABLE t(i LONG) USING hive LOCATION '${f.getAbsolutePath}'") sql("INSERT INTO t VALUES(1)") val dir = new File(f.getCanonicalPath + "/data") dir.mkdir() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 79b3c3efe531c..6b82b1267bc66 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2026,6 +2026,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi sql( """ |CREATE TABLE part_table (c STRING) + |STORED AS textfile |PARTITIONED BY (d STRING) """.stripMargin) sql(s"LOAD DATA LOCAL INPATH '$path/part-r-000011' " + diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index a25c61c96f3d8..e996f2c6ec78f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -327,20 +327,22 @@ private[hive] class TestHiveSparkSession( } if (loadTestTables) { + def createTableSQL(tblName: String): String = { + s"CREATE TABLE $tblName (key INT, value STRING) STORED AS textfile" + } // The test tables that are defined in the Hive QTestUtil. 
// /itests/util/src/main/java/org/apache/hadoop/hive/ql/QTestUtil.java // https://github.com/apache/hive/blob/branch-0.13/data/scripts/q_test_init.sql @transient val hiveQTestUtilTables: Seq[TestTable] = Seq( TestTable("src", - "CREATE TABLE src (key INT, value STRING) STORED AS TEXTFILE".cmd, + createTableSQL("src").cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' INTO TABLE src".cmd), TestTable("src1", - "CREATE TABLE src1 (key INT, value STRING) STORED AS TEXTFILE".cmd, + createTableSQL("src1").cmd, s"LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv3.txt")}' INTO TABLE src1".cmd), TestTable("srcpart", () => { - "CREATE TABLE srcpart (key INT, value STRING) PARTITIONED BY (ds STRING, hr STRING)" - .cmd.apply() + s"${createTableSQL("srcpart")} PARTITIONED BY (ds STRING, hr STRING)".cmd.apply() for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- Seq("11", "12")) { s""" |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' @@ -349,8 +351,7 @@ private[hive] class TestHiveSparkSession( } }), TestTable("srcpart1", () => { - "CREATE TABLE srcpart1 (key INT, value STRING) PARTITIONED BY (ds STRING, hr INT)" - .cmd.apply() + s"${createTableSQL("srcpart1")} PARTITIONED BY (ds STRING, hr INT)".cmd.apply() for (ds <- Seq("2008-04-08", "2008-04-09"); hr <- 11 to 12) { s""" |LOAD DATA LOCAL INPATH '${quoteHiveFile("data/files/kv1.txt")}' From bd711863fdcdde21a7d64de8a9b6b7a8bf7c19ec Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 4 Dec 2020 01:37:44 +0900 Subject: [PATCH 0652/1009] [SPARK-33629][PYTHON] Make spark.buffer.size configuration visible on driver side ### What changes were proposed in this pull request? `spark.buffer.size` not applied in driver from pyspark. In this PR I've fixed this issue. ### Why are the changes needed? Apply the mentioned config on driver side. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests + manually. Added the following code temporarily: ``` def local_connect_and_auth(port, auth_secret): ... sock.connect(sa) print("SPARK_BUFFER_SIZE: %d" % int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) <- This is the addition sockfile = sock.makefile("rwb", int(os.environ.get("SPARK_BUFFER_SIZE", 65536))) ... ``` Test: ``` #Compile Spark echo "spark.buffer.size 10000" >> conf/spark-defaults.conf $ ./bin/pyspark Python 3.8.5 (default, Jul 21 2020, 10:48:26) [Clang 11.0.3 (clang-1103.0.32.62)] on darwin Type "help", "copyright", "credits" or "license" for more information. 20/12/03 13:38:13 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 20/12/03 13:38:14 WARN SparkEnv: I/O encryption enabled without RPC encryption: keys will be visible on the wire. Welcome to ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ /__ / .__/\_,_/_/ /_/\_\ version 3.1.0-SNAPSHOT /_/ Using Python version 3.8.5 (default, Jul 21 2020 10:48:26) Spark context Web UI available at http://192.168.0.189:4040 Spark context available as 'sc' (master = local[*], app id = local-1606999094506). SparkSession available as 'spark'. >>> sc.setLogLevel("TRACE") >>> sc.parallelize([0, 2, 3, 4, 6], 5).glom().collect() ... SPARK_BUFFER_SIZE: 10000 ... [[0], [2], [3], [4], [6]] >>> ``` Closes #30592 from gaborgsomogyi/SPARK-33629. 
Authored-by: Gabor Somogyi Signed-off-by: HyukjinKwon --- .../main/scala/org/apache/spark/api/python/PythonUtils.scala | 4 ++++ python/pyspark/context.py | 2 ++ 2 files changed, 6 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 33849f6fcb65f..2f47d28f09103 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -89,4 +89,8 @@ private[spark] object PythonUtils { def getPythonAuthSocketTimeout(sc: JavaSparkContext): Long = { sc.conf.get(org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT) } + + def getSparkBufferSize(sc: JavaSparkContext): Int = { + sc.conf.get(org.apache.spark.internal.config.BUFFER_SIZE) + } } diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 1c542fa897ece..3da535b026137 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -224,6 +224,8 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, self._encryption_enabled = self._jvm.PythonUtils.isEncryptionEnabled(self._jsc) os.environ["SPARK_AUTH_SOCKET_TIMEOUT"] = \ str(self._jvm.PythonUtils.getPythonAuthSocketTimeout(self._jsc)) + os.environ["SPARK_BUFFER_SIZE"] = \ + str(self._jvm.PythonUtils.getSparkBufferSize(self._jsc)) self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') self.pythonVer = "%d.%d" % sys.version_info[:2] From aa13e207c9091e24aae1edcf3bb5cd35d3a27cbb Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Thu, 3 Dec 2020 09:12:30 -0800 Subject: [PATCH 0653/1009] [SPARK-33623][SQL] Add canDeleteWhere to SupportsDelete ### What changes were proposed in this pull request? This PR provides us with a way to check if a data source is going to reject the delete via `deleteWhere` at planning time. ### Why are the changes needed? The only way to support delete statements right now is to implement ``SupportsDelete``. According to its Javadoc, that interface is meant for cases when we can delete data without much effort (e.g. like deleting a complete partition in a Hive table). This PR actually provides us with a way to check if a data source is going to reject the delete via `deleteWhere` at planning time instead of just getting an exception during execution. In the future, we can use this functionality to decide whether Spark should rewrite this delete and execute a distributed query or it can just pass a set of filters. Consider an example of a partitioned Hive table. If we have a delete predicate like `part_col = '2020'`, we can just drop the matching partition to satisfy this delete. In this case, the data source should return `true` from `canDeleteWhere` and use the filters it accepts in `deleteWhere` to drop the partition. I consider this as a delete without significant effort. At the same time, if we have a delete predicate like `id = 10`, Hive tables would not be able to execute this delete using a metadata only operation without rewriting files. In that case, the data source should return `false` from `canDeleteWhere` and we should use a more sophisticated row-level API to find out which records should be removed (the API is yet to be discussed, but we need this PR as a basis). If we decide to support subqueries and all delete use cases by simply extending the existing API, this will mean all data sources will have to implement a lot of Spark logic to determine which records changed. 
I don't think we want to go that way as the Spark logic to determine which records should be deleted is independent of the underlying data source. So the assumption is that Spark will execute a plan to find which records must be deleted for data sources that return `false` from `canDeleteWhere`. ### Does this PR introduce _any_ user-facing change? Yes but it is backward compatible. ### How was this patch tested? This PR comes with a new test. Closes #30562 from aokolnychyi/spark-33623. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../sql/connector/catalog/SupportsDelete.java | 24 ++++++++++++++++++- .../spark/sql/connector/InMemoryTable.scala | 12 ++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 6 +++++ .../sql/connector/DataSourceV2SQLSuite.scala | 14 +++++++++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java index 106f3283a62c8..261e5344be7b9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java @@ -28,8 +28,30 @@ */ @Evolving public interface SupportsDelete { + + /** + * Checks whether it is possible to delete data from a data source table that matches filter + * expressions. + *

      + * Rows should be deleted from the data source iff all of the filter expressions match. + * That is, the expressions must be interpreted as a set of filters that are ANDed together. + *

      + * Spark will call this method at planning time to check whether {@link #deleteWhere(Filter[])} + * would reject the delete operation because it requires significant effort. If this method + * returns false, Spark will not call {@link #deleteWhere(Filter[])} and will try to rewrite + * the delete operation and produce row-level changes if the data source table supports deleting + * individual records. + * + * @param filters filter expressions, used to select rows to delete when all expressions match + * @return true if the delete operation can be performed + */ + default boolean canDeleteWhere(Filter[] filters) { + return true; + } + /** - * Delete data from a data source table that matches filter expressions. + * Delete data from a data source table that matches filter expressions. Note that this method + * will be invoked only if {@link #canDeleteWhere(Filter[])} returns true. *

      * Rows are deleted from the data source iff all of the filter expressions match. That is, the * expressions must be interpreted as a set of filters that are ANDed together. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index cfb044b428e41..c4c5835d9d1f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -335,6 +335,10 @@ class InMemoryTable( } } + override def canDeleteWhere(filters: Array[Filter]): Boolean = { + InMemoryTable.supportsFilters(filters) + } + override def deleteWhere(filters: Array[Filter]): Unit = dataMap.synchronized { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper dataMap --= InMemoryTable.filtersToKeys(dataMap.keys, partCols.map(_.toSeq.quoted), filters) @@ -360,6 +364,14 @@ object InMemoryTable { } } + def supportsFilters(filters: Array[Filter]): Boolean = { + filters.flatMap(splitAnd).forall { + case _: EqualTo => true + case _: IsNotNull => true + case _ => false + } + } + private def extractValue( attr: String, partFieldNames: Seq[String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 0c7bc19ad054e..938ba77fede47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -221,6 +221,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException(s"Exec update failed:" + s" cannot translate expression to source filter: $f")) }).toArray + + if (!table.asDeletable.canDeleteWhere(filters)) { + throw new AnalysisException( + s"Cannot delete from table ${table.name} where ${filters.mkString("[", ", ", "]")}") + } + DeleteFromTableExec(table.asDeletable, filters) :: Nil case _ => throw new AnalysisException("DELETE is only supported with v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 7635590ab462e..6ef4fd1372a78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1812,6 +1812,20 @@ class DataSourceV2SQLSuite } } + test("DeleteFrom: delete with unsupported predicates") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + val exc = intercept[AnalysisException] { + sql(s"DELETE FROM $t WHERE id > 3 AND p > 3") + } + + assert(spark.table(t).count === 3) + assert(exc.getMessage.contains(s"Cannot delete from table $t")) + } + } + test("DeleteFrom: DELETE is only supported with v2 tables") { // unset this config to use the default v2 session catalog. 
spark.conf.unset(V2_SESSION_CATALOG_IMPLEMENTATION.key) From 63f9d474b9ec4b66741fcca1d3c3865c32936a85 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 3 Dec 2020 09:22:53 -0800 Subject: [PATCH 0654/1009] [SPARK-33634][SQL][TESTS] Use Analyzer in PlanResolutionSuite ### What changes were proposed in this pull request? Instead of using several analyzer rules, this PR uses the actual analyzer to run tests in `PlanResolutionSuite`. ### Why are the changes needed? Make the test suite to match reality. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? test-only Closes #30574 from cloud-fan/test. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../command/PlanResolutionSuite.scala | 35 ++++++++----------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 33515ad41e918..9b7222da55368 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -26,14 +26,16 @@ import org.mockito.invocation.InvocationOnMock import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, CTESubstitution, EmptyFunctionRegistry, NoSuchTableException, ResolveCatalogs, ResolvedTable, ResolveInlineTables, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, Analyzer, EmptyFunctionRegistry, NoSuchTableException, ResolvedTable, ResolveSessionCatalog, UnresolvedAttribute, UnresolvedRelation, UnresolvedSubqueryColumnAliases, UnresolvedV2Relation} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Expression, InSubquery, IntegerLiteral, ListQuery, StringLiteral} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, InsertIntoStatement, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTable, AppendData, Assignment, CreateTableAsSelect, CreateTableStatement, CreateV2Table, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, ShowTableProperties, SubqueryAlias, UpdateAction, UpdateTable} +import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.TableChange.{UpdateColumnComment, UpdateColumnType} +import org.apache.spark.sql.connector.expressions.Transform import 
org.apache.spark.sql.execution.datasources.CreateTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.{HiveSerDe, SQLConf} @@ -49,6 +51,7 @@ class PlanResolutionSuite extends AnalysisTest { private val table: Table = { val t = mock(classOf[Table]) when(t.schema()).thenReturn(new StructType().add("i", "int").add("s", "string")) + when(t.partitioning()).thenReturn(Array.empty[Transform]) t } @@ -151,22 +154,12 @@ class PlanResolutionSuite extends AnalysisTest { } else { catalogManagerWithoutDefault } - val analyzer = new Analyzer(catalogManager) - // TODO: run the analyzer directly. - val rules = Seq( - CTESubstitution, - ResolveInlineTables, - analyzer.ResolveRelations, - new ResolveCatalogs(catalogManager), - new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false), - analyzer.ResolveTables, - analyzer.ResolveReferences, - analyzer.ResolveSubqueryColumnAliases, - analyzer.ResolveReferences, - analyzer.ResolveAlterTableChanges) - rules.foldLeft(parsePlan(query)) { - case (plan, rule) => rule.apply(plan) + val analyzer = new Analyzer(catalogManager) { + override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq( + new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false)) } + // We don't check analysis here, as we expect the plan to be unresolved such as `CreateTable`. + analyzer.execute(CatalystSqlParser.parsePlan(query)) } private def parseResolveCompare(query: String, expected: LogicalPlan): Unit = @@ -1156,9 +1149,9 @@ class PlanResolutionSuite extends AnalysisTest { ("ALTER TABLE testcat.tab ALTER COLUMN i TYPE bigint", false), ("ALTER TABLE tab ALTER COLUMN i TYPE bigint", false), (s"ALTER TABLE $v2SessionCatalogTable ALTER COLUMN i TYPE bigint", true), - ("INSERT INTO TABLE tab VALUES (1)", false), - ("INSERT INTO TABLE testcat.tab VALUES (1)", false), - (s"INSERT INTO TABLE $v2SessionCatalogTable VALUES (1)", true), + ("INSERT INTO TABLE tab VALUES (1, 'a')", false), + ("INSERT INTO TABLE testcat.tab VALUES (1, 'a')", false), + (s"INSERT INTO TABLE $v2SessionCatalogTable VALUES (1, 'a')", true), ("DESC TABLE tab", false), ("DESC TABLE testcat.tab", false), (s"DESC TABLE $v2SessionCatalogTable", true), @@ -1183,7 +1176,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case InsertIntoStatement(r: DataSourceV2Relation, _, _, _, _, _) => + case AppendData(r: DataSourceV2Relation, _, _, _) => assert(r.catalog.exists(_ == catlogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => From 7e759b2d95eb3592d62ec010297c39384173a93c Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Fri, 4 Dec 2020 08:35:50 +0800 Subject: [PATCH 0655/1009] [SPARK-33520][ML][PYSPARK] make CrossValidator/TrainValidateSplit/OneVsRest Reader/Writer support Python backend estimator/evaluator ### What changes were proposed in this pull request? make CrossValidator/TrainValidateSplit/OneVsRest Reader/Writer support Python backend estimator/model ### Why are the changes needed? Currently, pyspark support third-party library to define python backend estimator/evaluator, i.e., estimator that inherit `Estimator` instead of `JavaEstimator`, and only can be used in pyspark. 
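As a rough sketch, the round-trip this change is meant to enable looks like the following (modeled on the tests added below; `DummyLogisticRegression` is the test helper this patch adds to `pyspark.testing.mlutils`, while `dataset` and the paths are placeholders):

```
# Persist a CrossValidator (and its fitted model) whose estimator is a pure
# Python-backend estimator, i.e. one that does not wrap a Java object.
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark.ml.tuning import CrossValidator, CrossValidatorModel, ParamGridBuilder
from pyspark.testing.mlutils import DummyLogisticRegression

lr = DummyLogisticRegression()  # Python-only Estimator, no Java wrapper
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
cv = CrossValidator(estimator=lr, estimatorParamMaps=grid,
                    evaluator=BinaryClassificationEvaluator())

# Saving used to fail here, because the writer tried to convert the nested
# Python estimator into a Java instance via JavaMLWriter.
cv.save("/tmp/cv_python_backend")
loaded_cv = CrossValidator.load("/tmp/cv_python_backend")

cv_model = cv.fit(dataset)  # `dataset` is any labeled DataFrame
cv_model.save("/tmp/cv_model_python_backend")
loaded_model = CrossValidatorModel.load("/tmp/cv_model_python_backend")
```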
CrossValidator and TrainValidateSplit support tuning these python backend estimator, but cannot support saving/load, becase CrossValidator and TrainValidateSplit writer implementation is use JavaMLWriter, which require to convert nested estimator and evaluator into java instance. OneVsRest saving/load now only support java backend classifier due to similar issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30471 from WeichenXu123/support_pyio_tuning. Authored-by: Weichen Xu Signed-off-by: Weichen Xu --- python/pyspark/ml/classification.py | 128 ++++++- python/pyspark/ml/classification.pyi | 31 +- python/pyspark/ml/tests/test_persistence.py | 14 +- python/pyspark/ml/tests/test_tuning.py | 97 ++++-- python/pyspark/ml/tuning.py | 357 +++++++++++++++++++- python/pyspark/ml/tuning.pyi | 40 +++ python/pyspark/ml/util.py | 42 ++- python/pyspark/ml/util.pyi | 2 + python/pyspark/testing/mlutils.py | 87 +++++ 9 files changed, 739 insertions(+), 59 deletions(-) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 763038ede876a..0553a61c6c771 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -15,6 +15,7 @@ # limitations under the License. # +import os import operator import sys import uuid @@ -33,7 +34,9 @@ _HasVarianceImpurity, _TreeClassifierParams from pyspark.ml.regression import _FactorizationMachinesParams, DecisionTreeRegressionModel from pyspark.ml.base import _PredictorParams -from pyspark.ml.util import JavaMLWritable, JavaMLReadable, HasTrainingSummary +from pyspark.ml.util import DefaultParamsReader, DefaultParamsWriter, \ + JavaMLReadable, JavaMLReader, JavaMLWritable, JavaMLWriter, \ + MLReader, MLReadable, MLWriter, MLWritable, HasTrainingSummary from pyspark.ml.wrapper import JavaParams, \ JavaPredictor, JavaPredictionModel, JavaWrapper from pyspark.ml.common import inherit_doc @@ -2760,7 +2763,7 @@ def getClassifier(self): @inherit_doc -class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, JavaMLReadable, JavaMLWritable): +class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, MLReadable, MLWritable): """ Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. 
@@ -2991,8 +2994,73 @@ def _to_java(self): _java_obj.setRawPredictionCol(self.getRawPredictionCol()) return _java_obj + @classmethod + def read(cls): + return OneVsRestReader(cls) + + def write(self): + if isinstance(self.getClassifier(), JavaMLWritable): + return JavaMLWriter(self) + else: + return OneVsRestWriter(self) + + +class _OneVsRestSharedReadWrite: + @staticmethod + def saveImpl(instance, sc, path, extraMetadata=None): + skipParams = ['classifier'] + jsonParams = DefaultParamsWriter.extractJsonParams(instance, skipParams) + DefaultParamsWriter.saveMetadata(instance, path, sc, paramMap=jsonParams, + extraMetadata=extraMetadata) + classifierPath = os.path.join(path, 'classifier') + instance.getClassifier().save(classifierPath) + + @staticmethod + def loadClassifier(path, sc): + classifierPath = os.path.join(path, 'classifier') + return DefaultParamsReader.loadParamsInstance(classifierPath, sc) + + @staticmethod + def validateParams(instance): + elems_to_check = [instance.getClassifier()] + if isinstance(instance, OneVsRestModel): + elems_to_check.extend(instance.models) + + for elem in elems_to_check: + if not isinstance(elem, MLWritable): + raise ValueError(f'OneVsRest write will fail because it contains {elem.uid} ' + f'which is not writable.') + + +@inherit_doc +class OneVsRestReader(MLReader): + def __init__(self, cls): + super(OneVsRestReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) + ova = OneVsRest(classifier=classifier)._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(ova, metadata, skipParams=['classifier']) + return ova + + +@inherit_doc +class OneVsRestWriter(MLWriter): + def __init__(self, instance): + super(OneVsRestWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _OneVsRestSharedReadWrite.validateParams(self.instance) + _OneVsRestSharedReadWrite.saveImpl(self.instance, self.sc, path) -class OneVsRestModel(Model, _OneVsRestParams, JavaMLReadable, JavaMLWritable): + +class OneVsRestModel(Model, _OneVsRestParams, MLReadable, MLWritable): """ Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. 
@@ -3023,6 +3091,9 @@ def setRawPredictionCol(self, value): def __init__(self, models): super(OneVsRestModel, self).__init__() self.models = models + if not isinstance(models[0], JavaMLWritable): + return + # set java instance java_models = [model._to_java() for model in self.models] sc = SparkContext._active_spark_context java_models_array = JavaWrapper._new_java_array(java_models, @@ -3160,6 +3231,57 @@ def _to_java(self): _java_obj.set("weightCol", self.getWeightCol()) return _java_obj + @classmethod + def read(cls): + return OneVsRestModelReader(cls) + + def write(self): + if all(map(lambda elem: isinstance(elem, JavaMLWritable), + [self.getClassifier()] + self.models)): + return JavaMLWriter(self) + else: + return OneVsRestModelWriter(self) + + +@inherit_doc +class OneVsRestModelReader(MLReader): + def __init__(self, cls): + super(OneVsRestModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) + numClasses = metadata['numClasses'] + subModels = [None] * numClasses + for idx in range(numClasses): + subModelPath = os.path.join(path, f'model_{idx}') + subModels[idx] = DefaultParamsReader.loadParamsInstance(subModelPath, self.sc) + ovaModel = OneVsRestModel(subModels)._resetUid(metadata['uid']) + ovaModel.set(ovaModel.classifier, classifier) + DefaultParamsReader.getAndSetParams(ovaModel, metadata, skipParams=['classifier']) + return ovaModel + + +@inherit_doc +class OneVsRestModelWriter(MLWriter): + def __init__(self, instance): + super(OneVsRestModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _OneVsRestSharedReadWrite.validateParams(self.instance) + instance = self.instance + numClasses = len(instance.models) + extraMetadata = {'numClasses': numClasses} + _OneVsRestSharedReadWrite.saveImpl(instance, self.sc, path, extraMetadata=extraMetadata) + for idx in range(numClasses): + subModelPath = os.path.join(path, f'model_{idx}') + instance.models[idx].save(subModelPath) + @inherit_doc class FMClassifier(_JavaProbabilisticClassifier, _FactorizationMachinesParams, JavaMLWritable, diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi index c44176a13a69b..a4a3d21018ad9 100644 --- a/python/pyspark/ml/classification.pyi +++ b/python/pyspark/ml/classification.pyi @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -from typing import Any, List, Optional +from typing import Any, List, Optional, Type from pyspark.ml._typing import JM, M, P, T, ParamMap import abc @@ -53,7 +53,8 @@ from pyspark.ml.tree import ( _TreeClassifierParams, _TreeEnsembleModel, ) -from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable +from pyspark.ml.util import HasTrainingSummary, JavaMLReadable, JavaMLWritable, \ + MLReader, MLReadable, MLWriter, MLWritable from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper from pyspark.ml.linalg import Matrix, Vector @@ -797,8 +798,8 @@ class OneVsRest( Estimator[OneVsRestModel], _OneVsRestParams, HasParallelism, - JavaMLReadable[OneVsRest], - JavaMLWritable, + MLReadable[OneVsRest], + MLWritable, ): def __init__( self, @@ -832,7 +833,7 @@ class OneVsRest( def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... 
class OneVsRestModel( - Model, _OneVsRestParams, JavaMLReadable[OneVsRestModel], JavaMLWritable + Model, _OneVsRestParams, MLReadable[OneVsRestModel], MLWritable ): models: List[Transformer] def __init__(self, models: List[Transformer]) -> None: ... @@ -841,6 +842,26 @@ class OneVsRestModel( def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... +class OneVsRestWriter(MLWriter): + instance: OneVsRest + def __init__(self, instance: OneVsRest) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class OneVsRestReader(MLReader[OneVsRest]): + cls: Type[OneVsRest] + def __init__(self, cls: Type[OneVsRest]) -> None: ... + def load(self, path: str) -> OneVsRest: ... + +class OneVsRestModelWriter(MLWriter): + instance: OneVsRestModel + def __init__(self, instance: OneVsRestModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class OneVsRestModelReader(MLReader[OneVsRestModel]): + cls: Type[OneVsRestModel] + def __init__(self, cls: Type[OneVsRestModel]) -> None: ... + def load(self, path: str) -> OneVsRestModel: ... + class FMClassifier( _JavaProbabilisticClassifier[FMClassificationModel], _FactorizationMachinesParams, diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 0bbcfcdf50e95..77a6c0309628a 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -237,6 +237,11 @@ def _compare_pipelines(self, m1, m2): self.assertEqual(len(m1.models), len(m2.models)) for x, y in zip(m1.models, m2.models): self._compare_pipelines(x, y) + elif isinstance(m1, Params): + # Test on python backend Estimator/Transformer/Model/Evaluator + self.assertEqual(len(m1.params), len(m2.params)) + for p in m1.params: + self._compare_params(m1, m2, p) else: raise RuntimeError("_compare_pipelines does not yet support type: %s" % type(m1)) @@ -326,14 +331,14 @@ def test_python_transformer_pipeline_persistence(self): except OSError: pass - def test_onevsrest(self): + def _run_test_onevsrest(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() df = self.spark.createDataFrame([(0.0, 0.5, Vectors.dense(1.0, 0.8)), (1.0, 0.5, Vectors.sparse(2, [], [])), (2.0, 1.0, Vectors.dense(0.5, 0.5))] * 10, ["label", "wt", "features"]) - lr = LogisticRegression(maxIter=5, regParam=0.01) + lr = LogisticRegressionCls(maxIter=5, regParam=0.01) ovr = OneVsRest(classifier=lr) def reload_and_compare(ovr, suffix): @@ -350,6 +355,11 @@ def reload_and_compare(ovr, suffix): reload_and_compare(OneVsRest(classifier=lr), "ovr") reload_and_compare(OneVsRest(classifier=lr).setWeightCol("wt"), "ovrw") + def test_onevsrest(self): + from pyspark.testing.mlutils import DummyLogisticRegression + self._run_test_onevsrest(LogisticRegression) + self._run_test_onevsrest(DummyLogisticRegression) + def test_decisiontree_classifier(self): dt = DecisionTreeClassifier(maxDepth=1) path = tempfile.mkdtemp() diff --git a/python/pyspark/ml/tests/test_tuning.py b/python/pyspark/ml/tests/test_tuning.py index ebd7457e4d30a..3cde34facbf9a 100644 --- a/python/pyspark/ml/tests/test_tuning.py +++ b/python/pyspark/ml/tests/test_tuning.py @@ -28,7 +28,8 @@ from pyspark.ml.tuning import CrossValidator, CrossValidatorModel, ParamGridBuilder, \ TrainValidationSplit, TrainValidationSplitModel from pyspark.sql.functions import rand -from pyspark.testing.mlutils import SparkSessionTestCase +from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression, \ + 
DummyLogisticRegressionModel, SparkSessionTestCase class HasInducedError(Params): @@ -201,7 +202,7 @@ def test_param_grid_type_coercion(self): for v in param.values(): assert(type(v) == float) - def test_save_load_trained_model(self): + def _run_test_save_load_trained_model(self, LogisticRegressionCls, LogisticRegressionModelCls): # This tests saving and loading the trained model only. # Save/load for CrossValidator will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -212,7 +213,7 @@ def test_save_load_trained_model(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() evaluator = BinaryClassificationEvaluator() cv = CrossValidator( @@ -228,7 +229,7 @@ def test_save_load_trained_model(self): lrModelPath = temp_path + "/lrModel" lrModel.save(lrModelPath) - loadedLrModel = LogisticRegressionModel.load(lrModelPath) + loadedLrModel = LogisticRegressionModelCls.load(lrModelPath) self.assertEqual(loadedLrModel.uid, lrModel.uid) self.assertEqual(loadedLrModel.intercept, lrModel.intercept) @@ -248,7 +249,12 @@ def test_save_load_trained_model(self): loadedCvModel.isSet(param) for param in loadedCvModel.params )) - def test_save_load_simple_estimator(self): + def test_save_load_trained_model(self): + self._run_test_save_load_trained_model(LogisticRegression, LogisticRegressionModel) + self._run_test_save_load_trained_model(DummyLogisticRegression, + DummyLogisticRegressionModel) + + def _run_test_save_load_simple_estimator(self, LogisticRegressionCls, evaluatorCls): temp_path = tempfile.mkdtemp() dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -258,9 +264,9 @@ def test_save_load_simple_estimator(self): (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() - evaluator = BinaryClassificationEvaluator() + evaluator = evaluatorCls() # test save/load of CrossValidator cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) @@ -278,6 +284,12 @@ def test_save_load_simple_estimator(self): loadedModel = CrossValidatorModel.load(cvModelPath) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) + def test_save_load_simple_estimator(self): + self._run_test_save_load_simple_estimator( + LogisticRegression, BinaryClassificationEvaluator) + self._run_test_save_load_simple_estimator( + DummyLogisticRegression, DummyEvaluator) + def test_parallel_evaluation(self): dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -343,7 +355,7 @@ def checkSubModels(subModels): for j in range(len(grid)): self.assertEqual(cvModel.subModels[i][j].uid, cvModel3.subModels[i][j].uid) - def test_save_load_nested_estimator(self): + def _run_test_save_load_nested_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -353,9 +365,9 @@ def test_save_load_nested_estimator(self): (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(100) - lr2 = LogisticRegression().setMaxIter(150) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(100) + lr2 = LogisticRegressionCls().setMaxIter(150) grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, 
lr2]).build() evaluator = MulticlassClassificationEvaluator() @@ -385,7 +397,11 @@ def test_save_load_nested_estimator(self): self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, cvModel.bestModel.uid) - def test_save_load_pipeline_estimator(self): + def test_save_load_nested_estimator(self): + self._run_test_save_load_nested_estimator(LogisticRegression) + self._run_test_save_load_nested_estimator(DummyLogisticRegression) + + def _run_test_save_load_pipeline_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() training = self.spark.createDataFrame([ (0, "a b c d e spark", 1.0), @@ -402,9 +418,9 @@ def test_save_load_pipeline_estimator(self): tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(5) - lr2 = LogisticRegression().setMaxIter(10) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(5) + lr2 = LogisticRegressionCls().setMaxIter(10) pipeline = Pipeline(stages=[tokenizer, hashingTF, ova]) @@ -464,6 +480,10 @@ def test_save_load_pipeline_estimator(self): original_nested_pipeline_model.stages): self.assertEqual(loadedStage.uid, originalStage.uid) + def test_save_load_pipeline_estimator(self): + self._run_test_save_load_pipeline_estimator(LogisticRegression) + self._run_test_save_load_pipeline_estimator(DummyLogisticRegression) + def test_user_specified_folds(self): from pyspark.sql import functions as F @@ -593,7 +613,7 @@ def test_fit_maximize_metric(self): "validationMetrics has the same size of grid parameter") self.assertEqual(1.0, max(validationMetrics)) - def test_save_load_trained_model(self): + def _run_test_save_load_trained_model(self, LogisticRegressionCls, LogisticRegressionModelCls): # This tests saving and loading the trained model only. # Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -604,7 +624,7 @@ def test_save_load_trained_model(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() evaluator = BinaryClassificationEvaluator() tvs = TrainValidationSplit( @@ -619,7 +639,7 @@ def test_save_load_trained_model(self): lrModelPath = temp_path + "/lrModel" lrModel.save(lrModelPath) - loadedLrModel = LogisticRegressionModel.load(lrModelPath) + loadedLrModel = LogisticRegressionModelCls.load(lrModelPath) self.assertEqual(loadedLrModel.uid, lrModel.uid) self.assertEqual(loadedLrModel.intercept, lrModel.intercept) @@ -636,7 +656,12 @@ def test_save_load_trained_model(self): loadedTvsModel.isSet(param) for param in loadedTvsModel.params )) - def test_save_load_simple_estimator(self): + def test_save_load_trained_model(self): + self._run_test_save_load_trained_model(LogisticRegression, LogisticRegressionModel) + self._run_test_save_load_trained_model(DummyLogisticRegression, + DummyLogisticRegressionModel) + + def _run_test_save_load_simple_estimator(self, LogisticRegressionCls, evaluatorCls): # This tests saving and loading the trained model only. 
# Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -647,9 +672,9 @@ def test_save_load_simple_estimator(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - lr = LogisticRegression() + lr = LogisticRegressionCls() grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build() - evaluator = BinaryClassificationEvaluator() + evaluator = evaluatorCls() tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) tvsModel = tvs.fit(dataset) @@ -666,6 +691,12 @@ def test_save_load_simple_estimator(self): loadedModel = TrainValidationSplitModel.load(tvsModelPath) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) + def test_save_load_simple_estimator(self): + self._run_test_save_load_simple_estimator( + LogisticRegression, BinaryClassificationEvaluator) + self._run_test_save_load_simple_estimator( + DummyLogisticRegression, DummyEvaluator) + def test_parallel_evaluation(self): dataset = self.spark.createDataFrame( [(Vectors.dense([0.0]), 0.0), @@ -718,7 +749,7 @@ def test_expose_sub_models(self): for i in range(len(grid)): self.assertEqual(tvsModel.subModels[i].uid, tvsModel3.subModels[i].uid) - def test_save_load_nested_estimator(self): + def _run_test_save_load_nested_estimator(self, LogisticRegressionCls): # This tests saving and loading the trained model only. # Save/load for TrainValidationSplit will be added later: SPARK-13786 temp_path = tempfile.mkdtemp() @@ -729,9 +760,9 @@ def test_save_load_nested_estimator(self): (Vectors.dense([0.6]), 1.0), (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"]) - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(100) - lr2 = LogisticRegression().setMaxIter(150) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(100) + lr2 = LogisticRegressionCls().setMaxIter(150) grid = ParamGridBuilder().addGrid(ova.classifier, [lr1, lr2]).build() evaluator = MulticlassClassificationEvaluator() @@ -759,7 +790,11 @@ def test_save_load_nested_estimator(self): self.assert_param_maps_equal(loadedModel.getEstimatorParamMaps(), grid) self.assertEqual(loadedModel.bestModel.uid, tvsModel.bestModel.uid) - def test_save_load_pipeline_estimator(self): + def test_save_load_nested_estimator(self): + self._run_test_save_load_nested_estimator(LogisticRegression) + self._run_test_save_load_nested_estimator(DummyLogisticRegression) + + def _run_test_save_load_pipeline_estimator(self, LogisticRegressionCls): temp_path = tempfile.mkdtemp() training = self.spark.createDataFrame([ (0, "a b c d e spark", 1.0), @@ -776,9 +811,9 @@ def test_save_load_pipeline_estimator(self): tokenizer = Tokenizer(inputCol="text", outputCol="words") hashingTF = HashingTF(inputCol=tokenizer.getOutputCol(), outputCol="features") - ova = OneVsRest(classifier=LogisticRegression()) - lr1 = LogisticRegression().setMaxIter(5) - lr2 = LogisticRegression().setMaxIter(10) + ova = OneVsRest(classifier=LogisticRegressionCls()) + lr1 = LogisticRegressionCls().setMaxIter(5) + lr2 = LogisticRegressionCls().setMaxIter(10) pipeline = Pipeline(stages=[tokenizer, hashingTF, ova]) @@ -836,6 +871,10 @@ def test_save_load_pipeline_estimator(self): original_nested_pipeline_model.stages): self.assertEqual(loadedStage.uid, originalStage.uid) + def test_save_load_pipeline_estimator(self): + self._run_test_save_load_pipeline_estimator(LogisticRegression) + 
self._run_test_save_load_pipeline_estimator(DummyLogisticRegression) + def test_copy(self): dataset = self.spark.createDataFrame([ (10, 10.0), diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 2b5a9857b0f18..2c083182de470 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -15,6 +15,7 @@ # limitations under the License. # +import os import sys import itertools from multiprocessing.pool import ThreadPool @@ -22,12 +23,13 @@ import numpy as np from pyspark import keyword_only, since, SparkContext -from pyspark.ml import Estimator, Model -from pyspark.ml.common import _py2java, _java2py +from pyspark.ml import Estimator, Transformer, Model +from pyspark.ml.common import inherit_doc, _py2java, _java2py +from pyspark.ml.evaluation import Evaluator from pyspark.ml.param import Params, Param, TypeConverters from pyspark.ml.param.shared import HasCollectSubModels, HasParallelism, HasSeed -from pyspark.ml.util import MLReadable, MLWritable, JavaMLWriter, JavaMLReader, \ - MetaAlgorithmReadWrite +from pyspark.ml.util import DefaultParamsReader, DefaultParamsWriter, MetaAlgorithmReadWrite, \ + MLReadable, MLReader, MLWritable, MLWriter, JavaMLReader, JavaMLWriter from pyspark.ml.wrapper import JavaParams, JavaEstimator, JavaWrapper from pyspark.sql.functions import col, lit, rand, UserDefinedFunction from pyspark.sql.types import BooleanType @@ -229,6 +231,7 @@ def _to_java_impl(self): class _ValidatorSharedReadWrite: + @staticmethod def meta_estimator_transfer_param_maps_to_java(pyEstimator, pyParamMaps): pyStages = MetaAlgorithmReadWrite.getAllNestedStages(pyEstimator) @@ -275,10 +278,8 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): raise ValueError('Resolve param in estimatorParamMaps failed: ' + javaParam.parent() + '.' + javaParam.name()) javaValue = javaPair.value() - if sc._jvm.Class.forName("org.apache.spark.ml.PipelineStage").isInstance(javaValue): - # Note: JavaParams._from_java support both JavaEstimator/JavaTransformer class - # and Estimator/Transformer class which implements `_from_java` static method - # (such as OneVsRest, Pipeline class). 
+ if sc._jvm.Class.forName("org.apache.spark.ml.util.DefaultParamsWritable") \ + .isInstance(javaValue): pyValue = JavaParams._from_java(javaValue) else: pyValue = _java2py(sc, javaValue) @@ -286,6 +287,222 @@ def meta_estimator_transfer_param_maps_from_java(pyEstimator, javaParamMaps): pyParamMaps.append(pyParamMap) return pyParamMaps + @staticmethod + def is_java_convertible(instance): + allNestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance.getEstimator()) + evaluator_convertible = isinstance(instance.getEvaluator(), JavaParams) + estimator_convertible = all(map(lambda stage: hasattr(stage, '_to_java'), allNestedStages)) + return estimator_convertible and evaluator_convertible + + @staticmethod + def saveImpl(path, instance, sc, extraMetadata=None): + numParamsNotJson = 0 + jsonEstimatorParamMaps = [] + for paramMap in instance.getEstimatorParamMaps(): + jsonParamMap = [] + for p, v in paramMap.items(): + jsonParam = {'parent': p.parent, 'name': p.name} + if (isinstance(v, Estimator) and not MetaAlgorithmReadWrite.isMetaEstimator(v)) \ + or isinstance(v, Transformer) or isinstance(v, Evaluator): + relative_path = f'epm_{p.name}{numParamsNotJson}' + param_path = os.path.join(path, relative_path) + numParamsNotJson += 1 + v.save(param_path) + jsonParam['value'] = relative_path + jsonParam['isJson'] = False + elif isinstance(v, MLWritable): + raise RuntimeError( + "ValidatorSharedReadWrite.saveImpl does not handle parameters of type: " + "MLWritable that are not Estimaor/Evaluator/Transformer, and if parameter " + "is estimator, it cannot be meta estimator such as Validator or OneVsRest") + else: + jsonParam['value'] = v + jsonParam['isJson'] = True + jsonParamMap.append(jsonParam) + jsonEstimatorParamMaps.append(jsonParamMap) + + skipParams = ['estimator', 'evaluator', 'estimatorParamMaps'] + jsonParams = DefaultParamsWriter.extractJsonParams(instance, skipParams) + jsonParams['estimatorParamMaps'] = jsonEstimatorParamMaps + + DefaultParamsWriter.saveMetadata(instance, path, sc, extraMetadata, jsonParams) + evaluatorPath = os.path.join(path, 'evaluator') + instance.getEvaluator().save(evaluatorPath) + estimatorPath = os.path.join(path, 'estimator') + instance.getEstimator().save(estimatorPath) + + @staticmethod + def load(path, sc, metadata): + evaluatorPath = os.path.join(path, 'evaluator') + evaluator = DefaultParamsReader.loadParamsInstance(evaluatorPath, sc) + estimatorPath = os.path.join(path, 'estimator') + estimator = DefaultParamsReader.loadParamsInstance(estimatorPath, sc) + + uidToParams = MetaAlgorithmReadWrite.getUidMap(estimator) + uidToParams[evaluator.uid] = evaluator + + jsonEstimatorParamMaps = metadata['paramMap']['estimatorParamMaps'] + + estimatorParamMaps = [] + for jsonParamMap in jsonEstimatorParamMaps: + paramMap = {} + for jsonParam in jsonParamMap: + est = uidToParams[jsonParam['parent']] + param = getattr(est, jsonParam['name']) + if 'isJson' not in jsonParam or ('isJson' in jsonParam and jsonParam['isJson']): + value = jsonParam['value'] + else: + relativePath = jsonParam['value'] + valueSavedPath = os.path.join(path, relativePath) + value = DefaultParamsReader.loadParamsInstance(valueSavedPath, sc) + paramMap[param] = value + estimatorParamMaps.append(paramMap) + + return metadata, estimator, evaluator, estimatorParamMaps + + @staticmethod + def validateParams(instance): + estiamtor = instance.getEstimator() + evaluator = instance.getEvaluator() + uidMap = MetaAlgorithmReadWrite.getUidMap(estiamtor) + + for elem in [evaluator] + 
list(uidMap.values()): + if not isinstance(elem, MLWritable): + raise ValueError(f'Validator write will fail because it contains {elem.uid} ' + f'which is not writable.') + + estimatorParamMaps = instance.getEstimatorParamMaps() + paramErr = 'Validator save requires all Params in estimatorParamMaps to apply to ' \ + f'its Estimator, An extraneous Param was found: ' + for paramMap in estimatorParamMaps: + for param in paramMap: + if param.parent not in uidMap: + raise ValueError(paramErr + repr(param)) + + @staticmethod + def getValidatorModelWriterPersistSubModelsParam(writer): + if 'persistsubmodels' in writer.optionMap: + persistSubModelsParam = writer.optionMap['persistsubmodels'].lower() + if persistSubModelsParam == 'true': + return True + elif persistSubModelsParam == 'false': + return False + else: + raise ValueError( + f'persistSubModels option value {persistSubModelsParam} is invalid, ' + f"the possible values are True, 'True' or False, 'False'") + else: + return writer.instance.subModels is not None + + +_save_with_persist_submodels_no_submodels_found_err = \ + 'When persisting tuning models, you can only set persistSubModels to true if the tuning ' \ + 'was done with collectSubModels set to true. To save the sub-models, try rerunning fitting ' \ + 'with collectSubModels set to true.' + + +@inherit_doc +class CrossValidatorReader(MLReader): + + def __init__(self, cls): + super(CrossValidatorReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + cv = CrossValidator(estimator=estimator, + estimatorParamMaps=estimatorParamMaps, + evaluator=evaluator) + cv = cv._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(cv, metadata, skipParams=['estimatorParamMaps']) + return cv + + +@inherit_doc +class CrossValidatorWriter(MLWriter): + + def __init__(self, instance): + super(CrossValidatorWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) + + +@inherit_doc +class CrossValidatorModelReader(MLReader): + + def __init__(self, cls): + super(CrossValidatorModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + numFolds = metadata['paramMap']['numFolds'] + bestModelPath = os.path.join(path, 'bestModel') + bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + avgMetrics = metadata['avgMetrics'] + persistSubModels = ('persistSubModels' in metadata) and metadata['persistSubModels'] + + if persistSubModels: + subModels = [[None] * len(estimatorParamMaps)] * numFolds + for splitIndex in range(numFolds): + for paramIndex in range(len(estimatorParamMaps)): + modelPath = os.path.join( + path, 'subModels', f'fold{splitIndex}', f'{paramIndex}') + subModels[splitIndex][paramIndex] = \ + DefaultParamsReader.loadParamsInstance(modelPath, self.sc) + else: + subModels = None + + cvModel = CrossValidatorModel(bestModel, 
avgMetrics=avgMetrics, subModels=subModels) + cvModel = cvModel._resetUid(metadata['uid']) + cvModel.set(cvModel.estimator, estimator) + cvModel.set(cvModel.estimatorParamMaps, estimatorParamMaps) + cvModel.set(cvModel.evaluator, evaluator) + DefaultParamsReader.getAndSetParams( + cvModel, metadata, skipParams=['estimatorParamMaps']) + return cvModel + + +@inherit_doc +class CrossValidatorModelWriter(MLWriter): + + def __init__(self, instance): + super(CrossValidatorModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + instance = self.instance + persistSubModels = _ValidatorSharedReadWrite \ + .getValidatorModelWriterPersistSubModelsParam(self) + extraMetadata = {'avgMetrics': instance.avgMetrics, + 'persistSubModels': persistSubModels} + _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) + bestModelPath = os.path.join(path, 'bestModel') + instance.bestModel.save(bestModelPath) + if persistSubModels: + if instance.subModels is None: + raise ValueError(_save_with_persist_submodels_no_submodels_found_err) + subModelsPath = os.path.join(path, 'subModels') + for splitIndex in range(instance.getNumFolds()): + splitPath = os.path.join(subModelsPath, f'fold{splitIndex}') + for paramIndex in range(len(instance.getEstimatorParamMaps())): + modelPath = os.path.join(splitPath, f'{paramIndex}') + instance.subModels[splitIndex][paramIndex].save(modelPath) + class _CrossValidatorParams(_ValidatorParams): """ @@ -553,13 +770,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return CrossValidatorWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return CrossValidatorReader(cls) @classmethod def _from_java(cls, java_stage): @@ -662,13 +881,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return CrossValidatorModelWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return CrossValidatorModelReader(cls) @classmethod def _from_java(cls, java_stage): @@ -738,6 +959,106 @@ def _to_java(self): return _java_obj +@inherit_doc +class TrainValidationSplitReader(MLReader): + + def __init__(self, cls): + super(TrainValidationSplitReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + tvs = TrainValidationSplit(estimator=estimator, + estimatorParamMaps=estimatorParamMaps, + evaluator=evaluator) + tvs = tvs._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(tvs, metadata, skipParams=['estimatorParamMaps']) + return tvs + + +@inherit_doc +class TrainValidationSplitWriter(MLWriter): + + def __init__(self, instance): + super(TrainValidationSplitWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + 
_ValidatorSharedReadWrite.validateParams(self.instance) + _ValidatorSharedReadWrite.saveImpl(path, self.instance, self.sc) + + +@inherit_doc +class TrainValidationSplitModelReader(MLReader): + + def __init__(self, cls): + super(TrainValidationSplitModelReader, self).__init__() + self.cls = cls + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + if not DefaultParamsReader.isPythonParamsInstance(metadata): + return JavaMLReader(self.cls).load(path) + else: + metadata, estimator, evaluator, estimatorParamMaps = \ + _ValidatorSharedReadWrite.load(path, self.sc, metadata) + bestModelPath = os.path.join(path, 'bestModel') + bestModel = DefaultParamsReader.loadParamsInstance(bestModelPath, self.sc) + validationMetrics = metadata['validationMetrics'] + persistSubModels = ('persistSubModels' in metadata) and metadata['persistSubModels'] + + if persistSubModels: + subModels = [None] * len(estimatorParamMaps) + for paramIndex in range(len(estimatorParamMaps)): + modelPath = os.path.join(path, 'subModels', f'{paramIndex}') + subModels[paramIndex] = \ + DefaultParamsReader.loadParamsInstance(modelPath, self.sc) + else: + subModels = None + + tvsModel = TrainValidationSplitModel( + bestModel, validationMetrics=validationMetrics, subModels=subModels) + tvsModel = tvsModel._resetUid(metadata['uid']) + tvsModel.set(tvsModel.estimator, estimator) + tvsModel.set(tvsModel.estimatorParamMaps, estimatorParamMaps) + tvsModel.set(tvsModel.evaluator, evaluator) + DefaultParamsReader.getAndSetParams( + tvsModel, metadata, skipParams=['estimatorParamMaps']) + return tvsModel + + +@inherit_doc +class TrainValidationSplitModelWriter(MLWriter): + + def __init__(self, instance): + super(TrainValidationSplitModelWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + _ValidatorSharedReadWrite.validateParams(self.instance) + instance = self.instance + persistSubModels = _ValidatorSharedReadWrite \ + .getValidatorModelWriterPersistSubModelsParam(self) + + extraMetadata = {'validationMetrics': instance.validationMetrics, + 'persistSubModels': persistSubModels} + _ValidatorSharedReadWrite.saveImpl(path, instance, self.sc, extraMetadata=extraMetadata) + bestModelPath = os.path.join(path, 'bestModel') + instance.bestModel.save(bestModelPath) + if persistSubModels: + if instance.subModels is None: + raise ValueError(_save_with_persist_submodels_no_submodels_found_err) + subModelsPath = os.path.join(path, 'subModels') + for paramIndex in range(len(instance.getEstimatorParamMaps())): + modelPath = os.path.join(subModelsPath, f'{paramIndex}') + instance.subModels[paramIndex].save(modelPath) + + class _TrainValidationSplitParams(_ValidatorParams): """ Params for :py:class:`TrainValidationSplit` and :py:class:`TrainValidationSplitModel`. 
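The sub-model handling above is driven by the `persistSubModels` writer option, resolved through `getValidatorModelWriterPersistSubModelsParam` and the new `MLWriter.option()` hook added in the `util.py` hunk further below. A hedged, self-contained sketch of the intended usage, reusing the dummy test classes this patch adds (paths are illustrative):

```python
import tempfile
from pyspark.sql import SparkSession
from pyspark.ml.linalg import Vectors
from pyspark.ml.tuning import ParamGridBuilder, TrainValidationSplit
from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression

spark = SparkSession.builder.getOrCreate()
dataset = spark.createDataFrame(
    [(Vectors.dense([0.0]), 0.0), (Vectors.dense([0.4]), 1.0),
     (Vectors.dense([0.5]), 0.0), (Vectors.dense([0.6]), 1.0),
     (Vectors.dense([1.0]), 1.0)] * 10, ["features", "label"])

lr = DummyLogisticRegression()
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid,
                           evaluator=DummyEvaluator(), collectSubModels=True)
tvs_model = tvs.fit(dataset)

base = tempfile.mkdtemp()
# Persist the sub-model trained for every param map; TrainValidationSplitModelWriter raises
# a ValueError if persistSubModels is true but no sub-models were collected during fitting.
tvs_model.write().option("persistSubModels", "true").save(base + "/with_sub_models")
# Skip the sub-models: bestModel, estimator, evaluator and metadata are still written.
tvs_model.write().option("persistSubModels", "false").save(base + "/best_only")
# Default when the option is unset: persist sub-models only if they were collected.
tvs_model.write().save(base + "/default")
```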
@@ -942,13 +1263,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return TrainValidationSplitWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return TrainValidationSplitReader(cls) @classmethod def _from_java(cls, java_stage): @@ -1046,13 +1369,15 @@ def copy(self, extra=None): @since("2.3.0") def write(self): """Returns an MLWriter instance for this ML instance.""" - return JavaMLWriter(self) + if _ValidatorSharedReadWrite.is_java_convertible(self): + return JavaMLWriter(self) + return TrainValidationSplitModelWriter(self) @classmethod @since("2.3.0") def read(cls): """Returns an MLReader instance for this class.""" - return JavaMLReader(cls) + return TrainValidationSplitModelReader(cls) @classmethod def _from_java(cls, java_stage): diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi index 63cd75f0e1d74..e5f153d49e9c6 100644 --- a/python/pyspark/ml/tuning.pyi +++ b/python/pyspark/ml/tuning.pyi @@ -183,3 +183,43 @@ class TrainValidationSplitModel( def write(self) -> MLWriter: ... @classmethod def read(cls: Type[TrainValidationSplitModel]) -> MLReader: ... + +class CrossValidatorWriter(MLWriter): + instance: CrossValidator + def __init__(self, instance: CrossValidator) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class CrossValidatorReader(MLReader[CrossValidator]): + cls: Type[CrossValidator] + def __init__(self, cls: Type[CrossValidator]) -> None: ... + def load(self, path: str) -> CrossValidator: ... + +class CrossValidatorModelWriter(MLWriter): + instance: CrossValidatorModel + def __init__(self, instance: CrossValidatorModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class CrossValidatorModelReader(MLReader[CrossValidatorModel]): + cls: Type[CrossValidatorModel] + def __init__(self, cls: Type[CrossValidatorModel]) -> None: ... + def load(self, path: str) -> CrossValidatorModel: ... + +class TrainValidationSplitWriter(MLWriter): + instance: TrainValidationSplit + def __init__(self, instance: TrainValidationSplit) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class TrainValidationSplitReader(MLReader[TrainValidationSplit]): + cls: Type[TrainValidationSplit] + def __init__(self, cls: Type[TrainValidationSplit]) -> None: ... + def load(self, path: str) -> TrainValidationSplit: ... + +class TrainValidationSplitModelWriter(MLWriter): + instance: TrainValidationSplitModel + def __init__(self, instance: TrainValidationSplitModel) -> None: ... + def saveImpl(self, path: str) -> None: ... + +class TrainValidationSplitModelReader(MLReader[TrainValidationSplitModel]): + cls: Type[TrainValidationSplitModel] + def __init__(self, cls: Type[TrainValidationSplitModel]) -> None: ... + def load(self, path: str) -> TrainValidationSplitModel: ... 
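With the dispatch above, `write()` keeps returning `JavaMLWriter` whenever every nested stage and the evaluator are Java-backed, so existing workflows keep their on-disk format; only validators that contain Python-backend stages fall through to the new writers. A rough sketch of that new path, mirroring `_run_test_save_load_simple_estimator` in the updated tests (the path is illustrative):

```python
import tempfile
from pyspark.sql import SparkSession
from pyspark.ml.tuning import CrossValidator, ParamGridBuilder
from pyspark.testing.mlutils import DummyEvaluator, DummyLogisticRegression

spark = SparkSession.builder.getOrCreate()

# A pure-Python estimator/evaluator makes is_java_convertible False, so save() goes through
# CrossValidatorWriter: Python metadata, estimator/ and evaluator/ subdirectories, and the
# estimatorParamMaps encoded as JSON entries in the metadata.
lr = DummyLogisticRegression()
grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=DummyEvaluator())

path = tempfile.mkdtemp() + "/python_cv"
cv.save(path)
loaded = CrossValidator.load(path)   # CrossValidatorReader rebuilds the Python instance
assert loaded.uid == cv.uid
assert loaded.getEstimator().uid == lr.uid
```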
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index a34bfb53482a0..156e7f0fe65e6 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -106,6 +106,7 @@ class MLWriter(BaseReadWrite): def __init__(self): super(MLWriter, self).__init__() self.shouldOverwrite = False + self.optionMap = {} def _handleOverwrite(self, path): from pyspark.ml.wrapper import JavaWrapper @@ -132,6 +133,14 @@ def overwrite(self): self.shouldOverwrite = True return self + def option(self, key, value): + """ + Adds an option to the underlying MLWriter. See the documentation for the specific model's + writer for possible options. The option name (key) is case-insensitive. + """ + self.optionMap[key.lower()] = str(value) + return self + @inherit_doc class GeneralMLWriter(MLWriter): @@ -375,6 +384,13 @@ def __init__(self, instance): def saveImpl(self, path): DefaultParamsWriter.saveMetadata(self.instance, path, self.sc) + @staticmethod + def extractJsonParams(instance, skipParams): + paramMap = instance.extractParamMap() + jsonParams = {param.name: value for param, value in paramMap.items() + if param.name not in skipParams} + return jsonParams + @staticmethod def saveMetadata(instance, path, sc, extraMetadata=None, paramMap=None): """ @@ -530,15 +546,16 @@ def _parseMetaData(metadataStr, expectedClassName=""): return metadata @staticmethod - def getAndSetParams(instance, metadata): + def getAndSetParams(instance, metadata, skipParams=None): """ Extract Params from metadata, and set them in the instance. """ # Set user-supplied param values for paramName in metadata['paramMap']: param = instance.getParam(paramName) - paramValue = metadata['paramMap'][paramName] - instance.set(param, paramValue) + if skipParams is None or paramName not in skipParams: + paramValue = metadata['paramMap'][paramName] + instance.set(param, paramValue) # Set default param values majorAndMinorVersions = VersionUtils.majorMinorVersion(metadata['sparkVersion']) @@ -554,6 +571,10 @@ def getAndSetParams(instance, metadata): paramValue = metadata['defaultParamMap'][paramName] instance._setDefault(**{paramName: paramValue}) + @staticmethod + def isPythonParamsInstance(metadata): + return metadata['class'].startswith('pyspark.ml.') + @staticmethod def loadParamsInstance(path, sc): """ @@ -561,7 +582,10 @@ def loadParamsInstance(path, sc): This assumes the instance inherits from :py:class:`MLReadable`. """ metadata = DefaultParamsReader.loadMetadata(path, sc) - pythonClassName = metadata['class'].replace("org.apache.spark", "pyspark") + if DefaultParamsReader.isPythonParamsInstance(metadata): + pythonClassName = metadata['class'] + else: + pythonClassName = metadata['class'].replace("org.apache.spark", "pyspark") py_type = DefaultParamsReader.__get_class(pythonClassName) instance = py_type.load(path) return instance @@ -630,3 +654,13 @@ def getAllNestedStages(pyInstance): nestedStages.extend(MetaAlgorithmReadWrite.getAllNestedStages(pySubStage)) return [pyInstance] + nestedStages + + @staticmethod + def getUidMap(instance): + nestedStages = MetaAlgorithmReadWrite.getAllNestedStages(instance) + uidMap = {stage.uid: stage for stage in nestedStages} + if len(nestedStages) != len(uidMap): + raise RuntimeError(f'{instance.__class__.__module__}.{instance.__class__.__name__}' + f'.load found a compound estimator with stages with duplicate ' + f'UIDs. 
List of UIDs: {list(uidMap.keys())}.') + return uidMap diff --git a/python/pyspark/ml/util.pyi b/python/pyspark/ml/util.pyi index e2496e181f14f..db28c095a5568 100644 --- a/python/pyspark/ml/util.pyi +++ b/python/pyspark/ml/util.pyi @@ -132,3 +132,5 @@ class MetaAlgorithmReadWrite: def isMetaEstimator(pyInstance: Any) -> bool: ... @staticmethod def getAllNestedStages(pyInstance: Any) -> list: ... + @staticmethod + def getUidMap(instance: Any) -> dict: ... diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index a90a64e747dea..d6edf9d64af49 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -17,8 +17,12 @@ import numpy as np +from pyspark import keyword_only from pyspark.ml import Estimator, Model, Transformer, UnaryTransformer +from pyspark.ml.evaluation import Evaluator from pyspark.ml.param import Param, Params, TypeConverters +from pyspark.ml.param.shared import HasMaxIter, HasRegParam +from pyspark.ml.classification import Classifier, ClassificationModel from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable from pyspark.ml.wrapper import _java2py # type: ignore from pyspark.sql import DataFrame, SparkSession @@ -161,3 +165,86 @@ def _fit(self, dataset): class MockModel(MockTransformer, Model, HasFake): pass + + +class _DummyLogisticRegressionParams(HasMaxIter, HasRegParam): + def setMaxIter(self, value): + return self._set(maxIter=value) + + def setRegParam(self, value): + return self._set(regParam=value) + + +# This is a dummy LogisticRegression used in test for python backend estimator/model +class DummyLogisticRegression(Classifier, _DummyLogisticRegressionParams, + DefaultParamsReadable, DefaultParamsWritable): + @keyword_only + def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, rawPredictionCol="rawPrediction"): + super(DummyLogisticRegression, self).__init__() + kwargs = self._input_kwargs + self.setParams(**kwargs) + + @keyword_only + def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", + maxIter=100, regParam=0.0, rawPredictionCol="rawPrediction"): + kwargs = self._input_kwargs + self._set(**kwargs) + return self + + def _fit(self, dataset): + # Do nothing but create a dummy model + return self._copyValues(DummyLogisticRegressionModel()) + + +class DummyLogisticRegressionModel(ClassificationModel, _DummyLogisticRegressionParams, + DefaultParamsReadable, DefaultParamsWritable): + + def __init__(self): + super(DummyLogisticRegressionModel, self).__init__() + + def _transform(self, dataset): + # A dummy transform impl which always predict label 1 + from pyspark.sql.functions import array, lit + from pyspark.ml.functions import array_to_vector + rawPredCol = self.getRawPredictionCol() + if rawPredCol: + dataset = dataset.withColumn( + rawPredCol, array_to_vector(array(lit(-100.0), lit(100.0)))) + predCol = self.getPredictionCol() + if predCol: + dataset = dataset.withColumn(predCol, lit(1.0)) + + return dataset + + @property + def numClasses(self): + # a dummy implementation for test. + return 2 + + @property + def intercept(self): + # a dummy implementation for test. + return 0.0 + + # This class only used in test. The following methods/properties are not used in tests. 
+ + @property + def coefficients(self): + raise NotImplementedError() + + def predictRaw(self, value): + raise NotImplementedError() + + def numFeatures(self): + raise NotImplementedError() + + def predict(self, value): + raise NotImplementedError() + + +class DummyEvaluator(Evaluator, DefaultParamsReadable, DefaultParamsWritable): + + def _evaluate(self, dataset): + # a dummy implementation for test. + return 1.0 From 85949588b71ed548a2e10d2e58183d9cce313a48 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 3 Dec 2020 16:43:15 -0800 Subject: [PATCH 0656/1009] [SPARK-33650][SQL] Fix the error from ALTER TABLE .. ADD/DROP PARTITION for non-supported partition management table ### What changes were proposed in this pull request? In the PR, I propose to change the order of post-analysis checks for the `ALTER TABLE .. ADD/DROP PARTITION` command, and perform the general check (does the table support partition management at all) before specific checks. ### Why are the changes needed? The error message for the table which doesn't support partition management can mislead users: ```java PartitionSpecs are not resolved;; 'AlterTableAddPartition [UnresolvedPartitionSpec(Map(id -> 1),None)], false +- ResolvedTable org.apache.spark.sql.connector.InMemoryTableCatalog2fd64b11, ns1.ns2.tbl, org.apache.spark.sql.connector.InMemoryTable5d3ff859 ``` because it says nothing about the root cause of the issue. ### Does this PR introduce _any_ user-facing change? Yes. After the change, the error message will be: ``` Table ns1.ns2.tbl can not alter partitions ``` ### How was this patch tested? By running the affected test suite `AlterTablePartitionV2SQLSuite`. Closes #30594 from MaxGekk/check-order-AlterTablePartition. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../AlterTablePartitionV2SQLSuite.scala | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 61ac6346ff944..64496a953861a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -996,12 +996,12 @@ trait CheckAnalysis extends PredicateHelper { private def checkAlterTablePartition( table: Table, parts: Seq[PartitionSpec]): Unit = { (table, parts) match { - case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => - failAnalysis("PartitionSpecs are not resolved") - case (table, _) if !table.isInstanceOf[SupportsPartitionManagement] => failAnalysis(s"Table ${table.name()} can not alter partitions.") + case (_, parts) if parts.exists(_.isInstanceOf[UnresolvedPartitionSpec]) => + failAnalysis("PartitionSpecs are not resolved") + // Skip atomic partition tables case (_: SupportsAtomicPartitionManagement, _) => case (_: SupportsPartitionManagement, parts) if parts.size > 1 => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 3583eceec7559..47b5e5e54edde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -245,4 +245,20 @@ class 
AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(!partTable.partitionExists(expectedPartition)) } } + + test("SPARK-33650: add/drop partition into a table which doesn't support partition management") { + val t = "testcat.ns1.ns2.tbl" + withTable(t) { + spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") + Seq( + s"ALTER TABLE $t ADD PARTITION (id=1)", + s"ALTER TABLE $t DROP PARTITION (id=1)" + ).foreach { alterTable => + val errMsg = intercept[AnalysisException] { + spark.sql(alterTable) + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) + } + } + } } From 29e415deac3c90936dd1466eab6b001b7f1f4959 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 4 Dec 2020 10:58:41 +0800 Subject: [PATCH 0657/1009] [SPARK-33649][SQL][DOC] Improve the doc of spark.sql.ansi.enabled ### What changes were proposed in this pull request? Improve the documentation of SQL configuration `spark.sql.ansi.enabled` ### Why are the changes needed? As there are more and more new features under the SQL configuration `spark.sql.ansi.enabled`, we should make it more clear about: 1. what exactly it is 2. where can users find all the features of the ANSI mode 3. whether all the features are exactly from the SQL standard ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It's just doc change. Closes #30593 from gengliangwang/reviseAnsiDoc. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 3 ++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 ++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 4e19799ca75b9..c13ea2b167d93 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -21,7 +21,8 @@ license: | Since Spark 3.0, Spark SQL introduces two experimental options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). -When `spark.sql.ansi.enabled` is set to `true`, Spark SQL follows the standard in basic behaviours (e.g., arithmetic operations, type conversion, SQL functions and SQL parsing). +When `spark.sql.ansi.enabled` is set to `true`, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. For example, Spark will throw an exception at runtime instead of returning null results if the inputs to a SQL operator/function are invalid. Some ANSI dialect features may be not from the ANSI SQL standard directly, but their behaviors align with ANSI SQL's style. + Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table. The casting behaviours are defined as store assignment rules in the standard. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b32476a5af71a..07cd41b06de21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2209,11 +2209,12 @@ object SQLConf { .createWithDefault(StoreAssignmentPolicy.ANSI.toString) val ANSI_ENABLED = buildConf("spark.sql.ansi.enabled") - .doc("When true, Spark tries to conform to the ANSI SQL specification: 1. Spark will " + - "throw an exception at runtime if the inputs to a SQL operator/function are invalid, " + - "e.g. 
overflow in arithmetic operations, out-of-range index when accessing array elements. " + - "2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in " + - "the SQL parser. 3. Spark will return NULL for null input for function `size`.") + .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + + "For example, Spark will throw an exception at runtime instead of returning null results " + + "when the inputs to a SQL operator/function are invalid." + + "For full details of this dialect, you can find them in the section \"ANSI Compliance\" of " + + "Spark's documentation. Some ANSI dialect features may be not from the ANSI SQL " + + "standard directly, but their behaviors align with ANSI SQL's style") .version("3.0.0") .booleanConf .createWithDefault(false) From e22ddb6740e73a5d1b4ef1ddd21e4241bf85f03c Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 4 Dec 2020 05:43:05 +0000 Subject: [PATCH 0658/1009] [SPARK-32405][SQL][FOLLOWUP] Remove USING _ in CREATE TABLE in JDBCTableCatalog docker tests ### What changes were proposed in this pull request? remove USING _ in CREATE TABLE in JDBCTableCatalog docker tests ### Why are the changes needed? Previously CREATE TABLE syntax forces users to specify a provider so we have to add a USING _ . Now the problem was fix and we need to remove it. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30599 from huaxingao/remove_USING. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../spark/sql/jdbc/v2/DB2IntegrationSuite.scala | 4 ++-- .../sql/jdbc/v2/MsSqlServerIntegrationSuite.scala | 4 ++-- .../spark/sql/jdbc/v2/MySQLIntegrationSuite.scala | 6 +++--- .../spark/sql/jdbc/v2/OracleIntegrationSuite.scala | 2 +- .../spark/sql/jdbc/v2/PostgresIntegrationSuite.scala | 4 ++-- .../org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 12 ++++++------ 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 4b6461815d306..6f803b8f61dd4 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -59,7 +59,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -75,7 +75,7 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('CCSID'='UNICODE')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index fd101607ad3ee..a7e257dbdc554 100644 --- 
a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -65,7 +65,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC override def notSupportsTableComment: Boolean = true override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -81,7 +81,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBC } override def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL)") // Update nullability is unsupported for mssql db. val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index a81399fc2a4f7..5f63fde7a0f58 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -67,7 +67,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -98,7 +98,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $tbl (ID STRING NOT NULL)") // Update nullability is unsupported for mysql db. 
val msg = intercept[AnalysisException] { sql(s"ALTER TABLE $tbl ALTER COLUMN ID DROP NOT NULL") @@ -108,7 +108,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('ENGINE'='InnoDB', 'DEFAULT CHARACTER SET'='utf8')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 403f16aac6356..241c9c1409550 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -73,7 +73,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", DecimalType(10, 0)) assert(t.schema === expectedSchema) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index df2c865e4d13b..a7fd9aa9a9868 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -52,7 +52,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes override def dataPreparation(conn: Connection): Unit = {} override def testUpdateColumnType(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INTEGER) USING _") + sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) assert(t.schema === expectedSchema) @@ -68,7 +68,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTes } override def testCreateTableWithProperty(tbl: String): Unit = { - sql(s"CREATE TABLE $tbl (ID INT) USING _" + + sql(s"CREATE TABLE $tbl (ID INT)" + s" TBLPROPERTIES('TABLESPACE'='pg_default')") var t = spark.table(tbl) var expectedSchema = new StructType().add("ID", IntegerType) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index e36555e514c9f..a2dd8375834bf 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -33,7 +33,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { def notSupportsTableComment: Boolean = false def testUpdateColumnNullability(tbl: String): Unit = { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL)") var t = 
spark.table(s"$catalogName.alt_table") // nullable is true in the expectedSchema because Spark always sets nullable to true // regardless of the JDBC metadata https://github.com/apache/spark/pull/18445 @@ -62,7 +62,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... add new columns") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (ID STRING) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (ID STRING)") var t = spark.table(s"$catalogName.alt_table") var expectedSchema = new StructType().add("ID", StringType) assert(t.schema === expectedSchema) @@ -89,7 +89,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... drop column") { withTable(s"$catalogName.alt_table") { - sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER) USING _") + sql(s"CREATE TABLE $catalogName.alt_table (C1 INTEGER, C2 STRING, c3 INTEGER)") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN C1") sql(s"ALTER TABLE $catalogName.alt_table DROP COLUMN c3") val t = spark.table(s"$catalogName.alt_table") @@ -127,7 +127,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("SPARK-33034: ALTER TABLE ... rename column") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL," + - s" ID1 STRING NOT NULL, ID2 STRING NOT NULL) USING _") + s" ID1 STRING NOT NULL, ID2 STRING NOT NULL)") testRenameColumn(s"$catalogName.alt_table") // Rename to already existing column val msg = intercept[AnalysisException] { @@ -157,7 +157,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { withTable(s"$catalogName.new_table") { val logAppender = new LogAppender("table comment") withLogAppender(logAppender) { - sql(s"CREATE TABLE $catalogName.new_table(i INT) USING _ COMMENT 'this is a comment'") + sql(s"CREATE TABLE $catalogName.new_table(i INT) COMMENT 'this is a comment'") } val createCommentWarning = logAppender.loggingEvents .filter(_.getLevel == Level.WARN) @@ -170,7 +170,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession { test("CREATE TABLE with table property") { withTable(s"$catalogName.new_table") { val m = intercept[AnalysisException] { - sql(s"CREATE TABLE $catalogName.new_table (i INT) USING _ TBLPROPERTIES('a'='1')") + sql(s"CREATE TABLE $catalogName.new_table (i INT) TBLPROPERTIES('a'='1')") }.message assert(m.contains("Failed table creation")) testCreateTableWithProperty(s"$catalogName.new_table") From e02324f2dda3510dd229199e97c87ffdcc766a18 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Fri, 4 Dec 2020 06:48:49 +0000 Subject: [PATCH 0659/1009] [SPARK-33142][SPARK-33647][SQL] Store SQL text for SQL temp view ### What changes were proposed in this pull request? Currently, in spark, the temp view is saved as its analyzed logical plan, while the permanent view is kept in HMS with its origin SQL text. As a result, permanent and temporary views have different behaviors in some cases. In this PR we store the SQL text for temporary view in order to unify the behavior between permanent and temporary views. ### Why are the changes needed? to unify the behavior between permanent and temporary views ### Does this PR introduce _any_ user-facing change? Yes, with this PR, the temporary view will be re-analyzed when it's referred. So if the underlying datasource changed, the view will also be updated. ### How was this patch tested? 
existing and newly added test cases Closes #30567 from linhongliu-db/SPARK-33142. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 43 ++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/analysis/view.scala | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 57 +++-- .../sql/catalyst/catalog/interface.scala | 48 ++++ .../plans/logical/basicLogicalOperators.scala | 42 ++- .../apache/spark/sql/internal/SQLConf.scala | 11 + .../sql/catalyst/analysis/AnalysisSuite.scala | 1 + .../catalog/SessionCatalogSuite.scala | 4 +- .../command/AnalyzeColumnCommand.scala | 5 +- .../spark/sql/execution/command/views.scala | 239 ++++++++++++++---- .../sql-tests/results/describe.sql.out | 4 +- .../sql-tests/results/group-by-filter.sql.out | 56 ++-- .../results/postgreSQL/create_view.sql.out | 28 +- .../results/show-tblproperties.sql.out | 2 + .../invalid-correlation.sql.out | 7 +- .../apache/spark/sql/CachedTableSuite.scala | 22 -- .../spark/sql/execution/SQLViewSuite.scala | 84 ++++++ .../sql/execution/SQLViewTestSuite.scala | 203 +++++++++++++++ .../SparkGetColumnsOperation.scala | 2 +- 20 files changed, 691 insertions(+), 173 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6b06cf13262d4..ebe1004872ef6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -105,7 +105,8 @@ object FakeV2SessionCatalog extends TableCatalog { case class AnalysisContext( catalogAndNamespace: Seq[String] = Nil, nestedViewDepth: Int = 0, - relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty) + relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, + referredTempViewNames: Seq[Seq[String]] = Seq.empty) object AnalysisContext { private val value = new ThreadLocal[AnalysisContext]() { @@ -117,10 +118,14 @@ object AnalysisContext { private def set(context: AnalysisContext): Unit = value.set(context) - def withAnalysisContext[A](catalogAndNamespace: Seq[String])(f: => A): A = { + def withAnalysisContext[A]( + catalogAndNamespace: Seq[String], referredTempViewNames: Seq[Seq[String]])(f: => A): A = { val originContext = value.get() val context = AnalysisContext( - catalogAndNamespace, originContext.nestedViewDepth + 1, originContext.relationCache) + catalogAndNamespace, + originContext.nestedViewDepth + 1, + originContext.relationCache, + referredTempViewNames) set(context) try f finally { set(originContext) } } @@ -838,6 +843,7 @@ class Analyzer(override val catalogManager: CatalogManager) } private def isResolvingView: Boolean = AnalysisContext.get.catalogAndNamespace.nonEmpty + private def referredTempViewNames: Seq[Seq[String]] = AnalysisContext.get.referredTempViewNames /** * Resolve relations to temp views. This is not an actual rule, and is called by @@ -882,7 +888,7 @@ class Analyzer(override val catalogManager: CatalogManager) def lookupTempView( identifier: Seq[String], isStreaming: Boolean = false): Option[LogicalPlan] = { // Permanent View can't refer to temp views, no need to lookup at all. 
- if (isResolvingView) return None + if (isResolvingView && !referredTempViewNames.contains(identifier)) return None val tmpView = identifier match { case Seq(part1) => v1SessionCatalog.lookupTempView(part1) @@ -894,14 +900,14 @@ class Analyzer(override val catalogManager: CatalogManager) throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") } - tmpView + tmpView.map(ResolveRelations.resolveViews) } } // If we are resolving relations insides views, we need to expand single-part relation names with // the current catalog and namespace of when the view was created. private def expandRelationName(nameParts: Seq[String]): Seq[String] = { - if (!isResolvingView) return nameParts + if (!isResolvingView || referredTempViewNames.contains(nameParts)) return nameParts if (nameParts.length == 1) { AnalysisContext.get.catalogAndNamespace :+ nameParts.head @@ -1022,23 +1028,24 @@ class Analyzer(override val catalogManager: CatalogManager) // look at `AnalysisContext.catalogAndNamespace` when resolving relations with single-part name. // If `AnalysisContext.catalogAndNamespace` is non-empty, analyzer will expand single-part names // with it, instead of current catalog and namespace. - private def resolveViews(plan: LogicalPlan): LogicalPlan = plan match { + def resolveViews(plan: LogicalPlan): LogicalPlan = plan match { // The view's child should be a logical plan parsed from the `desc.viewText`, the variable // `viewText` should be defined, or else we throw an error on the generation of the View // operator. - case view @ View(desc, _, child) if !child.resolved => + case view @ View(desc, isTempView, _, child) if !child.resolved => // Resolve all the UnresolvedRelations and Views in the child. - val newChild = AnalysisContext.withAnalysisContext(desc.viewCatalogAndNamespace) { - if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + - "around this.") - } - SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs)) { - executeSameContext(child) + val newChild = AnalysisContext.withAnalysisContext( + desc.viewCatalogAndNamespace, desc.viewReferredTempViewNames) { + if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { + view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + + s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + + s"avoid errors. 
Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + + "work around this.") + } + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { + executeSameContext(child) + } } - } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => p.copy(child = resolveViews(view)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 64496a953861a..11c4883992560 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -407,7 +407,7 @@ trait CheckAnalysis extends PredicateHelper { // output, nor with the query column names, throw an AnalysisException. // If the view's child output can't up cast to the view output, // throw an AnalysisException, too. - case v @ View(desc, output, child) if child.resolved && !v.sameOutput(child) => + case v @ View(desc, _, output, child) if child.resolved && !v.sameOutput(child) => val queryColumnNames = desc.viewQueryColumnNames val queryOutput = if (queryColumnNames.nonEmpty) { if (output.length != queryColumnNames.length) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index 06de023098a1c..dfadf0a539948 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -56,7 +56,7 @@ object EliminateView extends Rule[LogicalPlan] with CastSupport { override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // The child has the different output attributes with the View operator. Adds a Project over // the child of the view. - case v @ View(desc, output, child) if child.resolved && !v.sameOutput(child) => + case v @ View(desc, _, output, child) if child.resolved && !v.sameOutput(child) => val resolver = conf.resolver val queryColumnNames = desc.viewQueryColumnNames val queryOutput = if (queryColumnNames.nonEmpty) { @@ -83,7 +83,7 @@ object EliminateView extends Rule[LogicalPlan] with CastSupport { // The child should have the same output attributes with the View operator, so we simply // remove the View operator. - case View(_, _, child) => + case View(_, _, _, child) => child } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 01bce079610ae..29481b85e9f2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -606,7 +606,7 @@ class SessionCatalog( * Return a local temporary view exactly as it was stored. */ def getTempView(name: String): Option[LogicalPlan] = synchronized { - tempViews.get(formatTableName(name)) + tempViews.get(formatTableName(name)).map(getTempViewPlan) } def getTempViewNames(): Seq[String] = synchronized { @@ -617,7 +617,7 @@ class SessionCatalog( * Return a global temporary view exactly as it was stored. 
*/ def getGlobalTempView(name: String): Option[LogicalPlan] = { - globalTempViewManager.get(formatTableName(name)) + globalTempViewManager.get(formatTableName(name)).map(getTempViewPlan) } /** @@ -654,20 +654,25 @@ class SessionCatalog( def getTempViewOrPermanentTableMetadata(name: TableIdentifier): CatalogTable = synchronized { val table = formatTableName(name.table) if (name.database.isEmpty) { - getTempView(table).map { plan => - CatalogTable( - identifier = TableIdentifier(table), - tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat.empty, - schema = plan.output.toStructType) + getTempView(table).map { + case TemporaryViewRelation(metadata) => metadata + case plan => + CatalogTable( + identifier = TableIdentifier(table), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = plan.output.toStructType) }.getOrElse(getTableMetadata(name)) } else if (formatDatabaseName(name.database.get) == globalTempViewManager.database) { - globalTempViewManager.get(table).map { plan => - CatalogTable( - identifier = TableIdentifier(table, Some(globalTempViewManager.database)), - tableType = CatalogTableType.VIEW, - storage = CatalogStorageFormat.empty, - schema = plan.output.toStructType) + val a = globalTempViewManager.get(table) + globalTempViewManager.get(table).map { + case TemporaryViewRelation(metadata) => metadata + case plan => + CatalogTable( + identifier = TableIdentifier(table, Some(globalTempViewManager.database)), + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = plan.output.toStructType) }.getOrElse(throw new NoSuchTableException(globalTempViewManager.database, table)) } else { getTableMetadata(name) @@ -777,13 +782,13 @@ class SessionCatalog( val table = formatTableName(name.table) if (db == globalTempViewManager.database) { globalTempViewManager.get(table).map { viewDef => - SubqueryAlias(table, db, viewDef) + SubqueryAlias(table, db, getTempViewPlan(viewDef)) }.getOrElse(throw new NoSuchTableException(db, table)) } else if (name.database.isDefined || !tempViews.contains(table)) { val metadata = externalCatalog.getTable(db, table) getRelation(metadata) } else { - SubqueryAlias(table, tempViews(table)) + SubqueryAlias(table, getTempViewPlan(tempViews(table))) } } } @@ -797,26 +802,24 @@ class SessionCatalog( val multiParts = Seq(CatalogManager.SESSION_CATALOG_NAME, db, table) if (metadata.tableType == CatalogTableType.VIEW) { - val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - val viewConfigs = metadata.viewSQLConfigs - val viewPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs)) { - parser.parsePlan(viewText) - } - - logDebug(s"'$viewText' will be used for the view($table) with configs: $viewConfigs.") // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
- val child = View( - desc = metadata, - output = metadata.schema.toAttributes, - child = viewPlan) + val child = View.fromCatalogTable(metadata, isTempView = false, parser) SubqueryAlias(multiParts, child) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } + def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { + plan match { + case viewInfo: TemporaryViewRelation => + View.fromCatalogTable(viewInfo.tableMeta, isTempView = true, parser) + case v => v + } + } + def lookupTempView(table: String): Option[SubqueryAlias] = { val formattedTable = formatTableName(table) getTempView(formattedTable).map { view => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 621ad84f1f5ec..6743b052fb3a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -25,6 +25,8 @@ import scala.collection.mutable import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils +import org.json4s.JsonAST.{JArray, JString} +import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException @@ -337,6 +339,40 @@ case class CatalogTable( ) } + /** + * Return temporary view names the current view was referred. should be empty if the + * CatalogTable is not a Temporary View or created by older versions of Spark(before 3.1.0). + */ + def viewReferredTempViewNames: Seq[Seq[String]] = { + try { + properties.get(VIEW_REFERRED_TEMP_VIEW_NAMES).map { json => + parse(json).asInstanceOf[JArray].arr.map { namePartsJson => + namePartsJson.asInstanceOf[JArray].arr.map(_.asInstanceOf[JString].s) + } + }.getOrElse(Seq.empty) + } catch { + case e: Exception => + throw new AnalysisException( + "corrupted view referred temp view names in catalog", cause = Some(e)) + } + } + + /** + * Return temporary function names the current view was referred. should be empty if the + * CatalogTable is not a Temporary View or created by older versions of Spark(before 3.1.0). + */ + def viewReferredTempFunctionNames: Seq[String] = { + try { + properties.get(VIEW_REFERRED_TEMP_FUNCTION_NAMES).map { json => + parse(json).asInstanceOf[JArray].arr.map(_.asInstanceOf[JString].s) + }.getOrElse(Seq.empty) + } catch { + case e: Exception => + throw new AnalysisException( + "corrupted view referred temp functions names in catalog", cause = Some(e)) + } + } + /** Syntactic sugar to update a field in `storage`. */ def withNewStorage( locationUri: Option[URI] = storage.locationUri, @@ -432,6 +468,9 @@ object CatalogTable { val VIEW_QUERY_OUTPUT_PREFIX = VIEW_PREFIX + "query.out." val VIEW_QUERY_OUTPUT_NUM_COLUMNS = VIEW_QUERY_OUTPUT_PREFIX + "numCols" val VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX = VIEW_QUERY_OUTPUT_PREFIX + "col." 
+ + val VIEW_REFERRED_TEMP_VIEW_NAMES = VIEW_PREFIX + "referredTempViewNames" + val VIEW_REFERRED_TEMP_FUNCTION_NAMES = VIEW_PREFIX + "referredTempFunctionsNames" } /** @@ -667,6 +706,15 @@ case class UnresolvedCatalogRelation( override def output: Seq[Attribute] = Nil } +/** + * A wrapper to store the temporary view info, will be kept in `SessionCatalog` + * and will be transformed to `View` during analysis + */ +case class TemporaryViewRelation(tableMeta: CatalogTable) extends LeafNode { + override lazy val resolved: Boolean = false + override def output: Seq[Attribute] = Nil +} + /** * A `LogicalPlan` that represents a hive table. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index a524ed4ff73e9..c8b7e8651686a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -18,10 +18,11 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.AliasIdentifier -import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.analysis.{EliminateView, MultiInstanceRelation} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString @@ -437,6 +438,7 @@ case class InsertIntoDir( */ case class View( desc: CatalogTable, + isTempView: Boolean, output: Seq[Attribute], child: LogicalPlan) extends LogicalPlan with MultiInstanceRelation { @@ -451,12 +453,31 @@ case class View( override def simpleString(maxFields: Int): String = { s"View (${desc.identifier}, ${output.mkString("[", ",", "]")})" } + + override def doCanonicalize(): LogicalPlan = { + def sameOutput( + outerProject: Seq[NamedExpression], innerProject: Seq[NamedExpression]): Boolean = { + outerProject.length == innerProject.length && + outerProject.zip(innerProject).forall { + case(outer, inner) => outer.name == inner.name && outer.dataType == inner.dataType + } + } + + val eliminated = EliminateView(this) match { + case Project(viewProjectList, child @ Project(queryProjectList, _)) + if sameOutput(viewProjectList, queryProjectList) => + child + case other => other + } + eliminated.canonicalized + } } object View { - def effectiveSQLConf(configs: Map[String, String]): SQLConf = { + def effectiveSQLConf(configs: Map[String, String], isTempView: Boolean): SQLConf = { val activeConf = SQLConf.get - if (activeConf.useCurrentSQLConfigsForView) return activeConf + // For temporary view, we always use captured sql configs + if (activeConf.useCurrentSQLConfigsForView && !isTempView) return activeConf val sqlConf = new SQLConf() for ((k, v) <- configs) { @@ -467,6 +488,21 @@ object View { sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) sqlConf } + + def fromCatalogTable( + metadata: CatalogTable, isTempView: Boolean, parser: ParserInterface): View = { + val viewText = 
metadata.viewText.getOrElse(sys.error("Invalid view without text.")) + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = + SQLConf.withExistingConf(effectiveSQLConf(viewConfigs, isTempView = isTempView)) { + parser.parsePlan(viewText) + } + View( + desc = metadata, + isTempView = isTempView, + output = metadata.schema.toAttributes, + child = viewPlan) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 07cd41b06de21..496065f85fbbf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1489,6 +1489,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val STORE_ANALYZED_PLAN_FOR_VIEW = + buildConf("spark.sql.legacy.storeAnalyzedPlanForView") + .internal() + .doc("When true, analyzed plan instead of SQL text will be stored when creating " + + "temporary view") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + val STREAMING_FILE_COMMIT_PROTOCOL_CLASS = buildConf("spark.sql.streaming.commitProtocolClass") .version("2.1.0") @@ -3435,6 +3444,8 @@ class SQLConf extends Serializable with Logging { def useCurrentSQLConfigsForView: Boolean = getConf(SQLConf.USE_CURRENT_SQL_CONFIGS_FOR_VIEW) + def storeAnalyzedPlanForView: Boolean = getConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW) + def starSchemaDetection: Boolean = getConf(STARSCHEMA_DETECTION) def starSchemaFTRatio: Double = getConf(STARSCHEMA_FACT_TABLE_RATIO) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 0afa811e5d590..f5bfdc5e695e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -665,6 +665,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, schema = StructType(Seq(StructField("a", IntegerType), StructField("b", StringType)))), + isTempView = false, output = Seq(Symbol("a").int, Symbol("b").string), child = relation) val tz = Option(conf.sessionLocalTimeZone) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index f30ae70dceffa..98f9ce6fe9dbb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -646,7 +646,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { // Look up a view. 
catalog.setCurrentDatabase("default") - val view = View(desc = metadata, output = metadata.schema.toAttributes, + val view = View(desc = metadata, isTempView = false, output = metadata.schema.toAttributes, child = CatalystSqlParser.parsePlan(metadata.viewText.get)) comparePlans(catalog.lookupRelation(TableIdentifier("view1", Some("db3"))), SubqueryAlias(Seq(CatalogManager.SESSION_CATALOG_NAME, "db3", "view1"), view)) @@ -666,7 +666,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { assert(metadata.viewText.isDefined) assert(metadata.viewCatalogAndNamespace == Seq(CatalogManager.SESSION_CATALOG_NAME, "db2")) - val view = View(desc = metadata, output = metadata.schema.toAttributes, + val view = View(desc = metadata, isTempView = false, output = metadata.schema.toAttributes, child = CatalystSqlParser.parsePlan(metadata.viewText.get)) comparePlans(catalog.lookupRelation(TableIdentifier("view2", Some("db3"))), SubqueryAlias(Seq(CatalogManager.SESSION_CATALOG_NAME, "db3", "view2"), view)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 3b90f807b3138..641bd26c381ad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -61,9 +61,10 @@ case class AnalyzeColumnCommand( private def analyzeColumnInCachedData(plan: LogicalPlan, sparkSession: SparkSession): Boolean = { val cacheManager = sparkSession.sharedState.cacheManager - cacheManager.lookupCachedData(plan).map { cachedData => + val planToLookup = sparkSession.sessionState.executePlan(plan).analyzed + cacheManager.lookupCachedData(planToLookup).map { cachedData => val columnsToAnalyze = getColumnsToAnalyze( - tableIdent, cachedData.plan, columnNames, allColumns) + tableIdent, cachedData.cachedRepresentation, columnNames, allColumns) cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze) cachedData }.isDefined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index a02f863a360f8..4ad5eddb83f43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -19,16 +19,19 @@ package org.apache.spark.sql.execution.command import scala.collection.mutable +import org.json4s.JsonAST.{JArray, JString} +import org.json4s.jackson.JsonMethods._ + import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, UnresolvedFunction, UnresolvedRelation, ViewType} -import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, SessionCatalog, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, View} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import 
org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} -import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType} +import org.apache.spark.sql.types.{BooleanType, MetadataBuilder, StringType, StructType} import org.apache.spark.sql.util.SchemaUtils /** @@ -107,26 +110,61 @@ case class CreateViewCommand( // When creating a permanent view, not allowed to reference temporary objects. // This should be called after `qe.assertAnalyzed()` (i.e., `child` can be resolved) - verifyTemporaryObjectsNotExists(catalog) + verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - if (replace && catalog.getTempView(name.table).isDefined && - !catalog.getTempView(name.table).get.sameResult(child)) { + val samePlan = catalog.getTempView(name.table).exists { + // Don't perform sameResult check for View logical plan, since it's unresolved + case _: View => false + case other => other.sameResult(child) + } + if (replace && !samePlan) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") + checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) - catalog.createTempView(name.table, aliasedPlan, overrideIfExists = replace) + // If there is no sql text (e.g. from Dataset API), we will always store the analyzed plan + val tableDefinition = if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) { + TemporaryViewRelation( + prepareTemporaryView( + name, + sparkSession, + analyzedPlan, + aliasedPlan.schema, + originalText, + child)) + } else { + aliasedPlan + } + catalog.createTempView(name.table, tableDefinition, overrideIfExists = replace) } else if (viewType == GlobalTempView) { - if (replace && catalog.getGlobalTempView(name.table).isDefined && - !catalog.getGlobalTempView(name.table).get.sameResult(child)) { - val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) - val globalTempView = TableIdentifier(name.table, Option(db)) - logInfo(s"Try to uncache ${globalTempView.quotedString} before replacing.") - CommandUtils.uncacheTableOrView(sparkSession, globalTempView.quotedString) + val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + val viewIdent = TableIdentifier(name.table, Option(db)) + val samePlan = catalog.getGlobalTempView(name.table).exists { + // Don't perform sameResult check for View logical plan, since it's unresolved + case _: View => false + case other => other.sameResult(child) + } + if (replace && !samePlan) { + logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") + checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) + CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) } val aliasedPlan = aliasPlan(sparkSession, analyzedPlan) - catalog.createGlobalTempView(name.table, aliasedPlan, overrideIfExists = replace) + val tableDefinition = if (!conf.storeAnalyzedPlanForView && originalText.nonEmpty) { + TemporaryViewRelation( + prepareTemporaryView( + viewIdent, + sparkSession, + analyzedPlan, + aliasedPlan.schema, + originalText, + child)) + } else { + aliasedPlan + } + catalog.createGlobalTempView(name.table, tableDefinition, overrideIfExists = replace) } else if (catalog.tableExists(name)) { val tableMetadata = catalog.getTableMetadata(name) if (allowExisting) { @@ -161,39 +199,6 @@ case class CreateViewCommand( Seq.empty[Row] } - /** - * Permanent views are not allowed to reference temp 
objects, including temp function and views - */ - private def verifyTemporaryObjectsNotExists(catalog: SessionCatalog): Unit = { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - if (!isTemporary) { - // This func traverses the unresolved plan `child`. Below are the reasons: - // 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding - // logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is - // added/generated from a temporary view. - // 2) The temp functions are represented by multiple classes. Most are inaccessible from this - // package (e.g., HiveGenericUDF). - def verify(child: LogicalPlan): Unit = { - child.collect { - // Disallow creating permanent views based on temporary views. - case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => - throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary view ${nameParts.quoted}. " + - "Please create a temp view instead by CREATE TEMP VIEW") - case other if !other.resolved => other.expressions.flatMap(_.collect { - // Traverse subquery plan for any unresolved relations. - case e: SubqueryExpression => verify(e.plan) - // Disallow creating permanent views based on temporary UDFs. - case e: UnresolvedFunction if catalog.isTemporaryFunction(e.name) => - throw new AnalysisException(s"Not allowed to create a permanent view $name by " + - s"referencing a temporary function `${e.name}`") - }) - } - } - verify(child) - } - } - /** * If `userSpecifiedColumns` is defined, alias the analyzed plan to the user specified columns, * else return the analyzed plan directly. @@ -266,15 +271,26 @@ case class AlterViewAsCommand( qe.assertAnalyzed() val analyzedPlan = qe.analyzed - if (session.sessionState.catalog.alterTempViewDefinition(name, analyzedPlan)) { - // a local/global temp view has been altered, we are done. + if (session.sessionState.catalog.isTemporaryTable(name)) { + alterTemporaryView(session, analyzedPlan) } else { alterPermanentView(session, analyzedPlan) } - Seq.empty[Row] } + private def alterTemporaryView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { + val tableDefinition = if (conf.storeAnalyzedPlanForView) { + analyzedPlan + } else { + checkCyclicViewReference(analyzedPlan, Seq(name), name) + TemporaryViewRelation( + prepareTemporaryView( + name, session, analyzedPlan, analyzedPlan.schema, Some(originalText), query)) + } + session.sessionState.catalog.alterTempViewDefinition(name, tableDefinition) + } + private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { val viewMeta = session.sessionState.catalog.getTableMetadata(name) if (viewMeta.tableType != CatalogTableType.VIEW) { @@ -398,6 +414,34 @@ object ViewHelper { } } + /** + * Convert the temporary object names to `properties`. + */ + private def referredTempNamesToProps( + viewNames: Seq[Seq[String]], functionsNames: Seq[String]): Map[String, String] = { + val viewNamesJson = + JArray(viewNames.map(nameParts => JArray(nameParts.map(JString).toList)).toList) + val functionsNamesJson = JArray(functionsNames.map(JString).toList) + + val props = new mutable.HashMap[String, String] + props.put(VIEW_REFERRED_TEMP_VIEW_NAMES, compact(render(viewNamesJson))) + props.put(VIEW_REFERRED_TEMP_FUNCTION_NAMES, compact(render(functionsNamesJson))) + props.toMap + } + + /** + * Remove the temporary object names in `properties`. 
+ */ + private def removeReferredTempNames(properties: Map[String, String]): Map[String, String] = { + // We can't use `filterKeys` here, as the map returned by `filterKeys` is not serializable, + // while `CatalogTable` should be serializable. + properties.filterNot { case (key, _) => + key.startsWith(VIEW_REFERRED_TEMP_VIEW_NAMES) || + key.startsWith(VIEW_REFERRED_TEMP_FUNCTION_NAMES) + } + } + + /** * Generate the view properties in CatalogTable, including: * 1. view default database that is used to provide the default database name on view resolution. @@ -414,7 +458,9 @@ object ViewHelper { properties: Map[String, String], session: SparkSession, analyzedPlan: LogicalPlan, - fieldNames: Array[String]): Map[String, String] = { + fieldNames: Array[String], + tempViewNames: Seq[Seq[String]] = Seq.empty, + tempFunctionNames: Seq[String] = Seq.empty): Map[String, String] = { // for createViewCommand queryOutput may be different from fieldNames val queryOutput = analyzedPlan.schema.fieldNames @@ -427,10 +473,11 @@ object ViewHelper { // Generate the view default catalog and namespace, as well as captured SQL configs. val manager = session.sessionState.catalogManager - removeSQLConfigs(removeQueryColumnNames(properties)) ++ + removeReferredTempNames(removeSQLConfigs(removeQueryColumnNames(properties))) ++ catalogAndNamespaceToProps(manager.currentCatalog.name, manager.currentNamespace) ++ sqlConfigsToProps(conf) ++ - generateQueryColumnNames(queryOutput) + generateQueryColumnNames(queryOutput) ++ + referredTempNamesToProps(tempViewNames, tempFunctionNames) } /** @@ -481,4 +528,92 @@ object ViewHelper { } } } + + + /** + * Permanent views are not allowed to reference temp objects, including temp function and views + */ + def verifyTemporaryObjectsNotExists( + catalog: SessionCatalog, + isTemporary: Boolean, + name: TableIdentifier, + child: LogicalPlan): Unit = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + if (!isTemporary) { + val (tempViews, tempFunctions) = collectTemporaryObjects(catalog, child) + tempViews.foreach { nameParts => + throw new AnalysisException(s"Not allowed to create a permanent view $name by " + + s"referencing a temporary view ${nameParts.quoted}. " + + "Please create a temp view instead by CREATE TEMP VIEW") + } + tempFunctions.foreach { funcName => + throw new AnalysisException(s"Not allowed to create a permanent view $name by " + + s"referencing a temporary function `${funcName}`") + } + } + } + + /** + * Collect all temporary views and functions and return the identifiers separately + * This func traverses the unresolved plan `child`. Below are the reasons: + * 1) Analyzer replaces unresolved temporary views by a SubqueryAlias with the corresponding + * logical plan. After replacement, it is impossible to detect whether the SubqueryAlias is + * added/generated from a temporary view. + * 2) The temp functions are represented by multiple classes. Most are inaccessible from this + * package (e.g., HiveGenericUDF). 
+ */ + private def collectTemporaryObjects( + catalog: SessionCatalog, child: LogicalPlan): (Seq[Seq[String]], Seq[String]) = { + def collectTempViews(child: LogicalPlan): Seq[Seq[String]] = { + child.collect { + case UnresolvedRelation(nameParts, _, _) if catalog.isTempView(nameParts) => + Seq(nameParts) + case plan if !plan.resolved => plan.expressions.flatMap(_.collect { + case e: SubqueryExpression => collectTempViews(e.plan) + }).flatten + }.flatten.distinct + } + + def collectTempFunctions(child: LogicalPlan): Seq[String] = { + child.collect { + case plan if !plan.resolved => plan.expressions.flatMap(_.collect { + case e: SubqueryExpression => collectTempFunctions(e.plan) + case e: UnresolvedFunction if catalog.isTemporaryFunction(e.name) => + Seq(e.name.funcName) + }).flatten + }.flatten.distinct + } + (collectTempViews(child), collectTempFunctions(child)) + } + + + /** + * Returns a [[CatalogTable]] that contains information for temporary view. + * Generate the view-specific properties(e.g. view default database, view query output + * column names) and store them as properties in the CatalogTable, and also creates + * the proper schema for the view. + */ + def prepareTemporaryView( + viewName: TableIdentifier, + session: SparkSession, + analyzedPlan: LogicalPlan, + viewSchema: StructType, + originalText: Option[String], + child: LogicalPlan): CatalogTable = { + + val catalog = session.sessionState.catalog + val (tempViews, tempFunctions) = collectTemporaryObjects(catalog, child) + // TBLPROPERTIES is not allowed for temporary view, so we don't use it for + // generating temporary view properties + val newProperties = generateViewProperties( + Map.empty, session, analyzedPlan, viewSchema.fieldNames, tempViews, tempFunctions) + + CatalogTable( + identifier = viewName, + tableType = CatalogTableType.VIEW, + storage = CatalogStorageFormat.empty, + schema = viewSchema, + viewText = originalText, + properties = newProperties) + } } diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 145c987ee5f61..2674d055ac450 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -477,7 +477,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] +Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] -- !query @@ -501,7 +501,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.catalogAndNamespace.part.1=default] +Table Properties [view.query.out.col.3=d, 
view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 89a4da116a6b3..149e031e8829c 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -795,13 +795,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Project [state#x] : +- Filter (dept_id#x = outer(dept_id#x)) : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -821,13 +823,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Project [state#x] : +- Filter (dept_id#x = outer(dept_id#x)) : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -846,13 +850,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Distinct : +- Project [dept_id#x] : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; @@ -871,13 +877,15 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman : +- Distinct : +- Project [dept_id#x] : +- SubqueryAlias dept -: +- Project [dept_id#x, dept_name#x, 
state#x] -: +- SubqueryAlias DEPT -: +- LocalRelation [dept_id#x, dept_name#x, state#x] +: +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) +: +- Project [dept_id#x, dept_name#x, state#x] +: +- SubqueryAlias DEPT +: +- LocalRelation [dept_id#x, dept_name#x, state#x] +- SubqueryAlias emp - +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] - +- SubqueryAlias EMP - +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- View (`EMP`, [id#x,emp_name#x,hiredate#x,salary#x,dept_id#x]) + +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + +- SubqueryAlias EMP + +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] ; diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 2fab32fa4b4eb..7d331f24b9215 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT 
id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original 
Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, 
view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out index eaaf894590d35..3fb948056dc01 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tblproperties.sql.out @@ -64,6 +64,8 @@ view.catalogAndNamespace.part.0 spark_catalog view.catalogAndNamespace.part.1 default view.query.out.col.0 c1 view.query.out.numCols 1 +view.referredTempFunctionsNames [] +view.referredTempViewNames [] -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index d703d4e9112e9..cd96eaf1b878b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -111,7 +111,8 @@ org.apache.spark.sql.AnalysisException Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses: Aggregate [min(outer(t2a#x)) AS min(outer(t2.`t2a`))#x] +- SubqueryAlias t3 - +- Project [t3a#x, t3b#x, t3c#x] - +- SubqueryAlias t3 - +- LocalRelation [t3a#x, t3b#x, t3c#x] + +- View (`t3`, [t3a#x,t3b#x,t3c#x]) + +- Project [t3a#x, t3b#x, t3c#x] + +- SubqueryAlias t3 + +- LocalRelation [t3a#x, t3b#x, t3c#x] ; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index ef3f4daa6dc6b..d0150616cd67e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1239,26 +1239,4 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } - - test("SPARK-33290: querying temporary view after REFRESH TABLE fails with FNFE") { - withTable("t") { - withTempPath { path => - withTempView("tempView1") { - Seq((1 -> "a")).toDF("i", "j").write.parquet(path.getCanonicalPath) - sql(s"CREATE TABLE t USING parquet LOCATION '${path.toURI}'") - sql("CREATE TEMPORARY VIEW tempView1 AS SELECT * FROM t") - checkAnswer(sql("SELECT * FROM tempView1"), Seq(Row(1, "a"))) - - Utils.deleteRecursively(path) - sql("REFRESH TABLE t") - checkAnswer(sql("SELECT * FROM t"), Seq.empty) - val exception = intercept[Exception] { - checkAnswer(sql("SELECT * FROM tempView1"), Seq.empty) - } - assert(exception.getMessage.contains("FileNotFoundException")) - assert(exception.getMessage.contains("REFRESH TABLE")) - } - } - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 0b19f706836be..709d6321d199d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException @@ -763,6 +764,89 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } + test("temporary view should ignore useCurrentSQLConfigsForView config") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + withTempView("v1") { + sql("CREATE TEMPORARY VIEW v1 AS SELECT 1/0") + withSQLConf( + USE_CURRENT_SQL_CONFIGS_FOR_VIEW.key -> "true", + ANSI_ENABLED.key -> "true") { + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(null))) + } + } + } + } + + test("alter temporary view should follow current storeAnalyzedPlanForView config") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + withView("v1") { + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") { + sql("CREATE TEMPORARY VIEW v1 AS SELECT * FROM t") + Seq(4, 6, 
5).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + val e = intercept[SparkException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("does not exist")) + } + + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "false") { + // alter view from legacy to non-legacy config + sql("ALTER VIEW v1 AS SELECT * FROM t") + Seq(1, 3, 5).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(1), Row(3), Row(5))) + } + + withSQLConf(STORE_ANALYZED_PLAN_FOR_VIEW.key -> "true") { + // alter view from non-legacy to legacy config + sql("ALTER VIEW v1 AS SELECT * FROM t") + Seq(2, 4, 6).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + val e = intercept[SparkException] { + sql("SELECT * FROM v1").collect() + }.getMessage + assert(e.contains("does not exist")) + } + } + } + } + + test("local temp view refers global temp view") { + withGlobalTempView("v1") { + withTempView("v2") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE GLOBAL TEMPORARY VIEW v1 AS SELECT 1") + sql(s"CREATE TEMPORARY VIEW v2 AS SELECT * FROM ${globalTempDB}.v1") + checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1))) + } + } + } + + test("global temp view refers local temp view") { + withTempView("v1") { + withGlobalTempView("v2") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + sql("CREATE TEMPORARY VIEW v1 AS SELECT 1") + sql(s"CREATE GLOBAL TEMPORARY VIEW v2 AS SELECT * FROM v1") + checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v2"), Seq(Row(1))) + } + } + } + + test("creating local temp view should not affect existing table reference") { + withTable("t") { + withTempView("t") { + withGlobalTempView("v") { + val globalTempDB = spark.sharedState.globalTempViewManager.database + Seq(2).toDF("c1").write.format("parquet").saveAsTable("t") + sql("CREATE GLOBAL TEMPORARY VIEW v AS SELECT * FROM t") + sql("CREATE TEMPORARY VIEW t AS SELECT 1") + checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v"), Seq(Row(2))) + } + } + } + } + test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { withTable("t") { withView("v1", "v2", "v3", "v4", "v5") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala new file mode 100644 index 0000000000000..fb9f5a73f6d9e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf._ +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} + +/** + * A base suite contains a set of view related test cases for different kind of views + * Currently, the test cases in this suite should have same behavior across all kind of views + * TODO: Combine this with [[SQLViewSuite]] + */ +abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { + import testImplicits._ + + protected def viewTypeString: String + protected def formattedViewName(viewName: String): String + + def createView( + viewName: String, + sqlText: String, + columnNames: Seq[String] = Seq.empty, + replace: Boolean = false): String = { + val replaceString = if (replace) "OR REPLACE" else "" + val columnString = if (columnNames.nonEmpty) columnNames.mkString("(", ",", ")") else "" + sql(s"CREATE $replaceString $viewTypeString $viewName $columnString AS $sqlText") + formattedViewName(viewName) + } + + def checkViewOutput(viewName: String, expectedAnswer: Seq[Row]): Unit = { + checkAnswer(sql(s"SELECT * FROM $viewName"), expectedAnswer) + } + + test("change SQLConf should not change view behavior - caseSensitiveAnalysis") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 FROM t", Seq("C1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(CASE_SENSITIVE.key -> flag) { + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - orderByOrdinal") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 FROM t ORDER BY 1 ASC, c1 DESC", Seq("c1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(ORDER_BY_ORDINAL.key -> flag) { + checkViewOutput(viewName, Seq(Row(1), Row(2), Row(3))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - groupByOrdinal") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1, count(c1) FROM t GROUP BY 1", Seq("c1", "count")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(GROUP_BY_ORDINAL.key -> flag) { + checkViewOutput(viewName, Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - groupByAliases") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView( + "v1", "SELECT c1 as a, count(c1) FROM t GROUP BY a", Seq("a", "count")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(GROUP_BY_ALIASES.key -> flag) { + checkViewOutput(viewName, Seq(Row(1, 1), Row(2, 1), Row(3, 1))) + } + } + } + } + } + + test("change SQLConf should not change view behavior - ansiEnabled") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT 1/0", Seq("c1")) + withView(viewName) { + Seq("true", "false").foreach { flag => + withSQLConf(ANSI_ENABLED.key -> flag) { + checkViewOutput(viewName, Seq(Row(null))) + } + } + } + } + } + + test("change current database should not change view behavior") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val 
viewName = createView("v1", "SELECT * from t") + withView(viewName) { + withTempDatabase { db => + sql(s"USE $db") + Seq(4, 5, 6).toDF("c1").write.format("parquet").saveAsTable("t") + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + + test("view should read the new data if table is updated") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT c1 from t", Seq("c1")) + withView(viewName) { + Seq(9, 7, 8).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") + checkViewOutput(viewName, Seq(Row(9), Row(7), Row(8))) + } + } + } + + test("add column for table should not affect view output") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName = createView("v1", "SELECT * from t") + withView(viewName) { + sql("ALTER TABLE t ADD COLUMN (c2 INT)") + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + + test("check cyclic view reference on CREATE OR REPLACE VIEW") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName1 = createView("v1", "SELECT * from t") + val viewName2 = createView("v2", s"SELECT * from $viewName1") + withView(viewName2, viewName1) { + val e = intercept[AnalysisException] { + createView("v1", s"SELECT * FROM $viewName2", replace = true) + }.getMessage + assert(e.contains("Recursive view")) + } + } + } + + test("check cyclic view reference on ALTER VIEW") { + withTable("t") { + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + val viewName1 = createView("v1", "SELECT * from t") + val viewName2 = createView("v2", s"SELECT * from $viewName1") + withView(viewName2, viewName1) { + val e = intercept[AnalysisException] { + sql(s"ALTER VIEW $viewName1 AS SELECT * FROM $viewName2") + }.getMessage + assert(e.contains("Recursive view")) + } + } + } +} + +class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "TEMPORARY VIEW" + override protected def formattedViewName(viewName: String): String = viewName + +} + +class GlobalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "GLOBAL TEMPORARY VIEW" + override protected def formattedViewName(viewName: String): String = { + val globalTempDB = spark.sharedState.globalTempViewManager.database + s"$globalTempDB.$viewName" + } +} + +class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { + override protected def viewTypeString: String = "VIEW" + override protected def formattedViewName(viewName: String): String = s"default.$viewName" +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 88aebb36633f6..66e6cf82922b7 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -105,7 +105,7 @@ private[hive] class SparkGetColumnsOperation( val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) if (databasePattern.matcher(globalTempViewDb).matches()) { catalog.globalTempViewManager.listViewNames(tablePattern).foreach { globalTempView => - 
catalog.globalTempViewManager.get(globalTempView).foreach { plan => + catalog.getGlobalTempView(globalTempView).foreach { plan => addToRowSet(columnPattern, globalTempViewDb, globalTempView, plan.schema) } } From 15579ba1f82e321a694130d4c9db2a6524e9ae2e Mon Sep 17 00:00:00 2001 From: Huaxin Gao Date: Fri, 4 Dec 2020 07:23:35 +0000 Subject: [PATCH 0660/1009] [SPARK-33430][SQL] Support namespaces in JDBC v2 Table Catalog ### What changes were proposed in this pull request? Add namespaces support in JDBC v2 Table Catalog by making ```JDBCTableCatalog``` extends```SupportsNamespaces``` ### Why are the changes needed? make v2 JDBC implementation complete ### Does this PR introduce _any_ user-facing change? Yes. Add the following to ```JDBCTableCatalog``` - listNamespaces - listNamespaces(String[] namespace) - namespaceExists(String[] namespace) - loadNamespaceMetadata(String[] namespace) - createNamespace - alterNamespace - dropNamespace ### How was this patch tested? Add new docker tests Closes #30473 from huaxingao/name_space. Authored-by: Huaxin Gao Signed-off-by: Wenchen Fan --- .../sql/jdbc/v2/PostgresNamespaceSuite.scala | 59 +++++++ .../sql/jdbc/v2/V2JDBCNamespaceTest.scala | 62 ++++++++ .../datasources/jdbc/JdbcUtils.scala | 49 ++++++ .../v2/jdbc/JDBCTableCatalog.scala | 144 +++++++++++++++++- .../apache/spark/sql/jdbc/JdbcDialects.scala | 12 +- 5 files changed, 317 insertions(+), 9 deletions(-) create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala create mode 100644 external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala new file mode 100644 index 0000000000000..e534df84ce6fa --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import java.sql.Connection + +import scala.collection.JavaConverters._ + +import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.tags.DockerTest + +/** + * To run this test suite for a specific version (e.g., postgres:13.0): + * {{{ + * POSTGRES_DOCKER_IMAGE_NAME=postgres:13.0 + * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite" + * }}} + */ +@DockerTest +class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { + override val db = new DatabaseOnDocker { + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:13.0-alpine") + override val env = Map( + "POSTGRES_PASSWORD" -> "rootpass" + ) + override val usesIpc = false + override val jdbcPort = 5432 + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:postgresql://$ip:$port/postgres?user=postgres&password=rootpass" + } + + val map = new CaseInsensitiveStringMap( + Map("url" -> db.getJdbcUrl(dockerIp, externalPort), + "driver" -> "org.postgresql.Driver").asJava) + + catalog.initialize("postgresql", map) + + override def dataPreparation(conn: Connection): Unit = {} + + override def builtinNamespaces: Array[Array[String]] = { + Array(Array("information_schema"), Array("pg_catalog"), Array("public")) + } +} diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala new file mode 100644 index 0000000000000..979b0784f0448 --- /dev/null +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCNamespaceTest.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.jdbc.v2 + +import scala.collection.JavaConverters._ + +import org.apache.log4j.Level + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.catalog.NamespaceChange +import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.tags.DockerTest + +@DockerTest +private[v2] trait V2JDBCNamespaceTest extends SharedSparkSession { + val catalog = new JDBCTableCatalog() + + def builtinNamespaces: Array[Array[String]] + + test("listNamespaces: basic behavior") { + catalog.createNamespace(Array("foo"), Map("comment" -> "test comment").asJava) + assert(catalog.listNamespaces() === Array(Array("foo")) ++ builtinNamespaces) + assert(catalog.listNamespaces(Array("foo")) === Array()) + assert(catalog.namespaceExists(Array("foo")) === true) + + val logAppender = new LogAppender("catalog comment") + withLogAppender(logAppender) { + catalog.alterNamespace(Array("foo"), NamespaceChange + .setProperty("comment", "comment for foo")) + catalog.alterNamespace(Array("foo"), NamespaceChange.removeProperty("comment")) + } + val createCommentWarning = logAppender.loggingEvents + .filter(_.getLevel == Level.WARN) + .map(_.getRenderedMessage) + .exists(_.contains("catalog comment")) + assert(createCommentWarning === false) + + catalog.dropNamespace(Array("foo")) + assert(catalog.namespaceExists(Array("foo")) === false) + assert(catalog.listNamespaces() === builtinNamespaces) + val msg = intercept[AnalysisException] { + catalog.listNamespaces(Array("foo")) + }.getMessage + assert(msg.contains("Namespace 'foo' not found")) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 5dd0d2bd74838..216fb02740500 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -927,6 +927,55 @@ object JdbcUtils extends Logging { } } + /** + * Creates a namespace. + */ + def createNamespace( + conn: Connection, + options: JDBCOptions, + namespace: String, + comment: String): Unit = { + val dialect = JdbcDialects.get(options.url) + executeStatement(conn, options, s"CREATE SCHEMA ${dialect.quoteIdentifier(namespace)}") + if (!comment.isEmpty) createNamespaceComment(conn, options, namespace, comment) + } + + def createNamespaceComment( + conn: Connection, + options: JDBCOptions, + namespace: String, + comment: String): Unit = { + val dialect = JdbcDialects.get(options.url) + try { + executeStatement( + conn, options, dialect.getSchemaCommentQuery(namespace, comment)) + } catch { + case e: Exception => + logWarning("Cannot create JDBC catalog comment. The catalog comment will be ignored.") + } + } + + def removeNamespaceComment( + conn: Connection, + options: JDBCOptions, + namespace: String): Unit = { + val dialect = JdbcDialects.get(options.url) + try { + executeStatement(conn, options, dialect.removeSchemaCommentQuery(namespace)) + } catch { + case e: Exception => + logWarning("Cannot drop JDBC catalog comment.") + } + } + + /** + * Drops a namespace from the JDBC database. 
+ */ + def dropNamespace(conn: Connection, options: JDBCOptions, namespace: String): Unit = { + val dialect = JdbcDialects.get(options.url) + executeStatement(conn, options, s"DROP SCHEMA ${dialect.quoteIdentifier(namespace)}") + } + private def executeStatement(conn: Connection, options: JDBCOptions, sql: String): Unit = { val statement = conn.createStatement try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index 63f802363f7c0..27558e5b0d61b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -17,13 +17,16 @@ package org.apache.spark.sql.execution.datasources.v2.jdbc import java.sql.{Connection, SQLException} +import java.util import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.collection.mutable.ArrayBuilder import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException} -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog, TableChange} +import org.apache.spark.sql.catalyst.analysis.{NamespaceAlreadyExistsException, NoSuchNamespaceException, NoSuchTableException} +import org.apache.spark.sql.connector.catalog.{Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcOptionsInWrite, JDBCRDD, JdbcUtils} import org.apache.spark.sql.internal.SQLConf @@ -31,7 +34,8 @@ import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap -class JDBCTableCatalog extends TableCatalog with Logging { +class JDBCTableCatalog extends TableCatalog with SupportsNamespaces with Logging { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper private var catalogName: String = null private var options: JDBCOptions = _ @@ -125,12 +129,12 @@ class JDBCTableCatalog extends TableCatalog with Logging { if (!properties.isEmpty) { properties.asScala.map { case (k, v) => k match { - case "comment" => tableComment = v - case "provider" => + case TableCatalog.PROP_COMMENT => tableComment = v + case TableCatalog.PROP_PROVIDER => throw new AnalysisException("CREATE TABLE ... USING ... is not supported in" + " JDBC catalog.") - case "owner" => // owner is ignored. It is default to current user name. - case "location" => + case TableCatalog.PROP_OWNER => // owner is ignored. It is default to current user name. + case TableCatalog.PROP_LOCATION => throw new AnalysisException("CREATE TABLE ... LOCATION ... 
is not supported in" + " JDBC catalog.") case _ => tableProperties = tableProperties + " " + s"$k $v" @@ -171,6 +175,132 @@ class JDBCTableCatalog extends TableCatalog with Logging { } } + override def namespaceExists(namespace: Array[String]): Boolean = namespace match { + case Array(db) => + withConnection { conn => + val rs = conn.getMetaData.getSchemas(null, db) + while (rs.next()) { + if (rs.getString(1) == db) return true; + } + false + } + case _ => false + } + + override def listNamespaces(): Array[Array[String]] = { + withConnection { conn => + val schemaBuilder = ArrayBuilder.make[Array[String]] + val rs = conn.getMetaData.getSchemas() + while (rs.next()) { + schemaBuilder += Array(rs.getString(1)) + } + schemaBuilder.result + } + } + + override def listNamespaces(namespace: Array[String]): Array[Array[String]] = { + namespace match { + case Array() => + listNamespaces() + case Array(_) if namespaceExists(namespace) => + Array() + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] = { + namespace match { + case Array(db) => + if (!namespaceExists(namespace)) throw new NoSuchNamespaceException(db) + mutable.HashMap[String, String]().asJava + + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def createNamespace( + namespace: Array[String], + metadata: util.Map[String, String]): Unit = namespace match { + case Array(db) if !namespaceExists(namespace) => + var comment = "" + if (!metadata.isEmpty) { + metadata.asScala.map { + case (k, v) => k match { + case SupportsNamespaces.PROP_COMMENT => comment = v + case SupportsNamespaces.PROP_OWNER => // ignore + case SupportsNamespaces.PROP_LOCATION => + throw new AnalysisException("CREATE NAMESPACE ... LOCATION ... 
is not supported in" + + " JDBC catalog.") + case _ => + throw new AnalysisException(s"CREATE NAMESPACE with property $k is not supported in" + + " JDBC catalog.") + } + } + } + withConnection { conn => + classifyException(s"Failed create name space: $db") { + JdbcUtils.createNamespace(conn, options, db, comment) + } + } + + case Array(_) => + throw new NamespaceAlreadyExistsException(namespace) + + case _ => + throw new IllegalArgumentException(s"Invalid namespace name: ${namespace.quoted}") + } + + override def alterNamespace(namespace: Array[String], changes: NamespaceChange*): Unit = { + namespace match { + case Array(db) => + changes.foreach { + case set: NamespaceChange.SetProperty => + if (set.property() == SupportsNamespaces.PROP_COMMENT) { + withConnection { conn => + JdbcUtils.createNamespaceComment(conn, options, db, set.value) + } + } else { + throw new AnalysisException(s"SET NAMESPACE with property ${set.property} " + + "is not supported in JDBC catalog.") + } + + case unset: NamespaceChange.RemoveProperty => + if (unset.property() == SupportsNamespaces.PROP_COMMENT) { + withConnection { conn => + JdbcUtils.removeNamespaceComment(conn, options, db) + } + } else { + throw new AnalysisException(s"Remove NAMESPACE property ${unset.property} " + + "is not supported in JDBC catalog.") + } + + case _ => + throw new AnalysisException(s"Unsupported NamespaceChange $changes in JDBC catalog.") + } + + case _ => + throw new NoSuchNamespaceException(namespace) + } + } + + override def dropNamespace(namespace: Array[String]): Boolean = namespace match { + case Array(db) if namespaceExists(namespace) => + if (listTables(Array(db)).nonEmpty) { + throw new IllegalStateException(s"Namespace ${namespace.quoted} is not empty") + } + withConnection { conn => + classifyException(s"Failed drop name space: $db") { + JdbcUtils.dropNamespace(conn, options, db) + true + } + } + + case _ => + throw new NoSuchNamespaceException(namespace) + } + private def checkNamespace(namespace: Array[String]): Unit = { // In JDBC there is no nested database/schema if (namespace.length > 1) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index b12882b72fb66..ead0a1aa3a243 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.{Connection, Date, SQLFeatureNotSupportedException, Timestamp} +import java.sql.{Connection, Date, Timestamp} import scala.collection.mutable.ArrayBuilder @@ -232,7 +232,7 @@ abstract class JdbcDialect extends Serializable with Logging{ val name = updateNull.fieldNames updateClause += getUpdateColumnNullabilityQuery(tableName, name(0), updateNull.nullable()) case _ => - throw new SQLFeatureNotSupportedException(s"Unsupported TableChange $change") + throw new AnalysisException(s"Unsupported TableChange $change in JDBC catalog.") } } updateClause.result() @@ -270,6 +270,14 @@ abstract class JdbcDialect extends Serializable with Logging{ s"COMMENT ON TABLE $table IS '$comment'" } + def getSchemaCommentQuery(schema: String, comment: String): String = { + s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS '$comment'" + } + + def removeSchemaCommentQuery(schema: String): String = { + s"COMMENT ON SCHEMA ${quoteIdentifier(schema)} IS NULL" + } + /** * Gets a dialect exception, classifies it and wraps it by `AnalysisException`. 
* @param message The error message to be placed to the returned exception. From e8380665c7e3aca446631964f49e09f264dee1c2 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 4 Dec 2020 16:24:41 +0900 Subject: [PATCH 0661/1009] [SPARK-33658][SQL] Suggest using Datetime conversion functions for invalid ANSI casting ### What changes were proposed in this pull request? Suggest users using Datetime conversion functions in the error message of invalid ANSI explicit casting. ### Why are the changes needed? In ANSI mode, explicit cast between DateTime types and Numeric types is not allowed. As of now, we have introduced new functions `UNIX_SECONDS`/`UNIX_MILLIS`/`UNIX_MICROS`/`UNIX_DATE`/`DATE_FROM_UNIX_DATE`, we can show suggestions to users so that they can complete these type conversions precisely and easily in ANSI mode. ### Does this PR introduce _any_ user-facing change? Yes, better error messages ### How was this patch tested? Unit test Closes #30603 from gengliangwang/improveErrorMsgOfExplicitCast. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 11 +++++++ .../spark/sql/catalyst/expressions/Cast.scala | 30 +++++++++++++++---- .../sql/catalyst/expressions/CastSuite.scala | 12 ++++++-- 3 files changed, 46 insertions(+), 7 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c13ea2b167d93..c3e17dc22eed0 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -96,6 +96,10 @@ java.lang.NumberFormatException: invalid input syntax for type numeric: a SELECT CAST(2147483648L AS INT); java.lang.ArithmeticException: Casting 2147483648 to int causes overflow +SELECT CAST(DATE'2020-01-01' AS INT) +org.apache.spark.sql.AnalysisException: cannot resolve 'CAST(DATE '2020-01-01' AS INT)' due to data type mismatch: cannot cast date to int. +To convert values from date to int, you can use function UNIX_DATE instead. + -- `spark.sql.ansi.enabled=false` (This is a default behaviour) SELECT CAST('a' AS INT); +--------------+ @@ -111,6 +115,13 @@ SELECT CAST(2147483648L AS INT); | -2147483648| +-----------------------+ +SELECT CAST(DATE'2020-01-01' AS INT) ++------------------------------+ +|CAST(DATE '2020-01-01' AS INT)| ++------------------------------+ +| null| ++------------------------------+ + -- Examples of store assignment rules CREATE TABLE t (v INT); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 1b2e2db932970..72bd9ca4d3d1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1894,6 +1894,19 @@ object AnsiCast { case _ => false } + // Show suggestion on how to complete the disallowed explicit casting with built-in type + // conversion functions. + private def suggestionOnConversionFunctions ( + from: DataType, + to: DataType, + functionNames: String): String = { + // scalastyle:off line.size.limit + s"""cannot cast ${from.catalogString} to ${to.catalogString}. + |To convert values from ${from.catalogString} to ${to.catalogString}, you can use $functionNames instead. 
+ |""".stripMargin + // scalastyle:on line.size.limit + } + def typeCheckFailureMessage( from: DataType, to: DataType, @@ -1901,12 +1914,19 @@ object AnsiCast { fallbackConfValue: String): String = (from, to) match { case (_: NumericType, TimestampType) => - // scalastyle:off line.size.limit - s""" - | cannot cast ${from.catalogString} to ${to.catalogString}. - | To convert values from ${from.catalogString} to ${to.catalogString}, you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead. - |""".stripMargin + suggestionOnConversionFunctions(from, to, + "functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS") + + case (TimestampType, _: NumericType) => + suggestionOnConversionFunctions(from, to, "functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS") + + case (_: NumericType, DateType) => + suggestionOnConversionFunctions(from, to, "function DATE_FROM_UNIX_DATE") + + case (DateType, _: NumericType) => + suggestionOnConversionFunctions(from, to, "function UNIX_DATE") + // scalastyle:off line.size.limit case (_: ArrayType, StringType) => s""" | cannot cast ${from.catalogString} to ${to.catalogString} with ANSI mode on. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 35db25ec9342c..e46599dc19a8b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -850,18 +850,26 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { test("ANSI mode: disallow type conversions between Numeric types and Timestamp type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(TimestampType) + var errorMsg = + "you can use functions TIMESTAMP_SECONDS/TIMESTAMP_MILLIS/TIMESTAMP_MICROS instead" + verifyCastFailure(cast(Literal(0L), TimestampType), Some(errorMsg)) + val timestampLiteral = Literal(1L, TimestampType) + errorMsg = "you can use functions UNIX_SECONDS/UNIX_MILLIS/UNIX_MICROS instead." numericTypes.foreach { numericType => - verifyCastFailure(cast(timestampLiteral, numericType)) + verifyCastFailure(cast(timestampLiteral, numericType), Some(errorMsg)) } } test("ANSI mode: disallow type conversions between Numeric types and Date type") { import DataTypeTestUtils.numericTypes checkInvalidCastFromNumericType(DateType) + var errorMsg = "you can use function DATE_FROM_UNIX_DATE instead" + verifyCastFailure(cast(Literal(0L), DateType), Some(errorMsg)) val dateLiteral = Literal(1, DateType) + errorMsg = "you can use function UNIX_DATE instead" numericTypes.foreach { numericType => - verifyCastFailure(cast(dateLiteral, numericType)) + verifyCastFailure(cast(dateLiteral, numericType), Some(errorMsg)) } } From 94c144bdd05d6c751dcd907161e1b965e637f69c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 4 Dec 2020 16:26:07 +0900 Subject: [PATCH 0662/1009] [SPARK-33571][SQL][DOCS] Add a ref to INT96 config from the doc for `spark.sql.legacy.parquet.datetimeRebaseModeInWrite/Read` ### What changes were proposed in this pull request? For the SQL configs `spark.sql.legacy.parquet.datetimeRebaseModeInWrite` and `spark.sql.legacy.parquet.datetimeRebaseModeInRead`, improve their descriptions by: 1. Explicitly document on which parquet types, those configs influence on 2. Refer to corresponding configs for `INT96` ### Why are the changes needed? 
To avoid user confusions like reposted in SPARK-33571, and make the config description more precise. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle`. Closes #30596 from MaxGekk/clarify-rebase-docs. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 496065f85fbbf..4442581b77811 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2746,20 +2746,6 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_PARQUET_REBASE_MODE_IN_WRITE = - buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite") - .internal() - .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " + - "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " + - "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " + - "When EXCEPTION, which is the default, Spark will fail the writing if it sees " + - "ancient dates/timestamps that are ambiguous between the two calendars.") - .version("3.0.0") - .stringConf - .transform(_.toUpperCase(Locale.ROOT)) - .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) - .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) - val LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.parquet.int96RebaseModeInWrite") .internal() @@ -2774,15 +2760,17 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) - val LEGACY_PARQUET_REBASE_MODE_IN_READ = - buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") + val LEGACY_PARQUET_REBASE_MODE_IN_WRITE = + buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInWrite") .internal() - .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " + - "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " + - "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " + - "When EXCEPTION, which is the default, Spark will fail the reading if it sees " + - "ancient dates/timestamps that are ambiguous between the two calendars. This config is " + - "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown.") + .doc("When LEGACY, Spark will rebase dates/timestamps from Proleptic Gregorian calendar " + + "to the legacy hybrid (Julian + Gregorian) calendar when writing Parquet files. " + + "When CORRECTED, Spark will not do rebase and write the dates/timestamps as it is. " + + "When EXCEPTION, which is the default, Spark will fail the writing if it sees " + + "ancient dates/timestamps that are ambiguous between the two calendars. " + + "This config influences on writes of the following parquet logical types: DATE, " + + "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. 
The INT96 type has the separate config: " + + s"${LEGACY_PARQUET_INT96_REBASE_MODE_IN_WRITE.key}.") .version("3.0.0") .stringConf .transform(_.toUpperCase(Locale.ROOT)) @@ -2804,6 +2792,24 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_PARQUET_REBASE_MODE_IN_READ = + buildConf("spark.sql.legacy.parquet.datetimeRebaseModeInRead") + .internal() + .doc("When LEGACY, Spark will rebase dates/timestamps from the legacy hybrid (Julian + " + + "Gregorian) calendar to Proleptic Gregorian calendar when reading Parquet files. " + + "When CORRECTED, Spark will not do rebase and read the dates/timestamps as it is. " + + "When EXCEPTION, which is the default, Spark will fail the reading if it sees " + + "ancient dates/timestamps that are ambiguous between the two calendars. This config is " + + "only effective if the writer info (like Spark, Hive) of the Parquet files is unknown. " + + "This config influences on reads of the following parquet logical types: DATE, " + + "TIMESTAMP_MILLIS, TIMESTAMP_MICROS. The INT96 type has the separate config: " + + s"${LEGACY_PARQUET_INT96_REBASE_MODE_IN_READ.key}.") + .version("3.0.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) + .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) + val LEGACY_AVRO_REBASE_MODE_IN_WRITE = buildConf("spark.sql.legacy.avro.datetimeRebaseModeInWrite") .internal() From 325abf7957373161d2cf0921d35567235186d6eb Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Fri, 4 Dec 2020 16:45:55 +0900 Subject: [PATCH 0663/1009] [SPARK-33577][SS] Add support for V1Table in stream writer table API and create table if not exist by default ### What changes were proposed in this pull request? After SPARK-32896, we have table API for stream writer but only support DataSource v2 tables. Here we add the following enhancements: - Create non-existing tables by default - Support both managed and external V1Tables ### Why are the changes needed? Make the API covers more use cases. Especially for the file provider based tables. ### Does this PR introduce _any_ user-facing change? Yes, new features added. ### How was this patch tested? Add new UTs. Closes #30521 from xuanyuanking/SPARK-33577. 
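For illustration, a minimal sketch of the extended API in use; the source, table name, and checkpoint path are made up for the example and assume an active `spark` session:

```scala
// Hypothetical usage sketch: stream into a table that does not exist yet.
// With this change the table is created on the fly with the given provider.
val query = spark.readStream
  .format("rate")                                   // built-in test source
  .load()
  .writeStream
  .format("parquet")
  .option("checkpointLocation", "/tmp/rate_sink_ckpt")
  .toTable("default.rate_sink")
```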
Authored-by: Yuanjian Li Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/streaming/DataStreamWriter.scala | 101 ++++++++---- .../test/DataStreamTableAPISuite.scala | 151 ++++++++++++++---- 2 files changed, 188 insertions(+), 64 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 9e3599712fde5..01e626e5436a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -22,12 +22,16 @@ import java.util.concurrent.TimeoutException import scala.collection.JavaConverters._ +import org.apache.hadoop.fs.Path + import org.apache.spark.annotation.Evolving import org.apache.spark.api.java.function.VoidFunction2 import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.plans.logical.CreateTableStatement import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider} +import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table, TableProvider, V1Table, V2TableWithV1Fallback} import org.apache.spark.sql.connector.catalog.TableCapability._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.DataSource @@ -298,52 +302,85 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { /** * Starts the execution of the streaming query, which will continually output results to the given - * table as new data arrives. The returned [[StreamingQuery]] object can be used to interact with - * the stream. + * table as new data arrives. A new table will be created if the table not exists. The returned + * [[StreamingQuery]] object can be used to interact with the stream. * * @since 3.1.0 */ @throws[TimeoutException] def toTable(tableName: String): StreamingQuery = { - this.source = SOURCE_NAME_TABLE this.tableName = tableName - startInternal(None) - } - private def startInternal(path: Option[String]): StreamingQuery = { - if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { - throw new AnalysisException("Hive data source can only be used with tables, you can not " + - "write files of Hive data source directly.") - } + import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier - if (source == SOURCE_NAME_TABLE) { - assertNotPartitioned(SOURCE_NAME_TABLE) + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser + .parseMultipartIdentifier(tableName) + val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier - import df.sparkSession.sessionState.analyzer.CatalogAndIdentifier + // Currently we don't create a logical streaming writer node in logical plan, so cannot rely + // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. + // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. 
+ if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { + throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") + } + if (!catalog.asTableCatalog.tableExists(identifier)) { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - val originalMultipartIdentifier = df.sparkSession.sessionState.sqlParser - .parseMultipartIdentifier(tableName) - val CatalogAndIdentifier(catalog, identifier) = originalMultipartIdentifier - - // Currently we don't create a logical streaming writer node in logical plan, so cannot rely - // on analyzer to resolve it. Directly lookup only for temp view to provide clearer message. - // TODO (SPARK-27484): we should add the writing node before the plan is analyzed. - if (df.sparkSession.sessionState.catalog.isTempView(originalMultipartIdentifier)) { - throw new AnalysisException(s"Temporary view $tableName doesn't support streaming write") - } + /** + * Note, currently the new table creation by this API doesn't fully cover the V2 table. + * TODO (SPARK-33638): Full support of v2 table creation + */ + val cmd = CreateTableStatement( + originalMultipartIdentifier, + df.schema.asNullable, + partitioningColumns.getOrElse(Nil).asTransforms.toSeq, + None, + Map.empty[String, String], + Some(source), + Map.empty[String, String], + extraOptions.get("path"), + None, + None, + external = false, + ifNotExists = false) + Dataset.ofRows(df.sparkSession, cmd) + } - val tableInstance = catalog.asTableCatalog.loadTable(identifier) + val tableInstance = catalog.asTableCatalog.loadTable(identifier) - import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ - val sink = tableInstance match { - case t: SupportsWrite if t.supports(STREAMING_WRITE) => t - case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + - s"write - $t") + def writeToV1Table(table: CatalogTable): StreamingQuery = { + if (table.tableType == CatalogTableType.VIEW) { + throw new AnalysisException(s"Streaming into views $tableName is not supported.") + } + require(table.provider.isDefined) + if (source != table.provider.get) { + throw new AnalysisException(s"The input source($source) is different from the table " + + s"$tableName's data source provider(${table.provider.get}).") } + format(table.provider.get) + .option("path", new Path(table.location).toString).start() + } - startQuery(sink, extraOptions) - } else if (source == SOURCE_NAME_MEMORY) { + import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits._ + tableInstance match { + case t: SupportsWrite if t.supports(STREAMING_WRITE) => startQuery(t, extraOptions) + case t: V2TableWithV1Fallback => + writeToV1Table(t.v1Table) + case t: V1Table => + writeToV1Table(t.v1Table) + case t => throw new AnalysisException(s"Table $tableName doesn't support streaming " + + s"write - $t") + } + } + + private def startInternal(path: Option[String]): StreamingQuery = { + if (source.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { + throw new AnalysisException("Hive data source can only be used with tables, you can not " + + "write files of Hive data source directly.") + } + + if (source == SOURCE_NAME_MEMORY) { assertNotPartitioned(SOURCE_NAME_MEMORY) if (extraOptions.get("queryName").isEmpty) { throw new AnalysisException("queryName must be specified for memory sink") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index bf850432d5c0e..0296366f3578b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -26,7 +26,7 @@ import org.scalatest.BeforeAndAfter import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.{FakeV2Provider, InMemoryTableCatalog, InMemoryTableSessionCatalog} @@ -39,6 +39,7 @@ import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.streaming.sources.FakeScanBuilder import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { import testImplicits._ @@ -175,21 +176,24 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { test("write: write to table with custom catalog & no namespace") { val tableIdentifier = "testcat.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to table with custom catalog & namespace") { spark.sql("CREATE NAMESPACE testcat.ns") - val tableIdentifier = "testcat.ns.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to table with default session catalog") { @@ -200,35 +204,19 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { spark.sql("CREATE NAMESPACE ns") val tableIdentifier = "ns.table_name" - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") - checkAnswer(spark.table(tableIdentifier), Seq.empty) + withTable(tableIdentifier) { + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING $v2Source") + checkAnswer(spark.table(tableIdentifier), Seq.empty) - runTestWithStreamAppend(tableIdentifier) + runTestWithStreamAppend(tableIdentifier) + } } test("write: write to non-exist table with custom catalog") { val tableIdentifier = "testcat.nonexisttable" - spark.sql("CREATE NAMESPACE testcat.ns") - - withTempDir { checkpointDir => - val exc = intercept[NoSuchTableException] { - runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) - } - assert(exc.getMessage.contains("nonexisttable")) - } - } - - test("write: write to file provider based table isn't allowed yet") { - val tableIdentifier = 
"table_name" - - spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING parquet") - checkAnswer(spark.table(tableIdentifier), Seq.empty) - withTempDir { checkpointDir => - val exc = intercept[AnalysisException] { - runStreamQueryAppendMode(tableIdentifier, checkpointDir, Seq.empty, Seq.empty) - } - assert(exc.getMessage.contains("doesn't support streaming write")) + withTable(tableIdentifier) { + runTestWithStreamAppend(tableIdentifier) } } @@ -262,8 +250,107 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val exc = intercept[AnalysisException] { runStreamQueryAppendMode(viewIdentifier, checkpointDir, Seq.empty, Seq.empty) } - assert(exc.getMessage.contains("doesn't support streaming write")) + assert(exc.getMessage.contains(s"Streaming into views $viewIdentifier is not supported")) + } + } + + test("write: write to an external table") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to a managed table") { + val tableName = "stream_test" + withTable(tableName) { + checkForStreamTable(None, tableName) + } + } + + test("write: write to an external table with existing path") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. + Seq(4, 5, 6).toDF("value").write.format("parquet") + .option("path", dir.getCanonicalPath).saveAsTable(tableName) + + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to a managed table with existing path") { + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. + Seq(4, 5, 6).toDF("value").write.format("parquet").saveAsTable(tableName) + + checkForStreamTable(None, tableName) + } + } + + test("write: write to an external path and create table") { + withTempDir { dir => + val tableName = "stream_test" + withTable(tableName) { + // The file written by batch will not be seen after the table was written by a streaming + // query. This is because we loads files from the metadata log instead of listing them + // using HDFS API. 
+ Seq(4, 5, 6).toDF("value").write + .mode("append").format("parquet").save(dir.getCanonicalPath) + + checkForStreamTable(Some(dir), tableName) + } + } + } + + test("write: write to table with different format shouldn't be allowed") { + val tableName = "stream_test" + + spark.sql(s"CREATE TABLE $tableName (id bigint, data string) USING json") + checkAnswer(spark.table(tableName), Seq.empty) + + withTempDir { checkpointDir => + val exc = intercept[AnalysisException] { + runStreamQueryAppendMode(tableName, checkpointDir, Seq.empty, Seq.empty) + } + assert(exc.getMessage.contains("The input source(parquet) is different from the table " + + s"$tableName's data source provider(json)")) + } + } + + private def checkForStreamTable(dir: Option[File], tableName: String): Unit = { + val memory = MemoryStream[Int] + val dsw = memory.toDS().writeStream.format("parquet") + dir.foreach { output => + dsw.option("path", output.getCanonicalPath) + } + val sq = dsw + .option("checkpointLocation", Utils.createTempDir().getCanonicalPath) + .toTable(tableName) + memory.addData(1, 2, 3) + sq.processAllAvailable() + + checkDataset( + spark.table(tableName).as[Int], + 1, 2, 3) + val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName)) + val path = if (dir.nonEmpty) { + dir.get + } else { + new File(catalogTable.location) } + checkDataset( + spark.read.format("parquet").load(path.getCanonicalPath).as[Int], + 1, 2, 3) } private def runTestWithStreamAppend(tableIdentifier: String) = { From 91baab77f7e0a5102ac069846f0e2920bb2dd15a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 3 Dec 2020 23:47:43 -0800 Subject: [PATCH 0664/1009] [SPARK-33656][TESTS] Add option to keep container after tests finish for DockerJDBCIntegrationSuites for debug ### What changes were proposed in this pull request? This PR add an option to keep container after DockerJDBCIntegrationSuites (e.g. DB2IntegrationSuite, PostgresIntegrationSuite) finish. By setting a system property `spark.test.docker.keepContainer` to `true`, we can use this option. ### Why are the changes needed? If some error occur during the tests, it would be useful to keep the container for debug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the container is kept after the test by the following commands. ``` # With sbt $ build/sbt -Dspark.test.docker.keepContainer=true -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" # With Maven $ build/mvn -Dspark.test.docker.keepContainer=true -Pdocker-integration-tests -Phive -Phive-thriftserver -Dtest=none -DwildcardSuites=org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite test $ docker container ls ``` I also confirmed that there are no regression for all the subclasses of `DockerJDBCIntegrationSuite` with sbt/Maven. * MariaDBKrbIntegrationSuite * DB2KrbIntegrationSuite * PostgresKrbIntegrationSuite * MySQLIntegrationSuite * PostgresIntegrationSuite * DB2IntegrationSuite * MsSqlServerintegrationsuite * OracleIntegrationSuite * v2.MySQLIntegrationSuite * v2.PostgresIntegrationSuite * v2.DB2IntegrationSuite * v2.MsSqlServerIntegrationSuite * v2.OracleIntegrationSuite NOTE: `DB2IntegrationSuite`, `v2.DB2IntegrationSuite` and `DB2KrbIntegrationSuite` can fail due to the too much short connection timeout. It's a separate issue and I'll fix it in #30583 Closes #30601 from sarutak/keepContainer. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../sql/jdbc/DockerJDBCIntegrationSuite.scala | 39 ++++++++++++------- pom.xml | 2 + 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 00b7b413a964d..d6270313cabea 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import scala.util.control.NonFatal import com.spotify.docker.client._ +import com.spotify.docker.client.DockerClient.ListContainersParam import com.spotify.docker.client.exceptions.ImageNotFoundException import com.spotify.docker.client.messages.{ContainerConfig, HostConfig, PortBinding} import org.scalatest.concurrent.Eventually @@ -95,7 +96,9 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu protected val dockerIp = DockerUtils.getDockerIp() val db: DatabaseOnDocker - val connectionTimeout = timeout(2.minutes) + val connectionTimeout = timeout(5.minutes) + val keepContainer = + sys.props.getOrElse("spark.test.docker.keepContainer", "false").toBoolean private var docker: DockerClient = _ // Configure networking (necessary for boot2docker / Docker Machine) @@ -176,20 +179,11 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu override def afterAll(): Unit = { try { + cleanupContainer() + } finally { if (docker != null) { - try { - if (containerId != null) { - docker.killContainer(containerId) - docker.removeContainer(containerId) - } - } catch { - case NonFatal(e) => - logWarning(s"Could not stop container $containerId", e) - } finally { - docker.close() - } + docker.close() } - } finally { super.afterAll() } } @@ -205,4 +199,23 @@ abstract class DockerJDBCIntegrationSuite extends SharedSparkSession with Eventu * Prepare databases and tables for testing. */ def dataPreparation(connection: Connection): Unit + + private def cleanupContainer(): Unit = { + if (docker != null && containerId != null && !keepContainer) { + try { + docker.killContainer(containerId) + } catch { + case NonFatal(e) => + val exitContainerIds = + docker.listContainers(ListContainersParam.withStatusExited()).asScala.map(_.id()) + if (exitContainerIds.contains(containerId)) { + logWarning(s"Container $containerId already stopped") + } else { + logWarning(s"Could not stop container $containerId", e) + } + } finally { + docker.removeContainer(containerId) + } + } + } } diff --git a/pom.xml b/pom.xml index 4d6e3bbc95378..80097aec0f429 100644 --- a/pom.xml +++ b/pom.xml @@ -250,6 +250,7 @@ --> ${session.executionRootDirectory} + false 1g @@ -2626,6 +2627,7 @@ false true ${spark.test.webdriver.chrome.driver} + ${spark.test.docker.keepContainer} __not_used__ From 976e8970399a1a0fef4c826d4fdd1a138ca52c77 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 4 Dec 2020 00:12:04 -0800 Subject: [PATCH 0665/1009] [SPARK-33640][TESTS] Extend connection timeout to DB server for DB2IntegrationSuite and its variants ### What changes were proposed in this pull request? This PR extends the connection timeout to the DB server for DB2IntegrationSuite and its variants. 
The container image ibmcom/db2 creates a database when it starts up. The database creation can take over 2 minutes. DB2IntegrationSuite and its variants use the container image but the connection timeout is set to 2 minutes so these suites almost always fail. ### Why are the changes needed? To pass those suites. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the suites pass with the following commands. ``` $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.DB2IntegrationSuite" $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.v2.DB2IntegrationSuite" $ build/sbt -Pdocker-integration-tests -Phive -Phive-thriftserver package "testOnly org.apache.spark.sql.jdbc.DB2KrbIntegrationSuite" Closes #30583 from sarutak/extend-timeout-for-db2. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala | 4 ++++ .../org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala | 3 +++ .../org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala | 4 ++++ 3 files changed, 11 insertions(+) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index d086c8cdcc589..49ca91c50d25e 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -21,6 +21,8 @@ import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} import java.util.Properties +import org.scalatest.time.SpanSugar._ + import org.apache.spark.sql.Row import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest @@ -51,6 +53,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore } + override val connectionTimeout = timeout(3.minutes) + override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE TABLE tbl (x INTEGER, y VARCHAR(8))").executeUpdate() conn.prepareStatement("INSERT INTO tbl VALUES (42,'fred')").executeUpdate() diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index 9c3a609b98bbe..5cbe6fab186a5 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -24,6 +24,7 @@ import javax.security.auth.login.Configuration import com.spotify.docker.client.messages.{ContainerConfig, HostConfig} import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation} import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS +import org.scalatest.time.SpanSugar._ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.connection.{DB2ConnectionProvider, SecureConnectionProvider} @@ -76,6 +77,8 @@ class DB2KrbIntegrationSuite extends 
DockerKrbJDBCIntegrationSuite { } } + override val connectionTimeout = timeout(3.minutes) + override protected def setAuthentication(keytabFile: String, principal: String): Unit = { val config = new SecureConnectionProvider.JDBCConfiguration( Configuration.getConfiguration, "JaasClient", keytabFile, principal) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 6f803b8f61dd4..8cabf353c6fef 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection +import org.scalatest.time.SpanSugar._ + import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog @@ -52,6 +54,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite with V2JDBCTest { s"jdbc:db2://$ip:$port/foo:user=db2inst1;password=rootpass;retrieveMessagesFromServerOnGetMessage=true;" //scalastyle:ignore } + override val connectionTimeout = timeout(3.minutes) + override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.db2.url", db.getJdbcUrl(dockerIp, externalPort)) From 233a8494c8cc7bc8a4a9393ec512943749f11bef Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Fri, 4 Dec 2020 19:33:11 +0900 Subject: [PATCH 0666/1009] [SPARK-27237][SS] Introduce State schema validation among query restart ## What changes were proposed in this pull request? Please refer to the description of [SPARK-27237](https://issues.apache.org/jira/browse/SPARK-27237) for the rationale behind this patch. This patch introduces state schema validation by storing the key schema and value schema to a `schema` file (on the first run) and verifying that the new key and value schemas for state are compatible with the existing ones. To clarify the definition of "compatible": a state schema is "compatible" when the number of fields is the same and the data type of each field is the same - Spark has always allowed fields to be renamed. This patch prevents running a query whose state schema is incompatible, which reduces the chance of non-deterministic behavior (renaming a field can also be a smell of semantic incompatibility, but since end users may legitimately rename fields we cannot treat it as such), and it provides a more informative error message. ## How was this patch tested? Added UTs. Closes #24173 from HeartSaVioR/SPARK-27237.
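To make the compatibility rule concrete, here is a rough, illustration-only Scala sketch. The real check is `StateSchemaCompatibilityChecker` in the diff below, which also handles nested structs, arrays/maps and nullability; this sketch only shows the top-level idea (same field count, same data type per position, field names ignored), and the object name and `demo` helper are hypothetical:

```scala
import org.apache.spark.sql.types._

object StateSchemaSketch {
  // Positional comparison of data types; renames do not break compatibility.
  def compatible(stored: StructType, provided: StructType): Boolean =
    stored.length == provided.length &&
      stored.fields.zip(provided.fields).forall { case (s, p) =>
        s.dataType == p.dataType // a renamed field is still compatible
      }

  def demo(): Unit = {
    val old = new StructType().add("cnt", LongType).add("avg", DoubleType)
    val renamed = new StructType().add("count", LongType).add("average", DoubleType)
    val widened = new StructType().add("cnt", StringType).add("avg", DoubleType)
    assert(compatible(old, renamed))  // rename only -> restart is allowed
    assert(!compatible(old, widened)) // type change -> restart is rejected
  }
}
```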
Lead-authored-by: Jungtaek Lim (HeartSaVioR) Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 10 + .../org/apache/spark/sql/types/DataType.scala | 38 ++- .../execution/streaming/HDFSMetadataLog.scala | 32 +-- .../streaming/MetadataVersionUtil.scala | 51 ++++ .../StateSchemaCompatibilityChecker.scala | 118 +++++++++ .../streaming/state/StateStore.scala | 36 ++- .../streaming/state/StateStoreConf.scala | 3 + ...StateSchemaCompatibilityCheckerSuite.scala | 230 ++++++++++++++++++ .../streaming/StreamingAggregationSuite.scala | 87 ++++++- ...ngStateStoreFormatCompatibilitySuite.scala | 21 +- 10 files changed, 582 insertions(+), 44 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 4442581b77811..025478214e492 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1294,6 +1294,14 @@ object SQLConf { .createWithDefault( "org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider") + val STATE_SCHEMA_CHECK_ENABLED = + buildConf("spark.sql.streaming.stateStore.stateSchemaCheck") + .doc("When true, Spark will validate the state schema against schema on existing state and " + + "fail query if it's incompatible.") + .version("3.1.0") + .booleanConf + .createWithDefault(true) + val STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT = buildConf("spark.sql.streaming.stateStore.minDeltasForSnapshot") .internal() @@ -3079,6 +3087,8 @@ class SQLConf extends Serializable with Logging { def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS) + def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED) + def stateStoreMinDeltasForSnapshot: Int = getConf(STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT) def stateStoreFormatValidationEnabled: Boolean = getConf(STATE_STORE_FORMAT_VALIDATION_ENABLED) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index e4ee6eb377a4d..9e820f0796a96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -307,21 +307,49 @@ object DataType { * of `fromField.nullable` and `toField.nullable` are false. */ private[sql] def equalsIgnoreCompatibleNullability(from: DataType, to: DataType): Boolean = { + equalsIgnoreCompatibleNullability(from, to, ignoreName = false) + } + + /** + * Compares two types, ignoring compatible nullability of ArrayType, MapType, StructType, and + * also the field name. It compares based on the position. + * + * Compatible nullability is defined as follows: + * - If `from` and `to` are ArrayTypes, `from` has a compatible nullability with `to` + * if and only if `to.containsNull` is true, or both of `from.containsNull` and + * `to.containsNull` are false. 
+ * - If `from` and `to` are MapTypes, `from` has a compatible nullability with `to` + * if and only if `to.valueContainsNull` is true, or both of `from.valueContainsNull` and + * `to.valueContainsNull` are false. + * - If `from` and `to` are StructTypes, `from` has a compatible nullability with `to` + * if and only if for all every pair of fields, `to.nullable` is true, or both + * of `fromField.nullable` and `toField.nullable` are false. + */ + private[sql] def equalsIgnoreNameAndCompatibleNullability( + from: DataType, + to: DataType): Boolean = { + equalsIgnoreCompatibleNullability(from, to, ignoreName = true) + } + + private def equalsIgnoreCompatibleNullability( + from: DataType, + to: DataType, + ignoreName: Boolean = false): Boolean = { (from, to) match { case (ArrayType(fromElement, fn), ArrayType(toElement, tn)) => - (tn || !fn) && equalsIgnoreCompatibleNullability(fromElement, toElement) + (tn || !fn) && equalsIgnoreCompatibleNullability(fromElement, toElement, ignoreName) case (MapType(fromKey, fromValue, fn), MapType(toKey, toValue, tn)) => (tn || !fn) && - equalsIgnoreCompatibleNullability(fromKey, toKey) && - equalsIgnoreCompatibleNullability(fromValue, toValue) + equalsIgnoreCompatibleNullability(fromKey, toKey, ignoreName) && + equalsIgnoreCompatibleNullability(fromValue, toValue, ignoreName) case (StructType(fromFields), StructType(toFields)) => fromFields.length == toFields.length && fromFields.zip(toFields).forall { case (fromField, toField) => - fromField.name == toField.name && + (ignoreName || fromField.name == toField.name) && (toField.nullable || !fromField.nullable) && - equalsIgnoreCompatibleNullability(fromField.dataType, toField.dataType) + equalsIgnoreCompatibleNullability(fromField.dataType, toField.dataType, ignoreName) } case (fromDataType, toDataType) => fromDataType == toDataType diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 893639a86c88c..b87a5b49eb6ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -267,36 +267,8 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: } } - /** - * Parse the log version from the given `text` -- will throw exception when the parsed version - * exceeds `maxSupportedVersion`, or when `text` is malformed (such as "xyz", "v", "v-1", - * "v123xyz" etc.) - */ - private[sql] def validateVersion(text: String, maxSupportedVersion: Int): Int = { - if (text.length > 0 && text(0) == 'v') { - val version = - try { - text.substring(1, text.length).toInt - } catch { - case _: NumberFormatException => - throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + - s"version from $text.") - } - if (version > 0) { - if (version > maxSupportedVersion) { - throw new IllegalStateException(s"UnsupportedLogVersion: maximum supported log version " + - s"is v${maxSupportedVersion}, but encountered v$version. The log file was produced " + - s"by a newer version of Spark and cannot be read by this version. 
Please upgrade.") - } else { - return version - } - } - } - - // reaching here means we failed to read the correct log version - throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + - s"version from $text.") - } + private[sql] def validateVersion(text: String, maxSupportedVersion: Int): Int = + MetadataVersionUtil.validateVersion(text, maxSupportedVersion) } object HDFSMetadataLog { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala new file mode 100644 index 0000000000000..548f2aa5d5c5b --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataVersionUtil.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming + +object MetadataVersionUtil { + /** + * Parse the log version from the given `text` -- will throw exception when the parsed version + * exceeds `maxSupportedVersion`, or when `text` is malformed (such as "xyz", "v", "v-1", + * "v123xyz" etc.) + */ + def validateVersion(text: String, maxSupportedVersion: Int): Int = { + if (text.length > 0 && text(0) == 'v') { + val version = + try { + text.substring(1, text.length).toInt + } catch { + case _: NumberFormatException => + throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + + s"version from $text.") + } + if (version > 0) { + if (version > maxSupportedVersion) { + throw new IllegalStateException(s"UnsupportedLogVersion: maximum supported log version " + + s"is v${maxSupportedVersion}, but encountered v$version. The log file was produced " + + s"by a newer version of Spark and cannot be read by this version. Please upgrade.") + } else { + return version + } + } + } + + // reaching here means we failed to read the correct log version + throw new IllegalStateException(s"Log file was malformed: failed to read correct log " + + s"version from $text.") + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala new file mode 100644 index 0000000000000..4ac12c089c0d3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, MetadataVersionUtil} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, StructType} + +case class StateSchemaNotCompatible(message: String) extends Exception(message) + +class StateSchemaCompatibilityChecker( + providerId: StateStoreProviderId, + hadoopConf: Configuration) extends Logging { + + private val storeCpLocation = providerId.storeId.storeCheckpointLocation() + private val fm = CheckpointFileManager.create(storeCpLocation, hadoopConf) + private val schemaFileLocation = schemaFile(storeCpLocation) + + fm.mkdirs(schemaFileLocation.getParent) + + def check(keySchema: StructType, valueSchema: StructType): Unit = { + if (fm.exists(schemaFileLocation)) { + logDebug(s"Schema file for provider $providerId exists. Comparing with provided schema.") + val (storedKeySchema, storedValueSchema) = readSchemaFile() + if (storedKeySchema.equals(keySchema) && storedValueSchema.equals(valueSchema)) { + // schema is exactly same + } else if (!schemasCompatible(storedKeySchema, keySchema) || + !schemasCompatible(storedValueSchema, valueSchema)) { + val errorMsg = "Provided schema doesn't match to the schema for existing state! " + + "Please note that Spark allow difference of field name: check count of fields " + + "and data type of each field.\n" + + s"- Provided key schema: $keySchema\n" + + s"- Provided value schema: $valueSchema\n" + + s"- Existing key schema: $storedKeySchema\n" + + s"- Existing value schema: $storedValueSchema\n" + + s"If you want to force running query without schema validation, please set " + + s"${SQLConf.STATE_SCHEMA_CHECK_ENABLED.key} to false.\n" + + "Please note running query with incompatible schema could cause indeterministic" + + " behavior." + logError(errorMsg) + throw StateSchemaNotCompatible(errorMsg) + } else { + logInfo("Detected schema change which is compatible. Allowing to put rows.") + } + } else { + // schema doesn't exist, create one now + logDebug(s"Schema file for provider $providerId doesn't exist. Creating one.") + createSchemaFile(keySchema, valueSchema) + } + } + + private def schemasCompatible(storedSchema: StructType, schema: StructType): Boolean = + DataType.equalsIgnoreNameAndCompatibleNullability(storedSchema, schema) + + private def readSchemaFile(): (StructType, StructType) = { + val inStream = fm.open(schemaFileLocation) + try { + val versionStr = inStream.readUTF() + // Currently we only support version 1, which we can simplify the version validation and + // the parse logic. 
+ val version = MetadataVersionUtil.validateVersion(versionStr, + StateSchemaCompatibilityChecker.VERSION) + require(version == 1) + + val keySchemaStr = inStream.readUTF() + val valueSchemaStr = inStream.readUTF() + + (StructType.fromString(keySchemaStr), StructType.fromString(valueSchemaStr)) + } catch { + case e: Throwable => + logError(s"Fail to read schema file from $schemaFileLocation", e) + throw e + } finally { + inStream.close() + } + } + + private def createSchemaFile(keySchema: StructType, valueSchema: StructType): Unit = { + val outStream = fm.createAtomic(schemaFileLocation, overwriteIfPossible = false) + try { + outStream.writeUTF(s"v${StateSchemaCompatibilityChecker.VERSION}") + outStream.writeUTF(keySchema.json) + outStream.writeUTF(valueSchema.json) + outStream.close() + } catch { + case e: Throwable => + logError(s"Fail to write schema file to $schemaFileLocation", e) + outStream.cancel() + throw e + } + } + + private def schemaFile(storeCpLocation: Path): Path = + new Path(new Path(storeCpLocation, "_metadata"), "schema") +} + +object StateSchemaCompatibilityChecker { + val VERSION = 1 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 05bcee7b05c6f..ab67c19783ff7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -22,6 +22,7 @@ import java.util.concurrent.{ScheduledFuture, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable +import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration @@ -280,14 +281,14 @@ object StateStoreProvider { * Return a instance of the required provider, initialized with the given configurations. */ def createAndInit( - stateStoreId: StateStoreId, + providerId: StateStoreProviderId, keySchema: StructType, valueSchema: StructType, indexOrdinal: Option[Int], // for sorting the data storeConf: StateStoreConf, hadoopConf: Configuration): StateStoreProvider = { val provider = create(storeConf.providerClass) - provider.init(stateStoreId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) + provider.init(providerId.storeId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) provider } @@ -386,10 +387,14 @@ object StateStore extends Logging { val MAINTENANCE_INTERVAL_CONFIG = "spark.sql.streaming.stateStore.maintenanceInterval" val MAINTENANCE_INTERVAL_DEFAULT_SECS = 60 + val PARTITION_ID_TO_CHECK_SCHEMA = 0 @GuardedBy("loadedProviders") private val loadedProviders = new mutable.HashMap[StateStoreProviderId, StateStoreProvider]() + @GuardedBy("loadedProviders") + private val schemaValidated = new mutable.HashMap[StateStoreProviderId, Option[Throwable]]() + /** * Runs the `task` periodically and automatically cancels it if there is an exception. `onError` * will be called when an exception happens. 
@@ -467,10 +472,29 @@ object StateStore extends Logging { hadoopConf: Configuration): StateStoreProvider = { loadedProviders.synchronized { startMaintenanceIfNeeded() + + if (storeProviderId.storeId.partitionId == PARTITION_ID_TO_CHECK_SCHEMA) { + val result = schemaValidated.getOrElseUpdate(storeProviderId, { + val checker = new StateSchemaCompatibilityChecker(storeProviderId, hadoopConf) + // regardless of configuration, we check compatibility to at least write schema file + // if necessary + val ret = Try(checker.check(keySchema, valueSchema)).toEither.fold(Some(_), _ => None) + if (storeConf.stateSchemaCheckEnabled) { + ret + } else { + None + } + }) + + if (result.isDefined) { + throw result.get + } + } + val provider = loadedProviders.getOrElseUpdate( storeProviderId, StateStoreProvider.createAndInit( - storeProviderId.storeId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) + storeProviderId, keySchema, valueSchema, indexOrdinal, storeConf, hadoopConf) ) reportActiveStoreInstance(storeProviderId) provider @@ -482,6 +506,12 @@ object StateStore extends Logging { loadedProviders.remove(storeProviderId).foreach(_.close()) } + /** Unload all state store providers: unit test purpose */ + private[sql] def unloadAll(): Unit = loadedProviders.synchronized { + loadedProviders.keySet.foreach { key => unload(key) } + loadedProviders.clear() + } + /** Whether a state store provider is loaded or not */ def isLoaded(storeProviderId: StateStoreProviderId): Boolean = loadedProviders.synchronized { loadedProviders.contains(storeProviderId) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 11043bc81ae3f..23cb3be32c85a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -55,6 +55,9 @@ class StateStoreConf( /** The compression codec used to compress delta and snapshot files. */ val compressionCodec: String = sqlConf.stateStoreCompressionCodec + /** whether to validate state schema during query run. */ + val stateSchemaCheckEnabled = sqlConf.isStateSchemaCheckEnabled + /** * Additional configurations related to state store. This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala new file mode 100644 index 0000000000000..4eb7603b316aa --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import java.util.UUID + +import scala.util.Random + +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.sql.execution.streaming.state.StateStoreTestsHelper.newDir +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ + +class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { + + private val hadoopConf: Configuration = new Configuration() + private val opId = Random.nextInt(100000) + private val partitionId = StateStore.PARTITION_ID_TO_CHECK_SCHEMA + + private val structSchema = new StructType() + .add(StructField("nested1", IntegerType, nullable = true)) + .add(StructField("nested2", StringType, nullable = true)) + + private val keySchema = new StructType() + .add(StructField("key1", IntegerType, nullable = true)) + .add(StructField("key2", StringType, nullable = true)) + .add(StructField("key3", structSchema, nullable = true)) + + private val valueSchema = new StructType() + .add(StructField("value1", IntegerType, nullable = true)) + .add(StructField("value2", StringType, nullable = true)) + .add(StructField("value3", structSchema, nullable = true)) + + test("adding field to key should fail") { + val fieldAddedKeySchema = keySchema.add(StructField("newKey", IntegerType)) + verifyException(keySchema, valueSchema, fieldAddedKeySchema, valueSchema) + } + + test("adding field to value should fail") { + val fieldAddedValueSchema = valueSchema.add(StructField("newValue", IntegerType)) + verifyException(keySchema, valueSchema, keySchema, fieldAddedValueSchema) + } + + test("adding nested field in key should fail") { + val fieldAddedNestedSchema = structSchema.add(StructField("newNested", IntegerType)) + val newKeySchema = applyNewSchemaToNestedFieldInKey(fieldAddedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("adding nested field in value should fail") { + val fieldAddedNestedSchema = structSchema.add(StructField("newNested", IntegerType)) + val newValueSchema = applyNewSchemaToNestedFieldInValue(fieldAddedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("removing field from key should fail") { + val fieldRemovedKeySchema = StructType(keySchema.dropRight(1)) + verifyException(keySchema, valueSchema, fieldRemovedKeySchema, valueSchema) + } + + test("removing field from value should fail") { + val fieldRemovedValueSchema = StructType(valueSchema.drop(1)) + verifyException(keySchema, valueSchema, keySchema, fieldRemovedValueSchema) + } + + test("removing nested field from key should fail") { + val fieldRemovedNestedSchema = StructType(structSchema.dropRight(1)) + val newKeySchema = applyNewSchemaToNestedFieldInKey(fieldRemovedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("removing nested field from value should fail") { + val fieldRemovedNestedSchema = StructType(structSchema.drop(1)) + val newValueSchema = applyNewSchemaToNestedFieldInValue(fieldRemovedNestedSchema) + verifyException(keySchema, 
valueSchema, keySchema, newValueSchema) + } + + test("changing the type of field in key should fail") { + val typeChangedKeySchema = StructType(keySchema.map(_.copy(dataType = TimestampType))) + verifyException(keySchema, valueSchema, typeChangedKeySchema, valueSchema) + } + + test("changing the type of field in value should fail") { + val typeChangedValueSchema = StructType(valueSchema.map(_.copy(dataType = TimestampType))) + verifyException(keySchema, valueSchema, keySchema, typeChangedValueSchema) + } + + test("changing the type of nested field in key should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(dataType = TimestampType))) + val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("changing the type of nested field in value should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(dataType = TimestampType))) + val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("changing the nullability of nullable to non-nullable in key should fail") { + val nonNullChangedKeySchema = StructType(keySchema.map(_.copy(nullable = false))) + verifyException(keySchema, valueSchema, nonNullChangedKeySchema, valueSchema) + } + + test("changing the nullability of nullable to non-nullable in value should fail") { + val nonNullChangedValueSchema = StructType(valueSchema.map(_.copy(nullable = false))) + verifyException(keySchema, valueSchema, keySchema, nonNullChangedValueSchema) + } + + test("changing the nullability of nullable to nonnullable in nested field in key should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newKeySchema = applyNewSchemaToNestedFieldInKey(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, newKeySchema, valueSchema) + } + + test("changing the nullability of nullable to nonnullable in nested field in value should fail") { + val typeChangedNestedSchema = StructType(structSchema.map(_.copy(nullable = false))) + val newValueSchema = applyNewSchemaToNestedFieldInValue(typeChangedNestedSchema) + verifyException(keySchema, valueSchema, keySchema, newValueSchema) + } + + test("changing the name of field in key should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val fieldNameChangedKeySchema = StructType(keySchema.map(newName)) + verifySuccess(keySchema, valueSchema, fieldNameChangedKeySchema, valueSchema) + } + + test("changing the name of field in value should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val fieldNameChangedValueSchema = StructType(valueSchema.map(newName)) + verifySuccess(keySchema, valueSchema, keySchema, fieldNameChangedValueSchema) + } + + test("changing the name of nested field in key should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val newNestedFieldsSchema = StructType(structSchema.map(newName)) + val fieldNameChangedKeySchema = applyNewSchemaToNestedFieldInKey(newNestedFieldsSchema) + verifySuccess(keySchema, valueSchema, fieldNameChangedKeySchema, valueSchema) + } + + test("changing the name of nested field in value should be allowed") { + val newName: StructField => StructField = f => f.copy(name = f.name + "_new") + val newNestedFieldsSchema = 
StructType(structSchema.map(newName)) + val fieldNameChangedValueSchema = applyNewSchemaToNestedFieldInValue(newNestedFieldsSchema) + verifySuccess(keySchema, valueSchema, keySchema, fieldNameChangedValueSchema) + } + + private def applyNewSchemaToNestedFieldInKey(newNestedSchema: StructType): StructType = { + applyNewSchemaToNestedField(keySchema, newNestedSchema, "key3") + } + + private def applyNewSchemaToNestedFieldInValue(newNestedSchema: StructType): StructType = { + applyNewSchemaToNestedField(valueSchema, newNestedSchema, "value3") + } + + private def applyNewSchemaToNestedField( + originSchema: StructType, + newNestedSchema: StructType, + fieldName: String): StructType = { + val newFields = originSchema.map { field => + if (field.name == fieldName) { + field.copy(dataType = newNestedSchema) + } else { + field + } + } + StructType(newFields) + } + + private def runSchemaChecker( + dir: String, + queryId: UUID, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + // in fact, Spark doesn't support online state schema change, so need to check + // schema only once for each running of JVM + val providerId = StateStoreProviderId( + StateStoreId(dir, opId, partitionId), queryId) + + new StateSchemaCompatibilityChecker(providerId, hadoopConf) + .check(newKeySchema, newValueSchema) + } + + private def verifyException( + oldKeySchema: StructType, + oldValueSchema: StructType, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + val dir = newDir() + val queryId = UUID.randomUUID() + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) + + val e = intercept[StateSchemaNotCompatible] { + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + } + + e.getMessage.contains("Provided schema doesn't match to the schema for existing state!") + e.getMessage.contains(newKeySchema.json) + e.getMessage.contains(newValueSchema.json) + e.getMessage.contains(oldKeySchema.json) + e.getMessage.contains(oldValueSchema.json) + } + + private def verifySuccess( + oldKeySchema: StructType, + oldValueSchema: StructType, + newKeySchema: StructType, + newValueSchema: StructType): Unit = { + val dir = newDir() + val queryId = UUID.randomUUID() + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 0524e29662014..491b0d8b2c26c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.streaming import java.io.File import java.util.{Locale, TimeZone} +import scala.annotation.tailrec + import org.apache.commons.io.FileUtils import org.scalatest.Assertions @@ -33,7 +35,7 @@ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemorySink -import org.apache.spark.sql.execution.streaming.state.StreamingAggregationStateManager +import org.apache.spark.sql.execution.streaming.state.{StateSchemaNotCompatible, StateStore, StreamingAggregationStateManager} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.streaming.OutputMode._ @@ -753,6 +755,89 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { ) } + testQuietlyWithAllStateVersions("changing schema of state when restarting query", + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + withTempDir { tempDir => + val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) + + // if we don't have verification phase on state schema, modified query would throw NPE with + // stack trace which end users would not easily understand + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 21), + ExpectFailure[SparkException] { e => + val stateSchemaExc = findStateSchemaNotCompatible(e) + assert(stateSchemaExc.isDefined) + val msg = stateSchemaExc.get.getMessage + assert(msg.contains("Provided schema doesn't match to the schema for existing state")) + // other verifications are presented in StateStoreSuite + } + ) + } + } + + testQuietlyWithAllStateVersions("changing schema of state when restarting query -" + + " schema check off", + (SQLConf.STATE_SCHEMA_CHECK_ENABLED.key, "false"), + (SQLConf.STATE_STORE_FORMAT_VALIDATION_ENABLED.key, "false")) { + withTempDir { tempDir => + val (inputData, aggregated) = prepareTestForChangingSchemaOfState(tempDir) + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 21), + ExpectFailure[SparkException] { e => + val stateSchemaExc = findStateSchemaNotCompatible(e) + // it would bring other error in runtime, but it shouldn't check schema in any way + assert(stateSchemaExc.isEmpty) + } + ) + } + } + + private def prepareTestForChangingSchemaOfState( + tempDir: File): (MemoryStream[Int], DataFrame) = { + val inputData = MemoryStream[Int] + val aggregated = inputData.toDF() + .selectExpr("value % 10 AS id", "value") + .groupBy($"id") + .agg( + sum("value").as("sum_value"), + avg("value").as("avg_value"), + max("value").as("max_value")) + + testStream(aggregated, Update())( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 1, 11), + CheckLastBatch((1L, 12L, 6.0, 11)), + StopStream + ) + + StateStore.unloadAll() + + val inputData2 = MemoryStream[Int] + val aggregated2 = inputData2.toDF() + .selectExpr("value % 10 AS id", "value") + .groupBy($"id") + .agg( + sum("value").as("sum_value"), + avg("value").as("avg_value"), + collect_list("value").as("values")) + + inputData2.addData(1, 11) + + (inputData2, aggregated2) + } + + @tailrec + private def findStateSchemaNotCompatible(exc: Throwable): Option[StateSchemaNotCompatible] = { + exc match { + case e1: StateSchemaNotCompatible => Some(e1) + case e1 if e1.getCause != null => findStateSchemaNotCompatible(e1.getCause) + case _ => None + } + } /** Add blocks of data to the `BlockRDDBackedSource`. 
*/ case class AddBlockData(source: BlockRDDBackedSource, data: Seq[Int]*) extends AddData { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala index 33f6b02acb6dd..1032d6c5b6ff2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala @@ -19,12 +19,15 @@ package org.apache.spark.sql.streaming import java.io.File +import scala.annotation.tailrec + import org.apache.commons.io.FileUtils import org.apache.spark.SparkException import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Complete import org.apache.spark.sql.execution.streaming.MemoryStream +import org.apache.spark.sql.execution.streaming.state.{InvalidUnsafeRowException, StateSchemaNotCompatible} import org.apache.spark.sql.functions._ import org.apache.spark.util.Utils @@ -239,11 +242,19 @@ class StreamingStateStoreFormatCompatibilitySuite extends StreamTest { CheckAnswer(Row(0, 20, Seq(0, 2, 4, 6, 8)), Row(1, 25, Seq(1, 3, 5, 7, 9))) */ AddData(inputData, 10 to 19: _*), - ExpectFailure[SparkException](e => { - // Check the exception message to make sure the state store format changing. - assert(e.getCause.getCause.getMessage.contains( - "The streaming query failed by state format invalidation.")) - }) + ExpectFailure[SparkException] { e => + assert(findStateSchemaException(e)) + } ) } + + @tailrec + private def findStateSchemaException(exc: Throwable): Boolean = { + exc match { + case _: StateSchemaNotCompatible => true + case _: InvalidUnsafeRowException => true + case e1 if e1.getCause != null => findStateSchemaException(e1.getCause) + case _ => false + } + } } From 990bee9c58ea9abd8c4f04f20c78c6d5b720406a Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 4 Dec 2020 19:37:03 +0900 Subject: [PATCH 0667/1009] [SPARK-33615][K8S] Make 'spark.archives' working in Kubernates ### What changes were proposed in this pull request? This PR proposes to make `spark.archives` configuration working in Kubernates. It works without a problem in standalone cluster but there seems a bug in Kubernates. It fails to fetch the file on the driver side as below: ``` 20/12/03 13:33:53 INFO SparkContext: Added JAR file:/tmp/spark-75004286-c83a-4369-b624-14c5d2d2a748/spark-examples_2.12-3.1.0-SNAPSHOT.jar at spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/jars/spark-examples_2.12-3.1.0-SNAPSHOT.jar with timestamp 1607002432558 20/12/03 13:33:53 INFO SparkContext: Added archive file:///tmp/tmp4542734800151332666.txt.tar.gz#test_tar_gz at spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/files/tmp4542734800151332666.txt.tar.gz with timestamp 1607002432558 20/12/03 13:33:53 INFO TransportClientFactory: Successfully created connection to spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc/172.17.0.4:7078 after 83 ms (47 ms spent in bootstraps) 20/12/03 13:33:53 INFO Utils: Fetching spark://spark-test-app-48ae737628cee6f8-driver-svc.spark-integration-test.svc:7078/files/tmp4542734800151332666.txt.tar.gz to /tmp/spark-66573e24-27a3-427c-99f4-36f06d9e9cd5/fetchFileTemp2665785666227461849.tmp 20/12/03 13:33:53 ERROR SparkContext: Error initializing SparkContext. 
java.lang.RuntimeException: Stream '/files/tmp4542734800151332666.txt.tar.gz' was not found. at org.apache.spark.network.client.TransportResponseHandler.handle(TransportResponseHandler.java:242) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:142) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:53) ``` This is because `spark.archives` was not actually added on the driver side correctly. The changes here fix it by adding and resolving URIs correctly. ### Why are the changes needed? `spark.archives` feature can be leveraged for many things such as Conda support. We should make it working in Kubernates as well. This is a bug fix too. ### Does this PR introduce _any_ user-facing change? No, this feature is not out yet. ### How was this patch tested? I manually tested with Minikube 1.15.1. For an environment issue (?), I had to use a custom namespace, service account and roles. `default` service account does not work for me and complains it doesn't have permissions to get/list pods, etc. ```bash minikube delete minikube start --cpus 12 --memory 16384 kubectl create namespace spark-integration-test cat < Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/SparkContext.scala | 4 +++- .../org/apache/spark/deploy/SparkSubmit.scala | 13 +++++++++-- docs/running-on-kubernetes.md | 2 +- .../k8s/features/BasicDriverFeatureStep.scala | 22 ++++++++++++++++--- .../k8s/integrationtest/DepsTestsSuite.scala | 12 ++++++++++ .../deploy/k8s/integrationtest/Utils.scala | 22 +++++++++++++++++++ .../org/apache/spark/deploy/yarn/Client.scala | 1 + 7 files changed, 69 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 86f1d745d91d4..17ceb5f1887c6 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1639,7 +1639,9 @@ class SparkContext(config: SparkConf) extends Logging { UriBuilder.fromUri(new URI(key)).fragment(uri.getFragment).build().toString, timestamp).isEmpty) { logInfo(s"Added archive $path at $key with timestamp $timestamp") - val uriToDownload = UriBuilder.fromUri(new URI(key)).fragment(null).build() + // If the scheme is file, use URI to simply copy instead of downloading. + val uriToUse = if (!isLocal && scheme == "file") uri else new URI(key) + val uriToDownload = UriBuilder.fromUri(uriToUse).fragment(null).build() val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, env.securityManager, hadoopConfiguration, timestamp, useCache = false, shouldUntar = false) val dest = new File( diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index a344bce7a0f3c..ea293f03a2169 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -24,6 +24,7 @@ import java.security.PrivilegedExceptionAction import java.text.ParseException import java.util.{ServiceLoader, UUID} import java.util.jar.JarInputStream +import javax.ws.rs.core.UriBuilder import scala.annotation.tailrec import scala.collection.JavaConverters._ @@ -387,10 +388,18 @@ private[spark] class SparkSubmit extends Logging { // Executors will get the jars from the Spark file server. 
// Explicitly download the related files here args.jars = renameResourcesToLocalFS(args.jars, localJars) - val localFiles = Option(args.files).map { + val filesLocalFiles = Option(args.files).map { downloadFileList(_, targetDir, sparkConf, hadoopConf, secMgr) }.orNull - args.files = renameResourcesToLocalFS(args.files, localFiles) + val archiveLocalFiles = Option(args.archives).map { uri => + val resolvedUri = Utils.resolveURI(uri) + val downloadedUri = downloadFileList( + UriBuilder.fromUri(resolvedUri).fragment(null).build().toString, + targetDir, sparkConf, hadoopConf, secMgr) + UriBuilder.fromUri(downloadedUri).fragment(resolvedUri.getFragment).build().toString + }.orNull + args.files = renameResourcesToLocalFS(args.files, filesLocalFiles) + args.archives = renameResourcesToLocalFS(args.archives, archiveLocalFiles) args.pyFiles = renameResourcesToLocalFS(args.pyFiles, localPyFiles) } } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 71b7df8176d1b..e735c7493486e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -222,7 +222,7 @@ The app jar file will be uploaded to the S3 and then when the driver is launched to the driver pod and will be added to its classpath. Spark will generate a subdir under the upload path with a random name to avoid conflicts with spark apps running in parallel. User could manage the subdirs created according to his needs. -The client scheme is supported for the application jar, and dependencies specified by properties `spark.jars` and `spark.files`. +The client scheme is supported for the application jar, and dependencies specified by properties `spark.jars`, `spark.files` and `spark.archives`. Important: all client-side dependencies will be uploaded to the given path with a flat directory structure so file names must be unique otherwise files will be overwritten. 
Also make sure in the derived k8s image default ivy dir diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index f5ba261c8f405..cec8272beed57 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.k8s.features +import javax.ws.rs.core.UriBuilder + import scala.collection.JavaConverters._ import scala.collection.mutable @@ -159,11 +161,25 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system - Seq(JARS, FILES, SUBMIT_PYTHON_FILES).foreach { key => - val value = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) + Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => + val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) + val value = { + if (key == ARCHIVES) { + uris.map(UriBuilder.fromUri(_).fragment(null).build()).map(_.toString) + } else { + uris + } + } val resolved = KubernetesUtils.uploadAndTransformFileUris(value, Some(conf.sparkConf)) if (resolved.nonEmpty) { - additionalProps.put(key.key, resolved.mkString(",")) + val resolvedValue = if (key == ARCHIVES) { + uris.zip(resolved).map { case (uri, r) => + UriBuilder.fromUri(r).fragment(new java.net.URI(uri).getFragment).build().toString + } + } else { + resolved + } + additionalProps.put(key.key, resolvedValue.mkString(",")) } } additionalProps.toMap diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 760e9ba55d335..a15f7ffa134b8 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -163,6 +163,18 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => }) } + test("SPARK-33615: Launcher client archives", k8sTestTag, MinikubeTag) { + tryDepsTest { + val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tar.gz") + sparkAppConf.set("spark.archives", s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") + val examplesJar = Utils.getTestFileAbsolutePath(getExamplesJarName(), sparkHomeDir) + runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar, + appArgs = Array(s"test_tar_gz/$fileName"), + timeout = Option(DEPS_TIMEOUT)) + } + } + test("Launcher python client dependencies using a zip file", k8sTestTag, MinikubeTag) { val inDepsFile = Utils.getTestFileAbsolutePath("py_container_checks.py", sparkHomeDir) val outDepsFile = s"${inDepsFile.substring(0, inDepsFile.lastIndexOf("."))}.zip" diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index ee44cb5f85835..519443130008b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -25,6 +25,8 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.client.dsl.ExecListener import okhttp3.Response +import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveOutputStream} +import org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream import org.apache.commons.compress.utils.IOUtils import org.apache.commons.io.output.ByteArrayOutputStream import org.apache.hadoop.util.VersionInfo @@ -149,4 +151,24 @@ object Utils extends Logging { IOUtils.closeQuietly(fis) IOUtils.closeQuietly(zipOut) } + + def createTarGzFile(inFile: String, outFile: String): Unit = { + val fileToTarGz = new File(inFile) + Utils.tryWithResource( + new FileInputStream(fileToTarGz) + ) { fis => + Utils.tryWithResource( + new TarArchiveOutputStream( + new GzipCompressorOutputStream( + new FileOutputStream( + new File(outFile)))) + ) { tOut => + val tarEntry = new TarArchiveEntry(fileToTarGz, fileToTarGz.getName) + tOut.putArchiveEntry(tarEntry) + IOUtils.copy(fis, tOut) + tOut.closeArchiveEntry() + tOut.finish() + } + } + } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index d252e8368a0c4..7f791e02a392b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1629,6 +1629,7 @@ private[spark] class YarnClusterApplication extends SparkApplication { // so remove them from sparkConf here for yarn mode. conf.remove(JARS) conf.remove(FILES) + conf.remove(ARCHIVES) new Client(new ClientArguments(args), conf, null).run() } From acc211d2cf0e6ab94f6578e1eb488766fd20fa4e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 4 Dec 2020 14:01:15 +0000 Subject: [PATCH 0668/1009] [SPARK-33141][SQL][FOLLOW-UP] Store the max nested view depth in AnalysisContext ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30289. It removes the hack in `View.effectiveSQLConf`, by putting the max nested view depth in `AnalysisContext`. Then we don't get the max nested view depth from the active SQLConf, which keeps changing during nested view resolution. ### Why are the changes needed? remove hacks. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? If I just remove the hack, `SimpleSQLViewSuite.restrict the nested level of a view` fails. With this fix, it passes again. Closes #30575 from cloud-fan/view. 
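The essence of the change is that the nested-view depth limit is captured once, when resolution of the outermost view starts, and then carried through the per-query `AnalysisContext` instead of being re-read from the (changing) active conf at each nesting level. A simplified, hypothetical Scala sketch of that idea follows; the names `NestedViewDepthSketch`, `ViewResolutionCtx` and `enterView` are illustrative and do not appear in the analyzer (see the diff below for the actual code):

```scala
object NestedViewDepthSketch {
  // depth: current nesting level; maxDepth == -1 means "limit not captured yet".
  case class ViewResolutionCtx(depth: Int = 0, maxDepth: Int = -1)

  def enterView(ctx: ViewResolutionCtx, confMaxNestedViewDepth: => Int): ViewResolutionCtx = {
    // Only the outermost view reads the configured limit; nested views inherit it.
    val max = if (ctx.maxDepth == -1) confMaxNestedViewDepth else ctx.maxDepth
    val next = ViewResolutionCtx(ctx.depth + 1, max)
    require(next.depth <= next.maxDepth,
      s"The depth of the view exceeds the maximum view resolution depth ($max).")
    next
  }
}
```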
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 49 ++++++++++++------- .../plans/logical/basicLogicalOperators.scala | 3 -- .../spark/sql/execution/SQLViewSuite.scala | 25 ---------- .../sql/execution/SQLViewTestSuite.scala | 32 +++++++++--- 4 files changed, 57 insertions(+), 52 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ebe1004872ef6..6769dc895d32e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -87,8 +87,8 @@ object FakeV2SessionCatalog extends TableCatalog { } /** - * Provides a way to keep state during the analysis, this enables us to decouple the concerns - * of analysis environment from the catalog. + * Provides a way to keep state during the analysis, mostly for resolving views. This enables us to + * decouple the concerns of analysis environment from the catalog. * The state that is kept here is per-query. * * Note this is thread local. @@ -98,13 +98,21 @@ object FakeV2SessionCatalog extends TableCatalog { * views. * @param nestedViewDepth The nested depth in the view resolution, this enables us to limit the * depth of nested views. + * @param maxNestedViewDepth The maximum allowed depth of nested view resolution. * @param relationCache A mapping from qualified table names to resolved relations. This can ensure * that the table is resolved only once if a table is used multiple times * in a query. + * @param referredTempViewNames All the temp view names referred by the current view we are + * resolving. It's used to make sure the relation resolution is + * consistent between view creation and view resolution. For example, + * if `t` was a permanent table when the current view was created, it + * should still be a permanent table when resolving the current view, + * even if a temp view `t` has been created. */ case class AnalysisContext( catalogAndNamespace: Seq[String] = Nil, nestedViewDepth: Int = 0, + maxNestedViewDepth: Int = -1, relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, referredTempViewNames: Seq[Seq[String]] = Seq.empty) @@ -118,14 +126,20 @@ object AnalysisContext { private def set(context: AnalysisContext): Unit = value.set(context) - def withAnalysisContext[A]( - catalogAndNamespace: Seq[String], referredTempViewNames: Seq[Seq[String]])(f: => A): A = { + def withAnalysisContext[A](viewDesc: CatalogTable)(f: => A): A = { val originContext = value.get() + val maxNestedViewDepth = if (originContext.maxNestedViewDepth == -1) { + // Here we start to resolve views, get `maxNestedViewDepth` from configs. + SQLConf.get.maxNestedViewDepth + } else { + originContext.maxNestedViewDepth + } val context = AnalysisContext( - catalogAndNamespace, + viewDesc.viewCatalogAndNamespace, originContext.nestedViewDepth + 1, + maxNestedViewDepth, originContext.relationCache, - referredTempViewNames) + viewDesc.viewReferredTempViewNames) set(context) try f finally { set(originContext) } } @@ -1034,18 +1048,19 @@ class Analyzer(override val catalogManager: CatalogManager) // operator. case view @ View(desc, isTempView, _, child) if !child.resolved => // Resolve all the UnresolvedRelations and Views in the child. 
- val newChild = AnalysisContext.withAnalysisContext( - desc.viewCatalogAndNamespace, desc.viewReferredTempViewNames) { - if (AnalysisContext.get.nestedViewDepth > conf.maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth (${conf.maxNestedViewDepth}). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + - "work around this.") - } - SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { - executeSameContext(child) - } + val newChild = AnalysisContext.withAnalysisContext(desc) { + val nestedViewDepth = AnalysisContext.get.nestedViewDepth + val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth + if (nestedViewDepth > maxNestedViewDepth) { + view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + + s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + + s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + + "work around this.") + } + SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { + executeSameContext(child) } + } view.copy(child = newChild) case p @ SubqueryAlias(_, view: View) => p.copy(child = resolveViews(view)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index c8b7e8651686a..aa7151ad36850 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -483,9 +483,6 @@ object View { for ((k, v) <- configs) { sqlConf.settings.put(k, v) } - // We should respect the current maxNestedViewDepth cause the view resolving are executed - // from top to down. - sqlConf.setConf(SQLConf.MAX_NESTED_VIEW_DEPTH, activeConf.maxNestedViewDepth) sqlConf } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 709d6321d199d..c4303f0f1e19d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -704,31 +704,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } - test("restrict the nested level of a view") { - val viewNames = Array.range(0, 11).map(idx => s"view$idx") - withView(viewNames: _*) { - sql("CREATE VIEW view0 AS SELECT * FROM jt") - Array.range(0, 10).foreach { idx => - sql(s"CREATE VIEW view${idx + 1} AS SELECT * FROM view$idx") - } - - withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { - val e = intercept[AnalysisException] { - sql("SELECT * FROM view10") - }.getMessage - assert(e.contains("The depth of view `default`.`view0` exceeds the maximum view " + - "resolution depth (10). Analysis is aborted to avoid errors. 
Increase the value " + - s"of ${MAX_NESTED_VIEW_DEPTH.key} to work around this.")) - } - - val e = intercept[IllegalArgumentException] { - withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "0") {} - }.getMessage - assert(e.contains("The maximum depth of a view reference in a nested view must be " + - "positive.")) - } - } - test("permanent view should be case-preserving") { withView("v") { sql("CREATE VIEW v AS SELECT 1 as aBc") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index fb9f5a73f6d9e..3cffc5bc21ab6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -121,7 +121,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("change current database should not change view behavior") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT * from t") + val viewName = createView("v1", "SELECT * FROM t") withView(viewName) { withTempDatabase { db => sql(s"USE $db") @@ -135,7 +135,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("view should read the new data if table is updated") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT c1 from t", Seq("c1")) + val viewName = createView("v1", "SELECT c1 FROM t", Seq("c1")) withView(viewName) { Seq(9, 7, 8).toDF("c1").write.mode("overwrite").format("parquet").saveAsTable("t") checkViewOutput(viewName, Seq(Row(9), Row(7), Row(8))) @@ -146,7 +146,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("add column for table should not affect view output") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName = createView("v1", "SELECT * from t") + val viewName = createView("v1", "SELECT * FROM t") withView(viewName) { sql("ALTER TABLE t ADD COLUMN (c2 INT)") checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) @@ -157,8 +157,8 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("check cyclic view reference on CREATE OR REPLACE VIEW") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName1 = createView("v1", "SELECT * from t") - val viewName2 = createView("v2", s"SELECT * from $viewName1") + val viewName1 = createView("v1", "SELECT * FROM t") + val viewName2 = createView("v2", s"SELECT * FROM $viewName1") withView(viewName2, viewName1) { val e = intercept[AnalysisException] { createView("v1", s"SELECT * FROM $viewName2", replace = true) @@ -171,8 +171,8 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { test("check cyclic view reference on ALTER VIEW") { withTable("t") { Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") - val viewName1 = createView("v1", "SELECT * from t") - val viewName2 = createView("v2", s"SELECT * from $viewName1") + val viewName1 = createView("v1", "SELECT * FROM t") + val viewName2 = createView("v2", s"SELECT * FROM $viewName1") withView(viewName2, viewName1) { val e = intercept[AnalysisException] { sql(s"ALTER VIEW $viewName1 AS SELECT * FROM $viewName2") @@ -181,6 +181,24 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } } + + test("restrict the nested level of a view") { + val viewNames = 
scala.collection.mutable.ArrayBuffer.empty[String] + val view0 = createView("view0", "SELECT 1") + viewNames += view0 + for (i <- 1 to 10) { + viewNames += createView(s"view$i", s"SELECT * FROM ${viewNames.last}") + } + withView(viewNames.reverse: _*) { + withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { + val e = intercept[AnalysisException] { + sql(s"SELECT * FROM ${viewNames.last}") + }.getMessage + assert(e.contains("exceeds the maximum view resolution depth (10)")) + assert(e.contains(s"Increase the value of ${MAX_NESTED_VIEW_DEPTH.key}")) + } + } + } } class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
From d671e053e9806d6b4e43a39f5018aa9718790160 Mon Sep 17 00:00:00 2001 From: german Date: Sat, 5 Dec 2020 06:51:54 +0900 Subject: [PATCH 0669/1009] [SPARK-33660][DOCS][SS] Fix Kafka Headers Documentation ### What changes were proposed in this pull request? Update the Kafka headers documentation: the type is no longer a map but an array [jira](https://issues.apache.org/jira/browse/SPARK-33660) ### Why are the changes needed? To help users ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It is only documentation Closes #30605 from Gschiavon/SPARK-33660-fix-kafka-headers-documentation. Authored-by: german Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-kafka-integration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index f92dd039d53b7..5336695478c14 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -61,7 +61,7 @@ val df = spark .option("includeHeaders", "true") .load() df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers") - .as[(String, String, Map)] + .as[(String, String, Array[(String, Array[Byte])])] // Subscribe to multiple topics val df = spark
From de9818f043c1ebcda321077633f93072feba601f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Dec 2020 14:10:42 -0800 Subject: [PATCH 0670/1009] [SPARK-33662][BUILD] Setting version to 3.2.0-SNAPSHOT ### What changes were proposed in this pull request? This PR aims to update `master` branch version to 3.2.0-SNAPSHOT. ### Why are the changes needed? Start to prepare Apache Spark 3.2.0. ### Does this PR introduce _any_ user-facing change? N/A. ### How was this patch tested? Pass the CIs. Closes #30606 from dongjoon-hyun/SPARK-3.2.
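Returning briefly to the Kafka headers documentation fix above (SPARK-33660), a hedged usage sketch of the corrected typing; the topic and bootstrap-server values are placeholders, and only the `selectExpr`/`.as[...]` lines come from the documented example:
```scala
// Sketch only: with includeHeaders=true the "headers" column is an array of
// (key, value) pairs, so a typed Dataset uses Array[(String, Array[Byte])]
// rather than a Map, which is exactly what the documentation now shows.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().getOrCreate()
import spark.implicits._

val typed = spark.readStream
  .format("kafka")
  .option("kafka.bootstrap.servers", "host1:9092")   // placeholder
  .option("subscribe", "topic1")                      // placeholder
  .option("includeHeaders", "true")
  .load()
  .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers")
  .as[(String, String, Array[(String, Array[Byte])])]
```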
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- project/MimaExcludes.scala | 5 +++++ python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 45 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 2047f0d75ca18..20433362459d9 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.1.0 +Version: 3.2.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index d17abe857ade5..6aa97710f7307 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 39cdc6d6d6cd3..4ade8c2032b24 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d328a7de0a762..0318f60d546e7 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 562a1d495cc8a..6be6df993478d 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 0225db81925c5..7aff79ea91d72 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 72a2c4ceb43b6..b5a6775366a47 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index ea16dadca40cb..e51357d97faab 100644 --- a/common/tags/pom.xml +++ 
b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 769e2518b1fd4..b22400575dd02 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ce6f6ed9c7051..84ca852d1f30a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 026b3dd804690..a8d42e483d17d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.1.0-SNAPSHOT -SPARK_VERSION_SHORT: 3.1.0 +SPARK_VERSION: 3.2.0-SNAPSHOT +SPARK_VERSION_SHORT: 3.2.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.10" MESOS_VERSION: 1.0.0 diff --git a/examples/pom.xml b/examples/pom.xml index 8b632cef6d44d..3d7713f10402f 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 98036846eb2a8..a8614c4ff76ab 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index b240dd281823a..808f48f18e1ff 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index d9d9fb7f55c77..2359e99f657f9 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95a99ac88412e..843f16067463f 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 941946f30e96f..dbe2ab92a28e7 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 024fdb26d5bf4..69c5862fdbb2d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 76ee5bb7b2f85..22259b08141da 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 7e80bd28c19e8..b54ad71eba305 
100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 728b489da6785..bbb71035c3e19 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 38836db01553a..3ed68c0652711 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 8689e0b8a9ea8..03910ba091997 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a2550ac939e83..5da2a496e9eb8 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 851af8d52a3ee..2a2c373242201 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 9eacf380e17f2..f5b5a979e35b8 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 80097aec0f429..1d7704055898b 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9405927eb1cb5..33e65c9def41b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -34,6 +34,10 @@ import com.typesafe.tools.mima.core.ProblemFilters._ */ object MimaExcludes { + // Exclude rules for 3.2.x + lazy val v32excludes = v31excludes ++ Seq( + ) + // Exclude rules for 3.1.x lazy val v31excludes = v30excludes ++ Seq( // mima plugin update caused new incompatibilities to be detected @@ -1742,6 +1746,7 @@ object MimaExcludes { } def excludes(version: String) = version match { + case v if v.startsWith("3.2") => v32excludes case v if v.startsWith("3.1") => v31excludes case v if v.startsWith("3.0") => v30excludes case v if v.startsWith("2.4") => v24excludes diff --git a/python/pyspark/version.py b/python/pyspark/version.py index e8da19fc44185..935795190797f 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.1.0.dev0" +__version__ = "3.2.0.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index a1079e7a6fe6a..a982af21d86f9 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 18e1c65e2e932..44df4e1da5331 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 258d3dfc3df9d..bc680077ead8a 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 54a8d66ea1ad6..b9b3642498992 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index e9122ce202723..1d3856742f520 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b79eb722fcdd..0553438a1ad4a 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 3f088e420a9a3..5ab66bd5aac8a 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 9cd8adb6cb4df..dd6d21e3cbdac 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0453094cf8b7b..27d2756c741ef 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 53b49dd320e94..bd8d352092e73 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 6e806413ef261..8fe8ab358d60c 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.1.0-SNAPSHOT + 3.2.0-SNAPSHOT ../pom.xml From b6b45bc695706201693572bfb87bcee310548945 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 4 Dec 2020 15:04:18 -0800 Subject: [PATCH 0671/1009] [SPARK-33141][SQL][FOLLOW-UP] Fix Scala 2.13 compilation ### What changes were proposed in this pull request? This PR aims to fix Scala 2.13 compilation. ### Why are the changes needed? To recover Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
Pass GitHub Action Scala 2.13 build job. Closes #30611 from dongjoon-hyun/SPARK-33141. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 3cffc5bc21ab6..f6172e3b65050 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -189,7 +189,7 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { for (i <- 1 to 10) { viewNames += createView(s"view$i", s"SELECT * FROM ${viewNames.last}") } - withView(viewNames.reverse: _*) { + withView(viewNames.reverse.toSeq: _*) { withSQLConf(MAX_NESTED_VIEW_DEPTH.key -> "10") { val e = intercept[AnalysisException] { sql(s"SELECT * FROM ${viewNames.last}") From 960d6af75d5ef29b1efcf0d03e7db840270382e6 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Fri, 4 Dec 2020 15:15:19 -0800 Subject: [PATCH 0672/1009] [SPARK-33472][SQL][FOLLOW-UP] Update RemoveRedundantSorts comment ### What changes were proposed in this pull request? This PR is a follow-up for #30373 that updates the comment for RemoveRedundantSorts in QueryExecution. ### Why are the changes needed? To update an incorrect comment. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30584 from allisonwang-db/spark-33472-followup. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/execution/QueryExecution.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 040d1f36ed8a5..0531dd210e539 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -344,7 +344,7 @@ object QueryExecution { PlanSubqueries(sparkSession), RemoveRedundantProjects, EnsureRequirements, - // `RemoveRedundantSorts` needs to be added before `EnsureRequirements` to guarantee the same + // `RemoveRedundantSorts` needs to be added after `EnsureRequirements` to guarantee the same // number of partitions when instantiating PartitioningCollection. RemoveRedundantSorts, DisableUnnecessaryBucketedScan, From 1b4e35d1a8acf7b744e11b9ac9ca8f81de6db5e5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 4 Dec 2020 16:48:31 -0800 Subject: [PATCH 0673/1009] [SPARK-33651][SQL] Allow CREATE EXTERNAL TABLE with LOCATION for data source tables ### What changes were proposed in this pull request? This PR removes the restriction and allows CREATE EXTERNAL TABLE with LOCATION for data source tables. It also moves the check from the analyzer rule `ResolveSessionCatalog` to `SessionCatalog`, so that v2 session catalog can overwrite it. ### Why are the changes needed? It's an unnecessary behavior difference that Hive serde table can be created with `CREATE EXTERNAL TABLE` if LOCATION is present, while data source table doesn't allow `CREATE EXTERNAL TABLE` at all. ### Does this PR introduce _any_ user-facing change? 
Yes, now `CREATE EXTERNAL TABLE ... USING ... LOCATION ...` is allowed. ### How was this patch tested? new tests Closes #30595 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/catalog/SessionCatalog.scala | 5 ++++ .../analysis/ResolveSessionCatalog.scala | 12 +-------- .../datasources/v2/V2SessionCatalog.scala | 7 +++++- .../DataSourceV2SQLSessionCatalogSuite.scala | 8 ++++++ .../connector/TestV2SessionCatalogBase.scala | 24 +++++++++++++++--- .../command/PlanResolutionSuite.scala | 14 ++++++----- .../sources/CreateTableAsSelectSuite.scala | 25 ++++++++----------- .../spark/sql/sources/InsertSuite.scala | 2 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 5 ++-- 9 files changed, 64 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 29481b85e9f2e..0cdbc1a234c66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -318,6 +318,11 @@ class SessionCatalog( tableDefinition: CatalogTable, ignoreIfExists: Boolean, validateLocation: Boolean = true): Unit = { + val isExternal = tableDefinition.tableType == CatalogTableType.EXTERNAL + if (isExternal && tableDefinition.storage.locationUri.isEmpty) { + throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) val table = formatTableName(tableDefinition.identifier.table) val tableIdentifier = TableIdentifier(table, Some(db)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index f35fcdc07c372..a87ed4b6275d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -662,17 +662,7 @@ class ResolveSessionCatalog( comment: Option[String], storageFormat: CatalogStorageFormat, external: Boolean): CatalogTable = { - if (external) { - if (DDLUtils.isHiveTable(Some(provider))) { - if (location.isEmpty) { - throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") - } - } else { - throw new AnalysisException(s"Operation not allowed: CREATE EXTERNAL TABLE ... 
USING") - } - } - - val tableType = if (location.isDefined) { + val tableType = if (external || location.isDefined) { CatalogTableType.EXTERNAL } else { CatalogTableType.MANAGED diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index a0bc65d3f9057..87f5366354fa0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -87,7 +87,12 @@ class V2SessionCatalog(catalog: SessionCatalog) val location = Option(properties.get(TableCatalog.PROP_LOCATION)) val storage = DataSource.buildStorageFormatFromOptions(toOptions(tableProperties.toMap)) .copy(locationUri = location.map(CatalogUtils.stringToURI)) - val tableType = if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED + val isExternal = properties.containsKey(TableCatalog.PROP_EXTERNAL) + val tableType = if (isExternal || location.isDefined) { + CatalogTableType.EXTERNAL + } else { + CatalogTableType.MANAGED + } val tableDesc = CatalogTable( identifier = ident.asTableIdentifier, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala index cf00b3b5e4410..c973e2ba30004 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSessionCatalogSuite.scala @@ -79,4 +79,12 @@ class DataSourceV2SQLSessionCatalogSuite Row("keyX", s"Table default.$t1 does not have property: keyX")) } } + + test("SPARK-33651: allow CREATE EXTERNAL TABLE without LOCATION") { + withTable("t") { + val prop = TestV2SessionCatalogBase.SIMULATE_ALLOW_EXTERNAL_PROPERTY + "=true" + // The following should not throw AnalysisException. 
+ sql(s"CREATE EXTERNAL TABLE t (i INT) USING $v2Format TBLPROPERTIES($prop)") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala index f57edb9eb220c..bf2749d1afc53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/TestV2SessionCatalogBase.scala @@ -24,7 +24,7 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.catalog.CatalogTableType -import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table, V1Table} +import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table, TableCatalog, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType @@ -70,8 +70,22 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating schema: StructType, partitions: Array[Transform], properties: util.Map[String, String]): Table = { - val created = super.createTable(ident, schema, partitions, properties) - val t = newTable(created.name(), schema, partitions, properties) + val key = TestV2SessionCatalogBase.SIMULATE_ALLOW_EXTERNAL_PROPERTY + val propsWithLocation = if (properties.containsKey(key)) { + // Always set a location so that CREATE EXTERNAL TABLE won't fail with LOCATION not specified. + if (!properties.containsKey(TableCatalog.PROP_LOCATION)) { + val newProps = new util.HashMap[String, String]() + newProps.putAll(properties) + newProps.put(TableCatalog.PROP_LOCATION, "file:/abc") + newProps + } else { + properties + } + } else { + properties + } + val created = super.createTable(ident, schema, partitions, propsWithLocation) + val t = newTable(created.name(), schema, partitions, propsWithLocation) addTable(ident, t) t } @@ -90,3 +104,7 @@ private[connector] trait TestV2SessionCatalogBase[T <: Table] extends Delegating tableCreated.set(false) } } + +object TestV2SessionCatalogBase { + val SIMULATE_ALLOW_EXTERNAL_PROPERTY = "spark.sql.test.simulateAllowExternal" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 9b7222da55368..38719311f1aef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1714,14 +1714,16 @@ class PlanResolutionSuite extends AnalysisTest { } } - test("create hive external table - location must be specified") { - val exc = intercept[AnalysisException] { - parseAndResolve("CREATE EXTERNAL TABLE my_tab STORED AS parquet") + test("create hive external table") { + val withoutLoc = "CREATE EXTERNAL TABLE my_tab STORED AS parquet" + parseAndResolve(withoutLoc) match { + case ct: CreateTable => + assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) + assert(ct.tableDesc.storage.locationUri.isEmpty) } - assert(exc.getMessage.contains("CREATE EXTERNAL TABLE must be accompanied by LOCATION")) - val query = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" - parseAndResolve(query) match { + val withLoc = "CREATE EXTERNAL TABLE my_tab STORED AS parquet LOCATION '/something/anything'" + 
parseAndResolve(withLoc) match { case ct: CreateTable => assert(ct.tableDesc.tableType == CatalogTableType.EXTERNAL) assert(ct.tableDesc.storage.locationUri == Some(new URI("/something/anything"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 00c599065ce31..9464f7e4c1241 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -22,7 +22,7 @@ import java.io.File import org.apache.spark.SparkException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogTableType} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.internal.SQLConf.BUCKETING_MAX_BUCKETS import org.apache.spark.sql.test.SharedSparkSession @@ -170,20 +170,17 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { } } - test("disallows CREATE EXTERNAL TABLE ... USING ... AS query") { + test("SPARK-33651: allow CREATE EXTERNAL TABLE ... USING ... if location is specified") { withTable("t") { - val error = intercept[AnalysisException] { - sql( - s""" - |CREATE EXTERNAL TABLE t USING PARQUET - |OPTIONS (PATH '${path.toURI}') - |AS SELECT 1 AS a, 2 AS b - """.stripMargin - ) - }.getMessage - - assert(error.contains("Operation not allowed") && - error.contains("CREATE EXTERNAL TABLE ...")) + sql( + s""" + |CREATE EXTERNAL TABLE t USING PARQUET + |OPTIONS (PATH '${path.toURI}') + |AS SELECT 1 AS a, 2 AS b + """.stripMargin) + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t")) + assert(table.tableType == CatalogTableType.EXTERNAL) + assert(table.location.toString == path.toURI.toString.stripSuffix("/")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index aaf8765c04425..bfd04ffaaf754 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -845,7 +845,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { .add("s", StringType, false) val newTable = CatalogTable( identifier = TableIdentifier("test_table", None), - tableType = CatalogTableType.EXTERNAL, + tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat( locationUri = None, inputFormat = None, diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 41a26344f7c21..0593dbe7f6653 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -711,7 +711,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv identifier = TableIdentifier("wide_schema"), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> tempDir.getCanonicalPath) + locationUri = Some(tempDir.toURI) ), schema = schema, provider = Some("json") @@ -1076,7 +1076,8 @@ class MetastoreDataSourcesSuite extends QueryTest 
with SQLTestUtils with TestHiv identifier = TableIdentifier("skip_hive_metadata", Some("default")), tableType = CatalogTableType.EXTERNAL, storage = CatalogStorageFormat.empty.copy( - properties = Map("path" -> tempPath.getCanonicalPath, "skipHiveMetadata" -> "true") + locationUri = Some(tempPath.toURI), + properties = Map("skipHiveMetadata" -> "true") ), schema = schema, provider = Some("parquet") From 154f6044033d1a3b4c19c64b206b168bf919cb3b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sun, 6 Dec 2020 12:03:14 +0900 Subject: [PATCH 0674/1009] [MINOR] Fix string interpolation in CommandUtils.scala and KafkaDataConsumer.scala ### What changes were proposed in this pull request? This PR proposes to fix a string interpolation in `CommandUtils.scala` and `KafkaDataConsumer.scala`. ### Why are the changes needed? To fix a string interpolation bug. ### Does this PR introduce _any_ user-facing change? Yes, the string will be correctly constructed. ### How was this patch tested? Existing tests since they were used in exception/log messages. Closes #30609 from imback82/fix_cache_str_interporlation. Authored-by: Terry Kim Signed-off-by: HyukjinKwon --- .../apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala | 2 +- .../org/apache/spark/sql/execution/command/CommandUtils.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala index f2bf7cd1360ec..649430d434a73 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala @@ -276,7 +276,7 @@ private[kafka010] class KafkaDataConsumer( val fetchedData = getOrRetrieveFetchedData(offset) logDebug(s"Get $groupId $topicPartition nextOffset ${fetchedData.nextOffsetInFetchedData} " + - "requested $offset") + s"requested $offset") // The following loop is basically for `failOnDataLoss = false`. When `failOnDataLoss` is // `false`, first, we will try to fetch the record at `offset`. If no such record exists, then diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala index f86f62bbf853b..15a735be8043f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala @@ -391,7 +391,7 @@ object CommandUtils extends Logging { try { sparkSession.catalog.uncacheTable(name) } catch { - case NonFatal(e) => logWarning("Exception when attempting to uncache $name", e) + case NonFatal(e) => logWarning(s"Exception when attempting to uncache $name", e) } } } From 6317ba29a1bb1b7198fe8df71ddefcf47a55bd51 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Sat, 5 Dec 2020 23:04:55 -0800 Subject: [PATCH 0675/1009] [SPARK-33668][K8S][TEST] Fix flaky test "Verify logging configuration is picked from the provided ### What changes were proposed in this pull request? Fix flaky test "Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j.properties." The test is flaking, with multiple flaked instances - the reason for the failure has been similar to: ``` The code passed to eventually never returned normally. 
Attempted 109 times over 3.0079882413999997 minutes. Last failure message: Failure executing: GET at: https://192.168.39.167:8443/api/v1/namespaces/b37fc72a991b49baa68a2eaaa1516463/pods/spark-pi-97a9bc76308e7fe3-exec-1/log?pretty=false. Message: pods "spark-pi-97a9bc76308e7fe3-exec-1" not found. Received status: Status(apiVersion=v1, code=404, details=StatusDetails(causes=[], group=null, kind=pods, name=spark-pi-97a9bc76308e7fe3-exec-1, retryAfterSeconds=null, uid=null, additionalProperties={}), kind=Status, message=pods "spark-pi-97a9bc76308e7fe3-exec-1" not found, metadata=ListMeta(_continue=null, remainingItemCount=null, resourceVersion=null, selfLink=null, additionalProperties={}), reason=NotFound, status=Failure, additionalProperties={}).. (KubernetesSuite.scala:402) ``` https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36854/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36852/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36850/console https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder-K8s/36848/console From the above failures, it seems, that executor finishes too quickly and is removed by spark before the test can complete. So, in order to mitigate this situation, one way is to turn on the flag "spark.kubernetes.executor.deleteOnTermination" ### Why are the changes needed? Fixes a flaky test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. May be a few runs of jenkins integration test, may reveal if the problem is resolved or not. Closes #30616 from ScrapCodes/SPARK-33668/fix-flaky-k8s-integration-test. Authored-by: Prashant Sharma Signed-off-by: Dongjoon Hyun --- .../k8s/integrationtest/KubernetesSuite.scala | 18 ++++++++++++++++++ .../SparkConfPropagateSuite.scala | 1 + 2 files changed, 19 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 193a02aad0cea..7b2a2d0820238 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -158,6 +158,7 @@ class KubernetesSuite extends SparkFunSuite kubernetesTestComponents.deleteNamespace() } deleteDriverPod() + deleteExecutorPod(appLocator) } protected def runSparkPiAndVerifyCompletion( @@ -508,6 +509,23 @@ class KubernetesSuite extends SparkFunSuite .get() == null) } } + + private def deleteExecutorPod(appLocator: String): Unit = { + kubernetesTestComponents + .kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .delete() + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", appLocator) + .withLabel("spark-role", "executor") + .list() + .getItems.isEmpty) + } + } } private[spark] object KubernetesSuite { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala index 
5d3b426598fdd..0bc632716fa8b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -39,6 +39,7 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => sparkAppConf.set("spark.driver.extraJavaOptions", "-Dlog4j.debug") sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j.debug") + sparkAppConf.set("spark.kubernetes.executor.deleteOnTermination", "false") val log4jExpectedLog = s"log4j: Reading configuration from URL file:/opt/spark/conf/log4j.properties" From e857e06452c2cf478beb31367f76d6950b660ebb Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sun, 6 Dec 2020 01:14:22 -0800 Subject: [PATCH 0676/1009] [SPARK-33652][SQL] DSv2: DeleteFrom should refresh cache ### What changes were proposed in this pull request? This changes `DeleteFromTableExec` to also refresh caches referencing the original table, by passing the `refreshCache` callback to the class. Note that in order to construct the callback, I have to change `DataSourceV2ScanRelation` to contain a `DataSourceV2Relation` instead of a `Table`. ### Why are the changes needed? Currently DSv2 delete from table doesn't refresh caches. This could lead to correctness issue if the staled cache is queried later. ### Does this PR introduce _any_ user-facing change? Yes. Now delete from table in v2 also refreshes cache. ### How was this patch tested? Added a test case. Closes #30597 from sunchao/SPARK-33652. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Relation.scala | 6 +++--- .../scala/org/apache/spark/sql/Dataset.scala | 4 ++-- .../datasources/v2/DataSourceV2Strategy.scala | 5 +++-- .../datasources/v2/DeleteFromTableExec.scala | 4 +++- .../datasources/v2/V2ScanRelationPushDown.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 16 ++++++++++++++++ 6 files changed, 28 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index 4debdd380e6b4..513fce0aba10c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -111,16 +111,16 @@ case class DataSourceV2Relation( * plan. This ensures that the stats that are used by the optimizer account for the filters and * projection that will be pushed down. 
* - * @param table a DSv2 [[Table]] + * @param relation a [[DataSourceV2Relation]] * @param scan a DSv2 [[Scan]] * @param output the output attributes of this relation */ case class DataSourceV2ScanRelation( - table: Table, + relation: DataSourceV2Relation, scan: Scan, output: Seq[AttributeReference]) extends LeafNode with NamedRelation { - override def name: String = table.name() + override def name: String = relation.table.name() override def simpleString(maxFields: Int): String = { s"RelationV2${truncatedString(output, "[", ", ", "]", maxFields)} $name" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 0716043bcf660..05d6647afd958 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -53,7 +53,7 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression import org.apache.spark.sql.execution.arrow.{ArrowBatchStreamWriter, ArrowConverters} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, FileTable} +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation, FileTable} import org.apache.spark.sql.execution.python.EvaluatePython import org.apache.spark.sql.execution.stat.StatFunctions import org.apache.spark.sql.internal.SQLConf @@ -3464,7 +3464,7 @@ class Dataset[T] private[sql]( fr.inputFiles case r: HiveTableRelation => r.tableMeta.storage.locationUri.map(_.toString).toArray - case DataSourceV2ScanRelation(table: FileTable, _, _) => + case DataSourceV2ScanRelation(DataSourceV2Relation(table: FileTable, _, _, _, _), _, _) => table.fileIndex.inputFiles }.flatten files.toSet.toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 938ba77fede47..5289d359f7809 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -208,7 +208,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DeleteFromTable(relation, condition) => relation match { - case DataSourceV2ScanRelation(table, _, output) => + case DataSourceV2ScanRelation(r, _, output) => + val table = r.table if (condition.exists(SubqueryExpression.hasSubquery)) { throw new AnalysisException( s"Delete by condition with subquery is not supported: $condition") @@ -227,7 +228,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat s"Cannot delete from table ${table.name} where ${filters.mkString("[", ", ", "]")}") } - DeleteFromTableExec(table.asDeletable, filters) :: Nil + DeleteFromTableExec(table.asDeletable, filters, refreshCache(r)) :: Nil case _ => throw new AnalysisException("DELETE is only supported with v2 tables.") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala index afebbfd01db22..f0a45c249dc10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DeleteFromTableExec.scala @@ -24,10 +24,12 @@ import org.apache.spark.sql.sources.Filter case class DeleteFromTableExec( table: SupportsDelete, - condition: Array[Filter]) extends V2CommandExec { + condition: Array[Filter], + refreshCache: () => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { table.deleteWhere(condition) + refreshCache() Seq.empty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index b168e848f0b6f..d2180566790ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -64,7 +64,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] { case _ => scan } - val scanRelation = DataSourceV2ScanRelation(relation.table, wrappedScan, output) + val scanRelation = DataSourceV2ScanRelation(relation, wrappedScan, output) val projectionOverSchema = ProjectionOverSchema(output.toStructType) val projectionFunc = (expr: Expression) => expr transformDown { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6ef4fd1372a78..6838a7644a29f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1841,6 +1841,22 @@ class DataSourceV2SQLSuite } } + test("SPARK-33652: DeleteFrom should refresh caches referencing the table") { + val t = "testcat.ns1.ns2.tbl" + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t (id bigint, data string, p int) USING foo PARTITIONED BY (id, p)") + sql(s"INSERT INTO $t VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)") + sql(s"CACHE TABLE view AS SELECT id FROM $t") + assert(spark.table(view).count() == 3) + + sql(s"DELETE FROM $t WHERE id = 2") + assert(spark.table(view).count() == 1) + } + } + } + test("UPDATE TABLE") { val t = "testcat.ns1.ns2.tbl" withTable(t) { From 5250841537d7a8c54fb451748e2a21d3bcc5d966 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 6 Dec 2020 01:22:24 -0800 Subject: [PATCH 0677/1009] [SPARK-33256][PYTHON][DOCS] Clarify PySpark follows NumPy documentation style ### What changes were proposed in this pull request? This PR adds few lines about docstring style to document that PySpark follows [NumPy documentation style](https://numpydoc.readthedocs.io/en/latest/format.html). We all completed the migration to NumPy documentation style at SPARK-32085. Ideally we should have a page like https://pandas.pydata.org/docs/development/contributing_docstring.html but I would like to leave it as a future work. ### Why are the changes needed? To tell developers that PySpark now follows NumPy documentation style. ### Does this PR introduce _any_ user-facing change? No, it's a change in unreleased branches yet. ### How was this patch tested? Manually tested via `make clean html` under `python/docs`: ![Screen Shot 2020-12-06 at 1 34 50 PM](https://user-images.githubusercontent.com/6477701/101271623-d5ce0380-37c7-11eb-93ac-da73caa50c37.png) Closes #30622 from HyukjinKwon/SPARK-33256. 
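Circling back to SPARK-33652 above (DSv2 DELETE FROM refreshing caches), the user-visible effect can be restated outside the test harness as a hedged sketch; it mirrors the regression test added in that patch, and the `testcat` catalog and `foo` provider are assumptions taken from that suite's configuration rather than generally available names:
```scala
// Sketch only (assumes a session where "testcat" is registered as an in-memory v2
// catalog, as in DataSourceV2SQLSuite): deleting rows now also recaches plans that
// reference the table, so the cached query sees the post-DELETE data.
spark.sql("CREATE TABLE testcat.ns1.ns2.tbl (id bigint, data string, p int) " +
  "USING foo PARTITIONED BY (id, p)")
spark.sql("INSERT INTO testcat.ns1.ns2.tbl VALUES (2L, 'a', 2), (2L, 'b', 3), (3L, 'c', 3)")
spark.sql("CACHE TABLE cached_ids AS SELECT id FROM testcat.ns1.ns2.tbl")
assert(spark.table("cached_ids").count() == 3)
spark.sql("DELETE FROM testcat.ns1.ns2.tbl WHERE id = 2")
assert(spark.table("cached_ids").count() == 1)  // before this change the stale cache kept 3 rows
```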
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/docs/source/development/contributing.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index 2b62c953e0786..a41b8a1a1de9e 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -123,11 +123,12 @@ Annotations can be validated using ``dev/lint-python`` script or by invoking myp -Code Style Guide ----------------- +Code and Docstring Guide +---------------------------------- Please follow the style of the existing codebase as is, which is virtually PEP 8 with one exception: lines can be up to 100 characters in length, not 79. +For the docstring style, PySpark follows `NumPy documentation style `_. Note that the method and variable names in PySpark are the similar case is ``threading`` library in Python itself where the APIs were inspired by Java. PySpark also follows `camelCase` for exposed APIs that match with Scala and Java. From 48297818f37a8e02cc02ba6fa9ec04fe37540aca Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 6 Dec 2020 02:56:08 -0800 Subject: [PATCH 0678/1009] [SPARK-33667][SQL] Respect the `spark.sql.caseSensitive` config while resolving partition spec in v1 `SHOW PARTITIONS` ### What changes were proposed in this pull request? Preprocess the partition spec passed to the V1 SHOW PARTITIONS implementation `ShowPartitionsCommand`, and normalize the passed spec according to the partition columns w.r.t the case sensitivity flag **spark.sql.caseSensitive**. ### Why are the changes needed? V1 SHOW PARTITIONS is case sensitive in fact, and doesn't respect the SQL config **spark.sql.caseSensitive** which is false by default, for instance: ```sql spark-sql> CREATE TABLE tbl1 (price int, qty int, year int, month int) > USING parquet > PARTITIONED BY (year, month); spark-sql> INSERT INTO tbl1 PARTITION(year = 2015, month = 1) SELECT 1, 1; spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1); Error in query: Non-partitioning column(s) [YEAR, Month] are specified for SHOW PARTITIONS; ``` The `SHOW PARTITIONS` command must show the partition `year = 2015, month = 1` specified by `YEAR = 2015, Month = 1`. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the command above works as expected: ```sql spark-sql> SHOW PARTITIONS tbl1 PARTITION(YEAR = 2015, Month = 1); year=2015/month=1 ``` ### How was this patch tested? By running the affected test suites: - `v1/ShowPartitionsSuite` - `v2/ShowPartitionsSuite` Closes #30615 from MaxGekk/show-partitions-case-sensitivity-test. 
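For readers unfamiliar with the helper reused in the `tables.scala` diff below, here is a simplified, illustrative sketch of what the spec normalization does; the real implementation is `PartitioningUtils.normalizePartitionSpec`, and the names and exception type here are simplified assumptions:
```scala
// Simplified sketch, not the actual Spark implementation: each key of the user-supplied
// partition spec is matched against the table's partition columns with the session
// resolver (case-insensitive when spark.sql.caseSensitive=false) and rewritten to the
// column's canonical name; unknown keys are rejected.
type Resolver = (String, String) => Boolean

def normalizePartitionSpec(
    spec: Map[String, String],
    partitionColumns: Seq[String],
    resolver: Resolver): Map[String, String] = {
  spec.map { case (key, value) =>
    val normalizedKey = partitionColumns
      .find(col => resolver(col, key))
      .getOrElse(throw new IllegalArgumentException(
        s"$key is not a valid partition column"))
    normalizedKey -> value
  }
}

// With the default case-insensitive resolver, PARTITION(YEAR = 2015, Month = 1)
// normalizes to Map("year" -> "2015", "month" -> "1").
```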
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/command/tables.scala | 18 ++++++------ .../command/ShowPartitionsSuiteBase.scala | 28 +++++++++++++++++-- .../command/v1/ShowPartitionsSuite.scala | 4 --- .../command/v2/ShowPartitionsSuite.scala | 4 --- 4 files changed, 34 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9e3ca3c321a54..59adb7dd7e319 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -1006,20 +1006,18 @@ case class ShowPartitionsCommand( DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW PARTITIONS") /** - * Validate the partitioning spec by making sure all the referenced columns are + * Normalizes the partition spec w.r.t the partition columns and case sensitivity settings, + * and validates the spec by making sure all the referenced columns are * defined as partitioning columns in table definition. An AnalysisException exception is * thrown if the partitioning spec is invalid. */ - if (spec.isDefined) { - val badColumns = spec.get.keySet.filterNot(table.partitionColumnNames.contains) - if (badColumns.nonEmpty) { - val badCols = badColumns.mkString("[", ", ", "]") - throw new AnalysisException( - s"Non-partitioning column(s) $badCols are specified for SHOW PARTITIONS") - } - } + val normalizedSpec = spec.map(partitionSpec => PartitioningUtils.normalizePartitionSpec( + partitionSpec, + table.partitionColumnNames, + table.identifier.quotedString, + sparkSession.sessionState.conf.resolver)) - val partNames = catalog.listPartitionNames(tableName, spec) + val partNames = catalog.listPartitionNames(tableName, normalizedSpec) partNames.map(Row(_)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 82457f96a3003..b695decdb3ec9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{StringType, StructType} @@ -28,7 +29,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { protected def version: String protected def catalog: String protected def defaultUsing: String - protected def wrongPartitionColumnsError(columns: String*): String // Gets the schema of `SHOW PARTITIONS` private val showSchema: StructType = new StructType().add("partition", StringType, false) protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): Unit = { @@ -94,7 +94,7 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { val errMsg = intercept[AnalysisException] { sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") }.getMessage - assert(errMsg.contains(wrongPartitionColumnsError("abcd", "xyz"))) + assert(errMsg.contains("abcd is not a valid partition column")) } } } @@ -149,4 +149,28 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } } } + + 
test("SPARK-33667: case sensitivity of partition spec") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val t = s"$catalog.ns.part_table" + withTable(t) { + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |PARTITIONED BY (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + runShowPartitionsSql( + s"SHOW PARTITIONS $t $partitionSpec", + Row("year=2015/month=1") :: Nil) + } + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 2b2bc9e63dc82..c752a5f358bb9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -27,10 +27,6 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultUsing: String = "USING parquet" - override protected def wrongPartitionColumnsError(columns: String*): String = { - s"Non-partitioning column(s) ${columns.mkString("[", ", ", "]")} are specified" - } - test("show everything in the default database") { val table = "dateTable" withTable(table) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index ca47a713ad604..55985a335c94b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -32,10 +32,6 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSpa .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - override protected def wrongPartitionColumnsError(columns: String*): String = { - s"${columns.head} is not a valid partition column" - } - test("a table does not support partitioning") { val table = s"non_part_$catalog.tab1" withTable(table) { From b94ecf0734b829878956d98b74323e0c80822fec Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Sun, 6 Dec 2020 22:36:34 +0800 Subject: [PATCH 0679/1009] [SPARK-33674][TEST] Show Slowpoke notifications in SBT tests ### What changes were proposed in this pull request? This PR is to show Slowpoke notifications in the log when running tests using SBT. For example, the test case "zero sized blocks" in ExternalShuffleServiceSuite enters the infinite loop. After this change, the log file will have a notification message every 5 minute when the test case running longer than two minutes. Below is an example message. 
``` [info] ExternalShuffleServiceSuite: [info] - groupByKey without compression (101 milliseconds) [info] - shuffle non-zero block size (3 seconds, 186 milliseconds) [info] - shuffle serializer (3 seconds, 189 milliseconds) [info] *** Test still running after 2 minute, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 7 minute, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 12 minutes, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. [info] *** Test still running after 17 minutes, 1 seconds: suite name: ExternalShuffleServiceSuite, test name: zero sized blocks. ``` ### Why are the changes needed? When the tests/code has bug and enters the infinite loop, it is hard to tell which test cases hit some issues from the log, especially when we are running the tests in parallel. It would be nice to show the Slowpoke notifications. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual testing in my local dev environment. Closes #30621 from gatorsmile/addSlowpoke. Authored-by: Xiao Li Signed-off-by: Yuming Wang --- project/SparkBuild.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a5951e0452943..23fb73d228e01 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1055,6 +1055,9 @@ object TestSettings { }.getOrElse(Nil): _*), // Show full stack trace and duration in test cases. testOptions in Test += Tests.Argument("-oDF"), + // Slowpoke notifications: receive notifications every 5 minute of tests that have been running + // longer than two minutes. + testOptions in Test += Tests.Argument(TestFrameworks.ScalaTest, "-W", "120", "300"), testOptions in Test += Tests.Argument(TestFrameworks.JUnit, "-v", "-a"), // Enable Junit testing. libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % "test", From 119539fd493af5ed0e37af79320787f145eaf3f1 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 7 Dec 2020 09:48:16 +0900 Subject: [PATCH 0680/1009] [SPARK-33663][SQL] Uncaching should not be called on non-existing temp views ### What changes were proposed in this pull request? 
This PR proposes to fix a misleading logs in the following scenario when uncaching is called on non-existing views: ``` scala> sql("CREATE TABLE table USING parquet AS SELECT 2") res0: org.apache.spark.sql.DataFrame = [] scala> val df = spark.table("table") df: org.apache.spark.sql.DataFrame = [2: int] scala> df.createOrReplaceTempView("t2") 20/12/04 10:16:24 WARN CommandUtils: Exception when attempting to uncache $name org.apache.spark.sql.AnalysisException: Table or view not found: t2;; 'UnresolvedRelation [t2], [], false at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:113) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:93) at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:183) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:93) at org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:90) at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:152) at org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:172) at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:214) at org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:169) at org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:73) at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) at org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:138) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768) at org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:138) at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:73) at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:71) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:63) at org.apache.spark.sql.Dataset$.$anonfun$ofRows$1(Dataset.scala:90) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:88) at org.apache.spark.sql.DataFrameReader.table(DataFrameReader.scala:889) at org.apache.spark.sql.SparkSession.table(SparkSession.scala:589) at org.apache.spark.sql.internal.CatalogImpl.uncacheTable(CatalogImpl.scala:476) at org.apache.spark.sql.execution.command.CommandUtils$.uncacheTableOrView(CommandUtils.scala:392) at org.apache.spark.sql.execution.command.CreateViewCommand.run(views.scala:124) ``` Since `t2` does not exist yet, it shouldn't try to uncache. ### Why are the changes needed? To fix misleading message. ### Does this PR introduce _any_ user-facing change? Yes, the above message will not be displayed if the view doesn't exist yet. ### How was this patch tested? Manually tested since this is a log message printed. Closes #30608 from imback82/fix_cache_message. 
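For illustration, a short spark-shell sketch of the effect (it reuses the `table` and `t2` names from the scenario above; only the uncache and logging behavior changes):

```scala
// Assumes a spark-shell session and the `table` created in the scenario above.
val df = spark.table("table")

// `t2` does not exist yet, so no uncache is attempted and the misleading
// "Table or view not found: t2" warning is no longer logged.
df.createOrReplaceTempView("t2")

// Replacing `t2` with the same plan: still nothing to uncache.
df.createOrReplaceTempView("t2")

// Replacing `t2` with a different plan: this is the only case where the old
// view's cached data needs to be invalidated before the replacement.
spark.range(10).createOrReplaceTempView("t2")
```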
Authored-by: Terry Kim Signed-off-by: HyukjinKwon --- .../spark/sql/execution/command/views.scala | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 4ad5eddb83f43..06b1e03adea50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -113,12 +113,12 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - val samePlan = catalog.getTempView(name.table).exists { - // Don't perform sameResult check for View logical plan, since it's unresolved - case _: View => false - case other => other.sameResult(child) + val shouldUncache = replace && catalog.getTempView(name.table).exists { + // Uncache View logical plan without checking the same result check, since it's unresolved. + case _: View => true + case other => !other.sameResult(child) } - if (replace && !samePlan) { + if (shouldUncache) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) @@ -141,12 +141,12 @@ case class CreateViewCommand( } else if (viewType == GlobalTempView) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val viewIdent = TableIdentifier(name.table, Option(db)) - val samePlan = catalog.getGlobalTempView(name.table).exists { - // Don't perform sameResult check for View logical plan, since it's unresolved - case _: View => false - case other => other.sameResult(child) + val shouldUncache = replace && catalog.getGlobalTempView(name.table).exists { + // Uncache View logical plan without checking the same result check, since it's unresolved. + case _: View => true + case other => !other.sameResult(child) } - if (replace && !samePlan) { + if (shouldUncache) { logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) From e32de29bcee6073a2d2b9bb4e5930459eaf460d9 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 10:05:28 +0900 Subject: [PATCH 0681/1009] [SPARK-33675][INFRA] Add GitHub Action job to publish snapshot ### What changes were proposed in this pull request? This PR aims to add `GitHub Action` job to publish daily snapshot for **master** branch. - https://repository.apache.org/content/groups/snapshots/org/apache/spark/spark-core_2.12/3.2.0-SNAPSHOT/ For the other branches, I'll make adjusted backports. - For `branch-3.1`, we can specify the checkout `ref` to `branch-3.1`. - For `branch-2.4` and `branch-3.0`, we can publish at every commit since the traffic is low. - https://github.com/apache/spark/pull/30630 (branch-3.0) - https://github.com/apache/spark/pull/30629 (branch-2.4 LTS) ### Why are the changes needed? After this series of jobs, this will reduce our maintenance burden permanently from AmpLab Jenkins by removing the following completely. https://amplab.cs.berkeley.edu/jenkins/view/Spark%20Packaging/ For now, AmpLab Jenkins doesn't have a job for `branch-3.1`. We can do it by ourselves by `GitHub Action`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
The snapshot publishing is tested here at PR trigger. Since this PR adds a scheduled job, we cannot test in this PR. - https://github.com/dongjoon-hyun/spark/runs/1505792859 Apache Infra team finished the setup here. - https://issues.apache.org/jira/browse/INFRA-21167 Closes #30623 from dongjoon-hyun/SPARK-33675. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .github/workflows/publish_snapshot.yml | 30 ++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/publish_snapshot.yml diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml new file mode 100644 index 0000000000000..9871680f73891 --- /dev/null +++ b/.github/workflows/publish_snapshot.yml @@ -0,0 +1,30 @@ +name: Publish Snapshot + +on: + schedule: + - cron: '0 0 * * *' + +jobs: + publish-snapshot: + runs-on: ubuntu-latest + steps: + - name: Checkout Spark repository + uses: actions/checkout@master + - name: Cache Maven local repository + uses: actions/cache@v2 + with: + path: ~/.m2/repository + key: snapshot-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + snapshot-maven- + - name: Install Java 8 + uses: actions/setup-java@v1 + with: + java-version: 8 + - name: Publish snapshot + env: + ASF_USERNAME: ${{ secrets.NEXUS_USER }} + ASF_PASSWORD: ${{ secrets.NEXUS_PW }} + GPG_KEY: "not_used" + GPG_PASSPHRASE: "not_used" + run: ./dev/create-release/release-build.sh publish-snapshot From 29096a8869c95221dc75ce7fd3d098680bef4f55 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 7 Dec 2020 10:21:04 +0900 Subject: [PATCH 0682/1009] [SPARK-33670][SQL] Verify the partition provider is Hive in v1 SHOW TABLE EXTENDED ### What changes were proposed in this pull request? Invoke the check `DDLUtils.verifyPartitionProviderIsHive()` from V1 implementation of `SHOW TABLE EXTENDED` when partition specs are specified. This PR is some kind of follow up https://github.com/apache/spark/pull/16373 and https://github.com/apache/spark/pull/15515. ### Why are the changes needed? To output an user friendly error with recommendation like **" ... partition metadata is not stored in the Hive metastore. To import this information into the metastore, run `msck repair table tableName` "** instead of silently output an empty result. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the affected test suites, in particular: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "hive/test:testOnly *PartitionProviderCompatibilitySuite" ``` Closes #30618 from MaxGekk/show-table-extended-verifyPartitionProviderIsHive. Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../spark/sql/execution/command/tables.scala | 3 +++ .../execution/command/v1/ShowTablesSuite.scala | 18 ++++++++++++++++-- .../PartitionProviderCompatibilitySuite.scala | 14 ++++++++++---- 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 59adb7dd7e319..54660ced8d834 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -879,6 +879,9 @@ case class ShowTablesCommand( // Note: tableIdentifierPattern should be non-empty, otherwise a [[ParseException]] // should have been thrown by the sql parser. 
val table = catalog.getTableMetadata(TableIdentifier(tableIdentifierPattern.get, Some(db))) + + DDLUtils.verifyPartitionProviderIsHive(sparkSession, table, "SHOW TABLE EXTENDED") + val tableIdent = table.identifier val normalizedSpec = PartitioningUtils.normalizePartitionSpec( partitionSpec.get, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 8f29f9f276138..3db880c776365 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.{AnalysisException, Row, SaveMode} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.internal.SQLConf @@ -111,4 +111,18 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } } -class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession +class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession { + test("SPARK-33670: show partitions from a datasource table") { + import testImplicits._ + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + sql(s"USE $catalog.ns") + val t = "part_datasrc" + withTable(t) { + val df = (1 to 3).map(i => (i, s"val_$i", i * 2)).toDF("a", "b", "c") + df.write.partitionBy("a").format("parquet").mode(SaveMode.Overwrite).saveAsTable(t) + assert(sql(s"SHOW TABLE EXTENDED LIKE '$t' PARTITION(a = 1)").count() === 1) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala index 80afc9d8f44bc..e1b0637963b75 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala @@ -53,7 +53,8 @@ class PartitionProviderCompatibilitySuite s"ALTER TABLE $tableName PARTITION (partCol=1) SET LOCATION '/foo'", s"ALTER TABLE $tableName DROP PARTITION (partCol=1)", s"DESCRIBE $tableName PARTITION (partCol=1)", - s"SHOW PARTITIONS $tableName") + s"SHOW PARTITIONS $tableName", + s"SHOW TABLE EXTENDED LIKE '$tableName' PARTITION (partCol=1)") withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "true") { for (cmd <- unsupportedCommands) { @@ -124,10 +125,15 @@ class PartitionProviderCompatibilitySuite } // disabled withSQLConf(SQLConf.HIVE_MANAGE_FILESOURCE_PARTITIONS.key -> "false") { - val e = intercept[AnalysisException] { - spark.sql(s"show partitions test") + Seq( + "SHOW PARTITIONS test", + "SHOW TABLE EXTENDED LIKE 'test' PARTITION (partCol=1)" + ).foreach { showPartitions => + val e = intercept[AnalysisException] { + spark.sql(showPartitions) + } + assert(e.getMessage.contains("filesource partition management is disabled")) } - assert(e.getMessage.contains("filesource partition management is disabled")) spark.sql("refresh table test") assert(spark.sql("select * from test").count() == 5) } From e88f0d4a2436cc47c8bf8ed2a739eab728ea3d81 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 6 Dec 2020 17:57:19 -0800 Subject: [PATCH 0683/1009] [SPARK-33683][INFRA] Remove 
-Djava.version=11 from Scala 2.13 build in GitHub Actions ### What changes were proposed in this pull request? This PR removes `-Djava.version=11` from the build command for Scala 2.13 in the GitHub Actions' job. In the GitHub Actions' job, the build command for Scala 2.13 is defined as follows. ``` ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile ``` Though, Scala 2.13 build uses Java 8 rather than 11 so let's remove `-Djava.version=11`. ### Why are the changes needed? To build with consistent configuration. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions' workflow. Closes #30633 from sarutak/scala-213-java11. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a3bb083387f3e..72b2caf907151 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Djava.version=11 -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 73412ffb3a857acda5dab41d7be3f7ae627f6eaf Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 6 Dec 2020 19:34:54 -0800 Subject: [PATCH 0684/1009] [SPARK-33680][SQL][TESTS] Fix PrunePartitionSuiteBase/BucketedReadWithHiveSupportSuite not to depend on the default conf ### What changes were proposed in this pull request? This PR updates `PrunePartitionSuiteBase/BucketedReadWithHiveSupportSuite` to have the require conf explicitly. ### Why are the changes needed? The unit test should not depend on the default configurations. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? According to https://github.com/apache/spark/pull/30628 , this seems to be the only ones. Pass the CIs. Closes #30631 from dongjoon-hyun/SPARK-CONF-AGNO. 
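The change follows the usual pattern of pinning the configuration a test relies on instead of inheriting the build's default; a minimal sketch of that pattern (the test body is a placeholder, and it assumes a suite that mixes in `SQLTestUtils` for `withSQLConf`):

```scala
// Pin spark.sql.adaptive.enabled explicitly so the plan assertions do not depend on
// whichever default the build or environment happens to use.
import org.apache.spark.sql.internal.SQLConf

test("SPARK-28169: Convert scan predicate condition to CNF") {
  withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
    // ... the existing partition-pruning assertions run unchanged inside this block ...
  }
}
```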
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../execution/PrunePartitionSuiteBase.scala | 81 ++++++++++--------- .../BucketedReadWithHiveSupportSuite.scala | 4 +- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala index 8e35cd034311d..bc170fcd59026 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PrunePartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BinaryOperator, Expression, IsNotNull, Literal} import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.internal.SQLConf.ADAPTIVE_EXECUTION_ENABLED import org.apache.spark.sql.test.SQLTestUtils abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with TestHiveSingleton { @@ -28,48 +29,50 @@ abstract class PrunePartitionSuiteBase extends QueryTest with SQLTestUtils with protected def format: String test("SPARK-28169: Convert scan predicate condition to CNF") { - withTempView("temp") { - withTable("t") { - sql( - s""" - |CREATE TABLE t(i INT, p STRING) - |USING $format - |PARTITIONED BY (p)""".stripMargin) - - spark.range(0, 1000, 1).selectExpr("id as col") - .createOrReplaceTempView("temp") - - for (part <- Seq(1, 2, 3, 4)) { + withSQLConf(ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + withTempView("temp") { + withTable("t") { sql( s""" - |INSERT OVERWRITE TABLE t PARTITION (p='$part') - |SELECT col FROM temp""".stripMargin) - } + |CREATE TABLE t(i INT, p STRING) + |USING $format + |PARTITIONED BY (p)""".stripMargin) - assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, - "((`p` = '1') || (`p` = '2'))") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, - "") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, - "((`p` = '1') || (`p` = '3'))") - assertPrunedPartitions( - "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, - "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") - assertPrunedPartitions( - "SELECT * FROM t", 4, - "") - assertPrunedPartitions( - "SELECT * FROM t WHERE p = '1' AND i = 2", 1, - "(`p` = '1')") - assertPrunedPartitions( - """ - |SELECT i, COUNT(1) FROM ( - |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) - |) tmp GROUP BY i - """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") + spark.range(0, 1000, 1).selectExpr("id as col") + .createOrReplaceTempView("temp") + + for (part <- Seq(1, 2, 3, 4)) { + sql( + s""" + |INSERT OVERWRITE TABLE t PARTITION (p='$part') + |SELECT col FROM temp""".stripMargin) + } + + assertPrunedPartitions( + "SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1)", 2, + "((`p` = '1') || (`p` = '2'))") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (i = 1 OR p = '2')", 4, + "") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '3' AND i = 3 )", 2, + "((`p` = '1') || (`p` = '3'))") + assertPrunedPartitions( + "SELECT * FROM t WHERE (p = '1' AND i = 2) OR (p = '2' OR p = '3')", 3, + "((`p` = '1') || ((`p` = '2') || (`p` = '3')))") + assertPrunedPartitions( + 
"SELECT * FROM t", 4, + "") + assertPrunedPartitions( + "SELECT * FROM t WHERE p = '1' AND i = 2", 1, + "(`p` = '1')") + assertPrunedPartitions( + """ + |SELECT i, COUNT(1) FROM ( + |SELECT * FROM t WHERE p = '1' OR (p = '2' AND i = 1) + |) tmp GROUP BY i + """.stripMargin, 2, "((`p` = '1') || (`p` = '2'))") + } } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala index 35dab79ff6dff..07901351fc0fc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/BucketedReadWithHiveSupportSuite.scala @@ -17,10 +17,12 @@ package org.apache.spark.sql.sources +import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION -class BucketedReadWithHiveSupportSuite extends BucketedReadSuite with TestHiveSingleton { +class BucketedReadWithHiveSupportSuite + extends BucketedReadSuite with DisableAdaptiveExecutionSuite with TestHiveSingleton { protected override def beforeAll(): Unit = { super.beforeAll() assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive") From d48ef34911b8928b66df92399119caebb24616d4 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 6 Dec 2020 23:02:36 -0800 Subject: [PATCH 0685/1009] [SPARK-33684][BUILD] Upgrade httpclient from 4.5.6 to 4.5.13 ### What changes were proposed in this pull request? This PR upgrades `commons.httpclient` from `4.5.6` to `4.5.13`. 4.5.6 is released over 2 years ago and now we can use more stable `4.5.13`. https://archive.apache.org/dist/httpcomponents/httpclient/RELEASE_NOTES-4.5.x.txt ### Why are the changes needed? To follow the more stable release. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by the existing tests. Closes #30634 from sarutak/upgrade-httpclient. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index a19558bc2a5e3..401050a60e493 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -99,7 +99,7 @@ hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar htrace-core/3.1.0-incubating//htrace-core-3.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar +httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 24283224dd37d..b0f8935843281 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -98,7 +98,7 @@ hk2-api/2.6.1//hk2-api-2.6.1.jar hk2-locator/2.6.1//hk2-locator-2.6.1.jar hk2-utils/2.6.1//hk2-utils-2.6.1.jar htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar -httpclient/4.5.6//httpclient-4.5.6.jar +httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar diff --git a/pom.xml b/pom.xml index 1d7704055898b..364dec688b38b 100644 --- a/pom.xml +++ b/pom.xml @@ -155,7 +155,7 @@ 0.12.8 - 4.5.6 + 4.5.13 4.4.12 3.1 From 87c056088e853d475f1507e296ad06480862e8a7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 6 Dec 2020 23:22:52 -0800 Subject: [PATCH 0686/1009] [SPARK-33671][SQL] Remove VIEW checks from V1 table commands ### What changes were proposed in this pull request? Remove VIEW checks from the following V1 commands: - `SHOW PARTITIONS` - `TRUNCATE TABLE` - `LOAD DATA` The checks are performed earlier at: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala#L885-L889 ### Why are the changes needed? To improve code maintenance, and remove dead codes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By existing test suites like `v1/ShowPartitionsSuite`. 1. LOAD DATA: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala#L176-L179 2. TRUNCATE TABLE: https://github.com/apache/spark/blob/acc211d2cf0e6ab94f6578e1eb488766fd20fa4e/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala#L180-L183 3. SHOW PARTITIONS: - v1/ShowPartitionsSuite Closes #30620 from MaxGekk/show-table-check-view. 
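A quick spark-shell check of why the removed guards were unreachable (the exact error wording below is assumed and abbreviated; the point is that the failure already happens during analysis):

```scala
// Assumes a spark-shell session. Table resolution in the analyzer rejects views before
// ShowPartitionsCommand, TruncateTableCommand or LoadDataCommand ever run, so the
// command-level VIEW checks deleted here could never fire.
spark.range(5).createOrReplaceTempView("v")
spark.sql("SHOW PARTITIONS v")
// org.apache.spark.sql.AnalysisException: v is a temp view. 'SHOW PARTITIONS' expects a table.
// (wording abbreviated; raised by the analyzer, not by ShowPartitionsCommand)
```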
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/command/tables.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 54660ced8d834..640051384e94c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -311,9 +311,6 @@ case class LoadDataCommand( sparkSession.sessionState.conf.resolver) } - if (targetTable.tableType == CatalogTableType.VIEW) { - throw new AnalysisException(s"Target table in LOAD DATA cannot be a view: $tableIdentwithDB") - } if (DDLUtils.isDatasourceTable(targetTable)) { throw new AnalysisException( s"LOAD DATA is not supported for datasource tables: $tableIdentwithDB") @@ -452,10 +449,6 @@ case class TruncateTableCommand( throw new AnalysisException( s"Operation not allowed: TRUNCATE TABLE on external tables: $tableIdentWithDB") } - if (table.tableType == CatalogTableType.VIEW) { - throw new AnalysisException( - s"Operation not allowed: TRUNCATE TABLE on views: $tableIdentWithDB") - } if (table.partitionColumnNames.isEmpty && partitionSpec.isDefined) { throw new AnalysisException( s"Operation not allowed: TRUNCATE TABLE ... PARTITION is not supported " + @@ -995,11 +988,7 @@ case class ShowPartitionsCommand( * Validate and throws an [[AnalysisException]] exception under the following conditions: * 1. If the table is not partitioned. * 2. If it is a datasource table. - * 3. If it is a view. */ - if (table.tableType == VIEW) { - throw new AnalysisException(s"SHOW PARTITIONS is not allowed on a view: $tableIdentWithDB") - } if (table.partitionColumnNames.isEmpty) { throw new AnalysisException( From 26c0493318c2a3e5b74ff3829de88605aff8e832 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 7 Dec 2020 08:14:36 +0000 Subject: [PATCH 0687/1009] [SPARK-33676][SQL] Require exact matching of partition spec to the schema in V2 `ALTER TABLE .. ADD/DROP PARTITION` ### What changes were proposed in this pull request? Check that partitions specs passed to v2 `ALTER TABLE .. ADD/DROP PARTITION` exactly match to the partition schema (all partition fields from the schema are specified in partition specs). ### Why are the changes needed? 1. To have the same behavior as V1 `ALTER TABLE .. ADD/DROP PARTITION` that output the error: ```sql spark-sql> create table tab1 (id int, a int, b int) using parquet partitioned by (a, b); spark-sql> ALTER TABLE tab1 ADD PARTITION (A='9'); Error in query: Partition spec is invalid. The spec (a) must match the partition spec (a, b) defined in table '`default`.`tab1`'; ``` 2. To prevent future errors caused by not fully specified partition specs. ### Does this PR introduce _any_ user-facing change? Yes. The V2 implementation of `ALTER TABLE .. ADD/DROP PARTITION` output the same error as V1 commands. ### How was this patch tested? By running the test suite with new UT: ``` $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" ``` Closes #30624 from MaxGekk/add-partition-full-spec. 
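Through a v2 catalog the same contract now holds; a sketch mirroring the new test (the `testpart` catalog and the `foo` provider come from the test fixtures and are not generally available):

```scala
// Assumes the in-memory partition catalog registered as `testpart` in the test suite.
spark.sql(
  """CREATE TABLE testpart.ns1.ns2.tbl (id bigint, part0 int, part1 string)
    |USING foo
    |PARTITIONED BY (part0, part1)""".stripMargin)

// Only part0 is given, so both ADD and DROP are rejected with the same error as v1:
spark.sql("ALTER TABLE testpart.ns1.ns2.tbl ADD PARTITION (part0 = 1)")
// org.apache.spark.sql.AnalysisException: Partition spec is invalid. The spec (part0)
// must match the partition spec (part0, part1) defined in table ...
```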
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 20 +++++++++++++++---- .../sql/catalyst/catalog/SessionCatalog.scala | 15 ++++++-------- .../spark/sql/util/PartitioningUtils.scala | 18 +++++++++++++++++ .../AlterTablePartitionV2SQLSuite.scala | 20 +++++++++++++++++++ 4 files changed, 60 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 38991a9e24fa8..feb05d3b6926b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec +import org.apache.spark.sql.util.PartitioningUtils.{normalizePartitionSpec, requireExactMatchedPartitionSpec} /** * Resolve [[UnresolvedPartitionSpec]] to [[ResolvedPartitionSpec]] in partition related commands. @@ -35,11 +35,21 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => - r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + val partitionSchema = table.partitionSchema() + r.copy(parts = resolvePartitionSpecs( + table.name, + partSpecs, + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => - r.copy(parts = resolvePartitionSpecs(table.name, partSpecs, table.partitionSchema())) + val partitionSchema = table.partitionSchema() + r.copy(parts = resolvePartitionSpecs( + table.name, + partSpecs, + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( @@ -51,7 +61,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { private def resolvePartitionSpecs( tableName: String, partSpecs: Seq[PartitionSpec], - partSchema: StructType): Seq[ResolvedPartitionSpec] = + partSchema: StructType, + checkSpec: TablePartitionSpec => Unit = _ => ()): Seq[ResolvedPartitionSpec] = partSpecs.map { case unresolvedPartSpec: UnresolvedPartitionSpec => val normalizedSpec = normalizePartitionSpec( @@ -59,6 +70,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partSchema.map(_.name), tableName, conf.resolver) + checkSpec(normalizedSpec) val partitionNames = normalizedSpec.keySet val requestedFields = partSchema.filter(field => partitionNames.contains(field.name)) ResolvedPartitionSpec( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 0cdbc1a234c66..a2ab756382488 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -43,7 +43,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, PartitioningUtils} import org.apache.spark.util.Utils object SessionCatalog { @@ -1167,14 +1167,11 @@ class SessionCatalog( private def requireExactMatchedPartitionSpec( specs: Seq[TablePartitionSpec], table: CatalogTable): Unit = { - val defined = table.partitionColumnNames.sorted - specs.foreach { s => - if (s.keys.toSeq.sorted != defined) { - throw new AnalysisException( - s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must match " + - s"the partition spec (${table.partitionColumnNames.mkString(", ")}) defined in " + - s"table '${table.identifier}'") - } + specs.foreach { spec => + PartitioningUtils.requireExactMatchedPartitionSpec( + table.identifier.toString, + spec, + table.partitionColumnNames) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala index 586aa6c59164f..e473e1d1b7ff3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/PartitioningUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.util import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec object PartitioningUtils { /** @@ -44,4 +45,21 @@ object PartitioningUtils { normalizedPartSpec.toMap } + + /** + * Verify if the input partition spec exactly matches the existing defined partition spec + * The columns must be the same but the orders could be different. + */ + def requireExactMatchedPartitionSpec( + tableName: String, + spec: TablePartitionSpec, + partitionColumnNames: Seq[String]): Unit = { + val defined = partitionColumnNames.sorted + if (spec.keys.toSeq.sorted != defined) { + throw new AnalysisException( + s"Partition spec is invalid. The spec (${spec.keys.mkString(", ")}) must match " + + s"the partition spec (${partitionColumnNames.mkString(", ")}) defined in " + + s"table '$tableName'") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 47b5e5e54edde..45d47c6d8681c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -261,4 +261,24 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } } + + test("SPARK-33676: not fully specified partition spec") { + val t = "testpart.ns1.ns2.tbl" + withTable(t) { + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |USING foo + |PARTITIONED BY (part0, part1)""".stripMargin) + Seq( + s"ALTER TABLE $t ADD PARTITION (part0 = 1)", + s"ALTER TABLE $t DROP PARTITION (part0 = 1)" + ).foreach { alterTable => + val errMsg = intercept[AnalysisException] { + sql(alterTable) + }.getMessage + assert(errMsg.contains("Partition spec is invalid. 
" + + "The spec (part0) must match the partition spec (part0, part1)")) + } + } + } } From 1e0c006748c031d5277ba3b906b0bbf68e6bc893 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 7 Dec 2020 21:36:52 +0900 Subject: [PATCH 0688/1009] [SPARK-33617][SQL] Add default parallelism configuration for Spark SQL queries ### What changes were proposed in this pull request? This pr add default parallelism configuration(`spark.sql.default.parallelism`) for Spark SQL and make it effective for `LocalTableScan`. ### Why are the changes needed? Avoid generating small files for INSERT INTO TABLE from VALUES, for example: ```sql CREATE TABLE t1(id int) USING parquet; INSERT INTO TABLE t1 VALUES (1), (2), (3), (4), (5), (6), (7), (8); ``` Before this pr: ``` -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00000-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00001-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00002-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00003-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00004-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00005-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00006-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 421 Dec 1 01:54 part-00007-4d5a3a89-2995-4328-b2ae-908febbbaf4a-c000.snappy.parquet -rw-r--r-- 1 root root 0 Dec 1 01:54 _SUCCESS ``` After this pr and set `spark.sql.files.minPartitionNum` to 1: ``` -rw-r--r-- 1 root root 452 Dec 1 01:59 part-00000-6de50c79-e305-4f8d-b6ae-39f46b2619c6-c000.snappy.parquet -rw-r--r-- 1 root root 0 Dec 1 01:59 _SUCCESS ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30559 from wangyum/SPARK-33617. 
Lead-authored-by: Yuming Wang Co-authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../main/scala/org/apache/spark/sql/SparkSession.scala | 6 ++++-- .../spark/sql/execution/LocalTableScanExec.scala | 4 +++- .../execution/adaptive/CoalesceShufflePartitions.scala | 2 +- .../spark/sql/execution/basicPhysicalOperators.scala | 3 ++- .../org/apache/spark/sql/execution/command/ddl.scala | 3 ++- .../sql/execution/datasources/FilePartition.scala | 1 + .../sql/execution/datasources/SchemaMergeUtils.scala | 3 ++- .../apache/spark/sql/execution/SparkPlanSuite.scala | 9 +++++++++ 9 files changed, 34 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 025478214e492..ea30832008b56 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -374,6 +374,14 @@ object SQLConf { .booleanConf .createWithDefault(true) + val DEFAULT_PARALLELISM = buildConf("spark.sql.default.parallelism") + .doc("The number of parallelism for Spark SQL, the default value is " + + "`spark.default.parallelism`.") + .version("3.2.0") + .intConf + .checkValue(_ > 0, "The value of spark.sql.default.parallelism must be positive.") + .createOptional + val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions") .doc("The default number of partitions to use when shuffling data for joins or aggregations. " + "Note: For structured streaming, this configuration cannot be changed between query " + @@ -3160,6 +3168,8 @@ class SQLConf extends Serializable with Logging { def cacheVectorizedReaderEnabled: Boolean = getConf(CACHE_VECTORIZED_READER_ENABLED) + def defaultParallelism: Option[Int] = getConf(DEFAULT_PARALLELISM) + def defaultNumShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS) def numShufflePartitions: Int = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index db5ad52977c71..3a9b06940b769 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -519,7 +519,8 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long): Dataset[java.lang.Long] = { - range(start, end, step = 1, numPartitions = sparkContext.defaultParallelism) + range(start, end, step = 1, + numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) } /** @@ -529,7 +530,8 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = { - range(start, end, step, numPartitions = sparkContext.defaultParallelism) + range(start, end, step, + numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index b452213cd6cc7..02a8f46824241 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -49,7 +49,9 @@ case class LocalTableScanExec( if (rows.isEmpty) { sqlContext.sparkContext.emptyRDD } else { - val 
numSlices = math.min(unsafeRows.length, sqlContext.sparkContext.defaultParallelism) + val numSlices = math.min( + unsafeRows.length, + conf.defaultParallelism.getOrElse(sqlContext.sparkContext.defaultParallelism)) sqlContext.sparkContext.parallelize(unsafeRows, numSlices) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 0f482142227d2..6149bd214e540 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -67,7 +67,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .getOrElse(session.sparkContext.defaultParallelism) + .orElse(conf.defaultParallelism).getOrElse(session.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 006fa0fba4138..80a4090ce03f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -382,7 +382,8 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) val start: Long = range.start val end: Long = range.end val step: Long = range.step - val numSlices: Int = range.numSlices.getOrElse(sparkContext.defaultParallelism) + val numSlices: Int = range.numSlices.orElse(sqlContext.conf.defaultParallelism) + .getOrElse(sparkContext.defaultParallelism) val numElements: BigInt = range.numElements val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 27ad62026c9b5..69425cfed285f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -738,7 +738,8 @@ case class AlterTableRecoverPartitionsCommand( // Set the number of parallelism to prevent following file listing from generating many tasks // in case of large #defaultParallelism. val numParallelism = Math.min(serializedPaths.length, - Math.min(spark.sparkContext.defaultParallelism, 10000)) + Math.min(spark.sessionState.conf.defaultParallelism + .getOrElse(spark.sparkContext.defaultParallelism), 10000)) // gather the fast stats for all the partitions otherwise Hive metastore will list all the // files for all the new partitions in sequential way, which is super slow. 
logInfo(s"Gather the fast stats in parallel using $numParallelism tasks.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 864130bbd87b7..1b35db8d0873c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -89,6 +89,7 @@ object FilePartition extends Logging { val defaultMaxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes val openCostInBytes = sparkSession.sessionState.conf.filesOpenCostInBytes val minPartitionNum = sparkSession.sessionState.conf.filesMinPartitionNum + .orElse(sparkSession.sessionState.conf.defaultParallelism) .getOrElse(sparkSession.sparkContext.defaultParallelism) val totalBytes = selectedPartitions.flatMap(_.files.map(_.getLen + openCostInBytes)).sum val bytesPerCore = totalBytes / minPartitionNum diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala index 28097c35401c9..54d79898bb81b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala @@ -57,7 +57,8 @@ object SchemaMergeUtils extends Logging { // Set the number of partitions to prevent following schema reads from generating many tasks // in case of a small number of orc files. val numParallelism = Math.min(Math.max(partialFileStatusInfo.size, 1), - sparkSession.sparkContext.defaultParallelism) + sparkSession.sessionState.conf.defaultParallelism + .getOrElse(sparkSession.sparkContext.defaultParallelism)) val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index 56fff1107ae39..254855247ced3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -88,4 +88,13 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { test("SPARK-30780 empty LocalTableScan should use RDD without partitions") { assert(LocalTableScanExec(Nil, Nil).execute().getNumPartitions == 0) } + + test("SPARK-33617: spark.sql.default.parallelism effective for LocalTableScan") { + Seq(1, 4).foreach { minPartitionNum => + withSQLConf(SQLConf.DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { + val df = spark.sql("SELECT * FROM VALUES (1), (2), (3), (4), (5), (6), (7), (8)") + assert(df.rdd.partitions.length === minPartitionNum) + } + } + } } From d730b6bdaa92f2ca19cc8852ac58035e28d47a4f Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 7 Dec 2020 13:25:43 +0000 Subject: [PATCH 0689/1009] [SPARK-32680][SQL] Don't Preprocess V2 CTAS with Unresolved Query ### What changes were proposed in this pull request? The analyzer rule `PreprocessTableCreation` will preprocess table creation related logical plan. But for CTAS, if the sub-query can't be resolved, preprocess it will cause "Invalid call to toAttribute on unresolved object" (instead of a user-friendly error msg: "table or view not found"). This PR fixes this wrongly preprocess for CTAS using V2 catalog. 
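A minimal reproduction sketch, mirroring the added test (the `FakeV2Provider` class is a test-only source from `DataFrameSuite`, so the class name here is illustrative):

```scala
// Before the fix, PreprocessTableCreation preprocessed the V2 CTAS plan even though its
// child query was still unresolved, surfacing an internal error; with the rule now
// skipping unresolved children, the normal analysis error is reported instead.
val v2Source = "org.apache.spark.sql.connector.FakeV2Provider"  // test-only provider
spark.sql(s"CREATE TABLE t USING $v2Source AS SELECT * FROM nonexist")
// Before: org.apache.spark.sql.catalyst.analysis.UnresolvedException:
//         Invalid call to toAttribute on unresolved object
// After:  org.apache.spark.sql.AnalysisException: Table or view not found: nonexist
```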
### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? The error message for CTAS with a non-exists table changed from: `UnresolvedException: Invalid call to toAttribute on unresolved object, tree: xxx` to `AnalysisException: Table or view not found: xxx` ### How was this patch tested? added test Closes #30637 from linhongliu-db/fix-ctas. Authored-by: Linhong Liu Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/datasources/rules.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 2cc78258378ab..b9866e415c9b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -239,7 +239,7 @@ case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[Logi c.copy(tableDesc = normalizedTable.copy(schema = reorderedSchema)) } - case create: V2CreateTablePlan => + case create: V2CreateTablePlan if create.childrenResolved => val schema = create.tableSchema val partitioning = create.partitioning val identifier = create.tableName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d34dcb4fe0c01..a45bf12e8f841 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.Uuid import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, OneRowRelation} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.execution.{FilterExec, QueryExecution, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.HashAggregateExec @@ -2451,6 +2452,14 @@ class DataFrameSuite extends QueryTest assert(e.getMessage.contains("Table or view not found:")) } + test("SPARK-32680: Don't analyze CTAS with unresolved query") { + val v2Source = classOf[FakeV2Provider].getName + val e = intercept[AnalysisException] { + sql(s"CREATE TABLE t USING $v2Source AS SELECT * from nonexist") + } + assert(e.getMessage.contains("Table or view not found:")) + } + test("CalendarInterval reflection support") { val df = Seq((1, new CalendarInterval(1, 2, 3))).toDF("a", "b") checkAnswer(df.selectExpr("b"), Row(new CalendarInterval(1, 2, 3))) From da72b87374a7be5416b99ed016dc2fc9da0ed88a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 7 Dec 2020 13:40:15 +0000 Subject: [PATCH 0690/1009] [SPARK-33641][SQL] Invalidate new char/varchar types in public APIs that produce incorrect results ### What changes were proposed in this pull request? In this PR, we suppose to narrow the use cases of the char/varchar data types, of which are invalid now or later ### Why are the changes needed? 1. 
```scala
scala> spark.udf.register("abcd", () => "12345", org.apache.spark.sql.types.VarcharType(2))
scala> spark.sql("select abcd()").show
scala.MatchError: CharType(2) (of class org.apache.spark.sql.types.VarcharType)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.externalDataTypeFor(RowEncoder.scala:215)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.externalDataTypeForInput(RowEncoder.scala:212)
  at org.apache.spark.sql.catalyst.expressions.objects.ValidateExternalType.<init>(objects.scala:1741)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.$anonfun$serializerFor$3(RowEncoder.scala:175)
  at scala.collection.TraversableLike.$anonfun$flatMap$1(TraversableLike.scala:245)
  at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
  at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
  at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
  at scala.collection.TraversableLike.flatMap(TraversableLike.scala:245)
  at scala.collection.TraversableLike.flatMap$(TraversableLike.scala:242)
  at scala.collection.mutable.ArrayOps$ofRef.flatMap(ArrayOps.scala:198)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.serializerFor(RowEncoder.scala:171)
  at org.apache.spark.sql.catalyst.encoders.RowEncoder$.apply(RowEncoder.scala:66)
  at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768)
  at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:96)
  at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:611)
  at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:768)
  at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:606)
  ... 47 elided
```
2. spark.createDataFrame
```
scala> spark.createDataFrame(spark.read.text("README.md").rdd, new org.apache.spark.sql.types.StructType().add("c", "char(1)")).show
+--------------------+
| c|
+--------------------+
| # Apache Spark|
| |
|Spark is a unifie...|
|high-level APIs i...|
|supports general ...|
|rich set of highe...|
|MLlib for machine...|
|and Structured St...|
| | |
spark.read.schema("a varchar(2)").text("./README.md").show(100)
+--------------------+
| a|
+--------------------+
| # Apache Spark|
| |
|Spark is a unifie...|
|high-level APIs i...|
|supports general ...|
```
4. etc

### Does this PR introduce _any_ user-facing change?
No, we intend to avoid a potential breaking change.

### How was this patch tested?
New tests.

Closes #30586 from yaooqinn/SPARK-33641.
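To summarize the new behavior, here is a short sketch based on the tests added in `CharVarcharTestSuite` below; the config key and error text are taken from this patch, but the snippet itself is illustrative rather than a transcript of a real session:
```scala
import org.apache.spark.sql.types.{CharType, StructType, VarcharType}

// After this patch, public APIs that accept a data type reject char/varchar up front with:
//   AnalysisException: char/varchar type can only be used in the table schema ...
spark.read.schema(new StructType().add("id", CharType(5)))
spark.udf.register("testchar", () => "B", VarcharType(1))

// The Spark 3.0 behavior (silently treating char/varchar as string in these APIs)
// can be restored with the legacy flag introduced by this patch:
spark.conf.set("spark.sql.legacy.charVarcharAsString", "true")
```
Note that `CAST` is handled more leniently: `replaceCharVarcharWithStringForCast` only logs a warning and falls back to string, so casts to char/varchar keep working.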
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/ExprUtils.scala | 6 +- .../sql/catalyst/parser/AstBuilder.scala | 19 +-- .../sql/catalyst/parser/ParseDriver.scala | 5 - .../sql/catalyst/parser/ParserInterface.scala | 6 - .../sql/catalyst/util/CharVarcharUtils.scala | 38 +++++- .../apache/spark/sql/internal/SQLConf.scala | 13 ++ .../apache/spark/sql/types/VarcharType.scala | 2 +- .../catalyst/parser/DataTypeParserSuite.scala | 14 +-- .../parser/TableSchemaParserSuite.scala | 4 +- .../spark/sql/types/DataTypeSuite.scala | 10 ++ .../scala/org/apache/spark/sql/Column.scala | 2 +- .../apache/spark/sql/DataFrameReader.scala | 7 +- .../org/apache/spark/sql/SparkSession.scala | 10 +- .../apache/spark/sql/UDFRegistration.scala | 73 +++++++---- .../datasources/jdbc/JdbcUtils.scala | 7 +- .../org/apache/spark/sql/functions.scala | 12 +- .../spark/sql/CharVarcharTestSuite.scala | 114 ++++++++++++------ .../sql/SparkSessionExtensionSuite.scala | 3 - .../spark/sql/jdbc/JDBCWriteSuite.scala | 5 +- .../sql/hive/client/HiveClientImpl.scala | 2 +- 20 files changed, 226 insertions(+), 126 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 56bd3d7026d52..b45bbe417caf4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -21,7 +21,7 @@ import java.text.{DecimalFormat, DecimalFormatSymbols, ParsePosition} import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.util.ArrayBasedMapData +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharUtils} import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType} import org.apache.spark.unsafe.types.UTF8String @@ -30,7 +30,9 @@ object ExprUtils { def evalTypeExpr(exp: Expression): DataType = { if (exp.foldable) { exp.eval() match { - case s: UTF8String if s != null => DataType.fromDDL(s.toString) + case s: UTF8String if s != null => + val dataType = DataType.fromDDL(s.toString) + CharVarcharUtils.failIfHasCharVarchar(dataType) case _ => throw new AnalysisException( s"The expression '${exp.sql}' is not a valid schema string.") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 12c5e0de686fa..a22383c62bf74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -95,19 +95,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitSingleDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { - visitSparkDataType(ctx.dataType) + typedVisit[DataType](ctx.dataType) } override def visitSingleTableSchema(ctx: SingleTableSchemaContext): StructType = { - val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( - StructType(visitColTypeList(ctx.colTypeList))) + val schema = StructType(visitColTypeList(ctx.colTypeList)) withOrigin(ctx)(schema) } - def parseRawDataType(ctx: SingleDataTypeContext): DataType = withOrigin(ctx) { - typedVisit[DataType](ctx.dataType()) - } - /* 
******************************************************************************************** * Plan parsing * ******************************************************************************************** */ @@ -1550,7 +1545,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * Create a [[Cast]] expression. */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { - Cast(expression(ctx.expression), visitSparkDataType(ctx.dataType)) + val rawDataType = typedVisit[DataType](ctx.dataType()) + val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) + Cast(expression(ctx.expression), dataType) } /** @@ -2229,12 +2226,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg /* ******************************************************************************************** * DataType parsing * ******************************************************************************************** */ - /** - * Create a Spark DataType. - */ - private def visitSparkDataType(ctx: DataTypeContext): DataType = { - CharVarcharUtils.replaceCharVarcharWithString(typedVisit(ctx)) - } /** * Resolve/create a primitive type. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index ac3fbbf6b0512..d08be467f96cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -39,11 +39,6 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with astBuilder.visitSingleDataType(parser.singleDataType()) } - /** Similar to `parseDataType`, but without CHAR/VARCHAR replacement. */ - override def parseRawDataType(sqlText: String): DataType = parse(sqlText) { parser => - astBuilder.parseRawDataType(parser.singleDataType()) - } - /** Creates Expression for a given SQL string. */ override def parseExpression(sqlText: String): Expression = parse(sqlText) { parser => astBuilder.visitSingleExpression(parser.singleExpression()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index d724933bc1029..77e357ad073da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -70,10 +70,4 @@ trait ParserInterface { */ @throws[ParseException]("Text cannot be parsed to a DataType") def parseDataType(sqlText: String): DataType - - /** - * Parse a string to a raw [[DataType]] without CHAR/VARCHAR replacement. 
- */ - @throws[ParseException]("Text cannot be parsed to a DataType") - def parseRawDataType(sqlText: String): DataType } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index 0cbe5abdbbd7a..b551d9699f360 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -19,11 +19,14 @@ package org.apache.spark.sql.catalyst.util import scala.collection.mutable +import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.CatalystSqlParser +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -object CharVarcharUtils { +object CharVarcharUtils extends Logging { private val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" @@ -52,6 +55,19 @@ object CharVarcharUtils { dt.existsRecursively(f => f.isInstanceOf[CharType] || f.isInstanceOf[VarcharType]) } + /** + * Validate the given [[DataType]] to fail if it is char or varchar types or contains nested ones + */ + def failIfHasCharVarchar(dt: DataType): DataType = { + if (!SQLConf.get.charVarcharAsString && hasCharVarchar(dt)) { + throw new AnalysisException("char/varchar type can only be used in the table schema. " + + s"You can set ${SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key} to true, so that Spark" + + s" treat them as string type as same as Spark 3.0 and earlier") + } else { + replaceCharVarcharWithString(dt) + } + } + /** * Replaces CharType/VarcharType with StringType recursively in the given data type. */ @@ -69,6 +85,24 @@ object CharVarcharUtils { case _ => dt } + /** + * Replaces CharType/VarcharType with StringType recursively in the given data type, with a + * warning message if it has char or varchar types + */ + def replaceCharVarcharWithStringForCast(dt: DataType): DataType = { + if (SQLConf.get.charVarcharAsString) { + replaceCharVarcharWithString(dt) + } else if (hasCharVarchar(dt)) { + logWarning("The Spark cast operator does not support char/varchar type and simply treats" + + " them as string type. Please use string type directly to avoid confusion. Otherwise," + + s" you can set ${SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key} to true, so that Spark treat" + + s" them as string type as same as Spark 3.0 and earlier") + replaceCharVarcharWithString(dt) + } else { + dt + } + } + /** * Removes the metadata entry that contains the original type string of CharType/VarcharType from * the given attribute's metadata. 
@@ -85,7 +119,7 @@ object CharVarcharUtils { */ def getRawType(metadata: Metadata): Option[DataType] = { if (metadata.contains(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY)) { - Some(CatalystSqlParser.parseRawDataType( + Some(CatalystSqlParser.parseDataType( metadata.getString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY))) } else { None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index ea30832008b56..69f04e11ff0bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2962,6 +2962,17 @@ object SQLConf { .booleanConf .createWithDefault(true) + val LEGACY_CHAR_VARCHAR_AS_STRING = + buildConf("spark.sql.legacy.charVarcharAsString") + .internal() + .doc("When true, Spark will not fail if user uses char and varchar type directly in those" + + " APIs that accept or parse data types as parameters, e.g." + + " `SparkSession.read.schema(...)`, `SparkSession.udf.register(...)` but treat them as" + + " string type as Spark 3.0 and earlier.") + .version("3.1.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * @@ -3612,6 +3623,8 @@ class SQLConf extends Serializable with Logging { def disabledJdbcConnectionProviders: String = getConf(SQLConf.DISABLED_JDBC_CONN_PROVIDER_LIST) + def charVarcharAsString: Boolean = getConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING) + /** ********************** SQLConf functionality methods ************ */ /** Set Spark SQL configuration properties. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala index 8d78640c1e125..2e30820ef0a05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/VarcharType.scala @@ -32,6 +32,6 @@ case class VarcharType(length: Int) extends AtomicType { override def defaultSize: Int = length override def typeName: String = s"varchar($length)" - override def toString: String = s"CharType($length)" + override def toString: String = s"VarcharType($length)" private[spark] override def asNullable: VarcharType = this } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index 655b1d26d6c90..b9f984001523a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -56,10 +56,10 @@ class DataTypeParserSuite extends SparkFunSuite { checkDataType("DATE", DateType) checkDataType("timestamp", TimestampType) checkDataType("string", StringType) - checkDataType("ChaR(5)", StringType) - checkDataType("ChaRacter(5)", StringType) - checkDataType("varchAr(20)", StringType) - checkDataType("cHaR(27)", StringType) + checkDataType("ChaR(5)", CharType(5)) + checkDataType("ChaRacter(5)", CharType(5)) + checkDataType("varchAr(20)", VarcharType(20)) + checkDataType("cHaR(27)", CharType(27)) checkDataType("BINARY", BinaryType) checkDataType("void", NullType) checkDataType("interval", CalendarIntervalType) @@ -103,9 +103,9 @@ class DataTypeParserSuite extends SparkFunSuite { StructType( StructField("deciMal", 
DecimalType.USER_DEFAULT, true) :: StructField("anotherDecimal", DecimalType(5, 2), true) :: Nil), true) :: - StructField("MAP", MapType(TimestampType, StringType), true) :: + StructField("MAP", MapType(TimestampType, VarcharType(10)), true) :: StructField("arrAy", ArrayType(DoubleType, true), true) :: - StructField("anotherArray", ArrayType(StringType, true), true) :: Nil) + StructField("anotherArray", ArrayType(CharType(9), true), true) :: Nil) ) // Use backticks to quote column names having special characters. checkDataType( @@ -113,7 +113,7 @@ class DataTypeParserSuite extends SparkFunSuite { StructType( StructField("x+y", IntegerType, true) :: StructField("!@#$%^&*()", StringType, true) :: - StructField("1_2.345<>:\"", StringType, true) :: Nil) + StructField("1_2.345<>:\"", VarcharType(20), true) :: Nil) ) // Empty struct. checkDataType("strUCt<>", StructType(Nil)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala index 95851d44b4747..5519f016e48d3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableSchemaParserSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.types._ class TableSchemaParserSuite extends SparkFunSuite { @@ -69,8 +68,7 @@ class TableSchemaParserSuite extends SparkFunSuite { StructField("arrAy", ArrayType(DoubleType)) :: StructField("anotherArray", ArrayType(CharType(9))) :: Nil)) :: Nil) - assert(parse(tableSchemaString) === - CharVarcharUtils.replaceCharVarcharWithStringInSchema(expectedDataType)) + assert(parse(tableSchemaString) === expectedDataType) } // Negative cases diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 9442a3e87fc72..8c2e5db6e9364 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -249,6 +249,12 @@ class DataTypeSuite extends SparkFunSuite { checkDataTypeFromJson(MapType(IntegerType, ArrayType(DoubleType), false)) checkDataTypeFromDDL(MapType(IntegerType, ArrayType(DoubleType), false)) + checkDataTypeFromJson(CharType(1)) + checkDataTypeFromDDL(CharType(1)) + + checkDataTypeFromJson(VarcharType(10)) + checkDataTypeFromDDL(VarcharType(11)) + val metadata = new MetadataBuilder() .putString("name", "age") .build() @@ -310,6 +316,10 @@ class DataTypeSuite extends SparkFunSuite { checkDefaultSize(MapType(IntegerType, StringType, true), 24) checkDefaultSize(MapType(IntegerType, ArrayType(DoubleType), false), 12) checkDefaultSize(structType, 20) + checkDefaultSize(CharType(5), 5) + checkDefaultSize(CharType(100), 100) + checkDefaultSize(VarcharType(5), 5) + checkDefaultSize(VarcharType(10), 10) def checkEqualsIgnoreCompatibleNullability( from: DataType, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 86ba81340272b..4ef23d7e31c59 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1185,7 +1185,7 @@ class Column(val expr: 
Expression) extends Logging { * @since 1.3.0 */ def cast(to: DataType): Column = withExpr { - Cast(expr, CharVarcharUtils.replaceCharVarcharWithString(to)) + Cast(expr, CharVarcharUtils.replaceCharVarcharWithStringForCast(to)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 007df183ee353..b94c42a2c9544 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -73,7 +73,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 1.4.0 */ def schema(schema: StructType): DataFrameReader = { - this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(replaced) this } @@ -89,7 +90,9 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * @since 2.3.0 */ def schema(schemaString: String): DataFrameReader = { - this.userSpecifiedSchema = Option(StructType.fromDDL(schemaString)) + val rawSchema = StructType.fromDDL(schemaString) + val schema = CharVarcharUtils.failIfHasCharVarchar(rawSchema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(schema) this } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 3a9b06940b769..a2c9406f6becf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.encoders._ import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Range} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.ExternalCommandRunner import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.command.ExternalCommandExecutor @@ -347,9 +348,10 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rowRDD: RDD[Row], schema: StructType): DataFrame = withActive { + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] // TODO: use MutableProjection when rowRDD is another DataFrame and the applied // schema differs from the existing schema on any field data type. 
- val encoder = RowEncoder(schema) + val encoder = RowEncoder(replaced) val toRow = encoder.createSerializer() val catalystRows = rowRDD.map(toRow) internalCreateDataFrame(catalystRows.setName(rowRDD.name), schema) @@ -365,7 +367,8 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rowRDD: JavaRDD[Row], schema: StructType): DataFrame = { - createDataFrame(rowRDD.rdd, schema) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + createDataFrame(rowRDD.rdd, replaced) } /** @@ -378,7 +381,8 @@ class SparkSession private( */ @DeveloperApi def createDataFrame(rows: java.util.List[Row], schema: StructType): DataFrame = withActive { - Dataset.ofRows(self, LocalRelation.fromExternalRows(schema.toAttributes, rows.asScala.toSeq)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + Dataset.ofRows(self, LocalRelation.fromExternalRows(replaced.toAttributes, rows.asScala.toSeq)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index cceb38558946e..237cfe18ed855 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.execution.aggregate.ScalaUDAF import org.apache.spark.sql.execution.python.UserDefinedPythonFunction import org.apache.spark.sql.expressions.{SparkUserDefinedFunction, UserDefinedAggregateFunction, UserDefinedAggregator, UserDefinedFunction} @@ -162,9 +163,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends | * @since $version | */ |def register(name: String, f: UDF$i[$extTypeArgs], returnType: DataType): Unit = { + | val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) | val func = $funcCall | def builder(e: Seq[Expression]) = if (e.length == $i) { - | ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + | ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) | } else { | throw new AnalysisException("Invalid number of arguments for function " + name + | ". Expected: $i; Found: " + e.length) @@ -753,9 +755,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 2.3.0 */ def register(name: String, f: UDF0[_], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = () => f.asInstanceOf[UDF0[Any]].call() def builder(e: Seq[Expression]) = if (e.length == 0) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 0; Found: " + e.length) @@ -768,9 +771,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF1[Any, Any]].call(_: Any) def builder(e: Seq[Expression]) = if (e.length == 1) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 1; Found: " + e.length) @@ -783,9 +787,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 2) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 2; Found: " + e.length) @@ -798,9 +803,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 3) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 3; Found: " + e.length) @@ -813,9 +819,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 4) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 4; Found: " + e.length) @@ -828,9 +835,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 5) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 5; Found: " + e.length) @@ -843,9 +851,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 6) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 6; Found: " + e.length) @@ -858,9 +867,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 7) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 7; Found: " + e.length) @@ -873,9 +883,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 8) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 8; Found: " + e.length) @@ -888,9 +899,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 9) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 9; Found: " + e.length) @@ -903,9 +915,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 10) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 10; Found: " + e.length) @@ -918,9 +931,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF11[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 11) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 11; Found: " + e.length) @@ -933,9 +947,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF12[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 12) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 12; Found: " + e.length) @@ -948,9 +963,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF13[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 13) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 13; Found: " + e.length) @@ -963,9 +979,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF14[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 14) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 14; Found: " + e.length) @@ -978,9 +995,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF15[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 15) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 15; Found: " + e.length) @@ -993,9 +1011,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF16[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 16) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 16; Found: " + e.length) @@ -1008,9 +1027,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF17[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 17) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 17; Found: " + e.length) @@ -1023,9 +1043,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF18[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 18) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 18; Found: " + e.length) @@ -1038,9 +1059,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF19[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 19) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 19; Found: " + e.length) @@ -1053,9 +1075,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF20[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 20) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 20; Found: " + e.length) @@ -1068,9 +1091,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF21[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 21) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". Expected: 21; Found: " + e.length) @@ -1083,9 +1107,10 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends * @since 1.3.0 */ def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { + val replaced = CharVarcharUtils.failIfHasCharVarchar(returnType) val func = f.asInstanceOf[UDF22[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any) def builder(e: Seq[Expression]) = if (e.length == 22) { - ScalaUDF(func, returnType, e, Nil, udfName = Some(name)) + ScalaUDF(func, replaced, e, Nil, udfName = Some(name)) } else { throw new AnalysisException("Invalid number of arguments for function " + name + ". 
Expected: 22; Found: " + e.length) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 216fb02740500..f997e57b23206 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.execution.datasources.jdbc.connection.ConnectionProvider import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, JdbcType} @@ -761,10 +761,7 @@ object JdbcUtils extends Logging { schema: StructType, caseSensitive: Boolean, createTableColumnTypes: String): Map[String, String] = { - val parsedSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) - val userSchema = StructType(parsedSchema.map { field => - field.copy(dataType = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType)) - }) + val userSchema = CatalystSqlParser.parseTableSchema(createTableColumnTypes) val nameEquality = if (caseSensitive) { org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 9861d21d3a430..5b1ee2deefc10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, ResolvedHint} -import org.apache.spark.sql.catalyst.util.TimestampFormatter +import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TimestampFormatter} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.expressions.{Aggregator, SparkUserDefinedFunction, UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.SQLConf @@ -4009,7 +4009,7 @@ object functions { * @since 2.2.0 */ def from_json(e: Column, schema: DataType, options: Map[String, String]): Column = withExpr { - JsonToStructs(schema, options, e.expr) + JsonToStructs(CharVarcharUtils.failIfHasCharVarchar(schema), options, e.expr) } /** @@ -4040,8 +4040,9 @@ object functions { * @group collection_funcs * @since 2.2.0 */ - def from_json(e: Column, schema: DataType, options: java.util.Map[String, String]): Column = - from_json(e, schema, options.asScala.toMap) + def from_json(e: Column, schema: DataType, options: java.util.Map[String, String]): Column = { + from_json(e, CharVarcharUtils.failIfHasCharVarchar(schema), options.asScala.toMap) + } /** * Parses a column containing a JSON string into a `StructType` with the specified schema. 
@@ -4393,7 +4394,8 @@ object functions { * @since 3.0.0 */ def from_csv(e: Column, schema: StructType, options: Map[String, String]): Column = withExpr { - CsvToStructs(schema, options, e.expr) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + CsvToStructs(replaced, options, e.expr) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index abb13270d20e7..fcd334be7a6f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.SimpleInsertSource import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -import org.apache.spark.sql.types.{ArrayType, CharType, DataType, MapType, StringType, StructField, StructType} +import org.apache.spark.sql.types._ // The base trait for char/varchar tests that need to be run with different table implementations. trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { @@ -435,55 +435,91 @@ class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { assert(df.schema.map(_.dataType) == Seq(StringType)) } - assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) - assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) - assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) - assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + val logAppender = new LogAppender("The Spark cast operator does not support char/varchar" + + " type and simply treats them as string type. 
Please use string type directly to avoid" + + " confusion.") + withLogAppender(logAppender) { + assertNoCharType(spark.range(1).select($"id".cast("char(5)"))) + assertNoCharType(spark.range(1).select($"id".cast(CharType(5)))) + assertNoCharType(spark.range(1).selectExpr("CAST(id AS CHAR(5))")) + assertNoCharType(sql("SELECT CAST(id AS CHAR(5)) FROM range(1)")) + } } - test("user-specified schema in functions") { - val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") - checkAnswer(df, Row(Row("str"))) - val schema = df.schema.head.dataType.asInstanceOf[StructType] - assert(schema.map(_.dataType) == Seq(StringType)) + def failWithInvalidCharUsage[T](fn: => T): Unit = { + val e = intercept[AnalysisException](fn) + assert(e.getMessage contains "char/varchar type can only be used in the table schema") } - test("user-specified schema in DataFrameReader: file source from Dataset") { - val ds = spark.range(10).map(_.toString) - val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) - assert(df1.schema.map(_.dataType) == Seq(StringType)) - val df2 = spark.read.schema("id char(5)").csv(ds) - assert(df2.schema.map(_.dataType) == Seq(StringType)) + test("invalidate char/varchar in functions") { + failWithInvalidCharUsage(sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val df = sql("""SELECT from_json('{"a": "str"}', 'a CHAR(5)')""") + checkAnswer(df, Row(Row("str"))) + val schema = df.schema.head.dataType.asInstanceOf[StructType] + assert(schema.map(_.dataType) == Seq(StringType)) + } } - test("user-specified schema in DataFrameReader: DSV1") { - def checkSchema(df: DataFrame): Unit = { - val relations = df.queryExecution.analyzed.collect { - case l: LogicalRelation => l.relation - } - assert(relations.length == 1) - assert(relations.head.schema.map(_.dataType) == Seq(StringType)) + test("invalidate char/varchar in SparkSession createDataframe") { + val df = spark.range(10).map(_.toString).toDF() + val schema = new StructType().add("id", CharType(5)) + failWithInvalidCharUsage(spark.createDataFrame(df.collectAsList(), schema)) + failWithInvalidCharUsage(spark.createDataFrame(df.rdd, schema)) + failWithInvalidCharUsage(spark.createDataFrame(df.toJavaRDD, schema)) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val df1 = spark.createDataFrame(df.collectAsList(), schema) + checkAnswer(df1, df) + assert(df1.schema.head.dataType === StringType) } - - checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) - .format(classOf[SimpleInsertSource].getName).load()) - checkSchema(spark.read.schema("id char(5)") - .format(classOf[SimpleInsertSource].getName).load()) } - test("user-specified schema in DataFrameReader: DSV2") { - def checkSchema(df: DataFrame): Unit = { - val tables = df.queryExecution.analyzed.collect { - case d: DataSourceV2Relation => d.table + test("invalidate char/varchar in spark.read.schema") { + failWithInvalidCharUsage(spark.read.schema(new StructType().add("id", CharType(5)))) + failWithInvalidCharUsage(spark.read.schema("id char(5)")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + val ds = spark.range(10).map(_.toString) + val df1 = spark.read.schema(new StructType().add("id", CharType(5))).csv(ds) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.read.schema("id char(5)").csv(ds) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + + def checkSchema(df: DataFrame): Unit = { + val 
schemas = df.queryExecution.analyzed.collect { + case l: LogicalRelation => l.relation.schema + case d: DataSourceV2Relation => d.table.schema() + } + assert(schemas.length == 1) + assert(schemas.head.map(_.dataType) == Seq(StringType)) } - assert(tables.length == 1) - assert(tables.head.schema.map(_.dataType) == Seq(StringType)) - } - checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) - .format(classOf[SchemaRequiredDataSource].getName).load()) - checkSchema(spark.read.schema("id char(5)") - .format(classOf[SchemaRequiredDataSource].getName).load()) + // user-specified schema in DataFrameReader: DSV1 + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SimpleInsertSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SimpleInsertSource].getName).load()) + + // user-specified schema in DataFrameReader: DSV2 + checkSchema(spark.read.schema(new StructType().add("id", CharType(5))) + .format(classOf[SchemaRequiredDataSource].getName).load()) + checkSchema(spark.read.schema("id char(5)") + .format(classOf[SchemaRequiredDataSource].getName).load()) + } + } + + test("invalidate char/varchar in udf's result type") { + failWithInvalidCharUsage(spark.udf.register("testchar", () => "B", VarcharType(1))) + failWithInvalidCharUsage(spark.udf.register("testchar2", (x: String) => x, VarcharType(1))) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + spark.udf.register("testchar", () => "B", VarcharType(1)) + spark.udf.register("testchar2", (x: String) => x, VarcharType(1)) + val df1 = spark.sql("select testchar()") + checkAnswer(df1, Row("B")) + assert(df1.schema.head.dataType === StringType) + val df2 = spark.sql("select testchar2('abc')") + checkAnswer(df2, Row("abc")) + assert(df2.schema.head.dataType === StringType) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index f02d2041dd7f3..ea276bcec0f78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -384,9 +384,6 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars override def parseDataType(sqlText: String): DataType = delegate.parseDataType(sqlText) - - override def parseRawDataType(sqlText: String): DataType = - delegate.parseRawDataType(sqlText) } object MyExtensions { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index fb46c2ff4c0ea..1a28523cc939f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -390,14 +390,13 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { .foldLeft(new StructType())((schema, colType) => schema.add(colType._1, colType._2)) val createTableColTypes = colTypes.map { case (col, dataType) => s"$col $dataType" }.mkString(", ") - val df = spark.createDataFrame(sparkContext.parallelize(Seq(Row.empty)), schema) val expectedSchemaStr = colTypes.map { case (col, dataType) => s""""$col" $dataType """ }.mkString(", ") assert(JdbcUtils.schemaString( - df.schema, - df.sqlContext.conf.caseSensitiveAnalysis, + schema, + spark.sqlContext.conf.caseSensitiveAnalysis, url1, Option(createTableColTypes)) == 
expectedSchemaStr) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index bada131c8ba6d..34befb8a6f965 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -985,7 +985,7 @@ private[hive] object HiveClientImpl extends Logging { /** Get the Spark SQL native DataType from Hive's FieldSchema. */ private def getSparkSQLDataType(hc: FieldSchema): DataType = { try { - CatalystSqlParser.parseRawDataType(hc.getType) + CatalystSqlParser.parseDataType(hc.getType) } catch { case e: ParseException => throw new SparkException( From c62b84a0432e51fd10e628088ee311dc3be73d2f Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Mon, 7 Dec 2020 08:40:29 -0600 Subject: [PATCH 0691/1009] [MINOR] Spelling sql not core ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `sql/catalyst` * `sql/hive-thriftserver` * `sql/hive` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30532 from jsoref/spelling-sql-not-core. Authored-by: Josh Soref Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/Row.scala | 6 +++--- .../apache/spark/sql/catalyst/StructFilters.scala | 2 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../sql/catalyst/analysis/StreamingJoinHelper.scala | 4 ++-- .../analysis/UpdateAttributeNullability.scala | 2 +- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../spark/sql/catalyst/csv/CSVInferSchema.scala | 2 +- .../sql/catalyst/encoders/ExpressionEncoder.scala | 2 +- .../spark/sql/catalyst/expressions/AliasHelper.scala | 2 +- .../spark/sql/catalyst/expressions/ScalaUDF.scala | 4 ++-- .../catalyst/expressions/aggregate/Percentile.scala | 6 +++--- .../spark/sql/catalyst/expressions/arithmetic.scala | 2 +- .../catalyst/expressions/codegen/CodeGenerator.scala | 2 +- .../expressions/codegen/GenerateSafeProjection.scala | 2 +- .../apache/spark/sql/catalyst/expressions/hash.scala | 4 ++-- .../catalyst/expressions/higherOrderFunctions.scala | 2 +- .../sql/catalyst/expressions/jsonExpressions.scala | 12 ++++++------ .../sql/catalyst/expressions/regexpExpressions.scala | 2 +- .../sql/catalyst/expressions/windowExpressions.scala | 2 +- .../catalyst/optimizer/NestedColumnAliasing.scala | 2 +- .../spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../optimizer/PushDownLeftSemiAntiJoin.scala | 2 +- .../spark/sql/catalyst/optimizer/expressions.scala | 2 +- .../spark/sql/catalyst/optimizer/subquery.scala | 6 +++--- .../spark/sql/catalyst/parser/ParserUtils.scala | 2 +- .../apache/spark/sql/catalyst/plans/QueryPlan.scala | 2 +- .../sql/catalyst/plans/logical/LogicalPlan.scala | 2 +- .../sql/catalyst/plans/logical/PlanHelper.scala | 2 +- .../plans/logical/basicLogicalOperators.scala | 2 +- .../sql/catalyst/plans/physical/partitioning.scala | 2 +- .../sql/catalyst/util/DateTimeFormatterHelper.scala | 
4 ++-- .../spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- .../spark/sql/catalyst/util/QuantileSummaries.scala | 6 +++--- .../org/apache/spark/sql/internal/SQLConf.scala | 6 +++--- .../org/apache/spark/sql/RandomDataGenerator.scala | 6 +++--- .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 2 +- .../analysis/ResolveGroupingAnalyticsSuite.scala | 4 ++-- .../sql/catalyst/analysis/TypeCoercionSuite.scala | 2 +- .../analysis/UnsupportedOperationsSuite.scala | 2 +- .../catalyst/expressions/CodeGenerationSuite.scala | 4 ++-- .../sql/catalyst/expressions/ComplexTypeSuite.scala | 4 ++-- .../expressions/ConditionalExpressionSuite.scala | 4 ++-- .../catalyst/expressions/ExpressionEvalHelper.scala | 2 +- .../expressions/ObjectExpressionsSuite.scala | 4 ++-- .../expressions/StringExpressionsSuite.scala | 2 +- .../expressions/aggregate/PercentileSuite.scala | 8 ++++---- .../expressions/codegen/CodeBlockSuite.scala | 2 +- .../sql/catalyst/optimizer/SetOperationSuite.scala | 8 ++++---- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../sql/catalyst/parser/DataTypeParserSuite.scala | 4 ++-- .../spark/sql/catalyst/parser/ErrorParserSuite.scala | 2 +- .../sql/catalyst/parser/ExpressionParserSuite.scala | 4 ++-- .../catalyst/parser/TableIdentifierParserSuite.scala | 2 +- .../spark/sql/catalyst/util/UnsafeArraySuite.scala | 8 ++++---- .../apache/hive/service/cli/ColumnDescriptor.java | 2 +- .../org/apache/hive/service/cli/GetInfoValue.java | 2 +- .../service/cli/operation/GetColumnsOperation.java | 2 +- .../hive/service/cli/session/HiveSessionImpl.java | 4 ++-- .../service/cli/thrift/ThriftHttpCLIService.java | 2 +- .../spark/sql/hive/thriftserver/DummyListeners.scala | 2 +- .../sql/hive/thriftserver/SparkSQLEnvSuite.scala | 2 +- .../sql/hive/execution/HiveCompatibilitySuite.scala | 12 ++++++------ .../apache/spark/sql/hive/HiveMetastoreCatalog.scala | 2 +- .../spark/sql/hive/client/HiveClientImpl.scala | 4 ++-- .../execution/HiveScriptTransformationExec.scala | 2 +- .../sql/hive/execution/InsertIntoHiveTable.scala | 2 +- .../hive/execution/PruneHiveTablePartitions.scala | 2 +- .../queries/clientpositive/auto_sortmerge_join_13.q | 6 +++--- .../clientpositive/bucketsortoptimize_insert_3.q | 4 ++-- .../src/test/queries/clientpositive/smb_mapjoin_20.q | 2 +- .../org/apache/spark/sql/hive/InsertSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- .../execution/HiveScriptTransformationSuite.scala | 4 ++-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 8 ++++---- 75 files changed, 128 insertions(+), 128 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 88c672f1cdf85..d43c57ed0f5c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -351,7 +351,7 @@ trait Row extends Serializable { /** * Returns the value at position i. * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 0 for Int - use isNullAt to ensure that value is not null * * @throws ClassCastException when data type does not match. */ @@ -360,7 +360,7 @@ trait Row extends Serializable { /** * Returns the value of a given fieldName. * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 
0 for Int - use isNullAt to ensure that value is not null * * @throws UnsupportedOperationException when schema is not defined. * @throws IllegalArgumentException when fieldName do not exist. @@ -381,7 +381,7 @@ trait Row extends Serializable { /** * Returns a Map consisting of names and values for the requested fieldNames * For primitive types if value is null it returns 'zero value' specific for primitive - * ie. 0 for Int - use isNullAt to ensure that value is not null + * i.e. 0 for Int - use isNullAt to ensure that value is not null * * @throws UnsupportedOperationException when schema is not defined. * @throws IllegalArgumentException when fieldName do not exist. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala index fed1b323f5773..ff67b6fccfae9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala @@ -51,7 +51,7 @@ abstract class StructFilters(pushedFilters: Seq[sources.Filter], schema: StructT /** * Resets states of pushed down filters. The method must be called before - * precessing any new row otherwise `skipRow()` may return wrong result. + * processing any new row otherwise `skipRow()` may return wrong result. */ def reset(): Unit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6769dc895d32e..6541961f5613e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1495,7 +1495,7 @@ class Analyzer(override val catalogManager: CatalogManager) val rightRes = rightAttributes .map(x => resolveExpressionBottomUp(x, right).asInstanceOf[Attribute]) f.copy(leftAttributes = leftRes, rightAttributes = rightRes) - // intersect/except will be rewritten to join at the begininng of optimizer. Here we need to + // intersect/except will be rewritten to join at the beginning of optimizer. Here we need to // deduplicate the right side plan, so that we won't produce an invalid self-join later. 
case i @ Intersect(left, right, _) if !i.duplicateResolved => i.copy(right = dedupRight(left, right)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 11c4883992560..9f5eefc744135 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -579,7 +579,7 @@ trait CheckAnalysis extends PredicateHelper { case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) - case _ => // Fallbacks to the following checks + case _ => // Falls back to the following checks } operator match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index cddc3a44f4d9d..d8e200d6b01e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -55,7 +55,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { * given the join condition and the event time watermark. This is how it works. * - The condition is split into conjunctive predicates, and we find the predicates of the * form `leftTime + c1 < rightTime + c2` (or <=, >, >=). - * - We canoncalize the predicate and solve it with the event time watermark value to find the + * - We canonicalize the predicate and solve it with the event time watermark value to find the * value of the state watermark. * This function is supposed to make best-effort attempt to get the state watermark. If there is * any error, it will return None. @@ -94,7 +94,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { // The generated the state watermark cleanup expression is inclusive of the state watermark. // If state watermark is W, all state where timestamp <= W will be cleaned up. - // Now when the canonicalized join condition solves to leftTime >= W, we dont want to clean + // Now when the canonicalized join condition solves to leftTime >= W, we don't want to clean // up leftTime <= W. Rather we should clean up leftTime <= W - 1. Hence the -1 below. val stateWatermark = predicate match { case LessThan(l, r) => getStateWatermarkSafely(l, r) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala index 3eae34da7e502..5004108d348b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule * Updates nullability of Attributes in a resolved LogicalPlan by using the nullability of * corresponding Attributes of its children output Attributes. This step is needed because * users can use a resolved AttributeReference in the Dataset API and outer joins - * can change the nullability of an AttribtueReference. Without this rule, a nullable column's + * can change the nullability of an AttributeReference. 
Without this rule, a nullable column's * nullable field can be actually set as non-nullable, which cause illegal optimization * (e.g., NULL propagation) and wrong answers. * See SPARK-13484 and SPARK-13801 for the concrete queries of this case. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index a2ab756382488..4c32870abe621 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1615,7 +1615,7 @@ class SessionCatalog( } /** - * Validate the new locatoin before renaming a managed table, which should be non-existent. + * Validate the new location before renaming a managed table, which should be non-existent. */ private def validateNewLocationOfRename( oldName: TableIdentifier, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 56677d7d97af2..fd9e30d155148 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -143,7 +143,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { // The conversion can fail when the `field` is not a form of number. val bigDecimal = decimalParser(field) // Because many other formats do not support decimal, it reduces the cases for - // decimals by disallowing values having scale (eg. `1.1`). + // decimals by disallowing values having scale (e.g. `1.1`). if (bigDecimal.scale <= 0) { // `DecimalType` conversion can fail when // 1. The precision is bigger than 38. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 9ab38044e6a88..80a0374ae1f26 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -189,7 +189,7 @@ object ExpressionEncoder { } /** - * Function that serializesa an object of type `T` to an [[InternalRow]]. This class is not + * Function that serializes an object of type `T` to an [[InternalRow]]. This class is not * thread-safe. Note that multiple calls to `apply(..)` return the same actual [[InternalRow]] * object. Thus, the caller should copy the result before making another call if required. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index c61eb68db5bfa..ad6cf959a69c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -64,7 +64,7 @@ trait AliasHelper { /** * Replace all attributes, that reference an alias, with the aliased expression, - * but keep the name of the outmost attribute. + * but keep the name of the outermost attribute. 
*/ protected def replaceAliasButKeepName( expr: NamedExpression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 0a69d5aa6b9ad..4a89d24e5f635 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -1145,7 +1145,7 @@ case class ScalaUDF( val resultConverter = s"$convertersTerm[${children.length}]" val boxedType = CodeGenerator.boxedType(dataType) - val funcInvokation = if (isPrimitive(dataType) + val funcInvocation = if (isPrimitive(dataType) // If the output is nullable, the returned value must be unwrapped from the Option && !nullable) { s"$resultTerm = ($boxedType)$getFuncResult" @@ -1156,7 +1156,7 @@ case class ScalaUDF( s""" |$boxedType $resultTerm = null; |try { - | $funcInvokation; + | $funcInvocation; |} catch (Exception e) { | throw new org.apache.spark.SparkException($errorMsgTerm, e); |} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala index 0eba61c741133..b808083152cd3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala @@ -191,13 +191,13 @@ case class Percentile( val sortedCounts = buffer.toSeq.sortBy(_._1)( child.dataType.asInstanceOf[NumericType].ordering.asInstanceOf[Ordering[AnyRef]]) - val accumlatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { + val accumulatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { case ((key1, count1), (key2, count2)) => (key2, count1 + count2) }.tail - val maxPosition = accumlatedCounts.last._2 - 1 + val maxPosition = accumulatedCounts.last._2 - 1 percentages.map { percentile => - getPercentile(accumlatedCounts, maxPosition * percentile) + getPercentile(accumulatedCounts, maxPosition * percentile) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index c69edccc696bb..3fbb798f1fd53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -216,7 +216,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { case DoubleType | FloatType => // When Double/Float overflows, there can be 2 cases: // - precision loss: according to SQL standard, the number is truncated; - // - returns (+/-)Infinite: same behavior also other DBs have (eg. Postgres) + // - returns (+/-)Infinite: same behavior also other DBs have (e.g. 
Postgres) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" |${ev.value} = $eval1 $symbol $eval2; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 1ff4a93cf0acd..638878b312dc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -175,7 +175,7 @@ class CodegenContext extends Logging { mutable.ArrayBuffer.empty[(String, String)] /** - * The mapping between mutable state types and corrseponding compacted arrays. + * The mapping between mutable state types and corresponding compacted arrays. * The keys are java type string. The values are [[MutableStateArrays]] which encapsulates * the compacted arrays for the mutable states with the same java type. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index e285398ba1958..4efcca0017eaa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, Generic import org.apache.spark.sql.types._ /** - * Java can not access Projection (in package object) + * Java cannot access Projection (in package object) */ abstract class BaseProjection extends Projection {} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 64360827fb794..ce177f50956f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -907,7 +907,7 @@ object HiveHashFunction extends InterpretedHashFunction { * - year, month (stored as HiveIntervalYearMonth) * - day, hour, minute, second, nanosecond (stored as HiveIntervalDayTime) * - * eg. (INTERVAL '30' YEAR + INTERVAL '-23' DAY) fails in Hive + * e.g. (INTERVAL '30' YEAR + INTERVAL '-23' DAY) fails in Hive * * This method mimics HiveIntervalDayTime.hashCode() in Hive. * @@ -919,7 +919,7 @@ object HiveHashFunction extends InterpretedHashFunction { * * - Spark's [[CalendarInterval]] has precision upto microseconds but Hive's * HiveIntervalDayTime can store data with precision upto nanoseconds. So, any input intervals - * with nanosecond values will lead to wrong output hashes (ie. non adherent with Hive output) + * with nanosecond values will lead to wrong output hashes (i.e. 
non adherent with Hive output) */ def hashCalendarInterval(calendarInterval: CalendarInterval): Long = { val totalMicroSeconds = calendarInterval.days * MICROS_PER_DAY + calendarInterval.microseconds diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 4454afb6c099b..d1dabe732c882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -128,7 +128,7 @@ trait HigherOrderFunction extends Expression with ExpectsInputTypes { def argumentTypes: Seq[AbstractDataType] /** - * All arguments have been resolved. This means that the types and nullabilty of (most of) the + * All arguments have been resolved. This means that the types and nullability of (most of) the * lambda function arguments is known, and that we can start binding the lambda functions. */ lazy val argumentsResolved: Boolean = arguments.forall(_.resolved) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index a363615d3afe0..c22b68890a0d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -808,10 +808,10 @@ case class SchemaOfJson( } /** - * A function that returns the number of elements in the outmost JSON array. + * A function that returns the number of elements in the outermost JSON array. */ @ExpressionDescription( - usage = "_FUNC_(jsonArray) - Returns the number of elements in the outmost JSON array.", + usage = "_FUNC_(jsonArray) - Returns the number of elements in the outermost JSON array.", arguments = """ Arguments: * jsonArray - A JSON array. `NULL` is returned in case of any other valid JSON string, @@ -877,13 +877,13 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression } /** - * A function which returns all the keys of the outmost JSON object. + * A function which returns all the keys of the outermost JSON object. */ @ExpressionDescription( - usage = "_FUNC_(json_object) - Returns all the keys of the outmost JSON object as an array.", + usage = "_FUNC_(json_object) - Returns all the keys of the outermost JSON object as an array.", arguments = """ Arguments: - * json_object - A JSON object. If a valid JSON object is given, all the keys of the outmost + * json_object - A JSON object. If a valid JSON object is given, all the keys of the outermost object will be returned as an array. If it is any other valid JSON string, an invalid JSON string or an empty string, the function returns null. 
""", @@ -921,7 +921,7 @@ case class JsonObjectKeys(child: Expression) extends UnaryExpression with Codege if (parser.nextToken() == null || parser.currentToken() != JsonToken.START_OBJECT) { return null } - // Parse the JSON string to get all the keys of outmost JSON object + // Parse the JSON string to get all the keys of outermost JSON object getJsonKeys(parser, input) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 0b94fe8b5d47e..28c9aefb42837 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -93,7 +93,7 @@ abstract class StringRegexExpression extends BinaryExpression Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order to match "\abc", the pattern should be "\\abc". - When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks + When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". * escape - an character added since Spark 3.0. The default escape character is the '\'. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index b6dd817794723..43ecbd6a83fdb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -173,7 +173,7 @@ sealed trait WindowFrame extends Expression with Unevaluable { case object UnspecifiedFrame extends WindowFrame /** - * A specified Window Frame. The val lower/uppper can be either a foldable [[Expression]] or a + * A specified Window Frame. The val lower/upper can be either a foldable [[Expression]] or a * [[SpecialFrameBoundary]]. */ case class SpecifiedWindowFrame( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index b053bf6d61e6b..0be2792bfd7db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -227,7 +227,7 @@ object NestedColumnAliasing { } /** - * This prunes unnessary nested columns from `Generate` and optional `Project` on top + * This prunes unnecessary nested columns from `Generate` and optional `Project` on top * of it. */ object GeneratorNestedColumnAliasing { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index b7c8f775b857f..aa8540fb44556 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -853,7 +853,7 @@ object CollapseWindow extends Rule[LogicalPlan] { * of the child window expression, transpose them. 
*/ object TransposeWindow extends Rule[LogicalPlan] { - private def compatibleParititions(ps1 : Seq[Expression], ps2: Seq[Expression]): Boolean = { + private def compatiblePartitions(ps1 : Seq[Expression], ps2: Seq[Expression]): Boolean = { ps1.length < ps2.length && ps2.take(ps1.length).permutations.exists(ps1.zip(_).forall { case (l, r) => l.semanticEquals(r) }) @@ -864,7 +864,7 @@ object TransposeWindow extends Rule[LogicalPlan] { if w1.references.intersect(w2.windowOutputSet).isEmpty && w1.expressions.forall(_.deterministic) && w2.expressions.forall(_.deterministic) && - compatibleParititions(ps1, ps2) => + compatiblePartitions(ps1, ps2) => Project(w1.output, Window(we2, ps2, os2, Window(we1, ps1, os1, grandChild))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala index 50fe0192d6f26..286b447cdb5a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushDownLeftSemiAntiJoin.scala @@ -172,7 +172,7 @@ object PushDownLeftSemiAntiJoin extends Rule[LogicalPlan] with PredicateHelper { * TODO: * Currently this rule can push down the left semi or left anti joins to either * left or right leg of the child join. This matches the behaviour of `PushPredicateThroughJoin` - * when the lefi semi or left anti join is in expression form. We need to explore the possibility + * when the left semi or left anti join is in expression form. We need to explore the possibility * to push the left semi/anti joins to both legs of join if the join condition refers to * both left and right legs of the child join. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 1b1e2ad71e7c8..4cdaf10dd3c60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -70,7 +70,7 @@ object ConstantFolding extends Rule[LogicalPlan] { /** * Substitutes [[Attribute Attributes]] which can be statically evaluated with their corresponding * value in conjunctive [[Expression Expressions]] - * eg. + * e.g. * {{{ * SELECT * FROM table WHERE i = 5 AND j = i + 3 * ==> SELECT * FROM table WHERE i = 5 AND j = 8 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 3c2ee3149d317..9d023b7f11401 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -63,7 +63,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { // the produced join then becomes unresolved and break structural integrity. We should // de-duplicate conflicting attributes. // SPARK-26078: it may also happen that the subquery has conflicting attributes with the outer - // values. In this case, the resulting join would contain trivially true conditions (eg. + // values. In this case, the resulting join would contain trivially true conditions (e.g. // id#3 = id#3) which cannot be de-duplicated after. 
In this method, if there are conflicting // attributes in the join condition, the subquery's conflicting attributes are changed using // a projection which aliases them and resolves the problem. @@ -174,7 +174,7 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { val inConditions = values.zip(sub.output).map(EqualTo.tupled) // To handle a null-aware predicate not-in-subquery in nested conditions // (e.g., `v > 0 OR t1.id NOT IN (SELECT id FROM t2)`), we transform - // `inConditon` (t1.id=t2.id) into `(inCondition) OR ISNULL(inCondition)`. + // `inCondition` (t1.id=t2.id) into `(inCondition) OR ISNULL(inCondition)`. // // For example, `SELECT * FROM t1 WHERE v > 0 OR t1.id NOT IN (SELECT id FROM t2)` // is transformed into a plan below; @@ -567,7 +567,7 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe subqueryRoot = Project(projList ++ havingInputs, subqueryRoot) case s @ SubqueryAlias(alias, _) => subqueryRoot = SubqueryAlias(alias, subqueryRoot) - case op => sys.error(s"Unexpected operator $op in corelated subquery") + case op => sys.error(s"Unexpected operator $op in correlated subquery") } // CASE WHEN alwaysTrue IS NULL THEN resultOnZeroTups diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 1f32620e54902..948b94a7e9d66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -127,7 +127,7 @@ object ParserUtils { } } - /** Unescape baskslash-escaped string enclosed by quotes. */ + /** Unescape backslash-escaped string enclosed by quotes. */ def unescapeSQLString(b: String): String = { var enclosure: Character = null val sb = new StringBuilder(b.length()) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 864ca4f57483d..e0839a34ae589 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -397,7 +397,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] case ar: AttributeReference if allAttributes.indexOf(ar.exprId) == -1 => // Top level `AttributeReference` may also be used for output like `Alias`, we should - // normalize the epxrId too. + // normalize the exprId too. id += 1 ar.withExprId(ExprId(id)).canonicalized diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index ad5c3fd74e9b5..1a9c9d14e3eed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -136,7 +136,7 @@ abstract class LogicalPlan def outputOrdering: Seq[SortOrder] = Nil /** - * Returns true iff `other`'s output is semantically the same, ie.: + * Returns true iff `other`'s output is semantically the same, i.e.: * - it contains the same number of `Attribute`s; * - references are the same; * - the order is equal too. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala index 63348f766a5b1..5ec488efc328c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/PlanHelper.scala @@ -29,7 +29,7 @@ object PlanHelper { /** * Check if there's any expression in this query plan operator that is * - A WindowExpression but the plan is not Window - * - An AggregateExpresion but the plan is not Aggregate or Window + * - An AggregateExpression but the plan is not Aggregate or Window * - A Generator but the plan is not Generate * Returns the list of invalid expressions that this operator hosts. This can happen when * 1. The input query from users contain invalid expressions. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index aa7151ad36850..0e4bfa4dc34da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -675,7 +675,7 @@ object Expand { val numAttributes = attrMap.size assert(numAttributes <= GroupingID.dataType.defaultSize * 8) val mask = if (numAttributes != 64) (1L << numAttributes) - 1 else 0xFFFFFFFFFFFFFFFFL - // Calculate the attrbute masks of selected grouping set. For example, if we have GroupBy + // Calculate the attribute masks of selected grouping set. For example, if we have GroupBy // attributes (a, b, c, d), grouping set (a, c) will produce the following sequence: // (15, 7, 13), whose binary form is (1111, 0111, 1101) val masks = (mask +: groupingSetAttrs.map(attrMap).map(index => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 17e1cb416fc8a..c4002aa441a50 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -164,7 +164,7 @@ trait Partitioning { * i.e. the current dataset does not need to be re-partitioned for the `required` * Distribution (it is possible that tuples within a partition need to be reorganized). * - * A [[Partitioning]] can never satisfy a [[Distribution]] if its `numPartitions` does't match + * A [[Partitioning]] can never satisfy a [[Distribution]] if its `numPartitions` doesn't match * [[Distribution.requiredNumPartitions]]. */ final def satisfies(required: Distribution): Boolean = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala index f02b2d08c0935..eac34c8f076a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeFormatterHelper.scala @@ -226,8 +226,8 @@ private object DateTimeFormatterHelper { // string at res(0). 
So when the first element here is empty string we do not need append `'` // literal to the DateTimeFormatterBuilder. case ("", idx) if idx != 0 => builder.appendLiteral("'") - case (pattenPart, idx) if idx % 2 == 0 => - var rest = pattenPart + case (patternPart, idx) if idx % 2 == 0 => + var rest = patternPart while (rest.nonEmpty) { rest match { case extractor(prefix, secondFraction, suffix) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 87cf3c93ba26e..0543ef99f8947 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -189,7 +189,7 @@ object DateTimeUtils { * precision, so this conversion is lossy. */ def microsToMillis(micros: Long): Long = { - // When the timestamp is negative i.e before 1970, we need to adjust the millseconds portion. + // When the timestamp is negative i.e before 1970, we need to adjust the milliseconds portion. // Example - 1965-01-01 10:11:12.123456 is represented as (-157700927876544) in micro precision. // In millis precision the above needs to be represented as (-157700927877). Math.floorDiv(micros, MICROS_PER_MILLIS) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala index ae7066d87d530..addf1408a33a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/QuantileSummaries.scala @@ -173,13 +173,13 @@ class QuantileSummaries( // Take the case of the sample `10` from `b`. In the original stream, it could have appeared // right after `0` (as expressed by `g=1`) or right before `20`, so `delta=99+0-1=98`. // In the GK algorithm's style of working in terms of maximum bounds, one can observe that the - // maximum additional uncertainty over samples comming from `b` is `max(g_a + delta_a) = + // maximum additional uncertainty over samples coming from `b` is `max(g_a + delta_a) = // floor(2 * eps_a * n_a)`. Likewise, additional uncertainty over samples from `a` is // `floor(2 * eps_b * n_b)`. // Only samples that interleave the other side are affected. That means that samples from // one side that are lesser (or greater) than all samples from the other side are just copied - // unmodifed. - // If the merging instances have different `relativeError`, the resulting instance will cary + // unmodified. + // If the merging instances have different `relativeError`, the resulting instance will carry // the largest one: `eps_ab = max(eps_a, eps_b)`. // The main invariant of the GK algorithm is kept: // `max(g_ab + delta_ab) <= floor(2 * eps_ab * (n_a + n_b))` since diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 69f04e11ff0bc..e8e1120cbb884 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1145,7 +1145,7 @@ object SQLConf { val CODEGEN_FACTORY_MODE = buildConf("spark.sql.codegen.factoryMode") .doc("This config determines the fallback behavior of several codegen generators " + - "during tests. 
`FALLBACK` means trying codegen first and then fallbacking to " + + "during tests. `FALLBACK` means trying codegen first and then falling back to " + "interpreted if any compile error happens. Disabling fallback if `CODEGEN_ONLY`. " + "`NO_CODEGEN` skips codegen and goes interpreted path always. Note that " + "this config works only for tests.") @@ -1570,7 +1570,7 @@ object SQLConf { val JSON_EXPRESSION_OPTIMIZATION = buildConf("spark.sql.optimizer.enableJsonExpressionOptimization") .doc("Whether to optimize JSON expressions in SQL optimizer. It includes pruning " + - "unnecessary columns from from_json, simplifing from_json + to_json, to_json + " + + "unnecessary columns from from_json, simplifying from_json + to_json, to_json + " + "named_struct(from_json.col1, from_json.col2, ....).") .version("3.1.0") .booleanConf @@ -2058,7 +2058,7 @@ object SQLConf { buildConf("spark.sql.decimalOperations.allowPrecisionLoss") .internal() .doc("When true (default), establishing the result type of an arithmetic operation " + - "happens according to Hive behavior and SQL ANSI 2011 specification, ie. rounding the " + + "happens according to Hive behavior and SQL ANSI 2011 specification, i.e. rounding the " + "decimal part of the result if an exact representation is not possible. Otherwise, NULL " + "is returned in those cases, as previously.") .version("2.3.1") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 9fa27c7df3832..4badcbaa89aa4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -204,7 +204,7 @@ object RandomDataGenerator { specialDates.map(java.sql.Date.valueOf)) } case TimestampType => - def uniformMicorsRand(rand: Random): Long = { + def uniformMicrosRand(rand: Random): Long = { var milliseconds = rand.nextLong() % 253402329599999L // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT // for "0001-01-01 00:00:00.000000". We need to find a @@ -225,7 +225,7 @@ object RandomDataGenerator { if (SQLConf.get.getConf(SQLConf.DATETIME_JAVA8API_ENABLED)) { randomNumeric[Instant]( rand, - (rand: Random) => DateTimeUtils.microsToInstant(uniformMicorsRand(rand)), + (rand: Random) => DateTimeUtils.microsToInstant(uniformMicrosRand(rand)), specialTs.map { s => val ldt = LocalDateTime.parse(s.replace(" ", "T")) ldt.atZone(ZoneId.systemDefault()).toInstant @@ -235,7 +235,7 @@ object RandomDataGenerator { rand, (rand: Random) => { // DateTimeUtils.toJavaTimestamp takes microsecond. - val ts = DateTimeUtils.toJavaTimestamp(uniformMicorsRand(rand)) + val ts = DateTimeUtils.toJavaTimestamp(uniformMicrosRand(rand)) // The generated `ts` is based on the hybrid calendar Julian + Gregorian since // 1582-10-15 but it should be valid in Proleptic Gregorian calendar too which is used // by Spark SQL since version 3.0 (see SPARK-26651). 
We try to convert `ts` to diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index f5bfdc5e695e0..61186c178b083 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -427,7 +427,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(plan, expected) } - test("SPARK-12102: Ignore nullablity when comparing two sides of case") { + test("SPARK-12102: Ignore nullability when comparing two sides of case") { val relation = LocalRelation(Symbol("a").struct(Symbol("x").int), Symbol("b").struct(Symbol("x").int.withNullability(false))) val plan = relation.select( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala index 249e7a49a0a90..cdfae14138290 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveGroupingAnalyticsSuite.scala @@ -160,7 +160,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { } test("grouping function") { - // GrouingSets + // GroupingSets val originalPlan = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)), @@ -200,7 +200,7 @@ class ResolveGroupingAnalyticsSuite extends AnalysisTest { } test("grouping_id") { - // GrouingSets + // GroupingSets val originalPlan = GroupingSets(Seq(Seq(), Seq(unresolved_a), Seq(unresolved_a, unresolved_b)), Seq(unresolved_a, unresolved_b), r1, Seq(unresolved_a, unresolved_b, UnresolvedAlias(count(unresolved_c)), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 1e5bc271ab270..5c4d45b5394f7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -260,7 +260,7 @@ class TypeCoercionSuite extends AnalysisTest { // Tests that its not possible to setup implicit casts between two map types when // source map's key type is integer and the target map's key type are either Binary, - // Boolean, Date, Timestamp, Array, Struct, CaleandarIntervalType or NullType + // Boolean, Date, Timestamp, Array, Struct, CalendarIntervalType or NullType nonCastableTargetTypes.foreach { targetType => shouldNotCast(sourceType, targetType) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala index cdc3f4275414c..fa779477cccab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationsSuite.scala @@ -887,7 +887,7 @@ class UnsupportedOperationsSuite extends SparkFunSuite with SQLHelper { } } - /** Assert that the logical plan is supported 
for continuous procsssing mode */ + /** Assert that the logical plan is supported for continuous processing mode */ def assertSupportedForContinuousProcessing( name: String, plan: LogicalPlan, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index adaabfe4d32bb..bca8c56a1071e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -527,7 +527,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { .exists(_.getRenderedMessage().contains("Generated method too long"))) } - test("SPARK-28916: subexrepssion elimination can cause 64kb code limit on UnsafeProjection") { + test("SPARK-28916: subexpression elimination can cause 64kb code limit on UnsafeProjection") { val numOfExprs = 10000 val exprs = (0 to numOfExprs).flatMap(colIndex => Seq(Add(BoundReference(colIndex, DoubleType, true), @@ -554,7 +554,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { // Expecting result: // "((scala.math.LowPriorityOrderingImplicits$$anon$3) references[0] /* comparator */)" - // Using lenient assertions to be resilient to annonymous class numbering changes + // Using lenient assertions to be resilient to anonymous class numbering changes assert(!refTerm.contains("null")) assert(refTerm.contains("scala.math.LowPriorityOrderingImplicits$$anon$")) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 3d6f6937e780b..57abdb4de229f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -425,14 +425,14 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { def checkErrorMessage( childDataType: DataType, fieldDataType: DataType, - errorMesage: String): Unit = { + errorMessage: String): Unit = { val e = intercept[org.apache.spark.sql.AnalysisException] { ExtractValue( Literal.create(null, childDataType), Literal.create(null, fieldDataType), _ == _) } - assert(e.getMessage().contains(errorMesage)) + assert(e.getMessage().contains(errorMessage)) } checkErrorMessage(structType, IntegerType, "Field name should be String Literal") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala index 87e34aca510f5..ee6f89a155ae0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ConditionalExpressionSuite.scala @@ -212,8 +212,8 @@ class ConditionalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper test("case key when - internal pattern matching expects a List while apply takes a Seq") { val indexedSeq = IndexedSeq(Literal(1), Literal(42), Literal(42), Literal(1)) - val caseKeyWhaen = CaseKeyWhen(Literal(12), indexedSeq) - assert(caseKeyWhaen.branches == + val caseKeyWhen = CaseKeyWhen(Literal(12), indexedSeq) + 
assert(caseKeyWhen.branches == IndexedSeq((Literal(12) === Literal(1), Literal(42)), (Literal(12) === Literal(42), Literal(1)))) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 70eb391ad6e05..26d98157807cd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -451,7 +451,7 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB if (interpret.isDefined && codegen.isDefined && !compareResults(interpret.get, codegen.get)) { fail(s"Incorrect evaluation: $expr, interpret: ${interpret.get}, codegen: ${codegen.get}") } else if (interpretExc.isDefined && codegenExc.isEmpty) { - fail(s"Incorrect evaluation: $expr, interpet threw exception ${interpretExc.get}") + fail(s"Incorrect evaluation: $expr, interpret threw exception ${interpretExc.get}") } else if (interpretExc.isEmpty && codegenExc.isDefined) { fail(s"Incorrect evaluation: $expr, codegen threw exception ${codegenExc.get}") } else if (interpretExc.isDefined && codegenExc.isDefined diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index bc2b93e5390da..d425d0ba42186 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -212,9 +212,9 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val initializeWithNonexistingMethod = InitializeJavaBean( Literal.fromObject(new java.util.LinkedList[Int]), - Map("nonexisting" -> Literal(1))) + Map("nonexistent" -> Literal(1))) checkExceptionInExpression[Exception](initializeWithNonexistingMethod, - """A method named "nonexisting" is not declared in any enclosing class """ + + """A method named "nonexistent" is not declared in any enclosing class """ + "nor any supertype") val initializeWithWrongParamType = InitializeJavaBean( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 730574a4b9846..78e9cf82a28b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -118,7 +118,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testElt(null, 1, null, "world") testElt(null, null, "hello", "world") - // Invalid ranages + // Invalid ranges testElt(null, 3, "hello", "world") testElt(null, 0, "hello", "world") testElt(null, -1, "hello", "world") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala index 972db7fa30a91..d6e6142b07a3f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala @@ -66,20 +66,20 @@ class PercentileSuite extends SparkFunSuite { // Test with row with frequency. Second and third columns are frequency in Int and Long val countForFrequencyTest = 1000 val rowsWithFrequency = (1 to countForFrequencyTest).map(x => Seq(x, x):+ x.toLong) - val expectedPercentilesWithFrquency = Seq(1.0, 500.0, 707.0, 866.0, 1000.0) + val expectedPercentilesWithFrequency = Seq(1.0, 500.0, 707.0, 866.0, 1000.0) val frequencyExpressionInt = BoundReference(1, IntegerType, nullable = false) val aggInt = new Percentile(childExpression, percentageExpression, frequencyExpressionInt) - runTest(aggInt, rowsWithFrequency, expectedPercentilesWithFrquency) + runTest(aggInt, rowsWithFrequency, expectedPercentilesWithFrequency) val frequencyExpressionLong = BoundReference(2, LongType, nullable = false) val aggLong = new Percentile(childExpression, percentageExpression, frequencyExpressionLong) - runTest(aggLong, rowsWithFrequency, expectedPercentilesWithFrquency) + runTest(aggLong, rowsWithFrequency, expectedPercentilesWithFrequency) // Run test with Flatten data val flattenRows = (1 to countForFrequencyTest).flatMap(current => (1 to current).map(y => current )).map(Seq(_)) - runTest(agg, flattenRows, expectedPercentilesWithFrquency) + runTest(agg, flattenRows, expectedPercentilesWithFrequency) } private def runTest(agg: Percentile, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala index d660afb7f8a05..9d4c5986300c5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeBlockSuite.scala @@ -115,7 +115,7 @@ class CodeBlockSuite extends SparkFunSuite { assert(exprValues === Set(isNull1, value1, isNull2, value2, literal)) } - test("Throws exception when interpolating unexcepted object in code block") { + test("Throws exception when interpolating unexpected object in code block") { val obj = Tuple2(1, 1) val e = intercept[IllegalArgumentException] { code"$obj" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala index 2eea840e21a31..8543b62fd8bdd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SetOperationSuite.scala @@ -154,11 +154,11 @@ class SetOperationSuite extends PlanTest { .union(testRelation2.select(Literal(-1L).as("vcol"), 'd, 'e, 'f)) .groupBy('a, 'b, 'c)('a, 'b, 'c, sum('vcol).as("sum")) .where(GreaterThan('sum, Literal(0L))).analyze - val multiplerAttr = planFragment.output.last + val multiplierAttr = planFragment.output.last val output = planFragment.output.dropRight(1) val expectedPlan = Project(output, Generate( - ReplicateRows(Seq(multiplerAttr) ++ output), + ReplicateRows(Seq(multiplierAttr) ++ output), Nil, false, None, @@ -183,11 +183,11 @@ class SetOperationSuite extends PlanTest { .select('a, 'b, 'c, If(GreaterThan('vcol1_count, 'vcol2_count), 'vcol2_count, 'vcol1_count).as("min_count")) .analyze - val multiplerAttr = planFragment.output.last + val multiplierAttr = planFragment.output.last val 
output = planFragment.output.dropRight(1) val expectedPlan = Project(output, Generate( - ReplicateRows(Seq(multiplerAttr) ++ output), + ReplicateRows(Seq(multiplierAttr) ++ output), Nil, false, None, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 0f1b4a3ea918c..e98ec6a667a73 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -958,7 +958,7 @@ class DDLParserSuite extends AnalysisTest { Some(first()))) } - test("alter table: mutiple property changes are not allowed") { + test("alter table: multiple property changes are not allowed") { intercept[ParseException] { parsePlan("ALTER TABLE table_name ALTER COLUMN a.b.c " + "TYPE bigint COMMENT 'new comment'")} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala index b9f984001523a..46ad5d1dec7e4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DataTypeParserSuite.scala @@ -124,8 +124,8 @@ class DataTypeParserSuite extends SparkFunSuite { unsupported("struct") test("Do not print empty parentheses for no params") { - assert(intercept("unkwon").getMessage.contains("unkwon is not supported")) - assert(intercept("unkwon(1,2,3)").getMessage.contains("unkwon(1,2,3) is not supported")) + assert(intercept("unknown").getMessage.contains("unknown is not supported")) + assert(intercept("unknown(1,2,3)").getMessage.contains("unknown(1,2,3) is not supported")) } // DataType parser accepts certain reserved keywords. 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index 00b6828c08b38..99051d692451b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -77,7 +77,7 @@ class ErrorParserSuite extends AnalysisTest { } test("SPARK-21136: misleading error message due to problematic antlr grammar") { - intercept("select * from a left joinn b on a.id = b.id", "missing 'JOIN' at 'joinn'") + intercept("select * from a left join_ b on a.id = b.id", "missing 'JOIN' at 'join_'") intercept("select * from test where test.t is like 'test'", "mismatched input 'is' expecting") intercept("SELECT * FROM test WHERE x NOT NULL", "mismatched input 'NOT' expecting") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 9f6a76b9228c5..0b304a799cdc5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -590,7 +590,7 @@ class ExpressionParserSuite extends AnalysisTest { // tests that have different result regarding the conf if (escape) { - // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing fallbacks to + // When SQLConf.ESCAPED_STRING_LITERALS is enabled, string literal parsing falls back to // Spark 1.6 behavior. // 'LIKE' string literals. @@ -780,7 +780,7 @@ class ExpressionParserSuite extends AnalysisTest { val complexName = FunctionIdentifier("`ba`r", Some("`fo`o")) assertEqual(complexName.quotedString, UnresolvedAttribute("`fo`o.`ba`r")) intercept(complexName.unquotedString, "mismatched input") - // Function identifier contains countious backticks should be treated correctly. + // Function identifier contains continuous backticks should be treated correctly. val complexName2 = FunctionIdentifier("ba``r", Some("fo``o")) assertEqual(complexName2.quotedString, UnresolvedAttribute("fo``o.ba``r")) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index f037ce7b9e793..bad3e0d79dd12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -355,7 +355,7 @@ class TableIdentifierParserSuite extends SparkFunSuite with SQLKeywordUtils { assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) - // Table identifier contains countious backticks should be treated correctly. + // Table identifier contains continuous backticks should be treated correctly. 
val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala index 2e190c6ba6d4b..5729b02dc4926 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeArraySuite.scala @@ -55,7 +55,7 @@ class UnsafeArraySuite extends SparkFunSuite { BigDecimal("1.2345678901234567890123456").setScale(21, BigDecimal.RoundingMode.FLOOR), BigDecimal("2.3456789012345678901234567").setScale(21, BigDecimal.RoundingMode.FLOOR)) - val calenderintervalArray = Array( + val calendarintervalArray = Array( new CalendarInterval(3, 2, 321), new CalendarInterval(1, 2, 123)) val intMultiDimArray = Array(Array(1), Array(2, 20), Array(3, 30, 300)) @@ -142,12 +142,12 @@ class UnsafeArraySuite extends SparkFunSuite { val schema = new StructType().add("array", ArrayType(CalendarIntervalType)) val encoder = RowEncoder(schema).resolveAndBind() - val externalRow = Row(calenderintervalArray) + val externalRow = Row(calendarintervalArray) val ir = encoder.createSerializer().apply(externalRow) val unsafeCalendar = ir.getArray(0) assert(unsafeCalendar.isInstanceOf[UnsafeArrayData]) - assert(unsafeCalendar.numElements == calenderintervalArray.length) - calenderintervalArray.zipWithIndex.map { case (e, i) => + assert(unsafeCalendar.numElements == calendarintervalArray.length) + calendarintervalArray.zipWithIndex.map { case (e, i) => assert(unsafeCalendar.getInterval(i) == e) } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java index d8e61a87e7f62..b2ef1c7722ef8 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnDescriptor.java @@ -49,7 +49,7 @@ public ColumnDescriptor(TColumnDesc tColumnDesc) { public static ColumnDescriptor newPrimitiveColumnDescriptor(String name, String comment, Type type, int position) { // Current usage looks like it's only for metadata columns, but if that changes then - // this method may need to require a type qualifiers aruments. + // this method may need to require a type qualifiers arguments. 
return new ColumnDescriptor(name, comment, new TypeDescriptor(type), position); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java index 2b2359cc13c0f..bf3c6b27ea81d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/GetInfoValue.java @@ -52,7 +52,7 @@ public GetInfoValue(TGetInfoValue tGetInfoValue) { stringValue = tGetInfoValue.getStringValue(); break; default: - throw new IllegalArgumentException("Unreconigzed TGetInfoValue"); + throw new IllegalArgumentException("Unrecognized TGetInfoValue"); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java index c25c742d392b3..59630672847e4 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/GetColumnsOperation.java @@ -103,7 +103,7 @@ public class GetColumnsOperation extends MetadataOperation { "Schema of table that is the scope of a reference attribute " + "(null if the DATA_TYPE isn't REF)") .addPrimitiveColumn("SCOPE_TABLE", Type.STRING_TYPE, - "Table name that this the scope of a reference attribure " + "Table name that this the scope of a reference attribute " + "(null if the DATA_TYPE isn't REF)") .addPrimitiveColumn("SOURCE_DATA_TYPE", Type.SMALLINT_TYPE, "Source type of a distinct type or user-generated Ref type, " diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index 1b3e8fe6bfb9d..f47a4388f7bea 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -175,9 +175,9 @@ protected BufferedReader loadFile(String fileName) throws IOException { @Override protected int processCmd(String cmd) { int rc = 0; - String cmd_trimed = cmd.trim(); + String cmd_trimmed = cmd.trim(); try { - executeStatementInternal(cmd_trimed, null, false, 0); + executeStatementInternal(cmd_trimmed, null, false, 0); } catch (HiveSQLException e) { rc = -1; LOG.warn("Failed to execute HQL command in global .hiverc file.", e); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java index ab9ed5b1f371e..13fc552a9a42e 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpCLIService.java @@ -137,7 +137,7 @@ protected void initializeServer() { httpServer.setHandler(context); context.addServlet(new ServletHolder(thriftHttpServlet), httpPath); - // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyRecieveDuration, etc. + // TODO: check defaults: maxTimeout, keepalive, maxBodySize, bodyReceiveDuration, etc. 
// Finally, start the server httpServer.start(); // In case HIVE_SERVER2_THRIFT_HTTP_PORT or hive.server2.thrift.http.port is configured with diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala index 4564c2209a931..820859b65925b 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/DummyListeners.scala @@ -17,7 +17,7 @@ /** * These classes in this package are intentionally placed to the outer package of spark, - * because IsolatedClientLoader leverages Spark classloader for shared classess including + * because IsolatedClientLoader leverages Spark classloader for shared classes including * spark package, and the test should fail if Spark initializes these listeners with * IsolatedClientLoader. */ diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala index f28faea2be868..f2bb337e4a826 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnvSuite.scala @@ -42,7 +42,7 @@ class SparkSQLEnvSuite extends SparkFunSuite { QUERY_EXECUTION_LISTENERS.key -> classOf[DummyQueryExecutionListener].getCanonicalName, STREAMING_QUERY_LISTENERS.key -> classOf[DummyStreamingQueryListener].getCanonicalName, WAREHOUSE_PATH.key -> TestHiveContext.makeWarehouseDir().toURI.getPath, - // The issue occured from "maven" and list of custom jars, but providing list of custom + // The issue occurred from "maven" and list of custom jars, but providing list of custom // jars to initialize HiveClient isn't trivial, so just use "maven". HIVE_METASTORE_JARS.key -> "maven", HIVE_METASTORE_VERSION.key -> null, diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 4ce1964a19bd9..c263932c2f535 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -56,7 +56,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, true) // Ensures that cross joins are enabled so that we can test them TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true) - // Ensures that the table insertion behaivor is consistent with Hive + // Ensures that the table insertion behavior is consistent with Hive TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, StoreAssignmentPolicy.LEGACY.toString) // Fix session local timezone to America/Los_Angeles for those timezone sensitive tests // (timestamp_*) @@ -305,7 +305,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Unsupported underscore syntax. "inputddl5", - // Thift is broken... + // Thrift is broken... 
"inputddl8", // Hive changed ordering of ddl: @@ -496,7 +496,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "drop_partitions_filter2", "drop_partitions_filter3", - // The following failes due to truncate table + // The following fails due to truncate table "truncate_table", // We do not support DFS command. @@ -716,7 +716,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "groupby_multi_insert_common_distinct", "groupby_multi_single_reducer2", "groupby_multi_single_reducer3", - "groupby_mutli_insert_common_distinct", + "groupby_multi_insert_common_distinct", "groupby_neg_float", "groupby_ppd", "groupby_ppr", @@ -958,8 +958,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "subq2", "subquery_exists", "subquery_exists_having", - "subquery_notexists", - "subquery_notexists_having", + "subquery_nonexistent", + "subquery_nonexistent_having", "subquery_in_having", "tablename_with_select", "timestamp_comparison", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index a89243c331c7b..e02589e5cad00 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -332,7 +332,7 @@ private[hive] object HiveMetastoreCatalog { metastoreSchema: StructType, inferredSchema: StructType): StructType = try { // scalastyle:off caselocale - // Find any nullable fields in mestastore schema that are missing from the inferred schema. + // Find any nullable fields in metastore schema that are missing from the inferred schema. val metastoreFields = metastoreSchema.map(f => f.name.toLowerCase -> f).toMap val missingNullables = metastoreFields .filterKeys(!inferredSchema.map(_.name.toLowerCase).contains(_)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 34befb8a6f965..b4ebf153fc178 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -367,14 +367,14 @@ private[hive] class HiveClientImpl( override def getDatabase(dbName: String): CatalogDatabase = withHiveState { Option(client.getDatabase(dbName)).map { d => - val paras = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++ + val params = Option(d.getParameters).map(_.asScala.toMap).getOrElse(Map()) ++ Map(PROP_OWNER -> shim.getDatabaseOwnerName(d)) CatalogDatabase( name = d.getName, description = Option(d.getDescription).getOrElse(""), locationUri = CatalogUtils.stringToURI(d.getLocationUri), - properties = paras) + properties = params) }.getOrElse(throw new NoSuchDatabaseException(dbName)) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 4096916a100c3..26baff3d83eec 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -92,7 +92,7 @@ case class HiveScriptTransformationExec( scriptOutputWritable.readFields(scriptOutputStream) } catch { case _: EOFException => - // This 
means that the stdout of `proc` (ie. TRANSFORM process) has exhausted. + // This means that the stdout of `proc` (i.e. TRANSFORM process) has exhausted. // Ideally the proc should *not* be alive at this point but // there can be a lag between EOF being written out and the process // being terminated. So explicitly waiting for the process to be done. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 3c3f31ac2994a..63e46880376e1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -172,7 +172,7 @@ case class InsertIntoHiveTable( table.bucketSpec match { case Some(bucketSpec) => // Writes to bucketed hive tables are allowed only if user does not care about maintaining - // table's bucketing ie. both "hive.enforce.bucketing" and "hive.enforce.sorting" are + // table's bucketing i.e. both "hive.enforce.bucketing" and "hive.enforce.sorting" are // set to false val enforceBucketingConfig = "hive.enforce.bucketing" val enforceSortingConfig = "hive.enforce.sorting" diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala index cd07199e48ed7..3fa8449c3cb01 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/PruneHiveTablePartitions.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy * the hive table relation will be updated based on pruned partitions. * * This rule is executed in optimization phase, so the statistics can be updated before physical - * planning, which is useful for some spark strategy, eg. + * planning, which is useful for some spark strategy, e.g. * [[org.apache.spark.sql.execution.SparkStrategies.JoinSelection]]. * * TODO: merge this with PruneFileSourcePartitions after we completely make hive as a data source. 
diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q index 28bbc2d8f1a3e..df5334c785f6a 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/auto_sortmerge_join_13.q @@ -19,7 +19,7 @@ set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.auto.convert.join=true; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -41,7 +41,7 @@ select * from dest2 order by k1, k2; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=200; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 @@ -61,7 +61,7 @@ select * from dest1 order by k1, k2; select * from dest2 order by k1, k2; set hive.auto.convert.sortmerge.join.to.mapjoin=true; --- A SMB join followed by a mutli-insert +-- A SMB join followed by a multi-insert explain from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q index 91e97de62c82f..843ba4a3dbacd 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/bucketsortoptimize_insert_3.q @@ -18,7 +18,7 @@ FROM src INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT *; -- Insert data into the bucketed table by selecting from another bucketed table --- The bucketing positions dont match - although the actual bucketing do. +-- The bucketing positions don't match - although the actual bucketing do. 
-- This should be a map-only operation EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') @@ -37,7 +37,7 @@ CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (value) SORTED BY (value) INTO 2 BUCKETS; -- Insert data into the bucketed table by selecting from another bucketed table --- The bucketing positions dont match - this should be a map-reduce operation +-- The bucketing positions don't match - this should be a map-reduce operation EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from diff --git a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q index f70e7d5c86237..4c56cad2411fc 100644 --- a/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q +++ b/sql/hive/src/test/resources/ql/src/test/queries/clientpositive/smb_mapjoin_20.q @@ -32,7 +32,7 @@ CREATE TABLE test_table3 (key STRING, value1 int, value2 string) PARTITIONED BY CLUSTERED BY (value1) SORTED BY (value1) INTO 2 BUCKETS; -- Insert data into the bucketed table by selecting from another bucketed table --- This should be a map-only operation, although the bucketing positions dont match +-- This should be a map-only operation, although the bucketing positions don't match EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1'; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala index 71750e6b3a516..b715f484fa02a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/InsertSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.util.Utils case class TestData(key: Int, value: String) -case class ThreeCloumntable(key: Int, value: String, key1: String) +case class ThreeColumnTable(key: Int, value: String, key1: String) class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter with SQLTestUtils with PrivateMethodTester { @@ -764,7 +764,7 @@ class InsertSuite extends QueryTest with TestHiveSingleton with BeforeAndAfter val path = dir.toURI.getPath val e = intercept[AnalysisException] { - sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path}' TABLE notexists") + sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path}' TABLE nonexistent") }.getMessage assert(e.contains("Table or view not found")) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 2ea98943011f4..2e98a76c52488 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -735,7 +735,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } } - test("analyze column command paramaters validation") { + test("analyze column command parameters validation") { val e1 = intercept[IllegalArgumentException] { AnalyzeColumnCommand(TableIdentifier("test"), Option(Seq("c1")), true).run(spark) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 1018ae5b68895..0876709c31899 100644 --- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -372,7 +372,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T } } - test("SPARK-32400: TRANSFORM doesn't support CalenderIntervalType/UserDefinedType (hive serde)") { + test("SPARK-32400: TRANSFORM doesn't support CalendarIntervalType/UserDefinedType (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) withTempView("v") { val df = Seq( @@ -410,7 +410,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T } test("SPARK-32400: TRANSFORM doesn't support" + - " CalenderIntervalType/UserDefinedType end to end (hive serde)") { + " CalendarIntervalType/UserDefinedType end to end (hive serde)") { assume(TestUtils.testCommandAvailable("/bin/bash")) withTempView("v") { val df = Seq( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6b82b1267bc66..3370695245fd0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -228,7 +228,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi checkAnswer(sql(s"SHOW functions $db.temp_abs"), Row("temp_abs")) checkAnswer(sql(s"SHOW functions `$db`.`temp_abs`"), Row("temp_abs")) checkAnswer(sql(s"SHOW functions `$db`.`temp_abs`"), Row("temp_abs")) - checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) + checkAnswer(sql("SHOW functions `a function doesn't exist`"), Nil) checkAnswer(sql("SHOW functions `temp_weekofyea*`"), Row("temp_weekofyear")) // this probably will failed if we add more function with `sha` prefixing. @@ -768,7 +768,7 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi sql("SELECT * FROM nested").collect().toSeq) intercept[AnalysisException] { - sql("CREATE TABLE test_ctas_1234 AS SELECT * from notexists").collect() + sql("CREATE TABLE test_ctas_1234 AS SELECT * from nonexistent").collect() } } } @@ -1739,12 +1739,12 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi |SELECT 'blarr' """.stripMargin) - // project list is the same order of paritioning columns in table definition + // project list is the same order of partitioning columns in table definition checkAnswer( sql(s"SELECT p1, p2, p3, p4, p5, c1 FROM $table"), Row("a", "b", "c", "d", "e", "blarr") :: Nil) - // project list does not have the same order of paritioning columns in table definition + // project list does not have the same order of partitioning columns in table definition checkAnswer( sql(s"SELECT p2, p3, p4, p1, p5, c1 FROM $table"), Row("b", "c", "d", "a", "e", "blarr") :: Nil) From 6aff215077e2cdf9cec187c827da63c067514e4e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 7 Dec 2020 10:50:31 -0800 Subject: [PATCH 0692/1009] [SPARK-33693][SQL] deprecate spark.sql.hive.convertCTAS ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30554 . Now we have a new config for converting CREATE TABLE, we don't need the old config that only works for CTAS. ### Why are the changes needed? It's confusing for having two config while one can cover another completely. 
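As a migration reference, here is a minimal sketch (not part of this patch; it assumes a Spark shell where `spark` and the `SQLConf` object are in scope) of moving off the deprecated CTAS-only flag onto the broader legacy flag named in the deprecation message below:

```scala
import org.apache.spark.sql.internal.SQLConf

// Hypothetical migration: rather than toggling the deprecated spark.sql.hive.convertCTAS,
// make plain CREATE TABLE (and therefore CTAS) default to data source tables.
spark.conf.set(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key, "false")
spark.sql("CREATE TABLE t AS SELECT 1 AS id")  // resolved as a data source table, not a Hive serde table
```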
### Does this PR introduce _any_ user-facing change? no, it's deprecating not removing. ### How was this patch tested? N/A Closes #30651 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e8e1120cbb884..bc62213bdb740 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3005,7 +3005,9 @@ object SQLConf { s"Use '${ADVISORY_PARTITION_SIZE_IN_BYTES.key}' instead of it."), DeprecatedConfig(OPTIMIZER_METADATA_ONLY.key, "3.0", "Avoid to depend on this optimization to prevent a potential correctness issue. " + - "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule.") + "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule."), + DeprecatedConfig(CONVERT_CTAS.key, "3.1", + s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) From c0874ba9f13b9802eef4418490020692e37652ba Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 7 Dec 2020 13:35:37 -0800 Subject: [PATCH 0693/1009] [SPARK-33480][SQL][FOLLOWUP] do not expose user data in error message ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30412. This PR updates the error message of char/varchar table insertion length check, to not expose user data. ### Why are the changes needed? This is risky to expose user data in the error message, especially the string data, as it may contain sensitive data. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated tests Closes #30653 from cloud-fan/minor2. 
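For illustration, a REPL-style sketch of the new message shape (it mirrors the updated tests below; the table name and `parquet` format are arbitrary choices, not mandated by the patch):

```scala
// Sketch: an over-length insert into a CHAR(5) column still fails, but the error now
// reports only the input length instead of echoing the value itself.
spark.sql("CREATE TABLE t(c CHAR(5)) USING parquet")
spark.sql("INSERT INTO t VALUES ('123456')")
// => org.apache.spark.SparkException whose cause message contains:
//    "input string of length 6 exceeds char type length limitation: 5"
```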
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/util/CharVarcharUtils.scala | 6 ++--- .../spark/sql/CharVarcharTestSuite.scala | 26 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index b551d9699f360..e42e384e4b86b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -183,9 +183,9 @@ object CharVarcharUtils extends Logging { private def raiseError(expr: Expression, typeName: String, length: Int): Expression = { val errorMsg = Concat(Seq( - Literal("input string '"), - expr, - Literal(s"' exceeds $typeName type length limitation: $length"))) + Literal("input string of length "), + Cast(Length(expr), StringType), + Literal(s" exceeds $typeName type length limitation: $length"))) Cast(RaiseError(errorMsg), StringType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index fcd334be7a6f7..b0f1198e46440 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -190,7 +190,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(null)) val e = intercept[SparkException](sql("INSERT INTO t VALUES ('123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -203,7 +203,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(1, null)) val e = intercept[SparkException](sql("INSERT INTO t VALUES (1, '123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } } @@ -215,7 +215,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Row(null))) val e = intercept[SparkException](sql("INSERT INTO t SELECT struct('123456')")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -226,7 +226,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(null))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array('a', '123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -235,7 +235,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format") val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -246,7 +246,7 @@ trait CharVarcharTestSuite extends 
QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Map("a" -> null))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -255,10 +255,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format") val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (map('123456', 'a'))")) assert(e1.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', '123456'))")) assert(e2.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -269,7 +269,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Row(Seq(null)))) val e = intercept[SparkException](sql("INSERT INTO t SELECT struct(array('123456'))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -280,7 +280,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(Row(null)))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(struct('123456')))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -291,7 +291,7 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row(Seq(Seq(null)))) val e = intercept[SparkException](sql("INSERT INTO t VALUES (array(array('123456')))")) assert(e.getCause.getMessage.contains( - s"input string '123456' exceeds $typeName type length limitation: 5")) + s"input string of length 6 exceeds $typeName type length limitation: 5")) } } @@ -313,10 +313,10 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { checkAnswer(spark.table("t"), Row("1234 ", "1234")) val e1 = intercept[SparkException](sql("INSERT INTO t VALUES (123456, 1)")) assert(e1.getCause.getMessage.contains( - "input string '123456' exceeds char type length limitation: 5")) + "input string of length 6 exceeds char type length limitation: 5")) val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (1, 123456)")) assert(e2.getCause.getMessage.contains( - "input string '123456' exceeds varchar type length limitation: 5")) + "input string of length 6 exceeds varchar type length limitation: 5")) } } From 02508b68ecc56658a13d89bf798c5ef824ba2cdc Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 7 Dec 2020 15:32:10 -0800 Subject: [PATCH 0694/1009] [SPARK-33621][SQL] Add a way to inject data source rewrite rules ### What changes were proposed in this pull request? This PR adds a way to inject data source rewrite rules. ### Why are the changes needed? Right now `SparkSessionExtensions` allow us to inject optimization rules but they are added to operator optimization batch. There are cases when users need to run rules after the operator optimization batch (e.g. 
cases when a rule relies on the fact that expressions have been optimized). Currently, this is not possible. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? This PR comes with a new test. Closes #30577 from aokolnychyi/spark-33621-v3. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../spark/sql/SparkSessionExtensions.scala | 16 ++++++++++++++++ .../sql/internal/BaseSessionStateBuilder.scala | 4 +++- .../spark/sql/SparkSessionExtensionSuite.scala | 6 ++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala index 6952f4bfd0566..d5d969032a5e1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan} *
 * <ul>
 * <li>Analyzer Rules.</li>
 * <li>Check Analysis Rules.</li>
 * <li>Optimizer Rules.</li>
+ * <li>Data Source Rewrite Rules.</li>
 * <li>Planning Strategies.</li>
 * <li>Customized Parser.</li>
 * <li>(External) Catalog listeners.</li>
    • @@ -199,6 +200,21 @@ class SparkSessionExtensions { optimizerRules += builder } + private[this] val dataSourceRewriteRules = mutable.Buffer.empty[RuleBuilder] + + private[sql] def buildDataSourceRewriteRules(session: SparkSession): Seq[Rule[LogicalPlan]] = { + dataSourceRewriteRules.map(_.apply(session)).toSeq + } + + /** + * Inject an optimizer `Rule` builder that rewrites data source plans into the [[SparkSession]]. + * The injected rules will be executed after the operator optimization batch and before rules + * that depend on stats. + */ + def injectDataSourceRewriteRule(builder: RuleBuilder): Unit = { + dataSourceRewriteRules += builder + } + private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder] private[sql] def buildPlannerStrategies(session: SparkSession): Seq[Strategy] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 8101f9e291b44..f51ee11091d02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -273,7 +273,9 @@ abstract class BaseSessionStateBuilder( * * Note that this may NOT depend on the `optimizer` function. */ - protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = { + extensions.buildDataSourceRewriteRules(session) + } /** * Planner that converts optimized logical plans to physical plans. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index ea276bcec0f78..576ad26505d27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -88,6 +88,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } + test("SPARK-33621: inject data source rewrite rule") { + withSession(Seq(_.injectDataSourceRewriteRule(MyRule))) { session => + assert(session.sessionState.optimizer.dataSourceRewriteRules.contains(MyRule(session))) + } + } + test("inject spark planner strategy") { withSession(Seq(_.injectPlannerStrategy(MySparkStrategy))) { session => assert(session.sessionState.planner.strategies.contains(MySparkStrategy(session))) From e4d1c10760800563d2a30410b46e5b0cd2671c4d Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 8 Dec 2020 09:35:36 +0800 Subject: [PATCH 0695/1009] [SPARK-32320][PYSPARK] Remove mutable default arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is bad practice, and might lead to unexpected behaviour: https://florimond.dev/blog/articles/2018/08/python-mutable-defaults-are-the-source-of-all-evil/ ``` fokkodriesprongFan spark % grep -R "={}" python | grep def python/pyspark/resource/profile.py: def __init__(self, _java_resource_profile=None, _exec_req={}, _task_req={}): python/pyspark/sql/functions.py:def from_json(col, schema, options={}): python/pyspark/sql/functions.py:def to_json(col, options={}): python/pyspark/sql/functions.py:def schema_of_json(json, options={}): python/pyspark/sql/functions.py:def schema_of_csv(csv, options={}): python/pyspark/sql/functions.py:def to_csv(col, options={}): python/pyspark/sql/functions.py:def from_csv(col, schema, 
options={}): python/pyspark/sql/avro/functions.py:def from_avro(data, jsonFormatSchema, options={}): ``` ``` fokkodriesprongFan spark % grep -R "=\[\]" python | grep def python/pyspark/ml/tuning.py: def __init__(self, bestModel, avgMetrics=[], subModels=None): python/pyspark/ml/tuning.py: def __init__(self, bestModel, validationMetrics=[], subModels=None): ``` ### What changes were proposed in this pull request? Removing the mutable default arguments. ### Why are the changes needed? Removing the mutable default arguments, and changing the signature to `Optional[...]`. ### Does this PR introduce _any_ user-facing change? No 👍 ### How was this patch tested? Using the Flake8 bugbear code analysis plugin. Closes #29122 from Fokko/SPARK-32320. Authored-by: Fokko Driesprong Signed-off-by: Ruifeng Zheng --- dev/sparktestsupport/modules.py | 9 +++++---- dev/tox.ini | 2 +- python/mypy.ini | 2 ++ python/pyspark/ml/regression.py | 4 ++-- python/pyspark/ml/tuning.py | 8 ++++---- python/pyspark/ml/tuning.pyi | 4 ++-- python/pyspark/resource/profile.py | 6 +++--- python/pyspark/resource/profile.pyi | 6 +++--- python/pyspark/sql/avro/functions.py | 4 ++-- python/pyspark/sql/avro/functions.pyi | 4 ++-- python/pyspark/sql/functions.py | 18 ++++++++++-------- python/pyspark/sql/functions.pyi | 12 ++++++------ 12 files changed, 42 insertions(+), 37 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 5d8b714711774..87bfbdf64a49f 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -31,9 +31,10 @@ class Module(object): files have changed. """ - def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={}, - sbt_test_goals=(), python_test_goals=(), excluded_python_implementations=(), - test_tags=(), should_run_r_tests=False, should_run_build_tests=False): + def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), + environ=None, sbt_test_goals=(), python_test_goals=(), + excluded_python_implementations=(), test_tags=(), should_run_r_tests=False, + should_run_build_tests=False): """ Define a new module. 
@@ -62,7 +63,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags= self.source_file_prefixes = source_file_regexes self.sbt_test_goals = sbt_test_goals self.build_profile_flags = build_profile_flags - self.environ = environ + self.environ = environ or {} self.python_test_goals = python_test_goals self.excluded_python_implementations = excluded_python_implementations self.test_tags = test_tags diff --git a/dev/tox.ini b/dev/tox.ini index 7edf7d597fb58..43cd5877dfdb8 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -19,6 +19,6 @@ max-line-length=100 exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* [flake8] -select = E901,E999,F821,F822,F823,F401,F405 +select = E901,E999,F821,F822,F823,F401,F405,B006 exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 diff --git a/python/mypy.ini b/python/mypy.ini index 5103452a053be..ad4fcf7f317f0 100644 --- a/python/mypy.ini +++ b/python/mypy.ini @@ -102,6 +102,8 @@ disallow_untyped_defs = False ; Ignore errors in embedded third party code +no_implicit_optional = True + [mypy-pyspark.cloudpickle.*] ignore_errors = True diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index d37654a7388f5..8ecb68458ffbc 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -1801,7 +1801,7 @@ class AFTSurvivalRegression(_JavaRegressor, _AFTSurvivalRegressionParams, @keyword_only def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", - quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), + quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), # noqa: B005 quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1819,7 +1819,7 @@ def __init__(self, *, featuresCol="features", labelCol="label", predictionCol="p @since("1.6.0") def setParams(self, *, featuresCol="features", labelCol="label", predictionCol="prediction", fitIntercept=True, maxIter=100, tol=1E-6, censorCol="censor", - quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), + quantileProbabilities=list([0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]), # noqa: B005 quantilesCol=None, aggregationDepth=2, maxBlockSizeInMB=0.0): """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 2c083182de470..2bddfe822f29e 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -835,13 +835,13 @@ class CrossValidatorModel(Model, _CrossValidatorParams, MLReadable, MLWritable): .. versionadded:: 1.4.0 """ - def __init__(self, bestModel, avgMetrics=[], subModels=None): + def __init__(self, bestModel, avgMetrics=None, subModels=None): super(CrossValidatorModel, self).__init__() #: best model from cross validation self.bestModel = bestModel #: Average cross-validation metrics for each paramMap in #: CrossValidator.estimatorParamMaps, in the corresponding order. 
- self.avgMetrics = avgMetrics + self.avgMetrics = avgMetrics or [] #: sub model list from cross validation self.subModels = subModels @@ -1323,12 +1323,12 @@ class TrainValidationSplitModel(Model, _TrainValidationSplitParams, MLReadable, .. versionadded:: 2.0.0 """ - def __init__(self, bestModel, validationMetrics=[], subModels=None): + def __init__(self, bestModel, validationMetrics=None, subModels=None): super(TrainValidationSplitModel, self).__init__() #: best model from train validation split self.bestModel = bestModel #: evaluated validation metrics - self.validationMetrics = validationMetrics + self.validationMetrics = validationMetrics or [] #: sub models from train validation split self.subModels = subModels diff --git a/python/pyspark/ml/tuning.pyi b/python/pyspark/ml/tuning.pyi index e5f153d49e9c6..912abd4d7124a 100644 --- a/python/pyspark/ml/tuning.pyi +++ b/python/pyspark/ml/tuning.pyi @@ -104,7 +104,7 @@ class CrossValidatorModel( def __init__( self, bestModel: Model, - avgMetrics: List[float] = ..., + avgMetrics: Optional[List[float]] = ..., subModels: Optional[List[List[Model]]] = ..., ) -> None: ... def copy(self, extra: Optional[ParamMap] = ...) -> CrossValidatorModel: ... @@ -171,7 +171,7 @@ class TrainValidationSplitModel( def __init__( self, bestModel: Model, - validationMetrics: List[float] = ..., + validationMetrics: Optional[List[float]] = ..., subModels: Optional[List[Model]] = ..., ) -> None: ... def setEstimator(self, value: Estimator) -> TrainValidationSplitModel: ... diff --git a/python/pyspark/resource/profile.py b/python/pyspark/resource/profile.py index 1c59a1c4a123c..38a68bc74d97e 100644 --- a/python/pyspark/resource/profile.py +++ b/python/pyspark/resource/profile.py @@ -34,13 +34,13 @@ class ResourceProfile(object): This API is evolving. """ - def __init__(self, _java_resource_profile=None, _exec_req={}, _task_req={}): + def __init__(self, _java_resource_profile=None, _exec_req=None, _task_req=None): if _java_resource_profile is not None: self._java_resource_profile = _java_resource_profile else: self._java_resource_profile = None - self._executor_resource_requests = _exec_req - self._task_resource_requests = _task_req + self._executor_resource_requests = _exec_req or {} + self._task_resource_requests = _task_req or {} @property def id(self): diff --git a/python/pyspark/resource/profile.pyi b/python/pyspark/resource/profile.pyi index 04838692436df..c8f23a5cac370 100644 --- a/python/pyspark/resource/profile.pyi +++ b/python/pyspark/resource/profile.pyi @@ -22,7 +22,7 @@ from pyspark.resource.requests import ( # noqa: F401 TaskResourceRequest as TaskResourceRequest, TaskResourceRequests as TaskResourceRequests, ) -from typing import overload, Dict, Union +from typing import overload, Dict, Union, Optional from py4j.java_gateway import JavaObject # type: ignore[import] class ResourceProfile: @@ -35,8 +35,8 @@ class ResourceProfile: def __init__( self, _java_resource_profile: None = ..., - _exec_req: Dict[str, ExecutorResourceRequest] = ..., - _task_req: Dict[str, TaskResourceRequest] = ..., + _exec_req: Optional[Dict[str, ExecutorResourceRequest]] = ..., + _task_req: Optional[Dict[str, TaskResourceRequest]] = ..., ) -> None: ... @property def id(self) -> int: ... 
diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index ce322814e34f8..7e4ceb20cd2c4 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -25,7 +25,7 @@ from pyspark.util import _print_missing_jar -def from_avro(data, jsonFormatSchema, options={}): +def from_avro(data, jsonFormatSchema, options=None): """ Converts a binary column of Avro format into its corresponding catalyst value. The specified schema must match the read data, otherwise the behavior is undefined: @@ -70,7 +70,7 @@ def from_avro(data, jsonFormatSchema, options={}): sc = SparkContext._active_spark_context try: jc = sc._jvm.org.apache.spark.sql.avro.functions.from_avro( - _to_java_column(data), jsonFormatSchema, options) + _to_java_column(data), jsonFormatSchema, options or {}) except TypeError as e: if str(e) == "'JavaPackage' object is not callable": _print_missing_jar("Avro", "avro", "avro", sc.version) diff --git a/python/pyspark/sql/avro/functions.pyi b/python/pyspark/sql/avro/functions.pyi index 4c2e3814a9e94..49881335d8fcc 100644 --- a/python/pyspark/sql/avro/functions.pyi +++ b/python/pyspark/sql/avro/functions.pyi @@ -16,12 +16,12 @@ # specific language governing permissions and limitations # under the License. -from typing import Dict +from typing import Dict, Optional from pyspark.sql._typing import ColumnOrName from pyspark.sql.column import Column def from_avro( - data: ColumnOrName, jsonFormatSchema: str, options: Dict[str, str] = ... + data: ColumnOrName, jsonFormatSchema: str, options: Optional[Dict[str, str]] = ... ) -> Column: ... def to_avro(data: ColumnOrName, jsonFormatSchema: str = ...) -> Column: ... diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 4dc3129fd6bc2..f612d2d0366f2 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -80,8 +80,10 @@ def _invoke_binary_math_function(name, col1, col2): ) -def _options_to_str(options): - return {key: to_str(value) for (key, value) in options.items()} +def _options_to_str(options=None): + if options: + return {key: to_str(value) for (key, value) in options.items()} + return {} def lit(col): @@ -3454,7 +3456,7 @@ def json_tuple(col, *fields): return Column(jc) -def from_json(col, schema, options={}): +def from_json(col, schema, options=None): """ Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` as keys type, :class:`StructType` or :class:`ArrayType` with @@ -3510,7 +3512,7 @@ def from_json(col, schema, options={}): return Column(jc) -def to_json(col, options={}): +def to_json(col, options=None): """ Converts a column containing a :class:`StructType`, :class:`ArrayType` or a :class:`MapType` into a JSON string. Throws an exception, in the case of an unsupported type. @@ -3557,7 +3559,7 @@ def to_json(col, options={}): return Column(jc) -def schema_of_json(json, options={}): +def schema_of_json(json, options=None): """ Parses a JSON string and infers its schema in DDL format. @@ -3594,7 +3596,7 @@ def schema_of_json(json, options={}): return Column(jc) -def schema_of_csv(csv, options={}): +def schema_of_csv(csv, options=None): """ Parses a CSV string and infers its schema in DDL format. @@ -3627,7 +3629,7 @@ def schema_of_csv(csv, options={}): return Column(jc) -def to_csv(col, options={}): +def to_csv(col, options=None): """ Converts a column containing a :class:`StructType` into a CSV string. Throws an exception, in the case of an unsupported type. 
@@ -4038,7 +4040,7 @@ def sequence(start, stop, step=None): _to_java_column(start), _to_java_column(stop), _to_java_column(step))) -def from_csv(col, schema, options={}): +def from_csv(col, schema, options=None): """ Parses a column containing a CSV string to a row with the specified schema. Returns `null`, in the case of an unparseable string. diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index 50e178df9996f..acb17a2657d00 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -196,12 +196,12 @@ def json_tuple(col: ColumnOrName, *fields: str) -> Column: ... def from_json( col: ColumnOrName, schema: Union[ArrayType, StructType, Column, str], - options: Dict[str, str] = ..., + options: Optional[Dict[str, str]] = ..., ) -> Column: ... -def to_json(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def schema_of_json(json: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def schema_of_csv(csv: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... -def to_csv(col: ColumnOrName, options: Dict[str, str] = ...) -> Column: ... +def to_json(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def schema_of_json(json: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def schema_of_csv(csv: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... +def to_csv(col: ColumnOrName, options: Optional[Dict[str, str]] = ...) -> Column: ... def size(col: ColumnOrName) -> Column: ... def array_min(col: ColumnOrName) -> Column: ... def array_max(col: ColumnOrName) -> Column: ... @@ -223,7 +223,7 @@ def sequence( def from_csv( col: ColumnOrName, schema: Union[StructType, Column, str], - options: Dict[str, str] = ..., + options: Optional[Dict[str, str]] = ..., ) -> Column: ... @overload def transform(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... From b2a79306ef7b330c5bf4dc1337ed80ebd6e08d0c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 18:59:15 -0800 Subject: [PATCH 0696/1009] [SPARK-33680][SQL][TESTS][FOLLOWUP] Fix more test suites to have explicit confs ### What changes were proposed in this pull request? This is a follow-up for SPARK-33680 to remove the assumption on the default value of `spark.sql.adaptive.enabled` . ### Why are the changes needed? According to the test result https://github.com/apache/spark/pull/30628#issuecomment-739866168, the [previous run](https://github.com/apache/spark/pull/30628#issuecomment-739641105) didn't run all tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30655 from dongjoon-hyun/SPARK-33680. 
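A condensed sketch of the pattern this follow-up applies across the touched suites (suite boilerplate and the actual plan assertions are elided):

```scala
// Plan-shape tests now pin the configs they depend on instead of assuming the
// default value of spark.sql.adaptive.enabled.
withSQLConf(
    SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
    SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") {
  // ... assertions on the executed plan that would differ under AQE ...
}
```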
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/DataFrameAggregateSuite.scala | 4 +- .../apache/spark/sql/DataFrameJoinSuite.scala | 4 +- .../org/apache/spark/sql/JoinSuite.scala | 9 ++- .../spark/sql/execution/PlannerSuite.scala | 73 +++++++++++++------ .../spark/sql/sources/BucketedReadSuite.scala | 5 +- .../SqlResourceWithActualMetricsSuite.scala | 11 ++- 6 files changed, 74 insertions(+), 32 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index d4e64aa03df0e..78983a4bd1a29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1001,7 +1001,9 @@ class DataFrameAggregateSuite extends QueryTest Seq(true, false).foreach { value => test(s"SPARK-31620: agg with subquery (whole-stage-codegen = $value)") { - withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> value.toString) { + withSQLConf( + SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> value.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { sql("create temporary view t1 as select * from values (1, 2) as t1(a, b)") sql("create temporary view t2 as select * from values (3, 4) as t2(c, d)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 14d03a30453ac..c317f562c65dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -335,7 +335,9 @@ class DataFrameJoinSuite extends QueryTest withTempDatabase { dbName => withTable(table1Name, table2Name) { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { spark.range(50).write.saveAsTable(s"$dbName.$table1Name") spark.range(100).write.saveAsTable(s"$dbName.$table2Name") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 8755dccb801c2..a728e5cc17001 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -1107,6 +1107,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan test("SPARK-32330: Preserve shuffled hash join build side partitioning") { withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", SQLConf.SHUFFLE_PARTITIONS.key -> "2", SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { @@ -1130,6 +1131,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Test broadcast hash join withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") { Seq("inner", "left_outer").foreach(joinType => { val plan = df1.join(df2, $"k1" === $"k2", joinType) @@ -1146,6 +1148,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Test shuffled hash join withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", SQLConf.SHUFFLE_PARTITIONS.key -> "2", SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { @@ -1253,6 +1256,7 @@ class 
JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan withSQLConf( // Set broadcast join threshold and number of shuffle partitions, // as shuffled hash join depends on these two configs. + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", SQLConf.SHUFFLE_PARTITIONS.key -> "2") { val smjDF = df1.join(df2, joinExprs, "full") @@ -1284,7 +1288,9 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) inputDFs.foreach { case (df1, df2, joinType) => // Test broadcast hash join - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val bhjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) assert(bhjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : BroadcastHashJoinExec) => true @@ -1305,6 +1311,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // Set broadcast join threshold and number of shuffle partitions, // as shuffled hash join depends on these two configs. SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", SQLConf.SHUFFLE_PARTITIONS.key -> "2") { val shjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) assert(shjCodegenDF.queryExecution.executedPlan.collect { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 5e30f846307ae..4e01d1c06f64e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -877,7 +877,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the project should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("df1", "df2") { spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") spark.range(20).selectExpr("id AS key", "0").repartition($"key").createTempView("df2") @@ -897,7 +899,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: aliases should be handled properly in PartitioningCollection output" + " partitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -927,7 +931,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: aliases should be handled properly in HashPartitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -955,7 +961,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: alias handling should happen 
properly for RangePartitioning") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val df = spark.range(1, 100) .select(col("id").as("id1")).groupBy("id1").count() // Plan for this will be Range -> ProjectWithAlias -> HashAggregate -> HashAggregate @@ -976,7 +984,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: aliased should be handled properly " + "for partitioning and sortorder involving complex expressions") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).select(col("id").as("id1")).createTempView("t1") spark.range(20).select(col("id").as("id2")).createTempView("t2") @@ -1014,7 +1024,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33399: alias handling should happen properly for SinglePartition") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val df = spark.range(1, 100, 1, 1) .select(col("id").as("id1")).groupBy("id1").count() val planned = df.queryExecution.executedPlan @@ -1031,7 +1043,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { test("SPARK-33399: No extra exchanges in case of" + " [Inner Join -> Project with aliases -> HashAggregate]") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1060,7 +1074,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-33400: Normalization of sortOrder should take care of sameOrderExprs") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2", "t3") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1091,7 +1107,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("sort order doesn't have repeated expressions") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("t1", "t2") { spark.range(10).repartition($"id").createTempView("t1") spark.range(20).repartition($"id").createTempView("t2") @@ -1117,7 +1135,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases to expressions should not be replaced") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { withTempView("df1", "df2") { spark.range(10).selectExpr("id AS key", "0").repartition($"key").createTempView("df1") spark.range(20).selectExpr("id AS key", "0").repartition($"key").createTempView("df2") @@ 
-1143,7 +1163,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the aggregate expressions should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { val t1 = spark.range(10).selectExpr("floor(id/4) as k1") val t2 = spark.range(20).selectExpr("floor(id/4) as k2") @@ -1160,7 +1182,9 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the object hash/sort aggregate expressions should not introduce extra shuffle") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { Seq(true, false).foreach { useObjectHashAgg => withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> useObjectHashAgg.toString) { val t1 = spark.range(10).selectExpr("floor(id/4) as k1") @@ -1185,21 +1209,22 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("aliases in the sort aggregate expressions should not introduce extra sort") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { - withSQLConf(SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { - val t1 = spark.range(10).selectExpr("floor(id/4) as k1") - val t2 = spark.range(20).selectExpr("floor(id/4) as k2") + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.USE_OBJECT_HASH_AGG.key -> "false") { + val t1 = spark.range(10).selectExpr("floor(id/4) as k1") + val t2 = spark.range(20).selectExpr("floor(id/4) as k2") - val agg1 = t1.groupBy("k1").agg(collect_list("k1")).withColumnRenamed("k1", "k3") - val agg2 = t2.groupBy("k2").agg(collect_list("k2")) + val agg1 = t1.groupBy("k1").agg(collect_list("k1")).withColumnRenamed("k1", "k3") + val agg2 = t2.groupBy("k2").agg(collect_list("k2")) - val planned = agg1.join(agg2, $"k3" === $"k2").queryExecution.executedPlan - assert(planned.collect { case s: SortAggregateExec => s }.nonEmpty) + val planned = agg1.join(agg2, $"k3" === $"k2").queryExecution.executedPlan + assert(planned.collect { case s: SortAggregateExec => s }.nonEmpty) - // We expect two SortExec nodes on each side of join. - val sorts = planned.collect { case s: SortExec => s } - assert(sorts.size == 4) - } + // We expect two SortExec nodes on each side of join. 
+ val sorts = planned.collect { case s: SortExec => s } + assert(sorts.size == 4) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 167e87dd3d5cb..0ff9303421ade 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.{FileSourceScanExec, SortExec, SparkPlan} -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, DisableAdaptiveExecutionSuite} import org.apache.spark.sql.execution.datasources.BucketingUtils import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec @@ -39,7 +39,8 @@ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.util.Utils import org.apache.spark.util.collection.BitSet -class BucketedReadWithoutHiveSupportSuite extends BucketedReadSuite with SharedSparkSession { +class BucketedReadWithoutHiveSupportSuite + extends BucketedReadSuite with DisableAdaptiveExecutionSuite with SharedSparkSession { protected override def beforeAll(): Unit = { super.beforeAll() assert(spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "in-memory") diff --git a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala index 0c0e3ac90510e..1510e8957f9ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/status/api/v1/sql/SqlResourceWithActualMetricsSuite.scala @@ -26,7 +26,9 @@ import org.json4s.jackson.JsonMethods import org.apache.spark.SparkConf import org.apache.spark.deploy.history.HistoryServerSuite.getContentAndCode import org.apache.spark.sql.DataFrame +import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.execution.metric.SQLMetricsTestUtils +import org.apache.spark.sql.internal.SQLConf.ADAPTIVE_EXECUTION_ENABLED import org.apache.spark.sql.test.SharedSparkSession case class Person(id: Int, name: String, age: Int) @@ -35,7 +37,8 @@ case class Salary(personId: Int, salary: Double) /** * Sql Resource Public API Unit Tests running query and extracting the metrics. */ -class SqlResourceWithActualMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils { +class SqlResourceWithActualMetricsSuite + extends SharedSparkSession with SQLMetricsTestUtils with SQLHelper { import testImplicits._ @@ -52,8 +55,10 @@ class SqlResourceWithActualMetricsSuite extends SharedSparkSession with SQLMetri test("Check Sql Rest Api Endpoints") { // Materalize result DataFrame - val count = getDF().count() - assert(count == 2, s"Expected Query Count is 2 but received: $count") + withSQLConf(ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val count = getDF().count() + assert(count == 2, s"Expected Query Count is 2 but received: $count") + } // Spark apps launched by local-mode seems not having `attemptId` as default // so UT is just added for existing endpoints. 
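Where an entire suite depends on adaptive execution being off, the `BucketedReadSuite` change above takes the suite-level route instead of repeating the conf in each test: it mixes in `DisableAdaptiveExecutionSuite`. A hedged sketch of that approach (the suite name is hypothetical, and this is not an excerpt from the patch):
```
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite
import org.apache.spark.sql.test.SharedSparkSession

// The mixin is intended to run every test in the suite with adaptive execution
// disabled, so individual tests need no per-test withSQLConf entry.
class MyPlanShapeSuite extends QueryTest with SharedSparkSession
  with DisableAdaptiveExecutionSuite {
  // tests that assert on exact physical plan shapes go here
}
```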
From ebd8b9357af296b8859e65577ab1e16593fab50d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 8 Dec 2020 11:04:29 +0800 Subject: [PATCH 0697/1009] [SPARK-33609][ML] word2vec reduce broadcast size ### What changes were proposed in this pull request? 1, directly use float vectors instead of converting to double vectors, this is about 2x faster than using vec.axpy; 2, mark `wordList` and `wordVecNorms` lazy 3, avoid slicing in computation of `wordVecNorms` ### Why are the changes needed? halve broadcast size ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30548 from zhengruifeng/w2v_float32_transform. Lead-authored-by: Ruifeng Zheng Co-authored-by: zhengruifeng Signed-off-by: Ruifeng Zheng --- .../apache/spark/ml/feature/Word2Vec.scala | 32 +++++++++++-------- .../apache/spark/mllib/feature/Word2Vec.scala | 27 +++++++--------- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 9b5f5a619e02c..0b9c1b570d943 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -285,27 +285,33 @@ class Word2VecModel private[ml] ( @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { val outputSchema = transformSchema(dataset.schema, logging = true) - val vectors = wordVectors.getVectors - .mapValues(vv => Vectors.dense(vv.map(_.toDouble))) - .map(identity).toMap // mapValues doesn't return a serializable map (SI-7005) - val bVectors = dataset.sparkSession.sparkContext.broadcast(vectors) - val d = $(vectorSize) - val emptyVec = Vectors.sparse(d, Array.emptyIntArray, Array.emptyDoubleArray) - val word2Vec = udf { sentence: Seq[String] => + + val bcModel = dataset.sparkSession.sparkContext.broadcast(this.wordVectors) + val size = $(vectorSize) + val emptyVec = Vectors.sparse(size, Array.emptyIntArray, Array.emptyDoubleArray) + val transformer = udf { sentence: Seq[String] => if (sentence.isEmpty) { emptyVec } else { - val sum = Vectors.zeros(d) + val wordIndices = bcModel.value.wordIndex + val wordVectors = bcModel.value.wordVectors + val array = Array.ofDim[Double](size) + var count = 0 sentence.foreach { word => - bVectors.value.get(word).foreach { v => - BLAS.axpy(1.0, v, sum) + wordIndices.get(word).foreach { index => + val offset = index * size + var i = 0 + while (i < size) { array(i) += wordVectors(offset + i); i += 1 } } + count += 1 } - BLAS.scal(1.0 / sentence.size, sum) - sum + val vec = Vectors.dense(array) + BLAS.scal(1.0 / count, vec) + vec } } - dataset.withColumn($(outputCol), word2Vec(col($(inputCol))), + + dataset.withColumn($(outputCol), transformer(col($(inputCol))), outputSchema($(outputCol)).metadata) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index eeb583f84ca8b..8a6317a910146 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -502,22 +502,15 @@ class Word2VecModel private[spark] ( private val vectorSize = wordVectors.length / numWords // wordList: Ordered list of words obtained from wordIndex. 
- private val wordList: Array[String] = { - val (wl, _) = wordIndex.toSeq.sortBy(_._2).unzip - wl.toArray + private lazy val wordList: Array[String] = { + wordIndex.toSeq.sortBy(_._2).iterator.map(_._1).toArray } // wordVecNorms: Array of length numWords, each value being the Euclidean norm // of the wordVector. - private val wordVecNorms: Array[Float] = { - val wordVecNorms = new Array[Float](numWords) - var i = 0 - while (i < numWords) { - val vec = wordVectors.slice(i * vectorSize, i * vectorSize + vectorSize) - wordVecNorms(i) = blas.snrm2(vectorSize, vec, 1) - i += 1 - } - wordVecNorms + private lazy val wordVecNorms: Array[Float] = { + val size = vectorSize + Array.tabulate(numWords)(i => blas.snrm2(size, wordVectors, i * size, 1)) } @Since("1.5.0") @@ -538,9 +531,13 @@ class Word2VecModel private[spark] ( @Since("1.1.0") def transform(word: String): Vector = { wordIndex.get(word) match { - case Some(ind) => - val vec = wordVectors.slice(ind * vectorSize, ind * vectorSize + vectorSize) - Vectors.dense(vec.map(_.toDouble)) + case Some(index) => + val size = vectorSize + val offset = index * size + val array = Array.ofDim[Double](size) + var i = 0 + while (i < size) { array(i) = wordVectors(offset + i); i += 1 } + Vectors.dense(array) case None => throw new IllegalStateException(s"$word not in vocabulary") } From 8bcebfa59a64123f014c01bc4fb5de8d9624f8f4 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 7 Dec 2020 19:09:59 -0800 Subject: [PATCH 0698/1009] [SPARK-33698][BUILD][TESTS] Fix the build error of OracleIntegrationSuite for Scala 2.13 ### What changes were proposed in this pull request? This PR fixes a build error of `OracleIntegrationSuite` with Scala 2.13. ### Why are the changes needed? Build should pass with Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the build pass with the following command. ``` $ build/sbt -Pdocker-integration-tests -Pscala-2.13 "docker-integration-tests/test:compile" ``` Closes #30660 from sarutak/fix-docker-integration-tests-for-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 60eb1c055a38e..3937d62afacc2 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -401,7 +401,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark val values = rows(0) assert(values.getDecimal(0).equals(new java.math.BigDecimal("12312321321321312312312312123"))) assert(values.getInt(1).equals(1)) - assert(values.getBoolean(2).equals(false)) + assert(values.getBoolean(2) == false) } test("SPARK-22303: handle BINARY_DOUBLE and BINARY_FLOAT as DoubleType and FloatType") { From 5aefc49b0f7047f2c928c18b371098314c2f59f0 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 8 Dec 2020 03:54:16 +0000 Subject: [PATCH 0699/1009] [SPARK-33664][SQL] Migrate ALTER TABLE ... RENAME TO to use UnresolvedTableOrView to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER [TABLE|ViEW] ... 
RENAME TO` to use `UnresolvedTableOrView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedTableOrView` for table/view resolution. Note that `AlterTableRenameCommand` internally resolves to a temp view first, so there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30610 from imback82/rename_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/ResolveCatalogs.scala | 6 ------ .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++---- .../sql/catalyst/plans/logical/statements.scala | 8 -------- .../sql/catalyst/plans/logical/v2Commands.scala | 10 ++++++---- .../sql/catalyst/parser/DDLParserSuite.scala | 10 ++++++++-- .../analysis/ResolveSessionCatalog.scala | 3 +-- .../datasources/v2/DataSourceV2Strategy.scala | 8 ++++++-- .../sql/connector/DataSourceV2SQLSuite.scala | 13 ++++++++++--- .../v2/jdbc/JDBCTableCatalogSuite.scala | 16 +++++++--------- 9 files changed, 46 insertions(+), 40 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index a90de697bc084..6d89414ba106d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -133,12 +133,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + s"because view support in catalog has not been implemented yet") - case RenameTableStatement(NonSessionCatalogAndTable(catalog, oldName), newNameParts, isView) => - if (isView) { - throw new AnalysisException("Renaming view is not supported in v2 catalogs.") - } - RenameTable(catalog.asTableCatalog, oldName.asIdentifier, newNameParts.asIdentifier) - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a22383c62bf74..42c67ac963cbe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3834,7 +3834,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[RenameTableStatement]] command. + * Create a [[RenameTable]] command. 
* * For example: * {{{ @@ -3843,10 +3843,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRenameTable(ctx: RenameTableContext): LogicalPlan = withOrigin(ctx) { - RenameTableStatement( - visitMultipartIdentifier(ctx.from), + val isView = ctx.VIEW != null + val relationStr = if (isView) "VIEW" else "TABLE" + RenameTable( + UnresolvedTableOrView( + visitMultipartIdentifier(ctx.from), + s"ALTER $relationStr ... RENAME TO"), visitMultipartIdentifier(ctx.to), - ctx.VIEW != null) + isView) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 1763547792e35..8f0889bbcebd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -338,14 +338,6 @@ case class AlterViewAsStatement( originalText: String, query: LogicalPlan) extends ParsedStatement -/** - * ALTER TABLE ... RENAME TO command, as parsed from SQL. - */ -case class RenameTableStatement( - oldName: Seq[String], - newName: Seq[String], - isView: Boolean) extends ParsedStatement - /** * A DROP VIEW statement, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 67056470418fe..6f35364cce131 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -456,12 +456,14 @@ case class AlterTable( } /** - * The logical plan of the ALTER TABLE RENAME command. + * The logical plan of the ALTER [TABLE|VIEW] ... RENAME TO command. */ case class RenameTable( - catalog: TableCatalog, - oldIdent: Identifier, - newIdent: Identifier) extends Command + child: LogicalPlan, + newName: Seq[String], + isView: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} /** * The logical plan of the SHOW TABLE command. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e98ec6a667a73..f925be8617b47 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1103,10 +1103,16 @@ class DDLParserSuite extends AnalysisTest { test("alter table/view: rename table/view") { comparePlans( parsePlan("ALTER TABLE a.b.c RENAME TO x.y.z"), - RenameTableStatement(Seq("a", "b", "c"), Seq("x", "y", "z"), isView = false)) + RenameTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO"), + Seq("x", "y", "z"), + isView = false)) comparePlans( parsePlan("ALTER VIEW a.b.c RENAME TO x.y.z"), - RenameTableStatement(Seq("a", "b", "c"), Seq("x", "y", "z"), isView = true)) + RenameTable( + UnresolvedTableOrView(Seq("a", "b", "c"), "ALTER VIEW ... 
RENAME TO"), + Seq("x", "y", "z"), + isView = true)) } test("describe table column") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index a87ed4b6275d8..7e5f39e398a6b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -237,8 +237,7 @@ class ResolveSessionCatalog( } AlterDatabaseSetLocationCommand(ns.head, location) - // v1 RENAME TABLE supports temp view. - case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) => + case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) // Use v1 command to describe (temp) view, as v2 catalog doesn't support view yet. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5289d359f7809..075d2a43dce4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -257,8 +257,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(catalog, oldIdent, newIdent) => - RenameTableExec(catalog, oldIdent, newIdent) :: Nil + case RenameTable(ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + if (isView) { + throw new AnalysisException( + "Cannot rename a table with ALTER VIEW. Please use ALTER TABLE instead.") + } + RenameTableExec(catalog, oldIdent, newIdent.asIdentifier) :: Nil case AlterNamespaceSetProperties(ResolvedNamespace(catalog, ns), properties) => AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 6838a7644a29f..2673577aecf36 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1975,10 +1975,16 @@ class DataSourceV2SQLSuite test("AlterTable: rename table basic test") { withTable("testcat.ns1.new") { - sql(s"CREATE TABLE testcat.ns1.ns2.old USING foo AS SELECT id, data FROM source") + sql("CREATE TABLE testcat.ns1.ns2.old USING foo AS SELECT id, data FROM source") checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq(Row("ns1.ns2", "old"))) - sql(s"ALTER TABLE testcat.ns1.ns2.old RENAME TO ns1.new") + val e = intercept[AnalysisException] { + sql("ALTER VIEW testcat.ns1.ns2.old RENAME TO ns1.new") + } + assert(e.getMessage.contains( + "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead")) + + sql("ALTER TABLE testcat.ns1.ns2.old RENAME TO ns1.new") checkAnswer(sql("SHOW TABLES FROM testcat.ns1.ns2"), Seq.empty) checkAnswer(sql("SHOW TABLES FROM testcat.ns1"), Seq(Row("ns1", "new"))) } @@ -1988,7 +1994,8 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { sql(s"ALTER VIEW testcat.ns.tbl RENAME TO ns.view") } - assert(e.getMessage.contains("Renaming view is not supported in v2 catalogs")) + assert(e.getMessage.contains( + "Table or view not found for 'ALTER VIEW ... RENAME TO': testcat.ns.tbl")) } test("ANALYZE TABLE") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 9e9df7db1e1c6..e764f71867426 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -23,7 +23,7 @@ import org.apache.log4j.Level import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -106,18 +106,16 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { Seq(Row("test", "dst_table"), Row("test", "people"))) } // Rename not existing table or namespace - val exp1 = intercept[NoSuchTableException] { - sql(s"ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") + val exp1 = intercept[AnalysisException] { + sql("ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") } assert(exp1.getMessage.contains( - "Failed table renaming from test.not_existing_table to test.dst_table")) - assert(exp1.cause.get.getMessage.contains("Table \"not_existing_table\" not found")) - val exp2 = intercept[NoSuchNamespaceException] { - sql(s"ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") + "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.test.not_existing_table")) + val exp2 = intercept[AnalysisException] { + sql("ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } assert(exp2.getMessage.contains( - "Failed table renaming from bad_test.not_existing_table to test.dst_table")) - assert(exp2.cause.get.getMessage.contains("Schema \"bad_test\" not found")) + "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.bad_test.not_existing_table")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => From 3a6546d3858e7c184f36cb6c4fd454f2142460f0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Dec 2020 14:11:39 +0900 Subject: [PATCH 0700/1009] [MINOR][INFRA] Add -Pdocker-integration-tests to GitHub Action Scala 2.13 build job ### What changes were proposed in this pull request? This aims to add `-Pdocker-integration-tests` at GitHub Action job for Scala 2.13 compilation. ### Why are the changes needed? We fixed Scala 2.13 compilation of this module at https://github.com/apache/spark/pull/30660 . This PR will prevent accidental regression at that module. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GitHub Action Scala 2.13 job. Closes #30661 from dongjoon-hyun/SPARK-DOCKER-IT. Authored-by: Dongjoon Hyun Signed-off-by: Kousuke Saruta --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 72b2caf907151..e40d6362fd23f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 031c5ef280e0cba8c4718a6457a44b6cccb17f46 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 7 Dec 2020 23:10:35 -0800 Subject: [PATCH 0701/1009] [SPARK-33679][SQL] Enable spark.sql.adaptive.enabled by default ### What changes were proposed in this pull request? This PR aims to enable `spark.sql.adaptive.enabled` by default for Apache Spark **3.2.0**. ### Why are the changes needed? By switching the default for Apache Spark 3.2, the whole community can focus more on the stabilizing this feature in the various situation more seriously. ### Does this PR introduce _any_ user-facing change? Yes, but this is an improvement and it's supposed to have no bugs. ### How was this patch tested? Pass the CIs. Closes #30628 from dongjoon-hyun/SPARK-33679. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 4 ++++ .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 2c86e7a932637..65a769da70aea 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Spark SQL 3.1 to 3.2 + + - In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`. + ## Upgrading from Spark SQL 3.0 to 3.1 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. 
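As the new migration-guide entry added above says, the previous behavior can be restored by turning the flag off explicitly. A minimal illustration (not part of this patch; standard `SparkSession` APIs):
```
// Restore the pre-3.2 behavior for the current session:
spark.conf.set("spark.sql.adaptive.enabled", "false")

// or equivalently through SQL:
spark.sql("SET spark.sql.adaptive.enabled=false")

// or once, when the session is built:
// SparkSession.builder().config("spark.sql.adaptive.enabled", "false").getOrCreate()
```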
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index bc62213bdb740..11fe6c7894f76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -404,7 +404,7 @@ object SQLConf { "middle of query execution, based on accurate runtime statistics.") .version("1.6.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val ADAPTIVE_EXECUTION_FORCE_APPLY = buildConf("spark.sql.adaptive.forceApply") .internal() From 99613cd5815b2de12274027dee0c0a6c0c57bd95 Mon Sep 17 00:00:00 2001 From: luluorta Date: Tue, 8 Dec 2020 20:45:25 +0900 Subject: [PATCH 0702/1009] [SPARK-33677][SQL] Skip LikeSimplification rule if pattern contains any escapeChar ### What changes were proposed in this pull request? `LikeSimplification` rule does not work correctly for many cases that have patterns containing escape characters, for example: `SELECT s LIKE 'm%aca' ESCAPE '%' FROM t` `SELECT s LIKE 'maacaa' ESCAPE 'a' FROM t` For simpilicy, this PR makes this rule just be skipped if `pattern` contains any `escapeChar`. ### Why are the changes needed? Result corrupt. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added Unit test. Closes #30625 from luluorta/SPARK-33677. Authored-by: luluorta Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/optimizer/expressions.scala | 18 ++++--- .../optimizer/LikeSimplificationSuite.scala | 48 +++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 14 ++++++ 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 4cdaf10dd3c60..7666c4a53e5dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -543,27 +543,33 @@ object LikeSimplification extends Rule[LogicalPlan] { private val equalTo = "([^_%]*)".r def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case Like(input, Literal(pattern, StringType), escapeChar) => + case l @ Like(input, Literal(pattern, StringType), escapeChar) => if (pattern == null) { // If pattern is null, return null value directly, since "col like null" == null. Literal(null, BooleanType) } else { - val escapeStr = String.valueOf(escapeChar) pattern.toString match { - case startsWith(prefix) if !prefix.endsWith(escapeStr) => + // There are three different situations when pattern containing escapeChar: + // 1. pattern contains invalid escape sequence, e.g. 'm\aca' + // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' + // 3. pattern contains escaped escape character, e.g. 'ma\\ca' + // Although there are patterns can be optimized if we handle the escape first, we just + // skip this rule if pattern contains any escapeChar for simplicity. + case p if p.contains(escapeChar) => l + case startsWith(prefix) => StartsWith(input, Literal(prefix)) case endsWith(postfix) => EndsWith(input, Literal(postfix)) // 'a%a' pattern is basically same with 'a%' && '%a'. // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
- case startsAndEndsWith(prefix, postfix) if !prefix.endsWith(escapeStr) => + case startsAndEndsWith(prefix, postfix) => And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))) - case contains(infix) if !infix.endsWith(escapeStr) => + case contains(infix) => Contains(input, Literal(infix)) case equalTo(str) => EqualTo(input, Literal(str)) - case _ => Like(input, Literal.create(pattern, StringType), escapeChar) + case _ => l } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index 436f62e4225c8..1812dce0da426 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -116,4 +116,52 @@ class LikeSimplificationSuite extends PlanTest { val optimized2 = Optimize.execute(originalQuery2.analyze) comparePlans(optimized2, originalQuery2.analyze) } + + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { + val originalQuery1 = + testRelation + .where(('a like "abc%") || ('a like "\\abc%")) + val optimized1 = Optimize.execute(originalQuery1.analyze) + val correctAnswer1 = testRelation + .where(StartsWith('a, "abc") || ('a like "\\abc%")) + .analyze + comparePlans(optimized1, correctAnswer1) + + val originalQuery2 = + testRelation + .where(('a like "%xyz") || ('a like "%xyz\\")) + val optimized2 = Optimize.execute(originalQuery2.analyze) + val correctAnswer2 = testRelation + .where(EndsWith('a, "xyz") || ('a like "%xyz\\")) + .analyze + comparePlans(optimized2, correctAnswer2) + + val originalQuery3 = + testRelation + .where(('a like ("@bc%def", '@')) || ('a like "abc%def")) + val optimized3 = Optimize.execute(originalQuery3.analyze) + val correctAnswer3 = testRelation + .where(('a like ("@bc%def", '@')) || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) + .analyze + comparePlans(optimized3, correctAnswer3) + + val originalQuery4 = + testRelation + .where(('a like "%mn%") || ('a like ("%mn%", '%'))) + val optimized4 = Optimize.execute(originalQuery4.analyze) + val correctAnswer4 = testRelation + .where(Contains('a, "mn") || ('a like ("%mn%", '%'))) + .analyze + comparePlans(optimized4, correctAnswer4) + + val originalQuery5 = + testRelation + .where(('a like "abc") || ('a like ("abbc", 'b'))) + val optimized5 = Optimize.execute(originalQuery5.analyze) + val correctAnswer5 = testRelation + .where(('a === "abc") || ('a like ("abbc", 'b'))) + .analyze + comparePlans(optimized5, correctAnswer5) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 727482e551a8b..2eeb729ece3fb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3718,6 +3718,20 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { + withTempView("df") { + Seq("m@ca").toDF("s").createOrReplaceTempView("df") + + val e = intercept[AnalysisException] { + sql("SELECT s LIKE 'm%@ca' ESCAPE '%' FROM df").collect() + } + 
assert(e.message.contains("the pattern 'm%@ca' is invalid, " + + "the escape character is not allowed to precede '@'")) + + checkAnswer(sql("SELECT s LIKE 'm@@ca' ESCAPE '@' FROM df"), Row(true)) + } + } } case class Foo(bar: Option[String]) From 2b30dde24972f7123b7ee14583fdce72e9ee955f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 8 Dec 2020 12:08:22 +0000 Subject: [PATCH 0703/1009] [SPARK-33688][SQL] Migrate SHOW TABLE EXTENDED to new resolution framework ### What changes were proposed in this pull request? 1. Remove old statement `ShowTableStatement` 2. Introduce new command `ShowTableExtended` for `SHOW TABLE EXTENDED`. This PR is the first step of new V2 implementation of `SHOW TABLE EXTENDED`, see SPARK-33393. ### Why are the changes needed? This is a part of effort to make the relation lookup behavior consistent: SPARK-29900. ### Does this PR introduce _any_ user-facing change? The changes should not affect V1 tables. For V2, Spark outputs the error: ``` SHOW TABLE EXTENDED is not supported for v2 tables. ``` ### How was this patch tested? By running `SHOW TABLE EXTENDED` tests: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30645 from MaxGekk/show-table-extended-statement. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../sql/catalyst/analysis/Analyzer.scala | 2 ++ .../sql/catalyst/parser/AstBuilder.scala | 15 +++++++---- .../catalyst/plans/logical/statements.scala | 9 ------- .../catalyst/plans/logical/v2Commands.scala | 20 ++++++++++++-- .../analysis/ResolveSessionCatalog.scala | 20 +++++++++----- .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../command/ShowTablesParserSuite.scala | 27 ++++++++++++------- .../command/v2/ShowTablesSuite.scala | 7 +++-- 9 files changed, 67 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index a23994f456f75..b08451d8a6cfa 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -198,7 +198,7 @@ statement | SHOW TABLES ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showTables | SHOW TABLE EXTENDED ((FROM | IN) ns=multipartIdentifier)? - LIKE pattern=STRING partitionSpec? #showTable + LIKE pattern=STRING partitionSpec? #showTableExtended | SHOW TBLPROPERTIES table=multipartIdentifier ('(' key=tablePropertyKey ')')? 
#showTblProperties | SHOW COLUMNS (FROM | IN) table=multipartIdentifier diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6541961f5613e..680ec982b2112 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -847,6 +847,8 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case s @ ShowTables(UnresolvedNamespace(Seq()), _) => s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) + case s @ ShowTableExtended(UnresolvedNamespace(Seq()), _, _) => + s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) case s @ ShowViews(UnresolvedNamespace(Seq()), _) => s.copy(namespace = ResolvedNamespace(currentCatalog, catalogManager.currentNamespace)) case UnresolvedNamespace(Seq()) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 42c67ac963cbe..b6bd3b77fc874 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3190,13 +3190,18 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[ShowTableStatement]] command. + * Create a [[ShowTableExtended]] command. */ - override def visitShowTable(ctx: ShowTableContext): LogicalPlan = withOrigin(ctx) { - ShowTableStatement( - Option(ctx.ns).map(visitMultipartIdentifier), + override def visitShowTableExtended( + ctx: ShowTableExtendedContext): LogicalPlan = withOrigin(ctx) { + val multiPart = Option(ctx.multipartIdentifier).map(visitMultipartIdentifier) + val partitionKeys = Option(ctx.partitionSpec).map { specCtx => + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) + } + ShowTableExtended( + UnresolvedNamespace(multiPart.getOrElse(Seq.empty[String])), string(ctx.pattern), - Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) + partitionKeys) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 8f0889bbcebd8..402ae657d1709 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -377,15 +377,6 @@ case class InsertIntoStatement( override def children: Seq[LogicalPlan] = query :: Nil } -/** - * A SHOW TABLE EXTENDED statement, as parsed from SQL. - */ -case class ShowTableStatement( - namespace: Option[Seq[String]], - pattern: String, - partitionSpec: Option[TablePartitionSpec]) - extends ParsedStatement - /** * A CREATE NAMESPACE statement, as parsed from SQL. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 6f35364cce131..72ba9cf6db0e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.types.{DataType, MetadataBuilder, StringType, StructType} +import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType} /** * Base trait for DataSourceV2 write commands @@ -466,7 +466,7 @@ case class RenameTable( } /** - * The logical plan of the SHOW TABLE command. + * The logical plan of the SHOW TABLES command. */ case class ShowTables( namespace: LogicalPlan, @@ -478,6 +478,22 @@ case class ShowTables( AttributeReference("tableName", StringType, nullable = false)()) } +/** + * The logical plan of the SHOW TABLE EXTENDED command. + */ +case class ShowTableExtended( + namespace: LogicalPlan, + pattern: String, + partitionSpec: Option[PartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = namespace :: Nil + + override val output: Seq[Attribute] = Seq( + AttributeReference("namespace", StringType, nullable = false)(), + AttributeReference("tableName", StringType, nullable = false)(), + AttributeReference("isTemporary", BooleanType, nullable = false)(), + AttributeReference("information", StringType, nullable = false)()) +} + /** * The logical plan of the SHOW VIEWS command. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 7e5f39e398a6b..4c7e6fefd9759 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -383,14 +383,20 @@ class ResolveSessionCatalog( } ShowTablesCommand(Some(ns.head), pattern) - case ShowTableStatement(ns, pattern, partitionsSpec) => - val db = ns match { - case Some(ns) if ns.length != 1 => - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") - case _ => ns.map(_.head) + case ShowTableExtended( + SessionCatalogAndNamespace(_, ns), + pattern, + partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => + assert(ns.nonEmpty) + if (ns.length != 1) { + throw new AnalysisException( + s"The database name is not valid: ${ns.quoted}") } - ShowTablesCommand(db, Some(pattern), true, partitionsSpec) + ShowTablesCommand( + databaseName = Some(ns.head), + tableIdentifierPattern = Some(pattern), + isExtended = true, + partitionSpec.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) // ANALYZE TABLE works on permanent views if the views are cached. 
case AnalyzeTable(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, noScan) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 075d2a43dce4e..5f67b39b95c35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -295,6 +295,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) => ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil + case _: ShowTableExtended => + throw new AnalysisException("SHOW TABLE EXTENDED is not supported for v2 tables.") + case SetCatalogAndNamespace(catalogManager, catalogName, ns) => SetCatalogAndNamespaceExec(catalogManager, catalogName, ns) :: Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala index 16f3dea8d75ef..d68e1233f7ab2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesParserSuite.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace, UnresolvedPartitionSpec} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan -import org.apache.spark.sql.catalyst.plans.logical.{ShowTables, ShowTableStatement} +import org.apache.spark.sql.catalyst.plans.logical.{ShowTableExtended, ShowTables} import org.apache.spark.sql.test.SharedSparkSession class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { @@ -52,25 +52,32 @@ class ShowTablesParserSuite extends AnalysisTest with SharedSparkSession { test("show table extended") { comparePlans( parsePlan("SHOW TABLE EXTENDED LIKE '*test*'"), - ShowTableStatement(None, "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq.empty[String]), "*test*", None)) comparePlans( parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), "*test*", None)) comparePlans( parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*'"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", None)) + ShowTableExtended(UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), "*test*", None)) comparePlans( parsePlan("SHOW TABLE EXTENDED LIKE '*test*' PARTITION(ds='2008-04-09', hr=11)"), - ShowTableStatement(None, "*test*", Some(Map("ds" -> "2008-04-09", "hr" -> "11")))) + ShowTableExtended( + UnresolvedNamespace(Seq.empty[String]), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> "2008-04-09", "hr" -> "11"))))) comparePlans( parsePlan(s"SHOW TABLE EXTENDED FROM $catalog.ns1.ns2 LIKE '*test*' " + "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) + ShowTableExtended( + UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> 
"2008-04-09"))))) comparePlans( parsePlan(s"SHOW TABLE EXTENDED IN $catalog.ns1.ns2 LIKE '*test*' " + "PARTITION(ds='2008-04-09')"), - ShowTableStatement(Some(Seq(catalog, "ns1", "ns2")), "*test*", - Some(Map("ds" -> "2008-04-09")))) + ShowTableExtended( + UnresolvedNamespace(Seq(catalog, "ns1", "ns2")), + "*test*", + Some(UnresolvedPartitionSpec(Map("ds" -> "2008-04-09"))))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index aff1729a000b6..370c8358e64da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -74,7 +73,7 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio val e = intercept[AnalysisException] { sql(sqlCommand) } - assert(e.message.contains(s"The database name is not valid: ${namespace}")) + assert(e.message.contains(s"SHOW TABLE EXTENDED is not supported for v2 tables")) } val namespace = s"$catalog.ns1.ns2" @@ -101,10 +100,10 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio val table = "people" withTable(s"$catalog.$table") { sql(s"CREATE TABLE $catalog.$table (name STRING, id INT) $defaultUsing") - val errMsg = intercept[NoSuchDatabaseException] { + val errMsg = intercept[AnalysisException] { sql(s"SHOW TABLE EXTENDED FROM $catalog LIKE '*$table*'").collect() }.getMessage - assert(errMsg.contains(s"Database '$catalog' not found")) + assert(errMsg.contains("SHOW TABLE EXTENDED is not supported for v2 tables")) } } } From c05ee06f5b711dd261dc94a01b4ba4ffccdf2ea0 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 8 Dec 2020 14:07:58 +0000 Subject: [PATCH 0704/1009] [SPARK-33685][SQL] Migrate DROP VIEW command to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR introduces `UnresolvedView` in the resolution framework to resolve the identifier. This PR then migrates `DROP VIEW` to use `UnresolvedView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedView` for view resolution. Note that there is no resolution behavior change with this PR. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Updated existing tests. Closes #30636 from imback82/drop_view_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 17 ++++++++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 13 ++++++-- .../catalyst/analysis/ResolveCatalogs.scala | 5 --- ...cala => ResolveCommandsWithIfExists.scala} | 14 +++++---- .../catalyst/analysis/v2ResolutionPlans.scala | 13 ++++++++ .../sql/catalyst/parser/AstBuilder.scala | 9 ++++-- .../catalyst/plans/logical/statements.scala | 7 ----- .../catalyst/plans/logical/v2Commands.scala | 15 +++++++-- .../sql/catalyst/parser/DDLParserSuite.scala | 17 ++++++---- .../analysis/ResolveSessionCatalog.scala | 5 ++- .../datasources/v2/DataSourceV2Strategy.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 14 ++++----- .../sql/execution/command/DDLSuite.scala | 5 ++- .../command/PlanResolutionSuite.scala | 31 ++++++++++++++++--- .../sql/hive/execution/HiveDDLSuite.scala | 3 +- 15 files changed, 118 insertions(+), 52 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/{ResolveNoopDropTable.scala => ResolveCommandsWithIfExists.scala} (63%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 680ec982b2112..6b0cf4be7de74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -150,7 +150,7 @@ object AnalysisContext { * [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]]. */ class Analyzer(override val catalogManager: CatalogManager) - extends RuleExecutor[LogicalPlan] with CheckAnalysis with LookupCatalog with SQLConfHelper { + extends RuleExecutor[LogicalPlan] with CheckAnalysis with SQLConfHelper { private val v1SessionCatalog: SessionCatalog = catalogManager.v1SessionCatalog @@ -277,7 +277,7 @@ class Analyzer(override val catalogManager: CatalogManager) TypeCoercion.typeCoercionRules ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, - Seq(ResolveNoopDropTable) ++ + Seq(ResolveCommandsWithIfExists) ++ postHocResolutionRules: _*), Batch("Normalize Alter Table", Once, ResolveAlterTableChanges), Batch("Remove Unresolved Hints", Once, @@ -889,6 +889,11 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u + case u @ UnresolvedView(ident, _, _) => + lookupTempView(ident).map { _ => + ResolvedView(ident.asIdentifier, isTemp = true) + } + .getOrElse(u) case u @ UnresolvedTableOrView(ident, cmd, allowTempView) => lookupTempView(ident) .map { _ => @@ -1113,6 +1118,14 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) + case u @ UnresolvedView(identifier, cmd, relationTypeMismatchHint) => + lookupTableOrView(identifier).map { + case v: ResolvedView => v + case _ => + u.failAnalysis(s"${identifier.quoted} is a table. '$cmd' expects a view." 
+ + relationTypeMismatchHint.map(" " + _).getOrElse("")) + }.getOrElse(u) + case u @ UnresolvedTableOrView(identifier, _, _) => lookupTableOrView(identifier).getOrElse(u) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9f5eefc744135..39cdea2bd4d2a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.optimizer.BooleanSimplification import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} -import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} +import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -34,7 +34,7 @@ import org.apache.spark.sql.types._ /** * Throws user facing errors when passed invalid queries that fail to analyze. */ -trait CheckAnalysis extends PredicateHelper { +trait CheckAnalysis extends PredicateHelper with LookupCatalog { protected def isView(nameParts: Seq[String]): Boolean @@ -104,6 +104,15 @@ trait CheckAnalysis extends PredicateHelper { case u: UnresolvedTable => u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _) => + u.failAnalysis( + s"Cannot specify catalog `${catalog.name}` for view ${ident.quoted} " + + "because view support in v2 catalog has not been implemented yet. 
" + + s"$cmd expects a view.") + + case u: UnresolvedView => + u.failAnalysis(s"View not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + case u: UnresolvedTableOrView => val viewStr = if (u.allowTempView) "view" else "permanent view" u.failAnalysis( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 6d89414ba106d..b4dfee1330036 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -187,11 +187,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) writeOptions = c.writeOptions, orCreate = c.orCreate) - case DropViewStatement(NonSessionCatalogAndTable(catalog, viewName), _) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${viewName.quoted} " + - s"because view support in catalog has not been implemented yet") - case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if !isSessionCatalog(catalog) => CreateNamespace(catalog.asNamespaceCatalog, ns, c.ifNotExists, c.properties) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala similarity index 63% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala index f9da9174f85e6..196a07a7f9904 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveNoopDropTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala @@ -17,17 +17,19 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan, NoopDropTable} +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand} import org.apache.spark.sql.catalyst.rules.Rule /** - * A rule for handling [[DropTable]] logical plan when the table or temp view is not resolved. - * If "ifExists" flag is set to true, the plan is resolved to [[NoopDropTable]], - * which is a no-op command. + * A rule for handling commands when the table or temp view is not resolved. + * These commands support a flag, "ifExists", so that they do not fail when a relation is not + * resolved. If the "ifExists" flag is set to true. 
the plan is resolved to [[NoopCommand]], */ -object ResolveNoopDropTable extends Rule[LogicalPlan] { +object ResolveCommandsWithIfExists extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case DropTable(u: UnresolvedTableOrView, ifExists, _) if ifExists => - NoopDropTable(u.multipartIdentifier) + NoopCommand("DROP TABLE", u.multipartIdentifier) + case DropView(u: UnresolvedView, ifExists) if ifExists => + NoopCommand("DROP VIEW", u.multipartIdentifier) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 1518f064d78db..2737b5d58bf42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -45,6 +45,19 @@ case class UnresolvedTable( override def output: Seq[Attribute] = Nil } +/** + * Holds the name of a view that has yet to be looked up in a catalog. It will be resolved to + * [[ResolvedView]] during analysis. + */ +case class UnresolvedView( + multipartIdentifier: Seq[String], + commandName: String, + relationTypeMismatchHint: Option[String] = None) extends LeafNode { + override lazy val resolved: Boolean = false + + override def output: Seq[Attribute] = Nil +} + /** * Holds the name of a table or view that has yet to be looked up in a catalog. It will * be resolved to [[ResolvedTable]] or [[ResolvedView]] during analysis. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b6bd3b77fc874..89b81ec1d83aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3155,11 +3155,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[DropViewStatement]] command. + * Create a [[DropView]] command. */ override def visitDropView(ctx: DropViewContext): AnyRef = withOrigin(ctx) { - DropViewStatement( - visitMultipartIdentifier(ctx.multipartIdentifier()), + DropView( + UnresolvedView( + visitMultipartIdentifier(ctx.multipartIdentifier()), + "DROP VIEW", + Some("Please use DROP TABLE instead.")), ctx.EXISTS != null) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 402ae657d1709..c4ac8ea8f2e69 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -338,13 +338,6 @@ case class AlterViewAsStatement( originalText: String, query: LogicalPlan) extends ParsedStatement -/** - * A DROP VIEW statement, as parsed from SQL. - */ -case class DropViewStatement( - viewName: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 72ba9cf6db0e2..1e17c51137a55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -419,9 +419,11 @@ case class DropTable( } /** - * The logical plan for handling non-existing table for DROP TABLE command. + * The logical plan for no-op command handling non-existing table. */ -case class NoopDropTable(multipartIdentifier: Seq[String]) extends Command +case class NoopCommand( + commandName: String, + multipartIdentifier: Seq[String]) extends Command /** * The logical plan of the ALTER TABLE command. @@ -724,3 +726,12 @@ case class ShowPartitions( override val output: Seq[Attribute] = Seq( AttributeReference("partition", StringType, nullable = false)()) } + +/** + * The logical plan of the DROP VIEW command. + */ +case class DropView( + child: LogicalPlan, + ifExists: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index f925be8617b47..d5b27d9ad25cf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -721,13 +721,18 @@ class DDLParserSuite extends AnalysisTest { } test("drop view") { + val cmd = "DROP VIEW" + val hint = Some("Please use DROP TABLE instead.") parseCompare(s"DROP VIEW testcat.db.view", - DropViewStatement(Seq("testcat", "db", "view"), ifExists = false)) - parseCompare(s"DROP VIEW db.view", DropViewStatement(Seq("db", "view"), ifExists = false)) + DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, hint), ifExists = false)) + parseCompare(s"DROP VIEW db.view", + DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS db.view", - DropViewStatement(Seq("db", "view"), ifExists = true)) - parseCompare(s"DROP VIEW view", DropViewStatement(Seq("view"), ifExists = false)) - parseCompare(s"DROP VIEW IF EXISTS view", DropViewStatement(Seq("view"), ifExists = true)) + DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = true)) + parseCompare(s"DROP VIEW view", + DropView(UnresolvedView(Seq("view"), cmd, 
hint), ifExists = false)) + parseCompare(s"DROP VIEW IF EXISTS view", + DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = true)) } private def testCreateOrReplaceDdl( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 4c7e6fefd9759..657764832a931 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -352,9 +352,8 @@ class ResolveSessionCatalog( } DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) - // v1 DROP TABLE supports temp view. - case DropViewStatement(TempViewOrV1Table(name), ifExists) => - DropTableCommand(name.asTableIdentifier, ifExists, isView = true, purge = false) + case DropView(r: ResolvedView, ifExists) => + DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = true, purge = false) case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 5f67b39b95c35..7e2a485dcb4cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -251,7 +251,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DropTable(r: ResolvedTable, ifExists, purge) => DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil - case _: NoopDropTable => + case _: NoopCommand => LocalTableScanExec(Nil, Nil) :: Nil case AlterTable(catalog, ident, _, changes) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 2673577aecf36..9a8c3e3cf1a11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2594,6 +2594,13 @@ class DataSourceV2SQLSuite } } + test("DROP VIEW is not supported for v2 catalogs") { + assertAnalysisError( + "DROP VIEW testcat.v", + "Cannot specify catalog `testcat` for view v because view support in v2 catalog " + + "has not been implemented yet. 
DROP VIEW expects a view.") + } + private def testNotSupportedV2Command( sqlCommand: String, sqlParams: String, @@ -2612,13 +2619,6 @@ class DataSourceV2SQLSuite assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) } - private def testV1CommandSupportingTempView(sqlCommand: String, sqlParams: String): Unit = { - val e = intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with temp views or v1 tables")) - } - private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { val errMsg = intercept[AnalysisException] { sql(sqlStatement) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 4f79e71419a10..b3cd9f1057a70 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1363,12 +1363,11 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { createDatabase(catalog, "dbx") createTable(catalog, tableIdent) assert(catalog.listTables("dbx") == Seq(tableIdent)) - val e = intercept[AnalysisException] { sql("DROP VIEW dbx.tab1") } - assert( - e.getMessage.contains("Cannot drop a table with DROP VIEW. Please use DROP TABLE instead")) + assert(e.getMessage.contains( + "dbx.tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) } protected def testSetProperties(isDatasourceTable: Boolean): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 38719311f1aef..5147a8485ea25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -78,6 +78,14 @@ class PlanResolutionSuite extends AnalysisTest { V1Table(t) } + private val view: V1Table = { + val t = mock(classOf[CatalogTable]) + when(t.schema).thenReturn(new StructType().add("i", "int").add("s", "string")) + when(t.tableType).thenReturn(CatalogTableType.VIEW) + when(t.provider).thenReturn(Some(v1Format)) + V1Table(t) + } + private val testCat: TableCatalog = { val newCatalog = mock(classOf[TableCatalog]) when(newCatalog.loadTable(any())).thenAnswer((invocation: InvocationOnMock) => { @@ -101,6 +109,7 @@ class PlanResolutionSuite extends AnalysisTest { case "v2Table" => table case "v2Table1" => table case "v2TableWithAcceptAnySchemaCapability" => tableWithAcceptAnySchemaCapability + case "view" => view case name => throw new NoSuchTableException(name) } }) @@ -148,7 +157,10 @@ class PlanResolutionSuite extends AnalysisTest { manager } - def parseAndResolve(query: String, withDefault: Boolean = false): LogicalPlan = { + def parseAndResolve( + query: String, + withDefault: Boolean = false, + checkAnalysis: Boolean = false): LogicalPlan = { val catalogManager = if (withDefault) { catalogManagerWithDefault } else { @@ -158,8 +170,13 @@ class PlanResolutionSuite extends AnalysisTest { override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq( new ResolveSessionCatalog(catalogManager, _ == Seq("v"), _ => false)) } - // We don't check analysis here, as we expect the plan to be unresolved such as `CreateTable`. 
- analyzer.execute(CatalystSqlParser.parsePlan(query)) + // We don't check analysis here by default, as we expect the plan to be unresolved + // such as `CreateTable`. + val analyzed = analyzer.execute(CatalystSqlParser.parsePlan(query)) + if (checkAnalysis) { + analyzer.checkAnalysis(analyzed) + } + analyzed } private def parseResolveCompare(query: String, expected: LogicalPlan): Unit = @@ -677,6 +694,8 @@ class PlanResolutionSuite extends AnalysisTest { val viewIdent1 = TableIdentifier("view", Option("db")) val viewName2 = "view" val viewIdent2 = TableIdentifier("view", Option("default")) + val tempViewName = "v" + val tempViewIdent = TableIdentifier("v") parseResolveCompare(s"DROP VIEW $viewName1", DropTableCommand(viewIdent1, ifExists = false, isView = true, purge = false)) @@ -686,11 +705,15 @@ class PlanResolutionSuite extends AnalysisTest { DropTableCommand(viewIdent2, ifExists = false, isView = true, purge = false)) parseResolveCompare(s"DROP VIEW IF EXISTS $viewName2", DropTableCommand(viewIdent2, ifExists = true, isView = true, purge = false)) + parseResolveCompare(s"DROP VIEW $tempViewName", + DropTableCommand(tempViewIdent, ifExists = false, isView = true, purge = false)) + parseResolveCompare(s"DROP VIEW IF EXISTS $tempViewName", + DropTableCommand(tempViewIdent, ifExists = true, isView = true, purge = false)) } test("drop view in v2 catalog") { intercept[AnalysisException] { - parseAndResolve("DROP VIEW testcat.db.view") + parseAndResolve("DROP VIEW testcat.db.view", checkAnalysis = true) }.getMessage.toLowerCase(Locale.ROOT).contains( "view support in catalog has not been implemented") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index ce31e39985971..d6a4d76386889 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1048,7 +1048,8 @@ class HiveDDLSuite val message = intercept[AnalysisException] { sql("DROP VIEW tab1") }.getMessage - assert(message.contains("Cannot drop a table with DROP VIEW. Please use DROP TABLE instead")) + assert(message.contains( + "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) } } From a093d6feefb0e086d19c86ae53bf92df12ccf2fa Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Tue, 8 Dec 2020 08:57:13 -0600 Subject: [PATCH 0705/1009] [MINOR] Spelling sql/core ### What changes were proposed in this pull request? This PR intends to fix typos in the sub-modules: * `sql/core` Split per srowen https://github.com/apache/spark/pull/30323#issuecomment-728981618 NOTE: The misspellings have been reported at https://github.com/jsoref/spark/commit/706a726f87a0bbf5e31467fae9015218773db85b#commitcomment-44064356 ### Why are the changes needed? Misspelled words make it harder to read / understand content. ### Does this PR introduce _any_ user-facing change? There are various fixes to documentation, etc... ### How was this patch tested? No testing was performed Closes #30531 from jsoref/spelling-sql-core. 
Authored-by: Josh Soref Signed-off-by: Sean Owen --- .../sql/execution/ui/static/spark-sql-viz.js | 8 ++--- .../scala/org/apache/spark/sql/Dataset.scala | 10 +++--- .../sql/execution/DataSourceScanExec.scala | 6 ++-- .../spark/sql/execution/ExplainUtils.scala | 8 ++--- .../ExternalAppendOnlyUnsafeRowArray.scala | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 14 ++++---- .../sql/execution/WholeStageCodegenExec.scala | 2 +- .../adaptive/AdaptiveSparkPlanHelper.scala | 2 +- .../InsertIntoDataSourceDirCommand.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 4 +-- .../spark/sql/execution/command/tables.scala | 2 +- .../execution/datasources/DataSource.scala | 2 +- .../datasources/FileFormatDataWriter.scala | 14 ++++---- .../datasources/FileFormatWriter.scala | 2 +- .../datasources/PartitioningUtils.scala | 2 +- .../v2/WriteToDataSourceV2Exec.scala | 2 +- .../sql/execution/joins/HashedRelation.scala | 4 +-- .../execution/python/ExtractPythonUDFs.scala | 6 ++-- .../streaming/CompactibleFileStreamLog.scala | 2 +- .../execution/streaming/StreamExecution.scala | 2 +- .../FlatMapGroupsWithStateExecHelper.scala | 2 +- .../apache/spark/sql/internal/HiveSerDe.scala | 2 +- .../sql/streaming/DataStreamWriter.scala | 4 +-- .../sql/Java8DatasetAggregatorSuite.java | 16 +++++----- .../spark/sql/JavaDatasetAggregatorSuite.java | 24 +++++++------- .../ansi/decimalArithmeticOperations.sql | 2 +- .../inputs/postgreSQL/create_view.sql | 2 +- .../apache/spark/sql/CachedTableSuite.scala | 8 ++--- .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../apache/spark/sql/DatasetCacheSuite.scala | 13 ++++---- .../spark/sql/DatasetPrimitiveSuite.scala | 8 ++--- .../org/apache/spark/sql/DatasetSuite.scala | 32 +++++++++---------- .../apache/spark/sql/DateFunctionsSuite.scala | 6 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala | 6 ++-- .../apache/spark/sql/SQLQueryTestSuite.scala | 10 +++--- .../sql/SparkSessionExtensionSuite.scala | 18 +++++------ .../apache/spark/sql/TPCDSTableStats.scala | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 12 +++---- .../execution/SQLWindowFunctionSuite.scala | 2 +- .../sql/execution/SparkSqlParserSuite.scala | 2 +- .../execution/WholeStageCodegenSuite.scala | 4 +-- .../adaptive/AdaptiveQueryExecSuite.scala | 8 ++--- .../arrow/ArrowConvertersSuite.scala | 2 +- .../sql/execution/command/DDLSuite.scala | 12 +++---- .../command/PlanResolutionSuite.scala | 16 +++++----- .../datasources/DataSourceSuite.scala | 4 +-- .../datasources/SchemaPruningSuite.scala | 8 ++--- .../ParquetInteroperabilitySuite.scala | 2 +- .../ParquetPartitionDiscoverySuite.scala | 4 +-- .../parquet/ParquetQuerySuite.scala | 4 +-- .../exchange/EnsureRequirementsSuite.scala | 2 +- .../execution/metric/SQLMetricsSuite.scala | 2 +- .../streaming/HDFSMetadataLogSuite.scala | 2 +- .../sql/execution/ui/SparkPlanInfoSuite.scala | 6 ++-- .../internal/ExecutorSideSQLConfSuite.scala | 4 +-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 12 +++---- .../spark/sql/sources/BucketedReadSuite.scala | 18 ++++++----- .../sources/CreateTableAsSelectSuite.scala | 2 +- .../spark/sql/sources/TableScanSuite.scala | 6 ++-- .../sql/streaming/FileStreamSourceSuite.scala | 4 +-- .../spark/sql/streaming/StreamSuite.scala | 8 ++--- .../test/DataStreamTableAPISuite.scala | 8 ++--- .../apache/spark/sql/test/SQLTestData.scala | 4 +-- .../spark/sql/test/SharedSparkSession.scala | 2 +- 64 files changed, 208 insertions(+), 205 deletions(-) diff --git 
a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js index 301183f749a84..d1def1b0a42ff 100644 --- a/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js +++ b/sql/core/src/main/resources/org/apache/spark/sql/execution/ui/static/spark-sql-viz.js @@ -87,14 +87,14 @@ function preprocessGraphLayout(g) { var node = g.node(nodes[i]); node.padding = "5"; - var firstSearator; + var firstSeparator; var secondSeparator; var splitter; if (node.isCluster) { - firstSearator = secondSeparator = labelSeparator; + firstSeparator = secondSeparator = labelSeparator; splitter = "\\n"; } else { - firstSearator = ""; + firstSeparator = ""; secondSeparator = ""; splitter = "
      "; } @@ -104,7 +104,7 @@ function preprocessGraphLayout(g) { if (newTexts) { node.label = node.label.replace( newTexts[0], - newTexts[1] + firstSearator + newTexts[2] + secondSeparator + newTexts[3]); + newTexts[1] + firstSeparator + newTexts[2] + secondSeparator + newTexts[3]); } }); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 05d6647afd958..6afbbce3ff8d4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1363,7 +1363,7 @@ class Dataset[T] private[sql]( // Attach the dataset id and column position to the column reference, so that we can detect // ambiguous self-join correctly. See the rule `DetectAmbiguousSelfJoin`. // This must be called before we return a `Column` that contains `AttributeReference`. - // Note that, the metadata added here are only avaiable in the analyzer, as the analyzer rule + // Note that, the metadata added here are only available in the analyzer, as the analyzer rule // `DetectAmbiguousSelfJoin` will remove it. private def addDataFrameIdToCol(expr: NamedExpression): NamedExpression = { val newExpr = expr transform { @@ -1665,10 +1665,10 @@ class Dataset[T] private[sql]( * See [[RelationalGroupedDataset]] for all the available aggregate functions. * * {{{ - * // Compute the average for all numeric columns rolluped by department and group. + * // Compute the average for all numeric columns rolled up by department and group. * ds.rollup($"department", $"group").avg() * - * // Compute the max age and average salary, rolluped by department and gender. + * // Compute the max age and average salary, rolled up by department and gender. * ds.rollup($"department", $"gender").agg(Map( * "salary" -> "avg", * "age" -> "max" @@ -1794,10 +1794,10 @@ class Dataset[T] private[sql]( * (i.e. cannot construct expressions). * * {{{ - * // Compute the average for all numeric columns rolluped by department and group. + * // Compute the average for all numeric columns rolled up by department and group. * ds.rollup("department", "group").avg() * - * // Compute the max age and average salary, rolluped by department and gender. + * // Compute the max age and average salary, rolled up by department and gender. * ds.rollup($"department", $"gender").agg(Map( * "salary" -> "avg", * "age" -> "max" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 44636beeec7fc..df3b9f2a4e9cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -284,7 +284,7 @@ case class FileSourceScanExec( // // Sort ordering would be over the prefix subset of `sort columns` being read // from the table. - // eg. + // e.g. 
// Assume (col0, col2, col3) are the columns read from the table // If sort columns are (col0, col1), then sort ordering would be considered as (col0) // If sort columns are (col1, col0), then sort ordering would be empty as per rule #2 @@ -379,12 +379,12 @@ case class FileSourceScanExec( case (key, _) if (key.equals("Location")) => val location = relation.location val numPaths = location.rootPaths.length - val abbreviatedLoaction = if (numPaths <= 1) { + val abbreviatedLocation = if (numPaths <= 1) { location.rootPaths.mkString("[", ", ", "]") } else { "[" + location.rootPaths.head + s", ... ${numPaths - 1} entries]" } - s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLoaction)}" + s"$key: ${location.getClass.getSimpleName} ${redact(abbreviatedLocation)}" case (key, value) => s"$key: ${redact(value)}" } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index b54bd6a579b66..20e6fb6f96eaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -28,14 +28,14 @@ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveS object ExplainUtils extends AdaptiveSparkPlanHelper { /** * Given a input physical plan, performs the following tasks. - * 1. Computes the operator id for current operator and records it in the operaror + * 1. Computes the operator id for current operator and records it in the operator * by setting a tag. * 2. Computes the whole stage codegen id for current operator and records it in the * operator by setting a tag. * 3. Generate the two part explain output for this plan. * 1. First part explains the operator tree with each operator tagged with an unique * identifier. - * 2. Second part explans each operator in a verbose manner. + * 2. Second part explains each operator in a verbose manner. * * Note : This function skips over subqueries. They are handled by its caller. * @@ -117,7 +117,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { } /** - * Traverses the supplied input plan in a bottem-up fashion does the following : + * Traverses the supplied input plan in a bottom-up fashion does the following : * 1. produces a map : operator identifier -> operator * 2. Records the operator id via setting a tag in the operator. * Note : @@ -210,7 +210,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { /** * Given a input plan, returns an array of tuples comprising of : - * 1. Hosting opeator id. + * 1. Hosting operator id. * 2. Hosting expression * 3. Subquery plan */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index 993627847c08c..c5e5de588ba9d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -87,7 +87,7 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( def isEmpty: Boolean = numRows == 0 /** - * Clears up resources (eg. memory) held by the backing storage + * Clears up resources (e.g. 
memory) held by the backing storage */ def clear(): Unit = { if (spillableArray != null) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index c82e3818b48cc..7a31b0dcdd43d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -386,25 +386,25 @@ class SparkSqlAstBuilder extends AstBuilder { * - '/path/to/fileOrJar' */ override def visitManageResource(ctx: ManageResourceContext): LogicalPlan = withOrigin(ctx) { - val mayebePaths = if (ctx.STRING != null) string(ctx.STRING) else remainder(ctx.identifier).trim + val maybePaths = if (ctx.STRING != null) string(ctx.STRING) else remainder(ctx.identifier).trim ctx.op.getType match { case SqlBaseParser.ADD => ctx.identifier.getText.toLowerCase(Locale.ROOT) match { - case "file" => AddFileCommand(mayebePaths) - case "jar" => AddJarCommand(mayebePaths) + case "file" => AddFileCommand(maybePaths) + case "jar" => AddJarCommand(maybePaths) case other => operationNotAllowed(s"ADD with resource type '$other'", ctx) } case SqlBaseParser.LIST => ctx.identifier.getText.toLowerCase(Locale.ROOT) match { case "files" | "file" => - if (mayebePaths.length > 0) { - ListFilesCommand(mayebePaths.split("\\s+")) + if (maybePaths.length > 0) { + ListFilesCommand(maybePaths.split("\\s+")) } else { ListFilesCommand() } case "jars" | "jar" => - if (mayebePaths.length > 0) { - ListJarsCommand(mayebePaths.split("\\s+")) + if (maybePaths.length > 0) { + ListJarsCommand(maybePaths.split("\\s+")) } else { ListJarsCommand() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index b2963457e22db..c6ea99cfdad7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -670,7 +670,7 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) } ${ctx.registerComment( - s"""Codegend pipeline for stage (id=$codegenStageId) + s"""Codegened pipeline for stage (id=$codegenStageId) |${this.treeString.trim}""".stripMargin, "wsc_codegenPipeline")} ${ctx.registerComment(s"codegenStageId=$codegenStageId", "wsc_codegenStageId", true)} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala index 6ba375910a4eb..eecfa40e8d0bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanHelper.scala @@ -115,7 +115,7 @@ trait AdaptiveSparkPlanHelper { /** * Returns a sequence containing the subqueries in this plan, also including the (nested) - * subquries in its children + * subqueries in its children */ def subqueriesAll(p: SparkPlan): Seq[SparkPlan] = { val subqueries = flatMap(p)(_.subqueries) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala index 08d31fdda2dc8..d065bc0dab4cd 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.datasources._ * @param storage storage format used to describe how the query result is stored. * @param provider the data source type to be used * @param query the logical plan representing data to write to - * @param overwrite whthere overwrites existing directory + * @param overwrite whether overwrites existing directory */ case class InsertIntoDataSourceDirCommand( storage: CatalogStorageFormat, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 69425cfed285f..6d631e044e917 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -89,8 +89,8 @@ case class CreateDatabaseCommand( * A command for users to remove a database from the system. * * 'ifExists': - * - true, if database_name does't exist, no action - * - false (default), if database_name does't exist, a warning message will be issued + * - true, if database_name doesn't exist, no action + * - false (default), if database_name doesn't exist, a warning message will be issued * 'cascade': * - true, the dependent objects are automatically dropped before dropping database. * - false (default), it is in the Restrict mode. The database cannot be dropped if diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 640051384e94c..431a103063c68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -352,7 +352,7 @@ case class LoadDataCommand( // entire string will be considered while making a Path instance,this is mainly done // by considering the wild card scenario in mind.as per old logic query param is // been considered while creating URI instance and if path contains wild card char '?' - // the remaining charecters after '?' will be removed while forming URI instance + // the remaining characters after '?' will be removed while forming URI instance LoadDataCommand.makeQualified(defaultFS, uriPath, loadPath) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 34ded5d456d09..4783789b91f3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -211,7 +211,7 @@ case class DataSource( s"Unable to infer schema for $format. It must be specified manually.") } - // We just print a waring message if the data schema and partition schema have the duplicate + // We just print a warning message if the data schema and partition schema have the duplicate // columns. This is because we allow users to do so in the previous Spark releases and // we have the existing tests for the cases (e.g., `ParquetHadoopFsRelationSuite`). // See SPARK-18108 and SPARK-21144 for related discussions. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala index edb49d3f90ca3..6de9b1d7cea4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala @@ -167,7 +167,7 @@ class DynamicPartitionDataWriter( private var fileCounter: Int = _ private var recordsInFile: Long = _ - private var currentPartionValues: Option[UnsafeRow] = None + private var currentPartitionValues: Option[UnsafeRow] = None private var currentBucketId: Option[Int] = None /** Extracts the partition values out of an input row. */ @@ -247,11 +247,11 @@ class DynamicPartitionDataWriter( val nextPartitionValues = if (isPartitioned) Some(getPartitionValues(record)) else None val nextBucketId = if (isBucketed) Some(getBucketId(record)) else None - if (currentPartionValues != nextPartitionValues || currentBucketId != nextBucketId) { + if (currentPartitionValues != nextPartitionValues || currentBucketId != nextBucketId) { // See a new partition or bucket - write to a new partition dir (or a new bucket file). - if (isPartitioned && currentPartionValues != nextPartitionValues) { - currentPartionValues = Some(nextPartitionValues.get.copy()) - statsTrackers.foreach(_.newPartition(currentPartionValues.get)) + if (isPartitioned && currentPartitionValues != nextPartitionValues) { + currentPartitionValues = Some(nextPartitionValues.get.copy()) + statsTrackers.foreach(_.newPartition(currentPartitionValues.get)) } if (isBucketed) { currentBucketId = nextBucketId @@ -259,7 +259,7 @@ class DynamicPartitionDataWriter( } fileCounter = 0 - newOutputWriter(currentPartionValues, currentBucketId) + newOutputWriter(currentPartitionValues, currentBucketId) } else if (description.maxRecordsPerFile > 0 && recordsInFile >= description.maxRecordsPerFile) { // Exceeded the threshold in terms of the number of records per file. @@ -268,7 +268,7 @@ class DynamicPartitionDataWriter( assert(fileCounter < MAX_FILE_COUNTER, s"File counter $fileCounter is beyond max value $MAX_FILE_COUNTER") - newOutputWriter(currentPartionValues, currentBucketId) + newOutputWriter(currentPartitionValues, currentBucketId) } val outputRow = getOutputRow(record) currentWriter.write(outputRow) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index a71aeb47872ce..48ebd6f0c610f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -164,7 +164,7 @@ object FileFormatWriter extends Logging { SQLExecution.checkSQLExecutionId(sparkSession) - // propagate the decription UUID into the jobs, so that committers + // propagate the description UUID into the jobs, so that committers // get an ID guaranteed to be unique. 
job.getConfiguration.set("spark.sql.sources.writeJobUUID", description.uuid) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index ea437d200eaab..69123ee7af5b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -453,7 +453,7 @@ object PartitioningUtils { val decimalTry = Try { // `BigDecimal` conversion can fail when the `field` is not a form of number. val bigDecimal = new JBigDecimal(raw) - // It reduces the cases for decimals by disallowing values having scale (eg. `1.1`). + // It reduces the cases for decimals by disallowing values having scale (e.g. `1.1`). require(bigDecimal.scale <= 0) // `DecimalType` conversion can fail when // 1. The precision is bigger than 38. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 47aad2bcb2c56..f5f77d38b8716 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -168,7 +168,7 @@ case class ReplaceTableAsSelectExec( * A new table will be created using the schema of the query, and rows from the query are appended. * If the table exists, its contents and schema should be replaced with the schema and the contents * of the query. This implementation is atomic. The table replacement is staged, and the commit - * operation at the end should perform tne replacement of the table's metadata and contents. If the + * operation at the end should perform the replacement of the table's metadata and contents. If the * write fails, the table is instructed to roll back staged changes and any previously written table * is left untouched. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 3c5ed40551206..a91cc0782e1f8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -426,9 +426,9 @@ private[joins] class UnsafeHashedRelation( readBuffer(valuesBuffer, 0, valuesSize) val loc = binaryMap.lookup(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize) - val putSuceeded = loc.append(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, + val putSucceeded = loc.append(keyBuffer, Platform.BYTE_ARRAY_OFFSET, keySize, valuesBuffer, Platform.BYTE_ARRAY_OFFSET, valuesSize) - if (!putSuceeded) { + if (!putSucceeded) { binaryMap.free() throw new IOException("Could not allocate memory to grow BytesToBytesMap") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index dab2723d25726..b79bcd176b7b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -102,7 +102,7 @@ object ExtractGroupingPythonUDFFromAggregate extends Rule[LogicalPlan] { case p: PythonUDF => // This is just a sanity check, the rule PullOutNondeterministic should // already pull out those nondeterministic expressions. - assert(p.udfDeterministic, "Non-determinstic PythonUDFs should not appear " + + assert(p.udfDeterministic, "Non-deterministic PythonUDFs should not appear " + "in grouping expression") val canonicalized = p.canonicalized.asInstanceOf[PythonUDF] if (attributeMap.contains(canonicalized)) { @@ -174,7 +174,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } private def collectEvaluableUDFsFromExpressions(expressions: Seq[Expression]): Seq[PythonUDF] = { - // If fisrt UDF is SQL_SCALAR_PANDAS_ITER_UDF, then only return this UDF, + // If first UDF is SQL_SCALAR_PANDAS_ITER_UDF, then only return this UDF, // otherwise check if subsequent UDFs are of the same type as the first UDF. (since we can only // extract UDFs of the same eval type) @@ -268,7 +268,7 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { case PythonEvalType.SQL_SCALAR_PANDAS_UDF | PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF => ArrowEvalPython(validUdfs, resultAttrs, child, evalType) case _ => - throw new AnalysisException("Unexcepted UDF evalType") + throw new AnalysisException("Unexpected UDF evalType") } attributeMap ++= validUdfs.map(canonicalizeDeterministic).zip(resultAttrs) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 3c76306f20cd7..835c7c4d5261f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -288,7 +288,7 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( /** * Delete expired log entries that proceed the currentBatchId and retain - * sufficient minimum number of batches (given by minBatchsToRetain). 
This + * sufficient minimum number of batches (given by minBatchesToRetain). This * equates to retaining the earliest compaction log that proceeds * batch id position currentBatchId + 1 - minBatchesToRetain. All log entries * prior to the earliest compaction log proceeding that position will be removed. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index d6be33c76e937..6b0d33b819a20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -685,6 +685,6 @@ object StreamExecution { /** * A special thread to run the stream query. Some codes require to run in the QueryExecutionThread - * and will use `classOf[QueryxecutionThread]` to check. + * and will use `classOf[QueryExecutionThread]` to check. */ abstract class QueryExecutionThread(name: String) extends UninterruptibleThread(name) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala index 0a16a3819b778..cc785ee4247c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/FlatMapGroupsWithStateExecHelper.scala @@ -77,7 +77,7 @@ object FlatMapGroupsWithStateExecHelper { // =========================== Private implementations of StateManager =========================== // =============================================================================================== - /** Commmon methods for StateManager implementations */ + /** Common methods for StateManager implementations */ private abstract class StateManagerImplBase(shouldStoreTimestamp: Boolean) extends StateManager { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala index 64b7e7fe7923a..cfcfeabbf1f6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/HiveSerDe.scala @@ -65,7 +65,7 @@ object HiveSerDe { outputFormat = Option("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat"), serde = Option("org.apache.hadoop.hive.serde2.avro.AvroSerDe"))) - // `HiveSerDe` in `serdeMap` should be dintinct. + // `HiveSerDe` in `serdeMap` should be distinct. 
val serdeInverseMap: Map[HiveSerDe, String] = serdeMap.flatMap { case ("sequencefile", _) => None case ("rcfile", _) => None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 01e626e5436a4..9e8dff37bcfd2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -387,8 +387,8 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { } val sink = new MemorySink() val resultDf = Dataset.ofRows(df.sparkSession, new MemoryPlan(sink, df.schema.toAttributes)) - val recoverFromChkpoint = outputMode == OutputMode.Complete() - val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromChkpoint) + val recoverFromCheckpoint = outputMode == OutputMode.Complete() + val query = startQuery(sink, extraOptions, recoverFromCheckpoint = recoverFromCheckpoint) resultDf.createOrReplaceTempView(query.name) query } else if (source == SOURCE_NAME_FOREACH) { diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java index dd3755d3f904e..de88f80eb53b8 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/Java8DatasetAggregatorSuite.java @@ -34,43 +34,43 @@ public class Java8DatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.avg(v -> (double)(v._2() * 2))); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.count(v -> v)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sum(v -> (double)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sumLong(v -> (long)v._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), - agged.collectAsList()); + aggregated.collectAsList()); } } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java index 8a90624f2070b..979b7751fa9a8 100644 --- 
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetAggregatorSuite.java @@ -38,18 +38,18 @@ public class JavaDatasetAggregatorSuite extends JavaDatasetAggregatorSuiteBase { public void testTypedAggregationAnonClass() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg(new IntSumOf().toColumn()); + Dataset> aggregated = grouped.agg(new IntSumOf().toColumn()); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3), new Tuple2<>("b", 3)), - agged.collectAsList()); + aggregated.collectAsList()); - Dataset> agged2 = grouped.agg(new IntSumOf().toColumn()) + Dataset> aggregated2 = grouped.agg(new IntSumOf().toColumn()) .as(Encoders.tuple(Encoders.STRING(), Encoders.INT())); Assert.assertEquals( Arrays.asList( new Tuple2<>("a", 3), new Tuple2<>("b", 3)), - agged2.collectAsList()); + aggregated2.collectAsList()); } static class IntSumOf extends Aggregator, Integer, Integer> { @@ -88,43 +88,43 @@ public Encoder outputEncoder() { @Test public void testTypedAggregationAverage() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.avg(value -> value._2() * 2.0)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 6.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationCount() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.count(value -> value)); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 2L), new Tuple2<>("b", 1L)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumDouble() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sum(value -> (double) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3.0), new Tuple2<>("b", 3.0)), - agged.collectAsList()); + aggregated.collectAsList()); } @SuppressWarnings("deprecation") @Test public void testTypedAggregationSumLong() { KeyValueGroupedDataset> grouped = generateGroupedDataset(); - Dataset> agged = grouped.agg( + Dataset> aggregated = grouped.agg( org.apache.spark.sql.expressions.javalang.typed.sumLong(value -> (long) value._2())); Assert.assertEquals( Arrays.asList(new Tuple2<>("a", 3L), new Tuple2<>("b", 3L)), - agged.collectAsList()); + aggregated.collectAsList()); } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql index d190f38345d6b..d843847e6a149 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/decimalArithmeticOperations.sql @@ -1,6 +1,6 @@ -- SPARK-23179: SQL ANSI 2011 states that in case of overflow during arithmetic operations, -- an exception should be thrown instead of returning NULL. --- This is what most of the SQL DBs do (eg. SQLServer, DB2). +-- This is what most of the SQL DBs do (e.g. SQLServer, DB2). 
-- tests for decimals handling in operations create table decimals_test(id int, a decimal(38,18), b decimal(38,18)) using parquet; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql index 21ffd85f7d01f..2889941c1fcc1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/create_view.sql @@ -636,7 +636,7 @@ DESC TABLE vv6; -- Check cases involving dropped/altered columns in a function's rowtype result -- --- Skip the tests below because Spark does't support PostgreSQL-specific UDFs/transactions +-- Skip the tests below because Spark doesn't support PostgreSQL-specific UDFs/transactions -- create table tt14t (f1 text, f2 text, f3 text, f4 text); -- insert into tt14t values('foo', 'bar', 'baz', '42'); -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index d0150616cd67e..3765093f83bc2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -835,7 +835,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } - test("SPARK-19993 nested subquery caching and scalar + predicate subqueris") { + test("SPARK-19993 nested subquery caching and scalar + predicate subqueries") { withTempView("t1", "t2", "t3", "t4") { Seq(1).toDF("c1").createOrReplaceTempView("t1") Seq(2).toDF("c1").createOrReplaceTempView("t2") @@ -886,17 +886,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } private def checkIfNoJobTriggered[T](f: => T): T = { - var numJobTrigered = 0 + var numJobTriggered = 0 val jobListener = new SparkListener { override def onJobStart(jobStart: SparkListenerJobStart): Unit = { - numJobTrigered += 1 + numJobTriggered += 1 } } sparkContext.addSparkListener(jobListener) try { val result = f sparkContext.listenerBus.waitUntilEmpty() - assert(numJobTrigered === 0) + assert(numJobTriggered === 0) result } finally { sparkContext.removeSparkListener(jobListener) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index a45bf12e8f841..4fecd625031ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -805,7 +805,7 @@ class DataFrameSuite extends QueryTest assert(df2.drop("`a.b`").columns.size == 2) } - test("drop(name: String) search and drop all top level columns that matchs the name") { + test("drop(name: String) search and drop all top level columns that matches the name") { val df1 = Seq((1, 2)).toDF("a", "b") val df2 = Seq((3, 4)).toDF("a", "b") checkAnswer(df1.crossJoin(df2), Row(1, 2, 3, 4)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala index 5c144dad23c30..009ccb9a45354 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetCacheSuite.scala @@ -102,18 +102,19 @@ class DatasetCacheSuite extends QueryTest test("persist and then groupBy columns asKey, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(_._1) - val agged = grouped.mapGroups { 
(g, iter) => (g, iter.map(_._2).sum) } - agged.persist() + val aggregated = grouped.mapGroups { (g, iter) => (g, iter.map(_._2).sum) } + aggregated.persist() checkDataset( - agged.filter(_._1 == "b"), + aggregated.filter(_._1 == "b"), ("b", 3)) - assertCached(agged.filter(_._1 == "b")) + assertCached(aggregated.filter(_._1 == "b")) ds.unpersist(blocking = true) assert(ds.storageLevel == StorageLevel.NONE, "The Dataset ds should not be cached.") - agged.unpersist(blocking = true) - assert(agged.storageLevel == StorageLevel.NONE, "The Dataset agged should not be cached.") + aggregated.unpersist(blocking = true) + assert(aggregated.storageLevel == StorageLevel.NONE, + "The Dataset aggregated should not be cached.") } test("persist and then withColumn") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala index ac51634febc99..8547d96e0f457 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetPrimitiveSuite.scala @@ -170,23 +170,23 @@ class DatasetPrimitiveSuite extends QueryTest with SharedSparkSession { test("groupBy function, map") { val ds = Seq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11).toDS() val grouped = ds.groupByKey(_ % 2) - val agged = grouped.mapGroups { (g, iter) => + val aggregated = grouped.mapGroups { (g, iter) => val name = if (g == 0) "even" else "odd" (name, iter.size) } checkDatasetUnorderly( - agged, + aggregated, ("even", 5), ("odd", 6)) } test("groupBy function, flatMap") { val ds = Seq("a", "b", "c", "xyz", "hello").toDS() val grouped = ds.groupByKey(_.length) - val agged = grouped.flatMapGroups { (g, iter) => Iterator(g.toString, iter.mkString) } + val aggregated = grouped.flatMapGroups { (g, iter) => Iterator(g.toString, iter.mkString) } checkDatasetUnorderly( - agged, + aggregated, "1", "abc", "3", "xyz", "5", "hello") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 953a58760cd5c..67e3ad6a80642 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -528,42 +528,42 @@ class DatasetSuite extends QueryTest test("groupBy function, map") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.mapGroups { (g, iter) => (g._1, iter.map(_._2).sum) } + val aggregated = grouped.mapGroups { (g, iter) => (g._1, iter.map(_._2).sum) } checkDatasetUnorderly( - agged, + aggregated, ("a", 30), ("b", 3), ("c", 1)) } test("groupBy function, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val grouped = ds.groupByKey(v => (v._1, "word")) - val agged = grouped.flatMapGroups { (g, iter) => + val aggregated = grouped.flatMapGroups { (g, iter) => Iterator(g._1, iter.map(_._2).sum.toString) } checkDatasetUnorderly( - agged, + aggregated, "a", "30", "b", "3", "c", "1") } test("groupBy function, mapValues, flatMap") { val ds = Seq(("a", 10), ("a", 20), ("b", 1), ("b", 2), ("c", 1)).toDS() val keyValue = ds.groupByKey(_._1).mapValues(_._2) - val agged = keyValue.mapGroups { (g, iter) => (g, iter.sum) } - checkDataset(agged, ("a", 30), ("b", 3), ("c", 1)) + val aggregated = keyValue.mapGroups { (g, iter) => (g, iter.sum) } + checkDataset(aggregated, ("a", 30), ("b", 3), ("c", 1)) val 
keyValue1 = ds.groupByKey(t => (t._1, "key")).mapValues(t => (t._2, "value")) - val agged1 = keyValue1.mapGroups { (g, iter) => (g._1, iter.map(_._1).sum) } - checkDataset(agged1, ("a", 30), ("b", 3), ("c", 1)) + val aggregated1 = keyValue1.mapGroups { (g, iter) => (g._1, iter.map(_._1).sum) } + checkDataset(aggregated1, ("a", 30), ("b", 3), ("c", 1)) } test("groupBy function, reduce") { val ds = Seq("abc", "xyz", "hello").toDS() - val agged = ds.groupByKey(_.length).reduceGroups(_ + _) + val aggregated = ds.groupByKey(_.length).reduceGroups(_ + _) checkDatasetUnorderly( - agged, + aggregated, 3 -> "abcxyz", 5 -> "hello") } @@ -914,11 +914,11 @@ class DatasetSuite extends QueryTest test("grouping key and grouped value has field with same name") { val ds = Seq(ClassData("a", 1), ClassData("a", 2)).toDS() - val agged = ds.groupByKey(d => ClassNullableData(d.a, null)).mapGroups { + val aggregated = ds.groupByKey(d => ClassNullableData(d.a, null)).mapGroups { (key, values) => key.a + values.map(_.b).sum } - checkDataset(agged, "a3") + checkDataset(aggregated, "a3") } test("cogroup's left and right side has field with same name") { @@ -1286,7 +1286,7 @@ class DatasetSuite extends QueryTest Route("b", "c", 6)) val ds = sparkContext.parallelize(data).toDF.as[Route] - val grped = ds.map(r => GroupedRoutes(r.src, r.dest, Seq(r))) + val grouped = ds.map(r => GroupedRoutes(r.src, r.dest, Seq(r))) .groupByKey(r => (r.src, r.dest)) .reduceGroups { (g1: GroupedRoutes, g2: GroupedRoutes) => GroupedRoutes(g1.src, g1.dest, g1.routes ++ g2.routes) @@ -1303,7 +1303,7 @@ class DatasetSuite extends QueryTest implicit def ordering[GroupedRoutes]: Ordering[GroupedRoutes] = (x: GroupedRoutes, y: GroupedRoutes) => x.toString.compareTo(y.toString) - checkDatasetUnorderly(grped, expected: _*) + checkDatasetUnorderly(grouped, expected: _*) } test("SPARK-18189: Fix serialization issue in KeyValueGroupedDataset") { @@ -1383,7 +1383,7 @@ class DatasetSuite extends QueryTest } } } else { - // Local checkpoints dont require checkpoint_dir + // Local checkpoints don't require checkpoint_dir f } } @@ -1474,7 +1474,7 @@ class DatasetSuite extends QueryTest } test("SPARK-18717: code generation works for both scala.collection.Map" + - " and scala.collection.imutable.Map") { + " and scala.collection.immutable.Map") { val ds = Seq(WithImmutableMap("hi", Map(42L -> "foo"))).toDS checkDataset(ds.map(t => t), WithImmutableMap("hi", Map(42L -> "foo"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index 9caa4c0377009..d7bbf597ff983 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -454,7 +454,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { assert(e.getCause.isInstanceOf[IllegalArgumentException]) assert(e.getMessage.contains("You may get a different result due to the upgrading of Spark")) - // february + // February val x1 = "2016-02-29" val x2 = "2017-02-29" val df1 = Seq(x1, x2).toDF("x") @@ -629,7 +629,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { e.getMessage.contains("You may get a different result due to the upgrading of Spark")) } - // february + // February val y1 = "2016-02-29" val y2 = "2017-02-29" val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") @@ -680,7 +680,7 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { 
checkAnswer(df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"), Seq( Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)), Row(null))) - // february + // February val y1 = "2016-02-29" val y2 = "2017-02-29" val ts5 = Timestamp.valueOf("2016-02-29 00:00:00") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2eeb729ece3fb..ebfe8bdd7a749 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1316,7 +1316,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("oder by asc by default when not specify ascending and descending") { + test("order by asc by default when not specify ascending and descending") { checkAnswer( sql("SELECT a, b FROM testData2 ORDER BY a desc, b"), Seq(Row(3, 1), Row(3, 2), Row(2, 1), Row(2, 2), Row(1, 1), Row(1, 2)) @@ -2812,7 +2812,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SRARK-22266: the same aggregate function was calculated multiple times") { + test("SPARK-22266: the same aggregate function was calculated multiple times") { val query = "SELECT a, max(b+1), max(b+1) + 1 FROM testData2 GROUP BY a" val df = sql(query) val physical = df.queryExecution.sparkPlan @@ -3092,7 +3092,7 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(scan.isInstanceOf[ParquetScan]) assert(scan.asInstanceOf[ParquetScan].pushedFilters === filters) case _ => - fail(s"unknow format $format") + fail(s"unknown format $format") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 36e55c0994f18..02c6fba9725d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -278,18 +278,18 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper val allCode = importedCode ++ code val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { // Although the loop is heavy, only used for bracketed comments test. 
- val querys = new ArrayBuffer[String] + val queries = new ArrayBuffer[String] val otherCodes = new ArrayBuffer[String] var tempStr = "" var start = false for (c <- allCode) { if (c.trim.startsWith("--QUERY-DELIMITER-START")) { start = true - querys ++= splitWithSemicolon(otherCodes.toSeq) + queries ++= splitWithSemicolon(otherCodes.toSeq) otherCodes.clear() } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { start = false - querys += s"\n${tempStr.stripSuffix(";")}" + queries += s"\n${tempStr.stripSuffix(";")}" tempStr = "" } else if (start) { tempStr += s"\n$c" @@ -298,9 +298,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } } if (otherCodes.nonEmpty) { - querys ++= splitWithSemicolon(otherCodes.toSeq) + queries ++= splitWithSemicolon(otherCodes.toSeq) } - querys.toSeq + queries.toSeq } else { splitWithSemicolon(allCode).toSeq } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 576ad26505d27..5e1c6ba92803d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -166,13 +166,13 @@ class SparkSessionExtensionSuite extends SparkFunSuite { // inject rule that will run during AQE query stage optimization and will verify that the // custom tags were written in the preparation phase extensions.injectColumnar(session => - MyColumarRule(MyNewQueryStageRule(), MyNewQueryStageRule())) + MyColumnarRule(MyNewQueryStageRule(), MyNewQueryStageRule())) } withSession(extensions) { session => session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, true) assert(session.sessionState.queryStagePrepRules.contains(MyQueryStagePrepRule())) assert(session.sessionState.columnarRules.contains( - MyColumarRule(MyNewQueryStageRule(), MyNewQueryStageRule()))) + MyColumnarRule(MyNewQueryStageRule(), MyNewQueryStageRule()))) import session.sqlContext.implicits._ val data = Seq((100L), (200L), (300L)).toDF("vals").repartition(1) val df = data.selectExpr("vals + 1") @@ -205,12 +205,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { val extensions = create { extensions => extensions.injectColumnar(session => - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } withSession(extensions) { session => session.sessionState.conf.setConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED, enableAQE) assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) import session.sqlContext.implicits._ // perform a join to inject a broadcast exchange val left = Seq((1, 50L), (2, 100L), (3, 150L)).toDF("l1", "l2") @@ -244,12 +244,12 @@ class SparkSessionExtensionSuite extends SparkFunSuite { .config(COLUMN_BATCH_SIZE.key, 2) .withExtensions { extensions => extensions.injectColumnar(session => - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } .getOrCreate() try { assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) import session.sqlContext.implicits._ val input = Seq((100L), (200L), (300L)) @@ 
-277,7 +277,7 @@ class SparkSessionExtensionSuite extends SparkFunSuite { assert(session.sessionState.functionRegistry .lookupFunction(MyExtensions.myFunction._1).isDefined) assert(session.sessionState.columnarRules.contains( - MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) + MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule()))) } finally { stop(session) } @@ -824,7 +824,7 @@ case class MyPostRule() extends Rule[SparkPlan] { } } -case class MyColumarRule(pre: Rule[SparkPlan], post: Rule[SparkPlan]) extends ColumnarRule { +case class MyColumnarRule(pre: Rule[SparkPlan], post: Rule[SparkPlan]) extends ColumnarRule { override def preColumnarTransitions: Rule[SparkPlan] = pre override def postColumnarTransitions: Rule[SparkPlan] = post } @@ -838,7 +838,7 @@ class MyExtensions extends (SparkSessionExtensions => Unit) { e.injectOptimizerRule(MyRule) e.injectParser(MyParser) e.injectFunction(MyExtensions.myFunction) - e.injectColumnar(session => MyColumarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) + e.injectColumnar(session => MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala index f39b4b8b56c2e..ee9cf7b67225f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSTableStats.scala @@ -376,7 +376,7 @@ object TPCDSTableStats { "s_closed_date_sk" -> CatalogColumnStat(Some(70L), Some("2450823"), Some("2451313"), Some(296), Some(4), Some(4), None, CatalogColumnStat.VERSION), "s_store_id" -> CatalogColumnStat(Some(210L), None, None, Some(0), Some(16), Some(16), None, CatalogColumnStat.VERSION), "s_geography_class" -> CatalogColumnStat(Some(1L), None, None, Some(3), Some(7), Some(7), None, CatalogColumnStat.VERSION), - "s_tax_precentage" -> CatalogColumnStat(Some(12L), Some("0.00"), Some("0.11"), Some(5), Some(8), Some(8), None, CatalogColumnStat.VERSION) + "s_tax_percentage" -> CatalogColumnStat(Some(12L), Some("0.00"), Some("0.11"), Some(5), Some(8), Some(8), None, CatalogColumnStat.VERSION) )), "store_returns" -> CatalogStatistics(4837573440L, Some(28795080L), Map( "sr_item_sk" -> CatalogColumnStat(Some(197284L), Some("1"), Some("204000"), Some(0), Some(8), Some(8), None, CatalogColumnStat.VERSION), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9a8c3e3cf1a11..b1d61658b8a8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -285,7 +285,7 @@ class DataSourceV2SQLSuite } } - test("CreateTable/RepalceTable: invalid schema if has interval type") { + test("CreateTable/ReplaceTable: invalid schema if has interval type") { Seq("CREATE", "REPLACE").foreach { action => val e1 = intercept[AnalysisException]( sql(s"$action TABLE table_name (id int, value interval) USING $v2Format")) @@ -1360,9 +1360,9 @@ class DataSourceV2SQLSuite test("ShowNamespaces: default v2 catalog doesn't support namespace") { spark.conf.set( - "spark.sql.catalog.testcat_no_namspace", + "spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namspace") + 
spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namespace") val exception = intercept[AnalysisException] { sql("SHOW NAMESPACES") @@ -1373,11 +1373,11 @@ class DataSourceV2SQLSuite test("ShowNamespaces: v2 catalog doesn't support namespace") { spark.conf.set( - "spark.sql.catalog.testcat_no_namspace", + "spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in testcat_no_namspace") + sql("SHOW NAMESPACES in testcat_no_namespace") } assert(exception.getMessage.contains("does not support namespaces")) @@ -2268,7 +2268,7 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { // Since the following multi-part name starts with `globalTempDB`, it is resolved to - // the session catalog, not the `gloabl_temp` v2 catalog. + // the session catalog, not the `global_temp` v2 catalog. sql(s"CREATE TABLE $globalTempDB.ns1.ns2.tbl (id bigint, data string) USING json") } assert(e.message.contains( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala index 67ec1028f1998..eec396b2e3998 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLWindowFunctionSuite.scala @@ -372,7 +372,7 @@ class SQLWindowFunctionSuite extends QueryTest with SharedSparkSession { spark.catalog.dropTempView("nums") } - test("window function: mutiple window expressions specified by range in a single expression") { + test("window function: multiple window expressions specified by range in a single expression") { val nums = sparkContext.parallelize(1 to 10).map(x => (x, x % 2)).toDF("x", "y") nums.createOrReplaceTempView("nums") withTempView("nums") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 1a826c00c81f2..81ba09f206b92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -184,7 +184,7 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("REFRESH", "Resource paths cannot be empty in REFRESH statements") } - test("SPARK-33118 CREATE TMEPORARY TABLE with LOCATION") { + test("SPARK-33118 CREATE TEMPORARY TABLE with LOCATION") { assertEqual("CREATE TEMPORARY TABLE t USING parquet OPTIONS (path '/data/tmp/testspark1')", CreateTempViewUsing(TableIdentifier("t", None), None, false, false, "parquet", Map("path" -> "/data/tmp/testspark1"))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index fe40d7dce344d..eb5643df4c752 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -398,8 +398,8 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Case2: The parent of a LocalTableScanExec supports WholeStageCodegen. // In this case, the LocalTableScanExec should be within a WholeStageCodegen domain // and no more InputAdapter is inserted as the direct parent of the LocalTableScanExec. 
- val aggedDF = Seq(1, 2, 3).toDF.groupBy("value").sum() - val executedPlan = aggedDF.queryExecution.executedPlan + val aggregatedDF = Seq(1, 2, 3).toDF.groupBy("value").sum() + val executedPlan = aggregatedDF.queryExecution.executedPlan // HashAggregateExec supports WholeStageCodegen and it's the parent of // LocalTableScanExec so LocalTableScanExec should be within a WholeStageCodegen domain. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 45ba2202d83d3..69f1565c2f8de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -755,9 +755,9 @@ class AdaptiveQueryExecSuite Utils.deleteRecursively(tableDir) df1.write.parquet(tableDir.getAbsolutePath) - val agged = spark.table("bucketed_table").groupBy("i").count() + val aggregated = spark.table("bucketed_table").groupBy("i").count() val error = intercept[Exception] { - agged.count() + aggregated.count() } assert(error.getCause().toString contains "Invalid bucket file") assert(error.getSuppressed.size === 0) @@ -962,9 +962,9 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.UI_EXPLAIN_MODE.key -> mode, SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80") { - val dfApdaptive = sql("SELECT * FROM testData JOIN testData2 ON key = a WHERE value = '1'") + val dfAdaptive = sql("SELECT * FROM testData JOIN testData2 ON key = a WHERE value = '1'") try { - checkAnswer(dfApdaptive, Row(1, "1", 1, 1) :: Row(1, "1", 1, 2) :: Nil) + checkAnswer(dfAdaptive, Row(1, "1", 1, 1) :: Row(1, "1", 1, 2) :: Nil) spark.sparkContext.listenerBus.waitUntilEmpty() assert(checkDone) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 1e6e59456c887..d861bbbf67b1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -1210,7 +1210,7 @@ class ArrowConvertersSuite extends SharedSparkSession { testQuietly("interval is unsupported for arrow") { val e = intercept[SparkException] { - calenderIntervalData.toDF().toArrowBatchRdd.collect() + calendarIntervalData.toDF().toArrowBatchRdd.collect() } assert(e.getCause.isInstanceOf[UnsupportedOperationException]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index b3cd9f1057a70..82d3e2dfe2212 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -549,9 +549,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None - Seq(Option("inexistentColumns"), None).foreach { partitionCols => + 
Seq(Option("nonexistentColumns"), None).foreach { partitionCols => withTempPath { pathToPartitionedTable => df.write.format("parquet").partitionBy("num") .save(pathToPartitionedTable.getCanonicalPath) @@ -589,9 +589,9 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None - Seq(Option("inexistentColumns"), None).foreach { partitionCols => + Seq(Option("nonexistentColumns"), None).foreach { partitionCols => withTempPath { pathToNonPartitionedTable => df.write.format("parquet").save(pathToNonPartitionedTable.getCanonicalPath) checkSchemaInCreatedDataSourceTable( @@ -608,7 +608,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { import testImplicits._ val df = sparkContext.parallelize(1 to 10).map(i => (i, i.toString)).toDF("num", "str") - // Case 1: with partitioning columns but no schema: Option("inexistentColumns") + // Case 1: with partitioning columns but no schema: Option("nonexistentColumns") // Case 2: without schema and partitioning columns: None Seq(Option("num"), None).foreach { partitionCols => withTempPath { pathToNonPartitionedTable => @@ -1910,7 +1910,7 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { |OPTIONS ( | path '${tempDir.getCanonicalPath}' |) - |CLUSTERED BY (inexistentColumnA) SORTED BY (inexistentColumnB) INTO 2 BUCKETS + |CLUSTERED BY (nonexistentColumnA) SORTED BY (nonexistentColumnB) INTO 2 BUCKETS """.stripMargin) } assert(e.message == "Cannot specify bucketing information if the table schema is not " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 5147a8485ea25..758540f1a42f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1187,26 +1187,26 @@ class PlanResolutionSuite extends AnalysisTest { ) } - DSV2ResolutionTests.foreach { case (sql, isSessionCatlog) => + DSV2ResolutionTests.foreach { case (sql, isSessionCatalog) => test(s"Data source V2 relation resolution '$sql'") { val parsed = parseAndResolve(sql, withDefault = true) - val catlogIdent = if (isSessionCatlog) v2SessionCatalog else testCat - val tableIdent = if (isSessionCatlog) "v2Table" else "tab" + val catalogIdent = if (isSessionCatalog) v2SessionCatalog else testCat + val tableIdent = if (isSessionCatalog) "v2Table" else "tab" parsed match { case AlterTable(_, _, r: DataSourceV2Relation, _) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case Project(_, AsDataSourceV2Relation(r)) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case AppendData(r: DataSourceV2Relation, _, _, _) => - assert(r.catalog.exists(_ == catlogIdent)) + assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => - assert(r.catalog == catlogIdent) + assert(r.catalog == catalogIdent) 
assert(r.identifier.name() == tableIdent) case ShowTableProperties(r: ResolvedTable, _) => - assert(r.catalog == catlogIdent) + assert(r.catalog == catalogIdent) assert(r.identifier.name() == tableIdent) case ShowTablePropertiesCommand(t: TableIdentifier, _) => assert(t.identifier == tableIdent) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala index dc97b7a55ee9a..6ba3d2723412b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceSuite.scala @@ -141,14 +141,14 @@ class DataSourceSuite extends SharedSparkSession with PrivateMethodTester { } test("Data source options should be propagated in method checkAndGlobPathIfNecessary") { - val dataSourceOptions = Map("fs.defaultFS" -> "nonexistsFs://nonexistsFs") + val dataSourceOptions = Map("fs.defaultFS" -> "nonexistentFs://nonexistentFs") val dataSource = DataSource(spark, "parquet", Seq("/path3"), options = dataSourceOptions) val checkAndGlobPathIfNecessary = PrivateMethod[Seq[Path]]('checkAndGlobPathIfNecessary) val message = intercept[java.io.IOException] { dataSource invokePrivate checkAndGlobPathIfNecessary(false, false) }.getMessage - val expectMessage = "No FileSystem for scheme nonexistsFs" + val expectMessage = "No FileSystem for scheme nonexistentFs" assert(message.filterNot(Set(':', '"').contains) == expectMessage) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala index 2b5cb27d59ad9..c90732183cb7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SchemaPruningSuite.scala @@ -623,9 +623,9 @@ abstract class SchemaPruningSuite spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") .createOrReplaceTempView("contacts") - val departmentScahem = "`depId` INT,`depName` STRING,`contactId` INT, " + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentScahem).load(path + "/departments") + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") .createOrReplaceTempView("departments") testThunk @@ -651,9 +651,9 @@ abstract class SchemaPruningSuite spark.read.format(dataSourceName).schema(schema).load(path + "/contacts") .createOrReplaceTempView("contacts") - val departmentScahem = "`depId` INT,`depName` STRING,`contactId` INT, " + + val departmentSchema = "`depId` INT,`depName` STRING,`contactId` INT, " + "`employer` STRUCT<`id`: INT, `company`: STRUCT<`name`: STRING, `address`: STRING>>" - spark.read.format(dataSourceName).schema(departmentScahem).load(path + "/departments") + spark.read.format(dataSourceName).schema(departmentSchema).load(path + "/departments") .createOrReplaceTempView("departments") testThunk diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index 8c5f7bed7c50d..2fe5953cbe12e 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -183,7 +183,7 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS val oneBlockColumnMeta = oneBlockMeta.getColumns().get(0) // This is the important assert. Column stats are written, but they are ignored // when the data is read back as mentioned above, b/c int96 is unsigned. This - // assert makes sure this holds even if we change parquet versions (if eg. there + // assert makes sure this holds even if we change parquet versions (if e.g. there // were ever statistics even on unsigned columns). assert(!oneBlockColumnMeta.getStatistics.hasNonNullValue) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 5c41614c45b6f..400f4d8e1b156 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1157,7 +1157,7 @@ class ParquetV1PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { test("SPARK-21463: MetadataLogFileIndex should respect userSpecifiedSchema for partition cols") { withTempDir { tempDir => val output = new File(tempDir, "output").toString - val checkpoint = new File(tempDir, "chkpoint").toString + val checkpoint = new File(tempDir, "checkpoint").toString try { val stream = MemoryStream[(String, Int)] val df = stream.toDS().toDF("time", "value") @@ -1303,7 +1303,7 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { test("SPARK-21463: MetadataLogFileIndex should respect userSpecifiedSchema for partition cols") { withTempDir { tempDir => val output = new File(tempDir, "output").toString - val checkpoint = new File(tempDir, "chkpoint").toString + val checkpoint = new File(tempDir, "checkpoint").toString try { val stream = MemoryStream[(String, Int)] val df = stream.toDS().toDF("time", "value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 05d305a9b52ba..8f85fe3c52583 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -857,7 +857,7 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) df.write.mode(SaveMode.Overwrite).parquet(path) - // donot return batch, because whole stage codegen is disabled for wide table (>200 columns) + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) val df2 = spark.read.parquet(path) val fileScan2 = df2.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get assert(!fileScan2.asInstanceOf[FileSourceScanExec].supportsColumnar) @@ -890,7 +890,7 @@ class ParquetV2QuerySuite extends ParquetQuerySuite { val df = spark.range(10).select(Seq.tabulate(11) {i => ('id + i).as(s"c$i")} : _*) 
df.write.mode(SaveMode.Overwrite).parquet(path) - // donot return batch, because whole stage codegen is disabled for wide table (>200 columns) + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) val df2 = spark.read.parquet(path) val fileScan2 = df2.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]).get val parquetScan2 = fileScan2.asInstanceOf[BatchScanExec].scan.asInstanceOf[ParquetScan] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 296cbc3f3ad52..061799f439e5b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -60,7 +60,7 @@ class EnsureRequirementsSuite extends SharedSparkSession { case other => fail(other.toString) } - // Both sides are PartitioningCollection, but left side cannot be reorderd to match + // Both sides are PartitioningCollection, but left side cannot be reordered to match // and it should fall back to the right side. val smjExec3 = SortMergeJoinExec( exprA :: exprC :: Nil, exprB :: exprA :: Nil, Inner, None, plan1, plan1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index b4f921efcac81..21d17f40abb34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -181,7 +181,7 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils assert(probes.toDouble > 1.0) } else { val mainValue = probes.split("\n").apply(1).stripPrefix("(").stripSuffix(")") - // Extract min, med, max from the string and strip off everthing else. + // Extract min, med, max from the string and strip off everything else. 
val index = mainValue.indexOf(" (", 0) mainValue.slice(0, index).split(", ").foreach { probe => assert(probe.toDouble > 1.0) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 67dd88cbab63b..980d532dd4779 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -199,7 +199,7 @@ class HDFSMetadataLogSuite extends SharedSparkSession { intercept[IllegalStateException](verifyBatchIds(Seq(2, 3, 4), Some(1L), Some(5L))) intercept[IllegalStateException](verifyBatchIds(Seq(1, 2, 4, 5), Some(1L), Some(5L))) - // Related to SPARK-26629, this capatures the behavior for verifyBatchIds when startId > endId + // Related to SPARK-26629, this captures the behavior for verifyBatchIds when startId > endId intercept[IllegalStateException](verifyBatchIds(Seq(), Some(2L), Some(1L))) intercept[AssertionError](verifyBatchIds(Seq(2), Some(2L), Some(1L))) intercept[AssertionError](verifyBatchIds(Seq(1), Some(2L), Some(1L))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala index a702e00ff9f92..dfc64a41d9f86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SparkPlanInfoSuite.scala @@ -24,10 +24,10 @@ class SparkPlanInfoSuite extends SharedSparkSession{ import testImplicits._ - def vaidateSparkPlanInfo(sparkPlanInfo: SparkPlanInfo): Unit = { + def validateSparkPlanInfo(sparkPlanInfo: SparkPlanInfo): Unit = { sparkPlanInfo.nodeName match { case "InMemoryTableScan" => assert(sparkPlanInfo.children.length == 1) - case _ => sparkPlanInfo.children.foreach(vaidateSparkPlanInfo) + case _ => sparkPlanInfo.children.foreach(validateSparkPlanInfo) } } @@ -39,6 +39,6 @@ class SparkPlanInfoSuite extends SharedSparkSession{ val planInfoResult = SparkPlanInfo.fromSparkPlan(dfWithCache.queryExecution.executedPlan) - vaidateSparkPlanInfo(planInfoResult) + validateSparkPlanInfo(planInfoResult) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala index 567524ac75c2e..13b22dba1168b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/ExecutorSideSQLConfSuite.scala @@ -108,7 +108,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { .queryExecution.executedPlan) assert(res.length == 2) assert(res.forall { case (_, code, _) => - (code.contains("* Codegend pipeline") == flag) && + (code.contains("* Codegened pipeline") == flag) && (code.contains("// input[") == flag) }) } @@ -175,7 +175,7 @@ class ExecutorSideSQLConfSuite extends SparkFunSuite with SQLTestUtils { df.hint("broadcast") } - // set local propert and assert + // set local property and assert val df2 = generateBroadcastDataFrame(confKey, confValue1) spark.sparkContext.setLocalProperty(confKey, confValue1) val checks = df1.join(df2).collect() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index f0b19071a969b..ede5fe538a028 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1418,7 +1418,7 @@ class JDBCSuite extends QueryTest } test("SPARK-24327 verify and normalize a partition column based on a JDBC resolved schema") { - def testJdbcParitionColumn(partColName: String, expectedColumnName: String): Unit = { + def testJdbcPartitionColumn(partColName: String, expectedColumnName: String): Unit = { val df = spark.read.format("jdbc") .option("url", urlWithUserAndPass) .option("dbtable", "TEST.PARTITION") @@ -1439,16 +1439,16 @@ class JDBCSuite extends QueryTest } } - testJdbcParitionColumn("THEID", "THEID") - testJdbcParitionColumn("\"THEID\"", "THEID") + testJdbcPartitionColumn("THEID", "THEID") + testJdbcPartitionColumn("\"THEID\"", "THEID") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - testJdbcParitionColumn("ThEiD", "THEID") + testJdbcPartitionColumn("ThEiD", "THEID") } - testJdbcParitionColumn("THE ID", "THE ID") + testJdbcPartitionColumn("THE ID", "THE ID") def testIncorrectJdbcPartitionColumn(partColName: String): Unit = { val errMsg = intercept[AnalysisException] { - testJdbcParitionColumn(partColName, "THEID") + testJdbcPartitionColumn(partColName, "THEID") }.getMessage assert(errMsg.contains(s"User-defined partition column $partColName not found " + "in the JDBC relation:")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 0ff9303421ade..4ae8cdbeb4f1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -639,13 +639,14 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i", "j").saveAsTable("bucketed_table") val tbl = spark.table("bucketed_table") - val agged = tbl.groupBy("i", "j").agg(max("k")) + val aggregated = tbl.groupBy("i", "j").agg(max("k")) checkAnswer( - agged.sort("i", "j"), + aggregated.sort("i", "j"), df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) - assert(agged.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert( + aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } @@ -679,13 +680,14 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") val tbl = spark.table("bucketed_table") - val agged = tbl.groupBy("i", "j").agg(max("k")) + val aggregated = tbl.groupBy("i", "j").agg(max("k")) checkAnswer( - agged.sort("i", "j"), + aggregated.sort("i", "j"), df1.groupBy("i", "j").agg(max("k")).sort("i", "j")) - assert(agged.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) + assert( + aggregated.queryExecution.executedPlan.find(_.isInstanceOf[ShuffleExchangeExec]).isEmpty) } } @@ -806,9 +808,9 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils { Utils.deleteRecursively(tableDir) df1.write.parquet(tableDir.getAbsolutePath) - val agged = spark.table("bucketed_table").groupBy("i").count() + val aggregated = spark.table("bucketed_table").groupBy("i").count() val error = intercept[Exception] { - agged.count() 
+ aggregated.count() } assert(error.getCause().toString contains "Invalid bucket file") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala index 9464f7e4c1241..9a7c7e0edc409 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/CreateTableAsSelectSuite.scala @@ -234,7 +234,7 @@ class CreateTableAsSelectSuite extends DataSourceTest with SharedSparkSession { } } - test("create table using as select - with overriden max number of buckets") { + test("create table using as select - with overridden max number of buckets") { def createTableSql(numBuckets: Int): String = s""" |CREATE TABLE t USING PARQUET diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala index ca3e714665818..0da6b487e31ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala @@ -359,7 +359,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val schemaNotMatch = intercept[Exception] { sql( s""" - |CREATE $tableType relationProvierWithSchema (i int) + |CREATE $tableType relationProviderWithSchema (i int) |USING org.apache.spark.sql.sources.SimpleScanSource |OPTIONS ( | From '1', @@ -373,7 +373,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { val schemaNeeded = intercept[Exception] { sql( s""" - |CREATE $tableType schemaRelationProvierWithoutSchema + |CREATE $tableType schemaRelationProviderWithoutSchema |USING org.apache.spark.sql.sources.AllDataTypesScanSource |OPTIONS ( | From '1', @@ -387,7 +387,7 @@ class TableScanSuite extends DataSourceTest with SharedSparkSession { test("read the data source tables that do not extend SchemaRelationProvider") { Seq("TEMPORARY VIEW", "TABLE").foreach { tableType => - val tableName = "relationProvierWithSchema" + val tableName = "relationProviderWithSchema" withTable (tableName) { sql( s""" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index 3c74e316f260e..b240d2058a018 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -1946,9 +1946,9 @@ class FileStreamSourceSuite extends FileStreamSourceTest { test("SourceFileArchiver - fail when base archive path matches source pattern") { val fakeFileSystem = new FakeFileSystem("fake") - def assertThrowIllegalArgumentException(sourcePatttern: Path, baseArchivePath: Path): Unit = { + def assertThrowIllegalArgumentException(sourcePattern: Path, baseArchivePath: Path): Unit = { intercept[IllegalArgumentException] { - new SourceFileArchiver(fakeFileSystem, sourcePatttern, fakeFileSystem, baseArchivePath) + new SourceFileArchiver(fakeFileSystem, sourcePattern, fakeFileSystem, baseArchivePath) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index e64d5f6f3587e..ed284df10aced 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1064,13 +1064,13 @@ class StreamSuite extends StreamTest { } test("SPARK-30657: streaming limit should not apply on limits on state subplans") { - val streanData = MemoryStream[Int] - val streamingDF = streanData.toDF().toDF("value") + val streamData = MemoryStream[Int] + val streamingDF = streamData.toDF().toDF("value") val staticDF = spark.createDataset(Seq(1)).toDF("value").orderBy("value") testStream(streamingDF.join(staticDF.limit(1), "value"))( - AddData(streanData, 1, 2, 3), + AddData(streamData, 1, 2, 3), CheckAnswer(Row(1)), - AddData(streanData, 1, 3, 5), + AddData(streamData, 1, 3, 5), CheckAnswer(Row(1), Row(1))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 0296366f3578b..9cf649605ed1c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -107,12 +107,12 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } test("read: read table without streaming capability support") { - val tableIdentifer = "testcat.table_name" + val tableIdentifier = "testcat.table_name" - spark.sql(s"CREATE TABLE $tableIdentifer (id bigint, data string) USING foo") + spark.sql(s"CREATE TABLE $tableIdentifier (id bigint, data string) USING foo") intercept[AnalysisException] { - spark.readStream.table(tableIdentifer) + spark.readStream.table(tableIdentifier) }.message.contains("does not support either micro-batch or continuous scan") } @@ -213,7 +213,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { } test("write: write to non-exist table with custom catalog") { - val tableIdentifier = "testcat.nonexisttable" + val tableIdentifier = "testcat.nonexistenttable" withTable(tableIdentifier) { runTestWithStreamAppend(tableIdentifier) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index c51faaf10f5dd..a1fd4a0215b1f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -169,10 +169,10 @@ private[sql] trait SQLTestData { self => rdd } - protected lazy val calenderIntervalData: RDD[IntervalData] = { + protected lazy val calendarIntervalData: RDD[IntervalData] = { val rdd = spark.sparkContext.parallelize( IntervalData(new CalendarInterval(1, 1, 1)) :: Nil) - rdd.toDF().createOrReplaceTempView("calenderIntervalData") + rdd.toDF().createOrReplaceTempView("calendarIntervalData") rdd } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala index cfc92a780308d..ed2e309fa075a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} trait SharedSparkSession extends SQLTestUtils with SharedSparkSessionBase { /** - * Suites extending [[SharedSparkSession]] are sharing resources (eg. SparkSession) in their + * Suites extending [[SharedSparkSession]] are sharing resources (e.g. 
SparkSession) in their * tests. That trait initializes the spark session in its [[beforeAll()]] implementation before * the automatic thread snapshot is performed, so the audit code could fail to report threads * leaked by that shared session. From c001dd49e4e9bb42f18618afe710e401b2df3afb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 8 Dec 2020 10:43:41 -0800 Subject: [PATCH 0706/1009] [SPARK-33675][INFRA][FOLLOWUP] Schedule branch-3.1 snapshot at master branch ### What changes were proposed in this pull request? Currently, `master`/`branch-3.0`/`branch-2.4` snapshot publishing is successfully migrated from Jenkins to `GitHub Action`. - https://github.com/apache/spark/actions?query=workflow%3A%22Publish+Snapshot%22 This PR aims to schedule `branch-3.1` snapshot at `master` branch. ### Why are the changes needed? This is because it turns out that `GitHub Action Schedule` works only at `master` branch. (the default branch). - https://docs.github.com/en/free-pro-teamlatest/actions/reference/events-that-trigger-workflows#scheduled-events ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The matrix triggering is tested at the forked branch. - https://github.com/dongjoon-hyun/spark/runs/1519015974 Closes #30674 from dongjoon-hyun/SPARK-SCHEDULE-3.1. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/publish_snapshot.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml index 9871680f73891..504d702fd1f22 100644 --- a/.github/workflows/publish_snapshot.yml +++ b/.github/workflows/publish_snapshot.yml @@ -7,9 +7,17 @@ on: jobs: publish-snapshot: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + branch: + - master + - branch-3.1 steps: - name: Checkout Spark repository uses: actions/checkout@master + with: + ref: ${{ matrix.branch }} - name: Cache Maven local repository uses: actions/cache@v2 with: @@ -27,4 +35,5 @@ jobs: ASF_PASSWORD: ${{ secrets.NEXUS_PW }} GPG_KEY: "not_used" GPG_PASSPHRASE: "not_used" + GIT_REF: ${{ matrix.branch }} run: ./dev/create-release/release-build.sh publish-snapshot From 6fd234503cf1e85715ccd3bda42f29dae1daa71b Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 8 Dec 2020 11:41:35 -0800 Subject: [PATCH 0707/1009] [SPARK-32110][SQL] normalize special floating numbers in HyperLogLog++ ### What changes were proposed in this pull request? Currently, Spark treats 0.0 and -0.0 semantically equal, while it still retains the difference between them so that users can see -0.0 when displaying the data set. The comparison expressions in Spark take care of the special floating numbers and implement the correct semantic. However, Spark doesn't always use these comparison expressions to compare values, and we need to normalize the special floating numbers before comparing them in these places: 1. GROUP BY 2. join keys 3. window partition keys This PR fixes one more place that compares values without using comparison expressions: HyperLogLog++ ### Why are the changes needed? Fix the query result ### Does this PR introduce _any_ user-facing change? Yes, the result of HyperLogLog++ becomes correct now. ### How was this patch tested? a new test case, and a few more test cases that pass before this PR to improve test coverage. Closes #30673 from cloud-fan/bug. 
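For illustration, a minimal standalone sketch of the normalization described above (using a hypothetical `normalizeDouble` helper, not the actual patched Spark code): -0.0 is mapped to 0.0 and every NaN bit pattern to the canonical NaN, so semantically equal doubles feed identical bits into the hash used by HyperLogLog++.

```scala
// Hedged sketch mirroring the intent of the double normalizer described above.
def normalizeDouble(d: Double): Double =
  if (d.isNaN) Double.NaN     // collapse all NaN bit patterns into the canonical NaN
  else if (d == -0.0d) 0.0d   // collapse -0.0 into 0.0 (0.0 maps to itself)
  else d

// After normalization, 0.0 and -0.0 have the same bit pattern, so they hash identically.
assert(java.lang.Double.doubleToLongBits(normalizeDouble(-0.0d)) ==
  java.lang.Double.doubleToLongBits(normalizeDouble(0.0d)))
```

Since `approx_count_distinct` is the user-facing function backed by HyperLogLog++, this is why a column containing both 0.0 and -0.0 should now be counted as a single distinct value.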
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../optimizer/NormalizeFloatingNumbers.scala | 45 +++++----- .../util/HyperLogLogPlusPlusHelper.scala | 8 +- .../catalyst/expressions/PredicateSuite.scala | 90 +++++++++++++++++++ .../aggregate/HyperLogLogPlusPlusSuite.scala | 24 ++++- 4 files changed, 144 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala index 4434c29cbb3c4..ac8766cd74367 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NormalizeFloatingNumbers.scala @@ -143,6 +143,28 @@ object NormalizeFloatingNumbers extends Rule[LogicalPlan] { case _ => throw new IllegalStateException(s"fail to normalize $expr") } + + val FLOAT_NORMALIZER: Any => Any = (input: Any) => { + val f = input.asInstanceOf[Float] + if (f.isNaN) { + Float.NaN + } else if (f == -0.0f) { + 0.0f + } else { + f + } + } + + val DOUBLE_NORMALIZER: Any => Any = (input: Any) => { + val d = input.asInstanceOf[Double] + if (d.isNaN) { + Double.NaN + } else if (d == -0.0d) { + 0.0d + } else { + d + } + } } case class NormalizeNaNAndZero(child: Expression) extends UnaryExpression with ExpectsInputTypes { @@ -152,27 +174,8 @@ case class NormalizeNaNAndZero(child: Expression) extends UnaryExpression with E override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(FloatType, DoubleType)) private lazy val normalizer: Any => Any = child.dataType match { - case FloatType => (input: Any) => { - val f = input.asInstanceOf[Float] - if (f.isNaN) { - Float.NaN - } else if (f == -0.0f) { - 0.0f - } else { - f - } - } - - case DoubleType => (input: Any) => { - val d = input.asInstanceOf[Double] - if (d.isNaN) { - Double.NaN - } else if (d == -0.0d) { - 0.0d - } else { - d - } - } + case FloatType => NormalizeFloatingNumbers.FLOAT_NORMALIZER + case DoubleType => NormalizeFloatingNumbers.DOUBLE_NORMALIZER } override def nullSafeEval(input: Any): Any = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala index ea619c6a7666c..6471a746f2edf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/HyperLogLogPlusPlusHelper.scala @@ -22,6 +22,7 @@ import java.util import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.XxHash64Function +import org.apache.spark.sql.catalyst.optimizer.NormalizeFloatingNumbers.{DOUBLE_NORMALIZER, FLOAT_NORMALIZER} import org.apache.spark.sql.types._ // A helper class for HyperLogLogPlusPlus. @@ -88,7 +89,12 @@ class HyperLogLogPlusPlusHelper(relativeSD: Double) extends Serializable { * * Variable names in the HLL++ paper match variable names in the code. */ - def update(buffer: InternalRow, bufferOffset: Int, value: Any, dataType: DataType): Unit = { + def update(buffer: InternalRow, bufferOffset: Int, _value: Any, dataType: DataType): Unit = { + val value = dataType match { + case FloatType => FLOAT_NORMALIZER.apply(_value) + case DoubleType => DOUBLE_NORMALIZER.apply(_value) + case _ => _value + } // Create the hashed value 'x'. 
val x = XxHash64Function.hash(value, dataType, 42L) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index a36baec1a0b99..6f75623dc59ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -554,4 +554,94 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(GreaterThan(Literal(Float.NaN), Literal(Float.NaN)), false) checkEvaluation(GreaterThan(Literal(0.0F), Literal(-0.0F)), false) } + + test("SPARK-32110: compare special double/float values in array") { + def createUnsafeDoubleArray(d: Double): Literal = { + Literal(UnsafeArrayData.fromPrimitiveArray(Array(d)), ArrayType(DoubleType)) + } + def createSafeDoubleArray(d: Double): Literal = { + Literal(new GenericArrayData(Array(d)), ArrayType(DoubleType)) + } + def createUnsafeFloatArray(d: Double): Literal = { + Literal(UnsafeArrayData.fromPrimitiveArray(Array(d.toFloat)), ArrayType(FloatType)) + } + def createSafeFloatArray(d: Double): Literal = { + Literal(new GenericArrayData(Array(d.toFloat)), ArrayType(FloatType)) + } + def checkExpr( + exprBuilder: (Expression, Expression) => Expression, + left: Double, + right: Double, + expected: Any): Unit = { + // test double + checkEvaluation( + exprBuilder(createUnsafeDoubleArray(left), createUnsafeDoubleArray(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeDoubleArray(left), createSafeDoubleArray(right)), expected) + checkEvaluation( + exprBuilder(createSafeDoubleArray(left), createSafeDoubleArray(right)), expected) + // test float + checkEvaluation( + exprBuilder(createUnsafeFloatArray(left), createUnsafeFloatArray(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeFloatArray(left), createSafeFloatArray(right)), expected) + checkEvaluation( + exprBuilder(createSafeFloatArray(left), createSafeFloatArray(right)), expected) + } + + checkExpr(EqualTo, Double.NaN, Double.NaN, true) + checkExpr(EqualTo, Double.NaN, Double.PositiveInfinity, false) + checkExpr(EqualTo, 0.0, -0.0, true) + checkExpr(GreaterThan, Double.NaN, Double.PositiveInfinity, true) + checkExpr(GreaterThan, Double.NaN, Double.NaN, false) + checkExpr(GreaterThan, 0.0, -0.0, false) + } + + test("SPARK-32110: compare special double/float values in struct") { + def createUnsafeDoubleRow(d: Double): Literal = { + val dt = new StructType().add("d", "double") + val converter = UnsafeProjection.create(dt) + val unsafeRow = converter.apply(InternalRow(d)) + Literal(unsafeRow, dt) + } + def createSafeDoubleRow(d: Double): Literal = { + Literal(InternalRow(d), new StructType().add("d", "double")) + } + def createUnsafeFloatRow(d: Double): Literal = { + val dt = new StructType().add("f", "float") + val converter = UnsafeProjection.create(dt) + val unsafeRow = converter.apply(InternalRow(d.toFloat)) + Literal(unsafeRow, dt) + } + def createSafeFloatRow(d: Double): Literal = { + Literal(InternalRow(d.toFloat), new StructType().add("f", "float")) + } + def checkExpr( + exprBuilder: (Expression, Expression) => Expression, + left: Double, + right: Double, + expected: Any): Unit = { + // test double + checkEvaluation( + exprBuilder(createUnsafeDoubleRow(left), createUnsafeDoubleRow(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeDoubleRow(left), createSafeDoubleRow(right)), 
expected) + checkEvaluation( + exprBuilder(createSafeDoubleRow(left), createSafeDoubleRow(right)), expected) + // test float + checkEvaluation( + exprBuilder(createUnsafeFloatRow(left), createUnsafeFloatRow(right)), expected) + checkEvaluation( + exprBuilder(createUnsafeFloatRow(left), createSafeFloatRow(right)), expected) + checkEvaluation( + exprBuilder(createSafeFloatRow(left), createSafeFloatRow(right)), expected) + } + + checkExpr(EqualTo, Double.NaN, Double.NaN, true) + checkExpr(EqualTo, Double.NaN, Double.PositiveInfinity, false) + checkExpr(EqualTo, 0.0, -0.0, true) + checkExpr(GreaterThan, Double.NaN, Double.PositiveInfinity, true) + checkExpr(GreaterThan, Double.NaN, Double.NaN, false) + checkExpr(GreaterThan, 0.0, -0.0, false) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala index 98fd04c9cca91..1afccea5aef15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.aggregate +import java.lang.{Double => JDouble} import java.util.Random import scala.collection.mutable @@ -24,7 +25,7 @@ import scala.collection.mutable import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, SpecificInternalRow} -import org.apache.spark.sql.types.{DataType, IntegerType} +import org.apache.spark.sql.types.{DataType, DoubleType, IntegerType} class HyperLogLogPlusPlusSuite extends SparkFunSuite { @@ -153,4 +154,25 @@ class HyperLogLogPlusPlusSuite extends SparkFunSuite { // Check if the buffers are equal. assert(buffer2 == buffer1a, "Buffers should be equal") } + + test("SPARK-32110: add 0.0 and -0.0") { + val (hll, input, buffer) = createEstimator(0.05, DoubleType) + input.setDouble(0, 0.0) + hll.update(buffer, input) + input.setDouble(0, -0.0) + hll.update(buffer, input) + evaluateEstimate(hll, buffer, 1); + } + + test("SPARK-32110: add NaN") { + val (hll, input, buffer) = createEstimator(0.05, DoubleType) + input.setDouble(0, Double.NaN) + hll.update(buffer, input) + val specialNaN = JDouble.longBitsToDouble(0x7ff1234512345678L) + assert(JDouble.isNaN(specialNaN)) + assert(JDouble.doubleToRawLongBits(Double.NaN) != JDouble.doubleToRawLongBits(specialNaN)) + input.setDouble(0, specialNaN) + hll.update(buffer, input) + evaluateEstimate(hll, buffer, 1); + } } From 3ac70f169d653f22bd04ec7bb6ebb49696807bb2 Mon Sep 17 00:00:00 2001 From: Nicholas Marion Date: Tue, 8 Dec 2020 12:11:06 -0800 Subject: [PATCH 0708/1009] [SPARK-33695][BUILD] Upgrade to jackson to 2.10.5 and jackson-databind to 2.10.5.1 ### What changes were proposed in this pull request? Upgrade the jackson dependencies to 2.10.5 and jackson-databind to 2.10.5.1 ### Why are the changes needed? Jackson dependency has vulnerability CVE-2020-25649. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #30656 from n-marion/SPARK-33695_upgrade-jackson. 
Authored-by: Nicholas Marion Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 16 ++++++++-------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 16 ++++++++-------- pom.xml | 5 +++-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 401050a60e493..3a54dbd6232e3 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -103,17 +103,17 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-core/2.10.5//jackson-core-2.10.5.jar +jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar +jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar +jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar +jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar @@ -220,7 +220,7 @@ shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar +snakeyaml/1.26//snakeyaml-1.26.jar snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index b0f8935843281..67bcc7a8ed902 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -102,18 +102,18 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.0//jackson-annotations-2.10.0.jar +jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.0//jackson-core-2.10.0.jar -jackson-databind/2.10.0//jackson-databind-2.10.0.jar -jackson-dataformat-yaml/2.10.0//jackson-dataformat-yaml-2.10.0.jar +jackson-core/2.10.5//jackson-core-2.10.5.jar +jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar +jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar 
jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.0//jackson-module-jaxb-annotations-2.10.0.jar -jackson-module-paranamer/2.10.0//jackson-module-paranamer-2.10.0.jar -jackson-module-scala_2.12/2.10.0//jackson-module-scala_2.12-2.10.0.jar +jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar +jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar +jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar @@ -235,7 +235,7 @@ shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar -snakeyaml/1.24//snakeyaml-1.24.jar +snakeyaml/1.26//snakeyaml-1.26.jar snappy-java/1.1.8//snappy-java-1.1.8.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 364dec688b38b..23eb16a7db472 100644 --- a/pom.xml +++ b/pom.xml @@ -169,7 +169,8 @@ true 1.9.13 - 2.10.0 + 2.10.5 + 2.10.5.1 1.1.8 1.1.2 1.10 @@ -773,7 +774,7 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson.version} + ${fasterxml.jackson-databind.version} com.fasterxml.jackson.core From f021f6d3c72e1c84637798b4ddcb7e208fdfbf46 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Wed, 9 Dec 2020 11:18:09 +0800 Subject: [PATCH 0709/1009] [MINOR][ML] Increase Bounded MLOR (without regularization) test error tolerance ### What changes were proposed in this pull request? Improve LogisticRegression test error tolerance ### Why are the changes needed? When we switch BLAS version, some of the tests will fail due to too strict error tolerance in test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #30587 from WeichenXu123/fix_lor_test. Authored-by: Weichen Xu Signed-off-by: Weichen Xu --- .../LogisticRegressionSuite.scala | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala index d0b282db1ece8..d2814b420e017 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala @@ -1548,9 +1548,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { val interceptsExpected1 = Vectors.dense( 1.0000152482448372, 3.591773288423673, 5.079685953744937) - checkCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected1) + checkBoundedMLORCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected1) assert(model1.interceptVector ~== interceptsExpected1 relTol 0.01) - checkCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected1) + checkBoundedMLORCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected1) assert(model2.interceptVector ~== interceptsExpected1 relTol 0.01) // Bound constrained optimization with bound on both side. 
@@ -1585,9 +1585,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { isTransposed = true) val interceptsExpected3 = Vectors.dense(1.0, 2.0, 2.0) - checkCoefficientsEquivalent(model3.coefficientMatrix, coefficientsExpected3) + checkBoundedMLORCoefficientsEquivalent(model3.coefficientMatrix, coefficientsExpected3) assert(model3.interceptVector ~== interceptsExpected3 relTol 0.01) - checkCoefficientsEquivalent(model4.coefficientMatrix, coefficientsExpected3) + checkBoundedMLORCoefficientsEquivalent(model4.coefficientMatrix, coefficientsExpected3) assert(model4.interceptVector ~== interceptsExpected3 relTol 0.01) // Bound constrained optimization with infinite bound on both side. @@ -1621,9 +1621,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { val interceptsExpected5 = Vectors.dense( -2.2231282183460723, 0.3669496747012527, 1.856178543644802) - checkCoefficientsEquivalent(model5.coefficientMatrix, coefficientsExpected5) + checkBoundedMLORCoefficientsEquivalent(model5.coefficientMatrix, coefficientsExpected5) assert(model5.interceptVector ~== interceptsExpected5 relTol 0.01) - checkCoefficientsEquivalent(model6.coefficientMatrix, coefficientsExpected5) + checkBoundedMLORCoefficientsEquivalent(model6.coefficientMatrix, coefficientsExpected5) assert(model6.interceptVector ~== interceptsExpected5 relTol 0.01) } @@ -1719,9 +1719,9 @@ class LogisticRegressionSuite extends MLTest with DefaultReadWriteTest { 1.7524631428961193, 1.2292565990448736, 1.3433784431904323, 1.5846063017678864), isTransposed = true) - checkCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected) + checkBoundedMLORCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected) assert(model1.interceptVector.toArray === Array.fill(3)(0.0)) - checkCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected) + checkBoundedMLORCoefficientsEquivalent(model2.coefficientMatrix, coefficientsExpected) assert(model2.interceptVector.toArray === Array.fill(3)(0.0)) } @@ -2953,16 +2953,17 @@ object LogisticRegressionSuite { } /** + * Note: This method is only used in Bounded MLOR (without regularization) test * When no regularization is applied, the multinomial coefficients lack identifiability * because we do not use a pivot class. We can add any constant value to the coefficients * and get the same likelihood. If fitting under bound constrained optimization, we don't * choose the mean centered coefficients like what we do for unbound problems, since they * may out of the bounds. We use this function to check whether two coefficients are equivalent. */ - def checkCoefficientsEquivalent(coefficients1: Matrix, coefficients2: Matrix): Unit = { + def checkBoundedMLORCoefficientsEquivalent(coefficients1: Matrix, coefficients2: Matrix): Unit = { coefficients1.colIter.zip(coefficients2.colIter).foreach { case (col1: Vector, col2: Vector) => (col1.asBreeze - col2.asBreeze).toArray.toSeq.sliding(2).foreach { - case Seq(v1, v2) => assert(v1 ~= v2 absTol 1E-3) + case Seq(v1, v2) => assert(v1 ~= v2 absTol 1E-2) } } } From 29fed23ba16d580e6247b6e70e9c9eef0698aa95 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 9 Dec 2020 05:06:37 +0000 Subject: [PATCH 0710/1009] [SPARK-33703][SQL] Migrate MSCK REPAIR TABLE to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `MSCK REPAIR TABLE` to use `UnresolvedTable` to resolve the table identifier. 
This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `MSCK REPAIR TABLE` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution consistent behavior consistent. For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("MSCK REPAIR TABLE t") // works fine ``` , but after this PR: ``` sql("MSCK REPAIR TABLE t") org.apache.spark.sql.AnalysisException: t is a temp view. 'MSCK REPAIR TABLE' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `MSCK REPAIR TABLE t` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30664 from imback82/repair_table_V2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 5 +++-- .../spark/sql/catalyst/plans/logical/statements.scala | 5 ----- .../spark/sql/catalyst/plans/logical/v2Commands.scala | 7 +++++++ .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../sql/catalyst/analysis/ResolveSessionCatalog.scala | 7 ++----- .../execution/datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 9 +-------- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 89b81ec1d83aa..7787e199d3770 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3547,7 +3547,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create a [[RepairTableStatement]]. + * Create a [[RepairTable]]. 
* * For example: * {{{ @@ -3555,7 +3555,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = withOrigin(ctx) { - RepairTableStatement(visitMultipartIdentifier(ctx.multipartIdentifier())) + RepairTable( + UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "MSCK REPAIR TABLE")) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index c4ac8ea8f2e69..b731b8a2fd8fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -383,11 +383,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * A REPAIR TABLE statement, as parsed from SQL - */ -case class RepairTableStatement(tableName: Seq[String]) extends ParsedStatement - /** * A TRUNCATE TABLE statement, as parsed from SQL */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 1e17c51137a55..e014048f723f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -735,3 +735,10 @@ case class DropView( ifExists: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the MSCK REPAIR TABLE command. 
+ */ +case class RepairTable(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index d5b27d9ad25cf..947154eae12c8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1952,7 +1952,7 @@ class DDLParserSuite extends AnalysisTest { test("MSCK REPAIR TABLE") { comparePlans( parsePlan("MSCK REPAIR TABLE a.b.c"), - RepairTableStatement(Seq("a", "b", "c"))) + RepairTable(UnresolvedTable(Seq("a", "b", "c"), "MSCK REPAIR TABLE"))) } test("LOAD DATA INTO table") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 657764832a931..817a63aa9aa6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -408,11 +408,8 @@ class ResolveSessionCatalog( case AnalyzeColumn(ResolvedV1TableOrViewIdentifier(ident), columnNames, allColumns) => AnalyzeColumnCommand(ident.asTableIdentifier, columnNames, allColumns) - case RepairTableStatement(tbl) => - val v1TableName = parseV1Table(tbl, "MSCK REPAIR TABLE") - AlterTableRecoverPartitionsCommand( - v1TableName.asTableIdentifier, - "MSCK REPAIR TABLE") + case RepairTable(ResolvedV1TableIdentifier(ident)) => + AlterTableRecoverPartitionsCommand(ident.asTableIdentifier, "MSCK REPAIR TABLE") case LoadData(ResolvedV1TableIdentifier(ident), path, isLocal, isOverwrite, partition) => LoadDataCommand( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7e2a485dcb4cc..37a4dcf081be4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -341,6 +341,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, pattern.map(_.asInstanceOf[ResolvedPartitionSpec])) :: Nil + case RepairTable(_: ResolvedTable) => + throw new AnalysisException("MSCK REPAIR TABLE is not supported for v2 tables.") + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b1d61658b8a8b..9020065449cef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2011,7 +2011,7 @@ class DataSourceV2SQLSuite val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo") - testV1Command("MSCK REPAIR TABLE", t) + testNotSupportedV2Command("MSCK REPAIR TABLE", t) } } @@ -2612,13 +2612,6 @@ class DataSourceV2SQLSuite assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) } - private def testV1Command(sqlCommand: String, sqlParams: String): Unit = { - val e = 
intercept[AnalysisException] { - sql(s"$sqlCommand $sqlParams") - } - assert(e.message.contains(s"$sqlCommand is only supported with v1 tables")) - } - private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { val errMsg = intercept[AnalysisException] { sql(sqlStatement) From c88eddac3bf860d04bba91fc913f8b2069a94153 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 9 Dec 2020 06:44:10 +0000 Subject: [PATCH 0711/1009] [SPARK-33641][SQL][DOC][FOLLOW-UP] Add migration guide for CHAR VARCHAR types ### What changes were proposed in this pull request? Add migration guide for CHAR VARCHAR types ### Why are the changes needed? for migration ### Does this PR introduce _any_ user-facing change? doc change ### How was this patch tested? passing ci Closes #30654 from yaooqinn/SPARK-33641-F. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 65a769da70aea..164bfd42d6e4a 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -58,6 +58,8 @@ license: | - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + - Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`. + ## Upgrading from Spark SQL 3.0 to 3.0.1 - In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match to the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference. From 48f93af9f3d40de5bf087eb1a06c1b9954b2ad76 Mon Sep 17 00:00:00 2001 From: suqilong Date: Wed, 9 Dec 2020 01:21:13 -0600 Subject: [PATCH 0712/1009] [SPARK-33669] Wrong error message from YARN application state monitor when sc.stop in yarn client mode ### What changes were proposed in this pull request? This change make InterruptedIOException to be treated as InterruptedException when closing YarnClientSchedulerBackend, which doesn't log error with "YARN application has exited unexpectedly xxx" ### Why are the changes needed? For YarnClient mode, when stopping YarnClientSchedulerBackend, it first tries to interrupt Yarn application monitor thread. In MonitorThread.run() it catches InterruptedException to gracefully response to stopping request. But client.monitorApplication method also throws InterruptedIOException when the hadoop rpc call is calling. In this case, MonitorThread will not know it is interrupted, a Yarn App failed is returned with "Failed to contact YARN for application xxxxx; YARN application has exited unexpectedly with state xxxxx" is logged with error level. which confuse user a lot. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? 
very simple patch, seems no need? Closes #30617 from sqlwindspeaker/yarn-client-interrupt-monitor. Authored-by: suqilong Signed-off-by: Mridul Muralidharan gmail.com> --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- .../spark/scheduler/cluster/YarnClientSchedulerBackend.scala | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 7f791e02a392b..618faef2d58b3 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1069,7 +1069,7 @@ private[spark] class Client( logError(s"Application $appId not found.") cleanupStagingDir() return YarnAppReport(YarnApplicationState.KILLED, FinalApplicationStatus.KILLED, None) - case NonFatal(e) => + case NonFatal(e) if !e.isInstanceOf[InterruptedIOException] => val msg = s"Failed to contact YARN for application $appId." logError(msg, e) // Don't necessarily clean up staging dir because status is unknown diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index cb0de5a0d50b4..8a55e612ce719 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -17,6 +17,8 @@ package org.apache.spark.scheduler.cluster +import java.io.InterruptedIOException + import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.yarn.api.records.YarnApplicationState @@ -121,7 +123,8 @@ private[spark] class YarnClientSchedulerBackend( allowInterrupt = false sc.stop() } catch { - case e: InterruptedException => logInfo("Interrupting monitor thread") + case _: InterruptedException | _: InterruptedIOException => + logInfo("Interrupting monitor thread") } } From a713a7eee3e7f76df6210a6e215ffc0d67ec71f2 Mon Sep 17 00:00:00 2001 From: Dooyoung Hwang Date: Wed, 9 Dec 2020 18:35:24 +0900 Subject: [PATCH 0713/1009] [SPARK-33655][SQL] Improve performance of processing FETCH_PRIOR ### What changes were proposed in this pull request? Currently, when a client requests FETCH_PRIOR to Thriftserver, Thriftserver reiterates from the start position. Because Thriftserver caches a query result with an array when THRIFTSERVER_INCREMENTAL_COLLECT feature is off, FETCH_PRIOR can be implemented without reiterating the result. A trait FeatureIterator is added in order to separate the implementation for iterator and an array. Also, FeatureIterator supports moves cursor with absolute position, which will be useful for the implementation of FETCH_RELATIVE, FETCH_ABSOLUTE. ### Why are the changes needed? For better performance of Thriftserver. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? FetchIteratorSuite Closes #30600 from Dooyoung-Hwang/refactor_with_fetch_iterator. 
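A worksheet-style usage sketch of the fetch contract added below (illustrative only; `ArrayFetchIterator` is `private[hive]`, so real callers live in that package):

```scala
// FETCH_NEXT advances the fetch block; FETCH_PRIOR rewinds relative to the
// previous block start and serves rows from the cached array without re-collecting.
val iter = new ArrayFetchIterator[Int]((0 until 10).toArray)

iter.fetchNext()                       // FETCH_NEXT: block starts at row 0
(0 until 5).map(_ => iter.next())      // rows 0..4

iter.fetchNext()                       // next block starts at row 5
(0 until 3).map(_ => iter.next())      // rows 5..7

iter.fetchPrior(2)                     // FETCH_PRIOR: previous block start (5) minus 2 = 3
(0 until 3).map(_ => iter.next())      // rows 3..5, no rewind-from-zero re-iteration
```

The `FetchIteratorSuite` added in this patch exercises exactly these transitions.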
Authored-by: Dooyoung Hwang Signed-off-by: HyukjinKwon --- .../sql/hive/thriftserver/FetchIterator.scala | 107 ++++++++++++++ .../SparkExecuteStatementOperation.scala | 69 ++------- .../thriftserver/FetchIteratorSuite.scala | 134 ++++++++++++++++++ 3 files changed, 256 insertions(+), 54 deletions(-) create mode 100644 sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala create mode 100644 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala new file mode 100644 index 0000000000000..b9db657952b56 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/FetchIterator.scala @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver + +private[hive] sealed trait FetchIterator[A] extends Iterator[A] { + /** + * Begin a fetch block, forward from the current position. + * Resets the fetch start offset. + */ + def fetchNext(): Unit + + /** + * Begin a fetch block, moving the iterator back by offset from the start of the previous fetch + * block start. + * Resets the fetch start offset. + * + * @param offset the amount to move a fetch start position toward the prior direction. + */ + def fetchPrior(offset: Long): Unit = fetchAbsolute(getFetchStart - offset) + + /** + * Begin a fetch block, moving the iterator to the given position. + * Resets the fetch start offset. + * + * @param pos index to move a position of iterator. 
+ */ + def fetchAbsolute(pos: Long): Unit + + def getFetchStart: Long + + def getPosition: Long +} + +private[hive] class ArrayFetchIterator[A](src: Array[A]) extends FetchIterator[A] { + private var fetchStart: Long = 0 + + private var position: Long = 0 + + override def fetchNext(): Unit = fetchStart = position + + override def fetchAbsolute(pos: Long): Unit = { + position = (pos max 0) min src.length + fetchStart = position + } + + override def getFetchStart: Long = fetchStart + + override def getPosition: Long = position + + override def hasNext: Boolean = position < src.length + + override def next(): A = { + position += 1 + src(position.toInt - 1) + } +} + +private[hive] class IterableFetchIterator[A](iterable: Iterable[A]) extends FetchIterator[A] { + private var iter: Iterator[A] = iterable.iterator + + private var fetchStart: Long = 0 + + private var position: Long = 0 + + override def fetchNext(): Unit = fetchStart = position + + override def fetchAbsolute(pos: Long): Unit = { + val newPos = pos max 0 + if (newPos < position) resetPosition() + while (position < newPos && hasNext) next() + fetchStart = position + } + + override def getFetchStart: Long = fetchStart + + override def getPosition: Long = position + + override def hasNext: Boolean = iter.hasNext + + override def next(): A = { + position += 1 + iter.next() + } + + private def resetPosition(): Unit = { + if (position != 0) { + iter = iterable.iterator + position = 0 + fetchStart = 0 + } + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index f7a4be9591818..c4ae035e1f836 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -69,13 +69,7 @@ private[hive] class SparkExecuteStatementOperation( private var result: DataFrame = _ - // We cache the returned rows to get iterators again in case the user wants to use FETCH_FIRST. - // This is only used when `spark.sql.thriftServer.incrementalCollect` is set to `false`. - // In case of `true`, this will be `None` and FETCH_FIRST will trigger re-execution. - private var resultList: Option[Array[SparkRow]] = _ - private var previousFetchEndOffset: Long = 0 - private var previousFetchStartOffset: Long = 0 - private var iter: Iterator[SparkRow] = _ + private var iter: FetchIterator[SparkRow] = _ private var dataTypes: Array[DataType] = _ private lazy val resultSchema: TableSchema = { @@ -148,43 +142,14 @@ private[hive] class SparkExecuteStatementOperation( setHasResultSet(true) val resultRowSet: RowSet = RowSetFactory.create(getResultSetSchema, getProtocolVersion, false) - // Reset iter when FETCH_FIRST or FETCH_PRIOR - if ((order.equals(FetchOrientation.FETCH_FIRST) || - order.equals(FetchOrientation.FETCH_PRIOR)) && previousFetchEndOffset != 0) { - // Reset the iterator to the beginning of the query. - iter = if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { - resultList = None - result.toLocalIterator.asScala - } else { - if (resultList.isEmpty) { - resultList = Some(result.collect()) - } - resultList.get.iterator - } - } - - var resultOffset = { - if (order.equals(FetchOrientation.FETCH_FIRST)) { - logInfo(s"FETCH_FIRST request with $statementId. 
Resetting to resultOffset=0") - 0 - } else if (order.equals(FetchOrientation.FETCH_PRIOR)) { - // TODO: FETCH_PRIOR should be handled more efficiently than rewinding to beginning and - // reiterating. - val targetOffset = math.max(previousFetchStartOffset - maxRowsL, 0) - logInfo(s"FETCH_PRIOR request with $statementId. Resetting to resultOffset=$targetOffset") - var off = 0 - while (off < targetOffset && iter.hasNext) { - iter.next() - off += 1 - } - off - } else { // FETCH_NEXT - previousFetchEndOffset - } + if (order.equals(FetchOrientation.FETCH_FIRST)) { + iter.fetchAbsolute(0) + } else if (order.equals(FetchOrientation.FETCH_PRIOR)) { + iter.fetchPrior(maxRowsL) + } else { + iter.fetchNext() } - - resultRowSet.setStartOffset(resultOffset) - previousFetchStartOffset = resultOffset + resultRowSet.setStartOffset(iter.getPosition) if (!iter.hasNext) { resultRowSet } else { @@ -206,11 +171,9 @@ private[hive] class SparkExecuteStatementOperation( } resultRowSet.addRow(row.toArray.asInstanceOf[Array[Object]]) curRow += 1 - resultOffset += 1 } - previousFetchEndOffset = resultOffset log.info(s"Returning result set with ${curRow} rows from offsets " + - s"[$previousFetchStartOffset, $previousFetchEndOffset) with $statementId") + s"[${iter.getFetchStart}, ${iter.getPosition}) with $statementId") resultRowSet } } @@ -326,14 +289,12 @@ private[hive] class SparkExecuteStatementOperation( logDebug(result.queryExecution.toString()) HiveThriftServer2.eventManager.onStatementParsed(statementId, result.queryExecution.toString()) - iter = { - if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { - resultList = None - result.toLocalIterator.asScala - } else { - resultList = Some(result.collect()) - resultList.get.iterator - } + iter = if (sqlContext.getConf(SQLConf.THRIFTSERVER_INCREMENTAL_COLLECT.key).toBoolean) { + new IterableFetchIterator[SparkRow](new Iterable[SparkRow] { + override def iterator: Iterator[SparkRow] = result.toLocalIterator.asScala + }) + } else { + new ArrayFetchIterator[SparkRow](result.collect()) } dataTypes = result.schema.fields.map(_.dataType) } catch { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala new file mode 100644 index 0000000000000..0fbdb8a9050c8 --- /dev/null +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/FetchIteratorSuite.scala @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver + +import org.apache.spark.SparkFunSuite + +class FetchIteratorSuite extends SparkFunSuite { + + private def getRows(fetchIter: FetchIterator[Int], maxRowCount: Int): Seq[Int] = { + for (_ <- 0 until maxRowCount if fetchIter.hasNext) yield fetchIter.next() + } + + test("SPARK-33655: Test fetchNext and fetchPrior") { + val testData = 0 until 10 + + def iteratorTest(fetchIter: FetchIterator[Int]): Unit = { + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 2)(getRows(fetchIter, 2)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 2) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 2) + assertResult(2 until 3)(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 3) + + fetchIter.fetchPrior(2) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 3) + assert(fetchIter.getPosition == 3) + assertResult(3 until 8)(getRows(fetchIter, 5)) + assert(fetchIter.getFetchStart == 3) + assert(fetchIter.getPosition == 8) + + fetchIter.fetchPrior(2) + assert(fetchIter.getFetchStart == 1) + assert(fetchIter.getPosition == 1) + assertResult(1 until 4)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 1) + assert(fetchIter.getPosition == 4) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 4) + assert(fetchIter.getPosition == 4) + assertResult(4 until 10)(getRows(fetchIter, 10)) + assert(fetchIter.getFetchStart == 4) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + assertResult(Seq.empty[Int])(getRows(fetchIter, 10)) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchPrior(20) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + } + iteratorTest(new ArrayFetchIterator[Int](testData.toArray)) + iteratorTest(new IterableFetchIterator[Int](testData)) + } + + test("SPARK-33655: Test fetchAbsolute") { + val testData = 0 until 10 + + def iteratorTest(fetchIter: FetchIterator[Int]): Unit = { + fetchIter.fetchNext() + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 5)(getRows(fetchIter, 5)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 5) + + fetchIter.fetchAbsolute(2) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 2) + assertResult(2 until 5)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 2) + assert(fetchIter.getPosition == 5) + + fetchIter.fetchAbsolute(7) + assert(fetchIter.getFetchStart == 7) + assert(fetchIter.getPosition == 7) + assertResult(7 until 8)(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 7) + assert(fetchIter.getPosition == 8) + + fetchIter.fetchAbsolute(20) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + assertResult(Seq.empty[Int])(getRows(fetchIter, 1)) + assert(fetchIter.getFetchStart == 10) + assert(fetchIter.getPosition == 10) + + fetchIter.fetchAbsolute(0) + assert(fetchIter.getFetchStart 
== 0) + assert(fetchIter.getPosition == 0) + assertResult(0 until 3)(getRows(fetchIter, 3)) + assert(fetchIter.getFetchStart == 0) + assert(fetchIter.getPosition == 3) + } + iteratorTest(new ArrayFetchIterator[Int](testData.toArray)) + iteratorTest(new IterableFetchIterator[Int](testData)) + } +} From 9959d49942d334b03a05c43299f3949a48e5fa17 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 9 Dec 2020 19:47:20 +0900 Subject: [PATCH 0714/1009] [SPARK-33719][DOC] Add make_date/make_timestamp/make_interval into the doc of ANSI Compliance ### What changes were proposed in this pull request? Add make_date/make_timestamp/make_interval into the doc of ANSI Compliance ### Why are the changes needed? Users can know that these functions throw runtime exceptions under ANSI mode if the result is not valid. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Build doc and check it in browser: ![image](https://user-images.githubusercontent.com/1097932/101608930-34a79e80-39bb-11eb-9294-9d9b8c3f6faa.png) Closes #30683 from gengliangwang/improveDoc. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c3e17dc22eed0..08ba07aa8de63 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -144,14 +144,18 @@ SELECT * FROM t; The behavior of some SQL functions can be different under ANSI mode (`spark.sql.ansi.enabled=true`). - `size`: This function returns null for null input. - - `element_at`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - - `element_at`: This function throws `NoSuchElementException` if key does not exist in map. + - `element_at`: + - This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. + - This function throws `NoSuchElementException` if key does not exist in map. - `elt`: This function throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `parse_url`: This function throws `IllegalArgumentException` if an input string is not a valid url. - - `to_date` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `to_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. - - `to_unix_timestamp` This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_date`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `to_unix_timestamp`: This function should fail with an exception if the input string can't be parsed, or the pattern string is invalid. + - `make_date`: This function should fail with an exception if the result date is invalid. + - `make_timestamp`: This function should fail with an exception if the result timestamp is invalid. 
+ - `make_interval`: This function should fail with an exception if the result interval is invalid. ### SQL Operators From b5399d4ef1c4e3df9d01a07e76bede41d7255d1c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 9 Dec 2020 20:26:18 +0900 Subject: [PATCH 0715/1009] [SPARK-33071][SPARK-33536][SQL][FOLLOW-UP] Rename deniedMetadataKeys to nonInheritableMetadataKeys in Alias ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/30488. This PR proposes to rename `Alias.deniedMetadataKeys` to `Alias.nonInheritableMetadataKeys` to make it less confusing. ### Why are the changes needed? To make it easier to maintain and read. ### Does this PR introduce _any_ user-facing change? No. This is rather a code cleanup. ### How was this patch tested? Ran the unittests written in the previous PR manually. Jenkins and GitHub Actions in this PR should also test them. Closes #30682 from HyukjinKwon/SPARK-33071-SPARK-33536. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../sql/catalyst/expressions/AliasHelper.scala | 2 +- .../expressions/namedExpressions.scala | 18 +++++++++++------- .../scala/org/apache/spark/sql/Column.scala | 9 +++++---- .../spark/sql/SparkSessionExtensionSuite.scala | 6 +++--- 4 files changed, 20 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index ad6cf959a69c6..1f3f762662252 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -90,7 +90,7 @@ trait AliasHelper { exprId = a.exprId, qualifier = a.qualifier, explicitMetadata = Some(a.metadata), - deniedMetadataKeys = a.deniedMetadataKeys) + nonInheritableMetadataKeys = a.nonInheritableMetadataKeys) case a: MultiAlias => a.copy(child = trimAliases(a.child)) case other => trimAliases(other) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 22aabd3c6b30b..badc2ecc9cb28 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -143,14 +143,14 @@ abstract class Attribute extends LeafExpression with NamedExpression with NullIn * fully qualified way. Consider the examples tableName.name, subQueryAlias.name. * tableName and subQueryAlias are possible qualifiers. * @param explicitMetadata Explicit metadata associated with this alias that overwrites child's. - * @param deniedMetadataKeys Keys of metadata entries that are supposed to be removed when - * inheriting the metadata from the child. + * @param nonInheritableMetadataKeys Keys of metadata entries that are supposed to be removed when + * inheriting the metadata from the child. */ case class Alias(child: Expression, name: String)( val exprId: ExprId = NamedExpression.newExprId, val qualifier: Seq[String] = Seq.empty, val explicitMetadata: Option[Metadata] = None, - val deniedMetadataKeys: Seq[String] = Seq.empty) + val nonInheritableMetadataKeys: Seq[String] = Seq.empty) extends UnaryExpression with NamedExpression { // Alias(Generator, xx) need to be transformed into Generate(generator, ...) 
@@ -172,7 +172,7 @@ case class Alias(child: Expression, name: String)( child match { case named: NamedExpression => val builder = new MetadataBuilder().withMetadata(named.metadata) - deniedMetadataKeys.foreach(builder.remove) + nonInheritableMetadataKeys.foreach(builder.remove) builder.build() case _ => Metadata.empty @@ -181,7 +181,10 @@ case class Alias(child: Expression, name: String)( } def newInstance(): NamedExpression = - Alias(child, name)(qualifier = qualifier, explicitMetadata = explicitMetadata) + Alias(child, name)( + qualifier = qualifier, + explicitMetadata = explicitMetadata, + nonInheritableMetadataKeys = nonInheritableMetadataKeys) override def toAttribute: Attribute = { if (resolved) { @@ -201,7 +204,7 @@ case class Alias(child: Expression, name: String)( override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" override protected final def otherCopyArgs: Seq[AnyRef] = { - exprId :: qualifier :: explicitMetadata :: deniedMetadataKeys :: Nil + exprId :: qualifier :: explicitMetadata :: nonInheritableMetadataKeys :: Nil } override def hashCode(): Int = { @@ -212,7 +215,8 @@ case class Alias(child: Expression, name: String)( override def equals(other: Any): Boolean = other match { case a: Alias => name == a.name && exprId == a.exprId && child == a.child && qualifier == a.qualifier && - explicitMetadata == a.explicitMetadata && deniedMetadataKeys == a.deniedMetadataKeys + explicitMetadata == a.explicitMetadata && + nonInheritableMetadataKeys == a.nonInheritableMetadataKeys case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 4ef23d7e31c59..539ef8dfe2665 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -1164,10 +1164,11 @@ class Column(val expr: Expression) extends Logging { * @since 2.0.0 */ def name(alias: String): Column = withExpr { - // SPARK-33536: The Alias is no longer a column reference after converting to an attribute. - // These denied metadata keys are used to strip the column reference related metadata for - // the Alias. So it won't be caught as a column reference in DetectAmbiguousSelfJoin. - Alias(expr, alias)(deniedMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) + // SPARK-33536: an alias is no longer a column reference. Therefore, + // we should not inherit the column reference related metadata in an alias + // so that it is not caught as a column reference in DetectAmbiguousSelfJoin. 
+ Alias(expr, alias)( + nonInheritableMetadataKeys = Seq(Dataset.DATASET_ID_KEY, Dataset.COL_POS_KEY)) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 5e1c6ba92803d..7c19f98b762f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -577,8 +577,8 @@ class ColumnarAlias(child: ColumnarExpression, name: String)( override val exprId: ExprId = NamedExpression.newExprId, override val qualifier: Seq[String] = Seq.empty, override val explicitMetadata: Option[Metadata] = None, - override val deniedMetadataKeys: Seq[String] = Seq.empty) - extends Alias(child, name)(exprId, qualifier, explicitMetadata, deniedMetadataKeys) + override val nonInheritableMetadataKeys: Seq[String] = Seq.empty) + extends Alias(child, name)(exprId, qualifier, explicitMetadata, nonInheritableMetadataKeys) with ColumnarExpression { override def columnarEval(batch: ColumnarBatch): Any = child.columnarEval(batch) @@ -715,7 +715,7 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { def replaceWithColumnarExpression(exp: Expression): ColumnarExpression = exp match { case a: Alias => new ColumnarAlias(replaceWithColumnarExpression(a.child), - a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.deniedMetadataKeys) + a.name)(a.exprId, a.qualifier, a.explicitMetadata, a.nonInheritableMetadataKeys) case att: AttributeReference => new ColumnarAttributeReference(att.name, att.dataType, att.nullable, att.metadata)(att.exprId, att.qualifier) From fa9ce1d4e893e3a32bc05e4d95241d32710deb54 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 9 Dec 2020 11:42:54 -0800 Subject: [PATCH 0716/1009] [SPARK-33722][SQL] Handle DELETE in ReplaceNullWithFalseInPredicate ### What changes were proposed in this pull request? This PR adds `DeleteFromTable` to supported plans in `ReplaceNullWithFalseInPredicate`. ### Why are the changes needed? This change allows Spark to optimize delete conditions like we optimize filters. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR extends the existing test cases to also cover `DeleteFromTable`. Closes #30688 from aokolnychyi/spark-33722. 
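A minimal sketch of the effect, assuming a v2 table `cat.db.events` in a catalog that supports row-level deletes (the catalog, table, and column names here are hypothetical and not from the patch):

```
// The IF branch below can only evaluate to NULL or FALSE, so once DeleteFromTable is
// covered by ReplaceNullWithFalseInPredicate the condition folds to FALSE and the
// DELETE becomes a no-op, matching the existing behavior for filter and join conditions.
spark.sql(
  """DELETE FROM cat.db.events
    |WHERE IF(status = 'open', NULL, FALSE)""".stripMargin)
```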
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../ReplaceNullWithFalseInPredicate.scala | 3 ++- ...ReplaceNullWithFalseInPredicateSuite.scala | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index ef3de4738c75c..698ece4f9e69f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If} import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFilter, Or} import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils @@ -53,6 +53,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) + case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond))) case p: LogicalPlan => p transformExpressions { case i @ If(pred, _, _) => i.copy(predicate = replaceNullWithFalse(pred)) case cw @ CaseWhen(branches, _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index eb52c5b74772c..6fc31c94e47eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, IntegerType} @@ -48,6 +48,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test("replace null inside filter and join conditions") { testFilter(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testJoin(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) + testDelete(originalCond 
= Literal(null, BooleanType), expectedCond = FalseLiteral) } test("Not expected type - replaceNullWithFalse") { @@ -64,6 +65,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(null, BooleanType)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested expressions in branches of If") { @@ -73,6 +75,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { UnresolvedAttribute("b") && Literal(null, BooleanType)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in elseValue of CaseWhen") { @@ -83,6 +86,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedCond = CaseWhen(branches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in branch values of CaseWhen") { @@ -92,6 +96,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val originalCond = CaseWhen(branches, Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside CaseWhen") { @@ -108,6 +113,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in complex CaseWhen expressions") { @@ -127,6 +133,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in Or") { @@ -134,12 +141,14 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedCond = UnresolvedAttribute("b") testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) + testDelete(originalCond, expectedCond) } test("replace null in And") { val originalCond = And(UnresolvedAttribute("b"), Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested And/Or expressions") { @@ -148,6 +157,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Or(Literal(null), And(Literal(null), And(UnresolvedAttribute("b"), Literal(null))))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in And inside branches of If") { @@ -157,6 +167,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { And(UnresolvedAttribute("b"), Literal(null, BooleanType))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside And") { @@ -168,6 +179,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { And(FalseLiteral, UnresolvedAttribute("b")))) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in 
branches of If inside another If") { @@ -177,6 +189,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("replace null in CaseWhen inside another CaseWhen") { @@ -184,6 +197,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val originalCond = CaseWhen(Seq(nestedCaseWhen -> TrueLiteral), Literal(null)) testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) } test("inability to replace null in non-boolean branches of If") { @@ -196,6 +210,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean values of CaseWhen") { @@ -210,6 +225,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val condition = CaseWhen(branches) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -222,6 +238,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) + testDelete(originalCond = condition, expectedCond = condition) } test("replace null in If used as a join condition") { @@ -353,6 +370,10 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test((rel, exp) => rel.select(exp), originalExpr, expectedExpr) } + private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + } + private def testHigherOrderFunc( argument: Expression, createExpr: (Expression, Expression) => Expression, From 667f64f447a75141b091c361acebdc363bfe9288 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Dec 2020 14:26:53 -0800 Subject: [PATCH 0717/1009] [SPARK-33725][BUILD] Upgrade snappy-java to 1.1.8.2 ### What changes were proposed in this pull request? This upgrades snappy-java to 1.1.8.2. ### Why are the changes needed? Minor version upgrade that includes: - [Fixed](https://github.com/xerial/snappy-java/pull/265) an initialization issue when using a recent Mac OS X version - Support Apple Silicon (M1, Mac-aarch64) - Fixed the pure-java Snappy fallback logic when no native library for your platform is found. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30690 from viirya/upgrade-snappy. 
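As an illustrative sanity check of the upgraded dependency (not part of the patch), a simple compress/uncompress round trip against the snappy-java API:

```
import org.xerial.snappy.Snappy

// Round-trip a small payload; on platforms without a native snappy library,
// the pure-java fallback fixed in 1.1.8.2 is exercised instead.
val input = "snappy-java 1.1.8.2".getBytes("UTF-8")
val compressed = Snappy.compress(input)
val restored = Snappy.uncompress(compressed)
assert(java.util.Arrays.equals(input, restored))
```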
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 3a54dbd6232e3..b731c643aabe7 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -221,7 +221,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.26//snakeyaml-1.26.jar -snappy-java/1.1.8//snappy-java-1.1.8.jar +snappy-java/1.1.8.2//snappy-java-1.1.8.2.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 67bcc7a8ed902..84b44342280a5 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -236,7 +236,7 @@ shims/0.9.0//shims-0.9.0.jar slf4j-api/1.7.30//slf4j-api-1.7.30.jar slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar snakeyaml/1.26//snakeyaml-1.26.jar -snappy-java/1.1.8//snappy-java-1.1.8.jar +snappy-java/1.1.8.2//snappy-java-1.1.8.2.jar spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar diff --git a/pom.xml b/pom.xml index 23eb16a7db472..f449bf7928ecc 100644 --- a/pom.xml +++ b/pom.xml @@ -171,7 +171,7 @@ 1.9.13 2.10.5 2.10.5.1 - 1.1.8 + 1.1.8.2 1.1.2 1.10 1.20 From 991b7977b5006e1e0d02b7d67a3e0fc50f5a9f66 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 10 Dec 2020 11:35:55 +0900 Subject: [PATCH 0718/1009] [SPARK-33727][K8S] Fall back from gnupg.net to openpgp.org ### What changes were proposed in this pull request? While building R docker image if we can't fetch the key from gnupg.net fall back to openpgp.org ### Why are the changes needed? gnupg.net key servers are flaky and sometimes fail to resolve or return keys. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Tried to add key on my desktop, it failed, then tried to add key with openpgp.org and it succeed. Closes #30696 from holdenk/SPARK-33727-gnupg-server-is-flaky. 
Authored-by: Holden Karau Signed-off-by: HyukjinKwon --- .../docker/src/main/dockerfiles/spark/bindings/R/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile index bd645e40677d0..f63f2d0d58e22 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/R/Dockerfile @@ -29,7 +29,7 @@ RUN mkdir ${SPARK_HOME}/R RUN \ echo "deb http://cloud.r-project.org/bin/linux/debian buster-cran35/" >> /etc/apt/sources.list && \ apt install -y gnupg && \ - apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' && \ + (apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF' || apt-key adv --keyserver keys.openpgp.org --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF') && \ apt-get update && \ apt install -y -t buster-cran35 r-base r-base-dev && \ rm -rf /var/cache/apt/* From 1c7f5f1ac7ecf0390410d2da6f3b1a615a5a71cc Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 9 Dec 2020 20:42:10 -0800 Subject: [PATCH 0719/1009] [SPARK-33724][K8S] Add decom script as a configuration param ### What changes were proposed in this pull request? Makes the location of the decommission script used in Kubernetes for graceful shutdown configurable. ### Why are the changes needed? Some environments don't use the Spark image builder and instead mount the decompressed Spark distro. In those envs configuring the location of the decommissioning script is required. ### Does this PR introduce _any_ user-facing change? New configuration parameter. ### How was this patch tested? Existing decommissioning integration test. Closes #30694 from holdenk/SPARK-33724-allow-decommissioning-script-location-to-be-configured. 
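A usage sketch for the new setting, assuming the decommissioning script is mounted into the executor image at an illustrative path (the path and application name below are placeholders):

```
import org.apache.spark.sql.SparkSession

// Point the executor preStop hook at a custom decommissioning script instead of
// the built-in /opt/decom.sh default.
val spark = SparkSession.builder()
  .appName("decom-script-example")
  .config("spark.kubernetes.decommission.script", "/opt/spark/sbin/decom.sh")
  .getOrCreate()
```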
Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/k8s/Config.scala | 8 ++++++++ .../deploy/k8s/features/BasicExecutorFeatureStep.scala | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 41194f3a2676f..c28d6fd405ae1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -24,6 +24,14 @@ import org.apache.spark.internal.config.ConfigBuilder private[spark] object Config extends Logging { + val DECOMMISSION_SCRIPT = + ConfigBuilder("spark.kubernetes.decommission.script") + .doc("The location of the script to use for graceful decommissioning") + .version("3.2.0") + .stringConf + .createWithDefault("/opt/decom.sh") + + val KUBERNETES_CONTEXT = ConfigBuilder("spark.kubernetes.context") .doc("The desired context from your K8S config file used to configure the K8S " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 250dd8238d9ea..4398f545917bf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -230,7 +230,7 @@ private[spark] class BasicExecutorFeatureStep( new ContainerBuilder(containerWithLimitCores).withNewLifecycle() .withNewPreStop() .withNewExec() - .addToCommand("/opt/decom.sh") + .addToCommand(kubernetesConf.get(DECOMMISSION_SCRIPT)) .endExec() .endPreStop() .endLifecycle() From af37c7f4115a2edf46a304f90db0aec4d3edde16 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Dec 2020 04:54:52 +0000 Subject: [PATCH 0720/1009] [SPARK-33558][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. ADD PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. ADD PARTITION` parsing tests to `AlterTableAddPartitionParserSuite` 2. Place v1 tests for `ALTER TABLE .. ADD PARTITION` from `DDLSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to the common trait `AlterTableAddPartitionSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. ADD PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" ``` Closes #30685 from MaxGekk/unify-alter-table-add-partition-tests. 
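For context, a rough sketch of the statement shapes the unified suites exercise across the V1, Hive V1, and V2 catalog implementations (namespace, table, and location names are placeholders):

```
// Create a partitioned table, add a partition idempotently, then inspect it;
// the unified suites assert the same observable behavior for each catalog implementation.
spark.sql("CREATE TABLE ns.tbl (id BIGINT, data STRING) USING parquet PARTITIONED BY (id)")
spark.sql("ALTER TABLE ns.tbl ADD IF NOT EXISTS PARTITION (id = 1) LOCATION 'loc'")
spark.sql("SHOW PARTITIONS ns.tbl").show()
```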
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolvePartitionSpec.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 27 --- .../AlterTablePartitionV2SQLSuite.scala | 152 ++------------ .../AlterTableAddPartitionParserSuite.scala | 51 +++++ .../AlterTableAddPartitionSuiteBase.scala | 187 ++++++++++++++++++ .../sql/execution/command/DDLSuite.scala | 61 ------ .../v1/AlterTableAddPartitionSuite.scala | 64 ++++++ .../v2/AlterTableAddPartitionSuite.scala | 89 +++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../command/AlterTableAddPartitionSuite.scala | 46 +++++ 10 files changed, 450 insertions(+), 233 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index feb05d3b6926b..099ac6172c9e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -81,7 +81,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { resolvedPartitionSpec } - private def convertToPartIdent( + private[sql] def convertToPartIdent( partitionSpec: TablePartitionSpec, schema: Seq[StructField]): InternalRow = { val partValues = schema.map { part => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 947154eae12c8..e194e7112b1d4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2042,33 +2042,6 @@ class DDLParserSuite extends AnalysisTest { AlterTableRecoverPartitionsStatement(Seq("a", "b", "c"))) } - test("alter table: add partition") { - val sql1 = - """ - |ALTER TABLE a.b.c ADD IF NOT EXISTS PARTITION - |(dt='2008-08-08', country='us') LOCATION 'location1' PARTITION - |(dt='2009-09-09', country='uk') - """.stripMargin - val sql2 = "ALTER TABLE a.b.c ADD PARTITION (dt='2008-08-08') LOCATION 'loc'" - - val parsed1 = parsePlan(sql1) - val parsed2 = parsePlan(sql2) - - val expected1 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), - Seq( - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), - UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), - ifNotExists = true) - val expected2 = AlterTableAddPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
ADD PARTITION ..."), - Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), - ifNotExists = false) - - comparePlans(parsed1, expected1) - comparePlans(parsed2, expected2) - } - test("alter view: add partition (not supported)") { assertUnsupported( """ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 45d47c6d8681c..570976965ec7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -17,16 +17,12 @@ package org.apache.spark.sql.connector -import java.time.{LocalDate, LocalDateTime} - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionsException, PartitionsAlreadyExistException} -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.unsafe.types.UTF8String class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { @@ -45,66 +41,6 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } - test("ALTER TABLE ADD PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - } - } - - test("ALTER TABLE ADD PARTITIONS") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql( - s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'") - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) - - val partMetadata = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(1))) - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") == "loc") - - val partMetadata1 = partTable.loadPartitionMetadata(InternalRow.fromSeq(Seq(2))) - assert(partMetadata1.containsKey("location")) - assert(partMetadata1.get("location") == "loc1") - } - } - - test("ALTER TABLE ADD PARTITIONS: partition already exists") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql( - s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - assertThrows[PartitionsAlreadyExistException]( - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) 
LOCATION 'loc1'")) - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")).asInstanceOf[InMemoryPartitionTable] - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(2)))) - } - } - test("ALTER TABLE RENAME PARTITION") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -173,7 +109,7 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + spark.sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") }.getMessage assert(errMsg.contains(s"ID is not a valid partition column in table $t")) } @@ -192,73 +128,14 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } } - test("SPARK-33521: universal type conversions of partition values") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - sql(s""" - |CREATE TABLE $t ( - | part0 tinyint, - | part1 smallint, - | part2 int, - | part3 bigint, - | part4 float, - | part5 double, - | part6 string, - | part7 boolean, - | part8 date, - | part9 timestamp - |) USING foo - |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) - |""".stripMargin) - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - .asPartitionable - val expectedPartition = InternalRow.fromSeq(Seq[Any]( - -1, // tinyint - 0, // smallint - 1, // int - 2, // bigint - 3.14F, // float - 3.14D, // double - UTF8String.fromString("abc"), // string - true, // boolean - LocalDate.parse("2020-11-23").toEpochDay, - DateTimeUtils.instantToMicros( - LocalDateTime.parse("2020-11-23T22:13:10.123456").atZone(DateTimeTestUtils.LA).toInstant) - )) - assert(!partTable.partitionExists(expectedPartition)) - val partSpec = """ - | part0 = -1, - | part1 = 0, - | part2 = 1, - | part3 = 2, - | part4 = 3.14, - | part5 = 3.14, - | part6 = 'abc', - | part7 = true, - | part8 = '2020-11-23', - | part9 = '2020-11-23T22:13:10.123456' - |""".stripMargin - sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") - assert(partTable.partitionExists(expectedPartition)) - sql(s" ALTER TABLE $t DROP PARTITION ($partSpec)") - assert(!partTable.partitionExists(expectedPartition)) - } - } - - test("SPARK-33650: add/drop partition into a table which doesn't support partition management") { + test("SPARK-33650: drop partition into a table which doesn't support partition management") { val t = "testcat.ns1.ns2.tbl" withTable(t) { spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") - Seq( - s"ALTER TABLE $t ADD PARTITION (id=1)", - s"ALTER TABLE $t DROP PARTITION (id=1)" - ).foreach { alterTable => - val errMsg = intercept[AnalysisException] { - spark.sql(alterTable) - }.getMessage - assert(errMsg.contains(s"Table $t can not alter partitions")) - } + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) } } @@ -269,16 +146,11 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { |CREATE TABLE $t (id bigint, part0 int, part1 
string) |USING foo |PARTITIONED BY (part0, part1)""".stripMargin) - Seq( - s"ALTER TABLE $t ADD PARTITION (part0 = 1)", - s"ALTER TABLE $t DROP PARTITION (part0 = 1)" - ).foreach { alterTable => - val errMsg = intercept[AnalysisException] { - sql(alterTable) - }.getMessage - assert(errMsg.contains("Partition spec is invalid. " + - "The spec (part0) must match the partition spec (part0, part1)")) - } + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec (part0) must match the partition spec (part0, part1)")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala new file mode 100644 index 0000000000000..5ebca8f651604 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionParserSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.AlterTableAddPartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableAddPartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("add partition if not exists") { + val sql = """ + |ALTER TABLE a.b.c ADD IF NOT EXISTS PARTITION + |(dt='2008-08-08', country='us') LOCATION 'location1' PARTITION + |(dt='2009-09-09', country='uk')""".stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... ADD PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us"), Some("location1")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"), None)), + ifNotExists = true) + comparePlans(parsed, expected) + } + + test("add partition") { + val sql = "ALTER TABLE a.b.c ADD PARTITION (dt='2008-08-08') LOCATION 'loc'" + val parsed = parsePlan(sql) + val expected = AlterTableAddPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
ADD PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("dt" -> "2008-08-08"), Some("loc"))), + ifNotExists = false) + + comparePlans(parsed, expected) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala new file mode 100644 index 0000000000000..0cf0b395f139b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + +trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { + protected def version: String + protected def catalog: String + protected def defaultUsing: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"ALTER TABLE .. 
ADD PARTITION $version: " + testName, testTags: _*)(testFun) + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } + protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit + + protected def withNsTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + test("one partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s"ALTER TABLE $t ADD $exists PARTITION (id=1) LOCATION 'loc'") + + checkPartitions(t, Map("id" -> "1")) + checkLocation(t, Map("id" -> "1"), "loc") + } + } + } + + test("multiple partitions") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s""" + |ALTER TABLE $t ADD $exists + |PARTITION (id=1) LOCATION 'loc' + |PARTITION (id=2) LOCATION 'loc1'""".stripMargin) + + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "1"), "loc") + checkLocation(t, Map("id" -> "2"), "loc1") + } + } + } + + test("multi-part partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") + Seq("", "IF NOT EXISTS").foreach { exists => + sql(s"ALTER TABLE $t ADD $exists PARTITION (a=2, b='abc')") + + checkPartitions(t, Map("a" -> "2", "b" -> "abc")) + } + } + } + + test("table to alter does not exist") { + withNsTable("ns", "does_not_exist") { t => + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (a='4', b='9')") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("case sensitivity in resolving partition specs") { + withNsTable("ns", "tbl") { t => + spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1")) + checkLocation(t, Map("id" -> "1"), "loc1") + } + } + } + + test("SPARK-33521: universal type conversions of partition values") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t ( + | id int, + | part0 tinyint, + | part1 smallint, + | part2 int, + | part3 bigint, + | part4 float, + | part5 double, + | part6 string, + | part7 boolean, + | part8 date, + | part9 timestamp + |) $defaultUsing + |PARTITIONED BY (part0, part1, part2, part3, part4, part5, part6, part7, part8, part9) + |""".stripMargin) + val partSpec = """ + | part0 = -1, + | part1 = 0, + | part2 = 1, + | part3 = 2, + | part4 = 3.14, + | part5 = 3.14, + | part6 = 'abc', + | part7 = true, + | part8 = '2020-11-23', + | part9 = '2020-11-23 22:13:10.123456' + |""".stripMargin + sql(s"ALTER TABLE $t ADD PARTITION 
($partSpec) LOCATION 'loc1'") + val expected = Map( + "part0" -> "-1", + "part1" -> "0", + "part2" -> "1", + "part3" -> "2", + "part4" -> "3.14", + "part5" -> "3.14", + "part6" -> "abc", + "part7" -> "true", + "part8" -> "2020-11-23", + "part9" -> "2020-11-23 22:13:10.123456") + checkPartitions(t, expected) + sql(s"ALTER TABLE $t DROP PARTITION ($partSpec)") + checkPartitions(t) // no partitions + } + } + + test("SPARK-33676: not fully specified partition spec") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |$defaultUsing + |PARTITIONED BY (part0, part1)""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec (part0) must match the partition spec (part0, part1)")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 82d3e2dfe2212..05e0f4f4a538c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: add partition (datasource table)") { - testAddPartitions(isDatasourceTable = true) - } - test("alter table: drop partition (datasource table)") { testDropPartitions(isDatasourceTable = true) } @@ -1621,63 +1617,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testAddPartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "5") - val part2 = Map("a" -> "2", "b" -> "6") - val part3 = Map("a" -> "3", "b" -> "7") - val part4 = Map("a" -> "4", "b" -> "8") - val part5 = Map("a" -> "9", "b" -> "9") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1)) - - // basic add partition - sql("ALTER TABLE dbx.tab1 ADD IF NOT EXISTS " + - "PARTITION (a='2', b='6') LOCATION 'paris' PARTITION (a='3', b='7')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) - assert(catalog.getPartition(tableIdent, part1).storage.locationUri.isDefined) - - val tableLocation = catalog.getTableMetadata(tableIdent).storage.locationUri - assert(tableLocation.isDefined) - val partitionLocation = makeQualifiedPath( - new Path(tableLocation.get.toString, "paris").toString) - - assert(catalog.getPartition(tableIdent, part2).storage.locationUri == Option(partitionLocation)) - assert(catalog.getPartition(tableIdent, part3).storage.locationUri.isDefined) - - // add partitions without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4)) - - // table to alter does not exist - intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist ADD IF NOT EXISTS PARTITION (a='4', b='9')") - } - - // 
partition to add already exists - intercept[AnalysisException] { - sql("ALTER TABLE tab1 ADD PARTITION (a='4', b='8')") - } - - // partition to add already exists when using IF NOT EXISTS - sql("ALTER TABLE tab1 ADD IF NOT EXISTS PARTITION (a='4', b='8')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4)) - - // partition spec in ADD PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 ADD PARTITION (A='9', B='9')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4, part5)) - } - protected def testDropPartitions(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..295ce1d3da13f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultUsing: String = "USING parquet" + + override protected def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } +} + +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession { + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..b15235d17671a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvePartitionSpec} +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableAddPartitionSuite + extends command.AlterTableAddPartitionSuiteBase + with SharedSparkSession { + + import CatalogV2Implicits._ + + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultUsing: String = "USING _" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + override protected def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val catalogName = tablePath.head + val namespaceWithTable = tablePath.tail + val namespaces = namespaceWithTable.init + val tableName = namespaceWithTable.last + val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) + val partTable = catalogPlugin.asTableCatalog + .loadTable(Identifier.of(namespaces, tableName)) + .asInstanceOf[InMemoryPartitionTable] + val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) + val partMetadata = partTable.loadPartitionMetadata(ident) + + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") === expected) + } + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } + + test("SPARK-33650: add partition into a table which doesn't support partition management") { + withNsTable("ns", "tbl", s"non_part_$catalog") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1)") + }.getMessage + assert(errMsg.contains(s"Table $t can not alter partitions")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index d6a4d76386889..070fdf55deb38 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -166,10 +166,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testDropPartitions(isDatasourceTable = false) } - test("alter table: add partition") { - 
testAddPartitions(isDatasourceTable = false) - } - test("drop table") { testDropTable(isDatasourceTable = false) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala new file mode 100644 index 0000000000000..ef0ec8d9bd69f --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class AlterTableAddPartitionSuite + extends v1.AlterTableAddPartitionSuiteBase + with TestHiveSingleton { + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } +} From b112e2bfa619d028004cbc7fb8ec1363689729a7 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 10 Dec 2020 05:18:34 +0000 Subject: [PATCH 0721/1009] [SPARK-33714][SQL] Migrate ALTER VIEW ... SET/UNSET TBLPROPERTIES commands to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR adds `allowTemp` flag to `UnresolvedView` so that `Analyzer` can check whether to resolve temp views or not. This PR also migrates `ALTER VIEW ... SET/UNSET TBLPROPERTIES` to use `UnresolvedView` to resolve the table/view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). ### Why are the changes needed? To use `UnresolvedView` for view resolution. One benefit is that the exception message is better for `ALTER VIEW ... SET/UNSET TBLPROPERTIES`. 
Before, if a temp view is passed, you will just get `NoSuchTableException` with `Table or view 'tmpView' not found in database 'default'`. But with this PR, you will get more description exception message: `tmpView is a temp view. ALTER VIEW ... SET TBLPROPERTIES expects a permanent view`. ### Does this PR introduce _any_ user-facing change? The exception message changes as describe above. ### How was this patch tested? Updated existing tests. Closes #30676 from imback82/alter_view_set_unset_properties. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 7 +++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../catalyst/analysis/ResolveCatalogs.scala | 12 -------- .../catalyst/analysis/v2ResolutionPlans.scala | 1 + .../sql/catalyst/parser/AstBuilder.scala | 26 +++++++++++++---- .../catalyst/plans/logical/statements.scala | 15 ---------- .../catalyst/plans/logical/v2Commands.scala | 19 +++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++++++-------- .../analysis/ResolveSessionCatalog.scala | 9 +++--- .../sql/connector/DataSourceV2SQLSuite.scala | 28 +++++++++++++++---- .../spark/sql/execution/SQLViewSuite.scala | 8 ++++-- .../command/PlanResolutionSuite.scala | 8 +++--- .../sql/hive/execution/HiveDDLSuite.scala | 10 +++++-- 13 files changed, 108 insertions(+), 65 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6b0cf4be7de74..7d1edbae9cea5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -889,8 +889,11 @@ class Analyzer(override val catalogManager: CatalogManager) u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") } u - case u @ UnresolvedView(ident, _, _) => + case u @ UnresolvedView(ident, cmd, allowTemp, _) => lookupTempView(ident).map { _ => + if (!allowTemp) { + u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a permanent view.") + } ResolvedView(ident.asIdentifier, isTemp = true) } .getOrElse(u) @@ -1118,7 +1121,7 @@ class Analyzer(override val catalogManager: CatalogManager) case table => table }.getOrElse(u) - case u @ UnresolvedView(identifier, cmd, relationTypeMismatchHint) => + case u @ UnresolvedView(identifier, cmd, _, relationTypeMismatchHint) => lookupTableOrView(identifier).map { case v: ResolvedView => v case _ => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 39cdea2bd4d2a..119e17196a454 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -104,7 +104,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case u: UnresolvedTable => u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") - case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _) => + case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _, _) => u.failAnalysis( s"Cannot specify catalog `${catalog.name}` for view ${ident.quoted} " + "because view support in v2 catalog has not been implemented yet. 
" + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index b4dfee1330036..14dccd86d2240 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -121,18 +121,6 @@ class ResolveCatalogs(val catalogManager: CatalogManager) val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) - case AlterViewSetPropertiesStatement( - NonSessionCatalogAndTable(catalog, tbl), props) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + - s"because view support in catalog has not been implemented yet") - - case AlterViewUnsetPropertiesStatement( - NonSessionCatalogAndTable(catalog, tbl), keys, ifExists) => - throw new AnalysisException( - s"Can not specify catalog `${catalog.name}` for view ${tbl.quoted} " + - s"because view support in catalog has not been implemented yet") - case c @ CreateTableStatement( NonSessionCatalogAndTable(catalog, tbl), _, _, _, _, _, _, _, _, _, _, _) => assertNoNullTypeInSchema(c.tableSchema) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 2737b5d58bf42..940fd6085dc98 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -52,6 +52,7 @@ case class UnresolvedTable( case class UnresolvedView( multipartIdentifier: Seq[String], commandName: String, + allowTemp: Boolean = true, relationTypeMismatchHint: Option[String] = None) extends LeafNode { override lazy val resolved: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 7787e199d3770..3c06a7665a0e2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3161,8 +3161,9 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg DropView( UnresolvedView( visitMultipartIdentifier(ctx.multipartIdentifier()), - "DROP VIEW", - Some("Please use DROP TABLE instead.")), + commandName = "DROP VIEW", + allowTemp = true, + relationTypeMismatchHint = Some("Please use DROP TABLE instead.")), ctx.EXISTS != null) } @@ -3399,7 +3400,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Parse [[AlterViewSetPropertiesStatement]] or [[AlterTableSetPropertiesStatement]] commands. + * Parse [[AlterViewSetProperties]] or [[AlterTableSetPropertiesStatement]] commands. * * For example: * {{{ @@ -3413,14 +3414,20 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val properties = visitPropertyKeyValues(ctx.tablePropertyList) val cleanedTableProperties = cleanTableProperties(ctx, properties) if (ctx.VIEW != null) { - AlterViewSetPropertiesStatement(identifier, cleanedTableProperties) + AlterViewSetProperties( + UnresolvedView( + identifier, + commandName = "ALTER VIEW ... 
SET TBLPROPERTIES", + allowTemp = false, + relationTypeMismatchHint = Some("Please use ALTER TABLE instead.")), + cleanedTableProperties) } else { AlterTableSetPropertiesStatement(identifier, cleanedTableProperties) } } /** - * Parse [[AlterViewUnsetPropertiesStatement]] or [[AlterTableUnsetPropertiesStatement]] commands. + * Parse [[AlterViewUnsetProperties]] or [[AlterTableUnsetPropertiesStatement]] commands. * * For example: * {{{ @@ -3436,7 +3443,14 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val ifExists = ctx.EXISTS != null if (ctx.VIEW != null) { - AlterViewUnsetPropertiesStatement(identifier, cleanedProperties, ifExists) + AlterViewUnsetProperties( + UnresolvedView( + identifier, + commandName = "ALTER VIEW ... UNSET TBLPROPERTIES", + allowTemp = false, + relationTypeMismatchHint = Some("Please use ALTER TABLE instead.")), + cleanedProperties, + ifExists) } else { AlterTableUnsetPropertiesStatement(identifier, cleanedProperties, ifExists) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index b731b8a2fd8fd..d628bc914dba7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -315,21 +315,6 @@ case class AlterTableSerDePropertiesStatement( serdeProperties: Option[Map[String, String]], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * ALTER VIEW ... SET TBLPROPERTIES command, as parsed from SQL. - */ -case class AlterViewSetPropertiesStatement( - viewName: Seq[String], - properties: Map[String, String]) extends ParsedStatement - -/** - * ALTER VIEW ... UNSET TBLPROPERTIES command, as parsed from SQL. - */ -case class AlterViewUnsetPropertiesStatement( - viewName: Seq[String], - propertyKeys: Seq[String], - ifExists: Boolean) extends ParsedStatement - /** * ALTER VIEW ... Query command, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index e014048f723f5..7d62dde67733b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -742,3 +742,22 @@ case class DropView( case class RepairTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the ALTER VIEW ... SET TBLPROPERTIES command. + */ +case class AlterViewSetProperties( + child: LogicalPlan, + properties: Map[String, String]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + +/** + * The logical plan of the ALTER VIEW ... UNSET TBLPROPERTIES command. 
+ */ +case class AlterViewUnsetProperties( + child: LogicalPlan, + propertyKeys: Seq[String], + ifExists: Boolean) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e194e7112b1d4..af5e48d922a16 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -724,15 +724,15 @@ class DDLParserSuite extends AnalysisTest { val cmd = "DROP VIEW" val hint = Some("Please use DROP TABLE instead.") parseCompare(s"DROP VIEW testcat.db.view", - DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("testcat", "db", "view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW db.view", - DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("db", "view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS db.view", - DropView(UnresolvedView(Seq("db", "view"), cmd, hint), ifExists = true)) + DropView(UnresolvedView(Seq("db", "view"), cmd, true, hint), ifExists = true)) parseCompare(s"DROP VIEW view", - DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = false)) + DropView(UnresolvedView(Seq("view"), cmd, true, hint), ifExists = false)) parseCompare(s"DROP VIEW IF EXISTS view", - DropView(UnresolvedView(Seq("view"), cmd, hint), ifExists = true)) + DropView(UnresolvedView(Seq("view"), cmd, true, hint), ifExists = true)) } private def testCreateOrReplaceDdl( @@ -764,16 +764,22 @@ class DDLParserSuite extends AnalysisTest { "'comment' = 'new_comment')" val sql2_view = "ALTER VIEW table_name UNSET TBLPROPERTIES ('comment', 'test')" val sql3_view = "ALTER VIEW table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" + val hint = Some("Please use ALTER TABLE instead.") comparePlans(parsePlan(sql1_view), - AlterViewSetPropertiesStatement( - Seq("table_name"), Map("test" -> "test", "comment" -> "new_comment"))) + AlterViewSetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... SET TBLPROPERTIES", false, hint), + Map("test" -> "test", "comment" -> "new_comment"))) comparePlans(parsePlan(sql2_view), - AlterViewUnsetPropertiesStatement( - Seq("table_name"), Seq("comment", "test"), ifExists = false)) + AlterViewUnsetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... UNSET TBLPROPERTIES", false, hint), + Seq("comment", "test"), + ifExists = false)) comparePlans(parsePlan(sql3_view), - AlterViewUnsetPropertiesStatement( - Seq("table_name"), Seq("comment", "test"), ifExists = true)) + AlterViewUnsetProperties( + UnresolvedView(Seq("table_name"), "ALTER VIEW ... 
UNSET TBLPROPERTIES", false, hint), + Seq("comment", "test"), + ifExists = true)) } // ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 817a63aa9aa6e..6e06cb3a1f928 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -209,12 +209,11 @@ class ResolveSessionCatalog( createAlterTable(nameParts, catalog, tbl, changes) } - // ALTER VIEW should always use v1 command if the resolved catalog is session catalog. - case AlterViewSetPropertiesStatement(SessionCatalogAndTable(_, tbl), props) => - AlterTableSetPropertiesCommand(tbl.asTableIdentifier, props, isView = true) + case AlterViewSetProperties(ResolvedView(ident, _), props) => + AlterTableSetPropertiesCommand(ident.asTableIdentifier, props, isView = true) - case AlterViewUnsetPropertiesStatement(SessionCatalogAndTable(_, tbl), keys, ifExists) => - AlterTableUnsetPropertiesCommand(tbl.asTableIdentifier, keys, ifExists, isView = true) + case AlterViewUnsetProperties(ResolvedView(ident, _), keys, ifExists) => + AlterTableUnsetPropertiesCommand(ident.asTableIdentifier, keys, ifExists, isView = true) case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => if (ns.length != 1) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 9020065449cef..8e1e8f88f219f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2594,11 +2594,29 @@ class DataSourceV2SQLSuite } } - test("DROP VIEW is not supported for v2 catalogs") { - assertAnalysisError( - "DROP VIEW testcat.v", - "Cannot specify catalog `testcat` for view v because view support in v2 catalog " + - "has not been implemented yet. DROP VIEW expects a view.") + test("View commands are not supported in v2 catalogs") { + def validateViewCommand( + sql: String, + catalogName: String, + viewName: String, + cmdName: String): Unit = { + assertAnalysisError( + sql, + s"Cannot specify catalog `$catalogName` for view $viewName because view support " + + s"in v2 catalog has not been implemented yet. $cmdName expects a view.") + } + + validateViewCommand("DROP VIEW testcat.v", "testcat", "v", "DROP VIEW") + validateViewCommand( + "ALTER VIEW testcat.v SET TBLPROPERTIES ('key' = 'val')", + "testcat", + "v", + "ALTER VIEW ... SET TBLPROPERTIES") + validateViewCommand( + "ALTER VIEW testcat.v UNSET TBLPROPERTIES ('key')", + "testcat", + "v", + "ALTER VIEW ... 
UNSET TBLPROPERTIES") } private def testNotSupportedV2Command( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index c4303f0f1e19d..7595ae0ec7a53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -127,8 +127,12 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val viewName = "testView" withTempView(viewName) { spark.range(10).createTempView(viewName) - assertNoSuchTable(s"ALTER VIEW $viewName SET TBLPROPERTIES ('p' = 'an')") - assertNoSuchTable(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") + assertAnalysisError( + s"ALTER VIEW $viewName SET TBLPROPERTIES ('p' = 'an')", + "testView is a temp view. 'ALTER VIEW ... SET TBLPROPERTIES' expects a permanent view.") + assertAnalysisError( + s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')", + "testView is a temp view. 'ALTER VIEW ... UNSET TBLPROPERTIES' expects a permanent view.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 758540f1a42f5..70cbfa194313f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -721,16 +721,16 @@ class PlanResolutionSuite extends AnalysisTest { // ALTER VIEW view_name SET TBLPROPERTIES ('comment' = new_comment); // ALTER VIEW view_name UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); test("alter view: alter view properties") { - val sql1_view = "ALTER VIEW table_name SET TBLPROPERTIES ('test' = 'test', " + + val sql1_view = "ALTER VIEW view SET TBLPROPERTIES ('test' = 'test', " + "'comment' = 'new_comment')" - val sql2_view = "ALTER VIEW table_name UNSET TBLPROPERTIES ('comment', 'test')" - val sql3_view = "ALTER VIEW table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" + val sql2_view = "ALTER VIEW view UNSET TBLPROPERTIES ('comment', 'test')" + val sql3_view = "ALTER VIEW view UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" val parsed1_view = parseAndResolve(sql1_view) val parsed2_view = parseAndResolve(sql2_view) val parsed3_view = parseAndResolve(sql3_view) - val tableIdent = TableIdentifier("table_name", Some("default")) + val tableIdent = TableIdentifier("view", Some("default")) val expected1_view = AlterTableSetPropertiesCommand( tableIdent, Map("test" -> "test", "comment" -> "new_comment"), isView = true) val expected2_view = AlterTableUnsetPropertiesCommand( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 070fdf55deb38..9f75f8797fe37 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -875,11 +875,17 @@ class HiveDDLSuite assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RENAME TO $newViewName") - assertErrorForAlterViewOnTable(s"ALTER VIEW $tabName SET TBLPROPERTIES ('p' = 'an')") + assertAnalysisError( + s"ALTER VIEW $tabName SET TBLPROPERTIES ('p' = 'an')", + s"$tabName is a table. 'ALTER VIEW ... SET TBLPROPERTIES' expects a view. 
" + + "Please use ALTER TABLE instead.") assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET TBLPROPERTIES ('p' = 'an')") - assertErrorForAlterViewOnTable(s"ALTER VIEW $tabName UNSET TBLPROPERTIES ('p')") + assertAnalysisError( + s"ALTER VIEW $tabName UNSET TBLPROPERTIES ('p')", + s"$tabName is a table. 'ALTER VIEW ... UNSET TBLPROPERTIES' expects a view. " + + "Please use ALTER TABLE instead.") assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName UNSET TBLPROPERTIES ('p')") From 795db05bf6911aa2a66eea57460409a238957b40 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Wed, 9 Dec 2020 22:04:09 -0800 Subject: [PATCH 0722/1009] [SPARK-33732][K8S][TESTS] Kubernetes integration tests doesn't work with Minikube 1.9+ ### What changes were proposed in this pull request? This PR changes `Minikube.scala` for Kubernetes integration tests to work with Minikube 1.9+. `Minikube.scala` assumes that `apiserver.key` and `apiserver.crt` are in `~/.minikube/`. But as of Minikube 1.9, they are in `~/.minikube/profiles/`. ### Why are the changes needed? Currently, Kubernetes integration tests doesn't work with Minikube 1.9+. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the following test passes. ``` $ build/sbt -Pkubernetes -Pkubernetes-integration-tests package 'kubernetes-integration-tests/testOnly -- -z "SparkPi with no"' ``` Closes #30700 from sarutak/minikube-1.9. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../backend/minikube/Minikube.scala | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala index 547427f96d7ec..c33875243c598 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.k8s.integrationtest.backend.minikube -import java.nio.file.Paths +import java.nio.file.{Files, Paths} import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -68,15 +68,23 @@ private[spark] object Minikube extends Logging { def getKubernetesClient: DefaultKubernetesClient = { val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") + val minikubeBasePath = Paths.get(userHome, MINIKUBE_PATH).toString + val profileDir = if (Files.exists(Paths.get(minikubeBasePath, "apiserver.crt"))) { + // For Minikube <1.9 + "" + } else { + // For Minikube >=1.9 + Paths.get("profiles", executeMinikube("profile")(0)).toString + } + val apiServerCertPath = Paths.get(minikubeBasePath, profileDir, "apiserver.crt") + val apiServerKeyPath = Paths.get(minikubeBasePath, profileDir, "apiserver.key") val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(kubernetesMaster) .withCaCertFile( Paths.get(userHome, MINIKUBE_PATH, "ca.crt").toFile.getAbsolutePath) - .withClientCertFile( - Paths.get(userHome, MINIKUBE_PATH, "apiserver.crt").toFile.getAbsolutePath) - .withClientKeyFile( - Paths.get(userHome, MINIKUBE_PATH, "apiserver.key").toFile.getAbsolutePath) + 
.withClientCertFile(apiServerCertPath.toFile.getAbsolutePath) + .withClientKeyFile(apiServerKeyPath.toFile.getAbsolutePath) .build() new DefaultKubernetesClient(kubernetesConf) } @@ -120,7 +128,7 @@ private[spark] object Minikube extends Logging { def executeMinikube(action: String, args: String*): Seq[String] = { ProcessUtils.executeProcess( - Array("bash", "-c", s"minikube $action ${args.mkString(" ")}"), + Array("bash", "-c", s"MINIKUBE_IN_STYLE=true minikube $action ${args.mkString(" ")}"), MINIKUBE_STARTUP_TIMEOUT_SECONDS).filter{x => !x.contains("There is a newer version of minikube") && !x.contains("https://github.com/kubernetes") From cef28c2c51d06506afd8a5f5ac725a1a0fd53b6d Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 10 Dec 2020 08:38:24 +0000 Subject: [PATCH 0723/1009] [SPARK-32670][SQL][FOLLOWUP] Group exception messages in Catalyst Analyzer in one file ### What changes were proposed in this pull request? This PR follows up https://github.com/apache/spark/pull/29497. Because https://github.com/apache/spark/pull/29497 just give us an example to group all `AnalysisExcpetion` in Analyzer into QueryCompilationErrors. This PR group other `AnalysisExcpetion` into QueryCompilationErrors. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30564 from beliefer/SPARK-32670-followup. Lead-authored-by: gengjiaan Co-authored-by: Jiaan Geng Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 169 +++++++++++++++++- .../sql/catalyst/analysis/Analyzer.scala | 122 ++++++------- 2 files changed, 217 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index c680502cb328f..87387b18dbab4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -18,9 +18,13 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Expression, GroupingID} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.ResolvedView +import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} @@ -31,6 +35,7 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} * org.apache.spark.sql.catalyst.analysis.Analyzer. 
*/ object QueryCompilationErrors { + def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { new AnalysisException( s"Columns of grouping_id (${groupingID.groupByExprs.mkString(",")}) " + @@ -159,6 +164,166 @@ object QueryCompilationErrors { s"Couldn't find the reference column for $after at $parentName") } -} + def windowSpecificationNotDefinedError(windowName: String): Throwable = { + new AnalysisException(s"Window specification $windowName is not defined in the WINDOW clause.") + } + + def selectExprNotInGroupByError(expr: Expression, groupByAliases: Seq[Alias]): Throwable = { + new AnalysisException(s"$expr doesn't show up in the GROUP BY list $groupByAliases") + } + + def groupingMustWithGroupingSetsOrCubeOrRollupError(): Throwable = { + new AnalysisException("grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + } + + def pandasUDFAggregateNotSupportedInPivotError(): Throwable = { + new AnalysisException("Pandas UDF aggregate expressions are currently not supported in pivot.") + } + + def aggregateExpressionRequiredForPivotError(sql: String): Throwable = { + new AnalysisException(s"Aggregate expression required for pivot, but '$sql' " + + "did not appear in any aggregate function.") + } + + def expectTableNotTempViewError(quoted: String, cmd: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table", + t.origin.line, t.origin.startPosition) + } + + def expectTableOrPermanentViewNotTempViewError( + quoted: String, cmd: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table or permanent view.", + t.origin.line, t.origin.startPosition) + } + + def viewDepthExceedsMaxResolutionDepthError( + identifier: TableIdentifier, maxNestedViewDepth: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"The depth of view $identifier exceeds the maximum " + + s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + + s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to work " + + "around this.", t.origin.line, t.origin.startPosition) + } + + def insertIntoViewNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Inserting into a view is not allowed. View: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def writeIntoViewNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Writing into a view is not allowed. View: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def writeIntoV1TableNotAllowedError(identifier: TableIdentifier, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Cannot write into v1 table: $identifier.", + t.origin.line, t.origin.startPosition) + } + + def expectTableNotViewError(v: ResolvedView, cmd: String, t: TreeNode[_]): Throwable = { + val viewStr = if (v.isTemp) "temp view" else "view" + new AnalysisException(s"${v.identifier.quoted} is a $viewStr. 
'$cmd' expects a table.", + t.origin.line, t.origin.startPosition) + } + + def starNotAllowedWhenGroupByOrdinalPositionUsedError(): Throwable = { + new AnalysisException( + "Star (*) is not allowed in select list when GROUP BY ordinal position is used") + } + + def invalidStarUsageError(prettyName: String): Throwable = { + new AnalysisException(s"Invalid usage of '*' in $prettyName") + } + + def orderByPositionRangeError(index: Int, size: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"ORDER BY position $index is not in select list " + + s"(valid range is [1, $size])", t.origin.line, t.origin.startPosition) + } + + def groupByPositionRangeError(index: Int, size: Int, t: TreeNode[_]): Throwable = { + new AnalysisException(s"GROUP BY position $index is not in select list " + + s"(valid range is [1, $size])", t.origin.line, t.origin.startPosition) + } + def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = { + new AnalysisException(s"$name is expected to be a generator. However, " + + s"its class is $classCanonicalName, which is not a generator.") + } + def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): Throwable = { + new AnalysisException("DISTINCT or FILTER specified, " + + s"but $prettyName is not an aggregate function") + } + + def nonDeterministicFilterInAggregateError(): Throwable = { + new AnalysisException("FILTER expression is non-deterministic, " + + "it cannot be used in aggregate functions") + } + + def aliasNumberNotMatchColumnNumberError( + columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = { + new AnalysisException("Number of column aliases does not match number of columns. " + + s"Number of column aliases: $columnSize; " + + s"number of columns: $outputSize.", t.origin.line, t.origin.startPosition) + } + + def aliasesNumberNotMatchUDTFOutputError( + aliasesSize: Int, aliasesNames: String): Throwable = { + new AnalysisException("The number of aliases supplied in the AS clause does not " + + s"match the number of columns output by the UDTF expected $aliasesSize " + + s"aliases but got $aliasesNames ") + } + + def windowAggregateFunctionWithFilterNotSupportedError(): Throwable = { + new AnalysisException("window aggregate function with filter predicate is not supported yet.") + } + + def windowFunctionInsideAggregateFunctionNotAllowedError(): Throwable = { + new AnalysisException("It is not allowed to use a window function inside an aggregate " + + "function. Please use the inner window function in a sub-query.") + } + + def expressionWithoutWindowExpressionError(expr: NamedExpression): Throwable = { + new AnalysisException(s"$expr does not have any WindowExpression.") + } + + def expressionWithMultiWindowExpressionsError( + expr: NamedExpression, distinctWindowSpec: Seq[WindowSpecDefinition]): Throwable = { + new AnalysisException(s"$expr has multiple Window Specifications ($distinctWindowSpec)." 
+ + "Please file a bug report with this error message, stack trace, and the query.") + } + + def windowFunctionNotAllowedError(clauseName: String): Throwable = { + new AnalysisException(s"It is not allowed to use window functions inside $clauseName clause") + } + + def cannotSpecifyWindowFrameError(prettyName: String): Throwable = { + new AnalysisException(s"Cannot specify window frame for $prettyName function") + } + + def windowFrameNotMatchRequiredFrameError( + f: SpecifiedWindowFrame, required: WindowFrame): Throwable = { + new AnalysisException(s"Window Frame $f must match the required frame $required") + } + + def windowFunctionWithWindowFrameNotOrderedError(wf: WindowFunction): Throwable = { + new AnalysisException(s"Window function $wf requires window to be ordered, please add " + + s"ORDER BY clause. For example SELECT $wf(value_expr) OVER (PARTITION BY window_partition " + + "ORDER BY window_ordering) from table") + } + + def cannotResolveUserSpecifiedColumnsError(col: String, t: TreeNode[_]): Throwable = { + new AnalysisException(s"Cannot resolve column name $col", t.origin.line, t.origin.startPosition) + } + + def writeTableWithMismatchedColumnsError( + columnSize: Int, outputSize: Int, t: TreeNode[_]): Throwable = { + new AnalysisException("Cannot write to table due to mismatched user specified column " + + s"size($columnSize) and data column size($outputSize)", t.origin.line, t.origin.startPosition) + } + + def multiTimeWindowExpressionsNotSupportedError(t: TreeNode[_]): Throwable = { + new AnalysisException("Multiple time window expressions would result in a cartesian product " + + "of rows, therefore they are currently not supported.", t.origin.line, t.origin.startPosition) + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 7d1edbae9cea5..0d719b1f53365 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -368,10 +368,8 @@ class Analyzer(override val catalogManager: CatalogManager) // Lookup WindowSpecDefinitions. This rule works with unresolved children. case WithWindowDefinition(windowDefinitions, child) => child.resolveExpressions { case UnresolvedWindowExpression(c, WindowSpecReference(windowName)) => - val errorMessage = - s"Window specification $windowName is not defined in the WINDOW clause." - val windowSpecDefinition = - windowDefinitions.getOrElse(windowName, failAnalysis(errorMessage)) + val windowSpecDefinition = windowDefinitions.getOrElse(windowName, + throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowName)) WindowExpression(c, windowSpecDefinition) } } @@ -515,7 +513,7 @@ class Analyzer(override val catalogManager: CatalogManager) val groupingSetsAttributes = selectedGroupByExprs.map { groupingSetExprs => groupingSetExprs.map { expr => val alias = groupByAliases.find(_.child.semanticEquals(expr)).getOrElse( - failAnalysis(s"$expr doesn't show up in the GROUP BY list $groupByAliases")) + throw QueryCompilationErrors.selectExprNotInGroupByError(expr, groupByAliases)) // Map alias to expanded attribute. 
expandedAttributes.find(_.semanticEquals(alias.toAttribute)).getOrElse( alias.toAttribute) @@ -619,11 +617,11 @@ class Analyzer(override val catalogManager: CatalogManager) val gid = a.groupingExpressions.last if (!gid.isInstanceOf[AttributeReference] || gid.asInstanceOf[AttributeReference].name != VirtualColumn.groupingIdName) { - failAnalysis(s"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + throw QueryCompilationErrors.groupingMustWithGroupingSetsOrCubeOrRollupError() } a.groupingExpressions.take(a.groupingExpressions.length - 1) }.getOrElse { - failAnalysis(s"grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup") + throw QueryCompilationErrors.groupingMustWithGroupingSetsOrCubeOrRollupError() } } @@ -833,11 +831,9 @@ class Analyzer(override val catalogManager: CatalogManager) private def checkValidAggregateExpression(expr: Expression): Unit = expr match { case _: AggregateExpression => // OK and leave the argument check to CheckAnalysis. case expr: PythonUDF if PythonUDF.isGroupedAggPandasUDF(expr) => - failAnalysis("Pandas UDF aggregate expressions are currently not supported in pivot.") + throw QueryCompilationErrors.pandasUDFAggregateNotSupportedInPivotError() case e: Attribute => - failAnalysis( - s"Aggregate expression required for pivot, but '${e.sql}' " + - s"did not appear in any aggregate function.") + throw QueryCompilationErrors.aggregateExpressionRequiredForPivotError(e.sql) case e => e.children.foreach(checkValidAggregateExpression) } } @@ -886,7 +882,7 @@ class Analyzer(override val catalogManager: CatalogManager) } case u @ UnresolvedTable(ident, cmd) => lookupTempView(ident).foreach { _ => - u.failAnalysis(s"${ident.quoted} is a temp view. '$cmd' expects a table") + throw QueryCompilationErrors.expectTableNotTempViewError(ident.quoted, cmd, u) } u case u @ UnresolvedView(ident, cmd, allowTemp, _) => @@ -901,8 +897,8 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map { _ => if (!allowTempView) { - u.failAnalysis( - s"${ident.quoted} is a temp view. '$cmd' expects a table or permanent view.") + throw QueryCompilationErrors.expectTableOrPermanentViewNotTempViewError( + ident.quoted, cmd, u) } ResolvedView(ident.asIdentifier, isTemp = true) } @@ -1062,10 +1058,8 @@ class Analyzer(override val catalogManager: CatalogManager) val nestedViewDepth = AnalysisContext.get.nestedViewDepth val maxNestedViewDepth = AnalysisContext.get.maxNestedViewDepth if (nestedViewDepth > maxNestedViewDepth) { - view.failAnalysis(s"The depth of view ${desc.identifier} exceeds the maximum " + - s"view resolution depth ($maxNestedViewDepth). Analysis is aborted to " + - s"avoid errors. Increase the value of ${SQLConf.MAX_NESTED_VIEW_DEPTH.key} to " + - "work around this.") + throw QueryCompilationErrors.viewDepthExceedsMaxResolutionDepthError( + desc.identifier, maxNestedViewDepth, view) } SQLConf.withExistingConf(View.effectiveSQLConf(desc.viewSQLConfigs, isTempView)) { executeSameContext(child) @@ -1087,7 +1081,7 @@ class Analyzer(override val catalogManager: CatalogManager) EliminateSubqueryAliases(relation) match { case v: View => - table.failAnalysis(s"Inserting into a view is not allowed. 
View: ${v.desc.identifier}.") + throw QueryCompilationErrors.insertIntoViewNotAllowedError(v.desc.identifier, table) case other => i.copy(table = other) } @@ -1098,10 +1092,11 @@ class Analyzer(override val catalogManager: CatalogManager) lookupRelation(u.multipartIdentifier, u.options, false) .map(EliminateSubqueryAliases(_)) .map { - case v: View => write.failAnalysis( - s"Writing into a view is not allowed. View: ${v.desc.identifier}.") - case u: UnresolvedCatalogRelation => write.failAnalysis( - "Cannot write into v1 table: " + u.tableMeta.identifier) + case v: View => throw QueryCompilationErrors.writeIntoViewNotAllowedError( + v.desc.identifier, write) + case u: UnresolvedCatalogRelation => + throw QueryCompilationErrors.writeIntoV1TableNotAllowedError( + u.tableMeta.identifier, write) case r: DataSourceV2Relation => write.withNewTable(r) case other => throw new IllegalStateException( "[BUG] unexpected plan returned by `lookupRelation`: " + other) @@ -1115,9 +1110,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedTable(identifier, cmd) => lookupTableOrView(identifier).map { - case v: ResolvedView => - val viewStr = if (v.isTemp) "temp view" else "view" - u.failAnalysis(s"${v.identifier.quoted} is a $viewStr. '$cmd' expects a table.") + case v: ResolvedView => throw QueryCompilationErrors.expectTableNotViewError(v, cmd, u) case table => table }.getOrElse(u) @@ -1488,8 +1481,7 @@ class Analyzer(override val catalogManager: CatalogManager) // If the aggregate function argument contains Stars, expand it. case a: Aggregate if containsStar(a.aggregateExpressions) => if (a.groupingExpressions.exists(_.isInstanceOf[UnresolvedOrdinal])) { - failAnalysis( - "Star (*) is not allowed in select list when GROUP BY ordinal position is used") + throw QueryCompilationErrors.starNotAllowedWhenGroupByOrdinalPositionUsedError() } else { a.copy(aggregateExpressions = buildExpandedProjectList(a.aggregateExpressions, a.child)) } @@ -1502,7 +1494,7 @@ class Analyzer(override val catalogManager: CatalogManager) } ) case g: Generate if containsStar(g.generator.children) => - failAnalysis("Invalid usage of '*' in explode/json_tuple/UDTF") + throw QueryCompilationErrors.invalidStarUsageError("explode/json_tuple/UDTF") // To resolve duplicate expression IDs for Join and Intersect case j @ Join(left, right, _, _, _) if !j.duplicateResolved => @@ -1762,7 +1754,7 @@ class Analyzer(override val catalogManager: CatalogManager) }) // count(*) has been replaced by count(1) case o if containsStar(o.children) => - failAnalysis(s"Invalid usage of '*' in expression '${o.prettyName}'") + throw QueryCompilationErrors.invalidStarUsageError(s"expression '${o.prettyName}'") } } } @@ -1864,9 +1856,7 @@ class Analyzer(override val catalogManager: CatalogManager) if (index > 0 && index <= child.output.size) { SortOrder(child.output(index - 1), direction, nullOrdering, Seq.empty) } else { - s.failAnalysis( - s"ORDER BY position $index is not in select list " + - s"(valid range is [1, ${child.output.size}])") + throw QueryCompilationErrors.orderByPositionRangeError(index, child.output.size, s) } case o => o } @@ -1880,9 +1870,7 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedOrdinal(index) if index > 0 && index <= aggs.size => aggs(index - 1) case ordinal @ UnresolvedOrdinal(index) => - ordinal.failAnalysis( - s"GROUP BY position $index is not in select list " + - s"(valid range is [1, ${aggs.size}])") + throw QueryCompilationErrors.groupByPositionRangeError(index, 
aggs.size, ordinal) case o => o } Aggregate(newGroups, aggs, child) @@ -2089,9 +2077,8 @@ class Analyzer(override val catalogManager: CatalogManager) withPosition(u) { v1SessionCatalog.lookupFunction(name, children) match { case generator: Generator => generator - case other => - failAnalysis(s"$name is expected to be a generator. However, " + - s"its class is ${other.getClass.getCanonicalName}, which is not a generator.") + case other => throw QueryCompilationErrors.generatorNotExpectedError( + name, other.getClass.getCanonicalName) } } case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter) => @@ -2102,22 +2089,21 @@ class Analyzer(override val catalogManager: CatalogManager) // AggregateExpression. case wf: AggregateWindowFunction => if (isDistinct || filter.isDefined) { - failAnalysis("DISTINCT or FILTER specified, " + - s"but ${wf.prettyName} is not an aggregate function") + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + wf.prettyName) } else { wf } // We get an aggregate function, we need to wrap it in an AggregateExpression. case agg: AggregateFunction => if (filter.isDefined && !filter.get.deterministic) { - failAnalysis("FILTER expression is non-deterministic, " + - "it cannot be used in aggregate functions") + throw QueryCompilationErrors.nonDeterministicFilterInAggregateError } AggregateExpression(agg, Complete, isDistinct, filter) // This function is not an aggregate function, just return the resolved one. case other if (isDistinct || filter.isDefined) => - failAnalysis("DISTINCT or FILTER specified, " + - s"but ${other.prettyName} is not an aggregate function") + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + other.prettyName) case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + @@ -2256,9 +2242,8 @@ class Analyzer(override val catalogManager: CatalogManager) // Checks if the number of the aliases equals to the number of output columns // in the subquery. if (columnNames.size != outputAttrs.size) { - u.failAnalysis("Number of column aliases does not match number of columns. 
" + - s"Number of column aliases: ${columnNames.size}; " + - s"number of columns: ${outputAttrs.size}.") + throw QueryCompilationErrors.aliasNumberNotMatchColumnNumberError( + columnNames.size, outputAttrs.size, u) } val aliases = outputAttrs.zip(columnNames).map { case (attr, aliasName) => Alias(attr, aliasName)() @@ -2649,10 +2634,8 @@ class Analyzer(override val catalogManager: CatalogManager) } else if (names.isEmpty) { elementAttrs } else { - failAnalysis( - "The number of aliases supplied in the AS clause does not match the number of columns " + - s"output by the UDTF expected ${elementAttrs.size} aliases but got " + - s"${names.mkString(",")} ") + throw QueryCompilationErrors.aliasesNumberNotMatchUDTFOutputError( + elementAttrs.size, names.mkString(",")) } } } @@ -2761,8 +2744,7 @@ class Analyzer(override val catalogManager: CatalogManager) wsc.copy(partitionSpec = newPartitionSpec, orderSpec = newOrderSpec) case WindowExpression(ae: AggregateExpression, _) if ae.filter.isDefined => - failAnalysis( - "window aggregate function with filter predicate is not supported yet.") + throw QueryCompilationErrors.windowAggregateFunctionWithFilterNotSupportedError // Extract Windowed AggregateExpression case we @ WindowExpression( @@ -2775,8 +2757,7 @@ class Analyzer(override val catalogManager: CatalogManager) WindowExpression(newAgg, spec) case AggregateExpression(aggFunc, _, _, _, _) if hasWindowFunction(aggFunc.children) => - failAnalysis("It is not allowed to use a window function inside an aggregate " + - "function. Please use the inner window function in a sub-query.") + throw QueryCompilationErrors.windowFunctionInsideAggregateFunctionNotAllowedError // Extracts AggregateExpression. For example, for SUM(x) - Sum(y) OVER (...), // we need to extract SUM(x). @@ -2840,12 +2821,12 @@ class Analyzer(override val catalogManager: CatalogManager) // We do a final check and see if we only have a single Window Spec defined in an // expressions. if (distinctWindowSpec.isEmpty) { - failAnalysis(s"$expr does not have any WindowExpression.") + throw QueryCompilationErrors.expressionWithoutWindowExpressionError(expr) } else if (distinctWindowSpec.length > 1) { // newExpressionsWithWindowFunctions only have expressions with a single // WindowExpression. If we reach here, we have a bug. - failAnalysis(s"$expr has multiple Window Specifications ($distinctWindowSpec)." + - s"Please file a bug report with this error message, stack trace, and the query.") + throw QueryCompilationErrors.expressionWithMultiWindowExpressionsError( + expr, distinctWindowSpec) } else { val spec = distinctWindowSpec.head val specKey = (spec.partitionSpec, spec.orderSpec, WindowFunctionType.functionType(expr)) @@ -2873,10 +2854,10 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsDown { case Filter(condition, _) if hasWindowFunction(condition) => - failAnalysis("It is not allowed to use window functions inside WHERE clause") + throw QueryCompilationErrors.windowFunctionNotAllowedError("WHERE") case UnresolvedHaving(condition, _) if hasWindowFunction(condition) => - failAnalysis("It is not allowed to use window functions inside HAVING clause") + throw QueryCompilationErrors.windowFunctionNotAllowedError("HAVING") // Aggregate with Having clause. This rule works with an unresolved Aggregate because // a resolved Aggregate will not have Window Functions. 
@@ -3076,10 +3057,10 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case WindowExpression(wf: FrameLessOffsetWindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != f => - failAnalysis(s"Cannot specify window frame for ${wf.prettyName} function") + throw QueryCompilationErrors.cannotSpecifyWindowFrameError(wf.prettyName) case WindowExpression(wf: WindowFunction, WindowSpecDefinition(_, _, f: SpecifiedWindowFrame)) if wf.frame != UnspecifiedFrame && wf.frame != f => - failAnalysis(s"Window Frame $f must match the required frame ${wf.frame}") + throw QueryCompilationErrors.windowFrameNotMatchRequiredFrameError(f, wf.frame) case WindowExpression(wf: WindowFunction, s @ WindowSpecDefinition(_, _, UnspecifiedFrame)) if wf.frame != UnspecifiedFrame => WindowExpression(wf, s.copy(frameSpecification = wf.frame)) @@ -3100,9 +3081,7 @@ class Analyzer(override val catalogManager: CatalogManager) object ResolveWindowOrder extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveExpressions { case WindowExpression(wf: WindowFunction, spec) if spec.orderSpec.isEmpty => - failAnalysis(s"Window function $wf requires window to be ordered, please add ORDER BY " + - s"clause. For example SELECT $wf(value_expr) OVER (PARTITION BY window_partition " + - s"ORDER BY window_ordering) from table") + throw QueryCompilationErrors.windowFunctionWithWindowFrameNotOrderedError(wf) case WindowExpression(rank: RankLike, spec) if spec.resolved => val order = spec.orderSpec.map(_.child) WindowExpression(rank.withOrder(order), spec) @@ -3169,7 +3148,8 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols.map { col => i.table.resolve(Seq(col), resolver) - .getOrElse(i.table.failAnalysis(s"Cannot resolve column name $col")) + .getOrElse(throw QueryCompilationErrors.cannotResolveUserSpecifiedColumnsError( + col, i.table)) } } @@ -3178,9 +3158,8 @@ class Analyzer(override val catalogManager: CatalogManager) cols: Seq[NamedExpression], query: LogicalPlan): LogicalPlan = { if (cols.size != query.output.size) { - query.failAnalysis( - s"Cannot write to table due to mismatched user specified column size(${cols.size}) and" + - s" data column size(${query.output.size})") + throw QueryCompilationErrors.writeTableWithMismatchedColumnsError( + cols.size, query.output.size, query) } val nameToQueryExpr = cols.zip(query.output).toMap // Static partition columns in the table output should not appear in the column list @@ -3760,8 +3739,7 @@ object TimeWindowing extends Rule[LogicalPlan] { renamedPlan.withNewChildren(substitutedPlan :: Nil) } } else if (numWindowExpr > 1) { - p.failAnalysis("Multiple time window expressions would result in a cartesian product " + - "of rows, therefore they are currently not supported.") + throw QueryCompilationErrors.multiTimeWindowExpressionsNotSupportedError(p) } else { p // Return unchanged. Analyzer will throw exception later } From 1554977670ffa452242b1433f0bff44c88c35722 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Thu, 10 Dec 2020 09:14:07 +0000 Subject: [PATCH 0724/1009] [SPARK-33692][SQL] View should use captured catalog and namespace to lookup function ### What changes were proposed in this pull request? Using the view captured catalog and namespace to lookup function, so the view referred functions won't be overridden by newly created function with the same name, but different database or function type (i.e. 
temporary function) ### Why are the changes needed? bug fix, without this PR, changing database or create a temporary function with the same name may cause failure when querying a view. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? newly added and existing test cases. Closes #30662 from linhongliu-db/SPARK-33692. Lead-authored-by: Linhong Liu Co-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 6 +++-- .../sql/catalyst/catalog/SessionCatalog.scala | 26 +++++++++++++++--- .../sql/execution/SQLViewTestSuite.scala | 27 +++++++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0d719b1f53365..74edd65fd0479 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -114,7 +114,8 @@ case class AnalysisContext( nestedViewDepth: Int = 0, maxNestedViewDepth: Int = -1, relationCache: mutable.Map[Seq[String], LogicalPlan] = mutable.Map.empty, - referredTempViewNames: Seq[Seq[String]] = Seq.empty) + referredTempViewNames: Seq[Seq[String]] = Seq.empty, + referredTempFunctionNames: Seq[String] = Seq.empty) object AnalysisContext { private val value = new ThreadLocal[AnalysisContext]() { @@ -139,7 +140,8 @@ object AnalysisContext { originContext.nestedViewDepth + 1, maxNestedViewDepth, originContext.relationCache, - viewDesc.viewReferredTempViewNames) + viewDesc.viewReferredTempViewNames, + viewDesc.viewReferredTempFunctionNames) set(context) try f finally { set(originContext) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 4c32870abe621..7c805bdb4b6f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1484,16 +1484,36 @@ class SessionCatalog( def lookupFunction( name: FunctionIdentifier, children: Seq[Expression]): Expression = synchronized { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ // Note: the implementation of this function is a little bit convoluted. // We probably shouldn't use a single FunctionRegistry to register all three kinds of functions // (built-in, temp, and external). if (name.database.isEmpty && functionRegistry.functionExists(name)) { - // This function has been already loaded into the function registry. - return functionRegistry.lookupFunction(name, children) + val referredTempFunctionNames = AnalysisContext.get.referredTempFunctionNames + val isResolvingView = AnalysisContext.get.catalogAndNamespace.nonEmpty + // Lookup the function as a temporary or a built-in function (i.e. without database) and + // 1. if we are not resolving view, we don't care about the function type and just return it. + // 2. if we are resolving view, only return a temp function if it's referred by this view. + if (!isResolvingView || + !isTemporaryFunction(name) || + referredTempFunctionNames.contains(name.funcName)) { + // This function has been already loaded into the function registry. 
+ return functionRegistry.lookupFunction(name, children) + } + } + + // Get the database from AnalysisContext if it's defined, otherwise, use current database + val currentDatabase = AnalysisContext.get.catalogAndNamespace match { + case Seq() => getCurrentDatabase + case Seq(_, db) => db + case Seq(catalog, namespace @ _*) => + throw new AnalysisException( + s"V2 catalog does not support functions yet. " + + s"catalog: ${catalog}, namespace: '${namespace.quoted}'") } // If the name itself is not qualified, add the current database to it. - val database = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) + val database = formatDatabaseName(name.database.getOrElse(currentDatabase)) val qualifiedName = name.copy(database = Some(database)) if (functionRegistry.functionExists(qualifiedName)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index f6172e3b65050..3a7a63ed45ce3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -199,6 +199,33 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33692: view should use captured catalog and namespace to lookup function") { + val avgFuncClass = "test.org.apache.spark.sql.MyDoubleAvg" + val sumFuncClass = "test.org.apache.spark.sql.MyDoubleSum" + val functionName = "test_udf" + withTempDatabase { dbName => + withUserDefinedFunction( + s"default.$functionName" -> false, + s"$dbName.$functionName" -> false, + functionName -> true) { + // create a function in default database + sql("USE DEFAULT") + sql(s"CREATE FUNCTION $functionName AS '$avgFuncClass'") + // create a view using a function in 'default' database + val viewName = createView("v1", s"SELECT $functionName(col1) FROM VALUES (1), (2), (3)") + // create function in another database with the same function name + sql(s"USE $dbName") + sql(s"CREATE FUNCTION $functionName AS '$sumFuncClass'") + // create temporary function with the same function name + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + withView(viewName) { + // view v1 should still using function defined in `default` database + checkViewOutput(viewName, Seq(Row(102.0))) + } + } + } + } } class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { From 31e0baca30f21f71353a27b827c2acd0e25bd9d8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 10 Dec 2020 16:32:24 -0800 Subject: [PATCH 0725/1009] [SPARK-33740][SQL] hadoop configs in hive-site.xml can overrides pre-existing hadoop ones ### What changes were proposed in this pull request? org.apache.hadoop.conf.Configuration#setIfUnset will ignore those with defaults too ### Why are the changes needed? fix a regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30709 from yaooqinn/SPARK-33740. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/internal/SharedState.scala | 13 ++++++++++--- sql/core/src/test/resources/hive-site.xml | 5 +++++ .../spark/sql/internal/SharedStateSuite.scala | 11 +++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index ea430db9f030f..fd34077aba963 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -228,14 +228,21 @@ object SharedState extends Logging { sparkConf: SparkConf, hadoopConf: Configuration, initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { + + def containsInSparkConf(key: String): Boolean = { + sparkConf.contains(key) || sparkConf.contains("spark.hadoop." + key) || + (key.startsWith("hive") && sparkConf.contains("spark." + key)) + } + val hiveWarehouseKey = "hive.metastore.warehouse.dir" - val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml") + val configFile = Utils.getContextOrSparkClassLoader.getResourceAsStream("hive-site.xml") if (configFile != null) { logInfo(s"loading hive config file: $configFile") val hadoopConfTemp = new Configuration() + hadoopConfTemp.clear() hadoopConfTemp.addResource(configFile) - hadoopConfTemp.asScala.foreach { entry => - hadoopConf.setIfUnset(entry.getKey, entry.getValue) + for (entry <- hadoopConfTemp.asScala if !containsInSparkConf(entry.getKey)) { + hadoopConf.set(entry.getKey, entry.getValue) } } val sparkWarehouseOption = diff --git a/sql/core/src/test/resources/hive-site.xml b/sql/core/src/test/resources/hive-site.xml index 17297b3e22a7e..4bf6189b73ca9 100644 --- a/sql/core/src/test/resources/hive-site.xml +++ b/sql/core/src/test/resources/hive-site.xml @@ -23,4 +23,9 @@ true Internal marker for test. + + hadoop.tmp.dir + /tmp/hive_one + default is /tmp/hadoop-${user.name} and will be overridden + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala index 81bf15342423c..60a899b89e731 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SharedStateSuite.scala @@ -52,4 +52,15 @@ class SharedStateSuite extends SharedSparkSession { assert(conf.isInstanceOf[Configuration]) assert(conf.asInstanceOf[Configuration].get("fs.defaultFS") == "file:///") } + + test("SPARK-33740: hadoop configs in hive-site.xml can overrides pre-existing hadoop ones") { + val conf = new SparkConf() + val hadoopConf = new Configuration() + SharedState.loadHiveConfFile(conf, hadoopConf, Map.empty) + assert(hadoopConf.get("hadoop.tmp.dir") === "/tmp/hive_one") + hadoopConf.clear() + SharedState.loadHiveConfFile( + conf.set("spark.hadoop.hadoop.tmp.dir", "noop"), hadoopConf, Map.empty) + assert(hadoopConf.get("hadoop.tmp.dir") === null) + } } From fab2995972761503563fa2aa547c67047c51bd33 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 10 Dec 2020 17:49:56 -0800 Subject: [PATCH 0726/1009] [SPARK-33742][SQL] Throw PartitionsAlreadyExistException from HiveExternalCatalog.createPartitions() ### What changes were proposed in this pull request? Throw `PartitionsAlreadyExistException` from `createPartitions()` in Hive external catalog when a partition exists. 
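As a rough sketch of the unified behavior (a hypothetical session against the Hive catalog, adapted from the shared `AlterTableAddPartitionSuiteBase` test in this patch; the table and location names are only examples):

```
spark.sql("CREATE TABLE tbl (id bigint, data string) USING HIVE PARTITIONED BY (id)")
spark.sql("ALTER TABLE tbl ADD PARTITION (id=2) LOCATION 'loc1'")
spark.sql("ALTER TABLE tbl ADD PARTITION (id=1) LOCATION 'loc' PARTITION (id=2) LOCATION 'loc1'")
// now throws org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException
// with a message containing "The following partitions already exists",
// matching the V1/V2 in-memory catalogs
```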
Currently, `HiveExternalCatalog.createPartitions()` throws `AlreadyExistsException` wrapped by `AnalysisException`. In the PR, I propose to catch `AlreadyExistsException` in `HiveClientImpl` and replace it by `PartitionsAlreadyExistException`. ### Why are the changes needed? The behaviour of Hive external catalog deviates from V1/V2 in-memory catalogs that throw `PartitionsAlreadyExistException`. To improve user experience with Spark SQL, it would be better to throw the same exception. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running existing test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" ``` Closes #30711 from MaxGekk/hive-partition-exception. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../AlterTableAddPartitionSuiteBase.scala | 18 +++++++++++++++ .../v1/AlterTableAddPartitionSuite.scala | 20 +--------------- .../v2/AlterTableAddPartitionSuite.scala | 19 +-------------- .../sql/hive/client/HiveClientImpl.scala | 15 ++++++++++-- .../spark/sql/hive/client/VersionsSuite.scala | 23 ++++++++++++++++++- .../command/AlterTableAddPartitionSuite.scala | 18 --------------- 6 files changed, 55 insertions(+), 58 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 0cf0b395f139b..9d2c58b7e4351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf @@ -184,4 +185,21 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { "The spec (part0) must match the partition spec (part0, part1)")) } } + + test("partition already exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") + + val errMsg = intercept[PartitionsAlreadyExistException] { + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + }.getMessage + assert(errMsg.contains("The following partitions already exists")) + + sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + + " PARTITION (id=2) LOCATION 'loc1'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 295ce1d3da13f..b29564e1d81b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.command.v1 -import 
org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command @@ -44,21 +43,4 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit } } -class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession { - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[PartitionsAlreadyExistException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("The following partitions already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } -} +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index b15235d17671a..09921c8d8a5eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{PartitionsAlreadyExistException, ResolvePartitionSpec} +import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} @@ -60,23 +60,6 @@ class AlterTableAddPartitionSuite assert(partMetadata.get("location") === expected) } - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[PartitionsAlreadyExistException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("The following partitions already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } - test("SPARK-33650: add partition into a table which doesn't support partition management") { withNsTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index b4ebf153fc178..0b19e5e6e8c84 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.client import java.io.{File, PrintStream} import java.lang.{Iterable => JIterable} +import java.lang.reflect.InvocationTargetException import java.nio.charset.StandardCharsets.UTF_8 import java.util.{Locale, Map => JMap} import java.util.concurrent.TimeUnit._ @@ -48,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -598,7 +599,17 @@ private[hive] class HiveClientImpl( table: String, parts: Seq[CatalogTablePartition], ignoreIfExists: Boolean): Unit = withHiveState { - shim.createPartitions(client, db, table, parts, ignoreIfExists) + def replaceExistException(e: Throwable): Unit = e match { + case _: HiveException if e.getCause.isInstanceOf[AlreadyExistsException] => + throw new PartitionsAlreadyExistException(db, table, parts.map(_.spec)) + case _ => throw e + } + try { + shim.createPartitions(client, db, table, parts, ignoreIfExists) + } catch { + case e: InvocationTargetException => replaceExistException(e.getCause) + case e: Throwable => replaceExistException(e) + } } override def dropPartitions( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala index 684529aa330a7..b5500eaf47158 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPermanentFunctionException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPermanentFunctionException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal} import org.apache.spark.sql.catalyst.util.quietly @@ -594,6 +594,27 @@ class VersionsSuite extends SparkFunSuite with Logging { assert(client.getPartitionOption("default", "src_part", spec).isEmpty) } + test(s"$version: createPartitions if already exists") { + val partitions = Seq(CatalogTablePartition( + Map("key1" -> "101", "key2" -> "102"), + storageFormat)) + try { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + val errMsg = intercept[PartitionsAlreadyExistException] { + client.createPartitions("default", "src_part", partitions, ignoreIfExists = false) + }.getMessage + assert(errMsg.contains("partitions already exists")) + } finally { + client.dropPartitions( + "default", + "src_part", + partitions.map(_.spec), + ignoreIfNotExists = true, + purge = false, + retainData = false) + } + } + 
/////////////////////////////////////////////////////////////////////////// // Function related API /////////////////////////////////////////////////////////////////////////// diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala index ef0ec8d9bd69f..73776c3ef79fa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -26,21 +25,4 @@ class AlterTableAddPartitionSuite with TestHiveSingleton { override def version: String = "Hive V1" override def defaultUsing: String = "USING HIVE" - - test("partition already exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") - - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - }.getMessage - assert(errMsg.contains("already exists")) - - sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - } - } } From 1ba1732beb8e01edfc4f658d9da4eaabf68ed7cf Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Dec 2020 19:15:01 -0800 Subject: [PATCH 0727/1009] [SPARK-33295][BUILD] Upgrade ORC to 1.6.6 ### What changes were proposed in this pull request? This PR aims to upgrade Apache ORC to 1.6.6 for Apache Spark 3.2.0. ### Why are the changes needed? This brings the latest bug fixes and features. Apache Iceberg is already using Apache ORC 1.6.6. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30715 from dongjoon-hyun/SPARK-33295. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 9 +++++---- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 9 +++++---- pom.xml | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index b731c643aabe7..3f1199478bc67 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -4,8 +4,9 @@ JTransforms/3.1//JTransforms-3.1.jar RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar +aircompressor/0.16//aircompressor-0.16.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar @@ -195,9 +196,9 @@ objenesis/2.6//objenesis-2.6.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.12//orc-core-1.5.12.jar -orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar +orc-core/1.6.6//orc-core-1.6.6.jar +orc-mapreduce/1.6.6//orc-mapreduce-1.6.6.jar +orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 84b44342280a5..d16235339897e 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -5,8 +5,9 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar ST4/4.0.4//ST4-4.0.4.jar accessors-smart/1.2//accessors-smart-1.2.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.10//aircompressor-0.10.jar +aircompressor/0.16//aircompressor-0.16.jar algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar +annotations/17.0.0//annotations-17.0.0.jar antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar antlr4-runtime/4.8-1//antlr4-runtime-4.8-1.jar aopalliance-repackaged/2.6.1//aopalliance-repackaged-2.6.1.jar @@ -209,9 +210,9 @@ okhttp/2.7.5//okhttp-2.7.5.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.5.12//orc-core-1.5.12.jar -orc-mapreduce/1.5.12//orc-mapreduce-1.5.12.jar -orc-shims/1.5.12//orc-shims-1.5.12.jar +orc-core/1.6.6//orc-core-1.6.6.jar +orc-mapreduce/1.6.6//orc-mapreduce-1.6.6.jar +orc-shims/1.6.6//orc-shims-1.6.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index f449bf7928ecc..8aaa4a504ef0c 100644 --- a/pom.xml +++ b/pom.xml @@ -136,7 +136,7 @@ 2.6.0 10.12.1.1 1.10.1 - 1.5.12 + 1.6.6 9.4.28.v20200408 3.1.0 0.9.5 From cd7a30641f25f99452b7eb46ee2b3c5d59b2c542 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 11 Dec 2020 14:15:56 +0900 Subject: [PATCH 0728/1009] [SPARK-33749][BUILD][PYTHON] Exclude target directory in pycodestyle and flake8 ### What changes were proposed in this pull request? Once you build and ran K8S tests, Python lint fails as below: ```bash $ ./dev/lint-python ``` Before this PR: ``` starting python compilation test... python compilation succeeded. downloading pycodestyle from https://raw.githubusercontent.com/PyCQA/pycodestyle/2.6.0/pycodestyle.py... starting pycodestyle test... 
pycodestyle checks failed: ./resource-managers/kubernetes/integration-tests/target/spark-dist-unpacked/python/pyspark/cloudpickle/cloudpickle.py:15:101: E501 line too long (105 > 100 characters) ./resource-managers/kubernetes/integration-tests/target/spark-dist-unpacked/python/docs/source/conf.py:60:101: E501 line too long (124 > 100 characters) ... ``` After this PR: ``` starting python compilation test... python compilation succeeded. downloading pycodestyle from https://raw.githubusercontent.com/PyCQA/pycodestyle/2.6.0/pycodestyle.py... starting pycodestyle test... pycodestyle checks passed. starting flake8 test... flake8 checks passed. starting mypy test... mypy checks passed. starting sphinx-build tests... sphinx-build checks passed. ``` This PR excludes target directory to avoid such cases in the future. ### Why are the changes needed? To make it easier to run linters ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested va running `./dev/lint-python`. Closes #30718 from HyukjinKwon/SPARK-33749. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/tox.ini b/dev/tox.ini index 43cd5877dfdb8..68e875f4c54ed 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -16,9 +16,9 @@ [pycodestyle] ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504 max-line-length=100 -exclude=python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* +exclude=*/target/*,python/pyspark/cloudpickle/*.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/* [flake8] select = E901,E999,F821,F822,F823,F401,F405,B006 -exclude = python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi +exclude = */target/*,python/pyspark/cloudpickle/*.py,shared.py*,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*,python/out,python/pyspark/sql/pandas/functions.pyi,python/pyspark/sql/column.pyi,python/pyspark/worker.pyi,python/pyspark/java_gateway.pyi max-line-length = 100 From 7895ea1f50700b56930b3841f16c44442d26e719 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 11 Dec 2020 14:41:15 +0900 Subject: [PATCH 0729/1009] [SPARK-32910][SS] Remove UninterruptibleThread usage from KafkaOffsetReaderAdmin ### What changes were proposed in this pull request? The Kafka offset reader which uses `AdminClient` still uses `UninterruptibleThread` to call it. Since there is no evidence that `AdminClient` suffers from similar issues like [KAFKA-1894](https://issues.apache.org/jira/browse/KAFKA-1894) I'm removing `UninterruptibleThread` usage. In order to put the `AdminClient` under stress and make sure it works I've created the following standalone application: https://github.com/gaborgsomogyi/kafka-admin-interruption What this PR contains: * Removed `UninterruptibleThread` from `KafkaOffsetReaderAdmin` * Removed/modified comments which are not true * Adapted `KafkaRelationSuite` * Renamed `partitionsAssignedToConsumer` to `partitionsAssignedToAdmin` ### Why are the changes needed? `KafkaOffsetReaderAdmin` doesn't need `UninterruptibleThread` usage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests + manually with simple Kafka to Kafka query. 
Closes #30668 from gaborgsomogyi/SPARK-32910. Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 81 ++++++------------- .../sql/kafka010/KafkaRelationSuite.scala | 23 ++---- 2 files changed, 30 insertions(+), 74 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala index d5905795c626b..f9a714c37cb9e 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -33,7 +33,6 @@ import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.util.{UninterruptibleThread, UninterruptibleThreadRunner} /** * This class uses Kafka's own [[Admin]] API to read data offsets from Kafka. @@ -58,13 +57,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( private[kafka010] val offsetFetchAttemptIntervalMs = readerOptions.getOrElse(KafkaSourceProvider.FETCH_OFFSET_RETRY_INTERVAL_MS, "1000").toLong - /** - * [[UninterruptibleThreadRunner]] ensures that all [[Admin]] communication called in an - * [[UninterruptibleThread]]. In the case of streaming queries, we are already running in an - * [[UninterruptibleThread]], however for batch mode this is not the case. - */ - val uninterruptibleThreadRunner = new UninterruptibleThreadRunner("Kafka Offset Reader") - /** * An AdminClient used in the driver to query the latest Kafka offsets. * This only queries the offsets because AdminClient has no functionality to commit offsets like @@ -73,7 +65,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( @volatile protected var _admin: Admin = null protected def admin: Admin = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) if (_admin == null) { _admin = consumerStrategy.createAdmin(driverKafkaParams) } @@ -121,8 +112,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Closes the connection to Kafka, and cleans up state. */ override def close(): Unit = { - if (_admin != null) uninterruptibleThreadRunner.runUninterruptibly { stopAdmin() } - uninterruptibleThreadRunner.shutdown() + stopAdmin() } /** @@ -141,9 +131,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( logDebug(s"Assigned partitions: $partitions. Seeking to $partitionOffsets") partitionOffsets } - val partitions = uninterruptibleThreadRunner.runUninterruptibly { - consumerStrategy.assignedTopicPartitions(admin) - } + val partitions = consumerStrategy.assignedTopicPartitions(admin) // Obtain TopicPartition offsets with late binding support offsetRangeLimit match { case EarliestOffsetRangeLimit => partitions.map { @@ -224,7 +212,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] ): KafkaSourceOffset = { - val fetched = partitionsAssignedToConsumer { + val fetched = partitionsAssignedToAdmin { partitions => { fnAssertParametersWithPartitions(partitions) @@ -262,7 +250,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Fetch the earliest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. 
*/ - override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToAdmin( partitions => { val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava val partitionOffsets = listOffsets(admin, listOffsetsParams) @@ -274,19 +262,16 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Fetch the latest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. * - * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called - * right before `seekToEnd` (KAFKA-7703). As a workaround, we will call `position` right after - * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the + * In order to avoid unknown issues, we use the given `knownOffsets` to audit the * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. + * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka + * after retrying. */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = - partitionsAssignedToConsumer { partitions => { + partitionsAssignedToAdmin { partitions => { val listOffsetsParams = partitions.asScala.map(_ -> OffsetSpec.latest()).toMap.asJava if (knownOffsets.isEmpty) { val partitionOffsets = listOffsets(admin, listOffsetsParams) @@ -314,11 +299,10 @@ private[kafka010] class KafkaOffsetReaderAdmin( } // Retry to fetch latest offsets when detecting incorrect offsets. We don't use - // `withRetriesWithoutInterrupt` to retry because: + // `withRetries` to retry because: // - // - `withRetriesWithoutInterrupt` will reset the consumer for each attempt but a fresh - // consumer has a much bigger chance to hit KAFKA-7703. - // - Avoid calling `consumer.poll(0)` which may cause KAFKA-7703. + // - `withRetries` will reset the admin for each attempt but a fresh + // admin has a much bigger chance to hit KAFKA-7703 like issues. 
var incorrectOffsets: Seq[(TopicPartition, Long, Long)] = Nil var attempt = 0 do { @@ -351,7 +335,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( if (newPartitions.isEmpty) { Map.empty[TopicPartition, Long] } else { - partitionsAssignedToConsumer(partitions => { + partitionsAssignedToAdmin(partitions => { // Get the earliest offset of each partition val listOffsetsParams = newPartitions.filter { newPartition => // When deleting topics happen at the same time, some partitions may not be in @@ -501,11 +485,11 @@ private[kafka010] class KafkaOffsetReaderAdmin( rangeCalculator.getRanges(ranges, getSortedExecutorList) } - private def partitionsAssignedToConsumer( + private def partitionsAssignedToAdmin( body: ju.Set[TopicPartition] => Map[TopicPartition, Long]) - : Map[TopicPartition, Long] = uninterruptibleThreadRunner.runUninterruptibly { + : Map[TopicPartition, Long] = { - withRetriesWithoutInterrupt { + withRetries { val partitions = consumerStrategy.assignedTopicPartitions(admin).asJava logDebug(s"Partitions assigned: $partitions.") body(partitions) @@ -516,37 +500,23 @@ private[kafka010] class KafkaOffsetReaderAdmin( * Helper function that does multiple retries on a body of code that returns offsets. * Retries are needed to handle transient failures. For e.g. race conditions between getting * assignment and getting position while topics/partitions are deleted can cause NPEs. - * - * This method also makes sure `body` won't be interrupted to workaround similar issues like in - * `KafkaConsumer.poll`. (KAFKA-1894) */ - private def withRetriesWithoutInterrupt( - body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) - + private def withRetries(body: => Map[TopicPartition, Long]): Map[TopicPartition, Long] = { synchronized { var result: Option[Map[TopicPartition, Long]] = None var attempt = 1 var lastException: Throwable = null while (result.isEmpty && attempt <= maxOffsetFetchAttempts && !Thread.currentThread().isInterrupted) { - Thread.currentThread match { - case ut: UninterruptibleThread => - ut.runUninterruptibly { - try { - result = Some(body) - } catch { - case NonFatal(e) => - lastException = e - logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) - attempt += 1 - Thread.sleep(offsetFetchAttemptIntervalMs) - resetAdmin() - } - } - case _ => - throw new IllegalStateException( - "Kafka APIs must be executed on a o.a.spark.util.UninterruptibleThread") + try { + result = Some(body) + } catch { + case NonFatal(e) => + lastException = e + logWarning(s"Error in attempt $attempt getting Kafka offsets: ", e) + attempt += 1 + Thread.sleep(offsetFetchAttemptIntervalMs) + resetAdmin() } } if (Thread.interrupted()) { @@ -562,7 +532,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } private def stopAdmin(): Unit = synchronized { - assert(Thread.currentThread().isInstanceOf[UninterruptibleThread]) if (_admin != null) _admin.close() } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 16fa24a68abe2..6e9d8de9fa5be 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -21,13 +21,10 @@ import java.nio.charset.StandardCharsets.UTF_8 import java.util.Locale import 
java.util.concurrent.atomic.AtomicInteger -import scala.annotation.tailrec - import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition -import org.apache.spark.SparkConf -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, TestUtils} import org.apache.spark.sql.{DataFrameReader, QueryTest} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -270,7 +267,9 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession test("no matched offset for timestamp - startingOffsets") { val (topic, timestamps) = prepareTimestampRelatedUnitTest - val e = intercept[SparkException] { + // KafkaOffsetReaderConsumer and KafkaOffsetReaderAdmin both throws AssertionError + // but the UninterruptibleThread used by KafkaOffsetReaderConsumer wraps it with SparkException + val e = intercept[Throwable] { verifyTimestampRelatedQueryResult({ df => // partition 2 will make query fail val startTopicTimestamps = Map( @@ -283,19 +282,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession }, topic, Seq.empty) } - @tailrec - def assertionErrorInExceptionChain(e: Throwable): Boolean = { - if (e.isInstanceOf[AssertionError]) { - true - } else if (e.getCause == null) { - false - } else { - assertionErrorInExceptionChain(e.getCause) - } - } - - assert(assertionErrorInExceptionChain(e), - "Cannot find expected AssertionError in chained exceptions") + TestUtils.assertExceptionMsg(e, "No offset matched from request") } test("no matched offset for timestamp - endingOffsets") { From 24d7e45d31181a24a37261480fcd45a9a97db659 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 11 Dec 2020 05:52:33 +0000 Subject: [PATCH 0730/1009] [SPARK-33527][SQL] Extend the function of decode so as consistent with mainstream databases ### What changes were proposed in this pull request? In Spark, decode(bin, charset) - Decodes the first argument using the second argument character set. Unfortunately this is NOT what any other SQL vendor understands `DECODE` to do. `DECODE` generally is a short hand for a simple case expression: ``` SELECT DECODE(c1, 1, 'Hello', 2, 'World', '!') FROM (VALUES (1), (2), (3)) AS T(c1) => (Hello), (World) (!) ``` There are some mainstream database support the syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/DECODE.html#GUID-39341D91-3442-4730-BD34-D3CF5D4701CE **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/String/DECODE.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CString%20Functions%7C_____10 **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1447.htm **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_DECODE_expression.html **Pig** https://pig.apache.org/docs/latest/api/org/apache/pig/piggybank/evaluation/decode/Decode.html **Teradata** https://docs.teradata.com/reader/756LNiPSFdY~4JcCCcR5Cw/jtCpCycpEaXESG4d63kMjg **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/decode.html ### Why are the changes needed? It is very useful. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Jenkins test. Closes #30479 from beliefer/SPARK-33527. 
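To make the equivalence concrete, a small spark-shell sketch (illustrative only, not taken from the patch's test files; both queries should return the same value):
```
// The multi-argument decode is shorthand for a simple CASE expression with an
// optional default; both of these return "San Francisco".
spark.sql("SELECT decode(2, 1, 'Southlake', 2, 'San Francisco', 'Non domestic')").show()
spark.sql("SELECT CASE 2 WHEN 1 THEN 'Southlake' WHEN 2 THEN 'San Francisco' " +
  "ELSE 'Non domestic' END").show()
```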
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 62 ++++++++++++++++- .../expressions/CodeGenerationSuite.scala | 2 +- .../expressions/StringExpressionsSuite.scala | 14 ++-- .../org/apache/spark/sql/functions.scala | 2 +- .../sql-tests/inputs/string-functions.sql | 10 +++ .../results/ansi/string-functions.sql.out | 68 ++++++++++++++++++- .../results/string-functions.sql.out | 68 ++++++++++++++++++- 7 files changed, 214 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 9f92181b34df1..ae29cfe8119f6 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -26,6 +26,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.commons.codec.binary.{Base64 => CommonsBase64} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -2082,6 +2083,65 @@ case class UnBase64(child: Expression) } } +object Decode { + def createExpr(params: Seq[Expression]): Expression = { + params.length match { + case 0 | 1 => + throw new AnalysisException("Invalid number of arguments for function decode. " + + s"Expected: 2; Found: ${params.length}") + case 2 => StringDecode(params.head, params.last) + case _ => + val input = params.head + val other = params.tail + val itr = other.iterator + var default: Expression = Literal.create(null, StringType) + val branches = ArrayBuffer.empty[(Expression, Expression)] + while (itr.hasNext) { + val search = itr.next + if (itr.hasNext) { + val condition = EqualTo(input, search) + branches += ((condition, itr.next)) + } else { + default = search + } + } + CaseWhen(branches.seq, default) + } + } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + |_FUNC_(bin, charset) - Decodes the first argument using the second argument character set. + | + |_FUNC_(expr, search, result [, search, result ] ... [, default]) - Decode compares expr + | to each search value one by one. If expr is equal to a search, returns the corresponding result. + | If no match is found, then Oracle returns default. If default is omitted, returns null. 
+ """, + examples = """ + Examples: + > SELECT _FUNC_(encode('abc', 'utf-8'), 'utf-8'); + abc + > SELECT _FUNC_(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); + San Francisco + > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); + Non domestic + > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); + NULL + """, + since = "3.2.0") +// scalastyle:on line.size.limit +case class Decode(params: Seq[Expression], child: Expression) extends RuntimeReplaceable { + + def this(params: Seq[Expression]) = { + this(params, Decode.createExpr(params)) + } + + override def flatArguments: Iterator[Any] = Iterator(params) + override def exprsReplaced: Seq[Expression] = params +} + /** * Decodes the first argument into a String using the provided character set * (one of 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'). @@ -2097,7 +2157,7 @@ case class UnBase64(child: Expression) """, since = "1.5.0") // scalastyle:on line.size.limit -case class Decode(bin: Expression, charset: Expression) +case class StringDecode(bin: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { override def left: Expression = bin diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index bca8c56a1071e..b118dba9e3711 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -104,7 +104,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-22543: split large if expressions into blocks due to JVM code size limit") { var strExpr: Expression = Literal("abc") for (_ <- 1 to 150) { - strExpr = Decode(Encode(strExpr, "utf-8"), "utf-8") + strExpr = StringDecode(Encode(strExpr, "utf-8"), "utf-8") } val expressions = Seq(If(EqualTo(strExpr, strExpr), strExpr, strExpr)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 78e9cf82a28b1..11ef1e98c82ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -349,23 +349,23 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. 
checkEvaluation( - Decode(Encode(Literal("大千世界"), Literal("UTF-16LE")), Literal("UTF-16LE")), "大千世界") + StringDecode(Encode(Literal("大千世界"), Literal("UTF-16LE")), Literal("UTF-16LE")), "大千世界") checkEvaluation( - Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", create_row("大千世界")) + StringDecode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", create_row("大千世界")) checkEvaluation( - Decode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", create_row("")) + StringDecode(Encode(a, Literal("utf-8")), Literal("utf-8")), "", create_row("")) // scalastyle:on checkEvaluation(Encode(a, Literal("utf-8")), null, create_row(null)) checkEvaluation(Encode(Literal.create(null, StringType), Literal("utf-8")), null) checkEvaluation(Encode(a, Literal.create(null, StringType)), null, create_row("")) - checkEvaluation(Decode(b, Literal("utf-8")), null, create_row(null)) - checkEvaluation(Decode(Literal.create(null, BinaryType), Literal("utf-8")), null) - checkEvaluation(Decode(b, Literal.create(null, StringType)), null, create_row(null)) + checkEvaluation(StringDecode(b, Literal("utf-8")), null, create_row(null)) + checkEvaluation(StringDecode(Literal.create(null, BinaryType), Literal("utf-8")), null) + checkEvaluation(StringDecode(b, Literal.create(null, StringType)), null, create_row(null)) // Test escaping of charset GenerateUnsafeProjection.generate(Encode(a, Literal("\"quote")) :: Nil) - GenerateUnsafeProjection.generate(Decode(b, Literal("\"quote")) :: Nil) + GenerateUnsafeProjection.generate(StringDecode(b, Literal("\"quote")) :: Nil) } test("initcap unit test") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 5b1ee2deefc10..ede2b52930a17 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2438,7 +2438,7 @@ object functions { * @since 1.5.0 */ def decode(value: Column, charset: String): Column = withExpr { - Decode(value.expr, lit(charset).expr) + StringDecode(value.expr, lit(charset).expr) } /** diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index f5ed2036dc8ac..80b4b8ca8cd54 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -53,3 +53,13 @@ SELECT trim(TRAILING 'xy' FROM 'TURNERyxXxy'); -- Check lpad/rpad with invalid length parameter SELECT lpad('hi', 'invalid_length'); SELECT rpad('hi', 'invalid_length'); + +-- decode +select decode(); +select decode(encode('abc', 'utf-8')); +select decode(encode('abc', 'utf-8'), 'utf-8'); +select decode(1, 1, 'Southlake'); +select decode(2, 1, 'Southlake'); +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index d5c0acb40bb1e..3164d462f8464 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- 
Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 44 -- !query @@ -294,3 +294,69 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: invalid_length + + +-- !query +select decode() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select decode(1, 1, 'Southlake') +-- !query schema +struct +-- !query output +Southlake + + +-- !query +select decode(2, 1, 'Southlake') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +San Francisco + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +Non domestic + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle') +-- !query schema +struct +-- !query output +NULL \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 20c31b140b009..020a095d72e85 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 44 -- !query @@ -290,3 +290,69 @@ SELECT rpad('hi', 'invalid_length') struct -- !query output NULL + + +-- !query +select decode() +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8')) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Invalid number of arguments for function decode. 
Expected: 2; Found: 1;; line 1 pos 7 + + +-- !query +select decode(encode('abc', 'utf-8'), 'utf-8') +-- !query schema +struct +-- !query output +abc + + +-- !query +select decode(1, 1, 'Southlake') +-- !query schema +struct +-- !query output +Southlake + + +-- !query +select decode(2, 1, 'Southlake') +-- !query schema +struct +-- !query output +NULL + + +-- !query +select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +San Francisco + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic') +-- !query schema +struct +-- !query output +Non domestic + + +-- !query +select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle') +-- !query schema +struct +-- !query output +NULL \ No newline at end of file From 8ac86a4c318ddc99d0a979baefd197da2ce1c2b5 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 10 Dec 2020 22:32:23 -0800 Subject: [PATCH 0731/1009] [SPARK-33750][SQL][TESTS] Use `hadoop-3.2` distribution in HiveExternalCatalogVersionsSuite ### What changes were proposed in this pull request? This PR aims to use `hadoop-3.2` distribution in HiveExternalCatalogVersionsSuite if available. ### Why are the changes needed? Apache Spark 3.1 is using Hadoop 3 by default. We need to focus on Hadoop 3 more to prepare the future. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30722 from dongjoon-hyun/SPARK-33750. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveExternalCatalogVersionsSuite.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index cf070f4611f3b..07d8dacf98252 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -93,7 +93,11 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { mirrors.distinct :+ "https://archive.apache.org/dist" :+ PROCESS_TABLES.releaseMirror logInfo(s"Trying to download Spark $version from $sites") for (site <- sites) { - val filename = s"spark-$version-bin-hadoop2.7.tgz" + val filename = if (version.startsWith("3")) { + s"spark-$version-bin-hadoop3.2.tgz" + } else { + s"spark-$version-bin-hadoop2.7.tgz" + } val url = s"$site/spark/spark-$version/$filename" logInfo(s"Downloading Spark $version from $url") try { From c05f6f98b6b06019d99d6a92b61b877afa822d0b Mon Sep 17 00:00:00 2001 From: Josh Soref Date: Fri, 11 Dec 2020 06:49:45 +0000 Subject: [PATCH 0732/1009] [MINOR][SQL] Spelling: enabled - legacy_setops_precedence_enbled ### What changes were proposed in this pull request? Replace `legacy_setops_precedence_enbled` with `legacy_setops_precedence_enabled` Alternatively, `legacy_setops_precedence_enabled` could be added, and `legacy_setops_precedence_enbled` retained, and if set the code could honor it and warn about the deprecated spelling. ### Why are the changes needed? `enabled` is misspelled in `legacy_setops_precedence_enbled` ### Does this PR introduce _any_ user-facing change? Yes. It would break current consumers. 
Examples include: * https://www.programmersought.com/article/87752082924/ * https://github.com/fugue-project/fugue/blob/125d873c38e18b5f09b032bd01ac47a0c6739ddc/fugue_sql/_antlr/fugue_sqlLexer.py * https://github.com/search?q=legacy_setops_precedence_enbled&type=code ### How was this patch tested? It's been included in #30323 for a while (and is now split out here) Closes #30677 from jsoref/spelling-enabled. Authored-by: Josh Soref Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 8 ++++---- .../apache/spark/sql/catalyst/parser/ParseDriver.scala | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index b08451d8a6cfa..d2908a555858d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -21,7 +21,7 @@ grammar SqlBase; * When false, INTERSECT is given the greater precedence over the other set * operations (UNION, EXCEPT and MINUS) as per the SQL standard. */ - public boolean legacy_setops_precedence_enbled = false; + public boolean legacy_setops_precedence_enabled = false; /** * When false, a literal with an exponent would be converted into @@ -466,11 +466,11 @@ multiInsertQueryBody queryTerm : queryPrimary #queryTermDefault - | left=queryTerm {legacy_setops_precedence_enbled}? + | left=queryTerm {legacy_setops_precedence_enabled}? operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!legacy_setops_precedence_enabled}? operator=INTERSECT setQuantifier? right=queryTerm #setOperation - | left=queryTerm {!legacy_setops_precedence_enbled}? + | left=queryTerm {!legacy_setops_precedence_enabled}? operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation ; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index d08be467f96cc..deaa3c9cd725f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -96,7 +96,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.addParseListener(PostProcessor) parser.removeErrorListeners() parser.addErrorListener(ParseErrorListener) - parser.legacy_setops_precedence_enbled = conf.setOpsPrecedenceEnforced + parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled parser.SQL_standard_keyword_behavior = conf.ansiEnabled From d662b95535f12ebbc671a283b19291f63d2a2b8c Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 11 Dec 2020 01:52:13 -0800 Subject: [PATCH 0733/1009] [SPARK-33754][K8S][DOCS] Update kubernetes/integration-tests/README.md to follow the default Hadoop profile updated ### What changes were proposed in this pull request? This PR updates `kubernetes/integration-tests/README.md`. ### Why are the changes needed? To follow the current Hadoop profile (hadoop-3.2). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
I have confirmed that the integration tests pass with the following command for both Hadoop 3.2 an 2.7. ``` build/mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.12 \ -Pkubernetes \ -Pkubernetes-integration-tests \ -Dspark.kubernetes.test.imageTag=${IMAGE_TAG} \ -Dspark.kubernetes.test.imageRepo=docker.io/kubespark \ -Dspark.kubernetes.test.namespace=default \ -Dspark.kubernetes.test.deployMode=minikube \ -Dtest.include.tags=k8s ``` Closes #30726 from sarutak/update-kube-integ-readme. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- resource-managers/kubernetes/integration-tests/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 6409c227ec287..67d03ec5b48a2 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -17,9 +17,9 @@ To run tests with Java 11 instead of Java 8, use `--java-image-tag` to specify t ./dev/dev-run-integration-tests.sh --java-image-tag 11-jre-slim -To run tests with Hadoop 3.2 instead of Hadoop 2.7, use `--hadoop-profile`. +To run tests with Hadoop 2.7 instead of Hadoop 3.2, use `--hadoop-profile`. - ./dev/dev-run-integration-tests.sh --hadoop-profile hadoop-3.2 + ./dev/dev-run-integration-tests.sh --hadoop-profile hadoop-2.7 The minimum tested version of Minikube is 0.23.0. The kube-dns addon must be enabled. Minikube should run with a minimum of 4 CPUs and 6G of memory: @@ -126,7 +126,7 @@ If you prefer to run just the integration tests directly, then you can customise properties to Maven. For example: mvn integration-test -am -pl :spark-kubernetes-integration-tests_2.12 \ - -Pkubernetes -Pkubernetes-integration-tests \ + -Pkubernetes -Pkubernetes-integration-tests \ -Phadoop-2.7 -Dhadoop.version=2.7.4 \ -Dspark.kubernetes.test.sparkTgz=spark-3.0.0-SNAPSHOT-bin-example.tgz \ -Dspark.kubernetes.test.imageTag=sometag \ From 8377aca60a4f326f2d1533c5e570518fb7de2895 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 11 Dec 2020 01:53:41 -0800 Subject: [PATCH 0734/1009] [SPARK-33527][SQL][FOLLOWUP] Fix the scala 2.13 build failure ### What changes were proposed in this pull request? This PR fixes the Scala 2.13 build failure brought by #30479 . ### Why are the changes needed? To pass Scala 2.13 build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done byGitHub Actions. Closes #30727 from sarutak/fix-scala213-build-failure. 
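For context, a small sketch of why the extra `.toSeq` in the diff below is needed (illustrative only, not part of the patch): in Scala 2.13 the default `scala.Seq` is `scala.collection.immutable.Seq`, so the mutable `ArrayBuffer` built up in `Decode.createExpr` no longer satisfies a `Seq[...]` parameter without an explicit conversion.
```
import scala.collection.mutable.ArrayBuffer

def takesSeq(xs: Seq[Int]): Int = xs.sum

val buf = ArrayBuffer(1, 2, 3)
takesSeq(buf.toSeq)   // compiles on both Scala 2.12 and 2.13
// takesSeq(buf)      // compiles on 2.12 only; on 2.13 a mutable buffer is not a scala.Seq
```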
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index ae29cfe8119f6..0207b7b55c5af 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2105,7 +2105,7 @@ object Decode { default = search } } - CaseWhen(branches.seq, default) + CaseWhen(branches.seq.toSeq, default) } } } From 8f5db716fae1162e411750cd5d5380a399d410ae Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 11 Dec 2020 12:39:58 +0000 Subject: [PATCH 0735/1009] [SPARK-33654][SQL] Migrate CACHE TABLE to use UnresolvedRelation to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `CACHE TABLE` to use `UnresolvedRelation` to resolve the table/view identifier in Analyzer as discussed https://github.com/apache/spark/pull/30403/files#r532360022. ### Why are the changes needed? To resolve the table in the analyzer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #30598 from imback82/cache_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 14 +++ .../sql/catalyst/analysis/CheckAnalysis.scala | 3 + .../sql/catalyst/parser/AstBuilder.scala | 29 ++++++ .../catalyst/plans/logical/v2Commands.scala | 19 +++- .../sql/catalyst/parser/DDLParserSuite.scala | 31 +++++++ .../spark/sql/execution/SparkSqlParser.scala | 25 ------ .../spark/sql/execution/command/cache.scala | 52 +---------- .../datasources/v2/CacheTableExec.scala | 89 +++++++++++++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 6 ++ .../sql/connector/DataSourceV2SQLSuite.scala | 9 +- .../sql/execution/SparkSqlParserSuite.scala | 29 ------ .../apache/spark/sql/hive/test/TestHive.scala | 5 +- 12 files changed, 197 insertions(+), 114 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 74edd65fd0479..0ceb4226b0f52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -871,6 +871,10 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map(view => i.copy(table = view)) .getOrElse(i) + case c @ CacheTable(UnresolvedRelation(ident, _, false), _, _, _) => + lookupTempView(ident) + .map(view => c.copy(table = view)) + .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
case write: V2WriteCommand => write.table match { @@ -996,6 +1000,11 @@ class Analyzer(override val catalogManager: CatalogManager) .map(v2Relation => i.copy(table = v2Relation)) .getOrElse(i) + case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => + lookupV2Relation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1087,6 +1096,11 @@ class Analyzer(override val catalogManager: CatalogManager) case other => i.copy(table = other) } + case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 119e17196a454..5d4dc21810281 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -124,6 +124,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case InsertIntoStatement(u: UnresolvedRelation, _, _, _, _, _) => failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") + case CacheTable(u: UnresolvedRelation, _, _, _) => + failAnalysis(s"Table or view not found for `CACHE TABLE`: ${u.multipartIdentifier.quoted}") + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => val tblName = write.table.asInstanceOf[UnresolvedRelation].multipartIdentifier diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3c06a7665a0e2..a6df7690c7e47 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3604,6 +3604,35 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.SERDE != null) } + /** + * Create a [[CacheTable]] or [[CacheTableAsSelect]]. 
+ * + * For example: + * {{{ + * CACHE [LAZY] TABLE multi_part_name + * [OPTIONS tablePropertyList] [[AS] query] + * }}} + */ + override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + val query = Option(ctx.query).map(plan) + val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) + if (query.isDefined && tableName.length > 1) { + val catalogAndNamespace = tableName.init + throw new ParseException("It is not allowed to add catalog/namespace " + + s"prefix ${catalogAndNamespace.quoted} to " + + "the table name in CACHE TABLE AS SELECT", ctx) + } + val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + val isLazy = ctx.LAZY != null + if (query.isDefined) { + CacheTableAsSelect(tableName.head, query.get, isLazy, options) + } else { + CacheTable(UnresolvedRelation(tableName), tableName, isLazy, options) + } + } + /** * Create a [[TruncateTable]] command. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 7d62dde67733b..1a37630a48461 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -711,7 +711,6 @@ case class TruncateTable( override def children: Seq[LogicalPlan] = child :: Nil } - /** * The logical plan of the SHOW PARTITIONS command. */ @@ -761,3 +760,21 @@ case class AlterViewUnsetProperties( ifExists: Boolean) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } + +/** + * The logical plan of the CACHE TABLE command. + */ +case class CacheTable( + table: LogicalPlan, + multipartIdentifier: Seq[String], + isLazy: Boolean, + options: Map[String, String]) extends Command + +/** + * The logical plan of the CACHE TABLE ... AS SELECT command. 
+ */ +case class CacheTableAsSelect( + tempViewName: String, + plan: LogicalPlan, + isLazy: Boolean, + options: Map[String, String]) extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index af5e48d922a16..b860571df0791 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2001,6 +2001,37 @@ class DDLParserSuite extends AnalysisTest { asSerde = true)) } + test("CACHE TABLE") { + comparePlans( + parsePlan("CACHE TABLE a.b.c"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), Seq("a", "b", "c"), false, Map.empty)) + + comparePlans( + parsePlan("CACHE TABLE t AS SELECT * FROM testData"), + CacheTableAsSelect( + "t", + Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData"))), + false, + Map.empty)) + + comparePlans( + parsePlan("CACHE LAZY TABLE a.b.c"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), Seq("a", "b", "c"), true, Map.empty)) + + comparePlans( + parsePlan("CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')"), + CacheTable( + UnresolvedRelation(Seq("a", "b", "c")), + Seq("a", "b", "c"), + true, + Map("storageLevel" -> "DISK_ONLY"))) + + intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", + "It is not allowed to add catalog/namespace prefix a.b") + } + test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 7a31b0dcdd43d..ba5874c21f6c4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,31 +192,6 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } - /** - * Create a [[CacheTableCommand]]. - * - * For example: - * {{{ - * CACHE [LAZY] TABLE multi_part_name - * [OPTIONS tablePropertyList] [[AS] query] - * }}} - */ - override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = withOrigin(ctx) { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ - - val query = Option(ctx.query).map(plan) - val tableName = visitMultipartIdentifier(ctx.multipartIdentifier) - if (query.isDefined && tableName.length > 1) { - val catalogAndNamespace = tableName.init - throw new ParseException("It is not allowed to add catalog/namespace " + - s"prefix ${catalogAndNamespace.quoted} to " + - "the table name in CACHE TABLE AS SELECT", ctx) - } - val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) - CacheTableCommand(tableName, query, ctx.LAZY != null, options) - } - - /** * Create an [[UncacheTableCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index 3f0945d1e817b..3f85a1b0f99d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -17,57 +17,9 @@ package org.apache.spark.sql.execution.command -import java.util.Locale - -import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan} -import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} +import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper -import org.apache.spark.storage.StorageLevel - -case class CacheTableCommand( - multipartIdentifier: Seq[String], - plan: Option[LogicalPlan], - isLazy: Boolean, - options: Map[String, String]) extends RunnableCommand { - require(plan.isEmpty || multipartIdentifier.length == 1, - "Namespace name is not allowed in CACHE TABLE AS SELECT") - - override def innerChildren: Seq[QueryPlan[_]] = plan.toSeq - - override def run(sparkSession: SparkSession): Seq[Row] = { - val tableName = multipartIdentifier.quoted - plan.foreach { logicalPlan => - Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableName) - } - - val storageLevelKey = "storagelevel" - val storageLevelValue = - CaseInsensitiveMap(options).get(storageLevelKey).map(_.toUpperCase(Locale.ROOT)) - val withoutStorageLevel = options.filterKeys(_.toLowerCase(Locale.ROOT) != storageLevelKey) - if (withoutStorageLevel.nonEmpty) { - logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") - } - - val table = sparkSession.table(tableName) - if (storageLevelValue.nonEmpty) { - sparkSession.sharedState.cacheManager.cacheQuery( - table, - Some(tableName), - StorageLevel.fromString(storageLevelValue.get)) - } else { - sparkSession.sharedState.cacheManager.cacheQuery(table, Some(tableName)) - } - - if (!isLazy) { - // Performs eager caching - table.count() - } - - Seq.empty[Row] - } -} case class UncacheTableCommand( multipartIdentifier: Seq[String], diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala new file mode 100644 index 0000000000000..85107dfc9b2ef --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import java.util.Locale + +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper +import org.apache.spark.storage.StorageLevel + +trait BaseCacheTableExec extends V2CommandExec { + def relationName: String + def dataFrameToCache: DataFrame + def isLazy: Boolean + def options: Map[String, String] + + override def run(): Seq[InternalRow] = { + val storageLevelKey = "storagelevel" + val storageLevelValue = + CaseInsensitiveMap(options).get(storageLevelKey).map(_.toUpperCase(Locale.ROOT)) + val withoutStorageLevel = options.filterKeys(_.toLowerCase(Locale.ROOT) != storageLevelKey) + if (withoutStorageLevel.nonEmpty) { + logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") + } + + val sparkSession = sqlContext.sparkSession + val df = dataFrameToCache + if (storageLevelValue.nonEmpty) { + sparkSession.sharedState.cacheManager.cacheQuery( + df, + Some(relationName), + StorageLevel.fromString(storageLevelValue.get)) + } else { + sparkSession.sharedState.cacheManager.cacheQuery(df, Some(relationName)) + } + + if (!isLazy) { + // Performs eager caching + df.count() + } + + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} + +case class CacheTableExec( + relation: LogicalPlan, + multipartIdentifier: Seq[String], + override val isLazy: Boolean, + override val options: Map[String, String]) extends BaseCacheTableExec { + override def relationName: String = multipartIdentifier.quoted + + override def dataFrameToCache: DataFrame = Dataset.ofRows(sqlContext.sparkSession, relation) +} + +case class CacheTableAsSelectExec( + tempViewName: String, + query: LogicalPlan, + override val isLazy: Boolean, + override val options: Map[String, String]) extends BaseCacheTableExec { + override def relationName: String = tempViewName + + override def dataFrameToCache: DataFrame = { + val sparkSession = sqlContext.sparkSession + Dataset.ofRows(sparkSession, query).createTempView(tempViewName) + sparkSession.table(tempViewName) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 37a4dcf081be4..7d278c33b97fc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -344,6 +344,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case RepairTable(_: ResolvedTable) => throw new AnalysisException("MSCK REPAIR TABLE is not supported for v2 tables.") + case r: CacheTable => + CacheTableExec(r.table, r.multipartIdentifier, r.isLazy, r.options) :: Nil + + case r: CacheTableAsSelect => + CacheTableAsSelectExec(r.tempViewName, r.plan, r.isLazy, r.options) :: Nil + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 8e1e8f88f219f..bc570efb70bdf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -778,6 +778,7 @@ class DataSourceV2SQLSuite checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) sql(s"DROP TABLE $t") assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) } @@ -2619,15 +2620,11 @@ class DataSourceV2SQLSuite "ALTER VIEW ... UNSET TBLPROPERTIES") } - private def testNotSupportedV2Command( - sqlCommand: String, - sqlParams: String, - sqlCommandInMessage: Option[String] = None): Unit = { + private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { val e = intercept[AnalysisException] { sql(s"$sqlCommand $sqlParams") } - val cmdStr = sqlCommandInMessage.getOrElse(sqlCommand) - assert(e.message.contains(s"$cmdStr is not supported for v2 tables")) + assert(e.message.contains(s"$sqlCommand is not supported for v2 tables")) } private def assertAnalysisError(sqlStatement: String, expectedError: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 81ba09f206b92..009c5b3705d2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -339,35 +339,6 @@ class SparkSqlParserSuite extends AnalysisTest { "LINES TERMINATED BY only supports newline '\\n' right now") } - test("CACHE TABLE") { - assertEqual( - "CACHE TABLE a.b.c", - CacheTableCommand(Seq("a", "b", "c"), None, false, Map.empty)) - - assertEqual( - "CACHE TABLE t AS SELECT * FROM testData", - CacheTableCommand( - Seq("t"), - Some(Project(Seq(UnresolvedStar(None)), UnresolvedRelation(Seq("testData")))), - false, - Map.empty)) - - assertEqual( - "CACHE LAZY TABLE a.b.c", - CacheTableCommand(Seq("a", "b", "c"), None, true, Map.empty)) - - assertEqual( - "CACHE LAZY TABLE a.b.c OPTIONS('storageLevel' 'DISK_ONLY')", - CacheTableCommand( - Seq("a", "b", "c"), - None, - true, - Map("storageLevel" -> "DISK_ONLY"))) - - intercept("CACHE TABLE a.b.c AS SELECT * FROM testData", - "It is not allowed to add catalog/namespace prefix a.b") - } - test("UNCACHE TABLE") { assertEqual( "UNCACHE TABLE a.b.c", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index e996f2c6ec78f..ff5b9e453a482 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -39,10 +39,9 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} -import org.apache.spark.sql.execution.command.CacheTableCommand import org.apache.spark.sql.hive._ import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf, WithTestConf} @@ -597,7 +596,7 @@ private[hive] class TestHiveQueryExecution( override lazy val analyzed: LogicalPlan = sparkSession.withActive { val describedTables = logical match { - case CacheTableCommand(tbl, _, _, _) => tbl.asTableIdentifier :: Nil + case CacheTable(_, tbl, _, _) => tbl.asTableIdentifier :: Nil case _ => Nil } From 8b97b19ffad7ec78e4b1f05cb1168ef79dc647b2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 11 Dec 2020 12:48:40 +0000 Subject: [PATCH 0736/1009] [SPARK-33706][SQL] Require fully specified partition identifier in partitionExists() ### What changes were proposed in this pull request? 1. Check that the partition identifier passed to `SupportsPartitionManagement.partitionExists()` is fully specified (specifies all values of partition fields). 2. Remove the custom implementation of `partitionExists()` from `InMemoryPartitionTable`, and re-use the default implementation from `SupportsPartitionManagement`. ### Why are the changes needed? The method is supposed to check existence of one partition but currently it can return `true` for partially specified partition. This can lead to incorrect commands behavior, for instance the commands could modify or place data in the middle of partition path. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running existing test suites: ``` $ build/sbt "test:testOnly *AlterTablePartitionV2SQLSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *SupportsPartitionManagementSuite" ``` Closes #30667 from MaxGekk/check-len-partitionExists. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 14 +++++++---- .../connector/InMemoryPartitionTable.scala | 3 --- .../SupportsPartitionManagementSuite.scala | 23 +++++++++++++++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 9d898f2f477e1..cf86c44e9563b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -17,7 +17,6 @@ package org.apache.spark.sql.connector.catalog; -import java.util.Arrays; import java.util.Map; import org.apache.spark.annotation.Experimental; @@ -76,13 +75,18 @@ void createPartition( /** * Test whether a partition exists using an {@link InternalRow ident} from the table. 
* - * @param ident a partition identifier + * @param ident a partition identifier which must contain all partition fields in order * @return true if the partition exists, false otherwise */ default boolean partitionExists(InternalRow ident) { - String[] partitionNames = partitionSchema().names(); - String[] requiredNames = Arrays.copyOfRange(partitionNames, 0, ident.numFields()); - return listPartitionIdentifiers(requiredNames, ident).length > 0; + String[] partitionNames = partitionSchema().names(); + if (ident.numFields() == partitionNames.length) { + return listPartitionIdentifiers(partitionNames, ident).length > 0; + } else { + throw new IllegalArgumentException("The number of fields (" + ident.numFields() + + ") in the partition identifier is not equal to the partition schema length (" + + partitionNames.length + "). The identifier might not refer to one partition."); + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 6a8432e635310..e29c78c59f769 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -83,9 +83,6 @@ class InMemoryPartitionTable( } } - override def partitionExists(ident: InternalRow): Boolean = - memoryTablePartitions.containsKey(ident) - override protected def addPartitionKey(key: Seq[Any]): Unit = { memoryTablePartitions.put(InternalRow.fromSeq(key), Map.empty[String, String].asJava) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index 9de0fe6108c99..dc2df546d6bfd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -145,7 +145,7 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } - test("listPartitionByNames") { + private def createMultiPartTable(): InMemoryPartitionTable = { val partCatalog = new InMemoryPartitionTableCatalog partCatalog.initialize("test", CaseInsensitiveStringMap.empty()) val table = partCatalog.createTable( @@ -156,8 +156,8 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { .add("part1", StringType), Array(LogicalExpressions.identity(ref("part0")), LogicalExpressions.identity(ref("part1"))), util.Collections.emptyMap[String, String]) - val partTable = table.asInstanceOf[InMemoryPartitionTable] + val partTable = table.asInstanceOf[InMemoryPartitionTable] Seq( InternalRow(0, "abc"), InternalRow(0, "def"), @@ -165,6 +165,12 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { partTable.createPartition(partIdent, new util.HashMap[String, String]()) } + partTable + } + + test("listPartitionByNames") { + val partTable = createMultiPartTable() + Seq( (Array("part0", "part1"), InternalRow(0, "abc")) -> Set(InternalRow(0, "abc")), (Array("part0"), InternalRow(0)) -> Set(InternalRow(0, "abc"), InternalRow(0, "def")), @@ -185,4 +191,17 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { intercept[AssertionError](partTable.listPartitionIdentifiers(names, idents)) } } + + test("partitionExists") { + val partTable = 
createMultiPartTable() + + assert(partTable.partitionExists(InternalRow(0, "def"))) + assert(!partTable.partitionExists(InternalRow(-1, "def"))) + assert(!partTable.partitionExists(InternalRow("abc", "def"))) + + val errMsg = intercept[IllegalArgumentException] { + partTable.partitionExists(InternalRow(0)) + }.getMessage + assert(errMsg.contains("The identifier might not refer to one partition")) + } } From 5bab27e00bcad31400c952149ffd0389f841a992 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Sat, 12 Dec 2020 00:52:33 +0900 Subject: [PATCH 0737/1009] [SPARK-33526][SQL] Add config to control if cancel invoke interrupt task on thriftserver ### What changes were proposed in this pull request? This PR add a new config `spark.sql.thriftServer.forceCancel` to give user a way to interrupt task when cancel statement. ### Why are the changes needed? After [#29933](https://github.com/apache/spark/pull/29933), we support cancel query if timeout, but the default behavior of `SparkContext.cancelJobGroups` won't interrupt task and just let task finish by itself. In some case it's dangerous, e.g., data skew or exists a heavily shuffle. A task will hold in a long time after do cancel and the resource will not release. ### Does this PR introduce _any_ user-facing change? Yes, a new config. ### How was this patch tested? Add test. Closes #30481 from ulysses-you/SPARK-33526. Lead-authored-by: ulysses-you Co-authored-by: ulysses-you Signed-off-by: HyukjinKwon --- .../apache/spark/sql/internal/SQLConf.scala | 12 +++++- .../SparkExecuteStatementOperation.scala | 6 ++- .../ThriftServerWithSparkContextSuite.scala | 37 +++++++++++++++++++ 3 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 11fe6c7894f76..2220d6f441e8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -922,13 +922,23 @@ object SQLConf { .booleanConf .createWithDefault(false) + val THRIFTSERVER_FORCE_CANCEL = + buildConf("spark.sql.thriftServer.interruptOnCancel") + .doc("When true, all running tasks will be interrupted if one cancels a query. " + + "When false, all running tasks will remain until finished.") + .version("3.2.0") + .booleanConf + .createWithDefault(false) + val THRIFTSERVER_QUERY_TIMEOUT = buildConf("spark.sql.thriftServer.queryTimeout") .doc("Set a query duration timeout in seconds in Thrift Server. If the timeout is set to " + "a positive value, a running query will be cancelled automatically when the timeout is " + "exceeded, otherwise the query continues to run till completion. If timeout values are " + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + - "than this configuration value, they take precedence.") + "than this configuration value, they take precedence. 
If you set this timeout and prefer" + + "to cancel the queries right away without waiting task to finish, consider enabling" + + s"${THRIFTSERVER_FORCE_CANCEL.key} together.") .version("3.1.0") .timeConf(TimeUnit.SECONDS) .createWithDefault(0L) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index c4ae035e1f836..8ca0ab91a73f7 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -63,6 +63,8 @@ private[hive] class SparkExecuteStatementOperation( } } + private val forceCancel = sqlContext.conf.getConf(SQLConf.THRIFTSERVER_FORCE_CANCEL) + private val substitutorStatement = SQLConf.withExistingConf(sqlContext.conf) { new VariableSubstitution().substitute(statement) } @@ -125,7 +127,7 @@ private[hive] class SparkExecuteStatementOperation( def getNextRowSet(order: FetchOrientation, maxRowsL: Long): RowSet = withLocalProperties { try { - sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, forceCancel) getNextRowSetInternal(order, maxRowsL) } finally { sqlContext.sparkContext.clearJobGroup() @@ -284,7 +286,7 @@ private[hive] class SparkExecuteStatementOperation( parentSession.getSessionState.getConf.setClassLoader(executionHiveClassLoader) } - sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement) + sqlContext.sparkContext.setJobGroup(statementId, substitutorStatement, forceCancel) result = sqlContext.sql(statement) logDebug(result.queryExecution.toString()) HiveThriftServer2.eventManager.onStatementParsed(statementId, diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index fd3a638c4fa44..036eb5850695e 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -18,9 +18,14 @@ package org.apache.spark.sql.hive.thriftserver import java.sql.SQLException +import java.util.concurrent.atomic.AtomicBoolean import org.apache.hive.service.cli.HiveSQLException +import org.apache.spark.TaskKilled +import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd} +import org.apache.spark.sql.internal.SQLConf + trait ThriftServerWithSparkContextSuite extends SharedThriftServer { test("the scratch dir will be deleted during server start but recreated with new operation") { @@ -79,6 +84,38 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { "java.lang.NumberFormatException: invalid input syntax for type numeric: 1.2")) } } + + test("SPARK-33526: Add config to control if cancel invoke interrupt task on thriftserver") { + withJdbcStatement { statement => + val forceCancel = new AtomicBoolean(false) + val listener = new SparkListener { + override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = { + assert(taskEnd.reason.isInstanceOf[TaskKilled]) + if (forceCancel.get()) { + assert(System.currentTimeMillis() - 
taskEnd.taskInfo.launchTime < 1000) + } else { + // avoid accuracy, we check 2s instead of 3s. + assert(System.currentTimeMillis() - taskEnd.taskInfo.launchTime >= 2000) + } + } + } + + spark.sparkContext.addSparkListener(listener) + try { + statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") + Seq(true, false).foreach { force => + statement.execute(s"SET ${SQLConf.THRIFTSERVER_FORCE_CANCEL.key}=$force") + forceCancel.set(force) + val e1 = intercept[SQLException] { + statement.execute("select java_method('java.lang.Thread', 'sleep', 3000L)") + }.getMessage + assert(e1.contains("Query timed out")) + } + } finally { + spark.sparkContext.removeSparkListener(listener) + } + } + } } From 29cc5b3f235ff178cf888f16877e6e0fd44253cc Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 12 Dec 2020 00:53:31 +0900 Subject: [PATCH 0738/1009] [MINOR][INFRA] Add kubernetes-integration-tests to GitHub Actions for Scala 2.13 build ### What changes were proposed in this pull request? This PR adds `kubernetes-integration-tests` to GitHub Actions for Scala 2.13 build. ### Why are the changes needed? Now that the build pass with `kubernetes-integration-tests` and Scala 2.13, it's better to keep it build-able. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions. I also confirmed that the build passes with the following command. ``` $ build/sbt -Pscala-2.13 -Pkubernetes -Pkubernetes-integration-tests compile test:compile ``` Closes #30731 from sarutak/github-actions-k8s. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e40d6362fd23f..426401203fc77 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From fb2e3af4b5d92398d57e61b766466cc7efd9d7cb Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 12 Dec 2020 00:54:40 +0900 Subject: [PATCH 0739/1009] [SPARK-33757][INFRA][R] Fix the R dependencies build error on GitHub Actions and AppVeyor ### What changes were proposed in this pull request? This PR fixes the R dependencies build error on GitHub Actions and AppVeyor. The reason seems that `usethis` package is updated 2020/12/10. https://cran.r-project.org/web/packages/usethis/index.html ### Why are the changes needed? To keep the build clean. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Should be done by GitHub Actions. Closes #30737 from sarutak/fix-r-dependencies-build-error. 
Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 3 +++ appveyor.yml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 426401203fc77..30199eaa41999 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -336,6 +336,9 @@ jobs: - name: Install R linter dependencies and SparkR run: | sudo apt-get install -y libcurl4-openssl-dev + # dependencies for usethis 1.6.3. + sudo Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" + sudo Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" ./R/install-dev.sh diff --git a/appveyor.yml b/appveyor.yml index c40b23c8341eb..b6a42a02d1ac9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -41,6 +41,9 @@ cache: install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 + # usethis and its dependencies + - cmd: Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" + - cmd: Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat. - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')" - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]" From be09d37398f6b62c853e961df64b94b34fd3389d Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 11 Dec 2020 14:43:51 -0800 Subject: [PATCH 0740/1009] [SPARK-33729][SQL] When refreshing cache, Spark should not use cached plan when recaching data ### What changes were proposed in this pull request? This fixes `CatalogImpl.refreshTable` by using a new logical plan when recache the target table. ### Why are the changes needed? In `CatalogImpl.refreshTable`, we currently recache the target table via: ```scala sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) ``` However, here `table` is generated before the `tableRelationCache` in `SessionCatalog` is invalidated, and therefore it still refers to old and staled logical plan, which is incorrect. ### Does this PR introduce _any_ user-facing change? Yes, this fix behavior when a table is refreshed. ### How was this patch tested? Added a unit test. Closes #30699 from sunchao/SPARK-33729. 
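To make the staleness scenario concrete, here is a hedged end-to-end sketch that loosely mirrors the regression test added by this patch (the path, table name and local-mode session settings are placeholders, not taken from the patch):

```scala
import org.apache.spark.sql.SparkSession

object RefreshCachedTableSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("refresh-cached-table-sketch")
      .getOrCreate()
    import spark.implicits._

    val dir = "/tmp/refresh_table_sketch" // assumed scratch location
    Seq(1 -> "a").toDF("i", "j").write.mode("overwrite").parquet(dir)
    spark.sql(s"CREATE TABLE t (i INT, j STRING) USING parquet LOCATION '$dir'")
    spark.sql("CACHE TABLE t")

    // New data files arrive in the table location after the table was cached.
    Seq(2 -> "b").toDF("i", "j").write.mode("append").parquet(dir)

    // With this fix, REFRESH TABLE rebuilds the cache from a fresh logical
    // plan, so the appended row should show up in subsequent reads.
    spark.sql("REFRESH TABLE t")
    spark.table("t").show()

    spark.stop()
  }
}
```

The point of the fix, per the description above, is that the recache step previously reused the logical plan captured before the relation cache was invalidated, which is why the fresh `spark.table(tableIdent)` lookup matters.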
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/CatalogImpl.scala | 6 +++- .../apache/spark/sql/CachedTableSuite.scala | 33 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 3e216415c2815..8008a21804f7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -538,8 +538,12 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + // creates a new logical plan since the old table refers to old relation which + // should be refreshed + val newTable = sparkSession.table(tableIdent) + // recache with the same name and cache level. - sparkSession.sharedState.cacheManager.cacheQuery(table, cacheName, cacheLevel) + sparkSession.sharedState.cacheManager.cacheQuery(newTable, cacheName, cacheLevel) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 3765093f83bc2..a3a6d6721c993 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql +import java.io.{File, FilenameFilter} +import java.nio.file.{Files, Paths} + import scala.collection.mutable.HashSet import scala.concurrent.duration._ @@ -1239,4 +1242,34 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } + + test("SPARK-33729: REFRESH TABLE should not use cached/stale plan") { + def moveParquetFiles(src: File, dst: File): Unit = { + src.listFiles(new FilenameFilter { + override def accept(dir: File, name: String): Boolean = name.endsWith("parquet") + }).foreach { f => + Files.move(f.toPath, Paths.get(dst.getAbsolutePath, f.getName)) + } + // cleanup the rest of the files + src.listFiles().foreach(_.delete()) + src.delete() + } + + withTable("t") { + withTempDir { dir => + val path1 = new File(dir, "path1") + Seq((1 -> "a")).toDF("i", "j").write.parquet(path1.getCanonicalPath) + moveParquetFiles(path1, dir) + sql(s"CREATE TABLE t (i INT, j STRING) USING parquet LOCATION '${dir.toURI}'") + sql("CACHE TABLE t") + checkAnswer(sql("SELECT * FROM t"), Row(1, "a") :: Nil) + + val path2 = new File(dir, "path2") + Seq(2 -> "b").toDF("i", "j").write.parquet(path2.getCanonicalPath) + moveParquetFiles(path2, dir) + sql("REFRESH TABLE t") + checkAnswer(sql("SELECT * FROM t"), Row(1, "a") :: Row(2, "b") :: Nil) + } + } + } } From e2cdfcebd9b39a1104b34d8eafafbcdc6acf5d3e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Sun, 13 Dec 2020 10:41:47 +0900 Subject: [PATCH 0741/1009] [SPARK-32447][CORE][PYTHON][FOLLOW-UP] Fix other occurrences of 'python' to 'python3' ### What changes were proposed in this pull request? This PR proposes to change python to python3 in several places missed. ### Why are the changes needed? To use Python 3 by default safely. ### Does this PR introduce _any_ user-facing change? Yes, it will uses `python3` as its default Python interpreter. ### How was this patch tested? It was tested together in https://github.com/apache/spark/pull/30735. The test cases there will verify this change together. 
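For readers skimming this change, here is a simplified Scala sketch of the interpreter lookup order involved (the helper below is illustrative only, not the Spark code; the `spark.pyspark.*` conf keys and the `PYSPARK_*` environment variables are the standard ones this patch touches):

```scala
object PythonExecResolutionSketch {
  // Simplified stand-in for the resolution order used on the driver side:
  // explicit Spark conf first, then environment variables, then the
  // hard-coded fallback, which this patch moves from "python" to "python3".
  def resolvePythonExec(conf: Map[String, String], env: Map[String, String]): String =
    conf.get("spark.pyspark.driver.python")
      .orElse(conf.get("spark.pyspark.python"))
      .orElse(env.get("PYSPARK_DRIVER_PYTHON"))
      .orElse(env.get("PYSPARK_PYTHON"))
      .getOrElse("python3")

  def main(args: Array[String]): Unit = {
    // With nothing configured, the default interpreter is now python3.
    println(resolvePythonExec(Map.empty, Map.empty))
    // An explicit setting still wins over the fallback.
    println(resolvePythonExec(Map("spark.pyspark.python" -> "/opt/py/bin/python"), Map.empty))
  }
}
```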
Closes #30750 from HyukjinKwon/SPARK-32447. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala | 2 +- .../org/apache/spark/launcher/SparkSubmitCommandBuilder.java | 2 +- python/pyspark/context.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala index 7ad92da4e055a..c3f73ed745da4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/PythonRunner.scala @@ -44,7 +44,7 @@ object PythonRunner { .orElse(sparkConf.get(PYSPARK_PYTHON)) .orElse(sys.env.get("PYSPARK_DRIVER_PYTHON")) .orElse(sys.env.get("PYSPARK_PYTHON")) - .getOrElse("python") + .getOrElse("python3") // Format python file paths before adding them to the PYTHONPATH val formattedPythonFile = formatPath(pythonFile) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index d6ed1e3a3532d..b2c12973bcabd 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -336,7 +336,7 @@ private List buildPySparkShellCommand(Map env) throws IO conf.get(SparkLauncher.PYSPARK_PYTHON), System.getenv("PYSPARK_DRIVER_PYTHON"), System.getenv("PYSPARK_PYTHON"), - "python")); + "python3")); String pyOpts = System.getenv("PYSPARK_DRIVER_PYTHON_OPTS"); if (conf.containsKey(SparkLauncher.PYSPARK_PYTHON)) { // pass conf spark.pyspark.python to python by environment variable. diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 3da535b026137..79fdd22ab13fd 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -227,7 +227,7 @@ def _do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, os.environ["SPARK_BUFFER_SIZE"] = \ str(self._jvm.PythonUtils.getSparkBufferSize(self._jsc)) - self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python') + self.pythonExec = os.environ.get("PYSPARK_PYTHON", 'python3') self.pythonVer = "%d.%d" % sys.version_info[:2] # Broadcast's __reduce__ method stores Broadcast instances here. From 0277fddaef17b615354c735a2c89cdced5f1d8f6 Mon Sep 17 00:00:00 2001 From: linzebing Date: Sun, 13 Dec 2020 22:00:05 +0900 Subject: [PATCH 0742/1009] [MINOR][UI] Correct JobPage's skipped/pending tableHeaderId ### What changes were proposed in this pull request? Current Spark Web UI job page's header link of pending/skipped stages is inconsistent with their statuses. See the picture below: ![image](https://user-images.githubusercontent.com/9404831/101998894-1e843180-3c8c-11eb-8d94-10df9edb68e7.png) ### Why are the changes needed? The code determining the `pendingOrSkippedTableId` has the wrong logic. As explained in the code: > If the job is completed, then any pending stages are displayed as "skipped" [code pointer](https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala#L266) This PR fixes the logic for `pendingOrSkippedTableId` which aligns with the stage statuses. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Verified that header link is consistent with stage status with the fix. Closes #30749 from linzebing/ui_bug. 
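The rule being restored is small enough to state as a sketch (not the UI code itself; the function and assertions below are illustrative):

```scala
object StageTableHeaderSketch {
  // The rule from the description above: once a job has completed, stages
  // that never ran are shown as "skipped"; while the job is still running
  // they are shown as "pending". The table header anchor should match that.
  def pendingOrSkippedTableId(jobIsComplete: Boolean): String =
    if (jobIsComplete) "skipped" else "pending"

  def main(args: Array[String]): Unit = {
    assert(pendingOrSkippedTableId(jobIsComplete = true) == "skipped")
    assert(pendingOrSkippedTableId(jobIsComplete = false) == "pending")
    println("header ids line up with stage statuses")
  }
}
```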
Authored-by: linzebing Signed-off-by: Kousuke Saruta --- core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala index c40e1bc248a49..1dfbce82c852b 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobPage.scala @@ -284,9 +284,9 @@ private[ui] class JobPage(parent: JobsTab, store: AppStatusStore) extends WebUIP val pendingOrSkippedTableId = if (isComplete) { - "pending" - } else { "skipped" + } else { + "pending" } val activeStagesTable = From 99848e530f8528283bb21afac2f89984924f2235 Mon Sep 17 00:00:00 2001 From: Nicholas Marion Date: Sun, 13 Dec 2020 14:36:54 -0800 Subject: [PATCH 0743/1009] [SPARK-33762][BUILD] Upgrade commons-codec to 1.15 ### What changes were proposed in this pull request? ### Why are the changes needed? Open Source scans are reporting a potential encoding/decoding issue related to versions of commons-codec prior to 1.13. Commit referenced: https://github.com/apache/commons-codec/commit/48b615756d1d770091ea3322eefc08011ee8b113 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30740 from n-marion/SPARK-33762_upgrade-commons-codec. Authored-by: Nicholas Marion Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 3f1199478bc67..03ea28271b683 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -33,7 +33,7 @@ chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar +commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d16235339897e..6dd7f87ba1578 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -30,7 +30,7 @@ chill-java/0.9.5//chill-java-0.9.5.jar chill_2.12/0.9.5//chill_2.12-0.9.5.jar commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar commons-cli/1.2//commons-cli-1.2.jar -commons-codec/1.10//commons-codec-1.10.jar +commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-compiler/3.0.16//commons-compiler-3.0.16.jar commons-compress/1.20//commons-compress-1.20.jar diff --git a/pom.xml b/pom.xml index 8aaa4a504ef0c..f087dba9abb00 100644 --- a/pom.xml +++ b/pom.xml @@ -173,7 +173,7 @@ 2.10.5.1 1.1.8.2 1.1.2 - 1.10 + 1.15 1.20 2.5 From 01b73ae6388279514d61c14a9dc9718a34dad465 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 13 Dec 2020 14:40:55 -0800 Subject: [PATCH 0744/1009] [SPARK-33766][BUILD] Upgrade Jackson to 2.11.4 ### What changes were proposed in this pull request? This pr upgrade Jackson to 2.11.4. Jackson Release 2.11: https://github.com/FasterXML/jackson/wiki/Jackson-Release-2.11 ### Why are the changes needed? 
Make it easy to upgrade dependency because Jackson 2.10 is not compatible with 2.11: ``` com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.10.5 requires Jackson Databind version >= 2.10.0 and < 2.11.0 ``` [Avro](https://issues.apache.org/jira/browse/AVRO-2967) has upgraded Jackson to 2.11.3. [Parquet](https://issues.apache.org/jira/browse/PARQUET-1895) has upgraded Jackson to 2.11.2. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30746 from wangyum/SPARK-33766. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 14 +++++++------- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 14 +++++++------- pom.xml | 5 ++--- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 03ea28271b683..c2caef3ae58d9 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -104,17 +104,17 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar +jackson-annotations/2.11.4//jackson-annotations-2.11.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.5//jackson-core-2.10.5.jar -jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar -jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar +jackson-core/2.11.4//jackson-core-2.11.4.jar +jackson-databind/2.11.4//jackson-databind-2.11.4.jar +jackson-dataformat-yaml/2.11.4//jackson-dataformat-yaml-2.11.4.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar -jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar -jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar +jackson-module-jaxb-annotations/2.11.4//jackson-module-jaxb-annotations-2.11.4.jar +jackson-module-paranamer/2.11.4//jackson-module-paranamer-2.11.4.jar +jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 6dd7f87ba1578..87e7a3c2ae1a7 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -103,18 +103,18 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.12//httpcore-4.4.12.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.4.0//ivy-2.4.0.jar -jackson-annotations/2.10.5//jackson-annotations-2.10.5.jar +jackson-annotations/2.11.4//jackson-annotations-2.11.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.10.5//jackson-core-2.10.5.jar -jackson-databind/2.10.5.1//jackson-databind-2.10.5.1.jar -jackson-dataformat-yaml/2.10.5//jackson-dataformat-yaml-2.10.5.jar +jackson-core/2.11.4//jackson-core-2.11.4.jar +jackson-databind/2.11.4//jackson-databind-2.11.4.jar +jackson-dataformat-yaml/2.11.4//jackson-dataformat-yaml-2.11.4.jar jackson-datatype-jsr310/2.11.2//jackson-datatype-jsr310-2.11.2.jar 
jackson-jaxrs-base/2.9.5//jackson-jaxrs-base-2.9.5.jar jackson-jaxrs-json-provider/2.9.5//jackson-jaxrs-json-provider-2.9.5.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-jaxb-annotations/2.10.5//jackson-module-jaxb-annotations-2.10.5.jar -jackson-module-paranamer/2.10.5//jackson-module-paranamer-2.10.5.jar -jackson-module-scala_2.12/2.10.5//jackson-module-scala_2.12-2.10.5.jar +jackson-module-jaxb-annotations/2.11.4//jackson-module-jaxb-annotations-2.11.4.jar +jackson-module-paranamer/2.11.4//jackson-module-paranamer-2.11.4.jar +jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar diff --git a/pom.xml b/pom.xml index f087dba9abb00..09d48a6592ab9 100644 --- a/pom.xml +++ b/pom.xml @@ -169,8 +169,7 @@ true 1.9.13 - 2.10.5 - 2.10.5.1 + 2.11.4 1.1.8.2 1.1.2 1.15 @@ -774,7 +773,7 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson-databind.version} + ${fasterxml.jackson.version} com.fasterxml.jackson.core From 94bc2d61a2598d995df8eb79fe450b0e5f6d7582 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 13 Dec 2020 14:52:26 -0800 Subject: [PATCH 0745/1009] [SPARK-33589][SQL][FOLLOWUP] Replace Throwable with NonFatal ### What changes were proposed in this pull request? This pr replace `Throwable` with `NonFatal`. ### Why are the changes needed? Improve code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30744 from wangyum/SPARK-33589-2. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/hive/thriftserver/SparkSQLSessionManager.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 0c092abb37f3e..89aaa31c35790 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.thriftserver +import scala.util.control.NonFatal + import org.apache.hadoop.hive.conf.HiveConf import org.apache.hive.service.cli.{HiveSQLException, SessionHandle} import org.apache.hive.service.cli.session.SessionManager @@ -73,12 +75,12 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx) sessionHandle } catch { - case e: Exception => + case NonFatal(e) => try { closeSession(sessionHandle) } catch { - case t: Throwable => - logWarning("Error closing session", t) + case NonFatal(inner) => + logWarning("Error closing session", inner) } throw new HiveSQLException("Failed to open new session: " + e, e) } From 45af3c96889eba1958055206f10524299d0be61c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 13 Dec 2020 14:57:09 -0800 Subject: [PATCH 0746/1009] [SPARK-33764][SS] Make state store maintenance interval as SQL config ### What changes were proposed in this pull request? Currently the maintenance interval is hard-coded in `StateStore`. This patch proposes to make it as SQL config. ### Why are the changes needed? 
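For readers of the migration note above, a minimal way to observe the new behaviour described there (same sample value as in the commit message; the local-mode session settings are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object ShowEscapingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("show-escaping-sketch")
      .getOrCreate()
    import spark.implicits._

    // With this change, the embedded \n and \t are printed escaped
    // (e.g. aaa\nbbb\t\tccccc) instead of breaking the table layout.
    Seq("aaa\nbbb\t\tccccc").toDF("value").show()

    spark.stop()
  }
}
```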
Currently the maintenance interval is hard-coded in `StateStore`. For consistency reason, it should be placed together with other SS configs together. SQLConf also has a better way to have doc and default value setting. ### Does this PR introduce _any_ user-facing change? Yes. Previously users use Spark config to set the maintenance interval. Now they could use SQL config to set it. ### How was this patch tested? Unit test. Closes #30741 from viirya/maintenance-interval-sqlconfig. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 13 ++++++++++ .../streaming/state/StateStore.scala | 26 ++++++++----------- .../streaming/state/StateStoreConf.scala | 3 +++ .../streaming/state/StateStoreSuite.scala | 4 +-- 4 files changed, 29 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2220d6f441e8e..078928391f560 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1378,6 +1378,17 @@ object SQLConf { .intConf .createWithDefault(2) + val STREAMING_MAINTENANCE_INTERVAL = + buildConf("spark.sql.streaming.stateStore.maintenanceInterval") + .internal() + .doc("The interval in milliseconds between triggering maintenance tasks in StateStore. " + + "The maintenance task executes background maintenance task in all the loaded store " + + "providers if they are still the active instances according to the coordinator. If not, " + + "inactive instances of store providers will be closed.") + .version("2.0.0") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefault(TimeUnit.MINUTES.toMillis(1)) // 1 minute + val STATE_STORE_COMPRESSION_CODEC = buildConf("spark.sql.streaming.stateStore.compression.codec") .internal() @@ -3218,6 +3229,8 @@ class SQLConf extends Serializable with Logging { def maxBatchesToRetainInMemory: Int = getConf(MAX_BATCHES_TO_RETAIN_IN_MEMORY) + def streamingMaintenanceInterval: Long = getConf(STREAMING_MAINTENANCE_INTERVAL) + def stateStoreCompressionCodec: String = getConf(STATE_STORE_COMPRESSION_CODEC) def parquetFilterPushDown: Boolean = getConf(PARQUET_FILTER_PUSHDOWN_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index ab67c19783ff7..f87a2fb30cddc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -385,8 +385,6 @@ class UnsafeRowPair(var key: UnsafeRow = null, var value: UnsafeRow = null) { */ object StateStore extends Logging { - val MAINTENANCE_INTERVAL_CONFIG = "spark.sql.streaming.stateStore.maintenanceInterval" - val MAINTENANCE_INTERVAL_DEFAULT_SECS = 60 val PARTITION_ID_TO_CHECK_SCHEMA = 0 @GuardedBy("loadedProviders") @@ -471,7 +469,7 @@ object StateStore extends Logging { storeConf: StateStoreConf, hadoopConf: Configuration): StateStoreProvider = { loadedProviders.synchronized { - startMaintenanceIfNeeded() + startMaintenanceIfNeeded(storeConf) if (storeProviderId.storeId.partitionId == PARTITION_ID_TO_CHECK_SCHEMA) { val result = schemaValidated.getOrElseUpdate(storeProviderId, { @@ -534,19 +532,17 @@ object StateStore extends Logging { } /** Start the periodic maintenance 
task if not already started and if Spark active */ - private def startMaintenanceIfNeeded(): Unit = loadedProviders.synchronized { - val env = SparkEnv.get - if (env != null && !isMaintenanceRunning) { - val periodMs = env.conf.getTimeAsMs( - MAINTENANCE_INTERVAL_CONFIG, s"${MAINTENANCE_INTERVAL_DEFAULT_SECS}s") - maintenanceTask = new MaintenanceTask( - periodMs, - task = { doMaintenance() }, - onError = { loadedProviders.synchronized { loadedProviders.clear() } } - ) - logInfo("State Store maintenance task started") + private def startMaintenanceIfNeeded(storeConf: StateStoreConf): Unit = + loadedProviders.synchronized { + if (SparkEnv.get != null && !isMaintenanceRunning) { + maintenanceTask = new MaintenanceTask( + storeConf.maintenanceInterval, + task = { doMaintenance() }, + onError = { loadedProviders.synchronized { loadedProviders.clear() } } + ) + logInfo("State Store maintenance task started") + } } - } /** * Execute background maintenance task in all the loaded store providers if they are still diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 23cb3be32c85a..58af8272d1c09 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -58,6 +58,9 @@ class StateStoreConf( /** whether to validate state schema during query run. */ val stateSchemaCheckEnabled = sqlConf.isStateSchemaCheckEnabled + /** The interval of maintenance tasks. */ + val maintenanceInterval = sqlConf.streamingMaintenanceInterval + /** * Additional configurations related to state store. 
This will capture all configs in * SQLConf that start with `spark.sql.streaming.stateStore.` and extraOptions for a specific diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 0c2083ab98ade..d4cd3cdc39fd7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -390,8 +390,6 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] val conf = new SparkConf() .setMaster("local") .setAppName("test") - // Make maintenance thread do snapshots and cleanups very fast - .set(StateStore.MAINTENANCE_INTERVAL_CONFIG, "10ms") // Make sure that when SparkContext stops, the StateStore maintenance thread 'quickly' // fails to talk to the StateStoreCoordinator and unloads all the StateStores .set(RPC_NUM_RETRIES, 1) @@ -400,6 +398,8 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] val storeProviderId = StateStoreProviderId(StateStoreId(dir, opId, 0), UUID.randomUUID) val sqlConf = new SQLConf() sqlConf.setConf(SQLConf.MIN_BATCHES_TO_RETAIN, 2) + // Make maintenance thread do snapshots and cleanups very fast + sqlConf.setConf(SQLConf.STREAMING_MAINTENANCE_INTERVAL, 10L) val storeConf = StateStoreConf(sqlConf) val hadoopConf = new Configuration() val provider = newStoreProvider(storeProviderId.storeId) From 8197ee3b15265d39f05f192934b7d7e661713eaa Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Sun, 13 Dec 2020 15:04:23 -0800 Subject: [PATCH 0747/1009] [SPARK-33690][SQL] Escape meta-characters in showString ### What changes were proposed in this pull request? This PR intends to escape meta-characters (e.g., \n and \t) in `Dataset.showString`. Before this PR: ``` scala> Seq("aaa\nbbb\t\tccccc").toDF("value").show() +--------------+ | value| +--------------+ |aaa bbb ccccc| +--------------+ ``` After this PR: ``` +-----------------+ | value| +-----------------+ |aaa\nbbb\t\tccccc| +-----------------+ ``` ### Why are the changes needed? For better output. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a unit test. Closes #30647 from maropu/EscapeMetaInShow. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 2 + .../scala/org/apache/spark/sql/Dataset.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 38 +++++++++++++++++++ .../org/apache/spark/sql/ExplainSuite.scala | 8 ++-- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 164bfd42d6e4a..484823b7c07ab 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -26,6 +26,8 @@ license: | - In Spark 3.2, `spark.sql.adaptive.enabled` is enabled by default. To restore the behavior before Spark 3.2, you can set `spark.sql.adaptive.enabled` to `false`. + - In Spark 3.2, the meta-characters `\n` and `\t` are escaped in the `show()` action. In Spark 3.1 or earlier, the two metacharacters are output as it is. 
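As a quick illustration of the migration note above, a minimal spark-shell sketch; the expected rendering is taken from the `DataFrameSuite` test added by this patch, and `show(truncate = false)` corresponds to the `showString(..., truncate = 0)` call used there:

```scala
// Spark 3.2+: newline and tab characters inside string cells are escaped by show(),
// so one logical row no longer spills across several console lines.
import spark.implicits._

Seq("aaa\nbbb\tccc").toDF("value").show(truncate = false)
// +-------------+
// |value        |
// +-------------+
// |aaa\nbbb\tccc|
// +-------------+
```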
+ ## Upgrading from Spark SQL 3.0 to 3.1 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 6afbbce3ff8d4..5c273591360cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -308,7 +308,9 @@ class Dataset[T] private[sql]( val str = cell match { case null => "null" case binary: Array[Byte] => binary.map("%02X".format(_)).mkString("[", " ", "]") - case _ => cell.toString + case _ => + // Escapes meta-characters not to break the `showString` format + cell.toString.replaceAll("\n", "\\\\n").replaceAll("\t", "\\\\t") } if (truncate > 0 && str.length > truncate) { // do not show ellipses for strings shorter than 4 characters. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 4fecd625031ba..d777cd45b61ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1235,6 +1235,44 @@ class DataFrameSuite extends QueryTest assert(df.showString(10, vertical = true) === expectedAnswer) } + test("SPARK-33690: showString: escape meta-characters") { + val df1 = Seq("aaa\nbbb\tccc").toDF("value") + assert(df1.showString(1, truncate = 0) === + """+-------------+ + ||value | + |+-------------+ + ||aaa\nbbb\tccc| + |+-------------+ + |""".stripMargin) + + val df2 = Seq(Seq("aaa\nbbb\tccc")).toDF("value") + assert(df2.showString(1, truncate = 0) === + """+---------------+ + ||value | + |+---------------+ + ||[aaa\nbbb\tccc]| + |+---------------+ + |""".stripMargin) + + val df3 = Seq(Map("aaa\nbbb\tccc" -> "aaa\nbbb\tccc")).toDF("value") + assert(df3.showString(1, truncate = 0) === + """+--------------------------------+ + ||value | + |+--------------------------------+ + ||{aaa\nbbb\tccc -> aaa\nbbb\tccc}| + |+--------------------------------+ + |""".stripMargin) + + val df4 = Seq("aaa\nbbb\tccc").toDF("value").selectExpr("named_struct('v', value)") + assert(df4.showString(1, truncate = 0) === + """+----------------------+ + ||named_struct(v, value)| + |+----------------------+ + ||{aaa\nbbb\tccc} | + |+----------------------+ + |""".stripMargin) + } + test("SPARK-7319 showString") { val expectedAnswer = """+---+-----+ ||key|value| diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index ddc4f1dab8e63..7d3285da25a5d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -261,11 +261,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite "PartitionFilters: \\[isnotnull\\(k#xL\\), dynamicpruningexpression\\(k#xL " + "IN subquery#x\\)\\]" val expected_pattern3 = - "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + - 
"/df2/.*, ... 99 entries\\]" + "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" + + "/df2/\\S*, ... 99 entries\\]" val expected_pattern4 = - "Location: InMemoryFileIndex \\[.*org.apache.spark.sql.ExplainSuite" + - "/df1/.*, ... 999 entries\\]" + "Location: InMemoryFileIndex \\[\\S*org.apache.spark.sql.ExplainSuite" + + "/df1/\\S*, ... 999 entries\\]" withNormalizedExplain(sqlText) { normalizedOutput => assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) assert(expected_pattern2.r.findAllMatchIn(normalizedOutput).length == 1) From 6e862792fbc6c0916ad04f1c23dc4acbc5f5a53b Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 14 Dec 2020 10:22:37 +0900 Subject: [PATCH 0748/1009] [SPARK-33723][SQL] ANSI mode: Casting String to Date should throw exception on parse error ### What changes were proposed in this pull request? Currently, when casting a string as timestamp type in ANSI mode, Spark throws a runtime exception on parsing error. However, the result for casting a string to date is always null. We should throw an exception on parsing error as well. ### Why are the changes needed? Add missing feature for ANSI mode ### Does this PR introduce _any_ user-facing change? Yes for ANSI mode, Casting string to date will throw an exception on parsing error ### How was this patch tested? Unit test Closes #30687 from gengliangwang/castDate. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/expressions/Cast.scala | 27 ++++++++---- .../sql/catalyst/util/DateTimeUtils.scala | 11 +++-- .../sql/catalyst/expressions/CastSuite.scala | 41 +++++++++++++++---- .../resources/sql-tests/inputs/datetime.sql | 5 ++- .../sql-tests/results/ansi/datetime.sql.out | 11 ++++- .../sql-tests/results/datetime-legacy.sql.out | 10 ++++- .../sql-tests/results/datetime.sql.out | 10 ++++- 8 files changed, 92 insertions(+), 24 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 08ba07aa8de63..8201fd707275d 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -163,6 +163,7 @@ The behavior of some SQL operators can be different under ANSI mode (`spark.sql. - `array_col[index]`: This operator throws `ArrayIndexOutOfBoundsException` if using invalid indices. - `map_col[key]`: This operator throws `NoSuchElementException` if key does not exist in map. - `CAST(string_col AS TIMESTAMP)`: This operator should fail with an exception if the input string can't be parsed. + - `CAST(string_col AS DATE)`: This operator should fail with an exception if the input string can't be parsed. 
### SQL Keywords diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 72bd9ca4d3d1c..e1ece732cf15d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -499,7 +499,11 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // DateConverter private[this] def castToDate(from: DataType): Any => Any = from match { case StringType => - buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) + if (ansiEnabled) { + buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s, zoneId)) + } else { + buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s, zoneId).orNull) + } case TimestampType => // throw valid precision more than seconds, according to Hive. // Timestamp.nanos is in 0 to 999,999,999, no more than a second. @@ -1135,15 +1139,22 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) val zid = getZoneId() (c, evPrim, evNull) => - code""" - scala.Option $intOpt = - org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); - if ($intOpt.isDefined()) { - $evPrim = ((Integer) $intOpt.get()).intValue(); + if (ansiEnabled) { + code""" + $evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDateAnsi($c, $zid); + """ } else { - $evNull = true; + code""" + scala.Option $intOpt = + org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDate($c, $zid); + if ($intOpt.isDefined()) { + $evPrim = ((Integer) $intOpt.get()).intValue(); + } else { + $evNull = true; + } + """ } - """ + case TimestampType => val zid = getZoneId() (c, evPrim, evNull) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 0543ef99f8947..780d2bad1bab2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -365,11 +365,8 @@ object DateTimeUtils { } def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = { - val timestamp = stringToTimestamp(s, timeZoneId) - if (timestamp.isEmpty) { + stringToTimestamp(s, timeZoneId).getOrElse { throw new DateTimeException(s"Cannot cast $s to TimestampType.") - } else { - timestamp.get } } @@ -466,6 +463,12 @@ object DateTimeUtils { } } + def stringToDateAnsi(s: UTF8String, zoneId: ZoneId): Int = { + stringToDate(s, zoneId).getOrElse { + throw new DateTimeException(s"Cannot cast $s to DateType.") + } + } + // Gets the local date-time parts (year, month, day and time) of the instant expressed as the // number of microseconds since the epoch at the given time zone ID. 
private def getLocalDateTime(micros: Long, zoneId: ZoneId): LocalDateTime = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index e46599dc19a8b..c4dd5c412401b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCoercionSuite import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.util.DateTimeConstants._ +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.internal.SQLConf @@ -93,12 +94,6 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Cast(Literal("2015-03-18 123142"), DateType), new Date(c.getTimeInMillis)) checkEvaluation(Cast(Literal("2015-03-18T123123"), DateType), new Date(c.getTimeInMillis)) checkEvaluation(Cast(Literal("2015-03-18T"), DateType), new Date(c.getTimeInMillis)) - - checkEvaluation(Cast(Literal("2015-03-18X"), DateType), null) - checkEvaluation(Cast(Literal("2015/03/18"), DateType), null) - checkEvaluation(Cast(Literal("2015.03.18"), DateType), null) - checkEvaluation(Cast(Literal("20150318"), DateType), null) - checkEvaluation(Cast(Literal("2015-031-8"), DateType), null) } test("cast string to timestamp") { @@ -962,7 +957,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { test("ANSI mode: cast string to timestamp with parse error") { val activeConf = conf - new ParVector(ALL_TIMEZONES.toVector).foreach { zid => + DateTimeTestUtils.outstandingZoneIds.foreach { zid => def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), TimestampType, Option(zid.getId)), @@ -984,6 +979,30 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } } + test("ANSI mode: cast string to date with parse error") { + val activeConf = conf + DateTimeTestUtils.outstandingZoneIds.foreach { zid => + def checkCastWithParseError(str: String): Unit = { + checkExceptionInExpression[DateTimeException]( + cast(Literal(str), DateType, Option(zid.getId)), + s"Cannot cast $str to DateType.") + } + + SQLConf.withExistingConf(activeConf) { + checkCastWithParseError("12345") + checkCastWithParseError("12345-12-18") + checkCastWithParseError("2015-13-18") + checkCastWithParseError("2015-03-128") + checkCastWithParseError("2015/03/18") + checkCastWithParseError("2015.03.18") + checkCastWithParseError("20150318") + checkCastWithParseError("2015-031-8") + checkCastWithParseError("2015-03-18ABC") + checkCastWithParseError("abdef") + } + } + } + test("SPARK-26218: Fix the corner case of codegen when casting float to Integer") { checkExceptionInExpression[ArithmeticException]( cast(cast(Literal("2147483648"), FloatType), IntegerType), "overflow") @@ -1026,6 +1045,14 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(123, DecimalType(2, 0)), null) } + test("cast string to date #2") { + checkEvaluation(Cast(Literal("2015-03-18X"), DateType), null) + checkEvaluation(Cast(Literal("2015/03/18"), DateType), null) + 
checkEvaluation(Cast(Literal("2015.03.18"), DateType), null) + checkEvaluation(Cast(Literal("20150318"), DateType), null) + checkEvaluation(Cast(Literal("2015-031-8"), DateType), null) + } + test("casting to fixed-precision decimals") { assert(cast(123, DecimalType.USER_DEFAULT).nullable === false) assert(cast(10.03f, DecimalType.SYSTEM_DEFAULT).nullable) diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index e35266a85d46b..acfd1f50e14c9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -161,7 +161,7 @@ select from_json('{"d":"26/October/2015"}', 'd Date', map('dateFormat', 'dd/MMMM select from_csv('26/October/2015', 't Timestamp', map('timestampFormat', 'dd/MMMMM/yyyy')); select from_csv('26/October/2015', 'd Date', map('dateFormat', 'dd/MMMMM/yyyy')); --- Timestamp type parse error +-- Datetime types parse error select to_date("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_date("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select to_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); @@ -170,4 +170,5 @@ select unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); -select cast("Unparseable" as timestamp) +select cast("Unparseable" as timestamp); +select cast("Unparseable" as date); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 18a751f573bc2..400c8d6c3c84f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1060,3 +1060,12 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to TimestampType. + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Unparseable to DateType. 
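To make the new golden-file results above concrete, a small spark-shell sketch; the only assumption is that ANSI mode is toggled through the standard `spark.sql.ansi.enabled` flag under which the `ansi/` result files are generated:

```scala
// Default (non-ANSI) behaviour: an unparseable date string silently becomes NULL.
spark.conf.set("spark.sql.ansi.enabled", false)
spark.sql("SELECT CAST('Unparseable' AS DATE)").show()   // prints a single NULL row

// ANSI mode: the same cast now fails at runtime, mirroring the timestamp behaviour.
spark.conf.set("spark.sql.ansi.enabled", true)
spark.sql("SELECT CAST('Unparseable' AS DATE)").show()
// java.time.DateTimeException: Cannot cast Unparseable to DateType.
```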
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index be75f6fb994dd..7e4ea78bf46b9 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1013,3 +1013,11 @@ select cast("Unparseable" as timestamp) struct -- !query output NULL + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 1e963ed16fd96..01db4c1c11fe4 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 122 +-- Number of queries: 123 -- !query @@ -1021,3 +1021,11 @@ select cast("Unparseable" as timestamp) struct -- !query output NULL + + +-- !query +select cast("Unparseable" as date) +-- !query schema +struct +-- !query output +NULL From b135db3b1a5c0b2170e98b97f6160bcf55903799 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 13 Dec 2020 17:27:39 -0800 Subject: [PATCH 0749/1009] [SPARK-33757][INFRA][R][FOLLOWUP] Provide more simple solution ### What changes were proposed in this pull request? This PR proposes a better solution for the R build failure on GitHub Actions. The issue is solved in #30737 but I noticed the following two things. * We can use the latest `usethis` if we install additional libraries on the GitHub Actions environment. * For tests on AppVeyor, `usethis` is not necessary, so I partially revert the previous change. ### Why are the changes needed? For more simple solution. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Confirmed on GitHub Actions and AppVeyor on my account. Closes #30753 from sarutak/followup-SPARK-33757. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 5 +---- appveyor.yml | 3 --- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 30199eaa41999..f133a4132b2a5 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -335,10 +335,7 @@ jobs: r-version: 4.0 - name: Install R linter dependencies and SparkR run: | - sudo apt-get install -y libcurl4-openssl-dev - # dependencies for usethis 1.6.3. 
- sudo Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" - sudo Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" + sudo apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')" ./R/install-dev.sh diff --git a/appveyor.yml b/appveyor.yml index b6a42a02d1ac9..c40b23c8341eb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -41,9 +41,6 @@ cache: install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 - # usethis and its dependencies - - cmd: Rscript -e "install.packages(c('clipr', 'cli', 'crayon', 'desc', 'fs', 'gh', 'glue', 'purrr', 'rematch2', 'rlang', 'rprojroot', 'whisker', 'withr', 'yaml', 'git2r', 'rstudioapi'), repos='https://cloud.r-project.org/')" - - cmd: Rscript -e "install.packages('https://cran.r-project.org/src/contrib/Archive/usethis/usethis_1.6.3.tar.gz', repos=NULL, type='source')" # Required package for R unit tests. xml2 is required to use jUnit reporter in testthat. - cmd: Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')" - cmd: Rscript -e "pkg_list <- as.data.frame(installed.packages()[,c(1, 3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]" From 4d47ac4b4b20a475c2f416c7d614318b31323041 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 14 Dec 2020 05:14:38 +0000 Subject: [PATCH 0750/1009] [SPARK-33705][SQL][TEST] Fix HiveThriftHttpServerSuite flakiness ### What changes were proposed in this pull request? TO FIX flaky tests: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/132345/testReport/ ``` org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.JDBC query execution org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.Checks Hive version org.apache.spark.sql.hive.thriftserver.HiveThriftHttpServerSuite.SPARK-24829 Checks cast as float ``` The root cause here is a jar conflict issue. `NewCookie.isHttpOnly` is not defined in the `jsr311-api.jar` which conflicts The transitive artifact `jsr311-api.jar` of `hadoop-client` is excluded at the maven side. See https://issues.apache.org/jira/browse/SPARK-27179. The Jenkins PR builder and Github Action use `SBT` as the compiler tool. First, the exclusion rule from maven is not followed by sbt, so I was able to see `jsr311-api.jar` from maven cache to be added to the classpath directly. **This seems to be a bug of `sbt-pom-reader` plugin but I'm not that sure.** Then I added an `ExcludeRule` for the `hive-thriftserver` module at the SBT side and did see the `jsr311-api.jar` gone, but the CI jobs still failed with the same error. I added a trace log in ThriftHttpServlet ```s ERROR ThriftHttpServlet: !!!!!!!!! Suspect???????? ---> file:/home/jenkins/workspace/SparkPullRequestBuilder/assembly/target/scala-2.12/jars/jsr311-api-1.1.1.jar ``` And the log pointed out that the assembly phase copied it to `assembly/target/scala-2.12/jars/` which will be added to the classpath too. With the help of SBT `dependencyTree` tool, I saw the `jsr311-api` again as a transitive of `jersery-core` from `yarn` module with a `test` scope. 
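The workaround, described next, relies on sbt's exclusion rules. As a rough, hedged sketch of what such a project-wide exclusion looks like in sbt's Scala DSL (the concrete rules land in the SparkBuild.scala hunk further down in this patch):

```scala
// sbt (Scala DSL) sketch: drop the jersey-1 era artifacts from every module's classpath.
// The actual change applies these rules in SparkBuild.scala and then re-allows the
// artifacts for the yarn module only, which still needs jersey-1 for its tests.
excludeDependencies ++= Seq(
  ExclusionRule(organization = "com.sun.jersey"),
  ExclusionRule("javax.servlet", "javax.servlet-api"),
  ExclusionRule("javax.ws.rs", "jsr311-api"))
```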
So **This seems to be another bug from the SBT side of the `sbt-assembly` plugin.** It copied a test scope transitive artifact to the assembly output. In this PR, I defined some rules in SparkBuild.scala to bypass the potential bugs from the SBT side. First, exclude the `jsr311` from all over the project and then add it back separately to the YARN module for SBT. Additionally, the HiveThriftServerSuites was reflected for reducing flakiness too, but not related to the bugs I have found so far. ### Why are the changes needed? fix test here ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? passing jenkins and ga Closes #30643 from yaooqinn/HiveThriftHttpServerSuite. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- LICENSE-binary | 2 +- core/pom.xml | 6 +- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 1 + pom.xml | 2 +- project/SparkBuild.scala | 27 ++- resource-managers/yarn/pom.xml | 7 - .../HiveThriftServer2Suites.scala | 199 +++++++++++------- .../thriftserver/JdbcConnectionUriSuite.scala | 70 ------ .../SparkMetadataOperationSuite.scala | 2 +- ...arkThriftServerProtocolVersionsSuite.scala | 2 +- .../hive/thriftserver/UISeleniumSuite.scala | 6 +- 12 files changed, 156 insertions(+), 170 deletions(-) delete mode 100644 sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala diff --git a/LICENSE-binary b/LICENSE-binary index d363661b1cc7e..2a5434e14a3f5 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -521,7 +521,6 @@ Common Development and Distribution License (CDDL) 1.1 ------------------------------------------------------ javax.el:javax.el-api https://javaee.github.io/uel-ri/ -javax.servlet:javax.servlet-api https://javaee.github.io/servlet-spec/ javax.servlet.jsp:jsp-api javax.transaction:jta http://www.oracle.com/technetwork/java/index.html javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2 @@ -553,6 +552,7 @@ Eclipse Public License (EPL) 2.0 -------------------------------- jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca +jakarta.servlet:jakarta.servlet-api https://projects.eclipse.org/projects/ee4j.servlet jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api org.glassfish.hk2.external:jakarta.inject diff --git a/core/pom.xml b/core/pom.xml index 84ca852d1f30a..1f24c5273ad0b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -161,9 +161,9 @@ compile - javax.servlet - javax.servlet-api - ${javaxservlet.version} + jakarta.servlet + jakarta.servlet-api + ${jakartaservlet.version} org.apache.commons diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index c2caef3ae58d9..ceea496d3f1dc 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -119,6 +119,7 @@ jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar @@ -126,7 +127,6 @@ janino/3.0.16//janino-3.0.16.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar 
-javax.servlet-api/3.1.0//javax.servlet-api-3.1.0.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.2//jaxb-api-2.2.2.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 87e7a3c2ae1a7..d1b811bd73607 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -118,6 +118,7 @@ jackson-module-scala_2.12/2.11.4//jackson-module-scala_2.12-2.11.4.jar jakarta.activation-api/1.2.1//jakarta.activation-api-1.2.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar +jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar diff --git a/pom.xml b/pom.xml index 09d48a6592ab9..78d1fe7d54350 100644 --- a/pom.xml +++ b/pom.xml @@ -138,7 +138,7 @@ 1.10.1 1.6.6 9.4.28.v20200408 - 3.1.0 + 4.0.3 0.9.5 2.4.0 2.0.8 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 23fb73d228e01..a28c2b55b3789 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -395,6 +395,8 @@ object SparkBuild extends PomBuild { enable(KubernetesIntegrationTests.settings)(kubernetesIntegrationTests) + enable(YARN.settings)(yarn) + /** * Adds the ability to run the spark shell directly from SBT without building an assembly * jar. @@ -654,7 +656,21 @@ object DependencyOverrides { */ object ExcludedDependencies { lazy val settings = Seq( - libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") } + libraryDependencies ~= { libs => libs.filterNot(_.name == "groovy-all") }, + // SPARK-33705: Due to sbt compiler issues, it brings exclusions defined in maven pom back to + // the classpath directly and assemble test scope artifacts to assembly/target/scala-xx/jars, + // which is also will be added to the classpath of some unit tests that will build a subprocess + // to run `spark-submit`, e.g. HiveThriftServer2Test. + // + // These artifacts are for the jersey-1 API but Spark use jersey-2 ones, so it cause test + // flakiness w/ jar conflicts issues. + // + // Also jersey-1 is only used by yarn module(see resource-managers/yarn/pom.xml) for testing + // purpose only. Here we exclude them from the whole project scope and add them w/ yarn only. 
+ excludeDependencies ++= Seq( + ExclusionRule(organization = "com.sun.jersey"), + ExclusionRule("javax.servlet", "javax.servlet-api"), + ExclusionRule("javax.ws.rs", "jsr311-api")) ) } @@ -758,6 +774,15 @@ object Hive { ) } +object YARN { + lazy val settings = Seq( + excludeDependencies --= Seq( + ExclusionRule(organization = "com.sun.jersey"), + ExclusionRule("javax.servlet", "javax.servlet-api"), + ExclusionRule("javax.ws.rs", "jsr311-api")) + ) +} + object Assembly { import sbtassembly.AssemblyUtils._ import sbtassembly.AssemblyPlugin.autoImport._ diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 1d3856742f520..c0ce1c8e151ed 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -88,13 +88,6 @@ hadoop-client - - jakarta.servlet - jakarta.servlet-api - 4.0.3 - test - - com.google.guava diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 5bf7892478082..bd0db743b8d4c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -29,7 +29,7 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future, Promise} import scala.concurrent.duration._ import scala.io.Source -import scala.util.{Random, Try} +import scala.util.Try import com.google.common.io.Files import org.apache.hadoop.hive.conf.HiveConf.ConfVars @@ -41,6 +41,7 @@ import org.apache.hive.service.rpc.thrift.TCLIService.Client import org.apache.thrift.protocol.TBinaryProtocol import org.apache.thrift.transport.TSocket import org.scalatest.BeforeAndAfterAll +import org.scalatest.concurrent.Eventually._ import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging @@ -60,7 +61,7 @@ object TestData { val smallKvWithNull = getTestDataFilePath("small_kv_with_null.txt") } -class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { +class HiveThriftBinaryServerSuite extends HiveThriftServer2Test { override def mode: ServerMode.Value = ServerMode.binary private def withCLIServiceClient(f: ThriftCLIServiceClient => Unit): Unit = { @@ -935,7 +936,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { } } -class SingleSessionSuite extends HiveThriftJdbcTest { +class SingleSessionSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary override protected def extraConf: Seq[String] = @@ -1046,7 +1047,7 @@ class SingleSessionSuite extends HiveThriftJdbcTest { } } -class HiveThriftCleanUpScratchDirSuite extends HiveThriftJdbcTest{ +class HiveThriftCleanUpScratchDirSuite extends HiveThriftServer2TestBase { var tempScratchDir: File = _ override protected def beforeAll(): Unit = { @@ -1079,7 +1080,7 @@ class HiveThriftCleanUpScratchDirSuite extends HiveThriftJdbcTest{ } } -class HiveThriftHttpServerSuite extends HiveThriftJdbcTest { +class HiveThriftHttpServerSuite extends HiveThriftServer2Test { override def mode: ServerMode.Value = ServerMode.http test("JDBC query execution") { @@ -1122,63 +1123,7 @@ object ServerMode extends Enumeration { val binary, http = Value } -abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { - Utils.classForName(classOf[HiveDriver].getCanonicalName) - - private def jdbcUri 
= if (mode == ServerMode.http) { - s"""jdbc:hive2://localhost:$serverPort/ - |default? - |hive.server2.transport.mode=http; - |hive.server2.thrift.http.path=cliservice; - |${hiveConfList}#${hiveVarList} - """.stripMargin.split("\n").mkString.trim - } else { - s"jdbc:hive2://localhost:$serverPort/?${hiveConfList}#${hiveVarList}" - } - - def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*): Unit = { - val user = System.getProperty("user.name") - val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } - val statements = connections.map(_.createStatement()) - - try { - statements.zip(fs).foreach { case (s, f) => f(s) } - } finally { - tableNames.foreach { name => - // TODO: Need a better way to drop the view. - if (name.toUpperCase(Locale.ROOT).startsWith("VIEW")) { - statements(0).execute(s"DROP VIEW IF EXISTS $name") - } else { - statements(0).execute(s"DROP TABLE IF EXISTS $name") - } - } - statements.foreach(_.close()) - connections.foreach(_.close()) - } - } - - def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = { - val user = System.getProperty("user.name") - val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } - val statements = connections.map(_.createStatement()) - - try { - statements.zip(fs).foreach { case (s, f) => f(s) } - } finally { - dbNames.foreach { name => - statements(0).execute(s"DROP DATABASE IF EXISTS $name") - } - statements.foreach(_.close()) - connections.foreach(_.close()) - } - } - - def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { - withMultipleConnectionJdbcStatement(tableNames: _*)(f) - } -} - -abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAll with Logging { +abstract class HiveThriftServer2TestBase extends SparkFunSuite with BeforeAndAfterAll with Logging { def mode: ServerMode.Value private val CLASS_NAME = HiveThriftServer2.getClass.getCanonicalName.stripSuffix("$") @@ -1207,7 +1152,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl protected def extraConf: Seq[String] = Nil - protected def serverStartCommand(port: Int) = { + protected def serverStartCommand(): Seq[String] = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { @@ -1220,7 +1165,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val tempLog4jConf = Utils.createTempDir().getCanonicalPath Files.write( - """log4j.rootCategory=DEBUG, console + """log4j.rootCategory=INFO, console |log4j.appender.console=org.apache.log4j.ConsoleAppender |log4j.appender.console.target=System.err |log4j.appender.console.layout=org.apache.log4j.PatternLayout @@ -1240,7 +1185,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf ${ConfVars.HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION}=$operationLogPath | --hiveconf ${ConfVars.LOCALSCRATCHDIR}=$lScratchDir - | --hiveconf $portConf=$port + | --hiveconf $portConf=0 | --driver-class-path $driverClassPath | --driver-java-options -Dlog4j.debug | --conf spark.ui.enabled=false @@ -1262,7 +1207,7 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val SERVER_STARTUP_TIMEOUT = 3.minutes - private def startThriftServer(port: Int, attempt: Int) = { + private def startThriftServer(attempt: Int) = { warehousePath = Utils.createTempDir() warehousePath.delete() metastorePath = 
Utils.createTempDir() @@ -1274,18 +1219,16 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl logPath = null logTailingProcess = null - val command = serverStartCommand(port) + val command = serverStartCommand() diagnosisBuffer ++= s""" |### Attempt $attempt ### |HiveThriftServer2 command line: $command - |Listening port: $port + |Listening port: 0 |System user: $user """.stripMargin.split("\n") - logInfo(s"Trying to start HiveThriftServer2: port=$port, mode=$mode, attempt=$attempt") - logPath = { val lines = Utils.executeAndGetOutput( command = command, @@ -1312,7 +1255,11 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl // Ensures that the following "tail" command won't fail. logPath.createNewFile() - val successLines = Seq(THRIFT_BINARY_SERVICE_LIVE, THRIFT_HTTP_SERVICE_LIVE) + val successLine = if (mode == ServerMode.http) { + THRIFT_HTTP_SERVICE_LIVE + } else { + THRIFT_BINARY_SERVICE_LIVE + } logTailingProcess = { val command = s"/usr/bin/env tail -n +0 -f ${logPath.getCanonicalPath}".split(" ") @@ -1321,14 +1268,15 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl val captureOutput = (line: String) => diagnosisBuffer.synchronized { diagnosisBuffer += line - successLines.foreach { r => - if (line.contains(r)) { - serverStarted.trySuccess(()) - } + if (line.contains(successLine)) { + listeningPort = line.split(" on port ")(1).split(' ').head.toInt + logInfo(s"Started HiveThriftServer2: port=$listeningPort, mode=$mode, attempt=$attempt") + serverStarted.trySuccess(()) + () } } - val process = builder.start() + val process = builder.start() new ProcessOutputCapturer(process.getInputStream, captureOutput).start() new ProcessOutputCapturer(process.getErrorStream, captureOutput).start() @@ -1379,16 +1327,18 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl override protected def beforeAll(): Unit = { super.beforeAll() - // Chooses a random port between 10000 and 19999 - listeningPort = 10000 + Random.nextInt(10000) diagnosisBuffer.clear() // Retries up to 3 times with different port numbers if the server fails to start - (1 to 3).foldLeft(Try(startThriftServer(listeningPort, 0))) { case (started, attempt) => + (1 to 3).foldLeft(Try(startThriftServer(0))) { case (started, attempt) => started.orElse { - listeningPort += 1 stopThriftServer() - Try(startThriftServer(listeningPort, attempt)) + Try { + startThriftServer(attempt) + eventually(timeout(30.seconds), interval(1.seconds)) { + withJdbcStatement() { _.execute("SELECT 1") } + } + } } }.recover { case cause: Throwable => @@ -1407,4 +1357,91 @@ abstract class HiveThriftServer2Test extends SparkFunSuite with BeforeAndAfterAl super.afterAll() } } + + Utils.classForName(classOf[HiveDriver].getCanonicalName) + + protected def jdbcUri(database: String = "default"): String = if (mode == ServerMode.http) { + s"""jdbc:hive2://localhost:$serverPort/ + |$database? + |hive.server2.transport.mode=http; + |hive.server2.thrift.http.path=cliservice; + |${hiveConfList}#${hiveVarList} + """.stripMargin.split("\n").mkString.trim + } else { + s"jdbc:hive2://localhost:$serverPort/$database?${hiveConfList}#${hiveVarList}" + } + + private def tryCaptureSysLog(f: => Unit): Unit = { + try f catch { + case e: Exception => + // Dump the HiveThriftServer2 log if error occurs, e.g. getConnection failure. 
+ dumpLogs() + throw e + } + } + + def withMultipleConnectionJdbcStatement( + tableNames: String*)(fs: (Statement => Unit)*): Unit = tryCaptureSysLog { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri(), user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + tableNames.foreach { name => + // TODO: Need a better way to drop the view. + if (name.toUpperCase(Locale.ROOT).startsWith("VIEW")) { + statements(0).execute(s"DROP VIEW IF EXISTS $name") + } else { + statements(0).execute(s"DROP TABLE IF EXISTS $name") + } + } + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + + def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = tryCaptureSysLog { + val user = System.getProperty("user.name") + val connections = fs.map { _ => DriverManager.getConnection(jdbcUri(), user, "") } + val statements = connections.map(_.createStatement()) + + try { + statements.zip(fs).foreach { case (s, f) => f(s) } + } finally { + dbNames.foreach { name => + statements(0).execute(s"DROP DATABASE IF EXISTS $name") + } + statements.foreach(_.close()) + connections.foreach(_.close()) + } + } + + def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { + withMultipleConnectionJdbcStatement(tableNames: _*)(f) + } +} + +/** + * Common tests for both binary and http mode thrift server + * TODO: SPARK-31914: Move common tests from subclasses to this trait + */ +abstract class HiveThriftServer2Test extends HiveThriftServer2TestBase { + test("SPARK-17819: Support default database in connection URIs") { + withDatabase("spark17819") { statement => + statement.execute(s"CREATE DATABASE IF NOT EXISTS spark17819") + val jdbcStr = jdbcUri("spark17819") + val connection = DriverManager.getConnection(jdbcStr, user, "") + val statementN = connection.createStatement() + try { + val resultSet = statementN.executeQuery("select current_database()") + resultSet.next() + assert(resultSet.getString(1) === "spark17819") + } finally { + statementN.close() + connection.close() + } + } + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala deleted file mode 100644 index fb8a7e273ae44..0000000000000 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/JdbcConnectionUriSuite.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.hive.thriftserver - -import java.sql.DriverManager - -import org.apache.hive.jdbc.HiveDriver - -import org.apache.spark.util.Utils - -class JdbcConnectionUriSuite extends HiveThriftServer2Test { - Utils.classForName(classOf[HiveDriver].getCanonicalName) - - override def mode: ServerMode.Value = ServerMode.binary - - val JDBC_TEST_DATABASE = "jdbc_test_database" - val USER = System.getProperty("user.name") - val PASSWORD = "" - - override protected def beforeAll(): Unit = { - super.beforeAll() - - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - statement.execute(s"CREATE DATABASE $JDBC_TEST_DATABASE") - connection.close() - } - - override protected def afterAll(): Unit = { - try { - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - statement.execute(s"DROP DATABASE $JDBC_TEST_DATABASE") - connection.close() - } finally { - super.afterAll() - } - } - - test("SPARK-17819 Support default database in connection URIs") { - val jdbcUri = s"jdbc:hive2://localhost:$serverPort/$JDBC_TEST_DATABASE" - val connection = DriverManager.getConnection(jdbcUri, USER, PASSWORD) - val statement = connection.createStatement() - try { - val resultSet = statement.executeQuery("select current_database()") - resultSet.next() - assert(resultSet.getString(1) === JDBC_TEST_DATABASE) - } finally { - statement.close() - connection.close() - } - } -} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index b413b46adcaa1..bb7448293f559 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.types._ import org.apache.spark.util.VersionUtils -class SparkMetadataOperationSuite extends HiveThriftJdbcTest { +class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index 52cf429441d16..fd4d7231e8989 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -31,7 +31,7 @@ import org.apache.thrift.transport.TSocket import org.apache.spark.sql.catalyst.util.NumberConverter import org.apache.spark.unsafe.types.UTF8String -class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { +class SparkThriftServerProtocolVersionsSuite extends HiveThriftServer2TestBase { override def mode: ServerMode.Value = ServerMode.binary diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index d0b829c240327..2d0edb8eb8d48 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -32,7 +32,7 @@ import org.scalatestplus.selenium.WebBrowser import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite - extends HiveThriftJdbcTest + extends HiveThriftServer2TestBase with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ @@ -57,7 +57,7 @@ class UISeleniumSuite } } - override protected def serverStartCommand(port: Int) = { + override protected def serverStartCommand(): Seq[String] = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { @@ -71,7 +71,7 @@ class UISeleniumSuite | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode - | --hiveconf $portConf=$port + | --hiveconf $portConf=0 | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort From 9160d59ae379910ca3bbd04ee25d336afff28abd Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 15:56:46 +0900 Subject: [PATCH 0751/1009] [SPARK-33770][SQL][TESTS] Fix the `ALTER TABLE .. DROP PARTITION` tests that delete files out of partition path ### What changes were proposed in this pull request? Modify the tests that add partitions with `LOCATION`, and where the number of nested folders in `LOCATION` doesn't match to the number of partitioned columns. In that case, `ALTER TABLE .. DROP PARTITION` tries to access (delete) folder out of the "base" path in `LOCATION`. The problem belongs to Hive's MetaStore method `drop_partition_common`: https://github.com/apache/hive/blob/8696c82d07d303b6dbb69b4d443ab6f2b241b251/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java#L4876 which tries to delete empty partition sub-folders recursively starting from the most deeper partition sub-folder up to the base folder. In the case when the number of sub-folder is not equal to the number of partitioned columns `part_vals.size()`, the method will try to list and delete folders out of the base path. ### Why are the changes needed? To fix test failures like https://github.com/apache/spark/pull/30643#issuecomment-743774733: ``` org.apache.spark.sql.hive.execution.command.AlterTableAddPartitionSuite.ALTER TABLE .. ADD PARTITION Hive V1: SPARK-33521: universal type conversions of partition values sbt.ForkMain$ForkError: org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: File file:/home/jenkins/workspace/SparkPullRequestBuilder/target/tmp/spark-832cb19c-65fd-41f3-ae0b-937d76c07897 does not exist; at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:112) at org.apache.spark.sql.hive.HiveExternalCatalog.dropPartitions(HiveExternalCatalog.scala:1014) ... 
Caused by: sbt.ForkMain$ForkError: org.apache.hadoop.hive.metastore.api.MetaException: File file:/home/jenkins/workspace/SparkPullRequestBuilder/target/tmp/spark-832cb19c-65fd-41f3-ae0b-937d76c07897 does not exist at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.drop_partition_with_environment_context(HiveMetaStore.java:3381) at sun.reflect.GeneratedMethodAccessor304.invoke(Unknown Source) ``` The issue can be reproduced by the following steps: 1. Create a base folder, for example: `/Users/maximgekk/tmp/part-location` 2. Create a sub-folder in the base folder and drop permissions for it: ``` $ mkdir /Users/maximgekk/tmp/part-location/aaa $ chmod a-rwx /Users/maximgekk/tmp/part-location/aaa $ ls -al /Users/maximgekk/tmp/part-location total 0 drwxr-xr-x 3 maximgekk staff 96 Dec 13 18:42 . drwxr-xr-x 33 maximgekk staff 1056 Dec 13 18:32 .. d--------- 2 maximgekk staff 64 Dec 13 18:42 aaa ``` 3. Create a table with a partition folder in the base folder: ```sql spark-sql> create table tbl (id int) partitioned by (part0 int, part1 int); spark-sql> alter table tbl add partition (part0=1,part1=2) location '/Users/maximgekk/tmp/part-location/tbl'; ``` 4. Try to drop this partition: ``` spark-sql> alter table tbl drop partition (part0=1,part1=2); 20/12/13 18:46:07 ERROR HiveClientImpl: ====================== Attempt to drop the partition specs in table 'tbl' database 'default': Map(part0 -> 1, part1 -> 2) In this attempt, the following partitions have been dropped successfully: The remaining partitions have not been dropped: [1, 2] ====================== Error in query: org.apache.hadoop.hive.ql.metadata.HiveException: Error accessing file:/Users/maximgekk/tmp/part-location/aaa; org.apache.spark.sql.AnalysisException: org.apache.hadoop.hive.ql.metadata.HiveException: Error accessing file:/Users/maximgekk/tmp/part-location/aaa; ``` The command fails because it tries to access the sub-folder `aaa`, which is outside the partition path `/Users/maximgekk/tmp/part-location/tbl`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected tests from local IDEA, which does not have access to folders outside the partition paths. Closes #30752 from MaxGekk/fix-drop-partition-location. 
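The recurring pattern in the test fixes below is to give a partition a LOCATION whose directory nesting mirrors the partition columns. A hedged, self-contained sketch of that layout; the table name, column names and dates are purely illustrative:

```scala
import java.io.File
import java.nio.file.Files

// Nest the custom LOCATION as <base>/ds=.../hr=... so it has exactly as many levels as
// there are partition columns; Hive's recursive cleanup of empty folders then stays
// inside `base` instead of walking up past it, which is what broke the original tests.
val base    = Files.createTempDirectory("part-location").toFile
val partDir = new File(new File(base, "ds=2008-04-09"), "hr=11")
partDir.mkdirs()

spark.sql("CREATE TABLE tbl (id INT) PARTITIONED BY (ds STRING, hr STRING)")
spark.sql(s"ALTER TABLE tbl ADD PARTITION (ds='2008-04-09', hr='11') LOCATION '${partDir.toURI}'")
spark.sql("ALTER TABLE tbl DROP PARTITION (ds='2008-04-09', hr='11')")  // stays within base
```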
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: HyukjinKwon --- .../sql/catalyst/catalog/ExternalCatalogSuite.scala | 9 +++++++-- .../command/AlterTableAddPartitionSuiteBase.scala | 2 +- .../org/apache/spark/sql/hive/StatisticsSuite.scala | 12 ++++++++---- .../spark/sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala index 55712d0da518d..d310538e302de 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala @@ -408,8 +408,8 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac partitionColumnNames = Seq("partCol1", "partCol2")) catalog.createTable(table, ignoreIfExists = false) - val newLocationPart1 = newUriForDatabase() - val newLocationPart2 = newUriForDatabase() + val newLocationPart1 = newUriForPartition(Seq("p1=1", "p2=2")) + val newLocationPart2 = newUriForPartition(Seq("p1=3", "p2=4")) val partition1 = CatalogTablePartition(Map("partCol1" -> "1", "partCol2" -> "2"), @@ -991,6 +991,11 @@ abstract class CatalogTestUtils { def newUriForDatabase(): URI = new URI(Utils.createTempDir().toURI.toString.stripSuffix("/")) + def newUriForPartition(parts: Seq[String]): URI = { + val path = parts.foldLeft(Utils.createTempDir())(new java.io.File(_, _)) + new URI(path.toURI.toString.stripSuffix("/")) + } + def newDb(name: String): CatalogDatabase = { CatalogDatabase(name, name + " description", newUriForDatabase(), Map.empty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 9d2c58b7e4351..2457bb9f8b57c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -154,7 +154,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { | part8 = '2020-11-23', | part9 = '2020-11-23 22:13:10.123456' |""".stripMargin - sql(s"ALTER TABLE $t ADD PARTITION ($partSpec) LOCATION 'loc1'") + sql(s"ALTER TABLE $t ADD PARTITION ($partSpec)") val expected = Map( "part0" -> "-1", "part1" -> "0", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 2e98a76c52488..5357f4b63d794 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -983,12 +983,16 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto assert(fetched1.get.colStats.size == 2) withTempPaths(numPaths = 2) { case Seq(dir1, dir2) => - val file1 = new File(dir1 + "/data") + val partDir1 = new File(new File(dir1, "ds=2008-04-09"), "hr=11") + val file1 = new File(partDir1, "data") + file1.getParentFile.mkdirs() Utils.tryWithResource(new PrintWriter(file1)) { writer => writer.write("1,a") } - val file2 = new File(dir2 + "/data") + val partDir2 = new File(new File(dir2, "ds=2008-04-09"), "hr=12") + val file2 = new 
File(partDir2, "data") + file2.getParentFile.mkdirs() Utils.tryWithResource(new PrintWriter(file2)) { writer => writer.write("1,a") } @@ -997,8 +1001,8 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql( s""" |ALTER TABLE $table ADD - |PARTITION (ds='2008-04-09', hr='11') LOCATION '${dir1.toURI.toString}' - |PARTITION (ds='2008-04-09', hr='12') LOCATION '${dir2.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='11') LOCATION '${partDir1.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='12') LOCATION '${partDir1.toURI.toString}' """.stripMargin) if (autoUpdate) { val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 9f75f8797fe37..a6c40851b1c4e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -601,8 +601,8 @@ class HiveDDLSuite val tab = "tab_with_partitions" withTempDir { tmpDir => val basePath = new File(tmpDir.getCanonicalPath) - val part1Path = new File(basePath + "/part1") - val part2Path = new File(basePath + "/part2") + val part1Path = new File(new File(basePath, "part10"), "part11") + val part2Path = new File(new File(basePath, "part20"), "part21") val dirSet = part1Path :: part2Path :: Nil // Before data insertion, all the directory are empty From 817f58ddcb775dacbe1b4b2b99056a74a56f65e9 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 08:16:33 +0000 Subject: [PATCH 0752/1009] [SPARK-33768][SQL] Remove `retainData` from `AlterTableDropPartition` ### What changes were proposed in this pull request? Remove the `retainData` parameter from the logical node `AlterTableDropPartition`. ### Why are the changes needed? The `AlterTableDropPartition` command reflects the sql statement (see SqlBase.g4): ``` | ALTER (TABLE | VIEW) multipartIdentifier DROP (IF EXISTS)? partitionSpec (',' partitionSpec)* PURGE? #dropTablePartitions ``` but Spark doesn't allow to specify data retention. So, the parameter can be removed to improve code maintenance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the test suite `DDLParserSuite`. Closes #30748 from MaxGekk/remove-retainData. 
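For reference, a minimal sketch of the statement forms the quoted `#dropTablePartitions` rule accepts (table and partition values are illustrative). None of them can express data retention, which is why the flag is dropped from the logical plan and the v1 path below simply hard-codes `retainData = false`.

```sql
ALTER TABLE tbl DROP IF EXISTS PARTITION (dt='2008-08-08', country='us');
ALTER TABLE tbl DROP PARTITION (dt='2008-08-08'), PARTITION (dt='2009-09-09') PURGE;
```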
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 2 +- .../spark/sql/catalyst/analysis/ResolvePartitionSpec.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +-- .../spark/sql/catalyst/plans/logical/v2Commands.scala | 3 +-- .../apache/spark/sql/catalyst/parser/DDLParserSuite.scala | 6 ++---- .../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 4 ++-- .../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 2 +- 7 files changed, 9 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 5d4dc21810281..c8e137e9c18ac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -586,7 +586,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => checkAlterTablePartition(table, parts) - case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _, _) => + case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => checkAlterTablePartition(table, parts) case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 099ac6172c9e6..35e4820cd710b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -43,7 +43,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a6df7690c7e47..a7bb2179767c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3791,8 +3791,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, - purge = ctx.PURGE != null, - retainData = false) + purge = ctx.PURGE != null) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 1a37630a48461..9446fe383dd9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -666,8 +666,7 @@ case class AlterTableDropPartition( child: LogicalPlan, parts: Seq[PartitionSpec], ifExists: Boolean, - purge: Boolean, - retainData: Boolean) extends Command { + purge: Boolean) extends Command { override lazy val resolved: Boolean = childrenResolved && parts.forall(_.isInstanceOf[ResolvedPartitionSpec]) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index b860571df0791..481d7504dda3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2143,8 +2143,7 @@ class DDLParserSuite extends AnalysisTest { UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), ifExists = true, - purge = false, - retainData = false) + purge = false) val expected2_table = expected1_table.copy(ifExists = false) val expected1_purge = expected1_table.copy(purge = true) @@ -2157,8 +2156,7 @@ class DDLParserSuite extends AnalysisTest { UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
DROP PARTITION ..."), Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), ifExists = true, - purge = false, - retainData = false) + purge = false) val parsed3_table = parsePlan(sql3_table) comparePlans(parsed3_table, expected3_table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 6e06cb3a1f928..2449f73112bf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -469,13 +469,13 @@ class ResolveSessionCatalog( to) case AlterTableDropPartition( - ResolvedV1TableIdentifier(ident), specs, ifExists, purge, retainData) => + ResolvedV1TableIdentifier(ident), specs, ifExists, purge) => AlterTableDropPartitionCommand( ident.asTableIdentifier, specs.asUnresolvedPartitionSpecs.map(_.spec), ifExists, purge, - retainData) + retainData = false) case AlterTableSerDePropertiesStatement(tbl, serdeClassName, serdeProperties, partitionSpec) => val v1TableName = parseV1Table(tbl, "ALTER TABLE SerDe Properties") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 7d278c33b97fc..ea6ac6ca92aa0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -316,7 +316,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _) => AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil From e7fe92f12991ce4ccc101c2cc01354201c9c5384 Mon Sep 17 00:00:00 2001 From: "xuewei.linxuewei" Date: Mon, 14 Dec 2020 08:27:18 +0000 Subject: [PATCH 0753/1009] [SPARK-33546][SQL] Enable row format file format validation in CREATE TABLE LIKE ### What changes were proposed in this pull request? [SPARK-33546] stated the there are three inconsistency behaviors for CREATE TABLE LIKE. 1. CREATE TABLE LIKE does not validate the user-specified hive serde. e.g., STORED AS PARQUET can't be used with ROW FORMAT SERDE. 2. CREATE TABLE LIKE requires STORED AS and ROW FORMAT SERDE to be specified together, which is not necessary. 3. CREATE TABLE LIKE does not respect the default hive serde. This PR fix No.1, and after investigate, No.2 and No.3 turn out not to be issue. Within Hive. CREATE TABLE abc ... ROW FORMAT SERDE 'xxx.xxx.SerdeClass' (Without Stored as) will have following result. Using the user specific SerdeClass and fetch default input/output format from default textfile format. ``` SerDe Library: xxx.xxx.SerdeClass InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat ``` But for CREATE TABLE dst LIKE src ROW FORMAT SERDE 'xxx.xxx.SerdeClass' (Without Stored as) will just ignore user specific SerdeClass and using (input, output, serdeClass) from src table. 
It's better to just throw an exception on such ambiguous behavior, so No.2 is not an issue, but in the PR, we add some comments. For No.3, in fact, CreateTableLikeCommand is using following logical to try to follow src table's storageFormat if current fileFormat.inputFormat is empty ``` val newStorage = if (fileFormat.inputFormat.isDefined) { fileFormat } else { sourceTableDesc.storage.copy(locationUri = fileFormat.locationUri) } ``` If we try to fill the new target table with HiveSerDe.getDefaultStorage if file format and row format is not explicity spefified, it will break the CREATE TABLE LIKE semantic. ### Why are the changes needed? Bug Fix. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added UT and Existing UT. Closes #30705 from leanken/leanken-SPARK-33546. Authored-by: xuewei.linxuewei Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 5 +- .../spark/sql/execution/SparkSqlParser.scala | 9 +- .../sql/hive/execution/HiveDDLSuite.scala | 130 ++++++++++++++---- 3 files changed, 108 insertions(+), 36 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a7bb2179767c8..660d617a07b44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2956,9 +2956,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg protected def getSerdeInfo( rowFormatCtx: Seq[RowFormatContext], createFileFormatCtx: Seq[CreateFileFormatContext], - ctx: ParserRuleContext, - skipCheck: Boolean = false): Option[SerdeInfo] = { - if (!skipCheck) validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) + ctx: ParserRuleContext): Option[SerdeInfo] = { + validateRowFormatFileFormat(rowFormatCtx, createFileFormatCtx, ctx) val rowFormatSerdeInfo = rowFormatCtx.map(visitRowFormat) val fileFormatSerdeInfo = createFileFormatCtx.map(visitCreateFileFormat) (fileFormatSerdeInfo ++ rowFormatSerdeInfo).reduceLeftOption((l, r) => l.merge(r)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index ba5874c21f6c4..3ca3461dfbd47 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -447,14 +447,16 @@ class SparkSqlAstBuilder extends AstBuilder { checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) val provider = ctx.tableProvider.asScala.headOption.map(_.multipartIdentifier.getText) val location = visitLocationSpecList(ctx.locationSpec()) - // TODO: Do not skip serde check for CREATE TABLE LIKE. val serdeInfo = getSerdeInfo( - ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx, skipCheck = true) + ctx.rowFormat.asScala.toSeq, ctx.createFileFormat.asScala.toSeq, ctx) if (provider.isDefined && serdeInfo.isDefined) { operationNotAllowed(s"CREATE TABLE LIKE ... USING ... ${serdeInfo.get.describe}", ctx) } - // TODO: remove this restriction as it seems unnecessary. 
+ // For "CREATE TABLE dst LIKE src ROW FORMAT SERDE xxx" which doesn't specify the file format, + // it's a bit weird to use the default file format, but it's also weird to get file format + // from the source table while the serde class is user-specified. + // Here we require both serde and format to be specified, to avoid confusion. serdeInfo match { case Some(SerdeInfo(storedAs, formatClasses, serde, _)) => if (storedAs.isEmpty && formatClasses.isEmpty && serde.isDefined) { @@ -463,7 +465,6 @@ class SparkSqlAstBuilder extends AstBuilder { case _ => } - // TODO: also look at `HiveSerDe.getDefaultStorage`. val storage = toStorageFormat(location, serdeInfo, ctx) val properties = Option(ctx.tableProps).map(visitPropertyKeyValues).getOrElse(Map.empty) CreateTableLikeCommand( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index a6c40851b1c4e..b686d040b9644 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution import java.io.File import java.net.URI +import java.util.Locale import org.apache.hadoop.fs.Path import org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER @@ -2771,7 +2772,7 @@ class HiveDDLSuite test("Create Table LIKE with row format") { val catalog = spark.sessionState.catalog - withTable("sourceHiveTable", "sourceDsTable", "targetHiveTable1", "targetHiveTable2") { + withTable("sourceHiveTable", "sourceDsTable") { sql("CREATE TABLE sourceHiveTable(a INT, b INT) STORED AS PARQUET") sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") @@ -2817,34 +2818,6 @@ class HiveDDLSuite """.stripMargin) }.getMessage assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... 
STORED AS")) - - // row format works with STORED AS hive format (from hive table) - spark.sql( - """ - |CREATE TABLE targetHiveTable1 LIKE sourceHiveTable STORED AS PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - var table = catalog.getTableMetadata(TableIdentifier("targetHiveTable1")) - assert(table.provider === Some("hive")) - assert(table.storage.inputFormat === - Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) - assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(table.storage.properties("test") == "test") - - // row format works with STORED AS hive format (from datasource table) - spark.sql( - """ - |CREATE TABLE targetHiveTable2 LIKE sourceDsTable STORED AS PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - table = catalog.getTableMetadata(TableIdentifier("targetHiveTable2")) - assert(table.provider === Some("hive")) - assert(table.storage.inputFormat === - Some("org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat")) - assert(table.storage.serde === Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) - assert(table.storage.properties("test") == "test") } } @@ -2872,4 +2845,103 @@ class HiveDDLSuite assert(sql("SELECT * FROM t2 WHERE c = 'A'").collect().isEmpty) } } + + test("SPARK-33546: CREATE TABLE LIKE should validate row format & file format") { + val catalog = spark.sessionState.catalog + withTable("sourceHiveTable", "sourceDsTable") { + sql("CREATE TABLE sourceHiveTable(a INT, b INT) STORED AS PARQUET") + sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") + + // ROW FORMAT SERDE ... STORED AS [SEQUENCEFILE | RCFILE | TEXTFILE] + val allowSerdeFileFormats = Seq("TEXTFILE", "SEQUENCEFILE", "RCFILE") + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + allowSerdeFileFormats.foreach { format => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin) + + val expectedSerde = HiveSerDe.sourceToSerDe(format) + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === + Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + } + } + + // negative case + hiveFormats.filterNot(allowSerdeFileFormats.contains(_)).foreach { format => + withTable("targetTable") { + val ex = intercept[AnalysisException] { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin) + }.getMessage + assert(ex.contains( + s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'")) + } + } + } + + // ROW FORMAT DELIMITED ... 
STORED AS TEXTFILE + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS TEXTFILE + """.stripMargin) + + val expectedSerde = HiveSerDe.sourceToSerDe("TEXTFILE") + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === Some(expectedSerde.get.serde.get)) + + // negative case + val ex = intercept[AnalysisException] { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS PARQUET + """.stripMargin) + }.getMessage + assert(ex.contains("ROW FORMAT DELIMITED is only compatible with 'textfile'")) + } + } + + // ROW FORMAT ... STORED AS INPUTFORMAT ... OUTPUTFORMAT ... + hiveFormats.foreach { tableType => + val expectedSerde = HiveSerDe.sourceToSerDe(tableType) + Seq("sourceHiveTable", "sourceDsTable").foreach { sourceTable => + withTable("targetTable") { + spark.sql( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE '${expectedSerde.get.serde.get}' + |STORED AS INPUTFORMAT '${expectedSerde.get.inputFormat.get}' + |OUTPUTFORMAT '${expectedSerde.get.outputFormat.get}' + """.stripMargin) + + val table = catalog.getTableMetadata(TableIdentifier("targetTable", Some("default"))) + assert(table.provider === Some("hive")) + assert(table.storage.inputFormat === Some(expectedSerde.get.inputFormat.get)) + assert(table.storage.outputFormat === Some(expectedSerde.get.outputFormat.get)) + assert(table.storage.serde === Some(expectedSerde.get.serde.get)) + } + } + } + } + } } From b7c82101352078fb10ab1822bc745c8b4fbb2590 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Mon, 14 Dec 2020 08:31:50 +0000 Subject: [PATCH 0754/1009] [SPARK-33142][SPARK-33647][SQL][FOLLOW-UP] Add docs and test cases ### What changes were proposed in this pull request? Addressed comments in PR #30567, including: 1. add test case for SPARK-33647 and SPARK-33142 2. add migration guide 3. add `getRawTempView` and `getRawGlobalTempView` to return the raw view info (i.e. TemporaryViewRelation) 4. other minor code clean ### Why are the changes needed? Code clean and more test cases ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing and newly added test cases Closes #30666 from linhongliu-db/SPARK-33142-followup. 
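A rough SQL sketch of the behavior exercised by the new `SQLViewTestSuite` case below (database and table names are illustrative): the view captures the catalog and namespace at creation time, so changing the current database afterwards does not affect what it resolves. As the migration guide entry below notes, `spark.sql.legacy.storeAnalyzedPlanForView` can be set to `true` to restore the old temporary view behavior.

```sql
USE default;
CREATE TABLE t USING parquet AS SELECT 1 AS c1;
CREATE TEMPORARY VIEW v1 AS SELECT * FROM t;  -- captures database `default`
USE other_db;
CREATE TABLE t USING parquet AS SELECT 2 AS c1;
SELECT * FROM v1;                             -- still reads default.t and returns 1
```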
Lead-authored-by: Linhong Liu Co-authored-by: Linhong Liu <67896261+linhongliu-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 44 +++++++++++++++---- .../plans/logical/basicLogicalOperators.scala | 16 ------- .../spark/sql/execution/command/views.scala | 16 ++----- .../apache/spark/sql/CachedTableSuite.scala | 13 ++++++ .../spark/sql/execution/SQLViewSuite.scala | 14 ------ .../sql/execution/SQLViewTestSuite.scala | 24 +++++++++- 7 files changed, 79 insertions(+), 52 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 484823b7c07ab..4b6c2266387f5 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -58,7 +58,9 @@ license: | - In Spark 3.1, refreshing a table will trigger an uncache operation for all other caches that reference the table, even if the table itself is not cached. In Spark 3.0 the operation will only be triggered if the table itself is cached. - - In Spark 3.1, creating or altering a view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + - In Spark 3.1, creating or altering a permanent view will capture runtime SQL configs and store them as view properties. These configs will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.useCurrentConfigsForView` to `true`. + + - In Spark 3.1, the temporary view will have same behaviors with the permanent view, i.e. capture and store runtime SQL configs, SQL text, catalog and namespace. The capatured view properties will be applied during the parsing and analysis phases of the view resolution. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.storeAnalyzedPlanForView` to `true`. - Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 7c805bdb4b6f1..9814f4b3aa75b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -610,8 +610,16 @@ class SessionCatalog( /** * Return a local temporary view exactly as it was stored. 
*/ + def getRawTempView(name: String): Option[LogicalPlan] = synchronized { + tempViews.get(formatTableName(name)) + } + + /** + * Generate a [[View]] operator from the view description if the view stores sql text, + * otherwise, it is same to `getRawTempView` + */ def getTempView(name: String): Option[LogicalPlan] = synchronized { - tempViews.get(formatTableName(name)).map(getTempViewPlan) + getRawTempView(name).map(getTempViewPlan) } def getTempViewNames(): Seq[String] = synchronized { @@ -621,8 +629,16 @@ class SessionCatalog( /** * Return a global temporary view exactly as it was stored. */ + def getRawGlobalTempView(name: String): Option[LogicalPlan] = { + globalTempViewManager.get(formatTableName(name)) + } + + /** + * Generate a [[View]] operator from the view description if the view stores sql text, + * otherwise, it is same to `getRawGlobalTempView` + */ def getGlobalTempView(name: String): Option[LogicalPlan] = { - globalTempViewManager.get(formatTableName(name)).map(getTempViewPlan) + getRawGlobalTempView(name).map(getTempViewPlan) } /** @@ -659,7 +675,7 @@ class SessionCatalog( def getTempViewOrPermanentTableMetadata(name: TableIdentifier): CatalogTable = synchronized { val table = formatTableName(name.table) if (name.database.isEmpty) { - getTempView(table).map { + tempViews.get(table).map { case TemporaryViewRelation(metadata) => metadata case plan => CatalogTable( @@ -669,7 +685,6 @@ class SessionCatalog( schema = plan.output.toStructType) }.getOrElse(getTableMetadata(name)) } else if (formatDatabaseName(name.database.get) == globalTempViewManager.database) { - val a = globalTempViewManager.get(table) globalTempViewManager.get(table).map { case TemporaryViewRelation(metadata) => metadata case plan => @@ -810,21 +825,34 @@ class SessionCatalog( // The relation is a view, so we wrap the relation by: // 1. Add a [[View]] operator over the relation to keep track of the view desc; // 2. Wrap the logical plan in a [[SubqueryAlias]] which tracks the name of the view. 
- val child = View.fromCatalogTable(metadata, isTempView = false, parser) - SubqueryAlias(multiParts, child) + SubqueryAlias(multiParts, fromCatalogTable(metadata, isTempView = false)) } else { SubqueryAlias(multiParts, UnresolvedCatalogRelation(metadata, options)) } } - def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { + private def getTempViewPlan(plan: LogicalPlan): LogicalPlan = { plan match { case viewInfo: TemporaryViewRelation => - View.fromCatalogTable(viewInfo.tableMeta, isTempView = true, parser) + fromCatalogTable(viewInfo.tableMeta, isTempView = true) case v => v } } + private def fromCatalogTable(metadata: CatalogTable, isTempView: Boolean): View = { + val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) + val viewConfigs = metadata.viewSQLConfigs + val viewPlan = + SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs, isTempView = isTempView)) { + parser.parsePlan(viewText) + } + View( + desc = metadata, + isTempView = isTempView, + output = metadata.schema.toAttributes, + child = viewPlan) + } + def lookupTempView(table: String): Option[SubqueryAlias] = { val formattedTable = formatTableName(table) getTempView(formattedTable).map { view => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 0e4bfa4dc34da..91fb77574a0ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -22,7 +22,6 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateView, MultiInstanceRelat import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString @@ -485,21 +484,6 @@ object View { } sqlConf } - - def fromCatalogTable( - metadata: CatalogTable, isTempView: Boolean, parser: ParserInterface): View = { - val viewText = metadata.viewText.getOrElse(sys.error("Invalid view without text.")) - val viewConfigs = metadata.viewSQLConfigs - val viewPlan = - SQLConf.withExistingConf(effectiveSQLConf(viewConfigs, isTempView = isTempView)) { - parser.parsePlan(viewText) - } - View( - desc = metadata, - isTempView = isTempView, - output = metadata.schema.toAttributes, - child = viewPlan) - } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 06b1e03adea50..6f32f9d2bfcbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -113,12 +113,8 @@ case class CreateViewCommand( verifyTemporaryObjectsNotExists(catalog, isTemporary, name, child) if (viewType == LocalTempView) { - val shouldUncache = replace && catalog.getTempView(name.table).exists { - // Uncache View logical plan without checking the same result check, since it's unresolved. 
- case _: View => true - case other => !other.sameResult(child) - } - if (shouldUncache) { + if (replace && catalog.getRawTempView(name.table).isDefined && + !catalog.getRawTempView(name.table).get.sameResult(child)) { logInfo(s"Try to uncache ${name.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(name), name) CommandUtils.uncacheTableOrView(sparkSession, name.quotedString) @@ -141,12 +137,8 @@ case class CreateViewCommand( } else if (viewType == GlobalTempView) { val db = sparkSession.sessionState.conf.getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) val viewIdent = TableIdentifier(name.table, Option(db)) - val shouldUncache = replace && catalog.getGlobalTempView(name.table).exists { - // Uncache View logical plan without checking the same result check, since it's unresolved. - case _: View => true - case other => !other.sameResult(child) - } - if (shouldUncache) { + if (replace && catalog.getRawGlobalTempView(name.table).isDefined && + !catalog.getRawGlobalTempView(name.table).get.sameResult(child)) { logInfo(s"Try to uncache ${viewIdent.quotedString} before replacing.") checkCyclicViewReference(analyzedPlan, Seq(viewIdent), viewIdent) CommandUtils.uncacheTableOrView(sparkSession, viewIdent.quotedString) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index a3a6d6721c993..af8d72309bdea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1272,4 +1272,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils } } } + + test("SPARK-33647: cache table support for permanent view") { + withView("v1") { + spark.catalog.clearCache() + sql("create or replace view v1 as select 1") + sql("cache table v1") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1")).isDefined) + sql("create or replace view v1 as select 1, 2") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1")).isEmpty) + sql("cache table v1") + assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1, 2")).isDefined) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 7595ae0ec7a53..50db986490033 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -812,20 +812,6 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } - test("creating local temp view should not affect existing table reference") { - withTable("t") { - withTempView("t") { - withGlobalTempView("v") { - val globalTempDB = spark.sharedState.globalTempViewManager.database - Seq(2).toDF("c1").write.format("parquet").saveAsTable("t") - sql("CREATE GLOBAL TEMPORARY VIEW v AS SELECT * FROM t") - sql("CREATE TEMPORARY VIEW t AS SELECT 1") - checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v"), Seq(Row(2))) - } - } - } - } - test("SPARK-33141: view should be parsed and analyzed with configs set when creating") { withTable("t") { withView("v1", "v2", "v3", "v4", "v5") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index 3a7a63ed45ce3..8c3d92358a975 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -200,6 +200,29 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { } } + test("view should use captured catalog and namespace to resolve relation") { + withTempDatabase { dbName => + withTable("default.t", s"$dbName.t") { + withTempView("t") { + // create a table in default database + sql("USE DEFAULT") + Seq(2, 3, 1).toDF("c1").write.format("parquet").saveAsTable("t") + // create a view refer the created table in default database + val viewName = createView("v1", "SELECT * FROM t") + // using another database to create a table with same name + sql(s"USE $dbName") + Seq(4, 5, 6).toDF("c1").write.format("parquet").saveAsTable("t") + // create a temporary view with the same name + sql("CREATE TEMPORARY VIEW t AS SELECT 1") + withView(viewName) { + // view v1 should still refer the table defined in `default` database + checkViewOutput(viewName, Seq(Row(2), Row(3), Row(1))) + } + } + } + } + } + test("SPARK-33692: view should use captured catalog and namespace to lookup function") { val avgFuncClass = "test.org.apache.spark.sql.MyDoubleAvg" val sumFuncClass = "test.org.apache.spark.sql.MyDoubleSum" @@ -231,7 +254,6 @@ abstract class SQLViewTestSuite extends QueryTest with SQLTestUtils { class LocalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { override protected def viewTypeString: String = "TEMPORARY VIEW" override protected def formattedViewName(viewName: String): String = viewName - } class GlobalTempViewTestSuite extends SQLViewTestSuite with SharedSparkSession { From a84c8d842ca027ab0f1b641146e81fc2782d150d Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 14 Dec 2020 08:39:01 +0000 Subject: [PATCH 0755/1009] [SPARK-33751][SQL] Migrate ALTER VIEW ... AS command to use UnresolvedView to resolve the identifier ### What changes were proposed in this pull request? This PR migrates `ALTER VIEW ... AS` to use `UnresolvedView` to resolve the view identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). The `TempViewOrV1Table` extractor in `ResolveSessionCatalog.scala` can now be removed as well. ### Why are the changes needed? To use `UnresolvedView` for view resolution. ### Does this PR introduce _any_ user-facing change? The exception message changes if a table is found instead of view: ``` // OLD `tab1` is not a view" ``` ``` // NEW "tab1 is a table. 'ALTER VIEW ... AS' expects a view." ``` ### How was this patch tested? Updated existing tests. Closes #30723 from imback82/alter_view_as_statement. 
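A short sketch of the user-visible difference described above, matching the updated `SQLViewSuite` expectation (the table name is illustrative):

```sql
CREATE TABLE tab1 (id INT) USING parquet;
ALTER VIEW tab1 AS SELECT 1;
-- before: `tab1` is not a view
-- after:  tab1 is a table. 'ALTER VIEW ... AS' expects a view.
```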
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 8 +++++--- .../catalyst/plans/logical/statements.scala | 8 -------- .../catalyst/plans/logical/v2Commands.scala | 10 ++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 6 ++++-- .../analysis/ResolveSessionCatalog.scala | 19 ++----------------- .../spark/sql/execution/command/views.scala | 3 --- .../sql/connector/DataSourceV2SQLSuite.scala | 13 +++++-------- .../spark/sql/execution/SQLViewSuite.scala | 11 ++++++++--- 8 files changed, 34 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 660d617a07b44..1bebf025cc795 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3869,7 +3869,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Alter the query of a view. This creates a [[AlterViewAsStatement]] + * Alter the query of a view. This creates a [[AlterViewAs]] * * For example: * {{{ @@ -3877,8 +3877,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitAlterViewQuery(ctx: AlterViewQueryContext): LogicalPlan = withOrigin(ctx) { - AlterViewAsStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterViewAs( + UnresolvedView( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER VIEW ... AS"), originalText = source(ctx.query), query = plan(ctx.query)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index d628bc914dba7..a0e11962f9c05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -315,14 +315,6 @@ case class AlterTableSerDePropertiesStatement( serdeProperties: Option[Map[String, String]], partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement -/** - * ALTER VIEW ... Query command, as parsed from SQL. - */ -case class AlterViewAsStatement( - viewName: Seq[String], - originalText: String, - query: LogicalPlan) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 9446fe383dd9d..0f35674055dc4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -741,6 +741,16 @@ case class RepairTable(child: LogicalPlan) extends Command { override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER VIEW ... AS command. + */ +case class AlterViewAs( + child: LogicalPlan, + originalText: String, + query: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the ALTER VIEW ... SET TBLPROPERTIES command. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 481d7504dda3d..e8bbc6b22a819 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2254,8 +2254,10 @@ class DDLParserSuite extends AnalysisTest { test("alter view: AS Query") { val parsed = parsePlan("ALTER VIEW a.b.c AS SELECT 1") - val expected = AlterViewAsStatement( - Seq("a", "b", "c"), "SELECT 1", parsePlan("SELECT 1")) + val expected = AlterViewAs( + UnresolvedView(Seq("a", "b", "c"), "ALTER VIEW ... AS", true, None), + "SELECT 1", + parsePlan("SELECT 1")) comparePlans(parsed, expected) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 2449f73112bf4..83dda7db09ac2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -485,10 +485,9 @@ class ResolveSessionCatalog( serdeProperties, partitionSpec) - case AlterViewAsStatement(name, originalText, query) => - val viewName = parseTempViewOrV1Table(name, "ALTER VIEW QUERY") + case AlterViewAs(ResolvedView(ident, _), originalText, query) => AlterViewAsCommand( - viewName.asTableIdentifier, + ident.asTableIdentifier, originalText, query) @@ -582,12 +581,6 @@ class ResolveSessionCatalog( case _ => throw new AnalysisException(s"$sql is only supported with v1 tables.") } - private def parseTempViewOrV1Table( - nameParts: Seq[String], sql: String): Seq[String] = nameParts match { - case TempViewOrV1Table(name) => name - case _ => throw new AnalysisException(s"$sql is only supported with temp views or v1 tables.") - } - private def getStorageFormatAndProvider( provider: Option[String], options: Map[String, String], @@ -688,14 +681,6 @@ class ResolveSessionCatalog( } } - object TempViewOrV1Table { - def unapply(nameParts: Seq[String]): Option[Seq[String]] = nameParts match { - case _ if isTempView(nameParts) => Some(nameParts) - case SessionCatalogAndIdentifier(_, tbl) => Some(tbl.asMultipartIdentifier) - case _ => None - } - } - object SessionCatalogAndNamespace { def unapply(resolved: ResolvedNamespace): Option[(CatalogPlugin, Seq[String])] = if (isSessionCatalog(resolved.catalog)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 6f32f9d2bfcbe..6401167458a3e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -285,9 +285,6 @@ case class AlterViewAsCommand( private def alterPermanentView(session: SparkSession, analyzedPlan: LogicalPlan): Unit = { val viewMeta = session.sessionState.catalog.getTableMetadata(name) - if (viewMeta.tableType != CatalogTableType.VIEW) { - throw new AnalysisException(s"${viewMeta.identifier} is not a view.") - } // Detect cyclic view reference on ALTER VIEW. 
val viewIdent = viewMeta.identifier diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index bc570efb70bdf..0c65e530f67da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2105,14 +2105,6 @@ class DataSourceV2SQLSuite } } - test("ALTER VIEW AS QUERY") { - val v = "testcat.ns1.ns2.v" - val e = intercept[AnalysisException] { - sql(s"ALTER VIEW $v AS SELECT 1") - } - assert(e.message.contains("ALTER VIEW QUERY is only supported with temp views or v1 tables")) - } - test("CREATE VIEW") { val v = "testcat.ns1.ns2.v" val e = intercept[AnalysisException] { @@ -2618,6 +2610,11 @@ class DataSourceV2SQLSuite "testcat", "v", "ALTER VIEW ... UNSET TBLPROPERTIES") + validateViewCommand( + "ALTER VIEW testcat.v AS SELECT 1", + "testcat", + "v", + "ALTER VIEW ... AS") } private def testNotSupportedV2Command(sqlCommand: String, sqlParams: String): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 50db986490033..c60b61a111c3f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -111,7 +111,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { e = intercept[AnalysisException] { sql("ALTER VIEW tab1 AS SELECT * FROM jt") }.getMessage - assert(e.contains("`tab1` is not a view")) + assert(e.contains("tab1 is a table. 'ALTER VIEW ... AS' expects a view.")) } } @@ -448,8 +448,13 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } test("should not allow ALTER VIEW AS when the view does not exist") { - assertNoSuchTable("ALTER VIEW testView AS SELECT 1, 2") - assertNoSuchTable("ALTER VIEW default.testView AS SELECT 1, 2") + assertAnalysisError( + "ALTER VIEW testView AS SELECT 1, 2", + "View not found for 'ALTER VIEW ... AS': testView") + + assertAnalysisError( + "ALTER VIEW default.testView AS SELECT 1, 2", + "View not found for 'ALTER VIEW ... AS': default.testView") } test("ALTER VIEW AS should try to alter temp view first if view name has no database part") { From cd0356df9e3cb8e8690a216b8adfac75bcf1365f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 14 Dec 2020 17:51:40 +0800 Subject: [PATCH 0756/1009] [SPARK-33673][SQL] Avoid push down partition filters to ParquetScan for DataSourceV2 ### What changes were proposed in this pull request? As described in SPARK-33673, some test suites in `ParquetV2SchemaPruningSuite` will failed when set `parquet.version` to 1.11.1 because Parquet will return empty results for non-existent column since PARQUET-1765. This pr change to use `readDataSchema()` instead of `schema` to build `pushedParquetFilters` in `ParquetScanBuilder` to avoid push down partition filters to `ParquetScan` for `DataSourceV2` ### Why are the changes needed? Prepare for upgrade using Parquet 1.11.1. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
- Pass the Jenkins or GitHub Action - Manual test as follows: ``` mvn -Dtest=none -DwildcardSuites=org.apache.spark.sql.execution.datasources.parquet.ParquetV2SchemaPruningSuite -Dparquet.version=1.11.1 test -pl sql/core -am ``` **Before** ``` Run completed in 3 minutes, 13 seconds. Total number of tests run: 134 Suites: completed 2, aborted 0 Tests: succeeded 120, failed 14, canceled 0, ignored 0, pending 0 *** 14 TESTS FAILED *** ``` **After** ``` Run completed in 3 minutes, 46 seconds. Total number of tests run: 134 Suites: completed 2, aborted 0 Tests: succeeded 134, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #30652 from LuciferYang/SPARK-33673. Authored-by: yangjie01 Signed-off-by: Yuming Wang --- .../execution/datasources/v2/parquet/ParquetScanBuilder.scala | 2 +- sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala index 2f861356e9499..44053830defe5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala @@ -50,7 +50,7 @@ case class ParquetScanBuilder( val pushDownInFilterThreshold = sqlConf.parquetFilterPushDownInFilterThreshold val isCaseSensitive = sqlConf.caseSensitiveAnalysis val parquetSchema = - new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(schema) + new SparkToParquetSchemaConverter(sparkSession.sessionState.conf).convert(readDataSchema()) val parquetFilters = new ParquetFilters(parquetSchema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStringStartWith, pushDownInFilterThreshold, isCaseSensitive) parquetFilters.convertibleFilters(this.filters).toArray diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 7d3285da25a5d..75372c5437f25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -367,7 +367,7 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite val basePath = dir.getCanonicalPath + "/" + fmt val pushFilterMaps = Map ( "parquet" -> - "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]", + "|PushedFilers: \\[IsNotNull\\(value\\), GreaterThan\\(value,2\\)\\]", "orc" -> "|PushedFilers: \\[.*\\(id\\), .*\\(value\\), .*\\(id,1\\), .*\\(value,2\\)\\]", "csv" -> From bf2c88ccaebd8e27d9fc27c55c9955129541d3e1 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 14 Dec 2020 02:09:59 -0800 Subject: [PATCH 0757/1009] [SPARK-33716][K8S] Fix potential race condition during pod termination ### What changes were proposed in this pull request? Check that the pod state is not pending or running even if there is a deletion timestamp. ### Why are the changes needed? This can occur when the pod state and deletion timestamp are not updated by etcd in sync & we get a pod snapshot during an inconsistent view. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual testing with local version of Minikube on an overloaded computer that caused out of sync updates. 
Closes #30693 from holdenk/SPARK-33716-decommissioning-race-condition-during-pod-snapshot. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index be75311bc3d4a..e81d213699e32 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -93,7 +93,8 @@ object ExecutorPodsSnapshot extends Logging { ( pod.getStatus == null || pod.getStatus.getPhase == null || - pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "terminating" + (pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "terminating" && + pod.getStatus.getPhase.toLowerCase(Locale.ROOT) != "running") )) } } From 5f9a7fea06cbbb6bf2b40cc9b3aa4d539c996301 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 14 Dec 2020 14:32:08 +0000 Subject: [PATCH 0758/1009] [SPARK-33428][SQL] Conv UDF use BigInt to avoid Long value overflow ### What changes were proposed in this pull request? Use Long value store encode value will overflow and return unexpected result, use BigInt to replace Long value and make logical more simple. ### Why are the changes needed? Fix value overflow issue ### Does this PR introduce _any_ user-facing change? People can sue `conf` function to convert value big then LONG.MAX_VALUE ### How was this patch tested? Added UT #### BenchMark ``` /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.spark.sql.execution.benchmark import scala.util.Random import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.functions._ object ConvFuncBenchMark extends SqlBasedBenchmark { val charset = Array[String]("0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z") def constructString(from: Int, length: Int): String = { val chars = charset.slice(0, from) (0 to length).map(x => { val v = Random.nextInt(from) chars(v) }).mkString("") } private def doBenchmark(cardinality: Long, length: Int, from: Int, toBase: Int): Unit = { spark.range(cardinality) .withColumn("str", lit(constructString(from, length))) .select(conv(col("str"), from, toBase)) .noop() } /** * Main process of the whole benchmark. * Implementations of this method are supposed to use the wrapper method `runBenchmark` * for each benchmark scenario. 
*/ override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { val N = 1000000L val benchmark = new Benchmark("conv", N, output = output) benchmark.addCase("length 10 from 2 to 16") { _ => doBenchmark(N, 10, 2, 16) } benchmark.addCase("length 10 from 2 to 10") { _ => doBenchmark(N, 10, 2, 10) } benchmark.addCase("length 10 from 10 to 16") { _ => doBenchmark(N, 10, 10, 16) } benchmark.addCase("length 10 from 10 to 36") { _ => doBenchmark(N, 10, 10, 36) } benchmark.addCase("length 10 from 16 to 10") { _ => doBenchmark(N, 10, 10, 10) } benchmark.addCase("length 10 from 16 to 36") { _ => doBenchmark(N, 10, 16, 36) } benchmark.addCase("length 10 from 36 to 10") { _ => doBenchmark(N, 10, 36, 10) } benchmark.addCase("length 10 from 36 to 16") { _ => doBenchmark(N, 10, 36, 16) } // benchmark.addCase("length 20 from 10 to 16") { _ => doBenchmark(N, 20, 10, 16) } benchmark.addCase("length 20 from 10 to 36") { _ => doBenchmark(N, 20, 10, 36) } benchmark.addCase("length 30 from 10 to 16") { _ => doBenchmark(N, 30, 10, 16) } benchmark.addCase("length 30 from 10 to 36") { _ => doBenchmark(N, 30, 10, 36) } // benchmark.addCase("length 20 from 16 to 10") { _ => doBenchmark(N, 20, 16, 10) } benchmark.addCase("length 20 from 16 to 36") { _ => doBenchmark(N, 20, 16, 36) } benchmark.addCase("length 30 from 16 to 10") { _ => doBenchmark(N, 30, 16, 10) } benchmark.addCase("length 30 from 16 to 36") { _ => doBenchmark(N, 30, 16, 36) } benchmark.run() } } ``` Result with patch : ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.14.6 Intel(R) Core(TM) i5-8259U CPU 2.30GHz conv: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ length 10 from 2 to 16 54 73 18 18.7 53.6 1.0X length 10 from 2 to 10 43 47 5 23.5 42.5 1.3X length 10 from 10 to 16 39 47 12 25.5 39.2 1.4X length 10 from 10 to 36 38 42 3 26.5 37.7 1.4X length 10 from 16 to 10 39 41 3 25.7 38.9 1.4X length 10 from 16 to 36 36 41 4 27.6 36.3 1.5X length 10 from 36 to 10 38 40 2 26.3 38.0 1.4X length 10 from 36 to 16 37 39 2 26.8 37.2 1.4X length 20 from 10 to 16 36 39 2 27.4 36.5 1.5X length 20 from 10 to 36 37 39 2 27.2 36.8 1.5X length 30 from 10 to 16 37 39 2 27.0 37.0 1.4X length 30 from 10 to 36 36 38 2 27.5 36.3 1.5X length 20 from 16 to 10 35 38 2 28.3 35.4 1.5X length 20 from 16 to 36 34 38 3 29.2 34.3 1.6X length 30 from 16 to 10 38 40 2 26.3 38.1 1.4X length 30 from 16 to 36 37 38 1 27.2 36.8 1.5X ``` Result without patch: ``` Java HotSpot(TM) 64-Bit Server VM 1.8.0_191-b12 on Mac OS X 10.14.6 Intel(R) Core(TM) i5-8259U CPU 2.30GHz conv: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ length 10 from 2 to 16 66 101 29 15.1 66.1 1.0X length 10 from 2 to 10 50 55 5 20.2 49.5 1.3X length 10 from 10 to 16 46 51 5 21.8 45.9 1.4X length 10 from 10 to 36 43 48 4 23.4 42.7 1.5X length 10 from 16 to 10 44 47 4 22.9 43.7 1.5X length 10 from 16 to 36 40 44 2 24.7 40.5 1.6X length 10 from 36 to 10 40 44 4 25.0 40.1 1.6X length 10 from 36 to 16 41 43 2 24.3 41.2 1.6X length 20 from 10 to 16 39 41 2 25.7 38.9 1.7X length 20 from 10 to 36 40 42 2 24.9 40.2 1.6X length 30 from 10 to 16 39 40 1 25.9 38.6 1.7X length 30 from 10 to 36 40 41 1 25.0 40.0 1.7X length 20 from 16 to 10 40 41 1 25.1 39.8 1.7X length 20 from 16 to 36 40 42 2 25.2 39.7 1.7X length 30 from 16 to 
10 39 42 2 25.6 39.0 1.7X length 30 from 16 to 36 39 40 2 25.7 38.8 1.7X ``` Closes #30350 from AngersZhuuuu/SPARK-33428. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/NumberConverter.scala | 64 ++++--------------- .../expressions/MathExpressionsSuite.scala | 6 +- .../catalyst/util/NumberConverterSuite.scala | 4 +- .../apache/spark/sql/MathFunctionsSuite.scala | 2 +- .../execution/HiveCompatibilitySuite.scala | 4 +- 5 files changed, 23 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala index 7dbdd1ef1cdc5..8c9157784e7e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala @@ -21,64 +21,37 @@ import org.apache.spark.unsafe.types.UTF8String object NumberConverter { - /** - * Divide x by m as if x is an unsigned 64-bit integer. Examples: - * unsignedLongDiv(-1, 2) == Long.MAX_VALUE unsignedLongDiv(6, 3) == 2 - * unsignedLongDiv(0, 5) == 0 - * - * @param x is treated as unsigned - * @param m is treated as signed - */ - private def unsignedLongDiv(x: Long, m: Int): Long = { - if (x >= 0) { - x / m - } else { - // Let uval be the value of the unsigned long with the same bits as x - // Two's complement => x = uval - 2*MAX - 2 - // => uval = x + 2*MAX + 2 - // Now, use the fact: (a+b)/c = a/c + b/c + (a%c+b%c)/c - x / m + 2 * (Long.MaxValue / m) + 2 / m + (x % m + 2 * (Long.MaxValue % m) + 2 % m) / m - } - } - /** * Decode v into value[]. * - * @param v is treated as an unsigned 64-bit integer + * @param v is treated as an BigInt * @param radix must be between MIN_RADIX and MAX_RADIX */ - private def decode(v: Long, radix: Int, value: Array[Byte]): Unit = { + private def decode(v: BigInt, radix: Int, value: Array[Byte]): Unit = { var tmpV = v java.util.Arrays.fill(value, 0.asInstanceOf[Byte]) var i = value.length - 1 while (tmpV != 0) { - val q = unsignedLongDiv(tmpV, radix) - value(i) = (tmpV - q * radix).asInstanceOf[Byte] + val q = tmpV / radix + value(i) = (tmpV - q * radix).byteValue tmpV = q i -= 1 } } /** - * Convert value[] into a long. On overflow, return -1 (as mySQL does). If a - * negative digit is found, ignore the suffix starting there. + * Convert value[] into a BigInt. If a negative digit is found, + * ignore the suffix starting there. * * @param radix must be between MIN_RADIX and MAX_RADIX * @param fromPos is the first element that should be considered * @return the result should be treated as an unsigned 64-bit integer. 
*/ - private def encode(radix: Int, fromPos: Int, value: Array[Byte]): Long = { - var v: Long = 0L - val bound = unsignedLongDiv(-1 - radix, radix) // Possible overflow once + private def encode(radix: Int, fromPos: Int, value: Array[Byte]): BigInt = { + var v: BigInt = BigInt(0) var i = fromPos while (i < value.length && value(i) >= 0) { - if (v >= bound) { - // Check for overflow - if (unsignedLongDiv(-1 - value(i), radix) < v) { - return -1 - } - } - v = v * radix + value(i) + v = (v * radix) + BigInt(value(i)) i += 1 } v @@ -129,7 +102,7 @@ object NumberConverter { return null } - var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) + val (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) // Copy the digits in the right side of the array val temp = new Array[Byte](64) @@ -140,19 +113,8 @@ object NumberConverter { } char2byte(fromBase, temp.length - n.length + first, temp) - // Do the conversion by going through a 64 bit integer - var v = encode(fromBase, temp.length - n.length + first, temp) - if (negative && toBase > 0) { - if (v < 0) { - v = -1 - } else { - v = -v - } - } - if (toBase < 0 && v < 0) { - v = -v - negative = true - } + // Do the conversion by going through a BigInt + val v: BigInt = encode(fromBase, temp.length - n.length + first, temp) decode(v, Math.abs(toBase), temp) // Find the first non-zero digit or the last digits if all are zero. @@ -163,7 +125,7 @@ object NumberConverter { byte2char(Math.abs(toBase), firstNonZeroPos, temp) var resultStartPos = firstNonZeroPos - if (negative && toBase < 0) { + if (negative) { resultStartPos = firstNonZeroPos - 1 temp(resultStartPos) = '-' } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index 6d09e28362e11..4c4df9ef83de9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -158,7 +158,7 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("conv") { checkEvaluation(Conv(Literal("3"), Literal(10), Literal(2)), "11") checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(-16)), "-F") - checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "FFFFFFFFFFFFFFF1") + checkEvaluation(Conv(Literal("-15"), Literal(10), Literal(16)), "-F") checkEvaluation(Conv(Literal("big"), Literal(36), Literal(16)), "3A48") checkEvaluation(Conv(Literal.create(null, StringType), Literal(36), Literal(16)), null) checkEvaluation(Conv(Literal("3"), Literal.create(null, IntegerType), Literal(16)), null) @@ -168,10 +168,12 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation( Conv(Literal(""), Literal(10), Literal(16)), null) checkEvaluation( - Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "FFFFFFFFFFFFFFFF") + Conv(Literal("9223372036854775807"), Literal(36), Literal(16)), "12DDAC15F246BAF8C0D551AC7") // If there is an invalid digit in the number, the longest valid prefix should be converted. 
checkEvaluation( Conv(Literal("11abc"), Literal(10), Literal(16)), "B") + checkEvaluation(Conv(Literal("c8dcdfb41711fc9a1f17928001d7fd61"), Literal(16), Literal(10)), + "266992441711411603393340504520074460513") } test("e") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala index ec73f4518737d..ee7057d914b21 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala @@ -34,9 +34,9 @@ class NumberConverterSuite extends SparkFunSuite { test("convert") { checkConv("3", 10, 2, "11") checkConv("-15", 10, -16, "-F") - checkConv("-15", 10, 16, "FFFFFFFFFFFFFFF1") + checkConv("-15", 10, 16, "-F") checkConv("big", 36, 16, "3A48") - checkConv("9223372036854775807", 36, 16, "FFFFFFFFFFFFFFFF") + checkConv("9223372036854775807", 36, 16, "12DDAC15F246BAF8C0D551AC7") checkConv("11abc", 10, 16, "B") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index cd92976571230..87526b130d4c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -200,7 +200,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df.selectExpr("""conv("100", 2, 10)"""), Row("4")) checkAnswer(df.selectExpr("""conv("-10", 16, -10)"""), Row("-16")) checkAnswer( - df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("-1")) // for overflow + df.selectExpr("""conv("9223372036854775807", 36, -16)"""), Row("12DDAC15F246BAF8C0D551AC7")) } test("floor") { diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index c263932c2f535..e7c702baba752 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -525,6 +525,9 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_xpath_short", "udf_xpath_string", + // [SPARK-33428][SQL] CONV UDF use BigInt to avoid Long value overflow + "udf_conv", + // These tests DROP TABLE that don't exist (but do not specify IF EXISTS) "alter_rename_partition1", "date_1", @@ -1003,7 +1006,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "udf_concat_insert1", "udf_concat_insert2", "udf_concat_ws", - "udf_conv", "udf_cos", "udf_count", "udf_date_add", From 839d6899adafd9a0695667656d00220d4665895d Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 14 Dec 2020 14:35:24 +0000 Subject: [PATCH 0759/1009] [SPARK-33733][SQL] PullOutNondeterministic should check and collect deterministic field ### What changes were proposed in this pull request? The deterministic field is wider than `NonDerterministic`, we should keep same range between pull out and check analysis. ### Why are the changes needed? 
For example ``` select * from values(1), (4) as t(c1) order by java_method('java.lang.Math', 'abs', c1) ``` We will get exception since `java_method` deterministic field is false but not a `NonDeterministic` ``` Exception in thread "main" org.apache.spark.sql.AnalysisException: nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, found: java_method('java.lang.Math', 'abs', t.`c1`) ASC NULLS FIRST in operator Sort [java_method(java.lang.Math, abs, c1#1) ASC NULLS FIRST], true ;; ``` ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #30703 from ulysses-you/SPARK-33733. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 5 ++++- .../expressions/CallMethodViaReflection.scala | 6 ++--- .../sql/catalyst/analysis/AnalysisSuite.scala | 22 +++++++++++++++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0ceb4226b0f52..a46f2e3168c6b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2960,7 +2960,10 @@ class Analyzer(override val catalogManager: CatalogManager) private def getNondeterToAttr(exprs: Seq[Expression]): Map[Expression, NamedExpression] = { exprs.filterNot(_.deterministic).flatMap { expr => - val leafNondeterministic = expr.collect { case n: Nondeterministic => n } + val leafNondeterministic = expr.collect { + case n: Nondeterministic => n + case udf: UserDefinedExpression if !udf.deterministic => udf + } leafNondeterministic.distinct.map { e => val ne = e match { case n: NamedExpression => n diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index 4bd6418789aa7..0979a18ac97bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -54,7 +54,7 @@ import org.apache.spark.util.Utils """, since = "2.0.0") case class CallMethodViaReflection(children: Seq[Expression]) - extends Expression with CodegenFallback { + extends Nondeterministic with CodegenFallback { override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("reflect") @@ -77,11 +77,11 @@ case class CallMethodViaReflection(children: Seq[Expression]) } } - override lazy val deterministic: Boolean = false override def nullable: Boolean = true override val dataType: DataType = StringType + override protected def initializeInternal(partitionIndex: Int): Unit = {} - override def eval(input: InternalRow): Any = { + override protected def evalInternal(input: InternalRow): Any = { var i = 0 while (i < argExprs.length) { buffer(i) = argExprs(i).eval(input).asInstanceOf[Object] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 61186c178b083..b206bc9f84f18 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -984,4 +984,26 @@ class AnalysisSuite extends AnalysisTest with Matchers { s"please set '${SQLConf.ANALYZER_MAX_ITERATIONS.key}' to a larger value.")) } } + + test("SPARK-33733: PullOutNondeterministic should check and collect deterministic field") { + val reflect = + CallMethodViaReflection(Seq("java.lang.Math", "abs", testRelation.output.head)) + val udf = ScalaUDF( + (s: String) => s, + StringType, + Literal.create(null, StringType) :: Nil, + Option(ExpressionEncoder[String]().resolveAndBind()) :: Nil, + udfDeterministic = false) + + Seq(reflect, udf).foreach { e: Expression => + val plan = Sort(Seq(e.asc), false, testRelation) + val projected = Alias(e, "_nondeterministic")() + val expect = + Project(testRelation.output, + Sort(Seq(projected.toAttribute.asc), false, + Project(testRelation.output :+ projected, + testRelation))) + checkAnalysis(plan, expect) + } + } } From 82aca7eb8f2501dceaf610f1aaa86082153ef5ee Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 14 Dec 2020 10:54:18 -0800 Subject: [PATCH 0760/1009] [SPARK-33779][SQL] DataSource V2: API to request distribution and ordering on write ### What changes were proposed in this pull request? This PR adds connector interfaces proposed in the [design doc](https://docs.google.com/document/d/1X0NsQSryvNmXBY9kcvfINeYyKC-AahZarUqg3nS1GQs/edit#) for SPARK-23889. **Note**: This PR contains a subset of changes discussed in PR #29066. ### Why are the changes needed? Data sources should be able to request a specific distribution and ordering of data on write. In particular, these scenarios are considered useful: - global sort - cluster data and sort within partitions - local sort within partitions - no sort Please see the design doc above for a more detailed explanation of requirements. ### Does this PR introduce _any_ user-facing change? This PR introduces public changes to the DS V2 by adding a logical write abstraction as we have on the read path as well as additional interfaces to represent distribution and ordering of data (please see the doc for more info). The existing `Distribution` interface in `read` package is read-specific and not flexible enough like discussed in the design doc. The current proposal is to evolve these interfaces separately until they converge. ### How was this patch tested? This patch adds only interfaces. Closes #30706 from aokolnychyi/spark-23889-interfaces. 
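To make the proposed interfaces concrete, here is a minimal connector-side sketch, assuming a hypothetical `ClusteredSortedWrite` class and illustrative column names ("date", "id") that are not part of this patch. It implements the new `Write` and `RequiresDistributionAndOrdering` interfaces to ask Spark to cluster records by date and sort them by id within each partition before they reach the data source:

```
// Sketch only: class and column names are illustrative, not part of this patch.
import org.apache.spark.sql.connector.distributions.{Distribution, Distributions}
import org.apache.spark.sql.connector.expressions.{Expression, Expressions, NullOrdering, SortDirection, SortOrder}
import org.apache.spark.sql.connector.write.{BatchWrite, RequiresDistributionAndOrdering, Write}

class ClusteredSortedWrite extends Write with RequiresDistributionAndOrdering {

  // co-locate rows that share the same "date" value in one partition
  override def requiredDistribution(): Distribution =
    Distributions.clustered(Array[Expression](Expressions.column("date")))

  // sort rows by "id" within each partition before they are handed to the data source
  override def requiredOrdering(): Array[SortOrder] = Array(
    Expressions.sort(Expressions.column("id"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST))

  // a real connector would return its BatchWrite here
  override def toBatch(): BatchWrite =
    throw new UnsupportedOperationException("sketch only")
}
```

The factory helpers used here (`Distributions.clustered`, `Expressions.sort`, `Expressions.column`) are the ones exposed by this patch, so a connector does not need any private expression classes to state its requirements.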
Authored-by: Anton Okolnychyi Signed-off-by: Ryan Blue --- .../distributions/ClusteredDistribution.java | 35 +++++++ .../connector/distributions/Distribution.java | 28 ++++++ .../distributions/Distributions.java | 56 +++++++++++ .../distributions/OrderedDistribution.java | 35 +++++++ .../UnspecifiedDistribution.java | 28 ++++++ .../connector/expressions/Expressions.java | 11 +++ .../connector/expressions/NullOrdering.java | 42 ++++++++ .../connector/expressions/SortDirection.java | 42 ++++++++ .../sql/connector/expressions/SortOrder.java | 43 +++++++++ .../RequiresDistributionAndOrdering.java | 57 +++++++++++ .../spark/sql/connector/write/Write.java | 65 +++++++++++++ .../sql/connector/write/WriteBuilder.java | 39 ++++++-- .../distributions/distributions.scala | 59 ++++++++++++ .../connector/expressions/expressions.scala | 96 +++++++++++++++++++ 14 files changed, 626 insertions(+), 10 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java new file mode 100644 index 0000000000000..dcc3d191461ce --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/ClusteredDistribution.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.Expression; + +/** + * A distribution where tuples that share the same values for clustering expressions are co-located + * in the same partition. + * + * @since 3.2.0 + */ +@Experimental +public interface ClusteredDistribution extends Distribution { + /** + * Returns clustering expressions. + */ + Expression[] clustering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java new file mode 100644 index 0000000000000..95d68ea2d1abe --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distribution.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; + +/** + * An interface that defines how data is distributed across partitions. + * + * @since 3.2.0 + */ +@Experimental +public interface Distribution {} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java new file mode 100644 index 0000000000000..da5d6f8c81a3f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/Distributions.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.Expression; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * Helper methods to create distributions to pass into Spark. 
+ * + * @since 3.2.0 + */ +@Experimental +public class Distributions { + private Distributions() { + } + + /** + * Creates a distribution where no promises are made about co-location of data. + */ + public static UnspecifiedDistribution unspecified() { + return LogicalDistributions.unspecified(); + } + + /** + * Creates a distribution where tuples that share the same values for clustering expressions are + * co-located in the same partition. + */ + public static ClusteredDistribution clustered(Expression[] clustering) { + return LogicalDistributions.clustered(clustering); + } + + /** + * Creates a distribution where tuples have been ordered across partitions according + * to ordering expressions, but not necessarily within a given partition. + */ + public static OrderedDistribution ordered(SortOrder[] ordering) { + return LogicalDistributions.ordered(ordering); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java new file mode 100644 index 0000000000000..3456178d8e64f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/OrderedDistribution.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * A distribution where tuples have been ordered across partitions according + * to ordering expressions, but not necessarily within a given partition. + * + * @since 3.2.0 + */ +@Experimental +public interface OrderedDistribution extends Distribution { + /** + * Returns ordering expressions. + */ + SortOrder[] ordering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java new file mode 100644 index 0000000000000..ea18d8906cfd0 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/distributions/UnspecifiedDistribution.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.distributions; + +import org.apache.spark.annotation.Experimental; + +/** + * A distribution where no promises are made about co-location of data. + * + * @since 3.2.0 + */ +@Experimental +public interface UnspecifiedDistribution extends Distribution {} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java index 791dc969ab008..984de6258f84b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java @@ -164,4 +164,15 @@ public static Transform hours(String column) { return LogicalExpressions.hours(Expressions.column(column)); } + /** + * Create a sort expression. + * + * @param expr an expression to produce values to sort + * @param direction direction of the sort + * @param nullOrder null order of the sort + * @return a SortOrder + */ + public static SortOrder sort(Expression expr, SortDirection direction, NullOrdering nullOrder) { + return LogicalExpressions.sort(expr, direction, nullOrder); + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java new file mode 100644 index 0000000000000..669d1c8443b15 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NullOrdering.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * A null order used in sorting expressions. 
+ * + * @since 3.2.0 + */ +@Experimental +public enum NullOrdering { + NULLS_FIRST, NULLS_LAST; + + @Override + public String toString() { + switch (this) { + case NULLS_FIRST: + return "NULLS FIRST"; + case NULLS_LAST: + return "NULLS LAST"; + default: + throw new IllegalArgumentException("Unexpected null order: " + this); + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java new file mode 100644 index 0000000000000..6946032832d18 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortDirection.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * A sort direction used in sorting expressions. + * + * @since 3.2.0 + */ +@Experimental +public enum SortDirection { + ASCENDING, DESCENDING; + + @Override + public String toString() { + switch (this) { + case ASCENDING: + return "ASC"; + case DESCENDING: + return "DESC"; + default: + throw new IllegalArgumentException("Unexpected sort direction: " + this); + } + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java new file mode 100644 index 0000000000000..72252457df26e --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import org.apache.spark.annotation.Experimental; + +/** + * Represents a sort order in the public expression API. + * + * @since 3.2.0 + */ +@Experimental +public interface SortOrder extends Expression { + /** + * Returns the sort expression. 
+ */ + Expression expression(); + + /** + * Returns the sort direction. + */ + SortDirection direction(); + + /** + * Returns the null ordering. + */ + NullOrdering nullOrdering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java new file mode 100644 index 0000000000000..91fd02aae883c --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RequiresDistributionAndOrdering.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.distributions.Distribution; +import org.apache.spark.sql.connector.distributions.UnspecifiedDistribution; +import org.apache.spark.sql.connector.expressions.SortOrder; + +/** + * A write that requires a specific distribution and ordering of data. + * + * @since 3.2.0 + */ +@Experimental +public interface RequiresDistributionAndOrdering extends Write { + /** + * Returns the distribution required by this write. + *

<p>
+ * Spark will distribute incoming records across partitions to satisfy the required distribution
+ * before passing the records to the data source table on write.
+ *

<p>
+ * Implementations may return {@link UnspecifiedDistribution} if they don't require any specific
+ * distribution of data on write.
+ *
+ * @return the required distribution
+ */
+ Distribution requiredDistribution();
+
+ /**
+ * Returns the ordering required by this write.
+ *

<p>
+ * Spark will order incoming records within partitions to satisfy the required ordering
+ * before passing those records to the data source table on write.
+ *

      + * Implementations may return an empty array if they don't require any specific ordering of data + * on write. + * + * @return the required ordering + */ + SortOrder[] requiredOrdering(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java new file mode 100644 index 0000000000000..873680415d447 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/Write.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCapability; +import org.apache.spark.sql.connector.write.streaming.StreamingWrite; + +/** + * A logical representation of a data source write. + *

      + * This logical representation is shared between batch and streaming write. Data sources must + * implement the corresponding methods in this interface to match what the table promises + * to support. For example, {@link #toBatch()} must be implemented if the {@link Table} that + * creates this {@link Write} returns {@link TableCapability#BATCH_WRITE} support in its + * {@link Table#capabilities()}. + * + * @since 3.2.0 + */ +@Evolving +public interface Write { + + /** + * Returns the description associated with this write. + */ + default String description() { + return this.getClass().toString(); + } + + /** + * Returns a {@link BatchWrite} to write data to batch source. By default this method throws + * exception, data sources must overwrite this method to provide an implementation, if the + * {@link Table} that creates this write returns {@link TableCapability#BATCH_WRITE} support in + * its {@link Table#capabilities()}. + */ + default BatchWrite toBatch() { + throw new UnsupportedOperationException(description() + ": Batch write is not supported"); + } + + /** + * Returns a {@link StreamingWrite} to write data to streaming source. By default this method + * throws exception, data sources must overwrite this method to provide an implementation, if the + * {@link Table} that creates this write returns {@link TableCapability#STREAMING_WRITE} support + * in its {@link Table#capabilities()}. + */ + default StreamingWrite toStreaming() { + throw new UnsupportedOperationException(description() + ": Streaming write is not supported"); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java index 5398ca46e9777..bf344185118a9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java @@ -23,10 +23,10 @@ import org.apache.spark.sql.connector.write.streaming.StreamingWrite; /** - * An interface for building the {@link BatchWrite}. Implementations can mix in some interfaces to + * An interface for building the {@link Write}. Implementations can mix in some interfaces to * support different ways to write data to data sources. * - * Unless modified by a mixin interface, the {@link BatchWrite} configured by this builder is to + * Unless modified by a mixin interface, the {@link Write} configured by this builder is to * append data without affecting existing data. * * @since 3.0.0 @@ -35,22 +35,41 @@ public interface WriteBuilder { /** - * Returns a {@link BatchWrite} to write data to batch source. By default this method throws - * exception, data sources must overwrite this method to provide an implementation, if the - * {@link Table} that creates this write returns {@link TableCapability#BATCH_WRITE} support in - * its {@link Table#capabilities()}. + * Returns a logical {@link Write} shared between batch and streaming. + * + * @since 3.2.0 */ + default Write build() { + return new Write() { + @Override + public BatchWrite toBatch() { + return buildForBatch(); + } + + @Override + public StreamingWrite toStreaming() { + return buildForStreaming(); + } + }; + } + + /** + * Returns a {@link BatchWrite} to write data to batch source. + * + * @deprecated use {@link #build()} instead. 
+ */ + @Deprecated default BatchWrite buildForBatch() { throw new UnsupportedOperationException(getClass().getName() + " does not support batch write"); } /** - * Returns a {@link StreamingWrite} to write data to streaming source. By default this method - * throws exception, data sources must overwrite this method to provide an implementation, if the - * {@link Table} that creates this write returns {@link TableCapability#STREAMING_WRITE} support - * in its {@link Table#capabilities()}. + * Returns a {@link StreamingWrite} to write data to streaming source. + * + * @deprecated use {@link #build()} instead. */ + @Deprecated default StreamingWrite buildForStreaming() { throw new UnsupportedOperationException(getClass().getName() + " does not support streaming write"); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala new file mode 100644 index 0000000000000..599f82b4dc528 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/distributions/distributions.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.distributions + +import org.apache.spark.sql.connector.expressions.{Expression, SortOrder} + +private[sql] object LogicalDistributions { + + def unspecified(): UnspecifiedDistribution = { + UnspecifiedDistributionImpl + } + + def clustered(clustering: Array[Expression]): ClusteredDistribution = { + ClusteredDistributionImpl(clustering) + } + + def ordered(ordering: Array[SortOrder]): OrderedDistribution = { + OrderedDistributionImpl(ordering) + } +} + +private[sql] object UnspecifiedDistributionImpl extends UnspecifiedDistribution { + override def toString: String = "UnspecifiedDistribution" +} + +private[sql] final case class ClusteredDistributionImpl( + clusteringExprs: Seq[Expression]) extends ClusteredDistribution { + + override def clustering: Array[Expression] = clusteringExprs.toArray + + override def toString: String = { + s"ClusteredDistribution(${clusteringExprs.map(_.describe).mkString(", ")})" + } +} + +private[sql] final case class OrderedDistributionImpl( + orderingExprs: Seq[SortOrder]) extends OrderedDistribution { + + override def ordering: Array[SortOrder] = orderingExprs.toArray + + override def toString: String = { + s"OrderedDistribution(${orderingExprs.map(_.describe).mkString(", ")})" + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala index 321ea14d376b4..2863d94d198b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala @@ -54,6 +54,13 @@ private[sql] object LogicalExpressions { def days(reference: NamedReference): DaysTransform = DaysTransform(reference) def hours(reference: NamedReference): HoursTransform = HoursTransform(reference) + + def sort( + reference: Expression, + direction: SortDirection, + nullOrdering: NullOrdering): SortOrder = { + SortValue(reference, direction, nullOrdering) + } } /** @@ -110,6 +117,18 @@ private[sql] final case class BucketTransform( } private[sql] object BucketTransform { + def unapply(expr: Expression): Option[(Int, FieldReference)] = expr match { + case transform: Transform => + transform match { + case BucketTransform(n, FieldReference(parts)) => + Some((n, FieldReference(parts))) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[(Int, NamedReference)] = transform match { case NamedTransform("bucket", Seq( Lit(value: Int, IntegerType), @@ -170,6 +189,18 @@ private[sql] final case class IdentityTransform( } private[sql] object IdentityTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case IdentityTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("identity", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -185,6 +216,18 @@ private[sql] final case class YearsTransform( } private[sql] object YearsTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case YearsTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("years", Seq(Ref(parts))) => 
Some(FieldReference(parts)) @@ -200,6 +243,18 @@ private[sql] final case class MonthsTransform( } private[sql] object MonthsTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case MonthsTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("months", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -215,6 +270,18 @@ private[sql] final case class DaysTransform( } private[sql] object DaysTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case DaysTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("days", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -230,6 +297,18 @@ private[sql] final case class HoursTransform( } private[sql] object HoursTransform { + def unapply(expr: Expression): Option[FieldReference] = expr match { + case transform: Transform => + transform match { + case HoursTransform(ref) => + Some(ref) + case _ => + None + } + case _ => + None + } + def unapply(transform: Transform): Option[FieldReference] = transform match { case NamedTransform("hours", Seq(Ref(parts))) => Some(FieldReference(parts)) @@ -261,3 +340,20 @@ private[sql] object FieldReference { LogicalExpressions.parseReference(column) } } + +private[sql] final case class SortValue( + expression: Expression, + direction: SortDirection, + nullOrdering: NullOrdering) extends SortOrder { + + override def describe(): String = s"$expression $direction $nullOrdering" +} + +private[sql] object SortValue { + def unapply(expr: Expression): Option[(Expression, SortDirection, NullOrdering)] = expr match { + case sort: SortOrder => + Some((sort.expression, sort.direction, sort.nullOrdering)) + case _ => + None + } +} From bb60fb1bbd97b70d60e42a0435e15862c3e3f97e Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 14 Dec 2020 11:39:42 -0800 Subject: [PATCH 0761/1009] [SPARK-33779][SQL][FOLLOW-UP] Fix Java Linter error ### What changes were proposed in this pull request? This PR removes unused imports. ### Why are the changes needed? These changes are required to fix the build. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Via `dev/lint-java`. Closes #30767 from aokolnychyi/fix-linter. 
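Since both this follow-up and its parent commit touch `WriteBuilder`, here is a short hedged sketch of how a connector might adopt the new `build()` entry point instead of the now-deprecated `buildForBatch()` (the builder class name is hypothetical, not part of either patch):

```
// Sketch only: MyWriteBuilder is a hypothetical connector class.
import org.apache.spark.sql.connector.write.{BatchWrite, Write, WriteBuilder}

class MyWriteBuilder extends WriteBuilder {
  // Return a logical Write; Spark calls toBatch()/toStreaming() on it as needed.
  override def build(): Write = new Write {
    override def toBatch(): BatchWrite = {
      // a real connector would construct and return its BatchWrite here
      throw new UnsupportedOperationException("sketch only")
    }
  }
}
```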
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../java/org/apache/spark/sql/connector/write/WriteBuilder.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java index bf344185118a9..0c72f31af1c22 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/WriteBuilder.java @@ -18,8 +18,6 @@ package org.apache.spark.sql.connector.write; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.catalog.Table; -import org.apache.spark.sql.connector.catalog.TableCapability; import org.apache.spark.sql.connector.write.streaming.StreamingWrite; /** From 5885cc15cae9c9780530e235d2bd4bd6beda5dbb Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Mon, 14 Dec 2020 12:05:28 -0800 Subject: [PATCH 0762/1009] [SPARK-33261][K8S] Add a developer API for custom feature steps ### What changes were proposed in this pull request? Add a developer API for custom driver & executor feature steps. ### Why are the changes needed? While we allow templates for the basis of pod creation, some deployments need more flexibility in how the pods are configured. This adds a developer API for custom deployments. ### Does this PR introduce _any_ user-facing change? New developer API. ### How was this patch tested? Extended tests to verify custom step is applied when configured. Closes #30206 from holdenk/SPARK-33261-allow-people-to-extend-pod-feature-steps. Authored-by: Holden Karau Signed-off-by: Holden Karau --- .../org/apache/spark/deploy/k8s/Config.scala | 20 +++++ .../apache/spark/deploy/k8s/SparkPod.scala | 11 ++- .../KubernetesFeatureConfigStep.scala | 7 +- .../k8s/submit/KubernetesDriverBuilder.scala | 8 +- .../k8s/KubernetesExecutorBuilder.scala | 8 +- .../spark/deploy/k8s/PodBuilderSuite.scala | 76 +++++++++++++++++++ .../submit/KubernetesDriverBuilderSuite.scala | 5 +- .../k8s/KubernetesExecutorBuilderSuite.scala | 4 + 8 files changed, 134 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index c28d6fd405ae1..40609aef1e9d8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -219,6 +219,26 @@ private[spark] object Config extends Logging { .stringConf .createOptional + val KUBERNETES_DRIVER_POD_FEATURE_STEPS = + ConfigBuilder("spark.kubernetes.driver.pod.featureSteps") + .doc("Class names of an extra driver pod feature step implementing " + + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. " + + "Runs after all of Spark internal feature steps.") + .version("3.2.0") + .stringConf + .toSequence + .createWithDefault(Nil) + + val KUBERNETES_EXECUTOR_POD_FEATURE_STEPS = + ConfigBuilder("spark.kubernetes.executor.pod.featureSteps") + .doc("Class name of an extra executor pod feature step implementing " + + "KubernetesFeatureConfigStep. This is a developer API. Comma separated. 
" + + "Runs after all of Spark internal feature steps.") + .version("3.2.0") + .stringConf + .toSequence + .createWithDefault(Nil) + val KUBERNETES_ALLOCATION_BATCH_SIZE = ConfigBuilder("spark.kubernetes.allocation.batch.size") .doc("Number of pods to launch at once in each round of executor allocation.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala index fd1196368a7ff..c2298e7ca77c6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkPod.scala @@ -18,7 +18,16 @@ package org.apache.spark.deploy.k8s import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} -private[spark] case class SparkPod(pod: Pod, container: Container) { +import org.apache.spark.annotation.{DeveloperApi, Unstable} + +/** + * :: DeveloperApi :: + * + * Represents a SparkPod consisting of pod and the container within the pod. + */ +@Unstable +@DeveloperApi +case class SparkPod(pod: Pod, container: Container) { /** * Convenience method to apply a series of chained transformations to a pod. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala index 58cdaa3cadd6b..3fec92644b956 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/KubernetesFeatureConfigStep.scala @@ -18,13 +18,18 @@ package org.apache.spark.deploy.k8s.features import io.fabric8.kubernetes.api.model.HasMetadata +import org.apache.spark.annotation.{DeveloperApi, Unstable} import org.apache.spark.deploy.k8s.SparkPod /** + * :: DeveloperApi :: + * * A collection of functions that together represent a "feature" in pods that are launched for * Spark drivers and executors. */ -private[spark] trait KubernetesFeatureConfigStep { +@Unstable +@DeveloperApi +trait KubernetesFeatureConfigStep { /** * Apply modifications on the given pod in accordance to this feature. 
This can include attaching diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index 43639a3b7dc1b..3b38dd6e4feef 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -22,6 +22,7 @@ import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ +import org.apache.spark.util.Utils private[spark] class KubernetesDriverBuilder { @@ -37,6 +38,11 @@ private[spark] class KubernetesDriverBuilder { } .getOrElse(SparkPod.initialPod()) + val userFeatures = conf.get(Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS) + .map { className => + Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + } + val features = Seq( new BasicDriverFeatureStep(conf), new DriverKubernetesCredentialsFeatureStep(conf), @@ -48,7 +54,7 @@ private[spark] class KubernetesDriverBuilder { new HadoopConfDriverFeatureStep(conf), new KerberosConfDriverFeatureStep(conf), new PodTemplateConfigMapStep(conf), - new LocalDirsFeatureStep(conf)) + new LocalDirsFeatureStep(conf)) ++ userFeatures val spec = KubernetesDriverSpec( initialPod, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala index 5388d185489f2..43328c72a6fdd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilder.scala @@ -24,6 +24,7 @@ import org.apache.spark.SecurityManager import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.resource.ResourceProfile +import org.apache.spark.util.Utils private[spark] class KubernetesExecutorBuilder { @@ -41,13 +42,18 @@ private[spark] class KubernetesExecutorBuilder { } .getOrElse(SparkPod.initialPod()) + val userFeatures = conf.get(Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS) + .map { className => + Utils.classForName(className).newInstance().asInstanceOf[KubernetesFeatureConfigStep] + } + val features = Seq( new BasicExecutorFeatureStep(conf, secMgr, resourceProfile), new ExecutorKubernetesCredentialsFeatureStep(conf), new MountSecretsFeatureStep(conf), new EnvSecretsFeatureStep(conf), new MountVolumesFeatureStep(conf), - new LocalDirsFeatureStep(conf)) + new LocalDirsFeatureStep(conf)) ++ userFeatures val spec = KubernetesExecutorSpec( initialPod, diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala index 4d4c4baeb12c0..21a5b7a6486fd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/PodBuilderSuite.scala @@ -26,12 +26,15 @@ import org.mockito.Mockito.{mock, never, verify, when} import 
scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.deploy.k8s.features.KubernetesFeatureConfigStep import org.apache.spark.internal.config.ConfigEntry abstract class PodBuilderSuite extends SparkFunSuite { protected def templateFileConf: ConfigEntry[_] + protected def userFeatureStepsConf: ConfigEntry[_] + protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod private val baseConf = new SparkConf(false) @@ -50,6 +53,19 @@ abstract class PodBuilderSuite extends SparkFunSuite { verifyPod(pod) } + test("configure a custom test step") { + val client = mockKubernetesClient() + val sparkConf = baseConf.clone() + .set(userFeatureStepsConf.key, + "org.apache.spark.deploy.k8s.TestStepTwo," + + "org.apache.spark.deploy.k8s.TestStep") + .set(templateFileConf.key, "template-file.yaml") + val pod = buildPod(sparkConf, client) + verifyPod(pod) + assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "so_long")) + assert(pod.container.getVolumeMounts.asScala.exists(_.getName == "so_long_two")) + } + test("complain about misconfigured pod template") { val client = mockKubernetesClient( new PodBuilder() @@ -173,3 +189,63 @@ abstract class PodBuilderSuite extends SparkFunSuite { } } + +/** + * A test user feature step. + */ +class TestStep extends KubernetesFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + override def configurePod(pod: SparkPod): SparkPod = { + val localDirVolumes = Seq(new VolumeBuilder().withName("so_long").build()) + val localDirVolumeMounts = Seq( + new VolumeMountBuilder().withName("so_long") + .withMountPath("and_thanks_for_all_the_fish") + .build() + ) + + val podWithLocalDirVolumes = new PodBuilder(pod.pod) + .editSpec() + .addToVolumes(localDirVolumes: _*) + .endSpec() + .build() + val containerWithLocalDirVolumeMounts = new ContainerBuilder(pod.container) + .addNewEnv() + .withName("CUSTOM_SPARK_LOCAL_DIRS") + .withValue("fishyfishyfishy") + .endEnv() + .addToVolumeMounts(localDirVolumeMounts: _*) + .build() + SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) + } +} + +/** + * A test user feature step. 
+ */ +class TestStepTwo extends KubernetesFeatureConfigStep { + import io.fabric8.kubernetes.api.model._ + + override def configurePod(pod: SparkPod): SparkPod = { + val localDirVolumes = Seq(new VolumeBuilder().withName("so_long_two").build()) + val localDirVolumeMounts = Seq( + new VolumeMountBuilder().withName("so_long_two") + .withMountPath("and_thanks_for_all_the_fish_eh") + .build() + ) + + val podWithLocalDirVolumes = new PodBuilder(pod.pod) + .editSpec() + .addToVolumes(localDirVolumes: _*) + .endSpec() + .build() + val containerWithLocalDirVolumeMounts = new ContainerBuilder(pod.container) + .addNewEnv() + .withName("CUSTOM_SPARK_LOCAL_DIRS_TWO") + .withValue("fishyfishyfishyTWO") + .endEnv() + .addToVolumeMounts(localDirVolumeMounts: _*) + .build() + SparkPod(podWithLocalDirVolumes, containerWithLocalDirVolumeMounts) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala index 6518c91a1a1fd..f9802ff967f82 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilderSuite.scala @@ -28,9 +28,12 @@ class KubernetesDriverBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_DRIVER_PODTEMPLATE_FILE } + override protected def userFeatureStepsConf: ConfigEntry[_] = { + Config.KUBERNETES_DRIVER_POD_FEATURE_STEPS + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { val conf = KubernetesTestConf.createDriverConf(sparkConf = sparkConf) new KubernetesDriverBuilder().buildFromFeatures(conf, client).pod } - } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala index c64b733102dc8..ec60c6fc0bf82 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBuilderSuite.scala @@ -29,6 +29,10 @@ class KubernetesExecutorBuilderSuite extends PodBuilderSuite { Config.KUBERNETES_EXECUTOR_PODTEMPLATE_FILE } + override protected def userFeatureStepsConf: ConfigEntry[_] = { + Config.KUBERNETES_EXECUTOR_POD_FEATURE_STEPS + } + override protected def buildPod(sparkConf: SparkConf, client: KubernetesClient): SparkPod = { sparkConf.set("spark.driver.host", "https://driver.host.com") val conf = KubernetesTestConf.createExecutorConf(sparkConf = sparkConf) From 412d86e711188ff1bd8a6387524131aa3c200503 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 14 Dec 2020 13:34:23 -0800 Subject: [PATCH 0763/1009] [SPARK-33771][SQL][TESTS] Fix Invalid value for HourOfAmPm when testing on JDK 14 ### What changes were proposed in this pull request? This pr fix invalid value for HourOfAmPm when testing on JDK 14. ### Why are the changes needed? Run test on JDK 14. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #30754 from wangyum/SPARK-33771. 
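For context, a rough standalone sketch (not part of this patch) of the JDK behavior difference the updated test accounts for; the pattern and locale below are illustrative only:

```scala
import java.time.format.DateTimeFormatter
import java.util.Locale

// 'K' is hour-of-am-pm with valid range 0-11, so "12 AM" is out of range for "KK".
val fmt = DateTimeFormatter.ofPattern("yyyy-MM-dd KK a", Locale.US)

// On JDK 13 and later (after JDK-8223773) this throws a DateTimeException subclass
// complaining about an invalid HourOfAmPm value; on older JDKs it parses leniently,
// which is what the previous assertion relied on.
fmt.parse("2009-12-12 12 AM")
```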
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/util/TimestampFormatterSuite.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 103b7a2eded28..c65fec29bc6b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util import java.time.{DateTimeException, Instant, LocalDateTime, LocalTime} import java.util.concurrent.TimeUnit +import org.apache.commons.lang3.{JavaVersion, SystemUtils} import org.scalatest.matchers.should.Matchers._ import org.apache.spark.SparkUpgradeException @@ -355,9 +356,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val micros1 = formatter.parse("2009-12-12 00 am") assert(micros1 === date(2009, 12, 12)) + // JDK-8223773: DateTimeFormatter Fails to throw an Exception on Invalid HOUR_OF_AMPM // For `KK`, "12:00:00 am" is the same as "00:00:00 pm". - val micros2 = formatter.parse("2009-12-12 12 am") - assert(micros2 === date(2009, 12, 12, 12)) + if (SystemUtils.isJavaVersionAtLeast(JavaVersion.JAVA_13)) { + intercept[DateTimeException](formatter.parse("2009-12-12 12 am")) + } else { + val micros2 = formatter.parse("2009-12-12 12 am") + assert(micros2 === date(2009, 12, 12, 12)) + } val micros3 = formatter.parse("2009-12-12 00 pm") assert(micros3 === date(2009, 12, 12, 12)) From f156718587fc33b9bf8e5abc4ae1f6fa0a5da887 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 14 Dec 2020 14:28:47 -0800 Subject: [PATCH 0764/1009] [SPARK-33777][SQL] Sort output of V2 SHOW PARTITIONS ### What changes were proposed in this pull request? List partitions returned by the V2 `SHOW PARTITIONS` command in alphabetical order. ### Why are the changes needed? To have the same behavior as: 1. V1 in-memory catalog, see https://github.com/apache/spark/blob/a28ed86a387b286745b30cd4d90b3d558205a5a7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala#L546 2. V1 Hive catalogs, see https://github.com/apache/spark/blob/fab2995972761503563fa2aa547c67047c51bd33/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala#L715 ### Does this PR introduce _any_ user-facing change? Yes, after the changes, V2 SHOW PARTITIONS sorts its output. ### How was this patch tested? Added new UT to the base trait `ShowPartitionsSuiteBase` which contains tests for V1 and V2. Closes #30764 from MaxGekk/sort-show-partitions. 
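As a rough illustration of the intended behavior (the catalog, namespace, table, and provider names below are placeholders, not taken from this patch):

```scala
// Assumes a DSv2 catalog registered as "testcat" whose provider supports partitions.
sql("CREATE TABLE testcat.ns.tbl (id INT, part STRING) USING parquet PARTITIONED BY (part)")
sql("ALTER TABLE testcat.ns.tbl ADD PARTITION (part = 'b')")
sql("ALTER TABLE testcat.ns.tbl ADD PARTITION (part = 'a')")

// With this change the V2 output is returned in alphabetical order, matching the
// V1 in-memory and Hive catalogs: "part=a" is listed before "part=b" even though
// the partitions were added in the opposite order.
sql("SHOW PARTITIONS testcat.ns.tbl").show()
```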
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../datasources/v2/ShowPartitionsExec.scala | 5 +++-- .../command/ShowPartitionsSuiteBase.scala | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index c4b6aa805d58f..416dce6fa28c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -49,7 +49,7 @@ case class ShowPartitionsExec( val len = schema.length val partitions = new Array[String](len) val timeZoneId = SQLConf.get.sessionLocalTimeZone - partitionIdentifiers.map { row => + val output = partitionIdentifiers.map { row => var i = 0 while (i < len) { val dataType = schema(i).dataType @@ -59,7 +59,8 @@ case class ShowPartitionsExec( partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) i += 1 } - InternalRow(UTF8String.fromString(partitions.mkString("/"))) + partitions.mkString("/") } + output.sorted.map(p => InternalRow(UTF8String.fromString(p))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index b695decdb3ec9..56c6e5a325745 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -173,4 +173,21 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } } } + + test("SPARK-33777: sorted output") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val table = s"$catalog.ns.dateTable" + withTable(table) { + sql(s""" + |CREATE TABLE $table (id int, part string) + |$defaultUsing + |PARTITIONED BY (part)""".stripMargin) + sql(s"ALTER TABLE $table ADD PARTITION(part = 'b')") + sql(s"ALTER TABLE $table ADD PARTITION(part = 'a')") + val partitions = sql(s"show partitions $table") + assert(partitions.first().getString(0) === "part=a") + } + } + } } From 49d3256497cb47d03a3167a550fb9857bd3afdbd Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 14 Dec 2020 15:18:50 -0800 Subject: [PATCH 0765/1009] [SPARK-33653][SQL] DSv2: REFRESH TABLE should recache the table itself ### What changes were proposed in this pull request? This changes DSv2 refresh table semantics to also recache the target table itself. ### Why are the changes needed? Currently "REFRESH TABLE" in DSv2 only invalidate all caches referencing the table. With #30403 merged which adds support for caching a DSv2 table, we should also recache the target table itself to make the behavior consistent with DSv1. ### Does this PR introduce _any_ user-facing change? Yes, now refreshing table in DSv2 also recache the target table itself. ### How was this patch tested? Added coverage of this new behavior in the existing UT for v2 refresh table command Closes #30742 from sunchao/SPARK-33653. 
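As a short sketch of the new semantics, mirroring the test added below (the catalog and table names are placeholders; "foo" is the test suite's fake provider):

```scala
// Assumes a DSv2 catalog registered as "testcat"; names are illustrative only.
sql("CREATE TABLE testcat.ns.t (id BIGINT) USING foo")
sql("CACHE TABLE testcat.ns.t")
sql("REFRESH TABLE testcat.ns.t")

// Previously REFRESH TABLE only uncached the table; with this change it is cached
// again afterwards, keeping the original cache name and storage level.
assert(spark.sharedState.cacheManager.lookupCachedData(spark.table("testcat.ns.t")).isDefined)
```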
Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 16 +++++++++++++--- .../datasources/v2/RefreshTableExec.scala | 1 - .../sql/connector/DataSourceV2SQLSuite.scala | 19 +++++++++++++++++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index ea6ac6ca92aa0..1dd9f551ff8c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} +import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -56,9 +56,19 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat session.sharedState.cacheManager.recacheByPlan(session, r) } - private def invalidateCache(r: ResolvedTable)(): Unit = { + private def invalidateCache(r: ResolvedTable, recacheTable: Boolean = false)(): Unit = { val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) + val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + if (recacheTable && cache.isDefined) { + // save the cache name and cache level for recreation + val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel + + // recache with the same name and cache level. 
+ val ds = Dataset.ofRows(session, v2Relation) + session.sharedState.cacheManager.cacheQuery(ds, cacheName, cacheLevel) + } } override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { @@ -137,7 +147,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } case RefreshTable(r: ResolvedTable) => - RefreshTableExec(r.catalog, r.identifier, invalidateCache(r)) :: Nil + RefreshTableExec(r.catalog, r.identifier, invalidateCache(r, recacheTable = true)) :: Nil case ReplaceTable(catalog, ident, schema, parts, props, orCreate) => val propsWithOwner = CatalogV2Util.withDefaultOwnership(props) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala index 994583c1e338f..e66f0a18a1326 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RefreshTableExec.scala @@ -29,7 +29,6 @@ case class RefreshTableExec( catalog.invalidateTable(ident) // invalidate all caches referencing the given table - // TODO(SPARK-33437): re-cache the table itself once we support caching a DSv2 table invalidateCache() Seq.empty diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0c65e530f67da..638f06d618833 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1749,6 +1749,25 @@ class DataSourceV2SQLSuite } } + test("SPARK-33653: REFRESH TABLE should recache the target table itself") { + val tblName = "testcat.ns.t" + withTable(tblName) { + sql(s"CREATE TABLE $tblName (id bigint) USING foo") + + // if the table is not cached, refreshing it should not recache it + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isEmpty) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isEmpty) + + sql(s"CACHE TABLE $tblName") + + // after caching & refreshing the table should be recached + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isDefined) + sql(s"REFRESH TABLE $tblName") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(tblName)).isDefined) + } + } + test("REPLACE TABLE: v1 table") { val e = intercept[AnalysisException] { sql(s"CREATE OR REPLACE TABLE tbl (a int) USING ${classOf[SimpleScanSource].getName}") From a99a47ca1df689377dbfbf4dd7258f59aee2be44 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 15 Dec 2020 08:56:45 +0900 Subject: [PATCH 0766/1009] [SPARK-33748][K8S] Respect environment variables and configurations for Python executables ### What changes were proposed in this pull request? This PR proposes: - Respect `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, or `spark.pyspark.python` and `spark.pyspark.driver.python` configurations in Kubernetes just like other cluster types in Spark. - Deprecate `spark.kubernetes.pyspark.pythonVersion` and guide users to set the environment variables and configurations for Python executables. NOTE that `spark.kubernetes.pyspark.pythonVersion` is already a no-op configuration without this PR. Default is `3` and other values are disallowed.
- In order for Python executable settings to be consistently used, fix the `spark.archives` option to unpack into the current working directory in the driver of Kubernetes cluster mode. This behaviour is identical to Yarn's cluster mode. By doing this, users can leverage Conda or virtualenv in cluster mode as below: ```bash conda create -y -n pyspark_conda_env -c conda-forge pyarrow pandas conda-pack conda activate pyspark_conda_env conda pack -f -o pyspark_conda_env.tar.gz PYSPARK_PYTHON=./environment/bin/python spark-submit --archives pyspark_conda_env.tar.gz#environment app.py ``` - Removed several unused or useless pieces of code such as `extractS3Key` and `renameResourcesToLocalFS` ### Why are the changes needed? - To provide consistent support of PySpark by using `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, or `spark.pyspark.python` and `spark.pyspark.driver.python` configurations. - To provide Conda and virtualenv support via `spark.archives` options. ### Does this PR introduce _any_ user-facing change? Yes: - `spark.kubernetes.pyspark.pythonVersion` is deprecated. - `PYSPARK_PYTHON` and `PYSPARK_DRIVER_PYTHON` environment variables, and `spark.pyspark.python` and `spark.pyspark.driver.python` configurations are respected. ### How was this patch tested? Manually tested via: ```bash minikube delete minikube start --cpus 12 --memory 16384 kubectl create namespace spark-integration-test cat < Signed-off-by: HyukjinKwon --- .../org/apache/spark/deploy/SparkSubmit.scala | 54 ++++++------ docs/running-on-kubernetes.md | 5 +- .../org/apache/spark/deploy/k8s/Config.scala | 16 +++- .../apache/spark/deploy/k8s/Constants.scala | 3 +- .../features/DriverCommandFeatureStep.scala | 37 ++++++-- .../DriverCommandFeatureStepSuite.scala | 57 +++++++++++-- .../src/main/dockerfiles/spark/entrypoint.sh | 10 +-- .../k8s/integrationtest/DepsTestsSuite.scala | 85 ++++++++++++++----- .../k8s/integrationtest/KubernetesSuite.scala | 6 +- .../KubernetesTestComponents.scala | 5 +- .../k8s/integrationtest/ProcessUtils.scala | 5 +- .../deploy/k8s/integrationtest/Utils.scala | 9 +- .../tests/py_container_checks.py | 2 +- .../tests/python_executable_check.py | 40 +++++++++ 14 files changed, 256 insertions(+), 78 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/tests/python_executable_check.py diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index ea293f03a2169..bb3a20dce2da4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -31,7 +31,6 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.util.{Properties, Try} -import org.apache.commons.io.FilenameUtils import org.apache.commons.lang3.StringUtils import org.apache.hadoop.conf.{Configuration => HadoopConfiguration} import org.apache.hadoop.fs.{FileSystem, Path} @@ -387,20 +386,40 @@ private[spark] class SparkSubmit extends Logging { // Replace with the downloaded local jar path to avoid propagating hadoop compatible uris. // Executors will get the jars from the Spark file server.
// Explicitly download the related files here - args.jars = renameResourcesToLocalFS(args.jars, localJars) + args.jars = localJars val filesLocalFiles = Option(args.files).map { downloadFileList(_, targetDir, sparkConf, hadoopConf, secMgr) }.orNull - val archiveLocalFiles = Option(args.archives).map { uri => - val resolvedUri = Utils.resolveURI(uri) - val downloadedUri = downloadFileList( - UriBuilder.fromUri(resolvedUri).fragment(null).build().toString, + val archiveLocalFiles = Option(args.archives).map { uris => + val resolvedUris = Utils.stringToSeq(uris).map(Utils.resolveURI) + val localArchives = downloadFileList( + resolvedUris.map( + UriBuilder.fromUri(_).fragment(null).build().toString).mkString(","), targetDir, sparkConf, hadoopConf, secMgr) - UriBuilder.fromUri(downloadedUri).fragment(resolvedUri.getFragment).build().toString + + // SPARK-33748: this mimics the behaviour of Yarn cluster mode. If the driver is running + // in cluster mode, the archives should be available in the driver's current working + // directory too. + Utils.stringToSeq(localArchives).map(Utils.resolveURI).zip(resolvedUris).map { + case (localArchive, resolvedUri) => + val source = new File(localArchive.getPath) + val dest = new File( + ".", + if (resolvedUri.getFragment != null) resolvedUri.getFragment else source.getName) + logInfo( + s"Unpacking an archive $resolvedUri " + + s"from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + Utils.deleteRecursively(dest) + Utils.unpack(source, dest) + + // Keep the URIs of local files with the given fragments. + UriBuilder.fromUri( + localArchive).fragment(resolvedUri.getFragment).build().toString + }.mkString(",") }.orNull - args.files = renameResourcesToLocalFS(args.files, filesLocalFiles) - args.archives = renameResourcesToLocalFS(args.archives, archiveLocalFiles) - args.pyFiles = renameResourcesToLocalFS(args.pyFiles, localPyFiles) + args.files = filesLocalFiles + args.archives = archiveLocalFiles + args.pyFiles = localPyFiles } } @@ -836,21 +855,6 @@ private[spark] class SparkSubmit extends Logging { (childArgs.toSeq, childClasspath.toSeq, sparkConf, childMainClass) } - private def renameResourcesToLocalFS(resources: String, localResources: String): String = { - if (resources != null && localResources != null) { - val localResourcesSeq = Utils.stringToSeq(localResources) - Utils.stringToSeq(resources).map { resource => - val filenameRemote = FilenameUtils.getName(new URI(resource).getPath) - localResourcesSeq.find { localUri => - val filenameLocal = FilenameUtils.getName(new URI(localUri).getPath) - filenameRemote == filenameLocal - }.getOrElse(resource) - }.mkString(",") - } else { - resources - } - } - // [SPARK-20328]. HadoopRDD calls into a Hadoop library that fetches delegation tokens with // renewer set to the YARN ResourceManager. Since YARN isn't configured in Mesos or Kubernetes // mode, we must trick it into thinking we're YARN. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e735c7493486e..93c6f94790abc 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1087,7 +1087,10 @@ See the [configuration page](configuration.html) for information on Spark config spark.kubernetes.pyspark.pythonVersion "3" - This sets the major Python version of the docker image used to run the driver and executor containers. Can be 3. + This sets the major Python version of the docker image used to run the driver and executor containers. + It can be only "3". 
This configuration was deprecated from Spark 3.1.0, and is effectively no-op. + Users should set 'spark.pyspark.python' and 'spark.pyspark.driver.python' configurations or + 'PYSPARK_PYTHON' and 'PYSPARK_DRIVER_PYTHON' environment variables. 2.4.0 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 40609aef1e9d8..6939de4697979 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -20,6 +20,7 @@ import java.util.concurrent.TimeUnit import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} import org.apache.spark.internal.config.ConfigBuilder private[spark] object Config extends Logging { @@ -321,12 +322,19 @@ private[spark] object Config extends Logging { val PYSPARK_MAJOR_PYTHON_VERSION = ConfigBuilder("spark.kubernetes.pyspark.pythonVersion") - .doc("This sets the major Python version. Only 3 is available for Python3.") + .doc( + s"(Deprecated since Spark 3.1, please set '${PYSPARK_PYTHON.key}' and " + + s"'${PYSPARK_DRIVER_PYTHON.key}' configurations or $ENV_PYSPARK_PYTHON and " + + s"$ENV_PYSPARK_DRIVER_PYTHON environment variables instead.)") .version("2.4.0") .stringConf - .checkValue(pv => List("3").contains(pv), - "Ensure that major Python version is Python3") - .createWithDefault("3") + .checkValue("3" == _, + "Python 2 was dropped from Spark 3.1, and only 3 is allowed in " + + "this configuration. Note that this configuration was deprecated in Spark 3.1. 
" + + s"Please set '${PYSPARK_PYTHON.key}' and '${PYSPARK_DRIVER_PYTHON.key}' " + + s"configurations or $ENV_PYSPARK_PYTHON and $ENV_PYSPARK_DRIVER_PYTHON environment " + + "variables instead.") + .createOptional val KUBERNETES_KERBEROS_KRB5_FILE = ConfigBuilder("spark.kubernetes.kerberos.krb5.path") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index 4014a964ed950..543ca12594763 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -74,7 +74,8 @@ private[spark] object Constants { val ENV_HADOOP_TOKEN_FILE_LOCATION = "HADOOP_TOKEN_FILE_LOCATION" // BINDINGS - val ENV_PYSPARK_MAJOR_PYTHON_VERSION = "PYSPARK_MAJOR_PYTHON_VERSION" + val ENV_PYSPARK_PYTHON = "PYSPARK_PYTHON" + val ENV_PYSPARK_DRIVER_PYTHON = "PYSPARK_DRIVER_PYTHON" // Pod spec templates val EXECUTOR_POD_SPEC_TEMPLATE_FILE_NAME = "pod-spec-template.yml" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala index d49381ba897d4..8015a1af3e17d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala @@ -24,6 +24,8 @@ import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} import org.apache.spark.launcher.SparkLauncher /** @@ -31,7 +33,7 @@ import org.apache.spark.launcher.SparkLauncher * executors can also find the app code. */ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) - extends KubernetesFeatureConfigStep { + extends KubernetesFeatureConfigStep with Logging { override def configurePod(pod: SparkPod): SparkPod = { conf.mainAppResource match { @@ -70,12 +72,37 @@ private[spark] class DriverCommandFeatureStep(conf: KubernetesDriverConf) SparkPod(pod.pod, driverContainer) } + // Exposed for testing purpose. + private[spark] def environmentVariables: Map[String, String] = sys.env + private def configureForPython(pod: SparkPod, res: String): SparkPod = { + if (conf.get(PYSPARK_MAJOR_PYTHON_VERSION).isDefined) { + logWarning( + s"${PYSPARK_MAJOR_PYTHON_VERSION.key} was deprecated in Spark 3.1. 
" + + s"Please set '${PYSPARK_PYTHON.key}' and '${PYSPARK_DRIVER_PYTHON.key}' " + + s"configurations or $ENV_PYSPARK_PYTHON and $ENV_PYSPARK_DRIVER_PYTHON environment " + + "variables instead.") + } + val pythonEnvs = - Seq(new EnvVarBuilder() - .withName(ENV_PYSPARK_MAJOR_PYTHON_VERSION) - .withValue(conf.get(PYSPARK_MAJOR_PYTHON_VERSION)) - .build()) + Seq( + conf.get(PYSPARK_PYTHON) + .orElse(environmentVariables.get(ENV_PYSPARK_PYTHON)).map { value => + new EnvVarBuilder() + .withName(ENV_PYSPARK_PYTHON) + .withValue(value) + .build() + }, + conf.get(PYSPARK_DRIVER_PYTHON) + .orElse(conf.get(PYSPARK_PYTHON)) + .orElse(environmentVariables.get(ENV_PYSPARK_DRIVER_PYTHON)) + .orElse(environmentVariables.get(ENV_PYSPARK_PYTHON)).map { value => + new EnvVarBuilder() + .withName(ENV_PYSPARK_DRIVER_PYTHON) + .withValue(value) + .build() + } + ).flatten // re-write primary resource to be the remote one and upload the related file val newResName = KubernetesUtils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala index a44d465e35087..ebbb42f225c51 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStepSuite.scala @@ -22,6 +22,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ +import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} class DriverCommandFeatureStepSuite extends SparkFunSuite { @@ -50,12 +51,51 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { "--properties-file", SPARK_CONF_PATH, "--class", KubernetesTestConf.MAIN_CLASS, mainResource, "5", "7", "9")) + } + + test("python executable precedence") { + val mainResource = "local:/main.py" - val envs = spec.pod.container.getEnv.asScala - .map { env => (env.getName, env.getValue) } - .toMap - val expected = Map(ENV_PYSPARK_MAJOR_PYTHON_VERSION -> "3") - assert(envs === expected) + val pythonExecutables = Seq( + (Some("conf_py"), Some("conf_driver_py"), Some("env_py"), Some("env_driver_py")), + (Some("conf_py"), None, Some("env_py"), Some("env_driver_py")), + (None, None, Some("env_py"), Some("env_driver_py")), + (None, None, Some("env_py"), None) + ) + + val expectedResults = Seq( + ("conf_py", "conf_driver_py"), + ("conf_py", "conf_py"), + ("env_py", "env_driver_py"), + ("env_py", "env_py") + ) + + pythonExecutables.zip(expectedResults).foreach { case (pythonExecutable, expected) => + val sparkConf = new SparkConf(false) + val (confPy, confDriverPy, envPy, envDriverPy) = pythonExecutable + confPy.foreach(sparkConf.set(PYSPARK_PYTHON, _)) + confDriverPy.foreach(sparkConf.set(PYSPARK_DRIVER_PYTHON, _)) + val pythonEnvs = Map( + ( + envPy.map(v => ENV_PYSPARK_PYTHON -> v :: Nil) ++ + envDriverPy.map(v => ENV_PYSPARK_DRIVER_PYTHON -> v :: Nil) + ).flatten.toArray: _*) + + val spec = applyFeatureStep( + PythonMainAppResource(mainResource), + conf = sparkConf, + appArgs = Array("foo"), + env = pythonEnvs) + + val envs = spec.pod.container.getEnv.asScala + .map { env => (env.getName, env.getValue) } + .toMap + + val (expectedEnvPy, expectedDriverPy) = expected + assert(envs === 
Map( + ENV_PYSPARK_PYTHON -> expectedEnvPy, + ENV_PYSPARK_DRIVER_PYTHON -> expectedDriverPy)) + } } test("R resource") { @@ -123,13 +163,16 @@ class DriverCommandFeatureStepSuite extends SparkFunSuite { resource: MainAppResource, conf: SparkConf = new SparkConf(false), appArgs: Array[String] = Array(), - proxyUser: Option[String] = None): KubernetesDriverSpec = { + proxyUser: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): KubernetesDriverSpec = { val kubernetesConf = KubernetesTestConf.createDriverConf( sparkConf = conf, mainAppResource = resource, appArgs = appArgs, proxyUser = proxyUser) - val step = new DriverCommandFeatureStep(kubernetesConf) + val step = new DriverCommandFeatureStep(kubernetesConf) { + private[spark] override val environmentVariables: Map[String, String] = env + } val pod = step.configurePod(SparkPod.initialPod()) val props = step.getAdditionalPodSystemProperties() KubernetesDriverSpec(pod, Nil, props) diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh index c837e00d2e468..f722471906bfb 100755 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh @@ -44,11 +44,11 @@ if [ -n "$SPARK_EXTRA_CLASSPATH" ]; then SPARK_CLASSPATH="$SPARK_CLASSPATH:$SPARK_EXTRA_CLASSPATH" fi -if [ "$PYSPARK_MAJOR_PYTHON_VERSION" == "3" ]; then - pyv3="$(python3 -V 2>&1)" - export PYTHON_VERSION="${pyv3:7}" - export PYSPARK_PYTHON="python3" - export PYSPARK_DRIVER_PYTHON="python3" +if ! [ -z ${PYSPARK_PYTHON+x} ]; then + export PYSPARK_PYTHON +fi +if ! [ -z ${PYSPARK_DRIVER_PYTHON+x} ]; then + export PYSPARK_DRIVER_PYTHON fi # If HADOOP_HOME is set and SPARK_DIST_CLASSPATH is not set, set it here so Hadoop jars are available to the executor. 
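The executable precedence implemented above in `DriverCommandFeatureStep` can be summarized with a small standalone sketch (not Spark code; the helper name and parameters are made up for illustration):

```scala
// Spark confs take priority over environment variables, and the driver-side
// executable falls back to the executor-side one when not set explicitly.
def resolvePythonExecutables(
    confPython: Option[String],        // spark.pyspark.python
    confDriverPython: Option[String],  // spark.pyspark.driver.python
    envPython: Option[String],         // PYSPARK_PYTHON
    envDriverPython: Option[String]    // PYSPARK_DRIVER_PYTHON
): (Option[String], Option[String]) = {
  val executorPython = confPython.orElse(envPython)
  val driverPython = confDriverPython
    .orElse(confPython)
    .orElse(envDriverPython)
    .orElse(envPython)
  (executorPython, driverPython)
}

// Matches the expectations in DriverCommandFeatureStepSuite, e.g.:
// resolvePythonExecutables(Some("conf_py"), None, Some("env_py"), Some("env_driver_py"))
//   == (Some("conf_py"), Some("conf_py"))
```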
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index a15f7ffa134b8..0d15e0325758d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite.{INTERVAL, MinikubeTag, TIMEOUT} import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube +import org.apache.spark.internal.config.{ARCHIVES, PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => import KubernetesSuite.k8sTestTag @@ -135,7 +136,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => .create(minioStatefulSet)) } - private def deleteMinioStorage(): Unit = { + private def deleteMinioStorage(): Unit = { kubernetesTestComponents .kubernetesClient .apps() @@ -167,7 +168,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => tryDepsTest { val fileName = Utils.createTempFile(FILE_CONTENTS, HOST_PATH) Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tar.gz") - sparkAppConf.set("spark.archives", s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tar.gz#test_tar_gz") val examplesJar = Utils.getTestFileAbsolutePath(getExamplesJarName(), sparkHomeDir) runSparkRemoteCheckAndVerifyCompletion(appResource = examplesJar, appArgs = Array(s"test_tar_gz/$fileName"), @@ -175,40 +176,81 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => } } + test( + "SPARK-33748: Launcher python client respecting PYSPARK_PYTHON", k8sTestTag, MinikubeTag) { + val fileName = Utils.createTempFile( + """ + |#!/usr/bin/env bash + |export IS_CUSTOM_PYTHON=1 + |python3 "$@" + """.stripMargin, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tgz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tgz#test_env") + val pySparkFiles = Utils.getTestFileAbsolutePath("python_executable_check.py", sparkHomeDir) + testPython(pySparkFiles, + Seq( + s"PYSPARK_PYTHON: ./test_env/$fileName", + s"PYSPARK_DRIVER_PYTHON: ./test_env/$fileName", + "Custom Python used on executor: True", + "Custom Python used on driver: True"), + env = Map("PYSPARK_PYTHON" -> s"./test_env/$fileName")) + } + + test( + "SPARK-33748: Launcher python client respecting " + + s"${PYSPARK_PYTHON.key} and ${PYSPARK_DRIVER_PYTHON.key}", k8sTestTag, MinikubeTag) { + val fileName = Utils.createTempFile( + """ + |#!/usr/bin/env bash + |export IS_CUSTOM_PYTHON=1 + |python3 "$@" + """.stripMargin, HOST_PATH) + Utils.createTarGzFile(s"$HOST_PATH/$fileName", s"$HOST_PATH/$fileName.tgz") + sparkAppConf.set(ARCHIVES.key, s"$HOST_PATH/$fileName.tgz#test_env") + sparkAppConf.set(PYSPARK_PYTHON.key, s"./test_env/$fileName") + sparkAppConf.set(PYSPARK_DRIVER_PYTHON.key, "python3") + val pySparkFiles = Utils.getTestFileAbsolutePath("python_executable_check.py", sparkHomeDir) + testPython(pySparkFiles, + Seq( + s"PYSPARK_PYTHON: 
./test_env/$fileName", + "PYSPARK_DRIVER_PYTHON: python3", + "Custom Python used on executor: True", + "Custom Python used on driver: False")) + } + test("Launcher python client dependencies using a zip file", k8sTestTag, MinikubeTag) { + val pySparkFiles = Utils.getTestFileAbsolutePath("pyfiles.py", sparkHomeDir) val inDepsFile = Utils.getTestFileAbsolutePath("py_container_checks.py", sparkHomeDir) val outDepsFile = s"${inDepsFile.substring(0, inDepsFile.lastIndexOf("."))}.zip" Utils.createZipFile(inDepsFile, outDepsFile) - testPythonDeps(outDepsFile) + testPython( + pySparkFiles, + Seq( + "Python runtime version check is: True", + "Python environment version check is: True", + "Python runtime version check for executor is: True"), + Some(outDepsFile)) } - private def testPythonDeps(depsFile: String): Unit = { - tryDepsTest({ - val pySparkFiles = Utils.getTestFileAbsolutePath("pyfiles.py", sparkHomeDir) + private def testPython( + pySparkFiles: String, + expectedDriverLogs: Seq[String], + depsFile: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): Unit = { + tryDepsTest { setPythonSparkConfProperties(sparkAppConf) runSparkApplicationAndVerifyCompletion( appResource = pySparkFiles, mainClass = "", - expectedDriverLogOnCompletion = Seq( - "Python runtime version check is: True", - "Python environment version check is: True", - "Python runtime version check for executor is: True"), + expectedDriverLogOnCompletion = expectedDriverLogs, appArgs = Array("python3"), driverPodChecker = doBasicDriverPyPodCheck, executorPodChecker = doBasicExecutorPyPodCheck, appLocator = appLocator, isJVM = false, - pyFiles = Option(depsFile)) }) - } - - private def extractS3Key(data: String, key: String): String = { - data.split("\n") - .filter(_.contains(key)) - .head - .split(":") - .last - .trim - .replaceAll("[,|\"]", "") + pyFiles = depsFile, + env = env) + } } private def createS3Bucket(accessKey: String, secretKey: String, endPoint: String): Unit = { @@ -269,7 +311,6 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => private def setPythonSparkConfProperties(conf: SparkAppConf): Unit = { sparkAppConf.set("spark.kubernetes.container.image", pyImage) - .set("spark.kubernetes.pyspark.pythonVersion", "3") } private def tryDepsTest(runTest: => Unit): Unit = { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 7b2a2d0820238..494c82512adaf 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -274,7 +274,8 @@ class KubernetesSuite extends SparkFunSuite isJVM: Boolean, pyFiles: Option[String] = None, executorPatience: Option[(Option[Interval], Option[Timeout])] = None, - decommissioningTest: Boolean = false): Unit = { + decommissioningTest: Boolean = false, + env: Map[String, String] = Map.empty[String, String]): Unit = { // scalastyle:on argcount val appArguments = SparkAppArguments( @@ -370,7 +371,8 @@ class KubernetesSuite extends SparkFunSuite TIMEOUT.value.toSeconds.toInt, sparkHomeDir, isJVM, - pyFiles) + pyFiles, + env) val driverPod = kubernetesTestComponents.kubernetesClient .pods() diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index 0bf01e6b66427..0392008fff2f5 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -109,7 +109,8 @@ private[spark] object SparkAppLauncher extends Logging { timeoutSecs: Int, sparkHomeDir: Path, isJVM: Boolean, - pyFiles: Option[String] = None): Unit = { + pyFiles: Option[String] = None, + env: Map[String, String] = Map.empty[String, String]): Unit = { val sparkSubmitExecutable = sparkHomeDir.resolve(Paths.get("bin", "spark-submit")) logInfo(s"Launching a spark app with arguments $appArguments and conf $appConf") val preCommandLine = if (isJVM) { @@ -130,6 +131,6 @@ private[spark] object SparkAppLauncher extends Logging { commandLine ++= appArguments.appArgs } logInfo(s"Launching a spark app with command line: ${commandLine.mkString(" ")}") - ProcessUtils.executeProcess(commandLine.toArray, timeoutSecs) + ProcessUtils.executeProcess(commandLine.toArray, timeoutSecs, env = env) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala index a1ecd48e747ea..cc05990893e36 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/ProcessUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.k8s.integrationtest import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.io.Source @@ -32,8 +33,10 @@ object ProcessUtils extends Logging { def executeProcess( fullCommand: Array[String], timeout: Long, - dumpErrors: Boolean = true): Seq[String] = { + dumpErrors: Boolean = true, + env: Map[String, String] = Map.empty[String, String]): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) + pb.environment().putAll(env.asJava) pb.redirectErrorStream(true) val proc = pb.start() val outputLines = new ArrayBuffer[String] diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala index 519443130008b..cc258533c2c8d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/Utils.scala @@ -153,6 +153,7 @@ object Utils extends Logging { } def createTarGzFile(inFile: String, outFile: String): Unit = { + val oFile = new File(outFile) val fileToTarGz = new File(inFile) Utils.tryWithResource( new FileInputStream(fileToTarGz) @@ -160,15 +161,19 @@ object Utils extends Logging { 
Utils.tryWithResource( new TarArchiveOutputStream( new GzipCompressorOutputStream( - new FileOutputStream( - new File(outFile)))) + new FileOutputStream(oFile))) ) { tOut => val tarEntry = new TarArchiveEntry(fileToTarGz, fileToTarGz.getName) + // Each entry does not keep the file permission from the input file. + // Setting permissions in the input file do not work. Just simply set + // to 777. + tarEntry.setMode(0x81ff) tOut.putArchiveEntry(tarEntry) IOUtils.copy(fis, tOut) tOut.closeArchiveEntry() tOut.finish() } } + oFile.deleteOnExit() } } diff --git a/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py b/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py index f6b3be2806c82..e6c0137c0405f 100644 --- a/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py +++ b/resource-managers/kubernetes/integration-tests/tests/py_container_checks.py @@ -24,7 +24,7 @@ def version_check(python_env, major_python_version): These are various tests to test the Python container image. This file will be distributed via --py-files in the e2e tests. """ - env_version = os.environ.get('PYSPARK_PYTHON') + env_version = os.environ.get('PYSPARK_PYTHON', 'python3') print("Python runtime version check is: " + str(sys.version_info[0] == major_python_version)) diff --git a/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py b/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py new file mode 100644 index 0000000000000..89fd2aacab1a3 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/tests/python_executable_check.py @@ -0,0 +1,40 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + spark = SparkSession \ + .builder \ + .appName("PythonExecutableTest") \ + .getOrCreate() + + # Check python executable at executors + is_custom_python_executor = spark.range(1).rdd.map( + lambda _: "IS_CUSTOM_PYTHON" in os.environ).first() + + print("PYSPARK_PYTHON: %s" % os.environ.get("PYSPARK_PYTHON")) + print("PYSPARK_DRIVER_PYTHON: %s" % os.environ.get("PYSPARK_DRIVER_PYTHON")) + + print("Custom Python used on executor: %s" % is_custom_python_executor) + + is_custom_python_driver = "IS_CUSTOM_PYTHON" in os.environ + print("Custom Python used on driver: %s" % is_custom_python_driver) + + spark.stop() From 366beda54a2911e59a994bfed9fb84a97aa2ab8b Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 15 Dec 2020 05:23:39 +0000 Subject: [PATCH 0767/1009] [SPARK-33785][SQL] Migrate ALTER TABLE ... RECOVER PARTITIONS to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... 
RECOVER PARTITIONS` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... RECOVER PARTITIONS` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution behavior consistent. For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t RECOVER PARTITIONS") // works fine ``` , but after this PR: ``` sql("ALTER TABLE t RECOVER PARTITIONS") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table; line 1 pos 0 ``` , which is consistent with the behavior of other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `ALTER TABLE t RECOVER PARTITIONS` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30773 from imback82/alter_table_recover_part_v2.
RECOVER PARTITIONS")) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index a0e11962f9c05..c8395f375b4ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -292,12 +292,6 @@ case class AlterTableSetLocationStatement( partitionSpec: Option[TablePartitionSpec], location: String) extends ParsedStatement -/** - * ALTER TABLE ... RECOVER PARTITIONS command, as parsed from SQL. - */ -case class AlterTableRecoverPartitionsStatement( - tableName: Seq[String]) extends ParsedStatement - /** * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 0f35674055dc4..2091d92eb67c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -673,6 +673,13 @@ case class AlterTableDropPartition( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... RECOVER PARTITIONS command. + */ +case class AlterTableRecoverPartitions(child: LogicalPlan) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the LOAD DATA INTO TABLE command. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index e8bbc6b22a819..9862a087dd93f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2076,7 +2076,8 @@ class DDLParserSuite extends AnalysisTest { test("alter table: recover partitions") { comparePlans( parsePlan("ALTER TABLE a.b.c RECOVER PARTITIONS"), - AlterTableRecoverPartitionsStatement(Seq("a", "b", "c"))) + AlterTableRecoverPartitions( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
RECOVER PARTITIONS"))) } test("alter view: add partition (not supported)") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 83dda7db09ac2..802068de10d16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -449,10 +449,9 @@ class ResolveSessionCatalog( } ShowColumnsCommand(db, v1TableName) - case AlterTableRecoverPartitionsStatement(tbl) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE RECOVER PARTITIONS") + case AlterTableRecoverPartitions(ResolvedV1TableIdentifier(ident)) => AlterTableRecoverPartitionsCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, "ALTER TABLE RECOVER PARTITIONS") case AlterTableAddPartition(ResolvedV1TableIdentifier(ident), partSpecsAndLocs, ifNotExists) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1dd9f551ff8c9..6020e42b21900 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -330,6 +330,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case AlterTableRecoverPartitions(_: ResolvedTable) => + throw new AnalysisException( + "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index 570976965ec7c..cd80867000932 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -37,7 +37,8 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { val e = intercept[AnalysisException] { sql(s"ALTER TABLE $t RECOVER PARTITIONS") } - assert(e.message.contains("ALTER TABLE RECOVER PARTITIONS is only supported with v1 tables")) + assert(e.message.contains( + "ALTER TABLE ... 
RECOVER PARTITIONS is not supported for v2 tables.")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index c60b61a111c3f..1a248fc18988a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -144,7 +144,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") - assertNoSuchTable(s"ALTER TABLE $viewName RECOVER PARTITIONS") + assertAnalysisError( + s"ALTER TABLE $viewName RECOVER PARTITIONS", + s"$viewName is a temp view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table") // For v2 ALTER TABLE statements, we have better error message saying view is not supported. assertAnalysisError( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b686d040b9644..488b52aa7bd45 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -899,7 +899,9 @@ class HiveDDLSuite assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName RECOVER PARTITIONS") + assertAnalysisError( + s"ALTER TABLE $oldViewName RECOVER PARTITIONS", + s"$oldViewName is a view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table.") assertErrorForAlterTableOnView( s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") From 141e26d65ba92c96ce1aeaf4d93dc0bfbafda902 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 15 Dec 2020 05:36:57 +0000 Subject: [PATCH 0768/1009] [SPARK-33767][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. DROP PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. DROP PARTITION` parsing tests to `AlterTableDropPartitionParserSuite` 2. Place v1 tests for `ALTER TABLE .. DROP PARTITION` from `DDLSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to the common trait `AlterTableDropPartitionSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. DROP PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive -Phive-thriftserver "test:testOnly *AlterTableDropPartitionParserSuite" $ build/sbt -Phive -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30747 from MaxGekk/unify-alter-table-drop-partition-tests. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 50 +----- .../AlterTablePartitionV2SQLSuite.scala | 112 ------------- .../AlterTableDropPartitionParserSuite.scala | 88 +++++++++++ .../AlterTableDropPartitionSuiteBase.scala | 149 ++++++++++++++++++ .../sql/execution/command/DDLSuite.scala | 57 ------- .../v1/AlterTableDropPartitionSuite.scala | 52 ++++++ .../v2/AlterTableDropPartitionSuite.scala | 66 ++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../AlterTableDropPartitionSuite.scala | 48 ++++++ 9 files changed, 404 insertions(+), 222 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9862a087dd93f..2b3fc6f71a5c0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -2115,54 +2115,6 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsed2, expected2) } - // ALTER TABLE table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] - // ALTER VIEW table_name DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] 
- test("alter table: drop partition") { - val sql1_table = - """ - |ALTER TABLE table_name DROP IF EXISTS PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql2_table = - """ - |ALTER TABLE table_name DROP PARTITION - |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') - """.stripMargin - val sql1_view = sql1_table.replace("TABLE", "VIEW") - val sql2_view = sql2_table.replace("TABLE", "VIEW") - - val parsed1_table = parsePlan(sql1_table) - val parsed2_table = parsePlan(sql2_table) - val parsed1_purge = parsePlan(sql1_table + " PURGE") - - assertUnsupported(sql1_view) - assertUnsupported(sql2_view) - - val expected1_table = AlterTableDropPartition( - UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), - Seq( - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), - ifExists = true, - purge = false) - val expected2_table = expected1_table.copy(ifExists = false) - val expected1_purge = expected1_table.copy(purge = true) - - comparePlans(parsed1_table, expected1_table) - comparePlans(parsed2_table, expected2_table) - comparePlans(parsed1_purge, expected1_purge) - - val sql3_table = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" - val expected3_table = AlterTableDropPartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... DROP PARTITION ..."), - Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), - ifExists = true, - purge = false) - - val parsed3_table = parsePlan(sql3_table) - comparePlans(parsed3_table, expected3_table) - } - test("show current namespace") { comparePlans( parsePlan("SHOW CURRENT NAMESPACE"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index cd80867000932..ac4d055eb0e60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -18,18 +18,8 @@ package org.apache.spark.sql.connector import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException -import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} -import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Implicits -import org.apache.spark.sql.internal.SQLConf class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { - - import CatalogV2Implicits._ - import DataSourceV2Implicits._ - - test("ALTER TABLE RECOVER PARTITIONS") { val t = "testcat.ns1.ns2.tbl" withTable(t) { @@ -52,106 +42,4 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) } } - - test("ALTER TABLE DROP PARTITION") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - - test("ALTER TABLE DROP 
PARTITIONS") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (id=1) LOCATION 'loc'" + - " PARTITION (id=2) LOCATION 'loc1'") - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert( - partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) - } - } - - test("ALTER TABLE DROP PARTITIONS: partition not exists") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - spark.sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - assertThrows[NoSuchPartitionsException]( - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)")) - - val partTable = - catalog("testpart").asTableCatalog.loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - assert(partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - spark.sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(1)))) - assert(!partTable.asPartitionable.partitionExists(InternalRow.fromSeq(Seq(2)))) - assert( - partTable.asPartitionable.listPartitionIdentifiers(Array.empty, InternalRow.empty).isEmpty) - } - } - - test("case sensitivity in resolving partition specs") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") - }.getMessage - assert(errMsg.contains(s"ID is not a valid partition column in table $t")) - } - - val partTable = catalog("testpart").asTableCatalog - .loadTable(Identifier.of(Array("ns1", "ns2"), "tbl")) - .asPartitionable - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - spark.sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") - assert(partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - spark.sql(s"ALTER TABLE $t DROP PARTITION (Id=1)") - assert(!partTable.partitionExists(InternalRow.fromSeq(Seq(1)))) - } - } - } - - test("SPARK-33650: drop partition into a table which doesn't support partition management") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING _") - val errMsg = intercept[AnalysisException] { - spark.sql(s"ALTER TABLE $t DROP PARTITION (id=1)") - }.getMessage - assert(errMsg.contains(s"Table $t can not alter partitions")) - } - } - - test("SPARK-33676: not fully specified partition spec") { - val t = "testpart.ns1.ns2.tbl" - withTable(t) { - sql(s""" - |CREATE TABLE $t (id bigint, part0 int, part1 string) - |USING foo - |PARTITIONED BY (part0, part1)""".stripMargin) - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") - }.getMessage - assert(errMsg.contains("Partition spec is invalid. 
" + - "The spec (part0) must match the partition spec (part0, part1)")) - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala new file mode 100644 index 0000000000000..53edd5854f289 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionParserSuite.scala @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.AlterTableDropPartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableDropPartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("drop partition") { + val sql = """ + |ALTER TABLE table_name DROP PARTITION + |(dt='2008-08-08', country='us'), PARTITION (dt='2009-09-09', country='uk') + """.stripMargin + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), + ifExists = false, + purge = false) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition if exists") { + val sql = """ + |ALTER TABLE table_name DROP IF EXISTS + |PARTITION (dt='2008-08-08', country='us'), + |PARTITION (dt='2009-09-09', country='uk') + """.stripMargin + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... DROP PARTITION ..."), + Seq( + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + UnresolvedPartitionSpec(Map("dt" -> "2009-09-09", "country" -> "uk"))), + ifExists = true, + purge = false) + comparePlans(parsePlan(sql), expected) + } + + test("drop partition in a table with multi-part identifier") { + val sql = "ALTER TABLE a.b.c DROP IF EXISTS PARTITION (ds='2017-06-10')" + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... DROP PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("ds" -> "2017-06-10"))), + ifExists = true, + purge = false) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition with PURGE") { + val sql = "ALTER TABLE table_name DROP PARTITION (p=1) PURGE" + val expected = AlterTableDropPartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... 
DROP PARTITION ..."), + Seq(UnresolvedPartitionSpec(Map("p" -> "1"))), + ifExists = false, + purge = true) + + comparePlans(parsePlan(sql), expected) + } + + test("drop partition from view") { + val sql = "ALTER VIEW table_name DROP PARTITION (p=1)" + val errMsg = intercept[ParseException] { + parsePlan(sql) + }.getMessage + assert(errMsg.contains("Operation not allowed")) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala new file mode 100644 index 0000000000000..ed479e2824fb7 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SQLTestUtils + +trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { + protected def version: String + protected def catalog: String + protected def defaultUsing: String + + protected def notFullPartitionSpecErr: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"ALTER TABLE .. 
DROP PARTITION $version: " + testName, testTags: _*)(testFun) + } + + protected def withNsTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } + + protected def checkDropPartition( + t: String, + ifExists: String, + specs: Map[String, Any]*): Unit = { + checkPartitions(t, specs.map(_.mapValues(_.toString).toMap): _*) + val specStr = specs.map( + _.map { + case (k, v: String) => s"$k = '$v'" + case (k, v) => s"$k = $v" + }.mkString("PARTITION (", ", ", ")")) + .mkString(", ") + sql(s"ALTER TABLE $t DROP $ifExists $specStr") + checkPartitions(t) + } + + test("single partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + checkDropPartition(t, ifExists, Map("id" -> 1)) + } + } + } + + test("multiple partitions") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s""" + |ALTER TABLE $t ADD + |PARTITION (id=1) LOCATION 'loc' + |PARTITION (id=2) LOCATION 'loc1'""".stripMargin) + checkDropPartition(t, ifExists, Map("id" -> 1), Map("id" -> 2)) + } + } + } + + test("multi-part partition") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (a = 2, b = 'abc')") + checkDropPartition(t, ifExists, Map("a" -> 2, "b" -> "abc")) + } + } + } + + test("table to alter does not exist") { + withNsTable("ns", "does_not_exist") { t => + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (a='4', b='9')") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("case sensitivity in resolving partition specs") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (ID=1)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + Seq("", "IF EXISTS").foreach { ifExists => + sql(s"ALTER TABLE $t ADD PARTITION (ID=1) LOCATION 'loc1'") + checkDropPartition(t, ifExists, Map("id" -> 1)) + } + } + } + } + + test("SPARK-33676: not fully specified partition spec") { + withNsTable("ns", "tbl") { t => + sql(s""" + |CREATE TABLE $t (id bigint, part0 int, part1 string) + |$defaultUsing + |PARTITIONED BY (part0, part1)""".stripMargin) + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (part0 = 1)") + }.getMessage + assert(errMsg.contains(notFullPartitionSpecErr)) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
index 05e0f4f4a538c..d6474ae7d5f00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: drop partition (datasource table)") { - testDropPartitions(isDatasourceTable = true) - } - test("alter table: rename partition (datasource table)") { testRenamePartitions(isDatasourceTable = true) } @@ -1617,59 +1613,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testDropPartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "5") - val part2 = Map("a" -> "2", "b" -> "6") - val part3 = Map("a" -> "3", "b" -> "7") - val part4 = Map("a" -> "4", "b" -> "8") - val part5 = Map("a" -> "9", "b" -> "9") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - createTablePartition(catalog, part2, tableIdent) - createTablePartition(catalog, part3, tableIdent) - createTablePartition(catalog, part4, tableIdent) - createTablePartition(catalog, part5, tableIdent) - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(part1, part2, part3, part4, part5)) - - // basic drop partition - sql("ALTER TABLE dbx.tab1 DROP IF EXISTS PARTITION (a='4', b='8'), PARTITION (a='3', b='7')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part5)) - - // drop partitions without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 DROP IF EXISTS PARTITION (a='2', b ='6')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part5)) - - // table to alter does not exist - intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist DROP IF EXISTS PARTITION (a='2')") - } - - // partition to drop does not exist - intercept[AnalysisException] { - sql("ALTER TABLE tab1 DROP PARTITION (a='300')") - } - - // partition to drop does not exist when using IF EXISTS - sql("ALTER TABLE tab1 DROP IF EXISTS PARTITION (a='300')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part5)) - - // partition spec in DROP PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 DROP PARTITION (A='1', B='5')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part5)) - - // use int literal as partition value for int type partition column - sql("ALTER TABLE tab1 DROP PARTITION (a=9, b=9)") - assert(catalog.listPartitions(tableIdent).isEmpty) - } - protected def testRenamePartitions(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..5ad182bc689b9 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { + override def version: String = "V1" + override def catalog: String = CatalogManager.SESSION_CATALOG_NAME + override def defaultUsing: String = "USING parquet" + + override protected val notFullPartitionSpecErr = "The following partitions not found in table" +} + +class AlterTableDropPartitionSuite + extends AlterTableDropPartitionSuiteBase + with SharedSparkSession { + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..608e7d7c98f6f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableDropPartitionSuite + extends command.AlterTableDropPartitionSuiteBase + with SharedSparkSession { + + override def version: String = "V2" + override def catalog: String = "test_catalog" + override def defaultUsing: String = "USING _" + + override protected val notFullPartitionSpecErr = "Partition spec is invalid" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } + + test("SPARK-33650: drop partition into a table which doesn't support partition management") { + withNsTable("ns", "tbl", s"non_part_$catalog") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + }.getMessage + assert(errMsg.contains("can not alter partitions")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 488b52aa7bd45..f8a5c7f57eec5 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -163,10 +163,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testRenamePartitions(isDatasourceTable = false) } - test("alter table: drop partition") { - testDropPartitions(isDatasourceTable = false) - } - test("drop table") { testDropTable(isDatasourceTable = false) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala new file mode 100644 index 0000000000000..fe26466cdad62 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.hive.test.TestHiveSingleton + +class AlterTableDropPartitionSuite + extends v1.AlterTableDropPartitionSuiteBase + with TestHiveSingleton { + + override def version: String = "Hive V1" + override def defaultUsing: String = "USING HIVE" + + override protected val notFullPartitionSpecErr = "No partition is dropped" + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("No partition is dropped")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } +} From 03042529e3c7bfd03185e5d751086173766926c3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 15 Dec 2020 18:29:28 +0900 Subject: [PATCH 0769/1009] [SPARK-33273][SQL] Fix a race condition in subquery execution ### What changes were proposed in this pull request? If we call `SubqueryExec.executeTake`, it will call `SubqueryExec.execute` which will trigger the codegen of the query plan and create an RDD. However, `SubqueryExec` already has a thread (`SubqueryExec.relationFuture`) to execute the query plan, which means we have 2 threads triggering codegen of the same query plan at the same time. Spark codegen is not thread-safe, as we have places like `HashAggregateExec.bufferVars` that is a shared variable. The bug in `SubqueryExec` may lead to correctness bugs. Since https://issues.apache.org/jira/browse/SPARK-33119, `ScalarSubquery` will call `SubqueryExec.executeTake`, so flaky tests start to appear. This PR fixes the bug by reimplementing https://github.com/apache/spark/pull/30016 . We should pass the number of rows we want to collect to `SubqueryExec` at planning time, so that we can use `executeTake` inside `SubqueryExec.relationFuture`, and the caller side should always call `SubqueryExec.executeCollect`. This PR also adds checks so that we can make sure only `SubqueryExec.executeCollect` is called. ### Why are the changes needed? fix correctness bug. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? run `build/sbt "sql/testOnly *SQLQueryTestSuite -- -z scalar-subquery-select"` more than 10 times. Previously it fails, now it passes. Closes #30765 from cloud-fan/bug. 
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../adaptive/InsertAdaptiveSparkPlan.scala | 3 +- .../execution/basicPhysicalOperators.scala | 35 +++++++++++++++---- .../apache/spark/sql/execution/subquery.scala | 6 ++-- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index f8478f860b2d5..cd0503fb8a147 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -120,7 +120,8 @@ case class InsertAdaptiveSparkPlan( if !subqueryMap.contains(exprId.id) => val executedPlan = compileSubquery(p) verifyAdaptivePlan(executedPlan, p) - val subquery = SubqueryExec(s"subquery#${exprId.id}", executedPlan) + val subquery = SubqueryExec.createForScalarSubquery( + s"subquery#${exprId.id}", executedPlan) subqueryMap.put(exprId.id, subquery) case expressions.InSubquery(_, ListQuery(query, _, exprId, _)) if !subqueryMap.contains(exprId.id) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index 80a4090ce03f3..fcf77e588fc60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -765,7 +765,7 @@ abstract class BaseSubqueryExec extends SparkPlan { /** * Physical plan for a subquery. */ -case class SubqueryExec(name: String, child: SparkPlan) +case class SubqueryExec(name: String, child: SparkPlan, maxNumRows: Option[Int] = None) extends BaseSubqueryExec with UnaryExecNode { override lazy val metrics = Map( @@ -784,7 +784,11 @@ case class SubqueryExec(name: String, child: SparkPlan) SQLExecution.withExecutionId(sqlContext.sparkSession, executionId) { val beforeCollect = System.nanoTime() // Note that we use .executeCollect() because we don't want to convert data to Scala types - val rows: Array[InternalRow] = child.executeCollect() + val rows: Array[InternalRow] = if (maxNumRows.isDefined) { + child.executeTake(maxNumRows.get) + } else { + child.executeCollect() + } val beforeBuild = System.nanoTime() longMetric("collectTime") += NANOSECONDS.toMillis(beforeBuild - beforeCollect) val dataSize = rows.map(_.asInstanceOf[UnsafeRow].getSizeInBytes.toLong).sum @@ -797,28 +801,45 @@ case class SubqueryExec(name: String, child: SparkPlan) } protected override def doCanonicalize(): SparkPlan = { - SubqueryExec("Subquery", child.canonicalized) + SubqueryExec("Subquery", child.canonicalized, maxNumRows) } protected override def doPrepare(): Unit = { relationFuture } + // `SubqueryExec` should only be used by calling `executeCollect`. It launches a new thread to + // collect the result of `child`. We should not trigger codegen of `child` again in other threads, + // as generating code is not thread-safe. 
+ override def executeCollect(): Array[InternalRow] = { + ThreadUtils.awaitResult(relationFuture, Duration.Inf) + } + protected override def doExecute(): RDD[InternalRow] = { - child.execute() + throw new IllegalStateException("SubqueryExec.doExecute should never be called") } - override def executeCollect(): Array[InternalRow] = { - ThreadUtils.awaitResult(relationFuture, Duration.Inf) + override def executeTake(n: Int): Array[InternalRow] = { + throw new IllegalStateException("SubqueryExec.executeTake should never be called") + } + + override def executeTail(n: Int): Array[InternalRow] = { + throw new IllegalStateException("SubqueryExec.executeTail should never be called") } - override def stringArgs: Iterator[Any] = super.stringArgs ++ Iterator(s"[id=#$id]") + override def stringArgs: Iterator[Any] = Iterator(name, child) ++ Iterator(s"[id=#$id]") } object SubqueryExec { private[execution] val executionContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("subquery", SQLConf.get.getConf(StaticSQLConf.SUBQUERY_MAX_THREAD_THRESHOLD))) + + def createForScalarSubquery(name: String, child: SparkPlan): SubqueryExec = { + // Scalar subquery needs only one row. We require 2 rows here to validate if the scalar query is + // invalid(return more than one row). We don't need all the rows as it may OOM. + SubqueryExec(name, child, maxNumRows = Some(2)) + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 5e222d2e48769..0080b73575de1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -80,8 +80,7 @@ case class ScalarSubquery( @volatile private var updated: Boolean = false def updateResult(): Unit = { - // Only return the first two rows as an array to avoid Driver OOM. - val rows = plan.executeTake(2) + val rows = plan.executeCollect() if (rows.length > 1) { sys.error(s"more than one row returned by a subquery used as an expression:\n$plan") } @@ -178,7 +177,8 @@ case class PlanSubqueries(sparkSession: SparkSession) extends Rule[SparkPlan] { case subquery: expressions.ScalarSubquery => val executedPlan = QueryExecution.prepareExecutedPlan(sparkSession, subquery.plan) ScalarSubquery( - SubqueryExec(s"scalar-subquery#${subquery.exprId.id}", executedPlan), + SubqueryExec.createForScalarSubquery( + s"scalar-subquery#${subquery.exprId.id}", executedPlan), subquery.exprId) case expressions.InSubquery(values, ListQuery(query, _, exprId, _)) => val expr = if (values.length == 1) { From 20f6d63bc109284f6f9daf5da20cb2fef560628a Mon Sep 17 00:00:00 2001 From: Chongguang LIU Date: Tue, 15 Dec 2020 18:55:48 +0900 Subject: [PATCH 0770/1009] [SPARK-33769][SQL] Improve the next-day function of the sql component to deal with Column type ### What changes were proposed in this pull request? The proposition of this pull request is described in this JIRA ticket: [https://issues.apache.org/jira/browse/SPARK-33769](url) It proposes to improve the next-day function of the sql component to deal with Column type for the parameter dayOfWeek. ### Why are the changes needed? It makes this functionality easier to use. Actually the signature of this function is: > def next_day(date: Column, dayOfWeek: String): Column. It accepts the dayOfWeek parameter as a String. However in some cases, the dayOfWeek is in a Column, so a different value for each row of the dataframe. 
A current workaround is to use the NextDay function like this: > NextDay(dateCol.expr, dayOfWeekCol.expr). The proposition is to add another signature for this function: > def next_day(date: Column, dayOfWeek: Column): Column In fact, this is already the case for some other functions in this Scala object, for example: > def date_sub(start: Column, days: Int): Column = date_sub(start, lit(days)) > def date_sub(start: Column, days: Column): Column = withExpr { DateSub(start.expr, days.expr) } or > def add_months(startDate: Column, numMonths: Int): Column = add_months(startDate, lit(numMonths)) > def add_months(startDate: Column, numMonths: Column): Column = withExpr { > AddMonths(startDate.expr, numMonths.expr) > } This pull request applies the same idea to the next_day function. ### Does this PR introduce _any_ user-facing change? Yes. With this pull request, users of Spark will have a new signature of the function: > def next_day(date: Column, dayOfWeek: Column): Column But the existing function signature should still work: > def next_day(date: Column, dayOfWeek: String): Column So this change should be backward compatible. ### How was this patch tested? The unit tests of the next_day function have been enhanced. They test the dayOfWeek parameter both as a String and as a Column. I also added a test case for the existing signature where the dayOfWeek is an invalid String. This should return null. Closes #30761 from chongguang/SPARK-33769. Authored-by: Chongguang LIU Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/functions.scala | 22 +++++++++++++++++-- .../apache/spark/sql/DateFunctionsSuite.scala | 18 +++++++++++---- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index ede2b52930a17..4defcb836a978 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3074,8 +3074,26 @@ object functions { * @group datetime_funcs * @since 1.5.0 */ - def next_day(date: Column, dayOfWeek: String): Column = withExpr { - NextDay(date.expr, lit(dayOfWeek).expr) + def next_day(date: Column, dayOfWeek: String): Column = next_day(date, lit(dayOfWeek)) + + /** + * Returns the first date which is later than the value of the `date` column that is on the + * specified day of the week. + * + * For example, `next_day('2015-07-27', "Sunday")` returns 2015-08-02 because that is the first + * Sunday after 2015-07-27. + * + * @param date A date, timestamp or string. If a string, the data must be in a format that + * can be cast to a date, such as `yyyy-MM-dd` or `yyyy-MM-dd HH:mm:ss.SSSS` + * @param dayOfWeek A column of the day of week. 
Case insensitive, and accepts: "Mon", "Tue", + * "Wed", "Thu", "Fri", "Sat", "Sun" + * @return A date, or null if `date` was a string that could not be cast to a date or if + * `dayOfWeek` was an invalid value + * @group datetime_funcs + * @since 3.2.0 + */ + def next_day(date: Column, dayOfWeek: Column): Column = withExpr { + NextDay(date.expr, dayOfWeek.expr) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala index d7bbf597ff983..b545d6097d71d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DateFunctionsSuite.scala @@ -372,11 +372,21 @@ class DateFunctionsSuite extends QueryTest with SharedSparkSession { val df1 = Seq(("mon", "2015-07-23"), ("tuesday", "2015-07-20")).toDF("dow", "d") val df2 = Seq(("th", "2015-07-23 00:11:22"), ("xx", "2015-07-24 11:22:33")).toDF("dow", "t") checkAnswer( - df1.select(next_day(col("d"), "MONDAY")), - Seq(Row(Date.valueOf("2015-07-27")), Row(Date.valueOf("2015-07-27")))) + df1.select( + next_day(col("d"), "MONDAY"), + next_day(col("d"), col("dow")), + next_day(col("d"), "NonValidDay")), + Seq( + Row(Date.valueOf("2015-07-27"), Date.valueOf("2015-07-27"), null), + Row(Date.valueOf("2015-07-27"), Date.valueOf("2015-07-21"), null))) checkAnswer( - df2.select(next_day(col("t"), "th")), - Seq(Row(Date.valueOf("2015-07-30")), Row(Date.valueOf("2015-07-30")))) + df2.select( + next_day(col("t"), "th"), + next_day(col("t"), col("dow")), + next_day(col("t"), "NonValidDay")), + Seq( + Row(Date.valueOf("2015-07-30"), Date.valueOf("2015-07-30"), null), + Row(Date.valueOf("2015-07-30"), null, null))) } def checkExceptionMessage(df: DataFrame): Unit = { From 58cb2bae747a09caff194007b5c40f19b84f7c40 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 15 Dec 2020 19:20:01 +0900 Subject: [PATCH 0771/1009] [SPARK-33752][SQL] Avoid the getSimpleMessage of AnalysisException adds semicolon repeatedly ### What changes were proposed in this pull request? The current `getSimpleMessage` of `AnalysisException` may append a semicolon repeatedly. An example is shown below: `select decode()` The output will be: ``` org.apache.spark.sql.AnalysisException Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 ``` ### Why are the changes needed? Fixes a bug where a semicolon is appended repeatedly. ### Does this PR introduce _any_ user-facing change? Yes. The message of AnalysisException will be correct. ### How was this patch tested? Jenkins test. Closes #30724 from beliefer/SPARK-33752. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: HyukjinKwon --- .../apache/spark/sql/AnalysisException.scala | 4 +- .../sql-tests/results/ansi/datetime.sql.out | 4 +- .../results/ansi/parse-schema-string.sql.out | 4 +- .../results/ansi/string-functions.sql.out | 6 +- .../sql-tests/results/change-column.sql.out | 10 +- .../results/columnresolution-negative.sql.out | 2 +- .../resources/sql-tests/results/count.sql.out | 2 +- .../sql-tests/results/csv-functions.sql.out | 12 +- .../sql-tests/results/cte-nested.sql.out | 16 +- .../sql-tests/results/datetime-legacy.sql.out | 4 +- .../sql-tests/results/datetime.sql.out | 4 +- .../results/describe-table-column.sql.out | 4 +- .../sql-tests/results/describe.sql.out | 8 +- .../sql-tests/results/except-all.sql.out | 4 +- .../sql-tests/results/extract.sql.out | 10 +- .../sql-tests/results/group-analytics.sql.out | 12 +- .../sql-tests/results/group-by-filter.sql.out | 10 +- .../results/group-by-ordinal.sql.out | 6 +- .../sql-tests/results/group-by.sql.out | 18 +-- .../sql-tests/results/grouping_set.sql.out | 2 +- .../sql-tests/results/having.sql.out | 2 +- .../sql-tests/results/intersect-all.sql.out | 4 +- .../sql-tests/results/json-functions.sql.out | 12 +- .../resources/sql-tests/results/limit.sql.out | 12 +- .../resources/sql-tests/results/pivot.sql.out | 14 +- .../postgreSQL/aggregates_part1.sql.out | 2 +- .../postgreSQL/aggregates_part3.sql.out | 2 +- .../results/postgreSQL/create_view.sql.out | 28 ++-- .../results/postgreSQL/limit.sql.out | 2 +- .../results/postgreSQL/numeric.sql.out | 2 +- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/postgreSQL/strings.sql.out | 16 +- .../results/postgreSQL/window_part3.sql.out | 10 +- .../sql-tests/results/postgreSQL/with.sql.out | 2 +- .../results/regexp-functions.sql.out | 4 +- .../sql-tests/results/show-tables.sql.out | 8 +- .../sql-tests/results/show-views.sql.out | 2 +- .../sql-tests/results/show_columns.sql.out | 2 +- .../results/string-functions.sql.out | 6 +- .../subquery/in-subquery/in-basic.sql.out | 2 +- .../invalid-correlation.sql.out | 9 +- .../subq-input-typecheck.sql.out | 10 +- .../native/widenSetOperationTypes.sql.out | 140 +++++++++--------- .../postgreSQL/udf-aggregates_part1.sql.out | 2 +- .../postgreSQL/udf-aggregates_part3.sql.out | 2 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../results/udf/udf-except-all.sql.out | 4 +- .../results/udf/udf-group-analytics.sql.out | 12 +- .../results/udf/udf-group-by.sql.out | 18 +-- .../results/udf/udf-intersect-all.sql.out | 4 +- .../sql-tests/results/udf/udf-pivot.sql.out | 14 +- .../sql-tests/results/udf/udf-window.sql.out | 2 +- .../sql-tests/results/window.sql.out | 8 +- .../spark/sql/ColumnExpressionSuite.scala | 2 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 4 +- .../command/ShowTablesSuiteBase.scala | 2 +- .../spark/sql/internal/SQLConfSuite.scala | 4 +- .../sql/sources/BucketedWriteSuite.scala | 10 +- .../sql/sources/PartitionedWriteSuite.scala | 2 +- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 60 files changed, 264 insertions(+), 265 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala index f5c87677ab9eb..1dfbff5c6df5b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/AnalysisException.scala @@ -48,9 +48,11 @@ class AnalysisException protected[sql] ( // Outputs an 
exception without the logical plan. // For testing only - def getSimpleMessage: String = { + def getSimpleMessage: String = if (line.isDefined || startPosition.isDefined) { val lineAnnotation = line.map(l => s" line $l").getOrElse("") val positionAnnotation = startPosition.map(p => s" pos $p").getOrElse("") s"$message;$lineAnnotation$positionAnnotation" + } else { + message } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 400c8d6c3c84f..3e307a92c10f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -453,7 +453,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -494,7 +494,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out index e12d988a57672..bfbf11d54489c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/parse-schema-string.sql.out @@ -21,7 +21,7 @@ no viable alternative at input 'create'(line 1, pos 0) == SQL == create INT ^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -51,7 +51,7 @@ no viable alternative at input 'create'(line 1, pos 0) == SQL == create INT ^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 3164d462f8464..dd085a6437e13 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -302,7 +302,7 @@ select decode() struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 0; line 1 pos 7 -- !query @@ -311,7 +311,7 @@ select decode(encode('abc', 'utf-8')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 +Invalid number of arguments for function decode. 
Expected: 2; Found: 1; line 1 pos 7 -- !query @@ -359,4 +359,4 @@ select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattl -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out index b1a32ad1f63e9..96b28d734f5a7 100644 --- a/sql/core/src/test/resources/sql-tests/results/change-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/change-column.sql.out @@ -50,7 +50,7 @@ ALTER TABLE test_change RENAME COLUMN a TO a1 struct<> -- !query output org.apache.spark.sql.AnalysisException -RENAME COLUMN is only supported with v2 tables.; +RENAME COLUMN is only supported with v2 tables. -- !query @@ -69,7 +69,7 @@ ALTER TABLE test_change CHANGE a TYPE STRING struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType'; +ALTER TABLE CHANGE COLUMN is not supported for changing column 'a' with type 'IntegerType' to 'a' with type 'StringType' -- !query @@ -88,7 +88,7 @@ ALTER TABLE test_change CHANGE a AFTER b struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables. -- !query @@ -97,7 +97,7 @@ ALTER TABLE test_change CHANGE b FIRST struct<> -- !query output org.apache.spark.sql.AnalysisException -ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.; +ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables. -- !query @@ -176,7 +176,7 @@ ALTER TABLE test_change CHANGE invalid_col TYPE INT struct<> -- !query output org.apache.spark.sql.AnalysisException -Can't find column `invalid_col` given table data columns [`a`, `b`, `c`]; +Can't find column `invalid_col` given table data columns [`a`, `b`, `c`] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 04ddfe0ac128c..ea321638b219e 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -195,7 +195,7 @@ SELECT t1.x.y.* FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 't1.x.y.*' given input columns 'i1'; +cannot resolve 't1.x.y.*' given input columns 'i1' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/count.sql.out b/sql/core/src/test/resources/sql-tests/results/count.sql.out index 64614b5b67784..ffd75d6a09e1c 100644 --- a/sql/core/src/test/resources/sql-tests/results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/count.sql.out @@ -125,4 +125,4 @@ SELECT count() FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 \ No newline at end of file +cannot resolve 'count()' due to data type mismatch: count requires at least one argument.; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index ed2341f71a1b0..2131487f3500a 100644 --- 
a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -24,7 +24,7 @@ select from_csv('1', 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -The expression '1' is not a valid schema string.;; line 1 pos 7 +The expression '1' is not a valid schema string.; line 1 pos 7 -- !query @@ -46,7 +46,7 @@ DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -55,7 +55,7 @@ select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -64,7 +64,7 @@ select from_csv('1', 'a INT', map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query @@ -148,7 +148,7 @@ select to_csv(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -157,4 +157,4 @@ select to_csv(named_struct('a', 1, 'b', 2), map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index 2f736c7b4978f..a8db4599dafcc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -48,7 +48,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -85,7 +85,7 @@ SELECT * FROM t2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -139,7 +139,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. 
See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -154,7 +154,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -170,7 +170,7 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -184,7 +184,7 @@ WHERE c IN ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name t is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -213,7 +213,7 @@ SELECT * FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. -- !query @@ -226,4 +226,4 @@ SELECT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228.; +Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. 
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 7e4ea78bf46b9..ed54b72111ed5 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -430,7 +430,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -471,7 +471,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 01db4c1c11fe4..213895dcb4bcb 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -430,7 +430,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_add' function needs to be an integer.; +The second argument of 'date_add' function needs to be an integer. -- !query @@ -471,7 +471,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.sql.AnalysisException -The second argument of 'date_sub' function needs to be an integer.; +The second argument of 'date_sub' function needs to be an integer. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index c6d3d45879eb1..22ef8e13c36a8 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -77,7 +77,7 @@ DESC desc_col_temp_view key1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Column key1 does not exist; +Column key1 does not exist -- !query @@ -188,7 +188,7 @@ DESC FORMATTED desc_complex_col_table col.x struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC TABLE COLUMN command does not support nested data types: col.x; +DESC TABLE COLUMN command does not support nested data types: col.x -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 2674d055ac450..ebec2e1976b15 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -332,7 +332,7 @@ struct<> org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 't' database 'default': c -> Us -d -> 2; +d -> 2 -- !query @@ -341,7 +341,7 @@ DESC t PARTITION (c='Us') struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`'; +Partition spec is invalid. 
The spec (c) must match the partition spec (c, d) defined in table '`default`.`t`' -- !query @@ -431,7 +431,7 @@ DESC temp_v PARTITION (c='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC PARTITION is not allowed on a temporary view: temp_v; +DESC PARTITION is not allowed on a temporary view: temp_v -- !query @@ -510,7 +510,7 @@ DESC v PARTITION (c='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC PARTITION is not allowed on a view: v; +DESC PARTITION is not allowed on a view: v -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out index 601ff8f024214..a1fe952e2c032 100644 --- a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out @@ -141,7 +141,7 @@ SELECT array(1) struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table -- !query @@ -213,7 +213,7 @@ SELECT k, v FROM tab4 struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/extract.sql.out b/sql/core/src/test/resources/sql-tests/results/extract.sql.out index 9d3fe5d17fafa..5415b2c30a308 100644 --- a/sql/core/src/test/resources/sql-tests/results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/extract.sql.out @@ -320,7 +320,7 @@ select extract(not_supported from c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the string type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the string type.; line 1 pos 7 -- !query @@ -329,7 +329,7 @@ select extract(not_supported from i) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the interval type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the interval type.; line 1 pos 7 -- !query @@ -642,7 +642,7 @@ select date_part('not_supported', c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -Literals of type 'not_supported' are currently not supported for the string type.;; line 1 pos 7 +Literals of type 'not_supported' are currently not supported for the string type.; line 1 pos 7 -- !query @@ -651,7 +651,7 @@ select date_part(c, c) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.;; line 1 pos 7 +The field parameter needs to be a foldable string value.; line 1 pos 7 -- !query @@ -668,7 +668,7 @@ select date_part(i, i) from t struct<> -- !query output org.apache.spark.sql.AnalysisException -The field parameter needs to be a foldable string value.;; line 1 pos 7 +The field parameter needs to be a foldable string value.; line 1 pos 7 -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out index c4f9ea1fe026a..b820fb49b09ba 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-analytics.sql.out @@ -210,7 +210,7 @@ SELECT course, year, GROUPING(course) FROM courseSales GROUP BY course, year struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping() can only be used with GroupingSets/Cube/Rollup; +grouping() can only be used with GroupingSets/Cube/Rollup -- !query @@ -219,7 +219,7 @@ SELECT course, year, GROUPING_ID(course, year) FROM courseSales GROUP BY course, struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -255,7 +255,7 @@ SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING(cours struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -264,7 +264,7 @@ SELECT course, year FROM courseSales GROUP BY course, year HAVING GROUPING_ID(co struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -319,7 +319,7 @@ SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING(cou struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -328,7 +328,7 @@ SELECT course, year FROM courseSales GROUP BY course, year ORDER BY GROUPING_ID( struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out index 149e031e8829c..55a41907dd3b4 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-filter.sql.out @@ -51,7 +51,7 @@ SELECT a, COUNT(b) FILTER (WHERE a >= 2) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) FILTER (WHERE (testdata.`a` >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) FILTER (WHERE (testdata.`a` >= 2)) AS `count(b) FILTER (WHERE (a >= 2))`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. 
-- !query @@ -231,7 +231,7 @@ SELECT a, COUNT(b) FILTER (WHERE a != 2) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -711,7 +711,7 @@ SELECT a + 2, COUNT(b) FILTER (WHERE b IN (1, 2)) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -804,7 +804,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -832,7 +831,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -859,7 +857,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query @@ -886,7 +883,6 @@ IN/EXISTS predicate sub-queries can only be used in Filter/Join and a few comman +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] -; -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out index bf9f606a2224e..fedc7205ae559 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out @@ -122,7 +122,7 @@ select a, b, sum(b) from data group by 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found sum(CAST(data.`b` AS BIGINT)); +aggregate functions are not allowed in GROUP BY, but found sum(CAST(data.`b` AS BIGINT)) -- !query @@ -131,7 +131,7 @@ select a, b, sum(b) + 2 from data group by 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found (sum(CAST(data.`b` AS BIGINT)) + CAST(2 AS BIGINT)); +aggregate functions are not allowed in GROUP BY, but found (sum(CAST(data.`b` AS BIGINT)) + CAST(2 AS BIGINT)) -- !query @@ -155,7 +155,7 @@ select * from data group by a, b, 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Star (*) is not allowed in select list when GROUP BY ordinal position is used; +Star (*) is not allowed in select list when GROUP BY ordinal position is used -- !query diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 5d9553f804059..75bda87b37642 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -18,7 +18,7 @@ SELECT a, COUNT(b) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) AS `count(b)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(count(testdata.`b`) AS `count(b)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. -- !query @@ -46,7 +46,7 @@ SELECT a, COUNT(b) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -110,7 +110,7 @@ SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -167,7 +167,7 @@ SELECT COUNT(b) AS k FROM testData GROUP BY k struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found count(testdata.`b`); +aggregate functions are not allowed in GROUP BY, but found count(testdata.`b`) -- !query @@ -185,7 +185,7 @@ SELECT k AS a, COUNT(v) FROM testDataHasSameNameWithAlias GROUP BY a struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -274,7 +274,7 @@ SELECT id FROM range(10) HAVING id > 0 struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. -- !query @@ -548,7 +548,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
Expression in where clause: [(count(1) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -560,7 +560,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [((count(1) + 1L) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -572,7 +572,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] -Invalid expressions: [count(1), max(test_agg.`k`)]; +Invalid expressions: [count(1), max(test_agg.`k`)] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 7089e10cdef27..e1f94ddd02fe5 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -165,7 +165,7 @@ SELECT c1 FROM (values (1,2), (3,2)) t(c1, c2) GROUP BY GROUPING SETS (()) struct<> -- !query output org.apache.spark.sql.AnalysisException -expression '`c1`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression '`c1`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 6508143e6f9fe..237015d06ce81 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/having.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 9 +-- Number of queries: 13 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out index b99f63393cc4d..caba8c6942c55 100644 --- a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out @@ -98,7 +98,7 @@ SELECT array(1), 2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +IntersectAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table -- !query @@ -109,7 +109,7 @@ SELECT k, v FROM tab2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 838e4607d0324..b14e3e1558fb0 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -72,7 +72,7 @@ select to_json(named_struct('a', 1, 'b', 2), named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -81,7 +81,7 @@ select to_json(named_struct('a', 1, 'b', 2), map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query @@ -115,7 +115,7 @@ select from_json('{"a":1}', 1) struct<> -- !query output org.apache.spark.sql.AnalysisException -The expression '1' is not a valid schema string.;; line 1 pos 7 +The expression '1' is not a valid schema string.; line 1 pos 7 -- !query @@ -137,7 +137,7 @@ DataType invalidtype is not supported.(line 1, pos 2) == SQL == a InvalidType --^^^ -;; line 1 pos 7 +; line 1 pos 7 -- !query @@ -146,7 +146,7 @@ select from_json('{"a":1}', 'a INT', named_struct('mode', 'PERMISSIVE')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Must use a map() function for options;; line 1 pos 7 +Must use a map() function for options; line 1 pos 7 -- !query @@ -155,7 +155,7 @@ select from_json('{"a":1}', 'a INT', map('mode', 1)) struct<> -- !query output org.apache.spark.sql.AnalysisException -A type of keys and values in map() must be string, but got map;; line 1 pos 7 +A type of keys and values in map() must be string, but got map; line 1 pos 7 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index 074e7a6d28c47..8e324628c6299 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -53,7 +53,7 @@ SELECT * FROM testdata LIMIT -1 struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be equal to or greater than 0, but got -1; +The limit expression must be equal to or greater than 0, but got -1 -- !query @@ -62,7 +62,7 @@ SELECT * FROM testData TABLESAMPLE (-1 ROWS) struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be equal to or greater than 0, but got -1; +The limit expression must be equal to or greater than 0, but got -1 -- !query @@ -79,7 +79,7 @@ SELECT * FROM testdata LIMIT CAST(NULL AS INT) struct<> -- !query output org.apache.spark.sql.AnalysisException -The evaluated limit expression must not be null, but got CAST(NULL AS INT); +The evaluated limit expression must not be null, but got CAST(NULL AS INT) -- !query @@ -88,7 +88,7 
@@ SELECT * FROM testdata LIMIT key > 3 struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (spark_catalog.default.testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (spark_catalog.default.testdata.`key` > 3) -- !query @@ -97,7 +97,7 @@ SELECT * FROM testdata LIMIT true struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got boolean; +The limit expression must be integer type, but got boolean -- !query @@ -106,7 +106,7 @@ SELECT * FROM testdata LIMIT 'a' struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must be integer type, but got string; +The limit expression must be integer type, but got string -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out index bb0d452fa04a1..968319fbb7efe 100644 --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out @@ -202,7 +202,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function. -- !query @@ -217,7 +217,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function. -- !query @@ -262,7 +262,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. -- !query @@ -313,7 +313,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; +Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct -- !query @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x'; +Literal expressions required for pivot values, found 'course#x' -- !query @@ -458,7 +458,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'm#x'. Pivot columns must be comparable.; +Invalid pivot column 'm#x'. Pivot columns must be comparable. -- !query @@ -475,7 +475,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable.; +Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out index 212365f92946c..cc8f99ff4f453 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part1.sql.out @@ -382,7 +382,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT)) = CAST(b.`four` AS BIGINT))] -Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))]; +Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out index e1f735e5fe1dc..86ebb575ebce9 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part3.sql.out @@ -8,7 +8,7 @@ select max(min(unique1)) from tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 7d331f24b9215..1ac7c4a4069b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -56,7 +56,7 @@ CREATE VIEW key_dependent_view AS struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'spark_catalog.default.view_base_table.`data`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'spark_catalog.default.view_base_table.`data`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -266,7 +266,7 @@ CREATE VIEW v1_temp AS SELECT * FROM temp_table struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v1_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v1_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -322,7 +322,7 @@ CREATE VIEW temp_view_test.v3_temp AS SELECT * FROM temp_table struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v3_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v3_temp` by referencing a temporary view temp_table. 
Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -371,7 +371,7 @@ CREATE VIEW v4_temp AS struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v4_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v4_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -383,7 +383,7 @@ CREATE VIEW v5_temp AS struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v5_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v5_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -542,7 +542,7 @@ CREATE VIEW v6_temp AS SELECT * FROM base_table WHERE id IN (SELECT id FROM temp struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v6_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v6_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -551,7 +551,7 @@ CREATE VIEW v7_temp AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM tem struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v7_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v7_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -560,7 +560,7 @@ CREATE VIEW v8_temp AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM temp struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v8_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v8_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -569,7 +569,7 @@ CREATE VIEW v9_temp AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `temp_view_test`.`v9_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `temp_view_test`.`v9_temp` by referencing a temporary view temp_table. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -678,7 +678,7 @@ CREATE VIEW temporal1 AS SELECT * FROM t1 CROSS JOIN tt struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal1` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal1` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -719,7 +719,7 @@ CREATE VIEW temporal2 AS SELECT * FROM t1 INNER JOIN tt ON t1.num = tt.num2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal2` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal2` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -760,7 +760,7 @@ CREATE VIEW temporal3 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal3` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal3` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -801,7 +801,7 @@ CREATE VIEW temporal4 AS SELECT * FROM t1 LEFT JOIN tt ON t1.num = tt.num2 AND t struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal4` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal4` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW -- !query @@ -810,7 +810,7 @@ CREATE VIEW temporal5 AS SELECT * FROM t1 WHERE num IN (SELECT num FROM t1 WHERE struct<> -- !query output org.apache.spark.sql.AnalysisException -Not allowed to create a permanent view `testviewschm2`.`temporal5` by referencing a temporary view tt. Please create a temp view instead by CREATE TEMP VIEW; +Not allowed to create a permanent view `testviewschm2`.`temporal5` by referencing a temporary view tt. 
Please create a temp view instead by CREATE TEMP VIEW -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out index 2c8bc31dbc6ca..b0f3482f0a282 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/limit.sql.out @@ -59,7 +59,7 @@ select * from int8_tbl limit (case when random() < 0.5 then bigint(null) end) struct<> -- !query output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got CASE WHEN (`_nondeterministic` < CAST(0.5BD AS DOUBLE)) THEN CAST(NULL AS BIGINT) END; +The limit expression must evaluate to a constant value, but got CASE WHEN (`_nondeterministic` < CAST(0.5BD AS DOUBLE)) THEN CAST(NULL AS BIGINT) END -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index fc2961a072e9f..fdad837e14b61 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -3830,7 +3830,7 @@ INSERT INTO num_result SELECT t1.id, t2.id, t1.val, t2.val, t1.val * t2.val struct<> -- !query output org.apache.spark.sql.AnalysisException -`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s).; +`default`.`num_result` requires that the data to be inserted have the same number of columns as the target table: target table has 3 column(s) but the inserted data has 5 column(s), including 0 partition column(s) having constant value(s). -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index e4b7f3b1f5e88..f504e4b6c6dad 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -143,7 +143,7 @@ SELECT a FROM test_having HAVING min(a) < max(a) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out index e8a3a9b9731a6..13cc8a8754025 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/strings.sql.out @@ -446,7 +446,7 @@ SELECT 'maca' LIKE 'm%aca' ESCAPE '%' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -455,7 +455,7 @@ SELECT 'maca' NOT LIKE 'm%aca' ESCAPE '%' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%aca' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -464,7 +464,7 @@ SELECT 'ma%a' LIKE 'm%a%%a' ESCAPE '%' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -473,7 +473,7 @@ SELECT 'ma%a' NOT LIKE 'm%a%%a' ESCAPE '%' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a'; +the pattern 'm%a%%a' is invalid, the escape character is not allowed to precede 'a' -- !query @@ -482,7 +482,7 @@ SELECT 'bear' LIKE 'b_ear' ESCAPE '_' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -491,7 +491,7 @@ SELECT 'bear' NOT LIKE 'b_ear' ESCAPE '_' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_ear' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -500,7 +500,7 @@ SELECT 'be_r' LIKE 'b_e__r' ESCAPE '_' AS `true` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e' -- !query @@ -509,7 +509,7 @@ SELECT 'be_r' NOT LIKE 'b_e__r' ESCAPE '_' AS `false` struct<> -- !query output org.apache.spark.sql.AnalysisException -the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e'; +the pattern 'b_e__r' is invalid, the escape character is not allowed to precede 'e' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 0e177f7ea82bd..88aee38c4504e 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -295,7 +295,7 @@ SELECT * FROM empsalary WHERE row_number() OVER (ORDER BY salary) < 10 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside 
WHERE clause -- !query @@ -307,7 +307,7 @@ org.apache.spark.sql.AnalysisException The query operator `Join` contains one or more unsupported expression types Aggregate, Window or Generate. -Invalid expressions: [row_number() OVER (ORDER BY spark_catalog.default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; +Invalid expressions: [row_number() OVER (ORDER BY spark_catalog.default.empsalary.`salary` ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] -- !query @@ -319,7 +319,7 @@ org.apache.spark.sql.AnalysisException The query operator `Aggregate` contains one or more unsupported expression types Aggregate, Window or Generate. -Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)]; +Invalid expressions: [RANK() OVER (ORDER BY 1 ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)] -- !query @@ -342,7 +342,7 @@ SELECT * FROM empsalary WHERE (rank() OVER (ORDER BY random())) > 10 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside WHERE clause -- !query @@ -351,7 +351,7 @@ SELECT * FROM empsalary WHERE rank() OVER (ORDER BY random()) struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use window functions inside WHERE clause; +It is not allowed to use window functions inside WHERE clause -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out index badafc9e659e2..1432bcce42e76 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/with.sql.out @@ -385,7 +385,7 @@ WITH test AS (SELECT 42) INSERT INTO test VALUES (1) struct<> -- !query output org.apache.spark.sql.AnalysisException -Table not found: test; +Table not found: test -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index 8d471a5bb1c87..f2a4131818bfb 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 37 +-- Number of queries: 40 -- !query @@ -333,4 +333,4 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 60c5e6d5642b7..611b0b750c2cd 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -206,7 +206,7 @@ SHOW TABLE EXTENDED LIKE 'show_t*' PARTITION(c='Us', d=1) struct<> -- !query output org.apache.spark.sql.catalyst.analysis.NoSuchTableException -Table or view 'show_t*' not found in database 'showdb'; +Table or view 'show_t*' not found in database 'showdb' -- !query @@ -215,7 +215,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(c='Us') struct<> -- !query output org.apache.spark.sql.AnalysisException -Partition spec is invalid. 
The spec (c) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`'; +Partition spec is invalid. The spec (c) must match the partition spec (c, d) defined in table '`showdb`.`show_t1`' -- !query @@ -224,7 +224,7 @@ SHOW TABLE EXTENDED LIKE 'show_t1' PARTITION(a='Us', d=1) struct<> -- !query output org.apache.spark.sql.AnalysisException -a is not a valid partition column in table `showdb`.`show_t1`.; +a is not a valid partition column in table `showdb`.`show_t1`. -- !query @@ -235,7 +235,7 @@ struct<> org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException Partition not found in table 'show_t1' database 'showdb': c -> Ch -d -> 1; +d -> 1 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out index d88790d8b5ec8..c80f8fab433fb 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-views.sql.out @@ -142,7 +142,7 @@ SHOW VIEWS IN wrongdb LIKE 'view_*' struct<> -- !query output org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException -Database 'wrongdb' not found; +Database 'wrongdb' not found -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 03df876133aa4..851e848ed4ec6 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -112,7 +112,7 @@ SHOW COLUMNS IN showdb.showcolumn1 FROM baddb struct<> -- !query output org.apache.spark.sql.AnalysisException -SHOW COLUMNS with conflicting databases: 'baddb' != 'showdb'; +SHOW COLUMNS with conflicting databases: 'baddb' != 'showdb' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 020a095d72e85..74627e7786997 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -298,7 +298,7 @@ select decode() struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 0;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 0; line 1 pos 7 -- !query @@ -307,7 +307,7 @@ select decode(encode('abc', 'utf-8')) struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid number of arguments for function decode. Expected: 2; Found: 1;; line 1 pos 7 +Invalid number of arguments for function decode. Expected: 2; Found: 1; line 1 pos 7 -- !query @@ -355,4 +355,4 @@ select decode(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattl -- !query schema struct -- !query output -NULL \ No newline at end of file +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out index a33f78abf27f9..639fe1775d2dc 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out @@ -49,7 +49,7 @@ number of columns in the output of subquery. Left side columns: [tab_a.`a1`, tab_a.`b1`]. 
Right side columns: -[`named_struct(a2, a2, b2, b2)`].; +[`named_struct(a2, a2, b2, b2)`]. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out index cd96eaf1b878b..e77afd886aeab 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -46,7 +46,7 @@ AND t2b = (SELECT max(avg) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 't2.`t2b`' is not an aggregate function. Wrap '(avg(CAST(t2.`t2b` AS BIGINT)) AS `avg`)' in windowing function(s) or wrap 't2.`t2b`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 't2.`t2b`' is not an aggregate function. Wrap '(avg(CAST(t2.`t2b` AS BIGINT)) AS `avg`)' in windowing function(s) or wrap 't2.`t2b`' in first() (or first_value) if you don't care which value you get. -- !query @@ -63,7 +63,7 @@ WHERE t1a IN (SELECT min(t2a) struct<> -- !query output org.apache.spark.sql.AnalysisException -Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]).; +Resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter t2c#x IN (list#x [t2b#x]). -- !query @@ -78,7 +78,7 @@ HAVING EXISTS (SELECT t2a struct<> -- !query output org.apache.spark.sql.AnalysisException -Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t1.`t1a` + t2.`t2a`)), Outer references: t1.`t1a`, Local references: t2.`t2a`.; +Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t1.`t1a` + t2.`t2a`)), Outer references: t1.`t1a`, Local references: t2.`t2a`. -- !query @@ -94,7 +94,7 @@ WHERE t1a IN (SELECT t2a struct<> -- !query output org.apache.spark.sql.AnalysisException -Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t2.`t2a` + t3.`t3a`)), Outer references: t2.`t2a`, Local references: t3.`t3a`.; +Found an aggregate expression in a correlated predicate that has both outer and local references, which is not supported yet. Aggregate expression: min((t2.`t2a` + t3.`t3a`)), Outer references: t2.`t2a`, Local references: t3.`t3a`. 
-- !query @@ -115,4 +115,3 @@ Aggregate [min(outer(t2a#x)) AS min(outer(t2.`t2a`))#x] +- Project [t3a#x, t3b#x, t3c#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x] -; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out index 776598127075b..a470775308092 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out @@ -64,7 +64,7 @@ FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Scalar subquery must return only one column, but got 2; +Scalar subquery must return only one column, but got 2 -- !query @@ -79,7 +79,7 @@ FROM t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Scalar subquery must return only one column, but got 2; +Scalar subquery must return only one column, but got 2 -- !query @@ -100,7 +100,7 @@ number of columns in the output of subquery. Left side columns: [t1.`t1a`]. Right side columns: -[t2.`t2a`, t2.`t2b`].; +[t2.`t2a`, t2.`t2b`]. -- !query @@ -121,7 +121,7 @@ number of columns in the output of subquery. Left side columns: [t1.`t1a`, t1.`t1b`]. Right side columns: -[t2.`t2a`].; +[t2.`t2a`]. -- !query @@ -143,4 +143,4 @@ Mismatched columns: Left side: [double, string, string]. Right side: -[timestamp, string, bigint].; +[timestamp, string, bigint]. diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out index 89b1cdb3e353d..a527b20dc04ff 100644 --- a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out @@ -88,7 +88,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> tinyint at the first column of the second table -- !query @@ -97,7 +97,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> tinyint at the first column of the second table -- !query @@ -106,7 +106,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as ti struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> tinyint at the first column of the second table -- !query @@ -115,7 +115,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> tinyint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> tinyint at the first column of the second table -- !query @@ -196,7 +196,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> smallint at the first column of the second table -- !query @@ -205,7 +205,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> smallint at the first column of the second table -- !query @@ -214,7 +214,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> smallint at the first column of the second table -- !query @@ -223,7 +223,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as dat struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> smallint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> smallint at the first column of the second table -- !query @@ -304,7 +304,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> int at the first column of the second table -- !query @@ -313,7 +313,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> int at the first column of the second table -- !query @@ -322,7 +322,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as timest struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> int at the first column of the second table -- !query @@ -331,7 +331,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) FR struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> int at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> int at the first column of the second table -- !query @@ -412,7 +412,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> bigint at the first column of the second table -- !query @@ -421,7 +421,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> bigint at the first column of the second table -- !query @@ -430,7 +430,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as tim struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> bigint at the first column of the second table -- !query @@ -439,7 +439,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> bigint at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> bigint at the first column of the second table -- !query @@ -520,7 +520,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> float at the first column of the second table -- !query @@ -529,7 +529,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> float at the first column of the second table -- !query @@ -538,7 +538,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as time struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> float at the first column of the second table -- !query @@ -547,7 +547,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> float at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> float at the first column of the second table -- !query @@ -628,7 +628,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> double at the first column of the second table -- !query @@ -637,7 +637,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> double at the first column of the second table -- !query @@ -646,7 +646,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as tim struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> double at the first column of the second table -- !query @@ -655,7 +655,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> double at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> double at the first column of the second table -- !query @@ -736,7 +736,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> decimal(10,0) at the first column of the second table -- !query @@ -745,7 +745,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> decimal(10,0) at the first column of the second table -- !query @@ -754,7 +754,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00.0 struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> decimal(10,0) at the first column of the second table -- !query @@ -763,7 +763,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast('2017-12-11 09:30:00' struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> decimal(10,0) at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> decimal(10,0) at the first column of the second table -- !query @@ -844,7 +844,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> string at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> string at the first column of the second table -- !query @@ -853,7 +853,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> string at the first column of the second table -- !query @@ -880,7 +880,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as tinyint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> binary at the first column of the second table -- !query @@ -889,7 +889,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as smallint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> binary at the first column of the second table -- !query @@ -898,7 +898,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as int) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> binary at the first column of the second table -- !query @@ -907,7 +907,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as bigint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> binary at the first column of the second table -- !query @@ -916,7 +916,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as float) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
float <> binary at the first column of the second table -- !query @@ -925,7 +925,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as double) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> binary at the first column of the second table -- !query @@ -934,7 +934,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> binary at the first column of the second table -- !query @@ -943,7 +943,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as string) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. string <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. string <> binary at the first column of the second table -- !query @@ -961,7 +961,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as boolean) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> binary at the first column of the second table -- !query @@ -970,7 +970,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. timestamp <> binary at the first column of the second table -- !query @@ -979,7 +979,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast('2017-12-11 09:30:00' as dat struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> binary at the first column of the second table -- !query @@ -988,7 +988,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as tinyint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> boolean at the first column of the second table -- !query @@ -997,7 +997,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as smallint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
smallint <> boolean at the first column of the second table -- !query @@ -1006,7 +1006,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as int) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> boolean at the first column of the second table -- !query @@ -1015,7 +1015,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as bigint) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> boolean at the first column of the second table -- !query @@ -1024,7 +1024,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as float) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> boolean at the first column of the second table -- !query @@ -1033,7 +1033,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as double) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> boolean at the first column of the second table -- !query @@ -1042,7 +1042,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as decimal(10, 0)) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> boolean at the first column of the second table -- !query @@ -1051,7 +1051,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as string) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. string <> boolean at the first column of the second table -- !query @@ -1060,7 +1060,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as binary) FROM t struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> boolean at the first column of the second table -- !query @@ -1077,7 +1077,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00.0' as ti struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. timestamp <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
timestamp <> boolean at the first column of the second table -- !query @@ -1086,7 +1086,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2017-12-11 09:30:00' as date struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table; +Union can only be performed on tables with the compatible column types. date <> boolean at the first column of the second table -- !query @@ -1095,7 +1095,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> timestamp at the first column of the second table -- !query @@ -1104,7 +1104,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> timestamp at the first column of the second table -- !query @@ -1113,7 +1113,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> timestamp at the first column of the second table -- !query @@ -1122,7 +1122,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> timestamp at the first column of the second table -- !query @@ -1131,7 +1131,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> timestamp at the first column of the second table -- !query @@ -1140,7 +1140,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. double <> timestamp at the first column of the second table -- !query @@ -1149,7 +1149,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> timestamp at the first column of the second table -- !query @@ -1167,7 +1167,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast('2' a struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> timestamp at the first column of the second table -- !query @@ -1176,7 +1176,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t UNION SELECT cast(2 as struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> timestamp at the first column of the second table -- !query @@ -1203,7 +1203,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as tinyint struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. tinyint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. tinyint <> date at the first column of the second table -- !query @@ -1212,7 +1212,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as smallin struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. smallint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. smallint <> date at the first column of the second table -- !query @@ -1221,7 +1221,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as int) FR struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. int <> date at the first column of the second table -- !query @@ -1230,7 +1230,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as bigint) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. bigint <> date at the first column of the second table -- !query @@ -1239,7 +1239,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as float) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. float <> date at the first column of the second table -- !query @@ -1248,7 +1248,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as double) struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. double <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. 
double <> date at the first column of the second table -- !query @@ -1257,7 +1257,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as decimal struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. decimal(10,0) <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. decimal(10,0) <> date at the first column of the second table -- !query @@ -1275,7 +1275,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast('2' as binar struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. binary <> date at the first column of the second table -- !query @@ -1284,7 +1284,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION SELECT cast(2 as boolean struct<> -- !query output org.apache.spark.sql.AnalysisException -Union can only be performed on tables with the compatible column types. boolean <> date at the first column of the second table; +Union can only be performed on tables with the compatible column types. boolean <> date at the first column of the second table -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out index a428a7a9c923b..0eb21d386378d 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -373,7 +373,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT)) = CAST(CAST(udf(ansi_cast(four as string)) AS INT) AS BIGINT))] -Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))]; +Invalid expressions: [sum(DISTINCT CAST((outer(a.`four`) + b.`four`) AS BIGINT))] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out index f491d9b9ba3a8..17b77a8a7aea9 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part3.sql.out @@ -8,7 +8,7 @@ select udf(max(min(unique1))) from tenk1 struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. 
-- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index 89fc36a0da827..e3d7eb169e818 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -143,7 +143,7 @@ SELECT udf(a) FROM test_having HAVING udf(min(a)) < udf(max(a)) struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'spark_catalog.default.test_having.`a`' is not an aggregate function. Wrap '(min(spark_catalog.default.test_having.`a`) AS `min(a#x)`, max(spark_catalog.default.test_having.`a`) AS `max(a#x)`)' in windowing function(s) or wrap 'spark_catalog.default.test_having.`a`' in first() (or first_value) if you don't care which value you get. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out index 2613120e004df..7a4ae72fac97b 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out @@ -141,7 +141,7 @@ SELECT array(1) struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +ExceptAll can only be performed on tables with the compatible column types. 
array <> int at the first column of the second table -- !query @@ -213,7 +213,7 @@ SELECT k, v FROM tab4 struct<> -- !query output org.apache.spark.sql.AnalysisException -ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +ExceptAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out index f4cf4196298c1..15620e34f2be8 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-analytics.sql.out @@ -210,7 +210,7 @@ SELECT course, udf(year), GROUPING(course) FROM courseSales GROUP BY course, udf struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping() can only be used with GroupingSets/Cube/Rollup; +grouping() can only be used with GroupingSets/Cube/Rollup -- !query @@ -219,7 +219,7 @@ SELECT course, udf(year), GROUPING_ID(course, year) FROM courseSales GROUP BY ud struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -255,7 +255,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY udf(course), year HAVING GROU struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -264,7 +264,7 @@ SELECT course, udf(udf(year)) FROM courseSales GROUP BY course, year HAVING GROU struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -319,7 +319,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GR struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query @@ -328,7 +328,7 @@ SELECT course, udf(year) FROM courseSales GROUP BY course, udf(year) ORDER BY GR struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup; +grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out index da5256f5c0453..18a7708c40685 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-group-by.sql.out @@ -18,7 +18,7 @@ SELECT udf(a), udf(COUNT(b)) FROM testData struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. 
Wrap '(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `CAST(udf(cast(count(b) as string)) AS BIGINT)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and 'testdata.`a`' is not an aggregate function. Wrap '(CAST(udf(cast(count(b) as string)) AS BIGINT) AS `CAST(udf(cast(count(b) as string)) AS BIGINT)`)' in windowing function(s) or wrap 'testdata.`a`' in first() (or first_value) if you don't care which value you get. -- !query @@ -46,7 +46,7 @@ SELECT udf(a), udf(COUNT(udf(b))) FROM testData GROUP BY b struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -110,7 +110,7 @@ SELECT udf(a + 2), udf(COUNT(b)) FROM testData GROUP BY a + 1 struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdata.`a`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -167,7 +167,7 @@ SELECT udf(COUNT(b)) AS k FROM testData GROUP BY k struct<> -- !query output org.apache.spark.sql.AnalysisException -aggregate functions are not allowed in GROUP BY, but found CAST(udf(cast(count(b) as string)) AS BIGINT); +aggregate functions are not allowed in GROUP BY, but found CAST(udf(cast(count(b) as string)) AS BIGINT) -- !query @@ -185,7 +185,7 @@ SELECT k AS a, udf(COUNT(udf(v))) FROM testDataHasSameNameWithAlias GROUP BY udf struct<> -- !query output org.apache.spark.sql.AnalysisException -expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; +expression 'testdatahassamenamewithalias.`k`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get. -- !query @@ -274,7 +274,7 @@ SELECT udf(id) FROM range(10) HAVING id > 0 struct<> -- !query output org.apache.spark.sql.AnalysisException -grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get.; +grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. -- !query @@ -496,7 +496,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(count(1) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -508,7 +508,7 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. 
Expression in where clause: [((count(1) + 1L) > 1L)] -Invalid expressions: [count(1)]; +Invalid expressions: [count(1)] -- !query @@ -520,4 +520,4 @@ org.apache.spark.sql.AnalysisException Aggregate/Window/Generate expressions are not valid in where clause of the query. Expression in where clause: [(((test_agg.`k` = 1) OR (test_agg.`k` = 2)) OR (((count(1) + 1L) > 1L) OR (max(test_agg.`k`) > 1)))] -Invalid expressions: [count(1), max(test_agg.`k`)]; +Invalid expressions: [count(1), max(test_agg.`k`)] diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out index b3735ae153267..e225a3df596c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out @@ -98,7 +98,7 @@ SELECT array(1), udf(2) struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table; +IntersectAll can only be performed on tables with the compatible column types. array <> int at the first column of the second table -- !query @@ -109,7 +109,7 @@ SELECT udf(k), udf(v) FROM tab2 struct<> -- !query output org.apache.spark.sql.AnalysisException -IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns; +IntersectAll can only be performed on tables with the same number of columns, but the first table has 1 columns and the second table has 2 columns -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out index 414435e6b781d..bcec61470d4a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out @@ -202,7 +202,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but 'coursesales.`earnings`' did not appear in any aggregate function. -- !query @@ -217,7 +217,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function.; +Aggregate expression required for pivot, but '__auto_generated_subquery_name.`year`' did not appear in any aggregate function. -- !query @@ -262,7 +262,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query.; +It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. 
-- !query @@ -313,7 +313,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct; +Invalid pivot value 'dotNET': value data type string does not match pivot column data type struct -- !query @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x'; +Literal expressions required for pivot values, found 'course#x' -- !query @@ -424,7 +424,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'm#x'. Pivot columns must be comparable.; +Invalid pivot column 'm#x'. Pivot columns must be comparable. -- !query @@ -441,7 +441,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable.; +Invalid pivot column 'named_struct(course, course#x, m, m#x)'. Pivot columns must be comparable. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out index 928b9ebb12364..6d97800904971 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-window.sql.out @@ -321,7 +321,7 @@ SELECT udf(val), cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER struct<> -- !query output org.apache.spark.sql.AnalysisException -Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; +Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index df2ad96649186..c904c43ac84ed 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -19,6 +19,7 @@ struct<> -- !query output + -- !query CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES ('Diane Murphy','Accounting',8435), @@ -44,6 +45,7 @@ struct<> -- !query output + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val @@ -345,7 +347,7 @@ SELECT val, cate, row_number() OVER(PARTITION BY cate) FROM testData ORDER BY ca struct<> -- !query output org.apache.spark.sql.AnalysisException -Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table; +Window function row_number() requires window to be ordered, please add ORDER BY clause. For example SELECT row_number()(value_expr) OVER (PARTITION BY window_partition ORDER BY window_ordering) from table -- !query @@ -414,7 +416,7 @@ FROM testData ORDER BY cate, val struct<> -- !query output org.apache.spark.sql.AnalysisException -window aggregate function with filter predicate is not supported yet.; +window aggregate function with filter predicate is not supported yet. 
-- !query @@ -773,4 +775,4 @@ WINDOW ^^^ w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) -ORDER BY salary DESC \ No newline at end of file +ORDER BY salary DESC diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 937de92bcaba6..01b1508d034c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -2220,7 +2220,7 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { structLevel1 .select($"a".dropFields("c").as("a")) .select($"a".withField("z", $"a.c")).as("a") - }.getMessage should include("No such struct field c in a, b;") + }.getMessage should include("No such struct field c in a, b") } test("nestedDf should generate nested DataFrames") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index e454f0e6d540f..12394a92aed44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -155,7 +155,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val cols = Seq("c1", "c2", "c3") createTable("t1", cols, Seq("int", "long", "string")) val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c2) values(1, 2, 3)")) - assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`;") + assert(e1.getMessage === "Found duplicate column(s) in the column list: `c2`") } } @@ -164,7 +164,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val cols = Seq("c1", "c2", "c3") createTable("t1", cols, Seq("int", "long", "string")) val e1 = intercept[AnalysisException](sql(s"INSERT INTO t1 (c1, c2, c4) values(1, 2, 3)")) - assert(e1.getMessage === "Cannot resolve column name c4;") + assert(e1.getMessage === "Cannot resolve column name c4") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index d7659e25d2c41..58427183eeed5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -62,7 +62,7 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { val msg = intercept[NoSuchNamespaceException] { runShowTablesSql(s"SHOW TABLES IN $catalog.unknown", Seq()) }.getMessage - assert(msg.matches("(Database|Namespace) 'unknown' not found;")) + assert(msg.matches("(Database|Namespace) 'unknown' not found")) } test("show tables with a pattern") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 580e7df6ef63e..1ea2d4fd0b32c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -190,7 +190,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { assert(spark.conf.get("spark.app.id") === appId, "Should not change spark core ones") // spark core conf w/ entry registered val e1 = 
intercept[AnalysisException](sql("RESET spark.executor.cores")) - assert(e1.getMessage === "Cannot modify the value of a Spark config: spark.executor.cores;") + assert(e1.getMessage === "Cannot modify the value of a Spark config: spark.executor.cores") // user defined settings sql("SET spark.abc=xyz") @@ -217,7 +217,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { // static sql configs val e2 = intercept[AnalysisException](sql(s"RESET ${StaticSQLConf.WAREHOUSE_PATH.key}")) assert(e2.getMessage === - s"Cannot modify the value of a static config: ${StaticSQLConf.WAREHOUSE_PATH.key};") + s"Cannot modify the value of a static config: ${StaticSQLConf.WAREHOUSE_PATH.key}") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala index a410f32d4af7e..0a5feda1bd533 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedWriteSuite.scala @@ -88,7 +88,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.sortBy("j").saveAsTable("tt") } - assert(e.getMessage == "sortBy must be used together with bucketBy;") + assert(e.getMessage == "sortBy must be used together with bucketBy") } test("sorting by non-orderable column") { @@ -102,7 +102,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").parquet("/tmp/path") } - assert(e.getMessage == "'save' does not support bucketBy right now;") + assert(e.getMessage == "'save' does not support bucketBy right now") } test("write bucketed and sorted data using save()") { @@ -111,7 +111,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").sortBy("i").parquet("/tmp/path") } - assert(e.getMessage == "'save' does not support bucketBy and sortBy right now;") + assert(e.getMessage == "'save' does not support bucketBy and sortBy right now") } test("write bucketed data using insertInto()") { @@ -120,7 +120,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").insertInto("tt") } - assert(e.getMessage == "'insertInto' does not support bucketBy right now;") + assert(e.getMessage == "'insertInto' does not support bucketBy right now") } test("write bucketed and sorted data using insertInto()") { @@ -129,7 +129,7 @@ abstract class BucketedWriteSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { df.write.bucketBy(2, "i").sortBy("i").insertInto("tt") } - assert(e.getMessage == "'insertInto' does not support bucketBy and sortBy right now;") + assert(e.getMessage == "'insertInto' does not support bucketBy and sortBy right now") } private lazy val df = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala index 52825a155e46a..b9266429f81a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/PartitionedWriteSuite.scala @@ -162,7 +162,7 @@ class PartitionedWriteSuite extends QueryTest with SharedSparkSession { withTempPath { f => val e = intercept[AnalysisException]( 
Seq((3, 2)).toDF("a", "b").write.partitionBy("b", "b").csv(f.getAbsolutePath)) - assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`;")) + assert(e.getMessage.contains("Found duplicate column(s) b, b: `b`")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index b240d2058a018..6b9fa9c968fb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -413,7 +413,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { createFileStreamSourceAndGetSchema( format = Some("json"), path = Some(src.getCanonicalPath), schema = None) } - assert("Unable to infer schema for JSON. It must be specified manually.;" === e.getMessage) + assert("Unable to infer schema for JSON. It must be specified manually." === e.getMessage) } } } From 23083aa594360938c611a45794405d81e59ecaf1 Mon Sep 17 00:00:00 2001 From: Prakhar Jain Date: Tue, 15 Dec 2020 13:46:58 +0000 Subject: [PATCH 0772/1009] [SPARK-33758][SQL] Prune unrequired partitionings from AliasAwareOutputPartitionings when some columns are dropped from projection ### What changes were proposed in this pull request? This PR tries to prune the unrequired output partitionings in cases when the columns are dropped from Project/Aggregates etc. ### Why are the changes needed? Consider this query: select t1.id from t1 JOIN t2 on t1.id = t2.id This query will have top level Project node which will just project t1.id. But the outputPartitioning of this project node will be: PartitioningCollection(HashPartitioning(t1.id), HashPartitioning(t2.id)). But since we are not propagating t2.id column, so we can drop HashPartitioning(t2.id) from the output partitioning of Project node. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UTs. Closes #30762 from prakharjain09/SPARK-33758-prune-partitioning. Authored-by: Prakhar Jain Signed-off-by: Wenchen Fan --- .../AliasAwareOutputExpression.scala | 22 +++++++- .../spark/sql/execution/PlannerSuite.scala | 54 ++++++++++++++----- 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index 3cbe1654ea2cd..23a9527a1b349 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeMap, AttributeReference, Expression, NamedExpression, SortOrder} -import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning} /** * A trait that provides functionality to handle aliases in the `outputExpressions`. 
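A quick way to see the effect described in this commit message is a spark-shell session; the sketch below is illustrative only and mirrors the new PlannerSuite test added in this patch (the view names, config settings, and expected partitionings are borrowed from that test, so it assumes a build containing this change):

```scala
// Minimal sketch of the SPARK-33758 behavior; not part of the patch itself.
// Disable broadcast joins and AQE so the sort-merge join keeps its hash partitionings.
import spark.implicits._
spark.conf.set("spark.sql.autoBroadcastJoinThreshold", "-1")
spark.conf.set("spark.sql.adaptive.enabled", "false")

spark.range(10).repartition($"id").createOrReplaceTempView("t1")
spark.range(20).repartition($"id").createOrReplaceTempView("t2")

// Both join keys are projected, so the top-level Project keeps both partitionings:
// PartitioningCollection(HashPartitioning(t1id), HashPartitioning(t2id)).
val both = spark.sql("SELECT t1.id AS t1id, t2.id AS t2id FROM t1 JOIN t2 ON t1.id = t2.id")
println(both.queryExecution.executedPlan.outputPartitioning)

// Only t1.id survives the projection, so HashPartitioning(t2.id) is pruned and a
// single HashPartitioning(t1id) remains.
val onlyT1 = spark.sql("SELECT t1.id AS t1id FROM t1 JOIN t2 ON t1.id = t2.id")
println(onlyT1.queryExecution.executedPlan.outputPartitioning)
```

Without this change, the second plan would still report the full `PartitioningCollection`, including `HashPartitioning(t2.id)`, even though `t2.id` is no longer part of the project's output.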
@@ -44,7 +44,7 @@ trait AliasAwareOutputExpression extends UnaryExecNode { */ trait AliasAwareOutputPartitioning extends AliasAwareOutputExpression { final override def outputPartitioning: Partitioning = { - if (hasAlias) { + val normalizedOutputPartitioning = if (hasAlias) { child.outputPartitioning match { case e: Expression => normalizeExpression(e).asInstanceOf[Partitioning] @@ -53,6 +53,24 @@ trait AliasAwareOutputPartitioning extends AliasAwareOutputExpression { } else { child.outputPartitioning } + + flattenPartitioning(normalizedOutputPartitioning).filter { + case hashPartitioning: HashPartitioning => hashPartitioning.references.subsetOf(outputSet) + case _ => true + } match { + case Seq() => UnknownPartitioning(child.outputPartitioning.numPartitions) + case Seq(singlePartitioning) => singlePartitioning + case seqWithMultiplePartitionings => PartitioningCollection(seqWithMultiplePartitionings) + } + } + + private def flattenPartitioning(partitioning: Partitioning): Seq[Partitioning] = { + partitioning match { + case PartitioningCollection(childPartitionings) => + childPartitionings.flatMap(flattenPartitioning) + case rest => + rest +: Nil + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 4e01d1c06f64e..924776ae3ae60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -921,10 +921,10 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val projects = planned.collect { case p: ProjectExec => p } assert(projects.exists(_.outputPartitioning match { - case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), - HashPartitioning(Seq(k2: AttributeReference), _))) if k1.name == "t1id" => + case HashPartitioning(Seq(k1: AttributeReference), _) if k1.name == "t1id" => true - case _ => false + case _ => + false })) } } @@ -1008,17 +1008,11 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val projects = planned.collect { case p: ProjectExec => p } assert(projects.exists(_.outputPartitioning match { - case PartitioningCollection(Seq(HashPartitioning(Seq(Multiply(ar1, _, _)), _), - HashPartitioning(Seq(Multiply(ar2, _, _)), _))) => - Seq(ar1, ar2) match { - case Seq(ar1: AttributeReference, ar2: AttributeReference) => - ar1.name == "t1id" && ar2.name == "id2" - case _ => - false - } - case _ => false + case HashPartitioning(Seq(Multiply(ar1: AttributeReference, _, _)), _) => + ar1.name == "t1id" + case _ => + false })) - } } } @@ -1234,6 +1228,40 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val numPartitions = range.rdd.getNumPartitions assert(numPartitions == 0) } + + test("SPARK-33758: Prune unnecessary output partitioning") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + withTempView("t1", "t2") { + spark.range(10).repartition($"id").createTempView("t1") + spark.range(20).repartition($"id").createTempView("t2") + val planned = sql( + """ + | SELECT t1.id as t1id, t2.id as t2id + | FROM t1, t2 + | WHERE t1.id = t2.id + """.stripMargin).queryExecution.executedPlan + + assert(planned.outputPartitioning match { + case PartitioningCollection(Seq(HashPartitioning(Seq(k1: AttributeReference), _), + HashPartitioning(Seq(k2: AttributeReference), _))) => + k1.name == 
"t1id" && k2.name == "t2id" + }) + + val planned2 = sql( + """ + | SELECT t1.id as t1id + | FROM t1, t2 + | WHERE t1.id = t2.id + """.stripMargin).queryExecution.executedPlan + assert(planned2.outputPartitioning match { + case HashPartitioning(Seq(k1: AttributeReference), _) if k1.name == "t1id" => + true + }) + } + } + } } // Used for unit-testing EnsureRequirements From 40c37d69fd003ed6079ee8c139dba5c15915c568 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 15 Dec 2020 14:16:43 +0000 Subject: [PATCH 0773/1009] [SPARK-33617][SQL][FOLLOWUP] refine the default parallelism SQL config ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30559 . The default parallelism config in Spark core is not good, as it's unclear where it applies. To not inherit this problem in Spark SQL, this PR refines the default parallelism SQL config, to make it clear that it only applies to leaf nodes. ### Why are the changes needed? Make the config clearer. ### Does this PR introduce _any_ user-facing change? It changes an unreleased config. ### How was this patch tested? existing tests Closes #30736 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 11 +++++------ .../scala/org/apache/spark/sql/SparkSession.scala | 10 ++++++---- .../spark/sql/execution/LocalTableScanExec.scala | 3 +-- .../adaptive/CoalesceShufflePartitions.scala | 2 +- .../spark/sql/execution/basicPhysicalOperators.scala | 3 +-- .../org/apache/spark/sql/execution/command/ddl.scala | 3 +-- .../sql/execution/datasources/FilePartition.scala | 3 +-- .../sql/execution/datasources/SchemaMergeUtils.scala | 3 +-- .../apache/spark/sql/execution/SparkPlanSuite.scala | 4 ++-- 9 files changed, 19 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 078928391f560..fd6a30ac6a81c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -374,12 +374,13 @@ object SQLConf { .booleanConf .createWithDefault(true) - val DEFAULT_PARALLELISM = buildConf("spark.sql.default.parallelism") - .doc("The number of parallelism for Spark SQL, the default value is " + - "`spark.default.parallelism`.") + val LEAF_NODE_DEFAULT_PARALLELISM = buildConf("spark.sql.leafNodeDefaultParallelism") + .doc("The default parallelism of Spark SQL leaf nodes that produce data, such as the file " + + "scan node, the local data scan node, the range node, etc. 
The default value of this " + + "config is 'SparkContext#defaultParallelism'.") .version("3.2.0") .intConf - .checkValue(_ > 0, "The value of spark.sql.default.parallelism must be positive.") + .checkValue(_ > 0, "The value of spark.sql.leafNodeDefaultParallelism must be positive.") .createOptional val SHUFFLE_PARTITIONS = buildConf("spark.sql.shuffle.partitions") @@ -3202,8 +3203,6 @@ class SQLConf extends Serializable with Logging { def cacheVectorizedReaderEnabled: Boolean = getConf(CACHE_VECTORIZED_READER_ENABLED) - def defaultParallelism: Option[Int] = getConf(DEFAULT_PARALLELISM) - def defaultNumShufflePartitions: Int = getConf(SHUFFLE_PARTITIONS) def numShufflePartitions: Int = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index a2c9406f6becf..20a2649322ae0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -523,8 +523,7 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long): Dataset[java.lang.Long] = { - range(start, end, step = 1, - numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) + range(start, end, step = 1, numPartitions = leafNodeDefaultParallelism) } /** @@ -534,8 +533,7 @@ class SparkSession private( * @since 2.0.0 */ def range(start: Long, end: Long, step: Long): Dataset[java.lang.Long] = { - range(start, end, step, - numPartitions = sqlContext.conf.defaultParallelism.getOrElse(sparkContext.defaultParallelism)) + range(start, end, step, numPartitions = leafNodeDefaultParallelism) } /** @@ -775,6 +773,10 @@ class SparkSession private( SparkSession.setActiveSession(old) } } + + private[sql] def leafNodeDefaultParallelism: Int = { + conf.get(SQLConf.LEAF_NODE_DEFAULT_PARALLELISM).getOrElse(sparkContext.defaultParallelism) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 02a8f46824241..054daa54d1153 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -50,8 +50,7 @@ case class LocalTableScanExec( sqlContext.sparkContext.emptyRDD } else { val numSlices = math.min( - unsafeRows.length, - conf.defaultParallelism.getOrElse(sqlContext.sparkContext.defaultParallelism)) + unsafeRows.length, sqlContext.sparkSession.leafNodeDefaultParallelism) sqlContext.sparkContext.parallelize(unsafeRows, numSlices) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 6149bd214e540..0f482142227d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -67,7 +67,7 @@ case class CoalesceShufflePartitions(session: SparkSession) extends CustomShuffl // We fall back to Spark default parallelism if the minimum number of coalesced partitions // is not set, so to avoid perf regressions compared to no coalescing. 
val minPartitionNum = conf.getConf(SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM) - .orElse(conf.defaultParallelism).getOrElse(session.sparkContext.defaultParallelism) + .getOrElse(session.sparkContext.defaultParallelism) val partitionSpecs = ShufflePartitionsUtil.coalescePartitions( validMetrics.toArray, advisoryTargetSize = conf.getConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index fcf77e588fc60..d74d0bf733c27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -382,8 +382,7 @@ case class RangeExec(range: org.apache.spark.sql.catalyst.plans.logical.Range) val start: Long = range.start val end: Long = range.end val step: Long = range.step - val numSlices: Int = range.numSlices.orElse(sqlContext.conf.defaultParallelism) - .getOrElse(sparkContext.defaultParallelism) + val numSlices: Int = range.numSlices.getOrElse(sqlContext.sparkSession.leafNodeDefaultParallelism) val numElements: BigInt = range.numElements val isEmptyRange: Boolean = start == end || (start < end ^ 0 < step) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 6d631e044e917..604de860f04c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -738,8 +738,7 @@ case class AlterTableRecoverPartitionsCommand( // Set the number of parallelism to prevent following file listing from generating many tasks // in case of large #defaultParallelism. val numParallelism = Math.min(serializedPaths.length, - Math.min(spark.sessionState.conf.defaultParallelism - .getOrElse(spark.sparkContext.defaultParallelism), 10000)) + Math.min(spark.sparkContext.defaultParallelism, 10000)) // gather the fast stats for all the partitions otherwise Hive metastore will list all the // files for all the new partitions in sequential way, which is super slow. 
logInfo(s"Gather the fast stats in parallel using $numParallelism tasks.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 1b35db8d0873c..a4d16a0fd2bb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -89,8 +89,7 @@ object FilePartition extends Logging { val defaultMaxSplitBytes = sparkSession.sessionState.conf.filesMaxPartitionBytes val openCostInBytes = sparkSession.sessionState.conf.filesOpenCostInBytes val minPartitionNum = sparkSession.sessionState.conf.filesMinPartitionNum - .orElse(sparkSession.sessionState.conf.defaultParallelism) - .getOrElse(sparkSession.sparkContext.defaultParallelism) + .getOrElse(sparkSession.leafNodeDefaultParallelism) val totalBytes = selectedPartitions.flatMap(_.files.map(_.getLen + openCostInBytes)).sum val bytesPerCore = totalBytes / minPartitionNum diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala index 54d79898bb81b..28097c35401c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala @@ -57,8 +57,7 @@ object SchemaMergeUtils extends Logging { // Set the number of partitions to prevent following schema reads from generating many tasks // in case of a small number of orc files. val numParallelism = Math.min(Math.max(partialFileStatusInfo.size, 1), - sparkSession.sessionState.conf.defaultParallelism - .getOrElse(sparkSession.sparkContext.defaultParallelism)) + sparkSession.sparkContext.defaultParallelism) val ignoreCorruptFiles = sparkSession.sessionState.conf.ignoreCorruptFiles diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index 254855247ced3..dfec6bccb0c58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -89,9 +89,9 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { assert(LocalTableScanExec(Nil, Nil).execute().getNumPartitions == 0) } - test("SPARK-33617: spark.sql.default.parallelism effective for LocalTableScan") { + test("SPARK-33617: change default parallelism of LocalTableScan") { Seq(1, 4).foreach { minPartitionNum => - withSQLConf(SQLConf.DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { + withSQLConf(SQLConf.LEAF_NODE_DEFAULT_PARALLELISM.key -> minPartitionNum.toString) { val df = spark.sql("SELECT * FROM VALUES (1), (2), (3), (4), (5), (6), (7), (8)") assert(df.rdd.partitions.length === minPartitionNum) } From 4d56d438386049b5f481ec83b69e3c89807be201 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 15 Dec 2020 13:50:58 -0800 Subject: [PATCH 0774/1009] [SPARK-33735][SQL] Handle UPDATE in ReplaceNullWithFalseInPredicate ### What changes were proposed in this pull request? This PR adds `UpdateTable` to supported plans in `ReplaceNullWithFalseInPredicate`. ### Why are the changes needed? This change allows Spark to optimize update conditions like we optimize filters. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? This PR extends the existing test cases to also cover `UpdateTable`. Closes #30787 from aokolnychyi/spark-33735. Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../ReplaceNullWithFalseInPredicate.scala | 3 ++- ...ReplaceNullWithFalseInPredicateSuite.scala | 23 ++++++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 698ece4f9e69f..4a71dba663b38 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, Expression, If} import org.apache.spark.sql.catalyst.expressions.{LambdaFunction, Literal, MapFilter, Or} import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, Join, LogicalPlan, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.types.BooleanType import org.apache.spark.util.Utils @@ -54,6 +54,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond))) + case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond))) case p: LogicalPlan => p transformExpressions { case i @ If(pred, _, _) => i.copy(predicate = replaceNullWithFalse(pred)) case cw @ CaseWhen(branches, _) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 6fc31c94e47eb..00433a5490574 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, IntegerType} @@ -49,6 +49,7 @@ class 
ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testJoin(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) testDelete(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) + testUpdate(originalCond = Literal(null, BooleanType), expectedCond = FalseLiteral) } test("Not expected type - replaceNullWithFalse") { @@ -66,6 +67,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested expressions in branches of If") { @@ -76,6 +78,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in elseValue of CaseWhen") { @@ -87,6 +90,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in branch values of CaseWhen") { @@ -97,6 +101,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside CaseWhen") { @@ -114,6 +119,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in complex CaseWhen expressions") { @@ -134,6 +140,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in Or") { @@ -142,6 +149,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) testDelete(originalCond, expectedCond) + testUpdate(originalCond, expectedCond) } test("replace null in And") { @@ -149,6 +157,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace nulls in nested And/Or expressions") { @@ -158,6 +167,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in And inside branches of If") { @@ -168,6 +178,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, 
expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside And") { @@ -180,6 +191,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in branches of If inside another If") { @@ -190,6 +202,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("replace null in CaseWhen inside another CaseWhen") { @@ -198,6 +211,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond, expectedCond = FalseLiteral) testJoin(originalCond, expectedCond = FalseLiteral) testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) } test("inability to replace null in non-boolean branches of If") { @@ -211,6 +225,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean values of CaseWhen") { @@ -226,6 +241,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -239,6 +255,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testFilter(originalCond = condition, expectedCond = condition) testJoin(originalCond = condition, expectedCond = condition) testDelete(originalCond = condition, expectedCond = condition) + testUpdate(originalCond = condition, expectedCond = condition) } test("replace null in If used as a join condition") { @@ -374,6 +391,10 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) } + private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) + } + private def testHigherOrderFunc( argument: Expression, createExpr: (Expression, Expression) => Expression, From 87c58367cd8b1815feef754695631ce08c3cde8b Mon Sep 17 00:00:00 2001 From: David McWhorter Date: Tue, 15 Dec 2020 14:00:38 -0800 Subject: [PATCH 0775/1009] [SPARK-22256][MESOS] Introduce spark.mesos.driver.memoryOverhead ### What changes were proposed in this pull request? This is a simple change to support allocating a specified amount of overhead memory for the driver's mesos container. This is already supported for executors. ### Why are the changes needed? This is needed to keep the driver process from exceeding memory limits and being killed off when running on mesos. ### Does this PR introduce _any_ user-facing change? 
Yes, it adds a `spark.mesos.driver.memoryOverhead` configuration option. Documentation changes for this option are included in the PR. ### How was this patch tested? Test cases covering allocation of driver memory overhead are included in the changes. ### Other notes This is a second attempt to get this change reviewed, accepted and merged. The original pull request was closed as stale back in January: https://github.com/apache/spark/pull/21006. For this pull request, I took the original change by pmackles, rebased it onto the current master branch, and added a test case that was requested in the original code review. I'm happy to make any further edits or do anything needed so that this can be included in a future spark release. I keep having to build custom spark distributions so that we can use spark within our mesos clusters. Closes #30739 from dmcwhorter/dmcwhorter-SPARK-22256. Lead-authored-by: David McWhorter Co-authored-by: Paul Mackles Signed-off-by: Dongjoon Hyun --- docs/running-on-mesos.md | 9 ++ .../apache/spark/deploy/mesos/config.scala | 8 ++ .../cluster/mesos/MesosClusterScheduler.scala | 4 +- .../cluster/mesos/MesosSchedulerUtils.scala | 17 +++- .../mesos/MesosClusterSchedulerSuite.scala | 82 +++++++++++++++++-- 5 files changed, 108 insertions(+), 12 deletions(-) diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index 8c0bac1815bbd..364def8923392 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -480,6 +480,15 @@ See the [configuration page](configuration.html) for information on Spark config 1.1.1 + + spark.mesos.driver.memoryOverhead + driver memory * 0.10, with minimum of 384 + + The amount of additional memory, specified in MB, to be allocated to the driver. By default, + the overhead will be larger of either 384 or 10% of spark.driver.memory. If set, + the final overhead will be this value. Only applies to cluster mode. + + spark.mesos.uris (none) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala index 5927af176062d..38df43d71b897 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/config.scala @@ -183,6 +183,14 @@ package object config { .stringConf .createOptional + private[spark] val DRIVER_MEMORY_OVERHEAD = + ConfigBuilder("spark.mesos.driver.memoryOverhead") + .doc("The amount of additional memory, specified in MB, to be allocated to the driver. " + + "By default, the overhead will be larger of either 384 or 10% of spark.driver.memory. 
" + + "Only applies to cluster mode.") + .intConf + .createOptional + private[spark] val EXECUTOR_URI = ConfigBuilder("spark.executor.uri").version("0.8.0").stringConf.createOptional diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index b18737cf6126d..c7e0869e4bd5c 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -614,7 +614,7 @@ private[spark] class MesosClusterScheduler( val (remainingResources, cpuResourcesToUse) = partitionResources(offer.remainingResources, "cpus", desc.cores) val (finalResources, memResourcesToUse) = - partitionResources(remainingResources.asJava, "mem", desc.mem) + partitionResources(remainingResources.asJava, "mem", driverContainerMemory(desc)) offer.remainingResources = finalResources.asJava val appName = desc.conf.get("spark.app.name") @@ -646,7 +646,7 @@ private[spark] class MesosClusterScheduler( tasks: mutable.HashMap[OfferID, ArrayBuffer[TaskInfo]]): Unit = { for (submission <- candidates) { val driverCpu = submission.cores - val driverMem = submission.mem + val driverMem = driverContainerMemory(submission) val driverConstraints = parseConstraintString(submission.conf.get(config.DRIVER_CONSTRAINTS)) logTrace(s"Finding offer to launch driver with cpu: $driverCpu, mem: $driverMem, " + diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 8dbb70b616df1..38f83df00e428 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -36,7 +36,7 @@ import org.apache.mesos.protobuf.GeneratedMessageV3 import org.apache.spark.{SparkConf, SparkContext, SparkException} import org.apache.spark.TaskState -import org.apache.spark.deploy.mesos.{config => mesosConfig} +import org.apache.spark.deploy.mesos.{config => mesosConfig, MesosDriverDescription} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{Status => _, _} import org.apache.spark.util.Utils @@ -405,6 +405,21 @@ trait MesosSchedulerUtils extends Logging { sc.executorMemory } + /** + * Return the amount of memory to allocate to each driver, taking into account + * container overheads. 
+ * + * @param driverDesc used to get driver memory + * @return memory requirement defined as `DRIVER_MEMORY_OVERHEAD` if set in the config, + * otherwise the larger of `MEMORY_OVERHEAD_MINIMUM (=384MB)` or + * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` + */ + def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { + val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + + driverDesc.mem + } + def setupUris(uris: Seq[String], builder: CommandInfo.Builder, useFetcherCache: Boolean = false): Unit = { diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala index 146a135afd795..9a1862d32dc13 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterSchedulerSuite.scala @@ -105,7 +105,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi val response = scheduler.submitDriver( new MesosDriverDescription("d1", "jar", 1200, 1.5, true, command, - Map((config.EXECUTOR_HOME.key, "test"), ("spark.app.name", "test")), + Map((config.EXECUTOR_HOME.key, "test"), ("spark.app.name", "test"), + (config.DRIVER_MEMORY_OVERHEAD.key, "0")), "s1", new Date())) assert(response.success) @@ -200,6 +201,60 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi }) } + test("SPARK-22256: supports spark.mesos.driver.memoryOverhead with 384mb default") { + setScheduler() + + val mem = 1000 + val cpu = 1 + + val response = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.executor.home" -> "test", + "spark.app.name" -> "test"), + "s1", + new Date())) + assert(response.success) + + val offer = Utils.createOffer("o1", "s1", mem*2, cpu) + scheduler.resourceOffers(driver, List(offer).asJava) + val tasks = Utils.verifyTaskLaunched(driver, "o1") + // 1384.0 + val taskMem = tasks.head.getResourcesList + .asScala + .filter(_.getName.equals("mem")) + .map(_.getScalar.getValue) + .head + assert(1384.0 === taskMem) + } + + test("SPARK-22256: supports spark.mesos.driver.memoryOverhead with 10% default") { + setScheduler() + + val mem = 10000 + val cpu = 1 + + val response = scheduler.submitDriver( + new MesosDriverDescription("d1", "jar", mem, cpu, true, + command, + Map("spark.mesos.executor.home" -> "test", + "spark.app.name" -> "test"), + "s1", + new Date())) + assert(response.success) + + val offer = Utils.createOffer("o1", "s1", mem*2, cpu) + scheduler.resourceOffers(driver, List(offer).asJava) + val tasks = Utils.verifyTaskLaunched(driver, "o1") + // 11000.0 + val taskMem = tasks.head.getResourcesList + .asScala + .filter(_.getName.equals("mem")) + .map(_.getScalar.getValue) + .head + assert(11000.0 === taskMem) + } + test("supports spark.mesos.driverEnv.*") { setScheduler() @@ -211,7 +266,9 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_ENV_PREFIX + "TEST_ENV" -> "TEST_VAL"), + config.DRIVER_ENV_PREFIX + "TEST_ENV" -> "TEST_VAL", + config.DRIVER_MEMORY_OVERHEAD.key -> "0" + ), "s1", new 
Date())) assert(response.success) @@ -236,7 +293,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", config.NETWORK_NAME.key -> "test-network-name", - config.NETWORK_LABELS.key -> "key1:val1,key2:val2"), + config.NETWORK_LABELS.key -> "key1:val1,key2:val2", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -266,7 +324,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", config.ENABLE_FETCHER_CACHE.key -> "true", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -290,7 +349,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi new MesosDriverDescription("d1", "jar", mem, cpu, true, command, Map(config.EXECUTOR_HOME.key -> "test", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -315,7 +375,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", config.ENABLE_FETCHER_CACHE.key -> "false", - "spark.app.name" -> "test"), + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -349,7 +410,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_CONSTRAINTS.key -> driverConstraints), + config.DRIVER_CONSTRAINTS.key -> driverConstraints, + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) assert(response.success) @@ -387,7 +449,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi command, Map(config.EXECUTOR_HOME.key -> "test", "spark.app.name" -> "test", - config.DRIVER_LABELS.key -> "key:value"), + config.DRIVER_LABELS.key -> "key:value", + config.DRIVER_MEMORY_OVERHEAD.key -> "0"), "s1", new Date())) @@ -745,7 +808,8 @@ class MesosClusterSchedulerSuite extends SparkFunSuite with LocalSparkContext wi true, command, Map(config.EXECUTOR_HOME.key -> "test", - "spark.app.name" -> "test") ++ + "spark.app.name" -> "test", + config.DRIVER_MEMORY_OVERHEAD.key -> "0") ++ addlSparkConfVars, "s1", new Date()) From 3dfdcf4f92ef5e739f15c22c93d673bb2233e617 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 16 Dec 2020 10:03:48 +0900 Subject: [PATCH 0776/1009] [SPARK-33788][SQL] Throw NoSuchPartitionsException from HiveExternalCatalog.dropPartitions() ### What changes were proposed in this pull request? Throw `NoSuchPartitionsException` from `ALTER TABLE .. DROP TABLE` for not existing partitions of a table in V1 Hive external catalog. ### Why are the changes needed? The behaviour of Hive external catalog deviates from V1/V2 in-memory catalogs that throw `NoSuchPartitionsException`. To improve user experience with Spark SQL, it would be better to throw the same exception. ### Does this PR introduce _any_ user-facing change? Yes, the command throws `NoSuchPartitionsException` instead of the general exception `AnalysisException`. ### How was this patch tested? By running tests for `ALTER TABLE .. DROP PARTITION`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30778 from MaxGekk/hive-drop-partition-exception. 
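For illustration only (table and column names are invented, and the snippet is written in the ScalaTest style of the suites above, so `sql` and `intercept` come from the test harness), the user-visible difference is the exception type when some of the partitions to drop do not exist in a Hive-backed table:

```scala
import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException

sql("CREATE TABLE tbl (id BIGINT, data STRING) USING HIVE PARTITIONED BY (id)")
sql("ALTER TABLE tbl ADD PARTITION (id = 1) LOCATION 'loc'")

// Previously this raised a generic AnalysisException ("No partition is dropped...");
// now it raises NoSuchPartitionsException, matching the V1/V2 in-memory catalogs.
intercept[NoSuchPartitionsException] {
  sql("ALTER TABLE tbl DROP PARTITION (id = 1), PARTITION (id = 2)")
}

// IF EXISTS still silently skips the missing spec and drops the existing partition.
sql("ALTER TABLE tbl DROP IF EXISTS PARTITION (id = 1), PARTITION (id = 2)")
```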
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../AlterTableDropPartitionSuiteBase.scala | 17 ++++++++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 20 +------------------ .../v2/AlterTableDropPartitionSuite.scala | 17 ---------------- .../sql/hive/client/HiveClientImpl.scala | 6 ++---- .../AlterTableDropPartitionSuite.scala | 19 ------------------ 5 files changed, 20 insertions(+), 59 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index ed479e2824fb7..338f13ace891c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -146,4 +147,20 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { assert(errMsg.contains(notFullPartitionSpecErr)) } } + + test("partition not exists") { + withNsTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") + + val errMsg = intercept[NoSuchPartitionsException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("partitions not found in table")) + + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 5ad182bc689b9..e655debc2fdde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -32,21 +31,4 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with SharedSparkSession { - - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[NoSuchPartitionsException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("partitions not found in table")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } -} + with SharedSparkSession diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index 608e7d7c98f6f..9dc1cad5a002d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command import org.apache.spark.sql.test.SharedSparkSession @@ -38,22 +37,6 @@ class AlterTableDropPartitionSuite .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[NoSuchPartitionsException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("partitions not found in table")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } - test("SPARK-33650: drop partition into a table which doesn't support partition management") { withNsTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 0b19e5e6e8c84..6a964a0ce3613 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -49,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -630,9 +630,7 @@ private[hive] class HiveClientImpl( // (b='1', c='1') and (b='1', c='2'), a partial spec of (b='1') will match both. val parts = client.getPartitions(hiveTable, s.asJava).asScala if (parts.isEmpty && !ignoreIfNotExists) { - throw new AnalysisException( - s"No partition is dropped. 
One partition spec '$s' does not exist in table '$table' " + - s"database '$db'") + throw new NoSuchPartitionsException(db, table, Seq(s)) } parts.map(_.getValues) }.distinct diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala index fe26466cdad62..9c7d76a0caa08 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -27,22 +26,4 @@ class AlterTableDropPartitionSuite override def version: String = "Hive V1" override def defaultUsing: String = "USING HIVE" - - override protected val notFullPartitionSpecErr = "No partition is dropped" - - test("partition not exists") { - withNsTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") - - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t DROP PARTITION (id=1), PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("No partition is dropped")) - - checkPartitions(t, Map("id" -> "1")) - sql(s"ALTER TABLE $t DROP IF EXISTS PARTITION (id=1), PARTITION (id=2)") - checkPartitions(t) - } - } } From dd042f58e7a0fd2289f6889c324c0d5e4c18ad7f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 16 Dec 2020 10:07:35 +0900 Subject: [PATCH 0777/1009] [SPARK-33796][DOCS] Show hidden text from the left menu of Spark Doc ### What changes were proposed in this pull request? If the text in the left menu of Spark is too long, it will be hidden. ![sql1](https://user-images.githubusercontent.com/1097932/102249583-5ae7a580-3eb7-11eb-813c-f2e2fe019d28.jpeg) This PR is to fix the style issue. ### Why are the changes needed? Improve the UI of Spark documentation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test After changes: ![sql2](https://user-images.githubusercontent.com/1097932/102249603-5fac5980-3eb7-11eb-806d-4e7b8248e6b6.jpeg) Closes #30786 from gengliangwang/fixDocStyle. Authored-by: Gengliang Wang Signed-off-by: HyukjinKwon --- docs/css/main.css | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/css/main.css b/docs/css/main.css index 271113c904d26..309ad7b3bdf0b 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -333,10 +333,6 @@ a.anchorjs-link:hover { text-decoration: none; } overflow-y: scroll; } -.left-menu { - width: 399px; -} - .left-menu h3 { margin-left: 10px; line-height: 30px; From ddff94fd32f85072cbc5c752c337f3b89ae00bed Mon Sep 17 00:00:00 2001 From: Sander Goos Date: Wed, 16 Dec 2020 11:26:54 +0900 Subject: [PATCH 0778/1009] [SPARK-33793][TESTS] Introduce withExecutor to ensure proper cleanup in tests ### What changes were proposed in this pull request? This PR introduces a helper method `withExecutor` that handles the creation of an Executor object and ensures that it is always stopped in a finally block. The tests in ExecutorSuite have been refactored to use this method. ### Why are the changes needed? 
Recently an issue was discovered that leaked Executors (which are not explicitly stopped after a test) can cause other tests to fail due to the JVM being killed after 10 min. It is therefore crucial that tests always stop the Executor. By introducing this helper method, a simple pattern is established that can be easily adopted in new tests, which reduces the risk of regressions. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Run the ExecutorSuite locally. Closes #30783 from sander-goos/SPARK-33793-close-executors. Authored-by: Sander Goos Signed-off-by: HyukjinKwon --- .../apache/spark/executor/ExecutorSuite.scala | 99 ++++++++++--------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 7cf7a81a76133..97ffb36062dbc 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.executor import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.lang.Thread.UncaughtExceptionHandler +import java.net.URL import java.nio.ByteBuffer import java.util.Properties import java.util.concurrent.{ConcurrentHashMap, CountDownLatch, TimeUnit} @@ -53,7 +54,7 @@ import org.apache.spark.scheduler.{DirectTaskResult, FakeTask, ResultTask, Task, import org.apache.spark.serializer.{JavaSerializer, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockManager, BlockManagerId} -import org.apache.spark.util.{LongAccumulator, ThreadUtils, UninterruptibleThread} +import org.apache.spark.util.{LongAccumulator, SparkUncaughtExceptionHandler, ThreadUtils, UninterruptibleThread} class ExecutorSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar with Eventually with PrivateMethodTester { @@ -64,6 +65,33 @@ class ExecutorSuite extends SparkFunSuite super.afterEach() } + /** + * Creates an Executor with the provided arguments, is then passed to `f` + * and will be stopped after `f` returns. 
+ */ + def withExecutor( + executorId: String, + executorHostname: String, + env: SparkEnv, + userClassPath: Seq[URL] = Nil, + isLocal: Boolean = true, + uncaughtExceptionHandler: UncaughtExceptionHandler + = new SparkUncaughtExceptionHandler, + resources: immutable.Map[String, ResourceInformation] + = immutable.Map.empty[String, ResourceInformation])(f: Executor => Unit): Unit = { + var executor: Executor = null + try { + executor = new Executor(executorId, executorHostname, env, userClassPath, isLocal, + uncaughtExceptionHandler, resources) + + f(executor) + } finally { + if (executor != null) { + executor.stop() + } + } + } + test("SPARK-15963: Catch `TaskKilledException` correctly in Executor.TaskRunner") { // mock some objects to make Executor.launchTask() happy val conf = new SparkConf @@ -116,10 +144,8 @@ class ExecutorSuite extends SparkFunSuite } }) - var executor: Executor = null - try { - executor = new Executor("id", "localhost", env, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) + withExecutor("id", "localhost", env) { executor => + // the task will be launched in a dedicated worker thread executor.launchTask(mockExecutorBackend, taskDescription) @@ -139,11 +165,6 @@ class ExecutorSuite extends SparkFunSuite assert(executorSuiteHelper.testFailedReason.toErrorString === "TaskKilled (test)") assert(executorSuiteHelper.taskState === TaskState.KILLED) } - finally { - if (executor != null) { - executor.stop() - } - } } test("SPARK-19276: Handle FetchFailedExceptions that are hidden by user exceptions") { @@ -255,25 +276,24 @@ class ExecutorSuite extends SparkFunSuite confs.foreach { case (k, v) => conf.set(k, v) } val serializer = new JavaSerializer(conf) val env = createMockEnv(conf, serializer) - val executor = - new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) - val executorClass = classOf[Executor] - - // Save all heartbeats sent into an ArrayBuffer for verification - val heartbeats = ArrayBuffer[Heartbeat]() - val mockReceiver = mock[RpcEndpointRef] - when(mockReceiver.askSync(any[Heartbeat], any[RpcTimeout])(any)) - .thenAnswer((invocation: InvocationOnMock) => { - val args = invocation.getArguments() - heartbeats += args(0).asInstanceOf[Heartbeat] - HeartbeatResponse(false) - }) - val receiverRef = executorClass.getDeclaredField("heartbeatReceiverRef") - receiverRef.setAccessible(true) - receiverRef.set(executor, mockReceiver) + withExecutor("id", "localhost", SparkEnv.get) { executor => + val executorClass = classOf[Executor] + + // Save all heartbeats sent into an ArrayBuffer for verification + val heartbeats = ArrayBuffer[Heartbeat]() + val mockReceiver = mock[RpcEndpointRef] + when(mockReceiver.askSync(any[Heartbeat], any[RpcTimeout])(any)) + .thenAnswer((invocation: InvocationOnMock) => { + val args = invocation.getArguments() + heartbeats += args(0).asInstanceOf[Heartbeat] + HeartbeatResponse(false) + }) + val receiverRef = executorClass.getDeclaredField("heartbeatReceiverRef") + receiverRef.setAccessible(true) + receiverRef.set(executor, mockReceiver) - f(executor, heartbeats) + f(executor, heartbeats) + } } private def heartbeatZeroAccumulatorUpdateTest(dropZeroMetrics: Boolean): Unit = { @@ -354,10 +374,7 @@ class ExecutorSuite extends SparkFunSuite val taskDescription = createResultTaskDescription(serializer, taskBinary, rdd, 0) val mockBackend = mock[ExecutorBackend] - var executor: Executor = null - try { - 
executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - resources = immutable.Map.empty[String, ResourceInformation]) + withExecutor("id", "localhost", SparkEnv.get) { executor => executor.launchTask(mockBackend, taskDescription) // Ensure that the executor's metricsPoller is polled so that values are recorded for @@ -368,10 +385,6 @@ class ExecutorSuite extends SparkFunSuite eventually(timeout(5.seconds), interval(10.milliseconds)) { assert(executor.numRunningTasks === 0) } - } finally { - if (executor != null) { - executor.stop() - } } // Verify that peak values for task metrics get sent in the TaskResult @@ -535,12 +548,11 @@ class ExecutorSuite extends SparkFunSuite poll: Boolean = false): (TaskFailedReason, UncaughtExceptionHandler) = { val mockBackend = mock[ExecutorBackend] val mockUncaughtExceptionHandler = mock[UncaughtExceptionHandler] - var executor: Executor = null val timedOut = new AtomicBoolean(false) - try { - executor = new Executor("id", "localhost", SparkEnv.get, userClassPath = Nil, isLocal = true, - uncaughtExceptionHandler = mockUncaughtExceptionHandler, - resources = immutable.Map.empty[String, ResourceInformation]) + + withExecutor("id", "localhost", SparkEnv.get, + uncaughtExceptionHandler = mockUncaughtExceptionHandler) { executor => + // the task will be launched in a dedicated worker thread executor.launchTask(mockBackend, taskDescription) if (killTask) { @@ -573,11 +585,8 @@ class ExecutorSuite extends SparkFunSuite assert(executor.numRunningTasks === 0) } assert(!timedOut.get(), "timed out waiting to be ready to kill tasks") - } finally { - if (executor != null) { - executor.stop() - } } + val orderedMock = inOrder(mockBackend) val statusCaptor = ArgumentCaptor.forClass(classOf[ByteBuffer]) orderedMock.verify(mockBackend) From 62be2483d7d78e61fd2f77929cf41c76eff17869 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 05:37:56 +0000 Subject: [PATCH 0779/1009] [SPARK-33765][SQL] Migrate UNCACHE TABLE to use UnresolvedRelation to resolve identifier ### What changes were proposed in this pull request? This PR proposes to migrate `UNCACHE TABLE` to use `UnresolvedRelation` to resolve the table/view identifier in Analyzer as discussed https://github.com/apache/spark/pull/30403/files#r532360022. ### Why are the changes needed? To resolve the table/view in the analyzer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Updated existing tests Closes #30743 from imback82/uncache_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 16 +++++++++++- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +++- .../ResolveCommandsWithIfExists.scala | 4 ++- .../sql/catalyst/parser/AstBuilder.scala | 9 +++++++ .../catalyst/plans/logical/v2Commands.scala | 8 ++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 10 +++++++ .../spark/sql/execution/SparkSqlParser.scala | 9 ------- .../spark/sql/execution/command/cache.scala | 26 +------------------ .../datasources/DataSourceStrategy.scala | 16 +++++++++++- .../datasources/v2/CacheTableExec.scala | 12 +++++++++ .../datasources/v2/DataSourceV2Strategy.scala | 3 +++ .../sql/execution/SparkSqlParserSuite.scala | 10 ------- .../spark/sql/hive/HiveStrategies.scala | 12 ++++++++- .../spark/sql/hive/CachedTableSuite.scala | 2 +- 14 files changed, 92 insertions(+), 50 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a46f2e3168c6b..0bef6998b177d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -875,6 +875,10 @@ class Analyzer(override val catalogManager: CatalogManager) lookupTempView(ident) .map(view => c.copy(table = view)) .getOrElse(c) + case c @ UncacheTable(UnresolvedRelation(ident, _, false), _, _) => + lookupTempView(ident) + .map(view => c.copy(table = view, isTempView = true)) + .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1005,6 +1009,11 @@ class Analyzer(override val catalogManager: CatalogManager) .map(v2Relation => c.copy(table = v2Relation)) .getOrElse(c) + case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => + lookupV2Relation(u.multipartIdentifier, u.options, false) + .map(v2Relation => c.copy(table = v2Relation)) + .getOrElse(c) + // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand => write.table match { @@ -1098,7 +1107,12 @@ class Analyzer(override val catalogManager: CatalogManager) case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(v2Relation => c.copy(table = v2Relation)) + .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .getOrElse(c) + + case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => + lookupRelation(u.multipartIdentifier, u.options, false) + .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c8e137e9c18ac..30467685d75a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -125,7 +125,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") case CacheTable(u: UnresolvedRelation, _, _, _) => - failAnalysis(s"Table or view not found for `CACHE TABLE`: ${u.multipartIdentifier.quoted}") + failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") + + case UncacheTable(u: UnresolvedRelation, _, _) => + failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") // TODO (SPARK-27484): handle streaming write commands when we have them. case write: V2WriteCommand if write.table.isInstanceOf[UnresolvedRelation] => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala index 196a07a7f9904..60f86b31a4bdf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCommandsWithIfExists.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand} +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, DropView, LogicalPlan, NoopCommand, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule /** @@ -31,5 +31,7 @@ object ResolveCommandsWithIfExists extends Rule[LogicalPlan] { NoopCommand("DROP TABLE", u.multipartIdentifier) case DropView(u: UnresolvedView, ifExists) if ifExists => NoopCommand("DROP VIEW", u.multipartIdentifier) + case UncacheTable(u: UnresolvedRelation, ifExists, _) if ifExists => + NoopCommand("UNCACHE TABLE", u.multipartIdentifier) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 0284d5d01ba96..426dff343818b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3632,6 +3632,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } + /** + * Create an [[UncacheTable]] logical plan. + */ + override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { + UncacheTable( + UnresolvedRelation(visitMultipartIdentifier(ctx.multipartIdentifier)), + ctx.EXISTS != null) + } + /** * Create a [[TruncateTable]] command. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 2091d92eb67c9..d13ad977910d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -794,3 +794,11 @@ case class CacheTableAsSelect( plan: LogicalPlan, isLazy: Boolean, options: Map[String, String]) extends Command + +/** + * The logical plan of the UNCACHE TABLE command. + */ +case class UncacheTable( + table: LogicalPlan, + ifExists: Boolean, + isTempView: Boolean = false) extends Command diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 2b3fc6f71a5c0..9bea6517156ae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2032,6 +2032,16 @@ class DDLParserSuite extends AnalysisTest { "It is not allowed to add catalog/namespace prefix a.b") } + test("UNCACHE TABLE") { + comparePlans( + parsePlan("UNCACHE TABLE a.b.c"), + UncacheTable(UnresolvedRelation(Seq("a", "b", "c")), ifExists = false)) + + comparePlans( + parsePlan("UNCACHE TABLE IF EXISTS a.b.c"), + UncacheTable(UnresolvedRelation(Seq("a", "b", "c")), ifExists = true)) + } + test("TRUNCATE table") { comparePlans( parsePlan("TRUNCATE TABLE a.b.c"), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 3ca3461dfbd47..722ca6f992064 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -192,15 +192,6 @@ class SparkSqlAstBuilder extends AstBuilder { unquotedPath } - /** - * Create an [[UncacheTableCommand]] logical plan. - */ - override def visitUncacheTable(ctx: UncacheTableContext): LogicalPlan = withOrigin(ctx) { - UncacheTableCommand( - visitMultipartIdentifier(ctx.multipartIdentifier), - ctx.EXISTS != null) - } - /** * Create a [[ClearCacheCommand]] logical plan. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala index 3f85a1b0f99d6..2f72af7f4b512 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/cache.scala @@ -17,32 +17,8 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, DataFrame, Row, SparkSession} +import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData -import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper - -case class UncacheTableCommand( - multipartIdentifier: Seq[String], - ifExists: Boolean) extends RunnableCommand { - - override def run(sparkSession: SparkSession): Seq[Row] = { - val tableName = multipartIdentifier.quoted - table(sparkSession, tableName).foreach { table => - val cascade = !sparkSession.sessionState.catalog.isTempView(multipartIdentifier) - sparkSession.sharedState.cacheManager.uncacheQuery(table, cascade) - } - Seq.empty[Row] - } - - private def table(sparkSession: SparkSession, name: String): Option[DataFrame] = { - try { - Some(sparkSession.table(name)) - } catch { - case ex: AnalysisException if ifExists && ex.getMessage.contains("Table or view not found") => - None - } - } -} /** * Clear all cached data from the in-memory cache. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e4f001d61a767..a097017222b57 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ScanOperation -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, InsertIntoDir, InsertIntoStatement, LogicalPlan, Project, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.connector.catalog.SupportsRead @@ -283,6 +283,20 @@ class FindDataSourceTable(sparkSession: SparkSession) extends Rule[LogicalPlan] case i @ InsertIntoStatement(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _, _, _) => i.copy(table = DDLUtils.readHiveTable(tableMeta)) + case c @ CacheTable(UnresolvedCatalogRelation(tableMeta, options, false), _, _, _) + if DDLUtils.isDatasourceTable(tableMeta) => + c.copy(table = readDataSourceTable(tableMeta, options)) + + case c @ CacheTable(UnresolvedCatalogRelation(tableMeta, _, false), _, _, _) => + c.copy(table = DDLUtils.readHiveTable(tableMeta)) + + case u @ UncacheTable(UnresolvedCatalogRelation(tableMeta, options, false), _, _) + if DDLUtils.isDatasourceTable(tableMeta) => + u.copy(table = readDataSourceTable(tableMeta, options)) + + case u @ UncacheTable(UnresolvedCatalogRelation(tableMeta, _, false), _, _) => + u.copy(table = DDLUtils.readHiveTable(tableMeta)) + case UnresolvedCatalogRelation(tableMeta, options, false) if 
DDLUtils.isDatasourceTable(tableMeta) => readDataSourceTable(tableMeta, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 85107dfc9b2ef..2d8e5b5e286b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -87,3 +87,15 @@ case class CacheTableAsSelectExec( sparkSession.table(tempViewName) } } + +case class UncacheTableExec( + relation: LogicalPlan, + cascade: Boolean) extends V2CommandExec { + override def run(): Seq[InternalRow] = { + val sparkSession = sqlContext.sparkSession + sparkSession.sharedState.cacheManager.uncacheQuery(sparkSession, relation, cascade) + Seq.empty + } + + override def output: Seq[Attribute] = Seq.empty +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 6020e42b21900..120fa5288dda9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -364,6 +364,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case r: CacheTableAsSelect => CacheTableAsSelectExec(r.tempViewName, r.plan, r.isLazy, r.options) :: Nil + case r: UncacheTable => + UncacheTableExec(r.table, cascade = !r.isTempView) :: Nil + case _ => Nil } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index 009c5b3705d2f..f1788e9c31af8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -339,16 +339,6 @@ class SparkSqlParserSuite extends AnalysisTest { "LINES TERMINATED BY only supports newline '\\n' right now") } - test("UNCACHE TABLE") { - assertEqual( - "UNCACHE TABLE a.b.c", - UncacheTableCommand(Seq("a", "b", "c"), ifExists = false)) - - assertEqual( - "UNCACHE TABLE IF EXISTS a.b.c", - UncacheTableCommand(Seq("a", "b", "c"), ifExists = true)) - } - test("CLEAR CACHE") { assertEqual("CLEAR CACHE", ClearCacheCommand) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index ff7dc58829fa1..e10233d2573c9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning._ -import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, InsertIntoDir, InsertIntoStatement, LogicalPlan, ScriptTransformation, Statistics, UncacheTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogV2Util.assertNoNullTypeInSchema import 
org.apache.spark.sql.execution._ @@ -231,6 +231,16 @@ case class RelationConversions( assertNoNullTypeInSchema(query.schema) OptimizedCreateHiveTableAsSelectCommand( tableDesc, query, query.output.map(_.name), mode) + + // Cache table + case c @ CacheTable(relation: HiveTableRelation, _, _, _) + if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => + c.copy(table = metastoreCatalog.convert(relation)) + + // Uncache table + case u @ UncacheTable(relation: HiveTableRelation, _, _) + if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => + u.copy(table = metastoreCatalog.convert(relation)) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index 81c3f271b18d4..6cb98e92e36fa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -113,7 +113,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto e = intercept[AnalysisException] { sql("UNCACHE TABLE nonexistentTable") }.getMessage - assert(e.contains(s"$expectedErrorMsg nonexistentTable")) + assert(e.contains("Table or view not found: nonexistentTable")) sql("UNCACHE TABLE IF EXISTS nonexistentTable") } From ef7f6903b4fa28c554a1f0b58b9da194979b61ee Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 05:45:44 +0000 Subject: [PATCH 0780/1009] [SPARK-33786][SQL] The storage level for a cache should be respected when a table name is altered ### What changes were proposed in this pull request? This PR proposes to retain the cache's storage level when a table name is altered by `ALTER TABLE ... RENAME TO ...`. ### Why are the changes needed? Currently, when a table name is altered, the table's cache is refreshed (if exists), but the storage level is not retained. For example: ```scala def getStorageLevel(tableName: String): StorageLevel = { val table = spark.table(tableName) val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get cachedData.cachedRepresentation.cacheBuilder.storageLevel } Seq(1 -> "a").toDF("i", "j").write.parquet(path.getCanonicalPath) sql(s"CREATE TABLE old USING parquet LOCATION '${path.toURI}'") sql("CACHE TABLE old OPTIONS('storageLevel' 'MEMORY_ONLY')") val oldStorageLevel = getStorageLevel("old") sql("ALTER TABLE old RENAME TO new") val newStorageLevel = getStorageLevel("new") ``` `oldStorageLevel` will be `StorageLevel(memory, deserialized, 1 replicas)` whereas `newStorageLevel` will be `StorageLevel(disk, memory, deserialized, 1 replicas)`, which is the default storage level. ### Does this PR introduce _any_ user-facing change? Yes, now the storage level for the cache will be retained. ### How was this patch tested? Added a unit test. Closes #30774 from imback82/alter_table_rename_cache_fix. 
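For readers skimming the diff below, the core of the change in `AlterTableRenameCommand` can be read as the following sketch (condensed from the patch itself; `sparkSession` is the active session, `catalog` its session catalog, and `oldName`/`newName` the table identifiers handled by the command):

```scala
// Remember the storage level of the old table's cache entry, if any.
val optStorageLevel = sparkSession.sharedState.cacheManager
  .lookupCachedData(sparkSession.table(oldName.unquotedString))
  .map(_.cachedRepresentation.cacheBuilder.storageLevel)

// Uncache only if the table was cached, then rename as before.
if (optStorageLevel.isDefined) {
  CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString)
}
catalog.refreshTable(oldName)
catalog.renameTable(oldName, newName)

// Re-cache under the new name with the remembered storage level
// instead of falling back to the default MEMORY_AND_DISK.
optStorageLevel.foreach { storageLevel =>
  sparkSession.catalog.cacheTable(newName.unquotedString, storageLevel)
}
```

This also replaces the previous `Try(sparkSession.catalog.isCached(...))` probe: a defined `optStorageLevel` already tells us whether the old table was cached.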
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/tables.scala | 14 ++++++------- .../apache/spark/sql/CachedTableSuite.scala | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 431a103063c68..cf2a6ffb2c682 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -21,7 +21,6 @@ import java.net.{URI, URISyntaxException} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.fs.{FileContext, FsConstants, Path} @@ -193,18 +192,19 @@ case class AlterTableRenameCommand( } else { val table = catalog.getTableMetadata(oldName) DDLUtils.verifyAlterTableType(catalog, table, isView) - // If an exception is thrown here we can just assume the table is uncached; - // this can happen with Hive tables when the underlying catalog is in-memory. - val wasCached = Try(sparkSession.catalog.isCached(oldName.unquotedString)).getOrElse(false) - if (wasCached) { + // If `optStorageLevel` is defined, the old table was cached. + val optCachedData = sparkSession.sharedState.cacheManager.lookupCachedData( + sparkSession.table(oldName.unquotedString)) + val optStorageLevel = optCachedData.map(_.cachedRepresentation.cacheBuilder.storageLevel) + if (optStorageLevel.isDefined) { CommandUtils.uncacheTableOrView(sparkSession, oldName.unquotedString) } // Invalidate the table last, otherwise uncaching the table would load the logical plan // back into the hive metastore cache catalog.refreshTable(oldName) catalog.renameTable(oldName, newName) - if (wasCached) { - sparkSession.catalog.cacheTable(newName.unquotedString) + optStorageLevel.foreach { storageLevel => + sparkSession.catalog.cacheTable(newName.unquotedString, storageLevel) } } Seq.empty[Row] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index af8d72309bdea..11eba933284f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1285,4 +1285,24 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(spark.sharedState.cacheManager.lookupCachedData(sql("select 1, 2")).isDefined) } } + + test("SPARK-33786: Cache's storage level should be respected when a table name is altered.") { + withTable("old", "new") { + withTempPath { path => + def getStorageLevel(tableName: String): StorageLevel = { + val table = spark.table(tableName) + val cachedData = spark.sharedState.cacheManager.lookupCachedData(table).get + cachedData.cachedRepresentation.cacheBuilder.storageLevel + } + Seq(1 -> "a").toDF("i", "j").write.parquet(path.getCanonicalPath) + sql(s"CREATE TABLE old USING parquet LOCATION '${path.toURI}'") + sql("CACHE TABLE old OPTIONS('storageLevel' 'MEMORY_ONLY')") + val oldStorageLevel = getStorageLevel("old") + + sql("ALTER TABLE old RENAME TO new") + val newStorageLevel = getStorageLevel("new") + assert(oldStorageLevel === newStorageLevel) + } + } + } } From 12f3715ed7e0cd06131272845c3d04f4ad1b441c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 00:10:13 -0800 Subject: [PATCH 0781/1009] 
[MINOR][DOCS] Fix Jenkins job badge image and link in README.md ### What changes were proposed in this pull request? This PR proposes to fix the Jenkins job badge: Before: ![Screen Shot 2020-12-16 at 4 14 14 PM](https://user-images.githubusercontent.com/6477701/102316960-2c9ebe80-3fba-11eb-878d-07ae735fb3a6.png) After: ![Screen Shot 2020-12-16 at 4 14 09 PM](https://user-images.githubusercontent.com/6477701/102316956-2a3c6480-3fba-11eb-9fa4-b8312edb8a1a.png) ### Why are the changes needed? To make people can easily check the status of builds. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested via using GitHub. Closes #30797 from HyukjinKwon/minor-readme. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7931263b0fc7..aa7d1dd338be0 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ and Structured Streaming for stream processing. -[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-2.7-hive-2.3) +[![Jenkins Build](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/badge/icon)](https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2) [![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark) [![PySpark Coverage](https://img.shields.io/badge/dynamic/xml.svg?label=pyspark%20coverage&url=https%3A%2F%2Fspark-test.github.io%2Fpyspark-coverage-site&query=%2Fhtml%2Fbody%2Fdiv%5B1%5D%2Fdiv%2Fh1%2Fspan&colorB=brightgreen&style=plastic)](https://spark-test.github.io/pyspark-coverage-site) From 888a274a88560ebe3c43ff9f003c296751d0c207 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 17:20:03 +0900 Subject: [PATCH 0782/1009] [SPARK-33802][INFRA] Override name and email address explicitly when updating PySpark coverage ### What changes were proposed in this pull request? The current Jenkins job fails as below (https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/1726/console) ``` Generating HTML files for PySpark coverage under /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2/python/test_coverage/htmlcov /home/jenkins/workspace/spark-master-test-sbt-hadoop-3.2 Cloning into 'pyspark-coverage-site'... *** Please tell me who you are. Run git config --global user.email "youexample.com" git config --global user.name "Your Name" to set your account's default identity. Omit --global to set the identity only in this repository. ``` This PR proposes to set both when committing to the coverage site. ### Why are the changes needed? To make the coverage site keep working. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested in the console but it has to be merged to test in the Jenkins environment. Closes #30796 from HyukjinKwon/SPARK-33802. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index 37a15a758d898..e271b4dec6c74 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -521,10 +521,11 @@ def post_python_tests_results(): # 6. Commit current HTMLs. 
run_cmd([ "git", + "-c user.name='Apache Spark Test Account'", + "-c user.email='sparktestacc@gmail.com'", "commit", "-am", - "Coverage report at latest commit in Apache Spark", - '--author="Apache Spark Test Account "']) + "Coverage report at latest commit in Apache Spark"]) # 7. Delete the old branch. run_cmd(["git", "branch", "-D", "gh-pages"]) # 8. Rename the temporary branch to master. From 7845865b8d5c03a4daf82588be0ff2ebb90152a7 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 13:42:30 +0000 Subject: [PATCH 0783/1009] [SPARK-33803][SQL] Sort table properties by key in DESCRIBE TABLE command ### What changes were proposed in this pull request? This PR proposes to sort table properties in DESCRIBE TABLE command. This is consistent with DSv2 command as well: https://github.com/apache/spark/blob/e3058ba17cb4512537953eb4ded884e24ee93ba2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala#L63 This PR fixes the test case in Scala 2.13 build as well where the table properties have different order in the map. ### Why are the changes needed? To keep the deterministic and pretty output, and fix the tests in Scala 2.13 build. See https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-maven-hadoop-3.2-scala-2.13/49/testReport/junit/org.apache.spark.sql/SQLQueryTestSuite/describe_sql/ ``` describe.sql Expected "...spark_catalog, view.[query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]]", but got "...spark_catalog, view.[catalogAndNamespace.part.1=default, view.query.out.col.2=c, view.referredTempFunctionsNames=[]]]" Result did not match for query #29 DESC FORMATTED v ``` ### Does this PR introduce _any_ user-facing change? Yes, it will change the text output from `DESCRIBE [EXTENDED|FORMATTED] table_name`. Now the table properties are sorted by its key. ### How was this patch tested? Related unittests were fixed accordingly. Closes #30799 from HyukjinKwon/SPARK-33803. 
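The rendering change itself is a one-liner; as a standalone sketch (property values borrowed from the `describe.sql.out` golden file below, variable names illustrative):

```scala
// Table properties as stored in the catalog. Map iteration order is an
// implementation detail and differs between Scala 2.12 and 2.13 for larger
// maps, which is what made the golden files flaky.
val properties = Map("t" -> "test", "e" -> "3")

// Sort by key before rendering so DESCRIBE output is deterministic.
val tableProperties = properties.toSeq.sortBy(_._1)
  .map(p => p._1 + "=" + p._2)
  .mkString("[", ", ", "]")
// tableProperties == "[e=3, t=test]"
```

The DSv2 `DescribeTableExec` already renders properties sorted by key, so the V1 output now matches it.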
Authored-by: HyukjinKwon Signed-off-by: Wenchen Fan --- .../sql/catalyst/catalog/interface.scala | 3 +- .../sql-tests/results/describe.sql.out | 8 +++--- .../results/postgreSQL/create_view.sql.out | 28 +++++++++---------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 6743b052fb3a1..9876ee375cfa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -388,7 +388,8 @@ case class CatalogTable( def toLinkedHashMap: mutable.LinkedHashMap[String, String] = { val map = new mutable.LinkedHashMap[String, String]() - val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") + val tableProperties = properties.toSeq.sortBy(_._1) + .map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]") val lastAccess = { if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index ebec2e1976b15..93b0cc3fe97e1 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -130,7 +130,7 @@ Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment -Table Properties [t=test, e=3] +Table Properties [e=3, t=test] Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog @@ -162,7 +162,7 @@ Num Buckets 2 Bucket Columns [`a`] Sort Columns [`b`] Comment table_comment -Table Properties [t=test, e=3] +Table Properties [e=3, t=test] Location [not included in comparison]/{warehouse_dir}/t Storage Properties [a=1, b=2] Partition Provider Catalog @@ -477,7 +477,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]] -- !query @@ -501,7 +501,7 @@ View Text SELECT * FROM t View Original Text SELECT * FROM t View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] -Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, 
view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 1ac7c4a4069b3..c05c9abbcee31 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a WHERE t1.id = t2.id View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=t1_a, view.query.out.col.1=t2_a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties 
[view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_ View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=id, view.query.out.col.1=a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -533,7 +533,7 @@ View Text 
SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, 
view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] -Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] -Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2] +Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true] -- !query From 9d9d4a8e122cf1137edeca857e925f7e76c1ace2 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 16 Dec 2020 13:49:49 
+0000 Subject: [PATCH 0784/1009] [SPARK-33789][SQL][TESTS] Refactor unified V1 and V2 datasource tests ### What changes were proposed in this pull request? 1. Move common utility functions such as `test()`, `withNsTable()` and `checkPartitions()` to `DDLCommandTestUtils`. 2. Place common settings such as `version`, `catalog`, `defaultUsing`, `sparkConf` to `CommandSuiteBase`. ### Why are the changes needed? To improve code maintenance of the unified tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableAddPartitionSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30779 from MaxGekk/refactor-unified-tests. Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Wenchen Fan --- .../AlterTableAddPartitionSuiteBase.scala | 54 ++---- .../AlterTableDropPartitionSuiteBase.scala | 53 ++---- .../command/DDLCommandTestUtils.scala | 60 +++++++ .../command/ShowPartitionsSuiteBase.scala | 167 +++++++----------- .../command/ShowTablesSuiteBase.scala | 45 ++--- .../v1/AlterTableAddPartitionSuite.scala | 8 +- .../v1/AlterTableDropPartitionSuite.scala | 8 +- .../command/v1/CommandSuiteBase.scala | 27 +++ .../command/v1/ShowPartitionsSuite.scala | 8 +- .../command/v1/ShowTablesSuite.scala | 39 ++-- .../v2/AlterTableAddPartitionSuite.scala | 16 +- .../v2/AlterTableDropPartitionSuite.scala | 15 +- .../command/v2/CommandSuiteBase.scala | 32 ++++ .../command/v2/ShowPartitionsSuite.scala | 13 +- .../command/v2/ShowTablesSuite.scala | 11 +- .../command/AlterTableAddPartitionSuite.scala | 6 +- .../AlterTableDropPartitionSuite.scala | 7 +- .../execution/command/CommandSuiteBase.scala | 27 +++ .../command/ShowPartitionsSuite.scala | 6 +- .../execution/command/ShowTablesSuite.scala | 6 +- 20 files changed, 274 insertions(+), 334 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index 2457bb9f8b57c..1c1d802b991f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -17,50 +17,18 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf 
-import org.apache.spark.sql.test.SQLTestUtils - -trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"ALTER TABLE .. ADD PARTITION $version: " + testName, testTags: _*)(testFun) - } +trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. ADD PARTITION" - protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { - val partitions = sql(s"SHOW PARTITIONS $t") - .collect() - .toSet - .map((row: Row) => row.getString(0)) - .map(PartitioningUtils.parsePathFragment) - assert(partitions === expected.toSet) - } protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit - protected def withNsTable(ns: String, tableName: String, cat: String = catalog) - (f: String => Unit): Unit = { - val nsCat = s"$cat.$ns" - withNamespace(nsCat) { - sql(s"CREATE NAMESPACE $nsCat") - val t = s"$nsCat.$tableName" - withTable(t) { - f(t) - } - } - } - test("one partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s"ALTER TABLE $t ADD $exists PARTITION (id=1) LOCATION 'loc'") @@ -72,7 +40,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multiple partitions") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s""" @@ -88,7 +56,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multi-part partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") Seq("", "IF NOT EXISTS").foreach { exists => sql(s"ALTER TABLE $t ADD $exists PARTITION (a=2, b='abc')") @@ -99,7 +67,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("table to alter does not exist") { - withNsTable("ns", "does_not_exist") { t => + withNamespaceAndTable("ns", "does_not_exist") { t => val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t ADD IF NOT EXISTS PARTITION (a='4', b='9')") }.getMessage @@ -108,7 +76,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("case sensitivity in resolving partition specs") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => spark.sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { @@ -125,7 +93,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33521: universal type conversions of partition values") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE TABLE $t ( | id int, @@ -173,7 +141,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33676: not fully specified partition spec") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE 
TABLE $t (id bigint, part0 int, part1 string) |$defaultUsing @@ -187,7 +155,7 @@ trait AlterTableAddPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("partition already exists") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") sql(s"ALTER TABLE $t ADD PARTITION (id=2) LOCATION 'loc1'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index 338f13ace891c..433f24c75083c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -17,48 +17,15 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException -import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils -trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String +trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. DROP PARTITION" protected def notFullPartitionSpecErr: String - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"ALTER TABLE .. 
DROP PARTITION $version: " + testName, testTags: _*)(testFun) - } - - protected def withNsTable(ns: String, tableName: String, cat: String = catalog) - (f: String => Unit): Unit = { - val nsCat = s"$cat.$ns" - withNamespace(nsCat) { - sql(s"CREATE NAMESPACE $nsCat") - val t = s"$nsCat.$tableName" - withTable(t) { - f(t) - } - } - } - - protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { - val partitions = sql(s"SHOW PARTITIONS $t") - .collect() - .toSet - .map((row: Row) => row.getString(0)) - .map(PartitioningUtils.parsePathFragment) - assert(partitions === expected.toSet) - } - protected def checkDropPartition( t: String, ifExists: String, @@ -75,7 +42,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("single partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF EXISTS").foreach { ifExists => sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") @@ -85,7 +52,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multiple partitions") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") Seq("", "IF EXISTS").foreach { ifExists => sql(s""" @@ -98,7 +65,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("multi-part partition") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, a int, b string) $defaultUsing PARTITIONED BY (a, b)") Seq("", "IF EXISTS").foreach { ifExists => sql(s"ALTER TABLE $t ADD PARTITION (a = 2, b = 'abc')") @@ -108,7 +75,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("table to alter does not exist") { - withNsTable("ns", "does_not_exist") { t => + withNamespaceAndTable("ns", "does_not_exist") { t => val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t DROP PARTITION (a='4', b='9')") }.getMessage @@ -117,7 +84,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("case sensitivity in resolving partition specs") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { val errMsg = intercept[AnalysisException] { @@ -136,7 +103,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("SPARK-33676: not fully specified partition spec") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s""" |CREATE TABLE $t (id bigint, part0 int, part1 string) |$defaultUsing @@ -149,7 +116,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with SQLTestUtils { } test("partition not exists") { - withNsTable("ns", "tbl") { t => + withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") sql(s"ALTER TABLE $t ADD PARTITION (id=1) LOCATION 'loc'") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala new file mode 100644 index 0000000000000..a4129fe1ffee5 --- /dev/null +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.scalactic.source.Position +import org.scalatest.Tag + +import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.datasources.PartitioningUtils +import org.apache.spark.sql.test.SQLTestUtils + +trait DDLCommandTestUtils extends SQLTestUtils { + // The version of the catalog under testing such as "V1", "V2", "Hive V1". + protected def version: String + // Name of the command as SQL statement, for instance "SHOW PARTITIONS" + protected def command: String + protected def catalog: String + protected def defaultUsing: String + + override def test(testName: String, testTags: Tag*)(testFun: => Any) + (implicit pos: Position): Unit = { + super.test(s"$command $version: " + testName, testTags: _*)(testFun) + } + + protected def withNamespaceAndTable(ns: String, tableName: String, cat: String = catalog) + (f: String => Unit): Unit = { + val nsCat = s"$cat.$ns" + withNamespace(nsCat) { + sql(s"CREATE NAMESPACE $nsCat") + val t = s"$nsCat.$tableName" + withTable(t) { + f(t) + } + } + } + + protected def checkPartitions(t: String, expected: Map[String, String]*): Unit = { + val partitions = sql(s"SHOW PARTITIONS $t") + .collect() + .toSet + .map((row: Row) => row.getString(0)) + .map(PartitioningUtils.parsePathFragment) + assert(partitions === expected.toSet) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 56c6e5a325745..d66c6191fbfa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{StringType, StructType} -trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String - protected def defaultUsing: String +trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW PARTITIONS" // Gets the schema of `SHOW PARTITIONS` private val showSchema: StructType = new StructType().add("partition", StringType, false) protected def runShowPartitionsSql(sqlText: String, expected: Seq[Row]): 
Unit = { @@ -37,11 +31,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { checkAnswer(df, expected) } - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"SHOW PARTITIONS $version: " + testName, testTags: _*)(testFun) - } - protected def createDateTable(table: String): Unit = { sql(s""" |CREATE TABLE $table (price int, qty int, year int, month int) @@ -72,122 +61,94 @@ trait ShowPartitionsSuiteBase extends QueryTest with SQLTestUtils { } test("show partitions of non-partitioned table") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.not_partitioned_table" - withTable(table) { - sql(s"CREATE TABLE $table (col1 int) $defaultUsing") - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table") - }.getMessage - assert(errMsg.contains("not allowed on a table that is not partitioned")) - } + withNamespaceAndTable("ns", "not_partitioned_table") { t => + sql(s"CREATE TABLE $t (col1 int) $defaultUsing") + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $t") + }.getMessage + assert(errMsg.contains("not allowed on a table that is not partitioned")) } } test("non-partitioning columns") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - val errMsg = intercept[AnalysisException] { - sql(s"SHOW PARTITIONS $table PARTITION(abcd=2015, xyz=1)") - }.getMessage - assert(errMsg.contains("abcd is not a valid partition column")) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + val errMsg = intercept[AnalysisException] { + sql(s"SHOW PARTITIONS $t PARTITION(abcd=2015, xyz=1)") + }.getMessage + assert(errMsg.contains("abcd is not a valid partition column")) } } test("show everything") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - runShowPartitionsSql( - s"show partitions $table", - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: - Row("year=2016/month=3") :: Nil) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + runShowPartitionsSql( + s"show partitions $t", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: + Row("year=2016/month=3") :: Nil) } } test("filter by partitions") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - createDateTable(table) - runShowPartitionsSql( - s"show partitions $table PARTITION(year=2015)", - Row("year=2015/month=1") :: - Row("year=2015/month=2") :: Nil) - runShowPartitionsSql( - s"show partitions $table PARTITION(year=2015, month=1)", - Row("year=2015/month=1") :: Nil) - runShowPartitionsSql( - s"show partitions $table PARTITION(month=2)", - Row("year=2015/month=2") :: - Row("year=2016/month=2") :: Nil) - } + withNamespaceAndTable("ns", "dateTable") { t => + createDateTable(t) + runShowPartitionsSql( + s"show partitions $t PARTITION(year=2015)", + Row("year=2015/month=1") :: + Row("year=2015/month=2") :: Nil) + runShowPartitionsSql( + s"show partitions $t PARTITION(year=2015, month=1)", + Row("year=2015/month=1") :: Nil) + runShowPartitionsSql( + s"show partitions $t PARTITION(month=2)", + Row("year=2015/month=2") :: + Row("year=2016/month=2") :: Nil) } } 
test("show everything more than 5 part keys") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.wideTable" - withTable(table) { - createWideTable(table) - runShowPartitionsSql( - s"show partitions $table", - Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: - Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) - } + withNamespaceAndTable("ns", "wideTable") { t => + createWideTable(t) + runShowPartitionsSql( + s"show partitions $t", + Row("year=2016/month=3/hour=10/minute=10/sec=10/extra=1") :: + Row("year=2016/month=4/hour=10/minute=10/sec=10/extra=1") :: Nil) } } test("SPARK-33667: case sensitivity of partition spec") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val t = s"$catalog.ns.part_table" - withTable(t) { - sql(s""" - |CREATE TABLE $t (price int, qty int, year int, month int) - |$defaultUsing - |PARTITIONED BY (year, month)""".stripMargin) - sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") - Seq( - true -> "PARTITION(year = 2015, month = 1)", - false -> "PARTITION(YEAR = 2015, Month = 1)" - ).foreach { case (caseSensitive, partitionSpec) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - runShowPartitionsSql( - s"SHOW PARTITIONS $t $partitionSpec", - Row("year=2015/month=1") :: Nil) - } + withNamespaceAndTable("ns", "part_table") { t => + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |PARTITIONED BY (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + runShowPartitionsSql( + s"SHOW PARTITIONS $t $partitionSpec", + Row("year=2015/month=1") :: Nil) } } } } test("SPARK-33777: sorted output") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val table = s"$catalog.ns.dateTable" - withTable(table) { - sql(s""" - |CREATE TABLE $table (id int, part string) - |$defaultUsing - |PARTITIONED BY (part)""".stripMargin) - sql(s"ALTER TABLE $table ADD PARTITION(part = 'b')") - sql(s"ALTER TABLE $table ADD PARTITION(part = 'a')") - val partitions = sql(s"show partitions $table") - assert(partitions.first().getString(0) === "part=a") - } + withNamespaceAndTable("ns", "dateTable") { t => + sql(s""" + |CREATE TABLE $t (id int, part string) + |$defaultUsing + |PARTITIONED BY (part)""".stripMargin) + sql(s"ALTER TABLE $t ADD PARTITION(part = 'b')") + sql(s"ALTER TABLE $t ADD PARTITION(part = 'a')") + val partitions = sql(s"show partitions $t") + assert(partitions.first().getString(0) === "part=a") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index 58427183eeed5..5b729a4eb1c85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -17,21 +17,15 @@ package org.apache.spark.sql.execution.command -import org.scalactic.source.Position -import org.scalatest.Tag - import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType -trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { - protected def version: String - protected def catalog: String +trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW TABLES" protected def defaultNamespace: Seq[String] - protected def defaultUsing: String case class ShowRow(namespace: String, table: String, isTemporary: Boolean) protected def getRows(showRows: Seq[ShowRow]): Seq[Row] // Gets the schema of `SHOW TABLES` @@ -43,18 +37,10 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { checkAnswer(df, getRows(expected)) } - override def test(testName: String, testTags: Tag*)(testFun: => Any) - (implicit pos: Position): Unit = { - super.test(s"SHOW TABLES $version: " + testName, testTags: _*)(testFun) - } - test("show an existing table") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTable(s"$catalog.ns.table") { - sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") - runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) - } + withNamespaceAndTable("ns", "table") { t => + sql(s"CREATE TABLE $t (name STRING, id INT) $defaultUsing") + runShowTablesSql(s"SHOW TABLES IN $catalog.ns", Seq(ShowRow("ns", "table", false))) } } @@ -117,20 +103,17 @@ trait ShowTablesSuiteBase extends QueryTest with SQLTestUtils { } test("change current catalog and namespace with USE statements") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTable(s"$catalog.ns.table") { - sql(s"CREATE TABLE $catalog.ns.table (name STRING, id INT) $defaultUsing") + withNamespaceAndTable("ns", "table") { t => + sql(s"CREATE TABLE $t (name STRING, id INT) $defaultUsing") - sql(s"USE $catalog") - // No table is matched since the current namespace is not ["ns"] - assert(defaultNamespace != Seq("ns")) - runShowTablesSql("SHOW TABLES", Seq()) + sql(s"USE $catalog") + // No table is matched since the current namespace is not ["ns"] + assert(defaultNamespace != Seq("ns")) + runShowTablesSql("SHOW TABLES", Seq()) - // Update the current namespace to match "ns.tbl". - sql(s"USE $catalog.ns") - runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) - } + // Update the current namespace to match "ns.tbl". 
+ sql(s"USE $catalog.ns") + runShowTablesSql("SHOW TABLES", Seq(ShowRow("ns", "table", false))) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index b29564e1d81b6..1b7c90067e3f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -18,15 +18,9 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - override protected def checkLocation( t: String, spec: TablePartitionSpec, @@ -43,4 +37,4 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit } } -class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with SharedSparkSession +class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index e655debc2fdde..737af96f5abe3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - override protected val notFullPartitionSpecErr = "The following partitions not found in table" } class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with SharedSparkSession + with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala new file mode 100644 index 0000000000000..323f9c9365a11 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.test.SharedSparkSession + +trait CommandSuiteBase extends SharedSparkSession { + def version: String = "V1" + def catalog: String = CatalogManager.SESSION_CATALOG_NAME + def defaultUsing: String = "USING parquet" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index c752a5f358bb9..8acd24f0e3956 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -18,15 +18,9 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME - override def defaultUsing: String = "USING parquet" - test("show everything in the default database") { val table = "dateTable" withTable(table) { @@ -69,7 +63,7 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { } } -class ShowPartitionsSuite extends ShowPartitionsSuiteBase with SharedSparkSession { +class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase { // The test is placed here because it fails with `USING HIVE`: // org.apache.spark.sql.AnalysisException: // Hive data source can only be used with tables, you can't use it with CREATE TEMP VIEW USING diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 3db880c776365..12b4df269e157 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -18,17 +18,12 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.{AnalysisException, Row, SaveMode} -import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.command import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{BooleanType, StringType, StructType} trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { - override def version: String = "V1" - override def catalog: String = CatalogManager.SESSION_CATALOG_NAME override def defaultNamespace: Seq[String] = Seq("default") - override def defaultUsing: String = "USING parquet" override def showSchema: StructType = { new StructType() .add("database", StringType, nullable = false) @@ -87,31 +82,27 @@ 
trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } test("case sensitivity of partition spec") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val t = s"$catalog.ns.part_table" - withTable(t) { - sql(s""" - |CREATE TABLE $t (price int, qty int, year int, month int) - |$defaultUsing - |partitioned by (year, month)""".stripMargin) - sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") - Seq( - true -> "PARTITION(year = 2015, month = 1)", - false -> "PARTITION(YEAR = 2015, Month = 1)" - ).foreach { case (caseSensitive, partitionSpec) => - withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { - val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") - val information = df.select("information").first().getString(0) - assert(information.contains("Partition Values: [year=2015, month=1]")) - } + withNamespaceAndTable("ns", "part_table") { t => + sql(s""" + |CREATE TABLE $t (price int, qty int, year int, month int) + |$defaultUsing + |partitioned by (year, month)""".stripMargin) + sql(s"INSERT INTO $t PARTITION(year = 2015, month = 1) SELECT 1, 1") + Seq( + true -> "PARTITION(year = 2015, month = 1)", + false -> "PARTITION(YEAR = 2015, Month = 1)" + ).foreach { case (caseSensitive, partitionSpec) => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + val df = sql(s"SHOW TABLE EXTENDED LIKE 'part_table' $partitionSpec") + val information = df.select("information").first().getString(0) + assert(information.contains("Partition Values: [year=2015, month=1]")) } } } } } -class ShowTablesSuite extends ShowTablesSuiteBase with SharedSparkSession { +class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase { test("SPARK-33670: show partitions from a datasource table") { import testImplicits._ withNamespace(s"$catalog.ns") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index 09921c8d8a5eb..b8ecb87ae7595 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -17,29 +17,19 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.InMemoryPartitionTable import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession class AlterTableAddPartitionSuite extends command.AlterTableAddPartitionSuiteBase - with SharedSparkSession { + with CommandSuiteBase { import CatalogV2Implicits._ - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" - - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - override protected def checkLocation( t: 
String, spec: TablePartitionSpec, @@ -61,7 +51,7 @@ class AlterTableAddPartitionSuite } test("SPARK-33650: add partition into a table which doesn't support partition management") { - withNsTable("ns", "tbl", s"non_part_$catalog") { t => + withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t ADD PARTITION (id=1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index 9dc1cad5a002d..ffbfe3f695935 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -17,28 +17,17 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession class AlterTableDropPartitionSuite extends command.AlterTableDropPartitionSuiteBase - with SharedSparkSession { - - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" + with CommandSuiteBase { override protected val notFullPartitionSpecErr = "Partition spec is invalid" - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) - test("SPARK-33650: drop partition into a table which doesn't support partition management") { - withNsTable("ns", "tbl", s"non_part_$catalog") { t => + withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") val errMsg = intercept[AnalysisException] { sql(s"ALTER TABLE $t DROP PARTITION (id=1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala new file mode 100644 index 0000000000000..b1f6a5b318a32 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.test.SharedSparkSession + +trait CommandSuiteBase extends SharedSparkSession { + def version: String = "V2" + def catalog: String = "test_catalog" + def defaultUsing: String = "USING _" + + override def sparkConf: SparkConf = super.sparkConf + .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) + .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 55985a335c94b..e52c60d0f9a95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,21 +17,10 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession - -class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with SharedSparkSession { - override def version: String = "V2" - override def catalog: String = "test_catalog" - override def defaultUsing: String = "USING _" - - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) - .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) +class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSuiteBase { test("a table does not support partitioning") { val table = s"non_part_$catalog.tab1" withTable(table) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala index 370c8358e64da..cef5eac703ee7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowTablesSuite.scala @@ -17,18 +17,12 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.SparkConf import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.connector.InMemoryTableCatalog import org.apache.spark.sql.execution.command -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} -class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSession { - override def version: String = "V2" - override def catalog: String = "test_catalog" +class ShowTablesSuite extends command.ShowTablesSuiteBase with CommandSuiteBase { override def defaultNamespace: Seq[String] = Nil - override def defaultUsing: String = "USING _" override def showSchema: StructType = { new StructType() .add("namespace", StringType, nullable = false) @@ -40,9 +34,6 @@ class ShowTablesSuite extends command.ShowTablesSuiteBase with SharedSparkSessio } } - override def sparkConf: SparkConf = super.sparkConf - .set(s"spark.sql.catalog.$catalog", classOf[InMemoryTableCatalog].getName) 
- // The test fails for V1 catalog with the error: // org.apache.spark.sql.AnalysisException: // The namespace in session catalog must have exactly one name part: spark_catalog.n1.n2.db diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala index 73776c3ef79fa..2a996c3f4690c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddPartitionSuite.scala @@ -18,11 +18,7 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton class AlterTableAddPartitionSuite extends v1.AlterTableAddPartitionSuiteBase - with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} + with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala index 9c7d76a0caa08..a4f9ab0b0433c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala @@ -18,12 +18,7 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton class AlterTableDropPartitionSuite extends v1.AlterTableDropPartitionSuiteBase - with TestHiveSingleton { - - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} + with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala new file mode 100644 index 0000000000000..3f603fd6c7ddf --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.hive.test.TestHiveSingleton + +trait CommandSuiteBase extends TestHiveSingleton { + def version: String = "Hive V1" + def catalog: String = CatalogManager.SESSION_CATALOG_NAME + def defaultUsing: String = "USING HIVE" +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index a92478faf0e16..fa8ac4ccaa089 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -18,9 +18,5 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton -class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} +class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala index 836f080d28e75..8c00b3fe7f7ca 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -18,9 +18,5 @@ package org.apache.spark.sql.hive.execution.command import org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.hive.test.TestHiveSingleton -class ShowTablesSuite extends v1.ShowTablesSuiteBase with TestHiveSingleton { - override def version: String = "Hive V1" - override def defaultUsing: String = "USING HIVE" -} +class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase From 205d8e40bc8446c5953c9a082ffaede3029d1d53 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Dec 2020 14:36:38 +0000 Subject: [PATCH 0785/1009] [SPARK-32991][SQL] [FOLLOWUP] Reset command relies on session initials first ### What changes were proposed in this pull request? As a follow-up of https://github.com/apache/spark/pull/30045, we modify the RESET command here to respect the session initial configs per session first then fall back to the `SharedState` conf, which makes each session could maintain a different copy of initial configs for resetting. ### Why are the changes needed? to make reset command saner. ### Does this PR introduce _any_ user-facing change? yes, RESET will respect session initials first not always go to the system defaults ### How was this patch tested? add new tests Closes #30642 from yaooqinn/SPARK-32991-F. 
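To make the new RESET precedence concrete, here is a hedged sketch of the behaviour described above from user code (the config key and values are placeholders, not taken from the patch):

```scala
import org.apache.spark.sql.SparkSession

// Illustrative only: a non-static option passed at builder time becomes this session's
// initial value, so RESET now falls back to it before consulting the global SharedState conf.
val spark = SparkSession.builder()
  .master("local")
  .config("spark.sql.some.custom.key", "session-initial")   // hypothetical config key
  .getOrCreate()

spark.sql("SET spark.sql.some.custom.key=changed")
assert(spark.conf.get("spark.sql.some.custom.key") == "changed")

spark.sql("RESET")
// Previously RESET went back to the system defaults; now the session initials win first.
assert(spark.conf.get("spark.sql.some.custom.key") == "session-initial")
```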
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../apache/spark/sql/internal/SQLConf.scala | 25 ++++++- .../org/apache/spark/sql/SparkSession.scala | 2 +- .../sql/execution/command/SetCommand.scala | 12 ++-- .../internal/BaseSessionStateBuilder.scala | 24 ++----- .../spark/sql/internal/SharedState.scala | 7 +- .../spark/sql/SparkSessionBuilderSuite.scala | 71 ++++++++++++++++--- 6 files changed, 104 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fd6a30ac6a81c..3f0fd70a6eae6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -29,7 +29,7 @@ import scala.util.matching.Regex import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkContext, TaskContext} +import org.apache.spark.{SparkConf, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.{IGNORE_MISSING_FILES => SPARK_IGNORE_MISSING_FILES} @@ -77,6 +77,29 @@ object SQLConf { } } + /** + * Merge all non-static configs to the SQLConf. For example, when the 1st [[SparkSession]] and + * the global [[SharedState]] have been initialized, all static configs have taken affect and + * should not be set to other values. Other later created sessions should respect all static + * configs and only be able to change non-static configs. + */ + private[sql] def mergeNonStaticSQLConfigs( + sqlConf: SQLConf, + configs: Map[String, String]): Unit = { + for ((k, v) <- configs if !staticConfKeys.contains(k)) { + sqlConf.setConfString(k, v) + } + } + + /** + * Extract entries from `SparkConf` and put them in the `SQLConf` + */ + private[sql] def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = { + sparkConf.getAll.foreach { case (k, v) => + sqlConf.setConfString(k, v) + } + } + /** * Default config. Only used when there is no active SparkSession for the thread. * See [[get]] for more information. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 20a2649322ae0..0fada5500edde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -83,7 +83,7 @@ class SparkSession private( @transient private val existingSharedState: Option[SharedState], @transient private val parentSessionState: Option[SessionState], @transient private[sql] val extensions: SparkSessionExtensions, - @transient private val initialSessionOptions: Map[String, String]) + @transient private[sql] val initialSessionOptions: Map[String, String]) extends Serializable with Closeable with Logging { self => // The call site where this SparkSession was constructed. 
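The two helpers added to `SQLConf` above are what the `ResetCommand` and `BaseSessionStateBuilder` hunks below build on. A simplified sketch of how they layer the configs when a session conf is (re)built — not the actual builder code, and it assumes the caller lives under `org.apache.spark.sql` so the `private[sql]` helpers are visible:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.sql.internal.SQLConf

// Sketch only: parameter names are illustrative.
def rebuildSessionConf(
    sharedStateConf: SparkConf,                    // conf captured when the 1st session started
    laterSparkConf: Map[String, String],           // entries added to SparkConf afterwards
    initialSessionOptions: Map[String, String]): SQLConf = {
  val conf = new SQLConf
  // Take every entry (static and non-static) from the shared initial conf first ...
  SQLConf.mergeSparkConf(conf, sharedStateConf)
  // ... then layer on only the non-static keys added later, and the per-session initials last.
  SQLConf.mergeNonStaticSQLConfigs(conf, laterSparkConf)
  SQLConf.mergeNonStaticSQLConfigs(conf, initialSessionOptions)
  conf
}
```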
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 00accedf21556..7d92e6e189fb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -172,16 +172,18 @@ object SetCommand { case class ResetCommand(config: Option[String]) extends RunnableCommand with IgnoreCachedData { override def run(sparkSession: SparkSession): Seq[Row] = { - val defaults = sparkSession.sharedState.conf + val globalInitialConfigs = sparkSession.sharedState.conf config match { case Some(key) => sparkSession.conf.unset(key) - defaults.getOption(key).foreach(sparkSession.conf.set(key, _)) + sparkSession.initialSessionOptions.get(key) + .orElse(globalInitialConfigs.getOption(key)) + .foreach(sparkSession.conf.set(key, _)) case None => sparkSession.sessionState.conf.clear() - defaults.getAll.foreach { case (k, v) => - sparkSession.sessionState.conf.setConfString(k, v) - } + SQLConf.mergeSparkConf(sparkSession.sessionState.conf, globalInitialConfigs) + SQLConf.mergeNonStaticSQLConfigs(sparkSession.sessionState.conf, + sparkSession.initialSessionOptions) } Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index f51ee11091d02..8fb351a2a3b2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -16,7 +16,6 @@ */ package org.apache.spark.sql.internal -import org.apache.spark.SparkConf import org.apache.spark.annotation.Unstable import org.apache.spark.sql.{ExperimentalMethods, SparkSession, UDFRegistration, _} import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, ResolveSessionCatalog} @@ -73,15 +72,6 @@ abstract class BaseSessionStateBuilder( */ protected def extensions: SparkSessionExtensions = session.extensions - /** - * Extract entries from `SparkConf` and put them in the `SQLConf` - */ - protected def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = { - sparkConf.getAll.foreach { case (k, v) => - sqlConf.setConfString(k, v) - } - } - /** * SQL-specific key-value configurations. 
* @@ -92,15 +82,15 @@ abstract class BaseSessionStateBuilder( parentState.map { s => val cloned = s.conf.clone() if (session.sparkContext.conf.get(StaticSQLConf.SQL_LEGACY_SESSION_INIT_WITH_DEFAULTS)) { - mergeSparkConf(cloned, session.sparkContext.conf) + SQLConf.mergeSparkConf(cloned, session.sparkContext.conf) } cloned }.getOrElse { val conf = new SQLConf - mergeSparkConf(conf, session.sparkContext.conf) - options.foreach { - case (k, v) => conf.setConfString(k, v) - } + SQLConf.mergeSparkConf(conf, session.sharedState.conf) + // the later added configs to spark conf shall be respected too + SQLConf.mergeNonStaticSQLConfigs(conf, session.sparkContext.conf.getAll.toMap) + SQLConf.mergeNonStaticSQLConfigs(conf, session.initialSessionOptions) conf } } @@ -374,7 +364,7 @@ private[sql] trait WithTestConf { self: BaseSessionStateBuilder => parentState.map { s => val cloned = s.conf.clone() if (session.sparkContext.conf.get(StaticSQLConf.SQL_LEGACY_SESSION_INIT_WITH_DEFAULTS)) { - mergeSparkConf(conf, session.sparkContext.conf) + SQLConf.mergeSparkConf(conf, session.sparkContext.conf) } cloned }.getOrElse { @@ -386,7 +376,7 @@ private[sql] trait WithTestConf { self: BaseSessionStateBuilder => overrideConfigurations.foreach { case (key, value) => setConfString(key, value) } } } - mergeSparkConf(conf, session.sparkContext.conf) + SQLConf.mergeSparkConf(conf, session.sparkContext.conf) conf } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index fd34077aba963..6018afb0dce46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -67,11 +67,12 @@ private[sql] class SharedState( case (k, _) if k == "hive.metastore.warehouse.dir" || k == WAREHOUSE_PATH.key => logWarning(s"Not allowing to set ${WAREHOUSE_PATH.key} or hive.metastore.warehouse.dir " + s"in SparkSession's options, it should be set statically for cross-session usages") - case (k, v) => - logDebug(s"Applying initial SparkSession options to SparkConf/HadoopConf: $k -> $v") + case (k, v) if SQLConf.staticConfKeys.contains(k) => + logDebug(s"Applying static initial session options to SparkConf: $k -> $v") confClone.set(k, v) + case (k, v) => + logDebug(s"Applying other initial session options to HadoopConf: $k -> $v") hadoopConfClone.set(k, v) - } (confClone, hadoopConfClone) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 1fbce512f976d..e53976854070d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.internal.config.EXECUTOR_ALLOW_SPARK_CONTEXT import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf._ +import org.apache.spark.util.ThreadUtils /** * Test cases for the builder pattern of [[SparkSession]]. 
@@ -305,21 +306,18 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { // newly specified values val sharedWH = spark.sharedState.conf.get(wh) val sharedTD = spark.sharedState.conf.get(td) - val sharedCustom = spark.sharedState.conf.get(custom) assert(sharedWH === "./data2", "The warehouse dir in shared state should be determined by the 1st created spark session") assert(sharedTD === "alice", "Static sql configs in shared state should be determined by the 1st created spark session") - assert(sharedCustom === "kyao", - "Dynamic sql configs in shared state should be determined by the 1st created spark session") + assert(spark.sharedState.conf.getOption(custom).isEmpty, + "Dynamic sql configs is session specific") assert(spark.conf.get(wh) === sharedWH, "The warehouse dir in session conf and shared state conf should be consistent") assert(spark.conf.get(td) === sharedTD, "Static sql configs in session conf and shared state conf should be consistent") - assert(spark.conf.get(custom) === sharedCustom, - "Dynamic sql configs in session conf and shared state conf should be consistent before" + - " setting to new ones") + assert(spark.conf.get(custom) === "kyao", "Dynamic sql configs is session specific") spark.sql("RESET") @@ -327,12 +325,65 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { "The warehouse dir in shared state should be respect after RESET") assert(spark.conf.get(td) === sharedTD, "Static sql configs in shared state should be respect after RESET") - assert(spark.conf.get(custom) === sharedCustom, - "Dynamic sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === "kyao", + "Dynamic sql configs in session initial map should be respect after RESET") - val spark2 = SparkSession.builder().getOrCreate() + val spark2 = SparkSession.builder() + .config(wh, "./data3") + .config(custom, "kyaoo").getOrCreate() assert(spark2.conf.get(wh) === sharedWH) assert(spark2.conf.get(td) === sharedTD) - assert(spark2.conf.get(custom) === sharedCustom) + assert(spark2.conf.get(custom) === "kyaoo") + } + + test("SPARK-32991: RESET should work properly with multi threads") { + val wh = "spark.sql.warehouse.dir" + val td = "spark.sql.globalTempDatabase" + val custom = "spark.sql.custom" + val spark = ThreadUtils.runInNewThread("new session 0", false) { + SparkSession.builder() + .master("local") + .config(wh, "./data0") + .config(td, "bob") + .config(custom, "c0") + .getOrCreate() + } + + spark.sql(s"SET $custom=c1") + assert(spark.conf.get(custom) === "c1") + spark.sql("RESET") + assert(spark.conf.get(wh) === "./data0", + "The warehouse dir in shared state should be respect after RESET") + assert(spark.conf.get(td) === "bob", + "Static sql configs in shared state should be respect after RESET") + assert(spark.conf.get(custom) === "c0", + "Dynamic sql configs in shared state should be respect after RESET") + + val spark1 = ThreadUtils.runInNewThread("new session 1", false) { + SparkSession.builder().getOrCreate() + } + + assert(spark === spark1) + + // TODO: SPARK-33718: After clear sessions, the SharedState will be unreachable, then all + // the new static will take effect. 
+ SparkSession.clearDefaultSession() + val spark2 = ThreadUtils.runInNewThread("new session 2", false) { + SparkSession.builder() + .master("local") + .config(wh, "./data1") + .config(td, "alice") + .config(custom, "c2") + .getOrCreate() + } + + assert(spark2 !== spark) + spark2.sql(s"SET $custom=c1") + assert(spark2.conf.get(custom) === "c1") + spark2.sql("RESET") + assert(spark2.conf.get(wh) === "./data1") + assert(spark2.conf.get(td) === "alice") + assert(spark2.conf.get(custom) === "c2") + } } From ddda32b156e4c2e2ba1d1ed37cf34fb2f26d769e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 23:42:34 +0900 Subject: [PATCH 0786/1009] [SPARK-33802][INFRA][FOLLOW-UP] Separate arguments properly for -c option in git command for PySpark coverage ### What changes were proposed in this pull request? This PR proposes to separate arguments properly for `-c` options. Otherwise, the space is considered as its part of argument: ``` Cloning into 'pyspark-coverage-site'... unknown option: -c user.name='Apache Spark Test Account' usage: git [--version] [--help] [-C ] [-c =] [--exec-path[=]] [--html-path] [--man-path] [--info-path] [-p | --paginate | -P | --no-pager] [--no-replace-objects] [--bare] [--git-dir=] [--work-tree=] [--namespace=] [] [error] running git -c user.name='Apache Spark Test Account' -c user.email='sparktestaccgmail.com' commit -am Coverage report at latest commit in Apache Spark ; received return code 129 ``` ### Why are the changes needed? To make the build pass (https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-sbt-hadoop-3.2/1728/console). ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? ```python >>> from sparktestsupport.shellutils import run_cmd >>> run_cmd([ ... "git", ... "-c", ... "user.name='Apache Spark Test Account'", ... "-c", ... "user.email='sparktestaccgmail.com'", ... "commit", ... "-am", ... "Coverage report at latest commit in Apache Spark"]) [SPARK-33802-followup 80d2565a511] Coverage report at latest commit in Apache Spark 1 file changed, 1 insertion(+), 1 deletion(-) CompletedProcess(args=['git', '-c', "user.name='Apache Spark Test Account'", '-c', "user.email='sparktestaccgmail.com'", 'commit', '-am', 'Coverage report at latest commit in Apache Spark'], returncode=0) ``` I cannot run e2e test because it requires the env to have Jenkins secret. Closes #30804 from HyukjinKwon/SPARK-33802-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/run-tests.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/run-tests.py b/dev/run-tests.py index e271b4dec6c74..d9d1ac85d5cd9 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -521,8 +521,10 @@ def post_python_tests_results(): # 6. Commit current HTMLs. run_cmd([ "git", - "-c user.name='Apache Spark Test Account'", - "-c user.email='sparktestacc@gmail.com'", + "-c", + "user.name='Apache Spark Test Account'", + "-c", + "user.email='sparktestacc@gmail.com'", "commit", "-am", "Coverage report at latest commit in Apache Spark"]) From 8666d1c39cb6d49e4aa3cd0b9342b82405541aed Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 15:56:50 +0000 Subject: [PATCH 0787/1009] [SPARK-33800][SQL] Remove command name in AnalysisException message when a relation is not resolved ### What changes were proposed in this pull request? 
Based on the discussion https://github.com/apache/spark/pull/30743#discussion_r543124594, this PR proposes to remove the command name in AnalysisException message when a relation is not resolved. For some of the commands that use `UnresolvedTable`, `UnresolvedView`, and `UnresolvedTableOrView` to resolve an identifier, when the identifier cannot be resolved, the exception will be something like `Table or view not found for 'SHOW TBLPROPERTIES': badtable`. The command name (`SHOW TBLPROPERTIES` in this case) should be dropped to be consistent with other existing commands. ### Why are the changes needed? To make the exception message consistent. ### Does this PR introduce _any_ user-facing change? Yes, the exception message will be changed from ``` Table or view not found for 'SHOW TBLPROPERTIES': badtable ``` to ``` Table or view not found: badtable ``` for commands that use `UnresolvedTable`, `UnresolvedView`, and `UnresolvedTableOrView` to resolve an identifier. ### How was this patch tested? Updated existing tests. Closes #30794 from imback82/remove_cmd_from_exception_msg. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../test/resources/sql-tests/results/show_columns.sql.out | 8 ++++---- .../org/apache/spark/sql/StatisticsCollectionSuite.scala | 2 +- .../apache/spark/sql/connector/DataSourceV2SQLSuite.scala | 4 ++-- .../org/apache/spark/sql/execution/SQLViewSuite.scala | 4 ++-- .../datasources/v2/jdbc/JDBCTableCatalogSuite.scala | 8 ++++---- .../spark/sql/hive/execution/HiveCommandSuite.scala | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 30467685d75a9..c5a63546c01e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -102,7 +102,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTable => - u.failAnalysis(s"Table not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"Table not found: ${u.multipartIdentifier.quoted}") case u @ UnresolvedView(NonSessionCatalogAndIdentifier(catalog, ident), cmd, _, _) => u.failAnalysis( @@ -111,12 +111,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { s"$cmd expects a view.") case u: UnresolvedView => - u.failAnalysis(s"View not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + u.failAnalysis(s"View not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedTableOrView => val viewStr = if (u.allowTempView) "view" else "permanent view" u.failAnalysis( - s"Table or $viewStr not found for '${u.commandName}': ${u.multipartIdentifier.quoted}") + s"Table or $viewStr not found: ${u.multipartIdentifier.quoted}") case u: UnresolvedRelation => u.failAnalysis(s"Table or view not found: ${u.multipartIdentifier.quoted}") diff --git a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out index 851e848ed4ec6..3535b30d29c44 100644 --- a/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show_columns.sql.out @@ -94,7 +94,7 @@ SHOW 
COLUMNS IN badtable FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.badtable; line 1 pos 0 +Table or view not found: showdb.badtable; line 1 pos 0 -- !query @@ -130,7 +130,7 @@ SHOW COLUMNS IN showdb.showcolumn3 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -139,7 +139,7 @@ SHOW COLUMNS IN showcolumn3 FROM showdb struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showdb.showcolumn3; line 1 pos 0 +Table or view not found: showdb.showcolumn3; line 1 pos 0 -- !query @@ -148,7 +148,7 @@ SHOW COLUMNS IN showcolumn4 struct<> -- !query output org.apache.spark.sql.AnalysisException -Table or view not found for 'SHOW COLUMNS': showcolumn4; line 1 pos 0 +Table or view not found: showcolumn4; line 1 pos 0 -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 3fc679f6b9fc7..3b53a5324445b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -542,7 +542,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared val errMsg1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") }.getMessage - assert(errMsg1.contains("Table or view not found for 'ANALYZE TABLE ... FOR COLUMNS ...': " + + assert(errMsg1.contains("Table or view not found: " + s"$globalTempDB.gTempView")) // Analyzes in a global temporary view sql("CREATE GLOBAL TEMP VIEW gTempView AS SELECT * FROM range(1, 30)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 638f06d618833..b49a692d26173 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -731,7 +731,7 @@ class DataSourceV2SQLSuite val ex = intercept[AnalysisException] { sql("DROP TABLE testcat.db.notbl") } - assert(ex.getMessage.contains("Table or view not found for 'DROP TABLE': testcat.db.notbl")) + assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) sql("DROP TABLE IF EXISTS testcat.db.notbl") } @@ -2015,7 +2015,7 @@ class DataSourceV2SQLSuite sql(s"ALTER VIEW testcat.ns.tbl RENAME TO ns.view") } assert(e.getMessage.contains( - "Table or view not found for 'ALTER VIEW ... 
RENAME TO': testcat.ns.tbl")) + "Table or view not found: testcat.ns.tbl")) } test("ANALYZE TABLE") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 1a248fc18988a..586b31643049f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -452,11 +452,11 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("should not allow ALTER VIEW AS when the view does not exist") { assertAnalysisError( "ALTER VIEW testView AS SELECT 1, 2", - "View not found for 'ALTER VIEW ... AS': testView") + "View not found: testView") assertAnalysisError( "ALTER VIEW default.testView AS SELECT 1, 2", - "View not found for 'ALTER VIEW ... AS': default.testView") + "View not found: default.testView") } test("ALTER VIEW AS should try to alter temp view first if view name has no database part") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index e764f71867426..2fd976e0b9e17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -81,9 +81,9 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people"))) Seq( "h2.test.not_existing_table" -> - "Table or view not found for 'DROP TABLE': h2.test.not_existing_table", + "Table or view not found: h2.test.not_existing_table", "h2.bad_test.not_existing_table" -> - "Table or view not found for 'DROP TABLE': h2.bad_test.not_existing_table" + "Table or view not found: h2.bad_test.not_existing_table" ).foreach { case (table, expectedMsg) => val msg = intercept[AnalysisException] { sql(s"DROP TABLE $table") @@ -110,12 +110,12 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { sql("ALTER TABLE h2.test.not_existing_table RENAME TO test.dst_table") } assert(exp1.getMessage.contains( - "Table or view not found for 'ALTER TABLE ... RENAME TO': h2.test.not_existing_table")) + "Table or view not found: h2.test.not_existing_table")) val exp2 = intercept[AnalysisException] { sql("ALTER TABLE h2.bad_test.not_existing_table RENAME TO test.dst_table") } assert(exp2.getMessage.contains( - "Table or view not found for 'ALTER TABLE ... 
RENAME TO': h2.bad_test.not_existing_table")) + "Table or view not found: h2.bad_test.not_existing_table")) // Rename to an existing table withTable("h2.test.dst_table") { withConnection { conn => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index 4feb970ea6f1a..d3398842afb21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -137,7 +137,7 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto val message = intercept[AnalysisException] { sql("SHOW TBLPROPERTIES badtable") }.getMessage - assert(message.contains("Table or view not found for 'SHOW TBLPROPERTIES': badtable")) + assert(message.contains("Table or view not found: badtable")) // When key is not found, a row containing the error is returned. checkAnswer( From 3d0323401f7a3e4369a3d3f4ff98f15d19e8a643 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 16 Dec 2020 08:34:22 -0800 Subject: [PATCH 0788/1009] [SPARK-33810][TESTS] Reenable test cases disabled in SPARK-31732 ### What changes were proposed in this pull request? The test failures were due to machine being slow in Jenkins. We switched to Ubuntu 20 if I am not wrong. Looks like all machines are functioning properly unlike the past, and the tests pass without a problem anymore. This PR proposes to enable them back. ### Why are the changes needed? To restore test coverage. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Jenkins jobs in this PR show the flakiness. Closes #30798 from HyukjinKwon/do-not-merge-test. 
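The mechanics of the re-enablement are just the ScalaTest `ignore`/`test` toggle used in the diffs that follow; a minimal standalone illustration (suite and test names invented for this sketch):

```scala
import org.scalatest.funsuite.AnyFunSuite

// `ignore` keeps a test compiled but skips it at run time, so re-enabling a test is simply
// renaming `ignore(...)` back to `test(...)` as the hunks below do.
class ReenableExampleSuite extends AnyFunSuite {
  ignore("previously flaky check") {               // reported as ignored, body never runs
    assert(1 + 1 == 2)
  }

  test("previously flaky check, re-enabled") {     // runs on every build again
    assert(1 + 1 == 2)
  }
}
```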
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- .../spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala | 3 +-- .../org/apache/spark/sql/kafka010/KafkaRelationSuite.scala | 3 +-- .../spark/streaming/kafka010/DirectKafkaStreamSuite.scala | 6 ++---- .../org/apache/spark/streaming/StreamingContextSuite.scala | 3 +-- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index f2be8475151e3..62ba459070c2b 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -349,8 +349,7 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { ) } - // TODO (SPARK-31731): re-enable it - ignore("subscribing topic by pattern with topic deletions") { + test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index 6e9d8de9fa5be..9cec37e708dbb 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -174,8 +174,7 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession ("3", Seq(("e", "f".getBytes(UTF_8)), ("e", "g".getBytes(UTF_8))))).toDF) } - // TODO (SPARK-31729): re-enable it - ignore("timestamp provided for starting and ending") { + test("timestamp provided for starting and ending") { val (topic, timestamps) = prepareTimestampRelatedUnitTest // timestamp both presented: starting "first" ending "finalized" diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index 72cf3e8118228..2b7fef1e0fde3 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -332,8 +332,7 @@ class DirectKafkaStreamSuite } // Test to verify the offset ranges can be recovered from the checkpoints - // TODO (SPARK-31722): re-enable it - ignore("offset recovery") { + test("offset recovery") { val topic = "recovery" kafkaTestUtils.createTopic(topic) testDir = Utils.createTempDir() @@ -420,8 +419,7 @@ class DirectKafkaStreamSuite } // Test to verify the offsets can be recovered from Kafka - // TODO (SPARK-31722): re-enable it - ignore("offset recovery from kafka") { + test("offset recovery from kafka") { val topic = "recoveryfromkafka" kafkaTestUtils.createTopic(topic) diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala index 4eff464dcdafb..1d6637861511f 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala +++ 
b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala @@ -293,8 +293,7 @@ class StreamingContextSuite } } - // TODO (SPARK-31728): re-enable it - ignore("stop gracefully") { + test("stop gracefully") { val conf = new SparkConf().setMaster(master).setAppName(appName) conf.set("spark.dummyTimeConfig", "3600s") val sc = new SparkContext(conf) From 728a1298afa78c6acd7cdc4c21ee441120c34716 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 16 Dec 2020 14:09:28 -0800 Subject: [PATCH 0789/1009] [SPARK-33806][SQL] limit partition num to 1 when distributing by foldable expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Using a DISTRIBUTE BY clause with a literal to coalesce partitions seems to be a very popular pattern in pure SQL data processing. For example: ``` insert into table src select * from values (1), (2), (3) t(a) distribute by 1 ``` Users may want the final output to be one single data file, but that is not always what happens. Spark always creates a file for partition 0 whether it contains data or not, so when all the data goes to a partition with index > 0, there will always be 2 files and part-00000 will be empty. In addition, a lot of unnecessary empty tasks will be launched. When users repeat the insert statement daily, hourly, or minutely, this causes small-file issues. ``` spark-sql> set spark.sql.shuffle.partitions=3;drop table if exists test2;create table test2 using parquet as select * from values (1), (2), (3) t(a) distribute by 1; kentyaohulk  ~/spark   SPARK-33806  tree /Users/kentyao/Downloads/spark/spark-3.1.0-SNAPSHOT-bin-20201202/spark-warehouse/test2/ -s /Users/kentyao/Downloads/spark/spark-3.1.0-SNAPSHOT-bin-20201202/spark-warehouse/test2/ ├── [ 0] _SUCCESS ├── [ 298] part-00000-5dc19733-9405-414b-9681-d25c4d3e9ee6-c000.snappy.parquet └── [ 426] part-00001-5dc19733-9405-414b-9681-d25c4d3e9ee6-c000.snappy.parquet ``` To avoid this, there are a few options: 1. use `distribute by null` to let the data go to partition 0 2. set spark.sql.adaptive.enabled to true for Spark to automatically coalesce 3. use hints instead of `distribute by` 4. set spark.sql.shuffle.partitions to 1 In this PR, we set the partition number to 1 in this particular case. ### Why are the changes needed? 1. avoid small-file issues 2. avoid unnecessary empty tasks when adaptive execution is disabled ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New test Closes #30800 from yaooqinn/SPARK-33806.
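For readers who want to verify the new behavior locally, here is a minimal spark-shell sketch (assuming a build that includes this patch and an active `spark` session); it mirrors the test added in this PR by checking that the optimized plan of a `DISTRIBUTE BY` over a foldable expression reports a single partition:

```scala
import org.apache.spark.sql.catalyst.plans.logical.RepartitionByExpression

// Use a shuffle partition number larger than 1 so the effect of the change is visible.
spark.conf.set("spark.sql.shuffle.partitions", "5")

val plan = spark.sql("SELECT * FROM VALUES (1), (2), (3) t(a) DISTRIBUTE BY 1")
  .queryExecution.optimizedPlan

// With this change, a foldable DISTRIBUTE BY expression collapses to a single partition.
val partitionNums = plan.collect { case r: RepartitionByExpression => r.numPartitions }
assert(partitionNums.nonEmpty && partitionNums.forall(_ == 1))
```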
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../plans/logical/basicLogicalOperators.scala | 11 ++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 91fb77574a0ca..8c111aa750809 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1017,7 +1017,16 @@ case class RepartitionByExpression( child: LogicalPlan, optNumPartitions: Option[Int]) extends RepartitionOperation { - val numPartitions = optNumPartitions.getOrElse(SQLConf.get.numShufflePartitions) + val numPartitions = if (optNumPartitions.nonEmpty) { + optNumPartitions.get + } else { + if (partitionExpressions.forall(_.foldable)) { + 1 + } else { + SQLConf.get.numShufflePartitions + } + } + require(numPartitions > 0, s"Number of partitions ($numPartitions) must be positive.") val partitioning: Partitioning = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ebfe8bdd7a749..112b1a7210cb4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite} -import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.catalyst.plans.logical.{Project, RepartitionByExpression} import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} @@ -3732,6 +3732,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(sql("SELECT s LIKE 'm@@ca' ESCAPE '@' FROM df"), Row(true)) } } + + test("limit partition num to 1 when distributing by foldable expressions") { + withSQLConf((SQLConf.SHUFFLE_PARTITIONS.key, "5")) { + Seq(1, "1, 2", null, "version()").foreach { expr => + val plan = sql(s"select * from values (1), (2), (3) t(a) distribute by $expr") + .queryExecution.optimizedPlan + val res = plan.collect { + case r: RepartitionByExpression if r.numPartitions == 1 => true + } + assert(res.nonEmpty) + } + } + } } case class Foo(bar: Option[String]) From e7e29fd0affe81a24959ecc0286ec4c85f319722 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Wed, 16 Dec 2020 14:13:02 -0800 Subject: [PATCH 0790/1009] [SPARK-33514][SQL][FOLLOW-UP] Remove unused TruncateTableStatement case class ### What changes were proposed in this pull request? This PR removes unused `TruncateTableStatement`: https://github.com/apache/spark/pull/30457#discussion_r544433820 ### Why are the changes needed? To remove unused `TruncateTableStatement` from #30457. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not needed. 
Closes #30811 from imback82/remove_truncate_table_stmt. Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/plans/logical/statements.scala | 7 ------- 1 file changed, 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index c8395f375b4ed..58776f549d817 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -354,13 +354,6 @@ case class CreateNamespaceStatement( */ case class UseStatement(isNamespaceSet: Boolean, nameParts: Seq[String]) extends ParsedStatement -/** - * A TRUNCATE TABLE statement, as parsed from SQL - */ -case class TruncateTableStatement( - tableName: Seq[String], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * A SHOW CURRENT NAMESPACE statement, as parsed from SQL */ From 477046c63fab281570d26a183be4b0b8b77ac41a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 16 Dec 2020 15:06:25 -0800 Subject: [PATCH 0791/1009] [SPARK-33775][BUILD] Suppress sbt compilation warnings in Scala 2.13 ### What changes were proposed in this pull request? There are too many compilation warnings in Scala 2.13. This PR adds some `-Wconf:msg= regexes` rules to `SparkBuild.scala` to suppress compilation warnings, so that the suppressed warnings will not be printed to the console. The suppressed compilation warnings include: - All warnings related to `method\value\type\object\trait\inheritance` deprecated since 2.13 - All warnings related to `Widening conversion from XXX to YYY is deprecated because it loses precision` - Auto-application to `()` is deprecated. Supply the empty argument list `()` explicitly to invoke method `methodName`, or remove the empty argument list from its definition (Java-defined methods are exempt). In Scala 3, an unapplied method like this will be eta-expanded into a function. - method with a single empty parameter list overrides method without any parameter list - method without a parameter list overrides a method with a single empty one The compilation warnings that are not suppressed include: - Unicode escapes in triple quoted strings are deprecated, use the literal character instead. - view bounds are deprecated - symbol literal is deprecated ### Why are the changes needed? Suppress unimportant compilation warnings in Scala 2.13. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action builds. Closes #30760 from LuciferYang/SPARK-33775.
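As a quick illustration of how these `-Wconf` filters behave, here is a minimal, hypothetical `build.sbt` fragment (not part of this patch; Spark adds the equivalent rules in `project/SparkBuild.scala`, as shown in the diff below) that silences one of the message patterns listed above while leaving other warnings at their default level:

```scala
// Hypothetical build.sbt fragment; requires Scala 2.13.2+ where the compiler supports -Wconf.
scalacOptions ++= Seq(
  // msg=<regex> matches the warning text; the trailing ":s" action silences matching warnings.
  // Other actions include ":w" (warning), ":e" (error) and ":wv" (warning-verbose).
  "-Wconf:msg=Auto-application to \\`\\(\\)\\` is deprecated:s",
  // Warnings that match no filter keep their default behavior.
  "-Wconf:any:w"
)
```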
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index a28c2b55b3789..3098060478f40 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -238,7 +238,15 @@ object SparkBuild extends PomBuild { "-Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv", "-Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv", "-Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite..futureAction:wv", - "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv" + "-Wconf:cat=other-pure-statement&site=org.apache.spark.sql.streaming.sources.StreamingDataSourceV2Suite.testPositiveCase.\\$anonfun:wv", + // SPARK-33775 Suppress compilation warnings that contain the following contents. + // TODO(SPARK-33805): Undo the corresponding deprecated usage suppression rule after + // fixed. + "-Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s", + "-Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s", + "-Wconf:msg=Auto-application to \\`\\(\\)\\` is deprecated:s", + "-Wconf:msg=method with a single empty parameter list overrides method without any parameter list:s", + "-Wconf:msg=method without a parameter list overrides a method with a single empty one:s" ) } } From 0c129001201ccb63ae96f576b6f354da84024fb3 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 17 Dec 2020 11:36:31 +0900 Subject: [PATCH 0792/1009] [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader ### What changes were proposed in this pull request? `FsHistoryProvider#checkForLogs` already has `FileStatus` when constructing `SingleFileEventLogFileReader`, and there is no need to get the `FileStatus` again when `SingleFileEventLogFileReader#fileSizeForLastIndex`. ### Why are the changes needed? This can reduce a lot of rpc calls and improve the speed of the history server. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? exist ut Closes #30780 from cxzl25/SPARK-33790. 
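The essence of the change, shown in isolation as a simplified sketch (the class and members below are illustrative stand-ins, not the actual Spark classes), is to accept an optional, already-fetched `FileStatus` and fall back to a `getFileStatus` RPC only when the caller did not supply one:

```scala
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

// Simplified illustration of the reuse pattern applied in this patch.
class EventLogReaderSketch(
    fs: FileSystem,
    path: Path,
    maybeStatus: Option[FileStatus] = None) {

  // Reuse the status the caller already has; the RPC runs only on the fallback path,
  // and at most once, since the lazy val caches the result.
  private lazy val status: FileStatus = maybeStatus.getOrElse(fs.getFileStatus(path))

  def fileSize: Long = status.getLen
}
```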
Authored-by: sychen Signed-off-by: Jungtaek Lim --- .../apache/spark/deploy/history/EventLogFileReaders.scala | 7 ++++--- project/MimaExcludes.scala | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 9f63a6441a838..5a34f0b71edef 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -116,7 +116,7 @@ object EventLogFileReader { def apply(fs: FileSystem, status: FileStatus): Option[EventLogFileReader] = { if (isSingleEventLog(status)) { - Some(new SingleFileEventLogFileReader(fs, status.getPath)) + Some(new SingleFileEventLogFileReader(fs, status.getPath, Option(status))) } else if (isRollingEventLogs(status)) { Some(new RollingEventLogFilesFileReader(fs, status.getPath)) } else { @@ -166,8 +166,9 @@ object EventLogFileReader { */ class SingleFileEventLogFileReader( fs: FileSystem, - path: Path) extends EventLogFileReader(fs, path) { - private lazy val status = fileSystem.getFileStatus(rootPath) + path: Path, + maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, path) { + private lazy val status = maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath)) override def lastIndex: Option[Long] = None diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 33e65c9def41b..8f47d51799dd5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -109,7 +109,10 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"), + + // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this") ) // Exclude rules for 3.0.x From 0c19497222c26818ecdde527601c12c757acb4ad Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 17 Dec 2020 05:25:51 +0000 Subject: [PATCH 0793/1009] [SPARK-33815][SQL] Migrate ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution behavior consistent.
For example, ```scala sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t SET SERDE 'serdename'") // works fine ``` , but after this PR: ``` sql("ALTER TABLE t SET SERDE 'serdename'") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES\' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `t` in the above example is resolved to a temp view first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30813 from imback82/alter_table_serde_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 8 +++-- .../catalyst/plans/logical/statements.scala | 9 ----- .../catalyst/plans/logical/v2Commands.scala | 11 ++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 34 +++++++++++-------- .../analysis/ResolveSessionCatalog.scala | 9 +++-- .../datasources/v2/DataSourceV2Strategy.scala | 4 +++ .../sql/connector/DataSourceV2SQLSuite.scala | 3 +- .../spark/sql/execution/SQLViewSuite.scala | 12 +++++-- .../sql/hive/execution/HiveDDLSuite.scala | 13 ++++--- 9 files changed, 66 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 426dff343818b..94589688953d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3806,7 +3806,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create an [[AlterTableSerDePropertiesStatement]] + * Create an [[AlterTableSerDeProperties]] * * For example: * {{{ @@ -3816,8 +3816,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) { - AlterTableSerDePropertiesStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), + AlterTableSerDeProperties( + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Option(ctx.STRING).map(string), Option(ctx.tablePropertyList).map(visitPropertyKeyValues), // TODO a partition spec is allowed to have optional values. This is currently violated. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 58776f549d817..59239f6e041a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -300,15 +300,6 @@ case class AlterTableRenamePartitionStatement( from: TablePartitionSpec, to: TablePartitionSpec) extends ParsedStatement -/** - * ALTER TABLE ... 
SERDEPROPERTIES command, as parsed from SQL - */ -case class AlterTableSerDePropertiesStatement( - tableName: Seq[String], - serdeClassName: Option[String], - serdeProperties: Option[Map[String, String]], - partitionSpec: Option[TablePartitionSpec]) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index d13ad977910d9..fa67d311c39c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -777,6 +777,17 @@ case class AlterViewUnsetProperties( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] command. + */ +case class AlterTableSerDeProperties( + child: LogicalPlan, + serdeClassName: Option[String], + serdeProperties: Option[Map[String, String]], + partitionSpec: Option[TablePartitionSpec]) extends Command { + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the CACHE TABLE command. */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9bea6517156ae..5eb0c9a39f1e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2134,8 +2134,11 @@ class DDLParserSuite extends AnalysisTest { test("alter table: SerDe properties") { val sql1 = "ALTER TABLE table_name SET SERDE 'org.apache.class'" val parsed1 = parsePlan(sql1) - val expected1 = AlterTableSerDePropertiesStatement( - Seq("table_name"), Some("org.apache.class"), None, None) + val expected1 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), + Some("org.apache.class"), + None, + None) comparePlans(parsed1, expected1) val sql2 = @@ -2144,8 +2147,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed2 = parsePlan(sql2) - val expected2 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected2 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) @@ -2157,8 +2160,11 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed3 = parsePlan(sql3) - val expected3 = AlterTableSerDePropertiesStatement( - Seq("table_name"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) + val expected3 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES]"), + None, + Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), + None) comparePlans(parsed3, expected3) val sql4 = @@ -2168,8 +2174,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed4 = parsePlan(sql4) - val expected4 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected4 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) @@ -2181,8 +2187,8 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed5 = parsePlan(sql5) - val expected5 = AlterTableSerDePropertiesStatement( - Seq("table_name"), + val expected5 = AlterTableSerDeProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) @@ -2194,8 +2200,8 @@ class DDLParserSuite extends AnalysisTest { |WITH SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed6 = parsePlan(sql6) - val expected6 = AlterTableSerDePropertiesStatement( - Seq("a", "b", "c"), + val expected6 = AlterTableSerDeProperties( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Some("org.apache.class"), Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), None) @@ -2207,8 +2213,8 @@ class DDLParserSuite extends AnalysisTest { |SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',') """.stripMargin val parsed7 = parsePlan(sql7) - val expected7 = AlterTableSerDePropertiesStatement( - Seq("a", "b", "c"), + val expected7 = AlterTableSerDeProperties( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES]"), None, Some(Map("columns" -> "foo,bar", "field.delim" -> ",")), Some(Map("test" -> "1", "dt" -> "2008-08-08", "country" -> "us"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 802068de10d16..1426d28cbbf88 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -476,10 +476,13 @@ class ResolveSessionCatalog( purge, retainData = false) - case AlterTableSerDePropertiesStatement(tbl, serdeClassName, serdeProperties, partitionSpec) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE SerDe Properties") + case AlterTableSerDeProperties( + ResolvedV1TableIdentifier(ident), + serdeClassName, + serdeProperties, + partitionSpec) => AlterTableSerDePropertiesCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, serdeClassName, serdeProperties, partitionSpec) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 120fa5288dda9..f9c89051e421a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -334,6 +334,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException( "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.") + case AlterTableSerDeProperties(_: ResolvedTable, _, _, _) => + throw new AnalysisException( + "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES] is not supported for v2 tables.") + case LoadData(_: ResolvedTable, _, _, _, _) => throw new AnalysisException("LOAD DATA is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b49a692d26173..b335dc31a3037 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2120,7 +2120,8 @@ class DataSourceV2SQLSuite val e = intercept[AnalysisException] { sql(s"ALTER TABLE $t SET SERDEPROPERTIES ('columns'='foo,bar', 'field.delim' = ',')") } - assert(e.message.contains("ALTER TABLE SerDe Properties is only supported with v1 tables")) + assert(e.message.contains( + "ALTER TABLE ... 
SET [SERDE|SERDEPROPERTIES] is not supported for v2 tables")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 586b31643049f..6d65fddb1be62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -140,9 +140,15 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { val viewName = "testView" withTempView(viewName) { spark.range(10).createTempView(viewName) - assertNoSuchTable(s"ALTER TABLE $viewName SET SERDE 'whatever'") - assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'") - assertNoSuchTable(s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')") + assertAnalysisError( + s"ALTER TABLE $viewName SET SERDE 'whatever'", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") + assertAnalysisError( + s"ALTER TABLE $viewName PARTITION (a=1, b=2) SET SERDE 'whatever'", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") + assertAnalysisError( + s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')", + s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") assertAnalysisError( s"ALTER TABLE $viewName RECOVER PARTITIONS", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f8a5c7f57eec5..aac4b88d9e3f8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -888,12 +888,17 @@ class HiveDDLSuite assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET LOCATION '/path/to/home'") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET SERDE 'whatever'") + assertAnalysisError( + s"ALTER TABLE $oldViewName SET SERDE 'whatever'", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") - assertErrorForAlterTableOnView(s"ALTER TABLE $oldViewName SET SERDEPROPERTIES ('x' = 'y')") + assertAnalysisError( + s"ALTER TABLE $oldViewName SET SERDEPROPERTIES ('x' = 'y')", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')") + assertAnalysisError( + s"ALTER TABLE $oldViewName PARTITION (a=1, b=2) SET SERDEPROPERTIES ('x' = 'y')", + s"$oldViewName is a view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName RECOVER PARTITIONS", From 1e85707738a830d33598ca267a6740b3f06b1861 Mon Sep 17 00:00:00 2001 From: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Date: Thu, 17 Dec 2020 05:47:44 +0000 Subject: [PATCH 0794/1009] [SPARK-33697][SQL] RemoveRedundantProjects should require column ordering by default ### What changes were proposed in this pull request? This PR changes the rule `RemoveRedundantProjects` from by default passing column ordering requirements from parent nodes to always require column orders regardless of the requirements from parent nodes unless otherwise specified. 
More specifically, instead of excluding a few nodes like GenerateExec, UnionExec that are known to require children columns to be ordered, the rule now includes a whitelist of nodes that allow passing through the ordering requirements from their parents. ### Why are the changes needed? Currently, this rule passes through ordering requirements from parents directly to children except for a few excluded nodes. This incorrectly removes the necessary project nodes below a UnionExec since it is not excluded. An earlier PR also fixed a similar issue for GenerateExec (SPARK-32861). In order to prevent similar issues, the rule should be changed to always require column ordering except for a few specific nodes that we know for sure can pass through the requirements. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #30659 from allisonwang-db/spark-33697-remove-project-union. Authored-by: allisonwang-db <66282705+allisonwang-db@users.noreply.github.com> Signed-off-by: Wenchen Fan --- .../execution/RemoveRedundantProjects.scala | 24 +++++++--- .../RemoveRedundantProjectsSuite.scala | 47 +++++++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala index bbe3f50492d9f..bfb6e805c0541 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/RemoveRedundantProjects.scala @@ -22,6 +22,8 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, PartialMerge} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanExecBase +import org.apache.spark.sql.execution.joins.BaseJoinExec +import org.apache.spark.sql.execution.window.WindowExec import org.apache.spark.sql.internal.SQLConf /** @@ -61,15 +63,25 @@ object RemoveRedundantProjects extends Rule[SparkPlan] { val keepOrdering = a.aggregateExpressions .exists(ae => ae.mode.equals(Final) || ae.mode.equals(PartialMerge)) a.mapChildren(removeProject(_, keepOrdering)) - // GenerateExec requires column ordering since it binds input rows directly with its - // requiredChildOutput without using child's output schema. - case g: GenerateExec => g.mapChildren(removeProject(_, true)) - // JoinExec ordering requirement will inherit from its parent. If there is no ProjectExec in - // its ancestors, JoinExec should require output columns to be ordered. - case o => o.mapChildren(removeProject(_, requireOrdering)) + case o => + val required = if (canPassThrough(o)) requireOrdering else true + o.mapChildren(removeProject(_, requireOrdering = required)) } } + /** + * Check if the given node can pass the ordering requirement from its parent. + */ + private def canPassThrough(plan: SparkPlan): Boolean = plan match { + case _: FilterExec => true + // JoinExec ordering requirement should inherit from its parent. If there is no ProjectExec in + // its ancestors, JoinExec should require output columns to be ordered, and vice versa. + case _: BaseJoinExec => true + case _: WindowExec => true + case _: ExpandExec => true + case _ => false + } + /** * Check if the nullability change is positive. 
It catches the case when the project output * attribute is not nullable, but the child output attribute is nullable. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala index 2de9d21abca82..040c5189abcb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/RemoveRedundantProjectsSuite.scala @@ -166,6 +166,53 @@ abstract class RemoveRedundantProjectsSuiteBase assertProjectExec(query, 0, 1) } } + + test("SPARK-33697: UnionExec should require column ordering") { + withTable("t1", "t2") { + spark.range(-10, 20) + .selectExpr( + "id", + "date_add(date '1950-01-01', cast(id as int)) as datecol", + "cast(id as string) strcol") + .write.mode("overwrite").format("parquet").saveAsTable("t1") + spark.range(-10, 20) + .selectExpr( + "cast(id as string) strcol", + "id", + "date_add(date '1950-01-01', cast(id as int)) as datecol") + .write.mode("overwrite").format("parquet").saveAsTable("t2") + + val queryTemplate = + """ + |SELECT DISTINCT datecol, strcol FROM + |( + |(SELECT datecol, id, strcol from t1) + | %s + |(SELECT datecol, id, strcol from t2) + |) + |""".stripMargin + + Seq(("UNION", 2, 2), ("UNION ALL", 1, 2)).foreach { case (setOperation, enabled, disabled) => + val query = queryTemplate.format(setOperation) + assertProjectExec(query, enabled = enabled, disabled = disabled) + } + } + } + + test("SPARK-33697: remove redundant projects under expand") { + val query = + """ + |SELECT t1.key, t2.key, sum(t1.a) AS s1, sum(t2.b) AS s2 FROM + |(SELECT a, key FROM testView) t1 + |JOIN + |(SELECT b, key FROM testView) t2 + |ON t1.key = t2.key + |GROUP BY t1.key, t2.key GROUPING SETS(t1.key, t2.key) + |ORDER BY t1.key, t2.key, s1, s2 + |LIMIT 10 + |""".stripMargin + assertProjectExec(query, 0, 3) + } } class RemoveRedundantProjectsSuite extends RemoveRedundantProjectsSuiteBase From b1950cc9162999c2200a0a988fa28aee640fb459 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Dec 2020 15:49:40 +0900 Subject: [PATCH 0795/1009] [SPARK-33821][BUILD] Upgrade SBT to 1.4.5 ### What changes were proposed in this pull request? This PR aims to upgrade SBT to 1.4.5 to support Apple Silicon. ### Why are the changes needed? The following is the release note including `sbt 1.4.5 adds support for Apple silicon (AArch64 also called ARM64)`. - https://github.com/sbt/sbt/releases/tag/v1.4.5 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30817 from dongjoon-hyun/SPARK-33821. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index c92de941c10be..35ee6fea6d336 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. # -sbt.version=1.4.4 +sbt.version=1.4.5 From ed09673fb941830c15e5e5ad748be9de4755935c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Dec 2020 15:51:04 +0900 Subject: [PATCH 0796/1009] [SPARK-33819][CORE] SingleFileEventLogFileReader/RollingEventLogFilesFileReader should be `package private` ### What changes were proposed in this pull request? 
This PR aims to convert `EventLogFileReader`'s derived classes into `package private`. - SingleFileEventLogFileReader - RollingEventLogFilesFileReader `EventLogFileReader` itself is used in `scheduler` module during tests. ### Why are the changes needed? This classes were designed to be internal. This PR hides it explicitly to reduce the maintenance burden. ### Does this PR introduce _any_ user-facing change? Yes, but these were exposed accidentally. ### How was this patch tested? Pass CIs. Closes #30814 from dongjoon-hyun/SPARK-33790. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- .../apache/spark/deploy/history/EventLogFileReaders.scala | 8 +++++--- project/MimaExcludes.scala | 5 +---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 5a34f0b71edef..b4771c80a175f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -164,12 +164,14 @@ object EventLogFileReader { * FileNotFoundException could occur if the log file is renamed before getting the * status of log file. */ -class SingleFileEventLogFileReader( +private[history] class SingleFileEventLogFileReader( fs: FileSystem, path: Path, - maybeStatus: Option[FileStatus] = None) extends EventLogFileReader(fs, path) { + maybeStatus: Option[FileStatus]) extends EventLogFileReader(fs, path) { private lazy val status = maybeStatus.getOrElse(fileSystem.getFileStatus(rootPath)) + def this(fs: FileSystem, path: Path) = this(fs, path, None) + override def lastIndex: Option[Long] = None override def fileSizeForLastIndex: Long = status.getLen @@ -204,7 +206,7 @@ class SingleFileEventLogFileReader( * This reader lists the files only once; if caller would like to play with updated list, * it needs to create another reader instance. */ -class RollingEventLogFilesFileReader( +private[history] class RollingEventLogFilesFileReader( fs: FileSystem, path: Path) extends EventLogFileReader(fs, path) { import RollingEventLogFilesWriter._ diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 8f47d51799dd5..33e65c9def41b 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -109,10 +109,7 @@ object MimaExcludes { ProblemFilters.exclude[InheritedNewAbstractMethodProblem]("org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.weightCol"), // [SPARK-32879] Pass SparkSession.Builder options explicitly to SparkSession - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this"), - - // [SPARK-33790][CORE] Reduce the rpc call of getFileStatus in SingleFileEventLogFileReader - ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.deploy.history.SingleFileEventLogFileReader.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.SparkSession.this") ) // Exclude rules for 3.0.x From 12b69cc27caa476a9a29844f8d096f08263ba6ef Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 17 Dec 2020 17:20:45 +0900 Subject: [PATCH 0797/1009] [SPARK-26199][SPARK-31517][R] Fix strategy for handling ... names in mutate ### What changes were proposed in this pull request? Change the strategy for how the varargs are handled in the default `mutate` method ### Why are the changes needed? 
Bugfix -- `deparse` + `sapply` not working as intended due to `width.cutoff` ### Does this PR introduce any user-facing change? Yes, bugfix. Shouldn't change any working code. ### How was this patch tested? None! yet. Closes #28386 from MichaelChirico/r-mutate-deparse. Lead-authored-by: Michael Chirico Co-authored-by: Michael Chirico Signed-off-by: HyukjinKwon --- R/pkg/R/DataFrame.R | 18 ++++++++++-------- R/pkg/tests/fulltests/test_sparkSQL.R | 15 +++++++++++++++ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 31a651ea1279b..8ca338f09969b 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -2277,16 +2277,17 @@ setMethod("mutate", # For named arguments, use the names for arguments as the column names # For unnamed arguments, use the argument symbols as the column names - args <- sapply(substitute(list(...))[-1], deparse) ns <- names(cols) - if (!is.null(ns)) { - lapply(seq_along(args), function(i) { - if (ns[[i]] != "") { - args[[i]] <<- ns[[i]] - } + if (is.null(ns)) ns <- rep("", length(cols)) + named_idx <- nzchar(ns) + if (!all(named_idx)) { + # SPARK-31517: deparse uses width.cutoff on wide input and the + # output is length>1, so need to collapse it to scalar + colsub <- substitute(list(...))[-1L] + ns[!named_idx] <- sapply(which(!named_idx), function(ii) { + paste(gsub("^\\s*|\\s*$", "", deparse(colsub[[ii]])), collapse = " ") }) } - ns <- args # The last column of the same name in the specific columns takes effect deDupCols <- list() @@ -3444,7 +3445,8 @@ setMethod("as.data.frame", #' @note attach since 1.6.0 setMethod("attach", signature(what = "SparkDataFrame"), - function(what, pos = 2L, name = deparse(substitute(what), backtick = FALSE), + function(what, pos = 2L, + name = paste(deparse(substitute(what), backtick = FALSE), collapse = " "), warn.conflicts = TRUE) { args <- as.list(environment()) # capture all parameters - this must be the first line newEnv <- assignNewEnv(args$what) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c623f534f706c..ebf08b9559379 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -2884,6 +2884,15 @@ test_that("mutate(), transform(), rename() and names()", { expect_equal(nrow(result), 153) expect_equal(ncol(result), 2) detach(airquality) + + # ensure long inferred names are handled without error (SPARK-26199) + # test implicitly assumes eval(formals(deparse)$width.cutoff) = 60 + # (which has always been true as of 2020-11-15) + newDF <- mutate( + df, + df$age + 12345678901234567890 + 12345678901234567890 + 12345678901234 + ) + expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE) }) test_that("read/write ORC files", { @@ -3273,6 +3282,12 @@ test_that("attach() on a DataFrame", { stat3 <- summary(df[, "age", drop = F]) expect_equal(collect(stat3)[8, "age"], "30") expect_error(age) + + # attach method uses deparse(); ensure no errors from a very long input + abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop <- df # nolint + attach(abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop) + expect_true(any(grepl("abcdefghijklmnopqrstuvwxyz", search()))) + detach("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnop") }) test_that("with() on a DataFrame", { From 34e4d87023535c086a0aa43fe194f794b41e09b7 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Dec 2020 08:52:09 -0600 Subject: [PATCH 0798/1009] [SPARK-33774][UI][CORE] 
Back to Master" returns 500 error in Standalone cluster ### What changes were proposed in this pull request? Initiate the `masterWebUiUrl` with the `webUi. webUrl` instead of the `masterPublicAddress`. ### Why are the changes needed? Since [SPARK-21642](https://issues.apache.org/jira/browse/SPARK-21642), `WebUI` has changed from `localHostName` to `localCanonicalHostName` as the hostname to set up the web UI. However, the `masterPublicAddress` is from `RpcEnv`'s host address, which still uses `localHostName`. As a result, it returns the wrong Master web URL to the Worker. ### Does this PR introduce _any_ user-facing change? Yes, when users click "Back to Master" in the Worker page: Before this PR: WeChat4acbfd163f51c76a5f9bc388c7479785 After this PR: ![image](https://user-images.githubusercontent.com/16397174/102058016-d438b700-3e29-11eb-8641-a23a6b2f542e.png) (Return to the Master page successfully.) ### How was this patch tested? Tested manually. Closes #30759 from Ngone51/fix-back-to-master. Authored-by: yi.wu Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index cccd3da323774..9f1b36ad1c8c1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -143,7 +143,7 @@ private[deploy] class Master( logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") webUi = new MasterWebUI(this, webUiPort) webUi.bind() - masterWebUiUrl = s"${webUi.scheme}$masterPublicAddress:${webUi.boundPort}" + masterWebUiUrl = webUi.webUrl if (reverseProxy) { val uiReverseProxyUrl = conf.get(UI_REVERSE_PROXY_URL).map(_.stripSuffix("/")) if (uiReverseProxyUrl.nonEmpty) { From 8c81cf7d71baf34dfafe54835a90cc19e7293561 Mon Sep 17 00:00:00 2001 From: suqilong Date: Thu, 17 Dec 2020 08:56:45 -0600 Subject: [PATCH 0799/1009] [SPARK-22769] Do not log rpc post message error when sparkEnv is already stopped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? When driver stopping, pending rpc requests will cause error like: > 17/12/12 18:30:16 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message. org.apache.spark.SparkException: Could not find CoarseGrainedScheduler. at org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:154) at org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134) at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570) at org.apache.spark.network.server.TransportRequestHandler.processOneWayMessage(TransportRequestHandler.java:180) at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:109) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:119) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:51) Or like: > 17/12/12 18:20:44 INFO MemoryStore: MemoryStore cleared 17/12/12 18:20:44 INFO BlockManager: BlockManager stopped 17/12/12 18:20:44 INFO BlockManagerMaster: BlockManagerMaster stopped 17/12/12 18:20:44 ERROR TransportRequestHandler: Error while invoking RpcHandler#receive() for one-way message. 
org.apache.spark.rpc.RpcEnvStoppedException: RpcEnv already stopped. at org.apache.spark.rpc.netty.Dispatcher.postMessage(Dispatcher.scala:152) at org.apache.spark.rpc.netty.Dispatcher.postOneWayMessage(Dispatcher.scala:134) at org.apache.spark.rpc.netty.NettyRpcHandler.receive(NettyRpcEnv.scala:570) These are because CoarseGrainedScheduler and rpcEnv are already stopped, they're not error. The related issue SPARK-22769 was opened on 2017, but the author didn't finish the pull request, so reopen this issue. ### How was this patch tested? Existing tests Closes #30658 from sqlwindspeaker/donot-log-rpc-error. Authored-by: suqilong Signed-off-by: Sean Owen --- .../scala/org/apache/spark/rpc/netty/Dispatcher.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala index 4a9f551646fc7..14198743c4801 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import scala.concurrent.Promise import scala.util.control.NonFatal -import org.apache.spark.SparkException +import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.internal.Logging import org.apache.spark.network.client.RpcResponseCallback import org.apache.spark.rpc._ @@ -147,13 +147,15 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte /** Posts a one-way message. */ def postOneWayMessage(message: RequestMessage): Unit = { postMessage(message.receiver.name, OneWayMessage(message.senderAddress, message.content), - (e) => e match { + { // SPARK-31922: in local cluster mode, there's always a RpcEnvStoppedException when // stop is called due to some asynchronous message handling. We catch the exception // and log it at debug level to avoid verbose error message when user stop a local // cluster in spark shell. case re: RpcEnvStoppedException => logDebug(s"Message $message dropped. ${re.getMessage}") - case _ => throw e + case e if SparkEnv.get.isStopped => + logWarning(s"Message $message dropped due to sparkEnv is stopped. ${e.getMessage}") + case e => throw e }) } From 15616f499aca93c98a71732add2a80de863d3d5f Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 17 Dec 2020 09:28:17 -0800 Subject: [PATCH 0800/1009] [SPARK-33173][CORE][TESTS][FOLLOWUP] Use `local[2]` and AtomicInteger ### What changes were proposed in this pull request? Use `local[2]` to let tasks launch at the same time. And change counters (`numOnTaskXXX`) to `AtomicInteger` type to ensure thread safe. ### Why are the changes needed? The test is still flaky after the fix https://github.com/apache/spark/pull/30072. See: https://github.com/apache/spark/pull/30728/checks?check_run_id=1557987642 And it's easy to reproduce if you test it multiple times (e.g. 100) locally. The test sets up a stage with 2 tasks to run on an executor with 1 core. So these 2 tasks have to be launched one by one. The task-2 will be launched after task-1 fails. However, since we don't retry failed task in local mode (MAX_LOCAL_TASK_FAILURES = 1), the stage will abort right away after task-1 fail and cancels the running task-2 at the same time. There's a chance that task-2 gets canceled before calling `PluginContainer.onTaskStart`, which leads to the test failure. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
Tested manually after the fix and the test is no longer flaky. Closes #30823 from Ngone51/debug-flaky-spark-33088. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../plugin/PluginContainerSuite.scala | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index 15966e2744491..9ef81d30ff196 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.internal.plugin import java.io.File import java.nio.charset.StandardCharsets import java.util.{Map => JMap} +import java.util.concurrent.atomic.AtomicInteger import scala.collection.JavaConverters._ import scala.concurrent.duration._ @@ -138,15 +139,15 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo sc = new SparkContext(conf) sc.parallelize(1 to 10, 2).count() - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskStart.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed.get() == 0) } test("SPARK-33088: executor failed tasks trigger plugin calls") { val conf = new SparkConf() .setAppName(getClass().getName()) - .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(SparkLauncher.SPARK_MASTER, "local[2]") .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) sc = new SparkContext(conf) @@ -157,9 +158,9 @@ class PluginContainerSuite extends SparkFunSuite with BeforeAndAfterEach with Lo } eventually(timeout(10.seconds), interval(100.millis)) { - assert(TestSparkPlugin.executorPlugin.numOnTaskStart == 2) - assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded == 0) - assert(TestSparkPlugin.executorPlugin.numOnTaskFailed == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskStart.get() == 2) + assert(TestSparkPlugin.executorPlugin.numOnTaskSucceeded.get() == 0) + assert(TestSparkPlugin.executorPlugin.numOnTaskFailed.get() == 2) } } @@ -343,9 +344,9 @@ private class TestDriverPlugin extends DriverPlugin { private class TestExecutorPlugin extends ExecutorPlugin { - var numOnTaskStart: Int = 0 - var numOnTaskSucceeded: Int = 0 - var numOnTaskFailed: Int = 0 + val numOnTaskStart = new AtomicInteger(0) + val numOnTaskSucceeded = new AtomicInteger(0) + val numOnTaskFailed = new AtomicInteger(0) override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = { ctx.metricRegistry().register("executorMetric", new Gauge[Int] { @@ -355,15 +356,15 @@ private class TestExecutorPlugin extends ExecutorPlugin { } override def onTaskStart(): Unit = { - numOnTaskStart += 1 + numOnTaskStart.incrementAndGet() } override def onTaskSucceeded(): Unit = { - numOnTaskSucceeded += 1 + numOnTaskSucceeded.incrementAndGet() } override def onTaskFailed(failureReason: TaskFailedReason): Unit = { - numOnTaskFailed += 1 + numOnTaskFailed.incrementAndGet() } } From 51ef4430dcbc934d43315ee6bdc851c9be84a1f2 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 17 Dec 2020 16:16:05 -0800 Subject: [PATCH 0801/1009] [SPARK-33822][SQL] Use the `CastSupport.cast` method in HashJoin ### What changes were proposed in 
this pull request? This PR intends to fix the bug that throws a unsupported exception when running [the TPCDS q5](https://github.com/apache/spark/blob/master/sql/core/src/test/resources/tpcds/q5.sql) with AQE enabled ([this option is enabled by default now via SPARK-33679](https://github.com/apache/spark/commit/031c5ef280e0cba8c4718a6457a44b6cccb17f46)): ``` java.lang.UnsupportedOperationException: BroadcastExchange does not support the execute() code path. at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.doExecute(BroadcastExchangeExec.scala:189) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.exchange.ReusedExchangeExec.doExecute(Exchange.scala:60) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.adaptive.QueryStageExec.doExecute(QueryStageExec.scala:115) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321) at org.apache.spark.sql.execution.SparkPlan.executeCollectIterator(SparkPlan.scala:397) at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.$anonfun$relationFuture$1(BroadcastExchangeExec.scala:118) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withThreadLocalCaptured$1(SQLExecution.scala:185) at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) ... ``` I've checked the AQE code and I found `EnsureRequirements` wrongly puts `BroadcastExchange` on a top of `BroadcastQueryStage` in the `reOptimize` phase as follows: ``` +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#2183] +- BroadcastQueryStage 2 +- ReusedExchange [d_date_sk#1086], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#1963] ``` A root cause is that a `Cast` class in a required child's distribution does not have a `timeZoneId` field (`timeZoneId=None`), and a `Cast` class in `child.outputPartitioning` has it. So, this difference can make the distribution requirement check fail in `EnsureRequirements`: https://github.com/apache/spark/blob/1e85707738a830d33598ca267a6740b3f06b1861/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala#L47-L50 The `Cast` class that does not have a `timeZoneId` field is generated in the `HashJoin` object. To fix this issue, this PR proposes to use the `CastSupport.cast` method there. 
### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked that q5 passed. Closes #30818 from maropu/BugfixInAQE. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/joins/HashJoin.scala | 13 ++++---- .../execution/joins/BroadcastJoinSuite.scala | 33 +++++++++++-------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala index 0c75eda7a4ce2..53bd591d98a2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.joins -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} +import org.apache.spark.sql.catalyst.analysis.CastSupport import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -756,7 +757,7 @@ trait HashJoin extends BaseJoinExec with CodegenSupport { protected def prepareRelation(ctx: CodegenContext): HashedRelationInfo } -object HashJoin { +object HashJoin extends CastSupport with SQLConfHelper { /** * Try to rewrite the key as LongType so we can use getLong(), if they key can fit with a long. * @@ -771,14 +772,14 @@ object HashJoin { } var keyExpr: Expression = if (keys.head.dataType != LongType) { - Cast(keys.head, LongType) + cast(keys.head, LongType) } else { keys.head } keys.tail.foreach { e => val bits = e.dataType.defaultSize * 8 keyExpr = BitwiseOr(ShiftLeft(keyExpr, Literal(bits)), - BitwiseAnd(Cast(e, LongType), Literal((1L << bits) - 1))) + BitwiseAnd(cast(e, LongType), Literal((1L << bits) - 1))) } keyExpr :: Nil } @@ -791,13 +792,13 @@ object HashJoin { // jump over keys that have a higher index value than the required key if (keys.size == 1) { assert(index == 0) - Cast(BoundReference(0, LongType, nullable = false), keys(index).dataType) + cast(BoundReference(0, LongType, nullable = false), keys(index).dataType) } else { val shiftedBits = keys.slice(index + 1, keys.size).map(_.dataType.defaultSize * 8).sum val mask = (1L << (keys(index).dataType.defaultSize * 8)) - 1 // build the schema for unpacking the required key - Cast(BitwiseAnd( + cast(BitwiseAnd( ShiftRightUnsigned(BoundReference(0, LongType, nullable = false), Literal(shiftedBits)), Literal(mask)), keys(index).dataType) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 044e9ace6243f..98a1089709b92 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -242,33 +242,40 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils assert(HashJoin.rewriteKeyExpr(l :: l :: Nil) === l :: l :: Nil) assert(HashJoin.rewriteKeyExpr(l :: i :: Nil) === l :: i :: Nil) - assert(HashJoin.rewriteKeyExpr(i :: Nil) === Cast(i, LongType) :: Nil) + assert(HashJoin.rewriteKeyExpr(i :: Nil) === + Cast(i, LongType, Some(conf.sessionLocalTimeZone)) :: Nil) assert(HashJoin.rewriteKeyExpr(i :: l :: 
Nil) === i :: l :: Nil) assert(HashJoin.rewriteKeyExpr(i :: i :: Nil) === - BitwiseOr(ShiftLeft(Cast(i, LongType), Literal(32)), - BitwiseAnd(Cast(i, LongType), Literal((1L << 32) - 1))) :: Nil) + BitwiseOr(ShiftLeft(Cast(i, LongType, Some(conf.sessionLocalTimeZone)), Literal(32)), + BitwiseAnd(Cast(i, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 32) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(i :: i :: i :: Nil) === i :: i :: i :: Nil) - assert(HashJoin.rewriteKeyExpr(s :: Nil) === Cast(s, LongType) :: Nil) + assert(HashJoin.rewriteKeyExpr(s :: Nil) === + Cast(s, LongType, Some(conf.sessionLocalTimeZone)) :: Nil) assert(HashJoin.rewriteKeyExpr(s :: l :: Nil) === s :: l :: Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: Nil) === - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: Nil) === BitwiseOr(ShiftLeft( - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: s :: Nil) === BitwiseOr(ShiftLeft( BitwiseOr(ShiftLeft( - BitwiseOr(ShiftLeft(Cast(s, LongType), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseOr(ShiftLeft(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal(16)), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), + Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))), + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))), Literal(16)), - BitwiseAnd(Cast(s, LongType), Literal((1L << 16) - 1))) :: Nil) + BitwiseAnd(Cast(s, LongType, Some(conf.sessionLocalTimeZone)), Literal((1L << 16) - 1))) :: + Nil) assert(HashJoin.rewriteKeyExpr(s :: s :: s :: s :: s :: Nil) === s :: s :: s :: s :: s :: Nil) From 6315118676c99ccef2566c50ab9873de8876e468 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 18 Dec 2020 10:03:07 +0900 Subject: [PATCH 0802/1009] [SPARK-33824][PYTHON][DOCS] Restructure and improve Python package management page ### What changes were proposed in this pull request? This PR proposes to restructure and refine the Python dependency management page. I recently wrote a blog post, which will be published soon, and decided to contribute some of the contents back to the PySpark documentation. FWIW, it has been reviewed by some tech writers and engineers. I built the site to make the review easier: https://hyukjin-spark.readthedocs.io/en/stable/user_guide/python_packaging.html ### Why are the changes needed? For better documentation. ### Does this PR introduce _any_ user-facing change? It's a doc change, but only in unreleased branches for now. ### How was this patch tested? I manually built the docs as: ```bash cd python/docs make clean html open ``` Closes #30822 from HyukjinKwon/SPARK-33824.
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- .../source/user_guide/python_packaging.rst | 200 +++++++++++------- 1 file changed, 125 insertions(+), 75 deletions(-) diff --git a/python/docs/source/user_guide/python_packaging.rst b/python/docs/source/user_guide/python_packaging.rst index 0aff6dc1d16b4..71d8e532f9361 100644 --- a/python/docs/source/user_guide/python_packaging.rst +++ b/python/docs/source/user_guide/python_packaging.rst @@ -17,7 +17,7 @@ ========================= -3rd Party Python Packages +Python Package Management ========================= When you want to run your PySpark application on a cluster such as YARN, Kubernetes, Mesos, etc., you need to make @@ -51,10 +51,11 @@ Here is the script ``app.py`` from the previous example that will be executed on main(SparkSession.builder.getOrCreate()) -There are multiple ways to ship the dependencies to the cluster: +There are multiple ways to manage Python dependencies in the cluster: - Using PySpark Native Features -- Using Zipped Virtual Environment +- Using Conda +- Using Virtualenv - Using PEX @@ -62,54 +63,51 @@ Using PySpark Native Features ----------------------------- PySpark allows to upload Python files (``.py``), zipped Python packages (``.zip``), and Egg files (``.egg``) -to the executors by setting the configuration setting ``spark.submit.pyFiles`` or by directly -calling :meth:`pyspark.SparkContext.addPyFile`. +to the executors by: -This is an easy way to ship additional custom Python code to the cluster. You can just add individual files or zip whole -packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code -even after having started your job. +- Setting the configuration setting ``spark.submit.pyFiles`` +- Setting ``--py-files`` option in Spark scripts +- Directly calling :meth:`pyspark.SparkContext.addPyFile` in applications -Note that it doesn't allow to add packages built as `Wheels `_ and therefore doesn't -allow to include dependencies with native code. +This is a straightforward method to ship additional custom Python code to the cluster. You can just add individual files or zip whole +packages and upload them. Using :meth:`pyspark.SparkContext.addPyFile` allows to upload code even after having started your job. +However, it does not allow to add packages built as `Wheels `_ and therefore +does not allow to include dependencies with native code. -Using Zipped Virtual Environment --------------------------------- -The idea of zipped environments is to zip your whole `virtual environment `_, -ship it to the cluster, unzip it remotely and target the Python interpreter from inside this zipped environment. +Using Conda +----------- -Zip Virtual Environment -~~~~~~~~~~~~~~~~~~~~~~~ +`Conda `_ is one of the most widely-used Python package management systems. PySpark users can directly +use a Conda environment to ship their third-party Python packages by leveraging +`conda-pack `_ which is a command line tool creating +relocatable Conda environments. -You can zip the virtual environment on your own or use tools for doing this: - -* `conda-pack `_ for conda environments -* `venv-pack `_ for virtual environments - -Example with `conda-pack`: +The example below creates a Conda environment to use on both the driver and executor and packs +it into an archive file. This archive file captures the Conda environment for Python and stores +both Python interpreter and all its relevant dependencies. .. 
code-block:: bash - conda create -y -n pyspark_env -c conda-forge pyarrow==2.0.0 pandas==1.1.4 conda-pack==0.5.0 - conda activate pyspark_env - conda pack -f -o pyspark_env.tar.gz - -Upload to Spark Executors -~~~~~~~~~~~~~~~~~~~~~~~~~ + conda create -y -n pyspark_conda_env -c conda-forge pyarrow pandas conda-pack + conda activate pyspark_conda_env + conda pack -f -o pyspark_conda_env.tar.gz -Unzipping will be done by Spark when using target ``--archives`` option in spark-submit -or setting ``spark.archives`` configuration. +After that, you can ship it together with scripts or in the code by using the ``--archives`` option +or ``spark.archives`` configuration (``spark.yarn.dist.archives`` in YARN). It automatically unpacks the archive on executors. -Example with ``spark-submit``: +In the case of a ``spark-submit`` script, you can use it as follows: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - spark-submit --master=... --archives pyspark_env.tar.gz#environment app.py + spark-submit --archives pyspark_conda_env.tar.gz#environment app.py -Example using ``SparkSession.builder``: +Note that ``PYSPARK_DRIVER_PYTHON`` above is not required for cluster modes in YARN or Kubernetes. + +If you’re on a regular Python shell or notebook, you can try it as shown below: .. code-block:: python @@ -118,67 +116,117 @@ Example using ``SparkSession.builder``: from app import main os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" - spark = SparkSession.builder.master("...").config("spark.archives", "pyspark_env.tar.gz#environment").getOrCreate() + spark = SparkSession.builder.config( + "spark.archives", # 'spark.yarn.dist.archives' in YARN. + "pyspark_conda_env.tar.gz#environment").getOrCreate() main(spark) -Example with ``pyspark`` shell: +For a pyspark shell: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python export PYSPARK_PYTHON=./environment/bin/python - pyspark --master=... --archives pyspark_env.tar.gz#environment + pyspark --archives pyspark_conda_env.tar.gz#environment -Using PEX ---------- +Using Virtualenv +---------------- -`PEX `_ is a library for generating ``.pex`` (Python EXecutable) files. -A PEX file is a self-contained executable Python environment. It can be seen as the Python equivalent of Java uber-JARs (a.k.a. fat JARs). +`Virtualenv `_ is a Python tool to create isolated Python environments. +Since Python 3.3, a subset of its features has been integrated into Python as a standard library under +the `venv `_ module. PySpark users can use virtualenv to manage +Python dependencies in their clusters by using `venv-pack `_ +in a similar way as conda-pack. -You need to build the PEX file somewhere with all your requirements and then upload it to each Spark executor. +A virtual environment to use on both driver and executor can be created as demonstrated below. +It packs the current virtual environment to an archive file, and It self-contains both Python interpreter +and the dependencies. -Using CLI to Build PEX file -~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: bash - pex pyspark==3.0.1 pyarrow==0.15.1 pandas==0.25.3 -o myarchive.pex + python -m venv pyspark_venv + source pyspark_venv/bin/activate + pip install pyarrow pandas venv-pack + venv-pack -o pyspark_venv.tar.gz +You can directly pass/unpack the archive file and enable the environment on executors by leveraging +the ``--archives`` option or ``spark.archives`` configuration (``spark.yarn.dist.archives`` in YARN). 
-Invoking the PEX file will by default invoke the Python interpreter. pyarrow, pandas and pyspark will be included in the PEX file. +For ``spark-submit``, you can use it by running the command as follows. Also, notice that +``PYSPARK_DRIVER_PYTHON`` is not necessary in Kubernetes or YARN cluster modes. .. code-block:: bash - ./myarchive.pex - Python 3.6.6 (default, Jan 26 2019, 16:53:05) - (InteractiveConsole) - >>> import pyarrow - >>> import pandas - >>> import pyspark - >>> + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./environment/bin/python + spark-submit --archives pyspark_venv.tar.gz#environment app.py -This can also be done directly with the Python API. For more information on how to build PEX files, -please refer to `Building .pex files `_ +For regular Python shells or notebooks: -Upload to Spark Executors -~~~~~~~~~~~~~~~~~~~~~~~~~ +.. code-block:: bash -The upload can be done by setting ``--files`` option in spark-submit or setting ``spark.files`` configuration (``spark.yarn.dist.files`` on YARN) -and changing the ``PYSPARK_PYTHON`` environment variable to change the Python interpreter to the PEX executable on each executor. + import os + from pyspark.sql import SparkSession + from app import main -.. - TODO: we should also document the way on other cluster modes. + os.environ['PYSPARK_PYTHON'] = "./environment/bin/python" + spark = SparkSession.builder.config( + "spark.archives", # 'spark.yarn.dist.archives' in YARN. + "pyspark_venv.tar.gz#environment").getOrCreate() + main(spark) -Example with ``spark-submit`` on YARN: +In the case of a pyspark shell: .. code-block:: bash export PYSPARK_DRIVER_PYTHON=python - export PYSPARK_PYTHON=./myarchive.pex - spark-submit --master=yarn --deploy-mode client --files myarchive.pex app.py + export PYSPARK_PYTHON=./environment/bin/python + pyspark --archives pyspark_venv.tar.gz#environment + + +Using PEX +--------- -Example using ``SparkSession.builder`` on YARN: +PySpark can also use `PEX `_ to ship the Python packages +together. PEX is a tool that creates a self-contained Python environment. This is similar +to Conda or virtualenv, but a ``.pex`` file is executable by itself. + +The following example creates a ``.pex`` file for the driver and executor to use. +The file contains the Python dependencies specified with the ``pex`` command. + +.. code-block:: bash + + pip install pyarrow pandas pex + pex pyspark pyarrow pandas -o pyspark_pex_env.pex + +This file behaves similarly with a regular Python interpreter. + +.. code-block:: bash + + ./pyspark_pex_env.pex -c "import pandas; print(pandas.__version__)" + 1.1.5 + +However, ``.pex`` file does not include a Python interpreter itself under the hood so all +nodes in a cluster should have the same Python interpreter installed. + +In order to transfer and use the ``.pex`` file in a cluster, you should ship it via the +``spark.files`` configuration (``spark.yarn.dist.files`` in YARN) or ``--files`` option because they are regular files instead +of directories or archive files. + +For application submission, you run the commands as shown below. +Note that ``PYSPARK_DRIVER_PYTHON`` is not needed for cluster modes in YARN or Kubernetes, +and you may also need to set ``PYSPARK_PYTHON`` environment variable on +the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex`` in YARN cluster mode. + +.. 
code-block:: bash + + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./pyspark_pex_env.pex + spark-submit --files pyspark_pex_env.pex app.py + +For regular Python shells or notebooks: .. code-block:: python @@ -186,19 +234,21 @@ Example using ``SparkSession.builder`` on YARN: from pyspark.sql import SparkSession from app import main - os.environ['PYSPARK_PYTHON']="./myarchive.pex" - builder = SparkSession.builder - builder.master("yarn") \ - .config("spark.submit.deployMode", "client") \ - .config("spark.yarn.dist.files", "myarchive.pex") - spark = builder.getOrCreate() + os.environ['PYSPARK_PYTHON'] = "./pyspark_pex_env.pex" + spark = SparkSession.builder.config( + "spark.files", # 'spark.yarn.dist.files' in YARN. + "pyspark_pex_env.pex").getOrCreate() main(spark) -Notes -~~~~~ +For the interactive pyspark shell, the commands are almost the same: -* The Python interpreter that has been used to generate the PEX file must be available on each executor. PEX doesn't include the Python interpreter. +.. code-block:: bash -* In YARN cluster mode you may also need to set ``PYSPARK_PYTHON`` environment variable on the AppMaster ``--conf spark.yarn.appMasterEnv.PYSPARK_PYTHON=./myarchive.pex``. + export PYSPARK_DRIVER_PYTHON=python + export PYSPARK_PYTHON=./pyspark_pex_env.pex + pyspark --files pyspark_pex_env.pex -* An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX can be found `here `_ - it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having to create & upload the PEX manually. +An end-to-end Docker example for deploying a standalone PySpark with ``SparkSession.builder`` and PEX +can be found `here `_ +- it uses cluster-pack, a library on top of PEX that automatizes the the intermediate step of having +to create & upload the PEX manually. From 42e1831ebb19be15921a2ac612dfdac47639edeb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 18 Dec 2020 10:48:51 +0900 Subject: [PATCH 0803/1009] [SPARK-33797][SS][DOCS] Update SS doc about State Store and task locality ### What changes were proposed in this pull request? This updates SS documentation to document about State Store and task locality. ### Why are the changes needed? During running some tests for structured streaming, I found state store locality becomes an issue sometimes and it is not very straightforward for end-users. It'd be great if we can document it. ### Does this PR introduce _any_ user-facing change? No, only doc change. ### How was this patch tested? No, only doc change. Closes #30789 from viirya/ss-statestore-doc. Authored-by: Liang-Chi Hsieh Signed-off-by: Jungtaek Lim --- .../structured-streaming-programming-guide.md | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 6995ee2475aee..bea38ed7d805d 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1689,6 +1689,28 @@ hence the number is not same as the number of original input rows. You'd like to There's a known workaround: split your streaming query into multiple queries per stateful operator, and ensure end-to-end exactly once per query. Ensuring end-to-end exactly once for the last query is optional. +### State Store and task locality + +The stateful operations store states for events in state stores of executors. 
State stores occupy resources such as memory and disk space to store the states. +So it is more efficient to keep a state store provider running in the same executor across different streaming batches. +Changing the location of a state store provider requires the extra overhead of loading checkpointed states. The overhead of loading state from checkpoint depends +on the external storage and the size of the state, which tends to hurt the latency of micro-batch run. For some use cases such as processing very large state data, +loading new state store providers from checkpointed states can be very time-consuming and inefficient. + +The stateful operations in Structured Streaming queries rely on the preferred location feature of Spark's RDD to run the state store provider on the same executor. +If in the next batch the corresponding state store provider is scheduled on this executor again, it could reuse the previous states and save the time of loading checkpointed states. + +However, generally the preferred location is not a hard requirement and it is still possible that Spark schedules tasks to the executors other than the preferred ones. +In this case, Spark will load state store providers from checkpointed states on new executors. The state store providers run in the previous batch will not be unloaded immediately. +Spark runs a maintenance task which checks and unloads the state store providers that are inactive on the executors. + +By changing the Spark configurations related to task scheduling, for example `spark.locality.wait`, users can configure Spark how long to wait to launch a data-local task. +For stateful operations in Structured Streaming, it can be used to let state store providers running on the same executors across batches. + +Specifically for built-in HDFS state store provider, users can check the state store metrics such as `loadedMapCacheHitCount` and `loadedMapCacheMissCount`. Ideally, +it is best if cache missing count is minimized that means Spark won't waste too much time on loading checkpointed state. +User can increase Spark locality waiting configurations to avoid loading state store providers in different executors across batches. + ## Starting Streaming Queries Once you have defined the final result DataFrame/Dataset, all that is left is for you to start the streaming computation. To do that, you have to use the `DataStreamWriter` ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/pyspark.sql.html#pyspark.sql.streaming.DataStreamWriter) docs) From 131a23d88a56280d47584aed93bc8fb617550717 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 17 Dec 2020 19:09:57 -0800 Subject: [PATCH 0804/1009] [SPARK-33831][UI] Update to jetty 9.4.34 ### What changes were proposed in this pull request? Update Jetty to 9.4.34 ### Why are the changes needed? Picks up fixes and improvements, including a possible CVE fix. https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.33.v20201020 https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.34.v20201102 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30828 from srowen/SPARK-33831. 
Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 78d1fe7d54350..72e285bb2ba6e 100644 --- a/pom.xml +++ b/pom.xml @@ -137,7 +137,7 @@ 10.12.1.1 1.10.1 1.6.6 - 9.4.28.v20200408 + 9.4.34.v20201102 4.0.3 0.9.5 2.4.0 From 0f1a18370a1a95a2b7943519584af7a0dff42ae8 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Fri, 18 Dec 2020 04:30:15 +0000 Subject: [PATCH 0805/1009] [SPARK-33817][SQL] CACHE TABLE uses a logical plan when caching a query to avoid creating a dataframe ### What changes were proposed in this pull request? This PR proposes to update `CACHE TABLE` to use a `LogicalPlan` when caching a query to avoid creating a `DataFrame` as suggested here: https://github.com/apache/spark/pull/30743#discussion_r543123190 For reference, `UNCACHE TABLE` also uses `LogicalPlan`: https://github.com/apache/spark/blob/0c129001201ccb63ae96f576b6f354da84024fb3/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala#L91-L98 ### Why are the changes needed? To avoid creating an unnecessary dataframe and make it consistent with `uncacheQuery` used in `UNCACHE TABLE`. ### Does this PR introduce _any_ user-facing change? No, just internal changes. ### How was this patch tested? Existing tests since this is an internal refactoring change. Closes #30815 from imback82/cache_with_logical_plan. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 8 +++- .../spark/sql/execution/CacheManager.scala | 26 ++++++++++++- .../datasources/v2/CacheTableExec.scala | 38 ++++++++++++------- .../datasources/v2/DataSourceV2Strategy.scala | 5 +-- 4 files changed, 57 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 0bef6998b177d..10c8ac58840f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1107,12 +1107,16 @@ class Analyzer(override val catalogManager: CatalogManager) case c @ CacheTable(u @ UnresolvedRelation(_, _, false), _, _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .map(resolveViews) + .map(EliminateSubqueryAliases(_)) + .map(relation => c.copy(table = relation)) .getOrElse(c) case c @ UncacheTable(u @ UnresolvedRelation(_, _, false), _, _) => lookupRelation(u.multipartIdentifier, u.options, false) - .map(relation => c.copy(table = EliminateSubqueryAliases(relation))) + .map(resolveViews) + .map(EliminateSubqueryAliases(_)) + .map(relation => c.copy(table = relation)) .getOrElse(c) // TODO (SPARK-27484): handle streaming write commands when we have them. 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index f163d85914bc9..b3671945e5891 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -88,12 +88,34 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { query: Dataset[_], tableName: Option[String] = None, storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = { - val planToCache = query.logicalPlan + cacheQuery(query.sparkSession, query.logicalPlan, tableName, storageLevel) + } + + /** + * Caches the data produced by the given [[LogicalPlan]]. + * Unlike `RDD.cache()`, the default storage level is set to be `MEMORY_AND_DISK` because + * recomputing the in-memory columnar representation of the underlying table is expensive. + */ + def cacheQuery( + spark: SparkSession, + planToCache: LogicalPlan, + tableName: Option[String]): Unit = { + cacheQuery(spark, planToCache, tableName, MEMORY_AND_DISK) + } + + /** + * Caches the data produced by the given [[LogicalPlan]]. + */ + def cacheQuery( + spark: SparkSession, + planToCache: LogicalPlan, + tableName: Option[String], + storageLevel: StorageLevel): Unit = { if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = SparkSession.getOrCloneSessionWithConfigsOff( - query.sparkSession, forceDisableConfigs) + spark, forceDisableConfigs) val inMemoryRelation = sessionWithConfigsOff.withActive { val qe = sessionWithConfigsOff.sessionState.executePlan(planToCache) InMemoryRelation( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 2d8e5b5e286b8..4a7152232e8fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.Locale -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -29,10 +29,13 @@ import org.apache.spark.storage.StorageLevel trait BaseCacheTableExec extends V2CommandExec { def relationName: String - def dataFrameToCache: DataFrame + def planToCache: LogicalPlan + def dataFrameForCachedPlan: DataFrame def isLazy: Boolean def options: Map[String, String] + protected val sparkSession: SparkSession = sqlContext.sparkSession + override def run(): Seq[InternalRow] = { val storageLevelKey = "storagelevel" val storageLevelValue = @@ -42,20 +45,22 @@ trait BaseCacheTableExec extends V2CommandExec { logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") } - val sparkSession = sqlContext.sparkSession - val df = dataFrameToCache if (storageLevelValue.nonEmpty) { sparkSession.sharedState.cacheManager.cacheQuery( - df, + sparkSession, + planToCache, Some(relationName), StorageLevel.fromString(storageLevelValue.get)) } else { - sparkSession.sharedState.cacheManager.cacheQuery(df, Some(relationName)) + sparkSession.sharedState.cacheManager.cacheQuery( + sparkSession, + 
planToCache, + Some(relationName)) } if (!isLazy) { - // Performs eager caching - df.count() + // Performs eager caching. + dataFrameForCachedPlan.count() } Seq.empty @@ -69,9 +74,13 @@ case class CacheTableExec( multipartIdentifier: Seq[String], override val isLazy: Boolean, override val options: Map[String, String]) extends BaseCacheTableExec { - override def relationName: String = multipartIdentifier.quoted + override lazy val relationName: String = multipartIdentifier.quoted + + override lazy val planToCache: LogicalPlan = relation - override def dataFrameToCache: DataFrame = Dataset.ofRows(sqlContext.sparkSession, relation) + override lazy val dataFrameForCachedPlan: DataFrame = { + Dataset.ofRows(sparkSession, planToCache) + } } case class CacheTableAsSelectExec( @@ -79,11 +88,14 @@ case class CacheTableAsSelectExec( query: LogicalPlan, override val isLazy: Boolean, override val options: Map[String, String]) extends BaseCacheTableExec { - override def relationName: String = tempViewName + override lazy val relationName: String = tempViewName - override def dataFrameToCache: DataFrame = { - val sparkSession = sqlContext.sparkSession + override lazy val planToCache: LogicalPlan = { Dataset.ofRows(sparkSession, query).createTempView(tempViewName) + dataFrameForCachedPlan.logicalPlan + } + + override lazy val dataFrameForCachedPlan: DataFrame = { sparkSession.table(tempViewName) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index f9c89051e421a..c40d2ab9cba4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.JavaConverters._ -import org.apache.spark.sql.{AnalysisException, Dataset, SparkSession, Strategy} +import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation @@ -66,8 +66,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel // recache with the same name and cache level. - val ds = Dataset.ofRows(session, v2Relation) - session.sharedState.cacheManager.cacheQuery(ds, cacheName, cacheLevel) + session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) } } From 25c6cc25f74e8a24aa424f6596a574f26ae80e1d Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 18 Dec 2020 14:24:53 +0900 Subject: [PATCH 0806/1009] [SPARK-26341][WEBUI] Expose executor memory metrics at the stage level, in the Stages tab ### What changes were proposed in this pull request? Expose executor memory metrics at the stage level, in the Stages tab, Current like below, and I am not sure which column we will truly need. 
![image](https://user-images.githubusercontent.com/46485123/101170248-2256f900-3679-11eb-8c34-794fcf8e94a8.png) ![image](https://user-images.githubusercontent.com/46485123/101170359-4dd9e380-3679-11eb-984b-b0430f236160.png) ![image](https://user-images.githubusercontent.com/46485123/101314915-86a1d480-3894-11eb-9b6f-8050d326e11f.png) ### Why are the changes needed? User can know executor jvm usage more directly in SparkUI ### Does this PR introduce any user-facing change? User can know executor jvm usage more directly in SparkUI ### How was this patch tested? Manual Tested Closes #30573 from AngersZhuuuu/SPARK-26341. Authored-by: angerszhu Signed-off-by: Kousuke Saruta --- .../org/apache/spark/ui/static/stagepage.js | 163 +++++++++++++++--- .../spark/ui/static/stagespage-template.html | 4 + .../spark/status/AppStatusListener.scala | 3 + ...xcludeOnFailure_for_stage_expectation.json | 44 +++++ ...eOnFailure_node_for_stage_expectation.json | 110 ++++++++++++ .../one_stage_attempt_json_expectation.json | 22 +++ .../one_stage_json_expectation.json | 22 +++ ...age_with_accumulable_json_expectation.json | 22 +++ 8 files changed, 365 insertions(+), 25 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 2877aa819ab9e..336edff509300 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -243,23 +243,39 @@ function createRowMetadataForColumn(colKey, data, checkboxId) { } function reselectCheckboxesBasedOnTaskTableState() { - var allChecked = true; + var taskSummaryHasSelected = false; + var executorSummaryHasSelected = false; + var allTaskSummaryChecked = true; + var allExecutorSummaryChecked = true; var taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); if (typeof taskTableSelector !== 'undefined' && taskSummaryMetricsTableCurrentStateArray.length > 0) { for (var k = 0; k < optionalColumns.length; k++) { if (taskTableSelector.column(optionalColumns[k]).visible()) { + taskSummaryHasSelected = true; $("#box-"+optionalColumns[k]).prop('checked', true); taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == optionalColumns[k])[0]); taskSummaryMetricsTableCurrentFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); } else { - allChecked = false; + allTaskSummaryChecked = false; } } - if (allChecked) { - $("#box-0").prop('checked', true); - } createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableCurrentFilteredArray); } + + if (typeof executorSummaryTableSelector !== 'undefined') { + for (var k = 0; k < executorOptionalColumns.length; k++) { + if (executorSummaryTableSelector.column(executorOptionalColumns[k]).visible()) { + executorSummaryHasSelected = true; + $("#executor-box-"+executorOptionalColumns[k]).prop('checked', true); + } else { + allExecutorSummaryChecked = false; + } + } + } + + if ((taskSummaryHasSelected || executorSummaryHasSelected) && allTaskSummaryChecked && allExecutorSummaryChecked) { + $("#box-0").prop('checked', true); + } } function getStageAttemptId() { @@ -278,6 +294,9 @@ var taskSummaryMetricsDataTable; var optionalColumns = [11, 12, 13, 14, 15, 16, 17, 21]; var taskTableSelector; +var executorOptionalColumns = [15, 16, 17, 18]; +var executorSummaryTableSelector; + $(document).ready(function () { setDataTableDefaults(); @@ -288,14 +307,18 @@ 
$(document).ready(function () { "" + "

      " + "
      Select All
      " + - "
      Scheduler Delay
      " + - "
      Task Deserialization Time
      " + - "
      Shuffle Read Blocked Time
      " + - "
      Shuffle Remote Reads
      " + - "
      Shuffle Write Time
      " + - "
      Result Serialization Time
      " + - "
      Getting Result Time
      " + - "
      Peak Execution Memory
      " + + "
      Scheduler Delay
      " + + "
      Task Deserialization Time
      " + + "
      Shuffle Read Blocked Time
      " + + "
      Shuffle Remote Reads
      " + + "
      Shuffle Write Time
      " + + "
      Result Serialization Time
      " + + "
      Getting Result Time
      " + + "
      Peak Execution Memory
      " + + "
      Peak JVM Memory OnHeap / OffHeap
      " + + "
      Peak Execution Memory OnHeap / OffHeap
      " + + "
      Peak Storage Memory OnHeap / OffHeap
      " + + "
      Peak Pool Memory Direct / Mapped
      " + "
      "); $('#scheduler_delay').attr("data-toggle", "tooltip") @@ -463,15 +486,95 @@ $(document).ready(function () { data : function (row, type) { return typeof row.diskBytesSpilled != 'undefined' ? formatBytes(row.diskBytesSpilled, type) : ""; } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.JVMOffHeapMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapExecutionMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapStorageMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } + }, + { + data : function (row, type) { + var peakMemoryMetrics = row.peakMemoryMetrics + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.MappedPoolMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } + } } ], + "columnDefs": [ + { "visible": false, "targets": 15 }, + { "visible": false, "targets": 16 }, + { "visible": false, "targets": 17 }, + { "visible": false, "targets": 18 } + ], + "deferRender": true, "order": [[0, "asc"]], "bAutoWidth": false, "oLanguage": { "sEmptyTable": "No data to show yet" } }; - var executorSummaryTableSelector = + executorSummaryTableSelector = $("#summary-executor-table").DataTable(executorSummaryConf); $('#parent-container [data-toggle="tooltip"]').tooltip(); @@ -923,30 +1026,40 @@ $(document).ready(function () { var para = $(this).attr('data-column'); if (para == "0") { var allColumns = taskTableSelector.columns(optionalColumns); + var executorAllColumns = executorSummaryTableSelector.columns(executorOptionalColumns); if ($(this).is(":checked")) { $(".toggle-vis").prop('checked', true); allColumns.visible(true); + executorAllColumns.visible(true); createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableArray); } else { $(".toggle-vis").prop('checked', false); allColumns.visible(false); + executorAllColumns.visible(false); var taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableArray.filter(row => row.checkboxId < 11); createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); } } else { - var column = taskTableSelector.column(para); - // Toggle the visibility - column.visible(!column.visible()); - var taskSummaryMetricsTableFilteredArray = []; - if ($(this).is(":checked")) { - 
taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]); - taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); - } else { - taskSummaryMetricsTableFilteredArray = - taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para); + var dataMetricsType = $(this).attr("data-metrics-type"); + if (dataMetricsType === 'task') { + var column = taskTableSelector.column(para); + // Toggle the visibility + column.visible(!column.visible()); + var taskSummaryMetricsTableFilteredArray = []; + if ($(this).is(":checked")) { + taskSummaryMetricsTableCurrentStateArray.push(taskSummaryMetricsTableArray.filter(row => (row.checkboxId).toString() == para)[0]); + taskSummaryMetricsTableFilteredArray = taskSummaryMetricsTableCurrentStateArray.slice(); + } else { + taskSummaryMetricsTableFilteredArray = + taskSummaryMetricsTableCurrentStateArray.filter(row => (row.checkboxId).toString() != para); + } + createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); + } + if (dataMetricsType === "executor") { + var column = executorSummaryTableSelector.column(para); + column.visible(!column.visible()); } - createDataTableForTaskSummaryMetricsTable(taskSummaryMetricsTableFilteredArray); } }); diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html index 9b40d0dc4a230..b938158b77027 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html +++ b/core/src/main/resources/org/apache/spark/ui/static/stagespage-template.html @@ -59,6 +59,10 @@

      Aggregated Metrics by Executor

      Shuffle Write Size / Records Spill (Memory) Spill (Disk) + Peak JVM Memory OnHeap / OffHeap + Peak Execution Memory OnHeap / OffHeap + Peak Storage Memory OnHeap / OffHeap + Peak Pool Memory Direct / Mapped diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 5b0c1dc389af0..0722095cc6533 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -687,6 +687,9 @@ private[spark] class AppStatusListener( stage.killedSummary = killedTasksSummary(event.reason, stage.killedSummary) } stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1 + + stage.executorSummary(event.taskInfo.executorId).peakExecutorMetrics + .compareAndUpdatePeakValues(event.taskExecutorMetrics) // [SPARK-24415] Wait for all tasks to finish before removing stage from live list val removeStage = stage.activeTasks == 0 && diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index a69940fa5a1a5..ab9a8b7ef885f 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -698,6 +698,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "1" : { @@ -716,6 +738,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index bda9caedbbe81..1c569c19894fd 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -806,6 +806,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + 
"JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "5" : { @@ -824,6 +846,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true }, "1" : { @@ -842,6 +886,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false }, "2" : { @@ -860,6 +926,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false }, "3" : { @@ -878,6 +966,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : true, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : true } }, 
diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index 41e54c68858ad..b1eab0d7ac196 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -460,6 +460,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 7a6685a609523..6dfdd27cd7d8f 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -460,6 +460,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index 066b6a4f884a7..a2cfd9d42cc99 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -504,6 +504,28 @@ "memoryBytesSpilled" : 0, "diskBytesSpilled" : 0, "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "isExcludedForStage" : false } }, From b0da2bcd464b24d58e2ce56d4f93f1f9527839ff Mon Sep 17 00:00:00 
2001 From: Kousuke Saruta Date: Fri, 18 Dec 2020 15:10:13 +0900 Subject: [PATCH 0807/1009] [MINOR][INFRA] Add -Pspark-ganglia-lgpl to the build definition with Scala 2.13 on GitHub Actions ### What changes were proposed in this pull request? This PR adds `-Pspark-ganglia-lgpl` to the build definition with Scala 2.13 on GitHub Actions. ### Why are the changes needed? To keep the code buildable with Scala 2.13. With this change, all the sub-modules seem to be buildable with Scala 2.13. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed that the Scala 2.13 build passes with the following commands. ``` $ ./dev/change-scala-version.sh 2.13 $ build/sbt -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile ``` Closes #30834 from sarutak/ganglia-scala-2.13. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f133a4132b2a5..0048bc7ffba0d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -414,7 +414,7 @@ jobs: - name: Build with SBT run: | ./dev/change-scala-version.sh 2.13 - ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pscala-2.13 compile test:compile + ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile hadoop-2: name: Hadoop 2 build with SBT From 0603913c666bae1a9640f2f1469fe50bc59e461d Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 18 Dec 2020 00:01:13 -0800 Subject: [PATCH 0808/1009] [SPARK-33593][SQL] Vector reader got incorrect data with binary partition value ### What changes were proposed in this pull request? Currently, when the Parquet vectorized reader is enabled, using a binary type as a partition column returns an incorrect value, as the UT below shows: ```scala test("Parquet vector reader incorrect with binary partition value") { Seq(false, true).foreach(tag => { withSQLConf("spark.sql.parquet.enableVectorizedReader" -> tag.toString) { withTable("t1") { sql( """CREATE TABLE t1(name STRING, id BINARY, part BINARY) | USING PARQUET PARTITIONED BY (part)""".stripMargin) sql(s"INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") if (tag) { checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), Row("a", "Spark SQL", "")) } else { checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), Row("a", "Spark SQL", "Spark SQL")) } } } }) } ``` ### Why are the changes needed? Fixes a data correctness issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30824 from AngersZhuuuu/SPARK-33593.
Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../vectorized/ColumnVectorUtils.java | 5 ++ .../org/apache/spark/sql/SQLQuerySuite.scala | 26 +++++++ .../orc/OrcColumnarBatchReaderSuite.scala | 77 ++++++++++++++++++- .../datasources/parquet/ParquetIOSuite.scala | 9 ++- 4 files changed, 114 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java index bce6aa28c42a1..25aabcd086289 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVectorUtils.java @@ -54,6 +54,8 @@ public static void populate(WritableColumnVector col, InternalRow row, int field } else { if (t == DataTypes.BooleanType) { col.putBooleans(0, capacity, row.getBoolean(fieldIdx)); + } else if (t == DataTypes.BinaryType) { + col.putByteArray(0, row.getBinary(fieldIdx)); } else if (t == DataTypes.ByteType) { col.putBytes(0, capacity, row.getByte(fieldIdx)); } else if (t == DataTypes.ShortType) { @@ -94,6 +96,9 @@ public static void populate(WritableColumnVector col, InternalRow row, int field col.putInts(0, capacity, row.getInt(fieldIdx)); } else if (t instanceof TimestampType) { col.putLongs(0, capacity, row.getLong(fieldIdx)); + } else { + throw new RuntimeException(String.format("DataType %s is not supported" + + " in column vectorized reader.", t.sql())); } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 112b1a7210cb4..b7cec55245564 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3745,6 +3745,32 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-33593: Vector reader got incorrect data with binary partition value") { + Seq("false", "true").foreach(value => { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) { + withTable("t1") { + sql( + """CREATE TABLE t1(name STRING, id BINARY, part BINARY) + |USING PARQUET PARTITIONED BY (part)""".stripMargin) + sql("INSERT INTO t1 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") + checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t1"), + Row("a", "Spark SQL", "Spark SQL")) + } + } + + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> value) { + withTable("t2") { + sql( + """CREATE TABLE t2(name STRING, id BINARY, part BINARY) + |USING ORC PARTITIONED BY (part)""".stripMargin) + sql("INSERT INTO t2 PARTITION(part = 'Spark SQL') VALUES('a', X'537061726B2053514C')") + checkAnswer(sql("SELECT name, cast(id as string), cast(part as string) FROM t2"), + Row("a", "Spark SQL", "Spark SQL")) + } + } + }) + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala index 719bf91e1786b..bfcef46339908 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -17,16 +17,29 @@ package 
org.apache.spark.sql.execution.datasources.orc +import java.io.File + +import org.apache.hadoop.fs.Path +import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID, TaskID, TaskType} +import org.apache.hadoop.mapreduce.lib.input.FileSplit +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.orc.TypeDescription import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.GenericInternalRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{StructField, StructType} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.unsafe.types.UTF8String.fromString class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { + + import testImplicits._ + private val dataSchema = StructType.fromDDL("col1 int, col2 int") private val partitionSchema = StructType.fromDDL("p1 string, p2 string") private val partitionValues = InternalRow(fromString("partValue1"), fromString("partValue2")) @@ -77,4 +90,66 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { assert(p1.getUTF8String(0) === partitionValues.getUTF8String(0)) } } + + test("SPARK-33593: partition column types") { + withTempPath { dir => + Seq(1).toDF().repartition(1).write.orc(dir.getCanonicalPath) + + val dataTypes = + Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType, + FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType) + + val constantValues = + Seq( + UTF8String.fromString("a string"), + true, + 1.toByte, + "Spark SQL".getBytes, + 2.toShort, + 3, + Long.MaxValue, + 0.25.toFloat, + 0.75D, + Decimal("1234.23456"), + DateTimeUtils.fromJavaDate(java.sql.Date.valueOf("2015-01-01")), + DateTimeUtils.fromJavaTimestamp(java.sql.Timestamp.valueOf("2015-01-01 23:50:59.123"))) + + dataTypes.zip(constantValues).foreach { case (dt, v) => + val schema = StructType(StructField("col1", IntegerType) :: StructField("pcol", dt) :: Nil) + val partitionValues = new GenericInternalRow(Array(v)) + val file = new File(SpecificParquetRecordReaderBase.listDirectory(dir).get(0)) + val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty) + val taskConf = sqlContext.sessionState.newHadoopConf() + val orcFileSchema = TypeDescription.fromString(schema.simpleString) + val vectorizedReader = new OrcColumnarBatchReader(4096) + val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) + val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) + + try { + vectorizedReader.initialize(fileSplit, taskAttemptContext) + vectorizedReader.initBatch( + orcFileSchema, + schema.toArray, + Array(0, -1), + Array(-1, 0), + partitionValues) + vectorizedReader.nextKeyValue() + val row = vectorizedReader.getCurrentValue.getRow(0) + + // Use `GenericMutableRow` by explicitly copying rather than `ColumnarBatch` + // in order to use get(...) method which is not implemented in `ColumnarBatch`. 
+ val actual = row.copy().get(1, dt) + val expected = v + if (dt.isInstanceOf[BinaryType]) { + assert(actual.asInstanceOf[Array[Byte]] + sameElements expected.asInstanceOf[Array[Byte]]) + } else { + assert(actual == expected) + } + } finally { + vectorizedReader.close() + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index d13b3e58a30ff..c69f2e6911ba3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -790,7 +790,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession Seq(1).toDF().repartition(1).write.parquet(dir.getCanonicalPath) val dataTypes = - Seq(StringType, BooleanType, ByteType, ShortType, IntegerType, LongType, + Seq(StringType, BooleanType, ByteType, BinaryType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType(25, 5), DateType, TimestampType) val constantValues = @@ -798,6 +798,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession UTF8String.fromString("a string"), true, 1.toByte, + "Spark SQL".getBytes, 2.toShort, 3, Long.MaxValue, @@ -825,7 +826,11 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession // in order to use get(...) method which is not implemented in `ColumnarBatch`. val actual = row.copy().get(1, dt) val expected = v - assert(actual == expected) + if (dt.isInstanceOf[BinaryType]) { + assert(actual.asInstanceOf[Array[Byte]] sameElements expected.asInstanceOf[Array[Byte]]) + } else { + assert(actual == expected) + } } finally { vectorizedReader.close() } From bc46d273e0ae0d13d0e31e30e39198ac19dcd27b Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 18 Dec 2020 20:27:16 +0900 Subject: [PATCH 0809/1009] [SPARK-33840][DOCS] Add spark.sql.files.minPartitionNum to performence tuning doc ### What changes were proposed in this pull request? Add `spark.sql.files.minPartitionNum` and it's description to sql-performence-tuning.md. ### Why are the changes needed? Help user to find it. ### Does this PR introduce _any_ user-facing change? Yes, it's the doc. ### How was this patch tested? Pass CI. Closes #30838 from ulysses-you/SPARK-33840. Authored-by: ulysses-you Signed-off-by: HyukjinKwon --- docs/sql-performance-tuning.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 49b32e7562e0b..e99af41635c9d 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -85,6 +85,16 @@ that these options will be deprecated in future release as more optimizations ar 2.0.0 + + spark.sql.files.minPartitionNum + Default Parallelism + + The suggested (not guaranteed) minimum number of split file partitions. If not set, the default + value is `spark.default.parallelism`. This configuration is effective only when using file-based + sources such as Parquet, JSON and ORC. + + 3.1.0 + spark.sql.broadcastTimeout 300 From 06b1bbbbab8cab2ce77d255a3287a2aacdb2df78 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 18 Dec 2020 13:20:58 +0000 Subject: [PATCH 0810/1009] [SPARK-33798][SQL] Add new rule to push down the foldable expressions through CaseWhen/If ### What changes were proposed in this pull request? 
This pr add a new rule(`PushFoldableIntoBranches`) to push down the foldable expressions through `CaseWhen/If`. This is a real case from production: ```sql create table t1 using parquet as select * from range(100); create table t2 using parquet as select * from range(200); create temp view v1 as select 'a' as event_type, * from t1 union all select CASE WHEN id = 1 THEN 'b' WHEN id = 3 THEN 'c' end as event_type, * from t2 explain select * from v1 where event_type = 'a'; ``` Before this PR: ``` == Physical Plan == Union :- *(1) Project [a AS event_type#30533, id#30535L] : +- *(1) ColumnarToRow : +- FileScan parquet default.t1[id#30535L] Batched: true, DataFilters: [], Format: Parquet +- *(2) Project [CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END AS event_type#30534, id#30536L] +- *(2) Filter (CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END = a) +- *(2) ColumnarToRow +- FileScan parquet default.t2[id#30536L] Batched: true, DataFilters: [(CASE WHEN (id#30536L = 1) THEN b WHEN (id#30536L = 3) THEN c END = a)], Format: Parquet ``` After this PR: ``` == Physical Plan == *(1) Project [a AS event_type#8, id#4L] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#4L] Batched: true, DataFilters: [], Format: Parquet ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30790 from wangyum/SPARK-33798. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/Expression.scala | 5 + .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../sql/catalyst/optimizer/expressions.scala | 44 +++- .../PushFoldableIntoBranchesSuite.scala | 225 ++++++++++++++++++ 4 files changed, 274 insertions(+), 1 deletion(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 1d23953484046..65f89bbdd0599 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -636,6 +636,11 @@ abstract class BinaryExpression extends Expression { } +object BinaryExpression { + def unapply(e: BinaryExpression): Option[(Expression, Expression)] = Some((e.left, e.right)) +} + + /** * A [[BinaryExpression]] that is an operator, with two properties: * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index aa8540fb44556..fdb9c5b4821dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -99,6 +99,7 @@ abstract class Optimizer(catalogManager: CatalogManager) LikeSimplification, BooleanSimplification, SimplifyConditionals, + PushFoldableIntoBranches, RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 7666c4a53e5dd..e6730c9275a1e 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,7 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, _} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -528,6 +528,48 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } +/** + * Push the foldable expression into (if / case) branches. + */ +object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { + + // To be conservative here: it's only a guaranteed win if all but at most only one branch + // end up being not foldable. + private def atMostOneUnfoldable(exprs: Seq[Expression]): Boolean = { + val (foldables, others) = exprs.partition(_.foldable) + foldables.nonEmpty && others.length < 2 + } + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case q: LogicalPlan => q transformExpressionsUp { + case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) + if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = b.makeCopy(Array(trueValue, right)), + falseValue = b.makeCopy(Array(falseValue, right))) + + case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) + if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = b.makeCopy(Array(left, trueValue)), + falseValue = b.makeCopy(Array(left, falseValue))) + + case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) + if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = b.makeCopy(Array(e._2, right)))), + elseValue.map(e => b.makeCopy(Array(e, right)))) + + case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) + if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = b.makeCopy(Array(left, e._2)))), + elseValue.map(e => b.makeCopy(Array(left, e)))) + } + } +} + + /** * Simplifies LIKE expressions that do not need full regular expressions to evaluate the condition. * For example, when the expression is just checking to see if a string starts with a given diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala new file mode 100644 index 0000000000000..43360af46ffb3 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -0,0 +1,225 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import java.sql.Date + +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.types.{BooleanType, IntegerType} + + +class PushFoldableIntoBranchesSuite + extends PlanTest with ExpressionEvalHelper with PredicateHelper { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("PushFoldableIntoBranches", FixedPoint(50), + BooleanSimplification, ConstantFolding, SimplifyConditionals, PushFoldableIntoBranches) :: Nil + } + + private val relation = LocalRelation('a.int, 'b.int, 'c.boolean) + private val a = EqualTo(UnresolvedAttribute("a"), Literal(100)) + private val b = UnresolvedAttribute("b") + private val c = EqualTo(UnresolvedAttribute("c"), Literal(true)) + private val ifExp = If(a, Literal(2), Literal(3)) + private val caseWhen = CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3))) + + protected def assertEquivalent(e1: Expression, e2: Expression): Unit = { + val correctAnswer = Project(Alias(e2, "out")() :: Nil, relation).analyze + val actual = Optimize.execute(Project(Alias(e1, "out")() :: Nil, relation).analyze) + comparePlans(actual, correctAnswer) + } + + test("Push down EqualTo through If") { + assertEquivalent(EqualTo(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(EqualTo(ifExp, Literal(3)), If(a, FalseLiteral, TrueLiteral)) + + // Push down at most one not foldable expressions. + assertEquivalent( + EqualTo(If(a, b, Literal(2)), Literal(2)), + If(a, EqualTo(b, Literal(2)), TrueLiteral)) + assertEquivalent( + EqualTo(If(a, b, b + 1), Literal(2)), + EqualTo(If(a, b, b + 1), Literal(2))) + + // Push down non-deterministic expressions. + val nonDeterministic = If(LessThan(Rand(1), Literal(0.5)), Literal(1), Literal(2)) + assert(!nonDeterministic.deterministic) + assertEquivalent(EqualTo(nonDeterministic, Literal(2)), + If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, TrueLiteral)) + assertEquivalent(EqualTo(nonDeterministic, Literal(3)), + If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, FalseLiteral)) + + // Handle Null values. 
+ assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(1)), Literal(1)), + If(a, Literal(null, BooleanType), TrueLiteral)) + assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(1)), Literal(2)), + If(a, Literal(null, BooleanType), FalseLiteral)) + assertEquivalent( + EqualTo(If(a, Literal(1), Literal(2)), Literal(null, IntegerType)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(If(a, Literal(null, IntegerType), Literal(null, IntegerType)), Literal(1)), + Literal(null, BooleanType)) + } + + test("Push down other BinaryComparison through If") { + assertEquivalent(EqualNullSafe(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThan(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThanOrEqual(ifExp, Literal(4)), FalseLiteral) + assertEquivalent(LessThan(ifExp, Literal(4)), TrueLiteral) + assertEquivalent(LessThanOrEqual(ifExp, Literal(4)), TrueLiteral) + } + + test("Push down other BinaryOperator through If") { + assertEquivalent(Add(ifExp, Literal(4)), If(a, Literal(6), Literal(7))) + assertEquivalent(Subtract(ifExp, Literal(4)), If(a, Literal(-2), Literal(-1))) + assertEquivalent(Multiply(ifExp, Literal(4)), If(a, Literal(8), Literal(12))) + assertEquivalent(Pmod(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) + assertEquivalent(Remainder(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) + assertEquivalent(Divide(If(a, Literal(2.0), Literal(3.0)), Literal(1.0)), + If(a, Literal(2.0), Literal(3.0))) + assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), + If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(Or(If(a, FalseLiteral, TrueLiteral), TrueLiteral), TrueLiteral) + } + + test("Push down other BinaryExpression through If") { + assertEquivalent(BRound(If(a, Literal(1.23), Literal(1.24)), Literal(1)), Literal(1.2)) + assertEquivalent(StartsWith(If(a, Literal("ab"), Literal("ac")), Literal("a")), TrueLiteral) + assertEquivalent(FindInSet(If(a, Literal("ab"), Literal("ac")), Literal("a")), Literal(0)) + assertEquivalent( + AddMonths(If(a, Literal(Date.valueOf("2020-01-01")), Literal(Date.valueOf("2021-01-01"))), + Literal(1)), + If(a, Literal(Date.valueOf("2020-02-01")), Literal(Date.valueOf("2021-02-01")))) + } + + test("Push down EqualTo through CaseWhen") { + assertEquivalent(EqualTo(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(EqualTo(caseWhen, Literal(3)), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), Some(TrueLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None), Literal(4)), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) + + assertEquivalent( + And(EqualTo(caseWhen, Literal(5)), EqualTo(caseWhen, Literal(6))), + FalseLiteral) + + // Push down at most one branch is not foldable expressions. + assertEquivalent(EqualTo(CaseWhen(Seq((a, b), (c, Literal(1))), None), Literal(1)), + CaseWhen(Seq((a, EqualTo(b, Literal(1))), (c, TrueLiteral)), None)) + assertEquivalent(EqualTo(CaseWhen(Seq((a, b), (c, b + 1)), None), Literal(1)), + EqualTo(CaseWhen(Seq((a, b), (c, b + 1)), None), Literal(1))) + assertEquivalent(EqualTo(CaseWhen(Seq((a, b)), None), Literal(1)), + EqualTo(CaseWhen(Seq((a, b)), None), Literal(1))) + + // Push down non-deterministic expressions. 
+ val nonDeterministic = + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal(1))), Some(Literal(2))) + assert(!nonDeterministic.deterministic) + assertEquivalent(EqualTo(nonDeterministic, Literal(2)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), FalseLiteral)), Some(TrueLiteral))) + assertEquivalent(EqualTo(nonDeterministic, Literal(3)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), FalseLiteral)), Some(FalseLiteral))) + + // Handle Null values. + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(1))), Literal(2)), + CaseWhen(Seq((a, Literal(null, BooleanType))), Some(FalseLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1))), Some(Literal(2))), Literal(null, IntegerType)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(1))), Literal(1)), + CaseWhen(Seq((a, Literal(null, BooleanType))), Some(TrueLiteral))) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(null, IntegerType))), + Literal(1)), + Literal(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(null, IntegerType))), Some(Literal(null, IntegerType))), + Literal(null, IntegerType)), + Literal(null, BooleanType)) + } + + test("Push down other BinaryComparison through CaseWhen") { + assertEquivalent(EqualNullSafe(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThan(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(GreaterThanOrEqual(caseWhen, Literal(4)), FalseLiteral) + assertEquivalent(LessThan(caseWhen, Literal(4)), TrueLiteral) + assertEquivalent(LessThanOrEqual(caseWhen, Literal(4)), TrueLiteral) + } + + test("Push down other BinaryOperator through CaseWhen") { + assertEquivalent(Add(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(5)), (c, Literal(6))), Some(Literal(7)))) + assertEquivalent(Subtract(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(-3)), (c, Literal(-2))), Some(Literal(-1)))) + assertEquivalent(Multiply(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(4)), (c, Literal(8))), Some(Literal(12)))) + assertEquivalent(Pmod(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3)))) + assertEquivalent(Remainder(caseWhen, Literal(4)), + CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), Some(Literal(3)))) + assertEquivalent(Divide(CaseWhen(Seq((a, Literal(1.0)), (c, Literal(2.0))), Some(Literal(3.0))), + Literal(1.0)), + CaseWhen(Seq((a, Literal(1.0)), (c, Literal(2.0))), Some(Literal(3.0)))) + assertEquivalent(And(CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral)), + TrueLiteral), + CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral))) + assertEquivalent(Or(CaseWhen(Seq((a, FalseLiteral), (c, TrueLiteral)), Some(TrueLiteral)), + TrueLiteral), TrueLiteral) + } + + test("Push down other BinaryExpression through CaseWhen") { + assertEquivalent( + BRound(CaseWhen(Seq((a, Literal(1.23)), (c, Literal(1.24))), Some(Literal(1.25))), + Literal(1)), + Literal(1.2)) + assertEquivalent( + StartsWith(CaseWhen(Seq((a, Literal("ab")), (c, Literal("ac"))), Some(Literal("ad"))), + Literal("a")), + TrueLiteral) + assertEquivalent( + FindInSet(CaseWhen(Seq((a, Literal("ab")), (c, Literal("ac"))), Some(Literal("ad"))), + Literal("a")), + Literal(0)) + assertEquivalent( + AddMonths(CaseWhen(Seq((a, Literal(Date.valueOf("2020-01-01"))), + (c, Literal(Date.valueOf("2021-01-01")))), + Some(Literal(Date.valueOf("2022-01-01")))), + Literal(1)), + 
CaseWhen(Seq((a, Literal(Date.valueOf("2020-02-01"))), + (c, Literal(Date.valueOf("2021-02-01")))), + Some(Literal(Date.valueOf("2022-02-01"))))) + } + + test("Push down BinaryExpression through If/CaseWhen backwards") { + assertEquivalent(EqualTo(Literal(4), ifExp), FalseLiteral) + assertEquivalent(EqualTo(Literal(4), caseWhen), FalseLiteral) + } +} From f23912880269723f02eadc2af4b2816c957c2357 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 18 Dec 2020 13:47:31 +0000 Subject: [PATCH 0811/1009] [SPARK-33597][SQL] Support REGEXP_LIKE for consistent with mainstream databases ### What changes were proposed in this pull request? There are a lot of mainstream databases support regex function `REGEXP_LIKE`. Currently, Spark supports `RLike` and we just need add a new alias `REGEXP_LIKE` for it. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Pattern-matching-Conditions.html#GUID-D2124F3A-C6E4-4CCA-A40E-2FFCABFD8E19 **Presto** https://prestodb.io/docs/current/functions/regexp.html **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/RegularExpressions/REGEXP_LIKE.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CRegular%20Expression%20Functions%7C_____5 **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/regexp_like.html **Additional modifications** 1. Because test case named `check outputs of expression examples` in ExpressionInfoSuite executes the example SQL of built-in function, so the below SQL be executed: `SELECT '%SystemDrive%\Users\John' regexp_like '%SystemDrive%\\Users.*'` But Spark SQL not supports this syntax yet. 2. Another reason: `SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*';` is an SQL syntax, not the usecase for function `RLike`. As the above reason, this PR changes the example SQL of `RLike`. ### Why are the changes needed? No ### Does this PR introduce _any_ user-facing change? Make the behavior of Spark SQL consistent with mainstream databases. ### How was this patch tested? Jenkins test Closes #30543 from beliefer/SPARK-33597. 
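For reference, a quick usage sketch of the new alias (mirroring the queries added to regexp-functions.sql below; a running `spark` session and the default parser settings are assumed):
```scala
// regexp_like(str, regexp) is registered as an alias of RLike, i.e. str RLIKE regexp.
spark.sql("SELECT regexp_like('1a 2b 14m', '\\\\d+b')").show()   // true
spark.sql("SELECT regexp_like('1a 2b 14m', '[a-z]+b')").show()   // false
```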
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/regexpExpressions.scala | 13 +++++++------ .../sql-functions/sql-expression-schema.md | 5 +++-- .../sql-tests/inputs/regexp-functions.sql | 6 +++++- .../sql-tests/results/regexp-functions.sql.out | 18 +++++++++++++++++- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3b46de539ce3d..4e2f01ac2db93 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -356,6 +356,7 @@ object FunctionRegistry { expression[RegExpExtract]("regexp_extract"), expression[RegExpExtractAll]("regexp_extract_all"), expression[RegExpReplace]("regexp_replace"), + expression[RLike]("regexp_like", true), expression[StringRepeat]("repeat"), expression[StringReplace]("replace"), expression[Overlay]("overlay"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 28c9aefb42837..3a421f5075a6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -68,8 +68,6 @@ abstract class StringRegexExpression extends BinaryExpression matches(regex, input1.asInstanceOf[UTF8String].toString) } } - - override def sql: String = s"${left.sql} ${prettyName.toUpperCase(Locale.ROOT)} ${right.sql}" } // scalastyle:off line.contains.tab @@ -134,6 +132,8 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) case c => s"$left LIKE $right ESCAPE '$c'" } + override def sql: String = s"${left.sql} ${prettyName.toUpperCase(Locale.ROOT)} ${right.sql}" + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" @@ -330,7 +330,7 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like // scalastyle:off line.contains.tab @ExpressionDescription( - usage = "str _FUNC_ regexp - Returns true if `str` matches `regexp`, or false otherwise.", + usage = "_FUNC_(str, regexp) - Returns true if `str` matches `regexp`, or false otherwise.", arguments = """ Arguments: * str - a string expression @@ -348,11 +348,11 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like Examples: > SET spark.sql.parser.escapedStringLiterals=true; spark.sql.parser.escapedStringLiterals true - > SELECT '%SystemDrive%\Users\John' _FUNC_ '%SystemDrive%\\Users.*'; + > SELECT _FUNC_('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*'); true > SET spark.sql.parser.escapedStringLiterals=false; spark.sql.parser.escapedStringLiterals false - > SELECT '%SystemDrive%\\Users\\John' _FUNC_ '%SystemDrive%\\\\Users.*'; + > SELECT _FUNC_('%SystemDrive%\\Users\\John', '%SystemDrive%\\\\Users.*'); true """, note = """ @@ -364,7 +364,8 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress override def escape(v: String): String = v 
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0) - override def toString: String = s"$left RLIKE $right" + override def toString: String = s"RLIKE($left, $right)" + override def sql: String = s"${prettyName.toUpperCase(Locale.ROOT)}(${left.sql}, ${right.sql})" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val patternClass = classOf[Pattern].getName diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index a6d041a588a6d..c681730569978 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 347 + - Number of queries: 348 - Number of expressions that missing example: 13 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint,window ## Schema of Built-in Functions @@ -211,7 +211,8 @@ | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | -| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT '%SystemDrive%\Users\John' rlike '%SystemDrive%\\Users.*' | struct<%SystemDrive%UsersJohn RLIKE %SystemDrive%\Users.*:boolean> | +| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | +| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql index 3f3eaaae9ee4e..12b34ff7d54b1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -42,4 +42,8 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something', 8); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 26); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 27); SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', 30); -SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); \ No newline at end of file +SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null); + +-- regexp_like +SELECT regexp_like('1a 2b 14m', '\\d+b'); +SELECT regexp_like('1a 2b 14m', '[a-z]+b'); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index f2a4131818bfb..60b3e7dbb74f1 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 40 
+-- Number of queries: 42 -- !query @@ -334,3 +334,19 @@ SELECT regexp_replace('healthy, wealthy, and wise', '\\w', 'something', null) struct -- !query output NULL + + +-- !query +SELECT regexp_like('1a 2b 14m', '\\d+b') +-- !query schema +struct +-- !query output +true + + +-- !query +SELECT regexp_like('1a 2b 14m', '[a-z]+b') +-- !query schema +struct +-- !query output +false \ No newline at end of file From 6dca2e5d35c0b1604d0264250872b87bd0b832f6 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Fri, 18 Dec 2020 14:12:35 +0000 Subject: [PATCH 0812/1009] [SPARK-33599][SQL] Group exception messages in catalyst/analysis ### What changes were proposed in this pull request? This PR group exception messages in `/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis`. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30717 from beliefer/SPARK-33599. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 187 +++++++++++++++++- .../spark/sql/QueryExecutionErrors.scala | 59 ++++++ .../sql/catalyst/analysis/Analyzer.scala | 24 +-- .../sql/catalyst/analysis/CheckAnalysis.scala | 22 +-- .../catalyst/analysis/FunctionRegistry.scala | 20 +- .../catalyst/analysis/ResolveCatalogs.scala | 5 +- .../sql/catalyst/analysis/ResolveHints.scala | 14 +- .../sql/catalyst/analysis/unresolved.scala | 16 +- .../analysis/ResolveSessionCatalog.scala | 79 +++----- 9 files changed, 310 insertions(+), 116 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 87387b18dbab4..3ef17ab7aed0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -19,20 +19,19 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.ResolvedView -import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedView} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SerdeInfo} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.connector.catalog.{TableChange, V1Table} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ -import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} /** * Object for grouping all error 
messages of the query compilation. - * Currently it includes all AnalysisExcpetions created and thrown directly in - * org.apache.spark.sql.catalyst.analysis.Analyzer. + * Currently it includes all [[AnalysisException]]s. */ object QueryCompilationErrors { @@ -185,6 +184,11 @@ object QueryCompilationErrors { "did not appear in any aggregate function.") } + def writeIntoTempViewNotAllowedError(quoted: String): Throwable = { + new AnalysisException("Cannot write into temp view " + + s"$quoted as it's not a data source v2 relation.") + } + def expectTableNotTempViewError(quoted: String, cmd: String, t: TreeNode[_]): Throwable = { new AnalysisException(s"$quoted is a temp view. '$cmd' expects a table", t.origin.line, t.origin.startPosition) @@ -196,6 +200,11 @@ object QueryCompilationErrors { t.origin.line, t.origin.startPosition) } + def readNonStreamingTempViewError(quoted: String): Throwable = { + new AnalysisException(s"$quoted is not a temp view of streaming " + + "logical plan, please use batch API such as `DataFrameReader.table` to read it.") + } + def viewDepthExceedsMaxResolutionDepthError( identifier: TableIdentifier, maxNestedViewDepth: Int, t: TreeNode[_]): Throwable = { new AnalysisException(s"The depth of view $identifier exceeds the maximum " + @@ -225,6 +234,11 @@ object QueryCompilationErrors { t.origin.line, t.origin.startPosition) } + def permanentViewNotSupportedByStreamingReadingAPIError(quoted: String): Throwable = { + new AnalysisException(s"$quoted is a permanent view, which is not supported by " + + "streaming reading API such as `DataStreamReader.table` yet.") + } + def starNotAllowedWhenGroupByOrdinalPositionUsedError(): Throwable = { new AnalysisException( "Star (*) is not allowed in select list when GROUP BY ordinal position is used") @@ -326,4 +340,165 @@ object QueryCompilationErrors { "of rows, therefore they are currently not supported.", t.origin.line, t.origin.startPosition) } + def viewOutputNumberMismatchQueryColumnNamesError( + output: Seq[Attribute], queryColumnNames: Seq[String]): Throwable = { + new AnalysisException( + s"The view output ${output.mkString("[", ",", "]")} doesn't have the same" + + "number of columns with the query column names " + + s"${queryColumnNames.mkString("[", ",", "]")}") + } + + def attributeNotFoundError(colName: String, child: LogicalPlan): Throwable = { + new AnalysisException( + s"Attribute with name '$colName' is not found in " + + s"'${child.output.map(_.name).mkString("(", ",", ")")}'") + } + + def cannotUpCastAsAttributeError( + fromAttr: Attribute, toAttr: Attribute): Throwable = { + new AnalysisException(s"Cannot up cast ${fromAttr.sql} from " + + s"${fromAttr.dataType.catalogString} to ${toAttr.dataType.catalogString} " + + "as it may truncate") + } + + def functionUndefinedError(name: FunctionIdentifier): Throwable = { + new AnalysisException(s"undefined function $name") + } + + def invalidFunctionArgumentNumberError( + validParametersCount: Seq[Int], name: String, params: Seq[Class[Expression]]): Throwable = { + val invalidArgumentsMsg = if (validParametersCount.length == 0) { + s"Invalid arguments for function $name" + } else { + val expectedNumberOfParameters = if (validParametersCount.length == 1) { + validParametersCount.head.toString + } else { + validParametersCount.init.mkString("one of ", ", ", " and ") + + validParametersCount.last + } + s"Invalid number of arguments for function $name. 
" + + s"Expected: $expectedNumberOfParameters; Found: ${params.length}" + } + new AnalysisException(invalidArgumentsMsg) + } + + def functionAcceptsOnlyOneArgumentError(name: String): Throwable = { + new AnalysisException(s"Function $name accepts only one argument") + } + + def alterV2TableSetLocationWithPartitionNotSupportedError(): Throwable = { + new AnalysisException("ALTER TABLE SET LOCATION does not support partition for v2 tables.") + } + + def joinStrategyHintParameterNotSupportedError(unsupported: Any): Throwable = { + new AnalysisException("Join strategy hint parameter " + + s"should be an identifier or string but was $unsupported (${unsupported.getClass}") + } + + def invalidHintParameterError( + hintName: String, invalidParams: Seq[Any]): Throwable = { + new AnalysisException(s"$hintName Hint parameter should include columns, but " + + s"${invalidParams.mkString(", ")} found") + } + + def invalidCoalesceHintParameterError(hintName: String): Throwable = { + new AnalysisException(s"$hintName Hint expects a partition number as a parameter") + } + + def attributeNameSyntaxError(name: String): Throwable = { + new AnalysisException(s"syntax error in attribute name: $name") + } + + def starExpandDataTypeNotSupportedError(attributes: Seq[String]): Throwable = { + new AnalysisException(s"Can only star expand struct data types. Attribute: `$attributes`") + } + + def cannotResolveStarExpandGivenInputColumnsError( + targetString: String, columns: String): Throwable = { + new AnalysisException(s"cannot resolve '$targetString.*' given input columns '$columns'") + } + + def addColumnWithV1TableCannotSpecifyNotNullError(): Throwable = { + new AnalysisException("ADD COLUMN with v1 tables cannot specify NOT NULL.") + } + + def replaceColumnsOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") + } + + def alterQualifiedColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("ALTER COLUMN with qualified column is only supported with v2 tables.") + } + + def alterColumnWithV1TableCannotSpecifyNotNullError(): Throwable = { + new AnalysisException("ALTER COLUMN with v1 tables cannot specify NOT NULL.") + } + + def alterOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.") + } + + def alterColumnCannotFindColumnInV1TableError(colName: String, v1Table: V1Table): Throwable = { + new AnalysisException( + s"ALTER COLUMN cannot find column $colName in v1 table. " + + s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") + } + + def renameColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("RENAME COLUMN is only supported with v2 tables.") + } + + def dropColumnOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("DROP COLUMN is only supported with v2 tables.") + } + + def invalidDatabaseNameError(quoted: String): Throwable = { + new AnalysisException(s"The database name is not valid: $quoted") + } + + def replaceTableOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE TABLE is only supported with v2 tables.") + } + + def replaceTableAsSelectOnlySupportedWithV2TableError(): Throwable = { + new AnalysisException("REPLACE TABLE AS SELECT is only supported with v2 tables.") + } + + def cannotDropViewWithDropTableError(): Throwable = { + new AnalysisException("Cannot drop a view with DROP TABLE. 
Please use DROP VIEW instead") + } + + def showColumnsWithConflictDatabasesError( + db: Seq[String], v1TableName: TableIdentifier): Throwable = { + new AnalysisException("SHOW COLUMNS with conflicting databases: " + + s"'${db.head}' != '${v1TableName.database.get}'") + } + + def externalCatalogNotSupportShowViewsError(resolved: ResolvedNamespace): Throwable = { + new AnalysisException(s"Catalog ${resolved.catalog.name} doesn't support " + + "SHOW VIEWS, only SessionCatalog supports this command.") + } + + def unsupportedFunctionNameError(quoted: String): Throwable = { + new AnalysisException(s"Unsupported function name '$quoted'") + } + + def sqlOnlySupportedWithV1TablesError(sql: String): Throwable = { + new AnalysisException(s"$sql is only supported with v1 tables.") + } + + def cannotCreateTableWithBothProviderAndSerdeError( + provider: Option[String], maybeSerdeInfo: Option[SerdeInfo]): Throwable = { + new AnalysisException( + s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + } + + def invalidFileFormatForStoredAsError(serdeInfo: SerdeInfo): Throwable = { + new AnalysisException( + s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + } + + def commandNotSupportNestedColumnError(command: String, quoted: String): Throwable = { + new AnalysisException(s"$command does not support nested column: $quoted") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala new file mode 100644 index 0000000000000..65d280ab10037 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} + +/** + * Object for grouping all error messages of the query runtime. + * Currently it includes all [[SparkException]]s and RuntimeExceptions(e.g. + * UnsupportedOperationException, IllegalStateException). 
+ */ +object QueryExecutionErrors { + + def columnChangeUnsupportedError(): Throwable = { + new UnsupportedOperationException("Please add an implementation for a column change here") + } + + def unexpectedPlanReturnError(plan: LogicalPlan, methodName: String): Throwable = { + new IllegalStateException(s"[BUG] unexpected plan returned by `$methodName`: $plan") + } + + def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { + new IllegalStateException( + "Internal error: logical hint operator should have been removed during analysis") + } + + def logicalPlanHaveOutputOfCharOrVarcharError(leaf: LeafNode): Throwable = { + new IllegalStateException( + s"[BUG] logical plan should not have output of char/varchar type: $leaf") + } + + def cannotEvaluateGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot evaluate expression: $generator") + } + + def cannotGenerateCodeForGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot generate code for expression: $generator") + } + + def cannotTerminateGeneratorError(generator: UnresolvedGenerator): Throwable = { + new UnsupportedOperationException(s"Cannot terminate expression: $generator") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 10c8ac58840f2..1a5f33443d8e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -44,7 +44,7 @@ import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnChange, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{PartitionOverwriteMode, StoreAssignmentPolicy} @@ -885,8 +885,7 @@ class Analyzer(override val catalogManager: CatalogManager) case UnresolvedRelation(ident, _, false) => lookupTempView(ident).map(EliminateSubqueryAliases(_)).map { case r: DataSourceV2Relation => write.withNewTable(r) - case _ => throw new AnalysisException("Cannot write into temp view " + - s"${ident.quoted} as it's not a data source v2 relation.") + case _ => throw QueryCompilationErrors.writeIntoTempViewNotAllowedError(ident.quoted) }.getOrElse(write) case _ => write } @@ -927,8 +926,7 @@ class Analyzer(override val catalogManager: CatalogManager) } if (isStreaming && tmpView.nonEmpty && !tmpView.get.isStreaming) { - throw new AnalysisException(s"${identifier.quoted} is not a temp view of streaming " + - s"logical plan, please use batch API such as `DataFrameReader.table` to read it.") + throw QueryCompilationErrors.readNonStreamingTempViewError(identifier.quoted) } tmpView.map(ResolveRelations.resolveViews) } @@ -1020,8 +1018,8 @@ class Analyzer(override val catalogManager: CatalogManager) case u: UnresolvedRelation if !u.isStreaming => 
lookupV2Relation(u.multipartIdentifier, u.options, false).map { case r: DataSourceV2Relation => write.withNewTable(r) - case other => throw new IllegalStateException( - "[BUG] unexpected plan returned by `lookupV2Relation`: " + other) + case other => + throw QueryExecutionErrors.unexpectedPlanReturnError(other, "lookupV2Relation") }.getOrElse(write) case _ => write } @@ -1132,8 +1130,8 @@ class Analyzer(override val catalogManager: CatalogManager) throw QueryCompilationErrors.writeIntoV1TableNotAllowedError( u.tableMeta.identifier, write) case r: DataSourceV2Relation => write.withNewTable(r) - case other => throw new IllegalStateException( - "[BUG] unexpected plan returned by `lookupRelation`: " + other) + case other => + throw QueryExecutionErrors.unexpectedPlanReturnError(other, "lookupRelation") }.getOrElse(write) case _ => write } @@ -1187,9 +1185,8 @@ class Analyzer(override val catalogManager: CatalogManager) case v1Table: V1Table => if (isStreaming) { if (v1Table.v1Table.tableType == CatalogTableType.VIEW) { - throw new AnalysisException(s"${identifier.quoted} is a permanent view, " + - "which is not supported by streaming reading API such as " + - "`DataStreamReader.table` yet.") + throw QueryCompilationErrors.permanentViewNotSupportedByStreamingReadingAPIError( + identifier.quoted) } SubqueryAlias( catalog.name +: ident.asMultipartIdentifier, @@ -3546,8 +3543,7 @@ class Analyzer(override val catalogManager: CatalogManager) case column: ColumnChange => // This is informational for future developers - throw new UnsupportedOperationException( - "Please add an implementation for a column change here") + throw QueryExecutionErrors.columnChangeUnsupportedError case other => Some(other) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c5a63546c01e3..c0cdcdf2d9577 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsAtomicPartitionManagement, SupportsPartitionManagement, Table} import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, RenameColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -95,8 +96,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case p if p.analyzed => // Skip already analyzed sub-plans case leaf: LeafNode if leaf.output.map(_.dataType).exists(CharVarcharUtils.hasCharVarchar) => - throw new IllegalStateException( - "[BUG] logical plan should not have output of char/varchar type: " + leaf) + throw QueryExecutionErrors.logicalPlanHaveOutputOfCharOrVarcharError(leaf) case u: UnresolvedNamespace => u.failAnalysis(s"Namespace not found: ${u.multipartIdentifier.quoted}") @@ -428,18 +428,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { if (output.length != queryColumnNames.length) { // If the view output doesn't have the same number of columns with the query column // names, throw an 
AnalysisException. - throw new AnalysisException( - s"The view output ${output.mkString("[", ",", "]")} doesn't have the same" + - "number of columns with the query column names " + - s"${queryColumnNames.mkString("[", ",", "]")}") + throw QueryCompilationErrors.viewOutputNumberMismatchQueryColumnNamesError( + output, queryColumnNames) } val resolver = SQLConf.get.resolver queryColumnNames.map { colName => child.output.find { attr => resolver(attr.name, colName) - }.getOrElse(throw new AnalysisException( - s"Attribute with name '$colName' is not found in " + - s"'${child.output.map(_.name).mkString("(", ",", ")")}'")) + }.getOrElse(throw QueryCompilationErrors.attributeNotFoundError(colName, child)) } } else { child.output @@ -451,9 +447,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // output, so we should cast the attribute to the dataType of the view output // attribute. Will throw an AnalysisException if the cast is not a up-cast. if (!Cast.canUpCast(originAttr.dataType, attr.dataType)) { - throw new AnalysisException(s"Cannot up cast ${originAttr.sql} from " + - s"${originAttr.dataType.catalogString} to ${attr.dataType.catalogString} " + - "as it may truncate\n") + throw QueryCompilationErrors.cannotUpCastAsAttributeError( + originAttr, attr) } case _ => } @@ -671,8 +666,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { """.stripMargin) case _: UnresolvedHint => - throw new IllegalStateException( - "Internal error: logical hint operator should have been removed during analysis") + throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 4e2f01ac2db93..1a1b619336d54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.xml._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ @@ -115,7 +116,7 @@ class SimpleFunctionRegistry extends FunctionRegistry with Logging { override def lookupFunction(name: FunctionIdentifier, children: Seq[Expression]): Expression = { val func = synchronized { functionBuilders.get(normalizeFuncName(name)).map(_._2).getOrElse { - throw new AnalysisException(s"undefined function $name") + throw QueryCompilationErrors.functionUndefinedError(name) } } func(children) @@ -623,19 +624,8 @@ object FunctionRegistry { val validParametersCount = constructors .filter(_.getParameterTypes.forall(_ == classOf[Expression])) .map(_.getParameterCount).distinct.sorted - val invalidArgumentsMsg = if (validParametersCount.length == 0) { - s"Invalid arguments for function $name" - } else { - val expectedNumberOfParameters = if (validParametersCount.length == 1) { - validParametersCount.head.toString - } else { - validParametersCount.init.mkString("one of ", ", ", " and ") + - validParametersCount.last - } - s"Invalid number of arguments for function $name. 
" + - s"Expected: $expectedNumberOfParameters; Found: ${params.length}" - } - throw new AnalysisException(invalidArgumentsMsg) + throw QueryCompilationErrors.invalidFunctionArgumentNumberError( + validParametersCount, name, params) } try { val exp = f.newInstance(expressions : _*).asInstanceOf[Expression] @@ -663,7 +653,7 @@ object FunctionRegistry { dataType: DataType): (String, (ExpressionInfo, FunctionBuilder)) = { val builder = (args: Seq[Expression]) => { if (args.size != 1) { - throw new AnalysisException(s"Function $name accepts only one argument") + throw QueryCompilationErrors.functionAcceptsOnlyOneArgumentError(name) } Cast(args.head, dataType) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala index 14dccd86d2240..0249711101899 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveCatalogs.scala @@ -17,10 +17,10 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, LookupCatalog, TableCatalog, TableChange} +import org.apache.spark.sql.errors.QueryCompilationErrors /** * Resolves catalogs from the multi-part identifiers in SQL statements, and convert the statements @@ -115,8 +115,7 @@ class ResolveCatalogs(val catalogManager: CatalogManager) case AlterTableSetLocationStatement( nameParts @ NonSessionCatalogAndTable(catalog, tbl), partitionSpec, newLoc) => if (partitionSpec.nonEmpty) { - throw new AnalysisException( - "ALTER TABLE SET LOCATION does not support partition for v2 tables.") + throw QueryCompilationErrors.alterV2TableSetLocationWithPartitionNotSupportedError } val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala index b44ca20e74bb0..ab7a59d4588ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala @@ -21,11 +21,11 @@ import java.util.Locale import scala.collection.mutable -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Ascending, Expression, IntegerLiteral, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.CurrentOrigin +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -153,8 +153,8 @@ object ResolveHints { val relationNamesInHint = h.parameters.map { case tableName: String => UnresolvedAttribute.parseAttributeName(tableName) case tableId: UnresolvedAttribute => tableId.nameParts - case unsupported => throw new AnalysisException("Join strategy hint parameter " + - s"should be an identifier or string but was $unsupported (${unsupported.getClass}") + case unsupported => + throw QueryCompilationErrors.joinStrategyHintParameterNotSupportedError(unsupported) }.toSet val 
relationsInHintWithMatch = new mutable.HashSet[Seq[String]] val applied = applyJoinStrategyHint( @@ -193,8 +193,7 @@ object ResolveHints { """.stripMargin) val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) if (invalidParams.nonEmpty) { - throw new AnalysisException(s"$hintName Hint parameter should include columns, but " + - s"${invalidParams.mkString(", ")} found") + throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) } RepartitionByExpression( partitionExprs.map(_.asInstanceOf[Expression]), hint.child, numPartitions) @@ -207,7 +206,7 @@ object ResolveHints { Repartition(numPartitions, shuffle, hint.child) // The "COALESCE" hint (shuffle = false) must have a partition number only case _ if !shuffle => - throw new AnalysisException(s"$hintName Hint expects a partition number as a parameter") + throw QueryCompilationErrors.invalidCoalesceHintParameterError(hintName) case param @ Seq(IntegerLiteral(numPartitions), _*) if shuffle => createRepartitionByExpression(Some(numPartitions), param.tail) @@ -229,8 +228,7 @@ object ResolveHints { numPartitions: Option[Int], partitionExprs: Seq[Any]): RepartitionByExpression = { val invalidParams = partitionExprs.filter(!_.isInstanceOf[UnresolvedAttribute]) if (invalidParams.nonEmpty) { - throw new AnalysisException(s"$hintName Hint parameter should include columns, but " + - s"${invalidParams.mkString(", ")} found") + throw QueryCompilationErrors.invalidHintParameterError(hintName, invalidParams) } val sortOrder = partitionExprs.map { case expr: SortOrder => expr diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index efc9e971df72a..8a73208d42e20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions._ @@ -27,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Unary import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.types.{DataType, Metadata, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -193,7 +193,7 @@ object UnresolvedAttribute { * Escape character is not supported now, so we can't use backtick inside name part. 
*/ def parseAttributeName(name: String): Seq[String] = { - def e = new AnalysisException(s"syntax error in attribute name: $name") + def e = QueryCompilationErrors.attributeNameSyntaxError(name) val nameParts = scala.collection.mutable.ArrayBuffer.empty[String] val tmp = scala.collection.mutable.ArrayBuffer.empty[Char] var inBacktick = false @@ -245,13 +245,13 @@ case class UnresolvedGenerator(name: FunctionIdentifier, children: Seq[Expressio override def toString: String = s"'$name(${children.mkString(", ")})" override def eval(input: InternalRow = null): TraversableOnce[InternalRow] = - throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + throw QueryExecutionErrors.cannotEvaluateGeneratorError(this) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - throw new UnsupportedOperationException(s"Cannot generate code for expression: $this") + throw QueryExecutionErrors.cannotGenerateCodeForGeneratorError(this) override def terminate(): TraversableOnce[InternalRow] = - throw new UnsupportedOperationException(s"Cannot terminate expression: $this") + throw QueryExecutionErrors.cannotTerminateGeneratorError(this) } case class UnresolvedFunction( @@ -358,13 +358,13 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu } case _ => - throw new AnalysisException("Can only star expand struct data types. Attribute: `" + - target.get + "`") + throw QueryCompilationErrors.starExpandDataTypeNotSupportedError(target.get) } } else { val from = input.inputSet.map(_.name).mkString(", ") val targetString = target.get.mkString(".") - throw new AnalysisException(s"cannot resolve '$targetString.*' given input columns '$from'") + throw QueryCompilationErrors.cannotResolveStarExpandGivenInputColumnsError( + targetString, from) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 1426d28cbbf88..723647a4a9207 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -17,13 +17,14 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.{AnalysisException, SaveMode} +import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.{CreateTable, DataSource} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 @@ -54,8 +55,7 @@ class ResolveSessionCatalog( cols.foreach { c => assertTopLevelColumn(c.name, "AlterTableAddColumnsCommand") if (!c.nullable) { - throw new AnalysisException( - "ADD COLUMN with v1 tables cannot specify NOT NULL.") + throw QueryCompilationErrors.addColumnWithV1TableCannotSpecifyNotNullError } } 
AlterTableAddColumnsCommand(tbl.asTableIdentifier, cols.map(convertToStructField)) @@ -76,7 +76,7 @@ class ResolveSessionCatalog( cols.foreach(c => failNullType(c.dataType)) val changes: Seq[TableChange] = loadTable(catalog, tbl.asIdentifier) match { case Some(_: V1Table) => - throw new AnalysisException("REPLACE COLUMNS is only supported with v2 tables.") + throw QueryCompilationErrors.replaceColumnsOnlySupportedWithV2TableError case Some(table) => // REPLACE COLUMNS deletes all the existing columns and adds new columns specified. val deleteChanges = table.schema.fieldNames.map { name => @@ -101,16 +101,13 @@ class ResolveSessionCatalog( loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => if (a.column.length > 1) { - throw new AnalysisException( - "ALTER COLUMN with qualified column is only supported with v2 tables.") + throw QueryCompilationErrors.alterQualifiedColumnOnlySupportedWithV2TableError } if (a.nullable.isDefined) { - throw new AnalysisException( - "ALTER COLUMN with v1 tables cannot specify NOT NULL.") + throw QueryCompilationErrors.alterColumnWithV1TableCannotSpecifyNotNullError } if (a.position.isDefined) { - throw new AnalysisException("" + - "ALTER COLUMN ... FIRST | ALTER is only supported with v2 tables.") + throw QueryCompilationErrors.alterOnlySupportedWithV2TableError } val builder = new MetadataBuilder // Add comment to metadata @@ -120,9 +117,8 @@ class ResolveSessionCatalog( v1Table.schema.findNestedField(Seq(colName), resolver = conf.resolver) .map(_._2.dataType) .getOrElse { - throw new AnalysisException( - s"ALTER COLUMN cannot find column ${quoteIfNeeded(colName)} in v1 table. " + - s"Available: ${v1Table.schema.fieldNames.mkString(", ")}") + throw QueryCompilationErrors.alterColumnCannotFindColumnInV1TableError( + quoteIfNeeded(colName), v1Table) } } val newColumn = StructField( @@ -156,7 +152,7 @@ class ResolveSessionCatalog( nameParts @ SessionCatalogAndTable(catalog, tbl), col, newName) => loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - throw new AnalysisException("RENAME COLUMN is only supported with v2 tables.") + throw QueryCompilationErrors.renameColumnOnlySupportedWithV2TableError }.getOrElse { val changes = Seq(TableChange.renameColumn(col.toArray, newName)) createAlterTable(nameParts, catalog, tbl, changes) @@ -166,7 +162,7 @@ class ResolveSessionCatalog( nameParts @ SessionCatalogAndTable(catalog, tbl), cols) => loadTable(catalog, tbl.asIdentifier).collect { case v1Table: V1Table => - throw new AnalysisException("DROP COLUMN is only supported with v2 tables.") + throw QueryCompilationErrors.dropColumnOnlySupportedWithV2TableError }.getOrElse { val changes = cols.map(col => TableChange.deleteColumn(col.toArray)) createAlterTable(nameParts, catalog, tbl, changes) @@ -202,8 +198,7 @@ class ResolveSessionCatalog( AlterTableSetLocationCommand(tbl.asTableIdentifier, partitionSpec, newLoc) }.getOrElse { if (partitionSpec.nonEmpty) { - throw new AnalysisException( - "ALTER TABLE SET LOCATION does not support partition for v2 tables.") + throw QueryCompilationErrors.alterV2TableSetLocationWithPartitionNotSupportedError } val changes = Seq(TableChange.setProperty(TableCatalog.PROP_LOCATION, newLoc)) createAlterTable(nameParts, catalog, tbl, changes) @@ -217,22 +212,19 @@ class ResolveSessionCatalog( case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw 
QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } DescribeDatabaseCommand(ns.head, d.extended) case AlterNamespaceSetProperties(SessionCatalogAndNamespace(_, ns), properties) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } AlterDatabasePropertiesCommand(ns.head, properties) case AlterNamespaceSetLocation(SessionCatalogAndNamespace(_, ns), location) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } AlterDatabaseSetLocationCommand(ns.head, location) @@ -308,7 +300,7 @@ class ResolveSessionCatalog( assertNoNullTypeInSchema(c.tableSchema) val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { - throw new AnalysisException("REPLACE TABLE is only supported with v2 tables.") + throw QueryCompilationErrors.replaceTableOnlySupportedWithV2TableError } else { ReplaceTable( catalog.asTableCatalog, @@ -327,7 +319,7 @@ class ResolveSessionCatalog( } val provider = c.provider.getOrElse(conf.defaultDataSourceName) if (!isV2Provider(provider)) { - throw new AnalysisException("REPLACE TABLE AS SELECT is only supported with v2 tables.") + throw QueryCompilationErrors.replaceTableAsSelectOnlySupportedWithV2TableError } else { ReplaceTableAsSelect( catalog.asTableCatalog, @@ -346,8 +338,7 @@ class ResolveSessionCatalog( // v1 DROP TABLE supports temp view. case DropTable(r: ResolvedView, ifExists, purge) => if (!r.isTemp) { - throw new AnalysisException( - "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") + throw QueryCompilationErrors.cannotDropViewWithDropTableError } DropTableCommand(r.identifier.asTableIdentifier, ifExists, isView = false, purge = purge) @@ -357,8 +348,7 @@ class ResolveSessionCatalog( case c @ CreateNamespaceStatement(CatalogAndNamespace(catalog, ns), _, _) if isSessionCatalog(catalog) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } val comment = c.properties.get(SupportsNamespaces.PROP_COMMENT) @@ -368,16 +358,14 @@ class ResolveSessionCatalog( case d @ DropNamespace(SessionCatalogAndNamespace(_, ns), _, _) => if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } DropDatabaseCommand(ns.head, d.ifExists, d.cascade) case ShowTables(SessionCatalogAndNamespace(_, ns), pattern) => assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowTablesCommand(Some(ns.head), pattern) @@ -387,8 +375,7 @@ class ResolveSessionCatalog( partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException( - s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowTablesCommand( databaseName = Some(ns.head), @@ -442,9 +429,7 @@ class ResolveSessionCatalog( val resolver = conf.resolver val db = ns match { case Some(db) if v1TableName.database.exists(!resolver(_, db.head)) => - throw new AnalysisException( - "SHOW COLUMNS with conflicting databases: " + - s"'${db.head}' != 
'${v1TableName.database.get}'") + throw QueryCompilationErrors.showColumnsWithConflictDatabasesError(db, v1TableName) case _ => ns.map(_.head) } ShowColumnsCommand(db, v1TableName) @@ -520,12 +505,11 @@ class ResolveSessionCatalog( // Fallback to v1 ShowViewsCommand since there is no view API in v2 catalog assert(ns.nonEmpty) if (ns.length != 1) { - throw new AnalysisException(s"The database name is not valid: ${ns.quoted}") + throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) } ShowViewsCommand(ns.head, pattern) case _ => - throw new AnalysisException(s"Catalog ${resolved.catalog.name} doesn't support " + - "SHOW VIEWS, only SessionCatalog supports this command.") + throw QueryCompilationErrors.externalCatalogNotSupportShowViewsError(resolved) } case ShowTableProperties(ResolvedV1TableOrViewIdentifier(ident), propertyKey) => @@ -551,7 +535,7 @@ class ResolveSessionCatalog( if (isTemp) { // temp func doesn't belong to any catalog and we shouldn't resolve catalog in the name. val database = if (nameParts.length > 2) { - throw new AnalysisException(s"Unsupported function name '${nameParts.quoted}'") + throw QueryCompilationErrors.unsupportedFunctionNameError(nameParts.quoted) } else if (nameParts.length == 2) { Some(nameParts.head) } else { @@ -580,7 +564,7 @@ class ResolveSessionCatalog( private def parseV1Table(tableName: Seq[String], sql: String): Seq[String] = tableName match { case SessionCatalogAndTable(_, tbl) => tbl - case _ => throw new AnalysisException(s"$sql is only supported with v1 tables.") + case _ => throw QueryCompilationErrors.sqlOnlySupportedWithV1TablesError(sql) } private def getStorageFormatAndProvider( @@ -599,8 +583,8 @@ class ResolveSessionCatalog( if (provider.isDefined) { // The parser guarantees that USING and STORED AS/ROW FORMAT won't co-exist. if (maybeSerdeInfo.isDefined) { - throw new AnalysisException( - s"Cannot create table with both USING $provider and ${maybeSerdeInfo.get.describe}") + throw QueryCompilationErrors.cannotCreateTableWithBothProviderAndSerdeError( + provider, maybeSerdeInfo) } (nonHiveStorageFormat, provider.get) } else if (maybeSerdeInfo.isDefined) { @@ -616,8 +600,7 @@ class ResolveSessionCatalog( // User specified serde takes precedence over the one inferred from file format. serde = serdeInfo.serde.orElse(hiveSerde.serde).orElse(defaultHiveStorage.serde), properties = serdeInfo.serdeProperties ++ defaultHiveStorage.properties) - case _ => throw new AnalysisException( - s"STORED AS with file format '${serdeInfo.storedAs.get}' is invalid.") + case _ => throw QueryCompilationErrors.invalidFileFormatForStoredAsError(serdeInfo) } } else { defaultHiveStorage.copy( @@ -709,7 +692,7 @@ class ResolveSessionCatalog( private def assertTopLevelColumn(colName: Seq[String], command: String): Unit = { if (colName.length > 1) { - throw new AnalysisException(s"$command does not support nested column: ${colName.quoted}") + throw QueryCompilationErrors.commandNotSupportNestedColumnError(command, colName.quoted) } } From c17c76dd1647953f9bdb7135ba08a9b9f25460c9 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 18 Dec 2020 11:23:38 -0800 Subject: [PATCH 0813/1009] [SPARK-33599][SQL][FOLLOWUP] FIX Github Action with unidoc ### What changes were proposed in this pull request? FIX Github Action with unidoc ### Why are the changes needed? FIX Github Action with unidoc ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? Pass GA Closes #30846 from yaooqinn/SPARK-33599. 
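The hunks above route ad-hoc `throw new AnalysisException(...)` / `IllegalStateException` calls through the error-grouping objects whose scaladoc is touched just below. The bodies of those helpers are not part of these hunks, so the sketch below only reconstructs the pattern from one replaced call site; the method name matches the call sites above, but its signature and body are assumptions, not the actual Spark source.

```
import org.apache.spark.sql.AnalysisException

// Illustrative sketch of the error-grouping pattern; the real helpers live in
// QueryCompilationErrors / QueryExecutionErrors and may differ in detail.
object CompilationErrorsSketch {
  // Old call site: throw new AnalysisException(s"The database name is not valid: ${ns.quoted}")
  // New call site: throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted)
  def invalidDatabaseNameError(quotedName: String): Throwable =
    new AnalysisException(s"The database name is not valid: $quotedName")
}
```

Centralizing message construction this way keeps the wording of each error in one place, and call sites reduce to a single `throw QueryCompilationErrors.xxxError(...)` as in the hunks above.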
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/QueryCompilationErrors.scala | 2 +- .../main/scala/org/apache/spark/sql/QueryExecutionErrors.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 3ef17ab7aed0a..51a2cb0cb4d92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.types.{AbstractDataType, DataType, StructType} /** * Object for grouping all error messages of the query compilation. - * Currently it includes all [[AnalysisException]]s. + * Currently it includes all AnalysisExceptions. */ object QueryCompilationErrors { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala index 65d280ab10037..d24e61c699241 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} /** * Object for grouping all error messages of the query runtime. - * Currently it includes all [[SparkException]]s and RuntimeExceptions(e.g. + * Currently it includes all SparkExceptions and RuntimeExceptions(e.g. * UnsupportedOperationException, IllegalStateException). */ object QueryExecutionErrors { From 554600c2af0dbc8979955807658fafef5dc66c08 Mon Sep 17 00:00:00 2001 From: Vlad Glinsky Date: Fri, 18 Dec 2020 13:26:19 -0800 Subject: [PATCH 0814/1009] [SPARK-33841][CORE] Fix issue with jobs disappearing intermittently from the SHS under high load ### What changes were proposed in this pull request? Mark SHS event log entries that were `processing` at the beginning of the `checkForLogs` run as not stale and check for this mark before deleting an event log. This fixes the issue when a particular job was displayed in the SHS and disappeared after some time, but then, in several minutes showed up again. ### Why are the changes needed? The issue is caused by [SPARK-29043](https://issues.apache.org/jira/browse/SPARK-29043), which is designated to improve the concurrent performance of the History Server. The [change](https://github.com/apache/spark/pull/25797/files#) breaks the ["app deletion" logic](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R563) because of missing proper synchronization for `processing` event log entries. 
Since SHS now [filters out](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R462) all `processing` event log entries, such entries do not have a chance to be [updated with the new `lastProcessed`](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R472) time and thus any entity that completes processing right after [filtering](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R462) and before [the check for stale entities](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R560) will be identified as stale and will be deleted from the UI until the next `checkForLogs` run. This is because [updated `lastProcessed` time is used as criteria](https://github.com/apache/spark/pull/25797/files#diff-128a6af0d78f4a6180774faedb335d6168dfc4defff58f5aa3021fc1bd767bc0R557), and event log entries that missed to be updated with a new time, will match that criteria. The issue can be reproduced by generating a big number of event logs and uploading them to the SHS event log directory on S3. Essentially, around 236(26.7 MB) copies of an event log directory were created using [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) script. Strange behavior of SHS counting the total number of applications was noticed - at first, the number was increasing as expected, but with the next page refresh, the total number of applications decreased. No errors were logged by SHS. 58 entities are displayed at `17:35:35`: ![1-58-entries-at-17-35](https://user-images.githubusercontent.com/61428392/102648949-1129e400-4171-11eb-9463-ed1454a8f6b2.png) 25 entities are displayed at `17:36:40`: ![2-25-entries-at-17-36](https://user-images.githubusercontent.com/61428392/102648974-1c7d0f80-4171-11eb-95d8-78c2bb37a168.png) ### Does this PR introduce _any_ user-facing change? Yes, SHS users won't face the behavior when the number of displayed applications decreases periodically. ### How was this patch tested? Tested using [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) script: * Build SHS with the proposed change * Download Hadoop AWS and AWS Java SDK * Prepare S3 bucket and user for programmatic access, grant required roles to the user. Get access key and secret key * Configure SHS to read event logs from S3 * Start [monitor](https://github.com/vladhlinsky/shs-monitor/blob/spark-master/monitor.sh) script to query SHS API * Run 5 [producers](https://github.com/vladhlinsky/shs-monitor/blob/spark-master/producer.sh) for ~5 mins, create 125(14.2 MB) event log directory copies * Wait for SHS to load all the applications * Verify that the number of loaded applications increases continuously over time For more details, please refer to the [shs-monitor](https://github.com/vladhlinsky/shs-monitor/tree/spark-master) repository. > This version of the reproduction uses event log directories instead of single files, since recent optimization > [SPARK-33790](https://issues.apache.org/jira/browse/SPARK-33790) makes it hard to reproduce the issue with single event log files. Closes #30845 from vladhlinsky/SPARK-33841. 
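A condensed sketch of the guard described above may help; it is simplified from the `FsHistoryProvider` change in the diff below, and everything except `isProcessing` (a real provider helper) is a placeholder name used only for illustration.

```
import scala.collection.mutable

// Simplified illustration of the fix: remember which entries were still being
// processed when the scan started, and never classify those as stale.
def checkForLogsSketch(
    scannedPaths: Seq[String],
    isProcessing: String => Boolean,
    stalePaths: Seq[String],
    delete: String => Unit): Unit = {
  // Entries still processing now will not receive a fresh `lastProcessed` time
  // in this run, so record them explicitly instead of silently skipping them.
  val notStale = mutable.HashSet[String]()
  val updated = scannedPaths.filter { path =>
    if (isProcessing(path)) { notStale.add(path); false } else true
  }
  // ... `updated` entries are processed as before (elided in this sketch) ...
  // Even if a remembered entry finished processing between the filter above and
  // this point, it must not be deleted until the next checkForLogs run.
  stalePaths.filterNot(isProcessing).filterNot(p => notStale.contains(p)).foreach(delete)
}
```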
Authored-by: Vlad Glinsky Signed-off-by: Dongjoon Hyun --- .../deploy/history/FsHistoryProvider.scala | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index e6df260bdeaa3..d35d8606eb4b4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -474,9 +474,21 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val newLastScanTime = clock.getTimeMillis() logDebug(s"Scanning $logDir with lastScanTime==$lastScanTime") + // Mark entries that are processing as not stale. Such entries do not have a chance to be + // updated with the new 'lastProcessed' time and thus any entity that completes processing + // right after this check and before the check for stale entities will be identified as stale + // and will be deleted from the UI until the next 'checkForLogs' run. + val notStale = mutable.HashSet[String]() val updated = Option(fs.listStatus(new Path(logDir))).map(_.toSeq).getOrElse(Nil) .filter { entry => isAccessible(entry.getPath) } - .filter { entry => !isProcessing(entry.getPath) } + .filter { entry => + if (isProcessing(entry.getPath)) { + notStale.add(entry.getPath.toString()) + false + } else { + true + } + } .flatMap { entry => EventLogFileReader(fs, entry) } .filter { reader => try { @@ -576,12 +588,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .last(newLastScanTime - 1) .asScala .toList - stale.filterNot(isProcessing).foreach { log => - log.appId.foreach { appId => - cleanAppData(appId, log.attemptId, log.logPath) - listing.delete(classOf[LogInfo], log.logPath) + stale.filterNot(isProcessing) + .filterNot(info => notStale.contains(info.logPath)) + .foreach { log => + log.appId.foreach { appId => + cleanAppData(appId, log.attemptId, log.logPath) + listing.delete(classOf[LogInfo], log.logPath) + } } - } lastScanTime.set(newLastScanTime) } catch { From de234eec8febce99ede5ef9ae2301e36739a0f85 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 19 Dec 2020 14:35:28 +0900 Subject: [PATCH 0815/1009] [SPARK-33812][SQL] Split the histogram column stats when saving to hive metastore as table property ### What changes were proposed in this pull request? Hive metastore has a limitation for the table property length. To work around it, Spark split the schema json string into several parts when saving to hive metastore as table properties. We need to do the same for histogram column stats as it can go very big. This PR refactors the table property splitting code, so that we can share it between the schema json string and histogram column stats. ### Why are the changes needed? To be able to analyze table when histogram data is big. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing test and new tests Closes #30809 from cloud-fan/cbo. 
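The property layout produced by the shared splitting code can be pictured with a toy round trip; the real implementation is `CatalogTable.splitLargeTableProp` / `readLargeTableProp` in the diff below, and the key name and sizes here are illustrative only.

```
import scala.collection.mutable

// Toy version of the `key` / `key.numParts` / `key.part.N` layout used below.
def split(key: String, value: String, threshold: Int): Map[String, String] = {
  val props = mutable.Map.empty[String, String]
  if (value.length <= threshold) {
    props(key) = value // small values stay in a single table property
  } else {
    val parts = value.grouped(threshold).toSeq
    props(s"$key.numParts") = parts.length.toString
    parts.zipWithIndex.foreach { case (part, i) => props(s"$key.part.$i") = part }
  }
  props.toMap
}

// Toy reader; the real readLargeTableProp also validates that no part is missing.
def read(props: Map[String, String], key: String): Option[String] =
  props.get(key).orElse {
    props.get(s"$key.numParts").map { n =>
      (0 until n.toInt).map(i => props(s"$key.part.$i")).mkString
    }
  }

// A long serialized histogram survives the round trip unchanged:
val stored = split("col.histogram", "x" * 12000, threshold = 4000)
assert(read(stored, "col.histogram").contains("x" * 12000))
```

With a 4000-character threshold the 12000-character value above is stored as `col.histogram.numParts = 3` plus `col.histogram.part.0..2`, which is the same scheme already used for the schema JSON string and, after this change, for histogram column stats.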
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../sql/catalyst/catalog/interface.scala | 54 +++++++++++++++- .../apache/spark/sql/internal/SQLConf.scala | 14 ++++- .../apache/spark/sql/RuntimeConfigSuite.scala | 4 +- .../spark/sql/StatisticsCollectionSuite.scala | 9 +++ .../spark/sql/internal/SQLConfSuite.scala | 16 ++--- .../spark/sql/hive/HiveExternalCatalog.scala | 61 +++---------------- .../sql/hive/client/HiveClientImpl.scala | 6 +- .../sql/hive/MetastoreDataSourcesSuite.scala | 4 +- 8 files changed, 97 insertions(+), 71 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 9876ee375cfa6..5cb237688f875 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -472,6 +472,51 @@ object CatalogTable { val VIEW_REFERRED_TEMP_VIEW_NAMES = VIEW_PREFIX + "referredTempViewNames" val VIEW_REFERRED_TEMP_FUNCTION_NAMES = VIEW_PREFIX + "referredTempFunctionsNames" + + def splitLargeTableProp( + key: String, + value: String, + addProp: (String, String) => Unit, + defaultThreshold: Int): Unit = { + val threshold = SQLConf.get.getConf(SQLConf.HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD) + .getOrElse(defaultThreshold) + if (value.length <= threshold) { + addProp(key, value) + } else { + val parts = value.grouped(threshold).toSeq + addProp(s"$key.numParts", parts.length.toString) + parts.zipWithIndex.foreach { case (part, index) => + addProp(s"$key.part.$index", part) + } + } + } + + def readLargeTableProp(props: Map[String, String], key: String): Option[String] = { + props.get(key).orElse { + if (props.filterKeys(_.startsWith(key)).isEmpty) { + None + } else { + val numParts = props.get(s"$key.numParts") + val errorMessage = s"Cannot read table property '$key' as it's corrupted." 
+ if (numParts.isEmpty) { + throw new AnalysisException(errorMessage) + } else { + val parts = (0 until numParts.get.toInt).map { index => + props.getOrElse(s"$key.part.$index", { + throw new AnalysisException( + s"$errorMessage Missing part $index, ${numParts.get} parts are expected.") + }) + } + Some(parts.mkString) + } + } + } + } + + def isLargeTableProp(originalKey: String, propKey: String): Boolean = { + propKey == originalKey || propKey == s"$originalKey.numParts" || + propKey.startsWith(s"$originalKey.part.") + } } /** @@ -546,7 +591,11 @@ case class CatalogColumnStat( min.foreach { v => map.put(s"${colName}.${CatalogColumnStat.KEY_MIN_VALUE}", v) } max.foreach { v => map.put(s"${colName}.${CatalogColumnStat.KEY_MAX_VALUE}", v) } histogram.foreach { h => - map.put(s"${colName}.${CatalogColumnStat.KEY_HISTOGRAM}", HistogramSerializer.serialize(h)) + CatalogTable.splitLargeTableProp( + s"$colName.${CatalogColumnStat.KEY_HISTOGRAM}", + HistogramSerializer.serialize(h), + map.put, + 4000) } map.toMap } @@ -650,7 +699,8 @@ object CatalogColumnStat extends Logging { nullCount = map.get(s"${colName}.${KEY_NULL_COUNT}").map(v => BigInt(v.toLong)), avgLen = map.get(s"${colName}.${KEY_AVG_LEN}").map(_.toLong), maxLen = map.get(s"${colName}.${KEY_MAX_LEN}").map(_.toLong), - histogram = map.get(s"${colName}.${KEY_HISTOGRAM}").map(HistogramSerializer.deserialize), + histogram = CatalogTable.readLargeTableProp(map, s"$colName.$KEY_HISTOGRAM") + .map(HistogramSerializer.deserialize), version = map(s"${colName}.${KEY_VERSION}").toInt )) } catch { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3f0fd70a6eae6..b5547319f0ab3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -905,6 +905,16 @@ object SQLConf { .checkValues(HiveCaseSensitiveInferenceMode.values.map(_.toString)) .createWithDefault(HiveCaseSensitiveInferenceMode.NEVER_INFER.toString) + val HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD = + buildConf("spark.sql.hive.tablePropertyLengthThreshold") + .internal() + .doc("The maximum length allowed in a single cell when storing Spark-specific information " + + "in Hive's metastore as table properties. Currently it covers 2 things: the schema's " + + "JSON string, the histogram of column statistics.") + .version("3.2.0") + .intConf + .createOptional + val OPTIMIZER_METADATA_ONLY = buildConf("spark.sql.optimizer.metadataOnly") .internal() .doc("When true, enable the metadata-only query optimization that use the table's metadata " + @@ -3052,7 +3062,9 @@ object SQLConf { "Avoid to depend on this optimization to prevent a potential correctness issue. 
" + "If you must use, use 'SparkSessionExtensions' instead to inject it as a custom rule."), DeprecatedConfig(CONVERT_CTAS.key, "3.1", - s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead.") + s"Set '${LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key}' to false instead."), + DeprecatedConfig("spark.sql.sources.schemaStringLengthThreshold", "3.2", + s"Use '${HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key}' instead.") ) Map(configs.map { cfg => cfg.key -> cfg } : _*) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala index 720d570ca8384..4052130720811 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeConfigSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import org.apache.spark.SparkFunSuite import org.apache.spark.internal.config import org.apache.spark.sql.internal.SQLConf.CHECKPOINT_LOCATION -import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD +import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE class RuntimeConfigSuite extends SparkFunSuite { @@ -62,7 +62,7 @@ class RuntimeConfigSuite extends SparkFunSuite { val conf = newConf() // SQL configs - assert(!conf.isModifiable(SCHEMA_STRING_LENGTH_THRESHOLD.key)) + assert(!conf.isModifiable(GLOBAL_TEMP_DATABASE.key)) assert(conf.isModifiable(CHECKPOINT_LOCATION.key)) // Core configs assert(!conf.isModifiable(config.CPUS_PER_TASK.key)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 3b53a5324445b..cc3d8375db32f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -174,6 +174,15 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared } } + test("SPARK-33812: column stats round trip serialization with splitting histogram property") { + withSQLConf(SQLConf.HIVE_TABLE_PROPERTY_LENGTH_THRESHOLD.key -> "10") { + statsWithHgms.foreach { case (k, v) => + val roundtrip = CatalogColumnStat.fromMap("t", k, v.toMap(k)) + assert(roundtrip == Some(v)) + } + } + } + test("analyze column command - result verification") { // (data.head.productArity - 1) because the last column does not support stats collection. 
assert(stats.size == data.head.productArity - 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 1ea2d4fd0b32c..e699c972268a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -282,23 +282,23 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } test("static SQL conf comes from SparkConf") { - val previousValue = sparkContext.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) + val previousValue = sparkContext.conf.get(GLOBAL_TEMP_DATABASE) try { - sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, 2000) + sparkContext.conf.set(GLOBAL_TEMP_DATABASE, "a") val newSession = new SparkSession(sparkContext) - assert(newSession.conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) == 2000) + assert(newSession.conf.get(GLOBAL_TEMP_DATABASE) == "a") checkAnswer( - newSession.sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}"), - Row(SCHEMA_STRING_LENGTH_THRESHOLD.key, "2000")) + newSession.sql(s"SET ${GLOBAL_TEMP_DATABASE.key}"), + Row(GLOBAL_TEMP_DATABASE.key, "a")) } finally { - sparkContext.conf.set(SCHEMA_STRING_LENGTH_THRESHOLD, previousValue) + sparkContext.conf.set(GLOBAL_TEMP_DATABASE, previousValue) } } test("cannot set/unset static SQL conf") { - val e1 = intercept[AnalysisException](sql(s"SET ${SCHEMA_STRING_LENGTH_THRESHOLD.key}=10")) + val e1 = intercept[AnalysisException](sql(s"SET ${GLOBAL_TEMP_DATABASE.key}=10")) assert(e1.message.contains("Cannot modify the value of a static config")) - val e2 = intercept[AnalysisException](spark.conf.unset(SCHEMA_STRING_LENGTH_THRESHOLD.key)) + val e2 = intercept[AnalysisException](spark.conf.unset(GLOBAL_TEMP_DATABASE.key)) assert(e2.message.contains("Cannot modify the value of a static config")) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 54c237f78cb9c..b4aa073893df8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -429,18 +429,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val properties = new mutable.HashMap[String, String] properties.put(CREATED_SPARK_VERSION, table.createVersion) - - // Serialized JSON schema string may be too long to be stored into a single metastore table - // property. In this case, we split the JSON string and store each part as a separate table - // property. - val threshold = conf.get(SCHEMA_STRING_LENGTH_THRESHOLD) - val schemaJsonString = schema.json - // Split the JSON string. 
- val parts = schemaJsonString.grouped(threshold).toSeq - properties.put(DATASOURCE_SCHEMA_NUMPARTS, parts.size.toString) - parts.zipWithIndex.foreach { case (part, index) => - properties.put(s"$DATASOURCE_SCHEMA_PART_PREFIX$index", part) - } + CatalogTable.splitLargeTableProp( + DATASOURCE_SCHEMA, schema.json, properties.put, conf.get(SCHEMA_STRING_LENGTH_THRESHOLD)) if (partitionColumns.nonEmpty) { properties.put(DATASOURCE_SCHEMA_NUMPARTCOLS, partitionColumns.length.toString) @@ -744,8 +734,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat case None if table.tableType == VIEW => // If this is a view created by Spark 2.2 or higher versions, we should restore its schema // from table properties. - if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { - table = table.copy(schema = getSchemaFromTableProperties(table)) + CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA).foreach { schemaJson => + table = table.copy(schema = DataType.fromJson(schemaJson).asInstanceOf[StructType]) } // No provider in table properties, which means this is a Hive serde table. @@ -795,8 +785,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // If this is a Hive serde table created by Spark 2.1 or higher versions, we should restore its // schema from table properties. - if (table.properties.contains(DATASOURCE_SCHEMA_NUMPARTS)) { - val schemaFromTableProps = getSchemaFromTableProperties(table) + val schemaJson = CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA) + if (schemaJson.isDefined) { + val schemaFromTableProps = DataType.fromJson(schemaJson.get).asInstanceOf[StructType] val partColumnNames = getPartitionColumnsFromTableProperties(table) val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) @@ -836,7 +827,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat storageWithLocation.properties.filterKeys(!HIVE_GENERATED_STORAGE_PROPERTIES(_)).toMap) val partitionProvider = table.properties.get(TABLE_PARTITION_PROVIDER) - val schemaFromTableProps = getSchemaFromTableProperties(table) + val schemaFromTableProps = CatalogTable.readLargeTableProp(table.properties, DATASOURCE_SCHEMA) + .map(json => DataType.fromJson(json).asInstanceOf[StructType]).getOrElse(new StructType()) val partColumnNames = getPartitionColumnsFromTableProperties(table) val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) @@ -1340,7 +1332,6 @@ object HiveExternalCatalog { val DATASOURCE_PROVIDER = DATASOURCE_PREFIX + "provider" val DATASOURCE_SCHEMA = DATASOURCE_PREFIX + "schema" val DATASOURCE_SCHEMA_PREFIX = DATASOURCE_SCHEMA + "." - val DATASOURCE_SCHEMA_NUMPARTS = DATASOURCE_SCHEMA_PREFIX + "numParts" val DATASOURCE_SCHEMA_NUMPARTCOLS = DATASOURCE_SCHEMA_PREFIX + "numPartCols" val DATASOURCE_SCHEMA_NUMSORTCOLS = DATASOURCE_SCHEMA_PREFIX + "numSortCols" val DATASOURCE_SCHEMA_NUMBUCKETS = DATASOURCE_SCHEMA_PREFIX + "numBuckets" @@ -1373,40 +1364,6 @@ object HiveExternalCatalog { val EMPTY_DATA_SCHEMA = new StructType() .add("col", "array", nullable = true, comment = "from deserializer") - // A persisted data source table always store its schema in the catalog. - private def getSchemaFromTableProperties(metadata: CatalogTable): StructType = { - val errorMessage = "Could not read schema from the hive metastore because it is corrupted." 
- val props = metadata.properties - val schema = props.get(DATASOURCE_SCHEMA) - if (schema.isDefined) { - // Originally, we used `spark.sql.sources.schema` to store the schema of a data source table. - // After SPARK-6024, we removed this flag. - // Although we are not using `spark.sql.sources.schema` any more, we need to still support. - DataType.fromJson(schema.get).asInstanceOf[StructType] - } else if (props.filterKeys(_.startsWith(DATASOURCE_SCHEMA_PREFIX)).isEmpty) { - // If there is no schema information in table properties, it means the schema of this table - // was empty when saving into metastore, which is possible in older version(prior to 2.1) of - // Spark. We should respect it. - new StructType() - } else { - val numSchemaParts = props.get(DATASOURCE_SCHEMA_NUMPARTS) - if (numSchemaParts.isDefined) { - val parts = (0 until numSchemaParts.get.toInt).map { index => - val part = metadata.properties.get(s"$DATASOURCE_SCHEMA_PART_PREFIX$index").orNull - if (part == null) { - throw new AnalysisException(errorMessage + - s" (missing part $index of the schema, ${numSchemaParts.get} parts are expected).") - } - part - } - // Stick all parts back to a single schema string. - DataType.fromJson(parts.mkString).asInstanceOf[StructType] - } else { - throw new AnalysisException(errorMessage) - } - } - } - private def getColumnNamesByType( props: Map[String, String], colType: String, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 6a964a0ce3613..e779a80f7c323 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -57,7 +57,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.connector.catalog.SupportsNamespaces._ import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.hive.HiveExternalCatalog -import org.apache.spark.sql.hive.HiveExternalCatalog.{DATASOURCE_SCHEMA, DATASOURCE_SCHEMA_NUMPARTS, DATASOURCE_SCHEMA_PART_PREFIX} +import org.apache.spark.sql.hive.HiveExternalCatalog.DATASOURCE_SCHEMA import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.{CircularBuffer, Utils} @@ -580,9 +580,7 @@ private[hive] class HiveClientImpl( val it = oldTable.getParameters.entrySet.iterator while (it.hasNext) { val entry = it.next() - val isSchemaProp = entry.getKey.startsWith(DATASOURCE_SCHEMA_PART_PREFIX) || - entry.getKey == DATASOURCE_SCHEMA || entry.getKey == DATASOURCE_SCHEMA_NUMPARTS - if (isSchemaProp) { + if (CatalogTable.isLargeTableProp(DATASOURCE_SCHEMA, entry.getKey)) { it.remove() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 0593dbe7f6653..ecbb104070b70 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -1338,7 +1338,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv val e = intercept[AnalysisException] { sharedState.externalCatalog.getTable("default", "t") }.getMessage - assert(e.contains(s"Could not read schema from the hive metastore because it is corrupted")) + assert(e.contains("Cannot 
read table property 'spark.sql.sources.schema' as it's corrupted")) withDebugMode { val tableMeta = sharedState.externalCatalog.getTable("default", "t") @@ -1355,7 +1355,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv val newSession = sparkSession.newSession() newSession.sql("CREATE TABLE abc(i int) USING json") val tableMeta = newSession.sessionState.catalog.getTableMetadata(TableIdentifier("abc")) - assert(tableMeta.properties(DATASOURCE_SCHEMA_NUMPARTS).toInt == 1) + assert(tableMeta.properties.contains(DATASOURCE_SCHEMA)) assert(tableMeta.properties(DATASOURCE_PROVIDER) == "json") } } From 44563a0412257645e0053ee2c44d6eb3447e9d4f Mon Sep 17 00:00:00 2001 From: zhengruifeng Date: Sat, 19 Dec 2020 08:43:48 -0600 Subject: [PATCH 0816/1009] [SPARK-33518][ML] Improve performance of ML ALS recommendForAll by GEMV ### What changes were proposed in this pull request? There were a lot of works on improving ALS's recommendForAll For now, I found that it maybe futhermore optimized by 1, using GEMV and sharing a pre-allocated buffer per task; 2, using guava.ordering instead of BoundedPriorityQueue; ### Why are the changes needed? In my test, using `f2jBLAS.sgemv`, it is about 2.3X faster than existing impl. |Impl| Master | GEMM | GEMV | GEMV + array aggregator | GEMV + guava ordering + array aggregator | GEMV + guava ordering| |------|----------|------------|----------|------------|------------|------------| |Duration|341229|363741|191201|189790|148417|147222| ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing testsuites Closes #30468 from zhengruifeng/als_rec_opt. Authored-by: zhengruifeng Signed-off-by: Sean Owen --- .../apache/spark/ml/recommendation/ALS.scala | 53 ++++++++++++------- 1 file changed, 33 insertions(+), 20 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 088f6a682be82..1b856bda45e24 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -27,6 +27,7 @@ import scala.util.{Sorting, Try} import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} +import com.google.common.collect.{Ordering => GuavaOrdering} import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ @@ -47,7 +48,7 @@ import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.{BoundedPriorityQueue, Utils} +import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom @@ -456,30 +457,39 @@ class ALSModel private[ml] ( num: Int, blockSize: Int): DataFrame = { import srcFactors.sparkSession.implicits._ + import scala.collection.JavaConverters._ val srcFactorsBlocked = blockify(srcFactors.as[(Int, Array[Float])], blockSize) val dstFactorsBlocked = blockify(dstFactors.as[(Int, Array[Float])], blockSize) val ratings = srcFactorsBlocked.crossJoin(dstFactorsBlocked) - .as[(Seq[(Int, Array[Float])], Seq[(Int, Array[Float])])] - .flatMap { case (srcIter, dstIter) => - val m = srcIter.size - val n = math.min(dstIter.size, num) - val output = new Array[(Int, Int, Float)](m * n) - var i = 0 - val pq = new 
BoundedPriorityQueue[(Int, Float)](num)(Ordering.by(_._2)) - srcIter.foreach { case (srcId, srcFactor) => - dstIter.foreach { case (dstId, dstFactor) => - // We use F2jBLAS which is faster than a call to native BLAS for vector dot product - val score = BLAS.f2jBLAS.sdot(rank, srcFactor, 1, dstFactor, 1) - pq += dstId -> score + .as[(Array[Int], Array[Float], Array[Int], Array[Float])] + .mapPartitions { iter => + var scores: Array[Float] = null + var idxOrd: GuavaOrdering[Int] = null + iter.flatMap { case (srcIds, srcMat, dstIds, dstMat) => + require(srcMat.length == srcIds.length * rank) + require(dstMat.length == dstIds.length * rank) + val m = srcIds.length + val n = dstIds.length + if (scores == null || scores.length < n) { + scores = Array.ofDim[Float](n) + idxOrd = new GuavaOrdering[Int] { + override def compare(left: Int, right: Int): Int = { + Ordering[Float].compare(scores(left), scores(right)) + } + } } - pq.foreach { case (dstId, score) => - output(i) = (srcId, dstId, score) - i += 1 + + Iterator.range(0, m).flatMap { i => + // buffer = i-th vec in srcMat * dstMat + BLAS.f2jBLAS.sgemv("T", rank, n, 1.0F, dstMat, 0, rank, + srcMat, i * rank, 1, 0.0F, scores, 0, 1) + + val srcId = srcIds(i) + idxOrd.greatestOf(Iterator.range(0, n).asJava, num).asScala + .iterator.map { j => (srcId, dstIds(j), scores(j)) } } - pq.clear() } - output.toSeq } // We'll force the IDs to be Int. Unfortunately this converts IDs to Int in the output. val topKAggregator = new TopByKeyAggregator[Int, Int, Float](num, Ordering.by(_._2)) @@ -499,9 +509,12 @@ class ALSModel private[ml] ( */ private def blockify( factors: Dataset[(Int, Array[Float])], - blockSize: Int): Dataset[Seq[(Int, Array[Float])]] = { + blockSize: Int): Dataset[(Array[Int], Array[Float])] = { import factors.sparkSession.implicits._ - factors.mapPartitions(_.grouped(blockSize)) + factors.mapPartitions { iter => + iter.grouped(blockSize) + .map(block => (block.map(_._1).toArray, block.flatMap(_._2).toArray)) + } } } From 00642ee19e6969ca7996fb44d16d001fcf17b407 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 19 Dec 2020 06:59:44 -0800 Subject: [PATCH 0817/1009] [SPARK-33843][BUILD] Upgrade to Zstd 1.4.8 ### What changes were proposed in this pull request? This PR aims to upgrade Zstd library to 1.4.8. ### Why are the changes needed? This will bring Zstd 1.4.7 and 1.4.8 improvement and bug fixes and the following from `zstd-jni`. - https://github.com/facebook/zstd/releases/tag/v1.4.7 - https://github.com/facebook/zstd/releases/tag/v1.4.8 - https://github.com/luben/zstd-jni/issues/153 (Apple M1 architecture) ### Does this PR introduce _any_ user-facing change? This will unblock Apple Silicon usage. ### How was this patch tested? Pass the CIs. Closes #30848 from dongjoon-hyun/SPARK-33843. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index ceea496d3f1dc..199a0d1a31751 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -242,4 +242,4 @@ xmlenc/0.52//xmlenc-0.52.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar +zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index d1b811bd73607..42e1634b6e66c 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -257,4 +257,4 @@ xbean-asm7-shaded/4.15//xbean-asm7-shaded-4.15.jar xz/1.5//xz-1.5.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper/3.4.14//zookeeper-3.4.14.jar -zstd-jni/1.4.5-6//zstd-jni-1.4.5-6.jar +zstd-jni/1.4.8-1//zstd-jni-1.4.8-1.jar diff --git a/pom.xml b/pom.xml index 72e285bb2ba6e..4781f981a5949 100644 --- a/pom.xml +++ b/pom.xml @@ -695,7 +695,7 @@ com.github.luben zstd-jni - 1.4.5-6 + 1.4.8-1 com.clearspring.analytics From dd44ba5460c3850c87e93c2c126d980cb1b3a8b4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 19 Dec 2020 08:00:09 -0800 Subject: [PATCH 0818/1009] [SPARK-32976][SQL][FOLLOWUP] SET and RESTORE hive.exec.dynamic.partition.mode for HiveSQLInsertTestSuite to avoid flakiness ### What changes were proposed in this pull request? As https://github.com/apache/spark/pull/29893#discussion_r545303780 mentioned: > We need to set spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") before executing this suite; otherwise, test("insert with column list - follow table output order + partitioned table") will fail. The reason why it does not fail because some test cases [running before this suite] do not change the default value of hive.exec.dynamic.partition.mode back to strict. However, the order of test suite execution is not deterministic. ### Why are the changes needed? avoid flakiness in tests ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #30843 from yaooqinn/SPARK-32976-F. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/HiveSQLInsertTestSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala index 49b005bca938e..0b1d511f08511 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSQLInsertTestSuite.scala @@ -21,5 +21,20 @@ import org.apache.spark.sql.SQLInsertTestSuite import org.apache.spark.sql.hive.test.TestHiveSingleton class HiveSQLInsertTestSuite extends SQLInsertTestSuite with TestHiveSingleton { + + private val originalPartitionMode = spark.conf.getOption("hive.exec.dynamic.partition.mode") + + override protected def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + originalPartitionMode + .map(v => spark.conf.set("hive.exec.dynamic.partition.mode", v)) + .getOrElse(spark.conf.unset("hive.exec.dynamic.partition.mode")) + super.afterAll() + } + override def format: String = "hive OPTIONS(fileFormat='parquet')" } From 06075d849e07a97f7aba0dceece57ed45cbae040 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sat, 19 Dec 2020 08:32:58 -0800 Subject: [PATCH 0819/1009] [SPARK-33829][SQL] Renaming v2 tables should recreate the cache ### What changes were proposed in this pull request? Currently, renaming v2 tables does not invalidate/recreate the cache, leading to an incorrect behavior (cache not being used) when v2 tables are renamed. This PR fixes the behavior. ### Why are the changes needed? Fixing a bug since the cache associated with the renamed table is not being cleaned up/recreated. ### Does this PR introduce _any_ user-facing change? Yes, now when a v2 table is renamed, cache is correctly updated. ### How was this patch tested? Added a new test Closes #30825 from imback82/rename_recreate_cache_v2. 
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 31 +++++++++++++------ .../datasources/v2/RenameTableExec.scala | 17 +++++++++- .../sql/connector/DataSourceV2SQLSuite.scala | 19 ++++++++++++ 3 files changed, 57 insertions(+), 10 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c40d2ab9cba4e..50bcf81f1ba2d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.storage.StorageLevel class DataSourceV2Strategy(session: SparkSession) extends Strategy with PredicateHelper { @@ -56,17 +57,24 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat session.sharedState.cacheManager.recacheByPlan(session, r) } - private def invalidateCache(r: ResolvedTable, recacheTable: Boolean = false)(): Unit = { + // Invalidates the cache associated with the given table. If the invalidated cache matches the + // given table, the cache's storage level is returned. + private def invalidateCache( + r: ResolvedTable, + recacheTable: Boolean = false)(): Option[StorageLevel] = { val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) - if (recacheTable && cache.isDefined) { - // save the cache name and cache level for recreation - val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + if (cache.isDefined) { val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel - - // recache with the same name and cache level. - session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) + if (recacheTable) { + val cacheName = cache.get.cachedRepresentation.cacheBuilder.tableName + // recache with the same name and cache level. + session.sharedState.cacheManager.cacheQuery(session, v2Relation, cacheName, cacheLevel) + } + Some(cacheLevel) + } else { + None } } @@ -266,12 +274,17 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + case RenameTable(r @ ResolvedTable(catalog, oldIdent, _), newIdent, isView) => if (isView) { throw new AnalysisException( "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead.") } - RenameTableExec(catalog, oldIdent, newIdent.asIdentifier) :: Nil + RenameTableExec( + catalog, + oldIdent, + newIdent.asIdentifier, + invalidateCache(r), + session.sharedState.cacheManager.cacheQuery) :: Nil case AlterNamespaceSetProperties(ResolvedNamespace(catalog, ns), properties) => AlterNamespaceSetPropertiesExec(catalog.asNamespaceCatalog, ns, properties) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala index a650607d5f129..a71dd33a88ba9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/RenameTableExec.scala @@ -17,9 +17,12 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} +import org.apache.spark.storage.StorageLevel /** * Physical plan node for renaming a table. @@ -27,14 +30,26 @@ import org.apache.spark.sql.connector.catalog.{Identifier, TableCatalog} case class RenameTableExec( catalog: TableCatalog, oldIdent: Identifier, - newIdent: Identifier) extends V2CommandExec { + newIdent: Identifier, + invalidateCache: () => Option[StorageLevel], + cacheTable: (SparkSession, LogicalPlan, Option[String], StorageLevel) => Unit) + extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty override protected def run(): Seq[InternalRow] = { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper + + val optOldStorageLevel = invalidateCache() catalog.invalidateTable(oldIdent) + catalog.renameTable(oldIdent, newIdent) + optOldStorageLevel.foreach { oldStorageLevel => + val tbl = catalog.loadTable(newIdent) + val newRelation = DataSourceV2Relation.create(tbl, Some(catalog), Some(newIdent)) + cacheTable(sqlContext.sparkSession, newRelation, Some(newIdent.quoted), oldStorageLevel) + } Seq.empty } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b335dc31a3037..4fdb32c24f104 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -37,6 +37,7 @@ import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -863,6 +864,24 @@ class DataSourceV2SQLSuite } } + test("SPARK-33829: Renaming a table should recreate a cache while retaining the old cache info") { + withTable("testcat.ns.old", "testcat.ns.new") { + def getStorageLevel(tableName: String): StorageLevel = { + val table = spark.table(tableName) + val optCachedData = spark.sharedState.cacheManager.lookupCachedData(table) + assert(optCachedData.isDefined) + 
optCachedData.get.cachedRepresentation.cacheBuilder.storageLevel + } + sql("CREATE TABLE testcat.ns.old USING foo AS SELECT id, data FROM source") + sql("CACHE TABLE testcat.ns.old OPTIONS('storageLevel' 'MEMORY_ONLY')") + val oldStorageLevel = getStorageLevel("testcat.ns.old") + + sql("ALTER TABLE testcat.ns.old RENAME TO ns.new") + val newStorageLevel = getStorageLevel("testcat.ns.new") + assert(oldStorageLevel === newStorageLevel) + } + } + test("Relation: basic") { val t1 = "testcat.ns1.ns2.tbl" withTable(t1) { From 37c4cd8f05316227465ff9cccbba063779827660 Mon Sep 17 00:00:00 2001 From: Ammar Al-Batool Date: Sat, 19 Dec 2020 14:53:40 -0600 Subject: [PATCH 0820/1009] [MINOR][DOCS] Fix typos in ScalaDocs for DataStreamWriter#foreachBatch The title is pretty self-explanatory. ### What changes were proposed in this pull request? Fixing typos in the docs for `foreachBatch` functions. ### Why are the changes needed? To fix typos in JavaDoc/ScalaDoc. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Yes. Closes #30782 from ammar1x/patch-1. Lead-authored-by: Ammar Al-Batool Co-authored-by: Ammar Al-Batool Signed-off-by: Sean Owen --- .../spark/sql/streaming/DataStreamWriter.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 9e8dff37bcfd2..2703119ce1167 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -494,12 +494,13 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * :: Experimental :: * * (Scala-specific) Sets the output of the streaming query to be processed using the provided - * function. This is supported only the in the micro-batch execution modes (that is, when the + * function. This is supported only in the micro-batch execution modes (that is, when the * trigger is not continuous). In every micro-batch, the provided function will be called in * every micro-batch with (i) the output rows as a Dataset and (ii) the batch identifier. - * The batchId can be used deduplicate and transactionally write the output + * The batchId can be used to deduplicate and transactionally write the output * (that is, the provided Dataset) to external systems. The output Dataset is guaranteed - * to exactly same for the same batchId (assuming all operations are deterministic in the query). + * to be exactly the same for the same batchId (assuming all operations are deterministic + * in the query). * * @since 2.4.0 */ @@ -515,12 +516,13 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { * :: Experimental :: * * (Java-specific) Sets the output of the streaming query to be processed using the provided - * function. This is supported only the in the micro-batch execution modes (that is, when the + * function. This is supported only in the micro-batch execution modes (that is, when the * trigger is not continuous). In every micro-batch, the provided function will be called in * every micro-batch with (i) the output rows as a Dataset and (ii) the batch identifier. - * The batchId can be used deduplicate and transactionally write the output + * The batchId can be used to deduplicate and transactionally write the output * (that is, the provided Dataset) to external systems. 
The output Dataset is guaranteed - * to exactly same for the same batchId (assuming all operations are deterministic in the query). + * to be exactly the same for the same batchId (assuming all operations are deterministic + * in the query). * * @since 2.4.0 */ From 70da86a085b61a0981c3f9fc6dbd897716472642 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sat, 19 Dec 2020 14:10:20 -0800 Subject: [PATCH 0821/1009] [SPARK-33850][SQL] EXPLAIN FORMATTED doesn't show the plan for subqueries if AQE is enabled ### What changes were proposed in this pull request? This PR fixes an issue that when AQE is enabled, EXPLAIN FORMATTED doesn't show the plan for subqueries. ```scala val df = spark.range(1, 100) df.createTempView("df") spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("FORMATTED") == Physical Plan == AdaptiveSparkPlan (3) +- Project (2) +- Scan OneRowRelation (1) (1) Scan OneRowRelation Output: [] Arguments: ParallelCollectionRDD[0] at explain at :24, OneRowRelation, UnknownPartitioning(0) (2) Project Output [1]: [Subquery subquery#3, [id=#20] AS scalarsubquery()#5L] Input: [] (3) AdaptiveSparkPlan Output [1]: [scalarsubquery()#5L] Arguments: isFinalPlan=false ``` After this change, the plan for the subquerie is shown. ```scala == Physical Plan == * Project (2) +- * Scan OneRowRelation (1) (1) Scan OneRowRelation [codegen id : 1] Output: [] Arguments: ParallelCollectionRDD[0] at explain at :24, OneRowRelation, UnknownPartitioning(0) (2) Project [codegen id : 1] Output [1]: [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L] Input: [] ===== Subqueries ===== Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery scalar-subquery#3, [id=#24] * HashAggregate (6) +- Exchange (5) +- * HashAggregate (4) +- * Range (3) (3) Range [codegen id : 1] Output [1]: [id#0L] Arguments: Range (1, 100, step=1, splits=Some(12)) (4) HashAggregate [codegen id : 1] Input [1]: [id#0L] Keys: [] Functions [1]: [partial_min(id#0L)] Aggregate Attributes [1]: [min#7L] Results [1]: [min#8L] (5) Exchange Input [1]: [min#8L] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#20] (6) HashAggregate [codegen id : 2] Input [1]: [min#8L] Keys: [] Functions [1]: [min(id#0L)] Aggregate Attributes [1]: [min(id#0L)#4L] Results [1]: [min(id#0L)#4L AS v#2L] ``` ### Why are the changes needed? For better debuggability. ### Does this PR introduce _any_ user-facing change? Yes. Users can see the formatted plan for subqueries. ### How was this patch tested? New test. Closes #30855 from sarutak/fix-aqe-explain. 
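The two-line fix that follows is easier to read knowing the general pattern: under AQE the physical plan is wrapped in an `AdaptiveSparkPlanExec`, and any traversal that stops at the wrapper misses everything inside it, including subquery expressions. A hedged sketch of that unwrapping idea, using a toy operator count rather than the real `getSubqueries` logic:

```scala
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec

// Descend through the AQE wrapper into its executedPlan instead of treating the
// wrapper as a leaf; the actual patch adds the same case to ExplainUtils.getSubqueries.
def countOperators(plan: SparkPlan): Int = plan match {
  case a: AdaptiveSparkPlanExec => countOperators(a.executedPlan)
  case p                        => 1 + p.children.map(countOperators).sum
}
```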
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/ExplainUtils.scala | 2 + .../sql-tests/results/explain-aqe.sql.out | 263 ++++++++++++++++++ .../org/apache/spark/sql/ExplainSuite.scala | 22 ++ 3 files changed, 287 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index 20e6fb6f96eaa..f47542ca59bc7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -218,6 +218,8 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { plan: => QueryPlan[_], subqueries: ArrayBuffer[(SparkPlan, Expression, BaseSubqueryExec)]): Unit = { plan.foreach { + case a: AdaptiveSparkPlanExec => + getSubqueries(a.executedPlan, subqueries) case p: SparkPlan => p.expressions.foreach (_.collect { case e: PlanExpression[_] => diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 578b0a807fc52..d68989524d486 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -407,6 +407,101 @@ Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery subq Output [2]: [key#x, val#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (10) ++- HashAggregate (9) + +- Exchange (8) + +- HashAggregate (7) + +- Project (6) + +- Filter (5) + +- Scan parquet default.explain_temp2 (4) + + +(4) Scan parquet default.explain_temp2 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(key), IsNotNull(val), EqualTo(val,2)] +ReadSchema: struct + +(5) Filter +Input [2]: [key#x, val#x] +Condition : (((isnotnull(key#x) AND isnotnull(val#x)) AND (key#x = Subquery subquery#x, [id=#x])) AND (val#x = 2)) + +(6) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(7) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(8) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(9) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(10) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 5 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (17) ++- HashAggregate (16) + +- Exchange (15) + +- HashAggregate (14) + +- Project (13) + +- Filter (12) + +- Scan parquet default.explain_temp3 (11) + + +(11) Scan parquet default.explain_temp3 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(12) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(13) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(14) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(15) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, 
ENSURE_REQUIREMENTS, [id=#x] + +(16) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(17) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED @@ -442,6 +537,101 @@ Condition : ((key#x = Subquery subquery#x, [id=#x]) OR (cast(key#x as double) = Output [2]: [key#x, val#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (10) ++- HashAggregate (9) + +- Exchange (8) + +- HashAggregate (7) + +- Project (6) + +- Filter (5) + +- Scan parquet default.explain_temp2 (4) + + +(4) Scan parquet default.explain_temp2 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp2] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(5) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(6) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(7) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_max(key#x)] +Aggregate Attributes [1]: [max#x] +Results [1]: [max#x] + +(8) Exchange +Input [1]: [max#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(9) HashAggregate +Input [1]: [max#x] +Keys: [] +Functions [1]: [max(key#x)] +Aggregate Attributes [1]: [max(key#x)#x] +Results [1]: [max(key#x)#x AS max(key)#x] + +(10) AdaptiveSparkPlan +Output [1]: [max(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (17) ++- HashAggregate (16) + +- Exchange (15) + +- HashAggregate (14) + +- Project (13) + +- Filter (12) + +- Scan parquet default.explain_temp3 (11) + + +(11) Scan parquet default.explain_temp3 +Output [2]: [key#x, val#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp3] +PushedFilters: [IsNotNull(val), GreaterThan(val,0)] +ReadSchema: struct + +(12) Filter +Input [2]: [key#x, val#x] +Condition : (isnotnull(val#x) AND (val#x > 0)) + +(13) Project +Output [1]: [key#x] +Input [2]: [key#x, val#x] + +(14) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results [2]: [sum#x, count#xL] + +(15) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(16) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(17) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED @@ -470,6 +660,79 @@ Input: [] Output [1]: [(scalarsubquery() + scalarsubquery())#x] Arguments: isFinalPlan=false +===== Subqueries ===== + +Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (8) ++- HashAggregate (7) + +- Exchange (6) + +- HashAggregate (5) + +- Scan parquet default.explain_temp1 (4) + + +(4) Scan parquet default.explain_temp1 +Output [1]: [key#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct + +(5) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results 
[2]: [sum#x, count#xL] + +(6) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(7) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(8) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false + +Subquery:2 Hosting operator id = 2 Hosting Expression = Subquery subquery#x, [id=#x] +AdaptiveSparkPlan (13) ++- HashAggregate (12) + +- Exchange (11) + +- HashAggregate (10) + +- Scan parquet default.explain_temp1 (9) + + +(9) Scan parquet default.explain_temp1 +Output [1]: [key#x] +Batched: true +Location [not included in comparison]/{warehouse_dir}/explain_temp1] +ReadSchema: struct + +(10) HashAggregate +Input [1]: [key#x] +Keys: [] +Functions [1]: [partial_avg(cast(key#x as bigint))] +Aggregate Attributes [2]: [sum#x, count#xL] +Results [2]: [sum#x, count#xL] + +(11) Exchange +Input [2]: [sum#x, count#xL] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +(12) HashAggregate +Input [2]: [sum#x, count#xL] +Keys: [] +Functions [1]: [avg(cast(key#x as bigint))] +Aggregate Attributes [1]: [avg(cast(key#x as bigint))#x] +Results [1]: [avg(cast(key#x as bigint))#x AS avg(key)#x] + +(13) AdaptiveSparkPlan +Output [1]: [avg(key)#x] +Arguments: isFinalPlan=false -- !query EXPLAIN FORMATTED diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 75372c5437f25..0ec57c2fcb5ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -277,6 +277,28 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } + test("SPARK-33850: explain formatted - check presence of subquery in case of AQE") { + withTable("df1") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + withTable("df1") { + spark.range(1, 100) + .write + .format("parquet") + .mode("overwrite") + .saveAsTable("df1") + + val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df1) as v" + val expected_pattern1 = + "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" + + withNormalizedExplain(sqlText) { normalizedOutput => + assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) + } + } + } + } + } + test("Support ExplainMode in Dataset.explain") { val df1 = Seq((1, 2), (2, 3)).toDF("k", "v1") val df2 = Seq((2, 3), (1, 1)).toDF("k", "v2") From 2b6ef5606bec1a4547c8e850440bf12cc3422e1d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sat, 19 Dec 2020 14:19:44 -0800 Subject: [PATCH 0822/1009] [SPARK-33854][BUILD] Use ListBuffer instead of Stack in SparkBuild.scala ### What changes were proposed in this pull request? This PR aims to use ListBuffer instead of Stack in SparkBuild.scala to remove deprecation warning. ### Why are the changes needed? Stack is deprecated in Scala 2.12.0. ```scala % build/sbt compile ... [warn] /Users/william/spark/project/SparkBuild.scala:1112:25: class Stack in package mutable is deprecated (since 2.12.0): Stack is an inelegant and potentially poorly-performing wrapper around List. Use a List assigned to a var instead. [warn] val stack = new Stack[File]() ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual. Closes #30860 from williamhyun/SPARK-33854. 
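The mechanical translation from the deprecated `Stack` is small but worth seeing in isolation, since `ListBuffer` has no push/pop: `prepend` plays the role of `push` and `remove(0)` the role of `pop`, preserving the LIFO order the directory-creation loop relies on. A standalone sketch (paths are illustrative, not the SparkBuild code):

```scala
import scala.collection.mutable.ListBuffer

val pending = new ListBuffer[String]()
pending.prepend("a/b/c") // was stack.push(...): deepest missing directory first
pending.prepend("a/b")
pending.prepend("a")     // pushed last, so removed first

while (pending.nonEmpty) {
  val dir = pending.remove(0) // was stack.pop(): shallowest ancestor comes out first
  println(s"mkdir $dir")      // prints a, then a/b, then a/b/c
}
```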
Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 3098060478f40..aa3e2cd65e185 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -23,7 +23,7 @@ import java.util.Locale import scala.io.Source import scala.util.Properties import scala.collection.JavaConverters._ -import scala.collection.mutable.Stack +import scala.collection.mutable.ListBuffer import sbt._ import sbt.Classpaths.publishTask @@ -1109,14 +1109,14 @@ object TestSettings { // Because File.mkdirs() can fail if multiple callers are trying to create the same // parent directory, this code tries to create parents one at a time, and avoids // failures when the directories have been created by somebody else. - val stack = new Stack[File]() + val stack = new ListBuffer[File]() while (!dir.isDirectory()) { - stack.push(dir) + stack.prepend(dir) dir = dir.getParentFile() } while (stack.nonEmpty) { - val d = stack.pop() + val d = stack.remove(0) require(d.mkdir() || d.isDirectory(), s"Failed to create directory $d") } } From df2314b63aaf4992ac86ea0b68dae8554b066828 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Sat, 19 Dec 2020 14:37:15 -0800 Subject: [PATCH 0823/1009] [SPARK-33852][SQL][TESTS] Use assertAnalysisError in HiveDDLSuite.scala ### What changes were proposed in this pull request? `HiveDDLSuite` has many of the following patterns: ```scala val e = intercept[AnalysisException] { sql(sqlString) } assert(e.message.contains(exceptionMessage)) ``` However, there already exists `assertAnalysisError` helper function which does exactly the same thing. ### Why are the changes needed? To refactor code to simplify. ### Does this PR introduce _any_ user-facing change? No, just refactoring the test code. ### How was this patch tested? Existing tests Closes #30857 from imback82/hive_ddl_suite_use_assertAnalysisError. Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/hive/execution/HiveDDLSuite.scala | 363 ++++++++---------- 1 file changed, 157 insertions(+), 206 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index aac4b88d9e3f8..34f127bade95b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -451,15 +451,15 @@ class HiveDDLSuite withTable("tab1", "tab2") { (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath) - var e = intercept[AnalysisException] { sql("CREATE TABLE tab1 USING hive") }.getMessage - assert(e.contains("Unable to infer the schema. The schema specification is required to " + - "create the table `default`.`tab1`")) + assertAnalysisError( + "CREATE TABLE tab1 USING hive", + "Unable to infer the schema. The schema specification is required to " + + "create the table `default`.`tab1`") - e = intercept[AnalysisException] { - sql(s"CREATE TABLE tab2 USING hive location '${tempDir.getCanonicalPath}'") - }.getMessage - assert(e.contains("Unable to infer the schema. The schema specification is required to " + - "create the table `default`.`tab2`")) + assertAnalysisError( + s"CREATE TABLE tab2 USING hive location '${tempDir.getCanonicalPath}'", + "Unable to infer the schema. 
The schema specification is required to " + + "create the table `default`.`tab2`") } } } @@ -581,17 +581,15 @@ class HiveDDLSuite } test("create table: partition column names exist in table definition") { - val e = intercept[AnalysisException] { - sql("CREATE TABLE tbl(a int) PARTITIONED BY (a string)") - } - assert(e.message == "Found duplicate column(s) in the table definition of `default`.`tbl`: `a`") + assertAnalysisError( + "CREATE TABLE tbl(a int) PARTITIONED BY (a string)", + "Found duplicate column(s) in the table definition of `default`.`tbl`: `a`") } test("create partitioned table without specifying data type for the partition columns") { - val e = intercept[AnalysisException] { - sql("CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet") - } - assert(e.message.contains("partition column b is not defined in table")) + assertAnalysisError( + "CREATE TABLE tbl(a int) PARTITIONED BY (b) STORED AS parquet", + "partition column b is not defined in table") } test("add/drop partition with location - managed table") { @@ -643,11 +641,10 @@ class HiveDDLSuite test("SPARK-19129: drop partition with a empty string will drop the whole table") { val df = spark.createDataFrame(Seq((0, "a"), (1, "b"))).toDF("partCol1", "name") df.write.mode("overwrite").partitionBy("partCol1").saveAsTable("partitionedTable") - val e = intercept[AnalysisException] { - spark.sql("alter table partitionedTable drop partition(partCol1='')") - }.getMessage - assert(e.contains("Partition spec is invalid. The spec ([partCol1=]) contains an empty " + - "partition column value")) + assertAnalysisError( + "alter table partitionedTable drop partition(partCol1='')", + "Partition spec is invalid. The spec ([partCol1=]) contains an empty " + + "partition column value") } test("add/drop partitions - external table") { @@ -692,11 +689,10 @@ class HiveDDLSuite // After data insertion, all the directory are not empty assert(dirSet.forall(dir => dir.listFiles.nonEmpty)) - val message = intercept[AnalysisException] { - sql(s"ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-09', unknownCol='12')") - } - assert(message.getMessage.contains("unknownCol is not a valid partition column in table " + - "`default`.`exttable_with_partitions`")) + assertAnalysisError( + s"ALTER TABLE $externalTab DROP PARTITION (ds='2008-04-09', unknownCol='12')", + "unknownCol is not a valid partition column in table " + + "`default`.`exttable_with_partitions`") sql( s""" @@ -798,11 +794,9 @@ class HiveDDLSuite sql(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") checkProperties(Map()) - val message = intercept[AnalysisException] { - sql(s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')") - }.getMessage - assert(message.contains( - "Attempted to unset non-existent property 'p' in table '`default`.`view1`'")) + assertAnalysisError( + s"ALTER VIEW $viewName UNSET TBLPROPERTIES ('p')", + "Attempted to unset non-existent property 'p' in table '`default`.`view1`'") } } } @@ -825,10 +819,9 @@ class HiveDDLSuite test("create table - SET TBLPROPERTIES EXTERNAL to TRUE") { val tabName = "tab1" withTable(tabName) { - val message = intercept[AnalysisException] { - sql(s"CREATE TABLE $tabName (height INT, length INT) TBLPROPERTIES('EXTERNAL'='TRUE')") - }.getMessage - assert(message.contains("Cannot set or change the preserved property key: 'EXTERNAL'")) + assertAnalysisError( + s"CREATE TABLE $tabName (height INT, length INT) TBLPROPERTIES('EXTERNAL'='TRUE')", + "Cannot set or change the preserved property key: 'EXTERNAL'") } } @@ -839,10 +832,9 @@ class 
HiveDDLSuite sql(s"CREATE TABLE $tabName (height INT, length INT)") assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) - val message = intercept[AnalysisException] { - sql(s"ALTER TABLE $tabName SET TBLPROPERTIES ('EXTERNAL' = 'TRUE')") - }.getMessage - assert(message.contains("Cannot set or change the preserved property key: 'EXTERNAL'")) + assertAnalysisError( + s"ALTER TABLE $tabName SET TBLPROPERTIES ('EXTERNAL' = 'TRUE')", + "Cannot set or change the preserved property key: 'EXTERNAL'") // The table type is not changed to external assert( catalog.getTableMetadata(TableIdentifier(tabName)).tableType == CatalogTableType.MANAGED) @@ -1051,11 +1043,9 @@ class HiveDDLSuite test("drop table using drop view") { withTable("tab1") { sql("CREATE TABLE tab1(c1 int)") - val message = intercept[AnalysisException] { - sql("DROP VIEW tab1") - }.getMessage - assert(message.contains( - "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.")) + assertAnalysisError( + "DROP VIEW tab1", + "tab1 is a table. 'DROP VIEW' expects a view. Please use DROP TABLE instead.") } } @@ -1064,10 +1054,9 @@ class HiveDDLSuite spark.range(10).write.saveAsTable("tab1") withView("view1") { sql("CREATE VIEW view1 AS SELECT * FROM tab1") - val message = intercept[AnalysisException] { - sql("DROP TABLE view1") - }.getMessage - assert(message.contains("Cannot drop a view with DROP TABLE. Please use DROP VIEW instead")) + assertAnalysisError( + "DROP TABLE view1", + "Cannot drop a view with DROP TABLE. Please use DROP VIEW instead") } } } @@ -1221,10 +1210,9 @@ class HiveDDLSuite sql(s"USE default") val sqlDropDatabase = s"DROP DATABASE $dbName ${if (cascade) "CASCADE" else "RESTRICT"}" if (tableExists && !cascade) { - val message = intercept[AnalysisException] { - sql(sqlDropDatabase) - }.getMessage - assert(message.contains(s"Database $dbName is not empty. One or more tables exist.")) + assertAnalysisError( + sqlDropDatabase, + s"Database $dbName is not empty. One or more tables exist.") // the database directory was not removed assert(fs.exists(new Path(expectedDBLocation))) } else { @@ -1253,17 +1241,15 @@ class HiveDDLSuite test("drop default database") { Seq("true", "false").foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { - var message = intercept[AnalysisException] { - sql("DROP DATABASE default") - }.getMessage - assert(message.contains("Can not drop default database")) + assertAnalysisError( + "DROP DATABASE default", + "Can not drop default database") // SQLConf.CASE_SENSITIVE does not affect the result // because the Hive metastore is not case sensitive. 
- message = intercept[AnalysisException] { - sql("DROP DATABASE DeFault") - }.getMessage - assert(message.contains("Can not drop default database")) + assertAnalysisError( + "DROP DATABASE DeFault", + "Can not drop default database") } } } @@ -1653,10 +1639,9 @@ class HiveDDLSuite } // When tableExists is not invoked, we still can get an AnalysisException - val e = intercept[AnalysisException] { - sql(s"DESCRIBE $indexTabName") - }.getMessage - assert(e.contains("Hive index table is not supported.")) + assertAnalysisError( + s"DESCRIBE $indexTabName", + "Hive index table is not supported.") } finally { client.runSqlHive(s"DROP INDEX IF EXISTS $indexName ON $tabName") } @@ -1726,20 +1711,17 @@ class HiveDDLSuite sql("CREATE TABLE tbl(a INT) STORED AS parquet") Seq(DATASOURCE_PREFIX, STATISTICS_PREFIX).foreach { forbiddenPrefix => - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE tbl SET TBLPROPERTIES ('${forbiddenPrefix}foo' = 'loser')") - } - assert(e.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"ALTER TABLE tbl SET TBLPROPERTIES ('${forbiddenPrefix}foo' = 'loser')", + s"${forbiddenPrefix}foo") - val e2 = intercept[AnalysisException] { - sql(s"ALTER TABLE tbl UNSET TBLPROPERTIES ('${forbiddenPrefix}foo')") - } - assert(e2.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"ALTER TABLE tbl UNSET TBLPROPERTIES ('${forbiddenPrefix}foo')", + s"${forbiddenPrefix}foo") - val e3 = intercept[AnalysisException] { - sql(s"CREATE TABLE tbl2 (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')") - } - assert(e3.getMessage.contains(forbiddenPrefix + "foo")) + assertAnalysisError( + s"CREATE TABLE tbl2 (a INT) TBLPROPERTIES ('${forbiddenPrefix}foo'='anything')", + s"${forbiddenPrefix}foo") } } } @@ -1759,10 +1741,9 @@ class HiveDDLSuite assert(spark.table("rectangles").collect().isEmpty) // not supported since the table is not partitioned - val e = intercept[AnalysisException] { - sql("TRUNCATE TABLE rectangles PARTITION (width=1)") - } - assert(e.message.contains("Operation not allowed")) + assertAnalysisError( + "TRUNCATE TABLE rectangles PARTITION (width=1)", + "Operation not allowed") } } } @@ -1800,10 +1781,9 @@ class HiveDDLSuite } // throw exception if the column in partition spec is not a partition column. 
- val e = intercept[AnalysisException] { - sql("TRUNCATE TABLE partTable PARTITION (unknown=1)") - } - assert(e.message.contains("unknown is not a valid partition column")) + assertAnalysisError( + "TRUNCATE TABLE partTable PARTITION (unknown=1)", + "unknown is not a valid partition column") } } @@ -2161,10 +2141,9 @@ class HiveDDLSuite assert(loc.listFiles().length >= 1) checkAnswer(spark.table("t"), Row("1") :: Nil) } else { - val e = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t SELECT 1") - }.getMessage - assert(e.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) + assertAnalysisError( + "INSERT INTO TABLE t SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") } } @@ -2203,15 +2182,13 @@ class HiveDDLSuite Row("1", "2") :: Row("1", "2017-03-03 12:13%3A14") :: Nil) } } else { - val e = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t1 PARTITION(b=2) SELECT 1") - }.getMessage - assert(e.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) - - val e1 = intercept[AnalysisException] { - spark.sql("INSERT INTO TABLE t1 PARTITION(b='2017-03-03 12:13%3A14') SELECT 1") - }.getMessage - assert(e1.contains("java.net.URISyntaxException: Relative path in absolute URI: a:b")) + assertAnalysisError( + "INSERT INTO TABLE t1 PARTITION(b=2) SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") + + assertAnalysisError( + "INSERT INTO TABLE t1 PARTITION(b='2017-03-03 12:13%3A14') SELECT 1", + "java.net.URISyntaxException: Relative path in absolute URI: a:b") } } } @@ -2296,30 +2273,26 @@ class HiveDDLSuite sql("CREATE TABLE tab (c1 int) PARTITIONED BY (c2 int) STORED AS PARQUET") if (!caseSensitive) { // duplicating partitioning column name - val e1 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C2 string)") - }.getMessage - assert(e1.contains("Found duplicate column(s)")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C2 string)", + "Found duplicate column(s)") // duplicating data column name - val e2 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C1 string)") - }.getMessage - assert(e2.contains("Found duplicate column(s)")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C1 string)", + "Found duplicate column(s)") } else { // hive catalog will still complains that c1 is duplicate column name because hive // identifiers are case insensitive. - val e1 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C2 string)") - }.getMessage - assert(e1.contains("HiveException")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C2 string)", + "HiveException") // hive catalog will still complains that c1 is duplicate column name because hive // identifiers are case insensitive. 
- val e2 = intercept[AnalysisException] { - sql("ALTER TABLE tab ADD COLUMNS (C1 string)") - }.getMessage - assert(e2.contains("HiveException")) + assertAnalysisError( + "ALTER TABLE tab ADD COLUMNS (C1 string)", + "HiveException") } } } @@ -2341,58 +2314,49 @@ class HiveDDLSuite // Forbid CTAS with null type withTable("t1", "t2", "t3") { - val e1 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT null as null_col") - }.getMessage - assert(e1.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t1 USING PARQUET AS SELECT null as null_col", + "Cannot create tables with null type") - val e2 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t2 AS SELECT null as null_col") - }.getMessage - assert(e2.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t2 AS SELECT null as null_col", + "Cannot create tables with null type") - val e3 = intercept[AnalysisException] { - spark.sql("CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col") - }.getMessage - assert(e3.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col", + "Cannot create tables with null type") } // Forbid Replace table AS SELECT with null type withTable("t") { val v2Source = classOf[FakeV2Provider].getName - val e = intercept[AnalysisException] { - spark.sql(s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col") - }.getMessage - assert(e.contains("Cannot create tables with null type")) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col", + "Cannot create tables with null type") } // Forbid creating table with VOID type in Spark withTable("t1", "t2", "t3", "t4") { - val e1 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t1 (v VOID) USING PARQUET") - }.getMessage - assert(e1.contains("Cannot create tables with null type")) - val e2 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t2 (v VOID) USING hive") - }.getMessage - assert(e2.contains("Cannot create tables with null type")) - val e3 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t3 (v VOID)") - }.getMessage - assert(e3.contains("Cannot create tables with null type")) - val e4 = intercept[AnalysisException] { - spark.sql(s"CREATE TABLE t4 (v VOID) STORED AS PARQUET") - }.getMessage - assert(e4.contains("Cannot create tables with null type")) + assertAnalysisError( + "CREATE TABLE t1 (v VOID) USING PARQUET", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t2 (v VOID) USING hive", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t3 (v VOID)", + "Cannot create tables with null type") + assertAnalysisError( + "CREATE TABLE t4 (v VOID) STORED AS PARQUET", + "Cannot create tables with null type") } // Forbid Replace table with VOID type withTable("t") { val v2Source = classOf[FakeV2Provider].getName - val e = intercept[AnalysisException] { - spark.sql(s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source") - }.getMessage - assert(e.contains("Cannot create tables with null type")) + assertAnalysisError( + s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source", + "Cannot create tables with null type") } // Make sure spark.catalog.createTable with null type will fail @@ -2626,9 +2590,9 @@ class HiveDDLSuite test("load command for non local invalid path validation") { withTable("tbl") { sql("CREATE TABLE tbl(i INT, j STRING) USING 
hive") - val e = intercept[AnalysisException]( - sql("load data inpath '/doesnotexist.csv' into table tbl")) - assert(e.message.contains("LOAD DATA input path does not exist")) + assertAnalysisError( + "load data inpath '/doesnotexist.csv' into table tbl", + "LOAD DATA input path does not exist") } } @@ -2780,47 +2744,39 @@ class HiveDDLSuite sql("CREATE TABLE sourceDsTable(a INT, b INT) USING PARQUET") // row format doesn't work in create targetDsTable - var e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceHiveTable USING PARQUET - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceHiveTable USING PARQUET + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE") // row format doesn't work with provider hive - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetHiveTable LIKE sourceHiveTable USING hive - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE")) + assertAnalysisError( + """ + |CREATE TABLE targetHiveTable LIKE sourceHiveTable USING hive + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... ROW FORMAT SERDE") // row format doesn't work without 'STORED AS' - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceDsTable - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |WITH SERDEPROPERTIES ('test' = 'test') - """.stripMargin) - }.getMessage - assert(e.contains("'ROW FORMAT' must be used with 'STORED AS'")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |WITH SERDEPROPERTIES ('test' = 'test') + """.stripMargin, + "'ROW FORMAT' must be used with 'STORED AS'") // 'INPUTFORMAT' and 'OUTPUTFORMAT' conflict with 'USING' - e = intercept[AnalysisException] { - spark.sql( - """ - |CREATE TABLE targetDsTable LIKE sourceDsTable USING format - |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - """.stripMargin) - }.getMessage - assert(e.contains("Operation not allowed: CREATE TABLE LIKE ... USING ... STORED AS")) + assertAnalysisError( + """ + |CREATE TABLE targetDsTable LIKE sourceDsTable USING format + |STORED AS INPUTFORMAT 'inFormat' OUTPUTFORMAT 'outFormat' + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + """.stripMargin, + "Operation not allowed: CREATE TABLE LIKE ... USING ... 
STORED AS") } } @@ -2880,16 +2836,13 @@ class HiveDDLSuite // negative case hiveFormats.filterNot(allowSerdeFileFormats.contains(_)).foreach { format => withTable("targetTable") { - val ex = intercept[AnalysisException] { - spark.sql( - s""" - |CREATE TABLE targetTable LIKE $sourceTable - |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' - |STORED AS $format - """.stripMargin) - }.getMessage - assert(ex.contains( - s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'")) + assertAnalysisError( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |STORED AS $format + """.stripMargin, + s"ROW FORMAT SERDE is incompatible with format '${format.toLowerCase(Locale.ROOT)}'") } } } @@ -2912,15 +2865,13 @@ class HiveDDLSuite assert(table.storage.serde === Some(expectedSerde.get.serde.get)) // negative case - val ex = intercept[AnalysisException] { - spark.sql( - s""" - |CREATE TABLE targetTable LIKE $sourceTable - |ROW FORMAT DELIMITED - |STORED AS PARQUET - """.stripMargin) - }.getMessage - assert(ex.contains("ROW FORMAT DELIMITED is only compatible with 'textfile'")) + assertAnalysisError( + s""" + |CREATE TABLE targetTable LIKE $sourceTable + |ROW FORMAT DELIMITED + |STORED AS PARQUET + """.stripMargin, + "ROW FORMAT DELIMITED is only compatible with 'textfile'") } } From 13391683e7a863671d3d719dc81e20ec2a870725 Mon Sep 17 00:00:00 2001 From: Xianjin YE Date: Sun, 20 Dec 2020 08:51:17 -0600 Subject: [PATCH 0824/1009] [SPARK-33756][SQL] Make BytesToBytesMap's MapIterator idempotent ### What changes were proposed in this pull request? Make MapIterator of BytesToBytesMap `hasNext` method idempotent ### Why are the changes needed? The `hasNext` maybe called multiple times, if not guarded, second call of hasNext method after reaching the end of iterator will throw NoSuchElement exception. ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? Update a unit test to cover this case. Closes #30728 from advancedxy/SPARK-33756. 
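The contract being restored here is the standard one for iterators: once exhausted, `hasNext` must keep returning `false` on repeated calls rather than failing because the spill-file list has already been drained. A tiny Scala sketch of the property the updated test asserts (the helper is illustrative, not part of the patch):

```scala
// Drain an iterator, then verify that asking again is harmless: hasNext must be
// idempotent at the end, with no exception and no further side effects.
def assertHasNextIdempotentAtEnd[T](it: Iterator[T]): Unit = {
  while (it.hasNext) it.next()
  assert(!it.hasNext)
  assert(!it.hasNext) // deliberate second call, mirroring the updated unit test
}
```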
Authored-by: Xianjin YE Signed-off-by: Sean Owen --- .../org/apache/spark/unsafe/map/BytesToBytesMap.java | 10 ++++++---- .../spark/unsafe/map/AbstractBytesToBytesMapSuite.java | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index d7940fc08e1a5..f474c30b8b3d8 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -393,10 +393,12 @@ public void remove() { } private void handleFailedDelete() { - // remove the spill file from disk - File file = spillWriters.removeFirst().getFile(); - if (file != null && file.exists() && !file.delete()) { - logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + if (spillWriters.size() > 0) { + // remove the spill file from disk + File file = spillWriters.removeFirst().getFile(); + if (file != null && file.exists() && !file.delete()) { + logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + } } } } diff --git a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java index f4e952f465e54..f35176a69d94b 100644 --- a/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java +++ b/core/src/test/java/org/apache/spark/unsafe/map/AbstractBytesToBytesMapSuite.java @@ -576,6 +576,8 @@ public void spillInIterator() throws IOException { iter2.next(); } assertFalse(iter2.hasNext()); + // calls hasNext twice deliberately, make sure it's idempotent + assertFalse(iter2.hasNext()); } finally { map.free(); for (File spillFile : spillFilesCreated) { From 3c8be3983cd390306e9abbfe078536a08881a5d6 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 21 Dec 2020 09:40:42 +0900 Subject: [PATCH 0825/1009] [SPARK-33850][SQL][FOLLOWUP] Improve and cleanup the test code ### What changes were proposed in this pull request? This PR mainly improves and cleans up the test code introduced in #30855 based on the comment. The test code is actually taken from another test `explain formatted - check presence of subquery in case of DPP` so this PR cleans the code too ( removed unnecessary `withTable`). ### Why are the changes needed? To keep the test code clean. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? `ExplainSuite` passes. Closes #30861 from sarutak/followup-SPARK-33850. 
Authored-by: Kousuke Saruta Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/ExplainSuite.scala | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 0ec57c2fcb5ad..8b7459fddb59a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -233,7 +233,6 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { - withTable("df1", "df2") { spark.range(1000).select(col("id"), col("id").as("k")) .write .partitionBy("k") @@ -273,27 +272,21 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite assert(expected_pattern4.r.findAllMatchIn(normalizedOutput).length == 1) } } - } } } test("SPARK-33850: explain formatted - check presence of subquery in case of AQE") { - withTable("df1") { - withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - withTable("df1") { - spark.range(1, 100) - .write - .format("parquet") - .mode("overwrite") - .saveAsTable("df1") + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + withTempView("df") { + val df = spark.range(1, 100) + df.createTempView("df") - val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df1) as v" - val expected_pattern1 = - "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" + val sqlText = "EXPLAIN FORMATTED SELECT (SELECT min(id) FROM df) as v" + val expected_pattern = + "Subquery:1 Hosting operator id = 2 Hosting Expression = Subquery subquery#x" - withNormalizedExplain(sqlText) { normalizedOutput => - assert(expected_pattern1.r.findAllMatchIn(normalizedOutput).length == 1) - } + withNormalizedExplain(sqlText) { normalizedOutput => + assert(expected_pattern.r.findAllMatchIn(normalizedOutput).length == 1) } } } From 8e2633962f789a6ba5eb9448596f6ac4b7b1c2ff Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Sun, 20 Dec 2020 17:38:47 -0800 Subject: [PATCH 0826/1009] [SPARK-26341][WEBUI][FOLLOWUP] Update stage memory metrics on stage end ### What changes were proposed in this pull request? This is a followup PR for #30573 . After this change applied, stage memory metrics will be updated on stage end. ### Why are the changes needed? After #30573, executor memory metrics is updated on stage end but stage memory metrics is not updated. It's better to update both metrics like `updateStageLevelPeakExecutorMetrics` does. ### Does this PR introduce _any_ user-facing change? Yes. stage memory metrics is updated more accurately. ### How was this patch tested? After I run a job and visited `/api/v1//stages`, I confirmed `peakExecutorMemory` metrics is shown even though the life time of each stage is very short . I also modify the json files for `HistoryServerSuite`. Closes #30858 from sarutak/followup-SPARK-26341. 
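The one-line change that follows folds each finished task's executor-metric snapshot into the stage-level peaks, the same element-wise maximum already kept per executor. A simplified sketch of that peak-tracking idea, using plain arrays instead of the real `ExecutorMetrics` type:

```scala
// Compare a new snapshot against the running peaks and keep the larger value per
// metric, returning whether anything changed; this mirrors the shape of
// compareAndUpdatePeakValues without depending on Spark internals.
def compareAndUpdatePeaks(peaks: Array[Long], snapshot: Array[Long]): Boolean = {
  var updated = false
  var i = 0
  while (i < peaks.length && i < snapshot.length) {
    if (snapshot(i) > peaks(i)) {
      peaks(i) = snapshot(i)
      updated = true
    }
    i += 1
  }
  updated
}
```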
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/status/AppStatusListener.scala | 1 + .../complete_stage_list_json_expectation.json | 72 +++++++++++++- ...xcludeOnFailure_for_stage_expectation.json | 24 ++++- ...eOnFailure_node_for_stage_expectation.json | 24 ++++- .../failed_stage_list_json_expectation.json | 24 ++++- .../one_stage_attempt_json_expectation.json | 24 ++++- .../one_stage_json_expectation.json | 24 ++++- .../stage_list_json_expectation.json | 96 ++++++++++++++++++- ...ist_with_accumulable_json_expectation.json | 24 ++++- ...age_with_accumulable_json_expectation.json | 24 ++++- 10 files changed, 323 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 0722095cc6533..bf19897e51fb3 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -688,6 +688,7 @@ private[spark] class AppStatusListener( } stage.activeTasksPerExecutor(event.taskInfo.executorId) -= 1 + stage.peakExecutorMetrics.compareAndUpdatePeakValues(event.taskExecutorMetrics) stage.executorSummary(event.taskInfo.executorId).peakExecutorMetrics .compareAndUpdatePeakValues(event.taskExecutorMetrics) // [SPARK-24415] Wait for all tasks to finish before removing stage from live list diff --git a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json index a452488294547..f04543e037c48 100644 --- a/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/complete_stage_list_json_expectation.json @@ -42,7 +42,29 @@ "rddIds" : [ 6, 5 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 1, @@ -87,7 +109,29 @@ "rddIds" : [ 1, 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 0, @@ -132,5 +176,27 @@ "rddIds" : [ 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - 
"resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index ab9a8b7ef885f..dcad8a6895ed8 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -764,5 +764,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index 1c569c19894fd..2ab1546bd4a86 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -992,5 +992,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json index c38741646c64b..5573cf98db26a 100644 --- a/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/failed_stage_list_json_expectation.json @@ -43,5 +43,27 @@ "rddIds" : [ 3, 2 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + 
"resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index b1eab0d7ac196..9edb518132e87 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -486,5 +486,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 6dfdd27cd7d8f..9e661bdf8a034 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -486,5 +486,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json index a31c907221388..d109c73b46133 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_json_expectation.json @@ -42,7 +42,29 @@ "rddIds" : [ 6, 5 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + 
"OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "FAILED", "stageId" : 2, @@ -88,7 +110,29 @@ "rddIds" : [ 3, 2 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 1, @@ -133,7 +177,29 @@ "rddIds" : [ 1, 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } }, { "status" : "COMPLETE", "stageId" : 0, @@ -178,5 +244,27 @@ "rddIds" : [ 0 ], "accumulatorUpdates" : [ ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json index 08089d4f3f65b..7901c4f93367b 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_list_with_accumulable_json_expectation.json @@ -46,5 +46,27 @@ "value" : "5050" } ], "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + 
"OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index a2cfd9d42cc99..a5958e0a093f1 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -530,5 +530,27 @@ } }, "killedTasksSummary" : { }, - "resourceProfileId" : 0 + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + } } From 1c7b79c0578c76629ac68a7e180f33e40aa380d8 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 21 Dec 2020 04:58:56 +0000 Subject: [PATCH 0827/1009] [SPARK-33856][SQL] Migrate ALTER TABLE ... RENAME TO PARTITION to use UnresolvedTable to resolve the identifier ### What changes were proposed in this pull request? This PR proposes to migrate `ALTER TABLE ... RENAME TO PARTITION` to use `UnresolvedTable` to resolve the table identifier. This allows consistent resolution rules (temp view first, etc.) to be applied for both v1/v2 commands. More info about the consistent resolution rule proposal can be found in [JIRA](https://issues.apache.org/jira/browse/SPARK-29900) or [proposal doc](https://docs.google.com/document/d/1hvLjGA8y_W_hhilpngXVub1Ebv8RsMap986nENCFnrg/edit?usp=sharing). Note that `ALTER TABLE ... RENAME TO PARTITION` is not supported for v2 tables. ### Why are the changes needed? The PR makes the resolution consistent behavior consistent. For example, ``` sql("CREATE DATABASE test") sql("CREATE TABLE spark_catalog.test.t (id bigint, val string) USING csv PARTITIONED BY (id)") sql("CREATE TEMPORARY VIEW t AS SELECT 2") sql("USE spark_catalog.test") sql("ALTER TABLE t PARTITION (id=1) RENAME TO PARTITION (id=2)") // works fine assuming id=1 exists. ``` , but after this PR: ``` sql("ALTER TABLE t PARTITION (id=1) RENAME TO PARTITION (id=2)") org.apache.spark.sql.AnalysisException: t is a temp view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table; line 1 pos 0 ``` , which is the consistent behavior with other commands. ### Does this PR introduce _any_ user-facing change? After this PR, `ALTER TABLE` in the above example is resolved to a temp view `t` first instead of `spark_catalog.test.t`. ### How was this patch tested? Updated existing tests. Closes #30862 from imback82/alter_table_rename_partition_v2. 
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 3 +++ .../analysis/ResolvePartitionSpec.scala | 11 +++++++++- .../sql/catalyst/parser/AstBuilder.scala | 10 ++++++---- .../catalyst/plans/logical/statements.scala | 8 -------- .../catalyst/plans/logical/v2Commands.scala | 13 ++++++++++++ .../sql/catalyst/parser/DDLParserSuite.scala | 14 ++++++------- .../analysis/ResolveSessionCatalog.scala | 6 +++--- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++++ .../AlterTablePartitionV2SQLSuite.scala | 20 +++++++++++++------ .../spark/sql/execution/SQLViewSuite.scala | 4 +++- .../sql/execution/command/DDLSuite.scala | 5 +++-- .../sql/hive/execution/HiveDDLSuite.scala | 5 +++-- 12 files changed, 69 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c0cdcdf2d9577..472de096b2f22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -587,6 +587,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => checkAlterTablePartition(table, parts) + case AlterTableRenamePartition(ResolvedTable(_, _, table), from, _) => + checkAlterTablePartition(table, Seq(from)) + case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) case _ => // Falls back to the following checks diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 35e4820cd710b..2c2bea6f89d49 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Cast, Literal} -import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, LogicalPlan, ShowPartitions} +import org.apache.spark.sql.catalyst.plans.logical.{AlterTableAddPartition, AlterTableDropPartition, AlterTableRenamePartition, LogicalPlan, ShowPartitions} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement @@ -51,6 +51,15 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { partitionSchema, requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) + case r @ AlterTableRenamePartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), from, _) => + val partitionSchema = table.partitionSchema() + r.copy(from = resolvePartitionSpecs( + table.name, + Seq(from), + partitionSchema, + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)).head) + case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( table.name, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 94589688953d7..9c265544f3227 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3761,7 +3761,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } /** - * Create an [[AlterTableRenamePartitionStatement]] + * Create an [[AlterTableRenamePartition]] * * For example: * {{{ @@ -3770,9 +3770,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitRenameTablePartition( ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) { - AlterTableRenamePartitionStatement( - visitMultipartIdentifier(ctx.multipartIdentifier), - visitNonOptionalPartitionSpec(ctx.from), + AlterTableRenamePartition( + UnresolvedTable( + visitMultipartIdentifier(ctx.multipartIdentifier), + "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), visitNonOptionalPartitionSpec(ctx.to)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala index 59239f6e041a5..f6d141ded384a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statements.scala @@ -292,14 +292,6 @@ case class AlterTableSetLocationStatement( partitionSpec: Option[TablePartitionSpec], location: String) extends ParsedStatement -/** - * ALTER TABLE ... RENAME PARTITION command, as parsed from SQL. - */ -case class AlterTableRenamePartitionStatement( - tableName: Seq[String], - from: TablePartitionSpec, - to: TablePartitionSpec) extends ParsedStatement - /** * An INSERT INTO statement, as parsed from SQL. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index fa67d311c39c3..87d81d5330574 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -673,6 +673,19 @@ case class AlterTableDropPartition( override def children: Seq[LogicalPlan] = child :: Nil } +/** + * The logical plan of the ALTER TABLE ... RENAME TO PARTITION command. + */ +case class AlterTableRenamePartition( + child: LogicalPlan, + from: PartitionSpec, + to: TablePartitionSpec) extends Command { + override lazy val resolved: Boolean = + childrenResolved && from.isInstanceOf[ResolvedPartitionSpec] + + override def children: Seq[LogicalPlan] = child :: Nil +} + /** * The logical plan of the ALTER TABLE ... RECOVER PARTITIONS command. 
*/ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 5eb0c9a39f1e6..330a01be4bfb3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -2106,9 +2106,9 @@ class DDLParserSuite extends AnalysisTest { |RENAME TO PARTITION (dt='2008-09-09', country='uk') """.stripMargin val parsed1 = parsePlan(sql1) - val expected1 = AlterTableRenamePartitionStatement( - Seq("table_name"), - Map("dt" -> "2008-08-08", "country" -> "us"), + val expected1 = AlterTableRenamePartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), Map("dt" -> "2008-09-09", "country" -> "uk")) comparePlans(parsed1, expected1) @@ -2118,9 +2118,9 @@ class DDLParserSuite extends AnalysisTest { |RENAME TO PARTITION (ds='2018-06-10') """.stripMargin val parsed2 = parsePlan(sql2) - val expected2 = AlterTableRenamePartitionStatement( - Seq("a", "b", "c"), - Map("ds" -> "2017-06-10"), + val expected2 = AlterTableRenamePartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... 
RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), Map("ds" -> "2018-06-10")) comparePlans(parsed2, expected2) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 723647a4a9207..66d1c406a5603 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -445,10 +445,10 @@ class ResolveSessionCatalog( partSpecsAndLocs.asUnresolvedPartitionSpecs.map(spec => (spec.spec, spec.location)), ifNotExists) - case AlterTableRenamePartitionStatement(tbl, from, to) => - val v1TableName = parseV1Table(tbl, "ALTER TABLE RENAME PARTITION") + case AlterTableRenamePartition( + ResolvedV1TableIdentifier(ident), UnresolvedPartitionSpec(from, _), to) => AlterTableRenamePartitionCommand( - v1TableName.asTableIdentifier, + ident.asTableIdentifier, from, to) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 50bcf81f1ba2d..635117a9932ac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -342,6 +342,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => + throw new AnalysisException( + "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.") + case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( "ALTER TABLE ... 
RECOVER PARTITIONS is not supported for v2 tables.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index ac4d055eb0e60..bdf2fa5b7ac96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -33,13 +33,21 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { } test("ALTER TABLE RENAME PARTITION") { - val t = "testcat.ns1.ns2.tbl" - withTable(t) { - spark.sql(s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") - val e = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + val nonPartTbl = "testcat.ns1.ns2.tbl" + val partTbl = "testpart.ns1.ns2.tbl" + withTable(nonPartTbl, partTbl) { + spark.sql(s"CREATE TABLE $nonPartTbl (id bigint, data string) USING foo PARTITIONED BY (id)") + val e1 = intercept[AnalysisException] { + sql(s"ALTER TABLE $nonPartTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + } + assert(e1.message.contains(s"Table $nonPartTbl can not alter partitions")) + + spark.sql(s"CREATE TABLE $partTbl (id bigint, data string) USING foo PARTITIONED BY (id)") + val e2 = intercept[AnalysisException] { + sql(s"ALTER TABLE $partTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") } - assert(e.message.contains("ALTER TABLE RENAME PARTITION is only supported with v1 tables")) + assert(e2.message.contains( + "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 6d65fddb1be62..9b84e0fe4bcb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -149,7 +149,9 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { assertAnalysisError( s"ALTER TABLE $viewName SET SERDEPROPERTIES ('p' = 'an')", s"$viewName is a temp view. 'ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]' expects a table") - assertNoSuchTable(s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')") + assertAnalysisError( + s"ALTER TABLE $viewName PARTITION (a='4') RENAME TO PARTITION (a='5')", + s"$viewName is a temp view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table") assertAnalysisError( s"ALTER TABLE $viewName RECOVER PARTITIONS", s"$viewName is a temp view. 'ALTER TABLE ... 
RECOVER PARTITIONS' expects a table") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index d6474ae7d5f00..7a6076d6d9576 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, NoSuchTableException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -1642,9 +1642,10 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { Set(Map("a" -> "10", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) // table to alter does not exist - intercept[NoSuchTableException] { + val e = intercept[AnalysisException] { sql("ALTER TABLE does_not_exist PARTITION (c='3') RENAME TO PARTITION (c='333')") } + assert(e.getMessage.contains("Table not found: does_not_exist")) // partition to rename does not exist intercept[NoSuchPartitionException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 34f127bade95b..e55b2d390a5d9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -896,8 +896,9 @@ class HiveDDLSuite s"ALTER TABLE $oldViewName RECOVER PARTITIONS", s"$oldViewName is a view. 'ALTER TABLE ... RECOVER PARTITIONS' expects a table.") - assertErrorForAlterTableOnView( - s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')") + assertAnalysisError( + s"ALTER TABLE $oldViewName PARTITION (a='1') RENAME TO PARTITION (a='100')", + s"$oldViewName is a view. 'ALTER TABLE ... RENAME TO PARTITION' expects a table.") assertAnalysisError( s"ALTER TABLE $oldViewName ADD IF NOT EXISTS PARTITION (a='4', b='8')", From b313a1e9e6360bb0ac939cb47083b9c4d21e614c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 08:34:12 +0000 Subject: [PATCH 0828/1009] [SPARK-33849][SQL][TESTS] Unify v1 and v2 DROP TABLE tests ### What changes were proposed in this pull request? 1. Move the `DROP TABLE` parsing tests to `DropTableParserSuite` 2. Place the v1 tests for `DROP TABLE` from `DDLSuite` and v2 tests from `DataSourceV2SQLSuite` to the common trait `DropTableSuiteBase`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `DROP TABLE` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableParserSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableSuite" ``` Closes #30854 from MaxGekk/unify-drop-table-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 26 ----- .../sql/connector/DataSourceV2SQLSuite.scala | 75 -------------- .../sql/execution/command/DDLSuite.scala | 21 ---- .../command/DropTableParserSuite.scala | 55 +++++++++++ .../command/DropTableSuiteBase.scala | 99 +++++++++++++++++++ .../execution/command/v1/DropTableSuite.scala | 62 ++++++++++++ .../execution/command/v2/DropTableSuite.scala | 74 ++++++++++++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../execution/command/DropTableSuite.scala | 22 +++++ 9 files changed, 312 insertions(+), 126 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 330a01be4bfb3..d408019053fb7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -694,32 +694,6 @@ class DDLParserSuite extends AnalysisTest { } } - test("drop table") { - parseCompare("DROP TABLE testcat.ns1.ns2.tbl", - DropTable( - UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), - ifExists = false, - purge = false)) - parseCompare(s"DROP TABLE db.tab", - DropTable( - UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) - parseCompare(s"DROP TABLE IF EXISTS db.tab", - DropTable( - UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) - parseCompare(s"DROP TABLE tab", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) - parseCompare(s"DROP TABLE IF EXISTS tab", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) - parseCompare(s"DROP TABLE tab PURGE", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) - parseCompare(s"DROP TABLE IF EXISTS tab PURGE", - DropTable( - UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) - } - test("drop view") { val cmd = "DROP VIEW" val hint = Some("Please use DROP TABLE instead.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 4fdb32c24f104..ed4ea567e4f65 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -711,81 +711,6 @@ class DataSourceV2SQLSuite assert(t2.v1Table.provider == 
Some(conf.defaultDataSourceName)) } - test("DropTable: basic") { - val tableName = "testcat.ns1.ns2.tbl" - val ident = Identifier.of(Array("ns1", "ns2"), "tbl") - sql(s"CREATE TABLE $tableName USING foo AS SELECT id, data FROM source") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === true) - sql(s"DROP TABLE $tableName") - assert(catalog("testcat").asTableCatalog.tableExists(ident) === false) - } - - test("DropTable: table qualified with the session catalog name") { - val ident = Identifier.of(Array("default"), "tbl") - sql("CREATE TABLE tbl USING json AS SELECT 1 AS i") - assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === true) - sql("DROP TABLE spark_catalog.default.tbl") - assert(catalog("spark_catalog").asTableCatalog.tableExists(ident) === false) - } - - test("DropTable: if exists") { - val ex = intercept[AnalysisException] { - sql("DROP TABLE testcat.db.notbl") - } - assert(ex.getMessage.contains("Table or view not found: testcat.db.notbl")) - sql("DROP TABLE IF EXISTS testcat.db.notbl") - } - - test("DropTable: purge option") { - withTable("testcat.ns.t") { - sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") - val ex = intercept[UnsupportedOperationException] { - sql ("DROP TABLE testcat.ns.t PURGE") - } - // The default TableCatalog.dropTable implementation doesn't support the purge option. - assert(ex.getMessage.contains("Purge option is not supported")) - } - } - - test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { - withTable("testcat.ns.t") { - withTempView("t") { - sql("CREATE TABLE testcat.ns.t (id bigint) USING foo") - sql("CREATE TEMPORARY VIEW t AS SELECT 2") - sql("USE testcat.ns") - - // Check the temporary view 't' exists. - runShowTablesSql( - "SHOW TABLES FROM spark_catalog.default LIKE 't'", - Seq(Row("", "t", true)), - expectV2Catalog = false) - sql("DROP TABLE t") - // Verify that the temporary view 't' is resolved first and dropped. 
- runShowTablesSql( - "SHOW TABLES FROM spark_catalog.default LIKE 't'", - Nil, - expectV2Catalog = false) - } - } - } - - test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = "testcat.ns.t" - val view = "view" - withTable(t) { - withTempView(view) { - sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) - checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } - test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => val view = "view" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 7a6076d6d9576..f92a93d54b1cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -338,10 +338,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testRenamePartitions(isDatasourceTable = true) } - test("drop table - data source table") { - testDropTable(isDatasourceTable = true) - } - test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -1332,23 +1328,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assert(catalog.listTables("default") == Nil) } - protected def testDropTable(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - assert(catalog.listTables("dbx") == Seq(tableIdent)) - sql("DROP TABLE dbx.tab1") - assert(catalog.listTables("dbx") == Nil) - sql("DROP TABLE IF EXISTS dbx.tab1") - intercept[AnalysisException] { - sql("DROP TABLE dbx.tab1") - } - } - test("drop view") { val catalog = spark.sessionState.catalog val tableIdent = TableIdentifier("tab1", Some("dbx")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala new file mode 100644 index 0000000000000..f88fff8ed326e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableParserSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTableOrView} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.{DropTable, LogicalPlan} +import org.apache.spark.sql.test.SharedSparkSession + +class DropTableParserSuite extends AnalysisTest with SharedSparkSession { + private def parseCompare(sql: String, expected: LogicalPlan): Unit = { + comparePlans(parsePlan(sql), expected, checkAnalysis = false) + } + + test("drop table") { + parseCompare("DROP TABLE testcat.ns1.ns2.tbl", + DropTable( + UnresolvedTableOrView(Seq("testcat", "ns1", "ns2", "tbl"), "DROP TABLE"), + ifExists = false, + purge = false)) + parseCompare(s"DROP TABLE db.tab", + DropTable( + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = false, purge = false)) + parseCompare(s"DROP TABLE IF EXISTS db.tab", + DropTable( + UnresolvedTableOrView(Seq("db", "tab"), "DROP TABLE"), ifExists = true, purge = false)) + parseCompare(s"DROP TABLE tab", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = false)) + parseCompare(s"DROP TABLE IF EXISTS tab", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = false)) + parseCompare(s"DROP TABLE tab PURGE", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = false, purge = true)) + parseCompare(s"DROP TABLE IF EXISTS tab PURGE", + DropTable( + UnresolvedTableOrView(Seq("tab"), "DROP TABLE"), ifExists = true, purge = true)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala new file mode 100644 index 0000000000000..dd620d3bd7aa4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} + +trait DropTableSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "DROP TABLE" + + protected def createTable(tableName: String): Unit = { + sql(s"CREATE TABLE $tableName (c int) $defaultUsing") + sql(s"INSERT INTO $tableName SELECT 0") + } + + protected def checkTables(namespace: String, expectedTables: String*): Unit = { + val tables = sql(s"SHOW TABLES IN $catalog.$namespace").select("tableName") + val rows = expectedTables.map(Row(_)) + checkAnswer(tables, rows) + } + + test("basic") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + + sql(s"DROP TABLE $catalog.ns.tbl") + checkTables("ns") // no tables + } + } + + test("try to drop a nonexistent table") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + checkTables("ns") // no tables + + val errMsg = intercept[AnalysisException] { + sql(s"DROP TABLE $catalog.ns.tbl") + }.getMessage + assert(errMsg.contains("Table or view not found")) + } + } + + test("with IF EXISTS") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + sql(s"DROP TABLE IF EXISTS $catalog.ns.tbl") + checkTables("ns") + + // It must not throw any exceptions + sql(s"DROP TABLE IF EXISTS $catalog.ns.tbl") + checkTables("ns") + } + } + + test("SPARK-33174: DROP TABLE should resolve to a temporary view first") { + withNamespaceAndTable("ns", "t") { t => + withTempView("t") { + sql(s"CREATE TABLE $t (id bigint) $defaultUsing") + sql("CREATE TEMPORARY VIEW t AS SELECT 2") + sql(s"USE $catalog.ns") + try { + // Check the temporary view 't' exists. + checkAnswer( + sql("SHOW TABLES FROM spark_catalog.default LIKE 't'") + .select("tableName", "isTemporary"), + Row("t", true)) + sql("DROP TABLE t") + // Verify that the temporary view 't' is resolved first and dropped. + checkAnswer( + sql("SHOW TABLES FROM spark_catalog.default LIKE 't'") + .select("tableName", "isTemporary"), + Seq.empty) + } finally { + sql(s"USE spark_catalog") + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala new file mode 100644 index 0000000000000..4a6956e9ad82d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.execution.command + +trait DropTableSuiteBase extends command.DropTableSuiteBase { + test("purge option") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + + createTable(s"$catalog.ns.tbl") + checkTables("ns", "tbl") + + sql(s"DROP TABLE $catalog.ns.tbl PURGE") + checkTables("ns") // no tables + } + } +} + +class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase { + // The test fails in Hive External catalog with: + // org.apache.spark.sql.AnalysisException: + // spark_catalog.ns.tbl is not a valid TableIdentifier as it has more than 2 name parts. + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } +} + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala new file mode 100644 index 0000000000000..a36df8df4dd06 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.Row +import org.apache.spark.sql.connector.InMemoryTableSessionCatalog +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION + +class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { + test("purge option") { + withNamespaceAndTable("ns", "tbl") { t => + createTable(t) + val errMsg = intercept[UnsupportedOperationException] { + sql(s"DROP TABLE $catalog.ns.tbl PURGE") + }.getMessage + // The default TableCatalog.dropTable implementation doesn't support the purge option. 
+ assert(errMsg.contains("Purge option is not supported")) + } + } + + test("table qualified with the session catalog name") { + withSQLConf( + V2_SESSION_CATALOG_IMPLEMENTATION.key -> classOf[InMemoryTableSessionCatalog].getName) { + + sql("CREATE TABLE tbl USING json AS SELECT 1 AS i") + checkAnswer( + sql("SHOW TABLES IN spark_catalog.default").select("tableName"), + Row("tbl")) + + sql("DROP TABLE spark_catalog.default.tbl") + checkAnswer( + sql("SHOW TABLES IN spark_catalog.default").select("tableName"), + Seq.empty) + } + } + + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index e55b2d390a5d9..f13c8704f3b5b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -163,10 +163,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testRenamePartitions(isDatasourceTable = false) } - test("drop table") { - testDropTable(isDatasourceTable = false) - } - test("alter datasource table add columns - orc") { testAddColumn("orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala new file mode 100644 index 0000000000000..b2a404d7206a6 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DropTableSuite.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +class DropTableSuite extends v1.DropTableSuiteBase with CommandSuiteBase From 8d4d43319191ada0e07e3b27abe41929aa3eefe5 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Mon, 21 Dec 2020 19:42:59 +0900 Subject: [PATCH 0829/1009] [SPARK-33836][SS][PYTHON] Expose DataStreamReader.table and DataStreamWriter.toTable ### What changes were proposed in this pull request? This PR proposes to expose `DataStreamReader.table` (SPARK-32885) and `DataStreamWriter.toTable` (SPARK-32896) to PySpark, which are the only way to read and write with table in Structured Streaming. ### Why are the changes needed? Please refer SPARK-32885 and SPARK-32896 for rationalizations of these public APIs. This PR only exposes them to PySpark. ### Does this PR introduce _any_ user-facing change? Yes, PySpark users will be able to read and write with table in Structured Streaming query. ### How was this patch tested? Manually tested. > v1 table >> create table A and ingest to the table A ``` spark.sql(""" create table table_pyspark_parquet ( value long, `timestamp` timestamp ) USING parquet """) df = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query = df.writeStream.toTable('table_pyspark_parquet', checkpointLocation='/tmp/checkpoint5') query.lastProgress query.stop() ``` >> read table A and ingest to the table B which doesn't exist ``` df2 = spark.readStream.table('table_pyspark_parquet') query2 = df2.writeStream.toTable('table_pyspark_parquet_nonexist', format='parquet', checkpointLocation='/tmp/checkpoint2') query2.lastProgress query2.stop() ``` >> select tables ``` spark.sql("DESCRIBE TABLE table_pyspark_parquet").show() spark.sql("SELECT * FROM table_pyspark_parquet").show() spark.sql("DESCRIBE TABLE table_pyspark_parquet_nonexist").show() spark.sql("SELECT * FROM table_pyspark_parquet_nonexist").show() ``` > v2 table (leveraging Apache Iceberg as it provides V2 table and custom catalog as well) >> create table A and ingest to the table A ``` spark.sql(""" create table iceberg_catalog.default.table_pyspark_v2table ( value long, `timestamp` timestamp ) USING iceberg """) df = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query = df.select('value', 'timestamp').writeStream.toTable('iceberg_catalog.default.table_pyspark_v2table', checkpointLocation='/tmp/checkpoint_v2table_1') query.lastProgress query.stop() ``` >> ingest to the non-exist table B ``` df2 = spark.readStream.format('rate').option('rowsPerSecond', 100).load() query2 = df2.select('value', 'timestamp').writeStream.toTable('iceberg_catalog.default.table_pyspark_v2table_nonexist', checkpointLocation='/tmp/checkpoint_v2table_2') query2.lastProgress query2.stop() ``` >> ingest to the non-exist table C partitioned by `value % 10` ``` df3 = spark.readStream.format('rate').option('rowsPerSecond', 100).load() df3a = df3.selectExpr('value', 'timestamp', 'value % 10 AS partition').repartition('partition') query3 = df3a.writeStream.partitionBy('partition').toTable('iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned', checkpointLocation='/tmp/checkpoint_v2table_3') query3.lastProgress query3.stop() ``` >> select tables ``` spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table").show() spark.sql("SELECT * FROM iceberg_catalog.default.table_pyspark_v2table").show() spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table_nonexist").show() spark.sql("SELECT * FROM 
iceberg_catalog.default.table_pyspark_v2table_nonexist").show() spark.sql("DESCRIBE TABLE iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned").show() spark.sql("SELECT * FROM iceberg_catalog.default.table_pyspark_v2table_nonexist_partitioned").show() ``` Closes #30835 from HeartSaVioR/SPARK-33836. Lead-authored-by: Jungtaek Lim Co-authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: HyukjinKwon --- python/pyspark/sql/streaming.py | 105 ++++++++++++++++++++- python/pyspark/sql/streaming.pyi | 10 ++ python/pyspark/sql/tests/test_streaming.py | 26 +++++ 3 files changed, 139 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 365b5f38694a7..2c9c1f06274ce 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -953,6 +953,36 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non else: raise TypeError("path can be only a single string") + def table(self, tableName): + """Define a Streaming DataFrame on a Table. The DataSource corresponding to the table should + support streaming mode. + + .. versionadded:: 3.1.0 + + Parameters + ---------- + tableName : str + string, for the name of the table. + + Returns + -------- + :class:`DataFrame` + + Notes + ----- + This API is evolving. + + Examples + -------- + >>> csv_sdf = spark.readStream.table('input_table') # doctest: +SKIP + >>> csv_sdf.isStreaming # doctest: +SKIP + True + """ + if isinstance(tableName, str): + return self._df(self._jreader.table(tableName)) + else: + raise TypeError("tableName can be only a single string") + class DataStreamWriter(object): """ @@ -987,7 +1017,7 @@ def outputMode(self, outputMode): * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the sink - every time these is some updates + every time these are some updates * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. @@ -1416,7 +1446,7 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the sink * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the - sink every time these is some updates + sink every time these are some updates * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be written to the sink every time there are some updates. If the query doesn't contain aggregations, it will be equivalent to `append` mode. @@ -1464,6 +1494,77 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query else: return self._sq(self._jwrite.start(path)) + def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, queryName=None, + **options): + """ + Starts the execution of the streaming query, which will continually output results to the + given table as new data arrives. + + A new table will be created if the table not exists. The returned + :class:`StreamingQuery` object can be used to interact with the stream. + + .. versionadded:: 3.1.0 + + Parameters + ---------- + tableName : str + string, for the name of the table. + format : str, optional + the format used to save. 
+ outputMode : str, optional + specifies how data of a streaming DataFrame/Dataset is written to a + streaming sink. + + * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the + sink + * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the + sink every time these are some updates + * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be + written to the sink every time there are some updates. If the query doesn't contain + aggregations, it will be equivalent to `append` mode. + partitionBy : str or list, optional + names of partitioning columns + queryName : str, optional + unique name for the query + **options : dict + All other string options. You may want to provide a `checkpointLocation`. + + Notes + ----- + This API is evolving. + + Examples + -------- + >>> sq = sdf.writeStream.format('parquet').queryName('this_query').option( + ... 'checkpointLocation', '/tmp/checkpoint').toTable('output_table') # doctest: +SKIP + >>> sq.isActive # doctest: +SKIP + True + >>> sq.name # doctest: +SKIP + 'this_query' + >>> sq.stop() # doctest: +SKIP + >>> sq.isActive # doctest: +SKIP + False + >>> sq = sdf.writeStream.trigger(processingTime='5 seconds').toTable( + ... 'output_table', queryName='that_query', outputMode="append", format='parquet', + ... checkpointLocation='/tmp/checkpoint') # doctest: +SKIP + >>> sq.name # doctest: +SKIP + 'that_query' + >>> sq.isActive # doctest: +SKIP + True + >>> sq.stop() # doctest: +SKIP + """ + # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API + self.options(**options) + if outputMode is not None: + self.outputMode(outputMode) + if partitionBy is not None: + self.partitionBy(partitionBy) + if format is not None: + self.format(format) + if queryName is not None: + self.queryName(queryName) + return self._sq(self._jwrite.toTable(tableName)) + def _test(): import doctest diff --git a/python/pyspark/sql/streaming.pyi b/python/pyspark/sql/streaming.pyi index 829610ad3b94b..1d05483c012f1 100644 --- a/python/pyspark/sql/streaming.pyi +++ b/python/pyspark/sql/streaming.pyi @@ -151,6 +151,7 @@ class DataStreamReader(OptionUtils): recursiveFileLookup: Optional[Union[bool, str]] = ..., unescapedQuoteHandling: Optional[str] = ..., ) -> DataFrame: ... + def table(self, tableName: str) -> DataFrame: ... class DataStreamWriter: def __init__(self, df: DataFrame) -> None: ... @@ -185,3 +186,12 @@ class DataStreamWriter: def foreachBatch( self, func: Callable[[DataFrame, int], None] ) -> DataStreamWriter: ... + def toTable( + self, + tableName: str, + format: Optional[str] = ..., + outputMode: Optional[str] = ..., + partitionBy: Optional[Union[str, List[str]]] = ..., + queryName: Optional[str] = ..., + **options: OptionalPrimitiveType + ) -> StreamingQuery: ... 
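For context, the Python `table` and `toTable` methods added above are thin wrappers over the JVM `DataStreamReader.table` and `DataStreamWriter.toTable` APIs from SPARK-32885/SPARK-32896 (the diff delegates to `self._jreader.table(...)` and `self._jwrite.toTable(...)`). Below is a minimal Scala sketch of the same round trip, loosely mirroring the manual test in the PR description; the table name `demo_stream_sink`, the checkpoint path, and the app name are illustrative, and it assumes a Spark 3.1-era build with the built-in `rate` source:

```
import org.apache.spark.sql.SparkSession

object StreamTableRoundTrip {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("stream-table-sketch").getOrCreate()

    // Target table, created up front so both the writer and the reader can refer to it.
    spark.sql(
      "CREATE TABLE IF NOT EXISTS demo_stream_sink (value long, `timestamp` timestamp) USING parquet")

    // Write side: continuously append the built-in rate source into the table.
    val query = spark.readStream
      .format("rate")
      .option("rowsPerSecond", "10")
      .load()
      .selectExpr("value", "timestamp")
      .writeStream
      .option("checkpointLocation", "/tmp/demo_stream_sink_checkpoint")
      .toTable("demo_stream_sink")

    // Read side: define a streaming DataFrame on the same table.
    val echoed = spark.readStream.table("demo_stream_sink")
    assert(echoed.isStreaming)

    query.stop()
    spark.stop()
  }
}
```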
diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 28a50f9575a0a..44bfb2a7447ca 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -19,7 +19,9 @@ import shutil import tempfile import time +from random import randint +from pyspark.sql import Row from pyspark.sql.functions import lit from pyspark.sql.types import StructType, StructField, IntegerType, StringType from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -569,6 +571,30 @@ def collectBatch(df, id): if q: q.stop() + def test_streaming_read_from_table(self): + input_table_name = "sample_input_table_%d" % randint(0, 100000000) + self.spark.sql("CREATE TABLE %s (value string) USING parquet" % input_table_name) + self.spark.sql("INSERT INTO %s VALUES ('aaa'), ('bbb'), ('ccc')" % input_table_name) + df = self.spark.readStream.table(input_table_name) + self.assertTrue(df.isStreaming) + q = df.writeStream.format('memory').queryName('this_query').start() + q.processAllAvailable() + q.stop() + result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() + self.assertEqual([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')], result) + + def test_streaming_write_to_table(self): + output_table_name = "sample_output_table_%d" % randint(0, 100000000) + tmpPath = tempfile.mkdtemp() + shutil.rmtree(tmpPath) + df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() + q = df.writeStream.toTable(output_table_name, format='parquet', checkpointLocation=tmpPath) + self.assertTrue(q.isActive) + time.sleep(3) + q.stop() + result = self.spark.sql("SELECT value FROM %s" % output_table_name).collect() + self.assertTrue(len(result) > 0) + if __name__ == "__main__": import unittest From f4e1069bb835e3e132f7758e5842af79f26cd162 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 21 Dec 2020 03:29:00 -0800 Subject: [PATCH 0830/1009] [SPARK-33853][SQL] EXPLAIN CODEGEN and BenchmarkQueryTest don't show subquery code ### What changes were proposed in this pull request? This PR fixes an issue that `EXPLAIN CODEGEN` and `BenchmarkQueryTest` don't show the corresponding code for subqueries. The following example is about `EXPLAIN CODEGEN`. ``` spark.conf.set("spark.sql.adaptive.enabled", "false") val df = spark.range(1, 100) df.createTempView("df") spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") scala> spark.sql("SELECT (SELECT min(id) AS v FROM df)").explain("CODEGEN") Found 1 WholeStageCodegen subtrees. 
== Subtree 1 / 1 (maxMethodCodeSize:55; maxConstantPoolSize:97(0.15% used); numInnerClasses:0) == *(1) Project [Subquery scalar-subquery#3, [id=#24] AS scalarsubquery()#5L] : +- Subquery scalar-subquery#3, [id=#24] : +- *(2) HashAggregate(keys=[], functions=[min(id#0L)], output=[v#2L]) : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#20] : +- *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) : +- *(1) Range (1, 100, step=1, splits=12) +- *(1) Scan OneRowRelation[] Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private scala.collection.Iterator rdd_input_0; /* 010 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] project_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[1]; /* 011 */ /* 012 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 013 */ this.references = references; /* 014 */ } /* 015 */ /* 016 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 017 */ partitionIndex = index; /* 018 */ this.inputs = inputs; /* 019 */ rdd_input_0 = inputs[0]; /* 020 */ project_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 021 */ /* 022 */ } /* 023 */ /* 024 */ private void project_doConsume_0() throws java.io.IOException { /* 025 */ // common sub-expressions /* 026 */ /* 027 */ project_mutableStateArray_0[0].reset(); /* 028 */ /* 029 */ if (false) { /* 030 */ project_mutableStateArray_0[0].setNullAt(0); /* 031 */ } else { /* 032 */ project_mutableStateArray_0[0].write(0, 1L); /* 033 */ } /* 034 */ append((project_mutableStateArray_0[0].getRow())); /* 035 */ /* 036 */ } /* 037 */ /* 038 */ protected void processNext() throws java.io.IOException { /* 039 */ while ( rdd_input_0.hasNext()) { /* 040 */ InternalRow rdd_row_0 = (InternalRow) rdd_input_0.next(); /* 041 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(1); /* 042 */ project_doConsume_0(); /* 043 */ if (shouldStop()) return; /* 044 */ } /* 045 */ } /* 046 */ /* 047 */ } ``` After this change, the corresponding code for subqueries are shown. ``` Found 3 WholeStageCodegen subtrees. 
== Subtree 1 / 3 (maxMethodCodeSize:282; maxConstantPoolSize:206(0.31% used); numInnerClasses:0) == *(1) HashAggregate(keys=[], functions=[partial_min(id#0L)], output=[min#8L]) +- *(1) Range (1, 100, step=1, splits=12) Generated code: /* 001 */ public Object generate(Object[] references) { /* 002 */ return new GeneratedIteratorForCodegenStage1(references); /* 003 */ } /* 004 */ /* 005 */ // codegenStageId=1 /* 006 */ final class GeneratedIteratorForCodegenStage1 extends org.apache.spark.sql.execution.BufferedRowIterator { /* 007 */ private Object[] references; /* 008 */ private scala.collection.Iterator[] inputs; /* 009 */ private boolean agg_initAgg_0; /* 010 */ private boolean agg_bufIsNull_0; /* 011 */ private long agg_bufValue_0; /* 012 */ private boolean range_initRange_0; /* 013 */ private long range_nextIndex_0; /* 014 */ private TaskContext range_taskContext_0; /* 015 */ private InputMetrics range_inputMetrics_0; /* 016 */ private long range_batchEnd_0; /* 017 */ private long range_numElementsTodo_0; /* 018 */ private boolean agg_agg_isNull_2_0; /* 019 */ private org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[] range_mutableStateArray_0 = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter[3]; /* 020 */ /* 021 */ public GeneratedIteratorForCodegenStage1(Object[] references) { /* 022 */ this.references = references; /* 023 */ } /* 024 */ /* 025 */ public void init(int index, scala.collection.Iterator[] inputs) { /* 026 */ partitionIndex = index; /* 027 */ this.inputs = inputs; /* 028 */ /* 029 */ range_taskContext_0 = TaskContext.get(); /* 030 */ range_inputMetrics_0 = range_taskContext_0.taskMetrics().inputMetrics(); /* 031 */ range_mutableStateArray_0[0] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 032 */ range_mutableStateArray_0[1] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 033 */ range_mutableStateArray_0[2] = new org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter(1, 0); /* 034 */ /* 035 */ } /* 036 */ /* 037 */ private void agg_doAggregateWithoutKey_0() throws java.io.IOException { /* 038 */ // initialize aggregation buffer /* 039 */ agg_bufIsNull_0 = true; /* 040 */ agg_bufValue_0 = -1L; /* 041 */ /* 042 */ // initialize Range /* 043 */ if (!range_initRange_0) { /* 044 */ range_initRange_0 = true; /* 045 */ initRange(partitionIndex); /* 046 */ } /* 047 */ /* 048 */ while (true) { /* 049 */ if (range_nextIndex_0 == range_batchEnd_0) { /* 050 */ long range_nextBatchTodo_0; /* 051 */ if (range_numElementsTodo_0 > 1000L) { /* 052 */ range_nextBatchTodo_0 = 1000L; /* 053 */ range_numElementsTodo_0 -= 1000L; /* 054 */ } else { /* 055 */ range_nextBatchTodo_0 = range_numElementsTodo_0; /* 056 */ range_numElementsTodo_0 = 0; /* 057 */ if (range_nextBatchTodo_0 == 0) break; /* 058 */ } /* 059 */ range_batchEnd_0 += range_nextBatchTodo_0 * 1L; /* 060 */ } /* 061 */ /* 062 */ int range_localEnd_0 = (int)((range_batchEnd_0 - range_nextIndex_0) / 1L); /* 063 */ for (int range_localIdx_0 = 0; range_localIdx_0 < range_localEnd_0; range_localIdx_0++) { /* 064 */ long range_value_0 = ((long)range_localIdx_0 * 1L) + range_nextIndex_0; /* 065 */ /* 066 */ agg_doConsume_0(range_value_0); /* 067 */ /* 068 */ // shouldStop check is eliminated /* 069 */ } /* 070 */ range_nextIndex_0 = range_batchEnd_0; /* 071 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[0] /* numOutputRows */).add(range_localEnd_0); /* 072 */ 
range_inputMetrics_0.incRecordsRead(range_localEnd_0); /* 073 */ range_taskContext_0.killTaskIfInterrupted(); /* 074 */ } /* 075 */ /* 076 */ } /* 077 */ /* 078 */ private void initRange(int idx) { /* 079 */ java.math.BigInteger index = java.math.BigInteger.valueOf(idx); /* 080 */ java.math.BigInteger numSlice = java.math.BigInteger.valueOf(12L); /* 081 */ java.math.BigInteger numElement = java.math.BigInteger.valueOf(99L); /* 082 */ java.math.BigInteger step = java.math.BigInteger.valueOf(1L); /* 083 */ java.math.BigInteger start = java.math.BigInteger.valueOf(1L); /* 084 */ long partitionEnd; /* 085 */ /* 086 */ java.math.BigInteger st = index.multiply(numElement).divide(numSlice).multiply(step).add(start); /* 087 */ if (st.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 088 */ range_nextIndex_0 = Long.MAX_VALUE; /* 089 */ } else if (st.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 090 */ range_nextIndex_0 = Long.MIN_VALUE; /* 091 */ } else { /* 092 */ range_nextIndex_0 = st.longValue(); /* 093 */ } /* 094 */ range_batchEnd_0 = range_nextIndex_0; /* 095 */ /* 096 */ java.math.BigInteger end = index.add(java.math.BigInteger.ONE).multiply(numElement).divide(numSlice) /* 097 */ .multiply(step).add(start); /* 098 */ if (end.compareTo(java.math.BigInteger.valueOf(Long.MAX_VALUE)) > 0) { /* 099 */ partitionEnd = Long.MAX_VALUE; /* 100 */ } else if (end.compareTo(java.math.BigInteger.valueOf(Long.MIN_VALUE)) < 0) { /* 101 */ partitionEnd = Long.MIN_VALUE; /* 102 */ } else { /* 103 */ partitionEnd = end.longValue(); /* 104 */ } /* 105 */ /* 106 */ java.math.BigInteger startToEnd = java.math.BigInteger.valueOf(partitionEnd).subtract( /* 107 */ java.math.BigInteger.valueOf(range_nextIndex_0)); /* 108 */ range_numElementsTodo_0 = startToEnd.divide(step).longValue(); /* 109 */ if (range_numElementsTodo_0 < 0) { /* 110 */ range_numElementsTodo_0 = 0; /* 111 */ } else if (startToEnd.remainder(step).compareTo(java.math.BigInteger.valueOf(0L)) != 0) { /* 112 */ range_numElementsTodo_0++; /* 113 */ } /* 114 */ } /* 115 */ /* 116 */ private void agg_doConsume_0(long agg_expr_0_0) throws java.io.IOException { /* 117 */ // do aggregate /* 118 */ // common sub-expressions /* 119 */ /* 120 */ // evaluate aggregate functions and update aggregation buffers /* 121 */ /* 122 */ agg_agg_isNull_2_0 = true; /* 123 */ long agg_value_2 = -1L; /* 124 */ /* 125 */ if (!agg_bufIsNull_0 && (agg_agg_isNull_2_0 || /* 126 */ agg_value_2 > agg_bufValue_0)) { /* 127 */ agg_agg_isNull_2_0 = false; /* 128 */ agg_value_2 = agg_bufValue_0; /* 129 */ } /* 130 */ /* 131 */ if (!false && (agg_agg_isNull_2_0 || /* 132 */ agg_value_2 > agg_expr_0_0)) { /* 133 */ agg_agg_isNull_2_0 = false; /* 134 */ agg_value_2 = agg_expr_0_0; /* 135 */ } /* 136 */ /* 137 */ agg_bufIsNull_0 = agg_agg_isNull_2_0; /* 138 */ agg_bufValue_0 = agg_value_2; /* 139 */ /* 140 */ } /* 141 */ /* 142 */ protected void processNext() throws java.io.IOException { /* 143 */ while (!agg_initAgg_0) { /* 144 */ agg_initAgg_0 = true; /* 145 */ long agg_beforeAgg_0 = System.nanoTime(); /* 146 */ agg_doAggregateWithoutKey_0(); /* 147 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[2] /* aggTime */).add((System.nanoTime() - agg_beforeAgg_0) / 1000000); /* 148 */ /* 149 */ // output the result /* 150 */ /* 151 */ ((org.apache.spark.sql.execution.metric.SQLMetric) references[1] /* numOutputRows */).add(1); /* 152 */ range_mutableStateArray_0[2].reset(); /* 153 */ /* 154 */ range_mutableStateArray_0[2].zeroOutNullBytes(); 
/* 155 */ /* 156 */ if (agg_bufIsNull_0) { /* 157 */ range_mutableStateArray_0[2].setNullAt(0); /* 158 */ } else { /* 159 */ range_mutableStateArray_0[2].write(0, agg_bufValue_0); /* 160 */ } /* 161 */ append((range_mutableStateArray_0[2].getRow())); /* 162 */ } /* 163 */ } /* 164 */ /* 165 */ } ``` ### Why are the changes needed? For better debuggability. ### Does this PR introduce _any_ user-facing change? Yes. After this change, users can see subquery code by `EXPLAIN CODEGEN`. ### How was this patch tested? New test. Closes #30859 from sarutak/explain-codegen-subqueries. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/debug/package.scala | 15 ++++++++++----- .../apache/spark/sql/BenchmarkQueryTest.scala | 14 ++++++++++---- .../org/apache/spark/sql/ExplainSuite.scala | 16 ++++++++++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala index 6c40104e52a5f..3cbebca14f7dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala @@ -107,12 +107,17 @@ package object debug { */ def codegenStringSeq(plan: SparkPlan): Seq[(String, String, ByteCodeStats)] = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() - plan transform { - case s: WholeStageCodegenExec => - codegenSubtrees += s - s - case s => s + + def findSubtrees(plan: SparkPlan): Unit = { + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => + s.subqueries.foreach(findSubtrees) + } } + + findSubtrees(plan) codegenSubtrees.toSeq.sortBy(_.codegenStageId).map { subtree => val (_, source) = subtree.doCodeGen() val codeStats = try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala index 2c3b37a1498ec..d58bf2c6260b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BenchmarkQueryTest.scala @@ -63,11 +63,17 @@ abstract class BenchmarkQueryTest extends QueryTest with SharedSparkSession { protected def checkGeneratedCode(plan: SparkPlan, checkMethodCodeSize: Boolean = true): Unit = { val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegenExec]() - plan foreach { - case s: WholeStageCodegenExec => - codegenSubtrees += s - case _ => + + def findSubtrees(plan: SparkPlan): Unit = { + plan foreach { + case s: WholeStageCodegenExec => + codegenSubtrees += s + case s => + s.subqueries.foreach(findSubtrees) + } } + + findSubtrees(plan) codegenSubtrees.toSeq.foreach { subtree => val code = subtree.doCodeGen()._2 val (_, ByteCodeStats(maxMethodCodeSize, _, _)) = try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 8b7459fddb59a..bf100c0205efa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -228,6 +228,22 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } } + test("SPARK-33853: explain codegen - check presence of subquery") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + withTempView("df") { + val df1 = spark.range(1, 100) + 
df1.createTempView("df") + + val sqlText = "EXPLAIN CODEGEN SELECT (SELECT min(id) FROM df)" + val expectedText = "Found 3 WholeStageCodegen subtrees." + + withNormalizedExplain(sqlText) { normalizedOutput => + assert(normalizedOutput.contains(expectedText)) + } + } + } + } + test("explain formatted - check presence of subquery in case of DPP") { withTable("df1", "df2") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", From cdd1752ad1bbb03b817870e1ad6b1d9cbda734a1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 03:37:30 -0800 Subject: [PATCH 0831/1009] [SPARK-33862][SQL] Throw `PartitionAlreadyExistsException` if the target partition exists while renaming ### What changes were proposed in this pull request? Throw `PartitionAlreadyExistsException` from `ALTER TABLE .. RENAME TO PARTITION` for a table from Hive V1 External Catalog in the case when the target partition already exists. ### Why are the changes needed? 1. To have the same behavior of V1 In-Memory and Hive External Catalog. 2. To not propagate internal Hive's exceptions to users. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the partition renaming command throws `PartitionAlreadyExistsException` for tables from the Hive catalog. ### How was this patch tested? Added new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *HiveCatalogedDDLSuite" ``` Closes #30866 from MaxGekk/throw-PartitionAlreadyExistsException. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 8 +++++++- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 5 ++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index f92a93d54b1cb..49184d0a2e0d0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, PartitionAlreadyExistsException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -1635,6 +1635,12 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { sql("ALTER TABLE tab1 PARTITION (A='10', B='p') RENAME TO PARTITION (A='1', B='p')") assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(Map("a" -> "1", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) + + // target partition already exists + val errMsg = intercept[PartitionAlreadyExistsException] { + sql("ALTER TABLE tab1 PARTITION (a='1', b='p') RENAME TO PARTITION (a='20', b='c')") + }.getMessage + assert(errMsg.contains("Partition already 
exists")) } protected def testChangeColumn(isDatasourceTable: Boolean): Unit = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index e779a80f7c323..40bcdefbc351e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -49,7 +49,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionsAlreadyExistException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, PartitionAlreadyExistsException, PartitionsAlreadyExistException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Expression @@ -665,6 +665,9 @@ private[hive] class HiveClientImpl( val catalogTable = getTable(db, table) val hiveTable = toHiveTable(catalogTable, Some(userName)) specs.zip(newSpecs).foreach { case (oldSpec, newSpec) => + if (client.getPartition(hiveTable, newSpec.asJava, false) != null) { + throw new PartitionAlreadyExistsException(db, table, newSpec) + } val hivePart = getPartitionOption(catalogTable, oldSpec) .map { p => toHivePartition(p.copy(spec = newSpec), hiveTable) } .getOrElse { throw new NoSuchPartitionException(db, table, oldSpec) } From b4bea1aa8972cdfd8901757a0ed990a20fca620f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 21 Dec 2020 20:59:33 +0900 Subject: [PATCH 0832/1009] [SPARK-28863][SQL][FOLLOWUP] Make sure optimized plan will not be re-analyzed ### What changes were proposed in this pull request? It's a known issue that re-analyzing an optimized plan can lead to various issues. We made several attempts to avoid it from happening, but the current solution `AlreadyOptimized` is still not 100% safe, as people can inject catalyst rules to call analyzer directly. This PR proposes a simpler and safer idea: we set the `analyzed` flag to true after optimization, and analyzer will skip processing plans whose `analyzed` flag is true. ### Why are the changes needed? make the code simpler and safer ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests. Closes #30777 from cloud-fan/ds. 
Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../sql/catalyst/analysis/Analyzer.scala | 1 + .../plans/logical/AnalysisHelper.scala | 7 +- .../sql/execution/AlreadyOptimized.scala | 37 -------- .../spark/sql/execution/QueryExecution.scala | 7 +- .../datasources/v2/V1FallbackWriters.scala | 7 +- .../sql/execution/AlreadyOptimizedSuite.scala | 85 ------------------- 6 files changed, 16 insertions(+), 128 deletions(-) delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1a5f33443d8e3..8d8e00b80c506 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -168,6 +168,7 @@ class Analyzer(override val catalogManager: CatalogManager) } def executeAndCheck(plan: LogicalPlan, tracker: QueryPlanningTracker): LogicalPlan = { + if (plan.analyzed) return plan AnalysisHelper.markInAnalyzer { val analyzed = executeAndTrack(plan, tracker) try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index 2c6a716a2ed48..ffd1f784e4670 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -46,7 +46,7 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => * This should only be called by * [[org.apache.spark.sql.catalyst.analysis.CheckAnalysis]]. */ - private[catalyst] def setAnalyzed(): Unit = { + private[sql] def setAnalyzed(): Unit = { if (!_analyzed) { _analyzed = true children.foreach(_.setAnalyzed()) @@ -180,6 +180,11 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => super.transformAllExpressions(rule) } + override def clone(): LogicalPlan = { + val cloned = super.clone() + if (analyzed) cloned.setAnalyzed() + cloned + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala deleted file mode 100644 index e40b1141b43eb..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AlreadyOptimized.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan - -/** Query execution that skips re-analysis and optimize. */ -class AlreadyOptimizedExecution( - session: SparkSession, - plan: LogicalPlan) extends QueryExecution(session, plan) { - override lazy val analyzed: LogicalPlan = plan - override lazy val optimizedPlan: LogicalPlan = plan -} - -object AlreadyOptimized { - def dataFrame(sparkSession: SparkSession, optimized: LogicalPlan): DataFrame = { - val qe = new AlreadyOptimizedExecution(sparkSession, optimized) - new Dataset[Row](qe, RowEncoder(qe.analyzed.schema)) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 0531dd210e539..1d5a884d6e181 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -84,7 +84,12 @@ class QueryExecution( lazy val optimizedPlan: LogicalPlan = executePhase(QueryPlanningTracker.OPTIMIZATION) { // clone the plan to avoid sharing the plan instance between different stages like analyzing, // optimizing and planning. - sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) + val plan = sparkSession.sessionState.optimizer.executeAndTrack(withCachedData.clone(), tracker) + // We do not want optimized plans to be re-analyzed as literals that have been constant folded + // and such can cause issues during analysis. While `clone` should maintain the `analyzed` state + // of the LogicalPlan, we set the plan as analyzed here as well out of paranoia. + plan.setAnalyzed() + plan } private def assertOptimized(): Unit = optimizedPlan diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 9d2cea9fbaff3..080e977121efb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -20,12 +20,13 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.UUID import org.apache.spark.SparkException +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.SupportsWrite import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} -import org.apache.spark.sql.execution.{AlreadyOptimized, SparkPlan} +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.sources.{AlwaysTrue, Filter, InsertableRelation} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -118,9 +119,7 @@ trait SupportsV1Write extends SparkPlan { protected def writeWithV1( relation: InsertableRelation, refreshCache: () => Unit = () => ()): Seq[InternalRow] = { - val session = sqlContext.sparkSession - // The `plan` is already optimized, we should not analyze and optimize it again. 
- relation.insert(AlreadyOptimized.dataFrame(session, plan), overwrite = false) + relation.insert(Dataset.ofRows(sqlContext.sparkSession, plan), overwrite = false) refreshCache() Nil diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala deleted file mode 100644 index c266aa92f01cc..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/AlreadyOptimizedSuite.scala +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution - -import org.apache.spark.sql.QueryTest -import org.apache.spark.sql.execution.adaptive.EnableAdaptiveExecutionSuite -import org.apache.spark.sql.test.SharedSparkSession - -class AlreadyOptimizedSuite extends QueryTest with SharedSparkSession { - - import testImplicits._ - - test("simple execution") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned, df.toDF().collect()) - } - - test("planning on top works - projection") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer( - planned.withColumn("data", 'id + 1), - df.withColumn("data", 'id + 1).collect()) - } - - test("planning on top works - filter") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned.where('id < 5), df.where('id < 5).toDF().collect()) - } - - test("planning on top works - aggregate") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - checkAnswer(planned.groupBy('id).count(), df.groupBy('id).count().collect()) - } - - test("planning on top works - joins") { - val df = spark.range(10) - val planned = AlreadyOptimized.dataFrame(spark, df.queryExecution.optimizedPlan) - - val plannedLeft = planned.alias("l") - val dfLeft = df.alias("l") - val plannedRight = planned.alias("r") - val dfRight = df.alias("r") - - checkAnswer( - plannedLeft.where('id < 3).join(plannedRight, Seq("id")), - dfLeft.where('id < 3).join(dfRight, Seq("id")).collect()) - - checkAnswer( - plannedLeft.where('id < 3).join(plannedRight, plannedLeft("id") === plannedRight("id")), - dfLeft.where('id < 3).join(dfRight, dfLeft("id") === dfRight("id")).collect()) - - checkAnswer( - plannedLeft.join(plannedRight, Seq("id")).where('id < 3), - dfLeft.join(dfRight, Seq("id")).where('id < 3).collect()) - - checkAnswer( - plannedLeft.join(plannedRight, plannedLeft("id") === plannedRight("id")).where($"l.id" < 3), - dfLeft.join(dfRight, dfLeft("id") === dfRight("id")).where($"l.id" < 
3).collect()) - } -} - -class AlreadyOptimizedAQESuite extends AlreadyOptimizedSuite with EnableAdaptiveExecutionSuite From 4b19f49dd01168c006bc5d8a506a1ef3c36c721d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 21 Dec 2020 04:15:29 -0800 Subject: [PATCH 0833/1009] [SPARK-33845][SQL] Remove unnecessary if when trueValue and falseValue are foldable boolean types ### What changes were proposed in this pull request? Improve `SimplifyConditionals`. Simplify `If(cond, TrueLiteral, FalseLiteral)` to `cond`. Simplify `If(cond, FalseLiteral, TrueLiteral)` to `Not(cond)`. The use case is: ```sql create table t1 using parquet as select id from range(10); select if (id > 2, false, true) from t1; ``` Before this pr: ``` == Physical Plan == *(1) Project [if ((id#1L > 2)) false else true AS (IF((id > CAST(2 AS BIGINT)), false, true))#2] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == *(1) Project [(id#1L <= 2) AS (IF((id > CAST(2 AS BIGINT)), false, true))#2] +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30849 from wangyum/SPARK-33798-2. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/expressions.scala | 2 ++ .../PushFoldableIntoBranchesSuite.scala | 7 ++--- ...ReplaceNullWithFalseInPredicateSuite.scala | 31 +++++++++++-------- .../optimizer/SimplifyConditionalSuite.scala | 16 ++++++++++ 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index e6730c9275a1e..ac2caaeb15357 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -475,6 +475,8 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { case If(TrueLiteral, trueValue, _) => trueValue case If(FalseLiteral, _, falseValue) => falseValue case If(Literal(null, _), _, falseValue) => falseValue + case If(cond, TrueLiteral, FalseLiteral) => cond + case If(cond, FalseLiteral, TrueLiteral) => Not(cond) case If(cond, trueValue, falseValue) if cond.deterministic && trueValue.semanticEquals(falseValue) => trueValue case If(cond, l @ Literal(null, _), FalseLiteral) if !cond.nullable => And(cond, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 43360af46ffb3..de4f4be8ec333 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -53,7 +53,7 @@ class 
PushFoldableIntoBranchesSuite test("Push down EqualTo through If") { assertEquivalent(EqualTo(ifExp, Literal(4)), FalseLiteral) - assertEquivalent(EqualTo(ifExp, Literal(3)), If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(EqualTo(ifExp, Literal(3)), Not(a)) // Push down at most one not foldable expressions. assertEquivalent( @@ -67,7 +67,7 @@ class PushFoldableIntoBranchesSuite val nonDeterministic = If(LessThan(Rand(1), Literal(0.5)), Literal(1), Literal(2)) assert(!nonDeterministic.deterministic) assertEquivalent(EqualTo(nonDeterministic, Literal(2)), - If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, TrueLiteral)) + GreaterThanOrEqual(Rand(1), Literal(0.5))) assertEquivalent(EqualTo(nonDeterministic, Literal(3)), If(LessThan(Rand(1), Literal(0.5)), FalseLiteral, FalseLiteral)) @@ -102,8 +102,7 @@ class PushFoldableIntoBranchesSuite assertEquivalent(Remainder(ifExp, Literal(4)), If(a, Literal(2), Literal(3))) assertEquivalent(Divide(If(a, Literal(2.0), Literal(3.0)), Literal(1.0)), If(a, Literal(2.0), Literal(3.0))) - assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), - If(a, FalseLiteral, TrueLiteral)) + assertEquivalent(And(If(a, FalseLiteral, TrueLiteral), TrueLiteral), Not(a)) assertEquivalent(Or(If(a, FalseLiteral, TrueLiteral), TrueLiteral), TrueLiteral) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 00433a5490574..5da71c31e1990 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} +import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, ArrayTransform, CaseWhen, Expression, GreaterThan, If, LambdaFunction, LessThanOrEqual, Literal, MapFilter, NamedExpression, Or, UnresolvedNamedLambdaVariable} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} @@ -236,12 +236,13 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(2) === nestedCaseWhen, TrueLiteral, FalseLiteral) - val branches = Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue) - val condition = CaseWhen(branches) - testFilter(originalCond = condition, expectedCond = condition) - testJoin(originalCond = condition, expectedCond = condition) - testDelete(originalCond = condition, expectedCond = condition) - testUpdate(originalCond = condition, expectedCond = condition) + val condition = CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue)) + val expectedCond = + CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> (Literal(2) === nestedCaseWhen))) + testFilter(originalCond = condition, expectedCond = 
expectedCond) + testJoin(originalCond = condition, expectedCond = expectedCond) + testDelete(originalCond = condition, expectedCond = expectedCond) + testUpdate(originalCond = condition, expectedCond = expectedCond) } test("inability to replace null in non-boolean branches of If inside another If") { @@ -252,10 +253,14 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { Literal(3)), TrueLiteral, FalseLiteral) - testFilter(originalCond = condition, expectedCond = condition) - testJoin(originalCond = condition, expectedCond = condition) - testDelete(originalCond = condition, expectedCond = condition) - testUpdate(originalCond = condition, expectedCond = condition) + val expectedCond = Literal(5) > If( + UnresolvedAttribute("i") === Literal(15), + Literal(null, IntegerType), + Literal(3)) + testFilter(originalCond = condition, expectedCond = expectedCond) + testJoin(originalCond = condition, expectedCond = expectedCond) + testDelete(originalCond = condition, expectedCond = expectedCond) + testUpdate(originalCond = condition, expectedCond = expectedCond) } test("replace null in If used as a join condition") { @@ -405,9 +410,9 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val lambda1 = LambdaFunction( function = If(cond, Literal(null, BooleanType), TrueLiteral), arguments = lambdaArgs) - // the optimized lambda body is: if(arg > 0, false, true) + // the optimized lambda body is: if(arg > 0, false, true) => arg <= 0 val lambda2 = LambdaFunction( - function = If(cond, FalseLiteral, TrueLiteral), + function = LessThanOrEqual(condArg, Literal(0)), arguments = lambdaArgs) testProjection( originalExpr = createExpr(argument, lambda1) as 'x, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index bac962ced4618..328fc107e1c1b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -199,4 +199,20 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P If(Factorial(5) > 100L, b, nullLiteral).eval(EmptyRow)) } } + + test("SPARK-33845: remove unnecessary if when the outputs are boolean type") { + assertEquivalent( + If(IsNotNull(UnresolvedAttribute("a")), TrueLiteral, FalseLiteral), + IsNotNull(UnresolvedAttribute("a"))) + assertEquivalent( + If(IsNotNull(UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), + IsNull(UnresolvedAttribute("a"))) + + assertEquivalent( + If(GreaterThan(Rand(0), UnresolvedAttribute("a")), TrueLiteral, FalseLiteral), + GreaterThan(Rand(0), UnresolvedAttribute("a"))) + assertEquivalent( + If(GreaterThan(Rand(0), UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), + LessThanOrEqual(Rand(0), UnresolvedAttribute("a"))) + } } From 69aa727ff495f6698fe9b37e952dfaf36f1dd5eb Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 21 Dec 2020 04:24:04 -0800 Subject: [PATCH 0834/1009] [SPARK-33124][SQL] Fills missing group tags and re-categorizes all the group tags for built-in functions ### What changes were proposed in this pull request? This PR proposes to fill missing group tags and re-categorize all the group tags for built-in functions. 
New groups below are added in this PR: - binary_funcs - bitwise_funcs - collection_funcs - predicate_funcs - conditional_funcs - conversion_funcs - csv_funcs - generator_funcs - hash_funcs - lambda_funcs - math_funcs - misc_funcs - string_funcs - struct_funcs - xml_funcs A basic policy to re-categorize functions is that functions in the same file are categorized into the same group. For example, all the functions in `hash.scala` are categorized into `hash_funcs`. But, there are some exceptional/ambiguous cases when categorizing them. Here are some special notes: - All the aggregate functions are categorized into `agg_funcs`. - `array_funcs` and `map_funcs` are sub-groups of `collection_funcs`. For example, `array_contains` is used only for arrays, so it is assigned to `array_funcs`. On the other hand, `reverse` is used for both arrays and strings, so it is assigned to `collection_funcs`. - Some functions logically belong to multiple groups. In this case, these functions are categorized based on the file that they belong to. For example, `schema_of_csv` can be grouped into both `csv_funcs` and `struct_funcs` in terms of input types, but it is assigned to `csv_funcs` because it belongs to the `csvExpressions.scala` file that holds the other CSV-related functions. - Functions in `nullExpressions.scala`, `complexTypeCreator.scala`, `randomExpressions.scala`, and `regexExpressions.scala` are categorized based on their functionalities. For example: - `isnull` in `nullExpressions` is assigned to `predicate_funcs` because this is a predicate function. - `array` in `complexTypeCreator.scala` is assigned to `array_funcs`based on its output type (The other functions in `array_funcs` are categorized based on their input types though). A category list (after this PR) is as follows (the list below includes the exprs that already have a group tag in the current master): |group|name|class| |-----|----|-----| |agg_funcs|any|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|approx_count_distinct|org.apache.spark.sql.catalyst.expressions.aggregate.HyperLogLogPlusPlus| |agg_funcs|approx_percentile|org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile| |agg_funcs|avg|org.apache.spark.sql.catalyst.expressions.aggregate.Average| |agg_funcs|bit_and|org.apache.spark.sql.catalyst.expressions.aggregate.BitAndAgg| |agg_funcs|bit_or|org.apache.spark.sql.catalyst.expressions.aggregate.BitOrAgg| |agg_funcs|bit_xor|org.apache.spark.sql.catalyst.expressions.aggregate.BitXorAgg| |agg_funcs|bool_and|org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd| |agg_funcs|bool_or|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|collect_list|org.apache.spark.sql.catalyst.expressions.aggregate.CollectList| |agg_funcs|collect_set|org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet| |agg_funcs|corr|org.apache.spark.sql.catalyst.expressions.aggregate.Corr| |agg_funcs|count_if|org.apache.spark.sql.catalyst.expressions.aggregate.CountIf| |agg_funcs|count_min_sketch|org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg| |agg_funcs|count|org.apache.spark.sql.catalyst.expressions.aggregate.Count| |agg_funcs|covar_pop|org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation| |agg_funcs|covar_samp|org.apache.spark.sql.catalyst.expressions.aggregate.CovSample| |agg_funcs|cube|org.apache.spark.sql.catalyst.expressions.Cube| |agg_funcs|every|org.apache.spark.sql.catalyst.expressions.aggregate.BoolAnd| 
|agg_funcs|first_value|org.apache.spark.sql.catalyst.expressions.aggregate.First| |agg_funcs|first|org.apache.spark.sql.catalyst.expressions.aggregate.First| |agg_funcs|grouping_id|org.apache.spark.sql.catalyst.expressions.GroupingID| |agg_funcs|grouping|org.apache.spark.sql.catalyst.expressions.Grouping| |agg_funcs|kurtosis|org.apache.spark.sql.catalyst.expressions.aggregate.Kurtosis| |agg_funcs|last_value|org.apache.spark.sql.catalyst.expressions.aggregate.Last| |agg_funcs|last|org.apache.spark.sql.catalyst.expressions.aggregate.Last| |agg_funcs|max_by|org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy| |agg_funcs|max|org.apache.spark.sql.catalyst.expressions.aggregate.Max| |agg_funcs|mean|org.apache.spark.sql.catalyst.expressions.aggregate.Average| |agg_funcs|min_by|org.apache.spark.sql.catalyst.expressions.aggregate.MinBy| |agg_funcs|min|org.apache.spark.sql.catalyst.expressions.aggregate.Min| |agg_funcs|percentile_approx|org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile| |agg_funcs|percentile|org.apache.spark.sql.catalyst.expressions.aggregate.Percentile| |agg_funcs|rollup|org.apache.spark.sql.catalyst.expressions.Rollup| |agg_funcs|skewness|org.apache.spark.sql.catalyst.expressions.aggregate.Skewness| |agg_funcs|some|org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr| |agg_funcs|stddev_pop|org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop| |agg_funcs|stddev_samp|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|stddev|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|std|org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp| |agg_funcs|sum|org.apache.spark.sql.catalyst.expressions.aggregate.Sum| |agg_funcs|var_pop|org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop| |agg_funcs|var_samp|org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp| |agg_funcs|variance|org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp| |array_funcs|array_contains|org.apache.spark.sql.catalyst.expressions.ArrayContains| |array_funcs|array_distinct|org.apache.spark.sql.catalyst.expressions.ArrayDistinct| |array_funcs|array_except|org.apache.spark.sql.catalyst.expressions.ArrayExcept| |array_funcs|array_intersect|org.apache.spark.sql.catalyst.expressions.ArrayIntersect| |array_funcs|array_join|org.apache.spark.sql.catalyst.expressions.ArrayJoin| |array_funcs|array_max|org.apache.spark.sql.catalyst.expressions.ArrayMax| |array_funcs|array_min|org.apache.spark.sql.catalyst.expressions.ArrayMin| |array_funcs|array_position|org.apache.spark.sql.catalyst.expressions.ArrayPosition| |array_funcs|array_remove|org.apache.spark.sql.catalyst.expressions.ArrayRemove| |array_funcs|array_repeat|org.apache.spark.sql.catalyst.expressions.ArrayRepeat| |array_funcs|array_union|org.apache.spark.sql.catalyst.expressions.ArrayUnion| |array_funcs|arrays_overlap|org.apache.spark.sql.catalyst.expressions.ArraysOverlap| |array_funcs|arrays_zip|org.apache.spark.sql.catalyst.expressions.ArraysZip| |array_funcs|array|org.apache.spark.sql.catalyst.expressions.CreateArray| |array_funcs|flatten|org.apache.spark.sql.catalyst.expressions.Flatten| |array_funcs|sequence|org.apache.spark.sql.catalyst.expressions.Sequence| |array_funcs|shuffle|org.apache.spark.sql.catalyst.expressions.Shuffle| |array_funcs|slice|org.apache.spark.sql.catalyst.expressions.Slice| |array_funcs|sort_array|org.apache.spark.sql.catalyst.expressions.SortArray| 
|bitwise_funcs|&|org.apache.spark.sql.catalyst.expressions.BitwiseAnd| |bitwise_funcs|^|org.apache.spark.sql.catalyst.expressions.BitwiseXor| |bitwise_funcs|bit_count|org.apache.spark.sql.catalyst.expressions.BitwiseCount| |bitwise_funcs|shiftrightunsigned|org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned| |bitwise_funcs|shiftright|org.apache.spark.sql.catalyst.expressions.ShiftRight| |bitwise_funcs|~|org.apache.spark.sql.catalyst.expressions.BitwiseNot| |collection_funcs|cardinality|org.apache.spark.sql.catalyst.expressions.Size| |collection_funcs|concat|org.apache.spark.sql.catalyst.expressions.Concat| |collection_funcs|reverse|org.apache.spark.sql.catalyst.expressions.Reverse| |collection_funcs|size|org.apache.spark.sql.catalyst.expressions.Size| |conditional_funcs|coalesce|org.apache.spark.sql.catalyst.expressions.Coalesce| |conditional_funcs|ifnull|org.apache.spark.sql.catalyst.expressions.IfNull| |conditional_funcs|if|org.apache.spark.sql.catalyst.expressions.If| |conditional_funcs|nanvl|org.apache.spark.sql.catalyst.expressions.NaNvl| |conditional_funcs|nullif|org.apache.spark.sql.catalyst.expressions.NullIf| |conditional_funcs|nvl2|org.apache.spark.sql.catalyst.expressions.Nvl2| |conditional_funcs|nvl|org.apache.spark.sql.catalyst.expressions.Nvl| |conditional_funcs|when|org.apache.spark.sql.catalyst.expressions.CaseWhen| |conversion_funcs|bigint|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|binary|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|boolean|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|cast|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|date|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|decimal|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|double|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|float|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|int|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|smallint|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|string|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|timestamp|org.apache.spark.sql.catalyst.expressions.Cast| |conversion_funcs|tinyint|org.apache.spark.sql.catalyst.expressions.Cast| |csv_funcs|from_csv|org.apache.spark.sql.catalyst.expressions.CsvToStructs| |csv_funcs|schema_of_csv|org.apache.spark.sql.catalyst.expressions.SchemaOfCsv| |csv_funcs|to_csv|org.apache.spark.sql.catalyst.expressions.StructsToCsv| |datetime_funcs|add_months|org.apache.spark.sql.catalyst.expressions.AddMonths| |datetime_funcs|current_date|org.apache.spark.sql.catalyst.expressions.CurrentDate| |datetime_funcs|current_timestamp|org.apache.spark.sql.catalyst.expressions.CurrentTimestamp| |datetime_funcs|current_timezone|org.apache.spark.sql.catalyst.expressions.CurrentTimeZone| |datetime_funcs|date_add|org.apache.spark.sql.catalyst.expressions.DateAdd| |datetime_funcs|date_format|org.apache.spark.sql.catalyst.expressions.DateFormatClass| |datetime_funcs|date_from_unix_date|org.apache.spark.sql.catalyst.expressions.DateFromUnixDate| |datetime_funcs|date_part|org.apache.spark.sql.catalyst.expressions.DatePart| |datetime_funcs|date_sub|org.apache.spark.sql.catalyst.expressions.DateSub| |datetime_funcs|date_trunc|org.apache.spark.sql.catalyst.expressions.TruncTimestamp| |datetime_funcs|datediff|org.apache.spark.sql.catalyst.expressions.DateDiff| |datetime_funcs|dayofmonth|org.apache.spark.sql.catalyst.expressions.DayOfMonth| 
|datetime_funcs|dayofweek|org.apache.spark.sql.catalyst.expressions.DayOfWeek| |datetime_funcs|dayofyear|org.apache.spark.sql.catalyst.expressions.DayOfYear| |datetime_funcs|day|org.apache.spark.sql.catalyst.expressions.DayOfMonth| |datetime_funcs|extract|org.apache.spark.sql.catalyst.expressions.Extract| |datetime_funcs|from_unixtime|org.apache.spark.sql.catalyst.expressions.FromUnixTime| |datetime_funcs|from_utc_timestamp|org.apache.spark.sql.catalyst.expressions.FromUTCTimestamp| |datetime_funcs|hour|org.apache.spark.sql.catalyst.expressions.Hour| |datetime_funcs|last_day|org.apache.spark.sql.catalyst.expressions.LastDay| |datetime_funcs|make_date|org.apache.spark.sql.catalyst.expressions.MakeDate| |datetime_funcs|make_interval|org.apache.spark.sql.catalyst.expressions.MakeInterval| |datetime_funcs|make_timestamp|org.apache.spark.sql.catalyst.expressions.MakeTimestamp| |datetime_funcs|minute|org.apache.spark.sql.catalyst.expressions.Minute| |datetime_funcs|months_between|org.apache.spark.sql.catalyst.expressions.MonthsBetween| |datetime_funcs|month|org.apache.spark.sql.catalyst.expressions.Month| |datetime_funcs|next_day|org.apache.spark.sql.catalyst.expressions.NextDay| |datetime_funcs|now|org.apache.spark.sql.catalyst.expressions.Now| |datetime_funcs|quarter|org.apache.spark.sql.catalyst.expressions.Quarter| |datetime_funcs|second|org.apache.spark.sql.catalyst.expressions.Second| |datetime_funcs|timestamp_micros|org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp| |datetime_funcs|timestamp_millis|org.apache.spark.sql.catalyst.expressions.MillisToTimestamp| |datetime_funcs|timestamp_seconds|org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp| |datetime_funcs|to_date|org.apache.spark.sql.catalyst.expressions.ParseToDate| |datetime_funcs|to_timestamp|org.apache.spark.sql.catalyst.expressions.ParseToTimestamp| |datetime_funcs|to_unix_timestamp|org.apache.spark.sql.catalyst.expressions.ToUnixTimestamp| |datetime_funcs|to_utc_timestamp|org.apache.spark.sql.catalyst.expressions.ToUTCTimestamp| |datetime_funcs|trunc|org.apache.spark.sql.catalyst.expressions.TruncDate| |datetime_funcs|unix_date|org.apache.spark.sql.catalyst.expressions.UnixDate| |datetime_funcs|unix_micros|org.apache.spark.sql.catalyst.expressions.UnixMicros| |datetime_funcs|unix_millis|org.apache.spark.sql.catalyst.expressions.UnixMillis| |datetime_funcs|unix_seconds|org.apache.spark.sql.catalyst.expressions.UnixSeconds| |datetime_funcs|unix_timestamp|org.apache.spark.sql.catalyst.expressions.UnixTimestamp| |datetime_funcs|weekday|org.apache.spark.sql.catalyst.expressions.WeekDay| |datetime_funcs|weekofyear|org.apache.spark.sql.catalyst.expressions.WeekOfYear| |datetime_funcs|year|org.apache.spark.sql.catalyst.expressions.Year| |generator_funcs|explode_outer|org.apache.spark.sql.catalyst.expressions.Explode| |generator_funcs|explode|org.apache.spark.sql.catalyst.expressions.Explode| |generator_funcs|inline_outer|org.apache.spark.sql.catalyst.expressions.Inline| |generator_funcs|inline|org.apache.spark.sql.catalyst.expressions.Inline| |generator_funcs|posexplode_outer|org.apache.spark.sql.catalyst.expressions.PosExplode| |generator_funcs|posexplode|org.apache.spark.sql.catalyst.expressions.PosExplode| |generator_funcs|stack|org.apache.spark.sql.catalyst.expressions.Stack| |hash_funcs|crc32|org.apache.spark.sql.catalyst.expressions.Crc32| |hash_funcs|hash|org.apache.spark.sql.catalyst.expressions.Murmur3Hash| |hash_funcs|md5|org.apache.spark.sql.catalyst.expressions.Md5| 
|hash_funcs|sha1|org.apache.spark.sql.catalyst.expressions.Sha1|
|hash_funcs|sha2|org.apache.spark.sql.catalyst.expressions.Sha2|
|hash_funcs|sha|org.apache.spark.sql.catalyst.expressions.Sha1|
|hash_funcs|xxhash64|org.apache.spark.sql.catalyst.expressions.XxHash64|
|json_funcs|from_json|org.apache.spark.sql.catalyst.expressions.JsonToStructs|
|json_funcs|get_json_object|org.apache.spark.sql.catalyst.expressions.GetJsonObject|
|json_funcs|json_array_length|org.apache.spark.sql.catalyst.expressions.LengthOfJsonArray|
|json_funcs|json_object_keys|org.apache.spark.sql.catalyst.expressions.JsonObjectKeys|
|json_funcs|json_tuple|org.apache.spark.sql.catalyst.expressions.JsonTuple|
|json_funcs|schema_of_json|org.apache.spark.sql.catalyst.expressions.SchemaOfJson|
|json_funcs|to_json|org.apache.spark.sql.catalyst.expressions.StructsToJson|
|lambda_funcs|aggregate|org.apache.spark.sql.catalyst.expressions.ArrayAggregate|
|lambda_funcs|array_sort|org.apache.spark.sql.catalyst.expressions.ArraySort|
|lambda_funcs|exists|org.apache.spark.sql.catalyst.expressions.ArrayExists|
|lambda_funcs|filter|org.apache.spark.sql.catalyst.expressions.ArrayFilter|
|lambda_funcs|forall|org.apache.spark.sql.catalyst.expressions.ArrayForAll|
|lambda_funcs|map_filter|org.apache.spark.sql.catalyst.expressions.MapFilter|
|lambda_funcs|map_zip_with|org.apache.spark.sql.catalyst.expressions.MapZipWith|
|lambda_funcs|transform_keys|org.apache.spark.sql.catalyst.expressions.TransformKeys|
|lambda_funcs|transform_values|org.apache.spark.sql.catalyst.expressions.TransformValues|
|lambda_funcs|transform|org.apache.spark.sql.catalyst.expressions.ArrayTransform|
|lambda_funcs|zip_with|org.apache.spark.sql.catalyst.expressions.ZipWith|
|map_funcs|element_at|org.apache.spark.sql.catalyst.expressions.ElementAt|
|map_funcs|map_concat|org.apache.spark.sql.catalyst.expressions.MapConcat|
|map_funcs|map_entries|org.apache.spark.sql.catalyst.expressions.MapEntries|
|map_funcs|map_from_arrays|org.apache.spark.sql.catalyst.expressions.MapFromArrays|
|map_funcs|map_from_entries|org.apache.spark.sql.catalyst.expressions.MapFromEntries|
|map_funcs|map_keys|org.apache.spark.sql.catalyst.expressions.MapKeys|
|map_funcs|map_values|org.apache.spark.sql.catalyst.expressions.MapValues|
|map_funcs|map|org.apache.spark.sql.catalyst.expressions.CreateMap|
|map_funcs|str_to_map|org.apache.spark.sql.catalyst.expressions.StringToMap|
|math_funcs|%|org.apache.spark.sql.catalyst.expressions.Remainder|
|math_funcs|*|org.apache.spark.sql.catalyst.expressions.Multiply|
|math_funcs|+|org.apache.spark.sql.catalyst.expressions.Add|
|math_funcs|-|org.apache.spark.sql.catalyst.expressions.Subtract|
|math_funcs|/|org.apache.spark.sql.catalyst.expressions.Divide|
|math_funcs|abs|org.apache.spark.sql.catalyst.expressions.Abs|
|math_funcs|acosh|org.apache.spark.sql.catalyst.expressions.Acosh|
|math_funcs|acos|org.apache.spark.sql.catalyst.expressions.Acos|
|math_funcs|asinh|org.apache.spark.sql.catalyst.expressions.Asinh|
|math_funcs|asin|org.apache.spark.sql.catalyst.expressions.Asin|
|math_funcs|atan2|org.apache.spark.sql.catalyst.expressions.Atan2|
|math_funcs|atanh|org.apache.spark.sql.catalyst.expressions.Atanh|
|math_funcs|atan|org.apache.spark.sql.catalyst.expressions.Atan|
|math_funcs|bin|org.apache.spark.sql.catalyst.expressions.Bin|
|math_funcs|bround|org.apache.spark.sql.catalyst.expressions.BRound|
|math_funcs|cbrt|org.apache.spark.sql.catalyst.expressions.Cbrt|
|math_funcs|ceiling|org.apache.spark.sql.catalyst.expressions.Ceil|
|math_funcs|ceil|org.apache.spark.sql.catalyst.expressions.Ceil|
|math_funcs|conv|org.apache.spark.sql.catalyst.expressions.Conv|
|math_funcs|cosh|org.apache.spark.sql.catalyst.expressions.Cosh|
|math_funcs|cos|org.apache.spark.sql.catalyst.expressions.Cos|
|math_funcs|cot|org.apache.spark.sql.catalyst.expressions.Cot|
|math_funcs|degrees|org.apache.spark.sql.catalyst.expressions.ToDegrees|
|math_funcs|div|org.apache.spark.sql.catalyst.expressions.IntegralDivide|
|math_funcs|expm1|org.apache.spark.sql.catalyst.expressions.Expm1|
|math_funcs|exp|org.apache.spark.sql.catalyst.expressions.Exp|
|math_funcs|e|org.apache.spark.sql.catalyst.expressions.EulerNumber|
|math_funcs|factorial|org.apache.spark.sql.catalyst.expressions.Factorial|
|math_funcs|floor|org.apache.spark.sql.catalyst.expressions.Floor|
|math_funcs|greatest|org.apache.spark.sql.catalyst.expressions.Greatest|
|math_funcs|hex|org.apache.spark.sql.catalyst.expressions.Hex|
|math_funcs|hypot|org.apache.spark.sql.catalyst.expressions.Hypot|
|math_funcs|least|org.apache.spark.sql.catalyst.expressions.Least|
|math_funcs|ln|org.apache.spark.sql.catalyst.expressions.Log|
|math_funcs|log10|org.apache.spark.sql.catalyst.expressions.Log10|
|math_funcs|log1p|org.apache.spark.sql.catalyst.expressions.Log1p|
|math_funcs|log2|org.apache.spark.sql.catalyst.expressions.Log2|
|math_funcs|log|org.apache.spark.sql.catalyst.expressions.Logarithm|
|math_funcs|mod|org.apache.spark.sql.catalyst.expressions.Remainder|
|math_funcs|negative|org.apache.spark.sql.catalyst.expressions.UnaryMinus|
|math_funcs|pi|org.apache.spark.sql.catalyst.expressions.Pi|
|math_funcs|pmod|org.apache.spark.sql.catalyst.expressions.Pmod|
|math_funcs|positive|org.apache.spark.sql.catalyst.expressions.UnaryPositive|
|math_funcs|power|org.apache.spark.sql.catalyst.expressions.Pow|
|math_funcs|pow|org.apache.spark.sql.catalyst.expressions.Pow|
|math_funcs|radians|org.apache.spark.sql.catalyst.expressions.ToRadians|
|math_funcs|randn|org.apache.spark.sql.catalyst.expressions.Randn|
|math_funcs|random|org.apache.spark.sql.catalyst.expressions.Rand|
|math_funcs|rand|org.apache.spark.sql.catalyst.expressions.Rand|
|math_funcs|rint|org.apache.spark.sql.catalyst.expressions.Rint|
|math_funcs|round|org.apache.spark.sql.catalyst.expressions.Round|
|math_funcs|shiftleft|org.apache.spark.sql.catalyst.expressions.ShiftLeft|
|math_funcs|signum|org.apache.spark.sql.catalyst.expressions.Signum|
|math_funcs|sign|org.apache.spark.sql.catalyst.expressions.Signum|
|math_funcs|sinh|org.apache.spark.sql.catalyst.expressions.Sinh|
|math_funcs|sin|org.apache.spark.sql.catalyst.expressions.Sin|
|math_funcs|sqrt|org.apache.spark.sql.catalyst.expressions.Sqrt|
|math_funcs|tanh|org.apache.spark.sql.catalyst.expressions.Tanh|
|math_funcs|tan|org.apache.spark.sql.catalyst.expressions.Tan|
|math_funcs|unhex|org.apache.spark.sql.catalyst.expressions.Unhex|
|math_funcs|width_bucket|org.apache.spark.sql.catalyst.expressions.WidthBucket|
|misc_funcs|assert_true|org.apache.spark.sql.catalyst.expressions.AssertTrue|
|misc_funcs|current_catalog|org.apache.spark.sql.catalyst.expressions.CurrentCatalog|
|misc_funcs|current_database|org.apache.spark.sql.catalyst.expressions.CurrentDatabase|
|misc_funcs|input_file_block_length|org.apache.spark.sql.catalyst.expressions.InputFileBlockLength|
|misc_funcs|input_file_block_start|org.apache.spark.sql.catalyst.expressions.InputFileBlockStart|
|misc_funcs|input_file_name|org.apache.spark.sql.catalyst.expressions.InputFileName|
|misc_funcs|java_method|org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection|
|misc_funcs|monotonically_increasing_id|org.apache.spark.sql.catalyst.expressions.MonotonicallyIncreasingID|
|misc_funcs|raise_error|org.apache.spark.sql.catalyst.expressions.RaiseError|
|misc_funcs|reflect|org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection|
|misc_funcs|spark_partition_id|org.apache.spark.sql.catalyst.expressions.SparkPartitionID|
|misc_funcs|typeof|org.apache.spark.sql.catalyst.expressions.TypeOf|
|misc_funcs|uuid|org.apache.spark.sql.catalyst.expressions.Uuid|
|misc_funcs|version|org.apache.spark.sql.catalyst.expressions.SparkVersion|
|predicate_funcs|!|org.apache.spark.sql.catalyst.expressions.Not|
|predicate_funcs|<=>|org.apache.spark.sql.catalyst.expressions.EqualNullSafe|
|predicate_funcs|<=|org.apache.spark.sql.catalyst.expressions.LessThanOrEqual|
|predicate_funcs|<|org.apache.spark.sql.catalyst.expressions.LessThan|
|predicate_funcs|==|org.apache.spark.sql.catalyst.expressions.EqualTo|
|predicate_funcs|=|org.apache.spark.sql.catalyst.expressions.EqualTo|
|predicate_funcs|>=|org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual|
|predicate_funcs|>|org.apache.spark.sql.catalyst.expressions.GreaterThan|
|predicate_funcs|and|org.apache.spark.sql.catalyst.expressions.And|
|predicate_funcs|in|org.apache.spark.sql.catalyst.expressions.In|
|predicate_funcs|isnan|org.apache.spark.sql.catalyst.expressions.IsNaN|
|predicate_funcs|isnotnull|org.apache.spark.sql.catalyst.expressions.IsNotNull|
|predicate_funcs|isnull|org.apache.spark.sql.catalyst.expressions.IsNull|
|predicate_funcs|like|org.apache.spark.sql.catalyst.expressions.Like|
|predicate_funcs|not|org.apache.spark.sql.catalyst.expressions.Not|
|predicate_funcs|or|org.apache.spark.sql.catalyst.expressions.Or|
|predicate_funcs|regexp_like|org.apache.spark.sql.catalyst.expressions.RLike|
|predicate_funcs|rlike|org.apache.spark.sql.catalyst.expressions.RLike|
|string_funcs|ascii|org.apache.spark.sql.catalyst.expressions.Ascii|
|string_funcs|base64|org.apache.spark.sql.catalyst.expressions.Base64|
|string_funcs|bit_length|org.apache.spark.sql.catalyst.expressions.BitLength|
|string_funcs|char_length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|character_length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|char|org.apache.spark.sql.catalyst.expressions.Chr|
|string_funcs|chr|org.apache.spark.sql.catalyst.expressions.Chr|
|string_funcs|concat_ws|org.apache.spark.sql.catalyst.expressions.ConcatWs|
|string_funcs|decode|org.apache.spark.sql.catalyst.expressions.Decode|
|string_funcs|elt|org.apache.spark.sql.catalyst.expressions.Elt|
|string_funcs|encode|org.apache.spark.sql.catalyst.expressions.Encode|
|string_funcs|find_in_set|org.apache.spark.sql.catalyst.expressions.FindInSet|
|string_funcs|format_number|org.apache.spark.sql.catalyst.expressions.FormatNumber|
|string_funcs|format_string|org.apache.spark.sql.catalyst.expressions.FormatString|
|string_funcs|initcap|org.apache.spark.sql.catalyst.expressions.InitCap|
|string_funcs|instr|org.apache.spark.sql.catalyst.expressions.StringInstr|
|string_funcs|lcase|org.apache.spark.sql.catalyst.expressions.Lower|
|string_funcs|left|org.apache.spark.sql.catalyst.expressions.Left|
|string_funcs|length|org.apache.spark.sql.catalyst.expressions.Length|
|string_funcs|levenshtein|org.apache.spark.sql.catalyst.expressions.Levenshtein|
|string_funcs|locate|org.apache.spark.sql.catalyst.expressions.StringLocate|
|string_funcs|lower|org.apache.spark.sql.catalyst.expressions.Lower|
|string_funcs|lpad|org.apache.spark.sql.catalyst.expressions.StringLPad|
|string_funcs|ltrim|org.apache.spark.sql.catalyst.expressions.StringTrimLeft|
|string_funcs|octet_length|org.apache.spark.sql.catalyst.expressions.OctetLength|
|string_funcs|overlay|org.apache.spark.sql.catalyst.expressions.Overlay|
|string_funcs|parse_url|org.apache.spark.sql.catalyst.expressions.ParseUrl|
|string_funcs|position|org.apache.spark.sql.catalyst.expressions.StringLocate|
|string_funcs|printf|org.apache.spark.sql.catalyst.expressions.FormatString|
|string_funcs|regexp_extract_all|org.apache.spark.sql.catalyst.expressions.RegExpExtractAll|
|string_funcs|regexp_extract|org.apache.spark.sql.catalyst.expressions.RegExpExtract|
|string_funcs|regexp_replace|org.apache.spark.sql.catalyst.expressions.RegExpReplace|
|string_funcs|repeat|org.apache.spark.sql.catalyst.expressions.StringRepeat|
|string_funcs|replace|org.apache.spark.sql.catalyst.expressions.StringReplace|
|string_funcs|right|org.apache.spark.sql.catalyst.expressions.Right|
|string_funcs|rpad|org.apache.spark.sql.catalyst.expressions.StringRPad|
|string_funcs|rtrim|org.apache.spark.sql.catalyst.expressions.StringTrimRight|
|string_funcs|sentences|org.apache.spark.sql.catalyst.expressions.Sentences|
|string_funcs|soundex|org.apache.spark.sql.catalyst.expressions.SoundEx|
|string_funcs|space|org.apache.spark.sql.catalyst.expressions.StringSpace|
|string_funcs|split|org.apache.spark.sql.catalyst.expressions.StringSplit|
|string_funcs|substring_index|org.apache.spark.sql.catalyst.expressions.SubstringIndex|
|string_funcs|substring|org.apache.spark.sql.catalyst.expressions.Substring|
|string_funcs|substr|org.apache.spark.sql.catalyst.expressions.Substring|
|string_funcs|translate|org.apache.spark.sql.catalyst.expressions.StringTranslate|
|string_funcs|trim|org.apache.spark.sql.catalyst.expressions.StringTrim|
|string_funcs|ucase|org.apache.spark.sql.catalyst.expressions.Upper|
|string_funcs|unbase64|org.apache.spark.sql.catalyst.expressions.UnBase64|
|string_funcs|upper|org.apache.spark.sql.catalyst.expressions.Upper|
|struct_funcs|named_struct|org.apache.spark.sql.catalyst.expressions.CreateNamedStruct|
|struct_funcs|struct|org.apache.spark.sql.catalyst.expressions.CreateNamedStruct|
|window_funcs|cume_dist|org.apache.spark.sql.catalyst.expressions.CumeDist|
|window_funcs|dense_rank|org.apache.spark.sql.catalyst.expressions.DenseRank|
|window_funcs|lag|org.apache.spark.sql.catalyst.expressions.Lag|
|window_funcs|lead|org.apache.spark.sql.catalyst.expressions.Lead|
|window_funcs|nth_value|org.apache.spark.sql.catalyst.expressions.NthValue|
|window_funcs|ntile|org.apache.spark.sql.catalyst.expressions.NTile|
|window_funcs|percent_rank|org.apache.spark.sql.catalyst.expressions.PercentRank|
|window_funcs|rank|org.apache.spark.sql.catalyst.expressions.Rank|
|window_funcs|row_number|org.apache.spark.sql.catalyst.expressions.RowNumber|
|xml_funcs|xpath_boolean|org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean|
|xml_funcs|xpath_double|org.apache.spark.sql.catalyst.expressions.xml.XPathDouble|
|xml_funcs|xpath_float|org.apache.spark.sql.catalyst.expressions.xml.XPathFloat|
|xml_funcs|xpath_int|org.apache.spark.sql.catalyst.expressions.xml.XPathInt|
|xml_funcs|xpath_long|org.apache.spark.sql.catalyst.expressions.xml.XPathLong|
|xml_funcs|xpath_number|org.apache.spark.sql.catalyst.expressions.xml.XPathDouble|
|xml_funcs|xpath_short|org.apache.spark.sql.catalyst.expressions.xml.XPathShort|
|xml_funcs|xpath_string|org.apache.spark.sql.catalyst.expressions.xml.XPathString|
|xml_funcs|xpath|org.apache.spark.sql.catalyst.expressions.xml.XPathList|

Closes #30040

NOTE: The original author of this PR is tanelk, so the credit should be given to tanelk.

### Why are the changes needed?

To improve the built-in function documentation.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Added a test to `ExpressionInfoSuite` to check that expressions have a valid group tag.

Closes #30867 from maropu/pr30040.

Lead-authored-by: Takeshi Yamamuro
Co-authored-by: tanel.kiis@gmail.com
Signed-off-by: Dongjoon Hyun
---
 .../catalyst/expressions/ExpressionInfo.java  | 7 +-
 .../catalyst/analysis/FunctionRegistry.scala  | 3 +-
 .../expressions/CallMethodViaReflection.scala | 3 +-
 .../spark/sql/catalyst/expressions/Cast.scala | 3 +-
 .../MonotonicallyIncreasingID.scala           | 3 +-
 .../expressions/SparkPartitionID.scala        | 3 +-
 .../sql/catalyst/expressions/arithmetic.scala | 36 +++--
 .../expressions/bitwiseExpressions.scala      | 15 +-
 .../expressions/collectionOperations.scala    | 10 +-
 .../expressions/complexTypeCreator.scala      | 17 ++-
 .../expressions/conditionalExpressions.scala  | 6 +-
 .../catalyst/expressions/csvExpressions.scala | 9 +-
 .../expressions/datetimeExpressions.scala     | 2 +
 .../sql/catalyst/expressions/generators.scala | 12 +-
 .../sql/catalyst/expressions/grouping.scala   | 12 +-
 .../spark/sql/catalyst/expressions/hash.scala | 21 ++-
 .../expressions/higherOrderFunctions.scala    | 31 ++--
 .../catalyst/expressions/inputFileBlock.scala | 9 +-
 .../expressions/intervalExpressions.scala     | 3 +-
 .../expressions/mathExpressions.scala         | 132 ++++++++++++------
 .../spark/sql/catalyst/expressions/misc.scala | 21 ++-
 .../expressions/nullExpressions.scala         | 27 ++--
 .../sql/catalyst/expressions/predicates.scala | 30 ++--
 .../expressions/randomExpressions.scala       | 6 +-
 .../expressions/regexpExpressions.scala       | 18 ++-
 .../expressions/stringExpressions.scala       | 114 ++++++++++-----
 .../sql/catalyst/expressions/xml/xpath.scala  | 24 ++--
 .../sql/expressions/ExpressionInfoSuite.scala | 7 +-
 28 files changed, 391 insertions(+), 193 deletions(-)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
index a500822b21f02..0975f831bbbe2 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionInfo.java
@@ -40,8 +40,11 @@ public class ExpressionInfo {
     private String deprecated;
 
     private static final Set<String> validGroups =
-        new HashSet<>(Arrays.asList("agg_funcs", "array_funcs", "datetime_funcs",
-            "json_funcs", "map_funcs", "window_funcs"));
+        new HashSet<>(Arrays.asList("agg_funcs", "array_funcs", "binary_funcs", "bitwise_funcs",
+            "collection_funcs", "predicate_funcs", "conditional_funcs", "conversion_funcs",
+            "csv_funcs", "datetime_funcs", "generator_funcs", "hash_funcs", "json_funcs",
+            "lambda_funcs", "map_funcs", "math_funcs", "misc_funcs", "string_funcs", "struct_funcs",
+            "window_funcs", "xml_funcs"));
 
     public String getClassName() {
         return className;
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 1a1b619336d54..912357b47934d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -660,7 +660,8 @@ object FunctionRegistry { val clazz = scala.reflect.classTag[Cast].runtimeClass val usage = "_FUNC_(expr) - Casts the value `expr` to the target data type `_FUNC_`." val expressionInfo = - new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", "", "2.0.1", "") + new ExpressionInfo(clazz.getCanonicalName, null, name, usage, "", "", "", + "conversion_funcs", "2.0.1", "") (name, (expressionInfo, builder)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index 0979a18ac97bb..0de17d420f0c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -52,7 +52,8 @@ import org.apache.spark.util.Utils > SELECT _FUNC_('java.util.UUID', 'fromString', 'a5cf6c42-0c85-418f-af6c-3e4e5b1328f2'); a5cf6c42-0c85-418f-af6c-3e4e5b1328f2 """, - since = "2.0.0") + since = "2.0.0", + group = "misc_funcs") case class CallMethodViaReflection(children: Seq[Expression]) extends Nondeterministic with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e1ece732cf15d..d19a51b339020 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -1755,7 +1755,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit > SELECT _FUNC_('10' as int); 10 """, - since = "1.0.0") + since = "1.0.0", + group = "conversion_funcs") case class Cast(child: Expression, dataType: DataType, timeZoneId: Option[String] = None) extends CastBase { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index 8b04c1aa513f9..f228b36ecd472 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -46,7 +46,8 @@ import org.apache.spark.sql.types.{DataType, LongType} > SELECT _FUNC_(); 0 """, - since = "1.4.0") + since = "1.4.0", + group = "misc_funcs") case class MonotonicallyIncreasingID() extends LeafExpression with Stateful { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 242735b4aebd3..2de89da2318f7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -32,7 +32,8 @@ import org.apache.spark.sql.types.{DataType, IntegerType} > SELECT _FUNC_(); 0 """, - since = "1.4.0") + since = "1.4.0", + group = "misc_funcs") case class SparkPartitionID() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 3fbb798f1fd53..03dfddbdf7e6e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -33,7 +33,8 @@ import org.apache.spark.unsafe.types.CalendarInterval > SELECT _FUNC_(1); -1 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class UnaryMinus( child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled) @@ -105,7 +106,8 @@ case class UnaryMinus( > SELECT _FUNC_(1); 1 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class UnaryPositive(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -133,7 +135,8 @@ case class UnaryPositive(child: Expression) > SELECT _FUNC_(-1); 1 """, - since = "1.2.0") + since = "1.2.0", + group = "math_funcs") case class Abs(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -236,7 +239,8 @@ object BinaryArithmetic { > SELECT 1 _FUNC_ 2; 3 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Add( left: Expression, right: Expression, @@ -274,7 +278,8 @@ case class Add( > SELECT 2 _FUNC_ 1; 1 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Subtract( left: Expression, right: Expression, @@ -312,7 +317,8 @@ case class Subtract( > SELECT 2 _FUNC_ 3; 6 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Multiply( left: Expression, right: Expression, @@ -436,7 +442,8 @@ trait DivModLike extends BinaryArithmetic { > SELECT 2L _FUNC_ 2L; 1.0 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Divide( left: Expression, @@ -465,7 +472,8 @@ case class Divide( > SELECT 3 _FUNC_ 2; 1 """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class IntegralDivide( left: Expression, @@ -512,7 +520,8 @@ case class IntegralDivide( > SELECT MOD(2, 1.8); 0.2 """, - since = "1.0.0") + since = "1.0.0", + group = "math_funcs") case class Remainder( left: Expression, right: Expression, @@ -565,7 +574,8 @@ case class Remainder( > SELECT _FUNC_(-10, 3); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Pmod( left: Expression, right: Expression, @@ -750,7 +760,8 @@ case class Pmod( > SELECT _FUNC_(10, 9, 2, 4, 3); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) @@ -824,7 +835,8 @@ case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression > SELECT _FUNC_(10, 9, 2, 4, 3); 10 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpression { override def nullable: Boolean = children.forall(_.nullable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 33ce60875c600..752af4eeeafb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -33,7 +33,8 @@ import org.apache.spark.sql.types._ > SELECT 3 _FUNC_ 5; 1 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -68,7 +69,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme > SELECT 3 _FUNC_ 5; 7 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -103,7 +105,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet > SELECT 3 _FUNC_ 5; 6 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { protected override val failOnError: Boolean = false @@ -136,7 +139,8 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme > SELECT _FUNC_ 0; -1 """, - since = "1.4.0") + since = "1.4.0", + group = "bitwise_funcs") case class BitwiseNot(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -174,7 +178,8 @@ case class BitwiseNot(child: Expression) > SELECT _FUNC_(0); 0 """, - since = "3.0.0") + since = "3.0.0", + group = "bitwise_funcs") case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 0765bfdd78fa6..33794467fb338 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -90,7 +90,8 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression > SELECT _FUNC_(NULL); -1 """, - since = "1.5.0") + since = "1.5.0", + group = "collection_funcs") case class Size(child: Expression, legacySizeOfNull: Boolean) extends UnaryExpression with ExpectsInputTypes { @@ -1018,7 +1019,7 @@ case class Shuffle(child: Expression, randomSeed: Option[Long] = None) > SELECT _FUNC_(array(2, 1, 4, 3)); [3,4,1,2] """, - group = "array_funcs", + group = "collection_funcs", since = "1.5.0", note = """ Reverse logic for arrays is available since 2.4.0. @@ -1922,7 +1923,8 @@ case class ArrayPosition(left: Expression, right: Expression) > SELECT _FUNC_(map(1, 'a', 2, 'b'), 2); b """, - since = "2.4.0") + since = "2.4.0", + group = "map_funcs") case class ElementAt( left: Expression, right: Expression, @@ -2097,7 +2099,7 @@ case class ElementAt( note = """ Concat logic for arrays is available since 2.4.0. 
""", - group = "array_funcs", + group = "collection_funcs", since = "1.5.0") case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpression { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index cb59fbda2b3b9..d29da3ad2a4e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -52,7 +52,8 @@ trait NoThrow > SELECT _FUNC_(1, 2, 3); [1,2,3] """, - since = "1.1.0") + since = "1.1.0", + group = "array_funcs") case class CreateArray(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression with NoThrow { @@ -170,7 +171,8 @@ private [sql] object GenArrayData { > SELECT _FUNC_(1.0, '2', 3.0, '4'); {1.0:"2",3.0:"4"} """, - since = "2.0.0") + since = "2.0.0", + group = "map_funcs") case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) extends Expression with NoThrow { @@ -271,7 +273,8 @@ object CreateMap { > SELECT _FUNC_(array(1.0, 3.0), array('2', '4')); {1.0:"2",3.0:"4"} """, - since = "2.4.0") + since = "2.4.0", + group = "map_funcs") case class MapFromArrays(left: Expression, right: Expression) extends BinaryExpression with ExpectsInputTypes with NullIntolerant { @@ -369,7 +372,7 @@ object CreateStruct { | {"col1":1,"col2":2,"col3":3} | """.stripMargin, "", - "", + "struct_funcs", "1.4.0", "") ("struct", (info, this.create)) @@ -389,7 +392,8 @@ object CreateStruct { > SELECT _FUNC_("a", 1, "b", 2, "c", 3); {"a":1,"b":2,"c":3} """, - since = "1.5.0") + since = "1.5.0", + group = "struct_funcs") // scalastyle:on line.size.limit case class CreateNamedStruct(children: Seq[Expression]) extends Expression with NoThrow { lazy val (nameExprs, valExprs) = children.grouped(2).map { @@ -495,7 +499,8 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression with > SELECT _FUNC_('a'); {"a":null} """, - since = "2.0.1") + since = "2.0.1", + group = "map_funcs") // scalastyle:on line.size.limit case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: Expression) extends TernaryExpression with ExpectsInputTypes with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 84065d07e2b4d..7b0be8eb24097 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -31,7 +31,8 @@ import org.apache.spark.sql.types._ > SELECT _FUNC_(1 < 2, 'a', 'b'); a """, - since = "1.0.0") + since = "1.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case class If(predicate: Expression, trueValue: Expression, falseValue: Expression) extends ComplexTypeMergingExpression { @@ -118,7 +119,8 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi > SELECT CASE WHEN 1 < 0 THEN 1 WHEN 2 < 0 THEN 2.0 END; NULL """, - since = "1.0.1") + since = "1.0.1", + group = "conditional_funcs") // scalastyle:on line.size.limit case class CaseWhen( branches: Seq[(Expression, Expression)], diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 6fad272aa4557..8978d55b98251 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -44,7 +44,8 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy')); {"time":2015-08-26 00:00:00} """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") // scalastyle:on line.size.limit case class CsvToStructs( schema: StructType, @@ -146,7 +147,8 @@ case class CsvToStructs( > SELECT _FUNC_('1,abc'); STRUCT<`_c0`: INT, `_c1`: STRING> """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") case class SchemaOfCsv( child: Expression, options: Map[String, String]) @@ -205,7 +207,8 @@ case class SchemaOfCsv( > SELECT _FUNC_(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy')); 26/08/2015 """, - since = "3.0.0") + since = "3.0.0", + group = "csv_funcs") // scalastyle:on line.size.limit case class StructsToCsv( options: Map[String, String], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index c20dd6148be3e..99f80e9078aae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -697,6 +697,7 @@ case class Month(child: Expression) extends GetDateField { > SELECT _FUNC_('2009-07-30'); 30 """, + group = "datetime_funcs", since = "1.5.0") case class DayOfMonth(child: Expression) extends GetDateField { override val func = DateTimeUtils.getDayOfMonth @@ -2247,6 +2248,7 @@ case class DatePart(field: Expression, source: Expression, child: Expression) note = """ The _FUNC_ function is equivalent to `date_part(field, source)`. 
""", + group = "datetime_funcs", since = "3.0.0") // scalastyle:on line.size.limit case class Extract(field: Expression, source: Expression, child: Expression) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index ad6e365f76fa9..c5122b6490ae6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -136,7 +136,8 @@ case class UserDefinedGenerator( 1 2 3 NULL """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class Stack(children: Seq[Expression]) extends Generator { @@ -362,7 +363,8 @@ abstract class ExplodeBase extends UnaryExpression with CollectionGenerator with 10 20 """, - since = "1.0.0") + since = "1.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit case class Explode(child: Expression) extends ExplodeBase { override val position: Boolean = false @@ -386,7 +388,8 @@ case class Explode(child: Expression) extends ExplodeBase { 0 10 1 20 """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class PosExplode(child: Expression) extends ExplodeBase { override val position = true @@ -404,7 +407,8 @@ case class PosExplode(child: Expression) extends ExplodeBase { 1 a 2 b """, - since = "2.0.0") + since = "2.0.0", + group = "generator_funcs") // scalastyle:on line.size.limit line.contains.tab case class Inline(child: Expression) extends UnaryExpression with CollectionGenerator { override val inline: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala index ac0f6b86ccd96..f843c1a2d3594 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala @@ -56,7 +56,8 @@ trait GroupingSet extends Expression with CodegenFallback { Bob NULL 1 NULL 5 1 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {} @@ -75,7 +76,8 @@ case class Cube(groupByExprs: Seq[Expression]) extends GroupingSet {} NULL NULL 2 Bob NULL 1 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {} @@ -96,7 +98,8 @@ case class Rollup(groupByExprs: Seq[Expression]) extends GroupingSet {} Bob 0 5 NULL 1 7 """, - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class Grouping(child: Expression) extends Expression with Unevaluable { @transient @@ -133,7 +136,8 @@ case class Grouping(child: Expression) extends Expression with Unevaluable { Input columns should match with grouping columns exactly, or empty (means all the grouping columns). 
""", - since = "2.0.0") + since = "2.0.0", + group = "agg_funcs") // scalastyle:on line.size.limit line.contains.tab case class GroupingID(groupByExprs: Seq[Expression]) extends Expression with Unevaluable { @transient diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index ce177f50956f0..9738559b6d67a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -53,7 +53,8 @@ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} > SELECT _FUNC_('Spark'); 8cde774d6f7333752ed72cacddb05126 """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -89,7 +90,8 @@ case class Md5(child: Expression) > SELECT _FUNC_('Spark', 256); 529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") // scalastyle:on line.size.limit case class Sha2(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { @@ -163,7 +165,8 @@ case class Sha2(left: Expression, right: Expression) > SELECT _FUNC_('Spark'); 85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -192,7 +195,8 @@ case class Sha1(child: Expression) > SELECT _FUNC_('Spark'); 1557323817 """, - since = "1.5.0") + since = "1.5.0", + group = "hash_funcs") case class Crc32(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -580,7 +584,8 @@ abstract class InterpretedHashFunction { > SELECT _FUNC_('Spark', array(123), 2); -1321691492 """, - since = "2.0.0") + since = "2.0.0", + group = "hash_funcs") case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] { def this(arguments: Seq[Expression]) = this(arguments, 42) @@ -619,7 +624,8 @@ object Murmur3HashFunction extends InterpretedHashFunction { > SELECT _FUNC_('Spark', array(123), 2); 5602566077635097486 """, - since = "3.0.0") + since = "3.0.0", + group = "hash_funcs") case class XxHash64(children: Seq[Expression], seed: Long) extends HashExpression[Long] { def this(arguments: Seq[Expression]) = this(arguments, 42L) @@ -653,7 +659,8 @@ object XxHash64Function extends InterpretedHashFunction { */ @ExpressionDescription( usage = "_FUNC_(expr1, expr2, ...) 
- Returns a hash value of the arguments.", - since = "2.2.0") + since = "2.2.0", + group = "hash_funcs") case class HiveHash(children: Seq[Expression]) extends HashExpression[Int] { override val seed = 0 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index d1dabe732c882..7ad62312250d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -239,7 +239,8 @@ trait MapBasedSimpleHigherOrderFunction extends SimpleHigherOrderFunction { > SELECT _FUNC_(array(1, 2, 3), (x, i) -> x + i); [1,3,5] """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayTransform( argument: Expression, function: Expression) @@ -309,7 +310,8 @@ case class ArrayTransform( > SELECT _FUNC_(array('b', 'd', null, 'c', 'a')); ["a","b","c","d",null] """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") // scalastyle:on line.size.limit case class ArraySort( argument: Expression, @@ -403,7 +405,8 @@ object ArraySort { > SELECT _FUNC_(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v); {1:0,3:-1} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class MapFilter( argument: Expression, function: Expression) @@ -458,6 +461,7 @@ case class MapFilter( [0,2,3] """, since = "2.4.0", + group = "lambda_funcs", note = """ The inner function may use the index argument since 3.0.0. """) @@ -525,7 +529,8 @@ case class ArrayFilter( > SELECT _FUNC_(array(1, 2, 3), x -> x IS NULL); false """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayExists( argument: Expression, function: Expression, @@ -609,7 +614,8 @@ object ArrayExists { > SELECT _FUNC_(array(2, null, 8), x -> x % 2 == 0); NULL """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class ArrayForAll( argument: Expression, function: Expression) @@ -679,7 +685,8 @@ case class ArrayForAll( > SELECT _FUNC_(array(1, 2, 3), 0, (acc, x) -> acc + x, acc -> acc * 10); 60 """, - since = "2.4.0") + since = "2.4.0", + group = "lambda_funcs") case class ArrayAggregate( argument: Expression, zero: Expression, @@ -766,7 +773,8 @@ case class ArrayAggregate( > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v); {2:1,4:2,6:3} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class TransformKeys( argument: Expression, function: Expression) @@ -818,7 +826,8 @@ case class TransformKeys( > SELECT _FUNC_(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + v); {1:2,2:4,3:6} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class TransformValues( argument: Expression, function: Expression) @@ -869,7 +878,8 @@ case class TransformValues( > SELECT _FUNC_(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)); {1:"ax",2:"by"} """, - since = "3.0.0") + since = "3.0.0", + group = "lambda_funcs") case class MapZipWith(left: Expression, right: Expression, function: Expression) extends HigherOrderFunction with CodegenFallback { @@ -1047,7 +1057,8 @@ case class MapZipWith(left: Expression, right: Expression, function: Expression) > SELECT _FUNC_(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) -> concat(x, y)); ["ad","be","cf"] """, - since = "2.4.0") + since = "2.4.0", + group = 
"lambda_funcs") // scalastyle:on line.size.limit case class ZipWith(left: Expression, right: Expression, function: Expression) extends HigherOrderFunction with CodegenFallback { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala index e9426223092de..6cd88367aa9a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala @@ -32,7 +32,8 @@ import org.apache.spark.unsafe.types.UTF8String > SELECT _FUNC_(); """, - since = "1.5.0") + since = "1.5.0", + group = "misc_funcs") // scalastyle:on whitespace.end.of.line case class InputFileName() extends LeafExpression with Nondeterministic { @@ -64,7 +65,8 @@ case class InputFileName() extends LeafExpression with Nondeterministic { > SELECT _FUNC_(); -1 """, - since = "2.2.0") + since = "2.2.0", + group = "misc_funcs") case class InputFileBlockStart() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false @@ -93,7 +95,8 @@ case class InputFileBlockStart() extends LeafExpression with Nondeterministic { > SELECT _FUNC_(); -1 """, - since = "2.2.0") + since = "2.2.0", + group = "misc_funcs") case class InputFileBlockLength() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 27067e17e7f45..fd07aff867abf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -152,7 +152,8 @@ case class DivideInterval( > SELECT _FUNC_(0, 1, 0, 1, 0, 0, 100.000001); 1 months 1 days 1 minutes 40.000001 seconds """, - since = "3.0.0") + since = "3.0.0", + group = "datetime_funcs") // scalastyle:on line.size.limit case class MakeInterval( years: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 931365fb25a1e..43281c2dc3c2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -148,7 +148,8 @@ abstract class BinaryMathExpression(f: (Double, Double) => Double, name: String) > SELECT _FUNC_(); 2.718281828459045 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class EulerNumber() extends LeafMathExpression(math.E, "E") /** @@ -162,7 +163,8 @@ case class EulerNumber() extends LeafMathExpression(math.E, "E") > SELECT _FUNC_(); 3.141592653589793 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Pi() extends LeafMathExpression(math.Pi, "PI") //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -183,7 +185,8 @@ case class Pi() extends LeafMathExpression(math.Pi, "PI") > SELECT _FUNC_(2); NaN """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS") 
@ExpressionDescription( @@ -198,7 +201,8 @@ case class Acos(child: Expression) extends UnaryMathExpression(math.acos, "ACOS" > SELECT _FUNC_(2); NaN """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN") @ExpressionDescription( @@ -211,7 +215,8 @@ case class Asin(child: Expression) extends UnaryMathExpression(math.asin, "ASIN" > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN") @ExpressionDescription( @@ -221,7 +226,8 @@ case class Atan(child: Expression) extends UnaryMathExpression(math.atan, "ATAN" > SELECT _FUNC_(27.0); 3.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT") @ExpressionDescription( @@ -233,7 +239,8 @@ case class Cbrt(child: Expression) extends UnaryMathExpression(math.cbrt, "CBRT" > SELECT _FUNC_(5); 5 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -276,7 +283,8 @@ case class Ceil(child: Expression) extends UnaryMathExpression(math.ceil, "CEIL" > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") @ExpressionDescription( @@ -293,7 +301,8 @@ case class Cos(child: Expression) extends UnaryMathExpression(math.cos, "COS") > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH") @ExpressionDescription( @@ -307,7 +316,8 @@ case class Cosh(child: Expression) extends UnaryMathExpression(math.cosh, "COSH" > SELECT _FUNC_(0); NaN """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Acosh(child: Expression) extends UnaryMathExpression((x: Double) => StrictMath.log(x + math.sqrt(x * x - 1.0)), "ACOSH") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -332,7 +342,8 @@ case class Acosh(child: Expression) > SELECT _FUNC_(-10, 16, -10); -16 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -368,7 +379,8 @@ case class Conv(numExpr: Expression, fromBaseExpr: Expression, toBaseExpr: Expre > SELECT _FUNC_(0); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Exp(child: Expression) extends UnaryMathExpression(StrictMath.exp, "EXP") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, c => s"java.lang.StrictMath.exp($c)") @@ -382,7 +394,8 @@ case class Exp(child: Expression) extends UnaryMathExpression(StrictMath.exp, "E > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Expm1(child: Expression) extends UnaryMathExpression(StrictMath.expm1, "EXPM1") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, c => s"java.lang.StrictMath.expm1($c)") @@ -398,7 +411,8 @@ case class Expm1(child: Expression) extends UnaryMathExpression(StrictMath.expm1 > SELECT _FUNC_(5); 
5 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Floor(child: Expression) extends UnaryMathExpression(math.floor, "FLOOR") { override def dataType: DataType = child.dataType match { case dt @ DecimalType.Fixed(_, 0) => dt @@ -465,7 +479,8 @@ object Factorial { > SELECT _FUNC_(5); 120 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Factorial(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -506,7 +521,8 @@ case class Factorial(child: Expression) > SELECT _FUNC_(1); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log(child: Expression) extends UnaryLogExpression(StrictMath.log, "LOG") { override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("ln") } @@ -518,7 +534,8 @@ case class Log(child: Expression) extends UnaryLogExpression(StrictMath.log, "LO > SELECT _FUNC_(2); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log2(child: Expression) extends UnaryLogExpression((x: Double) => StrictMath.log(x) / StrictMath.log(2), "LOG2") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -541,7 +558,8 @@ case class Log2(child: Expression) > SELECT _FUNC_(10); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log10(child: Expression) extends UnaryLogExpression(StrictMath.log10, "LOG10") @ExpressionDescription( @@ -551,7 +569,8 @@ case class Log10(child: Expression) extends UnaryLogExpression(StrictMath.log10, > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Log1p(child: Expression) extends UnaryLogExpression(StrictMath.log1p, "LOG1P") { protected override val yAsymptote: Double = -1.0 } @@ -564,7 +583,8 @@ case class Log1p(child: Expression) extends UnaryLogExpression(StrictMath.log1p, > SELECT _FUNC_(12.3456); 12.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND") { override def funcName: String = "rint" @@ -578,7 +598,8 @@ case class Rint(child: Expression) extends UnaryMathExpression(math.rint, "ROUND > SELECT _FUNC_(40); 1.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "SIGNUM") @ExpressionDescription( @@ -592,7 +613,8 @@ case class Signum(child: Expression) extends UnaryMathExpression(math.signum, "S > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") @ExpressionDescription( @@ -608,7 +630,8 @@ case class Sin(child: Expression) extends UnaryMathExpression(math.sin, "SIN") > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH") @ExpressionDescription( @@ -620,7 +643,8 @@ case class Sinh(child: Expression) extends UnaryMathExpression(math.sinh, "SINH" > SELECT _FUNC_(0); 0.0 """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Asinh(child: Expression) extends UnaryMathExpression((x: Double) => x match { case Double.NegativeInfinity => Double.NegativeInfinity @@ -639,7 +663,8 @@ case class Asinh(child: Expression) > SELECT _FUNC_(4); 2.0 """, - since = "1.1.1") + since = "1.1.1", + group = "math_funcs") case 
class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT") @ExpressionDescription( @@ -655,7 +680,8 @@ case class Sqrt(child: Expression) extends UnaryMathExpression(math.sqrt, "SQRT" > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") @ExpressionDescription( @@ -671,7 +697,8 @@ case class Tan(child: Expression) extends UnaryMathExpression(math.tan, "TAN") > SELECT _FUNC_(1); 0.6420926159343306 """, - since = "2.3.0") + since = "2.3.0", + group = "math_funcs") case class Cot(child: Expression) extends UnaryMathExpression((x: Double) => 1 / math.tan(x), "COT") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -693,7 +720,8 @@ case class Cot(child: Expression) > SELECT _FUNC_(0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH") @ExpressionDescription( @@ -707,7 +735,8 @@ case class Tanh(child: Expression) extends UnaryMathExpression(math.tanh, "TANH" > SELECT _FUNC_(2); NaN """, - since = "3.0.0") + since = "3.0.0", + group = "math_funcs") case class Atanh(child: Expression) // SPARK-28519: more accurate express for 1/2 * ln((1 + x) / (1 - x)) extends UnaryMathExpression((x: Double) => @@ -729,7 +758,8 @@ case class Atanh(child: Expression) > SELECT _FUNC_(3.141592653589793); 180.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegrees, "DEGREES") { override def funcName: String = "toDegrees" } @@ -745,7 +775,8 @@ case class ToDegrees(child: Expression) extends UnaryMathExpression(math.toDegre > SELECT _FUNC_(180); 3.141592653589793 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadians, "RADIANS") { override def funcName: String = "toRadians" } @@ -762,7 +793,8 @@ case class ToRadians(child: Expression) extends UnaryMathExpression(math.toRadia > SELECT _FUNC_(13.3); 1101 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Bin(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { @@ -864,7 +896,8 @@ object Hex { > SELECT _FUNC_('Spark SQL'); 537061726B2053514C """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -901,7 +934,8 @@ case class Hex(child: Expression) > SELECT decode(_FUNC_('537061726B2053514C'), 'UTF-8'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Unhex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -947,7 +981,8 @@ case class Unhex(child: Expression) > SELECT _FUNC_(0, 0); 0.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Atan2(left: Expression, right: Expression) extends BinaryMathExpression(math.atan2, "ATAN2") { @@ -968,7 +1003,8 @@ case class Atan2(left: Expression, right: Expression) > SELECT _FUNC_(2, 3); 8.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Pow(left: Expression, right: Expression) extends BinaryMathExpression(StrictMath.pow, "POWER") { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -990,7 
+1026,8 @@ case class Pow(left: Expression, right: Expression) > SELECT _FUNC_(2, 1); 4 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class ShiftLeft(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1025,7 +1062,8 @@ case class ShiftLeft(left: Expression, right: Expression) > SELECT _FUNC_(4, 1); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "bitwise_funcs") case class ShiftRight(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1060,7 +1098,8 @@ case class ShiftRight(left: Expression, right: Expression) > SELECT _FUNC_(4, 1); 2 """, - since = "1.5.0") + since = "1.5.0", + group = "bitwise_funcs") case class ShiftRightUnsigned(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1088,7 +1127,8 @@ case class ShiftRightUnsigned(left: Expression, right: Expression) > SELECT _FUNC_(3, 4); 5.0 """, - since = "1.4.0") + since = "1.4.0", + group = "math_funcs") case class Hypot(left: Expression, right: Expression) extends BinaryMathExpression(math.hypot, "HYPOT") @@ -1106,7 +1146,8 @@ case class Hypot(left: Expression, right: Expression) > SELECT _FUNC_(10, 100); 2.0 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") case class Logarithm(left: Expression, right: Expression) extends BinaryMathExpression((c1, c2) => StrictMath.log(c2) / StrictMath.log(c1), "LOG") { @@ -1337,7 +1378,8 @@ abstract class RoundBase(child: Expression, scale: Expression, > SELECT _FUNC_(2.5, 0); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Round(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_UP, "ROUND_HALF_UP") @@ -1358,7 +1400,8 @@ case class Round(child: Expression, scale: Expression) > SELECT _FUNC_(2.5, 0); 2 """, - since = "2.0.0") + since = "2.0.0", + group = "math_funcs") // scalastyle:on line.size.limit case class BRound(child: Expression, scale: Expression) extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_EVEN, "ROUND_HALF_EVEN") @@ -1434,7 +1477,8 @@ object WidthBucket { > SELECT _FUNC_(-0.9, 5.2, 0.5, 2); 3 """, - since = "3.1.0") + since = "3.1.0", + group = "math_funcs") case class WidthBucket( value: Expression, minValue: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 4e71c8c103889..34a64dddd30fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -63,7 +63,8 @@ case class PrintToStderr(child: Expression) extends UnaryExpression { java.lang.RuntimeException custom error message """, - since = "3.1.0") + since = "3.1.0", + group = "misc_funcs") case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def foldable: Boolean = false @@ -108,7 +109,8 @@ case class RaiseError(child: Expression) extends UnaryExpression with ImplicitCa > SELECT _FUNC_(0 < 1); NULL """, - since = "2.0.0") + since = "2.0.0", + group = "misc_funcs") case class AssertTrue(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -140,7 +142,8 @@ object AssertTrue { > SELECT _FUNC_(); default """, - since = 
"1.6.0") + since = "1.6.0", + group = "misc_funcs") case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def nullable: Boolean = false @@ -157,7 +160,8 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { > SELECT _FUNC_(); spark_catalog """, - since = "3.1.0") + since = "3.1.0", + group = "misc_funcs") case class CurrentCatalog() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def nullable: Boolean = false @@ -175,7 +179,8 @@ case class CurrentCatalog() extends LeafExpression with Unevaluable { note = """ The function is non-deterministic. """, - since = "2.3.0") + since = "2.3.0", + group = "misc_funcs") // scalastyle:on line.size.limit case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Stateful with ExpressionWithRandomSeed { @@ -221,7 +226,8 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Sta > SELECT _FUNC_(); 3.1.0 a6d6ea3efedbad14d99c24143834cd4e2e52fb40 """, - since = "3.0.0") + since = "3.0.0", + group = "misc_funcs") // scalastyle:on line.size.limit case class SparkVersion() extends LeafExpression with CodegenFallback { override def nullable: Boolean = false @@ -242,7 +248,8 @@ case class SparkVersion() extends LeafExpression with CodegenFallback { > SELECT _FUNC_(array(1)); array """, - since = "3.0.0") + since = "3.0.0", + group = "misc_funcs") case class TypeOf(child: Expression) extends UnaryExpression { override def nullable: Boolean = false override def foldable: Boolean = true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 09ae2186b2429..4d7582fbd23b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -43,7 +43,8 @@ import org.apache.spark.sql.types._ > SELECT _FUNC_(NULL, 1, NULL); 1 """, - since = "1.0.0") + since = "1.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case class Coalesce(children: Seq[Expression]) extends ComplexTypeMergingExpression { @@ -129,7 +130,8 @@ case class Coalesce(children: Seq[Expression]) extends ComplexTypeMergingExpress > SELECT _FUNC_(NULL, array('2')); ["2"] """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class IfNull(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -149,7 +151,8 @@ case class IfNull(left: Expression, right: Expression, child: Expression) > SELECT _FUNC_(2, 2); NULL """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class NullIf(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { @@ -169,7 +172,8 @@ case class NullIf(left: Expression, right: Expression, child: Expression) > SELECT _FUNC_(NULL, array('2')); ["2"] """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") case class Nvl(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { def this(left: Expression, right: Expression) = { @@ -189,7 +193,8 @@ case class Nvl(left: Expression, right: Expression, child: Expression) extends R > SELECT _FUNC_(NULL, 2, 1); 1 """, - since = "2.0.0") + since = "2.0.0", + group = "conditional_funcs") // scalastyle:on line.size.limit case 
class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, child: Expression) extends RuntimeReplaceable { @@ -213,7 +218,8 @@ case class Nvl2(expr1: Expression, expr2: Expression, expr3: Expression, child: > SELECT _FUNC_(cast('NaN' as double)); true """, - since = "1.5.0") + since = "1.5.0", + group = "predicate_funcs") case class IsNaN(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes { @@ -256,7 +262,8 @@ case class IsNaN(child: Expression) extends UnaryExpression > SELECT _FUNC_(cast('NaN' as double), 123); 123.0 """, - since = "1.5.0") + since = "1.5.0", + group = "conditional_funcs") case class NaNvl(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes { @@ -317,7 +324,8 @@ case class NaNvl(left: Expression, right: Expression) > SELECT _FUNC_(1); false """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class IsNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false @@ -344,7 +352,8 @@ case class IsNull(child: Expression) extends UnaryExpression with Predicate { > SELECT _FUNC_(1); true """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class IsNotNull(child: Expression) extends UnaryExpression with Predicate { override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 53ac3560bc3b3..250d3fee94cb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -255,7 +255,8 @@ trait PredicateHelper extends AliasHelper with Logging { > SELECT _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class Not(child: Expression) extends UnaryExpression with Predicate with ImplicitCastInputTypes with NullIntolerant { @@ -358,7 +359,8 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) > SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); true """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.size.limit case class In(value: Expression, list: Seq[Expression]) extends Predicate { @@ -594,7 +596,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with > SELECT false _FUNC_ NULL; false """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -676,7 +679,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with > SELECT false _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { override def inputType: AbstractDataType = BooleanType @@ -810,7 +814,8 @@ object Equality { > SELECT NULL _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class EqualTo(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -854,7 +859,8 @@ case class EqualTo(left: Expression, right: Expression) > SELECT NULL _FUNC_ NULL; true """, - since = "1.1.0") + since = "1.1.0", + 
group = "predicate_funcs") case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { override def symbol: String = "<=>" @@ -912,7 +918,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -943,7 +950,8 @@ case class LessThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -974,7 +982,8 @@ case class LessThanOrEqual(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -1005,7 +1014,8 @@ case class GreaterThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 6a945173803b7..0fa4d6c315041 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -81,7 +81,8 @@ trait ExpressionWithRandomSeed { note = """ The function is non-deterministic in general case. """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { @@ -132,7 +133,8 @@ object Rand { note = """ The function is non-deterministic in general case. """, - since = "1.5.0") + since = "1.5.0", + group = "math_funcs") // scalastyle:on line.size.limit case class Randn(child: Expression, hideSeed: Boolean = false) extends RDG with ExpressionWithRandomSeed { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 3a421f5075a6f..dae954a579eb3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -116,7 +116,8 @@ abstract class StringRegexExpression extends BinaryExpression note = """ Use RLIKE to match with standard regular expressions. """, - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.contains.tab case class Like(left: Expression, right: Expression, escapeChar: Char) extends StringRegexExpression { @@ -358,7 +359,8 @@ case class NotLikeAny(child: Expression, patterns: Seq[UTF8String]) extends Like note = """ Use LIKE to match with simple string pattern. 
""", - since = "1.0.0") + since = "1.0.0", + group = "predicate_funcs") // scalastyle:on line.contains.tab case class RLike(left: Expression, right: Expression) extends StringRegexExpression { @@ -436,7 +438,8 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress > SELECT _FUNC_('oneAtwoBthreeC', '[ABC]', 2); ["one","twoBthreeC"] """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringSplit(str: Expression, regex: Expression, limit: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -494,7 +497,8 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) > SELECT _FUNC_('100-200', '(\\d+)', 'num'); num-num """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expression, pos: Expression) extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -687,7 +691,8 @@ abstract class RegExpExtractBase > SELECT _FUNC_('100-200', '(\\d+)-(\\d+)', 1); 100 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expression) extends RegExpExtractBase { def this(s: Expression, r: Expression) = this(s, r, Literal(1)) @@ -787,7 +792,8 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio > SELECT _FUNC_('100-200, 300-400', '(\\d+)-(\\d+)', 1); ["100","300"] """, - since = "3.1.0") + since = "3.1.0", + group = "string_funcs") case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expression) extends RegExpExtractBase { def this(s: Expression, r: Expression) = this(s, r, Literal(1)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 0207b7b55c5af..6caf4395090f1 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -56,7 +56,8 @@ import org.apache.spark.unsafe.types.{ByteArray, UTF8String} > SELECT _FUNC_(' ', 'Spark', 'SQL'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class ConcatWs(children: Seq[Expression]) extends Expression with ImplicitCastInputTypes { @@ -244,7 +245,8 @@ case class ConcatWs(children: Seq[Expression]) > SELECT _FUNC_(1, 'scala', 'java'); scala """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Elt( children: Seq[Expression], @@ -389,7 +391,8 @@ trait String2StringExpression extends ImplicitCastInputTypes { > SELECT _FUNC_('SparkSql'); SPARKSQL """, - since = "1.0.1") + since = "1.0.1", + group = "string_funcs") case class Upper(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { @@ -412,7 +415,8 @@ case class Upper(child: Expression) > SELECT _FUNC_('SparkSql'); sparksql """, - since = "1.0.1") + since = "1.0.1", + group = "string_funcs") case class Lower(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { @@ -490,7 +494,8 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate > SELECT _FUNC_('ABCabc', 'abc', 'DEF'); ABCDEF """, - since = 
"2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringReplace(srcExpr: Expression, searchExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -570,7 +575,8 @@ object Overlay { > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4); Structured SQL """, - since = "3.0.0") + since = "3.0.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Overlay(input: Expression, replace: Expression, pos: Expression, len: Expression) extends QuaternaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -657,7 +663,8 @@ object StringTranslate { > SELECT _FUNC_('AaBbCc', 'abc', '123'); A1B2C3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -722,7 +729,8 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac > SELECT _FUNC_('ab','abc,b,ab,c,def'); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class FindInSet(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -830,7 +838,8 @@ object StringTrim { > SELECT _FUNC_(TRAILING 'SL' FROM 'SSparkSQLS'); SSparkSQ """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringTrim( srcStr: Expression, trimStr: Option[Expression] = None) @@ -923,7 +932,8 @@ object StringTrimLeft { > SELECT _FUNC_(' SparkSQL '); SparkSQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringTrimLeft( srcStr: Expression, trimStr: Option[Expression] = None) @@ -1017,7 +1027,8 @@ object StringTrimRight { > SELECT _FUNC_(' SparkSQL '); SparkSQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringTrimRight( srcStr: Expression, @@ -1094,7 +1105,8 @@ case class StringTrimRight( > SELECT _FUNC_('SparkSQL', 'SQL'); 6 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringInstr(str: Expression, substr: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1136,7 +1148,8 @@ case class StringInstr(str: Expression, substr: Expression) > SELECT _FUNC_('www.apache.org', '.', 2); www.apache """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1176,7 +1189,8 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: > SELECT POSITION('bar' IN 'foobarbar'); 4 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringLocate(substr: Expression, str: Expression, start: Expression) extends TernaryExpression with ImplicitCastInputTypes { @@ -1266,7 +1280,8 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) > SELECT _FUNC_('hi', 5); hi """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringLPad(str: Expression, len: Expression, pad: Expression = Literal(" ")) 
extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1307,7 +1322,8 @@ case class StringLPad(str: Expression, len: Expression, pad: Expression = Litera > SELECT _FUNC_('hi', 5); hi """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringRPad(str: Expression, len: Expression, pad: Expression = Literal(" ")) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1357,7 +1373,8 @@ object ParseUrl { > SELECT _FUNC_('http://spark.apache.org/path?query=1', 'QUERY', 'query'); 1 """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.get.ansiEnabled) extends Expression with ExpectsInputTypes with CodegenFallback { def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) @@ -1512,7 +1529,8 @@ case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.ge > SELECT _FUNC_("Hello World %d %s", 100, "days"); Hello World 100 days """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes { @@ -1601,7 +1619,8 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC > SELECT _FUNC_('sPark sql'); Spark Sql """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1628,7 +1647,8 @@ case class InitCap(child: Expression) > SELECT _FUNC_('123', 2); 123123 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringRepeat(str: Expression, times: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1658,7 +1678,8 @@ case class StringRepeat(str: Expression, times: Expression) > SELECT concat(_FUNC_(2), '1'); 1 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class StringSpace(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1706,7 +1727,8 @@ case class StringSpace(child: Expression) > SELECT _FUNC_('Spark SQL' FROM 5 FOR 1); k """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Substring(str: Expression, pos: Expression, len: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1753,7 +1775,8 @@ case class Substring(str: Expression, pos: Expression, len: Expression) > SELECT _FUNC_('Spark SQL', 3); SQL """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Right(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { def this(str: Expression, len: Expression) = { @@ -1776,7 +1799,8 @@ case class Right(str: Expression, len: Expression, child: Expression) extends Ru > SELECT _FUNC_('Spark SQL', 3); Spa """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Left(str: Expression, len: Expression, child: Expression) extends RuntimeReplaceable { def this(str: Expression, len: Expression) = { @@ -1803,7 +1827,8 @@ case class Left(str: Expression, len: Expression, child: Expression) extends Run > SELECT CHARACTER_LENGTH('Spark SQL '); 10 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class 
Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1833,7 +1858,8 @@ case class Length(child: Expression) > SELECT _FUNC_('Spark SQL'); 72 """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") case class BitLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType @@ -1865,7 +1891,8 @@ case class BitLength(child: Expression) > SELECT _FUNC_('Spark SQL'); 9 """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") case class OctetLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType @@ -1896,7 +1923,8 @@ case class OctetLength(child: Expression) > SELECT _FUNC_('kitten', 'sitting'); 3 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Levenshtein(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1922,7 +1950,8 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres > SELECT _FUNC_('Miller'); M460 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { @@ -1949,7 +1978,8 @@ case class SoundEx(child: Expression) > SELECT _FUNC_(2); 50 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Ascii(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -1991,7 +2021,8 @@ case class Ascii(child: Expression) > SELECT _FUNC_(65); A """, - since = "2.3.0") + since = "2.3.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Chr(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2036,7 +2067,8 @@ case class Chr(child: Expression) > SELECT _FUNC_('Spark SQL'); U3BhcmsgU1FM """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class Base64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2065,7 +2097,8 @@ case class Base64(child: Expression) > SELECT _FUNC_('U3BhcmsgU1FM'); Spark SQL """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class UnBase64(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2130,7 +2163,8 @@ object Decode { > SELECT _FUNC_(6, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle'); NULL """, - since = "3.2.0") + since = "3.2.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Decode(params: Seq[Expression], child: Expression) extends RuntimeReplaceable { @@ -2155,7 +2189,8 @@ case class Decode(params: Seq[Expression], child: Expression) extends RuntimeRep > SELECT _FUNC_(encode('abc', 'utf-8'), 'utf-8'); abc """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class StringDecode(bin: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { @@ -2195,7 +2230,8 @@ case class StringDecode(bin: Expression, charset: Expression) > SELECT _FUNC_('abc', 'utf-8'); abc """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") // scalastyle:on line.size.limit case class Encode(value: Expression, charset: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant 
{ @@ -2240,7 +2276,8 @@ case class Encode(value: Expression, charset: Expression) > SELECT _FUNC_(12332.123456, '##################.###'); 12332.123 """, - since = "1.5.0") + since = "1.5.0", + group = "string_funcs") case class FormatNumber(x: Expression, d: Expression) extends BinaryExpression with ExpectsInputTypes with NullIntolerant { @@ -2411,7 +2448,8 @@ case class FormatNumber(x: Expression, d: Expression) > SELECT _FUNC_('Hi there! Good morning.'); [["Hi","there"],["Good","morning"]] """, - since = "2.0.0") + since = "2.0.0", + group = "string_funcs") case class Sentences( str: Expression, language: Expression = Literal(""), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala index 5f10667c55d79..b8fc830f18183 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala @@ -64,7 +64,8 @@ abstract class XPathExtract > SELECT _FUNC_('1','a/b'); true """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract { @@ -84,7 +85,8 @@ case class XPathBoolean(xml: Expression, path: Expression) extends XPathExtract > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_short" @@ -104,7 +106,8 @@ case class XPathShort(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_int" @@ -124,7 +127,8 @@ case class XPathInt(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_long" @@ -144,7 +148,8 @@ case class XPathLong(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_float" @@ -164,7 +169,8 @@ case class XPathFloat(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('12', 'sum(a/b)'); 3.0 """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = @@ -185,7 +191,8 @@ case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { > SELECT _FUNC_('bcc','a/c'); cc """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathString(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_string" @@ -205,7 +212,8 @@ case class XPathString(xml: Expression, path: 
Expression) extends XPathExtract { > SELECT _FUNC_('b1b2b3c1c2','a/b/text()'); ["b1","b2","b3"] """, - since = "2.0.0") + since = "2.0.0", + group = "xml_funcs") // scalastyle:on line.size.limit case class XPathList(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 6085c1f2cccb0..438fd2351ab9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -43,6 +43,10 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { test("group info in ExpressionInfo") { val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum")) assert(info.getGroup === "agg_funcs") + Seq("agg_funcs", "array_funcs", "binary_funcs", "bitwise_funcs", "collection_funcs", + "predicate_funcs", "conditional_funcs", "conversion_funcs", "csv_funcs", "datetime_funcs", + "generator_funcs", "hash_funcs", "json_funcs", "lambda_funcs", "map_funcs", "math_funcs", + "misc_funcs", "string_funcs", "struct_funcs", "window_funcs", "xml_funcs") Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs") .foreach { groupName => @@ -106,7 +110,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { } test("SPARK-32870: Default expressions in FunctionRegistry should have their " + - "usage, examples and since filled") { + "usage, examples, since, and group filled") { val ignoreSet = Set( // Explicitly inherits NonSQLExpression, and has no ExpressionDescription "org.apache.spark.sql.catalyst.expressions.TimeWindow", @@ -121,6 +125,7 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { assert(info.getExamples.startsWith("\n Examples:\n")) assert(info.getExamples.endsWith("\n ")) assert(info.getSince.matches("[0-9]+\\.[0-9]+\\.[0-9]+")) + assert(info.getGroup.nonEmpty) if (info.getArguments.nonEmpty) { assert(info.getArguments.startsWith("\n Arguments:\n")) From 661ac10901dcdf7d7bd87ef9487f7a045b786573 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 21 Dec 2020 14:06:31 +0000 Subject: [PATCH 0835/1009] [SPARK-33838][SQL][DOCS] Comment the `PURGE` option in the DropTable and in AlterTableDropPartition commands ### What changes were proposed in this pull request? Add comments for the `PURGE` option to the logical nodes `DropTable` and `AlterTableDropPartition`. ### Why are the changes needed? To improve code maintenance. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running `./dev/scalastyle` Closes #30837 from MaxGekk/comment-purge-logical-node. 
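For illustration only, a minimal spark-shell sketch of the commands whose `PURGE` semantics the new comments describe; the table names `t` and `pt` are hypothetical managed tables, and with `PURGE` the catalog removes the data directly, skipping the trash even when one is configured:

```scala
// Minimal sketch (spark-shell); `t` and `pt` are hypothetical managed tables.
// PURGE asks the catalog to drop the data bypassing any configured trash.
spark.sql("DROP TABLE IF EXISTS t PURGE")
spark.sql("ALTER TABLE pt DROP IF EXISTS PARTITION (p = 1) PURGE")
```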
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/plans/logical/v2Commands.scala | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 87d81d5330574..b3b538ac8b327 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -410,6 +410,14 @@ case class Assignment(key: Expression, value: Expression) extends Expression wit /** * The logical plan of the DROP TABLE command. + * + * If the `PURGE` option is set, the table catalog must remove table data by skipping the trash + * even when the catalog has configured one. The option is applicable only for managed tables. + * + * The syntax of this command is: + * {{{ + * DROP TABLE [IF EXISTS] table [PURGE]; + * }}} */ case class DropTable( child: LogicalPlan, @@ -657,9 +665,12 @@ case class AlterTableAddPartition( * The logical plan of the ALTER TABLE DROP PARTITION command. * This may remove the data and metadata for this partition. * + * If the `PURGE` option is set, the table catalog must remove partition data by skipping the trash + * even when the catalog has configured one. The option is applicable only for managed tables. + * * The syntax of this command is: * {{{ - * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...]; + * ALTER TABLE table DROP [IF EXISTS] PARTITION spec1[, PARTITION spec2, ...] [PURGE]; * }}} */ case class AlterTableDropPartition( From 1c7760568263235eaa363e8c650c67132c3dcd7a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 21 Dec 2020 10:25:23 -0800 Subject: [PATCH 0836/1009] [SPARK-33848][SQL] Push the UnaryExpression into (if / case) branches ### What changes were proposed in this pull request? This pr push the `UnaryExpression` into (if / case) branches. The use case is: ```sql create table t1 using parquet as select id from range(10); explain select id from t1 where (CASE WHEN id = 1 THEN '1' WHEN id = 3 THEN '2' end) > 3; ``` Before this pr: ``` == Physical Plan == *(1) Filter (cast(CASE WHEN (id#1L = 1) THEN 1 WHEN (id#1L = 3) THEN 2 END as int) > 3) +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [(cast(CASE WHEN (id#1L = 1) THEN 1 WHEN (id#1L = 3) THEN 2 END as int) > 3)], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == LocalTableScan , [id#1L] ``` This change can also improve this case: https://github.com/apache/spark/blob/a78d6ce376edf2a8836e01f47b9dff5371058d4c/sql/core/src/test/resources/tpcds/q62.sql#L5-L22 ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30853 from wangyum/SPARK-33848. 
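For illustration, a minimal spark-shell sketch of the use case above; it assumes the `t1` table from the description, and per the plans shown above the filter should fold away to a `LocalTableScan` once the cast is pushed into the CASE branches:

```scala
// Minimal sketch (spark-shell), reusing the t1 table from the description above.
spark.sql("create table t1 using parquet as select id from range(10)")
// After this change the cast is pushed into each CASE branch, the comparison
// against 3 constant-folds to false in every branch, and the filter is removed.
spark.sql(
  "select id from t1 where (CASE WHEN id = 1 THEN '1' WHEN id = 3 THEN '2' end) > 3"
).explain()
```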
Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/Expression.scala | 6 +++ .../sql/catalyst/optimizer/expressions.scala | 29 ++++++++++---- .../PushFoldableIntoBranchesSuite.scala | 39 ++++++++++++++++++- .../approved-plans-v1_4/q21.sf100/explain.txt | 12 +++--- .../q21.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q21/explain.txt | 12 +++--- .../approved-plans-v1_4/q21/simplified.txt | 2 +- .../approved-plans-v1_4/q50.sf100/explain.txt | 14 +++---- .../q50.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q50/explain.txt | 10 ++--- .../approved-plans-v1_4/q50/simplified.txt | 2 +- .../approved-plans-v1_4/q62.sf100/explain.txt | 10 ++--- .../q62.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q62/explain.txt | 10 ++--- .../approved-plans-v1_4/q62/simplified.txt | 2 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 14 +++---- .../q97.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q97/explain.txt | 14 +++---- .../approved-plans-v1_4/q97/simplified.txt | 2 +- .../approved-plans-v1_4/q99.sf100/explain.txt | 10 ++--- .../q99.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q99/explain.txt | 10 ++--- .../approved-plans-v1_4/q99/simplified.txt | 2 +- 23 files changed, 133 insertions(+), 77 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 65f89bbdd0599..1d316bcf811d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -536,6 +536,12 @@ abstract class UnaryExpression extends Expression { } } + +object UnaryExpression { + def unapply(e: UnaryExpression): Option[Expression] = Some(e.child) +} + + /** * An expression with two inputs and one output. The output is by default evaluated to null * if any input is evaluated to null. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index ac2caaeb15357..47b968f6ebdd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -544,29 +544,42 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { + case a: Alias => a // Skip an alias. 
+ case u @ UnaryExpression(i @ If(_, trueValue, falseValue)) + if atMostOneUnfoldable(Seq(trueValue, falseValue)) => + i.copy( + trueValue = u.withNewChildren(Array(trueValue)), + falseValue = u.withNewChildren(Array(falseValue))) + + case u @ UnaryExpression(c @ CaseWhen(branches, elseValue)) + if atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + c.copy( + branches.map(e => e.copy(_2 = u.withNewChildren(Array(e._2)))), + elseValue.map(e => u.withNewChildren(Array(e)))) + case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( - trueValue = b.makeCopy(Array(trueValue, right)), - falseValue = b.makeCopy(Array(falseValue, right))) + trueValue = b.withNewChildren(Array(trueValue, right)), + falseValue = b.withNewChildren(Array(falseValue, right))) case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( - trueValue = b.makeCopy(Array(left, trueValue)), - falseValue = b.makeCopy(Array(left, falseValue))) + trueValue = b.withNewChildren(Array(left, trueValue)), + falseValue = b.withNewChildren(Array(left, falseValue))) case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( - branches.map(e => e.copy(_2 = b.makeCopy(Array(e._2, right)))), - elseValue.map(e => b.makeCopy(Array(e, right)))) + branches.map(e => e.copy(_2 = b.withNewChildren(Array(e._2, right)))), + elseValue.map(e => b.withNewChildren(Array(e, right)))) case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( - branches.map(e => e.copy(_2 = b.makeCopy(Array(left, e._2)))), - elseValue.map(e => b.makeCopy(Array(left, e)))) + branches.map(e => e.copy(_2 = b.withNewChildren(Array(left, e._2)))), + elseValue.map(e => b.withNewChildren(Array(left, e)))) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index de4f4be8ec333..02307a52ebb89 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLite import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.types.{BooleanType, IntegerType} +import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType} class PushFoldableIntoBranchesSuite @@ -221,4 +221,41 @@ class PushFoldableIntoBranchesSuite assertEquivalent(EqualTo(Literal(4), ifExp), FalseLiteral) assertEquivalent(EqualTo(Literal(4), caseWhen), FalseLiteral) } + + test("SPARK-33848: Push down cast through If/CaseWhen") { + assertEquivalent(If(a, Literal(2), Literal(3)).cast(StringType), + If(a, Literal("2"), Literal("3"))) + assertEquivalent(If(a, b, Literal(3)).cast(StringType), + If(a, b.cast(StringType), Literal("3"))) + assertEquivalent(If(a, b, b + 1).cast(StringType), + If(a, b, b + 1).cast(StringType)) + + assertEquivalent( + CaseWhen(Seq((a, Literal(1))), Some(Literal(3))).cast(StringType), + 
CaseWhen(Seq((a, Literal("1"))), Some(Literal("3")))) + assertEquivalent( + CaseWhen(Seq((a, Literal(1))), Some(b)).cast(StringType), + CaseWhen(Seq((a, Literal("1"))), Some(b.cast(StringType)))) + assertEquivalent( + CaseWhen(Seq((a, b)), Some(b + 1)).cast(StringType), + CaseWhen(Seq((a, b)), Some(b + 1)).cast(StringType)) + } + + test("SPARK-33848: Push down abs through If/CaseWhen") { + assertEquivalent(Abs(If(a, Literal(-2), Literal(-3))), If(a, Literal(2), Literal(3))) + assertEquivalent( + Abs(CaseWhen(Seq((a, Literal(-1))), Some(Literal(-3)))), + CaseWhen(Seq((a, Literal(1))), Some(Literal(3)))) + } + + test("SPARK-33848: Push down cast with binary expression through If/CaseWhen") { + assertEquivalent(EqualTo(If(a, Literal(2), Literal(3)).cast(StringType), Literal("4")), + FalseLiteral) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1))), Some(Literal(3))).cast(StringType), Literal("4")), + FalseLiteral) + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None).cast(StringType), Literal("4")), + CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) + } } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt index 9de369f611d0e..094e7aac5cbbd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt @@ -130,24 +130,24 @@ Input [6]: [inv_warehouse_sk#3, inv_quantity_on_hand#4, i_item_id#6, d_date#10, (23) HashAggregate [codegen id : 4] Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#13, i_item_id#6, d_date#10] Keys [2]: [w_warehouse_name#13, i_item_id#6] -Functions [2]: [partial_sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Functions [2]: [partial_sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), partial_sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] Aggregate Attributes [2]: [sum#15, sum#16] Results [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] (24) Exchange Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#6, 5), true, [id=#19] +Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#19] (25) HashAggregate [codegen id : 5] Input [4]: [w_warehouse_name#13, i_item_id#6, sum#17, sum#18] Keys [2]: [w_warehouse_name#13, i_item_id#6] -Functions [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] -Results [4]: [w_warehouse_name#13, i_item_id#6, sum(cast(CASE WHEN (d_date#10 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#10 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] +Functions [2]: [sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), 
sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20, sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21] +Results [4]: [w_warehouse_name#13, i_item_id#6, sum(CASE WHEN (d_date#10 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#10 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21 AS inv_after#23] (26) Filter [codegen id : 5] Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] -Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) +Condition : (CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667) ELSE false END AND CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5) ELSE false END) (27) TakeOrderedAndProject Input [4]: [w_warehouse_name#13, i_item_id#6, inv_before#22, inv_after#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt index 0ee47d05af65b..3da4f967ccbd3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen (5) Filter [inv_before,inv_after] - HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date < 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),sum(CASE WHEN (d_date >= 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),inv_before,inv_after,sum,sum] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt index 788d1affde1b8..8edf52683fe7d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -130,24 +130,24 @@ Input [6]: [inv_date_sk#1, inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id (23) HashAggregate [codegen id : 4] Input [4]: [inv_quantity_on_hand#4, w_warehouse_name#6, i_item_id#9, d_date#13] Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [partial_sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] +Functions [2]: [partial_sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), partial_sum(CASE WHEN (d_date#13 >= 11027) THEN 
cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] Aggregate Attributes [2]: [sum#15, sum#16] Results [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] (24) Exchange Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), true, [id=#19] +Arguments: hashpartitioning(w_warehouse_name#6, i_item_id#9, 5), ENSURE_REQUIREMENTS, [id=#19] (25) HashAggregate [codegen id : 5] Input [4]: [w_warehouse_name#6, i_item_id#9, sum#17, sum#18] Keys [2]: [w_warehouse_name#6, i_item_id#9] -Functions [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint)), sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))] -Aggregate Attributes [2]: [sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21] -Results [4]: [w_warehouse_name#6, i_item_id#9, sum(cast(CASE WHEN (d_date#13 < 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#20 AS inv_before#22, sum(cast(CASE WHEN (d_date#13 >= 11027) THEN inv_quantity_on_hand#4 ELSE 0 END as bigint))#21 AS inv_after#23] +Functions [2]: [sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END), sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20, sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21] +Results [4]: [w_warehouse_name#6, i_item_id#9, sum(CASE WHEN (d_date#13 < 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#13 >= 11027) THEN cast(inv_quantity_on_hand#4 as bigint) ELSE 0 END)#21 AS inv_after#23] (26) Filter [codegen id : 5] Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] -Condition : ((CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END >= 0.666667) AND (CASE WHEN (inv_before#22 > 0) THEN (cast(inv_after#23 as double) / cast(inv_before#22 as double)) ELSE null END <= 1.5)) +Condition : (CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667) ELSE false END AND CASE WHEN (inv_before#22 > 0) THEN ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5) ELSE false END) (27) TakeOrderedAndProject Input [4]: [w_warehouse_name#6, i_item_id#9, inv_before#22, inv_after#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt index 9b5483bd7191b..b9729a8c80968 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/simplified.txt @@ -1,7 +1,7 @@ TakeOrderedAndProject [w_warehouse_name,i_item_id,inv_before,inv_after] WholeStageCodegen (5) Filter [inv_before,inv_after] - HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(cast(CASE WHEN (d_date < 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),sum(cast(CASE WHEN (d_date >= 11027) THEN inv_quantity_on_hand ELSE 0 END as bigint)),inv_before,inv_after,sum,sum] + HashAggregate [w_warehouse_name,i_item_id,sum,sum] [sum(CASE WHEN (d_date 
< 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),sum(CASE WHEN (d_date >= 11027) THEN cast(inv_quantity_on_hand as bigint) ELSE 0 END),inv_before,inv_after,sum,sum] InputAdapter Exchange [w_warehouse_name,i_item_id] #1 WholeStageCodegen (4) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt index 741ee50f800ec..69678ef86a0fc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -106,7 +106,7 @@ Input [16]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, s (16) Exchange Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), true, [id=#20] +Arguments: hashpartitioning(cast(ss_ticket_number#5 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_customer_sk#3 as bigint), 5), ENSURE_REQUIREMENTS, [id=#20] (17) Sort [codegen id : 4] Input [14]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] @@ -159,7 +159,7 @@ Input [5]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_ (28) Exchange Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] -Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), true, [id=#29] +Arguments: hashpartitioning(sr_ticket_number#24, sr_item_sk#22, sr_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#29] (29) Sort [codegen id : 7] Input [4]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24] @@ -177,20 +177,20 @@ Input [18]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_ticket_number (32) HashAggregate [codegen id : 8] Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#21, s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] 
+Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#30, sum#31, sum#32, sum#33, sum#34] Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] (33) Exchange Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] -Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 5), true, [id=#40] +Arguments: hashpartitioning(s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 5), ENSURE_REQUIREMENTS, [id=#40] (34) HashAggregate [codegen id : 9] Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum#35, sum#36, sum#37, sum#38, sum#39] Keys [10]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45] -Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#41 AS 30 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#42 AS 31 - 60 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#43 AS 61 - 90 days #48, sum(cast(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#44 AS 91 - 120 days #49, sum(cast(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#45 AS >120 days #50] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#41, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#42, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#43, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#44, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#45] +Results [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#41 AS 30 days #46, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#42 AS 31 - 60 days #47, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) 
THEN 1 ELSE 0 END)#43 AS 61 - 90 days #48, sum(CASE WHEN (((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#44 AS 91 - 120 days #49, sum(CASE WHEN ((sr_returned_date_sk#21 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#45 AS >120 days #50] (35) TakeOrderedAndProject Input [15]: [s_store_name#9, s_company_id#10, s_street_number#11, s_street_name#12, s_street_type#13, s_suite_number#14, s_city#15, s_county#16, s_state#17, s_zip#18, 30 days #46, 31 - 60 days #47, 61 - 90 days #48, 91 - 120 days #49, >120 days #50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt index be11a69176810..02ab8c946fd31 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (9) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 WholeStageCodegen (8) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt index e083affa7261d..ecbd3ab5d3471 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -162,20 +162,20 @@ Input [13]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_compan (29) HashAggregate [codegen id : 5] Input [12]: [ss_sold_date_sk#1, sr_returned_date_sk#6, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#29, sum#30, sum#31, sum#32, sum#33] Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] (30) Exchange Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] -Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), true, [id=#39] +Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), ENSURE_REQUIREMENTS, [id=#39] (31) HashAggregate [codegen id : 6] Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, 
s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#34, sum#35, sum#36, sum#37, sum#38] Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] -Functions [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44] -Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END as bigint))#40 AS 30 days #45, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint))#41 AS 31 - 60 days #46, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END as bigint))#42 AS 61 - 90 days #47, sum(cast(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint))#43 AS 91 - 120 days #48, sum(cast(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END as bigint))#44 AS >120 days #49] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#6 - 
cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#40, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#41, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#42, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#43, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#44] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 30) THEN 1 ELSE 0 END)#40 AS 30 days #45, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 30) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 60)) THEN 1 ELSE 0 END)#41 AS 31 - 60 days #46, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 60) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 90)) THEN 1 ELSE 0 END)#42 AS 61 - 90 days #47, sum(CASE WHEN (((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 90) AND ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) <= 120)) THEN 1 ELSE 0 END)#43 AS 91 - 120 days #48, sum(CASE WHEN ((sr_returned_date_sk#6 - cast(ss_sold_date_sk#1 as bigint)) > 120) THEN 1 ELSE 0 END)#44 AS >120 days #49] (32) TakeOrderedAndProject Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #45, 31 - 60 days #46, 61 - 90 days #47, 91 - 120 days #48, >120 days #49] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt index 43e7773855595..4ab50bf6c135d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as 
bigint)) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END as bigint)),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip,sum,sum,sum,sum,sum] [sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 30) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 60) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 90) AND ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((sr_returned_date_skL - cast(ss_sold_date_sk as bigint)) > 120) THEN 1 ELSE 0 END),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [s_store_name,s_company_id,s_street_number,s_street_name,s_street_type,s_suite_number,s_city,s_county,s_state,s_zip] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt index b74dfb49c9f03..90e48794201c4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -162,20 +162,20 @@ Input [7]: [ws_sold_date_sk#1, ws_ship_date_sk#2, ws_warehouse_sk#5, web_name#10 (29) HashAggregate [codegen id : 5] Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#16, sm_type#13, web_name#10] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 
END), partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#13, web_name#10] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND 
((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt index 9b16b44792ca4..a2e1d28e1b911 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - 
ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt index 05ce467c349a3..b6c467d0e9863 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt @@ -162,20 +162,20 @@ Input [6]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10 (29) HashAggregate [codegen id : 5] Input [5]: [ws_sold_date_sk#1, ws_ship_date_sk#2, w_warehouse_name#7, sm_type#10, web_name#13] Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, web_name#13] -Functions [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 
60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 30) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 
END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 60) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 90) AND ((ws_ship_date_sk#2 - ws_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#2 - ws_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt index 803326b2afd30..017ba3adcefe9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,web_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,web_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 30) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 60) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((ws_ship_date_sk - ws_sold_date_sk) > 90) AND ((ws_ship_date_sk - ws_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((ws_ship_date_sk - ws_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,web_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index e904ad94dd8fa..fadad48be3d6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -84,7 +84,7 @@ Results [2]: [ss_customer_sk#3, ss_item_sk#2] (12) Exchange Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] +Arguments: 
hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] (13) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#3, ss_item_sk#2] @@ -132,7 +132,7 @@ Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] (22) Exchange Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#13] (23) HashAggregate [codegen id : 6] Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] @@ -157,18 +157,18 @@ Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] (27) HashAggregate [codegen id : 7] Input [2]: [customer_sk#8, customer_sk#14] Keys: [] -Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] Aggregate Attributes [3]: [sum#16, sum#17, sum#18] Results [3]: [sum#19, sum#20, sum#21] (28) Exchange Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, true, [id=#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] (29) HashAggregate [codegen id : 8] Input [3]: [sum#19, sum#20, sum#21] Keys: [] -Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] -Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN 
(isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt index c5921a11cd889..dc149c443c20f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] InputAdapter Exchange #1 WholeStageCodegen (7) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index e904ad94dd8fa..fadad48be3d6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -84,7 +84,7 @@ Results [2]: [ss_customer_sk#3, ss_item_sk#2] (12) Exchange Input [2]: [ss_customer_sk#3, ss_item_sk#2] -Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), true, [id=#7] +Arguments: hashpartitioning(ss_customer_sk#3, ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] (13) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#3, ss_item_sk#2] @@ -132,7 +132,7 @@ Results [2]: [cs_bill_customer_sk#11, cs_item_sk#12] (22) Exchange Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] -Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), true, [id=#13] +Arguments: hashpartitioning(cs_bill_customer_sk#11, cs_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#13] (23) HashAggregate [codegen id : 6] Input [2]: [cs_bill_customer_sk#11, cs_item_sk#12] @@ -157,18 +157,18 @@ Input [4]: [customer_sk#8, item_sk#9, customer_sk#14, item_sk#15] (27) HashAggregate [codegen id : 7] Input [2]: [customer_sk#8, customer_sk#14] Keys: [] -Functions [3]: [partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] Aggregate Attributes [3]: [sum#16, sum#17, sum#18] 
Results [3]: [sum#19, sum#20, sum#21] (28) Exchange Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, true, [id=#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] (29) HashAggregate [codegen id : 8] Input [3]: [sum#19, sum#20, sum#21] Keys: [] -Functions [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25] -Results [3]: [sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#23 AS store_only#26, sum(cast(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#24 AS catalog_only#27, sum(cast(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END as bigint))#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#8) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN (isnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#8) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt index c5921a11cd889..dc149c443c20f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/simplified.txt @@ -1,5 +1,5 @@ WholeStageCodegen (8) - HashAggregate [sum,sum,sum] [sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END as bigint)),store_only,catalog_only,store_and_catalog,sum,sum,sum] + HashAggregate [sum,sum,sum] [sum(CASE WHEN (isnotnull(customer_sk) AND isnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),sum(CASE WHEN (isnotnull(customer_sk) AND isnotnull(customer_sk)) THEN 1 ELSE 0 END),store_only,catalog_only,store_and_catalog,sum,sum,sum] InputAdapter Exchange #1 WholeStageCodegen (7) diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt index 34eba382992c3..5d9c5794ae33b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -162,20 +162,20 @@ Input [7]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_warehouse_sk#5, sm_type#10, (29) HashAggregate [codegen id : 5] Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#16, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#16, 1, 20) AS substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#16, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate 
Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#16, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) 
TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt index b25b16136992c..3526a87fad82e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen (5) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt index 595cb2984ab75..b7dcf12fb7166 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -162,20 +162,20 @@ Input [6]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10 (29) HashAggregate [codegen id : 5] Input [5]: [cs_sold_date_sk#1, cs_ship_date_sk#2, w_warehouse_name#7, sm_type#10, cc_name#13] Keys [3]: [substr(w_warehouse_name#7, 1, 20) AS substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) 
AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), partial_sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] +Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] Results [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] (30) Exchange Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), true, [id=#29] +Arguments: hashpartitioning(substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] (31) HashAggregate [codegen id : 6] Input [8]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] Keys [3]: [substr(w_warehouse_name#7, 1, 20)#18, sm_type#10, cc_name#13] -Functions [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint)), sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))] -Aggregate Attributes [5]: [sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34] -Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END as bigint))#30 AS 30 days #36, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END as bigint))#31 AS 31 - 60 days #37, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - 
cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END as bigint))#32 AS 61 - 90 days #38, sum(cast(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END as bigint))#33 AS 91 - 120 days #39, sum(cast(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END as bigint))#34 AS >120 days #40] +Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34] +Results [8]: [substr(w_warehouse_name#7, 1, 20)#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 30) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 60) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 90) AND ((cs_ship_date_sk#2 - cs_sold_date_sk#1) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#2 - cs_sold_date_sk#1) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] (32) TakeOrderedAndProject Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt index 9ebaaac52930a..79f7b4f13350d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/simplified.txt @@ -1,6 +1,6 @@ TakeOrderedAndProject [substr(w_warehouse_name, 1, 20),sm_type,cc_name,30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ] WholeStageCodegen (6) - HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk 
- cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END as bigint)),sum(cast(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END as bigint)),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] + HashAggregate [substr(w_warehouse_name, 1, 20),sm_type,cc_name,sum,sum,sum,sum,sum] [sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) <= 30) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 30) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 60)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 60) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 90)) THEN 1 ELSE 0 END),sum(CASE WHEN (((cs_ship_date_sk - cs_sold_date_sk) > 90) AND ((cs_ship_date_sk - cs_sold_date_sk) <= 120)) THEN 1 ELSE 0 END),sum(CASE WHEN ((cs_ship_date_sk - cs_sold_date_sk) > 120) THEN 1 ELSE 0 END),substr(w_warehouse_name, 1, 20),30 days ,31 - 60 days ,61 - 90 days ,91 - 120 days ,>120 days ,sum,sum,sum,sum,sum] InputAdapter Exchange [substr(w_warehouse_name, 1, 20),sm_type,cc_name] #1 WholeStageCodegen (5) From 38bbccab7560f2cfd00f9f85ca800434efe950b4 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 21 Dec 2020 11:11:25 -0800 Subject: [PATCH 0837/1009] [SPARK-33869][PYTHON][SQL][TESTS] Have a separate metastore directory for each PySpark test job ### What changes were proposed in this pull request? This PR proposes to have its own metastore directory to avoid potential conflict in catalog operations. ### Why are the changes needed? To make PySpark tests less flaky. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested by trying some sleeps in https://github.com/apache/spark/pull/30873. Closes #30875 from HyukjinKwon/SPARK-33869. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/run-tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/run-tests.py b/python/run-tests.py index 34800b0e9fa54..a13828d81f04f 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -83,12 +83,17 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): tmp_dir = os.path.join(target_dir, str(uuid.uuid4())) os.mkdir(tmp_dir) env["TMPDIR"] = tmp_dir + metastore_dir = os.path.join(tmp_dir, str(uuid.uuid4())) + while os.path.isdir(metastore_dir): + metastore_dir = os.path.join(metastore_dir, str(uuid.uuid4())) + os.mkdir(metastore_dir) # Also override the JVM's temp directory by setting driver and executor options. java_options = "-Djava.io.tmpdir={0} -Dio.netty.tryReflectionSetAccessible=true".format(tmp_dir) spark_args = [ "--conf", "spark.driver.extraJavaOptions='{0}'".format(java_options), "--conf", "spark.executor.extraJavaOptions='{0}'".format(java_options), + "--conf", "spark.sql.warehouse.dir='{0}'".format(metastore_dir), "pyspark-shell" ] env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args) From 4106731fdd508c1af6e15b4f9dc2bb139e047174 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 22 Dec 2020 06:27:27 +0900 Subject: [PATCH 0838/1009] [SPARK-33836][SS][PYTHON][FOLLOW-UP] Use test utils and clean up doctests in table and toTable ### What changes were proposed in this pull request? This PR proposes to: - Make doctests simpler to show the usage (since we're not running them now). - Use the test utils to drop the tables if exists. 
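As a rough, illustrative sketch of the two streaming table APIs whose doctests are cleaned up here (the table names and checkpoint path are made up; `spark` is the session provided by the PySpark shell, and the input table is assumed to exist):

```python
# Illustrative only -- not the exact doctest content.
sdf = spark.readStream.table("input_table")            # DataStreamReader.table

query = (sdf.writeStream
            .format("parquet")
            .option("checkpointLocation", "/tmp/checkpoint")
            .toTable("output_table"))                   # DataStreamWriter.toTable
query.stop()
```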
### Why are the changes needed? Better docs and code readability. ### Does this PR introduce _any_ user-facing change? No, dev-only. It includes some doc changes in unreleased branches. ### How was this patch tested? Manually tested. ```bash cd python ./run-tests --python-executable=python3.9,python3.8 --testnames "pyspark.sql.tests.test_streaming StreamingTests" ``` Closes #30873 from HyukjinKwon/SPARK-33836. Authored-by: HyukjinKwon Signed-off-by: Jungtaek Lim --- python/pyspark/sql/streaming.py | 28 +++++---------- python/pyspark/sql/tests/test_streaming.py | 40 ++++++++++------------ 2 files changed, 28 insertions(+), 40 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 2c9c1f06274ce..5f122293f4a0a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -974,9 +974,7 @@ def table(self, tableName): Examples -------- - >>> csv_sdf = spark.readStream.table('input_table') # doctest: +SKIP - >>> csv_sdf.isStreaming # doctest: +SKIP - True + >>> spark.readStream.table('input_table') # doctest: +SKIP """ if isinstance(tableName, str): return self._df(self._jreader.table(tableName)) @@ -1535,23 +1533,15 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que Examples -------- - >>> sq = sdf.writeStream.format('parquet').queryName('this_query').option( - ... 'checkpointLocation', '/tmp/checkpoint').toTable('output_table') # doctest: +SKIP - >>> sq.isActive # doctest: +SKIP - True - >>> sq.name # doctest: +SKIP - 'this_query' - >>> sq.stop() # doctest: +SKIP - >>> sq.isActive # doctest: +SKIP - False - >>> sq = sdf.writeStream.trigger(processingTime='5 seconds').toTable( - ... 'output_table', queryName='that_query', outputMode="append", format='parquet', + >>> sdf.writeStream.format('parquet').queryName('query').toTable('output_table') + ... # doctest: +SKIP + + >>> sdf.writeStream.trigger(processingTime='5 seconds').toTable( + ... 'output_table', + ... queryName='that_query', + ... outputMode="append", + ... format='parquet', ... 
checkpointLocation='/tmp/checkpoint') # doctest: +SKIP - >>> sq.name # doctest: +SKIP - 'that_query' - >>> sq.isActive # doctest: +SKIP - True - >>> sq.stop() # doctest: +SKIP """ # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API self.options(**options) diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 44bfb2a7447ca..44cdde0f2e8a9 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -19,7 +19,6 @@ import shutil import tempfile import time -from random import randint from pyspark.sql import Row from pyspark.sql.functions import lit @@ -572,28 +571,27 @@ def collectBatch(df, id): q.stop() def test_streaming_read_from_table(self): - input_table_name = "sample_input_table_%d" % randint(0, 100000000) - self.spark.sql("CREATE TABLE %s (value string) USING parquet" % input_table_name) - self.spark.sql("INSERT INTO %s VALUES ('aaa'), ('bbb'), ('ccc')" % input_table_name) - df = self.spark.readStream.table(input_table_name) - self.assertTrue(df.isStreaming) - q = df.writeStream.format('memory').queryName('this_query').start() - q.processAllAvailable() - q.stop() - result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() - self.assertEqual([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')], result) + with self.table("input_table", "this_query"): + self.spark.sql("CREATE TABLE input_table (value string) USING parquet") + self.spark.sql("INSERT INTO input_table VALUES ('aaa'), ('bbb'), ('ccc')") + df = self.spark.readStream.table("input_table") + self.assertTrue(df.isStreaming) + q = df.writeStream.format('memory').queryName('this_query').start() + q.processAllAvailable() + q.stop() + result = self.spark.sql("SELECT * FROM this_query ORDER BY value").collect() + self.assertEqual( + set([Row(value='aaa'), Row(value='bbb'), Row(value='ccc')]), set(result)) def test_streaming_write_to_table(self): - output_table_name = "sample_output_table_%d" % randint(0, 100000000) - tmpPath = tempfile.mkdtemp() - shutil.rmtree(tmpPath) - df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() - q = df.writeStream.toTable(output_table_name, format='parquet', checkpointLocation=tmpPath) - self.assertTrue(q.isActive) - time.sleep(3) - q.stop() - result = self.spark.sql("SELECT value FROM %s" % output_table_name).collect() - self.assertTrue(len(result) > 0) + with self.table("output_table"), tempfile.TemporaryDirectory() as tmpdir: + df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() + q = df.writeStream.toTable("output_table", format='parquet', checkpointLocation=tmpdir) + self.assertTrue(q.isActive) + time.sleep(3) + q.stop() + result = self.spark.sql("SELECT value FROM output_table").collect() + self.assertTrue(len(result) > 0) if __name__ == "__main__": From 0bf3828ac42ca994daa296a3ce20e511db568321 Mon Sep 17 00:00:00 2001 From: Kyle Krueger Date: Mon, 21 Dec 2020 14:17:09 -0800 Subject: [PATCH 0839/1009] [MINOR] update dstream.py with more accurate exceptions ### What changes were proposed in this pull request? Reopened from https://github.com/apache/spark/pull/27525. The exception messages for dstream.py when using windows were improved to be specific about what sliding duration is important. ### Why are the changes needed? The batch interval of dstreams are improperly named as sliding windows. 
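As a quick, hedged illustration of the constraint behind these messages (host, port, and durations are made up): both the window duration and the slide duration passed to `window()` must be multiples of the parent DStream's batch interval.

```python
# Minimal sketch, assuming a local Spark installation; values are illustrative.
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "window-example")
ssc = StreamingContext(sc, batchDuration=5)            # 5-second batch interval

lines = ssc.socketTextStream("localhost", 9999)
# 30 and 10 are multiples of the 5-second batch interval, so this passes
# _validate_window_param; window(30, 7) would raise the ValueError shown below.
windowed = lines.window(windowDuration=30, slideDuration=10)
```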
The term sliding window is also used to reference the new window of a dstream collected over a window of rdds in a parent dstream. We should probably fix the naming convention of sliding window used in the dstream class, but for now this more explicit exception message may reduce confusion. ### Does this PR introduce any user-facing change? No ### How was this patch tested? It wasn't, since this is only a change to the exception message. Closes #30871 from kykrueger/kykrueger-patch-1. Authored-by: Kyle Krueger Signed-off-by: Dongjoon Hyun --- python/pyspark/streaming/dstream.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index afb85709c771c..6ef164ae5a11a 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -421,10 +421,12 @@ def slice(self, begin, end): def _validate_window_param(self, window, slide): duration = self._jdstream.dstream().slideDuration().milliseconds() if int(window * 1000) % duration != 0: - raise ValueError("windowDuration must be multiple of the slide duration (%d ms)" + raise ValueError("windowDuration must be multiple of the parent " + "dstream's slide (batch) duration (%d ms)" % duration) if slide and int(slide * 1000) % duration != 0: - raise ValueError("slideDuration must be multiple of the slide duration (%d ms)" + raise ValueError("slideDuration must be multiple of the parent " + "dstream's slide (batch) duration (%d ms)" % duration) def window(self, windowDuration, slideDuration=None): From f62e957b31a281c542514c27da32ccda8e4bda46 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 21 Dec 2020 16:35:04 -0800 Subject: [PATCH 0840/1009] [SPARK-33873][CORE][TESTS] Test all compression codecs with encrypted spilling ### What changes were proposed in this pull request? This PR aims to test all compression codecs for encrypted spilling. ### Why are the changes needed? To improve test coverage. Currently, only `CompressionCodec.DEFAULT_COMPRESSION_CODEC` is under testing. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the updated test cases. Closes #30879 from dongjoon-hyun/SPARK-33873.
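For background on the configuration pair these tests exercise, a hedged sketch (the codec value is just one supported option, not a recommendation from this patch):

```python
# Illustrative only: enabling I/O encryption together with an explicit compression codec
# for data that Spark spills to disk or shuffles.
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .config("spark.io.encryption.enabled", "true")
         .config("spark.io.compression.codec", "zstd")
         .getOrCreate())
```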
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../util/collection/ExternalAppendOnlyMapSuite.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala index 83595ba22aa57..81a145906d33c 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/ExternalAppendOnlyMapSuite.scala @@ -220,13 +220,13 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite testSimpleSpilling() } - test("spilling with compression") { + private def testSimpleSpillingForAllCodecs(encrypt: Boolean) { // Keep track of which compression codec we're using to report in test failure messages var lastCompressionCodec: Option[String] = None try { allCompressionCodecs.foreach { c => lastCompressionCodec = Some(c) - testSimpleSpilling(Some(c)) + testSimpleSpilling(Some(c), encrypt) } } catch { // Include compression codec used in test failure message @@ -241,8 +241,12 @@ class ExternalAppendOnlyMapSuite extends SparkFunSuite } } + test("spilling with compression") { + testSimpleSpillingForAllCodecs(encrypt = false) + } + test("spilling with compression and encryption") { - testSimpleSpilling(Some(CompressionCodec.DEFAULT_COMPRESSION_CODEC), encrypt = true) + testSimpleSpillingForAllCodecs(encrypt = true) } /** From 7466031632c5f1771cad3f3131bc1a3e52be173a Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 22 Dec 2020 11:37:59 +0900 Subject: [PATCH 0841/1009] [SPARK-32106][SQL] Implement script transform in sql/core ### What changes were proposed in this pull request? * Implement `SparkScriptTransformationExec` based on `BaseScriptTransformationExec` * Implement `SparkScriptTransformationWriterThread` based on `BaseScriptTransformationWriterThread` for writing data * Add rule `SparkScripts` to support converting a script LogicalPlan to a SparkPlan in Spark SQL (without hive mode) * Add `SparkScriptTransformationSuite` to test Spark-specific cases * Add tests in `SQLQueryTestSuite` We will also close #29085. ### Why are the changes needed? To support using Script Transform without Hive. ### Does this PR introduce _any_ user-facing change? Yes. Users can use Script Transformation without Hive in no-serde mode, for example: **default no serde** ``` SELECT TRANSFORM(a, b, c) USING 'cat' AS (a int, b string, c long) FROM testData ``` **no serde with an explicit ROW FORMAT DELIMITED spec** ``` SELECT TRANSFORM(a, b, c) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY '\u0002' MAP KEYS TERMINATED BY '\u0003' LINES TERMINATED BY '\n' NULL DEFINED AS 'null' USING 'cat' AS (a, b, c) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY '\u0004' MAP KEYS TERMINATED BY '\u0005' LINES TERMINATED BY '\n' NULL DEFINED AS 'NULL' FROM testData ``` ### How was this patch tested? Added UT Closes #29414 from AngersZhuuuu/SPARK-32106-MINOR.
Authored-by: angerszhu Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/parser/AstBuilder.scala | 52 ++- .../sql/catalyst/parser/PlanParserSuite.scala | 113 +++++- .../spark/sql/execution/SparkPlanner.scala | 1 + .../SparkScriptTransformationExec.scala | 91 +++++ .../spark/sql/execution/SparkSqlParser.scala | 115 +++--- .../spark/sql/execution/SparkStrategies.scala | 14 + .../resources/sql-tests/inputs/transform.sql | 195 ++++++++++ .../sql-tests/results/transform.sql.out | 357 ++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +- .../SparkScriptTransformationSuite.scala | 102 +++++ .../HiveScriptTransformationExec.scala | 2 + 11 files changed, 982 insertions(+), 65 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala create mode 100644 sql/core/src/test/resources/sql-tests/inputs/transform.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/transform.sql.out create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 9c265544f3227..2af84fa079d97 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -743,8 +743,33 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg selectClause.hints.asScala.foldRight(withWindow)(withHints) } + // Script Transform's input/output format. + type ScriptIOFormat = + (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) + + protected def getRowFormatDelimited(ctx: RowFormatDelimitedContext): ScriptIOFormat = { + // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema + // expects a seq of pairs in which the old parsers' token names are used as keys. + // Transforming the result of visitRowFormatDelimited would be quite a bit messier than + // retrieving the key value pairs ourselves. + val entries = entry("TOK_TABLEROWFORMATFIELD", ctx.fieldsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATCOLLITEMS", ctx.collectionItemsTerminatedBy) ++ + entry("TOK_TABLEROWFORMATMAPKEYS", ctx.keysTerminatedBy) ++ + entry("TOK_TABLEROWFORMATNULL", ctx.nullDefinedAs) ++ + Option(ctx.linesSeparatedBy).toSeq.map { token => + val value = string(token) + validate( + value == "\n", + s"LINES TERMINATED BY only supports newline '\\n' right now: $value", + ctx) + "TOK_TABLEROWFORMATLINES" -> value + } + + (entries, None, Seq.empty, None) + } + /** - * Create a (Hive based) [[ScriptInputOutputSchema]]. + * Create a [[ScriptInputOutputSchema]]. 
*/ protected def withScriptIOSchema( ctx: ParserRuleContext, @@ -753,7 +778,30 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg outRowFormat: RowFormatContext, recordReader: Token, schemaLess: Boolean): ScriptInputOutputSchema = { - throw new ParseException("Script Transform is not supported", ctx) + + def format(fmt: RowFormatContext): ScriptIOFormat = fmt match { + case c: RowFormatDelimitedContext => + getRowFormatDelimited(c) + + case c: RowFormatSerdeContext => + throw new ParseException("TRANSFORM with serde is only supported in hive mode", ctx) + + // SPARK-32106: When there is no definition about format, we return empty result + // to use a built-in default Serde in SparkScriptTransformationExec. + case null => + (Nil, None, Seq.empty, None) + } + + val (inFormat, inSerdeClass, inSerdeProps, reader) = format(inRowFormat) + + val (outFormat, outSerdeClass, outSerdeProps, writer) = format(outRowFormat) + + ScriptInputOutputSchema( + inFormat, outFormat, + inSerdeClass, outSerdeClass, + inSerdeProps, outSerdeProps, + reader, writer, + schemaLess) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 6fef18babedb6..54018198f619d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.IntegerType +import org.apache.spark.sql.types.{IntegerType, LongType, StringType} /** * Parser test cases for rules defined in [[CatalystSqlParser]] / [[AstBuilder]]. 
@@ -1031,4 +1031,115 @@ class PlanParserSuite extends AnalysisTest { assertEqual("select a, b from db.c;;;", table("db", "c").select('a, 'b)) assertEqual("select a, b from db.c; ;; ;", table("db", "c").select('a, 'b)) } + + test("SPARK-32106: TRANSFORM plan") { + // verify schema less + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("key", StringType)(), + AttributeReference("value", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, true)) + ) + + // verify without output schema + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' AS (a, b, c) + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, false))) + + // verify with output schema + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + |USING 'cat' AS (a int, b string, c long) + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", IntegerType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", LongType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema(List.empty, List.empty, None, None, + List.empty, List.empty, None, None, false))) + + // verify with ROW FORMAT DELIMETED + assertEqual( + """ + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0002' + | MAP KEYS TERMINATED BY '\u0003' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'null' + | USING 'cat' AS (a, b, c) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | COLLECTION ITEMS TERMINATED BY '\u0004' + | MAP KEYS TERMINATED BY '\u0005' + | LINES TERMINATED BY '\n' + | NULL DEFINED AS 'NULL' + |FROM testData + """.stripMargin, + ScriptTransformation( + Seq('a, 'b, 'c), + "cat", + Seq(AttributeReference("a", StringType)(), + AttributeReference("b", StringType)(), + AttributeReference("c", StringType)()), + UnresolvedRelation(TableIdentifier("testData")), + ScriptInputOutputSchema( + Seq(("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATCOLLITEMS", "\u0002"), + ("TOK_TABLEROWFORMATMAPKEYS", "\u0003"), + ("TOK_TABLEROWFORMATNULL", "null"), + ("TOK_TABLEROWFORMATLINES", "\n")), + Seq(("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATCOLLITEMS", "\u0004"), + ("TOK_TABLEROWFORMATMAPKEYS", "\u0005"), + ("TOK_TABLEROWFORMATNULL", "NULL"), + ("TOK_TABLEROWFORMATLINES", "\n")), None, None, + List.empty, List.empty, None, None, false))) + + // verify with ROW FORMAT SERDE + intercept( + """ + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") + | USING 'cat' AS (a, b, c) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' + | WITH SERDEPROPERTIES( + | "separatorChar" = "\t", + | "quoteChar" = "'", + | "escapeChar" = "\\") + |FROM testData + """.stripMargin, + "TRANSFORM with serde is only supported in hive mode") + } } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index c88fcecc9983b..6994aaf47dfba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -43,6 +43,7 @@ class SparkPlanner(val session: SparkSession, val experimentalMethods: Experimen Window :: JoinSelection :: InMemoryScans :: + SparkScripts :: BasicOperators :: Nil) /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala new file mode 100644 index 0000000000000..75c91667012a3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkScriptTransformationExec.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import java.io._ + +import org.apache.hadoop.conf.Configuration + +import org.apache.spark.TaskContext +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.types._ +import org.apache.spark.util.CircularBuffer + +/** + * Transforms the input by forking and running the specified script. + * + * @param input the set of expression that should be passed to the script. + * @param script the command that should be executed. + * @param output the attributes that are produced by the script. + * @param child logical plan whose output is transformed. + * @param ioschema the class set that defines how to handle input/output data. + */ +case class SparkScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema) + extends BaseScriptTransformationExec { + + override def processIterator( + inputIterator: Iterator[InternalRow], + hadoopConf: Configuration): Iterator[InternalRow] = { + + val (outputStream, proc, inputStream, stderrBuffer) = initProc + + val outputProjection = new InterpretedProjection(inputExpressionsWithoutSerde, child.output) + + // This new thread will consume the ScriptTransformation's input rows and write them to the + // external process. That process's output will be read by this current thread. 
+ val writerThread = SparkScriptTransformationWriterThread( + inputIterator.map(outputProjection), + inputExpressionsWithoutSerde.map(_.dataType), + ioschema, + outputStream, + proc, + stderrBuffer, + TaskContext.get(), + hadoopConf + ) + + val outputIterator = + createOutputIteratorWithoutSerde(writerThread, inputStream, proc, stderrBuffer) + + writerThread.start() + + outputIterator + } +} + +case class SparkScriptTransformationWriterThread( + iter: Iterator[InternalRow], + inputSchema: Seq[DataType], + ioSchema: ScriptTransformationIOSchema, + outputStream: OutputStream, + proc: Process, + stderrBuffer: CircularBuffer, + taskContext: TaskContext, + conf: Configuration) + extends BaseScriptTransformationWriterThread { + + override def processRows(): Unit = { + processRowsWithoutSerde() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 722ca6f992064..e530b4c9407a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeConstants import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.internal.{HiveSerDe, SQLConf, VariableSubstitution} +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION /** * Concrete parser for Spark SQL statements. @@ -478,70 +479,62 @@ class SparkSqlAstBuilder extends AstBuilder { "Unsupported operation: Used defined record reader/writer classes.", ctx) } - // Decode and input/output format. - type Format = (Seq[(String, String)], Option[String], Seq[(String, String)], Option[String]) - def format( - fmt: RowFormatContext, - configKey: String, - defaultConfigValue: String): Format = fmt match { - case c: RowFormatDelimitedContext => - // TODO we should use the visitRowFormatDelimited function here. However HiveScriptIOSchema - // expects a seq of pairs in which the old parsers' token names are used as keys. - // Transforming the result of visitRowFormatDelimited would be quite a bit messier than - // retrieving the key value pairs ourselves. - val entries = entry("TOK_TABLEROWFORMATFIELD", c.fieldsTerminatedBy) ++ - entry("TOK_TABLEROWFORMATCOLLITEMS", c.collectionItemsTerminatedBy) ++ - entry("TOK_TABLEROWFORMATMAPKEYS", c.keysTerminatedBy) ++ - entry("TOK_TABLEROWFORMATNULL", c.nullDefinedAs) ++ - Option(c.linesSeparatedBy).toSeq.map { token => - val value = string(token) - validate( - value == "\n", - s"LINES TERMINATED BY only supports newline '\\n' right now: $value", - c) - "TOK_TABLEROWFORMATLINES" -> value + if (!conf.getConf(CATALOG_IMPLEMENTATION).equals("hive")) { + super.withScriptIOSchema( + ctx, + inRowFormat, + recordWriter, + outRowFormat, + recordReader, + schemaLess) + } else { + def format( + fmt: RowFormatContext, + configKey: String, + defaultConfigValue: String): ScriptIOFormat = fmt match { + case c: RowFormatDelimitedContext => + getRowFormatDelimited(c) + + case c: RowFormatSerdeContext => + // Use a serde format. 
+ val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) + + // SPARK-10310: Special cases LazySimpleSerDe + val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { + Option(conf.getConfString(configKey, defaultConfigValue)) + } else { + None } + (Seq.empty, Option(name), props.toSeq, recordHandler) + + case null => + // Use default (serde) format. + val name = conf.getConfString("hive.script.serde", + "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") + val props = Seq( + "field.delim" -> "\t", + "serialization.last.column.takes.rest" -> "true") + val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) + (Nil, Option(name), props, recordHandler) + } - (entries, None, Seq.empty, None) - - case c: RowFormatSerdeContext => - // Use a serde format. - val SerdeInfo(None, None, Some(name), props) = visitRowFormatSerde(c) - - // SPARK-10310: Special cases LazySimpleSerDe - val recordHandler = if (name == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") { - Option(conf.getConfString(configKey, defaultConfigValue)) - } else { - None - } - (Seq.empty, Option(name), props.toSeq, recordHandler) - - case null => - // Use default (serde) format. - val name = conf.getConfString("hive.script.serde", - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe") - val props = Seq( - "field.delim" -> "\t", - "serialization.last.column.takes.rest" -> "true") - val recordHandler = Option(conf.getConfString(configKey, defaultConfigValue)) - (Nil, Option(name), props, recordHandler) + val (inFormat, inSerdeClass, inSerdeProps, reader) = + format( + inRowFormat, "hive.script.recordreader", + "org.apache.hadoop.hive.ql.exec.TextRecordReader") + + val (outFormat, outSerdeClass, outSerdeProps, writer) = + format( + outRowFormat, "hive.script.recordwriter", + "org.apache.hadoop.hive.ql.exec.TextRecordWriter") + + ScriptInputOutputSchema( + inFormat, outFormat, + inSerdeClass, outSerdeClass, + inSerdeProps, outSerdeProps, + reader, writer, + schemaLess) } - - val (inFormat, inSerdeClass, inSerdeProps, reader) = - format( - inRowFormat, "hive.script.recordreader", "org.apache.hadoop.hive.ql.exec.TextRecordReader") - - val (outFormat, outSerdeClass, outSerdeProps, writer) = - format( - outRowFormat, "hive.script.recordwriter", - "org.apache.hadoop.hive.ql.exec.TextRecordWriter") - - ScriptInputOutputSchema( - inFormat, outFormat, - inSerdeClass, outSerdeClass, - inSerdeProps, outSerdeProps, - reader, writer, - schemaLess) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index f5f77b03c2b1b..a8d788f59d271 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -594,6 +594,20 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } + object SparkScripts extends Strategy { + def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { + case logical.ScriptTransformation(input, script, output, child, ioschema) => + SparkScriptTransformationExec( + input, + script, + output, + planLater(child), + ScriptTransformationIOSchema(ioschema) + ) :: Nil + case _ => Nil + } + } + object BasicOperators extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case d: DataWritingCommand => DataWritingCommandExec(d, planLater(d.query)) :: Nil diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql new file mode 100644 index 0000000000000..65b060eca3a62 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -0,0 +1,195 @@ +-- Test data. +CREATE OR REPLACE TEMPORARY VIEW t AS SELECT * FROM VALUES +('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')), +('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200), bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')), +('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03')) +AS t(a, b, c, d, e, f, g, h, i, j, k, l); + +SELECT TRANSFORM(a) +USING 'cat' AS (a) +FROM t; + +-- with non-exist command +SELECT TRANSFORM(a) +USING 'some_non_existent_command' AS (a) +FROM t; + +-- with non-exist file +SELECT TRANSFORM(a) +USING 'python some_non_existent_file' AS (a) +FROM t; + +-- common supported data types between no serde and serde transform +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t +) tmp; + +-- common supported data types between no serde and serde transform +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + FROM t +) tmp; + +-- SPARK-32388 handle schema less +SELECT TRANSFORM(a) +USING 'cat' +FROM t; + +SELECT TRANSFORM(a, b) +USING 'cat' +FROM t; + +SELECT TRANSFORM(a, b, c) +USING 'cat' +FROM t; + +-- return null when return string incompatible (no serde) +SELECT TRANSFORM(a, b, c, d, e, f, g, h, i) +USING 'cat' AS (a int, b short, c long, d byte, e float, f double, g decimal(38, 18), h date, i timestamp) +FROM VALUES +('a','','1231a','a','213.21a','213.21a','0a.21d','2000-04-01123','1997-0102 00:00:') tmp(a, b, c, d, e, f, g, h, i); + +-- SPARK-28227: transform can't run with aggregation +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b; + +-- transform use MAP +MAP a, b USING 'cat' AS (a, b) FROM t; + +-- transform use REDUCE +REDUCE a, b USING 'cat' AS (a, b) FROM t; + +-- transform with defined row format delimit +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (a, b, c, d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t; + +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t; + +-- transform with defined row format delimit handle schema with correct type +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES 
TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp; + +-- transform with defined row format delimit handle schema with wrong type +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b long, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k int, + l long) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp; + +-- transform with defined row format delimit LINE TERMINATED BY only support '\n' +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp; diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out new file mode 100644 index 0000000000000..83ab5cb729c24 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -0,0 +1,357 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 18 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW t AS SELECT * FROM VALUES +('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')), +('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200), bigint(2), float(2.0), 2.0, Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')), +('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03')) +AS t(a, b, c, d, e, f, g, h, i, j, k, l) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT TRANSFORM(a) +USING 'cat' AS (a) +FROM t +-- !query schema +struct +-- !query output +1 +2 +3 + + +-- !query +SELECT TRANSFORM(a) +USING 'some_non_existent_command' AS (a) +FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +Subprocess exited with status 127. Error: /bin/bash: some_non_existent_command: command not found + + +-- !query +SELECT TRANSFORM(a) +USING 'python some_non_existent_file' AS (a) +FROM t +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkException +Subprocess exited with status 2. 
Error: python: can't open file 'some_non_existent_file': [Errno 2] No such file or directory + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT TRANSFORM(a) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 NULL +2 NULL +3 NULL + + +-- !query +SELECT TRANSFORM(a, b) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c) +USING 'cat' +FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c, d, e, f, g, h, i) +USING 'cat' AS (a int, b short, c long, d byte, e float, f double, g decimal(38, 18), h date, i timestamp) +FROM VALUES +('a','','1231a','a','213.21a','213.21a','0a.21d','2000-04-01123','1997-0102 00:00:') tmp(a, b, c, d, e, f, g, h, i) +-- !query schema +struct +-- !query output +NULL NULL NULL NULL NULL NULL NULL NULL NULL + + +-- !query +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +mismatched input 'GROUP' expecting {, ';'}(line 4, pos 0) + +== SQL == +SELECT TRANSFORM(b, max(a), sum(f)) +USING 'cat' AS (a, b) +FROM t +GROUP BY b +^^^ + + +-- !query +MAP a, b USING 'cat' AS (a, b) FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +REDUCE a, b USING 'cat' AS (a, b) FROM t +-- !query schema +struct +-- !query output +1 true +2 false +3 true + + +-- !query +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (a, b, c, d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t +-- !query schema +struct +-- !query output +1 true Spark SQL null +2 false Spark SQL null +3 true Spark SQL null + + +-- !query +SELECT TRANSFORM(a, b, c, null) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +USING 'cat' AS (d) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY '@' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' +FROM t +-- !query schema +struct +-- !query output +1 +2 +3 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW 
FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b boolean, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k timestamp, + l date) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct +-- !query output +1 true Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 1997-01-02 00:00:00 2000-04-01 +2 false Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 1997-01-02 03:04:05 2000-04-02 +3 true Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 1997-02-10 17:32:01 2000-04-03 + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b long, + c binary, + d tinyint, + e int, + f smallint, + g long, + h float, + i double, + j decimal(38, 18), + k int, + l long) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '\n' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct +-- !query output +1 NULL Spark SQL 1 1 100 1 1.0 1.0 1.000000000000000000 NULL NULL +2 NULL Spark SQL 2 2 200 2 2.0 2.0 2.000000000000000000 NULL NULL +3 NULL Spark SQL 3 3 300 3 3.0 3.0 3.000000000000000000 NULL NULL + + +-- !query +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +LINES TERMINATED BY only supports newline '\n' right now: @(line 3, pos 4) + +== SQL == +SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( + SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) + ROW FORMAT DELIMITED +----^^^ + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + USING 'cat' AS ( + a string, + b string, + c string, + d string, + e string, + f string, + g string, + h string, + i string, + j string, + k string, + l string) + ROW FORMAT DELIMITED + FIELDS TERMINATED BY ',' + LINES TERMINATED BY '@' + NULL DEFINED AS 'NULL' + FROM t +) tmp diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 02c6fba9725d3..eb2caa61e1590 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkException, TestUtils} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.SQLHelper @@ -260,6 +260,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper 
newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") } + // SPARK-32106 Since we add SQL test 'transform.sql' will use `cat` command, + // here we need to check command available + assume(TestUtils.testCommandAvailable("/bin/bash")) val input = fileToString(new File(testCase.inputFile)) val (comments, code) = splitCommentsAndCodes(input) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala new file mode 100644 index 0000000000000..6ff7c5d6d2f3a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.{SparkException, TestUtils} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.test.SharedSparkSession + +class SparkScriptTransformationSuite extends BaseScriptTransformationSuite with SharedSparkSession { + import testImplicits._ + + override def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec = { + SparkScriptTransformationExec( + input = input, + script = script, + output = output, + child = child, + ioschema = ioschema + ) + } + + test("SPARK-32106: TRANSFORM with serde without hive should throw exception") { + assume(TestUtils.testCommandAvailable("/bin/bash")) + withTempView("v") { + val df = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") + df.createTempView("v") + + val e = intercept[ParseException] { + sql( + """ + |SELECT TRANSFORM (a) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |USING 'cat' AS (a) + |ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + |FROM v + """.stripMargin) + }.getMessage + assert(e.contains("TRANSFORM with serde is only supported in hive mode")) + } + } + + test("SPARK-32106: TRANSFORM doesn't support ArrayType/MapType/StructType " + + "as output data type (no serde)") { + assume(TestUtils.testCommandAvailable("/bin/bash")) + // check for ArrayType + val e1 = intercept[SparkException] { + sql( + """ + |SELECT TRANSFORM(a) + |USING 'cat' AS (a array) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e1.contains("SparkScriptTransformation without serde does not support" + + " ArrayType as output data type")) + + // check for MapType + val e2 = intercept[SparkException] { + sql( + """ + |SELECT 
TRANSFORM(b) + |USING 'cat' AS (b map) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e2.contains("SparkScriptTransformation without serde does not support" + + " MapType as output data type")) + + // check for StructType + val e3 = intercept[SparkException] { + sql( + """ + |SELECT TRANSFORM(c) + |USING 'cat' AS (c struct) + |FROM VALUES (array(1, 1), map('1', 1), struct(1, 'a')) t(a, b, c) + """.stripMargin).collect() + }.getMessage + assert(e3.contains("SparkScriptTransformation without serde does not support" + + " StructType as output data type")) + } +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala index 26baff3d83eec..4b03cff5e8c8e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala @@ -45,6 +45,8 @@ import org.apache.spark.util.{CircularBuffer, Utils} * @param input the set of expression that should be passed to the script. * @param script the command that should be executed. * @param output the attributes that are produced by the script. + * @param child logical plan whose output is transformed. + * @param ioschema the class set that defines how to handle input/output data. */ case class HiveScriptTransformationExec( input: Seq[Expression], From f5fd10b1bc519cc05c98f5235fda3d59155cda9d Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 03:07:26 +0000 Subject: [PATCH 0842/1009] [SPARK-33834][SQL] Verify ALTER TABLE CHANGE COLUMN with Char and Varchar ### What changes were proposed in this pull request? Verify ALTER TABLE CHANGE COLUMN with Char and Varchar and avoid unexpected change For v1 table, changing type is not allowed, we fix a regression that uses the replaced string instead of the original char/varchar type when altering char/varchar columns For v2 table, char/varchar to string, char(x) to char(x), char(x)/varchar(x) to varchar(y) if x <=y are valid cases, other changes are invalid ### Why are the changes needed? Verify ALTER TABLE CHANGE COLUMN with Char and Varchar and avoid unexpected change ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #30833 from yaooqinn/SPARK-33834. 
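As a rough illustration of the v2 rules described above (the table name and the `USING foo` provider below are hypothetical placeholders for a v2 source, not taken from this patch):
```
CREATE TABLE t (c CHAR(4)) USING foo;

ALTER TABLE t CHANGE COLUMN c TYPE CHAR(4);     -- OK: same char length
ALTER TABLE t CHANGE COLUMN c TYPE STRING;      -- OK: char/varchar to string
ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5);  -- OK: char(4) to varchar(5), since 4 <= 5
ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3);  -- fails: char(4) cannot be cast to varchar(3)
ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5);     -- fails: changing the char length is invalid
```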
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 3 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 18 +- .../sql/catalyst/catalog/SessionCatalog.scala | 18 +- .../spark/sql/execution/command/ddl.scala | 2 +- .../command/CharVarcharDDLTestBase.scala | 159 ++++++++++++++++++ .../spark/sql/HiveCharVarcharTestSuite.scala | 24 +++ 6 files changed, 216 insertions(+), 8 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8d8e00b80c506..ba24914cb6835 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3481,7 +3481,8 @@ class Analyzer(override val catalogManager: CatalogManager) Some(typeChange) } else { val (fieldNames, field) = fieldOpt.get - if (field.dataType == typeChange.newDataType()) { + val dt = CharVarcharUtils.getRawType(field.metadata).getOrElse(field.dataType) + if (dt == typeChange.newDataType()) { // The user didn't want the field to change, so remove this change None } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 472de096b2f22..a4dfbe85abfd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -523,7 +523,12 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { TypeUtils.failWithIntervalType(add.dataType()) colsToAdd(parentName) = fieldsAdded :+ add.fieldNames().last case update: UpdateColumnType => - val field = findField("update", update.fieldNames) + val field = { + val f = findField("update", update.fieldNames) + CharVarcharUtils.getRawType(f.metadata) + .map(dt => f.copy(dataType = dt)) + .getOrElse(f) + } val fieldName = update.fieldNames.quoted update.newDataType match { case _: StructType => @@ -544,7 +549,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case _ => // update is okay } - if (!Cast.canUpCast(field.dataType, update.newDataType)) { + + // We don't need to handle nested types here which shall fail before + def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match { + case (CharType(l1), CharType(l2)) => l1 == l2 + case (CharType(l1), VarcharType(l2)) => l1 <= l2 + case (VarcharType(l1), VarcharType(l2)) => l1 <= l2 + case _ => Cast.canUpCast(from, to) + } + + if (!canAlterColumnType(field.dataType, update.newDataType)) { alter.failAnalysis( s"Cannot update ${table.name} field $fieldName: " + s"${field.dataType.simpleString} cannot be cast to " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 9814f4b3aa75b..9b542d6bd95ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -470,18 +470,28 @@ class SessionCatalog( /** * Retrieve the metadata of an existing permanent table/view. 
If no database is specified, * assume the table/view is in the current database. + * We replace char/varchar with "annotated" string type in the table schema, as the query + * engine doesn't support char/varchar yet. */ @throws[NoSuchDatabaseException] @throws[NoSuchTableException] def getTableMetadata(name: TableIdentifier): CatalogTable = { + val t = getTableRawMetadata(name) + t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) + } + + /** + * Retrieve the metadata of an existing permanent table/view. If no database is specified, + * assume the table/view is in the current database. + */ + @throws[NoSuchDatabaseException] + @throws[NoSuchTableException] + def getTableRawMetadata(name: TableIdentifier): CatalogTable = { val db = formatDatabaseName(name.database.getOrElse(getCurrentDatabase)) val table = formatTableName(name.table) requireDbExists(db) requireTableExists(TableIdentifier(table, Some(db))) - val t = externalCatalog.getTable(db, table) - // We replace char/varchar with "annotated" string type in the table schema, as the query - // engine doesn't support char/varchar yet. - t.copy(schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(t.schema)) + externalCatalog.getTable(db, table) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 604de860f04c0..9300e25b8650e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -342,7 +342,7 @@ case class AlterTableChangeColumnCommand( // TODO: support change column name/dataType/metadata/position. override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val table = catalog.getTableMetadata(tableName) + val table = catalog.getTableRawMetadata(tableName) val resolver = sparkSession.sessionState.conf.resolver DDLUtils.verifyAlterTableType(catalog, table, isView = false) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala new file mode 100644 index 0000000000000..748dd7ee10c34 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CharVarcharDDLTestBase.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.connector.InMemoryPartitionTableCatalog +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types._ + +trait CharVarcharDDLTestBase extends QueryTest with SQLTestUtils { + + def format: String + + def checkColType(f: StructField, dt: DataType): Unit = { + assert(f.dataType == CharVarcharUtils.replaceCharVarcharWithString(dt)) + assert(CharVarcharUtils.getRawType(f.metadata).contains(dt)) + } + + test("allow to change column for char(x) to char(y), x == y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(4)") + checkColType(spark.table("t").schema(1), CharType(4)) + } + } + + test("not allow to change column for char(x) to char(y), x != y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'CharType(4)' to 'c' with type 'CharType(5)'" + val v2 = e.getMessage contains "char(4) cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("not allow to change column from string to char type") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c STRING) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'StringType' to 'c' with type 'CharType(5)'" + val v2 = e.getMessage contains "string cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("not allow to change column from int to char type") { + withTable("t") { + sql(s"CREATE TABLE t(i int, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN i TYPE CHAR(5)") + } + val v1 = e.getMessage contains "'IntegerType' to 'i' with type 'CharType(5)'" + val v2 = e.getMessage contains "int cannot be cast to char(5)" + assert(v1 || v2) + } + } + + test("allow to change column for varchar(x) to varchar(y), x == y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + } + } + + test("not allow to change column for varchar(x) to varchar(y), x > y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3)") + } + val v1 = e.getMessage contains "'VarcharType(4)' to 'c' with type 'VarcharType(3)'" + val v2 = e.getMessage contains "varchar(4) cannot be cast to varchar(3)" + assert(v1 || v2) + } + } +} + +class FileSourceCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with SharedSparkSession { + override def format: String = "parquet" + override protected def sparkConf: SparkConf = { + super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") + } +} + +class DSV2CharVarcharDDLTestSuite extends CharVarcharDDLTestBase + with SharedSparkSession { + override def format: String = "foo" + protected override def sparkConf = { + super.sparkConf + .set("spark.sql.catalog.testcat", classOf[InMemoryPartitionTableCatalog].getName) + .set(SQLConf.DEFAULT_CATALOG.key, "testcat") + } + 
+ test("allow to change change column from char to string type") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE STRING") + assert(spark.table("t").schema(1).dataType === StringType) + } + } + + test("allow to change column from char(x) to varchar(y) type x <= y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + } + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + } + } + + test("allow to change column from varchar(x) to varchar(y) type x <= y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c VARCHAR(4)) USING $format") + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(4)") + checkColType(spark.table("t").schema(1), VarcharType(4)) + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(5)") + checkColType(spark.table("t").schema(1), VarcharType(5)) + + } + } + + test("not allow to change column from char(x) to varchar(y) type x > y") { + withTable("t") { + sql(s"CREATE TABLE t(i STRING, c CHAR(4)) USING $format") + val e = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c TYPE VARCHAR(3)") + } + assert(e.getMessage contains "char(4) cannot be cast to varchar(3)") + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index 55d305fda4f96..f48cfb8dfb899 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import org.apache.spark.sql.execution.command.CharVarcharDDLTestBase import org.apache.spark.sql.hive.test.TestHiveSingleton class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSingleton { @@ -41,3 +42,26 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet super.afterAll() } } + +class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { + + // The default Hive serde doesn't support nested null values. + override def format: String = "hive OPTIONS(fileFormat='parquet')" + + private var originalPartitionMode = "" + + override protected def beforeAll(): Unit = { + super.beforeAll() + originalPartitionMode = spark.conf.get("hive.exec.dynamic.partition.mode", "") + spark.conf.set("hive.exec.dynamic.partition.mode", "nonstrict") + } + + override protected def afterAll(): Unit = { + if (originalPartitionMode == "") { + spark.conf.unset("hive.exec.dynamic.partition.mode") + } else { + spark.conf.set("hive.exec.dynamic.partition.mode", originalPartitionMode) + } + super.afterAll() + } +} From 16ae3a5c12f1bbd6c9f5f735bfad0cf51fdf2182 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 21 Dec 2020 19:48:58 -0800 Subject: [PATCH 0843/1009] [MINOR][CORE] Remove unused variable CompressionCodec.DEFAULT_COMPRESSION_CODEC ### What changes were proposed in this pull request? This PR removed an unused variable `CompressionCodec.DEFAULT_COMPRESSION_CODEC`. ### Why are the changes needed? Apache Spark 3.0.0 centralized this default value to `IO_COMPRESSION_CODEC.defaultValue` via [SPARK-26462](https://github.com/apache/spark/pull/23447). 
We had better remove this variable to avoid any potential confusion in the future. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI compilation. Closes #30880 from dongjoon-hyun/minor. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/io/CompressionCodec.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 5205a2d568ac3..fa663a32d4929 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -107,7 +107,6 @@ private[spark] object CompressionCodec { } val FALLBACK_COMPRESSION_CODEC = "snappy" - val DEFAULT_COMPRESSION_CODEC = "lz4" val ALL_COMPRESSION_CODECS = shortCompressionCodecNames.values.toSeq } From b88745565b96ba1f9ec55b369a4aefab77684981 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 21 Dec 2020 20:24:23 -0800 Subject: [PATCH 0844/1009] [SPARK-33700][SQL] Avoid file meta reading when enableFilterPushDown is true and filters is empty for Orc ### What changes were proposed in this pull request? Orc support filter push down optimization, but this optimization will read file meta from external storage even if filters is empty. This pr add a extra `filters.nonEmpty` when `spark.sql.orc.filterPushdown` is true ### Why are the changes needed? Orc filters push down operation should only triggered when `filters.nonEmpty` is true ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30663 from LuciferYang/pushdownfilter-when-filter-nonempty. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/datasources/orc/OrcFileFormat.scala | 2 +- .../datasources/v2/orc/OrcPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 2671682e18f31..83504d8c4458a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -184,7 +184,7 @@ class OrcFileFormat Iterator.empty } else { // ORC predicate pushdown - if (orcFilterPushDown) { + if (orcFilterPushDown && filters.nonEmpty) { OrcUtils.readCatalystSchema(filePath, conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index b0ddee0a6b336..6f9a3ae4c67fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -68,7 +68,7 @@ case class OrcPartitionReaderFactory( } private def pushDownPredicates(filePath: Path, conf: Configuration): Unit = { - if (orcFilterPushDown) { + if (orcFilterPushDown && filters.nonEmpty) { OrcUtils.readCatalystSchema(filePath, 
conf, ignoreCorruptFiles).foreach { fileSchema => OrcFilters.createFilter(fileSchema, filters).foreach { f => OrcInputFormat.setSearchArgument(conf, f, fileSchema.fieldNames) From 1dd63dccd893162f8ef969e42273a794ad73e49c Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 22 Dec 2020 15:10:46 +0900 Subject: [PATCH 0845/1009] [SPARK-33860][SQL] Make CatalystTypeConverters.convertToCatalyst match special Array value ### What changes were proposed in this pull request? Add some case to match Array whose element type is primitive. ### Why are the changes needed? We will get exception when use `Literal.create(Array(1, 2, 3), ArrayType(IntegerType))` . ``` Exception in thread "main" java.lang.IllegalArgumentException: requirement failed: Literal must have a corresponding value to array, but class int[] found. at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.expressions.Literal$.validateLiteralValue(literals.scala:215) at org.apache.spark.sql.catalyst.expressions.Literal.(literals.scala:292) at org.apache.spark.sql.catalyst.expressions.Literal$.create(literals.scala:140) ``` And same problem with other array whose element is primitive. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #30868 from ulysses-you/SPARK-33860. Authored-by: ulysses-you Signed-off-by: HyukjinKwon --- .../spark/sql/catalyst/CatalystTypeConverters.scala | 4 +++- .../catalyst/expressions/LiteralExpressionSuite.scala | 11 +++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 971d61518c026..907b5877b3ac0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -457,7 +457,9 @@ object CatalystTypeConverters { case d: JavaBigDecimal => new DecimalConverter(DecimalType(d.precision, d.scale)).toCatalyst(d) case seq: Seq[Any] => new GenericArrayData(seq.map(convertToCatalyst).toArray) case r: Row => InternalRow(r.toSeq.map(convertToCatalyst): _*) - case arr: Array[Any] => new GenericArrayData(arr.map(convertToCatalyst)) + case arr: Array[Byte] => arr + case arr: Array[Char] => StringConverter.toCatalyst(arr) + case arr: Array[_] => new GenericArrayData(arr.map(convertToCatalyst)) case map: Map[_, _] => ArrayBasedMapData( map, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 7a482641def3d..1440f1e3a0668 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -326,4 +326,15 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { assert(literalStr === expected) } } + + test("SPARK-33860: Make CatalystTypeConverters.convertToCatalyst match special Array value") { + assert(Literal(Array(1, 2, 3)) == Literal.create(Array(1, 2, 3), ArrayType(IntegerType))) + assert(Literal(Array(1L, 2L, 3L)) == Literal.create(Array(1L, 2L, 3L), ArrayType(LongType))) + assert(Literal(Array(1D, 2D, 3D)) == Literal.create(Array(1D, 2D, 3D), ArrayType(DoubleType))) + 
assert(Literal("123") == Literal.create(Array('1', '2', '3'), StringType)) + assert(Literal(Array(1.toByte, 2.toByte, 3.toByte)) == + Literal.create(Array(1.toByte, 2.toByte, 3.toByte), BinaryType)) + assert(Literal(Array("1", "2", "3")) == + Literal.create(Array("1", "2", "3"), ArrayType(StringType))) + } } From 2562183987684c94f1ef5552495c342a10e2ed3d Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 22 Dec 2020 08:23:56 +0000 Subject: [PATCH 0846/1009] [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer ### What changes were proposed in this pull request? This PR adds logic to build logical writes introduced in SPARK-33779. Note: This PR contains a subset of changes discussed in PR #29066. ### Why are the changes needed? These changes are the next step as discussed in the [design doc](https://docs.google.com/document/d/1X0NsQSryvNmXBY9kcvfINeYyKC-AahZarUqg3nS1GQs/edit#) for SPARK-23889. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #30806 from aokolnychyi/spark-33808. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- project/MimaExcludes.scala | 2 + .../connector/catalog/TableCapability.java | 2 +- .../catalyst/plans/logical/v2Commands.scala | 10 +- .../spark/sql/connector/write/V1Write.java | 33 +++++++ .../sql/connector/write/V1WriteBuilder.java | 45 --------- .../spark/sql/execution/SparkOptimizer.scala | 7 +- .../datasources/v2/DataSourceV2Strategy.scala | 56 ++++++----- .../datasources/v2/TableCapabilityCheck.scala | 6 +- .../datasources/v2/V1FallbackWriters.scala | 66 +++---------- .../execution/datasources/v2/V2Writes.scala | 95 +++++++++++++++++++ .../v2/WriteToDataSourceV2Exec.scala | 72 +++++--------- .../v2/jdbc/JDBCWriteBuilder.scala | 6 +- .../sql/connector/V1WriteFallbackSuite.scala | 12 +-- .../command/PlanResolutionSuite.scala | 2 +- 14 files changed, 223 insertions(+), 191 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java delete mode 100644 sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 33e65c9def41b..ba879c03795d1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,8 @@ object MimaExcludes { // Exclude rules for 3.2.x lazy val v32excludes = v31excludes ++ Seq( + // [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder") ) // Exclude rules for 3.1.x diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java index 68161d7225fcf..5bb42fb4b313d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCapability.java @@ -96,7 +96,7 @@ public enum TableCapability { /** * Signals that the table supports append writes using the V1 InsertableRelation interface. *
<p>
      - * Tables that return this capability must create a V1WriteBuilder and may also support additional + * Tables that return this capability must create a V1Write and may also support additional * write modes, like {@link #TRUNCATE}, and {@link #OVERWRITE_BY_FILTER}, but cannot support * {@link #OVERWRITE_DYNAMIC}. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b3b538ac8b327..02fb3a86db5d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange} import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.connector.write.Write import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType} /** @@ -65,7 +66,8 @@ case class AppendData( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): AppendData = copy(query = newQuery) override def withNewTable(newTable: NamedRelation): AppendData = copy(table = newTable) } @@ -94,7 +96,8 @@ case class OverwriteByExpression( deleteExpr: Expression, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override lazy val resolved: Boolean = { table.resolved && query.resolved && outputResolved && deleteExpr.resolved } @@ -132,7 +135,8 @@ case class OverwritePartitionsDynamic( table: NamedRelation, query: LogicalPlan, writeOptions: Map[String, String], - isByName: Boolean) extends V2WriteCommand { + isByName: Boolean, + write: Option[Write] = None) extends V2WriteCommand { override def withNewQuery(newQuery: LogicalPlan): OverwritePartitionsDynamic = { copy(query = newQuery) } diff --git a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java new file mode 100644 index 0000000000000..a299967ee8bcf --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1Write.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Unstable; +import org.apache.spark.sql.connector.catalog.TableCapability; +import org.apache.spark.sql.sources.InsertableRelation; + +/** + * A logical write that should be executed using V1 InsertableRelation interface. + *
<p>
      + * Tables that have {@link TableCapability#V1_BATCH_WRITE} in the list of their capabilities + * must build {@link V1Write}. + */ +@Unstable +public interface V1Write extends Write { + InsertableRelation toInsertableRelation(); +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java b/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java deleted file mode 100644 index 89b567b5231ac..0000000000000 --- a/sql/core/src/main/java/org/apache/spark/sql/connector/write/V1WriteBuilder.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.write; - -import org.apache.spark.annotation.Unstable; -import org.apache.spark.sql.sources.InsertableRelation; - -/** - * A trait that should be implemented by V1 DataSources that would like to leverage the DataSource - * V2 write code paths. The InsertableRelation will be used only to Append data. Other - * instances of the [[WriteBuilder]] interface such as [[SupportsOverwrite]], [[SupportsTruncate]] - * should be extended as well to support additional operations other than data appends. - * - * This interface is designed to provide Spark DataSources time to migrate to DataSource V2 and - * will be removed in a future Spark release. - * - * @since 3.0.0 - */ -@Unstable -public interface V1WriteBuilder extends WriteBuilder { - /** - * Creates an InsertableRelation that allows appending a DataFrame to a - * a destination (using data source-specific parameters). The insert method will only be - * called with `overwrite=false`. The DataSource should implement the overwrite behavior as - * part of the [[SupportsOverwrite]], and [[SupportsTruncate]] interfaces. 
- * - * @since 3.0.0 - */ - InsertableRelation buildForV1Write(); -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 33b86a2b5340c..dde5dc2be0556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.datasources.SchemaPruning -import org.apache.spark.sql.execution.datasources.v2.V2ScanRelationPushDown +import org.apache.spark.sql.execution.datasources.v2.{V2ScanRelationPushDown, V2Writes} import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} @@ -37,7 +37,7 @@ class SparkOptimizer( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = // TODO: move SchemaPruning into catalyst - SchemaPruning :: V2ScanRelationPushDown :: PruneFileSourcePartitions :: Nil + SchemaPruning :: V2ScanRelationPushDown :: V2Writes :: PruneFileSourcePartitions :: Nil override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ @@ -70,7 +70,8 @@ class SparkOptimizer( ExtractPythonUDFFromJoinCondition.ruleName :+ ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ ExtractPythonUDFs.ruleName :+ - V2ScanRelationPushDown.ruleName + V2ScanRelationPushDown.ruleName :+ + V2Writes.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 635117a9932ac..0c92945dc6ca5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -24,8 +24,9 @@ import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartit import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} +import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import 
org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} @@ -195,33 +196,42 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat orCreate = orCreate) :: Nil } - case AppendData(r: DataSourceV2Relation, query, writeOptions, _) => - r.table.asWritable match { - case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r)) :: Nil - case v2 => - AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + case AppendData(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), query, writeOptions, + _, Some(write)) if v1.supports(TableCapability.V1_BATCH_WRITE) => + write match { + case v1Write: V1Write => + AppendDataExecV1(v1, writeOptions.asOptions, query, refreshCache(r), v1Write) :: Nil + case v2Write => + throw new AnalysisException( + s"Table ${v1.name} declares ${TableCapability.V1_BATCH_WRITE} capability but " + + s"${v2Write.getClass.getName} is not an instance of ${classOf[V1Write].getName}") } - case OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, writeOptions, _) => - // fail if any filter cannot be converted. correctness depends on removing all matching data. - val filters = splitConjunctivePredicates(deleteExpr).map { - filter => DataSourceStrategy.translateFilter(deleteExpr, - supportNestedPredicatePushdown = true).getOrElse( - throw new AnalysisException(s"Cannot translate expression to source filter: $filter")) - }.toArray - r.table.asWritable match { - case v1 if v1.supports(TableCapability.V1_BATCH_WRITE) => - OverwriteByExpressionExecV1(v1, filters, writeOptions.asOptions, - query, refreshCache(r)) :: Nil - case v2 => - OverwriteByExpressionExec(v2, filters, - writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + case AppendData(r @ DataSourceV2Relation(v2: SupportsWrite, _, _, _, _), query, writeOptions, + _, Some(write)) => + AppendDataExec(v2, writeOptions.asOptions, planLater(query), refreshCache(r), write) :: Nil + + case OverwriteByExpression(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), _, query, + writeOptions, _, Some(write)) if v1.supports(TableCapability.V1_BATCH_WRITE) => + write match { + case v1Write: V1Write => + OverwriteByExpressionExecV1( + v1, writeOptions.asOptions, query, refreshCache(r), v1Write) :: Nil + case v2Write => + throw new AnalysisException( + s"Table ${v1.name} declares ${TableCapability.V1_BATCH_WRITE} capability but " + + s"${v2Write.getClass.getName} is not an instance of ${classOf[V1Write].getName}") } - case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _) => + case OverwriteByExpression(r @ DataSourceV2Relation(v2: SupportsWrite, _, _, _, _), _, query, + writeOptions, _, Some(write)) => + OverwriteByExpressionExec( + v2, writeOptions.asOptions, planLater(query), refreshCache(r), write) :: Nil + + case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, writeOptions, _, Some(write)) => OverwritePartitionsDynamicExec( - r.table.asWritable, writeOptions.asOptions, planLater(query), refreshCache(r)) :: Nil + r.table.asWritable, writeOptions.asOptions, planLater(query), + refreshCache(r), write) :: Nil case DeleteFromTable(relation, condition) => relation match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala index 
cb4a2994de1f4..f697aba46d0df 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/TableCapabilityCheck.scala @@ -49,14 +49,14 @@ object TableCapabilityCheck extends (LogicalPlan => Unit) { // TODO: check STREAMING_WRITE capability. It's not doable now because we don't have a // a logical plan for streaming write. - case AppendData(r: DataSourceV2Relation, _, _, _) if !supportsBatchWrite(r.table) => + case AppendData(r: DataSourceV2Relation, _, _, _, _) if !supportsBatchWrite(r.table) => failAnalysis(s"Table ${r.table.name()} does not support append in batch mode.") - case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _) + case OverwritePartitionsDynamic(r: DataSourceV2Relation, _, _, _, _) if !r.table.supports(BATCH_WRITE) || !r.table.supports(OVERWRITE_DYNAMIC) => failAnalysis(s"Table ${r.table.name()} does not support dynamic overwrite in batch mode.") - case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _, _) => + case OverwriteByExpression(r: DataSourceV2Relation, expr, _, _, _, _) => expr match { case Literal(true, BooleanType) => if (!supportsBatchWrite(r.table) || diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala index 080e977121efb..3363172a85286 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V1FallbackWriters.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.execution.datasources.v2 -import java.util.UUID - -import org.apache.spark.SparkException import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.SupportsWrite -import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.sources.{AlwaysTrue, Filter, InsertableRelation} +import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -39,12 +36,8 @@ case class AppendDataExecV1( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - refreshCache: () => Unit) extends V1FallbackWriters { - - override protected def run(): Seq[InternalRow] = { - writeWithV1(newWriteBuilder().buildForV1Write(), refreshCache = refreshCache) - } -} + refreshCache: () => Unit, + write: V1Write) extends V1FallbackWriters /** * Physical plan node for overwrite into a v2 table with V1 write interfaces. 
Note that when this @@ -59,29 +52,10 @@ case class AppendDataExecV1( */ case class OverwriteByExpressionExecV1( table: SupportsWrite, - deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, plan: LogicalPlan, - refreshCache: () => Unit) extends V1FallbackWriters { - - private def isTruncate(filters: Array[Filter]): Boolean = { - filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] - } - - override protected def run(): Seq[InternalRow] = { - newWriteBuilder() match { - case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV1(builder.truncate().asV1Builder.buildForV1Write(), refreshCache = refreshCache) - - case builder: SupportsOverwrite => - writeWithV1(builder.overwrite(deleteWhere).asV1Builder.buildForV1Write(), - refreshCache = refreshCache) - - case _ => - throw new SparkException(s"Table does not support overwrite by expression: $table") - } - } -} + refreshCache: () => Unit, + write: V1Write) extends V1FallbackWriters /** Some helper interfaces that use V2 write semantics through the V1 writer interface. */ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { @@ -90,23 +64,13 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { def table: SupportsWrite def writeOptions: CaseInsensitiveStringMap + def refreshCache: () => Unit + def write: V1Write - protected implicit class toV1WriteBuilder(builder: WriteBuilder) { - def asV1Builder: V1WriteBuilder = builder match { - case v1: V1WriteBuilder => v1 - case other => throw new IllegalStateException( - s"The returned writer ${other} was no longer a V1WriteBuilder.") - } - } - - protected def newWriteBuilder(): V1WriteBuilder = { - val info = LogicalWriteInfoImpl( - queryId = UUID.randomUUID().toString, - schema = plan.schema, - options = writeOptions) - val writeBuilder = table.newWriteBuilder(info) - - writeBuilder.asV1Builder + override def run(): Seq[InternalRow] = { + val writtenRows = writeWithV1(write.toInsertableRelation) + refreshCache() + writtenRows } } @@ -116,12 +80,8 @@ sealed trait V1FallbackWriters extends V2CommandExec with SupportsV1Write { trait SupportsV1Write extends SparkPlan { def plan: LogicalPlan - protected def writeWithV1( - relation: InsertableRelation, - refreshCache: () => Unit = () => ()): Seq[InternalRow] = { + protected def writeWithV1(relation: InsertableRelation): Seq[InternalRow] = { relation.insert(Dataset.ofRows(sqlContext.sparkSession, plan), overwrite = false) - refreshCache() - Nil } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala new file mode 100644 index 0000000000000..a8e0731edf14c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import java.util.UUID + +import org.apache.spark.SparkException +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.expressions.PredicateHelper +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.Table +import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, WriteBuilder} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.sources.{AlwaysTrue, Filter} + +/** + * A rule that constructs logical writes. + */ +object V2Writes extends Rule[LogicalPlan] with PredicateHelper { + + import DataSourceV2Implicits._ + + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case a @ AppendData(r: DataSourceV2Relation, query, options, _, None) => + val writeBuilder = newWriteBuilder(r.table, query, options) + val write = writeBuilder.build() + a.copy(write = Some(write)) + + case o @ OverwriteByExpression(r: DataSourceV2Relation, deleteExpr, query, options, _, None) => + // fail if any filter cannot be converted. correctness depends on removing all matching data. 
+ val filters = splitConjunctivePredicates(deleteExpr).flatMap { pred => + val filter = DataSourceStrategy.translateFilter(pred, supportNestedPredicatePushdown = true) + if (filter.isEmpty) { + throw new AnalysisException(s"Cannot translate expression to source filter: $pred") + } + filter + }.toArray + + val table = r.table + val writeBuilder = newWriteBuilder(table, query, options) + val write = writeBuilder match { + case builder: SupportsTruncate if isTruncate(filters) => + builder.truncate().build() + case builder: SupportsOverwrite => + builder.overwrite(filters).build() + case _ => + throw new SparkException(s"Table does not support overwrite by expression: $table") + } + + o.copy(write = Some(write)) + + case o @ OverwritePartitionsDynamic(r: DataSourceV2Relation, query, options, _, None) => + val table = r.table + val writeBuilder = newWriteBuilder(table, query, options) + val write = writeBuilder match { + case builder: SupportsDynamicOverwrite => + builder.overwriteDynamicPartitions().build() + case _ => + throw new SparkException(s"Table does not support dynamic partition overwrite: $table") + } + o.copy(write = Some(write)) + } + + private def isTruncate(filters: Array[Filter]): Boolean = { + filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] + } + + private def newWriteBuilder( + table: Table, + query: LogicalPlan, + writeOptions: Map[String, String]): WriteBuilder = { + + val info = LogicalWriteInfoImpl( + queryId = UUID.randomUUID().toString, + query.schema, + writeOptions.asOptions) + table.asWritable.newWriteBuilder(info) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index f5f77d38b8716..e0887d52cc376 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -33,9 +33,8 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, SupportsWrite, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, SupportsDynamicOverwrite, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, LogicalWriteInfoImpl, PhysicalWriteInfoImpl, V1Write, Write, WriteBuilder, WriterCommitMessage} import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} -import org.apache.spark.sql.sources.{AlwaysTrue, Filter} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.{LongAccumulator, Utils} @@ -216,14 +215,8 @@ case class AppendDataExec( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - override protected def run(): Seq[InternalRow] = { - val writtenRows = writeWithV2(newWriteBuilder().buildForBatch()) - refreshCache() - writtenRows - } -} + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper /** * Physical plan node for overwrite into a v2 table. 
@@ -237,31 +230,10 @@ case class AppendDataExec( */ case class OverwriteByExpressionExec( table: SupportsWrite, - deleteWhere: Array[Filter], writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - private def isTruncate(filters: Array[Filter]): Boolean = { - filters.length == 1 && filters(0).isInstanceOf[AlwaysTrue] - } - - override protected def run(): Seq[InternalRow] = { - val writtenRows = newWriteBuilder() match { - case builder: SupportsTruncate if isTruncate(deleteWhere) => - writeWithV2(builder.truncate().buildForBatch()) - - case builder: SupportsOverwrite => - writeWithV2(builder.overwrite(deleteWhere).buildForBatch()) - - case _ => - throw new SparkException(s"Table does not support overwrite by expression: $table") - } - refreshCache() - writtenRows - } -} - + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper /** * Physical plan node for dynamic partition overwrite into a v2 table. @@ -276,20 +248,8 @@ case class OverwritePartitionsDynamicExec( table: SupportsWrite, writeOptions: CaseInsensitiveStringMap, query: SparkPlan, - refreshCache: () => Unit) extends V2TableWriteExec with BatchWriteHelper { - - override protected def run(): Seq[InternalRow] = { - val writtenRows = newWriteBuilder() match { - case builder: SupportsDynamicOverwrite => - writeWithV2(builder.overwriteDynamicPartitions().buildForBatch()) - - case _ => - throw new SparkException(s"Table does not support dynamic partition overwrite: $table") - } - refreshCache() - writtenRows - } -} + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec with BatchWriteHelper case class WriteToDataSourceV2Exec( batchWrite: BatchWrite, @@ -319,6 +279,17 @@ trait BatchWriteHelper { } } +trait V2ExistingTableWriteExec extends V2TableWriteExec { + def refreshCache: () => Unit + def write: Write + + override protected def run(): Seq[InternalRow] = { + val writtenRows = writeWithV2(write.toBatch) + refreshCache() + writtenRows + } +} + /** * The base physical plan for writing data into data source v2. 
*/ @@ -477,9 +448,10 @@ private[v2] trait TableWriteExecHelper extends V2TableWriteExec with SupportsV1W writeOptions) val writeBuilder = table.newWriteBuilder(info) - val writtenRows = writeBuilder match { - case v1: V1WriteBuilder => writeWithV1(v1.buildForV1Write()) - case v2 => writeWithV2(v2.buildForBatch()) + val write = writeBuilder.build() + val writtenRows = write match { + case v1: V1Write => writeWithV1(v1.toInsertableRelation) + case v2 => writeWithV2(v2.toBatch) } table match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala index a9f7a32bf4c69..0e6c72c2cc331 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCWriteBuilder.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.InsertableRelation import org.apache.spark.sql.types.StructType -case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) extends V1WriteBuilder +case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) extends WriteBuilder with SupportsTruncate { private var isTruncate = false @@ -33,8 +33,8 @@ case class JDBCWriteBuilder(schema: StructType, options: JdbcOptionsInWrite) ext this } - override def buildForV1Write(): InsertableRelation = new InsertableRelation { - override def insert(data: DataFrame, overwrite: Boolean): Unit = { + override def build(): V1Write = new V1Write { + override def toInsertableRelation: InsertableRelation = (data: DataFrame, _: Boolean) => { // TODO (SPARK-32595): do truncate and append atomically. 
if (isTruncate) { val conn = JdbcUtils.createConnectionFactory(options)() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala index cba7dd35fb3bc..45ddc6a6fcfc6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V1WriteFallbackSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.connector.catalog.{Identifier, SupportsRead, SupportsWrite, Table, TableCapability} import org.apache.spark.sql.connector.expressions.{FieldReference, IdentityTransform, Transform} import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, V1Scan} -import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1WriteBuilder, WriteBuilder} +import org.apache.spark.sql.connector.write.{LogicalWriteInfo, LogicalWriteInfoImpl, SupportsOverwrite, SupportsTruncate, V1Write, WriteBuilder} import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION @@ -311,7 +311,8 @@ class InMemoryV1Provider if (mode == SaveMode.Overwrite) { writer.asInstanceOf[SupportsTruncate].truncate() } - writer.asInstanceOf[V1WriteBuilder].buildForV1Write().insert(data, overwrite = false) + val write = writer.build() + write.asInstanceOf[V1Write].toInsertableRelation.insert(data, overwrite = false) getRelation } } @@ -348,7 +349,6 @@ class InMemoryTableWithV1Fallback( private class FallbackWriteBuilder(options: CaseInsensitiveStringMap) extends WriteBuilder - with V1WriteBuilder with SupportsTruncate with SupportsOverwrite { @@ -371,9 +371,9 @@ class InMemoryTableWithV1Fallback( partIndexes.map(row.get) } - override def buildForV1Write(): InsertableRelation = { - new InsertableRelation { - override def insert(data: DataFrame, overwrite: Boolean): Unit = { + override def build(): V1Write = new V1Write { + override def toInsertableRelation: InsertableRelation = { + (data: DataFrame, overwrite: Boolean) => { assert(!overwrite, "V1 write fallbacks cannot be called with overwrite=true") val rows = data.collect() rows.groupBy(getPartitionValues).foreach { case (partition, elements) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 70cbfa194313f..6571e27b928bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1199,7 +1199,7 @@ class PlanResolutionSuite extends AnalysisTest { case Project(_, AsDataSourceV2Relation(r)) => assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) - case AppendData(r: DataSourceV2Relation, _, _, _) => + case AppendData(r: DataSourceV2Relation, _, _, _, _) => assert(r.catalog.exists(_ == catalogIdent)) assert(r.identifier.exists(_.name() == tableIdent)) case DescribeRelation(r: ResolvedTable, _, _) => From 7bbcbb84c266b6ff418cd2c3361aa7350299d0ae Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Tue, 22 Dec 2020 08:29:22 +0000 Subject: [PATCH 0847/1009] [SPARK-33784][SQL] Rename dataSourceRewriteRules batch ### What 
changes were proposed in this pull request? This PR tries to rename `dataSourceRewriteRules` into something more generic. ### Why are the changes needed? These changes are needed to address the post-review discussion [here](https://github.com/apache/spark/pull/30558#discussion_r533885837). ### Does this PR introduce _any_ user-facing change? Yes but the changes haven't been released yet. ### How was this patch tested? Existing tests. Closes #30808 from aokolnychyi/spark-33784. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 12 ++++++------ .../spark/sql/SparkSessionExtensions.scala | 18 +++++++++--------- .../sql/internal/BaseSessionStateBuilder.scala | 12 ++++++------ .../spark/sql/SparkSessionExtensionSuite.scala | 6 +++--- 4 files changed, 24 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index fdb9c5b4821dd..61bcf9038b845 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -186,9 +186,9 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil ++ operatorOptimizationBatch) :+ - // This batch rewrites data source plans and should be run after the operator - // optimization batch and before any batches that depend on stats. - Batch("Data Source Rewrite Rules", Once, dataSourceRewriteRules: _*) :+ + // This batch rewrites plans after the operator optimization and + // before any batches that depend on stats. + Batch("Pre CBO Rules", Once, preCBORules: _*) :+ // This batch pushes filters and projections into scan nodes. Before this batch, the logical // plan may contain nodes that do not report stats. Anything that uses stats must run after // this batch. @@ -294,10 +294,10 @@ abstract class Optimizer(catalogManager: CatalogManager) def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil /** - * Override to provide additional rules for rewriting data source plans. Such rules will be - * applied after operator optimization rules and before any rules that depend on stats. + * Override to provide additional rules for rewriting plans after operator optimization rules and + * before any cost-based optimization rules that depend on stats. */ - def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = Nil + def preCBORules: Seq[Rule[LogicalPlan]] = Nil /** * Returns (defaultBatches - (excludedRules - nonExcludableRules)), the rule batches that diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala index d5d969032a5e1..074906a971b1b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSessionExtensions.scala @@ -40,7 +40,7 @@ import org.apache.spark.sql.execution.{ColumnarRule, SparkPlan} *
 * <li>Analyzer Rules.</li>
 * <li>Check Analysis Rules.</li>
 * <li>Optimizer Rules.</li>
- * <li>Data Source Rewrite Rules.</li>
+ * <li>Pre CBO Rules.</li>
 * <li>Planning Strategies.</li>
 * <li>Customized Parser.</li>
 * <li>(External) Catalog listeners.</li>
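As a usage illustration of the renamed hook defined in the hunk that follows, a hypothetical extension could register a pre-CBO rule like this (`MyPreCBORule` and `MyExtensions` are placeholder names; such a class is typically registered through the `spark.sql.extensions` configuration):

```scala
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// A no-op rule; rules injected here run once, after the operator optimization batch and
// before any cost-based optimization batches that depend on stats.
case class MyPreCBORule(spark: SparkSession) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

class MyExtensions extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    extensions.injectPreCBORule(MyPreCBORule) // previously injectDataSourceRewriteRule
  }
}
```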
    • @@ -200,19 +200,19 @@ class SparkSessionExtensions { optimizerRules += builder } - private[this] val dataSourceRewriteRules = mutable.Buffer.empty[RuleBuilder] + private[this] val preCBORules = mutable.Buffer.empty[RuleBuilder] - private[sql] def buildDataSourceRewriteRules(session: SparkSession): Seq[Rule[LogicalPlan]] = { - dataSourceRewriteRules.map(_.apply(session)).toSeq + private[sql] def buildPreCBORules(session: SparkSession): Seq[Rule[LogicalPlan]] = { + preCBORules.map(_.apply(session)).toSeq } /** - * Inject an optimizer `Rule` builder that rewrites data source plans into the [[SparkSession]]. - * The injected rules will be executed after the operator optimization batch and before rules - * that depend on stats. + * Inject an optimizer `Rule` builder that rewrites logical plans into the [[SparkSession]]. + * The injected rules will be executed once after the operator optimization batch and + * before any cost-based optimization rules that depend on stats. */ - def injectDataSourceRewriteRule(builder: RuleBuilder): Unit = { - dataSourceRewriteRules += builder + def injectPreCBORule(builder: RuleBuilder): Unit = { + preCBORules += builder } private[this] val plannerStrategyBuilders = mutable.Buffer.empty[StrategyBuilder] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 8fb351a2a3b2b..6b84f0e636c1c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -231,8 +231,8 @@ abstract class BaseSessionStateBuilder( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = super.earlyScanPushDownRules ++ customEarlyScanPushDownRules - override def dataSourceRewriteRules: Seq[Rule[LogicalPlan]] = - super.dataSourceRewriteRules ++ customDataSourceRewriteRules + override def preCBORules: Seq[Rule[LogicalPlan]] = + super.preCBORules ++ customPreCBORules override def extendedOperatorOptimizationRules: Seq[Rule[LogicalPlan]] = super.extendedOperatorOptimizationRules ++ customOperatorOptimizationRules @@ -258,13 +258,13 @@ abstract class BaseSessionStateBuilder( protected def customEarlyScanPushDownRules: Seq[Rule[LogicalPlan]] = Nil /** - * Custom rules for rewriting data source plans to add to the Optimizer. Prefer overriding - * this instead of creating your own Optimizer. + * Custom rules for rewriting plans after operator optimization and before CBO. + * Prefer overriding this instead of creating your own Optimizer. * * Note that this may NOT depend on the `optimizer` function. 
*/ - protected def customDataSourceRewriteRules: Seq[Rule[LogicalPlan]] = { - extensions.buildDataSourceRewriteRules(session) + protected def customPreCBORules: Seq[Rule[LogicalPlan]] = { + extensions.buildPreCBORules(session) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 7c19f98b762f4..35d2513835611 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -88,9 +88,9 @@ class SparkSessionExtensionSuite extends SparkFunSuite { } } - test("SPARK-33621: inject data source rewrite rule") { - withSession(Seq(_.injectDataSourceRewriteRule(MyRule))) { session => - assert(session.sessionState.optimizer.dataSourceRewriteRules.contains(MyRule(session))) + test("SPARK-33621: inject a pre CBO rule") { + withSession(Seq(_.injectPreCBORule(MyRule))) { session => + assert(session.sessionState.optimizer.preCBORules.contains(MyRule(session))) } } From 43a562035cd79083d06d9422a66488dba801066a Mon Sep 17 00:00:00 2001 From: Jacob Kim Date: Tue, 22 Dec 2020 17:55:16 +0900 Subject: [PATCH 0848/1009] [SPARK-33846][SQL] Include Comments for a nested schema in StructType.toDDL ### What changes were proposed in this pull request? ```scala val nestedStruct = new StructType() .add(StructField("b", StringType).withComment("Nested comment")) val struct = new StructType() .add(StructField("a", nestedStruct).withComment("comment")) struct.toDDL ``` Currently, returns: ``` `a` STRUCT<`b`: STRING> COMMENT 'comment'` ``` With this PR, the code above returns: ``` `a` STRUCT<`b`: STRING COMMENT 'Nested comment'> COMMENT 'comment'` ``` ### Why are the changes needed? My team is using nested columns as first citizens, and I thought it would be nice to have comments for nested columns. ### Does this PR introduce _any_ user-facing change? Now, when users call something like this, ```scala spark.table("foo.bar").schema.fields.map(_.toDDL).mkString(", ") ``` they will get comments for the nested columns. ### How was this patch tested? I added unit tests under `org.apache.spark.sql.types.StructTypeSuite`. They test if nested StructType's comment is included in the DDL string. Closes #30851 from jacobhjkim/structtype-toddl. Authored-by: Jacob Kim Signed-off-by: HyukjinKwon --- .../apache/spark/sql/types/StructField.scala | 19 ++++++---- .../apache/spark/sql/types/StructType.scala | 7 +--- .../spark/sql/types/StructTypeSuite.scala | 38 +++++++++++++++++++ 3 files changed, 52 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala index 93478af425955..f0e17b24c80a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -82,17 +82,22 @@ case class StructField( if (metadata.contains("comment")) Option(metadata.getString("comment")) else None } + private def getDDLComment = getComment() + .map(escapeSingleQuotedString) + .map(" COMMENT '" + _ + "'") + .getOrElse("") + + /** + * Returns a string containing a schema in SQL format. For example the following value: + * `StructField("eventId", IntegerType)` will be converted to `eventId`: INT. 
+ */ + private[sql] def sql = s"${quoteIdentifier(name)}: ${dataType.sql}$getDDLComment" + /** * Returns a string containing a schema in DDL format. For example, the following value: * `StructField("eventId", IntegerType)` will be converted to `eventId` INT. * * @since 2.4.0 */ - def toDDL: String = { - val comment = getComment() - .map(escapeSingleQuotedString) - .map(" COMMENT '" + _ + "'") - - s"${quoteIdentifier(name)} ${dataType.sql}${comment.getOrElse("")}" - } + def toDDL: String = s"${quoteIdentifier(name)} ${dataType.sql}$getDDLComment" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index c5e76c160ff46..a223344e921ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, InterpretedOrdering} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, LegacyTypeStringParser} -import org.apache.spark.sql.catalyst.util.{quoteIdentifier, truncatedString, StringUtils} +import org.apache.spark.sql.catalyst.util.{truncatedString, StringUtils} import org.apache.spark.sql.catalyst.util.StringUtils.StringConcat import org.apache.spark.sql.internal.SQLConf @@ -445,10 +445,7 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru stringConcat.toString } - override def sql: String = { - val fieldTypes = fields.map(f => s"${quoteIdentifier(f.name)}: ${f.dataType.sql}") - s"STRUCT<${fieldTypes.mkString(", ")}>" - } + override def sql: String = s"STRUCT<${fields.map(_.sql).mkString(", ")}>" /** * Returns a string containing a schema in DDL format. 
For example, the following value: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 645e65f06508d..be06a31788f17 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -73,6 +73,44 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { assert(struct.toDDL == """`b` BOOLEAN COMMENT 'Field\'s comment'""") } + private val nestedStruct = new StructType() + .add(StructField("a", new StructType() + .add(StructField("b", new StructType() + .add(StructField("c", StringType + ).withComment("Deep Nested comment")) + ).withComment("Nested comment")) + ).withComment("comment")) + + test("SPARK-33846: toDDL should output nested field's comment") { + val ddl = "`a` STRUCT<`b`: STRUCT<`c`: STRING COMMENT 'Deep Nested comment'> " + + "COMMENT 'Nested comment'> COMMENT 'comment'" + assert(nestedStruct.toDDL == ddl) + } + + test("SPARK-33846: fromDDL should parse nested field's comment") { + val ddl = "`a` STRUCT<`b`: STRUCT<`c`: STRING COMMENT 'Deep Nested comment'> " + + "COMMENT 'Nested comment'> COMMENT 'comment'" + assert(StructType.fromDDL(ddl) == nestedStruct) + } + + test("SPARK-33846: round trip toDDL -> fromDDL - nested struct") { + assert(StructType.fromDDL(nestedStruct.toDDL) == nestedStruct) + } + + private val structWithEmptyString = new StructType() + .add(StructField("a b", StringType).withComment("comment")) + + test("SPARK-33846: empty string in a column's name should be respected by toDDL") { + val ddl = "`a b` STRING COMMENT 'comment'" + + assert(structWithEmptyString.toDDL == ddl) + } + + test("SPARK-33846: empty string in a column's name should be respected by fromDDL") { + val ddl = "`a b` STRING COMMENT 'comment'" + + assert(StructType.fromDDL(ddl) == structWithEmptyString) + } test("Print up to the given level") { val schema = StructType.fromDDL( From 84bf07bbd77e42495d36a6b1e0f592184a12022f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 22 Dec 2020 12:37:16 +0000 Subject: [PATCH 0849/1009] [SPARK-33878][SQL][TESTS] Fix resolving of `spark_catalog` in v1 Hive catalog tests ### What changes were proposed in this pull request? 1. Recognize `spark_catalog` as the default session catalog in the checks of `TestHiveQueryExecution`. 2. Move v2 and v1 in-memory catalog test `"SPARK-33305: DROP TABLE should also invalidate cache"` to the common trait `command/DropTableSuiteBase`, and run it with v1 Hive external catalog. ### Why are the changes needed? To run In-memory catalog tests in Hive catalog. ### Does this PR introduce _any_ user-facing change? No, the changes influence only on tests. ### How was this patch tested? By running the affected test suites for `DROP TABLE`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropTableSuite" ``` Closes #30883 from MaxGekk/fix-spark_catalog-hive-tests. 
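The identifier handling added to `TestHiveQueryExecution` in the diff below can be sketched as follows. This is a simplified stand-in rather than the actual helper: the real code relies on `CatalogManager.SESSION_CATALOG_NAME` (which is `spark_catalog`) and `asTableIdentifier`, which also rejects identifiers that still have more than two parts.

```scala
import org.apache.spark.sql.catalyst.TableIdentifier

// Strip a leading session catalog name so that `spark_catalog.ns.tbl` and `ns.tbl`
// resolve to the same v1 TableIdentifier.
def toV1Identifier(parts: Seq[String]): TableIdentifier = {
  val normalized =
    if (parts.length > 1 && parts.head.equalsIgnoreCase("spark_catalog")) parts.tail else parts
  TableIdentifier(normalized.last, normalized.init.lastOption)
}

assert(toV1Identifier(Seq("spark_catalog", "ns", "tbl")) == TableIdentifier("tbl", Some("ns")))
assert(toV1Identifier(Seq("ns", "tbl")) == TableIdentifier("tbl", Some("ns")))
```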
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../command/DropTableSuiteBase.scala | 22 +++++++++++++++ .../execution/command/v1/DropTableSuite.scala | 27 +------------------ .../execution/command/v2/DropTableSuite.scala | 22 --------------- .../apache/spark/sql/hive/test/TestHive.scala | 10 ++++--- 4 files changed, 30 insertions(+), 51 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala index dd620d3bd7aa4..9cba67f04a351 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropTableSuiteBase.scala @@ -96,4 +96,26 @@ trait DropTableSuiteBase extends QueryTest with DDLCommandTestUtils { } } } + + test("SPARK-33305: DROP TABLE should also invalidate cache") { + val t = s"$catalog.ns.tbl" + val view = "view" + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + withTempView(view, "source") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) + checkAnswer( + sql(s"SELECT * FROM $view"), + spark.table("source").select("id").collect()) + + assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + sql(s"DROP TABLE $t") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala index 4a6956e9ad82d..530d18cb6f7b0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DropTableSuite.scala @@ -33,30 +33,5 @@ trait DropTableSuiteBase extends command.DropTableSuiteBase { } } -class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase { - // The test fails in Hive External catalog with: - // org.apache.spark.sql.AnalysisException: - // spark_catalog.ns.tbl is not a valid TableIdentifier as it has more than 2 name parts. 
- test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = s"$catalog.ns.tbl" - val view = "view" - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTempView(view, "source") { - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) - checkAnswer( - sql(s"SELECT * FROM $view"), - spark.table("source").select("id").collect()) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } -} +class DropTableSuite extends DropTableSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index a36df8df4dd06..16283d5ad6644 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -49,26 +49,4 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { Seq.empty) } } - - test("SPARK-33305: DROP TABLE should also invalidate cache") { - val t = s"$catalog.ns.tbl" - val view = "view" - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - withTempView(view, "source") { - val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") - df.createOrReplaceTempView("source") - sql(s"CREATE TABLE $t $defaultUsing AS SELECT id, data FROM source") - sql(s"CACHE TABLE $view AS SELECT id FROM $t") - checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source").collect()) - checkAnswer( - sql(s"SELECT * FROM $view"), - spark.table("source").select("id").collect()) - - assert(!spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - sql(s"DROP TABLE $t") - assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) - } - } - } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index ff5b9e453a482..962efa8303f9b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} +import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} import org.apache.spark.sql.hive._ @@ -601,9 +602,12 @@ private[hive] class TestHiveQueryExecution( } // Make sure any test tables referenced are loaded. 
- val referencedTables = - describedTables ++ - logical.collect { case UnresolvedRelation(ident, _, _) => ident.asTableIdentifier } + val referencedTables = describedTables ++ logical.collect { + case UnresolvedRelation(ident, _, _) => + if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { + ident.tail.asTableIdentifier + } else ident.asTableIdentifier + } val resolver = sparkSession.sessionState.conf.resolver val referencedTestTables = referencedTables.flatMap { tbl => val testTableOpt = sparkSession.testTables.keys.find(resolver(_, tbl.table)) From 6da5cdf1dbfc35cee0ce32aa9e44c0b4187373d9 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 14:24:12 +0000 Subject: [PATCH 0850/1009] [SPARK-33876][SQL] Add length-check for reading char/varchar from tables w/ a external location ### What changes were proposed in this pull request? This PR adds the length check to the existing ApplyCharPadding rule. Tables will have external locations when users execute SET LOCATION or CREATE TABLE ... LOCATION. If the location contains over length values we should FAIL ON READ. ### Why are the changes needed? ```sql spark-sql> INSERT INTO t2 VALUES ('1', 'b12345'); Time taken: 0.141 seconds spark-sql> alter table t set location '/tmp/hive_one/t2'; Time taken: 0.095 seconds spark-sql> select * from t; 1 b1234 ``` the above case should fail rather than implicitly applying truncation ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30882 from yaooqinn/SPARK-33876. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/CharVarcharUtils.scala | 29 +++++++--- ...PaddingAndLengthCheckForCharVarchar.scala} | 20 ++++--- .../internal/BaseSessionStateBuilder.scala | 2 +- .../spark/sql/CharVarcharTestSuite.scala | 55 +++++++++++++++++++ .../sql/hive/HiveSessionStateBuilder.scala | 2 +- 5 files changed, 89 insertions(+), 19 deletions(-) rename sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/{ApplyCharTypePadding.scala => PaddingAndLengthCheckForCharVarchar.scala} (86%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index e42e384e4b86b..cfdc50d1defb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -127,25 +127,36 @@ object CharVarcharUtils extends Logging { } /** - * Returns expressions to apply read-side char type padding for the given attributes. String - * values should be right-padded to N characters if it's from a CHAR(N) column/field. + * Returns expressions to apply read-side char type padding for the given attributes. + * + * For a CHAR(N) column/field and the length of string value is M + * If M > N, raise runtime error + * If M <= N, the value should be right-padded to N characters. + * + * For a VARCHAR(N) column/field and the length of string value is M + * If M > N, raise runtime error + * If M <= N, the value should be remained. 
*/ - def charTypePadding(output: Seq[AttributeReference]): Seq[NamedExpression] = { + def paddingWithLengthCheck(output: Seq[AttributeReference]): Seq[NamedExpression] = { output.map { attr => getRawType(attr.metadata).filter { rawType => - rawType.existsRecursively(_.isInstanceOf[CharType]) + rawType.existsRecursively(dt => dt.isInstanceOf[CharType] || dt.isInstanceOf[VarcharType]) }.map { rawType => - Alias(charTypePadding(attr, rawType), attr.name)(explicitMetadata = Some(attr.metadata)) + Alias(paddingWithLengthCheck(attr, rawType), attr.name)( + explicitMetadata = Some(attr.metadata)) }.getOrElse(attr) } } - private def charTypePadding(expr: Expression, dt: DataType): Expression = dt match { - case CharType(length) => StringRPad(expr, Literal(length)) + private def paddingWithLengthCheck(expr: Expression, dt: DataType): Expression = dt match { + case CharType(length) => StringRPad(stringLengthCheck(expr, dt), Literal(length)) + + case VarcharType(_) => stringLengthCheck(expr, dt) case StructType(fields) => val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => - Seq(Literal(f.name), charTypePadding(GetStructField(expr, i, Some(f.name)), f.dataType)) + Seq(Literal(f.name), + paddingWithLengthCheck(GetStructField(expr, i, Some(f.name)), f.dataType)) }) if (expr.nullable) { If(IsNull(expr), Literal(null, struct.dataType), struct) @@ -166,7 +177,7 @@ object CharVarcharUtils extends Logging { private def charTypePaddingInArray( arr: Expression, et: DataType, containsNull: Boolean): Expression = { val param = NamedLambdaVariable("x", replaceCharVarcharWithString(et), containsNull) - val func = LambdaFunction(charTypePadding(param, et), Seq(param)) + val func = LambdaFunction(paddingWithLengthCheck(param, et), Seq(param)) ArrayTransform(arr, func) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala similarity index 86% rename from sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala rename to sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala index 35bb86f178eb1..f268d5185000f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PaddingAndLengthCheckForCharVarchar.scala @@ -27,17 +27,21 @@ import org.apache.spark.sql.types.{CharType, StringType} import org.apache.spark.unsafe.types.UTF8String /** - * This rule applies char type padding in two places: - * 1. When reading values from column/field of type CHAR(N), right-pad the values to length N. - * 2. When comparing char type column/field with string literal or char type column/field, - * right-pad the shorter one to the longer length. + * This rule performs char type padding and length check for both char and varchar. + * + * When reading values from column/field of type CHAR(N) or VARCHAR(N), the underlying string value + * might be over length (e.g. tables w/ external locations), it will fail in this case. + * Otherwise, right-pad the values to length N for CHAR(N) and remain the same for VARCHAR(N). + * + * When comparing char type column/field with string literal or char type column/field, + * right-pad the shorter one to the longer length. 
*/ -object ApplyCharTypePadding extends Rule[LogicalPlan] { +object PaddingAndLengthCheckForCharVarchar extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { val padded = plan.resolveOperatorsUpWithNewOutput { case r: LogicalRelation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { @@ -47,7 +51,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } case r: DataSourceV2Relation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { @@ -57,7 +61,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } case r: HiveTableRelation => - val projectList = CharVarcharUtils.charTypePadding(r.output) + val projectList = CharVarcharUtils.paddingWithLengthCheck(r.output) if (projectList == r.output) { r -> Nil } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 6b84f0e636c1c..34b9af12607ed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -179,7 +179,7 @@ abstract class BaseSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: - ApplyCharTypePadding +: + PaddingAndLengthCheckForCharVarchar +: customPostHocResolutionRules override val extendedCheckRules: Seq[LogicalPlan => Unit] = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index b0f1198e46440..d7b84a0971e0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -528,6 +528,61 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa override protected def sparkConf: SparkConf = { super.sparkConf.set(SQLConf.USE_V1_SOURCE_LIST, "parquet") } + + test("create table w/ location and fit length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '12' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format LOCATION '$dir'") + val df = sql("select * from t") + checkAnswer(sql("select * from t"), Row("12")) + } + } + } + } + + test("create table w/ location and over length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '123456' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format LOCATION '$dir'") + val e = intercept[SparkException] { sql("select * from t").collect() } + assert(e.getCause.getMessage.contains( + s"input string of length 6 exceeds $typ type length limitation: 2")) + } + } + } + } + + test("alter table set location w/ fit length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '12' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format") + sql(s"ALTER TABLE t SET LOCATION '$dir'") + checkAnswer(spark.table("t"), Row("12")) + } + } + } + } + + 
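The read-side semantics these new tests exercise can be summarized with a small plain-Scala sketch; Spark builds equivalent Catalyst expressions (a length check followed by `rpad` for CHAR) rather than calling code like this, and the error message mirrors the one asserted in these tests.

```scala
// Simplified model of the read-side length check for CHAR(n)/VARCHAR(n) values.
def readChar(value: String, n: Int): String = {
  if (value == null) {
    null
  } else if (value.length > n) {
    throw new RuntimeException(
      s"input string of length ${value.length} exceeds char type length limitation: $n")
  } else {
    value + " " * (n - value.length) // VARCHAR(n) would return `value` unchanged here
  }
}
```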
test("alter table set location w/ over length values") { + Seq("char", "varchar").foreach { typ => + withTempPath { dir => + withTable("t") { + sql("SELECT '123456' as col").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (col $typ(2)) using $format") + sql(s"ALTER TABLE t SET LOCATION '$dir'") + val e = intercept[SparkException] { spark.table("t").collect() } + assert(e.getCause.getMessage.contains( + s"input string of length 6 exceeds $typ type length limitation: 2")) + } + } + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index da37b61688951..5963a71f55035 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -90,7 +90,7 @@ class HiveSessionStateBuilder( PreprocessTableCreation(session) +: PreprocessTableInsertion +: DataSourceAnalysis +: - ApplyCharTypePadding +: + PaddingAndLengthCheckForCharVarchar +: HiveAnalysis +: customPostHocResolutionRules From 1d450250eb1db7e4f40451f369db830a8f01ec15 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 23 Dec 2020 00:22:42 +0900 Subject: [PATCH 0851/1009] [BUILD][MINOR] Do not publish snapshots from forks ### What changes were proposed in this pull request? The GitHub workflow `Publish Snapshot` publishes master and 3.1 branch via Nexus. For this, the workflow uses `secrets.NEXUS_USER` and `secrets.NEXUS_PW` secrets. These are not available in forks where this workflow fails every day: - https://github.com/G-Research/spark/actions/runs/431626797 - https://github.com/G-Research/spark/actions/runs/433153049 - https://github.com/G-Research/spark/actions/runs/434680048 - https://github.com/G-Research/spark/actions/runs/436958780 ### Why are the changes needed? Avoid attempting to publish snapshots from forked repositories. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Code review only. Closes #30884 from EnricoMi/branch-do-not-publish-snapshots-from-forks. Authored-by: Enrico Minack Signed-off-by: HyukjinKwon --- .github/workflows/publish_snapshot.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml index 504d702fd1f22..c5dbc8d057964 100644 --- a/.github/workflows/publish_snapshot.yml +++ b/.github/workflows/publish_snapshot.yml @@ -6,6 +6,7 @@ on: jobs: publish-snapshot: + if: github.repository == 'apache/spark' runs-on: ubuntu-latest strategy: fail-fast: false From 303b8c87737fdff83c96855084c16d6504b0b50f Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Tue, 22 Dec 2020 09:55:33 -0800 Subject: [PATCH 0852/1009] [SPARK-23862][SQL] Support Java enums from Scala Dataset API ### What changes were proposed in this pull request? Add support for Java Enums (`java.lang.Enum`) from the Scala typed Dataset APIs. This involves adding an implicit for `Encoder` creation in `SQLImplicits`, and updating `ScalaReflection` to handle Java Enums on the serialization and deserialization pathways. Enums are mapped to a `StringType` which is just the name of the Enum value. ### Why are the changes needed? In [SPARK-21255](https://issues.apache.org/jira/browse/SPARK-21255), support for (de)serialization of Java Enums was added, but only when called from Java code. 
It is common for Scala code to rely on Java libraries that are out of control of the Scala developer. Today, if there is a dependency on some Java code which defines an Enum, it would be necessary to define a corresponding Scala class. This change brings closer feature parity between Scala and Java APIs. ### Does this PR introduce _any_ user-facing change? Yes, previously something like: ``` val ds = Seq(MyJavaEnum.VALUE1, MyJavaEnum.VALUE2).toDS // or val ds = Seq(CaseClass(MyJavaEnum.VALUE1), CaseClass(MyJavaEnum.VALUE2)).toDS ``` would fail. Now, it will succeed. ### How was this patch tested? Additional unit tests are added in `DatasetSuite`. Tests include validating top-level enums, enums inside of case classes, enums inside of arrays, and validating that the Enum is stored as the expected string. Closes #30877 from xkrogen/xkrogen-SPARK-23862-scalareflection-java-enums. Lead-authored-by: Erik Krogen Co-authored-by: Fangshi Li Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/ScalaReflection.scala | 9 ++++++ .../sql/catalyst/SerializerBuildHelper.scala | 3 ++ .../org/apache/spark/sql/SQLImplicits.scala | 4 +++ .../org/apache/spark/sql/DatasetSuite.scala | 31 +++++++++++++++++++ 4 files changed, 47 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 53c7f17ee6b2e..361c3476f5941 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -232,6 +232,11 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.time.Instant]) => createDeserializerForInstant(path) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => + createDeserializerForTypesSupportValueOf( + Invoke(path, "toString", ObjectType(classOf[String]), returnNullable = false), + getClassFromType(t)) + case t if isSubtype(t, localTypeOf[java.sql.Timestamp]) => createDeserializerForSqlTimestamp(path) @@ -526,6 +531,9 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.math.BigInteger]) => createSerializerForJavaBigInteger(inputObject) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => + createSerializerForJavaEnum(inputObject) + case t if isSubtype(t, localTypeOf[scala.math.BigInt]) => createSerializerForScalaBigInt(inputObject) @@ -749,6 +757,7 @@ object ScalaReflection extends ScalaReflection { case t if isSubtype(t, localTypeOf[java.lang.Short]) => Schema(ShortType, nullable = true) case t if isSubtype(t, localTypeOf[java.lang.Byte]) => Schema(ByteType, nullable = true) case t if isSubtype(t, localTypeOf[java.lang.Boolean]) => Schema(BooleanType, nullable = true) + case t if isSubtype(t, localTypeOf[java.lang.Enum[_]]) => Schema(StringType, nullable = true) case t if isSubtype(t, definitions.IntTpe) => Schema(IntegerType, nullable = false) case t if isSubtype(t, definitions.LongTpe) => Schema(LongType, nullable = false) case t if isSubtype(t, definitions.DoubleTpe) => Schema(DoubleType, nullable = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala index 85acaa11230b4..0554f0f76708b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala @@ -74,6 +74,9 @@ object SerializerBuildHelper { returnNullable = false) } + def createSerializerForJavaEnum(inputObject: Expression): Expression = + createSerializerForString(Invoke(inputObject, "name", ObjectType(classOf[String]))) + def createSerializerForSqlTimestamp(inputObject: Expression): Expression = { StaticInvoke( DateTimeUtils.getClass, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 71cbc3ab14d97..1135c8848bc23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -88,6 +88,10 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { /** @since 3.0.0 */ implicit def newInstantEncoder: Encoder[java.time.Instant] = Encoders.INSTANT + /** @since 3.2.0 */ + implicit def newJavaEnumEncoder[A <: java.lang.Enum[_] : TypeTag]: Encoder[A] = + ExpressionEncoder() + // Boxed primitives /** @since 2.0.0 */ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 67e3ad6a80642..3a169e487827a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1693,6 +1693,33 @@ class DatasetSuite extends QueryTest checkDataset(ds1.select("_2._2"), ds2.select("_2._2").collect(): _*) } + test("SPARK-23862: Spark ExpressionEncoder should support Java Enum type from Scala") { + val saveModeSeq = + Seq(SaveMode.Append, SaveMode.Overwrite, SaveMode.ErrorIfExists, SaveMode.Ignore, null) + assert(saveModeSeq.toDS().collect().toSeq === saveModeSeq) + assert(saveModeSeq.toDS().schema === new StructType().add("value", StringType, nullable = true)) + + val saveModeCaseSeq = saveModeSeq.map(SaveModeCase.apply) + assert(saveModeCaseSeq.toDS().collect().toSet === saveModeCaseSeq.toSet) + assert(saveModeCaseSeq.toDS().schema === + new StructType().add("mode", StringType, nullable = true)) + + val saveModeArrayCaseSeq = + Seq(SaveModeArrayCase(Array()), SaveModeArrayCase(saveModeSeq.toArray)) + val collected = saveModeArrayCaseSeq.toDS().collect() + assert(collected.length === 2) + val sortedByLength = collected.sortBy(_.modes.length) + assert(sortedByLength(0).modes === Array()) + assert(sortedByLength(1).modes === saveModeSeq.toArray) + assert(saveModeArrayCaseSeq.toDS().schema === + new StructType().add("modes", ArrayType(StringType, containsNull = true), nullable = true)) + + // Enum is stored as string, so it is possible to convert to/from string + val stringSeq = saveModeSeq.map(Option.apply).map(_.map(_.toString).orNull) + assert(stringSeq.toDS().as[SaveMode].collect().toSet === saveModeSeq.toSet) + assert(saveModeSeq.toDS().as[String].collect().toSet === stringSeq.toSet) + } + test("SPARK-24571: filtering of string values by char literal") { val df = Seq("Amsterdam", "San Francisco", "X").toDF("city") checkAnswer(df.where($"city" === 'X'), Seq(Row("X"))) @@ -2053,3 +2080,7 @@ case class CircularReferenceClassD(map: Map[String, CircularReferenceClassE]) case class CircularReferenceClassE(id: String, list: List[CircularReferenceClassD]) case class SpecialCharClass(`field.1`: String, `field 2`: String) + +/** Used to test Java Enums from Scala code */ +case class SaveModeCase(mode: SaveMode) +case class SaveModeArrayCase(modes: Array[SaveMode]) 
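A minimal sketch of how the new implicit encoder above is used from the Scala API; the session setup is illustrative and the commented output is approximate.

```scala
import org.apache.spark.sql.{SaveMode, SparkSession}

object JavaEnumEncoderDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("java-enum-encoder")
      .getOrCreate()
    import spark.implicits._

    // Java enums are encoded as their names, so the schema is a single nullable string column.
    val ds = Seq(SaveMode.Append, SaveMode.Overwrite).toDS()
    ds.printSchema() // root
                     //  |-- value: string (nullable = true)
    ds.show()        // rows: "Append", "Overwrite"

    // Round trip through strings also works, since the storage type is StringType.
    val modes = Seq("Append", "Overwrite").toDS().as[SaveMode].collect()
    assert(modes.toSet == Set(SaveMode.Append, SaveMode.Overwrite))

    spark.stop()
  }
}
```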
From ec1560af251d2c3580f5bccfabc750f1c7af09df Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 23 Dec 2020 11:47:13 +0900 Subject: [PATCH 0853/1009] [SPARK-33364][SQL][FOLLOWUP] Refine the catalog v2 API to purge a table ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/30267 Inspired by https://github.com/apache/spark/pull/30886, it's better to have 2 methods `def dropTable` and `def purgeTable`, than `def dropTable(ident)` and `def dropTable(ident, purge)`. ### Why are the changes needed? 1. make the APIs orthogonal. Previously, `def dropTable(ident, purge)` calls `def dropTable(ident)` and is a superset. 2. simplifies the catalog implementation a little bit. Now the `if (purge) ... else ...` check is done at the Spark side. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? existing tests Closes #30890 from cloud-fan/purgeTable. Authored-by: Wenchen Fan Signed-off-by: HyukjinKwon --- .../catalog/DelegatingCatalogExtension.java | 5 +++++ .../sql/connector/catalog/TableCatalog.java | 17 +++++++---------- .../connector/catalog/TableCatalogSuite.scala | 5 +++++ .../datasources/v2/DropTableExec.scala | 2 +- .../execution/command/v2/DropTableSuite.scala | 4 ++-- 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java index d07d299d65a58..34f07b12b3666 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/DelegatingCatalogExtension.java @@ -99,6 +99,11 @@ public boolean dropTable(Identifier ident) { return asTableCatalog().dropTable(ident); } + @Override + public boolean purgeTable(Identifier ident) { + return asTableCatalog().purgeTable(ident); + } + @Override public void renameTable( Identifier oldIdent, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 52a74ab9dd9f5..4163d86bcc54b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -173,26 +173,23 @@ Table alterTable( boolean dropTable(Identifier ident); /** - * Drop a table in the catalog with an option to purge. + * Drop a table in the catalog and completely remove its data by skipping a trash even if it is + * supported. *
<p>
   * If the catalog supports views and contains a view for the identifier and not a table, this
   * must not drop the view and must return false.
   * <p>
      - * If the catalog supports the option to purge a table, this method must be overridden. - * The default implementation falls back to {@link #dropTable(Identifier)} dropTable} if the - * purge option is set to false. Otherwise, it throws {@link UnsupportedOperationException}. + * If the catalog supports to purge a table, this method should be overridden. + * The default implementation throws {@link UnsupportedOperationException}. * * @param ident a table identifier - * @param purge whether a table should be purged * @return true if a table was deleted, false if no table exists for the identifier + * @throws UnsupportedOperationException If table purging is not supported * * @since 3.1.0 */ - default boolean dropTable(Identifier ident, boolean purge) { - if (purge) { - throw new UnsupportedOperationException("Purge option is not supported."); - } - return dropTable(ident); + default boolean purgeTable(Identifier ident) throws UnsupportedOperationException { + throw new UnsupportedOperationException("Purge table is not supported."); } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala index dab20911bbdc7..ef342e7ec5539 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/TableCatalogSuite.scala @@ -643,6 +643,11 @@ class TableCatalogSuite extends SparkFunSuite { assert(!catalog.tableExists(testIdent)) } + test("purgeTable") { + val catalog = newCatalog() + intercept[UnsupportedOperationException](catalog.purgeTable(testIdent)) + } + test("renameTable") { val catalog = newCatalog() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala index f89b89096772a..100eaf9021863 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropTableExec.scala @@ -35,7 +35,7 @@ case class DropTableExec( override def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { invalidateCache() - catalog.dropTable(ident, purge) + if (purge) catalog.purgeTable(ident) else catalog.dropTable(ident) } else if (!ifExists) { throw new NoSuchTableException(ident) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala index 16283d5ad6644..a272f649288f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DropTableSuite.scala @@ -29,8 +29,8 @@ class DropTableSuite extends command.DropTableSuiteBase with CommandSuiteBase { val errMsg = intercept[UnsupportedOperationException] { sql(s"DROP TABLE $catalog.ns.tbl PURGE") }.getMessage - // The default TableCatalog.dropTable implementation doesn't support the purge option. - assert(errMsg.contains("Purge option is not supported")) + // The default TableCatalog.purgeTable implementation throws an exception. 
+ assert(errMsg.contains("Purge table is not supported")) } } From a3dd8dacee8f6b316be90500f9fd8ec8997a5784 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 22 Dec 2020 19:46:37 -0800 Subject: [PATCH 0854/1009] [SPARK-33877][SQL] SQL reference documents for INSERT w/ a column list We support a column list of INSERT for Spark v3.1.0 (See: SPARK-32976 (https://github.com/apache/spark/pull/29893)). So, this PR targets at documenting it in the SQL documents. ### What changes were proposed in this pull request? improve doc ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? doc ### How was this patch tested? passing GA doc gen. ![image](https://user-images.githubusercontent.com/8326978/102954876-8994fa00-450f-11eb-81f9-931af6d1f69b.png) ![image](https://user-images.githubusercontent.com/8326978/102954900-99acd980-450f-11eb-9733-115ad37d2319.png) ![image](https://user-images.githubusercontent.com/8326978/102954935-af220380-450f-11eb-9aaa-fdae0725d41e.png) ![image](https://user-images.githubusercontent.com/8326978/102954949-bc3ef280-450f-11eb-8a0d-d7b688efa7bb.png) Closes #30888 from yaooqinn/SPARK-33877. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-dml-insert-into.md | 41 +++++++++++++++++- ...l-ref-syntax-dml-insert-overwrite-table.md | 43 ++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index 39d15808d033e..96a95b1a629e9 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -26,7 +26,7 @@ The `INSERT INTO` statement inserts new rows into a table. The inserted rows can ### Syntax ```sql -INSERT INTO [ TABLE ] table_identifier [ partition_spec ] +INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ] { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query } ``` @@ -40,11 +40,20 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] * **partition_spec** - An optional parameter that specifies a comma separated list of key and value pairs + An optional parameter that specifies a comma-separated list of key and value pairs for partitions. **Syntax:** `PARTITION ( partition_col_name = partition_col_val [ , ... ] )` +* **column_list** + + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + + **Note:**The current behaviour has some limitations: + - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. + - The size of the column list should be exactly the size of the data from `VALUES` clause or query. + - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. + * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** Specifies the values to be inserted. Either an explicitly specified value or a NULL can be inserted. 
@@ -198,6 +207,34 @@ SELECT * FROM students; +-------------+--------------------------+----------+ ``` +#### Insert with a column list + +```sql +INSERT INTO students (address, name, student_id) VALUES + ('Hangzhou, China', 'Kent Yao', 11215016); + +SELECT * FROM students WHERE name = 'Kent Yao'; ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Kent Yao | Hangzhou, China| 11215016| ++---------+----------------------+----------+ +``` + +#### Insert with both a partition spec and a column list + +```sql +INSERT INTO students PARTITION (student_id = 11215017) (address, name) VALUES + ('Hangzhou, China', 'Kent Yao Jr.'); + +SELECT * FROM students WHERE student_id = 11215017; ++------------+----------------------+----------+ +| name| address|student_id| ++------------+----------------------+----------+ +|Kent Yao Jr.| Hangzhou, China| 11215017| ++------------+----------------------+----------+ +``` + ### Related Statements * [INSERT OVERWRITE statement](sql-ref-syntax-dml-insert-overwrite-table.html) diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index 638dcb34bb1d2..f2413fb72464f 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -26,7 +26,7 @@ The `INSERT OVERWRITE` statement overwrites the existing data in the table using ### Syntax ```sql -INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] +INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] [ ( column_list ) ] { VALUES ( { value | NULL } [ , ... ] ) [ , ( ... ) ] | query } ``` @@ -40,11 +40,22 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] * **partition_spec** - An optional parameter that specifies a comma separated list of key and value pairs + An optional parameter that specifies a comma-separated list of key and value pairs for partitions. **Syntax:** `PARTITION ( partition_col_name [ = partition_col_val ] [ , ... ] )` +* **column_list** + + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + + **Note** + + The current behaviour has some limitations: + - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. + - The size of the column list should be exactly the size of the data from `VALUES` clause or query. + - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. + * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** Specifies the values to be inserted. Either an explicitly specified value or a NULL can be inserted. 
@@ -169,6 +180,34 @@ SELECT * FROM students; +-----------+-------------------------+----------+ ``` +#### Insert with a column list + +```sql +INSERT OVERWRITE students (address, name, student_id) VALUES + ('Hangzhou, China', 'Kent Yao', 11215016); + +SELECT * FROM students WHERE name = 'Kent Yao'; ++---------+----------------------+----------+ +| name| address|student_id| ++---------+----------------------+----------+ +|Kent Yao | Hangzhou, China| 11215016| ++---------+----------------------+----------+ +``` + +#### Insert with both a partition spec and a column list + +```sql +INSERT OVERWRITE students PARTITION (student_id = 11215016) (address, name) VALUES + ('Hangzhou, China', 'Kent Yao Jr.'); + +SELECT * FROM students WHERE student_id = 11215016; ++------------+----------------------+----------+ +| name| address|student_id| ++------------+----------------------+----------+ +|Kent Yao Jr.| Hangzhou, China| 11215016| ++------------+----------------------+----------+ +``` + ### Related Statements * [INSERT INTO statement](sql-ref-syntax-dml-insert-into.html) From ea37717f7c709a86985e006a192bf040f8958da3 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Wed, 23 Dec 2020 13:50:05 +0900 Subject: [PATCH 0855/1009] [SPARK-32106][SQL][FOLLOWUP] Fix flaky tests in transform.sql ### What changes were proposed in this pull request? This PR intends to fix flaky GitHub Actions (GA) tests below in `transform.sql` (this flakiness does not seem to happen in the Jenkins tests): - https://github.com/apache/spark/runs/1592987501 - https://github.com/apache/spark/runs/1593196242 - https://github.com/apache/spark/runs/1595496305 - https://github.com/apache/spark/runs/1596309555 This is because the error message is different between test runs in GA (the error message seems to be truncated indeterministically) ,e.g., ``` # https://github.com/apache/spark/runs/1592987501 Expected "...h status 127. Error:[ /bin/bash: some_non_existent_command: command not found]", but got "...h status 127. Error:[]" Result did not match for query #2 # https://github.com/apache/spark/runs/1593196242 Expected "...istent_command: comm[and not found]", but got "...istent_command: comm[]" Result did not match for query #2 ``` The root cause of this indeterministic behaviour happening only in GA is not clear though, this test throws SparkException consistently even in GA. So, this PR proposes to make the test just check if it will be thrown when running it. This PR comes from the dongjoon-hyun comment: https://github.com/apache/spark/pull/29414/files#r547414513 ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests. Closes #30896 from maropu/SPARK-32106-FOLLOWUP. 
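Restated as a standalone sketch, the idea of the fix is to stop asserting on the shell-dependent message and assert only the exception type. The suite name and scaffolding below are hypothetical stand-ins; the real test is added to `BaseScriptTransformationSuite` in the diff that follows.

```scala
import org.apache.spark.SparkException
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession

// Hypothetical suite used only to illustrate the pattern; see the actual change below.
class TransformFailureSuite extends QueryTest with SharedSparkSession {
  test("non-existent TRANSFORM command: assert only the exception type") {
    // The shell's error text varies by environment (and gets truncated in GA logs),
    // so the assertion stops at the exception type instead of matching the message.
    intercept[SparkException] {
      spark.sql(
        """SELECT TRANSFORM(a)
          |USING 'some_non_existent_command' AS (a)
          |FROM VALUES (1) t(a)
        """.stripMargin).collect()
    }
  }
}
```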
Authored-by: Takeshi Yamamuro Signed-off-by: HyukjinKwon --- .../resources/sql-tests/inputs/transform.sql | 10 -------- .../sql-tests/results/transform.sql.out | 24 +------------------ .../BaseScriptTransformationSuite.scala | 20 ++++++++++++++++ 3 files changed, 21 insertions(+), 33 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index 65b060eca3a62..3f39700a95913 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -9,16 +9,6 @@ SELECT TRANSFORM(a) USING 'cat' AS (a) FROM t; --- with non-exist command -SELECT TRANSFORM(a) -USING 'some_non_existent_command' AS (a) -FROM t; - --- with non-exist file -SELECT TRANSFORM(a) -USING 'python some_non_existent_file' AS (a) -FROM t; - -- common supported data types between no serde and serde transform SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index 83ab5cb729c24..3267a7625a7d9 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 18 +-- Number of queries: 16 -- !query @@ -26,28 +26,6 @@ struct 3 --- !query -SELECT TRANSFORM(a) -USING 'some_non_existent_command' AS (a) -FROM t --- !query schema -struct<> --- !query output -org.apache.spark.SparkException -Subprocess exited with status 127. Error: /bin/bash: some_non_existent_command: command not found - - --- !query -SELECT TRANSFORM(a) -USING 'python some_non_existent_file' AS (a) -FROM t --- !query schema -struct<> --- !query output -org.apache.spark.SparkException -Subprocess exited with status 2. Error: python: can't open file 'some_non_existent_file': [Errno 2] No such file or directory - - -- !query SELECT a, b, decode(c, 'UTF-8'), d, e, f, g, h, i, j, k, l FROM ( SELECT TRANSFORM(a, b, c, d, e, f, g, h, i, j, k, l) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 81f292809df4a..863657a7862a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -420,6 +420,26 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'b.cast("string").as("b"), lit(null), lit(null)).collect()) } + + test("SPARK-32106: TRANSFORM with non-existent command/file") { + Seq( + s""" + |SELECT TRANSFORM(a) + |USING 'some_non_existent_command' AS (a) + |FROM VALUES (1) t(a) + """.stripMargin, + s""" + |SELECT TRANSFORM(a) + |USING 'python some_non_existent_file' AS (a) + |FROM VALUES (1) t(a) + """.stripMargin).foreach { query => + intercept[SparkException] { + // Since an error message is shell-dependent, this test just checks + // if the expected exception will be thrown. 
+ sql(query).collect() + } + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 90d6f8600117da33bbb570dee6d893cfd8d35263 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 22 Dec 2020 21:59:53 -0800 Subject: [PATCH 0856/1009] [SPARK-33870][CORE] Enable spark.storage.replication.proactive by default ### What changes were proposed in this pull request? This PR aims to enable `spark.storage.replication.proactive` by default for Apache Spark 3.2.0. ### Why are the changes needed? `spark.storage.replication.proactive` is added by SPARK-15355 at Apache Spark 2.2.0 and has been helpful when the block manager loss occurs frequently like K8s environment. ### Does this PR introduce _any_ user-facing change? Yes, this will make the Spark jobs more robust. ### How was this patch tested? Pass the existing UTs. Closes #30876 from dongjoon-hyun/SPARK-33870. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/internal/config/package.scala | 2 +- docs/core-migration-guide.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index f6de5e4128ca5..cbf4a971e3d0d 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -384,7 +384,7 @@ package object config { "get the replication level of the block to the initial number") .version("2.2.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) private[spark] val STORAGE_MEMORY_MAP_THRESHOLD = ConfigBuilder("spark.storage.memoryMapThreshold") diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 11d3e0019617f..822975b4edf27 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.1 to 3.2 + +- Since Spark 3.2, `spark.storage.replication.proactive` is enabled by default which means Spark tries to replenish in case of the loss of cached RDD block replicas due to executor failures. To restore the behavior before Spark 3.2, you can set `spark.storage.replication.proactive` to `false`. + ## Upgrading from Core 3.0 to 3.1 - In Spark 3.0 and below, `SparkContext` can be created in executors. Since Spark 3.1, an exception will be thrown when creating `SparkContext` in executors. You can allow it by setting the configuration `spark.executor.allowSparkContext` when creating `SparkContext` in executors. From e853f068f6c8f9c2aebad37115b0fad1191650ee Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 22 Dec 2020 22:43:03 -0800 Subject: [PATCH 0857/1009] [SPARK-33526][SQL][FOLLOWUP] Fix flaky test due to timeout and fix docs ### What changes were proposed in this pull request? Make test stable and fix docs. ### Why are the changes needed? Query timeout sometime since we set an another config after set query timeout. 
``` sbt.ForkMain$ForkError: java.sql.SQLTimeoutException: Query timed out after 0 seconds at org.apache.hive.jdbc.HiveStatement.waitForOperationToComplete(HiveStatement.java:381) at org.apache.hive.jdbc.HiveStatement.execute(HiveStatement.java:254) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$13(ThriftServerWithSparkContextSuite.scala:107) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$13$adapted(ThriftServerWithSparkContextSuite.scala:106) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$12(ThriftServerWithSparkContextSuite.scala:106) at org.apache.spark.sql.hive.thriftserver.ThriftServerWithSparkContextSuite.$anonfun$$init$$12$adapted(ThriftServerWithSparkContextSuite.scala:89) at org.apache.spark.sql.hive.thriftserver.SharedThriftServer.$anonfun$withJdbcStatement$4(SharedThriftServer.scala:95) at org.apache.spark.sql.hive.thriftserver.SharedThriftServer.$anonfun$withJdbcStatement$4$adapted(SharedThriftServer.scala:95) ``` The reason is: 1. we execute `set spark.sql.thriftServer.queryTimeout = 1`, then all the option will be limited in 1s. 2. we execute `set spark.sql.thriftServer.interruptOnCancel = false/true`. This sql will get timeout exception if there is something hung within 1s. It's not our expected. Reset the timeout before we do the step2 can avoid this problem. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Fix test. Closes #30897 from ulysses-you/SPARK-33526-followup. Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../thriftserver/ThriftServerWithSparkContextSuite.scala | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b5547319f0ab3..d14d136a81e7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -970,8 +970,8 @@ object SQLConf { "a positive value, a running query will be cancelled automatically when the timeout is " + "exceeded, otherwise the query continues to run till completion. If timeout values are " + "set for each statement via `java.sql.Statement.setQueryTimeout` and they are smaller " + - "than this configuration value, they take precedence. If you set this timeout and prefer" + - "to cancel the queries right away without waiting task to finish, consider enabling" + + "than this configuration value, they take precedence. 
If you set this timeout and prefer " + + "to cancel the queries right away without waiting task to finish, consider enabling " + s"${THRIFTSERVER_FORCE_CANCEL.key} together.") .version("3.1.0") .timeConf(TimeUnit.SECONDS) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 036eb5850695e..3598f966b6259 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -102,14 +102,15 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { spark.sparkContext.addSparkListener(listener) try { - statement.execute(s"SET ${SQLConf.THRIFTSERVER_QUERY_TIMEOUT.key}=1") Seq(true, false).foreach { force => + statement.setQueryTimeout(0) statement.execute(s"SET ${SQLConf.THRIFTSERVER_FORCE_CANCEL.key}=$force") + statement.setQueryTimeout(1) forceCancel.set(force) - val e1 = intercept[SQLException] { + val e = intercept[SQLException] { statement.execute("select java_method('java.lang.Thread', 'sleep', 3000L)") }.getMessage - assert(e1.contains("Query timed out")) + assert(e.contains("Query timed out")) } } finally { spark.sparkContext.removeSparkListener(listener) From 2287f56a3e105e04cf4e86283eaee12f270c09a7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 16:14:27 +0900 Subject: [PATCH 0858/1009] [SPARK-33879][SQL] Char Varchar values fails w/ match error as partition columns ### What changes were proposed in this pull request? ```sql spark-sql> select * from t10 where c0='abcd'; 20/12/22 15:43:38 ERROR SparkSQLDriver: Failed in [select * from t10 where c0='abcd'] scala.MatchError: CharType(10) (of class org.apache.spark.sql.types.CharType) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:815) at org.apache.spark.sql.catalyst.expressions.CastBase.cast$lzycompute(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.cast(Cast.scala:842) at org.apache.spark.sql.catalyst.expressions.CastBase.nullSafeEval(Cast.scala:844) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:476) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.$anonfun$toRow$2(interface.scala:164) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) at scala.collection.Iterator.foreach(Iterator.scala:941) at scala.collection.Iterator.foreach$(Iterator.scala:941) at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at org.apache.spark.sql.types.StructType.foreach(StructType.scala:102) at scala.collection.TraversableLike.map(TraversableLike.scala:238) at scala.collection.TraversableLike.map$(TraversableLike.scala:231) at org.apache.spark.sql.types.StructType.map(StructType.scala:102) at org.apache.spark.sql.catalyst.catalog.CatalogTablePartition.toRow(interface.scala:158) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3(ExternalCatalogUtils.scala:157) at org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils$.$anonfun$prunePartitionsByFilter$3$adapted(ExternalCatalogUtils.scala:156) ``` c0 is a partition column, it fails in the 
partition pruning rule In this PR, we relace char/varchar w/ string type before the CAST happends ### Why are the changes needed? bugfix, see the case above ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? yes, new tests Closes #30887 from yaooqinn/SPARK-33879. Authored-by: Kent Yao Signed-off-by: HyukjinKwon --- .../catalog/ExternalCatalogUtils.scala | 4 +++- .../spark/sql/CharVarcharTestSuite.scala | 20 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index ae3b75dc3334b..00445a1614257 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BoundReference, Expression, Predicate} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils object ExternalCatalogUtils { // This duplicates default value of Hive `ConfVars.DEFAULTPARTITIONNAME`, since catalyst doesn't @@ -135,7 +136,8 @@ object ExternalCatalogUtils { if (predicates.isEmpty) { inputPartitions } else { - val partitionSchema = catalogTable.partitionSchema + val partitionSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema( + catalogTable.partitionSchema) val partitionColumnNames = catalogTable.partitionColumnNames.toSet val nonPartitionPruningPredicates = predicates.filterNot { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d7b84a0971e0c..8ab8c37d5e790 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -356,6 +356,26 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } + test("char type comparison: partition pruning") { + withTable("t") { + sql(s"CREATE TABLE t(i INT, c1 CHAR(2), c2 VARCHAR(5)) USING $format PARTITIONED BY (c1, c2)") + sql("INSERT INTO t VALUES (1, 'a', 'a')") + Seq(("c1 = 'a'", true), + ("'a' = c1", true), + ("c1 = 'a '", true), + ("c1 > 'a'", false), + ("c1 IN ('a', 'b')", true), + ("c2 = 'a '", false), + ("c2 = 'a'", true), + ("c2 IN ('a', 'b')", true)).foreach { case (con, res) => + val df = spark.table("t") + withClue(con) { + checkAnswer(df.where(con), df.where(res.toString)) + } + } + } + } + test("char type comparison: join") { withTable("t1", "t2") { sql(s"CREATE TABLE t1(c CHAR(2)) USING $format") From d98c216e1959e276877c3d0a9562cc4cdd8b41bb Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 23 Dec 2020 18:04:28 +0900 Subject: [PATCH 0859/1009] [SPARK-31960][YARN][DOCS][FOLLOW-UP] Document the behaviour change of Hadoop's classpath propagation in migration guide ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/28788, and proposes to update migration guide. ### Why are the changes needed? To tell users about the behaviour change. ### Does this PR introduce _any_ user-facing change? 
Yes, it updates migration guides for users. ### How was this patch tested? GitHub Actions' documentation build should test it. Closes #30903 from HyukjinKwon/SPARK-31960-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- docs/core-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 822975b4edf27..ec7c3ab9cb568 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -30,6 +30,8 @@ license: | - In Spark 3.0 and below, `SparkContext` can be created in executors. Since Spark 3.1, an exception will be thrown when creating `SparkContext` in executors. You can allow it by setting the configuration `spark.executor.allowSparkContext` when creating `SparkContext` in executors. +- In Spark 3.0 and below, Spark propagated the Hadoop classpath from `yarn.application.classpath` and `mapreduce.application.classpath` into the Spark application submitted to YARN when Spark distribution is with the built-in Hadoop. Since Spark 3.1, it does not propagate anymore when the Spark distribution is with the built-in Hadoop in order to prevent the failure from the different transitive dependencies picked up from the Hadoop cluster such as Guava and Jackson. To restore the behavior before Spark 3.1, you can set `spark.yarn.populateHadoopClasspath` to `true`. + ## Upgrading from Core 2.4 to 3.0 - The `org.apache.spark.ExecutorPlugin` interface and related configuration has been replaced with From 34bfb3a31d505a08e15454214d8f78933310ebb3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 09:09:48 +0000 Subject: [PATCH 0860/1009] [SPARK-33787][SQL] Allow partition purge for v2 tables ### What changes were proposed in this pull request? 1. Add new methods `purgePartition()`/`purgePartitions()` to the interfaces `SupportsPartitionManagement`/`SupportsAtomicPartitionManagement`. 2. Default implementation of new methods throw the exception `UnsupportedOperationException`. 3. Add tests for new methods to `SupportsPartitionManagementSuite`/`SupportsAtomicPartitionManagementSuite`. 4. Add `ALTER TABLE .. DROP PARTITION` tests for DS v1 and v2. Closes #30776 Closes #30821 ### Why are the changes needed? Currently, the `PURGE` option that user can set in `ALTER TABLE .. DROP PARTITION` is completely ignored. We should pass this flag to the catalog implementation, so, the catalog should decide how to handle the flag. ### Does this PR introduce _any_ user-facing change? The changes can impact on behavior of `ALTER TABLE .. DROP PARTITION` for v2 tables. ### How was this patch tested? By running the affected test suites, for instance: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30886 from MaxGekk/purge-partition. 
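For connector authors, the new hook can be sketched roughly as below. This is an illustration under stated assumptions, not code from the patch: the class name and the `deletePermanently` helper are invented, and everything else about the table is left abstract.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.connector.catalog.SupportsAtomicPartitionManagement

// Rough sketch of a v2 table that opts in to partition purging.
abstract class PurgeAwarePartitionTable extends SupportsAtomicPartitionManagement {

  // Hypothetical storage hook that removes partition data without moving it to a trash area.
  protected def deletePermanently(ident: InternalRow): Unit

  // `ALTER TABLE ... DROP PARTITION ... PURGE` reaches this method for atomic v2 tables;
  // implementations that keep the default simply throw UnsupportedOperationException.
  override def purgePartitions(idents: Array[InternalRow]): Boolean = {
    // Remove the data permanently first (bypassing any trash location), then drop the
    // partition metadata through the existing atomic API. A production implementation
    // must also honour the contract of rolling back safely when a partition is missing.
    idents.foreach(deletePermanently)
    dropPartitions(idents)
  }
}
```

At the SQL layer the `PURGE` keyword simply flows through to these methods, so a catalog that keeps the defaults surfaces `UnsupportedOperationException`, which is exactly what the new v2 test in this patch asserts.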
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../SupportsAtomicPartitionManagement.java | 23 +++++++++++++++++++ .../catalog/SupportsPartitionManagement.java | 19 +++++++++++++++ ...pportsAtomicPartitionManagementSuite.scala | 14 +++++++++++ .../SupportsPartitionManagementSuite.scala | 10 ++++++++ .../v2/AlterTableDropPartitionExec.scala | 9 +++++--- .../datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../v1/AlterTableDropPartitionSuite.scala | 10 ++++++++ .../v2/AlterTableDropPartitionSuite.scala | 15 ++++++++++++ 8 files changed, 99 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java index 754203125cdc2..665946fcf3e2a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagement.java @@ -21,6 +21,7 @@ import org.apache.spark.annotation.Experimental; import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionException; import org.apache.spark.sql.catalyst.analysis.PartitionAlreadyExistsException; import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException; @@ -33,6 +34,9 @@ * add an array of partitions and any data they contain to the table * ${@link #dropPartitions}: * remove an array of partitions and any data they contain from the table + * ${@link #purgePartitions}: + * remove an array of partitions and any data they contain from the table by skipping + * a trash even if it is supported * * @since 3.1.0 */ @@ -82,4 +86,23 @@ void createPartitions( * @return true if partitions were deleted, false if any partition not exists */ boolean dropPartitions(InternalRow[] idents); + + /** + * Drop an array of partitions atomically from table, and completely remove partitions data + * by skipping a trash even if it is supported. + *

      + * If any partition doesn't exists, + * the operation of purgePartitions need to be safely rolled back. + * + * @param idents an array of partition identifiers + * @return true if partitions were deleted, false if any partition not exists + * @throws NoSuchPartitionException If any partition identifier to alter doesn't exist + * @throws UnsupportedOperationException If partition purging is not supported + * + * @since 3.2.0 + */ + default boolean purgePartitions(InternalRow[] idents) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partition purge is not supported"); + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index cf86c44e9563b..409ab3f5f9335 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -36,6 +36,9 @@ * add a partition and any data it contains to the table * ${@link #dropPartition}: * remove a partition and any data it contains from the table + * ${@link #purgePartition}: + * remove a partition and any data it contains from the table by skipping a trash + * even if it is supported. * ${@link #replacePartitionMetadata}: * point a partition to a new location, which will swap one location's data for the other * @@ -72,6 +75,22 @@ void createPartition( */ boolean dropPartition(InternalRow ident); + /** + * Drop a partition from the table and completely remove partition data by skipping a trash + * even if it is supported. + * + * @param ident a partition identifier + * @return true if a partition was deleted, false if no partition exists for the identifier + * @throws NoSuchPartitionException If the partition identifier to alter doesn't exist + * @throws UnsupportedOperationException If partition purging is not supported + * + * @since 3.2.0 + */ + default boolean purgePartition(InternalRow ident) + throws NoSuchPartitionException, UnsupportedOperationException { + throw new UnsupportedOperationException("Partition purge is not supported"); + } + /** * Test whether a partition exists using an {@link InternalRow ident} from the table. 
* diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala index ad2631650b7ef..d8a0b56928b34 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsAtomicPartitionManagementSuite.scala @@ -110,6 +110,20 @@ class SupportsAtomicPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } + test("purgePartitions") { + val table = catalog.loadTable(ident) + val partTable = new InMemoryAtomicPartitionTable( + table.name(), table.schema(), table.partitioning(), table.properties()) + val partIdents = Array(InternalRow.apply("3"), InternalRow.apply("4")) + partTable.createPartitions( + partIdents, + Array(new util.HashMap[String, String](), new util.HashMap[String, String]())) + val errMsg = intercept[UnsupportedOperationException] { + partTable.purgePartitions(partIdents) + }.getMessage + assert(errMsg.contains("purge is not supported")) + } + test("dropPartitions failed if partition not exists") { val table = catalog.loadTable(ident) val partTable = new InMemoryAtomicPartitionTable( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index dc2df546d6bfd..31494c7c2dd50 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -85,6 +85,16 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { assert(!hasPartitions(partTable)) } + test("purgePartition") { + val table = catalog.loadTable(ident) + val partTable = new InMemoryPartitionTable( + table.name(), table.schema(), table.partitioning(), table.properties()) + val errMsg = intercept[UnsupportedOperationException] { + partTable.purgePartition(InternalRow.apply("3")) + }.getMessage + assert(errMsg.contains("purge is not supported")) + } + test("replacePartitionMetadata") { val table = catalog.loadTable(ident) val partTable = new InMemoryPartitionTable( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index c7a68ecb2bbee..90714c3c726f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.connector.catalog.{SupportsAtomicPartitionManagement case class AlterTableDropPartitionExec( table: SupportsPartitionManagement, partSpecs: Seq[ResolvedPartitionSpec], - ignoreIfNotExists: Boolean) extends V2CommandExec { + ignoreIfNotExists: Boolean, + purge: Boolean) extends V2CommandExec { import DataSourceV2Implicits._ override def output: Seq[Attribute] = Seq.empty @@ -45,9 +46,11 @@ case class AlterTableDropPartitionExec( existsPartIdents match { case Seq() => // Nothing will be done case Seq(partIdent) => - table.dropPartition(partIdent) + if (purge) 
table.purgePartition(partIdent) else table.dropPartition(partIdent) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => - table.asAtomicPartitionable.dropPartitions(existsPartIdents.toArray) + val idents = existsPartIdents.toArray + val atomicTable = table.asAtomicPartitionable + if (purge) atomicTable.purgePartitions(idents) else atomicTable.dropPartitions(idents) case _ => throw new UnsupportedOperationException( s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 0c92945dc6ca5..4667bb7cca998 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -348,9 +348,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, purge) => AlterTableDropPartitionExec( - table, parts.asResolvedPartitionSpecs, ignoreIfNotExists) :: Nil + table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 737af96f5abe3..12a99933f6633 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -21,6 +21,16 @@ import org.apache.spark.sql.execution.command trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { override protected val notFullPartitionSpecErr = "The following partitions not found in table" + + test("purge partition data") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id = 1)") + checkPartitions(t, Map("id" -> "1")) + sql(s"ALTER TABLE $t DROP PARTITION (id = 1) PURGE") + checkPartitions(t) // no partitions + } + } } class AlterTableDropPartitionSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index ffbfe3f695935..e2762f0439cb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -35,4 +35,19 @@ class AlterTableDropPartitionSuite assert(errMsg.contains("can not alter partitions")) } } + + test("purge partition data") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"ALTER TABLE $t ADD PARTITION (id=1)") + try { + val 
errMsg = intercept[UnsupportedOperationException] { + sql(s"ALTER TABLE $t DROP PARTITION (id=1) PURGE") + }.getMessage + assert(errMsg.contains("purge is not supported")) + } finally { + sql(s"ALTER TABLE $t DROP PARTITION (id=1)") + } + } + } } From f421c172d976bf6844b44b0ab9d1e1fa55f380e3 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 23 Dec 2020 09:20:49 +0000 Subject: [PATCH 0861/1009] [SPARK-33497][SQL] Override maxRows in some LogicalPlan ### What changes were proposed in this pull request? This PR aims to override maxRows method in these follow `LogicalPlan`: * `ReturnAnswer` * `Join` * `Range` * `Sample` * `RepartitionOperation` * `Deduplicate` * `LocalRelation` * `Window` ### Why are the changes needed? 1. Logically, we know the max rows info with these `LogicalPlan`. 2. Before this PR, we already have some max rows with `LogicalPlan`, so we can eliminate limit with more case if we expand more. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30443 from ulysses-you/SPARK-33497. Lead-authored-by: ulysses-you Co-authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/dsl/package.scala | 13 ++ .../plans/logical/LocalRelation.scala | 2 + .../plans/logical/basicLogicalOperators.scala | 35 ++- .../optimizer/CombiningLimitsSuite.scala | 129 +++++++++++- .../optimizer/EliminateSortsSuite.scala | 11 +- .../optimizer/LimitPushdownSuite.scala | 13 +- .../approved-plans-v1_4/q28.sf100/explain.txt | 169 ++++++++------- .../q28.sf100/simplified.txt | 125 ++++++----- .../approved-plans-v1_4/q28/explain.txt | 169 ++++++++------- .../approved-plans-v1_4/q28/simplified.txt | 125 ++++++----- .../approved-plans-v1_4/q61.sf100/explain.txt | 147 ++++++------- .../q61.sf100/simplified.txt | 187 ++++++++-------- .../approved-plans-v1_4/q61/explain.txt | 153 +++++++------- .../approved-plans-v1_4/q61/simplified.txt | 199 +++++++++--------- .../approved-plans-v1_4/q90.sf100/explain.txt | 109 +++++----- .../q90.sf100/simplified.txt | 143 +++++++------ .../approved-plans-v1_4/q90/explain.txt | 109 +++++----- .../approved-plans-v1_4/q90/simplified.txt | 141 +++++++------ .../spark/sql/streaming/StreamSuite.scala | 2 +- 19 files changed, 1091 insertions(+), 890 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 5a778d2785a67..6371fd942597e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -248,6 +248,9 @@ package object dsl { override def expr: Expression = Literal(s) def attr: UnresolvedAttribute = analysis.UnresolvedAttribute(s) } + implicit class DslAttr(attr: UnresolvedAttribute) extends ImplicitAttribute { + def s: String = attr.name + } abstract class ImplicitAttribute extends ImplicitOperators { def s: String @@ -456,6 +459,16 @@ package object dsl { def hint(name: String, parameters: Any*): LogicalPlan = UnresolvedHint(name, parameters, logicalPlan) + + def sample( + lowerBound: Double, + upperBound: Double, + withReplacement: Boolean, + seed: Long): LogicalPlan = { + Sample(lowerBound, upperBound, withReplacement, seed, logicalPlan) + } + + def deduplicate(colNames: Attribute*): LogicalPlan = Deduplicate(colNames, logicalPlan) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala index 8c4828a4cef23..7f5a78b6217b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala @@ -91,4 +91,6 @@ case class LocalRelation( " AS " + inlineTableName + output.map(_.name).mkString("(", ", ", ")") } + + override def maxRows: Option[Long] = Some(data.length.toLong) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 8c111aa750809..97bc0083276bc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -37,6 +37,7 @@ import org.apache.spark.util.random.RandomSampler * at the top of the logical query plan. */ case class ReturnAnswer(child: LogicalPlan) extends UnaryNode { + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -326,6 +327,25 @@ case class Join( hint: JoinHint) extends BinaryNode with PredicateHelper { + override def maxRows: Option[Long] = { + joinType match { + case Inner | Cross | FullOuter | LeftOuter | RightOuter + if left.maxRows.isDefined && right.maxRows.isDefined => + val maxRows = BigInt(left.maxRows.get) * BigInt(right.maxRows.get) + if (maxRows.isValidLong) { + Some(maxRows.toLong) + } else { + None + } + + case LeftSemi | LeftAnti => + left.maxRows + + case _ => + None + } + } + override def output: Seq[Attribute] = { joinType match { case j: ExistenceJoin => @@ -574,6 +594,14 @@ case class Range( s"Range ($start, $end, step=$step, splits=$numSlices)" } + override def maxRows: Option[Long] = { + if (numElements.isValidLong) { + Some(numElements.toLong) + } else { + None + } + } + override def computeStats(): Statistics = { Statistics(sizeInBytes = LongType.defaultSize * numElements) } @@ -635,7 +663,7 @@ case class Window( partitionSpec: Seq[Expression], orderSpec: Seq[SortOrder], child: LogicalPlan) extends UnaryNode { - + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output ++ windowExpressions.map(_.toAttribute) @@ -974,6 +1002,7 @@ case class Sample( s"Sampling fraction ($fraction) must be on interval [0, 1] without replacement") } + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -991,6 +1020,7 @@ case class Distinct(child: LogicalPlan) extends UnaryNode { abstract class RepartitionOperation extends UnaryNode { def shuffle: Boolean def numPartitions: Int + override final def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } @@ -1050,7 +1080,6 @@ case class RepartitionByExpression( } } - override def maxRows: Option[Long] = child.maxRows override def shuffle: Boolean = true } @@ -1083,7 +1112,7 @@ case class OneRowRelation() extends LeafNode { case class Deduplicate( keys: Seq[Attribute], child: LogicalPlan) extends UnaryNode { - + override def maxRows: Option[Long] = child.maxRows override def output: Seq[Attribute] = child.output } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala index 70f130f834c68..11f908ac180bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CombiningLimitsSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -39,7 +41,16 @@ class CombiningLimitsSuite extends PlanTest { SimplifyConditionals) :: Nil } - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + val testRelation = LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(10).map(_ => Row(1, 2, 3)) + ) + val testRelation2 = LocalRelation.fromExternalRows( + Seq("x".attr.int, "y".attr.int, "z".attr.int), + Seq(Row(1, 2, 3), Row(2, 3, 4)) + ) + val testRelation3 = RelationWithoutMaxRows(Seq("i".attr.int)) + val testRelation4 = LongMaxRelation(Seq("j".attr.int)) test("limits: combines two limits") { val originalQuery = @@ -117,4 +128,118 @@ class CombiningLimitsSuite extends PlanTest { testRelation.select().groupBy()(count(1)).orderBy(count(1).asc).analyze) comparePlans(optimized4, expected4) } + + test("SPARK-33497: Eliminate Limit if LocalRelation max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.select().limit(10), + testRelation.select(), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Range max rows not larger than Limit") { + checkPlanAndMaxRow( + Range(0, 100, 1, None).select().limit(200), + Range(0, 100, 1, None).select(), + 100 + ) + checkPlanAndMaxRow( + Range(-1, Long.MaxValue, 1, None).select().limit(1), + Range(-1, Long.MaxValue, 1, None).select().limit(1), + 1 + ) + } + + test("SPARK-33497: Eliminate Limit if Sample max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.select().sample(0, 0.2, false, 1).limit(10), + testRelation.select().sample(0, 0.2, false, 1), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Deduplicate max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.deduplicate("a".attr).limit(10), + testRelation.deduplicate("a".attr), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Repartition max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.repartition(2).limit(10), + testRelation.repartition(2), + 10 + ) + checkPlanAndMaxRow( + testRelation.distribute("a".attr)(2).limit(10), + testRelation.distribute("a".attr)(2), + 10 + ) + } + + test("SPARK-33497: Eliminate Limit if Join max rows not larger than Limit") { + Seq(Inner, FullOuter, LeftOuter, RightOuter).foreach { joinType => + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(20), + testRelation.join(testRelation2, joinType), + 20 + ) + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(10), + testRelation.join(testRelation2, joinType).limit(10), + 10 + ) + // without maxRow + checkPlanAndMaxRow( + testRelation.join(testRelation3, joinType).limit(100), + testRelation.join(testRelation3, joinType).limit(100), + 100 + ) + // maxRow is not valid long + checkPlanAndMaxRow( + 
testRelation.join(testRelation4, joinType).limit(100), + testRelation.join(testRelation4, joinType).limit(100), + 100 + ) + } + + Seq(LeftSemi, LeftAnti).foreach { joinType => + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(5), + testRelation.join(testRelation2.select(), joinType).limit(5), + 5 + ) + checkPlanAndMaxRow( + testRelation.join(testRelation2, joinType).limit(10), + testRelation.join(testRelation2.select(), joinType), + 10 + ) + } + } + + test("SPARK-33497: Eliminate Limit if Window max rows not larger than Limit") { + checkPlanAndMaxRow( + testRelation.window( + Seq(count(1).as("c")), Seq("a".attr), Seq("b".attr.asc)).limit(20), + testRelation.window( + Seq(count(1).as("c")), Seq("a".attr), Seq("b".attr.asc)), + 10 + ) + } + + private def checkPlanAndMaxRow( + optimized: LogicalPlan, expected: LogicalPlan, expectedMaxRow: Long): Unit = { + comparePlans(Optimize.execute(optimized.analyze), expected.analyze) + assert(expected.maxRows.get == expectedMaxRow) + } +} + +case class RelationWithoutMaxRows(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = None +} + +case class LongMaxRelation(output: Seq[Attribute]) extends LeafNode { + override def maxRows: Option[Long] = Some(Long.MaxValue) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 62deebd930752..01ecbd808c251 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.api.python.PythonEvalType +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -48,8 +49,14 @@ class EliminateSortsSuite extends AnalysisTest { Batch("Limit PushDown", FixedPoint(10), LimitPushDown) :: Nil } - val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - val testRelationB = LocalRelation('d.int) + val testRelation = LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(12).map(_ => Row(1, 2, 3)) + ) + val testRelationB = LocalRelation.fromExternalRows( + Seq("d".attr.int), + 1.to(12).map(_ => Row(1)) + ) test("Empty order by clause") { val x = testRelation diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala index e365e3300096e..bb23b63c03cea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -38,8 +39,12 @@ class LimitPushdownSuite extends PlanTest { BooleanSimplification) :: Nil } - private val testRelation = LocalRelation('a.int, 'b.int, 'c.int) - private val testRelation2 = LocalRelation('d.int, 'e.int, 'f.int) + private val testRelation = 
LocalRelation.fromExternalRows( + Seq("a".attr.int, "b".attr.int, "c".attr.int), + 1.to(6).map(_ => Row(1, 2, 3))) + private val testRelation2 = LocalRelation.fromExternalRows( + Seq("d".attr.int, "e".attr.int, "f".attr.int), + 1.to(6).map(_ => Row(1, 2, 3))) private val x = testRelation.subquery('x) private val y = testRelation.subquery('y) @@ -148,7 +153,7 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and left side has larger statistics") { - val xBig = testRelation.copy(data = Seq.fill(2)(null)).subquery('x) + val xBig = testRelation.copy(data = Seq.fill(10)(null)).subquery('x) assert(xBig.stats.sizeInBytes > y.stats.sizeInBytes) val originalQuery = xBig.join(y, FullOuter).limit(1).analyze val optimized = Optimize.execute(originalQuery) @@ -157,7 +162,7 @@ class LimitPushdownSuite extends PlanTest { } test("full outer join where neither side is limited and right side has larger statistics") { - val yBig = testRelation.copy(data = Seq.fill(2)(null)).subquery('y) + val yBig = testRelation.copy(data = Seq.fill(10)(null)).subquery('y) assert(x.stats.sizeInBytes < yBig.stats.sizeInBytes) val originalQuery = x.join(yBig, FullOuter).limit(1).analyze val optimized = Optimize.execute(originalQuery) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt index 9788040bbe6de..a0f029c9b9325 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt @@ -1,82 +1,81 @@ == Physical Plan == -CollectLimit (71) -+- BroadcastNestedLoopJoin Inner BuildRight (70) - :- BroadcastNestedLoopJoin Inner BuildRight (58) - : :- BroadcastNestedLoopJoin Inner BuildRight (46) - : : :- BroadcastNestedLoopJoin Inner BuildRight (34) - : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) - : : : : :- * HashAggregate (10) - : : : : : +- Exchange (9) - : : : : : +- * HashAggregate (8) - : : : : : +- * HashAggregate (7) - : : : : : +- Exchange (6) - : : : : : +- * HashAggregate (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (21) - : : : : +- * HashAggregate (20) - : : : : +- Exchange (19) - : : : : +- * HashAggregate (18) - : : : : +- * HashAggregate (17) - : : : : +- Exchange (16) - : : : : +- * HashAggregate (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store_sales (11) - : : : +- BroadcastExchange (33) - : : : +- * HashAggregate (32) - : : : +- Exchange (31) - : : : +- * HashAggregate (30) - : : : +- * HashAggregate (29) - : : : +- Exchange (28) - : : : +- * HashAggregate (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.store_sales (23) - : : +- BroadcastExchange (45) - : : +- * HashAggregate (44) - : : +- Exchange (43) - : : +- * HashAggregate (42) - : : +- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * Filter (37) - : : +- * ColumnarToRow (36) - : : +- Scan parquet default.store_sales (35) - : +- BroadcastExchange (57) - : +- * HashAggregate (56) - : +- Exchange (55) - : +- * HashAggregate (54) - : +- * HashAggregate (53) - : +- Exchange (52) - : 
+- * HashAggregate (51) - : +- * Project (50) - : +- * Filter (49) - : +- * ColumnarToRow (48) - : +- Scan parquet default.store_sales (47) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * Filter (61) - +- * ColumnarToRow (60) - +- Scan parquet default.store_sales (59) +BroadcastNestedLoopJoin Inner BuildRight (70) +:- BroadcastNestedLoopJoin Inner BuildRight (58) +: :- BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) (1) Scan parquet default.store_sales Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] ReadSchema: struct (2) ColumnarToRow [codegen id : 1] @@ -84,7 +83,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (3) Filter [codegen id : 1] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : 
(((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) (4) Project [codegen id : 1] Output [1]: [ss_list_price#3] @@ -130,7 +129,7 @@ Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11 Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] @@ -138,7 +137,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (13) Filter [codegen id : 4] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (ss_list_price#3 <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (ss_coupon_amt#4 <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (ss_wholesale_cost#2 <= 51.00)))) (14) Project [codegen id : 4] Output [1]: [ss_list_price#3] @@ -191,7 +190,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] @@ -199,7 +198,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (25) Filter [codegen id : 7] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) 
AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (ss_list_price#3 <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (ss_coupon_amt#4 <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (ss_wholesale_cost#2 <= 99.00)))) (26) Project [codegen id : 7] Output [1]: [ss_list_price#3] @@ -252,7 +251,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 10] @@ -260,7 +259,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (37) Filter [codegen id : 10] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (ss_list_price#3 <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (ss_coupon_amt#4 <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (ss_wholesale_cost#2 <= 58.00)))) (38) Project [codegen id : 10] Output [1]: [ss_list_price#3] @@ -313,7 +312,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 13] @@ -321,7 +320,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (49) Filter [codegen id : 13] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR 
((ss_wholesale_cost#2 >= 17.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (ss_list_price#3 <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (ss_coupon_amt#4 <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (ss_wholesale_cost#2 <= 37.00)))) (50) Project [codegen id : 13] Output [1]: [ss_list_price#3] @@ -374,7 +373,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 16] @@ -382,7 +381,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (61) Filter [codegen id : 16] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (ss_list_price#3 <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (ss_coupon_amt#4 <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (ss_wholesale_cost#2 <= 27.00)))) (62) Project [codegen id : 16] Output [1]: [ss_list_price#3] @@ -431,7 +430,3 @@ Arguments: IdentityBroadcastMode, [id=#81] (70) BroadcastNestedLoopJoin Join condition: None -(71) CollectLimit -Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt index d896002b0965d..77afa321d3ee4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/simplified.txt @@ -1,107 +1,106 @@ -CollectLimit +BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - WholeStageCodegen (3) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] - InputAdapter - Exchange #1 - WholeStageCodegen (2) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate 
[ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #2 - WholeStageCodegen (1) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #3 - WholeStageCodegen (6) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] - InputAdapter - Exchange #4 - WholeStageCodegen (5) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #5 - WholeStageCodegen (4) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #6 - WholeStageCodegen (9) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] InputAdapter - Exchange #7 - WholeStageCodegen (8) + Exchange #4 + WholeStageCodegen (5) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #8 - WholeStageCodegen (7) + Exchange [ss_list_price] #5 + WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales 
[ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #9 - WholeStageCodegen (12) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] InputAdapter - Exchange #10 - WholeStageCodegen (11) + Exchange #7 + WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #11 - WholeStageCodegen (10) + Exchange [ss_list_price] #8 + WholeStageCodegen (7) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #12 - WholeStageCodegen (15) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] InputAdapter - Exchange #13 - WholeStageCodegen (14) + Exchange #10 + WholeStageCodegen (11) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #14 - WholeStageCodegen (13) + Exchange [ss_list_price] #11 + WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #15 - WholeStageCodegen (18) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] InputAdapter - Exchange #16 - WholeStageCodegen (17) + Exchange #13 + WholeStageCodegen (14) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #17 - WholeStageCodegen (16) + Exchange [ss_list_price] #14 + WholeStageCodegen (13) HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt index 9788040bbe6de..a0f029c9b9325 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -1,82 +1,81 @@ == Physical Plan == -CollectLimit (71) -+- BroadcastNestedLoopJoin Inner BuildRight (70) - :- BroadcastNestedLoopJoin Inner BuildRight (58) - : :- BroadcastNestedLoopJoin Inner BuildRight (46) - : : :- BroadcastNestedLoopJoin Inner BuildRight (34) - : : : :- BroadcastNestedLoopJoin Inner BuildRight (22) - : : : : :- * HashAggregate (10) - : : : : : +- Exchange (9) - : : : : : +- * HashAggregate (8) - : : : : : +- * HashAggregate (7) - : : : : : +- Exchange (6) - : : : : : +- * HashAggregate (5) - : : : : : +- * Project (4) - : : : : : +- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (21) - : : : : +- * HashAggregate (20) - : : : : +- Exchange (19) - : : : : +- * HashAggregate (18) - : : : : +- * HashAggregate (17) - : : : : +- Exchange (16) - : : : : +- * HashAggregate (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store_sales (11) - : : : +- BroadcastExchange (33) - : : : +- * HashAggregate (32) - : : : +- Exchange (31) - : : : +- * HashAggregate (30) - : : : +- * HashAggregate (29) - : : : +- Exchange (28) - : : : +- * HashAggregate (27) - : : : +- * Project (26) - : : : +- * Filter (25) - : : : +- * ColumnarToRow (24) - : : : +- Scan parquet default.store_sales (23) - : : +- BroadcastExchange (45) - : : +- * HashAggregate (44) - : : +- Exchange (43) - : : +- * HashAggregate (42) - : : +- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * Filter (37) - : : +- * ColumnarToRow (36) - : : +- Scan parquet default.store_sales (35) - : +- BroadcastExchange (57) - : +- * HashAggregate (56) - : +- Exchange (55) - : +- * HashAggregate (54) - : +- * HashAggregate (53) - : +- Exchange (52) - : +- * HashAggregate (51) - : +- * 
Project (50) - : +- * Filter (49) - : +- * ColumnarToRow (48) - : +- Scan parquet default.store_sales (47) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * HashAggregate (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * Filter (61) - +- * ColumnarToRow (60) - +- Scan parquet default.store_sales (59) +BroadcastNestedLoopJoin Inner BuildRight (70) +:- BroadcastNestedLoopJoin Inner BuildRight (58) +: :- BroadcastNestedLoopJoin Inner BuildRight (46) +: : :- BroadcastNestedLoopJoin Inner BuildRight (34) +: : : :- BroadcastNestedLoopJoin Inner BuildRight (22) +: : : : :- * HashAggregate (10) +: : : : : +- Exchange (9) +: : : : : +- * HashAggregate (8) +: : : : : +- * HashAggregate (7) +: : : : : +- Exchange (6) +: : : : : +- * HashAggregate (5) +: : : : : +- * Project (4) +: : : : : +- * Filter (3) +: : : : : +- * ColumnarToRow (2) +: : : : : +- Scan parquet default.store_sales (1) +: : : : +- BroadcastExchange (21) +: : : : +- * HashAggregate (20) +: : : : +- Exchange (19) +: : : : +- * HashAggregate (18) +: : : : +- * HashAggregate (17) +: : : : +- Exchange (16) +: : : : +- * HashAggregate (15) +: : : : +- * Project (14) +: : : : +- * Filter (13) +: : : : +- * ColumnarToRow (12) +: : : : +- Scan parquet default.store_sales (11) +: : : +- BroadcastExchange (33) +: : : +- * HashAggregate (32) +: : : +- Exchange (31) +: : : +- * HashAggregate (30) +: : : +- * HashAggregate (29) +: : : +- Exchange (28) +: : : +- * HashAggregate (27) +: : : +- * Project (26) +: : : +- * Filter (25) +: : : +- * ColumnarToRow (24) +: : : +- Scan parquet default.store_sales (23) +: : +- BroadcastExchange (45) +: : +- * HashAggregate (44) +: : +- Exchange (43) +: : +- * HashAggregate (42) +: : +- * HashAggregate (41) +: : +- Exchange (40) +: : +- * HashAggregate (39) +: : +- * Project (38) +: : +- * Filter (37) +: : +- * ColumnarToRow (36) +: : +- Scan parquet default.store_sales (35) +: +- BroadcastExchange (57) +: +- * HashAggregate (56) +: +- Exchange (55) +: +- * HashAggregate (54) +: +- * HashAggregate (53) +: +- Exchange (52) +: +- * HashAggregate (51) +: +- * Project (50) +: +- * Filter (49) +: +- * ColumnarToRow (48) +: +- Scan parquet default.store_sales (47) ++- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * Filter (61) + +- * ColumnarToRow (60) + +- Scan parquet default.store_sales (59) (1) Scan parquet default.store_sales Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,0), LessThanOrEqual(ss_quantity,5), Or(Or(And(GreaterThanOrEqual(ss_list_price,8.00),LessThanOrEqual(ss_list_price,18.00)),And(GreaterThanOrEqual(ss_coupon_amt,459.00),LessThanOrEqual(ss_coupon_amt,1459.00))),And(GreaterThanOrEqual(ss_wholesale_cost,57.00),LessThanOrEqual(ss_wholesale_cost,77.00)))] ReadSchema: struct (2) ColumnarToRow [codegen id : 1] @@ -84,7 +83,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (3) Filter [codegen id : 1] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND 
(ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 77.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 0)) AND (ss_quantity#1 <= 5)) AND ((((ss_list_price#3 >= 8.00) AND (ss_list_price#3 <= 18.00)) OR ((ss_coupon_amt#4 >= 459.00) AND (ss_coupon_amt#4 <= 1459.00))) OR ((ss_wholesale_cost#2 >= 57.00) AND (ss_wholesale_cost#2 <= 77.00)))) (4) Project [codegen id : 1] Output [1]: [ss_list_price#3] @@ -130,7 +129,7 @@ Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#5 / 100.0) as decimal(11 Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] @@ -138,7 +137,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (13) Filter [codegen id : 4] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 51.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 6)) AND (ss_quantity#1 <= 10)) AND ((((ss_list_price#3 >= 90.00) AND (ss_list_price#3 <= 100.00)) OR ((ss_coupon_amt#4 >= 2323.00) AND (ss_coupon_amt#4 <= 3323.00))) OR ((ss_wholesale_cost#2 >= 31.00) AND (ss_wholesale_cost#2 <= 51.00)))) (14) Project [codegen id : 4] Output [1]: [ss_list_price#3] @@ -191,7 +190,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] @@ -199,7 +198,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (25) Filter [codegen id : 7] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 152.00)) 
OR ((ss_coupon_amt#4 >= 12214.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 99.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 11)) AND (ss_quantity#1 <= 15)) AND ((((ss_list_price#3 >= 142.00) AND (ss_list_price#3 <= 152.00)) OR ((ss_coupon_amt#4 >= 12214.00) AND (ss_coupon_amt#4 <= 13214.00))) OR ((ss_wholesale_cost#2 >= 79.00) AND (ss_wholesale_cost#2 <= 99.00)))) (26) Project [codegen id : 7] Output [1]: [ss_list_price#3] @@ -252,7 +251,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 10] @@ -260,7 +259,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (37) Filter [codegen id : 10] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 58.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 16)) AND (ss_quantity#1 <= 20)) AND ((((ss_list_price#3 >= 135.00) AND (ss_list_price#3 <= 145.00)) OR ((ss_coupon_amt#4 >= 6071.00) AND (ss_coupon_amt#4 <= 7071.00))) OR ((ss_wholesale_cost#2 >= 38.00) AND (ss_wholesale_cost#2 <= 58.00)))) (38) Project [codegen id : 10] Output [1]: [ss_list_price#3] @@ -313,7 +312,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 13] @@ -321,7 +320,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (49) Filter [codegen id : 13] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND 
(cast(ss_wholesale_cost#2 as decimal(12,2)) <= 37.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 21)) AND (ss_quantity#1 <= 25)) AND ((((ss_list_price#3 >= 122.00) AND (ss_list_price#3 <= 132.00)) OR ((ss_coupon_amt#4 >= 836.00) AND (ss_coupon_amt#4 <= 1836.00))) OR ((ss_wholesale_cost#2 >= 17.00) AND (ss_wholesale_cost#2 <= 37.00)))) (50) Project [codegen id : 13] Output [1]: [ss_list_price#3] @@ -374,7 +373,7 @@ Join condition: None Output [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30)] +PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 16] @@ -382,7 +381,7 @@ Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4 (61) Filter [codegen id : 16] Input [4]: [ss_quantity#1, ss_wholesale_cost#2, ss_list_price#3, ss_coupon_amt#4] -Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (cast(ss_list_price#3 as decimal(12,2)) <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (cast(ss_coupon_amt#4 as decimal(12,2)) <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (cast(ss_wholesale_cost#2 as decimal(12,2)) <= 27.00)))) +Condition : (((isnotnull(ss_quantity#1) AND (ss_quantity#1 >= 26)) AND (ss_quantity#1 <= 30)) AND ((((ss_list_price#3 >= 154.00) AND (ss_list_price#3 <= 164.00)) OR ((ss_coupon_amt#4 >= 7326.00) AND (ss_coupon_amt#4 <= 8326.00))) OR ((ss_wholesale_cost#2 >= 7.00) AND (ss_wholesale_cost#2 <= 27.00)))) (62) Project [codegen id : 16] Output [1]: [ss_list_price#3] @@ -431,7 +430,3 @@ Arguments: IdentityBroadcastMode, [id=#81] (70) BroadcastNestedLoopJoin Join condition: None -(71) CollectLimit -Input [18]: [B1_LP#14, B1_CNT#15, B1_CNTD#16, B2_LP#26, B2_CNT#27, B2_CNTD#28, B3_LP#39, B3_CNT#40, B3_CNTD#41, B4_LP#52, B4_CNT#53, B4_CNTD#54, B5_LP#65, B5_CNT#66, B5_CNTD#67, B6_LP#78, B6_CNT#79, B6_CNTD#80] -Arguments: 100 - diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt index d896002b0965d..77afa321d3ee4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/simplified.txt @@ -1,107 +1,106 @@ -CollectLimit +BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin BroadcastNestedLoopJoin - BroadcastNestedLoopJoin - WholeStageCodegen (3) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] - InputAdapter - Exchange #1 - WholeStageCodegen (2) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #2 - WholeStageCodegen (1) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #3 - WholeStageCodegen (6) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] - InputAdapter - Exchange #4 - WholeStageCodegen (5) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - InputAdapter - Exchange [ss_list_price] #5 - WholeStageCodegen (4) - HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] - Project [ss_list_price] - Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #6 - WholeStageCodegen (9) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] + WholeStageCodegen (3) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B1_LP,B1_CNT,B1_CNTD,sum,count,count,count] + InputAdapter + Exchange #1 + WholeStageCodegen (2) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #2 + WholeStageCodegen (1) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #3 + WholeStageCodegen (6) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B2_LP,B2_CNT,B2_CNTD,sum,count,count,count] InputAdapter - Exchange #7 - WholeStageCodegen (8) + Exchange #4 + WholeStageCodegen (5) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #8 - WholeStageCodegen (7) + Exchange [ss_list_price] #5 + WholeStageCodegen (4) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales 
[ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #9 - WholeStageCodegen (12) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] + BroadcastExchange #6 + WholeStageCodegen (9) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B3_LP,B3_CNT,B3_CNTD,sum,count,count,count] InputAdapter - Exchange #10 - WholeStageCodegen (11) + Exchange #7 + WholeStageCodegen (8) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #11 - WholeStageCodegen (10) + Exchange [ss_list_price] #8 + WholeStageCodegen (7) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #12 - WholeStageCodegen (15) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] + BroadcastExchange #9 + WholeStageCodegen (12) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B4_LP,B4_CNT,B4_CNTD,sum,count,count,count] InputAdapter - Exchange #13 - WholeStageCodegen (14) + Exchange #10 + WholeStageCodegen (11) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #14 - WholeStageCodegen (13) + Exchange [ss_list_price] #11 + WholeStageCodegen (10) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] - BroadcastExchange #15 - WholeStageCodegen (18) - HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + BroadcastExchange #12 + WholeStageCodegen (15) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B5_LP,B5_CNT,B5_CNTD,sum,count,count,count] InputAdapter - Exchange #16 - WholeStageCodegen (17) + Exchange #13 + WholeStageCodegen (14) HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] InputAdapter - Exchange [ss_list_price] #17 - WholeStageCodegen (16) + Exchange [ss_list_price] #14 + WholeStageCodegen (13) HashAggregate [ss_list_price] 
[avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] Project [ss_list_price] Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] ColumnarToRow InputAdapter Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] + BroadcastExchange #15 + WholeStageCodegen (18) + HashAggregate [sum,count,count,count] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),B6_LP,B6_CNT,B6_CNTD,sum,count,count,count] + InputAdapter + Exchange #16 + WholeStageCodegen (17) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),count(ss_list_price),sum,count,count,count,sum,count,count,count] + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + InputAdapter + Exchange [ss_list_price] #17 + WholeStageCodegen (16) + HashAggregate [ss_list_price] [avg(UnscaledValue(ss_list_price)),count(ss_list_price),sum,count,count,sum,count,count] + Project [ss_list_price] + Filter [ss_quantity,ss_list_price,ss_coupon_amt,ss_wholesale_cost] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_wholesale_cost,ss_list_price,ss_coupon_amt] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index 58a60763b2b57..5574e5b16c578 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -1,73 +1,74 @@ == Physical Plan == -TakeOrderedAndProject (69) -+- * Project (68) - +- BroadcastNestedLoopJoin Inner BuildRight (67) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (31) - : : +- * BroadcastHashJoin Inner BuildRight (30) - : : :- * Project (24) - : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : :- * Project (17) - : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (15) - : : : : +- * Project (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.item (11) - : : : +- BroadcastExchange (22) - : : : +- * Project (21) - : : : +- * Filter (20) - : : : +- * ColumnarToRow (19) - : : : +- Scan parquet default.promotion (18) - : : +- BroadcastExchange (29) - : : +- * Project (28) - : : +- * Filter (27) - : : +- * ColumnarToRow (26) - : : +- Scan parquet default.store (25) - : +- BroadcastExchange (42) - : +- * Project (41) - : +- * BroadcastHashJoin Inner BuildRight (40) - : :- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.customer (32) - : +- BroadcastExchange (39) - : +- * Project (38) - : +- * Filter (37) - : +- * ColumnarToRow (36) - : +- Scan parquet default.customer_address (35) - +- BroadcastExchange (66) - +- * HashAggregate (65) - +- Exchange (64) - +- * HashAggregate (63) - +- * Project (62) - +- * BroadcastHashJoin 
Inner BuildRight (61) - :- * Project (59) - : +- * BroadcastHashJoin Inner BuildRight (58) - : :- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.store_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- ReusedExchange (57) - +- ReusedExchange (60) +* Sort (70) ++- Exchange (69) + +- * Project (68) + +- BroadcastNestedLoopJoin Inner BuildRight (67) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (31) + : : +- * BroadcastHashJoin Inner BuildRight (30) + : : :- * Project (24) + : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : :- * Project (17) + : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : :- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.store_sales (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- BroadcastExchange (15) + : : : : +- * Project (14) + : : : : +- * Filter (13) + : : : : +- * ColumnarToRow (12) + : : : : +- Scan parquet default.item (11) + : : : +- BroadcastExchange (22) + : : : +- * Project (21) + : : : +- * Filter (20) + : : : +- * ColumnarToRow (19) + : : : +- Scan parquet default.promotion (18) + : : +- BroadcastExchange (29) + : : +- * Project (28) + : : +- * Filter (27) + : : +- * ColumnarToRow (26) + : : +- Scan parquet default.store (25) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * BroadcastHashJoin Inner BuildRight (40) + : :- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.customer (32) + : +- BroadcastExchange (39) + : +- * Project (38) + : +- * Filter (37) + : +- * ColumnarToRow (36) + : +- Scan parquet default.customer_address (35) + +- BroadcastExchange (66) + +- * HashAggregate (65) + +- Exchange (64) + +- * HashAggregate (63) + +- * Project (62) + +- * BroadcastHashJoin Inner BuildRight (61) + :- * Project (59) + : +- * BroadcastHashJoin Inner BuildRight (58) + : :- * Project (56) + : : +- * BroadcastHashJoin Inner BuildRight (55) + : : :- * Project (53) + : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : :- * Filter (50) + : : : : +- * ColumnarToRow (49) + : : : : +- Scan parquet default.store_sales (48) + : : : +- ReusedExchange (51) + : : +- ReusedExchange (54) + : +- ReusedExchange (57) + +- ReusedExchange (60) (1) Scan parquet default.store_sales @@ -375,7 +376,11 @@ Join condition: None Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] Input [2]: [promotions#32, total#37] -(69) TakeOrderedAndProject +(69) Exchange Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS 
DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: rangepartitioning(promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST, 5), true, [id=#40] + +(70) Sort [codegen id : 17] +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt index 87f2b3ae03746..1ebad2d825be6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/simplified.txt @@ -1,101 +1,104 @@ -TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] - WholeStageCodegen (16) - Project [promotions,total] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (8) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] +WholeStageCodegen (17) + Sort [promotions,total] + InputAdapter + Exchange [promotions,total] #1 + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + 
InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [c_customer_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] + BroadcastExchange #8 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [i_item_sk] - Filter [i_category,i_item_sk] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + BroadcastExchange #9 + WholeStageCodegen (15) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_category] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [p_promo_sk] - Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] - ColumnarToRow + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (4) - Project [s_store_sk] - Filter [s_gmt_offset,s_store_sk] - ColumnarToRow + ReusedExchange [d_date_sk] #3 InputAdapter - Scan parquet default.store [s_store_sk,s_gmt_offset] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (6) - Project [c_customer_sk] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (5) - Project [ca_address_sk] - Filter 
[ca_gmt_offset,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] - BroadcastExchange #8 - WholeStageCodegen (15) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] - InputAdapter - Exchange #9 - WholeStageCodegen (14) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - InputAdapter - ReusedExchange [d_date_sk] #2 - InputAdapter - ReusedExchange [i_item_sk] #3 - InputAdapter - ReusedExchange [s_store_sk] #5 - InputAdapter - ReusedExchange [c_customer_sk] #6 + ReusedExchange [i_item_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #6 + InputAdapter + ReusedExchange [c_customer_sk] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt index f56f48726c4ad..8025461181031 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -1,76 +1,77 @@ == Physical Plan == -TakeOrderedAndProject (72) -+- * Project (71) - +- BroadcastNestedLoopJoin Inner BuildRight (70) - :- * HashAggregate (47) - : +- Exchange (46) - : +- * HashAggregate (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) - : : :- * Project (30) - : : : +- * BroadcastHashJoin Inner BuildRight (29) - : : : :- * Project (24) - : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : :- * Project (17) - : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : :- * Project (10) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : :- * Filter (3) - : : : : : : : +- * ColumnarToRow (2) - : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : +- BroadcastExchange (8) - : : : : : : +- * Project (7) - : : : : : : +- * Filter (6) - : : : : : : +- * ColumnarToRow (5) - : : : : : : +- Scan parquet default.store (4) - : : : : : +- BroadcastExchange (15) - : : : : : +- * Project (14) - : : : : : +- * Filter (13) - : : : : : +- * ColumnarToRow (12) - : : : : : +- Scan parquet default.promotion (11) - : : : : +- BroadcastExchange (22) - : : : : +- * Project (21) - : : : : +- * Filter (20) - : : : : +- * ColumnarToRow (19) - : : : : +- Scan parquet default.date_dim (18) - : : : +- BroadcastExchange (28) - : : : +- * Filter (27) - : : : +- * ColumnarToRow (26) - : : : +- Scan parquet default.customer (25) - : : +- BroadcastExchange (35) - : : +- * Project (34) - : : +- * Filter (33) - : : +- * ColumnarToRow (32) - : : +- Scan parquet default.customer_address (31) - : +- BroadcastExchange (42) - : +- * Project (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.item (38) - +- BroadcastExchange (69) - +- * HashAggregate (68) - +- Exchange (67) - +- * 
HashAggregate (66) - +- * Project (65) - +- * BroadcastHashJoin Inner BuildRight (64) - :- * Project (62) - : +- * BroadcastHashJoin Inner BuildRight (61) - : :- * Project (59) - : : +- * BroadcastHashJoin Inner BuildRight (58) - : : :- * Project (56) - : : : +- * BroadcastHashJoin Inner BuildRight (55) - : : : :- * Project (53) - : : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : : :- * Filter (50) - : : : : : +- * ColumnarToRow (49) - : : : : : +- Scan parquet default.store_sales (48) - : : : : +- ReusedExchange (51) - : : : +- ReusedExchange (54) - : : +- ReusedExchange (57) - : +- ReusedExchange (60) - +- ReusedExchange (63) +* Sort (73) ++- Exchange (72) + +- * Project (71) + +- BroadcastNestedLoopJoin Inner BuildRight (70) + :- * HashAggregate (47) + : +- Exchange (46) + : +- * HashAggregate (45) + : +- * Project (44) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (37) + : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (30) + : : : +- * BroadcastHashJoin Inner BuildRight (29) + : : : :- * Project (24) + : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.promotion (11) + : : : : +- BroadcastExchange (22) + : : : : +- * Project (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- BroadcastExchange (28) + : : : +- * Filter (27) + : : : +- * ColumnarToRow (26) + : : : +- Scan parquet default.customer (25) + : : +- BroadcastExchange (35) + : : +- * Project (34) + : : +- * Filter (33) + : : +- * ColumnarToRow (32) + : : +- Scan parquet default.customer_address (31) + : +- BroadcastExchange (42) + : +- * Project (41) + : +- * Filter (40) + : +- * ColumnarToRow (39) + : +- Scan parquet default.item (38) + +- BroadcastExchange (69) + +- * HashAggregate (68) + +- Exchange (67) + +- * HashAggregate (66) + +- * Project (65) + +- * BroadcastHashJoin Inner BuildRight (64) + :- * Project (62) + : +- * BroadcastHashJoin Inner BuildRight (61) + : :- * Project (59) + : : +- * BroadcastHashJoin Inner BuildRight (58) + : : :- * Project (56) + : : : +- * BroadcastHashJoin Inner BuildRight (55) + : : : :- * Project (53) + : : : : +- * BroadcastHashJoin Inner BuildRight (52) + : : : : :- * Filter (50) + : : : : : +- * ColumnarToRow (49) + : : : : : +- Scan parquet default.store_sales (48) + : : : : +- ReusedExchange (51) + : : : +- ReusedExchange (54) + : : +- ReusedExchange (57) + : +- ReusedExchange (60) + +- ReusedExchange (63) (1) Scan parquet default.store_sales @@ -390,7 +391,11 @@ Join condition: None Output [3]: [promotions#32, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#32 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20), true)) * 100.00000000000000000000), DecimalType(38,19), true) AS (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) 
/ CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] Input [2]: [promotions#32, total#37] -(72) TakeOrderedAndProject +(72) Exchange Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] -Arguments: 100, [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: rangepartitioning(promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST, 5), true, [id=#40] + +(73) Sort [codegen id : 17] +Input [3]: [promotions#32, total#37, (CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))#39] +Arguments: [promotions#32 ASC NULLS FIRST, total#37 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt index da75651673cfe..3b476544403e0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/simplified.txt @@ -1,105 +1,108 @@ -TakeOrderedAndProject [promotions,total,(CAST((CAST(CAST(promotions AS DECIMAL(15,4)) AS DECIMAL(15,4)) / CAST(CAST(total AS DECIMAL(15,4)) AS DECIMAL(15,4))) AS DECIMAL(35,20)) * CAST(CAST(100 AS DECIMAL(3,0)) AS DECIMAL(35,20)))] - WholeStageCodegen (16) - Project [promotions,total] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (8) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] - InputAdapter - Exchange #1 - WholeStageCodegen (7) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] +WholeStageCodegen (17) + Sort [promotions,total] + InputAdapter + Exchange [promotions,total] #1 + WholeStageCodegen (16) + Project [promotions,total] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (8) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),promotions,sum] + InputAdapter + Exchange #2 + WholeStageCodegen (7) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + 
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_promo_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_promo_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [s_store_sk] + Filter [s_gmt_offset,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_gmt_offset] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [p_promo_sk] + Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (4) + Filter [c_customer_sk,c_current_addr_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [s_store_sk] - Filter [s_gmt_offset,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_gmt_offset] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [p_promo_sk] - Filter [p_channel_dmail,p_channel_email,p_channel_tv,p_promo_sk] + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (5) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_category] + BroadcastExchange #9 + WholeStageCodegen (15) + HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (14) + HashAggregate [ss_ext_sales_price] [sum,sum] + Project [ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] + BroadcastHashJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] ColumnarToRow InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_dmail,p_channel_email,p_channel_tv] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter - Scan parquet 
default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (4) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (5) - Project [ca_address_sk] - Filter [ca_gmt_offset,ca_address_sk] - ColumnarToRow + ReusedExchange [s_store_sk] #3 + InputAdapter + ReusedExchange [d_date_sk] #5 InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (6) - Project [i_item_sk] - Filter [i_category,i_item_sk] - ColumnarToRow + ReusedExchange [c_customer_sk,c_current_addr_sk] #6 InputAdapter - Scan parquet default.item [i_item_sk,i_category] - BroadcastExchange #8 - WholeStageCodegen (15) - HashAggregate [sum] [sum(UnscaledValue(ss_ext_sales_price)),total,sum] - InputAdapter - Exchange #9 - WholeStageCodegen (14) - HashAggregate [ss_ext_sales_price] [sum,sum] - Project [ss_ext_sales_price] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [ss_item_sk,ss_ext_sales_price,c_current_addr_sk] - BroadcastHashJoin [ss_customer_sk,c_customer_sk] - Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_sold_date_sk,ss_customer_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] - InputAdapter - ReusedExchange [s_store_sk] #2 - InputAdapter - ReusedExchange [d_date_sk] #4 - InputAdapter - ReusedExchange [c_customer_sk,c_current_addr_sk] #5 - InputAdapter - ReusedExchange [ca_address_sk] #6 - InputAdapter - ReusedExchange [i_item_sk] #7 + ReusedExchange [ca_address_sk] #7 + InputAdapter + ReusedExchange [i_item_sk] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt index 3f787bfb99b67..e279902a125c5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt @@ -1,54 +1,55 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * Project (49) - +- BroadcastNestedLoopJoin Inner BuildRight (48) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.web_page (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.household_demographics (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.time_dim (18) - +- BroadcastExchange (47) - +- * 
HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_sales (28) - : : +- ReusedExchange (31) - : +- ReusedExchange (34) - +- BroadcastExchange (41) - +- * Project (40) - +- * Filter (39) - +- * ColumnarToRow (38) - +- Scan parquet default.time_dim (37) +* Sort (51) ++- Exchange (50) + +- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.web_page (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.household_demographics (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.time_dim (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + +- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (36) + : +- * BroadcastHashJoin Inner BuildRight (35) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- ReusedExchange (34) + +- BroadcastExchange (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.time_dim (37) (1) Scan parquet default.web_sales @@ -274,7 +275,11 @@ Join condition: None Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] Input [2]: [amc#17, pmc#23] -(50) TakeOrderedAndProject +(50) Exchange Input [1]: [am_pm_ratio#25] -Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] +Arguments: rangepartitioning(am_pm_ratio#25 ASC NULLS FIRST, 5), true, [id=#26] + +(51) Sort [codegen id : 12] +Input [1]: [am_pm_ratio#25] +Arguments: [am_pm_ratio#25 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt index 1fe0442eab13f..5b33a90675699 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/simplified.txt @@ -1,74 +1,77 @@ -TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen (11) - Project [amc,pmc] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),amc,count] - InputAdapter - Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project 
[ws_sold_time_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [wp_web_page_sk] - Filter [wp_char_count,wp_web_page_sk] +WholeStageCodegen (12) + Sort [am_pm_ratio] + InputAdapter + Exchange [am_pm_ratio] #1 + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] + BroadcastExchange #6 + WholeStageCodegen (10) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #7 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Project [ws_sold_time_sk,ws_ship_hdemo_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] ColumnarToRow InputAdapter - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow + ReusedExchange [wp_web_page_sk] #3 InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),pmc,count] - InputAdapter - Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Project [ws_sold_time_sk,ws_ship_hdemo_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - ReusedExchange 
[wp_web_page_sk] #2 - InputAdapter - ReusedExchange [hd_demo_sk] #3 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (8) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] + ReusedExchange [hd_demo_sk] #4 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (8) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt index 550bf89ce3b99..7a21808803aaa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -1,54 +1,55 @@ == Physical Plan == -TakeOrderedAndProject (50) -+- * Project (49) - +- BroadcastNestedLoopJoin Inner BuildRight (48) - :- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.household_demographics (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.time_dim (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.web_page (18) - +- BroadcastExchange (47) - +- * HashAggregate (46) - +- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * BroadcastHashJoin Inner BuildRight (42) - :- * Project (40) - : +- * BroadcastHashJoin Inner BuildRight (39) - : :- * Project (33) - : : +- * BroadcastHashJoin Inner BuildRight (32) - : : :- * Filter (30) - : : : +- * ColumnarToRow (29) - : : : +- Scan parquet default.web_sales (28) - : : +- ReusedExchange (31) - : +- BroadcastExchange (38) - : +- * Project (37) - : +- * Filter (36) - : +- * ColumnarToRow (35) - : +- Scan parquet default.time_dim (34) - +- ReusedExchange (41) +* Sort (51) ++- Exchange (50) + +- * Project (49) + +- BroadcastNestedLoopJoin Inner BuildRight (48) + :- * HashAggregate (27) + : +- Exchange (26) + : +- * HashAggregate (25) + : +- * Project (24) + : +- * BroadcastHashJoin Inner BuildRight (23) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.household_demographics (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.time_dim (11) + : +- BroadcastExchange (22) + : +- * Project (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.web_page (18) + +- BroadcastExchange (47) + +- * HashAggregate (46) + +- Exchange (45) + 
+- * HashAggregate (44) + +- * Project (43) + +- * BroadcastHashJoin Inner BuildRight (42) + :- * Project (40) + : +- * BroadcastHashJoin Inner BuildRight (39) + : :- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Filter (30) + : : : +- * ColumnarToRow (29) + : : : +- Scan parquet default.web_sales (28) + : : +- ReusedExchange (31) + : +- BroadcastExchange (38) + : +- * Project (37) + : +- * Filter (36) + : +- * ColumnarToRow (35) + : +- Scan parquet default.time_dim (34) + +- ReusedExchange (41) (1) Scan parquet default.web_sales @@ -274,7 +275,11 @@ Join condition: None Output [1]: [CheckOverflow((promote_precision(cast(amc#17 as decimal(15,4))) / promote_precision(cast(pmc#23 as decimal(15,4)))), DecimalType(35,20), true) AS am_pm_ratio#25] Input [2]: [amc#17, pmc#23] -(50) TakeOrderedAndProject +(50) Exchange Input [1]: [am_pm_ratio#25] -Arguments: 100, [am_pm_ratio#25 ASC NULLS FIRST], [am_pm_ratio#25] +Arguments: rangepartitioning(am_pm_ratio#25 ASC NULLS FIRST, 5), true, [id=#26] + +(51) Sort [codegen id : 12] +Input [1]: [am_pm_ratio#25] +Arguments: [am_pm_ratio#25 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt index 121d84d9dde2f..bf3cfc9cbc037 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/simplified.txt @@ -1,74 +1,77 @@ -TakeOrderedAndProject [am_pm_ratio] - WholeStageCodegen (11) - Project [amc,pmc] - InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (5) - HashAggregate [count] [count(1),amc,count] - InputAdapter - Exchange #1 - WholeStageCodegen (4) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_dep_count,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (2) - Project [t_time_sk] - Filter [t_hour,t_time_sk] - ColumnarToRow - InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (3) - Project [wp_web_page_sk] - Filter [wp_char_count,wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk,wp_char_count] - BroadcastExchange #5 - WholeStageCodegen (10) - HashAggregate [count] [count(1),pmc,count] - InputAdapter - Exchange #6 - WholeStageCodegen (9) - HashAggregate [count,count] - Project - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk] - BroadcastHashJoin [ws_sold_time_sk,t_time_sk] - Project [ws_sold_time_sk,ws_web_page_sk] - BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] - Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] - ColumnarToRow +WholeStageCodegen (12) + Sort [am_pm_ratio] + InputAdapter + Exchange [am_pm_ratio] #1 + WholeStageCodegen (11) + Project [amc,pmc] + InputAdapter + 
BroadcastNestedLoopJoin + WholeStageCodegen (5) + HashAggregate [count] [count(1),amc,count] + InputAdapter + Exchange #2 + WholeStageCodegen (4) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] InputAdapter - Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + BroadcastExchange #3 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_dep_count,hd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.household_demographics [hd_demo_sk,hd_dep_count] InputAdapter - ReusedExchange [hd_demo_sk] #2 + BroadcastExchange #4 + WholeStageCodegen (2) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [t_time_sk] - Filter [t_hour,t_time_sk] + BroadcastExchange #5 + WholeStageCodegen (3) + Project [wp_web_page_sk] + Filter [wp_char_count,wp_web_page_sk] ColumnarToRow InputAdapter - Scan parquet default.time_dim [t_time_sk,t_hour] - InputAdapter - ReusedExchange [wp_web_page_sk] #4 + Scan parquet default.web_page [wp_web_page_sk,wp_char_count] + BroadcastExchange #6 + WholeStageCodegen (10) + HashAggregate [count] [count(1),pmc,count] + InputAdapter + Exchange #7 + WholeStageCodegen (9) + HashAggregate [count,count] + Project + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk] + BroadcastHashJoin [ws_sold_time_sk,t_time_sk] + Project [ws_sold_time_sk,ws_web_page_sk] + BroadcastHashJoin [ws_ship_hdemo_sk,hd_demo_sk] + Filter [ws_ship_hdemo_sk,ws_sold_time_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_time_sk,ws_ship_hdemo_sk,ws_web_page_sk] + InputAdapter + ReusedExchange [hd_demo_sk] #3 + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (7) + Project [t_time_sk] + Filter [t_hour,t_time_sk] + ColumnarToRow + InputAdapter + Scan parquet default.time_dim [t_time_sk,t_hour] + InputAdapter + ReusedExchange [wp_web_page_sk] #5 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index ed284df10aced..440fe997ae133 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1130,7 +1130,7 @@ class StreamSuite extends StreamTest { verifyLocalLimit(inputDF.dropDuplicates().repartition(1).limit(1), expectStreamingLimit = false) // Should be LocalLimitExec in the first place, not from optimization of StreamingLocalLimitExec - val staticDF = spark.range(1).toDF("value").limit(1) + val staticDF = spark.range(2).toDF("value").limit(1) verifyLocalLimit(inputDF.toDF("value").join(staticDF, "value"), expectStreamingLimit = false) verifyLocalLimit( From cc23581e2645c91fa8d6e6c81dc87b4221718bb1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 12:19:07 +0000 Subject: [PATCH 0862/1009] [SPARK-33858][SQL][TESTS] Unify v1 and v2 ALTER TABLE .. RENAME PARTITION tests ### What changes were proposed in this pull request? 1. Move the `ALTER TABLE .. 
RENAME PARTITION` parsing tests to `AlterTableRenamePartitionParserSuite` 2. Place the v1 tests for `ALTER TABLE .. RENAME PARTITION` from `DDLSuite` to `v1.AlterTableRenamePartitionSuite` and v2 tests from `AlterTablePartitionV2SQLSuite` to `v2.AlterTableRenamePartitionSuite`, so, the tests will run for V1, Hive V1 and V2 DS. ### Why are the changes needed? - The unification will allow to run common `ALTER TABLE .. RENAME PARTITION` tests for both DSv1 and Hive DSv1, DSv2 - We can detect missing features and differences between DSv1 and DSv2 implementations. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionParserSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #30863 from MaxGekk/unify-rename-partition-tests. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +-- .../AlterTablePartitionV2SQLSuite.scala | 19 -- ...AlterTableRenamePartitionParserSuite.scala | 51 ++++++ .../AlterTableRenamePartitionSuiteBase.scala | 24 +++ .../command/DDLCommandTestUtils.scala | 18 ++ .../sql/execution/command/DDLSuite.scala | 57 +----- .../command/ShowPartitionsSuiteBase.scala | 18 -- .../v1/AlterTableRenamePartitionSuite.scala | 169 ++++++++++++++++++ .../v2/AlterTableRenamePartitionSuite.scala | 37 ++++ .../sql/hive/execution/HiveDDLSuite.scala | 4 - .../AlterTableRenamePartitionSuite.scala | 24 +++ 11 files changed, 325 insertions(+), 124 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index d408019053fb7..4612e72a54510 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedPartitionSpec, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, PersistedView, UnresolvedAttribute, UnresolvedFunc, UnresolvedNamespace, UnresolvedRelation, UnresolvedStar, UnresolvedTable, UnresolvedTableOrView, UnresolvedView} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, BucketSpec, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.expressions.{EqualTo, Literal} import 
org.apache.spark.sql.catalyst.plans.logical._ @@ -2073,32 +2073,6 @@ class DDLParserSuite extends AnalysisTest { """.stripMargin) } - test("alter table: rename partition") { - val sql1 = - """ - |ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') - |RENAME TO PARTITION (dt='2008-09-09', country='uk') - """.stripMargin - val parsed1 = parsePlan(sql1) - val expected1 = AlterTableRenamePartition( - UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), - UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - Map("dt" -> "2008-09-09", "country" -> "uk")) - comparePlans(parsed1, expected1) - - val sql2 = - """ - |ALTER TABLE a.b.c PARTITION (ds='2017-06-10') - |RENAME TO PARTITION (ds='2018-06-10') - """.stripMargin - val parsed2 = parsePlan(sql2) - val expected2 = AlterTableRenamePartition( - UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), - UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), - Map("ds" -> "2018-06-10")) - comparePlans(parsed2, expected2) - } - test("show current namespace") { comparePlans( parsePlan("SHOW CURRENT NAMESPACE"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala index bdf2fa5b7ac96..f8d4a0970ff89 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTablePartitionV2SQLSuite.scala @@ -31,23 +31,4 @@ class AlterTablePartitionV2SQLSuite extends DatasourceV2SQLBase { "ALTER TABLE ... RECOVER PARTITIONS is not supported for v2 tables.")) } } - - test("ALTER TABLE RENAME PARTITION") { - val nonPartTbl = "testcat.ns1.ns2.tbl" - val partTbl = "testpart.ns1.ns2.tbl" - withTable(nonPartTbl, partTbl) { - spark.sql(s"CREATE TABLE $nonPartTbl (id bigint, data string) USING foo PARTITIONED BY (id)") - val e1 = intercept[AnalysisException] { - sql(s"ALTER TABLE $nonPartTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e1.message.contains(s"Table $nonPartTbl can not alter partitions")) - - spark.sql(s"CREATE TABLE $partTbl (id bigint, data string) USING foo PARTITIONED BY (id)") - val e2 = intercept[AnalysisException] { - sql(s"ALTER TABLE $partTbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - } - assert(e2.message.contains( - "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")) - } - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala new file mode 100644 index 0000000000000..db6506c85bcec --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedPartitionSpec, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.plans.logical.AlterTableRenamePartition +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSparkSession { + test("rename a partition with single part") { + val sql = """ + |ALTER TABLE a.b.c PARTITION (ds='2017-06-10') + |RENAME TO PARTITION (ds='2018-06-10') + """.stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableRenamePartition( + UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), + Map("ds" -> "2018-06-10")) + comparePlans(parsed, expected) + } + + test("rename a partition with multi parts") { + val sql = """ + |ALTER TABLE table_name PARTITION (dt='2008-08-08', country='us') + |RENAME TO PARTITION (dt='2008-09-09', country='uk') + """.stripMargin + val parsed = parsePlan(sql) + val expected = AlterTableRenamePartition( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), + UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), + Map("dt" -> "2008-09-09", "country" -> "uk")) + comparePlans(parsed, expected) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala new file mode 100644 index 0000000000000..a29cf6cabba49 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.QueryTest + +trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. 
RENAME PARTITION" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index a4129fe1ffee5..6ea2fea41f284 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -57,4 +57,22 @@ trait DDLCommandTestUtils extends SQLTestUtils { .map(PartitioningUtils.parsePathFragment) assert(partitions === expected.toSet) } + + protected def createWideTable(table: String): Unit = { + sql(s""" + |CREATE TABLE $table ( + | price int, qty int, + | year int, month int, hour int, minute int, sec int, extra int) + |$defaultUsing + |PARTITIONED BY (year, month, hour, minute, sec, extra) + |""".stripMargin) + sql(s""" + |INSERT INTO $table + |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 + |""".stripMargin) + sql(s""" + |ALTER TABLE $table + |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) + |""".stripMargin) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 49184d0a2e0d0..4e2b67e532933 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.config import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, PartitionAlreadyExistsException, TempTableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchDatabaseException, NoSuchFunctionException, NoSuchPartitionException, TempTableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER @@ -334,10 +334,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { testChangeColumn(isDatasourceTable = true) } - test("alter table: rename partition (datasource table)") { - testRenamePartitions(isDatasourceTable = true) - } - test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -1592,57 +1588,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - protected def testRenamePartitions(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - val part1 = Map("a" -> "1", "b" -> "q") - val part2 = Map("a" -> "2", "b" -> "c") - val part3 = Map("a" -> "3", "b" -> "p") - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - createTablePartition(catalog, part1, tableIdent) - createTablePartition(catalog, part2, tableIdent) - createTablePartition(catalog, part3, tableIdent) - 
assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == Set(part1, part2, part3)) - - // basic rename partition - sql("ALTER TABLE dbx.tab1 PARTITION (a='1', b='q') RENAME TO PARTITION (a='100', b='p')") - sql("ALTER TABLE dbx.tab1 PARTITION (a='2', b='c') RENAME TO PARTITION (a='20', b='c')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "100", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // rename without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 PARTITION (a='100', b='p') RENAME TO PARTITION (a='10', b='p')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "10", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // table to alter does not exist - val e = intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist PARTITION (c='3') RENAME TO PARTITION (c='333')") - } - assert(e.getMessage.contains("Table not found: does_not_exist")) - - // partition to rename does not exist - intercept[NoSuchPartitionException] { - sql("ALTER TABLE tab1 PARTITION (a='not_found', b='1') RENAME TO PARTITION (a='1', b='2')") - } - - // partition spec in RENAME PARTITION should be case insensitive by default - sql("ALTER TABLE tab1 PARTITION (A='10', B='p') RENAME TO PARTITION (A='1', B='p')") - assert(catalog.listPartitions(tableIdent).map(_.spec).toSet == - Set(Map("a" -> "1", "b" -> "p"), Map("a" -> "20", "b" -> "c"), Map("a" -> "3", "b" -> "p"))) - - // target partition already exists - val errMsg = intercept[PartitionAlreadyExistsException] { - sql("ALTER TABLE tab1 PARTITION (a='1', b='p') RENAME TO PARTITION (a='20', b='c')") - }.getMessage - assert(errMsg.contains("Partition already exists")) - } - protected def testChangeColumn(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index d66c6191fbfa2..83808ab82d3b2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -42,24 +42,6 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") } - protected def createWideTable(table: String): Unit = { - sql(s""" - |CREATE TABLE $table ( - | price int, qty int, - | year int, month int, hour int, minute int, sec int, extra int) - |$defaultUsing - |PARTITIONED BY (year, month, hour, minute, sec, extra) - |""".stripMargin) - sql(s""" - |INSERT INTO $table - |PARTITION(year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1) SELECT 3, 3 - |""".stripMargin) - sql(s""" - |ALTER TABLE $table - |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) - |""".stripMargin) - } - test("show partitions of non-partitioned table") { withNamespaceAndTable("ns", "not_partitioned_table") { t => sql(s"CREATE TABLE $t (col1 int) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala new file mode 100644 
index 0000000000000..89d5e5f4635d0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +trait AlterTableRenamePartitionSuiteBase extends command.AlterTableRenamePartitionSuiteBase { + protected def createSinglePartTable(t: String): Unit = { + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") + } + + test("rename without explicitly specifying database") { + val t = "tbl" + withTable(t) { + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("table to alter does not exist") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("partition to rename does not exist") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + val errMsg = intercept[NoSuchPartitionException] { + sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition not found in table")) + } + } + + test("target partition exists") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + val errMsg = intercept[PartitionAlreadyExistsException] { + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition already exists")) + } + } + + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("multi part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createWideTable(t) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + 
"hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + + sql(s""" + |ALTER TABLE $t + |PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 + |) RENAME TO PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 + |)""".stripMargin) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "123", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) + } + } + + test("with location") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION 'loc1'") + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + + sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Seq(Row(1, "abc"), Row(3, "def"))) + } + } + + test("partition spec in RENAME PARTITION should be case insensitive") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + checkPartitions(t, Map("id" -> "1")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + } +} + +class AlterTableRenamePartitionSuite + extends AlterTableRenamePartitionSuiteBase + with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala new file mode 100644 index 0000000000000..026f1dcc33a1a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command + +class AlterTableRenamePartitionSuite + extends command.AlterTableRenamePartitionSuiteBase + with CommandSuiteBase { + + // TODO(SPARK-33859): Support V2 ALTER TABLE .. RENAME PARTITION + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables")) + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index f13c8704f3b5b..b8a37a84735e3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -159,10 +159,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA testChangeColumn(isDatasourceTable = false) } - test("alter table: rename partition") { - testRenamePartitions(isDatasourceTable = false) - } - test("alter datasource table add columns - orc") { testAddColumn("orc") } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala new file mode 100644 index 0000000000000..86edab74ab998 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableRenamePartitionSuite.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +class AlterTableRenamePartitionSuite + extends v1.AlterTableRenamePartitionSuiteBase + with CommandSuiteBase From 303df64b466b7734b3c497955d1cca3e34fb663e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 23 Dec 2020 14:34:01 +0000 Subject: [PATCH 0863/1009] [SPARK-33889][SQL] Fix NPE from `SHOW PARTITIONS` on V2 tables ### What changes were proposed in this pull request? At `ShowPartitionsExec.run()`, check that a row returned by `listPartitionIdentifiers()` contains a `null` field, and convert it to `"null"`. ### Why are the changes needed? Because `SHOW PARTITIONS` throws NPE on V2 table with `null` partition values. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Added new UT to `v2.ShowPartitionsSuite`. 
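For context only, the intended rendering rule can be sketched outside Spark as a tiny standalone program; `PartitionSpecRenderingSketch`, `render`, and the `String` value type below are illustrative assumptions and not the `ShowPartitionsExec` code touched by this patch:

```scala
// Illustrative sketch only (not the patched Spark code): a null partition value
// is rendered as the literal string "null", while an empty string stays empty,
// when building a "col=value" partition spec string.
object PartitionSpecRenderingSketch {
  def render(name: String, value: String): String = {
    val valueStr = if (value == null) "null" else value
    s"$name=$valueStr"
  }

  def main(args: Array[String]): Unit = {
    println(render("part", null)) // prints: part=null
    println(render("part", ""))   // prints: part=
  }
}
```
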
Closes #30904 from MaxGekk/fix-npe-show-partitions. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../datasources/v2/ShowPartitionsExec.scala | 6 +++--- .../command/v2/ShowPartitionsSuite.scala | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala index 416dce6fa28c6..ac24094f9089e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowPartitionsExec.scala @@ -53,9 +53,9 @@ case class ShowPartitionsExec( var i = 0 while (i < len) { val dataType = schema(i).dataType - val partValue = row.get(i, dataType) - val partValueStr = Cast(Literal(partValue, dataType), StringType, Some(timeZoneId)) - .eval().toString + val partValueUTF8String = + Cast(Literal(row.get(i, dataType), dataType), StringType, Some(timeZoneId)).eval() + val partValueStr = if (partValueUTF8String == null) "null" else partValueUTF8String.toString partitions(i) = escapePathName(schema(i).name) + "=" + escapePathName(partValueStr) i += 1 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index e52c60d0f9a95..ed0a7dff62440 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row, SaveMode} import org.apache.spark.sql.execution.command class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSuiteBase { @@ -34,4 +34,18 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu "SHOW PARTITIONS cannot run for a table which does not support partitioning")) } } + + test("SPARK-33889: null and empty string as partition values") { + import testImplicits._ + withNamespaceAndTable("ns", "tbl") { t => + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable(t) + + runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + } + } } From 7ffcfcf7db57fb62941130e0c7bf61bca08aa758 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 23 Dec 2020 14:35:46 +0000 Subject: [PATCH 0864/1009] [SPARK-33847][SQL] Simplify CaseWhen if elseValue is None ### What changes were proposed in this pull request? 1. Enhance `ReplaceNullWithFalseInPredicate` to replace None of elseValue inside `CaseWhen` with `FalseLiteral` if all branches are `FalseLiteral` . 
The use case is: ```sql create table t1 using parquet as select id from range(10); explain select id from t1 where (CASE WHEN id = 1 THEN 'a' WHEN id = 3 THEN 'b' end) = 'c'; ``` Before this pr: ``` == Physical Plan == *(1) Filter CASE WHEN (id#1L = 1) THEN false WHEN (id#1L = 3) THEN false END +- *(1) ColumnarToRow +- FileScan parquet default.t1[id#1L] Batched: true, DataFilters: [CASE WHEN (id#1L = 1) THEN false WHEN (id#1L = 3) THEN false END], Format: Parquet, Location: InMemoryFileIndex[file:/Users/yumwang/opensource/spark/spark-warehouse/org.apache.spark.sql.DataF..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` After this pr: ``` == Physical Plan == LocalTableScan , [id#1L] ``` 2. Enhance `SimplifyConditionals` if elseValue is None and all outputs are null. ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30852 from wangyum/SPARK-33847. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../ReplaceNullWithFalseInPredicate.scala | 8 +++-- .../sql/catalyst/optimizer/expressions.scala | 4 +++ .../PushFoldableIntoBranchesSuite.scala | 11 +++++++ ...ReplaceNullWithFalseInPredicateSuite.scala | 33 +++++++++++++++++++ .../optimizer/SimplifyConditionalSuite.scala | 8 +++++ ...ullWithFalseInPredicateEndToEndSuite.scala | 21 +++++++++--- 6 files changed, 78 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 4a71dba663b38..92401131e8b82 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -93,8 +93,12 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { val newBranches = cw.branches.map { case (cond, value) => replaceNullWithFalse(cond) -> replaceNullWithFalse(value) } - val newElseValue = cw.elseValue.map(replaceNullWithFalse) - CaseWhen(newBranches, newElseValue) + if (newBranches.forall(_._2 == FalseLiteral) && cw.elseValue.isEmpty) { + FalseLiteral + } else { + val newElseValue = cw.elseValue.map(replaceNullWithFalse) + CaseWhen(newBranches, newElseValue) + } case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType => If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal)) case e if e.dataType == BooleanType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 47b968f6ebdd7..f01df5e5e6768 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -525,6 +525,10 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } else { e.copy(branches = branches.take(i).map(branch => (branch._1, elseValue))) } + + case e @ CaseWhen(branches, None) + if branches.forall(_._2.semanticEquals(Literal(null, e.dataType))) => + Literal(null, e.dataType) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 02307a52ebb89..2d826e7b55a68 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -258,4 +258,15 @@ class PushFoldableIntoBranchesSuite EqualTo(CaseWhen(Seq((a, Literal(1)), (c, Literal(2))), None).cast(StringType), Literal("4")), CaseWhen(Seq((a, FalseLiteral), (c, FalseLiteral)), None)) } + + test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { + Seq(a, LessThan(Rand(1), Literal(0.5))).foreach { condition => + assertEquivalent( + EqualTo(CaseWhen(Seq((condition, Literal.create(null, IntegerType)))), Literal(2)), + Literal.create(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((condition, Literal("str")))).cast(IntegerType), Literal(2)), + Literal.create(null, BooleanType)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 5da71c31e1990..f49e6921fd46a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -380,6 +380,39 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { testProjection(originalExpr = column, expectedExpr = column) } + test("replace None of elseValue inside CaseWhen if all branches are FalseLiteral") { + val allFalseBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, + (UnresolvedAttribute("i") > Literal(40)) -> FalseLiteral) + val allFalseCond = CaseWhen(allFalseBranches) + + val nonAllFalseBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, + (UnresolvedAttribute("i") > Literal(40)) -> TrueLiteral) + val nonAllFalseCond = CaseWhen(nonAllFalseBranches, FalseLiteral) + + testFilter(allFalseCond, FalseLiteral) + testJoin(allFalseCond, FalseLiteral) + testDelete(allFalseCond, FalseLiteral) + testUpdate(allFalseCond, FalseLiteral) + + testFilter(nonAllFalseCond, nonAllFalseCond) + testJoin(nonAllFalseCond, nonAllFalseCond) + testDelete(nonAllFalseCond, nonAllFalseCond) + testUpdate(nonAllFalseCond, nonAllFalseCond) + } + + test("replace None of elseValue inside CaseWhen if all branches are null") { + val allNullBranches = Seq( + (UnresolvedAttribute("i") < Literal(10)) -> Literal.create(null, BooleanType), + (UnresolvedAttribute("i") > Literal(40)) -> Literal.create(null, BooleanType)) + val allFalseCond = CaseWhen(allNullBranches) + testFilter(allFalseCond, FalseLiteral) + testJoin(allFalseCond, FalseLiteral) + testDelete(allFalseCond, FalseLiteral) + testUpdate(allFalseCond, FalseLiteral) + } + private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { test((rel, exp) => rel.where(exp), originalCond, expectedCond) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index 328fc107e1c1b..1876be21dea4b 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -215,4 +215,12 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P If(GreaterThan(Rand(0), UnresolvedAttribute("a")), FalseLiteral, TrueLiteral), LessThanOrEqual(Rand(0), UnresolvedAttribute("a"))) } + + test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { + Seq(GreaterThan('a, 1), GreaterThan(Rand(0), 1)).foreach { condition => + assertEquivalent( + CaseWhen((condition, Literal.create(null, IntegerType)) :: Nil, None), + Literal.create(null, IntegerType)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala index bdbb741f24bc6..739b4052ee90d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ReplaceNullWithFalseInPredicateEndToEndSuite.scala @@ -27,6 +27,12 @@ import org.apache.spark.sql.types.BooleanType class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with SharedSparkSession { import testImplicits._ + private def checkPlanIsEmptyLocalScan(df: DataFrame): Unit = + df.queryExecution.executedPlan match { + case s: LocalTableScanExec => assert(s.rows.isEmpty) + case p => fail(s"$p is not LocalTableScanExec") + } + test("SPARK-25860: Replace Literal(null, _) with FalseLiteral whenever possible") { withTable("t1", "t2") { Seq((1, true), (2, false)).toDF("l", "b").write.saveAsTable("t1") @@ -64,11 +70,6 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared checkAnswer(df1.where("IF(l > 10, false, b OR null)"), Row(1, true)) } - - def checkPlanIsEmptyLocalScan(df: DataFrame): Unit = df.queryExecution.executedPlan match { - case s: LocalTableScanExec => assert(s.rows.isEmpty) - case p => fail(s"$p is not LocalTableScanExec") - } } test("SPARK-26107: Replace Literal(null, _) with FalseLiteral in higher-order functions") { @@ -112,4 +113,14 @@ class ReplaceNullWithFalseInPredicateEndToEndSuite extends QueryTest with Shared assertNoLiteralNullInPlan(q3) } } + + test("SPARK-33847: replace None of elseValue inside CaseWhen to FalseLiteral") { + withTable("t1") { + Seq((1, 1), (2, 2)).toDF("a", "b").write.saveAsTable("t1") + val t1 = spark.table("t1") + val q1 = t1.filter("(CASE WHEN a > 1 THEN 1 END) = 0") + checkAnswer(q1, Seq.empty) + checkPlanIsEmptyLocalScan(q1) + } + } } From 47d1aa4e93f668774fd0b16c780d3b1f6200bcd8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Dec 2020 23:43:21 +0900 Subject: [PATCH 0865/1009] [SPARK-33891][DOCS][CORE] Update dynamic allocation related documents ### What changes were proposed in this pull request? This PR aims to update the followings. - Remove the outdated requirement for `spark.shuffle.service.enabled` in `configuration.md` - Dynamic allocation section in `job-scheduling.md` ### Why are the changes needed? To make the document up-to-date. ### Does this PR introduce _any_ user-facing change? No, it's a documentation update. ### How was this patch tested? Manual. 
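As a quick illustration of the two setups described in the updated job-scheduling section below, a minimal sketch using the configuration keys referenced in the diff:

```scala
import org.apache.spark.SparkConf

// Option 1: dynamic allocation with shuffle tracking; no external shuffle service needed.
val withShuffleTracking = new SparkConf()
  .set("spark.dynamicAllocation.enabled", "true")
  .set("spark.dynamicAllocation.shuffleTracking.enabled", "true")

// Option 2: dynamic allocation backed by an external shuffle service,
// which must be set up on each worker node first.
val withExternalShuffleService = new SparkConf()
  .set("spark.dynamicAllocation.enabled", "true")
  .set("spark.shuffle.service.enabled", "true")
```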
**BEFORE** ![Screen Shot 2020-12-23 at 2 22 04 AM](https://user-images.githubusercontent.com/9700541/102986441-ae647f80-44c5-11eb-97a3-87c2d368952a.png) ![Screen Shot 2020-12-23 at 2 22 34 AM](https://user-images.githubusercontent.com/9700541/102986473-bcb29b80-44c5-11eb-8eae-6802001c6dfa.png) **AFTER** ![Screen Shot 2020-12-23 at 2 25 36 AM](https://user-images.githubusercontent.com/9700541/102986767-2df24e80-44c6-11eb-8540-e74856a4c313.png) ![Screen Shot 2020-12-23 at 2 21 13 AM](https://user-images.githubusercontent.com/9700541/102986366-8e34c080-44c5-11eb-8054-1efd07c9458c.png) Closes #30906 from dongjoon-hyun/SPARK-33891. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- docs/configuration.md | 3 +-- docs/job-scheduling.md | 17 +++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index 21506e6901263..fe1fc3e47369b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -928,8 +928,7 @@ Apart from these, the following properties are also available, and may be useful false Enables the external shuffle service. This service preserves the shuffle files written by - executors so the executors can be safely removed. This must be enabled if - spark.dynamicAllocation.enabled is "true". The external shuffle service + executors so the executors can be safely removed. The external shuffle service must be set up in order to enable it. See dynamic allocation configuration and setup documentation for more information. diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 7c7385b325a7f..f2b77cdfcd2c3 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -79,18 +79,19 @@ are no longer used and request them again later when there is demand. This featu useful if multiple applications share resources in your Spark cluster. This feature is disabled by default and available on all coarse-grained cluster managers, i.e. -[standalone mode](spark-standalone.html), [YARN mode](running-on-yarn.html), and -[Mesos coarse-grained mode](running-on-mesos.html#mesos-run-modes). +[standalone mode](spark-standalone.html), [YARN mode](running-on-yarn.html), +[Mesos coarse-grained mode](running-on-mesos.html#mesos-run-modes) and [K8s mode](running-on-kubernetes.html). + ### Configuration and Setup -There are two requirements for using this feature. First, your application must set -`spark.dynamicAllocation.enabled` to `true`. Second, you must set up an *external shuffle service* -on each worker node in the same cluster and set `spark.shuffle.service.enabled` to true in your -application. The purpose of the external shuffle service is to allow executors to be removed +There are two ways for using this feature. +First, your application must set both `spark.dynamicAllocation.enabled` and `spark.dynamicAllocation.shuffleTracking.enabled` to `true`. +Second, your application must set both `spark.dynamicAllocation.enabled` and `spark.shuffle.service.enabled` to `true` +after you set up an *external shuffle service* on each worker node in the same cluster. +The purpose of the shuffle tracking or the external shuffle service is to allow executors to be removed without deleting shuffle files written by them (more detail described -[below](job-scheduling.html#graceful-decommission-of-executors)). The way to set up this service -varies across cluster managers: +[below](job-scheduling.html#graceful-decommission-of-executors)). 
While it is simple to enable shuffle tracking, the way to set up the external shuffle service varies across cluster managers: In standalone mode, simply start your workers with `spark.shuffle.service.enabled` set to `true`. From 0677c39009de0830d995da77332f0756c76d6b56 Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Wed, 23 Dec 2020 12:42:18 -0600 Subject: [PATCH 0866/1009] [SPARK-32916][SHUFFLE][TEST-MAVEN][TEST-HADOOP2.7] Ensure the number of chunks in meta file and index file are equal ### What changes were proposed in this pull request? 1. Fixes for bugs in `RemoteBlockPushResolver` where the number of chunks in the meta file and the index file become inconsistent due to exceptions while writing to either file. This Java class was introduced in https://github.com/apache/spark/pull/30062. - If writing to the index file fails, the position of the meta file is not reset. This means that the number of chunks in the meta file is inconsistent with the index file. - During exception handling while writing to the index/meta file, we just set the pointer back to the start position. If the files are closed right after this, that does not get rid of any of the extra bytes already written to them. 2. Adds an IOException threshold. If the `RemoteBlockPushResolver` encounters more IOExceptions than this threshold while updating the data/meta/index file of a shuffle partition, it responds to the client with the exception `IOExceptions exceeded the threshold` so that the client can stop pushing data for this shuffle partition (a brief client-side sketch follows this commit message). 3. When the update to metadata fails, the exception is not propagated back to the client. This results in an increased size of the current chunk. However, with (2) in place, the current chunk will still be of a manageable size. ### Why are the changes needed? This fix is needed for the bugs mentioned above. 1. Writing to the meta file was moved after writing to the index file. This fixes the issue because if there is an exception while writing to the meta file, the index file position is not updated. With this change, if there is an exception while writing to the index file, none of the files are effectively updated, and the same holds vice versa. 2. The data/index/meta files are truncated to their tracked lengths when the partition is finalized. 3. When the number of IOExceptions has reached the threshold, it is most likely that future blocks will also hit the same issue, so it is better to let the clients know so that they can stop pushing blocks for that partition. 4. When just the meta update fails, the client retries pushing a block that was already successfully merged into the data file. This can be avoided by letting the chunk grow slightly. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests for all the bugs and the threshold. Closes #30433 from otterc/SPARK-32916-followup.
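A minimal client-side sketch of the contract described above: the block pusher can recognize the new error-message prefix (added to `ErrorHandler.BlockPushErrorHandler` in the diff below) and stop pushing to that shuffle partition. The helper name is illustrative only.

```scala
// A minimal sketch: recognizing the new server response on the client side.
// The prefix mirrors IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX introduced in this patch.
val ioExceptionsExceededPrefix = "IOExceptions exceeded the threshold"

def shouldStopPushingPartition(error: Throwable): Boolean =
  error.getMessage != null && error.getMessage.contains(ioExceptionsExceededPrefix)
```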
Authored-by: Chandni Singh Signed-off-by: Mridul Muralidharan --- .../spark/network/util/TransportConf.java | 10 + .../spark/network/shuffle/ErrorHandler.java | 9 + .../shuffle/RemoteBlockPushResolver.java | 301 ++++++++++---- .../shuffle/RemoteBlockPushResolverSuite.java | 380 ++++++++++++++++++ 4 files changed, 629 insertions(+), 71 deletions(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index fd287b022618b..d305dfa8e83cf 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -398,4 +398,14 @@ public long mergedIndexCacheSize() { return JavaUtils.byteStringAsBytes( conf.get("spark.shuffle.server.mergedIndexCacheSize", "100m")); } + + /** + * The threshold for the number of IOExceptions while merging shuffle blocks to a shuffle partition. + * When the number of IOExceptions while writing to the merged shuffle data/index/meta file exceeds + * this threshold, the shuffle server will respond back to the client to stop pushing shuffle + * blocks for this shuffle partition. + */ + public int ioExceptionsThresholdDuringMerge() { + return conf.getInt("spark.shuffle.server.ioExceptionsThresholdDuringMerge", 4); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java index d13a0272744a0..968777fba785d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -71,6 +71,15 @@ class BlockPushErrorHandler implements ErrorHandler { public static final String BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX = "Couldn't find an opportunity to write block"; + /** + * String constant used for generating exception messages indicating the server encountered + * IOExceptions multiple times, greater than the configured threshold, while trying to merge + * shuffle blocks of the same shuffle partition. When the client receives this response, + * it will stop pushing any more blocks for the same shuffle partition. + */ + public static final String IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX = + "IOExceptions exceeded the threshold"; + @Override public boolean shouldRetryError(Throwable t) { // If it is a connection time out or a connection closed exception, no need to retry.
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index 76abb05c99bb4..0e2355646465d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -17,15 +17,16 @@ package org.apache.spark.network.shuffle; +import java.io.DataOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; -import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; @@ -45,6 +46,8 @@ import com.google.common.cache.LoadingCache; import com.google.common.cache.Weigher; import com.google.common.collect.Maps; +import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; import org.roaringbitmap.RoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -78,6 +81,7 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { private final Executor directoryCleaner; private final TransportConf conf; private final int minChunkSize; + private final int ioExceptionsThresholdDuringMerge; private final ErrorHandler.BlockPushErrorHandler errorHandler; @SuppressWarnings("UnstableApiUsage") @@ -92,6 +96,7 @@ public RemoteBlockPushResolver(TransportConf conf) { // Add `spark` prefix because it will run in NM in Yarn mode. NettyUtils.createThreadFactory("spark-shuffle-merged-shuffle-directory-cleaner")); this.minChunkSize = conf.minChunkSizeInMergedShuffleFile(); + this.ioExceptionsThresholdDuringMerge = conf.ioExceptionsThresholdDuringMerge(); CacheLoader indexCacheLoader = new CacheLoader() { public ShuffleIndexInformation load(File file) throws IOException { @@ -132,7 +137,7 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( if (dataFile.exists()) { return null; } else { - return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, indexFile, metaFile); + return newAppShufflePartitionInfo(appShuffleId, reduceId, dataFile, indexFile, metaFile); } } catch (IOException e) { logger.error( @@ -146,6 +151,17 @@ private AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( }); } + @VisibleForTesting + AppShufflePartitionInfo newAppShufflePartitionInfo( + AppShuffleId appShuffleId, + int reduceId, + File dataFile, + File indexFile, + File metaFile) throws IOException { + return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, + new MergeShuffleFile(indexFile), new MergeShuffleFile(metaFile)); + } + @Override public MergedBlockMeta getMergedBlockMeta(String appId, int shuffleId, int reduceId) { AppShuffleId appShuffleId = new AppShuffleId(appId, shuffleId); @@ -370,26 +386,19 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOExc new MergeStatuses(msg.shuffleId, new RoaringBitmap[0], new int[0], new long[0]); } else { Collection partitionsToFinalize = shufflePartitions.values(); - int totalPartitions = partitionsToFinalize.size(); - RoaringBitmap[] bitmaps = new RoaringBitmap[totalPartitions]; - int[] reduceIds = new int[totalPartitions]; - long[] sizes = new long[totalPartitions]; + List bitmaps = new 
ArrayList<>(partitionsToFinalize.size()); + List reduceIds = new ArrayList<>(partitionsToFinalize.size()); + List sizes = new ArrayList<>(partitionsToFinalize.size()); Iterator partitionsIter = partitionsToFinalize.iterator(); - int idx = 0; while (partitionsIter.hasNext()) { AppShufflePartitionInfo partition = partitionsIter.next(); synchronized (partition) { - // Get rid of any partial block data at the end of the file. This could either - // be due to failure or a request still being processed when the shuffle - // merge gets finalized. try { - partition.dataChannel.truncate(partition.getPosition()); - if (partition.getPosition() != partition.getLastChunkOffset()) { - partition.updateChunkInfo(partition.getPosition(), partition.lastMergedMapIndex); - } - bitmaps[idx] = partition.mapTracker; - reduceIds[idx] = partition.reduceId; - sizes[idx++] = partition.getPosition(); + // This can throw IOException which will marks this shuffle partition as not merged. + partition.finalizePartition(); + bitmaps.add(partition.mapTracker); + reduceIds.add(partition.reduceId); + sizes.add(partition.getLastChunkOffset()); } catch (IOException ioe) { logger.warn("Exception while finalizing shuffle partition {} {} {}", msg.appId, msg.shuffleId, partition.reduceId, ioe); @@ -401,7 +410,9 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) throws IOExc } } } - mergeStatuses = new MergeStatuses(msg.shuffleId, bitmaps, reduceIds, sizes); + mergeStatuses = new MergeStatuses(msg.shuffleId, + bitmaps.toArray(new RoaringBitmap[bitmaps.size()]), Ints.toArray(reduceIds), + Longs.toArray(sizes)); } partitions.remove(appShuffleId); logger.info("Finalized shuffle {} from Application {}.", msg.shuffleId, msg.appId); @@ -450,6 +461,7 @@ private PushBlockStreamCallback( this.streamId = streamId; this.partitionInfo = Preconditions.checkNotNull(partitionInfo); this.mapIndex = mapIndex; + abortIfNecessary(); } @Override @@ -466,11 +478,11 @@ public String getID() { private void writeBuf(ByteBuffer buf) throws IOException { while (buf.hasRemaining()) { if (partitionInfo.isEncounteredFailure()) { - long updatedPos = partitionInfo.getPosition() + length; + long updatedPos = partitionInfo.getDataFilePos() + length; logger.debug( "{} shuffleId {} reduceId {} encountered failure current pos {} updated pos {}", partitionInfo.appShuffleId.appId, partitionInfo.appShuffleId.shuffleId, - partitionInfo.reduceId, partitionInfo.getPosition(), updatedPos); + partitionInfo.reduceId, partitionInfo.getDataFilePos(), updatedPos); length += partitionInfo.dataChannel.write(buf, updatedPos); } else { length += partitionInfo.dataChannel.write(buf); @@ -510,15 +522,35 @@ private boolean isDuplicateBlock() { * This is only invoked when the stream is able to write. The stream first writes any deferred * block parts buffered in memory. */ - private void writeAnyDeferredBufs() throws IOException { - if (deferredBufs != null && !deferredBufs.isEmpty()) { - for (ByteBuffer deferredBuf : deferredBufs) { - writeBuf(deferredBuf); - } + private void writeDeferredBufs() throws IOException { + for (ByteBuffer deferredBuf : deferredBufs) { + writeBuf(deferredBuf); + } + deferredBufs = null; + } + + /** + * This throws RuntimeException if the number of IOExceptions have exceeded threshold. 
+ */ + private void abortIfNecessary() { + if (partitionInfo.shouldAbort(mergeManager.ioExceptionsThresholdDuringMerge)) { deferredBufs = null; + throw new RuntimeException(String.format("%s when merging %s", + ErrorHandler.BlockPushErrorHandler.IOEXCEPTIONS_EXCEEDED_THRESHOLD_PREFIX, + streamId)); } } + /** + * This increments the number of IOExceptions and throws RuntimeException if it exceeds the + * threshold which will abort the merge of a particular shuffle partition. + */ + private void incrementIOExceptionsAndAbortIfNecessary() { + // Update the count of IOExceptions + partitionInfo.incrementIOExceptions(); + abortIfNecessary(); + } + @Override public void onData(String streamId, ByteBuffer buf) throws IOException { // When handling the block data using StreamInterceptor, it can help to reduce the amount @@ -556,6 +588,7 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { deferredBufs = null; return; } + abortIfNecessary(); logger.trace("{} shuffleId {} reduceId {} onData writable", partitionInfo.appShuffleId.appId, partitionInfo.appShuffleId.shuffleId, partitionInfo.reduceId); @@ -565,8 +598,17 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { // If we got here, it's safe to write the block data to the merged shuffle file. We // first write any deferred block. - writeAnyDeferredBufs(); - writeBuf(buf); + try { + if (deferredBufs != null && !deferredBufs.isEmpty()) { + writeDeferredBufs(); + } + writeBuf(buf); + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we propagate the IOException + // back to the client so the block could be retried. + throw ioe; + } // If we got here, it means we successfully write the current chunk of block to merged // shuffle file. If we encountered failure while writing the previous block, we should // reset the file channel position and the status of partitionInfo to indicate that we @@ -574,7 +616,7 @@ public void onData(String streamId, ByteBuffer buf) throws IOException { // position tracked by partitionInfo here. That is only updated while the entire block // is successfully written to merged shuffle file. if (partitionInfo.isEncounteredFailure()) { - partitionInfo.dataChannel.position(partitionInfo.getPosition() + length); + partitionInfo.dataChannel.position(partitionInfo.getDataFilePos() + length); partitionInfo.setEncounteredFailure(false); } } else { @@ -636,15 +678,33 @@ public void onComplete(String streamId) throws IOException { return; } if (partitionInfo.getCurrentMapIndex() < 0) { - writeAnyDeferredBufs(); + try { + if (deferredBufs != null && !deferredBufs.isEmpty()) { + abortIfNecessary(); + writeDeferredBufs(); + } + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we propagate the IOException + // back to the client so the block could be retried. 
+ throw ioe; + } } - long updatedPos = partitionInfo.getPosition() + length; + long updatedPos = partitionInfo.getDataFilePos() + length; boolean indexUpdated = false; if (updatedPos - partitionInfo.getLastChunkOffset() >= mergeManager.minChunkSize) { - partitionInfo.updateChunkInfo(updatedPos, mapIndex); - indexUpdated = true; + try { + partitionInfo.updateChunkInfo(updatedPos, mapIndex); + indexUpdated = true; + } catch (IOException ioe) { + incrementIOExceptionsAndAbortIfNecessary(); + // If the above doesn't throw a RuntimeException, then we do not propagate the + // IOException to the client. This may increase the chunk size however the increase is + // still limited because of the limit on the number of IOExceptions for a + // particular shuffle partition. + } } - partitionInfo.setPosition(updatedPos); + partitionInfo.setDataFilePos(updatedPos); partitionInfo.setCurrentMapIndex(-1); // update merged results @@ -687,6 +747,11 @@ public void onFailure(String streamId, Throwable throwable) throws IOException { } } } + + @VisibleForTesting + AppShufflePartitionInfo getPartitionInfo() { + return partitionInfo; + } } /** @@ -736,7 +801,7 @@ public static class AppShufflePartitionInfo { // The merged shuffle data file channel public FileChannel dataChannel; // Location offset of the last successfully merged block for this shuffle partition - private long position; + private long dataFilePos; // Indicating whether failure was encountered when merging the previous block private boolean encounteredFailure; // Track the map index whose block is being merged for this shuffle partition @@ -744,44 +809,46 @@ public static class AppShufflePartitionInfo { // Bitmap tracking which mapper's blocks have been merged for this shuffle partition private RoaringBitmap mapTracker; // The index file for a particular merged shuffle contains the chunk offsets. - private RandomAccessFile indexFile; + private MergeShuffleFile indexFile; // The meta file for a particular merged shuffle contains all the map indices that belong to // every chunk. The entry per chunk is a serialized bitmap. 
- private RandomAccessFile metaFile; + private MergeShuffleFile metaFile; // The offset for the last chunk tracked in the index file for this shuffle partition private long lastChunkOffset; private int lastMergedMapIndex = -1; // Bitmap tracking which mapper's blocks are in the current shuffle chunk private RoaringBitmap chunkTracker; + private int numIOExceptions = 0; + private boolean indexMetaUpdateFailed; AppShufflePartitionInfo( AppShuffleId appShuffleId, int reduceId, File dataFile, - File indexFile, - File metaFile) throws IOException { + MergeShuffleFile indexFile, + MergeShuffleFile metaFile) throws IOException { this.appShuffleId = Preconditions.checkNotNull(appShuffleId, "app shuffle id"); this.reduceId = reduceId; this.dataChannel = new FileOutputStream(dataFile).getChannel(); - this.indexFile = new RandomAccessFile(indexFile, "rw"); - this.metaFile = new RandomAccessFile(metaFile, "rw"); + this.indexFile = indexFile; + this.metaFile = metaFile; this.currentMapIndex = -1; // Writing 0 offset so that we can reuse ShuffleIndexInformation.getIndex() updateChunkInfo(0L, -1); - this.position = 0; + this.dataFilePos = 0; this.encounteredFailure = false; this.mapTracker = new RoaringBitmap(); this.chunkTracker = new RoaringBitmap(); } - public long getPosition() { - return position; + public long getDataFilePos() { + return dataFilePos; } - public void setPosition(long position) { + public void setDataFilePos(long dataFilePos) { logger.trace("{} shuffleId {} reduceId {} current pos {} update pos {}", appShuffleId.appId, - appShuffleId.shuffleId, reduceId, this.position, position); - this.position = position; + appShuffleId.shuffleId, reduceId, this.dataFilePos, dataFilePos); + this.dataFilePos = dataFilePos; } boolean isEncounteredFailure() { @@ -825,25 +892,29 @@ void resetChunkTracker() { * @param mapIndex the map index to be added to chunk tracker. */ void updateChunkInfo(long chunkOffset, int mapIndex) throws IOException { - long idxStartPos = -1; try { - // update the chunk tracker to meta file before index file + logger.trace("{} shuffleId {} reduceId {} index current {} updated {}", + appShuffleId.appId, appShuffleId.shuffleId, reduceId, this.lastChunkOffset, chunkOffset); + if (indexMetaUpdateFailed) { + indexFile.getChannel().position(indexFile.getPos()); + } + indexFile.getDos().writeLong(chunkOffset); + // Chunk bitmap should be written to the meta file after the index file because if there are + // any exceptions during writing the offset to the index file, meta file should not be + // updated. If the update to the index file is successful but the update to meta file isn't + // then the index file position is not updated. writeChunkTracker(mapIndex); - idxStartPos = indexFile.getFilePointer(); - logger.trace("{} shuffleId {} reduceId {} updated index current {} updated {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, this.lastChunkOffset, - chunkOffset); - indexFile.writeLong(chunkOffset); + indexFile.updatePos(8); + this.lastChunkOffset = chunkOffset; + indexMetaUpdateFailed = false; } catch (IOException ioe) { - if (idxStartPos != -1) { - // reset the position to avoid corrupting index files during exception. 
- logger.warn("{} shuffleId {} reduceId {} reset index to position {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, idxStartPos); - indexFile.seek(idxStartPos); - } + logger.warn("{} shuffleId {} reduceId {} update to index/meta failed", appShuffleId.appId, + appShuffleId.shuffleId, reduceId); + indexMetaUpdateFailed = true; + // Any exception here is propagated to the caller and the caller can decide whether to + // abort or not. throw ioe; } - this.lastChunkOffset = chunkOffset; } private void writeChunkTracker(int mapIndex) throws IOException { @@ -851,17 +922,38 @@ private void writeChunkTracker(int mapIndex) throws IOException { return; } chunkTracker.add(mapIndex); - long metaStartPos = metaFile.getFilePointer(); - try { - logger.trace("{} shuffleId {} reduceId {} mapIndex {} write chunk to meta file", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex); - chunkTracker.serialize(metaFile); - } catch (IOException ioe) { - logger.warn("{} shuffleId {} reduceId {} mapIndex {} reset position of meta file to {}", - appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex, metaStartPos); - metaFile.seek(metaStartPos); - throw ioe; + logger.trace("{} shuffleId {} reduceId {} mapIndex {} write chunk to meta file", + appShuffleId.appId, appShuffleId.shuffleId, reduceId, mapIndex); + if (indexMetaUpdateFailed) { + metaFile.getChannel().position(metaFile.getPos()); + } + chunkTracker.serialize(metaFile.getDos()); + metaFile.updatePos(metaFile.getChannel().position() - metaFile.getPos()); + } + + private void incrementIOExceptions() { + numIOExceptions++; + } + + private boolean shouldAbort(int ioExceptionsThresholdDuringMerge) { + return numIOExceptions > ioExceptionsThresholdDuringMerge; + } + + private void finalizePartition() throws IOException { + if (dataFilePos != lastChunkOffset) { + try { + updateChunkInfo(dataFilePos, lastMergedMapIndex); + } catch (IOException ioe) { + // Any exceptions here while updating the meta files can be ignored. If the files + // aren't successfully updated they will be truncated. + } } + // Get rid of any partial block data at the end of the file. This could either + // be due to failure, or a request still being processed when the shuffle + // merge gets finalized, or any exceptions while updating index/meta files. + dataChannel.truncate(lastChunkOffset); + indexFile.getChannel().truncate(indexFile.getPos()); + metaFile.getChannel().truncate(metaFile.getPos()); } void closeAllFiles() { @@ -877,7 +969,6 @@ void closeAllFiles() { } if (metaFile != null) { try { - // if the stream is closed, channel get's closed as well. 
metaFile.close(); } catch (IOException ioe) { logger.warn("Error closing meta file for {} shuffleId {} reduceId {}", @@ -902,6 +993,26 @@ void closeAllFiles() { protected void finalize() throws Throwable { closeAllFiles(); } + + @VisibleForTesting + MergeShuffleFile getIndexFile() { + return indexFile; + } + + @VisibleForTesting + MergeShuffleFile getMetaFile() { + return metaFile; + } + + @VisibleForTesting + FileChannel getDataChannel() { + return dataChannel; + } + + @VisibleForTesting + int getNumIOExceptions() { + return numIOExceptions; + } } /** @@ -931,4 +1042,52 @@ private AppPathsInfo( } } } + + @VisibleForTesting + static class MergeShuffleFile { + private FileChannel channel; + private DataOutputStream dos; + private long pos; + + @VisibleForTesting + MergeShuffleFile(File file) throws IOException { + FileOutputStream fos = new FileOutputStream(file); + channel = fos.getChannel(); + dos = new DataOutputStream(fos); + } + + @VisibleForTesting + MergeShuffleFile(FileChannel channel, DataOutputStream dos) { + this.channel = channel; + this.dos = dos; + } + + private void updatePos(long numBytes) { + pos += numBytes; + } + + void close() throws IOException { + try { + dos.close(); + } finally { + dos = null; + channel = null; + } + } + + @VisibleForTesting + DataOutputStream getDos() { + return dos; + } + + @VisibleForTesting + FileChannel getChannel() { + return channel; + } + + @VisibleForTesting + long getPos() { + return pos; + } + } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index 0f200dc721963..8c6f7434748ec 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -17,9 +17,12 @@ package org.apache.spark.network.shuffle; +import java.io.DataOutputStream; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -42,6 +45,7 @@ import org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.client.StreamCallbackWithID; +import org.apache.spark.network.shuffle.RemoteBlockPushResolver.MergeShuffleFile; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.shuffle.protocol.FinalizeShuffleMerge; import org.apache.spark.network.shuffle.protocol.MergeStatuses; @@ -411,6 +415,347 @@ void deleteExecutorDirs(Path[] dirs) { } } + @Test + public void testRecoverIndexFileAfterIOExceptions() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the index stream so it throws IOException + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new 
PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, testIndexFile.getPos()); + // Restore the index stream so it can write successfully again. + testIndexFile.restore(); + StreamCallbackWithID callback3 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback3.onData(callback3.getID(), ByteBuffer.wrap(new byte[2])); + callback3.onComplete(callback3.getID()); + assertEquals("index position", 24, testIndexFile.getPos()); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {0}, new long[] {11}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 7}, new int[][] {{0}, {1, 2}}); + } + + @Test + public void testRecoverIndexFileAfterIOExceptionsInFinalize() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the index stream so it throws IOException + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, testIndexFile.getPos()); + // The last update to index was unsuccessful however any further updates will be successful. + // Restore the index stream so it can write successfully again. 
+ testIndexFile.restore(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + assertEquals("index position", 24, testIndexFile.getPos()); + validateMergeStatuses(statuses, new int[] {0}, new long[] {9}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 5}, new int[][] {{0}, {1}}); + } + + @Test + public void testRecoverMetaFileAfterIOExceptions() throws IOException { + useTestFiles(false, true); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the meta stream so it throws IOException + TestMergeShuffleFile testMetaFile = (TestMergeShuffleFile) partitionInfo.getMetaFile(); + long metaPosBeforeClose = testMetaFile.getPos(); + testMetaFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index and meta file will be unsuccessful. + callback2.onComplete(callback2.getID()); + assertEquals("index position", 16, partitionInfo.getIndexFile().getPos()); + assertEquals("meta position", metaPosBeforeClose, testMetaFile.getPos()); + // Restore the meta stream so it can write successfully again. 
+ testMetaFile.restore(); + StreamCallbackWithID callback3 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback3.onData(callback3.getID(), ByteBuffer.wrap(new byte[2])); + callback3.onComplete(callback3.getID()); + assertEquals("index position", 24, partitionInfo.getIndexFile().getPos()); + assertTrue("meta position", testMetaFile.getPos() > metaPosBeforeClose); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {0}, new long[] {11}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 7}, new int[][] {{0}, {1, 2}}); + } + + @Test + public void testRecoverMetaFileAfterIOExceptionsInFinalize() throws IOException { + useTestFiles(false, true); + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[4])); + callback1.onComplete(callback1.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback1.getPartitionInfo(); + // Close the meta stream so it throws IOException + TestMergeShuffleFile testMetaFile = (TestMergeShuffleFile) partitionInfo.getMetaFile(); + long metaPosBeforeClose = testMetaFile.getPos(); + testMetaFile.close(); + StreamCallbackWithID callback2 = pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 1, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any IOExceptions because number of IOExceptions are less than + // the threshold but the update to index and meta file will be unsuccessful. + callback2.onComplete(callback2.getID()); + MergeShuffleFile indexFile = partitionInfo.getIndexFile(); + assertEquals("index position", 16, indexFile.getPos()); + assertEquals("meta position", metaPosBeforeClose, testMetaFile.getPos()); + // Restore the meta stream so it can write successfully again. 
+ testMetaFile.restore(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + assertEquals("index position", 24, indexFile.getPos()); + assertTrue("meta position", testMetaFile.getPos() > metaPosBeforeClose); + validateMergeStatuses(statuses, new int[] {0}, new long[] {9}); + MergedBlockMeta blockMeta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 0); + validateChunks(TEST_APP, 0, 0, blockMeta, new int[] {4, 5}, new int[][] {{0}, {1}}); + } + + @Test (expected = RuntimeException.class) + public void testIOExceptionsExceededThreshold() throws IOException { + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + callback.onComplete(callback.getID()); + // Close the data stream so it throws continuous IOException + partitionInfo.getDataChannel().close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[2])); + } catch (IOException ioe) { + // this will throw IOException so the client can retry. + callback1.onFailure(callback1.getID(), ioe); + } + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + // After 4 IOException, the server will respond with IOExceptions exceeded threshold + try { + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback.getID(), ByteBuffer.wrap(new byte[1])); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_5_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testIOExceptionsDuringMetaUpdateIncreasesExceptionCount() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + callback.onComplete(callback.getID()); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. + callback1.onComplete(callback1.getID()); + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + // After 4 IOException, the server will respond with IOExceptions exceeded threshold for any + // new request for this partition. 
+ try { + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[4])); + callback2.onComplete(callback2.getID()); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_5_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testRequestForAbortedShufflePartitionThrowsException() { + try { + testIOExceptionsDuringMetaUpdateIncreasesExceptionCount(); + } catch (Throwable t) { + // No more blocks can be merged to this partition. + } + try { + pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 10, 0, 0)); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_10_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testPendingBlockIsAbortedImmediately() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 6; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. + callback1.onComplete(callback1.getID()); + } catch (Throwable t) { + callback1.onFailure(callback1.getID(), t); + } + } + assertEquals(5, partitionInfo.getNumIOExceptions()); + // The server will respond with IOExceptions exceeded threshold for any additional attempts + // to write. + try { + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_0_0", + t.getMessage()); + throw t; + } + } + + @Test (expected = RuntimeException.class) + public void testWritingPendingBufsIsAbortedImmediatelyDuringComplete() throws IOException { + useTestFiles(true, false); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 0, 0, 0)); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + testIndexFile.close(); + for (int i = 1; i < 5; i++) { + RemoteBlockPushResolver.PushBlockStreamCallback callback1 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, i, 0, 0)); + try { + callback1.onData(callback1.getID(), ByteBuffer.wrap(new byte[5])); + // This will complete without any exceptions but the exception count is increased. 
+ callback1.onComplete(callback1.getID()); + } catch (Throwable t) { + callback1.onFailure(callback1.getID(), t); + } + } + assertEquals(4, partitionInfo.getNumIOExceptions()); + RemoteBlockPushResolver.PushBlockStreamCallback callback2 = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 5, 0, 0)); + callback2.onData(callback2.getID(), ByteBuffer.wrap(new byte[5])); + // This is deferred + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[4])); + // Callback2 completes which will throw another exception. + try { + callback2.onComplete(callback2.getID()); + } catch (Throwable t) { + callback2.onFailure(callback2.getID(), t); + } + assertEquals(5, partitionInfo.getNumIOExceptions()); + // Restore index file so that any further writes to it are successful and any exceptions are + // due to IOExceptions exceeding threshold. + testIndexFile.restore(); + try { + callback.onComplete(callback.getID()); + } catch (Throwable t) { + assertEquals("IOExceptions exceeded the threshold when merging shufflePush_0_0_0", + t.getMessage()); + throw t; + } + } + + @Test + public void testFailureWhileTruncatingFiles() throws IOException { + useTestFiles(true, false); + PushBlock[] pushBlocks = new PushBlock[] { + new PushBlock(0, 0, 0, ByteBuffer.wrap(new byte[2])), + new PushBlock(0, 1, 0, ByteBuffer.wrap(new byte[3])), + new PushBlock(0, 0, 1, ByteBuffer.wrap(new byte[5])), + new PushBlock(0, 1, 1, ByteBuffer.wrap(new byte[3])) + }; + pushBlockHelper(TEST_APP, pushBlocks); + RemoteBlockPushResolver.PushBlockStreamCallback callback = + (RemoteBlockPushResolver.PushBlockStreamCallback) pushResolver.receiveBlockDataAsStream( + new PushBlockStream(TEST_APP, 0, 2, 0, 0)); + callback.onData(callback.getID(), ByteBuffer.wrap(new byte[2])); + callback.onComplete(callback.getID()); + RemoteBlockPushResolver.AppShufflePartitionInfo partitionInfo = callback.getPartitionInfo(); + TestMergeShuffleFile testIndexFile = (TestMergeShuffleFile) partitionInfo.getIndexFile(); + // Close the index file so truncate throws IOException + testIndexFile.close(); + MergeStatuses statuses = pushResolver.finalizeShuffleMerge( + new FinalizeShuffleMerge(TEST_APP, 0)); + validateMergeStatuses(statuses, new int[] {1}, new long[] {8}); + MergedBlockMeta meta = pushResolver.getMergedBlockMeta(TEST_APP, 0, 1); + validateChunks(TEST_APP, 0, 1, meta, new int[]{5, 3}, new int[][]{{0},{1}}); + } + + private void useTestFiles(boolean useTestIndexFile, boolean useTestMetaFile) throws IOException { + pushResolver = new RemoteBlockPushResolver(conf) { + @Override + AppShufflePartitionInfo newAppShufflePartitionInfo(AppShuffleId appShuffleId, int reduceId, + File dataFile, File indexFile, File metaFile) throws IOException { + MergeShuffleFile mergedIndexFile = useTestIndexFile ? new TestMergeShuffleFile(indexFile) + : new MergeShuffleFile(indexFile); + MergeShuffleFile mergedMetaFile = useTestMetaFile ? 
new TestMergeShuffleFile(metaFile) : + new MergeShuffleFile(metaFile); + return new AppShufflePartitionInfo(appShuffleId, reduceId, dataFile, mergedIndexFile, + mergedMetaFile); + } + }; + registerExecutor(TEST_APP, prepareLocalDirs(localDirs)); + } + private Path[] createLocalDirs(int numLocalDirs) throws IOException { Path[] localDirs = new Path[numLocalDirs]; for (int i = 0; i < localDirs.length; i++) { @@ -493,4 +838,39 @@ private static class PushBlock { this.buffer = buffer; } } + + private static class TestMergeShuffleFile extends MergeShuffleFile { + private DataOutputStream activeDos; + private File file; + private FileChannel channel; + + private TestMergeShuffleFile(File file) throws IOException { + super(null, null); + this.file = file; + FileOutputStream fos = new FileOutputStream(file); + channel = fos.getChannel(); + activeDos = new DataOutputStream(fos); + } + + @Override + DataOutputStream getDos() { + return activeDos; + } + + @Override + FileChannel getChannel() { + return channel; + } + + @Override + void close() throws IOException { + activeDos.close(); + } + + void restore() throws IOException { + FileOutputStream fos = new FileOutputStream(file, true); + channel = fos.getChannel(); + activeDos = new DataOutputStream(fos); + } + } } From 5c9b421c3711ba373b4d5cbbd83a8ece91291ed0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 23 Dec 2020 14:48:01 -0800 Subject: [PATCH 0867/1009] [SPARK-33277][PYSPARK][SQL] Use ContextAwareIterator to stop consuming after the task ends ### What changes were proposed in this pull request? This is a retry of #30177. This is not a complete fix, but it would take long time to complete (#30242). As discussed offline, at least using `ContextAwareIterator` should be helpful enough for many cases. As the Python evaluation consumes the parent iterator in a separate thread, it could consume more data from the parent even after the task ends and the parent is closed. Thus, we should use `ContextAwareIterator` to stop consuming after the task ends. ### Why are the changes needed? Python/Pandas UDF right after off-heap vectorized reader could cause executor crash. E.g.,: ```py spark.range(0, 100000, 1, 1).write.parquet(path) spark.conf.set("spark.sql.columnVector.offheap.enabled", True) def f(x): return 0 fUdf = udf(f, LongType()) spark.read.parquet(path).select(fUdf('id')).head() ``` This is because, the Python evaluation consumes the parent iterator in a separate thread and it consumes more data from the parent even after the task ends and the parent is closed. If an off-heap column vector exists in the parent iterator, it could cause segmentation fault which crashes the executor. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added tests, and manually. Closes #30899 from ueshin/issues/SPARK-33277/context_aware_iterator. 
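A minimal sketch of the wrapping pattern this change applies in `EvalPythonExec` and `MapInPandasExec` (the function name here is a placeholder for the operator's per-partition processing):

```scala
// A minimal sketch: wrap the parent iterator so that consumption stops once the
// task has completed or been interrupted, even if a separate thread keeps reading.
import org.apache.spark.{ContextAwareIterator, TaskContext}
import org.apache.spark.sql.catalyst.InternalRow

def processPartition(iter: Iterator[InternalRow]): Iterator[InternalRow] = {
  val context = TaskContext.get()
  // The wrapper reports hasNext = false as soon as the task is completed or interrupted,
  // so a consumer running in another thread stops pulling from the parent iterator.
  new ContextAwareIterator(context, iter)
}
```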
Authored-by: Takuya UESHIN Signed-off-by: Dongjoon Hyun --- .../apache/spark/ContextAwareIterator.scala | 40 +++++++++++++++++++ .../sql/execution/python/EvalPythonExec.scala | 5 ++- .../execution/python/MapInPandasExec.scala | 9 +++-- 3 files changed, 48 insertions(+), 6 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/ContextAwareIterator.scala diff --git a/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala b/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala new file mode 100644 index 0000000000000..c4d0dd8aceab0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ContextAwareIterator.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import org.apache.spark.annotation.DeveloperApi + +/** + * :: DeveloperApi :: + * A TaskContext aware iterator. + * + * As the Python evaluation consumes the parent iterator in a separate thread, + * it could consume more data from the parent even after the task ends and the parent is closed. + * If an off-heap access exists in the parent iterator, it could cause segmentation fault + * which crashes the executor. + * Thus, we should use [[ContextAwareIterator]] to stop consuming after the task ends. + */ +@DeveloperApi +class ContextAwareIterator[+T](val context: TaskContext, val delegate: Iterator[T]) + extends Iterator[T] { + + override def hasNext: Boolean = + !context.isCompleted() && !context.isInterrupted() && delegate.hasNext + + override def next(): T = delegate.next() +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index 7c476ab03c002..fca43e454bff5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -21,7 +21,7 @@ import java.io.File import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SparkEnv, TaskContext} +import org.apache.spark.{ContextAwareIterator, SparkEnv, TaskContext} import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -89,6 +89,7 @@ trait EvalPythonExec extends UnaryExecNode { inputRDD.mapPartitions { iter => val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(context, iter) // The queue used to buffer input rows so we can drain it to // combine input with output from Python. @@ -120,7 +121,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toSeq) // Add rows to queue to join later with the result. 
- val projectedRowIter = iter.map { inputRow => + val projectedRowIter = contextAwareIterator.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala index 2bb808119c0ae..71f51f1abc6f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/MapInPandasExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.python import scala.collection.JavaConverters._ -import org.apache.spark.TaskContext +import org.apache.spark.{ContextAwareIterator, TaskContext} import org.apache.spark.api.python.{ChainedPythonFunctions, PythonEvalType} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -61,16 +61,17 @@ case class MapInPandasExec( val pythonRunnerConf = ArrowUtils.getPythonRunnerConfMap(conf) val outputTypes = child.schema + val context = TaskContext.get() + val contextAwareIterator = new ContextAwareIterator(context, inputIter) + // Here we wrap it via another row so that Python sides understand it // as a DataFrame. - val wrappedIter = inputIter.map(InternalRow(_)) + val wrappedIter = contextAwareIterator.map(InternalRow(_)) // DO NOT use iter.grouped(). See BatchIterator. val batchIter = if (batchSize > 0) new BatchIterator(wrappedIter, batchSize) else Iterator(wrappedIter) - val context = TaskContext.get() - val columnarBatchIter = new ArrowPythonRunner( chainedFunc, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, From d467d817260d6ca605c34f493e68d0877209170f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 23 Dec 2020 15:31:56 -0800 Subject: [PATCH 0868/1009] [SPARK-33893][CORE] Exclude fallback block manager from executorList ### What changes were proposed in this pull request? This PR aims to exclude fallback block manager from `executorList` function. ### Why are the changes needed? When a fallback storage is used, the executors UI tab hangs because the executor list REST API result doesn't have `peakMemoryMetrics` of `ExecutorMetrics`. The root cause is that the block manager id used by fallback storage is included in the API result and it doesn't have `peakMemoryMetrics` because it's populated during HeartBeat reporting. We should hide it. ### Does this PR introduce _any_ user-facing change? No. This is a bug fix on UI. ### How was this patch tested? Manual. Run the following and visit Spark `executors` tab UI with browser. ``` bin/spark-shell -c spark.storage.decommission.fallbackStorage.path=file:///tmp/spark-storage/ ``` Closes #30911 from dongjoon-hyun/SPARK-33893. 
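The fix in the diff below is a one-line filter; as a rough illustration of the idea (the real constant, FallbackStorage.FALLBACK_BLOCK_MANAGER_ID, is internal to Spark, so a placeholder id is used here), entries registered by the fallback storage never receive peakMemoryMetrics through heartbeats and are therefore dropped before the executor list reaches the REST API and the UI.

```scala
import org.apache.spark.status.api.v1.ExecutorSummary

// Illustration only: `fallbackExecutorId` stands in for the executor id carried by the
// synthetic block manager that fallback storage registers.
def visibleExecutors(
    summaries: Seq[ExecutorSummary],
    fallbackExecutorId: String): Seq[ExecutorSummary] =
  summaries.filter(_.id != fallbackExecutorId)
```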
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/status/AppStatusStore.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index affa85b76cf19..b9cc9145feb4d 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.HashMap import org.apache.spark.{JobExecutionStatus, SparkConf} import org.apache.spark.status.api.v1 +import org.apache.spark.storage.FallbackStorage.FALLBACK_BLOCK_MANAGER_ID import org.apache.spark.ui.scope._ import org.apache.spark.util.Utils import org.apache.spark.util.kvstore.{InMemoryStore, KVStore} @@ -88,7 +89,7 @@ private[spark] class AppStatusStore( } else { base } - filtered.asScala.map(_.info).toSeq + filtered.asScala.map(_.info).filter(_.id != FALLBACK_BLOCK_MANAGER_ID.executorId).toSeq } def executorSummary(executorId: String): v1.ExecutorSummary = { From 368a2c341d8f3315c759e1c2362439534a9d44e7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Dec 2020 15:38:32 -0800 Subject: [PATCH 0869/1009] [SPARK-33877][SQL][FOLLOWUP] SQL reference documents for INSERT w/ a column list ### What changes were proposed in this pull request? followup of https://github.com/apache/spark/commit/a3dd8dacee8f6b316be90500f9fd8ec8997a5784 via suggestion https://github.com/apache/spark/pull/30888#discussion_r547822642 ### Why are the changes needed? doc improvement ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? passing GA doc Closes #30909 from yaooqinn/SPARK-33877-F. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-dml-insert-into.md | 3 +-- docs/sql-ref-syntax-dml-insert-overwrite-table.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-syntax-dml-insert-into.md b/docs/sql-ref-syntax-dml-insert-into.md index 96a95b1a629e9..15400780289e2 100644 --- a/docs/sql-ref-syntax-dml-insert-into.md +++ b/docs/sql-ref-syntax-dml-insert-into.md @@ -47,12 +47,11 @@ INSERT INTO [ TABLE ] table_identifier [ partition_spec ] [ ( column_list ) ] * **column_list** - An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. Spark will reorder the columns of the input query to match the table schema according to the specified column list. **Note:**The current behaviour has some limitations: - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. - The size of the column list should be exactly the size of the data from `VALUES` clause or query. - - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... 
) ]** diff --git a/docs/sql-ref-syntax-dml-insert-overwrite-table.md b/docs/sql-ref-syntax-dml-insert-overwrite-table.md index f2413fb72464f..5fd0880fe45d7 100644 --- a/docs/sql-ref-syntax-dml-insert-overwrite-table.md +++ b/docs/sql-ref-syntax-dml-insert-overwrite-table.md @@ -47,14 +47,13 @@ INSERT OVERWRITE [ TABLE ] table_identifier [ partition_spec [ IF NOT EXISTS ] ] * **column_list** - An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. + An optional parameter that specifies a comma-separated list of columns belonging to the `table_identifier` table. Spark will reorder the columns of the input query to match the table schema according to the specified column list. **Note** The current behaviour has some limitations: - All specified columns should exist in the table and not be duplicated from each other. It includes all columns except the static partition columns. - The size of the column list should be exactly the size of the data from `VALUES` clause or query. - - The order of the column list is alterable and determines how the data from `VALUES` clause or query to be inserted by position. * **VALUES ( { value `|` NULL } [ , ... ] ) [ , ( ... ) ]** From 61881bb6988aa0320b4bacfabbc0ee6f05f287cb Mon Sep 17 00:00:00 2001 From: offthewall123 Date: Wed, 23 Dec 2020 20:01:53 -0600 Subject: [PATCH 0870/1009] [SPARK-33835][CORE] Refector AbstractCommandBuilder.buildJavaCommand: use firstNonEmpty ### What changes were proposed in this pull request? refector AbstractCommandBuilder.buildJavaCommand: use firstNonEmpty ### Why are the changes needed? For better code understanding, and firstNonEmpty can detect javaHome = " ", an empty string. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? End to End. Closes #30831 from offthewall123/refector_AbstractCommandBuilder. Authored-by: offthewall123 Signed-off-by: Sean Owen --- .../spark/launcher/AbstractCommandBuilder.java | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java index 778fd46b91fa1..24ad9cbdba087 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractCommandBuilder.java @@ -92,17 +92,13 @@ abstract List buildCommand(Map env) List buildJavaCommand(String extraClassPath) throws IOException { List cmd = new ArrayList<>(); - String[] candidateJavaHomes = new String[] { - javaHome, + String firstJavaHome = firstNonEmpty(javaHome, childEnv.get("JAVA_HOME"), System.getenv("JAVA_HOME"), - System.getProperty("java.home") - }; - for (String javaHome : candidateJavaHomes) { - if (javaHome != null) { - cmd.add(join(File.separator, javaHome, "bin", "java")); - break; - } + System.getProperty("java.home")); + + if (firstJavaHome != null) { + cmd.add(join(File.separator, firstJavaHome, "bin", "java")); } // Load extra JAVA_OPTS from conf/java-opts, if it exists. From 86c1cfc5791dae5f2ee8ccd5095dbeb2243baba6 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Thu, 24 Dec 2020 12:44:37 +0900 Subject: [PATCH 0871/1009] [SPARK-33659][SS] Document the current behavior for DataStreamWriter.toTable API ### What changes were proposed in this pull request? 
Follow up work for #30521, document the following behaviors in the API doc: - Figure out the effects when configurations are (provider/partitionBy) conflicting with the existing table. - Document the lack of functionality on creating a v2 table, and guide that the users should ensure a table is created in prior to avoid the behavior unintended/insufficient table is being created. ### Why are the changes needed? We didn't have full support for the V2 table created in the API now. (TODO SPARK-33638) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Document only. Closes #30885 from xuanyuanking/SPARK-33659. Authored-by: Yuanjian Li Signed-off-by: HyukjinKwon --- python/pyspark/sql/streaming.py | 13 ++++++++++--- .../spark/sql/streaming/DataStreamWriter.scala | 14 ++++++++++++-- .../streaming/test/DataStreamTableAPISuite.scala | 6 +++--- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 5f122293f4a0a..51941a6269074 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -1498,8 +1498,7 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que Starts the execution of the streaming query, which will continually output results to the given table as new data arrives. - A new table will be created if the table not exists. The returned - :class:`StreamingQuery` object can be used to interact with the stream. + The returned :class:`StreamingQuery` object can be used to interact with the stream. .. versionadded:: 3.1.0 @@ -1531,6 +1530,15 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que ----- This API is evolving. + For v1 table, partitioning columns provided by `partitionBy` will be respected no matter + the table exists or not. A new table will be created if the table not exists. + + For v2 table, `partitionBy` will be ignored if the table already exists. `partitionBy` will + be respected only if the v2 table does not exist. Besides, the v2 table created by this API + lacks some functionalities (e.g., customized properties, options, and serde info). If you + need them, please create the v2 table manually before the execution to avoid creating a + table with incomplete information. + Examples -------- >>> sdf.writeStream.format('parquet').queryName('query').toTable('output_table') @@ -1543,7 +1551,6 @@ def toTable(self, tableName, format=None, outputMode=None, partitionBy=None, que ... format='parquet', ... checkpointLocation='/tmp/checkpoint') # doctest: +SKIP """ - # TODO(SPARK-33659): document the current behavior for DataStreamWriter.toTable API self.options(**options) if outputMode is not None: self.outputMode(outputMode) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index 2703119ce1167..1be09e0e5f97e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -302,11 +302,21 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) { /** * Starts the execution of the streaming query, which will continually output results to the given - * table as new data arrives. A new table will be created if the table not exists. The returned - * [[StreamingQuery]] object can be used to interact with the stream. + * table as new data arrives. 
The returned [[StreamingQuery]] object can be used to interact with + * the stream. + * + * For v1 table, partitioning columns provided by `partitionBy` will be respected no matter the + * table exists or not. A new table will be created if the table not exists. + * + * For v2 table, `partitionBy` will be ignored if the table already exists. `partitionBy` will be + * respected only if the v2 table does not exist. Besides, the v2 table created by this API lacks + * some functionalities (e.g., customized properties, options, and serde info). If you need them, + * please create the v2 table manually before the execution to avoid creating a table with + * incomplete information. * * @since 3.1.0 */ + @Evolving @throws[TimeoutException] def toTable(tableName: String): StreamingQuery = { this.tableName = tableName diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala index 9cf649605ed1c..4c5c5e63cecb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/test/DataStreamTableAPISuite.scala @@ -275,7 +275,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write.format("parquet") .option("path", dir.getCanonicalPath).saveAsTable(tableName) @@ -289,7 +289,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write.format("parquet").saveAsTable(tableName) @@ -302,7 +302,7 @@ class DataStreamTableAPISuite extends StreamTest with BeforeAndAfter { val tableName = "stream_test" withTable(tableName) { // The file written by batch will not be seen after the table was written by a streaming - // query. This is because we loads files from the metadata log instead of listing them + // query. This is because we load files from the metadata log instead of listing them // using HDFS API. Seq(4, 5, 6).toDF("value").write .mode("append").format("parquet").save(dir.getCanonicalPath) From f1d37972910d94c713c6a7cb7bd6ea2b52576d00 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Thu, 24 Dec 2020 05:21:39 +0000 Subject: [PATCH 0872/1009] [SPARK-33886][SQL] UnresolvedTable should retain SQL text position for DDL commands ### What changes were proposed in this pull request? Currently, there are many DDL commands where the position of the unresolved identifiers are incorrect: ``` scala> sql("MSCK REPAIR TABLE unknown") org.apache.spark.sql.AnalysisException: Table not found: unknown; line 1 pos 0; ``` , whereas the `pos` should be 18. 
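For reference, the expected value is simply the zero-based offset of the identifier within the statement text, which is also how the new test below computes it:

```scala
// "MSCK REPAIR TABLE " is 18 characters long, so the unresolved identifier starts at
// line 1, position 18 rather than position 0.
val stmt = "MSCK REPAIR TABLE unknown"
assert(stmt.indexOf("unknown") == 18)
```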
This PR proposes to fix this issue for commands using `UnresolvedTable`: ``` MSCK REPAIR TABLE t LOAD DATA LOCAL INPATH 'filepath' INTO TABLE t TRUNCATE TABLE t SHOW PARTITIONS t ALTER TABLE t RECOVER PARTITIONS ALTER TABLE t ADD PARTITION (p=1) ALTER TABLE t PARTITION (p=1) RENAME TO PARTITION (p=2) ALTER TABLE t DROP PARTITION (p=1) ALTER TABLE t SET SERDEPROPERTIES ('a'='b') COMMENT ON TABLE t IS 'hello'" ``` ### Why are the changes needed? To fix a bug. ### Does this PR introduce _any_ user-facing change? Yes, now the above example will print the following: ``` org.apache.spark.sql.AnalysisException: Table not found: unknown; line 1 pos 18; ``` ### How was this patch tested? Add a new suite of tests. Closes #30900 from imback82/position_Fix. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 41 ++++++++++-------- .../AnalysisExceptionPositionSuite.scala | 43 +++++++++++++++++++ 2 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 2af84fa079d97..9ac7b06d0a132 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2160,6 +2160,15 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } } + /** + * Create an [[UnresolvedTable]] from a multi-part identifier context. + */ + private def createUnresolvedTable( + ctx: MultipartIdentifierContext, + commandName: String): LogicalPlan = withOrigin(ctx) { + UnresolvedTable(visitMultipartIdentifier(ctx), commandName) + } + /** * Create a [[CalendarInterval]] literal expression. Two syntaxes are supported: * - multiple unit value pairs, for instance: interval 2 months 2 days. 
@@ -3616,8 +3625,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg * }}} */ override def visitRepairTable(ctx: RepairTableContext): LogicalPlan = withOrigin(ctx) { - RepairTable( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "MSCK REPAIR TABLE")) + RepairTable(createUnresolvedTable(ctx.multipartIdentifier, "MSCK REPAIR TABLE")) } /** @@ -3631,7 +3639,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitLoadData(ctx: LoadDataContext): LogicalPlan = withOrigin(ctx) { LoadData( - child = UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "LOAD DATA"), + child = createUnresolvedTable(ctx.multipartIdentifier, "LOAD DATA"), path = string(ctx.path), isLocal = ctx.LOCAL != null, isOverwrite = ctx.OVERWRITE != null, @@ -3699,7 +3707,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitTruncateTable(ctx: TruncateTableContext): LogicalPlan = withOrigin(ctx) { TruncateTable( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier), "TRUNCATE TABLE"), + createUnresolvedTable(ctx.multipartIdentifier, "TRUNCATE TABLE"), Option(ctx.partitionSpec).map(visitNonOptionalPartitionSpec)) } @@ -3719,7 +3727,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(specCtx), None) } ShowPartitions( - UnresolvedTable(visitMultipartIdentifier(ctx.multipartIdentifier()), "SHOW PARTITIONS"), + createUnresolvedTable(ctx.multipartIdentifier(), "SHOW PARTITIONS"), partitionKeys) } @@ -3772,8 +3780,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitRecoverPartitions( ctx: RecoverPartitionsContext): LogicalPlan = withOrigin(ctx) { AlterTableRecoverPartitions( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... RECOVER PARTITIONS")) } @@ -3801,8 +3809,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg UnresolvedPartitionSpec(spec, location) } AlterTableAddPartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... ADD PARTITION ..."), specsAndLocs.toSeq, ctx.EXISTS != null) @@ -3819,8 +3827,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg override def visitRenameTablePartition( ctx: RenameTablePartitionContext): LogicalPlan = withOrigin(ctx) { AlterTableRenamePartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), visitNonOptionalPartitionSpec(ctx.to)) @@ -3847,8 +3855,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val partSpecs = ctx.partitionSpec.asScala.map(visitNonOptionalPartitionSpec) .map(spec => UnresolvedPartitionSpec(spec)) AlterTableDropPartition( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... 
DROP PARTITION ..."), partSpecs.toSeq, ifExists = ctx.EXISTS != null, @@ -3867,8 +3875,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ override def visitSetTableSerDe(ctx: SetTableSerDeContext): LogicalPlan = withOrigin(ctx) { AlterTableSerDeProperties( - UnresolvedTable( - visitMultipartIdentifier(ctx.multipartIdentifier), + createUnresolvedTable( + ctx.multipartIdentifier, "ALTER TABLE ... SET [SERDE|SERDEPROPERTIES]"), Option(ctx.STRING).map(string), Option(ctx.tablePropertyList).map(visitPropertyKeyValues), @@ -4084,7 +4092,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg case SqlBaseParser.NULL => "" case _ => string(ctx.STRING) } - val nameParts = visitMultipartIdentifier(ctx.multipartIdentifier) - CommentOnTable(UnresolvedTable(nameParts, "COMMENT ON TABLE"), comment) + CommentOnTable(createUnresolvedTable(ctx.multipartIdentifier, "COMMENT ON TABLE"), comment) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala new file mode 100644 index 0000000000000..276cb4b5987f6 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisExceptionPositionSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan + +class AnalysisExceptionPositionSuite extends AnalysisTest { + test("SPARK-33886: UnresolvedTable should retain sql text position") { + verifyTablePosition("MSCK REPAIR TABLE unknown", "unknown") + verifyTablePosition("LOAD DATA LOCAL INPATH 'filepath' INTO TABLE unknown", "unknown") + verifyTablePosition("TRUNCATE TABLE unknown", "unknown") + verifyTablePosition("SHOW PARTITIONS unknown", "unknown") + verifyTablePosition("ALTER TABLE unknown RECOVER PARTITIONS", "unknown") + verifyTablePosition("ALTER TABLE unknown ADD PARTITION (p=1)", "unknown") + verifyTablePosition("ALTER TABLE unknown PARTITION (p=1) RENAME TO PARTITION (p=2)", "unknown") + verifyTablePosition("ALTER TABLE unknown DROP PARTITION (p=1)", "unknown") + verifyTablePosition("ALTER TABLE unknown SET SERDEPROPERTIES ('a'='b')", "unknown") + verifyTablePosition("COMMENT ON TABLE unknown IS 'hello'", "unknown") + } + + private def verifyTablePosition(sql: String, table: String): Unit = { + val expectedPos = sql.indexOf(table) + assert(expectedPos != -1) + assertAnalysisError( + parsePlan(sql), + Seq(s"Table not found: $table; line 1 pos $expectedPos")) + } +} From d7dc42d5f6bbe861c7e4ac1bb49e0830af5e19f4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 07:40:38 +0000 Subject: [PATCH 0873/1009] [SPARK-33895][SQL] Char and Varchar fail in MetaOperation of ThriftServer ### What changes were proposed in this pull request? ``` Caused by: java.lang.IllegalArgumentException: Unrecognized type name: CHAR(10) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.toJavaSQLType(SparkGetColumnsOperation.scala:187) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$addToRowSet$1(SparkGetColumnsOperation.scala:203) at scala.collection.immutable.List.foreach(List.scala:392) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.addToRowSet(SparkGetColumnsOperation.scala:195) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4(SparkGetColumnsOperation.scala:99) at org.apache.spark.sql.hive.thriftserver.SparkGetColumnsOperation.$anonfun$runInternal$4$adapted(SparkGetColumnsOperation.scala:98) at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) ``` meta operation is targeting raw table schema, we need to handle these types there. ### Why are the changes needed? bugfix, see the above case ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests locally ![image](https://user-images.githubusercontent.com/8326978/103069196-cdfcc480-45f9-11eb-9c6a-d4c42123c6e3.png) Closes #30914 from yaooqinn/SPARK-33895. 
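The root cause is that these metadata operations read the raw table schema, where char/varchar columns still appear as CHAR(n)/VARCHAR(n) instead of being replaced by StringType, so the JDBC type mapping has to cover them. A partial sketch of the mapping the diff below adds (only a few branches shown; the real method in SparkGetColumnsOperation handles every type):

```scala
import java.sql.Types
import org.apache.spark.sql.types._

// Partial sketch: translate Catalyst types from a raw table schema into JDBC type codes,
// including the CHAR(n)/VARCHAR(n) cases that previously threw IllegalArgumentException.
def toJdbcType(dt: DataType): Int = dt match {
  case StringType     => Types.VARCHAR
  case VarcharType(_) => Types.VARCHAR
  case CharType(_)    => Types.CHAR
  case IntegerType    => Types.INTEGER
  case _              => Types.OTHER // placeholder for the remaining branches
}
```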
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../hive/thriftserver/SparkGetColumnsOperation.scala | 3 +++ .../hive/thriftserver/SparkGetTypeInfoOperation.scala | 2 +- .../thriftserver/SparkMetadataOperationSuite.scala | 10 +++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 66e6cf82922b7..1f9c05c330ace 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -133,6 +133,7 @@ private[hive] class SparkGetColumnsOperation( case dt @ (BooleanType | _: NumericType | DateType | TimestampType | CalendarIntervalType | NullType) => Some(dt.defaultSize) + case CharType(n) => Some(n) case StructType(fields) => val sizeArr = fields.map(f => getColumnSize(f.dataType)) if (sizeArr.contains(None)) { @@ -176,6 +177,8 @@ private[hive] class SparkGetColumnsOperation( case DoubleType => java.sql.Types.DOUBLE case _: DecimalType => java.sql.Types.DECIMAL case StringType => java.sql.Types.VARCHAR + case VarcharType(_) => java.sql.Types.VARCHAR + case CharType(_) => java.sql.Types.CHAR case BinaryType => java.sql.Types.BINARY case DateType => java.sql.Types.DATE case TimestampType => java.sql.Types.TIMESTAMP diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index 26b5f8ad8cee1..bd6feeaff08e8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -99,6 +99,6 @@ private[hive] object SparkGetTypeInfoUtil { TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, DATE_TYPE, TIMESTAMP_TYPE, - ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, CHAR_TYPE, VARCHAR_TYPE) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index bb7448293f559..897ea00975a05 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -283,6 +283,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { .add("c14", "timestamp", nullable = false, "14") .add("c15", "struct", nullable = true, "15") .add("c16", "binary", nullable = false, "16") + .add("c17", "char(255)", nullable = true, "17") + .add("c18", "varchar(1024)", nullable = false, "18") val ddl = s""" @@ -299,7 +301,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { import java.sql.Types._ val expectedJavaTypes = Seq(BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, FLOAT, DOUBLE, - DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, BINARY) + DECIMAL, DECIMAL, VARCHAR, ARRAY, ARRAY, JAVA_OBJECT, DATE, TIMESTAMP, STRUCT, 
BINARY, + CHAR, VARCHAR) var pos = 0 @@ -313,7 +316,8 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { val colSize = rowSet.getInt("COLUMN_SIZE") schema(pos).dataType match { - case StringType | BinaryType | _: ArrayType | _: MapType => assert(colSize === 0) + case StringType | BinaryType | _: ArrayType | _: MapType | _: VarcharType => + assert(colSize === 0) case o => assert(colSize === o.defaultSize) } @@ -342,7 +346,7 @@ class SparkMetadataOperationSuite extends HiveThriftServer2TestBase { pos += 1 } - assert(pos === 17, "all columns should have been verified") + assert(pos === 19, "all columns should have been verified") } } From 32d4a2b06220861efda1058b26d9a2ed3a1b2c74 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 24 Dec 2020 08:10:28 +0000 Subject: [PATCH 0874/1009] [SPARK-33861][SQL] Simplify conditional in predicate ### What changes were proposed in this pull request? This pr simplify conditional in predicate, after this change we can push down the filter to datasource: Expression | After simplify -- | -- IF(cond, trueVal, false) | AND(cond, trueVal) IF(cond, trueVal, true) | OR(NOT(cond), trueVal) IF(cond, false, falseVal) | AND(NOT(cond), elseVal) IF(cond, true, falseVal) | OR(cond, elseVal) CASE WHEN cond THEN trueVal ELSE false END | AND(cond, trueVal) CASE WHEN cond THEN trueVal END | AND(cond, trueVal) CASE WHEN cond THEN trueVal ELSE null END | AND(cond, trueVal) CASE WHEN cond THEN trueVal ELSE true END | OR(NOT(cond), trueVal) CASE WHEN cond THEN false ELSE elseVal END | AND(NOT(cond), elseVal) CASE WHEN cond THEN false END | false CASE WHEN cond THEN true ELSE elseVal END | OR(cond, elseVal) CASE WHEN cond THEN true END | cond ### Why are the changes needed? Improve query performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30865 from wangyum/SPARK-33861. 
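One point worth spelling out: the rewrite is applied only in predicate position (Filter, Join conditions, and the conditions of DELETE/UPDATE in this rule), where a NULL result discards a row exactly like false does, so replacing the conditional with plain AND/OR/NOT cannot change which rows survive; in a projection the original conditional is kept, as the new tests verify. A small end-to-end illustration, assuming columns named `i` and `b`:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.expr

// Both filters keep exactly the same rows; the second form consists of plain boolean
// connectives, which is what allows the condition to be pushed down to a data source.
val spark = SparkSession.builder().master("local[1]").appName("demo").getOrCreate()
import spark.implicits._

val df = Seq((5, true), (20, true), (20, false)).toDF("i", "b")
val viaConditional = df.where(expr("IF(i > 10, b, false)"))
val viaConjunction = df.where(expr("i > 10 AND b"))
assert(viaConditional.collect().toSet == viaConjunction.collect().toSet)
```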
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 1 + .../SimplifyConditionalsInPredicate.scala | 82 ++++++ ...SimplifyConditionalsInPredicateSuite.scala | 237 ++++++++++++++++++ .../q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-modified/q34/explain.txt | 8 +- .../q34/simplified.txt | 2 +- .../q73.sf100/explain.txt | 8 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-modified/q73/explain.txt | 8 +- .../q73/simplified.txt | 2 +- .../approved-plans-v1_4/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q34/explain.txt | 8 +- .../approved-plans-v1_4/q34/simplified.txt | 2 +- .../approved-plans-v1_4/q73.sf100/explain.txt | 12 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q73/explain.txt | 8 +- .../approved-plans-v1_4/q73/simplified.txt | 2 +- .../approved-plans-v2_7/q34.sf100/explain.txt | 12 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q34/explain.txt | 8 +- .../approved-plans-v2_7/q34/simplified.txt | 2 +- 23 files changed, 378 insertions(+), 58 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 61bcf9038b845..7b9b99bba5574 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -103,6 +103,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, + SimplifyConditionalsInPredicate, PruneFilters, SimplifyCasts, SimplifyCaseConversionExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala new file mode 100644 index 0000000000000..1ea85085bccdb --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, Literal, Not, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.types.BooleanType + +/** + * A rule that converts conditional expressions to predicate expressions, if possible, in the + * search condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an implicit Boolean operator + * "(search condition) = TRUE". After this converting, we can potentially push the filter down to + * the data source. + * + * Supported cases are: + * - IF(cond, trueVal, false) => AND(cond, trueVal) + * - IF(cond, trueVal, true) => OR(NOT(cond), trueVal) + * - IF(cond, false, falseVal) => AND(NOT(cond), elseVal) + * - IF(cond, true, falseVal) => OR(cond, elseVal) + * - CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) + * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) + * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) + * - CASE WHEN cond THEN false END => false + * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) + * - CASE WHEN cond THEN true END => cond + */ +object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { + + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) + case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) + case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(simplifyConditional(cond))) + case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) + } + + private def simplifyConditional(e: Expression): Expression = e match { + case And(left, right) => And(simplifyConditional(left), simplifyConditional(right)) + case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right)) + case If(cond, trueValue, FalseLiteral) => And(cond, trueValue) + case If(cond, trueValue, TrueLiteral) => Or(Not(cond), trueValue) + case If(cond, FalseLiteral, falseValue) => And(Not(cond), falseValue) + case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue) + case CaseWhen(Seq((cond, trueValue)), + Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) => + And(cond, trueValue) + case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => + Or(Not(cond), trueValue) + case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => + FalseLiteral + case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => + And(Not(cond), elseValue) + case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => + cond + case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => + Or(cond, elseValue) + case e if e.dataType == BooleanType => e + case e => + assert(e.dataType != BooleanType, + "Expected a Boolean type expression in SimplifyConditionalsInPredicate, " + + s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.") + e + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala new file mode 
100644 index 0000000000000..1f3c24bdbb664 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} +import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.types.{BooleanType, IntegerType} + +class SimplifyConditionalsInPredicateSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("SimplifyConditionalsInPredicate", FixedPoint(10), + NullPropagation, + ConstantFolding, + BooleanSimplification, + SimplifyConditionals, + SimplifyConditionalsInPredicate) :: Nil + } + + private val testRelation = + LocalRelation('i.int, 'b.boolean, 'a.array(IntegerType), 'm.map(IntegerType, IntegerType)) + private val anotherTestRelation = LocalRelation('d.int) + + test("IF(cond, trueVal, false) => AND(cond, trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + FalseLiteral) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, trueVal, true) => OR(NOT(cond), trueVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b"), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, false, falseVal) => AND(NOT(cond), elseVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + FalseLiteral, + 
UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("IF(cond, true, falseVal) => OR(cond, elseVal)") { + val originalCond = If( + UnresolvedAttribute("i") > Literal(10), + TrueLiteral, + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)") { + Seq(Some(FalseLiteral), None, Some(Literal(null, BooleanType))).foreach { elseExp => + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + elseExp) + val expectedCond = And( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + } + + test("CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), + TrueLiteral) + val expectedCond = Or( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral)), + UnresolvedAttribute("b")) + val expectedCond = And( + UnresolvedAttribute("i") <= Literal(10), + UnresolvedAttribute("b")) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN false END => false") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral))) + testFilter(originalCond, expectedCond = FalseLiteral) + testJoin(originalCond, expectedCond = FalseLiteral) + testDelete(originalCond, expectedCond = FalseLiteral) + testUpdate(originalCond, expectedCond = FalseLiteral) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), + UnresolvedAttribute("b")) + val expectedCond = Or( + UnresolvedAttribute("i") > Literal(10), + UnresolvedAttribute("b")) + 
testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("CASE WHEN cond THEN true END => cond") { + val originalCond = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral))) + val expectedCond = UnresolvedAttribute("i") > Literal(10) + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("Simplify conditional in conditions of CaseWhen inside another CaseWhen") { + val nestedCaseWhen = CaseWhen( + Seq((UnresolvedAttribute("i") > Literal(10)) -> UnresolvedAttribute("b")), + FalseLiteral) + val originalCond = CaseWhen(Seq(IsNotNull(nestedCaseWhen) -> FalseLiteral)) + val expectedCond = FalseLiteral + + testFilter(originalCond, expectedCond = expectedCond) + testJoin(originalCond, expectedCond = expectedCond) + testDelete(originalCond, expectedCond = expectedCond) + testUpdate(originalCond, expectedCond = expectedCond) + testProjection(originalCond, expectedExpr = originalCond) + } + + test("Not expected type - SimplifyConditionalsInPredicate") { + val e = intercept[AnalysisException] { + testFilter(originalCond = Literal(null, IntegerType), expectedCond = FalseLiteral) + }.getMessage + assert(e.contains("'CAST(NULL AS INT)' of type int is not a boolean")) + } + + private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.where(exp), originalCond, expectedCond) + } + + private def testJoin(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, exp) => rel.join(anotherTestRelation, Inner, Some(exp)), originalCond, expectedCond) + } + + private def testProjection(originalExpr: Expression, expectedExpr: Expression): Unit = { + test((rel, exp) => rel.select(exp), originalExpr, expectedExpr) + } + + private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + } + + private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { + test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) + } + + private def test( + func: (LogicalPlan, Expression) => LogicalPlan, + originalExpr: Expression, + expectedExpr: Expression): Unit = { + + val originalPlan = func(testRelation, originalExpr).analyze + val optimizedPlan = Optimize.execute(originalPlan) + val expectedPlan = func(testRelation, expectedExpr).analyze + comparePlans(optimizedPlan, expectedPlan) + } +} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt index ac1fca4f67a02..547806128e64a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: 
true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt index d9b416ddba9ef..c9945cda67746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt index 898d37403d6a0..74bbb52c55fbc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] 
-Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt index 5af07f1d4ddef..4484587f65355 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt index 25da173c8ecde..51b480ef64ab2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: 
[ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (35) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt index 7496388d3430c..8695f9da17114 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt @@ -44,7 +44,7 @@ WholeStageCodegen (7) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt index e420b656c3ad0..56ad4f4d926eb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,Unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = Unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = 
Unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt index 46b7241565719..5e49f6cb603d5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt index 17bb0e7e71d27..6fa9bb85f0b79 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN 
(hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt index d9b416ddba9ef..c9945cda67746 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt index 18f465caea20d..1aea77422b14f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index 5af07f1d4ddef..4484587f65355 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt index 4af604ca3f65f..f88f1f48ac2b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 1) AND (cnt#22 <= 5)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: 
hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#30] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt index af8527f155c8e..9de2f2ab4cd68 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index f4565c3edb172..43c73f3c7af61 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.0)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / 
cast(hd_vehicle_count#16 as double)) > 1.0)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), true, [id=#29] +Arguments: rangepartitioning(cnt#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index 46b7241565719..5e49f6cb603d5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt index c7b8685b64bea..5d8f0d04161bf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -120,7 +120,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -128,7 +128,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as 
double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -156,7 +156,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 5] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -171,7 +171,7 @@ Condition : ((cnt#22 >= 15) AND (cnt#22 <= 20)) (29) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#23] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] (30) Sort [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22] @@ -193,7 +193,7 @@ Condition : isnotnull(c_customer_sk#24) (34) Exchange Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: hashpartitioning(c_customer_sk#24, 5), true, [id=#29] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#29] (35) Sort [codegen id : 8] Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] @@ -210,7 +210,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#24, c_sa (38) Exchange Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#30] +Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 10] Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt index 451659e2c617c..244478fd68825 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt @@ -47,7 +47,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt index 
01b5f46bd5dd4..e588993073a91 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -117,7 +117,7 @@ Input [5]: [ss_customer_sk#2, ss_hdemo_sk#3, ss_store_sk#4, ss_ticket_number#5, Output [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000),EqualTo(hd_buy_potential,unknown)), GreaterThan(hd_vehicle_count,0), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] @@ -125,7 +125,7 @@ Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_coun (20) Filter [codegen id : 3] Input [4]: [hd_demo_sk#13, hd_buy_potential#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : ((((isnotnull(hd_vehicle_count#16) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND (CASE WHEN (hd_vehicle_count#16 > 0) THEN (cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) ELSE null END > 1.2)) AND isnotnull(hd_demo_sk#13)) +Condition : (((((isnotnull(hd_vehicle_count#16) AND isnotnull(hd_dep_count#15)) AND ((hd_buy_potential#14 = >10000) OR (hd_buy_potential#14 = unknown))) AND (hd_vehicle_count#16 > 0)) AND ((cast(hd_dep_count#15 as double) / cast(hd_vehicle_count#16 as double)) > 1.2)) AND isnotnull(hd_demo_sk#13)) (21) Project [codegen id : 3] Output [1]: [hd_demo_sk#13] @@ -153,7 +153,7 @@ Results [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] (26) Exchange Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), true, [id=#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] (27) HashAggregate [codegen id : 6] Input [3]: [ss_ticket_number#5, ss_customer_sk#2, count#19] @@ -195,7 +195,7 @@ Input [8]: [ss_ticket_number#5, ss_customer_sk#2, cnt#22, c_customer_sk#23, c_sa (35) Exchange Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] -Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), true, [id=#29] +Arguments: rangepartitioning(c_last_name#26 ASC NULLS FIRST, c_first_name#25 ASC NULLS FIRST, c_salutation#24 ASC NULLS FIRST, c_preferred_cust_flag#27 DESC NULLS LAST, ss_ticket_number#5 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 7] Input [6]: [c_last_name#26, c_first_name#25, c_salutation#24, c_preferred_cust_flag#27, ss_ticket_number#5, cnt#22] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt index 8aa32fed5a176..22cab3a42862f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt 
@@ -41,7 +41,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] + Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] From 3e9821edfd636d2bc8be8f9cc5fc87be48bebc79 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Thu, 24 Dec 2020 08:13:48 +0000 Subject: [PATCH 0875/1009] [SPARK-33443][SQL] LEAD/LAG should support [ IGNORE NULLS | RESPECT NULLS ] ### What changes were proposed in this pull request? The mainstream databases support `[ IGNORE NULLS | RESPECT NULLS ]` for `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE`, but the current implementation of `LEAD`/`LAG` doesn't support this syntax. **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/LEAD.html#GUID-0A0481F1-E98F-4535-A739-FCCA8D1B5B77 **Presto** https://prestodb.io/docs/current/functions/window.html **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_WF_LEAD.html **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1513.htm **Teradata** https://docs.teradata.com/r/756LNiPSFdY~4JcCCcR5Cw/GjCT6l7trjkIEjt~7Dhx4w **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/lead.html https://docs.snowflake.com/en/sql-reference/functions/lag.html ### Why are the changes needed? Supporting `[ IGNORE NULLS | RESPECT NULLS ]` for `LEAD`/`LAG` is very useful. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Jenkins test. Closes #30387 from beliefer/SPARK-33443. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/windowExpressions.scala | 16 ++- .../sql/execution/window/WindowExecBase.scala | 29 ++-- .../window/WindowFunctionFrame.scala | 132 ++++++++++++++++-- .../org/apache/spark/sql/functions.scala | 40 +++++- .../sql/DataFrameWindowFunctionsSuite.scala | 55 ++++++++ 5 files changed, 238 insertions(+), 34 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 43ecbd6a83fdb..b167499620c0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -387,8 +387,6 @@ abstract class FrameLessOffsetWindowFunction override def nullable: Boolean = default == null || default.nullable || input.nullable - override val ignoreNulls = false - override lazy val frame: WindowFrame = fakeFrame override def checkInputDataTypes(): TypeCheckResult = { @@ -443,9 +441,13 @@ abstract class FrameLessOffsetWindowFunction since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lead(input: Expression, offset: Expression, default: Expression) +case class Lead( + input: Expression, offset: Expression, default: Expression, ignoreNulls: Boolean) extends FrameLessOffsetWindowFunction { + def this(input: Expression, offset: Expression, default: Expression) = + this(input, offset, default, false) + def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) def this(input: Expression) = this(input, Literal(1)) @@ -485,10 +487,14 @@ case class Lead(input: 
Expression, offset: Expression, default: Expression) since = "2.0.0", group = "window_funcs") // scalastyle:on line.size.limit line.contains.tab -case class Lag(input: Expression, inputOffset: Expression, default: Expression) +case class Lag( + input: Expression, inputOffset: Expression, default: Expression, ignoreNulls: Boolean) extends FrameLessOffsetWindowFunction { - def this(input: Expression, offset: Expression) = this(input, offset, Literal(null)) + def this(input: Expression, inputOffset: Expression, default: Expression) = + this(input, inputOffset, default, false) + + def this(input: Expression, inputOffset: Expression) = this(input, inputOffset, Literal(null)) def this(input: Expression) = this(input, Literal(1)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index 9832e5cd74ae7..5d999cb143f50 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -119,13 +119,21 @@ trait WindowExecBase extends UnaryExecNode { * [[WindowExpression]]s and factory function for the [[WindowFrameFunction]]. */ protected lazy val windowFrameExpressionFactoryPairs = { - type FrameKey = (String, FrameType, Expression, Expression) + type FrameKey = (String, FrameType, Expression, Expression, Seq[Expression]) type ExpressionBuffer = mutable.Buffer[Expression] val framedFunctions = mutable.Map.empty[FrameKey, (ExpressionBuffer, ExpressionBuffer)] // Add a function and its function to the map for a given frame. def collect(tpe: String, fr: SpecifiedWindowFrame, e: Expression, fn: Expression): Unit = { - val key = (tpe, fr.frameType, fr.lower, fr.upper) + val key = fn match { + // This branch is used for Lead/Lag to support ignoring null. + // All window frames move in rows. If there are multiple Leads or Lags acting on a row + // and operating on different input expressions, they should not be moved uniformly + // by row. Therefore, we put these functions in different window frames. + case f: FrameLessOffsetWindowFunction if f.ignoreNulls => + (tpe, fr.frameType, fr.lower, fr.upper, f.children.map(_.canonicalized)) + case _ => (tpe, fr.frameType, fr.lower, fr.upper, Nil) + } val (es, fns) = framedFunctions.getOrElseUpdate( key, (ArrayBuffer.empty[Expression], ArrayBuffer.empty[Expression])) es += e @@ -183,7 +191,7 @@ trait WindowExecBase extends UnaryExecNode { // Create the factory to produce WindowFunctionFrame. 
val factory = key match { // Frameless offset Frame - case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _) => + case ("FRAME_LESS_OFFSET", _, IntegerLiteral(offset), _, expr) => target: InternalRow => new FrameLessOffsetWindowFunctionFrame( target, @@ -193,8 +201,9 @@ trait WindowExecBase extends UnaryExecNode { child.output, (expressions, schema) => MutableProjection.create(expressions, schema), - offset) - case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _) => + offset, + expr.nonEmpty) + case ("UNBOUNDED_OFFSET", _, IntegerLiteral(offset), _, _) => target: InternalRow => { new UnboundedOffsetWindowFunctionFrame( target, @@ -206,7 +215,7 @@ trait WindowExecBase extends UnaryExecNode { MutableProjection.create(expressions, schema), offset) } - case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _) => + case ("UNBOUNDED_PRECEDING_OFFSET", _, IntegerLiteral(offset), _, _) => target: InternalRow => { new UnboundedPrecedingOffsetWindowFunctionFrame( target, @@ -220,13 +229,13 @@ trait WindowExecBase extends UnaryExecNode { } // Entire Partition Frame. - case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing) => + case ("AGGREGATE", _, UnboundedPreceding, UnboundedFollowing, _) => target: InternalRow => { new UnboundedWindowFunctionFrame(target, processor) } // Growing Frame. - case ("AGGREGATE", frameType, UnboundedPreceding, upper) => + case ("AGGREGATE", frameType, UnboundedPreceding, upper, _) => target: InternalRow => { new UnboundedPrecedingWindowFunctionFrame( target, @@ -235,7 +244,7 @@ trait WindowExecBase extends UnaryExecNode { } // Shrinking Frame. - case ("AGGREGATE", frameType, lower, UnboundedFollowing) => + case ("AGGREGATE", frameType, lower, UnboundedFollowing, _) => target: InternalRow => { new UnboundedFollowingWindowFunctionFrame( target, @@ -244,7 +253,7 @@ trait WindowExecBase extends UnaryExecNode { } // Moving Frame. - case ("AGGREGATE", frameType, lower, upper) => + case ("AGGREGATE", frameType, lower, upper, _) => target: InternalRow => { new SlidingWindowFunctionFrame( target, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 2a4b957c35426..0408deb4b8a41 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -97,13 +97,15 @@ abstract class OffsetWindowFunctionFrameBase( /** Index of the input row currently used for output. */ protected var inputIndex = 0 + /** Attributes of the input row currently used for output. */ + protected val inputAttrs = inputSchema.map(_.withNullability(true)) + /** * Create the projection used when the offset row exists. * Please note that this project always respect null input values (like PostgreSQL). */ protected val projection = { // Collect the expressions and bind them. - val inputAttrs = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ bindReferences( expressions.toSeq.map(_.input), inputAttrs) @@ -114,7 +116,6 @@ abstract class OffsetWindowFunctionFrameBase( /** Create the projection used when the offset row DOES NOT exists. */ protected val fillDefaultValue = { // Collect the expressions and bind them. 
- val inputAttrs: AttributeSeq = inputSchema.map(_.withNullability(true)) val boundExpressions = Seq.fill(ordinal)(NoOp) ++ expressions.toSeq.map { e => if (e.default == null || e.default.foldable && e.default.eval() == null) { // The default value is null. @@ -147,31 +148,132 @@ class FrameLessOffsetWindowFunctionFrame( expressions: Array[OffsetWindowFunction], inputSchema: Seq[Attribute], newMutableProjection: (Seq[Expression], Seq[Attribute]) => MutableProjection, - offset: Int) + offset: Int, + ignoreNulls: Boolean = false) extends OffsetWindowFunctionFrameBase( target, ordinal, expressions, inputSchema, newMutableProjection, offset) { + /** Holder the UnsafeRow where the input operator by function is not null. */ + private var nextSelectedRow = EmptyRow + + // The number of rows skipped to get the next UnsafeRow where the input operator by function + // is not null. + private var skippedNonNullCount = 0 + + /** Create the projection to determine whether input is null. */ + private val project = UnsafeProjection.create(Seq(IsNull(expressions.head.input)), inputSchema) + + /** Check if the output value of the first index is null. */ + private def nullCheck(row: InternalRow): Boolean = project(row).getBoolean(0) + + /** find the offset row whose input is not null */ + private def findNextRowWithNonNullInput(): Unit = { + while (skippedNonNullCount < offset && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + if (!nullCheck(r)) { + nextSelectedRow = r + skippedNonNullCount += 1 + } + inputIndex += 1 + } + } + override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { input = rows inputIterator = input.generateIterator() // drain the first few rows if offset is larger than zero inputIndex = 0 - while (inputIndex < offset) { - if (inputIterator.hasNext) inputIterator.next() - inputIndex += 1 + if (ignoreNulls) { + findNextRowWithNonNullInput() + } else { + while (inputIndex < offset) { + if (inputIterator.hasNext) inputIterator.next() + inputIndex += 1 + } + inputIndex = offset } - inputIndex = offset + } + + private val doWrite = if (ignoreNulls && offset > 0) { + // For illustration, here is one example: the input data contains nine rows, + // and the input values of each row are: null, x, null, null, y, null, z, v, null. + // We use lead(input, 2) with IGNORE NULLS and the process is as follows: + // 1. current row -> null, next selected row -> y, output: y; + // 2. current row -> x, next selected row -> z, output: z; + // 3. current row -> null, next selected row -> z, output: z; + // 4. current row -> null, next selected row -> z, output: z; + // 5. current row -> y, next selected row -> v, output: v; + // 6. current row -> null, next selected row -> v, output: v; + // 7. current row -> z, next selected row -> empty, output: null; + // ... next selected row is empty, all following return null. + (current: InternalRow) => + if (nextSelectedRow == EmptyRow) { + // Use default values since the offset row whose input value is not null does not exist. + fillDefaultValue(current) + } else { + if (nullCheck(current)) { + projection(nextSelectedRow) + } else { + skippedNonNullCount -= 1 + findNextRowWithNonNullInput() + if (skippedNonNullCount == offset) { + projection(nextSelectedRow) + } else { + // Use default values since the offset row whose input value is not null does not exist. 
+ fillDefaultValue(current) + nextSelectedRow = EmptyRow + } + } + } + } else if (ignoreNulls && offset < 0) { + // For illustration, here is one example: the input data contains nine rows, + // and the input values of each row are: null, x, null, null, y, null, z, v, null. + // We use lag(input, 1) with IGNORE NULLS and the process is as follows: + // 1. current row -> null, next selected row -> empty, output: null; + // 2. current row -> x, next selected row -> empty, output: null; + // 3. current row -> null, next selected row -> x, output: x; + // 4. current row -> null, next selected row -> x, output: x; + // 5. current row -> y, next selected row -> x, output: x; + // 6. current row -> null, next selected row -> y, output: y; + // 7. current row -> z, next selected row -> y, output: y; + // 8. current row -> v, next selected row -> z, output: z; + // 9. current row -> null, next selected row -> v, output: v; + val absOffset = Math.abs(offset) + (current: InternalRow) => + if (skippedNonNullCount == absOffset) { + nextSelectedRow = EmptyRow + skippedNonNullCount -= 1 + while (nextSelectedRow == EmptyRow && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + if (!nullCheck(r)) { + nextSelectedRow = r + } + inputIndex += 1 + } + } + if (nextSelectedRow == EmptyRow) { + // Use default values since the offset row whose input value is not null does not exist. + fillDefaultValue(current) + } else { + projection(nextSelectedRow) + } + if (!nullCheck(current)) { + skippedNonNullCount += 1 + } + } else { + (current: InternalRow) => + if (inputIndex >= 0 && inputIndex < input.length) { + val r = WindowFunctionFrame.getNextOrNull(inputIterator) + projection(r) + } else { + // Use default values since the offset row does not exist. + fillDefaultValue(current) + } + inputIndex += 1 } override def write(index: Int, current: InternalRow): Unit = { - if (inputIndex >= 0 && inputIndex < input.length) { - val r = WindowFunctionFrame.getNextOrNull(inputIterator) - projection(r) - } else { - // Use default values since the offset row does not exist. - fillDefaultValue(current) - } - inputIndex += 1 + doWrite(current) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 4defcb836a978..764e08862a09e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -937,8 +937,24 @@ object functions { * @group window_funcs * @since 1.4.0 */ - def lag(e: Column, offset: Int, defaultValue: Any): Column = withExpr { - Lag(e.expr, Literal(offset), Literal(defaultValue)) + def lag(e: Column, offset: Int, defaultValue: Any): Column = { + lag(e, offset, defaultValue, false) + } + + /** + * Window function: returns the value that is `offset` rows before the current row, and + * `defaultValue` if there is less than `offset` rows before the current row. `ignoreNulls` + * determines whether null values of row are included in or eliminated from the calculation. + * For example, an `offset` of one will return the previous row at any given point in the + * window partition. + * + * This is equivalent to the LAG function in SQL. 
+ * + * @group window_funcs + * @since 3.2.0 + */ + def lag(e: Column, offset: Int, defaultValue: Any, ignoreNulls: Boolean): Column = withExpr { + Lag(e.expr, Literal(offset), Literal(defaultValue), ignoreNulls) } /** @@ -989,8 +1005,24 @@ object functions { * @group window_funcs * @since 1.4.0 */ - def lead(e: Column, offset: Int, defaultValue: Any): Column = withExpr { - Lead(e.expr, Literal(offset), Literal(defaultValue)) + def lead(e: Column, offset: Int, defaultValue: Any): Column = { + lead(e, offset, defaultValue, false) + } + + /** + * Window function: returns the value that is `offset` rows after the current row, and + * `defaultValue` if there is less than `offset` rows after the current row. `ignoreNulls` + * determines whether null values of row are included in or eliminated from the calculation. + * The default value of `ignoreNulls` is false. For example, an `offset` of one will return + * the next row at any given point in the window partition. + * + * This is equivalent to the LEAD function in SQL. + * + * @group window_funcs + * @since 3.2.0 + */ + def lead(e: Column, offset: Int, defaultValue: Any, ignoreNulls: Boolean): Column = withExpr { + Lead(e.expr, Literal(offset), Literal(defaultValue), ignoreNulls) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 207b2963f0b3b..3568ad3a7343d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -700,6 +700,61 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("b", 3, null, null, null))) } + test("lead/lag with ignoreNulls") { + val nullStr: String = null + val df = Seq( + ("a", 0, nullStr), + ("a", 1, "x"), + ("b", 2, nullStr), + ("c", 3, nullStr), + ("a", 4, "y"), + ("b", 5, nullStr), + ("a", 6, "z"), + ("a", 7, "v"), + ("a", 8, nullStr)). 
+ toDF("key", "order", "value") + val window = Window.orderBy($"order") + checkAnswer( + df.select( + $"key", + $"order", + $"value", + lead($"value", 1).over(window), + lead($"value", 2).over(window), + lead($"value", 0, null, true).over(window), + lead($"value", 1, null, true).over(window), + lead($"value", 2, null, true).over(window), + lead($"value", 3, null, true).over(window), + lead(concat($"value", $"key"), 1, null, true).over(window), + lag($"value", 1).over(window), + lag($"value", 2).over(window), + lag($"value", 0, null, true).over(window), + lag($"value", 1, null, true).over(window), + lag($"value", 2, null, true).over(window), + lag($"value", 3, null, true).over(window), + lag(concat($"value", $"key"), 1, null, true).over(window)) + .orderBy($"order"), + Seq( + Row("a", 0, null, "x", null, null, "x", "y", "z", "xa", + null, null, null, null, null, null, null), + Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya", + null, null, "x", null, null, null, null), + Row("b", 2, null, null, "y", null, "y", "z", "v", "ya", + "x", null, null, "x", null, null, "xa"), + Row("c", 3, null, "y", null, null, "y", "z", "v", "ya", + null, "x", null, "x", null, null, "xa"), + Row("a", 4, "y", null, "z", "y", "z", "v", null, "za", + null, null, "y", "x", null, null, "xa"), + Row("b", 5, null, "z", "v", null, "z", "v", null, "za", + "y", null, null, "y", "x", null, "ya"), + Row("a", 6, "z", "v", null, "z", "v", null, null, "va", + null, "y", "z", "y", "x", null, "ya"), + Row("a", 7, "v", null, null, "v", null, null, null, null, + "z", null, "v", "z", "y", "x", "za"), + Row("a", 8, null, null, null, null, null, null, null, null, + "v", "z", null, "v", "z", "y", "va"))) + } + test("SPARK-12989 ExtractWindowExpressions treats alias as regular attribute") { val src = Seq((0, 3, 5)).toDF("a", "b", "c") .withColumn("Data", struct("a", "b")) From 54a67842e678a54e976160c5ad249767165fab0f Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 24 Dec 2020 08:54:53 +0000 Subject: [PATCH 0876/1009] [SPARK-33881][SQL][TESTS] Check null and empty string as partition values in DS v1 and v2 tests ### What changes were proposed in this pull request? Add tests to check handling `null` and `''` (empty string) as partition values in commands `SHOW PARTITIONS`, `ALTER TABLE .. ADD PARTITION`, `ALTER TABLE .. DROP PARTITION`. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running the modified test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableAddPartitionSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.AlterTableDropPartitionSuite" ``` Closes #30893 from MaxGekk/partition-value-empty-string. 
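For reference, the partition-value handling exercised by the new tests boils down to statements of roughly this shape (the table name `tbl` is illustrative only, not taken from the patch):
```
CREATE TABLE tbl (col1 INT, p1 STRING) USING parquet PARTITIONED BY (p1);
ALTER TABLE tbl ADD PARTITION (p1 = '');   -- v1 command: rejected ("empty partition column value"); v2 command: accepted
ALTER TABLE tbl DROP PARTITION (p1 = '');
SHOW PARTITIONS tbl;                       -- v1 reports null/empty values as part=__HIVE_DEFAULT_PARTITION__
```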
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../v1/AlterTableAddPartitionSuite.scala | 12 ++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 15 +++++++++++- .../command/v1/ShowPartitionsSuite.scala | 19 +++++++++++++++ .../v2/AlterTableAddPartitionSuite.scala | 8 +++++++ .../v2/AlterTableDropPartitionSuite.scala | 9 +++++++ .../command/v2/ShowPartitionsSuite.scala | 8 ++++++- .../command/ShowPartitionsSuite.scala | 24 ++++++++++++++++++- 7 files changed, 92 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 1b7c90067e3f5..a749b1e3dd14d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.command @@ -35,6 +36,17 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head assert(location.endsWith(expected)) } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. " + + "The spec ([p1=]) contains an empty partition column value")) + } + } } class AlterTableAddPartitionSuite extends AlterTableAddPartitionSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 12a99933f6633..71032eefee2bb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { @@ -35,4 +36,16 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu class AlterTableDropPartitionSuite extends AlterTableDropPartitionSuiteBase - with CommandSuiteBase + with CommandSuiteBase { + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')") + }.getMessage + assert(errMsg.contains("Partition spec is invalid. 
" + + "The spec ([p1=]) contains an empty partition column value")) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 8acd24f0e3956..5d992d18890e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -93,4 +93,23 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase assert(sql("SHOW PARTITIONS part_datasrc").count() == 3) } } + + test("null and empty string as partition values") { + import testImplicits._ + withTable("t") { + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("parquet") + .mode(SaveMode.Overwrite) + .saveAsTable("t") + + runShowPartitionsSql( + "SHOW PARTITIONS t", + Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) + checkAnswer(spark.table("t"), + Row(0, null) :: + Row(1, null) :: Nil) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index b8ecb87ae7595..b0d0f6ced9346 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -59,4 +59,12 @@ class AlterTableAddPartitionSuite assert(errMsg.contains(s"Table $t can not alter partitions")) } } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + checkPartitions(t, Map("p1" -> "")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index e2762f0439cb3..97ef10e256515 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -50,4 +50,13 @@ class AlterTableDropPartitionSuite } } } + + test("empty string as partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = '')") + sql(s"ALTER TABLE $t DROP PARTITION (p1 = '')") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index ed0a7dff62440..431f64baf4b78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -45,7 +45,13 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu .mode(SaveMode.Overwrite) .saveAsTable(t) - runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + runShowPartitionsSql( + s"SHOW PARTITIONS $t", + Row("part=") :: + 
Row("part=null") :: Nil) + checkAnswer(spark.table(t), + Row(0, "") :: + Row(1, null) :: Nil) } } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index fa8ac4ccaa089..eaac8f5e8146c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -17,6 +17,28 @@ package org.apache.spark.sql.hive.execution.command +import org.apache.spark.sql.{Row, SaveMode} import org.apache.spark.sql.execution.command.v1 -class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase +class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase { + test("null and empty string as partition values") { + import testImplicits._ + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + withTable("t") { + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format("hive") + .mode(SaveMode.Overwrite) + .saveAsTable("t") + + runShowPartitionsSql( + "SHOW PARTITIONS t", + Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) + checkAnswer(spark.table("t"), + Row(0, "__HIVE_DEFAULT_PARTITION__") :: + Row(1, "__HIVE_DEFAULT_PARTITION__") :: Nil) + } + } + } +} From 29cca68e9e55fae8389378de6f30d0dfa7a74010 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 24 Dec 2020 08:56:02 +0000 Subject: [PATCH 0877/1009] [SPARK-33892][SQL] Display char/varchar in DESC and SHOW CREATE TABLE ### What changes were proposed in this pull request? Display char/varchar in - DESC table - DESC column - SHOW CREATE TABLE ### Why are the changes needed? show the correct definition for users ### Does this PR introduce _any_ user-facing change? yes, char/varchar column's will print char/varchar instead of string ### How was this patch tested? new tests Closes #30908 from yaooqinn/SPARK-33892. 
Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/tables.scala | 12 ++++---- .../v2/ShowTablePropertiesExec.scala | 4 +-- .../datasources/v2/V2CommandExec.scala | 4 +++ .../spark/sql/CharVarcharTestSuite.scala | 29 +++++++++++++++++++ .../spark/sql/HiveCharVarcharTestSuite.scala | 9 ++++++ 5 files changed, 50 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index cf2a6ffb2c682..0fcf8f2717041 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap} +import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -631,7 +631,7 @@ case class DescribeTableCommand( } describeSchema(catalog.lookupRelation(table).schema, result, header = false) } else { - val metadata = catalog.getTableMetadata(table) + val metadata = catalog.getTableRawMetadata(table) if (metadata.schema.isEmpty) { // In older version(prior to 2.1) of Spark, the table schema can be empty and should be // inferred at runtime. We should still support it. @@ -782,9 +782,11 @@ case class DescribeColumnCommand( None } + val dataType = CharVarcharUtils.getRawType(field.metadata) + .getOrElse(field.dataType).catalogString val buffer = ArrayBuffer[Row]( Row("col_name", field.name), - Row("data_type", field.dataType.catalogString), + Row("data_type", dataType), Row("comment", comment.getOrElse("NULL")) ) if (isExtended) { @@ -1111,7 +1113,7 @@ case class ShowCreateTableCommand(table: TableIdentifier) throw new AnalysisException( s"SHOW CREATE TABLE is not supported on a temporary view: ${table.identifier}") } else { - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) // TODO: [SPARK-28692] unify this after we unify the // CREATE TABLE syntax for hive serde and data source table. 
@@ -1262,7 +1264,7 @@ case class ShowCreateTableAsSerdeCommand(table: TableIdentifier) override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog - val tableMetadata = catalog.getTableMetadata(table) + val tableMetadata = catalog.getTableRawMetadata(table) val stmt = if (DDLUtils.isDatasourceTable(tableMetadata)) { throw new AnalysisException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 7ceee1edee180..6d3a94ef15631 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** @@ -30,8 +30,6 @@ case class ShowTablePropertiesExec( catalogTable: Table, propertyKey: Option[String]) extends V2CommandExec { - override def producedAttributes: AttributeSet = AttributeSet(output) - override protected def run(): Seq[InternalRow] = { import scala.collection.JavaConverters._ val toRow = RowEncoder(schema).resolveAndBind().createSerializer() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 7738f26dfd266..6b193674cc71a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeSet import org.apache.spark.sql.execution.SparkPlan /** @@ -55,4 +56,7 @@ abstract class V2CommandExec extends SparkPlan { } override def children: Seq[SparkPlan] = Nil + + override def producedAttributes: AttributeSet = outputSet + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 8ab8c37d5e790..9d4b7c4f82ed2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -443,6 +443,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { ("c1 IN (c2)", true))) } } + + test("SPARK-33892: DESCRIBE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t").selectExpr("data_type").where("data_type like '%char%'"), + Seq(Row("char(5)"), Row("varchar(3)"))) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. 
@@ -603,6 +611,27 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } } + + // TODO(SPARK-33875): Move these tests to super after DESCRIBE COLUMN v2 implemented + test("SPARK-33892: DESCRIBE COLUMN w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + checkAnswer(sql("desc t v").selectExpr("info_value").where("info_value like '%char%'"), + Row("varchar(3)")) + checkAnswer(sql("desc t c").selectExpr("info_value").where("info_value like '%char%'"), + Row("char(5)")) + } + } + + // TODO(SPARK-33898): Move these tests to super after SHOW CREATE TABLE for v2 implemented + test("SPARK-33892: SHOW CREATE TABLE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala index f48cfb8dfb899..bb7918c881c7e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/HiveCharVarcharTestSuite.scala @@ -41,6 +41,15 @@ class HiveCharVarcharTestSuite extends CharVarcharTestSuite with TestHiveSinglet } super.afterAll() } + + test("SPARK-33892: SHOW CREATE TABLE AS SERDE w/ char/varchar") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), c CHAR(5)) USING $format") + val rest = sql("SHOW CREATE TABLE t AS SERDE").head().getString(0) + assert(rest.contains("VARCHAR(3)")) + assert(rest.contains("CHAR(5)")) + } + } } class HiveCharVarcharDDLTestSuite extends CharVarcharDDLTestBase with TestHiveSingleton { From 700f5ab65c1c84522302ce92d176adf229c34daa Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 25 Dec 2020 00:54:26 +0900 Subject: [PATCH 0878/1009] [SPARK-33900][WEBUI] Show shuffle read size / records correctly when only remotebytesread is available ### What changes were proposed in this pull request? Shuffle Read Size / Records can also be displayed in remoteBytesRead>0 localBytesRead=0. current: ![image](https://user-images.githubusercontent.com/3898450/103079421-c4ca2280-460e-11eb-9e2f-49d35b5d324d.png) fix: ![image](https://user-images.githubusercontent.com/3898450/103079439-cc89c700-460e-11eb-9a41-6b2882980d11.png) ### Why are the changes needed? At present, the page only displays the data of Shuffle Read Size / Records when localBytesRead>0. When there is only remote reading, metrics cannot be seen on the stage page. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? manual test Closes #30916 from cxzl25/SPARK-33900. 
Authored-by: sychen Signed-off-by: Kousuke Saruta --- .../src/main/resources/org/apache/spark/ui/static/stagepage.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 336edff509300..ebb79f542168d 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -946,7 +946,8 @@ $(document).ready(function () { }, { data : function (row, type) { - if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && row.taskMetrics.shuffleReadMetrics.localBytesRead > 0) { + if (row.taskMetrics && row.taskMetrics.shuffleReadMetrics && + (row.taskMetrics.shuffleReadMetrics.localBytesRead > 0 || row.taskMetrics.shuffleReadMetrics.remoteBytesRead > 0)) { var totalBytesRead = parseInt(row.taskMetrics.shuffleReadMetrics.localBytesRead) + parseInt(row.taskMetrics.shuffleReadMetrics.remoteBytesRead); if (type === 'display') { return formatBytes(totalBytesRead, type) + " / " + row.taskMetrics.shuffleReadMetrics.recordsRead; From 9c30116fb428f87543155323617cf5fb700e84cd Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 24 Dec 2020 14:30:34 -0800 Subject: [PATCH 0879/1009] [SPARK-33857][SQL] Unify the default seed of random functions ### What changes were proposed in this pull request? Unify the seed of random functions: 1. Add a placeholder expression `UnresolvedSeed` as the default seed. 2. Change the default seed of `Rand`, `Randn`, `Uuid` and `Shuffle` to `UnresolvedSeed`. 3. Replace `UnresolvedSeed` with a real seed in the `ResolveRandomSeed` rule. ### Why are the changes needed? `Uuid` and `Shuffle` use the `ResolveRandomSeed` rule to set the seed if the user doesn't give a seed value, while `Rand` and `Randn` do this at construction time. It's better to unify the default seed handling on the Analyzer side, since `ExpressionWithRandomSeed` is already used for streaming queries. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass existing tests and add a new test. Closes #30864 from ulysses-you/SPARK-33857.
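As a rough sketch of what the rule does (assumption: spark-shell with a `SparkSession` bound to `spark`): seedless random functions written in SQL are parsed with the `UnresolvedSeed` placeholder, and `ResolveRandomSeed` fills in a concrete `Literal` seed during analysis.
```
// rand() and uuid() without an explicit seed start out with UnresolvedSeed; in the
// analyzed plan the placeholder has been replaced by a literal seed.
val df = spark.sql("SELECT rand() AS r, uuid() AS u")
println(df.queryExecution.analyzed.numberedTreeString)
```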
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../sql/catalyst/analysis/unresolved.scala | 9 +++++++ .../expressions/collectionOperations.scala | 4 +++- .../spark/sql/catalyst/expressions/misc.scala | 3 +++ .../expressions/randomExpressions.scala | 24 ++++++++++--------- .../sql/catalyst/analysis/AnalysisSuite.scala | 12 ++++++++++ 6 files changed, 42 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba24914cb6835..8af692d9fe008 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3000,8 +3000,8 @@ class Analyzer(override val catalogManager: CatalogManager) override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp { case p if p.resolved => p case p => p transformExpressionsUp { - case Uuid(None) => Uuid(Some(random.nextLong())) - case Shuffle(child, None) => Shuffle(child, Some(random.nextLong())) + case e: ExpressionWithRandomSeed if e.seedExpression == UnresolvedSeed => + e.withNewSeed(random.nextLong()) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 8a73208d42e20..84614886348aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -561,3 +561,12 @@ case class UnresolvedHaving( override lazy val resolved: Boolean = false override def output: Seq[Attribute] = child.output } + +/** + * A place holder expression used in random functions, will be replaced after analyze. 
+ */ +case object UnresolvedSeed extends LeafExpression with Unevaluable { + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override lazy val resolved = false +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 33794467fb338..17b45bc44a28e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -23,7 +23,7 @@ import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} +import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedSeed} import org.apache.spark.sql.catalyst.expressions.ArraySortLike.NullOrder import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ @@ -943,6 +943,8 @@ case class Shuffle(child: Expression, randomSeed: Option[Long] = None) def this(child: Expression) = this(child, None) + override def seedExpression: Expression = randomSeed.map(Literal.apply).getOrElse(UnresolvedSeed) + override def withNewSeed(seed: Long): Shuffle = copy(randomSeed = Some(seed)) override lazy val resolved: Boolean = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 34a64dddd30fa..4ad4c4d61f10c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.RandomUUIDGenerator @@ -187,6 +188,8 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Sta def this() = this(None) + override def seedExpression: Expression = randomSeed.map(Literal.apply).getOrElse(UnresolvedSeed) + override def withNewSeed(seed: Long): Uuid = Uuid(Some(seed)) override lazy val resolved: Boolean = randomSeed.isDefined diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 0fa4d6c315041..630c934f79533 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -19,10 +19,10 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} 
import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.types._ -import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom /** @@ -32,7 +32,8 @@ import org.apache.spark.util.random.XORShiftRandom * * Since this expression is stateful, it cannot be a case object. */ -abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful { +abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful + with ExpressionWithRandomSeed { /** * Record ID within each partition. By being transient, the Random Number Generator is * reset every time we serialize and deserialize and initialize it. @@ -43,7 +44,9 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful rng = new XORShiftRandom(seed + partitionIndex) } - @transient protected lazy val seed: Long = child match { + override def seedExpression: Expression = child + + @transient protected lazy val seed: Long = seedExpression match { case Literal(s, IntegerType) => s.asInstanceOf[Int] case Literal(s, LongType) => s.asInstanceOf[Long] case _ => throw new AnalysisException( @@ -62,6 +65,7 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful * Usually the random seed needs to be renewed at each execution under streaming queries. */ trait ExpressionWithRandomSeed { + def seedExpression: Expression def withNewSeed(seed: Long): Expression } @@ -84,14 +88,13 @@ trait ExpressionWithRandomSeed { since = "1.5.0", group = "math_funcs") // scalastyle:on line.size.limit -case class Rand(child: Expression, hideSeed: Boolean = false) - extends RDG with ExpressionWithRandomSeed { +case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG { - def this() = this(Literal(Utils.random.nextLong(), LongType), true) + def this() = this(UnresolvedSeed, true) def this(child: Expression) = this(child, false) - override def withNewSeed(seed: Long): Rand = Rand(Literal(seed, LongType)) + override def withNewSeed(seed: Long): Rand = Rand(Literal(seed, LongType), hideSeed) override protected def evalInternal(input: InternalRow): Double = rng.nextDouble() @@ -136,14 +139,13 @@ object Rand { since = "1.5.0", group = "math_funcs") // scalastyle:on line.size.limit -case class Randn(child: Expression, hideSeed: Boolean = false) - extends RDG with ExpressionWithRandomSeed { +case class Randn(child: Expression, hideSeed: Boolean = false) extends RDG { - def this() = this(Literal(Utils.random.nextLong(), LongType), true) + def this() = this(UnresolvedSeed, true) def this(child: Expression) = this(child, false) - override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType)) + override def withNewSeed(seed: Long): Randn = Randn(Literal(seed, LongType), hideSeed) override protected def evalInternal(input: InternalRow): Double = rng.nextGaussian() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index b206bc9f84f18..f66871ee75ecc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1006,4 +1006,16 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(plan, expect) } } + + test("SPARK-33857: Unify the default seed of random functions") { + Seq(new Rand(), new Randn(), Shuffle(Literal(Array(1))), Uuid()).foreach 
{ r => + assert(r.seedExpression == UnresolvedSeed) + val p = getAnalyzer.execute(Project(Seq(r.as("r")), testRelation)) + assert( + p.asInstanceOf[Project].projectList.head.asInstanceOf[Alias] + .child.asInstanceOf[ExpressionWithRandomSeed] + .seedExpression.isInstanceOf[Literal] + ) + } + } } From 65a9ac2ff4d902976bf3ef89d1d3e29c1e6d5414 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Thu, 24 Dec 2020 14:44:16 -0800 Subject: [PATCH 0880/1009] [SPARK-30027][SQL] Support codegen for aggregate filters in HashAggregateExec ### What changes were proposed in this pull request? This pr intends to support code generation for `HashAggregateExec` with filters. Quick benchmark results: ``` $ ./bin/spark-shell --master=local[1] --conf spark.driver.memory=8g --conf spark.sql.shuffle.partitions=1 -v scala> spark.range(100000000).selectExpr("id % 3 as k1", "id % 5 as k2", "rand() as v1", "rand() as v2").write.saveAsTable("t") scala> sql("SELECT k1, k2, AVG(v1) FILTER (WHERE v2 > 0.5) FROM t GROUP BY k1, k2").write.format("noop").mode("overwrite").save() >> Before this PR Elapsed time: 16.170697619s >> After this PR Elapsed time: 6.7825313s ``` The query above is compiled into code below; ``` ... /* 285 */ private void agg_doAggregate_avg_0(boolean agg_exprIsNull_2_0, org.apache.spark.sql.catalyst.InternalRow agg_unsafeRowAggBuffer_0, double agg_expr_2_0) throws java.io.IOException { /* 286 */ // evaluate aggregate function for avg /* 287 */ boolean agg_isNull_10 = true; /* 288 */ double agg_value_12 = -1.0; /* 289 */ boolean agg_isNull_11 = agg_unsafeRowAggBuffer_0.isNullAt(0); /* 290 */ double agg_value_13 = agg_isNull_11 ? /* 291 */ -1.0 : (agg_unsafeRowAggBuffer_0.getDouble(0)); /* 292 */ if (!agg_isNull_11) { /* 293 */ agg_agg_isNull_12_0 = true; /* 294 */ double agg_value_14 = -1.0; /* 295 */ do { /* 296 */ if (!agg_exprIsNull_2_0) { /* 297 */ agg_agg_isNull_12_0 = false; /* 298 */ agg_value_14 = agg_expr_2_0; /* 299 */ continue; /* 300 */ } /* 301 */ /* 302 */ if (!false) { /* 303 */ agg_agg_isNull_12_0 = false; /* 304 */ agg_value_14 = 0.0D; /* 305 */ continue; /* 306 */ } /* 307 */ /* 308 */ } while (false); /* 309 */ /* 310 */ agg_isNull_10 = false; // resultCode could change nullability. /* 311 */ /* 312 */ agg_value_12 = agg_value_13 + agg_value_14; /* 313 */ /* 314 */ } /* 315 */ boolean agg_isNull_15 = false; /* 316 */ long agg_value_17 = -1L; /* 317 */ if (!false && agg_exprIsNull_2_0) { /* 318 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(1); /* 319 */ long agg_value_20 = agg_isNull_18 ? /* 320 */ -1L : (agg_unsafeRowAggBuffer_0.getLong(1)); /* 321 */ agg_isNull_15 = agg_isNull_18; /* 322 */ agg_value_17 = agg_value_20; /* 323 */ } else { /* 324 */ boolean agg_isNull_19 = true; /* 325 */ long agg_value_21 = -1L; /* 326 */ boolean agg_isNull_20 = agg_unsafeRowAggBuffer_0.isNullAt(1); /* 327 */ long agg_value_22 = agg_isNull_20 ? /* 328 */ -1L : (agg_unsafeRowAggBuffer_0.getLong(1)); /* 329 */ if (!agg_isNull_20) { /* 330 */ agg_isNull_19 = false; // resultCode could change nullability. 
/* 331 */ /* 332 */ agg_value_21 = agg_value_22 + 1L; /* 333 */ /* 334 */ } /* 335 */ agg_isNull_15 = agg_isNull_19; /* 336 */ agg_value_17 = agg_value_21; /* 337 */ } /* 338 */ // update unsafe row buffer /* 339 */ if (!agg_isNull_10) { /* 340 */ agg_unsafeRowAggBuffer_0.setDouble(0, agg_value_12); /* 341 */ } else { /* 342 */ agg_unsafeRowAggBuffer_0.setNullAt(0); /* 343 */ } /* 344 */ /* 345 */ if (!agg_isNull_15) { /* 346 */ agg_unsafeRowAggBuffer_0.setLong(1, agg_value_17); /* 347 */ } else { /* 348 */ agg_unsafeRowAggBuffer_0.setNullAt(1); /* 349 */ } /* 350 */ } ... ``` ### Why are the changes needed? For high performance. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Existing tests. Closes #27019 from maropu/AggregateFilterCodegen. Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/predicates.scala | 18 +++ .../aggregate/HashAggregateExec.scala | 100 +++++++------- .../execution/basicPhysicalOperators.scala | 130 ++++++++++-------- .../sql-tests/inputs/group-by-filter.sql | 5 +- .../sql-tests/results/explain.sql.out | 4 +- 5 files changed, 151 insertions(+), 106 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 250d3fee94cb3..c61d24758617c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -242,6 +242,24 @@ trait PredicateHelper extends AliasHelper with Logging { None } } + + // If one expression and its children are null intolerant, it is null intolerant. + protected def isNullIntolerant(expr: Expression): Boolean = expr match { + case e: NullIntolerant => e.children.forall(isNullIntolerant) + case _ => false + } + + protected def outputWithNullability( + output: Seq[Attribute], + nonNullAttrExprIds: Seq[ExprId]): Seq[Attribute] = { + output.map { a => + if (a.nullable && nonNullAttrExprIds.contains(a.exprId)) { + a.withNullability(false) + } else { + a + } + } + } } @ExpressionDescription( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 52d0450afb181..cdad9de00620b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -53,7 +53,8 @@ case class HashAggregateExec( resultExpressions: Seq[NamedExpression], child: SparkPlan) extends BaseAggregateExec - with BlockingOperatorWithCodegen { + with BlockingOperatorWithCodegen + with GeneratePredicateHelper { require(HashAggregateExec.supportsAggregate(aggregateBufferAttributes)) @@ -131,10 +132,8 @@ case class HashAggregateExec( override def usedInputs: AttributeSet = inputSet override def supportCodegen: Boolean = { - // ImperativeAggregate and filter predicate are not supported right now - // TODO: SPARK-30027 Support codegen for filter exprs in HashAggregateExec - !(aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) || - aggregateExpressions.exists(_.filter.isDefined)) + // ImperativeAggregate are not supported right now + !aggregateExpressions.exists(_.aggregateFunction.isInstanceOf[ImperativeAggregate]) } override def inputRDDs(): 
Seq[RDD[InternalRow]] = { @@ -254,7 +253,7 @@ case class HashAggregateExec( aggNames: Seq[String], aggBufferUpdatingExprs: Seq[Seq[Expression]], aggCodeBlocks: Seq[Block], - subExprs: Map[Expression, SubExprEliminationState]): Option[String] = { + subExprs: Map[Expression, SubExprEliminationState]): Option[Seq[String]] = { val exprValsInSubExprs = subExprs.flatMap { case (_, s) => s.value :: s.isNull :: Nil } if (exprValsInSubExprs.exists(_.isInstanceOf[SimpleExprValue])) { // `SimpleExprValue`s cannot be used as an input variable for split functions, so @@ -293,7 +292,7 @@ case class HashAggregateExec( val inputVariables = args.map(_.variableName).mkString(", ") s"$doAggFuncName($inputVariables);" } - Some(splitCodes.mkString("\n").trim) + Some(splitCodes) } else { val errMsg = "Failed to split aggregate code into small functions because the parameter " + "length of at least one split function went over the JVM limit: " + @@ -308,6 +307,39 @@ case class HashAggregateExec( } } + private def generateEvalCodeForAggFuncs( + ctx: CodegenContext, + input: Seq[ExprCode], + inputAttrs: Seq[Attribute], + boundUpdateExprs: Seq[Seq[Expression]], + aggNames: Seq[String], + aggCodeBlocks: Seq[Block], + subExprs: SubExprCodes): String = { + val aggCodes = if (conf.codegenSplitAggregateFunc && + aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { + val maybeSplitCodes = splitAggregateExpressions( + ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) + + maybeSplitCodes.getOrElse(aggCodeBlocks.map(_.code)) + } else { + aggCodeBlocks.map(_.code) + } + + aggCodes.zip(aggregateExpressions.map(ae => (ae.mode, ae.filter))).map { + case (aggCode, (Partial | Complete, Some(condition))) => + // Note: wrap in "do { } while(false);", so the generated checks can jump out + // with "continue;" + s""" + |do { + | ${generatePredicateCode(ctx, condition, inputAttrs, input)} + | $aggCode + |} while(false); + """.stripMargin + case (aggCode, _) => + aggCode + }.mkString("\n") + } + private def doConsumeWithoutKeys(ctx: CodegenContext, input: Seq[ExprCode]): String = { // only have DeclarativeAggregate val functions = aggregateExpressions.map(_.aggregateFunction.asInstanceOf[DeclarativeAggregate]) @@ -354,24 +386,14 @@ case class HashAggregateExec( """.stripMargin } - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) s""" |// do aggregate |// common sub-expressions |$effectiveCodes |// evaluate aggregate functions and update aggregation buffers - |$codeToEvalAggFunc + |$codeToEvalAggFuncs """.stripMargin } @@ -908,7 +930,7 @@ case class HashAggregateExec( } } - val inputAttr = aggregateBufferAttributes ++ inputAttributes + val inputAttrs = aggregateBufferAttributes ++ inputAttributes // Here we set `currentVars(0)` to `currentVars(numBufferSlots)` to null, so that when // generating code for buffer columns, we use `INPUT_ROW`(will be the buffer row), while // generating input columns, we use `currentVars`. 
@@ -930,7 +952,7 @@ case class HashAggregateExec( val updateRowInRegularHashMap: String = { ctx.INPUT_ROW = unsafeRowBuffer val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => - bindReferences(updateExprsForOneFunc, inputAttr) + bindReferences(updateExprsForOneFunc, inputAttrs) } val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") @@ -961,23 +983,13 @@ case class HashAggregateExec( """.stripMargin } - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) s""" |// common sub-expressions |$effectiveCodes |// evaluate aggregate functions and update aggregation buffers - |$codeToEvalAggFunc + |$codeToEvalAggFuncs """.stripMargin } @@ -986,7 +998,7 @@ case class HashAggregateExec( if (isVectorizedHashMapEnabled) { ctx.INPUT_ROW = fastRowBuffer val boundUpdateExprs = updateExprs.map { updateExprsForOneFunc => - bindReferences(updateExprsForOneFunc, inputAttr) + bindReferences(updateExprsForOneFunc, inputAttrs) } val subExprs = ctx.subexpressionEliminationForWholeStageCodegen(boundUpdateExprs.flatten) val effectiveCodes = subExprs.codes.mkString("\n") @@ -1016,18 +1028,8 @@ case class HashAggregateExec( """.stripMargin } - - val codeToEvalAggFunc = if (conf.codegenSplitAggregateFunc && - aggCodeBlocks.map(_.length).sum > conf.methodSplitThreshold) { - val maybeSplitCode = splitAggregateExpressions( - ctx, aggNames, boundUpdateExprs, aggCodeBlocks, subExprs.states) - - maybeSplitCode.getOrElse { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } - } else { - aggCodeBlocks.fold(EmptyBlock)(_ + _).code - } + val codeToEvalAggFuncs = generateEvalCodeForAggFuncs( + ctx, input, inputAttrs, boundUpdateExprs, aggNames, aggCodeBlocks, subExprs) // If vectorized fast hash map is on, we first generate code to update row // in vectorized fast hash map, if the previous loop up hit vectorized fast hash map. @@ -1037,7 +1039,7 @@ case class HashAggregateExec( | // common sub-expressions | $effectiveCodes | // evaluate aggregate functions and update aggregation buffers - | $codeToEvalAggFunc + | $codeToEvalAggFuncs |} else { | $updateRowInRegularHashMap |} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala index d74d0bf733c27..abd336006848b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala @@ -109,59 +109,39 @@ case class ProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) } } -/** Physical plan for Filter. */ -case class FilterExec(condition: Expression, child: SparkPlan) - extends UnaryExecNode with CodegenSupport with PredicateHelper { - - // Split out all the IsNotNulls from condition. 
- private val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { - case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet) - case _ => false - } - - // If one expression and its children are null intolerant, it is null intolerant. - private def isNullIntolerant(expr: Expression): Boolean = expr match { - case e: NullIntolerant => e.children.forall(isNullIntolerant) - case _ => false - } - - // The columns that will filtered out by `IsNotNull` could be considered as not nullable. - private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId) - - // Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate - // all the variables at the beginning to take advantage of short circuiting. - override def usedInputs: AttributeSet = AttributeSet.empty - - override def output: Seq[Attribute] = { - child.output.map { a => - if (a.nullable && notNullAttributes.contains(a.exprId)) { - a.withNullability(false) - } else { - a - } +trait GeneratePredicateHelper extends PredicateHelper { + self: CodegenSupport => + + protected def generatePredicateCode( + ctx: CodegenContext, + condition: Expression, + inputAttrs: Seq[Attribute], + inputExprCode: Seq[ExprCode]): String = { + val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { + case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(AttributeSet(inputAttrs)) + case _ => false } - } - - override lazy val metrics = Map( - "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) - - override def inputRDDs(): Seq[RDD[InternalRow]] = { - child.asInstanceOf[CodegenSupport].inputRDDs() - } - - protected override def doProduce(ctx: CodegenContext): String = { - child.asInstanceOf[CodegenSupport].produce(ctx, this) - } - - override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { - val numOutput = metricTerm(ctx, "numOutputRows") - + val nonNullAttrExprIds = notNullPreds.flatMap(_.references).distinct.map(_.exprId) + val outputAttrs = outputWithNullability(inputAttrs, nonNullAttrExprIds) + generatePredicateCode( + ctx, inputAttrs, inputExprCode, outputAttrs, notNullPreds, otherPreds, + nonNullAttrExprIds) + } + + protected def generatePredicateCode( + ctx: CodegenContext, + inputAttrs: Seq[Attribute], + inputExprCode: Seq[ExprCode], + outputAttrs: Seq[Attribute], + notNullPreds: Seq[Expression], + otherPreds: Seq[Expression], + nonNullAttrExprIds: Seq[ExprId]): String = { /** * Generates code for `c`, using `in` for input attributes and `attrs` for nullability. */ def genPredicate(c: Expression, in: Seq[ExprCode], attrs: Seq[Attribute]): String = { val bound = BindReferences.bindReference(c, attrs) - val evaluated = evaluateRequiredVariables(child.output, in, c.references) + val evaluated = evaluateRequiredVariables(inputAttrs, in, c.references) // Generate the code for the predicate. val ev = ExpressionCanonicalizer.execute(bound).genCode(ctx) @@ -195,10 +175,10 @@ case class FilterExec(condition: Expression, child: SparkPlan) if (idx != -1 && !generatedIsNotNullChecks(idx)) { generatedIsNotNullChecks(idx) = true // Use the child's output. The nullability is what the child produced. 
- genPredicate(notNullPreds(idx), input, child.output) - } else if (notNullAttributes.contains(r.exprId) && !extraIsNotNullAttrs.contains(r)) { + genPredicate(notNullPreds(idx), inputExprCode, inputAttrs) + } else if (nonNullAttrExprIds.contains(r.exprId) && !extraIsNotNullAttrs.contains(r)) { extraIsNotNullAttrs += r - genPredicate(IsNotNull(r), input, child.output) + genPredicate(IsNotNull(r), inputExprCode, inputAttrs) } else { "" } @@ -208,18 +188,61 @@ case class FilterExec(condition: Expression, child: SparkPlan) // enforced them with the IsNotNull checks above. s""" |$nullChecks - |${genPredicate(c, input, output)} + |${genPredicate(c, inputExprCode, outputAttrs)} """.stripMargin.trim }.mkString("\n") val nullChecks = notNullPreds.zipWithIndex.map { case (c, idx) => if (!generatedIsNotNullChecks(idx)) { - genPredicate(c, input, child.output) + genPredicate(c, inputExprCode, inputAttrs) } else { "" } }.mkString("\n") + s""" + |$generated + |$nullChecks + """.stripMargin + } +} + +/** Physical plan for Filter. */ +case class FilterExec(condition: Expression, child: SparkPlan) + extends UnaryExecNode with CodegenSupport with GeneratePredicateHelper { + + // Split out all the IsNotNulls from condition. + private val (notNullPreds, otherPreds) = splitConjunctivePredicates(condition).partition { + case IsNotNull(a) => isNullIntolerant(a) && a.references.subsetOf(child.outputSet) + case _ => false + } + + // The columns that will filtered out by `IsNotNull` could be considered as not nullable. + private val notNullAttributes = notNullPreds.flatMap(_.references).distinct.map(_.exprId) + + // Mark this as empty. We'll evaluate the input during doConsume(). We don't want to evaluate + // all the variables at the beginning to take advantage of short circuiting. + override def usedInputs: AttributeSet = AttributeSet.empty + + override def output: Seq[Attribute] = outputWithNullability(child.output, notNullAttributes) + + override lazy val metrics = Map( + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) + + override def inputRDDs(): Seq[RDD[InternalRow]] = { + child.asInstanceOf[CodegenSupport].inputRDDs() + } + + protected override def doProduce(ctx: CodegenContext): String = { + child.asInstanceOf[CodegenSupport].produce(ctx, this) + } + + override def doConsume(ctx: CodegenContext, input: Seq[ExprCode], row: ExprCode): String = { + val numOutput = metricTerm(ctx, "numOutputRows") + + val predicateCode = generatePredicateCode( + ctx, child.output, input, output, notNullPreds, otherPreds, notNullAttributes) + // Reset the isNull to false for the not-null columns, then the followed operators could // generate better code (remove dead branches). val resultVars = input.zipWithIndex.map { case (ev, i) => @@ -232,8 +255,7 @@ case class FilterExec(condition: Expression, child: SparkPlan) // Note: wrap in "do { } while(false);", so the generated checks can jump out with "continue;" s""" |do { - | $generated - | $nullChecks + | $predicateCode | $numOutput.add(1); | ${consume(ctx, resultVars)} |} while(false); diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql index e4193d845f2e2..c1ccb654ee085 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by-filter.sql @@ -1,4 +1,7 @@ --- Test filter clause for aggregate expression. +-- Test filter clause for aggregate expression with codegen on and off. 
+--CONFIG_DIM1 spark.sql.codegen.wholeStage=true +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=CODEGEN_ONLY +--CONFIG_DIM1 spark.sql.codegen.wholeStage=false,spark.sql.codegen.factoryMode=NO_CODEGEN --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=true --CONFIG_DIM1 spark.sql.optimizeNullAwareAntiJoin=false diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 886b98e538d28..a4c92382750e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -878,7 +878,7 @@ struct == Physical Plan == * HashAggregate (5) +- Exchange (4) - +- HashAggregate (3) + +- * HashAggregate (3) +- * ColumnarToRow (2) +- Scan parquet default.explain_temp1 (1) @@ -892,7 +892,7 @@ ReadSchema: struct (2) ColumnarToRow [codegen id : 1] Input [2]: [key#x, val#x] -(3) HashAggregate +(3) HashAggregate [codegen id : 1] Input [2]: [key#x, val#x] Keys: [] Functions [3]: [partial_count(val#x), partial_sum(cast(key#x as bigint)), partial_count(key#x) FILTER (WHERE (val#x > 1))] From 10b6466e91d2e954386c74bf6ab7d94f23dd6810 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Fri, 25 Dec 2020 09:07:48 +0900 Subject: [PATCH 0881/1009] [SPARK-33084][CORE][SQL] Add jar support ivy path ### What changes were proposed in this pull request? Support `ADD JAR` with an Ivy path. ### Why are the changes needed? Since spark-submit already supports Ivy coordinates, `ADD JAR` can now support them as well. ### Does this PR introduce _any_ user-facing change? Users can add a jar with SQL like:
```
ADD JAR ivy://group:artifact:version?exclude=xxx,xxx&transitive=true
ADD JAR ivy://group:artifact:version?exclude=xxx,xxx&transitive=false
```
Core API:
```
sparkContext.addJar("ivy://group:artifact:version?exclude=xxx,xxx&transitive=true")
sparkContext.addJar("ivy://group:artifact:version?exclude=xxx,xxx&transitive=false")
```
#### Doc Update snapshot ![image](https://user-images.githubusercontent.com/46485123/101227738-de451200-36d3-11eb-813d-78a8b879da4f.png) ### How was this patch tested? Added unit tests. Closes #29966 from AngersZhuuuu/support-add-jar-ivy.
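A hedged usage sketch (the coordinates below are illustrative only; substitute any `group:module:version` resolvable from your configured repositories):
```
// SQL: resolve just the named artifact, no transitive dependencies.
spark.sql("ADD JAR ivy://org.apache.commons:commons-lang3:3.11?transitive=false")

// Core API: also pull transitive dependencies, excluding selected group:module pairs.
spark.sparkContext.addJar(
  "ivy://org.apache.hive:hive-storage-api:2.7.2?transitive=true&exclude=commons-lang:commons-lang")
```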
Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: Takeshi Yamamuro --- .../scala/org/apache/spark/SparkContext.scala | 45 +++--- .../org/apache/spark/deploy/SparkSubmit.scala | 8 +- .../spark/deploy/worker/DriverWrapper.scala | 16 +- .../{deploy => util}/DependencyUtils.scala | 137 +++++++++++++++++- .../org/apache/spark/SparkContextSuite.scala | 116 +++++++++++++++ .../spark/deploy/SparkSubmitSuite.scala | 2 +- .../spark/deploy/SparkSubmitUtilsSuite.scala | 14 +- .../apache/spark/util/DependencyUtils.scala | 60 ++++++++ ...ql-ref-syntax-aux-resource-mgmt-add-jar.md | 16 +- .../spark/sql/internal/SessionState.scala | 30 ++-- sql/core/src/test/resources/SPARK-33084.jar | Bin 0 -> 6322 bytes .../org/apache/spark/sql/SQLQuerySuite.scala | 54 +++++++ .../sql/hive/HiveSessionStateBuilder.scala | 9 +- .../hive/client/IsolatedClientLoader.scala | 1 + .../sql/hive/execution/HiveQuerySuite.scala | 17 +++ 15 files changed, 475 insertions(+), 50 deletions(-) rename core/src/main/scala/org/apache/spark/{deploy => util}/DependencyUtils.scala (54%) create mode 100644 core/src/test/scala/org/apache/spark/util/DependencyUtils.scala create mode 100644 sql/core/src/test/resources/SPARK-33084.jar diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 17ceb5f1887c6..aae340953c5b2 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1929,7 +1929,7 @@ class SparkContext(config: SparkConf) extends Logging { } private def addJar(path: String, addedOnSubmit: Boolean): Unit = { - def addLocalJarFile(file: File): String = { + def addLocalJarFile(file: File): Seq[String] = { try { if (!file.exists()) { throw new FileNotFoundException(s"Jar ${file.getAbsolutePath} not found") @@ -1938,15 +1938,15 @@ class SparkContext(config: SparkConf) extends Logging { throw new IllegalArgumentException( s"Directory ${file.getAbsoluteFile} is not allowed for addJar") } - env.rpcEnv.fileServer.addJar(file) + Seq(env.rpcEnv.fileServer.addJar(file)) } catch { case NonFatal(e) => logError(s"Failed to add $path to Spark environment", e) - null + Nil } } - def checkRemoteJarFile(path: String): String = { + def checkRemoteJarFile(path: String): Seq[String] = { val hadoopPath = new Path(path) val scheme = hadoopPath.toUri.getScheme if (!Array("http", "https", "ftp").contains(scheme)) { @@ -1959,28 +1959,29 @@ class SparkContext(config: SparkConf) extends Logging { throw new IllegalArgumentException( s"Directory ${path} is not allowed for addJar") } - path + Seq(path) } catch { case NonFatal(e) => logError(s"Failed to add $path to Spark environment", e) - null + Nil } } else { - path + Seq(path) } } if (path == null || path.isEmpty) { logWarning("null or empty path specified as parameter to addJar") } else { - val key = if (path.contains("\\") && Utils.isWindows) { + val (keys, scheme) = if (path.contains("\\") && Utils.isWindows) { // For local paths with backslashes on Windows, URI throws an exception - addLocalJarFile(new File(path)) + (addLocalJarFile(new File(path)), "local") } else { val uri = new Path(path).toUri // SPARK-17650: Make sure this is a valid URL before adding it to the list of dependencies Utils.validateURL(uri) - uri.getScheme match { + val uriScheme = uri.getScheme + val jarPaths = uriScheme match { // A JAR file which exists only on the driver node case null => // SPARK-22585 path without schema is not url encoded @@ -1988,18 +1989,28 @@ 
class SparkContext(config: SparkConf) extends Logging { // A JAR file which exists only on the driver node case "file" => addLocalJarFile(new File(uri.getPath)) // A JAR file which exists locally on every worker node - case "local" => "file:" + uri.getPath + case "local" => Seq("file:" + uri.getPath) + case "ivy" => + // Since `new Path(path).toUri` will lose query information, + // so here we use `URI.create(path)` + DependencyUtils.resolveMavenDependencies(URI.create(path)) + .flatMap(jar => addLocalJarFile(new File(jar))) case _ => checkRemoteJarFile(path) } + (jarPaths, uriScheme) } - if (key != null) { + if (keys.nonEmpty) { val timestamp = if (addedOnSubmit) startTime else System.currentTimeMillis - if (addedJars.putIfAbsent(key, timestamp).isEmpty) { - logInfo(s"Added JAR $path at $key with timestamp $timestamp") + val (added, existed) = keys.partition(addedJars.putIfAbsent(_, timestamp).isEmpty) + if (added.nonEmpty) { + val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" + logInfo(s"Added $jarMessage $path at ${added.mkString(",")} with timestamp $timestamp") postEnvironmentUpdate() - } else { - logWarning(s"The jar $path has been added already. Overwriting of added jars " + - "is not supported in the current version.") + } + if (existed.nonEmpty) { + val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" + logInfo(s"The $jarMessage $path at ${existed.mkString(",")} has been added already." + + " Overwriting of added jar is not supported in the current version.") } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index bb3a20dce2da4..ad95b18ecaeb0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -304,8 +304,8 @@ private[spark] class SparkSubmit extends Logging { // Resolve maven dependencies if there are any and add classpath to jars. 
Add them to py-files // too for packages that include Python code val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies( - args.packagesExclusions, args.packages, args.repositories, args.ivyRepoPath, - args.ivySettingsPath) + packagesTransitive = true, args.packagesExclusions, args.packages, + args.repositories, args.ivyRepoPath, args.ivySettingsPath) if (!StringUtils.isBlank(resolvedMavenCoordinates)) { // In K8s client mode, when in the driver, add resolved jars early as we might need @@ -1360,6 +1360,7 @@ private[spark] object SparkSubmitUtils { * Resolves any dependencies that were supplied through maven coordinates * @param coordinates Comma-delimited string of maven coordinates * @param ivySettings An IvySettings containing resolvers to use + * @param transitive Whether resolving transitive dependencies, default is true * @param exclusions Exclusions to apply when resolving transitive dependencies * @return The comma-delimited path to the jars of the given maven artifacts including their * transitive dependencies @@ -1367,6 +1368,7 @@ private[spark] object SparkSubmitUtils { def resolveMavenCoordinates( coordinates: String, ivySettings: IvySettings, + transitive: Boolean, exclusions: Seq[String] = Nil, isTest: Boolean = false): String = { if (coordinates == null || coordinates.trim.isEmpty) { @@ -1396,7 +1398,7 @@ private[spark] object SparkSubmitUtils { val ivy = Ivy.newInstance(ivySettings) // Set resolve options to download transitive dependencies as well val resolveOptions = new ResolveOptions - resolveOptions.setTransitive(true) + resolveOptions.setTransitive(transitive) val retrieveOptions = new RetrieveOptions // Turn downloading and logging off for testing if (isTest) { diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 45ffdde58d6c3..c1288d64c53f7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -22,7 +22,7 @@ import java.io.File import org.apache.commons.lang3.StringUtils import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.deploy.{DependencyUtils, SparkHadoopUtil} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{config, Logging} import org.apache.spark.rpc.RpcEnv import org.apache.spark.util._ @@ -79,17 +79,11 @@ object DriverWrapper extends Logging { val secMgr = new SecurityManager(sparkConf) val hadoopConf = SparkHadoopUtil.newConfiguration(sparkConf) - val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = - Seq( - "spark.jars.excludes", - "spark.jars.packages", - "spark.jars.repositories", - "spark.jars.ivy", - "spark.jars.ivySettings" - ).map(sys.props.get(_).orNull) + val ivyProperties = DependencyUtils.getIvyProperties() - val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(packagesExclusions, - packages, repositories, ivyRepoPath, Option(ivySettingsPath)) + val resolvedMavenCoordinates = DependencyUtils.resolveMavenDependencies(true, + ivyProperties.packagesExclusions, ivyProperties.packages, ivyProperties.repositories, + ivyProperties.ivyRepoPath, Option(ivyProperties.ivySettingsPath)) val jars = { val jarsProp = sys.props.get(config.JARS.key).orNull if (!StringUtils.isBlank(resolvedMavenCoordinates)) { diff --git a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala 
b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala similarity index 54% rename from core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala rename to core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index 5a17a6b6e169c..9956ccedf5842 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.deploy +package org.apache.spark.util import java.io.File import java.net.URI @@ -25,12 +25,140 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.SparkSubmitUtils import org.apache.spark.internal.Logging -import org.apache.spark.util.{MutableURLClassLoader, Utils} -private[deploy] object DependencyUtils extends Logging { +case class IvyProperties( + packagesExclusions: String, + packages: String, + repositories: String, + ivyRepoPath: String, + ivySettingsPath: String) + +private[spark] object DependencyUtils extends Logging { + + def getIvyProperties(): IvyProperties = { + val Seq(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) = Seq( + "spark.jars.excludes", + "spark.jars.packages", + "spark.jars.repositories", + "spark.jars.ivy", + "spark.jars.ivySettings" + ).map(sys.props.get(_).orNull) + IvyProperties(packagesExclusions, packages, repositories, ivyRepoPath, ivySettingsPath) + } + + private def isInvalidQueryString(tokens: Array[String]): Boolean = { + tokens.length != 2 || StringUtils.isBlank(tokens(0)) || StringUtils.isBlank(tokens(1)) + } + + /** + * Parse URI query string's parameter value of `transitive` and `exclude`. + * Other invalid parameters will be ignored. + * + * @param uri Ivy URI need to be downloaded. + * @return Tuple value of parameter `transitive` and `exclude` value. + * + * 1. transitive: whether to download dependency jar of Ivy URI, default value is false + * and this parameter value is case-sensitive. Invalid value will be treat as false. + * Example: Input: exclude=org.mortbay.jetty:jetty&transitive=true + * Output: true + * + * 2. exclude: comma separated exclusions to apply when resolving transitive dependencies, + * consists of `group:module` pairs separated by commas. + * Example: Input: excludeorg.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http + * Output: [org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http] + */ + private def parseQueryParams(uri: URI): (Boolean, String) = { + val uriQuery = uri.getQuery + if (uriQuery == null) { + (false, "") + } else { + val mapTokens = uriQuery.split("&").map(_.split("=")) + if (mapTokens.exists(isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid query string in Ivy URI ${uri.toString}: $uriQuery") + } + val groupedParams = mapTokens.map(kv => (kv(0), kv(1))).groupBy(_._1) + + // Parse transitive parameters (e.g., transitive=true) in an Ivy URI, default value is false + val transitiveParams = groupedParams.get("transitive") + if (transitiveParams.map(_.size).getOrElse(0) > 1) { + logWarning("It's best to specify `transitive` parameter in ivy URI query only once." 
+ + " If there are multiple `transitive` parameters, we will select the last one") + } + val transitive = + transitiveParams.flatMap(_.takeRight(1).map(_._2 == "true").headOption).getOrElse(false) + + // Parse an excluded list (e.g., exclude=org.mortbay.jetty:jetty,org.eclipse.jetty:jetty-http) + // in an Ivy URI. When downloading an Ivy URI jar, Spark won't download transitive jars + // in the exclusion list. + val exclusionList = groupedParams.get("exclude").map { params => + params.map(_._2).flatMap { excludeString => + val excludes = excludeString.split(",") + if (excludes.map(_.split(":")).exists(isInvalidQueryString)) { + throw new IllegalArgumentException( + s"Invalid exclude string in Ivy URI ${uri.toString}:" + + " expected 'org:module,org:module,..', found " + excludeString) + } + excludes + }.mkString(",") + }.getOrElse("") + + val validParams = Set("transitive", "exclude") + val invalidParams = groupedParams.keys.filterNot(validParams.contains).toSeq + if (invalidParams.nonEmpty) { + logWarning(s"Invalid parameters `${invalidParams.sorted.mkString(",")}` found " + + s"in Ivy URI query `$uriQuery`.") + } + + (transitive, exclusionList) + } + } + + /** + * Download Ivy URI's dependency jars. + * + * @param uri Ivy URI to be downloaded. The URI format should be: + * `ivy://group:module:version[?query]` + * Ivy URI query part format should be: + * `parameter=value&parameter=value...` + * Note that currently the Ivy URI query part supports two parameters: + * 1. transitive: whether to download dependent jars related to your Ivy URI. + * transitive=false or `transitive=true`, if not set, the default value is false. + * 2. exclude: exclusion list when downloading the Ivy URI jar and dependency jars. + * The `exclude` parameter content is a ',' separated `group:module` pair string: + * `exclude=group:module,group:module...` + * @return Comma separated string list of jars downloaded.
+ */ + def resolveMavenDependencies(uri: URI): Seq[String] = { + val ivyProperties = DependencyUtils.getIvyProperties() + val authority = uri.getAuthority + if (authority == null) { + throw new IllegalArgumentException( + s"Invalid Ivy URI authority in uri ${uri.toString}:" + + " Expected 'org:module:version', found null.") + } + if (authority.split(":").length != 3) { + throw new IllegalArgumentException( + s"Invalid Ivy URI authority in uri ${uri.toString}:" + + s" Expected 'org:module:version', found $authority.") + } + + val (transitive, exclusionList) = parseQueryParams(uri) + + resolveMavenDependencies( + transitive, + exclusionList, + authority, + ivyProperties.repositories, + ivyProperties.ivyRepoPath, + Option(ivyProperties.ivySettingsPath) + ).split(",") + } def resolveMavenDependencies( + packagesTransitive: Boolean, packagesExclusions: String, packages: String, repositories: String, @@ -51,7 +179,8 @@ private[deploy] object DependencyUtils extends Logging { SparkSubmitUtils.buildIvySettings(Option(repositories), Option(ivyRepoPath)) } - SparkSubmitUtils.resolveMavenCoordinates(packages, ivySettings, exclusions = exclusions) + SparkSubmitUtils.resolveMavenCoordinates(packages, ivySettings, + transitive = packagesTransitive, exclusions = exclusions) } def resolveAndDownloadJars( diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 55bfa70f21fc2..770ffeef4106f 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -1034,6 +1034,122 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext with Eventu .set(EXECUTOR_ALLOW_SPARK_CONTEXT, true)).stop() } } + + test("SPARK-33084: Add jar support Ivy URI -- default transitive = false") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- invalid transitive use default false") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=foo") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- transitive=true will download dependency jars") { + val logAppender = new LogAppender("transitive=true will download dependency jars") + withLogAppender(logAppender) { + sc = new SparkContext( + new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + val dependencyJars = Array( + "org.apache.hive_hive-storage-api-2.7.0.jar", + "org.slf4j_slf4j-api-1.7.10.jar", + "commons-lang_commons-lang-2.6.jar") + + dependencyJars.foreach(jar => assert(sc.listJars().exists(_.contains(jar)))) + + 
assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( + "Added dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) + + // test dependency jars exist + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(logAppender.loggingEvents.count(_.getRenderedMessage.contains( + "The dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) == 1) + val existMsg = logAppender.loggingEvents.filter(_.getRenderedMessage.contains( + "The dependency jars of Ivy URI" + + " ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true")) + .head.getRenderedMessage + dependencyJars.foreach(jar => assert(existMsg.contains(jar))) + } + } + + test("SPARK-33084: Add jar support Ivy URI -- test exclude param when transitive=true") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0" + + "?exclude=commons-lang:commons-lang&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("org.slf4j_slf4j-api-1.7.10.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test different version") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0") + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.6.0") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.6.0.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test invalid param") { + val logAppender = new LogAppender("test log when have invalid parameter") + withLogAppender(logAppender) { + sc = new SparkContext( + new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" + + "invalidParam1=foo&invalidParam2=boo") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains( + "Invalid parameters `invalidParam1,invalidParam2` found in Ivy URI query" + + " `invalidParam1=foo&invalidParam2=boo`."))) + } + } + + test("SPARK-33084: Add jar support Ivy URI -- test multiple transitive params") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + // transitive=invalidValue will win and treated as false + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" + + "transitive=true&transitive=invalidValue") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + // transitive=true will win + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?" 
+ + "transitive=false&transitive=invalidValue&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test param key case sensitive") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?TRANSITIVE=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } + + test("SPARK-33084: Add jar support Ivy URI -- test transitive value case sensitive") { + sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local-cluster[3, 1, 1024]")) + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=TRUE") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(!sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + + sc.addJar("ivy://org.apache.hive:hive-storage-api:2.7.0?transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-storage-api-2.7.0.jar"))) + assert(sc.listJars().exists(_.contains("commons-lang_commons-lang-2.6.jar"))) + } } object SparkContextSuite { diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index dcd35f3f6b93f..c64f1b5814c20 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -47,7 +47,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.launcher.SparkLauncher -import org.apache.spark.util.{CommandLineUtils, ResetSystemProperties, Utils} +import org.apache.spark.util.{CommandLineUtils, DependencyUtils, ResetSystemProperties, Utils} trait TestPrematureExit { suite: SparkFunSuite => diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 2a37f75d86a41..eaa06ce2aa057 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -135,6 +135,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(Option(repo), Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf(tempIvyPath) >= 0, "should use non-default ivy path") } @@ -148,6 +149,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -159,6 +161,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite 
with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -171,6 +174,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") @@ -183,6 +187,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { SparkSubmitUtils.resolveMavenCoordinates( "a:b:c", SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) } } @@ -195,6 +200,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val path = SparkSubmitUtils.resolveMavenCoordinates( coordinates, SparkSubmitUtils.buildIvySettings(None, Some(tempIvyPath)), + transitive = true, isTest = true) assert(path === "", "should return empty path") val main = MavenCoordinate("org.apache.spark", "spark-streaming-kafka-assembly_2.12", "1.2.0") @@ -202,6 +208,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val files = SparkSubmitUtils.resolveMavenCoordinates( coordinates + "," + main.toString, SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)), + transitive = true, isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") } @@ -214,7 +221,8 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val files = SparkSubmitUtils.resolveMavenCoordinates( main.toString, SparkSubmitUtils.buildIvySettings(Some(repo), Some(tempIvyPath)), - Seq("my.great.dep:mydep"), + exclusions = Seq("my.great.dep:mydep"), + transitive = true, isTest = true) assert(files.indexOf(main.artifactId) >= 0, "Did not return artifact") assert(files.indexOf("my.great.dep") < 0, "Returned excluded artifact") @@ -250,7 +258,8 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { testUtilSettings.setDefaultIvyUserDir(new File(tempIvyPath)) IvyTestUtils.withRepository(main, Some(dep), Some(dummyIvyLocal), useIvyLayout = true, ivySettings = testUtilSettings) { repo => - val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, settings, isTest = true) + val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, settings, + transitive = true, isTest = true) assert(jarPath.indexOf("mylib") >= 0, "should find artifact") assert(jarPath.indexOf(tempIvyPath) >= 0, "should be in new ivy path") assert(jarPath.indexOf("mydep") >= 0, "should find dependency") @@ -265,6 +274,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { val jarPath = SparkSubmitUtils.resolveMavenCoordinates( main.toString, ivySettings, + transitive = true, isTest = true) val r = """.*org.apache.spark-spark-submit-parent-.*""".r assert(!ivySettings.getDefaultCache.listFiles.map(_.getName) diff --git a/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala new file mode 100644 index 0000000000000..d181d4d8ce669 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/DependencyUtils.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import java.net.URI + +import org.apache.spark.SparkFunSuite + +class DependencyUtilsSuite extends SparkFunSuite { + + test("SPARK-33084: Add jar support Ivy URI -- test invalid ivy uri") { + val e1 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies(URI.create("ivy://")) + }.getMessage + assert(e1.contains("Expected authority at index 6: ivy://")) + + val e2 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies(URI.create("ivy://org.apache.hive:hive-contrib")) + }.getMessage + assert(e2.contains("Invalid Ivy URI authority in uri ivy://org.apache.hive:hive-contrib:" + + " Expected 'org:module:version', found org.apache.hive:hive-contrib.")) + + val e3 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?foo=")) + }.getMessage + assert(e3.contains("Invalid query string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?foo=:")) + + val e4 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo")) + }.getMessage + assert(e4.contains("Invalid query string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?bar=&baz=foo: bar=&baz=foo")) + + val e5 = intercept[IllegalArgumentException] { + DependencyUtils.resolveMavenDependencies( + URI.create("ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho")) + }.getMessage + assert(e5.contains("Invalid exclude string in Ivy URI" + + " ivy://org.apache.hive:hive-contrib:2.3.7?exclude=org.pentaho:" + + " expected 'org:module,org:module,..', found org.pentaho")) + } +} diff --git a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md index 4694bff99daf5..6d31125fd612d 100644 --- a/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md +++ b/docs/sql-ref-syntax-aux-resource-mgmt-add-jar.md @@ -33,8 +33,18 @@ ADD JAR file_name * **file_name** - The name of the JAR file to be added. It could be either on a local file system or a distributed file system. + The name of the JAR file to be added. It could be either on a local file system or a distributed file system or an Ivy URI. + Apache Ivy is a popular dependency manager focusing on flexibility and simplicity. Now we support two parameter in URI query string: + * transitive: whether to download dependent jars related to your ivy URL. It is case-sensitive and only take last one if multiple transitive parameters are specified. + * exclude: exclusion list during downloading Ivy URI jar and dependent jars. 
+ + User can write Ivy URI such as: + + ivy://group:module:version + ivy://group:module:version?transitive=[true|false] + ivy://group:module:version?transitive=[true|false]&exclude=group:module,group:module + ### Examples ```sql @@ -42,6 +52,10 @@ ADD JAR /tmp/test.jar; ADD JAR "/path/to/some.jar"; ADD JAR '/some/other.jar'; ADD JAR "/path with space/abc.jar"; +ADD JAR "ivy://group:module:version"; +ADD JAR "ivy://group:module:version?transitive=false" +ADD JAR "ivy://group:module:version?transitive=true" +ADD JAR "ivy://group:module:version?exclude=group:module&transitive=true" ``` ### Related Statements diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 48d8c3d325347..60ca06dbe0d52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.internal import java.io.File +import java.net.URI import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -34,6 +35,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution._ import org.apache.spark.sql.streaming.StreamingQueryManager import org.apache.spark.sql.util.ExecutionListenerManager +import org.apache.spark.util.DependencyUtils /** * A class that holds all session-specific state in a given [[SparkSession]]. @@ -159,6 +161,13 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade } } + def resolveJars(path: URI): Seq[String] = { + path.getScheme match { + case "ivy" => DependencyUtils.resolveMavenDependencies(path) + case _ => path.toString :: Nil + } + } + /** * Add a jar path to [[SparkContext]] and the classloader. * @@ -167,16 +176,19 @@ class SessionResourceLoader(session: SparkSession) extends FunctionResourceLoade * [[SessionState]]. 
*/ def addJar(path: String): Unit = { - session.sparkContext.addJar(path) - val uri = new Path(path).toUri - val jarURL = if (uri.getScheme == null) { - // `path` is a local file path without a URL scheme - new File(path).toURI.toURL - } else { - // `path` is a URL with a scheme - uri.toURL + val uri = URI.create(path) + resolveJars(uri).foreach { p => + session.sparkContext.addJar(p) + val uri = new Path(p).toUri + val jarURL = if (uri.getScheme == null) { + // `path` is a local file path without a URL scheme + new File(p).toURI.toURL + } else { + // `path` is a URL with a scheme + uri.toURL + } + session.sharedState.jarClassLoader.addURL(jarURL) } - session.sharedState.jarClassLoader.addURL(jarURL) Thread.currentThread().setContextClassLoader(session.sharedState.jarClassLoader) } } diff --git a/sql/core/src/test/resources/SPARK-33084.jar b/sql/core/src/test/resources/SPARK-33084.jar new file mode 100644 index 0000000000000000000000000000000000000000..1dc5e9303b707f9b788c28fc23a0f0aa5b2c323b GIT binary patch literal 6322 zcmbVR2|Sc*`yN~NeQB)8R>m4*36(u&7z~Y_WM?ppbwUZ#A~{(@)+GBf_AJ?Dm!*=j zi^@T^VURzk)H$cl`M&Re&+lF4H}`ct_x(Qa``qt+jXsowlnQWY&Qb9Of1G^VQ63z1 zHH=imv|*YO`rq6r08R&P5w-k4&x4p;cCU(iCu zKz%*^WpK%1>B%`SQApo&QK*}(X zM!=b&aKpytdt|pe*>kBHZrkSuP_<*SdC@9KaPN9JPG!?%fxVT}jAPa7vlkKIC`15& z;`U!NAw3Au!^`n|51S_(elGka|t{Vtysh`*z=N@0Xu)k4t4bTYQO;t8=H3+3923Ocu-&_+Lz5yDcWdw-yeWtrBE)d}tidKZpVjd$gLzDk4}+mE zVh@pxZq(>dmq^nAZ?@M3o>b!!A;ZHu29PyU?*;j(TM^ndYOB|KrIOGq$bAbNVM!h; zhU0B@AI}#v4(i671?eiOk*s_1YA75NV`j{DJ#A1_;Lwk_AjGiWoDA_Le296T@bE%u z$#g$LXz%mz8x-E%?Lv0p)}x#7{oNO@DCP9V`=4*q%1Eo$StVt^B+=#^k;{{MyRs`p zc$fGVZP2Z~tWe*qUvmquZo(&9ihNhdJrE!GEJ|#>N2G(hsp^Fz+8)|dTlPLpoI0PR zXZ8!SsTmiPs#hu|DAOPvy|^}bF7j0F><~jQ?%aFK3D?Mkw(+HRogoPq<02OqTlsF6 z-TMMRt>%DP(2`4r#10xRH5hrJuso|SgOkX!DX21{;p5iT2}0yn3HX3E`kA&fT2!Hn8SaGo?+v?UaYCu^8xUYm0aX?>bfdQTcB@fj2qtjU}6=q^)9kRT+JkklwQwa9p_ z3w_BdZ=qa0S8AWZ?AZOgNw2TP_CK%WRAunJLC;5;DpTB_X)#i)SLbxL6ftDfpwC); zsq{`r1k>fRv=>suvQyVR9|%m+Wu}VCJR@~@6IF(gSG~UdTj|U^&n3*jQVsB(THb|e1OiomY;AVP(}2e%5v!%V_`h|𝔄e>Y*Uq;UTJf%* znp31=O}3}5OevvMBwccDxtNmHxU|pQP^&99E6rb0?)ibqACtyLa-wur-TqQOA|}U) zR^pT2>r-2A$jDb{8yHws+}h12F!NYqmJP#%^*tts#bDxv1|cvAsG6NPVd9$gss`0% zEW_Wdh9_;5CqpdGs_j&vXRx+83L(UGb{3jV;QSNzYn)y?@7h|%=Pd-7g1UjFpY}@{r>6?i z_a9!@wF&G;RnpNc0m8&HJ}pmTAv)1S)#u&K+B!r+5ODF_CFV3P z;jBmsO*7MnsXeA|!4F^m;hnE3*u(f2@}NSW63AOH!vKfuY#bb!WPF;jS`0>DU8xbDd1yB%MsuaG$z5 z+r?wdNXMAY%tU(Ya_y-dr=d^y)ie(~+2MF9>+VGQe&g+8BX2>*28ZTns=1u|9qq-eqRglJv?4?7G*FyPwck?3(VaygV?zYh9m0J8^1KatouJ+3N?kmp7#h&f5L;+ySV>Yx++ba$_ zKg*WNx4V`JiW&`~Ukz-o0`Uat^7Rei>ED-_ztBxNdDbW2zsmbuOM0Bb zOT)_x4JK!3yH~`eTgz(BV$U6)1Cz1dxdX%iF*Ny8SM8hgnGMC0UF{hf$77Be#GdOY z_Adhqtq*at!!0oExReYBD&fVZ^JGC_MHt@RfEApmq+92)1#g~DOKsYZg{u?U;^T4vdx>rIu>i_P1DixHn{ z*O4)$IvqPyLMucq;~;#s;S<~Tjk{E#AUZBJxyXL4`8jUhh_YYw|gM1{=O@Ss#Gi5W9Hr+^9MH7 zD`_%Bf7*FUAk>gO3{bF=TG(>@LUi{4hHUrOnT?nd9vnV6{+ z7(zQxN6@i)sKNPOj18ZnFzk`u=N$4h<+lbhOJtwf(%iH3RW2|$wF6`3M6Z^ssY%$DE}MBW5Ew`BRigsQemnJRKCir)BwERs2qiHM=H45GzO`J%>Z zl5!%nr6I?)IqY0cTns9>!*R8M?_|-NR+(DJ%X-15yHEBAN`4MLAJW7} zjr!0}ZEposzrN_WkZD<8sEDy#XDMA0yNa<7fOv!nxr}b6P^C$>W>txrT^Y>eOw%)? 
zmesgabU94m)B>KyC;YWiBpm!Pd!HulMM2v5wDqdsmHzt}p;PKuW7SZ_S(ZomMg@ZP zbmQK7h7XrzM<;Eaa9Yhg5L9TZKGs-6Yq1rsW4rY*^{N2whJ?TqApN9e?sc=1H>#Wj zVawA(yN}%_=WdD=p2)uCcgz3w&PUm|xk5`(Ob)`gR7+IRGyv}QfM3C~~@F1Sfm z;rkTJ)Ei;`2z@6QVV4x^bf1Y9n!!Zd-3y7=YQq_+zT*NVQmoy7Nld9%aeX6qZ!Nxu z4*Zyp`~G9+YZ3b}jk{fM+{JKlg&OflFrmC`nnWwH@3M;Yzj^*fJ#xm?c_iFp!7}}sFNQP=X+LU+J8Ngr)^eX!qMtF* zM~IJ4BzDR?E;2{xaotF~QdW z4VIRD-j|8^o3n1v*l-JWYMHG>^XE`Fl5@Qr>nKq!S4zRp@A-?B&8+9Vf>Nd0V4}i@ zM-7vCiHIDuY9=oQ+<&Z~n_>{JzpJHOZ)>7|dX?h-VE5?n{sb8gx*f+Slq)DTb9W~C z9s}qyOJ;<)ZrsEfmfWQ-Q)1Pg67Mx=f%FyUc#hvF;J{b6VbdXBh7#3Ys}o-s1`sD$ zFtxSGnv$GTY<%7YPZohcG;}u0wYTq;4JCs)#-ki5>P3d;W5&xnV*%Z{Bp{WcUhaxj zVl+lTvQ7U=BA~?^r<~JmF4q=Y-bI@O0 zN4Me1Q$9P2&$L#_n`n->t*UY_Skq@r8sQH$CZ6h7Ul4XzIz=`( zb6;}aJmqP{awbc>%S%S*6$M!PTx!{E7pn2S)Z#gxX^rF_;)NZuHfDIYZ4j}z=SApcFAem#9W|fNJ^&| zeFP~fkxCSvC#F%9Kbua=R!3W?q8{LJbZ!z6lQLJx({ z8JEwPMfzhd)$4_dK}5B64Ycx%R&+(7y)$q+Vckn8ZC!+<6kHNqB~?)_ z1vW&{(HTfeNlHzEzR7zs2kIU~qDw+-MW?h^0`dwfs-@GXH6kLeCEE2KA`0r%f$E48 zb&-+F9}D=z!O3az0ClL@5fQVH{ImXekjsHF0Df0FJpM_IJc9jNk34ifw5*U*2m9}^ zKdF;{9{5*s;UDln2JSjI{VEtOV&spR`E_&igS+xT zp+20Mzg({G`5wmkgZo>n^Mm~7QhHc-zZwHzf8YP$>-QVys0trt{WF;N2zzkP?C(GR z8T^Z?{LgHMb@{6uvd#P#Y=5oNN6f#j(?{G{Az}2G c-v{`y80kaFjvaE*9ej8WMEu~x{~7@Q2O^~=-v9sr literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b7cec55245564..0ba58e1634f06 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,6 +22,8 @@ import java.net.{MalformedURLException, URL} import java.sql.{Date, Timestamp} import java.util.concurrent.atomic.AtomicBoolean +import org.apache.commons.io.FileUtils + import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.expressions.GenericRow @@ -3719,6 +3721,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-33084: Add jar support Ivy URI in SQL") { + val sc = spark.sparkContext + // default transitive=false, only download specified jar + sql("ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + assert(sc.listJars() + .exists(_.contains("org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar"))) + + // test download ivy URL jar return multiple jars + sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0?transitive=true") + assert(sc.listJars().exists(_.contains("scalajs-library_2.12"))) + assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12"))) + + sql("ADD JAR ivy://org.apache.hive:hive-contrib:2.3.7" + + "?exclude=org.pentaho:pentaho-aggdesigner-algorithm&transitive=true") + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-contrib-2.3.7.jar"))) + assert(sc.listJars().exists(_.contains("org.apache.hive_hive-exec-2.3.7.jar"))) + assert(!sc.listJars().exists(_.contains("org.pentaho.pentaho_aggdesigner-algorithm"))) + } + test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { withTempView("df") { Seq("m@ca").toDF("s").createOrReplaceTempView("df") @@ -3771,6 +3792,39 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } }) } + + test("SPARK-33084: Add jar support Ivy URI in SQL -- jar contains udf class") { + val sumFuncClass = "org.apache.spark.examples.sql.Spark33084" + val functionName = "test_udf" + withTempDir { 
dir => + System.setProperty("ivy.home", dir.getAbsolutePath) + val sourceJar = new File(Thread.currentThread().getContextClassLoader + .getResource("SPARK-33084.jar").getFile) + val targetCacheJarDir = new File(dir.getAbsolutePath + + "/local/org.apache.spark/SPARK-33084/1.0/jars/") + targetCacheJarDir.mkdir() + // copy jar to local cache + FileUtils.copyFileToDirectory(sourceJar, targetCacheJarDir) + withTempView("v1") { + withUserDefinedFunction( + s"default.$functionName" -> false, + functionName -> true) { + // create temporary function without class + val e = intercept[AnalysisException] { + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + }.getMessage + assert(e.contains("Can not load class 'org.apache.spark.examples.sql.Spark33084")) + sql("ADD JAR ivy://org.apache.spark:SPARK-33084:1.0") + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$sumFuncClass'") + // create a view using a function in 'default' database + sql(s"CREATE TEMPORARY VIEW v1 AS SELECT $functionName(col1) FROM VALUES (1), (2), (3)") + // view v1 should still using function defined in `default` database + checkAnswer(sql("SELECT * FROM v1"), Seq(Row(2.0))) + } + } + System.clearProperty("ivy.home") + } + } } case class Foo(bar: Option[String]) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 5963a71f55035..654f9f62ebdd3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import java.net.URI + import org.apache.spark.annotation.Unstable import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.{Analyzer, ResolveSessionCatalog} @@ -127,7 +129,10 @@ class HiveSessionResourceLoader( extends SessionResourceLoader(session) { private lazy val client = clientBuilder() override def addJar(path: String): Unit = { - client.addJar(path) - super.addJar(path) + val uri = URI.create(path) + resolveJars(uri).foreach { p => + client.addJar(p) + super.addJar(p) + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index c0758dcdfc879..97e685efd27de 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -124,6 +124,7 @@ private[hive] object IsolatedClientLoader extends Logging { SparkSubmitUtils.buildIvySettings( Some(remoteRepos), ivyPath), + transitive = true, exclusions = version.exclusions) } val allFiles = classpath.split(",").map(new File(_)).toSet diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 1cabf6033e8d8..21cc6af398eec 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1219,6 +1219,23 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } } } + + test("SPARK-33084: Add jar support Ivy URI in SQL") { + val testData = TestHive.getHiveFile("data/files/sample.json").toURI + withTable("t") { + sql("ADD JAR 
ivy://org.apache.hive.hcatalog:hive-hcatalog-core:2.3.7") + sql( + """CREATE TABLE t(a string, b string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'""".stripMargin) + sql(s"""LOAD DATA LOCAL INPATH "$testData" INTO TABLE t""") + sql("SELECT * FROM src JOIN t on src.key = t.a") + assert(sql("LIST JARS").filter(_.getString(0).contains( + "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + assert(sql("LIST JAR"). + filter(_.getString(0).contains( + "org.apache.hive.hcatalog_hive-hcatalog-core-2.3.7.jar")).count() > 0) + } + } } // for SPARK-2180 test From 2553d53dc85fdf1127446941e2bc749e721c1b57 Mon Sep 17 00:00:00 2001 From: kozakana Date: Sat, 26 Dec 2020 16:30:50 +0900 Subject: [PATCH 0882/1009] [SPARK-33897][SQL] Can't set option 'cross' in join method ### What changes were proposed in this pull request? [The PySpark documentation](https://spark.apache.org/docs/3.0.1/api/python/pyspark.sql.html#pyspark.sql.DataFrame.join) says "Must be one of: inner, cross, outer, full, fullouter, full_outer, left, leftouter, left_outer, right, rightouter, right_outer, semi, leftsemi, left_semi, anti, leftanti and left_anti." However, I get the following error when I set the cross option. ``` scala> val df1 = spark.createDataFrame(Seq((1,"a"),(2,"b"))) df1: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> val df2 = spark.createDataFrame(Seq((1,"A"),(2,"B"), (3, "C"))) df2: org.apache.spark.sql.DataFrame = [_1: int, _2: string] scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show() java.lang.IllegalArgumentException: requirement failed: Unsupported using join type Cross at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.plans.UsingJoin.<init>(joinTypes.scala:106) at org.apache.spark.sql.Dataset.join(Dataset.scala:1025) ... 53 elided ``` ### Why are the changes needed? The documentation says the cross option can be set, but when I try to set it, I get a java.lang.IllegalArgumentException. ### Does this PR introduce _any_ user-facing change? With this fix, the behavior matches the documentation. ### How was this patch tested? There is already a test for [JoinTypes](https://github.com/apache/spark/blob/1b9fd67904671ea08526bfb7a97d694815d47665/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala), but I can't find a test for the join option itself. Closes #30803 from kozakana/allow_cross_option.
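For reference, a minimal sketch (not part of the original report) of the call that succeeds once `Cross` is accepted by `UsingJoin`; it reuses the DataFrames from the example above and simply no longer throws:

```
scala> val df1 = spark.createDataFrame(Seq((1, "a"), (2, "b")))
scala> val df2 = spark.createDataFrame(Seq((1, "A"), (2, "B"), (3, "C")))
scala> df1.join(right = df2, usingColumns = Seq("_1"), joinType = "cross").show()
// no IllegalArgumentException anymore; the using-columns cross join is planned and executed
```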
Authored-by: kozakana Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/plans/joinTypes.scala | 2 +- .../org/apache/spark/sql/DataFrameJoinSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index feea1d2177ef0..da3cfb4c9de07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -102,7 +102,7 @@ case class NaturalJoin(tpe: JoinType) extends JoinType { } case class UsingJoin(tpe: JoinType, usingColumns: Seq[String]) extends JoinType { - require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti).contains(tpe), + require(Seq(Inner, LeftOuter, LeftSemi, RightOuter, FullOuter, LeftAnti, Cross).contains(tpe), "Unsupported using join type " + tpe) override def sql: String = "USING " + tpe.sql } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index c317f562c65dc..1513c2e90e27c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -122,6 +122,16 @@ class DataFrameJoinSuite extends QueryTest df2.crossJoin(df1), Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) + + checkAnswer( + df1.join(df2, Nil, "cross"), + Row(1, "1", 2, "2") :: Row(1, "1", 4, "4") :: + Row(3, "3", 2, "2") :: Row(3, "3", 4, "4") :: Nil) + + checkAnswer( + df2.join(df1, Nil, "cross"), + Row(2, "2", 1, "1") :: Row(2, "2", 3, "3") :: + Row(4, "4", 1, "1") :: Row(4, "4", 3, "3") :: Nil) } test("broadcast join hint using broadcast function") { From 37ae0a608670c660ba4c92b9ebb9cb9fb2bd67e6 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 26 Dec 2020 17:40:19 -0600 Subject: [PATCH 0883/1009] [SPARK-33560][TEST-MAVEN][BUILD] Add "unused-import" check to Maven compilation process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Similar to SPARK-33441, this PR adds an `unused-import` check to the Maven compilation process. After this PR, an unused import will trigger a Maven compilation error. For the Scala 2.13 profile, this PR also leaves a TODO(SPARK-33499), similar to SPARK-33441, because `scala.language.higherKinds` no longer needs to be imported explicitly since Scala 2.13.1. ### Why are the changes needed? Let the Maven build also treat unused imports as compilation errors. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Actions build. - Local manual test: add an unused import intentionally to trigger a Maven compilation error. Closes #30784 from LuciferYang/SPARK-33560.
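As an illustrative sketch only (a hypothetical file, not part of this patch), a class like the following now fails the Maven build because of its unused import once `-Ywarn-unused:imports` is combined with warnings treated as errors:

```
// Hypothetical example: ArrayBuffer is imported but never used, so scalac
// reports an "Unused import" warning, which the new compiler settings turn
// into a compilation error during the Maven build.
import scala.collection.mutable.ArrayBuffer

object UnusedImportExample {
  def main(args: Array[String]): Unit = println("ArrayBuffer is never referenced")
}
```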
Authored-by: yangjie01 Signed-off-by: Sean Owen --- pom.xml | 43 +++++++++++++++++++ .../sources/StreamingDataSourceV2Suite.scala | 2 +- 2 files changed, 44 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 4781f981a5949..609c9fc0ab0c3 100644 --- a/pom.xml +++ b/pom.xml @@ -2508,6 +2508,9 @@ -feature -explaintypes -target:jvm-1.8 + -Xfatal-warnings + -Ywarn-unused:imports + -P:silencer:globalFilters=.*deprecated.* -Xms1024m @@ -2521,6 +2524,13 @@ ${java.version} -Xlint:all,-serial,-path,-try + + + com.github.ghik + silencer-plugin_${scala.version} + 1.6.0 + + @@ -3243,6 +3253,39 @@ + + + + + net.alchim31.maven + scala-maven-plugin + + + -unchecked + -deprecation + -feature + -explaintypes + -target:jvm-1.8 + -Wconf:cat=deprecation:wv,any:e + + -Wconf:cat=scaladoc:wv + -Wconf:cat=lint-multiarg-infix:wv + -Wconf:cat=other-nullary-override:wv + -Wconf:cat=other-match-analysis&site=org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction:wv + -Wconf:cat=other-pure-statement&site=org.apache.spark.streaming.util.FileBasedWriteAheadLog.readAll.readFile:wv + -Wconf:cat=other-pure-statement&site=org.apache.spark.scheduler.OutputCommitCoordinatorSuite.<local OutputCommitCoordinatorSuite>.futureAction:wv + + + + + + + + + + -Wconf:msg=^(?=.*?method|value|type|object|trait|inheritance)(?=.*?deprecated)(?=.*?since 2.13).+$:s + -Wconf:msg=^(?=.*?Widening conversion from)(?=.*?is deprecated because it loses precision).+$:s + -Wconf:msg=Auto-application to \`\(\)\` is deprecated:s + -Wconf:msg=method with a single empty parameter list overrides method without any parameter list:s + -Wconf:msg=method without a parameter list overrides a method with a single empty one:s From e0d2ffec3109d973b106adeab5de5ce0c91a4a68 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 13:29:48 +0000 Subject: [PATCH 0905/1009] [SPARK-33859][SQL] Support V2 ALTER TABLE .. RENAME PARTITION ### What changes were proposed in this pull request? 1. Add `renamePartition()` to the `SupportsPartitionManagement` 2. Implement `renamePartition()` in `InMemoryPartitionTable` 3. Add v2 execution node `AlterTableRenamePartitionExec` 4. Resolve the logical node `AlterTableRenamePartition` to `AlterTableRenamePartitionExec` for v2 tables that support `SupportsPartitionManagement` 5. Move v1 tests to the base suite `org.apache.spark.sql.execution.command.AlterTableRenamePartitionSuiteBase` to run them for v2 table catalogs. ### Why are the changes needed? To have feature parity with Datasource V1. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? By running the unified tests: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #30935 from MaxGekk/alter-table-rename-partition-v2. 
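As a rough sketch of the user-facing effect (catalog, namespace, and table names below are placeholders for a v2 catalog whose tables implement `SupportsPartitionManagement`):

```
// Placeholder identifiers; assumes `testcat` is a registered v2 catalog with
// partition-aware tables.
spark.sql("CREATE TABLE testcat.ns.tbl (id BIGINT, data STRING) PARTITIONED BY (id)")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (id = 1) SELECT 'abc'")
// Before this change: AnalysisException("ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.")
spark.sql("ALTER TABLE testcat.ns.tbl PARTITION (id = 1) RENAME TO PARTITION (id = 2)")
```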
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/SupportsPartitionManagement.java | 17 +++ .../analysis/ResolvePartitionSpec.scala | 9 +- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 6 +- .../connector/InMemoryPartitionTable.scala | 13 ++ .../spark/sql/connector/InMemoryTable.scala | 22 +++ .../SupportsPartitionManagementSuite.scala | 19 +++ .../analysis/ResolveSessionCatalog.scala | 9 +- .../v2/AlterTableRenamePartitionExec.scala | 39 +++++ .../datasources/v2/DataSourceV2Strategy.scala | 9 +- .../AlterTableAddPartitionSuiteBase.scala | 3 - ...AlterTableRenamePartitionParserSuite.scala | 4 +- .../AlterTableRenamePartitionSuiteBase.scala | 130 ++++++++++++++++- .../command/DDLCommandTestUtils.scala | 3 + .../v1/AlterTableAddPartitionSuite.scala | 16 --- .../v1/AlterTableRenamePartitionSuite.scala | 136 +----------------- .../command/v1/CommandSuiteBase.scala | 17 +++ .../v2/AlterTableAddPartitionSuite.scala | 27 ---- .../v2/AlterTableRenamePartitionSuite.scala | 22 +-- .../command/v2/CommandSuiteBase.scala | 27 +++- .../execution/command/CommandSuiteBase.scala | 17 +++ 21 files changed, 343 insertions(+), 204 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java index 409ab3f5f9335..a7008293a3e19 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java @@ -139,4 +139,21 @@ Map loadPartitionMetadata(InternalRow ident) * @return an array of Identifiers for the partitions */ InternalRow[] listPartitionIdentifiers(String[] names, InternalRow ident); + + /** + * Rename an existing partition of the table. 
+ * + * @param from an existing partition identifier to rename + * @param to new partition identifier + * @return true if renaming completes successfully otherwise false + * @throws UnsupportedOperationException If partition renaming is not supported + * @throws PartitionAlreadyExistsException If the `to` partition exists already + * @throws NoSuchPartitionException If the `from` partition does not exist + */ + default boolean renamePartition(InternalRow from, InternalRow to) + throws UnsupportedOperationException, + PartitionAlreadyExistsException, + NoSuchPartitionException { + throw new UnsupportedOperationException("Partition renaming is not supported"); + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 2c2bea6f89d49..84be3f294a6ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -52,13 +52,14 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => val partitionSchema = table.partitionSchema() - r.copy(from = resolvePartitionSpecs( + val Seq(resolvedFrom, resolvedTo) = resolvePartitionSpecs( table.name, - Seq(from), + Seq(from, to), partitionSchema, - requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)).head) + requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)) + r.copy(from = resolvedFrom, to = resolvedTo) case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => r.copy(pattern = resolvePartitionSpecs( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c5707812e44bb..771bb5a1708b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3845,7 +3845,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg ctx.multipartIdentifier, "ALTER TABLE ... 
RENAME TO PARTITION"), UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.from)), - visitNonOptionalPartitionSpec(ctx.to)) + UnresolvedPartitionSpec(visitNonOptionalPartitionSpec(ctx.to))) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 02fb3a86db5d5..c51291d370c80 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -694,9 +694,11 @@ case class AlterTableDropPartition( case class AlterTableRenamePartition( child: LogicalPlan, from: PartitionSpec, - to: TablePartitionSpec) extends Command { + to: PartitionSpec) extends Command { override lazy val resolved: Boolean = - childrenResolved && from.isInstanceOf[ResolvedPartitionSpec] + childrenResolved && + from.isInstanceOf[ResolvedPartitionSpec] && + to.isInstanceOf[ResolvedPartitionSpec] override def children: Seq[LogicalPlan] = child :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index 83183a2ef6e2b..a3d610af2c06d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -107,4 +107,17 @@ class InMemoryPartitionTable( currentRow == ident }.toArray } + + override def renamePartition(from: InternalRow, to: InternalRow): Boolean = { + if (memoryTablePartitions.containsKey(to)) { + throw new PartitionAlreadyExistsException(name, to, partitionSchema) + } else { + val partValue = memoryTablePartitions.remove(from) + if (partValue == null) { + throw new NoSuchPartitionException(name, from, partitionSchema) + } + memoryTablePartitions.put(to, partValue) == null && + renamePartitionKey(partitionSchema, from.toSeq(schema), to.toSeq(schema)) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index c4c5835d9d1f5..201d67a815bea 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -165,6 +165,28 @@ class InMemoryTable( protected def addPartitionKey(key: Seq[Any]): Unit = {} + protected def renamePartitionKey( + partitionSchema: StructType, + from: Seq[Any], + to: Seq[Any]): Boolean = { + val rows = dataMap.remove(from).getOrElse(new BufferedRows(from.mkString("/"))) + val newRows = new BufferedRows(to.mkString("/")) + rows.rows.foreach { r => + val newRow = new GenericInternalRow(r.numFields) + for (i <- 0 until r.numFields) newRow.update(i, r.get(i, schema(i).dataType)) + for (i <- 0 until partitionSchema.length) { + val j = schema.fieldIndex(partitionSchema(i).name) + newRow.update(j, to(i)) + } + newRows.withRow(newRow) + } + dataMap.put(to, newRows).foreach { _ => + throw new IllegalStateException( + s"The ${to.mkString("[", ", ", "]")} partition exists already") + } + true + } + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala index 31494c7c2dd50..99441c81d9add 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/SupportsPartitionManagementSuite.scala @@ -23,6 +23,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} import org.apache.spark.sql.connector.expressions.{LogicalExpressions, NamedReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructType} @@ -214,4 +215,22 @@ class SupportsPartitionManagementSuite extends SparkFunSuite { }.getMessage assert(errMsg.contains("The identifier might not refer to one partition")) } + + test("renamePartition") { + val partTable = createMultiPartTable() + + val errMsg1 = intercept[PartitionAlreadyExistsException] { + partTable.renamePartition(InternalRow(0, "abc"), InternalRow(1, "abc")) + }.getMessage + assert(errMsg1.contains("Partition already exists")) + + val newPart = InternalRow(2, "xyz") + val errMsg2 = intercept[NoSuchPartitionException] { + partTable.renamePartition(newPart, InternalRow(3, "abc")) + }.getMessage + assert(errMsg2.contains("Partition not found")) + + assert(partTable.renamePartition(InternalRow(0, "abc"), newPart)) + assert(partTable.partitionExists(newPart)) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 925c7741eefe3..dec1300d66f35 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -444,11 +444,10 @@ class ResolveSessionCatalog( ifNotExists) case AlterTableRenamePartition( - ResolvedV1TableIdentifier(ident), UnresolvedPartitionSpec(from, _), to) => - AlterTableRenamePartitionCommand( - ident.asTableIdentifier, - from, - to) + ResolvedV1TableIdentifier(ident), + UnresolvedPartitionSpec(from, _), + UnresolvedPartitionSpec(to, _)) => + AlterTableRenamePartitionCommand(ident.asTableIdentifier, from, to) case AlterTableDropPartition( ResolvedV1TableIdentifier(ident), specs, ifExists, purge) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala new file mode 100644 index 0000000000000..38b83e3ad74e7 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.ResolvedPartitionSpec +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement + +/** + * Physical plan node for renaming a table partition. + */ +case class AlterTableRenamePartitionExec( + table: SupportsPartitionManagement, + from: ResolvedPartitionSpec, + to: ResolvedPartitionSpec) extends V2CommandExec { + + override def output: Seq[Attribute] = Seq.empty + + override protected def run(): Seq[InternalRow] = { + table.renamePartition(from.ident, to.ident) + Seq.empty + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 4667bb7cca998..2674aaf4f2e88 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -352,9 +352,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil - case AlterTableRenamePartition(_: ResolvedTable, _: ResolvedPartitionSpec, _) => - throw new AnalysisException( - "ALTER TABLE ... RENAME TO PARTITION is not supported for v2 tables.") + case AlterTableRenamePartition( + ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + AlterTableRenamePartitionExec( + table, + Seq(from).asResolvedPartitionSpecs.head, + Seq(to).asResolvedPartitionSpecs.head) :: Nil case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala index aa0668ccaaf53..2705adb8b3c67 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableAddPartitionSuiteBase.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.analysis.PartitionsAlreadyExistException -import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.internal.SQLConf /** @@ -39,8 +38,6 @@ import org.apache.spark.sql.internal.SQLConf trait AlterTableAddPartitionSuiteBase extends QueryTest with DDLCommandTestUtils { override val command = "ALTER TABLE .. 
ADD PARTITION" - protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit - test("one partition") { withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala index db6506c85bcec..c9a6732796729 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionParserSuite.scala @@ -32,7 +32,7 @@ class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSpark val expected = AlterTableRenamePartition( UnresolvedTable(Seq("a", "b", "c"), "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(Map("ds" -> "2017-06-10")), - Map("ds" -> "2018-06-10")) + UnresolvedPartitionSpec(Map("ds" -> "2018-06-10"))) comparePlans(parsed, expected) } @@ -45,7 +45,7 @@ class AlterTableRenamePartitionParserSuite extends AnalysisTest with SharedSpark val expected = AlterTableRenamePartition( UnresolvedTable(Seq("table_name"), "ALTER TABLE ... RENAME TO PARTITION"), UnresolvedPartitionSpec(Map("dt" -> "2008-08-08", "country" -> "us")), - Map("dt" -> "2008-09-09", "country" -> "uk")) + UnresolvedPartitionSpec(Map("dt" -> "2008-09-09", "country" -> "uk"))) comparePlans(parsed, expected) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 40c167ce424a0..58055262d3f11 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -17,7 +17,9 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import org.apache.spark.sql.internal.SQLConf /** * This base suite contains unified tests for the `ALTER TABLE .. RENAME PARTITION` command that @@ -35,4 +37,130 @@ import org.apache.spark.sql.QueryTest */ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUtils { override val command = "ALTER TABLE .. 
RENAME PARTITION" + + protected def createSinglePartTable(t: String): Unit = { + sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") + sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") + } + + test("rename without explicitly specifying database") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + createSinglePartTable("t") + checkPartitions("t", Map("id" -> "1")) + + sql(s"ALTER TABLE t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions("t", Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM t"), Row(2, "abc")) + } + } + + test("table to alter does not exist") { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") + }.getMessage + assert(errMsg.contains("Table not found")) + } + } + + test("partition to rename does not exist") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + val errMsg = intercept[NoSuchPartitionException] { + sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition not found in table")) + } + } + + test("target partition exists") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + val errMsg = intercept[PartitionAlreadyExistsException] { + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("Partition already exists")) + } + } + + test("single part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + + test("multi part partition") { + withNamespaceAndTable("ns", "tbl") { t => + createWideTable(t) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + + sql(s""" + |ALTER TABLE $t + |PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 + |) RENAME TO PARTITION ( + | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 + |)""".stripMargin) + checkPartitions(t, + Map( + "year" -> "2016", + "month" -> "3", + "hour" -> "10", + "minute" -> "10", + "sec" -> "123", + "extra" -> "1"), + Map( + "year" -> "2016", + "month" -> "4", + "hour" -> "10", + "minute" -> "10", + "sec" -> "10", + "extra" -> "1")) + checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) + } + } + + test("partition spec in RENAME PARTITION should be case insensitive") { + withNamespaceAndTable("ns", "tbl") { t => + createSinglePartTable(t) + checkPartitions(t, Map("id" -> "1")) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val errMsg = intercept[AnalysisException] { + sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") + }.getMessage + assert(errMsg.contains("ID is not a valid partition column")) + checkPartitions(t, Map("id" -> "1")) + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql(s"ALTER TABLE $t PARTITION (ID 
= 1) RENAME TO PARTITION (id = 2)") + checkPartitions(t, Map("id" -> "2")) + checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala index a613978ce375a..f4b84d8ee0059 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandTestUtils.scala @@ -21,6 +21,7 @@ import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.test.SQLTestUtils @@ -88,4 +89,6 @@ trait DDLCommandTestUtils extends SQLTestUtils { |ADD PARTITION(year = 2016, month = 4, hour = 10, minute = 10, sec = 10, extra = 1) |""".stripMargin) } + + protected def checkLocation(t: String, spec: TablePartitionSpec, expected: String): Unit } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 808eab8340524..b3c118def70b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.execution.command /** @@ -32,21 +31,6 @@ import org.apache.spark.sql.execution.command * `org.apache.spark.sql.hive.execution.command.AlterTableAddPartitionSuite` */ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuiteBase { - override protected def checkLocation( - t: String, - spec: TablePartitionSpec, - expected: String): Unit = { - val tablePath = t.split('.') - val tableName = tablePath.last - val ns = tablePath.init.mkString(".") - val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") - val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") - .select("information") - .first().getString(0) - val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head - assert(location.endsWith(expected)) - } - test("empty string as partition value") { withNamespaceAndTable("ns", "tbl") { t => sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala index d923886fbdb9a..bde77106a3ab7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableRenamePartitionSuite.scala @@ -17,10 +17,8 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} -import org.apache.spark.sql.catalyst.analysis.{NoSuchPartitionException, PartitionAlreadyExistsException} +import 
org.apache.spark.sql.Row import org.apache.spark.sql.execution.command -import org.apache.spark.sql.internal.SQLConf /** * This base suite contains unified tests for the `ALTER TABLE .. RENAME PARTITION` command that @@ -33,143 +31,19 @@ import org.apache.spark.sql.internal.SQLConf * `org.apache.spark.sql.hive.execution.command.AlterTableRenamePartitionSuite` */ trait AlterTableRenamePartitionSuiteBase extends command.AlterTableRenamePartitionSuiteBase { - protected def createSinglePartTable(t: String): Unit = { - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - sql(s"INSERT INTO $t PARTITION (id = 1) SELECT 'abc'") - } - - test("rename without explicitly specifying database") { - val t = "tbl" - withTable(t) { - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } - } - - test("table to alter does not exist") { - withNamespace(s"$catalog.ns") { - sql(s"CREATE NAMESPACE $catalog.ns") - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $catalog.ns.no_tbl PARTITION (id=1) RENAME TO PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("Table not found")) - } - } - - test("partition to rename does not exist") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - val errMsg = intercept[NoSuchPartitionException] { - sql(s"ALTER TABLE $t PARTITION (id = 3) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("Partition not found in table")) - } - } - - test("target partition exists") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") - checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) - val errMsg = intercept[PartitionAlreadyExistsException] { - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("Partition already exists")) - } - } - - test("single part partition") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - sql(s"ALTER TABLE $t PARTITION (id = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } - } - - test("multi part partition") { - withNamespaceAndTable("ns", "tbl") { t => - createWideTable(t) - checkPartitions(t, - Map( - "year" -> "2016", - "month" -> "3", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1"), - Map( - "year" -> "2016", - "month" -> "4", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1")) - - sql(s""" - |ALTER TABLE $t - |PARTITION ( - | year = 2016, month = 3, hour = 10, minute = 10, sec = 10, extra = 1 - |) RENAME TO PARTITION ( - | year = 2016, month = 3, hour = 10, minute = 10, sec = 123, extra = 1 - |)""".stripMargin) - checkPartitions(t, - Map( - "year" -> "2016", - "month" -> "3", - "hour" -> "10", - "minute" -> "10", - "sec" -> "123", - "extra" -> "1"), - Map( - "year" -> "2016", - "month" -> "4", - "hour" -> "10", - "minute" -> "10", - "sec" -> "10", - "extra" -> "1")) - checkAnswer(sql(s"SELECT month, sec, price FROM $t"), Row(3, 123, 3)) - } - } - test("with location") { withNamespaceAndTable("ns", "tbl") { t => createSinglePartTable(t) sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION 'loc1'") 
sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "2"), "loc1") sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Seq(Row(1, "abc"), Row(3, "def"))) - } - } - - test("partition spec in RENAME PARTITION should be case insensitive") { - withNamespaceAndTable("ns", "tbl") { t => - createSinglePartTable(t) - checkPartitions(t, Map("id" -> "1")) - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") - }.getMessage - assert(errMsg.contains("ID is not a valid partition column")) - checkPartitions(t, Map("id" -> "1")) - } - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - sql(s"ALTER TABLE $t PARTITION (ID = 1) RENAME TO PARTITION (id = 2)") - checkPartitions(t, Map("id" -> "2")) - checkAnswer(sql(s"SELECT id, data FROM $t"), Row(2, "abc")) - } + // V1 catalogs rename the partition location of managed tables + checkLocation(t, Map("id" -> "3"), "id=3") + checkAnswer(sql(s"SELECT id, data FROM $t WHERE id = 3"), Row(3, "def")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala index c4ecf1c98bb6e..80c552de567ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/CommandSuiteBase.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.test.SharedSparkSession @@ -30,4 +31,20 @@ trait CommandSuiteBase extends SharedSparkSession { def version: String = "V1" // The prefix is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING parquet" // The clause is used in creating tables under testing + + // TODO(SPARK-33393): Move this to `DDLCommandTestUtils` + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index 0f0f8fa389321..65494a7266756 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -18,10 +18,6 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec -import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec -import org.apache.spark.sql.connector.InMemoryPartitionTable -import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.execution.command /** @@ -31,29 +27,6 @@ import org.apache.spark.sql.execution.command class AlterTableAddPartitionSuite extends command.AlterTableAddPartitionSuiteBase with CommandSuiteBase { - - import CatalogV2Implicits._ - - override protected def checkLocation( - t: String, - spec: TablePartitionSpec, - expected: String): Unit = { - val tablePath = t.split('.') - val catalogName = tablePath.head - val namespaceWithTable = tablePath.tail - val namespaces = namespaceWithTable.init - val tableName = namespaceWithTable.last - val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) - val partTable = catalogPlugin.asTableCatalog - .loadTable(Identifier.of(namespaces, tableName)) - .asInstanceOf[InMemoryPartitionTable] - val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) - val partMetadata = partTable.loadPartitionMetadata(ident) - - assert(partMetadata.containsKey("location")) - assert(partMetadata.get("location") === expected) - } - test("SPARK-33650: add partition into a table which doesn't support partition management") { withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala index d1c252adde369..bb06818da48b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableRenamePartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.Row import org.apache.spark.sql.execution.command /** @@ -28,14 +28,20 @@ class AlterTableRenamePartitionSuite extends command.AlterTableRenamePartitionSuiteBase with CommandSuiteBase { - // TODO(SPARK-33859): Support V2 ALTER TABLE .. RENAME PARTITION - test("single part partition") { + test("with location") { withNamespaceAndTable("ns", "tbl") { t => - sql(s"CREATE TABLE $t (id bigint, data string) $defaultUsing PARTITIONED BY (id)") - val errMsg = intercept[AnalysisException] { - sql(s"ALTER TABLE $t PARTITION (id=1) RENAME TO PARTITION (id=2)") - }.getMessage - assert(errMsg.contains("ALTER TABLE ... 
RENAME TO PARTITION is not supported for v2 tables")) + createSinglePartTable(t) + val loc = "location1" + sql(s"ALTER TABLE $t ADD PARTITION (id = 2) LOCATION '$loc'") + sql(s"INSERT INTO $t PARTITION (id = 2) SELECT 'def'") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "2")) + checkLocation(t, Map("id" -> "2"), loc) + + sql(s"ALTER TABLE $t PARTITION (id = 2) RENAME TO PARTITION (id = 3)") + checkPartitions(t, Map("id" -> "1"), Map("id" -> "3")) + // `InMemoryPartitionTableCatalog` should keep the original location + checkLocation(t, Map("id" -> "3"), loc) + checkAnswer(sql(s"SELECT id, data FROM $t WHERE id = 3"), Row(3, "def")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala index 0978126f27fd1..2dd80b7bb6a02 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/CommandSuiteBase.scala @@ -18,7 +18,10 @@ package org.apache.spark.sql.execution.command.v2 import org.apache.spark.SparkConf -import org.apache.spark.sql.connector.{InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.catalyst.analysis.ResolvePartitionSpec +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.connector.{InMemoryPartitionTable, InMemoryPartitionTableCatalog, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Implicits, Identifier} import org.apache.spark.sql.test.SharedSparkSession /** @@ -36,4 +39,26 @@ trait CommandSuiteBase extends SharedSparkSession { override def sparkConf: SparkConf = super.sparkConf .set(s"spark.sql.catalog.$catalog", classOf[InMemoryPartitionTableCatalog].getName) .set(s"spark.sql.catalog.non_part_$catalog", classOf[InMemoryTableCatalog].getName) + + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + import CatalogV2Implicits._ + + val tablePath = t.split('.') + val catalogName = tablePath.head + val namespaceWithTable = tablePath.tail + val namespaces = namespaceWithTable.init + val tableName = namespaceWithTable.last + val catalogPlugin = spark.sessionState.catalogManager.catalog(catalogName) + val partTable = catalogPlugin.asTableCatalog + .loadTable(Identifier.of(namespaces, tableName)) + .asInstanceOf[InMemoryPartitionTable] + val ident = ResolvePartitionSpec.convertToPartIdent(spec, partTable.partitionSchema.fields) + val partMetadata = partTable.loadPartitionMetadata(ident) + + assert(partMetadata.containsKey("location")) + assert(partMetadata.get("location") === expected) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala index 39b4be61449cb..a1c808647c891 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/CommandSuiteBase.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.execution.command +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.hive.test.TestHiveSingleton @@ -30,4 +31,20 @@ trait CommandSuiteBase extends TestHiveSingleton { def version: String = "Hive 
V1" // The prefix is added to test names def catalog: String = CatalogManager.SESSION_CATALOG_NAME def defaultUsing: String = "USING HIVE" // The clause is used in creating tables under testing + + def checkLocation( + t: String, + spec: TablePartitionSpec, + expected: String): Unit = { + val tablePath = t.split('.') + val tableName = tablePath.last + val ns = tablePath.init.mkString(".") + val partSpec = spec.map { case (key, value) => s"$key = $value"}.mkString(", ") + val information = + spark.sql(s"SHOW TABLE EXTENDED IN $ns LIKE '$tableName' PARTITION($partSpec)") + .select("information") + .first().getString(0) + val location = information.split("\\r?\\n").filter(_.startsWith("Location:")).head + assert(location.endsWith(expected)) + } } From 3b1b209e90076e60eb18eedfaec0ecdad659376f Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 29 Dec 2020 13:33:06 +0000 Subject: [PATCH 0906/1009] [SPARK-33909][SQL] Check rand functions seed is legal at analyer side ### What changes were proposed in this pull request? Move seed is legal check to `CheckAnalysis`. ### Why are the changes needed? It's better to check seed expression is legal at analyzer side instead of execution, and user can get exception as soon as possible. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add test. Closes #30923 from ulysses-you/SPARK-33909. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 4 ++++ .../catalyst/expressions/randomExpressions.scala | 9 +++------ .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 13 +++++++++++++ .../optimizer/LeftSemiAntiJoinPushDownSuite.scala | 2 +- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a4dfbe85abfd7..89076fbb9ce0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -215,6 +215,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case s: SubqueryExpression => checkSubqueryExpression(operator, s) s + + case e: ExpressionWithRandomSeed if !e.seedExpression.foldable => + failAnalysis( + s"Input argument to ${e.prettyName} must be a constant.") } operator match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 630c934f79533..0a4c6e27d51d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} @@ -47,10 +46,8 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful override def seedExpression: Expression = child @transient protected lazy val seed: Long = seedExpression match { - case Literal(s, IntegerType) => s.asInstanceOf[Int] - case Literal(s, LongType) 
=> s.asInstanceOf[Long] - case _ => throw new AnalysisException( - s"Input argument to $prettyName must be an integer, long or null literal.") + case e if e.dataType == IntegerType => e.eval().asInstanceOf[Int] + case e if e.dataType == LongType => e.eval().asInstanceOf[Long] } override def nullable: Boolean = false @@ -64,7 +61,7 @@ abstract class RDG extends UnaryExpression with ExpectsInputTypes with Stateful * Represents the behavior of expressions which have a random seed and can renew the seed. * Usually the random seed needs to be renewed at each execution under streaming queries. */ -trait ExpressionWithRandomSeed { +trait ExpressionWithRandomSeed extends Expression { def seedExpression: Expression def withNewSeed(seed: Long): Expression } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 44128c4419951..004d577c7ad52 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -700,4 +700,17 @@ class AnalysisErrorSuite extends AnalysisTest { UnresolvedRelation(TableIdentifier("t", Option("nonexist"))))))) assertAnalysisError(plan, "Table or view not found:" :: Nil) } + + test("SPARK-33909: Check rand functions seed is legal at analyer side") { + Seq(Rand("a".attr), Randn("a".attr)).foreach { r => + val plan = Project(Seq(r.as("r")), testRelation) + assertAnalysisError(plan, + s"Input argument to ${r.prettyName} must be a constant." :: Nil) + } + Seq(Rand(1.0), Rand("1"), Randn("a")).foreach { r => + val plan = Project(Seq(r.as("r")), testRelation) + assertAnalysisError(plan, + s"data type mismatch: argument 1 requires (int or bigint) type" :: Nil) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala index 729a1e9f06ca5..d4b85b036b64c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LeftSemiAntiJoinPushDownSuite.scala @@ -60,7 +60,7 @@ class LeftSemiPushdownSuite extends PlanTest { test("Project: LeftSemiAnti join no pushdown because of non-deterministic proj exprs") { val originalQuery = testRelation - .select(Rand('a), 'b, 'c) + .select(Rand(1), 'b, 'c) .join(testRelation1, joinType = LeftSemi, condition = Some('b === 'd)) val optimized = Optimize.execute(originalQuery.analyze) From 872107f67fd6c2093531e8a8976ff713359cba01 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 29 Dec 2020 13:34:43 +0000 Subject: [PATCH 0907/1009] [SPARK-33848][SQL][FOLLOWUP] Introduce allowList for push into (if / case) branches ### What changes were proposed in this pull request? Introduce allowList push into (if / case) branches to fix potential bug. ### Why are the changes needed? Fix potential bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test. Closes #30955 from wangyum/SPARK-33848-2. 
Authored-by: Yuming Wang Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/expressions.scala | 41 +++++++++++++++---- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 6c5dec133d2a7..1b93d514964e6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -553,41 +553,68 @@ object PushFoldableIntoBranches extends Rule[LogicalPlan] with PredicateHelper { foldables.nonEmpty && others.length < 2 } + // Not all UnaryExpression can be pushed into (if / case) branches, e.g. Alias. + private def supportedUnaryExpression(e: UnaryExpression): Boolean = e match { + case _: IsNull | _: IsNotNull => true + case _: UnaryMathExpression | _: Abs | _: Bin | _: Factorial | _: Hex => true + case _: String2StringExpression | _: Ascii | _: Base64 | _: BitLength | _: Chr | _: Length => + true + case _: CastBase => true + case _: GetDateField | _: LastDay => true + case _: ExtractIntervalPart => true + case _: ArraySetLike => true + case _: ExtractValue => true + case _ => false + } + + // Not all BinaryExpression can be pushed into (if / case) branches. + private def supportedBinaryExpression(e: BinaryExpression): Boolean = e match { + case _: BinaryComparison | _: StringPredicate | _: StringRegexExpression => true + case _: BinaryArithmetic => true + case _: BinaryMathExpression => true + case _: AddMonths | _: DateAdd | _: DateAddInterval | _: DateDiff | _: DateSub => true + case _: FindInSet | _: RoundBase => true + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { - case a: Alias => a // Skip an alias. 
case u @ UnaryExpression(i @ If(_, trueValue, falseValue)) - if atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedUnaryExpression(u) && atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = u.withNewChildren(Array(trueValue)), falseValue = u.withNewChildren(Array(falseValue))) case u @ UnaryExpression(c @ CaseWhen(branches, elseValue)) - if atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedUnaryExpression(u) && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = u.withNewChildren(Array(e._2)))), elseValue.map(e => u.withNewChildren(Array(e)))) case b @ BinaryExpression(i @ If(_, trueValue, falseValue), right) - if right.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedBinaryExpression(b) && right.foldable && + atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(trueValue, right)), falseValue = b.withNewChildren(Array(falseValue, right))) case b @ BinaryExpression(left, i @ If(_, trueValue, falseValue)) - if left.foldable && atMostOneUnfoldable(Seq(trueValue, falseValue)) => + if supportedBinaryExpression(b) && left.foldable && + atMostOneUnfoldable(Seq(trueValue, falseValue)) => i.copy( trueValue = b.withNewChildren(Array(left, trueValue)), falseValue = b.withNewChildren(Array(left, falseValue))) case b @ BinaryExpression(c @ CaseWhen(branches, elseValue), right) - if right.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedBinaryExpression(b) && right.foldable && + atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(e._2, right)))), elseValue.map(e => b.withNewChildren(Array(e, right)))) case b @ BinaryExpression(left, c @ CaseWhen(branches, elseValue)) - if left.foldable && atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => + if supportedBinaryExpression(b) && left.foldable && + atMostOneUnfoldable(branches.map(_._2) ++ elseValue) => c.copy( branches.map(e => e.copy(_2 = b.withNewChildren(Array(left, e._2)))), elseValue.map(e => b.withNewChildren(Array(left, e)))) From aadda4b561ace638fb88147a93b5e15db3527d5a Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 29 Dec 2020 23:26:27 +0900 Subject: [PATCH 0908/1009] [SPARK-33930][SQL] Script Transform default FIELD DELIMIT should be \u0001 for no serde ### What changes were proposed in this pull request? For same SQL ``` SELECT TRANSFORM(a, b, c, null) ROW FORMAT DELIMITED USING 'cat' ROW FORMAT DELIMITED FIELDS TERMINATED BY '&' FROM (select 1 as a, 2 as b, 3 as c) t ``` In hive: ``` hive> SELECT TRANSFORM(a, b, c, null) > ROW FORMAT DELIMITED > USING 'cat' > ROW FORMAT DELIMITED > FIELDS TERMINATED BY '&' > FROM (select 1 as a, 2 as b, 3 as c) t; OK 123\N NULL Time taken: 14.519 seconds, Fetched: 1 row(s) hive> packet_write_wait: Connection to 10.191.58.100 port 32200: Broken pipe ``` In Spark ``` Spark master: local[*], Application Id: local-1609225830376 spark-sql> SELECT TRANSFORM(a, b, c, null) > ROW FORMAT DELIMITED > USING 'cat' > ROW FORMAT DELIMITED > FIELDS TERMINATED BY '&' > FROM (select 1 as a, 2 as b, 3 as c) t; 1 2 3 null NULL Time taken: 4.297 seconds, Fetched 1 row(s) spark-sql> ``` We should keep same. 
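
To make the expected behavior concrete, here is a minimal sketch (not part of this patch) that mirrors the new no-serde test added below. It assumes a Spark shell session, so `spark` and its implicits are available, and a Unix `cat` binary on the PATH:
```
// Illustrative only: based on the new no-serde test in this patch.
import spark.implicits._

Seq((1, 2, 3)).toDF("a", "b", "c").createOrReplaceTempView("v")

spark.sql(
  """SELECT TRANSFORM(a, b, c)
    |  ROW FORMAT DELIMITED
    |  USING 'cat' AS (a)
    |  ROW FORMAT DELIMITED
    |  FIELDS TERMINATED BY '&'
    |FROM v
    |""".stripMargin).show(false)
// With this change the single output column is "1\u00012\u00013"
// (input fields joined by '\u0001', as in Hive); before it was "1\t2\t3".
```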
Change the default ROW FORMAT FIELD DELIMIT to `\u0001` for the no-serde mode.

In Hive the default value is '1', which as a char is '\u0001':
```
bucket_count -1
column.name.delimiter ,
columns
columns.comments
columns.types
file.inputformat org.apache.hadoop.hive.ql.io.NullRowsInputFormat
```

### Why are the changes needed?
Keep the same behavior as Hive.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
Added UT

Closes #30958 from AngersZhuuuu/SPARK-33930.

Authored-by: angerszhu
Signed-off-by: HyukjinKwon
---
 docs/sql-migration-guide.md                   |  2 ++
 .../BaseScriptTransformationExec.scala        |  2 +-
 .../BaseScriptTransformationSuite.scala       | 32 ++++++++++++++++++-
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index cbb1de53c8896..bd54554baa09d 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -30,6 +30,8 @@ license: |
 
 - In Spark 3.2, `ALTER TABLE .. RENAME TO PARTITION` throws `PartitionAlreadyExistsException` instead of `AnalysisException` for tables from Hive external when the target partition already exists.
 
+ - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`.
+
 ## Upgrading from Spark SQL 3.0 to 3.1
 
 - In Spark 3.1, statistical aggregation function includes `std`, `stddev`, `stddev_samp`, `variance`, `var_samp`, `skewness`, `kurtosis`, `covar_samp`, `corr` will return `NULL` instead of `Double.NaN` when `DivideByZero` occurs during expression evaluation, for example, when `stddev_samp` applied on a single element set. In Spark version 3.0 and earlier, it will return `Double.NaN` in such case. To restore the behavior before Spark 3.1, you can set `spark.sql.legacy.statisticalAggregate` to `true`.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 74e5aa716ad67..1c87c48ae7cb3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -335,7 +335,7 @@ case class ScriptTransformationIOSchema( object ScriptTransformationIOSchema { val defaultFormat = Map( - ("TOK_TABLEROWFORMATFIELD", "\t"), + ("TOK_TABLEROWFORMATFIELD", "\u0001"), ("TOK_TABLEROWFORMATLINES", "\n") ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index 863657a7862a6..cf9ee1ef6db72 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -28,6 +28,7 @@ import org.scalatest.exceptions.TestFailedException import org.apache.spark.{SparkException, TaskContext, TestUtils} import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.Partitioning @@ -123,7 +124,11 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU s""" |SELECT |TRANSFORM(a, b, c, d, e) - |USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' |FROM v """.stripMargin) @@ -440,6 +445,31 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU } } } + + test("SPARK-33930: Script Transform default FIELD DELIMIT should be \u0001 (no serde)") { + withTempView("v") { + val df = Seq( + (1, 2, 3), + (2, 3, 4), + (3, 4, 5) + ).toDF("a", "b", "c") + df.createTempView("v") + + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c) + | ROW FORMAT DELIMITED + | USING 'cat' AS (a) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '&' + |FROM v + """.stripMargin), identity, + Row("1\u00012\u00013") :: + Row("2\u00013\u00014") :: + Row("3\u00014\u00015") :: Nil) + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 16c594de7967ae535a87c157d5383c3af3bbccc5 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 14:30:37 +0000 Subject: [PATCH 0909/1009] [SPARK-33859][SQL][FOLLOWUP] Add version to `SupportsPartitionManagement.renamePartition()` ### What changes were proposed in this pull request? Add the version 3.2.0 to new method `renamePartition()` in the `SupportsPartitionManagement` interface. ### Why are the changes needed? To inform Spark devs when the method appears in the interface. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./dev/scalastyle` Closes #30964 from MaxGekk/alter-table-rename-partition-v2-followup. 
Authored-by: Max Gekk
Signed-off-by: Wenchen Fan
---
 .../sql/connector/catalog/SupportsPartitionManagement.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
index a7008293a3e19..20af0e0f8c67c 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsPartitionManagement.java
@@ -149,6 +149,8 @@ Map loadPartitionMetadata(InternalRow ident)
    * @throws UnsupportedOperationException If partition renaming is not supported
    * @throws PartitionAlreadyExistsException If the `to` partition exists already
    * @throws NoSuchPartitionException If the `from` partition does not exist
+   *
+   * @since 3.2.0
    */
   default boolean renamePartition(InternalRow from, InternalRow to)
     throws UnsupportedOperationException,

From c42502493a0d1012ab8ba496363fca27014b9229 Mon Sep 17 00:00:00 2001
From: Yuming Wang
Date: Tue, 29 Dec 2020 14:35:01 +0000
Subject: [PATCH 0910/1009] [SPARK-33847][SQL][FOLLOWUP] Remove the CaseWhen should consider deterministic

### What changes were proposed in this pull request?

This PR fixes removing the `CaseWhen` when `elseValue` is empty and the other outputs are null: the rewrite must take determinism into account.

### Why are the changes needed?

Fix a bug.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30960 from wangyum/SPARK-33847-2.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
---
 .../ReplaceNullWithFalseInPredicate.scala          |  8 ++------
 .../sql/catalyst/optimizer/expressions.scala       |  9 +++------
 .../optimizer/PushFoldableIntoBranchesSuite.scala  | 15 +++++++--------
 .../ReplaceNullWithFalseInPredicateSuite.scala     |  7 ++++---
 .../optimizer/SimplifyConditionalSuite.scala       | 12 +++++++-----
 5 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
index df3da3e8a9982..2f95f242c851c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala
@@ -98,12 +98,8 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] {
       val newBranches = cw.branches.map { case (cond, value) =>
         replaceNullWithFalse(cond) -> replaceNullWithFalse(value)
       }
-      if (newBranches.forall(_._2 == FalseLiteral) && cw.elseValue.isEmpty) {
-        FalseLiteral
-      } else {
-        val newElseValue = cw.elseValue.map(replaceNullWithFalse)
-        CaseWhen(newBranches, newElseValue)
-      }
+      val newElseValue = cw.elseValue.map(replaceNullWithFalse).getOrElse(FalseLiteral)
+      CaseWhen(newBranches, newElseValue)
     case i @ If(pred, trueVal, falseVal) if i.dataType == BooleanType =>
       If(replaceNullWithFalse(pred), replaceNullWithFalse(trueVal), replaceNullWithFalse(falseVal))
     case e if e.dataType == BooleanType =>
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 1b93d514964e6..819bffeafb643 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -515,8 +515,9 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { val (h, t) = branches.span(_._1 != TrueLiteral) CaseWhen( h :+ t.head, None) - case e @ CaseWhen(branches, Some(elseValue)) - if branches.forall(_._2.semanticEquals(elseValue)) => + case e @ CaseWhen(branches, elseOpt) + if branches.forall(_._2.semanticEquals(elseOpt.getOrElse(Literal(null, e.dataType)))) => + val elseValue = elseOpt.getOrElse(Literal(null, e.dataType)) // For non-deterministic conditions with side effect, we can not remove it, or change // the ordering. As a result, we try to remove the deterministic conditions from the tail. var hitNonDeterministicCond = false @@ -532,10 +533,6 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper { } else { e.copy(branches = branches.take(i).map(branch => (branch._1, elseValue))) } - - case e @ CaseWhen(branches, None) - if branches.forall(_._2.semanticEquals(Literal(null, e.dataType))) => - Literal(null, e.dataType) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala index 0d5218ac629e3..cb90a398604f2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PushFoldableIntoBranchesSuite.scala @@ -260,14 +260,13 @@ class PushFoldableIntoBranchesSuite } test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { - Seq(a, LessThan(Rand(1), Literal(0.5))).foreach { condition => - assertEquivalent( - EqualTo(CaseWhen(Seq((condition, Literal.create(null, IntegerType)))), Literal(2)), - Literal.create(null, BooleanType)) - assertEquivalent( - EqualTo(CaseWhen(Seq((condition, Literal("str")))).cast(IntegerType), Literal(2)), - Literal.create(null, BooleanType)) - } + assertEquivalent( + EqualTo(CaseWhen(Seq((a, Literal.create(null, IntegerType)))), Literal(2)), + Literal.create(null, BooleanType)) + assertEquivalent( + EqualTo(CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal("str")))).cast(IntegerType), + Literal(2)), + CaseWhen(Seq((LessThan(Rand(1), Literal(0.5)), Literal.create(null, BooleanType))))) } test("SPARK-33884: simplify CaseWhen clauses with (true and false) and (false and true)") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index ae97d53256837..ffab358721e1a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -114,7 +114,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { val expectedBranches = Seq( (UnresolvedAttribute("i") < Literal(10)) -> FalseLiteral, (UnresolvedAttribute("i") > Literal(40)) -> TrueLiteral) - val expectedCond = CaseWhen(expectedBranches) + val expectedCond = CaseWhen(expectedBranches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) @@ 
-135,7 +135,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { (UnresolvedAttribute("i") < Literal(10)) -> TrueLiteral, (UnresolvedAttribute("i") > Literal(10)) -> FalseLiteral, TrueLiteral -> TrueLiteral) - val expectedCond = CaseWhen(expectedBranches) + val expectedCond = CaseWhen(expectedBranches, FalseLiteral) testFilter(originalCond, expectedCond) testJoin(originalCond, expectedCond) @@ -238,7 +238,8 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { FalseLiteral) val condition = CaseWhen(Seq((UnresolvedAttribute("i") > Literal(10)) -> branchValue)) val expectedCond = CaseWhen(Seq( - (UnresolvedAttribute("i") > Literal(10), (Literal(2) === nestedCaseWhen) <=> TrueLiteral))) + (UnresolvedAttribute("i") > Literal(10), (Literal(2) === nestedCaseWhen) <=> TrueLiteral)), + FalseLiteral) testFilter(originalCond = condition, expectedCond = expectedCond) testJoin(originalCond = condition, expectedCond = expectedCond) testDelete(originalCond = condition, expectedCond = expectedCond) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala index f3edd70bcfb12..2a685bfeefcb2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalSuite.scala @@ -237,11 +237,13 @@ class SimplifyConditionalSuite extends PlanTest with ExpressionEvalHelper with P } test("SPARK-33847: Remove the CaseWhen if elseValue is empty and other outputs are null") { - Seq(GreaterThan('a, 1), GreaterThan(Rand(0), 1)).foreach { condition => - assertEquivalent( - CaseWhen((condition, Literal.create(null, IntegerType)) :: Nil, None), - Literal.create(null, IntegerType)) - } + assertEquivalent( + CaseWhen((GreaterThan('a, 1), Literal.create(null, IntegerType)) :: Nil, None), + Literal.create(null, IntegerType)) + + assertEquivalent( + CaseWhen((GreaterThan(Rand(0), 0.5), Literal.create(null, IntegerType)) :: Nil, None), + CaseWhen((GreaterThan(Rand(0), 0.5), Literal.create(null, IntegerType)) :: Nil, None)) } test("SPARK-33884: simplify CaseWhen clauses with (true and false) and (false and true)") { From 2b6836cdc289bdaaf5e9fdcc0d7da05bfcb63cab Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 29 Dec 2020 12:26:25 -0800 Subject: [PATCH 0911/1009] [SPARK-33936][SQL] Add the version when connector's methods and interfaces were updated ### What changes were proposed in this pull request? Add the `since` tag to methods and interfaces added recently. ### Why are the changes needed? 1. To follow the existing convention for Spark API. 2. To inform devs when Spark API was changed. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? `dev/scalastyle` Closes #30966 from MaxGekk/spark-23889-interfaces-followup. 
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/connector/catalog/MetadataColumn.java | 2 ++ .../org/apache/spark/sql/connector/catalog/SupportsDelete.java | 2 ++ .../spark/sql/connector/catalog/SupportsMetadataColumns.java | 2 ++ .../org/apache/spark/sql/connector/expressions/Expressions.java | 2 ++ 4 files changed, 8 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java index cdfa082ced317..65f31229764fe 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -33,6 +33,8 @@ * example, a partition value produced by bucket(id, 16) could be exposed by a metadata column. In * this case, {@link #transform()} should return a non-null {@link Transform} that produced the * metadata column's values. + * + * @since 3.1.0 */ @Evolving public interface MetadataColumn { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java index 261e5344be7b9..8f51f4e1e835d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsDelete.java @@ -44,6 +44,8 @@ public interface SupportsDelete { * * @param filters filter expressions, used to select rows to delete when all expressions match * @return true if the delete operation can be performed + * + * @since 3.1.0 */ default boolean canDeleteWhere(Filter[] filters) { return true; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index 208abfc302582..b7b715bd456ab 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -37,6 +37,8 @@ * If a table column and a metadata column have the same name, the metadata column will never be * requested. It is recommended that Table implementations reject data column name that conflict * with metadata column names. 
+ * + * @since 3.1.0 */ @Evolving public interface SupportsMetadataColumns extends Table { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java index 984de6258f84b..7b472fa800821 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expressions.java @@ -171,6 +171,8 @@ public static Transform hours(String column) { * @param direction direction of the sort * @param nullOrder null order of the sort * @return a SortOrder + * + * @since 3.2.0 */ public static SortOrder sort(Expression expr, SortDirection direction, NullOrdering nullOrder) { return LogicalExpressions.sort(expr, direction, nullOrder); From 951afc3acc4009e8bb55238db59376891ef091b6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 10:20:54 +0900 Subject: [PATCH 0912/1009] [SPARK-33932][SS] Clean up KafkaOffsetReader API document ### What changes were proposed in this pull request? This patch cleans up KafkaOffsetReader API document. ### Why are the changes needed? KafkaOffsetReader API documents are duplicated among KafkaOffsetReaderConsumer and KafkaOffsetReaderAdmin. It seems to be good if the doc is centralized. This also adds missing API doc too. ### Does this PR introduce _any_ user-facing change? No, dev only. ### How was this patch tested? Doc only. Closes #30961 from viirya/SPARK-33932. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../sql/kafka010/KafkaOffsetReader.scala | 66 ++++++++++++++++++- .../sql/kafka010/KafkaOffsetReaderAdmin.scala | 47 ------------- .../kafka010/KafkaOffsetReaderConsumer.scala | 46 +------------ 3 files changed, 66 insertions(+), 93 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index b1992c1dc6a0a..546970507a2ed 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -28,7 +28,6 @@ import org.apache.spark.sql.internal.SQLConf /** * Base trait to fetch offsets from Kafka. The implementations are * [[KafkaOffsetReaderConsumer]] and [[KafkaOffsetReaderAdmin]]. - * Please see the documentation and API description there. */ private[kafka010] trait KafkaOffsetReader { @@ -39,22 +38,87 @@ private[kafka010] trait KafkaOffsetReader { // This is needed here because of KafkaContinuousStream val driverKafkaParams: ju.Map[String, Object] + /** + * Closes the connection to Kafka, and cleans up state. + */ def close(): Unit + + /** + * Fetch the partition offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. + */ def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] + + /** + * Resolves the specific offsets based on Kafka seek positions. + * This method resolves offset value -1 to the latest and -2 to the + * earliest Kafka seek position. 
+ * + * @param partitionOffsets the specific offsets to resolve + * @param reportDataLoss callback to either report or log data loss depending on setting + */ def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset + + /** + * Resolves the specific offsets based on timestamp per topic-partition. + * The returned offset for each partition is the earliest offset whose timestamp is greater + * than or equal to the given timestamp in the corresponding partition. If the matched offset + * doesn't exist, depending on `failsOnNoMatchingOffset` parameter, the offset will be set to + * latest or this method throws an error. + * + * @param partitionTimestamps the timestamp per topic-partition. + * @param failsOnNoMatchingOffset whether to fail the query when no matched offset can be found. + */ def fetchSpecificTimestampBasedOffsets( partitionTimestamps: Map[TopicPartition, Long], failsOnNoMatchingOffset: Boolean): KafkaSourceOffset + + /** + * Fetch the earliest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + */ def fetchEarliestOffsets(): Map[TopicPartition, Long] + + /** + * Fetch the latest offsets for the topic partitions that are indicated + * in the [[ConsumerStrategy]]. + * + * In order to avoid unknown issues, we use the given `knownOffsets` to audit the + * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less + * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When + * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot + * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka + * after retrying. + */ def fetchLatestOffsets(knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap + + /** + * Fetch the earliest offsets for specific topic partitions. + * The return result may not contain some partitions if they are deleted. + */ def fetchEarliestOffsets(newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] + + /** + * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may + * split partitions to respect it. Since offsets can be early and late binding which are evaluated + * on the executors, in order to divvy up the partitions we need to perform some substitutions. We + * don't want to send exact offsets to the executors, because data may age out before we can + * consume the data. This method makes some approximate splitting, and replaces the special offset + * values in the final output. + */ def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] + + /** + * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method + * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will + * be called. 
+ */ def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala index f9a714c37cb9e..6f4cb895f363d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -108,17 +108,10 @@ private[kafka010] class KafkaOffsetReaderAdmin( override def toString(): String = consumerStrategy.toString - /** - * Closes the connection to Kafka, and cleans up state. - */ override def close(): Unit = { stopAdmin() } - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ override def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] = { @@ -148,14 +141,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ override def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset = { @@ -246,10 +231,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( KafkaSourceOffset(fetched) } - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - */ override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToAdmin( partitions => { val listOffsetsParams = partitions.asScala.map(p => p -> OffsetSpec.earliest()).toMap.asJava @@ -258,17 +239,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( partitionOffsets }) - /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * - * In order to avoid unknown issues, we use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with issues like KAFKA-7703, so we just return whatever we get from Kafka - * after retrying. - */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = partitionsAssignedToAdmin { partitions => { @@ -326,10 +296,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. - */ override def fetchEarliestOffsets( newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { if (newPartitions.isEmpty) { @@ -349,14 +315,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( } } - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. 
Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ override def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { @@ -429,11 +387,6 @@ private[kafka010] class KafkaOffsetReaderAdmin( .map(_.toString) } - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ override def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala index eca41c510f1f2..ead819e4c27aa 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala @@ -116,9 +116,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( override def toString(): String = consumerStrategy.toString - /** - * Closes the connection to Kafka, and cleans up state. - */ override def close(): Unit = { if (_consumer != null) uninterruptibleThreadRunner.runUninterruptibly { stopConsumer() } uninterruptibleThreadRunner.shutdown() @@ -137,10 +134,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( partitions.asScala.toSet } - /** - * Fetch the partition offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. - */ override def fetchPartitionOffsets( offsetRangeLimit: KafkaOffsetRangeLimit, isStartingOffsets: Boolean): Map[TopicPartition, Long] = { @@ -170,14 +163,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Resolves the specific offsets based on Kafka seek positions. - * This method resolves offset value -1 to the latest and -2 to the - * earliest Kafka seek position. - * - * @param partitionOffsets the specific offsets to resolve - * @param reportDataLoss callback to either report or log data loss depending on setting - */ override def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset = { @@ -278,10 +263,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( KafkaSourceOffset(fetched) } - /** - * Fetch the earliest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - */ override def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( partitions => { logDebug("Seeking to the beginning") @@ -293,18 +274,10 @@ private[kafka010] class KafkaOffsetReaderConsumer( }, fetchingEarliestOffset = true) /** - * Fetch the latest offsets for the topic partitions that are indicated - * in the [[ConsumerStrategy]]. - * + * Specific to `KafkaOffsetReaderConsumer`: * Kafka may return earliest offsets when we are requesting latest offsets if `poll` is called * right before `seekToEnd` (KAFKA-7703). 
As a workaround, we will call `position` right after * `poll` to wait until the potential offset request triggered by `poll(0)` is done. - * - * In addition, to avoid other unknown issues, we also use the given `knownOffsets` to audit the - * latest offsets returned by Kafka. If we find some incorrect offsets (a latest offset is less - * than an offset in `knownOffsets`), we will retry at most `maxOffsetFetchAttempts` times. When - * a topic is recreated, the latest offsets may be less than offsets in `knownOffsets`. We cannot - * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. */ override def fetchLatestOffsets( knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = @@ -364,10 +337,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Fetch the earliest offsets for specific topic partitions. - * The return result may not contain some partitions if they are deleted. - */ override def fetchEarliestOffsets( newPartitions: Seq[TopicPartition]): Map[TopicPartition, Long] = { if (newPartitions.isEmpty) { @@ -387,14 +356,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( } } - /** - * Return the offset ranges for a Kafka batch query. If `minPartitions` is set, this method may - * split partitions to respect it. Since offsets can be early and late binding which are evaluated - * on the executors, in order to divvy up the partitions we need to perform some substitutions. We - * don't want to send exact offsets to the executors, because data may age out before we can - * consume the data. This method makes some approximate splitting, and replaces the special offset - * values in the final output. - */ override def getOffsetRangesFromUnresolvedOffsets( startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit): Seq[KafkaOffsetRange] = { @@ -467,11 +428,6 @@ private[kafka010] class KafkaOffsetReaderConsumer( .map(_.toString) } - /** - * Return the offset ranges for a Kafka streaming batch. If `minPartitions` is set, this method - * may split partitions to respect it. If any data lost issue is detected, `reportDataLoss` will - * be called. - */ override def getOffsetRangesFromResolvedOffsets( fromPartitionOffsets: PartitionOffsetMap, untilPartitionOffsets: PartitionOffsetMap, From 448494ebcf88b4cd0a89ee933bd042d5e45169a1 Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Wed, 30 Dec 2020 14:06:34 +0900 Subject: [PATCH 0913/1009] [SPARK-33874][K8S] Handle long lived sidecars ### What changes were proposed in this pull request? For liveness check when checkAllContainers is not set, we check the liveness status of the Spark container if we can find it. ### Why are the changes needed? Some environments may deploy long lived logs collecting side cars which outlive the Spark application. Just because they remain alive does not mean the Spark executor should keep running. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Extended the existing pod status tests. Closes #30892 from holdenk/SPARK-33874-handle-long-lived-sidecars. 
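For illustration, the decision logic can be pictured without the Kubernetes client types. A minimal Scala sketch, assuming simplified stand-in classes (`ContainerInfo` and the `Pod*` objects here are made up, not the fabric8 or Spark types):

```scala
// Illustrative sketch only: simplified stand-ins for the pod/container status model.
sealed trait PodState
case object PodRunning extends PodState
case object PodSucceeded extends PodState
case object PodFailed extends PodState

// Container name plus its terminated exit code, if it has terminated.
case class ContainerInfo(name: String, terminatedExitCode: Option[Int])

// When the pod phase is "running", the Spark container's terminal state (if any)
// decides the executor state; a still-running sidecar no longer keeps it "running".
def executorState(sparkContainerName: String, containers: Seq[ContainerInfo]): PodState =
  containers.find(_.name == sparkContainerName) match {
    case Some(ContainerInfo(_, Some(0))) => PodSucceeded
    case Some(ContainerInfo(_, Some(_))) => PodFailed
    case _                               => PodRunning // still running, or Spark container not found
  }

// Executor exited cleanly while a log-collecting sidecar is still alive:
executorState("spark-kubernetes-executor", Seq(
  ContainerInfo("spark-kubernetes-executor", Some(0)),
  ContainerInfo("log-collector", None)))  // => PodSucceeded
```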
Lead-authored-by: Holden Karau Co-authored-by: Holden Karau Signed-off-by: HyukjinKwon --- .../cluster/k8s/ExecutorPodsSnapshot.scala | 28 ++++++++++++++++++- .../k8s/KubernetesClusterManager.scala | 4 +++ ...erministicExecutorPodsSnapshotsStore.scala | 4 +++ .../k8s/ExecutorLifecycleTestUtils.scala | 25 +++++++++++++++++ .../k8s/ExecutorPodsSnapshotSuite.scala | 4 ++- 5 files changed, 63 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index e81d213699e32..71355c7af10fa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -18,6 +18,8 @@ package org.apache.spark.scheduler.cluster.k8s import java.util.Locale +import scala.collection.JavaConverters._ + import io.fabric8.kubernetes.api.model.ContainerStateTerminated import io.fabric8.kubernetes.api.model.Pod @@ -39,6 +41,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP object ExecutorPodsSnapshot extends Logging { private var shouldCheckAllContainers: Boolean = _ + private var sparkContainerName: String = _ def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -50,6 +53,10 @@ object ExecutorPodsSnapshot extends Logging { shouldCheckAllContainers = watchAllContainers } + def setSparkContainerName(containerName: String): Unit = { + sparkContainerName = containerName + } + private def toStatesByExecutorId(executorPods: Seq[Pod]): Map[Long, ExecutorPodState] = { executorPods.map { pod => (pod.getMetadata.getLabels.get(SPARK_EXECUTOR_ID_LABEL).toLong, toState(pod)) @@ -65,6 +72,7 @@ object ExecutorPodsSnapshot extends Logging { case "pending" => PodPending(pod) case "running" => + // If we're checking all containers look for any non-zero exits if (shouldCheckAllContainers && "Never" == pod.getSpec.getRestartPolicy && pod.getStatus.getContainerStatuses.stream @@ -72,7 +80,25 @@ object ExecutorPodsSnapshot extends Logging { .anyMatch(t => t != null && t.getExitCode != 0)) { PodFailed(pod) } else { - PodRunning(pod) + // Otherwise look for the Spark container + val sparkContainerStatusOpt = pod.getStatus.getContainerStatuses.asScala + .find(_.getName() == sparkContainerName) + sparkContainerStatusOpt match { + case Some(sparkContainerStatus) => + sparkContainerStatus.getState.getTerminated match { + case t if t.getExitCode != 0 => + PodFailed(pod) + case t if t.getExitCode == 0 => + PodSucceeded(pod) + case _ => + PodRunning(pod) + } + // If we can't find the Spark container status, fall back to the pod status + case _ => + logWarning(s"Unable to find container ${sparkContainerName} in pod ${pod} " + + "defaulting to entire pod status (running).") + PodRunning(pod) + } } case "failed" => PodFailed(pod) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index 151e98ba17e3b..939a4ee9c7721 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,6 +25,7 @@ import io.fabric8.kubernetes.client.Config import org.apache.spark.SparkContext import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ +import org.apache.spark.deploy.k8s.Constants.DEFAULT_EXECUTOR_CONTAINER_NAME import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.util.{SystemClock, ThreadUtils} @@ -96,6 +97,9 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit ExecutorPodsSnapshot.setShouldCheckAllContainers( sc.conf.get(KUBERNETES_EXECUTOR_CHECK_ALL_CONTAINERS)) + val sparkContainerName = sc.conf.get(KUBERNETES_EXECUTOR_PODTEMPLATE_CONTAINER_NAME) + .getOrElse(DEFAULT_EXECUTOR_CONTAINER_NAME) + ExecutorPodsSnapshot.setSparkContainerName(sparkContainerName) val subscribersExecutor = ThreadUtils .newDaemonThreadPoolScheduledExecutor( "kubernetes-executor-snapshots-subscribers", 2) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala index 6e989316310e6..c30efde7b02be 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/DeterministicExecutorPodsSnapshotsStore.scala @@ -19,9 +19,13 @@ package org.apache.spark.scheduler.cluster.k8s import io.fabric8.kubernetes.api.model.Pod import scala.collection.mutable +import org.apache.spark.deploy.k8s.Constants.DEFAULT_EXECUTOR_CONTAINER_NAME + + class DeterministicExecutorPodsSnapshotsStore extends ExecutorPodsSnapshotsStore { ExecutorPodsSnapshot.setShouldCheckAllContainers(false) + ExecutorPodsSnapshot.setSparkContainerName(DEFAULT_EXECUTOR_CONTAINER_NAME) private val snapshotsBuffer = mutable.Buffer.empty[ExecutorPodsSnapshot] private val subscribers = mutable.Buffer.empty[Seq[ExecutorPodsSnapshot] => Unit] diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index ad79e3a39832b..225278c2aad71 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -106,6 +106,31 @@ object ExecutorLifecycleTestUtils { .build() } + /** + * This creates a pod with a finished executor and running sidecar + */ + def finishedExecutorWithRunningSidecar( + executorId: Long, exitCode: Int): Pod = { + new PodBuilder(podWithAttachedContainerForId(executorId, DEFAULT_RESOURCE_PROFILE_ID)) + .editOrNewStatus() + .withPhase("running") + .addNewContainerStatus() + .withNewState() + .withNewTerminated() + .withExitCode(exitCode) + .endTerminated() + .endState() + .endContainerStatus() + .addNewContainerStatus() + .withNewState() + .withNewRunning() + .endRunning() + .endState() + 
.endContainerStatus() + .endStatus() + .build() + } + def succeededExecutor(executorId: Long, rpId: Int = DEFAULT_RESOURCE_PROFILE_ID): Pod = { new PodBuilder(podWithAttachedContainerForId(executorId, rpId)) .editOrNewStatus() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala index ad12461bfaf8c..8d285abe753d5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshotSuite.scala @@ -43,7 +43,9 @@ class ExecutorPodsSnapshotSuite extends SparkFunSuite { testCase(succeededExecutor(2), PodSucceeded), testCase(failedExecutorWithoutDeletion(3), PodFailed), testCase(deletedExecutor(4), PodDeleted), - testCase(unknownExecutor(5), PodUnknown) + testCase(unknownExecutor(5), PodUnknown), + testCase(finishedExecutorWithRunningSidecar(6, 0), PodSucceeded), + testCase(finishedExecutorWithRunningSidecar(7, 1), PodFailed) ) doTest(testCases) } From 49aa6ebef112bdd4169bbf6b4c85b6712281bac0 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 30 Dec 2020 05:28:01 +0000 Subject: [PATCH 0914/1009] [SPARK-32684][SQL][TESTS] Add a test case to check if null value is same as Hive's '\\N' in script transformation ### What changes were proposed in this pull request? In hive script transform serde mode, NULL format default is `\\N` ``` String nullString = tbl.getProperty( serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); nullSequence = new Text(nullString); ``` I make a mistake that in Spark's code we need to fix and keep same with hive too. So add some test case to show this issue. ### Why are the changes needed? add UT ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #30946 from AngersZhuuuu/SPARK-32684. 
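As a rough illustration of the behaviour being pinned down (assuming a spark-shell with Hive support enabled, as in the suite below):

```scala
// Illustration only, mirroring the first test added below.
// With the Hive serde defaults, NULL columns fed through TRANSFORM come back using
// Hive's serialization.null.format, i.e. the literal "\N".
val q = spark.sql(
  """SELECT TRANSFORM(null, null, null)
    |USING 'cat'
    |FROM (SELECT 1 AS a) t
    |""".stripMargin)
q.show(truncate = false)
// The new test below asserts the equivalent result via
// checkAnswer(query1, identity, Row(null, "\\N\t\\N") :: Nil)
```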
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../HiveScriptTransformationSuite.scala | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index 266c526b1a24b..3892caa51eca9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -501,4 +501,31 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T """.stripMargin) checkAnswer(query4, identity, Row(null) :: Nil) } + + test("SPARK-32684: Script transform hive serde mode null format is same with hive as '\\N'") { + val query1 = sql( + """ + |SELECT TRANSFORM(null, null, null) + |USING 'cat' + |FROM (SELECT 1 AS a) t + """.stripMargin) + checkAnswer(query1, identity, Row(null, "\\N\t\\N") :: Nil) + + val query2 = sql( + """ + |SELECT TRANSFORM(null, null, null) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = ',' + | ) + |USING 'cat' AS (a) + | ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + | WITH SERDEPROPERTIES ( + | 'field.delim' = '&' + | ) + |FROM (SELECT 1 AS a) t + """.stripMargin) + checkAnswer(query2, identity, Row("\\N,\\N,\\N") :: Nil) + + } } From 687f465244301112a1f6cafa5d9361b2c7d7b4a5 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 30 Dec 2020 06:06:17 +0000 Subject: [PATCH 0915/1009] [SPARK-33890][SQL] Improve the implement of trim/trimleft/trimright ### What changes were proposed in this pull request? The current implement of trim/trimleft/trimright have somewhat redundant. ### Why are the changes needed? Improve the implement of trim/trimleft/trimright ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test Closes #30905 from beliefer/SPARK-33890. 
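The refactor is essentially a template-method extraction; a minimal sketch of the same idea, assuming simplified types (plain `String` instead of `UTF8String`, no codegen, names made up):

```scala
// Sketch only: shared null handling lives in the base trait, and each concrete trim
// expression contributes just its direction-specific primitive (cf. doEval/trimMethod).
trait TrimSketch {
  protected def doEval(src: String): String
  def eval(src: String): String = if (src == null) null else doEval(src)
}
object TrimLeftSketch extends TrimSketch {
  protected def doEval(src: String): String = src.dropWhile(_ == ' ')
}
object TrimRightSketch extends TrimSketch {
  protected def doEval(src: String): String = src.reverse.dropWhile(_ == ' ').reverse
}

TrimLeftSketch.eval("  spark  ")   // "spark  "
TrimRightSketch.eval("  spark  ")  // "  spark"
```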
Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../expressions/stringExpressions.scala | 202 ++++++------------ 1 file changed, 64 insertions(+), 138 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 6caf4395090f1..9317684d0376f 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -764,6 +764,55 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) + protected def doEval(srcString: UTF8String): UTF8String + protected def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String + + override def eval(input: InternalRow): Any = { + val srcString = srcStr.eval(input).asInstanceOf[UTF8String] + if (srcString == null) { + null + } else if (trimStr.isDefined) { + doEval(srcString, trimStr.get.eval(input).asInstanceOf[UTF8String]) + } else { + doEval(srcString) + } + } + + protected val trimMethod: String + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val evals = children.map(_.genCode(ctx)) + val srcString = evals(0) + + if (evals.length == 1) { + ev.copy(code = code""" + |${srcString.code} + |boolean ${ev.isNull} = false; + |UTF8String ${ev.value} = null; + |if (${srcString.isNull}) { + | ${ev.isNull} = true; + |} else { + | ${ev.value} = ${srcString.value}.$trimMethod(); + |}""".stripMargin) + } else { + val trimString = evals(1) + ev.copy(code = code""" + |${srcString.code} + |boolean ${ev.isNull} = false; + |UTF8String ${ev.value} = null; + |if (${srcString.isNull}) { + | ${ev.isNull} = true; + |} else { + | ${trimString.code} + | if (${trimString.isNull}) { + | ${ev.isNull} = true; + | } else { + | ${ev.value} = ${srcString.value}.$trimMethod(${trimString.value}); + | } + |}""".stripMargin) + } + } + override def sql: String = if (trimStr.isDefined) { s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})" } else { @@ -840,9 +889,7 @@ object StringTrim { """, since = "1.5.0", group = "string_funcs") -case class StringTrim( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrim(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -853,51 +900,12 @@ case class StringTrim( override protected def direction: String = "BOTH" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trim() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trim() - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trim(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String 
${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trim(); - }""") - } else { - val trimString = evals(1) - val getTrimFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trim(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimFunction - }""") - } - } + override val trimMethod: String = "trim" } object StringTrimLeft { @@ -934,9 +942,7 @@ object StringTrimLeft { """, since = "1.5.0", group = "string_funcs") -case class StringTrimLeft( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrimLeft(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -947,51 +953,12 @@ case class StringTrimLeft( override protected def direction: String = "LEADING" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trimLeft(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trimLeft() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trimLeft() - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trimLeft(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimLeft(); - }""") - } else { - val trimString = evals(1) - val getTrimLeftFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimLeft(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimLeftFunction - }""") - } - } + override val trimMethod: String = "trimLeft" } object StringTrimRight { @@ -1030,9 +997,7 @@ object StringTrimRight { since = "1.5.0", group = "string_funcs") // scalastyle:on line.size.limit -case class StringTrimRight( - srcStr: Expression, - trimStr: Option[Expression] = None) +case class StringTrimRight(srcStr: Expression, trimStr: Option[Expression] = None) extends String2TrimExpression { def this(trimStr: Expression, srcStr: Expression) = this(srcStr, Option(trimStr)) @@ -1043,51 +1008,12 @@ case class StringTrimRight( override protected def direction: String = "TRAILING" - override def eval(input: InternalRow): Any = { - val srcString = srcStr.eval(input).asInstanceOf[UTF8String] - if (srcString == null) { - null - } else { - if (trimStr.isDefined) { - srcString.trimRight(trimStr.get.eval(input).asInstanceOf[UTF8String]) - } else { - srcString.trimRight() - } - } - } + override def doEval(srcString: UTF8String): UTF8String = srcString.trimRight() - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + override def 
doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = + srcString.trimRight(trimString) - if (evals.length == 1) { - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimRight(); - }""") - } else { - val trimString = evals(1) - val getTrimRightFunction = - s""" - if (${trimString.isNull}) { - ${ev.isNull} = true; - } else { - ${ev.value} = ${srcString.value}.trimRight(${trimString.value}); - }""" - ev.copy(evals.map(_.code) :+ code""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = null; - if (${srcString.isNull}) { - ${ev.isNull} = true; - } else { - $getTrimRightFunction - }""") - } - } + override val trimMethod: String = "trimRight" } /** From 4a669f583089fc704cdc46cff8f1680470a068ee Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 16:15:41 +0900 Subject: [PATCH 0916/1009] [MINOR][SS] Call fetchEarliestOffsets when it is necessary ### What changes were proposed in this pull request? This minor patch changes two variables where calling `fetchEarliestOffsets` to `lazy` because these values are not always necessary. ### Why are the changes needed? To avoid unnecessary Kafka RPC calls. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30969 from viirya/ss-minor3. Authored-by: Liang-Chi Hsieh Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala | 2 +- .../main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index c25b8b4e510a0..d6fd3aeb7f670 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -164,7 +164,7 @@ private[kafka010] class KafkaMicroBatchStream( limit: Long, from: PartitionOffsetMap, until: PartitionOffsetMap): PartitionOffsetMap = { - val fromNew = kafkaOffsetReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) + lazy val fromNew = kafkaOffsetReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) val sizes = until.flatMap { case (tp, end) => // If begin isn't defined, something's wrong, but let alert logic in getBatch handle it diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index 71ccb5f952f0a..b4e5a8db7d344 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -154,7 +154,7 @@ private[kafka010] class KafkaSource( limit: Long, from: Map[TopicPartition, Long], until: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - val fromNew = kafkaReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) + lazy val fromNew = kafkaReader.fetchEarliestOffsets(until.keySet.diff(from.keySet).toSeq) val sizes = until.flatMap { case (tp, end) => // If begin isn't defined, something's wrong, but let alert logic in getBatch handle it From 
403bf55cbef1e4cf50dc868202cccfb867279bbd Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 30 Dec 2020 16:37:23 +0900 Subject: [PATCH 0917/1009] [SPARK-33927][BUILD] Fix Dockerfile for Spark release to work ### What changes were proposed in this pull request? This PR proposes to fix the `Dockerfile` for Spark release. - Port https://github.com/apache/spark/commit/b135db3b1a5c0b2170e98b97f6160bcf55903799 to `Dockerfile` - Upgrade Ubuntu 18.04 -> 20.04 (because of porting b135db3) - Remove Python 2 (because of Ubuntu upgrade) - Use built-in Python 3.8.5 (because of Ubuntu upgrade) - Node.js 11 -> 12 (because of Ubuntu upgrade) - Ruby 2.5 -> 2.7 (because of Ubuntu upgrade) - Python dependencies and Jekyll + plugins upgrade to the latest as it's used in GitHub Actions build (unrelated to the issue itself) ### Why are the changes needed? To make a Spark release :-). ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Manually tested via: ```bash cd dev/create-release/spark-rm docker build -t spark-rm --build-arg UID=$UID . ``` ``` ... Successfully built 516d7943634f Successfully tagged spark-rm:latest ``` Closes #30971 from HyukjinKwon/SPARK-33927. Lead-authored-by: Hyukjin Kwon Co-authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- dev/create-release/spark-rm/Dockerfile | 32 +++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 6b32f10490719..8735d1fd23ce2 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -15,16 +15,20 @@ # limitations under the License. # -# Image for building Spark releases. Based on Ubuntu 18.04. +# Image for building Spark releases. Based on Ubuntu 20.04. # # Includes: # * Java 8 # * Ivy -# * Python (2.7.15/3.6.7) -# * R-base/R-base-dev (4.0.2) -# * Ruby 2.3 build utilities +# * Python (3.8.5) +# * R-base/R-base-dev (4.0.3) +# * Ruby (2.7.0) +# +# You can test it as below: +# cd dev/create-release/spark-rm +# docker build -t spark-rm --build-arg UID=$UID . -FROM ubuntu:18.04 +FROM ubuntu:20.04 # For apt to be noninteractive ENV DEBIAN_FRONTEND noninteractive @@ -36,8 +40,8 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # We should use the latest Sphinx version once this is fixed. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1 ipython==7.16.1 nbsphinx==0.7.1 numpydoc==1.1.0" -ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0" +ARG GEM_PKGS="jekyll:4.2.0 jekyll-redirect-from:0.16.0 rouge:3.26.0" # Install extra needed repos and refresh. # - CRAN repo @@ -46,7 +50,7 @@ ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0" # This is all in a single "RUN" command so that if anything changes, "apt update" is run to fetch # the most current package versions (instead of potentially using old versions cached by docker). 
RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ - echo 'deb https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/' >> /etc/apt/sources.list && \ + echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list && \ gpg --keyserver keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ gpg -a --export E084DAB9 | apt-key add - && \ apt-get clean && \ @@ -54,7 +58,6 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ apt-get clean && \ apt-get update && \ $APT_INSTALL software-properties-common && \ - apt-add-repository -y ppa:brightbox/ruby-ng && \ apt-get update && \ # Install openjdk 8. $APT_INSTALL openjdk-8-jdk && \ @@ -62,26 +65,23 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Install build / source control tools $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ - curl -sL https://deb.nodesource.com/setup_11.x | bash && \ + curl -sL https://deb.nodesource.com/setup_12.x | bash && \ $APT_INSTALL nodejs && \ # Install needed python packages. Use pip for installing packages (for consistency). - $APT_INSTALL libpython3-dev python3-pip python3-setuptools && \ + $APT_INSTALL python3-pip python3-setuptools && \ # qpdf is required for CRAN checks to pass. $APT_INSTALL qpdf jq && \ - # Change default python version to python3. - update-alternatives --install /usr/bin/python python /usr/bin/python2.7 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 && \ - update-alternatives --set python /usr/bin/python3.6 && \ pip3 install $PIP_PKGS && \ # Install R packages and dependencies used when building. # R depends on pandoc*, libssl (which are installed above). # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ + $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ # Install tools needed to build the documentation. - $APT_INSTALL ruby2.5 ruby2.5-dev && \ + $APT_INSTALL ruby2.7 ruby2.7-dev && \ gem install --no-document $GEM_PKGS WORKDIR /opt/spark-rm/output From 0eb4961ca8d8d20d215768862e0ea2e1f92c46fb Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 30 Dec 2020 07:52:34 +0000 Subject: [PATCH 0918/1009] [SPARK-33926][SQL] Improve the error message from resolving of v1 database name ### What changes were proposed in this pull request? 1. Replace `SessionCatalogAndNamespace` by `DatabaseInSessionCatalog` in resolving database name from v1 session catalog. 2. Throw more precise errors from `DatabaseInSessionCatalog` 3. Fix expected error messages in `v1.ShowTablesSuiteBase` Closes #30947 ### Why are the changes needed? Current error message "multi-part identifier cannot be empty" may confuse users. And this error message is just a consequence of "incorrectly" applied an implicit class. For example, `SHOW TABLES IN spark_catalog`: 1. 
Spark cuts off `spark_catalog` from namespaces in `SessionCatalogAndNamespace`, so, `ns == Seq.empty` here: https://github.com/apache/spark/blob/0617dfce7beb34662ab30a607721e9b46e65c21e/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala#L365 2. Then `ns.length != 1` is `true` and Spark tries to raise the exception at https://github.com/apache/spark/blob/0617dfce7beb34662ab30a607721e9b46e65c21e/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala#L367 3. ... but `ns.quoted` triggers implicit wrapping `Seq.empty` by `MultipartIdentifierHelper`, and hit to the second check `if (parts.isEmpty)` at https://github.com/apache/spark/blob/156704ba0dfcae39a80b8f0ce778b73913db03b2/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala#L120-L122 So, Spark throws the exception at third step instead of `new AnalysisException(s"The database name is not valid: $quoted")` on the second step. And even on the second step, the exception doesn't show actual reason as it is pretty generic. ### Does this PR introduce _any_ user-facing change? Yes in the case of v1 DDL commands when a database is not specified or nested databases is set. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DDLSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowTablesSuite" ``` Closes #30963 from MaxGekk/database-in-session-catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../analysis/ResolveSessionCatalog.scala | 73 +++++++------------ .../sql/connector/DataSourceV2SQLSuite.scala | 3 +- .../command/v1/ShowTablesSuite.scala | 10 +-- 3 files changed, 34 insertions(+), 52 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index dec1300d66f35..3c5157bea9470 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} import org.apache.spark.sql.catalyst.plans.logical._ @@ -210,23 +210,14 @@ class ResolveSessionCatalog( case AlterViewUnsetProperties(ResolvedView(ident, _), keys, ifExists) => AlterTableUnsetPropertiesCommand(ident.asTableIdentifier, keys, ifExists, isView = true) - case d @ DescribeNamespace(SessionCatalogAndNamespace(_, ns), _) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - DescribeDatabaseCommand(ns.head, d.extended) + case d @ DescribeNamespace(DatabaseInSessionCatalog(db), _) => + DescribeDatabaseCommand(db, d.extended) - case AlterNamespaceSetProperties(SessionCatalogAndNamespace(_, ns), properties) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - AlterDatabasePropertiesCommand(ns.head, properties) + case AlterNamespaceSetProperties(DatabaseInSessionCatalog(db), properties) => + AlterDatabasePropertiesCommand(db, properties) - case 
AlterNamespaceSetLocation(SessionCatalogAndNamespace(_, ns), location) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - AlterDatabaseSetLocationCommand(ns.head, location) + case AlterNamespaceSetLocation(DatabaseInSessionCatalog(db), location) => + AlterDatabaseSetLocationCommand(db, location) case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) => AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView) @@ -356,27 +347,18 @@ class ResolveSessionCatalog( val newProperties = c.properties -- CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES CreateDatabaseCommand(ns.head, c.ifNotExists, location, comment, newProperties) - case d @ DropNamespace(SessionCatalogAndNamespace(_, ns), _, _) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - DropDatabaseCommand(ns.head, d.ifExists, d.cascade) + case d @ DropNamespace(DatabaseInSessionCatalog(db), _, _) => + DropDatabaseCommand(db, d.ifExists, d.cascade) - case ShowTables(SessionCatalogAndNamespace(_, ns), pattern) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - ShowTablesCommand(Some(ns.head), pattern) + case ShowTables(DatabaseInSessionCatalog(db), pattern) => + ShowTablesCommand(Some(db), pattern) case ShowTableExtended( - SessionCatalogAndNamespace(_, ns), + DatabaseInSessionCatalog(db), pattern, partitionSpec @ (None | Some(UnresolvedPartitionSpec(_, _)))) => - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } ShowTablesCommand( - databaseName = Some(ns.head), + databaseName = Some(db), tableIdentifierPattern = Some(pattern), isExtended = true, partitionSpec.map(_.asInstanceOf[UnresolvedPartitionSpec].spec)) @@ -498,12 +480,7 @@ class ResolveSessionCatalog( case ShowViews(resolved: ResolvedNamespace, pattern) => resolved match { - case SessionCatalogAndNamespace(_, ns) => - // Fallback to v1 ShowViewsCommand since there is no view API in v2 catalog - if (ns.length != 1) { - throw QueryCompilationErrors.invalidDatabaseNameError(ns.quoted) - } - ShowViewsCommand(ns.head, pattern) + case DatabaseInSessionCatalog(db) => ShowViewsCommand(db, pattern) case _ => throw QueryCompilationErrors.externalCatalogNotSupportShowViewsError(resolved) } @@ -662,15 +639,6 @@ class ResolveSessionCatalog( } } - object SessionCatalogAndNamespace { - def unapply(resolved: ResolvedNamespace): Option[(CatalogPlugin, Seq[String])] = - if (isSessionCatalog(resolved.catalog)) { - Some(resolved.catalog -> resolved.namespace) - } else { - None - } - } - object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) @@ -709,4 +677,17 @@ class ResolveSessionCatalog( case _ => false } } + + private object DatabaseInSessionCatalog { + def unapply(resolved: ResolvedNamespace): Option[String] = resolved match { + case ResolvedNamespace(catalog, _) if !isSessionCatalog(catalog) => None + case ResolvedNamespace(_, Seq()) => + throw new AnalysisException("Database from v1 session catalog is not specified") + case ResolvedNamespace(_, Seq(dbName)) => Some(dbName) + case _ => + assert(resolved.namespace.length > 1) + throw new AnalysisException("Nested databases are not supported by " + + s"v1 session catalog: ${resolved.namespace.map(quoteIfNeeded).mkString(".")}") + } + } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b8d58217efa6e..f821335690aeb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -937,7 +937,8 @@ class DataSourceV2SQLSuite sql("SHOW VIEWS FROM a.b") } - assert(exception.getMessage.contains("The database name is not valid: a.b")) + assert(exception.getMessage.contains( + "Nested databases are not supported by v1 session catalog: a.b")) } test("ShowViews: using v2 catalog, command not supported.") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 74298c020415d..5f5bcc8170aa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -63,11 +63,11 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { } } - test("v1 SHOW TABLES only support single-level namespace") { - val exception = intercept[AnalysisException] { + test("only support single-level namespace") { + val errMsg = intercept[AnalysisException] { runShowTablesSql("SHOW TABLES FROM a.b", Seq()) - } - assert(exception.getMessage.contains("The database name is not valid: a.b")) + }.getMessage + assert(errMsg.contains("Nested databases are not supported by v1 session catalog: a.b")) } test("SHOW TABLE EXTENDED from default") { @@ -116,7 +116,7 @@ trait ShowTablesSuiteBase extends command.ShowTablesSuiteBase { val errMsg = intercept[AnalysisException] { sql(showTableCmd) }.getMessage - assert(errMsg.contains("multi-part identifier cannot be empty")) + assert(errMsg.contains("Database from v1 session catalog is not specified")) } } } From 2afd1fb49243e28152b3e581923b49d3aaab0dd7 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 30 Dec 2020 07:56:34 +0000 Subject: [PATCH 0919/1009] [SPARK-33904][SQL] Recognize `spark_catalog` in `saveAsTable()` and `insertInto()` ### What changes were proposed in this pull request? In the `saveAsTable()` and `insertInto()` methods of `DataFrameWriter`, recognize `spark_catalog` as the default session catalog in table names. ### Why are the changes needed? 1. To simplify writing of unified v1 and v2 tests 2. To improve Spark SQL user experience. `insertInto()` should have feature parity with the `INSERT INTO` sql command. Currently, `insertInto()` fails on a table from a namespace in `spark_catalog`: ```scala scala> sql("CREATE NAMESPACE spark_catalog.ns") scala> Seq(0).toDF().write.saveAsTable("spark_catalog.ns.tbl") org.apache.spark.sql.AnalysisException: Couldn't find a catalog to handle the identifier spark_catalog.ns.tbl. at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:629) ... 47 elided scala> Seq(0).toDF().write.insertInto("spark_catalog.ns.tbl") org.apache.spark.sql.AnalysisException: Couldn't find a catalog to handle the identifier spark_catalog.ns.tbl. at org.apache.spark.sql.DataFrameWriter.insertInto(DataFrameWriter.scala:498) ... 
47 elided ``` but `INSERT INTO` succeed: ```sql spark-sql> create table spark_catalog.ns.tbl (c int); spark-sql> insert into spark_catalog.ns.tbl select 0; spark-sql> select * from spark_catalog.ns.tbl; 0 ``` ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```scala scala> Seq(0).toDF().write.saveAsTable("spark_catalog.ns.tbl") scala> Seq(1).toDF().write.insertInto("spark_catalog.ns.tbl") scala> spark.table("spark_catalog.ns.tbl").show(false) +-----+ |value| +-----+ |0 | |1 | +-----+ ``` ### How was this patch tested? By running the affected test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.ShowPartitionsSuite" $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *.FileFormatWriterSuite" ``` Closes #30919 from MaxGekk/insert-into-spark_catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/connector/catalog/LookupCatalog.scala | 27 ++++++++++--------- .../command/ShowPartitionsSuiteBase.scala | 12 ++++++++- .../command/v1/ShowPartitionsSuite.scala | 19 ++++--------- .../command/v2/ShowPartitionsSuite.scala | 22 ++++----------- .../datasources/FileFormatWriterSuite.scala | 13 +++++++++ .../command/ShowPartitionsSuite.scala | 19 +++++-------- 6 files changed, 55 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala index d8cdecce0d172..16416faeb2859 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/LookupCatalog.scala @@ -140,19 +140,22 @@ private[sql] trait LookupCatalog extends Logging { * For legacy support only. Please use [[CatalogAndIdentifier]] instead on DSv2 code paths. 
*/ object AsTableIdentifier { - def unapply(parts: Seq[String]): Option[TableIdentifier] = parts match { - case CatalogAndMultipartIdentifier(None, names) + def unapply(parts: Seq[String]): Option[TableIdentifier] = { + def namesToTableIdentifier(names: Seq[String]): Option[TableIdentifier] = names match { + case Seq(name) => Some(TableIdentifier(name)) + case Seq(database, name) => Some(TableIdentifier(name, Some(database))) + case _ => None + } + parts match { + case CatalogAndMultipartIdentifier(None, names) if CatalogV2Util.isSessionCatalog(currentCatalog) => - names match { - case Seq(name) => - Some(TableIdentifier(name)) - case Seq(database, name) => - Some(TableIdentifier(name, Some(database))) - case _ => - None - } - case _ => - None + namesToTableIdentifier(names) + case CatalogAndMultipartIdentifier(Some(catalog), names) + if CatalogV2Util.isSessionCatalog(catalog) && + CatalogV2Util.isSessionCatalog(currentCatalog) => + namesToTableIdentifier(names) + case _ => None + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala index 9a942d348a181..29edb8fb51cf8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowPartitionsSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{StringType, StructType} @@ -53,6 +53,16 @@ trait ShowPartitionsSuiteBase extends QueryTest with DDLCommandTestUtils { sql(s"ALTER TABLE $table ADD PARTITION(year = 2016, month = 3)") } + protected def createNullPartTable(table: String, format: String): Unit = { + import testImplicits._ + val df = Seq((0, ""), (1, null)).toDF("a", "part") + df.write + .partitionBy("part") + .format(format) + .mode(SaveMode.Overwrite) + .saveAsTable(table) + } + test("show partitions of non-partitioned table") { withNamespaceAndTable("ns", "not_partitioned_table") { t => sql(s"CREATE TABLE $t (col1 int) $defaultUsing") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index 5be5e28d01706..e85d62c51ef45 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -105,22 +105,13 @@ class ShowPartitionsSuite extends ShowPartitionsSuiteBase with CommandSuiteBase } } - test("null and empty string as partition values") { - import testImplicits._ - withTable("t") { - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("parquet") - .mode(SaveMode.Overwrite) - .saveAsTable("t") - + test("SPARK-33904: null and empty string as partition values") { + withNamespaceAndTable("ns", "tbl") { t => + createNullPartTable(t, "parquet") runShowPartitionsSql( - "SHOW PARTITIONS t", + s"SHOW PARTITIONS $t", Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) - checkAnswer(spark.table("t"), - Row(0, null) :: - Row(1, null) :: Nil) + checkAnswer(spark.table(t), Row(0, null) :: Row(1, null) :: Nil) } } } diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala index 44d8b57ce1596..42f05ee55504a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v2 -import org.apache.spark.sql.{AnalysisException, Row, SaveMode} +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.execution.command /** @@ -38,23 +38,11 @@ class ShowPartitionsSuite extends command.ShowPartitionsSuiteBase with CommandSu } } - test("SPARK-33889: null and empty string as partition values") { - import testImplicits._ + test("SPARK-33889, SPARK-33904: null and empty string as partition values") { withNamespaceAndTable("ns", "tbl") { t => - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("parquet") - .mode(SaveMode.Overwrite) - .saveAsTable(t) - - runShowPartitionsSql( - s"SHOW PARTITIONS $t", - Row("part=") :: - Row("part=null") :: Nil) - checkAnswer(spark.table(t), - Row(0, "") :: - Row(1, null) :: Nil) + createNullPartTable(t, "parquet") + runShowPartitionsSql(s"SHOW PARTITIONS $t", Row("part=") :: Row("part=null") :: Nil) + checkAnswer(spark.table(t), Row(0, "") :: Row(1, null) :: Nil) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala index ce511842e6356..f492fc653653e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileFormatWriterSuite.scala @@ -61,4 +61,17 @@ class FileFormatWriterSuite checkAnswer(spark.table("t2").sort("id"), Seq(Row(0, null), Row(1, null), Row(2, null))) } } + + test("SPARK-33904: save and insert into a table in a namespace of spark_catalog") { + val ns = "spark_catalog.ns" + withNamespace(ns) { + spark.sql(s"CREATE NAMESPACE $ns") + val t = s"$ns.tbl" + withTable(t) { + spark.range(1).write.saveAsTable(t) + Seq(100).toDF().write.insertInto(t) + checkAnswer(spark.table(t), Seq(Row(0), Row(100))) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala index 904c6c40b938f..ded53cc3ea7f0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowPartitionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.hive.execution.command -import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.Row import org.apache.spark.sql.execution.command.v1 /** @@ -25,21 +25,14 @@ import org.apache.spark.sql.execution.command.v1 * V1 Hive external table catalog. 
*/ class ShowPartitionsSuite extends v1.ShowPartitionsSuiteBase with CommandSuiteBase { - test("null and empty string as partition values") { - import testImplicits._ + test("SPARK-33904: null and empty string as partition values") { withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { - withTable("t") { - val df = Seq((0, ""), (1, null)).toDF("a", "part") - df.write - .partitionBy("part") - .format("hive") - .mode(SaveMode.Overwrite) - .saveAsTable("t") - + withNamespaceAndTable("ns", "tbl") { t => + createNullPartTable(t, "hive") runShowPartitionsSql( - "SHOW PARTITIONS t", + s"SHOW PARTITIONS $t", Row("part=__HIVE_DEFAULT_PARTITION__") :: Nil) - checkAnswer(spark.table("t"), + checkAnswer(spark.table(t), Row(0, "__HIVE_DEFAULT_PARTITION__") :: Row(1, "__HIVE_DEFAULT_PARTITION__") :: Nil) } From ba974ea8e4cc8075056682c2badab5ca64b90047 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 30 Dec 2020 13:14:31 +0000 Subject: [PATCH 0920/1009] [SPARK-30789][SQL] Support (IGNORE | RESPECT) NULLS for LEAD/LAG/NTH_VALUE/FIRST_VALUE/LAST_VALUE ### What changes were proposed in this pull request? All of `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE` should support IGNORE NULLS | RESPECT NULLS. For example: ``` LEAD (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` LAG (value_expr [, offset ]) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] ORDER BY window_ordering ) ``` ``` NTH_VALUE (expr, offset) [ IGNORE NULLS | RESPECT NULLS ] OVER ( [ PARTITION BY window_partition ] [ ORDER BY window_ordering frame_clause ] ) ``` The mainstream database or engine supports this syntax contains: **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/NTH_VALUE.html#GUID-F8A0E88C-67E5-4AA6-9515-95D03A7F9EA0 **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_WF_NTH.html **Presto** https://prestodb.io/docs/current/functions/window.html **DB2** https://www.ibm.com/support/knowledgecenter/SSGU8G_14.1.0/com.ibm.sqls.doc/ids_sqs_1513.htm **Teradata** https://docs.teradata.com/r/756LNiPSFdY~4JcCCcR5Cw/GjCT6l7trjkIEjt~7Dhx4w **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/lead.html https://docs.snowflake.com/en/sql-reference/functions/lag.html https://docs.snowflake.com/en/sql-reference/functions/nth_value.html https://docs.snowflake.com/en/sql-reference/functions/first_value.html https://docs.snowflake.com/en/sql-reference/functions/last_value.html **Exasol** https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lead.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/lag.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/nth_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/first_value.htm https://docs.exasol.com/sql_references/functions/alphabeticallistfunctions/last_value.htm ### Why are the changes needed? Support `(IGNORE | RESPECT) NULLS` for `LEAD`/`LAG`/`NTH_VALUE`/`FIRST_VALUE`/`LAST_VALUE `is very useful. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Jenkins test Closes #30943 from beliefer/SPARK-30789. 
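For illustration only (not part of this patch): a minimal spark-shell style sketch of the new syntax against a made-up temp view. `IGNORE NULLS` makes `last_value`/`lag` skip null inputs, while the default `RESPECT NULLS` surfaces them as-is.

```
// Hypothetical data; assumes this patch is applied and runs in spark-shell
// (spark.implicits._ already imported).
val df = Seq(("a", 0, null), ("a", 1, "x"), ("a", 2, null), ("a", 3, "y"))
  .toDF("content", "id", "v")
df.createOrReplaceTempView("tbl")

spark.sql("""
  SELECT id, v,
         last_value(v) IGNORE NULLS OVER (ORDER BY id) AS last_non_null,
         lag(v, 1)     IGNORE NULLS OVER (ORDER BY id) AS prev_non_null
  FROM tbl
  ORDER BY id
""").show()
// last_non_null carries the most recent non-null v seen so far in the frame;
// prev_non_null is the latest non-null v from a preceding row, skipping nulls.
```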
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBase.g4 | 6 +- .../spark/sql/QueryCompilationErrors.scala | 4 + .../sql/catalyst/analysis/Analyzer.scala | 45 ++- .../analysis/higherOrderFunctions.scala | 6 +- .../sql/catalyst/analysis/unresolved.scala | 3 +- .../sql/catalyst/parser/AstBuilder.scala | 4 +- .../analysis/AnalysisErrorSuite.scala | 20 ++ .../resources/sql-tests/inputs/window.sql | 148 ++++++++- .../sql-tests/results/window.sql.out | 280 +++++++++++++++++- 10 files changed, 508 insertions(+), 9 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 8201fd707275d..16059a5a08e9a 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -363,6 +363,7 @@ Below is a list of all the keywords in Spark SQL. |REPAIR|non-reserved|non-reserved|non-reserved| |REPLACE|non-reserved|non-reserved|non-reserved| |RESET|non-reserved|non-reserved|non-reserved| +|RESPECT|non-reserved|non-reserved|non-reserved| |RESTRICT|non-reserved|non-reserved|non-reserved| |REVOKE|non-reserved|non-reserved|reserved| |RIGHT|reserved|strict-non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index d2908a555858d..ab4b7833503fb 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -803,7 +803,8 @@ primaryExpression | '(' namedExpression (',' namedExpression)+ ')' #rowConstructor | '(' query ')' #subqueryExpression | functionName '(' (setQuantifier? argument+=expression (',' argument+=expression)*)? ')' - (FILTER '(' WHERE where=booleanExpression ')')? (OVER windowSpec)? #functionCall + (FILTER '(' WHERE where=booleanExpression ')')? + (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? 
#functionCall | identifier '->' expression #lambda | '(' identifier (',' identifier)+ ')' '->' expression #lambda | value=primaryExpression '[' index=valueExpression ']' #subscript @@ -1143,6 +1144,7 @@ ansiNonReserved | REPAIR | REPLACE | RESET + | RESPECT | RESTRICT | REVOKE | RLIKE @@ -1397,6 +1399,7 @@ nonReserved | REPAIR | REPLACE | RESET + | RESPECT | RESTRICT | REVOKE | RLIKE @@ -1651,6 +1654,7 @@ RENAME: 'RENAME'; REPAIR: 'REPAIR'; REPLACE: 'REPLACE'; RESET: 'RESET'; +RESPECT: 'RESPECT'; RESTRICT: 'RESTRICT'; REVOKE: 'REVOKE'; RIGHT: 'RIGHT'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index 51a2cb0cb4d92..e4a1f3f8efeee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -268,6 +268,10 @@ object QueryCompilationErrors { s"but $prettyName is not an aggregate function") } + def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = { + new AnalysisException(s"Function $prettyName does not support IGNORE NULLS") + } + def nonDeterministicFilterInAggregateError(): Throwable = { new AnalysisException("FILTER expression is non-deterministic, " + "it cannot be used in aggregate functions") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8af692d9fe008..5e86368f6f4b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes -import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.{FrameLessOffsetWindowFunction, _} import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects._ @@ -2113,7 +2113,7 @@ class Analyzer(override val catalogManager: CatalogManager) name, other.getClass.getCanonicalName) } } - case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter) => + case u @ UnresolvedFunction(funcId, arguments, isDistinct, filter, ignoreNulls) => withPosition(u) { v1SessionCatalog.lookupFunction(funcId, arguments) match { // AggregateWindowFunctions are AggregateFunctions that can only be evaluated within @@ -2123,19 +2123,58 @@ class Analyzer(override val catalogManager: CatalogManager) if (isDistinct || filter.isDefined) { throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( wf.prettyName) + } else if (ignoreNulls) { + wf match { + case nthValue: NthValue => + nthValue.copy(ignoreNulls = ignoreNulls) + case _ => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + wf.prettyName) + } } else { wf } + case owf: FrameLessOffsetWindowFunction => + if (isDistinct || filter.isDefined) { + throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( + owf.prettyName) + } else if (ignoreNulls) { + owf match { + case lead: Lead => + lead.copy(ignoreNulls = ignoreNulls) + case lag: Lag => + lag.copy(ignoreNulls = ignoreNulls) + case _ => + 
throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + owf.prettyName) + } + } else { + owf + } // We get an aggregate function, we need to wrap it in an AggregateExpression. case agg: AggregateFunction => if (filter.isDefined && !filter.get.deterministic) { throw QueryCompilationErrors.nonDeterministicFilterInAggregateError } - AggregateExpression(agg, Complete, isDistinct, filter) + if (ignoreNulls) { + val aggFunc = agg match { + case first: First => first.copy(ignoreNulls = ignoreNulls) + case last: Last => last.copy(ignoreNulls = ignoreNulls) + case _ => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + agg.prettyName) + } + AggregateExpression(aggFunc, Complete, isDistinct, filter) + } else { + AggregateExpression(agg, Complete, isDistinct, filter) + } // This function is not an aggregate function, just return the resolved one. case other if (isDistinct || filter.isDefined) => throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( other.prettyName) + case other if (ignoreNulls) => + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( + other.prettyName) case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 2fa6bf0acea67..6115b4ed5a117 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.DataType /** @@ -32,13 +33,16 @@ import org.apache.spark.sql.types.DataType case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveExpressions { - case u @ UnresolvedFunction(fn, children, false, filter) + case u @ UnresolvedFunction(fn, children, false, filter, ignoreNulls) if hasLambdaAndResolvedArguments(children) => withPosition(u) { catalog.lookupFunction(fn, children) match { case func: HigherOrderFunction => filter.foreach(_.failAnalysis("FILTER predicate specified, " + s"but ${func.prettyName} is not an aggregate function")) + if (ignoreNulls) { + throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName) + } func case other => other.failAnalysis( "A lambda function should only be used in a higher order function. 
However, " + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 84614886348aa..afeef3f16b289 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -258,7 +258,8 @@ case class UnresolvedFunction( name: FunctionIdentifier, arguments: Seq[Expression], isDistinct: Boolean, - filter: Option[Expression] = None) + filter: Option[Expression] = None, + ignoreNulls: Boolean = false) extends Expression with Unevaluable { override def children: Seq[Expression] = arguments ++ filter.toSeq diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 771bb5a1708b0..a2f59b914a10d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1697,8 +1697,10 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg expressions } val filter = Option(ctx.where).map(expression(_)) + val ignoreNulls = + Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false) val function = UnresolvedFunction( - getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter) + getFunctionIdentifier(ctx.functionName), arguments, isDistinct, filter, ignoreNulls) // Check if the function is evaluated in a windowed context. ctx.windowSpec match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 004d577c7ad52..ec2a8a41bf38c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -212,6 +212,26 @@ class AnalysisErrorSuite extends AnalysisTest { CatalystSqlParser.parsePlan("SELECT count(a) FILTER (WHERE rand(int(c)) > 1) FROM TaBlE2"), "FILTER expression is non-deterministic, it cannot be used in aggregate functions" :: Nil) + errorTest( + "function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT hex(a) IGNORE NULLS FROM TaBlE2"), + "Function hex does not support IGNORE NULLS" :: Nil) + + errorTest( + "some window function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT percent_rank(a) IGNORE NULLS FROM TaBlE2"), + "Function percent_rank does not support IGNORE NULLS" :: Nil) + + errorTest( + "aggregate function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT count(a) IGNORE NULLS FROM TaBlE2"), + "Function count does not support IGNORE NULLS" :: Nil) + + errorTest( + "higher order function don't support ignore nulls", + CatalystSqlParser.parsePlan("SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x) " + + "IGNORE NULLS"), "Function aggregate does not support IGNORE NULLS" :: Nil) + errorTest( "nested aggregate functions", testRelation.groupBy($"a")( diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index f0336d764bdea..56f2b0b20c165 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -36,6 +36,18 @@ CREATE OR REPLACE TEMPORARY VIEW basic_pays AS SELECT * FROM VALUES ('Barry Jones','SCM',10586) AS basic_pays(employee_name, department, salary); +CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES +('a', 0, null), +('a', 1, 'x'), +('b', 2, null), +('c', 3, null), +('a', 4, 'y'), +('b', 5, null), +('a', 6, 'z'), +('a', 7, 'v'), +('a', 8, null) +AS test_ignore_null(content, id, v); + -- RowsBetween SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val; @@ -262,4 +274,138 @@ FROM WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) -ORDER BY salary DESC; \ No newline at end of file +ORDER BY salary DESC; + +SELECT + content, + id, + v, + lead(v, 0) IGNORE NULLS OVER w lead_0, + lead(v, 1) IGNORE NULLS OVER w lead_1, + lead(v, 2) IGNORE NULLS OVER w lead_2, + lead(v, 3) IGNORE NULLS OVER w lead_3, + lag(v, 0) IGNORE NULLS OVER w lag_0, + lag(v, 1) IGNORE NULLS OVER w lag_1, + lag(v, 2) IGNORE NULLS OVER w lag_2, + lag(v, 3) IGNORE NULLS OVER w lag_3, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + 
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id; + +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY id; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index c904c43ac84ed..e3fd0cd77cb6f 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 36 +-- Number of queries: 46 -- !query @@ -46,6 +46,24 @@ struct<> +-- !query +CREATE OR REPLACE TEMPORARY VIEW test_ignore_null AS SELECT * FROM VALUES +('a', 0, null), +('a', 1, 'x'), +('b', 2, null), +('c', 3, null), +('a', 4, 'y'), +('b', 5, null), +('a', 6, 'z'), +('a', 7, 'v'), +('a', 8, null) +AS test_ignore_null(content, id, v) +-- !query schema +struct<> +-- !query output + + + -- !query SELECT val, cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS CURRENT ROW) FROM testData ORDER BY cate, val @@ -776,3 +794,263 @@ WINDOW w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), w AS (ORDER BY salary DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 2 FOLLOWING) ORDER BY salary DESC + + +-- !query +SELECT + content, + id, + v, + lead(v, 0) IGNORE NULLS OVER w lead_0, + lead(v, 1) IGNORE NULLS OVER w lead_1, + lead(v, 2) IGNORE NULLS OVER w lead_2, + lead(v, 3) IGNORE NULLS OVER w lead_3, + lag(v, 0) IGNORE NULLS OVER w lag_0, + lag(v, 1) IGNORE NULLS OVER w lag_1, + lag(v, 2) IGNORE NULLS OVER w lag_2, + lag(v, 3) IGNORE NULLS OVER w lag_3, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL x y z NULL NULL NULL NULL NULL NULL NULL NULL NULL +a 1 x x y z v x NULL NULL NULL x NULL NULL x x +b 2 NULL NULL y z v NULL x NULL NULL x NULL NULL x x +c 3 NULL NULL y z v NULL x NULL NULL x NULL NULL x x +a 4 y y z v NULL y x NULL NULL x y NULL x y +b 5 NULL NULL z v NULL NULL y x NULL x y NULL x y +a 6 z z v NULL NULL z y x NULL x y z x z +a 7 v v NULL NULL NULL v z y x x y z x v +a 8 NULL NULL NULL NULL NULL NULL v z y x y z x v + + +-- !query +SELECT + content, + id, + v, + 
nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x NULL NULL x x +a 4 y x y NULL x y +b 5 NULL x y NULL x y +a 6 z x y z x z +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL NULL NULL NULL NULL NULL +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x NULL NULL x x +a 4 y x y NULL x y +b 5 NULL x y NULL x y +a 6 z x y z x z +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x y NULL x y +c 3 NULL x y NULL x y +a 4 y y z NULL y z +b 5 NULL y z v y v +a 6 z y z v y v +a 7 v z v NULL z v +a 8 NULL z v NULL z v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x y NULL x y +c 3 NULL x y NULL x y +a 4 y y z NULL y z +b 5 NULL y z v y v +a 6 z y z v y v +a 7 v z v NULL z v +a 8 NULL z v NULL z v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL y z v y v +c 3 NULL y z v y v +a 4 y y z v y v +b 5 NULL z v NULL z v +a 6 z z v NULL z v +a 7 v v NULL NULL v v +a 8 NULL NULL NULL NULL NULL NULL + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id 
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL x y z x v +c 3 NULL x y z x v +a 4 y x y z x v +b 5 NULL x y z x v +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x y z x v +a 1 x x y z x v +b 2 NULL x y z x v +c 3 NULL x y z x v +a 4 y x y z x v +b 5 NULL x y z x v +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v + + +-- !query +SELECT + content, + id, + v, + nth_value(v, 1) IGNORE NULLS OVER w nth_value_1, + nth_value(v, 2) IGNORE NULLS OVER w nth_value_2, + nth_value(v, 3) IGNORE NULLS OVER w nth_value_3, + first_value(v) IGNORE NULLS OVER w first_value, + last_value(v) IGNORE NULLS OVER w last_value +FROM + test_ignore_null +WINDOW w AS (ORDER BY id ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY id +-- !query schema +struct +-- !query output +a 0 NULL x NULL NULL x x +a 1 x x NULL NULL x x +b 2 NULL x NULL NULL x x +c 3 NULL x y NULL x y +a 4 y x y NULL x y +b 5 NULL x y z x z +a 6 z x y z x v +a 7 v x y z x v +a 8 NULL x y z x v \ No newline at end of file From f38265ddda62f99aea802e422e6b440ee72f2483 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Dec 2020 09:57:15 -0800 Subject: [PATCH 0921/1009] [SPARK-33907][SQL] Only prune columns of from_json if parsing options is empty ### What changes were proposed in this pull request? As a follow-up task to SPARK-32958, this patch takes safer approach to only prune columns from JsonToStructs if the parsing option is empty. It is to avoid unexpected behavior change regarding parsing. This patch also adds a few e2e tests to make sure failfast parsing behavior is not changed. ### Why are the changes needed? It is to avoid unexpected behavior change regarding parsing. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Closes #30970 from viirya/SPARK-33907-3.2. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../optimizer/OptimizeCsvJsonExprs.scala | 9 ++- .../optimizer/OptimizeJsonExprsSuite.scala | 20 ++++++ .../apache/spark/sql/JsonFunctionsSuite.scala | 65 +++++++++++++++++++ 3 files changed, 92 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala index 9c32f8be736a4..5f0f3f921bdf1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala @@ -98,12 +98,17 @@ object OptimizeCsvJsonExprs extends Rule[LogicalPlan] { child case g @ GetStructField(j @ JsonToStructs(schema: StructType, _, _, _), ordinal, _) - if schema.length > 1 => + if schema.length > 1 && j.options.isEmpty => + // Options here should be empty because the optimization should not be enabled + // for some options. 
For example, when the parse mode is failfast it should not + // optimize, and should force to parse the whole input JSON with failing fast for + // an invalid input. + // To be more conservative, it does not optimize when any option is set for now. val prunedSchema = StructType(Seq(schema(ordinal))) g.copy(child = j.copy(schema = prunedSchema), ordinal = 0) case g @ GetArrayStructFields(j @ JsonToStructs(schema: ArrayType, _, _, _), _, _, _, _) - if schema.elementType.asInstanceOf[StructType].length > 1 => + if schema.elementType.asInstanceOf[StructType].length > 1 && j.options.isEmpty => val prunedSchema = ArrayType(StructType(Seq(g.field)), g.containsNull) g.copy(child = j.copy(schema = prunedSchema), ordinal = 0, numFields = 1) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala index 05d47706ba297..ccbc61e8a4987 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJsonExprsSuite.scala @@ -209,6 +209,26 @@ class OptimizeJsonExprsSuite extends PlanTest with ExpressionEvalHelper { comparePlans(optimized2, expected2) } + test("SPARK-33907: do not prune unnecessary columns if options is not empty") { + val options = Map("mode" -> "failfast") + + val query1 = testRelation2 + .select(GetStructField(JsonToStructs(schema, options, 'json), 0)) + val optimized1 = Optimizer.execute(query1.analyze) + + comparePlans(optimized1, query1.analyze) + + val schema1 = ArrayType(StructType.fromDDL("a int, b int"), containsNull = true) + val field1 = schema1.elementType.asInstanceOf[StructType](0) + + val query2 = testRelation2 + .select(GetArrayStructFields( + JsonToStructs(schema1, options, 'json), field1, 0, 2, true).as("a")) + val optimized2 = Optimizer.execute(query2.analyze) + + comparePlans(optimized2, query2.analyze) + } + test("SPARK-33007: simplify named_struct + from_json") { val options = Map.empty[String, String] val schema = StructType.fromDDL("a int, b int, c long, d string") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 2e515ee92bceb..310e170e8c1b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -775,4 +775,69 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getMessage assert(errMsg3.contains("DataType cow is not supported")) } + + test("SPARK-33907: bad json input with json pruning optimization: GetStructField") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + val badRec = """{"a" 1, "b": 11}""" + val df = Seq(badRec, """{"a": 2, "b": 12}""").toDS() + + val exception1 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("b")).collect() + }.getMessage + assert(exception1.contains( + "Malformed records are detected in record parsing. 
Parse Mode: FAILFAST.")) + + val exception2 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("a")).collect() + }.getMessage + assert(exception2.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + } + } + } + + test("SPARK-33907: bad json input with json pruning optimization: GetArrayStructFields") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = ArrayType(new StructType() + .add("a", IntegerType) + .add("b", IntegerType)) + val badRec = """{"a" 1, "b": 11}""" + val df = Seq(s"""[$badRec, {"a": 2, "b": 12}]""").toDS() + + val exception1 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("b")).collect() + }.getMessage + assert(exception1.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + + val exception2 = intercept[SparkException] { + df.select(from_json($"value", schema, Map("mode" -> "FAILFAST"))("a")).collect() + }.getMessage + assert(exception2.contains( + "Malformed records are detected in record parsing. Parse Mode: FAILFAST.")) + } + } + } + + test("SPARK-33907: json pruning optimization with corrupt record field") { + Seq("true", "false").foreach { enabled => + withSQLConf(SQLConf.JSON_EXPRESSION_OPTIMIZATION.key -> enabled) { + val schema = new StructType() + .add("a", IntegerType) + .add("b", IntegerType) + val badRec = """{"a" 1, "b": 11}""" + + val df = Seq(badRec, """{"a": 2, "b": 12}""").toDS() + .selectExpr("from_json(value, 'a int, b int, _corrupt_record string') as parsed") + .selectExpr("parsed._corrupt_record") + + checkAnswer(df, Seq(Row("""{"a" 1, "b": 11}"""), Row(null))) + } + } + } } From 85de64473310a32c91da7878abb0bea4d371c11d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 30 Dec 2020 13:57:44 -0600 Subject: [PATCH 0922/1009] [SPARK-33804][CORE] Fix compilation warnings about 'view bounds are deprecated' ### What changes were proposed in this pull request? There are only 3 compilation warnings related to `view bounds are deprecated` in Spark Code: ``` [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are deprecated; use an implicit parameter instead. [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:35: view bounds are deprecated; use an implicit parameter instead. [WARNING] /spark-source/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala:55: view bounds are deprecated; use an implicit parameter instead. ``` This pr try to fix these compilation warnings. ### Why are the changes needed? Fix compilation warnings about ` view bounds are deprecated` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #30924 from LuciferYang/SPARK-33804. 
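Side note for reviewers (illustration only, not Spark code): the rewrite applied here is the standard replacement of a view bound with an implicit conversion parameter, which can also be written as a context bound over a function-type alias — the same shape as the `IsWritable` alias this patch adds to the rdd package object. A self-contained sketch:

```
import scala.language.implicitConversions

object ViewBoundSketch {
  trait Writable
  final case class Text(s: String) extends Writable
  implicit def stringToWritable(s: String): Writable = Text(s)

  // Deprecated spelling:  def describe[A <% Writable](a: A): Writable = a
  // Explicit form: the conversion is passed in as an implicit parameter.
  def describeExplicit[A](a: A)(implicit ev: A => Writable): Writable = ev(a)

  // Same thing as a context bound over a function-type alias
  // (the pattern used for IsWritable in this patch).
  type ToWritable[A] = A => Writable
  def describe[A: ToWritable](a: A): Writable = implicitly[ToWritable[A]].apply(a)

  def main(args: Array[String]): Unit = {
    println(describe("hello"))       // Text(hello)
    println(describeExplicit("hi"))  // Text(hi)
  }
}
```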
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../org/apache/spark/rdd/SequenceFileRDDFunctions.scala | 7 ++----- core/src/main/scala/org/apache/spark/rdd/package.scala | 6 +++++- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 02def89dd8c2b..2f6ff0acdf024 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -32,16 +32,13 @@ import org.apache.spark.internal.Logging * @note This can't be part of PairRDDFunctions because we need more implicit parameters to * convert our keys and values to Writable. */ -class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag]( +class SequenceFileRDDFunctions[K: IsWritable: ClassTag, V: IsWritable: ClassTag]( self: RDD[(K, V)], _keyWritableClass: Class[_ <: Writable], _valueWritableClass: Class[_ <: Writable]) extends Logging with Serializable { - // TODO the context bound (<%) above should be replaced with simple type bound and implicit - // conversion but is a breaking change. This should be fixed in Spark 3.x. - /** * Output the RDD as a Hadoop SequenceFile using the Writable types we infer from the RDD's key * and value types. If the key or value are Writable, then we use their classes directly; @@ -52,7 +49,7 @@ class SequenceFileRDDFunctions[K <% Writable: ClassTag, V <% Writable : ClassTag def saveAsSequenceFile( path: String, codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope { - def anyToWritable[U <% Writable](u: U): Writable = u + def anyToWritable[U: IsWritable](u: U): Writable = u // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and // valueWritableClass at the compile time. To implement that, we need to add type parameters to diff --git a/core/src/main/scala/org/apache/spark/rdd/package.scala b/core/src/main/scala/org/apache/spark/rdd/package.scala index 55fc6e4d2b4df..43ca6d7643b17 100644 --- a/core/src/main/scala/org/apache/spark/rdd/package.scala +++ b/core/src/main/scala/org/apache/spark/rdd/package.scala @@ -17,7 +17,11 @@ package org.apache.spark +import org.apache.hadoop.io.Writable + /** * Provides several RDD implementations. See [[org.apache.spark.rdd.RDD]]. */ -package object rdd +package object rdd { + type IsWritable[A] = A => Writable +} From 13e8c2840969a17d5ba113686501abd3c23e3c23 Mon Sep 17 00:00:00 2001 From: "Pradyumn Agrawal (pradyumn.ag)" Date: Wed, 30 Dec 2020 17:25:46 -0800 Subject: [PATCH 0923/1009] [SPARK-33942][DOCS] Remove `hiveClientCalls.count` in `CodeGenerator` metrics docs ### What changes were proposed in this pull request? Removed the **hiveClientCalls.count** in CodeGenerator metrics in Component instance = Executor ### Why are the changes needed? Wrong information regarding metrics was being displayed on Monitoring Documentation. I had added referred documentation for adding metrics logging in Graphite. This metric was not being reported. I had to check if the issue was at my application end or spark code or documentation. Documentation had the wrong info. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual, checked it on my forked repository feature branch [SPARK-33942](https://github.com/coderbond007/spark/blob/SPARK-33942/docs/monitoring.md) Closes #30976 from coderbond007/SPARK-33942. 
Authored-by: Pradyumn Agrawal (pradyumn.ag) Signed-off-by: Dongjoon Hyun --- docs/monitoring.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index c6105188f07ec..5b3278bca031d 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1276,7 +1276,6 @@ These metrics are exposed by Spark executors. - compilationTime (histogram) - generatedClassSize (histogram) - generatedMethodSize (histogram) - - hiveClientCalls.count - sourceCodeSize (histogram) - namespace=plugin.\ From 3fe5614a7cc8f5b65a90924e9a4a535fcaf76a98 Mon Sep 17 00:00:00 2001 From: "yi.wu" Date: Thu, 31 Dec 2020 13:13:02 -0800 Subject: [PATCH 0924/1009] [SPARK-31946][CORE] Make worker/executor decommission signal configurable ### What changes were proposed in this pull request? This PR proposed to make worker/executor decommission signal configurable. * Added confs: `spark.worker.decommission.signal` / `spark.executor.decommission.signal` * Rename `WorkerSigPWRReceived`/ `ExecutorSigPWRReceived` to `WorkerDecomSigReceived`/ `ExecutorDecomSigReceived` ### Why are the changes needed? The current signal `PWR` can't work on macOS since it's not compliant with POSIX while macOS does. So the developers currently can't do end-to-end decommission test on their macOS environment. Besides, the configuration becomes more flexible for users in case the default signal (`PWR`) gets conflicted with their own applications/environment. ### Does this PR introduce _any_ user-facing change? No (it's a new API for 3.2) ### How was this patch tested? Manually tested. Closes #30968 from Ngone51/configurable-decom-signal. Authored-by: yi.wu Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/DeployMessage.scala | 4 ++-- .../org/apache/spark/deploy/worker/Worker.scala | 13 +++++++------ .../executor/CoarseGrainedExecutorBackend.scala | 9 +++++---- .../org/apache/spark/internal/config/Worker.scala | 7 +++++++ .../org/apache/spark/internal/config/package.scala | 7 +++++++ .../cluster/CoarseGrainedClusterMessage.scala | 4 ++-- 6 files changed, 30 insertions(+), 14 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index d5b5375d64f4d..727cdbc4ef2d1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -77,10 +77,10 @@ private[deploy] object DeployMessages { object DecommissionWorker extends DeployMessage /** - * A message that sent by the Worker to itself when it receives PWR signal, + * A message that sent by the Worker to itself when it receives a signal, * indicating the Worker starts to decommission. */ - object WorkerSigPWRReceived extends DeployMessage + object WorkerDecommissionSigReceived extends DeployMessage /** * A message sent from Worker to Master to tell Master that the Worker has started diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index a6092f637a9cb..a3c73751a2136 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -66,16 +66,17 @@ private[deploy] class Worker( Utils.checkHost(host) assert (port > 0) - // If worker decommissioning is enabled register a handler on PWR to shutdown. + // If worker decommissioning is enabled register a handler on the configured signal to shutdown. 
if (conf.get(config.DECOMMISSION_ENABLED)) { - logInfo("Registering SIGPWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + + val signal = conf.get(config.Worker.WORKER_DECOMMISSION_SIGNAL) + logInfo(s"Registering SIG$signal handler to trigger decommissioning.") + SignalUtils.register(signal, s"Failed to register SIG$signal handler - " + "disabling worker decommission feature.") { - self.send(WorkerSigPWRReceived) + self.send(WorkerDecommissionSigReceived) true } } else { - logInfo("Worker decommissioning not enabled, SIGPWR will result in exiting.") + logInfo("Worker decommissioning not enabled.") } // A scheduled executor used to send messages at the specified time. @@ -682,7 +683,7 @@ private[deploy] class Worker( case DecommissionWorker => decommissionSelf() - case WorkerSigPWRReceived => + case WorkerDecommissionSigReceived => decommissionSelf() // Tell the Master that we are starting decommissioning // so it stops trying to launch executor/driver on us diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 6a1fd57873c3a..e1d3009598b8c 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -82,9 +82,10 @@ private[spark] class CoarseGrainedExecutorBackend( override def onStart(): Unit = { if (env.conf.get(DECOMMISSION_ENABLED)) { - logInfo("Registering PWR handler to trigger decommissioning.") - SignalUtils.register("PWR", "Failed to register SIGPWR handler - " + - "disabling executor decommission feature.") (self.askSync[Boolean](ExecutorSigPWRReceived)) + val signal = env.conf.get(EXECUTOR_DECOMMISSION_SIGNAL) + logInfo(s"Registering SIG$signal handler to trigger decommissioning.") + SignalUtils.register(signal, s"Failed to register SIG$signal handler - disabling" + + s" executor decommission feature.") (self.askSync[Boolean](ExecutorDecommissionSigReceived)) } logInfo("Connecting to driver: " + driverUrl) @@ -208,7 +209,7 @@ private[spark] class CoarseGrainedExecutorBackend( } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { - case ExecutorSigPWRReceived => + case ExecutorDecommissionSigReceived => var driverNotified = false try { driver.foreach { driverRef => diff --git a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala index a8072712c46ce..fda3a57546b67 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Worker.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Worker.scala @@ -82,4 +82,11 @@ private[spark] object Worker { .version("2.0.2") .intConf .createWithDefault(100) + + val WORKER_DECOMMISSION_SIGNAL = + ConfigBuilder("spark.worker.decommission.signal") + .doc("The signal that used to trigger the worker to start decommission.") + .version("3.2.0") + .stringConf + .createWithDefaultString("PWR") } diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index cbf4a971e3d0d..adaf92d5a8aa1 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -1927,6 +1927,13 @@ package object config { .timeConf(TimeUnit.SECONDS) .createOptional 
+ private[spark] val EXECUTOR_DECOMMISSION_SIGNAL = + ConfigBuilder("spark.executor.decommission.signal") + .doc("The signal that used to trigger the executor to start decommission.") + .version("3.2.0") + .stringConf + .createWithDefaultString("PWR") + private[spark] val STAGING_DIR = ConfigBuilder("spark.yarn.stagingDir") .doc("Staging directory used while submitting applications.") .version("2.0.0") diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index e084453be0789..2f171433bbb5c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -102,9 +102,9 @@ private[spark] object CoarseGrainedClusterMessages { // It's used for Standalone's cases, where decommission is triggered at MasterWebUI or Worker. object DecommissionExecutor extends CoarseGrainedClusterMessage - // A message that sent to the executor itself when it receives PWR signal, + // A message that sent to the executor itself when it receives a signal, // indicating the executor starts to decommission. - object ExecutorSigPWRReceived extends CoarseGrainedClusterMessage + object ExecutorDecommissionSigReceived extends CoarseGrainedClusterMessage case class RemoveWorker(workerId: String, host: String, message: String) extends CoarseGrainedClusterMessage From 771c538620e66be2d0fb0e383e4aa37b4d29f7eb Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 31 Dec 2020 13:18:31 -0800 Subject: [PATCH 0925/1009] [SPARK-33084][SQL][TESTS][FOLLOW-UP] Fix Scala 2.13 UT failure ### What changes were proposed in this pull request? Fix UT according to https://github.com/apache/spark/pull/29966#issuecomment-752830046 Change StructType construct from ``` def inputSchema: StructType = StructType(StructField("inputColumn", LongType) :: Nil) ``` to ``` def inputSchema: StructType = new StructType().add("inputColumn", LongType) ``` The whole udf class is : ``` package org.apache.spark.examples.sql import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction} import org.apache.spark.sql.types._ import org.apache.spark.sql.Row class Spark33084 extends UserDefinedAggregateFunction { // Data types of input arguments of this aggregate function def inputSchema: StructType = new StructType().add("inputColumn", LongType) // Data types of values in the aggregation buffer def bufferSchema: StructType = new StructType().add("sum", LongType).add("count", LongType) // The data type of the returned value def dataType: DataType = DoubleType // Whether this function always returns the same output on the identical input def deterministic: Boolean = true // Initializes the given aggregation buffer. The buffer itself is a `Row` that in addition to // standard methods like retrieving a value at an index (e.g., get(), getBoolean()), provides // the opportunity to update its values. Note that arrays and maps inside the buffer are still // immutable. 
def initialize(buffer: MutableAggregationBuffer): Unit = { buffer(0) = 0L buffer(1) = 0L } // Updates the given aggregation buffer `buffer` with new input data from `input` def update(buffer: MutableAggregationBuffer, input: Row): Unit = { if (!input.isNullAt(0)) { buffer(0) = buffer.getLong(0) + input.getLong(0) buffer(1) = buffer.getLong(1) + 1 } } // Merges two aggregation buffers and stores the updated buffer values back to `buffer1` def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = { buffer1(0) = buffer1.getLong(0) + buffer2.getLong(0) buffer1(1) = buffer1.getLong(1) + buffer2.getLong(1) } // Calculates the final result def evaluate(buffer: Row): Double = buffer.getLong(0).toDouble / buffer.getLong(1) } ``` ### Why are the changes needed? Fix UT for scala 2.13 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existed UT Closes #30980 from AngersZhuuuu/spark-33084-followup. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- sql/core/src/test/resources/SPARK-33084.jar | Bin 6322 -> 6119 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/sql/core/src/test/resources/SPARK-33084.jar b/sql/core/src/test/resources/SPARK-33084.jar index 1dc5e9303b707f9b788c28fc23a0f0aa5b2c323b..61e1663ad3a2804eb7c39960ea51e56a0ebfaf94 100644 GIT binary patch delta 2087 zcmZXVdpy(oAIE28Ti7tM+9?vLg_%opOFQJ&B$eE9kD?4C97$g*jdD9IxfQugF3V*r z_egHZ(DsvCa*3iL_v6ly$2sSp-{+6d>+ya+Ua$B2{oj*`8^?-R5cvi6f_^<-U`D)% z9RH8Nj8gSjKKPDake%!i7#9SAFe0>%d?$e`)}MR#4H`?eS-*wj(d;vV_hLYCIN|#A zKi9x;XM+-0IV>ff)OV~@-qkexRcC0*xZVyrbFp0>rW&Ulao6`%=0{d>;<*C7ybuR2bV&iCb^e;F7Q}Y!s;q(E*m3Dm+sAtz z#}=6-&p(b6gS(5+k#y?1@?(2I?ul=>TYE z3wKLX6^gzTEdL=>)9ul*-nw{{tj-$L3V8Q>=M#HbY5QVHSs{P1ubCAJ4Lu}%sx8y} zTO?Mu-rJbz=A7UdO*R;@%&xS)Q@*VFU}a{rkL?^8ieASYI!pRVebi0|EL5vcYuw@L z5mD8LNAzL2PC(lHqp{TjyW^9qo?n+)iUFdnnt7$R&!W!L5uBMhZhl06#|om?Ar{lL z;X?Ro%PYAvmsjaN zV{=li@&k2~s#@9oo?dfDVCkFj;5%-k(UI@knzxu;AF6GEW-UUget`{&%{d<$9)gxP zn?C39twJJ@17o%=K=iarF}hKZq|eo{i`CrVd0Fe`igQLqXMxNIm6|R^jbY`Yh+tF$ zwnU7VNmH3gfok0M3k~lqD)`EYMzX5Z>eB_8V;s531*hZ1k)vsxkx9eDD>u_@YO)62@Bzo$fmjo!44 zUlIY|QV$&4BQ0D}FLWP2=zpWl~Q(LRlAL}LaS%9)##P)zp@L}lTg*>DHYE!=( z$_Bh{x6mIP{>v)H$bdKC7SSDDhCKB3*~LpDhC%w_VJvT&tV$3te|rk&x}2YH^jf}; zd^bF4>P>Rp@hfvNRnBl%fja@FN!lcKRc@QciJ9vH1K0MwcO`C}Vt%RMuh`aC-d`2D zf=|K~xdSi5a;_TNSbiB6h4$f9sAs&1bBx>K9k7Uh=-tv$^CyJ?xeL*nYR}CH8YOv8 z|KWlxV_%YDZ-TE)d4nfvl>$;x`bB9&XMA2oo6}E}y4#4shr~dW62V~N@-*-ijh(8m zMIY0Ad!-eV>`g+V9)4e6s&313q8P9|94jceV%4KBx-W#3MAjifjH)pKaKkn|3(HbK zFRn4xt7Ez;Ga<(*Z)x1s*emVjJ^cORtf<#``8lo?+1fJu60OY5UtBS$aNVTaO^yC2 zi)UrT@%^|q-fLwy5q!ln50#r%CptWLMmRYRU4mpR!#XY!Mxo9V(!-}VVN@_h+GT^^ zZ0lgpG0mp(hW?=X{k3l#9H0#nXTgBeqA&Ag$BGDis0n5U9O&$czz;U0QEbsZFnVRn z26UmE1X=e=Nq-3vDx1DXBI1BI?muQ&Ik%rgO~f!2gT&(WR|(G@PB-u%CPLAtRx%;q z$}F2z;{h*`FM zZ!V!`PEn;|qU;60*PlaZAWohWmrM#v>@ppbXm&Gm*>7(5^iZI~qNdX4r)-Ykl*~Tb zvnXaS$woLRMELVbgAqMPA+znT(-UT+c<0=>haa!X5;)gn3HceOVz!SJkM@S0q$%l0 z^JNl8LNX_pLY92WRdsrRq(_8KTTo~Df>S=~gUp6MdQi8-b2wE>kFa`Ov&zBKUb>(# zg0}|Ur?7&(n&dQ~d^&JTL33+_7Wrkb8|jQ-hmw+{W9H&;dVZVdaJUbd@3aEyGIEHA zFEnM`C5yQdXlx=>L)Qyren*&NSL{Ni=v8N^_CK(8eW^yRprk)=N0; zfYia=Qd3Sa#Q&KgN=rZ;5M&{Lng0fy+t}9t delta 2326 zcmZWrXHb)i68#{d6A}nbgNh)ak)jwNf&~cW(n2C36cuDZYJec9mLD~gF2L&`pQ52;&fk&?a%|qXe^Jd=e%%0sdXLk1Ao+&-faR|;11eOGTMnD?l zFUTQ~e;T6{l`15&i+I1D0USpR01Bb>E+IpzjmucS0@txZJ1zF2bmE=@ANxwr{bi z?TT)CHS~RO-{8tw!CEC1s7LK&nYxA$(A;-QP!a62l@xC*dMaqGAcx#;bH8KOZi%|7 zr1x}>D%j&|_lNrlD-;Y)9?y^uv6J1qCs*6n{D|8#RDxu&WD<)&2xAIK#T5xmu}Dj9 zJ6!qQUQp4&^g?s|#4=x7k9f$VZ*Su%p?}D^l9IVU{ME&leDs6W;iQDhd#D>tR<3&| 
zfduF%PDC>Y9UzgF|)-g}yFxH&v5kml@E3 zfO=sYo83>XJ1&gVR2a%`UZ1X}X&AzzJJuZgh@=(2>iA7B3TVtwnTme9)rL_i&00RQ zAvbm1%^);g2FLq%GsO#nGfNLYO$xM{tc<_bIa4D<;W5?pL<7<`2z(3D0ZI)|E28G% zLYMBGX-!BLD{z$Jk3gvIkBRSBs(5-7{=?!+qZRUkU@m~-9d#sAWuPJ4kF6MZKBD>F zXxrAPN2u@Vtt{+0D#o>27hez*xIcqU>r;`i=3KA1L&3KdwHHEO9#^v-ZC;#AiNU=(qtwU2$6U?&|}3{7Vgx;t>1;KB`J$z>eEakurH zj9XRZ1c-06%yi;Yuz8v{Z8Ii$eu_8PYj{E0_!2-9k65OzQP$L4<=ShB&hcTw46@G< zgg0CbH|RgVx95-9kyDf4`+7^rv=}SZ{Kv_1CD9tTY(A^rXk<>7+8*xgQ84||;+ehx z<}UhIGOM4O&U;;fFo#{c2~`7U85Hx_7y61FYyD4_5_D{;_xl3i)@AZWZ zM`FVs9HI`(u(Pxc>cGCA)5?fw^VqG|m{>PFUUpIH_DNZ?WHDKt3m!}e&6r4}Z?&x_ zBeGxY(PFGJ!Ov%M^I^(0!4w{Xwr@(;e}dCh6+%p6_s!*;J#OhnCz#Yy(2w}l%8_FU zp5Jb9n4K@9E8N|;qN`+2aA+Q#A?;ClJp<{~lrK8QmxLJNk+wIHs6f{{cae$`=rxx7 zR>7vua#S?bXB&|fw11udy|p}D7Nu>Xz2pl>Y`(QdG--YS>uO|6wbL|%M-$>ygsjcZ z45hCzv=$HT=yns$_-f}o;y19aiC2Zg1t+vbmtWfydTTC5yc(G>kyOaIELD+NB4I)W z&j&PFXJ*&W3*g)u2cz*5>LoA3zR5*WvK2uGO2;iOI~Nk^`Th{S+3WpUo1aC*79m`i zf?06u`60>_O;~=-iMje+&UY?Gc$%w;LMof*Jsffkdh)TF-0ota-iX-%*P}&m6s&wJaVTWb%U=>C)2@NqS!A*q?}vNal(TAYD=SNq)HZX-M2 zk#EFGVSr={YIjdLu_Xq5FUt|Z- zK{t1HVR&HWs;9X?L)z1IV(Qf9f!sI4Q<-! zjhziQ(PvYe#nT?|os1~$?H1EuwmAoXyr$?xC`WZf;@Pu6!juWkc ziZ(SVHnH9ug0@HBPA8i}!2=I0&fBw9S@vvl?>h?%MQc$w$&dbq{SfN;(q?4a zzh`!1IXl$Hu$Lj}+3^s{bJ;2;L}cs)^JB+eT@j4OE?$4+HBZK^fQ!*yMPy=cWx|qf& z3x(pqzlcUS>ZdCx+k!{`pDjpGz^d+U!&}iPT~|c_zz+fddcW+%f6J4&3JLmgAvpb_ z0)(z5A*+qE6B3pO{dOGq|Iq@_-vbN~2qK|$N5y@=E(ldb2>%VG$3WHSMT*kDs8_qx z*iTBAF0Lf4x_dBwSeb=A2?BrwX#hC!dldjYiPM4Y3JvH1N=V^K_-?CDH-<|p;_O63 Rf68Qbb3YgWV&s2L{{hoAGr9l( From ed9f7288019be4803cdb1ee570ca21ad76af371a Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 31 Dec 2020 13:20:31 -0800 Subject: [PATCH 0926/1009] [SPARK-33944][SQL] Incorrect logging for warehouse keys in SharedState options ### What changes were proposed in this pull request? While using SparkSession's initial options to generate the sharable Spark conf and Hadoop conf in ShardState, we shall put the log in the codeblock that the warehouse keys being handled. ### Why are the changes needed? bugfix, rm ambiguous log when setting spark.sql.warehouse.dir in SparkSession.builder.config, but only warn setting hive.metastore.warehouse.dir ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #30978 from yaooqinn/SPARK-33944. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/internal/SharedState.scala | 16 +++++++----- .../spark/sql/SparkSessionBuilderSuite.scala | 26 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 6018afb0dce46..cc21def3fb367 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -56,17 +56,15 @@ private[sql] class SharedState( private[sql] val (conf, hadoopConf) = { // Load hive-site.xml into hadoopConf and determine the warehouse path which will be set into // both spark conf and hadoop conf avoiding be affected by any SparkSession level options - SharedState.loadHiveConfFile( + val initialConfigsWithoutWarehouse = SharedState.loadHiveConfFile( sparkContext.conf, sparkContext.hadoopConfiguration, initialConfigs) + val confClone = sparkContext.conf.clone() val hadoopConfClone = new Configuration(sparkContext.hadoopConfiguration) // If `SparkSession` is instantiated using an existing `SparkContext` instance and no existing // `SharedState`, all `SparkSession` level configurations have higher priority to generate a // `SharedState` instance. This will be done only once then shared across `SparkSession`s - initialConfigs.foreach { - case (k, _) if k == "hive.metastore.warehouse.dir" || k == WAREHOUSE_PATH.key => - logWarning(s"Not allowing to set ${WAREHOUSE_PATH.key} or hive.metastore.warehouse.dir " + - s"in SparkSession's options, it should be set statically for cross-session usages") + initialConfigsWithoutWarehouse.foreach { case (k, v) if SQLConf.staticConfKeys.contains(k) => logDebug(s"Applying static initial session options to SparkConf: $k -> $v") confClone.set(k, v) @@ -228,7 +226,8 @@ object SharedState extends Logging { def loadHiveConfFile( sparkConf: SparkConf, hadoopConf: Configuration, - initialConfigs: scala.collection.Map[String, String] = Map.empty): Unit = { + initialConfigs: scala.collection.Map[String, String] = Map.empty) + : scala.collection.Map[String, String] = { def containsInSparkConf(key: String): Boolean = { sparkConf.contains(key) || sparkConf.contains("spark.hadoop." 
+ key) || @@ -248,6 +247,10 @@ object SharedState extends Logging { } val sparkWarehouseOption = initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key)) + if (initialConfigs.contains(hiveWarehouseKey)) { + logWarning(s"Not allowing to set $hiveWarehouseKey in SparkSession's options, please use " + + s"${WAREHOUSE_PATH.key} to set statically for cross-session usages") + } // hive.metastore.warehouse.dir only stay in hadoopConf sparkConf.remove(hiveWarehouseKey) // Set the Hive metastore warehouse path to the one we use @@ -272,5 +275,6 @@ object SharedState extends Logging { sparkWarehouseDir } logInfo(s"Warehouse path is '$warehousePath'.") + initialConfigs -- Seq(WAREHOUSE_PATH.key, hiveWarehouseKey) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index e53976854070d..1f16bb69b3a16 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -386,4 +386,30 @@ class SparkSessionBuilderSuite extends SparkFunSuite with BeforeAndAfterEach { assert(spark2.conf.get(custom) === "c2") } + + test("SPARK-33944: warning setting hive.metastore.warehouse.dir using session options") { + val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options" + val logAppender = new LogAppender(msg) + withLogAppender(logAppender) { + SparkSession.builder() + .master("local") + .config("hive.metastore.warehouse.dir", "any") + .getOrCreate() + .sharedState + } + assert(logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg))) + } + + test("SPARK-33944: no warning setting spark.sql.warehouse.dir using session options") { + val msg = "Not allowing to set hive.metastore.warehouse.dir in SparkSession's options" + val logAppender = new LogAppender(msg) + withLogAppender(logAppender) { + SparkSession.builder() + .master("local") + .config("spark.sql.warehouse.dir", "any") + .getOrCreate() + .sharedState + } + assert(!logAppender.loggingEvents.exists(_.getRenderedMessage.contains(msg))) + } } From 45df6db906b39646f5b5f6b4a88addf1adcbe107 Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Thu, 31 Dec 2020 13:34:55 -0800 Subject: [PATCH 0927/1009] [SPARK-33906][WEBUI] Fix the bug of UI Executor page stuck due to undefined peakMemoryMetrics ### What changes were proposed in this pull request? Check if the executorSummary.peakMemoryMetrics is defined before accessing it. Without checking, the UI has risked being stuck at the Executors page. ### Why are the changes needed? App live UI may stuck at Executors page without this fix. Steps to reproduce (with master branch): In mac OS standalone mode, open a spark-shell $SPARK_HOME/bin/spark-shell --master spark://localhost:7077 val x = sc.makeRDD(1 to 100000, 5) x.count() Then open the app UI in the browser, and click the Executors page, will get stuck at this page: ![image](https://user-images.githubusercontent.com/26694233/103105677-ca1a7380-45f4-11eb-9245-c69f4a4e816b.png) Also, the return JSON from API endpoint http://localhost:4040/api/v1/applications/app-20201224134418-0003/executors miss "peakMemoryMetrics" for executor objects. I attached the full json text in https://issues.apache.org/jira/browse/SPARK-33906. 
I debugged it and observed that ExecutorMetricsPoller .getExecutorUpdates returns an empty map, which causes peakExecutorMetrics to None in https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/status/LiveEntity.scala#L345. The possible reason for returning the empty map is that the stage completion time is shorter than the heartbeat interval, so the stage entry in stageTCMP has already been removed before the reportHeartbeat is called. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test, rerun the steps of bug reproduce and see the bug is gone. Closes #30920 from baohe-zhang/SPARK-33906. Authored-by: Baohe Zhang Signed-off-by: Dongjoon Hyun --- .../apache/spark/ui/static/executorspage.js | 76 ++++++++++++++----- 1 file changed, 56 insertions(+), 20 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 1d3f628f5fab6..c8dc61991114a 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -414,38 +414,74 @@ $(document).ready(function () { }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.JVMHeapMemory; - else - return (formatBytes(row.peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.JVMOffHeapMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.JVMHeapMemory; + else + return (formatBytes(peakMemoryMetrics.JVMHeapMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.JVMOffHeapMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.OnHeapExecutionMemory; - else - return (formatBytes(row.peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.OffHeapExecutionMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapExecutionMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapExecutionMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapExecutionMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.OnHeapStorageMemory; - else - return (formatBytes(row.peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.OffHeapStorageMemory, type)); + var peakMemoryMetrics = row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.OnHeapStorageMemory; + else + return (formatBytes(peakMemoryMetrics.OnHeapStorageMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.OffHeapStorageMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, { data: function (row, type) { - if (type !== 'display') - return row.peakMemoryMetrics.DirectPoolMemory; - else - return (formatBytes(row.peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + - formatBytes(row.peakMemoryMetrics.MappedPoolMemory, type)); + var peakMemoryMetrics = 
row.peakMemoryMetrics; + if (typeof peakMemoryMetrics !== 'undefined') { + if (type !== 'display') + return peakMemoryMetrics.DirectPoolMemory; + else + return (formatBytes(peakMemoryMetrics.DirectPoolMemory, type) + ' / ' + + formatBytes(peakMemoryMetrics.MappedPoolMemory, type)); + } else { + if (type !== 'display') { + return 0; + } else { + return '0.0 B / 0.0 B'; + } + } } }, {data: 'diskUsed', render: formatBytes}, From bd346f4a2d078dd36d2fcadf3d5025389b124814 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Fri, 1 Jan 2021 19:59:17 -0800 Subject: [PATCH 0928/1009] [SPARK-33957][BUILD] Update commons-lang3 to 3.11 ### What changes were proposed in this pull request? This PR aims to update commons-lang3 to 3.11 to support Java 16+ better. ### Why are the changes needed? commons-lang3 has the following bug fixes and Java 16 support. - https://commons.apache.org/proper/commons-lang/changes-report.html#a3.11 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? Pass the CIs. Closes #30990 from williamhyun/Commons-lang3. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 199a0d1a31751..fc3b669e721ac 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -44,7 +44,7 @@ commons-digester/1.8//commons-digester-1.8.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.4//commons-io-2.4.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar +commons-lang3/3.11//commons-lang3-3.11.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 42e1634b6e66c..0ff30ce0c0a2d 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -41,7 +41,7 @@ commons-dbcp/1.4//commons-dbcp-1.4.jar commons-httpclient/3.1//commons-httpclient-3.1.jar commons-io/2.5//commons-io-2.5.jar commons-lang/2.6//commons-lang-2.6.jar -commons-lang3/3.10//commons-lang3-3.10.jar +commons-lang3/3.11//commons-lang3-3.11.jar commons-logging/1.1.3//commons-logging-1.1.3.jar commons-math3/3.4.1//commons-math3-3.4.1.jar commons-net/3.1//commons-net-3.1.jar diff --git a/pom.xml b/pom.xml index 39ce502ab0e3f..5ff84cf806649 100644 --- a/pom.xml +++ b/pom.xml @@ -178,7 +178,7 @@ 2.6 - 3.10 + 3.11 2.6.2 4.1.17 From 4cd680581a948fb4d7701842ac2cd9e12328089d Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 2 Jan 2021 08:58:48 -0800 Subject: [PATCH 0929/1009] [SPARK-33956][SQL] Add rowCount for Range operator ### What changes were proposed in this pull request? This pr add rowCount for `Range` operator: ```scala spark.sql("set spark.sql.cbo.enabled=true") spark.sql("select id from range(100)").explain("cost") ``` Before this pr: ``` == Optimized Logical Plan == Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B) ``` After this pr: ``` == Optimized Logical Plan == Range (0, 100, step=1, splits=None), Statistics(sizeInBytes=800.0 B, rowCount=100) ``` ### Why are the changes needed? 
[`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) need the row count. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30989 from wangyum/SPARK-33956. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/plans/logical/basicLogicalOperators.scala | 2 +- .../catalyst/statsEstimation/BasicStatsEstimationSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 97bc0083276bc..ee7db7ae83542 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -603,7 +603,7 @@ case class Range( } override def computeStats(): Statistics = { - Statistics(sizeInBytes = LongType.defaultSize * numElements) + Statistics(sizeInBytes = LongType.defaultSize * numElements, rowCount = Some(numElements)) } override def outputOrdering: Seq[SortOrder] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index dfe790dca54d8..72e8b524cf339 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -44,7 +44,7 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { test("range") { val range = Range(1, 5, 1, None) - val rangeStats = Statistics(sizeInBytes = 4 * 8) + val rangeStats = Statistics(sizeInBytes = 4 * 8, Some(4)) checkStats( range, expectedStatsCboOn = rangeStats, From 1c25bea0bbe8365523d2a3d6b06da03d67f25794 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 2 Jan 2021 14:49:03 -0800 Subject: [PATCH 0930/1009] [SPARK-33961][BUILD] Upgrade SBT to 1.4.6 ### What changes were proposed in this pull request? This PR aims to upgrade SBT to 1.4.6 to fix the SBT regression. ### Why are the changes needed? [SBT 1.4.6](https://github.com/sbt/sbt/releases/tag/v1.4.6) has the following fixes - Updates to Coursier 2.0.8, which fixes the cache directory setting on Windows - Fixes performance regression in shell tab completion - Fixes match error when using withDottyCompat - Fixes thread-safety in AnalysisCallback handler ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #30993 from dongjoon-hyun/SPARK-SBT-1.4.6. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/build.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/build.properties b/project/build.properties index 35ee6fea6d336..e80b124bf3de6 100644 --- a/project/build.properties +++ b/project/build.properties @@ -14,4 +14,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -sbt.version=1.4.5 +sbt.version=1.4.6 From 6c5ba8169ae64fdcefd8530c2b38326178f5fa92 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 3 Jan 2021 10:59:12 +0900 Subject: [PATCH 0931/1009] [SPARK-33959][SQL] Improve the statistics estimation of the Tail ### What changes were proposed in this pull request? This pr improve the statistics estimation of the `Tail`: ```scala spark.sql("set spark.sql.cbo.enabled=true") spark.range(100).selectExpr("id as a", "id as b", "id as c", "id as e").write.saveAsTable("t1") println(Tail(Literal(5), spark.sql("SELECT * FROM t1").queryExecution.logical).queryExecution.stringWithStats) ``` Before this pr: ``` == Optimized Logical Plan == Tail 5, Statistics(sizeInBytes=3.8 KiB) +- Relation[a#24L,b#25L,c#26L,e#27L] parquet, Statistics(sizeInBytes=3.8 KiB) ``` After this pr: ``` == Optimized Logical Plan == Tail 5, Statistics(sizeInBytes=200.0 B, rowCount=5) +- Relation[a#24L,b#25L,c#26L,e#27L] parquet, Statistics(sizeInBytes=3.8 KiB) ``` ### Why are the changes needed? Import statistics estimation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #30991 from wangyum/SPARK-33959. Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../sql/catalyst/plans/logical/LogicalPlanVisitor.scala | 3 +++ .../logical/statsEstimation/BasicStatsPlanVisitor.scala | 4 ++++ .../SizeInBytesOnlyStatsPlanVisitor.scala | 9 +++++++++ .../statsEstimation/BasicStatsEstimationSuite.scala | 6 ++++++ 4 files changed, 22 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala index 18baced8f3d61..9cf599167405b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlanVisitor.scala @@ -41,6 +41,7 @@ trait LogicalPlanVisitor[T] { case p: ScriptTransformation => visitScriptTransform(p) case p: Union => visitUnion(p) case p: Window => visitWindow(p) + case p: Tail => visitTail(p) case p: LogicalPlan => default(p) } @@ -81,4 +82,6 @@ trait LogicalPlanVisitor[T] { def visitUnion(p: Union): T def visitWindow(p: Window): T + + def visitTail(p: Tail): T } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index b8c652dc8f12e..ec0c1001b1caa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -72,4 +72,8 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitUnion(p: Union): Statistics = fallback(p) override def visitWindow(p: Window): Statistics = fallback(p) + + override def visitTail(p: Tail): Statistics = { + fallback(p) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala index a586988fd3253..f02f4e2a90626 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/SizeInBytesOnlyStatsPlanVisitor.scala @@ -150,4 +150,13 @@ object SizeInBytesOnlyStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { } override def visitWindow(p: Window): Statistics = visitUnaryNode(p) + + override def visitTail(p: Tail): Statistics = { + val limit = p.limitExpr.eval().asInstanceOf[Int] + val childStats = p.child.stats + val rowCount: BigInt = childStats.rowCount.map(_.min(limit)).getOrElse(limit) + Statistics( + sizeInBytes = EstimationUtils.getOutputSize(p.output, rowCount, childStats.attributeStats), + rowCount = Some(rowCount)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 72e8b524cf339..d682165e08e32 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -84,6 +84,12 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { checkStats(globalLimit, stats) } + test("tail estimation") { + checkStats(Tail(Literal(1), plan), Statistics(sizeInBytes = 12, rowCount = Some(1))) + checkStats(Tail(Literal(20), plan), plan.stats.copy(attributeStats = AttributeMap(Nil))) + checkStats(Tail(Literal(0), plan), Statistics(sizeInBytes = 1, rowCount = Some(0))) + } + test("sample estimation") { val sample = Sample(0.0, 0.5, withReplacement = false, (math.random * 1000).toLong, plan) checkStats(sample, Statistics(sizeInBytes = 60, rowCount = Some(5))) From fc7d0165d29e04a8e78577c853a701bdd8a2af4a Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 3 Jan 2021 11:23:46 +0900 Subject: [PATCH 0932/1009] [SPARK-33963][SQL] Canonicalize `HiveTableRelation` w/o table stats ### What changes were proposed in this pull request? Skip table stats in canonicalizing of `HiveTableRelation`. ### Why are the changes needed? The changes fix a regression comparing to Spark 3.0, see SPARK-33963. ### Does this PR introduce _any_ user-facing change? Yes. After changes Spark behaves as in the version 3.0.1. ### How was this patch tested? By running new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite" ``` Closes #30995 from MaxGekk/fix-caching-hive-table. 
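To make the regression concrete, here is a simplified, purely illustrative analogue (not the real HiveTableRelation API): cache lookup compares canonicalized plans, so any field that can differ between otherwise-identical relations, such as freshly computed table stats, has to be normalized away.

```scala
// Toy model only: Relation and its methods are illustrative, not Spark classes.
case class Relation(name: String, stats: Option[BigInt]) {
  def canonicalizedKeepingStats: Relation = this     // pre-fix behavior: stats kept
  def canonicalized: Relation = copy(stats = None)   // post-fix behavior: stats dropped
}

val cachedEntry = Relation("table_on_test", stats = None)
val lookupKey   = Relation("table_on_test", stats = Some(BigInt(1024)))

assert(cachedEntry.canonicalized == lookupKey.canonicalized)  // canonical forms match: cache hit
assert(cachedEntry.canonicalizedKeepingStats != lookupKey.canonicalizedKeepingStats)  // the regression: miss
```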
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/catalyst/catalog/interface.scala | 3 ++- .../org/apache/spark/sql/hive/CachedTableSuite.scala | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 5cb237688f875..d25b1fe46d569 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -797,7 +797,8 @@ case class HiveTableRelation( }, partitionCols = partitionCols.zipWithIndex.map { case (attr, index) => attr.withExprId(ExprId(index + dataCols.length)) - } + }, + tableStats = None ) override def computeStats(): Statistics = { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index 6cb98e92e36fa..ee93af7643b21 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -429,4 +429,14 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } } + + test("SPARK-33963: do not use table stats while looking in table cache") { + val t = "table_on_test" + withTable(t) { + sql(s"CREATE TABLE $t (col int)") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + } + } } From cfd4a083987f985da4659333c718561c19e0cbfe Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Jan 2021 01:29:12 -0800 Subject: [PATCH 0933/1009] [SPARK-33962][SS] Fix incorrect min partition condition ### What changes were proposed in this pull request? This patch fixes an incorrect condition when comparing offset range size and min partition config. ### Why are the changes needed? When calculating offset ranges, we consider `minPartitions` configuration. If `minPartitions` is not set or is less than or equal the size of given ranges, it means there are enough partitions at Kafka so we don't need to split offsets to satisfy min partition requirement. But the current condition is `offsetRanges.size > minPartitions.get` and is not correct. Currently `getRanges` will split offsets in unnecessary case. Besides, in non-split case, we can assign preferred executor location and reuse `KafkaConsumer`. So unnecessary splitting offset range will miss the chance to reuse `KafkaConsumer`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. Manual test in Spark cluster with Kafka. Closes #30994 from viirya/ss-minor4. 
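A small, self-contained sketch of the corrected predicate (names simplified; not the actual KafkaOffsetRangeCalculator code):

```scala
// Split offset ranges only when there are strictly fewer ranges than minPartitions requires.
def needsSplit(numRanges: Int, minPartitions: Option[Int]): Boolean =
  minPartitions.exists(min => numRanges < min)

assert(!needsSplit(numRanges = 3, minPartitions = Some(3))) // equal: keep ranges, reuse KafkaConsumers
assert(needsSplit(numRanges = 2, minPartitions = Some(3)))  // too few: split offsets
assert(!needsSplit(numRanges = 5, minPartitions = None))    // minPartitions unset: never split
```

Keeping the ranges intact in the equal case also preserves the preferred executor locations, which is what enables consumer reuse.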
Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/kafka010/KafkaOffsetRangeCalculator.scala | 2 +- .../kafka010/KafkaOffsetRangeCalculatorSuite.scala | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala index f7183f7add14b..1e9a62ecce025 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculator.scala @@ -46,7 +46,7 @@ private[kafka010] class KafkaOffsetRangeCalculator(val minPartitions: Option[Int val offsetRanges = ranges.filter(_.size > 0) // If minPartitions not set or there are enough partitions to satisfy minPartitions - if (minPartitions.isEmpty || offsetRanges.size > minPartitions.get) { + if (minPartitions.isEmpty || offsetRanges.size >= minPartitions.get) { // Assign preferred executor locations to each range such that the same topic-partition is // preferentially read from the same executor and the KafkaConsumer can be reused. offsetRanges.map { range => diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala index 5d010cd553521..751b877df9c78 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeCalculatorSuite.scala @@ -71,6 +71,20 @@ class KafkaOffsetRangeCalculatorSuite extends SparkFunSuite { KafkaOffsetRange(tp3, 1, 2, None))) } + testWithMinPartitions("N TopicPartitions to N offset ranges with executors", 3) { calc => + assert( + calc.getRanges( + Seq( + KafkaOffsetRange(tp1, 1, 2), + KafkaOffsetRange(tp2, 1, 2), + KafkaOffsetRange(tp3, 1, 2)), + Seq("exec1", "exec2", "exec3")) === + Seq( + KafkaOffsetRange(tp1, 1, 2, Some("exec3")), + KafkaOffsetRange(tp2, 1, 2, Some("exec1")), + KafkaOffsetRange(tp3, 1, 2, Some("exec2")))) + } + testWithMinPartitions("1 TopicPartition to N offset ranges", 4) { calc => assert( calc.getRanges( From 963c60fe49a54c05cc1c50cb7abce864c5322bdf Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Jan 2021 01:31:38 -0800 Subject: [PATCH 0934/1009] [SPARK-33955][SS] Add latest offsets to source progress ### What changes were proposed in this pull request? This patch proposes to add latest offset to source progress for streaming queries. ### Why are the changes needed? Currently we record start and end offsets per source in streaming process. Latest offset is an important information for streaming process but the progress lacks of this info. We can use it to track the process lag and adjust streaming queries. We should add latest offset to source progress. ### Does this PR introduce _any_ user-facing change? Yes, for new metric about latest source offset in source progress. ### How was this patch tested? Unit test. 
Manually test in Spark cluster: ``` "description" : "KafkaV2[Subscribe[page_view_events]]", "startOffset" : { "page_view_events" : { "2" : 582370921, "4" : 391910836, "1" : 631009201, "3" : 406601346, "0" : 195799112 } }, "endOffset" : { "page_view_events" : { "2" : 583764414, "4" : 392338002, "1" : 632183480, "3" : 407101489, "0" : 197304028 } }, "latestOffset" : { "page_view_events" : { "2" : 589852545, "4" : 394204277, "1" : 637313869, "3" : 409286602, "0" : 203878962 } }, "numInputRows" : 4999997, "inputRowsPerSecond" : 29287.70501405811, ``` Closes #30988 from viirya/latest-offset. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../sql/kafka010/KafkaMicroBatchStream.scala | 8 +++++- .../spark/sql/kafka010/KafkaSource.scala | 10 ++++++++ project/MimaExcludes.scala | 5 +++- .../streaming/SupportsAdmissionControl.java | 8 ++++++ .../streaming/MicroBatchExecution.scala | 25 +++++++++++++------ .../streaming/ProgressReporter.scala | 13 ++++++++-- .../execution/streaming/StreamExecution.scala | 9 +++++++ .../continuous/ContinuousExecution.scala | 2 +- .../apache/spark/sql/streaming/progress.scala | 3 +++ ...StreamingQueryStatusAndProgressSuite.scala | 4 +++ 10 files changed, 75 insertions(+), 12 deletions(-) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index d6fd3aeb7f670..1c816ab82d3ec 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -64,6 +64,8 @@ private[kafka010] class KafkaMicroBatchStream( private var endPartitionOffsets: KafkaSourceOffset = _ + private var latestPartitionOffsets: PartitionOffsetMap = _ + /** * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only * called in StreamExecutionThread. 
Otherwise, interrupting a thread while running @@ -77,6 +79,10 @@ private[kafka010] class KafkaMicroBatchStream( maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) } + override def reportLatestOffset(): Offset = { + KafkaSourceOffset(latestPartitionOffsets) + } + override def latestOffset(): Offset = { throw new UnsupportedOperationException( "latestOffset(Offset, ReadLimit) should be called instead of this method") @@ -84,7 +90,7 @@ private[kafka010] class KafkaMicroBatchStream( override def latestOffset(start: Offset, readLimit: ReadLimit): Offset = { val startPartitionOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets - val latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) + latestPartitionOffsets = kafkaOffsetReader.fetchLatestOffsets(Some(startPartitionOffsets)) endPartitionOffsets = KafkaSourceOffset(readLimit match { case rows: ReadMaxRows => rateLimit(rows.maxRows(), startPartitionOffsets, latestPartitionOffsets) diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index b4e5a8db7d344..1e17f9a7407a9 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -115,8 +115,13 @@ private[kafka010] class KafkaSource( maxOffsetsPerTrigger.map(ReadLimit.maxRows).getOrElse(super.getDefaultReadLimit) } + // The offsets for each topic-partition currently read to process. Note this maybe not necessarily + // to be latest offsets because we possibly apply a read limit. private var currentPartitionOffsets: Option[Map[TopicPartition, Long]] = None + // The latest offsets for each topic-partition. 
+ private var latestPartitionOffsets: Option[Map[TopicPartition, Long]] = None + private val converter = new KafkaRecordToRowConverter() override def schema: StructType = KafkaRecordToRowConverter.kafkaSchema(includeHeaders) @@ -127,6 +132,10 @@ private[kafka010] class KafkaSource( "latestOffset(Offset, ReadLimit) should be called instead of this method") } + override def reportLatestOffset(): streaming.Offset = { + latestPartitionOffsets.map(KafkaSourceOffset(_)).getOrElse(null) + } + override def latestOffset(startOffset: streaming.Offset, limit: ReadLimit): streaming.Offset = { // Make sure initialPartitionOffsets is initialized initialPartitionOffsets @@ -145,6 +154,7 @@ private[kafka010] class KafkaSource( } currentPartitionOffsets = Some(offsets) + latestPartitionOffsets = Some(latest) logDebug(s"GetOffset: ${offsets.toSeq.map(_.toString).sorted}") KafkaSourceOffset(offsets) } diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index ba879c03795d1..cc1b831b6f4d5 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -37,7 +37,10 @@ object MimaExcludes { // Exclude rules for 3.2.x lazy val v32excludes = v31excludes ++ Seq( // [SPARK-33808][SQL] DataSource V2: Build logical writes in the optimizer - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder") + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.write.V1WriteBuilder"), + + // [SPARK-33955] Add latest offsets to source progress + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.SourceProgress.this") ) // Exclude rules for 3.1.x diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java index 027763ce6fcdf..c808b9a3066b0 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/SupportsAdmissionControl.java @@ -53,4 +53,12 @@ public interface SupportsAdmissionControl extends SparkDataStream { * for the very first micro-batch. The source can return `null` if there is no data to process. */ Offset latestOffset(Offset startOffset, ReadLimit limit); + + /** + * Returns the most recent offset available. + * + * The source can return `null`, if there is no data to process or the source does not support + * to this method. + */ + default Offset reportLatestOffset() { return null; } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index c485d0f7d8b2d..a9cb345c4a06e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -212,7 +212,10 @@ class MicroBatchExecution( } // Record the trigger offset range for progress reporting *before* processing the batch - recordTriggerOffsets(from = committedOffsets, to = availableOffsets) + recordTriggerOffsets( + from = committedOffsets, + to = availableOffsets, + latest = latestOffsets) // Remember whether the current batch has data or not. 
This will be required later // for bookkeeping after running the batch, when `isNewDataAvailable` will have changed @@ -379,7 +382,7 @@ class MicroBatchExecution( if (isCurrentBatchConstructed) return true // Generate a map from each unique source to the next available offset. - val latestOffsets: Map[SparkDataStream, Option[OffsetV2]] = uniqueSources.map { + val (nextOffsets, recentOffsets) = uniqueSources.toSeq.map { case (s: SupportsAdmissionControl, limit) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("latestOffset") { @@ -391,23 +394,31 @@ class MicroBatchExecution( startOffsetOpt.map(offset => v2.deserializeOffset(offset.json)) .getOrElse(v2.initialOffset()) } - (s, Option(s.latestOffset(startOffset, limit))) + val next = s.latestOffset(startOffset, limit) + val latest = s.reportLatestOffset() + ((s, Option(next)), (s, Option(latest))) } case (s: Source, _) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("getOffset") { - (s, s.getOffset) + val offset = s.getOffset + ((s, offset), (s, offset)) } case (s: MicroBatchStream, _) => updateStatusMessage(s"Getting offsets from $s") reportTimeTaken("latestOffset") { - (s, Option(s.latestOffset())) + val latest = s.latestOffset() + ((s, Option(latest)), (s, Option(latest))) } case (s, _) => // for some reason, the compiler is unhappy and thinks the match is not exhaustive throw new IllegalStateException(s"Unexpected source: $s") - } - availableOffsets ++= latestOffsets.filter { case (_, o) => o.nonEmpty }.mapValues(_.get) + }.unzip + + availableOffsets ++= nextOffsets.filter { case (_, o) => o.nonEmpty } + .map(p => p._1 -> p._2.get).toMap + latestOffsets ++= recentOffsets.filter { case (_, o) => o.nonEmpty } + .map(p => p._1 -> p._2.get).toMap // Update the query metadata offsetSeqMetadata = offsetSeqMetadata.copy( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 57cb551bba17d..2ab473d737a23 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -71,6 +71,8 @@ trait ProgressReporter extends Logging { private var currentTriggerEndTimestamp = -1L private var currentTriggerStartOffsets: Map[SparkDataStream, String] = _ private var currentTriggerEndOffsets: Map[SparkDataStream, String] = _ + private var currentTriggerLatestOffsets: Map[SparkDataStream, String] = _ + // TODO: Restore this from the checkpoint when possible. private var lastTriggerStartTimestamp = -1L @@ -119,6 +121,7 @@ trait ProgressReporter extends Logging { currentTriggerStartTimestamp = triggerClock.getTimeMillis() currentTriggerStartOffsets = null currentTriggerEndOffsets = null + currentTriggerLatestOffsets = null currentDurationsMs.clear() } @@ -126,9 +129,13 @@ trait ProgressReporter extends Logging { * Record the offsets range this trigger will process. Call this before updating * `committedOffsets` in `StreamExecution` to make sure that the correct range is recorded. 
*/ - protected def recordTriggerOffsets(from: StreamProgress, to: StreamProgress): Unit = { + protected def recordTriggerOffsets( + from: StreamProgress, + to: StreamProgress, + latest: StreamProgress): Unit = { currentTriggerStartOffsets = from.mapValues(_.json).toMap currentTriggerEndOffsets = to.mapValues(_.json).toMap + currentTriggerLatestOffsets = latest.mapValues(_.json).toMap } private def updateProgress(newProgress: StreamingQueryProgress): Unit = { @@ -151,7 +158,8 @@ trait ProgressReporter extends Logging { * though the sources don't have any new data. */ protected def finishTrigger(hasNewData: Boolean, hasExecuted: Boolean): Unit = { - assert(currentTriggerStartOffsets != null && currentTriggerEndOffsets != null) + assert(currentTriggerStartOffsets != null && currentTriggerEndOffsets != null && + currentTriggerLatestOffsets != null) currentTriggerEndTimestamp = triggerClock.getTimeMillis() val executionStats = extractExecutionStats(hasNewData, hasExecuted) @@ -171,6 +179,7 @@ trait ProgressReporter extends Logging { description = source.toString, startOffset = currentTriggerStartOffsets.get(source).orNull, endOffset = currentTriggerEndOffsets.get(source).orNull, + latestOffset = currentTriggerLatestOffsets.get(source).orNull, numInputRows = numRecords, inputRowsPerSecond = numRecords / inputTimeSec, processedRowsPerSecond = numRecords / processingTimeSec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 6b0d33b819a20..c9f40fa22bf9e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -161,6 +161,15 @@ abstract class StreamExecution( @volatile var availableOffsets = new StreamProgress + /** + * Tracks the latest offsets for each input source. + * Only the scheduler thread should modify this field, and only in atomic steps. + * Other threads should make a shallow copy if they are going to access this field more than + * once, since the field's value may change at any time. 
+ */ + @volatile + var latestOffsets = new StreamProgress + @volatile var sinkCommitProgress: Option[StreamWriterCommitProgress] = None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 6eb28d4c66ded..ad041ceeba723 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -332,7 +332,7 @@ class ContinuousExecution( synchronized { // Record offsets before updating `committedOffsets` - recordTriggerOffsets(from = committedOffsets, to = availableOffsets) + recordTriggerOffsets(from = committedOffsets, to = availableOffsets, latest = latestOffsets) if (queryExecutionThread.isAlive) { commitLog.add(epoch, CommitMetadata()) val offset = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 59dc5bc1f37df..1a8939e42a412 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -173,6 +173,7 @@ class StreamingQueryProgress private[sql]( * @param description Description of the source. * @param startOffset The starting offset for data being read. * @param endOffset The ending offset for data being read. + * @param latestOffset The latest offset from this source. * @param numInputRows The number of records read from this source. * @param inputRowsPerSecond The rate at which data is arriving from this source. * @param processedRowsPerSecond The rate at which data from this source is being processed by @@ -184,6 +185,7 @@ class SourceProgress protected[sql]( val description: String, val startOffset: String, val endOffset: String, + val latestOffset: String, val numInputRows: Long, val inputRowsPerSecond: Double, val processedRowsPerSecond: Double) extends Serializable { @@ -204,6 +206,7 @@ class SourceProgress protected[sql]( ("description" -> JString(description)) ~ ("startOffset" -> tryParse(startOffset)) ~ ("endOffset" -> tryParse(endOffset)) ~ + ("latestOffset" -> tryParse(latestOffset)) ~ ("numInputRows" -> JInt(numInputRows)) ~ ("inputRowsPerSecond" -> safeDoubleToJValue(inputRowsPerSecond)) ~ ("processedRowsPerSecond" -> safeDoubleToJValue(processedRowsPerSecond)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index ec61102804ea3..c0aefb8120808 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -75,6 +75,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "description" : "source", | "startOffset" : 123, | "endOffset" : 456, + | "latestOffset" : 789, | "numInputRows" : 678, | "inputRowsPerSecond" : 10.0 | } ], @@ -121,6 +122,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { | "description" : "source", | "startOffset" : 123, | "endOffset" : 456, + | "latestOffset" : 789, | "numInputRows" : 678 | } ], | "sink" : { @@ -333,6 +335,7 @@ object 
StreamingQueryStatusAndProgressSuite { description = "source", startOffset = "123", endOffset = "456", + latestOffset = "789", numInputRows = 678, inputRowsPerSecond = 10.0, processedRowsPerSecond = Double.PositiveInfinity // should not be present in the json @@ -361,6 +364,7 @@ object StreamingQueryStatusAndProgressSuite { description = "source", startOffset = "123", endOffset = "456", + latestOffset = "789", numInputRows = 678, inputRowsPerSecond = Double.NaN, // should not be present in the json processedRowsPerSecond = Double.NegativeInfinity // should not be present in the json From 6b7527e381591bcd51be205853aea3e349893139 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sun, 3 Jan 2021 11:52:46 -0600 Subject: [PATCH 0935/1009] [SPARK-33398] Fix loading tree models prior to Spark 3.0 ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/21632/files#diff-0fdae8a6782091746ed20ea43f77b639f9c6a5f072dd2f600fcf9a7b37db4f47, a new field `rawCount` was added into `NodeData`, which cause that a tree model trained in 2.4 can not be loaded in 3.0/3.1/master; field `rawCount` is only used in training, and not used in `transform`/`predict`/`featureImportance`. So I just set it to -1L. ### Why are the changes needed? to support load old tree model in 3.0/3.1/master ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added testsuites Closes #30889 from zhengruifeng/fix_tree_load. Authored-by: Ruifeng Zheng Signed-off-by: Sean Owen --- .../org/apache/spark/ml/tree/treeModels.scala | 48 ++++++++++++------ .../ml-models/dtc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-894c-ca4eac67c690-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../ml-models/dtc-2.4.7/data/_SUCCESS | 0 ...406c-894c-ca4eac67c690-c000.snappy.parquet | Bin 0 -> 3242 bytes .../dtc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../dtc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/dtc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/dtc-2.4.7/metadata/part-00000 | 1 + .../ml-models/dtr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-84af-d861adcb9ca8-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../ml-models/dtr-2.4.7/data/_SUCCESS | 0 ...4b3d-84af-d861adcb9ca8-c000.snappy.parquet | Bin 0 -> 3264 bytes .../dtr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../dtr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 12 bytes .../ml-models/dtr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/dtr-2.4.7/metadata/part-00000 | 1 + .../ml-models/gbtc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-91c0-6da8cc01fb43-c000.snappy.parquet.crc | Bin 0 -> 44 bytes .../ml-models/gbtc-2.4.7/data/_SUCCESS | 0 ...41c7-91c0-6da8cc01fb43-c000.snappy.parquet | Bin 0 -> 4542 bytes .../gbtc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../gbtc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/gbtc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/gbtc-2.4.7/metadata/part-00000 | 1 + .../gbtc-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-813c-ddc394101e21-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../gbtc-2.4.7/treesMetadata/_SUCCESS | 0 ...4a90-813c-ddc394101e21-c000.snappy.parquet | Bin 0 -> 3075 bytes .../ml-models/gbtr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-9aab-639288bfae6d-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/gbtr-2.4.7/data/_SUCCESS | 0 ...4511-9aab-639288bfae6d-c000.snappy.parquet | Bin 0 -> 3740 bytes .../gbtr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../gbtr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes 
.../ml-models/gbtr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/gbtr-2.4.7/metadata/part-00000 | 1 + .../gbtr-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-ad9c-4be239c2215a-c000.snappy.parquet.crc | Bin 0 -> 32 bytes .../gbtr-2.4.7/treesMetadata/_SUCCESS | 0 ...4fd8-ad9c-4be239c2215a-c000.snappy.parquet | Bin 0 -> 3038 bytes .../ml-models/rfc-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-b112-25b4b11c9009-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/rfc-2.4.7/data/_SUCCESS | 0 ...4485-b112-25b4b11c9009-c000.snappy.parquet | Bin 0 -> 3836 bytes .../rfc-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../rfc-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/rfc-2.4.7/metadata/_SUCCESS | 0 .../ml-models/rfc-2.4.7/metadata/part-00000 | 1 + .../rfc-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-a823-70c7afdcbdc5-c000.snappy.parquet.crc | Bin 0 -> 36 bytes .../rfc-2.4.7/treesMetadata/_SUCCESS | 0 ...4c4e-a823-70c7afdcbdc5-c000.snappy.parquet | Bin 0 -> 3391 bytes .../ml-models/rfr-2.4.7/data/._SUCCESS.crc | Bin 0 -> 8 bytes ...-b681-981caaeca996-c000.snappy.parquet.crc | Bin 0 -> 40 bytes .../ml-models/rfr-2.4.7/data/_SUCCESS | 0 ...40fc-b681-981caaeca996-c000.snappy.parquet | Bin 0 -> 3797 bytes .../rfr-2.4.7/metadata/._SUCCESS.crc | Bin 0 -> 8 bytes .../rfr-2.4.7/metadata/.part-00000.crc | Bin 0 -> 16 bytes .../ml-models/rfr-2.4.7/metadata/_SUCCESS | 0 .../ml-models/rfr-2.4.7/metadata/part-00000 | 1 + .../rfr-2.4.7/treesMetadata/._SUCCESS.crc | Bin 0 -> 8 bytes ...-9b86-d95edaabcde8-c000.snappy.parquet.crc | Bin 0 -> 32 bytes .../rfr-2.4.7/treesMetadata/_SUCCESS | 0 ...447a-9b86-d95edaabcde8-c000.snappy.parquet | Bin 0 -> 3055 bytes .../DecisionTreeClassifierSuite.scala | 12 +++++ .../classification/GBTClassifierSuite.scala | 14 +++++ .../MultilayerPerceptronClassifierSuite.scala | 2 +- .../RandomForestClassifierSuite.scala | 16 +++++- .../spark/ml/feature/HashingTFSuite.scala | 2 +- .../spark/ml/feature/StringIndexerSuite.scala | 2 +- .../DecisionTreeRegressorSuite.scala | 16 +++++- .../ml/regression/GBTRegressorSuite.scala | 12 +++++ .../RandomForestRegressorSuite.scala | 12 +++++ 74 files changed, 122 insertions(+), 20 deletions(-) create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/data/part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/.part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/data/part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc create mode 100644 
mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/data/part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/.part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS create mode 100644 
mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/.part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/data/part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS create mode 100644 mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala index 162641f605264..67b9166a0f44d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala @@ -31,8 +31,10 @@ import org.apache.spark.ml.util.{DefaultParamsReader, DefaultParamsWriter} import org.apache.spark.ml.util.DefaultParamsReader.Metadata import org.apache.spark.mllib.tree.impurity.ImpurityCalculator import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeModel} -import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Dataset, SparkSession} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.functions.{col, lit, struct} +import org.apache.spark.sql.types.StructType +import org.apache.spark.util.VersionUtils import org.apache.spark.util.collection.OpenHashMap /** @@ -401,8 +403,13 @@ private[ml] object DecisionTreeModelReadWrite { } val dataPath = new Path(path, "data").toString - val data = sparkSession.read.parquet(dataPath).as[NodeData] - buildTreeFromNodes(data.collect(), impurityType) + var df = sparkSession.read.parquet(dataPath) + val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major.toInt < 3) { + df = df.withColumn("rawCount", lit(-1L)) + } + + buildTreeFromNodes(df.as[NodeData].collect(), impurityType) } /** @@ -497,25 +504,36 @@ private[ml] object EnsembleModelReadWrite { } val treesMetadataPath = new Path(path, "treesMetadata").toString - val treesMetadataRDD: RDD[(Int, (Metadata, Double))] = sql.read.parquet(treesMetadataPath) - .select("treeID", "metadata", "weights").as[(Int, String, Double)].rdd.map 
{ - case (treeID: Int, json: String, weights: Double) => + val treesMetadataRDD = sql.read.parquet(treesMetadataPath) + .select("treeID", "metadata", "weights") + .as[(Int, String, Double)].rdd + .map { case (treeID: Int, json: String, weights: Double) => treeID -> ((DefaultParamsReader.parseMetadata(json, treeClassName), weights)) - } + } val treesMetadataWeights = treesMetadataRDD.sortByKey().values.collect() val treesMetadata = treesMetadataWeights.map(_._1) val treesWeights = treesMetadataWeights.map(_._2) val dataPath = new Path(path, "data").toString - val nodeData: Dataset[EnsembleNodeData] = - sql.read.parquet(dataPath).as[EnsembleNodeData] - val rootNodesRDD: RDD[(Int, Node)] = - nodeData.rdd.map(d => (d.treeID, d.nodeData)).groupByKey().map { - case (treeID: Int, nodeData: Iterable[NodeData]) => - treeID -> DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType) + var df = sql.read.parquet(dataPath) + val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion) + if (major.toInt < 3) { + val newNodeDataCol = df.schema("nodeData").dataType match { + case StructType(fields) => + val cols = fields.map(f => col(s"nodeData.${f.name}")) :+ lit(-1L).as("rawCount") + struct(cols: _*) + } + df = df.withColumn("nodeData", newNodeDataCol) + } + + val rootNodesRDD = df.as[EnsembleNodeData].rdd + .map(d => (d.treeID, d.nodeData)) + .groupByKey() + .map { case (treeID: Int, nodeData: Iterable[NodeData]) => + treeID -> DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType) } - val rootNodes: Array[Node] = rootNodesRDD.sortByKey().values.collect() + val rootNodes = rootNodesRDD.sortByKey().values.collect() (metadata, treesMetadata.zip(rootNodes), treesWeights) } diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..3ac562a8b2b843f0df27c8239a8aa4e4e7102065 GIT binary patch literal 36 scmYc;N@ieSU}9jt&;0(&Q}%+r>Mc4nfM zQYa$gLG~a$_OOSth!h0zuy}C6lLrrrcwQF|9y}G#ivHi5?=p#@Z~yufHk!S z9(bLs2cNKvu!v>KAOs9juyV4lhTj;|5HIY@NnB2ZgH^mx2N)nw-}&v2>7Tl{ep!F_ z*S~(Szq^+vl5jpv6z$9s4SIn{$n&`)9+)=qOLZD6LS;ymllEWS;Kn7iU!Ra9$ZBi~ z+~KRf0l7VkjS4QnK@p3^kZ8ZYk)>n@GG6fDO|>sKb*XlF!y@&Fnr4SIns&Q`_6u&((^)Yre^HGnSAAXR~`(XmLZQD}5B zXnw=d=xTzNSfkLshTyCOt$@{_&mtRHx|*OR795Z-vr(O9-9>m6uhN5bB;#&XTT!>} zzOkW*D7OA{#AuPiex#tqg-tz5kCvZr1yVrMZe+Mn{ z>SScR<(s|-A&F>FPzA^Ke9Q;?$B9GQq~iycux;7^L@6d5()6#jYzLUAExY!1D|7(# znAdY`9{|~6zDZ2K?~<207Wt4eYwWb?lkKi+6Yq8RM$08$tLs?jL_w)0JN@>x_$*Rn zt*9NBq#_h#Rze!h9`lNMr<}x*&K_2Jt!$7gk}i#e$Y|c zk7U>w!2zcna;1IRSz6cRrX<#|U=JtQh_D7H)0#p&oQP+I-Z`pXDZLpI;(eJSE((&U zm4k`TOge+U%PC)QrTa2KCvrof6Khzok1w&xB2l{@AdkK>r@JdF-*E6#dVDGBZbqR- z=vMkF988|hD|_cyVxMu!0arShgv1hsqKPHeki?YVClwj|s@iq84YOy0a2ZU)jQ5G_ zfl#SqSB(W@8L3wWc6hfP1LW>@pZJE`{lI7w-?U8MR6ifz)%~3w(N}fPcl%quUeTL2 zaV$??eS25$m~FabTi_12*&Prc;)0{ocO0|fz=o#jc%-67sQT*e-d-g>ok(edVs-oU z^l%Zgy>u0Ks6pc*n zsf0LisyIv&TsTZ(2M!eoG;w4*O+8E?5E47UW$GVL^$ddNef^mxX*&T`Qu%q__x=5z 
zulMJ6?WH$oFv0~~EMgHJ1T17_jzb8^3y&DkgR;dwm>&_c15?}D8Ds2dh2~i~6Buu~n(IJFJeucZ&N3Yrv)=v@!!nvi%k?a7Q!@dG zP^2uQ?p|(~7BCT8w9U4S%ZSzPtg&f4UVQYhPa;! zXNBB3qFyn*84%)uND${afmaIN#P>`(g}%c`UorUu5uhWvA<&UEDA?UJtt{e|RS$Xi zl{wyBQM$*#C%>Bxy4#~rLv$;C6&_@c=hb`XXkxpJbeqXP$UtI|LXpHGYd~Vs-l!yk zUzXeUrmA%`5H6K!nEIY!J0MhY@`5_2E+YAS-wx-drGnhu>>93Wx8GNrhO6nCtI4-U zwsY=Q$H*<`9M|q{xVd7kZW@;Eu}|qt&%6t!BZ7x@I{> zF&CibmbZ6yis9)!%IP`VNv`_3!EnO?%3}+*32^Oi7)YD%860r^A%CRMzWD y^>TUkLT$mQFD%T?>ZOI!LfM!vZIo+^<;7CXsOdB3mAx!NCw$Oo3ZXUl-{l{OEe~}7 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..bbad108766e144fcfa28032a8cab16b15aa8f5b3 GIT binary patch literal 12 TcmYc;N@ieSU}7lORyYL!5a$BN literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..2895223cffde6 --- /dev/null +++ b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.regression.DecisionTreeRegressionModel","timestamp":1608687932847,"sparkVersion":"2.4.7","uid":"dtr_c16a90fcdaf8","paramMap":{},"defaultParamMap":{"labelCol":"label","checkpointInterval":10,"minInfoGain":0.0,"maxMemoryInMB":256,"minInstancesPerNode":1,"maxBins":32,"seed":926680331,"cacheNodeIds":false,"maxDepth":5,"predictionCol":"prediction","featuresCol":"features","impurity":"variance"},"numFeatures":692} diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..13fc4ed696effdbf3b5bba54547271c8fca20e04 GIT binary patch literal 44 zcmV+{0Mq|ta$^7h00IE8y8 zv6h252(ieKHv$D^CiH150aJGq%yUF5!((Gj(~WpGUZ(@mgW%;9hykPoga$E!l!7b( zDFZQq+y=4`#0&zSxR}a}CkDk6tkpcxMxb8WhM}AURbt!SAewhBiLF_;x8@%<1ilUH zn6o+Gb`#>KYmYB~`s!(6$aUh%D|@Gsh$7JRB03=a>DSksXV{~HFY&!=H*J@aZ+b>n zF@xjD5fhC@L5_eN1xb0qeZgcSg*S|xDEvG#O*oqUE$|x$3)i&3=&JTOquL)0H{^z;ysbc~%5d)K}H(xmO z%z5E{YDe&+(dUINXYT0x`X7Hhw(KVj@HsTLfg;GKea3sqVIQ^7yy4Zc7xohb`O`5Y zM@}b=bj6XeLkE>>41#bl&b;K)IlP%c6lx)eC9RoakR@ima{cBVD^I+4&q*A?fPon& zTbT|V!2t9Hxbv7um5|5@X}#Y>lg%lLBL|v|W;9?j(~vTXJBM1ut#5dD?fbfee;J+8}|444<1|l z!(S!FOLpz5T^O!E`3-YnA$o@Sa_HI!CeJJ8 zw;<>MOvV;Ifcu?r{|?C8AWy)!9i$Ky&(vbpMzAQ0s#7+61CxsxG3N;80JN+VtmSpA 
zJCy5KbVFddEEUesg3-Vlg+(%XnegtLINN>K+XNyA`tVwjD4~%%WTZ*)5hK0GjM63> z4aO)3iCCkJ6dyIx4zx@JGr*Mr5sFSTWvz@37o9A_frCqM{^c>8Qcf*EJSQ^(6SP(F zMTeY>&T?~cOIfVfFMp>!))kVv{a};Ptw5Y1E=$q_O{hl4(b1lO)Z~x*8FS{DE*yY^ z6(*JT%2FWQ6%Y4BGYUG~-J6iZ@vV5BDL43L+#ipDy9QK4a%3bNi!-{+{1Pc5bxYBB zW<+lChrtkrH%6pjyfGAx01_UT%Hd6+tO86^vEE2H4hEHnWkJawPsq{((SY<6Mz$!c z{c&kij~td_kI3sovJ?yTL;_#n1%@(36WxvKTEsdnR?C>3wnDO2khCS28AiuC+2;s$ zKVheRw|KFz;Fk{)2aIkuy$8|6TY-%qBcw=fKRK(^5 zy_G4QZ8NVvitP(}c35w}p!cbSlXqZiY@2_$7sHw~6v0wUuOJ=Pvx9p3qydl$PgRUm zs4a%{i4h~MAPtrlHBmF$N@=HD8v*nn!5*jC110uPjB1qmr-v40zP1?ljzvYHR9aMt za9YP+*V%V00+d3+#wdl_VkkGti}Zrl{R-3s!G22EZpw03& zl4ARB1bdUPPcUk`VXDBk^O(3uyQP<^wrkbOWOn6@-I(WpdMRLNOp7tJQ8E9|Ta@A> zgQaa2#N7LPb~`xPvpROa&VJsK>xLQ3JO^I`Fy5}pX1#p-oRQ9C3MJGbf_{3|SwONv5K(8EC>8f_EM%>DR8L_^I3kbX~ zA;n#C&lXp=6!!;!t-06fem=gnSK{mWSX@qY#d#+m3`>zfjIV#ZpO5;x@uLjS!=vof za1>}$l0gwoL?Zsq2uuq4BQeRz=PdK}{oA)YRSUV}LA5a-*l<9d&VPAG?cC9<1bPzK z+4=jOJD&arCX^FSwWG%Gmu3G}P-joM^_gO*j&}7#<8b1|9)?6`le|6OeJI-8S;uFk zipZ-r=UX)OeAR4x`7&sS@_%DRIm=Z8YVvN;v-#(}y6`_}qd8v_RHLdl|Ibp+c;Qf? z=N-?JU(_`7n1Hx%WI5!XZrPS49h=AHal1S=M`r?F#BBcBx!ccYS6%1oy4CJZsWaeNX&Hn6z})(S`r$8xHSnLCe*+~Bn1}!X literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..a810dd9107fe7a9e566a46e6250dde025f3242d1 GIT binary patch literal 16 XcmYc;N@ieSU}9K*@!2Z#Q*ti>DzFAq literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..675fea29ba9e2 --- /dev/null +++ b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.classification.GBTClassificationModel","timestamp":1608687932103,"sparkVersion":"2.4.7","uid":"gbtc_81db008b4f25","paramMap":{"maxIter":2},"defaultParamMap":{"seed":-1287390502,"maxMemoryInMB":256,"stepSize":0.1,"validationTol":0.01,"maxBins":32,"checkpointInterval":10,"predictionCol":"prediction","lossType":"logistic","rawPredictionCol":"rawPrediction","featuresCol":"features","cacheNodeIds":false,"maxIter":20,"featureSubsetStrategy":"all","impurity":"gini","minInstancesPerNode":1,"minInfoGain":0.0,"maxDepth":5,"subsamplingRate":1.0,"labelCol":"label","probabilityCol":"probability"},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..101c2071193880d091fe121057591aa15c9e1ba4 GIT binary patch literal 36 scmYc;N@ieSU}9JsB^xi8eS~r2^3y!Yc`qA2)kv%{(JuB5_hRG(0NL9NVgLXD literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e232de340042335ad16e22327618ad8714d80f2b GIT binary patch literal 3075 zcmeHJ&5zqe6dz}k;4Mp4q}sJbKw;F4b|qu%xN+iDq{4P32tDm)x@4;P5o7S zoOZR!2?=rHzyXOpaPDEn9mIcv)WhBo9C|=pkPy#KvJOfL+nzYAWhI`O_kM5Yea&z0 z-F;m^!WH4FB9!5S7xRdPVq}pD0#A(L@`MSB|jl zktD%eIE;rXiHLnb)g&VEebx8Wm=0r_ByJd}JJfcwj}K!?_s^zxLx*};!OZn(!iXQ? zE#1^QW~Xf#P2JEHoMU}V<4hN_smJz;NgT!0acu^0w?YpviZN&>lmf19n5JpmfITR9KnY9X5EJ3q2t<81=mmrD zJ>tT?G*yEc@@U%)z!yeS!LA>rvC9s@!idB!32X{V;2ryZ6uJTH1&qcc0z1$_)z18O z2b|G!Kz~5Igywj=)DPpsUa-53x0)?8pWC4kJHT75=@!6kfSv5oIMW9>1$(4Vy~&32 zPdpw=|A$Z+8f{yI*zLB}I!~yc%UcVFezt!gPz#QoMRp<1iz#uAI4@0(B5*~{E#%;M z48``O`t()_{#RBWL+!nIja9CFd5#L>Zu3xJT=)0O+(m_DReTnx)z7aN<;A_TF;`uc zCzY3ZD6GFuICwWtl=V!k~_iuBRU)4VlQu=Rl(lqCqpK$umtJ|+$-oIKx{ujS+ zYgwvnzfrP&MA1j-YN!>t0qD?m>%#T9X3axZ!1MH`#2Q3v)TjH?N7m35`+r z;(EEbUY?H$%va#W^|GAe!1Xd;g{NXoxHPHcS&ipKc+834ag9^4D=ld)&uBPvxZ?5a zVj0PbBrUMQ(S*uLm7K{TX>&m$dElwQ;vS@wsd4yFoi!urlsm%gFanN)6Beg7gCGxF z>SdJa(<2-ZKUDcEz?>Tl{{aCT z$SB%!glW4h0Gu$*4FA_2?v9c4!Sw8J`Eh;fth%o1nyS|~`l;(Vb<(k%L1$nAMedt) l094g*G*dGz+R|*R4^&v|)4ro`%3YxElNbG2L1+)?|KH>&tQi0R literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..c35b81f9cb25f9937724e672820a28927b2d5819 GIT binary patch literal 40 wcmYc;N@ieSU}A848!YST&7zunY2MT`+>_*IG-_^M(ACJ<-{3LthfZ!Q01OBZNdN!< literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ba26a44f46f11305e83139315761785711bd35ed GIT binary patch literal 3740 zcmcInO>7%Q6rNpgoQ>nOQPo-2V6~Q&SgGV%&VL-GO;FP+MHU3;feNa!&W@86{_E}< zh(e^CIdDKINI6BJ2UPU{2t^#K_KJD{!38czsLFw3gnFt#@ZPR>b{%ihlpiaxciz1B 
zeeZAH*o!Z|HcTbjPfr)&Ln1^@5_k#lPYI3ESeC|NL#A?4l~AgJ4j~E|3amg7k%dSK zEN18oTA(Rfpt5h_uRwfeOhmg`cys8wP7J}5gASj-IY;Maq5ImO1;W!*b$xZ1Wp77qJvlyyNI@?);CdoP_IiOgPsfVjKJcnIPWAq82s%*^5WRp?+ygN|DZE?Pt&8{e*g_4cS?cKPqq`hN|UW*$Fta8=+j^D z>fn%YN0o37o6lbzjft^F4#mV>AJbnQjrkQgs{5Dju6myK0z1yLZ1WL#3EiecGQ zboIMgg;iP2c30A}W&jY<_f%NfzO-UgfC<+z(^y_<2>`mwYE%px0G-WvnQ3;@WLIlC zdmot-jlO3bipi{%dPRSV7f97tYgR9DTcX4h$;7>koP=ZzN^-)>3>B5Q z@}Z!7EM(-FBgSVU3k8!fV4Qnc+02%JVB<20CWd%&A*l}Jrl4#JpaPg~&tl#2RQM~B zNCFtdj$mJiNg7LxOL7n_P#6dFH$mAKKy`*6;#u!0I8F8gQWE0$wiLp}Ywj@pEGWMU znQaM}I_Dz}j!tP9(?%4T2Bm0XfoG__pPWAO`Uj{7lwDET5;Gf79%OJhA~j}`kf1b- zdJ9V3kx-{K0|9P9%|m7&vV=^=kW@bDaeAP=Y<|}}RNI2`xsZ9%1FhhXL~I3r6&Aow zZwPeY00ppjdU>1Sj6iI+&2SZl{V>k4r+8u9)7W+j9gZDAxg%sA#(~2hmB`_*!Z;rH zIkkt=@x}p)!_W1>Zip+pg7S@!dEDo9L)(-+WcCKn)!mbaM%q3hoKQb35GtlF zbo6$1f$~Ye5{DG1YTaDUX$=iVaL$>=xp$dq!B|zhMsj1hNur+X(6DY*axl1WHJP0= z>+j{N%+_?6(&`;~Ep4whSb92b*=BRePG{3)gH?1ZJ^kidx~5fe%boAA)lxNTFfN%1 zf?Bgu(MlCiDr*&sWz!yVdU|brJ9&{gyc6iqDj^vDKj(Oao@q5+B zc=j^*m+`+raqe&qz>{}G&j#;#Zuk${=+LhR9F%+Wf0nZCiw37Yc04zp)-=aZAnHr4 z8FIdArdmTMHJlsH=Y~^5r6zmT}C-M^oHlAN9lqL(4 Z`4TJX!{-wB;P>Uxf5eIrvIxIR{{~z=dsP4c literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..7dc6e149db71e18a5578bb119d6cccf8ff106d34 GIT binary patch literal 16 XcmYc;N@ieSU}AWubJ20OmhChEB{c;9 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..a9a712e626510 --- /dev/null +++ b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.regression.GBTRegressionModel","timestamp":1608687942434,"sparkVersion":"2.4.7","uid":"gbtr_0a74cb2536ff","paramMap":{"maxIter":2},"defaultParamMap":{"impurity":"variance","maxMemoryInMB":256,"maxDepth":5,"subsamplingRate":1.0,"validationTol":0.01,"labelCol":"label","maxIter":20,"checkpointInterval":10,"minInfoGain":0.0,"predictionCol":"prediction","stepSize":0.1,"cacheNodeIds":false,"lossType":"squared","seed":-131597770,"featureSubsetStrategy":"all","featuresCol":"features","minInstancesPerNode":1,"maxBins":32},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..b681b9f615bf5fd53072363c0a42bb2759ce32e3 GIT binary patch literal 32 ocmYc;N@ieSU}D(PYL`-J`#$%RNMP*72{FP6dkmCyJ~ayf0I@y`tN;K2 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9a7e77acf546461ce1f1efb2814d5164828a9af4 GIT binary patch literal 3038 zcmeHJ&5zqe6ragvQ@2{s7GY!4AVr3aQpvUKd^pJ}P-s^`mIkF2>IGG0Vvn&-2(?sJ?uY##0??wKOk}7!gY^GNEL7Lz1=KZA%QroWhLV`Z{Fwp-h1tL z?wBGH*Tt7LF%1hr6tZ#}BZTBM{2&Px7QTQWWI35L0QV0sh(BK#AI!i(3l*NM?5Va- z;#l2Odr?;>ePZuWJ?@j}fgbpJM7t4DM=zf({Xw9fnr(zP=0M#UD5fHRD z)KrJMB=K2$PE-T(*%s}y9d)y!sXpmY|7OnzS)(Ngu(=B#f?Z7q+;WK@Qy}h8`$50w zg{&De8Vv}XFb$0hYtf(=?KZ>K4Ry0vv4G}<%@CXkZ5p>}#6>t`=NMakK<*@{9PV+zeC~uPEzjRm1F)Q-OpN{QBD`Q1tW=!+Y(puyB#_?_V5Z<&SLv zzuU&?fV}zYTWRb*=F598-Mx*G|Fn?WE~Lz(i@y}6NmqwMD5yE{jfFZaK*HfUx-JU` zkNNZcb*RTnsKB(|81Vx0lvPYAx87eHRm=M^l&gS<86>7Qkep#qa+_O7T-!*=LO>aD z2qPO%(l5d8g-6uu?ywk1X(g*P@{`Bl-LZC?0VGaBiP4#YDfaUH7;bZHDQHujCN@RgfQ`;NBh7u z8Q7+h`4~9VW)f!PnG8_ph23K!zyKKeW~|30Ov>fl2&b2D#b?=YZ;0gWiSO5fC^rdK z&eTmqH*lahiSnrXFu!02kN~uK)l5 literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d9ec35a4de580d57b00c6375223f941923fe74b2 GIT binary patch literal 3836 zcmcInU5Fc16uvV%$I>J2$(7lVm$H zv9@Jl1s{A+L0EjTfxsVaO4goqge4+n!1D$;n7reHy&Vn!BFDuWIo5-9{`AQ0kQ zvER;~k5W)36qJc4h!OMdbjt=q&Hd$emxd2O(0>iye2kxX5)5c;;xeN@M6xA^Z3T zb4PyvWow2cxK~!5`s4@w&#jr!!TTRdzuld=_3TeqQ@6H|yW20()$Hc(l>)c#=j>>H z!1i;MqZg`>A26T-(d5L{=PO)4n0_uH9^*(gktF$qNOB4B_*qbZXZ4v17;%b@7ij?; zh|*}LLk(NIjd1jXQjnxy$2|4lycxql6#Dx5#T#+q+!wz_Fz$17t~VYLPTkq~^2Wpa zpMC!0%>0v=Kl=LPjeR2G#?duqX9Mazl^qWd#Y4zWCl|2>$F z``(7{i!cf>dht=qx89%m_@8$Y+q)HuEw&7endglQO9nHS zTDp3G6-bq6wi~Cty+lf-GwJobn1Nso3S!!g4CSSi^d2Xzad~lipT3+fLcRoa7>_PS zK7XAW^p7jFNT~>BExn%^@1?dhieG1f3 z*U7_l+z=->A>%y(wJX$h@EtzC38@chntQa;5SHy&NGbTmZ&Q86N#Ah!?HE*q-#d48 z(h%sicnEah9NE~nIq6$2zZUnZ4LgNjZ8!_d%e{n$J?SOJIh0+8F~D&z0UUn!L=Jxz z#<6zP&OM2yU)n{n_wn9c4Jm1plfDB7kGj>+)phsi#pw*3RcI}ER=1OTdfMG6Y*(Fr zu$=@~F?68~xHnbO&&Q+`gg};C#$riXR-hA??513LlNly-S2=R3G+LS<@`)Y|^O9bI z{(Y&&i0AP326>XfIf~Vy><+T{Ux4FJH?wl?E<3Z6207 zs#+7eCNn_LZ0ovG(?O}O=q4-XT;$x;wUw2khv4209>!s4HPutkJvya-?f58atBuAU z-fs7H`VS!N3(mvw{8tP^xdP(OC3nF|hG*zPt7*Z7XTAV|?gV)yXg(xu@2KNd=`rR! 
z%)=(lyIy^aM^A%)C;uB1`zrSUyx{Fqv%%}$+xQR4XwSC<57e{f|4e1q`-Vq-Xn!sq zR5XWBAo6pa9&)l_WIIhKJ6swbDh+1~wKlwmWtGXYTA!?!%Oj_1CKUzH literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..07748b070ee84 --- /dev/null +++ b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ +{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1608687930713,"sparkVersion":"2.4.7","uid":"rfc_db1adb353f1e","paramMap":{"numTrees":2},"defaultParamMap":{"impurity":"gini","predictionCol":"prediction","numTrees":20,"maxDepth":5,"featureSubsetStrategy":"auto","subsamplingRate":1.0,"featuresCol":"features","checkpointInterval":10,"rawPredictionCol":"rawPrediction","cacheNodeIds":false,"labelCol":"label","seed":207336481,"probabilityCol":"probability","maxBins":32,"minInstancesPerNode":1,"minInfoGain":0.0,"maxMemoryInMB":256},"numFeatures":692,"numClasses":2,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..729c5bb30d7fe171323f7f76f91dc0f90530d162 GIT binary patch literal 36 scmYc;N@ieSU}7*02|4&HvPGAZZL(bE?HEzD3k&Z*`aEN*!Pl-b0Nk+-?*IS* literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..610882198c79067d9362d7d976664b3389ff8033 GIT binary patch literal 3391 zcmeHK&1)M+6rWwMlvU!UPLmb8L?!l88wZp{T3IW}2+}wXt%4myFr|=&Fxs7!C)p2W zcO}Od-P@jeZo%{vOi!hU-un;q++)e9he8a#m%dqjN2;A%N)9%{ntAi)y*Kmb{eDmN zI}h^`k`|94oLm2D?gxqD@hD+PNMI1G&M${!? 
zgx!|9o;Px3qg<*Ka-}sRtHw6JAOYJC-gIqEE5U{~wP0pN<`ZHTv1ylxUaXj~03l#^ z8-t;PZdSF3joVIC9|=_#zt|?OAMDq>?G1HZFM<;Y-h9^bsTb9}hy)$%KpeR&)7_vR zoU)*U<$ze!WCyzI!}{19+`81OdA9!a_-aPUuRbN`=9eAVR4vp(uqc zC8Q>H%Wr_r2maP9a9HYamp3PofhAXol!eu?f=)$OE-#fn8KFih^YIlC3iY28?&S%G zjBii4)iGo|JmEHWxNE(q{_rnuAufn}H!9Vch!CxLqF2|n@B%Omb-z&-e$zi;yQ zim4wGm%LY;;Nr)>>EH0FJNi|`oB)Ram26481nkkH3WSbt162r8Oce0N7)FHG1AoW2 zMeB z8g#zT+a%Hge@`1Ar2L9IP^0}8w4Q1h1#L58BtnV9de27(s)t>+)I(QT>Z?K{OqwJB zf!B7RWpO~M->D8Pv;Ml99a_+^_mkVAOPrfhpLkn`WQY6!t&e9@mVoE?jreq z|LnKiV6lH$i+L@d)AEa}jW%_xMO>~}cG<2}3VOpJwqXhRO)U#^e?8vpN#dL*Fc=P#=mf2A_%wdd^Jd&SW#&Zv7b& zl5_64-*^7*xykGc$3~dQWVWLQA0i=AmcT>d%?L}fR0-BZCNU|iil|k=h7g4mBN8YQ z5>4C%!y+rySPFr5)X)&O2)GV!+zFrxd%_1zdZ4HcK|=}DTZ*_{^uu!>T)2I2vX4jtD0n)PYWJHLMT{1VBE;1r zKs?1JYLEkPE|+_9^^dRTZr|{UKOB;ZVE?WB&L@}hH;{A(QzOwyfpsrwzZO7JJN;;d z#OG_k`4MHW*VuL>oq;@3Y2}qEIw+BqqZsULS`s^x11h8BU|J<}5c{zy0D0UUaTPA4&aM;9Zvtm1S!X3V1eL3R>zfc={49qSLB~YuKZq)QkBU z#F1k=yG4~JFl1~|+=qMmanT~g4JA;o^EHZ#y}t$yBknKo*^i4BA#NyzxPsRZgw@y# zb{3vWUec9OUeX7XN~-9$bB`8)3S$_P>>a?w&($UwpyGqcfx#je!U82jV}R$l=V0SA~PZL=k}RlXIZn~c+FW@G8!6SGGP}{ldf$T zho{M?pi0e_ZcJ;g#$WXZJX{X+?Ni7mt4(tz}+Mn z6;;tP9hV8d{h(nP4WsG$lDMdu0EF~ImQi;PESVNCp`5bK#igzSpa+~~%X9(ITO6(% zn%lOG!%f|I3z;+CZp}3oTefL9NA2TFw&5(bEd3E)AXU2AZXDoyi6UpRnJWb;3(49g zN|_)tOi*OyJ*u3e1!>|T12Yf^tzPtR;Dn(W!~lm{3BZ+BoXFQ1QNm zdL0@G@Gg`-W-1~}%&d$_<@HodD}>GOQLR$tJS|*LK}X=h4Nr9_#RbsW5(6DMM;`Wh zs$8Ij&KBNnI4QzzLn#h>HO;ZdcwziQ*>e~p9II*I2!|$egi;*G&8=STNiy^DI*N^t z_xf&-m1|V_juvii4Z5Lc;BGL6Gg&yRFj^qlg>&+Tk@gM>FVy#`@-c-y5GtlFELAAQ zgsOb8O_4#bsx5o5tSxIWiOb$lF28Bm4op{dU{Cqk@;;*O?z3=CSY?>sC)$Qvwp%C5 z4a3!R7}n~S(&@Z=YT3x|&pWQ&UU2iJeBCrG-O2BN^>n_eHE_uv^RU!4HJdOs4I32A zwqxB*a7PzfO&2aa=LjUao8&W5_n~MTXC1Ff&Y0(zn=Kl@UPFw> zFGF}I{~HwVD(3(^c@OE?=ylIG{)0B!@K*u{<=*_CrR@2=!KpVL&)lP$W)lWPeX%=3 zo^IH=?$F7Nlt(J%kz8@U4Zp*3+Ss^WAFGd#@7*(BHR{#s-o1LITB(j3qm_m6`F-R2 aD)Yv?KC(M=1^$>m{6*eQ5i$#ZX#WP9+>Les literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc new file mode 100644 index 0000000000000000000000000000000000000000..1a72b8e0298442276506431fafc8c925497ded55 GIT binary patch literal 16 XcmYc;N@ieSU}6aL`H*sfwdOPcBd!HV literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 new file mode 100644 index 0000000000000..cccbb8f9f3b05 --- /dev/null +++ b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 @@ -0,0 +1 @@ 
+{"class":"org.apache.spark.ml.regression.RandomForestRegressionModel","timestamp":1608687933536,"sparkVersion":"2.4.7","uid":"rfr_d946d96b7ff0","paramMap":{"numTrees":2},"defaultParamMap":{"numTrees":20,"featureSubsetStrategy":"auto","maxDepth":5,"minInstancesPerNode":1,"labelCol":"label","cacheNodeIds":false,"checkpointInterval":10,"featuresCol":"features","maxMemoryInMB":256,"predictionCol":"prediction","minInfoGain":0.0,"subsamplingRate":1.0,"impurity":"variance","seed":235498149,"maxBins":32},"numFeatures":692,"numTrees":2} diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc new file mode 100644 index 0000000000000000000000000000000000000000..8081f8842670e94dabc89246bcee00c2bd49a151 GIT binary patch literal 32 ocmYc;N@ieSU}88t<>t$D>o-DEAC8n2q~DiVZs+v0Kj|=7XSbN literal 0 HcmV?d00001 diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..093c3468db30122ef0c7b69316fa611f342d48a9 GIT binary patch literal 3055 zcmeHJ&1>UE6dx&CuM)CL3YAu3T}X%8WI+{@9m}yX4b6sj9h?%vQfNyFV`=PYS07$| zcsB$cdMTw83WfHt=N@wKp~q5s=&exbztF?}1^Ej~pJY4Ahj-Hy%3flOJ@aPf{pP(l z@Aux$`@6b;geBokMc`n-3w%-H7=)0-!4DGA%BdL$_+lzfH^A;+ZwY_g93L#eK?AKE z%^hOXAz_G{*bn+D84z=ys^NeHk5$)E1KJO07}~z4Zc@`u2M+^EcTdFIzC|6Z;K+7q z7!h}Xo4TQ`8Ef@MwOTXk3eI$WOoQ|+pj6aVwGIbj+k%-D1$zcvU8N0b8P!z-766*K z+XNIHCJMG_kHk*2lM69i(0n}bZ7*tj5e|2hl!m)&P}K(63hw1JuG@ zvgHli14FA71@|b4VzBJK4~I`?fbNhkb;ihp5qJU66l|vMOJ{8hkb1-k>11=y|B%?=08NFf4gzY~W&|D?L*;Y8v)w@)*wF!8 zY)Aq-#lT4-{okPit1b=`&B~q z(s%KSM|bXVBnc$jUxnlQEd0;S9YOB>_Z+)%=c`LHEDame;UU48*75f<6YRA|jrr}J zJZSThyz~H``T0oQBAr^CsDCoLzOl}1{my*QWPTYjoNK?=WItJ$|3Y5ku(JCN%OwdgFc5J^ zUnAREC4*XO?g-p7Udd`3UUsYY5C!<_@huBwytZ)bq&eO{dEn2az7=g3%$iv zml06Xit16foBTaJJV!qXI6F_F=O% zOjt8lVEJ(heAyj7Pmr`VvHe{)Se`g*Syy#U)t6VgvF%vPWUXQK)_RRbwbC_c58BbH jr5T#hpf$~GbnA_})}>ubzbCh#RUdy5TR> OldLabeledPoint} import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest} import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} -import org.apache.spark.mllib.util.TestingUtils._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ @@ -429,6 +429,20 @@ class RandomForestClassifierSuite extends MLTest with DefaultReadWriteTest { testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load RandomForestClassificationModel prior to Spark 3.0") { + val path = testFile("ml-models/rfc-2.4.7") + val model = 
RandomForestClassificationModel.load(path) + assert(model.numClasses === 2) + assert(model.numFeatures === 692) + assert(model.getNumTrees === 2) + assert(model.totalNumNodes === 10) + assert(model.trees.map(_.numNodes) === Array(3, 7)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object RandomForestClassifierSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala index 8fd192fa56500..861bf1e0b1292 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala @@ -99,7 +99,7 @@ class HashingTFSuite extends MLTest with DefaultReadWriteTest { val metadata = spark.read.json(s"$hashingTFPath/metadata") val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) - assert(sparkVersionStr == "2.4.4") + assert(sparkVersionStr === "2.4.4") intercept[IllegalArgumentException] { loadedHashingTF.save(hashingTFPath) diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala index 948140897d8cc..c8247b9c8f3bf 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala @@ -483,6 +483,6 @@ class StringIndexerSuite extends MLTest with DefaultReadWriteTest { val metadata = spark.read.json(s"$modelPath/metadata") val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) - assert(sparkVersionStr == "2.4.4") + assert(sparkVersionStr === "2.4.4") } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala index 49ebcb385640e..9cb0345400bc4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.regression import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.LabeledPoint -import org.apache.spark.ml.linalg.Vector +import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.tree.impl.TreeTests import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ @@ -236,6 +236,20 @@ class DecisionTreeRegressorSuite extends MLTest with DefaultReadWriteTest { TreeTests.allParamSettings ++ Map("maxDepth" -> 0), TreeTests.allParamSettings ++ Map("maxDepth" -> 0), checkModelData) } + + test("SPARK-33398: Load DecisionTreeRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/dtr-2.4.7") + val model = DecisionTreeRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.numNodes === 5) + assert(model.featureImportances ~== + Vectors.sparse(692, Array(100, 434), + Array(0.03987240829346093, 0.960127591706539)) absTol 1e-4) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private[ml] object DecisionTreeRegressorSuite extends SparkFunSuite { diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 04b0d4b8470f3..7d84df6326397 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -370,6 +370,18 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { testEstimatorAndModelReadWrite(gbt, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load GBTRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/gbtr-2.4.7") + val model = GBTRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.totalNumNodes === 6) + assert(model.trees.map(_.numNodes) === Array(5, 1)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object GBTRegressorSuite extends SparkFunSuite { diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index aeddb5ac7b13e..7ec30de301779 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -221,6 +221,18 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings, allParamSettings, checkModelData) } + + test("SPARK-33398: Load RandomForestRegressionModel prior to Spark 3.0") { + val path = testFile("ml-models/rfr-2.4.7") + val model = RandomForestRegressionModel.load(path) + assert(model.numFeatures === 692) + assert(model.totalNumNodes === 8) + assert(model.trees.map(_.numNodes) === Array(5, 3)) + + val metadata = spark.read.json(s"$path/metadata") + val sparkVersionStr = metadata.select("sparkVersion").first().getString(0) + assert(sparkVersionStr === "2.4.7") + } } private object RandomForestRegressorSuite extends SparkFunSuite { From 67195d0d977caa5a458e8a609c434205f9b54d1b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 04:11:39 +0000 Subject: [PATCH 0936/1009] [SPARK-33950][SQL] Refresh cache in v1 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? Invoke `refreshTable()` from `AlterTableDropPartitionCommand.run()` after partitions dropping. In particular, this invalidates the cache associated with the modified table. ### Why are the changes needed? This fixes the issues portrayed by the example: ```sql spark-sql> CREATE TABLE tbl1 (col0 int, part0 int) USING parquet PARTITIONED BY (part0); spark-sql> INSERT INTO tbl1 PARTITION (part0=0) SELECT 0; spark-sql> INSERT INTO tbl1 PARTITION (part0=1) SELECT 1; spark-sql> CACHE TABLE tbl1; spark-sql> SELECT * FROM tbl1; 0 0 1 1 spark-sql> ALTER TABLE tbl1 DROP PARTITION (part0=0); spark-sql> SELECT * FROM tbl1; 0 0 1 1 ``` The last query must not return `0 0` since it was deleted by previous command. ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```sql ... spark-sql> ALTER TABLE tbl1 DROP PARTITION (part0=0); spark-sql> SELECT * FROM tbl1; 1 1 ``` ### How was this patch tested? 
By running the affected test suite: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #30983 from MaxGekk/drop-partition-refresh-cache. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../spark/sql/execution/command/ddl.scala | 1 + .../v1/AlterTableDropPartitionSuite.scala | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 601594bc6b677..5e3a67927e75a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -581,6 +581,7 @@ case class AlterTableDropPartitionCommand( table.identifier, normalizedSpecs, ignoreIfNotExists = ifExists, purge = purge, retainData = retainData) + sparkSession.catalog.refreshTable(table.identifier.quotedString) CommandUtils.updateTableStats(sparkSession, table) Seq.empty[Row] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index a6490ebdb950c..2f2c62427d5ad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.execution.command /** @@ -42,6 +42,21 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu checkPartitions(t) // no partitions } } + + test("SPARK-33950: refresh cache after partition dropping") { + withTable("t") { + sql(s"CREATE TABLE t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql("INSERT INTO t PARTITION (part=0) SELECT 0") + sql("INSERT INTO t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached("t")) + sql("CACHE TABLE t") + assert(spark.catalog.isCached("t")) + checkAnswer(sql("SELECT * FROM t"), Seq(Row(0, 0), Row(1, 1))) + sql("ALTER TABLE t DROP PARTITION (part=0)") + assert(spark.catalog.isCached("t")) + checkAnswer(sql("SELECT * FROM t"), Seq(Row(1, 1))) + } + } } /** From b037930952a341f4ed956a8f1839852992feaadc Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Mon, 4 Jan 2021 05:44:00 +0000 Subject: [PATCH 0937/1009] [SPARK-33951][SQL] Distinguish the error between filter and distinct ### What changes were proposed in this pull request? The error messages for specifying filter and distinct for the aggregate function are mixed together and should be separated. This can increase readability and ease of use. ### Why are the changes needed? increase readability and ease of use. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? Jenkins test Closes #30982 from beliefer/SPARK-33951. 
Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 9 +--- .../sql/catalyst/analysis/Analyzer.scala | 45 +++++++++++-------- .../analysis/higherOrderFunctions.scala | 3 +- .../analysis/AnalysisErrorSuite.scala | 8 ++-- 4 files changed, 35 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index e4a1f3f8efeee..f4c91327a9e11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -263,13 +263,8 @@ object QueryCompilationErrors { s"its class is $classCanonicalName, which is not a generator.") } - def distinctOrFilterOnlyWithAggregateFunctionError(prettyName: String): Throwable = { - new AnalysisException("DISTINCT or FILTER specified, " + - s"but $prettyName is not an aggregate function") - } - - def ignoreNullsWithUnsupportedFunctionError(prettyName: String): Throwable = { - new AnalysisException(s"Function $prettyName does not support IGNORE NULLS") + def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): Throwable = { + new AnalysisException(s"Function $prettyName does not support $syntax") } def nonDeterministicFilterInAggregateError(): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 5e86368f6f4b3..fdd1cd0146c24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2120,24 +2120,30 @@ class Analyzer(override val catalogManager: CatalogManager) // the context of a Window clause. They do not need to be wrapped in an // AggregateExpression. 
case wf: AggregateWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - wf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "FILTER clause") } else if (ignoreNulls) { wf match { case nthValue: NthValue => nthValue.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - wf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + wf.prettyName, "IGNORE NULLS") } } else { wf } case owf: FrameLessOffsetWindowFunction => - if (isDistinct || filter.isDefined) { - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - owf.prettyName) + if (isDistinct) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "DISTINCT") + } else if (filter.isDefined) { + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "FILTER clause") } else if (ignoreNulls) { owf match { case lead: Lead => @@ -2145,8 +2151,8 @@ class Analyzer(override val catalogManager: CatalogManager) case lag: Lag => lag.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - owf.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + owf.prettyName, "IGNORE NULLS") } } else { owf @@ -2161,20 +2167,23 @@ class Analyzer(override val catalogManager: CatalogManager) case first: First => first.copy(ignoreNulls = ignoreNulls) case last: Last => last.copy(ignoreNulls = ignoreNulls) case _ => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - agg.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + agg.prettyName, "IGNORE NULLS") } AggregateExpression(aggFunc, Complete, isDistinct, filter) } else { AggregateExpression(agg, Complete, isDistinct, filter) } // This function is not an aggregate function, just return the resolved one. - case other if (isDistinct || filter.isDefined) => - throw QueryCompilationErrors.distinctOrFilterOnlyWithAggregateFunctionError( - other.prettyName) - case other if (ignoreNulls) => - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError( - other.prettyName) + case other if isDistinct => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "DISTINCT") + case other if filter.isDefined => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "FILTER clause") + case other if ignoreNulls => + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + other.prettyName, "IGNORE NULLS") case e: String2TrimExpression if arguments.size == 2 => if (trimWarningEnabled.get) { log.warn("Two-parameter TRIM/LTRIM/RTRIM function signatures are deprecated." 
+ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala index 6115b4ed5a117..7d74c0d1cd14f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/higherOrderFunctions.scala @@ -41,7 +41,8 @@ case class ResolveHigherOrderFunctions(catalog: SessionCatalog) extends Rule[Log filter.foreach(_.failAnalysis("FILTER predicate specified, " + s"but ${func.prettyName} is not an aggregate function")) if (ignoreNulls) { - throw QueryCompilationErrors.ignoreNullsWithUnsupportedFunctionError(func.prettyName) + throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( + func.prettyName, "IGNORE NULLS") } func case other => other.failAnalysis( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ec2a8a41bf38c..01d223d18b32b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -184,22 +184,22 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "distinct function", CatalystSqlParser.parsePlan("SELECT hex(DISTINCT a) FROM TaBlE"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support DISTINCT" :: Nil) errorTest( "non aggregate function with filter predicate", CatalystSqlParser.parsePlan("SELECT hex(a) FILTER (WHERE c = 1) FROM TaBlE2"), - "DISTINCT or FILTER specified, but hex is not an aggregate function" :: Nil) + "Function hex does not support FILTER clause" :: Nil) errorTest( "distinct window function", CatalystSqlParser.parsePlan("SELECT percent_rank(DISTINCT a) OVER () FROM TaBlE"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support DISTINCT" :: Nil) errorTest( "window function with filter predicate", CatalystSqlParser.parsePlan("SELECT percent_rank(a) FILTER (WHERE c > 1) OVER () FROM TaBlE2"), - "DISTINCT or FILTER specified, but percent_rank is not an aggregate function" :: Nil) + "Function percent_rank does not support FILTER clause" :: Nil) errorTest( "higher order function with filter predicate", From 2a68ed71e4402c2864202aa78a54d9921c257990 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 4 Jan 2021 05:53:14 +0000 Subject: [PATCH 0938/1009] [SPARK-33954][SQL] Some operator missing rowCount when enable CBO ### What changes were proposed in this pull request? 
This PR fixes some operators that are missing rowCount when CBO is enabled, e.g.:
```scala
spark.range(1000).selectExpr("id as a", "id as b").write.saveAsTable("t1")
spark.sql("ANALYZE TABLE t1 COMPUTE STATISTICS FOR ALL COLUMNS")
spark.sql("set spark.sql.cbo.enabled=true")
spark.sql("set spark.sql.cbo.planStats.enabled=true")
spark.sql("select * from (select * from t1 distribute by a limit 100) distribute by b").explain("cost")
```
Before this PR:
```
== Optimized Logical Plan ==
RepartitionByExpression [b#2129L], Statistics(sizeInBytes=2.3 KiB)
+- GlobalLimit 100, Statistics(sizeInBytes=2.3 KiB, rowCount=100)
   +- LocalLimit 100, Statistics(sizeInBytes=23.4 KiB)
      +- RepartitionByExpression [a#2128L], Statistics(sizeInBytes=23.4 KiB)
         +- Relation[a#2128L,b#2129L] parquet, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
```
After this PR:
```
== Optimized Logical Plan ==
RepartitionByExpression [b#2129L], Statistics(sizeInBytes=2.3 KiB, rowCount=100)
+- GlobalLimit 100, Statistics(sizeInBytes=2.3 KiB, rowCount=100)
   +- LocalLimit 100, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
      +- RepartitionByExpression [a#2128L], Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
         +- Relation[a#2128L,b#2129L] parquet, Statistics(sizeInBytes=23.4 KiB, rowCount=1.00E+3)
```

### Why are the changes needed?

[`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Unit test.

Closes #30987 from wangyum/SPARK-33954.

Authored-by: Yuming Wang
Signed-off-by: Wenchen Fan
---
 .../BasicStatsPlanVisitor.scala               | 24 +++++++++++++------
 .../BasicStatsEstimationSuite.scala           |  7 ++++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
index ec0c1001b1caa..34baf5b90e54e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala
@@ -27,13 +27,23 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] {
   /** Falls back to the estimation computed by [[SizeInBytesOnlyStatsPlanVisitor]].
*/ private def fallback(p: LogicalPlan): Statistics = SizeInBytesOnlyStatsPlanVisitor.visit(p) - override def default(p: LogicalPlan): Statistics = fallback(p) + override def default(p: LogicalPlan): Statistics = p match { + case p: LeafNode => p.computeStats() + case _: LogicalPlan => + val stats = p.children.map(_.stats) + val rowCount = if (stats.exists(_.rowCount.isEmpty)) { + None + } else { + Some(stats.map(_.rowCount.get).filter(_ > 0L).product) + } + Statistics(sizeInBytes = stats.map(_.sizeInBytes).filter(_ > 0L).product, rowCount = rowCount) + } override def visitAggregate(p: Aggregate): Statistics = { AggregateEstimation.estimate(p).getOrElse(fallback(p)) } - override def visitDistinct(p: Distinct): Statistics = fallback(p) + override def visitDistinct(p: Distinct): Statistics = default(p) override def visitExcept(p: Except): Statistics = fallback(p) @@ -43,7 +53,7 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { FilterEstimation(p).estimate.getOrElse(fallback(p)) } - override def visitGenerate(p: Generate): Statistics = fallback(p) + override def visitGenerate(p: Generate): Statistics = default(p) override def visitGlobalLimit(p: GlobalLimit): Statistics = fallback(p) @@ -55,19 +65,19 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitLocalLimit(p: LocalLimit): Statistics = fallback(p) - override def visitPivot(p: Pivot): Statistics = fallback(p) + override def visitPivot(p: Pivot): Statistics = default(p) override def visitProject(p: Project): Statistics = { ProjectEstimation.estimate(p).getOrElse(fallback(p)) } - override def visitRepartition(p: Repartition): Statistics = fallback(p) + override def visitRepartition(p: Repartition): Statistics = default(p) - override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = fallback(p) + override def visitRepartitionByExpr(p: RepartitionByExpression): Statistics = default(p) override def visitSample(p: Sample): Statistics = fallback(p) - override def visitScriptTransform(p: ScriptTransformation): Statistics = fallback(p) + override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) override def visitUnion(p: Union): Statistics = fallback(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index d682165e08e32..91f8fc406a43d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -134,6 +134,13 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics.DUMMY) } + test("SPARK-33954: Some operator missing rowCount when enable CBO") { + checkStats( + plan.repartition(10), + expectedStatsCboOn = Statistics(sizeInBytes = 120, rowCount = Some(10)), + expectedStatsCboOff = Statistics(sizeInBytes = 120)) + } + /** Check estimated stats when cbo is turned on/off. */ private def checkStats( plan: LogicalPlan, From adac633f93f05442f57456f7dcc0d59822d14c2b Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 15:46:49 +0900 Subject: [PATCH 0939/1009] [SPARK-33934][SQL] Add SparkFile's root dir to env property PATH ### What changes were proposed in this pull request? 
In Hive we can always use:
```
add file /path/to/script.py;
select transform(col1, col2, ..) using 'script.py' as (col1, col2, ...) from ...
```
But since Spark wraps the script command with `/bin/bash -c`, such a query currently fails with `script.py command not found`.

This PR adds the SparkFiles root directory to the execution env property `PATH`, so the sub-process can find `script.py` as a program on the `PATH`.

### Why are the changes needed?

Support SQL migration from Hive to Spark.

### Does this PR introduce _any_ user-facing change?

Users can use the script file name directly as the program in script transform SQL:
```
add file /path/to/script.py;
select transform(col1, col2, ..) using 'script.py' as (col1, col2, ...) from ...
```

### How was this patch tested?

UT

Closes #30973 from AngersZhuuuu/SPARK-33934.

Authored-by: angerszhu
Signed-off-by: HyukjinKwon
---
 .../BaseScriptTransformationExec.scala        |   8 +-
 sql/core/src/test/resources/test_script.py    |   2 +
 .../BaseScriptTransformationSuite.scala       | 113 ++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index cf9ee1ef6db72..a25e4b8f8ea07 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -470,6 +470,119 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU Row("3\u00014\u00015") :: Nil) } } + + test("SPARK-33934: Add SparkFile's root dir to env property PATH") { + assume(TestUtils.testCommandAvailable("python")) + val scriptFilePath = copyAndGetResourceFile("test_script.py", ".py").getAbsoluteFile + withTempView("v") { + val df = Seq( + (1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), + (2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), + (3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + // test 'python /path/to/script.py' with local file + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python $scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + // test '/path/to/script.py' with script not executable + val e1 = intercept[TestFailedException] { + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '$scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + }.getMessage + assert(e1.contains("Permission denied")) + + // test `/path/to/script.py' with script executable + scriptFilePath.setExecutable(true) + checkAnswer( + sql( + s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '$scriptFilePath' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + scriptFilePath.setExecutable(false) + sql(s"ADD FILE ${scriptFilePath.getAbsolutePath}") + + // test `script.py` when file added + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING '${scriptFilePath.getName}' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 'e.cast("string")).collect()) + + // test `python script.py` when file added + checkAnswer( + sql( + s""" + |SELECT TRANSFORM(a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + | USING 'python ${scriptFilePath.getName}' AS (a, b, c, d, e) + | ROW FORMAT DELIMITED + | FIELDS TERMINATED BY '\t' + |FROM v + """.stripMargin), identity, df.select( + 'a.cast("string"), + 'b.cast("string"), + 'c.cast("string"), + 'd.cast("string"), + 
'e.cast("string")).collect()) + } + } } case class ExceptionInjectingOperator(child: SparkPlan) extends UnaryExecNode { From 0b647fe69cf201b4dcbc0f4dfc0eb504a523571d Mon Sep 17 00:00:00 2001 From: Hoa Date: Mon, 4 Jan 2021 06:53:12 +0000 Subject: [PATCH 0940/1009] [SPARK-33888][SQL] JDBC SQL TIME type represents incorrectly as TimestampType, it should be physical Int in millis ### What changes were proposed in this pull request? JDBC SQL TIME type represents incorrectly as TimestampType, we change it to be physical Int in millis for now. ### Why are the changes needed? Currently, for JDBC, SQL TIME type represents incorrectly as Spark TimestampType. This should be represent as physical int in millis Represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. It stores the number of milliseconds after midnight, 00:00:00.000. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Close #30902 Closes #30902 from saikocat/SPARK-33888. Lead-authored-by: Hoa Co-authored-by: Hoa Co-authored-by: Duc Hoa, Nguyen Co-authored-by: Duc Hoa, Nguyen Signed-off-by: Wenchen Fan --- .../datasources/jdbc/JdbcUtils.scala | 36 +++++++++++++++++-- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 26 +++++++++++++- 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index f997e57b23206..85a05f42c77fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Connection, Driver, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, SQLFeatureNotSupportedException} import java.util.Locale +import java.util.concurrent.TimeUnit import scala.util.Try import scala.util.control.NonFatal @@ -226,7 +227,7 @@ object JdbcUtils extends Logging { case java.sql.Types.SMALLINT => IntegerType case java.sql.Types.SQLXML => StringType case java.sql.Types.STRUCT => StringType - case java.sql.Types.TIME => TimestampType + case java.sql.Types.TIME => IntegerType case java.sql.Types.TIME_WITH_TIMEZONE => null case java.sql.Types.TIMESTAMP => TimestampType @@ -303,11 +304,23 @@ object JdbcUtils extends Logging { } else { rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls } - val metadata = new MetadataBuilder().putLong("scale", fieldScale) + val metadata = new MetadataBuilder() + // SPARK-33888 + // - include scale in metadata for only DECIMAL & NUMERIC + // - include TIME type metadata + // - always build the metadata + dataType match { + // scalastyle:off + case java.sql.Types.NUMERIC => metadata.putLong("scale", fieldScale) + case java.sql.Types.DECIMAL => metadata.putLong("scale", fieldScale) + case java.sql.Types.TIME => metadata.putBoolean("logical_time_type", true) + case _ => + // scalastyle:on + } val columnType = dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( getCatalystType(dataType, fieldSize, fieldScale, isSigned)) - fields(i) = StructField(columnName, columnType, nullable) + fields(i) = StructField(columnName, columnType, nullable, metadata.build()) i = i + 1 } new StructType(fields) @@ -408,6 +421,23 @@ object JdbcUtils extends Logging { (rs: ResultSet, row: 
InternalRow, pos: Int) => row.setFloat(pos, rs.getFloat(pos + 1)) + + // SPARK-33888 - sql TIME type represents as physical int in millis + // Represents a time of day, with no reference to a particular calendar, + // time zone or date, with a precision of one millisecond. + // It stores the number of milliseconds after midnight, 00:00:00.000. + case IntegerType if metadata.contains("logical_time_type") => + (rs: ResultSet, row: InternalRow, pos: Int) => { + val rawTime = rs.getTime(pos + 1) + if (rawTime != null) { + val rawTimeInNano = rawTime.toLocalTime().toNanoOfDay() + val timeInMillis = Math.toIntExact(TimeUnit.NANOSECONDS.toMillis(rawTimeInNano)) + row.setInt(pos, timeInMillis) + } else { + row.update(pos, null) + } + } + case IntegerType => (rs: ResultSet, row: InternalRow, pos: Int) => row.setInt(pos, rs.getInt(pos + 1)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index ede5fe538a028..639fd0e6fd0f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.jdbc import java.math.BigDecimal import java.sql.{Date, DriverManager, SQLException, Timestamp} import java.util.{Calendar, GregorianCalendar, Properties} +import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ @@ -610,7 +611,13 @@ class JDBCSuite extends QueryTest test("H2 time types") { val rows = sql("SELECT * FROM timetypes").collect() val cal = new GregorianCalendar(java.util.Locale.ROOT) - cal.setTime(rows(0).getAs[java.sql.Timestamp](0)) + val epochMillis = java.time.LocalTime.ofNanoOfDay( + TimeUnit.MILLISECONDS.toNanos(rows(0).getAs[Int](0))) + .atDate(java.time.LocalDate.ofEpochDay(0)) + .atZone(java.time.ZoneId.systemDefault()) + .toInstant() + .toEpochMilli() + cal.setTime(new Date(epochMillis)) assert(cal.get(Calendar.HOUR_OF_DAY) === 12) assert(cal.get(Calendar.MINUTE) === 34) assert(cal.get(Calendar.SECOND) === 56) @@ -625,9 +632,26 @@ class JDBCSuite extends QueryTest assert(cal.get(Calendar.HOUR) === 11) assert(cal.get(Calendar.MINUTE) === 22) assert(cal.get(Calendar.SECOND) === 33) + assert(cal.get(Calendar.MILLISECOND) === 543) assert(rows(0).getAs[java.sql.Timestamp](2).getNanos === 543543000) } + test("SPARK-33888: test TIME types") { + val rows = spark.read.jdbc( + urlWithUserAndPass, "TEST.TIMETYPES", new Properties()).collect() + val cachedRows = spark.read.jdbc(urlWithUserAndPass, "TEST.TIMETYPES", new Properties()) + .cache().collect() + val expectedTimeRaw = java.sql.Time.valueOf("12:34:56") + val expectedTimeMillis = Math.toIntExact( + java.util.concurrent.TimeUnit.NANOSECONDS.toMillis( + expectedTimeRaw.toLocalTime().toNanoOfDay() + ) + ) + assert(rows(0).getAs[Int](0) === expectedTimeMillis) + assert(rows(1).getAs[Int](0) === expectedTimeMillis) + assert(cachedRows(0).getAs[Int](0) === expectedTimeMillis) + } + test("test DATE types") { val rows = spark.read.jdbc( urlWithUserAndPass, "TEST.TIMETYPES", new Properties()).collect() From 8b3fb43f408594ebcb9313b0c0d4c5982ba1ae31 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 08:28:26 +0000 Subject: [PATCH 0941/1009] [SPARK-33965][SQL][TESTS] Recognize `spark_catalog` by `CACHE TABLE` in Hive table names ### What changes were proposed in this pull request? Remove special handling of `CacheTable` in `TestHiveQueryExecution. 
analyzed` because it does not allow to support of `spark_catalog` in Hive table names. `spark_catalog` could be handled by a few lines below: ```scala case UnresolvedRelation(ident, _, _) => if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { ``` added by https://github.com/apache/spark/pull/30883. ### Why are the changes needed? 1. To have feature parity with v1 In-Memory catalog. 2. To be able to write unified tests for In-Memory and Hive external catalogs. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? By running the test suite with new UT: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *CachedTableSuite" ``` Closes #30997 from MaxGekk/cache-table-spark_catalog. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../apache/spark/sql/hive/CachedTableSuite.scala | 13 +++++++++++++ .../org/apache/spark/sql/hive/test/TestHive.scala | 9 ++------- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index ee93af7643b21..7044e6ff78d4a 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -439,4 +439,17 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto assert(spark.catalog.isCached(t)) } } + + test("SPARK-33965: cache table in spark_catalog") { + withNamespace("spark_catalog.ns") { + sql("CREATE NAMESPACE spark_catalog.ns") + val t = "spark_catalog.ns.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (col int)") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index b70afd3e6b98f..cbba9be32b77c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation -import org.apache.spark.sql.catalyst.plans.logical.{CacheTable, LogicalPlan, OneRowRelation} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OneRowRelation} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} @@ -596,13 +596,8 @@ private[hive] class TestHiveQueryExecution( } override lazy val analyzed: LogicalPlan = sparkSession.withActive { - val describedTables = logical match { - case CacheTable(_, tbl, _, _) => tbl.asTableIdentifier :: Nil - case _ => Nil - } - // Make sure any test tables referenced are loaded. 
- val referencedTables = describedTables ++ logical.collect { + val referencedTables = logical.collect { case UnresolvedRelation(ident, _, _) => if (ident.length > 1 && ident.head.equalsIgnoreCase(CatalogManager.SESSION_CATALOG_NAME)) { ident.tail.asTableIdentifier From 271c4f6e00b7bc7c47d84a8e59018e84a19c9822 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 4 Jan 2021 00:54:47 -0800 Subject: [PATCH 0942/1009] [SPARK-33978][SQL] Support ZSTD compression in ORC data source ### What changes were proposed in this pull request? This PR aims to support ZSTD compression in ORC data source. ### Why are the changes needed? Apache ORC 1.6 supports ZSTD compression to generate more compact files and save the storage cost. - https://issues.apache.org/jira/browse/ORC-363 **BEFORE** ```scala scala> spark.range(10).write.option("compression", "zstd").orc("/tmp/zstd") java.lang.IllegalArgumentException: Codec [zstd] is not available. Available codecs are uncompressed, lzo, snappy, zlib, none. ``` **AFTER** ```scala scala> spark.range(10).write.option("compression", "zstd").orc("/tmp/zstd") ``` ```bash $ orc-tools meta /tmp/zstd Processing data file file:/tmp/zstd/part-00011-a63d9a17-456f-42d3-87a1-d922112ed28c-c000.orc [length: 230] Structure for file:/tmp/zstd/part-00011-a63d9a17-456f-42d3-87a1-d922112ed28c-c000.orc File Version: 0.12 with ORC_14 Rows: 1 Compression: ZSTD Compression size: 262144 Calendar: Julian/Gregorian Type: struct Stripe Statistics: Stripe 1: Column 0: count: 1 hasNull: false Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 9 max: 9 sum: 9 File Statistics: Column 0: count: 1 hasNull: false Column 1: count: 1 hasNull: false bytesOnDisk: 6 min: 9 max: 9 sum: 9 Stripes: Stripe: offset: 3 data: 6 rows: 1 tail: 35 index: 35 Stream: column 0 section ROW_INDEX start: 3 length 11 Stream: column 1 section ROW_INDEX start: 14 length 24 Stream: column 1 section DATA start: 38 length 6 Encoding column 0: DIRECT Encoding column 1: DIRECT_V2 File length: 230 bytes Padding length: 0 bytes Padding ratio: 0% User Metadata: org.apache.spark.version=3.2.0 ``` ### Does this PR introduce _any_ user-facing change? Yes, this is a new feature. ### How was this patch tested? Pass the newly added test case. Closes #31002 from dongjoon-hyun/SPARK-33978. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- python/pyspark/sql/readwriter.py | 2 +- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++-- .../scala/org/apache/spark/sql/DataFrameWriter.scala | 2 +- .../sql/execution/datasources/orc/OrcOptions.scala | 3 ++- .../sql/execution/datasources/orc/OrcSourceSuite.scala | 10 +++++++++- 5 files changed, 15 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index d120daa5a9434..53122d6c44602 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -1391,7 +1391,7 @@ def orc(self, path, mode=None, partitionBy=None, compression=None): names of partitioning columns compression : str, optional compression codec to use when saving to file. This can be one of the - known case-insensitive shorten names (none, snappy, zlib, and lzo). + known case-insensitive shorten names (none, snappy, zlib, lzo, and zstd). This will override ``orc.compress`` and ``spark.sql.orc.compression.codec``. If None is set, it uses the value specified in ``spark.sql.orc.compression.codec``. 
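A short spark-shell sketch of the codec precedence the option documentation above describes (`compression` over `orc.compress` over `spark.sql.orc.compression.codec`); the output paths are placeholders and the snippet is illustrative only:

```scala
// Session-level default codec.
spark.conf.set("spark.sql.orc.compression.codec", "zlib")

// No per-write option: the session default (ZLIB) applies.
spark.range(10).write.orc("/tmp/orc_session_default")

// `orc.compress` overrides the session conf.
spark.range(10).write.option("orc.compress", "SNAPPY").orc("/tmp/orc_orc_compress")

// The `compression` option takes precedence over both, so these files use ZSTD.
spark.range(10).write.option("compression", "zstd").orc("/tmp/orc_zstd_option")
```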
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 6fcab887dd6af..50cc47d0f80f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -796,11 +796,11 @@ object SQLConf { .doc("Sets the compression codec used when writing ORC files. If either `compression` or " + "`orc.compress` is specified in the table-specific options/properties, the precedence " + "would be `compression`, `orc.compress`, `spark.sql.orc.compression.codec`." + - "Acceptable values include: none, uncompressed, snappy, zlib, lzo.") + "Acceptable values include: none, uncompressed, snappy, zlib, lzo, zstd.") .version("2.3.0") .stringConf .transform(_.toLowerCase(Locale.ROOT)) - .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo")) + .checkValues(Set("none", "uncompressed", "snappy", "zlib", "lzo", "zstd")) .createWithDefault("snappy") val ORC_IMPLEMENTATION = buildConf("spark.sql.orc.impl") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index c5f2a3d568e97..1dba17b451bb0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -885,7 +885,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { *

* <ul>
* <li>`compression` (default is the value specified in `spark.sql.orc.compression.codec`): * compression codec to use when saving to file. This can be one of the known case-insensitive - * shorten names(`none`, `snappy`, `zlib`, and `lzo`). This will override + * shorten names(`none`, `snappy`, `zlib`, `lzo`, and `zstd`). This will override * `orc.compress` and `spark.sql.orc.compression.codec`. If `orc.compress` is given, * it overrides `spark.sql.orc.compression.codec`.</li>
* </ul>
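As a programmatic counterpart to the `orc-tools meta` check in the commit message, the ORC reader API (already on Spark's classpath) can confirm the codec recorded in a file footer. This is an illustrative sketch under the assumption that `/tmp/zstd` is the directory written in the example above:

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.orc.OrcFile

val conf = new Configuration()
val dir = new Path("/tmp/zstd")  // directory written with option("compression", "zstd")
val fs = dir.getFileSystem(conf)

// Pick one data file and read its footer.
val orcFile = fs.listStatus(dir).map(_.getPath).find(_.getName.endsWith(".orc")).get
val reader = OrcFile.createReader(orcFile, OrcFile.readerOptions(conf))

// Expected to report ZSTD for files written with the new codec.
println(reader.getCompressionKind)
```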
      diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala index 25f022bcdde89..af92d94d68be9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcOptions.scala @@ -77,7 +77,8 @@ object OrcOptions { "uncompressed" -> "NONE", "snappy" -> "SNAPPY", "zlib" -> "ZLIB", - "lzo" -> "LZO") + "lzo" -> "LZO", + "zstd" -> "ZSTD") def getORCCompressionCodecName(name: String): String = shortOrcCompressionCodecNames(name) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala index 4c489bdcc649e..c763f4c9428c8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcSourceSuite.scala @@ -337,7 +337,7 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll with CommonFileDa } // Test all the valid options of spark.sql.orc.compression.codec - Seq("NONE", "UNCOMPRESSED", "SNAPPY", "ZLIB", "LZO").foreach { c => + Seq("NONE", "UNCOMPRESSED", "SNAPPY", "ZLIB", "LZO", "ZSTD").foreach { c => withSQLConf(SQLConf.ORC_COMPRESSION.key -> c) { val expected = if (c == "UNCOMPRESSED") "NONE" else c assert(new OrcOptions(Map.empty[String, String], conf).compressionCodec == expected) @@ -594,4 +594,12 @@ class OrcSourceSuite extends OrcSuite with SharedSparkSession { val df = readResourceOrcFile("test-data/TestStringDictionary.testRowIndex.orc") assert(df.where("str < 'row 001000'").count() === 1000) } + + test("SPARK-33978: Write and read a file with ZSTD compression") { + withTempPath { dir => + val path = dir.getAbsolutePath + spark.range(3).write.option("compression", "zstd").orc(path) + checkAnswer(spark.read.orc(path), Seq(Row(0), Row(1), Row(2))) + } + } } From 8583a4605f74cd439bbf109bf9d551e0ec697910 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 09:43:15 +0000 Subject: [PATCH 0943/1009] [SPARK-33844][SQL] InsertIntoHiveDir command should check col name too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? In hive-1.2.1, hive serde just split `serdeConstants.LIST_COLUMNS` and `serdeConstants.LIST_COLUMN_TYPES` use comma. When we use spark 2.4 with UT ``` test("insert overwrite directory with comma col name") { withTempDir { dir => val path = dir.toURI.getPath val v1 = s""" | INSERT OVERWRITE DIRECTORY '${path}' | STORED AS TEXTFILE | SELECT 1 as a, 'c' as b, if(1 = 1, "true", "false") """.stripMargin sql(v1).explain(true) sql(v1).show() } } ``` failed with as below since column name contains `,` then column names and column types size not equal. ``` 19:56:05.618 ERROR org.apache.spark.sql.execution.datasources.FileFormatWriter: [ angerszhu ] Aborting job dd774f18-93fa-431f-9468-3534c7d8acda. org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 1 times, most recent failure: Lost task 0.0 in stage 0.0 (TID 0, localhost, executor driver): org.apache.hadoop.hive.serde2.SerDeException: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe: columns has 5 elements while columns.types has 3 elements! 
at org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.extractColumnInfo(LazySerDeParameters.java:145) at org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters.(LazySerDeParameters.java:85) at org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.initialize(LazySimpleSerDe.java:125) at org.apache.spark.sql.hive.execution.HiveOutputWriter.(HiveFileFormat.scala:119) at org.apache.spark.sql.hive.execution.HiveFileFormat$$anon$1.newInstance(HiveFileFormat.scala:103) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.newOutputWriter(FileFormatDataWriter.scala:120) at org.apache.spark.sql.execution.datasources.SingleDirectoryDataWriter.(FileFormatDataWriter.scala:108) at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:287) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:219) at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:218) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) at org.apache.spark.scheduler.Task.run(Task.scala:121) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$12.apply(Executor.scala:461) at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:467) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` After hive-2.3 we will set COLUMN_NAME_DELIMITER to special char when col name cntains ',': https://github.com/apache/hive/blob/6f4c35c9e904d226451c465effdc5bfd31d395a0/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java#L1180-L1188 https://github.com/apache/hive/blob/6f4c35c9e904d226451c465effdc5bfd31d395a0/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java#L1044-L1075 And in script transform, we parse column name to avoid this problem https://github.com/apache/spark/blob/554600c2af0dbc8979955807658fafef5dc66c08/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationExec.scala#L257-L261 So I think in `InsertIntoHiveDirComman`, we should do same thing too. And I have verified this method can make spark-2.4 work well. ### Why are the changes needed? More save use serde ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Closes #30850 from AngersZhuuuu/SPARK-33844. 
Authored-by: angerszhu Signed-off-by: Wenchen Fan --- .../hive/execution/InsertIntoHiveDirCommand.scala | 9 +++++++-- .../spark/sql/hive/execution/HiveDDLSuite.scala | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala index b66c302a7d7ea..7ef637ed553ad 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveDirCommand.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.util.SchemaUtils @@ -63,12 +64,16 @@ case class InsertIntoHiveDirCommand( s"when inserting into ${storage.locationUri.get}", sparkSession.sessionState.conf.caseSensitiveAnalysis) - val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( + val table = CatalogTable( identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")), + provider = Some(DDLUtils.HIVE_PROVIDER), tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW, storage = storage, schema = outputColumns.toStructType - )) + ) + DDLUtils.checkDataColNames(table) + + val hiveTable = HiveClientImpl.toHiveTable(table) hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB, storage.serde.getOrElse(classOf[LazySimpleSerDe].getName)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index b8a37a84735e3..50b1dd952c61e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2891,4 +2891,18 @@ class HiveDDLSuite } } } + + test("SPARK-33844: Insert overwrite directory should check schema too") { + withView("v") { + spark.range(1).createTempView("v") + withTempPath { path => + val e = intercept[AnalysisException] { + spark.sql(s"INSERT OVERWRITE LOCAL DIRECTORY '${path.getCanonicalPath}' " + + s"STORED AS PARQUET SELECT ID, if(1=1, 1, 0), abs(id), '^-' FROM v") + }.getMessage + assert(e.contains("Attribute name \"(IF((1 = 1), 1, 0))\" contains" + + " invalid character(s) among \" ,;{}()\\n\\t=\". Please use alias to rename it.")) + } + } + } } From ddc0d5148ac6decde160cca847b5db5d6de1be58 Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 4 Jan 2021 16:14:33 +0000 Subject: [PATCH 0944/1009] [SPARK-33875][SQL] Implement DESCRIBE COLUMN for v2 tables ### What changes were proposed in this pull request? This PR proposes to implement `DESCRIBE COLUMN` for v2 tables. Note that `isExnteded` option is not implemented in this PR. ### Why are the changes needed? Parity with v1 tables. ### Does this PR introduce _any_ user-facing change? Yes, now, `DESCRIBE COLUMN` works for v2 tables. 
```scala sql("CREATE TABLE testcat.tbl (id bigint, data string COMMENT 'hello') USING foo") sql("DESCRIBE testcat.tbl data").show ``` ``` +---------+----------+ |info_name|info_value| +---------+----------+ | col_name| data| |data_type| string| | comment| hello| +---------+----------+ ``` Before this PR, the command would fail with: `Describing columns is not supported for v2 tables.` ### How was this patch tested? Added new test. Closes #30881 from imback82/describe_col_v2. Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 4 ++ .../sql/catalyst/analysis/Analyzer.scala | 6 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 14 ++--- .../analysis/ResolvePartitionSpec.scala | 9 +-- .../catalyst/analysis/v2ResolutionPlans.scala | 22 +++++++- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../catalyst/plans/logical/v2Commands.scala | 2 +- .../sql/catalyst/parser/DDLParserSuite.scala | 28 +++++++--- .../analysis/ResolveSessionCatalog.scala | 32 +++++++++-- .../spark/sql/execution/command/tables.scala | 7 ++- .../datasources/v2/DataSourceV2Strategy.scala | 29 +++++++--- .../datasources/v2/DescribeColumnExec.scala | 56 +++++++++++++++++++ .../inputs/describe-table-column.sql | 8 ++- .../results/describe-table-column.sql.out | 24 +++++++- .../sql-tests/results/describe.sql.out | 2 +- .../sql/connector/DataSourceV2SQLSuite.scala | 53 ++++++++++++++++-- .../command/PlanResolutionSuite.scala | 8 +-- 17 files changed, 250 insertions(+), 56 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index f4c91327a9e11..ff4c54df96f31 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -500,4 +500,8 @@ object QueryCompilationErrors { def commandNotSupportNestedColumnError(command: String, quoted: String): Throwable = { new AnalysisException(s"$command does not support nested column: $quoted") } + + def columnDoesNotExistError(colName: String): Throwable = { + new AnalysisException(s"Column $colName does not exist") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index fdd1cd0146c24..e41d3de642d51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -989,12 +989,12 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedTable(NonSessionCatalogAndIdentifier(catalog, ident), _) => CatalogV2Util.loadTable(catalog, ident) - .map(ResolvedTable(catalog.asTableCatalog, ident, _)) + .map(table => ResolvedTable.create(catalog.asTableCatalog, ident, table)) .getOrElse(u) case u @ UnresolvedTableOrView(NonSessionCatalogAndIdentifier(catalog, ident), _, _) => CatalogV2Util.loadTable(catalog, ident) - .map(ResolvedTable(catalog.asTableCatalog, ident, _)) + .map(table => ResolvedTable.create(catalog.asTableCatalog, ident, table)) .getOrElse(u) case i @ InsertIntoStatement(u @ UnresolvedRelation(_, _, false), _, _, _, _, _) @@ -1166,7 +1166,7 @@ class Analyzer(override val catalogManager: CatalogManager) case v1Table: 
V1Table if v1Table.v1Table.tableType == CatalogTableType.VIEW => ResolvedView(ident, isTemp = false) case table => - ResolvedTable(catalog.asTableCatalog, ident, table) + ResolvedTable.create(catalog.asTableCatalog, ident, table) } case _ => None } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 89076fbb9ce0f..95ea942be4abb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -599,14 +599,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // no validation needed for set and remove property } - case AlterTableAddPartition(ResolvedTable(_, _, table), parts, _) => - checkAlterTablePartition(table, parts) + case AlterTableAddPartition(r: ResolvedTable, parts, _) => + checkAlterTablePartition(r.table, parts) - case AlterTableDropPartition(ResolvedTable(_, _, table), parts, _, _) => - checkAlterTablePartition(table, parts) + case AlterTableDropPartition(r: ResolvedTable, parts, _, _) => + checkAlterTablePartition(r.table, parts) - case AlterTableRenamePartition(ResolvedTable(_, _, table), from, _) => - checkAlterTablePartition(table, Seq(from)) + case AlterTableRenamePartition(r: ResolvedTable, from, _) => + checkAlterTablePartition(r.table, Seq(from)) case showPartitions: ShowPartitions => checkShowPartitions(showPartitions) @@ -1047,7 +1047,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case ShowPartitions(rt: ResolvedTable, _) if !rt.table.isInstanceOf[SupportsPartitionManagement] => failAnalysis(s"SHOW PARTITIONS cannot run for a table which does not support partitioning") - case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement), _) + case ShowPartitions(ResolvedTable(_, _, partTable: SupportsPartitionManagement, _), _) if partTable.partitionSchema().isEmpty => failAnalysis( s"SHOW PARTITIONS is not allowed on a table that is not partitioned: ${partTable.name()}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala index 84be3f294a6ea..0ed5671d2dcc0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolvePartitionSpec.scala @@ -34,7 +34,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case r @ AlterTableAddPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, @@ -43,7 +43,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs, _, _) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs, _, _) => val partitionSchema = table.partitionSchema() r.copy(parts = resolvePartitionSpecs( table.name, @@ -52,7 +52,7 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { 
requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames))) case r @ AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => val partitionSchema = table.partitionSchema() val Seq(resolvedFrom, resolvedTo) = resolvePartitionSpecs( table.name, @@ -61,7 +61,8 @@ object ResolvePartitionSpec extends Rule[LogicalPlan] { requireExactMatchedPartitionSpec(table.name, _, partitionSchema.fieldNames)) r.copy(from = resolvedFrom, to = resolvedTo) - case r @ ShowPartitions(ResolvedTable(_, _, table: SupportsPartitionManagement), partSpecs) => + case r @ ShowPartitions( + ResolvedTable(_, _, table: SupportsPartitionManagement, _), partSpecs) => r.copy(pattern = resolvePartitionSpecs( table.name, partSpecs.toSeq, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index de53702d15a69..52e69480dc815 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, Table, TableCatalog} /** @@ -97,9 +98,26 @@ case class ResolvedNamespace(catalog: CatalogPlugin, namespace: Seq[String]) /** * A plan containing resolved table. 
*/ -case class ResolvedTable(catalog: TableCatalog, identifier: Identifier, table: Table) +case class ResolvedTable( + catalog: TableCatalog, + identifier: Identifier, + table: Table, + outputAttributes: Seq[Attribute]) extends LeafNode { - override def output: Seq[Attribute] = Nil + override def output: Seq[Attribute] = { + val qualifier = catalog.name +: identifier.namespace :+ identifier.name + outputAttributes.map(_.withQualifier(qualifier)) + } +} + +object ResolvedTable { + def create( + catalog: TableCatalog, + identifier: Identifier, + table: Table): ResolvedTable = { + val schema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(table.schema) + ResolvedTable(catalog, identifier, table, schema.toAttributes) + } } case class ResolvedPartitionSpec( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a2f59b914a10d..3ea86c6ea2abf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3557,7 +3557,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } else { DescribeColumn( relation, - ctx.describeColName.nameParts.asScala.map(_.getText).toSeq, + UnresolvedAttribute(ctx.describeColName.nameParts.asScala.map(_.getText).toSeq), isExtended) } } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index c51291d370c80..5728c1ed47993 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -348,7 +348,7 @@ case class DescribeRelation( */ case class DescribeColumn( relation: LogicalPlan, - colNameParts: Seq[String], + column: Expression, isExtended: Boolean) extends Command { override def children: Seq[LogicalPlan] = Seq(relation) override def output: Seq[Attribute] = DescribeCommandSchema.describeColumnAttributes() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 4612e72a54510..9ec22a982a588 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1103,26 +1103,40 @@ class DDLParserSuite extends AnalysisTest { test("describe table column") { comparePlans(parsePlan("DESCRIBE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE t `abc.xyz`"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc.xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("abc.xyz")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE t abc.xyz"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("abc", "xyz"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("abc", "xyz")), + isExtended = false)) 
comparePlans(parsePlan("DESCRIBE t `a.b`.`x.y`"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("a.b", "x.y"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("a.b", "x.y")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = false)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = false)) comparePlans(parsePlan("DESCRIBE TABLE EXTENDED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = true)) comparePlans(parsePlan("DESCRIBE TABLE FORMATTED t col"), DescribeColumn( - UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), Seq("col"), isExtended = true)) + UnresolvedTableOrView(Seq("t"), "DESCRIBE TABLE"), + UnresolvedAttribute(Seq("col")), + isExtended = true)) val caught = intercept[AnalysisException]( parsePlan("DESCRIBE TABLE t PARTITION (ds='1970-01-01') col")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 3c5157bea9470..16cd2068fce52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -20,8 +20,10 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.{AnalysisException, SaveMode} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogPlugin, CatalogV2Util, Identifier, LookupCatalog, SupportsNamespaces, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.errors.QueryCompilationErrors @@ -226,8 +228,23 @@ class ResolveSessionCatalog( case DescribeRelation(ResolvedV1TableOrViewIdentifier(ident), partitionSpec, isExtended) => DescribeTableCommand(ident.asTableIdentifier, partitionSpec, isExtended) - case DescribeColumn(ResolvedV1TableOrViewIdentifier(ident), colNameParts, isExtended) => - DescribeColumnCommand(ident.asTableIdentifier, colNameParts, isExtended) + case DescribeColumn(ResolvedViewIdentifier(ident), column: UnresolvedAttribute, isExtended) => + // For views, the column will not be resolved by `ResolveReferences` because + // `ResolvedView` stores only the identifier. 
+ DescribeColumnCommand(ident.asTableIdentifier, column.nameParts, isExtended) + + case DescribeColumn(ResolvedV1TableIdentifier(ident), column, isExtended) => + column match { + case u: UnresolvedAttribute => + throw QueryCompilationErrors.columnDoesNotExistError(u.name) + case a: Attribute => + DescribeColumnCommand(ident.asTableIdentifier, a.qualifier :+ a.name, isExtended) + case Alias(child, _) => + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", toPrettySQL(child)) + case other => + throw new AnalysisException(s"[BUG] unexpected column expression: $other") + } // For CREATE TABLE [AS SELECT], we should use the v1 command if the catalog is resolved to the // session catalog and the table provider is not v2. @@ -639,9 +656,16 @@ class ResolveSessionCatalog( } } + object ResolvedViewIdentifier { + def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { + case ResolvedView(ident, _) => Some(ident) + case _ => None + } + } + object ResolvedV1TableIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { - case ResolvedTable(catalog, ident, _: V1Table) if isSessionCatalog(catalog) => Some(ident) + case ResolvedTable(catalog, ident, _: V1Table, _) if isSessionCatalog(catalog) => Some(ident) case _ => None } } @@ -649,7 +673,7 @@ class ResolveSessionCatalog( object ResolvedV1TableOrViewIdentifier { def unapply(resolved: LogicalPlan): Option[Identifier] = resolved match { case ResolvedV1TableIdentifier(ident) => Some(ident) - case ResolvedView(ident, _) => Some(ident) + case ResolvedViewIdentifier(ident) => Some(ident) case _ => None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 91c5a886e1d0a..cb72264b9f004 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{escapeSingleQuotedString, quoteIdentifier, CaseInsensitiveMap, CharVarcharUtils} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.csv.CSVFileFormat import org.apache.spark.sql.execution.datasources.json.JsonFileFormat @@ -764,13 +765,13 @@ case class DescribeColumnCommand( val colName = UnresolvedAttribute(colNameParts).name val field = { relation.resolve(colNameParts, resolver).getOrElse { - throw new AnalysisException(s"Column $colName does not exist") + throw QueryCompilationErrors.columnDoesNotExistError(colName) } } if (!field.isInstanceOf[Attribute]) { // If the field is not an attribute after `resolve`, then it's a nested field. 
- throw new AnalysisException( - s"DESC TABLE COLUMN command does not support nested data types: $colName") + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", colName) } val catalogTable = catalog.getTempViewOrPermanentTableMetadata(table) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 2674aaf4f2e88..faba204dcb8f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -21,12 +21,14 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.{AnalysisException, SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} -import org.apache.spark.sql.catalyst.expressions.{And, Expression, NamedExpression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression, NamedExpression, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.util.toPrettySQL import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} @@ -272,8 +274,14 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } DescribeTableExec(desc.output, r.table, isExtended) :: Nil - case DescribeColumn(_: ResolvedTable, _, _) => - throw new AnalysisException("Describing columns is not supported for v2 tables.") + case desc @ DescribeColumn(_: ResolvedTable, column, isExtended) => + column match { + case c: Attribute => + DescribeColumnExec(desc.output, c, isExtended) :: Nil + case nested => + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "DESC TABLE COLUMN", toPrettySQL(nested)) + } case DropTable(r: ResolvedTable, ifExists, purge) => DropTableExec(r.catalog, r.identifier, ifExists, purge, invalidateCache(r)) :: Nil @@ -284,7 +292,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case AlterTable(catalog, ident, _, changes) => AlterTableExec(catalog, ident, changes) :: Nil - case RenameTable(r @ ResolvedTable(catalog, oldIdent, _), newIdent, isView) => + case RenameTable(r @ ResolvedTable(catalog, oldIdent, _, _), newIdent, isView) => if (isView) { throw new AnalysisException( "Cannot rename a table with ALTER VIEW. 
Please use ALTER TABLE instead.") @@ -311,7 +319,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat ns, Map(SupportsNamespaces.PROP_COMMENT -> comment)) :: Nil - case CommentOnTable(ResolvedTable(catalog, identifier, _), comment) => + case CommentOnTable(ResolvedTable(catalog, identifier, _, _), comment) => val changes = TableChange.setProperty(TableCatalog.PROP_COMMENT, comment) AlterTableExec(catalog, identifier, Seq(changes)) :: Nil @@ -343,17 +351,20 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("ANALYZE TABLE is not supported for v2 tables.") case AlterTableAddPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfExists) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), parts, ignoreIfExists) => AlterTableAddPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), parts, ignoreIfNotExists, purge) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), + parts, + ignoreIfNotExists, + purge) => AlterTableDropPartitionExec( table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil case AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement), from, to) => + ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => AlterTableRenamePartitionExec( table, Seq(from).asResolvedPartitionSpecs.head, @@ -380,7 +391,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw new AnalysisException("SHOW COLUMNS is not supported for v2 tables.") case r @ ShowPartitions( - ResolvedTable(catalog, _, table: SupportsPartitionManagement), + ResolvedTable(catalog, _, table: SupportsPartitionManagement, _), pattern @ (None | Some(_: ResolvedPartitionSpec))) => ShowPartitionsExec( r.output, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala new file mode 100644 index 0000000000000..c7ce69f744cce --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.v2 + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.types.StructType + +case class DescribeColumnExec( + override val output: Seq[Attribute], + column: Attribute, + isExtended: Boolean) extends V2CommandExec { + private val toRow = { + RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() + } + + override protected def run(): Seq[InternalRow] = { + val rows = new ArrayBuffer[InternalRow]() + + val comment = if (column.metadata.contains("comment")) { + column.metadata.getString("comment") + } else { + "NULL" + } + + rows += toCatalystRow("col_name", column.name) + rows += toCatalystRow("data_type", column.dataType.catalogString) + rows += toCatalystRow("comment", comment) + + // TODO: The extended description (isExtended = true) can be added here. + + rows.toSeq + } + + private def toCatalystRow(strs: String*): InternalRow = { + toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() + } +} diff --git a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql index d55e398329b76..146977c806182 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/describe-table-column.sql @@ -1,5 +1,5 @@ -- Test temp table -CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment') USING PARQUET; +CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct) USING PARQUET; DESC desc_col_temp_view key; @@ -13,6 +13,9 @@ DESC FORMATTED desc_col_temp_view desc_col_temp_view.key; -- Describe a non-existent column DESC desc_col_temp_view key1; +-- Describe a nested column +DESC desc_col_temp_view col.x; + -- Test persistent table CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET; @@ -24,6 +27,9 @@ DESC EXTENDED desc_col_table key; DESC FORMATTED desc_col_table key; +-- Describe a non-existent column +DESC desc_col_table key1; + -- Test complex columns CREATE TABLE desc_complex_col_table (`a.b` int, col struct) USING PARQUET; diff --git a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out index 22ef8e13c36a8..cc5b836b74109 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe-table-column.sql.out @@ -1,9 +1,9 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 28 +-- Number of queries: 30 -- !query -CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment') USING PARQUET +CREATE TEMPORARY VIEW desc_col_temp_view (key int COMMENT 'column_comment', col struct) USING PARQUET -- !query schema struct<> -- !query output @@ -80,6 +80,15 @@ org.apache.spark.sql.AnalysisException Column key1 does not exist +-- !query +DESC desc_col_temp_view col.x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +DESC TABLE COLUMN does not support nested column: col.x + + -- !query CREATE TABLE desc_col_table (key int COMMENT 'column_comment') USING PARQUET -- !query schema @@ -140,6 +149,15 @@ max_col_len 4 histogram NULL +-- 
!query +DESC desc_col_table key1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Column key1 does not exist + + -- !query CREATE TABLE desc_complex_col_table (`a.b` int, col struct) USING PARQUET -- !query schema @@ -188,7 +206,7 @@ DESC FORMATTED desc_complex_col_table col.x struct<> -- !query output org.apache.spark.sql.AnalysisException -DESC TABLE COLUMN command does not support nested data types: col.x +DESC TABLE COLUMN does not support nested column: col.x -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 93b0cc3fe97e1..3b5d8a1396283 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -561,7 +561,7 @@ struct -- !query output == Physical Plan == Execute DescribeColumnCommand - +- DescribeColumnCommand `default`.`t`, [b], false + +- DescribeColumnCommand `default`.`t`, [spark_catalog, default, t, b], false -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index f821335690aeb..47829b68cc617 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -154,13 +154,54 @@ class DataSourceV2SQLSuite Array("Table Properties", "[bar=baz]", ""))) } - test("Describe column is not supported for v2 catalog") { - withTable("testcat.tbl") { - spark.sql("CREATE TABLE testcat.tbl (id bigint) USING foo") - val ex = intercept[AnalysisException] { - spark.sql("DESCRIBE testcat.tbl id") + test("Describe column for v2 catalog") { + val t = "testcat.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint, data string COMMENT 'hello') USING foo") + val df1 = sql(s"DESCRIBE $t id") + assert(df1.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df1.collect === Seq( + Row("col_name", "id"), + Row("data_type", "bigint"), + Row("comment", "NULL"))) + val df2 = sql(s"DESCRIBE $t data") + assert(df2.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df2.collect === Seq( + Row("col_name", "data"), + Row("data_type", "string"), + Row("comment", "hello"))) + + assertAnalysisError( + s"DESCRIBE $t invalid_col", + "cannot resolve '`invalid_col`' given input columns: [testcat.tbl.data, testcat.tbl.id]") + } + } + + test("Describe column for v2 catalog should work with qualified columns") { + val t = "testcat.ns.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (id bigint) USING foo") + Seq("testcat.ns.tbl.id", "ns.tbl.id", "tbl.id", "id").foreach { col => + val df = sql(s"DESCRIBE $t $col") + assert(df.schema.map(field => (field.name, field.dataType)) + === Seq(("info_name", StringType), ("info_value", StringType))) + assert(df.collect === Seq( + Row("col_name", "id"), + Row("data_type", "bigint"), + Row("comment", "NULL"))) } - assert(ex.message.contains("Describing columns is not supported for v2 tables")) + } + } + + test("Describing nested column for v2 catalog is not supported") { + val t = "testcat.tbl" + withTable(t) { + sql(s"CREATE TABLE $t (d struct) USING foo") + assertAnalysisError( + s"describe $t d.a", + "DESC TABLE COLUMN does not support nested column") } } diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 6571e27b928bb..ee2af085c0fa6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -680,13 +680,13 @@ class PlanResolutionSuite extends AnalysisTest { val tableIdent2 = Identifier.of(Array.empty, "tab") parseResolveCompare(s"DROP TABLE $tableName1", - DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = false, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent1, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName1", - DropTable(ResolvedTable(testCat, tableIdent1, table), ifExists = true, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent1, table), ifExists = true, purge = false)) parseResolveCompare(s"DROP TABLE $tableName2", - DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = false, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent2, table), ifExists = false, purge = false)) parseResolveCompare(s"DROP TABLE IF EXISTS $tableName2", - DropTable(ResolvedTable(testCat, tableIdent2, table), ifExists = true, purge = false)) + DropTable(ResolvedTable.create(testCat, tableIdent2, table), ifExists = true, purge = false)) } test("drop view") { From 6b86aa0b524b4d19b91ab434d2088667c9a1e662 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 4 Jan 2021 10:23:38 -0800 Subject: [PATCH 0945/1009] [SPARK-33984][PYTHON] Upgrade to Py4J 0.10.9.1 ### What changes were proposed in this pull request? This PR upgrade Py4J from 0.10.9 to 0.10.9.1 that contains some bug fixes and improvements. It contains one bug fix (https://github.com/bartdag/py4j/commit/4152353ac142a7c6d177e0d8f5d420d92c846a30). ### Why are the changes needed? To leverage fixes from the upstream in Py4J. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins build and GitHub Actions will test it out. Closes #31009 from HyukjinKwon/SPARK-33984. 
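The archive name is duplicated across `bin/pyspark`, `bin/pyspark2.cmd`, `python/setup.py`, `sbin/spark-config.sh` and `PythonUtils.PY4J_ZIP_NAME`, so a Py4J bump has to move all of them in lockstep. A minimal sketch of how the Scala side consumes that constant (illustrative only, not the real `PythonUtils` body):

```
// Sketch: the zip name lives in one constant and is joined onto SPARK_HOME.
// If it drifts from the file shipped under python/lib/, PySpark cannot
// import py4j on startup.
object Py4jPathSketch {
  val PY4J_ZIP_NAME = "py4j-0.10.9.1-src.zip"

  def sparkPythonPath(sparkHome: String): String =
    Seq(s"$sparkHome/python", s"$sparkHome/python/lib/$PY4J_ZIP_NAME")
      .mkString(java.io.File.pathSeparator)
}
```

A mismatch typically surfaces only at runtime as an import error for `py4j`, which is why the shell scripts and the Scala constant are updated together in this patch.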
Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- ...j-0.10.9-src.zip => py4j-0.10.9.1-src.zip} | Bin 41587 -> 41589 bytes python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 11 files changed, 10 insertions(+), 10 deletions(-) rename python/lib/{py4j-0.10.9-src.zip => py4j-0.10.9.1-src.zip} (94%) diff --git a/bin/pyspark b/bin/pyspark index 463a2dcfc7e6c..251bfef5c80a8 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.1-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index dc34be1a41706..5741480fe5501 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.1-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 1f24c5273ad0b..09fa153c8f20b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -414,7 +414,7 @@ net.sf.py4j py4j - 0.10.9 + 0.10.9.1 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 2f47d28f09103..717eb4db6dd93 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.1-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index fc3b669e721ac..9c516203dd3fa 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -209,7 +209,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar +py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar scala-compiler/2.12.10//scala-compiler-2.12.10.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 0ff30ce0c0a2d..1d80fadb5762a 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -224,7 +224,7 @@ parquet-format/2.4.0//parquet-format-2.4.0.jar parquet-hadoop/1.10.1//parquet-hadoop-1.10.1.jar parquet-jackson/1.10.1//parquet-jackson-1.10.1.jar 
protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9//py4j-0.10.9.jar +py4j/0.10.9.1//py4j-0.10.9.1.jar pyrolite/4.30//pyrolite-4.30.jar re2j/1.1//re2j-1.1.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/python/docs/Makefile b/python/docs/Makefile index 763f493a0eb58..090ad7d62bed1 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -6,7 +6,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.1-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 2f87032820f42..485b5eda19735 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -8,7 +8,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.1-src.zip if "%1" == "" goto help diff --git a/python/lib/py4j-0.10.9-src.zip b/python/lib/py4j-0.10.9.1-src.zip similarity index 94% rename from python/lib/py4j-0.10.9-src.zip rename to python/lib/py4j-0.10.9.1-src.zip index 2c498361470305cd2f2cdecec81c2aa21a4a6f3d..11eb331b6f2c67326dfbc2e600fb97b30ae6f47b 100644 GIT binary patch delta 879 zcmex-gz4)MCcXe~W)=|!1_lm>4GbEAlMl{Q<+t_;i$DA5Q+#O!Hv=Qfx5;1Uh4UaQ zn#iXKR&x%aW}{oF1Pj=J{SwOP25kN)@skN6`%YR7LsnVlDKkWtM?n`u)7=7EeeOo^0#3)e6w?#oDf7db%qLt$unjgW#t9AS^A%(q9iZ~BqwLhldebe zJtG4HIDF#c%TkMqGxPJ};}vWb)D83u4fHJa4Ar>;ycwC~m=Qs!lE8e!7id~O5DPF! z07(W0pdk!f8bPdrN|P-8`1s7c%#!$cy@JZn5LO0ekd@HDW}N(SzOW+5Rlhshdy|3M zZ8I4dL{Q9`&cHBHR-6x2o-+Y`!^yzFhoa9za`J`wqTEQffiyXT zGzI@!+;W+jf#H$>1A`!nrg9mmqti0;5_2-EQj4&-8e|S6pyL-vuxx3ZsxY~FfjqiR z5EXkDNU&~c+@;4b*+CDRC!D~ZU|V*ZM~H#pswH|L&S-fr&7Jl?|kw69|QY KVd^sD#b#-iAy#4E42G!JjMLtN-MY-7+Jn40+O2? zCaH`rxw%8~Clf@rRYna%c9+akW)?qb%Yey?6?FNZsOrQa8NB(v!c$&|>`cL>Pb!AYcUIC5;S|&(4=o28AFr z^nk+P;0^FbHDWT)0tuTXjT?cQ3o1>r^po>*3ldB83MxZGSQ(g!F(6@q1j~}f2hx*k z7D&_+&;W`8#wSy+U*=_CD0F84IvLr~|Ky>L$Vx0rj8D$b$w^Hv$;{6yhB*fmad`aW zCoO9kaG`-^=Y((u2DM5?v@lVgY_L#9nSdGK1eU*0B63M1V;j^t1x5KK`9SBuoI!*q z&;6R!wwaZIfm4uyfgi<_;gcUM6tBnW3NCOYqNEw12&xgm^PmnZOD!q}h7ZhLpmc>N zctBoLNnpO=%fK)>dA^{_lE%$2gW}^e^D;}~<6$OY^&u-8$ZecJD9p^j;58q_0{{S$ B<;DO2 diff --git a/python/setup.py b/python/setup.py index f5836ecf5fbfc..7bb8a00171d37 100755 --- a/python/setup.py +++ b/python/setup.py @@ -250,7 +250,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. 
- install_requires=['py4j==0.10.9'], + install_requires=['py4j==0.10.9.1'], extras_require={ 'ml': ['numpy>=1.7'], 'mllib': ['numpy>=1.7'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index b53442ec096a1..7389416bb3192 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.1-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From fc3f22645e5c542e80a086d96da384feb6afe121 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 10:26:39 -0800 Subject: [PATCH 0946/1009] [SPARK-33990][SQL][TESTS] Remove partition data by v2 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? Remove partition data by `ALTER TABLE .. DROP PARTITION` in V2 table catalog used in tests. ### Why are the changes needed? This is a bug fix. Before the fix, `ALTER TABLE .. DROP PARTITION` does not remove the data belongs to the dropped partition. As a consequence of that, the `select` query returns removed data. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests suites for v1 and v2 catalogs: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #31014 from MaxGekk/fix-drop-partition-v2. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../connector/InMemoryAtomicPartitionTable.scala | 1 + .../sql/connector/InMemoryPartitionTable.scala | 1 + .../apache/spark/sql/connector/InMemoryTable.scala | 4 ++++ .../command/AlterTableDropPartitionSuiteBase.scala | 13 ++++++++++++- 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala index c2a95cc3b8b07..f313c6c389ee4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryAtomicPartitionTable.scala @@ -49,6 +49,7 @@ class InMemoryAtomicPartitionTable ( override def dropPartition(ident: InternalRow): Boolean = { if (memoryTablePartitions.containsKey(ident)) { memoryTablePartitions.remove(ident) + removePartitionKey(ident.toSeq(schema)) true } else { false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala index a3d610af2c06d..9e3555b9bb515 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryPartitionTable.scala @@ -61,6 +61,7 @@ class InMemoryPartitionTable( def dropPartition(ident: InternalRow): Boolean = { if (memoryTablePartitions.containsKey(ident)) { memoryTablePartitions.remove(ident) + removePartitionKey(ident.toSeq(schema)) true } else { false diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index 201d67a815bea..a1253dfe67e7a 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -187,6 +187,10 @@ class InMemoryTable( true } + protected def removePartitionKey(key: Seq[Any]): Unit = dataMap.synchronized { + dataMap.remove(key) + } + def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => val key = getKey(row) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index cf8a1e9de5e0e..d8a8920deadc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.NoSuchPartitionsException import org.apache.spark.sql.internal.SQLConf @@ -144,4 +144,15 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil checkPartitions(t) } } + + test("SPARK-33990: don not return data from dropped partition") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t DROP PARTITION (part=0)") + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) + } + } } From 414d323d6c92584beb87e1c426e4beab5ddbd452 Mon Sep 17 00:00:00 2001 From: Takeshi Yamamuro Date: Mon, 4 Jan 2021 10:31:20 -0800 Subject: [PATCH 0947/1009] [SPARK-33988][SQL][TEST] Add an option to enable CBO in TPCDSQueryBenchmark ### What changes were proposed in this pull request? This PR intends to add a new option `--cbo` to enable CBO in TPCDSQueryBenchmark. I think this option is useful so as to monitor performance changes with CBO enabled. ### Why are the changes needed? To monitor performance chaneges with CBO enabled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #31011 from maropu/AddOptionForCBOInTPCDSBenchmark. 
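For reference, the new flag amounts to running something like the following by hand before the queries are timed, once per TPCDS table (a sketch that reuses the `SQLConf` entries the patch sets; `store_sales` stands in for each table):

```
import org.apache.spark.sql.internal.SQLConf

// CBO only pays off when column-level statistics exist, hence the ANALYZE
// pass over every table before the benchmark queries run.
spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.PLAN_STATS_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.JOIN_REORDER_ENABLED.key}=true")
spark.sql(s"SET ${SQLConf.HISTOGRAM_ENABLED.key}=true")
spark.sql("ANALYZE TABLE store_sales COMPUTE STATISTICS FOR ALL COLUMNS")
```

That is also why the benchmark persists the tables with `saveAsTable` instead of registering temp views when `--cbo` is given: `ANALYZE TABLE ... FOR ALL COLUMNS` needs a catalog table to attach the statistics to.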
Authored-by: Takeshi Yamamuro Signed-off-by: Dongjoon Hyun --- .../benchmark/TPCDSQueryBenchmark.scala | 39 +++++++++++++++++-- .../TPCDSQueryBenchmarkArguments.scala | 6 +++ 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index f931914b19c6c..b34eac5df8090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -19,11 +19,15 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.SparkConf import org.apache.spark.benchmark.Benchmark +import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_SECOND import org.apache.spark.sql.execution.datasources.LogicalRelation +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils /** * Benchmark to measure TPCDS query performance. @@ -38,7 +42,10 @@ import org.apache.spark.sql.execution.datasources.LogicalRelation * Results will be written to "benchmarks/TPCDSQueryBenchmark-results.txt". * }}} */ -object TPCDSQueryBenchmark extends SqlBasedBenchmark { +object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { + + private lazy val warehousePath = + Utils.createTempDir(namePrefix = "spark-warehouse").getAbsolutePath override def getSparkSession: SparkSession = { val conf = new SparkConf() @@ -50,6 +57,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { .set("spark.executor.memory", "3g") .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString) .set("spark.sql.crossJoin.enabled", "true") + .set("spark.sql.warehouse.dir", warehousePath) SparkSession.builder.config(conf).getOrCreate() } @@ -60,9 +68,14 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "web_returns", "web_site", "reason", "call_center", "warehouse", "ship_mode", "income_band", "time_dim", "web_page") - def setupTables(dataLocation: String): Map[String, Long] = { + def setupTables(dataLocation: String, createTempView: Boolean): Map[String, Long] = { tables.map { tableName => - spark.read.parquet(s"$dataLocation/$tableName").createOrReplaceTempView(tableName) + val df = spark.read.parquet(s"$dataLocation/$tableName") + if (createTempView) { + df.createOrReplaceTempView(tableName) + } else { + df.write.saveAsTable(tableName) + } tableName -> spark.table(tableName).count() }.toMap } @@ -146,7 +159,25 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { s"Empty queries to run. 
Bad query name filter: ${benchmarkArgs.queryFilter}") } - val tableSizes = setupTables(benchmarkArgs.dataLocation) + val tableSizes = setupTables(benchmarkArgs.dataLocation, + createTempView = !benchmarkArgs.cboEnabled) + if (benchmarkArgs.cboEnabled) { + spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.PLAN_STATS_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.JOIN_REORDER_ENABLED.key}=true") + spark.sql(s"SET ${SQLConf.HISTOGRAM_ENABLED.key}=true") + + // Analyze all the tables before running TPCDS queries + val startTime = System.nanoTime() + tables.foreach { tableName => + spark.sql(s"ANALYZE TABLE $tableName COMPUTE STATISTICS FOR ALL COLUMNS") + } + logInfo("The elapsed time to analyze all the tables is " + + s"${(System.nanoTime() - startTime) / NANOS_PER_SECOND.toDouble} seconds") + } else { + spark.sql(s"SET ${SQLConf.CBO_ENABLED.key}=false") + } + runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes) runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes, nameSuffix = nameSuffixForQueriesV2_7) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala index 184ffff94298a..80a6bffc61ea4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmarkArguments.scala @@ -23,6 +23,7 @@ import java.util.Locale class TPCDSQueryBenchmarkArguments(val args: Array[String]) { var dataLocation: String = null var queryFilter: Set[String] = Set.empty + var cboEnabled: Boolean = false parseArgs(args.toList) validateArguments() @@ -44,6 +45,10 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) { queryFilter = value.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet args = tail + case optName :: tail if optionMatch("--cbo", optName) => + cboEnabled = true + args = tail + case _ => // scalastyle:off println System.err.println("Unknown/unsupported param " + args) @@ -60,6 +65,7 @@ class TPCDSQueryBenchmarkArguments(val args: Array[String]) { |Options: | --data-location Path to TPCDS data | --query-filter Queries to filter, e.g., q3,q5,q13 + | --cbo Whether to enable cost-based optimization | |------------------------------------------------------------------------------------------------------------------ |In order to run this benchmark, please follow the instructions at From d6322bf70c622f4068e510975e9f53c8e18bf59c Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Mon, 4 Jan 2021 10:36:31 -0800 Subject: [PATCH 0948/1009] [SPARK-33983][PYTHON] Update cloudpickle to v1.6.0 ### What changes were proposed in this pull request? This PR proposes to upgrade cloudpickle from 1.5.0 to 1.6.0. It virtually contains one fix: https://github.com/cloudpipe/cloudpickle/commit/4510be850d55bc60decf86953324f98bc3199f9e From a cursory look, this isn't a regression, and not even properly supported in Python: ```python >>> import pickle >>> pickle.dumps({}.keys()) Traceback (most recent call last): File "", line 1, in TypeError: cannot pickle 'dict_keys' object ``` So it seems fine not to backport. ### Why are the changes needed? To leverage bug fixes from the cloudpickle upstream. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Jenkins build and GitHub actions build will test it out. 
Closes #31007 from HyukjinKwon/cloudpickle-upgrade. Authored-by: HyukjinKwon Signed-off-by: Dongjoon Hyun --- python/pyspark/cloudpickle/__init__.py | 6 ++- python/pyspark/cloudpickle/cloudpickle.py | 22 ++++++++--- .../pyspark/cloudpickle/cloudpickle_fast.py | 37 +++++++++++++++---- 3 files changed, 52 insertions(+), 13 deletions(-) diff --git a/python/pyspark/cloudpickle/__init__.py b/python/pyspark/cloudpickle/__init__.py index 4e85b637800dc..56506d95fa1be 100644 --- a/python/pyspark/cloudpickle/__init__.py +++ b/python/pyspark/cloudpickle/__init__.py @@ -4,4 +4,8 @@ from pyspark.cloudpickle.cloudpickle import * # noqa from pyspark.cloudpickle.cloudpickle_fast import CloudPickler, dumps, dump # noqa -__version__ = '1.5.0' +# Conform to the convention used by python serialization libraries, which +# expose their Pickler subclass at top-level under the "Pickler" name. +Pickler = CloudPickler + +__version__ = '1.6.0' diff --git a/python/pyspark/cloudpickle/cloudpickle.py b/python/pyspark/cloudpickle/cloudpickle.py index 58c274bd79720..05d52afa0da96 100644 --- a/python/pyspark/cloudpickle/cloudpickle.py +++ b/python/pyspark/cloudpickle/cloudpickle.py @@ -88,7 +88,7 @@ def g(): DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL # Track the provenance of reconstructed dynamic classes to make it possible to -# reconstruct instances from the matching singleton class definition when +# recontruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. _DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary() _DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary() @@ -236,7 +236,7 @@ def _extract_code_globals(co): out_names = {names[oparg] for _, oparg in _walk_global_ops(co)} # Declaring a function inside another one using the "def ..." - # syntax generates a constant code object corresponding to the one + # syntax generates a constant code object corresonding to the one # of the nested function's As the nested function may itself need # global variables, we need to introspect its code, extract its # globals, (look for code object in it's co_consts attribute..) and @@ -457,7 +457,7 @@ def _is_parametrized_type_hint(obj): is_typing = getattr(obj, '__origin__', None) is not None # typing_extensions.Literal - is_literal = getattr(obj, '__values__', None) is not None + is_litteral = getattr(obj, '__values__', None) is not None # typing_extensions.Final is_final = getattr(obj, '__type__', None) is not None @@ -469,7 +469,7 @@ def _is_parametrized_type_hint(obj): getattr(obj, '__result__', None) is not None and getattr(obj, '__args__', None) is not None ) - return any((is_typing, is_literal, is_final, is_union, is_tuple, + return any((is_typing, is_litteral, is_final, is_union, is_tuple, is_callable)) def _create_parametrized_type_hint(origin, args): @@ -699,7 +699,7 @@ def _make_skel_func(code, cell_count, base_globals=None): """ # This function is deprecated and should be removed in cloudpickle 1.7 warnings.warn( - "A pickle file created using an old (<=1.4.1) version of cloudpickle " + "A pickle file created using an old (<=1.4.1) version of cloudpicke " "is currently being loaded. 
This is not supported by cloudpickle and " "will break in cloudpickle 1.7", category=UserWarning ) @@ -828,3 +828,15 @@ def _get_bases(typ): # For regular class objects bases_attr = '__bases__' return getattr(typ, bases_attr) + + +def _make_dict_keys(obj): + return dict.fromkeys(obj).keys() + + +def _make_dict_values(obj): + return {i: _ for i, _ in enumerate(obj)}.values() + + +def _make_dict_items(obj): + return obj.items() diff --git a/python/pyspark/cloudpickle/cloudpickle_fast.py b/python/pyspark/cloudpickle/cloudpickle_fast.py index 3c48ff7b0a885..fa8da0f635c49 100644 --- a/python/pyspark/cloudpickle/cloudpickle_fast.py +++ b/python/pyspark/cloudpickle/cloudpickle_fast.py @@ -6,10 +6,11 @@ is only available for Python versions 3.8+, a lot of backward-compatibility code is also removed. -Note that the C Pickler subclassing API is CPython-specific. Therefore, some +Note that the C Pickler sublassing API is CPython-specific. Therefore, some guards present in cloudpickle.py that were written to handle PyPy specificities are not present in cloudpickle_fast.py """ +import _collections_abc import abc import copyreg import io @@ -33,8 +34,8 @@ _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, _is_parametrized_type_hint, PYPY, cell_set, parametrized_type_hint_getinitargs, _create_parametrized_type_hint, - builtin_code_type - + builtin_code_type, + _make_dict_keys, _make_dict_values, _make_dict_items, ) @@ -179,7 +180,7 @@ def _class_getstate(obj): clsdict.pop('__weakref__', None) if issubclass(type(obj), abc.ABCMeta): - # If obj is an instance of an ABCMeta subclass, don't pickle the + # If obj is an instance of an ABCMeta subclass, dont pickle the # cache/negative caches populated during isinstance/issubclass # checks, but pickle the list of registered subclasses of obj. clsdict.pop('_abc_cache', None) @@ -400,6 +401,24 @@ def _class_reduce(obj): return NotImplemented +def _dict_keys_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_keys, (list(obj), ) + + +def _dict_values_reduce(obj): + # Safer not to ship the full dict as sending the rest might + # be unintended and could potentially cause leaking of + # sensitive information + return _make_dict_values, (list(obj), ) + + +def _dict_items_reduce(obj): + return _make_dict_items, (dict(obj), ) + + # COLLECTIONS OF OBJECTS STATE SETTERS # ------------------------------------ # state setters are called at unpickling time, once the object is created and @@ -407,7 +426,7 @@ def _class_reduce(obj): def _function_setstate(obj, state): - """Update the state of a dynamic function. + """Update the state of a dynaamic function. As __closure__ and __globals__ are readonly attributes of a function, we cannot rely on the native setstate routine of pickle.load_build, that calls @@ -473,6 +492,10 @@ class CloudPickler(Pickler): _dispatch_table[types.MappingProxyType] = _mappingproxy_reduce _dispatch_table[weakref.WeakSet] = _weakset_reduce _dispatch_table[typing.TypeVar] = _typevar_reduce + _dispatch_table[_collections_abc.dict_keys] = _dict_keys_reduce + _dispatch_table[_collections_abc.dict_values] = _dict_values_reduce + _dispatch_table[_collections_abc.dict_items] = _dict_items_reduce + dispatch_table = ChainMap(_dispatch_table, copyreg.dispatch_table) @@ -556,7 +579,7 @@ def dump(self, obj): # `dispatch` attribute. 
Earlier versions of the protocol 5 CloudPickler # used `CloudPickler.dispatch` as a class-level attribute storing all # reducers implemented by cloudpickle, but the attribute name was not a - # great choice given the meaning of `CloudPickler.dispatch` when + # great choice given the meaning of `Cloudpickler.dispatch` when # `CloudPickler` extends the pure-python pickler. dispatch = dispatch_table @@ -630,7 +653,7 @@ def reducer_override(self, obj): return self._function_reduce(obj) else: # fallback to save_global, including the Pickler's - # dispatch_table + # distpatch_table return NotImplemented else: From ac4651a7d19b248c86290d419ac3f6d69ed2b61e Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 4 Jan 2021 12:59:45 -0800 Subject: [PATCH 0949/1009] [SPARK-33980][SS] Invalidate char/varchar in spark.readStream.schema ### What changes were proposed in this pull request? invalidate char/varchar in `spark.readStream.schema` just like what we've done for `spark.read.schema` in da72b87374a7be5416b99ed016dc2fc9da0ed88a ### Why are the changes needed? bugfix, char/varchar is only for table schema while `spark.sql.legacy.charVarcharAsString=false` ### Does this PR introduce _any_ user-facing change? yes, char/varchar will fail to define ss readers when `spark.sql.legacy.charVarcharAsString=false` ### How was this patch tested? new tests Closes #31003 from yaooqinn/SPARK-33980. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../spark/sql/streaming/DataStreamReader.scala | 7 +++++-- .../apache/spark/sql/CharVarcharTestSuite.scala | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index eb7bb5c87a990..d82fa9e88592f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -64,7 +64,8 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.0.0 */ def schema(schema: StructType): DataStreamReader = { - this.userSpecifiedSchema = Option(CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema)) + val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(replaced) this } @@ -76,7 +77,9 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * @since 2.3.0 */ def schema(schemaString: String): DataStreamReader = { - this.userSpecifiedSchema = Option(StructType.fromDDL(schemaString)) + val rawSchema = StructType.fromDDL(schemaString) + val schema = CharVarcharUtils.failIfHasCharVarchar(rawSchema).asInstanceOf[StructType] + this.userSpecifiedSchema = Option(schema) this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 9d4b7c4f82ed2..62d0f51e5ff75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -549,6 +549,21 @@ class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession { assert(df2.schema.head.dataType === StringType) } } + + test("invalidate char/varchar in spark.readStream.schema") { + failWithInvalidCharUsage(spark.readStream.schema(new StructType().add("id", CharType(5)))) + 
failWithInvalidCharUsage(spark.readStream.schema("id char(5)")) + withSQLConf((SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key, "true")) { + withTempPath { dir => + spark.range(2).write.save(dir.toString) + val df1 = spark.readStream.schema(new StructType().add("id", CharType(5))) + .load(dir.toString) + assert(df1.schema.map(_.dataType) == Seq(StringType)) + val df2 = spark.readStream.schema("id char(5)").load(dir.toString) + assert(df2.schema.map(_.dataType) == Seq(StringType)) + } + } + } } class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSparkSession { From 90f4ecf8cc07505c7cdea90b07fc60151c62ee2d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 4 Jan 2021 14:54:16 -0800 Subject: [PATCH 0950/1009] [SPARK-33996][BUILD] Upgrade checkstyle plugins ### What changes were proposed in this pull request? This PR aims to upgrade `checkstyle` Maven plugins and its dependency, `com.puppycrawl.tools:checkstyle`. ### Why are the changes needed? The changes are needed to support Java 14+ better. - https://checkstyle.org/releasenotes.html#Release_8.39 - https://checkstyle.org/releasenotes.html#Release_8.38 - https://checkstyle.org/releasenotes.html#Release_8.37 - https://checkstyle.org/releasenotes.html#Release_8.36 - https://checkstyle.org/releasenotes.html#Release_8.35 - https://checkstyle.org/releasenotes.html#Release_8.34 - https://checkstyle.org/releasenotes.html#Release_8.33 - https://checkstyle.org/releasenotes.html#Release_8.32 - https://checkstyle.org/releasenotes.html#Release_8.31 - https://checkstyle.org/releasenotes.html#Release_8.30 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. Closes #31019 from williamhyun/checkstyle. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 5ff84cf806649..91ca0398a076e 100644 --- a/pom.xml +++ b/pom.xml @@ -2988,7 +2988,7 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 + 3.1.1 false true @@ -3008,7 +3008,7 @@ com.puppycrawl.tools checkstyle - 8.29 + 8.39 From 84c1f436690c76bfd3bd1a664dba303cfc8381da Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 4 Jan 2021 15:00:48 -0800 Subject: [PATCH 0951/1009] [SPARK-33987][SQL] Refresh cache in v2 `ALTER TABLE .. DROP PARTITION` ### What changes were proposed in this pull request? 1. Refresh the cache associated with tables from v2 table catalogs in the `ALTER TABLE .. DROP PARTITION` command. 2. Port the test for v1 catalogs to the base suite to run it for v2 table catalog. ### Why are the changes needed? The changes fix incorrect query results from cached V2 table altered by `ALTER TABLE .. DROP PARTITION`, see the added test and SPARK-33987. ### Does this PR introduce _any_ user-facing change? Yes, it could if users have v2 table catalogs. ### How was this patch tested? By running unified tests for `ALTER TABLE .. DROP PARTITION`: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableDropPartitionSuite" ``` Closes #31017 from MaxGekk/drop-partition-refresh-cache-v2. 
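The user-visible effect is easiest to see against a cached table. A minimal reproduction of the stale result this change fixes, assuming a v2 catalog registered as `testcat` (catalog, namespace and the `USING` clause below are placeholders, mirroring the unified test added in this patch):

```
spark.sql("CREATE TABLE testcat.ns.tbl (id INT, part INT) USING foo PARTITIONED BY (part)")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (part = 0) SELECT 0")
spark.sql("INSERT INTO testcat.ns.tbl PARTITION (part = 1) SELECT 1")

spark.sql("CACHE TABLE testcat.ns.tbl")
spark.sql("ALTER TABLE testcat.ns.tbl DROP PARTITION (part = 0)")

// Before this change the cached plan was not refreshed, so rows from the
// dropped partition could still be returned; now only (1, 1) comes back.
spark.sql("SELECT * FROM testcat.ns.tbl").show()
```

The `refreshCache` callback is only invoked when a partition was actually dropped, so a `DROP PARTITION` that matches nothing leaves the cache untouched.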
Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../v2/AlterTableDropPartitionExec.scala | 8 +++++--- .../datasources/v2/DataSourceV2Strategy.scala | 8 ++++++-- .../AlterTableDropPartitionSuiteBase.scala | 15 +++++++++++++++ .../v1/AlterTableDropPartitionSuite.scala | 17 +---------------- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala index 90714c3c726f3..f3137abbd1ba6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableDropPartitionExec.scala @@ -29,7 +29,8 @@ case class AlterTableDropPartitionExec( table: SupportsPartitionManagement, partSpecs: Seq[ResolvedPartitionSpec], ignoreIfNotExists: Boolean, - purge: Boolean) extends V2CommandExec { + purge: Boolean, + refreshCache: () => Unit) extends V2CommandExec { import DataSourceV2Implicits._ override def output: Seq[Attribute] = Seq.empty @@ -43,8 +44,8 @@ case class AlterTableDropPartitionExec( table.name(), notExistsPartIdents, table.partitionSchema()) } - existsPartIdents match { - case Seq() => // Nothing will be done + val isTableAltered = existsPartIdents match { + case Seq() => false // Nothing will be done case Seq(partIdent) => if (purge) table.purgePartition(partIdent) else table.dropPartition(partIdent) case _ if table.isInstanceOf[SupportsAtomicPartitionManagement] => @@ -55,6 +56,7 @@ case class AlterTableDropPartitionExec( throw new UnsupportedOperationException( s"Nonatomic partition table ${table.name()} can not drop multiple partitions.") } + if (isTableAltered) refreshCache() Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index faba204dcb8f4..1537ebf8f305c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -356,12 +356,16 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat table, parts.asResolvedPartitionSpecs, ignoreIfExists) :: Nil case AlterTableDropPartition( - ResolvedTable(_, _, table: SupportsPartitionManagement, _), + r @ ResolvedTable(_, _, table: SupportsPartitionManagement, _), parts, ignoreIfNotExists, purge) => AlterTableDropPartitionExec( - table, parts.asResolvedPartitionSpecs, ignoreIfNotExists, purge) :: Nil + table, + parts.asResolvedPartitionSpecs, + ignoreIfNotExists, + purge, + invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRenamePartition( ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index d8a8920deadc7..aadcda490b82b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -155,4 +155,19 @@ trait AlterTableDropPartitionSuiteBase 
extends QueryTest with DDLCommandTestUtil QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) } } + + test("SPARK-33950, SPARK-33987: refresh cache after partition dropping") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t DROP PARTITION (part=0)") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index 2f2c62427d5ad..a6490ebdb950c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.command.v1 -import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.command /** @@ -42,21 +42,6 @@ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSu checkPartitions(t) // no partitions } } - - test("SPARK-33950: refresh cache after partition dropping") { - withTable("t") { - sql(s"CREATE TABLE t (id int, part int) $defaultUsing PARTITIONED BY (part)") - sql("INSERT INTO t PARTITION (part=0) SELECT 0") - sql("INSERT INTO t PARTITION (part=1) SELECT 1") - assert(!spark.catalog.isCached("t")) - sql("CACHE TABLE t") - assert(spark.catalog.isCached("t")) - checkAnswer(sql("SELECT * FROM t"), Seq(Row(0, 0), Row(1, 1))) - sql("ALTER TABLE t DROP PARTITION (part=0)") - assert(spark.catalog.isCached("t")) - checkAnswer(sql("SELECT * FROM t"), Seq(Row(1, 1))) - } - } } /** From 9b4173fa95047fed94e2fe323ad281fb48deffda Mon Sep 17 00:00:00 2001 From: Koert Kuipers Date: Mon, 4 Jan 2021 15:40:32 -0800 Subject: [PATCH 0952/1009] [SPARK-33894][SQL] Change visibility of private case classes in mllib to avoid runtime compilation errors with Scala 2.13 ### What changes were proposed in this pull request? Change visibility modifier of two case classes defined inside objects in mllib from private to private[OuterClass] ### Why are the changes needed? Without this change when running tests for Scala 2.13 you get runtime code generation errors. These errors look like this: ``` [info] Cause: java.util.concurrent.ExecutionException: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 73, Column 65: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 73, Column 65: No applicable constructor/method found for zero actual parameters; candidates are: "public java.lang.String org.apache.spark.ml.feature.Word2VecModel$Data.word()" ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests now pass for Scala 2.13 Closes #31018 from koertkuipers/feat-visibility-scala213. 
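The failing `Data.word()` lookup comes from Java code generated at runtime, which lives outside the enclosing object. The practical difference exploited here is that a class-qualified `private[Outer]` is enforced only by the Scala compiler, so the case class and its accessors stay reachable at the bytecode level where the generated code resolves them, while the member remains hidden from outside Scala callers. A stripped-down illustration (the names are made up, not the actual mllib classes):

```
object ModelIO {
  // Object-private: only code written inside ModelIO may name Data or call
  // its accessors; with Scala 2.13 this is what broke the generated readers.
  // private case class Data(word: String, vector: Array[Float])

  // Qualified private: the same surface for Scala callers, but the members
  // remain visible to bytecode generated outside the object body.
  private[ModelIO] case class Data(word: String, vector: Array[Float])
}
```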
Authored-by: Koert Kuipers Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 2 +- .../scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 0b9c1b570d943..9afbc9b884168 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -344,7 +344,7 @@ class Word2VecModel private[ml] ( @Since("1.6.0") object Word2VecModel extends MLReadable[Word2VecModel] { - private case class Data(word: String, vector: Array[Float]) + private[Word2VecModel] case class Data(word: String, vector: Array[Float]) private[Word2VecModel] class Word2VecModelWriter(instance: Word2VecModel) extends MLWriter { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 04a3b6dd413b4..a24493bb7a8f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -145,9 +145,9 @@ object KMeansModel extends Loader[KMeansModel] { } } - private case class Cluster(id: Int, point: Vector) + private[KMeansModel] case class Cluster(id: Int, point: Vector) - private object Cluster { + private[KMeansModel] object Cluster { def apply(r: Row): Cluster = { Cluster(r.getInt(0), r.getAs[Vector](1)) } From 559f411da83856a81ac39cf79df8487cc5a06134 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Mon, 4 Jan 2021 15:44:42 -0800 Subject: [PATCH 0953/1009] [SPARK-33908][CORE][FOLLOWUP] Correct Scaladoc of resolveDependencyPaths/resolveMavenDependencies ### What changes were proposed in this pull request? Fix un-correct doc of last change https://github.com/apache/spark/pull/30922#discussion_r551453193 ### Why are the changes needed? FIx doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Builds finished correctly. Closes #31016 from AngersZhuuuu/SPARK-33908-FOLLOW-UP. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 6 +++--- .../main/scala/org/apache/spark/util/DependencyUtils.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0cf309f148156..8bf7795b7bfe4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -1194,11 +1194,11 @@ private[spark] object SparkSubmitUtils { } /** - * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath + * Output a list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). 
* @param artifacts Sequence of dependencies that were resolved and retrieved - * @param cacheDirectory directory where jars are cached - * @return a comma-delimited list of paths for the dependencies + * @param cacheDirectory Directory where jars are cached + * @return List of paths for the dependencies */ def resolveDependencyPaths( artifacts: Array[AnyRef], diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index 789811fa5f3a4..60e866a556796 100644 --- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -129,7 +129,7 @@ private[spark] object DependencyUtils extends Logging { * 2. exclude: exclusion list when download Ivy URI jar and dependency jars. * The `exclude` parameter content is a ',' separated `group:module` pair string : * `exclude=group:module,group:module...` - * @return Comma separated string list of jars downloaded. + * @return List of jars downloaded. */ def resolveMavenDependencies(uri: URI): Seq[String] = { val ivyProperties = DependencyUtils.getIvyProperties() From bb6d6b560287ac83e79012fe8dcbe5dcd2e7a904 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 5 Jan 2021 11:01:31 +0900 Subject: [PATCH 0954/1009] [SPARK-33964][SQL] Combine distinct unions in more cases ### What changes were proposed in this pull request? Added the `RemoveNoopOperators` rule to optimization batch `Union`. Also made sure that the `RemoveNoopOperators` would be idempotent. ### Why are the changes needed? In several TPCDS queries the `CombineUnions` rule does not manage to combine unions, because they have noop `Project`s between them. The `Project`s will be removed by `RemoveNoopOperators`, but by then `ReplaceDistinctWithAggregate` has been applied and there are aggregates between the unions. Adding a copy of `RemoveNoopOperators` earlier in the optimization chain allows `CombineUnions` to work on more queries. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UTs and the output of `PlanStabilitySuite` Closes #30996 from tanelk/SPARK-33964_combine_unions. 
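The `transform` to `transformUp` switch is what makes the rule idempotent: with a bottom-up traversal the inner no-op `Project` is removed before its parent is inspected, so the parent then sees a child with the same output and is removed in the same pass, whereas a top-down `transform` strips only the outermost layer per run. A small sketch using the catalyst test DSL, in the spirit of the new `RemoveNoopOperatorsSuite` (the relation and column names are arbitrary):

```
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

val testRelation = LocalRelation('a.int, 'b.int, 'c.int)

// Both Projects select exactly the relation's output, so both are no-ops.
// One bottom-up execution of RemoveNoopOperators reduces this to the bare
// LocalRelation; the old top-down version needed a second run for that.
val plan = testRelation
  .select('a, 'b, 'c)
  .select('a, 'b, 'c)
  .analyze
```

Running `RemoveNoopOperators` this early, in the `Union` batch, leaves adjacent `Union` nodes exposed for `CombineUnions` before `ReplaceDistinctWithAggregate` puts aggregates between them, which is what produces the TPCDS plan changes in this patch.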
Authored-by: tanel.kiis@gmail.com Signed-off-by: HyukjinKwon --- .../sql/catalyst/optimizer/Optimizer.scala | 3 +- .../optimizer/RemoveNoopOperatorsSuite.scala | 57 + .../approved-plans-v1_4/q75.sf100/explain.txt | 732 ++++++------ .../q75.sf100/simplified.txt | 314 +++-- .../approved-plans-v1_4/q75/explain.txt | 606 +++++----- .../approved-plans-v1_4/q75/simplified.txt | 236 ++-- .../q14a.sf100/explain.txt | 1042 ++++++++--------- .../q14a.sf100/simplified.txt | 736 ++++++------ .../approved-plans-v2_7/q14a/explain.txt | 994 ++++++++-------- .../approved-plans-v2_7/q14a/simplified.txt | 646 +++++----- .../q36a.sf100/explain.txt | 192 ++- .../q36a.sf100/simplified.txt | 114 +- .../approved-plans-v2_7/q36a/explain.txt | 192 ++- .../approved-plans-v2_7/q36a/simplified.txt | 112 +- .../approved-plans-v2_7/q5a.sf100/explain.txt | 272 ++--- .../q5a.sf100/simplified.txt | 274 +++-- .../approved-plans-v2_7/q5a/explain.txt | 262 ++--- .../approved-plans-v2_7/q5a/simplified.txt | 258 ++-- .../q70a.sf100/explain.txt | 226 ++-- .../q70a.sf100/simplified.txt | 160 ++- .../approved-plans-v2_7/q70a/explain.txt | 226 ++-- .../approved-plans-v2_7/q70a/simplified.txt | 160 ++- .../approved-plans-v2_7/q75.sf100/explain.txt | 732 ++++++------ .../q75.sf100/simplified.txt | 314 +++-- .../approved-plans-v2_7/q75/explain.txt | 606 +++++----- .../approved-plans-v2_7/q75/simplified.txt | 236 ++-- .../q77a.sf100/explain.txt | 294 +++-- .../q77a.sf100/simplified.txt | 290 +++-- .../approved-plans-v2_7/q77a/explain.txt | 294 +++-- .../approved-plans-v2_7/q77a/simplified.txt | 290 +++-- .../q80a.sf100/explain.txt | 332 +++--- .../q80a.sf100/simplified.txt | 356 +++--- .../approved-plans-v2_7/q80a/explain.txt | 302 +++-- .../approved-plans-v2_7/q80a/simplified.txt | 306 +++-- .../q86a.sf100/explain.txt | 178 ++- .../q86a.sf100/simplified.txt | 94 +- .../approved-plans-v2_7/q86a/explain.txt | 178 ++- .../approved-plans-v2_7/q86a/simplified.txt | 94 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 18 + 39 files changed, 5986 insertions(+), 6742 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 47260cfb59bb1..f61fad7c3ef54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -156,6 +156,7 @@ abstract class Optimizer(catalogManager: CatalogManager) // - Call CombineUnions again in Batch("Operator Optimizations"), // since the other rules might make two separate Unions operators adjacent. Batch("Union", Once, + RemoveNoopOperators, CombineUnions) :: Batch("OptimizeLimitZero", Once, OptimizeLimitZero) :: @@ -490,7 +491,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { * Remove no-op operators from the query plan that do not make any modifications. 
*/ object RemoveNoopOperators extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan transform { + def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // Eliminate no-op Projects case p @ Project(_, child) if child.sameOutput(p) => child diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala new file mode 100644 index 0000000000000..cedd21d2bf522 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveNoopOperatorsSuite.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class RemoveNoopOperatorsSuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("RemoveNoopOperators", Once, + RemoveNoopOperators) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.int) + + test("Remove all redundant projections in one iteration") { + val originalQuery = testRelation + .select('a, 'b, 'c) + .select('a, 'b, 'c) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + + comparePlans(optimized, testRelation) + } + + test("Remove all redundant windows in one iteration") { + val originalQuery = testRelation + .window(Nil, Nil, Nil) + .window(Nil, Nil, Nil) + .analyze + + val optimized = Optimize.execute(originalQuery.analyze) + + comparePlans(optimized, testRelation) + } +} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt index 39748bdd2772b..1d546a445b202 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/explain.txt @@ -1,142 +1,134 @@ == Physical Plan == -TakeOrderedAndProject (138) -+- * Project (137) - +- * SortMergeJoin Inner (136) - :- * Sort (74) - : +- Exchange (73) - : +- * HashAggregate (72) - : +- Exchange (71) - : +- * HashAggregate (70) - : +- * HashAggregate (69) - : +- Exchange (68) - : +- * HashAggregate (67) - : +- Union (66) - : :- * HashAggregate (47) - : : +- Exchange (46) - : : +- * HashAggregate (45) - : : +- Union (44) - : : :- * Project (25) - : : : +- SortMergeJoin LeftOuter (24) - : : : :- * Sort (18) - : : 
: : +- Exchange (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- * Sort (23) - : : : +- Exchange (22) - : : : +- * Filter (21) - : : : +- * ColumnarToRow (20) - : : : +- Scan parquet default.catalog_returns (19) - : : +- * Project (43) - : : +- SortMergeJoin LeftOuter (42) - : : :- * Sort (36) - : : : +- Exchange (35) - : : : +- * Project (34) - : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : :- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Filter (28) - : : : : : +- * ColumnarToRow (27) - : : : : : +- Scan parquet default.store_sales (26) - : : : : +- ReusedExchange (29) - : : : +- ReusedExchange (32) - : : +- * Sort (41) - : : +- Exchange (40) - : : +- * Filter (39) - : : +- * ColumnarToRow (38) - : : +- Scan parquet default.store_returns (37) - : +- * Project (65) - : +- SortMergeJoin LeftOuter (64) - : :- * Sort (58) - : : +- Exchange (57) - : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.web_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- * Sort (63) - : +- Exchange (62) - : +- * Filter (61) - : +- * ColumnarToRow (60) - : +- Scan parquet default.web_returns (59) - +- * Sort (135) - +- Exchange (134) - +- * HashAggregate (133) - +- Exchange (132) - +- * HashAggregate (131) - +- * HashAggregate (130) - +- Exchange (129) - +- * HashAggregate (128) - +- Union (127) - :- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- Union (108) - : :- * Project (92) - : : +- SortMergeJoin LeftOuter (91) - : : :- * Sort (88) - : : : +- Exchange (87) - : : : +- * Project (86) - : : : +- * BroadcastHashJoin Inner BuildRight (85) - : : : :- * Project (80) - : : : : +- * BroadcastHashJoin Inner BuildRight (79) - : : : : :- * Filter (77) - : : : : : +- * ColumnarToRow (76) - : : : : : +- Scan parquet default.catalog_sales (75) - : : : : +- ReusedExchange (78) - : : : +- BroadcastExchange (84) - : : : +- * Filter (83) - : : : +- * ColumnarToRow (82) - : : : +- Scan parquet default.date_dim (81) - : : +- * Sort (90) - : : +- ReusedExchange (89) - : +- * Project (107) - : +- SortMergeJoin LeftOuter (106) - : :- * Sort (103) - : : +- Exchange (102) - : : +- * Project (101) - : : +- * BroadcastHashJoin Inner BuildRight (100) - : : :- * Project (98) - : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : :- * Filter (95) - : : : : +- * ColumnarToRow (94) - : : : : +- Scan parquet default.store_sales (93) - : : : +- ReusedExchange (96) - : : +- ReusedExchange (99) - : +- * Sort (105) - : +- ReusedExchange (104) - +- * Project (126) - +- SortMergeJoin LeftOuter (125) - :- * Sort (122) - : +- Exchange (121) - : +- * Project (120) - : +- * BroadcastHashJoin Inner BuildRight (119) - : :- * Project (117) - : : +- * BroadcastHashJoin Inner BuildRight 
(116) - : : :- * Filter (114) - : : : +- * ColumnarToRow (113) - : : : +- Scan parquet default.web_sales (112) - : : +- ReusedExchange (115) - : +- ReusedExchange (118) - +- * Sort (124) - +- ReusedExchange (123) +TakeOrderedAndProject (130) ++- * Project (129) + +- * SortMergeJoin Inner (128) + :- * Sort (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- Union (62) + : :- * Project (25) + : : +- SortMergeJoin LeftOuter (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.catalog_returns (19) + : :- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (61) + : +- SortMergeJoin LeftOuter (60) + : :- * Sort (54) + : : +- Exchange (53) + : : +- * Project (52) + : : +- * BroadcastHashJoin Inner BuildRight (51) + : : :- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * Filter (46) + : : : : +- * ColumnarToRow (45) + : : : : +- Scan parquet default.web_sales (44) + : : : +- ReusedExchange (47) + : : +- ReusedExchange (50) + : +- * Sort (59) + : +- Exchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.web_returns (55) + +- * Sort (127) + +- Exchange (126) + +- * HashAggregate (125) + +- Exchange (124) + +- * HashAggregate (123) + +- * HashAggregate (122) + +- Exchange (121) + +- * HashAggregate (120) + +- Union (119) + :- * Project (88) + : +- SortMergeJoin LeftOuter (87) + : :- * Sort (84) + : : +- Exchange (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.catalog_sales (71) + : : : +- ReusedExchange (74) + : : +- BroadcastExchange (80) + : : +- * Filter (79) + : : +- * ColumnarToRow (78) + : : +- Scan parquet default.date_dim (77) + : +- * Sort (86) + : +- ReusedExchange (85) + :- * Project (103) + : +- SortMergeJoin LeftOuter (102) + : :- * Sort (99) + : : +- Exchange (98) + : : +- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * Project (94) + : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : :- * Filter (91) + : : : : +- * 
ColumnarToRow (90) + : : : : +- Scan parquet default.store_sales (89) + : : : +- ReusedExchange (92) + : : +- ReusedExchange (95) + : +- * Sort (101) + : +- ReusedExchange (100) + +- * Project (118) + +- SortMergeJoin LeftOuter (117) + :- * Sort (114) + : +- Exchange (113) + : +- * Project (112) + : +- * BroadcastHashJoin Inner BuildRight (111) + : :- * Project (109) + : : +- * BroadcastHashJoin Inner BuildRight (108) + : : :- * Filter (106) + : : : +- * ColumnarToRow (105) + : : : +- Scan parquet default.web_sales (104) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- * Sort (116) + +- ReusedExchange (115) (1) Scan parquet default.catalog_sales @@ -213,7 +205,7 @@ Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, (17) Exchange Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#16] (18) Sort [codegen id : 4] Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -235,7 +227,7 @@ Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) (22) Exchange Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#21] (23) Sort [codegen id : 6] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] @@ -290,7 +282,7 @@ Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity (35) Exchange Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 11] Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -312,7 +304,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (40) Exchange Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] (41) Sort [codegen id : 13] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] @@ -327,426 +319,386 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 
d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(44) Union - -(45) HashAggregate [codegen id : 15] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(46) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] - -(47) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(48) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(44) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(45) ColumnarToRow [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(50) Filter [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(46) Filter [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(51) ReusedExchange [Reuses operator id: 8] +(47) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(52) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_item_sk#39] +(48) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#38] Right keys [1]: [i_item_sk#6] Join condition: None -(53) Project [codegen id : 19] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(49) Project [codegen id : 17] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11] -(54) ReusedExchange [Reuses operator id: 14] +(50) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(55) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#38] +(51) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#37] Right keys [1]: [d_date_sk#13] Join condition: None -(56) Project [codegen id : 19] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(52) Project [codegen id : 17] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(57) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] +(53) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#42] -(58) Sort [codegen id : 20] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(54) Sort [codegen id : 18] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(59) Scan parquet default.web_returns -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(55) Scan parquet default.web_returns +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(60) ColumnarToRow [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(56) ColumnarToRow [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(61) Filter [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Condition : (isnotnull(wr_order_number#45) AND isnotnull(wr_item_sk#44)) +(57) Filter [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Condition : (isnotnull(wr_order_number#44) AND isnotnull(wr_item_sk#43)) -(62) Exchange -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: 
hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] +(58) Exchange +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: hashpartitioning(wr_order_number#44, wr_item_sk#43, 5), ENSURE_REQUIREMENTS, [id=#47] -(63) Sort [codegen id : 22] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(64) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(60) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(65) Project [codegen id : 23] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(61) Project [codegen id : 21] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#49] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(66) Union +(62) Union -(67) HashAggregate [codegen id : 24] +(63) HashAggregate [codegen id : 22] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(68) Exchange +(64) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#50] -(69) HashAggregate [codegen id : 25] +(65) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate 
Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(70) HashAggregate [codegen id : 25] +(66) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#52, sum#53] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] -(71) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] +(67) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#55] -(72) HashAggregate [codegen id : 26] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +(68) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#56, sum(UnscaledValue(sales_amt#23))#57] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#56 AS sales_cnt#58, MakeDecimal(sum(UnscaledValue(sales_amt#23))#57,18,2) AS sales_amt#59] -(73) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] +(69) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#60] -(74) Sort [codegen id : 27] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +(70) Sort [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 -(75) Scan parquet default.catalog_sales +(71) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in 
comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(76) ColumnarToRow [codegen id : 30] +(72) ColumnarToRow [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(77) Filter [codegen id : 30] +(73) Filter [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(78) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(74) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(79) BroadcastHashJoin [codegen id : 30] +(75) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(80) Project [codegen id : 30] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(76) Project [codegen id : 28] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(81) Scan parquet default.date_dim -Output [2]: [d_date_sk#67, d_year#68] +(77) Scan parquet default.date_dim +Output [2]: [d_date_sk#66, d_year#67] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(82) ColumnarToRow [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] +(78) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] -(83) Filter [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] -Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) +(79) Filter [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] +Condition : ((isnotnull(d_year#67) AND (d_year#67 = 2001)) AND isnotnull(d_date_sk#66)) -(84) BroadcastExchange -Input [2]: [d_date_sk#67, d_year#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +(80) BroadcastExchange +Input [2]: [d_date_sk#66, d_year#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] -(85) BroadcastHashJoin [codegen id : 30] +(81) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(86) Project [codegen id : 30] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(82) Project [codegen id : 28] +Output [9]: [cs_item_sk#2, 
cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(87) Exchange -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] +(83) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#69] -(88) Sort [codegen id : 31] -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(84) Sort [codegen id : 29] +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 -(89) ReusedExchange [Reuses operator id: 22] +(85) ReusedExchange [Reuses operator id: 22] Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(90) Sort [codegen id : 33] +(86) Sort [codegen id : 31] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 -(91) SortMergeJoin +(87) SortMergeJoin Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None -(92) Project [codegen id : 34] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +(88) Project [codegen id : 32] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(93) Scan parquet default.store_sales +(89) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 
37] +(90) ColumnarToRow [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] -(95) Filter [codegen id : 37] +(91) Filter [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) -(96) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(92) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(97) BroadcastHashJoin [codegen id : 37] +(93) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(98) Project [codegen id : 37] -Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(94) Project [codegen id : 35] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(99) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(95) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(100) BroadcastHashJoin [codegen id : 37] +(96) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(101) Project [codegen id : 37] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(97) Project [codegen id : 35] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(102) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] +(98) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#70] -(103) Sort [codegen id : 38] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, 
ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(99) Sort [codegen id : 36] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 -(104) ReusedExchange [Reuses operator id: 40] +(100) ReusedExchange [Reuses operator id: 40] Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(105) Sort [codegen id : 40] +(101) Sort [codegen id : 38] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 -(106) SortMergeJoin +(102) SortMergeJoin Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] Join condition: None -(107) Project [codegen id : 41] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] - -(108) Union - -(109) HashAggregate [codegen id : 42] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] - -(110) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] - -(111) HashAggregate [codegen id : 43] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(103) Project [codegen id : 39] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#71, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#72] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(112) Scan parquet default.web_sales -Output [5]: 
[ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(104) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(113) ColumnarToRow [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(105) ColumnarToRow [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(114) Filter [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(106) Filter [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(115) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(107) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(116) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_item_sk#39] -Right keys [1]: [i_item_sk#62] +(108) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#38] +Right keys [1]: [i_item_sk#61] Join condition: None -(117) Project [codegen id : 46] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(109) Project [codegen id : 42] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(118) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(110) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(119) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#38] -Right keys [1]: [d_date_sk#67] +(111) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#66] Join condition: None -(120) Project [codegen id : 46] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(112) Project [codegen id : 42] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, 
i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(121) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] +(113) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#73] -(122) Sort [codegen id : 47] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(114) Sort [codegen id : 43] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(123) ReusedExchange [Reuses operator id: 62] -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(115) ReusedExchange [Reuses operator id: 58] +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(124) Sort [codegen id : 49] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(116) Sort [codegen id : 45] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(125) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(117) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(126) Project [codegen id : 50] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(118) Project [codegen id : 46] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#74, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#75] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, 
wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(127) Union +(119) Union -(128) HashAggregate [codegen id : 51] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(120) HashAggregate [codegen id : 47] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(129) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] +(121) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#76] -(130) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(122) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(131) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(123) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#79, sum#80] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Aggregate Attributes [2]: [sum#77, sum#78] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] -(132) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] +(124) Exchange +Input [7]: [d_year#67, i_brand_id#62, 
i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#81] -(133) HashAggregate [codegen id : 53] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(125) HashAggregate [codegen id : 49] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#82, sum(UnscaledValue(sales_amt#23))#83] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum(cast(sales_cnt#22 as bigint))#82 AS sales_cnt#84, MakeDecimal(sum(UnscaledValue(sales_amt#23))#83,18,2) AS sales_amt#85] -(134) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] +(126) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: hashpartitioning(i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#86] -(135) Sort [codegen id : 54] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 +(127) Sort [codegen id : 50] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: [i_brand_id#62 ASC NULLS FIRST, i_class_id#63 ASC NULLS FIRST, i_category_id#64 ASC NULLS FIRST, i_manufact_id#65 ASC NULLS FIRST], false, 0 -(136) SortMergeJoin [codegen id : 55] +(128) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#58 as decimal(17,2))) / promote_precision(cast(sales_cnt#84 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(137) Project [codegen id : 55] -Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, 
CheckOverflow((promote_precision(cast(sales_amt#60 as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +(129) Project [codegen id : 51] +Output [10]: [d_year#67 AS prev_year#87, d_year#14 AS year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#84 AS prev_yr_cnt#89, sales_cnt#58 AS curr_yr_cnt#90, (sales_cnt#58 - sales_cnt#84) AS sales_cnt_diff#91, CheckOverflow((promote_precision(cast(sales_amt#59 as decimal(19,2))) - promote_precision(cast(sales_amt#85 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#92] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59, d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] -(138) TakeOrderedAndProject -Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] -Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +(130) TakeOrderedAndProject +Input [10]: [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] +Arguments: 100, [sales_cnt_diff#91 ASC NULLS FIRST], [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt index d8d1a3976559d..bac8f252c2983 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75.sf100/simplified.txt @@ -1,113 +1,105 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] - WholeStageCodegen (55) + WholeStageCodegen (51) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] InputAdapter - WholeStageCodegen (27) + WholeStageCodegen (25) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (26) + WholeStageCodegen (24) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 - WholeStageCodegen (25) + WholeStageCodegen (23) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] 
InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (24) + WholeStageCodegen (22) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (16) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #4 - WholeStageCodegen (15) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (7) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (4) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #5 - WholeStageCodegen (3) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Sort [cr_order_number,cr_item_sk] - InputAdapter - Exchange [cr_order_number,cr_item_sk] #8 - WholeStageCodegen (5) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (14) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (11) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #9 - WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales 
[ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (13) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - Exchange [sr_ticket_number,sr_item_sk] #10 - WholeStageCodegen (12) - Filter [sr_ticket_number,sr_item_sk] + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] ColumnarToRow InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (23) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (21) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin 
[ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (20) + WholeStageCodegen (18) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #11 - WholeStageCodegen (19) + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -117,108 +109,100 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (22) + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (20) Sort [wr_order_number,wr_item_sk] InputAdapter - Exchange [wr_order_number,wr_item_sk] #12 - WholeStageCodegen (21) + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - WholeStageCodegen (54) + WholeStageCodegen (50) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 - WholeStageCodegen (53) + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #14 - WholeStageCodegen (52) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #15 - WholeStageCodegen (51) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (43) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #16 - WholeStageCodegen (42) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (34) - Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (31) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #17 - WholeStageCodegen (30) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (29) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (33) - Sort [cr_order_number,cr_item_sk] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #8 - WholeStageCodegen (41) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (38) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #19 - WholeStageCodegen (37) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (40) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #10 - WholeStageCodegen (50) + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + 
ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (38) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (47) + WholeStageCodegen (43) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #20 - WholeStageCodegen (46) + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -228,10 +212,10 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (49) + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (45) Sort [wr_order_number,wr_item_sk] InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #12 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt index 292a44930ed3d..3d52a795bb44e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/explain.txt @@ -1,121 +1,113 @@ == Physical Plan == -TakeOrderedAndProject (117) -+- * Project (116) - +- * BroadcastHashJoin Inner BuildRight (115) - :- * HashAggregate (63) - : +- Exchange (62) - : +- * HashAggregate (61) - : +- * HashAggregate (60) - : +- Exchange (59) - : +- * HashAggregate 
(58) - : +- Union (57) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- Union (38) - : : :- * Project (22) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_returns (17) - : : +- * Project (37) - : : +- * BroadcastHashJoin LeftOuter BuildRight (36) - : : :- * Project (31) - : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : :- * Project (28) - : : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : : :- * Filter (25) - : : : : : +- * ColumnarToRow (24) - : : : : : +- Scan parquet default.store_sales (23) - : : : : +- ReusedExchange (26) - : : : +- ReusedExchange (29) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.store_returns (32) - : +- * Project (56) - : +- * BroadcastHashJoin LeftOuter BuildRight (55) - : :- * Project (50) - : : +- * BroadcastHashJoin Inner BuildRight (49) - : : :- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Filter (44) - : : : : +- * ColumnarToRow (43) - : : : : +- Scan parquet default.web_sales (42) - : : : +- ReusedExchange (45) - : : +- ReusedExchange (48) - : +- BroadcastExchange (54) - : +- * Filter (53) - : +- * ColumnarToRow (52) - : +- Scan parquet default.web_returns (51) - +- BroadcastExchange (114) - +- * HashAggregate (113) - +- Exchange (112) - +- * HashAggregate (111) - +- * HashAggregate (110) - +- Exchange (109) - +- * HashAggregate (108) - +- Union (107) - :- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- Union (91) - : :- * Project (78) - : : +- * BroadcastHashJoin LeftOuter BuildRight (77) - : : :- * Project (75) - : : : +- * BroadcastHashJoin Inner BuildRight (74) - : : : :- * Project (69) - : : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : : :- * Filter (66) - : : : : : +- * ColumnarToRow (65) - : : : : : +- Scan parquet default.catalog_sales (64) - : : : : +- ReusedExchange (67) - : : : +- BroadcastExchange (73) - : : : +- * Filter (72) - : : : +- * ColumnarToRow (71) - : : : +- Scan parquet default.date_dim (70) - : : +- ReusedExchange (76) - : +- * Project (90) - : +- * BroadcastHashJoin LeftOuter BuildRight (89) - : :- * Project (87) - : : +- * BroadcastHashJoin Inner BuildRight (86) - : : :- * Project (84) - : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : :- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.store_sales (79) - : : : +- ReusedExchange (82) - : : +- ReusedExchange (85) - : +- ReusedExchange (88) - +- * Project (106) - +- * BroadcastHashJoin LeftOuter BuildRight (105) - :- * Project (103) - : +- * BroadcastHashJoin Inner BuildRight (102) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Filter (97) - : : : +- * ColumnarToRow 
(96) - : : : +- Scan parquet default.web_sales (95) - : : +- ReusedExchange (98) - : +- ReusedExchange (101) - +- ReusedExchange (104) +TakeOrderedAndProject (109) ++- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * HashAggregate (59) + : +- Exchange (58) + : +- * HashAggregate (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- Union (53) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.catalog_returns (17) + : :- * Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (52) + : +- * BroadcastHashJoin LeftOuter BuildRight (51) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Project (43) + : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Filter (40) + : : : : +- * ColumnarToRow (39) + : : : : +- Scan parquet default.web_sales (38) + : : : +- ReusedExchange (41) + : : +- ReusedExchange (44) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.web_returns (47) + +- BroadcastExchange (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * HashAggregate (102) + +- Exchange (101) + +- * HashAggregate (100) + +- Union (99) + :- * Project (74) + : +- * BroadcastHashJoin LeftOuter BuildRight (73) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet default.catalog_sales (60) + : : : +- ReusedExchange (63) + : : +- BroadcastExchange (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : +- ReusedExchange (72) + :- * Project (86) + : +- * BroadcastHashJoin LeftOuter BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet default.store_sales (75) + : : : +- ReusedExchange (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (98) + +- * BroadcastHashJoin LeftOuter BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Project (92) + : 
: +- * BroadcastHashJoin Inner BuildRight (91) + : : :- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.web_sales (87) + : : +- ReusedExchange (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) (1) Scan parquet default.catalog_sales @@ -282,366 +274,326 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(38) Union - -(39) HashAggregate [codegen id : 9] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(40) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] - -(41) HashAggregate [codegen id : 10] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(42) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(38) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(43) ColumnarToRow [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(39) ColumnarToRow [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(44) Filter [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(40) Filter [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(45) ReusedExchange [Reuses operator id: 8] +(41) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(46) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_item_sk#37] +(42) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_item_sk#36] 
Right keys [1]: [i_item_sk#6] Join condition: None -(47) Project [codegen id : 14] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(43) Project [codegen id : 12] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(48) ReusedExchange [Reuses operator id: 14] +(44) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#36] +(45) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_sold_date_sk#35] Right keys [1]: [d_date_sk#13] Join condition: None -(50) Project [codegen id : 14] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(46) Project [codegen id : 12] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(51) Scan parquet default.web_returns -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(47) Scan parquet default.web_returns +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(48) ColumnarToRow [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(53) Filter [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) +(49) Filter [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Condition : (isnotnull(wr_order_number#41) AND isnotnull(wr_item_sk#40)) -(54) BroadcastExchange -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] +(50) BroadcastExchange +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#44] -(55) BroadcastHashJoin [codegen id : 14] -Left keys [2]: [cast(ws_order_number#38 as 
bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(51) BroadcastHashJoin [codegen id : 12] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(56) Project [codegen id : 14] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(52) Project [codegen id : 12] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#45, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#46] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(57) Union +(53) Union -(58) HashAggregate [codegen id : 15] +(54) HashAggregate [codegen id : 13] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(59) Exchange +(55) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#47] -(60) HashAggregate [codegen id : 16] +(56) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(61) HashAggregate [codegen id : 16] +(57) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#49, sum#50] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Aggregate Attributes [2]: [sum#48, sum#49] +Results [7]: [d_year#14, i_brand_id#7, 
i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] -(62) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] +(58) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#52] -(63) HashAggregate [codegen id : 34] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +(59) HashAggregate [codegen id : 30] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#53, sum(UnscaledValue(sales_amt#22))#54] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#53 AS sales_cnt#55, MakeDecimal(sum(UnscaledValue(sales_amt#22))#54,18,2) AS sales_amt#56] -(64) Scan parquet default.catalog_sales +(60) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(65) ColumnarToRow [codegen id : 20] +(61) ColumnarToRow [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(66) Filter [codegen id : 20] +(62) Filter [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(67) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(63) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(68) BroadcastHashJoin [codegen id : 20] +(64) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(69) Project [codegen id : 20] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(65) Project [codegen id : 18] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, 
cs_ext_sales_price#5, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(70) Scan parquet default.date_dim -Output [2]: [d_date_sk#63, d_year#64] +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#62, d_year#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(71) ColumnarToRow [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] +(67) ColumnarToRow [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] -(72) Filter [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] -Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) +(68) Filter [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] +Condition : ((isnotnull(d_year#63) AND (d_year#63 = 2001)) AND isnotnull(d_date_sk#62)) -(73) BroadcastExchange -Input [2]: [d_date_sk#63, d_year#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] +(69) BroadcastExchange +Input [2]: [d_date_sk#62, d_year#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] -(74) BroadcastHashJoin [codegen id : 20] +(70) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(75) Project [codegen id : 20] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(71) Project [codegen id : 18] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(76) ReusedExchange [Reuses operator id: 20] +(72) ReusedExchange [Reuses operator id: 20] Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(77) BroadcastHashJoin [codegen id : 20] +(73) BroadcastHashJoin [codegen id : 18] Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#17, cr_item_sk#16] Join condition: None -(78) Project [codegen id : 20] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +(74) Project [codegen id : 18] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as 
decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(79) Scan parquet default.store_sales +(75) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 24] +(76) ColumnarToRow [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -(81) Filter [codegen id : 24] +(77) Filter [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) -(82) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(83) BroadcastHashJoin [codegen id : 24] +(79) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(84) Project [codegen id : 24] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(80) Project [codegen id : 22] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(85) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(81) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(86) BroadcastHashJoin [codegen id : 24] +(82) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(87) Project [codegen id : 24] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(83) Project [codegen id : 22] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(88) ReusedExchange [Reuses operator 
id: 35] +(84) ReusedExchange [Reuses operator id: 35] Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(89) BroadcastHashJoin [codegen id : 24] +(85) BroadcastHashJoin [codegen id : 22] Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] Join condition: None -(90) Project [codegen id : 24] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(91) Union - -(92) HashAggregate [codegen id : 25] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(93) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] - -(94) HashAggregate [codegen id : 26] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(86) Project [codegen id : 22] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(95) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(87) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(96) ColumnarToRow [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(88) ColumnarToRow [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, 
ws_ext_sales_price#39] -(97) Filter [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(89) Filter [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(98) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(90) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(99) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#58] +(91) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [i_item_sk#57] Join condition: None -(100) Project [codegen id : 30] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(92) Project [codegen id : 26] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(101) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(93) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(102) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#63] +(94) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#62] Join condition: None -(103) Project [codegen id : 30] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(95) Project [codegen id : 26] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(104) ReusedExchange [Reuses operator id: 54] -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(96) ReusedExchange [Reuses operator id: 50] +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(105) BroadcastHashJoin [codegen id : 30] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(97) BroadcastHashJoin [codegen id : 26] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: 
[wr_order_number#41, wr_item_sk#40] Join condition: None -(106) Project [codegen id : 30] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(98) Project [codegen id : 26] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#67, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#68] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(107) Union +(99) Union -(108) HashAggregate [codegen id : 31] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(100) HashAggregate [codegen id : 27] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(109) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] +(101) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#69] -(110) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(102) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, 
i_manufact_id#61, sales_cnt#21, sales_amt#22] -(111) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(103) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#72, sum#73] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Aggregate Attributes [2]: [sum#70, sum#71] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] -(112) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), true, [id=#76] +(104) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, 5), ENSURE_REQUIREMENTS, [id=#74] -(113) HashAggregate [codegen id : 33] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(105) HashAggregate [codegen id : 29] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#75, sum(UnscaledValue(sales_amt#22))#76] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum(cast(sales_cnt#21 as bigint))#75 AS sales_cnt#77, MakeDecimal(sum(UnscaledValue(sales_amt#22))#76,18,2) AS sales_amt#78] -(114) BroadcastExchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] +(106) BroadcastExchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#79] -(115) BroadcastHashJoin [codegen id : 34] +(107) BroadcastHashJoin [codegen id : 30] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as 
decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#55 as decimal(17,2))) / promote_precision(cast(sales_cnt#77 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(116) Project [codegen id : 34] -Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +(108) Project [codegen id : 30] +Output [10]: [d_year#63 AS prev_year#80, d_year#14 AS year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#77 AS prev_yr_cnt#82, sales_cnt#55 AS curr_yr_cnt#83, (sales_cnt#55 - sales_cnt#77) AS sales_cnt_diff#84, CheckOverflow((promote_precision(cast(sales_amt#56 as decimal(19,2))) - promote_precision(cast(sales_amt#78 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#85] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#55, sales_amt#56, d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] -(117) TakeOrderedAndProject -Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] -Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +(109) TakeOrderedAndProject +Input [10]: [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] +Arguments: 100, [sales_cnt_diff#84 ASC NULLS FIRST], [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt index 298a06b87762f..0eeca93ed7d08 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q75/simplified.txt @@ -1,83 +1,75 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt,sales_amt_diff] - WholeStageCodegen (34) + WholeStageCodegen (30) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] 
[sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (16) + WholeStageCodegen (14) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #2 - WholeStageCodegen (15) + WholeStageCodegen (13) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (10) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (9) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (4) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (8) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + WholeStageCodegen (4) + Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_year] #5 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (14) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (12) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -89,79 +81,71 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #4 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (13) + BroadcastExchange #7 + WholeStageCodegen (11) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (33) + BroadcastExchange #8 + WholeStageCodegen (29) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #10 - WholeStageCodegen (32) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + WholeStageCodegen (28) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #11 - WholeStageCodegen (31) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + WholeStageCodegen (27) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union + WholeStageCodegen (18) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #5 + WholeStageCodegen (22) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #6 WholeStageCodegen (26) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #12 - WholeStageCodegen (25) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (20) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (18) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 - WholeStageCodegen (24) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - ReusedExchange [d_date_sk,d_year] #13 - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 - WholeStageCodegen (30) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -173,8 +157,8 @@ TakeOrderedAndProject [sales_cnt_diff,prev_year,year,i_brand_id,i_class_id,i_cat InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange 
[d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #11 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 38292528b42fc..7be9447d16b45 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -1,226 +1,214 @@ == Physical Plan == -TakeOrderedAndProject (222) -+- * HashAggregate (221) - +- Exchange (220) - +- * HashAggregate (219) - +- Union (218) - :- * HashAggregate (198) - : +- Exchange (197) - : +- * HashAggregate (196) - : +- Union (195) - : :- * HashAggregate (175) - : : +- Exchange (174) - : : +- * HashAggregate (173) - : : +- Union (172) - : : :- * HashAggregate (152) - : : : +- Exchange (151) - : : : +- * HashAggregate (150) - : : : +- Union (149) - : : : :- * HashAggregate (129) - : : : : +- Exchange (128) - : : : : +- * HashAggregate (127) - : : : : +- Union (126) - : : : : :- * Project (87) - : : : : : +- * Filter (86) - : : : : : +- * HashAggregate (85) - : : : : : +- Exchange (84) - : : : : : +- * HashAggregate (83) - : : : : : +- * Project (82) - : : : : : +- * BroadcastHashJoin Inner BuildRight (81) - : : : : : :- * Project (71) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (70) - : : : : : : :- SortMergeJoin LeftSemi (64) - : : : : : : : :- * Sort (5) - : : : : : : : : +- Exchange (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- * Sort (63) - : : : : : : : +- Exchange (62) - : : : : : : : +- * Project (61) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : : : : :- * Filter (8) - : : : : : : : : +- * ColumnarToRow (7) - : : : : : : : : +- Scan parquet default.item (6) - : : : : : : : +- BroadcastExchange (59) - : : : : : : : +- * HashAggregate (58) - : : : : : : : +- * HashAggregate (57) - : : : : : : : +- * HashAggregate (56) - : : : : : : : +- Exchange (55) - : : : : : : : +- * HashAggregate (54) - : : : : : : : +- SortMergeJoin LeftSemi (53) - : : : : : : : :- SortMergeJoin LeftSemi (41) - : : : : : : : : :- * Sort (26) - : : : : : : : : : +- Exchange (25) - : : : : : : : : : +- * Project (24) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) - : : : : : : : : : :- * Project (18) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) - : : : : : : : : : : :- * Filter (11) - : : : : : : : : : : : +- * ColumnarToRow (10) - : : : : : : : : : : : +- Scan parquet default.store_sales (9) - : : : : : : : : : : +- BroadcastExchange (16) - : : : : : : : : : : +- * Project (15) - : : : : : : : : : : +- * Filter (14) - : : : : : : : : : : +- * ColumnarToRow (13) - : : : : : : : : : : +- Scan parquet default.date_dim (12) - : : : : : : : : : +- BroadcastExchange (22) - : : : : : : : : : +- * Filter (21) - : : : : : : : : : +- * ColumnarToRow (20) - : : : : : : : : : +- Scan parquet default.item (19) - : : : : : : : : +- * Sort (40) - : : : : : : : : +- Exchange (39) - : : : : : : : : +- * Project (38) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (37) - : : : : : : : : :- * Project (32) - : : : : : : : : : +- * BroadcastHashJoin Inner 
BuildRight (31) - : : : : : : : : : :- * Filter (29) - : : : : : : : : : : +- * ColumnarToRow (28) - : : : : : : : : : : +- Scan parquet default.catalog_sales (27) - : : : : : : : : : +- ReusedExchange (30) - : : : : : : : : +- BroadcastExchange (36) - : : : : : : : : +- * Filter (35) - : : : : : : : : +- * ColumnarToRow (34) - : : : : : : : : +- Scan parquet default.item (33) - : : : : : : : +- * Sort (52) - : : : : : : : +- Exchange (51) - : : : : : : : +- * Project (50) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (49) - : : : : : : : :- * Project (47) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : : : : : : :- * Filter (44) - : : : : : : : : : +- * ColumnarToRow (43) - : : : : : : : : : +- Scan parquet default.web_sales (42) - : : : : : : : : +- ReusedExchange (45) - : : : : : : : +- ReusedExchange (48) - : : : : : : +- BroadcastExchange (69) - : : : : : : +- * Project (68) - : : : : : : +- * Filter (67) - : : : : : : +- * ColumnarToRow (66) - : : : : : : +- Scan parquet default.date_dim (65) - : : : : : +- BroadcastExchange (80) - : : : : : +- SortMergeJoin LeftSemi (79) - : : : : : :- * Sort (76) - : : : : : : +- Exchange (75) - : : : : : : +- * Filter (74) - : : : : : : +- * ColumnarToRow (73) - : : : : : : +- Scan parquet default.item (72) - : : : : : +- * Sort (78) - : : : : : +- ReusedExchange (77) - : : : : :- * Project (106) - : : : : : +- * Filter (105) - : : : : : +- * HashAggregate (104) - : : : : : +- Exchange (103) - : : : : : +- * HashAggregate (102) - : : : : : +- * Project (101) - : : : : : +- * BroadcastHashJoin Inner BuildRight (100) - : : : : : :- * Project (98) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : : : : :- SortMergeJoin LeftSemi (95) - : : : : : : : :- * Sort (92) - : : : : : : : : +- Exchange (91) - : : : : : : : : +- * Filter (90) - : : : : : : : : +- * ColumnarToRow (89) - : : : : : : : : +- Scan parquet default.catalog_sales (88) - : : : : : : : +- * Sort (94) - : : : : : : : +- ReusedExchange (93) - : : : : : : +- ReusedExchange (96) - : : : : : +- ReusedExchange (99) - : : : : +- * Project (125) - : : : : +- * Filter (124) - : : : : +- * HashAggregate (123) - : : : : +- Exchange (122) - : : : : +- * HashAggregate (121) - : : : : +- * Project (120) - : : : : +- * BroadcastHashJoin Inner BuildRight (119) - : : : : :- * Project (117) - : : : : : +- * BroadcastHashJoin Inner BuildRight (116) - : : : : : :- SortMergeJoin LeftSemi (114) - : : : : : : :- * Sort (111) - : : : : : : : +- Exchange (110) - : : : : : : : +- * Filter (109) - : : : : : : : +- * ColumnarToRow (108) - : : : : : : : +- Scan parquet default.web_sales (107) - : : : : : : +- * Sort (113) - : : : : : : +- ReusedExchange (112) - : : : : : +- ReusedExchange (115) - : : : : +- ReusedExchange (118) - : : : +- * HashAggregate (148) - : : : +- Exchange (147) - : : : +- * HashAggregate (146) - : : : +- * HashAggregate (145) - : : : +- Exchange (144) - : : : +- * HashAggregate (143) - : : : +- Union (142) - : : : :- * Project (133) - : : : : +- * Filter (132) - : : : : +- * HashAggregate (131) - : : : : +- ReusedExchange (130) - : : : :- * Project (137) - : : : : +- * Filter (136) - : : : : +- * HashAggregate (135) - : : : : +- ReusedExchange (134) - : : : +- * Project (141) - : : : +- * Filter (140) - : : : +- * HashAggregate (139) - : : : +- ReusedExchange (138) - : : +- * HashAggregate (171) - : : +- Exchange (170) - : : +- * HashAggregate (169) - : : +- * HashAggregate (168) - : : +- Exchange (167) - : : +- * HashAggregate (166) - : : +- 
Union (165) - : : :- * Project (156) - : : : +- * Filter (155) - : : : +- * HashAggregate (154) - : : : +- ReusedExchange (153) - : : :- * Project (160) - : : : +- * Filter (159) - : : : +- * HashAggregate (158) - : : : +- ReusedExchange (157) - : : +- * Project (164) - : : +- * Filter (163) - : : +- * HashAggregate (162) - : : +- ReusedExchange (161) - : +- * HashAggregate (194) - : +- Exchange (193) - : +- * HashAggregate (192) - : +- * HashAggregate (191) - : +- Exchange (190) - : +- * HashAggregate (189) - : +- Union (188) - : :- * Project (179) - : : +- * Filter (178) - : : +- * HashAggregate (177) - : : +- ReusedExchange (176) - : :- * Project (183) - : : +- * Filter (182) - : : +- * HashAggregate (181) - : : +- ReusedExchange (180) - : +- * Project (187) - : +- * Filter (186) - : +- * HashAggregate (185) - : +- ReusedExchange (184) - +- * HashAggregate (217) - +- Exchange (216) - +- * HashAggregate (215) - +- * HashAggregate (214) - +- Exchange (213) - +- * HashAggregate (212) - +- Union (211) - :- * Project (202) - : +- * Filter (201) - : +- * HashAggregate (200) - : +- ReusedExchange (199) - :- * Project (206) - : +- * Filter (205) - : +- * HashAggregate (204) - : +- ReusedExchange (203) - +- * Project (210) - +- * Filter (209) - +- * HashAggregate (208) - +- ReusedExchange (207) +TakeOrderedAndProject (210) ++- * HashAggregate (209) + +- Exchange (208) + +- * HashAggregate (207) + +- Union (206) + :- * HashAggregate (129) + : +- Exchange (128) + : +- * HashAggregate (127) + : +- Union (126) + : :- * Project (87) + : : +- * Filter (86) + : : +- * HashAggregate (85) + : : +- Exchange (84) + : : +- * HashAggregate (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (71) + : : : +- * BroadcastHashJoin Inner BuildRight (70) + : : : :- SortMergeJoin LeftSemi (64) + : : : : :- * Sort (5) + : : : : : +- Exchange (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Sort (63) + : : : : +- Exchange (62) + : : : : +- * Project (61) + : : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : : :- * Filter (8) + : : : : : +- * ColumnarToRow (7) + : : : : : +- Scan parquet default.item (6) + : : : : +- BroadcastExchange (59) + : : : : +- * HashAggregate (58) + : : : : +- * HashAggregate (57) + : : : : +- * HashAggregate (56) + : : : : +- Exchange (55) + : : : : +- * HashAggregate (54) + : : : : +- SortMergeJoin LeftSemi (53) + : : : : :- SortMergeJoin LeftSemi (41) + : : : : : :- * Sort (26) + : : : : : : +- Exchange (25) + : : : : : : +- * Project (24) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (23) + : : : : : : :- * Project (18) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (17) + : : : : : : : :- * Filter (11) + : : : : : : : : +- * ColumnarToRow (10) + : : : : : : : : +- Scan parquet default.store_sales (9) + : : : : : : : +- BroadcastExchange (16) + : : : : : : : +- * Project (15) + : : : : : : : +- * Filter (14) + : : : : : : : +- * ColumnarToRow (13) + : : : : : : : +- Scan parquet default.date_dim (12) + : : : : : : +- BroadcastExchange (22) + : : : : : : +- * Filter (21) + : : : : : : +- * ColumnarToRow (20) + : : : : : : +- Scan parquet default.item (19) + : : : : : +- * Sort (40) + : : : : : +- Exchange (39) + : : : : : +- * Project (38) + : : : : : +- * BroadcastHashJoin Inner BuildRight (37) + : : : : : :- * Project (32) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (31) + : : : : : : :- * Filter (29) + : : : : : : : +- * 
ColumnarToRow (28) + : : : : : : : +- Scan parquet default.catalog_sales (27) + : : : : : : +- ReusedExchange (30) + : : : : : +- BroadcastExchange (36) + : : : : : +- * Filter (35) + : : : : : +- * ColumnarToRow (34) + : : : : : +- Scan parquet default.item (33) + : : : : +- * Sort (52) + : : : : +- Exchange (51) + : : : : +- * Project (50) + : : : : +- * BroadcastHashJoin Inner BuildRight (49) + : : : : :- * Project (47) + : : : : : +- * BroadcastHashJoin Inner BuildRight (46) + : : : : : :- * Filter (44) + : : : : : : +- * ColumnarToRow (43) + : : : : : : +- Scan parquet default.web_sales (42) + : : : : : +- ReusedExchange (45) + : : : : +- ReusedExchange (48) + : : : +- BroadcastExchange (69) + : : : +- * Project (68) + : : : +- * Filter (67) + : : : +- * ColumnarToRow (66) + : : : +- Scan parquet default.date_dim (65) + : : +- BroadcastExchange (80) + : : +- SortMergeJoin LeftSemi (79) + : : :- * Sort (76) + : : : +- Exchange (75) + : : : +- * Filter (74) + : : : +- * ColumnarToRow (73) + : : : +- Scan parquet default.item (72) + : : +- * Sort (78) + : : +- ReusedExchange (77) + : :- * Project (106) + : : +- * Filter (105) + : : +- * HashAggregate (104) + : : +- Exchange (103) + : : +- * HashAggregate (102) + : : +- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * Project (98) + : : : +- * BroadcastHashJoin Inner BuildRight (97) + : : : :- SortMergeJoin LeftSemi (95) + : : : : :- * Sort (92) + : : : : : +- Exchange (91) + : : : : : +- * Filter (90) + : : : : : +- * ColumnarToRow (89) + : : : : : +- Scan parquet default.catalog_sales (88) + : : : : +- * Sort (94) + : : : : +- ReusedExchange (93) + : : : +- ReusedExchange (96) + : : +- ReusedExchange (99) + : +- * Project (125) + : +- * Filter (124) + : +- * HashAggregate (123) + : +- Exchange (122) + : +- * HashAggregate (121) + : +- * Project (120) + : +- * BroadcastHashJoin Inner BuildRight (119) + : :- * Project (117) + : : +- * BroadcastHashJoin Inner BuildRight (116) + : : :- SortMergeJoin LeftSemi (114) + : : : :- * Sort (111) + : : : : +- Exchange (110) + : : : : +- * Filter (109) + : : : : +- * ColumnarToRow (108) + : : : : +- Scan parquet default.web_sales (107) + : : : +- * Sort (113) + : : : +- ReusedExchange (112) + : : +- ReusedExchange (115) + : +- ReusedExchange (118) + :- * HashAggregate (148) + : +- Exchange (147) + : +- * HashAggregate (146) + : +- * HashAggregate (145) + : +- Exchange (144) + : +- * HashAggregate (143) + : +- Union (142) + : :- * Project (133) + : : +- * Filter (132) + : : +- * HashAggregate (131) + : : +- ReusedExchange (130) + : :- * Project (137) + : : +- * Filter (136) + : : +- * HashAggregate (135) + : : +- ReusedExchange (134) + : +- * Project (141) + : +- * Filter (140) + : +- * HashAggregate (139) + : +- ReusedExchange (138) + :- * HashAggregate (167) + : +- Exchange (166) + : +- * HashAggregate (165) + : +- * HashAggregate (164) + : +- Exchange (163) + : +- * HashAggregate (162) + : +- Union (161) + : :- * Project (152) + : : +- * Filter (151) + : : +- * HashAggregate (150) + : : +- ReusedExchange (149) + : :- * Project (156) + : : +- * Filter (155) + : : +- * HashAggregate (154) + : : +- ReusedExchange (153) + : +- * Project (160) + : +- * Filter (159) + : +- * HashAggregate (158) + : +- ReusedExchange (157) + :- * HashAggregate (186) + : +- Exchange (185) + : +- * HashAggregate (184) + : +- * HashAggregate (183) + : +- Exchange (182) + : +- * HashAggregate (181) + : +- Union (180) + : :- * Project (171) + : : +- * Filter (170) + : : +- * HashAggregate (169) + 
: : +- ReusedExchange (168) + : :- * Project (175) + : : +- * Filter (174) + : : +- * HashAggregate (173) + : : +- ReusedExchange (172) + : +- * Project (179) + : +- * Filter (178) + : +- * HashAggregate (177) + : +- ReusedExchange (176) + +- * HashAggregate (205) + +- Exchange (204) + +- * HashAggregate (203) + +- * HashAggregate (202) + +- Exchange (201) + +- * HashAggregate (200) + +- Union (199) + :- * Project (190) + : +- * Filter (189) + : +- * HashAggregate (188) + : +- ReusedExchange (187) + :- * Project (194) + : +- * Filter (193) + : +- * HashAggregate (192) + : +- ReusedExchange (191) + +- * Project (198) + +- * Filter (197) + +- * HashAggregate (196) + +- ReusedExchange (195) (1) Scan parquet default.store_sales @@ -239,7 +227,7 @@ Condition : (isnotnull(ss_item_sk#2) AND isnotnull(ss_sold_date_sk#1)) (4) Exchange Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#5] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#5] (5) Sort [codegen id : 2] Input [4]: [ss_sold_date_sk#1, ss_item_sk#2, ss_quantity#3, ss_list_price#4] @@ -333,7 +321,7 @@ Input [5]: [ss_item_sk#2, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id (25) Exchange Input [3]: [brand_id#14, class_id#15, category_id#16] -Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), true, [id=#17] +Arguments: hashpartitioning(coalesce(brand_id#14, 0), isnull(brand_id#14), coalesce(class_id#15, 0), isnull(class_id#15), coalesce(category_id#16, 0), isnull(category_id#16), 5), ENSURE_REQUIREMENTS, [id=#17] (26) Sort [codegen id : 6] Input [3]: [brand_id#14, class_id#15, category_id#16] @@ -394,7 +382,7 @@ Input [5]: [cs_item_sk#19, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_i (39) Exchange Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#21] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), ENSURE_REQUIREMENTS, [id=#21] (40) Sort [codegen id : 10] Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] @@ -445,7 +433,7 @@ Input [5]: [ws_item_sk#23, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_i (51) Exchange Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), true, [id=#24] +Arguments: hashpartitioning(coalesce(i_brand_id#7, 0), isnull(i_brand_id#7), coalesce(i_class_id#8, 0), isnull(i_class_id#8), coalesce(i_category_id#9, 0), isnull(i_category_id#9), 5), ENSURE_REQUIREMENTS, [id=#24] (52) Sort [codegen id : 14] Input [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] @@ -465,7 +453,7 @@ Results [3]: [brand_id#14, class_id#15, category_id#16] (55) Exchange Input [3]: [brand_id#14, class_id#15, category_id#16] -Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), true, [id=#25] +Arguments: hashpartitioning(brand_id#14, class_id#15, category_id#16, 5), ENSURE_REQUIREMENTS, [id=#25] (56) HashAggregate [codegen id : 16] Input [3]: 
[brand_id#14, class_id#15, category_id#16] @@ -503,7 +491,7 @@ Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#1 (62) Exchange Input [1]: [ss_item_sk#27] -Arguments: hashpartitioning(ss_item_sk#27, 5), true, [id=#28] +Arguments: hashpartitioning(ss_item_sk#27, 5), ENSURE_REQUIREMENTS, [id=#28] (63) Sort [codegen id : 18] Input [1]: [ss_item_sk#27] @@ -561,7 +549,7 @@ Condition : isnotnull(i_item_sk#6) (75) Exchange Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#31] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#31] (76) Sort [codegen id : 21] Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] @@ -601,7 +589,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, c (84) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#39] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#39] (85) HashAggregate [codegen id : 39] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#36, isEmpty#37, count#38] @@ -634,7 +622,7 @@ Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_sold_date_sk#18)) (91) Exchange Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] -Arguments: hashpartitioning(cs_item_sk#19, 5), true, [id=#50] +Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#50] (92) Sort [codegen id : 41] Input [4]: [cs_sold_date_sk#18, cs_item_sk#19, cs_quantity#48, cs_list_price#49] @@ -685,7 +673,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, c (103) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#57] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#57] (104) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#54, isEmpty#55, count#56] @@ -718,7 +706,7 @@ Condition : (isnotnull(ws_item_sk#23) AND isnotnull(ws_sold_date_sk#22)) (110) Exchange Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] -Arguments: hashpartitioning(ws_item_sk#23, 5), true, [id=#66] +Arguments: hashpartitioning(ws_item_sk#23, 5), ENSURE_REQUIREMENTS, [id=#66] (111) Sort [codegen id : 80] Input [4]: [ws_sold_date_sk#22, ws_item_sk#23, ws_quantity#64, ws_list_price#65] @@ -769,7 +757,7 @@ Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, c (122) Exchange Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#73] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#73] (123) HashAggregate [codegen id : 117] Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#70, isEmpty#71, count#72] @@ -797,7 +785,7 @@ Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, i (128) Exchange Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#86] +Arguments: 
hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#86] (129) HashAggregate [codegen id : 119] Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#83, isEmpty#84, sum#85] @@ -871,7 +859,7 @@ Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, (144) Exchange Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#117] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#117] (145) HashAggregate [codegen id : 238] Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#114, isEmpty#115, sum#116] @@ -889,7 +877,7 @@ Results [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum# (147) Exchange Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), true, [id=#126] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, 5), ENSURE_REQUIREMENTS, [id=#126] (148) HashAggregate [codegen id : 239] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, sum#123, isEmpty#124, sum#125] @@ -898,536 +886,476 @@ Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] Aggregate Attributes [2]: [sum(sum_sales#89)#127, sum(number_sales#90)#128] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, null AS i_category_id#129, sum(sum_sales#89)#127 AS sum(sum_sales)#130, sum(number_sales#90)#128 AS sum(number_sales)#131] -(149) Union +(149) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#132, isEmpty#133, count#134] -(150) HashAggregate [codegen id : 240] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(151) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#132] - -(152) HashAggregate [codegen id : 241] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(153) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] - -(154) HashAggregate [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#133, isEmpty#134, count#135] +(150) HashAggregate [codegen id : 278] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#132, isEmpty#133, count#134] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sales#42, count(1)#137 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135, count(1)#136] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sales#42, count(1)#136 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#135 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] -(155) Filter [codegen id : 280] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(151) Filter [codegen id : 278] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(156) Project [codegen id : 280] +(152) Project [codegen id : 278] Output [6]: [store AS channel#47, 
i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#138] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#137] -(157) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] +(153) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#138, isEmpty#139, count#140] -(158) HashAggregate [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#139, isEmpty#140, count#141] +(154) HashAggregate [codegen id : 317] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#138, isEmpty#139, count#140] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142, count(1)#143] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sales#60, count(1)#143 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#142 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141, count(1)#142] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sales#60, count(1)#142 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#141 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] -(159) Filter [codegen id : 319] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as 
decimal(12,2)))), DecimalType(18,2), true))#144] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(155) Filter [codegen id : 317] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(160) Project [codegen id : 319] -Output [6]: [catalog AS channel#145, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#144] +(156) Project [codegen id : 317] +Output [6]: [catalog AS channel#144, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#143] -(161) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] +(157) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#145, isEmpty#146, count#147] -(162) HashAggregate [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#146, isEmpty#147, count#148] +(158) HashAggregate [codegen id : 356] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#145, isEmpty#146, count#147] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149, count(1)#150] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sales#76, 
count(1)#150 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#149 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148, count(1)#149] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 AS sales#76, count(1)#149 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#148 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] -(163) Filter [codegen id : 358] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(159) Filter [codegen id : 356] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(164) Project [codegen id : 358] -Output [6]: [web AS channel#152, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#151] +(160) Project [codegen id : 356] +Output [6]: [web AS channel#151, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, 
number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#150] -(165) Union +(161) Union -(166) HashAggregate [codegen id : 359] +(162) HashAggregate [codegen id : 357] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#153, isEmpty#154, sum#155] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +Aggregate Attributes [3]: [sum#152, isEmpty#153, sum#154] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] -(167) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#159] +(163) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#158] -(168) HashAggregate [codegen id : 360] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#156, isEmpty#157, sum#158] +(164) HashAggregate [codegen id : 358] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#155, isEmpty#156, sum#157] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#160, sum(number_sales#43)#161] -Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#160 AS sum_sales#89, sum(number_sales#43)#161 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#159, sum(number_sales#43)#160] +Results [4]: [channel#47, i_brand_id#7, sum(sales#42)#159 AS sum_sales#89, sum(number_sales#43)#160 AS number_sales#90] -(169) HashAggregate [codegen id : 360] +(165) HashAggregate [codegen id : 358] Input [4]: [channel#47, i_brand_id#7, sum_sales#89, number_sales#90] Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#162, isEmpty#163, sum#164] -Results [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +Aggregate Attributes [3]: [sum#161, isEmpty#162, sum#163] +Results [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] -(170) Exchange -Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] -Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), true, [id=#168] +(166) Exchange +Input [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] +Arguments: hashpartitioning(channel#47, i_brand_id#7, 5), ENSURE_REQUIREMENTS, [id=#167] -(171) HashAggregate [codegen id : 361] -Input [5]: [channel#47, i_brand_id#7, sum#165, isEmpty#166, sum#167] +(167) HashAggregate [codegen id : 359] +Input [5]: [channel#47, i_brand_id#7, sum#164, isEmpty#165, sum#166] Keys [2]: [channel#47, i_brand_id#7] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#169, sum(number_sales#90)#170] -Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#171, null AS i_category_id#172, 
sum(sum_sales#89)#169 AS sum(sum_sales)#173, sum(number_sales#90)#170 AS sum(number_sales)#174] - -(172) Union - -(173) HashAggregate [codegen id : 362] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +Aggregate Attributes [2]: [sum(sum_sales#89)#168, sum(number_sales#90)#169] +Results [6]: [channel#47, i_brand_id#7, null AS i_class_id#170, null AS i_category_id#171, sum(sum_sales#89)#168 AS sum(sum_sales)#172, sum(number_sales#90)#169 AS sum(number_sales)#173] -(174) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#175] +(168) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#174, isEmpty#175, count#176] -(175) HashAggregate [codegen id : 363] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(176) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] - -(177) HashAggregate [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#176, isEmpty#177, count#178] +(169) HashAggregate [codegen id : 398] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#174, isEmpty#175, count#176] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179, count(1)#180] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sales#42, count(1)#180 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177, count(1)#178] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 AS sales#42, count(1)#178 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#177 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] -(178) Filter [codegen id : 402] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(170) Filter [codegen id : 398] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(179) Project [codegen id : 402] +(171) Project [codegen id : 398] Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#181] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#179] -(180) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] +(172) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#180, isEmpty#181, count#182] -(181) HashAggregate [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#182, isEmpty#183, count#184] +(173) HashAggregate [codegen id : 437] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#180, isEmpty#181, count#182] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185, count(1)#186] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#60, count(1)#186 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183, count(1)#184] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 AS sales#60, count(1)#184 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#183 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] -(182) Filter [codegen id : 441] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(174) Filter [codegen id : 437] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as 
decimal(12,2)))), DecimalType(18,2), true))#185 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(183) Project [codegen id : 441] -Output [6]: [catalog AS channel#188, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#187] +(175) Project [codegen id : 437] +Output [6]: [catalog AS channel#186, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#185] -(184) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] +(176) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#187, isEmpty#188, count#189] -(185) HashAggregate [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#189, isEmpty#190, count#191] +(177) HashAggregate [codegen id : 476] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#187, isEmpty#188, count#189] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192, count(1)#193] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sales#76, count(1)#193 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190, count(1)#191] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190 AS sales#76, count(1)#191 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#190 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] -(186) Filter [codegen id : 480] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(178) Filter [codegen id : 476] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(187) Project [codegen id : 480] -Output [6]: [web AS channel#195, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#194] +(179) Project [codegen id : 476] +Output [6]: [web AS channel#193, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#192] -(188) Union +(180) Union -(189) HashAggregate [codegen id : 481] +(181) HashAggregate [codegen id : 477] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#196, isEmpty#197, sum#198] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +Aggregate Attributes [3]: [sum#194, isEmpty#195, sum#196] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#197, isEmpty#198, sum#199] -(190) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#202] +(182) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#197, isEmpty#198, sum#199] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#200] -(191) HashAggregate [codegen id : 482] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#199, isEmpty#200, sum#201] +(183) HashAggregate [codegen id : 478] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#197, isEmpty#198, sum#199] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#203, sum(number_sales#43)#204] -Results [3]: [channel#47, sum(sales#42)#203 AS sum_sales#89, sum(number_sales#43)#204 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#201, sum(number_sales#43)#202] +Results [3]: [channel#47, sum(sales#42)#201 AS sum_sales#89, sum(number_sales#43)#202 AS number_sales#90] -(192) HashAggregate [codegen id : 482] +(184) HashAggregate [codegen id : 478] Input [3]: [channel#47, sum_sales#89, number_sales#90] Keys [1]: [channel#47] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#205, isEmpty#206, sum#207] -Results [4]: [channel#47, sum#208, isEmpty#209, sum#210] +Aggregate Attributes [3]: [sum#203, isEmpty#204, sum#205] +Results [4]: [channel#47, sum#206, isEmpty#207, sum#208] -(193) Exchange -Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] -Arguments: hashpartitioning(channel#47, 5), true, [id=#211] +(185) Exchange +Input [4]: [channel#47, sum#206, isEmpty#207, sum#208] +Arguments: hashpartitioning(channel#47, 5), ENSURE_REQUIREMENTS, [id=#209] -(194) HashAggregate [codegen id : 483] -Input [4]: [channel#47, sum#208, isEmpty#209, sum#210] +(186) HashAggregate [codegen id : 479] +Input [4]: [channel#47, sum#206, isEmpty#207, sum#208] Keys [1]: [channel#47] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#212, sum(number_sales#90)#213] -Results [6]: [channel#47, null AS i_brand_id#214, null AS i_class_id#215, null AS i_category_id#216, sum(sum_sales#89)#212 AS sum(sum_sales)#217, sum(number_sales#90)#213 AS sum(number_sales)#218] +Aggregate Attributes [2]: [sum(sum_sales#89)#210, sum(number_sales#90)#211] +Results [6]: [channel#47, null AS i_brand_id#212, null AS i_class_id#213, null AS i_category_id#214, sum(sum_sales#89)#210 AS sum(sum_sales)#215, sum(number_sales#90)#211 AS sum(number_sales)#216] -(195) Union - -(196) HashAggregate [codegen id : 484] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] +(187) ReusedExchange [Reuses operator id: 84] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#217, isEmpty#218, count#219] -(197) Exchange -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#219] - -(198) HashAggregate [codegen id : 485] -Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Functions: [] 
-Aggregate Attributes: [] -Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] - -(199) ReusedExchange [Reuses operator id: 84] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] - -(200) HashAggregate [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#220, isEmpty#221, count#222] +(188) HashAggregate [codegen id : 518] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#217, isEmpty#218, count#219] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223, count(1)#224] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sales#42, count(1)#224 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#223 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220, count(1)#221] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 AS sales#42, count(1)#221 AS number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] -(201) Filter [codegen id : 524] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(189) Filter [codegen id : 518] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, 
number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(202) Project [codegen id : 524] +(190) Project [codegen id : 518] Output [6]: [store AS channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#225] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#222] -(203) ReusedExchange [Reuses operator id: 103] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +(191) ReusedExchange [Reuses operator id: 103] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#223, isEmpty#224, count#225] -(204) HashAggregate [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#226, isEmpty#227, count#228] +(192) HashAggregate [codegen id : 557] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#223, isEmpty#224, count#225] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229, count(1)#230] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sales#60, count(1)#230 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#229 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226, count(1)#227] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as 
decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226 AS sales#60, count(1)#227 AS number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#226 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] -(205) Filter [codegen id : 563] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(193) Filter [codegen id : 557] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(206) Project [codegen id : 563] -Output [6]: [catalog AS channel#232, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#231] +(194) Project [codegen id : 557] +Output [6]: [catalog AS channel#229, i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#60, number_sales#61, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#48 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#49 as decimal(12,2)))), DecimalType(18,2), true))#228] -(207) ReusedExchange [Reuses operator id: 122] -Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] +(195) ReusedExchange [Reuses operator id: 122] +Output [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#230, isEmpty#231, count#232] -(208) HashAggregate [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum#233, isEmpty#234, count#235] +(196) HashAggregate [codegen id : 596] +Input [6]: [i_brand_id#7, i_class_id#8, 
i_category_id#9, sum#230, isEmpty#231, count#232] Keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236, count(1)#237] -Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sales#76, count(1)#237 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#236 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233, count(1)#234] +Results [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233 AS sales#76, count(1)#234 AS number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#233 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] -(209) Filter [codegen id : 602] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) +(197) Filter [codegen id : 596] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235) AND 
(cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#45, [id=#46] as decimal(32,6)))) -(210) Project [codegen id : 602] -Output [6]: [web AS channel#239, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] -Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#238] +(198) Project [codegen id : 596] +Output [6]: [web AS channel#236, i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77] +Input [6]: [i_brand_id#7, i_class_id#8, i_category_id#9, sales#76, number_sales#77, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#64 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#65 as decimal(12,2)))), DecimalType(18,2), true))#235] -(211) Union +(199) Union -(212) HashAggregate [codegen id : 603] +(200) HashAggregate [codegen id : 597] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sales#42, number_sales#43] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [partial_sum(sales#42), partial_sum(number_sales#43)] -Aggregate Attributes [3]: [sum#240, isEmpty#241, sum#242] -Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +Aggregate Attributes [3]: [sum#237, isEmpty#238, sum#239] +Results [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] -(213) Exchange -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), true, [id=#246] +(201) Exchange +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, 5), ENSURE_REQUIREMENTS, [id=#243] -(214) HashAggregate [codegen id : 604] -Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#243, isEmpty#244, sum#245] +(202) HashAggregate [codegen id : 598] +Input [7]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum#240, isEmpty#241, sum#242] Keys [4]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9] Functions [2]: [sum(sales#42), sum(number_sales#43)] -Aggregate Attributes [2]: [sum(sales#42)#247, sum(number_sales#43)#248] -Results [2]: [sum(sales#42)#247 AS sum_sales#89, sum(number_sales#43)#248 AS number_sales#90] +Aggregate Attributes [2]: [sum(sales#42)#244, sum(number_sales#43)#245] +Results [2]: [sum(sales#42)#244 AS sum_sales#89, sum(number_sales#43)#245 AS number_sales#90] -(215) HashAggregate [codegen id : 604] +(203) HashAggregate [codegen id : 598] Input [2]: [sum_sales#89, number_sales#90] Keys: [] Functions [2]: [partial_sum(sum_sales#89), partial_sum(number_sales#90)] -Aggregate Attributes [3]: [sum#249, isEmpty#250, sum#251] -Results [3]: [sum#252, isEmpty#253, sum#254] +Aggregate Attributes [3]: [sum#246, isEmpty#247, sum#248] +Results [3]: [sum#249, isEmpty#250, sum#251] -(216) Exchange -Input [3]: [sum#252, isEmpty#253, sum#254] -Arguments: SinglePartition, true, [id=#255] +(204) Exchange +Input [3]: [sum#249, 
isEmpty#250, sum#251] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#252] -(217) HashAggregate [codegen id : 605] -Input [3]: [sum#252, isEmpty#253, sum#254] +(205) HashAggregate [codegen id : 599] +Input [3]: [sum#249, isEmpty#250, sum#251] Keys: [] Functions [2]: [sum(sum_sales#89), sum(number_sales#90)] -Aggregate Attributes [2]: [sum(sum_sales#89)#256, sum(number_sales#90)#257] -Results [6]: [null AS channel#258, null AS i_brand_id#259, null AS i_class_id#260, null AS i_category_id#261, sum(sum_sales#89)#256 AS sum(sum_sales)#262, sum(number_sales#90)#257 AS sum(number_sales)#263] +Aggregate Attributes [2]: [sum(sum_sales#89)#253, sum(number_sales#90)#254] +Results [6]: [null AS channel#255, null AS i_brand_id#256, null AS i_class_id#257, null AS i_category_id#258, sum(sum_sales#89)#253 AS sum(sum_sales)#259, sum(number_sales#90)#254 AS sum(number_sales)#260] -(218) Union +(206) Union -(219) HashAggregate [codegen id : 606] +(207) HashAggregate [codegen id : 600] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -(220) Exchange +(208) Exchange Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), true, [id=#264] +Arguments: hashpartitioning(channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90, 5), ENSURE_REQUIREMENTS, [id=#261] -(221) HashAggregate [codegen id : 607] +(209) HashAggregate [codegen id : 601] Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Keys [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Functions: [] Aggregate Attributes: [] Results [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] -(222) TakeOrderedAndProject +(210) TakeOrderedAndProject Input [6]: [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] Arguments: 100, [channel#47 ASC NULLS FIRST, i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST], [channel#47, i_brand_id#7, i_class_id#8, i_category_id#9, sum_sales#89, number_sales#90] ===== Subqueries ===== Subquery:1 Hosting operator id = 86 Hosting Expression = Subquery scalar-subquery#45, [id=#46] -* HashAggregate (252) -+- Exchange (251) - +- * HashAggregate (250) - +- Union (249) - :- * Project (232) - : +- * BroadcastHashJoin Inner BuildRight (231) - : :- * Filter (225) - : : +- * ColumnarToRow (224) - : : +- Scan parquet default.store_sales (223) - : +- BroadcastExchange (230) - : +- * Project (229) - : +- * Filter (228) - : +- * ColumnarToRow (227) - : +- Scan parquet default.date_dim (226) - :- * Project (242) - : +- * BroadcastHashJoin Inner BuildRight (241) - : :- * Filter (235) - : : +- * ColumnarToRow (234) - : : +- Scan parquet default.catalog_sales (233) - : +- BroadcastExchange (240) - : +- * Project (239) - : +- * Filter (238) - : +- * ColumnarToRow (237) - : +- Scan parquet default.date_dim (236) - +- * Project (248) - +- * BroadcastHashJoin Inner BuildRight (247) - :- * Filter (245) - : +- * ColumnarToRow (244) - : +- Scan parquet 
default.web_sales (243) - +- ReusedExchange (246) - - -(223) Scan parquet default.store_sales +* HashAggregate (240) ++- Exchange (239) + +- * HashAggregate (238) + +- Union (237) + :- * Project (220) + : +- * BroadcastHashJoin Inner BuildRight (219) + : :- * Filter (213) + : : +- * ColumnarToRow (212) + : : +- Scan parquet default.store_sales (211) + : +- BroadcastExchange (218) + : +- * Project (217) + : +- * Filter (216) + : +- * ColumnarToRow (215) + : +- Scan parquet default.date_dim (214) + :- * Project (230) + : +- * BroadcastHashJoin Inner BuildRight (229) + : :- * Filter (223) + : : +- * ColumnarToRow (222) + : : +- Scan parquet default.catalog_sales (221) + : +- BroadcastExchange (228) + : +- * Project (227) + : +- * Filter (226) + : +- * ColumnarToRow (225) + : +- Scan parquet default.date_dim (224) + +- * Project (236) + +- * BroadcastHashJoin Inner BuildRight (235) + :- * Filter (233) + : +- * ColumnarToRow (232) + : +- Scan parquet default.web_sales (231) + +- ReusedExchange (234) + + +(211) Scan parquet default.store_sales Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(224) ColumnarToRow [codegen id : 2] +(212) ColumnarToRow [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -(225) Filter [codegen id : 2] +(213) Filter [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Condition : isnotnull(ss_sold_date_sk#1) -(226) Scan parquet default.date_dim +(214) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(227) ColumnarToRow [codegen id : 1] +(215) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] -(228) Filter [codegen id : 1] +(216) Filter [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) -(229) Project [codegen id : 1] +(217) Project [codegen id : 1] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(230) BroadcastExchange +(218) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#262] -(231) BroadcastHashJoin [codegen id : 2] +(219) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#10] Join condition: None -(232) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#266, ss_list_price#4 AS list_price#267] +(220) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#263, ss_list_price#4 AS list_price#264] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] -(233) Scan parquet default.catalog_sales +(221) Scan parquet default.catalog_sales Output [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(234) ColumnarToRow [codegen id : 4] +(222) ColumnarToRow [codegen id : 4] Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] -(235) Filter 
[codegen id : 4] +(223) Filter [codegen id : 4] Input [3]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49] Condition : isnotnull(cs_sold_date_sk#18) -(236) Scan parquet default.date_dim +(224) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(237) ColumnarToRow [codegen id : 3] +(225) ColumnarToRow [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] -(238) Filter [codegen id : 3] +(226) Filter [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) -(239) Project [codegen id : 3] +(227) Project [codegen id : 3] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(240) BroadcastExchange +(228) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#268] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#265] -(241) BroadcastHashJoin [codegen id : 4] +(229) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#18] Right keys [1]: [d_date_sk#10] Join condition: None -(242) Project [codegen id : 4] -Output [2]: [cs_quantity#48 AS quantity#269, cs_list_price#49 AS list_price#270] +(230) Project [codegen id : 4] +Output [2]: [cs_quantity#48 AS quantity#266, cs_list_price#49 AS list_price#267] Input [4]: [cs_sold_date_sk#18, cs_quantity#48, cs_list_price#49, d_date_sk#10] -(243) Scan parquet default.web_sales +(231) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(244) ColumnarToRow [codegen id : 6] +(232) ColumnarToRow [codegen id : 6] Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] -(245) Filter [codegen id : 6] +(233) Filter [codegen id : 6] Input [3]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65] Condition : isnotnull(ws_sold_date_sk#22) -(246) ReusedExchange [Reuses operator id: 240] +(234) ReusedExchange [Reuses operator id: 228] Output [1]: [d_date_sk#10] -(247) BroadcastHashJoin [codegen id : 6] +(235) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#22] Right keys [1]: [d_date_sk#10] Join condition: None -(248) Project [codegen id : 6] -Output [2]: [ws_quantity#64 AS quantity#271, ws_list_price#65 AS list_price#272] +(236) Project [codegen id : 6] +Output [2]: [ws_quantity#64 AS quantity#268, ws_list_price#65 AS list_price#269] Input [4]: [ws_sold_date_sk#22, ws_quantity#64, ws_list_price#65, d_date_sk#10] -(249) Union +(237) Union -(250) HashAggregate [codegen id : 7] -Input [2]: [quantity#266, list_price#267] +(238) HashAggregate [codegen id : 7] +Input [2]: [quantity#263, list_price#264] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#273, count#274] -Results [2]: [sum#275, count#276] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as 
decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#270, count#271] +Results [2]: [sum#272, count#273] -(251) Exchange -Input [2]: [sum#275, count#276] -Arguments: SinglePartition, true, [id=#277] +(239) Exchange +Input [2]: [sum#272, count#273] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#274] -(252) HashAggregate [codegen id : 8] -Input [2]: [sum#275, count#276] +(240) HashAggregate [codegen id : 8] +Input [2]: [sum#272, count#273] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#266 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#267 as decimal(12,2)))), DecimalType(18,2), true))#278 AS average_sales#279] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))#275] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#263 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#264 as decimal(12,2)))), DecimalType(18,2), true))#275 AS average_sales#276] Subquery:2 Hosting operator id = 105 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] @@ -1439,22 +1367,22 @@ Subquery:5 Hosting operator id = 136 Hosting Expression = ReusedSubquery Subquer Subquery:6 Hosting operator id = 140 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:7 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:7 Hosting operator id = 151 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:8 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:8 Hosting operator id = 155 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:9 Hosting operator id = 163 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:9 Hosting operator id = 159 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:10 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:10 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:11 Hosting operator id = 182 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:11 Hosting operator id = 174 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:12 Hosting operator id = 186 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:12 Hosting operator id = 178 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:13 Hosting operator id = 201 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, 
[id=#46] +Subquery:13 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:14 Hosting operator id = 205 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:14 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] -Subquery:15 Hosting operator id = 209 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] +Subquery:15 Hosting operator id = 197 Hosting Expression = ReusedSubquery Subquery scalar-subquery#45, [id=#46] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt index 30856e02f2b62..c63f1b8a75643 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/simplified.txt @@ -1,427 +1,403 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - WholeStageCodegen (607) + WholeStageCodegen (601) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 - WholeStageCodegen (606) + WholeStageCodegen (600) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Union - WholeStageCodegen (485) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (119) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #2 - WholeStageCodegen (484) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (118) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (363) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #3 - WholeStageCodegen (362) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (39) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter - Union - WholeStageCodegen (241) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange #17 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #4 - WholeStageCodegen (240) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - 
InputAdapter - Union - WholeStageCodegen (119) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #5 - WholeStageCodegen (118) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (39) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] - InputAdapter - Exchange #20 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] - InputAdapter - Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] - InputAdapter - BroadcastExchange #21 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #22 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [d_date_sk] #22 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #6 - WholeStageCodegen (38) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #18 + WholeStageCodegen (1) + Project [d_date_sk] + 
Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #19 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] #19 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (38) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [ss_item_sk,ss_item_sk] + WholeStageCodegen (2) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #4 + WholeStageCodegen (1) + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + WholeStageCodegen (18) + Sort [ss_item_sk] + InputAdapter + Exchange [ss_item_sk] #5 + WholeStageCodegen (17) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (16) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #7 + WholeStageCodegen (15) + HashAggregate [brand_id,class_id,category_id] + InputAdapter + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (6) + Sort [brand_id,class_id,category_id] InputAdapter - SortMergeJoin [ss_item_sk,ss_item_sk] - WholeStageCodegen (2) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #7 - WholeStageCodegen (1) + Exchange [brand_id,class_id,category_id] #8 + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_item_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales 
[ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] - WholeStageCodegen (18) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #8 - WholeStageCodegen (17) - Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (16) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #10 - WholeStageCodegen (15) - HashAggregate [brand_id,class_id,category_id] - InputAdapter - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - SortMergeJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (6) - Sort [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #11 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (4) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (10) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #14 - WholeStageCodegen (9) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #12 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (8) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (14) - Sort [i_brand_id,i_class_id,i_category_id] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #16 - WholeStageCodegen (13) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [d_date_sk] #12 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 - InputAdapter - BroadcastExchange #17 - WholeStageCodegen (19) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #18 - SortMergeJoin [i_item_sk,ss_item_sk] - WholeStageCodegen (21) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #19 - WholeStageCodegen (20) - Filter [i_item_sk] - ColumnarToRow - 
InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - WholeStageCodegen (37) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] #8 - WholeStageCodegen (78) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #23 - WholeStageCodegen (77) - HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_item_sk,cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (4) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (10) + Sort [i_brand_id,i_class_id,i_category_id] InputAdapter - SortMergeJoin [cs_item_sk,ss_item_sk] - WholeStageCodegen (41) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #24 - WholeStageCodegen (40) + Exchange [i_brand_id,i_class_id,i_category_id] #11 + WholeStageCodegen (9) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_item_sk,cs_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] - WholeStageCodegen (57) - Sort [ss_item_sk] - InputAdapter - ReusedExchange [ss_item_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #17 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #18 - WholeStageCodegen (117) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #25 - WholeStageCodegen (116) - HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - InputAdapter - SortMergeJoin [ws_item_sk,ss_item_sk] - WholeStageCodegen (80) - Sort [ws_item_sk] - InputAdapter - Exchange [ws_item_sk] #26 - WholeStageCodegen (79) - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] - WholeStageCodegen (96) - Sort [ss_item_sk] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (8) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (14) + Sort [i_brand_id,i_class_id,i_category_id] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #13 + WholeStageCodegen (13) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] + InputAdapter + ReusedExchange [d_date_sk] #9 InputAdapter - ReusedExchange [ss_item_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #17 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #18 - WholeStageCodegen (239) - HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (19) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id] #27 - WholeStageCodegen (238) - HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #28 - WholeStageCodegen (237) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (158) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), 
DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (197) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (236) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (361) - HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter - Exchange [channel,i_brand_id] #29 - WholeStageCodegen (360) - HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + BroadcastExchange #15 + SortMergeJoin [i_item_sk,ss_item_sk] + WholeStageCodegen (21) + Sort [i_item_sk] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #30 - WholeStageCodegen (359) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (280) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (319) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (358) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (483) - HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] - InputAdapter - Exchange [channel] #31 - WholeStageCodegen (482) - HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #32 - WholeStageCodegen (481) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (402) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (441) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (480) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 - WholeStageCodegen (605) + Exchange [i_item_sk] #16 + WholeStageCodegen (20) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + WholeStageCodegen (37) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + WholeStageCodegen (78) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [cs_item_sk,ss_item_sk] + WholeStageCodegen (41) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #21 + WholeStageCodegen (40) + 
Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + WholeStageCodegen (57) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #14 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (117) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #22 + WholeStageCodegen (116) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + InputAdapter + SortMergeJoin [ws_item_sk,ss_item_sk] + WholeStageCodegen (80) + Sort [ws_item_sk] + InputAdapter + Exchange [ws_item_sk] #23 + WholeStageCodegen (79) + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + WholeStageCodegen (96) + Sort [ss_item_sk] + InputAdapter + ReusedExchange [ss_item_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #14 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + WholeStageCodegen (239) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #24 + WholeStageCodegen (238) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #25 + WholeStageCodegen (237) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (158) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (197) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (236) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (359) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #26 + WholeStageCodegen (358) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #27 + WholeStageCodegen (357) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (278) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) 
as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (317) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (356) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (479) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #28 + WholeStageCodegen (478) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #29 + WholeStageCodegen (477) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (398) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + 
InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (437) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (476) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 + WholeStageCodegen (599) HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange #33 - WholeStageCodegen (604) + Exchange #30 + WholeStageCodegen (598) HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #34 - WholeStageCodegen (603) + Exchange [channel,i_brand_id,i_class_id,i_category_id] #31 + WholeStageCodegen (597) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (524) + WholeStageCodegen (518) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - 
ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (563) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (557) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #23 - WholeStageCodegen (602) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 + WholeStageCodegen (596) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #25 + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #22 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index 238053a3428e3..4e60a9b6b1547 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -1,210 +1,198 @@ == Physical Plan == -TakeOrderedAndProject (206) -+- * HashAggregate (205) - +- Exchange (204) - +- * HashAggregate (203) - +- Union (202) - :- * HashAggregate (182) - : +- Exchange (181) - : +- * HashAggregate (180) - : +- Union (179) - : :- * HashAggregate (159) - : : +- Exchange (158) - : : +- * HashAggregate (157) - : : +- Union (156) - : : :- * HashAggregate (136) - : : : +- Exchange (135) - : : : +- * HashAggregate (134) - : : : +- Union (133) - : : : :- * HashAggregate (113) - : : : : +- Exchange (112) - : : : : +- * HashAggregate (111) - : : : : +- Union (110) - : : : : :- * Project (77) - : : : : : +- * Filter (76) - : : : : : +- * HashAggregate (75) - : : : : : +- Exchange (74) - : : : : : +- * HashAggregate (73) - : : : : : +- * Project (72) - : : : : : +- * BroadcastHashJoin Inner BuildRight (71) - : : : : : :- * Project (65) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (64) - : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (57) - : : : : : : : :- 
* Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- BroadcastExchange (56) - : : : : : : : +- * Project (55) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (54) - : : : : : : : :- * Filter (6) - : : : : : : : : +- * ColumnarToRow (5) - : : : : : : : : +- Scan parquet default.item (4) - : : : : : : : +- BroadcastExchange (53) - : : : : : : : +- * HashAggregate (52) - : : : : : : : +- * HashAggregate (51) - : : : : : : : +- * HashAggregate (50) - : : : : : : : +- Exchange (49) - : : : : : : : +- * HashAggregate (48) - : : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) - : : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) - : : : : : : : : :- * Project (22) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : : : : : :- * Project (15) - : : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) - : : : : : : : : : : :- * Filter (9) - : : : : : : : : : : : +- * ColumnarToRow (8) - : : : : : : : : : : : +- Scan parquet default.store_sales (7) - : : : : : : : : : : +- BroadcastExchange (13) - : : : : : : : : : : +- * Filter (12) - : : : : : : : : : : +- * ColumnarToRow (11) - : : : : : : : : : : +- Scan parquet default.item (10) - : : : : : : : : : +- BroadcastExchange (20) - : : : : : : : : : +- * Project (19) - : : : : : : : : : +- * Filter (18) - : : : : : : : : : +- * ColumnarToRow (17) - : : : : : : : : : +- Scan parquet default.date_dim (16) - : : : : : : : : +- BroadcastExchange (35) - : : : : : : : : +- * Project (34) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : : : : : : :- * Project (31) - : : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : : : : : : :- * Filter (25) - : : : : : : : : : : +- * ColumnarToRow (24) - : : : : : : : : : : +- Scan parquet default.catalog_sales (23) - : : : : : : : : : +- BroadcastExchange (29) - : : : : : : : : : +- * Filter (28) - : : : : : : : : : +- * ColumnarToRow (27) - : : : : : : : : : +- Scan parquet default.item (26) - : : : : : : : : +- ReusedExchange (32) - : : : : : : : +- BroadcastExchange (46) - : : : : : : : +- * Project (45) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : : : : : :- * Project (42) - : : : : : : : : +- * BroadcastHashJoin Inner BuildRight (41) - : : : : : : : : :- * Filter (39) - : : : : : : : : : +- * ColumnarToRow (38) - : : : : : : : : : +- Scan parquet default.web_sales (37) - : : : : : : : : +- ReusedExchange (40) - : : : : : : : +- ReusedExchange (43) - : : : : : : +- BroadcastExchange (63) - : : : : : : +- * BroadcastHashJoin LeftSemi BuildRight (62) - : : : : : : :- * Filter (60) - : : : : : : : +- * ColumnarToRow (59) - : : : : : : : +- Scan parquet default.item (58) - : : : : : : +- ReusedExchange (61) - : : : : : +- BroadcastExchange (70) - : : : : : +- * Project (69) - : : : : : +- * Filter (68) - : : : : : +- * ColumnarToRow (67) - : : : : : +- Scan parquet default.date_dim (66) - : : : : :- * Project (93) - : : : : : +- * Filter (92) - : : : : : +- * HashAggregate (91) - : : : : : +- Exchange (90) - : : : : : +- * HashAggregate (89) - : : : : : +- * Project (88) - : : : : : +- * BroadcastHashJoin Inner BuildRight (87) - : : : : : :- * Project (85) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (84) - : : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (82) - : : : : : : : :- * Filter (80) - : : : : : : : : +- * ColumnarToRow (79) - : : : : : : : : +- Scan parquet default.catalog_sales (78) - : : : : 
: : : +- ReusedExchange (81) - : : : : : : +- ReusedExchange (83) - : : : : : +- ReusedExchange (86) - : : : : +- * Project (109) - : : : : +- * Filter (108) - : : : : +- * HashAggregate (107) - : : : : +- Exchange (106) - : : : : +- * HashAggregate (105) - : : : : +- * Project (104) - : : : : +- * BroadcastHashJoin Inner BuildRight (103) - : : : : :- * Project (101) - : : : : : +- * BroadcastHashJoin Inner BuildRight (100) - : : : : : :- * BroadcastHashJoin LeftSemi BuildRight (98) - : : : : : : :- * Filter (96) - : : : : : : : +- * ColumnarToRow (95) - : : : : : : : +- Scan parquet default.web_sales (94) - : : : : : : +- ReusedExchange (97) - : : : : : +- ReusedExchange (99) - : : : : +- ReusedExchange (102) - : : : +- * HashAggregate (132) - : : : +- Exchange (131) - : : : +- * HashAggregate (130) - : : : +- * HashAggregate (129) - : : : +- Exchange (128) - : : : +- * HashAggregate (127) - : : : +- Union (126) - : : : :- * Project (117) - : : : : +- * Filter (116) - : : : : +- * HashAggregate (115) - : : : : +- ReusedExchange (114) - : : : :- * Project (121) - : : : : +- * Filter (120) - : : : : +- * HashAggregate (119) - : : : : +- ReusedExchange (118) - : : : +- * Project (125) - : : : +- * Filter (124) - : : : +- * HashAggregate (123) - : : : +- ReusedExchange (122) - : : +- * HashAggregate (155) - : : +- Exchange (154) - : : +- * HashAggregate (153) - : : +- * HashAggregate (152) - : : +- Exchange (151) - : : +- * HashAggregate (150) - : : +- Union (149) - : : :- * Project (140) - : : : +- * Filter (139) - : : : +- * HashAggregate (138) - : : : +- ReusedExchange (137) - : : :- * Project (144) - : : : +- * Filter (143) - : : : +- * HashAggregate (142) - : : : +- ReusedExchange (141) - : : +- * Project (148) - : : +- * Filter (147) - : : +- * HashAggregate (146) - : : +- ReusedExchange (145) - : +- * HashAggregate (178) - : +- Exchange (177) - : +- * HashAggregate (176) - : +- * HashAggregate (175) - : +- Exchange (174) - : +- * HashAggregate (173) - : +- Union (172) - : :- * Project (163) - : : +- * Filter (162) - : : +- * HashAggregate (161) - : : +- ReusedExchange (160) - : :- * Project (167) - : : +- * Filter (166) - : : +- * HashAggregate (165) - : : +- ReusedExchange (164) - : +- * Project (171) - : +- * Filter (170) - : +- * HashAggregate (169) - : +- ReusedExchange (168) - +- * HashAggregate (201) - +- Exchange (200) - +- * HashAggregate (199) - +- * HashAggregate (198) - +- Exchange (197) - +- * HashAggregate (196) - +- Union (195) - :- * Project (186) - : +- * Filter (185) - : +- * HashAggregate (184) - : +- ReusedExchange (183) - :- * Project (190) - : +- * Filter (189) - : +- * HashAggregate (188) - : +- ReusedExchange (187) - +- * Project (194) - +- * Filter (193) - +- * HashAggregate (192) - +- ReusedExchange (191) +TakeOrderedAndProject (194) ++- * HashAggregate (193) + +- Exchange (192) + +- * HashAggregate (191) + +- Union (190) + :- * HashAggregate (113) + : +- Exchange (112) + : +- * HashAggregate (111) + : +- Union (110) + : :- * Project (77) + : : +- * Filter (76) + : : +- * HashAggregate (75) + : : +- Exchange (74) + : : +- * HashAggregate (73) + : : +- * Project (72) + : : +- * BroadcastHashJoin Inner BuildRight (71) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (57) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (56) + : : : : +- * Project (55) + : : : : +- * BroadcastHashJoin Inner 
BuildRight (54) + : : : : :- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.item (4) + : : : : +- BroadcastExchange (53) + : : : : +- * HashAggregate (52) + : : : : +- * HashAggregate (51) + : : : : +- * HashAggregate (50) + : : : : +- Exchange (49) + : : : : +- * HashAggregate (48) + : : : : +- * BroadcastHashJoin LeftSemi BuildRight (47) + : : : : :- * BroadcastHashJoin LeftSemi BuildRight (36) + : : : : : :- * Project (22) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : : : :- * Project (15) + : : : : : : : +- * BroadcastHashJoin Inner BuildRight (14) + : : : : : : : :- * Filter (9) + : : : : : : : : +- * ColumnarToRow (8) + : : : : : : : : +- Scan parquet default.store_sales (7) + : : : : : : : +- BroadcastExchange (13) + : : : : : : : +- * Filter (12) + : : : : : : : +- * ColumnarToRow (11) + : : : : : : : +- Scan parquet default.item (10) + : : : : : : +- BroadcastExchange (20) + : : : : : : +- * Project (19) + : : : : : : +- * Filter (18) + : : : : : : +- * ColumnarToRow (17) + : : : : : : +- Scan parquet default.date_dim (16) + : : : : : +- BroadcastExchange (35) + : : : : : +- * Project (34) + : : : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : : : :- * Project (31) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : : : :- * Filter (25) + : : : : : : : +- * ColumnarToRow (24) + : : : : : : : +- Scan parquet default.catalog_sales (23) + : : : : : : +- BroadcastExchange (29) + : : : : : : +- * Filter (28) + : : : : : : +- * ColumnarToRow (27) + : : : : : : +- Scan parquet default.item (26) + : : : : : +- ReusedExchange (32) + : : : : +- BroadcastExchange (46) + : : : : +- * Project (45) + : : : : +- * BroadcastHashJoin Inner BuildRight (44) + : : : : :- * Project (42) + : : : : : +- * BroadcastHashJoin Inner BuildRight (41) + : : : : : :- * Filter (39) + : : : : : : +- * ColumnarToRow (38) + : : : : : : +- Scan parquet default.web_sales (37) + : : : : : +- ReusedExchange (40) + : : : : +- ReusedExchange (43) + : : : +- BroadcastExchange (63) + : : : +- * BroadcastHashJoin LeftSemi BuildRight (62) + : : : :- * Filter (60) + : : : : +- * ColumnarToRow (59) + : : : : +- Scan parquet default.item (58) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (70) + : : +- * Project (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : :- * Project (93) + : : +- * Filter (92) + : : +- * HashAggregate (91) + : : +- Exchange (90) + : : +- * HashAggregate (89) + : : +- * Project (88) + : : +- * BroadcastHashJoin Inner BuildRight (87) + : : :- * Project (85) + : : : +- * BroadcastHashJoin Inner BuildRight (84) + : : : :- * BroadcastHashJoin LeftSemi BuildRight (82) + : : : : :- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.catalog_sales (78) + : : : : +- ReusedExchange (81) + : : : +- ReusedExchange (83) + : : +- ReusedExchange (86) + : +- * Project (109) + : +- * Filter (108) + : +- * HashAggregate (107) + : +- Exchange (106) + : +- * HashAggregate (105) + : +- * Project (104) + : +- * BroadcastHashJoin Inner BuildRight (103) + : :- * Project (101) + : : +- * BroadcastHashJoin Inner BuildRight (100) + : : :- * BroadcastHashJoin LeftSemi BuildRight (98) + : : : :- * Filter (96) + : : : : +- * ColumnarToRow (95) + : : : : +- Scan parquet default.web_sales (94) + : : : +- ReusedExchange (97) + : : +- ReusedExchange (99) + : +- ReusedExchange (102) + :- * HashAggregate (132) + : +- Exchange (131) + : +- * HashAggregate 
(130) + : +- * HashAggregate (129) + : +- Exchange (128) + : +- * HashAggregate (127) + : +- Union (126) + : :- * Project (117) + : : +- * Filter (116) + : : +- * HashAggregate (115) + : : +- ReusedExchange (114) + : :- * Project (121) + : : +- * Filter (120) + : : +- * HashAggregate (119) + : : +- ReusedExchange (118) + : +- * Project (125) + : +- * Filter (124) + : +- * HashAggregate (123) + : +- ReusedExchange (122) + :- * HashAggregate (151) + : +- Exchange (150) + : +- * HashAggregate (149) + : +- * HashAggregate (148) + : +- Exchange (147) + : +- * HashAggregate (146) + : +- Union (145) + : :- * Project (136) + : : +- * Filter (135) + : : +- * HashAggregate (134) + : : +- ReusedExchange (133) + : :- * Project (140) + : : +- * Filter (139) + : : +- * HashAggregate (138) + : : +- ReusedExchange (137) + : +- * Project (144) + : +- * Filter (143) + : +- * HashAggregate (142) + : +- ReusedExchange (141) + :- * HashAggregate (170) + : +- Exchange (169) + : +- * HashAggregate (168) + : +- * HashAggregate (167) + : +- Exchange (166) + : +- * HashAggregate (165) + : +- Union (164) + : :- * Project (155) + : : +- * Filter (154) + : : +- * HashAggregate (153) + : : +- ReusedExchange (152) + : :- * Project (159) + : : +- * Filter (158) + : : +- * HashAggregate (157) + : : +- ReusedExchange (156) + : +- * Project (163) + : +- * Filter (162) + : +- * HashAggregate (161) + : +- ReusedExchange (160) + +- * HashAggregate (189) + +- Exchange (188) + +- * HashAggregate (187) + +- * HashAggregate (186) + +- Exchange (185) + +- * HashAggregate (184) + +- Union (183) + :- * Project (174) + : +- * Filter (173) + : +- * HashAggregate (172) + : +- ReusedExchange (171) + :- * Project (178) + : +- * Filter (177) + : +- * HashAggregate (176) + : +- ReusedExchange (175) + +- * Project (182) + +- * Filter (181) + +- * HashAggregate (180) + +- ReusedExchange (179) (1) Scan parquet default.store_sales @@ -425,7 +413,7 @@ Results [3]: [brand_id#13, class_id#14, category_id#15] (49) Exchange Input [3]: [brand_id#13, class_id#14, category_id#15] -Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), true, [id=#23] +Arguments: hashpartitioning(brand_id#13, class_id#14, category_id#15, 5), ENSURE_REQUIREMENTS, [id=#23] (50) HashAggregate [codegen id : 10] Input [3]: [brand_id#13, class_id#14, category_id#15] @@ -545,7 +533,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, c (74) Exchange Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#36] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#36] (75) HashAggregate [codegen id : 26] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#33, isEmpty#34, count#35] @@ -617,7 +605,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, c (90) Exchange Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#53] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#53] (91) HashAggregate [codegen id : 52] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#50, isEmpty#51, count#52] @@ -689,7 +677,7 @@ Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, c (106) Exchange Input [6]: [i_brand_id#6, i_class_id#7, 
i_category_id#8, sum#65, isEmpty#66, count#67] -Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#68] +Arguments: hashpartitioning(i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#68] (107) HashAggregate [codegen id : 78] Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#65, isEmpty#66, count#67] @@ -717,7 +705,7 @@ Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, i (112) Exchange Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#81] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#81] (113) HashAggregate [codegen id : 80] Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#78, isEmpty#79, sum#80] @@ -791,7 +779,7 @@ Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, (128) Exchange Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#112] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#112] (129) HashAggregate [codegen id : 160] Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#109, isEmpty#110, sum#111] @@ -809,7 +797,7 @@ Results [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum# (131) Exchange Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), true, [id=#121] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, 5), ENSURE_REQUIREMENTS, [id=#121] (132) HashAggregate [codegen id : 161] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, sum#118, isEmpty#119, sum#120] @@ -818,536 +806,476 @@ Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] Aggregate Attributes [2]: [sum(sum_sales#84)#122, sum(number_sales#85)#123] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, null AS i_category_id#124, sum(sum_sales#84)#122 AS sum(sum_sales)#125, sum(number_sales#85)#123 AS sum(number_sales)#126] -(133) Union +(133) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#127, isEmpty#128, count#129] -(134) HashAggregate [codegen id : 162] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(135) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#127] - -(136) HashAggregate [codegen id : 163] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] 
- -(137) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] - -(138) HashAggregate [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#128, isEmpty#129, count#130] +(134) HashAggregate [codegen id : 187] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#127, isEmpty#128, count#129] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131, count(1)#132] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sales#39, count(1)#132 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#131 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130, count(1)#131] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130 AS sales#39, count(1)#131 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#130 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] -(139) Filter [codegen id : 189] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(135) Filter [codegen id : 187] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(140) Project [codegen id : 189] +(136) Project [codegen id : 187] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#133] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#132] -(141) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] +(137) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#133, isEmpty#134, count#135] -(142) HashAggregate [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#134, isEmpty#135, count#136] +(138) HashAggregate [codegen id : 213] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#133, isEmpty#134, count#135] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137, count(1)#138] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sales#56, count(1)#138 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#137 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136, count(1)#137] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136 AS 
sales#56, count(1)#137 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#136 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] -(143) Filter [codegen id : 215] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(139) Filter [codegen id : 213] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(144) Project [codegen id : 215] -Output [6]: [catalog AS channel#140, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#139] +(140) Project [codegen id : 213] +Output [6]: [catalog AS channel#139, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#138] -(145) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] +(141) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#140, isEmpty#141, count#142] -(146) HashAggregate [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#141, isEmpty#142, count#143] +(142) HashAggregate [codegen id : 239] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#140, isEmpty#141, count#142] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: 
[sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144, count(1)#145] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sales#71, count(1)#145 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#144 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143, count(1)#144] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 AS sales#71, count(1)#144 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#143 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] -(147) Filter [codegen id : 241] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(143) Filter [codegen id : 239] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as 
decimal(12,2)))), DecimalType(18,2), true))#145 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(148) Project [codegen id : 241] -Output [6]: [web AS channel#147, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#146] +(144) Project [codegen id : 239] +Output [6]: [web AS channel#146, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#145] -(149) Union +(145) Union -(150) HashAggregate [codegen id : 242] +(146) HashAggregate [codegen id : 240] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate Attributes [3]: [sum#148, isEmpty#149, sum#150] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +Aggregate Attributes [3]: [sum#147, isEmpty#148, sum#149] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] -(151) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#154] +(147) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#153] -(152) HashAggregate [codegen id : 243] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#151, isEmpty#152, sum#153] +(148) HashAggregate [codegen id : 241] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#150, isEmpty#151, sum#152] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#155, sum(number_sales#40)#156] -Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#155 AS sum_sales#84, sum(number_sales#40)#156 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#154, sum(number_sales#40)#155] +Results [4]: [channel#44, i_brand_id#6, sum(sales#39)#154 AS sum_sales#84, sum(number_sales#40)#155 AS number_sales#85] -(153) HashAggregate [codegen id : 243] +(149) HashAggregate [codegen id : 241] Input [4]: [channel#44, i_brand_id#6, sum_sales#84, number_sales#85] Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#157, isEmpty#158, sum#159] -Results [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +Aggregate Attributes [3]: [sum#156, isEmpty#157, sum#158] +Results [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] -(154) Exchange -Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] -Arguments: hashpartitioning(channel#44, 
i_brand_id#6, 5), true, [id=#163] +(150) Exchange +Input [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] +Arguments: hashpartitioning(channel#44, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#162] -(155) HashAggregate [codegen id : 244] -Input [5]: [channel#44, i_brand_id#6, sum#160, isEmpty#161, sum#162] +(151) HashAggregate [codegen id : 242] +Input [5]: [channel#44, i_brand_id#6, sum#159, isEmpty#160, sum#161] Keys [2]: [channel#44, i_brand_id#6] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#164, sum(number_sales#85)#165] -Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#84)#164 AS sum(sum_sales)#168, sum(number_sales#85)#165 AS sum(number_sales)#169] - -(156) Union - -(157) HashAggregate [codegen id : 245] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +Aggregate Attributes [2]: [sum(sum_sales#84)#163, sum(number_sales#85)#164] +Results [6]: [channel#44, i_brand_id#6, null AS i_class_id#165, null AS i_category_id#166, sum(sum_sales#84)#163 AS sum(sum_sales)#167, sum(number_sales#85)#164 AS sum(number_sales)#168] -(158) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#170] +(152) ReusedExchange [Reuses operator id: 74] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#169, isEmpty#170, count#171] -(159) HashAggregate [codegen id : 246] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(160) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] - -(161) HashAggregate [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#171, isEmpty#172, count#173] +(153) HashAggregate [codegen id : 268] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#169, isEmpty#170, count#171] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174, count(1)#175] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sales#39, count(1)#175 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172, count(1)#173] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 AS sales#39, count(1)#173 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#172 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174] -(162) Filter [codegen id : 272] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(154) Filter [codegen id : 268] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#174 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(163) Project [codegen id : 272] +(155) Project [codegen id : 268] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#176] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), 
true))#174] -(164) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] +(156) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#175, isEmpty#176, count#177] -(165) HashAggregate [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#177, isEmpty#178, count#179] +(157) HashAggregate [codegen id : 294] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#175, isEmpty#176, count#177] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180, count(1)#181] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sales#56, count(1)#181 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178, count(1)#179] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 AS sales#56, count(1)#179 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#178 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] -(166) Filter [codegen id : 298] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(158) Filter [codegen id : 294] +Input [6]: [i_brand_id#6, 
i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(167) Project [codegen id : 298] -Output [6]: [catalog AS channel#183, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#182] +(159) Project [codegen id : 294] +Output [6]: [catalog AS channel#181, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#180] -(168) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] +(160) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#182, isEmpty#183, count#184] -(169) HashAggregate [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#184, isEmpty#185, count#186] +(161) HashAggregate [codegen id : 320] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#182, isEmpty#183, count#184] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187, count(1)#188] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sales#71, count(1)#188 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), 
true))#185, count(1)#186] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sales#71, count(1)#186 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#185 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] -(170) Filter [codegen id : 324] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(162) Filter [codegen id : 320] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(171) Project [codegen id : 324] -Output [6]: [web AS channel#190, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#189] +(163) Project [codegen id : 320] +Output [6]: [web AS channel#188, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#187] -(172) Union +(164) Union -(173) HashAggregate [codegen id : 325] +(165) HashAggregate [codegen id : 321] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate 
Attributes [3]: [sum#191, isEmpty#192, sum#193] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +Aggregate Attributes [3]: [sum#189, isEmpty#190, sum#191] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] -(174) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#197] +(166) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#195] -(175) HashAggregate [codegen id : 326] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#194, isEmpty#195, sum#196] +(167) HashAggregate [codegen id : 322] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#192, isEmpty#193, sum#194] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#198, sum(number_sales#40)#199] -Results [3]: [channel#44, sum(sales#39)#198 AS sum_sales#84, sum(number_sales#40)#199 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#196, sum(number_sales#40)#197] +Results [3]: [channel#44, sum(sales#39)#196 AS sum_sales#84, sum(number_sales#40)#197 AS number_sales#85] -(176) HashAggregate [codegen id : 326] +(168) HashAggregate [codegen id : 322] Input [3]: [channel#44, sum_sales#84, number_sales#85] Keys [1]: [channel#44] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#200, isEmpty#201, sum#202] -Results [4]: [channel#44, sum#203, isEmpty#204, sum#205] +Aggregate Attributes [3]: [sum#198, isEmpty#199, sum#200] +Results [4]: [channel#44, sum#201, isEmpty#202, sum#203] -(177) Exchange -Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] -Arguments: hashpartitioning(channel#44, 5), true, [id=#206] +(169) Exchange +Input [4]: [channel#44, sum#201, isEmpty#202, sum#203] +Arguments: hashpartitioning(channel#44, 5), ENSURE_REQUIREMENTS, [id=#204] -(178) HashAggregate [codegen id : 327] -Input [4]: [channel#44, sum#203, isEmpty#204, sum#205] +(170) HashAggregate [codegen id : 323] +Input [4]: [channel#44, sum#201, isEmpty#202, sum#203] Keys [1]: [channel#44] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#207, sum(number_sales#85)#208] -Results [6]: [channel#44, null AS i_brand_id#209, null AS i_class_id#210, null AS i_category_id#211, sum(sum_sales#84)#207 AS sum(sum_sales)#212, sum(number_sales#85)#208 AS sum(number_sales)#213] +Aggregate Attributes [2]: [sum(sum_sales#84)#205, sum(number_sales#85)#206] +Results [6]: [channel#44, null AS i_brand_id#207, null AS i_class_id#208, null AS i_category_id#209, sum(sum_sales#84)#205 AS sum(sum_sales)#210, sum(number_sales#85)#206 AS sum(number_sales)#211] -(179) Union - -(180) HashAggregate [codegen id : 328] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] +(171) ReusedExchange [Reuses operator id: 74] 
+Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#212, isEmpty#213, count#214] -(181) Exchange -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#214] - -(182) HashAggregate [codegen id : 329] -Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Functions: [] -Aggregate Attributes: [] -Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] - -(183) ReusedExchange [Reuses operator id: 74] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] - -(184) HashAggregate [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#215, isEmpty#216, count#217] +(172) HashAggregate [codegen id : 349] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#212, isEmpty#213, count#214] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218, count(1)#219] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sales#39, count(1)#219 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#218 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215, count(1)#216] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215 AS sales#39, count(1)#216 AS number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#215 AS sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] -(185) Filter [codegen id : 355] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] -Condition : 
(isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(173) Filter [codegen id : 349] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(186) Project [codegen id : 355] +(174) Project [codegen id : 349] Output [6]: [store AS channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#220] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40, sum(CheckOverflow((promote_precision(cast(cast(ss_quantity#3 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price#4 as decimal(12,2)))), DecimalType(18,2), true))#217] -(187) ReusedExchange [Reuses operator id: 90] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +(175) ReusedExchange [Reuses operator id: 90] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#218, isEmpty#219, count#220] -(188) HashAggregate [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#221, isEmpty#222, count#223] +(176) HashAggregate [codegen id : 375] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#218, isEmpty#219, count#220] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224, count(1)#225] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#224 AS sales#56, count(1)#225 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), 
DecimalType(18,2), true))#224 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221, count(1)#222] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221 AS sales#56, count(1)#222 AS number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#221 AS sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] -(189) Filter [codegen id : 381] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) +(177) Filter [codegen id : 375] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(190) Project [codegen id : 381] -Output [6]: [catalog AS channel#227, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#226] +(178) Project [codegen id : 375] +Output [6]: [catalog AS channel#224, i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#56, number_sales#57, sum(CheckOverflow((promote_precision(cast(cast(cs_quantity#45 as decimal(10,0)) as decimal(12,2))) * 
promote_precision(cast(cs_list_price#46 as decimal(12,2)))), DecimalType(18,2), true))#223] -(191) ReusedExchange [Reuses operator id: 106] -Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] +(179) ReusedExchange [Reuses operator id: 106] +Output [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#225, isEmpty#226, count#227] -(192) HashAggregate [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#228, isEmpty#229, count#230] +(180) HashAggregate [codegen id : 401] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum#225, isEmpty#226, count#227] Keys [3]: [i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true)), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231, count(1)#232] -Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sales#71, count(1)#232 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#231 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228, count(1)#229] +Results [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228 AS sales#71, count(1)#229 AS number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#228 AS sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] -(193) Filter [codegen id : 407] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] -Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] 
as decimal(32,6)))) +(181) Filter [codegen id : 401] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] +Condition : (isnotnull(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230) AND (cast(sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#42, [id=#43] as decimal(32,6)))) -(194) Project [codegen id : 407] -Output [6]: [web AS channel#234, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] -Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#233] +(182) Project [codegen id : 401] +Output [6]: [web AS channel#231, i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72] +Input [6]: [i_brand_id#6, i_class_id#7, i_category_id#8, sales#71, number_sales#72, sum(CheckOverflow((promote_precision(cast(cast(ws_quantity#60 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price#61 as decimal(12,2)))), DecimalType(18,2), true))#230] -(195) Union +(183) Union -(196) HashAggregate [codegen id : 408] +(184) HashAggregate [codegen id : 402] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sales#39, number_sales#40] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [partial_sum(sales#39), partial_sum(number_sales#40)] -Aggregate Attributes [3]: [sum#235, isEmpty#236, sum#237] -Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +Aggregate Attributes [3]: [sum#232, isEmpty#233, sum#234] +Results [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] -(197) Exchange -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), true, [id=#241] +(185) Exchange +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, 5), ENSURE_REQUIREMENTS, [id=#238] -(198) HashAggregate [codegen id : 409] -Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#238, isEmpty#239, sum#240] +(186) HashAggregate [codegen id : 403] +Input [7]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum#235, isEmpty#236, sum#237] Keys [4]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8] Functions [2]: [sum(sales#39), sum(number_sales#40)] -Aggregate Attributes [2]: [sum(sales#39)#242, sum(number_sales#40)#243] -Results [2]: [sum(sales#39)#242 AS sum_sales#84, sum(number_sales#40)#243 AS number_sales#85] +Aggregate Attributes [2]: [sum(sales#39)#239, sum(number_sales#40)#240] +Results [2]: [sum(sales#39)#239 AS sum_sales#84, sum(number_sales#40)#240 AS number_sales#85] -(199) 
HashAggregate [codegen id : 409] +(187) HashAggregate [codegen id : 403] Input [2]: [sum_sales#84, number_sales#85] Keys: [] Functions [2]: [partial_sum(sum_sales#84), partial_sum(number_sales#85)] -Aggregate Attributes [3]: [sum#244, isEmpty#245, sum#246] -Results [3]: [sum#247, isEmpty#248, sum#249] +Aggregate Attributes [3]: [sum#241, isEmpty#242, sum#243] +Results [3]: [sum#244, isEmpty#245, sum#246] -(200) Exchange -Input [3]: [sum#247, isEmpty#248, sum#249] -Arguments: SinglePartition, true, [id=#250] +(188) Exchange +Input [3]: [sum#244, isEmpty#245, sum#246] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#247] -(201) HashAggregate [codegen id : 410] -Input [3]: [sum#247, isEmpty#248, sum#249] +(189) HashAggregate [codegen id : 404] +Input [3]: [sum#244, isEmpty#245, sum#246] Keys: [] Functions [2]: [sum(sum_sales#84), sum(number_sales#85)] -Aggregate Attributes [2]: [sum(sum_sales#84)#251, sum(number_sales#85)#252] -Results [6]: [null AS channel#253, null AS i_brand_id#254, null AS i_class_id#255, null AS i_category_id#256, sum(sum_sales#84)#251 AS sum(sum_sales)#257, sum(number_sales#85)#252 AS sum(number_sales)#258] +Aggregate Attributes [2]: [sum(sum_sales#84)#248, sum(number_sales#85)#249] +Results [6]: [null AS channel#250, null AS i_brand_id#251, null AS i_class_id#252, null AS i_category_id#253, sum(sum_sales#84)#248 AS sum(sum_sales)#254, sum(number_sales#85)#249 AS sum(number_sales)#255] -(202) Union +(190) Union -(203) HashAggregate [codegen id : 411] +(191) HashAggregate [codegen id : 405] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -(204) Exchange +(192) Exchange Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), true, [id=#259] +Arguments: hashpartitioning(channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85, 5), ENSURE_REQUIREMENTS, [id=#256] -(205) HashAggregate [codegen id : 412] +(193) HashAggregate [codegen id : 406] Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Keys [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Functions: [] Aggregate Attributes: [] Results [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] -(206) TakeOrderedAndProject +(194) TakeOrderedAndProject Input [6]: [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] Arguments: 100, [channel#44 ASC NULLS FIRST, i_brand_id#6 ASC NULLS FIRST, i_class_id#7 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST], [channel#44, i_brand_id#6, i_class_id#7, i_category_id#8, sum_sales#84, number_sales#85] ===== Subqueries ===== Subquery:1 Hosting operator id = 76 Hosting Expression = Subquery scalar-subquery#42, [id=#43] -* HashAggregate (236) -+- Exchange (235) - +- * HashAggregate (234) - +- Union (233) - :- * Project (216) - : +- * BroadcastHashJoin Inner BuildRight (215) - : :- * Filter (209) - : : +- * ColumnarToRow (208) - : : +- Scan parquet default.store_sales (207) - : +- BroadcastExchange (214) - : +- * Project (213) - : +- * Filter 
(212) - : +- * ColumnarToRow (211) - : +- Scan parquet default.date_dim (210) - :- * Project (226) - : +- * BroadcastHashJoin Inner BuildRight (225) - : :- * Filter (219) - : : +- * ColumnarToRow (218) - : : +- Scan parquet default.catalog_sales (217) - : +- BroadcastExchange (224) - : +- * Project (223) - : +- * Filter (222) - : +- * ColumnarToRow (221) - : +- Scan parquet default.date_dim (220) - +- * Project (232) - +- * BroadcastHashJoin Inner BuildRight (231) - :- * Filter (229) - : +- * ColumnarToRow (228) - : +- Scan parquet default.web_sales (227) - +- ReusedExchange (230) - - -(207) Scan parquet default.store_sales +* HashAggregate (224) ++- Exchange (223) + +- * HashAggregate (222) + +- Union (221) + :- * Project (204) + : +- * BroadcastHashJoin Inner BuildRight (203) + : :- * Filter (197) + : : +- * ColumnarToRow (196) + : : +- Scan parquet default.store_sales (195) + : +- BroadcastExchange (202) + : +- * Project (201) + : +- * Filter (200) + : +- * ColumnarToRow (199) + : +- Scan parquet default.date_dim (198) + :- * Project (214) + : +- * BroadcastHashJoin Inner BuildRight (213) + : :- * Filter (207) + : : +- * ColumnarToRow (206) + : : +- Scan parquet default.catalog_sales (205) + : +- BroadcastExchange (212) + : +- * Project (211) + : +- * Filter (210) + : +- * ColumnarToRow (209) + : +- Scan parquet default.date_dim (208) + +- * Project (220) + +- * BroadcastHashJoin Inner BuildRight (219) + :- * Filter (217) + : +- * ColumnarToRow (216) + : +- Scan parquet default.web_sales (215) + +- ReusedExchange (218) + + +(195) Scan parquet default.store_sales Output [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(208) ColumnarToRow [codegen id : 2] +(196) ColumnarToRow [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] -(209) Filter [codegen id : 2] +(197) Filter [codegen id : 2] Input [3]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4] Condition : isnotnull(ss_sold_date_sk#1) -(210) Scan parquet default.date_dim +(198) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(211) ColumnarToRow [codegen id : 1] +(199) ColumnarToRow [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] -(212) Filter [codegen id : 1] +(200) Filter [codegen id : 1] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1999)) AND (d_year#11 <= 2001)) AND isnotnull(d_date_sk#10)) -(213) Project [codegen id : 1] +(201) Project [codegen id : 1] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(214) BroadcastExchange +(202) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#257] -(215) BroadcastHashJoin [codegen id : 2] +(203) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#1] Right keys [1]: [d_date_sk#10] Join condition: None -(216) Project [codegen id : 2] -Output [2]: [ss_quantity#3 AS quantity#261, ss_list_price#4 AS list_price#262] +(204) Project [codegen id : 2] +Output [2]: [ss_quantity#3 AS quantity#258, ss_list_price#4 AS 
list_price#259] Input [4]: [ss_sold_date_sk#1, ss_quantity#3, ss_list_price#4, d_date_sk#10] -(217) Scan parquet default.catalog_sales +(205) Scan parquet default.catalog_sales Output [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(218) ColumnarToRow [codegen id : 4] +(206) ColumnarToRow [codegen id : 4] Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] -(219) Filter [codegen id : 4] +(207) Filter [codegen id : 4] Input [3]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46] Condition : isnotnull(cs_sold_date_sk#16) -(220) Scan parquet default.date_dim +(208) Scan parquet default.date_dim Output [2]: [d_date_sk#10, d_year#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct -(221) ColumnarToRow [codegen id : 3] +(209) ColumnarToRow [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] -(222) Filter [codegen id : 3] +(210) Filter [codegen id : 3] Input [2]: [d_date_sk#10, d_year#11] Condition : (((isnotnull(d_year#11) AND (d_year#11 >= 1998)) AND (d_year#11 <= 2000)) AND isnotnull(d_date_sk#10)) -(223) Project [codegen id : 3] +(211) Project [codegen id : 3] Output [1]: [d_date_sk#10] Input [2]: [d_date_sk#10, d_year#11] -(224) BroadcastExchange +(212) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#263] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#260] -(225) BroadcastHashJoin [codegen id : 4] +(213) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#16] Right keys [1]: [d_date_sk#10] Join condition: None -(226) Project [codegen id : 4] -Output [2]: [cs_quantity#45 AS quantity#264, cs_list_price#46 AS list_price#265] +(214) Project [codegen id : 4] +Output [2]: [cs_quantity#45 AS quantity#261, cs_list_price#46 AS list_price#262] Input [4]: [cs_sold_date_sk#16, cs_quantity#45, cs_list_price#46, d_date_sk#10] -(227) Scan parquet default.web_sales +(215) Scan parquet default.web_sales Output [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(228) ColumnarToRow [codegen id : 6] +(216) ColumnarToRow [codegen id : 6] Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] -(229) Filter [codegen id : 6] +(217) Filter [codegen id : 6] Input [3]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61] Condition : isnotnull(ws_sold_date_sk#20) -(230) ReusedExchange [Reuses operator id: 224] +(218) ReusedExchange [Reuses operator id: 212] Output [1]: [d_date_sk#10] -(231) BroadcastHashJoin [codegen id : 6] +(219) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#20] Right keys [1]: [d_date_sk#10] Join condition: None -(232) Project [codegen id : 6] -Output [2]: [ws_quantity#60 AS quantity#266, ws_list_price#61 AS list_price#267] +(220) Project [codegen id : 6] +Output [2]: [ws_quantity#60 AS quantity#263, ws_list_price#61 AS list_price#264] Input [4]: [ws_sold_date_sk#20, ws_quantity#60, ws_list_price#61, d_date_sk#10] -(233) Union +(221) Union -(234) HashAggregate [codegen id : 7] -Input [2]: [quantity#261, list_price#262] 
+(222) HashAggregate [codegen id : 7] +Input [2]: [quantity#258, list_price#259] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [2]: [sum#268, count#269] -Results [2]: [sum#270, count#271] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [2]: [sum#265, count#266] +Results [2]: [sum#267, count#268] -(235) Exchange -Input [2]: [sum#270, count#271] -Arguments: SinglePartition, true, [id=#272] +(223) Exchange +Input [2]: [sum#267, count#268] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#269] -(236) HashAggregate [codegen id : 8] -Input [2]: [sum#270, count#271] +(224) HashAggregate [codegen id : 8] +Input [2]: [sum#267, count#268] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273] -Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#261 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#262 as decimal(12,2)))), DecimalType(18,2), true))#273 AS average_sales#274] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270] +Results [1]: [avg(CheckOverflow((promote_precision(cast(cast(quantity#258 as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price#259 as decimal(12,2)))), DecimalType(18,2), true))#270 AS average_sales#271] Subquery:2 Hosting operator id = 92 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] @@ -1359,22 +1287,22 @@ Subquery:5 Hosting operator id = 120 Hosting Expression = ReusedSubquery Subquer Subquery:6 Hosting operator id = 124 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:7 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:7 Hosting operator id = 135 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:8 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:8 Hosting operator id = 139 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:9 Hosting operator id = 147 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:9 Hosting operator id = 143 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:10 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:10 Hosting operator id = 154 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:11 
Hosting operator id = 166 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:11 Hosting operator id = 158 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:12 Hosting operator id = 170 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:12 Hosting operator id = 162 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:13 Hosting operator id = 185 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:13 Hosting operator id = 173 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:14 Hosting operator id = 189 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:14 Hosting operator id = 177 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] -Subquery:15 Hosting operator id = 193 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] +Subquery:15 Hosting operator id = 181 Hosting Expression = ReusedSubquery Subquery scalar-subquery#42, [id=#43] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt index e96f1d6fed14f..18484308fecaf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/simplified.txt @@ -1,387 +1,363 @@ TakeOrderedAndProject [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - WholeStageCodegen (412) + WholeStageCodegen (406) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #1 - WholeStageCodegen (411) + WholeStageCodegen (405) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] InputAdapter Union - WholeStageCodegen (329) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (80) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #2 - WholeStageCodegen (328) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange [channel,i_brand_id,i_class_id,i_category_id] #2 + WholeStageCodegen (79) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (246) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #3 - WholeStageCodegen (245) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + WholeStageCodegen (26) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + Subquery #1 + WholeStageCodegen (8) + HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as 
decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] InputAdapter - Union - WholeStageCodegen (163) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] + Exchange #14 + WholeStageCodegen (7) + HashAggregate [quantity,list_price] [sum,count,sum,count] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] #4 - WholeStageCodegen (162) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum_sales,number_sales] - InputAdapter - Union - WholeStageCodegen (80) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + Union + WholeStageCodegen (2) + Project [ss_quantity,ss_list_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #15 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (4) + Project [cs_quantity,cs_list_price] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Project [ws_quantity,ws_list_price] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [d_date_sk] #16 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #3 + WholeStageCodegen (25) + HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,ss_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (11) + Project [i_item_sk] + BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] + Filter [i_brand_id,i_class_id,i_category_id] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #5 - WholeStageCodegen (79) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] 
[sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (26) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - Subquery #1 - WholeStageCodegen (8) - HashAggregate [sum,count] [avg(CheckOverflow((promote_precision(cast(cast(quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(list_price as decimal(12,2)))), DecimalType(18,2), true)),average_sales,sum,count] - InputAdapter - Exchange #17 - WholeStageCodegen (7) - HashAggregate [quantity,list_price] [sum,count,sum,count] + BroadcastExchange #5 + WholeStageCodegen (10) + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + HashAggregate [brand_id,class_id,category_id] + InputAdapter + Exchange [brand_id,class_id,category_id] #6 + WholeStageCodegen (9) + HashAggregate [brand_id,class_id,category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] InputAdapter - Union - WholeStageCodegen (2) - Project [ss_quantity,ss_list_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_quantity,ss_list_price] - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (4) - Project [cs_quantity,cs_list_price] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_quantity,cs_list_price] - InputAdapter - BroadcastExchange #19 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Project [ws_quantity,ws_list_price] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_quantity,ws_list_price] + BroadcastExchange #7 + WholeStageCodegen (1) + Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk] #19 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #6 - WholeStageCodegen (25) - HashAggregate [i_brand_id,i_class_id,i_category_id,ss_quantity,ss_list_price] 
[sum,isEmpty,count,sum,isEmpty,count] - Project [ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_quantity,ss_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - BroadcastHashJoin [ss_item_sk,ss_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (5) + Project [i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_quantity,ss_list_price] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (11) - Project [i_item_sk] - BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,brand_id,class_id,category_id] - Filter [i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - HashAggregate [brand_id,class_id,category_id] - InputAdapter - Exchange [brand_id,class_id,category_id] #9 - WholeStageCodegen (9) - HashAggregate [brand_id,class_id,category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [brand_id,class_id,category_id,i_brand_id,i_class_id,i_category_id] - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (1) - Filter [i_item_sk,i_brand_id,i_class_id,i_category_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (5) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk] - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (8) - Project [i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project 
[ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #13 - InputAdapter - ReusedExchange [d_date_sk] #11 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (23) - BroadcastHashJoin [i_item_sk,ss_item_sk] + BroadcastExchange #10 + WholeStageCodegen (3) Filter [i_item_sk] ColumnarToRow InputAdapter Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] - InputAdapter - ReusedExchange [ss_item_sk] #7 - InputAdapter - BroadcastExchange #16 - WholeStageCodegen (24) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - WholeStageCodegen (52) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #20 - WholeStageCodegen (51) - HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] - Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - BroadcastHashJoin [cs_item_sk,ss_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] - InputAdapter - ReusedExchange [ss_item_sk] #7 - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 - InputAdapter - ReusedExchange [d_date_sk] #16 - WholeStageCodegen (78) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id] #21 - WholeStageCodegen (77) - HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] 
[sum,isEmpty,count,sum,isEmpty,count] - Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (8) + Project [i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + Project [ws_sold_date_sk,i_brand_id,i_class_id,i_category_id] BroadcastHashJoin [ws_item_sk,i_item_sk] - BroadcastHashJoin [ws_item_sk,ss_item_sk] - Filter [ws_item_sk,ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] - InputAdapter - ReusedExchange [ss_item_sk] #7 + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #15 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #10 InputAdapter - ReusedExchange [d_date_sk] #16 - WholeStageCodegen (161) - HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (23) + BroadcastHashJoin [i_item_sk,ss_item_sk] + Filter [i_item_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id] #22 - WholeStageCodegen (160) - HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #23 - WholeStageCodegen (159) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (106) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (132) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (158) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (244) - HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id] + InputAdapter + ReusedExchange [ss_item_sk] #4 InputAdapter - Exchange [channel,i_brand_id] #24 - WholeStageCodegen (243) - HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + BroadcastExchange #13 + WholeStageCodegen (24) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + WholeStageCodegen (52) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #17 + WholeStageCodegen (51) + HashAggregate [i_brand_id,i_class_id,i_category_id,cs_quantity,cs_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_quantity,cs_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,ss_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #25 - WholeStageCodegen (242) - HashAggregate 
[channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - WholeStageCodegen (189) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (215) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (241) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (327) - HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] - InputAdapter - Exchange [channel] #26 - WholeStageCodegen (326) - HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] - InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #27 - WholeStageCodegen (325) - HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] - InputAdapter - Union - 
WholeStageCodegen (272) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (298) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (324) - Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] - Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] - ReusedSubquery [average_sales] #1 - HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] - InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 - WholeStageCodegen (410) + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_quantity,cs_list_price] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (78) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), 
DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + Exchange [i_brand_id,i_class_id,i_category_id] #18 + WholeStageCodegen (77) + HashAggregate [i_brand_id,i_class_id,i_category_id,ws_quantity,ws_list_price] [sum,isEmpty,count,sum,isEmpty,count] + Project [ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_quantity,ws_list_price,i_brand_id,i_class_id,i_category_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,ss_item_sk] + Filter [ws_item_sk,ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_quantity,ws_list_price] + InputAdapter + ReusedExchange [ss_item_sk] #4 + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id] #12 + InputAdapter + ReusedExchange [d_date_sk] #13 + WholeStageCodegen (161) + HashAggregate [channel,i_brand_id,i_class_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id] #19 + WholeStageCodegen (160) + HashAggregate [channel,i_brand_id,i_class_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #20 + WholeStageCodegen (159) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (106) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (132) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), 
true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (158) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (242) + HashAggregate [channel,i_brand_id,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id] #21 + WholeStageCodegen (241) + HashAggregate [channel,i_brand_id,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #22 + WholeStageCodegen (240) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (187) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (213) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (239) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (323) + HashAggregate [channel,sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] + InputAdapter + Exchange [channel] #23 + WholeStageCodegen (322) + HashAggregate [channel,sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] + InputAdapter + Exchange [channel,i_brand_id,i_class_id,i_category_id] #24 + WholeStageCodegen (321) + HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] + InputAdapter + Union + WholeStageCodegen (268) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (294) + Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (320) + Project 
[i_brand_id,i_class_id,i_category_id,sales,number_sales] + Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] + ReusedSubquery [average_sales] #1 + HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] + InputAdapter + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 + WholeStageCodegen (404) HashAggregate [sum,isEmpty,sum] [sum(sum_sales),sum(number_salesL),channel,i_brand_id,i_class_id,i_category_id,sum(sum_sales),sum(number_sales),sum,isEmpty,sum] InputAdapter - Exchange #28 - WholeStageCodegen (409) + Exchange #25 + WholeStageCodegen (403) HashAggregate [sum_sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sum,isEmpty,sum] [sum(sales),sum(number_salesL),sum_sales,number_sales,sum,isEmpty,sum] InputAdapter - Exchange [channel,i_brand_id,i_class_id,i_category_id] #29 - WholeStageCodegen (408) + Exchange [channel,i_brand_id,i_class_id,i_category_id] #26 + WholeStageCodegen (402) HashAggregate [channel,i_brand_id,i_class_id,i_category_id,sales,number_sales] [sum,isEmpty,sum,sum,isEmpty,sum] InputAdapter Union - WholeStageCodegen (355) + WholeStageCodegen (349) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ss_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ss_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #6 - WholeStageCodegen (381) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #3 + WholeStageCodegen (375) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(cs_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(cs_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange 
[i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #20 - WholeStageCodegen (407) + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #17 + WholeStageCodegen (401) Project [i_brand_id,i_class_id,i_category_id,sales,number_sales] Filter [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true))] ReusedSubquery [average_sales] #1 HashAggregate [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] [sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),count(1),sales,number_sales,sum(CheckOverflow((promote_precision(cast(cast(ws_quantity as decimal(10,0)) as decimal(12,2))) * promote_precision(cast(ws_list_price as decimal(12,2)))), DecimalType(18,2), true)),sum,isEmpty,count] InputAdapter - ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #21 + ReusedExchange [i_brand_id,i_class_id,i_category_id,sum,isEmpty,count] #18 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt index 107343f091fb2..20ea78c9140e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt @@ -1,53 +1,49 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- Window (47) - +- * Sort (46) - +- Exchange (45) - +- * HashAggregate (44) - +- Exchange (43) - +- * HashAggregate (42) - +- Union (41) - :- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- Union (32) - : :- * HashAggregate (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (17) - : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (15) - : : : +- * Project (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (21) - : : +- * Filter (20) - : : +- * ColumnarToRow (19) - : : +- Scan parquet default.item (18) - : +- * HashAggregate (31) - : +- Exchange (30) - : +- * HashAggregate (29) - : +- * HashAggregate (28) - : +- ReusedExchange (27) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- ReusedExchange (36) +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) 
+ : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * ColumnarToRow (19) + : +- Scan parquet default.item (18) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) (1) Scan parquet default.store_sales @@ -162,7 +158,7 @@ Results [4]: [i_category#14, i_class#13, sum#18, sum#19] (25) Exchange Input [4]: [i_category#14, i_class#13, sum#18, sum#19] -Arguments: hashpartitioning(i_category#14, i_class#13, 5), true, [id=#20] +Arguments: hashpartitioning(i_category#14, i_class#13, 5), ENSURE_REQUIREMENTS, [id=#20] (26) HashAggregate [codegen id : 5] Input [4]: [i_category#14, i_class#13, sum#18, sum#19] @@ -190,7 +186,7 @@ Results [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] (30) Exchange Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] -Arguments: hashpartitioning(i_category#14, 5), true, [id=#41] +Arguments: hashpartitioning(i_category#14, 5), ENSURE_REQUIREMENTS, [id=#41] (31) HashAggregate [codegen id : 11] Input [5]: [i_category#14, sum#37, isEmpty#38, sum#39, isEmpty#40] @@ -199,91 +195,71 @@ Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#14, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] -(32) Union +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#14, i_class#13, sum#49, sum#50] -(33) HashAggregate [codegen id : 12] -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] - -(34) Exchange -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] - -(35) HashAggregate [codegen id : 13] -Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] - -(36) ReusedExchange [Reuses operator id: 25] -Output [4]: [i_category#14, i_class#13, sum#50, sum#51] - -(37) HashAggregate [codegen id : 18] -Input [4]: [i_category#14, i_class#13, sum#50, sum#51] +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#14, i_class#13, sum#49, sum#50] Keys [2]: [i_category#14, i_class#13] Functions [2]: 
[sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#51, sum(UnscaledValue(ss_ext_sales_price#4))#52] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#51,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#52,17,2) AS ss_ext_sales_price#32] -(38) HashAggregate [codegen id : 18] +(34) HashAggregate [codegen id : 16] Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] Keys: [] Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] -Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Aggregate Attributes [4]: [sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] -(39) Exchange -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] -Arguments: SinglePartition, true, [id=#62] +(35) Exchange +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] -(40) HashAggregate [codegen id : 19] -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] Keys: [] Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] -Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#62, sum(ss_ext_sales_price#32)#63] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#62) / promote_precision(sum(ss_ext_sales_price#32)#63)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#64, null AS i_category#65, null AS i_class#66, 1 AS t_category#67, 1 AS t_class#68, 2 AS lochierarchy#69] -(41) Union +(37) Union -(42) HashAggregate [codegen id : 20] +(38) HashAggregate [codegen id : 18] Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] Results [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -(43) Exchange +(39) Exchange Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] +Arguments: hashpartitioning(gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [id=#70] -(44) HashAggregate [codegen id : 21] +(40) HashAggregate [codegen id : 19] Input [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#14, i_class#13, t_category#24, t_class#25, lochierarchy#26] Functions: [] 
Aggregate Attributes: [] -Results [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#14 END AS _w0#72] +Results [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#14 END AS _w0#71] -(45) Exchange -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] +(41) Exchange +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: hashpartitioning(lochierarchy#26, _w0#71, 5), ENSURE_REQUIREMENTS, [id=#72] -(46) Sort [codegen id : 22] -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#71 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 -(47) Window -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72] -Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], [lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] +(43) Window +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71] +Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#71, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#73], [lochierarchy#26, _w0#71], [gross_margin#23 ASC NULLS FIRST] -(48) Project [codegen id : 23] -Output [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] -Input [6]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#72, rank_within_parent#74] +(44) Project [codegen id : 21] +Output [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] +Input [6]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, _w0#71, rank_within_parent#73] -(49) TakeOrderedAndProject -Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] -Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#14 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#74] +(45) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#14 END ASC NULLS FIRST, rank_within_parent#73 ASC NULLS FIRST], [gross_margin#23, i_category#14, i_class#13, lochierarchy#26, rank_within_parent#73] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt index aa85d4870683d..f1cf7e8587cc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/simplified.txt @@ -1,82 +1,74 @@ TakeOrderedAndProject 
[lochierarchy,i_category,rank_within_parent,gross_margin,i_class] - WholeStageCodegen (23) + WholeStageCodegen (21) Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [gross_margin,lochierarchy,_w0] - WholeStageCodegen (22) + WholeStageCodegen (20) Sort [lochierarchy,_w0,gross_margin] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (21) + WholeStageCodegen (19) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] InputAdapter Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 - WholeStageCodegen (20) + WholeStageCodegen (18) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] InputAdapter Union - WholeStageCodegen (13) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] InputAdapter - Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #3 - WholeStageCodegen (12) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (5) - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] - InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [s_store_sk] - Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (11) - HashAggregate 
[i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - Exchange [i_category] #8 - WholeStageCodegen (10) - HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 - WholeStageCodegen (19) + BroadcastExchange #5 + WholeStageCodegen (2) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #9 - WholeStageCodegen (18) + Exchange #8 + WholeStageCodegen (16) HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt index 0d6dfa6f90a86..40b823563a890 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt @@ -1,53 +1,49 @@ == Physical Plan == -TakeOrderedAndProject (49) -+- * Project (48) - +- Window (47) - +- * Sort (46) - +- Exchange (45) - +- * HashAggregate (44) - +- Exchange (43) - +- * HashAggregate (42) - +- Union (41) - :- * HashAggregate (35) - : +- Exchange (34) - : +- * HashAggregate (33) - : +- Union (32) - : :- * HashAggregate (26) - : : +- Exchange (25) - : : +- * HashAggregate (24) - : : +- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan 
parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.item (11) - : : +- BroadcastExchange (21) - : : +- * Project (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.store (17) - : +- * HashAggregate (31) - : +- Exchange (30) - : +- * HashAggregate (29) - : +- * HashAggregate (28) - : +- ReusedExchange (27) - +- * HashAggregate (40) - +- Exchange (39) - +- * HashAggregate (38) - +- * HashAggregate (37) - +- ReusedExchange (36) +TakeOrderedAndProject (45) ++- * Project (44) + +- Window (43) + +- * Sort (42) + +- Exchange (41) + +- * HashAggregate (40) + +- Exchange (39) + +- * HashAggregate (38) + +- Union (37) + :- * HashAggregate (26) + : +- Exchange (25) + : +- * HashAggregate (24) + : +- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.item (11) + : +- BroadcastExchange (21) + : +- * Project (20) + : +- * Filter (19) + : +- * ColumnarToRow (18) + : +- Scan parquet default.store (17) + :- * HashAggregate (31) + : +- Exchange (30) + : +- * HashAggregate (29) + : +- * HashAggregate (28) + : +- ReusedExchange (27) + +- * HashAggregate (36) + +- Exchange (35) + +- * HashAggregate (34) + +- * HashAggregate (33) + +- ReusedExchange (32) (1) Scan parquet default.store_sales @@ -162,7 +158,7 @@ Results [4]: [i_category#11, i_class#10, sum#18, sum#19] (25) Exchange Input [4]: [i_category#11, i_class#10, sum#18, sum#19] -Arguments: hashpartitioning(i_category#11, i_class#10, 5), true, [id=#20] +Arguments: hashpartitioning(i_category#11, i_class#10, 5), ENSURE_REQUIREMENTS, [id=#20] (26) HashAggregate [codegen id : 5] Input [4]: [i_category#11, i_class#10, sum#18, sum#19] @@ -190,7 +186,7 @@ Results [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] (30) Exchange Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] -Arguments: hashpartitioning(i_category#11, 5), true, [id=#41] +Arguments: hashpartitioning(i_category#11, 5), ENSURE_REQUIREMENTS, [id=#41] (31) HashAggregate [codegen id : 11] Input [5]: [i_category#11, sum#37, isEmpty#38, sum#39, isEmpty#40] @@ -199,91 +195,71 @@ Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] Aggregate Attributes [2]: [sum(ss_net_profit#31)#42, sum(ss_ext_sales_price#32)#43] Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#42) / promote_precision(sum(ss_ext_sales_price#32)#43)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#44, i_category#11, null AS i_class#45, 0 AS t_category#46, 1 AS t_class#47, 1 AS lochierarchy#48] -(32) Union +(32) ReusedExchange [Reuses operator id: 25] +Output [4]: [i_category#11, i_class#10, sum#49, sum#50] -(33) HashAggregate [codegen id : 12] -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results 
[6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] - -(34) Exchange -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#49] - -(35) HashAggregate [codegen id : 13] -Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Functions: [] -Aggregate Attributes: [] -Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] - -(36) ReusedExchange [Reuses operator id: 25] -Output [4]: [i_category#11, i_class#10, sum#50, sum#51] - -(37) HashAggregate [codegen id : 18] -Input [4]: [i_category#11, i_class#10, sum#50, sum#51] +(33) HashAggregate [codegen id : 16] +Input [4]: [i_category#11, i_class#10, sum#49, sum#50] Keys [2]: [i_category#11, i_class#10] Functions [2]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#52, sum(UnscaledValue(ss_ext_sales_price#4))#53] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#52,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#53,17,2) AS ss_ext_sales_price#32] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#5))#51, sum(UnscaledValue(ss_ext_sales_price#4))#52] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#51,17,2) AS ss_net_profit#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#52,17,2) AS ss_ext_sales_price#32] -(38) HashAggregate [codegen id : 18] +(34) HashAggregate [codegen id : 16] Input [2]: [ss_net_profit#31, ss_ext_sales_price#32] Keys: [] Functions [2]: [partial_sum(ss_net_profit#31), partial_sum(ss_ext_sales_price#32)] -Aggregate Attributes [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +Aggregate Attributes [4]: [sum#53, isEmpty#54, sum#55, isEmpty#56] +Results [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] -(39) Exchange -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] -Arguments: SinglePartition, true, [id=#62] +(35) Exchange +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] -(40) HashAggregate [codegen id : 19] -Input [4]: [sum#58, isEmpty#59, sum#60, isEmpty#61] +(36) HashAggregate [codegen id : 17] +Input [4]: [sum#57, isEmpty#58, sum#59, isEmpty#60] Keys: [] Functions [2]: [sum(ss_net_profit#31), sum(ss_ext_sales_price#32)] -Aggregate Attributes [2]: [sum(ss_net_profit#31)#63, sum(ss_ext_sales_price#32)#64] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#63) / promote_precision(sum(ss_ext_sales_price#32)#64)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#65, null AS i_category#66, null AS i_class#67, 1 AS t_category#68, 1 AS t_class#69, 2 AS lochierarchy#70] +Aggregate Attributes [2]: [sum(ss_net_profit#31)#62, sum(ss_ext_sales_price#32)#63] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#31)#62) / promote_precision(sum(ss_ext_sales_price#32)#63)), DecimalType(38,11), true) as decimal(38,20)) AS gross_margin#64, null AS i_category#65, null AS i_class#66, 1 AS t_category#67, 1 AS t_class#68, 2 AS lochierarchy#69] -(41) Union +(37) Union -(42) HashAggregate [codegen id : 20] +(38) 
HashAggregate [codegen id : 18] Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] Results [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -(43) Exchange +(39) Exchange Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] -Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), true, [id=#71] +Arguments: hashpartitioning(gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [id=#70] -(44) HashAggregate [codegen id : 21] +(40) HashAggregate [codegen id : 19] Input [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Keys [6]: [gross_margin#23, i_category#11, i_class#10, t_category#24, t_class#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#11 END AS _w0#72] +Results [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, CASE WHEN (t_class#25 = 0) THEN i_category#11 END AS _w0#71] -(45) Exchange -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: hashpartitioning(lochierarchy#26, _w0#72, 5), true, [id=#73] +(41) Exchange +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: hashpartitioning(lochierarchy#26, _w0#71, 5), ENSURE_REQUIREMENTS, [id=#72] -(46) Sort [codegen id : 22] -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#72 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 +(42) Sort [codegen id : 20] +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#71 ASC NULLS FIRST, gross_margin#23 ASC NULLS FIRST], false, 0 -(47) Window -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72] -Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#72, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#74], [lochierarchy#26, _w0#72], [gross_margin#23 ASC NULLS FIRST] +(43) Window +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71] +Arguments: [rank(gross_margin#23) windowspecdefinition(lochierarchy#26, _w0#71, gross_margin#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#73], [lochierarchy#26, _w0#71], [gross_margin#23 ASC NULLS FIRST] -(48) Project [codegen id : 23] -Output [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] -Input [6]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#72, rank_within_parent#74] +(44) Project [codegen id : 21] +Output [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] +Input [6]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, _w0#71, rank_within_parent#73] -(49) TakeOrderedAndProject -Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] -Arguments: 100, [lochierarchy#26 DESC NULLS 
LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#74 ASC NULLS FIRST], [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#74] +(45) TakeOrderedAndProject +Input [5]: [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#11 END ASC NULLS FIRST, rank_within_parent#73 ASC NULLS FIRST], [gross_margin#23, i_category#11, i_class#10, lochierarchy#26, rank_within_parent#73] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt index a72781e1da0ed..297c414a18cb0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/simplified.txt @@ -1,82 +1,74 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,gross_margin,i_class] - WholeStageCodegen (23) + WholeStageCodegen (21) Project [gross_margin,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [gross_margin,lochierarchy,_w0] - WholeStageCodegen (22) + WholeStageCodegen (20) Sort [lochierarchy,_w0,gross_margin] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (21) + WholeStageCodegen (19) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] [_w0] InputAdapter Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #2 - WholeStageCodegen (20) + WholeStageCodegen (18) HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] InputAdapter Union - WholeStageCodegen (13) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] + WholeStageCodegen (5) + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] InputAdapter - Exchange [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] #3 - WholeStageCodegen (12) - HashAggregate [gross_margin,i_category,i_class,t_category,t_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (5) - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),gross_margin,t_category,t_class,lochierarchy,sum,sum] - InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (4) - HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Exchange [i_category,i_class] #3 + WholeStageCodegen (4) + HashAggregate [i_category,i_class,ss_net_profit,ss_ext_sales_price] [sum,sum,sum,sum] + 
Project [ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,i_class,i_category] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_item_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Project [s_store_sk] - Filter [s_state,s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - WholeStageCodegen (11) - HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - Exchange [i_category] #8 - WholeStageCodegen (10) - HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 - WholeStageCodegen (19) + Scan parquet default.item [i_item_sk,i_class,i_category] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [s_store_sk] + Filter [s_state,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + WholeStageCodegen (11) + HashAggregate [i_category,sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [i_category] #7 + WholeStageCodegen (10) + HashAggregate [i_category,ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum,sum] #3 + WholeStageCodegen (17) HashAggregate [sum,isEmpty,sum,isEmpty] [sum(ss_net_profit),sum(ss_ext_sales_price),gross_margin,i_category,i_class,t_category,t_class,lochierarchy,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #9 - WholeStageCodegen (18) + Exchange #8 + WholeStageCodegen (16) HashAggregate [ss_net_profit,ss_ext_sales_price] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum,sum] [sum(UnscaledValue(ss_net_profit)),sum(UnscaledValue(ss_ext_sales_price)),ss_net_profit,ss_ext_sales_price,sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum,sum] #4 + ReusedExchange [i_category,i_class,sum,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 471d38c89e601..432ef4db6b1eb 100644 --- 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -1,102 +1,98 @@ == Physical Plan == -TakeOrderedAndProject (98) -+- * HashAggregate (97) - +- Exchange (96) - +- * HashAggregate (95) - +- Union (94) - :- * HashAggregate (88) - : +- Exchange (87) - : +- * HashAggregate (86) - : +- Union (85) - : :- * HashAggregate (79) - : : +- Exchange (78) - : : +- * HashAggregate (77) - : : +- Union (76) - : : :- * HashAggregate (25) - : : : +- Exchange (24) - : : : +- * HashAggregate (23) - : : : +- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- Union (9) - : : : : : :- * Project (4) - : : : : : : +- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- * Project (8) - : : : : : +- * Filter (7) - : : : : : +- * ColumnarToRow (6) - : : : : : +- Scan parquet default.store_returns (5) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : :- * HashAggregate (46) - : : : +- Exchange (45) - : : : +- * HashAggregate (44) - : : : +- * Project (43) - : : : +- * BroadcastHashJoin Inner BuildRight (42) - : : : :- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- Union (34) - : : : : : :- * Project (29) - : : : : : : +- * Filter (28) - : : : : : : +- * ColumnarToRow (27) - : : : : : : +- Scan parquet default.catalog_sales (26) - : : : : : +- * Project (33) - : : : : : +- * Filter (32) - : : : : : +- * ColumnarToRow (31) - : : : : : +- Scan parquet default.catalog_returns (30) - : : : : +- ReusedExchange (35) - : : : +- BroadcastExchange (41) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.catalog_page (38) - : : +- * HashAggregate (75) - : : +- Exchange (74) - : : +- * HashAggregate (73) - : : +- * Project (72) - : : +- * BroadcastHashJoin Inner BuildRight (71) - : : :- * Project (66) - : : : +- * BroadcastHashJoin Inner BuildRight (65) - : : : :- Union (63) - : : : : :- * Project (50) - : : : : : +- * Filter (49) - : : : : : +- * ColumnarToRow (48) - : : : : : +- Scan parquet default.web_sales (47) - : : : : +- * Project (62) - : : : : +- * SortMergeJoin Inner (61) - : : : : :- * Sort (55) - : : : : : +- Exchange (54) - : : : : : +- * Filter (53) - : : : : : +- * ColumnarToRow (52) - : : : : : +- Scan parquet default.web_returns (51) - : : : : +- * Sort (60) - : : : : +- Exchange (59) - : : : : +- * Filter (58) - : : : : +- * ColumnarToRow (57) - : : : : +- Scan parquet default.web_sales (56) - : : : +- ReusedExchange (64) - : : +- BroadcastExchange (70) - : : +- * Filter (69) - : : +- * ColumnarToRow (68) - : : +- Scan parquet default.web_site (67) - : +- * HashAggregate (84) - : +- Exchange (83) - : +- * HashAggregate (82) - : +- * HashAggregate (81) - : +- ReusedExchange (80) - +- * HashAggregate (93) - +- Exchange (92) - +- * HashAggregate (91) - +- * HashAggregate (90) - +- ReusedExchange (89) +TakeOrderedAndProject (94) ++- * HashAggregate (93) + +- Exchange (92) + +- * HashAggregate (91) + +- Union (90) + :- * HashAggregate (79) + : +- Exchange (78) + : +- * 
HashAggregate (77) + : +- Union (76) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet default.catalog_page (38) + : +- * HashAggregate (75) + : +- Exchange (74) + : +- * HashAggregate (73) + : +- * Project (72) + : +- * BroadcastHashJoin Inner BuildRight (71) + : :- * Project (66) + : : +- * BroadcastHashJoin Inner BuildRight (65) + : : :- Union (63) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet default.web_sales (47) + : : : +- * Project (62) + : : : +- * SortMergeJoin Inner (61) + : : : :- * Sort (55) + : : : : +- Exchange (54) + : : : : +- * Filter (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet default.web_returns (51) + : : : +- * Sort (60) + : : : +- Exchange (59) + : : : +- * Filter (58) + : : : +- * ColumnarToRow (57) + : : : +- Scan parquet default.web_sales (56) + : : +- ReusedExchange (64) + : +- BroadcastExchange (70) + : +- * Filter (69) + : +- * ColumnarToRow (68) + : +- Scan parquet default.web_site (67) + :- * HashAggregate (84) + : +- Exchange (83) + : +- * HashAggregate (82) + : +- * HashAggregate (81) + : +- ReusedExchange (80) + +- * HashAggregate (89) + +- Exchange (88) + +- * HashAggregate (87) + +- * HashAggregate (86) + +- ReusedExchange (85) (1) Scan parquet default.store_sales @@ -203,7 +199,7 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] (24) Exchange Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] @@ -298,7 +294,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, 
sum#72, sum#73, sum#74, sum#75] @@ -341,7 +337,7 @@ Condition : isnotnull(wr_returned_date_sk#96) (54) Exchange Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101] (55) Sort [codegen id : 15] Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] @@ -363,7 +359,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND (59) Exchange Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] -Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), true, [id=#104] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104] (60) Sort [codegen id : 17] Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] @@ -428,7 +424,7 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] (74) Exchange Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] (75) HashAggregate [codegen id : 22] Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] @@ -448,7 +444,7 @@ Results [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum (78) Exchange Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#144] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#144] (79) HashAggregate [codegen id : 24] Input [8]: [channel#40, id#41, sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] @@ -476,7 +472,7 @@ Results [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, i (83) Exchange Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: hashpartitioning(channel#40, 5), true, [id=#176] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#176] (84) HashAggregate [codegen id : 49] Input [7]: [channel#40, sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] @@ -485,75 +481,55 @@ Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] Aggregate Attributes [3]: [sum(sales#161)#177, sum(returns#162)#178, sum(profit#163)#179] Results [5]: [channel#40, null AS id#180, sum(sales#161)#177 AS sum(sales)#181, sum(returns#162)#178 AS sum(returns)#182, sum(profit#163)#179 AS sum(profit)#183] -(85) Union +(85) ReusedExchange [Reuses operator id: 78] +Output [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] -(86) HashAggregate [codegen id : 50] -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] - -(87) Exchange -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), true, [id=#184] - -(88) HashAggregate [codegen id : 51] -Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Keys [5]: [channel#40, id#41, 
sales#148, returns#149, profit#150] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] - -(89) ReusedExchange [Reuses operator id: 78] -Output [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] - -(90) HashAggregate [codegen id : 75] -Input [8]: [channel#40, id#41, sum#185, isEmpty#186, sum#187, isEmpty#188, sum#189, isEmpty#190] +(86) HashAggregate [codegen id : 73] +Input [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#191)] -Aggregate Attributes [3]: [sum(sales#42)#192, sum(returns#43)#193, sum(profit#191)#194] -Results [3]: [sum(sales#42)#192 AS sales#161, sum(returns#43)#193 AS returns#162, sum(profit#191)#194 AS profit#163] +Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#190)] +Aggregate Attributes [3]: [sum(sales#42)#191, sum(returns#43)#192, sum(profit#190)#193] +Results [3]: [sum(sales#42)#191 AS sales#161, sum(returns#43)#192 AS returns#162, sum(profit#190)#193 AS profit#163] -(91) HashAggregate [codegen id : 75] +(87) HashAggregate [codegen id : 73] Input [3]: [sales#161, returns#162, profit#163] Keys: [] Functions [3]: [partial_sum(sales#161), partial_sum(returns#162), partial_sum(profit#163)] -Aggregate Attributes [6]: [sum#195, isEmpty#196, sum#197, isEmpty#198, sum#199, isEmpty#200] -Results [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +Aggregate Attributes [6]: [sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199] +Results [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] -(92) Exchange -Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] -Arguments: SinglePartition, true, [id=#207] +(88) Exchange +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#206] -(93) HashAggregate [codegen id : 76] -Input [6]: [sum#201, isEmpty#202, sum#203, isEmpty#204, sum#205, isEmpty#206] +(89) HashAggregate [codegen id : 74] +Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] Keys: [] Functions [3]: [sum(sales#161), sum(returns#162), sum(profit#163)] -Aggregate Attributes [3]: [sum(sales#161)#208, sum(returns#162)#209, sum(profit#163)#210] -Results [5]: [null AS channel#211, null AS id#212, sum(sales#161)#208 AS sum(sales)#213, sum(returns#162)#209 AS sum(returns)#214, sum(profit#163)#210 AS sum(profit)#215] +Aggregate Attributes [3]: [sum(sales#161)#207, sum(returns#162)#208, sum(profit#163)#209] +Results [5]: [null AS channel#210, null AS id#211, sum(sales#161)#207 AS sum(sales)#212, sum(returns#162)#208 AS sum(returns)#213, sum(profit#163)#209 AS sum(profit)#214] -(94) Union +(90) Union -(95) HashAggregate [codegen id : 77] +(91) HashAggregate [codegen id : 75] Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -(96) Exchange +(92) Exchange Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), true, [id=#216] +Arguments: hashpartitioning(channel#40, id#41, sales#148, returns#149, profit#150, 5), ENSURE_REQUIREMENTS, [id=#215] -(97) HashAggregate [codegen id : 78] +(93) 
HashAggregate [codegen id : 76] Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Keys [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#148, returns#149, profit#150] -(98) TakeOrderedAndProject +(94) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#148, returns#149, profit#150] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#148, returns#149, profit#150] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt index 81b4178b7a9ca..233af6d8cc813 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -1,165 +1,157 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (78) + WholeStageCodegen (76) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (77) + WholeStageCodegen (75) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (51) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (24) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (50) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (23) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (24) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (23) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (6) - HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (5) - HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) 
- Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (2) - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (12) - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cp_catalog_page_id] #7 - WholeStageCodegen (11) - HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (7) - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (8) - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (22) - HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] 
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (22) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [web_site_id] #8 + WholeStageCodegen (21) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [web_site_id] #9 - WholeStageCodegen (21) - HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (18) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + InputAdapter + WholeStageCodegen (15) + Sort [wr_item_sk,wr_order_number] InputAdapter - Union - WholeStageCodegen (13) - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] - Filter [ws_sold_date_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (18) - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] - SortMergeJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - InputAdapter - WholeStageCodegen (15) - Sort [wr_item_sk,wr_order_number] - InputAdapter - Exchange 
[wr_item_sk,wr_order_number] #10 - WholeStageCodegen (14) - Filter [wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + Exchange [wr_item_sk,wr_order_number] #9 + WholeStageCodegen (14) + Filter [wr_returned_date_sk] + ColumnarToRow InputAdapter - WholeStageCodegen (17) - Sort [ws_item_sk,ws_order_number] - InputAdapter - Exchange [ws_item_sk,ws_order_number] #11 - WholeStageCodegen (16) - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + WholeStageCodegen (17) + Sort [ws_item_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #12 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (49) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #13 - WholeStageCodegen (48) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (76) + Exchange [ws_item_sk,ws_order_number] #10 + WholeStageCodegen (16) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (20) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (49) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #12 + WholeStageCodegen (48) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (74) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #14 - WholeStageCodegen (75) + Exchange #13 + WholeStageCodegen (73) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt index fa2435de73e02..d4c1b5f93a0d2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt @@ -1,99 +1,95 @@ == Physical Plan == -TakeOrderedAndProject (95) -+- * HashAggregate (94) - +- Exchange (93) - +- * HashAggregate (92) - +- Union (91) - :- * HashAggregate (85) - : +- Exchange (84) - : +- * HashAggregate (83) - : +- Union (82) - : :- * HashAggregate (76) - : : +- Exchange (75) - : : +- * HashAggregate (74) - : : +- Union (73) - : : :- * HashAggregate (25) - : : : +- Exchange (24) - : : : +- * HashAggregate (23) - : : : +- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- Union (9) - : : : : : :- * Project (4) - : : : : : : +- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- * Project (8) - : : : : : +- * Filter (7) - : : : : : +- * ColumnarToRow (6) - : : : : : +- Scan parquet default.store_returns (5) - : : : : +- BroadcastExchange (14) - : : : : +- * Project (13) - : : : : +- * Filter (12) - : : : : +- * ColumnarToRow (11) - : : : : +- Scan parquet default.date_dim (10) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : :- * HashAggregate (46) - : : : +- Exchange (45) - : : : +- * HashAggregate (44) - : : : +- * Project (43) - : : : +- * BroadcastHashJoin Inner BuildRight (42) - : : : :- * Project (37) - : : : : +- * BroadcastHashJoin Inner BuildRight (36) - : : : : :- Union (34) - : : : : : :- * Project (29) - : : : : : : +- * Filter (28) - : : : : : : +- * ColumnarToRow (27) - : : : : : : +- Scan parquet default.catalog_sales (26) - : : : : : +- * Project (33) - : : : : : +- * Filter (32) - : : : : : +- * ColumnarToRow (31) - : : : : : +- Scan parquet default.catalog_returns (30) - : : : : +- ReusedExchange (35) - : : : +- BroadcastExchange (41) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.catalog_page (38) - : : +- * HashAggregate (72) - : : +- Exchange (71) - : : +- * HashAggregate (70) - : : +- * Project (69) - : : +- * BroadcastHashJoin Inner BuildRight (68) - : : :- * Project (63) - : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : :- Union (60) - : : : : :- * Project (50) - : : : : : +- * Filter (49) - : : : : : +- * ColumnarToRow (48) - : : : : : +- Scan parquet default.web_sales (47) - : : : : +- * Project (59) - : : : : +- * BroadcastHashJoin Inner BuildRight (58) - : : : : :- * Filter (53) - : : : : : +- * ColumnarToRow (52) - : : : : : +- Scan parquet default.web_returns (51) - : : : : +- BroadcastExchange (57) - : : : : +- * Filter (56) - : : : : +- * ColumnarToRow (55) - : : : : +- Scan parquet default.web_sales (54) - : : : +- ReusedExchange (61) - : : +- BroadcastExchange (67) - : : +- * Filter (66) - : : +- * ColumnarToRow (65) - : : +- Scan parquet default.web_site (64) - : +- * HashAggregate (81) - : +- Exchange (80) - : +- * HashAggregate (79) - : +- * HashAggregate (78) - : +- ReusedExchange (77) - +- * HashAggregate (90) - +- Exchange (89) - +- * HashAggregate (88) - +- * HashAggregate (87) - +- ReusedExchange (86) +TakeOrderedAndProject 
(91) ++- * HashAggregate (90) + +- Exchange (89) + +- * HashAggregate (88) + +- Union (87) + :- * HashAggregate (76) + : +- Exchange (75) + : +- * HashAggregate (74) + : +- Union (73) + : :- * HashAggregate (25) + : : +- Exchange (24) + : : +- * HashAggregate (23) + : : +- * Project (22) + : : +- * BroadcastHashJoin Inner BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- Union (9) + : : : : :- * Project (4) + : : : : : +- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- * Project (8) + : : : : +- * Filter (7) + : : : : +- * ColumnarToRow (6) + : : : : +- Scan parquet default.store_returns (5) + : : : +- BroadcastExchange (14) + : : : +- * Project (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : :- * HashAggregate (46) + : : +- Exchange (45) + : : +- * HashAggregate (44) + : : +- * Project (43) + : : +- * BroadcastHashJoin Inner BuildRight (42) + : : :- * Project (37) + : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : : :- Union (34) + : : : : :- * Project (29) + : : : : : +- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.catalog_sales (26) + : : : : +- * Project (33) + : : : : +- * Filter (32) + : : : : +- * ColumnarToRow (31) + : : : : +- Scan parquet default.catalog_returns (30) + : : : +- ReusedExchange (35) + : : +- BroadcastExchange (41) + : : +- * Filter (40) + : : +- * ColumnarToRow (39) + : : +- Scan parquet default.catalog_page (38) + : +- * HashAggregate (72) + : +- Exchange (71) + : +- * HashAggregate (70) + : +- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) + : :- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- Union (60) + : : : :- * Project (50) + : : : : +- * Filter (49) + : : : : +- * ColumnarToRow (48) + : : : : +- Scan parquet default.web_sales (47) + : : : +- * Project (59) + : : : +- * BroadcastHashJoin Inner BuildRight (58) + : : : :- * Filter (53) + : : : : +- * ColumnarToRow (52) + : : : : +- Scan parquet default.web_returns (51) + : : : +- BroadcastExchange (57) + : : : +- * Filter (56) + : : : +- * ColumnarToRow (55) + : : : +- Scan parquet default.web_sales (54) + : : +- ReusedExchange (61) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.web_site (64) + :- * HashAggregate (81) + : +- Exchange (80) + : +- * HashAggregate (79) + : +- * HashAggregate (78) + : +- ReusedExchange (77) + +- * HashAggregate (86) + +- Exchange (85) + +- * HashAggregate (84) + +- * HashAggregate (83) + +- ReusedExchange (82) (1) Scan parquet default.store_sales @@ -200,7 +196,7 @@ Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] (24) Exchange Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] @@ -295,7 +291,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), 
ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] @@ -413,7 +409,7 @@ Results [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] (71) Exchange Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] -Arguments: hashpartitioning(web_site_id#111, 5), true, [id=#121] +Arguments: hashpartitioning(web_site_id#111, 5), ENSURE_REQUIREMENTS, [id=#121] (72) HashAggregate [codegen id : 19] Input [5]: [web_site_id#111, sum#117, sum#118, sum#119, sum#120] @@ -433,7 +429,7 @@ Results [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum (75) Exchange Input [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#143] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#143] (76) HashAggregate [codegen id : 21] Input [8]: [channel#40, id#41, sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] @@ -461,7 +457,7 @@ Results [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, i (80) Exchange Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -Arguments: hashpartitioning(channel#40, 5), true, [id=#175] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#175] (81) HashAggregate [codegen id : 43] Input [7]: [channel#40, sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] @@ -470,75 +466,55 @@ Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] Aggregate Attributes [3]: [sum(sales#160)#176, sum(returns#161)#177, sum(profit#162)#178] Results [5]: [channel#40, null AS id#179, sum(sales#160)#176 AS sum(sales)#180, sum(returns#161)#177 AS sum(returns)#181, sum(profit#162)#178 AS sum(profit)#182] -(82) Union +(82) ReusedExchange [Reuses operator id: 75] +Output [8]: [channel#40, id#41, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] -(83) HashAggregate [codegen id : 44] -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] - -(84) Exchange -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#183] - -(85) HashAggregate [codegen id : 45] -Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] - -(86) ReusedExchange [Reuses operator id: 75] -Output [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] - -(87) HashAggregate [codegen id : 66] -Input [8]: [channel#40, id#41, sum#184, isEmpty#185, sum#186, isEmpty#187, sum#188, isEmpty#189] +(83) HashAggregate [codegen id : 64] +Input [8]: [channel#40, id#41, sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#190)] -Aggregate Attributes [3]: [sum(sales#42)#191, sum(returns#43)#192, sum(profit#190)#193] -Results [3]: [sum(sales#42)#191 AS sales#160, sum(returns#43)#192 AS returns#161, sum(profit#190)#193 AS profit#162] +Functions [3]: [sum(sales#42), sum(returns#43), 
sum(profit#189)] +Aggregate Attributes [3]: [sum(sales#42)#190, sum(returns#43)#191, sum(profit#189)#192] +Results [3]: [sum(sales#42)#190 AS sales#160, sum(returns#43)#191 AS returns#161, sum(profit#189)#192 AS profit#162] -(88) HashAggregate [codegen id : 66] +(84) HashAggregate [codegen id : 64] Input [3]: [sales#160, returns#161, profit#162] Keys: [] Functions [3]: [partial_sum(sales#160), partial_sum(returns#161), partial_sum(profit#162)] -Aggregate Attributes [6]: [sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199] -Results [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +Aggregate Attributes [6]: [sum#193, isEmpty#194, sum#195, isEmpty#196, sum#197, isEmpty#198] +Results [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] -(89) Exchange -Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] -Arguments: SinglePartition, true, [id=#206] +(85) Exchange +Input [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#205] -(90) HashAggregate [codegen id : 67] -Input [6]: [sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205] +(86) HashAggregate [codegen id : 65] +Input [6]: [sum#199, isEmpty#200, sum#201, isEmpty#202, sum#203, isEmpty#204] Keys: [] Functions [3]: [sum(sales#160), sum(returns#161), sum(profit#162)] -Aggregate Attributes [3]: [sum(sales#160)#207, sum(returns#161)#208, sum(profit#162)#209] -Results [5]: [null AS channel#210, null AS id#211, sum(sales#160)#207 AS sum(sales)#212, sum(returns#161)#208 AS sum(returns)#213, sum(profit#162)#209 AS sum(profit)#214] +Aggregate Attributes [3]: [sum(sales#160)#206, sum(returns#161)#207, sum(profit#162)#208] +Results [5]: [null AS channel#209, null AS id#210, sum(sales#160)#206 AS sum(sales)#211, sum(returns#161)#207 AS sum(returns)#212, sum(profit#162)#208 AS sum(profit)#213] -(91) Union +(87) Union -(92) HashAggregate [codegen id : 68] +(88) HashAggregate [codegen id : 66] Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -(93) Exchange +(89) Exchange Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), true, [id=#215] +Arguments: hashpartitioning(channel#40, id#41, sales#147, returns#148, profit#149, 5), ENSURE_REQUIREMENTS, [id=#214] -(94) HashAggregate [codegen id : 69] +(90) HashAggregate [codegen id : 67] Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Keys [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#147, returns#148, profit#149] -(95) TakeOrderedAndProject +(91) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#147, returns#148, profit#149] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#147, returns#148, profit#149] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt index 6bb223e2f4488..f5a22c77a8e30 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/simplified.txt @@ -1,156 +1,148 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (69) + WholeStageCodegen (67) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (68) + WholeStageCodegen (66) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (45) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (21) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (44) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (20) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (21) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (6) + HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (20) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (6) - HashAggregate [s_store_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Exchange [s_store_id] #3 + WholeStageCodegen (5) + HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] + Project [store_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (5) - HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (2) - Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Union + WholeStageCodegen (1) + Project [ss_store_sk,ss_sold_date_sk,ss_ext_sales_price,ss_net_profit] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow InputAdapter - 
BroadcastExchange #6 - WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (12) - HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (2) + Project [sr_store_sk,sr_returned_date_sk,sr_return_amt,sr_net_loss] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cp_catalog_page_id] #7 - WholeStageCodegen (11) - HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (7) - Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] - Filter [cs_sold_date_sk,cs_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (8) - Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] - Filter [cr_returned_date_sk,cr_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 + BroadcastExchange #4 + WholeStageCodegen (3) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (19) - HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (4) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (12) + HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] + InputAdapter + Exchange [cp_catalog_page_id] #6 + WholeStageCodegen (11) + HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] + Project [page_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] InputAdapter - Exchange [web_site_id] #9 - WholeStageCodegen (18) - HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project 
[wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (13) - Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] - Filter [ws_sold_date_sk,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (15) - Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] - Filter [wr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (14) - Filter [ws_item_sk,ws_order_number,ws_web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] - InputAdapter - ReusedExchange [d_date_sk] #5 + Union + WholeStageCodegen (7) + Project [cs_catalog_page_sk,cs_sold_date_sk,cs_ext_sales_price,cs_net_profit] + Filter [cs_sold_date_sk,cs_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (8) + Project [cr_catalog_page_sk,cr_returned_date_sk,cr_return_amount,cr_net_loss] + Filter [cr_returned_date_sk,cr_catalog_page_sk] + ColumnarToRow InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (43) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (19) + HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter - Exchange [channel] #12 - WholeStageCodegen (42) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (67) + Exchange [web_site_id] #8 + WholeStageCodegen (18) + HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] + Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] + BroadcastHashJoin [date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (13) + Project [ws_web_site_sk,ws_sold_date_sk,ws_ext_sales_price,ws_net_profit] + Filter [ws_sold_date_sk,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (15) + Project [ws_web_site_sk,wr_returned_date_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_item_sk,wr_order_number,ws_item_sk,ws_order_number] + Filter [wr_returned_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (14) + Filter [ws_item_sk,ws_order_number,ws_web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #10 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (43) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #11 + WholeStageCodegen (42) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (65) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #13 - WholeStageCodegen (66) + Exchange #12 + WholeStageCodegen (64) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt index 628ca0ad4711c..214e5eadd0eac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt @@ -1,68 +1,64 @@ == Physical Plan == -TakeOrderedAndProject (64) -+- * Project (63) - +- Window (62) - +- * Sort (61) - +- Exchange (60) - +- * HashAggregate (59) - +- Exchange (58) - +- * HashAggregate (57) - +- Union (56) - :- * HashAggregate (50) - : +- Exchange (49) - : +- * HashAggregate (48) - : +- Union (47) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * BroadcastHashJoin Inner BuildRight (37) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (36) - : : +- * BroadcastHashJoin LeftSemi BuildRight (35) - : : :- * 
Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (34) - : : +- * Project (33) - : : +- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * HashAggregate (28) - : : +- Exchange (27) - : : +- * HashAggregate (26) - : : +- * Project (25) - : : +- * BroadcastHashJoin Inner BuildRight (24) - : : :- * Project (19) - : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : :- * Filter (16) - : : : : +- * ColumnarToRow (15) - : : : : +- Scan parquet default.store_sales (14) - : : : +- ReusedExchange (17) - : : +- BroadcastExchange (23) - : : +- * Filter (22) - : : +- * ColumnarToRow (21) - : : +- Scan parquet default.store (20) - : +- * HashAggregate (46) - : +- Exchange (45) - : +- * HashAggregate (44) - : +- * HashAggregate (43) - : +- ReusedExchange (42) - +- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * HashAggregate (52) - +- ReusedExchange (51) +TakeOrderedAndProject (60) ++- * Project (59) + +- Window (58) + +- * Sort (57) + +- Exchange (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Union (52) + :- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (36) + : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- Window (31) + : +- * Sort (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (19) + : : +- * BroadcastHashJoin Inner BuildRight (18) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.store_sales (14) + : : +- ReusedExchange (17) + : +- BroadcastExchange (23) + : +- * Filter (22) + : +- * ColumnarToRow (21) + : +- Scan parquet default.store (20) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -186,7 +182,7 @@ Results [2]: [s_state#9, sum#12] (27) Exchange Input [2]: [s_state#9, sum#12] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#13] (28) HashAggregate [codegen id : 5] Input [2]: [s_state#9, sum#12] @@ -197,7 +193,7 @@ Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue( (29) Exchange Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#17] (30) Sort [codegen id : 6] Input [3]: [s_state#15, s_state#9, _w2#16] @@ -246,7 +242,7 @@ Results [3]: [s_state#9, s_county#8, sum#22] (40) Exchange Input [3]: [s_state#9, s_county#8, sum#22] -Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, 
[id=#23] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), ENSURE_REQUIREMENTS, [id=#23] (41) HashAggregate [codegen id : 10] Input [3]: [s_state#9, s_county#8, sum#22] @@ -274,7 +270,7 @@ Results [3]: [s_state#9, sum#34, isEmpty#35] (45) Exchange Input [3]: [s_state#9, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#36] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#36] (46) HashAggregate [codegen id : 21] Input [3]: [s_state#9, sum#34, isEmpty#35] @@ -283,91 +279,71 @@ Functions [1]: [sum(total_sum#31)] Aggregate Attributes [1]: [sum(total_sum#31)#37] Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] -(47) Union +(47) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#43] -(48) HashAggregate [codegen id : 22] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(49) Exchange -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] - -(50) HashAggregate [codegen id : 23] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(51) ReusedExchange [Reuses operator id: 40] -Output [3]: [s_state#9, s_county#8, sum#44] - -(52) HashAggregate [codegen id : 33] -Input [3]: [s_state#9, s_county#8, sum#44] +(48) HashAggregate [codegen id : 31] +Input [3]: [s_state#9, s_county#8, sum#43] Keys [2]: [s_state#9, s_county#8] Functions [1]: [sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#44] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#44,17,2) AS total_sum#31] -(53) HashAggregate [codegen id : 33] +(49) HashAggregate [codegen id : 31] Input [1]: [total_sum#31] Keys: [] Functions [1]: [partial_sum(total_sum#31)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] -(54) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, true, [id=#50] +(50) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] -(55) HashAggregate [codegen id : 34] -Input [2]: [sum#48, isEmpty#49] +(51) HashAggregate [codegen id : 32] +Input [2]: [sum#47, isEmpty#48] Keys: [] Functions [1]: [sum(total_sum#31)] -Aggregate Attributes [1]: [sum(total_sum#31)#51] -Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Aggregate Attributes [1]: [sum(total_sum#31)#50] +Results [6]: [sum(total_sum#31)#50 AS total_sum#51, null AS s_state#52, null AS s_county#53, 1 AS g_state#54, 1 AS g_county#55, 
2 AS lochierarchy#56] -(56) Union +(52) Union -(57) HashAggregate [codegen id : 35] +(53) HashAggregate [codegen id : 33] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -(58) Exchange +(54) Exchange Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), ENSURE_REQUIREMENTS, [id=#57] -(59) HashAggregate [codegen id : 36] +(55) HashAggregate [codegen id : 34] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#58] -(60) Exchange -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] +(56) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: hashpartitioning(lochierarchy#28, _w0#58, 5), ENSURE_REQUIREMENTS, [id=#59] -(61) Sort [codegen id : 37] -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 +(57) Sort [codegen id : 35] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#58 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 -(62) Window -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] +(58) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#58, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#60], [lochierarchy#28, _w0#58], [total_sum#25 DESC NULLS LAST] -(63) Project [codegen id : 38] -Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] +(59) Project [codegen id : 36] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58, rank_within_parent#60] -(64) TakeOrderedAndProject -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC 
NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +(60) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#60 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt index b3dbc1612539a..6b02f5692b0eb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/simplified.txt @@ -1,107 +1,99 @@ TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] - WholeStageCodegen (38) + WholeStageCodegen (36) Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (37) + WholeStageCodegen (35) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (36) + WholeStageCodegen (34) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] InputAdapter Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 - WholeStageCodegen (35) + WholeStageCodegen (33) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + WholeStageCodegen (10) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] InputAdapter - Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #3 - WholeStageCodegen (22) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + Exchange [s_state,s_county] #3 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state,s_county] #4 - WholeStageCodegen (9) - HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_county,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (8) - BroadcastHashJoin 
[s_state,s_state] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_county,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [s_state] - Filter [ranking] + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [s_state,_w2] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] InputAdapter - Window [_w2,s_state] - WholeStageCodegen (6) - Sort [s_state,_w2] - InputAdapter - Exchange [s_state] #8 - WholeStageCodegen (5) - HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + Exchange [s_state] #8 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state] #9 - WholeStageCodegen (4) - HashAggregate [s_state,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - WholeStageCodegen (21) - HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [s_state] #11 - WholeStageCodegen (20) - HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] - InputAdapter - ReusedExchange [s_state,s_county,sum] #4 - WholeStageCodegen (34) + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + WholeStageCodegen (21) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #10 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (32) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] InputAdapter - Exchange #12 - WholeStageCodegen (33) + Exchange #11 + WholeStageCodegen (31) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] InputAdapter - 
ReusedExchange [s_state,s_county,sum] #4 + ReusedExchange [s_state,s_county,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt index 705d1b3f91342..e41dc814cbd2e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt @@ -1,68 +1,64 @@ == Physical Plan == -TakeOrderedAndProject (64) -+- * Project (63) - +- Window (62) - +- * Sort (61) - +- Exchange (60) - +- * HashAggregate (59) - +- Exchange (58) - +- * HashAggregate (57) - +- Union (56) - :- * HashAggregate (50) - : +- Exchange (49) - : +- * HashAggregate (48) - : +- Union (47) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- * Project (38) - : : +- * BroadcastHashJoin Inner BuildRight (37) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (36) - : : +- * BroadcastHashJoin LeftSemi BuildRight (35) - : : :- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- BroadcastExchange (34) - : : +- * Project (33) - : : +- * Filter (32) - : : +- Window (31) - : : +- * Sort (30) - : : +- Exchange (29) - : : +- * HashAggregate (28) - : : +- Exchange (27) - : : +- * HashAggregate (26) - : : +- * Project (25) - : : +- * BroadcastHashJoin Inner BuildRight (24) - : : :- * Project (22) - : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : :- * Filter (16) - : : : : +- * ColumnarToRow (15) - : : : : +- Scan parquet default.store_sales (14) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.store (17) - : : +- ReusedExchange (23) - : +- * HashAggregate (46) - : +- Exchange (45) - : +- * HashAggregate (44) - : +- * HashAggregate (43) - : +- ReusedExchange (42) - +- * HashAggregate (55) - +- Exchange (54) - +- * HashAggregate (53) - +- * HashAggregate (52) - +- ReusedExchange (51) +TakeOrderedAndProject (60) ++- * Project (59) + +- Window (58) + +- * Sort (57) + +- Exchange (56) + +- * HashAggregate (55) + +- Exchange (54) + +- * HashAggregate (53) + +- Union (52) + :- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.store_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (36) + : +- * BroadcastHashJoin LeftSemi BuildRight (35) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- BroadcastExchange (34) + : +- * Project (33) + : +- * Filter (32) + : +- Window (31) + : +- * Sort (30) + : +- Exchange (29) + : +- * HashAggregate (28) + : +- Exchange (27) + : +- * HashAggregate (26) + : +- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * Project (22) + : : +- * 
BroadcastHashJoin Inner BuildRight (21) + : : :- * Filter (16) + : : : +- * ColumnarToRow (15) + : : : +- Scan parquet default.store_sales (14) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.store (17) + : +- ReusedExchange (23) + :- * HashAggregate (46) + : +- Exchange (45) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- ReusedExchange (42) + +- * HashAggregate (51) + +- Exchange (50) + +- * HashAggregate (49) + +- * HashAggregate (48) + +- ReusedExchange (47) (1) Scan parquet default.store_sales @@ -186,7 +182,7 @@ Results [2]: [s_state#9, sum#12] (27) Exchange Input [2]: [s_state#9, sum#12] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#13] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#13] (28) HashAggregate [codegen id : 5] Input [2]: [s_state#9, sum#12] @@ -197,7 +193,7 @@ Results [3]: [s_state#9 AS s_state#15, s_state#9, MakeDecimal(sum(UnscaledValue( (29) Exchange Input [3]: [s_state#15, s_state#9, _w2#16] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#17] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#17] (30) Sort [codegen id : 6] Input [3]: [s_state#15, s_state#9, _w2#16] @@ -246,7 +242,7 @@ Results [3]: [s_state#9, s_county#8, sum#22] (40) Exchange Input [3]: [s_state#9, s_county#8, sum#22] -Arguments: hashpartitioning(s_state#9, s_county#8, 5), true, [id=#23] +Arguments: hashpartitioning(s_state#9, s_county#8, 5), ENSURE_REQUIREMENTS, [id=#23] (41) HashAggregate [codegen id : 10] Input [3]: [s_state#9, s_county#8, sum#22] @@ -274,7 +270,7 @@ Results [3]: [s_state#9, sum#34, isEmpty#35] (45) Exchange Input [3]: [s_state#9, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_state#9, 5), true, [id=#36] +Arguments: hashpartitioning(s_state#9, 5), ENSURE_REQUIREMENTS, [id=#36] (46) HashAggregate [codegen id : 21] Input [3]: [s_state#9, sum#34, isEmpty#35] @@ -283,91 +279,71 @@ Functions [1]: [sum(total_sum#31)] Aggregate Attributes [1]: [sum(total_sum#31)#37] Results [6]: [sum(total_sum#31)#37 AS total_sum#38, s_state#9, null AS s_county#39, 0 AS g_state#40, 1 AS g_county#41, 1 AS lochierarchy#42] -(47) Union +(47) ReusedExchange [Reuses operator id: 40] +Output [3]: [s_state#9, s_county#8, sum#43] -(48) HashAggregate [codegen id : 22] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(49) Exchange -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#43] - -(50) HashAggregate [codegen id : 23] -Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] - -(51) ReusedExchange [Reuses operator id: 40] -Output [3]: [s_state#9, s_county#8, sum#44] - -(52) HashAggregate [codegen id : 33] -Input [3]: [s_state#9, s_county#8, sum#44] +(48) HashAggregate [codegen id : 31] +Input [3]: [s_state#9, s_county#8, sum#43] Keys [2]: [s_state#9, s_county#8] Functions [1]: 
[sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#45] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#45,17,2) AS total_sum#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#3))#44] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#44,17,2) AS total_sum#31] -(53) HashAggregate [codegen id : 33] +(49) HashAggregate [codegen id : 31] Input [1]: [total_sum#31] Keys: [] Functions [1]: [partial_sum(total_sum#31)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Aggregate Attributes [2]: [sum#45, isEmpty#46] +Results [2]: [sum#47, isEmpty#48] -(54) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, true, [id=#50] +(50) Exchange +Input [2]: [sum#47, isEmpty#48] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] -(55) HashAggregate [codegen id : 34] -Input [2]: [sum#48, isEmpty#49] +(51) HashAggregate [codegen id : 32] +Input [2]: [sum#47, isEmpty#48] Keys: [] Functions [1]: [sum(total_sum#31)] -Aggregate Attributes [1]: [sum(total_sum#31)#51] -Results [6]: [sum(total_sum#31)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Aggregate Attributes [1]: [sum(total_sum#31)#50] +Results [6]: [sum(total_sum#31)#50 AS total_sum#51, null AS s_state#52, null AS s_county#53, 1 AS g_state#54, 1 AS g_county#55, 2 AS lochierarchy#56] -(56) Union +(52) Union -(57) HashAggregate [codegen id : 35] +(53) HashAggregate [codegen id : 33] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -(58) Exchange +(54) Exchange Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] -Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), true, [id=#58] +Arguments: hashpartitioning(total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28, 5), ENSURE_REQUIREMENTS, [id=#57] -(59) HashAggregate [codegen id : 36] +(55) HashAggregate [codegen id : 34] Input [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Keys [6]: [total_sum#25, s_state#9, s_county#8, g_state#26, g_county#27, lochierarchy#28] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#59] +Results [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, CASE WHEN (g_county#27 = 0) THEN s_state#9 END AS _w0#58] -(60) Exchange -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: hashpartitioning(lochierarchy#28, _w0#59, 5), true, [id=#60] +(56) Exchange +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: hashpartitioning(lochierarchy#28, _w0#58, 5), ENSURE_REQUIREMENTS, [id=#59] -(61) Sort [codegen id : 37] -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#25 DESC NULLS LAST], false, 0 +(57) Sort [codegen id : 35] +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [lochierarchy#28 ASC NULLS FIRST, _w0#58 ASC NULLS FIRST, total_sum#25 
DESC NULLS LAST], false, 0 -(62) Window -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59] -Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#59, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#28, _w0#59], [total_sum#25 DESC NULLS LAST] +(58) Window +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58] +Arguments: [rank(total_sum#25) windowspecdefinition(lochierarchy#28, _w0#58, total_sum#25 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#60], [lochierarchy#28, _w0#58], [total_sum#25 DESC NULLS LAST] -(63) Project [codegen id : 38] -Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#59, rank_within_parent#61] +(59) Project [codegen id : 36] +Output [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Input [6]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, _w0#58, rank_within_parent#60] -(64) TakeOrderedAndProject -Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] -Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#61] +(60) TakeOrderedAndProject +Input [5]: [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] +Arguments: 100, [lochierarchy#28 DESC NULLS LAST, CASE WHEN (lochierarchy#28 = 0) THEN s_state#9 END ASC NULLS FIRST, rank_within_parent#60 ASC NULLS FIRST], [total_sum#25, s_state#9, s_county#8, lochierarchy#28, rank_within_parent#60] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt index bd0bd7e87251f..b32ed8ecf2857 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/simplified.txt @@ -1,107 +1,99 @@ TakeOrderedAndProject [lochierarchy,s_state,rank_within_parent,total_sum,s_county] - WholeStageCodegen (38) + WholeStageCodegen (36) Project [total_sum,s_state,s_county,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (37) + WholeStageCodegen (35) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (36) + WholeStageCodegen (34) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] [_w0] InputAdapter Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #2 - WholeStageCodegen (35) + WholeStageCodegen (33) HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] + WholeStageCodegen (10) + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] InputAdapter - Exchange [total_sum,s_state,s_county,g_state,g_county,lochierarchy] #3 - WholeStageCodegen (22) - HashAggregate [total_sum,s_state,s_county,g_state,g_county,lochierarchy] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate 
[s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,g_state,g_county,lochierarchy,sum] + Exchange [s_state,s_county] #3 + WholeStageCodegen (9) + HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_county,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state,s_county] #4 - WholeStageCodegen (9) - HashAggregate [s_state,s_county,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_county,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (8) - BroadcastHashJoin [s_state,s_state] - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_county,s_state] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Project [s_state] - Filter [ranking] + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (8) + BroadcastHashJoin [s_state,s_state] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_county,s_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Project [s_state] + Filter [ranking] + InputAdapter + Window [_w2,s_state] + WholeStageCodegen (6) + Sort [s_state,_w2] + InputAdapter + Exchange [s_state] #7 + WholeStageCodegen (5) + HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] InputAdapter - Window [_w2,s_state] - WholeStageCodegen (6) - Sort [s_state,_w2] - InputAdapter - Exchange [s_state] #8 - WholeStageCodegen (5) - HashAggregate [s_state,sum] [sum(UnscaledValue(ss_net_profit)),s_state,_w2,sum] + Exchange [s_state] #8 + WholeStageCodegen (4) + HashAggregate [s_state,ss_net_profit] [sum,sum] + Project [ss_net_profit,s_state] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_net_profit,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Filter [ss_store_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] InputAdapter - Exchange [s_state] #9 - WholeStageCodegen (4) - HashAggregate [s_state,ss_net_profit] [sum,sum] - Project [ss_net_profit,s_state] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_net_profit,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Filter [ss_store_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_net_profit] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (21) - 
HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [s_state] #11 - WholeStageCodegen (20) - HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] - InputAdapter - ReusedExchange [s_state,s_county,sum] #4 - WholeStageCodegen (34) + BroadcastExchange #9 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (21) + HashAggregate [s_state,sum,isEmpty] [sum(total_sum),total_sum,s_county,g_state,g_county,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [s_state] #10 + WholeStageCodegen (20) + HashAggregate [s_state,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] + InputAdapter + ReusedExchange [s_state,s_county,sum] #3 + WholeStageCodegen (32) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,s_state,s_county,g_state,g_county,lochierarchy,sum,isEmpty] InputAdapter - Exchange #12 - WholeStageCodegen (33) + Exchange #11 + WholeStageCodegen (31) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [s_state,s_county,sum] [sum(UnscaledValue(ss_net_profit)),total_sum,sum] InputAdapter - ReusedExchange [s_state,s_county,sum] #4 + ReusedExchange [s_state,s_county,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt index 3f452dc9272dc..ce1206c0ba906 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt @@ -1,142 +1,134 @@ == Physical Plan == -TakeOrderedAndProject (138) -+- * Project (137) - +- * SortMergeJoin Inner (136) - :- * Sort (74) - : +- Exchange (73) - : +- * HashAggregate (72) - : +- Exchange (71) - : +- * HashAggregate (70) - : +- * HashAggregate (69) - : +- Exchange (68) - : +- * HashAggregate (67) - : +- Union (66) - : :- * HashAggregate (47) - : : +- Exchange (46) - : : +- * HashAggregate (45) - : : +- Union (44) - : : :- * Project (25) - : : : +- SortMergeJoin LeftOuter (24) - : : : :- * Sort (18) - : : : : +- Exchange (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- * Sort (23) - : : : +- Exchange (22) - : : : +- * Filter (21) - : : : +- * ColumnarToRow (20) - : : : +- Scan parquet default.catalog_returns (19) - : : +- * Project (43) - : : +- SortMergeJoin LeftOuter (42) - : : :- * Sort (36) - : : : +- Exchange (35) - : : : +- * Project (34) - : : : +- * BroadcastHashJoin Inner BuildRight (33) - : : : :- * Project (31) - : : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : : :- * Filter (28) - : : : : : +- * 
ColumnarToRow (27) - : : : : : +- Scan parquet default.store_sales (26) - : : : : +- ReusedExchange (29) - : : : +- ReusedExchange (32) - : : +- * Sort (41) - : : +- Exchange (40) - : : +- * Filter (39) - : : +- * ColumnarToRow (38) - : : +- Scan parquet default.store_returns (37) - : +- * Project (65) - : +- SortMergeJoin LeftOuter (64) - : :- * Sort (58) - : : +- Exchange (57) - : : +- * Project (56) - : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Project (53) - : : : +- * BroadcastHashJoin Inner BuildRight (52) - : : : :- * Filter (50) - : : : : +- * ColumnarToRow (49) - : : : : +- Scan parquet default.web_sales (48) - : : : +- ReusedExchange (51) - : : +- ReusedExchange (54) - : +- * Sort (63) - : +- Exchange (62) - : +- * Filter (61) - : +- * ColumnarToRow (60) - : +- Scan parquet default.web_returns (59) - +- * Sort (135) - +- Exchange (134) - +- * HashAggregate (133) - +- Exchange (132) - +- * HashAggregate (131) - +- * HashAggregate (130) - +- Exchange (129) - +- * HashAggregate (128) - +- Union (127) - :- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- Union (108) - : :- * Project (92) - : : +- SortMergeJoin LeftOuter (91) - : : :- * Sort (88) - : : : +- Exchange (87) - : : : +- * Project (86) - : : : +- * BroadcastHashJoin Inner BuildRight (85) - : : : :- * Project (80) - : : : : +- * BroadcastHashJoin Inner BuildRight (79) - : : : : :- * Filter (77) - : : : : : +- * ColumnarToRow (76) - : : : : : +- Scan parquet default.catalog_sales (75) - : : : : +- ReusedExchange (78) - : : : +- BroadcastExchange (84) - : : : +- * Filter (83) - : : : +- * ColumnarToRow (82) - : : : +- Scan parquet default.date_dim (81) - : : +- * Sort (90) - : : +- ReusedExchange (89) - : +- * Project (107) - : +- SortMergeJoin LeftOuter (106) - : :- * Sort (103) - : : +- Exchange (102) - : : +- * Project (101) - : : +- * BroadcastHashJoin Inner BuildRight (100) - : : :- * Project (98) - : : : +- * BroadcastHashJoin Inner BuildRight (97) - : : : :- * Filter (95) - : : : : +- * ColumnarToRow (94) - : : : : +- Scan parquet default.store_sales (93) - : : : +- ReusedExchange (96) - : : +- ReusedExchange (99) - : +- * Sort (105) - : +- ReusedExchange (104) - +- * Project (126) - +- SortMergeJoin LeftOuter (125) - :- * Sort (122) - : +- Exchange (121) - : +- * Project (120) - : +- * BroadcastHashJoin Inner BuildRight (119) - : :- * Project (117) - : : +- * BroadcastHashJoin Inner BuildRight (116) - : : :- * Filter (114) - : : : +- * ColumnarToRow (113) - : : : +- Scan parquet default.web_sales (112) - : : +- ReusedExchange (115) - : +- ReusedExchange (118) - +- * Sort (124) - +- ReusedExchange (123) +TakeOrderedAndProject (130) ++- * Project (129) + +- * SortMergeJoin Inner (128) + :- * Sort (70) + : +- Exchange (69) + : +- * HashAggregate (68) + : +- Exchange (67) + : +- * HashAggregate (66) + : +- * HashAggregate (65) + : +- Exchange (64) + : +- * HashAggregate (63) + : +- Union (62) + : :- * Project (25) + : : +- SortMergeJoin LeftOuter (24) + : : :- * Sort (18) + : : : +- Exchange (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * 
Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.catalog_returns (19) + : :- * Project (43) + : : +- SortMergeJoin LeftOuter (42) + : : :- * Sort (36) + : : : +- Exchange (35) + : : : +- * Project (34) + : : : +- * BroadcastHashJoin Inner BuildRight (33) + : : : :- * Project (31) + : : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : : :- * Filter (28) + : : : : : +- * ColumnarToRow (27) + : : : : : +- Scan parquet default.store_sales (26) + : : : : +- ReusedExchange (29) + : : : +- ReusedExchange (32) + : : +- * Sort (41) + : : +- Exchange (40) + : : +- * Filter (39) + : : +- * ColumnarToRow (38) + : : +- Scan parquet default.store_returns (37) + : +- * Project (61) + : +- SortMergeJoin LeftOuter (60) + : :- * Sort (54) + : : +- Exchange (53) + : : +- * Project (52) + : : +- * BroadcastHashJoin Inner BuildRight (51) + : : :- * Project (49) + : : : +- * BroadcastHashJoin Inner BuildRight (48) + : : : :- * Filter (46) + : : : : +- * ColumnarToRow (45) + : : : : +- Scan parquet default.web_sales (44) + : : : +- ReusedExchange (47) + : : +- ReusedExchange (50) + : +- * Sort (59) + : +- Exchange (58) + : +- * Filter (57) + : +- * ColumnarToRow (56) + : +- Scan parquet default.web_returns (55) + +- * Sort (127) + +- Exchange (126) + +- * HashAggregate (125) + +- Exchange (124) + +- * HashAggregate (123) + +- * HashAggregate (122) + +- Exchange (121) + +- * HashAggregate (120) + +- Union (119) + :- * Project (88) + : +- SortMergeJoin LeftOuter (87) + : :- * Sort (84) + : : +- Exchange (83) + : : +- * Project (82) + : : +- * BroadcastHashJoin Inner BuildRight (81) + : : :- * Project (76) + : : : +- * BroadcastHashJoin Inner BuildRight (75) + : : : :- * Filter (73) + : : : : +- * ColumnarToRow (72) + : : : : +- Scan parquet default.catalog_sales (71) + : : : +- ReusedExchange (74) + : : +- BroadcastExchange (80) + : : +- * Filter (79) + : : +- * ColumnarToRow (78) + : : +- Scan parquet default.date_dim (77) + : +- * Sort (86) + : +- ReusedExchange (85) + :- * Project (103) + : +- SortMergeJoin LeftOuter (102) + : :- * Sort (99) + : : +- Exchange (98) + : : +- * Project (97) + : : +- * BroadcastHashJoin Inner BuildRight (96) + : : :- * Project (94) + : : : +- * BroadcastHashJoin Inner BuildRight (93) + : : : :- * Filter (91) + : : : : +- * ColumnarToRow (90) + : : : : +- Scan parquet default.store_sales (89) + : : : +- ReusedExchange (92) + : : +- ReusedExchange (95) + : +- * Sort (101) + : +- ReusedExchange (100) + +- * Project (118) + +- SortMergeJoin LeftOuter (117) + :- * Sort (114) + : +- Exchange (113) + : +- * Project (112) + : +- * BroadcastHashJoin Inner BuildRight (111) + : :- * Project (109) + : : +- * BroadcastHashJoin Inner BuildRight (108) + : : :- * Filter (106) + : : : +- * ColumnarToRow (105) + : : : +- Scan parquet default.web_sales (104) + : : +- ReusedExchange (107) + : +- ReusedExchange (110) + +- * Sort (116) + +- ReusedExchange (115) (1) Scan parquet default.catalog_sales @@ -213,7 +205,7 @@ Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, (17) Exchange Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#16] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, 
[id=#16] (18) Sort [codegen id : 4] Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -235,7 +227,7 @@ Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) (22) Exchange Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), true, [id=#21] +Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#21] (23) Sort [codegen id : 6] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] @@ -290,7 +282,7 @@ Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity (35) Exchange Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (36) Sort [codegen id : 11] Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] @@ -312,7 +304,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (40) Exchange Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#34] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] (41) Sort [codegen id : 13] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] @@ -327,426 +319,386 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#35, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#36] Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(44) Union - -(45) HashAggregate [codegen id : 15] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(46) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#37] - -(47) HashAggregate [codegen id : 16] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate 
Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] - -(48) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(44) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(45) ColumnarToRow [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(50) Filter [codegen id : 19] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(46) Filter [codegen id : 17] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(51) ReusedExchange [Reuses operator id: 8] +(47) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(52) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_item_sk#39] +(48) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_item_sk#38] Right keys [1]: [i_item_sk#6] Join condition: None -(53) Project [codegen id : 19] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(49) Project [codegen id : 17] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(54) ReusedExchange [Reuses operator id: 14] +(50) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(55) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#38] +(51) BroadcastHashJoin [codegen id : 17] +Left keys [1]: [ws_sold_date_sk#37] Right keys [1]: [d_date_sk#13] Join condition: None -(56) Project [codegen id : 19] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(52) Project [codegen id : 17] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 
d_date_sk#13, d_year#14] -(57) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#43] +(53) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#42] -(58) Sort [codegen id : 20] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(54) Sort [codegen id : 18] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(59) Scan parquet default.web_returns -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(55) Scan parquet default.web_returns +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(60) ColumnarToRow [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(56) ColumnarToRow [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(61) Filter [codegen id : 21] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Condition : (isnotnull(wr_order_number#45) AND isnotnull(wr_item_sk#44)) +(57) Filter [codegen id : 19] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Condition : (isnotnull(wr_order_number#44) AND isnotnull(wr_item_sk#43)) -(62) Exchange -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: hashpartitioning(wr_order_number#45, wr_item_sk#44, 5), true, [id=#48] +(58) Exchange +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: hashpartitioning(wr_order_number#44, wr_item_sk#43, 5), ENSURE_REQUIREMENTS, [id=#47] -(63) Sort [codegen id : 22] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(59) Sort [codegen id : 20] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(64) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(60) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(65) Project [codegen id : 23] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#49, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#50] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(61) Project [codegen id : 21] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#48, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#49] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(66) Union +(62) Union -(67) HashAggregate [codegen id : 24] +(63) HashAggregate [codegen id : 22] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(68) Exchange +(64) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), true, [id=#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#50] -(69) HashAggregate [codegen id : 25] +(65) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] -(70) HashAggregate [codegen id : 25] +(66) HashAggregate [codegen id : 23] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#22, sales_amt#23] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#52, sum#53] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] -(71) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#56] +(67) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sum#53, sum#54] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#55] -(72) HashAggregate [codegen id : 26] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#54, sum#55] +(68) HashAggregate [codegen id : 24] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#53, sum#54] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#57, sum(UnscaledValue(sales_amt#23))#58] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#57 AS sales_cnt#59, MakeDecimal(sum(UnscaledValue(sales_amt#23))#58,18,2) AS sales_amt#60] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#56, sum(UnscaledValue(sales_amt#23))#57] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#22 as bigint))#56 AS sales_cnt#58, MakeDecimal(sum(UnscaledValue(sales_amt#23))#57,18,2) AS sales_amt#59] -(73) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] -Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#61] +(69) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] +Arguments: hashpartitioning(i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#60] -(74) Sort [codegen id : 27] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60] +(70) Sort [codegen id : 25] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59] Arguments: [i_brand_id#7 ASC NULLS FIRST, i_class_id#8 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_manufact_id#11 ASC NULLS FIRST], false, 0 -(75) Scan parquet default.catalog_sales +(71) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(76) ColumnarToRow [codegen id : 30] +(72) ColumnarToRow [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(77) Filter [codegen id : 30] +(73) Filter [codegen id : 28] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(78) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(74) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(79) BroadcastHashJoin [codegen id : 30] +(75) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(80) Project [codegen id : 30] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, 
cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(76) Project [codegen id : 28] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(81) Scan parquet default.date_dim -Output [2]: [d_date_sk#67, d_year#68] +(77) Scan parquet default.date_dim +Output [2]: [d_date_sk#66, d_year#67] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(82) ColumnarToRow [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] +(78) ColumnarToRow [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] -(83) Filter [codegen id : 29] -Input [2]: [d_date_sk#67, d_year#68] -Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2001)) AND isnotnull(d_date_sk#67)) +(79) Filter [codegen id : 27] +Input [2]: [d_date_sk#66, d_year#67] +Condition : ((isnotnull(d_year#67) AND (d_year#67 = 2001)) AND isnotnull(d_date_sk#66)) -(84) BroadcastExchange -Input [2]: [d_date_sk#67, d_year#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +(80) BroadcastExchange +Input [2]: [d_date_sk#66, d_year#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#68] -(85) BroadcastHashJoin [codegen id : 30] +(81) BroadcastHashJoin [codegen id : 28] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(86) Project [codegen id : 30] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(82) Project [codegen id : 28] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(87) Exchange -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), true, [id=#70] +(83) Exchange +Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#69] -(88) Sort [codegen id : 31] -Input [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(84) Sort [codegen id : 29] +Input [9]: [cs_item_sk#2, cs_order_number#3, 
cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 -(89) ReusedExchange [Reuses operator id: 22] +(85) ReusedExchange [Reuses operator id: 22] Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(90) Sort [codegen id : 33] +(86) Sort [codegen id : 31] Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 -(91) SortMergeJoin +(87) SortMergeJoin Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#18, cr_item_sk#17] Join condition: None -(92) Project [codegen id : 34] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +(88) Project [codegen id : 32] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (cs_quantity#4 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#22, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#23] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -(93) Scan parquet default.store_sales +(89) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(94) ColumnarToRow [codegen id : 37] +(90) ColumnarToRow [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] -(95) Filter [codegen id : 37] +(91) Filter [codegen id : 35] Input [5]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28] Condition : (isnotnull(ss_item_sk#25) AND isnotnull(ss_sold_date_sk#24)) -(96) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(92) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(97) BroadcastHashJoin [codegen id : 37] +(93) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#62] +Right keys [1]: [i_item_sk#61] Join condition: None -(98) Project [codegen id : 37] -Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ss_sold_date_sk#24, 
ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(94) Project [codegen id : 35] +Output [9]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(99) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(95) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(100) BroadcastHashJoin [codegen id : 37] +(96) BroadcastHashJoin [codegen id : 35] Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#67] +Right keys [1]: [d_date_sk#66] Join condition: None -(101) Project [codegen id : 37] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(97) Project [codegen id : 35] +Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ss_sold_date_sk#24, ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(102) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), true, [id=#71] +(98) Exchange +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint), 5), ENSURE_REQUIREMENTS, [id=#70] -(103) Sort [codegen id : 38] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] +(99) Sort [codegen id : 36] +Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] Arguments: [cast(ss_ticket_number#26 as bigint) ASC NULLS FIRST, cast(ss_item_sk#25 as bigint) ASC NULLS FIRST], false, 0 -(104) ReusedExchange [Reuses operator id: 40] +(100) ReusedExchange [Reuses operator id: 40] Output [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(105) Sort [codegen id : 40] +(101) Sort [codegen id : 38] Input [4]: [sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] Arguments: [sr_ticket_number#31 ASC NULLS FIRST, sr_item_sk#30 ASC NULLS FIRST], false, 0 -(106) SortMergeJoin +(102) SortMergeJoin Left keys [2]: [cast(ss_ticket_number#26 as bigint), cast(ss_item_sk#25 as bigint)] Right keys [2]: [sr_ticket_number#31, sr_item_sk#30] Join condition: None -(107) Project [codegen id : 41] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, 
i_category_id#65, i_manufact_id#66, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#72, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#73] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] - -(108) Union - -(109) HashAggregate [codegen id : 42] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] - -(110) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#74] - -(111) HashAggregate [codegen id : 43] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(103) Project [codegen id : 39] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ss_quantity#27 - coalesce(sr_return_quantity#32, 0)) AS sales_cnt#71, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#33, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#72] +Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, sr_item_sk#30, sr_ticket_number#31, sr_return_quantity#32, sr_return_amt#33] -(112) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(104) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(113) ColumnarToRow [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] +(105) ColumnarToRow [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] -(114) Filter [codegen id : 46] -Input [5]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42] -Condition : (isnotnull(ws_item_sk#39) AND isnotnull(ws_sold_date_sk#38)) +(106) Filter [codegen id : 42] +Input [5]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41] +Condition : (isnotnull(ws_item_sk#38) AND isnotnull(ws_sold_date_sk#37)) -(115) 
ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(107) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(116) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_item_sk#39] -Right keys [1]: [i_item_sk#62] +(108) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_item_sk#38] +Right keys [1]: [i_item_sk#61] Join condition: None -(117) Project [codegen id : 46] -Output [9]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Input [10]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(109) Project [codegen id : 42] +Output [9]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Input [10]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] -(118) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#67, d_year#68] +(110) ReusedExchange [Reuses operator id: 80] +Output [2]: [d_date_sk#66, d_year#67] -(119) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#38] -Right keys [1]: [d_date_sk#67] +(111) BroadcastHashJoin [codegen id : 42] +Left keys [1]: [ws_sold_date_sk#37] +Right keys [1]: [d_date_sk#66] Join condition: None -(120) Project [codegen id : 46] -Output [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Input [11]: [ws_sold_date_sk#38, ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_date_sk#67, d_year#68] +(112) Project [codegen id : 42] +Output [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Input [11]: [ws_sold_date_sk#37, ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_date_sk#66, d_year#67] -(121) Exchange -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: hashpartitioning(cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint), 5), true, [id=#75] +(113) Exchange +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: hashpartitioning(cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint), 5), ENSURE_REQUIREMENTS, [id=#73] -(122) Sort [codegen id : 47] -Input [9]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68] -Arguments: [cast(ws_order_number#40 as bigint) ASC NULLS FIRST, cast(ws_item_sk#39 as bigint) ASC NULLS FIRST], false, 0 +(114) Sort [codegen id : 43] +Input [9]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, 
i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67] +Arguments: [cast(ws_order_number#39 as bigint) ASC NULLS FIRST, cast(ws_item_sk#38 as bigint) ASC NULLS FIRST], false, 0 -(123) ReusedExchange [Reuses operator id: 62] -Output [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(115) ReusedExchange [Reuses operator id: 58] +Output [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(124) Sort [codegen id : 49] -Input [4]: [wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] -Arguments: [wr_order_number#45 ASC NULLS FIRST, wr_item_sk#44 ASC NULLS FIRST], false, 0 +(116) Sort [codegen id : 45] +Input [4]: [wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] +Arguments: [wr_order_number#44 ASC NULLS FIRST, wr_item_sk#43 ASC NULLS FIRST], false, 0 -(125) SortMergeJoin -Left keys [2]: [cast(ws_order_number#40 as bigint), cast(ws_item_sk#39 as bigint)] -Right keys [2]: [wr_order_number#45, wr_item_sk#44] +(117) SortMergeJoin +Left keys [2]: [cast(ws_order_number#39 as bigint), cast(ws_item_sk#38 as bigint)] +Right keys [2]: [wr_order_number#44, wr_item_sk#43] Join condition: None -(126) Project [codegen id : 50] -Output [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, (ws_quantity#41 - coalesce(wr_return_quantity#46, 0)) AS sales_cnt#76, CheckOverflow((promote_precision(cast(ws_ext_sales_price#42 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#47, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#77] -Input [13]: [ws_item_sk#39, ws_order_number#40, ws_quantity#41, ws_ext_sales_price#42, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, d_year#68, wr_item_sk#44, wr_order_number#45, wr_return_quantity#46, wr_return_amt#47] +(118) Project [codegen id : 46] +Output [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, (ws_quantity#40 - coalesce(wr_return_quantity#45, 0)) AS sales_cnt#74, CheckOverflow((promote_precision(cast(ws_ext_sales_price#41 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#46, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#75] +Input [13]: [ws_item_sk#38, ws_order_number#39, ws_quantity#40, ws_ext_sales_price#41, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, d_year#67, wr_item_sk#43, wr_order_number#44, wr_return_quantity#45, wr_return_amt#46] -(127) Union +(119) Union -(128) HashAggregate [codegen id : 51] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(120) HashAggregate [codegen id : 47] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(129) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, 
i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23, 5), true, [id=#78] +(121) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23, 5), ENSURE_REQUIREMENTS, [id=#76] -(130) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +(122) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] -(131) HashAggregate [codegen id : 52] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#22, sales_amt#23] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(123) HashAggregate [codegen id : 48] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#22, sales_amt#23] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [partial_sum(cast(sales_cnt#22 as bigint)), partial_sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum#79, sum#80] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] +Aggregate Attributes [2]: [sum#77, sum#78] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] -(132) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Arguments: hashpartitioning(d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#83] +(124) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Arguments: hashpartitioning(d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#81] -(133) HashAggregate [codegen id : 53] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum#81, sum#82] -Keys [5]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] +(125) HashAggregate [codegen id : 49] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum#79, sum#80] +Keys [5]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] Functions [2]: [sum(cast(sales_cnt#22 as bigint)), sum(UnscaledValue(sales_amt#23))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as bigint))#84, sum(UnscaledValue(sales_amt#23))#85] -Results [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sum(cast(sales_cnt#22 as bigint))#84 AS sales_cnt#86, MakeDecimal(sum(UnscaledValue(sales_amt#23))#85,18,2) AS sales_amt#87] +Aggregate Attributes [2]: [sum(cast(sales_cnt#22 as 
bigint))#82, sum(UnscaledValue(sales_amt#23))#83] +Results [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sum(cast(sales_cnt#22 as bigint))#82 AS sales_cnt#84, MakeDecimal(sum(UnscaledValue(sales_amt#23))#83,18,2) AS sales_amt#85] -(134) Exchange -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, 5), true, [id=#88] +(126) Exchange +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: hashpartitioning(i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, 5), ENSURE_REQUIREMENTS, [id=#86] -(135) Sort [codegen id : 54] -Input [7]: [d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] -Arguments: [i_brand_id#63 ASC NULLS FIRST, i_class_id#64 ASC NULLS FIRST, i_category_id#65 ASC NULLS FIRST, i_manufact_id#66 ASC NULLS FIRST], false, 0 +(127) Sort [codegen id : 50] +Input [7]: [d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] +Arguments: [i_brand_id#62 ASC NULLS FIRST, i_class_id#63 ASC NULLS FIRST, i_category_id#64 ASC NULLS FIRST, i_manufact_id#65 ASC NULLS FIRST], false, 0 -(136) SortMergeJoin [codegen id : 55] +(128) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#59 as decimal(17,2))) / promote_precision(cast(sales_cnt#86 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#58 as decimal(17,2))) / promote_precision(cast(sales_cnt#84 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(137) Project [codegen id : 55] -Output [10]: [d_year#68 AS prev_year#89, d_year#14 AS year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#86 AS prev_yr_cnt#91, sales_cnt#59 AS curr_yr_cnt#92, (sales_cnt#59 - sales_cnt#86) AS sales_cnt_diff#93, CheckOverflow((promote_precision(cast(sales_amt#60 as decimal(19,2))) - promote_precision(cast(sales_amt#87 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#94] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#59, sales_amt#60, d_year#68, i_brand_id#63, i_class_id#64, i_category_id#65, i_manufact_id#66, sales_cnt#86, sales_amt#87] +(129) Project [codegen id : 51] +Output [10]: [d_year#67 AS prev_year#87, d_year#14 AS year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#84 AS prev_yr_cnt#89, sales_cnt#58 AS curr_yr_cnt#90, (sales_cnt#58 - sales_cnt#84) AS sales_cnt_diff#91, CheckOverflow((promote_precision(cast(sales_amt#59 as decimal(19,2))) - promote_precision(cast(sales_amt#85 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#92] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#58, sales_amt#59, d_year#67, i_brand_id#62, i_class_id#63, i_category_id#64, i_manufact_id#65, sales_cnt#84, sales_amt#85] -(138) TakeOrderedAndProject -Input [10]: [prev_year#89, year#90, i_brand_id#7, i_class_id#8, 
i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] -Arguments: 100, [sales_cnt_diff#93 ASC NULLS FIRST, sales_amt_diff#94 ASC NULLS FIRST], [prev_year#89, year#90, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#91, curr_yr_cnt#92, sales_cnt_diff#93, sales_amt_diff#94] +(130) TakeOrderedAndProject +Input [10]: [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] +Arguments: 100, [sales_cnt_diff#91 ASC NULLS FIRST, sales_amt_diff#92 ASC NULLS FIRST], [prev_year#87, year#88, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#89, curr_yr_cnt#90, sales_cnt_diff#91, sales_amt_diff#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt index 69f8b6a5b6789..b44ed2a7a3894 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/simplified.txt @@ -1,113 +1,105 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] - WholeStageCodegen (55) + WholeStageCodegen (51) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] SortMergeJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] InputAdapter - WholeStageCodegen (27) + WholeStageCodegen (25) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (26) + WholeStageCodegen (24) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #2 - WholeStageCodegen (25) + WholeStageCodegen (23) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (24) + WholeStageCodegen (22) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (16) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (7) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #4 - WholeStageCodegen (15) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (4) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (7) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - 
InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (4) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #5 - WholeStageCodegen (3) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen (6) - Sort [cr_order_number,cr_item_sk] - InputAdapter - Exchange [cr_order_number,cr_item_sk] #8 - WholeStageCodegen (5) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (14) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (11) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #9 - WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (13) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - Exchange [sr_ticket_number,sr_item_sk] #10 - WholeStageCodegen (12) - Filter [sr_ticket_number,sr_item_sk] + Exchange [cs_order_number,cs_item_sk] #4 + WholeStageCodegen (3) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (1) + Project 
[i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] ColumnarToRow InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (23) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (6) + Sort [cr_order_number,cr_item_sk] + InputAdapter + Exchange [cr_order_number,cr_item_sk] #7 + WholeStageCodegen (5) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (14) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (11) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #8 + WholeStageCodegen (10) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (13) + Sort [sr_ticket_number,sr_item_sk] + InputAdapter + Exchange [sr_ticket_number,sr_item_sk] #9 + WholeStageCodegen (12) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (21) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (20) + WholeStageCodegen (18) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #11 - WholeStageCodegen (19) + Exchange [ws_order_number,ws_item_sk] #10 + WholeStageCodegen (17) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -117,108 +109,100 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #7 - WholeStageCodegen (22) + 
ReusedExchange [d_date_sk,d_year] #6 + WholeStageCodegen (20) Sort [wr_order_number,wr_item_sk] InputAdapter - Exchange [wr_order_number,wr_item_sk] #12 - WholeStageCodegen (21) + Exchange [wr_order_number,wr_item_sk] #11 + WholeStageCodegen (19) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - WholeStageCodegen (54) + WholeStageCodegen (50) Sort [i_brand_id,i_class_id,i_category_id,i_manufact_id] InputAdapter - Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 - WholeStageCodegen (53) + Exchange [i_brand_id,i_class_id,i_category_id,i_manufact_id] #12 + WholeStageCodegen (49) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #14 - WholeStageCodegen (52) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #13 + WholeStageCodegen (48) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #15 - WholeStageCodegen (51) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #14 + WholeStageCodegen (47) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (43) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + WholeStageCodegen (32) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #16 - WholeStageCodegen (42) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] + SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + WholeStageCodegen (29) + Sort [cs_order_number,cs_item_sk] InputAdapter - Union - WholeStageCodegen (34) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - InputAdapter - SortMergeJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - WholeStageCodegen (31) - Sort [cs_order_number,cs_item_sk] - InputAdapter - Exchange [cs_order_number,cs_item_sk] #17 - WholeStageCodegen (30) - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - BroadcastExchange #18 - WholeStageCodegen (29) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - WholeStageCodegen 
(33) - Sort [cr_order_number,cr_item_sk] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #8 - WholeStageCodegen (41) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - InputAdapter - SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - WholeStageCodegen (38) - Sort [ss_ticket_number,ss_item_sk] - InputAdapter - Exchange [ss_ticket_number,ss_item_sk] #19 - WholeStageCodegen (37) - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 - InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (40) - Sort [sr_ticket_number,sr_item_sk] - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #10 - WholeStageCodegen (50) + Exchange [cs_order_number,cs_item_sk] #15 + WholeStageCodegen (28) + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + BroadcastExchange #16 + WholeStageCodegen (27) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + WholeStageCodegen (31) + Sort [cr_order_number,cr_item_sk] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #7 + WholeStageCodegen (39) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + InputAdapter + SortMergeJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + WholeStageCodegen (36) + Sort [ss_ticket_number,ss_item_sk] + InputAdapter + Exchange [ss_ticket_number,ss_item_sk] #17 + WholeStageCodegen (35) + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 + InputAdapter + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (38) 
+ Sort [sr_ticket_number,sr_item_sk] + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #9 + WholeStageCodegen (46) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] InputAdapter SortMergeJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] - WholeStageCodegen (47) + WholeStageCodegen (43) Sort [ws_order_number,ws_item_sk] InputAdapter - Exchange [ws_order_number,ws_item_sk] #20 - WholeStageCodegen (46) + Exchange [ws_order_number,ws_item_sk] #18 + WholeStageCodegen (42) Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Project [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] @@ -228,10 +212,10 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #6 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #5 InputAdapter - ReusedExchange [d_date_sk,d_year] #18 - WholeStageCodegen (49) + ReusedExchange [d_date_sk,d_year] #16 + WholeStageCodegen (45) Sort [wr_order_number,wr_item_sk] InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #12 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #11 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt index 1d8aab417f188..ae7442399ebd4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt @@ -1,121 +1,113 @@ == Physical Plan == -TakeOrderedAndProject (117) -+- * Project (116) - +- * BroadcastHashJoin Inner BuildRight (115) - :- * HashAggregate (63) - : +- Exchange (62) - : +- * HashAggregate (61) - : +- * HashAggregate (60) - : +- Exchange (59) - : +- * HashAggregate (58) - : +- Union (57) - : :- * HashAggregate (41) - : : +- Exchange (40) - : : +- * HashAggregate (39) - : : +- Union (38) - : : :- * Project (22) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (21) - : : : :- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.item (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.date_dim (11) - : : : +- BroadcastExchange (20) - : : : +- * Filter (19) - : : : +- * ColumnarToRow (18) - : : : +- Scan parquet default.catalog_returns (17) - : : +- * Project (37) - : : +- * BroadcastHashJoin LeftOuter BuildRight (36) - : : :- * Project (31) - : : : +- * BroadcastHashJoin Inner BuildRight (30) - : : : :- * Project (28) - : : : : +- * BroadcastHashJoin Inner BuildRight (27) 
- : : : : :- * Filter (25) - : : : : : +- * ColumnarToRow (24) - : : : : : +- Scan parquet default.store_sales (23) - : : : : +- ReusedExchange (26) - : : : +- ReusedExchange (29) - : : +- BroadcastExchange (35) - : : +- * Filter (34) - : : +- * ColumnarToRow (33) - : : +- Scan parquet default.store_returns (32) - : +- * Project (56) - : +- * BroadcastHashJoin LeftOuter BuildRight (55) - : :- * Project (50) - : : +- * BroadcastHashJoin Inner BuildRight (49) - : : :- * Project (47) - : : : +- * BroadcastHashJoin Inner BuildRight (46) - : : : :- * Filter (44) - : : : : +- * ColumnarToRow (43) - : : : : +- Scan parquet default.web_sales (42) - : : : +- ReusedExchange (45) - : : +- ReusedExchange (48) - : +- BroadcastExchange (54) - : +- * Filter (53) - : +- * ColumnarToRow (52) - : +- Scan parquet default.web_returns (51) - +- BroadcastExchange (114) - +- * HashAggregate (113) - +- Exchange (112) - +- * HashAggregate (111) - +- * HashAggregate (110) - +- Exchange (109) - +- * HashAggregate (108) - +- Union (107) - :- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- Union (91) - : :- * Project (78) - : : +- * BroadcastHashJoin LeftOuter BuildRight (77) - : : :- * Project (75) - : : : +- * BroadcastHashJoin Inner BuildRight (74) - : : : :- * Project (69) - : : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : : :- * Filter (66) - : : : : : +- * ColumnarToRow (65) - : : : : : +- Scan parquet default.catalog_sales (64) - : : : : +- ReusedExchange (67) - : : : +- BroadcastExchange (73) - : : : +- * Filter (72) - : : : +- * ColumnarToRow (71) - : : : +- Scan parquet default.date_dim (70) - : : +- ReusedExchange (76) - : +- * Project (90) - : +- * BroadcastHashJoin LeftOuter BuildRight (89) - : :- * Project (87) - : : +- * BroadcastHashJoin Inner BuildRight (86) - : : :- * Project (84) - : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : :- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.store_sales (79) - : : : +- ReusedExchange (82) - : : +- ReusedExchange (85) - : +- ReusedExchange (88) - +- * Project (106) - +- * BroadcastHashJoin LeftOuter BuildRight (105) - :- * Project (103) - : +- * BroadcastHashJoin Inner BuildRight (102) - : :- * Project (100) - : : +- * BroadcastHashJoin Inner BuildRight (99) - : : :- * Filter (97) - : : : +- * ColumnarToRow (96) - : : : +- Scan parquet default.web_sales (95) - : : +- ReusedExchange (98) - : +- ReusedExchange (101) - +- ReusedExchange (104) +TakeOrderedAndProject (109) ++- * Project (108) + +- * BroadcastHashJoin Inner BuildRight (107) + :- * HashAggregate (59) + : +- Exchange (58) + : +- * HashAggregate (57) + : +- * HashAggregate (56) + : +- Exchange (55) + : +- * HashAggregate (54) + : +- Union (53) + : :- * Project (22) + : : +- * BroadcastHashJoin LeftOuter BuildRight (21) + : : :- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.catalog_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.item (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.date_dim (11) + : : +- BroadcastExchange (20) + : : +- * Filter (19) + : : +- * ColumnarToRow (18) + : : +- Scan parquet default.catalog_returns (17) + : :- 
* Project (37) + : : +- * BroadcastHashJoin LeftOuter BuildRight (36) + : : :- * Project (31) + : : : +- * BroadcastHashJoin Inner BuildRight (30) + : : : :- * Project (28) + : : : : +- * BroadcastHashJoin Inner BuildRight (27) + : : : : :- * Filter (25) + : : : : : +- * ColumnarToRow (24) + : : : : : +- Scan parquet default.store_sales (23) + : : : : +- ReusedExchange (26) + : : : +- ReusedExchange (29) + : : +- BroadcastExchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.store_returns (32) + : +- * Project (52) + : +- * BroadcastHashJoin LeftOuter BuildRight (51) + : :- * Project (46) + : : +- * BroadcastHashJoin Inner BuildRight (45) + : : :- * Project (43) + : : : +- * BroadcastHashJoin Inner BuildRight (42) + : : : :- * Filter (40) + : : : : +- * ColumnarToRow (39) + : : : : +- Scan parquet default.web_sales (38) + : : : +- ReusedExchange (41) + : : +- ReusedExchange (44) + : +- BroadcastExchange (50) + : +- * Filter (49) + : +- * ColumnarToRow (48) + : +- Scan parquet default.web_returns (47) + +- BroadcastExchange (106) + +- * HashAggregate (105) + +- Exchange (104) + +- * HashAggregate (103) + +- * HashAggregate (102) + +- Exchange (101) + +- * HashAggregate (100) + +- Union (99) + :- * Project (74) + : +- * BroadcastHashJoin LeftOuter BuildRight (73) + : :- * Project (71) + : : +- * BroadcastHashJoin Inner BuildRight (70) + : : :- * Project (65) + : : : +- * BroadcastHashJoin Inner BuildRight (64) + : : : :- * Filter (62) + : : : : +- * ColumnarToRow (61) + : : : : +- Scan parquet default.catalog_sales (60) + : : : +- ReusedExchange (63) + : : +- BroadcastExchange (69) + : : +- * Filter (68) + : : +- * ColumnarToRow (67) + : : +- Scan parquet default.date_dim (66) + : +- ReusedExchange (72) + :- * Project (86) + : +- * BroadcastHashJoin LeftOuter BuildRight (85) + : :- * Project (83) + : : +- * BroadcastHashJoin Inner BuildRight (82) + : : :- * Project (80) + : : : +- * BroadcastHashJoin Inner BuildRight (79) + : : : :- * Filter (77) + : : : : +- * ColumnarToRow (76) + : : : : +- Scan parquet default.store_sales (75) + : : : +- ReusedExchange (78) + : : +- ReusedExchange (81) + : +- ReusedExchange (84) + +- * Project (98) + +- * BroadcastHashJoin LeftOuter BuildRight (97) + :- * Project (95) + : +- * BroadcastHashJoin Inner BuildRight (94) + : :- * Project (92) + : : +- * BroadcastHashJoin Inner BuildRight (91) + : : :- * Filter (89) + : : : +- * ColumnarToRow (88) + : : : +- Scan parquet default.web_sales (87) + : : +- ReusedExchange (90) + : +- ReusedExchange (93) + +- ReusedExchange (96) (1) Scan parquet default.catalog_sales @@ -282,366 +274,326 @@ Join condition: None Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#33, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#34] Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(38) Union - -(39) HashAggregate [codegen id : 9] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] 
-Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(40) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#35] - -(41) HashAggregate [codegen id : 10] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] - -(42) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(38) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(43) ColumnarToRow [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(39) ColumnarToRow [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(44) Filter [codegen id : 14] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(40) Filter [codegen id : 12] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(45) ReusedExchange [Reuses operator id: 8] +(41) ReusedExchange [Reuses operator id: 8] Output [5]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(46) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_item_sk#37] +(42) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_item_sk#36] Right keys [1]: [i_item_sk#6] Join condition: None -(47) Project [codegen id : 14] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Input [10]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +(43) Project [codegen id : 12] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -(48) ReusedExchange [Reuses operator id: 14] +(44) ReusedExchange [Reuses operator id: 14] Output [2]: [d_date_sk#13, d_year#14] -(49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#36] +(45) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ws_sold_date_sk#35] Right keys [1]: [d_date_sk#13] Join condition: None -(50) Project [codegen 
id : 14] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] +(46) Project [codegen id : 12] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_date_sk#13, d_year#14] -(51) Scan parquet default.web_returns -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(47) Scan parquet default.web_returns +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct -(52) ColumnarToRow [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(48) ColumnarToRow [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(53) Filter [codegen id : 13] -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Condition : (isnotnull(wr_order_number#42) AND isnotnull(wr_item_sk#41)) +(49) Filter [codegen id : 11] +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Condition : (isnotnull(wr_order_number#41) AND isnotnull(wr_item_sk#40)) -(54) BroadcastExchange -Input [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#45] +(50) BroadcastExchange +Input [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#44] -(55) BroadcastHashJoin [codegen id : 14] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(51) BroadcastHashJoin [codegen id : 12] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(56) Project [codegen id : 14] -Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#46, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#47] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(52) Project [codegen id : 12] +Output [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#45, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - 
promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#46] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, d_year#14, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(57) Union +(53) Union -(58) HashAggregate [codegen id : 15] +(54) HashAggregate [codegen id : 13] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(59) Exchange +(55) Exchange Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), true, [id=#48] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#47] -(60) HashAggregate [codegen id : 16] +(56) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] -(61) HashAggregate [codegen id : 16] +(57) HashAggregate [codegen id : 14] Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#21, sales_amt#22] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#49, sum#50] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +Aggregate Attributes [2]: [sum#48, sum#49] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] -(62) Exchange -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] -Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), true, [id=#53] +(58) Exchange +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] +Arguments: hashpartitioning(d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#52] -(63) HashAggregate [codegen id : 34] -Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#51, sum#52] +(59) HashAggregate [codegen id : 30] +Input [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum#50, sum#51] Keys [5]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#54, sum(UnscaledValue(sales_amt#22))#55] -Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, 
i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#54 AS sales_cnt#56, MakeDecimal(sum(UnscaledValue(sales_amt#22))#55,18,2) AS sales_amt#57] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#53, sum(UnscaledValue(sales_amt#22))#54] +Results [7]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sum(cast(sales_cnt#21 as bigint))#53 AS sales_cnt#55, MakeDecimal(sum(UnscaledValue(sales_amt#22))#54,18,2) AS sales_amt#56] -(64) Scan parquet default.catalog_sales +(60) Scan parquet default.catalog_sales Output [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(65) ColumnarToRow [codegen id : 20] +(61) ColumnarToRow [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] -(66) Filter [codegen id : 20] +(62) Filter [codegen id : 18] Input [5]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5] Condition : (isnotnull(cs_item_sk#2) AND isnotnull(cs_sold_date_sk#1)) -(67) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(63) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(68) BroadcastHashJoin [codegen id : 20] +(64) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(69) Project [codegen id : 20] -Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(65) Project [codegen id : 18] +Output [9]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(70) Scan parquet default.date_dim -Output [2]: [d_date_sk#63, d_year#64] +(66) Scan parquet default.date_dim +Output [2]: [d_date_sk#62, d_year#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct -(71) ColumnarToRow [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] +(67) ColumnarToRow [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] -(72) Filter [codegen id : 18] -Input [2]: [d_date_sk#63, d_year#64] -Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#63)) +(68) Filter [codegen id : 16] +Input [2]: [d_date_sk#62, d_year#63] +Condition : ((isnotnull(d_year#63) AND (d_year#63 = 2001)) AND isnotnull(d_date_sk#62)) -(73) BroadcastExchange -Input [2]: [d_date_sk#63, d_year#64] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#65] +(69) BroadcastExchange +Input [2]: [d_date_sk#62, d_year#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, 
false] as bigint)),false), [id=#64] -(74) BroadcastHashJoin [codegen id : 20] +(70) BroadcastHashJoin [codegen id : 18] Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(75) Project [codegen id : 20] -Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(71) Project [codegen id : 18] +Output [9]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [cs_sold_date_sk#1, cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(76) ReusedExchange [Reuses operator id: 20] +(72) ReusedExchange [Reuses operator id: 20] Output [4]: [cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(77) BroadcastHashJoin [codegen id : 20] +(73) BroadcastHashJoin [codegen id : 18] Left keys [2]: [cs_order_number#3, cs_item_sk#2] Right keys [2]: [cr_order_number#17, cr_item_sk#16] Join condition: None -(78) Project [codegen id : 20] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] -Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] +(74) Project [codegen id : 18] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (cs_quantity#4 - coalesce(cr_return_quantity#18, 0)) AS sales_cnt#21, CheckOverflow((promote_precision(cast(cs_ext_sales_price#5 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#19, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#22] +Input [13]: [cs_item_sk#2, cs_order_number#3, cs_quantity#4, cs_ext_sales_price#5, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, cr_item_sk#16, cr_order_number#17, cr_return_quantity#18, cr_return_amount#19] -(79) Scan parquet default.store_sales +(75) Scan parquet default.store_sales Output [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 24] +(76) ColumnarToRow [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] -(81) Filter [codegen id : 24] +(77) Filter [codegen id : 22] Input [5]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27] Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_date_sk#23)) -(82) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, 
i_category_id#61, i_manufact_id#62] +(78) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(83) BroadcastHashJoin [codegen id : 24] +(79) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_item_sk#24] -Right keys [1]: [i_item_sk#58] +Right keys [1]: [i_item_sk#57] Join condition: None -(84) Project [codegen id : 24] -Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(80) Project [codegen id : 22] +Output [9]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(85) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(81) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(86) BroadcastHashJoin [codegen id : 24] +(82) BroadcastHashJoin [codegen id : 22] Left keys [1]: [ss_sold_date_sk#23] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None -(87) Project [codegen id : 24] -Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(83) Project [codegen id : 22] +Output [9]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ss_sold_date_sk#23, ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(88) ReusedExchange [Reuses operator id: 35] +(84) ReusedExchange [Reuses operator id: 35] Output [4]: [sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(89) BroadcastHashJoin [codegen id : 24] +(85) BroadcastHashJoin [codegen id : 22] Left keys [2]: [cast(ss_ticket_number#25 as bigint), cast(ss_item_sk#24 as bigint)] Right keys [2]: [sr_ticket_number#29, sr_item_sk#28] Join condition: None -(90) Project [codegen id : 24] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#66, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#67] -Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] - -(91) Union - -(92) HashAggregate [codegen id : 25] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, 
sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] - -(93) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#68] - -(94) HashAggregate [codegen id : 26] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Functions: [] -Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(86) Project [codegen id : 22] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ss_quantity#26 - coalesce(sr_return_quantity#30, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ss_ext_sales_price#27 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#31, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#66] +Input [13]: [ss_item_sk#24, ss_ticket_number#25, ss_quantity#26, ss_ext_sales_price#27, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, sr_item_sk#28, sr_ticket_number#29, sr_return_quantity#30, sr_return_amt#31] -(95) Scan parquet default.web_sales -Output [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(87) Scan parquet default.web_sales +Output [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk)] ReadSchema: struct -(96) ColumnarToRow [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] +(88) ColumnarToRow [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] -(97) Filter [codegen id : 30] -Input [5]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40] -Condition : (isnotnull(ws_item_sk#37) AND isnotnull(ws_sold_date_sk#36)) +(89) Filter [codegen id : 26] +Input [5]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39] +Condition : (isnotnull(ws_item_sk#36) AND isnotnull(ws_sold_date_sk#35)) -(98) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(90) ReusedExchange [Reuses operator id: 8] +Output [5]: [i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(99) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_item_sk#37] -Right keys [1]: [i_item_sk#58] +(91) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_item_sk#36] +Right keys [1]: [i_item_sk#57] Join condition: None -(100) Project [codegen id : 30] -Output [9]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Input [10]: 
[ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(92) Project [codegen id : 26] +Output [9]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Input [10]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_item_sk#57, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] -(101) ReusedExchange [Reuses operator id: 73] -Output [2]: [d_date_sk#63, d_year#64] +(93) ReusedExchange [Reuses operator id: 69] +Output [2]: [d_date_sk#62, d_year#63] -(102) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#63] +(94) BroadcastHashJoin [codegen id : 26] +Left keys [1]: [ws_sold_date_sk#35] +Right keys [1]: [d_date_sk#62] Join condition: None -(103) Project [codegen id : 30] -Output [9]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64] -Input [11]: [ws_sold_date_sk#36, ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_date_sk#63, d_year#64] +(95) Project [codegen id : 26] +Output [9]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63] +Input [11]: [ws_sold_date_sk#35, ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_date_sk#62, d_year#63] -(104) ReusedExchange [Reuses operator id: 54] -Output [4]: [wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(96) ReusedExchange [Reuses operator id: 50] +Output [4]: [wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(105) BroadcastHashJoin [codegen id : 30] -Left keys [2]: [cast(ws_order_number#38 as bigint), cast(ws_item_sk#37 as bigint)] -Right keys [2]: [wr_order_number#42, wr_item_sk#41] +(97) BroadcastHashJoin [codegen id : 26] +Left keys [2]: [cast(ws_order_number#37 as bigint), cast(ws_item_sk#36 as bigint)] +Right keys [2]: [wr_order_number#41, wr_item_sk#40] Join condition: None -(106) Project [codegen id : 30] -Output [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, (ws_quantity#39 - coalesce(wr_return_quantity#43, 0)) AS sales_cnt#69, CheckOverflow((promote_precision(cast(ws_ext_sales_price#40 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#44, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#70] -Input [13]: [ws_item_sk#37, ws_order_number#38, ws_quantity#39, ws_ext_sales_price#40, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, d_year#64, wr_item_sk#41, wr_order_number#42, wr_return_quantity#43, wr_return_amt#44] +(98) Project [codegen id : 26] +Output [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, (ws_quantity#38 - coalesce(wr_return_quantity#42, 0)) AS sales_cnt#67, CheckOverflow((promote_precision(cast(ws_ext_sales_price#39 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#43, 0.00) as decimal(8,2)))), DecimalType(8,2), true) AS sales_amt#68] +Input [13]: [ws_item_sk#36, ws_order_number#37, ws_quantity#38, ws_ext_sales_price#39, 
i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, d_year#63, wr_item_sk#40, wr_order_number#41, wr_return_quantity#42, wr_return_amt#43] -(107) Union +(99) Union -(108) HashAggregate [codegen id : 31] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(100) HashAggregate [codegen id : 27] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(109) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22, 5), true, [id=#71] +(101) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22, 5), ENSURE_REQUIREMENTS, [id=#69] -(110) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +(102) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] -(111) HashAggregate [codegen id : 32] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#21, sales_amt#22] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(103) HashAggregate [codegen id : 28] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#21, sales_amt#22] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [partial_sum(cast(sales_cnt#21 as bigint)), partial_sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum#72, sum#73] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] +Aggregate Attributes [2]: [sum#70, sum#71] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] -(112) Exchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Arguments: hashpartitioning(d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, 5), 
true, [id=#76] +(104) Exchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Arguments: hashpartitioning(d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, 5), ENSURE_REQUIREMENTS, [id=#74] -(113) HashAggregate [codegen id : 33] -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum#74, sum#75] -Keys [5]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] +(105) HashAggregate [codegen id : 29] +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum#72, sum#73] +Keys [5]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] Functions [2]: [sum(cast(sales_cnt#21 as bigint)), sum(UnscaledValue(sales_amt#22))] -Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#77, sum(UnscaledValue(sales_amt#22))#78] -Results [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sum(cast(sales_cnt#21 as bigint))#77 AS sales_cnt#79, MakeDecimal(sum(UnscaledValue(sales_amt#22))#78,18,2) AS sales_amt#80] +Aggregate Attributes [2]: [sum(cast(sales_cnt#21 as bigint))#75, sum(UnscaledValue(sales_amt#22))#76] +Results [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sum(cast(sales_cnt#21 as bigint))#75 AS sales_cnt#77, MakeDecimal(sum(UnscaledValue(sales_amt#22))#76,18,2) AS sales_amt#78] -(114) BroadcastExchange -Input [7]: [d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#81] +(106) BroadcastExchange +Input [7]: [d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true], input[4, int, true]),false), [id=#79] -(115) BroadcastHashJoin [codegen id : 34] +(107) BroadcastHashJoin [codegen id : 30] Left keys [4]: [i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11] -Right keys [4]: [i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#56 as decimal(17,2))) / promote_precision(cast(sales_cnt#79 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#55 as decimal(17,2))) / promote_precision(cast(sales_cnt#77 as decimal(17,2)))), DecimalType(37,20), true) < 0.90000000000000000000) -(116) Project [codegen id : 34] -Output [10]: [d_year#64 AS prev_year#82, d_year#14 AS year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#79 AS prev_yr_cnt#84, sales_cnt#56 AS curr_yr_cnt#85, (sales_cnt#56 - sales_cnt#79) AS sales_cnt_diff#86, CheckOverflow((promote_precision(cast(sales_amt#57 as decimal(19,2))) - promote_precision(cast(sales_amt#80 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#87] -Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#56, sales_amt#57, d_year#64, i_brand_id#59, i_class_id#60, i_category_id#61, i_manufact_id#62, sales_cnt#79, sales_amt#80] +(108) Project [codegen id : 30] +Output [10]: [d_year#63 AS prev_year#80, d_year#14 AS year#81, 
i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#77 AS prev_yr_cnt#82, sales_cnt#55 AS curr_yr_cnt#83, (sales_cnt#55 - sales_cnt#77) AS sales_cnt_diff#84, CheckOverflow((promote_precision(cast(sales_amt#56 as decimal(19,2))) - promote_precision(cast(sales_amt#78 as decimal(19,2)))), DecimalType(19,2), true) AS sales_amt_diff#85] +Input [14]: [d_year#14, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, sales_cnt#55, sales_amt#56, d_year#63, i_brand_id#58, i_class_id#59, i_category_id#60, i_manufact_id#61, sales_cnt#77, sales_amt#78] -(117) TakeOrderedAndProject -Input [10]: [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] -Arguments: 100, [sales_cnt_diff#86 ASC NULLS FIRST, sales_amt_diff#87 ASC NULLS FIRST], [prev_year#82, year#83, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#84, curr_yr_cnt#85, sales_cnt_diff#86, sales_amt_diff#87] +(109) TakeOrderedAndProject +Input [10]: [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] +Arguments: 100, [sales_cnt_diff#84 ASC NULLS FIRST, sales_amt_diff#85 ASC NULLS FIRST], [prev_year#80, year#81, i_brand_id#7, i_class_id#8, i_category_id#9, i_manufact_id#11, prev_yr_cnt#82, curr_yr_cnt#83, sales_cnt_diff#84, sales_amt_diff#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt index d1c20801ec5fd..068187c44771a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/simplified.txt @@ -1,83 +1,75 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i_class_id,i_category_id,i_manufact_id,prev_yr_cnt,curr_yr_cnt] - WholeStageCodegen (34) + WholeStageCodegen (30) Project [d_year,d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt,sales_amt,sales_amt] BroadcastHashJoin [i_brand_id,i_class_id,i_category_id,i_manufact_id,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_cnt] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #1 - WholeStageCodegen (16) + WholeStageCodegen (14) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #2 - WholeStageCodegen (15) + WholeStageCodegen (13) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union - WholeStageCodegen (10) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #3 - WholeStageCodegen (9) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (4) - Project 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - BroadcastExchange #4 - WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (3) - Filter [cr_order_number,cr_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] - WholeStageCodegen (8) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + WholeStageCodegen (4) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + Filter [i_category,i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk,d_year] #5 - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - Filter [sr_ticket_number,sr_item_sk] - ColumnarToRow - InputAdapter - 
Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] - WholeStageCodegen (14) + Scan parquet default.item [i_item_sk,i_brand_id,i_class_id,i_category_id,i_category,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (2) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (3) + Filter [cr_order_number,cr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] + WholeStageCodegen (8) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #4 + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + Filter [sr_ticket_number,sr_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] + WholeStageCodegen (12) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -89,79 +81,71 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #5 + ReusedExchange [d_date_sk,d_year] #4 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (13) + BroadcastExchange #7 + WholeStageCodegen (11) Filter [wr_order_number,wr_item_sk] ColumnarToRow InputAdapter Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (33) + BroadcastExchange #8 + WholeStageCodegen (29) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sum,sum] [sum(cast(sales_cnt as bigint)),sum(UnscaledValue(sales_amt)),sales_cnt,sales_amt,sum,sum] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #10 - WholeStageCodegen (32) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id] #9 + WholeStageCodegen (28) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] [sum,sum,sum,sum] HashAggregate 
[d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #11 - WholeStageCodegen (31) + Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #10 + WholeStageCodegen (27) HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] InputAdapter Union + WholeStageCodegen (18) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] + BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] + Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (16) + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] + InputAdapter + ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #5 + WholeStageCodegen (22) + Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] + BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Filter [ss_item_sk,ss_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] + InputAdapter + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 + InputAdapter + ReusedExchange [d_date_sk,d_year] #11 + InputAdapter + ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #6 WholeStageCodegen (26) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Exchange [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] #12 - WholeStageCodegen (25) - HashAggregate [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,sales_cnt,sales_amt] - InputAdapter - Union - WholeStageCodegen (20) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,cs_quantity,cr_return_quantity,cs_ext_sales_price,cr_return_amount] - BroadcastHashJoin [cs_order_number,cs_item_sk,cr_order_number,cr_item_sk] - Project [cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin 
[cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_item_sk,cs_order_number,cs_quantity,cs_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (18) - Filter [d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] - InputAdapter - ReusedExchange [cr_item_sk,cr_order_number,cr_return_quantity,cr_return_amount] #6 - WholeStageCodegen (24) - Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ss_quantity,sr_return_quantity,ss_ext_sales_price,sr_return_amt] - BroadcastHashJoin [ss_ticket_number,ss_item_sk,sr_ticket_number,sr_item_sk] - Project [ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_item_sk,ss_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ticket_number,ss_quantity,ss_ext_sales_price] - InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 - InputAdapter - ReusedExchange [d_date_sk,d_year] #13 - InputAdapter - ReusedExchange [sr_item_sk,sr_ticket_number,sr_return_quantity,sr_return_amt] #7 - WholeStageCodegen (30) Project [d_year,i_brand_id,i_class_id,i_category_id,i_manufact_id,ws_quantity,wr_return_quantity,ws_ext_sales_price,wr_return_amt] BroadcastHashJoin [ws_order_number,ws_item_sk,wr_order_number,wr_item_sk] Project [ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price,i_brand_id,i_class_id,i_category_id,i_manufact_id,d_year] @@ -173,8 +157,8 @@ TakeOrderedAndProject [sales_cnt_diff,sales_amt_diff,prev_year,year,i_brand_id,i InputAdapter Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_order_number,ws_quantity,ws_ext_sales_price] InputAdapter - ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #4 + ReusedExchange [i_item_sk,i_brand_id,i_class_id,i_category_id,i_manufact_id] #3 InputAdapter - ReusedExchange [d_date_sk,d_year] #13 + ReusedExchange [d_date_sk,d_year] #11 InputAdapter - ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #8 + ReusedExchange [wr_item_sk,wr_order_number,wr_return_quantity,wr_return_amt] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt index ac49cc0548c08..56a010e2ddb91 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt @@ -1,112 +1,108 @@ == Physical Plan == -TakeOrderedAndProject (108) -+- * HashAggregate (107) - +- Exchange (106) - +- * HashAggregate (105) - +- Union (104) - :- * HashAggregate (98) - : +- Exchange (97) - : +- * HashAggregate (96) - : +- Union (95) - : :- * HashAggregate (89) - : : +- Exchange (88) - : : +- * HashAggregate (87) - : : +- Union (86) - : : :- * Project (34) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) - : : : :- * HashAggregate (19) - : : : : +- Exchange (18) - 
: : : : +- * HashAggregate (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (32) - : : : +- * HashAggregate (31) - : : : +- Exchange (30) - : : : +- * HashAggregate (29) - : : : +- * Project (28) - : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : :- * Project (25) - : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : :- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.store_returns (20) - : : : : +- ReusedExchange (23) - : : : +- ReusedExchange (26) - : : :- * Project (55) - : : : +- BroadcastNestedLoopJoin Inner BuildRight (54) - : : : :- * HashAggregate (43) - : : : : +- Exchange (42) - : : : : +- * HashAggregate (41) - : : : : +- * Project (40) - : : : : +- * BroadcastHashJoin Inner BuildRight (39) - : : : : :- * Filter (37) - : : : : : +- * ColumnarToRow (36) - : : : : : +- Scan parquet default.catalog_sales (35) - : : : : +- ReusedExchange (38) - : : : +- BroadcastExchange (53) - : : : +- * HashAggregate (52) - : : : +- Exchange (51) - : : : +- * HashAggregate (50) - : : : +- * Project (49) - : : : +- * BroadcastHashJoin Inner BuildRight (48) - : : : :- * Filter (46) - : : : : +- * ColumnarToRow (45) - : : : : +- Scan parquet default.catalog_returns (44) - : : : +- ReusedExchange (47) - : : +- * Project (85) - : : +- * BroadcastHashJoin LeftOuter BuildRight (84) - : : :- * HashAggregate (70) - : : : +- Exchange (69) - : : : +- * HashAggregate (68) - : : : +- * Project (67) - : : : +- * BroadcastHashJoin Inner BuildRight (66) - : : : :- * Project (61) - : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : :- * Filter (58) - : : : : : +- * ColumnarToRow (57) - : : : : : +- Scan parquet default.web_sales (56) - : : : : +- ReusedExchange (59) - : : : +- BroadcastExchange (65) - : : : +- * Filter (64) - : : : +- * ColumnarToRow (63) - : : : +- Scan parquet default.web_page (62) - : : +- BroadcastExchange (83) - : : +- * HashAggregate (82) - : : +- Exchange (81) - : : +- * HashAggregate (80) - : : +- * Project (79) - : : +- * BroadcastHashJoin Inner BuildRight (78) - : : :- * Project (76) - : : : +- * BroadcastHashJoin Inner BuildRight (75) - : : : :- * Filter (73) - : : : : +- * ColumnarToRow (72) - : : : : +- Scan parquet default.web_returns (71) - : : : +- ReusedExchange (74) - : : +- ReusedExchange (77) - : +- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- * HashAggregate (91) - : +- ReusedExchange (90) - +- * HashAggregate (103) - +- Exchange (102) - +- * HashAggregate (101) - +- * HashAggregate (100) - +- ReusedExchange (99) +TakeOrderedAndProject (104) ++- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- Union (86) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * 
Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (55) + : : +- BroadcastNestedLoopJoin Inner BuildRight (54) + : : :- * HashAggregate (43) + : : : +- Exchange (42) + : : : +- * HashAggregate (41) + : : : +- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : :- * Filter (37) + : : : : +- * ColumnarToRow (36) + : : : : +- Scan parquet default.catalog_sales (35) + : : : +- ReusedExchange (38) + : : +- BroadcastExchange (53) + : : +- * HashAggregate (52) + : : +- Exchange (51) + : : +- * HashAggregate (50) + : : +- * Project (49) + : : +- * BroadcastHashJoin Inner BuildRight (48) + : : :- * Filter (46) + : : : +- * ColumnarToRow (45) + : : : +- Scan parquet default.catalog_returns (44) + : : +- ReusedExchange (47) + : +- * Project (85) + : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : :- * HashAggregate (70) + : : +- Exchange (69) + : : +- * HashAggregate (68) + : : +- * Project (67) + : : +- * BroadcastHashJoin Inner BuildRight (66) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet default.web_sales (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (65) + : : +- * Filter (64) + : : +- * ColumnarToRow (63) + : : +- Scan parquet default.web_page (62) + : +- BroadcastExchange (83) + : +- * HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- * Project (79) + : +- * BroadcastHashJoin Inner BuildRight (78) + : :- * Project (76) + : : +- * BroadcastHashJoin Inner BuildRight (75) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet default.web_returns (71) + : : +- ReusedExchange (74) + : +- ReusedExchange (77) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (99) + +- Exchange (98) + +- * HashAggregate (97) + +- * HashAggregate (96) + +- ReusedExchange (95) (1) Scan parquet default.store_sales @@ -190,7 +186,7 @@ Results [3]: [s_store_sk#8, sum#12, sum#13] (18) Exchange Input [3]: [s_store_sk#8, sum#12, sum#13] -Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] +Arguments: hashpartitioning(s_store_sk#8, 5), ENSURE_REQUIREMENTS, [id=#14] (19) HashAggregate [codegen id : 8] Input [3]: [s_store_sk#8, sum#12, sum#13] @@ -246,7 +242,7 @@ Results [3]: [s_store_sk#23, sum#26, sum#27] (30) Exchange Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] +Arguments: hashpartitioning(s_store_sk#23, 
5), ENSURE_REQUIREMENTS, [id=#28] (31) HashAggregate [codegen id : 7] Input [3]: [s_store_sk#23, sum#26, sum#27] @@ -303,7 +299,7 @@ Results [3]: [cs_call_center_sk#39, sum#44, sum#45] (42) Exchange Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), ENSURE_REQUIREMENTS, [id=#46] (43) HashAggregate [codegen id : 11] Input [3]: [cs_call_center_sk#39, sum#44, sum#45] @@ -347,7 +343,7 @@ Results [2]: [sum#56, sum#57] (51) Exchange Input [2]: [sum#56, sum#57] -Arguments: SinglePartition, true, [id=#58] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] (52) HashAggregate [codegen id : 14] Input [2]: [sum#56, sum#57] @@ -429,7 +425,7 @@ Results [3]: [wp_web_page_sk#71, sum#75, sum#76] (69) Exchange Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), ENSURE_REQUIREMENTS, [id=#77] (70) HashAggregate [codegen id : 23] Input [3]: [wp_web_page_sk#71, sum#75, sum#76] @@ -485,7 +481,7 @@ Results [3]: [wp_web_page_sk#86, sum#89, sum#90] (81) Exchange Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] (82) HashAggregate [codegen id : 22] Input [3]: [wp_web_page_sk#86, sum#89, sum#90] @@ -518,7 +514,7 @@ Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum (88) Exchange Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] -Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#113] (89) HashAggregate [codegen id : 25] Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] @@ -546,7 +542,7 @@ Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, i (93) Exchange Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] -Arguments: hashpartitioning(channel#34, 5), true, [id=#145] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#145] (94) HashAggregate [codegen id : 51] Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] @@ -555,75 +551,55 @@ Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] -(95) Union +(95) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -(96) HashAggregate [codegen id : 52] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(97) Exchange -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] - -(98) HashAggregate [codegen id : 53] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, 
returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(99) ReusedExchange [Reuses operator id: 88] -Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] - -(100) HashAggregate [codegen id : 78] -Input [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +(96) HashAggregate [codegen id : 76] +Input [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] -Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] -Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#159)] +Aggregate Attributes [3]: [sum(sales#17)#160, sum(returns#36)#161, sum(profit#159)#162] +Results [3]: [sum(sales#17)#160 AS sales#130, sum(returns#36)#161 AS returns#131, sum(profit#159)#162 AS profit#132] -(101) HashAggregate [codegen id : 78] +(97) HashAggregate [codegen id : 76] Input [3]: [sales#130, returns#131, profit#132] Keys: [] Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] -Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] -Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Aggregate Attributes [6]: [sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Results [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -(102) Exchange -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: SinglePartition, true, [id=#176] +(98) Exchange +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#175] -(103) HashAggregate [codegen id : 79] -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +(99) HashAggregate [codegen id : 77] +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] Keys: [] Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] -Aggregate Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] -Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] +Aggregate Attributes [3]: [sum(sales#130)#176, sum(returns#131)#177, sum(profit#132)#178] +Results [5]: [null AS channel#179, null AS id#180, sum(sales#130)#176 AS sales#181, sum(returns#131)#177 AS returns#182, sum(profit#132)#178 AS profit#183] -(104) Union +(100) Union -(105) HashAggregate [codegen id : 80] +(101) HashAggregate [codegen id : 78] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(106) Exchange +(102) Exchange Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), ENSURE_REQUIREMENTS, [id=#184] -(107) HashAggregate [codegen id : 81] +(103) HashAggregate [codegen id : 79] 
Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(108) TakeOrderedAndProject +(104) TakeOrderedAndProject Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt index 92c25891f940e..3a5d78047c24b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/simplified.txt @@ -1,172 +1,164 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (81) + WholeStageCodegen (79) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (80) + WholeStageCodegen (78) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (53) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (25) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (52) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (24) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (25) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (24) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (8) - Project [s_store_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter - Exchange [s_store_sk] #4 - WholeStageCodegen (3) - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin 
[ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] - InputAdapter - Exchange [s_store_sk] #8 - WholeStageCodegen (6) - HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Project [sr_returned_date_sk,sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - ReusedExchange [s_store_sk] #6 - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (15) - Project [cs_call_center_sk,sales,returns,profit,profit_loss] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastNestedLoopJoin - WholeStageCodegen (11) - HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Project [sr_returned_date_sk,sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] + InputAdapter + ReusedExchange [s_store_sk] #5 + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (15) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #8 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow InputAdapter - Exchange [cs_call_center_sk] #9 - WholeStageCodegen 
(10) - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - BroadcastExchange #10 - WholeStageCodegen (14) - HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + BroadcastExchange #9 + WholeStageCodegen (14) + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow InputAdapter - Exchange #11 - WholeStageCodegen (13) - HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (23) - Project [wp_web_page_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (23) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen (18) - HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (17) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.web_page [wp_web_page_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (22) - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen (21) - HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Project [wr_returned_date_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [wp_web_page_sk] #13 - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (51) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (50) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (22) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (79) + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Project [wr_returned_date_sk,wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (51) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (50) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (77) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (78) + Exchange #16 + WholeStageCodegen (76) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] 
HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt index c18698ebc5b45..2d3ca673c2b08 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt @@ -1,112 +1,108 @@ == Physical Plan == -TakeOrderedAndProject (108) -+- * HashAggregate (107) - +- Exchange (106) - +- * HashAggregate (105) - +- Union (104) - :- * HashAggregate (98) - : +- Exchange (97) - : +- * HashAggregate (96) - : +- Union (95) - : :- * HashAggregate (89) - : : +- Exchange (88) - : : +- * HashAggregate (87) - : : +- Union (86) - : : :- * Project (34) - : : : +- * BroadcastHashJoin LeftOuter BuildRight (33) - : : : :- * HashAggregate (19) - : : : : +- Exchange (18) - : : : : +- * HashAggregate (17) - : : : : +- * Project (16) - : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : :- * Project (10) - : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : :- * Filter (3) - : : : : : : +- * ColumnarToRow (2) - : : : : : : +- Scan parquet default.store_sales (1) - : : : : : +- BroadcastExchange (8) - : : : : : +- * Project (7) - : : : : : +- * Filter (6) - : : : : : +- * ColumnarToRow (5) - : : : : : +- Scan parquet default.date_dim (4) - : : : : +- BroadcastExchange (14) - : : : : +- * Filter (13) - : : : : +- * ColumnarToRow (12) - : : : : +- Scan parquet default.store (11) - : : : +- BroadcastExchange (32) - : : : +- * HashAggregate (31) - : : : +- Exchange (30) - : : : +- * HashAggregate (29) - : : : +- * Project (28) - : : : +- * BroadcastHashJoin Inner BuildRight (27) - : : : :- * Project (25) - : : : : +- * BroadcastHashJoin Inner BuildRight (24) - : : : : :- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.store_returns (20) - : : : : +- ReusedExchange (23) - : : : +- ReusedExchange (26) - : : :- * Project (55) - : : : +- BroadcastNestedLoopJoin Inner BuildLeft (54) - : : : :- BroadcastExchange (44) - : : : : +- * HashAggregate (43) - : : : : +- Exchange (42) - : : : : +- * HashAggregate (41) - : : : : +- * Project (40) - : : : : +- * BroadcastHashJoin Inner BuildRight (39) - : : : : :- * Filter (37) - : : : : : +- * ColumnarToRow (36) - : : : : : +- Scan parquet default.catalog_sales (35) - : : : : +- ReusedExchange (38) - : : : +- * HashAggregate (53) - : : : +- Exchange (52) - : : : +- * HashAggregate (51) - : : : +- * Project (50) - : : : +- * BroadcastHashJoin Inner BuildRight (49) - : : : :- * Filter (47) - : : : : +- * ColumnarToRow (46) - : : : : +- Scan parquet default.catalog_returns (45) - : : : +- ReusedExchange (48) - : : +- * Project (85) - : : +- * BroadcastHashJoin LeftOuter BuildRight (84) - : : :- * HashAggregate (70) - : : : +- Exchange (69) - : : : +- * HashAggregate (68) - : : : +- * Project (67) - : : : +- * BroadcastHashJoin Inner BuildRight (66) - : : : :- * Project (61) - : : : : +- * BroadcastHashJoin Inner BuildRight (60) - : : : : :- * Filter (58) - : : : : : +- * ColumnarToRow (57) - : : : : : +- Scan parquet default.web_sales (56) - : : : : +- ReusedExchange (59) - : : 
: +- BroadcastExchange (65) - : : : +- * Filter (64) - : : : +- * ColumnarToRow (63) - : : : +- Scan parquet default.web_page (62) - : : +- BroadcastExchange (83) - : : +- * HashAggregate (82) - : : +- Exchange (81) - : : +- * HashAggregate (80) - : : +- * Project (79) - : : +- * BroadcastHashJoin Inner BuildRight (78) - : : :- * Project (76) - : : : +- * BroadcastHashJoin Inner BuildRight (75) - : : : :- * Filter (73) - : : : : +- * ColumnarToRow (72) - : : : : +- Scan parquet default.web_returns (71) - : : : +- ReusedExchange (74) - : : +- ReusedExchange (77) - : +- * HashAggregate (94) - : +- Exchange (93) - : +- * HashAggregate (92) - : +- * HashAggregate (91) - : +- ReusedExchange (90) - +- * HashAggregate (103) - +- Exchange (102) - +- * HashAggregate (101) - +- * HashAggregate (100) - +- ReusedExchange (99) +TakeOrderedAndProject (104) ++- * HashAggregate (103) + +- Exchange (102) + +- * HashAggregate (101) + +- Union (100) + :- * HashAggregate (89) + : +- Exchange (88) + : +- * HashAggregate (87) + : +- Union (86) + : :- * Project (34) + : : +- * BroadcastHashJoin LeftOuter BuildRight (33) + : : :- * HashAggregate (19) + : : : +- Exchange (18) + : : : +- * HashAggregate (17) + : : : +- * Project (16) + : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : :- * Project (10) + : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : :- * Filter (3) + : : : : : +- * ColumnarToRow (2) + : : : : : +- Scan parquet default.store_sales (1) + : : : : +- BroadcastExchange (8) + : : : : +- * Project (7) + : : : : +- * Filter (6) + : : : : +- * ColumnarToRow (5) + : : : : +- Scan parquet default.date_dim (4) + : : : +- BroadcastExchange (14) + : : : +- * Filter (13) + : : : +- * ColumnarToRow (12) + : : : +- Scan parquet default.store (11) + : : +- BroadcastExchange (32) + : : +- * HashAggregate (31) + : : +- Exchange (30) + : : +- * HashAggregate (29) + : : +- * Project (28) + : : +- * BroadcastHashJoin Inner BuildRight (27) + : : :- * Project (25) + : : : +- * BroadcastHashJoin Inner BuildRight (24) + : : : :- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.store_returns (20) + : : : +- ReusedExchange (23) + : : +- ReusedExchange (26) + : :- * Project (55) + : : +- BroadcastNestedLoopJoin Inner BuildLeft (54) + : : :- BroadcastExchange (44) + : : : +- * HashAggregate (43) + : : : +- Exchange (42) + : : : +- * HashAggregate (41) + : : : +- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) + : : : :- * Filter (37) + : : : : +- * ColumnarToRow (36) + : : : : +- Scan parquet default.catalog_sales (35) + : : : +- ReusedExchange (38) + : : +- * HashAggregate (53) + : : +- Exchange (52) + : : +- * HashAggregate (51) + : : +- * Project (50) + : : +- * BroadcastHashJoin Inner BuildRight (49) + : : :- * Filter (47) + : : : +- * ColumnarToRow (46) + : : : +- Scan parquet default.catalog_returns (45) + : : +- ReusedExchange (48) + : +- * Project (85) + : +- * BroadcastHashJoin LeftOuter BuildRight (84) + : :- * HashAggregate (70) + : : +- Exchange (69) + : : +- * HashAggregate (68) + : : +- * Project (67) + : : +- * BroadcastHashJoin Inner BuildRight (66) + : : :- * Project (61) + : : : +- * BroadcastHashJoin Inner BuildRight (60) + : : : :- * Filter (58) + : : : : +- * ColumnarToRow (57) + : : : : +- Scan parquet default.web_sales (56) + : : : +- ReusedExchange (59) + : : +- BroadcastExchange (65) + : : +- * Filter (64) + : : +- * ColumnarToRow (63) + : : +- Scan parquet default.web_page (62) + : +- BroadcastExchange (83) + : +- * 
HashAggregate (82) + : +- Exchange (81) + : +- * HashAggregate (80) + : +- * Project (79) + : +- * BroadcastHashJoin Inner BuildRight (78) + : :- * Project (76) + : : +- * BroadcastHashJoin Inner BuildRight (75) + : : :- * Filter (73) + : : : +- * ColumnarToRow (72) + : : : +- Scan parquet default.web_returns (71) + : : +- ReusedExchange (74) + : +- ReusedExchange (77) + :- * HashAggregate (94) + : +- Exchange (93) + : +- * HashAggregate (92) + : +- * HashAggregate (91) + : +- ReusedExchange (90) + +- * HashAggregate (99) + +- Exchange (98) + +- * HashAggregate (97) + +- * HashAggregate (96) + +- ReusedExchange (95) (1) Scan parquet default.store_sales @@ -190,7 +186,7 @@ Results [3]: [s_store_sk#8, sum#12, sum#13] (18) Exchange Input [3]: [s_store_sk#8, sum#12, sum#13] -Arguments: hashpartitioning(s_store_sk#8, 5), true, [id=#14] +Arguments: hashpartitioning(s_store_sk#8, 5), ENSURE_REQUIREMENTS, [id=#14] (19) HashAggregate [codegen id : 8] Input [3]: [s_store_sk#8, sum#12, sum#13] @@ -246,7 +242,7 @@ Results [3]: [s_store_sk#23, sum#26, sum#27] (30) Exchange Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), true, [id=#28] +Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] (31) HashAggregate [codegen id : 7] Input [3]: [s_store_sk#23, sum#26, sum#27] @@ -303,7 +299,7 @@ Results [3]: [cs_call_center_sk#39, sum#44, sum#45] (42) Exchange Input [3]: [cs_call_center_sk#39, sum#44, sum#45] -Arguments: hashpartitioning(cs_call_center_sk#39, 5), true, [id=#46] +Arguments: hashpartitioning(cs_call_center_sk#39, 5), ENSURE_REQUIREMENTS, [id=#46] (43) HashAggregate [codegen id : 11] Input [3]: [cs_call_center_sk#39, sum#44, sum#45] @@ -351,7 +347,7 @@ Results [2]: [sum#57, sum#58] (52) Exchange Input [2]: [sum#57, sum#58] -Arguments: SinglePartition, true, [id=#59] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#59] (53) HashAggregate [codegen id : 14] Input [2]: [sum#57, sum#58] @@ -429,7 +425,7 @@ Results [3]: [wp_web_page_sk#71, sum#75, sum#76] (69) Exchange Input [3]: [wp_web_page_sk#71, sum#75, sum#76] -Arguments: hashpartitioning(wp_web_page_sk#71, 5), true, [id=#77] +Arguments: hashpartitioning(wp_web_page_sk#71, 5), ENSURE_REQUIREMENTS, [id=#77] (70) HashAggregate [codegen id : 23] Input [3]: [wp_web_page_sk#71, sum#75, sum#76] @@ -485,7 +481,7 @@ Results [3]: [wp_web_page_sk#86, sum#89, sum#90] (81) Exchange Input [3]: [wp_web_page_sk#86, sum#89, sum#90] -Arguments: hashpartitioning(wp_web_page_sk#86, 5), true, [id=#91] +Arguments: hashpartitioning(wp_web_page_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] (82) HashAggregate [codegen id : 22] Input [3]: [wp_web_page_sk#86, sum#89, sum#90] @@ -518,7 +514,7 @@ Results [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum (88) Exchange Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] -Arguments: hashpartitioning(channel#34, id#35, 5), true, [id=#113] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#113] (89) HashAggregate [codegen id : 25] Input [8]: [channel#34, id#35, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112] @@ -546,7 +542,7 @@ Results [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, i (93) Exchange Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] -Arguments: hashpartitioning(channel#34, 5), true, [id=#145] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#145] (94) 
HashAggregate [codegen id : 51] Input [7]: [channel#34, sum#139, isEmpty#140, sum#141, isEmpty#142, sum#143, isEmpty#144] @@ -555,75 +551,55 @@ Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] Aggregate Attributes [3]: [sum(sales#130)#146, sum(returns#131)#147, sum(profit#132)#148] Results [5]: [channel#34, null AS id#149, sum(sales#130)#146 AS sales#150, sum(returns#131)#147 AS returns#151, sum(profit#132)#148 AS profit#152] -(95) Union +(95) ReusedExchange [Reuses operator id: 88] +Output [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -(96) HashAggregate [codegen id : 52] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(97) Exchange -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#153] - -(98) HashAggregate [codegen id : 53] -Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] - -(99) ReusedExchange [Reuses operator id: 88] -Output [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] - -(100) HashAggregate [codegen id : 78] -Input [8]: [channel#34, id#35, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +(96) HashAggregate [codegen id : 76] +Input [8]: [channel#34, id#35, sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#160)] -Aggregate Attributes [3]: [sum(sales#17)#161, sum(returns#36)#162, sum(profit#160)#163] -Results [3]: [sum(sales#17)#161 AS sales#130, sum(returns#36)#162 AS returns#131, sum(profit#160)#163 AS profit#132] +Functions [3]: [sum(sales#17), sum(returns#36), sum(profit#159)] +Aggregate Attributes [3]: [sum(sales#17)#160, sum(returns#36)#161, sum(profit#159)#162] +Results [3]: [sum(sales#17)#160 AS sales#130, sum(returns#36)#161 AS returns#131, sum(profit#159)#162 AS profit#132] -(101) HashAggregate [codegen id : 78] +(97) HashAggregate [codegen id : 76] Input [3]: [sales#130, returns#131, profit#132] Keys: [] Functions [3]: [partial_sum(sales#130), partial_sum(returns#131), partial_sum(profit#132)] -Aggregate Attributes [6]: [sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] -Results [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +Aggregate Attributes [6]: [sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Results [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] -(102) Exchange -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] -Arguments: SinglePartition, true, [id=#176] +(98) Exchange +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#175] -(103) HashAggregate [codegen id : 79] -Input [6]: [sum#170, isEmpty#171, sum#172, isEmpty#173, sum#174, isEmpty#175] +(99) HashAggregate [codegen id : 77] +Input [6]: [sum#169, isEmpty#170, sum#171, isEmpty#172, sum#173, isEmpty#174] Keys: [] Functions [3]: [sum(sales#130), sum(returns#131), sum(profit#132)] -Aggregate 
Attributes [3]: [sum(sales#130)#177, sum(returns#131)#178, sum(profit#132)#179] -Results [5]: [null AS channel#180, null AS id#181, sum(sales#130)#177 AS sales#182, sum(returns#131)#178 AS returns#183, sum(profit#132)#179 AS profit#184] +Aggregate Attributes [3]: [sum(sales#130)#176, sum(returns#131)#177, sum(profit#132)#178] +Results [5]: [null AS channel#179, null AS id#180, sum(sales#130)#176 AS sales#181, sum(returns#131)#177 AS returns#182, sum(profit#132)#178 AS profit#183] -(104) Union +(100) Union -(105) HashAggregate [codegen id : 80] +(101) HashAggregate [codegen id : 78] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(106) Exchange +(102) Exchange Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), true, [id=#185] +Arguments: hashpartitioning(channel#34, id#35, sales#117, returns#118, profit#119, 5), ENSURE_REQUIREMENTS, [id=#184] -(107) HashAggregate [codegen id : 81] +(103) HashAggregate [codegen id : 79] Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Keys [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Functions: [] Aggregate Attributes: [] Results [5]: [channel#34, id#35, sales#117, returns#118, profit#119] -(108) TakeOrderedAndProject +(104) TakeOrderedAndProject Input [5]: [channel#34, id#35, sales#117, returns#118, profit#119] Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#117, returns#118, profit#119] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt index 864039e512231..47b743fee91dd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/simplified.txt @@ -1,172 +1,164 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (81) + WholeStageCodegen (79) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (80) + WholeStageCodegen (78) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (53) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (25) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (52) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (24) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (25) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (24) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (8) - Project 
[s_store_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [s_store_sk,s_store_sk] - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + WholeStageCodegen (8) + Project [s_store_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [s_store_sk,s_store_sk] + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(ss_ext_sales_price)),sum(UnscaledValue(ss_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [s_store_sk] #3 + WholeStageCodegen (3) + HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] + Project [ss_ext_sales_price,ss_net_profit,s_store_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] InputAdapter - Exchange [s_store_sk] #4 - WholeStageCodegen (3) - HashAggregate [s_store_sk,ss_ext_sales_price,ss_net_profit] [sum,sum,sum,sum] - Project [ss_ext_sales_price,ss_net_profit,s_store_sk] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (7) - HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] - InputAdapter - Exchange [s_store_sk] #8 - WholeStageCodegen (6) - HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] - Project [sr_return_amt,sr_net_loss,s_store_sk] - BroadcastHashJoin [sr_store_sk,s_store_sk] - Project [sr_store_sk,sr_return_amt,sr_net_loss] - BroadcastHashJoin [sr_returned_date_sk,d_date_sk] - Filter [sr_returned_date_sk,sr_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - ReusedExchange [s_store_sk] #6 - WholeStageCodegen (15) - Project [cs_call_center_sk,sales,returns,profit,profit_loss] + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastNestedLoopJoin - BroadcastExchange #9 - WholeStageCodegen (11) - HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (7) + HashAggregate [s_store_sk,sum,sum] [sum(UnscaledValue(sr_return_amt)),sum(UnscaledValue(sr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange [s_store_sk] #7 + WholeStageCodegen (6) + 
HashAggregate [s_store_sk,sr_return_amt,sr_net_loss] [sum,sum,sum,sum] + Project [sr_return_amt,sr_net_loss,s_store_sk] + BroadcastHashJoin [sr_store_sk,s_store_sk] + Project [sr_store_sk,sr_return_amt,sr_net_loss] + BroadcastHashJoin [sr_returned_date_sk,d_date_sk] + Filter [sr_returned_date_sk,sr_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_returned_date_sk,sr_store_sk,sr_return_amt,sr_net_loss] InputAdapter - Exchange [cs_call_center_sk] #10 - WholeStageCodegen (10) - HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] - Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (14) - HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [s_store_sk] #5 + WholeStageCodegen (15) + Project [cs_call_center_sk,sales,returns,profit,profit_loss] + InputAdapter + BroadcastNestedLoopJoin + BroadcastExchange #8 + WholeStageCodegen (11) + HashAggregate [cs_call_center_sk,sum,sum] [sum(UnscaledValue(cs_ext_sales_price)),sum(UnscaledValue(cs_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [cs_call_center_sk] #9 + WholeStageCodegen (10) + HashAggregate [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] [sum,sum,sum,sum] + Project [cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_call_center_sk,cs_ext_sales_price,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (14) + HashAggregate [sum,sum] [sum(UnscaledValue(cr_return_amount)),sum(UnscaledValue(cr_net_loss)),returns,profit_loss,sum,sum] + InputAdapter + Exchange #10 + WholeStageCodegen (13) + HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] + Project [cr_return_amount,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk] + ColumnarToRow InputAdapter - Exchange #11 - WholeStageCodegen (13) - HashAggregate [cr_return_amount,cr_net_loss] [sum,sum,sum,sum] - Project [cr_return_amount,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - WholeStageCodegen (23) - Project [wp_web_page_sk,sales,returns,profit,profit_loss] - BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_return_amount,cr_net_loss] InputAdapter - Exchange [wp_web_page_sk] #12 - WholeStageCodegen (18) - HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] - Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] - BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] - Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_web_page_sk] - 
ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - BroadcastExchange #13 - WholeStageCodegen (17) - Filter [wp_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_page [wp_web_page_sk] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (22) - HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] + ReusedExchange [d_date_sk] #4 + WholeStageCodegen (23) + Project [wp_web_page_sk,sales,returns,profit,profit_loss] + BroadcastHashJoin [wp_web_page_sk,wp_web_page_sk] + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(ws_ext_sales_price)),sum(UnscaledValue(ws_net_profit)),sales,profit,sum,sum] + InputAdapter + Exchange [wp_web_page_sk] #11 + WholeStageCodegen (18) + HashAggregate [wp_web_page_sk,ws_ext_sales_price,ws_net_profit] [sum,sum,sum,sum] + Project [ws_ext_sales_price,ws_net_profit,wp_web_page_sk] + BroadcastHashJoin [ws_web_page_sk,wp_web_page_sk] + Project [ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_web_page_sk,ws_ext_sales_price,ws_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + BroadcastExchange #12 + WholeStageCodegen (17) + Filter [wp_web_page_sk] + ColumnarToRow InputAdapter - Exchange [wp_web_page_sk] #15 - WholeStageCodegen (21) - HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] - Project [wr_return_amt,wr_net_loss,wp_web_page_sk] - BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] - Project [wr_web_page_sk,wr_return_amt,wr_net_loss] - BroadcastHashJoin [wr_returned_date_sk,d_date_sk] - Filter [wr_returned_date_sk,wr_web_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #5 - InputAdapter - ReusedExchange [wp_web_page_sk] #13 - WholeStageCodegen (51) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (50) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.web_page [wp_web_page_sk] + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (22) + HashAggregate [wp_web_page_sk,sum,sum] [sum(UnscaledValue(wr_return_amt)),sum(UnscaledValue(wr_net_loss)),returns,profit_loss,sum,sum] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (79) + Exchange [wp_web_page_sk] #14 + WholeStageCodegen (21) + HashAggregate [wp_web_page_sk,wr_return_amt,wr_net_loss] [sum,sum,sum,sum] + Project [wr_return_amt,wr_net_loss,wp_web_page_sk] + BroadcastHashJoin [wr_web_page_sk,wp_web_page_sk] + Project [wr_web_page_sk,wr_return_amt,wr_net_loss] + BroadcastHashJoin [wr_returned_date_sk,d_date_sk] + Filter [wr_returned_date_sk,wr_web_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns 
[wr_returned_date_sk,wr_web_page_sk,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + ReusedExchange [wp_web_page_sk] #12 + WholeStageCodegen (51) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (50) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (77) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (78) + Exchange #16 + WholeStageCodegen (76) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index 025e5a6f94741..4aa23cbe8b905 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -1,129 +1,125 @@ == Physical Plan == -TakeOrderedAndProject (125) -+- * HashAggregate (124) - +- Exchange (123) - +- * HashAggregate (122) - +- Union (121) - :- * HashAggregate (115) - : +- Exchange (114) - : +- * HashAggregate (113) - : +- Union (112) - : :- * HashAggregate (106) - : : +- Exchange (105) - : : +- * HashAggregate (104) - : : +- Union (103) - : : :- * HashAggregate (42) - : : : +- Exchange (41) - : : : +- * HashAggregate (40) - : : : +- * Project (39) - : : : +- * BroadcastHashJoin Inner BuildRight (38) - : : : :- * Project (33) - : : : : +- * BroadcastHashJoin Inner BuildRight (32) - : : : : :- * Project (26) - : : : : : +- * BroadcastHashJoin Inner BuildRight (25) - : : : : : :- * Project (19) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (18) - : : : : : : :- * Project (12) - : : : : : : : +- SortMergeJoin LeftOuter (11) - : : : : : : : :- * Sort (5) - : : : : : : : : +- Exchange (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- * Sort (10) - : : : : : : : +- Exchange (9) - : : : : : : : +- * Filter (8) - : : : : : : : +- * ColumnarToRow (7) - : : : : : : : +- Scan parquet default.store_returns (6) - : : : : : : +- BroadcastExchange (17) - : : : : : : +- * Project (16) - : : : : : : +- * Filter (15) - : : : : : : +- * ColumnarToRow (14) - : : : : : : +- Scan parquet default.item (13) - : : : : : +- BroadcastExchange (24) - : : : : : +- * Project (23) - : : : : : +- * Filter (22) - : : : : : +- * ColumnarToRow (21) - : : : : : +- Scan parquet default.promotion (20) - : : : : +- 
BroadcastExchange (31) - : : : : +- * Project (30) - : : : : +- * Filter (29) - : : : : +- * ColumnarToRow (28) - : : : : +- Scan parquet default.date_dim (27) - : : : +- BroadcastExchange (37) - : : : +- * Filter (36) - : : : +- * ColumnarToRow (35) - : : : +- Scan parquet default.store (34) - : : :- * HashAggregate (72) - : : : +- Exchange (71) - : : : +- * HashAggregate (70) - : : : +- * Project (69) - : : : +- * BroadcastHashJoin Inner BuildRight (68) - : : : :- * Project (63) - : : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : : :- * Project (60) - : : : : : +- * BroadcastHashJoin Inner BuildRight (59) - : : : : : :- * Project (57) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (56) - : : : : : : :- * Project (54) - : : : : : : : +- SortMergeJoin LeftOuter (53) - : : : : : : : :- * Sort (47) - : : : : : : : : +- Exchange (46) - : : : : : : : : +- * Filter (45) - : : : : : : : : +- * ColumnarToRow (44) - : : : : : : : : +- Scan parquet default.catalog_sales (43) - : : : : : : : +- * Sort (52) - : : : : : : : +- Exchange (51) - : : : : : : : +- * Filter (50) - : : : : : : : +- * ColumnarToRow (49) - : : : : : : : +- Scan parquet default.catalog_returns (48) - : : : : : : +- ReusedExchange (55) - : : : : : +- ReusedExchange (58) - : : : : +- ReusedExchange (61) - : : : +- BroadcastExchange (67) - : : : +- * Filter (66) - : : : +- * ColumnarToRow (65) - : : : +- Scan parquet default.catalog_page (64) - : : +- * HashAggregate (102) - : : +- Exchange (101) - : : +- * HashAggregate (100) - : : +- * Project (99) - : : +- * BroadcastHashJoin Inner BuildRight (98) - : : :- * Project (93) - : : : +- * BroadcastHashJoin Inner BuildRight (92) - : : : :- * Project (90) - : : : : +- * BroadcastHashJoin Inner BuildRight (89) - : : : : :- * Project (87) - : : : : : +- * BroadcastHashJoin Inner BuildRight (86) - : : : : : :- * Project (84) - : : : : : : +- SortMergeJoin LeftOuter (83) - : : : : : : :- * Sort (77) - : : : : : : : +- Exchange (76) - : : : : : : : +- * Filter (75) - : : : : : : : +- * ColumnarToRow (74) - : : : : : : : +- Scan parquet default.web_sales (73) - : : : : : : +- * Sort (82) - : : : : : : +- Exchange (81) - : : : : : : +- * Filter (80) - : : : : : : +- * ColumnarToRow (79) - : : : : : : +- Scan parquet default.web_returns (78) - : : : : : +- ReusedExchange (85) - : : : : +- ReusedExchange (88) - : : : +- ReusedExchange (91) - : : +- BroadcastExchange (97) - : : +- * Filter (96) - : : +- * ColumnarToRow (95) - : : +- Scan parquet default.web_site (94) - : +- * HashAggregate (111) - : +- Exchange (110) - : +- * HashAggregate (109) - : +- * HashAggregate (108) - : +- ReusedExchange (107) - +- * HashAggregate (120) - +- Exchange (119) - +- * HashAggregate (118) - +- * HashAggregate (117) - +- ReusedExchange (116) +TakeOrderedAndProject (121) ++- * HashAggregate (120) + +- Exchange (119) + +- * HashAggregate (118) + +- Union (117) + :- * HashAggregate (106) + : +- Exchange (105) + : +- * HashAggregate (104) + : +- Union (103) + : :- * HashAggregate (42) + : : +- Exchange (41) + : : +- * HashAggregate (40) + : : +- * Project (39) + : : +- * BroadcastHashJoin Inner BuildRight (38) + : : :- * Project (33) + : : : +- * BroadcastHashJoin Inner BuildRight (32) + : : : :- * Project (26) + : : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : : :- * Project (19) + : : : : : +- * BroadcastHashJoin Inner BuildRight (18) + : : : : : :- * Project (12) + : : : : : : +- SortMergeJoin LeftOuter (11) + : : : : : : :- * Sort (5) + : : : : : : : +- Exchange (4) + : : : : : : 
: +- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.store_sales (1) + : : : : : : +- * Sort (10) + : : : : : : +- Exchange (9) + : : : : : : +- * Filter (8) + : : : : : : +- * ColumnarToRow (7) + : : : : : : +- Scan parquet default.store_returns (6) + : : : : : +- BroadcastExchange (17) + : : : : : +- * Project (16) + : : : : : +- * Filter (15) + : : : : : +- * ColumnarToRow (14) + : : : : : +- Scan parquet default.item (13) + : : : : +- BroadcastExchange (24) + : : : : +- * Project (23) + : : : : +- * Filter (22) + : : : : +- * ColumnarToRow (21) + : : : : +- Scan parquet default.promotion (20) + : : : +- BroadcastExchange (31) + : : : +- * Project (30) + : : : +- * Filter (29) + : : : +- * ColumnarToRow (28) + : : : +- Scan parquet default.date_dim (27) + : : +- BroadcastExchange (37) + : : +- * Filter (36) + : : +- * ColumnarToRow (35) + : : +- Scan parquet default.store (34) + : :- * HashAggregate (72) + : : +- Exchange (71) + : : +- * HashAggregate (70) + : : +- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) + : : :- * Project (63) + : : : +- * BroadcastHashJoin Inner BuildRight (62) + : : : :- * Project (60) + : : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : : :- * Project (57) + : : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : : :- * Project (54) + : : : : : : +- SortMergeJoin LeftOuter (53) + : : : : : : :- * Sort (47) + : : : : : : : +- Exchange (46) + : : : : : : : +- * Filter (45) + : : : : : : : +- * ColumnarToRow (44) + : : : : : : : +- Scan parquet default.catalog_sales (43) + : : : : : : +- * Sort (52) + : : : : : : +- Exchange (51) + : : : : : : +- * Filter (50) + : : : : : : +- * ColumnarToRow (49) + : : : : : : +- Scan parquet default.catalog_returns (48) + : : : : : +- ReusedExchange (55) + : : : : +- ReusedExchange (58) + : : : +- ReusedExchange (61) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet default.catalog_page (64) + : +- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * Project (99) + : +- * BroadcastHashJoin Inner BuildRight (98) + : :- * Project (93) + : : +- * BroadcastHashJoin Inner BuildRight (92) + : : :- * Project (90) + : : : +- * BroadcastHashJoin Inner BuildRight (89) + : : : :- * Project (87) + : : : : +- * BroadcastHashJoin Inner BuildRight (86) + : : : : :- * Project (84) + : : : : : +- SortMergeJoin LeftOuter (83) + : : : : : :- * Sort (77) + : : : : : : +- Exchange (76) + : : : : : : +- * Filter (75) + : : : : : : +- * ColumnarToRow (74) + : : : : : : +- Scan parquet default.web_sales (73) + : : : : : +- * Sort (82) + : : : : : +- Exchange (81) + : : : : : +- * Filter (80) + : : : : : +- * ColumnarToRow (79) + : : : : : +- Scan parquet default.web_returns (78) + : : : : +- ReusedExchange (85) + : : : +- ReusedExchange (88) + : : +- ReusedExchange (91) + : +- BroadcastExchange (97) + : +- * Filter (96) + : +- * ColumnarToRow (95) + : +- Scan parquet default.web_site (94) + :- * HashAggregate (111) + : +- Exchange (110) + : +- * HashAggregate (109) + : +- * HashAggregate (108) + : +- ReusedExchange (107) + +- * HashAggregate (116) + +- Exchange (115) + +- * HashAggregate (114) + +- * HashAggregate (113) + +- ReusedExchange (112) (1) Scan parquet default.store_sales @@ -142,7 +138,7 @@ Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_store_sk#3)) AND is (4) Exchange Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, 
ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] -Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#8] +Arguments: hashpartitioning(cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#8] (5) Sort [codegen id : 2] Input [7]: [ss_sold_date_sk#1, ss_item_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_net_profit#7] @@ -164,7 +160,7 @@ Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Exchange Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] -Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), true, [id=#13] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [id=#13] (10) Sort [codegen id : 4] Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] @@ -308,7 +304,7 @@ Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] (41) Exchange Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#24, 5), true, [id=#36] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] (42) HashAggregate [codegen id : 10] Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] @@ -333,7 +329,7 @@ Condition : (((isnotnull(cs_sold_date_sk#45) AND isnotnull(cs_catalog_page_sk#46 (46) Exchange Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] -Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), true, [id=#52] +Arguments: hashpartitioning(cs_item_sk#47, cs_order_number#49, 5), ENSURE_REQUIREMENTS, [id=#52] (47) Sort [codegen id : 12] Input [7]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_item_sk#47, cs_promo_sk#48, cs_order_number#49, cs_ext_sales_price#50, cs_net_profit#51] @@ -355,7 +351,7 @@ Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) (51) Exchange Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), true, [id=#57] +Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#57] (52) Sort [codegen id : 14] Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] @@ -442,7 +438,7 @@ Results [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty (71) Exchange Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] -Arguments: hashpartitioning(cp_catalog_page_id#59, 5), true, [id=#71] +Arguments: hashpartitioning(cp_catalog_page_id#59, 5), ENSURE_REQUIREMENTS, [id=#71] (72) HashAggregate [codegen id : 20] Input [6]: [cp_catalog_page_id#59, sum#66, sum#67, isEmpty#68, sum#69, isEmpty#70] @@ -467,7 +463,7 @@ Condition : (((isnotnull(ws_sold_date_sk#80) AND isnotnull(ws_web_site_sk#82)) A (76) Exchange Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] -Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), true, [id=#87] +Arguments: hashpartitioning(cast(ws_item_sk#81 as bigint), cast(ws_order_number#84 as bigint), 5), ENSURE_REQUIREMENTS, [id=#87] (77) Sort [codegen id : 22] Input [7]: [ws_sold_date_sk#80, ws_item_sk#81, ws_web_site_sk#82, 
ws_promo_sk#83, ws_order_number#84, ws_ext_sales_price#85, ws_net_profit#86] @@ -489,7 +485,7 @@ Condition : (isnotnull(wr_item_sk#88) AND isnotnull(wr_order_number#89)) (81) Exchange Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] -Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), true, [id=#92] +Arguments: hashpartitioning(wr_item_sk#88, wr_order_number#89, 5), ENSURE_REQUIREMENTS, [id=#92] (82) Sort [codegen id : 24] Input [4]: [wr_item_sk#88, wr_order_number#89, wr_return_amt#90, wr_net_loss#91] @@ -576,7 +572,7 @@ Results [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#10 (101) Exchange Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] -Arguments: hashpartitioning(web_site_id#94, 5), true, [id=#106] +Arguments: hashpartitioning(web_site_id#94, 5), ENSURE_REQUIREMENTS, [id=#106] (102) HashAggregate [codegen id : 30] Input [6]: [web_site_id#94, sum#101, sum#102, isEmpty#103, sum#104, isEmpty#105] @@ -596,7 +592,7 @@ Results [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum (105) Exchange Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] -Arguments: hashpartitioning(channel#40, id#41, 5), true, [id=#127] +Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#127] (106) HashAggregate [codegen id : 32] Input [8]: [channel#40, id#41, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] @@ -624,7 +620,7 @@ Results [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, i (110) Exchange Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] -Arguments: hashpartitioning(channel#40, 5), true, [id=#158] +Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#158] (111) HashAggregate [codegen id : 65] Input [7]: [channel#40, sum#152, isEmpty#153, sum#154, isEmpty#155, sum#156, isEmpty#157] @@ -633,75 +629,55 @@ Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] Aggregate Attributes [3]: [sum(sales#143)#159, sum(returns#144)#160, sum(profit#145)#161] Results [5]: [channel#40, null AS id#162, sum(sales#143)#159 AS sales#163, sum(returns#144)#160 AS returns#164, sum(profit#145)#161 AS profit#165] -(112) Union +(112) ReusedExchange [Reuses operator id: 105] +Output [8]: [channel#40, id#41, sum#166, isEmpty#167, sum#168, isEmpty#169, sum#170, isEmpty#171] -(113) HashAggregate [codegen id : 66] -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] - -(114) Exchange -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#166] - -(115) HashAggregate [codegen id : 67] -Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] - -(116) ReusedExchange [Reuses operator id: 105] -Output [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] - -(117) HashAggregate [codegen id : 99] -Input [8]: [channel#40, id#41, sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +(113) HashAggregate [codegen id 
: 97] +Input [8]: [channel#40, id#41, sum#166, isEmpty#167, sum#168, isEmpty#169, sum#170, isEmpty#171] Keys [2]: [channel#40, id#41] Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#173, sum(returns#43)#174, sum(profit#44)#175] -Results [3]: [sum(sales#42)#173 AS sales#143, sum(returns#43)#174 AS returns#144, sum(profit#44)#175 AS profit#145] +Aggregate Attributes [3]: [sum(sales#42)#172, sum(returns#43)#173, sum(profit#44)#174] +Results [3]: [sum(sales#42)#172 AS sales#143, sum(returns#43)#173 AS returns#144, sum(profit#44)#174 AS profit#145] -(118) HashAggregate [codegen id : 99] +(114) HashAggregate [codegen id : 97] Input [3]: [sales#143, returns#144, profit#145] Keys: [] Functions [3]: [partial_sum(sales#143), partial_sum(returns#144), partial_sum(profit#145)] -Aggregate Attributes [6]: [sum#176, isEmpty#177, sum#178, isEmpty#179, sum#180, isEmpty#181] -Results [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +Aggregate Attributes [6]: [sum#175, isEmpty#176, sum#177, isEmpty#178, sum#179, isEmpty#180] +Results [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] -(119) Exchange -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] -Arguments: SinglePartition, true, [id=#188] +(115) Exchange +Input [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#187] -(120) HashAggregate [codegen id : 100] -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +(116) HashAggregate [codegen id : 98] +Input [6]: [sum#181, isEmpty#182, sum#183, isEmpty#184, sum#185, isEmpty#186] Keys: [] Functions [3]: [sum(sales#143), sum(returns#144), sum(profit#145)] -Aggregate Attributes [3]: [sum(sales#143)#189, sum(returns#144)#190, sum(profit#145)#191] -Results [5]: [null AS channel#192, null AS id#193, sum(sales#143)#189 AS sales#194, sum(returns#144)#190 AS returns#195, sum(profit#145)#191 AS profit#196] +Aggregate Attributes [3]: [sum(sales#143)#188, sum(returns#144)#189, sum(profit#145)#190] +Results [5]: [null AS channel#191, null AS id#192, sum(sales#143)#188 AS sales#193, sum(returns#144)#189 AS returns#194, sum(profit#145)#190 AS profit#195] -(121) Union +(117) Union -(122) HashAggregate [codegen id : 101] +(118) HashAggregate [codegen id : 99] Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -(123) Exchange +(119) Exchange Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), true, [id=#197] +Arguments: hashpartitioning(channel#40, id#41, sales#131, returns#132, profit#133, 5), ENSURE_REQUIREMENTS, [id=#196] -(124) HashAggregate [codegen id : 102] +(120) HashAggregate [codegen id : 100] Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Keys [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Functions: [] Aggregate Attributes: [] Results [5]: [channel#40, id#41, sales#131, returns#132, profit#133] -(125) TakeOrderedAndProject +(121) TakeOrderedAndProject Input [5]: [channel#40, id#41, sales#131, returns#132, profit#133] Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#131, returns#132, profit#133] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt index ad59968740aaa..c26c5b81259e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/simplified.txt @@ -1,205 +1,197 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (102) + WholeStageCodegen (100) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (101) + WholeStageCodegen (99) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (67) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (32) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (66) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (31) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (32) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (10) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (31) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (10) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (9) - HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - 
InputAdapter - SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - WholeStageCodegen (2) - Sort [ss_item_sk,ss_ticket_number] - InputAdapter - Exchange [ss_item_sk,ss_ticket_number] #5 - WholeStageCodegen (1) - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] - WholeStageCodegen (4) - Sort [sr_item_sk,sr_ticket_number] - InputAdapter - Exchange [sr_item_sk,sr_ticket_number] #6 - WholeStageCodegen (3) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (5) - Project [i_item_sk] - Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] - InputAdapter - BroadcastExchange #9 - WholeStageCodegen (7) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Exchange [s_store_id] #3 + WholeStageCodegen (9) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + InputAdapter + SortMergeJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + WholeStageCodegen (2) + Sort [ss_item_sk,ss_ticket_number] + InputAdapter + Exchange [ss_item_sk,ss_ticket_number] #4 + WholeStageCodegen (1) + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] - WholeStageCodegen (20) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [cp_catalog_page_id] #11 - WholeStageCodegen (19) - HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - 
Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - InputAdapter - SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - WholeStageCodegen (12) - Sort [cs_item_sk,cs_order_number] - InputAdapter - Exchange [cs_item_sk,cs_order_number] #12 - WholeStageCodegen (11) - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] - WholeStageCodegen (14) - Sort [cr_item_sk,cr_order_number] - InputAdapter - Exchange [cr_item_sk,cr_order_number] #13 - WholeStageCodegen (13) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [i_item_sk] #7 + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] + WholeStageCodegen (4) + Sort [sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_item_sk,sr_ticket_number] #5 + WholeStageCodegen (3) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (5) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow InputAdapter - ReusedExchange [p_promo_sk] #8 + Scan parquet default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (6) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [d_date_sk] #9 - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (18) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] - WholeStageCodegen (30) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + Scan parquet default.promotion [p_promo_sk,p_channel_tv] InputAdapter - Exchange [web_site_id] #15 - WholeStageCodegen (29) - HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project 
[ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - InputAdapter - SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - WholeStageCodegen (22) - Sort [ws_item_sk,ws_order_number] - InputAdapter - Exchange [ws_item_sk,ws_order_number] #16 - WholeStageCodegen (21) - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] - WholeStageCodegen (24) - Sort [wr_item_sk,wr_order_number] - InputAdapter - Exchange [wr_item_sk,wr_order_number] #17 - WholeStageCodegen (23) - Filter [wr_item_sk,wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [i_item_sk] #7 - InputAdapter - ReusedExchange [p_promo_sk] #8 - InputAdapter - ReusedExchange [d_date_sk] #9 + BroadcastExchange #8 + WholeStageCodegen (7) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #18 - WholeStageCodegen (28) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] - WholeStageCodegen (65) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (8) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + WholeStageCodegen (20) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #10 + WholeStageCodegen (19) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + InputAdapter + SortMergeJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + 
WholeStageCodegen (12) + Sort [cs_item_sk,cs_order_number] + InputAdapter + Exchange [cs_item_sk,cs_order_number] #11 + WholeStageCodegen (11) + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] + WholeStageCodegen (14) + Sort [cr_item_sk,cr_order_number] + InputAdapter + Exchange [cr_item_sk,cr_order_number] #12 + WholeStageCodegen (13) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #13 + WholeStageCodegen (18) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + WholeStageCodegen (30) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel] #19 - WholeStageCodegen (64) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (100) + Exchange [web_site_id] #14 + WholeStageCodegen (29) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_sold_date_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + InputAdapter + SortMergeJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + WholeStageCodegen (22) + Sort [ws_item_sk,ws_order_number] + InputAdapter + Exchange [ws_item_sk,ws_order_number] #15 + WholeStageCodegen (21) + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] + WholeStageCodegen (24) + Sort [wr_item_sk,wr_order_number] + InputAdapter + Exchange [wr_item_sk,wr_order_number] #16 + WholeStageCodegen (23) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns 
[wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [i_item_sk] #6 + InputAdapter + ReusedExchange [p_promo_sk] #7 + InputAdapter + ReusedExchange [d_date_sk] #8 + InputAdapter + BroadcastExchange #17 + WholeStageCodegen (28) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + WholeStageCodegen (65) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #18 + WholeStageCodegen (64) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (98) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #20 - WholeStageCodegen (99) + Exchange #19 + WholeStageCodegen (97) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt index ddfdeadcf8eb3..9e687a07c2ca0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt @@ -1,120 +1,116 @@ == Physical Plan == -TakeOrderedAndProject (116) -+- * HashAggregate (115) - +- Exchange (114) - +- * HashAggregate (113) - +- Union (112) - :- * HashAggregate (106) - : +- Exchange (105) - : +- * HashAggregate (104) - : +- Union (103) - : :- * HashAggregate (97) - : : +- Exchange (96) - : : +- * HashAggregate (95) - : : +- Union (94) - : : :- * HashAggregate (39) - : : : +- Exchange (38) - : : : +- * HashAggregate (37) - : : : +- * Project (36) - : : : +- * BroadcastHashJoin Inner BuildRight (35) - : : : :- * Project (29) - : : : : +- * BroadcastHashJoin Inner BuildRight (28) - : : : : :- * Project (22) - : : : : : +- * BroadcastHashJoin Inner BuildRight (21) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- * Project (9) - : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.store_sales (1) - : : : : : : : +- BroadcastExchange (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.store_returns (4) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.date_dim (10) - : : : : : +- BroadcastExchange (20) - : : : : : +- * Filter (19) - : : : : : +- * 
ColumnarToRow (18) - : : : : : +- Scan parquet default.store (17) - : : : : +- BroadcastExchange (27) - : : : : +- * Project (26) - : : : : +- * Filter (25) - : : : : +- * ColumnarToRow (24) - : : : : +- Scan parquet default.item (23) - : : : +- BroadcastExchange (34) - : : : +- * Project (33) - : : : +- * Filter (32) - : : : +- * ColumnarToRow (31) - : : : +- Scan parquet default.promotion (30) - : : :- * HashAggregate (66) - : : : +- Exchange (65) - : : : +- * HashAggregate (64) - : : : +- * Project (63) - : : : +- * BroadcastHashJoin Inner BuildRight (62) - : : : :- * Project (60) - : : : : +- * BroadcastHashJoin Inner BuildRight (59) - : : : : :- * Project (57) - : : : : : +- * BroadcastHashJoin Inner BuildRight (56) - : : : : : :- * Project (51) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (50) - : : : : : : :- * Project (48) - : : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) - : : : : : : : :- * Filter (42) - : : : : : : : : +- * ColumnarToRow (41) - : : : : : : : : +- Scan parquet default.catalog_sales (40) - : : : : : : : +- BroadcastExchange (46) - : : : : : : : +- * Filter (45) - : : : : : : : +- * ColumnarToRow (44) - : : : : : : : +- Scan parquet default.catalog_returns (43) - : : : : : : +- ReusedExchange (49) - : : : : : +- BroadcastExchange (55) - : : : : : +- * Filter (54) - : : : : : +- * ColumnarToRow (53) - : : : : : +- Scan parquet default.catalog_page (52) - : : : : +- ReusedExchange (58) - : : : +- ReusedExchange (61) - : : +- * HashAggregate (93) - : : +- Exchange (92) - : : +- * HashAggregate (91) - : : +- * Project (90) - : : +- * BroadcastHashJoin Inner BuildRight (89) - : : :- * Project (87) - : : : +- * BroadcastHashJoin Inner BuildRight (86) - : : : :- * Project (84) - : : : : +- * BroadcastHashJoin Inner BuildRight (83) - : : : : :- * Project (78) - : : : : : +- * BroadcastHashJoin Inner BuildRight (77) - : : : : : :- * Project (75) - : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) - : : : : : : :- * Filter (69) - : : : : : : : +- * ColumnarToRow (68) - : : : : : : : +- Scan parquet default.web_sales (67) - : : : : : : +- BroadcastExchange (73) - : : : : : : +- * Filter (72) - : : : : : : +- * ColumnarToRow (71) - : : : : : : +- Scan parquet default.web_returns (70) - : : : : : +- ReusedExchange (76) - : : : : +- BroadcastExchange (82) - : : : : +- * Filter (81) - : : : : +- * ColumnarToRow (80) - : : : : +- Scan parquet default.web_site (79) - : : : +- ReusedExchange (85) - : : +- ReusedExchange (88) - : +- * HashAggregate (102) - : +- Exchange (101) - : +- * HashAggregate (100) - : +- * HashAggregate (99) - : +- ReusedExchange (98) - +- * HashAggregate (111) - +- Exchange (110) - +- * HashAggregate (109) - +- * HashAggregate (108) - +- ReusedExchange (107) +TakeOrderedAndProject (112) ++- * HashAggregate (111) + +- Exchange (110) + +- * HashAggregate (109) + +- Union (108) + :- * HashAggregate (97) + : +- Exchange (96) + : +- * HashAggregate (95) + : +- Union (94) + : :- * HashAggregate (39) + : : +- Exchange (38) + : : +- * HashAggregate (37) + : : +- * Project (36) + : : +- * BroadcastHashJoin Inner BuildRight (35) + : : :- * Project (29) + : : : +- * BroadcastHashJoin Inner BuildRight (28) + : : : :- * Project (22) + : : : : +- * BroadcastHashJoin Inner BuildRight (21) + : : : : :- * Project (16) + : : : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : : : : :- * Project (9) + : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (8) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : 
: : : +- Scan parquet default.store_sales (1) + : : : : : : +- BroadcastExchange (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.store_returns (4) + : : : : : +- BroadcastExchange (14) + : : : : : +- * Project (13) + : : : : : +- * Filter (12) + : : : : : +- * ColumnarToRow (11) + : : : : : +- Scan parquet default.date_dim (10) + : : : : +- BroadcastExchange (20) + : : : : +- * Filter (19) + : : : : +- * ColumnarToRow (18) + : : : : +- Scan parquet default.store (17) + : : : +- BroadcastExchange (27) + : : : +- * Project (26) + : : : +- * Filter (25) + : : : +- * ColumnarToRow (24) + : : : +- Scan parquet default.item (23) + : : +- BroadcastExchange (34) + : : +- * Project (33) + : : +- * Filter (32) + : : +- * ColumnarToRow (31) + : : +- Scan parquet default.promotion (30) + : :- * HashAggregate (66) + : : +- Exchange (65) + : : +- * HashAggregate (64) + : : +- * Project (63) + : : +- * BroadcastHashJoin Inner BuildRight (62) + : : :- * Project (60) + : : : +- * BroadcastHashJoin Inner BuildRight (59) + : : : :- * Project (57) + : : : : +- * BroadcastHashJoin Inner BuildRight (56) + : : : : :- * Project (51) + : : : : : +- * BroadcastHashJoin Inner BuildRight (50) + : : : : : :- * Project (48) + : : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (47) + : : : : : : :- * Filter (42) + : : : : : : : +- * ColumnarToRow (41) + : : : : : : : +- Scan parquet default.catalog_sales (40) + : : : : : : +- BroadcastExchange (46) + : : : : : : +- * Filter (45) + : : : : : : +- * ColumnarToRow (44) + : : : : : : +- Scan parquet default.catalog_returns (43) + : : : : : +- ReusedExchange (49) + : : : : +- BroadcastExchange (55) + : : : : +- * Filter (54) + : : : : +- * ColumnarToRow (53) + : : : : +- Scan parquet default.catalog_page (52) + : : : +- ReusedExchange (58) + : : +- ReusedExchange (61) + : +- * HashAggregate (93) + : +- Exchange (92) + : +- * HashAggregate (91) + : +- * Project (90) + : +- * BroadcastHashJoin Inner BuildRight (89) + : :- * Project (87) + : : +- * BroadcastHashJoin Inner BuildRight (86) + : : :- * Project (84) + : : : +- * BroadcastHashJoin Inner BuildRight (83) + : : : :- * Project (78) + : : : : +- * BroadcastHashJoin Inner BuildRight (77) + : : : : :- * Project (75) + : : : : : +- * BroadcastHashJoin LeftOuter BuildRight (74) + : : : : : :- * Filter (69) + : : : : : : +- * ColumnarToRow (68) + : : : : : : +- Scan parquet default.web_sales (67) + : : : : : +- BroadcastExchange (73) + : : : : : +- * Filter (72) + : : : : : +- * ColumnarToRow (71) + : : : : : +- Scan parquet default.web_returns (70) + : : : : +- ReusedExchange (76) + : : : +- BroadcastExchange (82) + : : : +- * Filter (81) + : : : +- * ColumnarToRow (80) + : : : +- Scan parquet default.web_site (79) + : : +- ReusedExchange (85) + : +- ReusedExchange (88) + :- * HashAggregate (102) + : +- Exchange (101) + : +- * HashAggregate (100) + : +- * HashAggregate (99) + : +- ReusedExchange (98) + +- * HashAggregate (107) + +- Exchange (106) + +- * HashAggregate (105) + +- * HashAggregate (104) + +- ReusedExchange (103) (1) Scan parquet default.store_sales @@ -287,7 +283,7 @@ Results [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] (38) Exchange Input [6]: [s_store_id#17, sum#30, sum#31, isEmpty#32, sum#33, isEmpty#34] -Arguments: hashpartitioning(s_store_id#17, 5), true, [id=#35] +Arguments: hashpartitioning(s_store_id#17, 5), ENSURE_REQUIREMENTS, [id=#35] (39) HashAggregate [codegen id : 7] Input [6]: [s_store_id#17, sum#30, sum#31, 
isEmpty#32, sum#33, isEmpty#34] @@ -409,7 +405,7 @@ Results [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty (65) Exchange Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] -Arguments: hashpartitioning(cp_catalog_page_id#57, 5), true, [id=#69] +Arguments: hashpartitioning(cp_catalog_page_id#57, 5), ENSURE_REQUIREMENTS, [id=#69] (66) HashAggregate [codegen id : 14] Input [6]: [cp_catalog_page_id#57, sum#64, sum#65, isEmpty#66, sum#67, isEmpty#68] @@ -531,7 +527,7 @@ Results [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] (92) Exchange Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] -Arguments: hashpartitioning(web_site_id#91, 5), true, [id=#103] +Arguments: hashpartitioning(web_site_id#91, 5), ENSURE_REQUIREMENTS, [id=#103] (93) HashAggregate [codegen id : 21] Input [6]: [web_site_id#91, sum#98, sum#99, isEmpty#100, sum#101, isEmpty#102] @@ -551,7 +547,7 @@ Results [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum (96) Exchange Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] -Arguments: hashpartitioning(channel#39, id#40, 5), true, [id=#124] +Arguments: hashpartitioning(channel#39, id#40, 5), ENSURE_REQUIREMENTS, [id=#124] (97) HashAggregate [codegen id : 23] Input [8]: [channel#39, id#40, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] @@ -579,7 +575,7 @@ Results [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, i (101) Exchange Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] -Arguments: hashpartitioning(channel#39, 5), true, [id=#155] +Arguments: hashpartitioning(channel#39, 5), ENSURE_REQUIREMENTS, [id=#155] (102) HashAggregate [codegen id : 47] Input [7]: [channel#39, sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] @@ -588,75 +584,55 @@ Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] Aggregate Attributes [3]: [sum(sales#140)#156, sum(returns#141)#157, sum(profit#142)#158] Results [5]: [channel#39, null AS id#159, sum(sales#140)#156 AS sales#160, sum(returns#141)#157 AS returns#161, sum(profit#142)#158 AS profit#162] -(103) Union +(103) ReusedExchange [Reuses operator id: 96] +Output [8]: [channel#39, id#40, sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] -(104) HashAggregate [codegen id : 48] -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] - -(105) Exchange -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#163] - -(106) HashAggregate [codegen id : 49] -Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Functions: [] -Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] - -(107) ReusedExchange [Reuses operator id: 96] -Output [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] - -(108) HashAggregate [codegen id : 72] -Input [8]: [channel#39, id#40, sum#164, isEmpty#165, sum#166, isEmpty#167, sum#168, isEmpty#169] +(104) HashAggregate [codegen id : 70] +Input [8]: [channel#39, id#40, sum#163, 
isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] Keys [2]: [channel#39, id#40] Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#170, sum(returns#42)#171, sum(profit#43)#172] -Results [3]: [sum(sales#41)#170 AS sales#140, sum(returns#42)#171 AS returns#141, sum(profit#43)#172 AS profit#142] +Aggregate Attributes [3]: [sum(sales#41)#169, sum(returns#42)#170, sum(profit#43)#171] +Results [3]: [sum(sales#41)#169 AS sales#140, sum(returns#42)#170 AS returns#141, sum(profit#43)#171 AS profit#142] -(109) HashAggregate [codegen id : 72] +(105) HashAggregate [codegen id : 70] Input [3]: [sales#140, returns#141, profit#142] Keys: [] Functions [3]: [partial_sum(sales#140), partial_sum(returns#141), partial_sum(profit#142)] -Aggregate Attributes [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] -Results [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +Aggregate Attributes [6]: [sum#172, isEmpty#173, sum#174, isEmpty#175, sum#176, isEmpty#177] +Results [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] -(110) Exchange -Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] -Arguments: SinglePartition, true, [id=#185] +(106) Exchange +Input [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#184] -(111) HashAggregate [codegen id : 73] -Input [6]: [sum#179, isEmpty#180, sum#181, isEmpty#182, sum#183, isEmpty#184] +(107) HashAggregate [codegen id : 71] +Input [6]: [sum#178, isEmpty#179, sum#180, isEmpty#181, sum#182, isEmpty#183] Keys: [] Functions [3]: [sum(sales#140), sum(returns#141), sum(profit#142)] -Aggregate Attributes [3]: [sum(sales#140)#186, sum(returns#141)#187, sum(profit#142)#188] -Results [5]: [null AS channel#189, null AS id#190, sum(sales#140)#186 AS sales#191, sum(returns#141)#187 AS returns#192, sum(profit#142)#188 AS profit#193] +Aggregate Attributes [3]: [sum(sales#140)#185, sum(returns#141)#186, sum(profit#142)#187] +Results [5]: [null AS channel#188, null AS id#189, sum(sales#140)#185 AS sales#190, sum(returns#141)#186 AS returns#191, sum(profit#142)#187 AS profit#192] -(112) Union +(108) Union -(113) HashAggregate [codegen id : 74] +(109) HashAggregate [codegen id : 72] Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Functions: [] Aggregate Attributes: [] Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -(114) Exchange +(110) Exchange Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), true, [id=#194] +Arguments: hashpartitioning(channel#39, id#40, sales#128, returns#129, profit#130, 5), ENSURE_REQUIREMENTS, [id=#193] -(115) HashAggregate [codegen id : 75] +(111) HashAggregate [codegen id : 73] Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Keys [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Functions: [] Aggregate Attributes: [] Results [5]: [channel#39, id#40, sales#128, returns#129, profit#130] -(116) TakeOrderedAndProject +(112) TakeOrderedAndProject Input [5]: [channel#39, id#40, sales#128, returns#129, profit#130] Arguments: 100, [channel#39 ASC NULLS FIRST, id#40 ASC NULLS FIRST], [channel#39, id#40, sales#128, returns#129, profit#130] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt index 602a670a49116..142af3f0755f3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/simplified.txt @@ -1,181 +1,173 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] - WholeStageCodegen (75) + WholeStageCodegen (73) HashAggregate [channel,id,sales,returns,profit] InputAdapter Exchange [channel,id,sales,returns,profit] #1 - WholeStageCodegen (74) + WholeStageCodegen (72) HashAggregate [channel,id,sales,returns,profit] InputAdapter Union - WholeStageCodegen (49) - HashAggregate [channel,id,sales,returns,profit] + WholeStageCodegen (23) + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id,sales,returns,profit] #2 - WholeStageCodegen (48) - HashAggregate [channel,id,sales,returns,profit] + Exchange [channel,id] #2 + WholeStageCodegen (22) + HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter Union - WholeStageCodegen (23) - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + WholeStageCodegen (7) + HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange [channel,id] #3 - WholeStageCodegen (22) - HashAggregate [channel,id,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Union - WholeStageCodegen (7) - HashAggregate [s_store_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ss_ext_sales_price)),sum(coalesce(cast(sr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ss_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [s_store_id] #4 - WholeStageCodegen (6) - HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_promo_sk,p_promo_sk] - Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] - BroadcastHashJoin 
[ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] - Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Filter [sr_item_sk,sr_ticket_number] - ColumnarToRow - InputAdapter - Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Exchange [s_store_id] #3 + WholeStageCodegen (6) + HashAggregate [s_store_id,ss_ext_sales_price,sr_return_amt,ss_net_profit,sr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_promo_sk,p_promo_sk] + Project [ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss,s_store_id] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ext_sales_price,ss_net_profit,sr_return_amt,sr_net_loss] + BroadcastHashJoin [ss_item_sk,ss_ticket_number,sr_item_sk,sr_ticket_number] + Filter [ss_sold_date_sk,ss_store_sk,ss_item_sk,ss_promo_sk] + ColumnarToRow InputAdapter - BroadcastExchange #8 - WholeStageCodegen (4) - Project [i_item_sk] - Filter [i_current_price,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_current_price] + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_store_sk,ss_promo_sk,ss_ticket_number,ss_ext_sales_price,ss_net_profit] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (5) - Project [p_promo_sk] - Filter [p_channel_tv,p_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.promotion [p_promo_sk,p_channel_tv] - WholeStageCodegen (14) - HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [cp_catalog_page_id] #10 - WholeStageCodegen (13) - HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_promo_sk,p_promo_sk] - Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Project 
[cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] - BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] - Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] - BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] - Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (8) - Filter [cr_item_sk,cr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #6 + BroadcastExchange #4 + WholeStageCodegen (1) + Filter [sr_item_sk,sr_ticket_number] + ColumnarToRow + InputAdapter + Scan parquet default.store_returns [sr_item_sk,sr_ticket_number,sr_return_amt,sr_net_loss] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #12 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (4) + Project [i_item_sk] + Filter [i_current_price,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_current_price] + InputAdapter + BroadcastExchange #8 + WholeStageCodegen (5) + Project [p_promo_sk] + Filter [p_channel_tv,p_promo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.promotion [p_promo_sk,p_channel_tv] + WholeStageCodegen (14) + HashAggregate [cp_catalog_page_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(cs_ext_sales_price)),sum(coalesce(cast(cr_return_amount as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(cs_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [cp_catalog_page_id] #9 + WholeStageCodegen (13) + HashAggregate [cp_catalog_page_id,cs_ext_sales_price,cr_return_amount,cs_net_profit,cr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_promo_sk,p_promo_sk] + Project [cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss,cp_catalog_page_id] + BroadcastHashJoin [cs_catalog_page_sk,cp_catalog_page_sk] + Project [cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project 
[cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_ext_sales_price,cs_net_profit,cr_return_amount,cr_net_loss] + BroadcastHashJoin [cs_item_sk,cs_order_number,cr_item_sk,cr_order_number] + Filter [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_item_sk] #8 + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_catalog_page_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_ext_sales_price,cs_net_profit] InputAdapter - ReusedExchange [p_promo_sk] #9 - WholeStageCodegen (21) - HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + BroadcastExchange #10 + WholeStageCodegen (8) + Filter [cr_item_sk,cr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_item_sk,cr_order_number,cr_return_amount,cr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - Exchange [web_site_id] #13 - WholeStageCodegen (20) - HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] - Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_promo_sk,p_promo_sk] - Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] - BroadcastHashJoin [ws_web_site_sk,web_site_sk] - Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] - BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] - Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] - InputAdapter - BroadcastExchange #14 - WholeStageCodegen (15) - Filter [wr_item_sk,wr_order_number] - ColumnarToRow - InputAdapter - Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - BroadcastExchange #15 - WholeStageCodegen (17) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (21) + HashAggregate [web_site_id,sum,sum,isEmpty,sum,isEmpty] [sum(UnscaledValue(ws_ext_sales_price)),sum(coalesce(cast(wr_return_amt as decimal(12,2)), 0.00)),sum(CheckOverflow((promote_precision(cast(ws_net_profit as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2), 
true)),channel,id,sales,returns,profit,sum,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [web_site_id] #12 + WholeStageCodegen (20) + HashAggregate [web_site_id,ws_ext_sales_price,wr_return_amt,ws_net_profit,wr_net_loss] [sum,sum,isEmpty,sum,isEmpty,sum,sum,isEmpty,sum,isEmpty] + Project [ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_promo_sk,p_promo_sk] + Project [ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss,web_site_id] + BroadcastHashJoin [ws_web_site_sk,web_site_sk] + Project [ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Project [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_ext_sales_price,ws_net_profit,wr_return_amt,wr_net_loss] + BroadcastHashJoin [ws_item_sk,ws_order_number,wr_item_sk,wr_order_number] + Filter [ws_sold_date_sk,ws_web_site_sk,ws_item_sk,ws_promo_sk] + ColumnarToRow InputAdapter - ReusedExchange [i_item_sk] #8 + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_web_site_sk,ws_promo_sk,ws_order_number,ws_ext_sales_price,ws_net_profit] InputAdapter - ReusedExchange [p_promo_sk] #9 - WholeStageCodegen (47) - HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - Exchange [channel] #16 - WholeStageCodegen (46) - HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] - HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] - InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 - WholeStageCodegen (73) + BroadcastExchange #13 + WholeStageCodegen (15) + Filter [wr_item_sk,wr_order_number] + ColumnarToRow + InputAdapter + Scan parquet default.web_returns [wr_item_sk,wr_order_number,wr_return_amt,wr_net_loss] + InputAdapter + ReusedExchange [d_date_sk] #5 + InputAdapter + BroadcastExchange #14 + WholeStageCodegen (17) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] + InputAdapter + ReusedExchange [i_item_sk] #7 + InputAdapter + ReusedExchange [p_promo_sk] #8 + WholeStageCodegen (47) + HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + Exchange [channel] #15 + WholeStageCodegen (46) + HashAggregate [channel,sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] + HashAggregate [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] + InputAdapter + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 + WholeStageCodegen (71) HashAggregate [sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),channel,id,sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - Exchange #17 - WholeStageCodegen (72) + Exchange #16 + WholeStageCodegen (70) HashAggregate [sales,returns,profit] [sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty,sum,isEmpty] HashAggregate 
[channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),sales,returns,profit,sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter - ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #3 + ReusedExchange [channel,id,sum,isEmpty,sum,isEmpty,sum,isEmpty] #2 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt index f61c214640e33..96f13872a2ba2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt @@ -1,46 +1,42 @@ == Physical Plan == -TakeOrderedAndProject (42) -+- * Project (41) - +- Window (40) - +- * Sort (39) - +- Exchange (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- Union (34) - :- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- Union (25) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- * HashAggregate (24) - : +- Exchange (23) - : +- * HashAggregate (22) - : +- * HashAggregate (21) - : +- ReusedExchange (20) - +- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * HashAggregate (30) - +- ReusedExchange (29) +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) (1) Scan parquet default.web_sales @@ -124,7 +120,7 @@ Results [3]: [i_category#9, i_class#8, sum#12] (18) Exchange Input [3]: [i_category#9, i_class#8, sum#12] -Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), ENSURE_REQUIREMENTS, [id=#13] (19) HashAggregate [codegen id : 4] Input [3]: [i_category#9, i_class#8, sum#12] @@ -152,7 +148,7 @@ Results [3]: [i_category#9, sum#24, isEmpty#25] (23) Exchange Input [3]: [i_category#9, sum#24, isEmpty#25] -Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] +Arguments: 
hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [id=#26] (24) HashAggregate [codegen id : 9] Input [3]: [i_category#9, sum#24, isEmpty#25] @@ -161,91 +157,71 @@ Functions [1]: [sum(total_sum#21)] Aggregate Attributes [1]: [sum(total_sum#21)#27] Results [6]: [sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] -(25) Union +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#33] -(26) HashAggregate [codegen id : 10] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(27) Exchange -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] - -(28) HashAggregate [codegen id : 11] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(29) ReusedExchange [Reuses operator id: 18] -Output [3]: [i_category#9, i_class#8, sum#34] - -(30) HashAggregate [codegen id : 15] -Input [3]: [i_category#9, i_class#8, sum#34] +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#9, i_class#8, sum#33] Keys [2]: [i_category#9, i_class#8] Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#34] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#34,17,2) AS total_sum#21] -(31) HashAggregate [codegen id : 15] +(27) HashAggregate [codegen id : 13] Input [1]: [total_sum#21] Keys: [] Functions [1]: [partial_sum(total_sum#21)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [2]: [sum#38, isEmpty#39] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [2]: [sum#37, isEmpty#38] -(32) Exchange -Input [2]: [sum#38, isEmpty#39] -Arguments: SinglePartition, true, [id=#40] +(28) Exchange +Input [2]: [sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#39] -(33) HashAggregate [codegen id : 16] -Input [2]: [sum#38, isEmpty#39] +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#37, isEmpty#38] Keys: [] Functions [1]: [sum(total_sum#21)] -Aggregate Attributes [1]: [sum(total_sum#21)#41] -Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] +Aggregate Attributes [1]: [sum(total_sum#21)#40] +Results [6]: [sum(total_sum#21)#40 AS total_sum#41, null AS i_category#42, null AS i_class#43, 1 AS g_category#44, 1 AS g_class#45, 2 AS lochierarchy#46] -(34) Union +(30) Union -(35) HashAggregate [codegen id : 17] +(31) HashAggregate [codegen id : 15] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] 
Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -(36) Exchange +(32) Exchange Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), ENSURE_REQUIREMENTS, [id=#47] -(37) HashAggregate [codegen id : 18] +(33) HashAggregate [codegen id : 16] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#48] -(38) Exchange -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] +(34) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: hashpartitioning(lochierarchy#18, _w0#48, 5), ENSURE_REQUIREMENTS, [id=#49] -(39) Sort [codegen id : 19] -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 +(35) Sort [codegen id : 17] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#48 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 -(40) Window -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] +(36) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#48, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#50], [lochierarchy#18, _w0#48], [total_sum#15 DESC NULLS LAST] -(41) Project [codegen id : 20] -Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] +(37) Project [codegen id : 18] +Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48, rank_within_parent#50] -(42) TakeOrderedAndProject -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +(38) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Arguments: 100, [lochierarchy#18 
DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#50 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt index 2bd128100f527..d2d6b37e90f71 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/simplified.txt @@ -1,72 +1,64 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] - WholeStageCodegen (20) + WholeStageCodegen (18) Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (19) + WholeStageCodegen (17) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (18) + WholeStageCodegen (16) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] InputAdapter Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 - WholeStageCodegen (17) + WholeStageCodegen (15) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] InputAdapter Union - WholeStageCodegen (11) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] InputAdapter - Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #3 - WholeStageCodegen (10) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (4) - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (3) - HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] - Project [ws_net_paid,i_class,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (9) - HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] - 
InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (8) - HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum] #4 - WholeStageCodegen (16) + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] InputAdapter - Exchange #8 - WholeStageCodegen (15) + Exchange #7 + WholeStageCodegen (13) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum] #4 + ReusedExchange [i_category,i_class,sum] #3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt index f61c214640e33..96f13872a2ba2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt @@ -1,46 +1,42 @@ == Physical Plan == -TakeOrderedAndProject (42) -+- * Project (41) - +- Window (40) - +- * Sort (39) - +- Exchange (38) - +- * HashAggregate (37) - +- Exchange (36) - +- * HashAggregate (35) - +- Union (34) - :- * HashAggregate (28) - : +- Exchange (27) - : +- * HashAggregate (26) - : +- Union (25) - : :- * HashAggregate (19) - : : +- Exchange (18) - : : +- * HashAggregate (17) - : : +- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.item (11) - : +- * HashAggregate (24) - : +- Exchange (23) - : +- * HashAggregate (22) - : +- * HashAggregate (21) - : +- ReusedExchange (20) - +- * HashAggregate (33) - +- Exchange (32) - +- * HashAggregate (31) - +- * HashAggregate (30) - +- ReusedExchange (29) +TakeOrderedAndProject (38) ++- * Project (37) + +- Window (36) + +- * Sort (35) + +- Exchange (34) + +- * HashAggregate (33) + +- Exchange (32) + +- * HashAggregate (31) + +- Union (30) + :- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.web_sales (1) + : : +- 
BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.date_dim (4) + : +- BroadcastExchange (14) + : +- * Filter (13) + : +- * ColumnarToRow (12) + : +- Scan parquet default.item (11) + :- * HashAggregate (24) + : +- Exchange (23) + : +- * HashAggregate (22) + : +- * HashAggregate (21) + : +- ReusedExchange (20) + +- * HashAggregate (29) + +- Exchange (28) + +- * HashAggregate (27) + +- * HashAggregate (26) + +- ReusedExchange (25) (1) Scan parquet default.web_sales @@ -124,7 +120,7 @@ Results [3]: [i_category#9, i_class#8, sum#12] (18) Exchange Input [3]: [i_category#9, i_class#8, sum#12] -Arguments: hashpartitioning(i_category#9, i_class#8, 5), true, [id=#13] +Arguments: hashpartitioning(i_category#9, i_class#8, 5), ENSURE_REQUIREMENTS, [id=#13] (19) HashAggregate [codegen id : 4] Input [3]: [i_category#9, i_class#8, sum#12] @@ -152,7 +148,7 @@ Results [3]: [i_category#9, sum#24, isEmpty#25] (23) Exchange Input [3]: [i_category#9, sum#24, isEmpty#25] -Arguments: hashpartitioning(i_category#9, 5), true, [id=#26] +Arguments: hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [id=#26] (24) HashAggregate [codegen id : 9] Input [3]: [i_category#9, sum#24, isEmpty#25] @@ -161,91 +157,71 @@ Functions [1]: [sum(total_sum#21)] Aggregate Attributes [1]: [sum(total_sum#21)#27] Results [6]: [sum(total_sum#21)#27 AS total_sum#28, i_category#9, null AS i_class#29, 0 AS g_category#30, 1 AS g_class#31, 1 AS lochierarchy#32] -(25) Union +(25) ReusedExchange [Reuses operator id: 18] +Output [3]: [i_category#9, i_class#8, sum#33] -(26) HashAggregate [codegen id : 10] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(27) Exchange -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#33] - -(28) HashAggregate [codegen id : 11] -Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Functions: [] -Aggregate Attributes: [] -Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] - -(29) ReusedExchange [Reuses operator id: 18] -Output [3]: [i_category#9, i_class#8, sum#34] - -(30) HashAggregate [codegen id : 15] -Input [3]: [i_category#9, i_class#8, sum#34] +(26) HashAggregate [codegen id : 13] +Input [3]: [i_category#9, i_class#8, sum#33] Keys [2]: [i_category#9, i_class#8] Functions [1]: [sum(UnscaledValue(ws_net_paid#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#35] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#35,17,2) AS total_sum#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#3))#34] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#3))#34,17,2) AS total_sum#21] -(31) HashAggregate [codegen id : 15] +(27) HashAggregate [codegen id : 13] Input [1]: [total_sum#21] Keys: [] Functions [1]: [partial_sum(total_sum#21)] -Aggregate Attributes [2]: [sum#36, isEmpty#37] -Results [2]: [sum#38, isEmpty#39] +Aggregate Attributes [2]: [sum#35, isEmpty#36] +Results [2]: 
[sum#37, isEmpty#38] -(32) Exchange -Input [2]: [sum#38, isEmpty#39] -Arguments: SinglePartition, true, [id=#40] +(28) Exchange +Input [2]: [sum#37, isEmpty#38] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#39] -(33) HashAggregate [codegen id : 16] -Input [2]: [sum#38, isEmpty#39] +(29) HashAggregate [codegen id : 14] +Input [2]: [sum#37, isEmpty#38] Keys: [] Functions [1]: [sum(total_sum#21)] -Aggregate Attributes [1]: [sum(total_sum#21)#41] -Results [6]: [sum(total_sum#21)#41 AS total_sum#42, null AS i_category#43, null AS i_class#44, 1 AS g_category#45, 1 AS g_class#46, 2 AS lochierarchy#47] +Aggregate Attributes [1]: [sum(total_sum#21)#40] +Results [6]: [sum(total_sum#21)#40 AS total_sum#41, null AS i_category#42, null AS i_class#43, 1 AS g_category#44, 1 AS g_class#45, 2 AS lochierarchy#46] -(34) Union +(30) Union -(35) HashAggregate [codegen id : 17] +(31) HashAggregate [codegen id : 15] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] Results [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -(36) Exchange +(32) Exchange Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] -Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), true, [id=#48] +Arguments: hashpartitioning(total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18, 5), ENSURE_REQUIREMENTS, [id=#47] -(37) HashAggregate [codegen id : 18] +(33) HashAggregate [codegen id : 16] Input [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Keys [6]: [total_sum#15, i_category#9, i_class#8, g_category#16, g_class#17, lochierarchy#18] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#49] +Results [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, CASE WHEN (g_class#17 = 0) THEN i_category#9 END AS _w0#48] -(38) Exchange -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: hashpartitioning(lochierarchy#18, _w0#49, 5), true, [id=#50] +(34) Exchange +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: hashpartitioning(lochierarchy#18, _w0#48, 5), ENSURE_REQUIREMENTS, [id=#49] -(39) Sort [codegen id : 19] -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#49 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 +(35) Sort [codegen id : 17] +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [lochierarchy#18 ASC NULLS FIRST, _w0#48 ASC NULLS FIRST, total_sum#15 DESC NULLS LAST], false, 0 -(40) Window -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49] -Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#49, total_sum#15 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#51], [lochierarchy#18, _w0#49], [total_sum#15 DESC NULLS LAST] +(36) Window +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48] +Arguments: [rank(total_sum#15) windowspecdefinition(lochierarchy#18, _w0#48, total_sum#15 DESC NULLS LAST, 
specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#50], [lochierarchy#18, _w0#48], [total_sum#15 DESC NULLS LAST] -(41) Project [codegen id : 20] -Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#49, rank_within_parent#51] +(37) Project [codegen id : 18] +Output [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Input [6]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, _w0#48, rank_within_parent#50] -(42) TakeOrderedAndProject -Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#51 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#51] +(38) TakeOrderedAndProject +Input [5]: [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] +Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#50 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#8, lochierarchy#18, rank_within_parent#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt index 2bd128100f527..d2d6b37e90f71 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/simplified.txt @@ -1,72 +1,64 @@ TakeOrderedAndProject [lochierarchy,i_category,rank_within_parent,total_sum,i_class] - WholeStageCodegen (20) + WholeStageCodegen (18) Project [total_sum,i_category,i_class,lochierarchy,rank_within_parent] InputAdapter Window [total_sum,lochierarchy,_w0] - WholeStageCodegen (19) + WholeStageCodegen (17) Sort [lochierarchy,_w0,total_sum] InputAdapter Exchange [lochierarchy,_w0] #1 - WholeStageCodegen (18) + WholeStageCodegen (16) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] [_w0] InputAdapter Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #2 - WholeStageCodegen (17) + WholeStageCodegen (15) HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] InputAdapter Union - WholeStageCodegen (11) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] + WholeStageCodegen (4) + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] InputAdapter - Exchange [total_sum,i_category,i_class,g_category,g_class,lochierarchy] #3 - WholeStageCodegen (10) - HashAggregate [total_sum,i_category,i_class,g_category,g_class,lochierarchy] - InputAdapter - Union - WholeStageCodegen (4) - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,g_category,g_class,lochierarchy,sum] + Exchange [i_category,i_class] #3 + WholeStageCodegen (3) + HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] + Project [ws_net_paid,i_class,i_category] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_net_paid] + BroadcastHashJoin [ws_sold_date_sk,d_date_sk] + Filter [ws_sold_date_sk,ws_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales 
[ws_sold_date_sk,ws_item_sk,ws_net_paid] InputAdapter - Exchange [i_category,i_class] #4 - WholeStageCodegen (3) - HashAggregate [i_category,i_class,ws_net_paid] [sum,sum] - Project [ws_net_paid,i_class,i_category] - BroadcastHashJoin [ws_item_sk,i_item_sk] - Project [ws_item_sk,ws_net_paid] - BroadcastHashJoin [ws_sold_date_sk,d_date_sk] - Filter [ws_sold_date_sk,ws_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_net_paid] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - WholeStageCodegen (9) - HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] - InputAdapter - Exchange [i_category] #7 - WholeStageCodegen (8) - HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] - HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] - InputAdapter - ReusedExchange [i_category,i_class,sum] #4 - WholeStageCodegen (16) + Scan parquet default.date_dim [d_date_sk,d_month_seq] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_class,i_category] + WholeStageCodegen (9) + HashAggregate [i_category,sum,isEmpty] [sum(total_sum),total_sum,i_class,g_category,g_class,lochierarchy,sum,isEmpty] + InputAdapter + Exchange [i_category] #6 + WholeStageCodegen (8) + HashAggregate [i_category,total_sum] [sum,isEmpty,sum,isEmpty] + HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] + InputAdapter + ReusedExchange [i_category,i_class,sum] #3 + WholeStageCodegen (14) HashAggregate [sum,isEmpty] [sum(total_sum),total_sum,i_category,i_class,g_category,g_class,lochierarchy,sum,isEmpty] InputAdapter - Exchange #8 - WholeStageCodegen (15) + Exchange #7 + WholeStageCodegen (13) HashAggregate [total_sum] [sum,isEmpty,sum,isEmpty] HashAggregate [i_category,i_class,sum] [sum(UnscaledValue(ws_net_paid)),total_sum,sum] InputAdapter - ReusedExchange [i_category,i_class,sum] #4 + ReusedExchange [i_category,i_class,sum] #3 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0ba58e1634f06..44f3c3449ddda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite} import org.apache.spark.sql.catalyst.plans.logical.{Project, RepartitionByExpression} import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.execution.UnionExec import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec @@ -3825,6 +3826,23 @@ class 
SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
 System.clearProperty("ivy.home")
 }
 }
+
+ test("SPARK-33964: Combine distinct unions that have noop project between them") {
+ val df = sql("""
+ |SELECT a, b FROM (
+ | SELECT a, b FROM testData2
+ | UNION
+ | SELECT a, sum(b) FROM testData2 GROUP BY a
+ | UNION
+ | SELECT null AS a, sum(b) FROM testData2
+ |)""".stripMargin)
+
+ val unions = df.queryExecution.sparkPlan.collect {
+ case u: UnionExec => u
+ }
+
+ assert(unions.size == 1)
+ }
 }
 case class Foo(bar: Option[String])

From 976e97a80de66d520167f58bdc9082e4bbbc9639 Mon Sep 17 00:00:00 2001
From: Chongguang LIU
Date: Tue, 5 Jan 2021 05:20:16 +0000
Subject: [PATCH 0955/1009] [SPARK-33794][SQL] NextDay expression throws runtime IllegalArgumentException when receiving invalid input under ANSI mode

### What changes were proposed in this pull request?
Instead of returning NULL, the next_day function throws a runtime IllegalArgumentException when ANSI mode is enabled and it receives an invalid value for the dayOfWeek parameter.

### Why are the changes needed?
To comply with ANSI mode, which fails with an exception instead of returning NULL on invalid input.

### Does this PR introduce _any_ user-facing change?
Yes.
When spark.sql.ansi.enabled = true, the next_day function throws an IllegalArgumentException when it receives an invalid value for the dayOfWeek parameter.
When spark.sql.ansi.enabled = false, the behaviour is the same as before.

### How was this patch tested?
ANSI mode is covered by the existing tests.
End-to-end tests have been added.

Closes #30807 from chongguang/SPARK-33794.

Authored-by: Chongguang LIU
Signed-off-by: Wenchen Fan
---
 docs/sql-ref-ansi-compliance.md | 1 +
 .../expressions/datetimeExpressions.scala | 72 +++++++++++++------
 .../sql/catalyst/util/DateTimeUtils.scala | 6 +-
 .../expressions/DateExpressionsSuite.scala | 34 +++++++--
 .../catalyst/util/DateTimeUtilsSuite.scala | 7 ++
 .../resources/sql-tests/inputs/datetime.sql | 7 ++
 .../sql-tests/results/ansi/datetime.sql.out | 44 +++++++++++-
 .../sql-tests/results/datetime-legacy.sql.out | 42 ++++++++++-
 .../sql-tests/results/datetime.sql.out | 42 ++++++++++-
 9 files changed, 220 insertions(+), 35 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index 16059a5a08e9a..22f4cf78f5223 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -156,6 +156,7 @@ The behavior of some SQL functions can be different under ANSI mode (`spark.sql.
 - `make_date`: This function should fail with an exception if the result date is invalid.
 - `make_timestamp`: This function should fail with an exception if the result timestamp is invalid.
 - `make_interval`: This function should fail with an exception if the result interval is invalid.
+ - `next_day`: This function throws `IllegalArgumentException` if input is not a valid day of week.

### SQL Operators diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 99f80e9078aae..c9a9ac38559ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -1162,7 +1162,12 @@ case class LastDay(startDate: Expression) */ // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated.", + usage = + """_FUNC_(start_date, day_of_week) - Returns the first date which is later than `start_date` and named as indicated. + The function returns NULL if at least one of the input parameters is NULL. + When both of the input parameters are not NULL and day_of_week is an invalid input, + the function throws IllegalArgumentException if `spark.sql.ansi.enabled` is set to true, otherwise NULL. + """, examples = """ Examples: > SELECT _FUNC_('2015-01-14', 'TU'); @@ -1171,52 +1176,73 @@ case class LastDay(startDate: Expression) group = "datetime_funcs", since = "1.5.0") // scalastyle:on line.size.limit -case class NextDay(startDate: Expression, dayOfWeek: Expression) +case class NextDay( + startDate: Expression, + dayOfWeek: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { override def left: Expression = startDate override def right: Expression = dayOfWeek + def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) override def dataType: DataType = DateType override def nullable: Boolean = true override def nullSafeEval(start: Any, dayOfW: Any): Any = { - val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String]) - if (dow == -1) { - null - } else { + try { + val dow = DateTimeUtils.getDayOfWeekFromString(dayOfW.asInstanceOf[UTF8String]) val sd = start.asInstanceOf[Int] DateTimeUtils.getNextDateForDayOfWeek(sd, dow) + } catch { + case _: IllegalArgumentException if !failOnError => null + } + } + + private def dateTimeUtilClass: String = DateTimeUtils.getClass.getName.stripSuffix("$") + + private def nextDayGenCode( + ev: ExprCode, + dayOfWeekTerm: String, + sd: String, + dowS: String): String = { + if (failOnError) { + s""" + |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); + |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); + |""".stripMargin + } else { + s""" + |try { + | int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); + | ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); + |} catch (IllegalArgumentException e) { + | ${ev.isNull} = true; + |} + |""".stripMargin } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (sd, dowS) => { - val dateTimeUtilClass = DateTimeUtils.getClass.getName.stripSuffix("$") val dayOfWeekTerm = ctx.freshName("dayOfWeek") if (dayOfWeek.foldable) { val input = dayOfWeek.eval().asInstanceOf[UTF8String] - if ((input eq null) || DateTimeUtils.getDayOfWeekFromString(input) == -1) { - s""" - |${ev.isNull} = true; - """.stripMargin + if (input eq null) { + s"""${ev.isNull} = true;""" 
} else { - val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input) - s""" - |${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue); - """.stripMargin + try { + val dayOfWeekValue = DateTimeUtils.getDayOfWeekFromString(input) + s"${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekValue);" + } catch { + case _: IllegalArgumentException => nextDayGenCode(ev, dayOfWeekTerm, sd, dowS) + } } } else { - s""" - |int $dayOfWeekTerm = $dateTimeUtilClass.getDayOfWeekFromString($dowS); - |if ($dayOfWeekTerm == -1) { - | ${ev.isNull} = true; - |} else { - | ${ev.value} = $dateTimeUtilClass.getNextDateForDayOfWeek($sd, $dayOfWeekTerm); - |} - """.stripMargin + nextDayGenCode(ev, dayOfWeekTerm, sd, dowS) } }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 780d2bad1bab2..b4f12db439f7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -670,9 +670,10 @@ object DateTimeUtils { private val FRIDAY = 1 private val SATURDAY = 2 - /* + /** * Returns day of week from String. Starting from Thursday, marked as 0. * (Because 1970-01-01 is Thursday). + * @throws IllegalArgumentException if the input is not a valid day of week. */ def getDayOfWeekFromString(string: UTF8String): Int = { val dowString = string.toString.toUpperCase(Locale.ROOT) @@ -684,7 +685,8 @@ object DateTimeUtils { case "TH" | "THU" | "THURSDAY" => THURSDAY case "FR" | "FRI" | "FRIDAY" => FRIDAY case "SA" | "SAT" | "SATURDAY" => SATURDAY - case _ => -1 + case _ => + throw new IllegalArgumentException(s"""Illegal input for day of week: $string""") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 79770505ec35d..1af8fe882847c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -640,13 +640,33 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { testNextDay("2015-07-23", "Fri", "2015-07-24") testNextDay("2015-07-23", "fr", "2015-07-24") - checkEvaluation(NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx")), null) - checkEvaluation(NextDay(Literal.create(null, DateType), Literal("xx")), null) - checkEvaluation( - NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)), null) - // Test escaping of dayOfWeek - GenerateUnsafeProjection.generate( - NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) :: Nil) + Seq(true, false).foreach { ansiEnabled => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiEnabled.toString) { + var expr: Expression = NextDay(Literal(Date.valueOf("2015-07-23")), Literal("xx")) + if (ansiEnabled) { + val errMsg = "Illegal input for day of week: xx" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + + expr = NextDay(Literal.create(null, DateType), Literal("xx")) + checkEvaluation(expr, null) + + expr = NextDay(Literal(Date.valueOf("2015-07-23")), Literal.create(null, StringType)) + checkEvaluation(expr, null) + + // Test escaping of dayOfWeek + expr = 
NextDay(Literal(Date.valueOf("2015-07-23")), Literal("\"quote")) + GenerateUnsafeProjection.generate(expr :: Nil) + if (ansiEnabled) { + val errMsg = """Illegal input for day of week: "quote""" + checkExceptionInExpression[Exception](expr, errMsg) + } else { + checkEvaluation(expr, null) + } + } + } } private def testTruncDate(input: Date, fmt: String, expected: Date): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 3d841f32379ff..b9b55da5a2080 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -675,4 +675,11 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { assert(toDate("tomorrow CET ", zoneId).get === today + 1) } } + + test("parsing day of week") { + assert(getDayOfWeekFromString(UTF8String.fromString("THU")) == 0) + assert(getDayOfWeekFromString(UTF8String.fromString("MONDAY")) == 4) + intercept[IllegalArgumentException](getDayOfWeekFromString(UTF8String.fromString("xx"))) + intercept[IllegalArgumentException](getDayOfWeekFromString(UTF8String.fromString("\"quote"))) + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql index acfd1f50e14c9..0493d8653c01f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -172,3 +172,10 @@ select to_unix_timestamp("2020-01-27T20:06:11.847", "yyyy-MM-dd HH:mm:ss.SSS"); select to_unix_timestamp("Unparseable", "yyyy-MM-dd HH:mm:ss.SSS"); select cast("Unparseable" as timestamp); select cast("Unparseable" as date); + +-- next_day +select next_day("2015-07-23", "Mon"); +select next_day("2015-07-23", "xx"); +select next_day("xx", "Mon"); +select next_day(null, "Mon"); +select next_day(null, "xx"); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out index 3e307a92c10f0..9a0c8ff02c5bb 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1069,3 +1069,45 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to DateType. + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Illegal input for day of week: xx + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast xx to DateType. 
+ + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index ed54b72111ed5..d93843b231804 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1021,3 +1021,43 @@ select cast("Unparseable" as date) struct -- !query output NULL + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 213895dcb4bcb..b07b68ce2600d 100755 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 123 +-- Number of queries: 128 -- !query @@ -1029,3 +1029,43 @@ select cast("Unparseable" as date) struct -- !query output NULL + + +-- !query +select next_day("2015-07-23", "Mon") +-- !query schema +struct +-- !query output +2015-07-27 + + +-- !query +select next_day("2015-07-23", "xx") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day("xx", "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "Mon") +-- !query schema +struct +-- !query output +NULL + + +-- !query +select next_day(null, "xx") +-- !query schema +struct +-- !query output +NULL From 6b00fdc756e85ce7affded605d6d8e0a5308c1ed Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Tue, 5 Jan 2021 05:32:36 +0000 Subject: [PATCH 0956/1009] [SPARK-33998][SQL] Provide an API to create an InternalRow in V2CommandExec ### What changes were proposed in this pull request? There are many v2 commands such as `SHOW TABLES`, `DESCRIBE TABLE`, etc. that require creating `InternalRow`s. Currently, the code to create `InternalRow`s are duplicated across many commands and it can be moved into `V2CommandExec` to remove duplicate code. ### Why are the changes needed? To clean up duplicate code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test since this is just refactoring. Closes #31020 from imback82/refactor_v2_command. 
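For illustration only (editor's sketch, not part of this patch): a hypothetical command node showing how a v2 command can now build its result rows with the shared helper this commit adds to `V2CommandExec`. The node name, its constructor, and its two string output columns are assumptions made for the example.

```scala
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.LeafExecNode
import org.apache.spark.sql.execution.datasources.v2.V2CommandExec

// Hypothetical node (not in Spark): `output` is assumed to be two string attributes,
// e.g. (key, value), matching the rows produced below.
case class ShowKeyValueExec(
    output: Seq[Attribute],
    properties: Seq[(String, String)]) extends V2CommandExec with LeafExecNode {

  // One InternalRow per pair via the inherited toCatalystRow helper, instead of
  // building a RowEncoder serializer inside every command node as before.
  override protected def run(): Seq[InternalRow] = {
    properties.map { case (key, value) => toCatalystRow(key, value) }
  }
}
```

Since `toCatalystRow` takes `String*` and serializes through the node's own schema, it fits these string-returning metadata commands, which is why the duplication could be lifted into the base class.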
Authored-by: Terry Kim Signed-off-by: Wenchen Fan --- .../datasources/v2/DescribeColumnExec.scala | 11 +---------- .../datasources/v2/DescribeNamespaceExec.scala | 12 +----------- .../datasources/v2/DescribeTableExec.scala | 13 +------------ .../datasources/v2/ShowCurrentNamespaceExec.scala | 10 ++-------- .../datasources/v2/ShowNamespacesExec.scala | 7 ++----- .../datasources/v2/ShowTablePropertiesExec.scala | 10 ++++------ .../execution/datasources/v2/ShowTablesExec.scala | 9 ++------- .../execution/datasources/v2/V2CommandExec.scala | 11 ++++++++++- 8 files changed, 23 insertions(+), 60 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala index c7ce69f744cce..ab8c5617aa36b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -20,17 +20,12 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.catalyst.expressions.Attribute case class DescribeColumnExec( override val output: Seq[Attribute], column: Attribute, isExtended: Boolean) extends V2CommandExec { - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() @@ -49,8 +44,4 @@ case class DescribeColumnExec( rows.toSeq } - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala index e273abf90e3bc..2da96b769a41a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala @@ -21,10 +21,8 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces} -import org.apache.spark.sql.types.StructType /** * Physical plan node for describing a namespace. 
@@ -34,10 +32,6 @@ case class DescribeNamespaceExec( catalog: SupportsNamespaces, namespace: Seq[String], isExtended: Boolean) extends V2CommandExec { - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } - override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() val ns = namespace.toArray @@ -57,8 +51,4 @@ case class DescribeNamespaceExec( } rows.toSeq } - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala index 0ca442baeea2f..769d76a9b1c2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala @@ -21,20 +21,13 @@ import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table} -import org.apache.spark.sql.types.StructType case class DescribeTableExec( output: Seq[Attribute], table: Table, isExtended: Boolean) extends V2CommandExec { - - private val toRow = { - RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() - } - override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() addSchema(rows) @@ -99,8 +92,4 @@ case class DescribeTableExec( } private def emptyRow(): InternalRow = toCatalystRow("", "", "") - - private def toCatalystRow(strs: String*): InternalRow = { - toRow(new GenericRowWithSchema(strs.toArray, schema)).copy() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala index 5f7b6f4061467..121ae1c5b1176 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCurrentNamespaceExec.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper @@ -31,11 +30,6 @@ case class ShowCurrentNamespaceExec( catalogManager: CatalogManager) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() - val result = new GenericRowWithSchema(Array[Any]( - catalogManager.currentCatalog.name, - catalogManager.currentNamespace.quoted), - schema) - Seq(toRow(result).copy()) + Seq(toCatalystRow(catalogManager.currentCatalog.name, catalogManager.currentNamespace.quoted)) } } diff 
--git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala index ceeed0f840700..9dafbd79a527e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import org.apache.spark.sql.connector.catalog.SupportsNamespaces @@ -44,11 +43,9 @@ case class ShowNamespacesExec( } val rows = new ArrayBuffer[InternalRow]() - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() - namespaces.map(_.quoted).map { ns => if (pattern.map(StringUtils.filterPattern(Seq(ns), _).nonEmpty).getOrElse(true)) { - rows += toRow(new GenericRowWithSchema(Array(ns), schema)).copy() + rows += toCatalystRow(ns) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala index 6d3a94ef15631..4e1633e1460ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablePropertiesExec.scala @@ -18,8 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Table} /** @@ -32,19 +31,18 @@ case class ShowTablePropertiesExec( override protected def run(): Seq[InternalRow] = { import scala.collection.JavaConverters._ - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() // The reserved properties are accessible through DESCRIBE val properties = catalogTable.properties.asScala - .filter { case (k, v) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) } + .filter { case (k, _) => !CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) } propertyKey match { case Some(p) => val propValue = properties .getOrElse(p, s"Table ${catalogTable.name} does not have property: $p") - Seq(toRow(new GenericRowWithSchema(Array(p, propValue), schema)).copy()) + Seq(toCatalystRow(p, propValue)) case None => properties.keys.map(k => - toRow(new GenericRowWithSchema(Array(k, properties(k)), schema)).copy()).toSeq + toCatalystRow(k, properties(k))).toSeq } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala index 5ba01deae9513..7ada8d2e5c39d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowTablesExec.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.encoders.RowEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericRowWithSchema} +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.NamespaceHelper import org.apache.spark.sql.connector.catalog.TableCatalog @@ -37,15 +36,11 @@ case class ShowTablesExec( pattern: Option[String]) extends V2CommandExec with LeafExecNode { override protected def run(): Seq[InternalRow] = { val rows = new ArrayBuffer[InternalRow]() - val toRow = RowEncoder(schema).resolveAndBind().createSerializer() val tables = catalog.listTables(namespace.toArray) tables.map { table => if (pattern.map(StringUtils.filterPattern(Seq(table.name()), _).nonEmpty).getOrElse(true)) { - val result = new GenericRowWithSchema( - Array(table.namespace().quoted, table.name()), - schema) - rows += toRow(result).copy() + rows += toCatalystRow(table.namespace().quoted, table.name()) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala index 6b193674cc71a..b54c46fc15e7c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2CommandExec.scala @@ -19,8 +19,10 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.AttributeSet +import org.apache.spark.sql.catalyst.encoders.RowEncoder +import org.apache.spark.sql.catalyst.expressions.{AttributeSet, GenericRowWithSchema} import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.types.StructType /** * A physical operator that executes run() and saves the result to prevent multiple executions. @@ -59,4 +61,11 @@ abstract class V2CommandExec extends SparkPlan { override def producedAttributes: AttributeSet = outputSet + protected def toCatalystRow(strs: String*): InternalRow = { + rowSerializer(new GenericRowWithSchema(strs.toArray, schema)).copy() + } + + private lazy val rowSerializer = { + RowEncoder(StructType.fromAttributes(output)).resolveAndBind().createSerializer() + } } From 15a863fd54aa76cbb0f2a076bd94773529536add Mon Sep 17 00:00:00 2001 From: Terry Kim Date: Mon, 4 Jan 2021 21:32:49 -0800 Subject: [PATCH 0957/1009] [SPARK-34001][SQL][TESTS] Remove unused runShowTablesSql() in DataSourceV2SQLSuite.scala ### What changes were proposed in this pull request? After #30287, `runShowTablesSql()` in `DataSourceV2SQLSuite.scala` is no longer used. This PR removes the unused method. ### Why are the changes needed? To remove unused method. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test. Closes #31022 from imback82/33382-followup. 
Authored-by: Terry Kim Signed-off-by: Dongjoon Hyun --- .../sql/connector/DataSourceV2SQLSuite.scala | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 47829b68cc617..0d61306628a44 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} import org.apache.spark.sql.internal.SQLConf.{PARTITION_OVERWRITE_MODE, PartitionOverwriteMode, V2_SESSION_CATALOG_IMPLEMENTATION} import org.apache.spark.sql.internal.connector.SimpleTableProvider import org.apache.spark.sql.sources.SimpleScanSource -import org.apache.spark.sql.types.{BooleanType, LongType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.StorageLevel import org.apache.spark.unsafe.types.UTF8String @@ -991,26 +991,6 @@ class DataSourceV2SQLSuite " only SessionCatalog supports this command.")) } - private def runShowTablesSql( - sqlText: String, - expected: Seq[Row], - expectV2Catalog: Boolean = true): Unit = { - val schema = if (expectV2Catalog) { - new StructType() - .add("namespace", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - } else { - new StructType() - .add("database", StringType, nullable = false) - .add("tableName", StringType, nullable = false) - .add("isTemporary", BooleanType, nullable = false) - } - - val df = spark.sql(sqlText) - assert(df.schema === schema) - assert(expected === df.collect()) - } - test("CreateNameSpace: basic tests") { // Session catalog is used. withNamespace("ns") { From f0ffe0cd652188873f2ec007e4e282744717a0b3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 5 Jan 2021 05:34:11 +0000 Subject: [PATCH 0958/1009] [SPARK-33992][SQL] override transformUpWithNewOutput to add allowInvokingTransformsInAnalyzer ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/29643, we move the plan rewriting methods to QueryPlan. we need to override transformUpWithNewOutput to add allowInvokingTransformsInAnalyzer because it and resolveOperatorsUpWithNewOutput are called in the analyzer. 
For example, PaddingAndLengthCheckForCharVarchar could fail query when resolveOperatorsUpWithNewOutput with ```logtalk [info] - char/varchar resolution in sub query *** FAILED *** (367 milliseconds) [info] java.lang.RuntimeException: This method should not be called in the analyzer [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.assertNotAnalysisRule(AnalysisHelper.scala:150) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.assertNotAnalysisRule$(AnalysisHelper.scala:146) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.assertNotAnalysisRule(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown(AnalysisHelper.scala:161) [info] at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDown$(AnalysisHelper.scala:160) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29) [info] at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$updateOuterReferencesInSubquery(QueryPlan.scala:267) ``` ### Why are the changes needed? trivial bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31013 from yaooqinn/SPARK-33992. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../catalyst/plans/logical/AnalysisHelper.scala | 9 +++++++++ .../apache/spark/sql/CharVarcharTestSuite.scala | 15 +++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala index ffd1f784e4670..54b01416381c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/AnalysisHelper.scala @@ -133,6 +133,15 @@ trait AnalysisHelper extends QueryPlan[LogicalPlan] { self: LogicalPlan => } } + override def transformUpWithNewOutput( + rule: PartialFunction[LogicalPlan, (LogicalPlan, Seq[(Attribute, Attribute)])], + skipCond: LogicalPlan => Boolean, + canGetOutput: LogicalPlan => Boolean): LogicalPlan = { + AnalysisHelper.allowInvokingTransformsInAnalyzer { + super.transformUpWithNewOutput(rule, skipCond, canGetOutput) + } + } + /** * Recursively transforms the expressions of a tree, skipping nodes that have already * been analyzed. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 62d0f51e5ff75..d20cee0815d4d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -451,6 +451,21 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { Seq(Row("char(5)"), Row("varchar(3)"))) } } + + test("SPARK-33992: char/varchar resolution in correlated sub query") { + withTable("t1", "t2") { + sql(s"CREATE TABLE t1(v VARCHAR(3), c CHAR(5)) USING $format") + sql(s"CREATE TABLE t2(v VARCHAR(3), c CHAR(5)) USING $format") + sql("INSERT INTO t1 VALUES ('c', 'b')") + sql("INSERT INTO t2 VALUES ('a', 'b')") + + checkAnswer(sql( + """ + |SELECT v FROM t1 + |WHERE 'a' IN (SELECT v FROM t2 WHERE t1.c = t2.c )""".stripMargin), + Row("c")) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. From a7d3fcd354289c1d0f5c80887b4f33beb3ad96a2 Mon Sep 17 00:00:00 2001 From: LantaoJin Date: Mon, 4 Jan 2021 21:37:26 -0800 Subject: [PATCH 0959/1009] [SPARK-34000][CORE] Fix stageAttemptToNumSpeculativeTasks java.util.NoSuchElementException ### What changes were proposed in this pull request? From below log, Stage 600 could be removed from `stageAttemptToNumSpeculativeTasks` by `onStageCompleted()`, but the speculative task 306.1 in stage 600 threw `NoSuchElementException` when it entered into `onTaskEnd()`. ``` 21/01/04 03:00:32,259 WARN [task-result-getter-2] scheduler.TaskSetManager:69 : Lost task 306.1 in stage 600.0 (TID 283610, hdc49-mcc10-01-0510-4108-039-tess0097.stratus.rno.ebay.com, executor 27): TaskKilled (another attempt succeeded) 21/01/04 03:00:32,259 INFO [task-result-getter-2] scheduler.TaskSetManager:57 : Task 306.1 in stage 600.0 (TID 283610) failed, but the task will not be re-executed (either because the task failed with a shuffle data fetch failure, so the previous stage needs to be re-run, or because a different copy of the task has already succeeded). 
21/01/04 03:00:32,259 INFO [task-result-getter-2] cluster.YarnClusterScheduler:57 : Removed TaskSet 600.0, whose tasks have all completed, from pool default 21/01/04 03:00:32,259 INFO [HiveServer2-Handler-Pool: Thread-5853] thriftserver.SparkExecuteStatementOperation:190 : Returning result set with 50 rows from offsets [5378600, 5378650) with 1fe245f8-a7f9-4ec0-bcb5-8cf324cbbb47 21/01/04 03:00:32,260 ERROR [spark-listener-group-executorManagement] scheduler.AsyncEventQueue:94 : Listener ExecutorAllocationListener threw an exception java.util.NoSuchElementException: key not found: Stage 600 (Attempt 0) at scala.collection.MapLike.default(MapLike.scala:235) at scala.collection.MapLike.default$(MapLike.scala:234) at scala.collection.AbstractMap.default(Map.scala:63) at scala.collection.mutable.HashMap.apply(HashMap.scala:69) at org.apache.spark.ExecutorAllocationManager$ExecutorAllocationListener.onTaskEnd(ExecutorAllocationManager.scala:621) at org.apache.spark.scheduler.SparkListenerBus.doPostEvent(SparkListenerBus.scala:45) at org.apache.spark.scheduler.SparkListenerBus.doPostEvent$(SparkListenerBus.scala:28) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:38) at org.apache.spark.scheduler.AsyncEventQueue.doPostEvent(AsyncEventQueue.scala:38) at org.apache.spark.util.ListenerBus.postToAll(ListenerBus.scala:115) at org.apache.spark.util.ListenerBus.postToAll$(ListenerBus.scala:99) at org.apache.spark.scheduler.AsyncEventQueue.super$postToAll(AsyncEventQueue.scala:116) at org.apache.spark.scheduler.AsyncEventQueue.$anonfun$dispatch$1(AsyncEventQueue.scala:116) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62) at org.apache.spark.scheduler.AsyncEventQueue.org$apache$spark$scheduler$AsyncEventQueue$$dispatch(AsyncEventQueue.scala:102) at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.$anonfun$run$1(AsyncEventQueue.scala:97) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1320) at org.apache.spark.scheduler.AsyncEventQueue$$anon$2.run(AsyncEventQueue.scala:97) ``` ### Why are the changes needed? To avoid throwing the java.util.NoSuchElementException ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? This is a protective patch and it's not easy to reproduce in UT due to the event order is not fixed in a async queue. Closes #31025 from LantaoJin/SPARK-34000. Authored-by: LantaoJin Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/ExecutorAllocationManager.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 61ab63584269b..a83762ff01ccb 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -798,7 +798,11 @@ private[spark] class ExecutorAllocationManager( } if (taskEnd.taskInfo.speculative) { stageAttemptToSpeculativeTaskIndices.get(stageAttempt).foreach {_.remove{taskIndex}} - stageAttemptToNumSpeculativeTasks(stageAttempt) -= 1 + // If the previous task attempt succeeded first and it was the last task in a stage, + // the stage may have been removed before handing this speculative TaskEnd event. 
+ if (stageAttemptToNumSpeculativeTasks.contains(stageAttempt)) { + stageAttemptToNumSpeculativeTasks(stageAttempt) -= 1 + } } taskEnd.reason match { From a071826f72cd717a58bf37b877f805490f7a147f Mon Sep 17 00:00:00 2001 From: fwang12 Date: Tue, 5 Jan 2021 15:55:30 +0900 Subject: [PATCH 0960/1009] [SPARK-33100][SQL] Ignore a semicolon inside a bracketed comment in spark-sql ### What changes were proposed in this pull request? Now the spark-sql does not support parse the sql statements with bracketed comments. For the sql statements: ``` /* SELECT 'test'; */ SELECT 'test'; ``` Would be split to two statements: The first one: `/* SELECT 'test'` The second one: `*/ SELECT 'test'` Then it would throw an exception because the first one is illegal. In this PR, we ignore the content in bracketed comments while splitting the sql statements. Besides, we ignore the comment without any content. ### Why are the changes needed? Spark-sql might split the statements inside bracketed comments and it is not correct. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UT. Closes #29982 from turboFei/SPARK-33110. Lead-authored-by: fwang12 Co-authored-by: turbofei Signed-off-by: Takeshi Yamamuro --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 40 +++++++++++++++---- .../sql/hive/thriftserver/CliSuite.scala | 23 +++++++++++ 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index f2fd373bf6cc0..9155eacfa4896 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -522,14 +522,22 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Note: [SPARK-31595] if there is a `'` in a double quoted string, or a `"` in a single quoted // string, the origin implementation from Hive will not drop the trailing semicolon as expected, // hence we refined this function a little bit. + // Note: [SPARK-33100] Ignore a semicolon inside a bracketed comment in spark-sql. private def splitSemiColon(line: String): JList[String] = { var insideSingleQuote = false var insideDoubleQuote = false - var insideComment = false + var insideSimpleComment = false + var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 + var includingStatement = false val ret = new JArrayList[String] + def insideBracketedComment: Boolean = bracketedCommentLevel > 0 + def insideComment: Boolean = insideSimpleComment || insideBracketedComment + def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) + for (index <- 0 until line.length) { if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped @@ -553,21 +561,33 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { // Sample query: select "quoted value --" // ^^ avoids starting a comment if it's inside quotes. 
} else if (hasNext && line.charAt(index + 1) == '-') { - // ignore quotes and ; - insideComment = true + // ignore quotes and ; in simple comment + insideSimpleComment = true } } else if (line.charAt(index) == ';') { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - // split, do not include ; itself - ret.add(line.substring(beginIndex, index)) + if (includingStatement) { + // split, do not include ; itself + ret.add(line.substring(beginIndex, index)) + } beginIndex = index + 1 + includingStatement = false } } else if (line.charAt(index) == '\n') { - // with a new line the inline comment should end. + // with a new line the inline simple comment should end. if (!escape) { - insideComment = false + insideSimpleComment = false + } + } else if (line.charAt(index) == '/' && !insideSimpleComment) { + val hasNext = index + 1 < line.length + if (insideSingleQuote || insideDoubleQuote) { + // Ignores '/' in any case of quotes + } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { + bracketedCommentLevel -= 1 + } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { + bracketedCommentLevel += 1 } } // set the escape @@ -576,8 +596,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { } else if (line.charAt(index) == '\\') { escape = true } + + includingStatement = statementBegin(index) + } + if (includingStatement) { + ret.add(line.substring(beginIndex)) } - ret.add(line.substring(beginIndex)) ret } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index d39b94503fe40..6708cf99e7f41 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -571,4 +571,27 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { // the date formatter for `java.sql.LocalDate` must output negative years with sign. runCliWithin(1.minute)("SELECT MAKE_DATE(-44, 3, 15);" -> "-0044-03-15") } + + test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") { + runCliWithin(4.minute)( + "/* SELECT 'test';*/ SELECT 'test';" -> "test", + ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", + "/* SELECT 'test';*/;; SELECT 'test';" -> "test", + "SELECT 'test'; -- SELECT 'test';" -> "", + "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", + "/*\nmulti-line\n*/ SELECT 'test';" -> "test", + "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" + ) + } + + test("SPARK-33100: test sql statements with hint in bracketed comment") { + runCliWithin(2.minute)( + "CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES(1, 2) AS t1(k, v);" -> "", + "CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES(2, 1) AS t2(k, v);" -> "", + "EXPLAIN SELECT /*+ MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" -> "SortMergeJoin", + "EXPLAIN SELECT /* + MERGEJOIN(t1) */ t1.* FROM t1 JOIN t2 ON t1.k = t2.v;" + -> "BroadcastHashJoin" + ) + } } From f252a9334e49dc359dd9255fcfe17a6bc75b8781 Mon Sep 17 00:00:00 2001 From: "tanel.kiis@gmail.com" Date: Tue, 5 Jan 2021 16:00:24 +0900 Subject: [PATCH 0961/1009] [SPARK-33935][SQL] Fix CBO cost function ### What changes were proposed in this pull request? Changed the cost function in CBO to match documentation. ### Why are the changes needed? 
The parameter `spark.sql.cbo.joinReorder.card.weight` is documented as: ``` The weight of cardinality (number of rows) for plan cost comparison in join reorder: rows * weight + size * (1 - weight). ``` The implementation in `JoinReorderDP.betterThan` does not match this documentation: ``` def betterThan(other: JoinPlan, conf: SQLConf): Boolean = { if (other.planCost.card == 0 || other.planCost.size == 0) { false } else { val relativeRows = BigDecimal(this.planCost.card) / BigDecimal(other.planCost.card) val relativeSize = BigDecimal(this.planCost.size) / BigDecimal(other.planCost.size) relativeRows * conf.joinReorderCardWeight + relativeSize * (1 - conf.joinReorderCardWeight) < 1 } } ``` This different implementation has an unfortunate consequence: given two plans A and B, both A betterThan B and B betterThan A can return false. This happens when one plan has many rows with a small size and the other has few rows with a large size. Example values that show this phenomenon with the default weight value (0.7): A.card = 500, B.card = 300, A.size = 30, B.size = 80. Both A betterThan B and B betterThan A would have a score above 1 (about 1.28 and 1.22 respectively) and would return false. This happens with several of the TPCDS queries. The new implementation does not have this behavior. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New and existing UTs Closes #30965 from tanelk/SPARK-33935_cbo_cost_function. Authored-by: tanel.kiis@gmail.com Signed-off-by: Takeshi Yamamuro --- .../optimizer/CostBasedJoinReorder.scala | 13 +- .../joinReorder/JoinReorderSuite.scala | 15 + .../StarJoinCostBasedReorderSuite.scala | 8 +- .../approved-plans-v1_4/q13.sf100/explain.txt | 132 ++-- .../q13.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q17.sf100/explain.txt | 194 +++--- .../q17.sf100/simplified.txt | 130 ++-- .../approved-plans-v1_4/q18.sf100/explain.txt | 158 ++--- .../q18.sf100/simplified.txt | 50 +- .../approved-plans-v1_4/q19.sf100/explain.txt | 368 +++++------ .../q19.sf100/simplified.txt | 116 ++-- .../q24a.sf100/explain.txt | 118 ++-- .../q24a.sf100/simplified.txt | 34 +- .../q24b.sf100/explain.txt | 118 ++-- .../q24b.sf100/simplified.txt | 34 +- .../approved-plans-v1_4/q25.sf100/explain.txt | 194 +++--- .../q25.sf100/simplified.txt | 130 ++-- .../approved-plans-v1_4/q33.sf100/explain.txt | 264 ++++---- .../q33.sf100/simplified.txt | 58 +- .../approved-plans-v1_4/q52.sf100/explain.txt | 138 ++--- .../q52.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q55.sf100/explain.txt | 134 ++-- .../q55.sf100/simplified.txt | 26 +- .../approved-plans-v1_4/q72.sf100/explain.txt | 264 ++++---- .../q72.sf100/simplified.txt | 150 ++--- .../approved-plans-v1_4/q81.sf100/explain.txt | 570 +++++++++--------- .../q81.sf100/simplified.txt | 142 ++--- .../approved-plans-v1_4/q91.sf100/explain.txt | 306 +++++----- .../q91.sf100/simplified.txt | 62 +- .../q18a.sf100/explain.txt | 306 +++++----- .../q18a.sf100/simplified.txt | 54 +- .../approved-plans-v2_7/q72.sf100/explain.txt | 264 ++++---- .../q72.sf100/simplified.txt | 150 ++--- 33 files changed, 2386 insertions(+), 2374 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala index 11b675e75869e..c41686da79487 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala +++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/CostBasedJoinReorder.scala @@ -349,14 +349,11 @@ object JoinReorderDP extends PredicateHelper with Logging { } def betterThan(other: JoinPlan, conf: SQLConf): Boolean = { - if (other.planCost.card == 0 || other.planCost.size == 0) { - false - } else { - val relativeRows = BigDecimal(this.planCost.card) / BigDecimal(other.planCost.card) - val relativeSize = BigDecimal(this.planCost.size) / BigDecimal(other.planCost.size) - relativeRows * conf.joinReorderCardWeight + - relativeSize * (1 - conf.joinReorderCardWeight) < 1 - } + val thisCost = BigDecimal(this.planCost.card) * conf.joinReorderCardWeight + + BigDecimal(this.planCost.size) * (1 - conf.joinReorderCardWeight) + val otherCost = BigDecimal(other.planCost.card) * conf.joinReorderCardWeight + + BigDecimal(other.planCost.size) * (1 - conf.joinReorderCardWeight) + thisCost < otherCost } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala index b84207397e5cc..2e1cf4a137e25 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/JoinReorderSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} import org.apache.spark.sql.catalyst.optimizer._ +import org.apache.spark.sql.catalyst.optimizer.JoinReorderDP.JoinPlan import org.apache.spark.sql.catalyst.plans.{Cross, Inner} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -363,4 +364,18 @@ class JoinReorderSuite extends JoinReorderPlanTestBase with StatsEstimationTestB assertEqualJoinPlans(Optimize, originalPlan3, bestPlan3) } + + test("SPARK-33935: betterThan should be consistent") { + val plan1 = JoinPlan(null, null, null, Cost(300, 80)) + val plan2 = JoinPlan(null, null, null, Cost(500, 30)) + + // cost1 = 300*0.7 + 80*0.3 = 234 + // cost2 = 500*0.7 + 30*0.3 = 359 + + assert(!plan1.betterThan(plan1, conf)) + assert(!plan2.betterThan(plan2, conf)) + + assert(plan1.betterThan(plan2, conf)) + assert(!plan2.betterThan(plan1, conf)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala index 703be48c6a2a9..a42914765dcc8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/joinReorder/StarJoinCostBasedReorderSuite.scala @@ -294,12 +294,12 @@ class StarJoinCostBasedReorderSuite extends JoinReorderPlanTestBase with StatsEs (nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) val expected = - f1.join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) - .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk"))) - .join(t3.join(t4, Inner, Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))), Inner, - Some(nameToAttr("t3_c1") === nameToAttr("t4_c1"))) + t3.join(t4, Inner, Some(nameToAttr("t3_c1") === 
nameToAttr("t4_c1"))) .join(t1.join(t2, Inner, Some(nameToAttr("t1_c1") === nameToAttr("t2_c1"))), Inner, Some(nameToAttr("t1_c2") === nameToAttr("t4_c2"))) + .join(f1 + .join(d2, Inner, Some(nameToAttr("f1_fk2") === nameToAttr("d2_pk"))) + .join(d1, Inner, Some(nameToAttr("f1_fk1") === nameToAttr("d1_pk")))) .select(outputsOf(d1, t1, t2, t3, t4, f1, d2): _*) assertEqualJoinPlans(Optimize, query, expected) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 8ee427262b332..327e7db702faa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -4,8 +4,8 @@ +- * HashAggregate (36) +- * Project (35) +- * BroadcastHashJoin Inner BuildRight (34) - :- * Project (28) - : +- * BroadcastHashJoin Inner BuildRight (27) + :- * Project (29) + : +- * BroadcastHashJoin Inner BuildRight (28) : :- * Project (22) : : +- * BroadcastHashJoin Inner BuildRight (21) : : :- * Project (15) @@ -27,16 +27,16 @@ : : +- * Project (19) : : +- * Filter (18) : : +- * ColumnarToRow (17) - : : +- Scan parquet default.date_dim (16) - : +- BroadcastExchange (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.store (23) + : : +- Scan parquet default.customer_address (16) + : +- BroadcastExchange (27) + : +- * Project (26) + : +- * Filter (25) + : +- * ColumnarToRow (24) + : +- Scan parquet default.date_dim (23) +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer_address (29) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.store (30) (1) Scan parquet default.store_sales @@ -107,94 +107,94 @@ Join condition: (((((((cd_marital_status#12 = M) AND (cd_education_status#13 = A Output [7]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] Input [13]: [ss_sold_date_sk#1, ss_hdemo_sk#3, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_sales_price#7, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, cd_marital_status#12, cd_education_status#13, hd_demo_sk#15, hd_dep_count#16] -(16) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_year#19] +(16) Scan parquet default.customer_address +Output [3]: [ca_address_sk#18, ca_state#19, ca_country#20] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] +ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#18, d_year#19] +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] (18) Filter [codegen id : 3] -Input [2]: [d_date_sk#18, d_year#19] -Condition : ((isnotnull(d_year#19) AND (d_year#19 = 2001)) AND isnotnull(d_date_sk#18)) +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] +Condition : (((isnotnull(ca_country#20) AND (ca_country#20 = United States)) AND isnotnull(ca_address_sk#18)) AND ((ca_state#19 IN (TX,OH) OR ca_state#19 IN (OR,NM,KY)) OR ca_state#19 IN 
(VA,TX,MS))) (19) Project [codegen id : 3] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_year#19] +Output [2]: [ca_address_sk#18, ca_state#19] +Input [3]: [ca_address_sk#18, ca_state#19, ca_country#20] (20) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [2]: [ca_address_sk#18, ca_state#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#18] -Join condition: None +Left keys [1]: [ss_addr_sk#4] +Right keys [1]: [ca_address_sk#18] +Join condition: ((((ca_state#19 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#19 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#19 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) (22) Project [codegen id : 6] -Output [6]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Input [8]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, d_date_sk#18] +Output [5]: [ss_sold_date_sk#1, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [9]: [ss_sold_date_sk#1, ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#18, ca_state#19] -(23) Scan parquet default.store -Output [1]: [s_store_sk#21] +(23) Scan parquet default.date_dim +Output [2]: [d_date_sk#22, d_year#23] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] +ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [1]: [s_store_sk#21] +Input [2]: [d_date_sk#22, d_year#23] (25) Filter [codegen id : 4] -Input [1]: [s_store_sk#21] -Condition : isnotnull(s_store_sk#21) +Input [2]: [d_date_sk#22, d_year#23] +Condition : ((isnotnull(d_year#23) AND (d_year#23 = 2001)) AND isnotnull(d_date_sk#22)) -(26) BroadcastExchange -Input [1]: [s_store_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +(26) Project [codegen id : 4] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_year#23] -(27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#21] +(27) BroadcastExchange +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] + +(28) BroadcastHashJoin [codegen id : 6] +Left keys [1]: [ss_sold_date_sk#1] +Right keys [1]: [d_date_sk#22] Join condition: None -(28) Project [codegen id : 6] -Output [5]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10] -Input [7]: [ss_addr_sk#4, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, s_store_sk#21] +(29) Project [codegen id : 6] +Output [4]: [ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] +Input [6]: [ss_sold_date_sk#1, ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, d_date_sk#22] -(29) Scan parquet 
default.customer_address -Output [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +(30) Scan parquet default.store +Output [1]: [s_store_sk#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [TX,OH]),In(ca_state, [OR,NM,KY])),In(ca_state, [VA,TX,MS]))] -ReadSchema: struct - -(30) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct -(31) Filter [codegen id : 5] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] -Condition : (((isnotnull(ca_country#25) AND (ca_country#25 = United States)) AND isnotnull(ca_address_sk#23)) AND ((ca_state#24 IN (TX,OH) OR ca_state#24 IN (OR,NM,KY)) OR ca_state#24 IN (VA,TX,MS))) +(31) ColumnarToRow [codegen id : 5] +Input [1]: [s_store_sk#25] -(32) Project [codegen id : 5] -Output [2]: [ca_address_sk#23, ca_state#24] -Input [3]: [ca_address_sk#23, ca_state#24, ca_country#25] +(32) Filter [codegen id : 5] +Input [1]: [s_store_sk#25] +Condition : isnotnull(s_store_sk#25) (33) BroadcastExchange -Input [2]: [ca_address_sk#23, ca_state#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [s_store_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#4] -Right keys [1]: [ca_address_sk#23] -Join condition: ((((ca_state#24 IN (TX,OH) AND (ss_net_profit#10 >= 100.00)) AND (ss_net_profit#10 <= 200.00)) OR ((ca_state#24 IN (OR,NM,KY) AND (ss_net_profit#10 >= 150.00)) AND (ss_net_profit#10 <= 300.00))) OR ((ca_state#24 IN (VA,TX,MS) AND (ss_net_profit#10 >= 50.00)) AND (ss_net_profit#10 <= 250.00))) +Left keys [1]: [ss_store_sk#5] +Right keys [1]: [s_store_sk#25] +Join condition: None (35) Project [codegen id : 6] Output [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] -Input [7]: [ss_addr_sk#4, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, ss_net_profit#10, ca_address_sk#23, ca_state#24] +Input [5]: [ss_store_sk#5, ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9, s_store_sk#25] (36) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#6, ss_ext_sales_price#8, ss_ext_wholesale_cost#9] @@ -205,7 +205,7 @@ Results [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] (37) Exchange Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] -Arguments: SinglePartition, true, [id=#41] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#41] (38) HashAggregate [codegen id : 7] Input [7]: [sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt index b457788dbd0b2..45d6c8f3b0bae 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/simplified.txt @@ -5,11 +5,11 @@ WholeStageCodegen (7) WholeStageCodegen (6) HashAggregate [ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] [sum,count,sum,count,sum,count,sum,sum,count,sum,count,sum,count,sum] Project 
[ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] - BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] - Project [ss_addr_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Project [ss_sold_date_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost] + BroadcastHashJoin [ss_addr_sk,ca_address_sk,ca_state,ss_net_profit] Project [ss_sold_date_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit] BroadcastHashJoin [ss_hdemo_sk,hd_demo_sk,cd_marital_status,cd_education_status,ss_sales_price,hd_dep_count] Project [ss_sold_date_sk,ss_hdemo_sk,ss_addr_sk,ss_store_sk,ss_quantity,ss_sales_price,ss_ext_sales_price,ss_ext_wholesale_cost,ss_net_profit,cd_marital_status,cd_education_status] @@ -35,23 +35,23 @@ WholeStageCodegen (7) InputAdapter BroadcastExchange #4 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_year,d_date_sk] + Project [ca_address_sk,ca_state] + Filter [ca_country,ca_address_sk,ca_state] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year] + Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] InputAdapter BroadcastExchange #5 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk] + Project [d_date_sk] + Filter [d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter BroadcastExchange #6 WholeStageCodegen (5) - Project [ca_address_sk,ca_state] - Filter [ca_country,ca_address_sk,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_state,ca_country] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index a17356ae04a03..a9ab8c3690a00 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -5,57 +5,57 @@ TakeOrderedAndProject (57) +- * HashAggregate (54) +- * Project (53) +- * SortMergeJoin Inner (52) - :- * Sort (43) - : +- Exchange (42) - : +- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (27) - : : +- Exchange (26) - : : +- * Project (25) - : : +- * SortMergeJoin Inner (24) - : : :- * Sort (18) - : : : +- Exchange (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- * Sort (23) - : : +- 
Exchange (22) - : : +- * Filter (21) - : : +- * ColumnarToRow (20) - : : +- Scan parquet default.item (19) - : +- * Sort (39) - : +- Exchange (38) - : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.store_returns (28) - : +- BroadcastExchange (35) - : +- * Project (34) - : +- * Filter (33) - : +- * ColumnarToRow (32) - : +- Scan parquet default.date_dim (31) + :- * Sort (27) + : +- Exchange (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (46) - : +- * ColumnarToRow (45) - : +- Scan parquet default.catalog_sales (44) - +- ReusedExchange (47) + +- * SortMergeJoin Inner (48) + :- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : +- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) + +- * Sort (47) + +- Exchange (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_sales (40) + +- ReusedExchange (43) (1) Scan parquet default.store_sales @@ -132,7 +132,7 @@ Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, s (17) Exchange Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (18) Sort [codegen id : 4] Input [5]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11] @@ -154,7 +154,7 @@ Condition : isnotnull(i_item_sk#14) (22) Exchange Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(i_item_sk#14, 5), true, [id=#17] +Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] (23) Sort [codegen id : 6] Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] @@ -171,7 +171,7 @@ Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s (26) Exchange Input [7]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#18] +Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#18] (27) Sort [codegen id : 8] Input [7]: [ss_item_sk#2, ss_customer_sk#3, 
ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16] @@ -224,89 +224,89 @@ Input [6]: [sr_returned_date_sk#19, sr_item_sk#20, sr_customer_sk#21, sr_ticket_ (38) Exchange Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), true, [id=#27] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), ENSURE_REQUIREMENTS, [id=#27] (39) Sort [codegen id : 11] Input [4]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] -Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 - -(40) SortMergeJoin [codegen id : 12] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] -Join condition: None - -(41) Project [codegen id : 12] -Output [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] -Input [11]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23] - -(42) Exchange -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] -Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, 5), true, [id=#28] - -(43) Sort [codegen id : 13] -Input [7]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23] Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST], false, 0 -(44) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +(40) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 15] -Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +(41) ColumnarToRow [codegen id : 13] +Input [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] + +(42) Filter [codegen id : 13] +Input [4]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Condition : ((isnotnull(cs_bill_customer_sk#29) AND isnotnull(cs_item_sk#30)) AND isnotnull(cs_sold_date_sk#28)) -(46) Filter [codegen id : 15] -Input [4]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Condition : ((isnotnull(cs_bill_customer_sk#30) AND isnotnull(cs_item_sk#31)) AND isnotnull(cs_sold_date_sk#29)) +(43) ReusedExchange [Reuses operator id: 35] +Output [1]: [d_date_sk#32] -(47) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#33] +(44) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#32] +Join condition: None + +(45) Project [codegen id : 13] +Output [3]: [cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Input [5]: [cs_sold_date_sk#28, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31, d_date_sk#32] + +(46) Exchange +Input [3]: [cs_bill_customer_sk#29, cs_item_sk#30, 
cs_quantity#31] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#29 as bigint), cast(cs_item_sk#30 as bigint), 5), ENSURE_REQUIREMENTS, [id=#33] -(48) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#29] -Right keys [1]: [d_date_sk#33] +(47) Sort [codegen id : 14] +Input [3]: [cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] +Arguments: [cast(cs_bill_customer_sk#29 as bigint) ASC NULLS FIRST, cast(cs_item_sk#30 as bigint) ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin [codegen id : 15] +Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] +Right keys [2]: [cast(cs_bill_customer_sk#29 as bigint), cast(cs_item_sk#30 as bigint)] Join condition: None (49) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Input [5]: [cs_sold_date_sk#29, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32, d_date_sk#33] +Output [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Input [7]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_bill_customer_sk#29, cs_item_sk#30, cs_quantity#31] (50) Exchange -Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Arguments: hashpartitioning(cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint), 5), true, [id=#34] +Input [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Arguments: hashpartitioning(sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22, 5), ENSURE_REQUIREMENTS, [id=#34] (51) Sort [codegen id : 16] -Input [3]: [cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] -Arguments: [cast(cs_bill_customer_sk#30 as bigint) ASC NULLS FIRST, cast(cs_item_sk#31 as bigint) ASC NULLS FIRST], false, 0 +Input [5]: [sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] +Arguments: [sr_customer_sk#21 ASC NULLS FIRST, sr_item_sk#20 ASC NULLS FIRST, sr_ticket_number#22 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#21, sr_item_sk#20] -Right keys [2]: [cast(cs_bill_customer_sk#30 as bigint), cast(cs_item_sk#31 as bigint)] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#21, sr_item_sk#20, sr_ticket_number#22] Join condition: None (53) Project [codegen id : 17] -Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] -Input [10]: [ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_return_quantity#23, cs_bill_customer_sk#30, cs_item_sk#31, cs_quantity#32] +Output [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#31, s_state#11, i_item_id#15, i_item_desc#16] +Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_quantity#6, s_state#11, i_item_id#15, i_item_desc#16, sr_item_sk#20, sr_customer_sk#21, sr_ticket_number#22, sr_return_quantity#23, cs_quantity#31] (54) HashAggregate [codegen id : 17] -Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#32, s_state#11, i_item_id#15, i_item_desc#16] +Input [6]: [ss_quantity#6, sr_return_quantity#23, cs_quantity#31, s_state#11, i_item_id#15, i_item_desc#16] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), 
partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#32), partial_avg(cast(cs_quantity#32 as bigint)), partial_stddev_samp(cast(cs_quantity#32 as double))] +Functions [9]: [partial_count(ss_quantity#6), partial_avg(cast(ss_quantity#6 as bigint)), partial_stddev_samp(cast(ss_quantity#6 as double)), partial_count(sr_return_quantity#23), partial_avg(cast(sr_return_quantity#23 as bigint)), partial_stddev_samp(cast(sr_return_quantity#23 as double)), partial_count(cs_quantity#31), partial_avg(cast(cs_quantity#31 as bigint)), partial_stddev_samp(cast(cs_quantity#31 as double))] Aggregate Attributes [18]: [count#35, sum#36, count#37, n#38, avg#39, m2#40, count#41, sum#42, count#43, n#44, avg#45, m2#46, count#47, sum#48, count#49, n#50, avg#51, m2#52] Results [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] (55) Exchange Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), true, [id=#71] +Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_state#11, 5), ENSURE_REQUIREMENTS, [id=#71] (56) HashAggregate [codegen id : 18] Input [21]: [i_item_id#15, i_item_desc#16, s_state#11, count#53, sum#54, count#55, n#56, avg#57, m2#58, count#59, sum#60, count#61, n#62, avg#63, m2#64, count#65, sum#66, count#67, n#68, avg#69, m2#70] Keys [3]: [i_item_id#15, i_item_desc#16, s_state#11] -Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#32), avg(cast(cs_quantity#32 as bigint)), stddev_samp(cast(cs_quantity#32 as double))] -Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#32)#78, avg(cast(cs_quantity#32 as bigint))#79, stddev_samp(cast(cs_quantity#32 as double))#80] -Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#32)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#32 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#32 as double))#80 / avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#32 as double))#80 / 
avg(cast(cs_quantity#32 as bigint))#79) AS catalog_sales_quantitycov#92] +Functions [9]: [count(ss_quantity#6), avg(cast(ss_quantity#6 as bigint)), stddev_samp(cast(ss_quantity#6 as double)), count(sr_return_quantity#23), avg(cast(sr_return_quantity#23 as bigint)), stddev_samp(cast(sr_return_quantity#23 as double)), count(cs_quantity#31), avg(cast(cs_quantity#31 as bigint)), stddev_samp(cast(cs_quantity#31 as double))] +Aggregate Attributes [9]: [count(ss_quantity#6)#72, avg(cast(ss_quantity#6 as bigint))#73, stddev_samp(cast(ss_quantity#6 as double))#74, count(sr_return_quantity#23)#75, avg(cast(sr_return_quantity#23 as bigint))#76, stddev_samp(cast(sr_return_quantity#23 as double))#77, count(cs_quantity#31)#78, avg(cast(cs_quantity#31 as bigint))#79, stddev_samp(cast(cs_quantity#31 as double))#80] +Results [15]: [i_item_id#15, i_item_desc#16, s_state#11, count(ss_quantity#6)#72 AS store_sales_quantitycount#81, avg(cast(ss_quantity#6 as bigint))#73 AS store_sales_quantityave#82, stddev_samp(cast(ss_quantity#6 as double))#74 AS store_sales_quantitystdev#83, (stddev_samp(cast(ss_quantity#6 as double))#74 / avg(cast(ss_quantity#6 as bigint))#73) AS store_sales_quantitycov#84, count(sr_return_quantity#23)#75 AS as_store_returns_quantitycount#85, avg(cast(sr_return_quantity#23 as bigint))#76 AS as_store_returns_quantityave#86, stddev_samp(cast(sr_return_quantity#23 as double))#77 AS as_store_returns_quantitystdev#87, (stddev_samp(cast(sr_return_quantity#23 as double))#77 / avg(cast(sr_return_quantity#23 as bigint))#76) AS store_returns_quantitycov#88, count(cs_quantity#31)#78 AS catalog_sales_quantitycount#89, avg(cast(cs_quantity#31 as bigint))#79 AS catalog_sales_quantityave#90, (stddev_samp(cast(cs_quantity#31 as double))#80 / avg(cast(cs_quantity#31 as bigint))#79) AS catalog_sales_quantitystdev#91, (stddev_samp(cast(cs_quantity#31 as double))#80 / avg(cast(cs_quantity#31 as bigint))#79) AS catalog_sales_quantitycov#92] (57) TakeOrderedAndProject Input [15]: [i_item_id#15, i_item_desc#16, s_state#11, store_sales_quantitycount#81, store_sales_quantityave#82, store_sales_quantitystdev#83, store_sales_quantitycov#84, as_store_returns_quantitycount#85, as_store_returns_quantityave#86, as_store_returns_quantitystdev#87, store_returns_quantitycov#88, catalog_sales_quantitycount#89, catalog_sales_quantityave#90, catalog_sales_quantitystdev#91, catalog_sales_quantitycov#92] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt index bfb59441f483b..79226a34e6768 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/simplified.txt @@ -6,67 +6,67 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_state,ss_quantity,sr_return_quantity,cs_quantity] [count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2,count,sum,count,n,avg,m2] Project [ss_quantity,sr_return_quantity,cs_quantity,s_state,i_item_id,i_item_desc] - SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] InputAdapter - WholeStageCodegen (13) - Sort [sr_customer_sk,sr_item_sk] + 
WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] InputAdapter - Exchange [sr_customer_sk,sr_item_sk] #2 - WholeStageCodegen (12) - Project [ss_quantity,s_state,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_return_quantity] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (8) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 - WholeStageCodegen (7) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_quarter_name,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_quarter_name] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_state] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (5) - Filter [i_item_sk] + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_quantity,s_state] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_quantity] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_quarter_name,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.date_dim [d_date_sk,d_quarter_name] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_state] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (16) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #7 + WholeStageCodegen (15) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity,cs_quantity] + 
SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter WholeStageCodegen (11) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + Sort [sr_customer_sk,sr_item_sk] InputAdapter - Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + Exchange [sr_customer_sk,sr_item_sk] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_return_quantity] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] @@ -82,17 +82,17 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_state,store_sales_quantitycount,s ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_quarter_name] - InputAdapter - WholeStageCodegen (16) - Sort [cs_bill_customer_sk,cs_item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #10 - WholeStageCodegen (15) - Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] InputAdapter - ReusedExchange [d_date_sk] #9 + WholeStageCodegen (14) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (13) + Project [cs_bill_customer_sk,cs_item_sk,cs_quantity] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_quantity] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt index 516f782057631..12e95ba50cd0d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt @@ -34,24 +34,24 @@ TakeOrderedAndProject (53) +- * Sort (46) +- Exchange (45) +- * Project (44) - +- * SortMergeJoin Inner (43) - :- * Sort (37) - : +- Exchange (36) - : +- * Project (35) - : +- * BroadcastHashJoin Inner BuildRight (34) - : :- * Project (29) - : : +- * Filter (28) - : : +- * ColumnarToRow (27) - : : +- Scan parquet default.customer (26) - : +- BroadcastExchange (33) - : +- * Filter (32) - : +- * ColumnarToRow (31) - : +- Scan parquet default.customer_address (30) - +- * Sort (42) - +- Exchange (41) - +- * Filter (40) - +- * ColumnarToRow (39) - +- Scan parquet default.customer_demographics (38) + +- * BroadcastHashJoin Inner BuildRight (43) + :- * Project (38) + : +- * SortMergeJoin Inner (37) + : :- * Sort (31) + : : +- Exchange (30) + : : +- * Project (29) + : : +- * Filter (28) + : : +- * ColumnarToRow (27) + : : +- Scan parquet default.customer (26) + : +- * Sort (36) + : +- Exchange (35) + : +- * Filter (34) + : +- * ColumnarToRow (33) + : +- Scan parquet default.customer_demographics (32) + +- BroadcastExchange (42) + +- * Filter (41) + +- * ColumnarToRow (40) + +- Scan parquet default.customer_address (39) (1) Scan parquet default.catalog_sales @@ -159,7 +159,7 @@ Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6 (24) Exchange Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] -Arguments: 
hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#21] (25) Sort [codegen id : 5] Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -172,89 +172,89 @@ Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,6,8,9,12,2]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] +(27) ColumnarToRow [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(28) Filter [codegen id : 7] +(28) Filter [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] Condition : (((c_birth_month#25 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) -(29) Project [codegen id : 7] +(29) Project [codegen id : 6] Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(30) Scan parquet default.customer_address -Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(32) Filter [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#27)) - -(33) BroadcastExchange -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(34) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] -Join condition: None - -(35) Project [codegen id : 7] -Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(36) Exchange -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] +(30) Exchange +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(37) Sort [codegen id : 8] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +(31) Sort [codegen id : 7] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(38) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#33] +(32) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] 
PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#33] +(33) ColumnarToRow [codegen id : 8] +Input [1]: [cd_demo_sk#28] -(40) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#33] -Condition : isnotnull(cd_demo_sk#33) +(34) Filter [codegen id : 8] +Input [1]: [cd_demo_sk#28] +Condition : isnotnull(cd_demo_sk#28) -(41) Exchange -Input [1]: [cd_demo_sk#33] -Arguments: hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] +(35) Exchange +Input [1]: [cd_demo_sk#28] +Arguments: hashpartitioning(cd_demo_sk#28, 5), ENSURE_REQUIREMENTS, [id=#29] -(42) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#33] -Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 +(36) Sort [codegen id : 9] +Input [1]: [cd_demo_sk#28] +Arguments: [cd_demo_sk#28 ASC NULLS FIRST], false, 0 -(43) SortMergeJoin [codegen id : 11] +(37) SortMergeJoin [codegen id : 11] Left keys [1]: [c_current_cdemo_sk#23] -Right keys [1]: [cd_demo_sk#33] +Right keys [1]: [cd_demo_sk#28] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, cd_demo_sk#28] + +(39) Scan parquet default.customer_address +Output [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [MS,IN,ND,OK,NM,VA]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] + +(41) Filter [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#30)) + +(42) BroadcastExchange +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#30] Join condition: None (44) Project [codegen id : 11] -Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] (45) Exchange -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] (46) Sort [codegen id : 12] -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 13] @@ -263,12 +263,12 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (48) Project [codegen id : 13] -Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, 
i_item_id#19, ca_country#30, ca_state#29, ca_county#28] -Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Output [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] (49) Expand [codegen id : 13] -Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28] -Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, ca_state#29, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#30, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] +Input [11]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31] +Arguments: [List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, ca_county#31, 0), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, ca_state#32, null, 1), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, ca_country#33, null, null, 3), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#19, null, null, null, 7), List(cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, null, null, null, null, 15)], [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] (50) HashAggregate [codegen id : 13] Input [12]: [cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, c_birth_year#26, i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40] @@ -279,7 +279,7 @@ Results [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_gro 
(51) Exchange Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, 5), true, [id=#69] +Arguments: hashpartitioning(i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, 5), ENSURE_REQUIREMENTS, [id=#69] (52) HashAggregate [codegen id : 14] Input [19]: [i_item_id#36, ca_country#37, ca_state#38, ca_county#39, spark_grouping_id#40, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt index 8c76e7cab3310..8069d43c3451a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/simplified.txt @@ -54,34 +54,34 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag Exchange [c_customer_sk] #6 WholeStageCodegen (11) Project [c_customer_sk,c_birth_year,ca_county,ca_state,ca_country] - SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] - InputAdapter - WholeStageCodegen (8) - Sort [c_current_cdemo_sk] - InputAdapter - Exchange [c_current_cdemo_sk] #7 - WholeStageCodegen (7) - Project [c_customer_sk,c_current_cdemo_sk,c_birth_year,ca_county,ca_state,ca_country] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,c_birth_year] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (7) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (6) Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + WholeStageCodegen (9) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #8 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter - WholeStageCodegen (10) - Sort [cd_demo_sk] - InputAdapter - Exchange [cd_demo_sk] #9 - WholeStageCodegen (9) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + BroadcastExchange #9 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt index 88b5168f6049c..4627bc19f25f0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt +++ 
b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -4,248 +4,248 @@ TakeOrderedAndProject (45) +- Exchange (43) +- * HashAggregate (42) +- * Project (41) - +- * SortMergeJoin Inner (40) - :- * Sort (25) - : +- Exchange (24) - : +- * Project (23) - : +- * BroadcastHashJoin Inner BuildRight (22) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : :- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.item (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.date_dim (11) - : +- BroadcastExchange (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.store (18) - +- * Sort (39) - +- Exchange (38) - +- * Project (37) - +- * SortMergeJoin Inner (36) - :- * Sort (30) - : +- Exchange (29) - : +- * Filter (28) - : +- * ColumnarToRow (27) - : +- Scan parquet default.customer (26) - +- * Sort (35) - +- Exchange (34) - +- * Filter (33) - +- * ColumnarToRow (32) - +- Scan parquet default.customer_address (31) - - -(1) Scan parquet default.store_sales -Output [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] + +- * BroadcastHashJoin Inner BuildRight (40) + :- * Project (34) + : +- * SortMergeJoin Inner (33) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildLeft (9) + : : : :- BroadcastExchange (5) + : : : : +- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.date_dim (1) + : : : +- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.store_sales (6) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (32) + : +- Exchange (31) + : +- * Project (30) + : +- * SortMergeJoin Inner (29) + : :- * Sort (23) + : : +- Exchange (22) + : : +- * Filter (21) + : : +- * ColumnarToRow (20) + : : +- Scan parquet default.customer (19) + : +- * Sort (28) + : +- Exchange (27) + : +- * Filter (26) + : +- * ColumnarToRow (25) + : +- Scan parquet default.customer_address (24) + +- BroadcastExchange (39) + +- * Project (38) + +- * Filter (37) + +- * ColumnarToRow (36) + +- Scan parquet default.item (35) + + +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] -ReadSchema: struct - -(2) ColumnarToRow [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] - -(3) Filter [codegen id : 4] -Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5] -Condition : (((isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) AND isnotnull(ss_customer_sk#3)) AND isnotnull(ss_store_sk#4)) - -(4) Scan parquet default.item -Output [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] 
-Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] - -(6) Filter [codegen id : 1] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 8)) AND isnotnull(i_item_sk#6)) +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [6]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, i_manager_id#11] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(8) BroadcastExchange -Input [5]: [i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1998)) AND isnotnull(d_date_sk#1)) -(9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#6] -Join condition: None +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(10) Project [codegen id : 4] -Output [8]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [10]: [ss_sold_date_sk#1, ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_item_sk#6, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#13, d_year#14, d_moy#15] +(6) Scan parquet default.store_sales +Output [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct - -(12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] - -(13) Filter [codegen id : 2] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] -Condition : ((((isnotnull(d_moy#15) AND isnotnull(d_year#14)) AND (d_moy#15 = 11)) AND (d_year#14 = 1998)) AND isnotnull(d_date_sk#13)) +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] +ReadSchema: struct -(14) Project [codegen id : 2] -Output [1]: [d_date_sk#13] -Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +(7) ColumnarToRow +Input [5]: [ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] -(15) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +(8) Filter +Input [5]: [ss_sold_date_sk#5, 
ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Condition : (((isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) AND isnotnull(ss_customer_sk#7)) AND isnotnull(ss_store_sk#8)) -(16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#13] +(9) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None -(17) Project [codegen id : 4] -Output [7]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [9]: [ss_sold_date_sk#1, ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, d_date_sk#13] +(10) Project [codegen id : 3] +Output [4]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] +Input [6]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9] -(18) Scan parquet default.store -Output [2]: [s_store_sk#17, s_zip#18] +(11) Scan parquet default.store +Output [2]: [s_store_sk#10, s_zip#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct -(19) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#17, s_zip#18] +(12) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#10, s_zip#11] -(20) Filter [codegen id : 3] -Input [2]: [s_store_sk#17, s_zip#18] -Condition : (isnotnull(s_zip#18) AND isnotnull(s_store_sk#17)) +(13) Filter [codegen id : 2] +Input [2]: [s_store_sk#10, s_zip#11] +Condition : (isnotnull(s_zip#11) AND isnotnull(s_store_sk#10)) -(21) BroadcastExchange -Input [2]: [s_store_sk#17, s_zip#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +(14) BroadcastExchange +Input [2]: [s_store_sk#10, s_zip#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] -(22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#17] +(15) BroadcastHashJoin [codegen id : 3] +Left keys [1]: [ss_store_sk#8] +Right keys [1]: [s_store_sk#10] Join condition: None -(23) Project [codegen id : 4] -Output [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Input [9]: [ss_customer_sk#3, ss_store_sk#4, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_store_sk#17, s_zip#18] +(16) Project [codegen id : 3] +Output [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Input [6]: [ss_item_sk#6, ss_customer_sk#7, ss_store_sk#8, ss_ext_sales_price#9, s_store_sk#10, s_zip#11] -(24) Exchange -Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Arguments: hashpartitioning(ss_customer_sk#3, 5), true, [id=#20] +(17) Exchange +Input [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] -(25) Sort [codegen id : 5] -Input [7]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18] -Arguments: [ss_customer_sk#3 ASC NULLS FIRST], false, 0 +(18) Sort [codegen id : 4] +Input [4]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11] +Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer -Output 
[2]: [c_customer_sk#21, c_current_addr_sk#22] +(19) Scan parquet default.customer +Output [2]: [c_customer_sk#14, c_current_addr_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 6] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] +(20) ColumnarToRow [codegen id : 5] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] -(28) Filter [codegen id : 6] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) +(21) Filter [codegen id : 5] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_current_addr_sk#15)) -(29) Exchange -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: hashpartitioning(c_current_addr_sk#22, 5), true, [id=#23] +(22) Exchange +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: hashpartitioning(c_current_addr_sk#15, 5), ENSURE_REQUIREMENTS, [id=#16] -(30) Sort [codegen id : 7] -Input [2]: [c_customer_sk#21, c_current_addr_sk#22] -Arguments: [c_current_addr_sk#22 ASC NULLS FIRST], false, 0 +(23) Sort [codegen id : 6] +Input [2]: [c_customer_sk#14, c_current_addr_sk#15] +Arguments: [c_current_addr_sk#15 ASC NULLS FIRST], false, 0 -(31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#24, ca_zip#25] +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_zip#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct -(32) ColumnarToRow [codegen id : 8] -Input [2]: [ca_address_sk#24, ca_zip#25] +(25) ColumnarToRow [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_zip#18] -(33) Filter [codegen id : 8] -Input [2]: [ca_address_sk#24, ca_zip#25] -Condition : (isnotnull(ca_address_sk#24) AND isnotnull(ca_zip#25)) +(26) Filter [codegen id : 7] +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) -(34) Exchange -Input [2]: [ca_address_sk#24, ca_zip#25] -Arguments: hashpartitioning(ca_address_sk#24, 5), true, [id=#26] +(27) Exchange +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: hashpartitioning(ca_address_sk#17, 5), ENSURE_REQUIREMENTS, [id=#19] -(35) Sort [codegen id : 9] -Input [2]: [ca_address_sk#24, ca_zip#25] -Arguments: [ca_address_sk#24 ASC NULLS FIRST], false, 0 +(28) Sort [codegen id : 8] +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: [ca_address_sk#17 ASC NULLS FIRST], false, 0 -(36) SortMergeJoin [codegen id : 10] -Left keys [1]: [c_current_addr_sk#22] -Right keys [1]: [ca_address_sk#24] +(29) SortMergeJoin [codegen id : 9] +Left keys [1]: [c_current_addr_sk#15] +Right keys [1]: [ca_address_sk#17] Join condition: None -(37) Project [codegen id : 10] -Output [2]: [c_customer_sk#21, ca_zip#25] -Input [4]: [c_customer_sk#21, c_current_addr_sk#22, ca_address_sk#24, ca_zip#25] +(30) Project [codegen id : 9] +Output [2]: [c_customer_sk#14, ca_zip#18] +Input [4]: [c_customer_sk#14, c_current_addr_sk#15, ca_address_sk#17, ca_zip#18] + +(31) Exchange +Input [2]: [c_customer_sk#14, ca_zip#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#20] + +(32) Sort [codegen id : 10] +Input [2]: [c_customer_sk#14, ca_zip#18] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 -(38) Exchange -Input [2]: [c_customer_sk#21, 
ca_zip#25] -Arguments: hashpartitioning(c_customer_sk#21, 5), true, [id=#27] +(33) SortMergeJoin [codegen id : 12] +Left keys [1]: [ss_customer_sk#7] +Right keys [1]: [c_customer_sk#14] +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#11, 1, 5)) -(39) Sort [codegen id : 11] -Input [2]: [c_customer_sk#21, ca_zip#25] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +(34) Project [codegen id : 12] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#9] +Input [6]: [ss_item_sk#6, ss_customer_sk#7, ss_ext_sales_price#9, s_zip#11, c_customer_sk#14, ca_zip#18] -(40) SortMergeJoin [codegen id : 12] -Left keys [1]: [ss_customer_sk#3] -Right keys [1]: [c_customer_sk#21] -Join condition: NOT (substr(ca_zip#25, 1, 5) = substr(s_zip#18, 1, 5)) +(35) Scan parquet default.item +Output [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] +Batched: true +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] +ReadSchema: struct + +(36) ColumnarToRow [codegen id : 11] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] + +(37) Filter [codegen id : 11] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] +Condition : ((isnotnull(i_manager_id#26) AND (i_manager_id#26 = 8)) AND isnotnull(i_item_sk#21)) + +(38) Project [codegen id : 11] +Output [5]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Input [6]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25, i_manager_id#26] + +(39) BroadcastExchange +Input [5]: [i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] + +(40) BroadcastHashJoin [codegen id : 12] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#21] +Join condition: None (41) Project [codegen id : 12] -Output [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Input [9]: [ss_customer_sk#3, ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10, s_zip#18, c_customer_sk#21, ca_zip#25] +Output [5]: [ss_ext_sales_price#9, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Input [7]: [ss_item_sk#6, ss_ext_sales_price#9, i_item_sk#21, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] (42) HashAggregate [codegen id : 12] -Input [5]: [ss_ext_sales_price#5, i_brand_id#7, i_brand#8, i_manufact_id#9, i_manufact#10] -Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Input [5]: [ss_ext_sales_price#9, i_brand_id#22, i_brand#23, i_manufact_id#24, i_manufact#25] +Keys [4]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#9))] Aggregate Attributes [1]: [sum#28] -Results [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] +Results [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] (43) Exchange -Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] -Arguments: hashpartitioning(i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, 5), true, [id=#30] +Input [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] +Arguments: hashpartitioning(i_brand#23, i_brand_id#22, 
i_manufact_id#24, i_manufact#25, 5), ENSURE_REQUIREMENTS, [id=#30] (44) HashAggregate [codegen id : 13] -Input [5]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10, sum#29] -Keys [4]: [i_brand#8, i_brand_id#7, i_manufact_id#9, i_manufact#10] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#31] -Results [5]: [i_brand_id#7 AS brand_id#32, i_brand#8 AS brand#33, i_manufact_id#9, i_manufact#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS ext_price#34] +Input [5]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25, sum#29] +Keys [4]: [i_brand#23, i_brand_id#22, i_manufact_id#24, i_manufact#25] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#9))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#9))#31] +Results [5]: [i_brand_id#22 AS brand_id#32, i_brand#23 AS brand#33, i_manufact_id#24, i_manufact#25, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#9))#31,17,2) AS ext_price#34] (45) TakeOrderedAndProject -Input [5]: [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] -Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#9 ASC NULLS FIRST, i_manufact#10 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#9, i_manufact#10, ext_price#34] +Input [5]: [brand_id#32, brand#33, i_manufact_id#24, i_manufact#25, ext_price#34] +Arguments: 100, [ext_price#34 DESC NULLS LAST, brand#33 ASC NULLS FIRST, brand_id#32 ASC NULLS FIRST, i_manufact_id#24 ASC NULLS FIRST, i_manufact#25 ASC NULLS FIRST], [brand_id#32, brand#33, i_manufact_id#24, i_manufact#25, ext_price#34] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt index 05fa3f82e27df..b6441c5fe72c1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/simplified.txt @@ -6,71 +6,71 @@ TakeOrderedAndProject [ext_price,brand,brand_id,i_manufact_id,i_manufact] WholeStageCodegen (12) HashAggregate [i_brand,i_brand_id,i_manufact_id,i_manufact,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - SortMergeJoin [ss_customer_sk,c_customer_sk,ca_zip,s_zip] - InputAdapter - WholeStageCodegen (5) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #2 - WholeStageCodegen (4) - Project [ss_customer_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact,s_zip] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price,i_brand_id,i_brand,i_manufact_id,i_manufact] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + SortMergeJoin [ss_customer_sk,c_customer_sk,ca_zip,s_zip] + InputAdapter + WholeStageCodegen (4) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #2 + WholeStageCodegen (3) + 
Project [ss_item_sk,ss_customer_sk,ss_ext_sales_price,s_zip] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #3 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #4 WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Filter [s_zip,s_store_sk] - ColumnarToRow + Filter [s_zip,s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_zip] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #5 + WholeStageCodegen (9) + Project [c_customer_sk,ca_zip] + SortMergeJoin [c_current_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (6) + Sort [c_current_addr_sk] + InputAdapter + Exchange [c_current_addr_sk] #6 + WholeStageCodegen (5) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + WholeStageCodegen (8) + Sort [ca_address_sk] InputAdapter - Scan parquet default.store [s_store_sk,s_zip] + Exchange [ca_address_sk] #7 + WholeStageCodegen (7) + Filter [ca_address_sk,ca_zip] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_zip] InputAdapter - WholeStageCodegen (11) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #6 - WholeStageCodegen (10) - Project [c_customer_sk,ca_zip] - SortMergeJoin [c_current_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (7) - Sort [c_current_addr_sk] - InputAdapter - Exchange [c_current_addr_sk] #7 - WholeStageCodegen (6) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - WholeStageCodegen (9) - Sort [ca_address_sk] - InputAdapter - Exchange [ca_address_sk] #8 - WholeStageCodegen (8) - Filter [ca_address_sk,ca_zip] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_zip] + BroadcastExchange #8 + WholeStageCodegen (11) + Project [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact] + Filter [i_manager_id,i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manufact_id,i_manufact,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index ffcf6bd4f6d47..093c4eed6cf11 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -92,7 +92,7 @@ Input [11]: [ss_item_sk#1, 
ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (10) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (11) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] @@ -114,7 +114,7 @@ Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (15) Exchange Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#18] (16) Sort [codegen id : 5] Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] @@ -189,7 +189,7 @@ Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_c (32) Exchange Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (33) Sort [codegen id : 9] Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] @@ -211,7 +211,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (37) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#32] (38) Sort [codegen id : 11] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -235,7 +235,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (42) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#35] (43) HashAggregate [codegen id : 13] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] @@ -253,7 +253,7 @@ Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty# (45) Exchange Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), ENSURE_REQUIREMENTS, [id=#42] (46) HashAggregate [codegen id : 
14] Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] @@ -309,12 +309,12 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : +- Exchange (64) : : : +- * Filter (63) : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.item (61) + : : : +- Scan parquet default.customer (61) : : +- * Sort (74) : : +- Exchange (73) : : +- * Filter (72) : : +- * ColumnarToRow (71) - : : +- Scan parquet default.customer (70) + : : +- Scan parquet default.item (70) : +- * Sort (83) : +- Exchange (82) : +- * Filter (81) @@ -374,88 +374,88 @@ Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (59) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#49] (60) Sort [codegen id : 3] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 -(61) Scan parquet default.item -Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +(61) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct (62) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (63) Filter [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Condition : isnotnull(i_item_sk#6) +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (64) Exchange -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#50] (65) Sort [codegen id : 5] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (66) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Left keys [1]: [ss_customer_sk#2] +Right keys [1]: [c_customer_sk#14] Join condition: None (67) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] 
+Output [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (68) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 -(70) Scan parquet default.customer -Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +(70) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct (71) ColumnarToRow [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (72) Filter [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) (73) Exchange -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#52] (74) Sort [codegen id : 9] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (75) SortMergeJoin [codegen id : 10] -Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#14] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] Join condition: None (76) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, 
ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (77) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), ENSURE_REQUIREMENTS, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -474,7 +474,7 @@ Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (82) Exchange Input [3]: [ca_state#25, ca_zip#26, ca_country#27] -Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), true, [id=#54] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [id=#54] (83) Sort [codegen id : 13] Input [3]: [ca_state#25, ca_zip#26, ca_country#27] @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -513,7 +513,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (91) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#56] (92) Sort [codegen id : 17] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] @@ -537,7 +537,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (96) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#59] (97) HashAggregate [codegen id : 19] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] @@ -555,7 +555,7 @@ Results [2]: [sum#63, count#64] (99) Exchange Input [2]: [sum#63, count#64] -Arguments: SinglePartition, true, [id=#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] 
(100) HashAggregate [codegen id : 20] Input [2]: [sum#63, count#64] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt index 10f874f8f5543..7de562c5d59a1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,c_first_name,c_last_name,i_current_price,i_size,i_color,i_units,i_manager_id,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,21 +29,21 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country,i_current_price,i_size,i_color,i_units,i_manager_id] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (7) - Sort [ss_customer_sk] + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #14 + Exchange [ss_item_sk] #14 WholeStageCodegen (6) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - SortMergeJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (3) - Sort [ss_item_sk] + Sort [ss_customer_sk] InputAdapter - Exchange [ss_item_sk] #15 + Exchange [ss_customer_sk] #15 WholeStageCodegen (2) Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] @@ -61,24 +61,24 @@ WholeStageCodegen (14) Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter WholeStageCodegen (5) - Sort [i_item_sk] + Sort [c_customer_sk] InputAdapter - Exchange [i_item_sk] #17 + Exchange [c_customer_sk] #17 WholeStageCodegen (4) - Filter [i_item_sk] + Filter [c_customer_sk,c_birth_country] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter WholeStageCodegen (9) - Sort [c_customer_sk] + Sort [i_item_sk] InputAdapter - Exchange [c_customer_sk] #18 + Exchange [i_item_sk] #18 WholeStageCodegen (8) - Filter [c_customer_sk,c_birth_country] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter WholeStageCodegen (13) Sort [ca_country,ca_zip] diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index 73f36e3a9ca23..273950bed3546 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -92,7 +92,7 @@ Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (10) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#13] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#13] (11) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] @@ -114,7 +114,7 @@ Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (15) Exchange Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#18] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#18] (16) Sort [codegen id : 5] Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] @@ -189,7 +189,7 @@ Input [17]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_c (32) Exchange Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#29] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#29] (33) Sort [codegen id : 9] Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, s_store_name#20, s_state#22, ca_state#25] @@ -211,7 +211,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (37) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#32] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#32] (38) Sort [codegen id : 11] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -235,7 +235,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (42) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#34] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#35] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#35] (43) HashAggregate [codegen id : 13] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, 
i_units#10, i_size#8, sum#34] @@ -253,7 +253,7 @@ Results [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty# (45) Exchange Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), true, [id=#42] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, 5), ENSURE_REQUIREMENTS, [id=#42] (46) HashAggregate [codegen id : 14] Input [5]: [c_last_name#16, c_first_name#15, s_store_name#20, sum#40, isEmpty#41] @@ -309,12 +309,12 @@ Subquery:1 Hosting operator id = 47 Hosting Expression = Subquery scalar-subquer : : : +- Exchange (64) : : : +- * Filter (63) : : : +- * ColumnarToRow (62) - : : : +- Scan parquet default.item (61) + : : : +- Scan parquet default.customer (61) : : +- * Sort (74) : : +- Exchange (73) : : +- * Filter (72) : : +- * ColumnarToRow (71) - : : +- Scan parquet default.customer (70) + : : +- Scan parquet default.item (70) : +- * Sort (83) : +- Exchange (82) : +- * Filter (81) @@ -374,88 +374,88 @@ Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (59) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: hashpartitioning(ss_item_sk#1, 5), true, [id=#49] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#49] (60) Sort [codegen id : 3] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23] -Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 +Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 -(61) Scan parquet default.item -Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +(61) Scan parquet default.customer +Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] +ReadSchema: struct (62) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (63) Filter [codegen id : 4] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Condition : isnotnull(i_item_sk#6) +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) (64) Exchange -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(i_item_sk#6, 5), true, [id=#50] +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(c_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#50] (65) Sort [codegen id : 5] -Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 (66) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Left keys [1]: [ss_customer_sk#2] +Right keys 
[1]: [c_customer_sk#14] Join condition: None (67) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Output [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] (68) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: hashpartitioning(ss_customer_sk#2, 5), true, [id=#51] +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#51] (69) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] -Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17] +Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 -(70) Scan parquet default.customer -Output [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +(70) Scan parquet default.item +Output [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_item_sk)] +ReadSchema: struct (71) ColumnarToRow [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (72) Filter [codegen id : 8] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Condition : (isnotnull(c_customer_sk#14) AND isnotnull(c_birth_country#17)) +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Condition : isnotnull(i_item_sk#6) (73) Exchange -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_customer_sk#14, 5), true, [id=#52] +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#52] (74) Sort [codegen id : 9] -Input [4]: [c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: [c_customer_sk#14 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (75) SortMergeJoin [codegen id : 10] 
-Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#14] +Left keys [1]: [ss_item_sk#1] +Right keys [1]: [i_item_sk#6] Join condition: None (76) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_customer_sk#14, c_first_name#15, c_last_name#16, c_birth_country#17] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_item_sk#6, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] (77) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] -Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), true, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] +Arguments: hashpartitioning(c_birth_country#17, s_zip#23, 5), ENSURE_REQUIREMENTS, [id=#53] (78) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11] Arguments: [c_birth_country#17 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], false, 0 (79) Scan parquet default.customer_address @@ -474,7 +474,7 @@ Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (82) Exchange Input [3]: [ca_state#25, ca_zip#26, ca_country#27] -Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), true, [id=#54] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [id=#54] (83) Sort [codegen id : 13] Input [3]: [ca_state#25, ca_zip#26, ca_country#27] @@ -486,15 +486,15 @@ Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (85) Project [codegen id : 14] -Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, c_birth_country#17, ca_state#25, ca_zip#26, ca_country#27] +Output [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, 
i_manager_id#11, ca_state#25] +Input [17]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, s_zip#23, c_first_name#15, c_last_name#16, c_birth_country#17, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, ca_zip#26, ca_country#27] (86) Exchange -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), true, [id=#55] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] +Arguments: hashpartitioning(cast(ss_ticket_number#4 as bigint), cast(ss_item_sk#1 as bigint), 5), ENSURE_REQUIREMENTS, [id=#55] (87) Sort [codegen id : 15] -Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] +Input [13]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25] Arguments: [cast(ss_ticket_number#4 as bigint) ASC NULLS FIRST, cast(ss_item_sk#1 as bigint) ASC NULLS FIRST], false, 0 (88) Scan parquet default.store_returns @@ -513,7 +513,7 @@ Condition : (isnotnull(sr_ticket_number#31) AND isnotnull(sr_item_sk#30)) (91) Exchange Input [2]: [sr_item_sk#30, sr_ticket_number#31] -Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), true, [id=#56] +Arguments: hashpartitioning(sr_ticket_number#31, sr_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#56] (92) Sort [codegen id : 17] Input [2]: [sr_item_sk#30, sr_ticket_number#31] @@ -526,7 +526,7 @@ Join condition: None (94) Project [codegen id : 18] Output [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] -Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25, sr_item_sk#30, sr_ticket_number#31] +Input [15]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#20, s_state#22, c_first_name#15, c_last_name#16, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, ca_state#25, sr_item_sk#30, sr_ticket_number#31] (95) HashAggregate [codegen id : 18] Input [11]: [ss_net_paid#5, s_store_name#20, s_state#22, i_current_price#7, i_size#8, i_color#9, i_units#10, i_manager_id#11, c_first_name#15, c_last_name#16, ca_state#25] @@ -537,7 +537,7 @@ Results [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_ (96) Exchange Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] -Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, 5), true, [id=#59] +Arguments: hashpartitioning(c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, 
i_units#10, i_size#8, 5), ENSURE_REQUIREMENTS, [id=#59] (97) HashAggregate [codegen id : 19] Input [11]: [c_last_name#16, c_first_name#15, s_store_name#20, ca_state#25, s_state#22, i_color#9, i_current_price#7, i_manager_id#11, i_units#10, i_size#8, sum#58] @@ -555,7 +555,7 @@ Results [2]: [sum#63, count#64] (99) Exchange Input [2]: [sum#63, count#64] -Arguments: SinglePartition, true, [id=#65] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] (100) HashAggregate [codegen id : 20] Input [2]: [sum#63, count#64] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt index 10f874f8f5543..7de562c5d59a1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/simplified.txt @@ -21,7 +21,7 @@ WholeStageCodegen (14) InputAdapter Exchange [ss_ticket_number,ss_item_sk] #12 WholeStageCodegen (14) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,ca_state] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,c_first_name,c_last_name,i_current_price,i_size,i_color,i_units,i_manager_id,ca_state] SortMergeJoin [c_birth_country,s_zip,ca_country,ca_zip] InputAdapter WholeStageCodegen (11) @@ -29,21 +29,21 @@ WholeStageCodegen (14) InputAdapter Exchange [c_birth_country,s_zip] #13 WholeStageCodegen (10) - Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id,c_first_name,c_last_name,c_birth_country] - SortMergeJoin [ss_customer_sk,c_customer_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country,i_current_price,i_size,i_color,i_units,i_manager_id] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter WholeStageCodegen (7) - Sort [ss_customer_sk] + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk] #14 + Exchange [ss_item_sk] #14 WholeStageCodegen (6) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,i_current_price,i_size,i_color,i_units,i_manager_id] - SortMergeJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip,c_first_name,c_last_name,c_birth_country] + SortMergeJoin [ss_customer_sk,c_customer_sk] InputAdapter WholeStageCodegen (3) - Sort [ss_item_sk] + Sort [ss_customer_sk] InputAdapter - Exchange [ss_item_sk] #15 + Exchange [ss_customer_sk] #15 WholeStageCodegen (2) Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_paid,s_store_name,s_state,s_zip] BroadcastHashJoin [ss_store_sk,s_store_sk] @@ -61,24 +61,24 @@ WholeStageCodegen (14) Scan parquet default.store [s_store_sk,s_store_name,s_market_id,s_state,s_zip] InputAdapter WholeStageCodegen (5) - Sort [i_item_sk] + Sort [c_customer_sk] InputAdapter - Exchange [i_item_sk] #17 + Exchange [c_customer_sk] #17 WholeStageCodegen (4) - Filter [i_item_sk] + Filter [c_customer_sk,c_birth_country] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] + Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] InputAdapter WholeStageCodegen (9) - Sort [c_customer_sk] + Sort [i_item_sk] InputAdapter - Exchange 
[c_customer_sk] #18 + Exchange [i_item_sk] #18 WholeStageCodegen (8) - Filter [c_customer_sk,c_birth_country] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer [c_customer_sk,c_first_name,c_last_name,c_birth_country] + Scan parquet default.item [i_item_sk,i_current_price,i_size,i_color,i_units,i_manager_id] InputAdapter WholeStageCodegen (13) Sort [ca_country,ca_zip] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index c6dc3db869003..3100e574e60e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -5,57 +5,57 @@ TakeOrderedAndProject (57) +- * HashAggregate (54) +- * Project (53) +- * SortMergeJoin Inner (52) - :- * Sort (43) - : +- Exchange (42) - : +- * Project (41) - : +- * SortMergeJoin Inner (40) - : :- * Sort (27) - : : +- Exchange (26) - : : +- * Project (25) - : : +- * SortMergeJoin Inner (24) - : : :- * Sort (18) - : : : +- Exchange (17) - : : : +- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (10) - : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.store_sales (1) - : : : : +- BroadcastExchange (8) - : : : : +- * Project (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.date_dim (4) - : : : +- BroadcastExchange (14) - : : : +- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.store (11) - : : +- * Sort (23) - : : +- Exchange (22) - : : +- * Filter (21) - : : +- * ColumnarToRow (20) - : : +- Scan parquet default.item (19) - : +- * Sort (39) - : +- Exchange (38) - : +- * Project (37) - : +- * BroadcastHashJoin Inner BuildRight (36) - : :- * Filter (30) - : : +- * ColumnarToRow (29) - : : +- Scan parquet default.store_returns (28) - : +- BroadcastExchange (35) - : +- * Project (34) - : +- * Filter (33) - : +- * ColumnarToRow (32) - : +- Scan parquet default.date_dim (31) + :- * Sort (27) + : +- Exchange (26) + : +- * Project (25) + : +- * SortMergeJoin Inner (24) + : :- * Sort (18) + : : +- Exchange (17) + : : +- * Project (16) + : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.store_sales (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- BroadcastExchange (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.store (11) + : +- * Sort (23) + : +- Exchange (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.item (19) +- * Sort (51) +- Exchange (50) +- * Project (49) - +- * BroadcastHashJoin Inner BuildRight (48) - :- * Filter (46) - : +- * ColumnarToRow (45) - : +- Scan parquet default.catalog_sales (44) - +- ReusedExchange (47) + +- * SortMergeJoin Inner (48) + :- * Sort (39) + : +- Exchange (38) + : +- * Project (37) + : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Filter (30) + : : +- * ColumnarToRow (29) + : : +- Scan parquet default.store_returns (28) + : +- BroadcastExchange (35) + : 
+- * Project (34) + : +- * Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.date_dim (31) + +- * Sort (47) + +- Exchange (46) + +- * Project (45) + +- * BroadcastHashJoin Inner BuildRight (44) + :- * Filter (42) + : +- * ColumnarToRow (41) + : +- Scan parquet default.catalog_sales (40) + +- ReusedExchange (43) (1) Scan parquet default.store_sales @@ -132,7 +132,7 @@ Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_store_sk#4, ss_ticket_number#5, s (17) Exchange Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] -Arguments: hashpartitioning(ss_item_sk#2, 5), true, [id=#15] +Arguments: hashpartitioning(ss_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#15] (18) Sort [codegen id : 4] Input [6]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13] @@ -154,7 +154,7 @@ Condition : isnotnull(i_item_sk#16) (22) Exchange Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] -Arguments: hashpartitioning(i_item_sk#16, 5), true, [id=#19] +Arguments: hashpartitioning(i_item_sk#16, 5), ENSURE_REQUIREMENTS, [id=#19] (23) Sort [codegen id : 6] Input [3]: [i_item_sk#16, i_item_id#17, i_item_desc#18] @@ -171,7 +171,7 @@ Input [9]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, (26) Exchange Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), true, [id=#20] +Arguments: hashpartitioning(cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint), 5), ENSURE_REQUIREMENTS, [id=#20] (27) Sort [codegen id : 8] Input [8]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] @@ -224,89 +224,89 @@ Input [6]: [sr_returned_date_sk#21, sr_item_sk#22, sr_customer_sk#23, sr_ticket_ (38) Exchange Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), true, [id=#30] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), ENSURE_REQUIREMENTS, [id=#30] (39) Sort [codegen id : 11] Input [4]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] -Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 - -(40) SortMergeJoin [codegen id : 12] -Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] -Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] -Join condition: None - -(41) Project [codegen id : 12] -Output [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] -Input [12]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25] - -(42) Exchange -Input [8]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] -Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, 5), true, [id=#31] - -(43) Sort [codegen id : 13] -Input [8]: [ss_net_profit#6, 
s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25] Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST], false, 0 -(44) Scan parquet default.catalog_sales -Output [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +(40) Scan parquet default.catalog_sales +Output [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk)] ReadSchema: struct -(45) ColumnarToRow [codegen id : 15] -Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +(41) ColumnarToRow [codegen id : 13] +Input [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] + +(42) Filter [codegen id : 13] +Input [4]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Condition : ((isnotnull(cs_bill_customer_sk#32) AND isnotnull(cs_item_sk#33)) AND isnotnull(cs_sold_date_sk#31)) -(46) Filter [codegen id : 15] -Input [4]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Condition : ((isnotnull(cs_bill_customer_sk#33) AND isnotnull(cs_item_sk#34)) AND isnotnull(cs_sold_date_sk#32)) +(43) ReusedExchange [Reuses operator id: 35] +Output [1]: [d_date_sk#35] -(47) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#36] +(44) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [cs_sold_date_sk#31] +Right keys [1]: [d_date_sk#35] +Join condition: None + +(45) Project [codegen id : 13] +Output [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Input [5]: [cs_sold_date_sk#31, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34, d_date_sk#35] + +(46) Exchange +Input [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Arguments: hashpartitioning(cast(cs_bill_customer_sk#32 as bigint), cast(cs_item_sk#33 as bigint), 5), ENSURE_REQUIREMENTS, [id=#36] -(48) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#32] -Right keys [1]: [d_date_sk#36] +(47) Sort [codegen id : 14] +Input [3]: [cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] +Arguments: [cast(cs_bill_customer_sk#32 as bigint) ASC NULLS FIRST, cast(cs_item_sk#33 as bigint) ASC NULLS FIRST], false, 0 + +(48) SortMergeJoin [codegen id : 15] +Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] +Right keys [2]: [cast(cs_bill_customer_sk#32 as bigint), cast(cs_item_sk#33 as bigint)] Join condition: None (49) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Input [5]: [cs_sold_date_sk#32, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35, d_date_sk#36] +Output [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Input [7]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_bill_customer_sk#32, cs_item_sk#33, cs_net_profit#34] (50) Exchange -Input [3]: [cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Arguments: hashpartitioning(cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint), 5), true, [id=#37] +Input [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Arguments: hashpartitioning(sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24, 5), ENSURE_REQUIREMENTS, [id=#37] (51) Sort [codegen id : 16] -Input [3]: 
[cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] -Arguments: [cast(cs_bill_customer_sk#33 as bigint) ASC NULLS FIRST, cast(cs_item_sk#34 as bigint) ASC NULLS FIRST], false, 0 +Input [5]: [sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] +Arguments: [sr_customer_sk#23 ASC NULLS FIRST, sr_item_sk#22 ASC NULLS FIRST, sr_ticket_number#24 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#23, sr_item_sk#22] -Right keys [2]: [cast(cs_bill_customer_sk#33 as bigint), cast(cs_item_sk#34 as bigint)] +Left keys [3]: [cast(ss_customer_sk#3 as bigint), cast(ss_item_sk#2 as bigint), cast(ss_ticket_number#5 as bigint)] +Right keys [3]: [sr_customer_sk#23, sr_item_sk#22, sr_ticket_number#24] Join condition: None (53) Project [codegen id : 17] -Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] -Input [11]: [ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_net_loss#25, cs_bill_customer_sk#33, cs_item_sk#34, cs_net_profit#35] +Output [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#34, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [13]: [ss_item_sk#2, ss_customer_sk#3, ss_ticket_number#5, ss_net_profit#6, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18, sr_item_sk#22, sr_customer_sk#23, sr_ticket_number#24, sr_net_loss#25, cs_net_profit#34] (54) HashAggregate [codegen id : 17] -Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#35, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] +Input [7]: [ss_net_profit#6, sr_net_loss#25, cs_net_profit#34, s_store_id#12, s_store_name#13, i_item_id#17, i_item_desc#18] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#35))] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#6)), partial_sum(UnscaledValue(sr_net_loss#25)), partial_sum(UnscaledValue(cs_net_profit#34))] Aggregate Attributes [3]: [sum#38, sum#39, sum#40] Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] (55) Exchange Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] -Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), true, [id=#44] +Arguments: hashpartitioning(i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, 5), ENSURE_REQUIREMENTS, [id=#44] (56) HashAggregate [codegen id : 18] Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, sum#41, sum#42, sum#43] Keys [4]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13] -Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#35))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#35))#47] -Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#35))#47,17,2) AS catalog_sales_profit#50] +Functions [3]: [sum(UnscaledValue(ss_net_profit#6)), 
sum(UnscaledValue(sr_net_loss#25)), sum(UnscaledValue(cs_net_profit#34))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#6))#45, sum(UnscaledValue(sr_net_loss#25))#46, sum(UnscaledValue(cs_net_profit#34))#47] +Results [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#6))#45,17,2) AS store_sales_profit#48, MakeDecimal(sum(UnscaledValue(sr_net_loss#25))#46,17,2) AS store_returns_loss#49, MakeDecimal(sum(UnscaledValue(cs_net_profit#34))#47,17,2) AS catalog_sales_profit#50] (57) TakeOrderedAndProject Input [7]: [i_item_id#17, i_item_desc#18, s_store_id#12, s_store_name#13, store_sales_profit#48, store_returns_loss#49, catalog_sales_profit#50] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt index ad9fa718ff2bd..9b53cdaa5dc67 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/simplified.txt @@ -6,67 +6,67 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales WholeStageCodegen (17) HashAggregate [i_item_id,i_item_desc,s_store_id,s_store_name,ss_net_profit,sr_net_loss,cs_net_profit] [sum,sum,sum,sum,sum,sum] Project [ss_net_profit,sr_net_loss,cs_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] + SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] InputAdapter - WholeStageCodegen (13) - Sort [sr_customer_sk,sr_item_sk] + WholeStageCodegen (8) + Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] InputAdapter - Exchange [sr_customer_sk,sr_item_sk] #2 - WholeStageCodegen (12) - Project [ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc,sr_item_sk,sr_customer_sk,sr_net_loss] - SortMergeJoin [ss_customer_sk,ss_item_sk,ss_ticket_number,sr_customer_sk,sr_item_sk,sr_ticket_number] + Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #2 + WholeStageCodegen (7) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] + SortMergeJoin [ss_item_sk,i_item_sk] InputAdapter - WholeStageCodegen (8) - Sort [ss_customer_sk,ss_item_sk,ss_ticket_number] + WholeStageCodegen (4) + Sort [ss_item_sk] InputAdapter - Exchange [ss_customer_sk,ss_item_sk,ss_ticket_number] #3 - WholeStageCodegen (7) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name,i_item_id,i_item_desc] - SortMergeJoin [ss_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (4) - Sort [ss_item_sk] - InputAdapter - Exchange [ss_item_sk] #4 - WholeStageCodegen (3) - Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] - BroadcastHashJoin [ss_store_sk,s_store_sk] - Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet 
default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id,s_store_name] - InputAdapter - WholeStageCodegen (6) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #7 - WholeStageCodegen (5) - Filter [i_item_sk] + Exchange [ss_item_sk] #3 + WholeStageCodegen (3) + Project [ss_item_sk,ss_customer_sk,ss_ticket_number,ss_net_profit,s_store_id,s_store_name] + BroadcastHashJoin [ss_store_sk,s_store_sk] + Project [ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_item_sk,ss_ticket_number,ss_sold_date_sk,ss_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_customer_sk,ss_store_sk,ss_ticket_number,ss_net_profit] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (2) + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id,s_store_name] + InputAdapter + WholeStageCodegen (6) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #6 + WholeStageCodegen (5) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_id,i_item_desc] + InputAdapter + WholeStageCodegen (16) + Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + InputAdapter + Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #7 + WholeStageCodegen (15) + Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss,cs_net_profit] + SortMergeJoin [sr_customer_sk,sr_item_sk,cs_bill_customer_sk,cs_item_sk] InputAdapter WholeStageCodegen (11) - Sort [sr_customer_sk,sr_item_sk,sr_ticket_number] + Sort [sr_customer_sk,sr_item_sk] InputAdapter - Exchange [sr_customer_sk,sr_item_sk,sr_ticket_number] #8 + Exchange [sr_customer_sk,sr_item_sk] #8 WholeStageCodegen (10) Project [sr_item_sk,sr_customer_sk,sr_ticket_number,sr_net_loss] BroadcastHashJoin [sr_returned_date_sk,d_date_sk] @@ -82,17 +82,17 @@ TakeOrderedAndProject [i_item_id,i_item_desc,s_store_id,s_store_name,store_sales ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - WholeStageCodegen (16) - Sort [cs_bill_customer_sk,cs_item_sk] - InputAdapter - Exchange [cs_bill_customer_sk,cs_item_sk] #10 - WholeStageCodegen (15) - Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] InputAdapter - ReusedExchange [d_date_sk] #9 + WholeStageCodegen (14) + Sort [cs_bill_customer_sk,cs_item_sk] + InputAdapter + Exchange [cs_bill_customer_sk,cs_item_sk] #10 + WholeStageCodegen (13) + Project [cs_bill_customer_sk,cs_item_sk,cs_net_profit] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_bill_customer_sk,cs_item_sk,cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk,cs_net_profit] + InputAdapter + ReusedExchange [d_date_sk] #9 diff --git 
a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt index 8185680b58670..cb8522545f1d3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt @@ -9,8 +9,8 @@ TakeOrderedAndProject (67) : +- * HashAggregate (30) : +- * Project (29) : +- * BroadcastHashJoin Inner BuildRight (28) - : :- * Project (22) - : : +- * BroadcastHashJoin Inner BuildRight (21) + : :- * Project (17) + : : +- * BroadcastHashJoin Inner BuildRight (16) : : :- * Project (10) : : : +- * BroadcastHashJoin Inner BuildRight (9) : : : :- * Filter (3) @@ -21,21 +21,21 @@ TakeOrderedAndProject (67) : : : +- * Filter (6) : : : +- * ColumnarToRow (5) : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (20) - : : +- * BroadcastHashJoin LeftSemi BuildRight (19) - : : :- * Filter (13) - : : : +- * ColumnarToRow (12) - : : : +- Scan parquet default.item (11) - : : +- BroadcastExchange (18) - : : +- * Project (17) - : : +- * Filter (16) - : : +- * ColumnarToRow (15) - : : +- Scan parquet default.item (14) + : : +- BroadcastExchange (15) + : : +- * Project (14) + : : +- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.customer_address (11) : +- BroadcastExchange (27) - : +- * Project (26) - : +- * Filter (25) - : +- * ColumnarToRow (24) - : +- Scan parquet default.customer_address (23) + : +- * BroadcastHashJoin LeftSemi BuildRight (26) + : :- * Filter (20) + : : +- * ColumnarToRow (19) + : : +- Scan parquet default.item (18) + : +- BroadcastExchange (25) + : +- * Project (24) + : +- * Filter (23) + : +- * ColumnarToRow (22) + : +- Scan parquet default.item (21) :- * HashAggregate (47) : +- Exchange (46) : +- * HashAggregate (45) @@ -113,108 +113,108 @@ Join condition: None Output [3]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4] Input [5]: [ss_sold_date_sk#1, ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, d_date_sk#5] -(11) Scan parquet default.item -Output [2]: [i_item_sk#9, i_manufact_id#10] +(11) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_gmt_offset#10] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(12) ColumnarToRow [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(13) Filter [codegen id : 2] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] +Condition : ((isnotnull(ca_gmt_offset#10) AND (ca_gmt_offset#10 = -5.00)) AND isnotnull(ca_address_sk#9)) + +(14) Project [codegen id : 2] +Output [1]: [ca_address_sk#9] +Input [2]: [ca_address_sk#9, ca_gmt_offset#10] + +(15) BroadcastExchange +Input [1]: [ca_address_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] + +(16) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [ss_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None + +(17) Project [codegen id : 5] +Output [2]: [ss_item_sk#2, ss_ext_sales_price#4] +Input [4]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, ca_address_sk#9] + +(18) Scan parquet default.item +Output [2]: [i_item_sk#12, i_manufact_id#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: 
struct -(12) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#9, i_manufact_id#10] +(19) ColumnarToRow [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] -(13) Filter [codegen id : 3] -Input [2]: [i_item_sk#9, i_manufact_id#10] -Condition : isnotnull(i_item_sk#9) +(20) Filter [codegen id : 4] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Condition : isnotnull(i_item_sk#12) -(14) Scan parquet default.item -Output [2]: [i_category#11, i_manufact_id#10] +(21) Scan parquet default.item +Output [2]: [i_category#14, i_manufact_id#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics)] ReadSchema: struct -(15) ColumnarToRow [codegen id : 2] -Input [2]: [i_category#11, i_manufact_id#10] +(22) ColumnarToRow [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] -(16) Filter [codegen id : 2] -Input [2]: [i_category#11, i_manufact_id#10] -Condition : (isnotnull(i_category#11) AND (i_category#11 = Electronics)) +(23) Filter [codegen id : 3] +Input [2]: [i_category#14, i_manufact_id#13] +Condition : (isnotnull(i_category#14) AND (i_category#14 = Electronics)) -(17) Project [codegen id : 2] -Output [1]: [i_manufact_id#10 AS i_manufact_id#10#12] -Input [2]: [i_category#11, i_manufact_id#10] +(24) Project [codegen id : 3] +Output [1]: [i_manufact_id#13 AS i_manufact_id#13#15] +Input [2]: [i_category#14, i_manufact_id#13] -(18) BroadcastExchange -Input [1]: [i_manufact_id#10#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +(25) BroadcastExchange +Input [1]: [i_manufact_id#13#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] -(19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [i_manufact_id#10] -Right keys [1]: [i_manufact_id#10#12] +(26) BroadcastHashJoin [codegen id : 4] +Left keys [1]: [i_manufact_id#13] +Right keys [1]: [i_manufact_id#13#15] Join condition: None -(20) BroadcastExchange -Input [2]: [i_item_sk#9, i_manufact_id#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] - -(21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#9] -Join condition: None - -(22) Project [codegen id : 5] -Output [3]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10] -Input [5]: [ss_item_sk#2, ss_addr_sk#3, ss_ext_sales_price#4, i_item_sk#9, i_manufact_id#10] - -(23) Scan parquet default.customer_address -Output [2]: [ca_address_sk#15, ca_gmt_offset#16] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(24) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] - -(25) Filter [codegen id : 4] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] -Condition : ((isnotnull(ca_gmt_offset#16) AND (ca_gmt_offset#16 = -5.00)) AND isnotnull(ca_address_sk#15)) - -(26) Project [codegen id : 4] -Output [1]: [ca_address_sk#15] -Input [2]: [ca_address_sk#15, ca_gmt_offset#16] - (27) BroadcastExchange -Input [1]: [ca_address_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [2]: [i_item_sk#12, i_manufact_id#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: 
[ss_addr_sk#3] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [ss_item_sk#2] +Right keys [1]: [i_item_sk#12] Join condition: None (29) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#4, i_manufact_id#10] -Input [4]: [ss_addr_sk#3, ss_ext_sales_price#4, i_manufact_id#10, ca_address_sk#15] +Output [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Input [4]: [ss_item_sk#2, ss_ext_sales_price#4, i_item_sk#12, i_manufact_id#13] (30) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#4, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [ss_ext_sales_price#4, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] Aggregate Attributes [1]: [sum#18] -Results [2]: [i_manufact_id#10, sum#19] +Results [2]: [i_manufact_id#13, sum#19] (31) Exchange -Input [2]: [i_manufact_id#10, sum#19] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#20] +Input [2]: [i_manufact_id#13, sum#19] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#20] (32) HashAggregate [codegen id : 6] -Input [2]: [i_manufact_id#10, sum#19] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#19] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#21] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#21,17,2) AS total_sales#22] (33) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] @@ -242,47 +242,47 @@ Join condition: None Output [3]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26] Input [5]: [cs_sold_date_sk#23, cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, d_date_sk#5] -(39) ReusedExchange [Reuses operator id: 20] -Output [2]: [i_item_sk#9, i_manufact_id#10] +(39) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#25] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [cs_bill_addr_sk#24] +Right keys [1]: [ca_address_sk#9] Join condition: None (41) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10] -Input [5]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#9, i_manufact_id#10] +Output [2]: [cs_item_sk#25, cs_ext_sales_price#26] +Input [4]: [cs_bill_addr_sk#24, cs_item_sk#25, cs_ext_sales_price#26, ca_address_sk#9] (42) ReusedExchange [Reuses operator id: 27] -Output [1]: [ca_address_sk#15] +Output [2]: [i_item_sk#12, i_manufact_id#13] (43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#24] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [cs_item_sk#25] +Right keys [1]: [i_item_sk#12] Join condition: None (44) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#26, i_manufact_id#10] -Input [4]: [cs_bill_addr_sk#24, cs_ext_sales_price#26, i_manufact_id#10, ca_address_sk#15] +Output [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Input [4]: [cs_item_sk#25, cs_ext_sales_price#26, i_item_sk#12, i_manufact_id#13] (45) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#26, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [cs_ext_sales_price#26, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: 
[partial_sum(UnscaledValue(cs_ext_sales_price#26))] Aggregate Attributes [1]: [sum#27] -Results [2]: [i_manufact_id#10, sum#28] +Results [2]: [i_manufact_id#13, sum#28] (46) Exchange -Input [2]: [i_manufact_id#10, sum#28] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#29] +Input [2]: [i_manufact_id#13, sum#28] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#29] (47) HashAggregate [codegen id : 12] -Input [2]: [i_manufact_id#10, sum#28] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#28] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#26))] Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#26))#30] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#26))#30,17,2) AS total_sales#31] (48) Scan parquet default.web_sales Output [4]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] @@ -310,69 +310,69 @@ Join condition: None Output [3]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35] Input [5]: [ws_sold_date_sk#32, ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, d_date_sk#5] -(54) ReusedExchange [Reuses operator id: 20] -Output [2]: [i_item_sk#9, i_manufact_id#10] +(54) ReusedExchange [Reuses operator id: 15] +Output [1]: [ca_address_sk#9] (55) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#33] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ws_bill_addr_sk#34] +Right keys [1]: [ca_address_sk#9] Join condition: None (56) Project [codegen id : 17] -Output [3]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10] -Input [5]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, i_item_sk#9, i_manufact_id#10] +Output [2]: [ws_item_sk#33, ws_ext_sales_price#35] +Input [4]: [ws_item_sk#33, ws_bill_addr_sk#34, ws_ext_sales_price#35, ca_address_sk#9] (57) ReusedExchange [Reuses operator id: 27] -Output [1]: [ca_address_sk#15] +Output [2]: [i_item_sk#12, i_manufact_id#13] (58) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#34] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [ws_item_sk#33] +Right keys [1]: [i_item_sk#12] Join condition: None (59) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#35, i_manufact_id#10] -Input [4]: [ws_bill_addr_sk#34, ws_ext_sales_price#35, i_manufact_id#10, ca_address_sk#15] +Output [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Input [4]: [ws_item_sk#33, ws_ext_sales_price#35, i_item_sk#12, i_manufact_id#13] (60) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#35, i_manufact_id#10] -Keys [1]: [i_manufact_id#10] +Input [2]: [ws_ext_sales_price#35, i_manufact_id#13] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#35))] Aggregate Attributes [1]: [sum#36] -Results [2]: [i_manufact_id#10, sum#37] +Results [2]: [i_manufact_id#13, sum#37] (61) Exchange -Input [2]: [i_manufact_id#10, sum#37] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#38] +Input [2]: [i_manufact_id#13, sum#37] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#38] (62) HashAggregate [codegen id : 18] -Input [2]: [i_manufact_id#10, sum#37] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, sum#37] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#35))] Aggregate Attributes [1]: 
[sum(UnscaledValue(ws_ext_sales_price#35))#39] -Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] +Results [2]: [i_manufact_id#13, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#35))#39,17,2) AS total_sales#40] (63) Union (64) HashAggregate [codegen id : 19] -Input [2]: [i_manufact_id#10, total_sales#22] -Keys [1]: [i_manufact_id#10] +Input [2]: [i_manufact_id#13, total_sales#22] +Keys [1]: [i_manufact_id#13] Functions [1]: [partial_sum(total_sales#22)] Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Results [3]: [i_manufact_id#13, sum#43, isEmpty#44] (65) Exchange -Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_manufact_id#10, 5), true, [id=#45] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#13, 5), ENSURE_REQUIREMENTS, [id=#45] (66) HashAggregate [codegen id : 20] -Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] -Keys [1]: [i_manufact_id#10] +Input [3]: [i_manufact_id#13, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#13] Functions [1]: [sum(total_sales#22)] Aggregate Attributes [1]: [sum(total_sales#22)#46] -Results [2]: [i_manufact_id#10, sum(total_sales#22)#46 AS total_sales#47] +Results [2]: [i_manufact_id#13, sum(total_sales#22)#46 AS total_sales#47] (67) TakeOrderedAndProject -Input [2]: [i_manufact_id#10, total_sales#47] -Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#10, total_sales#47] +Input [2]: [i_manufact_id#13, total_sales#47] +Arguments: 100, [total_sales#47 ASC NULLS FIRST], [i_manufact_id#13, total_sales#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt index 410def2466e1a..14787f0bbce7b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/simplified.txt @@ -14,9 +14,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (5) HashAggregate [i_manufact_id,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_addr_sk,ca_address_sk] - Project [ss_addr_sk,ss_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ss_item_sk,i_item_sk] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_addr_sk,ca_address_sk] Project [ss_item_sk,ss_addr_sk,ss_ext_sales_price] BroadcastHashJoin [ss_sold_date_sk,d_date_sk] Filter [ss_sold_date_sk,ss_addr_sk,ss_item_sk] @@ -33,28 +33,28 @@ TakeOrderedAndProject [total_sales,i_manufact_id] Scan parquet default.date_dim [d_date_sk,d_year,d_moy] InputAdapter BroadcastExchange #4 - WholeStageCodegen (3) - BroadcastHashJoin [i_manufact_id,i_manufact_id] - Filter [i_item_sk] + WholeStageCodegen (2) + Project [ca_address_sk] + Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (2) - Project [i_manufact_id] - Filter [i_category] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_category,i_manufact_id] + Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter - BroadcastExchange #6 + BroadcastExchange #5 WholeStageCodegen (4) - Project [ca_address_sk] - Filter 
[ca_gmt_offset,ca_address_sk] + BroadcastHashJoin [i_manufact_id,i_manufact_id] + Filter [i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (3) + Project [i_manufact_id] + Filter [i_category] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_category,i_manufact_id] WholeStageCodegen (12) HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(cs_ext_sales_price)),total_sales,sum] InputAdapter @@ -62,9 +62,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (11) HashAggregate [i_manufact_id,cs_ext_sales_price] [sum,sum] Project [cs_ext_sales_price,i_manufact_id] - BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] - Project [cs_bill_addr_sk,cs_ext_sales_price,i_manufact_id] - BroadcastHashJoin [cs_item_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Project [cs_item_sk,cs_ext_sales_price] + BroadcastHashJoin [cs_bill_addr_sk,ca_address_sk] Project [cs_bill_addr_sk,cs_item_sk,cs_ext_sales_price] BroadcastHashJoin [cs_sold_date_sk,d_date_sk] Filter [cs_sold_date_sk,cs_bill_addr_sk,cs_item_sk] @@ -74,9 +74,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [ca_address_sk] #6 + ReusedExchange [i_item_sk,i_manufact_id] #5 WholeStageCodegen (18) HashAggregate [i_manufact_id,sum] [sum(UnscaledValue(ws_ext_sales_price)),total_sales,sum] InputAdapter @@ -84,9 +84,9 @@ TakeOrderedAndProject [total_sales,i_manufact_id] WholeStageCodegen (17) HashAggregate [i_manufact_id,ws_ext_sales_price] [sum,sum] Project [ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] - Project [ws_bill_addr_sk,ws_ext_sales_price,i_manufact_id] - BroadcastHashJoin [ws_item_sk,i_item_sk] + BroadcastHashJoin [ws_item_sk,i_item_sk] + Project [ws_item_sk,ws_ext_sales_price] + BroadcastHashJoin [ws_bill_addr_sk,ca_address_sk] Project [ws_item_sk,ws_bill_addr_sk,ws_ext_sales_price] BroadcastHashJoin [ws_sold_date_sk,d_date_sk] Filter [ws_sold_date_sk,ws_bill_addr_sk,ws_item_sk] @@ -96,6 +96,6 @@ TakeOrderedAndProject [total_sales,i_manufact_id] InputAdapter ReusedExchange [d_date_sk] #3 InputAdapter - ReusedExchange [i_item_sk,i_manufact_id] #4 + ReusedExchange [ca_address_sk] #4 InputAdapter - ReusedExchange [ca_address_sk] #6 + ReusedExchange [i_item_sk,i_manufact_id] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt index d7a8c103285cb..6492918d3aa13 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt @@ -6,117 +6,117 @@ TakeOrderedAndProject (21) +- * Project (17) +- * BroadcastHashJoin Inner BuildRight (16) :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project (7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * 
ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) +- BroadcastExchange (15) +- * Project (14) +- * Filter (13) +- * ColumnarToRow (12) - +- Scan parquet default.date_dim (11) + +- Scan parquet default.item (11) -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 2000)) AND isnotnull(d_date_sk#1)) -(4) Scan parquet default.item -Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] -ReadSchema: struct +(4) Project [codegen id : 1] +Output [2]: [d_date_sk#1, d_year#2] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(5) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(5) BroadcastExchange +Input [2]: [d_date_sk#1, d_year#2] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(6) Filter [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 1)) AND isnotnull(i_item_sk#4)) +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(8) BroadcastExchange -Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) (9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None (10) Project [codegen id : 3] -Output [4]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] +Output [3]: [d_year#2, 
ss_item_sk#6, ss_ext_sales_price#7] +Input [5]: [d_date_sk#1, d_year#2, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] +ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [2]: [d_date_sk#9, d_year#10] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [2]: [d_date_sk#9, d_year#10] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9, d_year#10] +Output [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#10, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Input [4]: [d_year#2, ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] (19) Exchange -Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] -Arguments: hashpartitioning(d_year#10, i_brand#6, i_brand_id#5, 5), true, [id=#15] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#10, i_brand#6, i_brand_id#5, sum#14] -Keys [3]: [d_year#10, i_brand#6, i_brand_id#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] -Results [4]: [d_year#10, i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] +Input [4]: 
[d_year#2, i_brand#10, i_brand_id#9, sum#14] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] (21) TakeOrderedAndProject -Input [4]: [d_year#10, brand_id#17, brand#18, ext_price#19] -Arguments: 100, [d_year#10 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#10, brand_id#17, brand#18, ext_price#19] +Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt index 8ed500d84390c..f4aaf3df75135 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/simplified.txt @@ -6,26 +6,26 @@ TakeOrderedAndProject [d_year,ext_price,brand_id,brand] WholeStageCodegen (3) HashAggregate [d_year,i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] Project [d_year,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [d_year,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk,d_year] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) - Project [d_date_sk,d_year] - Filter [d_moy,d_year,d_date_sk] + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt index a1257cd292e48..b8d8aa358d532 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt @@ -6,115 +6,115 @@ TakeOrderedAndProject (21) +- * Project (17) +- * BroadcastHashJoin Inner BuildRight (16) :- * Project (10) - : +- * BroadcastHashJoin Inner BuildRight (9) - : :- * Filter (3) - : : +- * ColumnarToRow (2) - : : +- Scan parquet default.store_sales (1) - : +- BroadcastExchange (8) - : +- * Project 
(7) - : +- * Filter (6) - : +- * ColumnarToRow (5) - : +- Scan parquet default.item (4) + : +- * BroadcastHashJoin Inner BuildLeft (9) + : :- BroadcastExchange (5) + : : +- * Project (4) + : : +- * Filter (3) + : : +- * ColumnarToRow (2) + : : +- Scan parquet default.date_dim (1) + : +- * Filter (8) + : +- * ColumnarToRow (7) + : +- Scan parquet default.store_sales (6) +- BroadcastExchange (15) +- * Project (14) +- * Filter (13) +- * ColumnarToRow (12) - +- Scan parquet default.date_dim (11) + +- Scan parquet default.item (11) -(1) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(1) Scan parquet default.date_dim +Output [3]: [d_date_sk#1, d_year#2, d_moy#3] Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] +(2) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(3) Filter [codegen id : 3] -Input [3]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3] -Condition : (isnotnull(ss_sold_date_sk#1) AND isnotnull(ss_item_sk#2)) +(3) Filter [codegen id : 1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] +Condition : ((((isnotnull(d_moy#3) AND isnotnull(d_year#2)) AND (d_moy#3 = 11)) AND (d_year#2 = 1999)) AND isnotnull(d_date_sk#1)) -(4) Scan parquet default.item -Output [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] -ReadSchema: struct +(4) Project [codegen id : 1] +Output [1]: [d_date_sk#1] +Input [3]: [d_date_sk#1, d_year#2, d_moy#3] -(5) ColumnarToRow [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(5) BroadcastExchange +Input [1]: [d_date_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] -(6) Filter [codegen id : 1] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] -Condition : ((isnotnull(i_manager_id#7) AND (i_manager_id#7 = 28)) AND isnotnull(i_item_sk#4)) +(6) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store_sales] +PushedFilters: [IsNotNull(ss_sold_date_sk), IsNotNull(ss_item_sk)] +ReadSchema: struct -(7) Project [codegen id : 1] -Output [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Input [4]: [i_item_sk#4, i_brand_id#5, i_brand#6, i_manager_id#7] +(7) ColumnarToRow +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(8) BroadcastExchange -Input [3]: [i_item_sk#4, i_brand_id#5, i_brand#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +(8) Filter +Input [3]: [ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] +Condition : (isnotnull(ss_sold_date_sk#5) AND isnotnull(ss_item_sk#6)) (9) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#2] -Right keys [1]: [i_item_sk#4] +Left keys [1]: [d_date_sk#1] +Right keys [1]: [ss_sold_date_sk#5] Join condition: None (10) Project [codegen id : 3] -Output [4]: 
[ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [6]: [ss_sold_date_sk#1, ss_item_sk#2, ss_ext_sales_price#3, i_item_sk#4, i_brand_id#5, i_brand#6] +Output [2]: [ss_item_sk#6, ss_ext_sales_price#7] +Input [4]: [d_date_sk#1, ss_sold_date_sk#5, ss_item_sk#6, ss_ext_sales_price#7] -(11) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#10, d_moy#11] +(11) Scan parquet default.item +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] +ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Condition : ((((isnotnull(d_moy#11) AND isnotnull(d_year#10)) AND (d_moy#11 = 11)) AND (d_year#10 = 1999)) AND isnotnull(d_date_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [1]: [d_date_sk#9] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#1] -Right keys [1]: [d_date_sk#9] +Left keys [1]: [ss_item_sk#6] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Input [5]: [ss_sold_date_sk#1, ss_ext_sales_price#3, i_brand_id#5, i_brand#6, d_date_sk#9] +Output [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#6, ss_ext_sales_price#7, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#3, i_brand_id#5, i_brand#6] -Keys [2]: [i_brand#6, i_brand_id#5] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] +Input [3]: [ss_ext_sales_price#7, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] Aggregate Attributes [1]: [sum#13] -Results [3]: [i_brand#6, i_brand_id#5, sum#14] +Results [3]: [i_brand#10, i_brand_id#9, sum#14] (19) Exchange -Input [3]: [i_brand#6, i_brand_id#5, sum#14] -Arguments: hashpartitioning(i_brand#6, i_brand_id#5, 5), true, [id=#15] +Input [3]: [i_brand#10, i_brand_id#9, sum#14] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] (20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#6, i_brand_id#5, sum#14] -Keys [2]: [i_brand#6, i_brand_id#5] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#16] -Results [3]: [i_brand_id#5 AS brand_id#17, i_brand#6 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#16,17,2) AS ext_price#19] +Input [3]: [i_brand#10, 
i_brand_id#9, sum#14] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#16] +Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#16,17,2) AS ext_price#19] (21) TakeOrderedAndProject Input [3]: [brand_id#17, brand#18, ext_price#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt index b0d0e0d809441..4f375c80678e8 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/simplified.txt @@ -6,26 +6,26 @@ TakeOrderedAndProject [ext_price,brand_id,brand] WholeStageCodegen (3) HashAggregate [i_brand,i_brand_id,ss_ext_sales_price] [sum,sum] Project [ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [ss_sold_date_sk,ss_ext_sales_price,i_brand_id,i_brand] - BroadcastHashJoin [ss_item_sk,i_item_sk] - Filter [ss_sold_date_sk,ss_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_item_sk,i_item_sk] + Project [ss_item_sk,ss_ext_sales_price] + BroadcastHashJoin [d_date_sk,ss_sold_date_sk] InputAdapter BroadcastExchange #2 WholeStageCodegen (1) - Project [i_item_sk,i_brand_id,i_brand] - Filter [i_manager_id,i_item_sk] + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] ColumnarToRow InputAdapter - Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Filter [ss_sold_date_sk,ss_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_sold_date_sk,ss_item_sk,ss_ext_sales_price] InputAdapter BroadcastExchange #3 WholeStageCodegen (2) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] + Project [i_item_sk,i_brand_id,i_brand] + Filter [i_manager_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.item [i_item_sk,i_brand_id,i_brand,i_manager_id] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index 3f8106c96379a..3007b11a1a860 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -11,60 +11,60 @@ TakeOrderedAndProject (79) : +- * BroadcastHashJoin LeftOuter BuildRight (65) : :- * Project (60) : : +- * SortMergeJoin Inner (59) - : : :- * Sort (47) - : : : +- Exchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet 
default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- BroadcastExchange (43) - : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildLeft (41) - : : : :- BroadcastExchange (37) - : : : : +- * Project (36) - : : : : +- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.date_dim (38) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- * SortMergeJoin Inner (31) + : : : :- * Sort (25) + : : : : +- Exchange (24) + : : : : +- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : +- Scan parquet default.household_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.customer_demographics (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.item (26) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.inventory (48) + : : :- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildLeft (49) + : : : :- BroadcastExchange (45) + : : : : +- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildLeft (43) + : : : : :- BroadcastExchange (39) + : : : : : +- * Project (38) + : : : : : +- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.date_dim (35) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet default.date_dim (40) + : : : +- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.inventory (46) : : +- BroadcastExchange (54) : : +- * Filter (53) : : +- * ColumnarToRow (52) @@ -185,7 +185,7 @@ Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, c (24) Exchange Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] -Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] +Arguments: 
hashpartitioning(cs_item_sk#5, 5), ENSURE_REQUIREMENTS, [id=#18] (25) Sort [codegen id : 5] Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] @@ -207,101 +207,101 @@ Condition : isnotnull(i_item_sk#19) (29) Exchange Input [2]: [i_item_sk#19, i_item_desc#20] -Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] +Arguments: hashpartitioning(i_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] (30) Sort [codegen id : 7] Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 10] +(31) SortMergeJoin [codegen id : 8] Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None -(32) Project [codegen id : 10] +(32) Project [codegen id : 8] Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] -(33) Scan parquet default.date_dim -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(33) Exchange +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: hashpartitioning(cs_item_sk#5, cs_sold_date_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] + +(34) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_sold_date_sk#1 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(36) ColumnarToRow [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(35) Filter [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) +(37) Filter [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] +Condition : ((((isnotnull(d_year#26) AND (d_year#26 = 1999)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(36) Project [codegen id : 8] -Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(38) Project [codegen id : 10] +Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(37) BroadcastExchange -Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] +(39) BroadcastExchange +Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#27] -(38) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_week_seq#28] +(40) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_week_seq#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: 
[IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(39) ColumnarToRow -Input [2]: [d_date_sk#27, d_week_seq#28] +(41) ColumnarToRow +Input [2]: [d_date_sk#28, d_week_seq#29] -(40) Filter -Input [2]: [d_date_sk#27, d_week_seq#28] -Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) +(42) Filter +Input [2]: [d_date_sk#28, d_week_seq#29] +Condition : (isnotnull(d_week_seq#29) AND isnotnull(d_date_sk#28)) -(41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#24] -Right keys [1]: [d_week_seq#28] +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#25] +Right keys [1]: [d_week_seq#29] Join condition: None -(42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] - -(43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#22] -Join condition: (d_date#16 > d_date#23 + 5 days) - -(45) Project [codegen id : 10] -Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +(44) Project [codegen id : 11] +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, d_week_seq#29] -(46) Exchange -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] +(45) BroadcastExchange +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#30] -(47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 - -(48) Scan parquet default.inventory +(46) Scan parquet default.inventory Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true Location [not included in comparison]/{warehouse_dir}/inventory] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 13] +(47) ColumnarToRow Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(50) Filter [codegen id : 13] +(48) Filter Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [d_date_sk#28] +Right keys [1]: [inv_date_sk#31] +Join condition: None + +(50) Project [codegen id : 13] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, inv_date_sk#31, inv_item_sk#32, 
inv_warehouse_sk#33, inv_quantity_on_hand#34] + (51) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true @@ -326,25 +326,25 @@ Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, d_date_sk#23, 5), ENSURE_REQUIREMENTS, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, d_date_sk#23 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#5, d_date_sk#27] -Right keys [2]: [inv_item_sk#32, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) +Left keys [2]: [cs_item_sk#5, cs_sold_date_sk#1] +Right keys [2]: [inv_item_sk#32, d_date_sk#23] +Join condition: ((inv_quantity_on_hand#34 < cs_quantity#8) AND (d_date#16 > d_date#24 + 5 days)) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -370,15 +370,15 @@ Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Arguments: 
hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), ENSURE_REQUIREMENTS, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns @@ -397,7 +397,7 @@ Condition : (isnotnull(cr_item_sk#42) AND isnotnull(cr_order_number#43)) (72) Exchange Input [2]: [cr_item_sk#42, cr_order_number#43] -Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), ENSURE_REQUIREMENTS, [id=#44] (73) Sort [codegen id : 19] Input [2]: [cr_item_sk#42, cr_order_number#43] @@ -409,28 +409,28 @@ Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#25 ASC 
NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt index 918508787c4b0..b88505ad7b9bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/simplified.txt @@ -16,95 +16,95 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + SortMergeJoin [cs_item_sk,cs_sold_date_sk,inv_item_sk,d_date_sk,inv_quantity_on_hand,cs_quantity,d_date,d_date] InputAdapter - WholeStageCodegen (11) - Sort [cs_item_sk,d_date_sk] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_sold_date_sk] InputAdapter - Exchange [cs_item_sk,d_date_sk] #3 - WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #4 - WholeStageCodegen (4) - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + Exchange [cs_item_sk,cs_sold_date_sk] #3 + WholeStageCodegen (8) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (9) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq] + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter WholeStageCodegen (14) - Sort [inv_item_sk,inv_date_sk] + Sort [inv_item_sk,d_date_sk] InputAdapter - Exchange [inv_item_sk,inv_date_sk] #11 + Exchange [inv_item_sk,d_date_sk] #9 WholeStageCodegen (13) - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastExchange #10 + WholeStageCodegen (11) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] 
InputAdapter BroadcastExchange #12 WholeStageCodegen (12) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt index 6e757528a3e68..6813696266ac5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt @@ -1,343 +1,343 @@ == Physical Plan == TakeOrderedAndProject (61) +- * Project (60) - +- * BroadcastHashJoin Inner BuildRight (59) - :- * Project (38) - : +- * SortMergeJoin Inner (37) - : :- * Sort (11) - : : +- Exchange (10) - : : +- * Project (9) - : : +- * BroadcastHashJoin Inner BuildRight (8) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.customer (1) - : : +- BroadcastExchange (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.customer_address (4) - : +- * Sort (36) - : +- Exchange (35) - : +- * Filter (34) - : +- * HashAggregate (33) - : +- Exchange (32) - : +- * HashAggregate (31) - : +- * Project (30) - : +- * SortMergeJoin Inner (29) - : :- * Sort (23) - : : +- Exchange (22) - : : +- * Project (21) - : : +- * BroadcastHashJoin Inner BuildRight (20) - : : :- * Filter (14) - : : : +- * ColumnarToRow (13) - : : : +- Scan parquet default.catalog_returns (12) - : : +- BroadcastExchange (19) - : : +- * Project (18) - : : +- * Filter (17) - : : +- * ColumnarToRow (16) - : : +- Scan parquet default.date_dim (15) - : +- * Sort (28) - : +- Exchange (27) - : +- * Filter (26) - : +- * ColumnarToRow (25) - : +- Scan parquet default.customer_address (24) - +- BroadcastExchange (58) - +- * Filter (57) - +- * HashAggregate (56) - +- Exchange (55) - +- * HashAggregate (54) - +- * HashAggregate (53) - +- Exchange (52) - +- * HashAggregate (51) - +- * Project (50) - +- * SortMergeJoin Inner (49) - :- * Sort (46) - : +- Exchange (45) - : +- * Project (44) - : +- * BroadcastHashJoin Inner BuildRight (43) - : :- * Filter (41) - : : +- * ColumnarToRow (40) - : : +- Scan parquet default.catalog_returns (39) - : +- ReusedExchange (42) - +- * Sort (48) - +- ReusedExchange (47) - - -(1) Scan parquet default.customer -Output [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] + +- * SortMergeJoin Inner (59) + :- * Sort (47) + : +- Exchange (46) + : +- * Project (45) + : +- * BroadcastHashJoin Inner BuildRight (44) + : :- * Filter (23) + : : +- * HashAggregate (22) + : : +- Exchange (21) + : : +- * HashAggregate (20) + : : +- * Project (19) + : : +- * SortMergeJoin Inner (18) + : : :- * Sort (12) + : : : +- Exchange (11) + : : : +- * Project (10) + : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : :- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.catalog_returns (1) + : : : +- BroadcastExchange (8) + : : : +- * Project (7) + : : : +- * Filter (6) + : : : +- * ColumnarToRow (5) + : : : +- Scan parquet default.date_dim (4) + : : +- * Sort (17) + : : +- Exchange (16) + : : +- * Filter (15) + : : +- * ColumnarToRow (14) + : : +- Scan parquet default.customer_address (13) + : +- BroadcastExchange (43) + : +- * Filter (42) + : +- * HashAggregate (41) + : +- Exchange (40) + : +- * HashAggregate (39) + : +- * HashAggregate (38) + : +- Exchange (37) + : +- * HashAggregate (36) + : +- * Project (35) + : +- * SortMergeJoin Inner (34) + : :- * Sort (31) + : : +- Exchange 
(30) + : : +- * Project (29) + : : +- * BroadcastHashJoin Inner BuildRight (28) + : : :- * Filter (26) + : : : +- * ColumnarToRow (25) + : : : +- Scan parquet default.catalog_returns (24) + : : +- ReusedExchange (27) + : +- * Sort (33) + : +- ReusedExchange (32) + +- * Sort (58) + +- Exchange (57) + +- * Project (56) + +- * BroadcastHashJoin Inner BuildRight (55) + :- * Filter (50) + : +- * ColumnarToRow (49) + : +- Scan parquet default.customer (48) + +- BroadcastExchange (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.customer_address (51) + + +(1) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct (2) ColumnarToRow [codegen id : 2] -Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] (3) Filter [codegen id : 2] -Input [6]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6] -Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : ((isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) AND isnotnull(cr_returning_customer_sk#2)) -(4) Scan parquet default.customer_address -Output [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +(4) Scan parquet default.date_dim +Output [2]: [d_date_sk#5, d_year#6] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +Input [2]: [d_date_sk#5, d_year#6] (6) Filter [codegen id : 1] -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Condition : ((isnotnull(ca_state#14) AND (ca_state#14 = GA)) AND isnotnull(ca_address_sk#7)) +Input [2]: [d_date_sk#5, d_year#6] +Condition : ((isnotnull(d_year#6) AND (d_year#6 = 2000)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [2]: [d_date_sk#5, d_year#6] -(7) BroadcastExchange -Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, 
ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] -(8) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#7] +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] Join condition: None -(9) Project [codegen id : 2] -Output [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Input [18]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation#4, c_first_name#5, c_last_name#6, ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] +(10) Project [codegen id : 2] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] -(10) Exchange -Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: hashpartitioning(c_customer_sk#1, 5), true, [id=#20] +(11) Exchange +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: hashpartitioning(cr_returning_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#8] -(11) Sort [codegen id : 3] -Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 +(12) Sort [codegen id : 3] +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: [cr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 -(12) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(13) Scan parquet default.customer_address +Output [2]: [ca_address_sk#9, ca_state#10] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] +ReadSchema: struct -(14) Filter [codegen id : 5] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Condition : ((isnotnull(cr_returned_date_sk#21) AND isnotnull(cr_returning_addr_sk#23)) AND 
isnotnull(cr_returning_customer_sk#22)) +(14) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] -(15) Scan parquet default.date_dim -Output [2]: [d_date_sk#25, d_year#26] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] -ReadSchema: struct +(15) Filter [codegen id : 4] +Input [2]: [ca_address_sk#9, ca_state#10] +Condition : (isnotnull(ca_address_sk#9) AND isnotnull(ca_state#10)) -(16) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#25, d_year#26] +(16) Exchange +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: hashpartitioning(ca_address_sk#9, 5), ENSURE_REQUIREMENTS, [id=#11] -(17) Filter [codegen id : 4] -Input [2]: [d_date_sk#25, d_year#26] -Condition : ((isnotnull(d_year#26) AND (d_year#26 = 2000)) AND isnotnull(d_date_sk#25)) +(17) Sort [codegen id : 5] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 -(18) Project [codegen id : 4] -Output [1]: [d_date_sk#25] -Input [2]: [d_date_sk#25, d_year#26] +(18) SortMergeJoin [codegen id : 6] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#9] +Join condition: None -(19) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +(19) Project [codegen id : 6] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#9, ca_state#10] + +(20) HashAggregate [codegen id : 6] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#12] +Results [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] + +(21) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#10, 5), ENSURE_REQUIREMENTS, [id=#14] + +(22) HashAggregate [codegen id : 15] +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#13] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#15] +Results [3]: [cr_returning_customer_sk#2 AS ctr_customer_sk#16, ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#15,17,2) AS ctr_total_return#18] + +(23) Filter [codegen id : 15] +Input [3]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18] +Condition : isnotnull(ctr_total_return#18) + +(24) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Batched: true +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] +ReadSchema: struct -(20) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#21] -Right keys [1]: [d_date_sk#25] -Join condition: None +(25) ColumnarToRow [codegen id : 8] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] -(21) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Input [5]: 
[cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] +(26) Filter [codegen id : 8] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Condition : (isnotnull(cr_returned_date_sk#1) AND isnotnull(cr_returning_addr_sk#3)) -(22) Exchange -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#28] +(27) ReusedExchange [Reuses operator id: 8] +Output [1]: [d_date_sk#5] -(23) Sort [codegen id : 6] -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 +(28) BroadcastHashJoin [codegen id : 8] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] +Join condition: None -(24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#7, ca_state#14] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] -ReadSchema: struct +(29) Project [codegen id : 8] +Output [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, d_date_sk#5] -(25) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#7, ca_state#14] +(30) Exchange +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: hashpartitioning(cr_returning_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#19] -(26) Filter [codegen id : 7] -Input [2]: [ca_address_sk#7, ca_state#14] -Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#14)) +(31) Sort [codegen id : 9] +Input [3]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4] +Arguments: [cr_returning_addr_sk#3 ASC NULLS FIRST], false, 0 -(27) Exchange -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: hashpartitioning(ca_address_sk#7, 5), true, [id=#29] +(32) ReusedExchange [Reuses operator id: 16] +Output [2]: [ca_address_sk#9, ca_state#10] -(28) Sort [codegen id : 8] -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 +(33) Sort [codegen id : 11] +Input [2]: [ca_address_sk#9, ca_state#10] +Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 -(29) SortMergeJoin [codegen id : 9] -Left keys [1]: [cr_returning_addr_sk#23] -Right keys [1]: [ca_address_sk#7] +(34) SortMergeJoin [codegen id : 12] +Left keys [1]: [cr_returning_addr_sk#3] +Right keys [1]: [ca_address_sk#9] Join condition: None -(30) Project [codegen id : 9] -Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] - -(31) HashAggregate [codegen id : 9] -Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum#30] -Results [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] - -(32) Exchange -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#31] -Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#32] - -(33) HashAggregate [codegen id : 10] -Input [3]: 
[cr_returning_customer_sk#22, ca_state#14, sum#31] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#33] -Results [3]: [cr_returning_customer_sk#22 AS ctr_customer_sk#34, ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#33,17,2) AS ctr_total_return#36] - -(34) Filter [codegen id : 10] -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Condition : isnotnull(ctr_total_return#36) - -(35) Exchange -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Arguments: hashpartitioning(ctr_customer_sk#34, 5), true, [id=#37] - -(36) Sort [codegen id : 11] -Input [3]: [ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] -Arguments: [ctr_customer_sk#34 ASC NULLS FIRST], false, 0 - -(37) SortMergeJoin [codegen id : 20] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ctr_customer_sk#34] -Join condition: None +(35) Project [codegen id : 12] +Output [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Input [5]: [cr_returning_customer_sk#2, cr_returning_addr_sk#3, cr_return_amt_inc_tax#4, ca_address_sk#9, ca_state#10] + +(36) HashAggregate [codegen id : 12] +Input [3]: [cr_returning_customer_sk#2, cr_return_amt_inc_tax#4, ca_state#10] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum#20] +Results [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] + +(37) Exchange +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] +Arguments: hashpartitioning(cr_returning_customer_sk#2, ca_state#10, 5), ENSURE_REQUIREMENTS, [id=#22] + +(38) HashAggregate [codegen id : 13] +Input [3]: [cr_returning_customer_sk#2, ca_state#10, sum#21] +Keys [2]: [cr_returning_customer_sk#2, ca_state#10] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#4))#23] +Results [2]: [ca_state#10 AS ctr_state#17, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#4))#23,17,2) AS ctr_total_return#18] + +(39) HashAggregate [codegen id : 13] +Input [2]: [ctr_state#17, ctr_total_return#18] +Keys [1]: [ctr_state#17] +Functions [1]: [partial_avg(ctr_total_return#18)] +Aggregate Attributes [2]: [sum#24, count#25] +Results [3]: [ctr_state#17, sum#26, count#27] + +(40) Exchange +Input [3]: [ctr_state#17, sum#26, count#27] +Arguments: hashpartitioning(ctr_state#17, 5), ENSURE_REQUIREMENTS, [id=#28] + +(41) HashAggregate [codegen id : 14] +Input [3]: [ctr_state#17, sum#26, count#27] +Keys [1]: [ctr_state#17] +Functions [1]: [avg(ctr_total_return#18)] +Aggregate Attributes [1]: [avg(ctr_total_return#18)#29] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#18)#29) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17 AS ctr_state#17#31] + +(42) Filter [codegen id : 14] +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(43) BroadcastExchange +Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#32] + +(44) BroadcastHashJoin [codegen id : 15] 
+Left keys [1]: [ctr_state#17] +Right keys [1]: [ctr_state#17#31] +Join condition: (cast(ctr_total_return#18 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30) + +(45) Project [codegen id : 15] +Output [2]: [ctr_customer_sk#16, ctr_total_return#18] +Input [5]: [ctr_customer_sk#16, ctr_state#17, ctr_total_return#18, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#30, ctr_state#17#31] + +(46) Exchange +Input [2]: [ctr_customer_sk#16, ctr_total_return#18] +Arguments: hashpartitioning(ctr_customer_sk#16, 5), ENSURE_REQUIREMENTS, [id=#33] + +(47) Sort [codegen id : 16] +Input [2]: [ctr_customer_sk#16, ctr_total_return#18] +Arguments: [ctr_customer_sk#16 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.customer +Output [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] +ReadSchema: struct + +(49) ColumnarToRow [codegen id : 18] +Input [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] -(38) Project [codegen id : 20] -Output [17]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36] -Input [19]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_customer_sk#34, ctr_state#35, ctr_total_return#36] +(50) Filter [codegen id : 18] +Input [6]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39] +Condition : (isnotnull(c_customer_sk#34) AND isnotnull(c_current_addr_sk#36)) -(39) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(51) Scan parquet default.customer_address +Output [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_addr_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] +ReadSchema: struct -(40) ColumnarToRow [codegen id : 13] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] +(52) ColumnarToRow [codegen id : 17] +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] -(41) Filter [codegen id : 13] -Input [4]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Condition : (isnotnull(cr_returned_date_sk#21) AND 
isnotnull(cr_returning_addr_sk#23)) +(53) Filter [codegen id : 17] +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Condition : ((isnotnull(ca_state#10) AND (ca_state#10 = GA)) AND isnotnull(ca_address_sk#9)) -(42) ReusedExchange [Reuses operator id: 19] -Output [1]: [d_date_sk#25] +(54) BroadcastExchange +Input [12]: [ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#50] -(43) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#21] -Right keys [1]: [d_date_sk#25] +(55) BroadcastHashJoin [codegen id : 18] +Left keys [1]: [c_current_addr_sk#36] +Right keys [1]: [ca_address_sk#9] Join condition: None -(44) Project [codegen id : 13] -Output [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Input [5]: [cr_returned_date_sk#21, cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, d_date_sk#25] +(56) Project [codegen id : 18] +Output [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Input [18]: [c_customer_sk#34, c_customer_id#35, c_current_addr_sk#36, c_salutation#37, c_first_name#38, c_last_name#39, ca_address_sk#9, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] -(45) Exchange -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: hashpartitioning(cr_returning_addr_sk#23, 5), true, [id=#38] +(57) Exchange +Input [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: hashpartitioning(c_customer_sk#34, 5), ENSURE_REQUIREMENTS, [id=#51] -(46) Sort [codegen id : 14] -Input [3]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24] -Arguments: [cr_returning_addr_sk#23 ASC NULLS FIRST], false, 0 +(58) Sort [codegen id : 19] +Input [16]: [c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] +Arguments: [c_customer_sk#34 ASC NULLS FIRST], false, 0 -(47) ReusedExchange [Reuses operator id: 27] -Output [2]: [ca_address_sk#7, ca_state#14] - -(48) Sort [codegen id : 16] -Input [2]: [ca_address_sk#7, ca_state#14] -Arguments: [ca_address_sk#7 ASC NULLS FIRST], false, 0 - -(49) SortMergeJoin [codegen id : 17] -Left keys [1]: [cr_returning_addr_sk#23] -Right keys [1]: [ca_address_sk#7] +(59) SortMergeJoin [codegen id : 20] +Left keys [1]: [ctr_customer_sk#16] +Right keys [1]: [c_customer_sk#34] Join condition: None -(50) Project [codegen id : 17] 
-Output [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Input [5]: [cr_returning_customer_sk#22, cr_returning_addr_sk#23, cr_return_amt_inc_tax#24, ca_address_sk#7, ca_state#14] - -(51) HashAggregate [codegen id : 17] -Input [3]: [cr_returning_customer_sk#22, cr_return_amt_inc_tax#24, ca_state#14] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum#39] -Results [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] - -(52) Exchange -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] -Arguments: hashpartitioning(cr_returning_customer_sk#22, ca_state#14, 5), true, [id=#41] - -(53) HashAggregate [codegen id : 18] -Input [3]: [cr_returning_customer_sk#22, ca_state#14, sum#40] -Keys [2]: [cr_returning_customer_sk#22, ca_state#14] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#24))#42] -Results [2]: [ca_state#14 AS ctr_state#35, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#24))#42,17,2) AS ctr_total_return#36] - -(54) HashAggregate [codegen id : 18] -Input [2]: [ctr_state#35, ctr_total_return#36] -Keys [1]: [ctr_state#35] -Functions [1]: [partial_avg(ctr_total_return#36)] -Aggregate Attributes [2]: [sum#43, count#44] -Results [3]: [ctr_state#35, sum#45, count#46] - -(55) Exchange -Input [3]: [ctr_state#35, sum#45, count#46] -Arguments: hashpartitioning(ctr_state#35, 5), true, [id=#47] - -(56) HashAggregate [codegen id : 19] -Input [3]: [ctr_state#35, sum#45, count#46] -Keys [1]: [ctr_state#35] -Functions [1]: [avg(ctr_total_return#36)] -Aggregate Attributes [1]: [avg(ctr_total_return#36)#48] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#36)#48) * 1.200000), DecimalType(24,7), true) AS (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35 AS ctr_state#35#50] - -(57) Filter [codegen id : 19] -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] -Condition : isnotnull((CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) - -(58) BroadcastExchange -Input [2]: [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#51] - -(59) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ctr_state#35] -Right keys [1]: [ctr_state#35#50] -Join condition: (cast(ctr_total_return#36 as decimal(24,7)) > (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49) - (60) Project [codegen id : 20] -Output [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] -Input [19]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#35, ctr_total_return#36, (CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))#49, ctr_state#35#50] +Output [16]: [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, 
ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] +Input [18]: [ctr_customer_sk#16, ctr_total_return#18, c_customer_sk#34, c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49] (61) TakeOrderedAndProject -Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] -Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, ca_street_number#8 ASC NULLS FIRST, ca_street_name#9 ASC NULLS FIRST, ca_street_type#10 ASC NULLS FIRST, ca_suite_number#11 ASC NULLS FIRST, ca_city#12 ASC NULLS FIRST, ca_county#13 ASC NULLS FIRST, ca_state#14 ASC NULLS FIRST, ca_zip#15 ASC NULLS FIRST, ca_country#16 ASC NULLS FIRST, ca_gmt_offset#17 ASC NULLS FIRST, ca_location_type#18 ASC NULLS FIRST, ctr_total_return#36 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#36] +Input [16]: [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] +Arguments: 100, [c_customer_id#35 ASC NULLS FIRST, c_salutation#37 ASC NULLS FIRST, c_first_name#38 ASC NULLS FIRST, c_last_name#39 ASC NULLS FIRST, ca_street_number#40 ASC NULLS FIRST, ca_street_name#41 ASC NULLS FIRST, ca_street_type#42 ASC NULLS FIRST, ca_suite_number#43 ASC NULLS FIRST, ca_city#44 ASC NULLS FIRST, ca_county#45 ASC NULLS FIRST, ca_state#10 ASC NULLS FIRST, ca_zip#46 ASC NULLS FIRST, ca_country#47 ASC NULLS FIRST, ca_gmt_offset#48 ASC NULLS FIRST, ca_location_type#49 ASC NULLS FIRST, ctr_total_return#18 ASC NULLS FIRST], [c_customer_id#35, c_salutation#37, c_first_name#38, c_last_name#39, ca_street_number#40, ca_street_name#41, ca_street_type#42, ca_suite_number#43, ca_city#44, ca_county#45, ca_state#10, ca_zip#46, ca_country#47, ca_gmt_offset#48, ca_location_type#49, ctr_total_return#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt index c603ab5194286..99677b6e39736 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/simplified.txt @@ -1,48 +1,29 @@ TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] WholeStageCodegen (20) Project 
[c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_total_return] - BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - Project [c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type,ctr_state,ctr_total_return] - SortMergeJoin [c_customer_sk,ctr_customer_sk] - InputAdapter - WholeStageCodegen (3) - Sort [c_customer_sk] - InputAdapter - Exchange [c_customer_sk] #1 - WholeStageCodegen (2) - Project [c_customer_sk,c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] - InputAdapter - WholeStageCodegen (11) - Sort [ctr_customer_sk] - InputAdapter - Exchange [ctr_customer_sk] #3 - WholeStageCodegen (10) + SortMergeJoin [ctr_customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (16) + Sort [ctr_customer_sk] + InputAdapter + Exchange [ctr_customer_sk] #1 + WholeStageCodegen (15) + Project [ctr_customer_sk,ctr_total_return] + BroadcastHashJoin [ctr_state,ctr_state,ctr_total_return,(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] Filter [ctr_total_return] HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_customer_sk,ctr_state,ctr_total_return,sum] InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #4 - WholeStageCodegen (9) + Exchange [cr_returning_customer_sk,ca_state] #2 + WholeStageCodegen (6) HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] SortMergeJoin [cr_returning_addr_sk,ca_address_sk] InputAdapter - WholeStageCodegen (6) + WholeStageCodegen (3) Sort [cr_returning_addr_sk] InputAdapter - Exchange [cr_returning_addr_sk] #5 - WholeStageCodegen (5) + Exchange [cr_returning_addr_sk] #3 + WholeStageCodegen (2) Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] BroadcastHashJoin [cr_returned_date_sk,d_date_sk] Filter [cr_returned_date_sk,cr_returning_addr_sk,cr_returning_customer_sk] @@ -50,55 +31,74 @@ TakeOrderedAndProject [c_customer_id,c_salutation,c_first_name,c_last_name,ca_st InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) + BroadcastExchange #4 + WholeStageCodegen (1) Project [d_date_sk] Filter [d_year,d_date_sk] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_date_sk,d_year] InputAdapter - WholeStageCodegen (8) + WholeStageCodegen (5) Sort [ca_address_sk] 
InputAdapter - Exchange [ca_address_sk] #7 - WholeStageCodegen (7) + Exchange [ca_address_sk] #5 + WholeStageCodegen (4) Filter [ca_address_sk,ca_state] ColumnarToRow InputAdapter Scan parquet default.customer_address [ca_address_sk,ca_state] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (14) + Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] + HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] + InputAdapter + Exchange [ctr_state] #7 + WholeStageCodegen (13) + HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] + HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + InputAdapter + Exchange [cr_returning_customer_sk,ca_state] #8 + WholeStageCodegen (12) + HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] + Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] + SortMergeJoin [cr_returning_addr_sk,ca_address_sk] + InputAdapter + WholeStageCodegen (9) + Sort [cr_returning_addr_sk] + InputAdapter + Exchange [cr_returning_addr_sk] #9 + WholeStageCodegen (8) + Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_returned_date_sk,cr_returning_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] + InputAdapter + ReusedExchange [d_date_sk] #4 + InputAdapter + WholeStageCodegen (11) + Sort [ca_address_sk] + InputAdapter + ReusedExchange [ca_address_sk,ca_state] #5 InputAdapter - BroadcastExchange #8 - WholeStageCodegen (19) - Filter [(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6)))] - HashAggregate [ctr_state,sum,count] [avg(ctr_total_return),(CAST(avg(ctr_total_return) AS DECIMAL(21,6)) * CAST(1.2 AS DECIMAL(21,6))),ctr_state,sum,count] - InputAdapter - Exchange [ctr_state] #9 - WholeStageCodegen (18) - HashAggregate [ctr_state,ctr_total_return] [sum,count,sum,count] - HashAggregate [cr_returning_customer_sk,ca_state,sum] [sum(UnscaledValue(cr_return_amt_inc_tax)),ctr_state,ctr_total_return,sum] + WholeStageCodegen (19) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #10 + WholeStageCodegen (18) + Project [c_customer_sk,c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow InputAdapter - Exchange [cr_returning_customer_sk,ca_state] #10 - WholeStageCodegen (17) - HashAggregate [cr_returning_customer_sk,ca_state,cr_return_amt_inc_tax] [sum,sum] - Project [cr_returning_customer_sk,cr_return_amt_inc_tax,ca_state] - SortMergeJoin [cr_returning_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (14) - Sort [cr_returning_addr_sk] - InputAdapter - Exchange [cr_returning_addr_sk] #11 - WholeStageCodegen (13) - Project [cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_returned_date_sk,cr_returning_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns 
[cr_returned_date_sk,cr_returning_customer_sk,cr_returning_addr_sk,cr_return_amt_inc_tax] - InputAdapter - ReusedExchange [d_date_sk] #6 - InputAdapter - WholeStageCodegen (16) - Sort [ca_address_sk] - InputAdapter - ReusedExchange [ca_address_sk,ca_state] #7 + Scan parquet default.customer [c_customer_sk,c_customer_id,c_current_addr_sk,c_salutation,c_first_name,c_last_name] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (17) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_street_number,ca_street_name,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset,ca_location_type] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt index 4e85516b594f7..6bcbe470cec50 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -8,206 +8,206 @@ +- * BroadcastHashJoin Inner BuildRight (41) :- * Project (36) : +- * BroadcastHashJoin Inner BuildRight (35) - : :- * Project (23) - : : +- * BroadcastHashJoin Inner BuildRight (22) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : :- * Project (9) - : : : : +- * BroadcastHashJoin Inner BuildRight (8) - : : : : :- * Filter (3) - : : : : : +- * ColumnarToRow (2) - : : : : : +- Scan parquet default.customer (1) - : : : : +- BroadcastExchange (7) - : : : : +- * Filter (6) - : : : : +- * ColumnarToRow (5) - : : : : +- Scan parquet default.customer_demographics (4) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.household_demographics (10) - : : +- BroadcastExchange (21) - : : +- * Project (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.customer_address (17) + : :- * Project (30) + : : +- * BroadcastHashJoin Inner BuildRight (29) + : : :- * Project (23) + : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : :- * Project (16) + : : : : +- * BroadcastHashJoin Inner BuildLeft (15) + : : : : :- BroadcastExchange (11) + : : : : : +- * Project (10) + : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : :- * Filter (3) + : : : : : : +- * ColumnarToRow (2) + : : : : : : +- Scan parquet default.catalog_returns (1) + : : : : : +- BroadcastExchange (8) + : : : : : +- * Project (7) + : : : : : +- * Filter (6) + : : : : : +- * ColumnarToRow (5) + : : : : : +- Scan parquet default.date_dim (4) + : : : : +- * Filter (14) + : : : : +- * ColumnarToRow (13) + : : : : +- Scan parquet default.customer (12) + : : : +- BroadcastExchange (21) + : : : +- * Project (20) + : : : +- * Filter (19) + : : : +- * ColumnarToRow (18) + : : : +- Scan parquet default.household_demographics (17) + : : +- BroadcastExchange (28) + : : +- * Project (27) + : : +- * Filter (26) + : : +- * ColumnarToRow (25) + : : +- Scan parquet default.customer_address (24) : +- BroadcastExchange (34) - : +- * Project (33) - : +- * BroadcastHashJoin Inner BuildRight (32) - : :- * Filter (26) - : : +- * ColumnarToRow (25) - : : +- Scan parquet default.catalog_returns (24) - : +- BroadcastExchange (31) - : +- * Project (30) - : +- * Filter (29) - : +- * ColumnarToRow (28) - : +- Scan parquet default.date_dim (27) + : +- * 
Filter (33) + : +- * ColumnarToRow (32) + : +- Scan parquet default.customer_demographics (31) +- BroadcastExchange (40) +- * Filter (39) +- * ColumnarToRow (38) +- Scan parquet default.call_center (37) -(1) Scan parquet default.customer -Output [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +(1) Scan parquet default.catalog_returns +Output [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer] -PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/catalog_returns] +PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] +ReadSchema: struct -(2) ColumnarToRow [codegen id : 7] -Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] +(2) ColumnarToRow [codegen id : 2] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] -(3) Filter [codegen id : 7] -Input [4]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4] -Condition : (((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#4)) AND isnotnull(c_current_cdemo_sk#2)) AND isnotnull(c_current_hdemo_sk#3)) +(3) Filter [codegen id : 2] +Input [4]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Condition : ((isnotnull(cr_call_center_sk#3) AND isnotnull(cr_returned_date_sk#1)) AND isnotnull(cr_returning_customer_sk#2)) -(4) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +(4) Scan parquet default.date_dim +Output [3]: [d_date_sk#5, d_year#6, d_moy#7] Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_demographics] -PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] +ReadSchema: struct (5) ColumnarToRow [codegen id : 1] -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] (6) Filter [codegen id : 1] -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -Condition : ((((cd_marital_status#6 = M) AND (cd_education_status#7 = Unknown)) OR ((cd_marital_status#6 = W) AND (cd_education_status#7 = Advanced Degree))) AND isnotnull(cd_demo_sk#5)) +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] +Condition : ((((isnotnull(d_year#6) AND isnotnull(d_moy#7)) AND (d_year#6 = 1998)) AND (d_moy#7 = 11)) AND isnotnull(d_date_sk#5)) + +(7) Project [codegen id : 1] +Output [1]: [d_date_sk#5] +Input [3]: [d_date_sk#5, d_year#6, d_moy#7] -(7) BroadcastExchange -Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +(8) BroadcastExchange +Input [1]: [d_date_sk#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] -(8) BroadcastHashJoin [codegen id : 7] -Left keys [1]: 
[c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#5] +(9) BroadcastHashJoin [codegen id : 2] +Left keys [1]: [cr_returned_date_sk#1] +Right keys [1]: [d_date_sk#5] Join condition: None -(9) Project [codegen id : 7] -Output [5]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] -Input [7]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] +(10) Project [codegen id : 2] +Output [3]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Input [5]: [cr_returned_date_sk#1, cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4, d_date_sk#5] -(10) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#9, hd_buy_potential#10] -Batched: true -Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] -ReadSchema: struct +(11) BroadcastExchange +Input [3]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] -(11) ColumnarToRow [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] - -(12) Filter [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -Condition : ((isnotnull(hd_buy_potential#10) AND StartsWith(hd_buy_potential#10, Unknown)) AND isnotnull(hd_demo_sk#9)) +(12) Scan parquet default.customer +Output [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer] +PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] +ReadSchema: struct -(13) Project [codegen id : 2] -Output [1]: [hd_demo_sk#9] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +(13) ColumnarToRow +Input [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] -(14) BroadcastExchange -Input [1]: [hd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +(14) Filter +Input [4]: [c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Condition : (((isnotnull(c_customer_sk#10) AND isnotnull(c_current_addr_sk#13)) AND isnotnull(c_current_cdemo_sk#11)) AND isnotnull(c_current_hdemo_sk#12)) (15) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#9] +Left keys [1]: [cr_returning_customer_sk#2] +Right keys [1]: [c_customer_sk#10] Join condition: None (16) Project [codegen id : 7] -Output [4]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] -Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#9] +Output [5]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] +Input [7]: [cr_returning_customer_sk#2, cr_call_center_sk#3, cr_net_loss#4, c_customer_sk#10, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13] -(17) Scan parquet default.customer_address -Output [2]: [ca_address_sk#12, ca_gmt_offset#13] +(17) Scan parquet default.household_demographics +Output [2]: [hd_demo_sk#14, hd_buy_potential#15] Batched: true -Location [not included in 
comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/household_demographics] +PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] +ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] (19) Filter [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] -Condition : ((isnotnull(ca_gmt_offset#13) AND (ca_gmt_offset#13 = -7.00)) AND isnotnull(ca_address_sk#12)) +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] +Condition : ((isnotnull(hd_buy_potential#15) AND StartsWith(hd_buy_potential#15, Unknown)) AND isnotnull(hd_demo_sk#14)) (20) Project [codegen id : 3] -Output [1]: [ca_address_sk#12] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Output [1]: [hd_demo_sk#14] +Input [2]: [hd_demo_sk#14, hd_buy_potential#15] (21) BroadcastExchange -Input [1]: [ca_address_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#4] -Right keys [1]: [ca_address_sk#12] +Left keys [1]: [c_current_hdemo_sk#12] +Right keys [1]: [hd_demo_sk#14] Join condition: None (23) Project [codegen id : 7] -Output [3]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7] -Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#12] +Output [4]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_addr_sk#13] +Input [6]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_hdemo_sk#12, c_current_addr_sk#13, hd_demo_sk#14] -(24) Scan parquet default.catalog_returns -Output [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +(24) Scan parquet default.customer_address +Output [2]: [ca_address_sk#17, ca_gmt_offset#18] Batched: true -Location [not included in comparison]/{warehouse_dir}/catalog_returns] -PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returned_date_sk), IsNotNull(cr_returning_customer_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] +ReadSchema: struct -(25) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +(25) ColumnarToRow [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] -(26) Filter [codegen id : 5] -Input [4]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Condition : ((isnotnull(cr_call_center_sk#17) AND isnotnull(cr_returned_date_sk#15)) AND isnotnull(cr_returning_customer_sk#16)) +(26) Filter [codegen id : 4] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] +Condition : ((isnotnull(ca_gmt_offset#18) AND (ca_gmt_offset#18 = -7.00)) AND isnotnull(ca_address_sk#17)) -(27) Scan parquet default.date_dim -Output [3]: [d_date_sk#19, d_year#20, d_moy#21] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), 
EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] -ReadSchema: struct +(27) Project [codegen id : 4] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] -(28) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +(28) BroadcastExchange +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] -(29) Filter [codegen id : 4] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] -Condition : ((((isnotnull(d_year#20) AND isnotnull(d_moy#21)) AND (d_year#20 = 1998)) AND (d_moy#21 = 11)) AND isnotnull(d_date_sk#19)) +(29) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [c_current_addr_sk#13] +Right keys [1]: [ca_address_sk#17] +Join condition: None -(30) Project [codegen id : 4] -Output [1]: [d_date_sk#19] -Input [3]: [d_date_sk#19, d_year#20, d_moy#21] +(30) Project [codegen id : 7] +Output [3]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11] +Input [5]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, c_current_addr_sk#13, ca_address_sk#17] -(31) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +(31) Scan parquet default.customer_demographics +Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_demographics] +PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown)),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree))), IsNotNull(cd_demo_sk)] +ReadSchema: struct -(32) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#15] -Right keys [1]: [d_date_sk#19] -Join condition: None +(32) ColumnarToRow [codegen id : 5] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -(33) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Input [5]: [cr_returned_date_sk#15, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18, d_date_sk#19] +(33) Filter [codegen id : 5] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Condition : ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Unknown)) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = Advanced Degree))) AND isnotnull(cd_demo_sk#20)) (34) BroadcastExchange -Input [3]: [cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] (35) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cr_returning_customer_sk#16] +Left keys [1]: [c_current_cdemo_sk#11] +Right keys [1]: [cd_demo_sk#20] Join condition: None (36) Project [codegen id : 7] -Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18] -Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#16, cr_call_center_sk#17, cr_net_loss#18] +Output [4]: [cr_call_center_sk#3, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Input [6]: [cr_call_center_sk#3, cr_net_loss#4, c_current_cdemo_sk#11, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] 
(37) Scan parquet default.call_center Output [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] @@ -228,35 +228,35 @@ Input [4]: [cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#2 Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] (41) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_call_center_sk#17] +Left keys [1]: [cr_call_center_sk#3] Right keys [1]: [cc_call_center_sk#24] Join condition: None (42) Project [codegen id : 7] -Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] -Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#17, cr_net_loss#18, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] +Output [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Input [8]: [cr_call_center_sk#3, cr_net_loss#4, cd_marital_status#21, cd_education_status#22, cc_call_center_sk#24, cc_call_center_id#25, cc_name#26, cc_manager#27] (43) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#18, cd_marital_status#6, cd_education_status#7] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#18))] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cr_net_loss#4, cd_marital_status#21, cd_education_status#22] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#4))] Aggregate Attributes [1]: [sum#29] -Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] +Results [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] (44) Exchange -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] -Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, 5), true, [id=#31] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] +Arguments: hashpartitioning(cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, 5), ENSURE_REQUIREMENTS, [id=#31] (45) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7, sum#30] -Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#6, cd_education_status#7] -Functions [1]: [sum(UnscaledValue(cr_net_loss#18))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#18))#32] -Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#18))#32,17,2) AS Returns_Loss#36] +Input [6]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22, sum#30] +Keys [5]: [cc_call_center_id#25, cc_name#26, cc_manager#27, cd_marital_status#21, cd_education_status#22] +Functions [1]: [sum(UnscaledValue(cr_net_loss#4))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#4))#32] +Results [4]: [cc_call_center_id#25 AS Call_Center#33, cc_name#26 AS Call_Center_Name#34, cc_manager#27 AS 
Manager#35, MakeDecimal(sum(UnscaledValue(cr_net_loss#4))#32,17,2) AS Returns_Loss#36] (46) Exchange Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] -Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), true, [id=#37] +Arguments: rangepartitioning(Returns_Loss#36 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#37] (47) Sort [codegen id : 9] Input [4]: [Call_Center#33, Call_Center_Name#34, Manager#35, Returns_Loss#36] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt index 87beb3b565cc1..6c8d629feed3e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/simplified.txt @@ -10,58 +10,58 @@ WholeStageCodegen (9) HashAggregate [cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status,cr_net_loss] [sum,sum] Project [cc_call_center_id,cc_name,cc_manager,cr_net_loss,cd_marital_status,cd_education_status] BroadcastHashJoin [cr_call_center_sk,cc_call_center_sk] - Project [cd_marital_status,cd_education_status,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [c_customer_sk,cr_returning_customer_sk] - Project [c_customer_sk,cd_marital_status,cd_education_status] + Project [cr_call_center_sk,cr_net_loss,cd_marital_status,cd_education_status] + BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk] BroadcastHashJoin [c_current_addr_sk,ca_address_sk] - Project [c_customer_sk,c_current_addr_sk,cd_marital_status,cd_education_status] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk,c_current_addr_sk] BroadcastHashJoin [c_current_hdemo_sk,hd_demo_sk] - Project [c_customer_sk,c_current_hdemo_sk,c_current_addr_sk,cd_marital_status,cd_education_status] - BroadcastHashJoin [c_current_cdemo_sk,cd_demo_sk] + Project [cr_call_center_sk,cr_net_loss,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] + BroadcastHashJoin [cr_returning_customer_sk,c_customer_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (2) + Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + BroadcastHashJoin [cr_returned_date_sk,d_date_sk] + Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_year,d_moy,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] Filter [c_customer_sk,c_current_addr_sk,c_current_cdemo_sk,c_current_hdemo_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_hdemo_sk,c_current_addr_sk] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (1) - Filter [cd_marital_status,cd_education_status,cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter - BroadcastExchange #4 - WholeStageCodegen (2) + BroadcastExchange #5 + WholeStageCodegen (3) Project [hd_demo_sk] Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange 
#5 - WholeStageCodegen (3) + BroadcastExchange #6 + WholeStageCodegen (4) Project [ca_address_sk] Filter [ca_gmt_offset,ca_address_sk] ColumnarToRow InputAdapter Scan parquet default.customer_address [ca_address_sk,ca_gmt_offset] InputAdapter - BroadcastExchange #6 + BroadcastExchange #7 WholeStageCodegen (5) - Project [cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] - BroadcastHashJoin [cr_returned_date_sk,d_date_sk] - Filter [cr_call_center_sk,cr_returned_date_sk,cr_returning_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_returns [cr_returned_date_sk,cr_returning_customer_sk,cr_call_center_sk,cr_net_loss] + Filter [cd_marital_status,cd_education_status,cd_demo_sk] + ColumnarToRow InputAdapter - BroadcastExchange #7 - WholeStageCodegen (4) - Project [d_date_sk] - Filter [d_year,d_moy,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status,cd_education_status] InputAdapter BroadcastExchange #8 WholeStageCodegen (6) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt index 2d76deefcaa36..f6c5258701525 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt @@ -34,24 +34,24 @@ TakeOrderedAndProject (160) : +- * Sort (46) : +- Exchange (45) : +- * Project (44) - : +- * SortMergeJoin Inner (43) - : :- * Sort (37) - : : +- Exchange (36) - : : +- * Project (35) - : : +- * BroadcastHashJoin Inner BuildRight (34) - : : :- * Project (29) - : : : +- * Filter (28) - : : : +- * ColumnarToRow (27) - : : : +- Scan parquet default.customer (26) - : : +- BroadcastExchange (33) - : : +- * Filter (32) - : : +- * ColumnarToRow (31) - : : +- Scan parquet default.customer_address (30) - : +- * Sort (42) - : +- Exchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.customer_demographics (38) + : +- * BroadcastHashJoin Inner BuildRight (43) + : :- * Project (38) + : : +- * SortMergeJoin Inner (37) + : : :- * Sort (31) + : : : +- Exchange (30) + : : : +- * Project (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.customer (26) + : : +- * Sort (36) + : : +- Exchange (35) + : : +- * Filter (34) + : : +- * ColumnarToRow (33) + : : +- Scan parquet default.customer_demographics (32) + : +- BroadcastExchange (42) + : +- * Filter (41) + : +- * ColumnarToRow (40) + : +- Scan parquet default.customer_address (39) :- * HashAggregate (76) : +- Exchange (75) : +- * HashAggregate (74) @@ -266,7 +266,7 @@ Input [10]: [cs_bill_customer_sk#2, cs_item_sk#4, cs_quantity#5, cs_list_price#6 (24) Exchange Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] -Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), true, [id=#21] +Arguments: hashpartitioning(cs_bill_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#21] (25) Sort [codegen id : 5] Input [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -279,89 +279,89 @@ Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [9,5,12,4,1,10]), 
IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] +(27) ColumnarToRow [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(28) Filter [codegen id : 7] +(28) Filter [codegen id : 6] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] Condition : (((c_birth_month#25 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#22)) AND isnotnull(c_current_cdemo_sk#23)) AND isnotnull(c_current_addr_sk#24)) -(29) Project [codegen id : 7] +(29) Project [codegen id : 6] Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] -(30) Scan parquet default.customer_address -Output [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Batched: true -Location [not included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] -ReadSchema: struct - -(31) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(32) Filter [codegen id : 6] -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) - -(33) BroadcastExchange -Input [4]: [ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] - -(34) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] -Join condition: None - -(35) Project [codegen id : 7] -Output [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [8]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_county#28, ca_state#29, ca_country#30] - -(36) Exchange -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#32] +(30) Exchange +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] -(37) Sort [codegen id : 8] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +(31) Sort [codegen id : 7] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(38) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#33] +(32) Scan parquet default.customer_demographics +Output [1]: [cd_demo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#33] +(33) ColumnarToRow [codegen id : 8] +Input [1]: [cd_demo_sk#28] -(40) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#33] -Condition : isnotnull(cd_demo_sk#33) +(34) Filter [codegen id : 8] +Input [1]: [cd_demo_sk#28] +Condition : isnotnull(cd_demo_sk#28) -(41) Exchange -Input [1]: [cd_demo_sk#33] -Arguments: 
hashpartitioning(cd_demo_sk#33, 5), true, [id=#34] +(35) Exchange +Input [1]: [cd_demo_sk#28] +Arguments: hashpartitioning(cd_demo_sk#28, 5), ENSURE_REQUIREMENTS, [id=#29] -(42) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#33] -Arguments: [cd_demo_sk#33 ASC NULLS FIRST], false, 0 +(36) Sort [codegen id : 9] +Input [1]: [cd_demo_sk#28] +Arguments: [cd_demo_sk#28 ASC NULLS FIRST], false, 0 -(43) SortMergeJoin [codegen id : 11] +(37) SortMergeJoin [codegen id : 11] Left keys [1]: [c_current_cdemo_sk#23] -Right keys [1]: [cd_demo_sk#33] +Right keys [1]: [cd_demo_sk#28] +Join condition: None + +(38) Project [codegen id : 11] +Output [3]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, cd_demo_sk#28] + +(39) Scan parquet default.customer_address +Output [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] +ReadSchema: struct + +(40) ColumnarToRow [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] + +(41) Filter [codegen id : 10] +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) + +(42) BroadcastExchange +Input [4]: [ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] + +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [c_current_addr_sk#24] +Right keys [1]: [ca_address_sk#30] Join condition: None (44) Project [codegen id : 11] -Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30, cd_demo_sk#33] +Output [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_county#31, ca_state#32, ca_country#33] (45) Exchange -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#35] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] (46) Sort [codegen id : 12] -Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 13] @@ -370,26 +370,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (48) Project [codegen id : 13] -Output [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, 
i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#28, ca_state#29, ca_country#30] +Output [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [13]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_county#31, ca_state#32, ca_country#33] (49) HashAggregate [codegen id : 13] -Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Input [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [4]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#43, count#44, sum#45, count#46, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56] -Results [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Results [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] (50) Exchange -Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] -Arguments: hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, ca_county#28, 5), true, [id=#71] +Input [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, ca_state#32, ca_county#31, 5), ENSURE_REQUIREMENTS, [id=#71] (51) HashAggregate [codegen id : 14] -Input [18]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] -Keys [4]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28] +Input [18]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, sum#57, count#58, sum#59, count#60, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68, sum#69, count#70] +Keys [4]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#72, avg(agg2#37)#73, avg(agg3#38)#74, avg(agg4#39)#75, avg(agg5#40)#76, avg(agg6#41)#77, avg(agg7#42)#78] -Results [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, avg(agg1#36)#72 AS agg1#79, avg(agg2#37)#73 AS agg2#80, avg(agg3#38)#74 AS agg3#81, avg(agg4#39)#75 AS agg4#82, 
avg(agg5#40)#76 AS agg5#83, avg(agg6#41)#77 AS agg6#84, avg(agg7#42)#78 AS agg7#85] +Results [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, avg(agg1#36)#72 AS agg1#79, avg(agg2#37)#73 AS agg2#80, avg(agg3#38)#74 AS agg3#81, avg(agg4#39)#75 AS agg4#82, avg(agg5#40)#76 AS agg5#83, avg(agg6#41)#77 AS agg6#84, avg(agg7#42)#78 AS agg7#85] (52) ReusedExchange [Reuses operator id: 24] Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -417,41 +417,41 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (58) Scan parquet default.customer_address -Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 20] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (60) Filter [codegen id : 20] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (61) BroadcastExchange -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#86] (62) BroadcastHashJoin [codegen id : 21] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (63) Project [codegen id : 21] -Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] -Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_state#29, ca_country#30] +Output [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] +Input [7]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_state#32, ca_country#33] (64) Exchange -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#87] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#87] (65) Sort [codegen id : 22] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#29, ca_country#30] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(66) ReusedExchange [Reuses operator id: 41] +(66) ReusedExchange [Reuses operator id: 35] Output [1]: [cd_demo_sk#88] (67) Sort [codegen id : 24] @@ -464,15 +464,15 @@ Right keys [1]: [cd_demo_sk#88] Join condition: None (69) Project [codegen id : 25] -Output [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, 
c_birth_year#26, ca_state#29, ca_country#30, cd_demo_sk#88] +Output [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_state#32, ca_country#33, cd_demo_sk#88] (70) Exchange -Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#89] +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#89] (71) Sort [codegen id : 26] -Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Input [4]: [c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (72) SortMergeJoin [codegen id : 27] @@ -481,26 +481,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (73) Project [codegen id : 27] -Output [10]: [i_item_id#19, ca_country#30, ca_state#29, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [12]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_state#29, ca_country#30] +Output [10]: [i_item_id#19, ca_country#33, ca_state#32, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [12]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_state#32, ca_country#33] (74) HashAggregate [codegen id : 27] -Input [10]: [i_item_id#19, ca_country#30, ca_state#29, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Input [10]: [i_item_id#19, ca_country#33, ca_state#32, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [3]: [i_item_id#19, ca_country#33, ca_state#32] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97, sum#98, count#99, sum#100, count#101, sum#102, count#103] -Results [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Results [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] (75) Exchange -Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] -Arguments: 
hashpartitioning(i_item_id#19, ca_country#30, ca_state#29, 5), true, [id=#118] +Input [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, ca_state#32, 5), ENSURE_REQUIREMENTS, [id=#118] (76) HashAggregate [codegen id : 28] -Input [17]: [i_item_id#19, ca_country#30, ca_state#29, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] -Keys [3]: [i_item_id#19, ca_country#30, ca_state#29] +Input [17]: [i_item_id#19, ca_country#33, ca_state#32, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115, sum#116, count#117] +Keys [3]: [i_item_id#19, ca_country#33, ca_state#32] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#119, avg(agg2#37)#120, avg(agg3#38)#121, avg(agg4#39)#122, avg(agg5#40)#123, avg(agg6#41)#124, avg(agg7#42)#125] -Results [11]: [i_item_id#19, ca_country#30, ca_state#29, null AS county#126, avg(agg1#36)#119 AS agg1#127, avg(agg2#37)#120 AS agg2#128, avg(agg3#38)#121 AS agg3#129, avg(agg4#39)#122 AS agg4#130, avg(agg5#40)#123 AS agg5#131, avg(agg6#41)#124 AS agg6#132, avg(agg7#42)#125 AS agg7#133] +Results [11]: [i_item_id#19, ca_country#33, ca_state#32, null AS county#126, avg(agg1#36)#119 AS agg1#127, avg(agg2#37)#120 AS agg2#128, avg(agg3#38)#121 AS agg3#129, avg(agg4#39)#122 AS agg4#130, avg(agg5#40)#123 AS agg5#131, avg(agg6#41)#124 AS agg6#132, avg(agg7#42)#125 AS agg7#133] (77) ReusedExchange [Reuses operator id: 24] Output [8]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19] @@ -528,45 +528,45 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (83) Scan parquet default.customer_address -Output [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [3]: [ca_address_sk#30, ca_state#32, ca_country#33] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (84) ColumnarToRow [codegen id : 34] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (85) Filter [codegen id : 34] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (86) Project [codegen id : 34] -Output [2]: [ca_address_sk#27, ca_country#30] -Input [3]: [ca_address_sk#27, ca_state#29, ca_country#30] +Output [2]: [ca_address_sk#30, ca_country#33] +Input [3]: [ca_address_sk#30, ca_state#32, ca_country#33] (87) BroadcastExchange -Input [2]: [ca_address_sk#27, ca_country#30] +Input [2]: [ca_address_sk#30, ca_country#33] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#134] (88) BroadcastHashJoin [codegen id : 35] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: 
[ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (89) Project [codegen id : 35] -Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] -Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27, ca_country#30] +Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] +Input [6]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30, ca_country#33] (90) Exchange -Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] -Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), true, [id=#135] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] +Arguments: hashpartitioning(c_current_cdemo_sk#23, 5), ENSURE_REQUIREMENTS, [id=#135] (91) Sort [codegen id : 36] -Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30] +Input [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33] Arguments: [c_current_cdemo_sk#23 ASC NULLS FIRST], false, 0 -(92) ReusedExchange [Reuses operator id: 41] +(92) ReusedExchange [Reuses operator id: 35] Output [1]: [cd_demo_sk#136] (93) Sort [codegen id : 38] @@ -579,15 +579,15 @@ Right keys [1]: [cd_demo_sk#136] Join condition: None (95) Project [codegen id : 39] -Output [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#30, cd_demo_sk#136] +Output [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26, ca_country#33, cd_demo_sk#136] (96) Exchange -Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#137] +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] +Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#137] (97) Sort [codegen id : 40] -Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#30] +Input [3]: [c_customer_sk#22, c_birth_year#26, ca_country#33] Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (98) SortMergeJoin [codegen id : 41] @@ -596,26 +596,26 @@ Right keys [1]: [c_customer_sk#22] Join condition: None (99) Project [codegen id : 41] -Output [9]: [i_item_id#19, ca_country#30, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] -Input [11]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, c_birth_year#26, ca_country#30] +Output [9]: [i_item_id#19, ca_country#33, cast(cs_quantity#5 as decimal(12,2)) AS agg1#36, cast(cs_list_price#6 as decimal(12,2)) AS agg2#37, cast(cs_coupon_amt#8 as decimal(12,2)) AS agg3#38, cast(cs_sales_price#7 as decimal(12,2)) AS agg4#39, cast(cs_net_profit#9 as decimal(12,2)) AS agg5#40, cast(c_birth_year#26 as decimal(12,2)) AS agg6#41, cast(cd_dep_count#13 as decimal(12,2)) AS agg7#42] +Input [11]: [cs_bill_customer_sk#2, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9, cd_dep_count#13, i_item_id#19, c_customer_sk#22, 
c_birth_year#26, ca_country#33] (100) HashAggregate [codegen id : 41] -Input [9]: [i_item_id#19, ca_country#30, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] -Keys [2]: [i_item_id#19, ca_country#30] +Input [9]: [i_item_id#19, ca_country#33, agg1#36, agg2#37, agg3#38, agg4#39, agg5#40, agg6#41, agg7#42] +Keys [2]: [i_item_id#19, ca_country#33] Functions [7]: [partial_avg(agg1#36), partial_avg(agg2#37), partial_avg(agg3#38), partial_avg(agg4#39), partial_avg(agg5#40), partial_avg(agg6#41), partial_avg(agg7#42)] Aggregate Attributes [14]: [sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147, sum#148, count#149, sum#150, count#151] -Results [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Results [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] (101) Exchange -Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] -Arguments: hashpartitioning(i_item_id#19, ca_country#30, 5), true, [id=#166] +Input [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Arguments: hashpartitioning(i_item_id#19, ca_country#33, 5), ENSURE_REQUIREMENTS, [id=#166] (102) HashAggregate [codegen id : 42] -Input [16]: [i_item_id#19, ca_country#30, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] -Keys [2]: [i_item_id#19, ca_country#30] +Input [16]: [i_item_id#19, ca_country#33, sum#152, count#153, sum#154, count#155, sum#156, count#157, sum#158, count#159, sum#160, count#161, sum#162, count#163, sum#164, count#165] +Keys [2]: [i_item_id#19, ca_country#33] Functions [7]: [avg(agg1#36), avg(agg2#37), avg(agg3#38), avg(agg4#39), avg(agg5#40), avg(agg6#41), avg(agg7#42)] Aggregate Attributes [7]: [avg(agg1#36)#167, avg(agg2#37)#168, avg(agg3#38)#169, avg(agg4#39)#170, avg(agg5#40)#171, avg(agg6#41)#172, avg(agg7#42)#173] -Results [11]: [i_item_id#19, ca_country#30, null AS ca_state#174, null AS county#175, avg(agg1#36)#167 AS agg1#176, avg(agg2#37)#168 AS agg2#177, avg(agg3#38)#169 AS agg3#178, avg(agg4#39)#170 AS agg4#179, avg(agg5#40)#171 AS agg5#180, avg(agg6#41)#172 AS agg6#181, avg(agg7#42)#173 AS agg7#182] +Results [11]: [i_item_id#19, ca_country#33, null AS ca_state#174, null AS county#175, avg(agg1#36)#167 AS agg1#176, avg(agg2#37)#168 AS agg2#177, avg(agg3#38)#169 AS agg3#178, avg(agg4#39)#170 AS agg4#179, avg(agg5#40)#171 AS agg5#180, avg(agg6#41)#172 AS agg6#181, avg(agg7#42)#173 AS agg7#182] (103) Scan parquet default.catalog_sales Output [9]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_bill_cdemo_sk#3, cs_item_sk#4, cs_quantity#5, cs_list_price#6, cs_sales_price#7, cs_coupon_amt#8, cs_net_profit#9] @@ -674,35 +674,35 @@ Output [4]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_bi Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_month#25, c_birth_year#26] (116) Scan parquet default.customer_address -Output [2]: [ca_address_sk#27, ca_state#29] +Output [2]: [ca_address_sk#30, ca_state#32] Batched: true Location 
[not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [ND,WI,AL,NC,OK,MS,TN]), IsNotNull(ca_address_sk)] ReadSchema: struct (117) ColumnarToRow [codegen id : 45] -Input [2]: [ca_address_sk#27, ca_state#29] +Input [2]: [ca_address_sk#30, ca_state#32] (118) Filter [codegen id : 45] -Input [2]: [ca_address_sk#27, ca_state#29] -Condition : (ca_state#29 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#27)) +Input [2]: [ca_address_sk#30, ca_state#32] +Condition : (ca_state#32 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#30)) (119) Project [codegen id : 45] -Output [1]: [ca_address_sk#27] -Input [2]: [ca_address_sk#27, ca_state#29] +Output [1]: [ca_address_sk#30] +Input [2]: [ca_address_sk#30, ca_state#32] (120) BroadcastExchange -Input [1]: [ca_address_sk#27] +Input [1]: [ca_address_sk#30] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#183] (121) BroadcastHashJoin [codegen id : 46] Left keys [1]: [c_current_addr_sk#24] -Right keys [1]: [ca_address_sk#27] +Right keys [1]: [ca_address_sk#30] Join condition: None (122) Project [codegen id : 46] Output [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] -Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#27] +Input [5]: [c_customer_sk#22, c_current_cdemo_sk#23, c_current_addr_sk#24, c_birth_year#26, ca_address_sk#30] (123) BroadcastExchange Input [3]: [c_customer_sk#22, c_current_cdemo_sk#23, c_birth_year#26] @@ -765,7 +765,7 @@ Results [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, co (136) Exchange Input [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] -Arguments: hashpartitioning(i_item_id#19, 5), true, [id=#215] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#215] (137) HashAggregate [codegen id : 50] Input [15]: [i_item_id#19, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210, sum#211, count#212, sum#213, count#214] @@ -860,7 +860,7 @@ Results [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#2 (157) Exchange Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] -Arguments: SinglePartition, true, [id=#262] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#262] (158) HashAggregate [codegen id : 58] Input [14]: [sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257, sum#258, count#259, sum#260, count#261] @@ -872,6 +872,6 @@ Results [11]: [null AS i_item_id#270, null AS ca_country#271, null AS ca_state#2 (159) Union (160) TakeOrderedAndProject -Input [11]: [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] -Arguments: 100, [ca_country#30 ASC NULLS FIRST, ca_state#29 ASC NULLS FIRST, ca_county#28 ASC NULLS FIRST, i_item_id#19 ASC NULLS FIRST], [i_item_id#19, ca_country#30, ca_state#29, ca_county#28, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] +Input [11]: [i_item_id#19, ca_country#33, ca_state#32, ca_county#31, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] +Arguments: 100, [ca_country#33 ASC NULLS FIRST, ca_state#32 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#19 ASC NULLS FIRST], [i_item_id#19, 
ca_country#33, ca_state#32, ca_county#31, agg1#79, agg2#80, agg3#81, agg4#82, agg5#83, agg6#84, agg7#85] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt index 5514e335f1b51..4566929712713 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/simplified.txt @@ -54,37 +54,37 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag Exchange [c_customer_sk] #6 WholeStageCodegen (11) Project [c_customer_sk,c_birth_year,ca_county,ca_state,ca_country] - SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] - InputAdapter - WholeStageCodegen (8) - Sort [c_current_cdemo_sk] - InputAdapter - Exchange [c_current_cdemo_sk] #7 - WholeStageCodegen (7) - Project [c_customer_sk,c_current_cdemo_sk,c_birth_year,ca_county,ca_state,ca_country] - BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + BroadcastHashJoin [c_current_addr_sk,ca_address_sk] + Project [c_customer_sk,c_current_addr_sk,c_birth_year] + SortMergeJoin [c_current_cdemo_sk,cd_demo_sk] + InputAdapter + WholeStageCodegen (7) + Sort [c_current_cdemo_sk] + InputAdapter + Exchange [c_current_cdemo_sk] #7 + WholeStageCodegen (6) Project [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_year] Filter [c_birth_month,c_customer_sk,c_current_cdemo_sk,c_current_addr_sk] ColumnarToRow InputAdapter Scan parquet default.customer [c_customer_sk,c_current_cdemo_sk,c_current_addr_sk,c_birth_month,c_birth_year] - InputAdapter - BroadcastExchange #8 - WholeStageCodegen (6) - Filter [ca_state,ca_address_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] + InputAdapter + WholeStageCodegen (9) + Sort [cd_demo_sk] + InputAdapter + Exchange [cd_demo_sk] #8 + WholeStageCodegen (8) + Filter [cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk] InputAdapter - WholeStageCodegen (10) - Sort [cd_demo_sk] - InputAdapter - Exchange [cd_demo_sk] #9 - WholeStageCodegen (9) - Filter [cd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk] + BroadcastExchange #9 + WholeStageCodegen (10) + Filter [ca_state,ca_address_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state,ca_country] WholeStageCodegen (28) HashAggregate [i_item_id,ca_country,ca_state,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter @@ -130,7 +130,7 @@ TakeOrderedAndProject [ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag WholeStageCodegen (24) Sort [cd_demo_sk] InputAdapter - ReusedExchange [cd_demo_sk] #9 + ReusedExchange [cd_demo_sk] #8 WholeStageCodegen (42) HashAggregate [i_item_id,ca_country,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter @@ -177,7 +177,7 @@ TakeOrderedAndProject 
[ca_country,ca_state,ca_county,i_item_id,agg1,agg2,agg3,ag WholeStageCodegen (38) Sort [cd_demo_sk] InputAdapter - ReusedExchange [cd_demo_sk] #9 + ReusedExchange [cd_demo_sk] #8 WholeStageCodegen (50) HashAggregate [i_item_id,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] [avg(agg1),avg(agg2),avg(agg3),avg(agg4),avg(agg5),avg(agg6),avg(agg7),ca_country,ca_state,county,agg1,agg2,agg3,agg4,agg5,agg6,agg7,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count,sum,count] InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index a7f328537b7ac..04ff822b1ce52 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -11,60 +11,60 @@ TakeOrderedAndProject (79) : +- * BroadcastHashJoin LeftOuter BuildRight (65) : :- * Project (60) : : +- * SortMergeJoin Inner (59) - : : :- * Sort (47) - : : : +- Exchange (46) - : : : +- * Project (45) - : : : +- * BroadcastHashJoin Inner BuildRight (44) - : : : :- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (17) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (16) - : : : : : : :- * Project (10) - : : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) - : : : : : : : :- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- BroadcastExchange (8) - : : : : : : : +- * Project (7) - : : : : : : : +- * Filter (6) - : : : : : : : +- * ColumnarToRow (5) - : : : : : : : +- Scan parquet default.household_demographics (4) - : : : : : : +- BroadcastExchange (15) - : : : : : : +- * Project (14) - : : : : : : +- * Filter (13) - : : : : : : +- * ColumnarToRow (12) - : : : : : : +- Scan parquet default.customer_demographics (11) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Filter (20) - : : : : : +- * ColumnarToRow (19) - : : : : : +- Scan parquet default.date_dim (18) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - : : : : +- Scan parquet default.item (26) - : : : +- BroadcastExchange (43) - : : : +- * Project (42) - : : : +- * BroadcastHashJoin Inner BuildLeft (41) - : : : :- BroadcastExchange (37) - : : : : +- * Project (36) - : : : : +- * Filter (35) - : : : : +- * ColumnarToRow (34) - : : : : +- Scan parquet default.date_dim (33) - : : : +- * Filter (40) - : : : +- * ColumnarToRow (39) - : : : +- Scan parquet default.date_dim (38) + : : :- * Sort (34) + : : : +- Exchange (33) + : : : +- * Project (32) + : : : +- * SortMergeJoin Inner (31) + : : : :- * Sort (25) + : : : : +- Exchange (24) + : : : : +- * Project (23) + : : : : +- * BroadcastHashJoin Inner BuildRight (22) + : : : : :- * Project (17) + : : : : : +- * BroadcastHashJoin Inner BuildRight (16) + : : : : : :- * Project (10) + : : : : : : +- * BroadcastHashJoin Inner BuildRight (9) + : : : : : : :- * Filter (3) + : : : : : : : +- * ColumnarToRow (2) + : : : : : : : +- Scan parquet default.catalog_sales (1) + : : : : : : +- BroadcastExchange (8) + : : : : : : +- * Project (7) + : : : : : : +- * Filter (6) + : : : : : : +- * ColumnarToRow (5) + : : : : : : 
+- Scan parquet default.household_demographics (4) + : : : : : +- BroadcastExchange (15) + : : : : : +- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.customer_demographics (11) + : : : : +- BroadcastExchange (21) + : : : : +- * Filter (20) + : : : : +- * ColumnarToRow (19) + : : : : +- Scan parquet default.date_dim (18) + : : : +- * Sort (30) + : : : +- Exchange (29) + : : : +- * Filter (28) + : : : +- * ColumnarToRow (27) + : : : +- Scan parquet default.item (26) : : +- * Sort (58) : : +- Exchange (57) : : +- * Project (56) : : +- * BroadcastHashJoin Inner BuildRight (55) - : : :- * Filter (50) - : : : +- * ColumnarToRow (49) - : : : +- Scan parquet default.inventory (48) + : : :- * Project (50) + : : : +- * BroadcastHashJoin Inner BuildLeft (49) + : : : :- BroadcastExchange (45) + : : : : +- * Project (44) + : : : : +- * BroadcastHashJoin Inner BuildLeft (43) + : : : : :- BroadcastExchange (39) + : : : : : +- * Project (38) + : : : : : +- * Filter (37) + : : : : : +- * ColumnarToRow (36) + : : : : : +- Scan parquet default.date_dim (35) + : : : : +- * Filter (42) + : : : : +- * ColumnarToRow (41) + : : : : +- Scan parquet default.date_dim (40) + : : : +- * Filter (48) + : : : +- * ColumnarToRow (47) + : : : +- Scan parquet default.inventory (46) : : +- BroadcastExchange (54) : : +- * Filter (53) : : +- * ColumnarToRow (52) @@ -185,7 +185,7 @@ Input [8]: [cs_sold_date_sk#1, cs_ship_date_sk#2, cs_item_sk#5, cs_promo_sk#6, c (24) Exchange Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] -Arguments: hashpartitioning(cs_item_sk#5, 5), true, [id=#18] +Arguments: hashpartitioning(cs_item_sk#5, 5), ENSURE_REQUIREMENTS, [id=#18] (25) Sort [codegen id : 5] Input [6]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16] @@ -207,101 +207,101 @@ Condition : isnotnull(i_item_sk#19) (29) Exchange Input [2]: [i_item_sk#19, i_item_desc#20] -Arguments: hashpartitioning(i_item_sk#19, 5), true, [id=#21] +Arguments: hashpartitioning(i_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] (30) Sort [codegen id : 7] Input [2]: [i_item_sk#19, i_item_desc#20] Arguments: [i_item_sk#19 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 10] +(31) SortMergeJoin [codegen id : 8] Left keys [1]: [cs_item_sk#5] Right keys [1]: [i_item_sk#19] Join condition: None -(32) Project [codegen id : 10] +(32) Project [codegen id : 8] Output [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] Input [8]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_sk#19, i_item_desc#20] -(33) Scan parquet default.date_dim -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(33) Exchange +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: hashpartitioning(cs_item_sk#5, cs_sold_date_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] + +(34) Sort [codegen id : 9] +Input [7]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20] +Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_sold_date_sk#1 ASC NULLS FIRST], false, 0 + +(35) Scan parquet default.date_dim +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), 
EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct -(34) ColumnarToRow [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(36) ColumnarToRow [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(35) Filter [codegen id : 8] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) +(37) Filter [codegen id : 10] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] +Condition : ((((isnotnull(d_year#26) AND (d_year#26 = 2001)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) -(36) Project [codegen id : 8] -Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#25] +(38) Project [codegen id : 10] +Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#26] -(37) BroadcastExchange -Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#26] +(39) BroadcastExchange +Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#27] -(38) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_week_seq#28] +(40) Scan parquet default.date_dim +Output [2]: [d_date_sk#28, d_week_seq#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct -(39) ColumnarToRow -Input [2]: [d_date_sk#27, d_week_seq#28] +(41) ColumnarToRow +Input [2]: [d_date_sk#28, d_week_seq#29] -(40) Filter -Input [2]: [d_date_sk#27, d_week_seq#28] -Condition : (isnotnull(d_week_seq#28) AND isnotnull(d_date_sk#27)) +(42) Filter +Input [2]: [d_date_sk#28, d_week_seq#29] +Condition : (isnotnull(d_week_seq#29) AND isnotnull(d_date_sk#28)) -(41) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [d_week_seq#24] -Right keys [1]: [d_week_seq#28] +(43) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [d_week_seq#25] +Right keys [1]: [d_week_seq#29] Join condition: None -(42) Project [codegen id : 9] -Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Input [5]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27, d_week_seq#28] - -(43) BroadcastExchange -Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] - -(44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#1] -Right keys [1]: [d_date_sk#22] -Join condition: (d_date#16 > d_date#23 + 5 days) - -(45) Project [codegen id : 10] -Output [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Input [11]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#22, d_date#23, d_week_seq#24, d_date_sk#27] +(44) Project [codegen id : 11] +Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, d_week_seq#29] -(46) Exchange -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] 
-Arguments: hashpartitioning(cs_item_sk#5, d_date_sk#27, 5), true, [id=#30] +(45) BroadcastExchange +Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#30] -(47) Sort [codegen id : 11] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27] -Arguments: [cs_item_sk#5 ASC NULLS FIRST, d_date_sk#27 ASC NULLS FIRST], false, 0 - -(48) Scan parquet default.inventory +(46) Scan parquet default.inventory Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Batched: true Location [not included in comparison]/{warehouse_dir}/inventory] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk), IsNotNull(inv_date_sk)] ReadSchema: struct -(49) ColumnarToRow [codegen id : 13] +(47) ColumnarToRow Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] -(50) Filter [codegen id : 13] +(48) Filter Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] Condition : (((isnotnull(inv_quantity_on_hand#34) AND isnotnull(inv_item_sk#32)) AND isnotnull(inv_warehouse_sk#33)) AND isnotnull(inv_date_sk#31)) +(49) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [d_date_sk#28] +Right keys [1]: [inv_date_sk#31] +Join condition: None + +(50) Project [codegen id : 13] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#28, inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34] + (51) Scan parquet default.warehouse Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] Batched: true @@ -326,25 +326,25 @@ Right keys [1]: [w_warehouse_sk#35] Join condition: None (56) Project [codegen id : 13] -Output [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Input [6]: [inv_date_sk#31, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Input [8]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_warehouse_sk#33, inv_quantity_on_hand#34, w_warehouse_sk#35, w_warehouse_name#36] (57) Exchange -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: hashpartitioning(inv_item_sk#32, inv_date_sk#31, 5), true, [id=#38] +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: hashpartitioning(inv_item_sk#32, d_date_sk#23, 5), ENSURE_REQUIREMENTS, [id=#38] (58) Sort [codegen id : 14] -Input [4]: [inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] -Arguments: [inv_item_sk#32 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [6]: [d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Arguments: [inv_item_sk#32 ASC NULLS FIRST, d_date_sk#23 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#5, d_date_sk#27] -Right keys [2]: [inv_item_sk#32, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#34 < cs_quantity#8) +Left keys [2]: [cs_item_sk#5, cs_sold_date_sk#1] +Right keys [2]: [inv_item_sk#32, d_date_sk#23] +Join 
condition: ((inv_quantity_on_hand#34 < cs_quantity#8) AND (d_date#16 > d_date#24 + 5 days)) (60) Project [codegen id : 16] -Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [11]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, i_item_desc#20, d_week_seq#24, d_date_sk#27, inv_date_sk#31, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] +Output [6]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [13]: [cs_sold_date_sk#1, cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, cs_quantity#8, d_date#16, i_item_desc#20, d_date_sk#23, d_date#24, d_week_seq#25, inv_item_sk#32, inv_quantity_on_hand#34, w_warehouse_name#36] (61) Scan parquet default.promotion Output [1]: [p_promo_sk#39] @@ -370,15 +370,15 @@ Right keys [1]: [p_promo_sk#39] Join condition: None (66) Project [codegen id : 16] -Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, p_promo_sk#39] +Output [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_promo_sk#6, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, p_promo_sk#39] (67) Exchange -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), true, [id=#41] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Arguments: hashpartitioning(cs_item_sk#5, cs_order_number#7, 5), ENSURE_REQUIREMENTS, [id=#41] (68) Sort [codegen id : 17] -Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24] +Input [5]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25] Arguments: [cs_item_sk#5 ASC NULLS FIRST, cs_order_number#7 ASC NULLS FIRST], false, 0 (69) Scan parquet default.catalog_returns @@ -397,7 +397,7 @@ Condition : (isnotnull(cr_item_sk#42) AND isnotnull(cr_order_number#43)) (72) Exchange Input [2]: [cr_item_sk#42, cr_order_number#43] -Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), true, [id=#44] +Arguments: hashpartitioning(cr_item_sk#42, cr_order_number#43, 5), ENSURE_REQUIREMENTS, [id=#44] (73) Sort [codegen id : 19] Input [2]: [cr_item_sk#42, cr_order_number#43] @@ -409,28 +409,28 @@ Right keys [2]: [cr_item_sk#42, cr_order_number#43] Join condition: None (75) Project [codegen id : 20] -Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#24, cr_item_sk#42, cr_order_number#43] +Output [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Input [7]: [cs_item_sk#5, cs_order_number#7, w_warehouse_name#36, i_item_desc#20, d_week_seq#25, cr_item_sk#42, cr_order_number#43] (76) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#24] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [3]: [w_warehouse_name#36, i_item_desc#20, d_week_seq#25] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#45] -Results [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] +Results [4]: [i_item_desc#20, 
w_warehouse_name#36, d_week_seq#25, count#46] (77) Exchange -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#24, 5), true, [id=#47] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Arguments: hashpartitioning(i_item_desc#20, w_warehouse_name#36, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#47] (78) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count#46] -Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24] +Input [4]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count#46] +Keys [3]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#48] -Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] +Results [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, count(1)#48 AS no_promo#49, count(1)#48 AS promo#50, count(1)#48 AS total_cnt#51] (79) TakeOrderedAndProject -Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] -Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#24, no_promo#49, promo#50, total_cnt#51] +Input [6]: [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] +Arguments: 100, [total_cnt#51 DESC NULLS LAST, i_item_desc#20 ASC NULLS FIRST, w_warehouse_name#36 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#20, w_warehouse_name#36, d_week_seq#25, no_promo#49, promo#50, total_cnt#51] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt index 918508787c4b0..b88505ad7b9bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/simplified.txt @@ -16,95 +16,95 @@ TakeOrderedAndProject [total_cnt,i_item_desc,w_warehouse_name,d_week_seq,no_prom Project [cs_item_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] BroadcastHashJoin [cs_promo_sk,p_promo_sk] Project [cs_item_sk,cs_promo_sk,cs_order_number,w_warehouse_name,i_item_desc,d_week_seq] - SortMergeJoin [cs_item_sk,d_date_sk,inv_item_sk,inv_date_sk,inv_quantity_on_hand,cs_quantity] + SortMergeJoin [cs_item_sk,cs_sold_date_sk,inv_item_sk,d_date_sk,inv_quantity_on_hand,cs_quantity,d_date,d_date] InputAdapter - WholeStageCodegen (11) - Sort [cs_item_sk,d_date_sk] + WholeStageCodegen (9) + Sort [cs_item_sk,cs_sold_date_sk] InputAdapter - Exchange [cs_item_sk,d_date_sk] #3 - WholeStageCodegen (10) - Project [cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,i_item_desc,d_week_seq,d_date_sk] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk,d_date,d_date] - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] - SortMergeJoin [cs_item_sk,i_item_sk] - InputAdapter - WholeStageCodegen (5) - Sort [cs_item_sk] - InputAdapter - Exchange [cs_item_sk] #4 - WholeStageCodegen (4) - Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] - BroadcastHashJoin [cs_ship_date_sk,d_date_sk] - Project 
[cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] - Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] - BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] - Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + Exchange [cs_item_sk,cs_sold_date_sk] #3 + WholeStageCodegen (8) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date,i_item_desc] + SortMergeJoin [cs_item_sk,i_item_sk] + InputAdapter + WholeStageCodegen (5) + Sort [cs_item_sk] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (4) + Project [cs_sold_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity,d_date] + BroadcastHashJoin [cs_ship_date_sk,d_date_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_cdemo_sk,cd_demo_sk] + Project [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] + BroadcastHashJoin [cs_bill_hdemo_sk,hd_demo_sk] + Filter [cs_quantity,cs_item_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_sold_date_sk,cs_ship_date_sk] + ColumnarToRow InputAdapter - BroadcastExchange #5 - WholeStageCodegen (1) - Project [hd_demo_sk] - Filter [hd_buy_potential,hd_demo_sk] - ColumnarToRow - InputAdapter - Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ship_date_sk,cs_bill_cdemo_sk,cs_bill_hdemo_sk,cs_item_sk,cs_promo_sk,cs_order_number,cs_quantity] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (2) - Project [cd_demo_sk] - Filter [cd_marital_status,cd_demo_sk] + BroadcastExchange #5 + WholeStageCodegen (1) + Project [hd_demo_sk] + Filter [hd_buy_potential,hd_demo_sk] ColumnarToRow InputAdapter - Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (3) - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - WholeStageCodegen (7) - Sort [i_item_sk] - InputAdapter - Exchange [i_item_sk] #8 - WholeStageCodegen (6) - Filter [i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_item_desc] + BroadcastExchange #6 + WholeStageCodegen (2) + Project [cd_demo_sk] + Filter [cd_marital_status,cd_demo_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer_demographics [cd_demo_sk,cd_marital_status] + InputAdapter + BroadcastExchange #7 + WholeStageCodegen (3) + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] InputAdapter - BroadcastExchange #9 - WholeStageCodegen (9) - Project [d_date_sk,d_date,d_week_seq,d_date_sk] - BroadcastHashJoin [d_week_seq,d_week_seq] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (8) - Project [d_date_sk,d_date,d_week_seq] - Filter [d_year,d_date_sk,d_week_seq,d_date] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] - Filter [d_week_seq,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim 
[d_date_sk,d_week_seq] + WholeStageCodegen (7) + Sort [i_item_sk] + InputAdapter + Exchange [i_item_sk] #8 + WholeStageCodegen (6) + Filter [i_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.item [i_item_sk,i_item_desc] InputAdapter WholeStageCodegen (14) - Sort [inv_item_sk,inv_date_sk] + Sort [inv_item_sk,d_date_sk] InputAdapter - Exchange [inv_item_sk,inv_date_sk] #11 + Exchange [inv_item_sk,d_date_sk] #9 WholeStageCodegen (13) - Project [inv_date_sk,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_quantity_on_hand,w_warehouse_name] BroadcastHashJoin [inv_warehouse_sk,w_warehouse_sk] - Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] - ColumnarToRow + Project [d_date_sk,d_date,d_week_seq,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastHashJoin [d_date_sk,inv_date_sk] InputAdapter - Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] + BroadcastExchange #10 + WholeStageCodegen (11) + Project [d_date_sk,d_date,d_week_seq,d_date_sk] + BroadcastHashJoin [d_week_seq,d_week_seq] + InputAdapter + BroadcastExchange #11 + WholeStageCodegen (10) + Project [d_date_sk,d_date,d_week_seq] + Filter [d_year,d_date_sk,d_week_seq,d_date] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date,d_week_seq,d_year] + Filter [d_week_seq,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq] + Filter [inv_quantity_on_hand,inv_item_sk,inv_warehouse_sk,inv_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.inventory [inv_date_sk,inv_item_sk,inv_warehouse_sk,inv_quantity_on_hand] InputAdapter BroadcastExchange #12 WholeStageCodegen (12) From 122f8f0fdb0fdc87a5970f4b39938a0496bd4b4b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Jan 2021 07:30:59 +0000 Subject: [PATCH 0962/1009] [SPARK-33919][SQL][TESTS] Unify v1 and v2 SHOW NAMESPACES tests ### What changes were proposed in this pull request? 1. Port DS V2 tests from `DataSourceV2SQLSuite` to the base test suite `ShowNamespacesSuiteBase` to run those tests for v1 catalogs. 2. Port DS v1 tests from `DDLSuite` to `ShowNamespacesSuiteBase` to run the tests for v2 catalogs too. ### Why are the changes needed? To improve test coverage. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running new test suites: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *ShowNamespacesSuite" ``` Closes #30937 from MaxGekk/unify-show-namespaces-tests. 
Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/DDLParserSuite.scala | 34 ----- .../sql/connector/DataSourceV2SQLSuite.scala | 89 ------------ .../sql/execution/command/DDLSuite.scala | 30 ---- .../command/ShowNamespacesParserSuite.scala | 70 ++++++++++ .../command/ShowNamespacesSuiteBase.scala | 131 ++++++++++++++++++ .../command/v1/ShowNamespacesSuite.scala | 60 ++++++++ .../command/v2/ShowNamespacesSuite.scala | 72 ++++++++++ .../command/ShowNamespacesSuite.scala | 43 ++++++ 8 files changed, 376 insertions(+), 153 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 9ec22a982a588..4978a3a6653c4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1807,40 +1807,6 @@ class DDLParserSuite extends AnalysisTest { UnresolvedNamespace(Seq("a", "b", "c")), "/home/user/db")) } - test("show databases: basic") { - comparePlans( - parsePlan("SHOW DATABASES"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW DATABASES LIKE 'defau*'"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), Some("defau*"))) - } - - test("show databases: FROM/IN operator is not allowed") { - def verify(sql: String): Unit = { - val exc = intercept[ParseException] { parsePlan(sql) } - assert(exc.getMessage.contains("FROM/IN operator is not allowed in SHOW DATABASES")) - } - - verify("SHOW DATABASES FROM testcat.ns1.ns2") - verify("SHOW DATABASES IN testcat.ns1.ns2") - } - - test("show namespaces") { - comparePlans( - parsePlan("SHOW NAMESPACES"), - ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) - comparePlans( - parsePlan("SHOW NAMESPACES FROM testcat.ns1.ns2"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) - comparePlans( - parsePlan("SHOW NAMESPACES IN testcat.ns1.ns2"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) - comparePlans( - parsePlan("SHOW NAMESPACES IN testcat.ns1 LIKE '*pattern*'"), - ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1")), Some("*pattern*"))) - } - test("analyze table statistics") { comparePlans(parsePlan("analyze table a.b.c compute statistics"), AnalyzeTable( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 0d61306628a44..5c67ad9cdfe2e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -1285,95 +1285,6 @@ class DataSourceV2SQLSuite } } - test("ShowNamespaces: show root namespaces with default v2 catalog") { - 
spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat") - - testShowNamespaces("SHOW NAMESPACES", Seq()) - - spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") - - testShowNamespaces("SHOW NAMESPACES", Seq("ns1", "ns2")) - testShowNamespaces("SHOW NAMESPACES LIKE '*1*'", Seq("ns1")) - } - - test("ShowNamespaces: show namespaces with v2 catalog") { - spark.sql("CREATE TABLE testcat.ns1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_1.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns1.ns1_2.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.table (id bigint) USING foo") - spark.sql("CREATE TABLE testcat.ns2.ns2_1.table (id bigint) USING foo") - - // Look up only with catalog name, which should list root namespaces. - testShowNamespaces("SHOW NAMESPACES IN testcat", Seq("ns1", "ns2")) - - // Look up sub-namespaces. - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1", Seq("ns1.ns1_1", "ns1.ns1_2")) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1 LIKE '*2*'", Seq("ns1.ns1_2")) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns2", Seq("ns2.ns2_1")) - - // Try to look up namespaces that do not exist. - testShowNamespaces("SHOW NAMESPACES IN testcat.ns3", Seq()) - testShowNamespaces("SHOW NAMESPACES IN testcat.ns1.ns3", Seq()) - } - - test("ShowNamespaces: default v2 catalog is not set") { - spark.sql("CREATE TABLE testcat.ns.table (id bigint) USING foo") - - // The current catalog is resolved to a v2 session catalog. - testShowNamespaces("SHOW NAMESPACES", Seq("default")) - } - - test("ShowNamespaces: default v2 catalog doesn't support namespace") { - spark.conf.set( - "spark.sql.catalog.testcat_no_namespace", - classOf[BasicInMemoryTableCatalog].getName) - spark.conf.set(SQLConf.DEFAULT_CATALOG.key, "testcat_no_namespace") - - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES") - } - - assert(exception.getMessage.contains("does not support namespaces")) - } - - test("ShowNamespaces: v2 catalog doesn't support namespace") { - spark.conf.set( - "spark.sql.catalog.testcat_no_namespace", - classOf[BasicInMemoryTableCatalog].getName) - - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in testcat_no_namespace") - } - - assert(exception.getMessage.contains("does not support namespaces")) - } - - test("ShowNamespaces: session catalog is used and namespace doesn't exist") { - val exception = intercept[AnalysisException] { - sql("SHOW NAMESPACES in dummy") - } - - assert(exception.getMessage.contains("Namespace 'dummy' not found")) - } - - test("ShowNamespaces: change catalog and namespace with USE statements") { - sql("CREATE TABLE testcat.ns1.ns2.table (id bigint) USING foo") - - // Initially, the current catalog is a v2 session catalog. - testShowNamespaces("SHOW NAMESPACES", Seq("default")) - - // Update the current catalog to 'testcat'. - sql("USE testcat") - testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) - - // Update the current namespace to 'ns1'. - sql("USE ns1") - // 'SHOW NAMESPACES' is not affected by the current namespace and lists root namespaces. 
- testShowNamespaces("SHOW NAMESPACES", Seq("ns1")) - } - private def testShowNamespaces( sqlText: String, expected: Seq[String]): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 4e2b67e532933..946e8412cfa7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1277,36 +1277,6 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { assertUnsupported("ALTER VIEW dbx.tab1 DROP IF EXISTS PARTITION (b='2')") } - - test("show databases") { - sql("CREATE DATABASE showdb2B") - sql("CREATE DATABASE showdb1A") - - // check the result as well as its order - checkDataset(sql("SHOW DATABASES"), Row("default"), Row("showdb1a"), Row("showdb2b")) - - checkAnswer( - sql("SHOW DATABASES LIKE '*db1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES '*db1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE 'showdb1A'"), - Row("showdb1a") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE '*db1A|*db2B'"), - Row("showdb1a") :: - Row("showdb2b") :: Nil) - - checkAnswer( - sql("SHOW DATABASES LIKE 'non-existentdb'"), - Nil) - } - test("drop view - temporary view") { val catalog = spark.sessionState.catalog sql( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala new file mode 100644 index 0000000000000..c9e5d33fea87a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesParserSuite.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.ShowNamespaces +import org.apache.spark.sql.test.SharedSparkSession + +class ShowNamespacesParserSuite extends AnalysisTest with SharedSparkSession { + test("all namespaces") { + Seq("SHOW NAMESPACES", "SHOW DATABASES").foreach { sqlCmd => + comparePlans( + parsePlan(sqlCmd), + ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), None)) + } + } + + test("basic pattern") { + Seq( + "SHOW DATABASES LIKE 'defau*'", + "SHOW NAMESPACES LIKE 'defau*'").foreach { sqlCmd => + comparePlans( + parsePlan(sqlCmd), + ShowNamespaces(UnresolvedNamespace(Seq.empty[String]), Some("defau*"))) + } + } + + test("FROM/IN operator is not allowed by SHOW DATABASES") { + Seq( + "SHOW DATABASES FROM testcat.ns1.ns2", + "SHOW DATABASES IN testcat.ns1.ns2").foreach { sqlCmd => + val errMsg = intercept[ParseException] { + parsePlan(sqlCmd) + }.getMessage + assert(errMsg.contains("FROM/IN operator is not allowed in SHOW DATABASES")) + } + } + + test("show namespaces in/from a namespace") { + comparePlans( + parsePlan("SHOW NAMESPACES FROM testcat.ns1.ns2"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) + comparePlans( + parsePlan("SHOW NAMESPACES IN testcat.ns1.ns2"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1", "ns2")), None)) + } + + test("namespaces by a pattern from another namespace") { + comparePlans( + parsePlan("SHOW NAMESPACES IN testcat.ns1 LIKE '*pattern*'"), + ShowNamespaces(UnresolvedNamespace(Seq("testcat", "ns1")), Some("*pattern*"))) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala new file mode 100644 index 0000000000000..790489e0d47ce --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{StringType, StructType} + +/** + * This base suite contains unified tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands + * that check V1 and V2 table catalogs. 
The tests that cannot run for all supported catalogs are + * located in more specific test suites: + * + * - V2 table catalog tests: `org.apache.spark.sql.execution.command.v2.ShowNamespacesSuite` + * - V1 table catalog tests: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuiteBase` + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowNamespacesSuite` + */ +trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "SHOW NAMESPACES" + + protected def runShowNamespacesSql(sqlText: String, expected: Seq[String]): Unit = { + val df = spark.sql(sqlText) + assert(df.schema === new StructType().add("namespace", StringType, false)) + checkAnswer(df, expected.map(Row(_))) + } + + protected def builtinTopNamespaces: Seq[String] = Seq.empty + + test("default namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES", builtinTopNamespaces) + } + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog", builtinTopNamespaces) + } + + test("at the top level") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE DATABASE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("ns1", "ns2") ++ builtinTopNamespaces) + } + } + + test("exact matching") { + withNamespace(s"$catalog.ns1", s"$catalog.ns2") { + sql(s"CREATE NAMESPACE $catalog.ns1") + sql(s"CREATE NAMESPACE $catalog.ns2") + Seq( + s"SHOW NAMESPACES IN $catalog LIKE 'ns2'", + s"SHOW NAMESPACES IN $catalog 'ns2'", + s"SHOW NAMESPACES FROM $catalog LIKE 'ns2'", + s"SHOW NAMESPACES FROM $catalog 'ns2'").foreach { sqlCmd => + withClue(sqlCmd) { + runShowNamespacesSql(sqlCmd, Seq("ns2")) + } + } + } + } + + test("does not match to any namespace") { + Seq( + "SHOW DATABASES LIKE 'non-existentdb'", + "SHOW NAMESPACES 'non-existentdb'").foreach { sqlCmd => + runShowNamespacesSql(sqlCmd, Seq.empty) + } + } + + test("show root namespaces with the default catalog") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES", builtinTopNamespaces) + + withNamespace("ns1", "ns2") { + sql(s"CREATE NAMESPACE ns1") + sql(s"CREATE NAMESPACE ns2") + + runShowNamespacesSql("SHOW NAMESPACES", Seq("ns1", "ns2") ++ builtinTopNamespaces) + runShowNamespacesSql("SHOW NAMESPACES LIKE '*1*'", Seq("ns1")) + } + } + } + + test("complex namespace patterns") { + withNamespace(s"$catalog.showdb2b", s"$catalog.showdb1a") { + sql(s"CREATE NAMESPACE $catalog.showdb2b") + sql(s"CREATE NAMESPACE $catalog.showdb1a") + + Seq( + "'*db1A'" -> Seq("showdb1a"), + "'*2*'" -> Seq("showdb2b"), + "'*db1A|*db2B'" -> Seq("showdb1a", "showdb2b") + ).foreach { case (pattern, expected) => + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE $pattern", expected) + } + } + } + + test("change catalog and namespace with USE statements") { + try { + withNamespace(s"$catalog.ns") { + sql(s"CREATE NAMESPACE $catalog.ns") + sql(s"USE $catalog") + runShowNamespacesSql("SHOW NAMESPACES", Seq("ns") ++ builtinTopNamespaces) + + sql("USE ns") + // 'SHOW NAMESPACES' is not affected by the current namespace and lists root namespaces. 
+ runShowNamespacesSql("SHOW NAMESPACES", Seq("ns") ++ builtinTopNamespaces) + } + } finally { + spark.sessionState.catalogManager.reset() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..fd76ef2490f35 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +/** + * This base suite contains unified tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands + * that check V1 table catalogs. The tests that cannot run for all V1 catalogs are located in more + * specific test suites: + * + * - V1 In-Memory catalog: `org.apache.spark.sql.execution.command.v1.ShowNamespacesSuite` + * - V1 Hive External catalog: `org.apache.spark.sql.hive.execution.command.ShowNamespacesSuite` + */ +trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase { + override protected def builtinTopNamespaces: Seq[String] = Seq("default") + + test("IN namespace doesn't exist") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES in dummy") + }.getMessage + assert(errMsg.contains("Namespace 'dummy' not found")) + } +} + +class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase { + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + val expected = if (caseSensitive) "AAA" else "aaa" + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq(expected, "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq(expected)) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq(expected)) + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..7a2c136eeada4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/ShowNamespacesSuite.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.connector.BasicInMemoryTableCatalog +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.SQLConf + +/** + * The class contains tests for the `SHOW NAMESPACES` command to check V2 table catalogs. + */ +class ShowNamespacesSuite extends command.ShowNamespacesSuiteBase with CommandSuiteBase { + override def sparkConf: SparkConf = super.sparkConf + .set("spark.sql.catalog.testcat_no_namespace", classOf[BasicInMemoryTableCatalog].getName) + + test("IN namespace doesn't exist") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { + runShowNamespacesSql("SHOW NAMESPACES in dummy", Seq.empty) + } + runShowNamespacesSql(s"SHOW NAMESPACES in $catalog.ns1", Seq.empty) + runShowNamespacesSql(s"SHOW NAMESPACES in $catalog.ns1.ns3", Seq.empty) + } + + test("default v2 catalog doesn't support namespace") { + withSQLConf(SQLConf.DEFAULT_CATALOG.key -> "testcat_no_namespace") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES") + }.getMessage + assert(errMsg.contains("does not support namespaces")) + } + } + + test("v2 catalog doesn't support namespace") { + val errMsg = intercept[AnalysisException] { + sql("SHOW NAMESPACES in testcat_no_namespace") + }.getMessage + assert(errMsg.contains("does not support namespaces")) + } + + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("AAA", "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq("AAA")) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq("AAA")) + } + } + } + } +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala new file mode 100644 index 0000000000000..eba2569c07736 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowNamespacesSuite.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 +import org.apache.spark.sql.internal.SQLConf + +/** + * The class contains tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands to check + * V1 Hive external table catalog. + */ +class ShowNamespacesSuite extends v1.ShowNamespacesSuiteBase with CommandSuiteBase { + test("case sensitivity") { + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withNamespace(s"$catalog.AAA", s"$catalog.bbb") { + sql(s"CREATE NAMESPACE $catalog.AAA") + sql(s"CREATE NAMESPACE $catalog.bbb") + runShowNamespacesSql( + s"SHOW NAMESPACES IN $catalog", + Seq("aaa", "bbb") ++ builtinTopNamespaces) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'AAA'", Seq("aaa")) + runShowNamespacesSql(s"SHOW NAMESPACES IN $catalog LIKE 'aaa'", Seq("aaa")) + } + } + } + } +} From 356fdc9a7fc88fd07751c40b920043eaebeb0abf Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 17:20:08 +0900 Subject: [PATCH 0963/1009] [SPARK-34007][BUILD] Downgrade scala-maven-plugin to 4.3.0 ### What changes were proposed in this pull request? This PR is a partial revert of https://github.com/apache/spark/pull/30456 by downgrading scala-maven-plugin from 4.4.0 to 4.3.0. Currently, when you run the docker release script (`./dev/create-release/do-release-docker.sh`), it fails to compile as below during incremental compilation with zinc for an unknown reason: ``` [INFO] Compiling 21 Scala sources and 3 Java sources to /opt/spark-rm/output/spark-3.1.0-bin-hadoop2.7/resource-managers/yarn/target/scala-2.12/test-classes ... 
[ERROR] ## Exception when compiling 24 sources to /opt/spark-rm/output/spark-3.1.0-bin-hadoop2.7/resource-managers/yarn/target/scala-2.12/test-classes java.lang.SecurityException: class "javax.servlet.SessionCookieConfig"'s signer information does not match signer information of other classes in the same package java.lang.ClassLoader.checkCerts(ClassLoader.java:891) java.lang.ClassLoader.preDefineClass(ClassLoader.java:661) java.lang.ClassLoader.defineClass(ClassLoader.java:754) java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142) java.net.URLClassLoader.defineClass(URLClassLoader.java:468) java.net.URLClassLoader.access$100(URLClassLoader.java:74) java.net.URLClassLoader$1.run(URLClassLoader.java:369) java.net.URLClassLoader$1.run(URLClassLoader.java:363) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findClass(URLClassLoader.java:362) java.lang.ClassLoader.loadClass(ClassLoader.java:418) java.lang.ClassLoader.loadClass(ClassLoader.java:351) java.lang.Class.getDeclaredMethods0(Native Method) java.lang.Class.privateGetDeclaredMethods(Class.java:2701) java.lang.Class.privateGetPublicMethods(Class.java:2902) java.lang.Class.getMethods(Class.java:1615) sbt.internal.inc.ClassToAPI$.toDefinitions0(ClassToAPI.scala:170) sbt.internal.inc.ClassToAPI$.$anonfun$toDefinitions$1(ClassToAPI.scala:123) scala.collection.mutable.HashMap.getOrElseUpdate(HashMap.scala:86) sbt.internal.inc.ClassToAPI$.toDefinitions(ClassToAPI.scala:123) sbt.internal.inc.ClassToAPI$.$anonfun$process$1(ClassToAPI.scala:3 ``` This happens when it builds Spark with Hadoop 2. It doesn't reproduce when you build this alone. It should follow the sequence of build in the release script. This is fixed by downgrading. Looks like there is a regression in scala-maven-plugin somewhere between 4.4.0 and 4.3.0. ### Why are the changes needed? To unblock the release. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? It can be tested as below: ```bash ./dev/create-release/do-release-docker.sh -d $WORKING_DIR ``` Closes #31031 from HyukjinKwon/SPARK-34007. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 91ca0398a076e..1282d3cd8fd2b 100644 --- a/pom.xml +++ b/pom.xml @@ -2468,7 +2468,7 @@ net.alchim31.maven scala-maven-plugin - 4.4.0 + 4.3.0 eclipse-add-source From 329850c667305053e4433c4c6da0e47b231302d4 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 17:21:32 +0900 Subject: [PATCH 0964/1009] [SPARK-32017][PYTHON][FOLLOW-UP] Rename HADOOP_VERSION to PYSPARK_HADOOP_VERSION in pip installation option ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/29703. It renames `HADOOP_VERSION` environment variable to `PYSPARK_HADOOP_VERSION` in case `HADOOP_VERSION` is already being used somewhere. Arguably `HADOOP_VERSION` is a pretty common name. I see here and there: - https://www.ibm.com/support/knowledgecenter/SSZUMP_7.2.1/install_grid_sym/understanding_advanced_edition.html - https://cwiki.apache.org/confluence/display/ARROW/HDFS+Filesystem+Support - http://crs4.github.io/pydoop/_pydoop1/installation.html ### Why are the changes needed? To avoid the environment variables is unexpectedly conflicted. ### Does this PR introduce _any_ user-facing change? It renames the environment variable but it's not released yet. ### How was this patch tested? 
Existing unittests will test. Closes #31028 from HyukjinKwon/SPARK-32017-followup. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/docs/source/getting_started/install.rst | 10 +++++----- python/pyspark/find_spark_home.py | 2 +- python/setup.py | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index a90f5fe159553..c5485424da664 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -48,11 +48,11 @@ If you want to install extra dependencies for a specific component, you can inst pip install pyspark[sql] -For PySpark with/without a specific Hadoop version, you can install it by using ``HADOOP_VERSION`` environment variables as below: +For PySpark with/without a specific Hadoop version, you can install it by using ``PYSPARK_HADOOP_VERSION`` environment variables as below: .. code-block:: bash - HADOOP_VERSION=2.7 pip install pyspark + PYSPARK_HADOOP_VERSION=2.7 pip install pyspark The default distribution uses Hadoop 3.2 and Hive 2.3. If users specify different versions of Hadoop, the pip installation automatically downloads a different version and use it in PySpark. Downloading it can take a while depending on @@ -60,15 +60,15 @@ the network and the mirror chosen. ``PYSPARK_RELEASE_MIRROR`` can be set to manu .. code-block:: bash - PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org HADOOP_VERSION=2.7 pip install + PYSPARK_RELEASE_MIRROR=http://mirror.apache-kr.org PYSPARK_HADOOP_VERSION=2.7 pip install It is recommended to use ``-v`` option in ``pip`` to track the installation and download status. .. code-block:: bash - HADOOP_VERSION=2.7 pip install pyspark -v + PYSPARK_HADOOP_VERSION=2.7 pip install pyspark -v -Supported values in ``HADOOP_VERSION`` are: +Supported values in ``PYSPARK_HADOOP_VERSION`` are: - ``without``: Spark pre-built with user-provided Apache Hadoop - ``2.7``: Spark pre-built for Apache Hadoop 2.7 diff --git a/python/pyspark/find_spark_home.py b/python/pyspark/find_spark_home.py index 4521a36503a16..62a36d42ebc72 100755 --- a/python/pyspark/find_spark_home.py +++ b/python/pyspark/find_spark_home.py @@ -36,7 +36,7 @@ def is_spark_home(path): (os.path.isdir(os.path.join(path, "jars")) or os.path.isdir(os.path.join(path, "assembly")))) - # Spark distribution can be downloaded when HADOOP_VERSION environment variable is set. + # Spark distribution can be downloaded when PYSPARK_HADOOP_VERSION environment variable is set. # We should look up this directory first, see also SPARK-32017. spark_dist_dir = "spark-distribution" paths = [ diff --git a/python/setup.py b/python/setup.py index 7bb8a00171d37..c7f195b89aa7a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -125,16 +125,16 @@ def run(self): spark_dist = os.path.join(self.install_lib, "pyspark", "spark-distribution") rmtree(spark_dist, ignore_errors=True) - if ("HADOOP_VERSION" in os.environ) or ("HIVE_VERSION" in os.environ): - # Note that SPARK_VERSION environment is just a testing purpose. - # HIVE_VERSION environment variable is also internal for now in case + if ("PYSPARK_HADOOP_VERSION" in os.environ) or ("PYSPARK_HIVE_VERSION" in os.environ): + # Note that PYSPARK_VERSION environment is just a testing purpose. + # PYSPARK_HIVE_VERSION environment variable is also internal for now in case # we support another version of Hive in the future. 
spark_version, hadoop_version, hive_version = install_module.checked_versions( - os.environ.get("SPARK_VERSION", VERSION).lower(), - os.environ.get("HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), - os.environ.get("HIVE_VERSION", install_module.DEFAULT_HIVE).lower()) + os.environ.get("PYSPARK_VERSION", VERSION).lower(), + os.environ.get("PYSPARK_HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(), + os.environ.get("PYSPARK_HIVE_VERSION", install_module.DEFAULT_HIVE).lower()) - if ("SPARK_VERSION" not in os.environ and + if ("PYSPARK_VERSION" not in os.environ and ((install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE) == (hadoop_version, hive_version))): # Do not download and install if they are same as default. From acf0a4fac2983a89c663d1622bf03a2e5929d121 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 5 Jan 2021 19:03:28 +0900 Subject: [PATCH 0965/1009] [SPARK-33999][BUILD] Make sbt unidoc success with JDK11 ### What changes were proposed in this pull request? This PR fixes an issue that `sbt unidoc` fails with JDK11. With the current master, `sbt unidoc` fails because the generated Java sources cause syntax error. As of JDK11, the default doclet seems to refuse such syntax error. Usually, it's enough to specify `--ignore-source-errors` option when `javadoc` runs to suppress the syntax error but unfortunately, we will then get an internal error. ``` [error] javadoc: error - An internal exception has occurred. [error] (java.lang.NullPointerException) [error] Please file a bug against the javadoc tool via the Java bug reporting page [error] (http://bugreport.java.com) after checking the Bug Database (http://bugs.java.com) [error] for duplicates. Include error messages and the following diagnostic in your report. Thank you. 
[error] java.lang.NullPointerException [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2340) [error] at jdk.compiler/com.sun.tools.javac.code.Types$14.visitTypeVar(Types.java:2398) [error] at jdk.compiler/com.sun.tools.javac.code.Types$14.visitTypeVar(Types.java:2348) [error] at jdk.compiler/com.sun.tools.javac.code.Type$TypeVar.accept(Type.java:1659) [error] at jdk.compiler/com.sun.tools.javac.code.Types$DefaultTypeVisitor.visit(Types.java:4857) [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2343) [error] at jdk.compiler/com.sun.tools.javac.code.Types.erasure(Types.java:2329) [error] at jdk.compiler/com.sun.tools.javac.model.JavacTypes.erasure(JavacTypes.java:134) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils$5.visitTypeVariable(Utils.java:1069) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils$5.visitTypeVariable(Utils.java:1048) [error] at jdk.compiler/com.sun.tools.javac.code.Type$TypeVar.accept(Type.java:1695) [error] at java.compiler11.0.9.1/javax.lang.model.util.AbstractTypeVisitor6.visit(AbstractTypeVisitor6.java:104) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.Utils.asTypeElement(Utils.java:1086) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkInfoImpl.setContext(LinkInfoImpl.java:410) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkInfoImpl.(LinkInfoImpl.java:285) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkFactoryImpl.getTypeParameterLink(LinkFactoryImpl.java:184) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.LinkFactoryImpl.getTypeParameterLinks(LinkFactoryImpl.java:167) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.util.links.LinkFactory.getLink(LinkFactory.java:196) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.getLink(HtmlDocletWriter.java:679) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.addPreQualifiedClassLink(HtmlDocletWriter.java:814) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDocletWriter.addPreQualifiedStrongClassLink(HtmlDocletWriter.java:839) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addPartialInfo(AbstractTreeWriter.java:185) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addLevelInfo(AbstractTreeWriter.java:92) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addLevelInfo(AbstractTreeWriter.java:94) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addTree(AbstractTreeWriter.java:129) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.AbstractTreeWriter.addTree(AbstractTreeWriter.java:112) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.PackageTreeWriter.generatePackageTreeFile(PackageTreeWriter.java:115) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.PackageTreeWriter.generate(PackageTreeWriter.java:92) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.formats.html.HtmlDoclet.generatePackageFiles(HtmlDoclet.java:312) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.AbstractDoclet.startGeneration(AbstractDoclet.java:210) [error] at jdk.javadoc/jdk.javadoc.internal.doclets.toolkit.AbstractDoclet.run(AbstractDoclet.java:114) [error] at jdk.javadoc/jdk.javadoc.doclet.StandardDoclet.run(StandardDoclet.java:72) [error] at 
jdk.javadoc/jdk.javadoc.internal.tool.Start.parseAndExecute(Start.java:588) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Start.begin(Start.java:432) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Start.begin(Start.java:345) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Main.execute(Main.java:63) [error] at jdk.javadoc/jdk.javadoc.internal.tool.Main.main(Main.java:52) ``` I found the internal error happens when a generated Java class is from a Scala class which is package private and generic. I also found that if we don't generate class hierarchy tree in the JavaDoc, we can suppress the internal error for JDK11 and later. ### Why are the changes needed? Make the build success with sbt and JDK11. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I confirmed the following command successfully finish with JDK8 and JDK11. ``` $ build/sbt -Phive -Phive-thriftserver -Pyarn -Pkubernetes -Pmesos -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud clean unidoc ``` I also confirmed html files are successfully generated under `target/javaunidoc`. Closes #31023 from sarutak/fix-genjavadoc-java11. Authored-by: Kousuke Saruta Signed-off-by: HyukjinKwon --- project/SparkBuild.scala | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index aa3e2cd65e185..668701be0ae98 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -958,18 +958,24 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/hadoop"))) }, - javacOptions in (JavaUnidoc, unidoc) := Seq( - "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", - "-public", - "-noqualifier", "java.lang", - "-tag", """example:a:Example\:""", - "-tag", """note:a:Note\:""", - "-tag", "group:X", - "-tag", "tparam:X", - "-tag", "constructor:X", - "-tag", "todo:X", - "-tag", "groupname:X" - ), + javacOptions in (JavaUnidoc, unidoc) := { + val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) + var major = versionParts(0).toInt + if (major == 1) major = versionParts(1).toInt + + Seq( + "-windowtitle", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " JavaDoc", + "-public", + "-noqualifier", "java.lang", + "-tag", """example:a:Example\:""", + "-tag", """note:a:Note\:""", + "-tag", "group:X", + "-tag", "tparam:X", + "-tag", "constructor:X", + "-tag", "todo:X", + "-tag", "groupname:X", + ) ++ { if (major >= 9) Seq("--ignore-source-errors", "-notree") else Seq.empty } + }, // Use GitHub repository for Scaladoc source links unidocSourceBase := s"https://github.com/apache/spark/tree/v${version.value}", From 8d09f9649510bf5d812c82b04f7711b9252a7db0 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Tue, 5 Jan 2021 19:48:10 +0900 Subject: [PATCH 0966/1009] [SPARK-34010][SQL][DODCS] Use python3 instead of python in SQL documentation build ### What changes were proposed in this pull request? This PR proposes to use python3 instead of python in SQL documentation build. After SPARK-29672, we use `sql/create-docs.sh` everywhere in Spark dev. We should fix it in `sql/create-docs.sh` too. This blocks release because the release container does not have `python` but only `python3`. ### Why are the changes needed? To unblock the release. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? I manually ran the script Closes #31041 from HyukjinKwon/SPARK-34010. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- sql/create-docs.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/create-docs.sh b/sql/create-docs.sh index 6614c714e90c7..8721df874ee73 100755 --- a/sql/create-docs.sh +++ b/sql/create-docs.sh @@ -27,14 +27,14 @@ set -e FWDIR="$(cd "`dirname "${BASH_SOURCE[0]}"`"; pwd)" SPARK_HOME="$(cd "`dirname "${BASH_SOURCE[0]}"`"/..; pwd)" -if ! hash python 2>/dev/null; then - echo "Missing python in your path, skipping SQL documentation generation." +if ! hash python3 2>/dev/null; then + echo "Missing python3 in your path, skipping SQL documentation generation." exit 0 fi if ! hash mkdocs 2>/dev/null; then echo "Missing mkdocs in your path, trying to install mkdocs for SQL documentation generation." - pip install mkdocs + pip3 install mkdocs fi pushd "$FWDIR" > /dev/null From 14c2edae7e8e02e18a24862a6c113b02719d4785 Mon Sep 17 00:00:00 2001 From: huangtianhua Date: Tue, 5 Jan 2021 21:50:21 +0900 Subject: [PATCH 0967/1009] [SPARK-34009][BUILD] To activate profile 'aarch64' based on OS settings Instead of taking parameter '-Paarch64' when maven build to activate the profile based on OS settings automatically, than we can use same command to build on aarch64. ### What changes were proposed in this pull request? Activate profile 'aarch64' based on OS ### Why are the changes needed? After this change, we build spark using the same command for aarch64 as x86. ### Does this PR introduce _any_ user-facing change? No. After this change, no need to taking parameter '-Paarch64' when build, but take the parameter works also. ### How was this patch tested? ARM daily CI. Closes #31036 from huangtianhua/SPARK-34009. Authored-by: huangtianhua Signed-off-by: HyukjinKwon --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 1282d3cd8fd2b..07c18f78e0735 100644 --- a/pom.xml +++ b/pom.xml @@ -3371,6 +3371,12 @@ org.openlabtesting.leveldbjni + + + linux + aarch64 + +
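As a rough illustration of what "activate the profile based on OS settings" means in the SPARK-34009 change just above: Maven compares the profile's `<os>` activation (name `linux`, arch `aarch64`) against the JVM's `os.*` system properties. The sketch below is only an approximation for illustration, not Maven's actual matcher; the object name and the simple equality checks are assumptions.

```scala
// Minimal sketch: approximates how the new aarch64 profile's <os> activation
// (name = linux, arch = aarch64) relates to the JVM's os.* system properties.
// Maven performs this matching internally; this object only makes it visible.
object Aarch64ProfileActivationSketch {
  def main(args: Array[String]): Unit = {
    val osName = System.getProperty("os.name").toLowerCase // e.g. "linux"
    val osArch = System.getProperty("os.arch").toLowerCase // e.g. "aarch64" or "amd64"
    val wouldActivate = osName == "linux" && osArch == "aarch64"
    println(s"os.name=$osName, os.arch=$osArch, aarch64 profile would activate: $wouldActivate")
  }
}
```

On a typical x86_64 Linux machine this prints `amd64` and `false`, while on an ARM box it prints `aarch64` and `true`, which is why the same `mvn` command can now be used unchanged on both architectures.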
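The JDK detection that the SPARK-33999 patch further above adds to `project/SparkBuild.scala` also benefits from a worked example: pre-9 JDKs report versions like `1.8.0_275`, while newer ones report strings like `11.0.9.1` or `17+35`. The following is a self-contained sketch of that logic under stated assumptions (the object name, helper names, `main` driver, and sample version strings are illustrative and not part of the patch):

```scala
// Standalone sketch mirroring the major-version parsing used for the unidoc
// javacOptions: split on '.', '+', or '-' and handle the legacy "1.x" scheme.
object JavaMajorVersionSketch {
  // Parse strings like "1.8.0_275" (JDK 8) or "11.0.9.1" (JDK 11) into a major version.
  def majorVersion(javaVersion: String): Int = {
    val versionParts = javaVersion.split("[+.\\-]+", 3)
    var major = versionParts(0).toInt
    // Pre-JDK 9 versions report "1.x.y", so the real major number is the second part.
    if (major == 1) major = versionParts(1).toInt
    major
  }

  // Only the JDK 9+ doclet needs the extra flags for the generated Java sources.
  def extraJavadocFlags(major: Int): Seq[String] =
    if (major >= 9) Seq("--ignore-source-errors", "-notree") else Seq.empty

  def main(args: Array[String]): Unit = {
    Seq("1.8.0_275", "11.0.9.1", "17+35").foreach { v =>
      val major = majorVersion(v)
      println(s"$v -> major $major, extra javadoc flags: ${extraJavadocFlags(major)}")
    }
  }
}
```

Running this prints major 8 for `1.8.0_275` and majors 11 and 17 for the newer strings, so `--ignore-source-errors` and `-notree` are only appended where a JDK 9+ javadoc would otherwise fail.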
      From cc1d9d25fb4c2e4af912d6f9802de8f351c32deb Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 5 Jan 2021 16:15:33 +0000 Subject: [PATCH 0968/1009] [SPARK-33542][SQL] Group exception messages in catalyst/catalog ### What changes were proposed in this pull request? This PR group exception messages in `/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog`. ### Why are the changes needed? It will largely help with standardization of error messages and its maintenance. ### Does this PR introduce _any_ user-facing change? No. Error messages remain unchanged. ### How was this patch tested? No new tests - pass all original tests to make sure it doesn't break any existing behavior. Closes #30870 from beliefer/SPARK-33542. Lead-authored-by: gengjiaan Co-authored-by: Jiaan Geng Co-authored-by: beliefer Signed-off-by: Wenchen Fan --- .../spark/sql/QueryCompilationErrors.scala | 170 +++++++++++++++++- .../spark/sql/QueryExecutionErrors.scala | 56 ++++++ .../catalog/GlobalTempViewManager.scala | 5 +- .../catalyst/catalog/InMemoryCatalog.scala | 42 ++--- .../sql/catalyst/catalog/SessionCatalog.scala | 78 ++++---- .../catalyst/catalog/functionResources.scala | 4 +- .../sql/catalyst/catalog/interface.scala | 45 ++--- 7 files changed, 295 insertions(+), 105 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala index ff4c54df96f31..ed18e94f46ecc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryCompilationErrors.scala @@ -17,8 +17,10 @@ package org.apache.spark.sql.errors +import org.apache.hadoop.fs.Path + import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.{FunctionIdentifier, QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{ResolvedNamespace, ResolvedView} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, GroupingID, NamedExpression, SpecifiedWindowFrame, WindowFrame, WindowFunction, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SerdeInfo} @@ -364,10 +366,16 @@ object QueryCompilationErrors { new AnalysisException(s"undefined function $name") } + def invalidFunctionArgumentsError( + name: String, expectedInfo: String, actualNumber: Int): Throwable = { + new AnalysisException(s"Invalid number of arguments for function $name. " + + s"Expected: $expectedInfo; Found: $actualNumber") + } + def invalidFunctionArgumentNumberError( validParametersCount: Seq[Int], name: String, params: Seq[Class[Expression]]): Throwable = { - val invalidArgumentsMsg = if (validParametersCount.length == 0) { - s"Invalid arguments for function $name" + if (validParametersCount.length == 0) { + new AnalysisException(s"Invalid arguments for function $name") } else { val expectedNumberOfParameters = if (validParametersCount.length == 1) { validParametersCount.head.toString @@ -375,10 +383,8 @@ object QueryCompilationErrors { validParametersCount.init.mkString("one of ", ", ", " and ") + validParametersCount.last } - s"Invalid number of arguments for function $name. 
" + - s"Expected: $expectedNumberOfParameters; Found: ${params.length}" + invalidFunctionArgumentsError(name, expectedNumberOfParameters, params.length) } - new AnalysisException(invalidArgumentsMsg) } def functionAcceptsOnlyOneArgumentError(name: String): Throwable = { @@ -504,4 +510,156 @@ object QueryCompilationErrors { def columnDoesNotExistError(colName: String): Throwable = { new AnalysisException(s"Column $colName does not exist") } + + def renameTempViewToExistingViewError(oldName: String, newName: String): Throwable = { + new AnalysisException( + s"rename temporary view from '$oldName' to '$newName': destination view already exists") + } + + def databaseNotEmptyError(db: String, details: String): Throwable = { + new AnalysisException(s"Database $db is not empty. One or more $details exist.") + } + + def invalidNameForTableOrDatabaseError(name: String): Throwable = { + new AnalysisException(s"`$name` is not a valid name for tables/databases. " + + "Valid names only contain alphabet characters, numbers and _.") + } + + def cannotCreateDatabaseWithSameNameAsPreservedDatabaseError(database: String): Throwable = { + new AnalysisException(s"$database is a system preserved database, " + + "you cannot create a database with this name.") + } + + def cannotDropDefaultDatabaseError(): Throwable = { + new AnalysisException("Can not drop default database") + } + + def cannotUsePreservedDatabaseAsCurrentDatabaseError(database: String): Throwable = { + new AnalysisException(s"$database is a system preserved database, you cannot use it as " + + "current database. To access global temporary views, you should use qualified name with " + + s"the GLOBAL_TEMP_DATABASE, e.g. SELECT * FROM $database.viewName.") + } + + def createExternalTableWithoutLocationError(): Throwable = { + new AnalysisException("CREATE EXTERNAL TABLE must be accompanied by LOCATION") + } + + def cannotOperateManagedTableWithExistingLocationError( + methodName: String, tableIdentifier: TableIdentifier, tableLocation: Path): Throwable = { + new AnalysisException(s"Can not $methodName the managed table('$tableIdentifier')" + + s". The associated location('${tableLocation.toString}') already exists.") + } + + def dropNonExistentColumnsNotSupportedError( + nonExistentColumnNames: Seq[String]): Throwable = { + new AnalysisException( + s""" + |Some existing schema fields (${nonExistentColumnNames.mkString("[", ",", "]")}) are + |not present in the new schema. We don't support dropping columns yet. + """.stripMargin) + } + + def cannotRetrieveTableOrViewNotInSameDatabaseError( + qualifiedTableNames: Seq[QualifiedTableName]): Throwable = { + new AnalysisException("Only the tables/views belong to the same database can be retrieved. 
" + + s"Querying tables/views are $qualifiedTableNames") + } + + def renameTableSourceAndDestinationMismatchError(db: String, newDb: String): Throwable = { + new AnalysisException( + s"RENAME TABLE source and destination databases do not match: '$db' != '$newDb'") + } + + def cannotRenameTempViewWithDatabaseSpecifiedError( + oldName: TableIdentifier, newName: TableIdentifier): Throwable = { + new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': cannot " + + s"specify database name '${newName.database.get}' in the destination table") + } + + def cannotRenameTempViewToExistingTableError( + oldName: TableIdentifier, newName: TableIdentifier): Throwable = { + new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': " + + "destination table already exists") + } + + def invalidPartitionSpecError(details: String): Throwable = { + new AnalysisException(s"Partition spec is invalid. $details") + } + + def functionAlreadyExistsError(func: FunctionIdentifier): Throwable = { + new AnalysisException(s"Function $func already exists") + } + + def cannotLoadClassWhenRegisteringFunctionError( + className: String, func: FunctionIdentifier): Throwable = { + new AnalysisException(s"Can not load class '$className' when registering " + + s"the function '$func', please make sure it is on the classpath") + } + + def v2CatalogNotSupportFunctionError( + catalog: String, namespace: Seq[String]): Throwable = { + new AnalysisException("V2 catalog does not support functions yet. " + + s"catalog: $catalog, namespace: '${namespace.quoted}'") + } + + def resourceTypeNotSupportedError(resourceType: String): Throwable = { + new AnalysisException(s"Resource Type '$resourceType' is not supported.") + } + + def tableNotSpecifyDatabaseError(identifier: TableIdentifier): Throwable = { + new AnalysisException(s"table $identifier did not specify database") + } + + def tableNotSpecifyLocationUriError(identifier: TableIdentifier): Throwable = { + new AnalysisException(s"table $identifier did not specify locationUri") + } + + def partitionNotSpecifyLocationUriError(specString: String): Throwable = { + new AnalysisException(s"Partition [$specString] did not specify locationUri") + } + + def invalidBucketNumberError(bucketingMaxBuckets: Int, numBuckets: Int): Throwable = { + new AnalysisException( + s"Number of buckets should be greater than 0 but less than or equal to " + + s"bucketing.maxBuckets (`$bucketingMaxBuckets`). 
Got `$numBuckets`") + } + + def corruptedTableNameContextInCatalogError(numParts: Int, index: Int): Throwable = { + new AnalysisException("Corrupted table name context in catalog: " + + s"$numParts parts expected, but part $index is missing.") + } + + def corruptedViewSQLConfigsInCatalogError(e: Exception): Throwable = { + new AnalysisException("Corrupted view SQL configs in catalog", cause = Some(e)) + } + + def corruptedViewQueryOutputColumnsInCatalogError(numCols: String, index: Int): Throwable = { + new AnalysisException("Corrupted view query output column names in catalog: " + + s"$numCols parts expected, but part $index is missing.") + } + + def corruptedViewReferredTempViewInCatalogError(e: Exception): Throwable = { + new AnalysisException("corrupted view referred temp view names in catalog", cause = Some(e)) + } + + def corruptedViewReferredTempFunctionsInCatalogError(e: Exception): Throwable = { + new AnalysisException( + "corrupted view referred temp functions names in catalog", cause = Some(e)) + } + + def columnStatisticsDeserializationNotSupportedError( + name: String, dataType: DataType): Throwable = { + new AnalysisException("Column statistics deserialization is not supported for " + + s"column $name of data type: $dataType.") + } + + def columnStatisticsSerializationNotSupportedError( + colName: String, dataType: DataType): Throwable = { + new AnalysisException("Column statistics serialization is not supported for " + + s"column $colName of data type: $dataType.") + } + + def cannotReadCorruptedTablePropertyError(key: String, details: String = ""): Throwable = { + new AnalysisException(s"Cannot read table property '$key' as it's corrupted.$details") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala index d24e61c699241..61dcddb979a1b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/QueryExecutionErrors.scala @@ -17,7 +17,13 @@ package org.apache.spark.sql.errors +import java.io.IOException + +import org.apache.hadoop.fs.Path + +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} /** @@ -56,4 +62,54 @@ object QueryExecutionErrors { def cannotTerminateGeneratorError(generator: UnresolvedGenerator): Throwable = { new UnsupportedOperationException(s"Cannot terminate expression: $generator") } + + def unableToCreateDatabaseAsFailedToCreateDirectoryError( + dbDefinition: CatalogDatabase, e: IOException): Throwable = { + new SparkException(s"Unable to create database ${dbDefinition.name} as failed " + + s"to create its directory ${dbDefinition.locationUri}", e) + } + + def unableToDropDatabaseAsFailedToDeleteDirectoryError( + dbDefinition: CatalogDatabase, e: IOException): Throwable = { + new SparkException(s"Unable to drop database ${dbDefinition.name} as failed " + + s"to delete its directory ${dbDefinition.locationUri}", e) + } + + def unableToCreateTableAsFailedToCreateDirectoryError( + table: String, defaultTableLocation: Path, e: IOException): Throwable = { + new SparkException(s"Unable to create table $table as failed " + + s"to create its directory $defaultTableLocation", e) + } + + def unableToDeletePartitionPathError(partitionPath: Path, e: IOException): Throwable = { + 
new SparkException(s"Unable to delete partition path $partitionPath", e) + } + + def unableToDropTableAsFailedToDeleteDirectoryError( + table: String, dir: Path, e: IOException): Throwable = { + new SparkException(s"Unable to drop table $table as failed " + + s"to delete its directory $dir", e) + } + + def unableToRenameTableAsFailedToRenameDirectoryError( + oldName: String, newName: String, oldDir: Path, e: IOException): Throwable = { + new SparkException(s"Unable to rename table $oldName to $newName as failed " + + s"to rename its directory $oldDir", e) + } + + def unableToCreatePartitionPathError(partitionPath: Path, e: IOException): Throwable = { + new SparkException(s"Unable to create partition path $partitionPath", e) + } + + def unableToRenamePartitionPathError(oldPartPath: Path, e: IOException): Throwable = { + new SparkException(s"Unable to rename partition path $oldPartPath", e) + } + + def methodNotImplementedError(methodName: String): Throwable = { + new UnsupportedOperationException(s"$methodName is not implemented") + } + + def tableStatsNotSpecifiedError(): Throwable = { + new IllegalStateException("table stats must be specified.") + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala index 6095ac0bc9c50..c7bd2a4cd800d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala @@ -21,10 +21,10 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.errors.QueryCompilationErrors /** @@ -92,8 +92,7 @@ class GlobalTempViewManager(val database: String) { def rename(oldName: String, newName: String): Boolean = synchronized { if (viewDefinitions.contains(oldName)) { if (viewDefinitions.contains(newName)) { - throw new AnalysisException( - s"rename temporary view from '$oldName' to '$newName': destination view already exists") + throw QueryCompilationErrors.renameTempViewToExistingViewError(oldName, newName) } val viewDefinition = viewDefinitions(oldName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 31644a5ae4e35..64b4a112fe786 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -24,13 +24,13 @@ import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.sql.AnalysisException +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.util.StringUtils +import org.apache.spark.sql.errors.{QueryCompilationErrors, 
QueryExecutionErrors} import org.apache.spark.sql.types.StructType /** @@ -112,8 +112,8 @@ class InMemoryCatalog( fs.mkdirs(location) } catch { case e: IOException => - throw new SparkException(s"Unable to create database ${dbDefinition.name} as failed " + - s"to create its directory ${dbDefinition.locationUri}", e) + throw QueryExecutionErrors.unableToCreateDatabaseAsFailedToCreateDirectoryError( + dbDefinition, e) } catalog.put(dbDefinition.name, new DatabaseDesc(dbDefinition)) } @@ -127,10 +127,10 @@ class InMemoryCatalog( if (!cascade) { // If cascade is false, make sure the database is empty. if (catalog(db).tables.nonEmpty) { - throw new AnalysisException(s"Database $db is not empty. One or more tables exist.") + throw QueryCompilationErrors.databaseNotEmptyError(db, "tables") } if (catalog(db).functions.nonEmpty) { - throw new AnalysisException(s"Database '$db' is not empty. One or more functions exist.") + throw QueryCompilationErrors.databaseNotEmptyError(db, "functions") } } // Remove the database. @@ -141,8 +141,8 @@ class InMemoryCatalog( fs.delete(location, true) } catch { case e: IOException => - throw new SparkException(s"Unable to drop database ${dbDefinition.name} as failed " + - s"to delete its directory ${dbDefinition.locationUri}", e) + throw QueryExecutionErrors.unableToDropDatabaseAsFailedToDeleteDirectoryError( + dbDefinition, e) } catalog.remove(db) } else { @@ -209,8 +209,8 @@ class InMemoryCatalog( fs.mkdirs(defaultTableLocation) } catch { case e: IOException => - throw new SparkException(s"Unable to create table $table as failed " + - s"to create its directory $defaultTableLocation", e) + throw QueryExecutionErrors.unableToCreateTableAsFailedToCreateDirectoryError( + table, defaultTableLocation, e) } tableDefinition.withNewStorage(locationUri = Some(defaultTableLocation.toUri)) } else { @@ -239,7 +239,7 @@ class InMemoryCatalog( fs.delete(partitionPath, true) } catch { case e: IOException => - throw new SparkException(s"Unable to delete partition path $partitionPath", e) + throw QueryExecutionErrors.unableToDeletePartitionPathError(partitionPath, e) } } assert(tableMeta.storage.locationUri.isDefined, @@ -252,8 +252,8 @@ class InMemoryCatalog( fs.delete(dir, true) } catch { case e: IOException => - throw new SparkException(s"Unable to drop table $table as failed " + - s"to delete its directory $dir", e) + throw QueryExecutionErrors.unableToDropTableAsFailedToDeleteDirectoryError( + table, dir, e) } } catalog(db).tables.remove(table) @@ -284,8 +284,8 @@ class InMemoryCatalog( fs.rename(oldDir, newDir) } catch { case e: IOException => - throw new SparkException(s"Unable to rename table $oldName to $newName as failed " + - s"to rename its directory $oldDir", e) + throw QueryExecutionErrors.unableToRenameTableAsFailedToRenameDirectoryError( + oldName, newName, oldDir, e) } oldDesc.table = oldDesc.table.withNewStorage(locationUri = Some(newDir.toUri)) } @@ -358,7 +358,7 @@ class InMemoryCatalog( loadPath: String, isOverwrite: Boolean, isSrcLocal: Boolean): Unit = { - throw new UnsupportedOperationException("loadTable is not implemented") + throw QueryExecutionErrors.methodNotImplementedError("loadTable") } override def loadPartition( @@ -369,7 +369,7 @@ class InMemoryCatalog( isOverwrite: Boolean, inheritTableSpecs: Boolean, isSrcLocal: Boolean): Unit = { - throw new UnsupportedOperationException("loadPartition is not implemented.") + throw QueryExecutionErrors.methodNotImplementedError("loadPartition") } override def loadDynamicPartitions( @@ -379,7 +379,7 @@ class 
InMemoryCatalog( partition: TablePartitionSpec, replace: Boolean, numDP: Int): Unit = { - throw new UnsupportedOperationException("loadDynamicPartitions is not implemented.") + throw QueryExecutionErrors.methodNotImplementedError("loadDynamicPartitions") } // -------------------------------------------------------------------------- @@ -416,7 +416,7 @@ class InMemoryCatalog( } } catch { case e: IOException => - throw new SparkException(s"Unable to create partition path $partitionPath", e) + throw QueryExecutionErrors.unableToCreatePartitionPathError(partitionPath, e) } existingParts.put( @@ -457,7 +457,7 @@ class InMemoryCatalog( fs.delete(partitionPath, true) } catch { case e: IOException => - throw new SparkException(s"Unable to delete partition path $partitionPath", e) + throw QueryExecutionErrors.unableToDeletePartitionPathError(partitionPath, e) } } existingParts.remove(p) @@ -490,7 +490,7 @@ class InMemoryCatalog( fs.rename(oldPartPath, newPartPath) } catch { case e: IOException => - throw new SparkException(s"Unable to rename partition path $oldPartPath", e) + throw QueryExecutionErrors.unableToRenamePartitionPathError(oldPartPath, e) } oldPartition.copy( spec = newSpec, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 9b542d6bd95ce..5f7028bf87c87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias, View} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE import org.apache.spark.sql.types.StructType @@ -120,8 +121,7 @@ class SessionCatalog( */ private def validateName(name: String): Unit = { if (!validNameFormat.pattern.matcher(name).matches()) { - throw new AnalysisException(s"`$name` is not a valid name for tables/databases. 
" + - "Valid names only contain alphabet characters, numbers and _.") + throw QueryCompilationErrors.invalidNameForTableOrDatabaseError(name) } } @@ -216,9 +216,8 @@ class SessionCatalog( def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit = { val dbName = formatDatabaseName(dbDefinition.name) if (dbName == globalTempViewManager.database) { - throw new AnalysisException( - s"${globalTempViewManager.database} is a system preserved database, " + - "you cannot create a database with this name.") + throw QueryCompilationErrors.cannotCreateDatabaseWithSameNameAsPreservedDatabaseError( + globalTempViewManager.database) } validateName(dbName) externalCatalog.createDatabase( @@ -238,7 +237,7 @@ class SessionCatalog( def dropDatabase(db: String, ignoreIfNotExists: Boolean, cascade: Boolean): Unit = { val dbName = formatDatabaseName(db) if (dbName == DEFAULT_DATABASE) { - throw new AnalysisException(s"Can not drop default database") + throw QueryCompilationErrors.cannotDropDefaultDatabaseError } if (cascade && databaseExists(dbName)) { listTables(dbName).foreach { t => @@ -279,11 +278,8 @@ class SessionCatalog( def setCurrentDatabase(db: String): Unit = { val dbName = formatDatabaseName(db) if (dbName == globalTempViewManager.database) { - throw new AnalysisException( - s"${globalTempViewManager.database} is a system preserved database, " + - "you cannot use it as current database. To access global temporary views, you should " + - "use qualified name with the GLOBAL_TEMP_DATABASE, e.g. SELECT * FROM " + - s"${globalTempViewManager.database}.viewName.") + throw QueryCompilationErrors.cannotUsePreservedDatabaseAsCurrentDatabaseError( + globalTempViewManager.database) } requireDbExists(dbName) synchronized { currentDb = dbName } @@ -320,7 +316,7 @@ class SessionCatalog( validateLocation: Boolean = true): Unit = { val isExternal = tableDefinition.tableType == CatalogTableType.EXTERNAL if (isExternal && tableDefinition.storage.locationUri.isEmpty) { - throw new AnalysisException(s"CREATE EXTERNAL TABLE must be accompanied by LOCATION") + throw QueryCompilationErrors.createExternalTableWithoutLocationError } val db = formatDatabaseName(tableDefinition.identifier.database.getOrElse(getCurrentDatabase)) @@ -359,8 +355,8 @@ class SessionCatalog( val fs = tableLocation.getFileSystem(hadoopConf) if (fs.exists(tableLocation) && fs.listStatus(tableLocation).nonEmpty) { - throw new AnalysisException(s"Can not create the managed table('${table.identifier}')" + - s". The associated location('${tableLocation.toString}') already exists.") + throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( + "create", table.identifier, tableLocation) } } } @@ -428,11 +424,7 @@ class SessionCatalog( val nonExistentColumnNames = oldDataSchema.map(_.name).filterNot(columnNameResolved(newDataSchema, _)) if (nonExistentColumnNames.nonEmpty) { - throw new AnalysisException( - s""" - |Some existing schema fields (${nonExistentColumnNames.mkString("[", ",", "]")}) are - |not present in the new schema. We don't support dropping columns yet. 
- """.stripMargin) + throw QueryCompilationErrors.dropNonExistentColumnsNotSupportedError(nonExistentColumnNames) } externalCatalog.alterTableDataSchema(db, table, newDataSchema) @@ -508,10 +500,8 @@ class SessionCatalog( if (dbs.distinct.size != 1) { val tables = names.map(name => formatTableName(name.table)) val qualifiedTableNames = dbs.zip(tables).map { case (d, t) => QualifiedTableName(d, t)} - throw new AnalysisException( - s"Only the tables/views belong to the same database can be retrieved. Querying " + - s"tables/views are $qualifiedTableNames" - ) + throw QueryCompilationErrors.cannotRetrieveTableOrViewNotInSameDatabaseError( + qualifiedTableNames) } val db = formatDatabaseName(dbs.head) requireDbExists(db) @@ -722,8 +712,7 @@ class SessionCatalog( val db = formatDatabaseName(oldName.database.getOrElse(currentDb)) newName.database.map(formatDatabaseName).foreach { newDb => if (db != newDb) { - throw new AnalysisException( - s"RENAME TABLE source and destination databases do not match: '$db' != '$newDb'") + throw QueryCompilationErrors.renameTableSourceAndDestinationMismatchError(db, newDb) } } @@ -741,13 +730,12 @@ class SessionCatalog( externalCatalog.renameTable(db, oldTableName, newTableName) } else { if (newName.database.isDefined) { - throw new AnalysisException( - s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': cannot specify database " + - s"name '${newName.database.get}' in the destination table") + throw QueryCompilationErrors.cannotRenameTempViewWithDatabaseSpecifiedError( + oldName, newName) } if (tempViews.contains(newTableName)) { - throw new AnalysisException(s"RENAME TEMPORARY VIEW from '$oldName' to '$newName': " + - "destination table already exists") + throw QueryCompilationErrors.cannotRenameTempViewToExistingTableError( + oldName, newName) } val table = tempViews(oldTableName) tempViews.remove(oldTableName) @@ -1192,8 +1180,8 @@ class SessionCatalog( specs.foreach { s => if (s.values.exists(_.isEmpty)) { val spec = s.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") - throw new AnalysisException( - s"Partition spec is invalid. The spec ($spec) contains an empty partition column value") + throw QueryCompilationErrors.invalidPartitionSpecError( + s"The spec ($spec) contains an empty partition column value") } } } @@ -1223,10 +1211,10 @@ class SessionCatalog( val defined = table.partitionColumnNames specs.foreach { s => if (!s.keys.forall(defined.contains)) { - throw new AnalysisException( - s"Partition spec is invalid. The spec (${s.keys.mkString(", ")}) must be contained " + - s"within the partition spec (${table.partitionColumnNames.mkString(", ")}) defined " + - s"in table '${table.identifier}'") + throw QueryCompilationErrors.invalidPartitionSpecError( + s"The spec (${s.keys.mkString(", ")}) must be contained " + + s"within the partition spec (${table.partitionColumnNames.mkString(", ")}) defined " + + s"in table '${table.identifier}'") } } } @@ -1382,8 +1370,8 @@ class SessionCatalog( // Check input argument size if (e.inputTypes.size != input.size) { - throw new AnalysisException(s"Invalid number of arguments for function $name. 
" + - s"Expected: ${e.inputTypes.size}; Found: ${input.size}") + throw QueryCompilationErrors.invalidFunctionArgumentsError( + name, e.inputTypes.size.toString, input.size) } e } else { @@ -1409,15 +1397,14 @@ class SessionCatalog( functionBuilder: Option[FunctionBuilder] = None): Unit = { val func = funcDefinition.identifier if (functionRegistry.functionExists(func) && !overrideIfExists) { - throw new AnalysisException(s"Function $func already exists") + throw QueryCompilationErrors.functionAlreadyExistsError(func) } val info = new ExpressionInfo(funcDefinition.className, func.database.orNull, func.funcName) val builder = functionBuilder.getOrElse { val className = funcDefinition.className if (!Utils.classIsLoadable(className)) { - throw new AnalysisException(s"Can not load class '$className' when registering " + - s"the function '$func', please make sure it is on the classpath") + throw QueryCompilationErrors.cannotLoadClassWhenRegisteringFunctionError(className, func) } makeFunctionBuilder(func.unquotedString, className) } @@ -1522,7 +1509,6 @@ class SessionCatalog( def lookupFunction( name: FunctionIdentifier, children: Seq[Expression]): Expression = synchronized { - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ // Note: the implementation of this function is a little bit convoluted. // We probably shouldn't use a single FunctionRegistry to register all three kinds of functions // (built-in, temp, and external). @@ -1545,9 +1531,7 @@ class SessionCatalog( case Seq() => getCurrentDatabase case Seq(_, db) => db case Seq(catalog, namespace @ _*) => - throw new AnalysisException( - s"V2 catalog does not support functions yet. " + - s"catalog: ${catalog}, namespace: '${namespace.quoted}'") + throw QueryCompilationErrors.v2CatalogNotSupportFunctionError(catalog, namespace) } // If the name itself is not qualified, add the current database to it. @@ -1685,8 +1669,8 @@ class SessionCatalog( val newTableLocation = new Path(new Path(databaseLocation), formatTableName(newName.table)) val fs = newTableLocation.getFileSystem(hadoopConf) if (fs.exists(newTableLocation)) { - throw new AnalysisException(s"Can not rename the managed table('$oldName')" + - s". The associated location('$newTableLocation') already exists.") + throw QueryCompilationErrors.cannotOperateManagedTableWithExistingLocationError( + "rename", oldName, newTableLocation) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala index 67bf2d06c95dd..7ebe3d8c5f880 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.catalog import java.util.Locale -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.errors.QueryCompilationErrors /** A trait that represents the type of a resourced needed by a function. 
*/ abstract class FunctionResourceType(val resourceType: String) @@ -40,7 +40,7 @@ object FunctionResourceType { case "file" => FileResource case "archive" => ArchiveResource case other => - throw new AnalysisException(s"Resource Type '$resourceType' is not supported.") + throw QueryCompilationErrors.resourceTypeNotSupportedError(resourceType) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index d25b1fe46d569..eb29b37a3c5d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -29,7 +29,6 @@ import org.json4s.JsonAST.{JArray, JString} import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} @@ -37,6 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.EstimationUtils import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -145,7 +145,7 @@ case class CatalogTablePartition( /** Return the partition location, assuming it is specified. */ def location: URI = storage.locationUri.getOrElse { val specString = spec.map { case (k, v) => s"$k=$v" }.mkString(", ") - throw new AnalysisException(s"Partition [$specString] did not specify locationUri") + throw QueryCompilationErrors.partitionNotSpecifyLocationUriError(specString) } /** @@ -182,9 +182,8 @@ case class BucketSpec( sortColumnNames: Seq[String]) extends SQLConfHelper { if (numBuckets <= 0 || numBuckets > conf.bucketingMaxBuckets) { - throw new AnalysisException( - s"Number of buckets should be greater than 0 but less than or equal to " + - s"bucketing.maxBuckets (`${conf.bucketingMaxBuckets}`). Got `$numBuckets`") + throw QueryCompilationErrors.invalidBucketNumberError( + conf.bucketingMaxBuckets, numBuckets) } override def toString: String = { @@ -274,12 +273,12 @@ case class CatalogTable( /** Return the database this table was specified to belong to, assuming it exists. */ def database: String = identifier.database.getOrElse { - throw new AnalysisException(s"table $identifier did not specify database") + throw QueryCompilationErrors.tableNotSpecifyDatabaseError(identifier) } /** Return the table location, assuming it is specified. */ def location: URI = storage.locationUri.getOrElse { - throw new AnalysisException(s"table $identifier did not specify locationUri") + throw QueryCompilationErrors.tableNotSpecifyLocationUriError(identifier) } /** Return the fully qualified name of this table, assuming the database was specified. 
*/ @@ -295,8 +294,7 @@ case class CatalogTable( (0 until numParts).map { index => properties.getOrElse( s"$VIEW_CATALOG_AND_NAMESPACE_PART_PREFIX$index", - throw new AnalysisException("Corrupted table name context in catalog: " + - s"$numParts parts expected, but part $index is missing.") + throw QueryCompilationErrors.corruptedTableNameContextInCatalogError(numParts, index) ) } } else if (properties.contains(VIEW_DEFAULT_DATABASE)) { @@ -318,8 +316,7 @@ case class CatalogTable( yield (key.substring(CatalogTable.VIEW_SQL_CONFIG_PREFIX.length), value) } catch { case e: Exception => - throw new AnalysisException( - "Corrupted view SQL configs in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewSQLConfigsInCatalogError(e) } } @@ -334,8 +331,7 @@ case class CatalogTable( index <- 0 until numCols.toInt } yield properties.getOrElse( s"$VIEW_QUERY_OUTPUT_COLUMN_NAME_PREFIX$index", - throw new AnalysisException("Corrupted view query output column names in catalog: " + - s"$numCols parts expected, but part $index is missing.") + throw QueryCompilationErrors.corruptedViewQueryOutputColumnsInCatalogError(numCols, index) ) } @@ -352,8 +348,7 @@ case class CatalogTable( }.getOrElse(Seq.empty) } catch { case e: Exception => - throw new AnalysisException( - "corrupted view referred temp view names in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewReferredTempViewInCatalogError(e) } } @@ -368,8 +363,7 @@ case class CatalogTable( }.getOrElse(Seq.empty) } catch { case e: Exception => - throw new AnalysisException( - "corrupted view referred temp functions names in catalog", cause = Some(e)) + throw QueryCompilationErrors.corruptedViewReferredTempFunctionsInCatalogError(e) } } @@ -497,14 +491,13 @@ object CatalogTable { None } else { val numParts = props.get(s"$key.numParts") - val errorMessage = s"Cannot read table property '$key' as it's corrupted." if (numParts.isEmpty) { - throw new AnalysisException(errorMessage) + throw QueryCompilationErrors.cannotReadCorruptedTablePropertyError(key) } else { val parts = (0 until numParts.get.toInt).map { index => props.getOrElse(s"$key.part.$index", { - throw new AnalysisException( - s"$errorMessage Missing part $index, ${numParts.get} parts are expected.") + throw QueryCompilationErrors.cannotReadCorruptedTablePropertyError( + key, s"Missing part $index, $numParts parts are expected.") }) } Some(parts.mkString) @@ -657,8 +650,8 @@ object CatalogColumnStat extends Logging { // This version of Spark does not use min/max for binary/string types so we ignore it. case BinaryType | StringType => null case _ => - throw new AnalysisException("Column statistics deserialization is not supported for " + - s"column $name of data type: $dataType.") + throw QueryCompilationErrors.columnStatisticsDeserializationNotSupportedError( + name, dataType) } } @@ -674,8 +667,8 @@ object CatalogColumnStat extends Logging { case _: DecimalType => v.asInstanceOf[Decimal].toJavaBigDecimal // This version of Spark does not use min/max for binary/string types so we ignore it. 
case _ => - throw new AnalysisException("Column statistics serialization is not supported for " + - s"column $colName of data type: $dataType.") + throw QueryCompilationErrors.columnStatisticsSerializationNotSupportedError( + colName, dataType) } externalValue.toString } @@ -805,7 +798,7 @@ case class HiveTableRelation( tableMeta.stats.map(_.toPlanStats(output, conf.cboEnabled || conf.planStatsEnabled)) .orElse(tableStats) .getOrElse { - throw new IllegalStateException("table stats must be specified.") + throw QueryExecutionErrors.tableStatsNotSpecifiedError } } From 171db85aa2cdacf39caeb26162569275076fd52f Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Tue, 5 Jan 2021 13:48:52 -0800 Subject: [PATCH 0969/1009] [SPARK-33874][K8S][FOLLOWUP] Handle long lived sidecars - clean up logging ### What changes were proposed in this pull request? Switch log level from warn to debug when the spark container is not present in the pod's container statuses. ### Why are the changes needed? There are many non-critical situations where the Spark container may not be present, and the warning log level is too high. ### Does this PR introduce _any_ user-facing change? Log message change. ### How was this patch tested? N/A Closes #31047 from holdenk/SPARK-33874-follow-up. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 71355c7af10fa..37aaca7e8ceeb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -93,9 +93,10 @@ object ExecutorPodsSnapshot extends Logging { case _ => PodRunning(pod) } - // If we can't find the Spark container status, fall back to the pod status + // If we can't find the Spark container status, fall back to the pod status. This is + // expected to occur during pod startup and other situations. case _ => - logWarning(s"Unable to find container ${sparkContainerName} in pod ${pod} " + + logDebug(s"Unable to find container ${sparkContainerName} in pod ${pod} " + "defaulting to entire pod status (running).") PodRunning(pod) } From e279ed304475a6d5a9fbf739fe9ed32ef58171cb Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 6 Jan 2021 08:48:24 +0900 Subject: [PATCH 0970/1009] [SPARK-34012][SQL] Keep behavior consistent when conf `spark.sql.legacy.parser.havingWithoutGroupByAsWhere` is true with migration guide ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/22696 we support HAVING without GROUP BY means global aggregate But since we treat having as Filter before, in this way will cause a lot of analyze error, after https://github.com/apache/spark/pull/28294 we use `UnresolvedHaving` to instead `Filter` to solve such problem, but break origin logical about treat `SELECT 1 FROM range(10) HAVING true` as `SELECT 1 FROM range(10) WHERE true` . This PR fix this issue and add UT. ### Why are the changes needed? Keep consistent behavior of migration guide. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
added UT Closes #31039 from AngersZhuuuu/SPARK-25780-Follow-up. Authored-by: angerszhu Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/parser/AstBuilder.scala | 6 +- .../resources/sql-tests/inputs/group-by.sql | 10 +++ .../sql-tests/results/group-by.sql.out | 63 ++++++++++++++++++- 3 files changed, 77 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3ea86c6ea2abf..395a9563cdc0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -714,7 +714,11 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg val withProject = if (aggregationClause == null && havingClause != null) { if (conf.getConf(SQLConf.LEGACY_HAVING_WITHOUT_GROUP_BY_AS_WHERE)) { // If the legacy conf is set, treat HAVING without GROUP BY as WHERE. - withHavingClause(havingClause, createProject()) + val predicate = expression(havingClause.booleanExpression) match { + case p: Predicate => p + case e => Cast(e, BooleanType) + } + Filter(predicate, createProject()) } else { // According to SQL standard, HAVING without GROUP BY means global aggregate. withHavingClause(havingClause, Aggregate(Nil, namedExpressions, withFilter)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index 81e2204358bc9..6ee1014739759 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -86,6 +86,16 @@ SELECT 1 FROM range(10) HAVING MAX(id) > 0; SELECT id FROM range(10) HAVING id > 0; +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true; + +SELECT 1 FROM range(10) HAVING true; + +SELECT 1 FROM range(10) HAVING MAX(id) > 0; + +SELECT id FROM range(10) HAVING id > 0; + +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false; + -- Test data CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 75bda87b37642..cc07cd64f3a89 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 57 +-- Number of queries: 62 -- !query @@ -277,6 +277,67 @@ org.apache.spark.sql.AnalysisException grouping expressions sequence is empty, and '`id`' is not an aggregate function. Wrap '()' in windowing function(s) or wrap '`id`' in first() (or first_value) if you don't care which value you get. +-- !query +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=true +-- !query schema +struct +-- !query output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere true + + +-- !query +SELECT 1 FROM range(10) HAVING true +-- !query schema +struct<1:int> +-- !query output +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + +-- !query +SELECT 1 FROM range(10) HAVING MAX(id) > 0 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException + +Aggregate/Window/Generate expressions are not valid in where clause of the query. 
+Expression in where clause: [(max(`id`) > CAST(0 AS BIGINT))] +Invalid expressions: [max(`id`)] + + +-- !query +SELECT id FROM range(10) HAVING id > 0 +-- !query schema +struct +-- !query output +1 +2 +3 +4 +5 +6 +7 +8 +9 + + +-- !query +SET spark.sql.legacy.parser.havingWithoutGroupByAsWhere=false +-- !query schema +struct +-- !query output +spark.sql.legacy.parser.havingWithoutGroupByAsWhere false + + -- !query CREATE OR REPLACE TEMPORARY VIEW test_agg AS SELECT * FROM VALUES (1, true), (1, false), From b77d11dfd942ee2164dde2f5c25c6aaed65c444c Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 6 Jan 2021 11:19:44 +0900 Subject: [PATCH 0971/1009] [SPARK-34011][SQL] Refresh cache in `ALTER TABLE .. RENAME TO PARTITION` ### What changes were proposed in this pull request? 1. Invoke `refreshTable()` from `AlterTableRenamePartitionCommand.run()` after partitions renaming. In particular, this re-creates the cache associated with the modified table. 2. Refresh the cache associated with tables from v2 table catalogs in the `ALTER TABLE .. RENAME TO PARTITION` command. ### Why are the changes needed? This fixes the issues portrayed by the example: ```sql spark-sql> CREATE TABLE tbl1 (col0 int, part0 int) USING parquet PARTITIONED BY (part0); spark-sql> INSERT INTO tbl1 PARTITION (part0=0) SELECT 0; spark-sql> INSERT INTO tbl1 PARTITION (part0=1) SELECT 1; spark-sql> CACHE TABLE tbl1; spark-sql> SELECT * FROM tbl1; 0 0 1 1 spark-sql> ALTER TABLE tbl1 PARTITION (part0=0) RENAME TO PARTITION (part=2); spark-sql> SELECT * FROM tbl1; 0 0 1 1 ``` The last query must not return `0 2` since `0 0` was renamed by previous command. ### Does this PR introduce _any_ user-facing change? Yes. After the changes for the example above: ```sql ... spark-sql> ALTER TABLE tbl1 PARTITION (part=0) RENAME TO PARTITION (part=2); spark-sql> SELECT * FROM tbl1; 0 2 1 1 ``` ### How was this patch tested? By running the affected test suite: ``` $ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *AlterTableRenamePartitionSuite" ``` Closes #31044 from MaxGekk/rename-partition-refresh-cache. 
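For reference, the same cache-refresh check can be driven from the Scala API; this is only a sketch, assuming an active `spark` session in spark-shell and that `tbl1` does not already exist:

```scala
// Sketch only: mirrors the SQL example above through spark.sql calls.
spark.sql("CREATE TABLE tbl1 (col0 INT, part0 INT) USING parquet PARTITIONED BY (part0)")
spark.sql("INSERT INTO tbl1 PARTITION (part0=0) SELECT 0")
spark.sql("INSERT INTO tbl1 PARTITION (part0=1) SELECT 1")
spark.sql("CACHE TABLE tbl1")
spark.sql("ALTER TABLE tbl1 PARTITION (part0=0) RENAME TO PARTITION (part0=2)")
// With the refresh in place, the cached data reflects the renamed partition.
spark.sql("SELECT * FROM tbl1").show()
```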
Authored-by: Max Gekk Signed-off-by: HyukjinKwon --- .../apache/spark/sql/execution/command/ddl.scala | 1 + .../v2/AlterTableRenamePartitionExec.scala | 7 +++++-- .../datasources/v2/DataSourceV2Strategy.scala | 5 +++-- .../AlterTableRenamePartitionSuiteBase.scala | 15 +++++++++++++++ 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 5e3a67927e75a..8195d02e04b8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -536,6 +536,7 @@ case class AlterTableRenamePartitionCommand( catalog.renamePartitions( tableName, Seq(normalizedOldPartition), Seq(normalizedNewPartition)) + sparkSession.catalog.refreshTable(table.identifier.quotedString) Seq.empty[Row] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala index 38b83e3ad74e7..0632bd75102fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/AlterTableRenamePartitionExec.scala @@ -28,12 +28,15 @@ import org.apache.spark.sql.connector.catalog.SupportsPartitionManagement case class AlterTableRenamePartitionExec( table: SupportsPartitionManagement, from: ResolvedPartitionSpec, - to: ResolvedPartitionSpec) extends V2CommandExec { + to: ResolvedPartitionSpec, + refreshCache: () => Unit) extends V2CommandExec { override def output: Seq[Attribute] = Seq.empty override protected def run(): Seq[InternalRow] = { - table.renamePartition(from.ident, to.ident) + if (table.renamePartition(from.ident, to.ident)) { + refreshCache() + } Seq.empty } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 1537ebf8f305c..fa9519bf3233c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -368,11 +368,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRenamePartition( - ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => + r @ ResolvedTable(_, _, table: SupportsPartitionManagement, _), from, to) => AlterTableRenamePartitionExec( table, Seq(from).asResolvedPartitionSpecs.head, - Seq(to).asResolvedPartitionSpecs.head) :: Nil + Seq(to).asResolvedPartitionSpecs.head, + invalidateCache(r, recacheTable = true)) :: Nil case AlterTableRecoverPartitions(_: ResolvedTable) => throw new AnalysisException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 58055262d3f11..7f66e282499d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -163,4 +163,19 @@ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUt } } } + + test("SPARK-34011: refresh cache after partition renaming") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)") + sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0") + sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1") + assert(!spark.catalog.isCached(t)) + sql(s"CACHE TABLE $t") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1))) + sql(s"ALTER TABLE $t PARTITION (part=0) RENAME TO PARTITION (part=2)") + assert(spark.catalog.isCached(t)) + QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 2), Row(1, 1))) + } + } } From 3d8ee492d6cd0c086988f2970bc6ea1d70a98368 Mon Sep 17 00:00:00 2001 From: "Tom.Howland" Date: Wed, 6 Jan 2021 11:40:02 +0900 Subject: [PATCH 0972/1009] [SPARK-34015][R] Fixing input timing in gapply ### What changes were proposed in this pull request? When sparkR is run at log level INFO, a summary of how the worker spent its time processing the partition is printed. There is a logic error where it is over-reporting the time inputting rows. In detail: the variable inputElap in a wider context is used to mark the end of reading rows, but in the part changed here it was used as a local variable for measuring the beginning of compute time in a loop over the groups in the partition. Thus, the error is not observable if there is only one group per partition, which is what you get in unit tests. For our application, here's what a log entry looks like before these changes were applied: `20/10/09 04:08:58 INFO RRunner: Times: boot = 0.013 s, init = 0.005 s, broadcast = 0.000 s, read-input = 529.471 s, compute = 492.037 s, write-output = 0.020 s, total = 1021.546 s` this indicates that we're spending more time reading rows than operating on the rows. After these changes, it looks like this: `20/12/15 06:43:29 INFO RRunner: Times: boot = 0.013 s, init = 0.010 s, broadcast = 0.000 s, read-input = 120.275 s, compute = 1680.161 s, write-output = 0.045 s, total = 1812.553 s ` ### Why are the changes needed? Metrics shouldn't mislead? ### Does this PR introduce _any_ user-facing change? Aside from no longer misleading, no ### How was this patch tested? unit tests passed. Field test results seem plausible Closes #31021 from WamBamBoozle/input_timing. 
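The timing pattern being corrected can be illustrated outside the R worker. Below is a minimal Scala sketch, with placeholder groups and a stand-in for the user function, showing the idea of taking the compute start mark inside the loop instead of reusing the end-of-input mark:

```scala
// Sketch only: not the worker.R code, just the per-group timing accumulation idea.
def nowSecs(): Double = System.nanoTime() / 1e9

val groups: Seq[Seq[Int]] = Seq(Seq(1, 2), Seq(3), Seq(4, 5, 6)) // placeholder groups
var computeElapsed = 0.0
groups.foreach { group =>
  val computeStart = nowSecs()   // marks the start of compute for this group only
  val result = group.map(_ * 2)  // stand-in for the user-supplied function
  computeElapsed += nowSecs() - computeStart
}
println(f"compute = $computeElapsed%.3f s")
```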
Authored-by: Tom.Howland Signed-off-by: HyukjinKwon --- R/pkg/inst/worker/worker.R | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index dd271f91d0084..7fc4680bad10e 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -196,7 +196,7 @@ if (isEmpty != 0) { outputs <- list() for (i in seq_len(length(data))) { # Timing reading input data for execution - inputElap <- elapsedSecs() + computeStart <- elapsedSecs() output <- compute(mode, partition, serializer, deserializer, keys[[i]], colNames, computeFunc, data[[i]]) computeElap <- elapsedSecs() @@ -204,17 +204,18 @@ if (isEmpty != 0) { outputs[[length(outputs) + 1L]] <- output } else { outputResult(serializer, output, outputCon) + outputComputeElapsDiff <- outputComputeElapsDiff + (elapsedSecs() - computeElap) } - outputElap <- elapsedSecs() - computeInputElapsDiff <- computeInputElapsDiff + (computeElap - inputElap) - outputComputeElapsDiff <- outputComputeElapsDiff + (outputElap - computeElap) + computeInputElapsDiff <- computeInputElapsDiff + (computeElap - computeStart) } if (serializer == "arrow") { # See https://stat.ethz.ch/pipermail/r-help/2010-September/252046.html # rbind.fill might be an alternative to make it faster if plyr is installed. + outputStart <- elapsedSecs() combined <- do.call("rbind", outputs) SparkR:::writeSerializeInArrow(outputCon, combined) + outputComputeElapsDiff <- elapsedSecs() - outputStart } } } else { From 29510821a0e3b1e09a7710ed02a0fa1caab506af Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Tue, 5 Jan 2021 19:16:40 -0800 Subject: [PATCH 0973/1009] [SPARK-33029][CORE][WEBUI] Fix the UI executor page incorrectly marking the driver as excluded ### What changes were proposed in this pull request? Filter out the driver entity when updating the exclusion status of live executors(including the driver), so the UI won't be marked as excluded in the UI even if the node that hosts the driver has been marked as excluded. ### Why are the changes needed? Before this change, if we run spark with the standalone mode and with spark.blacklist.enabled=true. The driver will be marked as excluded when the host that hosts that driver has been marked as excluded. While it's incorrect because the exclude list feature will exclude executors only and the driver is still active. ![image](https://user-images.githubusercontent.com/26694233/103238740-35c05180-4911-11eb-99a2-c87c059ba0cf.png) After the fix, the driver won't be marked as excluded. ![image](https://user-images.githubusercontent.com/26694233/103238806-6f915800-4911-11eb-80d5-3c99266cfd0a.png) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual test. Reopen the UI and see the driver is no longer marked as excluded. Closes #30954 from baohe-zhang/SPARK-33029. 
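The core of the change is a filter that skips the driver entry when a host is excluded. A simplified sketch with stand-in types (not the actual AppStatusListener classes):

```scala
// Sketch only: excluding a host flips the flag on its executors but never on the driver.
case class LiveEntity(executorId: String, hostname: String, var excluded: Boolean = false)

val DRIVER_ID = "driver" // stands in for SparkContext.DRIVER_IDENTIFIER

def excludeHost(entities: Iterable[LiveEntity], host: String): Unit =
  entities
    .filter(e => e.hostname == host && e.executorId != DRIVER_ID)
    .foreach(_.excluded = true)

val entities = Seq(
  LiveEntity(DRIVER_ID, "node1"),
  LiveEntity("1", "node1"),
  LiveEntity("2", "node2"))
excludeHost(entities, "node1")
entities.foreach(e => println(s"${e.executorId} excluded=${e.excluded}")) // driver stays false
```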
Authored-by: Baohe Zhang Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/status/AppStatusListener.scala | 8 +++++--- .../executor_memory_usage_expectation.json | 4 ++-- .../executor_node_excludeOnFailure_expectation.json | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index bf19897e51fb3..6cb013b1a7c16 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -366,10 +366,12 @@ private[spark] class AppStatusListener( // Implicitly exclude every available executor for the stage associated with this node Option(liveStages.get((stageId, stageAttemptId))).foreach { stage => - val executorIds = liveExecutors.values.filter(_.host == hostId).map(_.executorId).toSeq + val executorIds = liveExecutors.values.filter(exec => exec.host == hostId + && exec.executorId != SparkContext.DRIVER_IDENTIFIER).map(_.executorId).toSeq setStageExcludedStatus(stage, now, executorIds: _*) } - liveExecutors.values.filter(_.hostname == hostId).foreach { exec => + liveExecutors.values.filter(exec => exec.hostname == hostId + && exec.executorId != SparkContext.DRIVER_IDENTIFIER).foreach { exec => addExcludedStageTo(exec, stageId, now) } } @@ -416,7 +418,7 @@ private[spark] class AppStatusListener( // Implicitly (un)exclude every executor associated with the node. liveExecutors.values.foreach { exec => - if (exec.hostname == host) { + if (exec.hostname == host && exec.executorId != SparkContext.DRIVER_IDENTIFIER) { updateExecExclusionStatus(exec, excluded, now) } } diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 9adda275b5609..51449340efe9f 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -16,7 +16,7 @@ "totalInputBytes" : 0, "totalShuffleRead" : 0, "totalShuffleWrite" : 0, - "isBlacklisted" : true, + "isBlacklisted" : false, "maxMemory" : 908381388, "addTime" : "2016-11-16T22:33:31.477GMT", "executorLogs" : { }, @@ -30,7 +30,7 @@ "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, - "isExcluded" : true, + "isExcluded" : false, "excludedInStages" : [ ] }, { "id" : "3", diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index 65bd309c1025e..47a01b2596de9 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -16,7 +16,7 @@ "totalInputBytes" : 0, "totalShuffleRead" : 0, "totalShuffleWrite" : 0, - "isBlacklisted" : true, + "isBlacklisted" : false, "maxMemory" : 908381388, "addTime" : "2016-11-16T22:33:31.477GMT", "executorLogs" : { }, @@ -30,7 +30,7 @@ "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, - "isExcluded" : true, + "isExcluded" : false, "excludedInStages" : [ ] }, { "id" : "3", From 2ab77d634f2e87b080786f4f39cb17e0994bc550 Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Tue, 5 Jan 2021 20:45:19 -0800 
Subject: [PATCH 0974/1009] [SPARK-34004][SQL] Change FrameLessOffsetWindowFunction as sealed abstract class ### What changes were proposed in this pull request? Change `FrameLessOffsetWindowFunction` as sealed abstract class so that simplify pattern match. ### Why are the changes needed? Simplify pattern match ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Jenkins test Closes #31026 from beliefer/SPARK-30789-followup. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 --- .../spark/sql/catalyst/expressions/windowExpressions.scala | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e41d3de642d51..883ff46148ca6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2150,9 +2150,6 @@ class Analyzer(override val catalogManager: CatalogManager) lead.copy(ignoreNulls = ignoreNulls) case lag: Lag => lag.copy(ignoreNulls = ignoreNulls) - case _ => - throw QueryCompilationErrors.functionWithUnsupportedSyntaxError( - owf.prettyName, "IGNORE NULLS") } } else { owf diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index b167499620c0f..1934a9b190fc7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -369,7 +369,7 @@ trait OffsetWindowFunction extends WindowFunction { * within the partition. For instance: a FrameLessOffsetWindowFunction for value x with offset -2, * will get the value of x 2 rows back from the current row in the partition. */ -abstract class FrameLessOffsetWindowFunction +sealed abstract class FrameLessOffsetWindowFunction extends OffsetWindowFunction with Unevaluable with ImplicitCastInputTypes { override def children: Seq[Expression] = Seq(input, offset, default) From b1c4fc7fc71530d2d257500484f959282f5b6d44 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 5 Jan 2021 21:50:16 -0800 Subject: [PATCH 0975/1009] [SPARK-34008][BUILD] Upgrade derby to 10.14.2.0 ### What changes were proposed in this pull request? This PR upgrades `derby` to `10.14.2.0`. You can check the major changes from the following URLs. * 10.13.1.1 http://svn.apache.org/repos/asf/db/derby/code/tags/10.13.1.1/RELEASE-NOTES.html * 10.14.1.0 http://svn.apache.org/repos/asf/db/derby/code/tags/10.14.1.0/RELEASE-NOTES.html * 10.14.2.0 http://svn.apache.org/repos/asf/db/derby/code/tags/10.14.2.0/RELEASE-NOTES.html ### Why are the changes needed? It seems to be the final release which supports `JDK8` as the minimum required version. After `10.15.1.3`, the minimum required version is `JDK9`. https://db.apache.org/derby/ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #31032 from sarutak/upgrade-derby. 
Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 index 9c516203dd3fa..8d8ef2e972098 100644 --- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3 @@ -58,7 +58,7 @@ curator-recipes/2.7.1//curator-recipes-2.7.1.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar -derby/10.12.1.1//derby-10.12.1.1.jar +derby/10.14.2.0//derby-10.14.2.0.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar flatbuffers-java/1.9.0//flatbuffers-java-1.9.0.jar generex/1.0.2//generex-1.0.2.jar diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 index 1d80fadb5762a..bf56fc18c0446 100644 --- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3 @@ -55,7 +55,7 @@ curator-recipes/2.13.0//curator-recipes-2.13.0.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar -derby/10.12.1.1//derby-10.12.1.1.jar +derby/10.14.2.0//derby-10.14.2.0.jar dnsjava/2.1.7//dnsjava-2.1.7.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar ehcache/3.3.1//ehcache-3.3.1.jar diff --git a/pom.xml b/pom.xml index 07c18f78e0735..f921e35a76b41 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,8 @@ 2.3 2.6.0 - 10.12.1.1 + + 10.14.2.0 1.10.1 1.6.6 9.4.34.v20201102 From fa9309001a47a2b87f7a735f964537886ed9bd4c Mon Sep 17 00:00:00 2001 From: "Jungtaek Lim (HeartSaVioR)" Date: Tue, 5 Jan 2021 21:59:49 -0800 Subject: [PATCH 0976/1009] [SPARK-33635][SS] Adjust the order of check in KafkaTokenUtil.needTokenUpdate to remedy perf regression ### What changes were proposed in this pull request? This PR proposes to adjust the order of check in KafkaTokenUtil.needTokenUpdate, so that short-circuit applies on the non-delegation token cases (insecure + secured without delegation token) and remedies the performance regression heavily. ### Why are the changes needed? There's a serious performance regression between Spark 2.4 vs Spark 3.0 on read path against Kafka data source. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually ran a reproducer (https://github.com/codegorillauk/spark-kafka-read with modification to just count instead of writing to Kafka topic) with measuring the time. > the branch applying the change with adding measurement https://github.com/HeartSaVioR/spark/commits/debug-SPARK-33635-v3.0.1 > the branch only adding measurement https://github.com/HeartSaVioR/spark/commits/debug-original-ver-SPARK-33635-v3.0.1 > the result (before the fix) count: 10280000 Took 41.634007047 secs 21/01/06 13:16:07 INFO KafkaDataConsumer: debug ver. 17-original 21/01/06 13:16:07 INFO KafkaDataConsumer: Total time taken to retrieve: 82118 ms > the result (after the fix) count: 10280000 Took 7.964058475 secs 21/01/06 13:08:22 INFO KafkaDataConsumer: debug ver. 17 21/01/06 13:08:22 INFO KafkaDataConsumer: Total time taken to retrieve: 987 ms Closes #31056 from HeartSaVioR/SPARK-33635. 
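The speed-up comes purely from `&&` short-circuiting. A toy sketch with stand-in checks (not the real KafkaTokenUtil signatures) shows why the cheap, usually-false condition should come first:

```scala
// Sketch only: the expensive check runs just when the cheap ones already passed.
def cheapChecks(params: Map[String, String]): Boolean =
  params.contains("sasl.jaas.config") // stands in for the clusterConfig/params checks

def expensiveCheck(): Boolean = {
  Thread.sleep(10) // stands in for the delegation-token service lookup
  true
}

def needTokenUpdate(params: Map[String, String]): Boolean =
  cheapChecks(params) && expensiveCheck() // skipped entirely on the common no-token path

needTokenUpdate(Map.empty)                      // returns quickly, expensiveCheck never runs
needTokenUpdate(Map("sasl.jaas.config" -> "x")) // only now pays for expensiveCheck
```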
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index bc790418decd3..f3f6b4de6f79c 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -273,8 +273,8 @@ private[spark] object KafkaTokenUtil extends Logging { sparkConf: SparkConf, params: ju.Map[String, Object], clusterConfig: Option[KafkaTokenClusterConf]): Boolean = { - if (HadoopDelegationTokenManager.isServiceEnabled(sparkConf, "kafka") && - clusterConfig.isDefined && params.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) { + if (clusterConfig.isDefined && params.containsKey(SaslConfigs.SASL_JAAS_CONFIG) && + HadoopDelegationTokenManager.isServiceEnabled(sparkConf, "kafka")) { logDebug("Delegation token used by connector, checking if uses the latest token.") val connectorJaasParams = params.get(SaslConfigs.SASL_JAAS_CONFIG).asInstanceOf[String] getTokenJaasParams(clusterConfig.get) != connectorJaasParams From c0d0dbabdb264180d5a88e2656e4a2fe353f21f1 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Tue, 5 Jan 2021 22:33:15 -0800 Subject: [PATCH 0977/1009] [SPARK-33934][SQL][FOLLOW-UP] Use SubProcessor's exit code as assert condition to fix flaky test ### What changes were proposed in this pull request? Follow comment and fix. flaky test https://github.com/apache/spark/pull/30973#issuecomment-754852130. This flaky test is similar as https://github.com/apache/spark/pull/30896 Some task's failed with root cause but in driver may return error without root cause , change. UT to check with status exit code since different root cause's exit code is not same. ### Why are the changes needed? Fix flaky test ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existed UT Closes #31046 from AngersZhuuuu/SPARK-33934-FOLLOW-UP. Lead-authored-by: angerszhu Co-authored-by: AngersZhuuuu Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/BaseScriptTransformationExec.scala | 2 +- sql/core/src/test/resources/test_script.py | 2 +- .../spark/sql/execution/BaseScriptTransformationSuite.scala | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index b66f94ae1107a..669b90f4d06dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution -import java.io._ +import java.io.{BufferedReader, File, InputStream, InputStreamReader, OutputStream} import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit diff --git a/sql/core/src/test/resources/test_script.py b/sql/core/src/test/resources/test_script.py index 75b4f106d3a1a..4fcd483f44d43 100644 --- a/sql/core/src/test/resources/test_script.py +++ b/sql/core/src/test/resources/test_script.py @@ -1,4 +1,4 @@ -#! 
/usr/bin/python +#!/usr/bin/env python3 # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala index a25e4b8f8ea07..cef870b249985 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala @@ -521,7 +521,10 @@ abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestU 'd.cast("string"), 'e.cast("string")).collect()) }.getMessage - assert(e1.contains("Permission denied")) + // Check with status exit code since in GA test, it may lose detail failed root cause. + // Different root cause's exitcode is not same. + // In this test, root cause is `Permission denied` + assert(e1.contains("Subprocess exited with status 126")) // test `/path/to/script.py' with script executable scriptFilePath.setExecutable(true) From 45a4ff8e5472ed724b1bba40ce4ee5d314bf72c2 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 5 Jan 2021 23:11:23 -0800 Subject: [PATCH 0978/1009] [SPARK-33948][SQL] Fix CodeGen error of MapObjects.doGenCode method in Scala 2.13 ### What changes were proposed in this pull request? `MapObjects.doGenCode` method will generate wrong code when `inputDataType` is `ArrayBuffer`. For example `encode/decode for Tuple2: (ArrayBuffer[(String, String)],ArrayBuffer((a,b))) (codegen path)` in `ExpressionEncoderSuite`, the error generated code part as follow: ``` /* 126 */ private scala.collection.mutable.ArrayBuffer MapObjects_0(InternalRow i) { /* 127 */ boolean isNull_4 = i.isNullAt(1); /* 128 */ ArrayData value_4 = isNull_4 ? /* 129 */ null : (i.getArray(1)); /* 130 */ scala.collection.mutable.ArrayBuffer value_3 = null; /* 131 */ /* 132 */ if (!isNull_4) { /* 133 */ /* 134 */ int dataLength_0 = value_4.numElements(); /* 135 */ /* 136 */ scala.Tuple2[] convertedArray_0 = null; /* 137 */ convertedArray_0 = new scala.Tuple2[dataLength_0]; /* 138 */ /* 139 */ /* 140 */ int loopIndex_0 = 0; /* 141 */ /* 142 */ while (loopIndex_0 < dataLength_0) { /* 143 */ value_MapObject_lambda_variable_1 = (InternalRow) (value_4.getStruct(loopIndex_0, 2)); /* 144 */ isNull_MapObject_lambda_variable_1 = value_4.isNullAt(loopIndex_0); /* 145 */ /* 146 */ boolean isNull_5 = false; /* 147 */ scala.Tuple2 value_5 = null; /* 148 */ if (!false && isNull_MapObject_lambda_variable_1) { /* 149 */ /* 150 */ isNull_5 = true; /* 151 */ value_5 = ((scala.Tuple2)null); /* 152 */ } else { /* 153 */ scala.Tuple2 value_13 = NewInstance_0(i); /* 154 */ isNull_5 = false; /* 155 */ value_5 = value_13; /* 156 */ } /* 157 */ if (isNull_5) { /* 158 */ convertedArray_0[loopIndex_0] = null; /* 159 */ } else { /* 160 */ convertedArray_0[loopIndex_0] = value_5; /* 161 */ } /* 162 */ /* 163 */ loopIndex_0 += 1; /* 164 */ } /* 165 */ /* 166 */ value_3 = new org.apache.spark.sql.catalyst.util.GenericArrayData(convertedArray_0); /* 167 */ } /* 168 */ globalIsNull_0 = isNull_4; /* 169 */ return value_3; /* 170 */ } ``` Line 166 in generated code try to assign `GenericArrayData` to `value_3(ArrayBuffer)` because `ArrayBuffer` type can't match `s.c.i.Seq` branch in Scala 2.13 in `MapObjects.doGenCode` method now. 
So this pr change to use `s.c.Seq` instead of `Seq` alias to let `ArrayBuffer` type can enter the same branch as Scala 2.12. After this pr the generate code when `inputDataType` is `ArrayBuffer` as follow: ``` /* 126 */ private scala.collection.mutable.ArrayBuffer MapObjects_0(InternalRow i) { /* 127 */ boolean isNull_4 = i.isNullAt(1); /* 128 */ ArrayData value_4 = isNull_4 ? /* 129 */ null : (i.getArray(1)); /* 130 */ scala.collection.mutable.ArrayBuffer value_3 = null; /* 131 */ /* 132 */ if (!isNull_4) { /* 133 */ /* 134 */ int dataLength_0 = value_4.numElements(); /* 135 */ /* 136 */ scala.collection.mutable.Builder collectionBuilder_0 = scala.collection.mutable.ArrayBuffer$.MODULE$.newBuilder(); /* 137 */ collectionBuilder_0.sizeHint(dataLength_0); /* 138 */ /* 139 */ /* 140 */ int loopIndex_0 = 0; /* 141 */ /* 142 */ while (loopIndex_0 < dataLength_0) { /* 143 */ value_MapObject_lambda_variable_1 = (InternalRow) (value_4.getStruct(loopIndex_0, 2)); /* 144 */ isNull_MapObject_lambda_variable_1 = value_4.isNullAt(loopIndex_0); /* 145 */ /* 146 */ boolean isNull_5 = false; /* 147 */ scala.Tuple2 value_5 = null; /* 148 */ if (!false && isNull_MapObject_lambda_variable_1) { /* 149 */ /* 150 */ isNull_5 = true; /* 151 */ value_5 = ((scala.Tuple2)null); /* 152 */ } else { /* 153 */ scala.Tuple2 value_13 = NewInstance_0(i); /* 154 */ isNull_5 = false; /* 155 */ value_5 = value_13; /* 156 */ } /* 157 */ if (isNull_5) { /* 158 */ collectionBuilder_0.$plus$eq(null); /* 159 */ } else { /* 160 */ collectionBuilder_0.$plus$eq(value_5); /* 161 */ } /* 162 */ /* 163 */ loopIndex_0 += 1; /* 164 */ } /* 165 */ /* 166 */ value_3 = (scala.collection.mutable.ArrayBuffer) collectionBuilder_0.result(); /* 167 */ } /* 168 */ globalIsNull_0 = isNull_4; /* 169 */ return value_3; /* 170 */ } ``` ### Why are the changes needed? Bug fix in Scala 2.13 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass the Jenkins or GitHub Action - Manual test `sql/catalyst` and `sql/core` in Scala 2.13 passed ``` mvn clean test -pl sql/catalyst -Pscala-2.13 Run completed in 11 minutes, 23 seconds. Total number of tests run: 4711 Suites: completed 261, aborted 0 Tests: succeeded 4711, failed 0, canceled 0, ignored 5, pending 0 All tests passed. ``` - Manual cherry-pick this pr to branch 3.1 and test`sql/catalyst` in Scala 2.13 passed ``` mvn clean test -pl sql/catalyst -Pscala-2.13 Run completed in 11 minutes, 18 seconds. Total number of tests run: 4655 Suites: completed 256, aborted 0 Tests: succeeded 4655, failed 0, canceled 0, ignored 5, pending 0 ``` Closes #31055 from LuciferYang/SPARK-33948. 
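The type relationship behind the fix can be checked directly in a REPL. A small sketch, assuming Scala 2.13 where the default `Seq` alias points at `scala.collection.immutable.Seq`:

```scala
// Sketch only: ArrayBuffer is a scala.collection.Seq but not an immutable.Seq on 2.13,
// so a pattern-match branch keyed on the immutable alias misses it.
import scala.collection.mutable.ArrayBuffer

val buf = ArrayBuffer(1, 2, 3)
println(classOf[scala.collection.Seq[_]].isAssignableFrom(buf.getClass))           // true
println(classOf[scala.collection.immutable.Seq[_]].isAssignableFrom(buf.getClass)) // false on 2.13
```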
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/catalyst/expressions/objects/objects.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 9303df75af503..f391b3128cf41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -954,7 +954,7 @@ case class MapObjects private( } else { doCodeGenForScala213 } - case Some(cls) if classOf[Seq[_]].isAssignableFrom(cls) || + case Some(cls) if classOf[scala.collection.Seq[_]].isAssignableFrom(cls) || classOf[scala.collection.Set[_]].isAssignableFrom(cls) => // Scala sequence or set val getBuilder = s"${cls.getName}$$.MODULE$$.newBuilder()" From 26d8df300a1a57e220b1a0f9814795f68101f28b Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 6 Jan 2021 08:25:34 +0000 Subject: [PATCH 0979/1009] [SPARK-33938][SQL] Optimize Like Any/All by LikeSimplification ### What changes were proposed in this pull request? We should optimize Like Any/All by LikeSimplification to improve performance. ### Why are the changes needed? Optimize Like Any/All ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Jenkins test. Closes #30975 from beliefer/SPARK-33938. Lead-authored-by: gengjiaan Co-authored-by: beliefer Co-authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../expressions/regexpExpressions.scala | 6 +- .../sql/catalyst/optimizer/expressions.scala | 81 +++++++++++++------ .../optimizer/LikeSimplificationSuite.scala | 68 ++++++++++++++++ 3 files changed, 128 insertions(+), 27 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index dae954a579eb3..011371a513a8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -181,7 +181,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) } } -abstract class MultiLikeBase +sealed abstract class MultiLikeBase extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { protected def patterns: Seq[UTF8String] @@ -220,7 +220,7 @@ abstract class MultiLikeBase /** * Optimized version of LIKE ALL, when all pattern values are literal. */ -abstract class LikeAllBase extends MultiLikeBase { +sealed abstract class LikeAllBase extends MultiLikeBase { override def matches(exprValue: String): Any = { if (cache.forall(matchFunc(_, exprValue))) { @@ -276,7 +276,7 @@ case class NotLikeAll(child: Expression, patterns: Seq[UTF8String]) extends Like /** * Optimized version of LIKE ANY, when all pattern values are literal. 
*/ -abstract class LikeAnyBase extends MultiLikeBase { +sealed abstract class LikeAnyBase extends MultiLikeBase { override def matches(exprValue: String): Any = { if (cache.exists(matchFunc(_, exprValue))) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 819bffeafb643..a40456da82977 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -21,7 +21,7 @@ import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} import org.apache.spark.sql.catalyst.analysis._ -import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, _} +import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, MultiLikeBase, _} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String /* * Optimization rules defined in this file should not affect the structure of the logical plan. @@ -634,36 +635,68 @@ object LikeSimplification extends Rule[LogicalPlan] { private val contains = "%([^_%]+)%".r private val equalTo = "([^_%]*)".r + private def simplifyLike( + input: Expression, pattern: String, escapeChar: Char = '\\'): Option[Expression] = { + if (pattern.contains(escapeChar)) { + // There are three different situations when pattern containing escapeChar: + // 1. pattern contains invalid escape sequence, e.g. 'm\aca' + // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' + // 3. pattern contains escaped escape character, e.g. 'ma\\ca' + // Although there are patterns can be optimized if we handle the escape first, we just + // skip this rule if pattern contains any escapeChar for simplicity. + None + } else { + pattern match { + case startsWith(prefix) => + Some(StartsWith(input, Literal(prefix))) + case endsWith(postfix) => + Some(EndsWith(input, Literal(postfix))) + // 'a%a' pattern is basically same with 'a%' && '%a'. + // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
+ case startsAndEndsWith(prefix, postfix) => + Some(And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), + And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))) + case contains(infix) => + Some(Contains(input, Literal(infix))) + case equalTo(str) => + Some(EqualTo(input, Literal(str))) + case _ => None + } + } + } + + private def simplifyMultiLike( + child: Expression, patterns: Seq[UTF8String], multi: MultiLikeBase): Expression = { + val (remainPatternMap, replacementMap) = + patterns.map { p => p -> simplifyLike(child, p.toString)}.partition(_._2.isEmpty) + val remainPatterns = remainPatternMap.map(_._1) + val replacements = replacementMap.map(_._2.get) + if (replacements.isEmpty) { + multi + } else { + multi match { + case l: LikeAll => And(replacements.reduceLeft(And), l.copy(patterns = remainPatterns)) + case l: NotLikeAll => + And(replacements.map(Not(_)).reduceLeft(And), l.copy(patterns = remainPatterns)) + case l: LikeAny => Or(replacements.reduceLeft(Or), l.copy(patterns = remainPatterns)) + case l: NotLikeAny => + Or(replacements.map(Not(_)).reduceLeft(Or), l.copy(patterns = remainPatterns)) + } + } + } + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { case l @ Like(input, Literal(pattern, StringType), escapeChar) => if (pattern == null) { // If pattern is null, return null value directly, since "col like null" == null. Literal(null, BooleanType) } else { - pattern.toString match { - // There are three different situations when pattern containing escapeChar: - // 1. pattern contains invalid escape sequence, e.g. 'm\aca' - // 2. pattern contains escaped wildcard character, e.g. 'ma\%ca' - // 3. pattern contains escaped escape character, e.g. 'ma\\ca' - // Although there are patterns can be optimized if we handle the escape first, we just - // skip this rule if pattern contains any escapeChar for simplicity. - case p if p.contains(escapeChar) => l - case startsWith(prefix) => - StartsWith(input, Literal(prefix)) - case endsWith(postfix) => - EndsWith(input, Literal(postfix)) - // 'a%a' pattern is basically same with 'a%' && '%a'. - // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. 
- case startsAndEndsWith(prefix, postfix) => - And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), - And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))) - case contains(infix) => - Contains(input, Literal(infix)) - case equalTo(str) => - EqualTo(input, Literal(str)) - case _ => l - } + simplifyLike(input, pattern.toString, escapeChar).getOrElse(l) } + case l @ LikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ NotLikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ LikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ NotLikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index 1812dce0da426..c06c92f9c1511 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -164,4 +164,72 @@ class LikeSimplificationSuite extends PlanTest { .analyze comparePlans(optimized5, correctAnswer5) } + + test("simplify LikeAll") { + val originalQuery = + testRelation + .where(('a likeAll( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((StartsWith('a, "abc") && EndsWith('a, "xyz")) && + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) && + Contains('a, "mn")) && ('a === "")) && ('a === "abc")) && + ('a likeAll("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify NotLikeAll") { + val originalQuery = + testRelation + .where(('a notLikeAll( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((Not(StartsWith('a, "abc")) && Not(EndsWith('a, "xyz"))) && + Not(Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) && + Not(Contains('a, "mn"))) && Not('a === "")) && Not('a === "abc")) && + ('a notLikeAll("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify LikeAny") { + val originalQuery = + testRelation + .where(('a likeAny( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((StartsWith('a, "abc") || EndsWith('a, "xyz")) || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) || + Contains('a, "mn")) || ('a === "")) || ('a === "abc")) || + ('a likeAny("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } + + test("simplify NotLikeAny") { + val originalQuery = + testRelation + .where(('a notLikeAny( + "abc%", "abc\\%", "%xyz", "abc\\%def", "abc%def", "%mn%", "%mn\\%", "", "abc"))) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where((((((Not(StartsWith('a, "abc")) || Not(EndsWith('a, "xyz"))) || + Not(Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) || + Not(Contains('a, "mn"))) || Not('a === "")) || Not('a === "abc")) || + ('a 
notLikeAny("abc\\%", "abc\\%def", "%mn\\%"))) + .analyze + + comparePlans(optimized, correctAnswer) + } } From f64dfa8727b785f333a0c10f5f7175ab51f22764 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 6 Jan 2021 14:55:40 +0530 Subject: [PATCH 0980/1009] [SPARK-32221][K8S] Avoid possible errors due to incorrect file size or type supplied in spark conf ### What changes were proposed in this pull request? Skip files if they are binary or very large to fit the configMap's max size. ### Why are the changes needed? Config map cannot hold binary files and there is also a limit on how much data a configMap can hold. This limit can be configured by the k8s cluster admin. This PR, skips such files (with a warning) instead of failing with weird runtime errors. If such files are not skipped, then it would result in mount errors or encoding errors (if binary files are submitted). ### Does this PR introduce _any_ user-facing change? yes, in simple words avoids possible errors due to negligence (for example, placing a large file or a binary file in SPARK_CONF_DIR) and thus improves user experience. ### How was this patch tested? Added relevant tests and improved existing tests. Closes #30472 from ScrapCodes/SPARK-32221/avoid-conf-propagate-errors. Lead-authored-by: Prashant Sharma Co-authored-by: Prashant Sharma Signed-off-by: Prashant Sharma --- .../org/apache/spark/deploy/k8s/Config.scala | 8 ++ .../k8s/submit/KubernetesClientUtils.scala | 80 +++++++++++++++---- .../spark/deploy/k8s/submit/ClientSuite.scala | 21 +++-- .../submit/KubernetesClientUtilsSuite.scala | 79 ++++++++++++++++++ 4 files changed, 164 insertions(+), 24 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 6939de4697979..8dca875b543c6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -99,6 +99,14 @@ private[spark] object Config extends Logging { .toSequence .createWithDefault(Nil) + val CONFIG_MAP_MAXSIZE = + ConfigBuilder("spark.kubernetes.configMap.maxSize") + .doc("Max size limit for a config map. 
This is configurable as per" + + " https://etcd.io/docs/v3.4.0/dev-guide/limit/ on k8s server end.") + .version("3.1.0") + .longConf + .createWithDefault(1572864) // 1.5 MiB + val KUBERNETES_AUTH_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" val KUBERNETES_AUTH_EXECUTOR_CONF_PREFIX = "spark.kubernetes.authenticate.executor" val KUBERNETES_AUTH_DRIVER_MOUNTED_CONF_PREFIX = "spark.kubernetes.authenticate.driver.mounted" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala index 32f630f77d666..4207077677c25 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala @@ -18,15 +18,17 @@ package org.apache.spark.deploy.k8s.submit import java.io.{File, StringWriter} +import java.nio.charset.MalformedInputException import java.util.Properties import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.io.{Codec, Source} import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, KeyToPath} import org.apache.spark.SparkConf -import org.apache.spark.deploy.k8s.{Constants, KubernetesUtils} +import org.apache.spark.deploy.k8s.{Config, Constants, KubernetesUtils} import org.apache.spark.deploy.k8s.Constants.ENV_SPARK_CONF_DIR import org.apache.spark.internal.Logging @@ -54,8 +56,10 @@ private[spark] object KubernetesClientUtils extends Logging { /** * Build, file -> 'file's content' map of all the selected files in SPARK_CONF_DIR. */ - def buildSparkConfDirFilesMap(configMapName: String, - sparkConf: SparkConf, resolvedPropertiesMap: Map[String, String]): Map[String, String] = { + def buildSparkConfDirFilesMap( + configMapName: String, + sparkConf: SparkConf, + resolvedPropertiesMap: Map[String, String]): Map[String, String] = synchronized { val loadedConfFilesMap = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) // Add resolved spark conf to the loaded configuration files map. if (resolvedPropertiesMap.nonEmpty) { @@ -90,29 +94,71 @@ private[spark] object KubernetesClientUtils extends Logging { .build() } - private def loadSparkConfDirFiles(conf: SparkConf): Map[String, String] = { + private def orderFilesBySize(confFiles: Seq[File]): Seq[File] = { + val fileToFileSizePairs = confFiles.map(f => (f, f.getName.length + f.length())) + // sort first by name and then by length, so that during tests we have consistent results. 
+ fileToFileSizePairs.sortBy(f => f._1).sortBy(f => f._2).map(_._1) + } + + // exposed for testing + private[submit] def loadSparkConfDirFiles(conf: SparkConf): Map[String, String] = { val confDir = Option(conf.getenv(ENV_SPARK_CONF_DIR)).orElse( conf.getOption("spark.home").map(dir => s"$dir/conf")) + val maxSize = conf.get(Config.CONFIG_MAP_MAXSIZE) if (confDir.isDefined) { - val confFiles = listConfFiles(confDir.get) - logInfo(s"Spark configuration files loaded from $confDir : ${confFiles.mkString(",")}") - confFiles.map { file => - val source = Source.fromFile(file)(Codec.UTF8) - val mapping = (file.getName -> source.mkString) - source.close() - mapping - }.toMap + val confFiles: Seq[File] = listConfFiles(confDir.get, maxSize) + val orderedConfFiles = orderFilesBySize(confFiles) + var truncatedMapSize: Long = 0 + val truncatedMap = mutable.HashMap[String, String]() + val skippedFiles = mutable.HashSet[String]() + var source: Source = Source.fromString("") // init with empty source. + for (file <- orderedConfFiles) { + try { + source = Source.fromFile(file)(Codec.UTF8) + val (fileName, fileContent) = file.getName -> source.mkString + if ((truncatedMapSize + fileName.length + fileContent.length) < maxSize) { + truncatedMap.put(fileName, fileContent) + truncatedMapSize = truncatedMapSize + (fileName.length + fileContent.length) + } else { + skippedFiles.add(fileName) + } + } catch { + case e: MalformedInputException => + logWarning( + s"Unable to read a non UTF-8 encoded file ${file.getAbsolutePath}. Skipping...", e) + None + } finally { + source.close() + } + } + if (truncatedMap.nonEmpty) { + logInfo(s"Spark configuration files loaded from $confDir :" + + s" ${truncatedMap.keys.mkString(",")}") + } + if (skippedFiles.nonEmpty) { + logWarning(s"Skipped conf file(s) ${skippedFiles.mkString(",")}, due to size constraint." + + s" Please see, config: `${Config.CONFIG_MAP_MAXSIZE.key}` for more details.") + } + truncatedMap.toMap } else { Map.empty[String, String] } } - private def listConfFiles(confDir: String): Seq[File] = { - // We exclude all the template files and user provided spark conf or properties. - // As spark properties are resolved in a different step. + private def listConfFiles(confDir: String, maxSize: Long): Seq[File] = { + // At the moment configmaps do not support storing binary content (i.e. skip jar,tar,gzip,zip), + // and configMaps do not allow for size greater than 1.5 MiB(configurable). + // https://etcd.io/docs/v3.4.0/dev-guide/limit/ + def testIfTooLargeOrBinary(f: File): Boolean = (f.length() + f.getName.length > maxSize) || + f.getName.matches(".*\\.(gz|zip|jar|tar)") + + // We exclude all the template files and user provided spark conf or properties, + // Spark properties are resolved in a different step. 
+ def testIfSparkConfOrTemplates(f: File) = f.getName.matches(".*\\.template") || + f.getName.matches("spark.*(conf|properties)") + val fileFilter = (f: File) => { - f.isFile && !(f.getName.endsWith("template") || - f.getName.matches("spark.*(conf|properties)")) + f.isFile && !testIfTooLargeOrBinary(f) && !testIfSparkConfOrTemplates(f) } val confFiles: Seq[File] = { val dir = new File(confDir) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala index 1a14d524003c0..18d0c00edf2c0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/ClientSuite.scala @@ -191,25 +191,32 @@ class ClientSuite extends SparkFunSuite with BeforeAndAfter { assert(configMap.getData.get(SPARK_CONF_FILE_NAME).contains("conf2key=conf2value")) } - test("All files from SPARK_CONF_DIR, except templates and spark config " + + test("All files from SPARK_CONF_DIR, " + + "except templates, spark config, binary files and are within size limit, " + "should be populated to pod's configMap.") { def testSetup: (SparkConf, Seq[String]) = { val tempDir = Utils.createTempDir() - val sparkConf = new SparkConf(loadDefaults = false).setSparkHome(tempDir.getAbsolutePath) + val sparkConf = new SparkConf(loadDefaults = false) + .setSparkHome(tempDir.getAbsolutePath) val tempConfDir = new File(s"${tempDir.getAbsolutePath}/conf") tempConfDir.mkdir() // File names - which should not get mounted on the resultant config map. val filteredConfFileNames = - Set("spark-env.sh.template", "spark.properties", "spark-defaults.conf") - val confFileNames = for (i <- 1 to 5) yield s"testConf.$i" ++ + Set("spark-env.sh.template", "spark.properties", "spark-defaults.conf", + "test.gz", "test2.jar", "non_utf8.txt") + val confFileNames = (for (i <- 1 to 5) yield s"testConf.$i") ++ List("spark-env.sh") ++ filteredConfFileNames - val testConfFiles = for (i <- confFileNames) yield { + val testConfFiles = (for (i <- confFileNames) yield { val file = new File(s"${tempConfDir.getAbsolutePath}/$i") - Files.write(file.toPath, "conf1key=conf1value".getBytes(StandardCharsets.UTF_8)) + if (i.startsWith("non_utf8")) { // filling some non-utf-8 binary + Files.write(file.toPath, Array[Byte](0x00.toByte, 0xA1.toByte)) + } else { + Files.write(file.toPath, "conf1key=conf1value".getBytes(StandardCharsets.UTF_8)) + } file.getName - } + }) assert(tempConfDir.listFiles().length == confFileNames.length) val expectedConfFiles: Seq[String] = testConfFiles.filterNot(filteredConfFileNames.contains) (sparkConf, expectedConfFiles) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala new file mode 100644 index 0000000000000..ee672cc041330 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtilsSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.k8s.submit + +import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.Files + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.k8s.Config +import org.apache.spark.util.Utils + +class KubernetesClientUtilsSuite extends SparkFunSuite with BeforeAndAfter { + + def testSetup(inputFiles: Map[String, Array[Byte]]): SparkConf = { + val tempDir = Utils.createTempDir() + val sparkConf = new SparkConf(loadDefaults = false) + .setSparkHome(tempDir.getAbsolutePath) + + val tempConfDir = new File(s"${tempDir.getAbsolutePath}/conf") + tempConfDir.mkdir() + for (i <- inputFiles) yield { + val file = new File(s"${tempConfDir.getAbsolutePath}/${i._1}") + Files.write(file.toPath, i._2) + file.getName + } + sparkConf + } + + test("verify load files, loads only allowed files and not the disallowed files.") { + val input: Map[String, Array[Byte]] = Map("test.txt" -> "test123", "z12.zip" -> "zZ", + "rere.jar" -> "@31", "spark.jar" -> "@31", "_test" -> "", "sample.conf" -> "conf") + .map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8)) ++ + Map("binary-file.conf" -> Array[Byte](0x00.toByte, 0xA1.toByte)) + val sparkConf = testSetup(input) + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("test.txt" -> "test123", "sample.conf" -> "conf", "_test" -> "") + assert(output === expectedOutput) + } + + test("verify load files, truncates the content to maxSize, when keys are very large in number.") { + val input = (for (i <- 10000 to 1 by -1) yield (s"testConf.${i}" -> "test123456")).toMap + val sparkConf = testSetup(input.map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8))) + .set(Config.CONFIG_MAP_MAXSIZE.key, "60") + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("testConf.1" -> "test123456", "testConf.2" -> "test123456") + assert(output === expectedOutput) + val output1 = KubernetesClientUtils.loadSparkConfDirFiles( + sparkConf.set(Config.CONFIG_MAP_MAXSIZE.key, "250000")) + assert(output1 === input) + } + + test("verify load files, truncates the content to maxSize, when keys are equal in length.") { + val input = (for (i <- 9 to 1 by -1) yield (s"testConf.${i}" -> "test123456")).toMap + val sparkConf = testSetup(input.map(f => f._1 -> f._2.getBytes(StandardCharsets.UTF_8))) + .set(Config.CONFIG_MAP_MAXSIZE.key, "80") + val output = KubernetesClientUtils.loadSparkConfDirFiles(sparkConf) + val expectedOutput = Map("testConf.1" -> "test123456", "testConf.2" -> "test123456", + "testConf.3" -> "test123456") + assert(output === expectedOutput) + } +} From ff284fb6ac624b2f38ef12f9b840be3077cd27a6 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 6 Jan 2021 18:46:20 +0900 Subject: [PATCH 0981/1009] [SPARK-30681][PYTHON][FOLLOW-UP] Keep the name similar with Scala side in higher order functions ### What 
changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/27406. It fixes the naming to match with Scala side. Note that there are a bit of inconsistency already e.g.) `col`, `e`, `expr` and `column`. This part I did not change but other names like `zero` vs `initialValue` or `col1`/`col2` vs `left`/`right` looks unnecessary. ### Why are the changes needed? To make the usage similar with Scala side, and for consistency. ### Does this PR introduce _any_ user-facing change? No, this is not released yet. ### How was this patch tested? GitHub Actions and Jenkins build will test it out. Closes #31062 from HyukjinKwon/SPARK-30681. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- python/pyspark/sql/functions.py | 16 ++++++++-------- python/pyspark/sql/functions.pyi | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index f612d2d0366f2..c9d24dc668b8e 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -4355,7 +4355,7 @@ def filter(col, f): return _invoke_higher_order_function("ArrayFilter", [col], [f]) -def aggregate(col, zero, merge, finish=None): +def aggregate(col, initialValue, merge, finish=None): """ Applies a binary operator to an initial state and all elements in the array, and reduces this to a single state. The final state is converted into the final result @@ -4372,7 +4372,7 @@ def aggregate(col, zero, merge, finish=None): ---------- col : :class:`Column` or str name of column or expression - zero : :class:`Column` or str + initialValue : :class:`Column` or str initial value. Name of column or expression merge : function a binary function ``(acc: Column, x: Column) -> Column...`` returning expression @@ -4416,19 +4416,19 @@ def aggregate(col, zero, merge, finish=None): if finish is not None: return _invoke_higher_order_function( "ArrayAggregate", - [col, zero], + [col, initialValue], [merge, finish] ) else: return _invoke_higher_order_function( "ArrayAggregate", - [col, zero], + [col, initialValue], [merge] ) -def zip_with(col1, col2, f): +def zip_with(left, right, f): """ Merge two given arrays, element-wise, into a single array using a function. If one array is shorter, nulls are appended at the end to match the length of the longer @@ -4438,9 +4438,9 @@ def zip_with(col1, col2, f): Parameters ---------- - col1 : :class:`Column` or str + left : :class:`Column` or str name of the first column or expression - col2 : :class:`Column` or str + right : :class:`Column` or str name of the second column or expression f : function a binary function ``(x1: Column, x2: Column) -> Column...`` @@ -4471,7 +4471,7 @@ def zip_with(col1, col2, f): |[foo_1, bar_2, 3]| +-----------------+ """ - return _invoke_higher_order_function("ZipWith", [col1, col2], [f]) + return _invoke_higher_order_function("ZipWith", [left, right], [f]) def transform_keys(col, f): diff --git a/python/pyspark/sql/functions.pyi b/python/pyspark/sql/functions.pyi index acb17a2657d00..0cf60c0c26500 100644 --- a/python/pyspark/sql/functions.pyi +++ b/python/pyspark/sql/functions.pyi @@ -237,13 +237,13 @@ def filter(col: ColumnOrName, f: Callable[[Column], Column]) -> Column: ... def filter(col: ColumnOrName, f: Callable[[Column, Column], Column]) -> Column: ... def aggregate( col: ColumnOrName, - zero: ColumnOrName, + initialValue: ColumnOrName, merge: Callable[[Column, Column], Column], finish: Optional[Callable[[Column], Column]] = ..., ) -> Column: ... 
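```
# Editor's sketch, not part of this diff: how the renamed arguments read at the call
# site, assuming PySpark 3.1+ with an active SparkSession `spark`. Only the keyword
# names change (`initialValue`, `left`, `right`); positional calls are unaffected.
from pyspark.sql.functions import aggregate, lit, zip_with

df = spark.createDataFrame([(1, [20.0, 4.0, 2.0], [1.0, 2.0, 3.0])], ("id", "xs", "ys"))
df.select(
    aggregate("xs", initialValue=lit(0.0), merge=lambda acc, x: acc + x).alias("sum_xs"),
    zip_with(left="xs", right="ys", f=lambda x, y: x + y).alias("pair_sums"),
).show()
```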
def zip_with( - col1: ColumnOrName, - ColumnOrName: ColumnOrName, + left: ColumnOrName, + right: ColumnOrName, f: Callable[[Column, Column], Column], ) -> Column: ... def transform_keys( From 0d86a02ffbaf53c403a4c68bac0041e84acb0cdd Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Wed, 6 Jan 2021 20:31:27 +0900 Subject: [PATCH 0982/1009] [SPARK-34022][DOCS] Support latest mkdocs in SQL built-in function docs ### What changes were proposed in this pull request? This PR adds the support of the latest mkdocs, and makes the sidebar properly show. It works in lower versions too. Before: ![Screen Shot 2021-01-06 at 5 11 56 PM](https://user-images.githubusercontent.com/6477701/103745131-4e7fe400-5042-11eb-9c09-84f9f95e9fb9.png) After: ![Screen Shot 2021-01-06 at 5 10 53 PM](https://user-images.githubusercontent.com/6477701/103745139-5049a780-5042-11eb-8ded-30b6f7ef48aa.png) ### Why are the changes needed? This is a regression in the documentation. ### Does this PR introduce _any_ user-facing change? Technically no. It's not related yet. It fixes the list on the sidebar appears properly. ### How was this patch tested? Manually built the docs via `./sql/create-docs.sh` and `open ./sql/site/index.html` Closes #31061 from HyukjinKwon/SPARK-34022. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- sql/gen-sql-api-docs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 61328997c1c58..72518504847db 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -195,6 +195,7 @@ def generate_sql_api_markdown(jvm, path): """ with open(path, 'w') as mdfile: + mdfile.write("# Built-in Finctions\n\n") for info in _list_function_infos(jvm): name = info.name usage = _make_pretty_usage(info.usage) From 6788304240c416d173ebdb3d544f3361c6b9fe8e Mon Sep 17 00:00:00 2001 From: gengjiaan Date: Wed, 6 Jan 2021 21:14:45 +0900 Subject: [PATCH 0983/1009] [SPARK-33977][SQL][DOCS] Add doc for "'like any' and 'like all' operators" ### What changes were proposed in this pull request? Add doc for 'like any' and 'like all' operators in sql-ref-syntx-qry-select-like.cmd ### Why are the changes needed? make the usage of 'like any' and 'like all' known to more users ### Does this PR introduce _any_ user-facing change? Yes. Screen Shot 2021-01-06 at 21 10 38 Screen Shot 2021-01-06 at 21 11 06 Screen Shot 2021-01-06 at 21 11 20 ### How was this patch tested? No tests Closes #31008 from beliefer/SPARK-33977. Lead-authored-by: gengjiaan Co-authored-by: beliefer Signed-off-by: Takeshi Yamamuro --- docs/sql-ref-syntax-qry-select-like.md | 60 +++++++++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-qry-select-like.md b/docs/sql-ref-syntax-qry-select-like.md index 6211faa8d529e..3604a9ba1ea02 100644 --- a/docs/sql-ref-syntax-qry-select-like.md +++ b/docs/sql-ref-syntax-qry-select-like.md @@ -21,12 +21,14 @@ license: | ### Description -A LIKE predicate is used to search for a specific pattern. +A LIKE predicate is used to search for a specific pattern. This predicate also supports multiple patterns with quantifiers include `ANY`, `SOME` and `ALL`. ### Syntax ```sql [ NOT ] { LIKE search_pattern [ ESCAPE esc_char ] | [ RLIKE | REGEXP ] regex_pattern } + +[ NOT ] { LIKE quantifiers ( search_pattern [ , ... ]) } ``` ### Parameters @@ -45,6 +47,10 @@ A LIKE predicate is used to search for a specific pattern. * **regex_pattern** Specifies a regular expression search pattern to be searched by the `RLIKE` or `REGEXP` clause. 
+ +* **quantifiers** + + Specifies the predicate quantifiers include `ANY`, `SOME` and `ALL`. `ANY` or `SOME` means if one of the patterns matches the input, then return true; `ALL` means if all the patterns matches the input, then return true. ### Examples @@ -111,6 +117,58 @@ SELECT * FROM person WHERE name LIKE '%$_%' ESCAPE '$'; +---+------+---+ |500|Evan_W| 16| +---+------+---+ + +SELECT * FROM person WHERE name LIKE ALL ('%an%', '%an'); ++---+----+----+ +| id|name| age| ++---+----+----+ +|400| Dan| 50| ++---+----+----+ + +SELECT * FROM person WHERE name LIKE ANY ('%an%', '%an'); ++---+------+---+ +| id| name|age| ++---+------+---+ +|400| Dan| 50| +|500|Evan_W| 16| ++---+------+---+ + +SELECT * FROM person WHERE name LIKE SOME ('%an%', '%an'); ++---+------+---+ +| id| name|age| ++---+------+---+ +|400| Dan| 50| +|500|Evan_W| 16| ++---+------+---+ + +SELECT * FROM person WHERE name NOT LIKE ALL ('%an%', '%an'); ++---+----+----+ +| id|name| age| ++---+----+----+ +|100|John| 30| +|200|Mary|null| +|300|Mike| 80| ++---+----+----+ + +SELECT * FROM person WHERE name NOT LIKE ANY ('%an%', '%an'); ++---+------+----+ +| id| name| age| ++---+------+----+ +|100| John| 30| +|200| Mary|null| +|300| Mike| 80| +|500|Evan_W| 16| ++---+------+----+ + +SELECT * FROM person WHERE name NOT LIKE SOME ('%an%', '%an'); ++---+------+----+ +| id| name| age| ++---+------+----+ +|100| John| 30| +|200| Mary|null| +|300| Mike| 80| +|500|Evan_W| 16| ++---+------+----+ ``` ### Related Statements From 3cdc4ef5b41ce1254610436a8721ea517124d62e Mon Sep 17 00:00:00 2001 From: angerszhu Date: Wed, 6 Jan 2021 13:45:48 +0000 Subject: [PATCH 0984/1009] [SPARK-32685][SQL][FOLLOW-UP] Update migration guide about change default filed.delim to '\t' when user specifies serde ### What changes were proposed in this pull request? Update migration guide according to https://github.com/apache/spark/pull/30942#issuecomment-755054562 ### Why are the changes needed? update migration guide. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #31051 from AngersZhuuuu/SPARK-32685-FOLLOW-UP. Authored-by: angerszhu Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index bd54554baa09d..8cf1a9c6f7017 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -30,7 +30,7 @@ license: | - In Spark 3.2, `ALTER TABLE .. RENAME TO PARTITION` throws `PartitionAlreadyExistsException` instead of `AnalysisException` for tables from Hive external when the target partition already exists. - - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`. + - In Spark 3.2, script transform default FIELD DELIMIT is `\u0001` for no serde mode, serde property `field.delim` is `\t` for Hive serde mode when user specifies serde. In Spark 3.1 or earlier, the default FIELD DELIMIT is `\t`, serde property `field.delim` is `\u0001` for Hive serde mode when user specifies serde. ## Upgrading from Spark SQL 3.0 to 3.1 From a0269bb419a37c31850e02884385b889cd153133 Mon Sep 17 00:00:00 2001 From: Kazuaki Ishizaki Date: Wed, 6 Jan 2021 09:28:22 -0800 Subject: [PATCH 0985/1009] [SPARK-34022][DOCS][FOLLOW-UP] Fix typo in SQL built-in function docs ### What changes were proposed in this pull request? This PR is a follow-up of #31061. 
It fixes a typo in a document: `Finctions` -> `Functions` ### Why are the changes needed? Make the change better documented. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #31069 from kiszk/SPARK-34022-followup. Authored-by: Kazuaki Ishizaki Signed-off-by: Dongjoon Hyun --- sql/gen-sql-api-docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/gen-sql-api-docs.py b/sql/gen-sql-api-docs.py index 72518504847db..2f734093b106c 100644 --- a/sql/gen-sql-api-docs.py +++ b/sql/gen-sql-api-docs.py @@ -195,7 +195,7 @@ def generate_sql_api_markdown(jvm, path): """ with open(path, 'w') as mdfile: - mdfile.write("# Built-in Finctions\n\n") + mdfile.write("# Built-in Functions\n\n") for info in _list_function_infos(jvm): name = info.name usage = _make_pretty_usage(info.usage) From 8bb70bf0d646f6d54d17690d23ee935e452e747e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 12:59:47 -0800 Subject: [PATCH 0986/1009] [SPARK-34029][SQL][TESTS] Add OrcEncryptionSuite and FakeKeyProvider ### What changes were proposed in this pull request? This PR aims to add a basis for columnar encryption test framework by add `OrcEncryptionSuite` and `FakeKeyProvider`. Please note that we will improve more in both Apache Spark and Apache ORC in Apache Spark 3.2.0 timeframe. ### Why are the changes needed? Apache ORC 1.6 supports columnar encryption. ### Does this PR introduce _any_ user-facing change? No. This is for a test case. ### How was this patch tested? Pass the newly added test suite. Closes #31065 from dongjoon-hyun/SPARK-34029. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 1 + .../datasources/orc/FakeKeyProvider.java | 144 ++++++++++++++++++ ...pache.hadoop.crypto.key.KeyProviderFactory | 16 ++ .../datasources/orc/OrcEncryptionSuite.scala | 98 ++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java create mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 668701be0ae98..f126ee35efcca 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -489,6 +489,7 @@ object SparkParallelTestGrouping { "org.apache.spark.sql.catalyst.expressions.HashExpressionsSuite", "org.apache.spark.sql.catalyst.expressions.CastSuite", "org.apache.spark.sql.catalyst.expressions.MathExpressionsSuite", + "org.apache.spark.sql.execution.datasources.orc.OrcEncryptionSuite", "org.apache.spark.sql.hive.HiveExternalCatalogSuite", "org.apache.spark.sql.hive.StatisticsSuite", "org.apache.spark.sql.hive.client.VersionsSuite", diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java new file mode 100644 index 0000000000000..c48543802eb33 --- /dev/null +++ b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test.org.apache.spark.sql.execution.datasources.orc; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderFactory; +import org.apache.hadoop.crypto.key.kms.KMSClientProvider; + +/** + * A Hadoop KeyProvider that lets us test the interaction + * with the Hadoop code. + * + * https://github.com/apache/orc/blob/rel/release-1.6.6/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java + * + * This file intentionally keeps the original file except + * (1) package name, (2) import order, (3) a few indentation + */ +public class FakeKeyProvider extends KeyProvider { + // map from key name to metadata + private final Map keyMetdata = new HashMap<>(); + // map from key version name to material + private final Map keyVersions = new HashMap<>(); + + public FakeKeyProvider(Configuration conf) { + super(conf); + } + + @Override + public KeyVersion getKeyVersion(String name) { + return keyVersions.get(name); + } + + @Override + public List getKeys() { + return new ArrayList<>(keyMetdata.keySet()); + } + + @Override + public List getKeyVersions(String name) { + List result = new ArrayList<>(); + Metadata meta = getMetadata(name); + for(int v=0; v < meta.getVersions(); ++v) { + String versionName = buildVersionName(name, v); + KeyVersion material = keyVersions.get(versionName); + if (material != null) { + result.add(material); + } + } + return result; + } + + @Override + public Metadata getMetadata(String name) { + return keyMetdata.get(name); + } + + @Override + public KeyVersion createKey(String name, byte[] bytes, Options options) { + String versionName = buildVersionName(name, 0); + keyMetdata.put(name, new TestMetadata(options.getCipher(), + options.getBitLength(), 1)); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void deleteKey(String name) { + throw new UnsupportedOperationException("Can't delete keys"); + } + + @Override + public KeyVersion rollNewVersion(String name, byte[] bytes) { + TestMetadata key = keyMetdata.get(name); + String versionName = buildVersionName(name, key.addVersion()); + KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, + bytes); + keyVersions.put(versionName, result); + return result; + } + + @Override + public void flush() { + // Nothing + } + + static class TestMetadata extends KeyProvider.Metadata { + + TestMetadata(String cipher, int bitLength, int versions) { + super(cipher, bitLength, null, null, null, versions); + } + + public int addVersion() { + return super.addVersion(); + } + } + + public static class Factory 
extends KeyProviderFactory { + + @Override + public KeyProvider createProvider(URI uri, Configuration conf) throws IOException { + if ("test".equals(uri.getScheme())) { + KeyProvider provider = new FakeKeyProvider(conf); + // populate a couple keys into the provider + byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; + org.apache.hadoop.crypto.key.KeyProvider.Options aes128 = new KeyProvider.Options(conf); + provider.createKey("pii", piiKey, aes128); + byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, + 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; + provider.rollNewVersion("pii", piiKey2); + byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, + 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; + provider.createKey("secret", secretKey, aes128); + return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); + } + return null; + } + } +} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory new file mode 100644 index 0000000000000..f436622b5fb42 --- /dev/null +++ b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala new file mode 100644 index 0000000000000..fac3cef5801dd --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.datasources.orc + +import org.apache.spark.sql.Row +import org.apache.spark.sql.test.SharedSparkSession + +class OrcEncryptionSuite extends OrcTest with SharedSparkSession { + import testImplicits._ + + val originalData = Seq(("123456789", "dongjoon@apache.org", "Dongjoon Hyun")) + val rowDataWithoutKey = + Row(null, "841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3", "Dongjoon Hyun") + + test("Write and read an encrypted file") { + val df = originalData.toDF("ssn", "email", "name") + + withTempPath { dir => + val path = dir.getAbsolutePath + withSQLConf( + "hadoop.security.key.provider.path" -> "test:///", + "orc.key.provider" -> "hadoop", + "orc.encrypt" -> "pii:ssn,email", + "orc.mask" -> "nullify:ssn;sha256:email") { + df.write.mode("overwrite").orc(path) + checkAnswer(spark.read.orc(path), df) + } + + withSQLConf( + "orc.key.provider" -> "memory", + "orc.encrypt" -> "pii:ssn,email", + "orc.mask" -> "nullify:ssn;sha256:email") { + checkAnswer(spark.read.orc(path), rowDataWithoutKey) + } + } + } + + test("Write and read an encrypted table") { + val df = originalData.toDF("ssn", "email", "name") + + withTempPath { dir => + val path = dir.getAbsolutePath + withTable("encrypted") { + sql( + s""" + |CREATE TABLE encrypted ( + | ssn STRING, + | email STRING, + | name STRING + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | hadoop.security.key.provider.path "test:///", + | orc.key.provider "hadoop", + | orc.encrypt "pii:ssn,email", + | orc.mask "nullify:ssn;sha256:email" + |) + |""".stripMargin) + sql("INSERT INTO encrypted VALUES('123456789', 'dongjoon@apache.org', 'Dongjoon Hyun')") + checkAnswer(sql("SELECT * FROM encrypted"), df) + } + withTable("normal") { + sql( + s""" + |CREATE TABLE normal ( + | ssn STRING, + | email STRING, + | name STRING + |) + |USING ORC + |LOCATION "$path" + |OPTIONS ( + | orc.key.provider "memory", + | orc.encrypt "pii:ssn,email", + | orc.mask "nullify:ssn;sha256:email" + |) + |""".stripMargin) + checkAnswer(sql("SELECT * FROM normal"), rowDataWithoutKey) + } + } + } +} From f9daf035f473fea12a2ee67428db8d78f29973d5 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 6 Jan 2021 17:22:14 -0800 Subject: [PATCH 0987/1009] [SPARK-33806][SQL][FOLLOWUP] Fold RepartitionExpression num partition should check if partition expression is empty ### What changes were proposed in this pull request? Add check partition expressions is empty. ### Why are the changes needed? We should keep `spark.range(1).hint("REPARTITION_BY_RANGE")` has default shuffle number instead of 1. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Add test. Closes #31074 from ulysses-you/SPARK-33806-FOLLOWUP. 
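As an editor's illustration (not part of this patch), the intended user-visible effect from PySpark, assuming the default `spark.sql.shuffle.partitions` of 200; the expected counts in the comments are a sketch, not output captured from a real session.
```
from pyspark.sql.functions import lit

# Hint with no partition expressions: keep the default shuffle partition number.
spark.range(1).hint("REPARTITION_BY_RANGE").rdd.getNumPartitions()   # expected: 200
# All partition expressions foldable: still folded to a single partition (SPARK-33806).
spark.range(1).repartition(lit(1)).rdd.getNumPartitions()            # expected: 1
```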
Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index ee7db7ae83542..9e06f9bec7830 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1050,7 +1050,7 @@ case class RepartitionByExpression( val numPartitions = if (optNumPartitions.nonEmpty) { optNumPartitions.get } else { - if (partitionExpressions.forall(_.foldable)) { + if (partitionExpressions.nonEmpty && partitionExpressions.forall(_.foldable)) { 1 } else { SQLConf.get.numShufflePartitions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 44f3c3449ddda..3f55a88f19505 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3768,6 +3768,17 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("Fold RepartitionExpression num partition should check if partition expression is empty") { + withSQLConf((SQLConf.SHUFFLE_PARTITIONS.key, "5")) { + val df = spark.range(1).hint("REPARTITION_BY_RANGE") + val plan = df.queryExecution.optimizedPlan + val res = plan.collect { + case r: RepartitionByExpression if r.numPartitions == 5 => true + } + assert(res.nonEmpty) + } + } + test("SPARK-33593: Vector reader got incorrect data with binary partition value") { Seq("false", "true").foreach(value => { withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> value) { From 9b5df2afaa5df85f149ccf73b7a6b78ab0f393bc Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 20:19:16 -0800 Subject: [PATCH 0988/1009] [SPARK-34036][DOCS] Update ORC data source documentation ### What changes were proposed in this pull request? This PR aims to update SQL documentation about ORC data sources. New structure looks like the following. - ORC Implementation - Vectorized Reader - Schema Merging - Zstandard - Bloom Filters - Columnar Encryption - Hive metastore ORC table conversion - Configuration ### Why are the changes needed? This document is not up-to-date. Apache Spark 3.2.0 can utilize new improvements from Apache ORC 1.6.6. ### Does this PR introduce _any_ user-facing change? No, this is a documentation. ### How was this patch tested? Manual. 
``` SKIP_API=1 jekyll build ``` --- **BEFORE** ![Screen Shot 2021-01-06 at 5 08 19 PM](https://user-images.githubusercontent.com/9700541/103838399-d0bbd880-5041-11eb-8757-297728d2793f.png) --- **AFTER** ![Screen Shot 2021-01-06 at 7 03 38 PM](https://user-images.githubusercontent.com/9700541/103845972-0963ae00-5052-11eb-905e-8e8b335c760a.png) ![Screen Shot 2021-01-06 at 7 03 49 PM](https://user-images.githubusercontent.com/9700541/103845971-08cb1780-5052-11eb-9b2a-d3acfa4b9278.png) ![Screen Shot 2021-01-06 at 7 03 59 PM](https://user-images.githubusercontent.com/9700541/103845970-08328100-5052-11eb-8982-7079fd7b0efc.png) ![Screen Shot 2021-01-06 at 7 04 10 PM](https://user-images.githubusercontent.com/9700541/103845968-08328100-5052-11eb-9ef5-db99c7cc64d3.png) ![Screen Shot 2021-01-06 at 7 04 16 PM](https://user-images.githubusercontent.com/9700541/103845963-07015400-5052-11eb-955f-8126d417e8aa.png) Closes #31075 from dongjoon-hyun/SPARK-34036. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/sql-data-sources-orc.md | 135 +++++++++++++++++++++++++++++++++-- 1 file changed, 129 insertions(+), 6 deletions(-) diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 4c4b3b1eee8c2..f5c9677c343dc 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -19,12 +19,115 @@ license: | limitations under the License. --- -Since Spark 2.3, Spark supports a vectorized ORC reader with a new ORC file format for ORC files. -To do that, the following configurations are newly added. The vectorized reader is used for the -native ORC tables (e.g., the ones created using the clause `USING ORC`) when `spark.sql.orc.impl` -is set to `native` and `spark.sql.orc.enableVectorizedReader` is set to `true`. For the Hive ORC -serde tables (e.g., the ones created using the clause `USING HIVE OPTIONS (fileFormat 'ORC')`), -the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also set to `true`. +* Table of contents +{:toc} + +[Apache ORC](https://orc.apache.org) is a columnar format which has more advanced features like native zstd compression, bloom filter and columnar encryption. + +### ORC Implementation + +Spark supports two ORC implementations (`native` and `hive`) which is controlled by `spark.sql.orc.impl`. +Two implementations share most functionalities with different design goals. +- `native` implementation is designed to follow Spark's data source behavior like `Parquet`. +- `hive` implementation is designed to follow Hive's behavior and uses Hive SerDe. + +For example, historically, `native` implementation handles `CHAR/VARCHAR` with Spark's native `String` while `hive` implementation handles it via Hive `CHAR/VARCHAR`. The query results are different. Since Spark 3.1.0, [SPARK-33480](https://issues.apache.org/jira/browse/SPARK-33480) removes this difference by supporting `CHAR/VARCHAR` from Spark-side. + +### Vectorized Reader + +`native` implementation supports a vectorized ORC reader and has been the default ORC implementaion since Spark 2.3. +The vectorized reader is used for the native ORC tables (e.g., the ones created using the clause `USING ORC`) when `spark.sql.orc.impl` is set to `native` and `spark.sql.orc.enableVectorizedReader` is set to `true`. +For the Hive ORC serde tables (e.g., the ones created using the clause `USING HIVE OPTIONS (fileFormat 'ORC')`), +the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also set to `true`, and is turned on by default. 
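Editor's sketch (not part of the committed page): the three configurations named in the paragraph above, set from PySpark. The values shown are their defaults and the table name is illustrative only.
```
spark.conf.set("spark.sql.orc.impl", "native")
spark.conf.set("spark.sql.orc.enableVectorizedReader", "true")
spark.conf.set("spark.sql.hive.convertMetastoreOrc", "true")
spark.sql("CREATE TABLE orc_native_example (id BIGINT, name STRING) USING ORC")
```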
+ +### Schema Merging + +Like Protocol Buffer, Avro, and Thrift, ORC also supports schema evolution. Users can start with +a simple schema, and gradually add more columns to the schema as needed. In this way, users may end +up with multiple ORC files with different but mutually compatible schemas. The ORC data +source is now able to automatically detect this case and merge schemas of all these files. + +Since schema merging is a relatively expensive operation, and is not a necessity in most cases, we +turned it off by default . You may enable it by + +1. setting data source option `mergeSchema` to `true` when reading ORC files, or +2. setting the global SQL option `spark.sql.orc.mergeSchema` to `true`. + +### Zstandard + +Spark supports both Hadoop 2 and 3. Since Spark 3.2, you can take advantage +of Zstandard compression in ORC files on both Hadoop versions. +Please see [Zstandard](https://facebook.github.io/zstd/) for the benefits. + +
      +
      + +{% highlight sql %} +CREATE TABLE compressed ( + key STRING, + value STRING +) +USING ORC +OPTIONS ( + compression 'zstd' +) +{% endhighlight %} +
      +
      + +### Bloom Filters + +You can control bloom filters and dictionary encodings for ORC data sources. The following ORC example will create bloom filter and use dictionary encoding only for `favorite_color`. To find more detailed information about the extra ORC options, visit the official Apache ORC websites. + +
      +
      + +{% highlight sql %} +CREATE TABLE users_with_options ( + name STRING, + favorite_color STRING, + favorite_numbers array +) +USING ORC +OPTIONS ( + orc.bloom.filter.columns 'favorite_color', + orc.dictionary.key.threshold '1.0', + orc.column.encoding.direct 'name' +) +{% endhighlight %} +
      +
      + +### Columnar Encryption + +Since Spark 3.2, columnar encryption is supported for ORC tables with Apache ORC 1.6. +The following example is using Hadoop KMS as a key provider with the given location. +Please visit [Apache Hadoop KMS](https://hadoop.apache.org/docs/current/hadoop-kms/index.html) for the detail. + +
      +
      +{% highlight sql %} +CREATE TABLE encrypted ( + ssn STRING, + email STRING, + name STRING +) +USING ORC +OPTIONS ( + hadoop.security.key.provider.path "kms://http@localhost:9600/kms", + orc.key.provider "hadoop", + orc.encrypt "pii:ssn,email", + orc.mask "nullify:ssn;sha256:email" +) +{% endhighlight %} +
      +
      + +### Hive metastore ORC table conversion + +When reading from Hive metastore ORC tables and inserting to Hive metastore ORC tables, Spark SQL will try to use its own ORC support instead of Hive SerDe for better performance. For CTAS statement, only non-partitioned Hive metastore ORC tables are converted. This behavior is controlled by the `spark.sql.hive.convertMetastoreOrc` configuration, and is turned on by default. + +### Configuration @@ -48,4 +151,24 @@ the vectorized reader is used when `spark.sql.hive.convertMetastoreOrc` is also + + + + + + + + + + + +
      Property NameDefaultMeaningSince Version
      2.3.0
      spark.sql.orc.mergeSchemafalse +

      + When true, the ORC data source merges schemas collected from all data files, + otherwise the schema is picked from a random data file. +

      +
      3.0.0
      spark.sql.hive.convertMetastoreOrctrue + When set to false, Spark SQL will use the Hive SerDe for ORC tables instead of the built in + support. + 2.0.0
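Editor's sketch before the next patch (not part of the commit above): the `mergeSchema` and Zstandard options that the page documents, driven from PySpark. The path is a placeholder, and the zstd write assumes Spark 3.2+ with ORC 1.6, as the page states.
```
path = "/tmp/orc_zstd_example"  # placeholder location
spark.range(10).write.mode("overwrite").option("compression", "zstd").orc(path)
spark.read.option("mergeSchema", "true").orc(path).show()
```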
      From 0ba3ab4c23ee1cd3785caa0fde76862dce478530 Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Thu, 7 Jan 2021 13:58:13 +0900 Subject: [PATCH 0989/1009] [SPARK-34021][R] Fix hyper links in SparkR documentation for CRAN submission ### What changes were proposed in this pull request? 3.0.1 CRAN submission was failed as the reason below: ``` Found the following (possibly) invalid URLs: URL: http://jsonlines.org/ (moved to https://jsonlines.org/) From: man/read.json.Rd man/write.json.Rd Status: 200 Message: OK URL: https://dl.acm.org/citation.cfm?id=1608614 (moved to https://dl.acm.org/doi/10.1109/MC.2009.263) From: inst/doc/sparkr-vignettes.html Status: 200 Message: OK ``` The links were being redirected now. This PR checked all hyperlinks in the docs such as `href{...}` and `url{...}`, and fixed all in SparkR: - Fix two problems above. - Fix http to https - Fix `https://www.apache.org/ https://spark.apache.org/` -> `https://www.apache.org https://spark.apache.org`. ### Why are the changes needed? For CRAN submission. ### Does this PR introduce _any_ user-facing change? Virtually no because it's just cleanup that CRAN requires. ### How was this patch tested? Manually tested by clicking the links Closes #31058 from HyukjinKwon/SPARK-34021. Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- R/pkg/DESCRIPTION | 2 +- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/SQLContext.R | 2 +- R/pkg/R/install.R | 6 +++--- R/pkg/R/mllib_classification.R | 4 ++-- R/pkg/R/mllib_clustering.R | 4 ++-- R/pkg/R/mllib_recommendation.R | 2 +- R/pkg/R/mllib_regression.R | 2 +- R/pkg/R/mllib_stat.R | 2 +- R/pkg/R/mllib_tree.R | 12 ++++++------ R/pkg/R/stats.R | 3 ++- R/pkg/vignettes/sparkr-vignettes.Rmd | 2 +- 12 files changed, 22 insertions(+), 21 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 20433362459d9..c141baa51b8cb 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -11,7 +11,7 @@ Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), email = "felixcheung@apache.org"), person(family = "The Apache Software Foundation", role = c("aut", "cph"))) License: Apache License (== 2.0) -URL: https://www.apache.org/ https://spark.apache.org/ +URL: https://www.apache.org https://spark.apache.org BugReports: https://spark.apache.org/contributing.html SystemRequirements: Java (>= 8, < 12) Depends: diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 8ca338f09969b..72d96151f6371 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -880,7 +880,7 @@ setMethod("toJSON", #' Save the contents of SparkDataFrame as a JSON file #' -#' Save the contents of a SparkDataFrame as a JSON file (\href{http://jsonlines.org/}{ +#' Save the contents of a SparkDataFrame as a JSON file (\href{https://jsonlines.org/}{ #' JSON Lines text format or newline-delimited JSON}). Files written out #' with this method can be read back in as a SparkDataFrame using read.json(). #' diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 5ed0481f33d8f..14262e1a74ab0 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -374,7 +374,7 @@ setMethod("toDF", signature(x = "RDD"), #' Create a SparkDataFrame from a JSON file. #' #' Loads a JSON file, returning the result as a SparkDataFrame -#' By default, (\href{http://jsonlines.org/}{JSON Lines text format or newline-delimited JSON} +#' By default, (\href{https://jsonlines.org/}{JSON Lines text format or newline-delimited JSON} #' ) is supported. For JSON (one record per file), set a named property \code{multiLine} to #' \code{TRUE}. 
#' It goes through the entire dataset once to determine the schema. diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 5bc5ae07c5f03..bbb9188cd083f 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -39,11 +39,11 @@ #' version number in the format of "x.y" where x and y are integer. #' If \code{hadoopVersion = "without"}, "Hadoop free" build is installed. #' See -#' \href{http://spark.apache.org/docs/latest/hadoop-provided.html}{ +#' \href{https://spark.apache.org/docs/latest/hadoop-provided.html}{ #' "Hadoop Free" Build} for more information. #' Other patched version names can also be used, e.g. \code{"cdh4"} #' @param mirrorUrl base URL of the repositories to use. The directory layout should follow -#' \href{http://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}. +#' \href{https://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}. #' @param localDir a local directory where Spark is installed. The directory contains #' version-specific folders of Spark packages. Default is path to #' the cache directory: @@ -64,7 +64,7 @@ #'} #' @note install.spark since 2.1.0 #' @seealso See available Hadoop versions: -#' \href{http://spark.apache.org/downloads.html}{Apache Spark} +#' \href{https://spark.apache.org/downloads.html}{Apache Spark} install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL, localDir = NULL, overwrite = FALSE) { sparkHome <- Sys.getenv("SPARK_HOME") diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R index ec83b6bd406a7..71ebe4e26ef63 100644 --- a/R/pkg/R/mllib_classification.R +++ b/R/pkg/R/mllib_classification.R @@ -425,7 +425,7 @@ setMethod("write.ml", signature(object = "LogisticRegressionModel", path = "char #' predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' Only categorical data is supported. #' For more details, see -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html}{ #' Multilayer Perceptron} #' #' @param data a \code{SparkDataFrame} of observations and labels for model fitting. @@ -574,7 +574,7 @@ setMethod("write.ml", signature(object = "MultilayerPerceptronClassificationMode #' @rdname spark.naiveBayes #' @aliases spark.naiveBayes,SparkDataFrame,formula-method #' @name spark.naiveBayes -#' @seealso e1071: \url{https://cran.r-project.org/package=e1071} +#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/index.html} #' @examples #' \dontrun{ #' data <- as.data.frame(UCBAdmissions) diff --git a/R/pkg/R/mllib_clustering.R b/R/pkg/R/mllib_clustering.R index 8bc15353465d8..ff7cbd8fc9b74 100644 --- a/R/pkg/R/mllib_clustering.R +++ b/R/pkg/R/mllib_clustering.R @@ -204,7 +204,7 @@ setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "charact #' @return \code{spark.gaussianMixture} returns a fitted multivariate gaussian mixture model. #' @rdname spark.gaussianMixture #' @name spark.gaussianMixture -#' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools} +#' @seealso mixtools: \url{https://cran.r-project.org/web/packages/mixtools/index.html} #' @examples #' \dontrun{ #' sparkR.session() @@ -483,7 +483,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"), #' @return \code{spark.lda} returns a fitted Latent Dirichlet Allocation model. 
#' @rdname spark.lda #' @aliases spark.lda,SparkDataFrame-method -#' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels} +#' @seealso topicmodels: \url{https://cran.r-project.org/web/packages/topicmodels/index.html} #' @examples #' \dontrun{ #' text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm") diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R index d238ff93ed245..87a1bc991f812 100644 --- a/R/pkg/R/mllib_recommendation.R +++ b/R/pkg/R/mllib_recommendation.R @@ -30,7 +30,7 @@ setClass("ALSModel", representation(jobj = "jobj")) #' to make predictions on new data, and \code{write.ml}/\code{read.ml} to save/load fitted models. #' #' For more details, see -#' \href{http://spark.apache.org/docs/latest/ml-collaborative-filtering.html}{MLlib: +#' \href{https://spark.apache.org/docs/latest/ml-collaborative-filtering.html}{MLlib: #' Collaborative Filtering}. #' #' @param data a SparkDataFrame for training. diff --git a/R/pkg/R/mllib_regression.R b/R/pkg/R/mllib_regression.R index b2228a141689b..db9f367407df3 100644 --- a/R/pkg/R/mllib_regression.R +++ b/R/pkg/R/mllib_regression.R @@ -475,7 +475,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char #' @param ... additional arguments passed to the method. #' @return \code{spark.survreg} returns a fitted AFT survival regression model. #' @rdname spark.survreg -#' @seealso survival: \url{https://cran.r-project.org/package=survival} +#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/index.html} #' @examples #' \dontrun{ #' df <- createDataFrame(ovarian) diff --git a/R/pkg/R/mllib_stat.R b/R/pkg/R/mllib_stat.R index 6db4d5d4831dd..f82fb589bb5a5 100644 --- a/R/pkg/R/mllib_stat.R +++ b/R/pkg/R/mllib_stat.R @@ -49,7 +49,7 @@ setClass("KSTest", representation(jobj = "jobj")) #' @rdname spark.kstest #' @aliases spark.kstest,SparkDataFrame-method #' @name spark.kstest -#' @seealso \href{http://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing}{ +#' @seealso \href{https://spark.apache.org/docs/latest/mllib-statistics.html#hypothesis-testing}{ #' MLlib: Hypothesis Testing} #' @examples #' \dontrun{ diff --git a/R/pkg/R/mllib_tree.R b/R/pkg/R/mllib_tree.R index b5a014b0a3cfd..f3192ee9b1382 100644 --- a/R/pkg/R/mllib_tree.R +++ b/R/pkg/R/mllib_tree.R @@ -127,9 +127,9 @@ print.summary.decisionTree <- function(x) { #' \code{write.ml}/\code{read.ml} to save/load fitted models. #' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-regression}{ #' GBT Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#gradient-boosted-tree-classifier}{ #' GBT Classification} # nolint end #' @@ -343,9 +343,9 @@ setMethod("write.ml", signature(object = "GBTClassificationModel", path = "chara #' save/load fitted models. 
#' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-regression}{ #' Random Forest Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#random-forest-classifier}{ #' Random Forest Classification} # nolint end #' @@ -568,9 +568,9 @@ setMethod("write.ml", signature(object = "RandomForestClassificationModel", path #' save/load fitted models. #' For more details, see # nolint start -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-regression}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-regression}{ #' Decision Tree Regression} and -#' \href{http://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-classifier}{ +#' \href{https://spark.apache.org/docs/latest/ml-classification-regression.html#decision-tree-classifier}{ #' Decision Tree Classification} # nolint end #' diff --git a/R/pkg/R/stats.R b/R/pkg/R/stats.R index 7252351ebebb2..0aabceef226e3 100644 --- a/R/pkg/R/stats.R +++ b/R/pkg/R/stats.R @@ -109,7 +109,8 @@ setMethod("corr", #' #' Finding frequent items for columns, possibly with false positives. #' Using the frequent element count algorithm described in -#' \url{https://doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. +#' \url{https://dl.acm.org/doi/10.1145/762471.762473}, proposed by Karp, Schenker, +#' and Papadimitriou. #' #' @param x A SparkDataFrame. #' @param cols A vector column names to search frequent items in. diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index a0608748696a3..3177b54dc5fac 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -1007,7 +1007,7 @@ perplexity #### Alternating Least Squares -`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/citation.cfm?id=1608614). +`spark.als` learns latent factors in [collaborative filtering](https://en.wikipedia.org/wiki/Recommender_system#Collaborative_filtering) via [alternating least squares](https://dl.acm.org/doi/10.1109/MC.2009.263). There are multiple options that can be configured in `spark.als`, including `rank`, `reg`, and `nonnegative`. For a complete list, refer to the help file. From 26b603992c4b9b5a58e46e0566c1547b86249709 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 7 Jan 2021 14:26:04 +0900 Subject: [PATCH 0990/1009] [SPARK-34028][SQL] Cleanup "unreachable code" compilation warning ### What changes were proposed in this pull request? 
There is one compilation warning as follow: ``` [WARNING] [Warn] /spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala:1555: [other-match-analysis org.apache.spark.sql.catalyst.catalog.SessionCatalog.lookupFunction.catalogFunction] unreachable code ``` This compilation warning is due to `NoSuchPermanentFunctionException` is sub-class of `AnalysisException` and if there is `NoSuchPermanentFunctionException` be thrown out, it will be catch by `case _: AnalysisException => failFunctionLookup(name)`, so `case _: NoSuchPermanentFunctionException => failFunctionLookup(name)` is `unreachable code`. This pr remove `case _: NoSuchPermanentFunctionException => failFunctionLookup(name)` directly because both these 2 branches handle exceptions in the same way: `failFunctionLookup(name)` ### Why are the changes needed? Cleanup "unreachable code" compilation warnings. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Action Closes #31064 from LuciferYang/SPARK-34028. Authored-by: yangjie01 Signed-off-by: HyukjinKwon --- .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 5f7028bf87c87..76358ef116cec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1552,7 +1552,6 @@ class SessionCatalog( externalCatalog.getFunction(database, name.funcName) } catch { case _: AnalysisException => failFunctionLookup(name) - case _: NoSuchPermanentFunctionException => failFunctionLookup(name) } loadFunctionResources(catalogFunction.resources) // Please note that qualifiedName is provided by the user. However, From 3aa4e113c5162f5de12c2aa43b6af65a7f2110af Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 7 Jan 2021 14:28:30 +0900 Subject: [PATCH 0991/1009] [SPARK-33861][SQL][FOLLOWUP] Simplify conditional in predicate should consider deterministic ### What changes were proposed in this pull request? This pr address https://github.com/apache/spark/pull/30865#pullrequestreview-562344089 to fix simplify conditional in predicate should consider deterministic. ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #31067 from wangyum/SPARK-33861-2. 
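Editor's illustration (not part of the patch): the boolean-predicate equivalence behind the retained `CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)` rewrite, shown from PySpark; both filters below should return the same rows.
```
spark.range(10).where("CASE WHEN id > 5 THEN id % 2 = 0 ELSE false END").show()
spark.range(10).where("id > 5 AND id % 2 = 0").show()
```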
Authored-by: Yuming Wang Signed-off-by: HyukjinKwon --- .../optimizer/SimplifyConditionalsInPredicate.scala | 6 ------ .../SimplifyConditionalsInPredicateSuite.scala | 11 ++++++++++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index 1ea85085bccdb..1225f1f318fc7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -39,9 +39,7 @@ import org.apache.spark.sql.types.BooleanType * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) - * - CASE WHEN cond THEN false END => false * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) - * - CASE WHEN cond THEN true END => cond */ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { @@ -64,12 +62,8 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { And(cond, trueValue) case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => Or(Not(cond), trueValue) - case CaseWhen(Seq((_, FalseLiteral)), Some(FalseLiteral) | None) => - FalseLiteral case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => And(Not(cond), elseValue) - case CaseWhen(Seq((cond, TrueLiteral)), Some(FalseLiteral) | None) => - cond case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => Or(cond, elseValue) case e if e.dataType == BooleanType => e diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala index 1f3c24bdbb664..04ebb4e63c675 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or} +import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Expression, If, IsNotNull, Literal, Or, Rand} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} @@ -158,6 +158,15 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { testProjection(originalCond, expectedExpr = originalCond) } + test("CASE WHEN non-deterministic-cond THEN false END") { + val originalCond = + CaseWhen(Seq((UnresolvedAttribute("i") > Rand(0), FalseLiteral))) + val expectedCond = And(UnresolvedAttribute("i") > Rand(0), FalseLiteral) + // nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, + testFilter(originalCond, expectedCond = FalseLiteral) + 
testProjection(originalCond, expectedExpr = originalCond) + } + test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { val originalCond = CaseWhen( Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), From aa509c1eeed688ddf21553aefe7b48cdf072fc5b Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 7 Jan 2021 14:41:10 +0900 Subject: [PATCH 0992/1009] [SPARK-34031][SQL] Union operator missing rowCount when CBO enabled ### What changes were proposed in this pull request? This PR adds the row count to the `Union` operator when CBO is enabled. ```scala spark.sql("CREATE TABLE t1 USING parquet AS SELECT id FROM RANGE(10)") spark.sql("CREATE TABLE t2 USING parquet AS SELECT id FROM RANGE(10)") spark.sql("ANALYZE TABLE t1 COMPUTE STATISTICS FOR ALL COLUMNS") spark.sql("ANALYZE TABLE t2 COMPUTE STATISTICS FOR ALL COLUMNS") spark.sql("set spark.sql.cbo.enabled=true") spark.sql("SELECT * FROM t1 UNION ALL SELECT * FROM t2").explain("cost") ``` Before this PR: ``` == Optimized Logical Plan == Union false, false, Statistics(sizeInBytes=320.0 B) :- Relation[id#5880L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) +- Relation[id#5881L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) ``` After this PR: ``` == Optimized Logical Plan == Union false, false, Statistics(sizeInBytes=320.0 B, rowCount=20) :- Relation[id#2138L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) +- Relation[id#2139L] parquet, Statistics(sizeInBytes=160.0 B, rowCount=10) ``` ### Why are the changes needed? Improve query performance: [`JoinEstimation.estimateInnerOuterJoin`](https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L55-L156) needs the row count. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #31068 from wangyum/SPARK-34031. Lead-authored-by: Yuming Wang Co-authored-by: Hyukjin Kwon Signed-off-by: HyukjinKwon --- .../plans/logical/basicLogicalOperators.scala | 2 +- .../BasicStatsPlanVisitor.scala | 10 +- .../BasicStatsEstimationSuite.scala | 11 + .../approved-plans-v1_4/q2.sf100/explain.txt | 128 ++- .../q2.sf100/simplified.txt | 98 ++- .../approved-plans-v1_4/q5.sf100/explain.txt | 220 +++--- .../q5.sf100/simplified.txt | 64 +- .../approved-plans-v1_4/q54.sf100/explain.txt | 726 +++++++++--------- .../q54.sf100/simplified.txt | 244 +++--- .../approved-plans-v2_7/q5a.sf100/explain.txt | 210 ++--- .../q5a.sf100/simplified.txt | 64 +- 11 files changed, 874 insertions(+), 903 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 9e06f9bec7830..3fb2e991af554 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -217,7 +217,7 @@ object Union { } /** - * Logical plan for unioning two plans, without a distinct. This is UNION ALL in SQL. + * Logical plan for unioning multiple plans, without a distinct. This is UNION ALL in SQL. * * @param byName Whether resolves columns in the children by column names. * @param allowMissingCol Allows missing columns in children query plans.
If it is true, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala index 34baf5b90e54e..05fc1f7958fef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/BasicStatsPlanVisitor.scala @@ -79,7 +79,15 @@ object BasicStatsPlanVisitor extends LogicalPlanVisitor[Statistics] { override def visitScriptTransform(p: ScriptTransformation): Statistics = default(p) - override def visitUnion(p: Union): Statistics = fallback(p) + override def visitUnion(p: Union): Statistics = { + val stats = p.children.map(_.stats) + val rowCount = if (stats.exists(_.rowCount.isEmpty)) { + None + } else { + Some(stats.map(_.rowCount.get).sum) + } + Statistics(sizeInBytes = stats.map(_.sizeInBytes).sum, rowCount = rowCount) + } override def visitWindow(p: Window): Statistics = fallback(p) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 91f8fc406a43d..1d780142aede0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -141,6 +141,17 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = Statistics(sizeInBytes = 120)) } + test("SPARK-34031: Union operator missing rowCount when enable CBO") { + val union = Union(plan :: plan :: plan :: Nil) + val childrenSize = union.children.size + val sizeInBytes = plan.size.get * childrenSize + val rowCount = Some(plan.rowCount * childrenSize) + checkStats( + union, + expectedStatsCboOn = Statistics(sizeInBytes = sizeInBytes, rowCount = rowCount), + expectedStatsCboOff = Statistics(sizeInBytes = sizeInBytes)) + } + /** Check estimated stats when cbo is turned on/off. 
*/ private def checkStats( plan: LogicalPlan, diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index 61e5ae0121819..52dfff442bf3a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -1,45 +1,43 @@ == Physical Plan == -* Sort (41) -+- Exchange (40) - +- * Project (39) - +- * SortMergeJoin Inner (38) - :- * Sort (26) - : +- * Project (25) - : +- * BroadcastHashJoin Inner BuildRight (24) - : :- * HashAggregate (18) - : : +- Exchange (17) - : : +- * HashAggregate (16) - : : +- * Project (15) - : : +- * BroadcastHashJoin Inner BuildRight (14) - : : :- Union (9) - : : : :- * Project (4) - : : : : +- * Filter (3) - : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.web_sales (1) - : : : +- * Project (8) - : : : +- * Filter (7) - : : : +- * ColumnarToRow (6) - : : : +- Scan parquet default.catalog_sales (5) - : : +- BroadcastExchange (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) - : +- BroadcastExchange (23) - : +- * Project (22) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.date_dim (19) - +- * Sort (37) - +- Exchange (36) - +- * Project (35) - +- * BroadcastHashJoin Inner BuildRight (34) - :- * HashAggregate (28) - : +- ReusedExchange (27) - +- BroadcastExchange (33) - +- * Project (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.date_dim (29) +* Sort (39) ++- Exchange (38) + +- * Project (37) + +- * BroadcastHashJoin Inner BuildRight (36) + :- * Project (25) + : +- * BroadcastHashJoin Inner BuildRight (24) + : :- * HashAggregate (18) + : : +- Exchange (17) + : : +- * HashAggregate (16) + : : +- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) + : : :- Union (9) + : : : :- * Project (4) + : : : : +- * Filter (3) + : : : : +- * ColumnarToRow (2) + : : : : +- Scan parquet default.web_sales (1) + : : : +- * Project (8) + : : : +- * Filter (7) + : : : +- * ColumnarToRow (6) + : : : +- Scan parquet default.catalog_sales (5) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.date_dim (10) + : +- BroadcastExchange (23) + : +- * Project (22) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.date_dim (19) + +- BroadcastExchange (35) + +- * Project (34) + +- * BroadcastHashJoin Inner BuildRight (33) + :- * HashAggregate (27) + : +- ReusedExchange (26) + +- BroadcastExchange (32) + +- * Project (31) + +- * Filter (30) + +- * ColumnarToRow (29) + +- Scan parquet default.date_dim (28) (1) Scan parquet default.web_sales @@ -116,9 +114,9 @@ Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum (17) Exchange Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), true, [id=#27] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [id=#27] -(18) HashAggregate [codegen id : 6] +(18) HashAggregate [codegen id : 12] Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) 
THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] @@ -147,82 +145,74 @@ Input [2]: [d_week_seq#42, d_year#43] Input [1]: [d_week_seq#42] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] -(24) BroadcastHashJoin [codegen id : 6] +(24) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#42] Join condition: None -(25) Project [codegen id : 6] +(25) Project [codegen id : 12] Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] -(26) Sort [codegen id : 6] -Input [8]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], false, 0 - -(27) ReusedExchange [Reuses operator id: 17] +(26) ReusedExchange [Reuses operator id: 17] Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -(28) HashAggregate [codegen id : 12] +(27) HashAggregate [codegen id : 11] Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))] Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday) THEN sales_price#4 ELSE null END))#62, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66] Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday) THEN sales_price#4 ELSE null END))#60,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday) THEN sales_price#4 ELSE null END))#61,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = 
Tuesday) THEN sales_price#4 ELSE null END))#62,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 ELSE null END))#63,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday) THEN sales_price#4 ELSE null END))#64,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday) THEN sales_price#4 ELSE null END))#65,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday) THEN sales_price#4 ELSE null END))#66,17,2) AS sat_sales#41] -(29) Scan parquet default.date_dim +(28) Scan parquet default.date_dim Output [2]: [d_week_seq#67, d_year#68] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct -(30) ColumnarToRow [codegen id : 11] +(29) ColumnarToRow [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] -(31) Filter [codegen id : 11] +(30) Filter [codegen id : 10] Input [2]: [d_week_seq#67, d_year#68] Condition : ((isnotnull(d_year#68) AND (d_year#68 = 2002)) AND isnotnull(d_week_seq#67)) -(32) Project [codegen id : 11] +(31) Project [codegen id : 10] Output [1]: [d_week_seq#67] Input [2]: [d_week_seq#67, d_year#68] -(33) BroadcastExchange +(32) BroadcastExchange Input [1]: [d_week_seq#67] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#69] -(34) BroadcastHashJoin [codegen id : 12] +(33) BroadcastHashJoin [codegen id : 11] Left keys [1]: [d_week_seq#10] Right keys [1]: [d_week_seq#67] Join condition: None -(35) Project [codegen id : 12] +(34) Project [codegen id : 11] Output [8]: [d_week_seq#10 AS d_week_seq2#70, sun_sales#35 AS sun_sales2#71, mon_sales#36 AS mon_sales2#72, tue_sales#37 AS tue_sales2#73, wed_sales#38 AS wed_sales2#74, thu_sales#39 AS thu_sales2#75, fri_sales#40 AS fri_sales2#76, sat_sales#41 AS sat_sales2#77] Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#67] -(36) Exchange -Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: hashpartitioning((d_week_seq2#70 - 53), 5), true, [id=#78] - -(37) Sort [codegen id : 13] +(35) BroadcastExchange Input [8]: [d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -Arguments: [(d_week_seq2#70 - 53) ASC NULLS FIRST], false, 0 +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#78] -(38) SortMergeJoin [codegen id : 14] +(36) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq1#45] Right keys [1]: [(d_week_seq2#70 - 53)] Join condition: None -(39) Project [codegen id : 14] +(37) Project [codegen id : 12] Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#71)), DecimalType(37,20), true), 2) AS round((sun_sales1 / sun_sales2), 2)#79, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#72)), DecimalType(37,20), true), 2) AS round((mon_sales1 / mon_sales2), 2)#80, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#73)), DecimalType(37,20), true), 2) AS round((tue_sales1 / tue_sales2), 2)#81, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#74)), DecimalType(37,20), 
true), 2) AS round((wed_sales1 / wed_sales2), 2)#82, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#75)), DecimalType(37,20), true), 2) AS round((thu_sales1 / thu_sales2), 2)#83, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#76)), DecimalType(37,20), true), 2) AS round((fri_sales1 / fri_sales2), 2)#84, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#77)), DecimalType(37,20), true), 2) AS round((sat_sales1 / sat_sales2), 2)#85] Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#70, sun_sales2#71, mon_sales2#72, tue_sales2#73, wed_sales2#74, thu_sales2#75, fri_sales2#76, sat_sales2#77] -(40) Exchange +(38) Exchange Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), true, [id=#86] +Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#86] -(41) Sort [codegen id : 15] +(39) Sort [codegen id : 13] Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#79, round((mon_sales1 / mon_sales2), 2)#80, round((tue_sales1 / tue_sales2), 2)#81, round((wed_sales1 / wed_sales2), 2)#82, round((thu_sales1 / thu_sales2), 2)#83, round((fri_sales1 / fri_sales2), 2)#84, round((sat_sales1 / sat_sales2), 2)#85] Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt index 3389774c46469..424a535e14847 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/simplified.txt @@ -1,67 +1,61 @@ -WholeStageCodegen (15) +WholeStageCodegen (13) Sort [d_week_seq1] InputAdapter Exchange [d_week_seq1] #1 - WholeStageCodegen (14) + WholeStageCodegen (12) Project [d_week_seq1,sun_sales1,sun_sales2,mon_sales1,mon_sales2,tue_sales1,tue_sales2,wed_sales1,wed_sales2,thu_sales1,thu_sales2,fri_sales1,fri_sales2,sat_sales1,sat_sales2] - SortMergeJoin [d_week_seq1,d_week_seq2] + BroadcastHashJoin [d_week_seq1,d_week_seq2] + Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] + BroadcastHashJoin [d_week_seq,d_week_seq] + HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] + InputAdapter + Exchange [d_week_seq] #2 + 
WholeStageCodegen (4) + HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] + Project [sales_price,d_week_seq,d_day_name] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (1) + Project [ws_sold_date_sk,ws_ext_sales_price] + Filter [ws_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] + WholeStageCodegen (2) + Project [cs_sold_date_sk,cs_ext_sales_price] + Filter [cs_sold_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (3) + Filter [d_date_sk,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (5) + Project [d_week_seq] + Filter [d_year,d_week_seq] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_week_seq,d_year] InputAdapter - WholeStageCodegen (6) - Sort [d_week_seq1] + BroadcastExchange #5 + WholeStageCodegen (11) Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] BroadcastHashJoin [d_week_seq,d_week_seq] HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN (d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] InputAdapter - Exchange [d_week_seq] #2 - WholeStageCodegen (4) - HashAggregate [d_week_seq,d_day_name,sales_price] [sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum,sum] - Project [sales_price,d_week_seq,d_day_name] - BroadcastHashJoin [sold_date_sk,d_date_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [ws_sold_date_sk,ws_ext_sales_price] - Filter [ws_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_ext_sales_price] - WholeStageCodegen (2) - Project [cs_sold_date_sk,cs_ext_sales_price] - Filter [cs_sold_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_ext_sales_price] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (3) - Filter [d_date_sk,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_week_seq,d_day_name] + ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 InputAdapter - BroadcastExchange #4 - WholeStageCodegen (5) + BroadcastExchange #6 + WholeStageCodegen (10) Project [d_week_seq] Filter [d_year,d_week_seq] ColumnarToRow InputAdapter Scan parquet default.date_dim [d_week_seq,d_year] - InputAdapter - WholeStageCodegen (13) - Sort [d_week_seq2] - InputAdapter - Exchange [d_week_seq2] #5 - WholeStageCodegen (12) - Project [d_week_seq,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales] - BroadcastHashJoin [d_week_seq,d_week_seq] - HashAggregate [d_week_seq,sum,sum,sum,sum,sum,sum,sum] [sum(UnscaledValue(CASE WHEN 
(d_day_name = Sunday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Monday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Tuesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Wednesday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Thursday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Friday) THEN sales_price ELSE null END)),sum(UnscaledValue(CASE WHEN (d_day_name = Saturday) THEN sales_price ELSE null END)),sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales,sum,sum,sum,sum,sum,sum,sum] - InputAdapter - ReusedExchange [d_week_seq,sum,sum,sum,sum,sum,sum,sum] #2 - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (11) - Project [d_week_seq] - Filter [d_year,d_week_seq] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_week_seq,d_year] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt index 55bd25c501294..5a9c4715d4b05 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -10,8 +10,8 @@ TakeOrderedAndProject (81) : +- * HashAggregate (23) : +- * Project (22) : +- * BroadcastHashJoin Inner BuildRight (21) - : :- * Project (16) - : : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Project (15) + : : +- * BroadcastHashJoin Inner BuildRight (14) : : :- Union (9) : : : :- * Project (4) : : : : +- * Filter (3) @@ -21,22 +21,22 @@ TakeOrderedAndProject (81) : : : +- * Filter (7) : : : +- * ColumnarToRow (6) : : : +- Scan parquet default.store_returns (5) - : : +- BroadcastExchange (14) - : : +- * Project (13) - : : +- * Filter (12) - : : +- * ColumnarToRow (11) - : : +- Scan parquet default.date_dim (10) + : : +- BroadcastExchange (13) + : : +- * Filter (12) + : : +- * ColumnarToRow (11) + : : +- Scan parquet default.store (10) : +- BroadcastExchange (20) - : +- * Filter (19) - : +- * ColumnarToRow (18) - : +- Scan parquet default.store (17) + : +- * Project (19) + : +- * Filter (18) + : +- * ColumnarToRow (17) + : +- Scan parquet default.date_dim (16) :- * HashAggregate (46) : +- Exchange (45) : +- * HashAggregate (44) : +- * Project (43) : +- * BroadcastHashJoin Inner BuildRight (42) - : :- * Project (37) - : : +- * BroadcastHashJoin Inner BuildRight (36) + : :- * Project (40) + : : +- * BroadcastHashJoin Inner BuildRight (39) : : :- Union (34) : : : :- * Project (29) : : : : +- * Filter (28) @@ -46,18 +46,18 @@ TakeOrderedAndProject (81) : : : +- * Filter (32) : : : +- * ColumnarToRow (31) : : : +- Scan parquet default.catalog_returns (30) - : : +- ReusedExchange (35) - : +- BroadcastExchange (41) - : +- * Filter (40) - : +- * ColumnarToRow (39) - : +- Scan parquet default.catalog_page (38) + : : +- BroadcastExchange (38) + : : +- * Filter (37) + : : +- * ColumnarToRow (36) + : : +- Scan parquet default.catalog_page (35) + : +- ReusedExchange (41) +- * HashAggregate (75) +- Exchange (74) +- * HashAggregate (73) +- * Project (72) +- * BroadcastHashJoin Inner BuildRight (71) - :- * Project (66) - : +- * BroadcastHashJoin Inner BuildRight (65) + :- * Project (69) + : +- * BroadcastHashJoin Inner BuildRight (68) : :- Union (63) : : :- * Project (50) : : : +- * Filter (49) @@ -75,11 +75,11 @@ TakeOrderedAndProject (81) : 
: +- * Filter (58) : : +- * ColumnarToRow (57) : : +- Scan parquet default.web_sales (56) - : +- ReusedExchange (64) - +- BroadcastExchange (70) - +- * Filter (69) - +- * ColumnarToRow (68) - +- Scan parquet default.web_site (67) + : +- BroadcastExchange (67) + : +- * Filter (66) + : +- * ColumnarToRow (65) + : +- Scan parquet default.web_site (64) + +- ReusedExchange (70) (1) Scan parquet default.store_sales @@ -119,81 +119,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 11192)) AND (d_date#22 <= 11206)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 11192)) AND (d_date#25 <= 11206)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: 
[d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), true, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#25) AS id#44] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#40, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS RETURNS#41, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#42, store channel AS channel#43, concat(store, s_store_id#22) AS id#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -233,44 +233,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) 
Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -281,7 +281,7 @@ Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] (45) Exchange Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), true, [id=#76] +Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] (46) HashAggregate [codegen id : 12] Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] @@ -324,7 +324,7 @@ Condition : isnotnull(wr_returned_date_sk#96) (54) Exchange Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] -Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), true, [id=#101] +Arguments: hashpartitioning(wr_item_sk#97, wr_order_number#98, 5), ENSURE_REQUIREMENTS, [id=#101] (55) Sort [codegen id : 15] Input [5]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return_amt#99, wr_net_loss#100] @@ -346,7 +346,7 @@ Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#103)) AND (59) Exchange Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] -Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), 
cast(ws_order_number#103 as bigint), 5), true, [id=#104] +Arguments: hashpartitioning(cast(ws_item_sk#102 as bigint), cast(ws_order_number#103 as bigint), 5), ENSURE_REQUIREMENTS, [id=#104] (60) Sort [codegen id : 17] Input [3]: [ws_item_sk#102, ws_web_site_sk#87, ws_order_number#103] @@ -363,44 +363,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] @@ -411,7 +411,7 @@ Results [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] (74) Exchange Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] -Arguments: hashpartitioning(web_site_id#112, 5), true, [id=#122] +Arguments: hashpartitioning(web_site_id#112, 5), ENSURE_REQUIREMENTS, [id=#122] (75) HashAggregate [codegen id : 22] Input [5]: [web_site_id#112, sum#118, sum#119, sum#120, sum#121] @@ -435,7 +435,7 @@ Results [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, (79) Exchange Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), true, [id=#147] +Arguments: hashpartitioning(channel#132, id#133, spark_grouping_id#134, 5), ENSURE_REQUIREMENTS, 
[id=#147] (80) HashAggregate [codegen id : 24] Input [9]: [channel#132, id#133, spark_grouping_id#134, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt index 80b07a3712d36..2db6cf767729d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/simplified.txt @@ -15,9 +15,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -35,18 +35,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #3 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #4 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -54,9 +54,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -72,14 +72,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #6 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #6 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #4 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] 
[sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),sales,RETURNS,profit,channel,id,sum,sum,sum,sum] InputAdapter @@ -87,9 +87,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -122,11 +122,11 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #3 + BroadcastExchange #10 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #10 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt index d78565986bc0a..a504149b00b94 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -1,494 +1,474 @@ == Physical Plan == -TakeOrderedAndProject (71) -+- * HashAggregate (70) - +- Exchange (69) - +- * HashAggregate (68) - +- * HashAggregate (67) - +- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * BroadcastHashJoin Inner BuildRight (63) - :- * Project (57) - : +- * BroadcastHashJoin Inner BuildRight (56) - : :- * Project (51) - : : +- * SortMergeJoin Inner (50) - : : :- * Sort (44) - : : : +- Exchange (43) - : : : +- * Project (42) - : : : +- * SortMergeJoin Inner (41) - : : : :- * Sort (35) - : : : : +- * HashAggregate (34) - : : : : +- * HashAggregate (33) - : : : : +- * Project (32) - : : : : +- * SortMergeJoin Inner (31) - : : : : :- * Sort (25) - : : : : : +- Exchange (24) - : : : : : +- * Project (23) - : : : : : +- * BroadcastHashJoin Inner BuildRight (22) - : : : : : :- * Project (16) - : : : : : : +- * BroadcastHashJoin Inner BuildRight (15) - : : : : : : :- Union (9) - : : : : : : : :- * Project (4) - : : : : : : : : +- * Filter (3) - : : : : : : : : +- * ColumnarToRow (2) - : : : : : : : : +- Scan parquet default.catalog_sales (1) - : : : : : : : +- * Project (8) - : : : : : : : +- * Filter (7) - : : : : : : : +- * ColumnarToRow (6) - : : : : : : : +- Scan parquet default.web_sales (5) - : : : : : : +- BroadcastExchange (14) - : : : : : : +- * Project (13) - : : : : : : +- * Filter (12) - : : : : : : +- * ColumnarToRow (11) - : : : : : : +- Scan parquet default.item (10) - : : : : : +- BroadcastExchange (21) - : : : : : +- * Project (20) - : : : : : +- * Filter (19) - : : : : : +- * ColumnarToRow (18) - : : : : : +- Scan parquet default.date_dim (17) - : : : : +- * Sort (30) - : : : : +- Exchange (29) - : : : : +- * Filter (28) - : : : : +- * ColumnarToRow (27) - 
: : : : +- Scan parquet default.customer (26) - : : : +- * Sort (40) - : : : +- Exchange (39) - : : : +- * Filter (38) - : : : +- * ColumnarToRow (37) - : : : +- Scan parquet default.store_sales (36) - : : +- * Sort (49) - : : +- Exchange (48) - : : +- * Filter (47) - : : +- * ColumnarToRow (46) - : : +- Scan parquet default.customer_address (45) - : +- BroadcastExchange (55) - : +- * Filter (54) - : +- * ColumnarToRow (53) - : +- Scan parquet default.store (52) - +- BroadcastExchange (62) - +- * Project (61) - +- * Filter (60) - +- * ColumnarToRow (59) - +- Scan parquet default.date_dim (58) - - -(1) Scan parquet default.catalog_sales -Output [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +TakeOrderedAndProject (67) ++- * HashAggregate (66) + +- Exchange (65) + +- * HashAggregate (64) + +- * HashAggregate (63) + +- * HashAggregate (62) + +- * Project (61) + +- * SortMergeJoin Inner (60) + :- * Sort (47) + : +- * Project (46) + : +- * BroadcastHashJoin Inner BuildLeft (45) + : :- BroadcastExchange (10) + : : +- * Project (9) + : : +- * BroadcastHashJoin Inner BuildRight (8) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.customer_address (1) + : : +- BroadcastExchange (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.store (4) + : +- * HashAggregate (44) + : +- * HashAggregate (43) + : +- * Project (42) + : +- * SortMergeJoin Inner (41) + : :- * Sort (35) + : : +- Exchange (34) + : : +- * Project (33) + : : +- * BroadcastHashJoin Inner BuildRight (32) + : : :- * Project (26) + : : : +- * BroadcastHashJoin Inner BuildRight (25) + : : : :- Union (19) + : : : : :- * Project (14) + : : : : : +- * Filter (13) + : : : : : +- * ColumnarToRow (12) + : : : : : +- Scan parquet default.catalog_sales (11) + : : : : +- * Project (18) + : : : : +- * Filter (17) + : : : : +- * ColumnarToRow (16) + : : : : +- Scan parquet default.web_sales (15) + : : : +- BroadcastExchange (24) + : : : +- * Project (23) + : : : +- * Filter (22) + : : : +- * ColumnarToRow (21) + : : : +- Scan parquet default.date_dim (20) + : : +- BroadcastExchange (31) + : : +- * Project (30) + : : +- * Filter (29) + : : +- * ColumnarToRow (28) + : : +- Scan parquet default.item (27) + : +- * Sort (40) + : +- Exchange (39) + : +- * Filter (38) + : +- * ColumnarToRow (37) + : +- Scan parquet default.customer (36) + +- * Sort (59) + +- Exchange (58) + +- * Project (57) + +- * BroadcastHashJoin Inner BuildRight (56) + :- * Filter (50) + : +- * ColumnarToRow (49) + : +- Scan parquet default.store_sales (48) + +- BroadcastExchange (55) + +- * Project (54) + +- * Filter (53) + +- * ColumnarToRow (52) + +- Scan parquet default.date_dim (51) + + +(1) Scan parquet default.customer_address +Output [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Batched: true +Location [not included in comparison]/{warehouse_dir}/customer_address] +PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] +ReadSchema: struct + +(2) ColumnarToRow [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] + +(3) Filter [codegen id : 2] +Input [3]: [ca_address_sk#1, ca_county#2, ca_state#3] +Condition : ((isnotnull(ca_address_sk#1) AND isnotnull(ca_county#2)) AND isnotnull(ca_state#3)) + +(4) Scan parquet default.store +Output [2]: [s_county#4, s_state#5] +Batched: true +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] +ReadSchema: struct + +(5) ColumnarToRow [codegen id : 
1] +Input [2]: [s_county#4, s_state#5] + +(6) Filter [codegen id : 1] +Input [2]: [s_county#4, s_state#5] +Condition : (isnotnull(s_county#4) AND isnotnull(s_state#5)) + +(7) BroadcastExchange +Input [2]: [s_county#4, s_state#5] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#6] + +(8) BroadcastHashJoin [codegen id : 2] +Left keys [2]: [ca_county#2, ca_state#3] +Right keys [2]: [s_county#4, s_state#5] +Join condition: None + +(9) Project [codegen id : 2] +Output [1]: [ca_address_sk#1] +Input [5]: [ca_address_sk#1, ca_county#2, ca_state#3, s_county#4, s_state#5] + +(10) BroadcastExchange +Input [1]: [ca_address_sk#1] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] + +(11) Scan parquet default.catalog_sales +Output [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_date_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct -(2) ColumnarToRow [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(12) ColumnarToRow [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(3) Filter [codegen id : 1] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] -Condition : ((isnotnull(cs_item_sk#3) AND isnotnull(cs_sold_date_sk#1)) AND isnotnull(cs_bill_customer_sk#2)) +(13) Filter [codegen id : 3] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] +Condition : ((isnotnull(cs_item_sk#10) AND isnotnull(cs_sold_date_sk#8)) AND isnotnull(cs_bill_customer_sk#9)) -(4) Project [codegen id : 1] -Output [3]: [cs_sold_date_sk#1 AS sold_date_sk#4, cs_bill_customer_sk#2 AS customer_sk#5, cs_item_sk#3 AS item_sk#6] -Input [3]: [cs_sold_date_sk#1, cs_bill_customer_sk#2, cs_item_sk#3] +(14) Project [codegen id : 3] +Output [3]: [cs_sold_date_sk#8 AS sold_date_sk#11, cs_bill_customer_sk#9 AS customer_sk#12, cs_item_sk#10 AS item_sk#13] +Input [3]: [cs_sold_date_sk#8, cs_bill_customer_sk#9, cs_item_sk#10] -(5) Scan parquet default.web_sales -Output [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(15) Scan parquet default.web_sales +Output [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_date_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct -(6) ColumnarToRow [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(16) ColumnarToRow [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(7) Filter [codegen id : 2] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] -Condition : ((isnotnull(ws_item_sk#8) AND isnotnull(ws_sold_date_sk#7)) AND isnotnull(ws_bill_customer_sk#9)) +(17) Filter [codegen id : 4] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] +Condition : ((isnotnull(ws_item_sk#15) AND isnotnull(ws_sold_date_sk#14)) AND isnotnull(ws_bill_customer_sk#16)) -(8) Project [codegen id : 2] -Output [3]: [ws_sold_date_sk#7 AS sold_date_sk#10, ws_bill_customer_sk#9 AS customer_sk#11, ws_item_sk#8 AS item_sk#12] -Input [3]: [ws_sold_date_sk#7, ws_item_sk#8, ws_bill_customer_sk#9] +(18) Project [codegen id : 4] +Output [3]: [ws_sold_date_sk#14 AS sold_date_sk#17, ws_bill_customer_sk#16 AS 
customer_sk#18, ws_item_sk#15 AS item_sk#19] +Input [3]: [ws_sold_date_sk#14, ws_item_sk#15, ws_bill_customer_sk#16] -(9) Union +(19) Union -(10) Scan parquet default.item -Output [3]: [i_item_sk#13, i_class#14, i_category#15] +(20) Scan parquet default.date_dim +Output [3]: [d_date_sk#20, d_year#21, d_moy#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/item] -PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] +ReadSchema: struct -(11) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(21) ColumnarToRow [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(12) Filter [codegen id : 3] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] -Condition : ((((isnotnull(i_category#15) AND isnotnull(i_class#14)) AND (i_category#15 = Women)) AND (i_class#14 = maternity)) AND isnotnull(i_item_sk#13)) +(22) Filter [codegen id : 5] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] +Condition : ((((isnotnull(d_moy#22) AND isnotnull(d_year#21)) AND (d_moy#22 = 12)) AND (d_year#21 = 1998)) AND isnotnull(d_date_sk#20)) -(13) Project [codegen id : 3] -Output [1]: [i_item_sk#13] -Input [3]: [i_item_sk#13, i_class#14, i_category#15] +(23) Project [codegen id : 5] +Output [1]: [d_date_sk#20] +Input [3]: [d_date_sk#20, d_year#21, d_moy#22] -(14) BroadcastExchange -Input [1]: [i_item_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +(24) BroadcastExchange +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [item_sk#6] -Right keys [1]: [i_item_sk#13] +(25) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [sold_date_sk#11] +Right keys [1]: [d_date_sk#20] Join condition: None -(16) Project [codegen id : 5] -Output [2]: [sold_date_sk#4, customer_sk#5] -Input [4]: [sold_date_sk#4, customer_sk#5, item_sk#6, i_item_sk#13] +(26) Project [codegen id : 7] +Output [2]: [customer_sk#12, item_sk#13] +Input [4]: [sold_date_sk#11, customer_sk#12, item_sk#13, d_date_sk#20] -(17) Scan parquet default.date_dim -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +(27) Scan parquet default.item +Output [3]: [i_item_sk#24, i_class#25, i_category#26] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/item] +PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women), EqualTo(i_class,maternity), IsNotNull(i_item_sk)] +ReadSchema: struct -(18) ColumnarToRow [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(28) ColumnarToRow [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(19) Filter [codegen id : 4] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 12)) AND (d_year#18 = 1998)) AND isnotnull(d_date_sk#17)) +(29) Filter [codegen id : 6] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] +Condition : ((((isnotnull(i_category#26) AND 
isnotnull(i_class#25)) AND (i_category#26 = Women)) AND (i_class#25 = maternity)) AND isnotnull(i_item_sk#24)) -(20) Project [codegen id : 4] -Output [1]: [d_date_sk#17] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +(30) Project [codegen id : 6] +Output [1]: [i_item_sk#24] +Input [3]: [i_item_sk#24, i_class#25, i_category#26] -(21) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +(31) BroadcastExchange +Input [1]: [i_item_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] -(22) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [sold_date_sk#4] -Right keys [1]: [d_date_sk#17] +(32) BroadcastHashJoin [codegen id : 7] +Left keys [1]: [item_sk#13] +Right keys [1]: [i_item_sk#24] Join condition: None -(23) Project [codegen id : 5] -Output [1]: [customer_sk#5] -Input [3]: [sold_date_sk#4, customer_sk#5, d_date_sk#17] +(33) Project [codegen id : 7] +Output [1]: [customer_sk#12] +Input [3]: [customer_sk#12, item_sk#13, i_item_sk#24] -(24) Exchange -Input [1]: [customer_sk#5] -Arguments: hashpartitioning(customer_sk#5, 5), true, [id=#21] +(34) Exchange +Input [1]: [customer_sk#12] +Arguments: hashpartitioning(customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#28] -(25) Sort [codegen id : 6] -Input [1]: [customer_sk#5] -Arguments: [customer_sk#5 ASC NULLS FIRST], false, 0 +(35) Sort [codegen id : 8] +Input [1]: [customer_sk#12] +Arguments: [customer_sk#12 ASC NULLS FIRST], false, 0 -(26) Scan parquet default.customer -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] +(36) Scan parquet default.customer +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct -(27) ColumnarToRow [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] +(37) ColumnarToRow [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] -(28) Filter [codegen id : 7] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Condition : (isnotnull(c_customer_sk#22) AND isnotnull(c_current_addr_sk#23)) +(38) Filter [codegen id : 9] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Condition : (isnotnull(c_customer_sk#29) AND isnotnull(c_current_addr_sk#30)) -(29) Exchange -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#24] +(39) Exchange +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#31] -(30) Sort [codegen id : 8] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(40) Sort [codegen id : 10] +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 -(31) SortMergeJoin [codegen id : 9] -Left keys [1]: [customer_sk#5] -Right keys [1]: [c_customer_sk#22] +(41) SortMergeJoin +Left keys [1]: [customer_sk#12] +Right keys [1]: [c_customer_sk#29] Join condition: None -(32) Project [codegen id : 9] -Output [2]: [c_customer_sk#22, c_current_addr_sk#23] -Input [3]: [customer_sk#5, c_customer_sk#22, c_current_addr_sk#23] +(42) Project +Output [2]: [c_customer_sk#29, c_current_addr_sk#30] +Input [3]: [customer_sk#12, c_customer_sk#29, c_current_addr_sk#30] -(33) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] 
-Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(43) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(34) HashAggregate [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Keys [2]: [c_customer_sk#22, c_current_addr_sk#23] +(44) HashAggregate +Input [2]: [c_customer_sk#29, c_current_addr_sk#30] +Keys [2]: [c_customer_sk#29, c_current_addr_sk#30] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#22, c_current_addr_sk#23] +Results [2]: [c_customer_sk#29, c_current_addr_sk#30] -(35) Sort [codegen id : 9] -Input [2]: [c_customer_sk#22, c_current_addr_sk#23] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +(45) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [ca_address_sk#1] +Right keys [1]: [c_current_addr_sk#30] +Join condition: None -(36) Scan parquet default.store_sales -Output [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(46) Project [codegen id : 11] +Output [1]: [c_customer_sk#29] +Input [3]: [ca_address_sk#1, c_customer_sk#29, c_current_addr_sk#30] + +(47) Sort [codegen id : 11] +Input [1]: [c_customer_sk#29] +Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 + +(48) Scan parquet default.store_sales +Output [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_sold_date_sk)] ReadSchema: struct -(37) ColumnarToRow [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] - -(38) Filter [codegen id : 10] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Condition : (isnotnull(ss_customer_sk#26) AND isnotnull(ss_sold_date_sk#25)) - -(39) Exchange -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: hashpartitioning(ss_customer_sk#26, 5), true, [id=#28] - -(40) Sort [codegen id : 11] -Input [3]: [ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] -Arguments: [ss_customer_sk#26 ASC NULLS FIRST], false, 0 - -(41) SortMergeJoin [codegen id : 12] -Left keys [1]: [c_customer_sk#22] -Right keys [1]: [ss_customer_sk#26] -Join condition: None - -(42) Project [codegen id : 12] -Output [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [5]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_customer_sk#26, ss_ext_sales_price#27] +(49) ColumnarToRow [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] -(43) Exchange -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: hashpartitioning(c_current_addr_sk#23, 5), true, [id=#29] +(50) Filter [codegen id : 13] +Input [3]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34] +Condition : (isnotnull(ss_customer_sk#33) AND isnotnull(ss_sold_date_sk#32)) -(44) Sort [codegen id : 13] -Input [4]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27] -Arguments: [c_current_addr_sk#23 ASC NULLS FIRST], false, 0 - -(45) Scan parquet default.customer_address -Output [3]: [ca_address_sk#30, ca_county#31, ca_state#32] +(51) Scan parquet default.date_dim +Output [2]: [d_date_sk#20, d_month_seq#35] Batched: true -Location [not 
included in comparison]/{warehouse_dir}/customer_address] -PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] -ReadSchema: struct - -(46) ColumnarToRow [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] - -(47) Filter [codegen id : 14] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Condition : ((isnotnull(ca_address_sk#30) AND isnotnull(ca_county#31)) AND isnotnull(ca_state#32)) - -(48) Exchange -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: hashpartitioning(ca_address_sk#30, 5), true, [id=#33] - -(49) Sort [codegen id : 15] -Input [3]: [ca_address_sk#30, ca_county#31, ca_state#32] -Arguments: [ca_address_sk#30 ASC NULLS FIRST], false, 0 - -(50) SortMergeJoin [codegen id : 18] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#30] -Join condition: None +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] +ReadSchema: struct -(51) Project [codegen id : 18] -Output [5]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32] -Input [7]: [c_customer_sk#22, c_current_addr_sk#23, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_address_sk#30, ca_county#31, ca_state#32] +(52) ColumnarToRow [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] -(52) Scan parquet default.store -Output [2]: [s_county#34, s_state#35] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] -ReadSchema: struct +(53) Filter [codegen id : 12] +Input [2]: [d_date_sk#20, d_month_seq#35] +Condition : (((isnotnull(d_month_seq#35) AND (d_month_seq#35 >= Subquery scalar-subquery#36, [id=#37])) AND (d_month_seq#35 <= Subquery scalar-subquery#38, [id=#39])) AND isnotnull(d_date_sk#20)) -(53) ColumnarToRow [codegen id : 16] -Input [2]: [s_county#34, s_state#35] - -(54) Filter [codegen id : 16] -Input [2]: [s_county#34, s_state#35] -Condition : (isnotnull(s_county#34) AND isnotnull(s_state#35)) +(54) Project [codegen id : 12] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_month_seq#35] (55) BroadcastExchange -Input [2]: [s_county#34, s_state#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#36] +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] -(56) BroadcastHashJoin [codegen id : 18] -Left keys [2]: [ca_county#31, ca_state#32] -Right keys [2]: [s_county#34, s_state#35] +(56) BroadcastHashJoin [codegen id : 13] +Left keys [1]: [ss_sold_date_sk#32] +Right keys [1]: [d_date_sk#20] Join condition: None -(57) Project [codegen id : 18] -Output [3]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27] -Input [7]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, ca_county#31, ca_state#32, s_county#34, s_state#35] - -(58) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_month_seq#37] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] -ReadSchema: struct - -(59) ColumnarToRow [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] - -(60) Filter [codegen id : 17] -Input [2]: [d_date_sk#17, d_month_seq#37] -Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= Subquery scalar-subquery#38, [id=#39])) AND (d_month_seq#37 <= Subquery scalar-subquery#40, 
[id=#41])) AND isnotnull(d_date_sk#17)) +(57) Project [codegen id : 13] +Output [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Input [4]: [ss_sold_date_sk#32, ss_customer_sk#33, ss_ext_sales_price#34, d_date_sk#20] -(61) Project [codegen id : 17] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_month_seq#37] +(58) Exchange +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: hashpartitioning(ss_customer_sk#33, 5), ENSURE_REQUIREMENTS, [id=#41] -(62) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +(59) Sort [codegen id : 14] +Input [2]: [ss_customer_sk#33, ss_ext_sales_price#34] +Arguments: [ss_customer_sk#33 ASC NULLS FIRST], false, 0 -(63) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_sold_date_sk#25] -Right keys [1]: [d_date_sk#17] +(60) SortMergeJoin [codegen id : 15] +Left keys [1]: [c_customer_sk#29] +Right keys [1]: [ss_customer_sk#33] Join condition: None -(64) Project [codegen id : 18] -Output [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Input [4]: [c_customer_sk#22, ss_sold_date_sk#25, ss_ext_sales_price#27, d_date_sk#17] - -(65) HashAggregate [codegen id : 18] -Input [2]: [c_customer_sk#22, ss_ext_sales_price#27] -Keys [1]: [c_customer_sk#22] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum#43] -Results [2]: [c_customer_sk#22, sum#44] - -(66) Exchange -Input [2]: [c_customer_sk#22, sum#44] -Arguments: hashpartitioning(c_customer_sk#22, 5), true, [id=#45] - -(67) HashAggregate [codegen id : 19] -Input [2]: [c_customer_sk#22, sum#44] -Keys [1]: [c_customer_sk#22] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#46] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#46,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#47] - -(68) HashAggregate [codegen id : 19] -Input [1]: [segment#47] -Keys [1]: [segment#47] +(61) Project [codegen id : 15] +Output [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Input [3]: [c_customer_sk#29, ss_customer_sk#33, ss_ext_sales_price#34] + +(62) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, ss_ext_sales_price#34] +Keys [1]: [c_customer_sk#29] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum#42] +Results [2]: [c_customer_sk#29, sum#43] + +(63) HashAggregate [codegen id : 15] +Input [2]: [c_customer_sk#29, sum#43] +Keys [1]: [c_customer_sk#29] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#34))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#34))#44] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#34))#44,17,2)) / 50.00), DecimalType(21,6), true) as int) AS segment#45] + +(64) HashAggregate [codegen id : 15] +Input [1]: [segment#45] +Keys [1]: [segment#45] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#48] -Results [2]: [segment#47, count#49] +Aggregate Attributes [1]: [count#46] +Results [2]: [segment#45, count#47] -(69) Exchange -Input [2]: [segment#47, count#49] -Arguments: hashpartitioning(segment#47, 5), true, [id=#50] +(65) Exchange +Input [2]: [segment#45, count#47] +Arguments: hashpartitioning(segment#45, 5), ENSURE_REQUIREMENTS, [id=#48] -(70) HashAggregate [codegen id : 20] -Input [2]: [segment#47, count#49] -Keys [1]: [segment#47] +(66) HashAggregate 
[codegen id : 16] +Input [2]: [segment#45, count#47] +Keys [1]: [segment#45] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#51] -Results [3]: [segment#47, count(1)#51 AS num_customers#52, (segment#47 * 50) AS segment_base#53] +Aggregate Attributes [1]: [count(1)#49] +Results [3]: [segment#45, count(1)#49 AS num_customers#50, (segment#45 * 50) AS segment_base#51] -(71) TakeOrderedAndProject -Input [3]: [segment#47, num_customers#52, segment_base#53] -Arguments: 100, [segment#47 ASC NULLS FIRST, num_customers#52 ASC NULLS FIRST], [segment#47, num_customers#52, segment_base#53] +(67) TakeOrderedAndProject +Input [3]: [segment#45, num_customers#50, segment_base#51] +Arguments: 100, [segment#45 ASC NULLS FIRST, num_customers#50 ASC NULLS FIRST], [segment#45, num_customers#50, segment_base#51] ===== Subqueries ===== -Subquery:1 Hosting operator id = 60 Hosting Expression = Subquery scalar-subquery#38, [id=#39] -* HashAggregate (78) -+- Exchange (77) - +- * HashAggregate (76) - +- * Project (75) - +- * Filter (74) - +- * ColumnarToRow (73) - +- Scan parquet default.date_dim (72) +Subquery:1 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#36, [id=#37] +* HashAggregate (74) ++- Exchange (73) + +- * HashAggregate (72) + +- * Project (71) + +- * Filter (70) + +- * ColumnarToRow (69) + +- Scan parquet default.date_dim (68) -(72) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(68) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(73) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(69) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(74) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(70) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(75) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 1) AS (d_month_seq + 1)#54] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(71) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 1) AS (d_month_seq + 1)#52] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(76) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(72) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -(77) Exchange -Input [1]: [(d_month_seq + 1)#54] -Arguments: hashpartitioning((d_month_seq + 1)#54, 5), true, [id=#55] +(73) Exchange +Input [1]: [(d_month_seq + 1)#52] +Arguments: hashpartitioning((d_month_seq + 1)#52, 5), ENSURE_REQUIREMENTS, [id=#53] -(78) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#54] -Keys [1]: [(d_month_seq + 1)#54] +(74) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 1)#52] +Keys [1]: [(d_month_seq + 1)#52] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#54] +Results [1]: [(d_month_seq + 1)#52] -Subquery:2 Hosting operator id = 60 Hosting Expression = Subquery 
scalar-subquery#40, [id=#41] -* HashAggregate (85) -+- Exchange (84) - +- * HashAggregate (83) - +- * Project (82) - +- * Filter (81) - +- * ColumnarToRow (80) - +- Scan parquet default.date_dim (79) +Subquery:2 Hosting operator id = 53 Hosting Expression = Subquery scalar-subquery#38, [id=#39] +* HashAggregate (81) ++- Exchange (80) + +- * HashAggregate (79) + +- * Project (78) + +- * Filter (77) + +- * ColumnarToRow (76) + +- Scan parquet default.date_dim (75) -(79) Scan parquet default.date_dim -Output [3]: [d_month_seq#37, d_year#18, d_moy#19] +(75) Scan parquet default.date_dim +Output [3]: [d_month_seq#35, d_year#21, d_moy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct -(80) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(76) ColumnarToRow [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(81) Filter [codegen id : 1] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] -Condition : (((isnotnull(d_year#18) AND isnotnull(d_moy#19)) AND (d_year#18 = 1998)) AND (d_moy#19 = 12)) +(77) Filter [codegen id : 1] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] +Condition : (((isnotnull(d_year#21) AND isnotnull(d_moy#22)) AND (d_year#21 = 1998)) AND (d_moy#22 = 12)) -(82) Project [codegen id : 1] -Output [1]: [(d_month_seq#37 + 3) AS (d_month_seq + 3)#56] -Input [3]: [d_month_seq#37, d_year#18, d_moy#19] +(78) Project [codegen id : 1] +Output [1]: [(d_month_seq#35 + 3) AS (d_month_seq + 3)#54] +Input [3]: [d_month_seq#35, d_year#21, d_moy#22] -(83) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(79) HashAggregate [codegen id : 1] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] -(84) Exchange -Input [1]: [(d_month_seq + 3)#56] -Arguments: hashpartitioning((d_month_seq + 3)#56, 5), true, [id=#57] +(80) Exchange +Input [1]: [(d_month_seq + 3)#54] +Arguments: hashpartitioning((d_month_seq + 3)#54, 5), ENSURE_REQUIREMENTS, [id=#55] -(85) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#56] -Keys [1]: [(d_month_seq + 3)#56] +(81) HashAggregate [codegen id : 2] +Input [1]: [(d_month_seq + 3)#54] +Keys [1]: [(d_month_seq + 3)#54] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#56] +Results [1]: [(d_month_seq + 3)#54] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt index cb7130f53c9a9..3b0622cbf9264 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/simplified.txt @@ -1,142 +1,130 @@ TakeOrderedAndProject [segment,num_customers,segment_base] - WholeStageCodegen (20) + WholeStageCodegen (16) HashAggregate [segment,count] [count(1),num_customers,segment_base,count] InputAdapter Exchange [segment] #1 - WholeStageCodegen (19) + WholeStageCodegen (15) HashAggregate [segment] [count,count] HashAggregate [c_customer_sk,sum] [sum(UnscaledValue(ss_ext_sales_price)),segment,sum] - InputAdapter - Exchange [c_customer_sk] #2 - WholeStageCodegen (18) - HashAggregate [c_customer_sk,ss_ext_sales_price] 
[sum,sum] - Project [c_customer_sk,ss_ext_sales_price] - BroadcastHashJoin [ss_sold_date_sk,d_date_sk] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price] - BroadcastHashJoin [ca_county,ca_state,s_county,s_state] - Project [c_customer_sk,ss_sold_date_sk,ss_ext_sales_price,ca_county,ca_state] - SortMergeJoin [c_current_addr_sk,ca_address_sk] - InputAdapter - WholeStageCodegen (13) - Sort [c_current_addr_sk] + HashAggregate [c_customer_sk,ss_ext_sales_price] [sum,sum] + Project [c_customer_sk,ss_ext_sales_price] + SortMergeJoin [c_customer_sk,ss_customer_sk] + InputAdapter + WholeStageCodegen (11) + Sort [c_customer_sk] + Project [c_customer_sk] + BroadcastHashJoin [ca_address_sk,c_current_addr_sk] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (2) + Project [ca_address_sk] + BroadcastHashJoin [ca_county,ca_state,s_county,s_state] + Filter [ca_address_sk,ca_county,ca_state] + ColumnarToRow + InputAdapter + Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] InputAdapter - Exchange [c_current_addr_sk] #3 - WholeStageCodegen (12) - Project [c_customer_sk,c_current_addr_sk,ss_sold_date_sk,ss_ext_sales_price] - SortMergeJoin [c_customer_sk,ss_customer_sk] + BroadcastExchange #3 + WholeStageCodegen (1) + Filter [s_county,s_state] + ColumnarToRow InputAdapter - WholeStageCodegen (9) - Sort [c_customer_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - HashAggregate [c_customer_sk,c_current_addr_sk] - Project [c_customer_sk,c_current_addr_sk] - SortMergeJoin [customer_sk,c_customer_sk] - InputAdapter - WholeStageCodegen (6) - Sort [customer_sk] + Scan parquet default.store [s_county,s_state] + HashAggregate [c_customer_sk,c_current_addr_sk] + HashAggregate [c_customer_sk,c_current_addr_sk] + Project [c_customer_sk,c_current_addr_sk] + SortMergeJoin [customer_sk,c_customer_sk] + InputAdapter + WholeStageCodegen (8) + Sort [customer_sk] + InputAdapter + Exchange [customer_sk] #4 + WholeStageCodegen (7) + Project [customer_sk] + BroadcastHashJoin [item_sk,i_item_sk] + Project [customer_sk,item_sk] + BroadcastHashJoin [sold_date_sk,d_date_sk] + InputAdapter + Union + WholeStageCodegen (3) + Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [customer_sk] #4 - WholeStageCodegen (5) - Project [customer_sk] - BroadcastHashJoin [sold_date_sk,d_date_sk] - Project [sold_date_sk,customer_sk] - BroadcastHashJoin [item_sk,i_item_sk] - InputAdapter - Union - WholeStageCodegen (1) - Project [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - Filter [cs_item_sk,cs_sold_date_sk,cs_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] - WholeStageCodegen (2) - Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] - Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] - InputAdapter - BroadcastExchange #5 - WholeStageCodegen (3) - Project [i_item_sk] - Filter [i_category,i_class,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_class,i_category] - InputAdapter - BroadcastExchange #6 - WholeStageCodegen (4) - Project [d_date_sk] - Filter [d_moy,d_year,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_year,d_moy] - InputAdapter - WholeStageCodegen (8) - Sort [c_customer_sk] + Scan parquet 
default.catalog_sales [cs_sold_date_sk,cs_bill_customer_sk,cs_item_sk] + WholeStageCodegen (4) + Project [ws_sold_date_sk,ws_bill_customer_sk,ws_item_sk] + Filter [ws_item_sk,ws_sold_date_sk,ws_bill_customer_sk] + ColumnarToRow InputAdapter - Exchange [c_customer_sk] #7 - WholeStageCodegen (7) - Filter [c_customer_sk,c_current_addr_sk] - ColumnarToRow - InputAdapter - Scan parquet default.customer [c_customer_sk,c_current_addr_sk] - InputAdapter - WholeStageCodegen (11) - Sort [ss_customer_sk] - InputAdapter - Exchange [ss_customer_sk] #8 - WholeStageCodegen (10) - Filter [ss_customer_sk,ss_sold_date_sk] + Scan parquet default.web_sales [ws_sold_date_sk,ws_item_sk,ws_bill_customer_sk] + InputAdapter + BroadcastExchange #5 + WholeStageCodegen (5) + Project [d_date_sk] + Filter [d_moy,d_year,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_year,d_moy] + InputAdapter + BroadcastExchange #6 + WholeStageCodegen (6) + Project [i_item_sk] + Filter [i_category,i_class,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] - InputAdapter - WholeStageCodegen (15) - Sort [ca_address_sk] - InputAdapter - Exchange [ca_address_sk] #9 - WholeStageCodegen (14) - Filter [ca_address_sk,ca_county,ca_state] - ColumnarToRow - InputAdapter - Scan parquet default.customer_address [ca_address_sk,ca_county,ca_state] - InputAdapter - BroadcastExchange #10 - WholeStageCodegen (16) - Filter [s_county,s_state] + Scan parquet default.item [i_item_sk,i_class,i_category] + InputAdapter + WholeStageCodegen (10) + Sort [c_customer_sk] + InputAdapter + Exchange [c_customer_sk] #7 + WholeStageCodegen (9) + Filter [c_customer_sk,c_current_addr_sk] + ColumnarToRow + InputAdapter + Scan parquet default.customer [c_customer_sk,c_current_addr_sk] + InputAdapter + WholeStageCodegen (14) + Sort [ss_customer_sk] + InputAdapter + Exchange [ss_customer_sk] #8 + WholeStageCodegen (13) + Project [ss_customer_sk,ss_ext_sales_price] + BroadcastHashJoin [ss_sold_date_sk,d_date_sk] + Filter [ss_customer_sk,ss_sold_date_sk] ColumnarToRow InputAdapter - Scan parquet default.store [s_county,s_state] - InputAdapter - BroadcastExchange #11 - WholeStageCodegen (17) - Project [d_date_sk] - Filter [d_month_seq,d_date_sk] - Subquery #1 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 1)] - InputAdapter - Exchange [(d_month_seq + 1)] #12 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 1)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - Subquery #2 - WholeStageCodegen (2) - HashAggregate [(d_month_seq + 3)] - InputAdapter - Exchange [(d_month_seq + 3)] #13 - WholeStageCodegen (1) - HashAggregate [(d_month_seq + 3)] - Project [d_month_seq] - Filter [d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_month_seq,d_year,d_moy] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_month_seq] + Scan parquet default.store_sales [ss_sold_date_sk,ss_customer_sk,ss_ext_sales_price] + InputAdapter + BroadcastExchange #9 + WholeStageCodegen (12) + Project [d_date_sk] + Filter [d_month_seq,d_date_sk] + Subquery #1 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 1)] + InputAdapter + Exchange [(d_month_seq + 1)] #10 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 1)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim 
[d_month_seq,d_year,d_moy] + Subquery #2 + WholeStageCodegen (2) + HashAggregate [(d_month_seq + 3)] + InputAdapter + Exchange [(d_month_seq + 3)] #11 + WholeStageCodegen (1) + HashAggregate [(d_month_seq + 3)] + Project [d_month_seq] + Filter [d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_month_seq,d_year,d_moy] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_month_seq] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 432ef4db6b1eb..411cbf4809cd1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -13,8 +13,8 @@ TakeOrderedAndProject (94) : : +- * HashAggregate (23) : : +- * Project (22) : : +- * BroadcastHashJoin Inner BuildRight (21) - : : :- * Project (16) - : : : +- * BroadcastHashJoin Inner BuildRight (15) + : : :- * Project (15) + : : : +- * BroadcastHashJoin Inner BuildRight (14) : : : :- Union (9) : : : : :- * Project (4) : : : : : +- * Filter (3) @@ -24,22 +24,22 @@ TakeOrderedAndProject (94) : : : : +- * Filter (7) : : : : +- * ColumnarToRow (6) : : : : +- Scan parquet default.store_returns (5) - : : : +- BroadcastExchange (14) - : : : +- * Project (13) - : : : +- * Filter (12) - : : : +- * ColumnarToRow (11) - : : : +- Scan parquet default.date_dim (10) + : : : +- BroadcastExchange (13) + : : : +- * Filter (12) + : : : +- * ColumnarToRow (11) + : : : +- Scan parquet default.store (10) : : +- BroadcastExchange (20) - : : +- * Filter (19) - : : +- * ColumnarToRow (18) - : : +- Scan parquet default.store (17) + : : +- * Project (19) + : : +- * Filter (18) + : : +- * ColumnarToRow (17) + : : +- Scan parquet default.date_dim (16) : :- * HashAggregate (46) : : +- Exchange (45) : : +- * HashAggregate (44) : : +- * Project (43) : : +- * BroadcastHashJoin Inner BuildRight (42) - : : :- * Project (37) - : : : +- * BroadcastHashJoin Inner BuildRight (36) + : : :- * Project (40) + : : : +- * BroadcastHashJoin Inner BuildRight (39) : : : :- Union (34) : : : : :- * Project (29) : : : : : +- * Filter (28) @@ -49,18 +49,18 @@ TakeOrderedAndProject (94) : : : : +- * Filter (32) : : : : +- * ColumnarToRow (31) : : : : +- Scan parquet default.catalog_returns (30) - : : : +- ReusedExchange (35) - : : +- BroadcastExchange (41) - : : +- * Filter (40) - : : +- * ColumnarToRow (39) - : : +- Scan parquet default.catalog_page (38) + : : : +- BroadcastExchange (38) + : : : +- * Filter (37) + : : : +- * ColumnarToRow (36) + : : : +- Scan parquet default.catalog_page (35) + : : +- ReusedExchange (41) : +- * HashAggregate (75) : +- Exchange (74) : +- * HashAggregate (73) : +- * Project (72) : +- * BroadcastHashJoin Inner BuildRight (71) - : :- * Project (66) - : : +- * BroadcastHashJoin Inner BuildRight (65) + : :- * Project (69) + : : +- * BroadcastHashJoin Inner BuildRight (68) : : :- Union (63) : : : :- * Project (50) : : : : +- * Filter (49) @@ -78,11 +78,11 @@ TakeOrderedAndProject (94) : : : +- * Filter (58) : : : +- * ColumnarToRow (57) : : : +- Scan parquet default.web_sales (56) - : : +- ReusedExchange (64) - : +- BroadcastExchange (70) - : +- * Filter (69) - : +- * ColumnarToRow (68) - : +- Scan parquet default.web_site (67) + : : +- BroadcastExchange (67) + : : +- * Filter (66) + : : +- * ColumnarToRow (65) + : : +- Scan parquet 
default.web_site (64) + : +- ReusedExchange (70) :- * HashAggregate (84) : +- Exchange (83) : +- * HashAggregate (82) @@ -132,81 +132,81 @@ Input [4]: [sr_returned_date_sk#11, sr_store_sk#12, sr_return_amt#13, sr_net_los (9) Union -(10) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_date#22] +(10) Scan parquet default.store +Output [2]: [s_store_sk#21, s_store_id#22] Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/store] +PushedFilters: [IsNotNull(s_store_sk)] +ReadSchema: struct (11) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] +Input [2]: [s_store_sk#21, s_store_id#22] (12) Filter [codegen id : 3] -Input [2]: [d_date_sk#21, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 10442)) AND (d_date#22 <= 10456)) AND isnotnull(d_date_sk#21)) +Input [2]: [s_store_sk#21, s_store_id#22] +Condition : isnotnull(s_store_sk#21) -(13) Project [codegen id : 3] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_date#22] +(13) BroadcastExchange +Input [2]: [s_store_sk#21, s_store_id#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] -(14) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] - -(15) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [date_sk#6] -Right keys [1]: [cast(d_date_sk#21 as bigint)] +(14) BroadcastHashJoin [codegen id : 5] +Left keys [1]: [store_sk#5] +Right keys [1]: [cast(s_store_sk#21 as bigint)] Join condition: None -(16) Project [codegen id : 5] -Output [5]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10] -Input [7]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, d_date_sk#21] +(15) Project [codegen id : 5] +Output [6]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [8]: [store_sk#5, date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#21, s_store_id#22] -(17) Scan parquet default.store -Output [2]: [s_store_sk#24, s_store_id#25] +(16) Scan parquet default.date_dim +Output [2]: [d_date_sk#24, d_date#25] Batched: true -Location [not included in comparison]/{warehouse_dir}/store] -PushedFilters: [IsNotNull(s_store_sk)] -ReadSchema: struct +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] +ReadSchema: struct + +(17) ColumnarToRow [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] -(18) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] +(18) Filter [codegen id : 4] +Input [2]: [d_date_sk#24, d_date#25] +Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 10442)) AND (d_date#25 <= 10456)) AND isnotnull(d_date_sk#24)) -(19) Filter [codegen id : 4] -Input [2]: [s_store_sk#24, s_store_id#25] -Condition : isnotnull(s_store_sk#24) +(19) Project [codegen id : 4] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#25] (20) BroadcastExchange -Input [2]: [s_store_sk#24, s_store_id#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, 
int, true] as bigint)),false), [id=#26] (21) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [store_sk#5] -Right keys [1]: [cast(s_store_sk#24 as bigint)] +Left keys [1]: [date_sk#6] +Right keys [1]: [cast(d_date_sk#24 as bigint)] Join condition: None (22) Project [codegen id : 5] -Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Input [7]: [store_sk#5, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_sk#24, s_store_id#25] +Output [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Input [7]: [date_sk#6, sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22, d_date_sk#24] (23) HashAggregate [codegen id : 5] -Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#25] -Keys [1]: [s_store_id#25] +Input [5]: [sales_price#7, profit#8, return_amt#9, net_loss#10, s_store_id#22] +Keys [1]: [s_store_id#22] Functions [4]: [partial_sum(UnscaledValue(sales_price#7)), partial_sum(UnscaledValue(return_amt#9)), partial_sum(UnscaledValue(profit#8)), partial_sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum#27, sum#28, sum#29, sum#30] -Results [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] +Results [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] (24) Exchange -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Arguments: hashpartitioning(s_store_id#25, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Arguments: hashpartitioning(s_store_id#22, 5), ENSURE_REQUIREMENTS, [id=#35] (25) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#25, sum#31, sum#32, sum#33, sum#34] -Keys [1]: [s_store_id#25] +Input [5]: [s_store_id#22, sum#31, sum#32, sum#33, sum#34] +Keys [1]: [s_store_id#22] Functions [4]: [sum(UnscaledValue(sales_price#7)), sum(UnscaledValue(return_amt#9)), sum(UnscaledValue(profit#8)), sum(UnscaledValue(net_loss#10))] Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#7))#36, sum(UnscaledValue(return_amt#9))#37, sum(UnscaledValue(profit#8))#38, sum(UnscaledValue(net_loss#10))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#25) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] +Results [5]: [store channel AS channel#40, concat(store, s_store_id#22) AS id#41, MakeDecimal(sum(UnscaledValue(sales_price#7))#36,17,2) AS sales#42, MakeDecimal(sum(UnscaledValue(return_amt#9))#37,17,2) AS returns#43, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#8))#38,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#10))#39,17,2) as decimal(18,2)))), DecimalType(18,2), true) AS profit#44] (26) Scan parquet default.catalog_sales Output [4]: [cs_sold_date_sk#45, cs_catalog_page_sk#46, cs_ext_sales_price#47, cs_net_profit#48] @@ -246,44 +246,44 @@ Input [4]: [cr_returned_date_sk#55, cr_catalog_page_sk#56, cr_return_amount#57, (34) Union -(35) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#50] -Right keys [1]: [d_date_sk#21] -Join condition: None - -(37) Project [codegen id : 11] -Output [5]: [page_sk#49, sales_price#51, 
profit#52, return_amt#53, net_loss#54] -Input [7]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, d_date_sk#21] - -(38) Scan parquet default.catalog_page +(35) Scan parquet default.catalog_page Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct -(39) ColumnarToRow [codegen id : 10] +(36) ColumnarToRow [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -(40) Filter [codegen id : 10] +(37) Filter [codegen id : 9] Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Condition : isnotnull(cp_catalog_page_sk#65) -(41) BroadcastExchange +(38) BroadcastExchange Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] -(42) BroadcastHashJoin [codegen id : 11] +(39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [page_sk#49] Right keys [1]: [cp_catalog_page_sk#65] Join condition: None +(40) Project [codegen id : 11] +Output [6]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] +Input [8]: [page_sk#49, date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] + +(41) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(42) BroadcastHashJoin [codegen id : 11] +Left keys [1]: [date_sk#50] +Right keys [1]: [d_date_sk#24] +Join condition: None + (43) Project [codegen id : 11] Output [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] -Input [7]: [page_sk#49, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [7]: [date_sk#50, sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66, d_date_sk#24] (44) HashAggregate [codegen id : 11] Input [5]: [sales_price#51, profit#52, return_amt#53, net_loss#54, cp_catalog_page_id#66] @@ -376,44 +376,44 @@ Input [8]: [wr_returned_date_sk#96, wr_item_sk#97, wr_order_number#98, wr_return (63) Union -(64) ReusedExchange [Reuses operator id: 14] -Output [1]: [d_date_sk#21] - -(65) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [cast(d_date_sk#21 as bigint)] -Join condition: None - -(66) Project [codegen id : 21] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#21] - -(67) Scan parquet default.web_site +(64) Scan parquet default.web_site Output [2]: [web_site_sk#111, web_site_id#112] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct -(68) ColumnarToRow [codegen id : 20] +(65) ColumnarToRow [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] -(69) Filter [codegen id : 20] +(66) Filter [codegen id : 19] Input [2]: [web_site_sk#111, web_site_id#112] Condition : isnotnull(web_site_sk#111) -(70) BroadcastExchange +(67) BroadcastExchange Input [2]: [web_site_sk#111, web_site_id#112] Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#113] -(71) BroadcastHashJoin [codegen id : 21] +(68) BroadcastHashJoin [codegen id : 21] Left keys [1]: [wsr_web_site_sk#90] Right keys [1]: [web_site_sk#111] Join condition: None +(69) 
Project [codegen id : 21] +Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] +Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] + +(70) ReusedExchange [Reuses operator id: 20] +Output [1]: [d_date_sk#24] + +(71) BroadcastHashJoin [codegen id : 21] +Left keys [1]: [date_sk#91] +Right keys [1]: [cast(d_date_sk#24 as bigint)] +Join condition: None + (72) Project [codegen id : 21] Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#111, web_site_id#112] +Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112, d_date_sk#24] (73) HashAggregate [codegen id : 21] Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#112] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt index 233af6d8cc813..8d1794b903178 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/simplified.txt @@ -22,9 +22,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (5) HashAggregate [s_store_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,s_store_id] - BroadcastHashJoin [store_sk,s_store_sk] - Project [store_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,s_store_id] + BroadcastHashJoin [store_sk,s_store_sk] InputAdapter Union WholeStageCodegen (1) @@ -42,18 +42,18 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter BroadcastExchange #4 WholeStageCodegen (3) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Filter [s_store_sk] + ColumnarToRow + InputAdapter + Scan parquet default.store [s_store_sk,s_store_id] InputAdapter BroadcastExchange #5 WholeStageCodegen (4) - Filter [s_store_sk] - ColumnarToRow - InputAdapter - Scan parquet default.store [s_store_sk,s_store_id] + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] WholeStageCodegen (12) HashAggregate [cp_catalog_page_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -61,9 +61,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (11) HashAggregate [cp_catalog_page_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,cp_catalog_page_id] - BroadcastHashJoin [page_sk,cp_catalog_page_sk] - Project [page_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,cp_catalog_page_id] + BroadcastHashJoin [page_sk,cp_catalog_page_sk] InputAdapter Union WholeStageCodegen (7) @@ -79,14 +79,14 @@ 
TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.catalog_returns [cr_returned_date_sk,cr_catalog_page_sk,cr_return_amount,cr_net_loss] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #7 + WholeStageCodegen (9) + Filter [cp_catalog_page_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] InputAdapter - BroadcastExchange #7 - WholeStageCodegen (10) - Filter [cp_catalog_page_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_page [cp_catalog_page_sk,cp_catalog_page_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (22) HashAggregate [web_site_id,sum,sum,sum,sum] [sum(UnscaledValue(sales_price)),sum(UnscaledValue(return_amt)),sum(UnscaledValue(profit)),sum(UnscaledValue(net_loss)),channel,id,sales,returns,profit,sum,sum,sum,sum] InputAdapter @@ -94,9 +94,9 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] WholeStageCodegen (21) HashAggregate [web_site_id,sales_price,return_amt,profit,net_loss] [sum,sum,sum,sum,sum,sum,sum,sum] Project [sales_price,profit,return_amt,net_loss,web_site_id] - BroadcastHashJoin [wsr_web_site_sk,web_site_sk] - Project [wsr_web_site_sk,sales_price,profit,return_amt,net_loss] - BroadcastHashJoin [date_sk,d_date_sk] + BroadcastHashJoin [date_sk,d_date_sk] + Project [date_sk,sales_price,profit,return_amt,net_loss,web_site_id] + BroadcastHashJoin [wsr_web_site_sk,web_site_sk] InputAdapter Union WholeStageCodegen (13) @@ -129,14 +129,14 @@ TakeOrderedAndProject [channel,id,sales,returns,profit] InputAdapter Scan parquet default.web_sales [ws_item_sk,ws_web_site_sk,ws_order_number] InputAdapter - ReusedExchange [d_date_sk] #4 + BroadcastExchange #11 + WholeStageCodegen (19) + Filter [web_site_sk] + ColumnarToRow + InputAdapter + Scan parquet default.web_site [web_site_sk,web_site_id] InputAdapter - BroadcastExchange #11 - WholeStageCodegen (20) - Filter [web_site_sk] - ColumnarToRow - InputAdapter - Scan parquet default.web_site [web_site_sk,web_site_id] + ReusedExchange [d_date_sk] #5 WholeStageCodegen (49) HashAggregate [channel,sum,isEmpty,sum,isEmpty,sum,isEmpty] [sum(sales),sum(returns),sum(profit),id,sum(sales),sum(returns),sum(profit),sum,isEmpty,sum,isEmpty,sum,isEmpty] InputAdapter From 194edc86a2959f912b4e4d0bb4867b5cb2fd0813 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 6 Jan 2021 23:41:27 -0800 Subject: [PATCH 0993/1009] Revert "[SPARK-34029][SQL][TESTS] Add OrcEncryptionSuite and FakeKeyProvider" This reverts commit 8bb70bf0d646f6d54d17690d23ee935e452e747e. 
--- project/SparkBuild.scala | 1 - .../datasources/orc/FakeKeyProvider.java | 144 ------------------ ...pache.hadoop.crypto.key.KeyProviderFactory | 16 -- .../datasources/orc/OrcEncryptionSuite.scala | 98 ------------ 4 files changed, 259 deletions(-) delete mode 100644 sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java delete mode 100644 sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f126ee35efcca..668701be0ae98 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -489,7 +489,6 @@ object SparkParallelTestGrouping { "org.apache.spark.sql.catalyst.expressions.HashExpressionsSuite", "org.apache.spark.sql.catalyst.expressions.CastSuite", "org.apache.spark.sql.catalyst.expressions.MathExpressionsSuite", - "org.apache.spark.sql.execution.datasources.orc.OrcEncryptionSuite", "org.apache.spark.sql.hive.HiveExternalCatalogSuite", "org.apache.spark.sql.hive.StatisticsSuite", "org.apache.spark.sql.hive.client.VersionsSuite", diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java deleted file mode 100644 index c48543802eb33..0000000000000 --- a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package test.org.apache.spark.sql.execution.datasources.orc; - -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.crypto.key.KeyProvider; -import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.crypto.key.KeyProviderFactory; -import org.apache.hadoop.crypto.key.kms.KMSClientProvider; - -/** - * A Hadoop KeyProvider that lets us test the interaction - * with the Hadoop code. 
- * - * https://github.com/apache/orc/blob/rel/release-1.6.6/java/tools/src/test/org/apache/orc/impl/FakeKeyProvider.java - * - * This file intentionally keeps the original file except - * (1) package name, (2) import order, (3) a few indentation - */ -public class FakeKeyProvider extends KeyProvider { - // map from key name to metadata - private final Map keyMetdata = new HashMap<>(); - // map from key version name to material - private final Map keyVersions = new HashMap<>(); - - public FakeKeyProvider(Configuration conf) { - super(conf); - } - - @Override - public KeyVersion getKeyVersion(String name) { - return keyVersions.get(name); - } - - @Override - public List getKeys() { - return new ArrayList<>(keyMetdata.keySet()); - } - - @Override - public List getKeyVersions(String name) { - List result = new ArrayList<>(); - Metadata meta = getMetadata(name); - for(int v=0; v < meta.getVersions(); ++v) { - String versionName = buildVersionName(name, v); - KeyVersion material = keyVersions.get(versionName); - if (material != null) { - result.add(material); - } - } - return result; - } - - @Override - public Metadata getMetadata(String name) { - return keyMetdata.get(name); - } - - @Override - public KeyVersion createKey(String name, byte[] bytes, Options options) { - String versionName = buildVersionName(name, 0); - keyMetdata.put(name, new TestMetadata(options.getCipher(), - options.getBitLength(), 1)); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void deleteKey(String name) { - throw new UnsupportedOperationException("Can't delete keys"); - } - - @Override - public KeyVersion rollNewVersion(String name, byte[] bytes) { - TestMetadata key = keyMetdata.get(name); - String versionName = buildVersionName(name, key.addVersion()); - KeyVersion result = new KMSClientProvider.KMSKeyVersion(name, versionName, - bytes); - keyVersions.put(versionName, result); - return result; - } - - @Override - public void flush() { - // Nothing - } - - static class TestMetadata extends KeyProvider.Metadata { - - TestMetadata(String cipher, int bitLength, int versions) { - super(cipher, bitLength, null, null, null, versions); - } - - public int addVersion() { - return super.addVersion(); - } - } - - public static class Factory extends KeyProviderFactory { - - @Override - public KeyProvider createProvider(URI uri, Configuration conf) throws IOException { - if ("test".equals(uri.getScheme())) { - KeyProvider provider = new FakeKeyProvider(conf); - // populate a couple keys into the provider - byte[] piiKey = new byte[]{0,1,2,3,4,5,6,7,8,9,0xa,0xb,0xc,0xd,0xe,0xf}; - org.apache.hadoop.crypto.key.KeyProvider.Options aes128 = new KeyProvider.Options(conf); - provider.createKey("pii", piiKey, aes128); - byte[] piiKey2 = new byte[]{0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17, - 0x18,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f}; - provider.rollNewVersion("pii", piiKey2); - byte[] secretKey = new byte[]{0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27, - 0x28,0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f}; - provider.createKey("secret", secretKey, aes128); - return KeyProviderCryptoExtension.createKeyProviderCryptoExtension(provider); - } - return null; - } - } -} diff --git a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory deleted file mode 100644 index f436622b5fb42..0000000000000 --- 
a/sql/core/src/test/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -test.org.apache.spark.sql.execution.datasources.orc.FakeKeyProvider$Factory diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala deleted file mode 100644 index fac3cef5801dd..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcEncryptionSuite.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql.execution.datasources.orc - -import org.apache.spark.sql.Row -import org.apache.spark.sql.test.SharedSparkSession - -class OrcEncryptionSuite extends OrcTest with SharedSparkSession { - import testImplicits._ - - val originalData = Seq(("123456789", "dongjoon@apache.org", "Dongjoon Hyun")) - val rowDataWithoutKey = - Row(null, "841626795E7D351555B835A002E3BF10669DE9B81C95A3D59E10865AC37EA7C3", "Dongjoon Hyun") - - test("Write and read an encrypted file") { - val df = originalData.toDF("ssn", "email", "name") - - withTempPath { dir => - val path = dir.getAbsolutePath - withSQLConf( - "hadoop.security.key.provider.path" -> "test:///", - "orc.key.provider" -> "hadoop", - "orc.encrypt" -> "pii:ssn,email", - "orc.mask" -> "nullify:ssn;sha256:email") { - df.write.mode("overwrite").orc(path) - checkAnswer(spark.read.orc(path), df) - } - - withSQLConf( - "orc.key.provider" -> "memory", - "orc.encrypt" -> "pii:ssn,email", - "orc.mask" -> "nullify:ssn;sha256:email") { - checkAnswer(spark.read.orc(path), rowDataWithoutKey) - } - } - } - - test("Write and read an encrypted table") { - val df = originalData.toDF("ssn", "email", "name") - - withTempPath { dir => - val path = dir.getAbsolutePath - withTable("encrypted") { - sql( - s""" - |CREATE TABLE encrypted ( - | ssn STRING, - | email STRING, - | name STRING - |) - |USING ORC - |LOCATION "$path" - |OPTIONS ( - | hadoop.security.key.provider.path "test:///", - | orc.key.provider "hadoop", - | orc.encrypt "pii:ssn,email", - | orc.mask "nullify:ssn;sha256:email" - |) - |""".stripMargin) - sql("INSERT INTO encrypted VALUES('123456789', 'dongjoon@apache.org', 'Dongjoon Hyun')") - checkAnswer(sql("SELECT * FROM encrypted"), df) - } - withTable("normal") { - sql( - s""" - |CREATE TABLE normal ( - | ssn STRING, - | email STRING, - | name STRING - |) - |USING ORC - |LOCATION "$path" - |OPTIONS ( - | orc.key.provider "memory", - | orc.encrypt "pii:ssn,email", - | orc.mask "nullify:ssn;sha256:email" - |) - |""".stripMargin) - checkAnswer(sql("SELECT * FROM normal"), rowDataWithoutKey) - } - } - } -} From d36cdd55419c104134f88930206bedccdbe4f3c0 Mon Sep 17 00:00:00 2001 From: Yu Zhong Date: Thu, 7 Jan 2021 08:59:26 +0000 Subject: [PATCH 0994/1009] [SPARK-33933][SQL] Materialize BroadcastQueryStage first to avoid broadcast timeout in AQE ### What changes were proposed in this pull request? In AdaptiveSparkPlanExec.getFinalPhysicalPlan, when newStages are generated, sort the new stages by class type to make sure BroadcastQueryState precede others. It can make sure the broadcast job are submitted before map jobs to avoid waiting for job schedule and cause broadcast timeout. ### Why are the changes needed? When enable AQE, in getFinalPhysicalPlan, spark traversal the physical plan bottom up and create query stage for materialized part by createQueryStages and materialize those new created query stages to submit map stages or broadcasting. When ShuffleQueryStage are materializing before BroadcastQueryStage, the map job and broadcast job are submitted almost at the same time, but map job will hold all the computing resources. If the map job runs slow (when lots of data needs to process and the resource is limited), the broadcast job cannot be started(and finished) before spark.sql.broadcastTimeout, thus cause whole job failed (introduced in SPARK-31475). The workaround to increase spark.sql.broadcastTimeout doesn't make sense and graceful, because the data to broadcast is very small. 
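To make the reordering concrete, here is a small self-contained Scala sketch. The `BroadcastStage`/`ShuffleStage` classes are invented stand-ins for illustration (the real classes are `BroadcastQueryStageExec` and `ShuffleQueryStageExec`); it only shows how a `sortWith` predicate of this shape moves broadcast-like stages to the front while keeping the relative order of everything else:

```scala
// Toy stand-ins for AQE query stages, defined only for this sketch.
sealed trait Stage
case class BroadcastStage(id: Int) extends Stage
case class ShuffleStage(id: Int) extends Stage

object BroadcastFirstOrdering {
  def main(args: Array[String]): Unit = {
    val newStages: Seq[Stage] =
      Seq(ShuffleStage(1), BroadcastStage(2), ShuffleStage(3), BroadcastStage(4))

    // A broadcast stage sorts before anything that is not a broadcast stage;
    // all other pairs compare equal, and sortWith is stable, so shuffle stages
    // keep their original order.
    val reordered = newStages.sortWith {
      case (_: BroadcastStage, _: BroadcastStage) => false
      case (_: BroadcastStage, _)                 => true
      case _                                      => false
    }

    // Prints: List(BroadcastStage(2), BroadcastStage(4), ShuffleStage(1), ShuffleStage(3))
    println(reordered)
  }
}
```

Materializing the stages in this order submits the small broadcast jobs before the map jobs can occupy all the computing resources.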
### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? 1. Add UT 2. Test the code using dev environment in https://issues.apache.org/jira/browse/SPARK-33933 Closes #30998 from zhongyu09/aqe-broadcast. Authored-by: Yu Zhong Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 11 ++++++++- .../adaptive/AdaptiveQueryExecSuite.scala | 24 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 89d3b53510469..aa09f21af19b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -189,8 +189,17 @@ case class AdaptiveSparkPlanExec( stagesToReplace = result.newStages ++ stagesToReplace executionId.foreach(onUpdatePlan(_, result.newStages.map(_.plan))) + // SPARK-33933: we should submit tasks of broadcast stages first, to avoid waiting + // for tasks to be scheduled and leading to broadcast timeout. + val reorderedNewStages = result.newStages + .sortWith { + case (_: BroadcastQueryStageExec, _: BroadcastQueryStageExec) => false + case (_: BroadcastQueryStageExec, _) => true + case _ => false + } + // Start materialization of all new stages and fail fast if any stages failed eagerly - result.newStages.foreach { stage => + reorderedNewStages.foreach { stage => try { stage.materialize().onComplete { res => if (res.isSuccess) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 69f1565c2f8de..75993d49da677 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -1431,4 +1431,28 @@ class AdaptiveQueryExecSuite } } } + + test("SPARK-33933: AQE broadcast should not timeout with slow map tasks") { + val broadcastTimeoutInSec = 1 + val df = spark.sparkContext.parallelize(Range(0, 100), 100) + .flatMap(x => { + Thread.sleep(20) + for (i <- Range(0, 100)) yield (x % 26, x % 10) + }).toDF("index", "pv") + val dim = Range(0, 26).map(x => (x, ('a' + x).toChar.toString)) + .toDF("index", "name") + val testDf = df.groupBy("index") + .agg(sum($"pv").alias("pv")) + .join(dim, Seq("index")) + withSQLConf(SQLConf.BROADCAST_TIMEOUT.key -> broadcastTimeoutInSec.toString, + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val startTime = System.currentTimeMillis() + val result = testDf.collect() + val queryTime = System.currentTimeMillis() - startTime + assert(result.length == 26) + // make sure the execution time is large enough + assert(queryTime > (broadcastTimeoutInSec + 1) * 1000) + } + } + } From 7b06acc28b5c37da6c48bc44c3d921309d4ad3a8 Mon Sep 17 00:00:00 2001 From: fwang12 Date: Thu, 7 Jan 2021 20:49:37 +0900 Subject: [PATCH 0995/1009] [SPARK-33100][SQL][FOLLOWUP] Find correct bound of bracketed comment in spark-sql ### What changes were proposed in this pull request? This PR help find correct bound of bracketed comment in spark-sql. 
Here is the log for UT of SPARK-33100 in CliSuite before: ``` 2021-01-05 13:22:34.768 - stdout> spark-sql> /* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.523 - stderr> Time taken: 6.716 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.599 - stdout> test 2021-01-05 13:22:41.6 - stdout> spark-sql> ;;/* SELECT 'test';*/ SELECT 'test'; 2021-01-05 13:22:41.709 - stdout> test 2021-01-05 13:22:41.709 - stdout> spark-sql> /* SELECT 'test';*/;; SELECT 'test'; 2021-01-05 13:22:41.902 - stdout> spark-sql> SELECT 'test'; -- SELECT 'test'; 2021-01-05 13:22:41.902 - stderr> Time taken: 0.129 seconds, Fetched 1 row(s) 2021-01-05 13:22:41.902 - stderr> Error in query: 2021-01-05 13:22:41.902 - stderr> mismatched input '' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 19) 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> == SQL == 2021-01-05 13:22:42.006 - stderr> /* SELECT 'test';*/ 2021-01-05 13:22:42.006 - stderr> -------------------^^^ 2021-01-05 13:22:42.006 - stderr> 2021-01-05 13:22:42.006 - stderr> Time taken: 0.226 seconds, Fetched 1 row(s) 2021-01-05 13:22:42.006 - stdout> test ``` The root cause is that the insideBracketedComment is not accurate. For `/* comment */`, the last character `/` is not insideBracketedComment and it would be treat as beginning of statements. In this PR, this issue is fixed. ### Why are the changes needed? To fix the issue described above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #31054 from turboFei/SPARK-33100-followup. 
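The idea behind the fix can be sketched with a stripped-down splitter. This is a standalone toy, not the real `SparkSQLCLIDriver.splitSemiColon`: it ignores quotes, escapes and `--` comments, and only illustrates the deferred decrement, i.e. the closing `/` of `*/` still counts as being inside the comment and therefore cannot be taken as the start of a new statement:

```scala
object BracketedCommentSketch {
  // Splits a line on top-level ';', treating /* ... */ blocks as comments.
  def splitSemiColon(line: String): Seq[String] = {
    var level = 0            // bracketed-comment depth
    var leaving = false      // saw the closing "*/"; decrement one character later
    var begin = 0
    var isStatement = false
    val out = scala.collection.mutable.ArrayBuffer.empty[String]

    for (i <- 0 until line.length) {
      if (leaving) { level -= 1; leaving = false }  // the '/' itself stayed "inside"
      val c = line.charAt(i)
      if (level > 0 && c == '/' && i > 0 && line.charAt(i - 1) == '*') {
        leaving = true                              // defer the decrement
      } else if (level == 0 && c == '/' && i + 1 < line.length && line.charAt(i + 1) == '*') {
        level += 1
      } else if (c == ';' && level == 0) {
        if (isStatement) out += line.substring(begin, i)
        begin = i + 1
        isStatement = false
      }
      isStatement = isStatement || (level == 0 && c != ';' && !c.isWhitespace)
    }
    if (isStatement) out += line.substring(begin)
    out.toSeq
  }

  def main(args: Array[String]): Unit = {
    // Yields exactly one statement instead of an empty one plus the SELECT.
    println(splitSemiColon("/* SELECT 'test';*/ SELECT 'test';"))
  }
}
```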
Authored-by: fwang12 Signed-off-by: Takeshi Yamamuro --- .../hive/thriftserver/SparkSQLCLIDriver.scala | 24 +++++++++++++------ .../sql/hive/thriftserver/CliSuite.scala | 4 ++-- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 9155eacfa4896..8606aaab1cae2 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -530,15 +530,24 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { var bracketedCommentLevel = 0 var escape = false var beginIndex = 0 - var includingStatement = false + var leavingBracketedComment = false + var isStatement = false val ret = new JArrayList[String] def insideBracketedComment: Boolean = bracketedCommentLevel > 0 def insideComment: Boolean = insideSimpleComment || insideBracketedComment - def statementBegin(index: Int): Boolean = includingStatement || (!insideComment && + def statementInProgress(index: Int): Boolean = isStatement || (!insideComment && index > beginIndex && !s"${line.charAt(index)}".trim.isEmpty) for (index <- 0 until line.length) { + // Checks if we need to decrement a bracketed comment level; the last character '/' of + // bracketed comments is still inside the comment, so `insideBracketedComment` must keep true + // in the previous loop and we decrement the level here if needed. + if (leavingBracketedComment) { + bracketedCommentLevel -= 1 + leavingBracketedComment = false + } + if (line.charAt(index) == '\'' && !insideComment) { // take a look to see if it is escaped // See the comment above about SPARK-31595 @@ -568,12 +577,12 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote || insideComment) { // do not split } else { - if (includingStatement) { + if (isStatement) { // split, do not include ; itself ret.add(line.substring(beginIndex, index)) } beginIndex = index + 1 - includingStatement = false + isStatement = false } } else if (line.charAt(index) == '\n') { // with a new line the inline simple comment should end. 
@@ -585,7 +594,8 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { if (insideSingleQuote || insideDoubleQuote) { // Ignores '/' in any case of quotes } else if (insideBracketedComment && line.charAt(index - 1) == '*' ) { - bracketedCommentLevel -= 1 + // Decrements `bracketedCommentLevel` at the beginning of the next loop + leavingBracketedComment = true } else if (hasNext && !insideBracketedComment && line.charAt(index + 1) == '*') { bracketedCommentLevel += 1 } @@ -597,9 +607,9 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { escape = true } - includingStatement = statementBegin(index) + isStatement = statementInProgress(index) } - if (includingStatement) { + if (isStatement) { ret.add(line.substring(beginIndex)) } ret diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6708cf99e7f41..1a96012a0b4e9 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -577,8 +577,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { "/* SELECT 'test';*/ SELECT 'test';" -> "test", ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", "/* SELECT 'test';*/;; SELECT 'test';" -> "test", - "SELECT 'test'; -- SELECT 'test';" -> "", - "SELECT 'test'; /* SELECT 'test';*/;" -> "", + "SELECT 'test'; -- SELECT 'test';" -> "test", + "SELECT 'test'; /* SELECT 'test';*/;" -> "test", "/*$meta chars{^\\;}*/ SELECT 'test';" -> "test", "/*\nmulti-line\n*/ SELECT 'test';" -> "test", "/*/* multi-level bracketed*/ SELECT 'test';" -> "test" From aa388cf3d0ff230eb0397876fe2db03bbe51658e Mon Sep 17 00:00:00 2001 From: HyukjinKwon Date: Fri, 8 Jan 2021 09:28:31 +0900 Subject: [PATCH 0996/1009] [SPARK-34041][PYTHON][DOCS] Miscellaneous cleanup for new PySpark documentation ### What changes were proposed in this pull request? This PR proposes to: - Add a link of quick start in PySpark docs into "Programming Guides" in Spark main docs - `ML` / `MLlib` -> `MLlib (DataFrame-based)` / `MLlib (RDD-based)` in API reference page - Mention other user guides as well because the guide such as [ML](http://spark.apache.org/docs/latest/ml-guide.html) and [SQL](http://spark.apache.org/docs/latest/sql-programming-guide.html). - Mention other migration guides as well because PySpark can get affected by it. ### Why are the changes needed? For better documentation. ### Does this PR introduce _any_ user-facing change? It fixes user-facing docs. However, it's not released out yet. ### How was this patch tested? Manually tested by running: ```bash cd docs SKIP_SCALADOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 jekyll serve --watch ``` Closes #31082 from HyukjinKwon/SPARK-34041. 
Authored-by: HyukjinKwon Signed-off-by: HyukjinKwon --- docs/_layouts/global.html | 1 + docs/index.md | 2 ++ python/docs/source/getting_started/index.rst | 3 +++ python/docs/source/migration_guide/index.rst | 12 ++++++++++-- python/docs/source/reference/pyspark.ml.rst | 12 ++++++------ python/docs/source/reference/pyspark.mllib.rst | 4 ++-- python/docs/source/user_guide/index.rst | 12 ++++++++++++ 7 files changed, 36 insertions(+), 10 deletions(-) diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index de98f29acf3b7..f10d46763cf76 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -84,6 +84,7 @@ MLlib (Machine Learning) GraphX (Graph Processing) SparkR (R on Spark) + PySpark (Python on Spark) diff --git a/docs/index.md b/docs/index.md index 8fd169e63f608..c4c2d722f975e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -113,6 +113,8 @@ options for deployment: * [Spark Streaming](streaming-programming-guide.html): processing data streams using DStreams (old API) * [MLlib](ml-guide.html): applying machine learning algorithms * [GraphX](graphx-programming-guide.html): processing graphs +* [SparkR](sparkr.html): processing data with Spark in R +* [PySpark](api/python/getting_started/index.html): processing data with Spark in Python **API Docs:** diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst index 9fa3352ae27d8..38b9c935fc623 100644 --- a/python/docs/source/getting_started/index.rst +++ b/python/docs/source/getting_started/index.rst @@ -21,6 +21,9 @@ Getting Started =============== This page summarizes the basic steps required to setup and get started with PySpark. +There are more guides shared with other languages such as +`Quick Start `_ in Programming Guides +at `the Spark documentation `_. .. toctree:: :maxdepth: 2 diff --git a/python/docs/source/migration_guide/index.rst b/python/docs/source/migration_guide/index.rst index 41e36b16b3989..88e768dc464df 100644 --- a/python/docs/source/migration_guide/index.rst +++ b/python/docs/source/migration_guide/index.rst @@ -21,8 +21,6 @@ Migration Guide =============== This page describes the migration guide specific to PySpark. -Many items of other migration guides can also be applied when migrating PySpark to higher versions because PySpark internally shares other components. -Please also refer other migration guides such as `Migration Guide: SQL, Datasets and DataFrame `_. .. toctree:: :maxdepth: 2 @@ -33,3 +31,13 @@ Please also refer other migration guides such as `Migration Guide: SQL, Datasets pyspark_2.2_to_2.3 pyspark_1.4_to_1.5 pyspark_1.0_1.2_to_1.3 + + +Many items of other migration guides can also be applied when migrating PySpark to higher versions because PySpark internally shares other components. +Please also refer other migration guides: + +- `Migration Guide: Spark Core `_ +- `Migration Guide: SQL, Datasets and DataFrame `_ +- `Migration Guide: Structured Streaming `_ +- `Migration Guide: MLlib (Machine Learning) `_ + diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst index 2de0ff65a3ae8..cc904597d24c4 100644 --- a/python/docs/source/reference/pyspark.ml.rst +++ b/python/docs/source/reference/pyspark.ml.rst @@ -16,11 +16,11 @@ under the License. -ML -== +MLlib (DataFrame-based) +======================= -ML Pipeline APIs ----------------- +Pipeline APIs +------------- .. 
currentmodule:: pyspark.ml @@ -188,8 +188,8 @@ Clustering PowerIterationClustering -ML Functions ----------------------------- +Functions +--------- .. currentmodule:: pyspark.ml.functions diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst index df5ea017d0fbf..12fc4798dd8de 100644 --- a/python/docs/source/reference/pyspark.mllib.rst +++ b/python/docs/source/reference/pyspark.mllib.rst @@ -16,8 +16,8 @@ under the License. -MLlib -===== +MLlib (RDD-based) +================= Classification -------------- diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst index 3e535ce16b22e..704156b11d985 100644 --- a/python/docs/source/user_guide/index.rst +++ b/python/docs/source/user_guide/index.rst @@ -20,9 +20,21 @@ User Guide ========== +This page is the guide for PySpark users which contains PySpark specific topics. + .. toctree:: :maxdepth: 2 arrow_pandas python_packaging + +There are more guides shared with other languages in Programming Guides +at `the Spark documentation `_. + +- `RDD Programming Guide `_ +- `Spark SQL, DataFrames and Datasets Guide `_ +- `Structured Streaming Programming Guide `_ +- `Spark Streaming Programming Guide `_ +- `Machine Learning Library (MLlib) Guide `_ + From 5b16d70d6a51720660e7607c859fae4f28691952 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 8 Jan 2021 09:34:40 +0900 Subject: [PATCH 0997/1009] [SPARK-34044][DOCS] Add spark.sql.hive.metastore.jars.path to sql-data-sources-hive-tables.md ### What changes were proposed in this pull request? This PR adds new configuration to `sql-data-sources-hive-tables`. ### Why are the changes needed? SPARK-32852 added a new configuration, `spark.sql.hive.metastore.jars.path`. ### Does this PR introduce _any_ user-facing change? Yes, but a document only. ### How was this patch tested? **BEFORE** ![Screen Shot 2021-01-07 at 2 57 57 PM](https://user-images.githubusercontent.com/9700541/103954318-cc9ec200-50f8-11eb-86d3-cd89b07fcd21.png) **AFTER** ![Screen Shot 2021-01-07 at 2 56 34 PM](https://user-images.githubusercontent.com/9700541/103954221-9d885080-50f8-11eb-8938-fb91394a33cb.png) Closes #31085 from dongjoon-hyun/SPARK-34044. Authored-by: Dongjoon Hyun Signed-off-by: HyukjinKwon --- docs/sql-data-sources-hive-tables.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index ae3572c474e1a..376c2042d4227 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -139,7 +139,7 @@ The following options can be used to configure the version of Hive that is used builtin Location of the jars that should be used to instantiate the HiveMetastoreClient. This - property can be one of three options: + property can be one of four options:
      1. builtin
      Use Hive 2.3.7, which is bundled with the Spark assembly when -Phive is
@@ -148,6 +148,9 @@ The following options can be used to configure the version of Hive that is used
      2. maven
      Use Hive jars of specified version downloaded from Maven repositories. This configuration
      is not generally recommended for production deployments.
+     3. path
+     Use Hive jars configured by spark.sql.hive.metastore.jars.path
+     in comma separated format. Support both local or remote paths.
      4. A classpath in the standard format for the JVM. This classpath must include all of Hive
      and its dependencies, including the correct version of Hadoop. These jars only need to be
      present on the driver, but if you are running in yarn cluster mode then you must ensure
@@ -156,6 +159,28 @@ The following options can be used to configure the version of Hive that is used
      1.4.0
+
+  spark.sql.hive.metastore.jars.path
+  (empty)
+      Comma-separated paths of the jars that used to instantiate the HiveMetastoreClient.
+      This configuration is useful only when spark.sql.hive.metastore.jars is set as path.
+      The paths can be any of the following format:
+      1. file://path/to/jar/foo.jar
+      2. hdfs://nameservice/path/to/jar/foo.jar
+      3. /path/to/jar/ (path without URI scheme follow conf fs.defaultFS's URI schema)
+      4. [http/https/ftp]://path/to/jar/foo.jar
+      Note that 1, 2, and 3 support wildcard. For example:
+      1. file://path/to/jar/*,file://path2/to/jar/*/*.jar
+      2. hdfs://nameservice/path/to/jar/*,hdfs://nameservice2/path/to/jar/*/*.jar
+
+  3.1.0
+
  spark.sql.hive.metastore.sharedPrefixes
  com.mysql.jdbc,
  org.postgresql,
  com.microsoft.sqlserver,
  oracle.jdbc
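As a usage illustration of the two configs documented above (the jar locations below are made-up placeholders, not values from this patch), the settings would typically be supplied when the session is created, since the Hive metastore client is instantiated from them:

```scala
import org.apache.spark.sql.SparkSession

object MetastoreJarsPathExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("hive-metastore-jars-path")
      .enableHiveSupport()
      // Pick the metastore client version and load its jars from explicit paths
      // instead of the built-in ones.
      .config("spark.sql.hive.metastore.version", "2.3.7")
      .config("spark.sql.hive.metastore.jars", "path")
      // file:// and hdfs:// entries may use wildcards, per the table above.
      .config("spark.sql.hive.metastore.jars.path",
        "file:///opt/hive-client/jars/*,hdfs://nameservice/libs/hive/*")
      .getOrCreate()

    spark.sql("SHOW DATABASES").show()
    spark.stop()
  }
}
```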
        From 8e11ce5378a2cf69ec87501e86f7ed5963649cbf Mon Sep 17 00:00:00 2001 From: Holden Karau Date: Thu, 7 Jan 2021 16:47:37 -0800 Subject: [PATCH 0998/1009] [SPARK-34018][K8S] NPE in ExecutorPodsSnapshot ### What changes were proposed in this pull request? Label both the statuses and ensure the ExecutorPodSnapshot starts with the default config to match. ### Why are the changes needed? The current test depends on the order rather than testing the desired property. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Labeled the containers statuses, observed failures, added the default label as the initialization point, tests passed again. Built Spark, ran on K8s cluster verified no NPE in driver log. Closes #31071 from holdenk/SPARK-34018-finishedExecutorWithRunningSidecar-doesnt-correctly-constructt-the-test-case. Authored-by: Holden Karau Signed-off-by: Dongjoon Hyun --- .../cluster/k8s/ExecutorPodsSnapshot.scala | 27 +++++++++---------- .../k8s/ExecutorLifecycleTestUtils.scala | 3 +++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 37aaca7e8ceeb..cb4d8810e5c38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -41,7 +41,7 @@ private[spark] case class ExecutorPodsSnapshot(executorPods: Map[Long, ExecutorP object ExecutorPodsSnapshot extends Logging { private var shouldCheckAllContainers: Boolean = _ - private var sparkContainerName: String = _ + private var sparkContainerName: String = DEFAULT_EXECUTOR_CONTAINER_NAME def apply(executorPods: Seq[Pod]): ExecutorPodsSnapshot = { ExecutorPodsSnapshot(toStatesByExecutorId(executorPods)) @@ -80,24 +80,21 @@ object ExecutorPodsSnapshot extends Logging { .anyMatch(t => t != null && t.getExitCode != 0)) { PodFailed(pod) } else { - // Otherwise look for the Spark container - val sparkContainerStatusOpt = pod.getStatus.getContainerStatuses.asScala - .find(_.getName() == sparkContainerName) - sparkContainerStatusOpt match { - case Some(sparkContainerStatus) => - sparkContainerStatus.getState.getTerminated match { - case t if t.getExitCode != 0 => - PodFailed(pod) - case t if t.getExitCode == 0 => + // Otherwise look for the Spark container and get the exit code if present. + val sparkContainerExitCode = pod.getStatus.getContainerStatuses.asScala + .find(_.getName() == sparkContainerName).flatMap(x => Option(x.getState)) + .flatMap(x => Option(x.getTerminated)).flatMap(x => Option(x.getExitCode)) + .map(_.toInt) + sparkContainerExitCode match { + case Some(t) => + t match { + case 0 => PodSucceeded(pod) case _ => - PodRunning(pod) + PodFailed(pod) } - // If we can't find the Spark container status, fall back to the pod status. This is - // expected to occur during pod startup and other situations. + // No exit code means we are running. 
case _ => - logDebug(s"Unable to find container ${sparkContainerName} in pod ${pod} " + - "defaulting to entire pod status (running).") PodRunning(pod) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala index 225278c2aad71..41cba573d89c2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorLifecycleTestUtils.scala @@ -115,13 +115,16 @@ object ExecutorLifecycleTestUtils { .editOrNewStatus() .withPhase("running") .addNewContainerStatus() + .withName(DEFAULT_EXECUTOR_CONTAINER_NAME) .withNewState() .withNewTerminated() + .withMessage("message") .withExitCode(exitCode) .endTerminated() .endState() .endContainerStatus() .addNewContainerStatus() + .withName("SIDECARFRIEND") .withNewState() .withNewRunning() .endRunning() From 9b54da490d55d8c12e0a6b2b4b6e3a2d5b6bed86 Mon Sep 17 00:00:00 2001 From: angerszhu Date: Thu, 7 Jan 2021 18:55:27 -0800 Subject: [PATCH 0999/1009] [SPARK-33818][SQL][DOC] Add descriptions about `spark.sql.parser.quotedRegexColumnNames` in the SQL documents ### What changes were proposed in this pull request? According to https://github.com/apache/spark/pull/30805#issuecomment-747179899, doc `spark.sql.parser.quotedRegexColumnNames` since we need user know about this in doc and it's useful. ![image](https://user-images.githubusercontent.com/46485123/103656543-afa4aa80-4fa3-11eb-8cd3-a9d1b87a3489.png) ![image](https://user-images.githubusercontent.com/46485123/103656551-b2070480-4fa3-11eb-9ce7-95cc424242a6.png) ### Why are the changes needed? Complete doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Not need Closes #30816 from AngersZhuuuu/SPARK-33818. Authored-by: angerszhu Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-qry-select.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-qry-select.md b/docs/sql-ref-syntax-qry-select.md index bac7c2bc6a06d..5820a5c9060e4 100644 --- a/docs/sql-ref-syntax-qry-select.md +++ b/docs/sql-ref-syntax-qry-select.md @@ -41,7 +41,7 @@ select_statement [ { UNION | INTERSECT | EXCEPT } [ ALL | DISTINCT ] select_stat While `select_statement` is defined as ```sql -SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } +SELECT [ hints , ... ] [ ALL | DISTINCT ] { [ named_expression | regex_column_names ] [ , ... ] } FROM { from_item [ , ... ] } [ PIVOT clause ] [ LATERAL VIEW clause ] [ ... ] @@ -151,6 +151,18 @@ SELECT [ hints , ... ] [ ALL | DISTINCT ] { named_expression [ , ... ] } Specifies aliases for one or more source window specifications. The source window specifications can be referenced in the widow definitions in the query. + +* **regex_column_names** + + When `spark.sql.parser.quotedRegexColumnNames` is true, quoted identifiers (using backticks) in `SELECT` + statement are interpreted as regular expressions and `SELECT` statement can take regex-based column specification. 
+ For example, below SQL will only take column `c`: + + ```sql + SELECT `(a|b)?+.+` FROM ( + SELECT 1 as a, 2 as b, 3 as c + ) + ``` ### Related Statements From 0de7f2ff1ebb9b3339ecf30074a0d7ffc1ff6325 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 7 Jan 2021 21:13:22 -0800 Subject: [PATCH 1000/1009] [SPARK-34039][SQL] ReplaceTable should invalidate cache ### What changes were proposed in this pull request? This changes `ReplaceTableExec`/`AtomicReplaceTableExec`, and uncaches the target table before it is dropped. In addition, this includes some refactoring by moving the `uncacheTable` method to `DataSourceV2Strategy` so that we don't need to pass a Spark session to the v2 exec. ### Why are the changes needed? Similar to SPARK-33492 (#30429). When a table is refreshed, the associated cache should be invalidated to avoid potential incorrect results. ### Does this PR introduce _any_ user-facing change? Yes. Now When a data source v2 is cached (either directly or indirectly), all the relevant caches will be refreshed or invalidated if the table is replaced. ### How was this patch tested? Added a new unit test. Closes #31081 from sunchao/SPARK-34039. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/DataSourceV2Strategy.scala | 21 ++++++++++++------ .../datasources/v2/ReplaceTableExec.scala | 14 +++++++++--- .../v2/WriteToDataSourceV2Exec.scala | 22 +++++-------------- .../sql/connector/DataSourceV2SQLSuite.scala | 17 ++++++++++++++ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index fa9519bf3233c..028a2fc690be1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.{And, Attribute, Expression, Na import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.toPrettySQL -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, TableCapability, TableCatalog, TableChange} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, Table, TableCapability, TableCatalog, TableChange} import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.errors.QueryCompilationErrors @@ -81,6 +81,11 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat } } + private def invalidateCache(catalog: TableCatalog, table: Table, ident: Identifier): Unit = { + val v2Relation = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) + session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) + } + override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation @ DataSourceV2ScanRelation(_, V1ScanWrapper(scan, translated, pushed), output)) => @@ -164,10 +169,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match 
{ case staging: StagingTableCatalog => AtomicReplaceTableExec( - staging, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil + staging, ident, schema, parts, propsWithOwner, orCreate = orCreate, + invalidateCache) :: Nil case _ => ReplaceTableExec( - catalog, ident, schema, parts, propsWithOwner, orCreate = orCreate) :: Nil + catalog, ident, schema, parts, propsWithOwner, orCreate = orCreate, + invalidateCache) :: Nil } case ReplaceTableAsSelect(catalog, ident, parts, query, props, options, orCreate) => @@ -176,7 +183,6 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat catalog match { case staging: StagingTableCatalog => AtomicReplaceTableAsSelectExec( - session, staging, ident, parts, @@ -184,10 +190,10 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat planLater(query), propsWithOwner, writeOptions, - orCreate = orCreate) :: Nil + orCreate = orCreate, + invalidateCache) :: Nil case _ => ReplaceTableAsSelectExec( - session, catalog, ident, parts, @@ -195,7 +201,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat planLater(query), propsWithOwner, writeOptions, - orCreate = orCreate) :: Nil + orCreate = orCreate, + invalidateCache) :: Nil } case AppendData(r @ DataSourceV2Relation(v1: SupportsWrite, _, _, _, _), query, writeOptions, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala index 1f3bcf2e3fe57..10c09f4be711f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ReplaceTableExec.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException} import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagedTable, StagingTableCatalog, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -33,10 +33,13 @@ case class ReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends V2CommandExec { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { if (catalog.tableExists(ident)) { + val table = catalog.loadTable(ident) + invalidateCache(catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -54,9 +57,14 @@ case class AtomicReplaceTableExec( tableSchema: StructType, partitioning: Seq[Transform], tableProperties: Map[String, String], - orCreate: Boolean) extends V2CommandExec { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends V2CommandExec { override protected def run(): Seq[InternalRow] = { + if (catalog.tableExists(identifier)) { + val table = catalog.loadTable(identifier) + invalidateCache(catalog, table, identifier) + } val staged = if (orCreate) { catalog.stageCreateOrReplace( 
identifier, tableSchema, partitioning.toArray, tableProperties.asJava) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index fea8bd25f5a21..5fa091ea4e05c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -26,7 +26,6 @@ import org.apache.spark.{SparkEnv, SparkException, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.expressions.Attribute @@ -130,7 +129,6 @@ case class AtomicCreateTableAsSelectExec( * ReplaceTableAsSelectStagingExec. */ case class ReplaceTableAsSelectExec( - session: SparkSession, catalog: TableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -138,7 +136,8 @@ case class ReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExecHelper { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { // Note that this operation is potentially unsafe, but these are the strict semantics of @@ -151,7 +150,7 @@ case class ReplaceTableAsSelectExec( // 3. The table returned by catalog.createTable doesn't support writing. if (catalog.tableExists(ident)) { val table = catalog.loadTable(ident) - uncacheTable(session, catalog, table, ident) + invalidateCache(catalog, table, ident) catalog.dropTable(ident) } else if (!orCreate) { throw new CannotReplaceMissingTableException(ident) @@ -176,7 +175,6 @@ case class ReplaceTableAsSelectExec( * is left untouched. 
*/ case class AtomicReplaceTableAsSelectExec( - session: SparkSession, catalog: StagingTableCatalog, ident: Identifier, partitioning: Seq[Transform], @@ -184,13 +182,14 @@ case class AtomicReplaceTableAsSelectExec( query: SparkPlan, properties: Map[String, String], writeOptions: CaseInsensitiveStringMap, - orCreate: Boolean) extends TableWriteExecHelper { + orCreate: Boolean, + invalidateCache: (TableCatalog, Table, Identifier) => Unit) extends TableWriteExecHelper { override protected def run(): Seq[InternalRow] = { val schema = CharVarcharUtils.getRawSchema(query.schema).asNullable if (catalog.tableExists(ident)) { val table = catalog.loadTable(ident) - uncacheTable(session, catalog, table, ident) + invalidateCache(catalog, table, ident) } val staged = if (orCreate) { catalog.stageCreateOrReplace( @@ -364,15 +363,6 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { Nil } - - protected def uncacheTable( - session: SparkSession, - catalog: TableCatalog, - table: Table, - ident: Identifier): Unit = { - val plan = DataSourceV2Relation.create(table, Some(catalog), Some(ident)) - session.sharedState.cacheManager.uncacheQuery(session, plan, cascade = true) - } } object DataWritingSparkTask extends Logging { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 5c67ad9cdfe2e..0a6bd795cd0ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -752,6 +752,23 @@ class DataSourceV2SQLSuite assert(t2.v1Table.provider == Some(conf.defaultDataSourceName)) } + test("SPARK-34039: ReplaceTable (atomic or non-atomic) should invalidate cache") { + Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => + val view = "view" + withTable(t) { + withTempView(view) { + sql(s"CREATE TABLE $t USING foo AS SELECT id, data FROM source") + sql(s"CACHE TABLE $view AS SELECT id FROM $t") + checkAnswer(sql(s"SELECT * FROM $t"), spark.table("source")) + checkAnswer(sql(s"SELECT * FROM $view"), spark.table("source").select("id")) + + sql(s"REPLACE TABLE $t (a bigint) USING foo") + assert(spark.sharedState.cacheManager.lookupCachedData(spark.table(view)).isEmpty) + } + } + } + } + test("SPARK-33492: ReplaceTableAsSelect (atomic or non-atomic) should invalidate cache") { Seq("testcat.ns.t", "testcat_atomic.ns.t").foreach { t => val view = "view" From cc201545626ffe556682f45edc370ac6fe29e9df Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 7 Jan 2021 21:24:15 -0800 Subject: [PATCH 1001/1009] [SPARK-34005][CORE] Update peak memory metrics for each Executor on task end ### What changes were proposed in this pull request? This PR makes `AppStatusListener` update the peak memory metrics for each Executor on task end like other peak memory metrics (e.g, stage, executors in a stage). ### Why are the changes needed? When `AppStatusListener#onExecutorMetricsUpdate` is called, peak memory metrics for Executors, stages and executors in a stage are updated but currently, the metrics only for Executors are not updated on task end. ### Does this PR introduce _any_ user-facing change? Yes. Executor peak memory metrics is updated more accurately. ### How was this patch tested? 
After I run a job with `local-cluster[1,1,1024]` and visited `/api/v1//executors`, I confirmed `peakExecutorMemory` metrics is shown for an Executor even though the life time of each job is very short . I also modify the json files for `HistoryServerSuite`. Closes #31029 from sarutak/update-executor-metrics-on-taskend. Authored-by: Kousuke Saruta Signed-off-by: Dongjoon Hyun --- .../spark/status/AppStatusListener.scala | 1 + .../executor_list_json_expectation.json | 22 +++++ .../executor_memory_usage_expectation.json | 88 +++++++++++++++++++ ...tor_node_excludeOnFailure_expectation.json | 88 +++++++++++++++++++ ...ludeOnFailure_unexcluding_expectation.json | 88 +++++++++++++++++++ 5 files changed, 287 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 6cb013b1a7c16..52d41cdd72664 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -759,6 +759,7 @@ private[spark] class AppStatusListener( exec.completedTasks += completedDelta exec.failedTasks += failedDelta exec.totalDuration += event.taskInfo.duration + exec.peakExecutorMetrics.compareAndUpdatePeakValues(event.taskExecutorMetrics) // Note: For resubmitted tasks, we continue to use the metrics that belong to the // first attempt of this task. This may not be 100% accurate because the first attempt diff --git a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json index c18a2e31dff3c..be125075874a2 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_list_json_expectation.json @@ -21,6 +21,28 @@ "addTime" : "2015-02-03T16:43:00.906GMT", "executorLogs" : { }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json index 51449340efe9f..0a3eb81140cdb 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_memory_usage_expectation.json @@ -64,6 +64,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + 
"ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -101,6 +123,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -138,6 +182,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -175,6 +241,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json index 47a01b2596de9..8869fb4e296e6 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_expectation.json @@ -64,6 +64,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + 
"ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -101,6 +123,28 @@ "totalOffHeapStorageMemory" : 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -138,6 +182,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -175,6 +241,28 @@ "totalOffHeapStorageMemory": 524288000 }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, diff --git a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json index 46e8f81d0e245..21cc9d0812990 100644 --- a/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/executor_node_excludeOnFailure_unexcluding_expectation.json @@ -52,6 +52,28 @@ "stderr" : "http://172.22.0.111:64521/logPage/?appId=app-20161115172038-0000&executorId=3&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + 
"ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -83,6 +105,28 @@ "stderr" : "http://172.22.0.111:64519/logPage/?appId=app-20161115172038-0000&executorId=2&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -114,6 +158,28 @@ "stderr" : "http://172.22.0.111:64518/logPage/?appId=app-20161115172038-0000&executorId=1&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, @@ -145,6 +211,28 @@ "stderr" : "http://172.22.0.111:64517/logPage/?appId=app-20161115172038-0000&executorId=0&logType=stderr" }, "blacklistedInStages" : [ ], + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0 + }, "attributes" : { }, "resources" : { }, "resourceProfileId" : 0, From b95a847ce1686dd1e1c6555afe2436caec6130e6 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 8 Jan 2021 07:52:39 +0000 Subject: [PATCH 1002/1009] [SPARK-34046][SQL][TESTS] Use join hint for constructing joins in JoinSuite and WholeStageCodegenSuite ### What changes were proposed in this pull request? There are some existing test cases that constructing various joins by tuning the SQL configuration AUTO_BROADCASTJOIN_THRESHOLD, PREFER_SORTMERGEJOIN,SHUFFLE_PARTITIONS, etc. This can be tricky and not straight-forward. In the future development we might have to tweak the configurations again . This PR is to construct specific joins by using join hint in test cases. ### Why are the changes needed? Make test cases for join simpler and more robust. 
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #31087 from gengliangwang/joinhintInTest. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/JoinSuite.scala | 123 +++++++----------- .../execution/WholeStageCodegenSuite.scala | 41 +++--- 2 files changed, 66 insertions(+), 98 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 1bdfdb5ab9c54..2e336b264cd3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -1106,20 +1106,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan } test("SPARK-32330: Preserve shuffled hash join build side partitioning") { - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df1 = spark.range(10).select($"id".as("k1")) - val df2 = spark.range(30).select($"id".as("k2")) - Seq("inner", "cross").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType).groupBy($"k1").count() - .queryExecution.executedPlan - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - // No extra shuffle before aggregate - assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === 2) - }) - } + val df1 = spark.range(10).select($"id".as("k1")) + val df2 = spark.range(30).select($"id".as("k2")) + Seq("inner", "cross").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) + .groupBy($"k1").count() + .queryExecution.executedPlan + assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + // No extra shuffle before aggregate + assert(collect(plan) { case _: ShuffleExchangeExec => true }.size === 2) + }) } test("SPARK-32383: Preserve hash join (BHJ and SHJ) stream side ordering") { @@ -1129,40 +1125,30 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan val df4 = spark.range(100).select($"id".as("k4")) // Test broadcast hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50") { - Seq("inner", "left_outer").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType) - .join(df3, $"k1" === $"k3", joinType) - .join(df4, $"k1" === $"k4", joinType) - .queryExecution - .executedPlan - assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) - assert(collect(plan) { case _: BroadcastHashJoinExec => true }.size === 1) - // No extra sort before last sort merge join - assert(collect(plan) { case _: SortExec => true }.size === 3) - }) - } + Seq("inner", "left_outer").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_MERGE"), $"k1" === $"k2", joinType) + .join(df3.hint("BROADCAST"), $"k1" === $"k3", joinType) + .join(df4.hint("SHUFFLE_MERGE"), $"k1" === $"k4", joinType) + .queryExecution + .executedPlan + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) + assert(collect(plan) { case _: BroadcastHashJoinExec => true }.size === 1) + // No extra sort before last sort merge join + assert(collect(plan) { case _: SortExec => true }.size === 3) + }) // Test shuffled hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df3 = 
spark.range(10).select($"id".as("k3")) - - Seq("inner", "left_outer").foreach(joinType => { - val plan = df1.join(df2, $"k1" === $"k2", joinType) - .join(df3, $"k1" === $"k3", joinType) - .join(df4, $"k1" === $"k4", joinType) - .queryExecution - .executedPlan - assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) - assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) - // No extra sort before last sort merge join - assert(collect(plan) { case _: SortExec => true }.size === 3) - }) - } + Seq("inner", "left_outer").foreach(joinType => { + val plan = df1.join(df2.hint("SHUFFLE_MERGE"), $"k1" === $"k2", joinType) + .join(df3.hint("SHUFFLE_HASH"), $"k1" === $"k3", joinType) + .join(df4.hint("SHUFFLE_MERGE"), $"k1" === $"k4", joinType) + .queryExecution + .executedPlan + assert(collect(plan) { case _: SortMergeJoinExec => true }.size === 2) + assert(collect(plan) { case _: ShuffledHashJoinExec => true }.size === 1) + // No extra sort before last sort merge join + assert(collect(plan) { case _: SortExec => true }.size === 3) + }) } test("SPARK-32290: SingleColumn Null Aware Anti Join Optimize") { @@ -1250,24 +1236,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan $"k1" === $"k4" && $"k2" === $"k5" && $"k3" === $"k6") ) inputDFs.foreach { case (df1, df2, joinExprs) => - withSQLConf( - // Set broadcast join threshold and number of shuffle partitions, - // as shuffled hash join depends on these two configs. - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "80", - SQLConf.SHUFFLE_PARTITIONS.key -> "2") { - val smjDF = df1.join(df2, joinExprs, "full") - assert(collect(smjDF.queryExecution.executedPlan) { - case _: SortMergeJoinExec => true }.size === 1) - val smjResult = smjDF.collect() - - withSQLConf(SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val shjDF = df1.join(df2, joinExprs, "full") - assert(collect(shjDF.queryExecution.executedPlan) { - case _: ShuffledHashJoinExec => true }.size === 1) - // Same result between shuffled hash join and sort merge join - checkAnswer(shjDF, smjResult) - } - } + val smjDF = df1.join(df2.hint("SHUFFLE_MERGE"), joinExprs, "full") + assert(collect(smjDF.queryExecution.executedPlan) { + case _: SortMergeJoinExec => true }.size === 1) + val smjResult = smjDF.collect() + + val shjDF = df1.join(df2.hint("SHUFFLE_HASH"), joinExprs, "full") + assert(collect(shjDF.queryExecution.executedPlan) { + case _: ShuffledHashJoinExec => true }.size === 1) + // Same result between shuffled hash join and sort merge join + checkAnswer(shjDF, smjResult) } } @@ -1284,10 +1262,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan ) inputDFs.foreach { case (df1, df2, joinType) => // Test broadcast hash join - withSQLConf( - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200", - SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { - val bhjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val bhjCodegenDF = df1.join(df2.hint("BROADCAST"), $"k1" === $"k2", joinType) assert(bhjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : BroadcastHashJoinExec) => true case WholeStageCodegenExec(ProjectExec(_, _ : BroadcastHashJoinExec)) => true @@ -1303,13 +1279,8 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan } // Test shuffled hash join - withSQLConf(SQLConf.PREFER_SORTMERGEJOIN.key -> "false", - // Set broadcast join threshold and number of shuffle partitions, 
- // as shuffled hash join depends on these two configs. - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", - SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", - SQLConf.SHUFFLE_PARTITIONS.key -> "2") { - val shjCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val shjCodegenDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) assert(shjCodegenDF.queryExecution.executedPlan.collect { case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true case WholeStageCodegenExec(ProjectExec(_, _ : ShuffledHashJoinExec)) => true @@ -1317,7 +1288,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan checkAnswer(shjCodegenDF, Seq.empty) withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { - val shjNonCodegenDF = df1.join(df2, $"k1" === $"k2", joinType) + val shjNonCodegenDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2", joinType) assert(shjNonCodegenDF.queryExecution.executedPlan.collect { case _: ShuffledHashJoinExec => true }.size === 1) checkAnswer(shjNonCodegenDF, Seq.empty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index eb5643df4c752..71eaed269e6c2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -71,28 +71,25 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession } test("ShuffledHashJoin should be included in WholeStageCodegen") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "30", - SQLConf.SHUFFLE_PARTITIONS.key -> "2", - SQLConf.PREFER_SORTMERGEJOIN.key -> "false") { - val df1 = spark.range(5).select($"id".as("k1")) - val df2 = spark.range(15).select($"id".as("k2")) - val df3 = spark.range(6).select($"id".as("k3")) - - // test one shuffled hash join - val oneJoinDF = df1.join(df2, $"k1" === $"k2") - assert(oneJoinDF.queryExecution.executedPlan.collect { - case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true - }.size === 1) - checkAnswer(oneJoinDF, Seq(Row(0, 0), Row(1, 1), Row(2, 2), Row(3, 3), Row(4, 4))) - - // test two shuffled hash joins - val twoJoinsDF = df1.join(df2, $"k1" === $"k2").join(df3, $"k1" === $"k3") - assert(twoJoinsDF.queryExecution.executedPlan.collect { - case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true - }.size === 2) - checkAnswer(twoJoinsDF, - Seq(Row(0, 0, 0), Row(1, 1, 1), Row(2, 2, 2), Row(3, 3, 3), Row(4, 4, 4))) - } + val df1 = spark.range(5).select($"id".as("k1")) + val df2 = spark.range(15).select($"id".as("k2")) + val df3 = spark.range(6).select($"id".as("k3")) + + // test one shuffled hash join + val oneJoinDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2") + assert(oneJoinDF.queryExecution.executedPlan.collect { + case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true + }.size === 1) + checkAnswer(oneJoinDF, Seq(Row(0, 0), Row(1, 1), Row(2, 2), Row(3, 3), Row(4, 4))) + + // test two shuffled hash joins + val twoJoinsDF = df1.join(df2.hint("SHUFFLE_HASH"), $"k1" === $"k2") + .join(df3.hint("SHUFFLE_HASH"), $"k1" === $"k3") + assert(twoJoinsDF.queryExecution.executedPlan.collect { + case WholeStageCodegenExec(_ : ShuffledHashJoinExec) => true + }.size === 2) + checkAnswer(twoJoinsDF, + Seq(Row(0, 0, 0), Row(1, 1, 1), Row(2, 2, 2), Row(3, 3, 3), Row(4, 4, 4))) } test("Sort should be 
included in WholeStageCodegen") { From 0f8e5dd445b03161a27893ba714db57919d8bcab Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 8 Jan 2021 09:05:22 +0000 Subject: [PATCH 1003/1009] [SPARK-34003][SQL] Fix Rule conflicts between PaddingAndLengthCheckForCharVarchar and ResolveAggregateFunctions ### What changes were proposed in this pull request? ResolveAggregateFunctions is a hacky rule and it calls `executeSameContext` to generate a `resolved agg` to determine which unresolved sort attribute should be pushed into the agg. However, after we add the PaddingAndLengthCheckForCharVarchar rule which will rewrite the query output, thus, the `resolved agg` cannot match original attributes anymore. It causes some dissociative sort attribute to be pushed in and fails the query ``` logtalk [info] Failed to analyze query: org.apache.spark.sql.AnalysisException: expression 'testcat.t1.`v`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 [info] [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 ``` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31027 from yaooqinn/SPARK-34003. Authored-by: Kent Yao Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++---- .../apache/spark/sql/CharVarcharTestSuite.scala | 8 ++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 883ff46148ca6..bf5dbb8200e87 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2400,16 +2400,22 @@ class Analyzer(override val catalogManager: CatalogManager) // to push down this ordering expression and can reference the original aggregate // expression instead. 
val needsPushDown = ArrayBuffer.empty[NamedExpression] - val evaluatedOrderings = resolvedAliasedOrdering.zip(unresolvedSortOrders).map { - case (evaluated, order) => + val orderToAlias = unresolvedSortOrders.zip(aliasedOrdering) + val evaluatedOrderings = resolvedAliasedOrdering.zip(orderToAlias).map { + case (evaluated, (order, aliasOrder)) => val index = originalAggExprs.indexWhere { case Alias(child, _) => child semanticEquals evaluated.child case other => other semanticEquals evaluated.child } if (index == -1) { - needsPushDown += evaluated - order.copy(child = evaluated.toAttribute) + if (CharVarcharUtils.getRawType(evaluated.metadata).nonEmpty) { + needsPushDown += aliasOrder + order.copy(child = aliasOrder) + } else { + needsPushDown += evaluated + order.copy(child = evaluated.toAttribute) + } } else { order.copy(child = originalAggExprs(index).toAttribute) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d20cee0815d4d..fb35d6cf8dacb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -466,6 +466,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { Row("c")) } } + + test("SPARK-34003: fix char/varchar fails w/ both group by and order by ") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), i INT) USING $format") + sql("INSERT INTO t VALUES ('c', 1)") + checkAnswer(sql("SELECT v, sum(i) FROM t GROUP BY v ORDER BY v"), Row("c", 1)) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. From 71d261ab8fb3e7fb22d2687b8e038129ca766a65 Mon Sep 17 00:00:00 2001 From: Gabor Somogyi Date: Fri, 8 Jan 2021 20:04:56 +0900 Subject: [PATCH 1004/1009] [SPARK-34032][SS] Add truststore and keystore type config possibility for Kafka delegation token ### What changes were proposed in this pull request? Kafka delegation token is obtained with `AdminClient` where security settings can be set. The keystore and truststore type, however, can't be set. In this PR I've added these new configurations. This can be useful when the store type differs from the default. A good example is making Spark FIPS compliant, where the default JKS type is not accepted. ### Why are the changes needed? Missing configurations. ### Does this PR introduce _any_ user-facing change? Yes, it adds 2 additional config parameters. ### How was this patch tested? Existing + modified unit tests + simple Kafka to Kafka app on cluster. Closes #31070 from gaborgsomogyi/SPARK-34032.
Authored-by: Gabor Somogyi Signed-off-by: Jungtaek Lim (HeartSaVioR) --- docs/structured-streaming-kafka-integration.md | 17 +++++++++++++++++ .../spark/kafka010/KafkaTokenSparkConf.scala | 6 ++++++ .../apache/spark/kafka010/KafkaTokenUtil.scala | 6 ++++++ .../kafka010/KafkaDelegationTokenTest.scala | 4 ++++ .../kafka010/KafkaTokenSparkConfSuite.scala | 10 ++++++++++ .../spark/kafka010/KafkaTokenUtilSuite.scala | 6 ++++++ 6 files changed, 49 insertions(+) diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 5336695478c14..bf25d46f2e7e0 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -1004,6 +1004,14 @@ Delegation tokens can be obtained from multiple clusters and ${cluster} 3.0.0 + + spark.kafka.clusters.${cluster}.ssl.truststore.type + None + + The file format of the trust store file. For further details please see Kafka documentation. Only used to obtain delegation token. + + 3.2.0 + spark.kafka.clusters.${cluster}.ssl.truststore.location None @@ -1021,6 +1029,15 @@ Delegation tokens can be obtained from multiple clusters and ${cluster} 3.0.0 + + spark.kafka.clusters.${cluster}.ssl.keystore.type + None + + The file format of the key store file. This is optional for client. + For further details please see Kafka documentation. Only used to obtain delegation token. + + 3.2.0 + spark.kafka.clusters.${cluster}.ssl.keystore.location None diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala index ed4a6f1e34c55..21ba7b21ed9d6 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala @@ -31,8 +31,10 @@ private[spark] case class KafkaTokenClusterConf( targetServersRegex: String, securityProtocol: String, kerberosServiceName: String, + trustStoreType: Option[String], trustStoreLocation: Option[String], trustStorePassword: Option[String], + keyStoreType: Option[String], keyStoreLocation: Option[String], keyStorePassword: Option[String], keyPassword: Option[String], @@ -44,8 +46,10 @@ private[spark] case class KafkaTokenClusterConf( s"targetServersRegex=$targetServersRegex, " + s"securityProtocol=$securityProtocol, " + s"kerberosServiceName=$kerberosServiceName, " + + s"trustStoreType=$trustStoreType, " + s"trustStoreLocation=$trustStoreLocation, " + s"trustStorePassword=${trustStorePassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + + s"keyStoreType=$keyStoreType, " + s"keyStoreLocation=$keyStoreLocation, " + s"keyStorePassword=${keyStorePassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + s"keyPassword=${keyPassword.map(_ => REDACTION_REPLACEMENT_TEXT)}, " + @@ -77,8 +81,10 @@ private [kafka010] object KafkaTokenSparkConf extends Logging { DEFAULT_SECURITY_PROTOCOL_CONFIG), sparkClusterConf.getOrElse(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME), + sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG), sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG), sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG), + sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG), sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG), 
sparkClusterConf.get(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG), sparkClusterConf.get(SslConfigs.SSL_KEY_PASSWORD_CONFIG), diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index f3f6b4de6f79c..a182d3c30858e 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -162,6 +162,9 @@ private[spark] object KafkaTokenUtil extends Logging { private def setTrustStoreProperties( clusterConf: KafkaTokenClusterConf, properties: ju.Properties): Unit = { + clusterConf.trustStoreType.foreach { truststoreType => + properties.put(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG, truststoreType) + } clusterConf.trustStoreLocation.foreach { truststoreLocation => properties.put(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, truststoreLocation) } @@ -173,6 +176,9 @@ private[spark] object KafkaTokenUtil extends Logging { private def setKeyStoreProperties( clusterConf: KafkaTokenClusterConf, properties: ju.Properties): Unit = { + clusterConf.keyStoreType.foreach { keystoreType => + properties.put(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG, keystoreType) + } clusterConf.keyStoreLocation.foreach { keystoreLocation => properties.put(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG, keystoreLocation) } diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala index 19335f4221e40..8271acdc7dfb6 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala @@ -51,8 +51,10 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { protected val bootStrapServers = "127.0.0.1:0" protected val matchingTargetServersRegex = "127.0.0.*:0" protected val nonMatchingTargetServersRegex = "127.0.intentionally_non_matching.*:0" + protected val trustStoreType = "customTrustStoreType" protected val trustStoreLocation = "/path/to/trustStore" protected val trustStorePassword = "trustStoreSecret" + protected val keyStoreType = "customKeyStoreType" protected val keyStoreLocation = "/path/to/keyStore" protected val keyStorePassword = "keyStoreSecret" protected val keyPassword = "keySecret" @@ -124,8 +126,10 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { KafkaTokenSparkConf.DEFAULT_TARGET_SERVERS_REGEX, securityProtocol, KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME, + Some(trustStoreType), Some(trustStoreLocation), Some(trustStorePassword), + Some(keyStoreType), Some(keyStoreLocation), Some(keyStorePassword), Some(keyPassword), diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala index 61184a6fac33d..17caf96818e47 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenSparkConfSuite.scala @@ -29,8 +29,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with 
BeforeAndAfterEach { private val targetServersRegex = "127.0.0.*:0" private val securityProtocol = SSL.name private val kerberosServiceName = "kafka1" + private val trustStoreType = "customTrustStoreType" private val trustStoreLocation = "/path/to/trustStore" private val trustStorePassword = "trustStoreSecret" + private val keyStoreType = "customKeyStoreType" private val keyStoreLocation = "/path/to/keyStore" private val keyStorePassword = "keyStoreSecret" private val keyPassword = "keySecret" @@ -60,8 +62,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.securityProtocol === SASL_SSL.name) assert(clusterConfig.kerberosServiceName === KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME) + assert(clusterConfig.trustStoreType === None) assert(clusterConfig.trustStoreLocation === None) assert(clusterConfig.trustStorePassword === None) + assert(clusterConfig.keyStoreType === None) assert(clusterConfig.keyStoreLocation === None) assert(clusterConfig.keyStorePassword === None) assert(clusterConfig.keyPassword === None) @@ -75,8 +79,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { sparkConf.set(s"spark.kafka.clusters.$identifier1.security.protocol", securityProtocol) sparkConf.set(s"spark.kafka.clusters.$identifier1.sasl.kerberos.service.name", kerberosServiceName) + sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.type", trustStoreType) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.location", trustStoreLocation) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.truststore.password", trustStorePassword) + sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.type", keyStoreType) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.location", keyStoreLocation) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.keystore.password", keyStorePassword) sparkConf.set(s"spark.kafka.clusters.$identifier1.ssl.key.password", keyPassword) @@ -88,8 +94,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.targetServersRegex === targetServersRegex) assert(clusterConfig.securityProtocol === securityProtocol) assert(clusterConfig.kerberosServiceName === kerberosServiceName) + assert(clusterConfig.trustStoreType === Some(trustStoreType)) assert(clusterConfig.trustStoreLocation === Some(trustStoreLocation)) assert(clusterConfig.trustStorePassword === Some(trustStorePassword)) + assert(clusterConfig.keyStoreType === Some(keyStoreType)) assert(clusterConfig.keyStoreLocation === Some(keyStoreLocation)) assert(clusterConfig.keyStorePassword === Some(keyStorePassword)) assert(clusterConfig.keyPassword === Some(keyPassword)) @@ -127,8 +135,10 @@ class KafkaTokenSparkConfSuite extends SparkFunSuite with BeforeAndAfterEach { assert(clusterConfig.securityProtocol === SASL_SSL.name) assert(clusterConfig.kerberosServiceName === KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME) + assert(clusterConfig.trustStoreType === None) assert(clusterConfig.trustStoreLocation === None) assert(clusterConfig.trustStorePassword === None) + assert(clusterConfig.keyStoreType === None) assert(clusterConfig.keyStoreLocation === None) assert(clusterConfig.keyStorePassword === None) assert(clusterConfig.keyPassword === None) diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala 
b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala index 94f7853003bd9..ca34e14f2c261 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala @@ -64,8 +64,10 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SASL_PLAINTEXT.name) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG)) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEY_PASSWORD_CONFIG)) @@ -80,10 +82,12 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SASL_SSL.name) + assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG) === trustStoreType) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG) === trustStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG) === trustStorePassword) + assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG)) assert(!adminClientProperties.containsKey(SslConfigs.SSL_KEY_PASSWORD_CONFIG)) @@ -99,10 +103,12 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { === bootStrapServers) assert(adminClientProperties.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === SSL.name) + assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_TYPE_CONFIG) === trustStoreType) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG) === trustStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_TRUSTSTORE_PASSWORD_CONFIG) === trustStorePassword) + assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_TYPE_CONFIG) === keyStoreType) assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_LOCATION_CONFIG) === keyStoreLocation) assert(adminClientProperties.get(SslConfigs.SSL_KEYSTORE_PASSWORD_CONFIG) === keyStorePassword) assert(adminClientProperties.get(SslConfigs.SSL_KEY_PASSWORD_CONFIG) === keyPassword) From 157b72ac9fa0057d5fd6d7ed52a6c4b22ebd1dfc Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 8 Jan 2021 14:14:27 +0000 Subject: [PATCH 1005/1009] [SPARK-33591][SQL] Recognize `null` in partition spec values ### What changes were proposed in this pull request? 1. Recognize `null` while parsing partition specs, and put `null` instead of `"null"` as partition values. 2. For V1 catalog: replace `null` by `__HIVE_DEFAULT_PARTITION__`. 3. For V2 catalogs: pass `null` AS IS, and let catalog implementations to decide how to handle `null`s as partition values in spec. ### Why are the changes needed? 
Currently, `null` in partition specs is recognized as the `"null"` string which could lead to incorrect results, for example: ```sql spark-sql> CREATE TABLE tbl5 (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1); spark-sql> INSERT INTO TABLE tbl5 PARTITION (p1 = null) SELECT 0; spark-sql> SELECT isnull(p1) FROM tbl5; false ``` Even we inserted a row to the partition with the `null` value, **the resulted table doesn't contain `null`**. ### Does this PR introduce _any_ user-facing change? Yes. After the changes, the example above works as expected: ```sql spark-sql> SELECT isnull(p1) FROM tbl5; true ``` ### How was this patch tested? 1. By running the affected test suites `SQLQuerySuite`, `AlterTablePartitionV2SQLSuite` and `v1/ShowPartitionsSuite`. 2. Compiling by Scala 2.13: ``` $ ./dev/change-scala-version.sh 2.13 $ ./build/sbt -Pscala-2.13 compile ``` Closes #30538 from MaxGekk/partition-spec-value-null. Authored-by: Max Gekk Signed-off-by: Wenchen Fan --- .../catalog/ExternalCatalogUtils.scala | 10 ++++++ .../catalyst/catalog/InMemoryCatalog.scala | 7 +++- .../sql/catalyst/catalog/SessionCatalog.scala | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 1 + .../sql/execution/datasources/rules.scala | 3 +- .../org/apache/spark/sql/SQLQuerySuite.scala | 9 +++++ .../AlterTableDropPartitionSuiteBase.scala | 11 +++++++ .../v1/AlterTableDropPartitionSuite.scala | 1 + .../command/v1/ShowPartitionsSuite.scala | 12 +++++++ .../v2/AlterTableDropPartitionSuite.scala | 2 +- .../spark/sql/hive/HiveExternalCatalog.scala | 33 ++++++++++--------- .../hive/execution/InsertIntoHiveTable.scala | 2 ++ 12 files changed, 74 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index 00445a1614257..9d6e0a6d6ce66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -161,6 +161,10 @@ object ExternalCatalogUtils { } } + private def isNullPartitionValue(value: String): Boolean = { + value == null || value == DEFAULT_PARTITION_NAME + } + /** * Returns true if `spec1` is a partial partition spec w.r.t. `spec2`, e.g. PARTITION (a=1) is a * partial partition spec w.r.t. PARTITION (a=1,b=2). 
@@ -169,9 +173,15 @@ object ExternalCatalogUtils { spec1: TablePartitionSpec, spec2: TablePartitionSpec): Boolean = { spec1.forall { + case (partitionColumn, value) if isNullPartitionValue(value) => + isNullPartitionValue(spec2(partitionColumn)) case (partitionColumn, value) => spec2(partitionColumn) == value } } + + def convertNullPartitionValues(spec: TablePartitionSpec): TablePartitionSpec = { + spec.mapValues(v => if (v == null) DEFAULT_PARTITION_NAME else v).toMap + } } object CatalogUtils { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 64b4a112fe786..0d16f46d049a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -541,7 +541,12 @@ class InMemoryCatalog( listPartitions(db, table, partialSpec).map { partition => partitionColumnNames.map { name => - escapePathName(name) + "=" + escapePathName(partition.spec(name)) + val partValue = if (partition.spec(name) == null) { + DEFAULT_PARTITION_NAME + } else { + escapePathName(partition.spec(name)) + } + escapePathName(name) + "=" + partValue }.mkString("/") }.sorted } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 76358ef116cec..0428d12b7ced8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -1178,7 +1178,7 @@ class SessionCatalog( */ private def requireNonEmptyValueInPartitionSpec(specs: Seq[TablePartitionSpec]): Unit = { specs.foreach { s => - if (s.values.exists(_.isEmpty)) { + if (s.values.exists(v => v != null && v.isEmpty)) { val spec = s.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") throw QueryCompilationErrors.invalidPartitionSpecError( s"The spec ($spec) contains an empty partition column value") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 395a9563cdc0a..4d028f6ce3569 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -511,6 +511,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg */ protected def visitStringConstant(ctx: ConstantContext): String = withOrigin(ctx) { ctx match { + case _: NullLiteralContext => null case s: StringLiteralContext => createString(s) case o => o.getText } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index b9866e415c9b1..4fd6684b3b921 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -406,7 +406,8 @@ object PreprocessTableInsertion extends Rule[LogicalPlan] { catalogTable.get.tracksPartitionsInCatalog if (partitionsTrackedByCatalog && normalizedPartSpec.nonEmpty) { // empty partition column value - if 
(normalizedPartSpec.filter(_._2.isDefined).exists(_._2.get.isEmpty)) { + if (normalizedPartSpec.map(_._2) + .filter(_.isDefined).map(_.get).exists(v => v != null && v.isEmpty)) { val spec = normalizedPartSpec.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]") throw new AnalysisException( s"Partition spec is invalid. The spec ($spec) contains an empty partition column value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 3f55a88f19505..7526bf0e6fbe9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3854,6 +3854,15 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark assert(unions.size == 1) } + + test("SPARK-33591: null as a partition value") { + val t = "part_table" + withTable(t) { + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) USING PARQUET PARTITIONED BY (p1)") + sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0") + checkAnswer(sql(s"SELECT * FROM $t"), Row(0, null)) + } + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index aadcda490b82b..942a3e8635698 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -39,6 +39,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil override val command = "ALTER TABLE .. 
DROP PARTITION" protected def notFullPartitionSpecErr: String + protected def nullPartitionValue: String protected def checkDropPartition( t: String, @@ -170,4 +171,14 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil QueryTest.checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1))) } } + + test("SPARK-33591: null as a partition value") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"ALTER TABLE $t ADD PARTITION (p1 = null)") + checkPartitions(t, Map("p1" -> nullPartitionValue)) + sql(s"ALTER TABLE $t DROP PARTITION (p1 = null)") + checkPartitions(t) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala index a6490ebdb950c..509c0be28c26a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableDropPartitionSuite.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.execution.command */ trait AlterTableDropPartitionSuiteBase extends command.AlterTableDropPartitionSuiteBase { override protected val notFullPartitionSpecErr = "The following partitions not found in table" + override protected def nullPartitionValue: String = "__HIVE_DEFAULT_PARTITION__" test("purge partition data") { withNamespaceAndTable("ns", "tbl") { t => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala index e85d62c51ef45..a26e29706e147 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowPartitionsSuite.scala @@ -69,6 +69,18 @@ trait ShowPartitionsSuiteBase extends command.ShowPartitionsSuiteBase { assert(errMsg.contains("'SHOW PARTITIONS' expects a table")) } } + + test("SPARK-33591: null as a partition value") { + val t = "part_table" + withTable(t) { + sql(s"CREATE TABLE $t (col1 INT, p1 STRING) $defaultUsing PARTITIONED BY (p1)") + sql(s"INSERT INTO TABLE $t PARTITION (p1 = null) SELECT 0") + checkAnswer(sql(s"SHOW PARTITIONS $t"), Row("p1=__HIVE_DEFAULT_PARTITION__")) + checkAnswer( + sql(s"SHOW PARTITIONS $t PARTITION (p1 = null)"), + Row("p1=__HIVE_DEFAULT_PARTITION__")) + } + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala index d6890d6faef70..3515fa3390206 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableDropPartitionSuite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.execution.command class AlterTableDropPartitionSuite extends command.AlterTableDropPartitionSuiteBase with CommandSuiteBase { - override protected val notFullPartitionSpecErr = "Partition spec is invalid" + override protected def nullPartitionValue: String = "null" test("SPARK-33650: drop partition into a table which doesn't support partition management") { withNamespaceAndTable("ns", "tbl", s"non_part_$catalog") { t => diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index b4aa073893df8..eeffe4f25d4c6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -942,9 +942,10 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat // Hive metastore is not case preserving and the partition columns are always lower cased. We need // to lower case the column names in partition specification before calling partition related Hive // APIs, to match this behaviour. - private def lowerCasePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = { + private def toMetaStorePartitionSpec(spec: TablePartitionSpec): TablePartitionSpec = { // scalastyle:off caselocale - spec.map { case (k, v) => k.toLowerCase -> v } + val lowNames = spec.map { case (k, v) => k.toLowerCase -> v } + ExternalCatalogUtils.convertNullPartitionValues(lowNames) // scalastyle:on caselocale } @@ -993,8 +994,9 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat } p.copy(storage = p.storage.copy(locationUri = Some(partitionPath.toUri))) } - val lowerCasedParts = partsWithLocation.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) - client.createPartitions(db, table, lowerCasedParts, ignoreIfExists) + val metaStoreParts = partsWithLocation + .map(p => p.copy(spec = toMetaStorePartitionSpec(p.spec))) + client.createPartitions(db, table, metaStoreParts, ignoreIfExists) } override def dropPartitions( @@ -1006,7 +1008,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat retainData: Boolean): Unit = withClient { requireTableExists(db, table) client.dropPartitions( - db, table, parts.map(lowerCasePartitionSpec), ignoreIfNotExists, purge, retainData) + db, table, parts.map(toMetaStorePartitionSpec), ignoreIfNotExists, purge, retainData) } override def renamePartitions( @@ -1015,7 +1017,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat specs: Seq[TablePartitionSpec], newSpecs: Seq[TablePartitionSpec]): Unit = withClient { client.renamePartitions( - db, table, specs.map(lowerCasePartitionSpec), newSpecs.map(lowerCasePartitionSpec)) + db, table, specs.map(toMetaStorePartitionSpec), newSpecs.map(toMetaStorePartitionSpec)) val tableMeta = getTable(db, table) val partitionColumnNames = tableMeta.partitionColumnNames @@ -1031,7 +1033,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val fs = tablePath.getFileSystem(hadoopConf) val newParts = newSpecs.map { spec => val rightPath = renamePartitionDirectory(fs, tablePath, partitionColumnNames, spec) - val partition = client.getPartition(db, table, lowerCasePartitionSpec(spec)) + val partition = client.getPartition(db, table, toMetaStorePartitionSpec(spec)) partition.copy(storage = partition.storage.copy(locationUri = Some(rightPath.toUri))) } alterPartitions(db, table, newParts) @@ -1141,12 +1143,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, newParts: Seq[CatalogTablePartition]): Unit = withClient { - val lowerCasedParts = newParts.map(p => p.copy(spec = lowerCasePartitionSpec(p.spec))) + val metaStoreParts = newParts.map(p => p.copy(spec = toMetaStorePartitionSpec(p.spec))) val rawTable = getRawTable(db, table) // convert partition statistics to properties 
so that we can persist them through hive api - val withStatsProps = lowerCasedParts.map { p => + val withStatsProps = metaStoreParts.map { p => if (p.stats.isDefined) { val statsProperties = statsToProperties(p.stats.get) p.copy(parameters = p.parameters ++ statsProperties) @@ -1162,7 +1164,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, spec: TablePartitionSpec): CatalogTablePartition = withClient { - val part = client.getPartition(db, table, lowerCasePartitionSpec(spec)) + val part = client.getPartition(db, table, toMetaStorePartitionSpec(spec)) restorePartitionMetadata(part, getTable(db, table)) } @@ -1200,7 +1202,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat db: String, table: String, spec: TablePartitionSpec): Option[CatalogTablePartition] = withClient { - client.getPartitionOption(db, table, lowerCasePartitionSpec(spec)).map { part => + client.getPartitionOption(db, table, toMetaStorePartitionSpec(spec)).map { part => restorePartitionMetadata(part, getTable(db, table)) } } @@ -1215,7 +1217,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val catalogTable = getTable(db, table) val partColNameMap = buildLowerCasePartColNameMap(catalogTable).mapValues(escapePathName) val clientPartitionNames = - client.getPartitionNames(catalogTable, partialSpec.map(lowerCasePartitionSpec)) + client.getPartitionNames(catalogTable, partialSpec.map(toMetaStorePartitionSpec)) clientPartitionNames.map { partitionPath => val partSpec = PartitioningUtils.parsePathFragmentAsSeq(partitionPath) partSpec.map { case (partName, partValue) => @@ -1234,11 +1236,12 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table: String, partialSpec: Option[TablePartitionSpec] = None): Seq[CatalogTablePartition] = withClient { val partColNameMap = buildLowerCasePartColNameMap(getTable(db, table)) - val res = client.getPartitions(db, table, partialSpec.map(lowerCasePartitionSpec)).map { part => - part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) + val metaStoreSpec = partialSpec.map(toMetaStorePartitionSpec) + val res = client.getPartitions(db, table, metaStoreSpec) + .map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) } - partialSpec match { + metaStoreSpec match { // This might be a bug of Hive: When the partition value inside the partial partition spec // contains dot, and we ask Hive to list partitions w.r.t. 
the partial partition spec, Hive // treats dot as matching any single character and may return more partitions than we diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 63e46880376e1..bfb24cfedb55a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -133,6 +133,7 @@ case class InsertIntoHiveTable( val numDynamicPartitions = partition.values.count(_.isEmpty) val numStaticPartitions = partition.values.count(_.nonEmpty) val partitionSpec = partition.map { + case (key, Some(null)) => key -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME case (key, Some(value)) => key -> value case (key, None) => key -> "" } @@ -229,6 +230,7 @@ case class InsertIntoHiveTable( val caseInsensitiveDpMap = CaseInsensitiveMap(dpMap) val updatedPartitionSpec = partition.map { + case (key, Some(null)) => key -> ExternalCatalogUtils.DEFAULT_PARTITION_NAME case (key, Some(value)) => key -> value case (key, None) if caseInsensitiveDpMap.contains(key) => key -> caseInsensitiveDpMap(key) From 023eba2ad72f5119350c6c797808dadcfd1eaa19 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 8 Jan 2021 09:43:11 -0600 Subject: [PATCH 1006/1009] [SPARK-33796][DOCS][FOLLOWUP] Tweak the width of left-menu of Spark SQL Guide ### What changes were proposed in this pull request? This PR tweaks the width of the left menu of the Spark SQL Guide. When I view the Spark SQL Guide with browsers on macOS, the title `Spark SQL Guide` is rendered nicely. But I often use Pop!_OS, an Ubuntu variant, and the title is overlapped in browsers on it. ![spark-sql-guide-layout-before](https://user-images.githubusercontent.com/4736016/104002743-d56cc200-51e4-11eb-9e3a-28abcd46e0bf.png) After this change, the title is no longer overlapped. ![spark-sql-guide-layout-after](https://user-images.githubusercontent.com/4736016/104002847-f9c89e80-51e4-11eb-85c0-01d69cee46b7.png) ### Why are the changes needed? For a prettier layout. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built the document with `cd docs && SKIP_API=1 jekyll build` and confirmed the layout. Closes #31091 from sarutak/modify-layout-sparksql-guide. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- docs/css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/css/main.css b/docs/css/main.css index 309ad7b3bdf0b..6710b6e8563c9 100755 --- a/docs/css/main.css +++ b/docs/css/main.css @@ -326,7 +326,7 @@ a.anchorjs-link:hover { text-decoration: none; } border-left-width: 0px; border-bottom-width: 0px; margin-top: 0px; - width: 210px; + width: 220px; height: 80%; float: left; position: fixed; From 0781ed4f5b7f692656651f9bb51f823c82e24e2d Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 8 Jan 2021 09:44:33 -0600 Subject: [PATCH 1007/1009] [MINOR][SQL][TESTS] Fix the incorrect unicode escape test in ParserUtilsSuite ### What changes were proposed in this pull request? This PR fixes an incorrect unicode literal test in `ParserUtilsSuite`. In that suite, string literals in queries have unicode escape characters like `\u732B`, but the backslash should be escaped because the query strings are given as Java strings. ### Why are the changes needed? Correct the test. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested?
Ran `ParserUtilsSuite` and it passed. Closes #31088 from sarutak/fix-incorrect-unicode-test. Authored-by: Kousuke Saruta Signed-off-by: Sean Owen --- .../apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala index a4d1b5d5e6f29..5e7adaa7163fe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala @@ -94,11 +94,11 @@ class ParserUtilsSuite extends SparkFunSuite { assert(unescapeSQLString(""""\256"""") == "256") // String including a '\u0000' style literal characters (\u732B is a cat in Kanji). - assert(unescapeSQLString("\"How cute \u732B are\"") == "How cute \u732B are") + assert(unescapeSQLString("\"How cute \\u732B are\"") == "How cute \u732B are") // String including a surrogate pair character // (\uD867\uDE3D is Okhotsk atka mackerel in Kanji). - assert(unescapeSQLString("\"\uD867\uDE3D is a fish\"") == "\uD867\uDE3D is a fish") + assert(unescapeSQLString("\"\\uD867\\uDE3D is a fish\"") == "\uD867\uDE3D is a fish") // scalastyle:on nonascii } From d00f0695b7513046e42e47f35b280d7aa494de5b Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Fri, 8 Jan 2021 12:21:56 -0600 Subject: [PATCH 1008/1009] [SPARK-32917][SHUFFLE][CORE] Adds support for executors to push shuffle blocks after successful map task completion ### What changes were proposed in this pull request? This is the shuffle writer side change where executors can push data to remote shuffle services. This is needed for push-based shuffle - SPIP [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). Summary of changes: - This adds support for executors to push shuffle blocks after map tasks complete writing shuffle data. - This also introduces a timeout specifically for creating connections to remote shuffle services. ### Why are the changes needed? - These changes are needed for push-based shuffle. Refer to the SPIP in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). - The main reason to create a separate connection creation timeout is that the existing `connectionTimeoutMs` is overloaded: it is used for connection creation timeouts as well as the connection idle timeout. The connection creation timeout should be much lower than the idle timeout. The default for `connectionTimeoutMs` is 120s, which is quite high for just establishing connections. If a shuffle server node is bad, then connection creation will fail within a few seconds. However, an overloaded shuffle server may take much longer to respond to a request, and the channel can stay idle for a much longer time, which is expected. Another reason is that with push-based shuffle, an executor may be fetching shuffle data and pushing shuffle data (for the next stage) simultaneously. Both of these tasks share the same connections with the shuffle service. If there is a bad shuffle server node and the connection creation timeout is very high, then both of these tasks end up waiting a long time, eventually impacting performance. ### Does this PR introduce _any_ user-facing change? Yes. This PR introduces client-side configs for push-based shuffle. If push-based shuffle is turned off, users will not see any change. ### How was this patch tested? Added unit tests.
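For reference, a hedged sketch (not part of this patch) of how the new client-side settings might be combined. The values are illustrative only; the `spark.shuffle.push.*` keys come from this patch, while the `spark.shuffle.io.` prefix for the connection creation timeout is an assumption based on the usual per-module `spark.<module>.io.*` transport naming.

```
import org.apache.spark.SparkConf

// Illustrative values only; enabling push-based shuffle itself is done via
// separate configs that are not part of this patch.
val conf = new SparkConf()
  // Fail fast when a remote shuffle service cannot be reached, instead of
  // waiting out the much larger idle timeout (spark.network.timeout).
  .set("spark.shuffle.io.connectionCreationTimeout", "30s")
  // Threads that create connections and push blocks; defaults to the number
  // of executor cores.
  .set("spark.shuffle.push.numPushThreads", "8")
  // Blocks larger than this are fetched in the original manner, not pushed.
  .set("spark.shuffle.push.maxBlockSizeToPush", "1m")
  // Max size of a batch of shuffle blocks grouped into a single push request.
  .set("spark.shuffle.push.maxBlockBatchSize", "3m")
```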
The reference PR with the consolidated changes covering the complete implementation is also provided in [SPARK-30602](https://issues.apache.org/jira/browse/SPARK-30602). We have already verified the functionality and the improved performance as documented in the SPIP doc. Lead-authored-by: Min Shen mshenlinkedin.com Co-authored-by: Chandni Singh chsinghlinkedin.com Co-authored-by: Ye Zhou yezhoulinkedin.com Closes #30312 from otterc/SPARK-32917. Lead-authored-by: Chandni Singh Co-authored-by: Chandni Singh Co-authored-by: Min Shen Co-authored-by: Ye Zhou Signed-off-by: Mridul Muralidharan gmail.com> --- .../client/TransportClientFactory.java | 7 +- .../spark/network/util/TransportConf.java | 13 +- .../sort/BypassMergeSortShuffleWriter.java | 5 +- .../shuffle/sort/UnsafeShuffleWriter.java | 7 +- .../org/apache/spark/executor/Executor.scala | 3 +- .../spark/internal/config/package.scala | 29 ++ .../spark/shuffle/ShuffleBlockPusher.scala | 450 ++++++++++++++++++ .../spark/shuffle/ShuffleWriteProcessor.scala | 19 +- .../apache/spark/shuffle/ShuffleWriter.scala | 3 + .../shuffle/sort/SortShuffleWriter.scala | 6 +- .../org/apache/spark/storage/BlockId.scala | 11 +- .../shuffle/ShuffleBlockPusherSuite.scala | 355 ++++++++++++++ 12 files changed, 896 insertions(+), 12 deletions(-) create mode 100644 core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala create mode 100644 core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index 24c436a504fa8..43408d43e577e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -254,7 +254,7 @@ TransportClient createClient(InetSocketAddress address) // Disable Nagle's Algorithm since we don't want packets to wait .option(ChannelOption.TCP_NODELAY, true) .option(ChannelOption.SO_KEEPALIVE, true) - .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.connectionTimeoutMs()) + .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.connectionCreationTimeoutMs()) .option(ChannelOption.ALLOCATOR, pooledAllocator); if (conf.receiveBuf() > 0) { @@ -280,9 +280,10 @@ public void initChannel(SocketChannel ch) { // Connect to the remote server long preConnect = System.nanoTime(); ChannelFuture cf = bootstrap.connect(address); - if (!cf.await(conf.connectionTimeoutMs())) { + if (!cf.await(conf.connectionCreationTimeoutMs())) { throw new IOException( - String.format("Connecting to %s timed out (%s ms)", address, conf.connectionTimeoutMs())); + String.format("Connecting to %s timed out (%s ms)", + address, conf.connectionCreationTimeoutMs())); } else if (cf.cause() != null) { throw new IOException(String.format("Failed to connect to %s", address), cf.cause()); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index d305dfa8e83cf..f051042a7adb4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -19,6 +19,7 @@ import java.util.Locale; import java.util.Properties; +import java.util.concurrent.TimeUnit; import 
com.google.common.primitives.Ints; import io.netty.util.NettyRuntime; @@ -31,6 +32,7 @@ public class TransportConf { private final String SPARK_NETWORK_IO_MODE_KEY; private final String SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY; private final String SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY; + private final String SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY; private final String SPARK_NETWORK_IO_BACKLOG_KEY; private final String SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY; private final String SPARK_NETWORK_IO_SERVERTHREADS_KEY; @@ -54,6 +56,7 @@ public TransportConf(String module, ConfigProvider conf) { SPARK_NETWORK_IO_MODE_KEY = getConfKey("io.mode"); SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY = getConfKey("io.preferDirectBufs"); SPARK_NETWORK_IO_CONNECTIONTIMEOUT_KEY = getConfKey("io.connectionTimeout"); + SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY = getConfKey("io.connectionCreationTimeout"); SPARK_NETWORK_IO_BACKLOG_KEY = getConfKey("io.backLog"); SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY = getConfKey("io.numConnectionsPerPeer"); SPARK_NETWORK_IO_SERVERTHREADS_KEY = getConfKey("io.serverThreads"); @@ -94,7 +97,7 @@ public boolean preferDirectBufs() { return conf.getBoolean(SPARK_NETWORK_IO_PREFERDIRECTBUFS_KEY, true); } - /** Connect timeout in milliseconds. Default 120 secs. */ + /** Connection idle timeout in milliseconds. Default 120 secs. */ public int connectionTimeoutMs() { long defaultNetworkTimeoutS = JavaUtils.timeStringAsSec( conf.get("spark.network.timeout", "120s")); @@ -103,6 +106,14 @@ public int connectionTimeoutMs() { return (int) defaultTimeoutMs; } + /** Connect creation timeout in milliseconds. Default 30 secs. */ + public int connectionCreationTimeoutMs() { + long connectionTimeoutS = TimeUnit.MILLISECONDS.toSeconds(connectionTimeoutMs()); + long defaultTimeoutMs = JavaUtils.timeStringAsSec( + conf.get(SPARK_NETWORK_IO_CONNECTIONCREATIONTIMEOUT_KEY, connectionTimeoutS + "s")) * 1000; + return (int) defaultTimeoutMs; + } + /** Number of concurrent connections between two nodes for fetching data. 
*/ public int numConnectionsPerPeer() { return conf.getInt(SPARK_NETWORK_IO_NUMCONNECTIONSPERPEER_KEY, 1); diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index 256789b8c7827..3dbee1b13d287 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -31,7 +31,6 @@ import scala.Tuple2; import scala.collection.Iterator; -import com.google.common.annotations.VisibleForTesting; import com.google.common.io.Closeables; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,8 +177,8 @@ public void write(Iterator> records) throws IOException { } } - @VisibleForTesting - long[] getPartitionLengths() { + @Override + public long[] getPartitionLengths() { return partitionLengths; } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 79e38a824fea4..e8f94ba8ffeee 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -88,6 +88,7 @@ public class UnsafeShuffleWriter extends ShuffleWriter { @Nullable private MapStatus mapStatus; @Nullable private ShuffleExternalSorter sorter; + @Nullable private long[] partitionLengths; private long peakMemoryUsedBytes = 0; /** Subclass of ByteArrayOutputStream that exposes `buf` directly. */ @@ -219,7 +220,6 @@ void closeAndWriteOutput() throws IOException { serOutputStream = null; final SpillInfo[] spills = sorter.closeAndGetSpills(); sorter = null; - final long[] partitionLengths; try { partitionLengths = mergeSpills(spills); } finally { @@ -543,4 +543,9 @@ public void close() throws IOException { channel.close(); } } + + @Override + public long[] getPartitionLengths() { + return partitionLengths; + } } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index c58009c166a60..3865c9c987b1c 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -47,7 +47,7 @@ import org.apache.spark.metrics.source.JVMCPUSource import org.apache.spark.resource.ResourceInformation import org.apache.spark.rpc.RpcTimeout import org.apache.spark.scheduler._ -import org.apache.spark.shuffle.FetchFailedException +import org.apache.spark.shuffle.{FetchFailedException, ShuffleBlockPusher} import org.apache.spark.storage.{StorageLevel, TaskResultBlockId} import org.apache.spark.util._ import org.apache.spark.util.io.ChunkedByteBuffer @@ -325,6 +325,7 @@ private[spark] class Executor( case NonFatal(e) => logWarning("Unable to stop heartbeater", e) } + ShuffleBlockPusher.stop() threadPool.shutdown() // Notify plugins that executor is shutting down so they can terminate cleanly diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index adaf92d5a8aa1..84c66470288ff 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -2030,4 +2030,33 @@ package object config { .version("3.1.0") .doubleConf .createWithDefault(5) + + private[spark] val 
SHUFFLE_NUM_PUSH_THREADS = + ConfigBuilder("spark.shuffle.push.numPushThreads") + .doc("Specify the number of threads in the block pusher pool. These threads assist " + + "in creating connections and pushing blocks to remote shuffle services. By default, the " + + "threadpool size is equal to the number of spark executor cores.") + .version("3.2.0") + .intConf + .createOptional + + private[spark] val SHUFFLE_MAX_BLOCK_SIZE_TO_PUSH = + ConfigBuilder("spark.shuffle.push.maxBlockSizeToPush") + .doc("The max size of an individual block to push to the remote shuffle services. Blocks " + + "larger than this threshold are not pushed to be merged remotely. These shuffle blocks " + + "will be fetched by the executors in the original manner.") + .version("3.2.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("1m") + + private[spark] val SHUFFLE_MAX_BLOCK_BATCH_SIZE_FOR_PUSH = + ConfigBuilder("spark.shuffle.push.maxBlockBatchSize") + .doc("The max size of a batch of shuffle blocks to be grouped into a single push request.") + .version("3.2.0") + .bytesConf(ByteUnit.BYTE) + // Default is 3m because it is greater than 2m which is the default value for + // TransportConf#memoryMapBytes. If this defaults to 2m as well it is very likely that each + // batch of block will be loaded in memory with memory mapping, which has higher overhead + // with small MB sized chunk of data. + .createWithDefaultString("3m") } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala new file mode 100644 index 0000000000000..88d084ce1b2f4 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala @@ -0,0 +1,450 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
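As a rough illustration of how the three `spark.shuffle.push.*` settings introduced above could be tuned from application code; the values are arbitrary examples rather than recommendations, and push-based shuffle itself is gated by a separate enable flag:
```
import org.apache.spark.SparkConf

// Illustrative values only; defaults are the executor core count, 1m and 3m respectively.
val sparkConf = new SparkConf()
  .set("spark.shuffle.push.numPushThreads", "8")
  .set("spark.shuffle.push.maxBlockSizeToPush", "1m")
  .set("spark.shuffle.push.maxBlockBatchSize", "3m")
```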
+ */ + +package org.apache.spark.shuffle + +import java.io.File +import java.net.ConnectException +import java.nio.ByteBuffer +import java.util.concurrent.ExecutorService + +import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Queue} + +import com.google.common.base.Throwables + +import org.apache.spark.{ShuffleDependency, SparkConf, SparkEnv} +import org.apache.spark.annotation.Since +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.BlockFetchingListener +import org.apache.spark.network.shuffle.ErrorHandler.BlockPushErrorHandler +import org.apache.spark.network.util.TransportConf +import org.apache.spark.shuffle.ShuffleBlockPusher._ +import org.apache.spark.storage.{BlockId, BlockManagerId, ShufflePushBlockId} +import org.apache.spark.util.{ThreadUtils, Utils} + +/** + * Used for pushing shuffle blocks to remote shuffle services when push shuffle is enabled. + * When push shuffle is enabled, it is created after the shuffle writer finishes writing the shuffle + * file and initiates the block push process. + * + * @param conf spark configuration + */ +@Since("3.2.0") +private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { + private[this] val maxBlockSizeToPush = conf.get(SHUFFLE_MAX_BLOCK_SIZE_TO_PUSH) + private[this] val maxBlockBatchSize = conf.get(SHUFFLE_MAX_BLOCK_BATCH_SIZE_FOR_PUSH) + private[this] val maxBytesInFlight = + conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024 + private[this] val maxReqsInFlight = conf.getInt("spark.reducer.maxReqsInFlight", Int.MaxValue) + private[this] val maxBlocksInFlightPerAddress = conf.get(REDUCER_MAX_BLOCKS_IN_FLIGHT_PER_ADDRESS) + private[this] var bytesInFlight = 0L + private[this] var reqsInFlight = 0 + private[this] val numBlocksInFlightPerAddress = new HashMap[BlockManagerId, Int]() + private[this] val deferredPushRequests = new HashMap[BlockManagerId, Queue[PushRequest]]() + private[this] val pushRequests = new Queue[PushRequest] + private[this] val errorHandler = createErrorHandler() + // VisibleForTesting + private[shuffle] val unreachableBlockMgrs = new HashSet[BlockManagerId]() + + // VisibleForTesting + private[shuffle] def createErrorHandler(): BlockPushErrorHandler = { + new BlockPushErrorHandler() { + // For a connection exception against a particular host, we will stop pushing any + // blocks to just that host and continue push blocks to other hosts. So, here push of + // all blocks will only stop when it is "Too Late". Also see updateStateAndCheckIfPushMore. + override def shouldRetryError(t: Throwable): Boolean = { + // If the block is too late, there is no need to retry it + !Throwables.getStackTraceAsString(t).contains(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX) + } + } + } + + /** + * Initiates the block push. 
+ * + * @param dataFile mapper generated shuffle data file + * @param partitionLengths array of shuffle block size so we can tell shuffle block + * @param dep shuffle dependency to get shuffle ID and the location of remote shuffle + * services to push local shuffle blocks + * @param mapIndex map index of the shuffle map task + */ + private[shuffle] def initiateBlockPush( + dataFile: File, + partitionLengths: Array[Long], + dep: ShuffleDependency[_, _, _], + mapIndex: Int): Unit = { + val numPartitions = dep.partitioner.numPartitions + val transportConf = SparkTransportConf.fromSparkConf(conf, "shuffle") + val requests = prepareBlockPushRequests(numPartitions, mapIndex, dep.shuffleId, dataFile, + partitionLengths, dep.getMergerLocs, transportConf) + // Randomize the orders of the PushRequest, so different mappers pushing blocks at the same + // time won't be pushing the same ranges of shuffle partitions. + pushRequests ++= Utils.randomize(requests) + + submitTask(() => { + pushUpToMax() + }) + } + + /** + * Triggers the push. It's a separate method for testing. + * VisibleForTesting + */ + protected def submitTask(task: Runnable): Unit = { + if (BLOCK_PUSHER_POOL != null) { + BLOCK_PUSHER_POOL.execute(task) + } + } + + /** + * Since multiple block push threads could potentially be calling pushUpToMax for the same + * mapper, we synchronize access to this method so that only one thread can push blocks for + * a given mapper. This helps to simplify access to the shared states. The down side of this + * is that we could unnecessarily block other mappers' block pushes if all the threads + * are occupied by block pushes from the same mapper. + * + * This code is similar to ShuffleBlockFetcherIterator#fetchUpToMaxBytes in how it throttles + * the data transfer between shuffle client/server. + */ + private def pushUpToMax(): Unit = synchronized { + // Process any outstanding deferred push requests if possible. + if (deferredPushRequests.nonEmpty) { + for ((remoteAddress, defReqQueue) <- deferredPushRequests) { + while (isRemoteBlockPushable(defReqQueue) && + !isRemoteAddressMaxedOut(remoteAddress, defReqQueue.front)) { + val request = defReqQueue.dequeue() + logDebug(s"Processing deferred push request for $remoteAddress with " + + s"${request.blocks.length} blocks") + sendRequest(request) + if (defReqQueue.isEmpty) { + deferredPushRequests -= remoteAddress + } + } + } + } + + // Process any regular push requests if possible. + while (isRemoteBlockPushable(pushRequests)) { + val request = pushRequests.dequeue() + val remoteAddress = request.address + if (isRemoteAddressMaxedOut(remoteAddress, request)) { + logDebug(s"Deferring push request for $remoteAddress with ${request.blocks.size} blocks") + deferredPushRequests.getOrElseUpdate(remoteAddress, new Queue[PushRequest]()) + .enqueue(request) + } else { + sendRequest(request) + } + } + + def isRemoteBlockPushable(pushReqQueue: Queue[PushRequest]): Boolean = { + pushReqQueue.nonEmpty && + (bytesInFlight == 0 || + (reqsInFlight + 1 <= maxReqsInFlight && + bytesInFlight + pushReqQueue.front.size <= maxBytesInFlight)) + } + + // Checks if sending a new push request will exceed the max no. of blocks being pushed to a + // given remote address. + def isRemoteAddressMaxedOut(remoteAddress: BlockManagerId, request: PushRequest): Boolean = { + (numBlocksInFlightPerAddress.getOrElse(remoteAddress, 0) + + request.blocks.size) > maxBlocksInFlightPerAddress + } + } + + /** + * Push blocks to remote shuffle server. 
The callback listener will invoke #pushUpToMax again + * to trigger pushing the next batch of blocks once some block transfer is done in the current + * batch. This way, we decouple the map task from the block push process, since it is netty + * client thread instead of task execution thread which takes care of majority of the block + * pushes. + */ + private def sendRequest(request: PushRequest): Unit = { + bytesInFlight += request.size + reqsInFlight += 1 + numBlocksInFlightPerAddress(request.address) = numBlocksInFlightPerAddress.getOrElseUpdate( + request.address, 0) + request.blocks.length + + val sizeMap = request.blocks.map { case (blockId, size) => (blockId.toString, size) }.toMap + val address = request.address + val blockIds = request.blocks.map(_._1.toString) + val remainingBlocks = new HashSet[String]() ++= blockIds + + val blockPushListener = new BlockFetchingListener { + // Initiating a connection and pushing blocks to a remote shuffle service is always handled by + // the block-push-threads. We should not initiate the connection creation in the + // blockPushListener callbacks which are invoked by the netty eventloop because: + // 1. TrasportClient.createConnection(...) blocks for connection to be established and it's + // recommended to avoid any blocking operations in the eventloop; + // 2. The actual connection creation is a task that gets added to the task queue of another + // eventloop which could have eventloops eventually blocking each other. + // Once the blockPushListener is notified of the block push success or failure, we + // just delegate it to block-push-threads. + def handleResult(result: PushResult): Unit = { + submitTask(() => { + if (updateStateAndCheckIfPushMore( + sizeMap(result.blockId), address, remainingBlocks, result)) { + pushUpToMax() + } + }) + } + + override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { + logTrace(s"Push for block $blockId to $address successful.") + handleResult(PushResult(blockId, null)) + } + + override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { + // check the message or it's cause to see it needs to be logged. + if (!errorHandler.shouldLogError(exception)) { + logTrace(s"Pushing block $blockId to $address failed.", exception) + } else { + logWarning(s"Pushing block $blockId to $address failed.", exception) + } + handleResult(PushResult(blockId, exception)) + } + } + SparkEnv.get.blockManager.blockStoreClient.pushBlocks( + address.host, address.port, blockIds.toArray, + sliceReqBufferIntoBlockBuffers(request.reqBuffer, request.blocks.map(_._2)), + blockPushListener) + } + + /** + * Given the ManagedBuffer representing all the continuous blocks inside the shuffle data file + * for a PushRequest and an array of individual block sizes, load the buffer from disk into + * memory and slice it into multiple smaller buffers representing each block. + * + * With nio ByteBuffer, the individual block buffers share data with the initial in memory + * buffer loaded from disk. Thus only one copy of the block data is kept in memory. 
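A minimal, self-contained sketch of the slicing idea described above, assuming a 6-byte buffer holding three blocks of sizes 1, 2 and 3; each resulting slice is a view over the same backing memory rather than a copy:
```
import java.nio.ByteBuffer

val inMemoryBuffer = ByteBuffer.wrap(Array[Byte](0, 1, 2, 3, 4, 5))
val blockSizes = Seq(1, 2, 3)
// Running offsets of each block within the buffer: Seq(0, 1, 3)
val blockOffsets = blockSizes.scanLeft(0)(_ + _).init
val slices = blockOffsets.zip(blockSizes).map { case (offset, size) =>
  // duplicate() shares the underlying bytes; position/limit/slice carve out one block
  inMemoryBuffer.duplicate()
    .position(offset)
    .limit(offset + size)
    .asInstanceOf[ByteBuffer]
    .slice()
}
assert(slices.map(_.remaining()) == Seq(1, 2, 3))
```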
+ * @param reqBuffer A {{FileSegmentManagedBuffer}} representing all the continuous blocks in + * the shuffle data file for a PushRequest + * @param blockSizes Array of block sizes + * @return Array of in memory buffer for each individual block + */ + private def sliceReqBufferIntoBlockBuffers( + reqBuffer: ManagedBuffer, + blockSizes: Seq[Int]): Array[ManagedBuffer] = { + if (blockSizes.size == 1) { + Array(reqBuffer) + } else { + val inMemoryBuffer = reqBuffer.nioByteBuffer() + val blockOffsets = new Array[Int](blockSizes.size) + var offset = 0 + for (index <- blockSizes.indices) { + blockOffsets(index) = offset + offset += blockSizes(index) + } + blockOffsets.zip(blockSizes).map { + case (offset, size) => + new NioManagedBuffer(inMemoryBuffer.duplicate() + .position(offset) + .limit(offset + size).asInstanceOf[ByteBuffer].slice()) + }.toArray + } + } + + /** + * Updates the stats and based on the previous push result decides whether to push more blocks + * or stop. + * + * @param bytesPushed number of bytes pushed. + * @param address address of the remote service + * @param remainingBlocks remaining blocks + * @param pushResult result of the last push + * @return true if more blocks should be pushed; false otherwise. + */ + private def updateStateAndCheckIfPushMore( + bytesPushed: Long, + address: BlockManagerId, + remainingBlocks: HashSet[String], + pushResult: PushResult): Boolean = synchronized { + remainingBlocks -= pushResult.blockId + bytesInFlight -= bytesPushed + numBlocksInFlightPerAddress(address) = numBlocksInFlightPerAddress(address) - 1 + if (remainingBlocks.isEmpty) { + reqsInFlight -= 1 + } + if (pushResult.failure != null && pushResult.failure.getCause.isInstanceOf[ConnectException]) { + // Remove all the blocks for this address just once because removing from pushRequests + // is expensive. If there is a ConnectException for the first block, all the subsequent + // blocks to that address will fail, so should avoid removing multiple times. + if (!unreachableBlockMgrs.contains(address)) { + var removed = 0 + unreachableBlockMgrs.add(address) + removed += pushRequests.dequeueAll(req => req.address == address).length + removed += deferredPushRequests.remove(address).map(_.length).getOrElse(0) + logWarning(s"Received a ConnectException from $address. " + + s"Dropping $removed push-requests and " + + s"not pushing any more blocks to this address.") + } + } + if (pushResult.failure != null && !errorHandler.shouldRetryError(pushResult.failure)) { + logDebug(s"Received after merge is finalized from $address. Not pushing any more blocks.") + return false + } else { + remainingBlocks.isEmpty && (pushRequests.nonEmpty || deferredPushRequests.nonEmpty) + } + } + + /** + * Convert the shuffle data file of the current mapper into a list of PushRequest. Basically, + * continuous blocks in the shuffle file are grouped into a single request to allow more + * efficient read of the block data. Each mapper for a given shuffle will receive the same + * list of BlockManagerIds as the target location to push the blocks to. All mappers in the + * same shuffle will map shuffle partition ranges to individual target locations in a consistent + * manner to make sure each target location receives shuffle blocks belonging to the same set + * of partition ranges. 0-length blocks and blocks that are large enough will be skipped. 
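To make the consistent partition-range-to-merger mapping described above concrete, here is a small sketch with assumed values (8 reduce partitions, 2 merger locations); every mapper evaluates the same expression and therefore derives the same assignment:
```
// The mergerId formula as used in prepareBlockPushRequests below
val numPartitions = 8
val numMergers = 2
val assignment = (0 until numPartitions).map { reduceId =>
  math.min(math.floor(reduceId * 1.0 / numPartitions * numMergers), numMergers - 1).toInt
}
// Reducers 0-3 go to merger 0, reducers 4-7 go to merger 1
assert(assignment == Seq(0, 0, 0, 0, 1, 1, 1, 1))
```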
+ * + * @param numPartitions sumber of shuffle partitions in the shuffle file + * @param partitionId map index of the current mapper + * @param shuffleId shuffleId of current shuffle + * @param dataFile shuffle data file + * @param partitionLengths array of sizes of blocks in the shuffle data file + * @param mergerLocs target locations to push blocks to + * @param transportConf transportConf used to create FileSegmentManagedBuffer + * @return List of the PushRequest, randomly shuffled. + * + * VisibleForTesting + */ + private[shuffle] def prepareBlockPushRequests( + numPartitions: Int, + partitionId: Int, + shuffleId: Int, + dataFile: File, + partitionLengths: Array[Long], + mergerLocs: Seq[BlockManagerId], + transportConf: TransportConf): Seq[PushRequest] = { + var offset = 0L + var currentReqSize = 0 + var currentReqOffset = 0L + var currentMergerId = 0 + val numMergers = mergerLocs.length + val requests = new ArrayBuffer[PushRequest] + var blocks = new ArrayBuffer[(BlockId, Int)] + for (reduceId <- 0 until numPartitions) { + val blockSize = partitionLengths(reduceId) + logDebug( + s"Block ${ShufflePushBlockId(shuffleId, partitionId, reduceId)} is of size $blockSize") + // Skip 0-length blocks and blocks that are large enough + if (blockSize > 0) { + val mergerId = math.min(math.floor(reduceId * 1.0 / numPartitions * numMergers), + numMergers - 1).asInstanceOf[Int] + // Start a new PushRequest if the current request goes beyond the max batch size, + // or the number of blocks in the current request goes beyond the limit per destination, + // or the next block push location is for a different shuffle service, or the next block + // exceeds the max block size to push limit. This guarantees that each PushRequest + // represents continuous blocks in the shuffle file to be pushed to the same shuffle + // service, and does not go beyond existing limitations. 
+ if (currentReqSize + blockSize <= maxBlockBatchSize + && blocks.size < maxBlocksInFlightPerAddress + && mergerId == currentMergerId && blockSize <= maxBlockSizeToPush) { + // Add current block to current batch + currentReqSize += blockSize.toInt + } else { + if (blocks.nonEmpty) { + // Convert the previous batch into a PushRequest + requests += PushRequest(mergerLocs(currentMergerId), blocks.toSeq, + createRequestBuffer(transportConf, dataFile, currentReqOffset, currentReqSize)) + blocks = new ArrayBuffer[(BlockId, Int)] + } + // Start a new batch + currentReqSize = 0 + // Set currentReqOffset to -1 so we are able to distinguish between the initial value + // of currentReqOffset and when we are about to start a new batch + currentReqOffset = -1 + currentMergerId = mergerId + } + // Only push blocks under the size limit + if (blockSize <= maxBlockSizeToPush) { + val blockSizeInt = blockSize.toInt + blocks += ((ShufflePushBlockId(shuffleId, partitionId, reduceId), blockSizeInt)) + // Only update currentReqOffset if the current block is the first in the request + if (currentReqOffset == -1) { + currentReqOffset = offset + } + if (currentReqSize == 0) { + currentReqSize += blockSizeInt + } + } + } + offset += blockSize + } + // Add in the final request + if (blocks.nonEmpty) { + requests += PushRequest(mergerLocs(currentMergerId), blocks.toSeq, + createRequestBuffer(transportConf, dataFile, currentReqOffset, currentReqSize)) + } + requests.toSeq + } + + // Visible for testing + protected def createRequestBuffer( + conf: TransportConf, + dataFile: File, + offset: Long, + length: Long): ManagedBuffer = { + new FileSegmentManagedBuffer(conf, dataFile, offset, length) + } +} + +private[spark] object ShuffleBlockPusher { + + /** + * A request to push blocks to a remote shuffle service + * @param address remote shuffle service location to push blocks to + * @param blocks list of block IDs and their sizes + * @param reqBuffer a chunk of data in the shuffle data file corresponding to the continuous + * blocks represented in this request + */ + private[spark] case class PushRequest( + address: BlockManagerId, + blocks: Seq[(BlockId, Int)], + reqBuffer: ManagedBuffer) { + val size = blocks.map(_._2).sum + } + + /** + * Result of the block push. + * @param blockId blockId + * @param failure exception if the push was unsuccessful; null otherwise; + */ + private case class PushResult(blockId: String, failure: Throwable) + + private val BLOCK_PUSHER_POOL: ExecutorService = { + val conf = SparkEnv.get.conf + if (Utils.isPushBasedShuffleEnabled(conf)) { + val numThreads = conf.get(SHUFFLE_NUM_PUSH_THREADS) + .getOrElse(conf.getInt(SparkLauncher.EXECUTOR_CORES, 1)) + ThreadUtils.newDaemonFixedThreadPool(numThreads, "shuffle-block-push-thread") + } else { + null + } + } + + /** + * Stop the shuffle pusher pool if it isn't null. 
+ */ + private[spark] def stop(): Unit = { + if (BLOCK_PUSHER_POOL != null) { + BLOCK_PUSHER_POOL.shutdown() + } + } +} diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index 1429144c6f6e2..abff650b0611b 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -21,6 +21,7 @@ import org.apache.spark.{Partition, ShuffleDependency, SparkEnv, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.MapStatus +import org.apache.spark.util.Utils /** * The interface for customizing shuffle write process. The driver create a ShuffleWriteProcessor @@ -57,7 +58,23 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { createMetricsReporter(context)) writer.write( rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) - writer.stop(success = true).get + val mapStatus = writer.stop(success = true) + if (mapStatus.isDefined) { + // Initiate shuffle push process if push based shuffle is enabled + // The map task only takes care of converting the shuffle data file into multiple + // block push requests. It delegates pushing the blocks to a different thread-pool - + // ShuffleBlockPusher.BLOCK_PUSHER_POOL. + if (Utils.isPushBasedShuffleEnabled(SparkEnv.get.conf) && dep.getMergerLocs.nonEmpty) { + manager.shuffleBlockResolver match { + case resolver: IndexShuffleBlockResolver => + val dataFile = resolver.getDataFile(dep.shuffleId, mapId) + new ShuffleBlockPusher(SparkEnv.get.conf) + .initiateBlockPush(dataFile, writer.getPartitionLengths(), dep, partition.index) + case _ => + } + } + } + mapStatus.get } catch { case e: Exception => try { diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala index 4cc4ef5f1886e..a279b4c8f42f4 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriter.scala @@ -31,4 +31,7 @@ private[spark] abstract class ShuffleWriter[K, V] { /** Close this writer, passing along whether the map completed */ def stop(success: Boolean): Option[MapStatus] + + /** Get the lengths of each partition */ + def getPartitionLengths(): Array[Long] } diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index 83ebe3e12946c..af8d1e2fff413 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -45,6 +45,8 @@ private[spark] class SortShuffleWriter[K, V, C]( private var mapStatus: MapStatus = null + private var partitionLengths: Array[Long] = _ + private val writeMetrics = context.taskMetrics().shuffleWriteMetrics /** Write a bunch of records to this task's output */ @@ -67,7 +69,7 @@ private[spark] class SortShuffleWriter[K, V, C]( val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter( dep.shuffleId, mapId, dep.partitioner.numPartitions) sorter.writePartitionedMapOutput(dep.shuffleId, mapId, mapOutputWriter) - val partitionLengths = mapOutputWriter.commitAllPartitions().getPartitionLengths + partitionLengths = 
mapOutputWriter.commitAllPartitions().getPartitionLengths mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths, mapId) } @@ -93,6 +95,8 @@ private[spark] class SortShuffleWriter[K, V, C]( } } } + + override def getPartitionLengths(): Array[Long] = partitionLengths } private[spark] object SortShuffleWriter { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 7b084e73c92f9..73bf809a08a68 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -20,7 +20,7 @@ package org.apache.spark.storage import java.util.UUID import org.apache.spark.SparkException -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} /** * :: DeveloperApi :: @@ -81,6 +81,12 @@ case class ShuffleIndexBlockId(shuffleId: Int, mapId: Long, reduceId: Int) exten override def name: String = "shuffle_" + shuffleId + "_" + mapId + "_" + reduceId + ".index" } +@Since("3.2.0") +@DeveloperApi +case class ShufflePushBlockId(shuffleId: Int, mapIndex: Int, reduceId: Int) extends BlockId { + override def name: String = "shufflePush_" + shuffleId + "_" + mapIndex + "_" + reduceId +} + @DeveloperApi case class BroadcastBlockId(broadcastId: Long, field: String = "") extends BlockId { override def name: String = "broadcast_" + broadcastId + (if (field == "") "" else "_" + field) @@ -122,6 +128,7 @@ object BlockId { val SHUFFLE_BATCH = "shuffle_([0-9]+)_([0-9]+)_([0-9]+)_([0-9]+)".r val SHUFFLE_DATA = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).data".r val SHUFFLE_INDEX = "shuffle_([0-9]+)_([0-9]+)_([0-9]+).index".r + val SHUFFLE_PUSH = "shufflePush_([0-9]+)_([0-9]+)_([0-9]+)".r val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r val TASKRESULT = "taskresult_([0-9]+)".r val STREAM = "input-([0-9]+)-([0-9]+)".r @@ -140,6 +147,8 @@ object BlockId { ShuffleDataBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) case SHUFFLE_INDEX(shuffleId, mapId, reduceId) => ShuffleIndexBlockId(shuffleId.toInt, mapId.toLong, reduceId.toInt) + case SHUFFLE_PUSH(shuffleId, mapIndex, reduceId) => + ShufflePushBlockId(shuffleId.toInt, mapIndex.toInt, reduceId.toInt) case BROADCAST(broadcastId, field) => BroadcastBlockId(broadcastId.toLong, field.stripPrefix("_")) case TASKRESULT(taskId) => diff --git a/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala new file mode 100644 index 0000000000000..cc561e6106019 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/shuffle/ShuffleBlockPusherSuite.scala @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
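A quick sketch of the new block ID scheme added to BlockId.scala above: the name encodes shuffleId, mapIndex and reduceId, and `BlockId.apply` parses it back via the new SHUFFLE_PUSH pattern:
```
import org.apache.spark.storage.{BlockId, ShufflePushBlockId}

val pushBlock = ShufflePushBlockId(shuffleId = 0, mapIndex = 1, reduceId = 2)
assert(pushBlock.name == "shufflePush_0_1_2")
// Parsing the name yields an equal block ID
assert(BlockId("shufflePush_0_1_2") == pushBlock)
```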
+ */ + +package org.apache.spark.shuffle + +import java.io.File +import java.net.ConnectException +import java.nio.ByteBuffer +import java.util.concurrent.LinkedBlockingQueue + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Answers.RETURNS_SMART_NULLS +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito._ +import org.mockito.invocation.InvocationOnMock +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark._ +import org.apache.spark.network.buffer.ManagedBuffer +import org.apache.spark.network.shuffle.{BlockFetchingListener, BlockStoreClient} +import org.apache.spark.network.shuffle.ErrorHandler.BlockPushErrorHandler +import org.apache.spark.network.util.TransportConf +import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.shuffle.ShuffleBlockPusher.PushRequest +import org.apache.spark.storage._ + +class ShuffleBlockPusherSuite extends SparkFunSuite with BeforeAndAfterEach { + + @Mock(answer = RETURNS_SMART_NULLS) private var blockManager: BlockManager = _ + @Mock(answer = RETURNS_SMART_NULLS) private var dependency: ShuffleDependency[Int, Int, Int] = _ + @Mock(answer = RETURNS_SMART_NULLS) private var shuffleClient: BlockStoreClient = _ + + private var conf: SparkConf = _ + private var pushedBlocks = new ArrayBuffer[String] + + override def beforeEach(): Unit = { + super.beforeEach() + conf = new SparkConf(loadDefaults = false) + MockitoAnnotations.initMocks(this) + when(dependency.partitioner).thenReturn(new HashPartitioner(8)) + when(dependency.serializer).thenReturn(new JavaSerializer(conf)) + when(dependency.getMergerLocs).thenReturn(Seq(BlockManagerId("test-client", "test-client", 1))) + conf.set("spark.shuffle.push.based.enabled", "true") + conf.set("spark.shuffle.service.enabled", "true") + // Set the env because the shuffler writer gets the shuffle client instance from the env. 
+ val mockEnv = mock(classOf[SparkEnv]) + when(mockEnv.conf).thenReturn(conf) + when(mockEnv.blockManager).thenReturn(blockManager) + SparkEnv.set(mockEnv) + when(blockManager.blockStoreClient).thenReturn(shuffleClient) + } + + override def afterEach(): Unit = { + pushedBlocks.clear() + super.afterEach() + } + + private def interceptPushedBlocksForSuccess(): Unit = { + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val managedBuffers = invocation.getArguments()(3).asInstanceOf[Array[ManagedBuffer]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + (blocks, managedBuffers).zipped.foreach((blockId, buffer) => { + blockFetchListener.onBlockFetchSuccess(blockId, buffer) + }) + }) + } + + private def verifyPushRequests( + pushRequests: Seq[PushRequest], + expectedSizes: Seq[Int]): Unit = { + (pushRequests, expectedSizes).zipped.foreach((req, size) => { + assert(req.size == size) + }) + } + + test("A batch of blocks is limited by maxBlocksBatchSize") { + conf.set("spark.shuffle.push.maxBlockBatchSize", "1m") + conf.set("spark.shuffle.push.maxBlockSizeToPush", "2048k") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val largeBlockSize = 2 * 1024 * 1024 + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, largeBlockSize, largeBlockSize), mergerLocs, + mock(classOf[TransportConf])) + assert(pushRequests.length == 3) + verifyPushRequests(pushRequests, Seq(6, largeBlockSize, largeBlockSize)) + } + + test("Large blocks are excluded in the preparation") { + conf.set("spark.shuffle.push.maxBlockSizeToPush", "1k") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, 1028, 1024), mergerLocs, mock(classOf[TransportConf])) + assert(pushRequests.length == 2) + verifyPushRequests(pushRequests, Seq(6, 1024)) + } + + test("Number of blocks in a push request are limited by maxBlocksInFlightPerAddress ") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val blockPusher = new TestShuffleBlockPusher(conf) + val mergerLocs = dependency.getMergerLocs.map(loc => BlockManagerId("", loc.host, loc.port)) + val pushRequests = blockPusher.prepareBlockPushRequests(5, 0, 0, + mock(classOf[File]), Array(2, 2, 2, 2, 2), mergerLocs, mock(classOf[TransportConf])) + assert(pushRequests.length == 5) + verifyPushRequests(pushRequests, Seq(2, 2, 2, 2, 2)) + } + + test("Basic block push") { + interceptPushedBlocksForSuccess() + val blockPusher = new TestShuffleBlockPusher(conf) + blockPusher.initiateBlockPush(mock(classOf[File]), + Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + blockPusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Large blocks are skipped for push") { + conf.set("spark.shuffle.push.maxBlockSizeToPush", "1k") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array(2, 2, 
2, 2, 2, 2, 2, 1100), dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions - 1) + ShuffleBlockPusher.stop() + } + + test("Number of blocks in flight per address are limited by maxBlocksInFlightPerAddress") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(8)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Hit maxBlocksInFlightPerAddress limit so that the blocks are deferred") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "2") + var blockPendingResponse : String = null + var listener : BlockFetchingListener = null + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val managedBuffers = invocation.getArguments()(3).asInstanceOf[Array[ManagedBuffer]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + // Expecting 2 blocks + assert(blocks.length == 2) + if (blockPendingResponse == null) { + blockPendingResponse = blocks(1) + listener = blockFetchListener + // Respond with success only for the first block which will cause all the rest of the + // blocks to be deferred + blockFetchListener.onBlockFetchSuccess(blocks(0), managedBuffers(0)) + } else { + (blocks, managedBuffers).zipped.foreach((blockId, buffer) => { + blockFetchListener.onBlockFetchSuccess(blockId, buffer) + }) + } + }) + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 2) + // this will trigger push of deferred blocks + listener.onBlockFetchSuccess(blockPendingResponse, mock(classOf[ManagedBuffer])) + pusher.runPendingTasks() + verify(shuffleClient, times(4)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 8) + ShuffleBlockPusher.stop() + } + + test("Number of shuffle blocks grouped in a single push request is limited by " + + "maxBlockBatchSize") { + conf.set("spark.shuffle.push.maxBlockBatchSize", "1m") + interceptPushedBlocksForSuccess() + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush(mock(classOf[File]), + Array.fill(dependency.partitioner.numPartitions) { 512 * 1024 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(4)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == dependency.partitioner.numPartitions) + ShuffleBlockPusher.stop() + } + + test("Error retries") { + val pusher = new ShuffleBlockPusher(conf) + val errorHandler = pusher.createErrorHandler() + assert( + !errorHandler.shouldRetryError(new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))) + assert(errorHandler.shouldRetryError(new RuntimeException(new ConnectException()))) + assert( + 
errorHandler.shouldRetryError(new RuntimeException(new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))) + assert (errorHandler.shouldRetryError(new Throwable())) + } + + test("Error logging") { + val pusher = new ShuffleBlockPusher(conf) + val errorHandler = pusher.createErrorHandler() + assert( + !errorHandler.shouldLogError(new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX)))) + assert(!errorHandler.shouldLogError(new RuntimeException( + new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX)))) + assert(errorHandler.shouldLogError(new Throwable())) + } + + test("Blocks are continued to push even when a block push fails with collision " + + "exception") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val pusher = new TestShuffleBlockPusher(conf) + var failBlock: Boolean = true + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + if (failBlock) { + failBlock = false + // Fail the first block with the collision exception. + blockFetchListener.onBlockFetchFailure(blockId, new RuntimeException( + new IllegalArgumentException( + BlockPushErrorHandler.BLOCK_APPEND_COLLISION_DETECTED_MSG_PREFIX))) + } else { + pushedBlocks += blockId + blockFetchListener.onBlockFetchSuccess(blockId, mock(classOf[ManagedBuffer])) + } + }) + }) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(8)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.length == 7) + } + + test("More blocks are not pushed when a block push fails with too late " + + "exception") { + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "1") + val pusher = new TestShuffleBlockPusher(conf) + var failBlock: Boolean = true + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + if (failBlock) { + failBlock = false + // Fail the first block with the too late exception. 
+ blockFetchListener.onBlockFetchFailure(blockId, new RuntimeException( + new IllegalArgumentException(BlockPushErrorHandler.TOO_LATE_MESSAGE_SUFFIX))) + } else { + pushedBlocks += blockId + blockFetchListener.onBlockFetchSuccess(blockId, mock(classOf[ManagedBuffer])) + } + }) + }) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(1)) + .pushBlocks(any(), any(), any(), any(), any()) + assert(pushedBlocks.isEmpty) + } + + test("Connect exceptions remove all the push requests for that host") { + when(dependency.getMergerLocs).thenReturn( + Seq(BlockManagerId("client1", "client1", 1), BlockManagerId("client2", "client2", 2))) + conf.set("spark.reducer.maxBlocksInFlightPerAddress", "2") + when(shuffleClient.pushBlocks(any(), any(), any(), any(), any())) + .thenAnswer((invocation: InvocationOnMock) => { + val blocks = invocation.getArguments()(2).asInstanceOf[Array[String]] + pushedBlocks ++= blocks + val blockFetchListener = invocation.getArguments()(4).asInstanceOf[BlockFetchingListener] + blocks.foreach(blockId => { + blockFetchListener.onBlockFetchFailure( + blockId, new RuntimeException(new ConnectException())) + }) + }) + val pusher = new TestShuffleBlockPusher(conf) + pusher.initiateBlockPush( + mock(classOf[File]), Array.fill(dependency.partitioner.numPartitions) { 2 }, dependency, 0) + pusher.runPendingTasks() + verify(shuffleClient, times(2)) + .pushBlocks(any(), any(), any(), any(), any()) + // 2 blocks for each merger locations + assert(pushedBlocks.length == 4) + assert(pusher.unreachableBlockMgrs.size == 2) + } + + private class TestShuffleBlockPusher(conf: SparkConf) extends ShuffleBlockPusher(conf) { + private[this] val tasks = new LinkedBlockingQueue[Runnable] + + override protected def submitTask(task: Runnable): Unit = { + tasks.add(task) + } + + def runPendingTasks(): Unit = { + // This ensures that all the submitted tasks - updateStateAndCheckIfPushMore and pushUpToMax + // are run synchronously. + while (!tasks.isEmpty) { + tasks.take().run() + } + } + + override protected def createRequestBuffer( + conf: TransportConf, + dataFile: File, + offset: Long, + length: Long): ManagedBuffer = { + val managedBuffer = mock(classOf[ManagedBuffer]) + val byteBuffer = new Array[Byte](length.toInt) + when(managedBuffer.nioByteBuffer()).thenReturn(ByteBuffer.wrap(byteBuffer)) + managedBuffer + } + } +} From 6b34745cb9b294c91cd126c2ea44c039ee83cb84 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Fri, 8 Jan 2021 20:37:35 -0800 Subject: [PATCH 1009/1009] [SPARK-34049][SS] DataSource V2: Use Write abstraction in StreamExecution ### What changes were proposed in this pull request? This PR makes `StreamExecution` use the `Write` abstraction introduced in SPARK-33779. Note: we will need separate plans for streaming writes in order to support the required distribution and ordering in SS. This change only migrates to the `Write` abstraction. ### Why are the changes needed? These changes prevent exceptions from data sources that implement only the `build` method in `WriteBuilder`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #31093 from aokolnychyi/spark-34049. 
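For context, a minimal sketch of the kind of sink this change accommodates: a `WriteBuilder` that only overrides `build()`, whose streaming writer `StreamExecution` can now obtain via `Write#toStreaming` instead of requiring `buildForStreaming()`. The `MySinkWriteBuilder` name and the injected `StreamingWrite` are placeholders, not part of this patch:
```
import org.apache.spark.sql.connector.write.{Write, WriteBuilder}
import org.apache.spark.sql.connector.write.streaming.StreamingWrite

// A builder that only implements build(); the returned Write exposes its streaming writer.
class MySinkWriteBuilder(streamingWrite: StreamingWrite) extends WriteBuilder {
  override def build(): Write = new Write {
    override def toStreaming: StreamingWrite = streamingWrite
  }
}
```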
Authored-by: Anton Okolnychyi Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/connector/InMemoryTable.scala | 10 ++++++---- .../sql/execution/streaming/StreamExecution.scala | 9 +++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala index a1253dfe67e7a..27561857c1225 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/InMemoryTable.scala @@ -274,11 +274,13 @@ class InMemoryTable( this } - override def buildForBatch(): BatchWrite = writer + override def build(): Write = new Write { + override def toBatch: BatchWrite = writer - override def buildForStreaming(): StreamingWrite = streamingWriter match { - case exc: StreamingNotSupportedOperation => exc.throwsException() - case s => s + override def toStreaming: StreamingWrite = streamingWriter match { + case exc: StreamingNotSupportedOperation => exc.throwsException() + case s => s + } } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index c9f40fa22bf9e..67803ad76d5e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -627,21 +627,22 @@ abstract class StreamExecution( inputPlan.schema, new CaseInsensitiveStringMap(options.asJava)) val writeBuilder = table.newWriteBuilder(info) - outputMode match { + val write = outputMode match { case Append => - writeBuilder.buildForStreaming() + writeBuilder.build() case Complete => // TODO: we should do this check earlier when we have capability API. require(writeBuilder.isInstanceOf[SupportsTruncate], table.name + " does not support Complete mode.") - writeBuilder.asInstanceOf[SupportsTruncate].truncate().buildForStreaming() + writeBuilder.asInstanceOf[SupportsTruncate].truncate().build() case Update => require(writeBuilder.isInstanceOf[SupportsStreamingUpdateAsAppend], table.name + " does not support Update mode.") - writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].buildForStreaming() + writeBuilder.asInstanceOf[SupportsStreamingUpdateAsAppend].build() } + write.toStreaming } protected def purge(threshold: Long): Unit = {